From 4b2e8fc5ec7d9a6852b1a520cffddbe7df2892ea Mon Sep 17 00:00:00 2001 From: "alexei.dolgolyov" Date: Tue, 2 Jun 2026 03:30:43 +0300 Subject: [PATCH] docs(android): add audio-capture design + missing-functionality review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - android-audio-capture-plan.md — design behind the merged on-device audio capture feature (487259a). - android-missing-functionality.md — Android missing-feature review notes. --- ANDROID-REVIEW/android-audio-capture-plan.md | 308 ++++++++++++++++++ .../android-missing-functionality.md | 153 +++++++++ 2 files changed, 461 insertions(+) create mode 100644 ANDROID-REVIEW/android-audio-capture-plan.md create mode 100644 ANDROID-REVIEW/android-missing-functionality.md diff --git a/ANDROID-REVIEW/android-audio-capture-plan.md b/ANDROID-REVIEW/android-audio-capture-plan.md new file mode 100644 index 0000000..0b05391 --- /dev/null +++ b/ANDROID-REVIEW/android-audio-capture-plan.md @@ -0,0 +1,308 @@ +# Plan: Android on-device audio capture + +> Status: proposed plan (not yet approved). No code changes. Last updated 2026-06-01. + +## Context + +LedGrab's audio-reactive features (music analyzer, audio value sources, band filters) +depend on capturing an audio stream and running it through `AudioAnalyzer` +(`server/src/ledgrab/core/audio/analysis.py`). On desktop this is fed by **WASAPI** +(Windows) or **Sounddevice/PortAudio** (cross-platform). On the **experimental +Android-TV build** neither is available — `sounddevice` has no Chaquopy wheel and PortAudio +isn't bundled — so `core/audio/__init__.py` registers only `DemoAudioEngine`, and +audio-reactive lighting is effectively dead on Android. + +Android does not need PortAudio: the platform exposes **`AudioPlaybackCapture`** (API 29+), +which captures system playback audio and **takes a `MediaProjection` token — the very token +the app already obtains for screen capture** (`ScreenCapture(projection, …)`). 
This plan adds +a push-based Android audio engine so the TV box can drive sound-reactive lighting from its own +media playback, at parity with how desktop audio feeds the analyzer. + +The design mirrors the working screen-capture bridge +(`mediaprojection_engine.py` ↔ `ScreenCapture.kt` ↔ `PythonBridge`) and the existing audio +engine abstraction (`AudioCaptureEngine` / `AudioCaptureStreamBase` / +`AudioEngineRegistry`). **No new Python dependencies** (`numpy` is already bundled) → no +Chaquopy / `build.gradle.kts` `pip {}` changes. + +--- + +## Approach + +A new **push-based** audio engine registered in the existing `AudioEngineRegistry`: + +- **Python:** `AndroidAudioEngine` + `AndroidAudioCaptureStream` mirroring `SounddeviceEngine`, + but `read_chunk()` pops PCM from a module-level queue that **Kotlin fills** (mirror of + `mediaprojection_engine.push_frame`). High `ENGINE_PRIORITY` so + `AudioEngineRegistry.get_best_available_engine()` selects it on Android. The existing + `ManagedAudioStream` capture loop and `AudioAnalyzer` consume `read_chunk()` unchanged. +- **Android:** an `AudioCapture` helper using `AudioRecord` + `AudioPlaybackCaptureConfiguration` + (reusing `CaptureService`'s `MediaProjection`), pushing float32 PCM to Python. Mic + (`AudioSource.MIC`) fallback. Wired into `CaptureService` next to `ScreenCapture`. 
+ +``` +[media playback] → AudioRecord (AudioPlaybackCapture, reuses MediaProjection) + → AudioCapture.kt → PythonBridge.pushAudio(pcmFloat32, frames, channels) + → android_audio_engine.push_samples() [module-level queue] + → AndroidAudioCaptureStream.read_chunk() → ManagedAudioStream → AudioAnalyzer [unchanged] +``` + +--- + +## Part A — Python (server) + +**New file: `server/src/ledgrab/core/audio/android_audio_engine.py`** — mirror +`mediaprojection_engine.py` (queue + configure + push) and `sounddevice_engine.py` (engine/stream shape): + +```python +import queue +import numpy as np +from typing import Any, Dict, List +from ledgrab.core.audio.base import AudioCaptureEngine, AudioCaptureStreamBase, AudioDeviceInfo +from ledgrab.utils import get_logger + +logger = get_logger(__name__) + +_pcm_queue: "queue.Queue[np.ndarray]" = queue.Queue(maxsize=8) +_sample_rate = 48000 +_channels = 2 +_chunk_size = 1024 +_active = False + +def configure(sample_rate: int, channels: int, chunk_size: int) -> None: + """Called from Kotlin before audio frames start flowing. Drains stale PCM.""" + global _sample_rate, _channels, _chunk_size, _active + while not _pcm_queue.empty(): + try: _pcm_queue.get_nowait() + except queue.Empty: break + _sample_rate, _channels, _chunk_size = sample_rate, channels, chunk_size + _active = True + +def push_samples(pcm_float32: bytes) -> None: + """Push one interleaved float32 PCM chunk from Kotlin. 
Drops oldest if full.""" + samples = np.frombuffer(pcm_float32, dtype=np.float32) + try: + _pcm_queue.put_nowait(samples) + except queue.Full: + try: _pcm_queue.get_nowait() + except queue.Empty: pass + try: _pcm_queue.put_nowait(samples) + except queue.Full: pass + +def shutdown() -> None: + global _active + _active = False + + +class AndroidAudioCaptureStream(AudioCaptureStreamBase): + @property + def channels(self) -> int: return _channels + @property + def sample_rate(self) -> int: return _sample_rate + @property + def chunk_size(self) -> int: return _chunk_size + def initialize(self) -> None: + if not _active: + raise RuntimeError("Android audio engine not configured (only valid in-app).") + self._initialized = True + def cleanup(self) -> None: + self._initialized = False + def read_chunk(self) -> np.ndarray | None: + try: + return _pcm_queue.get(timeout=0.1) # 1-D float32 interleaved + except queue.Empty: + return None + + +class AndroidAudioEngine(AudioCaptureEngine): + ENGINE_TYPE = "android_playback" + ENGINE_PRIORITY = 100 # highest on Android (demo is lower) + @classmethod + def is_available(cls) -> bool: + from ledgrab.utils.platform import is_android + return is_android() and _active + @classmethod + def get_default_config(cls) -> Dict[str, Any]: + return {"sample_rate": _sample_rate, "channels": _channels, "chunk_size": _chunk_size} + @classmethod + def enumerate_devices(cls) -> List[AudioDeviceInfo]: + if not cls.is_available(): return [] + return [AudioDeviceInfo(index=0, name="Android playback (system audio)", + is_input=True, is_loopback=True, + channels=_channels, default_samplerate=float(_sample_rate))] + @classmethod + def create_stream(cls, device_index, is_loopback, config) -> AndroidAudioCaptureStream: + return AndroidAudioCaptureStream(device_index, is_loopback, {**cls.get_default_config(), **config}) +``` + +**Modify `server/src/ledgrab/core/audio/__init__.py`** — register behind a guarded import, +matching the existing `_has_wasapi` / 
`_has_sounddevice` pattern: + +```python +try: + from ledgrab.core.audio.android_audio_engine import AndroidAudioEngine + _has_android_audio = True +except ImportError: + _has_android_audio = False +... +if _has_android_audio: + AudioEngineRegistry.register(AndroidAudioEngine) +``` + +**Reused, unchanged:** `AudioEngineRegistry.get_best_available_engine()` (picks by priority), +`ManagedAudioStream._capture_loop()` (`audio_capture.py`), `AudioAnalyzer`, the audio value +sources, and the device-enumeration endpoints. The Android engine appears as one loopback +device named "Android playback (system audio)". + +--- + +## Part B — Android (Kotlin + manifest) + +**New file: `android/app/src/main/java/com/ledgrab/android/AudioCapture.kt`** + +Mirrors `ScreenCapture.kt`, taking the same `MediaProjection`: + +```kotlin +class AudioCapture( + private val projection: MediaProjection, + private val bridge: PythonBridge, + private val sampleRate: Int = 48000, + private val channels: Int = 2, + private val chunkFrames: Int = 1024, +) +``` + +- `start()` (API 29+, MediaProjection mode): + - Build `AudioPlaybackCaptureConfiguration(projection)` adding usages + `USAGE_MEDIA`, `USAGE_GAME`, `USAGE_UNKNOWN` (the capturable set). + - `AudioRecord.Builder().setAudioPlaybackCaptureConfig(cfg)` with + `AudioFormat(ENCODING_PCM_FLOAT, sampleRate, CHANNEL_IN_STEREO)`. + - On a dedicated `HandlerThread`, loop `audioRecord.read(floatBuf, …, READ_BLOCKING)` → + wrap into a little-endian float32 `ByteArray` (reusable buffer, like `ScreenCapture`'s + `frameBuffer`) → `bridge.pushAudio(bytes, framesRead, channels)`. +- `stop()`: stop/release `AudioRecord`, quit the thread. +- **Mic fallback** (`startMic()`): `AudioSource.MIC` for root mode (no MediaProjection) or + API < 29. Used only when playback capture is unavailable. 
+ +**Modify `android/app/src/main/java/com/ledgrab/android/PythonBridge.kt`** — add the audio +push path (same shape as `pushFrame`, with a cached PyObject handle): + +```kotlin +@Volatile private var androidAudioEngine: PyObject? = null + +fun configureAudio(sampleRate: Int, channels: Int, chunkFrames: Int) { + val engine = Python.getInstance().getModule("ledgrab.core.audio.android_audio_engine") + engine.callAttr("configure", sampleRate, channels, chunkFrames) + androidAudioEngine = engine +} +fun pushAudio(pcmFloat32: ByteArray, frames: Int, channels: Int) { + if (!running) return + androidAudioEngine?.let { + try { it.callAttr("push_samples", pcmFloat32) } + catch (e: Exception) { Log.w(TAG, "pushAudio failed: ${e.message}") } + } +} +``` + +**Modify `android/app/src/main/java/com/ledgrab/android/CaptureService.kt`** — in the +MediaProjection start path (where `ScreenCapture` is created with the projection), if +`RECORD_AUDIO` is granted and API ≥ 29, also `bridge.configureAudio(...)` and start an +`AudioCapture(projection, bridge)`. Stop/release it in `onDestroy` alongside `ScreenCapture`. +Root path → optional mic fallback (or skip; see Risks). + +**Modify `android/app/src/main/AndroidManifest.xml`:** +```xml +<uses-permission android:name="android.permission.RECORD_AUDIO" /> +<!-- Only if mic fallback is implemented: --> +<uses-permission android:name="android.permission.FOREGROUND_SERVICE_MICROPHONE" /> +``` +The existing `CaptureService` already declares `foregroundServiceType="mediaProjection|specialUse"` +and holds `FOREGROUND_SERVICE_MEDIA_PROJECTION`; add `microphone` to the type only if mic +fallback is implemented. + +**Modify `MainActivity.kt`** — request `RECORD_AUDIO` at runtime alongside the existing +`ensureNotificationPermission()` (POST_NOTIFICATIONS) flow, before starting capture. Capture +proceeds without audio if denied (graceful degradation). + +--- + +## Orchestration decision (the main trade-off) + +Desktop starts audio capture **on demand** when an audio-reactive source is acquired +(`AudioCaptureManager.acquire`). On Android, PCM only flows if Kotlin has set up `AudioRecord`. 
+ +- **MVP (recommended):** start `AudioCapture` when `CaptureService` starts (if `RECORD_AUDIO` + granted + MediaProjection mode + API ≥ 29) and push continuously; the bounded queue drops + frames when no audio source consumes them. Simplest; modest extra CPU. +- **Future optimization:** on-demand start/stop signaled Python→Kotlin (Chaquopy can call + Kotlin, as `BleBridge`/`UsbSerialBridge` show) so `AudioRecord` runs only while an + audio-reactive source is active. Defer unless CPU/battery on low-end boxes warrants it. + +--- + +## What does NOT change + +- **Frontend / API** — audio engine + device selection, the music analyzer UI, and audio value + sources are engine-agnostic; the Android engine shows up via the existing device enumeration. +- **`build.gradle.kts` / Chaquopy pip block** — no new Python packages. +- **Audio analysis pipeline** — `AudioAnalyzer`, band filters, `ManagedAudioStream` untouched. + +--- + +## Files + +**Create** +- `server/src/ledgrab/core/audio/android_audio_engine.py` +- `android/app/src/main/java/com/ledgrab/android/AudioCapture.kt` +- `server/tests/core/audio/test_android_audio_engine.py` + +**Modify** +- `server/src/ledgrab/core/audio/__init__.py` — guarded import + registry registration. +- `android/app/src/main/java/com/ledgrab/android/PythonBridge.kt` — `configureAudio` + `pushAudio`. +- `android/app/src/main/java/com/ledgrab/android/CaptureService.kt` — start/stop `AudioCapture`. +- `android/app/src/main/java/com/ledgrab/android/MainActivity.kt` — request `RECORD_AUDIO`. +- `android/app/src/main/AndroidManifest.xml` — `RECORD_AUDIO` (+ mic FGS if mic fallback). + +--- + +## Tests (Python — run on desktop CI, no Android device needed) + +New `server/tests/core/audio/test_android_audio_engine.py`: + +- `configure()` then `push_samples()` → `read_chunk()` returns the same float32 samples; + queue drops oldest when full (push > maxsize). 
+- `AndroidAudioEngine.is_available()` is `False` until `configure()` and only on Android + (monkeypatch `ledgrab.utils.platform.is_android`); `True` after. +- `enumerate_devices()` returns exactly one loopback device when active, `[]` otherwise. +- Integration: with `is_android()` patched true + `configure()`, `get_best_available_engine()` + returns `"android_playback"` (priority beats demo), and a stream created via + `AudioEngineRegistry.create_stream("android_playback", 0, True, {})` yields pushed chunks. +- Registry isolation: use `AudioEngineRegistry.clear_registry()` / re-register in fixtures so + desktop engines aren't disturbed. + +## Verification + +1. **Python:** `py -3.13 -m pytest tests/core/audio/test_android_audio_engine.py --no-cov -q` + (from `server/`), then the full suite. +2. **Lint:** `ruff check src/ tests/ --fix` (from `server/`). +3. **Android build:** `./gradlew :app:assembleDebug` (from `android/`). +4. **On device/emulator (manual):** install APK → grant `RECORD_AUDIO` + screen-capture consent + → start capture → play non-DRM media (e.g. a local video / YouTube web) → create an + audio-reactive value source bound to a strip → confirm the LEDs react to the audio, and the + Android playback device appears in audio device enumeration. + +## Risks / notes + +- **DRM opt-out:** Netflix/Disney+/etc. set audio as non-capturable; `AudioPlaybackCapture` + yields silence for them. Works for non-DRM media and the device's own audio. Document in UI. +- **API 29 minimum** for playback capture (minSdk is 24). API 24–28 and root mode (no + MediaProjection) → mic fallback only, or audio unsupported. Gate cleanly + log. +- **`RECORD_AUDIO`** is a runtime "dangerous" permission — must be requested; capture must + degrade gracefully when denied. +- **Format:** request `ENCODING_PCM_FLOAT` so Kotlin pushes float32 matching + `read_chunk()`'s contract (1-D interleaved float32, length = frames × channels). 
If a device + rejects float, capture 16-bit PCM and convert (`/32768.0`) before pushing. +- **Latency/CPU:** small `chunkFrames` (e.g. 1024 @ 48 kHz ≈ 21 ms) keeps reactivity tight; + continuous capture (MVP) adds modest CPU on low-end boxes — see the orchestration trade-off. +- **R8/ProGuard:** minify is disabled and the Python module is resolved by string from Kotlin; + no new keep-rules needed. diff --git a/ANDROID-REVIEW/android-missing-functionality.md b/ANDROID-REVIEW/android-missing-functionality.md new file mode 100644 index 0000000..d382164 --- /dev/null +++ b/ANDROID-REVIEW/android-missing-functionality.md @@ -0,0 +1,153 @@ +# Android (TV) — Missing Functionality Assessment + +> Status: review/feasibility document. No code changes. Last updated 2026-06-01. + +## Context + +LedGrab ships an **experimental on-device Android-TV build**: a Kotlin shell that +embeds the Python FastAPI server via **Chaquopy**, with Kotlin↔Python **bridges** +(`PythonBridge`, `BleBridge`, `UsbSerialBridge`). Several desktop features are +unavailable on this build because their Python backends rely on native libraries +that have no Android/Chaquopy wheels (`mss`, `dxcam`, `sounddevice`/PortAudio, +`opencv`, `nvidia-ml-py`, `winrt`, `dbus-next`), or on OS facilities Android +sandboxes differently. + +The README "Feature support by OS" table now carries an Android column reflecting +this. This document assesses **whether each missing feature can be added**, how, and +whether it's worth it. + +### The enabling pattern (why most of this is feasible) + +Every desktop capability that's "missing" on Android is missing only because of a +*native dependency*, not because the capability is impossible. 
Android exposes the +same capability through a platform API, and the codebase already has the bridge +shape to plug it in: + +> **Bridge pattern:** a Kotlin component captures an event/buffer → pushes it across +> the Chaquopy JNI boundary into a **module-level receiver** in a small Python engine +> → an existing engine/stream consumes it unchanged. + +Reference implementation: `server/src/ledgrab/core/capture_engines/mediaprojection_engine.py` +(`configure()` + `push_frame()` + a bounded `queue.Queue`) ↔ +`android/app/src/main/java/com/ledgrab/android/ScreenCapture.kt` ↔ +`PythonBridge.pushFrame()`. Screen capture already works on Android this exact way. + +So for most missing features the work is: **add a Kotlin capture source + a thin +Python receiver engine mirroring that pattern.** + +--- + +## Current Android capability matrix + +| Feature | Desktop | Android (TV) today | Missing? | +| ------- | ------- | ------------------ | -------- | +| Screen capture | DXCam/WGC/MSS | ✅ MediaProjection + root `screenrecord` | No | +| LED transports (network/USB-serial/BLE) | ✅ | ✅ (USB via Android driver, BLE via Android bridge) | No | +| System metrics | psutil | ✅ CPU/RAM/battery/thermal via `/proc`, `/sys` (`AndroidMetricsProvider`) | No | +| **Audio capture** | WASAPI / Sounddevice | ❌ no PortAudio | **Yes** | +| **Notification capture** | WinRT / D-Bus | ❌ listener only Win/Linux | **Yes** | +| Webcam capture | OpenCV | ❌ no OpenCV wheel | Yes (niche) | +| GPU monitoring | NVML | ❌ no NVIDIA GPU | Marginal | +| Capture from *another* Android phone | scrcpy/ADB | ❌ | Skip (redundant) | +| Automation: window/process conditions | Windows ctypes | ❌ sandboxed | Partial | +| Monitor names / multi-display | WMI / generic | Single built-in display | Low value | + +--- + +## Per-feature feasibility + +### 🔊 Audio capture — **FEASIBLE, HIGH VALUE** ⭐ (detailed plan exists) + +- **Blocker:** only `sounddevice`/PortAudio is missing — not the capability. 
+- **Android path:** `AudioPlaybackCapture` (API 29+) captures system playback audio and + **takes a `MediaProjection` token — which the app already obtains for screen capture.** + Kotlin `AudioRecord` → push PCM (float32) → a new push-based `AndroidAudioEngine` + mirroring `mediaprojection_engine.py`, registered in `core/audio/__init__.py`, feeding + the existing `AudioAnalyzer` unchanged. Mic (`AudioSource.MIC`) is the fallback. +- **Effort:** moderate. **Value:** high — music/sound-reactive lighting is a flagship use + on a TV box. **No new Python deps.** +- ⚠️ DRM-protected apps (Netflix etc.) opt out of playback capture; works for non-DRM + media and the device's own audio. Root mode (no MediaProjection) → mic-only. +- 📄 **See `android-audio-capture-plan.md`** for the full implementation plan. + +### 🔔 Notification capture — **FEASIBLE, HIGH VALUE** ⭐ (planned) + +- **Android is the *best* platform for this:** `NotificationListenerService` is the native, + event-push mechanism (no polling). +- **Path:** a `NotificationListenerService` resolves the posting app's display label and + pushes it via a module-level `push_notification()` into the existing + `os_notification_listener.py` pipeline (a new push-based `_AndroidBackend` alongside + `_WindowsBackend`/`_LinuxBackend`). Existing `NotificationColorStripSource` filters, + per-app colors/sounds, and the history endpoint all work unchanged. **No new Python deps.** +- **Permission:** user enables "Notification access" in Settings (`ACTION_NOTIFICATION_LISTENER_SETTINGS`); + no runtime-permission popup. +- **Effort:** moderate. **Value:** high. +- 📄 **Plan approved & detailed** — see `C:\Users\Alexei\.claude\plans\deep-enchanting-muffin.md` + (app-name parity; prompt-once permission UX). + +### 📷 Webcam capture — **FEASIBLE, LOW VALUE** + +- **Blocker** is `opencv-python-headless` (no Chaquopy cp311 wheel) — but capture doesn't + *need* OpenCV. 
Use **CameraX / Camera2** + `ImageReader` in Kotlin and push frames through + the same bridge as MediaProjection into a new `CameraBridgeEngine`. +- **Effort:** moderate. **Value:** low — TVs rarely have cameras; USB-UVC webcams need extra + device handling. Recommend deferring unless a concrete use case appears. + +### 🎮 GPU monitoring — **MARGINAL, SKIP FOR NOW** + +- NVML is desktop-NVIDIA only. Android GPU load lives in **vendor-specific sysfs** + (Adreno `/sys/class/kgsl/kgsl-3d0/gpubusy`, Mali `/sys/class/devfreq/*.mali/...`), + inconsistent and often root-only. +- CPU/RAM/battery/thermal are **already** covered by `AndroidMetricsProvider`. A best-effort + GPU-load reader could be added to that provider, but reliability is poor and value is low. + +### 🪟 Automation: window/process conditions — **PARTIAL** + +- Android forbids full window/process enumeration (`getRunningTasks` restricted since API 21). +- **Obtainable:** the *current foreground app package* via `UsageStatsManager` (needs the + `PACKAGE_USAGE_STATS` special access) or an `AccessibilityService`. +- So "when `<app>` is in the foreground → scene X" is feasible (mirrors + `automations/platform_detector.py`, which currently returns empty off-Windows); full + window-title matching is **not**. **Effort:** moderate. **Value:** moderate (per-app scenes + on a TV box). + +### 📱 Capture from *another* Android phone (scrcpy/ADB) — **SKIP** + +- Impractical and redundant: no `adb` binary in Chaquopy, TV boxes can't reliably host an + adb server, and the device already captures its **own** screen via MediaProjection. + +### 🖥️ Monitor names / multi-display — **LOW VALUE** + +- `DisplayManager` can report a better display name and enumerate secondary (HDMI) displays, + but MediaProjection captures the default display; capturing a secondary display is more + involved and rarely useful on a single-screen box. 
+ +--- + +## Prioritization + +| Priority | Feature | Effort | Value | New Python deps | Status | +| -------- | ------- | ------ | ----- | --------------- | ------ | +| 1 | Notification capture | Moderate | High | None | **Plan approved** | +| 2 | Audio capture | Moderate | High | None | **Plan written** (this folder) | +| 3 | Automation: foreground-app condition | Moderate | Moderate | None | Idea | +| 4 | Webcam capture (CameraX) | Moderate | Low | None | Idea | +| — | GPU load (vendor sysfs) | Low–Med | Low | None | Not recommended | +| — | Capture from another phone | — | — | — | Won't do | +| — | Multi-display / monitor names | Low | Low | None | Not recommended | + +**Recommended order:** ship notifications → ship audio → reassess. Both reuse existing +infrastructure (bridge pattern, the MediaProjection consent token, the audio/notification +pipelines) and add **zero** Python dependencies, so neither risks violating the Chaquopy +`--no-deps` build constraint documented in `CLAUDE.md`. + +## Cross-cutting notes + +- **No `build.gradle.kts` / Chaquopy pip impact** for notifications or audio — both use Android + platform APIs (Kotlin) + stdlib/`numpy` (already bundled) on the Python side. +- **Per-instance `PythonBridge`:** `PythonBridge` is created per `CaptureService` instance, so + system-bound services (e.g. a `NotificationListenerService`) call Python via the + process-global `Python.getInstance()` rather than borrowing that bridge. +- **Permissions are the recurring friction**, not the capture: audio needs `RECORD_AUDIO` + + (for playback capture) a MediaProjection token; notifications need the "Notification access" + settings toggle; foreground-app automation needs `PACKAGE_USAGE_STATS`.