feat(audio): Android on-device system playback capture
Enable audio-reactive lighting on the Android-TV build. A push-based AndroidAudioEngine captures system playback audio via AudioPlaybackCapture (API 29+), reusing the existing MediaProjection token, and feeds PCM into the unchanged AudioAnalyzer pipeline. No new Python deps; no Chaquopy/pip changes (numpy already bundled). - Python: android_audio_engine.py — module-level queue + configure/push_samples/shutdown mirroring mediaprojection_engine; AndroidAudioEngine (priority 100) registered behind a guarded import. push_samples copies and defensively trims/clamps each block so the analyzer can't crash on variable-length or non-frame-divisible PCM. - Kotlin: AudioCapture.kt — AudioRecord + AudioPlaybackCaptureConfiguration, fixed chunk-size block framing, little-endian float32, mic fallback; reads back the actual negotiated channel/sample rate. PythonBridge gains configureAudio/pushAudio/shutdownAudio with a cached module handle. - Wiring: CaptureService starts/stops AudioCapture in the MediaProjection path (gated on API>=29 + RECORD_AUDIO + live projection); MainActivity requests RECORD_AUDIO; manifest declares it. Degrades gracefully when denied; root path stays audio-less by design. - Tests: 13 desktop-CI tests incl. an over-length/non-divisible regression guard that exercises the full read_chunk -> AudioAnalyzer.analyze path.
This commit is contained in:
@@ -39,6 +39,14 @@
|
||||
<!-- POST_NOTIFICATIONS for Android 13+ foreground service notification -->
|
||||
<uses-permission android:name="android.permission.POST_NOTIFICATIONS" />
|
||||
|
||||
<!-- RECORD_AUDIO for on-device system-playback capture (AudioPlaybackCapture,
|
||||
API 29+) feeding audio-reactive lighting. Runtime "dangerous" permission,
|
||||
requested in MainActivity; capture degrades gracefully when denied.
|
||||
Playback capture runs under the existing mediaProjection FGS type, so no
|
||||
FOREGROUND_SERVICE_MICROPHONE / microphone FGS type is needed (that would
|
||||
only be required if the mic-fallback path ran inside the service). -->
|
||||
<uses-permission android:name="android.permission.RECORD_AUDIO" />
|
||||
|
||||
<!-- Autostart on boot — BootReceiver spawns CaptureService in root
|
||||
mode so capture resumes without the user touching the remote. -->
|
||||
<uses-permission android:name="android.permission.RECEIVE_BOOT_COMPLETED" />
|
||||
|
||||
@@ -0,0 +1,234 @@
|
||||
package com.ledgrab.android
|
||||
|
||||
import android.annotation.SuppressLint
|
||||
import android.media.AudioAttributes
|
||||
import android.media.AudioFormat
|
||||
import android.media.AudioPlaybackCaptureConfiguration
|
||||
import android.media.AudioRecord
|
||||
import android.media.MediaRecorder
|
||||
import android.media.projection.MediaProjection
|
||||
import android.os.Build
|
||||
import android.util.Log
|
||||
import java.nio.ByteBuffer
|
||||
import java.nio.ByteOrder
|
||||
|
||||
/**
 * Captures audio with [AudioRecord] and pushes interleaved little-endian
 * float32 PCM to the LedGrab Python server through [PythonBridge]
 * (`configureAudio` / `pushAudio` / `shutdownAudio`).
 *
 * Two sources:
 *  - [start] — system playback capture via `AudioPlaybackCapture` (API 29+),
 *    using the [MediaProjection] passed to the constructor.
 *  - [startMic] — microphone capture (`AudioSource.MIC`). See the policy
 *    warning on [startMic] before wiring it to any caller.
 *
 * Design: one dedicated capture thread and one reusable byte buffer per
 * session (no per-block allocation), with teardown in [stop].
 *
 * The capture format is negotiated by [AudioRecord]; the **actual** channel
 * count and sample rate are read back from the record and forwarded to
 * `configureAudio`, so the consumer's interleaving matches the bytes pushed
 * (e.g. a stereo request that the device satisfies as mono).
 *
 * Threading: [start], [startMic] and [stop] are not synchronized against each
 * other; only the `running` flag is shared (as `@Volatile`) with the capture
 * thread.
 *
 * @param projection MediaProjection used for playback capture; `null` makes
 *   [start] return false.
 * @param bridge receiver of the configure / push / shutdown calls.
 * @param sampleRate requested sample rate in Hz.
 * @param channels requested channel count; values >= 2 request stereo,
 *   anything else requests mono.
 * @param chunkFrames frames per pushed block; must be positive.
 */
class AudioCapture(
    private val projection: MediaProjection?,
    private val bridge: PythonBridge,
    private val sampleRate: Int = 48000,
    private val channels: Int = 2,
    private val chunkFrames: Int = 1024,
) {
    companion object {
        private const val TAG = "AudioCapture"
        private const val BYTES_PER_FLOAT = 4

        /** Upper bound on how long teardown waits for the capture thread. */
        private const val JOIN_TIMEOUT_MS = 500L

        /** Number of blocks of headroom requested for the AudioRecord buffer. */
        private const val BUFFER_BLOCKS = 4
    }

    private var audioRecord: AudioRecord? = null
    private var captureThread: Thread? = null

    @Volatile private var running = false

    /**
     * Start system playback capture (API 29+). Requires the app to hold
     * RECORD_AUDIO and a non-null [projection].
     *
     * @return true if capture began (or was already running), false if the
     *   platform is too old, no projection is available, or the record could
     *   not be built / started / configured.
     */
    @SuppressLint("MissingPermission")
    fun start(): Boolean {
        if (running) return true
        if (Build.VERSION.SDK_INT < Build.VERSION_CODES.Q) {
            Log.i(TAG, "Playback capture needs API 29+; skipping (have ${Build.VERSION.SDK_INT})")
            return false
        }
        val proj = projection
        if (proj == null) {
            Log.i(TAG, "No MediaProjection; playback capture unavailable")
            return false
        }

        // Both builders sit inside the try so that any rejection (bad format,
        // unusable projection, missing permission) degrades to "no audio"
        // instead of propagating into the caller.
        val record = try {
            val config = AudioPlaybackCaptureConfiguration.Builder(proj)
                .addMatchingUsage(AudioAttributes.USAGE_MEDIA)
                .addMatchingUsage(AudioAttributes.USAGE_GAME)
                .addMatchingUsage(AudioAttributes.USAGE_UNKNOWN)
                .build()
            AudioRecord.Builder()
                .setAudioFormat(audioFormat())
                .setBufferSizeInBytes(bufferBytes())
                .setAudioPlaybackCaptureConfig(config)
                .build()
        } catch (e: Exception) {
            Log.e(TAG, "Failed to build playback AudioRecord: ${e.message}", e)
            return false
        }
        return begin(record, "playback")
    }

    /**
     * Start microphone capture (fallback). Needs no MediaProjection but
     * requires RECORD_AUDIO. Returns true if capture began.
     *
     * ⚠️ SECURITY/POLICY: currently UNWIRED (no caller). Microphone capture is
     * a materially different posture than playback capture — it records real
     * room audio (bystander voices). Before wiring this into [CaptureService]:
     *  - add FOREGROUND_SERVICE_MICROPHONE permission + the `microphone` FGS
     *    type (on API 34+ the service is killed without it), and
     *  - add the Play Store privacy disclosure for microphone use,
     *  - re-trigger a security review.
     * Do NOT call this from inside the foreground service without the above.
     */
    @SuppressLint("MissingPermission")
    fun startMic(): Boolean {
        if (running) return true
        val record = try {
            AudioRecord.Builder()
                .setAudioSource(MediaRecorder.AudioSource.MIC)
                .setAudioFormat(audioFormat())
                .setBufferSizeInBytes(bufferBytes())
                .build()
        } catch (e: Exception) {
            Log.e(TAG, "Failed to build mic AudioRecord: ${e.message}", e)
            return false
        }
        return begin(record, "mic")
    }

    /** Stop capturing and release all resources. Safe to call repeatedly. */
    fun stop() {
        running = false
        releaseSession()
        runCatching { bridge.shutdownAudio() }
        Log.i(TAG, "Audio capture stopped")
    }

    // ── internals ──────────────────────────────────────────────────────

    /**
     * Stops the current [AudioRecord], waits (bounded) for the capture thread,
     * then releases the record. No-op when no session is held.
     *
     * Stopping the record first is expected to unblock a pending blocking
     * read so the loop can observe `running == false` and exit inside the
     * join window, before release() is called.
     */
    private fun releaseSession() {
        runCatching { audioRecord?.stop() }
        captureThread?.let { thread ->
            try {
                thread.join(JOIN_TIMEOUT_MS)
            } catch (e: InterruptedException) {
                // Preserve the caller's interrupt status instead of swallowing it.
                Thread.currentThread().interrupt()
            }
        }
        captureThread = null
        runCatching { audioRecord?.release() }
        audioRecord = null
    }

    /**
     * Validates [record], starts it, announces the negotiated format to the
     * bridge and launches the capture thread.
     *
     * On every failure path the record is released here, so callers never
     * have to clean up after a `false` return.
     *
     * @param record freshly built AudioRecord; ownership passes to this class.
     * @param mode label used only in log messages.
     * @return true once the capture thread has been started.
     */
    private fun begin(record: AudioRecord, mode: String): Boolean {
        if (record.state != AudioRecord.STATE_INITIALIZED) {
            Log.e(TAG, "AudioRecord ($mode) failed to initialize")
            runCatching { record.release() }
            return false
        }
        if (chunkFrames <= 0) {
            // A non-positive block size would make the capture loop allocate a
            // negative-length array or spin pushing empty blocks.
            Log.e(TAG, "AudioRecord ($mode) rejected: chunkFrames must be positive (got $chunkFrames)")
            runCatching { record.release() }
            return false
        }

        // begin() is only reached while running == false. A session that died
        // on a read error may still hold its AudioRecord and thread; release
        // them (and let that thread finish its own shutdown call) before the
        // references are overwritten below.
        releaseSession()

        val actualChannels = record.channelCount.coerceAtLeast(1)
        val actualRate = record.sampleRate

        // Confirm recording actually started before reporting success —
        // startRecording() can throw or leave the record in a non-recording
        // state, in which case read() would only ever return errors.
        val started = runCatching { record.startRecording() }.isSuccess &&
            record.recordingState == AudioRecord.RECORDSTATE_RECORDING
        if (!started) {
            Log.e(TAG, "AudioRecord ($mode) failed to start recording")
            runCatching { record.release() }
            return false
        }

        // Tell the consumer the real negotiated format before frames flow.
        // If that call fails the record is already recording, so it must be
        // stopped and released here or it would leak.
        try {
            bridge.configureAudio(actualRate, actualChannels, chunkFrames)
        } catch (e: Exception) {
            Log.e(TAG, "Failed to configure audio engine ($mode): ${e.message}", e)
            runCatching { record.stop() }
            runCatching { record.release() }
            return false
        }

        audioRecord = record
        running = true
        captureThread = Thread(
            { captureLoop(record, actualChannels) },
            "LedGrab-AudioCapture",
        ).also { it.start() }
        Log.i(TAG, "Audio capture started ($mode, sr=$actualRate ch=$actualChannels chunk=$chunkFrames)")
        return true
    }

    /**
     * Blocking read loop. Accumulates into fixed `chunkFrames * channels`
     * float blocks and pushes only COMPLETE blocks — [AudioRecord.read]
     * returns a variable count, so partial reads are stitched together here
     * rather than handed on as ragged chunks.
     *
     * @param record the recording AudioRecord owned by this session.
     * @param actualChannels channel count read back from [record].
     */
    private fun captureLoop(record: AudioRecord, actualChannels: Int) {
        val blockFloats = chunkFrames * actualChannels
        val floatBuf = FloatArray(blockFloats)
        // Reusable byte buffer: the bridge documents that the receiver copies
        // each pushed block, so the same backing array is overwritten for the
        // next one. ByteBuffer defaults to BIG_ENDIAN, so the order is set
        // explicitly to the little-endian layout the bridge expects.
        val byteBuf = ByteArray(blockFloats * BYTES_PER_FLOAT)
        val floatView = ByteBuffer.wrap(byteBuf).order(ByteOrder.LITTLE_ENDIAN).asFloatBuffer()

        var filled = 0
        while (running) {
            val n = record.read(floatBuf, filled, blockFloats - filled, AudioRecord.READ_BLOCKING)
            if (n < 0) {
                if (running) {
                    // A negative read means this AudioRecord is finished.
                    // Deactivate the engine so consumers stop treating the
                    // stream as live. This runs on the capture thread, so
                    // stop() is not called here (it would join this very
                    // thread); a later stop() or restart releases the record.
                    Log.w(TAG, "AudioRecord.read error: $n — stopping audio capture")
                    running = false
                    runCatching { bridge.shutdownAudio() }
                }
                break
            }
            filled += n
            if (filled < blockFloats) continue

            // Full block: serialise to little-endian bytes and hand it over.
            floatView.clear()
            floatView.put(floatBuf, 0, blockFloats)
            bridge.pushAudio(byteBuf)
            filled = 0
        }
    }

    /** Channel count actually requested from the platform: stereo (2) or mono (1). */
    private fun requestedChannelCount(): Int = if (channels >= 2) 2 else 1

    /** Input channel mask matching [requestedChannelCount]. */
    private fun channelMask(): Int =
        if (requestedChannelCount() == 2) AudioFormat.CHANNEL_IN_STEREO else AudioFormat.CHANNEL_IN_MONO

    /** Float PCM format at the requested sample rate and channel mask. */
    private fun audioFormat(): AudioFormat =
        AudioFormat.Builder()
            .setEncoding(AudioFormat.ENCODING_PCM_FLOAT)
            .setSampleRate(sampleRate)
            .setChannelMask(channelMask())
            .build()

    /**
     * Buffer size to request: the larger of the platform minimum and
     * [BUFFER_BLOCKS] blocks of headroom, so a slow consumer does not overrun
     * the buffer between reads.
     *
     * The headroom is computed from the channel count that [channelMask]
     * really requests (1 or 2), not the raw `channels` argument, so it is
     * always positive and never sized for channels that are not captured.
     */
    private fun bufferBytes(): Int {
        val minBuf = AudioRecord.getMinBufferSize(sampleRate, channelMask(), AudioFormat.ENCODING_PCM_FLOAT)
        val want = chunkFrames.coerceAtLeast(1) * requestedChannelCount() * BYTES_PER_FLOAT * BUFFER_BLOCKS
        // getMinBufferSize reports errors as non-positive values; fall back to
        // the headroom figure alone in that case.
        return if (minBuf > 0) maxOf(minBuf, want) else want
    }
}
|
||||
@@ -4,9 +4,11 @@ import android.app.Notification
|
||||
import android.app.NotificationChannel
|
||||
import android.app.NotificationManager
|
||||
import android.app.PendingIntent
|
||||
import android.Manifest
|
||||
import android.app.Service
|
||||
import android.content.Context
|
||||
import android.content.Intent
|
||||
import android.content.pm.PackageManager
|
||||
import android.content.pm.ServiceInfo
|
||||
import android.media.projection.MediaProjection
|
||||
import android.media.projection.MediaProjectionManager
|
||||
@@ -85,6 +87,7 @@ class CaptureService : Service() {
|
||||
private var bridge: PythonBridge? = null
|
||||
private var screenCapture: ScreenCapture? = null
|
||||
private var rootCapture: RootScreenrecord? = null
|
||||
private var audioCapture: AudioCapture? = null
|
||||
private var mediaProjection: MediaProjection? = null
|
||||
|
||||
// Service-scoped coroutine scope for the root-capture watchdog.
|
||||
@@ -338,6 +341,25 @@ class CaptureService : Service() {
|
||||
onProjectionStopped = { stopSelf() },
|
||||
).also { it.start() }
|
||||
|
||||
// Reuse the same projection to capture system playback audio so
// audio-reactive lighting works on-device (API 29+, RECORD_AUDIO
// granted). Best-effort: screen capture and the server keep running
// if audio is unavailable. Started AFTER ScreenCapture so the
// projection's callback is already registered.
if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.Q &&
    checkSelfPermission(Manifest.permission.RECORD_AUDIO) ==
    PackageManager.PERMISSION_GRANTED
) {
    // takeIf yields null when start() fails, so the field only ever holds a
    // capture that is actually running. (Assigning null from inside an
    // `also { }` block does not work: the outer assignment runs afterwards
    // and stores the failed instance again.)
    audioCapture = AudioCapture(projection, newBridge).takeIf { it.start() }
    if (audioCapture == null) {
        Log.i(TAG, "Playback audio capture unavailable — continuing without audio")
    }
} else {
    Log.i(TAG, "RECORD_AUDIO not granted or API < 29 — audio-reactive capture disabled")
}
|
||||
|
||||
Log.i(TAG, "LedGrab service started (MediaProjection) — web UI at $url")
|
||||
}
|
||||
|
||||
@@ -351,6 +373,10 @@ class CaptureService : Service() {
|
||||
screenCapture?.stop()
|
||||
screenCapture = null
|
||||
|
||||
// Stop audio before the server: stop() calls bridge.shutdownAudio().
|
||||
audioCapture?.stop()
|
||||
audioCapture = null
|
||||
|
||||
rootCapture?.stop()
|
||||
rootCapture = null
|
||||
|
||||
|
||||
@@ -53,6 +53,7 @@ class MainActivity : Activity() {
|
||||
private const val SERVER_PORT = 8080
|
||||
private const val REQUEST_MEDIA_PROJECTION = 1001
|
||||
private const val REQUEST_POST_NOTIFICATIONS = 1002
|
||||
private const val REQUEST_RECORD_AUDIO = 1003
|
||||
private const val QR_SIZE_PX = 560
|
||||
}
|
||||
|
||||
@@ -215,6 +216,7 @@ class MainActivity : Activity() {
|
||||
|
||||
private fun startCaptureService(resultCode: Int, resultData: Intent) {
|
||||
ensureNotificationPermission()
|
||||
ensureAudioPermission()
|
||||
val intent = CaptureService.createIntent(this, resultCode, resultData)
|
||||
ContextCompat.startForegroundService(this, intent)
|
||||
updateUI()
|
||||
@@ -471,4 +473,24 @@ class MainActivity : Activity() {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Asks for RECORD_AUDIO on API 29+ when it has not been granted yet, so the
 * capture service can record system playback audio for audio-reactive
 * lighting.
 *
 * Fire-and-forget: the result is not awaited here, because capture still
 * works without the permission (just without audio). A grant given through
 * this prompt takes effect on the next Start.
 *
 * NOTE(review): startCaptureService calls ensureNotificationPermission
 * immediately before this. An Activity is documented to handle one
 * permission request at a time, so if that call has a dialog in flight this
 * request may be dropped — confirm on a fresh install.
 */
private fun ensureAudioPermission() {
    // Short-circuit keeps the permission lookup off devices below API 29.
    val needsRequest = Build.VERSION.SDK_INT >= Build.VERSION_CODES.Q &&
        checkSelfPermission(Manifest.permission.RECORD_AUDIO) != PackageManager.PERMISSION_GRANTED
    if (!needsRequest) return

    @Suppress("DEPRECATION")
    requestPermissions(arrayOf(Manifest.permission.RECORD_AUDIO), REQUEST_RECORD_AUDIO)
}
|
||||
}
|
||||
|
||||
@@ -28,6 +28,7 @@ class PythonBridge(private val context: Context) {
|
||||
// single-writer/single-reader pattern we have here.
|
||||
@Volatile private var mediaProjectionEngine: PyObject? = null
|
||||
@Volatile private var rootEngine: PyObject? = null
|
||||
@Volatile private var androidAudioEngine: PyObject? = null
|
||||
|
||||
/**
|
||||
* Configure the MediaProjection engine with screen dimensions.
|
||||
@@ -53,6 +54,49 @@ class PythonBridge(private val context: Context) {
|
||||
Log.i(TAG, "Root screenrecord engine configured: ${width}x${height}")
|
||||
}
|
||||
|
||||
/**
 * Configures the Python `android_audio_engine` module with the capture
 * format and caches the module handle for [pushAudio] / [shutdownAudio].
 * Must be called before [pushAudio]; until then pushes are dropped.
 *
 * The handle is cached only after `configure` returns, so a failing
 * configure call leaves the previous state untouched and propagates the
 * exception to the caller.
 *
 * @param sampleRate sample rate in Hz, passed through to `configure`.
 * @param channels interleaved channel count, passed through to `configure`.
 * @param chunkFrames frames per pushed block, passed through to `configure`.
 */
fun configureAudio(sampleRate: Int, channels: Int, chunkFrames: Int) {
    androidAudioEngine = Python.getInstance()
        .getModule("ledgrab.core.audio.android_audio_engine")
        .also { module -> module.callAttr("configure", sampleRate, channels, chunkFrames) }
    Log.i(TAG, "Android audio engine configured: sr=$sampleRate ch=$channels chunk=$chunkFrames")
}
|
||||
|
||||
/**
 * Hands one PCM block to the Python audio engine's `push_samples`.
 *
 * The block is silently dropped when the bridge is not running or
 * [configureAudio] has not cached an engine handle. Failures inside the
 * Python call are logged and swallowed so a bad block never kills the
 * capture thread.
 *
 * @param pcmFloat32 raw block bytes; the caller ([AudioCapture]) sends
 *   interleaved little-endian float32 and reuses the array afterwards, which
 *   presumes the Python side copies it on receipt — verify against
 *   `push_samples`.
 */
fun pushAudio(pcmFloat32: ByteArray) {
    val target = (if (running) androidAudioEngine else null) ?: return
    try {
        target.callAttr("push_samples", pcmFloat32)
    } catch (e: Exception) {
        Log.w(TAG, "Failed to push audio: ${e.message}")
    }
}
|
||||
|
||||
/**
 * Calls `shutdown` on the cached Python audio engine and drops the cached
 * handle. Does nothing when no engine is configured, so repeated calls are
 * harmless. Invoked by [AudioCapture] both from its stop path and from its
 * capture thread after a read error.
 *
 * A failing `shutdown` call is logged; the handle is cleared regardless.
 */
fun shutdownAudio() {
    androidAudioEngine?.let { active ->
        try {
            active.callAttr("shutdown")
        } catch (e: Exception) {
            Log.w(TAG, "Failed to shut down audio engine: ${e.message}")
        }
        androidAudioEngine = null
    }
}
|
||||
|
||||
/**
|
||||
* Start the LedGrab FastAPI server on a background thread.
|
||||
*
|
||||
|
||||
Reference in New Issue
Block a user