Add audio visualizer with spectrogram, beat-reactive art, and device selection

- New audio_analyzer service: loopback capture via soundcard + numpy FFT
- Real-time spectrogram bars below album art with accent color gradient
- Album art and vinyl pulse to bass energy beats
- WebSocket subscriber pattern for opt-in audio data streaming
- Audio device selection in Settings tab with auto-detect fallback
- Optimized FFT pipeline: vectorized cumsum bin grouping, pre-serialized JSON broadcast
- Visualizer config: enabled/fps/bins/device in config.yaml
- Optional deps: soundcard + numpy (graceful degradation if missing)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
315
media_server/services/audio_analyzer.py
Normal file
315
media_server/services/audio_analyzer.py
Normal file
@@ -0,0 +1,315 @@
|
||||
"""Audio spectrum analyzer service using system loopback capture."""
|
||||
|
||||
import logging
|
||||
import platform
|
||||
import threading
|
||||
import time
|
||||
|
||||
logger = logging.getLogger(__name__)

# Lazily imported optional dependencies; stay None until the first
# successful import so the visualizer can degrade gracefully.
_np = None
_sc = None


def _load_numpy():
    """Import numpy on first use; return the module, or None if unavailable."""
    global _np
    if _np is not None:
        return _np
    try:
        import numpy as np
    except ImportError:
        # Logged (not raised) so callers can simply check for None.
        logger.info("numpy not installed - audio visualizer unavailable")
        return None
    _np = np
    return _np


def _load_soundcard():
    """Import soundcard on first use; return the module, or None if unavailable."""
    global _sc
    if _sc is not None:
        return _sc
    try:
        import soundcard as sc
    except ImportError:
        logger.info("soundcard not installed - audio visualizer unavailable")
        return None
    _sc = sc
    return _sc
|
||||
|
||||
|
||||
class AudioAnalyzer:
    """Captures system audio loopback and performs real-time FFT analysis.

    A daemon thread records the system output ("what you hear") through a
    loopback microphone, applies a Hann window + FFT to each chunk, groups
    the magnitudes into logarithmically spaced bins, normalizes them, and
    publishes the latest frame for readers of ``get_frequency_data()``.
    The published frame is guarded by a lock, so reads are thread-safe.
    """

    def __init__(
        self,
        num_bins: int = 32,
        sample_rate: int = 44100,
        chunk_size: int = 2048,
        target_fps: int = 25,
        device_name: str | None = None,
    ):
        # num_bins: number of output spectrum bars per frame.
        # sample_rate: capture rate in Hz.
        # chunk_size: samples per FFT frame (also the recorder block size).
        # target_fps: upper bound on the analysis/publish rate.
        # device_name: optional partial, case-insensitive name of the
        #   loopback device to use; None means auto-detect.
        self.num_bins = num_bins
        self.sample_rate = sample_rate
        self.chunk_size = chunk_size
        self.target_fps = target_fps
        self.device_name = device_name

        self._running = False
        self._thread: threading.Thread | None = None
        self._lock = threading.Lock()
        # Latest frame: {"frequencies": [...], "bass": float}; None until
        # the first frame has been captured.
        self._data: dict | None = None
        self._current_device_name: str | None = None

        # Pre-compute logarithmic bin edges
        self._bin_edges = self._compute_bin_edges()

    def _compute_bin_edges(self) -> list[int]:
        """Compute logarithmic frequency bin boundaries for perceptual grouping.

        Returns ``num_bins + 1`` rfft bin indices spanning 20 Hz up to
        16 kHz (or Nyquist, whichever is lower), spaced geometrically so
        low frequencies get finer resolution. Empty list if numpy is missing.
        """
        np = _load_numpy()
        if np is None:
            return []

        fft_size = self.chunk_size // 2 + 1
        min_freq = 20.0
        max_freq = min(16000.0, self.sample_rate / 2)

        edges = []
        for i in range(self.num_bins + 1):
            # Geometric interpolation between min_freq and max_freq.
            freq = min_freq * (max_freq / min_freq) ** (i / self.num_bins)
            # Convert frequency (Hz) to an rfft bin index, clamped in range.
            bin_idx = int(freq * self.chunk_size / self.sample_rate)
            edges.append(min(bin_idx, fft_size - 1))
        return edges

    @property
    def available(self) -> bool:
        """Whether audio capture dependencies (numpy + soundcard) are available."""
        return _load_numpy() is not None and _load_soundcard() is not None

    @property
    def running(self) -> bool:
        """Whether capture is currently active."""
        return self._running

    def start(self) -> bool:
        """Start audio capture in a background thread. Returns False if unavailable.

        Note: returns True as soon as the thread is launched; device discovery
        happens on the thread, so ``running`` may later flip to False if no
        loopback device is found.
        """
        if self._running:
            return True
        if not self.available:
            return False

        self._running = True
        # Daemon thread so a stuck capture never blocks interpreter shutdown.
        self._thread = threading.Thread(target=self._capture_loop, daemon=True)
        self._thread.start()
        return True

    def stop(self) -> None:
        """Stop audio capture and cleanup."""
        self._running = False
        if self._thread:
            # Bounded join: the capture loop checks _running between chunks.
            self._thread.join(timeout=3.0)
            self._thread = None
        with self._lock:
            self._data = None

    def get_frequency_data(self) -> dict | None:
        """Return latest frequency data (thread-safe). None if not running.

        Each frame is a fresh dict object, so consumers may use identity
        comparison to skip duplicate frames.
        """
        with self._lock:
            return self._data

    @staticmethod
    def list_loopback_devices() -> list[dict[str, str]]:
        """List all available loopback audio devices.

        Returns a list of ``{"id": ..., "name": ...}`` entries; empty when
        the soundcard package is missing or enumeration fails.
        """
        sc = _load_soundcard()
        if sc is None:
            return []

        devices = []
        try:
            # COM may be needed on Windows for WASAPI
            if platform.system() == "Windows":
                try:
                    import comtypes
                    comtypes.CoInitializeEx(comtypes.COINIT_MULTITHREADED)
                except Exception:
                    # Best-effort: enumeration may still work without it.
                    pass

            loopback_mics = sc.all_microphones(include_loopback=True)
            for mic in loopback_mics:
                if mic.isloopback:
                    devices.append({"id": mic.id, "name": mic.name})
        except Exception as e:
            logger.warning("Failed to list loopback devices: %s", e)

        return devices

    def _find_loopback_device(self):
        """Find a loopback device for system audio capture.

        Preference order: the requested ``device_name`` (partial match),
        then the first loopback device, then a loopback whose name contains
        the default speaker's name. Returns None when nothing suitable exists.
        """
        sc = _load_soundcard()
        if sc is None:
            return None

        try:
            loopback_mics = sc.all_microphones(include_loopback=True)

            # If a specific device is requested, find it by name (partial match)
            if self.device_name:
                target = self.device_name.lower()
                for mic in loopback_mics:
                    if mic.isloopback and target in mic.name.lower():
                        logger.info("Found requested loopback device: %s", mic.name)
                        self._current_device_name = mic.name
                        return mic
                logger.warning("Requested device '%s' not found, falling back to default", self.device_name)

            # Default: first loopback device
            for mic in loopback_mics:
                if mic.isloopback:
                    logger.info("Found loopback device: %s", mic.name)
                    self._current_device_name = mic.name
                    return mic

            # Fallback: try to get default speaker's loopback
            default_speaker = sc.default_speaker()
            if default_speaker:
                for mic in loopback_mics:
                    if default_speaker.name in mic.name:
                        logger.info("Found speaker loopback: %s", mic.name)
                        self._current_device_name = mic.name
                        return mic

        except Exception as e:
            logger.warning("Failed to find loopback device: %s", e)

        return None

    def set_device(self, device_name: str | None) -> bool:
        """Change the loopback device. Restarts capture if running. Returns True on success."""
        was_running = self._running
        if was_running:
            self.stop()

        self.device_name = device_name
        # Actual device name is re-resolved on the next capture start.
        self._current_device_name = None

        if was_running:
            return self.start()
        return True

    @property
    def current_device(self) -> str | None:
        """Return the name of the currently active loopback device."""
        return self._current_device_name

    def _capture_loop(self) -> None:
        """Background thread: capture audio and compute FFT continuously."""
        # Initialize COM on Windows (required for WASAPI/SoundCard)
        if platform.system() == "Windows":
            try:
                import comtypes
                comtypes.CoInitializeEx(comtypes.COINIT_MULTITHREADED)
            except Exception:
                # Fallback when comtypes is unavailable: raw ole32 call.
                try:
                    import ctypes
                    ctypes.windll.ole32.CoInitializeEx(0, 0)
                except Exception as e:
                    logger.warning("Failed to initialize COM: %s", e)

        np = _load_numpy()
        sc = _load_soundcard()
        if np is None or sc is None:
            self._running = False
            return

        device = self._find_loopback_device()
        if device is None:
            logger.warning("No loopback audio device found - visualizer disabled")
            self._running = False
            return

        interval = 1.0 / self.target_fps
        # Hann window reduces spectral leakage in each FFT frame.
        window = np.hanning(self.chunk_size)

        # Pre-compute bin edge pairs for vectorized grouping
        edges = self._bin_edges
        bin_starts = np.array([edges[i] for i in range(self.num_bins)], dtype=np.intp)
        # max(...) guarantees every bin spans at least one FFT index, so the
        # per-bin average below never divides by zero.
        bin_ends = np.array([max(edges[i + 1], edges[i] + 1) for i in range(self.num_bins)], dtype=np.intp)

        try:
            with device.recorder(
                samplerate=self.sample_rate,
                channels=1,
                blocksize=self.chunk_size,
            ) as recorder:
                logger.info("Audio capture started on: %s", device.name)
                while self._running:
                    t0 = time.monotonic()

                    try:
                        data = recorder.record(numframes=self.chunk_size)
                    except Exception as e:
                        # Transient read failures are expected (device busy);
                        # back off one frame and retry.
                        logger.debug("Audio capture read error: %s", e)
                        time.sleep(interval)
                        continue

                    # Mono mix if needed
                    if data.ndim > 1:
                        mono = data.mean(axis=1)
                    else:
                        mono = data.ravel()

                    if len(mono) < self.chunk_size:
                        # Short read - skip this frame rather than FFT a
                        # partial buffer.
                        time.sleep(interval)
                        continue

                    # Apply window and compute FFT
                    windowed = mono[:self.chunk_size] * window
                    fft_mag = np.abs(np.fft.rfft(windowed))

                    # Group into logarithmic bins (vectorized via cumsum):
                    # cumsum[e] - cumsum[s] equals fft_mag[s:e].sum(), so one
                    # cumulative pass replaces a Python loop over bins.
                    cumsum = np.concatenate(([0.0], np.cumsum(fft_mag)))
                    counts = bin_ends - bin_starts
                    bins = (cumsum[bin_ends] - cumsum[bin_starts]) / counts

                    # Normalize to 0-1
                    max_val = bins.max()
                    if max_val > 0:
                        bins *= (1.0 / max_val)

                    # Bass energy: average of first 4 bins (~20-200Hz)
                    bass = float(bins[:4].mean()) if self.num_bins >= 4 else 0.0

                    # Round for compact JSON
                    frequencies = np.round(bins, 3).tolist()
                    bass = round(bass, 3)

                    with self._lock:
                        # Publish a fresh dict each frame so readers can
                        # dedupe by object identity.
                        self._data = {"frequencies": frequencies, "bass": bass}

                    # Throttle to target FPS
                    elapsed = time.monotonic() - t0
                    if elapsed < interval:
                        time.sleep(interval - elapsed)

        except Exception as e:
            logger.error("Audio capture loop error: %s", e)
        finally:
            self._running = False
            logger.info("Audio capture stopped")
|
||||
|
||||
|
||||
# Module-level singleton instance.
_analyzer: AudioAnalyzer | None = None


def get_audio_analyzer(
    num_bins: int = 32,
    sample_rate: int = 44100,
    target_fps: int = 25,
    device_name: str | None = None,
) -> AudioAnalyzer:
    """Get or create the global AudioAnalyzer instance.

    The keyword arguments are honored only on the first call; subsequent
    calls return the already-created instance unchanged.
    """
    global _analyzer
    if _analyzer is not None:
        return _analyzer
    _analyzer = AudioAnalyzer(
        num_bins=num_bins,
        sample_rate=sample_rate,
        target_fps=target_fps,
        device_name=device_name,
    )
    return _analyzer
|
||||
@@ -1,6 +1,7 @@
|
||||
"""WebSocket connection manager and status broadcaster."""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
from typing import Any, Callable, Coroutine
|
||||
@@ -23,6 +24,10 @@ class ConnectionManager:
|
||||
self._position_broadcast_interval: float = 5.0 # Send position updates every 5s during playback
|
||||
self._last_broadcast_time: float = 0.0
|
||||
self._running: bool = False
|
||||
# Audio visualizer
|
||||
self._visualizer_subscribers: set[WebSocket] = set()
|
||||
self._audio_task: asyncio.Task | None = None
|
||||
self._audio_analyzer = None
|
||||
|
||||
async def connect(self, websocket: WebSocket) -> None:
|
||||
"""Accept a new WebSocket connection."""
|
||||
@@ -44,6 +49,7 @@ class ConnectionManager:
|
||||
"""Remove a WebSocket connection."""
|
||||
async with self._lock:
|
||||
self._active_connections.discard(websocket)
|
||||
self._visualizer_subscribers.discard(websocket)
|
||||
logger.info(
|
||||
"WebSocket client disconnected. Total: %d", len(self._active_connections)
|
||||
)
|
||||
@@ -83,6 +89,85 @@ class ConnectionManager:
|
||||
await self.broadcast(message)
|
||||
logger.info("Broadcast sent: links_changed")
|
||||
|
||||
async def subscribe_visualizer(self, websocket: WebSocket) -> None:
|
||||
"""Subscribe a client to audio visualizer data."""
|
||||
async with self._lock:
|
||||
self._visualizer_subscribers.add(websocket)
|
||||
logger.debug("Visualizer subscriber added. Total: %d", len(self._visualizer_subscribers))
|
||||
|
||||
async def unsubscribe_visualizer(self, websocket: WebSocket) -> None:
|
||||
"""Unsubscribe a client from audio visualizer data."""
|
||||
async with self._lock:
|
||||
self._visualizer_subscribers.discard(websocket)
|
||||
logger.debug("Visualizer subscriber removed. Total: %d", len(self._visualizer_subscribers))
|
||||
|
||||
async def start_audio_monitor(self, analyzer) -> None:
|
||||
"""Start audio frequency broadcasting if analyzer is available."""
|
||||
self._audio_analyzer = analyzer
|
||||
if analyzer and analyzer.running:
|
||||
self._audio_task = asyncio.create_task(self._audio_broadcast_loop())
|
||||
logger.info("Audio visualizer broadcast started")
|
||||
|
||||
async def stop_audio_monitor(self) -> None:
|
||||
"""Stop audio frequency broadcasting."""
|
||||
if self._audio_task:
|
||||
self._audio_task.cancel()
|
||||
try:
|
||||
await self._audio_task
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
self._audio_task = None
|
||||
|
||||
    async def _audio_broadcast_loop(self) -> None:
        """Background loop: read frequency data from analyzer and broadcast to subscribers.

        Runs until cancelled by stop_audio_monitor(). Paces itself to the
        configured visualizer FPS and skips work entirely while there are
        no subscribers or the analyzer is not running.
        """
        # Imported here to avoid a module-level import cycle with config.
        from ..config import settings
        interval = 1.0 / settings.visualizer_fps

        _last_data = None

        while True:
            try:
                # Snapshot the subscriber set under the lock; all awaits
                # below happen outside it.
                async with self._lock:
                    subscribers = list(self._visualizer_subscribers)

                if not subscribers or not self._audio_analyzer or not self._audio_analyzer.running:
                    await asyncio.sleep(interval)
                    continue

                data = self._audio_analyzer.get_frequency_data()
                # Identity check: the analyzer publishes a new dict object per
                # frame, so `is` cheaply detects an unchanged (stale) frame.
                if data is None or data is _last_data:
                    await asyncio.sleep(interval)
                    continue
                _last_data = data

                # Pre-serialize once for all subscribers (avoids per-client JSON encoding)
                text = json.dumps({"type": "audio_data", "data": data}, separators=(',', ':'))

                # Returns the websocket on send failure so gather() yields
                # the list of dead connections; None means success.
                async def _send(ws: WebSocket) -> WebSocket | None:
                    try:
                        await ws.send_text(text)
                        return None
                    except Exception:
                        return ws

                results = await asyncio.gather(*(_send(ws) for ws in subscribers))

                failed = [ws for ws in results if ws is not None]
                if failed:
                    async with self._lock:
                        for ws in failed:
                            self._visualizer_subscribers.discard(ws)
                    # disconnect() re-acquires self._lock, so it must be
                    # called after the lock above is released.
                    for ws in failed:
                        await self.disconnect(ws)

                await asyncio.sleep(interval)

            except asyncio.CancelledError:
                # Normal shutdown path via stop_audio_monitor().
                break
            except Exception as e:
                logger.error("Error in audio broadcast: %s", e)
                await asyncio.sleep(interval)
|
||||
|
||||
def status_changed(
|
||||
self, old: dict[str, Any] | None, new: dict[str, Any]
|
||||
) -> bool:
|
||||
|
||||
Reference in New Issue
Block a user