Add audio visualizer with spectrogram, beat-reactive art, and device selection

- New audio_analyzer service: loopback capture via soundcard + numpy FFT
- Real-time spectrogram bars below album art with accent color gradient
- Album art and vinyl pulse to bass energy beats
- WebSocket subscriber pattern for opt-in audio data streaming
- Audio device selection in Settings tab with auto-detect fallback
- Optimized FFT pipeline: vectorized cumsum bin grouping, pre-serialized JSON broadcast
- Visualizer config: enabled/fps/bins/device in config.yaml
- Optional deps: soundcard + numpy (graceful degradation if missing)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
315
media_server/services/audio_analyzer.py
Normal file
315
media_server/services/audio_analyzer.py
Normal file
@@ -0,0 +1,315 @@
|
||||
"""Audio spectrum analyzer service using system loopback capture."""
|
||||
|
||||
import logging
|
||||
import platform
|
||||
import threading
|
||||
import time
|
||||
|
||||
logger = logging.getLogger(__name__)

# Lazily imported optional dependencies; stay None until the first
# successful import so the visualizer can degrade gracefully.
_np = None
_sc = None


def _load_numpy():
    """Import numpy on first use; return the module, or None if unavailable."""
    global _np
    if _np is not None:
        return _np
    try:
        import numpy as np
    except ImportError:
        # Logged (not raised) so callers can simply check for None.
        logger.info("numpy not installed - audio visualizer unavailable")
        return None
    _np = np
    return _np


def _load_soundcard():
    """Import soundcard on first use; return the module, or None if unavailable."""
    global _sc
    if _sc is not None:
        return _sc
    try:
        import soundcard as sc
    except ImportError:
        logger.info("soundcard not installed - audio visualizer unavailable")
        return None
    _sc = sc
    return _sc
|
||||
|
||||
|
||||
class AudioAnalyzer:
    """Captures system audio loopback and performs real-time FFT analysis.

    A daemon thread records the system output ("what you hear") through a
    loopback microphone, applies a Hann window + FFT to each chunk, groups
    the magnitudes into logarithmically spaced bins, normalizes them, and
    publishes the latest frame for readers of ``get_frequency_data()``.
    The published frame is guarded by a lock, so reads are thread-safe.
    """

    def __init__(
        self,
        num_bins: int = 32,
        sample_rate: int = 44100,
        chunk_size: int = 2048,
        target_fps: int = 25,
        device_name: str | None = None,
    ):
        # num_bins: number of output spectrum bars per frame.
        # sample_rate: capture rate in Hz.
        # chunk_size: samples per FFT frame (also the recorder block size).
        # target_fps: upper bound on the analysis/publish rate.
        # device_name: optional partial, case-insensitive name of the
        #   loopback device to use; None means auto-detect.
        self.num_bins = num_bins
        self.sample_rate = sample_rate
        self.chunk_size = chunk_size
        self.target_fps = target_fps
        self.device_name = device_name

        self._running = False
        self._thread: threading.Thread | None = None
        self._lock = threading.Lock()
        # Latest frame: {"frequencies": [...], "bass": float}; None until
        # the first frame has been captured.
        self._data: dict | None = None
        self._current_device_name: str | None = None

        # Pre-compute logarithmic bin edges
        self._bin_edges = self._compute_bin_edges()

    def _compute_bin_edges(self) -> list[int]:
        """Compute logarithmic frequency bin boundaries for perceptual grouping.

        Returns ``num_bins + 1`` rfft bin indices spanning 20 Hz up to
        16 kHz (or Nyquist, whichever is lower), spaced geometrically so
        low frequencies get finer resolution. Empty list if numpy is missing.
        """
        np = _load_numpy()
        if np is None:
            return []

        fft_size = self.chunk_size // 2 + 1
        min_freq = 20.0
        max_freq = min(16000.0, self.sample_rate / 2)

        edges = []
        for i in range(self.num_bins + 1):
            # Geometric interpolation between min_freq and max_freq.
            freq = min_freq * (max_freq / min_freq) ** (i / self.num_bins)
            # Convert frequency (Hz) to an rfft bin index, clamped in range.
            bin_idx = int(freq * self.chunk_size / self.sample_rate)
            edges.append(min(bin_idx, fft_size - 1))
        return edges

    @property
    def available(self) -> bool:
        """Whether audio capture dependencies (numpy + soundcard) are available."""
        return _load_numpy() is not None and _load_soundcard() is not None

    @property
    def running(self) -> bool:
        """Whether capture is currently active."""
        return self._running

    def start(self) -> bool:
        """Start audio capture in a background thread. Returns False if unavailable.

        Note: returns True as soon as the thread is launched; device discovery
        happens on the thread, so ``running`` may later flip to False if no
        loopback device is found.
        """
        if self._running:
            return True
        if not self.available:
            return False

        self._running = True
        # Daemon thread so a stuck capture never blocks interpreter shutdown.
        self._thread = threading.Thread(target=self._capture_loop, daemon=True)
        self._thread.start()
        return True

    def stop(self) -> None:
        """Stop audio capture and cleanup."""
        self._running = False
        if self._thread:
            # Bounded join: the capture loop checks _running between chunks.
            self._thread.join(timeout=3.0)
            self._thread = None
        with self._lock:
            self._data = None

    def get_frequency_data(self) -> dict | None:
        """Return latest frequency data (thread-safe). None if not running.

        Each frame is a fresh dict object, so consumers may use identity
        comparison to skip duplicate frames.
        """
        with self._lock:
            return self._data

    @staticmethod
    def list_loopback_devices() -> list[dict[str, str]]:
        """List all available loopback audio devices.

        Returns a list of ``{"id": ..., "name": ...}`` entries; empty when
        the soundcard package is missing or enumeration fails.
        """
        sc = _load_soundcard()
        if sc is None:
            return []

        devices = []
        try:
            # COM may be needed on Windows for WASAPI
            if platform.system() == "Windows":
                try:
                    import comtypes
                    comtypes.CoInitializeEx(comtypes.COINIT_MULTITHREADED)
                except Exception:
                    # Best-effort: enumeration may still work without it.
                    pass

            loopback_mics = sc.all_microphones(include_loopback=True)
            for mic in loopback_mics:
                if mic.isloopback:
                    devices.append({"id": mic.id, "name": mic.name})
        except Exception as e:
            logger.warning("Failed to list loopback devices: %s", e)

        return devices

    def _find_loopback_device(self):
        """Find a loopback device for system audio capture.

        Preference order: the requested ``device_name`` (partial match),
        then the first loopback device, then a loopback whose name contains
        the default speaker's name. Returns None when nothing suitable exists.
        """
        sc = _load_soundcard()
        if sc is None:
            return None

        try:
            loopback_mics = sc.all_microphones(include_loopback=True)

            # If a specific device is requested, find it by name (partial match)
            if self.device_name:
                target = self.device_name.lower()
                for mic in loopback_mics:
                    if mic.isloopback and target in mic.name.lower():
                        logger.info("Found requested loopback device: %s", mic.name)
                        self._current_device_name = mic.name
                        return mic
                logger.warning("Requested device '%s' not found, falling back to default", self.device_name)

            # Default: first loopback device
            for mic in loopback_mics:
                if mic.isloopback:
                    logger.info("Found loopback device: %s", mic.name)
                    self._current_device_name = mic.name
                    return mic

            # Fallback: try to get default speaker's loopback
            default_speaker = sc.default_speaker()
            if default_speaker:
                for mic in loopback_mics:
                    if default_speaker.name in mic.name:
                        logger.info("Found speaker loopback: %s", mic.name)
                        self._current_device_name = mic.name
                        return mic

        except Exception as e:
            logger.warning("Failed to find loopback device: %s", e)

        return None

    def set_device(self, device_name: str | None) -> bool:
        """Change the loopback device. Restarts capture if running. Returns True on success."""
        was_running = self._running
        if was_running:
            self.stop()

        self.device_name = device_name
        # Actual device name is re-resolved on the next capture start.
        self._current_device_name = None

        if was_running:
            return self.start()
        return True

    @property
    def current_device(self) -> str | None:
        """Return the name of the currently active loopback device."""
        return self._current_device_name

    def _capture_loop(self) -> None:
        """Background thread: capture audio and compute FFT continuously."""
        # Initialize COM on Windows (required for WASAPI/SoundCard)
        if platform.system() == "Windows":
            try:
                import comtypes
                comtypes.CoInitializeEx(comtypes.COINIT_MULTITHREADED)
            except Exception:
                # Fallback when comtypes is unavailable: raw ole32 call.
                try:
                    import ctypes
                    ctypes.windll.ole32.CoInitializeEx(0, 0)
                except Exception as e:
                    logger.warning("Failed to initialize COM: %s", e)

        np = _load_numpy()
        sc = _load_soundcard()
        if np is None or sc is None:
            self._running = False
            return

        device = self._find_loopback_device()
        if device is None:
            logger.warning("No loopback audio device found - visualizer disabled")
            self._running = False
            return

        interval = 1.0 / self.target_fps
        # Hann window reduces spectral leakage in each FFT frame.
        window = np.hanning(self.chunk_size)

        # Pre-compute bin edge pairs for vectorized grouping
        edges = self._bin_edges
        bin_starts = np.array([edges[i] for i in range(self.num_bins)], dtype=np.intp)
        # max(...) guarantees every bin spans at least one FFT index, so the
        # per-bin average below never divides by zero.
        bin_ends = np.array([max(edges[i + 1], edges[i] + 1) for i in range(self.num_bins)], dtype=np.intp)

        try:
            with device.recorder(
                samplerate=self.sample_rate,
                channels=1,
                blocksize=self.chunk_size,
            ) as recorder:
                logger.info("Audio capture started on: %s", device.name)
                while self._running:
                    t0 = time.monotonic()

                    try:
                        data = recorder.record(numframes=self.chunk_size)
                    except Exception as e:
                        # Transient read failures are expected (device busy);
                        # back off one frame and retry.
                        logger.debug("Audio capture read error: %s", e)
                        time.sleep(interval)
                        continue

                    # Mono mix if needed
                    if data.ndim > 1:
                        mono = data.mean(axis=1)
                    else:
                        mono = data.ravel()

                    if len(mono) < self.chunk_size:
                        # Short read - skip this frame rather than FFT a
                        # partial buffer.
                        time.sleep(interval)
                        continue

                    # Apply window and compute FFT
                    windowed = mono[:self.chunk_size] * window
                    fft_mag = np.abs(np.fft.rfft(windowed))

                    # Group into logarithmic bins (vectorized via cumsum):
                    # cumsum[e] - cumsum[s] equals fft_mag[s:e].sum(), so one
                    # cumulative pass replaces a Python loop over bins.
                    cumsum = np.concatenate(([0.0], np.cumsum(fft_mag)))
                    counts = bin_ends - bin_starts
                    bins = (cumsum[bin_ends] - cumsum[bin_starts]) / counts

                    # Normalize to 0-1
                    max_val = bins.max()
                    if max_val > 0:
                        bins *= (1.0 / max_val)

                    # Bass energy: average of first 4 bins (~20-200Hz)
                    bass = float(bins[:4].mean()) if self.num_bins >= 4 else 0.0

                    # Round for compact JSON
                    frequencies = np.round(bins, 3).tolist()
                    bass = round(bass, 3)

                    with self._lock:
                        # Publish a fresh dict each frame so readers can
                        # dedupe by object identity.
                        self._data = {"frequencies": frequencies, "bass": bass}

                    # Throttle to target FPS
                    elapsed = time.monotonic() - t0
                    if elapsed < interval:
                        time.sleep(interval - elapsed)

        except Exception as e:
            logger.error("Audio capture loop error: %s", e)
        finally:
            self._running = False
            logger.info("Audio capture stopped")
|
||||
|
||||
|
||||
# Module-level singleton instance.
_analyzer: AudioAnalyzer | None = None


def get_audio_analyzer(
    num_bins: int = 32,
    sample_rate: int = 44100,
    target_fps: int = 25,
    device_name: str | None = None,
) -> AudioAnalyzer:
    """Get or create the global AudioAnalyzer instance.

    The keyword arguments are honored only on the first call; subsequent
    calls return the already-created instance unchanged.
    """
    global _analyzer
    if _analyzer is not None:
        return _analyzer
    _analyzer = AudioAnalyzer(
        num_bins=num_bins,
        sample_rate=sample_rate,
        target_fps=target_fps,
        device_name=device_name,
    )
    return _analyzer
|
||||
@@ -1,6 +1,7 @@
|
||||
"""WebSocket connection manager and status broadcaster."""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
from typing import Any, Callable, Coroutine
|
||||
@@ -23,6 +24,10 @@ class ConnectionManager:
|
||||
self._position_broadcast_interval: float = 5.0 # Send position updates every 5s during playback
|
||||
self._last_broadcast_time: float = 0.0
|
||||
self._running: bool = False
|
||||
# Audio visualizer
|
||||
self._visualizer_subscribers: set[WebSocket] = set()
|
||||
self._audio_task: asyncio.Task | None = None
|
||||
self._audio_analyzer = None
|
||||
|
||||
async def connect(self, websocket: WebSocket) -> None:
|
||||
"""Accept a new WebSocket connection."""
|
||||
@@ -44,6 +49,7 @@ class ConnectionManager:
|
||||
"""Remove a WebSocket connection."""
|
||||
async with self._lock:
|
||||
self._active_connections.discard(websocket)
|
||||
self._visualizer_subscribers.discard(websocket)
|
||||
logger.info(
|
||||
"WebSocket client disconnected. Total: %d", len(self._active_connections)
|
||||
)
|
||||
@@ -83,6 +89,85 @@ class ConnectionManager:
|
||||
await self.broadcast(message)
|
||||
logger.info("Broadcast sent: links_changed")
|
||||
|
||||
async def subscribe_visualizer(self, websocket: WebSocket) -> None:
|
||||
"""Subscribe a client to audio visualizer data."""
|
||||
async with self._lock:
|
||||
self._visualizer_subscribers.add(websocket)
|
||||
logger.debug("Visualizer subscriber added. Total: %d", len(self._visualizer_subscribers))
|
||||
|
||||
async def unsubscribe_visualizer(self, websocket: WebSocket) -> None:
|
||||
"""Unsubscribe a client from audio visualizer data."""
|
||||
async with self._lock:
|
||||
self._visualizer_subscribers.discard(websocket)
|
||||
logger.debug("Visualizer subscriber removed. Total: %d", len(self._visualizer_subscribers))
|
||||
|
||||
async def start_audio_monitor(self, analyzer) -> None:
|
||||
"""Start audio frequency broadcasting if analyzer is available."""
|
||||
self._audio_analyzer = analyzer
|
||||
if analyzer and analyzer.running:
|
||||
self._audio_task = asyncio.create_task(self._audio_broadcast_loop())
|
||||
logger.info("Audio visualizer broadcast started")
|
||||
|
||||
async def stop_audio_monitor(self) -> None:
|
||||
"""Stop audio frequency broadcasting."""
|
||||
if self._audio_task:
|
||||
self._audio_task.cancel()
|
||||
try:
|
||||
await self._audio_task
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
self._audio_task = None
|
||||
|
||||
    async def _audio_broadcast_loop(self) -> None:
        """Background loop: read frequency data from analyzer and broadcast to subscribers.

        Runs until cancelled by stop_audio_monitor(). Paces itself to the
        configured visualizer FPS and skips work entirely while there are
        no subscribers or the analyzer is not running.
        """
        # Imported here to avoid a module-level import cycle with config.
        from ..config import settings
        interval = 1.0 / settings.visualizer_fps

        _last_data = None

        while True:
            try:
                # Snapshot the subscriber set under the lock; all awaits
                # below happen outside it.
                async with self._lock:
                    subscribers = list(self._visualizer_subscribers)

                if not subscribers or not self._audio_analyzer or not self._audio_analyzer.running:
                    await asyncio.sleep(interval)
                    continue

                data = self._audio_analyzer.get_frequency_data()
                # Identity check: the analyzer publishes a new dict object per
                # frame, so `is` cheaply detects an unchanged (stale) frame.
                if data is None or data is _last_data:
                    await asyncio.sleep(interval)
                    continue
                _last_data = data

                # Pre-serialize once for all subscribers (avoids per-client JSON encoding)
                text = json.dumps({"type": "audio_data", "data": data}, separators=(',', ':'))

                # Returns the websocket on send failure so gather() yields
                # the list of dead connections; None means success.
                async def _send(ws: WebSocket) -> WebSocket | None:
                    try:
                        await ws.send_text(text)
                        return None
                    except Exception:
                        return ws

                results = await asyncio.gather(*(_send(ws) for ws in subscribers))

                failed = [ws for ws in results if ws is not None]
                if failed:
                    async with self._lock:
                        for ws in failed:
                            self._visualizer_subscribers.discard(ws)
                    # disconnect() re-acquires self._lock, so it must be
                    # called after the lock above is released.
                    for ws in failed:
                        await self.disconnect(ws)

                await asyncio.sleep(interval)

            except asyncio.CancelledError:
                # Normal shutdown path via stop_audio_monitor().
                break
            except Exception as e:
                logger.error("Error in audio broadcast: %s", e)
                await asyncio.sleep(interval)
|
||||
|
||||
def status_changed(
|
||||
self, old: dict[str, Any] | None, new: dict[str, Any]
|
||||
) -> bool:
|
||||
|
||||
Reference in New Issue
Block a user