Add audio-reactive color strip sources, improve delete error messages
Add new "audio" color strip source type with three visualization modes (spectrum analyzer, beat pulse, VU meter) supporting WASAPI loopback and microphone input via PyAudioWPatch. Includes shared audio capture with ref counting, real-time FFT spectrum analysis, and beat detection. Improve all referential integrity 409 error messages across delete endpoints to include specific names of referencing entities instead of generic "one or more" messages. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
0
server/src/wled_controller/core/audio/__init__.py
Normal file
0
server/src/wled_controller/core/audio/__init__.py
Normal file
442
server/src/wled_controller/core/audio/audio_capture.py
Normal file
442
server/src/wled_controller/core/audio/audio_capture.py
Normal file
@@ -0,0 +1,442 @@
|
||||
"""Audio capture service — shared audio analysis with ref counting.
|
||||
|
||||
Provides real-time FFT spectrum, RMS level, and beat detection from
|
||||
system audio (WASAPI loopback) or microphone/line-in. Multiple
|
||||
AudioColorStripStreams sharing the same device reuse a single capture
|
||||
thread via AudioCaptureManager.
|
||||
|
||||
Uses PyAudioWPatch for WASAPI loopback support on Windows.
|
||||
"""
|
||||
|
||||
import math
|
||||
import threading
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
|
||||
import numpy as np
|
||||
|
||||
from wled_controller.utils import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
# Number of logarithmic frequency bands for spectrum analysis.
# Must match the length of AudioAnalysis.spectrum arrays produced below.
NUM_BANDS = 64

# Audio defaults — used when the device does not report its own values.
DEFAULT_SAMPLE_RATE = 44100
DEFAULT_CHUNK_SIZE = 2048  # ~46 ms at 44100 Hz
||||
# ---------------------------------------------------------------------------
|
||||
# AudioAnalysis — thread-safe snapshot of latest analysis results
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@dataclass
class AudioAnalysis:
    """Snapshot of audio analysis results.

    Written by the capture thread, read by visualization streams.
    All defaults represent silence, so a zero-argument instance is a
    valid "no signal" snapshot.
    """

    # time.perf_counter() value when this snapshot was produced
    timestamp: float = 0.0
    # Root-mean-square amplitude of the last chunk (mono-mixed samples)
    rms: float = 0.0
    # Absolute peak sample value of the last chunk
    peak: float = 0.0
    # Smoothed, adaptively-normalized magnitudes for NUM_BANDS log-spaced
    # frequency bands; each value in [0, 1]
    spectrum: np.ndarray = field(default_factory=lambda: np.zeros(NUM_BANDS, dtype=np.float32))
    # True when the chunk's energy exceeded ~1.5x the rolling average energy
    beat: bool = False
    # Beat strength in [0, 1]; 0.0 when no beat was detected
    beat_intensity: float = 0.0
|
||||
# ---------------------------------------------------------------------------
|
||||
# AudioCaptureStream — one per unique audio device
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _build_log_bands(num_bands: int, fft_size: int, sample_rate: int) -> List[Tuple[int, int]]:
|
||||
"""Build logarithmically-spaced frequency band boundaries for FFT bins.
|
||||
|
||||
Returns list of (start_bin, end_bin) pairs.
|
||||
"""
|
||||
nyquist = sample_rate / 2
|
||||
# Map bands to log-spaced frequencies from 20 Hz to Nyquist
|
||||
min_freq = 20.0
|
||||
max_freq = min(nyquist, 20000.0)
|
||||
log_min = math.log10(min_freq)
|
||||
log_max = math.log10(max_freq)
|
||||
|
||||
freqs = np.logspace(log_min, log_max, num_bands + 1)
|
||||
bin_width = sample_rate / fft_size
|
||||
|
||||
bands = []
|
||||
for i in range(num_bands):
|
||||
start_bin = max(1, int(freqs[i] / bin_width))
|
||||
end_bin = max(start_bin + 1, int(freqs[i + 1] / bin_width))
|
||||
# Clamp to FFT range
|
||||
end_bin = min(end_bin, fft_size // 2)
|
||||
bands.append((start_bin, end_bin))
|
||||
return bands
|
||||
|
||||
|
||||
class AudioCaptureStream:
    """Captures audio from a single device and provides real-time analysis.

    Runs a background thread that reads audio chunks, computes FFT, RMS,
    and beat detection. Consumers read the latest analysis via
    ``get_latest_analysis()`` (thread-safe).
    """

    def __init__(
        self,
        device_index: int,
        is_loopback: bool,
        sample_rate: int = DEFAULT_SAMPLE_RATE,
        chunk_size: int = DEFAULT_CHUNK_SIZE,
    ):
        """Create a (not yet started) capture stream.

        Args:
            device_index: PyAudio device index; negative means "use default".
            is_loopback: capture system output via WASAPI loopback instead of
                a regular input device.
            sample_rate: requested sample rate; may be replaced by the
                device's reported rate when the stream opens.
            chunk_size: samples per analysis frame (also the FFT size).
        """
        self._device_index = device_index
        self._is_loopback = is_loopback
        self._sample_rate = sample_rate
        self._chunk_size = chunk_size

        self._running = False
        self._thread: Optional[threading.Thread] = None
        # Guards _latest; written by the capture thread, read by consumers.
        self._lock = threading.Lock()
        self._latest: Optional[AudioAnalysis] = None

        # Pre-allocated FFT helpers (Hann window reduces spectral leakage)
        self._window = np.hanning(chunk_size).astype(np.float32)
        self._bands = _build_log_bands(NUM_BANDS, chunk_size, sample_rate)

        # Beat detection state: rolling chunk-energy history
        self._energy_history: np.ndarray = np.zeros(43, dtype=np.float64)  # ~1s at 44100/2048
        self._energy_idx = 0

        # Smoothed spectrum (exponential decay between frames)
        self._smooth_spectrum = np.zeros(NUM_BANDS, dtype=np.float32)

    def start(self) -> None:
        """Start the background capture thread (no-op if already running)."""
        if self._running:
            return
        self._running = True
        self._thread = threading.Thread(
            target=self._capture_loop, daemon=True,
            name=f"AudioCapture-{self._device_index}-{'lb' if self._is_loopback else 'in'}",
        )
        self._thread.start()
        logger.info(
            f"AudioCaptureStream started: device={self._device_index} "
            f"loopback={self._is_loopback} sr={self._sample_rate} chunk={self._chunk_size}"
        )

    def stop(self) -> None:
        """Signal the capture thread to exit and wait for it (5 s timeout)."""
        self._running = False
        if self._thread is not None:
            self._thread.join(timeout=5.0)
            self._thread = None
        with self._lock:
            # Drop the last snapshot so consumers see "no signal" after stop.
            self._latest = None
        logger.info(f"AudioCaptureStream stopped: device={self._device_index}")

    def get_latest_analysis(self) -> Optional[AudioAnalysis]:
        """Return the most recent AudioAnalysis, or None before the first frame."""
        with self._lock:
            return self._latest

    def _capture_loop(self) -> None:
        """Thread body: open the device, then analyze chunks until stopped."""
        try:
            import pyaudiowpatch as pyaudio
        except ImportError:
            logger.error("PyAudioWPatch is not installed — audio capture unavailable")
            self._running = False
            return

        pa = None
        stream = None
        try:
            pa = pyaudio.PyAudio()

            if self._is_loopback:
                # Loopback capture: find the loopback device for the output device
                loopback_device = self._find_loopback_device(pa, self._device_index)
                if loopback_device is None:
                    logger.error(
                        f"No loopback device found for output device {self._device_index}"
                    )
                    self._running = False
                    return

                device_idx = loopback_device["index"]
                channels = loopback_device["maxInputChannels"]
                sample_rate = int(loopback_device["defaultSampleRate"])
            else:
                # Regular input device (None = system default input)
                device_idx = self._device_index if self._device_index >= 0 else None
                if device_idx is not None:
                    dev_info = pa.get_device_info_by_index(device_idx)
                    channels = max(1, dev_info["maxInputChannels"])
                    sample_rate = int(dev_info["defaultSampleRate"])
                else:
                    channels = 1
                    sample_rate = self._sample_rate

            # Update FFT helpers if the device's rate differs from the requested one
            if sample_rate != self._sample_rate:
                self._sample_rate = sample_rate
                self._bands = _build_log_bands(NUM_BANDS, self._chunk_size, sample_rate)

            stream = pa.open(
                format=pyaudio.paFloat32,
                channels=channels,
                rate=sample_rate,
                input=True,
                input_device_index=device_idx,
                frames_per_buffer=self._chunk_size,
            )

            logger.info(
                f"Audio stream opened: device={device_idx} loopback={self._is_loopback} "
                f"channels={channels} sr={sample_rate}"
            )

            # Reused per-frame band buffer to avoid per-iteration allocation
            spectrum_buf = np.zeros(NUM_BANDS, dtype=np.float32)

            while self._running:
                try:
                    raw_data = stream.read(self._chunk_size, exception_on_overflow=False)
                    data = np.frombuffer(raw_data, dtype=np.float32)
                except Exception as e:
                    # Transient read errors (e.g. device glitch): back off and retry
                    logger.warning(f"Audio read error: {e}")
                    time.sleep(0.05)
                    continue

                # Mix to mono if multi-channel (samples are interleaved)
                if channels > 1:
                    data = data.reshape(-1, channels)
                    samples = data.mean(axis=1).astype(np.float32)
                else:
                    samples = data

                # RMS and peak level of the chunk
                rms = float(np.sqrt(np.mean(samples ** 2)))
                peak = float(np.max(np.abs(samples)))

                # FFT — pad short reads so the window always fits
                chunk = samples[: self._chunk_size]
                if len(chunk) < self._chunk_size:
                    chunk = np.pad(chunk, (0, self._chunk_size - len(chunk)))
                windowed = chunk * self._window
                fft_mag = np.abs(np.fft.rfft(windowed))
                # Normalize by chunk size
                fft_mag /= self._chunk_size

                # Bin into logarithmic bands
                for b, (start, end) in enumerate(self._bands):
                    if start < len(fft_mag) and end <= len(fft_mag):
                        spectrum_buf[b] = float(np.mean(fft_mag[start:end]))
                    else:
                        spectrum_buf[b] = 0.0

                # Normalize spectrum to 0-1 range (adaptive: relative to frame max)
                spec_max = float(np.max(spectrum_buf))
                if spec_max > 1e-6:
                    spectrum_buf /= spec_max

                # Exponential smoothing between frames
                alpha = 0.3  # smoothing factor (lower = smoother)
                self._smooth_spectrum[:] = (
                    alpha * spectrum_buf + (1.0 - alpha) * self._smooth_spectrum
                )

                # Beat detection — compare current energy to rolling average
                energy = float(np.sum(samples ** 2))
                self._energy_history[self._energy_idx % len(self._energy_history)] = energy
                self._energy_idx += 1
                avg_energy = float(np.mean(self._energy_history))

                beat = False
                beat_intensity = 0.0
                if avg_energy > 1e-8:
                    ratio = energy / avg_energy
                    if ratio > 1.5:
                        beat = True
                        beat_intensity = min(1.0, (ratio - 1.0) / 2.0)

                analysis = AudioAnalysis(
                    timestamp=time.perf_counter(),
                    rms=rms,
                    peak=peak,
                    spectrum=self._smooth_spectrum.copy(),
                    beat=beat,
                    beat_intensity=beat_intensity,
                )

                with self._lock:
                    self._latest = analysis

        except Exception as e:
            logger.error(f"AudioCaptureStream fatal error: {e}", exc_info=True)
        finally:
            # Best-effort cleanup: never let teardown errors mask the cause
            if stream is not None:
                try:
                    stream.stop_stream()
                    stream.close()
                except Exception:
                    pass
            if pa is not None:
                try:
                    pa.terminate()
                except Exception:
                    pass
            self._running = False
            logger.info(f"AudioCaptureStream loop ended: device={self._device_index}")

    @staticmethod
    def _find_loopback_device(pa, output_device_index: int) -> Optional[dict]:
        """Find the PyAudioWPatch loopback device for a given output device.

        PyAudioWPatch exposes virtual loopback input devices for each WASAPI
        output device. We match by name via ``get_loopback_device_info_generator()``.

        Returns the matching loopback device info dict, the first available
        loopback device when no name match is found, or None on error.
        """
        try:
            # FIX: the target device name is loop-invariant — look it up once
            # instead of calling get_device_info_by_index() on every iteration
            # (which also aborted the whole search if the lookup raised mid-loop).
            target_name = None
            if output_device_index >= 0:
                target_name = pa.get_device_info_by_index(output_device_index)["name"]

            first_loopback = None
            for loopback in pa.get_loopback_device_info_generator():
                if first_loopback is None:
                    first_loopback = loopback

                # Default (-1): return first loopback device (typically default speakers)
                if target_name is None:
                    return loopback

                # Match by output device name contained in loopback device name
                if target_name in loopback["name"]:
                    return loopback

            # No exact match — return first available loopback
            return first_loopback
        except Exception as e:
            logger.error(f"Error finding loopback device: {e}")
            return None
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# AudioCaptureManager — ref-counted shared capture streams
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class AudioCaptureManager:
    """Manages shared AudioCaptureStream instances with reference counting.

    Multiple AudioColorStripStreams using the same audio device share a
    single capture thread; the thread is stopped when its last user
    releases it.
    """

    def __init__(self):
        # (device_index, is_loopback) -> (stream, ref_count)
        self._streams: Dict[Tuple[int, bool], Tuple[AudioCaptureStream, int]] = {}
        self._lock = threading.Lock()

    def acquire(self, device_index: int, is_loopback: bool) -> AudioCaptureStream:
        """Get or create an AudioCaptureStream for the given device.

        Returns:
            Shared AudioCaptureStream instance.
        """
        key = (device_index, is_loopback)
        with self._lock:
            entry = self._streams.get(key)
            if entry is not None:
                shared, count = entry
                self._streams[key] = (shared, count + 1)
                logger.info(f"Reusing audio capture {key} (ref_count={count + 1})")
                return shared

            # First consumer of this device: spin up a new capture thread.
            created = AudioCaptureStream(device_index, is_loopback)
            created.start()
            self._streams[key] = (created, 1)
            logger.info(f"Created audio capture {key}")
            return created

    def release(self, device_index: int, is_loopback: bool) -> None:
        """Release a reference to an AudioCaptureStream."""
        key = (device_index, is_loopback)
        with self._lock:
            entry = self._streams.get(key)
            if entry is None:
                logger.warning(f"Attempted to release unknown audio capture: {key}")
                return

            shared, count = entry
            count -= 1
            if count > 0:
                # Other consumers remain — just record the lower count.
                self._streams[key] = (shared, count)
                logger.debug(f"Released audio capture {key} (ref_count={count})")
            else:
                # Last reference gone: stop the capture thread and forget it.
                shared.stop()
                del self._streams[key]
                logger.info(f"Removed audio capture {key}")

    def release_all(self) -> None:
        """Stop and remove all capture streams. Called on shutdown."""
        with self._lock:
            for key, (stream, _) in list(self._streams.items()):
                try:
                    stream.stop()
                except Exception as e:
                    logger.error(f"Error stopping audio capture {key}: {e}")
            self._streams.clear()
            logger.info("Released all audio capture streams")

    @staticmethod
    def enumerate_devices() -> List[dict]:
        """List available audio devices for the frontend dropdown.

        Returns list of dicts with device info. Output devices with WASAPI
        hostapi are marked as loopback candidates.
        """
        try:
            import pyaudiowpatch as pyaudio
        except ImportError:
            logger.warning("PyAudioWPatch not installed — no audio devices available")
            return []

        pa = None
        try:
            pa = pyaudio.PyAudio()
            wasapi_index = pa.get_host_api_info_by_type(pyaudio.paWASAPI)["index"]

            devices: List[dict] = []
            for dev_idx in range(pa.get_device_count()):
                info = pa.get_device_info_by_index(dev_idx)
                # Only WASAPI devices are usable for loopback capture.
                if info["hostApi"] != wasapi_index:
                    continue

                if info["maxInputChannels"] > 0:
                    devices.append({
                        "index": dev_idx,
                        "name": info["name"],
                        "is_input": True,
                        "is_loopback": False,
                        "channels": info["maxInputChannels"],
                        "default_samplerate": info["defaultSampleRate"],
                    })

                if info["maxOutputChannels"] > 0:
                    devices.append({
                        "index": dev_idx,
                        "name": f"{info['name']} [Loopback]",
                        "is_input": False,
                        "is_loopback": True,
                        "channels": info["maxOutputChannels"],
                        "default_samplerate": info["defaultSampleRate"],
                    })

            return devices

        except Exception as e:
            logger.error(f"Failed to enumerate audio devices: {e}", exc_info=True)
            return []
        finally:
            if pa is not None:
                try:
                    pa.terminate()
                except Exception:
                    pass
|
||||
Reference in New Issue
Block a user