diff --git a/server/pyproject.toml b/server/pyproject.toml index 2ddba87..7d91085 100644 --- a/server/pyproject.toml +++ b/server/pyproject.toml @@ -43,6 +43,7 @@ dependencies = [ "psutil>=5.9.0", "nvidia-ml-py>=12.0.0; sys_platform == 'win32'", "PyAudioWPatch>=0.2.12; sys_platform == 'win32'", + "sounddevice>=0.5", ] [project.optional-dependencies] diff --git a/server/src/wled_controller/api/__init__.py b/server/src/wled_controller/api/__init__.py index 25c81e5..e365064 100644 --- a/server/src/wled_controller/api/__init__.py +++ b/server/src/wled_controller/api/__init__.py @@ -12,6 +12,7 @@ from .routes.picture_targets import router as picture_targets_router from .routes.color_strip_sources import router as color_strip_sources_router from .routes.audio import router as audio_router from .routes.audio_sources import router as audio_sources_router +from .routes.audio_templates import router as audio_templates_router from .routes.value_sources import router as value_sources_router from .routes.profiles import router as profiles_router @@ -25,6 +26,7 @@ router.include_router(picture_sources_router) router.include_router(color_strip_sources_router) router.include_router(audio_router) router.include_router(audio_sources_router) +router.include_router(audio_templates_router) router.include_router(value_sources_router) router.include_router(picture_targets_router) router.include_router(profiles_router) diff --git a/server/src/wled_controller/api/dependencies.py b/server/src/wled_controller/api/dependencies.py index 76916c4..97e8a32 100644 --- a/server/src/wled_controller/api/dependencies.py +++ b/server/src/wled_controller/api/dependencies.py @@ -9,6 +9,7 @@ from wled_controller.storage.picture_source_store import PictureSourceStore from wled_controller.storage.picture_target_store import PictureTargetStore from wled_controller.storage.color_strip_store import ColorStripStore from wled_controller.storage.audio_source_store import AudioSourceStore +from wled_controller.storage.audio_template_store import AudioTemplateStore from wled_controller.storage.value_source_store import ValueSourceStore from wled_controller.storage.profile_store import ProfileStore from wled_controller.core.profiles.profile_engine import ProfileEngine @@ -22,6 +23,7 @@ _picture_source_store: PictureSourceStore | None = None _picture_target_store: PictureTargetStore | None = None _color_strip_store: ColorStripStore | None = None _audio_source_store: AudioSourceStore | None = None +_audio_template_store: AudioTemplateStore | None = None _value_source_store: ValueSourceStore | None = None _processor_manager: ProcessorManager | None = None _profile_store: ProfileStore | None = None @@ -84,6 +86,13 @@ def get_audio_source_store() -> AudioSourceStore: return _audio_source_store +def get_audio_template_store() -> AudioTemplateStore: + """Get audio template store dependency.""" + if _audio_template_store is None: + raise RuntimeError("Audio template store not initialized") + return _audio_template_store + + def get_value_source_store() -> ValueSourceStore: """Get value source store dependency.""" if _value_source_store is None: @@ -122,6 +131,7 @@ def init_dependencies( picture_target_store: PictureTargetStore | None = None, color_strip_store: ColorStripStore | None = None, audio_source_store: AudioSourceStore | None = None, + audio_template_store: AudioTemplateStore | None = None, value_source_store: ValueSourceStore | None = None, profile_store: ProfileStore | None = None, profile_engine: ProfileEngine | None = None, @@ -129,7 +139,8 @@ def init_dependencies( """Initialize global dependencies.""" global _device_store, _template_store, _processor_manager global _pp_template_store, _pattern_template_store, _picture_source_store, _picture_target_store - global _color_strip_store, _audio_source_store, _value_source_store, _profile_store, _profile_engine + global _color_strip_store, _audio_source_store, _audio_template_store + global _value_source_store, _profile_store, _profile_engine _device_store = device_store _template_store = template_store _processor_manager = processor_manager @@ -139,6 +150,7 @@ def init_dependencies( _picture_target_store = picture_target_store _color_strip_store = color_strip_store _audio_source_store = audio_source_store + _audio_template_store = audio_template_store _value_source_store = value_source_store _profile_store = profile_store _profile_engine = profile_engine diff --git a/server/src/wled_controller/api/routes/audio_sources.py b/server/src/wled_controller/api/routes/audio_sources.py index 3e22b77..e4a8262 100644 --- a/server/src/wled_controller/api/routes/audio_sources.py +++ b/server/src/wled_controller/api/routes/audio_sources.py @@ -33,6 +33,7 @@ def _to_response(source: AudioSource) -> AudioSourceResponse: source_type=source.source_type, device_index=getattr(source, "device_index", None), is_loopback=getattr(source, "is_loopback", None), + audio_template_id=getattr(source, "audio_template_id", None), audio_source_id=getattr(source, "audio_source_id", None), channel=getattr(source, "channel", None), description=source.description, @@ -73,6 +74,7 @@ async def create_audio_source( audio_source_id=data.audio_source_id, channel=data.channel, description=data.description, + audio_template_id=data.audio_template_id, ) return _to_response(source) except ValueError as e: @@ -110,6 +112,7 @@ async def update_audio_source( audio_source_id=data.audio_source_id, channel=data.channel, description=data.description, + audio_template_id=data.audio_template_id, ) return _to_response(source) except ValueError as e: diff --git a/server/src/wled_controller/api/routes/audio_templates.py b/server/src/wled_controller/api/routes/audio_templates.py new file mode 100644 index 0000000..ff927c9 --- /dev/null +++ b/server/src/wled_controller/api/routes/audio_templates.py @@ -0,0 +1,159 @@ +"""Audio capture template and engine routes.""" + +from fastapi import APIRouter, HTTPException, Depends + +from wled_controller.api.auth import AuthRequired +from wled_controller.api.dependencies import get_audio_template_store, get_audio_source_store +from wled_controller.api.schemas.audio_templates import ( + AudioEngineInfo, + AudioEngineListResponse, + AudioTemplateCreate, + AudioTemplateListResponse, + AudioTemplateResponse, + AudioTemplateUpdate, +) +from wled_controller.core.audio.factory import AudioEngineRegistry +from wled_controller.storage.audio_template_store import AudioTemplateStore +from wled_controller.storage.audio_source_store import AudioSourceStore +from wled_controller.utils import get_logger + +logger = get_logger(__name__) + +router = APIRouter() + + +# ===== AUDIO TEMPLATE ENDPOINTS ===== + +@router.get("/api/v1/audio-templates", response_model=AudioTemplateListResponse, tags=["Audio Templates"]) +async def list_audio_templates( + _auth: AuthRequired, + store: AudioTemplateStore = Depends(get_audio_template_store), +): + """List all audio capture templates.""" + try: + templates = store.get_all_templates() + responses = [ + AudioTemplateResponse( + id=t.id, name=t.name, engine_type=t.engine_type, + engine_config=t.engine_config, created_at=t.created_at, + updated_at=t.updated_at, description=t.description, + ) + for t in templates + ] + return AudioTemplateListResponse(templates=responses, count=len(responses)) + except Exception as e: + logger.error(f"Failed to list audio templates: {e}") + raise HTTPException(status_code=500, detail=str(e)) + + +@router.post("/api/v1/audio-templates", response_model=AudioTemplateResponse, tags=["Audio Templates"], status_code=201) +async def create_audio_template( + data: AudioTemplateCreate, + _auth: AuthRequired, + store: AudioTemplateStore = Depends(get_audio_template_store), +): + """Create a new audio capture template.""" + try: + template = store.create_template( + name=data.name, engine_type=data.engine_type, + engine_config=data.engine_config, description=data.description, + ) + return AudioTemplateResponse( + id=template.id, name=template.name, engine_type=template.engine_type, + engine_config=template.engine_config, created_at=template.created_at, + updated_at=template.updated_at, description=template.description, + ) + except ValueError as e: + raise HTTPException(status_code=400, detail=str(e)) + except Exception as e: + logger.error(f"Failed to create audio template: {e}") + raise HTTPException(status_code=500, detail=str(e)) + + +@router.get("/api/v1/audio-templates/{template_id}", response_model=AudioTemplateResponse, tags=["Audio Templates"]) +async def get_audio_template( + template_id: str, + _auth: AuthRequired, + store: AudioTemplateStore = Depends(get_audio_template_store), +): + """Get audio template by ID.""" + try: + t = store.get_template(template_id) + except ValueError: + raise HTTPException(status_code=404, detail=f"Audio template {template_id} not found") + return AudioTemplateResponse( + id=t.id, name=t.name, engine_type=t.engine_type, + engine_config=t.engine_config, created_at=t.created_at, + updated_at=t.updated_at, description=t.description, + ) + + +@router.put("/api/v1/audio-templates/{template_id}", response_model=AudioTemplateResponse, tags=["Audio Templates"]) +async def update_audio_template( + template_id: str, + data: AudioTemplateUpdate, + _auth: AuthRequired, + store: AudioTemplateStore = Depends(get_audio_template_store), +): + """Update an audio template.""" + try: + t = store.update_template( + template_id=template_id, name=data.name, + engine_type=data.engine_type, engine_config=data.engine_config, + description=data.description, + ) + return AudioTemplateResponse( + id=t.id, name=t.name, engine_type=t.engine_type, + engine_config=t.engine_config, created_at=t.created_at, + updated_at=t.updated_at, description=t.description, + ) + except ValueError as e: + raise HTTPException(status_code=400, detail=str(e)) + except Exception as e: + logger.error(f"Failed to update audio template: {e}") + raise HTTPException(status_code=500, detail=str(e)) + + +@router.delete("/api/v1/audio-templates/{template_id}", status_code=204, tags=["Audio Templates"]) +async def delete_audio_template( + template_id: str, + _auth: AuthRequired, + store: AudioTemplateStore = Depends(get_audio_template_store), + audio_source_store: AudioSourceStore = Depends(get_audio_source_store), +): + """Delete an audio template.""" + try: + store.delete_template(template_id, audio_source_store=audio_source_store) + except HTTPException: + raise + except ValueError as e: + raise HTTPException(status_code=400, detail=str(e)) + except Exception as e: + logger.error(f"Failed to delete audio template: {e}") + raise HTTPException(status_code=500, detail=str(e)) + + +# ===== AUDIO ENGINE ENDPOINTS ===== + +@router.get("/api/v1/audio-engines", response_model=AudioEngineListResponse, tags=["Audio Templates"]) +async def list_audio_engines(_auth: AuthRequired): + """List all registered audio capture engines.""" + try: + available_set = set(AudioEngineRegistry.get_available_engines()) + all_engines = AudioEngineRegistry.get_all_engines() + + engines = [] + for engine_type, engine_class in all_engines.items(): + engines.append( + AudioEngineInfo( + type=engine_type, + name=engine_type.upper(), + default_config=engine_class.get_default_config(), + available=(engine_type in available_set), + ) + ) + + return AudioEngineListResponse(engines=engines, count=len(engines)) + except Exception as e: + logger.error(f"Failed to list audio engines: {e}") + raise HTTPException(status_code=500, detail=str(e)) diff --git a/server/src/wled_controller/api/schemas/audio_sources.py b/server/src/wled_controller/api/schemas/audio_sources.py index b31a119..ffaa857 100644 --- a/server/src/wled_controller/api/schemas/audio_sources.py +++ b/server/src/wled_controller/api/schemas/audio_sources.py @@ -14,6 +14,7 @@ class AudioSourceCreate(BaseModel): # multichannel fields device_index: Optional[int] = Field(None, description="Audio device index (-1 = default)") is_loopback: Optional[bool] = Field(None, description="True for system audio (WASAPI loopback)") + audio_template_id: Optional[str] = Field(None, description="Audio capture template ID") # mono fields audio_source_id: Optional[str] = Field(None, description="Parent multichannel audio source ID") channel: Optional[str] = Field(None, description="Channel: mono|left|right") @@ -26,6 +27,7 @@ class AudioSourceUpdate(BaseModel): name: Optional[str] = Field(None, description="Source name", min_length=1, max_length=100) device_index: Optional[int] = Field(None, description="Audio device index (-1 = default)") is_loopback: Optional[bool] = Field(None, description="True for system audio (WASAPI loopback)") + audio_template_id: Optional[str] = Field(None, description="Audio capture template ID") audio_source_id: Optional[str] = Field(None, description="Parent multichannel audio source ID") channel: Optional[str] = Field(None, description="Channel: mono|left|right") description: Optional[str] = Field(None, description="Optional description", max_length=500) @@ -39,6 +41,7 @@ class AudioSourceResponse(BaseModel): source_type: str = Field(description="Source type: multichannel or mono") device_index: Optional[int] = Field(None, description="Audio device index") is_loopback: Optional[bool] = Field(None, description="WASAPI loopback mode") + audio_template_id: Optional[str] = Field(None, description="Audio capture template ID") audio_source_id: Optional[str] = Field(None, description="Parent multichannel source ID") channel: Optional[str] = Field(None, description="Channel: mono|left|right") description: Optional[str] = Field(None, description="Description") diff --git a/server/src/wled_controller/api/schemas/audio_templates.py b/server/src/wled_controller/api/schemas/audio_templates.py new file mode 100644 index 0000000..225e711 --- /dev/null +++ b/server/src/wled_controller/api/schemas/audio_templates.py @@ -0,0 +1,59 @@ +"""Audio capture template and engine schemas.""" + +from datetime import datetime +from typing import Dict, List, Optional + +from pydantic import BaseModel, Field + + +class AudioTemplateCreate(BaseModel): + """Request to create an audio capture template.""" + + name: str = Field(description="Template name", min_length=1, max_length=100) + engine_type: str = Field(description="Audio engine type (e.g., 'wasapi', 'sounddevice')", min_length=1) + engine_config: Dict = Field(default_factory=dict, description="Engine-specific configuration") + description: Optional[str] = Field(None, description="Template description", max_length=500) + + +class AudioTemplateUpdate(BaseModel): + """Request to update an audio template.""" + + name: Optional[str] = Field(None, description="Template name", min_length=1, max_length=100) + engine_type: Optional[str] = Field(None, description="Audio engine type") + engine_config: Optional[Dict] = Field(None, description="Engine-specific configuration") + description: Optional[str] = Field(None, description="Template description", max_length=500) + + +class AudioTemplateResponse(BaseModel): + """Audio template information response.""" + + id: str = Field(description="Template ID") + name: str = Field(description="Template name") + engine_type: str = Field(description="Engine type identifier") + engine_config: Dict = Field(description="Engine-specific configuration") + created_at: datetime = Field(description="Creation timestamp") + updated_at: datetime = Field(description="Last update timestamp") + description: Optional[str] = Field(None, description="Template description") + + +class AudioTemplateListResponse(BaseModel): + """List of audio templates response.""" + + templates: List[AudioTemplateResponse] = Field(description="List of audio templates") + count: int = Field(description="Number of templates") + + +class AudioEngineInfo(BaseModel): + """Audio capture engine information.""" + + type: str = Field(description="Engine type identifier (e.g., 'wasapi', 'sounddevice')") + name: str = Field(description="Human-readable engine name") + default_config: Dict = Field(description="Default configuration for this engine") + available: bool = Field(description="Whether engine is available on this system") + + +class AudioEngineListResponse(BaseModel): + """List of audio engines response.""" + + engines: List[AudioEngineInfo] = Field(description="Available audio engines") + count: int = Field(description="Number of engines") diff --git a/server/src/wled_controller/config.py b/server/src/wled_controller/config.py index 95595c4..ff3b7c2 100644 --- a/server/src/wled_controller/config.py +++ b/server/src/wled_controller/config.py @@ -35,6 +35,7 @@ class StorageConfig(BaseSettings): pattern_templates_file: str = "data/pattern_templates.json" color_strip_sources_file: str = "data/color_strip_sources.json" audio_sources_file: str = "data/audio_sources.json" + audio_templates_file: str = "data/audio_templates.json" value_sources_file: str = "data/value_sources.json" profiles_file: str = "data/profiles.json" diff --git a/server/src/wled_controller/core/audio/__init__.py b/server/src/wled_controller/core/audio/__init__.py index e69de29..b010021 100644 --- a/server/src/wled_controller/core/audio/__init__.py +++ b/server/src/wled_controller/core/audio/__init__.py @@ -0,0 +1,37 @@ +"""Audio capture engine abstraction layer.""" + +from wled_controller.core.audio.base import ( + AudioCaptureEngine, + AudioCaptureStreamBase, + AudioDeviceInfo, +) +from wled_controller.core.audio.factory import AudioEngineRegistry +from wled_controller.core.audio.analysis import ( + AudioAnalysis, + AudioAnalyzer, + NUM_BANDS, + DEFAULT_SAMPLE_RATE, + DEFAULT_CHUNK_SIZE, +) +from wled_controller.core.audio.wasapi_engine import WasapiEngine, WasapiCaptureStream +from wled_controller.core.audio.sounddevice_engine import SounddeviceEngine, SounddeviceCaptureStream + +# Auto-register available engines +AudioEngineRegistry.register(WasapiEngine) +AudioEngineRegistry.register(SounddeviceEngine) + +__all__ = [ + "AudioCaptureEngine", + "AudioCaptureStreamBase", + "AudioDeviceInfo", + "AudioEngineRegistry", + "AudioAnalysis", + "AudioAnalyzer", + "NUM_BANDS", + "DEFAULT_SAMPLE_RATE", + "DEFAULT_CHUNK_SIZE", + "WasapiEngine", + "WasapiCaptureStream", + "SounddeviceEngine", + "SounddeviceCaptureStream", +] diff --git a/server/src/wled_controller/core/audio/analysis.py b/server/src/wled_controller/core/audio/analysis.py new file mode 100644 index 0000000..9b25abb --- /dev/null +++ b/server/src/wled_controller/core/audio/analysis.py @@ -0,0 +1,217 @@ +"""Shared audio analysis — FFT spectrum, RMS, beat detection. + +Engines provide raw audio chunks; AudioAnalyzer processes them into +AudioAnalysis snapshots consumed by visualization streams. +""" + +import math +import time +from dataclasses import dataclass, field +from typing import List, Tuple + +import numpy as np + +# Number of logarithmic frequency bands for spectrum analysis +NUM_BANDS = 64 + +# Audio defaults +DEFAULT_SAMPLE_RATE = 44100 +DEFAULT_CHUNK_SIZE = 2048 # ~46 ms at 44100 Hz + + +@dataclass +class AudioAnalysis: + """Snapshot of audio analysis results. + + Written by the capture thread, read by visualization streams. + Mono fields contain the mixed-down signal (all channels averaged). + Per-channel fields (left/right) are populated when the source is stereo+. + For mono sources, left/right are copies of the mono data. + """ + + timestamp: float = 0.0 + # Mono (mixed) — backward-compatible fields + rms: float = 0.0 + peak: float = 0.0 + spectrum: np.ndarray = field(default_factory=lambda: np.zeros(NUM_BANDS, dtype=np.float32)) + beat: bool = False + beat_intensity: float = 0.0 + # Per-channel + left_rms: float = 0.0 + left_spectrum: np.ndarray = field(default_factory=lambda: np.zeros(NUM_BANDS, dtype=np.float32)) + right_rms: float = 0.0 + right_spectrum: np.ndarray = field(default_factory=lambda: np.zeros(NUM_BANDS, dtype=np.float32)) + + +def _build_log_bands(num_bands: int, fft_size: int, sample_rate: int) -> List[Tuple[int, int]]: + """Build logarithmically-spaced frequency band boundaries for FFT bins. + + Returns list of (start_bin, end_bin) pairs. + """ + nyquist = sample_rate / 2 + min_freq = 20.0 + max_freq = min(nyquist, 20000.0) + log_min = math.log10(min_freq) + log_max = math.log10(max_freq) + + freqs = np.logspace(log_min, log_max, num_bands + 1) + bin_width = sample_rate / fft_size + + bands = [] + for i in range(num_bands): + start_bin = max(1, int(freqs[i] / bin_width)) + end_bin = max(start_bin + 1, int(freqs[i + 1] / bin_width)) + end_bin = min(end_bin, fft_size // 2) + bands.append((start_bin, end_bin)) + return bands + + +class AudioAnalyzer: + """Stateful audio analyzer — call analyze() per raw chunk. + + Maintains smoothing buffers, energy history for beat detection, + and pre-allocated FFT scratch buffers. Thread-safe only if a single + thread calls analyze() (the capture thread). + """ + + def __init__(self, sample_rate: int = DEFAULT_SAMPLE_RATE, chunk_size: int = DEFAULT_CHUNK_SIZE): + self._sample_rate = sample_rate + self._chunk_size = chunk_size + + # FFT helpers + self._window = np.hanning(chunk_size).astype(np.float32) + self._bands = _build_log_bands(NUM_BANDS, chunk_size, sample_rate) + + # Beat detection state + self._energy_history: np.ndarray = np.zeros(43, dtype=np.float64) # ~1s at 44100/2048 + self._energy_idx = 0 + + # Smoothed spectrum (exponential decay) + self._smooth_spectrum = np.zeros(NUM_BANDS, dtype=np.float32) + self._smooth_spectrum_left = np.zeros(NUM_BANDS, dtype=np.float32) + self._smooth_spectrum_right = np.zeros(NUM_BANDS, dtype=np.float32) + self._smoothing_alpha = 0.3 + + # Pre-allocated scratch buffers + self._fft_windowed = np.empty(chunk_size, dtype=np.float32) + self._spectrum_buf = np.zeros(NUM_BANDS, dtype=np.float32) + self._spectrum_buf_left = np.zeros(NUM_BANDS, dtype=np.float32) + self._spectrum_buf_right = np.zeros(NUM_BANDS, dtype=np.float32) + self._sq_buf = np.empty(chunk_size, dtype=np.float32) + + # Pre-allocated channel buffers for stereo + self._left_buf = np.empty(chunk_size, dtype=np.float32) + self._right_buf = np.empty(chunk_size, dtype=np.float32) + self._mono_buf = np.empty(chunk_size, dtype=np.float32) + + @property + def sample_rate(self) -> int: + return self._sample_rate + + @sample_rate.setter + def sample_rate(self, value: int): + if value != self._sample_rate: + self._sample_rate = value + self._bands = _build_log_bands(NUM_BANDS, self._chunk_size, value) + + def analyze(self, raw_data: np.ndarray, channels: int) -> AudioAnalysis: + """Analyze a raw audio chunk and return an AudioAnalysis snapshot. + + Args: + raw_data: 1-D float32 array of interleaved samples (length = chunk_size * channels) + channels: Number of audio channels + + Returns: + AudioAnalysis with spectrum, RMS, beat, etc. + """ + chunk_size = self._chunk_size + alpha = self._smoothing_alpha + one_minus_alpha = 1.0 - alpha + + # Split channels and mix to mono + if channels > 1: + data = raw_data.reshape(-1, channels) + np.copyto(self._left_buf[:len(data)], data[:, 0]) + right_col = data[:, 1] if channels >= 2 else data[:, 0] + np.copyto(self._right_buf[:len(data)], right_col) + np.add(data[:, 0], right_col, out=self._mono_buf[:len(data)]) + self._mono_buf[:len(data)] *= 0.5 + samples = self._mono_buf[:len(data)] + left_samples = self._left_buf[:len(data)] + right_samples = self._right_buf[:len(data)] + else: + samples = raw_data + left_samples = samples + right_samples = samples + + # RMS and peak + n = len(samples) + np.multiply(samples, samples, out=self._sq_buf[:n]) + rms = float(np.sqrt(np.mean(self._sq_buf[:n]))) + peak = float(np.max(np.abs(samples))) + + if channels > 1: + np.multiply(left_samples, left_samples, out=self._sq_buf[:n]) + left_rms = float(np.sqrt(np.mean(self._sq_buf[:n]))) + np.multiply(right_samples, right_samples, out=self._sq_buf[:n]) + right_rms = float(np.sqrt(np.mean(self._sq_buf[:n]))) + else: + left_rms = rms + right_rms = rms + + # FFT for mono, left, right + self._fft_bands(samples, self._spectrum_buf, self._smooth_spectrum, + alpha, one_minus_alpha) + self._fft_bands(left_samples, self._spectrum_buf_left, self._smooth_spectrum_left, + alpha, one_minus_alpha) + self._fft_bands(right_samples, self._spectrum_buf_right, self._smooth_spectrum_right, + alpha, one_minus_alpha) + + # Beat detection — compare current energy to rolling average (mono) + np.multiply(samples, samples, out=self._sq_buf[:n]) + energy = float(np.sum(self._sq_buf[:n])) + self._energy_history[self._energy_idx] = energy + self._energy_idx = (self._energy_idx + 1) % len(self._energy_history) + avg_energy = float(np.mean(self._energy_history)) + + beat = False + beat_intensity = 0.0 + if avg_energy > 1e-8: + ratio = energy / avg_energy + if ratio > 1.5: + beat = True + beat_intensity = min(1.0, (ratio - 1.0) / 2.0) + + return AudioAnalysis( + timestamp=time.perf_counter(), + rms=rms, + peak=peak, + spectrum=self._smooth_spectrum.copy(), + beat=beat, + beat_intensity=beat_intensity, + left_rms=left_rms, + left_spectrum=self._smooth_spectrum_left.copy(), + right_rms=right_rms, + right_spectrum=self._smooth_spectrum_right.copy(), + ) + + def _fft_bands(self, samps, buf, smooth_buf, alpha, one_minus_alpha): + """Compute FFT, bin into bands, normalize, and smooth.""" + chunk_size = self._chunk_size + chunk = samps[:chunk_size] + if len(chunk) < chunk_size: + chunk = np.pad(chunk, (0, chunk_size - len(chunk))) + np.multiply(chunk, self._window, out=self._fft_windowed) + fft_mag = np.abs(np.fft.rfft(self._fft_windowed)) + fft_mag *= (1.0 / chunk_size) + fft_len = len(fft_mag) + for b, (s, e) in enumerate(self._bands): + if s < fft_len and e <= fft_len: + buf[b] = float(np.mean(fft_mag[s:e])) + else: + buf[b] = 0.0 + spec_max = float(np.max(buf)) + if spec_max > 1e-6: + buf *= (1.0 / spec_max) + smooth_buf *= one_minus_alpha + smooth_buf += alpha * buf diff --git a/server/src/wled_controller/core/audio/audio_capture.py b/server/src/wled_controller/core/audio/audio_capture.py index ec5586e..39197ec 100644 --- a/server/src/wled_controller/core/audio/audio_capture.py +++ b/server/src/wled_controller/core/audio/audio_capture.py @@ -1,137 +1,66 @@ """Audio capture service — shared audio analysis with ref counting. Provides real-time FFT spectrum, RMS level, and beat detection from -system audio (WASAPI loopback) or microphone/line-in. Multiple -AudioColorStripStreams sharing the same device reuse a single capture -thread via AudioCaptureManager. +system audio or microphone/line-in. Multiple AudioColorStripStreams +sharing the same device reuse a single capture thread via +AudioCaptureManager. -Uses PyAudioWPatch for WASAPI loopback support on Windows. +Engine-agnostic: uses AudioEngineRegistry to create the underlying +capture stream (WASAPI, sounddevice, etc.). """ -import math import threading import time -from dataclasses import dataclass, field -from typing import Dict, List, Optional, Tuple - -import numpy as np +from typing import Any, Dict, List, Optional, Tuple +from wled_controller.core.audio.analysis import ( + AudioAnalysis, + AudioAnalyzer, + DEFAULT_CHUNK_SIZE, + DEFAULT_SAMPLE_RATE, +) +from wled_controller.core.audio.base import AudioCaptureStreamBase +from wled_controller.core.audio.factory import AudioEngineRegistry from wled_controller.utils import get_logger logger = get_logger(__name__) -# Number of logarithmic frequency bands for spectrum analysis -NUM_BANDS = 64 -# Audio defaults -DEFAULT_SAMPLE_RATE = 44100 -DEFAULT_CHUNK_SIZE = 2048 # ~46 ms at 44100 Hz +# Re-export for backward compatibility +__all__ = [ + "AudioAnalysis", + "ManagedAudioStream", + "AudioCaptureManager", +] # --------------------------------------------------------------------------- -# AudioAnalysis — thread-safe snapshot of latest analysis results +# ManagedAudioStream — wraps engine stream + analyzer in background thread # --------------------------------------------------------------------------- -@dataclass -class AudioAnalysis: - """Snapshot of audio analysis results. +class ManagedAudioStream: + """Wraps an AudioCaptureStreamBase + AudioAnalyzer in a background thread. - Written by the capture thread, read by visualization streams. - Mono fields contain the mixed-down signal (all channels averaged). - Per-channel fields (left/right) are populated when the source is stereo+. - For mono sources, left/right are copies of the mono data. - """ - - timestamp: float = 0.0 - # Mono (mixed) — backward-compatible fields - rms: float = 0.0 - peak: float = 0.0 - spectrum: np.ndarray = field(default_factory=lambda: np.zeros(NUM_BANDS, dtype=np.float32)) - beat: bool = False - beat_intensity: float = 0.0 - # Per-channel - left_rms: float = 0.0 - left_spectrum: np.ndarray = field(default_factory=lambda: np.zeros(NUM_BANDS, dtype=np.float32)) - right_rms: float = 0.0 - right_spectrum: np.ndarray = field(default_factory=lambda: np.zeros(NUM_BANDS, dtype=np.float32)) - - -# --------------------------------------------------------------------------- -# AudioCaptureStream — one per unique audio device -# --------------------------------------------------------------------------- - -def _build_log_bands(num_bands: int, fft_size: int, sample_rate: int) -> List[Tuple[int, int]]: - """Build logarithmically-spaced frequency band boundaries for FFT bins. - - Returns list of (start_bin, end_bin) pairs. - """ - nyquist = sample_rate / 2 - # Map bands to log-spaced frequencies from 20 Hz to Nyquist - min_freq = 20.0 - max_freq = min(nyquist, 20000.0) - log_min = math.log10(min_freq) - log_max = math.log10(max_freq) - - freqs = np.logspace(log_min, log_max, num_bands + 1) - bin_width = sample_rate / fft_size - - bands = [] - for i in range(num_bands): - start_bin = max(1, int(freqs[i] / bin_width)) - end_bin = max(start_bin + 1, int(freqs[i + 1] / bin_width)) - # Clamp to FFT range - end_bin = min(end_bin, fft_size // 2) - bands.append((start_bin, end_bin)) - return bands - - -class AudioCaptureStream: - """Captures audio from a single device and provides real-time analysis. - - Runs a background thread that reads audio chunks, computes FFT, RMS, - and beat detection. Consumers read the latest analysis via - ``get_latest_analysis()`` (thread-safe). + Public API is the same as the old AudioCaptureStream: + start(), stop(), get_latest_analysis(), get_last_timing(). """ def __init__( self, + engine_type: str, device_index: int, is_loopback: bool, - sample_rate: int = DEFAULT_SAMPLE_RATE, - chunk_size: int = DEFAULT_CHUNK_SIZE, + engine_config: Optional[Dict[str, Any]] = None, ): + self._engine_type = engine_type self._device_index = device_index self._is_loopback = is_loopback - self._sample_rate = sample_rate - self._chunk_size = chunk_size + self._engine_config = engine_config or {} self._running = False self._thread: Optional[threading.Thread] = None self._lock = threading.Lock() self._latest: Optional[AudioAnalysis] = None - - # Pre-allocated FFT helpers - self._window = np.hanning(chunk_size).astype(np.float32) - self._bands = _build_log_bands(NUM_BANDS, chunk_size, sample_rate) - - # Beat detection state - self._energy_history: np.ndarray = np.zeros(43, dtype=np.float64) # ~1s at 44100/2048 - self._energy_idx = 0 - - # Smoothed spectrum (exponential decay between frames) - self._smooth_spectrum = np.zeros(NUM_BANDS, dtype=np.float32) - self._smooth_spectrum_left = np.zeros(NUM_BANDS, dtype=np.float32) - self._smooth_spectrum_right = np.zeros(NUM_BANDS, dtype=np.float32) - self._smoothing_alpha = 0.3 # lower = smoother - - # Pre-allocated FFT scratch buffers - self._fft_windowed = np.empty(chunk_size, dtype=np.float32) - self._fft_mag = None # allocated on first use (depends on rfft output size) - - # Pre-compute valid band ranges (avoid per-frame bounds checks) - self._valid_bands = None # set after first FFT when fft_mag size is known - - # Per-iteration timing (written by capture thread, read by consumers) self._last_timing: dict = {} def start(self) -> None: @@ -140,12 +69,13 @@ class AudioCaptureStream: self._running = True self._thread = threading.Thread( target=self._capture_loop, daemon=True, - name=f"AudioCapture-{self._device_index}-{'lb' if self._is_loopback else 'in'}", + name=f"AudioCapture-{self._engine_type}-{self._device_index}-" + f"{'lb' if self._is_loopback else 'in'}", ) self._thread.start() logger.info( - f"AudioCaptureStream started: device={self._device_index} " - f"loopback={self._is_loopback} sr={self._sample_rate} chunk={self._chunk_size}" + f"ManagedAudioStream started: engine={self._engine_type} " + f"device={self._device_index} loopback={self._is_loopback}" ) def stop(self) -> None: @@ -155,179 +85,48 @@ class AudioCaptureStream: self._thread = None with self._lock: self._latest = None - logger.info(f"AudioCaptureStream stopped: device={self._device_index}") + logger.info( + f"ManagedAudioStream stopped: engine={self._engine_type} " + f"device={self._device_index}" + ) def get_latest_analysis(self) -> Optional[AudioAnalysis]: with self._lock: return self._latest def get_last_timing(self) -> dict: - """Return per-iteration timing from the capture loop (ms).""" return dict(self._last_timing) - def _fft_bands(self, samps, buf, smooth_buf, window, bands, alpha, one_minus_alpha): - """Compute FFT, bin into bands, normalize, and smooth.""" - chunk_size = self._chunk_size - chunk = samps[:chunk_size] - if len(chunk) < chunk_size: - chunk = np.pad(chunk, (0, chunk_size - len(chunk))) - np.multiply(chunk, window, out=self._fft_windowed) - fft_mag = np.abs(np.fft.rfft(self._fft_windowed)) - fft_mag *= (1.0 / chunk_size) # in-place scale (faster than /=) - fft_len = len(fft_mag) - for b, (s, e) in enumerate(bands): - if s < fft_len and e <= fft_len: - buf[b] = float(np.mean(fft_mag[s:e])) - else: - buf[b] = 0.0 - spec_max = float(np.max(buf)) - if spec_max > 1e-6: - buf *= (1.0 / spec_max) - # Exponential smoothing: smooth = alpha * new + (1-alpha) * old - smooth_buf *= one_minus_alpha - smooth_buf += alpha * buf - def _capture_loop(self) -> None: + stream: Optional[AudioCaptureStreamBase] = None try: - import pyaudiowpatch as pyaudio - except ImportError: - logger.error("PyAudioWPatch is not installed — audio capture unavailable") - self._running = False - return - - pa = None - stream = None - try: - pa = pyaudio.PyAudio() - - if self._is_loopback: - # Loopback capture: find the loopback device for the output device - loopback_device = self._find_loopback_device(pa, self._device_index) - if loopback_device is None: - logger.error( - f"No loopback device found for output device {self._device_index}" - ) - self._running = False - return - - device_idx = loopback_device["index"] - channels = loopback_device["maxInputChannels"] - sample_rate = int(loopback_device["defaultSampleRate"]) - else: - # Regular input device - device_idx = self._device_index if self._device_index >= 0 else None - if device_idx is not None: - dev_info = pa.get_device_info_by_index(device_idx) - channels = max(1, dev_info["maxInputChannels"]) - sample_rate = int(dev_info["defaultSampleRate"]) - else: - channels = 1 - sample_rate = self._sample_rate - - # Update FFT helpers if sample rate changed - if sample_rate != self._sample_rate: - self._sample_rate = sample_rate - self._bands = _build_log_bands(NUM_BANDS, self._chunk_size, sample_rate) - - stream = pa.open( - format=pyaudio.paFloat32, - channels=channels, - rate=sample_rate, - input=True, - input_device_index=device_idx, - frames_per_buffer=self._chunk_size, + stream = AudioEngineRegistry.create_stream( + self._engine_type, self._device_index, + self._is_loopback, self._engine_config, ) + stream.initialize() + + sample_rate = stream.sample_rate + chunk_size = stream.chunk_size + channels = stream.channels + + analyzer = AudioAnalyzer(sample_rate=sample_rate, chunk_size=chunk_size) logger.info( - f"Audio stream opened: device={device_idx} loopback={self._is_loopback} " + f"Audio stream opened: engine={self._engine_type} " + f"device={self._device_index} loopback={self._is_loopback} " f"channels={channels} sr={sample_rate}" ) - spectrum_buf = np.zeros(NUM_BANDS, dtype=np.float32) - spectrum_buf_left = np.zeros(NUM_BANDS, dtype=np.float32) - spectrum_buf_right = np.zeros(NUM_BANDS, dtype=np.float32) - - # Pre-allocate channel buffers for stereo splitting - chunk_samples = self._chunk_size - if channels > 1: - _left_buf = np.empty(chunk_samples, dtype=np.float32) - _right_buf = np.empty(chunk_samples, dtype=np.float32) - _mono_buf = np.empty(chunk_samples, dtype=np.float32) - else: - _left_buf = _right_buf = _mono_buf = None - - # Pre-allocate scratch for RMS (avoid samples**2 temp array) - _sq_buf = np.empty(chunk_samples, dtype=np.float32) - - # Snapshot loop-invariant values - window = self._window - bands = self._bands - energy_history = self._energy_history - energy_len = len(energy_history) - alpha = self._smoothing_alpha - one_minus_alpha = 1.0 - alpha - while self._running: t_read_start = time.perf_counter() - try: - raw_data = stream.read(self._chunk_size, exception_on_overflow=False) - data = np.frombuffer(raw_data, dtype=np.float32) - except Exception as e: - logger.warning(f"Audio read error: {e}") + raw_data = stream.read_chunk() + if raw_data is None: time.sleep(0.05) continue t_read_end = time.perf_counter() - # Split channels and mix to mono - if channels > 1: - data = data.reshape(-1, channels) - np.copyto(_left_buf, data[:, 0]) - np.copyto(_right_buf, data[:, 1] if channels >= 2 else data[:, 0]) - np.add(data[:, 0], data[:, 1] if channels >= 2 else data[:, 0], out=_mono_buf) - _mono_buf *= 0.5 - samples = _mono_buf - left_samples = _left_buf - right_samples = _right_buf - else: - samples = data - left_samples = samples - right_samples = samples - - # RMS and peak — reuse scratch buffer - np.multiply(samples, samples, out=_sq_buf[:len(samples)]) - rms = float(np.sqrt(np.mean(_sq_buf[:len(samples)]))) - peak = float(np.max(np.abs(samples))) - if channels > 1: - np.multiply(left_samples, left_samples, out=_sq_buf) - left_rms = float(np.sqrt(np.mean(_sq_buf))) - np.multiply(right_samples, right_samples, out=_sq_buf) - right_rms = float(np.sqrt(np.mean(_sq_buf))) - else: - left_rms = rms - right_rms = rms - - # Compute FFT for mono, left, right - self._fft_bands(samples, spectrum_buf, self._smooth_spectrum, - window, bands, alpha, one_minus_alpha) - self._fft_bands(left_samples, spectrum_buf_left, self._smooth_spectrum_left, - window, bands, alpha, one_minus_alpha) - self._fft_bands(right_samples, spectrum_buf_right, self._smooth_spectrum_right, - window, bands, alpha, one_minus_alpha) - - # Beat detection — compare current energy to rolling average (mono) - np.multiply(samples, samples, out=_sq_buf[:len(samples)]) - energy = float(np.sum(_sq_buf[:len(samples)])) - energy_history[self._energy_idx] = energy - self._energy_idx = (self._energy_idx + 1) % energy_len - avg_energy = float(np.mean(energy_history)) - - beat = False - beat_intensity = 0.0 - if avg_energy > 1e-8: - ratio = energy / avg_energy - if ratio > 1.5: - beat = True - beat_intensity = min(1.0, (ratio - 1.0) / 2.0) + analysis = analyzer.analyze(raw_data, channels) t_fft_end = time.perf_counter() self._last_timing = { @@ -335,66 +134,22 @@ class AudioCaptureStream: "fft_ms": (t_fft_end - t_read_end) * 1000, } - analysis = AudioAnalysis( - timestamp=time.perf_counter(), - rms=rms, - peak=peak, - spectrum=self._smooth_spectrum.copy(), - beat=beat, - beat_intensity=beat_intensity, - left_rms=left_rms, - left_spectrum=self._smooth_spectrum_left.copy(), - right_rms=right_rms, - right_spectrum=self._smooth_spectrum_right.copy(), - ) - with self._lock: self._latest = analysis except Exception as e: - logger.error(f"AudioCaptureStream fatal error: {e}", exc_info=True) + logger.error(f"ManagedAudioStream fatal error: {e}", exc_info=True) finally: if stream is not None: try: - stream.stop_stream() - stream.close() - except Exception: - pass - if pa is not None: - try: - pa.terminate() + stream.cleanup() except Exception: pass self._running = False - logger.info(f"AudioCaptureStream loop ended: device={self._device_index}") - - @staticmethod - def _find_loopback_device(pa, output_device_index: int) -> Optional[dict]: - """Find the PyAudioWPatch loopback device for a given output device. - - PyAudioWPatch exposes virtual loopback input devices for each WASAPI - output device. We match by name via ``get_loopback_device_info_generator()``. - """ - try: - first_loopback = None - for loopback in pa.get_loopback_device_info_generator(): - if first_loopback is None: - first_loopback = loopback - - # Default (-1): return first loopback device (typically default speakers) - if output_device_index < 0: - return loopback - - # Match by output device name contained in loopback device name - target_info = pa.get_device_info_by_index(output_device_index) - if target_info["name"] in loopback["name"]: - return loopback - - # No exact match — return first available loopback - return first_loopback - except Exception as e: - logger.error(f"Error finding loopback device: {e}") - return None + logger.info( + f"ManagedAudioStream loop ended: engine={self._engine_type} " + f"device={self._device_index}" + ) # --------------------------------------------------------------------------- @@ -402,23 +157,43 @@ class AudioCaptureStream: # --------------------------------------------------------------------------- class AudioCaptureManager: - """Manages shared AudioCaptureStream instances with reference counting. + """Manages shared ManagedAudioStream instances with reference counting. Multiple AudioColorStripStreams using the same audio device share a - single capture thread. + single capture thread. Key: (engine_type, device_index, is_loopback). """ def __init__(self): - self._streams: Dict[Tuple[int, bool], Tuple[AudioCaptureStream, int]] = {} + self._streams: Dict[ + Tuple[str, int, bool], + Tuple[ManagedAudioStream, int], + ] = {} self._lock = threading.Lock() - def acquire(self, device_index: int, is_loopback: bool) -> AudioCaptureStream: - """Get or create an AudioCaptureStream for the given device. + def acquire( + self, + device_index: int, + is_loopback: bool, + engine_type: Optional[str] = None, + engine_config: Optional[Dict[str, Any]] = None, + ) -> ManagedAudioStream: + """Get or create a ManagedAudioStream for the given device. + + Args: + device_index: Audio device index + is_loopback: Whether to capture loopback audio + engine_type: Engine type (falls back to best available if None) + engine_config: Engine-specific configuration Returns: - Shared AudioCaptureStream instance. + Shared ManagedAudioStream instance. """ - key = (device_index, is_loopback) + if engine_type is None: + engine_type = AudioEngineRegistry.get_best_available_engine() + if engine_type is None: + raise RuntimeError("No audio capture engines available") + + key = (engine_type, device_index, is_loopback) with self._lock: if key in self._streams: stream, ref_count = self._streams[key] @@ -426,15 +201,27 @@ class AudioCaptureManager: logger.info(f"Reusing audio capture {key} (ref_count={ref_count + 1})") return stream - stream = AudioCaptureStream(device_index, is_loopback) + stream = ManagedAudioStream( + engine_type, device_index, is_loopback, engine_config, + ) stream.start() self._streams[key] = (stream, 1) logger.info(f"Created audio capture {key}") return stream - def release(self, device_index: int, is_loopback: bool) -> None: - """Release a reference to an AudioCaptureStream.""" - key = (device_index, is_loopback) + def release( + self, + device_index: int, + is_loopback: bool, + engine_type: Optional[str] = None, + ) -> None: + """Release a reference to a ManagedAudioStream.""" + if engine_type is None: + engine_type = AudioEngineRegistry.get_best_available_engine() + if engine_type is None: + return + + key = (engine_type, device_index, is_loopback) with self._lock: if key not in self._streams: logger.warning(f"Attempted to release unknown audio capture: {key}") @@ -463,61 +250,25 @@ class AudioCaptureManager: @staticmethod def enumerate_devices() -> List[dict]: - """List available audio devices for the frontend dropdown. + """List available audio devices from all registered engines. - Returns list of dicts with device info. Output devices with WASAPI - hostapi are marked as loopback candidates. + Returns list of dicts with device info, each tagged with engine_type. """ - try: - import pyaudiowpatch as pyaudio - except ImportError: - logger.warning("PyAudioWPatch not installed — no audio devices available") - return [] - - pa = None - try: - pa = pyaudio.PyAudio() - wasapi_info = pa.get_host_api_info_by_type(pyaudio.paWASAPI) - wasapi_idx = wasapi_info["index"] - - result = [] - device_count = pa.get_device_count() - for i in range(device_count): - dev = pa.get_device_info_by_index(i) - if dev["hostApi"] != wasapi_idx: + result = [] + for engine_type, engine_class in AudioEngineRegistry.get_all_engines().items(): + try: + if not engine_class.is_available(): continue - - is_input = dev["maxInputChannels"] > 0 - is_output = dev["maxOutputChannels"] > 0 - - if is_input: + for dev in engine_class.enumerate_devices(): result.append({ - "index": i, - "name": dev["name"], - "is_input": True, - "is_loopback": False, - "channels": dev["maxInputChannels"], - "default_samplerate": dev["defaultSampleRate"], + "index": dev.index, + "name": dev.name, + "is_input": dev.is_input, + "is_loopback": dev.is_loopback, + "channels": dev.channels, + "default_samplerate": dev.default_samplerate, + "engine_type": engine_type, }) - - if is_output: - result.append({ - "index": i, - "name": f"{dev['name']} [Loopback]", - "is_input": False, - "is_loopback": True, - "channels": dev["maxOutputChannels"], - "default_samplerate": dev["defaultSampleRate"], - }) - - return result - - except Exception as e: - logger.error(f"Failed to enumerate audio devices: {e}", exc_info=True) - return [] - finally: - if pa is not None: - try: - pa.terminate() - except Exception: - pass + except Exception as e: + logger.error(f"Error enumerating devices for engine '{engine_type}': {e}") + return result diff --git a/server/src/wled_controller/core/audio/base.py b/server/src/wled_controller/core/audio/base.py new file mode 100644 index 0000000..f6c0506 --- /dev/null +++ b/server/src/wled_controller/core/audio/base.py @@ -0,0 +1,165 @@ +"""Base classes for audio capture engines.""" + +from abc import ABC, abstractmethod +from dataclasses import dataclass +from typing import Any, Dict, List, Optional + +import numpy as np + + +@dataclass +class AudioDeviceInfo: + """Information about an audio device.""" + + index: int + name: str + is_input: bool + is_loopback: bool + channels: int + default_samplerate: float + + +class AudioCaptureStreamBase(ABC): + """Abstract base class for an audio capture session. + + An AudioCaptureStreamBase is a stateful session bound to a specific + audio device. It holds device-specific resources and provides raw + audio chunk reading. + + Created by AudioCaptureEngine.create_stream(). + + Lifecycle: + stream = engine.create_stream(device_index, is_loopback, config) + stream.initialize() + chunk = stream.read_chunk() + stream.cleanup() + + Or via context manager: + with engine.create_stream(device_index, is_loopback, config) as stream: + chunk = stream.read_chunk() + """ + + def __init__( + self, + device_index: int, + is_loopback: bool, + config: Dict[str, Any], + ): + self.device_index = device_index + self.is_loopback = is_loopback + self.config = config + self._initialized = False + + @property + @abstractmethod + def channels(self) -> int: + """Number of audio channels in the stream.""" + pass + + @property + @abstractmethod + def sample_rate(self) -> int: + """Sample rate of the audio stream.""" + pass + + @property + @abstractmethod + def chunk_size(self) -> int: + """Number of frames per read_chunk() call.""" + pass + + @abstractmethod + def initialize(self) -> None: + """Initialize audio capture resources. + + Raises: + RuntimeError: If initialization fails + """ + pass + + @abstractmethod + def cleanup(self) -> None: + """Release all audio capture resources.""" + pass + + @abstractmethod + def read_chunk(self) -> Optional[np.ndarray]: + """Read one chunk of raw audio data. + + Returns: + 1-D float32 ndarray of interleaved samples (length = chunk_size * channels), + or None if no data available. + + Raises: + RuntimeError: If read fails + """ + pass + + def __enter__(self): + """Context manager entry — initialize stream.""" + self.initialize() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + """Context manager exit — cleanup stream.""" + self.cleanup() + + +class AudioCaptureEngine(ABC): + """Abstract base class for audio capture engines. + + An AudioCaptureEngine is a stateless factory that knows about an audio + capture technology. It can enumerate devices, check availability, and + create AudioCaptureStreamBase instances. + + All methods are classmethods — no instance creation needed. + """ + + ENGINE_TYPE: str = "base" + ENGINE_PRIORITY: int = 0 + + @classmethod + @abstractmethod + def is_available(cls) -> bool: + """Check if this engine is available on the current system.""" + pass + + @classmethod + @abstractmethod + def get_default_config(cls) -> Dict[str, Any]: + """Get default configuration for this engine.""" + pass + + @classmethod + @abstractmethod + def enumerate_devices(cls) -> List[AudioDeviceInfo]: + """Get list of available audio devices. + + Returns: + List of AudioDeviceInfo objects + + Raises: + RuntimeError: If unable to detect devices + """ + pass + + @classmethod + @abstractmethod + def create_stream( + cls, + device_index: int, + is_loopback: bool, + config: Dict[str, Any], + ) -> AudioCaptureStreamBase: + """Create a capture stream for the specified device. + + Args: + device_index: Index of audio device + is_loopback: Whether to capture loopback audio + config: Engine-specific configuration dict + + Returns: + Uninitialized AudioCaptureStreamBase. Caller must call + initialize() or use as context manager. + """ + pass diff --git a/server/src/wled_controller/core/audio/factory.py b/server/src/wled_controller/core/audio/factory.py new file mode 100644 index 0000000..33495fe --- /dev/null +++ b/server/src/wled_controller/core/audio/factory.py @@ -0,0 +1,156 @@ +"""Engine registry and factory for audio capture engines.""" + +from typing import Any, Dict, List, Optional, Type + +from wled_controller.core.audio.base import AudioCaptureEngine, AudioCaptureStreamBase +from wled_controller.utils import get_logger + +logger = get_logger(__name__) + + +class AudioEngineRegistry: + """Registry for available audio capture engines. + + Maintains a registry of all audio engine implementations + and provides factory methods for creating capture streams. + """ + + _engines: Dict[str, Type[AudioCaptureEngine]] = {} + + @classmethod + def register(cls, engine_class: Type[AudioCaptureEngine]): + """Register an audio capture engine. + + Args: + engine_class: Engine class to register (must inherit from AudioCaptureEngine) + + Raises: + ValueError: If engine_class is not a subclass of AudioCaptureEngine + """ + if not issubclass(engine_class, AudioCaptureEngine): + raise ValueError(f"{engine_class} must be a subclass of AudioCaptureEngine") + + engine_type = engine_class.ENGINE_TYPE + if engine_type == "base": + raise ValueError("Cannot register base engine type") + + if engine_type in cls._engines: + logger.warning(f"Audio engine '{engine_type}' already registered, overwriting") + + cls._engines[engine_type] = engine_class + logger.info(f"Registered audio engine: {engine_type}") + + @classmethod + def get_engine(cls, engine_type: str) -> Type[AudioCaptureEngine]: + """Get engine class by type. + + Args: + engine_type: Engine type identifier (e.g., "wasapi", "sounddevice") + + Returns: + Engine class + + Raises: + ValueError: If engine type not found + """ + if engine_type not in cls._engines: + available = ", ".join(cls._engines.keys()) or "none" + raise ValueError( + f"Unknown audio engine type: '{engine_type}'. Available engines: {available}" + ) + return cls._engines[engine_type] + + @classmethod + def get_available_engines(cls) -> List[str]: + """Get list of available engine types on this system. + + Returns: + List of engine type identifiers that are available + """ + available = [] + for engine_type, engine_class in cls._engines.items(): + try: + if engine_class.is_available(): + available.append(engine_type) + except Exception as e: + logger.error( + f"Error checking availability for audio engine '{engine_type}': {e}" + ) + return available + + @classmethod + def get_best_available_engine(cls) -> Optional[str]: + """Get the highest-priority available engine type. + + Returns: + Engine type string, or None if no engines are available. + """ + best_type = None + best_priority = -1 + for engine_type, engine_class in cls._engines.items(): + try: + if engine_class.is_available() and engine_class.ENGINE_PRIORITY > best_priority: + best_priority = engine_class.ENGINE_PRIORITY + best_type = engine_type + except Exception as e: + logger.error( + f"Error checking availability for audio engine '{engine_type}': {e}" + ) + return best_type + + @classmethod + def get_all_engines(cls) -> Dict[str, Type[AudioCaptureEngine]]: + """Get all registered engines (available or not). + + Returns: + Dictionary mapping engine type to engine class + """ + return cls._engines.copy() + + @classmethod + def create_stream( + cls, + engine_type: str, + device_index: int, + is_loopback: bool, + config: Dict[str, Any], + ) -> AudioCaptureStreamBase: + """Create an AudioCaptureStreamBase for the specified engine and device. + + Args: + engine_type: Engine type identifier + device_index: Audio device index + is_loopback: Whether to capture loopback audio + config: Engine-specific configuration + + Returns: + Uninitialized AudioCaptureStreamBase instance + + Raises: + ValueError: If engine type not found or not available + """ + engine_class = cls.get_engine(engine_type) + + if not engine_class.is_available(): + raise ValueError( + f"Audio engine '{engine_type}' is not available on this system" + ) + + try: + stream = engine_class.create_stream(device_index, is_loopback, config) + logger.debug( + f"Created audio stream: {engine_type} " + f"(device={device_index}, loopback={is_loopback})" + ) + return stream + except Exception as e: + logger.error(f"Failed to create stream for audio engine '{engine_type}': {e}") + raise RuntimeError( + f"Failed to create stream for audio engine '{engine_type}': {e}" + ) + + @classmethod + def clear_registry(cls): + """Clear all registered engines (for testing).""" + cls._engines.clear() + logger.debug("Cleared audio engine registry") diff --git a/server/src/wled_controller/core/audio/sounddevice_engine.py b/server/src/wled_controller/core/audio/sounddevice_engine.py new file mode 100644 index 0000000..41265a4 --- /dev/null +++ b/server/src/wled_controller/core/audio/sounddevice_engine.py @@ -0,0 +1,159 @@ +"""Sounddevice audio capture engine (cross-platform, via PortAudio).""" + +from typing import Any, Dict, List, Optional + +import numpy as np + +from wled_controller.core.audio.base import ( + AudioCaptureEngine, + AudioCaptureStreamBase, + AudioDeviceInfo, +) +from wled_controller.utils import get_logger + +logger = get_logger(__name__) + + +class SounddeviceCaptureStream(AudioCaptureStreamBase): + """Audio capture stream using sounddevice (PortAudio).""" + + def __init__(self, device_index: int, is_loopback: bool, config: Dict[str, Any]): + super().__init__(device_index, is_loopback, config) + self._sd_stream = None + self._channels = config.get("channels", 2) + self._sample_rate = config.get("sample_rate", 44100) + self._chunk_size = config.get("chunk_size", 2048) + + @property + def channels(self) -> int: + return self._channels + + @property + def sample_rate(self) -> int: + return self._sample_rate + + @property + def chunk_size(self) -> int: + return self._chunk_size + + def initialize(self) -> None: + if self._initialized: + return + + try: + import sounddevice as sd + except ImportError: + raise RuntimeError("sounddevice is not installed — sounddevice engine unavailable") + + # Resolve device + device_id = self.device_index if self.device_index >= 0 else None + if device_id is not None: + dev_info = sd.query_devices(device_id) + self._channels = min(self._channels, int(dev_info["max_input_channels"])) + if self._channels < 1: + raise RuntimeError( + f"Device {device_id} ({dev_info['name']}) has no input channels" + ) + self._sample_rate = int(dev_info["default_samplerate"]) + + self._sd_stream = sd.InputStream( + device=device_id, + channels=self._channels, + samplerate=self._sample_rate, + blocksize=self._chunk_size, + dtype="float32", + ) + self._sd_stream.start() + self._initialized = True + logger.info( + f"sounddevice stream opened: device={device_id} loopback={self.is_loopback} " + f"channels={self._channels} sr={self._sample_rate}" + ) + + def cleanup(self) -> None: + if self._sd_stream is not None: + try: + self._sd_stream.stop() + self._sd_stream.close() + except Exception: + pass + self._sd_stream = None + self._initialized = False + + def read_chunk(self) -> Optional[np.ndarray]: + if self._sd_stream is None: + return None + try: + # sd.InputStream.read() returns (data, overflowed) + data, _ = self._sd_stream.read(self._chunk_size) + # data shape: (chunk_size, channels) — flatten to interleaved 1-D + return data.flatten().astype(np.float32) + except Exception as e: + logger.warning(f"sounddevice read error: {e}") + return None + + +class SounddeviceEngine(AudioCaptureEngine): + """Sounddevice (PortAudio) audio capture engine — cross-platform.""" + + ENGINE_TYPE = "sounddevice" + ENGINE_PRIORITY = 5 + + @classmethod + def is_available(cls) -> bool: + try: + import sounddevice # noqa: F401 + return True + except ImportError: + return False + + @classmethod + def get_default_config(cls) -> Dict[str, Any]: + return { + "sample_rate": 44100, + "chunk_size": 2048, + } + + @classmethod + def enumerate_devices(cls) -> List[AudioDeviceInfo]: + try: + import sounddevice as sd + except ImportError: + return [] + + try: + devices = sd.query_devices() + result = [] + for i, dev in enumerate(devices): + max_in = int(dev["max_input_channels"]) + if max_in < 1: + continue + + name = dev["name"] + # On PulseAudio/PipeWire, monitor sources are loopback-capable + is_loopback = "monitor" in name.lower() + + result.append(AudioDeviceInfo( + index=i, + name=name, + is_input=True, + is_loopback=is_loopback, + channels=max_in, + default_samplerate=dev["default_samplerate"], + )) + + return result + + except Exception as e: + logger.error(f"Failed to enumerate sounddevice devices: {e}", exc_info=True) + return [] + + @classmethod + def create_stream( + cls, + device_index: int, + is_loopback: bool, + config: Dict[str, Any], + ) -> SounddeviceCaptureStream: + merged = {**cls.get_default_config(), **config} + return SounddeviceCaptureStream(device_index, is_loopback, merged) diff --git a/server/src/wled_controller/core/audio/wasapi_engine.py b/server/src/wled_controller/core/audio/wasapi_engine.py new file mode 100644 index 0000000..c5ab61c --- /dev/null +++ b/server/src/wled_controller/core/audio/wasapi_engine.py @@ -0,0 +1,215 @@ +"""WASAPI audio capture engine (Windows only, via PyAudioWPatch).""" + +from typing import Any, Dict, List, Optional + +import numpy as np + +from wled_controller.core.audio.base import ( + AudioCaptureEngine, + AudioCaptureStreamBase, + AudioDeviceInfo, +) +from wled_controller.utils import get_logger + +logger = get_logger(__name__) + + +class WasapiCaptureStream(AudioCaptureStreamBase): + """Audio capture stream using PyAudioWPatch (WASAPI).""" + + def __init__(self, device_index: int, is_loopback: bool, config: Dict[str, Any]): + super().__init__(device_index, is_loopback, config) + self._pa = None + self._stream = None + self._channels = config.get("channels", 2) + self._sample_rate = config.get("sample_rate", 44100) + self._chunk_size = config.get("chunk_size", 2048) + + @property + def channels(self) -> int: + return self._channels + + @property + def sample_rate(self) -> int: + return self._sample_rate + + @property + def chunk_size(self) -> int: + return self._chunk_size + + def initialize(self) -> None: + if self._initialized: + return + + try: + import pyaudiowpatch as pyaudio + except ImportError: + raise RuntimeError("PyAudioWPatch is not installed — WASAPI engine unavailable") + + self._pa = pyaudio.PyAudio() + + if self.is_loopback: + loopback_device = self._find_loopback_device(self._pa, self.device_index) + if loopback_device is None: + self._pa.terminate() + self._pa = None + raise RuntimeError( + f"No loopback device found for output device {self.device_index}" + ) + device_idx = loopback_device["index"] + self._channels = loopback_device["maxInputChannels"] + self._sample_rate = int(loopback_device["defaultSampleRate"]) + else: + device_idx = self.device_index if self.device_index >= 0 else None + if device_idx is not None: + dev_info = self._pa.get_device_info_by_index(device_idx) + self._channels = max(1, dev_info["maxInputChannels"]) + self._sample_rate = int(dev_info["defaultSampleRate"]) + + self._stream = self._pa.open( + format=pyaudio.paFloat32, + channels=self._channels, + rate=self._sample_rate, + input=True, + input_device_index=device_idx, + frames_per_buffer=self._chunk_size, + ) + self._initialized = True + logger.info( + f"WASAPI stream opened: device={device_idx} loopback={self.is_loopback} " + f"channels={self._channels} sr={self._sample_rate}" + ) + + def cleanup(self) -> None: + if self._stream is not None: + try: + self._stream.stop_stream() + self._stream.close() + except Exception: + pass + self._stream = None + if self._pa is not None: + try: + self._pa.terminate() + except Exception: + pass + self._pa = None + self._initialized = False + + def read_chunk(self) -> Optional[np.ndarray]: + if self._stream is None: + return None + try: + raw_data = self._stream.read(self._chunk_size, exception_on_overflow=False) + return np.frombuffer(raw_data, dtype=np.float32) + except Exception as e: + logger.warning(f"WASAPI read error: {e}") + return None + + @staticmethod + def _find_loopback_device(pa, output_device_index: int) -> Optional[dict]: + """Find the PyAudioWPatch loopback device for a given output device.""" + try: + first_loopback = None + for loopback in pa.get_loopback_device_info_generator(): + if first_loopback is None: + first_loopback = loopback + + if output_device_index < 0: + return loopback + + target_info = pa.get_device_info_by_index(output_device_index) + if target_info["name"] in loopback["name"]: + return loopback + + return first_loopback + except Exception as e: + logger.error(f"Error finding loopback device: {e}") + return None + + +class WasapiEngine(AudioCaptureEngine): + """WASAPI audio capture engine (Windows only).""" + + ENGINE_TYPE = "wasapi" + ENGINE_PRIORITY = 10 + + @classmethod + def is_available(cls) -> bool: + try: + import pyaudiowpatch # noqa: F401 + return True + except ImportError: + return False + + @classmethod + def get_default_config(cls) -> Dict[str, Any]: + return { + "sample_rate": 44100, + "chunk_size": 2048, + } + + @classmethod + def enumerate_devices(cls) -> List[AudioDeviceInfo]: + try: + import pyaudiowpatch as pyaudio + except ImportError: + return [] + + pa = None + try: + pa = pyaudio.PyAudio() + wasapi_info = pa.get_host_api_info_by_type(pyaudio.paWASAPI) + wasapi_idx = wasapi_info["index"] + + result = [] + device_count = pa.get_device_count() + for i in range(device_count): + dev = pa.get_device_info_by_index(i) + if dev["hostApi"] != wasapi_idx: + continue + + is_input = dev["maxInputChannels"] > 0 + is_output = dev["maxOutputChannels"] > 0 + + if is_input: + result.append(AudioDeviceInfo( + index=i, + name=dev["name"], + is_input=True, + is_loopback=False, + channels=dev["maxInputChannels"], + default_samplerate=dev["defaultSampleRate"], + )) + + if is_output: + result.append(AudioDeviceInfo( + index=i, + name=f"{dev['name']} [Loopback]", + is_input=False, + is_loopback=True, + channels=dev["maxOutputChannels"], + default_samplerate=dev["defaultSampleRate"], + )) + + return result + + except Exception as e: + logger.error(f"Failed to enumerate WASAPI devices: {e}", exc_info=True) + return [] + finally: + if pa is not None: + try: + pa.terminate() + except Exception: + pass + + @classmethod + def create_stream( + cls, + device_index: int, + is_loopback: bool, + config: Dict[str, Any], + ) -> WasapiCaptureStream: + merged = {**cls.get_default_config(), **config} + return WasapiCaptureStream(device_index, is_loopback, merged) diff --git a/server/src/wled_controller/core/processing/audio_stream.py b/server/src/wled_controller/core/processing/audio_stream.py index 93f26ef..2ff0fb8 100644 --- a/server/src/wled_controller/core/processing/audio_stream.py +++ b/server/src/wled_controller/core/processing/audio_stream.py @@ -15,7 +15,8 @@ from typing import Optional import numpy as np -from wled_controller.core.audio.audio_capture import AudioCaptureManager, NUM_BANDS +from wled_controller.core.audio.analysis import NUM_BANDS +from wled_controller.core.audio.audio_capture import AudioCaptureManager from wled_controller.core.processing.color_strip_stream import ColorStripStream from wled_controller.core.processing.effect_stream import _build_palette_lut from wled_controller.utils import get_logger @@ -35,9 +36,10 @@ class AudioColorStripStream(ColorStripStream): thread, double-buffered output, configure() for auto-sizing. """ - def __init__(self, source, audio_capture_manager: AudioCaptureManager, audio_source_store=None): + def __init__(self, source, audio_capture_manager: AudioCaptureManager, audio_source_store=None, audio_template_store=None): self._audio_capture_manager = audio_capture_manager self._audio_source_store = audio_source_store + self._audio_template_store = audio_template_store self._audio_stream = None # acquired on start self._colors_lock = threading.Lock() @@ -74,15 +76,26 @@ class AudioColorStripStream(ColorStripStream): self._led_count = source.led_count if source.led_count and source.led_count > 0 else 1 self._mirror = bool(getattr(source, "mirror", False)) - # Resolve audio device/channel via audio_source_id + # Resolve audio device/channel/template via audio_source_id audio_source_id = getattr(source, "audio_source_id", "") self._audio_source_id = audio_source_id + self._audio_engine_type = None + self._audio_engine_config = None if audio_source_id and self._audio_source_store: try: - device_index, is_loopback, channel = self._audio_source_store.resolve_audio_source(audio_source_id) + device_index, is_loopback, channel, template_id = ( + self._audio_source_store.resolve_audio_source(audio_source_id) + ) self._audio_device_index = device_index self._audio_loopback = is_loopback self._audio_channel = channel + if template_id and self._audio_template_store: + try: + tpl = self._audio_template_store.get_template(template_id) + self._audio_engine_type = tpl.engine_type + self._audio_engine_config = tpl.engine_config + except ValueError: + pass except ValueError as e: logger.warning(f"Failed to resolve audio source {audio_source_id}: {e}") self._audio_device_index = -1 @@ -121,7 +134,9 @@ class AudioColorStripStream(ColorStripStream): return # Acquire shared audio capture stream self._audio_stream = self._audio_capture_manager.acquire( - self._audio_device_index, self._audio_loopback + self._audio_device_index, self._audio_loopback, + engine_type=self._audio_engine_type, + engine_config=self._audio_engine_config, ) self._running = True self._thread = threading.Thread( @@ -132,6 +147,7 @@ class AudioColorStripStream(ColorStripStream): self._thread.start() logger.info( f"AudioColorStripStream started (viz={self._visualization_mode}, " + f"engine={self._audio_engine_type}, " f"device={self._audio_device_index}, loopback={self._audio_loopback})" ) @@ -144,7 +160,10 @@ class AudioColorStripStream(ColorStripStream): self._thread = None # Release shared audio capture if self._audio_stream is not None: - self._audio_capture_manager.release(self._audio_device_index, self._audio_loopback) + self._audio_capture_manager.release( + self._audio_device_index, self._audio_loopback, + engine_type=self._audio_engine_type, + ) self._audio_stream = None self._prev_spectrum = None logger.info("AudioColorStripStream stopped") @@ -161,20 +180,31 @@ class AudioColorStripStream(ColorStripStream): if isinstance(source, AudioColorStripSource): old_device = self._audio_device_index old_loopback = self._audio_loopback + old_engine_type = self._audio_engine_type prev_led_count = self._led_count if self._auto_size else None self._update_from_source(source) if prev_led_count and self._auto_size: self._led_count = prev_led_count - # If audio device changed, swap capture stream - if self._running and (self._audio_device_index != old_device or self._audio_loopback != old_loopback): - self._audio_capture_manager.release(old_device, old_loopback) + # If audio device or engine changed, swap capture stream + needs_swap = ( + self._audio_device_index != old_device + or self._audio_loopback != old_loopback + or self._audio_engine_type != old_engine_type + ) + if self._running and needs_swap: + self._audio_capture_manager.release( + old_device, old_loopback, engine_type=old_engine_type, + ) self._audio_stream = self._audio_capture_manager.acquire( - self._audio_device_index, self._audio_loopback + self._audio_device_index, self._audio_loopback, + engine_type=self._audio_engine_type, + engine_config=self._audio_engine_config, ) logger.info( f"AudioColorStripStream swapped audio device: " - f"{old_device}:{old_loopback} → {self._audio_device_index}:{self._audio_loopback}" + f"{old_engine_type}:{old_device}:{old_loopback} → " + f"{self._audio_engine_type}:{self._audio_device_index}:{self._audio_loopback}" ) logger.info("AudioColorStripStream params updated in-place") diff --git a/server/src/wled_controller/core/processing/color_strip_stream_manager.py b/server/src/wled_controller/core/processing/color_strip_stream_manager.py index 2e6f7ce..0738a2a 100644 --- a/server/src/wled_controller/core/processing/color_strip_stream_manager.py +++ b/server/src/wled_controller/core/processing/color_strip_stream_manager.py @@ -58,7 +58,7 @@ class ColorStripStreamManager: keyed by ``{css_id}:{consumer_id}``. """ - def __init__(self, color_strip_store, live_stream_manager, audio_capture_manager=None, audio_source_store=None): + def __init__(self, color_strip_store, live_stream_manager, audio_capture_manager=None, audio_source_store=None, audio_template_store=None): """ Args: color_strip_store: ColorStripStore for resolving source configs @@ -70,6 +70,7 @@ class ColorStripStreamManager: self._live_stream_manager = live_stream_manager self._audio_capture_manager = audio_capture_manager self._audio_source_store = audio_source_store + self._audio_template_store = audio_template_store self._streams: Dict[str, _ColorStripEntry] = {} def _resolve_key(self, css_id: str, consumer_id: str) -> str: @@ -108,7 +109,7 @@ class ColorStripStreamManager: if not source.sharable: if source.source_type == "audio": from wled_controller.core.processing.audio_stream import AudioColorStripStream - css_stream = AudioColorStripStream(source, self._audio_capture_manager, self._audio_source_store) + css_stream = AudioColorStripStream(source, self._audio_capture_manager, self._audio_source_store, self._audio_template_store) elif source.source_type == "composite": from wled_controller.core.processing.composite_stream import CompositeColorStripStream css_stream = CompositeColorStripStream(source, self) diff --git a/server/src/wled_controller/core/processing/processor_manager.py b/server/src/wled_controller/core/processing/processor_manager.py index 6772597..b55d932 100644 --- a/server/src/wled_controller/core/processing/processor_manager.py +++ b/server/src/wled_controller/core/processing/processor_manager.py @@ -66,7 +66,7 @@ class ProcessorManager: Targets are registered for processing via polymorphic TargetProcessor subclasses. """ - def __init__(self, picture_source_store=None, capture_template_store=None, pp_template_store=None, pattern_template_store=None, device_store=None, color_strip_store=None, audio_source_store=None, value_source_store=None): + def __init__(self, picture_source_store=None, capture_template_store=None, pp_template_store=None, pattern_template_store=None, device_store=None, color_strip_store=None, audio_source_store=None, value_source_store=None, audio_template_store=None): """Initialize processor manager.""" self._devices: Dict[str, DeviceState] = {} self._processors: Dict[str, TargetProcessor] = {} @@ -80,6 +80,7 @@ class ProcessorManager: self._device_store = device_store self._color_strip_store = color_strip_store self._audio_source_store = audio_source_store + self._audio_template_store = audio_template_store self._value_source_store = value_source_store self._live_stream_manager = LiveStreamManager( picture_source_store, capture_template_store, pp_template_store @@ -90,12 +91,14 @@ class ProcessorManager: live_stream_manager=self._live_stream_manager, audio_capture_manager=self._audio_capture_manager, audio_source_store=audio_source_store, + audio_template_store=audio_template_store, ) self._value_stream_manager = ValueStreamManager( value_source_store=value_source_store, audio_capture_manager=self._audio_capture_manager, audio_source_store=audio_source_store, live_stream_manager=self._live_stream_manager, + audio_template_store=audio_template_store, ) if value_source_store else None self._overlay_manager = OverlayManager() self._event_queues: List[asyncio.Queue] = [] diff --git a/server/src/wled_controller/core/processing/value_stream.py b/server/src/wled_controller/core/processing/value_stream.py index c4c5245..c5811ab 100644 --- a/server/src/wled_controller/core/processing/value_stream.py +++ b/server/src/wled_controller/core/processing/value_stream.py @@ -160,6 +160,7 @@ class AudioValueStream(ValueStream): max_value: float = 1.0, audio_capture_manager: Optional["AudioCaptureManager"] = None, audio_source_store: Optional["AudioSourceStore"] = None, + audio_template_store=None, ): self._audio_source_id = audio_source_id self._mode = mode @@ -169,11 +170,14 @@ class AudioValueStream(ValueStream): self._max = max_value self._audio_capture_manager = audio_capture_manager self._audio_source_store = audio_source_store + self._audio_template_store = audio_template_store # Resolved audio device params self._audio_device_index = -1 self._audio_loopback = True self._audio_channel = "mono" + self._audio_engine_type = None + self._audio_engine_config = None self._audio_stream = None self._prev_value = 0.0 @@ -182,15 +186,22 @@ class AudioValueStream(ValueStream): self._resolve_audio_source() def _resolve_audio_source(self) -> None: - """Resolve audio source (mono or multichannel) to device index / channel.""" + """Resolve audio source to device index / channel / engine info.""" if self._audio_source_id and self._audio_source_store: try: - device_index, is_loopback, channel = ( + device_index, is_loopback, channel, template_id = ( self._audio_source_store.resolve_audio_source(self._audio_source_id) ) self._audio_device_index = device_index self._audio_loopback = is_loopback self._audio_channel = channel + if template_id and self._audio_template_store: + try: + tpl = self._audio_template_store.get_template(template_id) + self._audio_engine_type = tpl.engine_type + self._audio_engine_config = tpl.engine_config + except ValueError: + pass except ValueError as e: logger.warning(f"Failed to resolve audio source {self._audio_source_id}: {e}") @@ -198,16 +209,21 @@ class AudioValueStream(ValueStream): if self._audio_capture_manager is None: return self._audio_stream = self._audio_capture_manager.acquire( - self._audio_device_index, self._audio_loopback + self._audio_device_index, self._audio_loopback, + engine_type=self._audio_engine_type, + engine_config=self._audio_engine_config, ) logger.info( - f"AudioValueStream started (mode={self._mode}, " + f"AudioValueStream started (mode={self._mode}, engine={self._audio_engine_type}, " f"device={self._audio_device_index}, loopback={self._audio_loopback})" ) def stop(self) -> None: if self._audio_stream is not None and self._audio_capture_manager is not None: - self._audio_capture_manager.release(self._audio_device_index, self._audio_loopback) + self._audio_capture_manager.release( + self._audio_device_index, self._audio_loopback, + engine_type=self._audio_engine_type, + ) self._audio_stream = None self._prev_value = 0.0 self._beat_brightness = 0.0 @@ -279,16 +295,21 @@ class AudioValueStream(ValueStream): if source.audio_source_id != old_source_id: old_device = self._audio_device_index old_loopback = self._audio_loopback + old_engine_type = self._audio_engine_type self._resolve_audio_source() if self._audio_stream is not None and self._audio_capture_manager is not None: - self._audio_capture_manager.release(old_device, old_loopback) + self._audio_capture_manager.release( + old_device, old_loopback, engine_type=old_engine_type, + ) self._audio_stream = self._audio_capture_manager.acquire( - self._audio_device_index, self._audio_loopback + self._audio_device_index, self._audio_loopback, + engine_type=self._audio_engine_type, + engine_config=self._audio_engine_config, ) logger.info( f"AudioValueStream swapped audio device: " - f"{old_device}:{old_loopback} → " - f"{self._audio_device_index}:{self._audio_loopback}" + f"{old_engine_type}:{old_device}:{old_loopback} → " + f"{self._audio_engine_type}:{self._audio_device_index}:{self._audio_loopback}" ) @@ -521,11 +542,13 @@ class ValueStreamManager: audio_capture_manager: Optional["AudioCaptureManager"] = None, audio_source_store: Optional["AudioSourceStore"] = None, live_stream_manager: Optional["LiveStreamManager"] = None, + audio_template_store=None, ): self._value_source_store = value_source_store self._audio_capture_manager = audio_capture_manager self._audio_source_store = audio_source_store self._live_stream_manager = live_stream_manager + self._audio_template_store = audio_template_store self._streams: Dict[str, ValueStream] = {} def acquire(self, vs_id: str, consumer_id: str) -> ValueStream: @@ -611,6 +634,7 @@ class ValueStreamManager: max_value=source.max_value, audio_capture_manager=self._audio_capture_manager, audio_source_store=self._audio_source_store, + audio_template_store=self._audio_template_store, ) if isinstance(source, AdaptiveValueSource): diff --git a/server/src/wled_controller/main.py b/server/src/wled_controller/main.py index 1157a63..4d774b8 100644 --- a/server/src/wled_controller/main.py +++ b/server/src/wled_controller/main.py @@ -24,6 +24,8 @@ from wled_controller.storage.picture_source_store import PictureSourceStore from wled_controller.storage.picture_target_store import PictureTargetStore from wled_controller.storage.color_strip_store import ColorStripStore from wled_controller.storage.audio_source_store import AudioSourceStore +from wled_controller.storage.audio_template_store import AudioTemplateStore +import wled_controller.core.audio # noqa: F401 — trigger engine auto-registration from wled_controller.storage.value_source_store import ValueSourceStore from wled_controller.storage.profile_store import ProfileStore from wled_controller.core.profiles.profile_engine import ProfileEngine @@ -45,11 +47,14 @@ picture_target_store = PictureTargetStore(config.storage.picture_targets_file) pattern_template_store = PatternTemplateStore(config.storage.pattern_templates_file) color_strip_store = ColorStripStore(config.storage.color_strip_sources_file) audio_source_store = AudioSourceStore(config.storage.audio_sources_file) +audio_template_store = AudioTemplateStore(config.storage.audio_templates_file) value_source_store = ValueSourceStore(config.storage.value_sources_file) profile_store = ProfileStore(config.storage.profiles_file) # Migrate embedded audio config from CSS entities to audio sources audio_source_store.migrate_from_css(color_strip_store) +# Assign default audio template to multichannel sources that have none +audio_source_store.migrate_add_default_template(audio_template_store) processor_manager = ProcessorManager( picture_source_store=picture_source_store, @@ -60,6 +65,7 @@ processor_manager = ProcessorManager( color_strip_store=color_strip_store, audio_source_store=audio_source_store, value_source_store=value_source_store, + audio_template_store=audio_template_store, ) @@ -104,6 +110,7 @@ async def lifespan(app: FastAPI): picture_target_store=picture_target_store, color_strip_store=color_strip_store, audio_source_store=audio_source_store, + audio_template_store=audio_template_store, value_source_store=value_source_store, profile_store=profile_store, profile_engine=profile_engine, diff --git a/server/src/wled_controller/static/css/modal.css b/server/src/wled_controller/static/css/modal.css index cb70ba0..262cc95 100644 --- a/server/src/wled_controller/static/css/modal.css +++ b/server/src/wled_controller/static/css/modal.css @@ -13,6 +13,11 @@ animation: fadeIn 0.2s ease-out; } +/* Confirm dialog must stack above all other modals */ +#confirm-modal { + z-index: 2500; +} + @keyframes fadeIn { from { opacity: 0; } to { opacity: 1; } diff --git a/server/src/wled_controller/static/js/app.js b/server/src/wled_controller/static/js/app.js index 88272d8..282f496 100644 --- a/server/src/wled_controller/static/js/app.js +++ b/server/src/wled_controller/static/js/app.js @@ -46,6 +46,8 @@ import { loadPictureSources, switchStreamTab, showAddTemplateModal, editTemplate, closeTemplateModal, saveTemplate, deleteTemplate, showTestTemplateModal, closeTestTemplateModal, onEngineChange, runTemplateTest, + showAddAudioTemplateModal, editAudioTemplate, closeAudioTemplateModal, saveAudioTemplate, deleteAudioTemplate, + cloneAudioTemplate, onAudioEngineChange, showAddStreamModal, editStream, closeStreamModal, saveStream, deleteStream, onStreamTypeChange, onStreamDisplaySelected, onTestDisplaySelected, showTestStreamModal, closeTestStreamModal, updateStreamTestDuration, runStreamTest, @@ -238,6 +240,13 @@ Object.assign(window, { cloneStream, cloneCaptureTemplate, clonePPTemplate, + showAddAudioTemplateModal, + editAudioTemplate, + closeAudioTemplateModal, + saveAudioTemplate, + deleteAudioTemplate, + cloneAudioTemplate, + onAudioEngineChange, // kc-targets createKCTargetCard, diff --git a/server/src/wled_controller/static/js/core/icons.js b/server/src/wled_controller/static/js/core/icons.js index 882e19e..21d866d 100644 --- a/server/src/wled_controller/static/js/core/icons.js +++ b/server/src/wled_controller/static/js/core/icons.js @@ -67,6 +67,7 @@ export const ICON_TEMPLATE = '\uD83D\uDCCB'; // 📋 (generic card head export const ICON_CAPTURE_TEMPLATE = '\uD83D\uDCF7'; // 📷 export const ICON_PP_TEMPLATE = '\uD83D\uDD27'; // 🔧 export const ICON_PATTERN_TEMPLATE = '\uD83D\uDCC4'; // 📄 +export const ICON_AUDIO_TEMPLATE = '\uD83C\uDFB5'; // 🎵 // ── Action constants ──────────────────────────────────────── diff --git a/server/src/wled_controller/static/js/core/state.js b/server/src/wled_controller/static/js/core/state.js index 3a613a4..5d01e0a 100644 --- a/server/src/wled_controller/static/js/core/state.js +++ b/server/src/wled_controller/static/js/core/state.js @@ -178,6 +178,19 @@ export const PATTERN_RECT_BORDERS = [ export let _cachedAudioSources = []; export function set_cachedAudioSources(v) { _cachedAudioSources = v; } +// Audio templates +export let _cachedAudioTemplates = []; +export function set_cachedAudioTemplates(v) { _cachedAudioTemplates = v; } + +export let availableAudioEngines = []; +export function setAvailableAudioEngines(v) { availableAudioEngines = v; } + +export let currentEditingAudioTemplateId = null; +export function setCurrentEditingAudioTemplateId(v) { currentEditingAudioTemplateId = v; } + +export let _audioTemplateNameManuallyEdited = false; +export function set_audioTemplateNameManuallyEdited(v) { _audioTemplateNameManuallyEdited = v; } + // Value sources export let _cachedValueSources = []; export function set_cachedValueSources(v) { _cachedValueSources = v; } diff --git a/server/src/wled_controller/static/js/features/audio-sources.js b/server/src/wled_controller/static/js/features/audio-sources.js index 1a9f198..61fa156 100644 --- a/server/src/wled_controller/static/js/features/audio-sources.js +++ b/server/src/wled_controller/static/js/features/audio-sources.js @@ -10,7 +10,7 @@ * This module manages the editor modal and API operations. */ -import { _cachedAudioSources, set_cachedAudioSources } from '../core/state.js'; +import { _cachedAudioSources, set_cachedAudioSources, _cachedAudioTemplates } from '../core/state.js'; import { fetchWithAuth, escapeHtml } from '../core/api.js'; import { t } from '../core/i18n.js'; import { showToast, showConfirm } from '../core/ui.js'; @@ -26,6 +26,7 @@ class AudioSourceModal extends Modal { description: document.getElementById('audio-source-description').value, type: document.getElementById('audio-source-type').value, device: document.getElementById('audio-source-device').value, + audioTemplate: document.getElementById('audio-source-audio-template').value, parent: document.getElementById('audio-source-parent').value, channel: document.getElementById('audio-source-channel').value, }; @@ -57,6 +58,7 @@ export async function showAudioSourceModal(sourceType, editData) { document.getElementById('audio-source-description').value = editData.description || ''; if (editData.source_type === 'multichannel') { + _loadAudioTemplates(editData.audio_template_id); await _loadAudioDevices(); _selectAudioDevice(editData.device_index, editData.is_loopback); } else { @@ -68,6 +70,7 @@ export async function showAudioSourceModal(sourceType, editData) { document.getElementById('audio-source-description').value = ''; if (sourceType === 'multichannel') { + _loadAudioTemplates(); await _loadAudioDevices(); } else { _loadMultichannelSources(); @@ -110,6 +113,7 @@ export async function saveAudioSource() { const [devIdx, devLoop] = deviceVal.split(':'); payload.device_index = parseInt(devIdx) || -1; payload.is_loopback = devLoop !== '0'; + payload.audio_template_id = document.getElementById('audio-source-audio-template').value || null; } else { payload.audio_source_id = document.getElementById('audio-source-parent').value; payload.channel = document.getElementById('audio-source-channel').value; @@ -223,3 +227,12 @@ function _loadMultichannelSources(selectedId) { `` ).join(''); } + +function _loadAudioTemplates(selectedId) { + const select = document.getElementById('audio-source-audio-template'); + if (!select) return; + const templates = _cachedAudioTemplates || []; + select.innerHTML = templates.map(t => + `` + ).join(''); +} diff --git a/server/src/wled_controller/static/js/features/streams.js b/server/src/wled_controller/static/js/features/streams.js index bda9291..fd9674f 100644 --- a/server/src/wled_controller/static/js/features/streams.js +++ b/server/src/wled_controller/static/js/features/streams.js @@ -20,6 +20,10 @@ import { _lastValidatedImageSource, set_lastValidatedImageSource, _cachedAudioSources, set_cachedAudioSources, _cachedValueSources, set_cachedValueSources, + _cachedAudioTemplates, set_cachedAudioTemplates, + availableAudioEngines, setAvailableAudioEngines, + currentEditingAudioTemplateId, setCurrentEditingAudioTemplateId, + _audioTemplateNameManuallyEdited, set_audioTemplateNameManuallyEdited, _sourcesLoading, set_sourcesLoading, apiKey, } from '../core/state.js'; @@ -35,6 +39,7 @@ import { getEngineIcon, getPictureSourceIcon, getAudioSourceIcon, ICON_TEMPLATE, ICON_CLONE, ICON_EDIT, ICON_TEST, ICON_LINK_SOURCE, ICON_FPS, ICON_WEB, ICON_VALUE_SOURCE, ICON_AUDIO_LOOPBACK, ICON_AUDIO_INPUT, + ICON_AUDIO_TEMPLATE, } from '../core/icons.js'; // ── Card section instances ── @@ -45,6 +50,7 @@ const csProcTemplates = new CardSection('proc-templates', { titleKey: 'postproce const csAudioMulti = new CardSection('audio-multi', { titleKey: 'audio_source.group.multichannel', gridClass: 'templates-grid', addCardOnclick: "showAudioSourceModal('multichannel')" }); const csAudioMono = new CardSection('audio-mono', { titleKey: 'audio_source.group.mono', gridClass: 'templates-grid', addCardOnclick: "showAudioSourceModal('mono')" }); const csStaticStreams = new CardSection('static-streams', { titleKey: 'streams.group.static_image', gridClass: 'templates-grid', addCardOnclick: "showAddStreamModal('static_image')" }); +const csAudioTemplates = new CardSection('audio-templates', { titleKey: 'audio_template.title', gridClass: 'templates-grid', addCardOnclick: "showAddAudioTemplateModal()" }); const csValueSources = new CardSection('value-sources', { titleKey: 'value_source.group.title', gridClass: 'templates-grid', addCardOnclick: "showValueSourceModal()" }); // Re-render picture sources when language changes @@ -113,12 +119,34 @@ class PPTemplateEditorModal extends Modal { } } +class AudioTemplateModal extends Modal { + constructor() { super('audio-template-modal'); } + + snapshotValues() { + const vals = { + name: document.getElementById('audio-template-name').value, + description: document.getElementById('audio-template-description').value, + engine: document.getElementById('audio-template-engine').value, + }; + document.querySelectorAll('#audio-engine-config-fields [data-config-key]').forEach(field => { + vals['cfg_' + field.dataset.configKey] = field.value; + }); + return vals; + } + + onForceClose() { + setCurrentEditingAudioTemplateId(null); + set_audioTemplateNameManuallyEdited(false); + } +} + const templateModal = new CaptureTemplateModal(); const testTemplateModal = new Modal('test-template-modal'); const streamModal = new StreamEditorModal(); const testStreamModal = new Modal('test-stream-modal'); const ppTemplateModal = new PPTemplateEditorModal(); const testPPTemplateModal = new Modal('test-pp-template-modal'); +const audioTemplateModal = new AudioTemplateModal(); // ===== Capture Templates ===== @@ -511,6 +539,261 @@ export async function deleteTemplate(templateId) { } } +// ===== Audio Templates ===== + +async function loadAvailableAudioEngines() { + try { + const response = await fetchWithAuth('/audio-engines'); + if (!response.ok) throw new Error(`Failed to load audio engines: ${response.status}`); + const data = await response.json(); + setAvailableAudioEngines(data.engines || []); + + const select = document.getElementById('audio-template-engine'); + select.innerHTML = ''; + + availableAudioEngines.forEach(engine => { + const option = document.createElement('option'); + option.value = engine.type; + option.textContent = `${engine.type.toUpperCase()}`; + if (!engine.available) { + option.disabled = true; + option.textContent += ` (${t('audio_template.engine.unavailable')})`; + } + select.appendChild(option); + }); + + if (!select.value) { + const firstAvailable = availableAudioEngines.find(e => e.available); + if (firstAvailable) select.value = firstAvailable.type; + } + } catch (error) { + console.error('Error loading audio engines:', error); + showToast(t('audio_template.error.engines') + ': ' + error.message, 'error'); + } +} + +export async function onAudioEngineChange() { + const engineType = document.getElementById('audio-template-engine').value; + const configSection = document.getElementById('audio-engine-config-section'); + const configFields = document.getElementById('audio-engine-config-fields'); + + if (!engineType) { configSection.style.display = 'none'; return; } + + const engine = availableAudioEngines.find(e => e.type === engineType); + if (!engine) { configSection.style.display = 'none'; return; } + + if (!_audioTemplateNameManuallyEdited && !document.getElementById('audio-template-id').value) { + document.getElementById('audio-template-name').value = engine.type.toUpperCase(); + } + + const hint = document.getElementById('audio-engine-availability-hint'); + if (!engine.available) { + hint.textContent = t('audio_template.engine.unavailable.hint'); + hint.style.display = 'block'; + hint.style.color = 'var(--error-color)'; + } else { + hint.style.display = 'none'; + } + + configFields.innerHTML = ''; + const defaultConfig = engine.default_config || {}; + + if (Object.keys(defaultConfig).length === 0) { + configSection.style.display = 'none'; + return; + } else { + let gridHtml = '
| ${escapeHtml(key)} | +${escapeHtml(String(val))} | +