bcc6d40ed7
Lint & Test / test (push) Successful in 20s
Security
- Default bind 127.0.0.1; first-run bootstrap generates random api_token
and refuses to bind non-loopback without auth unless explicitly opted in
- Path-traversal hardened: BrowserService.validate_path rejects absolute
paths, drive letters, UNC, NUL bytes. /api/browser/{play,metadata,
thumbnail} now require folder_id and a folder-relative path
- Pydantic validators on links: http(s) URLs only, mdi:<slug> icons only
- Scripts/callbacks/links create/update/delete gated by *_management flags
- Strict CSP, X-Frame-Options DENY, Referrer-Policy no-referrer,
X-Content-Type-Options nosniff
- CORS locked to localhost:<port> + 127.0.0.1:<port> by default; configurable
- config.yaml writes atomic (tmp + os.replace) and 0o600 on POSIX
- Subprocesses spawned in their own process group / new session so timeout
kills the whole tree (Windows CREATE_NEW_PROCESS_GROUP, POSIX
start_new_session=True)
- Frontend XSS: monitor name + details escapeHtml'd; power button moved to
delegated data-action handler; remote MDI SVGs parsed and sanitized
(strip script/foreignObject/on*/javascript: hrefs) before innerHTML
- All dynamic URL segments now wrapped in encodeURIComponent
Bugs
- WebSocket reconnect: close previous socket before opening new, clear
ping interval per-socket, clear reconnectTimeout up-front, retry on
online/visibilitychange, try/catch JSON.parse
- Artwork fetch race: AbortController + generation guard
- _broadcast_after_open: initialize status, swallow per-poll errors,
background tasks tracked in a strong-ref set with done-callback cleanup
- Audio analyzer: sticky _unavailable flag prevents infinite start/stop
spin when no loopback device exists; cleared by set_device()
- Volume short-circuit cache invalidated when server reports remote volume
- Browser thumbnail race: per-folder generation counter + isConnected
checks; aborts in-flight fetches on navigation
- Track-skip uses cached title instead of full WinRT status round-trip
Performance
- Linux MPRIS/pactl and /api/display DDC-CI handlers wrapped in
asyncio.to_thread so blocking IO never stalls the event loop
- browse_directory moved off the event loop (SMB shares could freeze it)
- Windows status poll caches one asyncio loop per worker thread via
threading.local instead of new_event_loop/close on every 0.5s tick
- broadcast() serializes JSON once and uses send_text to all clients
- Hourly thumbnail cache cleanup scheduled in lifespan (was never invoked
— cache grew unbounded)
- Progress drag listeners attached only while dragging
Quality
- All asyncio.get_event_loop() in coroutines → get_running_loop()
- ThreadPoolExecutors shut down cleanly during lifespan teardown
- config_manager dedup: 12 near-identical methods collapsed onto generic
_upsert/_delete helpers (~290 lines removed)
- Service worker no longer passes every fetch through
- M3U playlist written via NamedTemporaryFile (no fixed-path symlink
clobber race)
- __version__ now prefers live pyproject.toml in dev checkouts so
pip install -e . users see the source-of-truth version, not the stale
package-metadata version baked in at install time
UI/UX (Studio Reference)
- Green leftover focus rings (rgba(29,185,84,...)) all replaced with
copper accent (rgba(var(--copper-rgb),...))
- Dialogs: square corners, copper top hairline, unified with editorial
chrome
- .browser-item: transparent with copper hover border (was filled card)
- Audio device select uses var(--sans) instead of generic system font
- Mobile container padding tuned for ≤480px screens
- Breadcrumb home is a real <button> with aria-label; aria-current on root
- i18n: filled display.msg.power_*, execution.*, scripts.params.execute,
callbacks.empty in both en + ru
426 lines
16 KiB
Python
426 lines
16 KiB
Python
"""Audio spectrum analyzer service using system loopback capture."""
|
|
|
|
import logging
|
|
import math
|
|
import platform
|
|
import threading
|
|
import time
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Caches for the lazily imported optional dependencies. Each stays None until
# the matching loader (_load_numpy / _load_soundcard) imports it successfully.
_np = None
_sc = None
|
|
|
|
|
|
def _load_numpy():
    """Import numpy on first use and cache the module in ``_np``.

    Returns the cached numpy module, or None when the import fails. A failed
    import is logged as a warning and is attempted again on the next call.
    """
    global _np
    if _np is not None:
        return _np

    try:
        import os
        import sys

        if sys.platform == 'win32':
            # Embedded Python doesn't auto-load DLLs from numpy.libs;
            # add the directory explicitly so libopenblas can be found.
            # Best effort only: any failure here is ignored and the plain
            # import below is still attempted.
            try:
                import importlib.util

                numpy_spec = importlib.util.find_spec('numpy')
                search_paths = numpy_spec.submodule_search_locations if numpy_spec else None
                if search_paths:
                    package_dir = next(iter(search_paths))
                    dll_dir = os.path.join(os.path.dirname(package_dir), 'numpy.libs')
                    if os.path.isdir(dll_dir):
                        os.add_dll_directory(dll_dir)
            except Exception:
                pass

        import numpy

        _np = numpy
    except Exception as e:
        logger.warning("numpy unavailable - audio visualizer disabled: %s", e)

    return _np
|
|
|
|
|
|
def _load_soundcard():
    """Import the ``soundcard`` package on first use and cache it in ``_sc``.

    Returns the cached module, or None when the import fails. A failed
    import is logged as a warning and is attempted again on the next call.
    """
    global _sc
    if _sc is not None:
        return _sc

    try:
        import soundcard
    except Exception as e:
        logger.warning("soundcard unavailable - audio visualizer disabled: %s", e)
    else:
        _sc = soundcard

    return _sc
|
|
|
|
|
|
class AudioAnalyzer:
|
|
"""Captures system audio loopback and performs real-time FFT analysis."""
|
|
|
|
def __init__(
|
|
self,
|
|
num_bins: int = 32,
|
|
sample_rate: int = 44100,
|
|
chunk_size: int = 1024,
|
|
target_fps: int = 30,
|
|
device_name: str | None = None,
|
|
):
|
|
self.num_bins = num_bins
|
|
self.sample_rate = sample_rate
|
|
self.chunk_size = chunk_size
|
|
self.target_fps = target_fps
|
|
self.device_name = device_name
|
|
|
|
self._running = False
|
|
self._thread: threading.Thread | None = None
|
|
self._lock = threading.Lock()
|
|
self._lifecycle_lock = threading.Lock()
|
|
self._data: dict | None = None
|
|
self._current_device_name: str | None = None
|
|
# Sticky "no usable device" flag — flipped to True if a capture
|
|
# attempt fails because no loopback device exists. Prevents the
|
|
# WebSocket manager from looping on start()/stop()/start() forever
|
|
# when there's nothing to capture. Cleared by set_device().
|
|
self._unavailable = False
|
|
# Generation counter — bumped each time _data is refreshed.
|
|
# Lets the broadcast loop dedupe without comparing dict identity
|
|
# (which is fragile because we always allocate a new dict).
|
|
self._data_seq = 0
|
|
# Threading.Event signaled when new frame data is available.
|
|
# The broadcast loop awaits this instead of polling on a timer,
|
|
# so it wakes up exactly once per produced frame.
|
|
self._data_event = threading.Event()
|
|
# Slow AGC envelope so the spectrum reflects real dynamics
|
|
# instead of being renormalized to peak=1.0 every frame.
|
|
# A loud transient (e.g. notification beep) lifts the reference
|
|
# for a few seconds afterwards; this is the price of real loudness.
|
|
self._spectrum_ref = 0.01
|
|
|
|
# Pre-compute logarithmic bin edges
|
|
self._bin_edges = self._compute_bin_edges()
|
|
|
|
def _compute_bin_edges(self) -> list[int]:
|
|
"""Compute logarithmic frequency bin boundaries for perceptual grouping."""
|
|
np = _load_numpy()
|
|
if np is None:
|
|
return []
|
|
|
|
fft_size = self.chunk_size // 2 + 1
|
|
min_freq = 20.0
|
|
max_freq = min(16000.0, self.sample_rate / 2)
|
|
|
|
edges = []
|
|
for i in range(self.num_bins + 1):
|
|
freq = min_freq * (max_freq / min_freq) ** (i / self.num_bins)
|
|
bin_idx = int(freq * self.chunk_size / self.sample_rate)
|
|
edges.append(min(bin_idx, fft_size - 1))
|
|
return edges
|
|
|
|
@property
|
|
def available(self) -> bool:
|
|
"""Whether audio capture dependencies are available."""
|
|
return _load_numpy() is not None and _load_soundcard() is not None
|
|
|
|
@property
|
|
def running(self) -> bool:
|
|
"""Whether capture is currently active."""
|
|
return self._running
|
|
|
|
def start(self) -> bool:
|
|
"""Start audio capture in a background thread. Returns False if unavailable."""
|
|
with self._lifecycle_lock:
|
|
if self._running:
|
|
return True
|
|
if not self.available:
|
|
return False
|
|
if self._unavailable:
|
|
# We already tried and failed to acquire a device. Don't
|
|
# spin a new capture thread for each new subscriber.
|
|
return False
|
|
|
|
# Reset AGC envelope so a long silent gap between sessions
|
|
# doesn't make the first new transients clip at the ceiling.
|
|
self._spectrum_ref = 0.01
|
|
|
|
self._running = True
|
|
self._thread = threading.Thread(target=self._capture_loop, daemon=True)
|
|
self._thread.start()
|
|
return True
|
|
|
|
def stop(self) -> None:
|
|
"""Stop audio capture and cleanup."""
|
|
with self._lifecycle_lock:
|
|
self._running = False
|
|
# Wake any waiter so it can observe _running and exit cleanly.
|
|
self._data_event.set()
|
|
if self._thread:
|
|
self._thread.join(timeout=3.0)
|
|
self._thread = None
|
|
with self._lock:
|
|
self._data = None
|
|
self._data_event.clear()
|
|
|
|
def get_frequency_data(self) -> dict | None:
|
|
"""Return latest frequency data (thread-safe). None if not running."""
|
|
with self._lock:
|
|
return self._data
|
|
|
|
def get_frequency_data_versioned(self) -> tuple[dict | None, int]:
|
|
"""Return (data, seq) so callers can dedupe without identity tricks."""
|
|
with self._lock:
|
|
return self._data, self._data_seq
|
|
|
|
@property
|
|
def data_event(self) -> threading.Event:
|
|
"""Event signaled when a fresh frame is ready. Caller must clear()."""
|
|
return self._data_event
|
|
|
|
@staticmethod
|
|
def list_loopback_devices() -> list[dict[str, str]]:
|
|
"""List all available loopback audio devices."""
|
|
sc = _load_soundcard()
|
|
if sc is None:
|
|
return []
|
|
|
|
devices = []
|
|
try:
|
|
# COM may be needed on Windows for WASAPI
|
|
if platform.system() == "Windows":
|
|
try:
|
|
import comtypes
|
|
comtypes.CoInitializeEx(comtypes.COINIT_MULTITHREADED)
|
|
except Exception:
|
|
pass
|
|
|
|
loopback_mics = sc.all_microphones(include_loopback=True)
|
|
for mic in loopback_mics:
|
|
if mic.isloopback:
|
|
devices.append({"id": mic.id, "name": mic.name})
|
|
except Exception as e:
|
|
logger.warning("Failed to list loopback devices: %s", e)
|
|
|
|
return devices
|
|
|
|
def _find_loopback_device(self):
|
|
"""Find a loopback device for system audio capture."""
|
|
sc = _load_soundcard()
|
|
if sc is None:
|
|
return None
|
|
|
|
try:
|
|
loopback_mics = sc.all_microphones(include_loopback=True)
|
|
|
|
# If a specific device is requested, find it by name (partial match)
|
|
if self.device_name:
|
|
target = self.device_name.lower()
|
|
for mic in loopback_mics:
|
|
if mic.isloopback and target in mic.name.lower():
|
|
logger.info("Found requested loopback device: %s", mic.name)
|
|
self._current_device_name = mic.name
|
|
return mic
|
|
logger.warning("Requested device '%s' not found, falling back to default", self.device_name)
|
|
|
|
# Default: first loopback device
|
|
for mic in loopback_mics:
|
|
if mic.isloopback:
|
|
logger.info("Found loopback device: %s", mic.name)
|
|
self._current_device_name = mic.name
|
|
return mic
|
|
|
|
# Fallback: try to get default speaker's loopback
|
|
default_speaker = sc.default_speaker()
|
|
if default_speaker:
|
|
for mic in loopback_mics:
|
|
if default_speaker.name in mic.name:
|
|
logger.info("Found speaker loopback: %s", mic.name)
|
|
self._current_device_name = mic.name
|
|
return mic
|
|
|
|
except Exception as e:
|
|
logger.warning("Failed to find loopback device: %s", e)
|
|
|
|
return None
|
|
|
|
def set_device(self, device_name: str | None) -> bool:
|
|
"""Change the loopback device. Restarts capture if running. Returns True on success."""
|
|
was_running = self._running
|
|
if was_running:
|
|
self.stop()
|
|
|
|
self.device_name = device_name
|
|
self._current_device_name = None
|
|
# Clear the "no device" sticky flag — the user is asking for a
|
|
# different device so it's worth attempting capture again.
|
|
self._unavailable = False
|
|
|
|
if was_running:
|
|
return self.start()
|
|
return True
|
|
|
|
@property
|
|
def current_device(self) -> str | None:
|
|
"""Return the name of the currently active loopback device."""
|
|
return self._current_device_name
|
|
|
|
def _capture_loop(self) -> None:
|
|
"""Background thread: capture audio and compute FFT continuously."""
|
|
# Initialize COM on Windows (required for WASAPI/SoundCard)
|
|
if platform.system() == "Windows":
|
|
try:
|
|
import comtypes
|
|
comtypes.CoInitializeEx(comtypes.COINIT_MULTITHREADED)
|
|
except Exception:
|
|
try:
|
|
import ctypes
|
|
ctypes.windll.ole32.CoInitializeEx(0, 0)
|
|
except Exception as e:
|
|
logger.warning("Failed to initialize COM: %s", e)
|
|
|
|
np = _load_numpy()
|
|
sc = _load_soundcard()
|
|
if np is None or sc is None:
|
|
self._running = False
|
|
return
|
|
|
|
device = self._find_loopback_device()
|
|
if device is None:
|
|
logger.warning("No loopback audio device found - visualizer disabled")
|
|
self._running = False
|
|
self._unavailable = True
|
|
return
|
|
|
|
interval = 1.0 / self.target_fps
|
|
# Float32 window — matches soundcard's typical buffer dtype and
|
|
# halves FFT memory traffic vs. the default float64.
|
|
window = np.hanning(self.chunk_size).astype(np.float32)
|
|
|
|
# Pre-compute bin edge pairs for vectorized grouping
|
|
edges = self._bin_edges
|
|
bin_starts = np.array([edges[i] for i in range(self.num_bins)], dtype=np.intp)
|
|
bin_ends = np.array([max(edges[i + 1], edges[i] + 1) for i in range(self.num_bins)], dtype=np.intp)
|
|
# Counts are constant — compute once.
|
|
bin_counts = (bin_ends - bin_starts).astype(np.float32)
|
|
|
|
# Pre-allocate working buffers so the per-frame allocator churn
|
|
# on the capture thread (which runs at target_fps Hz, hours on
|
|
# end) drops to zero copies for these arrays.
|
|
fft_size = self.chunk_size // 2 + 1
|
|
windowed = np.empty(self.chunk_size, dtype=np.float32)
|
|
cumsum = np.empty(fft_size + 1, dtype=np.float32)
|
|
cumsum[0] = 0.0
|
|
|
|
try:
|
|
with device.recorder(
|
|
samplerate=self.sample_rate,
|
|
channels=1,
|
|
blocksize=self.chunk_size,
|
|
) as recorder:
|
|
logger.info("Audio capture started on: %s", device.name)
|
|
while self._running:
|
|
t0 = time.monotonic()
|
|
|
|
try:
|
|
data = recorder.record(numframes=self.chunk_size)
|
|
except Exception as e:
|
|
logger.debug("Audio capture read error: %s", e)
|
|
time.sleep(interval)
|
|
continue
|
|
|
|
# Mono mix if needed
|
|
if data.ndim > 1:
|
|
mono = data.mean(axis=1)
|
|
else:
|
|
mono = data.ravel()
|
|
|
|
if len(mono) < self.chunk_size:
|
|
time.sleep(interval)
|
|
continue
|
|
|
|
# Apply window in-place into the pre-allocated buffer.
|
|
np.multiply(mono[:self.chunk_size], window, out=windowed)
|
|
fft_mag = np.abs(np.fft.rfft(windowed))
|
|
|
|
# Group into logarithmic bins (vectorized via cumsum).
|
|
# Write into the pre-allocated [1:] slice so cumsum[0]
|
|
# stays 0.0 and we never allocate a new array.
|
|
np.cumsum(fft_mag, out=cumsum[1:])
|
|
bins = (cumsum[bin_ends] - cumsum[bin_starts]) / bin_counts
|
|
|
|
# True loudness from time-domain RMS via single BLAS
|
|
# dot — avoids astype() and ** allocations.
|
|
mono32 = mono if mono.dtype == np.float32 else mono.astype(np.float32, copy=False)
|
|
energy = float(np.dot(mono32, mono32))
|
|
if energy > 1e-12:
|
|
rms = (energy / mono32.size) ** 0.5
|
|
db = 20.0 * math.log10(rms)
|
|
# Map -60 dB..-6 dB to 0..1 (typical music range)
|
|
level = max(0.0, min(1.0, (db + 60.0) / 54.0))
|
|
else:
|
|
level = 0.0
|
|
|
|
# Slow auto-gain: envelope follower with fast attack,
|
|
# slow release. Quiet music yields small bars; loud
|
|
# passages reach the top; the reference adapts over
|
|
# seconds instead of resetting every frame.
|
|
current_peak = float(bins.max())
|
|
if current_peak > self._spectrum_ref:
|
|
self._spectrum_ref += (current_peak - self._spectrum_ref) * 0.05
|
|
else:
|
|
self._spectrum_ref += (current_peak - self._spectrum_ref) * 0.005
|
|
ref = max(self._spectrum_ref, 1e-4)
|
|
np.divide(bins, ref, out=bins)
|
|
np.clip(bins, 0.0, 1.5, out=bins)
|
|
|
|
# Bass energy: average of first 4 bins (~20-200Hz)
|
|
bass = float(bins[:4].mean()) if self.num_bins >= 4 else 0.0
|
|
|
|
# Quantize to 0..1000 ints — same wire fidelity as
|
|
# 3-decimal floats but smaller GC churn on both ends
|
|
# (frontend smooths anyway, so quantization is
|
|
# invisible). JSON encodes ints faster than floats.
|
|
frequencies = (bins * 1000.0).astype(np.int16).tolist()
|
|
bass_i = int(bass * 1000.0)
|
|
level_i = int(level * 1000.0)
|
|
|
|
new_data = {
|
|
"frequencies": frequencies,
|
|
"bass": bass_i,
|
|
"level": level_i,
|
|
# Wire-format flag: clients that see this know
|
|
# values are 0..1000 ints, not 0..1 floats.
|
|
"scale": 1000,
|
|
}
|
|
with self._lock:
|
|
self._data = new_data
|
|
self._data_seq += 1
|
|
# Wake any broadcast loop waiting on fresh data.
|
|
self._data_event.set()
|
|
|
|
# Throttle to target FPS
|
|
elapsed = time.monotonic() - t0
|
|
if elapsed < interval:
|
|
time.sleep(interval - elapsed)
|
|
|
|
except Exception as e:
|
|
logger.error("Audio capture loop error: %s", e)
|
|
finally:
|
|
self._running = False
|
|
logger.info("Audio capture stopped")
|
|
|
|
|
|
# Global singleton: created by the first get_audio_analyzer() call and
# returned as-is by every later call.
_analyzer: AudioAnalyzer | None = None
|
|
|
|
|
|
def get_audio_analyzer(
    num_bins: int = 32,
    sample_rate: int = 44100,
    target_fps: int = 25,
    device_name: str | None = None,
) -> AudioAnalyzer:
    """Return the process-wide AudioAnalyzer, creating it on first use.

    The arguments only take effect on the call that creates the instance;
    once the singleton exists they are ignored and it is returned as-is.
    """
    global _analyzer
    if _analyzer is not None:
        return _analyzer

    settings = {
        "num_bins": num_bins,
        "sample_rate": sample_rate,
        "target_fps": target_fps,
        "device_name": device_name,
    }
    _analyzer = AudioAnalyzer(**settings)
    return _analyzer
|