Optimize audio capture and render loop performance

audio_capture.py:
- Move _fft_bands from inner function to method (avoid per-frame closure)
- Pre-allocate channel split buffers and RMS scratch arrays
- Use in-place numpy ops (np.copyto, np.multiply) instead of copies
- In-place FFT smoothing instead of temp array allocation
- Cache loop-invariant values as locals
- Fix energy index to wrap-around instead of unbounded increment

audio_stream.py:
- Pre-compute interpolation arrays (band_x, led_x, full_amp, indices_buf, vu_gradient) once on LED count change instead of every frame
- Pre-compute VU meter base/peak float arrays in _update_from_source
- Reuse full_amp and indices_buf buffers across frames
- In-place spectrum smoothing to avoid temp allocations

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-25 00:36:51 +03:00
parent 0cd8304004
commit 04ee2e5830
2 changed files with 132 additions and 68 deletions
--- a/server/src/wled_controller/core/audio/audio_capture.py
+++ b/server/src/wled_controller/core/audio/audio_capture.py
@@ -122,6 +122,14 @@ class AudioCaptureStream:
        self._smooth_spectrum = np.zeros(NUM_BANDS, dtype=np.float32)
        self._smooth_spectrum_left = np.zeros(NUM_BANDS, dtype=np.float32)
        self._smooth_spectrum_right = np.zeros(NUM_BANDS, dtype=np.float32)
        self._smoothing_alpha = 0.3  # lower = smoother
        # Pre-allocated FFT scratch buffers
        self._fft_windowed = np.empty(chunk_size, dtype=np.float32)
        self._fft_mag = None  # allocated on first use (depends on rfft output size)
        # Pre-compute valid band ranges (avoid per-frame bounds checks)
        self._valid_bands = None  # set after first FFT when fft_mag size is known
        # Per-iteration timing (written by capture thread, read by consumers)
        self._last_timing: dict = {}
@@ -157,6 +165,28 @@ class AudioCaptureStream:
        """Return per-iteration timing from the capture loop (ms)."""
        return dict(self._last_timing)
    def _fft_bands(self, samps, buf, smooth_buf, window, bands, alpha, one_minus_alpha):
        """Compute FFT, bin into bands, normalize, and smooth."""
        chunk_size = self._chunk_size
        chunk = samps[:chunk_size]
        if len(chunk) < chunk_size:
            chunk = np.pad(chunk, (0, chunk_size - len(chunk)))
        np.multiply(chunk, window, out=self._fft_windowed)
        fft_mag = np.abs(np.fft.rfft(self._fft_windowed))
        fft_mag *= (1.0 / chunk_size)  # in-place scale (faster than /=)
        fft_len = len(fft_mag)
        for b, (s, e) in enumerate(bands):
            if s < fft_len and e <= fft_len:
                buf[b] = float(np.mean(fft_mag[s:e]))
            else:
                buf[b] = 0.0
        spec_max = float(np.max(buf))
        if spec_max > 1e-6:
            buf *= (1.0 / spec_max)
        # Exponential smoothing: smooth = alpha * new + (1-alpha) * old
        smooth_buf *= one_minus_alpha
        smooth_buf += alpha * buf
    def _capture_loop(self) -> None:
        try:
            import pyaudiowpatch as pyaudio
@@ -217,6 +247,26 @@ class AudioCaptureStream:
            spectrum_buf_left = np.zeros(NUM_BANDS, dtype=np.float32)
            spectrum_buf_right = np.zeros(NUM_BANDS, dtype=np.float32)
            # Pre-allocate channel buffers for stereo splitting
            chunk_samples = self._chunk_size
            if channels > 1:
                _left_buf = np.empty(chunk_samples, dtype=np.float32)
                _right_buf = np.empty(chunk_samples, dtype=np.float32)
                _mono_buf = np.empty(chunk_samples, dtype=np.float32)
            else:
                _left_buf = _right_buf = _mono_buf = None
            # Pre-allocate scratch for RMS (avoid samples**2 temp array)
            _sq_buf = np.empty(chunk_samples, dtype=np.float32)
            # Snapshot loop-invariant values
            window = self._window
            bands = self._bands
            energy_history = self._energy_history
            energy_len = len(energy_history)
            alpha = self._smoothing_alpha
            one_minus_alpha = 1.0 - alpha
            while self._running:
                t_read_start = time.perf_counter()
                try:
@@ -231,50 +281,45 @@ class AudioCaptureStream:
                # Split channels and mix to mono
                if channels > 1:
                    data = data.reshape(-1, channels)
-                    left_samples = data[:, 0].copy()
+                    np.copyto(_left_buf, data[:, 0])
-                    right_samples = data[:, 1].copy() if channels >= 2 else left_samples.copy()
+                    np.copyto(_right_buf, data[:, 1] if channels >= 2 else data[:, 0])
-                    samples = data.mean(axis=1).astype(np.float32)
+                    np.add(data[:, 0], data[:, 1] if channels >= 2 else data[:, 0], out=_mono_buf)
                    _mono_buf *= 0.5
                    samples = _mono_buf
                    left_samples = _left_buf
                    right_samples = _right_buf
                else:
                    samples = data
                    left_samples = samples
                    right_samples = samples
-                # RMS and peak (mono)
+                # RMS and peak — reuse scratch buffer
-                rms = float(np.sqrt(np.mean(samples ** 2)))
+                np.multiply(samples, samples, out=_sq_buf[:len(samples)])
                rms = float(np.sqrt(np.mean(_sq_buf[:len(samples)])))
                peak = float(np.max(np.abs(samples)))
-                left_rms = float(np.sqrt(np.mean(left_samples ** 2)))
+                if channels > 1:
-                right_rms = float(np.sqrt(np.mean(right_samples ** 2)))
+                    np.multiply(left_samples, left_samples, out=_sq_buf)
-
+                    left_rms = float(np.sqrt(np.mean(_sq_buf)))
-                # FFT helper
+                    np.multiply(right_samples, right_samples, out=_sq_buf)
-                alpha = 0.3  # smoothing factor (lower = smoother)
+                    right_rms = float(np.sqrt(np.mean(_sq_buf)))
                def _fft_bands(samps, buf, smooth_buf):
                    chunk = samps[: self._chunk_size]
                    if len(chunk) < self._chunk_size:
                        chunk = np.pad(chunk, (0, self._chunk_size - len(chunk)))
                    windowed = chunk * self._window
                    fft_mag = np.abs(np.fft.rfft(windowed))
                    fft_mag /= self._chunk_size
                    for b, (s, e) in enumerate(self._bands):
                        if s < len(fft_mag) and e <= len(fft_mag):
                            buf[b] = float(np.mean(fft_mag[s:e]))
                else:
-                            buf[b] = 0.0
+                    left_rms = rms
-                    spec_max = float(np.max(buf))
+                    right_rms = rms
                    if spec_max > 1e-6:
                        buf /= spec_max
                    smooth_buf[:] = alpha * buf + (1.0 - alpha) * smooth_buf
                # Compute FFT for mono, left, right
-                _fft_bands(samples, spectrum_buf, self._smooth_spectrum)
+                self._fft_bands(samples, spectrum_buf, self._smooth_spectrum,
-                _fft_bands(left_samples, spectrum_buf_left, self._smooth_spectrum_left)
+                                window, bands, alpha, one_minus_alpha)
-                _fft_bands(right_samples, spectrum_buf_right, self._smooth_spectrum_right)
+                self._fft_bands(left_samples, spectrum_buf_left, self._smooth_spectrum_left,
                                window, bands, alpha, one_minus_alpha)
                self._fft_bands(right_samples, spectrum_buf_right, self._smooth_spectrum_right,
                                window, bands, alpha, one_minus_alpha)
                # Beat detection — compare current energy to rolling average (mono)
-                energy = float(np.sum(samples ** 2))
+                np.multiply(samples, samples, out=_sq_buf[:len(samples)])
-                self._energy_history[self._energy_idx % len(self._energy_history)] = energy
+                energy = float(np.sum(_sq_buf[:len(samples)]))
-                self._energy_idx += 1
+                energy_history[self._energy_idx] = energy
-                avg_energy = float(np.mean(self._energy_history))
+                self._energy_idx = (self._energy_idx + 1) % energy_len
                avg_energy = float(np.mean(energy_history))
                beat = False
                beat_intensity = 0.0
--- a/server/src/wled_controller/core/processing/audio_stream.py
+++ b/server/src/wled_controller/core/processing/audio_stream.py
@@ -15,7 +15,7 @@ from typing import Optional
 import numpy as np
-from wled_controller.core.audio.audio_capture import AudioCaptureManager
+from wled_controller.core.audio.audio_capture import AudioCaptureManager, NUM_BANDS
 from wled_controller.core.processing.color_strip_stream import ColorStripStream
 from wled_controller.core.processing.effect_stream import _build_palette_lut
 from wled_controller.utils import get_logger
@@ -67,6 +67,9 @@ class AudioColorStripStream(ColorStripStream):
        self._color = color if isinstance(color, list) and len(color) == 3 else [0, 255, 0]
        color_peak = getattr(source, "color_peak", None)
        self._color_peak = color_peak if isinstance(color_peak, list) and len(color_peak) == 3 else [255, 0, 0]
        # Pre-computed float arrays for VU meter (avoid per-frame np.array())
        self._color_f = np.array(self._color, dtype=np.float32)
        self._color_peak_f = np.array(self._color_peak, dtype=np.float32)
        self._auto_size = not source.led_count
        self._led_count = source.led_count if source.led_count and source.led_count > 0 else 1
        self._mirror = bool(getattr(source, "mirror", False))
@@ -182,6 +185,13 @@ class AudioColorStripStream(ColorStripStream):
        _pool_n = 0
        _buf_a = _buf_b = None
        _use_a = True
        # Pre-computed interpolation arrays (rebuilt when LED count changes)
        _band_x = None
        _led_x = None
        _led_x_mirror = None
        _full_amp = None
        _vu_gradient = None
        _indices_buf = None
        renderers = {
            "spectrum": self._render_spectrum,
@@ -195,11 +205,27 @@ class AudioColorStripStream(ColorStripStream):
                frame_time = 1.0 / self._fps
                n = self._led_count
-                # Rebuild scratch buffers when LED count changes
+                # Rebuild scratch buffers and pre-computed arrays when LED count changes
                if n != _pool_n:
                    _pool_n = n
                    _buf_a = np.zeros((n, 3), dtype=np.uint8)
                    _buf_b = np.zeros((n, 3), dtype=np.uint8)
                    _band_x = np.arange(NUM_BANDS, dtype=np.float32)
                    half = (n + 1) // 2
                    _led_x_mirror = np.linspace(0, NUM_BANDS - 1, half)
                    _led_x = np.linspace(0, NUM_BANDS - 1, n)
                    _full_amp = np.empty(n, dtype=np.float32)
                    _vu_gradient = np.linspace(0, 1, n, dtype=np.float32)
                    _indices_buf = np.empty(n, dtype=np.int32)
                    self._prev_spectrum = None  # reset smoothing on resize
                # Make pre-computed arrays available to render methods
                self._band_x = _band_x
                self._led_x = _led_x
                self._led_x_mirror = _led_x_mirror
                self._full_amp = _full_amp
                self._vu_gradient = _vu_gradient
                self._indices_buf = _indices_buf
                buf = _buf_a if _use_a else _buf_b
                _use_a = not _use_a
@@ -252,41 +278,40 @@ class AudioColorStripStream(ColorStripStream):
        sensitivity = self._sensitivity
        smoothing = self._smoothing
        lut = self._palette_lut
-
+        band_x = self._band_x
-        num_bands = len(spectrum)
+        full_amp = self._full_amp
-        band_x = np.arange(num_bands, dtype=np.float32)
+        indices_buf = self._indices_buf
        if self._mirror:
            half = (n + 1) // 2
-            led_x = np.linspace(0, num_bands - 1, half)
+            amplitudes = np.interp(self._led_x_mirror, band_x, spectrum)
            amplitudes = np.interp(led_x, band_x, spectrum)
            amplitudes *= sensitivity
            np.clip(amplitudes, 0.0, 1.0, out=amplitudes)
            # Temporal smoothing
            if self._prev_spectrum is not None and len(self._prev_spectrum) == half:
-                amplitudes[:] = smoothing * self._prev_spectrum + (1.0 - smoothing) * amplitudes
+                amplitudes *= (1.0 - smoothing)
                amplitudes += smoothing * self._prev_spectrum
            self._prev_spectrum = amplitudes.copy()
            # Mirror: center = bass, edges = treble
            full_amp = np.empty(n, dtype=np.float32)
            full_amp[:half] = amplitudes[::-1]
            full_amp[half:] = amplitudes[: n - half]
        else:
-            led_x = np.linspace(0, num_bands - 1, n)
+            amplitudes = np.interp(self._led_x, band_x, spectrum)
            amplitudes = np.interp(led_x, band_x, spectrum)
            amplitudes *= sensitivity
            np.clip(amplitudes, 0.0, 1.0, out=amplitudes)
            # Temporal smoothing
            if self._prev_spectrum is not None and len(self._prev_spectrum) == n:
-                amplitudes[:] = smoothing * self._prev_spectrum + (1.0 - smoothing) * amplitudes
+                amplitudes *= (1.0 - smoothing)
                amplitudes += smoothing * self._prev_spectrum
            self._prev_spectrum = amplitudes.copy()
-            full_amp = amplitudes
+            full_amp[:] = amplitudes
        # Map to palette: amplitude → palette index → color
-        indices = (full_amp * 255).astype(np.int32)
+        np.multiply(full_amp, 255, out=full_amp)
-        np.clip(indices, 0, 255, out=indices)
+        np.clip(full_amp, 0, 255, out=full_amp)
-        colors = lut[indices]  # (n, 3) uint8
+        np.copyto(indices_buf, full_amp, casting='unsafe')
        colors = lut[indices_buf]  # (n, 3) uint8
-        # Scale brightness by amplitude
+        # Scale brightness by amplitude — restore full_amp to [0, 1]
        full_amp *= (1.0 / 255.0)
        for ch in range(3):
            buf[:, ch] = (colors[:, ch].astype(np.float32) * full_amp).astype(np.uint8)
@@ -299,7 +324,6 @@ class AudioColorStripStream(ColorStripStream):
        _, ch_rms = self._pick_channel(analysis)
        rms = ch_rms * self._sensitivity
        # Temporal smoothing on RMS
        rms = self._smoothing * self._prev_rms + (1.0 - self._smoothing) * rms
        self._prev_rms = rms
        rms = min(1.0, rms)
@@ -308,11 +332,9 @@ class AudioColorStripStream(ColorStripStream):
        buf[:] = 0
        if fill_count > 0:
-            base = np.array(self._color, dtype=np.float32)
+            base = self._color_f
-            peak = np.array(self._color_peak, dtype=np.float32)
+            peak = self._color_peak_f
-
+            t = self._vu_gradient[:fill_count]
            # Gradient from base color to peak color
            t = np.linspace(0, 1, n, dtype=np.float32)[:fill_count]
            for ch in range(3):
                buf[:fill_count, ch] = np.clip(
                    base[ch] + (peak[ch] - base[ch]) * t, 0, 255
@@ -325,11 +347,9 @@ class AudioColorStripStream(ColorStripStream):
            buf[:] = 0
            return
        # On beat: flash to full brightness
        if analysis.beat:
            self._pulse_brightness = 1.0
        else:
            # Exponential decay — sensitivity controls decay speed
            decay_rate = 0.05 + 0.15 * (1.0 / max(self._sensitivity, 0.1))
            self._pulse_brightness = max(0.0, self._pulse_brightness - decay_rate)
@@ -338,12 +358,11 @@ class AudioColorStripStream(ColorStripStream):
            buf[:] = 0
            return
-        # Color from palette based on beat intensity
+        palette_idx = max(0, min(255, int(analysis.beat_intensity * 255)))
        palette_idx = int(analysis.beat_intensity * 255)
        palette_idx = max(0, min(255, palette_idx))
        base_color = self._palette_lut[palette_idx]
-        # Fill all LEDs with pulsing color
+        # Vectorized fill: scale color by brightness and broadcast to all LEDs
-        buf[:, 0] = int(base_color[0] * brightness)
+        r, g, b = int(base_color[0] * brightness), int(base_color[1] * brightness), int(base_color[2] * brightness)
-        buf[:, 1] = int(base_color[1] * brightness)
+        buf[:, 0] = r
-        buf[:, 2] = int(base_color[2] * brightness)
+        buf[:, 1] = g
        buf[:, 2] = b