diff --git a/server/src/wled_controller/core/audio/audio_capture.py b/server/src/wled_controller/core/audio/audio_capture.py
index 4eab2d0..ec5586e 100644
--- a/server/src/wled_controller/core/audio/audio_capture.py
+++ b/server/src/wled_controller/core/audio/audio_capture.py
@@ -122,6 +122,14 @@ class AudioCaptureStream:
         self._smooth_spectrum = np.zeros(NUM_BANDS, dtype=np.float32)
         self._smooth_spectrum_left = np.zeros(NUM_BANDS, dtype=np.float32)
         self._smooth_spectrum_right = np.zeros(NUM_BANDS, dtype=np.float32)
+        self._smoothing_alpha = 0.3  # lower = smoother
+
+        # Pre-allocated FFT scratch buffers
+        self._fft_windowed = np.empty(chunk_size, dtype=np.float32)
+        self._fft_mag = None  # allocated on first use (depends on rfft output size)
+
+        # Pre-compute valid band ranges (avoid per-frame bounds checks)
+        self._valid_bands = None  # set after first FFT when fft_mag size is known
 
         # Per-iteration timing (written by capture thread, read by consumers)
         self._last_timing: dict = {}
@@ -157,6 +165,28 @@ class AudioCaptureStream:
         """Return per-iteration timing from the capture loop (ms)."""
         return dict(self._last_timing)
 
+    def _fft_bands(self, samps, buf, smooth_buf, window, bands, alpha, one_minus_alpha):
+        """Window ``samps``, FFT it, and fold the magnitudes into ``bands``.
+
+        Fills ``buf`` with per-band means (peak-normalized; empty/out-of-range
+        bands give 0.0, not NaN) and blends it into ``smooth_buf`` in place.
+        """
+        chunk_size = self._chunk_size
+        chunk = samps[:chunk_size]
+        if len(chunk) < chunk_size:  # short input: zero-pad up to chunk_size
+            chunk = np.pad(chunk, (0, chunk_size - len(chunk)))
+        np.multiply(chunk, window, out=self._fft_windowed)
+        fft_mag = np.abs(np.fft.rfft(self._fft_windowed))
+        fft_mag *= (1.0 / chunk_size)  # in-place scale by the chunk length
+        fft_len = len(fft_mag)
+        for b, (s, e) in enumerate(bands):
+            buf[b] = float(np.mean(fft_mag[s:e])) if s < e <= fft_len else 0.0
+        spec_max = float(np.max(buf))
+        if spec_max > 1e-6:  # skip normalization for near-silent frames
+            buf *= (1.0 / spec_max)
+        smooth_buf *= one_minus_alpha  # smooth = alpha * new + (1 - alpha) * old
+        smooth_buf += alpha * buf
+
     def _capture_loop(self) -> None:
         try:
             import pyaudiowpatch as pyaudio
@@ -217,6 +247,26 @@ class AudioCaptureStream:
             spectrum_buf_left = np.zeros(NUM_BANDS, dtype=np.float32)
             spectrum_buf_right = np.zeros(NUM_BANDS, dtype=np.float32)
 
+            # Pre-allocate channel buffers for stereo splitting
+            chunk_samples = self._chunk_size
+            if channels > 1:
+                _left_buf = np.empty(chunk_samples, dtype=np.float32)
+                _right_buf = np.empty(chunk_samples, dtype=np.float32)
+                _mono_buf = np.empty(chunk_samples, dtype=np.float32)
+            else:
+                _left_buf = _right_buf = _mono_buf = None
+
+            # Pre-allocate scratch for RMS (avoid samples**2 temp array)
+            _sq_buf = np.empty(chunk_samples, dtype=np.float32)
+
+            # Snapshot loop-invariant values
+            window = self._window
+            bands = self._bands
+            energy_history = self._energy_history
+            energy_len = len(energy_history)
+            alpha = self._smoothing_alpha
+            one_minus_alpha = 1.0 - alpha
+
             while self._running:
                 t_read_start = time.perf_counter()
                 try:
@@ -231,50 +281,45 @@ class AudioCaptureStream:
                 # Split channels and mix to mono
                 if channels > 1:
                     data = data.reshape(-1, channels)
-                    left_samples = data[:, 0].copy()
-                    right_samples = data[:, 1].copy() if channels >= 2 else left_samples.copy()
-                    samples = data.mean(axis=1).astype(np.float32)
+                    np.copyto(_left_buf, data[:, 0])
+                    np.copyto(_right_buf, data[:, 1] if channels >= 2 else data[:, 0])
+                    np.add(data[:, 0], data[:, 1] if channels >= 2 else data[:, 0], out=_mono_buf)
+                    _mono_buf *= 0.5
+                    samples = _mono_buf
+                    left_samples = _left_buf
+                    right_samples = _right_buf
                 else:
                     samples = data
                     left_samples = samples
                     right_samples = samples
 
-                # RMS and peak (mono)
-                rms = float(np.sqrt(np.mean(samples ** 2)))
+                # RMS and peak — reuse scratch buffer
+                np.multiply(samples, samples, out=_sq_buf[:len(samples)])
+                rms = float(np.sqrt(np.mean(_sq_buf[:len(samples)])))
                 peak = float(np.max(np.abs(samples)))
-                left_rms = float(np.sqrt(np.mean(left_samples ** 2)))
-                right_rms = float(np.sqrt(np.mean(right_samples ** 2)))
-
-                # FFT helper
-                alpha = 0.3  # smoothing factor (lower = smoother)
-
-                def _fft_bands(samps, buf, smooth_buf):
-                    chunk = samps[: self._chunk_size]
-                    if len(chunk) < self._chunk_size:
-                        chunk = np.pad(chunk, (0, self._chunk_size - len(chunk)))
-                    windowed = chunk * self._window
-                    fft_mag = np.abs(np.fft.rfft(windowed))
-                    fft_mag /= self._chunk_size
-                    for b, (s, e) in enumerate(self._bands):
-                        if s < len(fft_mag) and e <= len(fft_mag):
-                            buf[b] = float(np.mean(fft_mag[s:e]))
-                        else:
-                            buf[b] = 0.0
-                    spec_max = float(np.max(buf))
-                    if spec_max > 1e-6:
-                        buf /= spec_max
-                    smooth_buf[:] = alpha * buf + (1.0 - alpha) * smooth_buf
+                if channels > 1:
+                    np.multiply(left_samples, left_samples, out=_sq_buf)
+                    left_rms = float(np.sqrt(np.mean(_sq_buf)))
+                    np.multiply(right_samples, right_samples, out=_sq_buf)
+                    right_rms = float(np.sqrt(np.mean(_sq_buf)))
+                else:
+                    left_rms = rms
+                    right_rms = rms
 
                 # Compute FFT for mono, left, right
-                _fft_bands(samples, spectrum_buf, self._smooth_spectrum)
-                _fft_bands(left_samples, spectrum_buf_left, self._smooth_spectrum_left)
-                _fft_bands(right_samples, spectrum_buf_right, self._smooth_spectrum_right)
+                self._fft_bands(samples, spectrum_buf, self._smooth_spectrum,
+                                window, bands, alpha, one_minus_alpha)
+                self._fft_bands(left_samples, spectrum_buf_left, self._smooth_spectrum_left,
+                                window, bands, alpha, one_minus_alpha)
+                self._fft_bands(right_samples, spectrum_buf_right, self._smooth_spectrum_right,
+                                window, bands, alpha, one_minus_alpha)
 
                 # Beat detection — compare current energy to rolling average (mono)
-                energy = float(np.sum(samples ** 2))
-                self._energy_history[self._energy_idx % len(self._energy_history)] = energy
-                self._energy_idx += 1
-                avg_energy = float(np.mean(self._energy_history))
+                np.multiply(samples, samples, out=_sq_buf[:len(samples)])
+                energy = float(np.sum(_sq_buf[:len(samples)]))
+                energy_history[self._energy_idx] = energy
+                self._energy_idx = (self._energy_idx + 1) % energy_len
+                avg_energy = float(np.mean(energy_history))
 
                 beat = False
                 beat_intensity = 0.0
diff --git a/server/src/wled_controller/core/processing/audio_stream.py b/server/src/wled_controller/core/processing/audio_stream.py
index a7d5ee3..93f26ef 100644
--- a/server/src/wled_controller/core/processing/audio_stream.py
+++ b/server/src/wled_controller/core/processing/audio_stream.py
@@ -15,7 +15,7 @@ from typing import Optional
 
 import numpy as np
 
-from wled_controller.core.audio.audio_capture import AudioCaptureManager
+from wled_controller.core.audio.audio_capture import AudioCaptureManager, NUM_BANDS
 from wled_controller.core.processing.color_strip_stream import ColorStripStream
 from wled_controller.core.processing.effect_stream import _build_palette_lut
 from wled_controller.utils import get_logger
@@ -67,6 +67,9 @@ class AudioColorStripStream(ColorStripStream):
         self._color = color if isinstance(color, list) and len(color) == 3 else [0, 255, 0]
         color_peak = getattr(source, "color_peak", None)
         self._color_peak = color_peak if isinstance(color_peak, list) and len(color_peak) == 3 else [255, 0, 0]
+        # Pre-computed float arrays for VU meter (avoid per-frame np.array())
+        self._color_f = np.array(self._color, dtype=np.float32)
+        self._color_peak_f = np.array(self._color_peak, dtype=np.float32)
         self._auto_size = not source.led_count
         self._led_count = source.led_count if source.led_count and source.led_count > 0 else 1
         self._mirror = bool(getattr(source, "mirror", False))
@@ -182,6 +185,13 @@ class AudioColorStripStream(ColorStripStream):
         _pool_n = 0
         _buf_a = _buf_b = None
         _use_a = True
+        # Pre-computed interpolation arrays (rebuilt when LED count changes)
+        _band_x = None
+        _led_x = None
+        _led_x_mirror = None
+        _full_amp = None
+        _vu_gradient = None
+        _indices_buf = None
 
         renderers = {
             "spectrum": self._render_spectrum,
@@ -195,11 +205,27 @@ class AudioColorStripStream(ColorStripStream):
                 frame_time = 1.0 / self._fps
                 n = self._led_count
 
-                # Rebuild scratch buffers when LED count changes
+                # Rebuild scratch buffers and pre-computed arrays when LED count changes
                 if n != _pool_n:
                     _pool_n = n
                     _buf_a = np.zeros((n, 3), dtype=np.uint8)
                     _buf_b = np.zeros((n, 3), dtype=np.uint8)
+                    _band_x = np.arange(NUM_BANDS, dtype=np.float32)
+                    half = (n + 1) // 2
+                    _led_x_mirror = np.linspace(0, NUM_BANDS - 1, half)
+                    _led_x = np.linspace(0, NUM_BANDS - 1, n)
+                    _full_amp = np.empty(n, dtype=np.float32)
+                    _vu_gradient = np.linspace(0, 1, n, dtype=np.float32)
+                    _indices_buf = np.empty(n, dtype=np.int32)
+                    self._prev_spectrum = None  # reset smoothing on resize
+
+                # Make pre-computed arrays available to render methods
+                self._band_x = _band_x
+                self._led_x = _led_x
+                self._led_x_mirror = _led_x_mirror
+                self._full_amp = _full_amp
+                self._vu_gradient = _vu_gradient
+                self._indices_buf = _indices_buf
 
                 buf = _buf_a if _use_a else _buf_b
                 _use_a = not _use_a
@@ -252,41 +278,40 @@ class AudioColorStripStream(ColorStripStream):
         sensitivity = self._sensitivity
         smoothing = self._smoothing
         lut = self._palette_lut
-
-        num_bands = len(spectrum)
-        band_x = np.arange(num_bands, dtype=np.float32)
+        band_x = self._band_x
+        full_amp = self._full_amp
+        indices_buf = self._indices_buf
 
         if self._mirror:
             half = (n + 1) // 2
-            led_x = np.linspace(0, num_bands - 1, half)
-            amplitudes = np.interp(led_x, band_x, spectrum)
+            amplitudes = np.interp(self._led_x_mirror, band_x, spectrum)
             amplitudes *= sensitivity
             np.clip(amplitudes, 0.0, 1.0, out=amplitudes)
-            # Temporal smoothing
             if self._prev_spectrum is not None and len(self._prev_spectrum) == half:
-                amplitudes[:] = smoothing * self._prev_spectrum + (1.0 - smoothing) * amplitudes
+                amplitudes *= (1.0 - smoothing)
+                amplitudes += smoothing * self._prev_spectrum
             self._prev_spectrum = amplitudes.copy()
             # Mirror: center = bass, edges = treble
-            full_amp = np.empty(n, dtype=np.float32)
             full_amp[:half] = amplitudes[::-1]
             full_amp[half:] = amplitudes[: n - half]
         else:
-            led_x = np.linspace(0, num_bands - 1, n)
-            amplitudes = np.interp(led_x, band_x, spectrum)
+            amplitudes = np.interp(self._led_x, band_x, spectrum)
             amplitudes *= sensitivity
             np.clip(amplitudes, 0.0, 1.0, out=amplitudes)
-            # Temporal smoothing
             if self._prev_spectrum is not None and len(self._prev_spectrum) == n:
-                amplitudes[:] = smoothing * self._prev_spectrum + (1.0 - smoothing) * amplitudes
+                amplitudes *= (1.0 - smoothing)
+                amplitudes += smoothing * self._prev_spectrum
             self._prev_spectrum = amplitudes.copy()
-            full_amp = amplitudes
+            full_amp[:] = amplitudes
 
         # Map to palette: amplitude → palette index → color
-        indices = (full_amp * 255).astype(np.int32)
-        np.clip(indices, 0, 255, out=indices)
-        colors = lut[indices]  # (n, 3) uint8
+        np.multiply(full_amp, 255, out=full_amp)
+        np.clip(full_amp, 0, 255, out=full_amp)
+        np.copyto(indices_buf, full_amp, casting='unsafe')
+        colors = lut[indices_buf]  # (n, 3) uint8
 
-        # Scale brightness by amplitude
+        # Scale brightness by amplitude — restore full_amp to [0, 1]
+        full_amp *= (1.0 / 255.0)
         for ch in range(3):
             buf[:, ch] = (colors[:, ch].astype(np.float32) * full_amp).astype(np.uint8)
 
@@ -299,7 +324,6 @@ class AudioColorStripStream(ColorStripStream):
 
         _, ch_rms = self._pick_channel(analysis)
         rms = ch_rms * self._sensitivity
-        # Temporal smoothing on RMS
         rms = self._smoothing * self._prev_rms + (1.0 - self._smoothing) * rms
         self._prev_rms = rms
         rms = min(1.0, rms)
@@ -308,11 +332,9 @@ class AudioColorStripStream(ColorStripStream):
         buf[:] = 0
 
         if fill_count > 0:
-            base = np.array(self._color, dtype=np.float32)
-            peak = np.array(self._color_peak, dtype=np.float32)
-
-            # Gradient from base color to peak color
-            t = np.linspace(0, 1, n, dtype=np.float32)[:fill_count]
+            base = self._color_f
+            peak = self._color_peak_f
+            t = self._vu_gradient[:fill_count]
             for ch in range(3):
                 buf[:fill_count, ch] = np.clip(
                     base[ch] + (peak[ch] - base[ch]) * t, 0, 255
@@ -325,11 +347,9 @@ class AudioColorStripStream(ColorStripStream):
             buf[:] = 0
             return
 
-        # On beat: flash to full brightness
         if analysis.beat:
             self._pulse_brightness = 1.0
         else:
-            # Exponential decay — sensitivity controls decay speed
             decay_rate = 0.05 + 0.15 * (1.0 / max(self._sensitivity, 0.1))
             self._pulse_brightness = max(0.0, self._pulse_brightness - decay_rate)
 
@@ -338,12 +358,11 @@ class AudioColorStripStream(ColorStripStream):
             buf[:] = 0
             return
 
-        # Color from palette based on beat intensity
-        palette_idx = int(analysis.beat_intensity * 255)
-        palette_idx = max(0, min(255, palette_idx))
+        palette_idx = max(0, min(255, int(analysis.beat_intensity * 255)))
         base_color = self._palette_lut[palette_idx]
 
-        # Fill all LEDs with pulsing color
-        buf[:, 0] = int(base_color[0] * brightness)
-        buf[:, 1] = int(base_color[1] * brightness)
-        buf[:, 2] = int(base_color[2] * brightness)
+        # Vectorized fill: scale color by brightness and broadcast to all LEDs
+        r, g, b = int(base_color[0] * brightness), int(base_color[1] * brightness), int(base_color[2] * brightness)
+        buf[:, 0] = r
+        buf[:, 1] = g
+        buf[:, 2] = b