diff --git a/server/src/wled_controller/core/capture/calibration.py b/server/src/wled_controller/core/capture/calibration.py
index 2dd943e..3d44e66 100644
--- a/server/src/wled_controller/core/capture/calibration.py
+++ b/server/src/wled_controller/core/capture/calibration.py
@@ -264,15 +264,15 @@ class PixelMapper:
 
         # Compute segment boundaries (matching get_edge_segments float stepping)
         step = edge_len / led_count
-        boundaries = np.empty(led_count + 1, dtype=np.int64)
-        for i in range(led_count + 1):
-            boundaries[i] = int(i * step)
+        idx = np.arange(led_count + 1, dtype=np.int64)
+        boundaries = (idx * step).astype(np.int64)
         # Ensure each segment has at least 1 pixel
-        for i in range(led_count):
-            if boundaries[i + 1] <= boundaries[i]:
-                boundaries[i + 1] = boundaries[i] + 1
+        # The replaced loop was cumulative (each bump feeds the next comparison), so a
+        # pairwise np.maximum is not enough when step < 1. b'[i] = max(b[i], b'[i-1] + 1)
+        # is the same as a running maximum of (b[i] - i), shifted back by i.
+        boundaries = np.maximum.accumulate(boundaries - idx) + idx
         # Clamp all boundaries to edge_len (not just the last one)
-        boundaries = np.minimum(boundaries, edge_len)
+        np.minimum(boundaries, edge_len, out=boundaries)
 
         # Cumulative sum for O(1) range means — no per-LED Python numpy calls
         cumsum = np.zeros((edge_len + 1, 3), dtype=np.float64)
diff --git a/server/src/wled_controller/core/devices/ddp_client.py b/server/src/wled_controller/core/devices/ddp_client.py
index 58e71bd..b07c945 100644
--- a/server/src/wled_controller/core/devices/ddp_client.py
+++ b/server/src/wled_controller/core/devices/ddp_client.py
@@ -52,6 +52,9 @@ class DDPClient:
         self._protocol = None
         self._sequence = 0
         self._buses: List[BusConfig] = []
+        # Pre-allocated RGBW buffer (resized on demand)
+        self._rgbw_buf: Optional[np.ndarray] = None
+        self._rgbw_buf_n: int = 0
 
     async def connect(self):
         """Establish UDP connection."""
@@ -136,26 +139,23 @@ class DDPClient:
 
         return header + rgb_data
 
-    def _reorder_pixels(
-        self,
-        pixels: List[Tuple[int, int, int]],
-    ) -> List[Tuple[int, int, int]]:
-        """Apply per-bus color order reordering.
+    def _reorder_pixels_numpy(self, pixel_array: np.ndarray) -> np.ndarray:
+        """Apply per-bus color order reordering using numpy fancy indexing.
 
         WLED may not apply per-bus color order conversion for DDP data on
         all buses (observed in multi-bus setups). We reorder pixel channels
         here so the hardware receives the correct byte order directly.
 
         Args:
-            pixels: List of (R, G, B) tuples in standard RGB order
+            pixel_array: (N, 3) uint8 numpy array in RGB order
 
         Returns:
-            List of reordered tuples matching each bus's hardware color order
+            Reordered array (may be a view or copy depending on buses)
         """
         if not self._buses:
-            return pixels
+            return pixel_array
 
-        result = list(pixels)
+        result = pixel_array.copy()
         for bus in self._buses:
             order_map = COLOR_ORDER_MAP.get(bus.color_order)
             if not order_map or order_map == (0, 1, 2):
@@ -163,10 +163,7 @@ class DDPClient:
 
             start = bus.start
             end = min(bus.start + bus.length, len(result))
-            for i in range(start, end):
-                r, g, b = result[i]
-                rgb = (r, g, b)
-                result[i] = (rgb[order_map[0]], rgb[order_map[1]], rgb[order_map[2]])
+            result[start:end] = result[start:end][:, order_map]
 
         return result
 
@@ -197,8 +194,12 @@ class DDPClient:
             bpp = 4 if self.rgbw else 3  # bytes per pixel
             pixel_array = np.array(pixels, dtype=np.uint8)
             if self.rgbw:
-                white = np.zeros((pixel_array.shape[0], 1), dtype=np.uint8)
-                pixel_array = np.hstack((pixel_array, white))
+                n = pixel_array.shape[0]
+                if n != self._rgbw_buf_n:
+                    self._rgbw_buf = np.zeros((n, 4), dtype=np.uint8)
+                    self._rgbw_buf_n = n
+                self._rgbw_buf[:, :3] = pixel_array
+                pixel_array = self._rgbw_buf
             pixel_bytes = pixel_array.tobytes()
 
             total_bytes = len(pixel_bytes)
@@ -256,10 +257,14 @@ class DDPClient:
         if not self._transport:
             raise RuntimeError("DDP client not connected")
 
-        # Handle RGBW: insert zero white channel column
+        # Handle RGBW: copy RGB into pre-allocated (N, 4) buffer
         if self.rgbw:
-            white = np.zeros((pixel_array.shape[0], 1), dtype=np.uint8)
-            pixel_array = np.hstack((pixel_array, white))
+            n = pixel_array.shape[0]
+            if n != self._rgbw_buf_n:
+                self._rgbw_buf = np.zeros((n, 4), dtype=np.uint8)
+                self._rgbw_buf_n = n
+            self._rgbw_buf[:, :3] = pixel_array
+            pixel_array = self._rgbw_buf
 
         pixel_bytes = pixel_array.tobytes()
 
diff --git a/server/src/wled_controller/core/devices/wled_client.py b/server/src/wled_controller/core/devices/wled_client.py
index 6a3a84e..08d7036 100644
--- a/server/src/wled_controller/core/devices/wled_client.py
+++ b/server/src/wled_controller/core/devices/wled_client.py
@@ -333,18 +333,25 @@ class WLEDClient(LEDClient):
             RuntimeError: If request fails
         """
         # Validate inputs
-        if not pixels:
-            raise ValueError("Pixels list cannot be empty")
+        if isinstance(pixels, np.ndarray):
+            if pixels.size == 0:
+                raise ValueError("Pixels array cannot be empty")
+            pixel_arr = pixels
+        elif not pixels:
+            raise ValueError("Pixels list cannot be empty")
+        else:  # infer the dtype: a forced int16 wraps or overflows before the range check
+            pixel_arr = np.asarray(pixels)
 
         if not 0 <= brightness <= 255:
             raise ValueError(f"Brightness must be 0-255, got {brightness}")
 
-        # Validate pixel values
-        validated_pixels = []
-        for i, (r, g, b) in enumerate(pixels):
-            if not (0 <= r <= 255 and 0 <= g <= 255 and 0 <= b <= 255):
-                raise ValueError(f"Invalid RGB values at index {i}: ({r}, {g}, {b})")
-            validated_pixels.append((int(r), int(g), int(b)))
+        # Validate pixel values using vectorized bounds check
+        if pixel_arr.dtype != np.uint8:
+            if np.any((pixel_arr < 0) | (pixel_arr > 255)):
+                bad_mask = np.any((pixel_arr < 0) | (pixel_arr > 255), axis=1)
+                idx = int(np.argmax(bad_mask))
+                raise ValueError(f"Invalid RGB values at index {idx}: {tuple(pixel_arr[idx])}")
+        validated_pixels = pixel_arr.astype(np.uint8) if pixel_arr.dtype != np.uint8 else pixel_arr
 
         # Use DDP protocol if enabled
         if self.use_ddp and self._ddp_client:
@@ -354,33 +361,24 @@ class WLEDClient(LEDClient):
 
     async def _send_pixels_ddp(
         self,
-        pixels: List[Tuple[int, int, int]],
+        pixels: np.ndarray,
         brightness: int = 255,
     ) -> bool:
         """Send pixels via DDP protocol.
 
         Args:
-            pixels: List of (R, G, B) tuples
+            pixels: (N, 3) uint8 numpy array of RGB values
             brightness: Global brightness (0-255)
 
         Returns:
             True if successful
         """
         try:
-            # Apply brightness to pixels
             if brightness < 255:
-                brightness_factor = brightness / 255.0
-                pixels = [
-                    (
-                        int(r * brightness_factor),
-                        int(g * brightness_factor),
-                        int(b * brightness_factor)
-                    )
-                    for r, g, b in pixels
-                ]
+                pixels = (pixels.astype(np.uint16) * brightness >> 8).astype(np.uint8)
 
             logger.debug(f"Sending {len(pixels)} LEDs via DDP")
-            await self._ddp_client.send_pixels(pixels)
+            self._ddp_client.send_pixels_numpy(pixels)
             logger.debug(f"Successfully sent pixel colors via DDP")
             return True
 
@@ -390,14 +388,14 @@ class WLEDClient(LEDClient):
 
     async def _send_pixels_http(
         self,
-        pixels: List[Tuple[int, int, int]],
+        pixels: np.ndarray,
         brightness: int = 255,
         segment_id: int = 0,
     ) -> bool:
         """Send pixels via HTTP JSON API.
 
         Args:
-            pixels: List of (R, G, B) tuples
+            pixels: (N, 3) uint8 numpy array of RGB values
             brightness: Global brightness (0-255)
             segment_id: Segment ID to update
 
@@ -406,9 +404,8 @@ class WLEDClient(LEDClient):
         """
         try:
             # Build indexed pixel array: [led_index, r, g, b, ...]
-            indexed_pixels = []
-            for i, (r, g, b) in enumerate(pixels):
-                indexed_pixels.extend([i, int(r), int(g), int(b)])
+            indices = np.arange(len(pixels), dtype=np.int32).reshape(-1, 1)
+            indexed_pixels = np.hstack([indices, pixels.astype(np.int32)]).ravel().tolist()
 
             # Build WLED JSON state
             payload = {
diff --git a/server/src/wled_controller/core/filters/color_correction.py b/server/src/wled_controller/core/filters/color_correction.py
index bde9fde..3de2482 100644
--- a/server/src/wled_controller/core/filters/color_correction.py
+++ b/server/src/wled_controller/core/filters/color_correction.py
@@ -3,6 +3,7 @@
 import math
 from typing import Any, Dict, List, Optional
 
+import cv2
 import numpy as np
 
 from wled_controller.core.filters.base import FilterOptionDef, PostprocessingFilter
@@ -68,11 +69,12 @@ class ColorCorrectionFilter(PostprocessingFilter):
         g_mult = (tg / _REF_G) * gg
         b_mult = (tb / _REF_B) * bg
 
-        # Build per-channel LUTs
+        # Build merged (256, 1, 3) LUT for single-pass cv2.LUT
         src = np.arange(256, dtype=np.float32)
-        self._lut_r = np.clip(src * r_mult, 0, 255).astype(np.uint8)
-        self._lut_g = np.clip(src * g_mult, 0, 255).astype(np.uint8)
-        self._lut_b = np.clip(src * b_mult, 0, 255).astype(np.uint8)
+        lut_r = np.clip(src * r_mult, 0, 255).astype(np.uint8)
+        lut_g = np.clip(src * g_mult, 0, 255).astype(np.uint8)
+        lut_b = np.clip(src * b_mult, 0, 255).astype(np.uint8)
+        self._lut = np.stack([lut_r, lut_g, lut_b], axis=-1).reshape(256, 1, 3)
 
         self._is_neutral = (temp == 6500 and rg == 1.0 and gg == 1.0 and bg == 1.0)
 
@@ -120,7 +122,5 @@ class ColorCorrectionFilter(PostprocessingFilter):
     def process_image(self, image: np.ndarray, image_pool: ImagePool) -> Optional[np.ndarray]:
         if self._is_neutral:
             return None
-        image[:, :, 0] = self._lut_r[image[:, :, 0]]
-        image[:, :, 1] = self._lut_g[image[:, :, 1]]
-        image[:, :, 2] = self._lut_b[image[:, :, 2]]
+        cv2.LUT(image, self._lut, dst=image)
         return None
diff --git a/server/src/wled_controller/core/filters/frame_interpolation.py b/server/src/wled_controller/core/filters/frame_interpolation.py
index 8ec29a5..6b4b8c4 100644
--- a/server/src/wled_controller/core/filters/frame_interpolation.py
+++ b/server/src/wled_controller/core/filters/frame_interpolation.py
@@ -43,6 +43,10 @@ class FrameInterpolationFilter(PostprocessingFilter):
         self._time_a: float = 0.0
         self._time_b: float = 0.0
         self._sig_b: Optional[bytes] = None  # 64-byte signature of frame_b input
+        # Pre-allocated uint16 scratch buffers for blending
+        self._u16_a: Optional[np.ndarray] = None
+        self._u16_b: Optional[np.ndarray] = None
+        self._blend_shape: Optional[tuple] = None
 
     @classmethod
     def get_options_schema(cls) -> List[FilterOptionDef]:
@@ -80,10 +84,20 @@ class FrameInterpolationFilter(PostprocessingFilter):
         # Blend: output = (1 - alpha)*A + alpha*B  (integer fast path)
         alpha_i = int(alpha * 256)
         h, w, c = image.shape
+        shape = (h, w, c)
+
+        # Resize scratch buffers on shape change
+        if self._blend_shape != shape:
+            self._u16_a = np.empty(shape, dtype=np.uint16)
+            self._u16_b = np.empty(shape, dtype=np.uint16)
+            self._blend_shape = shape
+
         out = image_pool.acquire(h, w, c)
-        blended = (
-            (256 - alpha_i) * self._frame_a.astype(np.uint16)
-            + alpha_i * image.astype(np.uint16)
-        ) >> 8
-        np.copyto(out, blended, casting="unsafe")
+        np.copyto(self._u16_a, self._frame_a, casting='unsafe')
+        np.copyto(self._u16_b, image, casting='unsafe')
+        self._u16_a *= (256 - alpha_i)
+        self._u16_b *= alpha_i
+        self._u16_a += self._u16_b
+        self._u16_a >>= 8
+        np.copyto(out, self._u16_a, casting='unsafe')
         return out
diff --git a/server/src/wled_controller/core/filters/saturation.py b/server/src/wled_controller/core/filters/saturation.py
index aa6593b..ef023d8 100644
--- a/server/src/wled_controller/core/filters/saturation.py
+++ b/server/src/wled_controller/core/filters/saturation.py
@@ -11,14 +11,15 @@ from wled_controller.core.filters.registry import FilterRegistry
 
 @FilterRegistry.register
 class SaturationFilter(PostprocessingFilter):
-    """Adjusts color saturation via luminance blending."""
+    """Adjusts color saturation via luminance blending (integer math)."""
 
     filter_id = "saturation"
     filter_name = "Saturation"
 
     def __init__(self, options: Dict[str, Any]):
         super().__init__(options)
-        self._float_buf: Optional[np.ndarray] = None
+        self._i32_buf: Optional[np.ndarray] = None
+        self._i32_gray: Optional[np.ndarray] = None
 
     @classmethod
     def get_options_schema(cls) -> List[FilterOptionDef]:
@@ -39,14 +40,22 @@ class SaturationFilter(PostprocessingFilter):
         if value == 1.0:
             return None
         h, w, c = image.shape
-        if self._float_buf is None or self._float_buf.shape != (h, w, c):
-            self._float_buf = np.empty((h, w, c), dtype=np.float32)
-        arr = self._float_buf
-        np.copyto(arr, image)
-        arr *= (1.0 / 255.0)
-        lum = np.dot(arr[..., :3], [0.299, 0.587, 0.114])[..., np.newaxis]
-        arr[..., :3] = lum + (arr[..., :3] - lum) * value
-        np.clip(arr, 0, 1.0, out=arr)
-        arr *= 255.0
-        np.copyto(image, arr, casting='unsafe')
+        # Work on the first three channels only, as the float version did, so any extra channel (e.g. alpha) is untouched
+        rgb = image[:, :, :3]
+        if self._i32_buf is None or self._i32_buf.shape != rgb.shape:
+            self._i32_buf = np.empty(rgb.shape, dtype=np.int32)
+            self._i32_gray = np.empty((h, w, 1), dtype=np.int32)
+        i32 = self._i32_buf
+        gray = self._i32_gray
+        sat_i = int(value * 256)
+        # Rec.601 luminance: (R*299 + G*587 + B*114) / 1000
+        np.copyto(i32, rgb, casting='unsafe')
+        gray[:, :, 0] = (i32[:, :, 0] * 299 + i32[:, :, 1] * 587 + i32[:, :, 2] * 114) // 1000
+        # Blend: out = ((256 - sat) * gray + sat * color) >> 8
+        i32 *= sat_i
+        gray *= (256 - sat_i)
+        i32 += gray
+        i32 >>= 8
+        np.clip(i32, 0, 255, out=i32)
+        np.copyto(rgb, i32, casting='unsafe')
         return None
diff --git a/server/src/wled_controller/core/processing/processor_manager.py b/server/src/wled_controller/core/processing/processor_manager.py
index 7008d54..30bcb86 100644
--- a/server/src/wled_controller/core/processing/processor_manager.py
+++ b/server/src/wled_controller/core/processing/processor_manager.py
@@ -828,18 +828,31 @@ class ProcessorManager:
             for p in self._processors.values()
         )
 
+    def _is_device_streaming(self, device_id: str) -> bool:
+        """Return True when at least one running processor is bound to ``device_id``."""
+        return any(
+            getattr(candidate, 'device_id', None) == device_id and candidate.is_running
+            for candidate in self._processors.values()
+        )
+
     async def _health_check_loop(self, device_id: str):
-        """Background loop that periodically checks a device."""
+        """Background loop that periodically checks a device.
+
+        Uses adaptive intervals: 10s for actively streaming devices,
+        60s for idle devices, to balance responsiveness with overhead.
+        """
         state = self._devices.get(device_id)
         if not state:
             return
 
-        check_interval = DEFAULT_STATE_CHECK_INTERVAL
+        ACTIVE_INTERVAL = 10   # streaming devices — faster detection
+        IDLE_INTERVAL = 60     # idle devices — less overhead
 
         try:
             while self._health_monitoring_active:
                 await self._check_device_health(device_id)
-                await asyncio.sleep(check_interval)
+                interval = ACTIVE_INTERVAL if self._is_device_streaming(device_id) else IDLE_INTERVAL
+                await asyncio.sleep(interval)
         except asyncio.CancelledError:
             pass
         except Exception as e:
diff --git a/server/src/wled_controller/core/processing/wled_target_processor.py b/server/src/wled_controller/core/processing/wled_target_processor.py
index f143ef0..09f0dc0 100644
--- a/server/src/wled_controller/core/processing/wled_target_processor.py
+++ b/server/src/wled_controller/core/processing/wled_target_processor.py
@@ -67,6 +67,11 @@ class WledTargetProcessor(TargetProcessor):
 
         self._resolved_display_index: Optional[int] = None
 
+        # Fit-to-device linspace cache (per-instance to avoid cross-target thrash)
+        self._fit_cache_key: tuple = (0, 0)
+        self._fit_cache_src: Optional[np.ndarray] = None
+        self._fit_cache_dst: Optional[np.ndarray] = None
+
         # LED preview WebSocket clients
         self._preview_clients: list = []
         self._last_preview_colors: np.ndarray | None = None
@@ -461,7 +466,7 @@ class WledTargetProcessor(TargetProcessor):
         self._preview_clients.append(ws)
         # Send last known frame immediately so late joiners see current state
         if self._last_preview_colors is not None:
-            data = bytes([self._last_preview_brightness]) + self._last_preview_colors.astype(np.uint8).tobytes()
+            data = bytes([self._last_preview_brightness]) + self._last_preview_colors.tobytes()
             asyncio.ensure_future(self._send_preview_to(ws, data))
 
     @staticmethod
@@ -484,7 +489,7 @@ class WledTargetProcessor(TargetProcessor):
         if not self._preview_clients:
             return
 
-        data = bytes([brightness]) + colors.astype(np.uint8).tobytes()
+        data = bytes([brightness]) + colors.tobytes()
 
         async def _send_safe(ws):
             try:
@@ -501,16 +506,18 @@ class WledTargetProcessor(TargetProcessor):
 
     # ----- Private: processing loop -----
 
-    @staticmethod
-    def _fit_to_device(colors: np.ndarray, device_led_count: int) -> np.ndarray:
-        """Resample colors to match the target LED count."""
-        n = len(colors)
-        if n == device_led_count or device_led_count <= 0:
-            return colors
-        src_x = np.linspace(0, 1, n)
-        dst_x = np.linspace(0, 1, device_led_count)
-        result = np.column_stack([
-            np.interp(dst_x, src_x, colors[:, ch]).astype(np.uint8)
-            for ch in range(colors.shape[1])
-        ])
-        return result
+    def _fit_to_device(self, colors: np.ndarray, device_led_count: int) -> np.ndarray:
+        """Linearly resample each channel of ``colors`` to ``device_led_count`` rows."""
+        src_count = len(colors)
+        # Nothing to do when the counts agree or the target count is not positive
+        if device_led_count <= 0 or src_count == device_led_count:
+            return colors
+        cache_key = (src_count, device_led_count)
+        if cache_key != self._fit_cache_key:
+            self._fit_cache_src = np.linspace(0, 1, src_count)
+            self._fit_cache_dst = np.linspace(0, 1, device_led_count)
+            self._fit_cache_key = cache_key
+        dst_x, src_x = self._fit_cache_dst, self._fit_cache_src
+        return np.column_stack(
+            [np.interp(dst_x, src_x, colors[:, ch]).astype(np.uint8) for ch in range(colors.shape[1])]
+        )
@@ -568,9 +575,9 @@ class WledTargetProcessor(TargetProcessor):
         # --- Timing diagnostics ---
         _diag_interval = 5.0
         _diag_next_report = time.perf_counter() + _diag_interval
-        _diag_sleep_jitters: list = []
-        _diag_slow_iters: list = []
-        _diag_iter_times: list = []
+        _diag_sleep_jitters: collections.deque = collections.deque(maxlen=300)
+        _diag_slow_iters: collections.deque = collections.deque(maxlen=50)
+        _diag_iter_times: collections.deque = collections.deque(maxlen=300)
         _diag_device_info: Optional[DeviceInfo] = None
         _diag_device_info_age = 0
 
@@ -817,7 +824,7 @@ class WledTargetProcessor(TargetProcessor):
                     iter_ms = (iter_end - loop_start) * 1000
                     _diag_iter_times.append(iter_ms)
                     if iter_ms > frame_time * 1500:
-                        if "sleep_jitter" not in [s[1] for s in _diag_slow_iters[-1:]]:
+                        if not _diag_slow_iters or _diag_slow_iters[-1][1] != "sleep_jitter":
                             _diag_slow_iters.append((iter_ms, "slow_iter"))
 
                     # Periodic diagnostics report
@@ -845,7 +852,7 @@ class WledTargetProcessor(TargetProcessor):
                             logger.warning(
                                 f"[DIAG] {self._target_id} slow iterations: "
                                 f"{len(_diag_slow_iters)} in last {_diag_interval}s — "
-                                f"{_diag_slow_iters[:5]}"
+                                f"{list(_diag_slow_iters)[:5]}"
                             )
                         _diag_sleep_jitters.clear()
                         _diag_slow_iters.clear()
@@ -855,7 +862,9 @@ class WledTargetProcessor(TargetProcessor):
             logger.info(f"Processing loop cancelled for target {self._target_id}")
             raise
         except Exception as e:
-            logger.error(f"Fatal error in processing loop for target {self._target_id}: {e}")
+            logger.error(f"Fatal error in processing loop for target {self._target_id}: {e}", exc_info=True)
+            self._metrics.last_error = f"FATAL: {e}"
+            self._metrics.errors_count += 1
             self._is_running = False
             raise
         finally:
diff --git a/server/src/wled_controller/core/profiles/profile_engine.py b/server/src/wled_controller/core/profiles/profile_engine.py
index eb3d68c..ef0015e 100644
--- a/server/src/wled_controller/core/profiles/profile_engine.py
+++ b/server/src/wled_controller/core/profiles/profile_engine.py
@@ -15,7 +15,7 @@ logger = get_logger(__name__)
 class ProfileEngine:
     """Evaluates profile conditions and starts/stops targets accordingly."""
 
-    def __init__(self, profile_store: ProfileStore, processor_manager, poll_interval: float = 3.0):
+    def __init__(self, profile_store: ProfileStore, processor_manager, poll_interval: float = 1.0):
         self._store = profile_store
         self._manager = processor_manager
         self._poll_interval = poll_interval