Optimize numpy pipeline, add per-stage timing, and auto-sync LED count

- Eliminate 5 numpy↔tuple conversions per frame in the processing hot path: map_border_to_leds returns an ndarray, smoothing is inlined in numpy with integer math, and send_pixels_fast accepts an ndarray directly
- Fix numpy boolean bug in keepalive check (use `is not None` instead of truth-testing the array)
- Add per-stage pipeline timing (extract/map/smooth/send) to metrics API and UI with color-coded breakdown bar
- Expose device_fps from WLED health check in API schemas
- Auto-sync LED count from WLED device: health check detects changes and updates storage, calibration, and active targets automatically
- Use integer math for brightness scaling (uint16 * brightness >> 8)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-16 17:43:16 +03:00
parent 350dafb1e8
commit ac5c1d0c82
12 changed files with 218 additions and 29 deletions
--- a/server/src/wled_controller/core/adalight_client.py
+++ b/server/src/wled_controller/core/adalight_client.py
@@ -116,10 +116,15 @@ class AdalightClient(LEDClient):

    async def send_pixels(
        self,
-        pixels: List[Tuple[int, int, int]],
+        pixels,
        brightness: int = 255,
    ) -> bool:
-        """Send pixel data over serial using Adalight protocol (non-blocking)."""
+        """Send pixel data over serial using Adalight protocol (non-blocking).
+
+        Args:
+            pixels: numpy array (N, 3) uint8 or list of (R, G, B) tuples
+            brightness: Global brightness (0-255)
+        """
        if not self.is_connected:
            return False

@@ -136,9 +141,12 @@ class AdalightClient(LEDClient):
        # Serial write is blocking — use async send_pixels path instead
        return False

-    def _build_frame(self, pixels: List[Tuple[int, int, int]], brightness: int) -> bytes:
+    def _build_frame(self, pixels, brightness: int) -> bytes:
        """Build a complete Adalight frame: header + brightness-scaled RGB data."""
-        arr = np.array(pixels, dtype=np.uint16)
+        if isinstance(pixels, np.ndarray):
+            arr = pixels.astype(np.uint16)
+        else:
+            arr = np.array(pixels, dtype=np.uint16)

        if brightness < 255:
            arr = arr * brightness // 255
--- a/server/src/wled_controller/core/calibration.py
+++ b/server/src/wled_controller/core/calibration.py
@@ -255,14 +255,14 @@ class PixelMapper:
    def map_border_to_leds(
        self,
        border_pixels: BorderPixels
-    ) -> List[Tuple[int, int, int]]:
+    ) -> np.ndarray:
        """Map screen border pixels to LED colors.

        Args:
            border_pixels: Extracted border pixels from screen

        Returns:
-            List of (R, G, B) tuples for each LED
+            numpy array of shape (total_leds, 3), dtype uint8

        Raises:
            ValueError: If border pixels don't match calibration
@@ -338,7 +338,7 @@ class PixelMapper:
            elif active_count <= 0:
                led_array[:] = 0

-            return [tuple(c) for c in led_array]
+            return led_array
        else:
            if offset > 0:
                led_colors = led_colors[total_leds - offset:] + led_colors[:total_leds - offset]
@@ -358,7 +358,7 @@ class PixelMapper:
            elif active_count <= 0:
                led_colors = [(0, 0, 0)] * total_leds

-            return led_colors
+            return np.array(led_colors, dtype=np.uint8)

    def test_calibration(self, edge: str, color: Tuple[int, int, int]) -> List[Tuple[int, int, int]]:
        """Generate test pattern to light up specific edge.
--- a/server/src/wled_controller/core/led_client.py
+++ b/server/src/wled_controller/core/led_client.py
@@ -19,6 +19,7 @@ class DeviceHealth:
    device_led_count: Optional[int] = None
    device_rgbw: Optional[bool] = None
    device_led_type: Optional[str] = None
+    device_fps: Optional[int] = None
    error: Optional[str] = None


--- a/server/src/wled_controller/core/processor_manager.py
+++ b/server/src/wled_controller/core/processor_manager.py
@@ -19,7 +19,6 @@ from wled_controller.core.calibration import (
 from wled_controller.core.capture_engines.base import ScreenCapture
 from wled_controller.core.live_stream import LiveStream
 from wled_controller.core.live_stream_manager import LiveStreamManager
-from wled_controller.core.pixel_processor import smooth_colors
 from wled_controller.core.screen_capture import (
    calculate_average_color,
    calculate_dominant_color,
@@ -42,18 +41,32 @@ DEFAULT_STATE_CHECK_INTERVAL = 30  # seconds between health checks
 def _process_frame(capture, border_width, pixel_mapper, previous_colors, smoothing):
    """All CPU-bound work for one WLED frame (runs in thread pool).

-    Args:
-        capture: ScreenCapture from live_stream.get_latest_frame()
-        border_width: Border pixel width for extraction
-        pixel_mapper: PixelMapper for LED mapping
-        previous_colors: Previous frame colors for smoothing
-        smoothing: Smoothing factor (0-1)
+    Returns (led_colors, timing_ms) where led_colors is numpy array (N, 3) uint8
+    and timing_ms is a dict with per-stage timing in milliseconds.
    """
+    t0 = time.perf_counter()
    border_pixels = extract_border_pixels(capture, border_width)
+    t1 = time.perf_counter()
    led_colors = pixel_mapper.map_border_to_leds(border_pixels)
-    if previous_colors and smoothing > 0:
-        led_colors = smooth_colors(led_colors, previous_colors, smoothing)
-    return led_colors
+    t2 = time.perf_counter()
+
+    # Inline numpy smoothing — avoids list↔numpy round-trip
+    if previous_colors is not None and smoothing > 0 and len(previous_colors) == len(led_colors):
+        alpha = int(smoothing * 256)
+        led_colors = (
+            (256 - alpha) * led_colors.astype(np.uint16)
+            + alpha * previous_colors.astype(np.uint16)
+        ) >> 8
+        led_colors = led_colors.astype(np.uint8)
+    t3 = time.perf_counter()
+
+    timing_ms = {
+        "extract": (t1 - t0) * 1000,
+        "map_leds": (t2 - t1) * 1000,
+        "smooth": (t3 - t2) * 1000,
+        "total": (t3 - t0) * 1000,
+    }
+    return led_colors, timing_ms


 def _process_kc_frame(capture, rectangles, calc_fn, previous_colors, smoothing):
@@ -121,6 +134,12 @@ class ProcessingMetrics:
    fps_actual: float = 0.0
    fps_potential: float = 0.0
    fps_current: int = 0
+    # Per-stage timing (ms), averaged over last 10 frames
+    timing_extract_ms: float = 0.0
+    timing_map_leds_ms: float = 0.0
+    timing_smooth_ms: float = 0.0
+    timing_send_ms: float = 0.0
+    timing_total_ms: float = 0.0


@dataclass
@@ -192,7 +211,7 @@ class ProcessorManager:
    Targets are registered for processing (streaming sources to devices).
    """

-    def __init__(self, picture_source_store=None, capture_template_store=None, pp_template_store=None, pattern_template_store=None):
+    def __init__(self, picture_source_store=None, capture_template_store=None, pp_template_store=None, pattern_template_store=None, device_store=None):
        """Initialize processor manager."""
        self._devices: Dict[str, DeviceState] = {}
        self._targets: Dict[str, TargetState] = {}
@@ -203,6 +222,7 @@ class ProcessorManager:
        self._capture_template_store = capture_template_store
        self._pp_template_store = pp_template_store
        self._pattern_template_store = pattern_template_store
+        self._device_store = device_store
        self._live_stream_manager = LiveStreamManager(
            picture_source_store, capture_template_store, pp_template_store
        )
@@ -346,6 +366,7 @@ class ProcessorManager:
            "device_led_count": h.device_led_count,
            "device_rgbw": h.device_rgbw,
            "device_led_type": h.device_led_type,
+            "device_fps": h.device_fps,
            "error": h.error,
        }

@@ -365,6 +386,7 @@ class ProcessorManager:
            "device_led_count": h.device_led_count,
            "device_rgbw": h.device_rgbw,
            "device_led_type": h.device_led_type,
+            "device_fps": h.device_fps,
            "device_last_checked": h.last_checked,
            "device_error": h.error,
            "test_mode": ds.test_mode_active,
@@ -648,6 +670,7 @@ class ProcessorManager:
        frame_time = 1.0 / target_fps
        standby_interval = settings.standby_interval
        fps_samples = []
+        timing_samples: collections.deque = collections.deque(maxlen=10)  # per-stage timing
        prev_frame_time_stamp = time.time()
        prev_capture = None  # Track previous ScreenCapture for change detection
        last_send_time = 0.0  # Timestamp of last DDP send (for keepalive)
@@ -679,7 +702,7 @@ class ProcessorManager:
                    # Skip processing + send if the frame hasn't changed
                    if capture is prev_capture:
                        # Keepalive: resend last colors to prevent device exiting live mode
-                        if state.previous_colors and (loop_start - last_send_time) >= standby_interval:
+                        if state.previous_colors is not None and (loop_start - last_send_time) >= standby_interval:
                            if not state.is_running or state.led_client is None:
                                break
                            brightness_value = int(led_brightness * 255)
@@ -701,7 +724,7 @@ class ProcessorManager:
                    prev_capture = capture

                    # CPU-bound work in thread pool
-                    led_colors = await asyncio.to_thread(
+                    led_colors, frame_timing = await asyncio.to_thread(
                        _process_frame,
                        capture, border_width,
                        state.pixel_mapper, state.previous_colors, smoothing,
@@ -711,17 +734,36 @@ class ProcessorManager:
                    if not state.is_running or state.led_client is None:
                        break
                    brightness_value = int(led_brightness * 255)
+                    t_send_start = time.perf_counter()
                    if state.led_client.supports_fast_send:
                        state.led_client.send_pixels_fast(led_colors, brightness=brightness_value)
                    else:
                        await state.led_client.send_pixels(led_colors, brightness=brightness_value)
+                    send_ms = (time.perf_counter() - t_send_start) * 1000
                    last_send_time = time.time()
                    send_timestamps.append(last_send_time)

+                    # Per-stage timing (rolling average over last 10 frames)
+                    frame_timing["send"] = send_ms
+                    timing_samples.append(frame_timing)
+                    n = len(timing_samples)
+                    state.metrics.timing_extract_ms = sum(s["extract"] for s in timing_samples) / n
+                    state.metrics.timing_map_leds_ms = sum(s["map_leds"] for s in timing_samples) / n
+                    state.metrics.timing_smooth_ms = sum(s["smooth"] for s in timing_samples) / n
+                    state.metrics.timing_send_ms = sum(s["send"] for s in timing_samples) / n
+                    state.metrics.timing_total_ms = sum(s["total"] for s in timing_samples) / n + send_ms
+
                    # Update metrics
                    state.metrics.frames_processed += 1
                    if state.metrics.frames_processed <= 3 or state.metrics.frames_processed % 100 == 0:
-                        logger.info(f"Frame {state.metrics.frames_processed} sent for target {target_id} ({len(led_colors)} LEDs, bri={brightness_value})")
+                        logger.info(
+                            f"Frame {state.metrics.frames_processed} for {target_id} "
+                            f"({len(led_colors)} LEDs, bri={brightness_value}) — "
+                            f"extract={frame_timing['extract']:.1f}ms "
+                            f"map={frame_timing['map_leds']:.1f}ms "
+                            f"smooth={frame_timing['smooth']:.1f}ms "
+                            f"send={send_ms:.1f}ms"
+                        )
                    state.metrics.last_update = datetime.utcnow()
                    state.previous_colors = led_colors

@@ -784,6 +826,7 @@ class ProcessorManager:
                "device_led_count": h.device_led_count,
                "device_rgbw": h.device_rgbw,
                "device_led_type": h.device_led_type,
+                "device_fps": h.device_fps,
                "device_last_checked": h.last_checked,
                "device_error": h.error,
            }
@@ -798,6 +841,11 @@ class ProcessorManager:
            "frames_skipped": metrics.frames_skipped if state.is_running else None,
            "frames_keepalive": metrics.frames_keepalive if state.is_running else None,
            "fps_current": metrics.fps_current if state.is_running else None,
+            "timing_extract_ms": round(metrics.timing_extract_ms, 1) if state.is_running else None,
+            "timing_map_leds_ms": round(metrics.timing_map_leds_ms, 1) if state.is_running else None,
+            "timing_smooth_ms": round(metrics.timing_smooth_ms, 1) if state.is_running else None,
+            "timing_send_ms": round(metrics.timing_send_ms, 1) if state.is_running else None,
+            "timing_total_ms": round(metrics.timing_total_ms, 1) if state.is_running else None,
            "display_index": state.resolved_display_index if state.resolved_display_index is not None else state.settings.display_index,
            "last_update": metrics.last_update,
            "errors": [metrics.last_error] if metrics.last_error else [],
@@ -1034,7 +1082,10 @@ class ProcessorManager:
            logger.error(f"Fatal error in health check loop for {device_id}: {e}")

    async def _check_device_health(self, device_id: str):
-        """Check device health via the LED client abstraction."""
+        """Check device health via the LED client abstraction.
+
+        Also auto-syncs LED count if the device reports a different value.
+        """
        state = self._devices.get(device_id)
        if not state:
            return
@@ -1043,6 +1094,33 @@ class ProcessorManager:
            state.device_type, state.device_url, client, state.health,
        )

+        # Auto-sync LED count when device reports a different value
+        reported = state.health.device_led_count
+        if reported and reported != state.led_count and self._device_store:
+            old_count = state.led_count
+            logger.info(
+                f"Device {device_id} LED count changed: {old_count} → {reported}, "
+                f"updating calibration"
+            )
+            try:
+                # Update persistent storage (creates new default calibration)
+                device = self._device_store.update_device(device_id, led_count=reported)
+                # Sync in-memory state
+                state.led_count = reported
+                state.calibration = device.calibration
+                # Update any active targets using this device
+                for ts in self._targets.values():
+                    if ts.device_id == device_id:
+                        ts.led_count = reported
+                        ts.calibration = device.calibration
+                        if ts.pixel_mapper:
+                            ts.pixel_mapper = PixelMapper(
+                                device.calibration,
+                                interpolation_mode=ts.settings.interpolation_mode,
+                            )
+            except Exception as e:
+                logger.error(f"Failed to sync LED count for {device_id}: {e}")
+
    # ===== KEY COLORS TARGET MANAGEMENT =====

    def add_kc_target(self, target_id: str, picture_source_id: str, settings) -> None:
--- a/server/src/wled_controller/core/wled_client.py
+++ b/server/src/wled_controller/core/wled_client.py
@@ -435,25 +435,28 @@ class WLEDClient(LEDClient):

    def send_pixels_fast(
        self,
-        pixels: List[Tuple[int, int, int]],
+        pixels,
        brightness: int = 255,
    ) -> None:
-        """Optimized send for the hot loop — numpy packing + brightness, fire-and-forget DDP.
+        """Optimized send for the hot loop — fire-and-forget DDP.

+        Accepts numpy array (N,3) uint8 directly to avoid conversion overhead.
        Synchronous (no await). Only works for DDP path.
-        Falls back to raising if DDP is not available.

        Args:
-            pixels: List of (R, G, B) tuples
+            pixels: numpy array (N, 3) uint8 or list of (R, G, B) tuples
            brightness: Global brightness (0-255)
        """
        if not self.use_ddp or not self._ddp_client:
            raise RuntimeError("send_pixels_fast requires DDP; use send_pixels for HTTP")

-        pixel_array = np.array(pixels, dtype=np.uint8)
+        if isinstance(pixels, np.ndarray):
+            pixel_array = pixels
+        else:
+            pixel_array = np.array(pixels, dtype=np.uint8)

        if brightness < 255:
-            pixel_array = (pixel_array.astype(np.float32) * (brightness / 255.0)).astype(np.uint8)
+            pixel_array = (pixel_array.astype(np.uint16) * brightness >> 8).astype(np.uint8)

        self._ddp_client.send_pixels_numpy(pixel_array)

@@ -530,6 +533,7 @@ class WLEDClient(LEDClient):
                device_led_count=leds_info.get("count"),
                device_rgbw=leds_info.get("rgbw", False),
                device_led_type=device_led_type,
+                device_fps=leds_info.get("fps"),
                error=None,
            )
        except Exception as e:
@@ -542,6 +546,7 @@ class WLEDClient(LEDClient):
                device_led_count=prev_health.device_led_count if prev_health else None,
                device_rgbw=prev_health.device_rgbw if prev_health else None,
                device_led_type=prev_health.device_led_type if prev_health else None,
+                device_fps=prev_health.device_fps if prev_health else None,
                error=str(e),
            )