diff --git a/server/src/wled_controller/api/schemas/picture_targets.py b/server/src/wled_controller/api/schemas/picture_targets.py
index 729b53c..c094e32 100644
--- a/server/src/wled_controller/api/schemas/picture_targets.py
+++ b/server/src/wled_controller/api/schemas/picture_targets.py
@@ -123,6 +123,7 @@ class TargetProcessingState(BaseModel):
     device_id: Optional[str] = Field(None, description="Device ID")
     processing: bool = Field(description="Whether processing is active")
     fps_actual: Optional[float] = Field(None, description="Actual FPS achieved")
+    fps_potential: Optional[float] = Field(None, description="Potential FPS (processing speed without throttle)")
     fps_target: int = Field(default=0, description="Target FPS")
     display_index: int = Field(default=0, description="Current display index")
     last_update: Optional[datetime] = Field(None, description="Last successful update")
diff --git a/server/src/wled_controller/core/calibration.py b/server/src/wled_controller/core/calibration.py
index 5019f77..1e15ac9 100644
--- a/server/src/wled_controller/core/calibration.py
+++ b/server/src/wled_controller/core/calibration.py
@@ -3,6 +3,8 @@
 from dataclasses import dataclass, field
 from typing import Dict, List, Literal, Tuple
 
+import numpy as np
+
 from wled_controller.core.screen_capture import (
     BorderPixels,
     get_edge_segments,
@@ -189,6 +191,62 @@ class PixelMapper:
             f"using {interpolation_mode} interpolation"
         )
 
+    def _get_edge_pixels(self, border_pixels: BorderPixels, edge_name: str) -> np.ndarray:
+        """Get edge pixel array with span slicing applied."""
+        if edge_name == "top":
+            edge_pixels = border_pixels.top
+        elif edge_name == "right":
+            edge_pixels = border_pixels.right
+        elif edge_name == "bottom":
+            edge_pixels = border_pixels.bottom
+        else:
+            edge_pixels = border_pixels.left
+
+        span_start, span_end = self.calibration.get_edge_span(edge_name)
+        if span_start > 0.0 or span_end < 1.0:
+            if edge_name in ("top", "bottom"):
+                total_w = edge_pixels.shape[1]
+                s, e = int(span_start * total_w), int(span_end * total_w)
+                edge_pixels = edge_pixels[:, s:e, :]
+            else:
+                total_h = edge_pixels.shape[0]
+                s, e = int(span_start * total_h), int(span_end * total_h)
+                edge_pixels = edge_pixels[s:e, :, :]
+        return edge_pixels
+
+    def _map_edge_average(
+        self, edge_pixels: np.ndarray, edge_name: str, led_count: int
+    ) -> np.ndarray:
+        """Vectorized average-color mapping for one edge. Returns (led_count, 3) uint8."""
+        # Reduce border dimension → 1D array of shape (edge_length, 3)
+        if edge_name in ("top", "bottom"):
+            edge_1d = edge_pixels.mean(axis=0)  # mean across border_width
+        else:
+            edge_1d = edge_pixels.mean(axis=1)  # mean across border_width
+
+        edge_len = edge_1d.shape[0]
+
+        # Compute segment boundaries (matching get_edge_segments float stepping)
+        step = edge_len / led_count
+        boundaries = np.empty(led_count + 1, dtype=np.int64)
+        for i in range(led_count + 1):
+            boundaries[i] = int(i * step)
+        # Ensure each segment has at least 1 pixel
+        for i in range(led_count):
+            if boundaries[i + 1] <= boundaries[i]:
+                boundaries[i + 1] = boundaries[i] + 1
+        np.minimum(boundaries, edge_len, out=boundaries)  # clamp all: the forcing loop can overrun edge_len
+
+        # Cumulative sum for O(1) range means — no per-LED Python numpy calls
+        cumsum = np.zeros((edge_len + 1, 3), dtype=np.float64)
+        cumsum[1:] = np.cumsum(edge_1d.astype(np.float64), axis=0)
+
+        starts = boundaries[:-1]
+        ends = boundaries[1:]
+        lengths = np.maximum(ends - starts, 1).reshape(-1, 1).astype(np.float64)  # >=1 guards div-by-zero
+        segment_sums = cumsum[ends] - cumsum[starts]
+        return np.clip(segment_sums / lengths, 0, 255).astype(np.uint8)
+
     def map_border_to_leds(
         self, border_pixels: BorderPixels
@@ -205,72 +263,58 @@ class PixelMapper:
             ValueError: If border pixels don't match calibration
         """
         total_leds = self.calibration.get_total_leds()
-        led_colors = [(0, 0, 0)] * total_leds
+        use_fast_avg = self.interpolation_mode == "average"
+
+        if use_fast_avg:
+            led_array = np.zeros((total_leds, 3), dtype=np.uint8)
+        else:
+            led_colors = [(0, 0, 0)] * total_leds
 
-        # Process each edge
         for edge_name in ["top", "right", "bottom", "left"]:
             segment = self.calibration.get_segment_for_edge(edge_name)
             if not segment:
-                # This edge is not configured
                 continue
 
-            # Get pixels for this edge
-            if edge_name == "top":
-                edge_pixels = border_pixels.top
-            elif edge_name == "right":
-                edge_pixels = border_pixels.right
-            elif edge_name == "bottom":
-                edge_pixels = border_pixels.bottom
-            else:  # left
-                edge_pixels = border_pixels.left
+            edge_pixels = self._get_edge_pixels(border_pixels, edge_name)
 
-            # Slice to span region if not full coverage
-            span_start, span_end = self.calibration.get_edge_span(edge_name)
-            if span_start > 0.0 or span_end < 1.0:
-                if edge_name in ("top", "bottom"):
-                    total_w = edge_pixels.shape[1]
-                    s = int(span_start * total_w)
-                    e = int(span_end * total_w)
-                    edge_pixels = edge_pixels[:, s:e, :]
-                else:
-                    total_h = edge_pixels.shape[0]
-                    s = int(span_start * total_h)
-                    e = int(span_end * total_h)
-                    edge_pixels = edge_pixels[s:e, :, :]
-
-            # Divide edge into segments matching LED count
-            try:
-                pixel_segments = get_edge_segments(
-                    edge_pixels,
-                    segment.led_count,
-                    edge_name
+            if use_fast_avg:
+                # Vectorized: compute all LED colors for this edge at once
+                colors = self._map_edge_average(
+                    edge_pixels, edge_name, segment.led_count
                 )
-            except ValueError as e:
-                logger.error(f"Failed to segment {edge_name} edge: {e}")
-                raise
+                led_indices = np.arange(segment.led_start, segment.led_start + segment.led_count)
+                if segment.reverse:
+                    led_indices = led_indices[::-1]
+                led_array[led_indices] = colors
+            else:
+                # Per-LED fallback for median/dominant modes
+                try:
+                    pixel_segments = get_edge_segments(
+                        edge_pixels, segment.led_count, edge_name
+                    )
+                except ValueError as e:
+                    logger.error(f"Failed to segment {edge_name} edge: {e}")
+                    raise
 
-            # Calculate LED indices for this segment
-            led_indices = list(range(segment.led_start, segment.led_start + segment.led_count))
+                led_indices = list(range(segment.led_start, segment.led_start + segment.led_count))
+                if segment.reverse:
+                    led_indices = list(reversed(led_indices))
 
-            # Reverse if needed
-            if segment.reverse:
-                led_indices = list(reversed(led_indices))
+                for led_idx, pixel_segment in zip(led_indices, pixel_segments):
+                    color = self._calc_color(pixel_segment)
+                    led_colors[led_idx] = color
 
-            # Map pixel segments to LEDs
-            for led_idx, pixel_segment in zip(led_indices, pixel_segments):
-                color = self._calc_color(pixel_segment)
-                led_colors[led_idx] = color
-
-        # Apply physical LED offset by rotating the array
-        # Offset = number of LEDs from LED 0 to the start corner
-        # Physical LED[i] should get calibration color[(i - offset) % total]
         offset = self.calibration.offset % total_leds if total_leds > 0 else 0
-        if offset > 0:
-            led_colors = led_colors[total_leds - offset:] + led_colors[:total_leds - offset]
 
-        logger.debug(f"Mapped border pixels to {total_leds} LED colors (offset={offset})")
-        return led_colors
+        if use_fast_avg:
+            if offset > 0:
+                led_array = np.roll(led_array, offset, axis=0)
+            return [(int(c[0]), int(c[1]), int(c[2])) for c in led_array]  # plain ints, match slow path
+        else:
+            if offset > 0:
+                led_colors = led_colors[total_leds - offset:] + led_colors[:total_leds - offset]
+            logger.debug(f"Mapped border pixels to {total_leds} LED colors (offset={offset})")
+            return led_colors
 
     def test_calibration(self, edge: str, color: Tuple[int, int, int]) -> List[Tuple[int, int, int]]:
         """Generate test pattern to light up specific edge.
diff --git a/server/src/wled_controller/core/ddp_client.py b/server/src/wled_controller/core/ddp_client.py
index 7b0e383..94297b0 100644
--- a/server/src/wled_controller/core/ddp_client.py
+++ b/server/src/wled_controller/core/ddp_client.py
@@ -4,6 +4,8 @@ import struct
 from dataclasses import dataclass
 from typing import Dict, List, Optional, Tuple
 
+import numpy as np
+
 from wled_controller.utils import get_logger
 
 logger = get_logger(__name__)
@@ -207,7 +209,7 @@ class DDPClient:
         # Split into multiple packets if needed
         num_packets = (total_bytes + bytes_per_packet - 1) // bytes_per_packet
 
-        logger.info(
+        logger.debug(
             f"DDP: Sending {len(pixels)} pixels ({total_bytes} bytes) "
             f"in {num_packets} packet(s) to {self.host}:{self.port}"
         )
@@ -241,6 +243,45 @@ class DDPClient:
             logger.error(f"Failed to send DDP pixels: {e}")
             raise RuntimeError(f"DDP send failed: {e}")
 
+    def send_pixels_numpy(self, pixel_array: np.ndarray, max_packet_size: int = 1400) -> bool:
+        """Send pixel data via DDP from a numpy array — no per-pixel Python loops.
+
+        Args:
+            pixel_array: (N, 3) uint8 numpy array of RGB values
+            max_packet_size: Maximum UDP packet size (default 1400 bytes for safety)
+
+        Returns:
+            True if successful
+        """
+        if not self._transport:
+            raise RuntimeError("DDP client not connected")
+
+        # Handle RGBW: insert zero white channel column
+        if self.rgbw:
+            white = np.zeros((pixel_array.shape[0], 1), dtype=np.uint8)
+            pixel_array = np.hstack((pixel_array, white))
+
+        pixel_bytes = pixel_array.tobytes()
+
+        bpp = 4 if self.rgbw else 3
+        total_bytes = len(pixel_bytes)
+        max_payload = max_packet_size - 10  # 10-byte header
+        bytes_per_packet = (max_payload // bpp) * bpp
+        num_packets = (total_bytes + bytes_per_packet - 1) // bytes_per_packet
+
+        for i in range(num_packets):
+            start = i * bytes_per_packet
+            end = min(start + bytes_per_packet, total_bytes)
+            chunk = pixel_bytes[start:end]
+            self._sequence = (self._sequence + 1) % 256
+            packet = self._build_ddp_packet(
+                chunk, offset=start,
+                sequence=self._sequence, push=(i == num_packets - 1),  # PUSH on final packet so the frame displays
+            )
+            self._transport.sendto(packet)
+
+        return True
+
     async def __aenter__(self):
         """Async context manager entry."""
         await self.connect()
diff --git a/server/src/wled_controller/core/live_stream.py b/server/src/wled_controller/core/live_stream.py
index faf597f..0eb11f3 100644
--- a/server/src/wled_controller/core/live_stream.py
+++ b/server/src/wled_controller/core/live_stream.py
@@ -147,12 +147,9 @@ class ScreenCaptureLiveStream(LiveStream):
 class ProcessedLiveStream(LiveStream):
     """Live stream that applies postprocessing filters to a source stream.
 
-    Reads frames from a source LiveStream and applies a chain of filters.
-    Uses identity caching — if the source frame hasn't changed, returns
-    the previously processed result without recomputing.
-
-    Thread-safe: a lock protects the filter application so concurrent
-    consumers don't duplicate work.
+    A background thread polls the source for new frames, applies the filter
+    chain, and caches the result. Consumers call get_latest_frame() which
+    returns the pre-computed result under a lock (sub-microsecond).
     """
 
     def __init__(
@@ -163,9 +160,10 @@ class ProcessedLiveStream(LiveStream):
         self._source = source
         self._filters = filters
         self._image_pool = ImagePool()
-        self._process_lock = threading.Lock()
-        self._cached_source_frame: Optional[ScreenCapture] = None
-        self._cached_result: Optional[ScreenCapture] = None
+        self._latest_frame: Optional[ScreenCapture] = None
+        self._frame_lock = threading.Lock()
+        self._running = False
+        self._thread: Optional[threading.Thread] = None
 
     @property
     def target_fps(self) -> int:
@@ -176,23 +174,47 @@ class ProcessedLiveStream(LiveStream):
         return self._source.display_index
 
     def start(self) -> None:
-        # Source lifecycle managed by LiveStreamManager
-        pass
+        # Source lifecycle managed by LiveStreamManager; only start our thread
+        if self._running:
+            return
+        self._running = True
+        self._thread = threading.Thread(
+            target=self._process_loop,
+            name="processed-stream",
+            daemon=True,
+        )
+        self._thread.start()
+        logger.info("ProcessedLiveStream background thread started")
 
     def stop(self) -> None:
-        # Source lifecycle managed by LiveStreamManager
-        self._cached_source_frame = None
-        self._cached_result = None
+        # Source lifecycle managed by LiveStreamManager; only stop our thread
+        self._running = False
+        if self._thread:
+            self._thread.join(timeout=5.0)
+            if self._thread.is_alive():
+                logger.warning("ProcessedLiveStream thread did not terminate within 5s")
+            self._thread = None
+        self._latest_frame = None
 
     def get_latest_frame(self) -> Optional[ScreenCapture]:
-        source_frame = self._source.get_latest_frame()
-        if source_frame is None:
-            return None
+        with self._frame_lock:
+            return self._latest_frame
 
-        with self._process_lock:
-            # Identity cache: if source frame object hasn't changed, reuse result
-            if source_frame is self._cached_source_frame and self._cached_result is not None:
-                return self._cached_result
+    def _process_loop(self) -> None:
+        """Background thread: poll source, apply filters, cache result."""
+        cached_source_frame: Optional[ScreenCapture] = None
+        while self._running:
+            source_frame = self._source.get_latest_frame()
+            if source_frame is None:
+                time.sleep(0.001)
+                continue
+
+            # Identity cache: skip if source frame object hasn't changed
+            if source_frame is cached_source_frame:
+                time.sleep(0.001)
+                continue
+
+            cached_source_frame = source_frame
 
             # Apply filters to a copy of the source image
             image = source_frame.image.copy()
@@ -207,9 +229,8 @@ class ProcessedLiveStream(LiveStream):
                 height=source_frame.height,
                 display_index=source_frame.display_index,
             )
-            self._cached_source_frame = source_frame
-            self._cached_result = processed
-            return processed
+            with self._frame_lock:
+                self._latest_frame = processed
 
 
 class StaticImageLiveStream(LiveStream):
diff --git a/server/src/wled_controller/core/processor_manager.py b/server/src/wled_controller/core/processor_manager.py
index 62c8fa8..a075bd9 100644
--- a/server/src/wled_controller/core/processor_manager.py
+++ b/server/src/wled_controller/core/processor_manager.py
@@ -32,6 +32,58 @@ logger = get_logger(__name__)
 
 DEFAULT_STATE_CHECK_INTERVAL = 30  # seconds between health checks
 
+
+def _process_frame(live_stream, border_width, pixel_mapper, previous_colors, smoothing):
+    """All CPU-bound work for one WLED frame (runs in thread pool).
+
+    Includes get_latest_frame() because ProcessedLiveStream may apply
+    filters (image copy + processing) which should not block the event loop.
+    """
+    capture = live_stream.get_latest_frame()
+    if capture is None:
+        return None
+    border_pixels = extract_border_pixels(capture, border_width)
+    led_colors = pixel_mapper.map_border_to_leds(border_pixels)
+    if previous_colors and smoothing > 0:
+        led_colors = smooth_colors(led_colors, previous_colors, smoothing)
+    return led_colors
+
+
+def _process_kc_frame(live_stream, rectangles, calc_fn, previous_colors, smoothing):
+    """All CPU-bound work for one KC frame (runs in thread pool).
+
+    Includes get_latest_frame() because ProcessedLiveStream may apply
+    filters which should not block the event loop.
+    """
+    capture = live_stream.get_latest_frame()
+    if capture is None:
+        return None
+    img = capture.image
+    h, w = img.shape[:2]
+    colors = {}
+    for rect in rectangles:
+        px_x = max(0, int(rect.x * w))
+        px_y = max(0, int(rect.y * h))
+        px_w = max(1, int(rect.width * w))
+        px_h = max(1, int(rect.height * h))
+        px_x = min(px_x, w - 1)
+        px_y = min(px_y, h - 1)
+        px_w = min(px_w, w - px_x)
+        px_h = min(px_h, h - px_y)
+        sub_img = img[px_y:px_y + px_h, px_x:px_x + px_w]
+        colors[rect.name] = calc_fn(sub_img)
+    if previous_colors and smoothing > 0:
+        for name, color in colors.items():
+            if name in previous_colors:
+                prev = previous_colors[name]
+                alpha = smoothing
+                colors[name] = (
+                    int(color[0] * (1 - alpha) + prev[0] * alpha),
+                    int(color[1] * (1 - alpha) + prev[1] * alpha),
+                    int(color[2] * (1 - alpha) + prev[2] * alpha),
+                )
+    return colors
+
 # WLED LED bus type codes from const.h → human-readable names
 WLED_LED_TYPES: Dict[int, str] = {
     18: "WS2812 1ch", 19: "WS2812 1ch x3", 20: "WS2812 CCT", 21: "WS2812 WWA",
@@ -86,6 +138,7 @@ class ProcessingMetrics:
     last_update: Optional[datetime] = None
     start_time: Optional[datetime] = None
    fps_actual: float = 0.0
+    fps_potential: float = 0.0
 
 
 @dataclass
@@ -608,7 +661,7 @@
         state = self._targets[target_id]
         settings = state.settings
 
-        target_fps = state.resolved_target_fps or settings.fps
+        target_fps = settings.fps
         smoothing = settings.smoothing
         border_width = settings.border_width
         wled_brightness = settings.brightness
@@ -620,6 +673,7 @@
 
         frame_time = 1.0 / target_fps
         fps_samples = []
+        prev_frame_time_stamp = time.time()
 
         # Check if the device has test mode active — skip capture while in test mode
         device_state = self._devices.get(state.device_id)
@@ -634,35 +688,27 @@
                 continue
 
             try:
-                # Get frame from live stream
-                capture = await asyncio.to_thread(state.live_stream.get_latest_frame)
+                # Batch all CPU work (frame read + processing) in a single thread call.
+                led_colors = await asyncio.to_thread(
+                    _process_frame,
+                    state.live_stream, border_width,
+                    state.pixel_mapper, state.previous_colors, smoothing,
+                )
 
-                if capture is None:
+                if led_colors is None:
                     if state.metrics.frames_processed == 0:
                         logger.info(f"Capture returned None for target {target_id} (no new frame yet)")
                     await asyncio.sleep(frame_time)
                     continue
 
-                # Extract border pixels
-                border_pixels = await asyncio.to_thread(extract_border_pixels, capture, border_width)
-
-                # Map to LED colors
-                led_colors = await asyncio.to_thread(state.pixel_mapper.map_border_to_leds, border_pixels)
-
-                # Apply smoothing
-                if state.previous_colors and smoothing > 0:
-                    led_colors = await asyncio.to_thread(
-                        smooth_colors,
-                        led_colors,
-                        state.previous_colors,
-                        smoothing,
-                    )
-
                 # Send to WLED with device brightness
                 if not state.is_running or state.wled_client is None:
                     break
                 brightness_value = int(wled_brightness * 255)
-                await state.wled_client.send_pixels(led_colors, brightness=brightness_value)
+                if state.wled_client.use_ddp:
+                    state.wled_client.send_pixels_fast(led_colors, brightness=brightness_value)
+                else:
+                    await state.wled_client.send_pixels(led_colors, brightness=brightness_value)
 
                 # Update metrics
                 state.metrics.frames_processed += 1
@@ -671,13 +717,19 @@
                 state.metrics.last_update = datetime.utcnow()
                 state.previous_colors = led_colors
 
-                # Calculate actual FPS
-                loop_time = time.time() - loop_start
-                fps_samples.append(1.0 / loop_time if loop_time > 0 else 0)
+                # Calculate actual FPS from frame-to-frame interval
+                now = time.time()
+                interval = now - prev_frame_time_stamp
+                prev_frame_time_stamp = now
+                fps_samples.append(1.0 / interval if interval > 0 else 0)
                 if len(fps_samples) > 10:
                     fps_samples.pop(0)
                 state.metrics.fps_actual = sum(fps_samples) / len(fps_samples)
 
+                # Potential FPS = how fast the pipeline could run without throttle
+                processing_time = now - loop_start
+                state.metrics.fps_potential = 1.0 / processing_time if processing_time > 0 else 0
+
             except Exception as e:
                 state.metrics.errors_count += 1
                 state.metrics.last_error = str(e)
@@ -728,7 +780,8 @@
             "device_id": state.device_id,
             "processing": state.is_running,
             "fps_actual": metrics.fps_actual if state.is_running else None,
-            "fps_target": state.resolved_target_fps or state.settings.fps,
+            "fps_potential": metrics.fps_potential if state.is_running else None,
+            "fps_target": state.settings.fps,
             "display_index": state.resolved_display_index if state.resolved_display_index is not None else state.settings.display_index,
             "last_update": metrics.last_update,
             "errors": [metrics.last_error] if metrics.last_error else [],
@@ -1131,7 +1184,7 @@
         state = self._kc_targets[target_id]
         settings = state.settings
 
-        target_fps = state.resolved_target_fps or settings.fps
+        target_fps = settings.fps
        smoothing = settings.smoothing
 
         # Select color calculation function
@@ -1157,45 +1210,16 @@
             loop_start = time.time()
 
             try:
-                capture = await asyncio.to_thread(state.live_stream.get_latest_frame)
-                if capture is None:
+                # Batch all CPU work in a single thread call
+                colors = await asyncio.to_thread(
+                    _process_kc_frame,
+                    state.live_stream, rectangles, calc_fn,
+                    state.previous_colors, smoothing,
+                )
+                if colors is None:
                     await asyncio.sleep(frame_time)
                     continue
 
-                img = capture.image
-                h, w = img.shape[:2]
-
-                colors: Dict[str, Tuple[int, int, int]] = {}
-                for rect in rectangles:
-                    # Convert relative coords to pixel coords
-                    px_x = max(0, int(rect.x * w))
-                    px_y = max(0, int(rect.y * h))
-                    px_w = max(1, int(rect.width * w))
-                    px_h = max(1, int(rect.height * h))
-
-                    # Clamp to image bounds
-                    px_x = min(px_x, w - 1)
-                    px_y = min(px_y, h - 1)
-                    px_w = min(px_w, w - px_x)
-                    px_h = min(px_h, h - px_y)
-
-                    # Extract sub-image and compute color
-                    sub_img = img[px_y:px_y + px_h, px_x:px_x + px_w]
-                    color = calc_fn(sub_img)
-                    colors[rect.name] = color
-
-                # Apply per-rectangle temporal smoothing
-                if state.previous_colors and smoothing > 0:
-                    for name, color in colors.items():
-                        if name in state.previous_colors:
-                            prev = state.previous_colors[name]
-                            alpha = smoothing
-                            colors[name] = (
-                                int(color[0] * (1 - alpha) + prev[0] * alpha),
-                                int(color[1] * (1 - alpha) + prev[1] * alpha),
-                                int(color[2] * (1 - alpha) + prev[2] * alpha),
-                            )
-
                 state.previous_colors = dict(colors)
                 state.latest_colors = dict(colors)
 
@@ -1281,7 +1305,7 @@
             "target_id": target_id,
             "processing": state.is_running,
             "fps_actual": round(state.metrics.fps_actual, 1) if state.is_running else None,
-            "fps_target": state.resolved_target_fps or state.settings.fps,
+            "fps_target": state.settings.fps,
             "last_update": state.metrics.last_update.isoformat() if state.metrics.last_update else None,
             "errors": [state.metrics.last_error] if state.metrics.last_error else [],
         }
@@ -1300,7 +1324,7 @@
             "target_id": target_id,
             "processing": state.is_running,
             "fps_actual": round(state.metrics.fps_actual, 1),
-            "fps_target": state.resolved_target_fps or state.settings.fps,
+            "fps_target": state.settings.fps,
             "uptime_seconds": round(uptime, 1),
             "frames_processed": state.metrics.frames_processed,
             "errors_count": state.metrics.errors_count,
diff --git a/server/src/wled_controller/core/wled_client.py b/server/src/wled_controller/core/wled_client.py
index 8382182..ae8b096 100644
--- a/server/src/wled_controller/core/wled_client.py
+++ b/server/src/wled_controller/core/wled_client.py
@@ -6,6 +6,7 @@ from typing import List, Tuple, Optional, Dict, Any
 from urllib.parse import urlparse
 
 import httpx
+import numpy as np
 
 from wled_controller.utils import get_logger
 from wled_controller.core.ddp_client import BusConfig, DDPClient
@@ -420,6 +421,30 @@ class WLEDClient:
             logger.error(f"Failed to send pixels via HTTP: {e}")
             raise
 
+    def send_pixels_fast(
+        self,
+        pixels: List[Tuple[int, int, int]],
+        brightness: int = 255,
+    ) -> None:
+        """Optimized send for the hot loop — numpy packing + brightness, fire-and-forget DDP.
+
+        Synchronous (no await). Only works for DDP path.
+        Falls back to raising if DDP is not available.
+
+        Args:
+            pixels: List of (R, G, B) tuples
+            brightness: Global brightness (0-255)
+        """
+        if not self.use_ddp or not self._ddp_client:
+            raise RuntimeError("send_pixels_fast requires DDP; use send_pixels for HTTP")
+
+        pixel_array = np.array(pixels, dtype=np.uint8)
+
+        if brightness < 255:
+            pixel_array = (pixel_array.astype(np.float32) * (brightness / 255.0)).astype(np.uint8)
+
+        self._ddp_client.send_pixels_numpy(pixel_array)
+
     async def set_power(self, on: bool) -> bool:
         """Turn WLED device on or off.
 
diff --git a/server/src/wled_controller/static/app.js b/server/src/wled_controller/static/app.js
index f8f595e..23e0b1e 100644
--- a/server/src/wled_controller/static/app.js
+++ b/server/src/wled_controller/static/app.js
@@ -4168,6 +4168,7 @@ function createTargetCard(target, deviceMap, sourceMap) {