Optimize streaming pipeline and capture hot paths

- Replace asyncio.to_thread with dedicated ThreadPoolExecutor (skip
  per-frame context copy overhead)
- Move brightness scaling into _process_frame thread (avoid extra
  numpy array copies on event loop)
- Remove PIL intermediate in MSS capture (direct bytes→numpy)
- Unify median/dominant pixel mapping to numpy arrays (eliminate
  Python list-of-tuples path and duplicate Phase 2/3 code)
- Cache CalibrationConfig.segments property (avoid ~240 rebuilds/sec)
- Make KC WebSocket broadcasts concurrent via asyncio.gather
- Fix fps_samples list.pop(0) → deque(maxlen=10) in both processors
- Cache time.time() calls to reduce redundant syscalls per frame
- Log event queue drops instead of silently discarding

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-19 22:55:21 +03:00
parent bfe6a7a2ab
commit fbf597dc29
6 changed files with 131 additions and 127 deletions

View File

@@ -273,7 +273,7 @@ class KCTargetProcessor(TargetProcessor):
calc_fn = calc_fns.get(settings.interpolation_mode, calculate_average_color)
frame_time = 1.0 / target_fps
fps_samples: List[float] = []
fps_samples: collections.deque = collections.deque(maxlen=10)
timing_samples: collections.deque = collections.deque(maxlen=10)
prev_frame_time_stamp = time.time()
prev_capture = None
@@ -366,8 +366,6 @@ class KCTargetProcessor(TargetProcessor):
interval = now - prev_frame_time_stamp
prev_frame_time_stamp = now
fps_samples.append(1.0 / interval if interval > 0 else 0)
if len(fps_samples) > 10:
fps_samples.pop(0)
self._metrics.fps_actual = sum(fps_samples) / len(fps_samples)
# Potential FPS
@@ -401,7 +399,7 @@ class KCTargetProcessor(TargetProcessor):
logger.info(f"KC processing loop ended for target {self._target_id}")
async def _broadcast_colors(self, colors: Dict[str, Tuple[int, int, int]]) -> None:
"""Broadcast extracted colors to WebSocket clients."""
"""Broadcast extracted colors to WebSocket clients (concurrent sends)."""
if not self._ws_clients:
return
@@ -415,12 +413,15 @@ class KCTargetProcessor(TargetProcessor):
"timestamp": datetime.utcnow().isoformat(),
})
disconnected = []
for ws in self._ws_clients:
async def _send_safe(ws):
try:
await ws.send_text(message)
return True
except Exception:
disconnected.append(ws)
return False
results = await asyncio.gather(*[_send_safe(ws) for ws in self._ws_clients])
disconnected = [ws for ws, ok in zip(self._ws_clients, results) if not ok]
for ws in disconnected:
self._ws_clients.remove(ws)

View File

@@ -136,7 +136,7 @@ class ProcessorManager:
try:
q.put_nowait(event)
except asyncio.QueueFull:
pass
logger.warning(f"Event queue full, dropping: {event.get('type', '?')}")
async def _get_http_client(self) -> httpx.AsyncClient:
"""Get or create a shared HTTP client for health checks."""

View File

@@ -4,6 +4,7 @@ from __future__ import annotations
import asyncio
import collections
import concurrent.futures
import time
from datetime import datetime
from typing import TYPE_CHECKING, Optional
@@ -31,16 +32,22 @@ if TYPE_CHECKING:
logger = get_logger(__name__)
_frame_executor = concurrent.futures.ThreadPoolExecutor(
max_workers=4, thread_name_prefix="frame-proc",
)
# ---------------------------------------------------------------------------
# CPU-bound frame processing (runs in thread pool via asyncio.to_thread)
# CPU-bound frame processing (runs in dedicated thread-pool executor)
# ---------------------------------------------------------------------------
def _process_frame(capture, border_width, pixel_mapper, previous_colors, smoothing):
def _process_frame(capture, border_width, pixel_mapper, previous_colors, smoothing, brightness):
"""All CPU-bound work for one WLED frame.
Returns (led_colors, timing_ms) where led_colors is numpy array (N, 3) uint8
and timing_ms is a dict with per-stage timing in milliseconds.
Returns (raw_colors, send_colors, timing_ms).
raw_colors: unscaled array for smoothing history.
send_colors: brightness-scaled array ready for DDP send.
timing_ms: dict with per-stage timing in milliseconds.
"""
t0 = time.perf_counter()
border_pixels = extract_border_pixels(capture, border_width)
@@ -58,13 +65,20 @@ def _process_frame(capture, border_width, pixel_mapper, previous_colors, smoothi
led_colors = led_colors.astype(np.uint8)
t3 = time.perf_counter()
# Apply brightness scaling in thread pool (avoids extra array copies on event loop)
if brightness < 255:
send_colors = (led_colors.astype(np.uint16) * brightness >> 8).astype(np.uint8)
else:
send_colors = led_colors
t4 = time.perf_counter()
timing_ms = {
"extract": (t1 - t0) * 1000,
"map_leds": (t2 - t1) * 1000,
"smooth": (t3 - t2) * 1000,
"total": (t3 - t0) * 1000,
"total": (t4 - t0) * 1000,
}
return led_colors, timing_ms
return led_colors, send_colors, timing_ms
# ---------------------------------------------------------------------------
@@ -405,16 +419,17 @@ class WledTargetProcessor(TargetProcessor):
frame_time = 1.0 / target_fps
standby_interval = settings.standby_interval
fps_samples = []
fps_samples: collections.deque = collections.deque(maxlen=10)
timing_samples: collections.deque = collections.deque(maxlen=10)
prev_frame_time_stamp = time.time()
prev_capture = None
last_send_time = 0.0
send_timestamps: collections.deque = collections.deque()
loop = asyncio.get_running_loop()
try:
while self._is_running:
loop_start = time.time()
now = loop_start = time.time()
# Re-fetch device info for runtime changes (test mode, brightness)
device_info = self._ctx.get_device_info(self._device_id)
@@ -445,39 +460,43 @@ class WledTargetProcessor(TargetProcessor):
self._led_client.send_pixels_fast(self._previous_colors, brightness=brightness_value)
else:
await self._led_client.send_pixels(self._previous_colors, brightness=brightness_value)
last_send_time = time.time()
send_timestamps.append(last_send_time)
now = time.time()
last_send_time = now
send_timestamps.append(now)
self._metrics.frames_keepalive += 1
self._metrics.frames_skipped += 1
now_ts = time.time()
while send_timestamps and send_timestamps[0] < now_ts - 1.0:
while send_timestamps and send_timestamps[0] < now - 1.0:
send_timestamps.popleft()
self._metrics.fps_current = len(send_timestamps)
await asyncio.sleep(frame_time)
continue
prev_capture = capture
# CPU-bound work in thread pool
led_colors, frame_timing = await asyncio.to_thread(
_process_frame,
capture, border_width,
self._pixel_mapper, self._previous_colors, smoothing,
)
# Send to LED device with brightness
if not self._is_running or self._led_client is None:
break
# Compute brightness before thread dispatch
brightness_value = int(led_brightness * 255)
if device_info and device_info.software_brightness < 255:
brightness_value = brightness_value * device_info.software_brightness // 255
# CPU-bound work in dedicated thread-pool executor
raw_colors, send_colors, frame_timing = await loop.run_in_executor(
_frame_executor,
_process_frame, capture, border_width,
self._pixel_mapper, self._previous_colors, smoothing,
brightness_value,
)
# Send to LED device (brightness already applied in thread)
if not self._is_running or self._led_client is None:
break
t_send_start = time.perf_counter()
if self._led_client.supports_fast_send:
self._led_client.send_pixels_fast(led_colors, brightness=brightness_value)
self._led_client.send_pixels_fast(send_colors)
else:
await self._led_client.send_pixels(led_colors, brightness=brightness_value)
await self._led_client.send_pixels(send_colors)
send_ms = (time.perf_counter() - t_send_start) * 1000
last_send_time = time.time()
send_timestamps.append(last_send_time)
now = time.time()
last_send_time = now
send_timestamps.append(now)
# Per-stage timing (rolling average over last 10 frames)
frame_timing["send"] = send_ms
@@ -494,22 +513,19 @@ class WledTargetProcessor(TargetProcessor):
if self._metrics.frames_processed <= 3 or self._metrics.frames_processed % 100 == 0:
logger.info(
f"Frame {self._metrics.frames_processed} for {self._target_id} "
f"({len(led_colors)} LEDs, bri={brightness_value}) — "
f"({len(send_colors)} LEDs, bri={brightness_value}) — "
f"extract={frame_timing['extract']:.1f}ms "
f"map={frame_timing['map_leds']:.1f}ms "
f"smooth={frame_timing['smooth']:.1f}ms "
f"send={send_ms:.1f}ms"
)
self._metrics.last_update = datetime.utcnow()
self._previous_colors = led_colors
self._previous_colors = raw_colors
# Calculate actual FPS
now = time.time()
# Calculate actual FPS (reuse cached 'now' from send timestamp)
interval = now - prev_frame_time_stamp
prev_frame_time_stamp = now
fps_samples.append(1.0 / interval if interval > 0 else 0)
if len(fps_samples) > 10:
fps_samples.pop(0)
self._metrics.fps_actual = sum(fps_samples) / len(fps_samples)
# Potential FPS
@@ -527,7 +543,7 @@ class WledTargetProcessor(TargetProcessor):
logger.error(f"Processing error for target {self._target_id}: {e}", exc_info=True)
# Throttle to target FPS
elapsed = time.time() - loop_start
elapsed = now - loop_start
remaining = frame_time - elapsed
if remaining > 0:
await asyncio.sleep(remaining)