Optimize streaming pipeline and capture hot paths
- Replace asyncio.to_thread with dedicated ThreadPoolExecutor (skip per-frame context copy overhead) - Move brightness scaling into _process_frame thread (avoid extra numpy array copies on event loop) - Remove PIL intermediate in MSS capture (direct bytes→numpy) - Unify median/dominant pixel mapping to numpy arrays (eliminate Python list-of-tuples path and duplicate Phase 2/3 code) - Cache CalibrationConfig.segments property (avoid ~240 rebuilds/sec) - Make KC WebSocket broadcasts concurrent via asyncio.gather - Fix fps_samples list.pop(0) → deque(maxlen=10) in both processors - Cache time.time() calls to reduce redundant syscalls per frame - Log event queue drops instead of silently discarding Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -273,7 +273,7 @@ class KCTargetProcessor(TargetProcessor):
|
||||
calc_fn = calc_fns.get(settings.interpolation_mode, calculate_average_color)
|
||||
|
||||
frame_time = 1.0 / target_fps
|
||||
fps_samples: List[float] = []
|
||||
fps_samples: collections.deque = collections.deque(maxlen=10)
|
||||
timing_samples: collections.deque = collections.deque(maxlen=10)
|
||||
prev_frame_time_stamp = time.time()
|
||||
prev_capture = None
|
||||
@@ -366,8 +366,6 @@ class KCTargetProcessor(TargetProcessor):
|
||||
interval = now - prev_frame_time_stamp
|
||||
prev_frame_time_stamp = now
|
||||
fps_samples.append(1.0 / interval if interval > 0 else 0)
|
||||
if len(fps_samples) > 10:
|
||||
fps_samples.pop(0)
|
||||
self._metrics.fps_actual = sum(fps_samples) / len(fps_samples)
|
||||
|
||||
# Potential FPS
|
||||
@@ -401,7 +399,7 @@ class KCTargetProcessor(TargetProcessor):
|
||||
logger.info(f"KC processing loop ended for target {self._target_id}")
|
||||
|
||||
async def _broadcast_colors(self, colors: Dict[str, Tuple[int, int, int]]) -> None:
|
||||
"""Broadcast extracted colors to WebSocket clients."""
|
||||
"""Broadcast extracted colors to WebSocket clients (concurrent sends)."""
|
||||
if not self._ws_clients:
|
||||
return
|
||||
|
||||
@@ -415,12 +413,15 @@ class KCTargetProcessor(TargetProcessor):
|
||||
"timestamp": datetime.utcnow().isoformat(),
|
||||
})
|
||||
|
||||
disconnected = []
|
||||
for ws in self._ws_clients:
|
||||
async def _send_safe(ws):
|
||||
try:
|
||||
await ws.send_text(message)
|
||||
return True
|
||||
except Exception:
|
||||
disconnected.append(ws)
|
||||
return False
|
||||
|
||||
results = await asyncio.gather(*[_send_safe(ws) for ws in self._ws_clients])
|
||||
|
||||
disconnected = [ws for ws, ok in zip(self._ws_clients, results) if not ok]
|
||||
for ws in disconnected:
|
||||
self._ws_clients.remove(ws)
|
||||
|
||||
@@ -136,7 +136,7 @@ class ProcessorManager:
|
||||
try:
|
||||
q.put_nowait(event)
|
||||
except asyncio.QueueFull:
|
||||
pass
|
||||
logger.warning(f"Event queue full, dropping: {event.get('type', '?')}")
|
||||
|
||||
async def _get_http_client(self) -> httpx.AsyncClient:
|
||||
"""Get or create a shared HTTP client for health checks."""
|
||||
|
||||
@@ -4,6 +4,7 @@ from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import collections
|
||||
import concurrent.futures
|
||||
import time
|
||||
from datetime import datetime
|
||||
from typing import TYPE_CHECKING, Optional
|
||||
@@ -31,16 +32,22 @@ if TYPE_CHECKING:
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
_frame_executor = concurrent.futures.ThreadPoolExecutor(
|
||||
max_workers=4, thread_name_prefix="frame-proc",
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CPU-bound frame processing (runs in thread pool via asyncio.to_thread)
|
||||
# CPU-bound frame processing (runs in dedicated thread-pool executor)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _process_frame(capture, border_width, pixel_mapper, previous_colors, smoothing):
|
||||
def _process_frame(capture, border_width, pixel_mapper, previous_colors, smoothing, brightness):
|
||||
"""All CPU-bound work for one WLED frame.
|
||||
|
||||
Returns (led_colors, timing_ms) where led_colors is numpy array (N, 3) uint8
|
||||
and timing_ms is a dict with per-stage timing in milliseconds.
|
||||
Returns (raw_colors, send_colors, timing_ms).
|
||||
raw_colors: unscaled array for smoothing history.
|
||||
send_colors: brightness-scaled array ready for DDP send.
|
||||
timing_ms: dict with per-stage timing in milliseconds.
|
||||
"""
|
||||
t0 = time.perf_counter()
|
||||
border_pixels = extract_border_pixels(capture, border_width)
|
||||
@@ -58,13 +65,20 @@ def _process_frame(capture, border_width, pixel_mapper, previous_colors, smoothi
|
||||
led_colors = led_colors.astype(np.uint8)
|
||||
t3 = time.perf_counter()
|
||||
|
||||
# Apply brightness scaling in thread pool (avoids extra array copies on event loop)
|
||||
if brightness < 255:
|
||||
send_colors = (led_colors.astype(np.uint16) * brightness >> 8).astype(np.uint8)
|
||||
else:
|
||||
send_colors = led_colors
|
||||
t4 = time.perf_counter()
|
||||
|
||||
timing_ms = {
|
||||
"extract": (t1 - t0) * 1000,
|
||||
"map_leds": (t2 - t1) * 1000,
|
||||
"smooth": (t3 - t2) * 1000,
|
||||
"total": (t3 - t0) * 1000,
|
||||
"total": (t4 - t0) * 1000,
|
||||
}
|
||||
return led_colors, timing_ms
|
||||
return led_colors, send_colors, timing_ms
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -405,16 +419,17 @@ class WledTargetProcessor(TargetProcessor):
|
||||
|
||||
frame_time = 1.0 / target_fps
|
||||
standby_interval = settings.standby_interval
|
||||
fps_samples = []
|
||||
fps_samples: collections.deque = collections.deque(maxlen=10)
|
||||
timing_samples: collections.deque = collections.deque(maxlen=10)
|
||||
prev_frame_time_stamp = time.time()
|
||||
prev_capture = None
|
||||
last_send_time = 0.0
|
||||
send_timestamps: collections.deque = collections.deque()
|
||||
loop = asyncio.get_running_loop()
|
||||
|
||||
try:
|
||||
while self._is_running:
|
||||
loop_start = time.time()
|
||||
now = loop_start = time.time()
|
||||
|
||||
# Re-fetch device info for runtime changes (test mode, brightness)
|
||||
device_info = self._ctx.get_device_info(self._device_id)
|
||||
@@ -445,39 +460,43 @@ class WledTargetProcessor(TargetProcessor):
|
||||
self._led_client.send_pixels_fast(self._previous_colors, brightness=brightness_value)
|
||||
else:
|
||||
await self._led_client.send_pixels(self._previous_colors, brightness=brightness_value)
|
||||
last_send_time = time.time()
|
||||
send_timestamps.append(last_send_time)
|
||||
now = time.time()
|
||||
last_send_time = now
|
||||
send_timestamps.append(now)
|
||||
self._metrics.frames_keepalive += 1
|
||||
self._metrics.frames_skipped += 1
|
||||
now_ts = time.time()
|
||||
while send_timestamps and send_timestamps[0] < now_ts - 1.0:
|
||||
while send_timestamps and send_timestamps[0] < now - 1.0:
|
||||
send_timestamps.popleft()
|
||||
self._metrics.fps_current = len(send_timestamps)
|
||||
await asyncio.sleep(frame_time)
|
||||
continue
|
||||
prev_capture = capture
|
||||
|
||||
# CPU-bound work in thread pool
|
||||
led_colors, frame_timing = await asyncio.to_thread(
|
||||
_process_frame,
|
||||
capture, border_width,
|
||||
self._pixel_mapper, self._previous_colors, smoothing,
|
||||
)
|
||||
|
||||
# Send to LED device with brightness
|
||||
if not self._is_running or self._led_client is None:
|
||||
break
|
||||
# Compute brightness before thread dispatch
|
||||
brightness_value = int(led_brightness * 255)
|
||||
if device_info and device_info.software_brightness < 255:
|
||||
brightness_value = brightness_value * device_info.software_brightness // 255
|
||||
|
||||
# CPU-bound work in dedicated thread-pool executor
|
||||
raw_colors, send_colors, frame_timing = await loop.run_in_executor(
|
||||
_frame_executor,
|
||||
_process_frame, capture, border_width,
|
||||
self._pixel_mapper, self._previous_colors, smoothing,
|
||||
brightness_value,
|
||||
)
|
||||
|
||||
# Send to LED device (brightness already applied in thread)
|
||||
if not self._is_running or self._led_client is None:
|
||||
break
|
||||
t_send_start = time.perf_counter()
|
||||
if self._led_client.supports_fast_send:
|
||||
self._led_client.send_pixels_fast(led_colors, brightness=brightness_value)
|
||||
self._led_client.send_pixels_fast(send_colors)
|
||||
else:
|
||||
await self._led_client.send_pixels(led_colors, brightness=brightness_value)
|
||||
await self._led_client.send_pixels(send_colors)
|
||||
send_ms = (time.perf_counter() - t_send_start) * 1000
|
||||
last_send_time = time.time()
|
||||
send_timestamps.append(last_send_time)
|
||||
now = time.time()
|
||||
last_send_time = now
|
||||
send_timestamps.append(now)
|
||||
|
||||
# Per-stage timing (rolling average over last 10 frames)
|
||||
frame_timing["send"] = send_ms
|
||||
@@ -494,22 +513,19 @@ class WledTargetProcessor(TargetProcessor):
|
||||
if self._metrics.frames_processed <= 3 or self._metrics.frames_processed % 100 == 0:
|
||||
logger.info(
|
||||
f"Frame {self._metrics.frames_processed} for {self._target_id} "
|
||||
f"({len(led_colors)} LEDs, bri={brightness_value}) — "
|
||||
f"({len(send_colors)} LEDs, bri={brightness_value}) — "
|
||||
f"extract={frame_timing['extract']:.1f}ms "
|
||||
f"map={frame_timing['map_leds']:.1f}ms "
|
||||
f"smooth={frame_timing['smooth']:.1f}ms "
|
||||
f"send={send_ms:.1f}ms"
|
||||
)
|
||||
self._metrics.last_update = datetime.utcnow()
|
||||
self._previous_colors = led_colors
|
||||
self._previous_colors = raw_colors
|
||||
|
||||
# Calculate actual FPS
|
||||
now = time.time()
|
||||
# Calculate actual FPS (reuse cached 'now' from send timestamp)
|
||||
interval = now - prev_frame_time_stamp
|
||||
prev_frame_time_stamp = now
|
||||
fps_samples.append(1.0 / interval if interval > 0 else 0)
|
||||
if len(fps_samples) > 10:
|
||||
fps_samples.pop(0)
|
||||
self._metrics.fps_actual = sum(fps_samples) / len(fps_samples)
|
||||
|
||||
# Potential FPS
|
||||
@@ -527,7 +543,7 @@ class WledTargetProcessor(TargetProcessor):
|
||||
logger.error(f"Processing error for target {self._target_id}: {e}", exc_info=True)
|
||||
|
||||
# Throttle to target FPS
|
||||
elapsed = time.time() - loop_start
|
||||
elapsed = now - loop_start
|
||||
remaining = frame_time - elapsed
|
||||
if remaining > 0:
|
||||
await asyncio.sleep(remaining)
|
||||
|
||||
Reference in New Issue
Block a user