Add real-time system performance charts to dashboard

Backend: GET /api/v1/system/performance endpoint using psutil (CPU/RAM) and nvidia-ml-py (GPU utilization, memory, temperature) with graceful fallback when no NVIDIA GPU is available.

Frontend: Chart.js line charts with rolling 60-sample history persisted to sessionStorage, flicker-free updates via persistent DOM and diff-based dynamic section refresh.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-18 21:02:55 +03:00
parent 66d1a77981
commit 4a1b4f7674
11 changed files with 390 additions and 46 deletions
--- a/server/src/wled_controller/api/routes/system.py
+++ b/server/src/wled_controller/api/routes/system.py
@@ -1,8 +1,9 @@
-"""System routes: health, version, displays."""
+"""System routes: health, version, displays, performance."""

 import sys
 from datetime import datetime

+import psutil
 from fastapi import APIRouter, HTTPException

 from wled_controller import __version__
@@ -10,7 +11,9 @@ from wled_controller.api.auth import AuthRequired
 from wled_controller.api.schemas.system import (
    DisplayInfo,
    DisplayListResponse,
+    GpuInfo,
    HealthResponse,
+    PerformanceResponse,
    ProcessListResponse,
    VersionResponse,
 )
@@ -19,6 +22,23 @@ from wled_controller.utils import get_logger

 logger = get_logger(__name__)

+# Prime psutil CPU counter (first call always returns 0.0)
+psutil.cpu_percent(interval=None)
+
+# Try to initialize NVIDIA GPU monitoring; on any failure GPU stats stay disabled
+_nvml = _nvml_handle = None
+_nvml_available = False
+try:
+    import pynvml as _pynvml_mod  # from nvidia-ml-py (the standalone pynvml package is deprecated)
+    _pynvml_mod.nvmlInit()
+    _nvml_handle = _pynvml_mod.nvmlDeviceGetHandleByIndex(0)
+    logger.info(f"NVIDIA GPU monitoring enabled: {_pynvml_mod.nvmlDeviceGetName(_nvml_handle)}")
+    # Publish last: if any NVML call above raised, the flag must not claim availability
+    _nvml = _pynvml_mod
+    _nvml_available = True
+except Exception as exc:
+    logger.info(f"NVIDIA GPU monitoring unavailable (pynvml not installed or no NVIDIA GPU): {exc}")
+
 router = APIRouter()


@@ -113,3 +133,40 @@ async def get_running_processes(_: AuthRequired):
            status_code=500,
            detail=f"Failed to retrieve process list: {str(e)}"
        )
+
+
+@router.get(
+    "/api/v1/system/performance",
+    response_model=PerformanceResponse,
+    tags=["Config"],
+)
+async def get_system_performance(_: AuthRequired):
+    """Get current system performance metrics (CPU, RAM, GPU)."""
+    mem = psutil.virtual_memory()
+
+    # GPU stats are best-effort: a failed NVML query leaves gpu as None instead of raising
+    gpu = None
+    if _nvml_available:
+        try:
+            util = _nvml.nvmlDeviceGetUtilizationRates(_nvml_handle)
+            mem_info = _nvml.nvmlDeviceGetMemoryInfo(_nvml_handle)
+            temp = _nvml.nvmlDeviceGetTemperature(_nvml_handle, _nvml.NVML_TEMPERATURE_GPU)
+            gpu = GpuInfo(
+                name=_nvml.nvmlDeviceGetName(_nvml_handle),
+                utilization=float(util.gpu),
+                memory_used_mb=round(mem_info.used / 1024 / 1024, 1),
+                memory_total_mb=round(mem_info.total / 1024 / 1024, 1),
+                temperature_c=float(temp),
+            )
+        except Exception as exc:
+            # Debug level, so a persistently failing GPU does not flood the log on every request
+            logger.debug(f"GPU metrics query failed: {exc}")
+
+    return PerformanceResponse(
+        cpu_percent=psutil.cpu_percent(interval=None),
+        ram_used_mb=round(mem.used / 1024 / 1024, 1),
+        ram_total_mb=round(mem.total / 1024 / 1024, 1),
+        ram_percent=mem.percent,
+        gpu=gpu,
+        timestamp=datetime.utcnow(),
+    )
--- a/server/src/wled_controller/api/schemas/system.py
+++ b/server/src/wled_controller/api/schemas/system.py
@@ -47,3 +47,24 @@ class ProcessListResponse(BaseModel):

    processes: List[str] = Field(description="Sorted list of unique process names")
    count: int = Field(description="Number of unique processes")
+
+
+class GpuInfo(BaseModel):
+    """Metrics reported for a GPU; every field is optional and defaults to None."""
+
+    name: str | None = Field(None, description="GPU device name")
+    utilization: float | None = Field(None, description="GPU core usage percent")
+    memory_used_mb: float | None = Field(None, description="GPU memory used in MB")
+    memory_total_mb: float | None = Field(None, description="GPU total memory in MB")
+    temperature_c: float | None = Field(None, description="GPU temperature in Celsius")
+
+
+class PerformanceResponse(BaseModel):
+    """Response model carrying CPU and RAM readings plus optional GPU info."""
+
+    cpu_percent: float = Field(..., description="System-wide CPU usage percent")
+    ram_used_mb: float = Field(..., description="RAM used in MB")
+    ram_total_mb: float = Field(..., description="RAM total in MB")
+    ram_percent: float = Field(..., description="RAM usage percent")
+    gpu: GpuInfo | None = Field(None, description="GPU info (null if unavailable)")
+    timestamp: datetime = Field(..., description="Measurement timestamp")