feat(metrics): battery + thermal-zone readings with dashboard temp chart
Build Android APK / build-android (push) Failing after 1m40s
Lint & Test / test (push) Successful in 4m18s

Extends MetricsProvider with thermals() returning a ThermalSnapshot
(battery_percent, battery_temp_c, cpu_temp_c — all optional). Each
provider implements it independently:

- AndroidMetricsProvider reads /sys/class/power_supply/battery/{capacity,
  temp} (battery temp is tenths of degC) and walks
  /sys/class/thermal/thermal_zone*, filtering by zone type
  (cpu/soc/tsens/core) so battery and skin sensors don't dominate the
  reading. Rejects nonsense values like INT_MAX from buggy zones.
- PsutilMetricsProvider uses sensors_battery() and
  sensors_temperatures() when present. psutil does not expose
  sensors_temperatures() on Windows/macOS, so cpu_temp_c stays null
  there; battery_percent is reported wherever psutil finds a battery.
- NullMetricsProvider returns the empty snapshot.

PerformanceResponse gains battery_percent / battery_temp_c / cpu_temp_c.
The metrics-history ring buffer also carries cpu_temp / battery_pct /
battery_temp per sample so the dashboard can graph them over time.

Frontend dashboard (perf-charts.ts) gets a new Temperature chart card,
hidden by default and revealed only after seed/poll confirms the
backend reports cpu_temp_c. Battery temperature shows inline as a
secondary badge. The GPU card now also hides entirely when the backend
reports gpu=null instead of showing an "unavailable" placeholder.
HOST_ONLY_KEYS prevents the System/App/Both toggle from flipping a
non-existent app dataset for temp.

Tests: 6 new for thermals (battery tenths-of-degC parsing, CPU zone
filtering, fallback when sensors absent, INT_MAX rejection); 18 metrics
tests total; full suite 733 passing.
This commit is contained in:
2026-04-14 13:48:01 +03:00
parent 546b24d015
commit ecae05d00b
14 changed files with 308 additions and 23 deletions
+3 -3
View File
@@ -77,6 +77,6 @@ Drive USB LED controllers (APA102, WS2812) connected directly to the Android TV
Beyond the `/proc`-based AndroidMetricsProvider that's now in place:
- [ ] Optional: app-specific memory via `Debug.getMemoryInfo()` through a Kotlin → Python Chaquopy bridge (more accurate than `VmRSS` for split-app-process accounting)
- [ ] Consider: device battery/temperature readings for TV boxes (some have thermal throttling)
- [ ] Optional: GPU usage via `/sys/class/kgsl/kgsl-3d0/gpubusy` on Adreno, Mali-specific paths for Mali GPUs
- [x] Device battery + thermal-zone readings (`/sys/class/power_supply/battery/{capacity,temp}`, `/sys/class/thermal/thermal_zone*/temp` filtered by zone type). Surfaced through `MetricsProvider.thermals()`, `PerformanceResponse.{cpu_temp_c,battery_percent,battery_temp_c}`, the metrics-history snapshot, and a new dashboard temperature chart that hides itself when the backend reports null. GPU card now hides (no "unavailable" placeholder) when no GPU is present.
- [WONTDO] Optional: app-specific memory via `Debug.getMemoryInfo()` through a Kotlin → Python Chaquopy bridge (more accurate than `VmRSS` for split-app-process accounting)
- [WONTDO] Optional: GPU usage via `/sys/class/kgsl/kgsl-3d0/gpubusy` on Adreno, Mali-specific paths for Mali GPUs
+4
View File
@@ -273,6 +273,7 @@ def get_system_performance(_: AuthRequired):
metrics = get_metrics_provider()
mem = metrics.virtual_memory()
proc = metrics.process_snapshot()
thermals = metrics.thermals()
app_ram_mb = round(proc.rss_bytes / 1024 / 1024, 1)
gpu = None
@@ -313,6 +314,9 @@ def get_system_performance(_: AuthRequired):
app_cpu_percent=proc.cpu_percent,
app_ram_mb=app_ram_mb,
gpu=gpu,
battery_percent=thermals.battery_percent,
battery_temp_c=thermals.battery_temp_c,
cpu_temp_c=thermals.cpu_temp_c,
timestamp=datetime.now(timezone.utc),
)
+10
View File
@@ -80,6 +80,16 @@ class PerformanceResponse(BaseModel):
app_cpu_percent: float = Field(description="App process CPU usage percent")
app_ram_mb: float = Field(description="App process resident memory in MB")
gpu: GpuInfo | None = Field(default=None, description="GPU info (null if unavailable)")
battery_percent: float | None = Field(
default=None, description="Battery charge percent (null if no battery)"
)
battery_temp_c: float | None = Field(
default=None, description="Battery temperature in °C (null if unsupported)"
)
cpu_temp_c: float | None = Field(
default=None,
description="Hottest CPU/SoC thermal zone in °C (null if unsupported)",
)
timestamp: datetime = Field(description="Measurement timestamp")
@@ -28,6 +28,7 @@ def _collect_system_snapshot() -> dict:
metrics = get_metrics_provider()
mem = metrics.virtual_memory()
proc = metrics.process_snapshot()
thermals = metrics.thermals()
snapshot = {
"t": datetime.now(timezone.utc).isoformat(),
"cpu": metrics.cpu_percent(),
@@ -39,6 +40,9 @@ def _collect_system_snapshot() -> dict:
"gpu_util": None,
"gpu_temp": None,
"app_gpu_mem": None,
"cpu_temp": thermals.cpu_temp_c,
"battery_pct": thermals.battery_percent,
"battery_temp": thermals.battery_temp_c,
}
try:
@@ -15,14 +15,18 @@ import { isActiveTab } from '../core/tab-registry.ts';
import { createColorPicker, registerColorPicker } from '../core/color-picker.ts';
const MAX_SAMPLES = 120;
const CHART_KEYS = ['cpu', 'ram', 'gpu'];
const CHART_KEYS = ['cpu', 'ram', 'gpu', 'temp'];
const PERF_MODE_KEY = 'perfMetricsMode';
/** Metrics that don't have a per-process variant (host-only). */
const HOST_ONLY_KEYS = new Set(['temp']);
/** Default accent colors per metric — distinct hues for visual identity. */
const METRIC_COLORS: Record<string, string> = {
cpu: '#FF6B6B', // warm coral
ram: '#A855F7', // electric violet
gpu: '#10B981', // emerald teal
temp: '#FCD34D', // amber / heat
};
/** Complementary app/process line colors — clearly different hue per metric. */
@@ -35,10 +39,11 @@ const APP_COLORS: Record<string, string> = {
type PerfMode = 'system' | 'app' | 'both';
let _pollTimer: ReturnType<typeof setInterval> | null = null;
let _charts: Record<string, any> = {}; // { cpu: Chart, ram: Chart, gpu: Chart }
let _history: Record<string, number[]> = { cpu: [], ram: [], gpu: [] };
let _appHistory: Record<string, number[]> = { cpu: [], ram: [], gpu: [] };
let _charts: Record<string, any> = {}; // { cpu, ram, gpu, temp }
let _history: Record<string, number[]> = { cpu: [], ram: [], gpu: [], temp: [] };
let _appHistory: Record<string, number[]> = { cpu: [], ram: [], gpu: [], temp: [] };
let _hasGpu: boolean | null = null; // null = unknown, true/false after first fetch
let _hasTemp: boolean | null = null; // null = unknown, true/false after first fetch
let _mode: PerfMode = (localStorage.getItem(PERF_MODE_KEY) as PerfMode) || 'both';
function _getColor(key: string): string {
@@ -103,7 +108,8 @@ export function setPerfMode(mode: PerfMode): void {
const showSystem = mode === 'system' || mode === 'both';
const showApp = mode === 'app' || mode === 'both';
chart.data.datasets[0].hidden = !showSystem;
chart.data.datasets[1].hidden = !showApp;
// Host-only metrics never have an app dataset to show.
chart.data.datasets[1].hidden = HOST_ONLY_KEYS.has(key) ? true : !showApp;
chart.update('none');
}
}
@@ -137,6 +143,13 @@ export function renderPerfSection(): string {
</div>
<div class="perf-chart-wrap"><span class="perf-chart-subtitle" id="perf-gpu-name"></span><canvas id="perf-chart-gpu"></canvas></div>
</div>
<div class="perf-chart-card" data-metric="temp" id="perf-temp-card" hidden>
<div class="perf-chart-header">
<span class="perf-chart-label">${t('dashboard.perf.temp')} ${createColorPicker({ id: 'perf-temp', currentColor: _getColor('temp'), onPick: undefined, anchor: 'left', showReset: true })}</span>
<span class="perf-chart-value" id="perf-temp-value">-</span>
</div>
<div class="perf-chart-wrap"><canvas id="perf-chart-temp"></canvas></div>
</div>
</div>`;
}
@@ -145,8 +158,9 @@ function _createChart(canvasId: string, key: string): any {
if (!ctx) return null;
const color = _getColor(key);
const appColor = _getAppColor(key);
const isHostOnly = HOST_ONLY_KEYS.has(key);
const showSystem = _mode === 'system' || _mode === 'both';
const showApp = _mode === 'app' || _mode === 'both';
const showApp = !isHostOnly && (_mode === 'app' || _mode === 'both');
return new Chart(ctx, {
type: 'line',
data: {
@@ -200,13 +214,27 @@ async function _seedFromServer(): Promise<void> {
_history.cpu = samples.map((s: any) => s.cpu).filter((v: any) => v != null);
_history.ram = samples.map((s: any) => s.ram_pct).filter((v: any) => v != null);
_history.gpu = samples.map((s: any) => s.gpu_util).filter((v: any) => v != null);
_history.temp = samples.map((s: any) => s.cpu_temp).filter((v: any) => v != null);
_appHistory.cpu = samples.map((s: any) => s.app_cpu).filter((v: any) => v != null);
_appHistory.ram = samples.map((s: any) => s.app_ram).filter((v: any) => v != null);
_appHistory.gpu = samples.map((s: any) => s.app_gpu_mem).filter((v: any) => v != null);
// Detect GPU availability from history
// Detect GPU availability from history. Only conclude "no GPU" when
// we actually have samples — an empty history shouldn't hide the
// card prematurely.
if (_history.gpu.length > 0) {
_hasGpu = true;
} else if (samples.length > 0) {
_hasGpu = false;
const card = document.getElementById('perf-gpu-card');
if (card) card.setAttribute('hidden', '');
}
// Detect temperature availability from history; reveal the card now
// so the user doesn't see it appear/disappear after the first poll.
if (_history.temp.length > 0) {
_hasTemp = true;
const card = document.getElementById('perf-temp-card');
if (card) card.removeAttribute('hidden');
}
for (const key of CHART_KEYS) {
@@ -236,6 +264,7 @@ export async function initPerfCharts(): Promise<void> {
_charts.cpu = _createChart('perf-chart-cpu', 'cpu');
_charts.ram = _createChart('perf-chart-ram', 'ram');
_charts.gpu = _createChart('perf-chart-gpu', 'gpu');
_charts.temp = _createChart('perf-chart-temp', 'temp');
await _seedFromServer();
}
@@ -331,6 +360,27 @@ async function _fetchPerformance(): Promise<void> {
));
}
// Temperature (host-only, no app variant)
if (data.cpu_temp_c != null) {
if (_hasTemp !== true) {
_hasTemp = true;
const card = document.getElementById('perf-temp-card');
if (card) card.removeAttribute('hidden');
}
_pushSample('temp', data.cpu_temp_c, null);
const tempEl = document.getElementById('perf-temp-value');
if (tempEl) {
let display = `${data.cpu_temp_c.toFixed(0)}°C`;
if (data.battery_temp_c != null) {
display += ` <span class="perf-val-app">bat ${data.battery_temp_c.toFixed(0)}°C</span>`;
}
tempEl.innerHTML = display;
}
} else if (_hasTemp === null) {
// No temp data on first poll → backend doesn't expose it; keep card hidden.
_hasTemp = false;
}
// GPU
if (data.gpu) {
_hasGpu = true;
@@ -353,16 +403,10 @@ async function _fetchPerformance(): Promise<void> {
if (nameEl && !nameEl.textContent) nameEl.textContent = data.gpu.name;
}
} else if (_hasGpu === null) {
// No GPU info on first poll → backend doesn't expose it; hide the card.
_hasGpu = false;
const card = document.getElementById('perf-gpu-card');
if (card) {
const canvas = card.querySelector('canvas');
if (canvas) canvas.style.display = 'none';
const noGpu = document.createElement('div');
noGpu.className = 'perf-chart-unavailable';
noGpu.textContent = t('dashboard.perf.unavailable');
card.appendChild(noGpu);
}
if (card) card.setAttribute('hidden', '');
}
} catch {
// Silently ignore fetch errors (e.g., network issues, tab hidden)
@@ -765,6 +765,7 @@
"dashboard.perf.cpu": "CPU",
"dashboard.perf.ram": "RAM",
"dashboard.perf.gpu": "GPU",
"dashboard.perf.temp": "Temperature",
"dashboard.perf.unavailable": "unavailable",
"dashboard.perf.color": "Chart color",
"dashboard.perf.mode.system": "System",
@@ -746,6 +746,7 @@
"dashboard.perf.cpu": "ЦП",
"dashboard.perf.ram": "ОЗУ",
"dashboard.perf.gpu": "ГП",
"dashboard.perf.temp": "Температура",
"dashboard.perf.unavailable": "недоступно",
"dashboard.perf.color": "Цвет графика",
"dashboard.perf.mode.system": "Система",
@@ -746,6 +746,7 @@
"dashboard.perf.cpu": "CPU",
"dashboard.perf.ram": "内存",
"dashboard.perf.gpu": "GPU",
"dashboard.perf.temp": "温度",
"dashboard.perf.unavailable": "不可用",
"dashboard.perf.color": "图表颜色",
"dashboard.perf.mode.system": "系统",
+2 -1
View File
@@ -20,7 +20,7 @@ from ledgrab.utils.platform import is_android
from .android_provider import AndroidMetricsProvider, is_supported as _android_supported
from .null_provider import NullMetricsProvider
from .psutil_provider import PsutilMetricsProvider
from .types import MemorySnapshot, MetricsProvider, ProcessSnapshot
from .types import MemorySnapshot, MetricsProvider, ProcessSnapshot, ThermalSnapshot
__all__ = [
"AndroidMetricsProvider",
@@ -29,6 +29,7 @@ __all__ = [
"NullMetricsProvider",
"ProcessSnapshot",
"PsutilMetricsProvider",
"ThermalSnapshot",
"get_metrics_provider",
"reset_metrics_provider",
]
@@ -19,7 +19,9 @@ import os
from dataclasses import dataclass
from typing import Optional
from .types import MemorySnapshot, ProcessSnapshot
import glob
from .types import MemorySnapshot, ProcessSnapshot, ThermalSnapshot
def is_supported() -> bool:
@@ -117,6 +119,60 @@ def _read_meminfo() -> MemorySnapshot:
)
def _read_int_file(path: str) -> Optional[int]:
    """Parse the single integer stored in the sysfs node at ``path``.

    Returns:
        The parsed value, or ``None`` when the node cannot be opened or
        read, or when its whitespace-stripped contents are not a valid
        integer literal.
    """
    try:
        with open(path, "r") as node:
            raw = node.read()
        return int(raw.strip())
    except (OSError, ValueError):
        return None
def _read_text_file(path: str) -> Optional[str]:
    """Read a sysfs node holding a short string; None on failure.

    Args:
        path: Absolute path of the node to read.

    Returns:
        The node's contents with surrounding whitespace stripped, or
        ``None`` when the node cannot be opened, read, or decoded.
    """
    try:
        # Pin the encoding so the result does not depend on the process
        # locale; the values compared against are plain ASCII tags.
        with open(path, "r", encoding="utf-8") as f:
            return f.read().strip()
    except (OSError, ValueError):
        # UnicodeDecodeError is a ValueError subclass: a node holding
        # undecodable bytes counts as "unreadable" instead of raising
        # into the caller.
        return None
def _read_battery() -> tuple[Optional[float], Optional[float]]:
    """Read battery charge and temperature from the power-supply sysfs node.

    Returns:
        ``(capacity_percent, temp_celsius)``. Each element is ``None``
        when its sysfs file could not be read or parsed.
    """
    battery_dir = "/sys/class/power_supply/battery"
    raw_capacity = _read_int_file(f"{battery_dir}/capacity")
    # The temp node is typically in tenths of a degree: 350 means 35.0 °C.
    raw_temp = _read_int_file(f"{battery_dir}/temp")

    percent = None if raw_capacity is None else float(raw_capacity)
    celsius = None if raw_temp is None else raw_temp / 10.0
    return percent, celsius
def _read_cpu_temp_c() -> Optional[float]:
    """Return the highest CPU/SoC thermal-zone temperature in °C.

    Every ``/sys/class/thermal/thermal_zone*`` directory is inspected.
    A zone whose ``type`` is readable and non-empty is only considered
    when the lower-cased type contains ``cpu``, ``soc``, ``tsens`` or
    ``core``; zones whose type is unreadable or empty are kept. The
    ``temp`` node is interpreted as millidegrees Celsius.

    Returns:
        The maximum accepted reading, or ``None`` when no zone yields a
        reading inside the -40..150 °C sanity window.
    """
    cpu_tags = ("cpu", "soc", "tsens", "core")
    peak: Optional[float] = None
    for zone in glob.glob("/sys/class/thermal/thermal_zone*"):
        kind = (_read_text_file(f"{zone}/type") or "").lower()
        # Battery/skin/usb sensors are noise for a CPU reading; an
        # untyped zone is given the benefit of the doubt.
        is_cpu_zone = not kind or any(tag in kind for tag in cpu_tags)
        if not is_cpu_zone:
            continue
        raw = _read_int_file(f"{zone}/temp")
        if raw is None:
            continue
        degrees = raw / 1000.0
        # Some buggy zones report nonsense such as 2147483647.
        if not (-40.0 <= degrees <= 150.0):
            continue
        peak = degrees if peak is None else max(peak, degrees)
    return peak
def _read_self_rss_bytes() -> int:
"""Read VmRSS (resident set size) for the current process from /proc/self/status."""
try:
@@ -189,3 +245,11 @@ class AndroidMetricsProvider:
self._last_host_total = host_sample.total
return ProcessSnapshot(cpu_percent=cpu, rss_bytes=_read_self_rss_bytes())
def thermals(self) -> ThermalSnapshot:
    """Collect battery and CPU thermal readings from sysfs.

    Returns:
        A ``ThermalSnapshot`` built from ``_read_battery()`` and
        ``_read_cpu_temp_c()``; a field is ``None`` whenever the helper
        that supplies it returned ``None``.
    """
    percent, battery_celsius = _read_battery()
    cpu_celsius = _read_cpu_temp_c()
    return ThermalSnapshot(
        battery_percent=percent,
        battery_temp_c=battery_celsius,
        cpu_temp_c=cpu_celsius,
    )
@@ -2,7 +2,7 @@
from __future__ import annotations
from .types import MemorySnapshot, ProcessSnapshot
from .types import MemorySnapshot, ProcessSnapshot, ThermalSnapshot
class NullMetricsProvider:
@@ -26,3 +26,6 @@ class NullMetricsProvider:
def process_snapshot(self) -> ProcessSnapshot:
return ProcessSnapshot(cpu_percent=0.0, rss_bytes=0)
def thermals(self) -> ThermalSnapshot:
    """Return a default-constructed ``ThermalSnapshot``; no sensors are read."""
    return ThermalSnapshot()
@@ -4,7 +4,7 @@ from __future__ import annotations
import os
from .types import MemorySnapshot, ProcessSnapshot
from .types import MemorySnapshot, ProcessSnapshot, ThermalSnapshot
class PsutilMetricsProvider:
@@ -44,3 +44,44 @@ class PsutilMetricsProvider:
cpu = self._process.cpu_percent(interval=None) / self._cpu_count
rss = int(self._process.memory_info().rss)
return ProcessSnapshot(cpu_percent=float(cpu), rss_bytes=rss)
def thermals(self) -> ThermalSnapshot:
    """Report battery charge and the hottest temperature sensor via psutil.

    Both psutil calls are optional and best-effort: a missing function,
    or any exception raised while reading, leaves the corresponding
    field as ``None``. ``battery_temp_c`` is never populated here.

    Returns:
        A ``ThermalSnapshot`` with ``battery_percent`` and
        ``cpu_temp_c`` filled in when they could be read.
    """
    battery_pct: float | None = None
    cpu_temp: float | None = None

    # Battery: only some hosts have one (laptops, tablets); the call
    # may be missing, may raise, or may return None on other hosts.
    read_battery = getattr(self._psutil, "sensors_battery", None)
    if read_battery is not None:
        try:
            status = read_battery()
            if status is not None:
                battery_pct = float(status.percent)
        except Exception:
            pass  # best-effort: leave battery_pct as None

    # Temperature: take the maximum ``current`` value over every entry
    # of every sensor group. sensors_temperatures() is not available on
    # every platform (e.g. absent on Windows/macOS).
    read_temps = getattr(self._psutil, "sensors_temperatures", None)
    if read_temps is not None:
        try:
            groups = read_temps() or {}
            peak = None
            for entry in (e for group in groups.values() for e in group):
                value = getattr(entry, "current", None)
                if value is not None and (peak is None or value > peak):
                    peak = float(value)
            cpu_temp = peak
        except Exception:
            pass  # best-effort: leave cpu_temp as None

    return ThermalSnapshot(
        battery_percent=battery_pct,
        battery_temp_c=None,
        cpu_temp_c=cpu_temp,
    )
+17 -1
View File
@@ -3,7 +3,7 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import Protocol
from typing import Optional, Protocol
@dataclass(frozen=True)
@@ -19,6 +19,21 @@ class ProcessSnapshot:
rss_bytes: int
@dataclass(frozen=True)
class ThermalSnapshot:
    """Battery + thermal readings; every field is optional.

    Frozen (immutable) value object; constructing it with no arguments
    yields a snapshot whose fields are all ``None``.

    Different platforms expose different subsets of these — desktops
    rarely have a battery temp, headless servers rarely report any
    thermal zone, and stock Android often locks down everything except
    the battery node. ``None`` means "not available", *not* "zero".
    """

    # Battery charge level in percent.
    battery_percent: Optional[float] = None
    # Battery temperature in °C.
    battery_temp_c: Optional[float] = None
    # Hottest reading in °C among the zones/sensors the provider considers.
    cpu_temp_c: Optional[float] = None  # max over the provider's candidate zones
class MetricsProvider(Protocol):
"""Read-only host + current-process metrics."""
@@ -28,3 +43,4 @@ class MetricsProvider(Protocol):
def cpu_count(self) -> int: ...
def virtual_memory(self) -> MemorySnapshot: ...
def process_snapshot(self) -> ProcessSnapshot: ...
def thermals(self) -> ThermalSnapshot: ...
+95
View File
@@ -13,6 +13,7 @@ from ledgrab.utils.metrics import (
NullMetricsProvider,
ProcessSnapshot,
PsutilMetricsProvider,
ThermalSnapshot,
get_metrics_provider,
reset_metrics_provider,
)
@@ -181,3 +182,97 @@ def test_factory_prefers_android_when_running_on_android(monkeypatch) -> None:
monkeypatch.setattr("ledgrab.utils.metrics._android_supported", lambda: True)
provider = get_metrics_provider()
assert isinstance(provider, AndroidMetricsProvider)
# ── Thermals ────────────────────────────────────────────────────────
def test_null_provider_thermals_are_all_none() -> None:
    """The null provider reports a default (all-``None``) snapshot."""
    provider = NullMetricsProvider()
    assert provider.thermals() == ThermalSnapshot()
def test_psutil_provider_thermals_picks_hottest_sensor() -> None:
    """Battery percent is passed through and the max sensor reading wins."""
    fake_psutil = MagicMock()
    fake_psutil.Process.return_value = MagicMock()
    fake_psutil.cpu_count.return_value = 4
    fake_psutil.sensors_battery.return_value = MagicMock(percent=78.0)
    fake_psutil.sensors_temperatures.return_value = {
        "coretemp": [MagicMock(current=reading) for reading in (55.0, 72.5)],
        "acpi": [MagicMock(current=40.0)],
    }

    result = PsutilMetricsProvider(fake_psutil).thermals()

    # 72.5 is the hottest value across all sensor groups.
    assert (result.battery_percent, result.cpu_temp_c) == (78.0, 72.5)
    # psutil doesn't expose battery temperature, so the field stays unset.
    assert result.battery_temp_c is None
def test_psutil_provider_thermals_handles_missing_sensors() -> None:
    """Without the optional sensor functions the snapshot is empty."""
    fake_psutil = MagicMock()
    fake_psutil.Process.return_value = MagicMock()
    fake_psutil.cpu_count.return_value = 1
    # Remove the optional sensor methods entirely (e.g. Windows psutil) so
    # attribute lookup on the mock fails instead of auto-creating a child.
    for missing in ("sensors_battery", "sensors_temperatures"):
        delattr(fake_psutil, missing)

    snapshot = PsutilMetricsProvider(fake_psutil).thermals()

    assert snapshot == ThermalSnapshot()
def test_android_battery_parses_tenths_of_celsius(monkeypatch) -> None:
    """Capacity becomes a float percent; temp is divided by ten."""
    sysfs_values = {
        "/sys/class/power_supply/battery/capacity": 78,
        "/sys/class/power_supply/battery/temp": 312,  # tenths of °C → 31.2°C
    }
    # Unknown paths yield None, mirroring an unreadable node.
    monkeypatch.setattr(android_mod, "_read_int_file", sysfs_values.get)

    assert android_mod._read_battery() == (78.0, 31.2)
def test_android_cpu_temp_filters_non_cpu_zones_and_picks_hottest(monkeypatch) -> None:
    """Battery/skin zones are ignored; the hottest CPU/SoC zone is returned."""
    zone_dirs = [
        "/sys/class/thermal/thermal_zone0",
        "/sys/class/thermal/thermal_zone1",
        "/sys/class/thermal/thermal_zone2",
        "/sys/class/thermal/thermal_zone3",
    ]
    zone_types = {
        "/sys/class/thermal/thermal_zone0/type": "battery",
        "/sys/class/thermal/thermal_zone1/type": "cpu-thermal",
        "/sys/class/thermal/thermal_zone2/type": "soc-max",
        "/sys/class/thermal/thermal_zone3/type": "skin-therm",
    }
    zone_temps = {
        # Battery & skin read hotter but must be dropped by the type filter.
        "/sys/class/thermal/thermal_zone0/temp": 99000,
        "/sys/class/thermal/thermal_zone1/temp": 52000,  # 52°C
        "/sys/class/thermal/thermal_zone2/temp": 67500,  # 67.5°C ← hottest
        "/sys/class/thermal/thermal_zone3/temp": 99000,
    }
    monkeypatch.setattr("glob.glob", lambda _: zone_dirs)
    monkeypatch.setattr(android_mod, "_read_text_file", zone_types.get)
    monkeypatch.setattr(android_mod, "_read_int_file", zone_temps.get)

    assert android_mod._read_cpu_temp_c() == 67.5
def test_android_cpu_temp_rejects_nonsense_values(monkeypatch) -> None:
    """A CPU zone reporting INT_MAX is discarded, leaving no reading."""
    int_max = 2147483647  # value some buggy zones report
    monkeypatch.setattr("glob.glob", lambda _: ["/sys/class/thermal/thermal_zone0"])
    monkeypatch.setattr(android_mod, "_read_text_file", lambda _: "cpu-thermal")
    monkeypatch.setattr(android_mod, "_read_int_file", lambda _: int_max)

    reading = android_mod._read_cpu_temp_c()

    assert reading is None