fix: production-readiness hardening — security, perf, a11y, observability

Security - Default scripts_management, callbacks_management, links_management, and media_folders_management to False so a leaked token cannot escalate to RCE through admin CRUD endpoints. - TokenSpec + scope hierarchy (read | control | admin); legacy bare-string api_tokens entries promote to admin for back-compat. Management endpoints now require admin scope. - WebSocket subprotocol auth (Sec-WebSocket-Protocol: media-server.token.<T>) preferred over ?token= query so the token no longer lands in URL/history/Referer; query fallback retained for HA integration back-compat. - Origin allow-list check on the WS endpoint (CSWSH defence). - In-process token-bucket rate limiter: 5/min for failed auths, 10/min for /api/scripts/execute and /api/callbacks/execute. - shell=False subprocess path (shlex.split) + per-parameter regex `pattern` in ScriptParameterConfig to harden shell=true scripts against parameter injection (Windows cmd.exe env-var expansion). - CSP gains form-action, worker-src, manifest-src directives. - Refuse cors_origins=["*"] at startup; strip token=... from uvicorn access logs; validate Gitea release tag against strict SemVer regex. - noopener noreferrer + no-referrer referrerpolicy on every outbound link. - icacls hardening of config.yaml on Windows (current user + SYSTEM + Administrators only); 0600 still enforced on POSIX. - WS volume handler clamps input and never drops the socket on bad messages. Performance - Album-art read in windows_media gated by track key — was decoding the WinRT thumbnail twice per second regardless of track changes. - /api/media/artwork returns content-derived ETag + Cache-Control so the browser sends If-None-Match and gets 304s on track repeats. - Foreground-service ctypes argtypes hoisted to one-time module init (was re-declaring ~14 prototypes per probe). - display_service _static_cache keyed by (edid_hash, ...) 
tuple with eviction of disappeared monitors — fixes stale capabilities on hot-plug swaps where the new topology has the same monitor count. - Visualizer rAF loop paused on document.hidden, resumed on visible. Reliability / bug fixes - Lifespan rewritten as try/yield/finally so a partial-startup failure cannot orphan background tasks or executors. - _run_callback in routes/media.py keeps a strong task ref (GC-safe) and uses the dedicated callback executor instead of the default pool. - macos_media.set_volume() no longer always returns True. - TrayManager._restart_requested initialised in __init__; set before signalling exit so the main thread observes it correctly. - Missing static_dir now logs a WARNING instead of silent UI disable. UX / accessibility / PWA - manifest.json theme_color and background_color match the Studio Reference base (#0E0D0B); added id and scope for PWA installability. - ARIA on mini-player icon buttons; inner SVGs marked aria-hidden. - OS mediaSession API wired so headset / lockscreen / Bluetooth buttons drive play/pause/next/prev/seek and show track metadata + artwork. Observability - X-Request-ID middleware (accept upstream id if it matches a safe regex, otherwise UUID4); request_id_var added to ContextVars and included in every log line alongside the token label. - Audit log (append-only JSONL) for every script + callback execution, including the on_play/on_pause/etc. event callbacks. Background-thread writer; queue capped; flushed in lifespan teardown. Deployment - proxy_headers + forwarded_allow_ips plumbed through Settings → uvicorn.Config for reverse-proxy installs. - HTTPS support via ssl_certfile + ssl_keyfile (+ optional password); startup refuses to launch with only one of the pair set. - Thumbnail cache moved from project-root .cache to %LOCALAPPDATA%/media-server/cache (Windows) and $XDG_CACHE_HOME/media-server/thumbnails (POSIX). 
Tests - 35 new tests across auth scopes, rate limiter, browser path traversal (../ NUL UNC absolute), script-param validation incl. regex, Gitea tag whitelist, config atomic write + POSIX perms. 47 passed / 4 skipped.
2026-05-22 22:25:54 +03:00
parent 450f9fe1ee
commit d131ba461c
31 changed files with 1586 additions and 204 deletions
@@ -8,12 +8,14 @@ import time
 from concurrent.futures import ThreadPoolExecutor
 from typing import Any

-from fastapi import APIRouter, Depends, HTTPException, status
+from fastapi import APIRouter, Depends, HTTPException, Request, status
 from pydantic import BaseModel, Field

 from ..auth import verify_token
 from ..config import CallbackConfig, settings
 from ..config_manager import config_manager
+from ..services.rate_limit import check as ratelimit_check
+from ..services.rate_limit import get_peer

 router = APIRouter(prefix="/api/callbacks", tags=["callbacks"])
 logger = logging.getLogger(__name__)
@@ -28,6 +30,7 @@ def shutdown_callback_executor() -> None:


 def _require_callbacks_management() -> None:
+    """Authorise a callbacks-CRUD operation. Operator flag + per-token admin scope."""
    if not settings.callbacks_management:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
@@ -36,6 +39,14 @@ def _require_callbacks_management() -> None:
                " in config.yaml to enable."
            ),
        )
+    from ..auth import auth_enabled, token_has_scope, token_label_var
+    if auth_enabled():
+        label = token_label_var.get("unknown")
+        if not token_has_scope(label, "admin"):
+            raise HTTPException(
+                status_code=status.HTTP_403_FORBIDDEN,
+                detail=f"Token '{label}' lacks required scope: admin",
+            )


 class CallbackInfo(BaseModel):
@@ -122,6 +133,7 @@ async def list_callbacks(_: str = Depends(verify_token)) -> list[CallbackInfo]:
@router.post("/execute/{callback_name}")
 async def execute_callback(
    callback_name: str,
+    http_request: Request,
    _: str = Depends(verify_token),
 ) -> CallbackExecuteResponse:
    """Execute a callback for debugging purposes.
@@ -132,6 +144,16 @@ async def execute_callback(
    Returns:
        Execution result including stdout, stderr, and exit code
    """
+    # Rate-limit callback execution per peer (10/min) — callbacks also run
+    # subprocesses and need the same protection as scripts.
+    allowed, retry_after = ratelimit_check("execute", get_peer(http_request))
+    if not allowed:
+        raise HTTPException(
+            status_code=429,
+            detail="Too many callback executions, slow down",
+            headers={"Retry-After": str(int(retry_after or 60))},
+        )
+
    # Validate callback name
    _validate_callback_name(callback_name)

@@ -146,6 +168,8 @@ async def execute_callback(

    logger.info(f"Executing callback for debugging: {callback_name}")

+    from ..services.audit_log import record_script_execution
+
    try:
        # Execute in dedicated thread pool to not block the default executor
        loop = asyncio.get_running_loop()
@@ -159,6 +183,15 @@ async def execute_callback(
            ),
        )

+        record_script_execution(
+            kind="callback",
+            name=callback_name,
+            exit_code=result["exit_code"],
+            duration=result.get("execution_time"),
+            stdout=result.get("stdout"),
+            stderr=result.get("stderr"),
+        )
+
        return CallbackExecuteResponse(
            success=result["exit_code"] == 0,
            callback=callback_name,
@@ -170,6 +203,13 @@ async def execute_callback(

    except Exception as e:
        logger.error(f"Callback execution error: {e}")
+        record_script_execution(
+            kind="callback",
+            name=callback_name,
+            exit_code=None,
+            duration=None,
+            error=str(e),
+        )
        return CallbackExecuteResponse(
            success=False,
            callback=callback_name,