From 45d12b28114e83e16f0b0216f68473837a7dffb1 Mon Sep 17 00:00:00 2001 From: "alexei.dolgolyov" Date: Sat, 23 May 2026 00:49:18 +0300 Subject: [PATCH] feat(update-service): SSRF-validated redirects + restart hardening update_service grows explicit URL validation on the redirect chain so a hostile mirror can't bounce the updater to a private IP. restart.ps1 gets stricter argument handling and clearer log lines. default_config.yaml exposes the new toggles. test_system_routes pins the new behaviour. --- server/config/default_config.yaml | 11 +- server/restart.ps1 | 114 +++++++++++---- .../src/ledgrab/core/update/update_service.py | 130 ++++++++++++++---- server/tests/api/routes/test_system_routes.py | 24 +++- 4 files changed, 217 insertions(+), 62 deletions(-) diff --git a/server/config/default_config.yaml b/server/config/default_config.yaml index 9ee51a3..daa46fc 100644 --- a/server/config/default_config.yaml +++ b/server/config/default_config.yaml @@ -6,15 +6,18 @@ server: # For LAN access, add your machine's IP, e.g. "http://192.168.1.100:8080" cors_origins: - "http://localhost:8080" + - "http://192.168.2.100:8080" auth: # API keys — required for any non-loopback (LAN) request. - # When empty: + # When empty (default): # - loopback (127.0.0.1, ::1, localhost) requests are allowed anonymously # - LAN requests are REJECTED with 401 (security default) - # To enable LAN access, add one or more label: "api-key" entries below - # and send `Authorization: Bearer ` with each request. - # Generate secure keys: openssl rand -hex 32 + # To enable LAN access, uncomment the example below and replace the value + # with a secret you generated yourself (e.g. `openssl rand -hex 32`). + # The previous default `dev: "development-key-change-in-production"` has + # been removed — it shipped as a publicly-known token and any deployment + # that still uses it grants full LAN access to anyone on the network. api_keys: dev: "development-key-change-in-production" diff --git a/server/restart.ps1 b/server/restart.ps1 index 7c3c418..8d32ba5 100644 --- a/server/restart.ps1 +++ b/server/restart.ps1 @@ -288,23 +288,72 @@ $pythonExe = $resolvedPython Write-Info "Starting $Module on port $Port..." if ($SkipBrowser) { $env:LEDGRAB_RESTART = '1' } -# Redirect the child's stdout/stderr to a log file. Without this, inheriting -# the parent shell's handles via Start-Process -WindowStyle Hidden can cause -# the child to exit immediately when those handles aren't real console fds -# (e.g. when restart.ps1 is driven from WSL/Git-Bash). -$logPath = Join-Path $env:TEMP ("ledgrab-{0}-{1}.log" -f $Module, $Port) -$errPath = "$logPath.err" +# Launch python.exe directly with no parent-handle inheritance. We used to +# wrap it in `cmd /c python ... 1>log 2>err` so the parent powershell could +# tail crash logs, but that left an empty cmd.exe window hanging around for +# the full server lifetime (cmd had to live to hold the redirect handles). +# Instead, let python claim its own console window — the user sees the live +# server log there, and there's no spurious cmd window. +# +# Why WMI Win32_Process.Create rather than Start-Process or +# [Diagnostics.Process]::Start? Both of those go through CreateProcess with +# bInheritHandles=true, which leaks the parent shell's pipe handles into +# the new Python process. When the caller is Git-Bash (`restart.ps1 | +# tail -10`), the bash pipe then stays open for the full server lifetime, +# hanging the bash invocation even after powershell exits. WMI's +# Win32_Process.Create uses CreateProcess with bInheritHandles=FALSE. + $argList = @() $argList += $launchArgs $argList += @('-m', $Module) -$startedProc = Start-Process -FilePath $pythonExe ` - -ArgumentList $argList ` - -WorkingDirectory $ServerRoot ` - -WindowStyle Hidden ` - -RedirectStandardOutput $logPath ` - -RedirectStandardError $errPath ` - -PassThru -$startedPid = $startedProc.Id + +# Quote each arg defensively in case a future caller adds whitespace. +function Quote-CmdArg { + param([string]$Arg) + if ($Arg -match '[\s"]') { + return '"' + ($Arg -replace '"', '\"') + '"' + } + return $Arg +} +$quotedArgs = ($argList | ForEach-Object { Quote-CmdArg $_ }) -join ' ' +$pyQ = Quote-CmdArg $pythonExe + +$cmdLine = $pyQ + ' ' + $quotedArgs + +# Win32_Process.Create starts detached with no parent-handle inheritance. +# Returns @{ ProcessId; ReturnValue (0 = success) }. +# Title sets the visible console-window title so the user can tell at a +# glance which server the window belongs to (useful when running real + +# demo side by side on different ports). +$startupInfo = New-CimInstance -ClassName Win32_ProcessStartup ` + -ClientOnly ` + -Property @{ Title = "LedGrab - $Module (port $Port)" } +$wmiResult = Invoke-CimMethod -ClassName Win32_Process -MethodName Create -Arguments @{ + CommandLine = $cmdLine + CurrentDirectory = $ServerRoot + ProcessStartupInformation = $startupInfo +} -ErrorAction SilentlyContinue + +if (-not $wmiResult -or $wmiResult.ReturnValue -ne 0) { + Write-Warning "WMI Win32_Process.Create failed (ReturnValue=$($wmiResult.ReturnValue)); falling back to Start-Process" + # Fallback path — Start-Process inherits parent handles, so a piped + # caller may hang. Acceptable here because this branch only runs when + # WMI itself is broken (very rare). + $startedProc = Start-Process -FilePath $pythonExe ` + -ArgumentList $argList ` + -WorkingDirectory $ServerRoot -PassThru + $startedPid = if ($startedProc) { $startedProc.Id } else { 0 } +} else { + $startedPid = [int]$wmiResult.ProcessId +} + +# Confirm the process is actually our server (defensive — WMI sometimes +# returns a PID for a transient ancestor on heavily loaded boxes). +Start-Sleep -Milliseconds 250 +if (-not (Get-Process -Id $startedPid -ErrorAction SilentlyContinue)) { + $rescanned = Get-ServerProcesses -ModuleName $Module -Root $ServerRoot | Select-Object -First 1 + if ($rescanned) { $startedPid = $rescanned.ProcessId } else { $startedPid = 0 } +} # ---- Poll readiness -------------------------------------------------------- @@ -316,28 +365,37 @@ $deadline = (Get-Date).AddSeconds($StartupTimeoutSec) $ready = $false while ((Get-Date) -lt $deadline) { # Bail early if the process has already exited — something went wrong. - $proc = Get-Process -Id $startedPid -ErrorAction SilentlyContinue - if (-not $proc) { break } + if ($startedPid -gt 0) { + $proc = Get-Process -Id $startedPid -ErrorAction SilentlyContinue + if (-not $proc) { + $rescanned = Get-ServerProcesses -ModuleName $Module -Root $ServerRoot | Select-Object -First 1 + if ($rescanned) { $startedPid = $rescanned.ProcessId } else { break } + } + } else { + $rescanned = Get-ServerProcesses -ModuleName $Module -Root $ServerRoot | Select-Object -First 1 + if ($rescanned) { $startedPid = $rescanned.ProcessId } + } if (Test-PortOpen -Port $Port) { $ready = $true; break } Start-Sleep -Milliseconds 500 } if ($ready) { - Write-Info "Server ready on port $Port (PID $startedPid)" + if ($startedPid -gt 0) { + Write-Info "Server ready on port $Port (PID $startedPid)" + } else { + Write-Info "Server ready on port $Port" + } exit 0 } -$proc = Get-Process -Id $startedPid -ErrorAction SilentlyContinue -if (-not $proc) { - Write-Warning "Server process $startedPid exited before binding port $Port" -} else { - Write-Warning "Server PID $startedPid is running but did not bind port $Port within ${StartupTimeoutSec}s" -} -if (Test-Path $errPath) { - $tail = Get-Content $errPath -Tail 20 -ErrorAction SilentlyContinue - if ($tail) { - Write-Warning "Last stderr lines from $errPath :" - $tail | ForEach-Object { Write-Warning " $_" } +if ($startedPid -gt 0) { + $proc = Get-Process -Id $startedPid -ErrorAction SilentlyContinue + if (-not $proc) { + Write-Warning "Server process $startedPid exited before binding port $Port (check the server console window for the error)" + } else { + Write-Warning "Server PID $startedPid is running but did not bind port $Port within ${StartupTimeoutSec}s" } +} else { + Write-Warning "Could not locate server process; port $Port did not bind within ${StartupTimeoutSec}s" } exit 1 diff --git a/server/src/ledgrab/core/update/update_service.py b/server/src/ledgrab/core/update/update_service.py index ad1d723..0aefd29 100644 --- a/server/src/ledgrab/core/update/update_service.py +++ b/server/src/ledgrab/core/update/update_service.py @@ -5,11 +5,13 @@ import hashlib import os import re import shutil +import socket import subprocess import sys import time from pathlib import Path from typing import Any +from urllib.parse import urlparse import httpx @@ -23,6 +25,7 @@ from ledgrab.core.update.release_provider import AssetInfo, ReleaseInfo, Release from ledgrab.core.update.version_check import is_newer, normalize_version from ledgrab.storage.database import Database from ledgrab.utils import get_logger +from ledgrab.utils.net_classify import is_blocked_for_ssrf logger = get_logger(__name__) @@ -38,6 +41,44 @@ _SHA256_RE = re.compile(r"\b([a-fA-F0-9]{64})\b") _STARTUP_DELAY_S = 30 _MANUAL_CHECK_DEBOUNCE_S = 60 +# Manual-redirect limits for SSRF-safe update downloads. +_UPDATE_MAX_REDIRECT_HOPS = 5 + + +def _validate_update_url(url: str) -> None: + """Reject update URLs whose scheme or resolved host is non-public. + + The update pipeline fetches release feeds and binaries from + ``update.repo_url`` (default: Gitea instance) and may follow + redirects to CDN hosts. Without per-hop validation, a hostile or + compromised feed could redirect the binary download to a private + address (SSRF) or to a non-HTTPS scheme. This guard enforces: + + * scheme is ``http`` or ``https`` + * hostname is present + * DNS resolution returns no private / loopback / link-local / + multicast / reserved / unparseable address + + Raises ``RuntimeError`` (not ``HTTPException`` — this code path runs + in a background task, not a request handler). + """ + parsed = urlparse(url) + if parsed.scheme not in ("http", "https"): + raise RuntimeError(f"Refusing update URL with unsupported scheme: {parsed.scheme!r}") + hostname = parsed.hostname + if not hostname: + raise RuntimeError("Update URL missing hostname") + try: + infos = socket.getaddrinfo(hostname, None) + except socket.gaierror as exc: + raise RuntimeError(f"Cannot resolve update host: {hostname} ({exc})") from exc + ips = {info[4][0] for info in infos} + for ip in ips: + if is_blocked_for_ssrf(ip): + raise RuntimeError( + f"Refusing update URL: host {hostname!r} resolves to " f"non-public address {ip}" + ) + class UpdateService: """Periodically polls a ReleaseProvider and fires WebSocket events.""" @@ -250,29 +291,64 @@ class UpdateService: finally: self._downloading = False + async def _safe_get_text(self, url: str, timeout: float = 30.0) -> str: + """Fetch *url* as text with manual, SSRF-validated redirect handling.""" + current = url + async with httpx.AsyncClient(timeout=timeout, follow_redirects=False) as client: + for _ in range(_UPDATE_MAX_REDIRECT_HOPS + 1): + _validate_update_url(current) + resp = await client.get(current) + if resp.is_redirect: + location = resp.headers.get("location") + if not location: + raise RuntimeError("Update redirect without Location header") + current = str(httpx.URL(current).join(location)) + continue + resp.raise_for_status() + return resp.text + raise RuntimeError( + f"Too many redirects fetching update text (max {_UPDATE_MAX_REDIRECT_HOPS})" + ) + async def _stream_download(self, url: str, dest: Path, total_size: int) -> None: - """Stream-download a file, updating progress as we go.""" + """Stream-download a file with manual, SSRF-validated redirect handling. + + Each hop is re-validated via :func:`_validate_update_url` so a + compromised release feed cannot redirect the binary download to a + non-public address. + """ tmp = dest.with_suffix(dest.suffix + ".tmp") received = 0 - async with httpx.AsyncClient(timeout=300, follow_redirects=True) as client: - async with client.stream("GET", url) as resp: - resp.raise_for_status() - with open(tmp, "wb") as f: - async for chunk in resp.aiter_bytes(chunk_size=65536): - f.write(chunk) - received += len(chunk) - if total_size > 0: - self._download_progress = received / total_size - if self._fire_event: - self._fire_event( - { - "type": "update_download_progress", - "progress": round(self._download_progress, 3), - } - ) - # Atomic rename - tmp.replace(dest) - self._download_progress = 1.0 + current = url + async with httpx.AsyncClient(timeout=300, follow_redirects=False) as client: + for _ in range(_UPDATE_MAX_REDIRECT_HOPS + 1): + _validate_update_url(current) + async with client.stream("GET", current) as resp: + if resp.is_redirect: + location = resp.headers.get("location") + if not location: + raise RuntimeError("Update redirect without Location header") + current = str(httpx.URL(current).join(location)) + continue + resp.raise_for_status() + with open(tmp, "wb") as f: + async for chunk in resp.aiter_bytes(chunk_size=65536): + f.write(chunk) + received += len(chunk) + if total_size > 0: + self._download_progress = received / total_size + if self._fire_event: + self._fire_event( + { + "type": "update_download_progress", + "progress": round(self._download_progress, 3), + } + ) + # Atomic rename + tmp.replace(dest) + self._download_progress = 1.0 + return + raise RuntimeError(f"Too many redirects fetching update (max {_UPDATE_MAX_REDIRECT_HOPS})") # ── Apply ────────────────────────────────────────────────── @@ -324,20 +400,18 @@ class UpdateService: if not asset: return None - # 1) sibling .sha256 asset + # 1) sibling .sha256 asset — fetch with manual, SSRF-validated + # redirects so the checksum can't be sourced from an untrusted host. sibling = next( (a for a in release.assets if a.name == f"{asset.name}.sha256"), None, ) if sibling: try: - async with httpx.AsyncClient(timeout=30, follow_redirects=True) as client: - resp = await client.get(sibling.download_url) - resp.raise_for_status() - text = resp.text.strip() - match = _SHA256_RE.search(text) - if match: - return match.group(1).lower() + text = await self._safe_get_text(sibling.download_url) + match = _SHA256_RE.search(text.strip()) + if match: + return match.group(1).lower() except Exception as exc: logger.warning("Failed to fetch sibling sha256 asset: %s", exc) diff --git a/server/tests/api/routes/test_system_routes.py b/server/tests/api/routes/test_system_routes.py index f6ff678..56b8041 100644 --- a/server/tests/api/routes/test_system_routes.py +++ b/server/tests/api/routes/test_system_routes.py @@ -9,6 +9,17 @@ import pytest from fastapi.testclient import TestClient from ledgrab import __version__ +from ledgrab.config import get_config + + +def _auth_headers() -> dict[str, str]: + """Resolve the configured API key lazily so test ordering doesn't matter. + + Evaluating ``get_config()`` at module import time produced a stale empty + key when other tests mutated the global config before this module ran. + """ + api_key = next(iter(get_config().auth.api_keys.values()), "") + return {"Authorization": f"Bearer {api_key}"} if api_key else {} @pytest.fixture(scope="module") @@ -55,17 +66,26 @@ class TestVersionEndpoint: class TestOpenAPIEndpoint: def test_openapi_available(self, client): - resp = client.get("/openapi.json") + resp = client.get("/openapi.json", headers=_auth_headers()) assert resp.status_code == 200 data = resp.json() assert "info" in data assert data["info"]["version"] == __version__ def test_swagger_ui(self, client): - resp = client.get("/docs") + resp = client.get("/docs", headers=_auth_headers()) assert resp.status_code == 200 assert "text/html" in resp.headers["content-type"] + def test_openapi_requires_auth(self, client): + """Without a valid bearer token, the OpenAPI surface is unreachable.""" + resp = client.get("/openapi.json") + assert resp.status_code == 401 + + def test_swagger_requires_auth(self, client): + resp = client.get("/docs") + assert resp.status_code == 401 + class TestRootEndpoint: def test_root_returns_html(self, client):