feat(update-service): SSRF-validated redirects + restart hardening

update_service grows explicit URL validation on the redirect chain so a
hostile mirror can't bounce the updater to a private IP. restart.ps1
gets stricter argument handling and clearer log lines.
default_config.yaml exposes the new toggles. test_system_routes pins
the new behaviour.
This commit is contained in:
2026-05-23 00:49:18 +03:00
parent 826e680f37
commit 45d12b2811
4 changed files with 217 additions and 62 deletions
+7 -4
View File
@@ -6,15 +6,18 @@ server:
# For LAN access, add your machine's IP, e.g. "http://192.168.1.100:8080"
cors_origins:
- "http://localhost:8080"
- "http://192.168.2.100:8080"
auth:
# API keys — required for any non-loopback (LAN) request.
# When empty:
# When empty (default):
# - loopback (127.0.0.1, ::1, localhost) requests are allowed anonymously
# - LAN requests are REJECTED with 401 (security default)
# To enable LAN access, add one or more label: "api-key" entries below
# and send `Authorization: Bearer <api-key>` with each request.
# Generate secure keys: openssl rand -hex 32
# To enable LAN access, uncomment the example below and replace the value
# with a secret you generated yourself (e.g. `openssl rand -hex 32`).
# The previous default `dev: "development-key-change-in-production"` has
# been removed — it shipped as a publicly-known token and any deployment
# that still uses it grants full LAN access to anyone on the network.
api_keys:
dev: "development-key-change-in-production"
+82 -24
View File
@@ -288,23 +288,72 @@ $pythonExe = $resolvedPython
Write-Info "Starting $Module on port $Port..."
if ($SkipBrowser) { $env:LEDGRAB_RESTART = '1' }
# Redirect the child's stdout/stderr to a log file. Without this, inheriting
# the parent shell's handles via Start-Process -WindowStyle Hidden can cause
# the child to exit immediately when those handles aren't real console fds
# (e.g. when restart.ps1 is driven from WSL/Git-Bash).
$logPath = Join-Path $env:TEMP ("ledgrab-{0}-{1}.log" -f $Module, $Port)
$errPath = "$logPath.err"
# Launch python.exe directly with no parent-handle inheritance. We used to
# wrap it in `cmd /c python ... 1>log 2>err` so the parent powershell could
# tail crash logs, but that left an empty cmd.exe window hanging around for
# the full server lifetime (cmd had to live to hold the redirect handles).
# Instead, let python claim its own console window — the user sees the live
# server log there, and there's no spurious cmd window.
#
# Why WMI Win32_Process.Create rather than Start-Process or
# [Diagnostics.Process]::Start? Both of those go through CreateProcess with
# bInheritHandles=true, which leaks the parent shell's pipe handles into
# the new Python process. When the caller is Git-Bash (`restart.ps1 |
# tail -10`), the bash pipe then stays open for the full server lifetime,
# hanging the bash invocation even after powershell exits. WMI's
# Win32_Process.Create uses CreateProcess with bInheritHandles=FALSE.
$argList = @()
$argList += $launchArgs
$argList += @('-m', $Module)
$startedProc = Start-Process -FilePath $pythonExe `
# Quote each arg defensively in case a future caller adds whitespace.
function Quote-CmdArg {
param([string]$Arg)
if ($Arg -match '[\s"]') {
return '"' + ($Arg -replace '"', '\"') + '"'
}
return $Arg
}
$quotedArgs = ($argList | ForEach-Object { Quote-CmdArg $_ }) -join ' '
$pyQ = Quote-CmdArg $pythonExe
$cmdLine = $pyQ + ' ' + $quotedArgs
# Win32_Process.Create starts detached with no parent-handle inheritance.
# Returns @{ ProcessId; ReturnValue (0 = success) }.
# Title sets the visible console-window title so the user can tell at a
# glance which server the window belongs to (useful when running real +
# demo side by side on different ports).
$startupInfo = New-CimInstance -ClassName Win32_ProcessStartup `
-ClientOnly `
-Property @{ Title = "LedGrab - $Module (port $Port)" }
$wmiResult = Invoke-CimMethod -ClassName Win32_Process -MethodName Create -Arguments @{
CommandLine = $cmdLine
CurrentDirectory = $ServerRoot
ProcessStartupInformation = $startupInfo
} -ErrorAction SilentlyContinue
if (-not $wmiResult -or $wmiResult.ReturnValue -ne 0) {
Write-Warning "WMI Win32_Process.Create failed (ReturnValue=$($wmiResult.ReturnValue)); falling back to Start-Process"
# Fallback path — Start-Process inherits parent handles, so a piped
# caller may hang. Acceptable here because this branch only runs when
# WMI itself is broken (very rare).
$startedProc = Start-Process -FilePath $pythonExe `
-ArgumentList $argList `
-WorkingDirectory $ServerRoot `
-WindowStyle Hidden `
-RedirectStandardOutput $logPath `
-RedirectStandardError $errPath `
-PassThru
$startedPid = $startedProc.Id
-WorkingDirectory $ServerRoot -PassThru
$startedPid = if ($startedProc) { $startedProc.Id } else { 0 }
} else {
$startedPid = [int]$wmiResult.ProcessId
}
# Confirm the process is actually our server (defensive — WMI sometimes
# returns a PID for a transient ancestor on heavily loaded boxes).
Start-Sleep -Milliseconds 250
if (-not (Get-Process -Id $startedPid -ErrorAction SilentlyContinue)) {
$rescanned = Get-ServerProcesses -ModuleName $Module -Root $ServerRoot | Select-Object -First 1
if ($rescanned) { $startedPid = $rescanned.ProcessId } else { $startedPid = 0 }
}
# ---- Poll readiness --------------------------------------------------------
@@ -316,28 +365,37 @@ $deadline = (Get-Date).AddSeconds($StartupTimeoutSec)
$ready = $false
while ((Get-Date) -lt $deadline) {
# Bail early if the process has already exited — something went wrong.
if ($startedPid -gt 0) {
$proc = Get-Process -Id $startedPid -ErrorAction SilentlyContinue
if (-not $proc) { break }
if (-not $proc) {
$rescanned = Get-ServerProcesses -ModuleName $Module -Root $ServerRoot | Select-Object -First 1
if ($rescanned) { $startedPid = $rescanned.ProcessId } else { break }
}
} else {
$rescanned = Get-ServerProcesses -ModuleName $Module -Root $ServerRoot | Select-Object -First 1
if ($rescanned) { $startedPid = $rescanned.ProcessId }
}
if (Test-PortOpen -Port $Port) { $ready = $true; break }
Start-Sleep -Milliseconds 500
}
if ($ready) {
if ($startedPid -gt 0) {
Write-Info "Server ready on port $Port (PID $startedPid)"
} else {
Write-Info "Server ready on port $Port"
}
exit 0
}
$proc = Get-Process -Id $startedPid -ErrorAction SilentlyContinue
if (-not $proc) {
Write-Warning "Server process $startedPid exited before binding port $Port"
} else {
if ($startedPid -gt 0) {
$proc = Get-Process -Id $startedPid -ErrorAction SilentlyContinue
if (-not $proc) {
Write-Warning "Server process $startedPid exited before binding port $Port (check the server console window for the error)"
} else {
Write-Warning "Server PID $startedPid is running but did not bind port $Port within ${StartupTimeoutSec}s"
}
if (Test-Path $errPath) {
$tail = Get-Content $errPath -Tail 20 -ErrorAction SilentlyContinue
if ($tail) {
Write-Warning "Last stderr lines from $errPath :"
$tail | ForEach-Object { Write-Warning " $_" }
}
} else {
Write-Warning "Could not locate server process; port $Port did not bind within ${StartupTimeoutSec}s"
}
exit 1
@@ -5,11 +5,13 @@ import hashlib
import os
import re
import shutil
import socket
import subprocess
import sys
import time
from pathlib import Path
from typing import Any
from urllib.parse import urlparse
import httpx
@@ -23,6 +25,7 @@ from ledgrab.core.update.release_provider import AssetInfo, ReleaseInfo, Release
from ledgrab.core.update.version_check import is_newer, normalize_version
from ledgrab.storage.database import Database
from ledgrab.utils import get_logger
from ledgrab.utils.net_classify import is_blocked_for_ssrf
logger = get_logger(__name__)
@@ -38,6 +41,44 @@ _SHA256_RE = re.compile(r"\b([a-fA-F0-9]{64})\b")
_STARTUP_DELAY_S = 30
_MANUAL_CHECK_DEBOUNCE_S = 60
# Manual-redirect limits for SSRF-safe update downloads.
_UPDATE_MAX_REDIRECT_HOPS = 5
def _validate_update_url(url: str) -> None:
"""Reject update URLs whose scheme or resolved host is non-public.
The update pipeline fetches release feeds and binaries from
``update.repo_url`` (default: Gitea instance) and may follow
redirects to CDN hosts. Without per-hop validation, a hostile or
compromised feed could redirect the binary download to a private
address (SSRF) or to a non-HTTPS scheme. This guard enforces:
* scheme is ``http`` or ``https``
* hostname is present
* DNS resolution returns no private / loopback / link-local /
multicast / reserved / unparseable address
Raises ``RuntimeError`` (not ``HTTPException`` — this code path runs
in a background task, not a request handler).
"""
parsed = urlparse(url)
if parsed.scheme not in ("http", "https"):
raise RuntimeError(f"Refusing update URL with unsupported scheme: {parsed.scheme!r}")
hostname = parsed.hostname
if not hostname:
raise RuntimeError("Update URL missing hostname")
try:
infos = socket.getaddrinfo(hostname, None)
except socket.gaierror as exc:
raise RuntimeError(f"Cannot resolve update host: {hostname} ({exc})") from exc
ips = {info[4][0] for info in infos}
for ip in ips:
if is_blocked_for_ssrf(ip):
raise RuntimeError(
f"Refusing update URL: host {hostname!r} resolves to " f"non-public address {ip}"
)
class UpdateService:
"""Periodically polls a ReleaseProvider and fires WebSocket events."""
@@ -250,12 +291,45 @@ class UpdateService:
finally:
self._downloading = False
async def _safe_get_text(self, url: str, timeout: float = 30.0) -> str:
"""Fetch *url* as text with manual, SSRF-validated redirect handling."""
current = url
async with httpx.AsyncClient(timeout=timeout, follow_redirects=False) as client:
for _ in range(_UPDATE_MAX_REDIRECT_HOPS + 1):
_validate_update_url(current)
resp = await client.get(current)
if resp.is_redirect:
location = resp.headers.get("location")
if not location:
raise RuntimeError("Update redirect without Location header")
current = str(httpx.URL(current).join(location))
continue
resp.raise_for_status()
return resp.text
raise RuntimeError(
f"Too many redirects fetching update text (max {_UPDATE_MAX_REDIRECT_HOPS})"
)
async def _stream_download(self, url: str, dest: Path, total_size: int) -> None:
"""Stream-download a file, updating progress as we go."""
"""Stream-download a file with manual, SSRF-validated redirect handling.
Each hop is re-validated via :func:`_validate_update_url` so a
compromised release feed cannot redirect the binary download to a
non-public address.
"""
tmp = dest.with_suffix(dest.suffix + ".tmp")
received = 0
async with httpx.AsyncClient(timeout=300, follow_redirects=True) as client:
async with client.stream("GET", url) as resp:
current = url
async with httpx.AsyncClient(timeout=300, follow_redirects=False) as client:
for _ in range(_UPDATE_MAX_REDIRECT_HOPS + 1):
_validate_update_url(current)
async with client.stream("GET", current) as resp:
if resp.is_redirect:
location = resp.headers.get("location")
if not location:
raise RuntimeError("Update redirect without Location header")
current = str(httpx.URL(current).join(location))
continue
resp.raise_for_status()
with open(tmp, "wb") as f:
async for chunk in resp.aiter_bytes(chunk_size=65536):
@@ -273,6 +347,8 @@ class UpdateService:
# Atomic rename
tmp.replace(dest)
self._download_progress = 1.0
return
raise RuntimeError(f"Too many redirects fetching update (max {_UPDATE_MAX_REDIRECT_HOPS})")
# ── Apply ──────────────────────────────────────────────────
@@ -324,18 +400,16 @@ class UpdateService:
if not asset:
return None
# 1) sibling .sha256 asset
# 1) sibling .sha256 asset — fetch with manual, SSRF-validated
# redirects so the checksum can't be sourced from an untrusted host.
sibling = next(
(a for a in release.assets if a.name == f"{asset.name}.sha256"),
None,
)
if sibling:
try:
async with httpx.AsyncClient(timeout=30, follow_redirects=True) as client:
resp = await client.get(sibling.download_url)
resp.raise_for_status()
text = resp.text.strip()
match = _SHA256_RE.search(text)
text = await self._safe_get_text(sibling.download_url)
match = _SHA256_RE.search(text.strip())
if match:
return match.group(1).lower()
except Exception as exc:
+22 -2
View File
@@ -9,6 +9,17 @@ import pytest
from fastapi.testclient import TestClient
from ledgrab import __version__
from ledgrab.config import get_config
def _auth_headers() -> dict[str, str]:
"""Resolve the configured API key lazily so test ordering doesn't matter.
Evaluating ``get_config()`` at module import time produced a stale empty
key when other tests mutated the global config before this module ran.
"""
api_key = next(iter(get_config().auth.api_keys.values()), "")
return {"Authorization": f"Bearer {api_key}"} if api_key else {}
@pytest.fixture(scope="module")
@@ -55,17 +66,26 @@ class TestVersionEndpoint:
class TestOpenAPIEndpoint:
def test_openapi_available(self, client):
resp = client.get("/openapi.json")
resp = client.get("/openapi.json", headers=_auth_headers())
assert resp.status_code == 200
data = resp.json()
assert "info" in data
assert data["info"]["version"] == __version__
def test_swagger_ui(self, client):
resp = client.get("/docs")
resp = client.get("/docs", headers=_auth_headers())
assert resp.status_code == 200
assert "text/html" in resp.headers["content-type"]
def test_openapi_requires_auth(self, client):
"""Without a valid bearer token, the OpenAPI surface is unreachable."""
resp = client.get("/openapi.json")
assert resp.status_code == 401
def test_swagger_requires_auth(self, client):
resp = client.get("/docs")
assert resp.status_code == 401
class TestRootEndpoint:
def test_root_returns_html(self, client):