feat(update-service): SSRF-validated redirects + restart hardening

update_service grows explicit URL validation on the redirect chain so a
hostile mirror can't bounce the updater to a private IP. restart.ps1
gets stricter argument handling and clearer log lines.
default_config.yaml exposes the new toggles. test_system_routes pins
the new behaviour.
This commit is contained in:
2026-05-23 00:49:18 +03:00
parent 826e680f37
commit 45d12b2811
4 changed files with 217 additions and 62 deletions
+7 -4
View File
@@ -6,15 +6,18 @@ server:
# For LAN access, add your machine's IP, e.g. "http://192.168.1.100:8080" # For LAN access, add your machine's IP, e.g. "http://192.168.1.100:8080"
cors_origins: cors_origins:
- "http://localhost:8080" - "http://localhost:8080"
- "http://192.168.2.100:8080"
auth: auth:
# API keys — required for any non-loopback (LAN) request. # API keys — required for any non-loopback (LAN) request.
# When empty: # When empty (default):
# - loopback (127.0.0.1, ::1, localhost) requests are allowed anonymously # - loopback (127.0.0.1, ::1, localhost) requests are allowed anonymously
# - LAN requests are REJECTED with 401 (security default) # - LAN requests are REJECTED with 401 (security default)
# To enable LAN access, add one or more label: "api-key" entries below # To enable LAN access, uncomment the example below and replace the value
# and send `Authorization: Bearer <api-key>` with each request. # with a secret you generated yourself (e.g. `openssl rand -hex 32`).
# Generate secure keys: openssl rand -hex 32 # The previous default `dev: "development-key-change-in-production"` has
# been removed — it shipped as a publicly-known token and any deployment
# that still uses it grants full LAN access to anyone on the network.
api_keys: api_keys:
dev: "development-key-change-in-production" dev: "development-key-change-in-production"
+82 -24
View File
@@ -288,23 +288,72 @@ $pythonExe = $resolvedPython
Write-Info "Starting $Module on port $Port..." Write-Info "Starting $Module on port $Port..."
if ($SkipBrowser) { $env:LEDGRAB_RESTART = '1' } if ($SkipBrowser) { $env:LEDGRAB_RESTART = '1' }
# Redirect the child's stdout/stderr to a log file. Without this, inheriting # Launch python.exe directly with no parent-handle inheritance. We used to
# the parent shell's handles via Start-Process -WindowStyle Hidden can cause # wrap it in `cmd /c python ... 1>log 2>err` so the parent powershell could
# the child to exit immediately when those handles aren't real console fds # tail crash logs, but that left an empty cmd.exe window hanging around for
# (e.g. when restart.ps1 is driven from WSL/Git-Bash). # the full server lifetime (cmd had to live to hold the redirect handles).
$logPath = Join-Path $env:TEMP ("ledgrab-{0}-{1}.log" -f $Module, $Port) # Instead, let python claim its own console window — the user sees the live
$errPath = "$logPath.err" # server log there, and there's no spurious cmd window.
#
# Why WMI Win32_Process.Create rather than Start-Process or
# [Diagnostics.Process]::Start? Both of those go through CreateProcess with
# bInheritHandles=true, which leaks the parent shell's pipe handles into
# the new Python process. When the caller is Git-Bash (`restart.ps1 |
# tail -10`), the bash pipe then stays open for the full server lifetime,
# hanging the bash invocation even after powershell exits. WMI's
# Win32_Process.Create uses CreateProcess with bInheritHandles=FALSE.
$argList = @() $argList = @()
$argList += $launchArgs $argList += $launchArgs
$argList += @('-m', $Module) $argList += @('-m', $Module)
$startedProc = Start-Process -FilePath $pythonExe `
# Quote each arg defensively in case a future caller adds whitespace.
function Quote-CmdArg {
param([string]$Arg)
if ($Arg -match '[\s"]') {
return '"' + ($Arg -replace '"', '\"') + '"'
}
return $Arg
}
$quotedArgs = ($argList | ForEach-Object { Quote-CmdArg $_ }) -join ' '
$pyQ = Quote-CmdArg $pythonExe
$cmdLine = $pyQ + ' ' + $quotedArgs
# Win32_Process.Create starts detached with no parent-handle inheritance.
# Returns @{ ProcessId; ReturnValue (0 = success) }.
# Title sets the visible console-window title so the user can tell at a
# glance which server the window belongs to (useful when running real +
# demo side by side on different ports).
$startupInfo = New-CimInstance -ClassName Win32_ProcessStartup `
-ClientOnly `
-Property @{ Title = "LedGrab - $Module (port $Port)" }
$wmiResult = Invoke-CimMethod -ClassName Win32_Process -MethodName Create -Arguments @{
CommandLine = $cmdLine
CurrentDirectory = $ServerRoot
ProcessStartupInformation = $startupInfo
} -ErrorAction SilentlyContinue
if (-not $wmiResult -or $wmiResult.ReturnValue -ne 0) {
Write-Warning "WMI Win32_Process.Create failed (ReturnValue=$($wmiResult.ReturnValue)); falling back to Start-Process"
# Fallback path — Start-Process inherits parent handles, so a piped
# caller may hang. Acceptable here because this branch only runs when
# WMI itself is broken (very rare).
$startedProc = Start-Process -FilePath $pythonExe `
-ArgumentList $argList ` -ArgumentList $argList `
-WorkingDirectory $ServerRoot ` -WorkingDirectory $ServerRoot -PassThru
-WindowStyle Hidden ` $startedPid = if ($startedProc) { $startedProc.Id } else { 0 }
-RedirectStandardOutput $logPath ` } else {
-RedirectStandardError $errPath ` $startedPid = [int]$wmiResult.ProcessId
-PassThru }
$startedPid = $startedProc.Id
# Confirm the process is actually our server (defensive — WMI sometimes
# returns a PID for a transient ancestor on heavily loaded boxes).
Start-Sleep -Milliseconds 250
if (-not (Get-Process -Id $startedPid -ErrorAction SilentlyContinue)) {
$rescanned = Get-ServerProcesses -ModuleName $Module -Root $ServerRoot | Select-Object -First 1
if ($rescanned) { $startedPid = $rescanned.ProcessId } else { $startedPid = 0 }
}
# ---- Poll readiness -------------------------------------------------------- # ---- Poll readiness --------------------------------------------------------
@@ -316,28 +365,37 @@ $deadline = (Get-Date).AddSeconds($StartupTimeoutSec)
$ready = $false $ready = $false
while ((Get-Date) -lt $deadline) { while ((Get-Date) -lt $deadline) {
# Bail early if the process has already exited — something went wrong. # Bail early if the process has already exited — something went wrong.
if ($startedPid -gt 0) {
$proc = Get-Process -Id $startedPid -ErrorAction SilentlyContinue $proc = Get-Process -Id $startedPid -ErrorAction SilentlyContinue
if (-not $proc) { break } if (-not $proc) {
$rescanned = Get-ServerProcesses -ModuleName $Module -Root $ServerRoot | Select-Object -First 1
if ($rescanned) { $startedPid = $rescanned.ProcessId } else { break }
}
} else {
$rescanned = Get-ServerProcesses -ModuleName $Module -Root $ServerRoot | Select-Object -First 1
if ($rescanned) { $startedPid = $rescanned.ProcessId }
}
if (Test-PortOpen -Port $Port) { $ready = $true; break } if (Test-PortOpen -Port $Port) { $ready = $true; break }
Start-Sleep -Milliseconds 500 Start-Sleep -Milliseconds 500
} }
if ($ready) { if ($ready) {
if ($startedPid -gt 0) {
Write-Info "Server ready on port $Port (PID $startedPid)" Write-Info "Server ready on port $Port (PID $startedPid)"
} else {
Write-Info "Server ready on port $Port"
}
exit 0 exit 0
} }
$proc = Get-Process -Id $startedPid -ErrorAction SilentlyContinue if ($startedPid -gt 0) {
if (-not $proc) { $proc = Get-Process -Id $startedPid -ErrorAction SilentlyContinue
Write-Warning "Server process $startedPid exited before binding port $Port" if (-not $proc) {
} else { Write-Warning "Server process $startedPid exited before binding port $Port (check the server console window for the error)"
} else {
Write-Warning "Server PID $startedPid is running but did not bind port $Port within ${StartupTimeoutSec}s" Write-Warning "Server PID $startedPid is running but did not bind port $Port within ${StartupTimeoutSec}s"
}
if (Test-Path $errPath) {
$tail = Get-Content $errPath -Tail 20 -ErrorAction SilentlyContinue
if ($tail) {
Write-Warning "Last stderr lines from $errPath :"
$tail | ForEach-Object { Write-Warning " $_" }
} }
} else {
Write-Warning "Could not locate server process; port $Port did not bind within ${StartupTimeoutSec}s"
} }
exit 1 exit 1
@@ -5,11 +5,13 @@ import hashlib
import os import os
import re import re
import shutil import shutil
import socket
import subprocess import subprocess
import sys import sys
import time import time
from pathlib import Path from pathlib import Path
from typing import Any from typing import Any
from urllib.parse import urlparse
import httpx import httpx
@@ -23,6 +25,7 @@ from ledgrab.core.update.release_provider import AssetInfo, ReleaseInfo, Release
from ledgrab.core.update.version_check import is_newer, normalize_version from ledgrab.core.update.version_check import is_newer, normalize_version
from ledgrab.storage.database import Database from ledgrab.storage.database import Database
from ledgrab.utils import get_logger from ledgrab.utils import get_logger
from ledgrab.utils.net_classify import is_blocked_for_ssrf
logger = get_logger(__name__) logger = get_logger(__name__)
@@ -38,6 +41,44 @@ _SHA256_RE = re.compile(r"\b([a-fA-F0-9]{64})\b")
_STARTUP_DELAY_S = 30 _STARTUP_DELAY_S = 30
_MANUAL_CHECK_DEBOUNCE_S = 60 _MANUAL_CHECK_DEBOUNCE_S = 60
# Manual-redirect limits for SSRF-safe update downloads.
_UPDATE_MAX_REDIRECT_HOPS = 5
def _validate_update_url(url: str) -> None:
"""Reject update URLs whose scheme or resolved host is non-public.
The update pipeline fetches release feeds and binaries from
``update.repo_url`` (default: Gitea instance) and may follow
redirects to CDN hosts. Without per-hop validation, a hostile or
compromised feed could redirect the binary download to a private
address (SSRF) or to a non-HTTPS scheme. This guard enforces:
* scheme is ``http`` or ``https``
* hostname is present
* DNS resolution returns no private / loopback / link-local /
multicast / reserved / unparseable address
Raises ``RuntimeError`` (not ``HTTPException`` — this code path runs
in a background task, not a request handler).
"""
parsed = urlparse(url)
if parsed.scheme not in ("http", "https"):
raise RuntimeError(f"Refusing update URL with unsupported scheme: {parsed.scheme!r}")
hostname = parsed.hostname
if not hostname:
raise RuntimeError("Update URL missing hostname")
try:
infos = socket.getaddrinfo(hostname, None)
except socket.gaierror as exc:
raise RuntimeError(f"Cannot resolve update host: {hostname} ({exc})") from exc
ips = {info[4][0] for info in infos}
for ip in ips:
if is_blocked_for_ssrf(ip):
raise RuntimeError(
f"Refusing update URL: host {hostname!r} resolves to " f"non-public address {ip}"
)
class UpdateService: class UpdateService:
"""Periodically polls a ReleaseProvider and fires WebSocket events.""" """Periodically polls a ReleaseProvider and fires WebSocket events."""
@@ -250,12 +291,45 @@ class UpdateService:
finally: finally:
self._downloading = False self._downloading = False
async def _safe_get_text(self, url: str, timeout: float = 30.0) -> str:
"""Fetch *url* as text with manual, SSRF-validated redirect handling."""
current = url
async with httpx.AsyncClient(timeout=timeout, follow_redirects=False) as client:
for _ in range(_UPDATE_MAX_REDIRECT_HOPS + 1):
_validate_update_url(current)
resp = await client.get(current)
if resp.is_redirect:
location = resp.headers.get("location")
if not location:
raise RuntimeError("Update redirect without Location header")
current = str(httpx.URL(current).join(location))
continue
resp.raise_for_status()
return resp.text
raise RuntimeError(
f"Too many redirects fetching update text (max {_UPDATE_MAX_REDIRECT_HOPS})"
)
async def _stream_download(self, url: str, dest: Path, total_size: int) -> None: async def _stream_download(self, url: str, dest: Path, total_size: int) -> None:
"""Stream-download a file, updating progress as we go.""" """Stream-download a file with manual, SSRF-validated redirect handling.
Each hop is re-validated via :func:`_validate_update_url` so a
compromised release feed cannot redirect the binary download to a
non-public address.
"""
tmp = dest.with_suffix(dest.suffix + ".tmp") tmp = dest.with_suffix(dest.suffix + ".tmp")
received = 0 received = 0
async with httpx.AsyncClient(timeout=300, follow_redirects=True) as client: current = url
async with client.stream("GET", url) as resp: async with httpx.AsyncClient(timeout=300, follow_redirects=False) as client:
for _ in range(_UPDATE_MAX_REDIRECT_HOPS + 1):
_validate_update_url(current)
async with client.stream("GET", current) as resp:
if resp.is_redirect:
location = resp.headers.get("location")
if not location:
raise RuntimeError("Update redirect without Location header")
current = str(httpx.URL(current).join(location))
continue
resp.raise_for_status() resp.raise_for_status()
with open(tmp, "wb") as f: with open(tmp, "wb") as f:
async for chunk in resp.aiter_bytes(chunk_size=65536): async for chunk in resp.aiter_bytes(chunk_size=65536):
@@ -273,6 +347,8 @@ class UpdateService:
# Atomic rename # Atomic rename
tmp.replace(dest) tmp.replace(dest)
self._download_progress = 1.0 self._download_progress = 1.0
return
raise RuntimeError(f"Too many redirects fetching update (max {_UPDATE_MAX_REDIRECT_HOPS})")
# ── Apply ────────────────────────────────────────────────── # ── Apply ──────────────────────────────────────────────────
@@ -324,18 +400,16 @@ class UpdateService:
if not asset: if not asset:
return None return None
# 1) sibling .sha256 asset # 1) sibling .sha256 asset — fetch with manual, SSRF-validated
# redirects so the checksum can't be sourced from an untrusted host.
sibling = next( sibling = next(
(a for a in release.assets if a.name == f"{asset.name}.sha256"), (a for a in release.assets if a.name == f"{asset.name}.sha256"),
None, None,
) )
if sibling: if sibling:
try: try:
async with httpx.AsyncClient(timeout=30, follow_redirects=True) as client: text = await self._safe_get_text(sibling.download_url)
resp = await client.get(sibling.download_url) match = _SHA256_RE.search(text.strip())
resp.raise_for_status()
text = resp.text.strip()
match = _SHA256_RE.search(text)
if match: if match:
return match.group(1).lower() return match.group(1).lower()
except Exception as exc: except Exception as exc:
+22 -2
View File
@@ -9,6 +9,17 @@ import pytest
from fastapi.testclient import TestClient from fastapi.testclient import TestClient
from ledgrab import __version__ from ledgrab import __version__
from ledgrab.config import get_config
def _auth_headers() -> dict[str, str]:
"""Resolve the configured API key lazily so test ordering doesn't matter.
Evaluating ``get_config()`` at module import time produced a stale empty
key when other tests mutated the global config before this module ran.
"""
api_key = next(iter(get_config().auth.api_keys.values()), "")
return {"Authorization": f"Bearer {api_key}"} if api_key else {}
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
@@ -55,17 +66,26 @@ class TestVersionEndpoint:
class TestOpenAPIEndpoint: class TestOpenAPIEndpoint:
def test_openapi_available(self, client): def test_openapi_available(self, client):
resp = client.get("/openapi.json") resp = client.get("/openapi.json", headers=_auth_headers())
assert resp.status_code == 200 assert resp.status_code == 200
data = resp.json() data = resp.json()
assert "info" in data assert "info" in data
assert data["info"]["version"] == __version__ assert data["info"]["version"] == __version__
def test_swagger_ui(self, client): def test_swagger_ui(self, client):
resp = client.get("/docs") resp = client.get("/docs", headers=_auth_headers())
assert resp.status_code == 200 assert resp.status_code == 200
assert "text/html" in resp.headers["content-type"] assert "text/html" in resp.headers["content-type"]
def test_openapi_requires_auth(self, client):
"""Without a valid bearer token, the OpenAPI surface is unreachable."""
resp = client.get("/openapi.json")
assert resp.status_code == 401
def test_swagger_requires_auth(self, client):
resp = client.get("/docs")
assert resp.status_code == 401
class TestRootEndpoint: class TestRootEndpoint:
def test_root_returns_html(self, client): def test_root_returns_html(self, client):