fix(security,perf): harden restore, CSRF, token_version + perf pass
Security
- Checksum pending_restore.json (SHA256 stored in AppSetting, verified on
startup apply), refuse paths outside data_dir, and tighten perms to 0600.
- Require same-origin Origin/Referer on POST /api/backup/apply-restart —
Bearer-in-localStorage is CSRF-reachable from any XSS'd admin tab.
- Bump token_version on role/username change and admin password reset so
demoted admins lose admin in already-issued JWTs. Guard last-admin
TOCTOU via COUNT + post-commit re-check that rolls back a race.
- SSRF guard (validate_outbound_url) in ImmichClient.__init__ and the
external_domain setter — admin-mutable URLs were bypassing the check
that webhook/slack/discord paths already used. Dev restart script now
sets NOTIFY_BRIDGE_ALLOW_PRIVATE_URLS=1 so homelab Immich still works.
- Redact + cap Immich error bodies to ~120 chars before they flow into
ActionExecution.error / EventLog.details (both UI-visible).
- Deny-list sensitive keys (api_key / token / secret / password /
authorization / cookie / ...) in template-context merges so a rogue
template can't exfiltrate provider creds via {{ api_key }}.
- Cap user-controlled Immich search params (query ≤256, person_ids ≤50,
size ≤100) so a Telegram listener can't DoS upstream.
- Stream upload reads with running byte counter + content-length precheck
instead of buffering the full body and then rejecting.
- Log Telegram parse_mode fallbacks instead of swallowing silently;
template escape bugs now surface in server logs.
- Rollback partial imports on pending-restore failure (error recorded on
a fresh session).
Performance
- Fix N+1 in _refresh_telegram_chat_titles: single IN query instead of
session.get per chat.
- Parallelize album + shared-link fetches in test_dispatch (asyncio.gather)
and per-receiver Telegram test sends in notifier (semaphore 5).
- Early-exit collect_scheduled_assets(limit=0) so the periodic-summary
test path skips full per-album filter/sample (was O(album_assets)).
- Emit explicit CREATE INDEX IF NOT EXISTS for event_log user_id /
action_id / provider_id so the first boot after upgrade isn't left
unindexed for the dashboard query.
- Add AbortController timeout (120s) to fetchAuth so uploads/downloads
don't hang indefinitely.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,13 +1,15 @@
|
||||
"""Configuration backup/restore API (admin only)."""
|
||||
|
||||
import asyncio
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import signal
|
||||
from datetime import datetime, timezone
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, UploadFile, File, Query
|
||||
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, Request, UploadFile, File, Query
|
||||
from fastapi.responses import JSONResponse
|
||||
from sqlmodel.ext.asyncio.session import AsyncSession
|
||||
|
||||
@@ -28,6 +30,11 @@ PENDING_RESTORE_PATH_KEY = "pending_restore_path"
|
||||
PENDING_RESTORE_CONFLICT_KEY = "pending_restore_conflict_mode"
|
||||
PENDING_RESTORE_UPLOADED_AT_KEY = "pending_restore_uploaded_at"
|
||||
PENDING_RESTORE_UPLOADED_BY_KEY = "pending_restore_uploaded_by"
|
||||
# SHA256 of the staged pending_restore.json, recorded once the file is in place.
|
||||
# The startup hook refuses to apply if the on-disk file's hash does not match —
|
||||
# defends against anyone dropping a tampered file into data/ between prepare
|
||||
# and restart.
|
||||
PENDING_RESTORE_SHA256_KEY = "pending_restore_sha256"
|
||||
|
||||
|
||||
def _pending_restore_path():
|
||||
@@ -44,6 +51,69 @@ router = APIRouter(prefix="/api/backup", tags=["backup"])
|
||||
MAX_UPLOAD_SIZE = 10 * 1024 * 1024 # 10 MB
|
||||
|
||||
|
||||
async def _read_upload_bounded(file: UploadFile, max_bytes: int = MAX_UPLOAD_SIZE) -> bytes:
    """Read an UploadFile into memory, failing fast if it exceeds ``max_bytes``.

    A ``content-length`` header on the upload, when present and numeric, is
    checked first so an oversized upload is rejected before any data is read.
    The body is then read in 64 KiB chunks with a running byte counter, so the
    limit is still enforced when the header is missing, malformed, or wrong.

    Args:
        file: The uploaded file to read.
        max_bytes: Maximum number of bytes accepted.

    Returns:
        The full upload contents.

    Raises:
        HTTPException: 400 when the declared or actual size exceeds
            ``max_bytes``.
    """
    # Build the message from the actual limit so it stays truthful for
    # non-default ``max_bytes``; the 10 MiB default still renders "10 MB".
    too_large_detail = f"File too large (max {max_bytes / (1024 * 1024):g} MB)"

    # Fast path: reject on the declared size before we allocate anything.
    headers = getattr(file, "headers", None)
    declared = headers.get("content-length") if headers else None
    if declared:
        try:
            declared_size = int(declared)
        except ValueError:
            # Malformed header: ignore it and rely on the counted read below.
            declared_size = None
        if declared_size is not None and declared_size > max_bytes:
            raise HTTPException(status_code=400, detail=too_large_detail)

    chunks: list[bytes] = []
    total = 0
    while chunk := await file.read(64 * 1024):
        total += len(chunk)
        if total > max_bytes:
            raise HTTPException(status_code=400, detail=too_large_detail)
        chunks.append(chunk)
    return b"".join(chunks)
|
||||
|
||||
|
||||
def _check_same_origin(request: Request) -> None:
    """Reject admin-write requests that do not originate from our own host (CSRF defense).

    The lower-cased ``Host`` header is compared against the network location
    of the ``Origin`` and ``Referer`` headers. The request passes when at
    least one of the two is present and equal to ``Host``.

    Raises:
        HTTPException: 400 when the ``Host`` header is missing or empty;
            403 when neither ``Origin`` nor ``Referer`` matches ``Host``.
    """
    expected = request.headers.get("host", "").lower()
    if not expected:
        raise HTTPException(status_code=400, detail="Missing Host header")

    def _netloc(url: str | None) -> str:
        # Lower-cased host[:port] of ``url``; "" when absent or unparseable.
        if not url:
            return ""
        try:
            return (urlparse(url).netloc or "").lower()
        except Exception:  # noqa: BLE001
            return ""

    candidates = (
        _netloc(request.headers.get("origin")),
        _netloc(request.headers.get("referer")),
    )
    # An empty candidate (header missing or unparseable) never counts as a match.
    if any(candidate and candidate == expected for candidate in candidates):
        return

    raise HTTPException(
        status_code=403,
        detail="Cross-origin request rejected",
    )
|
||||
|
||||
|
||||
def _backup_dir():
    """Return the ``backups`` directory located under ``app_config.data_dir``."""
    data_root = app_config.data_dir
    return data_root / "backups"
|
||||
|
||||
@@ -104,9 +174,7 @@ async def validate_config(
|
||||
user: User = Depends(require_admin),
|
||||
):
|
||||
"""Validate a backup file without importing."""
|
||||
content = await file.read()
|
||||
if len(content) > MAX_UPLOAD_SIZE:
|
||||
raise HTTPException(status_code=400, detail="File too large (max 10 MB)")
|
||||
content = await _read_upload_bounded(file)
|
||||
|
||||
try:
|
||||
raw = json.loads(content)
|
||||
@@ -129,9 +197,7 @@ async def import_config(
|
||||
session: AsyncSession = Depends(get_session),
|
||||
):
|
||||
"""Import configuration from a backup file."""
|
||||
content = await file.read()
|
||||
if len(content) > MAX_UPLOAD_SIZE:
|
||||
raise HTTPException(status_code=400, detail="File too large (max 10 MB)")
|
||||
content = await _read_upload_bounded(file)
|
||||
|
||||
try:
|
||||
raw = json.loads(content)
|
||||
@@ -167,6 +233,7 @@ async def _clear_pending_restore_markers(session: AsyncSession) -> None:
|
||||
PENDING_RESTORE_CONFLICT_KEY,
|
||||
PENDING_RESTORE_UPLOADED_AT_KEY,
|
||||
PENDING_RESTORE_UPLOADED_BY_KEY,
|
||||
PENDING_RESTORE_SHA256_KEY,
|
||||
):
|
||||
row = await session.get(AppSetting, key)
|
||||
if row:
|
||||
@@ -185,9 +252,7 @@ async def prepare_restore(
|
||||
Validates the uploaded file, writes it to ``data/pending_restore.json``,
|
||||
and persists marker settings so startup will apply it atomically.
|
||||
"""
|
||||
content = await file.read()
|
||||
if len(content) > MAX_UPLOAD_SIZE:
|
||||
raise HTTPException(status_code=400, detail="File too large (max 10 MB)")
|
||||
content = await _read_upload_bounded(file)
|
||||
|
||||
try:
|
||||
raw = json.loads(content)
|
||||
@@ -205,15 +270,25 @@ async def prepare_restore(
|
||||
pending_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
# Atomic write: write to tmp then rename, so a crash mid-write never
|
||||
# leaves a truncated pending_restore.json that would break startup apply.
|
||||
payload = json.dumps(raw).encode("utf-8")
|
||||
digest = hashlib.sha256(payload).hexdigest()
|
||||
tmp_path = pending_path.with_suffix(pending_path.suffix + ".tmp")
|
||||
tmp_path.write_text(json.dumps(raw), encoding="utf-8")
|
||||
tmp_path.write_bytes(payload)
|
||||
os.replace(tmp_path, pending_path)
|
||||
# Best-effort tighten perms so a non-root local user cannot swap the file
|
||||
# for one they control between prepare and restart. On Windows this is a
|
||||
# no-op; on POSIX we restrict to owner-only rw.
|
||||
try:
|
||||
os.chmod(pending_path, 0o600)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
now_iso = datetime.now(timezone.utc).isoformat()
|
||||
await _set_app_setting(session, PENDING_RESTORE_PATH_KEY, str(pending_path))
|
||||
await _set_app_setting(session, PENDING_RESTORE_CONFLICT_KEY, conflict_mode.value)
|
||||
await _set_app_setting(session, PENDING_RESTORE_UPLOADED_AT_KEY, now_iso)
|
||||
await _set_app_setting(session, PENDING_RESTORE_UPLOADED_BY_KEY, user.username)
|
||||
await _set_app_setting(session, PENDING_RESTORE_SHA256_KEY, digest)
|
||||
await session.commit()
|
||||
|
||||
return {
|
||||
@@ -292,6 +367,7 @@ def _is_supervised() -> bool:
|
||||
|
||||
@router.post("/apply-restart")
|
||||
async def apply_and_restart(
|
||||
request: Request,
|
||||
background_tasks: BackgroundTasks,
|
||||
user: User = Depends(require_admin),
|
||||
session: AsyncSession = Depends(get_session),
|
||||
@@ -299,7 +375,11 @@ async def apply_and_restart(
|
||||
"""Trigger a graceful exit so the supervisor respawns and applies the pending restore.
|
||||
|
||||
Only allowed when a pending restore is staged AND the process is supervised.
|
||||
Requires same-origin Origin/Referer — this endpoint's blast radius is a
|
||||
full config replace + restart, so an admin token alone (vulnerable to
|
||||
XSS-driven CSRF) is not enough.
|
||||
"""
|
||||
_check_same_origin(request)
|
||||
path_row = await session.get(AppSetting, PENDING_RESTORE_PATH_KEY)
|
||||
if not path_row or not path_row.value:
|
||||
raise HTTPException(status_code=409, detail="No pending restore to apply")
|
||||
|
||||
Reference in New Issue
Block a user