fix(security,perf): harden restore, CSRF, token_version + perf pass

Security
- Sign pending_restore.json (SHA256 stored in AppSetting, verified on
  startup apply) + refuse path outside data_dir, tighten to 0600.
- Require same-origin Origin/Referer on POST /api/backup/apply-restart —
  Bearer-in-localStorage is CSRF-reachable from any XSS'd admin tab.
- Bump token_version on role/username change and admin password reset so
  demoted admins lose admin in already-issued JWTs.  Guard last-admin
  TOCTOU via COUNT + post-commit re-check that rolls back a race.
- SSRF guard (validate_outbound_url) in ImmichClient.__init__ and the
  external_domain setter — admin-mutable URLs were bypassing the check
  that webhook/slack/discord paths already used.  Dev restart script now
  sets NOTIFY_BRIDGE_ALLOW_PRIVATE_URLS=1 so homelab Immich still works.
- Redact + cap Immich error bodies to ~120 chars before they flow into
  ActionExecution.error / EventLog.details (both UI-visible).
- Deny-list sensitive keys (api_key / token / secret / password /
  authorization / cookie / ...) in template-context merges so a rogue
  template can't exfiltrate provider creds via {{ api_key }}.
- Cap user-controlled Immich search params (query ≤256, person_ids ≤50,
  size ≤100) so a Telegram listener can't DoS upstream.
- Stream upload reads with running byte counter + content-length precheck
  instead of buffering the full body and then rejecting.
- Log Telegram parse_mode fallbacks instead of swallowing silently;
  template escape bugs now surface in server logs.
- Rollback partial imports on pending-restore failure (error recorded on
  a fresh session).

Performance
- Fix N+1 in _refresh_telegram_chat_titles: single IN query instead of
  session.get per chat.
- Parallelize album + shared-link fetches in test_dispatch (asyncio.gather)
  and per-receiver Telegram test sends in notifier (semaphore 5).
- Early-exit collect_scheduled_assets(limit=0) so the periodic-summary
  test path skips full per-album filter/sample (was O(album_assets)).
- Emit explicit CREATE INDEX IF NOT EXISTS for event_log user_id /
  action_id / provider_id so the first boot after upgrade isn't left
  unindexed for the dashboard query.
- Add AbortController timeout (120s) to fetchAuth so uploads/downloads
  don't hang indefinitely.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-22 02:28:55 +03:00
parent fe92b206b7
commit 56993d2ca3
13 changed files with 530 additions and 100 deletions
@@ -1,5 +1,6 @@
"""Notification sender — unified send logic for all paths (dispatch + test)."""
import asyncio
import logging
from typing import Any
@@ -11,6 +12,10 @@ from ..database.models import NotificationTarget, TargetReceiver
_LOGGER = logging.getLogger(__name__)
# Cap on concurrent per-receiver test sends. Keeps us under Telegram's per-bot
# rate limit (~30 msg/s) while still saving ~N×RTT on multi-chat broadcasts.
_TEST_SEND_CONCURRENCY = 5
_TEST_MESSAGES: dict[str, dict[str, str]] = {
"en": {
"telegram": "\u2705 Test message from <b>Notify Bridge</b>",
@@ -358,19 +363,29 @@ async def _send_telegram_test_per_receiver(
http = await get_http_session()
client = TelegramClient(http, bot_token)
results: list[dict] = []
for r in recv_rows:
# Parallelize per-receiver sends with a small semaphore — broadcast to
# N chats now takes ~ceil(N / concurrency) × RTT instead of N × RTT,
# matching the dispatcher's bounded-concurrency pattern. Capped below
# Telegram's rate limit so we don't trigger 429s on large fleets.
sem = asyncio.Semaphore(_TEST_SEND_CONCURRENCY)
async def _send_one(r: TargetReceiver) -> dict | None:
chat_id = str(r.config.get("chat_id", ""))
if not chat_id:
continue
return None
explicit = getattr(r, "locale", "") or ""
locale = explicit or chat_locale_map.get(chat_id) or default_locale
message = _get_test_message(locale[:2].lower(), "telegram")
results.append(await client.send_message(
chat_id=chat_id,
text=message,
disable_web_page_preview=bool(disable_preview),
))
async with sem:
return await client.send_message(
chat_id=chat_id,
text=message,
disable_web_page_preview=bool(disable_preview),
)
raw = await asyncio.gather(*(_send_one(r) for r in recv_rows))
results = [r for r in raw if r is not None]
return _aggregate(results)
@@ -10,10 +10,19 @@ If the apply fails, the pending file is kept so the operator can inspect it
and markers are updated to record the last error. On success, the staged file
is archived under data/applied_restores/<timestamp>.json and markers are
cleared.
Integrity checks on startup:
- The on-disk file's SHA256 must match ``PENDING_RESTORE_SHA256_KEY``
(written atomically with the staged file). Protects against tampering
between prepare and restart.
- The pending path must resolve *inside* ``app_config.data_dir``. Protects
against a rogue AppSetting pointing at an arbitrary file.
"""
from __future__ import annotations
import asyncio
import hashlib
import json
import logging
import shutil
@@ -24,11 +33,13 @@ from sqlmodel.ext.asyncio.session import AsyncSession
from ..api.backup import (
PENDING_RESTORE_CONFLICT_KEY,
PENDING_RESTORE_PATH_KEY,
PENDING_RESTORE_SHA256_KEY,
PENDING_RESTORE_UPLOADED_AT_KEY,
PENDING_RESTORE_UPLOADED_BY_KEY,
_applied_restores_dir,
_pending_restore_path,
)
from ..config import settings as app_config
from ..database.engine import get_engine
from ..database.models import AppSetting
from .backup_schema import BackupFile, ConflictMode
@@ -49,6 +60,23 @@ async def apply_pending_restore_if_any() -> None:
return
pending_path = _pending_restore_path()
# Defensive: ensure the hard-coded path still lives inside data_dir.
# If future refactors let this be read from AppSetting, this check
# blocks arbitrary-file reads.
try:
resolved = pending_path.resolve()
data_root = app_config.data_dir.resolve()
resolved.relative_to(data_root)
except (ValueError, OSError):
_LOGGER.error(
"Pending-restore path %s is outside data_dir %s — refusing to apply",
pending_path, app_config.data_dir,
)
await _record_error(session, "Pending path outside data_dir")
await session.commit()
return
if not pending_path.exists():
_LOGGER.warning(
"Pending-restore marker present but file missing at %s — clearing marker",
@@ -62,9 +90,42 @@ async def apply_pending_restore_if_any() -> None:
conflict_mode = ConflictMode(conflict_row.value) if conflict_row and conflict_row.value else ConflictMode.SKIP
uploaded_by_row = await session.get(AppSetting, PENDING_RESTORE_UPLOADED_BY_KEY)
uploaded_by = uploaded_by_row.value if uploaded_by_row else "admin"
sha_row = await session.get(AppSetting, PENDING_RESTORE_SHA256_KEY)
expected_sha = (sha_row.value or "").strip().lower() if sha_row else ""
try:
raw = json.loads(pending_path.read_text(encoding="utf-8"))
raw_bytes = await asyncio.to_thread(pending_path.read_bytes)
except OSError as err:
_LOGGER.exception("Pending-restore file unreadable")
await _record_error(session, f"Unreadable backup: {err}")
await session.commit()
return
# Integrity: reject unless hash matches what prepare-restore stored.
# An attacker with write access to data/ (swapped file, bind-mount
# abuse) does not also have write access to the DB.
if not expected_sha:
_LOGGER.error("Pending-restore marker has no SHA256; refusing to apply")
await _record_error(session, "Missing integrity marker")
await session.commit()
return
actual_sha = hashlib.sha256(raw_bytes).hexdigest()
if actual_sha != expected_sha:
_LOGGER.error(
"Pending-restore SHA256 mismatch (expected %s, got %s) — refusing to apply",
expected_sha, actual_sha,
)
await _record_error(
session,
"Integrity check failed: on-disk backup SHA256 does not match the hash "
"recorded at prepare time. File may have been tampered with; cancel and "
"re-upload.",
)
await session.commit()
return
try:
raw = json.loads(raw_bytes.decode("utf-8"))
backup = BackupFile.model_validate(raw)
except Exception as err: # noqa: BLE001
_LOGGER.exception("Pending-restore file unreadable")
@@ -88,8 +149,14 @@ async def apply_pending_restore_if_any() -> None:
result = await import_backup(session, admin_row.id, backup, conflict_mode)
except Exception as err: # noqa: BLE001
_LOGGER.exception("Pending-restore apply failed")
await _record_error(session, str(err))
await session.commit()
# Discard any partial inserts the importer made before raising —
# committing partial state would let a crafted failing backup
# selectively mutate entities. The error-record commit below
# happens on a *fresh* session.
await session.rollback()
async with AsyncSession(engine) as fresh:
await _record_error(fresh, str(err))
await fresh.commit()
return
# Archive the file
@@ -136,6 +203,7 @@ async def _clear_markers(session: AsyncSession) -> None:
PENDING_RESTORE_CONFLICT_KEY,
PENDING_RESTORE_UPLOADED_AT_KEY,
PENDING_RESTORE_UPLOADED_BY_KEY,
PENDING_RESTORE_SHA256_KEY,
):
row = await session.get(AppSetting, key)
if row:
@@ -166,30 +166,41 @@ async def _refresh_telegram_chat_titles() -> None:
refreshed = 0
errors = 0
# Bucket results first, then fetch all rows in one IN-query instead of
# per-row ``session.get`` — otherwise a 50-chat fleet issues 50 extra
# SELECTs before commit.
successes: dict[int, dict] = {}
for chat_id, info, err in results:
if err is not None or info is None:
errors += 1
if err:
_LOGGER.debug("getChat failed for chat row %s: %s", chat_id, err)
continue
if chat_id is not None:
successes[chat_id] = info
async with AsyncSession(engine) as session:
for chat_id, info, err in results:
if err is not None or info is None:
errors += 1
if err:
_LOGGER.debug("getChat failed for chat row %s: %s", chat_id, err)
continue
merged = await session.get(TelegramChat, chat_id)
if not merged:
continue
title = info.get("title") or (
(info.get("first_name", "") + " " + info.get("last_name", "")).strip()
)
changed = False
if title and merged.title != title:
merged.title = title
changed = True
new_username = info.get("username")
if new_username is not None and merged.username != new_username:
merged.username = new_username
changed = True
if changed:
session.add(merged)
refreshed += 1
if successes:
rows = (await session.exec(
select(TelegramChat).where(TelegramChat.id.in_(list(successes.keys())))
)).all()
for merged in rows:
info = successes.get(merged.id)
if not info:
continue
title = info.get("title") or (
(info.get("first_name", "") + " " + info.get("last_name", "")).strip()
)
changed = False
if title and merged.title != title:
merged.title = title
changed = True
new_username = info.get("username")
if new_username is not None and merged.username != new_username:
merged.username = new_username
changed = True
if changed:
session.add(merged)
refreshed += 1
await session.commit()
_LOGGER.info(
"Telegram chat title refresh: %s updated, %s errors", refreshed, errors
@@ -250,14 +250,23 @@ async def _build_immich_event(
collection_ids, limit, asset_type, favorite_only, min_rating,
)
# Album-based path: use shared collect_scheduled_assets
# Album-based path: use shared collect_scheduled_assets.
# Fetch albums + shared links in parallel — on a 20-album tracker the old
# serial ``await`` loop took ~2 × 20 × RTT, now it's one round-trip.
import asyncio as _asyncio
album_tasks = [immich.client.get_album(aid) for aid in collection_ids]
link_tasks = [immich.client.get_shared_links(aid) for aid in collection_ids]
album_results, link_results = await _asyncio.gather(
_asyncio.gather(*album_tasks, return_exceptions=True),
_asyncio.gather(*link_tasks, return_exceptions=True),
)
albums: dict[str, ImmichAlbumData] = {}
shared_links: dict[str, list[SharedLinkInfo]] = {}
for album_id in collection_ids:
album = await immich.client.get_album(album_id)
if album:
albums[album_id] = album
shared_links[album_id] = await immich.client.get_shared_links(album_id)
for album_id, album, links in zip(collection_ids, album_results, link_results):
if isinstance(album, Exception) or album is None:
continue
albums[album_id] = album
shared_links[album_id] = links if not isinstance(links, Exception) else []
assets, collections_extra = collect_scheduled_assets(
albums, shared_links, ext_domain,
@@ -320,13 +329,21 @@ async def _build_immich_periodic_event(
ext_domain = provider_config.get("external_domain") or provider_config.get("url", "")
# Parallel fetch — see _build_immich_event above for the same rationale.
import asyncio as _asyncio
album_tasks = [immich.client.get_album(aid) for aid in collection_ids]
link_tasks = [immich.client.get_shared_links(aid) for aid in collection_ids]
album_results, link_results = await _asyncio.gather(
_asyncio.gather(*album_tasks, return_exceptions=True),
_asyncio.gather(*link_tasks, return_exceptions=True),
)
albums: dict[str, ImmichAlbumData] = {}
shared_links: dict[str, list[SharedLinkInfo]] = {}
for album_id in collection_ids:
album = await immich.client.get_album(album_id)
if album:
albums[album_id] = album
shared_links[album_id] = await immich.client.get_shared_links(album_id)
for album_id, album, links in zip(collection_ids, album_results, link_results):
if isinstance(album, Exception) or album is None:
continue
albums[album_id] = album
shared_links[album_id] = links if not isinstance(links, Exception) else []
# limit=0 → returns ([], collections_extra) with full per-album stats.
_assets, collections_extra = collect_scheduled_assets(