feat: production-readiness hardening across security, async, DB, ops

Security - SSRF: async DNS resolver; allow_redirects=False on all outbound clients; matrix homeserver_url validated on create/update/test; update_provider and email_bot merge incoming config and reject ***-masked secrets. - Auth: bcrypt offloaded to asyncio.to_thread; JWT now carries iss/aud + leeway and rejects missing claims; setup TOCTOU closed inside a transaction; rate limits extended (default 600/min, 10/min on password change, 30/min on needs-setup); constant-time login to prevent username enumeration. - Config: rejects known dev secret keys; validates CORS origin schemes, port range, token lifetimes. - Webhook handlers stream-read body with a 1 MB (1,000,000-byte) cap; Discord 429 retries bounded (3 attempts, Retry-After capped at 60 s). - CSP + HSTS added to SecurityHeadersMiddleware. Async / runtime - SQLite engine: WAL, synchronous=NORMAL, foreign_keys=ON, busy_timeout, pool_pre_ping, dispose on shutdown. - Lifespan shutdown now stops scheduler before closing HTTP session and disposing the engine. - Shared aiohttp session locked against concurrent first-caller races; core NotificationDispatcher accepts and reuses it. - Storage and scheduled backup writes wrapped in asyncio.to_thread. - NUT client writes bounded by asyncio.wait_for. - Telegram poller switched from 3 s short-poll to 30 s interval + 25 s long-poll (~10x fewer API calls). Database - New performance-indexes migration covers every FK/owner column and hot-path composite (notification_tracker(provider_id, enabled); event_log(user_id, created_at DESC); webhook_payload_log(provider_id, created_at DESC); action_execution(action_id, started_at DESC)). - New schema_version table for future upgrade gating. - __system__ placeholder user (id=0) seeded so user_id=0 system defaults satisfy the newly enforced FK; filtered out of /auth/needs-setup, /api/users, and setup. - list_notification_trackers rewritten to batched loads (was 1+N+N*M).
- Retention job extended to event_log, webhook_payload_log, and action_execution; retention days exposed as a setting. Scheduler - AsyncIOScheduler job_defaults: coalesce, misfire_grace_time=300, max_instances=1. Ops - uvicorn runs with proxy_headers, forwarded_allow_ips, timeout_graceful_shutdown; access log suppressed in non-debug. - FastAPI version string now reads from importlib.metadata. - New /api/ready endpoint separate from /api/health. - docker-compose drops the ALLOW_PRIVATE_URLS=1 default, adds mem/cpu/pid limits, read_only + tmpfs, cap_drop:ALL, no-new-privileges; healthcheck targets /api/ready. - CI now runs on push/PR with backend pytest, frontend svelte-check + build, and a non-push image build; release workflow gated on tests, publishes immutable sha-<commit> image tag, adds Trivy scan. Tests - New packages/server/tests/ with 29 passing tests: config validation, JWT round-trip + aud/alg=none rejection, SSRF scheme and private-range enforcement (sync + async), Discord bounded retry, and a lifespan-level /api/health + /api/ready smoke check. - Renamed the misnamed services/test_dispatch.py to manual_dispatch.py so pytest never auto-collects production code. Frontend - /login now redirects already-authenticated users to /, shows a distinct 'backend unreachable' banner (en/ru) when /auth/needs-setup fails.
2026-04-23 19:44:56 +03:00
parent f50d465c0e
commit 920920bc67
44 changed files with 1426 additions and 186 deletions
@@ -37,6 +37,42 @@ _LOGGER = logging.getLogger(__name__)

 router = APIRouter(prefix="/api/webhooks", tags=["webhooks"])

+# Hard cap on inbound webhook body size (1 MB is far larger than anything
+# legitimate providers send and keeps the worst-case memory footprint bounded
+# when a malicious peer lies about Content-Length or streams slowly).
+_MAX_WEBHOOK_BODY_BYTES = 1_000_000
+
+
+async def _read_bounded_body(request: Request, limit: int = _MAX_WEBHOOK_BODY_BYTES) -> bytes:
+    """Read the request body, refusing anything larger than ``limit`` bytes.
+
+    A declared ``Content-Length`` above the cap is rejected up front; the body
+    is then streamed and counted, so a missing or understated header cannot
+    bypass the cap. Raises HTTPException 413 (too large) or 400 (bad header).
+    """
+    too_large = HTTPException(status_code=413, detail=f"Payload too large (max {limit} bytes)")
+    declared = request.headers.get("content-length")
+    if declared:
+        # Only the int() parse sits in the try, so the 413 below is never
+        # raised from inside a handler that exists to catch parse errors.
+        try:
+            declared_size = int(declared)
+        except ValueError:
+            raise HTTPException(status_code=400, detail="Invalid Content-Length") from None
+        if declared_size < 0:
+            raise HTTPException(status_code=400, detail="Invalid Content-Length")
+        if declared_size > limit:
+            raise too_large
+
+    chunks: list[bytes] = []
+    size = 0
+    async for chunk in request.stream():
+        size += len(chunk)
+        if size > limit:
+            raise too_large
+        chunks.append(chunk)
+    return b"".join(chunks)
+

 async def _get_provider_by_token(
    session: AsyncSession, token: str, expected_type: str,
@@ -169,7 +205,8 @@ async def _dispatch_webhook_event(
            ))

            # Dispatch to targets
-            dispatcher = NotificationDispatcher()
+            from ..services.http_session import get_http_session
+            dispatcher = NotificationDispatcher(session=await get_http_session())
            target_configs = _build_target_configs(event, link_data, provider_config, app_tz)
            if target_configs:
                results = await dispatcher.dispatch(event, target_configs)
@@ -203,7 +240,7 @@ async def gitea_webhook(token: str, request: Request):
        webhook_secret = (provider.config or {}).get("webhook_secret", "")

    # Read raw body for HMAC check
-    raw_body = await request.body()
+    raw_body = await _read_bounded_body(request)

    if not webhook_secret:
        raise HTTPException(
@@ -221,8 +258,8 @@ async def gitea_webhook(token: str, request: Request):
        return {"ok": True, "skipped": "no event header"}

    try:
-        payload = await request.json()
-    except (json.JSONDecodeError, ValueError):
+        payload = json.loads(raw_body.decode("utf-8"))
+    except (UnicodeDecodeError, json.JSONDecodeError, ValueError):
        raise HTTPException(status_code=400, detail="Invalid JSON")

    event = parse_gitea_webhook(event_header, payload, provider.name)
@@ -280,10 +317,10 @@ async def planka_webhook(token: str, request: Request):
    if not _verify_planka_token(webhook_secret, request):
        raise HTTPException(status_code=403, detail="Invalid token")

-    # Parse payload
+    # Parse payload from the bounded raw_body we already read.
    try:
-        payload = await request.json()
-    except (json.JSONDecodeError, ValueError):
+        payload = json.loads(raw_body.decode("utf-8"))
+    except (UnicodeDecodeError, json.JSONDecodeError, ValueError):
        raise HTTPException(status_code=400, detail="Invalid JSON")

    event_type = payload.get("type", "")
@@ -446,23 +483,22 @@ async def generic_webhook(token: str, request: Request):
    store_payloads = provider_config.get("store_payloads", True)
    max_stored = min(max(int(provider_config.get("max_stored_payloads", 20)), 1), 100)

-    raw_body = await request.body()
+    raw_body = await _read_bounded_body(request)

-    # Enforce payload size limit BEFORE parsing JSON
-    if len(raw_body) > 1_000_000:
-        raise HTTPException(status_code=413, detail="Payload too large (max 1 MB)")
+    # Bounded read above already enforces the size cap; no need to re-check.

    if not _verify_generic_webhook_auth(provider_config, request, raw_body):
        raise HTTPException(status_code=403, detail="Authentication failed")

    safe_headers = _filter_headers(dict(request.headers))

-    # Parse JSON payload
+    # Parse JSON payload from the already-bounded raw_body (the request stream
+    # was consumed by _read_bounded_body(), so request.json() is unusable here).
    try:
-        payload = await request.json()
+        payload = json.loads(raw_body.decode("utf-8"))
        if not isinstance(payload, dict):
            raise ValueError("Payload must be a JSON object")
-    except (json.JSONDecodeError, ValueError):
+    except (UnicodeDecodeError, json.JSONDecodeError, ValueError):
        if store_payloads:
            async with AsyncSession(get_engine()) as log_session:
                await _save_webhook_log(