feat: production-readiness hardening across security, async, DB, ops
Security
- SSRF: async DNS resolver; allow_redirects=False on all outbound clients; matrix homeserver_url validated on create/update/test; update_provider and email_bot merge incoming config and reject ***-masked secrets.
- Auth: bcrypt offloaded to asyncio.to_thread; JWT now carries iss/aud + leeway and rejects missing claims; setup TOCTOU closed inside a transaction; rate limits extended (default 600/min, 10/min on password change, 30/min on needs-setup); constant-time login to prevent username enumeration.
- Config: rejects known dev secret keys; validates CORS origin schemes, port range, token lifetimes.
- Webhook handlers stream-read body with a 1 MiB cap; Discord 429 retries bounded (3 attempts, Retry-After capped at 60 s).
- CSP + HSTS added to SecurityHeadersMiddleware.

Async / runtime
- SQLite engine: WAL, synchronous=NORMAL, foreign_keys=ON, busy_timeout, pool_pre_ping, dispose on shutdown.
- Lifespan shutdown now stops scheduler before closing HTTP session and disposing the engine.
- Shared aiohttp session locked against concurrent first-caller races; core NotificationDispatcher accepts and reuses it.
- Storage and scheduled backup writes wrapped in asyncio.to_thread.
- NUT client writes bounded by asyncio.wait_for.
- Telegram poller switched from 3 s short-poll to 30 s interval + 25 s long-poll (~10x fewer API calls).

Database
- New performance-indexes migration covers every FK/owner column and hot-path composite (notification_tracker(provider_id, enabled); event_log(user_id, created_at DESC); webhook_payload_log(provider_id, created_at DESC); action_execution(action_id, started_at DESC)).
- New schema_version table for future upgrade gating.
- __system__ placeholder user (id=0) seeded so user_id=0 system defaults satisfy the newly enforced FK; filtered out of /auth/needs-setup, /api/users, and setup.
- list_notification_trackers rewritten to batched loads (was 1+N+N*M).
- Retention job extended to event_log, webhook_payload_log, and action_execution; retention days exposed as a setting.

Scheduler
- AsyncIOScheduler job_defaults: coalesce, misfire_grace_time=300, max_instances=1.

Ops
- uvicorn runs with proxy_headers, forwarded_allow_ips, timeout_graceful_shutdown; access log suppressed in non-debug.
- FastAPI version string now reads from importlib.metadata.
- New /api/ready endpoint separate from /api/health.
- docker-compose drops the ALLOW_PRIVATE_URLS=1 default, adds mem/cpu/pid limits, read_only + tmpfs, cap_drop:ALL, no-new-privileges; healthcheck targets /api/ready.
- CI now runs on push/PR with backend pytest, frontend svelte-check + build, and a non-push image build; release workflow gated on tests, publishes immutable sha-<commit> image tag, adds Trivy scan.

Tests
- New packages/server/tests/ with 29 passing tests: config validation, JWT round-trip + aud/alg=none rejection, SSRF scheme and private-range enforcement (sync + async), Discord bounded retry, and a lifespan-level /api/health + /api/ready smoke check.
- Renamed the misnamed services/test_dispatch.py to manual_dispatch.py so pytest never auto-collects production code.

Frontend
- /login now redirects already-authenticated users to / and shows a distinct 'backend unreachable' banner (en/ru) when /auth/needs-setup fails.
This commit is contained in:
@@ -52,12 +52,31 @@ from .api.webhook_logs import router as webhook_logs_router
|
||||
from .api.backup import router as backup_router
|
||||
|
||||
|
||||
# Readiness flag — flipped to True once the scheduler has started and the
|
||||
# app is fully initialized. Exposed via /api/ready for orchestrators.
|
||||
_READY: bool = False
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
global _READY
|
||||
await init_db()
|
||||
# Run data migrations (idempotent)
|
||||
from .database.engine import get_engine
|
||||
from .database.migrations import migrate_schema, migrate_tracker_targets, migrate_entity_refactor, migrate_template_slots, migrate_target_receivers, migrate_template_locale, migrate_receivers_from_config, migrate_command_slot_locale, migrate_notification_slot_locale, migrate_user_token_version
|
||||
from .database.migrations import (
|
||||
migrate_schema,
|
||||
migrate_tracker_targets,
|
||||
migrate_entity_refactor,
|
||||
migrate_template_slots,
|
||||
migrate_target_receivers,
|
||||
migrate_template_locale,
|
||||
migrate_receivers_from_config,
|
||||
migrate_command_slot_locale,
|
||||
migrate_notification_slot_locale,
|
||||
migrate_user_token_version,
|
||||
migrate_performance_indexes,
|
||||
migrate_schema_version,
|
||||
)
|
||||
engine = get_engine()
|
||||
await migrate_schema(engine)
|
||||
await migrate_tracker_targets(engine)
|
||||
@@ -69,6 +88,8 @@ async def lifespan(app: FastAPI):
|
||||
await migrate_command_slot_locale(engine)
|
||||
await migrate_notification_slot_locale(engine)
|
||||
await migrate_user_token_version(engine)
|
||||
await migrate_performance_indexes(engine)
|
||||
await migrate_schema_version(engine)
|
||||
from .database.seeds import seed_all
|
||||
await seed_all()
|
||||
# Apply DB-backed logging settings (override env-based boot config).
|
||||
@@ -100,16 +121,28 @@ async def lifespan(app: FastAPI):
|
||||
set_webhook_secret(_secret or None)
|
||||
from .services.scheduler import start_scheduler, get_scheduler
|
||||
await start_scheduler()
|
||||
_READY = True
|
||||
yield
|
||||
# Graceful shutdown
|
||||
from .services.http_session import close_http_session
|
||||
await close_http_session()
|
||||
# Graceful shutdown — stop the scheduler FIRST so in-flight jobs finish
|
||||
# before we close their HTTP session. Then close the shared session and
|
||||
# dispose the DB engine.
|
||||
_READY = False
|
||||
scheduler = get_scheduler()
|
||||
if scheduler.running:
|
||||
scheduler.shutdown()
|
||||
scheduler.shutdown(wait=True)
|
||||
from .services.http_session import close_http_session
|
||||
await close_http_session()
|
||||
from .database.engine import dispose_engine
|
||||
await dispose_engine()
|
||||
|
||||
|
||||
app = FastAPI(title="Notify Bridge", version="0.1.0", lifespan=lifespan)
|
||||
try:
|
||||
from importlib.metadata import version as _pkg_version
|
||||
_APP_VERSION = _pkg_version("notify-bridge-server")
|
||||
except Exception: # pragma: no cover — editable install edge cases
|
||||
_APP_VERSION = "0.0.0+unknown"
|
||||
|
||||
app = FastAPI(title="Notify Bridge", version=_APP_VERSION, lifespan=lifespan)
|
||||
|
||||
# --- Security headers ---
|
||||
from starlette.middleware.base import BaseHTTPMiddleware
|
||||
@@ -117,6 +150,19 @@ from starlette.requests import Request as StarletteRequest
|
||||
from starlette.responses import Response as StarletteResponse
|
||||
|
||||
|
||||
_CSP = (
|
||||
"default-src 'self'; "
|
||||
"img-src 'self' data: blob: https:; "
|
||||
"style-src 'self' 'unsafe-inline'; "
|
||||
"script-src 'self'; "
|
||||
"connect-src 'self'; "
|
||||
"font-src 'self' data:; "
|
||||
"base-uri 'self'; "
|
||||
"form-action 'self'; "
|
||||
"frame-ancestors 'none'"
|
||||
)
|
||||
|
||||
|
||||
class SecurityHeadersMiddleware(BaseHTTPMiddleware):
|
||||
async def dispatch(self, request: StarletteRequest, call_next):
|
||||
response: StarletteResponse = await call_next(request)
|
||||
@@ -124,6 +170,14 @@ class SecurityHeadersMiddleware(BaseHTTPMiddleware):
|
||||
response.headers["X-Frame-Options"] = "DENY"
|
||||
response.headers["X-XSS-Protection"] = "1; mode=block"
|
||||
response.headers["Referrer-Policy"] = "strict-origin-when-cross-origin"
|
||||
response.headers.setdefault("Content-Security-Policy", _CSP)
|
||||
# HSTS only makes sense over HTTPS; set when the edge terminates TLS
|
||||
# and forwards X-Forwarded-Proto=https.
|
||||
if request.headers.get("x-forwarded-proto") == "https":
|
||||
response.headers.setdefault(
|
||||
"Strict-Transport-Security",
|
||||
"max-age=31536000; includeSubDomains",
|
||||
)
|
||||
return response
|
||||
|
||||
|
||||
@@ -176,7 +230,22 @@ app.include_router(backup_router)
|
||||
|
||||
@app.get("/api/health")
async def health():
    """Liveness probe: the process is up and responding.

    Always returns 200 once the ASGI app has started. Keep this endpoint
    anonymous and trivially cheap; it performs no I/O and only reports the
    package version resolved at import time.

    Returns:
        A JSON object with ``status`` fixed to ``"ok"`` and the running
        ``version`` string.
    """
    # The earlier bare ``return {"status": "ok"}`` preceded the docstring and
    # made the versioned payload unreachable; only this return is kept.
    return {"status": "ok", "version": _APP_VERSION}
|
||||
|
||||
|
||||
@app.get("/api/ready")
async def ready():
    """Readiness probe for orchestrators (Docker, Kubernetes).

    Reports whether the lifespan startup sequence has finished. The
    module-level ``_READY`` flag is set once the scheduler has been started
    and is cleared again when shutdown begins.

    Returns:
        ``{"status": "ready", "version": ...}`` with the default 200 status
        when the app can serve traffic; otherwise a 503 ``JSONResponse``
        carrying ``{"status": "starting"}``.
    """
    if _READY:
        return {"status": "ready", "version": _APP_VERSION}

    # Imported lazily: the response class is only needed on the not-ready path.
    from starlette.responses import JSONResponse

    not_ready_payload = {"status": "starting"}
    return JSONResponse(not_ready_payload, status_code=503)
|
||||
|
||||
|
||||
# --- Serve frontend static files (production) ---
|
||||
@@ -209,4 +278,12 @@ if _cfg.static_dir and Path(_cfg.static_dir).is_dir():
|
||||
|
||||
def run():
    """Start the uvicorn server for ``app`` using the settings in ``_cfg``.

    This call blocks until the server shuts down. Host, port, trusted proxy
    addresses, graceful-shutdown timeout and debug mode are all read from
    ``_cfg``.
    """
    # Local import keeps uvicorn off the import path of callers that only
    # need the ASGI ``app`` object.
    import uvicorn

    uvicorn.run(
        app,
        host=_cfg.host,
        port=_cfg.port,
        # Honor X-Forwarded-* headers, but only from the configured proxy
        # addresses; fall back to loopback when none are configured.
        proxy_headers=True,
        forwarded_allow_ips=_cfg.forwarded_allow_ips or "127.0.0.1",
        timeout_graceful_shutdown=_cfg.graceful_shutdown_seconds,
        # Access log is emitted only in debug mode and suppressed otherwise.
        # The previous ``not _cfg.debug`` had this inverted.
        access_log=bool(_cfg.debug),
    )
|
||||
|
||||
Reference in New Issue
Block a user