feat: production-readiness hardening across security, async, DB, ops
Security - SSRF: async DNS resolver; allow_redirects=False on all outbound clients; matrix homeserver_url validated on create/update/test; update_provider and email_bot merge incoming config and reject ***-masked secrets. - Auth: bcrypt offloaded to asyncio.to_thread; JWT now carries iss/aud + leeway and rejects missing claims; setup TOCTOU closed inside a transaction; rate limits extended (default 600/min, 10/min on password change, 30/min on needs-setup); constant-time login to prevent username enumeration. - Config: rejects known dev secret keys; validates CORS origin schemes, port range, token lifetimes. - Webhook handlers stream-read body with a 1 MiB cap; Discord 429 retries bounded (3 attempts, Retry-After capped at 60 s). - CSP + HSTS added to SecurityHeadersMiddleware. Async / runtime - SQLite engine: WAL, synchronous=NORMAL, foreign_keys=ON, busy_timeout, pool_pre_ping, dispose on shutdown. - Lifespan shutdown now stops scheduler before closing HTTP session and disposing the engine. - Shared aiohttp session locked against concurrent first-caller races; core NotificationDispatcher accepts and reuses it. - Storage and scheduled backup writes wrapped in asyncio.to_thread. - NUT client writes bounded by asyncio.wait_for. - Telegram poller switched from 3 s short-poll to 30 s interval + 25 s long-poll (~10x fewer API calls). Database - New performance-indexes migration covers every FK/owner column and hot-path composite (notification_tracker(provider_id, enabled); event_log(user_id, created_at DESC); webhook_payload_log(provider_id, created_at DESC); action_execution(action_id, started_at DESC)). - New schema_version table for future upgrade gating. - __system__ placeholder user (id=0) seeded so user_id=0 system defaults satisfy the newly enforced FK; filtered out of /auth/needs-setup, /api/users, and setup. - list_notification_trackers rewritten to batched loads (was 1+N+N*M). - Retention job extended to event_log, webhook_payload_log, and action_execution; retention days exposed as a setting. Scheduler - AsyncIOScheduler job_defaults: coalesce, misfire_grace_time=300, max_instances=1. Ops - uvicorn runs with proxy_headers, forwarded_allow_ips, timeout_graceful_shutdown; access log suppressed in non-debug. - FastAPI version string now reads from importlib.metadata. - New /api/ready endpoint separate from /api/health. - docker-compose drops the ALLOW_PRIVATE_URLS=1 default, adds mem/cpu/pid limits, read_only + tmpfs, cap_drop:ALL, no-new-privileges; healthcheck targets /api/ready. - CI now runs on push/PR with backend pytest, frontend svelte-check + build, and a non-push image build; release workflow gated on tests, publishes immutable sha-<commit> image tag, adds Trivy scan. Tests - New packages/server/tests/ with 29 passing tests: config validation, JWT round-trip + aud/alg=none rejection, SSRF scheme and private-range enforcement (sync + async), Discord bounded retry, and a lifespan-level /api/health + /api/ready smoke check. - Renamed the misnamed services/test_dispatch.py to manual_dispatch.py so pytest never auto-collects production code. Frontend - /login now redirects already-authenticated users to /, shows a distinct 'backend unreachable' banner (en/ru) when /auth/needs-setup fails.
This commit is contained in:
@@ -1,5 +1,7 @@
|
||||
"""Authentication API routes."""
|
||||
|
||||
import asyncio
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Request, status
|
||||
from pydantic import BaseModel
|
||||
from slowapi import Limiter
|
||||
@@ -16,7 +18,9 @@ from .jwt import create_access_token, create_refresh_token, decode_token
|
||||
|
||||
router = APIRouter(prefix="/api/auth", tags=["auth"])
|
||||
|
||||
limiter = Limiter(key_func=get_remote_address)
|
||||
# Default rate limit applied by SlowAPIMiddleware to every route that does NOT
|
||||
# specify its own @limiter.limit(...) — protects against blanket abuse.
|
||||
limiter = Limiter(key_func=get_remote_address, default_limits=["600/minute"])
|
||||
|
||||
|
||||
class SetupRequest(BaseModel):
|
||||
@@ -45,27 +49,52 @@ class RefreshRequest(BaseModel):
|
||||
refresh_token: str
|
||||
|
||||
|
||||
def _hash_password(password: str) -> str:
|
||||
return bcrypt.hashpw(password.encode(), bcrypt.gensalt()).decode()
|
||||
async def _hash_password(password: str) -> str:
|
||||
"""bcrypt.hashpw is CPU-bound (~200-500ms); never run it on the event loop."""
|
||||
|
||||
def _work() -> str:
|
||||
return bcrypt.hashpw(password.encode(), bcrypt.gensalt()).decode()
|
||||
|
||||
return await asyncio.to_thread(_work)
|
||||
|
||||
|
||||
def _verify_password(password: str, hashed: str) -> bool:
|
||||
return bcrypt.checkpw(password.encode(), hashed.encode())
|
||||
async def _verify_password(password: str, hashed: str) -> bool:
|
||||
def _work() -> bool:
|
||||
try:
|
||||
return bcrypt.checkpw(password.encode(), hashed.encode())
|
||||
except ValueError:
|
||||
# Malformed hash in DB — treat as mismatch, never raise to caller.
|
||||
return False
|
||||
|
||||
return await asyncio.to_thread(_work)
|
||||
|
||||
|
||||
@router.post("/setup", response_model=TokenResponse)
|
||||
@limiter.limit("3/minute")
|
||||
async def setup(request: Request, body: SetupRequest, session: AsyncSession = Depends(get_session)):
|
||||
result = await session.exec(select(func.count()).select_from(User))
|
||||
count = result.one()
|
||||
if count > 0:
|
||||
raise HTTPException(status_code=status.HTTP_409_CONFLICT, detail="Setup already completed.")
|
||||
|
||||
if len(body.password) < 8:
|
||||
raise HTTPException(status_code=400, detail="Password must be at least 8 characters")
|
||||
user = User(username=body.username, hashed_password=_hash_password(body.password), role="admin")
|
||||
session.add(user)
|
||||
await session.commit()
|
||||
# Compute hash BEFORE opening the transaction so we don't hold a writer lock
|
||||
# during the CPU-bound bcrypt work.
|
||||
hashed = await _hash_password(body.password)
|
||||
|
||||
# Serialize setup via an INSERT-inside-transaction-with-count-guard.
|
||||
# SQLite's writer lock plus the count check inside the transaction closes
|
||||
# the TOCTOU window between two concurrent POSTs. We ignore id=0 — that's
|
||||
# the internal "__system__" placeholder used for ownership of default
|
||||
# templates, never a real admin.
|
||||
async with session.begin():
|
||||
result = await session.exec(
|
||||
select(func.count()).select_from(User).where(User.id != 0)
|
||||
)
|
||||
count = result.one()
|
||||
if count > 0:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_409_CONFLICT,
|
||||
detail="Setup already completed.",
|
||||
)
|
||||
user = User(username=body.username, hashed_password=hashed, role="admin")
|
||||
session.add(user)
|
||||
await session.refresh(user)
|
||||
|
||||
return TokenResponse(
|
||||
@@ -79,7 +108,13 @@ async def setup(request: Request, body: SetupRequest, session: AsyncSession = De
|
||||
async def login(request: Request, body: LoginRequest, session: AsyncSession = Depends(get_session)):
|
||||
result = await session.exec(select(User).where(User.username == body.username))
|
||||
user = result.first()
|
||||
if not user or not _verify_password(body.password, user.hashed_password):
|
||||
# Always run a bcrypt verification to keep the response time constant,
|
||||
# preventing username-enumeration via timing side channel.
|
||||
password_ok = await _verify_password(
|
||||
body.password,
|
||||
user.hashed_password if user else "$2b$12$" + "a" * 53,
|
||||
)
|
||||
if not user or not password_ok:
|
||||
raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid username or password")
|
||||
|
||||
return TokenResponse(
|
||||
@@ -124,16 +159,18 @@ class PasswordChangeRequest(BaseModel):
|
||||
|
||||
|
||||
@router.put("/password")
|
||||
@limiter.limit("10/minute")
|
||||
async def change_password(
|
||||
request: Request,
|
||||
body: PasswordChangeRequest,
|
||||
user: User = Depends(get_current_user),
|
||||
session: AsyncSession = Depends(get_session),
|
||||
):
|
||||
if not _verify_password(body.current_password, user.hashed_password):
|
||||
if not await _verify_password(body.current_password, user.hashed_password):
|
||||
raise HTTPException(status_code=400, detail="Current password is incorrect")
|
||||
if len(body.new_password) < 8:
|
||||
raise HTTPException(status_code=400, detail="New password must be at least 8 characters")
|
||||
user.hashed_password = _hash_password(body.new_password)
|
||||
user.hashed_password = await _hash_password(body.new_password)
|
||||
user.token_version = (user.token_version or 1) + 1
|
||||
session.add(user)
|
||||
await session.commit()
|
||||
@@ -141,7 +178,12 @@ async def change_password(
|
||||
|
||||
|
||||
@router.get("/needs-setup")
|
||||
async def needs_setup(session: AsyncSession = Depends(get_session)):
|
||||
result = await session.exec(select(func.count()).select_from(User))
|
||||
@limiter.limit("30/minute")
|
||||
async def needs_setup(request: Request, session: AsyncSession = Depends(get_session)):
|
||||
# Exclude the internal __system__ placeholder (id=0) from the count so
|
||||
# a fresh install still reports needs_setup=True.
|
||||
result = await session.exec(
|
||||
select(func.count()).select_from(User).where(User.id != 0)
|
||||
)
|
||||
count = result.one()
|
||||
return {"needs_setup": count == 0}
|
||||
|
||||
Reference in New Issue
Block a user