Phase 7: Hardening — logging, security, Docker, production readiness

Backend: - Structured JSON logging (python-json-logger) with request ID correlation - RequestIDMiddleware (server-generated UUID, no client trust) - Global exception handlers: AppException, RequestValidationError, generic 500 — all return consistent {"error": {code, message, request_id}} format - Async rate limiting with lock + stale key eviction on auth endpoints - Health endpoint checks DB connectivity, returns version + status - Custom exception classes (NotFoundException, ForbiddenException, etc.) - OpenAPI docs with tag descriptions, conditional URL (disabled in production) - LOG_LEVEL, DOCS_ENABLED, RATE_LIMIT_* settings added Docker: - Backend: multi-stage build (builder + runtime), non-root user, HEALTHCHECK - Frontend: removed dead user, HEALTHCHECK directive - docker-compose: restart policies, healthchecks, Redis service, named volumes for uploads/PDFs, rate limit env vars forwarded - Alembic migrations run only in Dockerfile CMD (removed from lifespan) Nginx: - server_tokens off - CSP, Referrer-Policy, Permissions-Policy headers - HSTS ready (commented, enable with TLS) Config & Docs: - .env.production.example with production-ready settings - CLAUDE.md project conventions (structure, workflow, naming, how-to) - .env.example updated with new variables Review fixes applied: - Rate limiter: async lock prevents race condition, stale key eviction - Request ID: always server-generated (no log injection) - Removed duplicate alembic migration from lifespan - Removed dead app user from frontend Dockerfile - Health check logs DB errors - Rate limit env vars forwarded in docker-compose Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-19 14:52:21 +03:00
parent fed6a3df1b
commit 4cbce89129
18 changed files with 485 additions and 15 deletions
--- a/backend/app/core/exceptions.py
+++ b/backend/app/core/exceptions.py
@@ -0,0 +1,25 @@
+class AppException(Exception):
+    """Base application error carrying an HTTP status, an error code and a message.
+
+    Attributes:
+        status_code: HTTP status code associated with the error.
+        code: Machine-readable error identifier (e.g. ``"INTERNAL_ERROR"``).
+        detail: Human-readable description of the error.
+    """
+
+    def __init__(self, status_code: int = 500, code: str = "INTERNAL_ERROR", detail: str = "An error occurred"):
+        # Hand the message to Exception explicitly. Without this, str(exc) and
+        # exc.args depend on how the constructor happened to be called (empty
+        # for keyword/default calls, the whole positional tuple otherwise).
+        super().__init__(detail)
+        self.status_code = status_code
+        self.code = code
+        self.detail = detail
+
+
+class NotFoundException(AppException):
+    """Application error preset to HTTP 404 with code ``NOT_FOUND``."""
+
+    def __init__(self, detail: str = "Resource not found"):
+        # Status and error code are fixed; only the message is caller-supplied.
+        super().__init__(404, "NOT_FOUND", detail)
+
+
+class ForbiddenException(AppException):
+    """Application error preset to HTTP 403 with code ``FORBIDDEN``."""
+
+    def __init__(self, detail: str = "Access denied"):
+        # Status and error code are fixed; only the message is caller-supplied.
+        super().__init__(403, "FORBIDDEN", detail)
+
+
+class ValidationException(AppException):
+    """Application error preset to HTTP 422 with code ``VALIDATION_ERROR``."""
+
+    def __init__(self, detail: str = "Validation error"):
+        # Status and error code are fixed; only the message is caller-supplied.
+        super().__init__(422, "VALIDATION_ERROR", detail)
+
+
+class RateLimitException(AppException):
+    """Application error preset to HTTP 429 with code ``RATE_LIMIT_EXCEEDED``."""
+
+    def __init__(self, detail: str = "Too many requests"):
+        # Status and error code are fixed; only the message is caller-supplied.
+        super().__init__(429, "RATE_LIMIT_EXCEEDED", detail)
--- a/backend/app/core/logging.py
+++ b/backend/app/core/logging.py
@@ -0,0 +1,24 @@
+import logging
+import sys
+
+from pythonjsonlogger import jsonlogger
+
+from app.config import settings
+
+
+def setup_logging():
+    """Send all root-logger output to stdout as JSON records.
+
+    Any handlers already attached to the root logger are discarded and
+    replaced by a single stdout handler. The root level is taken from
+    ``settings.LOG_LEVEL`` (upper-cased), falling back to INFO when the
+    ``logging`` module has no attribute of that name.
+    """
+    json_formatter = jsonlogger.JsonFormatter(
+        fmt="%(asctime)s %(levelname)s %(name)s %(message)s",
+        rename_fields={"asctime": "timestamp", "levelname": "level"},
+    )
+    stdout_handler = logging.StreamHandler(sys.stdout)
+    stdout_handler.setFormatter(json_formatter)
+
+    root_logger = logging.getLogger()
+    root_logger.handlers.clear()
+    root_logger.addHandler(stdout_handler)
+    configured_level = getattr(logging, settings.LOG_LEVEL.upper(), logging.INFO)
+    root_logger.setLevel(configured_level)
+
+    # These loggers are chatty at INFO; only let warnings and above through.
+    for noisy_name in ("uvicorn.access", "sqlalchemy.engine"):
+        logging.getLogger(noisy_name).setLevel(logging.WARNING)
--- a/backend/app/core/middleware.py
+++ b/backend/app/core/middleware.py
@@ -0,0 +1,21 @@
+import uuid
+from contextvars import ContextVar
+
+from starlette.middleware.base import BaseHTTPMiddleware
+from starlette.requests import Request
+from starlette.responses import Response
+
+# Request ID for the current execution context. RequestIDMiddleware sets it
+# for each request and get_request_id() reads it; it is "" when never set.
+request_id_var: ContextVar[str] = ContextVar("request_id", default="")
+
+
+class RequestIDMiddleware(BaseHTTPMiddleware):
+    """Tag every request with a freshly generated UUID4 request ID.
+
+    The ID is stored in ``request_id_var`` before the rest of the stack runs
+    and is returned to the client in the ``X-Request-ID`` response header.
+    Nothing is read from the incoming request to build the ID.
+    """
+
+    async def dispatch(self, request: Request, call_next):
+        """Assign the request ID, run the downstream app, and stamp the response."""
+        request_id = str(uuid.uuid4())
+        request_id_var.set(request_id)
+
+        response: Response = await call_next(request)
+        response.headers["X-Request-ID"] = request_id
+        return response
+
+
+def get_request_id() -> str:
+    """Return the request ID stored in ``request_id_var`` for the current context.
+
+    Gives the variable's default, an empty string, when no ID has been set.
+    """
+    current_id = request_id_var.get()
+    return current_id
--- a/backend/app/core/rate_limit.py
+++ b/backend/app/core/rate_limit.py
@@ -0,0 +1,39 @@
+"""In-memory sliding window rate limiter.
+
+Note: For multi-instance deployments, swap to Redis-backed implementation.
+"""
+import asyncio
+import time
+from collections import defaultdict
+
+from fastapi import Request, HTTPException, status
+
+from app.config import settings
+
+# Per-client request timestamps (time.time() values), keyed by client IP.
+_requests: dict[str, list[float]] = defaultdict(list)
+# Held by check_rate_limit around every read/modify/write of _requests.
+_lock = asyncio.Lock()
+
+
+async def check_rate_limit(request: Request) -> None:
+    """Enforce the per-client sliding-window rate limit for ``request``.
+
+    The client is identified by ``request.client.host`` (``"unknown"`` when
+    the request carries no client information). A request is allowed while
+    the client has fewer than ``settings.RATE_LIMIT_REQUESTS`` recorded hits
+    within the last ``settings.RATE_LIMIT_WINDOW_SECONDS`` seconds; an
+    allowed request is recorded as a new hit.
+
+    Raises:
+        HTTPException: 429 when the client has already used up its quota.
+    """
+    # NOTE(review): behind a reverse proxy this may be the proxy's address
+    # for every client — confirm forwarded headers are applied upstream.
+    client_ip = request.client.host if request.client else "unknown"
+    now = time.time()
+    cutoff = now - settings.RATE_LIMIT_WINDOW_SECONDS
+    max_requests = settings.RATE_LIMIT_REQUESTS
+
+    async with _lock:
+        # Keep only this client's hits that are still inside the window.
+        hits = [t for t in _requests[client_ip] if t > cutoff]
+        _requests[client_ip] = hits
+
+        if len(hits) >= max_requests:
+            raise HTTPException(
+                status_code=status.HTTP_429_TOO_MANY_REQUESTS,
+                detail="Too many requests. Please try again later.",
+            )
+
+        hits.append(now)
+
+        # Evict clients whose newest hit has left the window. Looking only
+        # for empty lists never matches anything: a list is pruned solely
+        # when its own client returns and is then appended to right away, so
+        # idle clients would stay in the dict forever. Hits are appended in
+        # time order, which makes ts[-1] the newest one.
+        stale = [ip for ip, ts in _requests.items() if not ts or ts[-1] <= cutoff]
+        for ip in stale:
+            del _requests[ip]