Phase 7: Hardening — logging, security, Docker, production readiness

Backend: - Structured JSON logging (python-json-logger) with request ID correlation - RequestIDMiddleware (server-generated UUID, no client trust) - Global exception handlers: AppException, RequestValidationError, generic 500 — all return consistent {"error": {code, message, request_id}} format - Async rate limiting with lock + stale key eviction on auth endpoints - Health endpoint checks DB connectivity, returns version + status - Custom exception classes (NotFoundException, ForbiddenException, etc.) - OpenAPI docs with tag descriptions, conditional URL (disabled in production) - LOG_LEVEL, DOCS_ENABLED, RATE_LIMIT_* settings added Docker: - Backend: multi-stage build (builder + runtime), non-root user, HEALTHCHECK - Frontend: removed dead user, HEALTHCHECK directive - docker-compose: restart policies, healthchecks, Redis service, named volumes for uploads/PDFs, rate limit env vars forwarded - Alembic migrations run only in Dockerfile CMD (removed from lifespan) Nginx: - server_tokens off - CSP, Referrer-Policy, Permissions-Policy headers - HSTS ready (commented, enable with TLS) Config & Docs: - .env.production.example with production-ready settings - CLAUDE.md project conventions (structure, workflow, naming, how-to) - .env.example updated with new variables Review fixes applied: - Rate limiter: async lock prevents race condition, stale key eviction - Request ID: always server-generated (no log injection) - Removed duplicate alembic migration from lifespan - Removed dead app user from frontend Dockerfile - Health check logs DB errors - Rate limit env vars forwarded in docker-compose Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-19 14:52:21 +03:00
parent fed6a3df1b
commit 4cbce89129
18 changed files with 485 additions and 15 deletions
--- a/backend/Dockerfile
+++ b/backend/Dockerfile
@@ -1,15 +1,34 @@
-FROM python:3.12-slim
+FROM python:3.12-slim AS builder

-WORKDIR /app
+WORKDIR /build

 RUN apt-get update && apt-get install -y --no-install-recommends \
-    gcc libpq-dev \
-    libpango-1.0-0 libcairo2 libgdk-pixbuf-2.0-0 libffi-dev \
+    gcc libpq-dev libffi-dev \
    && rm -rf /var/lib/apt/lists/*

+RUN python -m venv /opt/venv
+ENV PATH="/opt/venv/bin:$PATH"
+
 COPY pyproject.toml .
 RUN pip install --no-cache-dir .

-COPY . .
+FROM python:3.12-slim
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    libpq5 libpango-1.0-0 libcairo2 libgdk-pixbuf-2.0-0 curl \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN addgroup --system app && adduser --system --ingroup app app
+
+COPY --from=builder /opt/venv /opt/venv
+ENV PATH="/opt/venv/bin:$PATH"
+
+WORKDIR /app
+COPY --chown=app:app . .
+
+USER app
+
+HEALTHCHECK --interval=30s --timeout=5s --retries=3 \
+    CMD curl -f http://localhost:8000/api/v1/health || exit 1

 CMD ["sh", "-c", "alembic upgrade head && uvicorn app.main:app --host 0.0.0.0 --port 8000"]
--- a/backend/app/api/v1/auth.py
+++ b/backend/app/api/v1/auth.py
@@ -14,6 +14,7 @@ from app.schemas.auth import (
    UserResponse,
    RegisterRequest,
 )
+from app.core.rate_limit import check_rate_limit
 from app.services import auth_service

 router = APIRouter(prefix="/auth", tags=["auth"])
@@ -25,6 +26,7 @@ async def register(
    request: Request,
    db: Annotated[AsyncSession, Depends(get_db)],
 ):
+    await check_rate_limit(request)
    from app.services.setting_service import get_setting_value
    registration_enabled = await get_setting_value(db, "self_registration_enabled", True)
    if not registration_enabled:
@@ -45,6 +47,7 @@ async def login(
    request: Request,
    db: Annotated[AsyncSession, Depends(get_db)],
 ):
+    await check_rate_limit(request)
    return await auth_service.login_user(
        db,
        email=data.email,
--- a/backend/app/api/v1/router.py
+++ b/backend/app/api/v1/router.py
@@ -25,6 +25,25 @@ api_v1_router.include_router(ws_router)
 api_v1_router.include_router(pdf_router)


-@api_v1_router.get("/health")
+@api_v1_router.get("/health", tags=["health"])
 async def health():
-    return {"status": "ok"}
+    """Report service status together with database connectivity.
+
+    Runs ``SELECT 1`` on a fresh session. Responds 200 with status "ok" when
+    the query succeeds, otherwise 503 with status "degraded"; the failure is
+    logged as a warning including its traceback.
+    """
+    import logging
+
+    from fastapi.responses import JSONResponse
+    from sqlalchemy import text
+
+    from app.database import async_session_factory
+
+    try:
+        async with async_session_factory() as session:
+            await session.execute(text("SELECT 1"))
+    except Exception:
+        logging.getLogger(__name__).warning("Health check DB error", exc_info=True)
+        db_ok = False
+    else:
+        db_ok = True
+
+    payload = {
+        "status": "ok" if db_ok else "degraded",
+        "db": "ok" if db_ok else "error",
+        "version": "0.1.0",
+    }
+    return JSONResponse(status_code=200 if db_ok else 503, content=payload)
--- a/backend/app/config.py
+++ b/backend/app/config.py
@@ -18,6 +18,11 @@ class Settings(BaseSettings):
    UPLOAD_DIR: str = "/data/uploads"
    MAX_UPLOAD_SIZE_MB: int = 20

+    LOG_LEVEL: str = "INFO"
+    DOCS_ENABLED: bool = True
+    RATE_LIMIT_REQUESTS: int = 20
+    RATE_LIMIT_WINDOW_SECONDS: int = 60
+
    FIRST_ADMIN_EMAIL: str = "admin@example.com"
    FIRST_ADMIN_USERNAME: str = "admin"
    FIRST_ADMIN_PASSWORD: str = "changeme_admin_password"
--- a/backend/app/core/exceptions.py
+++ b/backend/app/core/exceptions.py
@@ -0,0 +1,25 @@
+class AppException(Exception):
+    """Base class for application errors that carry an HTTP status and error code.
+
+    Attributes:
+        status_code: HTTP status code to respond with.
+        code: Machine-readable error code.
+        detail: Human-readable error message.
+    """
+
+    def __init__(self, status_code: int = 500, code: str = "INTERNAL_ERROR", detail: str = "An error occurred"):
+        # Hand the message to Exception so str(exc), exc.args and logged
+        # tracebacks carry it instead of being empty.
+        super().__init__(detail)
+        self.status_code = status_code
+        self.code = code
+        self.detail = detail
+
+
+class NotFoundException(AppException):
+    """Application error for a missing resource: HTTP 404, code NOT_FOUND."""
+
+    def __init__(self, detail: str = "Resource not found"):
+        AppException.__init__(self, 404, "NOT_FOUND", detail)
+
+
+class ForbiddenException(AppException):
+    """Application error for a denied action: HTTP 403, code FORBIDDEN."""
+
+    def __init__(self, detail: str = "Access denied"):
+        AppException.__init__(self, 403, "FORBIDDEN", detail)
+
+
+class ValidationException(AppException):
+    """Application error for invalid input: HTTP 422, code VALIDATION_ERROR."""
+
+    def __init__(self, detail: str = "Validation error"):
+        AppException.__init__(self, 422, "VALIDATION_ERROR", detail)
+
+
+class RateLimitException(AppException):
+    """Application error for throttled callers: HTTP 429, code RATE_LIMIT_EXCEEDED."""
+
+    def __init__(self, detail: str = "Too many requests"):
+        AppException.__init__(self, 429, "RATE_LIMIT_EXCEEDED", detail)
--- a/backend/app/core/logging.py
+++ b/backend/app/core/logging.py
@@ -0,0 +1,24 @@
+import logging
+import sys
+
+from pythonjsonlogger import jsonlogger
+
+from app.config import settings
+
+
+def setup_logging():
+    """Configure the root logger to emit JSON lines on stdout.
+
+    Replaces any handlers already on the root logger, applies
+    ``settings.LOG_LEVEL`` (falling back to INFO when the value is not a
+    known level name) and raises the threshold of two noisy library loggers.
+    Each record is tagged with the current request ID, when one is set, so
+    the log lines of one request can be correlated.
+    """
+    # Function-scope import: only needed while building the filter below.
+    from app.core.middleware import get_request_id
+
+    def add_request_id(record: logging.LogRecord) -> bool:
+        # Keep an ID passed explicitly via ``extra=``; otherwise read it from
+        # the request context. An empty ID is left off the record.
+        if not hasattr(record, "request_id"):
+            request_id = get_request_id()
+            if request_id:
+                record.request_id = request_id
+        return True
+
+    handler = logging.StreamHandler(sys.stdout)
+    formatter = jsonlogger.JsonFormatter(
+        fmt="%(asctime)s %(levelname)s %(name)s %(message)s",
+        rename_fields={"asctime": "timestamp", "levelname": "level"},
+    )
+    handler.setFormatter(formatter)
+    # Attached to the handler rather than a logger so it also sees records
+    # that propagate up from child loggers.
+    handler.addFilter(add_request_id)
+
+    # getLevelName() maps a known level name to its int and anything else to
+    # a "Level X" string, so a typo in LOG_LEVEL falls back to INFO instead of
+    # picking up an unrelated attribute of the logging module.
+    level = logging.getLevelName(settings.LOG_LEVEL.upper())
+    if not isinstance(level, int):
+        level = logging.INFO
+
+    root = logging.getLogger()
+    root.handlers.clear()
+    root.addHandler(handler)
+    root.setLevel(level)
+
+    # Quiet noisy loggers
+    logging.getLogger("uvicorn.access").setLevel(logging.WARNING)
+    logging.getLogger("sqlalchemy.engine").setLevel(logging.WARNING)
--- a/backend/app/core/middleware.py
+++ b/backend/app/core/middleware.py
@@ -0,0 +1,21 @@
+import uuid
+from contextvars import ContextVar
+
+from starlette.middleware.base import BaseHTTPMiddleware
+from starlette.requests import Request
+from starlette.responses import Response
+
+request_id_var: ContextVar[str] = ContextVar("request_id", default="")
+
+
+class RequestIDMiddleware(BaseHTTPMiddleware):
+    """Tag every request with a fresh, server-generated UUID.
+
+    The ID is stored in ``request_id_var`` before the request is passed on
+    and is echoed back in the ``X-Request-ID`` response header.
+    """
+
+    async def dispatch(self, request: Request, call_next):
+        new_id = uuid.uuid4()
+        request_id = str(new_id)
+        request_id_var.set(request_id)
+
+        response: Response = await call_next(request)
+        response.headers["X-Request-ID"] = request_id
+        return response
+
+
+def get_request_id() -> str:
+    """Return the request ID stored for the current context ("" when unset)."""
+    current_id = request_id_var.get()
+    return current_id
--- a/backend/app/core/rate_limit.py
+++ b/backend/app/core/rate_limit.py
@@ -0,0 +1,39 @@
+"""In-memory sliding window rate limiter.
+
+Note: For multi-instance deployments, swap to Redis-backed implementation.
+"""
+import asyncio
+import time
+from collections import defaultdict
+
+from fastapi import Request, HTTPException, status
+
+from app.config import settings
+
+_requests: dict[str, list[float]] = defaultdict(list)
+_lock = asyncio.Lock()
+
+
+async def check_rate_limit(request: Request) -> None:
+    """Enforce the per-IP sliding-window limit for the calling client.
+
+    Counts the requests seen from ``request.client.host`` during the last
+    ``settings.RATE_LIMIT_WINDOW_SECONDS`` seconds and records this one when
+    the client is still below ``settings.RATE_LIMIT_REQUESTS``. Requests
+    without client information share the key "unknown".
+
+    Raises:
+        HTTPException: 429 when the client has used up its window.
+    """
+    client_ip = request.client.host if request.client else "unknown"
+    # Monotonic clock: window arithmetic is immune to wall-clock adjustments
+    # and each client's timestamps stay in ascending order.
+    now = time.monotonic()
+    cutoff = now - settings.RATE_LIMIT_WINDOW_SECONDS
+    max_requests = settings.RATE_LIMIT_REQUESTS
+
+    async with _lock:
+        # Evict every client whose newest hit has left the window. Checking
+        # only for empty lists never fires, because a list is pruned solely
+        # when that same client returns, and is then appended to right away.
+        stale = [ip for ip, hits in _requests.items() if not hits or hits[-1] <= cutoff]
+        for ip in stale:
+            del _requests[ip]
+
+        # .get() avoids creating an empty entry for a first-time client.
+        recent = [t for t in _requests.get(client_ip, ()) if t > cutoff]
+
+        if len(recent) >= max_requests:
+            _requests[client_ip] = recent
+            raise HTTPException(
+                status_code=status.HTTP_429_TOO_MANY_REQUESTS,
+                detail="Too many requests. Please try again later.",
+            )
+
+        recent.append(now)
+        _requests[client_ip] = recent
--- a/backend/app/main.py
+++ b/backend/app/main.py
@@ -1,34 +1,59 @@
+import logging
 from contextlib import asynccontextmanager

-from fastapi import FastAPI
+from fastapi import FastAPI, Request
+from fastapi.exceptions import RequestValidationError
 from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import JSONResponse

 from app.config import settings
+from app.core.exceptions import AppException
+from app.core.logging import setup_logging
+from app.core.middleware import RequestIDMiddleware, get_request_id
+
+logger = logging.getLogger(__name__)


@asynccontextmanager
 async def lifespan(app: FastAPI):
+    """Set up logging and start the scheduler at startup; stop the scheduler at shutdown."""
-    from alembic import command
-    from alembic.config import Config
-
-    alembic_cfg = Config("alembic.ini")
-    command.upgrade(alembic_cfg, "head")
+    setup_logging()
+    logger.info("Starting AI Assistant API")

+    # Note: Alembic migrations run via Dockerfile CMD before uvicorn starts
    from app.services.scheduler_service import start_scheduler, shutdown_scheduler
    start_scheduler()

    yield

    shutdown_scheduler()
+    logger.info("Shutting down AI Assistant API")


 def create_app() -> FastAPI:
    app = FastAPI(
        title="AI Assistant API",
+        description="Personal AI health assistant with document management, chat, and notifications.",
        version="0.1.0",
        lifespan=lifespan,
+        docs_url="/api/docs" if settings.DOCS_ENABLED else None,
+        redoc_url="/api/redoc" if settings.DOCS_ENABLED else None,
+        openapi_url="/api/openapi.json" if settings.DOCS_ENABLED else None,
+        openapi_tags=[
+            {"name": "auth", "description": "Authentication and registration"},
+            {"name": "chats", "description": "AI chat conversations"},
+            {"name": "documents", "description": "Health document management"},
+            {"name": "memory", "description": "Health memory entries"},
+            {"name": "skills", "description": "AI specialist skills"},
+            {"name": "notifications", "description": "User notifications"},
+            {"name": "pdf", "description": "PDF report generation"},
+            {"name": "admin", "description": "Admin management"},
+            {"name": "users", "description": "User profile and context"},
+            {"name": "websocket", "description": "WebSocket endpoints"},
+        ],
    )

+    # Middleware (order matters: each add_middleware() call wraps the ones added before it, so the last one added is outermost)
+    app.add_middleware(RequestIDMiddleware)
    app.add_middleware(
        CORSMiddleware,
        allow_origins=settings.BACKEND_CORS_ORIGINS,
@@ -37,6 +62,48 @@ def create_app() -> FastAPI:
        allow_headers=["*"],
    )

+    # Exception handlers
+    @app.exception_handler(AppException)
+    async def app_exception_handler(request: Request, exc: AppException):
+        """Render an AppException as the standard ``{"error": {...}}`` body."""
+        error_body = {
+            "code": exc.code,
+            "message": exc.detail,
+            "request_id": get_request_id(),
+        }
+        return JSONResponse(status_code=exc.status_code, content={"error": error_body})
+
+    @app.exception_handler(RequestValidationError)
+    async def validation_exception_handler(request: Request, exc: RequestValidationError):
+        """Render request validation failures as a 422 in the standard error format."""
+        # Function-scope import so the handler brings its own dependency.
+        from fastapi.encoders import jsonable_encoder
+
+        return JSONResponse(
+            status_code=422,
+            content={
+                "error": {
+                    "code": "VALIDATION_ERROR",
+                    "message": "Request validation failed",
+                    # errors() can carry values json.dumps rejects (e.g. the
+                    # exception object raised by a custom validator, under
+                    # "ctx"); encode them first so this handler cannot itself
+                    # fail and turn a 422 into a 500.
+                    "details": jsonable_encoder(exc.errors()),
+                    "request_id": get_request_id(),
+                }
+            },
+        )
+
+    @app.exception_handler(Exception)
+    async def generic_exception_handler(request: Request, exc: Exception):
+        """Log an otherwise unhandled exception and answer with an opaque 500."""
+        logger.exception("Unhandled exception", extra={"request_id": get_request_id()})
+
+        error_body = {
+            "code": "INTERNAL_ERROR",
+            "message": "An internal error occurred",
+            "request_id": get_request_id(),
+        }
+        return JSONResponse(status_code=500, content={"error": error_body})
+
    from app.api.v1.router import api_v1_router
    app.include_router(api_v1_router)

--- a/backend/pyproject.toml
+++ b/backend/pyproject.toml
@@ -21,6 +21,7 @@ dependencies = [
    "apscheduler>=3.10.0",
    "weasyprint>=62.0",
    "jinja2>=3.1.0",
+    "python-json-logger>=2.0.0",
 ]

 [project.optional-dependencies]