feat: observability, per-receiver Telegram options, oversized-video fallback
Operability:
- Correlation IDs end-to-end: shared dispatch_id between log lines and EventLog rows (event/watcher/scheduled/deferred/action/HA/command paths) and a new X-Request-Id middleware that normalizes inbound ids and binds request_id into log context.
- dispatch_summary block merged into EventLog.details: per-target success/failure counts plus Telegram media delivered/skipped/failed and truncated error lists, so partial outcomes surface in the UI.
- Diagnostic mode: admin can flip one module to DEBUG for a bounded window with auto-revert (in-memory only; setup_logging() resets on boot, lifespan reverts on shutdown). New /diagnostic-mode endpoints plus DiagnosticsCassette UI on the settings page.

Telegram:
- Per-receiver options: disable_notification (silent send) and message_thread_id (forum-topic routing), wired through the dispatcher via a ContextVar so all four send sites (sendMessage / sendPhoto-Video-Document / sendMediaGroup / cache-hit POST) pick them up.
- send_large_videos_as_documents target setting: bypass the 50 MB sendVideo cap by falling back to sendDocument for oversized videos.
- sendMediaGroup byte-budget enforcement (TELEGRAM_MAX_GROUP_TOTAL_BYTES, 45 MB) with per-item fallback on chunk failure so a stale file_id no longer silently drops a cached asset.

Tests:
- New: diagnostic_mode, dispatch_summary, request_correlation, telegram_media_group_partial, telegram_per_send_options.

Docs:
- .claude/reviews/: six-axis production-readiness review of v0.8.1.
- .claude/docs/functional-review-2026-05-28.md: focused review of Telegram/Immich/logging subsystems.
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
"""Notify Bridge Server — FastAPI application entry point."""
|
||||
|
||||
import logging
|
||||
import uuid
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
from fastapi import FastAPI
|
||||
@@ -8,6 +9,11 @@ from fastapi.middleware.cors import CORSMiddleware
|
||||
from slowapi import _rate_limit_exceeded_handler
|
||||
from slowapi.errors import RateLimitExceeded
|
||||
from slowapi.middleware import SlowAPIMiddleware
|
||||
from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint
|
||||
from starlette.requests import Request as StarletteRequest
|
||||
from starlette.responses import Response as StarletteResponse
|
||||
|
||||
from notify_bridge_core.log_context import bind_log_context
|
||||
|
||||
from .config import settings as _log_cfg
|
||||
from .logging_setup import setup_logging
|
||||
@@ -163,6 +169,16 @@ async def lifespan(app: FastAPI):
|
||||
_READY = False
|
||||
from .services.ha_subscription import stop_all as stop_ha_subscriptions
|
||||
await stop_ha_subscriptions()
|
||||
# Restore the DB-configured baseline level for any temporary DEBUG
|
||||
# overrides before the engine is disposed — so even a forced restart
|
||||
# leaves the world tidy and doesn't leak DEBUG state into the next
|
||||
# process (which would also be wiped by setup_logging() at boot, but
|
||||
# being explicit about shutdown is cheaper than relying on a re-init).
|
||||
from .services.diagnostic_mode import revert_all as revert_diagnostics
|
||||
try:
|
||||
await revert_diagnostics()
|
||||
except Exception: # pragma: no cover — never block shutdown on this.
|
||||
_LOGGER.exception("Failed to revert diagnostic overrides during shutdown")
|
||||
scheduler = get_scheduler()
|
||||
if scheduler.running:
|
||||
scheduler.shutdown(wait=True)
|
||||
@@ -178,9 +194,55 @@ _APP_VERSION = _resolve_version()
|
||||
app = FastAPI(title="Notify Bridge", version=_APP_VERSION, lifespan=lifespan)
|
||||
|
||||
# --- Security headers ---
|
||||
from starlette.middleware.base import BaseHTTPMiddleware
|
||||
from starlette.requests import Request as StarletteRequest
|
||||
from starlette.responses import Response as StarletteResponse
|
||||
|
||||
|
||||
# Bounded character set for accepted inbound X-Request-Id values. Anything
|
||||
# outside this is replaced with a server-generated id so a malicious header
|
||||
# can't smuggle CR/LF into log lines or break grep-by-field parsing.
|
||||
# ``:`` is intentionally excluded so an inbound value can't masquerade as a
|
||||
# server-minted ``disp:<hex>`` / ``req:<hex>`` id and confuse operator greps.
|
||||
_REQUEST_ID_MAX_LEN = 64
|
||||
_REQUEST_ID_ALLOWED = set(
|
||||
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"
|
||||
)
|
||||
|
||||
|
||||
def _normalize_request_id(raw: str | None) -> str:
|
||||
if not raw:
|
||||
return f"req:{uuid.uuid4().hex[:12]}"
|
||||
raw = raw.strip()
|
||||
if not raw or len(raw) > _REQUEST_ID_MAX_LEN:
|
||||
return f"req:{uuid.uuid4().hex[:12]}"
|
||||
if not all(c in _REQUEST_ID_ALLOWED for c in raw):
|
||||
return f"req:{uuid.uuid4().hex[:12]}"
|
||||
return raw
|
||||
|
||||
|
||||
class RequestContextMiddleware(BaseHTTPMiddleware):
    """Attach a correlation id to each request and mirror it on the response.

    The id is taken from the inbound ``X-Request-Id`` header, which lets an
    upstream proxy with its own correlation scheme propagate its value. The
    header is passed through ``_normalize_request_id``, so a missing or
    unacceptable value is replaced by a short random ``req:<12 hex>`` id.

    While the downstream application runs, the id is bound through
    :func:`bind_log_context` as ``request_id``. The intent is for it to show
    up on log lines emitted during request handling (``[req=...]``) and to
    be picked up by
    :func:`notify_bridge_core.log_context.enrich_details_with_correlation`
    when an ``EventLog`` row is written within the same request.

    The same id is always written to the response ``X-Request-Id`` header so
    the SPA can surface it in operator-friendly bug reports.
    """

    async def dispatch(
        self,
        request: StarletteRequest,
        call_next: RequestResponseEndpoint,
    ) -> StarletteResponse:
        """Run the downstream app with ``request_id`` bound, then echo the id."""
        inbound_id = request.headers.get("x-request-id")
        correlation_id = _normalize_request_id(inbound_id)

        with bind_log_context(request_id=correlation_id):
            downstream: StarletteResponse = await call_next(request)

        downstream.headers["X-Request-Id"] = correlation_id
        return downstream
|
||||
|
||||
|
||||
_CSP = (
|
||||
@@ -238,6 +300,12 @@ app.add_middleware(
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
# Request-ID middleware is added LAST so it becomes the outermost wrapper —
|
||||
# every other middleware (CORS, rate limit, security headers) then logs with
|
||||
# the request_id already bound, and CORS preflight responses also carry the
|
||||
# X-Request-Id echo header.
|
||||
app.add_middleware(RequestContextMiddleware)
|
||||
|
||||
# Register routes — static paths before parameterized
|
||||
app.include_router(auth_router)
|
||||
app.include_router(template_vars_router)
|
||||
|
||||
Reference in New Issue
Block a user