Files
notify-bridge/packages/server/src/notify_bridge_server/api/app_settings.py
T
alexei.dolgolyov f50d465c0e feat(logging): production-grade logging with context vars, secret masking, and runtime level control
Boot-time logging was a three-line basicConfig stub with no timestamps, no
correlation, and silent drops at every layer of the Telegram send path — a
/random command that delivered text but no media left zero evidence in the
log. This replaces the setup and closes every silent drop encountered end-to-end.

New infrastructure:
- notify_bridge_core.log_context: request_id/command/chat_id/bot_id/dispatch_id
  ContextVars with a bind_log_context() context manager so deep call sites
  (TelegramClient, NotificationDispatcher) inherit the correlation tag without
  threading args through.
- notify_bridge_server.logging_setup: dictConfig-based setup with a
  LogRecordFactory that tags every record, a SecretMaskingFilter that redacts
  /botN:TOKEN plus Authorization/x-api-key/password/secret in messages AND
  tracebacks, a JSON formatter for aggregators, text formatter with grep-friendly
  [req=... cmd=... bot=... chat=... disp=...] prefix, and default dampening
  for sqlalchemy/aiohttp/apscheduler/urllib3/PIL.

Runtime control:
- NOTIFY_BRIDGE_LOG_LEVEL / _FORMAT / _LEVELS env vars (boot).
- DB-backed log_level / log_format / log_levels AppSettings, applied on
  boot after migrations and live via apply_log_levels() when edited in
  the settings UI (format still requires restart, logs a WARN).
- Frontend settings page gains a Logging card (level dropdown, format
  dropdown, per-module overrides); en/ru i18n keys added.

Call-site fixes (/random media-group blind spot and adjacent):
- TelegramClient._fetch_asset: every silent drop now WARN-logs with reason
  (missing url, HTTP non-200, size/dimension limits, ClientError).
- TelegramClient._send_media_group: WARN on "chunk had N items but 0 usable",
  ERROR on sendMediaGroup non-ok/transport with full context; returns
  success=False + "no_items_delivered" instead of success=True with an empty
  message_ids list so callers can distinguish.
- TelegramClient.send_message / _upload_media / _send_from_cache: ERROR on
  non-ok + transport failures with status/code/desc; DEBUG for cache-hit
  fallbacks.
- NotificationDispatcher.dispatch: generates a dispatch_id, binds it, logs
  start/finish with failure count, uses exc_info for target failures.
- commands/handler: missing/failed templates -> ERROR + exc_info; send_reply
  and send_media_group errors upgraded WARNING -> ERROR with chat/error_code
  context; rate-limit and truncation cases logged with full context.
- commands/webhook and services/telegram_poller: bind_log_context(request_id
  =tg:<update_id>, command, chat_id, bot_id), INFO on receive/dispatch/
  completion with duration, exc_info on raise, INFO when commands disabled.
- commands/immich: INFO when album scope is empty; WARN per asset dropped
  from media payload and a summary WARN when "N assets in, 0 out".
2026-04-23 14:41:26 +03:00

244 lines
9.6 KiB
Python

"""App-level settings API (admin only)."""
import logging
import os
from fastapi import APIRouter, Depends
from pydantic import BaseModel
from sqlmodel import select
from sqlmodel.ext.asyncio.session import AsyncSession
from ..auth.dependencies import get_current_user, require_admin
from ..database.engine import get_session
from ..database.models import AppSetting, TelegramBot, User
_LOGGER = logging.getLogger(__name__)
router = APIRouter(prefix="/api/settings", tags=["settings"])
# Keys exposed to the frontend with their env-var fallbacks
_SETTING_KEYS = {
"external_url": "NOTIFY_BRIDGE_EXTERNAL_URL",
"telegram_webhook_secret": "NOTIFY_BRIDGE_TELEGRAM_WEBHOOK_SECRET",
"telegram_cache_ttl_hours": None, # URL cache TTL; 0 disables TTL
"telegram_asset_cache_max_entries": None, # LRU cap for both caches
"supported_locales": None, # comma-separated locale codes
"timezone": "NOTIFY_BRIDGE_TIMEZONE", # IANA tz (e.g. "Europe/Warsaw"); empty = UTC
# Logging — applied live via apply_log_levels() when changed.
"log_level": "NOTIFY_BRIDGE_LOG_LEVEL", # DEBUG/INFO/WARNING/ERROR
"log_format": "NOTIFY_BRIDGE_LOG_FORMAT", # text|json (requires restart to switch)
"log_levels": "NOTIFY_BRIDGE_LOG_LEVELS", # module=LEVEL,module2=LEVEL
}
_DEFAULTS = {
"external_url": "",
"telegram_webhook_secret": "",
# 720h = 30d. URL cache only; asset cache uses thumbhash validation
# (content-addressable) and ignores TTL entirely.
"telegram_cache_ttl_hours": "720",
"telegram_asset_cache_max_entries": "5000",
"supported_locales": "en,ru",
"timezone": "UTC",
"log_level": "INFO",
"log_format": "text",
"log_levels": "",
}
# Settings whose changes require dropping in-memory Telegram caches so the
# next dispatch rebuilds them with the new parameters. Files are preserved.
_CACHE_SETTING_KEYS = {"telegram_cache_ttl_hours", "telegram_asset_cache_max_entries"}
# Settings that change logging behaviour. ``log_level`` + ``log_levels`` apply
# live via apply_log_levels(); ``log_format`` requires a restart because
# changing it means swapping the handler formatter entirely.
_LOG_SETTING_KEYS = {"log_level", "log_levels", "log_format"}
async def get_setting(session: AsyncSession, key: str) -> str:
"""Read a setting from DB, falling back to env var then default."""
row = await session.get(AppSetting, key)
if row and row.value:
return row.value
env_key = _SETTING_KEYS.get(key)
if env_key:
env_val = os.environ.get(env_key, "")
if env_val:
return env_val
return _DEFAULTS.get(key, "")
class SettingsUpdate(BaseModel):
# Numeric fields declared as int|str so clients can send either form.
# Svelte's bind:value on <input type="number"> coerces to a JS number,
# so the frontend sends ints for these; older/manual clients may send
# strings. We normalize to str before persisting.
external_url: str | None = None
telegram_webhook_secret: str | None = None
telegram_cache_ttl_hours: int | str | None = None
telegram_asset_cache_max_entries: int | str | None = None
supported_locales: str | None = None
timezone: str | None = None
log_level: str | None = None
log_format: str | None = None
log_levels: str | None = None
@router.get("")
async def get_settings(
user: User = Depends(require_admin),
session: AsyncSession = Depends(get_session),
):
"""Return all app settings."""
result = {}
for key in _SETTING_KEYS:
value = await get_setting(session, key)
if key == "telegram_webhook_secret" and value:
result[key] = f"***{value[-4:]}" if len(value) > 4 else "***"
else:
result[key] = value
return result
@router.put("")
async def update_settings(
body: SettingsUpdate,
user: User = Depends(require_admin),
session: AsyncSession = Depends(get_session),
):
"""Update app settings (admin). Re-registers webhooks when base URL changes."""
old_base_url = await get_setting(session, "external_url")
old_secret = await get_setting(session, "telegram_webhook_secret")
old_cache_values = {k: await get_setting(session, k) for k in _CACHE_SETTING_KEYS}
old_timezone = await get_setting(session, "timezone")
old_log_values = {k: await get_setting(session, k) for k in _LOG_SETTING_KEYS}
for key in _SETTING_KEYS:
value = getattr(body, key, None)
if value is None:
continue
value_str = str(value)
# GET masks the webhook secret as "***<last4>" so the real value is
# never exposed to the frontend. If the client sends the mask back
# (which happens on every save, since bind:value holds whatever GET
# returned), treat it as "unchanged" — otherwise we'd overwrite the
# real secret with its mask, silently breaking webhook HMAC.
if key == "telegram_webhook_secret" and value_str.startswith("***"):
continue
row = await session.get(AppSetting, key)
if row:
row.value = value_str
else:
row = AppSetting(key=key, value=value_str)
session.add(row)
await session.commit()
# Drop in-memory caches if any cache-tuning setting actually changed, so
# the next dispatch rebuilds them with the new parameters. Files survive.
cache_changed = False
for key in _CACHE_SETTING_KEYS:
if await get_setting(session, key) != old_cache_values[key]:
cache_changed = True
break
if cache_changed:
from ..services.watcher import reset_telegram_caches_in_memory
await reset_telegram_caches_in_memory()
new_base_url = await get_setting(session, "external_url")
new_secret = await get_setting(session, "telegram_webhook_secret")
new_timezone = await get_setting(session, "timezone")
new_log_values = {k: await get_setting(session, k) for k in _LOG_SETTING_KEYS}
# Apply live log-level changes (log_format still needs a restart).
if (new_log_values["log_level"] != old_log_values["log_level"]
or new_log_values["log_levels"] != old_log_values["log_levels"]):
from ..logging_setup import apply_log_levels
apply_log_levels(
level=new_log_values["log_level"] or None,
per_module_levels=new_log_values["log_levels"],
)
_LOGGER.info(
"Log levels updated: root=%s overrides=%r",
new_log_values["log_level"], new_log_values["log_levels"],
)
if new_log_values["log_format"] != old_log_values["log_format"]:
_LOGGER.warning(
"log_format changed from %r to %r — restart the server for it to take effect",
old_log_values["log_format"], new_log_values["log_format"],
)
# Cron triggers freeze their timezone at construction time, so a tz change
# has no effect until the jobs are rebuilt — do that here, before we
# return success, so the UI reflects the actual schedule immediately.
if new_timezone != old_timezone:
from ..services.scheduler import reschedule_cron_jobs_for_timezone_change
await reschedule_cron_jobs_for_timezone_change()
# Update webhook secret in the webhook handler module
if new_secret != old_secret:
from ..commands.webhook import set_webhook_secret
set_webhook_secret(new_secret or None)
# Re-register webhooks for all bots in webhook mode if URL or secret changed
if new_base_url and (new_base_url != old_base_url or new_secret != old_secret):
await _reregister_webhooks(session, new_base_url, new_secret)
result = {}
for key in _SETTING_KEYS:
result[key] = await get_setting(session, key)
return result
@router.get("/telegram-cache/stats")
async def telegram_cache_stats(
user: User = Depends(require_admin),
):
"""Return counts and sizes for the Telegram file_id caches."""
from ..services.watcher import get_telegram_cache_stats
return await get_telegram_cache_stats()
@router.post("/telegram-cache/clear")
async def clear_telegram_cache(
user: User = Depends(require_admin),
):
"""Clear the Telegram file_id cache (URL and asset) from disk and memory."""
from ..services.watcher import clear_telegram_caches
result = await clear_telegram_caches()
return result
@router.get("/locales")
async def get_supported_locales(
user: User = Depends(get_current_user),
session: AsyncSession = Depends(get_session),
):
"""Return list of supported template locales (available to all users)."""
raw = await get_setting(session, "supported_locales")
locales = [loc.strip() for loc in raw.split(",") if loc.strip()]
return locales or ["en"]
async def _reregister_webhooks(
session: AsyncSession, base_url: str, secret: str
) -> None:
"""Re-register webhooks for all bots in webhook mode."""
from ..commands.webhook import register_webhook
result = await session.exec(
select(TelegramBot).where(TelegramBot.update_mode == "webhook")
)
bots = result.all()
for bot in bots:
webhook_url = f"{base_url.rstrip('/')}/api/telegram/webhook/{bot.webhook_path_id}"
res = await register_webhook(bot.token, webhook_url, secret or None)
if res.get("success"):
_LOGGER.info("Re-registered webhook for bot %d (%s)", bot.id, bot.name)
else:
# Webhook re-register failure means the bot silently stops
# delivering updates — this is operational visibility for an
# admin, ERROR is appropriate.
_LOGGER.error(
"Failed to re-register webhook for bot %d (%s): %s",
bot.id, bot.name, res.get("error"),
)