f50d465c0e
Boot-time logging was a three-line basicConfig stub with no timestamps, no correlation, and silent drops at every layer of the Telegram send path — a /random command that delivered text but no media left zero evidence in the log. This replaces the setup and closes every silent drop encountered end-to-end. New infrastructure: - notify_bridge_core.log_context: request_id/command/chat_id/bot_id/dispatch_id ContextVars with a bind_log_context() context manager so deep call sites (TelegramClient, NotificationDispatcher) inherit the correlation tag without threading args through. - notify_bridge_server.logging_setup: dictConfig-based setup with a LogRecordFactory that tags every record, a SecretMaskingFilter that redacts /botN:TOKEN plus Authorization/x-api-key/password/secret in messages AND tracebacks, a JSON formatter for aggregators, text formatter with grep-friendly [req=... cmd=... bot=... chat=... disp=...] prefix, and default dampening for sqlalchemy/aiohttp/apscheduler/urllib3/PIL. Runtime control: - NOTIFY_BRIDGE_LOG_LEVEL / _FORMAT / _LEVELS env vars (boot). - DB-backed log_level / log_format / log_levels AppSettings, applied on boot after migrations and live via apply_log_levels() when edited in the settings UI (format still requires restart, logs a WARN). - Frontend settings page gains a Logging card (level dropdown, format dropdown, per-module overrides); en/ru i18n keys added. Call-site fixes (/random media-group blind spot and adjacent): - TelegramClient._fetch_asset: every silent drop now WARN-logs with reason (missing url, HTTP non-200, size/dimension limits, ClientError). - TelegramClient._send_media_group: WARN on "chunk had N items but 0 usable", ERROR on sendMediaGroup non-ok/transport with full context; returns success=False + "no_items_delivered" instead of success=True with an empty message_ids list so callers can distinguish. - TelegramClient.send_message / _upload_media / _send_from_cache: ERROR on non-ok + transport failures with status/code/desc; DEBUG for cache-hit fallbacks. - NotificationDispatcher.dispatch: generates a dispatch_id, binds it, logs start/finish with failure count, uses exc_info for target failures. - commands/handler: missing/failed templates -> ERROR + exc_info; send_reply and send_media_group errors upgraded WARNING -> ERROR with chat/error_code context; rate-limit and truncation cases logged with full context. - commands/webhook and services/telegram_poller: bind_log_context(request_id =tg:<update_id>, command, chat_id, bot_id), INFO on receive/dispatch/ completion with duration, exc_info on raise, INFO when commands disabled. - commands/immich: INFO when album scope is empty; WARN per asset dropped from media payload and a summary WARN when "N assets in, 0 out".
325 lines
12 KiB
Python
325 lines
12 KiB
Python
"""Production-grade logging configuration.
|
|
|
|
Installs one ``dictConfig`` layout with:
|
|
|
|
* A ``LogRecordFactory`` that pulls request-scoped identifiers from
|
|
``notify_bridge_core.log_context`` onto every record, so logs can be
|
|
filtered/correlated by ``request_id``, ``command``, ``chat_id``,
|
|
``bot_id``, ``dispatch_id`` without each call site passing them.
|
|
* A ``SecretMaskingFilter`` that redacts Telegram bot tokens and common
|
|
``Authorization`` / ``x-api-key`` headers so an accidental ``repr`` or
|
|
dumped request doesn't leak credentials into the log aggregator.
|
|
* A text formatter (default) or a JSON formatter (one line per record)
|
|
selectable via ``NOTIFY_BRIDGE_LOG_FORMAT`` / app setting.
|
|
|
|
Levels are configurable three ways (later wins):
|
|
|
|
1. ``NOTIFY_BRIDGE_LOG_LEVEL`` env var (root) plus
|
|
``NOTIFY_BRIDGE_LOG_LEVELS`` (``mod=LEVEL,mod2=LEVEL``).
|
|
2. DB ``AppSetting`` rows ``log_level`` / ``log_levels`` / ``log_format``,
|
|
applied after migrations during startup.
|
|
3. Live edits via the settings API — ``apply_log_levels()`` updates
|
|
existing loggers in place without a server restart.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import logging
|
|
import logging.config
|
|
import re
|
|
import sys
|
|
from typing import Any
|
|
|
|
from notify_bridge_core.log_context import (
|
|
bot_id_var,
|
|
chat_id_var,
|
|
command_var,
|
|
dispatch_id_var,
|
|
request_id_var,
|
|
)
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Secret masking
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Telegram bot tokens: /bot<digits>:<alnum with dashes/underscores>
|
|
_TELEGRAM_TOKEN_RE = re.compile(r"/bot\d+:[A-Za-z0-9_-]{20,}")
|
|
|
|
# Header-style secrets: Authorization: Bearer xxx, x-api-key=xxx, etc.
|
|
# Only matches reasonably long tokens so short legitimate values don't trip.
|
|
_HEADER_SECRET_RE = re.compile(
|
|
r"(?i)(authorization|x-api-key|api[_-]?key|password|secret|access[_-]?token|refresh[_-]?token)"
|
|
r"([\"']?\s*[:=]\s*[\"']?)"
|
|
r"([A-Za-z0-9._+/=\-]{12,})"
|
|
)
|
|
|
|
|
|
def _mask(text: str) -> str:
|
|
redacted = _TELEGRAM_TOKEN_RE.sub("/bot***", text)
|
|
redacted = _HEADER_SECRET_RE.sub(r"\1\2***", redacted)
|
|
return redacted
|
|
|
|
|
|
class SecretMaskingFilter(logging.Filter):
|
|
"""Redact likely secrets from every log message before it's emitted.
|
|
|
|
Covers three surfaces where a leaked token can end up in the log:
|
|
the formatted message, a cached exception traceback (``exc_text``),
|
|
and a cached stack frame dump (``stack_info``). The formatter still
|
|
expands ``exc_info`` for us when ``exc_text`` is None, so we also
|
|
pre-render + mask on first emission.
|
|
"""
|
|
|
|
def filter(self, record: logging.LogRecord) -> bool:
|
|
try:
|
|
msg = record.getMessage()
|
|
except Exception:
|
|
return True
|
|
redacted = _mask(msg)
|
|
if redacted != msg:
|
|
# Replace the formatted message and drop args so the handler
|
|
# doesn't re-format with the original values.
|
|
record.msg = redacted
|
|
record.args = ()
|
|
|
|
if record.exc_info and not record.exc_text:
|
|
# Pre-render so we can mask before the formatter caches it.
|
|
fmt = logging.Formatter()
|
|
record.exc_text = fmt.formatException(record.exc_info)
|
|
if record.exc_text:
|
|
record.exc_text = _mask(record.exc_text)
|
|
if record.stack_info:
|
|
record.stack_info = _mask(record.stack_info)
|
|
return True
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Record factory — injects context identifiers onto every record
|
|
# ---------------------------------------------------------------------------
|
|
|
|
_CONTEXT_FIELDS = ("request_id", "command", "chat_id", "bot_id", "dispatch_id")
|
|
_PLACEHOLDER = "-"
|
|
|
|
_original_factory = logging.getLogRecordFactory()
|
|
|
|
|
|
def _context_record_factory(*args: Any, **kwargs: Any) -> logging.LogRecord:
|
|
record = _original_factory(*args, **kwargs)
|
|
record.request_id = request_id_var.get() or _PLACEHOLDER
|
|
record.command = command_var.get() or _PLACEHOLDER
|
|
record.chat_id = chat_id_var.get() or _PLACEHOLDER
|
|
bid = bot_id_var.get()
|
|
record.bot_id = str(bid) if bid is not None else _PLACEHOLDER
|
|
record.dispatch_id = dispatch_id_var.get() or _PLACEHOLDER
|
|
return record
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# JSON formatter
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class JsonFormatter(logging.Formatter):
|
|
"""Emit one JSON object per log record."""
|
|
|
|
def format(self, record: logging.LogRecord) -> str:
|
|
payload: dict[str, Any] = {
|
|
"ts": self.formatTime(record, "%Y-%m-%dT%H:%M:%S") + f".{int(record.msecs):03d}",
|
|
"level": record.levelname,
|
|
"logger": record.name,
|
|
"module": record.module,
|
|
"line": record.lineno,
|
|
"msg": record.getMessage(),
|
|
}
|
|
for field in _CONTEXT_FIELDS:
|
|
val = getattr(record, field, None)
|
|
if val and val != _PLACEHOLDER:
|
|
payload[field] = val
|
|
# Prefer the pre-masked exc_text cached by SecretMaskingFilter over
|
|
# re-formatting from exc_info, which would bypass the mask.
|
|
if record.exc_text:
|
|
payload["exc"] = record.exc_text
|
|
elif record.exc_info:
|
|
payload["exc"] = self.formatException(record.exc_info)
|
|
if record.stack_info:
|
|
payload["stack"] = record.stack_info
|
|
return json.dumps(payload, ensure_ascii=False, default=str)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Text formatter
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Keeps all context fields on one line so grep-by-field works. Empty values
|
|
# are rendered as "-" by the record factory to avoid KeyError if a record
|
|
# arrives without the filter.
|
|
_TEXT_FORMAT = (
|
|
"%(asctime)s %(levelname)-7s %(name)s:%(lineno)d "
|
|
"[req=%(request_id)s cmd=%(command)s bot=%(bot_id)s chat=%(chat_id)s disp=%(dispatch_id)s] "
|
|
"%(message)s"
|
|
)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Level override parsing
|
|
# ---------------------------------------------------------------------------
|
|
|
|
_VALID_LEVELS = {"DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL", "NOTSET"}
|
|
|
|
|
|
def parse_level_overrides(raw: str) -> dict[str, str]:
|
|
"""Parse ``module=LEVEL,module2=LEVEL`` into a mapping of validated levels.
|
|
|
|
Invalid entries (bad format, unknown level) are silently dropped —
|
|
a malformed env var or DB setting must not crash boot.
|
|
"""
|
|
result: dict[str, str] = {}
|
|
for chunk in (raw or "").split(","):
|
|
chunk = chunk.strip()
|
|
if not chunk or "=" not in chunk:
|
|
continue
|
|
mod, _, lvl = chunk.partition("=")
|
|
mod = mod.strip()
|
|
lvl = lvl.strip().upper()
|
|
if not mod or lvl not in _VALID_LEVELS:
|
|
continue
|
|
result[mod] = lvl
|
|
return result
|
|
|
|
|
|
def _normalize_level(level: str | None, default: str = "INFO") -> str:
|
|
if not level:
|
|
return default
|
|
up = level.strip().upper()
|
|
return up if up in _VALID_LEVELS else default
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Setup + live apply
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Libraries we quiet by default — noisy at DEBUG and almost always irrelevant
|
|
# to a service issue. Override via LOG_LEVELS=sqlalchemy.engine=DEBUG when
|
|
# actually debugging.
|
|
_NOISY_LIBRARY_DEFAULTS: dict[str, str] = {
|
|
"sqlalchemy": "WARNING",
|
|
"sqlalchemy.engine": "WARNING",
|
|
"sqlalchemy.pool": "WARNING",
|
|
"aiohttp": "WARNING",
|
|
"aiohttp.access": "WARNING",
|
|
"aiohttp.client": "WARNING",
|
|
"aiohttp.server": "WARNING",
|
|
"apscheduler": "WARNING",
|
|
"apscheduler.scheduler": "WARNING",
|
|
"apscheduler.executors.default": "WARNING",
|
|
"urllib3": "WARNING",
|
|
"asyncio": "WARNING",
|
|
"httpx": "WARNING",
|
|
"httpcore": "WARNING",
|
|
"PIL": "WARNING",
|
|
"uvicorn.access": "WARNING",
|
|
}
|
|
|
|
|
|
def setup_logging(
|
|
*,
|
|
level: str = "INFO",
|
|
fmt: str = "text",
|
|
per_module_levels: str = "",
|
|
) -> None:
|
|
"""Install the logging configuration. Safe to call more than once.
|
|
|
|
Args:
|
|
level: Root log level (applied to ``notify_bridge_*`` loggers).
|
|
fmt: ``"text"`` (default) or ``"json"``.
|
|
per_module_levels: ``mod=LEVEL,mod2=LEVEL`` overrides. Wins over the
|
|
root level for the listed loggers.
|
|
"""
|
|
root_level = _normalize_level(level, "INFO")
|
|
overrides = parse_level_overrides(per_module_levels)
|
|
|
|
# Install the context-aware record factory (idempotent — setting the same
|
|
# factory twice is fine because ``_original_factory`` is captured at
|
|
# import time).
|
|
logging.setLogRecordFactory(_context_record_factory)
|
|
|
|
if fmt == "json":
|
|
formatters = {"default": {"()": f"{__name__}.JsonFormatter"}}
|
|
else:
|
|
formatters = {
|
|
"default": {
|
|
"format": _TEXT_FORMAT,
|
|
"datefmt": "%Y-%m-%d %H:%M:%S",
|
|
}
|
|
}
|
|
|
|
# Start with noisy-library defaults, then layer user overrides on top so
|
|
# the user can raise them to DEBUG when actually debugging.
|
|
loggers: dict[str, dict[str, Any]] = {}
|
|
for mod, lvl in _NOISY_LIBRARY_DEFAULTS.items():
|
|
loggers[mod] = {"level": lvl, "propagate": True}
|
|
# App loggers follow the root level unless overridden.
|
|
loggers["notify_bridge_server"] = {"level": root_level, "propagate": True}
|
|
loggers["notify_bridge_core"] = {"level": root_level, "propagate": True}
|
|
# User overrides win.
|
|
for mod, lvl in overrides.items():
|
|
loggers[mod] = {"level": lvl, "propagate": True}
|
|
|
|
config: dict[str, Any] = {
|
|
"version": 1,
|
|
"disable_existing_loggers": False,
|
|
"filters": {
|
|
"mask_secrets": {"()": f"{__name__}.SecretMaskingFilter"},
|
|
},
|
|
"formatters": formatters,
|
|
"handlers": {
|
|
"stderr": {
|
|
"class": "logging.StreamHandler",
|
|
"stream": sys.stderr,
|
|
"formatter": "default",
|
|
"filters": ["mask_secrets"],
|
|
},
|
|
},
|
|
"root": {
|
|
"level": root_level,
|
|
"handlers": ["stderr"],
|
|
},
|
|
"loggers": loggers,
|
|
}
|
|
logging.config.dictConfig(config)
|
|
|
|
|
|
def apply_log_levels(
|
|
*,
|
|
level: str | None,
|
|
per_module_levels: str | None,
|
|
) -> None:
|
|
"""Update existing logger levels in-place without rebuilding handlers.
|
|
|
|
Called when an admin changes the log settings at runtime. Setting
|
|
``level`` to None leaves the root untouched; setting it to a valid
|
|
level applies to ``notify_bridge_server`` / ``notify_bridge_core``.
|
|
|
|
``per_module_levels`` is treated as an exclusive set — loggers that
|
|
previously had an override but aren't in the new string are reset
|
|
*toward* the root level so a removed override actually takes effect.
|
|
"""
|
|
if level:
|
|
lvl = _normalize_level(level, "INFO")
|
|
logging.getLogger("notify_bridge_server").setLevel(lvl)
|
|
logging.getLogger("notify_bridge_core").setLevel(lvl)
|
|
# NOTSET on root is almost never what you want — keep root where it is
|
|
# unless the caller explicitly set something.
|
|
logging.getLogger().setLevel(lvl)
|
|
|
|
if per_module_levels is not None:
|
|
overrides = parse_level_overrides(per_module_levels)
|
|
# Apply new overrides
|
|
for mod, lvl in overrides.items():
|
|
logging.getLogger(mod).setLevel(lvl)
|
|
# Reset noisy libs that aren't in the new overrides back to defaults
|
|
for mod, default_lvl in _NOISY_LIBRARY_DEFAULTS.items():
|
|
if mod not in overrides:
|
|
logging.getLogger(mod).setLevel(default_lvl)
|