notify-bridge/packages/server/src/notify_bridge_server/logging_setup.py

"""Production-grade logging configuration.

Installs one ``dictConfig`` layout with:

* A ``LogRecordFactory`` that pulls request-scoped identifiers from
  ``notify_bridge_core.log_context`` onto every record, so logs can be
  filtered/correlated by ``request_id``, ``command``, ``chat_id``,
  ``bot_id``, ``dispatch_id`` without each call site passing them.
* A ``SecretMaskingFilter`` that redacts Telegram bot tokens and common
  ``Authorization`` / ``x-api-key`` headers so an accidental ``repr`` or
  dumped request doesn't leak credentials into the log aggregator.
* A text formatter (default) or a JSON formatter (one line per record)
  selectable via ``NOTIFY_BRIDGE_LOG_FORMAT`` / app setting.

Levels are configurable three ways (later wins):

1. ``NOTIFY_BRIDGE_LOG_LEVEL`` env var (root) plus
   ``NOTIFY_BRIDGE_LOG_LEVELS`` (``mod=LEVEL,mod2=LEVEL``).
2. DB ``AppSetting`` rows ``log_level`` / ``log_levels`` / ``log_format``,
   applied after migrations during startup.
3. Live edits via the settings API — ``apply_log_levels()`` updates
   existing loggers in place without a server restart.
"""

from __future__ import annotations

import json
import logging
import logging.config
import re
import sys
from typing import Any

from notify_bridge_core.log_context import (
    bot_id_var,
    chat_id_var,
    command_var,
    dispatch_id_var,
    request_id_var,
)

# ---------------------------------------------------------------------------
# Secret masking
# ---------------------------------------------------------------------------

# Telegram bot tokens: /bot<digits>:<alnum with dashes/underscores>
_TELEGRAM_TOKEN_RE = re.compile(r"/bot\d+:[A-Za-z0-9_-]{20,}")

# Header-style secrets: Authorization: Bearer xxx, x-api-key=xxx, etc.
# Only matches reasonably long tokens so short legitimate values don't trip.
_HEADER_SECRET_RE = re.compile(
    r"(?i)(authorization|x-api-key|api[_-]?key|password|secret|access[_-]?token|refresh[_-]?token)"
    r"([\"']?\s*[:=]\s*[\"']?)"
    r"([A-Za-z0-9._+/=\-]{12,})"
)


def _mask(text: str) -> str:
    redacted = _TELEGRAM_TOKEN_RE.sub("/bot***", text)
    redacted = _HEADER_SECRET_RE.sub(r"\1\2***", redacted)
    return redacted


class SecretMaskingFilter(logging.Filter):
    """Redact likely secrets from every log message before it's emitted.

    Covers three surfaces where a leaked token can end up in the log:
    the formatted message, a cached exception traceback (``exc_text``),
    and a cached stack frame dump (``stack_info``). The formatter still
    expands ``exc_info`` for us when ``exc_text`` is None, so we also
    pre-render + mask on first emission.
    """

    def filter(self, record: logging.LogRecord) -> bool:
        try:
            msg = record.getMessage()
        except Exception:
            return True
        redacted = _mask(msg)
        if redacted != msg:
            # Replace the formatted message and drop args so the handler
            # doesn't re-format with the original values.
            record.msg = redacted
            record.args = ()

        if record.exc_info and not record.exc_text:
            # Pre-render so we can mask before the formatter caches it.
            fmt = logging.Formatter()
            record.exc_text = fmt.formatException(record.exc_info)
        if record.exc_text:
            record.exc_text = _mask(record.exc_text)
        if record.stack_info:
            record.stack_info = _mask(record.stack_info)
        return True


# ---------------------------------------------------------------------------
# Record factory — injects context identifiers onto every record
# ---------------------------------------------------------------------------

_CONTEXT_FIELDS = ("request_id", "command", "chat_id", "bot_id", "dispatch_id")
_PLACEHOLDER = "-"

_original_factory = logging.getLogRecordFactory()


def _context_record_factory(*args: Any, **kwargs: Any) -> logging.LogRecord:
    record = _original_factory(*args, **kwargs)
    record.request_id = request_id_var.get() or _PLACEHOLDER
    record.command = command_var.get() or _PLACEHOLDER
    record.chat_id = chat_id_var.get() or _PLACEHOLDER
    bid = bot_id_var.get()
    record.bot_id = str(bid) if bid is not None else _PLACEHOLDER
    record.dispatch_id = dispatch_id_var.get() or _PLACEHOLDER
    return record


# ---------------------------------------------------------------------------
# JSON formatter
# ---------------------------------------------------------------------------


class JsonFormatter(logging.Formatter):
    """Emit one JSON object per log record."""

    def format(self, record: logging.LogRecord) -> str:
        payload: dict[str, Any] = {
            "ts": self.formatTime(record, "%Y-%m-%dT%H:%M:%S") + f".{int(record.msecs):03d}",
            "level": record.levelname,
            "logger": record.name,
            "module": record.module,
            "line": record.lineno,
            "msg": record.getMessage(),
        }
        for field in _CONTEXT_FIELDS:
            val = getattr(record, field, None)
            if val and val != _PLACEHOLDER:
                payload[field] = val
        # Prefer the pre-masked exc_text cached by SecretMaskingFilter over
        # re-formatting from exc_info, which would bypass the mask.
        if record.exc_text:
            payload["exc"] = record.exc_text
        elif record.exc_info:
            payload["exc"] = self.formatException(record.exc_info)
        if record.stack_info:
            payload["stack"] = record.stack_info
        return json.dumps(payload, ensure_ascii=False, default=str)


# ---------------------------------------------------------------------------
# Text formatter
# ---------------------------------------------------------------------------

# Keeps all context fields on one line so grep-by-field works. Empty values
# are rendered as "-" by the record factory to avoid KeyError if a record
# arrives without the filter.
_TEXT_FORMAT = (
    "%(asctime)s %(levelname)-7s %(name)s:%(lineno)d "
    "[req=%(request_id)s cmd=%(command)s bot=%(bot_id)s chat=%(chat_id)s disp=%(dispatch_id)s] "
    "%(message)s"
)


# ---------------------------------------------------------------------------
# Level override parsing
# ---------------------------------------------------------------------------

_VALID_LEVELS = {"DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL", "NOTSET"}


def parse_level_overrides(raw: str) -> dict[str, str]:
    """Parse ``module=LEVEL,module2=LEVEL`` into a mapping of validated levels.

    Invalid entries (bad format, unknown level) are silently dropped —
    a malformed env var or DB setting must not crash boot.
    """
    result: dict[str, str] = {}
    for chunk in (raw or "").split(","):
        chunk = chunk.strip()
        if not chunk or "=" not in chunk:
            continue
        mod, _, lvl = chunk.partition("=")
        mod = mod.strip()
        lvl = lvl.strip().upper()
        if not mod or lvl not in _VALID_LEVELS:
            continue
        result[mod] = lvl
    return result


def _normalize_level(level: str | None, default: str = "INFO") -> str:
    if not level:
        return default
    up = level.strip().upper()
    return up if up in _VALID_LEVELS else default


# ---------------------------------------------------------------------------
# Setup + live apply
# ---------------------------------------------------------------------------

# Libraries we quiet by default — noisy at DEBUG and almost always irrelevant
# to a service issue. Override via LOG_LEVELS=sqlalchemy.engine=DEBUG when
# actually debugging.
_NOISY_LIBRARY_DEFAULTS: dict[str, str] = {
    "sqlalchemy": "WARNING",
    "sqlalchemy.engine": "WARNING",
    "sqlalchemy.pool": "WARNING",
    "aiohttp": "WARNING",
    "aiohttp.access": "WARNING",
    "aiohttp.client": "WARNING",
    "aiohttp.server": "WARNING",
    "apscheduler": "WARNING",
    "apscheduler.scheduler": "WARNING",
    "apscheduler.executors.default": "WARNING",
    "urllib3": "WARNING",
    "asyncio": "WARNING",
    "httpx": "WARNING",
    "httpcore": "WARNING",
    "PIL": "WARNING",
    "uvicorn.access": "WARNING",
}


def setup_logging(
    *,
    level: str = "INFO",
    fmt: str = "text",
    per_module_levels: str = "",
) -> None:
    """Install the logging configuration. Safe to call more than once.

    Args:
        level: Root log level (applied to ``notify_bridge_*`` loggers).
        fmt: ``"text"`` (default) or ``"json"``.
        per_module_levels: ``mod=LEVEL,mod2=LEVEL`` overrides. Wins over the
            root level for the listed loggers.
    """
    root_level = _normalize_level(level, "INFO")
    overrides = parse_level_overrides(per_module_levels)

    # Install the context-aware record factory (idempotent — setting the same
    # factory twice is fine because ``_original_factory`` is captured at
    # import time).
    logging.setLogRecordFactory(_context_record_factory)

    if fmt == "json":
        formatters = {"default": {"()": f"{__name__}.JsonFormatter"}}
    else:
        formatters = {
            "default": {
                "format": _TEXT_FORMAT,
                "datefmt": "%Y-%m-%d %H:%M:%S",
            }
        }

    # Start with noisy-library defaults, then layer user overrides on top so
    # the user can raise them to DEBUG when actually debugging.
    loggers: dict[str, dict[str, Any]] = {}
    for mod, lvl in _NOISY_LIBRARY_DEFAULTS.items():
        loggers[mod] = {"level": lvl, "propagate": True}
    # App loggers follow the root level unless overridden.
    loggers["notify_bridge_server"] = {"level": root_level, "propagate": True}
    loggers["notify_bridge_core"] = {"level": root_level, "propagate": True}
    # User overrides win.
    for mod, lvl in overrides.items():
        loggers[mod] = {"level": lvl, "propagate": True}

    config: dict[str, Any] = {
        "version": 1,
        "disable_existing_loggers": False,
        "filters": {
            "mask_secrets": {"()": f"{__name__}.SecretMaskingFilter"},
        },
        "formatters": formatters,
        "handlers": {
            "stderr": {
                "class": "logging.StreamHandler",
                "stream": sys.stderr,
                "formatter": "default",
                "filters": ["mask_secrets"],
            },
        },
        "root": {
            "level": root_level,
            "handlers": ["stderr"],
        },
        "loggers": loggers,
    }
    logging.config.dictConfig(config)


def apply_log_levels(
    *,
    level: str | None,
    per_module_levels: str | None,
) -> None:
    """Update existing logger levels in-place without rebuilding handlers.

    Called when an admin changes the log settings at runtime. Setting
    ``level`` to None leaves the root untouched; setting it to a valid
    level applies to ``notify_bridge_server`` / ``notify_bridge_core``.

    ``per_module_levels`` is treated as an exclusive set — loggers that
    previously had an override but aren't in the new string are reset
    *toward* the root level so a removed override actually takes effect.
    """
    if level:
        lvl = _normalize_level(level, "INFO")
        logging.getLogger("notify_bridge_server").setLevel(lvl)
        logging.getLogger("notify_bridge_core").setLevel(lvl)
        # NOTSET on root is almost never what you want — keep root where it is
        # unless the caller explicitly set something.
        logging.getLogger().setLevel(lvl)

    if per_module_levels is not None:
        overrides = parse_level_overrides(per_module_levels)
        # Apply new overrides
        for mod, lvl in overrides.items():
            logging.getLogger(mod).setLevel(lvl)
        # Reset noisy libs that aren't in the new overrides back to defaults
        for mod, default_lvl in _NOISY_LIBRARY_DEFAULTS.items():
            if mod not in overrides:
                logging.getLogger(mod).setLevel(default_lvl)