feat(logging): production-grade logging with context vars, secret masking, and runtime level control
Boot-time logging was a three-line basicConfig stub with no timestamps, no correlation, and silent drops at every layer of the Telegram send path — a /random command that delivered text but no media left zero evidence in the log. This replaces the setup and closes every silent drop encountered end-to-end. New infrastructure: - notify_bridge_core.log_context: request_id/command/chat_id/bot_id/dispatch_id ContextVars with a bind_log_context() context manager so deep call sites (TelegramClient, NotificationDispatcher) inherit the correlation tag without threading args through. - notify_bridge_server.logging_setup: dictConfig-based setup with a LogRecordFactory that tags every record, a SecretMaskingFilter that redacts /botN:TOKEN plus Authorization/x-api-key/password/secret in messages AND tracebacks, a JSON formatter for aggregators, text formatter with grep-friendly [req=... cmd=... bot=... chat=... disp=...] prefix, and default dampening for sqlalchemy/aiohttp/apscheduler/urllib3/PIL. Runtime control: - NOTIFY_BRIDGE_LOG_LEVEL / _FORMAT / _LEVELS env vars (boot). - DB-backed log_level / log_format / log_levels AppSettings, applied on boot after migrations and live via apply_log_levels() when edited in the settings UI (format still requires restart, logs a WARN). - Frontend settings page gains a Logging card (level dropdown, format dropdown, per-module overrides); en/ru i18n keys added. Call-site fixes (/random media-group blind spot and adjacent): - TelegramClient._fetch_asset: every silent drop now WARN-logs with reason (missing url, HTTP non-200, size/dimension limits, ClientError). - TelegramClient._send_media_group: WARN on "chunk had N items but 0 usable", ERROR on sendMediaGroup non-ok/transport with full context; returns success=False + "no_items_delivered" instead of success=True with an empty message_ids list so callers can distinguish. - TelegramClient.send_message / _upload_media / _send_from_cache: ERROR on non-ok + transport failures with status/code/desc; DEBUG for cache-hit fallbacks. - NotificationDispatcher.dispatch: generates a dispatch_id, binds it, logs start/finish with failure count, uses exc_info for target failures. - commands/handler: missing/failed templates -> ERROR + exc_info; send_reply and send_media_group errors upgraded WARNING -> ERROR with chat/error_code context; rate-limit and truncation cases logged with full context. - commands/webhook and services/telegram_poller: bind_log_context(request_id =tg:<update_id>, command, chat_id, bot_id), INFO on receive/dispatch/ completion with duration, exc_info on raise, INFO when commands disabled. - commands/immich: INFO when album scope is empty; WARN per asset dropped from media payload and a summary WARN when "N assets in, 0 out".
This commit is contained in:
@@ -0,0 +1,66 @@
|
||||
"""Request-scoped ContextVars that propagate into log records.
|
||||
|
||||
The server sets these at entry points (Telegram webhook, scheduler dispatch,
|
||||
REST call) and they propagate through async calls automatically. A
|
||||
``LogRecordFactory`` installed by ``notify_bridge_server.logging_setup``
|
||||
reads them so every log line is tagged (``request_id``, ``command``,
|
||||
``chat_id``, ``bot_id``, ``dispatch_id``) without each call site having
|
||||
to pass the values explicitly.
|
||||
|
||||
Kept in ``notify_bridge_core`` so core modules (``TelegramClient``,
|
||||
``NotificationDispatcher``) can *set* additional context (e.g. a
|
||||
``dispatch_id``) without depending on the server package.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from contextlib import contextmanager
|
||||
from contextvars import ContextVar, Token
|
||||
from typing import Any, Iterator
|
||||
|
||||
request_id_var: ContextVar[str | None] = ContextVar("request_id", default=None)
|
||||
command_var: ContextVar[str | None] = ContextVar("command", default=None)
|
||||
chat_id_var: ContextVar[str | None] = ContextVar("chat_id", default=None)
|
||||
bot_id_var: ContextVar[int | None] = ContextVar("bot_id", default=None)
|
||||
dispatch_id_var: ContextVar[str | None] = ContextVar("dispatch_id", default=None)
|
||||
|
||||
_VAR_MAP: dict[str, ContextVar[Any]] = {
|
||||
"request_id": request_id_var,
|
||||
"command": command_var,
|
||||
"chat_id": chat_id_var,
|
||||
"bot_id": bot_id_var,
|
||||
"dispatch_id": dispatch_id_var,
|
||||
}
|
||||
|
||||
|
||||
@contextmanager
|
||||
def bind_log_context(**kwargs: Any) -> Iterator[None]:
|
||||
"""Bind the given context fields for the duration of the ``with`` block.
|
||||
|
||||
Unknown keys are ignored so callers can pass whatever they want without
|
||||
an ``if`` ladder. Values are reset on exit even if the block raises.
|
||||
|
||||
Example:
|
||||
``with bind_log_context(request_id="abc", command="random"): ...``
|
||||
"""
|
||||
tokens: list[tuple[ContextVar[Any], Token]] = []
|
||||
try:
|
||||
for key, value in kwargs.items():
|
||||
var = _VAR_MAP.get(key)
|
||||
if var is None:
|
||||
continue
|
||||
tokens.append((var, var.set(value)))
|
||||
yield
|
||||
finally:
|
||||
for var, tok in tokens:
|
||||
var.reset(tok)
|
||||
|
||||
|
||||
def current_log_context() -> dict[str, Any]:
|
||||
"""Return a snapshot of the currently-bound context values (non-None)."""
|
||||
snap: dict[str, Any] = {}
|
||||
for key, var in _VAR_MAP.items():
|
||||
val = var.get()
|
||||
if val is not None:
|
||||
snap[key] = val
|
||||
return snap
|
||||
@@ -4,11 +4,13 @@ from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
import aiohttp
|
||||
|
||||
from notify_bridge_core.log_context import bind_log_context, dispatch_id_var
|
||||
from notify_bridge_core.models.events import ServiceEvent
|
||||
from notify_bridge_core.templates.context import build_template_context
|
||||
from notify_bridge_core.templates.renderer import render_template
|
||||
@@ -95,18 +97,40 @@ class NotificationDispatcher:
|
||||
|
||||
Returns list of results (one per target).
|
||||
"""
|
||||
raw_results = await asyncio.gather(
|
||||
*[self._send_to_target(event, t) for t in targets],
|
||||
return_exceptions=True,
|
||||
)
|
||||
results = []
|
||||
for raw in raw_results:
|
||||
if isinstance(raw, Exception):
|
||||
_LOGGER.error("Failed to dispatch to target: %s", raw)
|
||||
results.append({"success": False, "error": str(raw)})
|
||||
else:
|
||||
results.append(raw)
|
||||
return results
|
||||
# Bind a dispatch_id so every log line emitted by the target sends
|
||||
# (including deep in TelegramClient) can be correlated to the same
|
||||
# upstream event.
|
||||
new_id = dispatch_id_var.get() or f"disp:{uuid.uuid4().hex[:12]}"
|
||||
|
||||
with bind_log_context(dispatch_id=new_id):
|
||||
_LOGGER.info(
|
||||
"Dispatching event %s (collection=%r) to %d target(s)",
|
||||
event.event_type.value if hasattr(event.event_type, "value") else event.event_type,
|
||||
getattr(event, "collection_name", None), len(targets),
|
||||
)
|
||||
raw_results = await asyncio.gather(
|
||||
*[self._send_to_target(event, t) for t in targets],
|
||||
return_exceptions=True,
|
||||
)
|
||||
results = []
|
||||
failures = 0
|
||||
for target, raw in zip(targets, raw_results):
|
||||
if isinstance(raw, Exception):
|
||||
failures += 1
|
||||
_LOGGER.error(
|
||||
"Dispatch to target type=%s failed: %s",
|
||||
target.type, raw, exc_info=raw,
|
||||
)
|
||||
results.append({"success": False, "error": str(raw)})
|
||||
else:
|
||||
if isinstance(raw, dict) and not raw.get("success"):
|
||||
failures += 1
|
||||
results.append(raw)
|
||||
_LOGGER.info(
|
||||
"Dispatch finished: %d target(s), %d failure(s)",
|
||||
len(targets), failures,
|
||||
)
|
||||
return results
|
||||
|
||||
def _resolve_template(
|
||||
self, event: ServiceEvent, target: TargetConfig, locale: str,
|
||||
|
||||
@@ -162,8 +162,20 @@ class TelegramClient:
|
||||
"message_id": result.get("result", {}).get("message_id"),
|
||||
"cached": True,
|
||||
}
|
||||
except aiohttp.ClientError:
|
||||
pass
|
||||
# Non-ok from a cached send — file_id stale or file deleted on
|
||||
# Telegram's side. Log at DEBUG so operators who are hunting
|
||||
# "why didn't the cached send work?" can see it, but the
|
||||
# caller will fall through to a fresh upload.
|
||||
_LOGGER.debug(
|
||||
"Telegram %s (cached) returned non-ok: status=%s code=%s desc=%r — falling back to fresh upload",
|
||||
kind.api_method, response.status, result.get("error_code"),
|
||||
result.get("description"),
|
||||
)
|
||||
except aiohttp.ClientError as err:
|
||||
_LOGGER.debug(
|
||||
"Telegram %s (cached) transport error — falling back to fresh upload: %s",
|
||||
kind.api_method, err,
|
||||
)
|
||||
return None
|
||||
|
||||
async def _upload_media(
|
||||
@@ -203,8 +215,17 @@ class TelegramClient:
|
||||
thumbhash=thumbhash, size=len(data),
|
||||
)
|
||||
return {"success": True, "message_id": res.get("message_id")}
|
||||
_LOGGER.error(
|
||||
"Telegram %s failed: status=%s code=%s desc=%r bytes=%d",
|
||||
kind.api_method, response.status, result.get("error_code"),
|
||||
result.get("description", "Unknown"), len(data),
|
||||
)
|
||||
return {"success": False, "error": result.get("description", "Unknown Telegram error")}
|
||||
except aiohttp.ClientError as err:
|
||||
_LOGGER.error(
|
||||
"Telegram %s transport error (bytes=%d): %s",
|
||||
kind.api_method, len(data), err, exc_info=True,
|
||||
)
|
||||
return {"success": False, "error": str(err)}
|
||||
|
||||
async def send_notification(
|
||||
@@ -327,8 +348,14 @@ class TelegramClient:
|
||||
retry_result = await retry_resp.json()
|
||||
if retry_resp.status == 200 and retry_result.get("ok"):
|
||||
return {"success": True, "message_id": retry_result.get("result", {}).get("message_id")}
|
||||
_LOGGER.error(
|
||||
"Telegram sendMessage failed: status=%s code=%s desc=%r",
|
||||
response.status, result.get("error_code"),
|
||||
result.get("description", "Unknown"),
|
||||
)
|
||||
return {"success": False, "error": result.get("description", "Unknown Telegram error"), "error_code": result.get("error_code")}
|
||||
except aiohttp.ClientError as err:
|
||||
_LOGGER.error("Telegram sendMessage transport error: %s", err, exc_info=True)
|
||||
return {"success": False, "error": str(err)}
|
||||
|
||||
async def send_chat_action(self, chat_id: str, action: str = "typing") -> bool:
|
||||
@@ -513,11 +540,14 @@ class TelegramClient:
|
||||
# Tuple is (cache_key, media_type, thumbhash, uploaded_size).
|
||||
media_cache_info: list[tuple[str, str, str | None, int] | None] = []
|
||||
|
||||
# Resolve cache hits and collect download tasks in parallel
|
||||
# Resolve cache hits and collect download tasks in parallel.
|
||||
# Each drop site logs the reason — otherwise a filtered asset
|
||||
# disappears silently and the media group silently shrinks.
|
||||
async def _fetch_asset(idx: int, item: dict) -> tuple[int, dict | None, bytes | None]:
|
||||
"""Return (index, cache_entry_or_None, downloaded_bytes_or_None)."""
|
||||
url = item.get("url")
|
||||
if not url:
|
||||
_LOGGER.warning("Media skipped: missing url (idx=%d type=%s)", idx, item.get("type"))
|
||||
return idx, None, None
|
||||
media_type = item.get("type", "photo")
|
||||
custom_cache_key = item.get("cache_key")
|
||||
@@ -537,12 +567,24 @@ class TelegramClient:
|
||||
if preloaded is not None:
|
||||
data = preloaded
|
||||
if max_asset_data_size and len(data) > max_asset_data_size:
|
||||
_LOGGER.warning(
|
||||
"Media skipped: preloaded size %d exceeds max_asset_data_size %d (idx=%d type=%s url=%s)",
|
||||
len(data), max_asset_data_size, idx, media_type, url,
|
||||
)
|
||||
return idx, None, None
|
||||
if media_type == "video" and len(data) > TELEGRAM_MAX_VIDEO_SIZE:
|
||||
_LOGGER.warning(
|
||||
"Media skipped: preloaded video %d bytes exceeds Telegram limit %d (idx=%d url=%s)",
|
||||
len(data), TELEGRAM_MAX_VIDEO_SIZE, idx, url,
|
||||
)
|
||||
return idx, None, None
|
||||
if media_type == "photo":
|
||||
exceeds, _, _, _ = check_photo_limits(data)
|
||||
exceeds, reason, _, _ = check_photo_limits(data)
|
||||
if exceeds:
|
||||
_LOGGER.warning(
|
||||
"Media skipped: preloaded photo %s (idx=%d url=%s)",
|
||||
reason, idx, url,
|
||||
)
|
||||
return idx, None, None
|
||||
return idx, None, data
|
||||
|
||||
@@ -551,18 +593,38 @@ class TelegramClient:
|
||||
dl_headers = item.get("headers") or {}
|
||||
async with self._session.get(download_url, headers=dl_headers) as resp:
|
||||
if resp.status != 200:
|
||||
_LOGGER.warning(
|
||||
"Media skipped: download HTTP %d (idx=%d type=%s url=%s)",
|
||||
resp.status, idx, media_type, url,
|
||||
)
|
||||
return idx, None, None
|
||||
data = await resp.read()
|
||||
if max_asset_data_size and len(data) > max_asset_data_size:
|
||||
_LOGGER.warning(
|
||||
"Media skipped: downloaded size %d exceeds max_asset_data_size %d (idx=%d type=%s url=%s)",
|
||||
len(data), max_asset_data_size, idx, media_type, url,
|
||||
)
|
||||
return idx, None, None
|
||||
if media_type == "video" and len(data) > TELEGRAM_MAX_VIDEO_SIZE:
|
||||
_LOGGER.warning(
|
||||
"Media skipped: video %d bytes exceeds Telegram %d-byte limit (idx=%d url=%s)",
|
||||
len(data), TELEGRAM_MAX_VIDEO_SIZE, idx, url,
|
||||
)
|
||||
return idx, None, None
|
||||
if media_type == "photo":
|
||||
exceeds, _, _, _ = check_photo_limits(data)
|
||||
exceeds, reason, _, _ = check_photo_limits(data)
|
||||
if exceeds:
|
||||
_LOGGER.warning(
|
||||
"Media skipped: photo %s (idx=%d url=%s)",
|
||||
reason, idx, url,
|
||||
)
|
||||
return idx, None, None
|
||||
return idx, None, data
|
||||
except aiohttp.ClientError:
|
||||
except aiohttp.ClientError as err:
|
||||
_LOGGER.warning(
|
||||
"Media skipped: download failed (idx=%d type=%s url=%s): %s",
|
||||
idx, media_type, url, err,
|
||||
)
|
||||
return idx, None, None
|
||||
|
||||
results = await asyncio.gather(
|
||||
@@ -602,6 +664,14 @@ class TelegramClient:
|
||||
media_json.append(mij)
|
||||
|
||||
if not media_json:
|
||||
# Every asset in this chunk was filtered out (size, download
|
||||
# failure, etc.). Without this log, sendMediaGroup returns
|
||||
# success=True with zero message_ids and nobody knows why
|
||||
# the user sees only the text reply and no media.
|
||||
_LOGGER.warning(
|
||||
"sendMediaGroup skipped — chunk %d/%d had %d input items but 0 usable (all filtered/failed)",
|
||||
chunk_idx + 1, len(chunks), len(chunk),
|
||||
)
|
||||
continue
|
||||
|
||||
form.add_field("media", json.dumps(media_json))
|
||||
@@ -638,10 +708,35 @@ class TelegramClient:
|
||||
if eff_cache:
|
||||
await eff_cache.async_set_many(cache_entries)
|
||||
else:
|
||||
return {"success": False, "error": result.get("description", "Unknown"), "failed_at_chunk": chunk_idx + 1}
|
||||
_LOGGER.error(
|
||||
"Telegram sendMediaGroup failed: status=%s code=%s desc=%r chunk=%d/%d items=%d",
|
||||
response.status, result.get("error_code"),
|
||||
result.get("description", "Unknown"),
|
||||
chunk_idx + 1, len(chunks), len(media_json),
|
||||
)
|
||||
return {
|
||||
"success": False,
|
||||
"error": result.get("description", "Unknown"),
|
||||
"error_code": result.get("error_code"),
|
||||
"failed_at_chunk": chunk_idx + 1,
|
||||
}
|
||||
except aiohttp.ClientError as err:
|
||||
_LOGGER.error(
|
||||
"Telegram sendMediaGroup transport error on chunk %d/%d (%d items): %s",
|
||||
chunk_idx + 1, len(chunks), len(media_json), err,
|
||||
exc_info=True,
|
||||
)
|
||||
return {"success": False, "error": str(err), "failed_at_chunk": chunk_idx + 1}
|
||||
|
||||
# Distinguish "posted something" from "posted nothing" so the caller
|
||||
# can surface an ERROR when a command produced a caption reply but no
|
||||
# media ever reached Telegram.
|
||||
if not all_message_ids:
|
||||
_LOGGER.warning(
|
||||
"sendMediaGroup completed with 0 message_ids across %d chunk(s) — nothing was delivered",
|
||||
len(chunks),
|
||||
)
|
||||
return {"success": False, "error": "no_items_delivered", "chunks_sent": len(chunks)}
|
||||
return {"success": True, "message_ids": all_message_ids, "chunks_sent": len(chunks)}
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
Reference in New Issue
Block a user