feat(logging): production-grade logging with context vars, secret masking, and runtime level control
Boot-time logging was a three-line basicConfig stub with no timestamps, no correlation, and silent drops at every layer of the Telegram send path — a /random command that delivered text but no media left zero evidence in the log. This replaces the setup and closes every silent drop encountered end-to-end.

New infrastructure:
- notify_bridge_core.log_context: request_id/command/chat_id/bot_id/dispatch_id ContextVars with a bind_log_context() context manager so deep call sites (TelegramClient, NotificationDispatcher) inherit the correlation tag without threading args through.
- notify_bridge_server.logging_setup: dictConfig-based setup with a LogRecordFactory that tags every record, a SecretMaskingFilter that redacts /botN:TOKEN plus Authorization/x-api-key/password/secret in messages AND tracebacks, a JSON formatter for aggregators, text formatter with grep-friendly [req=... cmd=... bot=... chat=... disp=...] prefix, and default dampening for sqlalchemy/aiohttp/apscheduler/urllib3/PIL.

Runtime control:
- NOTIFY_BRIDGE_LOG_LEVEL / _FORMAT / _LEVELS env vars (boot).
- DB-backed log_level / log_format / log_levels AppSettings, applied on boot after migrations and live via apply_log_levels() when edited in the settings UI (format still requires restart, logs a WARN).
- Frontend settings page gains a Logging card (level dropdown, format dropdown, per-module overrides); en/ru i18n keys added.

Call-site fixes (/random media-group blind spot and adjacent):
- TelegramClient._fetch_asset: every silent drop now WARN-logs with reason (missing url, HTTP non-200, size/dimension limits, ClientError).
- TelegramClient._send_media_group: WARN on "chunk had N items but 0 usable", ERROR on sendMediaGroup non-ok/transport with full context; returns success=False + "no_items_delivered" instead of success=True with an empty message_ids list so callers can distinguish.
- TelegramClient.send_message / _upload_media / _send_from_cache: ERROR on non-ok + transport failures with status/code/desc; DEBUG for cache-hit fallbacks.
- NotificationDispatcher.dispatch: generates a dispatch_id, binds it, logs start/finish with failure count, uses exc_info for target failures.
- commands/handler: missing/failed templates -> ERROR + exc_info; send_reply and send_media_group errors upgraded WARNING -> ERROR with chat/error_code context; rate-limit and truncation cases logged with full context.
- commands/webhook and services/telegram_poller: bind_log_context(request_id=tg:<update_id>, command, chat_id, bot_id), INFO on receive/dispatch/completion with duration, exc_info on raise, INFO when commands disabled.
- commands/immich: INFO when album scope is empty; WARN per asset dropped from media payload and a summary WARN when "N assets in, 0 out".
This commit is contained in:
@@ -11,11 +11,13 @@ CommandTrackerListeners with enabled CommandTrackers.
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import time
|
||||
from typing import Any
|
||||
|
||||
from sqlmodel import select
|
||||
from sqlmodel.ext.asyncio.session import AsyncSession
|
||||
|
||||
from notify_bridge_core.log_context import bind_log_context
|
||||
from notify_bridge_core.notifications.telegram.client import TelegramClient
|
||||
|
||||
from ..database.engine import get_engine
|
||||
@@ -289,29 +291,64 @@ async def _poll_bot(bot_id: int) -> None:
|
||||
|
||||
# Dispatch commands (only if chat has commands enabled)
|
||||
if text and text.startswith("/"):
|
||||
try:
|
||||
async with AsyncSession(engine) as cmd_session:
|
||||
chat_row = (await cmd_session.exec(
|
||||
select(TelegramChat).where(
|
||||
TelegramChat.bot_id == bot_obj.id,
|
||||
TelegramChat.chat_id == chat_id,
|
||||
from ..commands.parser import parse_command
|
||||
cmd_name, _, _ = parse_command(text)
|
||||
update_id = update.get("update_id")
|
||||
message_id = message.get("message_id")
|
||||
request_id = f"tg:{update_id}" if update_id is not None else f"tg:msg{message_id}"
|
||||
with bind_log_context(
|
||||
request_id=request_id,
|
||||
command=cmd_name or "-",
|
||||
chat_id=chat_id,
|
||||
bot_id=bot_obj.id,
|
||||
):
|
||||
started = time.monotonic()
|
||||
try:
|
||||
async with AsyncSession(engine) as cmd_session:
|
||||
chat_row = (await cmd_session.exec(
|
||||
select(TelegramChat).where(
|
||||
TelegramChat.bot_id == bot_obj.id,
|
||||
TelegramChat.chat_id == chat_id,
|
||||
)
|
||||
)).first()
|
||||
if not chat_row or not chat_row.commands_enabled:
|
||||
_LOGGER.info(
|
||||
"Command ignored — commands disabled (poll) for bot=%s chat=%s",
|
||||
bot_obj.id, chat_id,
|
||||
)
|
||||
continue
|
||||
effective_lang = chat_row.language_override or msg_language
|
||||
_LOGGER.info("Command received (poll): /%s args=%r lang=%s", cmd_name, text[:200], effective_lang)
|
||||
async with telegram_chat_action(
|
||||
bot_token, chat_id, classify_command_chat_action(text),
|
||||
):
|
||||
responses = await handle_command(bot_obj, chat_id, text, language_code=effective_lang)
|
||||
if not responses:
|
||||
_LOGGER.info(
|
||||
"Command produced no response (cmd=%r, poll) after %.0f ms",
|
||||
cmd_name, (time.monotonic() - started) * 1000,
|
||||
)
|
||||
continue
|
||||
text_count = sum(1 for r in responses if r.text)
|
||||
media_count = sum(len(r.media or []) for r in responses)
|
||||
_LOGGER.info(
|
||||
"Command dispatching %d response(s): text=%d media_items=%d",
|
||||
len(responses), text_count, media_count,
|
||||
)
|
||||
)).first()
|
||||
if not chat_row or not chat_row.commands_enabled:
|
||||
continue
|
||||
effective_lang = chat_row.language_override or msg_language
|
||||
message_id = message.get("message_id")
|
||||
async with telegram_chat_action(
|
||||
bot_token, chat_id, classify_command_chat_action(text),
|
||||
):
|
||||
responses = await handle_command(bot_obj, chat_id, text, language_code=effective_lang)
|
||||
if responses:
|
||||
for resp in responses:
|
||||
if resp.text:
|
||||
await send_reply(bot_token, chat_id, resp.text, reply_to_message_id=message_id)
|
||||
if resp.media:
|
||||
await send_media_group(bot_token, chat_id, resp.media, reply_to_message_id=message_id)
|
||||
except Exception:
|
||||
_LOGGER.error("Error handling command from bot %d", bot_id, exc_info=True)
|
||||
_LOGGER.info(
|
||||
"Command /%s completed in %.0f ms (responses=%d media=%d)",
|
||||
cmd_name, (time.monotonic() - started) * 1000,
|
||||
len(responses), media_count,
|
||||
)
|
||||
except Exception:
|
||||
_LOGGER.exception(
|
||||
"Error handling command /%s from bot %d after %.0f ms",
|
||||
cmd_name, bot_id, (time.monotonic() - started) * 1000,
|
||||
)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user