feat: observability, per-receiver Telegram options, oversized-video fallback

Operability:
- Correlation IDs end-to-end: shared dispatch_id between log lines and
  EventLog rows (event/watcher/scheduled/deferred/action/HA/command paths)
  and a new X-Request-Id middleware that normalizes inbound ids and binds
  request_id into log context.
- dispatch_summary block merged into EventLog.details: per-target
  success/failure counts plus Telegram media delivered/skipped/failed and
  truncated error lists, so partial outcomes surface in the UI.
- Diagnostic mode: admin can flip one module to DEBUG for a bounded
  window with auto-revert (in-memory only; setup_logging() resets on
  boot, lifespan reverts on shutdown). New /diagnostic-mode endpoints
  plus DiagnosticsCassette UI on the settings page.

Telegram:
- Per-receiver options: disable_notification (silent send) and
  message_thread_id (forum-topic routing), wired through the dispatcher.
  sendMessage takes them as plain kwargs; the media send sites (sendPhoto-
  Video-Document / sendMediaGroup / cache-hit POST) read them from a ContextVar.
- send_large_videos_as_documents target setting: bypass the 50 MB
  sendVideo cap by falling back to sendDocument for oversized videos.
- sendMediaGroup byte-budget enforcement (TELEGRAM_MAX_GROUP_TOTAL_BYTES,
  45 MB) with per-item fallback on chunk failure so a stale file_id no
  longer silently drops a cached asset.

Tests:
- New: diagnostic_mode, dispatch_summary, request_correlation,
  telegram_media_group_partial, telegram_per_send_options.

Docs:
- .claude/reviews/: six-axis production-readiness review of v0.8.1.
- .claude/docs/functional-review-2026-05-28.md: focused review of
  Telegram/Immich/logging subsystems.
This commit is contained in:
2026-05-28 15:19:31 +03:00
parent 85a8f1e71c
commit 6a8f374678
39 changed files with 7239 additions and 142 deletions
@@ -14,6 +14,7 @@ Kept in ``notify_bridge_core`` so core modules (``TelegramClient``,
from __future__ import annotations
import uuid
from contextlib import contextmanager
from contextvars import ContextVar, Token
from typing import Any, Iterator
@@ -56,6 +57,22 @@ def bind_log_context(**kwargs: Any) -> Iterator[None]:
var.reset(tok)
def ensure_dispatch_id() -> str:
    """Return the active ``dispatch_id``, minting a fresh one when none is bound.

    A freshly minted id has the shape ``disp:<12 hex chars>`` — the same
    shape :class:`NotificationDispatcher.dispatch` uses — so log lines and
    ``EventLog.details.dispatch_id`` stay uniform. The new id is only
    *returned*, not bound; a top-level handler typically binds it itself::

        with bind_log_context(dispatch_id=ensure_dispatch_id()):
            ...

    Nested calls inside that block then see the bound value and reuse it,
    which lets any ``EventLog`` row written there be matched against the
    dispatcher's log lines.
    """
    bound = dispatch_id_var.get()
    if bound:
        return bound
    # Nothing (or an empty value) bound yet: generate a short random id.
    return f"disp:{uuid.uuid4().hex[:12]}"
def current_log_context() -> dict[str, Any]:
"""Return a snapshot of the currently-bound context values (non-None)."""
snap: dict[str, Any] = {}
@@ -64,3 +81,43 @@ def current_log_context() -> dict[str, Any]:
if val is not None:
snap[key] = val
return snap
# Keys copied onto ``EventLog.details`` so an operator can grep stderr for
# the matching ``disp=``/``req=`` log lines after spotting a row in the UI.
# Kept narrow on purpose — ``chat_id``/``bot_id``/``command`` are already
# represented by dedicated EventLog columns.
_CORRELATION_KEYS = ("dispatch_id", "request_id")
def enrich_details_with_correlation(
    details: dict[str, Any] | None,
) -> dict[str, Any]:
    """Copy ``details`` and fill in any active correlation ids that are missing.

    Intended for building an ``EventLog.details`` dict: the persisted row
    then carries the same ``dispatch_id`` / ``request_id`` as the stderr
    log lines emitted during the same dispatch, so a dashboard row can be
    matched to its log lines without server-side correlation.

    Keys already present in ``details`` win — a caller may pin its own
    value (for example a synthetic ``dispatch_id`` on a backfilled row) by
    setting it before calling.

    The copy is shallow: nested lists/dicts remain shared with the input.
    Callers that mutate nested values afterwards should ``deepcopy``
    themselves. The input dict itself is never modified.
    """
    merged: dict[str, Any] = dict(details or {})
    for name in _CORRELATION_KEYS:
        # Caller-supplied keys are never overwritten, so skip the lookup.
        source = None if name in merged else _VAR_MAP.get(name)
        if source is None:
            continue
        bound = source.get()
        if bound is not None:
            merged[name] = bound
    return merged
@@ -5,13 +5,12 @@ from __future__ import annotations
import asyncio
import contextlib
import logging
import uuid
from dataclasses import dataclass, field
from typing import Any, AsyncIterator, Awaitable, Callable, Final
import aiohttp
from notify_bridge_core.log_context import bind_log_context, dispatch_id_var
from notify_bridge_core.log_context import bind_log_context, ensure_dispatch_id
from notify_bridge_core.models.events import ServiceEvent
from notify_bridge_core.templates.context import build_template_context
from notify_bridge_core.templates.renderer import render_template
@@ -132,7 +131,7 @@ class NotificationDispatcher:
Returns one result per target. Per-target failures are isolated;
a single bad target cannot poison the batch.
"""
new_id = dispatch_id_var.get() or f"disp:{uuid.uuid4().hex[:12]}"
new_id = ensure_dispatch_id()
with bind_log_context(dispatch_id=new_id):
_LOGGER.info(
@@ -341,6 +340,7 @@ class NotificationDispatcher:
max_size_mb = target.config.get("max_asset_size")
max_size_bytes = max_size_mb * 1024 * 1024 if max_size_mb else None
send_large_as_docs = target.config.get("send_large_photos_as_documents", False)
send_large_videos_as_docs = target.config.get("send_large_videos_as_documents", False)
if not bot_token:
return {"success": False, "error": "Missing bot_token"}
@@ -392,6 +392,8 @@ class NotificationDispatcher:
chat_id=receiver.chat_id,
text=message,
disable_web_page_preview=bool(disable_preview),
disable_notification=receiver.disable_notification,
message_thread_id=receiver.message_thread_id,
)
if not text_result.get("success"):
_LOGGER.warning(
@@ -409,22 +411,45 @@ class NotificationDispatcher:
chunk_delay=chunk_delay,
max_asset_data_size=max_size_bytes,
send_large_photos_as_documents=send_large_as_docs,
send_large_videos_as_documents=send_large_videos_as_docs,
chat_action=chat_action or None,
disable_notification=receiver.disable_notification,
message_thread_id=receiver.message_thread_id,
)
if not media_result.get("success"):
delivered = media_result.get("delivered_count", 0)
skipped = media_result.get("skipped_count", 0)
failed = media_result.get("failed_count", 0)
media_success = media_result.get("success", False)
has_partial_loss = skipped > 0 or failed > 0
if not media_success:
_LOGGER.warning(
"Text sent OK but media failed for chat %s: %s",
receiver.chat_id, media_result.get("error"),
"Text sent OK but media failed for chat %s "
"(delivered=%d skipped=%d failed=%d): %s",
receiver.chat_id, delivered, skipped, failed,
media_result.get("error"),
)
elif has_partial_loss:
_LOGGER.warning(
"Partial media delivery for chat %s "
"(delivered=%d skipped=%d failed=%d)",
receiver.chat_id, delivered, skipped, failed,
)
if not media_success or has_partial_loss:
# Preserve both outcomes — text succeeded, media
# partially or fully didn't. Operators losing
# media-failure detail made root-cause analysis
# impossible.
return {
"success": True,
"message_id": text_result.get("message_id"),
"media_error": media_result.get("error"),
"media_failed_at_chunk": media_result.get("failed_at_chunk"),
"media_delivered_count": delivered,
"media_skipped_count": skipped,
"media_failed_count": failed,
"media_errors": media_result.get("errors"),
}
return text_result
@@ -20,9 +20,21 @@ class Receiver:
@dataclass
class TelegramReceiver(Receiver):
    """Telegram chat receiver.

    ``disable_notification`` toggles Telegram's ``disable_notification=true``
    flag — the message is delivered without an audible / vibration alert.
    Useful for low-priority chats that the user reads but doesn't want to
    be paged by.

    ``message_thread_id`` routes the send into a specific forum topic on a
    supergroup with topics enabled. ``None`` means "general topic" (default
    Telegram behaviour).
    """

    # Destination chat identifier, kept as a string.
    chat_id: str = ""
    # When True the message is sent silently.
    disable_notification: bool = False
    # Forum-topic id to route into; None leaves the field unset.
    message_thread_id: int | None = None
@dataclass
@@ -80,9 +92,30 @@ def _coerce_int(value: Any, default: int) -> int:
return default
def _coerce_telegram_thread_id(value: Any) -> int | None:
"""Coerce a config value to a positive Telegram forum-topic id.
The Bot API treats omission, ``0``, and negative values all as
"general topic", so we collapse them to ``None`` for consistency
with the frontend (which rejects ``<= 0``). Booleans are explicitly
rejected so ``int(True) == 1`` doesn't silently route a misconfigured
chat into topic #1.
"""
if value is None or value == "" or isinstance(value, bool):
return None
try:
n = int(value)
except (TypeError, ValueError):
return None
return n if n > 0 else None
_RECEIVER_FACTORIES: dict[str, _ReceiverFactory] = {
"telegram": lambda locale, config: TelegramReceiver(
locale=locale, config=config, chat_id=str(config.get("chat_id", "")),
locale=locale, config=config,
chat_id=str(config.get("chat_id", "")),
disable_notification=bool(config.get("disable_notification", False)),
message_thread_id=_coerce_telegram_thread_id(config.get("message_thread_id")),
),
"webhook": lambda locale, config: WebhookReceiver(
locale=locale, config=config,
@@ -3,12 +3,14 @@
from __future__ import annotations
import asyncio
import contextlib
import json
import logging
import mimetypes
import re
from contextvars import ContextVar
from dataclasses import dataclass, field
from typing import Any, Callable, Final
from typing import Any, Callable, Final, Iterator
import aiohttp
from aiohttp import FormData
@@ -19,6 +21,7 @@ from .cache import TelegramFileCache
from .media import (
TELEGRAM_API_BASE_URL,
TELEGRAM_MAX_CAPTION_LENGTH,
TELEGRAM_MAX_GROUP_TOTAL_BYTES,
TELEGRAM_MAX_PHOTO_SIZE,
TELEGRAM_MAX_TEXT_LENGTH,
TELEGRAM_MAX_VIDEO_SIZE,
@@ -27,7 +30,6 @@ from .media import (
extract_asset_id_from_url,
is_asset_cache_key,
is_asset_id,
split_media_by_upload_size,
)
_LOGGER = logging.getLogger(__name__)
@@ -56,6 +58,68 @@ _UPLOAD_TIMEOUT: Final = aiohttp.ClientTimeout(total=120, connect=10)
_DOWNLOAD_TIMEOUT: Final = aiohttp.ClientTimeout(total=120, connect=10)
# ---------------------------------------------------------------------------
# Per-send options (disable_notification, message_thread_id, …)
# ---------------------------------------------------------------------------
#
# These are properties of a single send, not of the bot or the client, and
# they fan out into the JSON / multipart payload at four different sites
# (sendMessage, sendPhoto/Video/Document, sendMediaGroup, cache-hit POST).
# Rather than threading the kwargs through every internal media helper, we
# bind them on a ContextVar inside the public ``send_notification`` entry
# point; the payload builders read the var when constructing the request.
# (``send_message`` is a leaf call and writes its kwargs straight into the
# JSON body instead.) ContextVar propagation isolates concurrent ``asyncio.gather``
# fan-outs in the dispatcher (one task per receiver) — each task sees the
# value its own caller bound.
@dataclass(frozen=True)
class _SendOptions:
"""Per-send Telegram flags applied to every API call within one send.
``disable_notification`` maps to Bot API ``disable_notification=true``
— the chat receives the message silently. ``message_thread_id`` routes
the message into a specific forum-topic on supergroups with topics
enabled; ``None`` means "general topic" (Bot API omits the field).
"""
disable_notification: bool = False
message_thread_id: int | None = None
# Holds the options of the send currently in progress. The default is an
# all-defaults ``_SendOptions`` so payload builders can read it
# unconditionally, even when nothing has been bound.
_send_options_var: ContextVar[_SendOptions] = ContextVar("_tg_send_options", default=_SendOptions())
@contextlib.contextmanager
def _bind_send_options(opts: _SendOptions) -> Iterator[None]:
    """Make ``opts`` the active per-send options inside the ``with`` block.

    The previously active value is restored on exit, whether the block
    finishes normally or raises.
    """
    restore_token = _send_options_var.set(opts)
    try:
        yield
    finally:
        # Always undo the binding so the options never leak past the block.
        _send_options_var.reset(restore_token)
def _apply_send_opts_to_payload(payload: dict[str, Any]) -> None:
    """Write the active per-send options into a JSON request body, in place.

    Only non-default options are written: ``disable_notification`` is
    added when truthy, ``message_thread_id`` when it is not ``None``.
    """
    active = _send_options_var.get()
    thread_id = active.message_thread_id
    if active.disable_notification:
        payload["disable_notification"] = True
    if thread_id is not None:
        payload["message_thread_id"] = thread_id
def _apply_send_opts_to_form(form: FormData) -> None:
    """Append the active per-send options to a multipart form, in place.

    Form fields are text, so the flag is sent as the literal ``"true"``
    and the thread id is stringified. Default-valued options are omitted.
    """
    active = _send_options_var.get()
    thread_id = active.message_thread_id
    if active.disable_notification:
        form.add_field("disable_notification", "true")
    if thread_id is not None:
        form.add_field("message_thread_id", str(thread_id))
def _extract_retry_after(result: dict[str, Any]) -> int | None:
"""Return the retry_after seconds from a Telegram error response.
@@ -135,10 +199,27 @@ class _MediaItem:
keyed by position. Bundling these together prevents the
``media_json`` and ``cache_info`` lists from drifting out of
alignment under future edits.
``source_url`` and ``download_headers`` let the per-item fallback
re-download a cache-hit item if its ``file_id`` POST returns
transient errors — without them, a stale ``file_id`` would silently
lose a cached asset that the original single-item path would have
recovered.
"""
media_json: dict[str, Any]
cache_info: tuple[str, str, str | None, int] | None
attachment: tuple[str, bytes, str, str] | None # (name, data, filename, content_type)
source_url: str | None = None
download_headers: dict[str, str] | None = None
@property
def upload_bytes(self) -> int:
    """Number of bytes this item adds to a multipart sendMediaGroup payload.

    An item without an attachment (for example one referenced by a cached
    ``file_id``) costs nothing, because no bytes are uploaded for it.
    """
    if not self.attachment:
        return 0
    # attachment is (name, data, filename, content_type); index 1 is the bytes.
    payload = self.attachment[1]
    return len(payload)
def _truncate(text: str, limit: int, *, marker: str = "") -> str:
@@ -302,6 +383,7 @@ class TelegramClient:
payload["caption"] = _truncate(caption, TELEGRAM_MAX_CAPTION_LENGTH)
if reply_to_message_id is not None:
payload["reply_parameters"] = {"message_id": reply_to_message_id}
_apply_send_opts_to_payload(payload)
try:
async with self._session.post(
self._api_url(kind.api_method), json=payload, timeout=_API_TIMEOUT,
@@ -351,6 +433,7 @@ class TelegramClient:
f.add_field("caption", capped_caption)
if reply_to_message_id is not None:
f.add_field("reply_parameters", json.dumps({"message_id": reply_to_message_id}))
_apply_send_opts_to_form(f)
return f
for attempt in range(1, _TG_429_MAX_ATTEMPTS + 1):
@@ -415,18 +498,54 @@ class TelegramClient:
chunk_delay: int = 0,
max_asset_data_size: int | None = None,
send_large_photos_as_documents: bool = False,
send_large_videos_as_documents: bool = False,
chat_action: str | None = "typing",
*,
disable_notification: bool = False,
message_thread_id: int | None = None,
) -> NotificationResult:
if not assets:
return await self.send_message(
chat_id, caption or "", reply_to_message_id,
disable_web_page_preview, parse_mode,
disable_notification=disable_notification,
message_thread_id=message_thread_id,
)
keepalive: _KeepaliveHandle | None = None
if chat_action:
keepalive = self.start_chat_action_keepalive(chat_id, chat_action)
# Bind for the whole media-send fan-out — every internal helper
# (_send_photo / _send_video / _send_document / _send_media_group /
# _post_media_group / _send_from_cache / _upload_media) reads the
# current value when it constructs its request payload.
opts = _SendOptions(
disable_notification=disable_notification,
message_thread_id=message_thread_id,
)
with _bind_send_options(opts):
return await self._send_notification_body(
chat_id, assets, caption, reply_to_message_id, parse_mode,
max_group_size, chunk_delay, max_asset_data_size,
send_large_photos_as_documents, send_large_videos_as_documents,
keepalive,
)
async def _send_notification_body(
self,
chat_id: str,
assets: list[dict[str, Any]],
caption: str | None,
reply_to_message_id: int | None,
parse_mode: str,
max_group_size: int,
chunk_delay: int,
max_asset_data_size: int | None,
send_large_photos_as_documents: bool,
send_large_videos_as_documents: bool,
keepalive: _KeepaliveHandle | None,
) -> NotificationResult:
try:
if len(assets) == 1 and assets[0].get("type") == "photo":
return await self._send_photo(
@@ -443,6 +562,7 @@ class TelegramClient:
assets[0].get("content_type"), assets[0].get("cache_key"),
download_headers=assets[0].get("headers"),
preloaded_data=assets[0].get("data"),
send_large_videos_as_documents=send_large_videos_as_documents,
)
if len(assets) == 1 and assets[0].get("type", "document") == "document":
url = assets[0].get("url")
@@ -465,7 +585,7 @@ class TelegramClient:
return await self._send_media_group(
chat_id, assets, caption, reply_to_message_id, max_group_size,
chunk_delay, parse_mode, max_asset_data_size,
send_large_photos_as_documents,
send_large_photos_as_documents, send_large_videos_as_documents,
)
finally:
await self.stop_keepalive(keepalive)
@@ -477,6 +597,9 @@ class TelegramClient:
reply_to_message_id: int | None = None,
disable_web_page_preview: bool | None = None,
parse_mode: str = "HTML",
*,
disable_notification: bool = False,
message_thread_id: int | None = None,
) -> NotificationResult:
if not text:
_LOGGER.warning("send_message called with empty text — using placeholder")
@@ -490,7 +613,19 @@ class TelegramClient:
payload["reply_parameters"] = {"message_id": reply_to_message_id}
if disable_web_page_preview:
payload["link_preview_options"] = {"is_disabled": True}
# sendMessage is a leaf call — its kwargs go straight into the
# JSON body. The ContextVar pattern is reserved for the deeper
# media paths (``_upload_media`` / ``_post_media_group`` /
# ``_send_from_cache``) that can't easily plumb kwargs through.
if disable_notification:
payload["disable_notification"] = True
if message_thread_id is not None:
payload["message_thread_id"] = message_thread_id
return await self._post_send_message(payload)
async def _post_send_message(
self, payload: dict[str, Any],
) -> NotificationResult:
url = self._api_url("sendMessage")
try:
async with self._session.post(url, json=payload, timeout=_API_TIMEOUT) as response:
@@ -651,6 +786,7 @@ class TelegramClient:
max_asset_data_size: int | None = None, content_type: str | None = None,
cache_key: str | None = None, download_headers: dict[str, str] | None = None,
preloaded_data: bytes | None = None,
send_large_videos_as_documents: bool = False,
) -> NotificationResult:
if not url:
return {"success": False, "error": "Missing 'url' for video"}
@@ -672,6 +808,18 @@ class TelegramClient:
if max_asset_data_size is not None and len(data) > max_asset_data_size:
return {"success": False, "error": "Video exceeds size limit", "skipped": True}
if len(data) > TELEGRAM_MAX_VIDEO_SIZE:
# Telegram's sendVideo hard-caps at 50 MB. Documents accept
# up to 2 GB, so when the operator opts in we deliver the
# bytes as a document instead of silently dropping the asset.
# Loses inline playback but preserves delivery.
if send_large_videos_as_documents:
filename = url.split("/")[-1].split("?")[0] or "video.mp4"
if "." not in filename:
filename = "video.mp4"
return await self._send_document(
chat_id, data, filename, caption, reply_to_message_id,
parse_mode, url, content_type, cache_key,
)
return {
"success": False,
"error": f"Video exceeds Telegram's {TELEGRAM_MAX_VIDEO_SIZE // (1024*1024)} MB limit",
@@ -723,6 +871,7 @@ class TelegramClient:
caption: str | None = None, reply_to_message_id: int | None = None,
max_group_size: int = 10, chunk_delay: int = 0, parse_mode: str = "HTML",
max_asset_data_size: int | None = None, send_large_photos_as_documents: bool = False,
send_large_videos_as_documents: bool = False,
) -> NotificationResult:
# Telegram rejects mixed photo/video + document in a single
# sendMediaGroup. Split before chunking so a malformed input
@@ -730,75 +879,293 @@ class TelegramClient:
partitions = self._partition_media_by_kind(assets)
all_message_ids: list[int] = []
first_chunk_overall = True
errors: list[dict[str, Any]] = []
delivered = 0
skipped = 0
failed = 0
first_send = True
# Oversized videos that the operator wants delivered as
# documents. Sent after all media-group chunks finish so
# they ride out on their own (Telegram refuses to mix
# documents with photo/video in one group).
deferred_documents: list[_MediaItem] = []
# Caption + reply_to are "spent" on the first send attempt,
# mirroring the prior contract. If that first attempt fails
# entirely, they're lost — same as before. Tracking these as
# standalone flags (rather than deriving from ``chunk_idx==0``)
# keeps the semantics right across multiple partitions.
caption_pending = bool(caption)
reply_pending = reply_to_message_id is not None
async def maybe_delay() -> None:
nonlocal first_send
if not first_send and chunk_delay > 0:
await asyncio.sleep(chunk_delay / 1000)
first_send = False
for partition in partitions:
chunks = [
partition[i:i + max_group_size]
for i in range(0, len(partition), max_group_size)
]
for chunk_idx, chunk in enumerate(chunks):
if not first_chunk_overall and chunk_delay > 0:
await asyncio.sleep(chunk_delay / 1000)
# Single-item chunk → use the simpler send_photo/video path.
if len(chunk) == 1:
item = chunk[0]
chunk_caption = caption if first_chunk_overall else None
chunk_reply = reply_to_message_id if first_chunk_overall else None
if item.get("type") == "photo":
result = await self._send_photo(
chat_id, item.get("url"), chunk_caption, chunk_reply, parse_mode,
max_asset_data_size, send_large_photos_as_documents,
item.get("content_type"), item.get("cache_key"),
download_headers=item.get("headers"),
preloaded_data=item.get("data"),
)
elif item.get("type") == "video":
result = await self._send_video(
chat_id, item.get("url"), chunk_caption, chunk_reply, parse_mode,
max_asset_data_size,
item.get("content_type"), item.get("cache_key"),
download_headers=item.get("headers"),
preloaded_data=item.get("data"),
)
else:
first_chunk_overall = False
continue
first_chunk_overall = False
if not result.get("success"):
result["failed_at_chunk"] = chunk_idx + 1
return result
if result.get("message_id") is not None:
all_message_ids.append(result["message_id"])
continue
items = await self._build_media_items(
chunk, max_asset_data_size, caption if first_chunk_overall else None,
parse_mode,
# Fetch + filter the parent chunk. Skipped items
# (oversized, bad photo, failed download) never enter
# ``items`` — count them so the operator-facing result
# reflects what actually went out vs got dropped.
# Oversized videos opted into doc-fallback get
# deferred — they're delivered (eventually) so they
# don't count as skipped.
items, chunk_deferred = await self._build_media_items(
chunk, max_asset_data_size, send_large_videos_as_documents,
)
deferred_documents.extend(chunk_deferred)
skipped += len(chunk) - len(items) - len(chunk_deferred)
if not items:
_LOGGER.warning(
"sendMediaGroup skipped — chunk %d/%d had %d input items but 0 usable (all filtered/failed)",
"sendMediaGroup: chunk %d/%d had %d input items but 0 usable",
chunk_idx + 1, len(chunks), len(chunk),
)
first_chunk_overall = False
continue
chunk_msg_ids, chunk_err = await self._post_media_group(
chat_id, items, reply_to_message_id if first_chunk_overall else None,
chunk_idx, len(chunks),
# Split the chunk into sub-chunks that each fit under
# Telegram's per-request byte cap. Per-item filtering
# alone can't prevent 413s when several legal-sized
# items together bust the envelope.
sub_chunks = self._split_items_by_byte_budget(
items, TELEGRAM_MAX_GROUP_TOTAL_BYTES,
)
first_chunk_overall = False
if chunk_err is not None:
return chunk_err
all_message_ids.extend(chunk_msg_ids)
if len(sub_chunks) > 1:
_LOGGER.info(
"sendMediaGroup: byte-budget split chunk %d/%d into %d sub-chunks",
chunk_idx + 1, len(chunks), len(sub_chunks),
)
if not all_message_ids:
_LOGGER.warning(
"sendMediaGroup completed with 0 message_ids — nothing was delivered",
for sub_items in sub_chunks:
await maybe_delay()
sub_caption = caption if caption_pending else None
sub_reply = reply_to_message_id if reply_pending else None
caption_pending = False
reply_pending = False
if sub_caption:
self._attach_caption_to_first(
sub_items, sub_caption, parse_mode,
)
msg_ids, err = await self._post_media_group(
chat_id, sub_items, sub_reply, chunk_idx, len(chunks),
)
if err is None:
all_message_ids.extend(msg_ids)
delivered += len(sub_items)
continue
# Telegram rejected the sub-chunk after our
# pre-flight passed (content / transient / rate).
# Try each item as its own message so partial
# delivery survives the chunk-level failure.
# Record the chunk-level cause first so the
# operator-visible ``errors`` list reads in
# cause-then-consequence order.
_LOGGER.warning(
"sendMediaGroup chunk %d/%d failed (%s) — falling back to per-item",
chunk_idx + 1, len(chunks), err.get("error"),
)
errors.append({
"kind": "chunk",
"chunk": chunk_idx + 1,
"error": err.get("error", "unknown"),
"code": err.get("error_code"),
})
for item_idx, item in enumerate(sub_items):
item_caption = sub_caption if item_idx == 0 else None
item_reply = sub_reply if item_idx == 0 else None
# No ``maybe_delay()`` here: per-item retries
# are a recovery path where added latency
# only widens the outage window — the
# individual sendPhoto/sendVideo calls have
# their own 429 backoff in ``_upload_media``.
item_result = await self._send_item_individually(
chat_id, item, item_caption, item_reply, parse_mode,
)
if item_result.get("success"):
delivered += 1
mid = item_result.get("message_id")
if mid is not None:
all_message_ids.append(mid)
else:
failed += 1
errors.append({
"kind": "item",
"chunk": chunk_idx + 1,
"item_index": item_idx,
"error": item_result.get("error", "unknown"),
})
# Deferred oversized-videos-as-documents: send each on its own
# via sendDocument. They couldn't ride in the media group
# because Telegram refuses to mix document with photo/video,
# and per-item failures don't poison siblings.
for deferred in deferred_documents:
await maybe_delay()
d_caption = caption if caption_pending else None
d_reply = reply_to_message_id if reply_pending else None
caption_pending = False
reply_pending = False
d_result = await self._send_item_individually(
chat_id, deferred, d_caption, d_reply, parse_mode,
)
return {"success": False, "error": "no_items_delivered"}
return {"success": True, "message_ids": all_message_ids}
if d_result.get("success"):
delivered += 1
mid = d_result.get("message_id")
if mid is not None:
all_message_ids.append(mid)
else:
failed += 1
errors.append({
"kind": "deferred_document",
"error": d_result.get("error", "unknown"),
})
if delivered == 0:
if skipped > 0 and not errors:
msg = f"all {skipped} item(s) filtered before send"
elif errors:
msg = errors[0].get("error", "no_items_delivered")
else:
msg = "no_items_delivered"
_LOGGER.warning(
"sendMediaGroup delivered 0 items (skipped=%d failed=%d)",
skipped, failed,
)
return {
"success": False,
"error": msg,
"message_ids": [],
"delivered_count": 0,
"skipped_count": skipped,
"failed_count": failed,
"errors": errors or None,
"failed_at_chunk": errors[0].get("chunk") if errors else None,
}
return {
"success": True,
"message_ids": all_message_ids,
"delivered_count": delivered,
"skipped_count": skipped,
"failed_count": failed,
"errors": errors or None,
}
@staticmethod
def _split_items_by_byte_budget(
items: list[_MediaItem], max_bytes: int,
) -> list[list[_MediaItem]]:
"""Greedy-pack ``items`` into sub-chunks under ``max_bytes`` each.
Cached items (``upload_bytes == 0``) are free and never force a
split. A single item that on its own exceeds the budget is
placed alone — letting Telegram return a precise error rather
than dropping it silently. Order is preserved so caption
attachment stays deterministic.
"""
if not items:
return []
groups: list[list[_MediaItem]] = []
current: list[_MediaItem] = []
current_size = 0
for item in items:
cost = item.upload_bytes
if current and current_size + cost > max_bytes:
groups.append(current)
current = []
current_size = 0
current.append(item)
current_size += cost
if current:
groups.append(current)
return groups
@staticmethod
def _attach_caption_to_first(
    items: list[_MediaItem], caption: str, parse_mode: str,
) -> None:
    """Set the caption and ``parse_mode`` on the first item's ``media_json``.

    The caption is truncated to ``TELEGRAM_MAX_CAPTION_LENGTH`` before it
    is stored. Only the first item is touched; per the original contract
    Telegram shows the first media-group item's caption. Calling this
    again simply overwrites the previous values. A no-op for an empty
    list.
    """
    if items:
        lead = items[0].media_json
        lead["caption"] = _truncate(caption, TELEGRAM_MAX_CAPTION_LENGTH)
        lead["parse_mode"] = parse_mode
async def _send_item_individually(
    self, chat_id: str, item: _MediaItem,
    caption: str | None, reply_to_message_id: int | None,
    parse_mode: str,
) -> NotificationResult:
    """Deliver a single ``_MediaItem`` as its own photo/video/document send.

    This is the per-item fallback used when a media-group sub-chunk is
    rejected, and the delivery path for deferred documents. Items that
    already carry bytes are uploaded directly. Items without bytes
    (cache hits) are first sent by their ``file_id``; if that path yields
    no result, the asset is downloaded again from ``source_url`` and
    uploaded fresh, so a stale ``file_id`` does not lose the asset.

    Returns the result dict of whichever send was performed, or a
    ``{"success": False, "error": ...}`` dict when the cached send gave
    no result and no source URL is known, or the re-download failed.
    """
    media_type = item.media_json.get("type") or "photo"
    # Anything that is neither photo nor video is sent as a document.
    kind = (
        _PHOTO_KIND if media_type == "photo"
        else _VIDEO_KIND if media_type == "video"
        else _DOCUMENT_KIND
    )

    cache: TelegramFileCache | None = None
    cache_key: str | None = None
    thumbhash: str | None = None
    if item.cache_info is not None:
        cache_key, _entry_type, thumbhash, _entry_size = item.cache_info
        cache = self._get_cache_for_key(cache_key)

    if item.attachment is not None:
        # Fresh item: reuse the bytes that were already fetched.
        _, blob, filename, content_type = item.attachment
    else:
        # Cached item: try the file_id reference before touching the network
        # for a re-download. ``attach://`` values are not file_ids.
        reference = item.media_json.get("media", "")
        if reference and not reference.startswith("attach://"):
            from_cache = await self._send_from_cache(
                kind, chat_id, reference, caption, reply_to_message_id, parse_mode,
            )
            if from_cache is not None:
                return from_cache
        if not item.source_url:
            return {"success": False, "error": "Cached fallback send failed (no source URL)"}
        blob, err = await self._safe_get(
            self._resolve_url(item.source_url), item.download_headers,
        )
        if blob is None:
            return {"success": False, "error": f"Re-download failed: {err}"}
        # No original filename/content type survives for a cache hit.
        filename = kind.default_filename
        content_type = kind.default_content_type

    return await self._upload_media(
        kind, chat_id, blob, filename, content_type,
        caption, reply_to_message_id, parse_mode,
        cache, cache_key, thumbhash,
    )
@staticmethod
def _partition_media_by_kind(
@@ -830,23 +1197,40 @@ class TelegramClient:
self,
chunk: list[dict[str, Any]],
max_asset_data_size: int | None,
first_caption: str | None,
parse_mode: str,
) -> list[_MediaItem]:
send_large_videos_as_documents: bool = False,
) -> tuple[list[_MediaItem], list[_MediaItem]]:
"""Fetch + filter a chunk and return aligned media-group items.
Returns ``(items, deferred_documents)`` — ``items`` go into
sendMediaGroup, ``deferred_documents`` are oversized videos
retagged as documents (when the caller opted in) that will be
sent individually via ``_send_item_individually`` *after* the
group sends. Telegram rejects mixing documents with photo/video
in one group, so they have to ride out separately.
Concurrency is bounded by ``_MEDIA_FETCH_CONCURRENCY`` so peak
memory stays predictable. Per-fetch exceptions are isolated via
``return_exceptions=True`` so a single failed download cannot
cancel its peers.
Caption injection is intentionally NOT performed here — callers
attach the caption after byte-budget sub-splitting so it lands
on the first item of the first delivered sub-chunk.
"""
sem = asyncio.Semaphore(_MEDIA_FETCH_CONCURRENCY)
async def fetch(idx: int, item: dict[str, Any]) -> tuple[int, dict | None, bytes | None]:
async def fetch(
idx: int, item: dict[str, Any],
) -> tuple[int, dict | None, bytes | None, bool]:
"""Returns ``(idx, cached_entry, data, defer_as_document)``.
``defer_as_document=True`` signals "video bytes valid but
too big for sendVideo — caller should send as document".
"""
url = item.get("url")
if not url:
_LOGGER.warning("Media skipped: missing url (idx=%d type=%s)", idx, item.get("type"))
return idx, None, None
return idx, None, None, False
media_type = item.get("type", "photo")
custom_cache_key = item.get("cache_key")
@@ -860,7 +1244,7 @@ class TelegramClient:
)
cached = item_cache.get(ck, thumbhash=item_thumbhash) if item_cache else None
if cached and cached.get("file_id"):
return idx, cached, None
return idx, cached, None, False
preloaded = item.get("data")
data: bytes | None
@@ -874,34 +1258,40 @@ class TelegramClient:
"Media skipped: download failed (idx=%d type=%s): %s",
idx, media_type, err,
)
return idx, None, None
return idx, None, None, False
if max_asset_data_size and len(data) > max_asset_data_size:
_LOGGER.warning(
"Media skipped: size %d exceeds max_asset_data_size %d (idx=%d type=%s)",
len(data), max_asset_data_size, idx, media_type,
)
return idx, None, None
return idx, None, None, False
if media_type == "video" and len(data) > TELEGRAM_MAX_VIDEO_SIZE:
if send_large_videos_as_documents:
_LOGGER.info(
"Video %d bytes over Telegram limit (idx=%d) — deferring as document",
len(data), idx,
)
return idx, None, data, True
_LOGGER.warning(
"Media skipped: video %d bytes exceeds Telegram limit %d (idx=%d)",
len(data), TELEGRAM_MAX_VIDEO_SIZE, idx,
)
return idx, None, None
return idx, None, None, False
if media_type == "photo":
exceeds, reason, _, _ = check_photo_limits(data)
if exceeds:
_LOGGER.warning(
"Media skipped: photo %s (idx=%d)", reason, idx,
)
return idx, None, None
return idx, None, data
return idx, None, None, False
return idx, None, data, False
raw = await asyncio.gather(
*(fetch(i, item) for i, item in enumerate(chunk)),
return_exceptions=True,
)
results: list[tuple[int, dict | None, bytes | None]] = []
results: list[tuple[int, dict | None, bytes | None, bool]] = []
for entry in raw:
if isinstance(entry, Exception):
_LOGGER.warning("Media fetch raised: %s", redact_exc(entry))
@@ -909,8 +1299,9 @@ class TelegramClient:
results.append(entry)
items: list[_MediaItem] = []
deferred_documents: list[_MediaItem] = []
upload_idx = 0
for idx, cached_entry, data in results:
for idx, cached_entry, data, defer_as_document in results:
item = chunk[idx]
url = item.get("url")
if not url:
@@ -918,6 +1309,35 @@ class TelegramClient:
media_type = item.get("type") or "photo"
custom_cache_key = item.get("cache_key")
# Deferred videos-as-documents are NEVER cache hits (the
# cache lookup branch returns early before the size check),
# so we always have fresh bytes here. Retag the
# media_json so ``_send_item_individually`` routes via
# ``_DOCUMENT_KIND`` to /sendDocument.
if defer_as_document and data is not None:
ct = item.get("content_type") or "video/mp4"
# Best-effort filename preserves the original
# extension so Telegram clients give it a sensible
# icon and the recipient can re-open it.
fname = url.split("/")[-1].split("?")[0] or "video.mp4"
if "." not in fname:
fname = "video.mp4"
ck = custom_cache_key or extract_asset_id_from_url(url) or url
ck_is_asset = is_asset_cache_key(ck)
bare_ck = asset_id_from_cache_key(ck) if ck_is_asset else ck
th = (
self._thumbhash_resolver(bare_ck)
if ck_is_asset and self._thumbhash_resolver else None
)
deferred_documents.append(_MediaItem(
media_json={"type": "document", "media": "attach://deferred"},
cache_info=(ck, "document", th, len(data)),
attachment=("deferred", data, fname, ct),
source_url=url,
download_headers=item.get("headers"),
))
continue
if cached_entry and cached_entry.get("file_id"):
mij: dict[str, Any] = {"type": media_type, "media": cached_entry["file_id"]}
cache_info: tuple[str, str, str | None, int] | None = None
@@ -940,14 +1360,14 @@ class TelegramClient:
else:
continue
if first_caption and not items:
# Only the first usable item in the first chunk receives
# the caption, per Telegram's media-group semantics.
mij["caption"] = _truncate(first_caption, TELEGRAM_MAX_CAPTION_LENGTH)
mij["parse_mode"] = parse_mode
items.append(_MediaItem(media_json=mij, cache_info=cache_info, attachment=attachment))
return items
items.append(_MediaItem(
media_json=mij,
cache_info=cache_info,
attachment=attachment,
source_url=url,
download_headers=item.get("headers"),
))
return items, deferred_documents
async def _post_media_group(
self,
@@ -973,6 +1393,7 @@ class TelegramClient:
for name, payload, filename, ct in attachments:
f.add_field(name, payload, filename=filename, content_type=ct)
f.add_field("media", json.dumps(media_json))
_apply_send_opts_to_form(f)
return f
for attempt in range(1, _TG_429_MAX_ATTEMPTS + 1):
@@ -13,6 +13,11 @@ _LOGGER = logging.getLogger(__name__)
TELEGRAM_API_BASE_URL: Final = "https://api.telegram.org/bot"
TELEGRAM_MAX_PHOTO_SIZE: Final = 10 * 1024 * 1024 # 10 MB
TELEGRAM_MAX_VIDEO_SIZE: Final = 50 * 1024 * 1024 # 50 MB
# Telegram's sendMediaGroup envelope tops out near 50 MB total (multipart
# bytes including form overhead). 45 MB keeps a safety margin so we don't
# eat 413s when the per-item budget admits items that, summed, would
# bust Telegram's request cap.
TELEGRAM_MAX_GROUP_TOTAL_BYTES: Final = 45 * 1024 * 1024 # 45 MB
TELEGRAM_MAX_DIMENSION_SUM: Final = 10000
# Telegram message-text limit (sendMessage) and caption limit
# (sendPhoto/sendVideo/sendDocument/first item of sendMediaGroup).
@@ -126,36 +131,6 @@ def build_telegram_asset_entry(
return entry
def split_media_by_upload_size(
    media_items: list[tuple], max_upload_size: int
) -> list[list[tuple]]:
    """Pack media items, in order, into groups bounded by ``max_upload_size``.

    Each item is a tuple whose element ``[1]`` is the media reference and
    element ``[4]`` is the "already cached" flag. Only non-cached items
    whose reference is ``bytes`` count toward the budget (by ``len``);
    every other item weighs zero.

    A group is closed as soon as adding the next item would push it over
    the budget. An item that is larger than the budget on its own still
    gets a group (it is never dropped).

    Returns:
        The list of groups; ``[]`` when ``media_items`` is empty.
    """
    if not media_items:
        return []

    batches: list[list[tuple]] = []
    pending: list[tuple] = []
    pending_bytes = 0

    for entry in media_items:
        payload = entry[1]
        cached = entry[4]
        counts = not cached and isinstance(payload, bytes)
        weight = len(payload) if counts else 0

        # Flush first so the item that overflowed starts the next group.
        if pending and pending_bytes + weight > max_upload_size:
            batches.append(pending)
            pending, pending_bytes = [], 0

        pending.append(entry)
        pending_bytes += weight

    if pending:
        batches.append(pending)
    return batches
def check_photo_limits(
data: bytes,
) -> tuple[bool, str | None, int | None, int | None]:
@@ -315,6 +315,63 @@ async def clear_telegram_cache(
return result
class DiagnosticActivateBody(BaseModel):
    """Request body for ``POST /diagnostic-mode``."""

    # Logger name to switch to DEBUG; passed through to ``set_diagnostic``.
    module: str
    # Length of the DEBUG window in minutes; passed through to ``set_diagnostic``.
    duration_minutes: int = 30
@router.get("/diagnostic-mode")
async def list_diagnostic_overrides(
    user: User = Depends(require_admin),
):
    """Return the temporary DEBUG overrides that are currently active.

    Admin-only. Backs the dashboard panel where an admin flips a module
    to DEBUG for a bounded window with auto-revert; the response wraps
    the service's snapshot under the ``active`` key.
    """
    # Imported lazily, as in the other diagnostic-mode handlers.
    from ..services.diagnostic_mode import list_active

    overrides = list_active()
    return {"active": overrides}
@router.post("/diagnostic-mode")
async def activate_diagnostic_override(
    body: DiagnosticActivateBody,
    user: User = Depends(require_admin),
):
    """Switch ``body.module`` to DEBUG and schedule its auto-revert.

    Admin-only. Activating a module that already has an override replaces
    the earlier schedule. The created entry is returned directly so the UI
    can draw the countdown without issuing a follow-up GET.

    Raises:
        HTTPException: 400 carrying the service's message when
            ``set_diagnostic`` rejects the input with ``ValueError``.
    """
    from ..services.diagnostic_mode import set_diagnostic

    try:
        created = await set_diagnostic(body.module, body.duration_minutes)
    except ValueError as exc:
        # Validation lives in the service; surface its message verbatim.
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    else:
        return created
@router.delete("/diagnostic-mode/{module:path}")
async def revert_diagnostic_override(
    module: str,
    user: User = Depends(require_admin),
):
    """End the override for ``module`` before its window runs out.

    Admin-only.

    Raises:
        HTTPException: 404 when the service reports that no override was
            active, so the client can show a "nothing to revert" message
            from the status code alone.
    """
    from ..services.diagnostic_mode import revert_diagnostic

    was_active = await revert_diagnostic(module)
    if was_active:
        return {"reverted": module}
    raise HTTPException(
        status_code=404, detail=f"No active override for {module!r}",
    )
@router.get("/locales")
async def get_supported_locales(
user: User = Depends(get_current_user),
@@ -13,6 +13,7 @@ from jinja2.sandbox import SandboxedEnvironment
from sqlmodel import select
from sqlmodel.ext.asyncio.session import AsyncSession
from notify_bridge_core.log_context import enrich_details_with_correlation
from notify_bridge_core.notifications.telegram.client import TelegramClient
from ..database.engine import get_engine
from ..database.models import (
@@ -347,7 +348,7 @@ async def _log_command_event(
collection_id=str(chat_id),
collection_name=_format_command_subject(cmd, args),
assets_count=media_total,
details=details,
details=enrich_details_with_correlation(details),
))
await session.commit()
except Exception: # noqa: BLE001 — diagnostic only, never block reply
@@ -1,6 +1,7 @@
"""Notify Bridge Server — FastAPI application entry point."""
import logging
import uuid
from contextlib import asynccontextmanager
from fastapi import FastAPI
@@ -8,6 +9,11 @@ from fastapi.middleware.cors import CORSMiddleware
from slowapi import _rate_limit_exceeded_handler
from slowapi.errors import RateLimitExceeded
from slowapi.middleware import SlowAPIMiddleware
from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint
from starlette.requests import Request as StarletteRequest
from starlette.responses import Response as StarletteResponse
from notify_bridge_core.log_context import bind_log_context
from .config import settings as _log_cfg
from .logging_setup import setup_logging
@@ -163,6 +169,16 @@ async def lifespan(app: FastAPI):
_READY = False
from .services.ha_subscription import stop_all as stop_ha_subscriptions
await stop_ha_subscriptions()
# Restore the DB-configured baseline level for any temporary DEBUG
# overrides before the engine is disposed — so even a forced restart
# leaves the world tidy and doesn't leak DEBUG state into the next
# process (which would also be wiped by setup_logging() at boot, but
# being explicit about shutdown is cheaper than relying on a re-init).
from .services.diagnostic_mode import revert_all as revert_diagnostics
try:
await revert_diagnostics()
except Exception: # pragma: no cover — never block shutdown on this.
_LOGGER.exception("Failed to revert diagnostic overrides during shutdown")
scheduler = get_scheduler()
if scheduler.running:
scheduler.shutdown(wait=True)
@@ -178,9 +194,55 @@ _APP_VERSION = _resolve_version()
app = FastAPI(title="Notify Bridge", version=_APP_VERSION, lifespan=lifespan)
# --- Security headers ---
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.requests import Request as StarletteRequest
from starlette.responses import Response as StarletteResponse
# Bounded character set for accepted inbound X-Request-Id values. Anything
# outside this is replaced with a server-generated id so a malicious header
# can't smuggle CR/LF into log lines or break grep-by-field parsing.
# ``:`` is intentionally excluded so an inbound value can't masquerade as a
# server-minted ``disp:<hex>`` / ``req:<hex>`` id and confuse operator greps.
_REQUEST_ID_MAX_LEN = 64
_REQUEST_ID_ALLOWED = set(
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"
)
def _normalize_request_id(raw: str | None) -> str:
if not raw:
return f"req:{uuid.uuid4().hex[:12]}"
raw = raw.strip()
if not raw or len(raw) > _REQUEST_ID_MAX_LEN:
return f"req:{uuid.uuid4().hex[:12]}"
if not all(c in _REQUEST_ID_ALLOWED for c in raw):
return f"req:{uuid.uuid4().hex[:12]}"
return raw
class RequestContextMiddleware(BaseHTTPMiddleware):
    """Attach a ``request_id`` to the log context of every request.

    The id comes from the inbound ``X-Request-Id`` header, normalized by
    ``_normalize_request_id`` (which substitutes a ``req:<12 hex>`` value
    when the header is absent or unacceptable), so an upstream proxy can
    propagate its own correlation id. The same id is written to the
    response's ``X-Request-Id`` header so the SPA can show it in bug
    reports.

    The id is bound with :func:`bind_log_context` for the duration of the
    downstream call, which is what makes it available to log lines and to
    :func:`notify_bridge_core.log_context.enrich_details_with_correlation`
    when an ``EventLog`` row is written while handling the request.
    """

    async def dispatch(
        self,
        request: StarletteRequest,
        call_next: RequestResponseEndpoint,
    ) -> StarletteResponse:
        """Bind the request id, run the rest of the stack, echo the id."""
        inbound = request.headers.get("x-request-id")
        req_id = _normalize_request_id(inbound)
        with bind_log_context(request_id=req_id):
            response: StarletteResponse = await call_next(request)
            response.headers["X-Request-Id"] = req_id
            return response
_CSP = (
@@ -238,6 +300,12 @@ app.add_middleware(
allow_headers=["*"],
)
# Request-ID middleware is added LAST so it becomes the outermost wrapper —
# every other middleware (CORS, rate limit, security headers) then logs with
# the request_id already bound, and CORS preflight responses also carry the
# X-Request-Id echo header.
app.add_middleware(RequestContextMiddleware)
# Register routes — static paths before parameterized
app.include_router(auth_router)
app.include_router(template_vars_router)
@@ -9,6 +9,11 @@ from typing import Any
from sqlmodel import select
from sqlmodel.ext.asyncio.session import AsyncSession
from notify_bridge_core.log_context import (
bind_log_context,
ensure_dispatch_id,
enrich_details_with_correlation,
)
from notify_bridge_core.providers.action_executor import ActionResult
from ..database.engine import get_engine
@@ -27,6 +32,15 @@ async def run_action(
action_id: int, *, trigger: str = "scheduled"
) -> ActionResult:
"""Load an action from DB, execute it, and save the execution log."""
# One dispatch_id per action run so the EventLog row (and any inner log
# lines emitted by the action executor) share a correlation id.
with bind_log_context(dispatch_id=ensure_dispatch_id()):
return await _run_action_impl(action_id, trigger=trigger)
async def _run_action_impl(
action_id: int, *, trigger: str = "scheduled"
) -> ActionResult:
engine = get_engine()
# ------------------------------------------------------------------
@@ -142,7 +156,7 @@ async def run_action(
# without a separate action_name renderer.
collection_name=action.name,
assets_count=action_result.total_items_affected,
details={
details=enrich_details_with_correlation({
"action_type": action.action_type,
"trigger": trigger,
"rules_processed": action_result.rules_processed,
@@ -150,7 +164,7 @@ async def run_action(
"rules_failed": action_result.rules_failed,
"error": action_result.error or "",
"execution_id": execution_id,
},
}),
))
await session.commit()
@@ -33,6 +33,11 @@ from sqlalchemy.orm.attributes import flag_modified
from sqlmodel import select
from sqlmodel.ext.asyncio.session import AsyncSession
from notify_bridge_core.log_context import (
bind_log_context,
ensure_dispatch_id,
enrich_details_with_correlation,
)
from notify_bridge_core.models.events import EventType, ServiceEvent
from notify_bridge_core.models.media import MediaAsset, MediaType
from notify_bridge_core.notifications.dispatcher import (
@@ -56,6 +61,7 @@ from .dispatch_helpers import (
load_link_data,
resolve_provider_credential,
)
from .dispatch_summary import summarize_dispatch_results
_LOGGER = logging.getLogger(__name__)
@@ -616,12 +622,12 @@ async def _mark_dropped(
collection_name=payload.get("collection_name", ""),
assets_count=int(payload.get("added_count", 0))
or int(payload.get("removed_count", 0)),
details={
details=enrich_details_with_correlation({
"dispatch_status": "deferred_then_dropped",
"reason": reason,
"original_event_log_id": row.event_log_id,
"provider_type": payload.get("provider_type", ""),
},
}),
))
@@ -644,6 +650,28 @@ async def _process_row(
entry produces its own target_config so a broadcast deferred row fans
out to all current children at drain time.
"""
# Bind a fresh dispatch_id per drained row so the EventLog rows written
# by the success/drop paths AND the inner dispatcher's log lines share
# one id. Each deferred row is a logically separate dispatch attempt.
with bind_log_context(dispatch_id=ensure_dispatch_id()):
await _process_row_impl(
session, row, tracker, provider_id, provider_name,
provider_config, app_tz, link_by_id, dispatcher, stats,
)
async def _process_row_impl(
session: AsyncSession,
row: DeferredDispatch,
tracker: NotificationTracker,
provider_id: int,
provider_name: str,
provider_config: dict[str, Any],
app_tz: str,
link_by_id: dict[int, list[dict[str, Any]]],
dispatcher: NotificationDispatcher,
stats: dict[str, int],
) -> None:
expanded = link_by_id.get(row.link_id)
if not expanded:
# Link removed/disabled between defer and drain.
@@ -735,6 +763,8 @@ async def _process_row(
row.fired_at = datetime.now(timezone.utc)
session.add(row)
summary = summarize_dispatch_results(results)
if success:
stats["fired"] += 1
session.add(EventLog(
@@ -747,14 +777,15 @@ async def _process_row(
collection_id=row.collection_id,
collection_name=event.collection_name,
assets_count=event.added_count or event.removed_count or 0,
details={
details=enrich_details_with_correlation({
"dispatch_status": "delivered_after_quiet_hours",
"original_event_log_id": row.event_log_id,
"deferred_for_seconds": int(
(row.fired_at - row.created_at).total_seconds()
),
"provider_type": event.provider_type.value,
},
"dispatch_summary": summary,
}),
))
else:
stats["dropped"] += 1
@@ -769,12 +800,13 @@ async def _process_row(
collection_id=row.collection_id,
collection_name=event.collection_name,
assets_count=event.added_count or event.removed_count or 0,
details={
details=enrich_details_with_correlation({
"dispatch_status": "deferred_then_failed",
"reason": str(first_err)[:200],
"original_event_log_id": row.event_log_id,
"provider_type": event.provider_type.value,
},
"dispatch_summary": summary,
}),
))
@@ -0,0 +1,381 @@
"""Temporary per-module DEBUG overrides with auto-revert.
The runtime ``apply_log_levels()`` API in ``logging_setup`` already lets
admins flip a module to DEBUG, but the existing path requires editing the
``log_levels`` DB setting and remembering to revert it. Operators end up
either forgetting (leaving DEBUG-flooded logs in production) or never
turning it on (debugging through stderr only).
This module gives the dashboard a cheap toggle: "give me DEBUG for
``notify_bridge_core.notifications.telegram.client`` for 30 minutes" —
apply immediately, then schedule a one-shot job at ``now + 30 min`` that
reverts to whatever level that module would normally have under the
current DB-configured ``log_levels``.
State is in-memory only. A server restart wipes every active override,
which is the right semantic: ``setup_logging`` re-applies the
DB-configured baseline at boot, so a forgotten override can never
silently carry across a deploy. The lifespan shutdown also calls
:func:`revert_all` to cleanly restore baselines before the process
exits — useful for hot-reload dev loops where the server restarts in
place.
"""
from __future__ import annotations
import asyncio
import logging
from dataclasses import dataclass
from datetime import datetime, timedelta, timezone
from typing import Any
from sqlmodel.ext.asyncio.session import AsyncSession
from ..database.engine import get_engine
from ..logging_setup import (
_NOISY_LIBRARY_DEFAULTS,
parse_level_overrides,
)
_LOGGER = logging.getLogger(__name__)
# Limits picked to match what "an operator clicked this button" looks like.
# One minute is enough to reproduce a single failing dispatch; four hours is
# long enough for a slow-rolling incident without risking a forgotten
# override outliving a workday.
_MIN_DURATION_MINUTES = 1
_MAX_DURATION_MINUTES = 240
# Allowlist of module namespaces an operator can flip. Lets us catch typos
# and blocks ``""`` (root) — flipping the root logger to DEBUG floods
# stderr with stuff the operator probably didn't want (boto3, jinja2,
# every dependency). Anything matching is accepted, anything else is
# rejected with a 400.
_ALLOWED_PREFIXES = (
"notify_bridge_core",
"notify_bridge_server",
"sqlalchemy",
"aiohttp",
"apscheduler",
"urllib3",
"httpx",
"httpcore",
"asyncio",
"PIL",
"uvicorn",
"starlette",
"fastapi",
)
@dataclass(frozen=True)
class _Override:
"""One active DEBUG override.
``baseline_level`` is what the module had at activation time — used
for the dashboard's "→ WARNING" display. The actual revert path
re-reads the current DB-configured ``log_levels`` so a setting change
made *while* the override is active is honored at expiry.
"""
module: str
baseline_level: str
activated_at: datetime
expires_at: datetime
# Module name → active override. Mutated only from the asyncio thread.
_active: dict[str, _Override] = {}
# Strong references for background tasks created via the asyncio-timer
# fallback path. CPython's event loop holds only weak refs, so a task
# without an external retainer can be GC'd before it fires. Tasks are
# discarded automatically when they complete.
_bg_tasks: set[asyncio.Task[None]] = set()
def _is_allowed(module: str) -> bool:
    """Tell whether ``module`` falls under an allowlisted namespace.

    A name qualifies when it equals one of ``_ALLOWED_PREFIXES`` or is a
    dotted descendant of one (``prefix + "."``). The empty name is always
    rejected.
    """
    if not module:
        return False
    for prefix in _ALLOWED_PREFIXES:
        if module == prefix or module.startswith(f"{prefix}."):
            return True
    return False
def _normalize_level_name(lvl: int) -> str:
"""Return a canonical string for a logging level code."""
name = logging.getLevelName(lvl)
if isinstance(name, str) and name and not name.startswith("Level "):
return name
return "INFO"
def _walk_dotted(name: str) -> list[str]:
"""Yield ``name`` then progressively shorter dotted prefixes.
``"sqlalchemy.engine.Engine"`` →
``["sqlalchemy.engine.Engine", "sqlalchemy.engine", "sqlalchemy"]``.
Mirrors Python's logger-hierarchy traversal so a sub-logger inherits
its parent's override / noisy default rather than falling through to
the root level.
"""
out = [name]
while "." in name:
name = name.rsplit(".", 1)[0]
out.append(name)
return out
def _baseline_for(module: str, db_log_levels: str | None) -> str:
    """Work out the level ``module`` has when no diagnostic override applies.

    ``module`` and then each of its dotted parents is checked, most
    specific first. For each name the sources are consulted in this order:

    1. the parsed DB ``log_levels`` overrides;
    2. the curated ``_NOISY_LIBRARY_DEFAULTS``.

    When no name matches either source, the root logger's effective level
    is returned as a level name.
    """
    configured = parse_level_overrides(db_log_levels or "")
    sources = (configured, _NOISY_LIBRARY_DEFAULTS)
    for name in _walk_dotted(module):
        for table in sources:
            if name in table:
                return table[name]
    return _normalize_level_name(logging.getLogger().getEffectiveLevel())
async def _read_db_log_levels() -> str:
    """Fetch the current ``log_levels`` setting using a short-lived session.

    Used both when an override is activated and when it is reverted, so
    the revert sees any change made to the setting in between.

    Best-effort: any failure is logged at DEBUG and reported as ``""``
    (no DB overrides), so the caller falls back to the noisy-library
    defaults instead of the revert timer crashing.
    """
    try:
        from ..api.app_settings import get_setting

        async with AsyncSession(get_engine()) as session:
            stored = await get_setting(session, "log_levels")
        return stored or ""
    except Exception:  # noqa: BLE001
        _LOGGER.debug(
            "diagnostic_mode: failed to read log_levels from DB; "
            "revert will use noisy-library defaults",
            exc_info=True,
        )
        return ""
def list_active() -> list[dict[str, Any]]:
    """Snapshot the currently active overrides for the dashboard.

    Entries whose ``expires_at`` has passed are swept out of ``_active``
    rather than reported. This protects against a scheduler misfire that
    left a ghost row behind.

    A ghost row means the revert callback has not run, so the logger is
    still at DEBUG. Dropping the record alone would hide that state:
    ``revert_diagnostic`` would report "nothing active" and ``revert_all``
    would skip the module. The sweep therefore also restores the
    ``baseline_level`` captured at activation. If the revert callback
    does fire later, it re-applies the DB-derived level on top of this.

    Returns:
        One dict per live override with ``module``, ``baseline_level``,
        ``current_level``, ``activated_at``, ``expires_at`` (ISO strings)
        and ``remaining_seconds``.
    """
    now = datetime.now(timezone.utc)
    out: list[dict[str, Any]] = []
    expired: list[tuple[str, _Override]] = []
    for module, ov in _active.items():
        if ov.expires_at <= now:
            # Collected first: ``_active`` must not change while iterating.
            expired.append((module, ov))
            continue
        out.append({
            "module": ov.module,
            "baseline_level": ov.baseline_level,
            "current_level": "DEBUG",
            "activated_at": ov.activated_at.isoformat(),
            "expires_at": ov.expires_at.isoformat(),
            "remaining_seconds": int((ov.expires_at - now).total_seconds()),
        })
    for module, ov in expired:
        _active.pop(module, None)
        try:
            logging.getLogger(module).setLevel(ov.baseline_level)
        except (TypeError, ValueError):
            # A bad stored level must not turn the listing into a failure.
            _LOGGER.warning(
                "Diagnostic mode: could not restore %s to %r while sweeping "
                "an expired override",
                module, ov.baseline_level, exc_info=True,
            )
            continue
        _LOGGER.info(
            "Diagnostic mode: %s swept after expiry; restored to %s",
            module, ov.baseline_level,
        )
    return out
def is_active(module: str) -> bool:
    """Report whether ``module`` has an override that has not yet expired."""
    override = _active.get(module)
    return (
        override is not None
        and override.expires_at > datetime.now(timezone.utc)
    )
async def set_diagnostic(
    module: str,
    duration_minutes: int,
) -> dict[str, Any]:
    """Turn on a DEBUG override for ``module`` for ``duration_minutes``.

    Activating a module that is already active replaces its schedule, so
    pressing the button twice restarts the window instead of stacking
    timers.

    Returns:
        The dashboard-ready entry: ``module``, ``baseline_level``,
        ``current_level``, ``activated_at``, ``expires_at`` and
        ``remaining_seconds``.

    Raises:
        ValueError: ``module`` is outside the allowlist, or
            ``duration_minutes`` is outside the permitted range. The API
            layer turns this into a 400 with the same message.
    """
    if not _is_allowed(module):
        raise ValueError(
            f"Module {module!r} is not in the diagnostic allowlist",
        )
    within_bounds = (
        _MIN_DURATION_MINUTES <= duration_minutes <= _MAX_DURATION_MINUTES
    )
    if not within_bounds:
        raise ValueError(
            f"duration_minutes must be between {_MIN_DURATION_MINUTES} and "
            f"{_MAX_DURATION_MINUTES}",
        )

    baseline = _baseline_for(module, await _read_db_log_levels())
    activated_at = datetime.now(timezone.utc)
    expires_at = activated_at + timedelta(minutes=duration_minutes)

    # Takes effect straight away; the timer below only handles the revert.
    logging.getLogger(module).setLevel("DEBUG")

    # Drop the previous schedule first so only one revert is pending.
    _remove_scheduled(module)
    override = _Override(
        module=module,
        baseline_level=baseline,
        activated_at=activated_at,
        expires_at=expires_at,
    )
    _active[module] = override
    _schedule_revert(module, expires_at)

    _LOGGER.info(
        "Diagnostic mode: %s set to DEBUG (was %s) for %d min, expires at %s",
        module, baseline, duration_minutes, expires_at.isoformat(),
    )
    remaining = int((expires_at - activated_at).total_seconds())
    return {
        "module": module,
        "baseline_level": baseline,
        "current_level": "DEBUG",
        "activated_at": activated_at.isoformat(),
        "expires_at": expires_at.isoformat(),
        "remaining_seconds": remaining,
    }
async def revert_diagnostic(module: str) -> bool:
    """End the override for ``module`` right now.

    The level restored is computed from the ``log_levels`` setting as it
    is at the time of the call, not from the activation-time snapshot.

    Returns:
        ``True`` when an override was removed; ``False`` when none was
        active, which the API layer reports as a 404.
    """
    if _active.pop(module, None) is None:
        return False

    _remove_scheduled(module)
    target = _baseline_for(module, await _read_db_log_levels())
    logging.getLogger(module).setLevel(target)
    _LOGGER.info(
        "Diagnostic mode: %s reverted from DEBUG back to %s (manual)",
        module, target,
    )
    return True
async def revert_all() -> int:
    """Revert every override that is currently active.

    Called from the lifespan shutdown so a server stop or hot-reload
    leaves logger levels clean.

    Returns:
        How many overrides were actually reverted.
    """
    reverted = 0
    # Iterate over a copy: each revert removes its entry from ``_active``.
    for module in tuple(_active):
        if await revert_diagnostic(module):
            reverted += 1
    return reverted
# ---------------------------------------------------------------------------
# APScheduler glue — wired here so the API layer doesn't import scheduler.
# ---------------------------------------------------------------------------
_JOB_PREFIX = "diag_revert::"
def _job_id_for(module: str) -> str:
return _JOB_PREFIX + module
def _remove_scheduled(module: str) -> None:
    """Remove the pending revert job for ``module`` if there is one.

    Best-effort in both steps: the scheduler may not be obtainable (for
    example in tests), and on a first activation there is no job to
    remove. Either situation is logged at DEBUG, with the traceback, and
    otherwise ignored, so someone chasing a scheduler problem still has a
    trail.
    """
    try:
        from .scheduler import get_scheduler

        sched = get_scheduler()
    except Exception:  # noqa: BLE001
        _LOGGER.debug(
            "diagnostic_mode: scheduler not yet available for remove(%s)",
            module, exc_info=True,
        )
        return

    target_job = _job_id_for(module)
    try:
        sched.remove_job(target_job)
    except Exception:  # noqa: BLE001 — JobLookupError or not-running.
        _LOGGER.debug(
            "diagnostic_mode: no prior schedule to remove for %s",
            module, exc_info=True,
        )
def _schedule_revert(module: str, when: datetime) -> None:
    """Schedule the one-shot auto-revert of ``module`` at ``when``.

    Preferred path: a ``date`` job on the running APScheduler instance,
    keyed by ``_job_id_for(module)`` with ``replace_existing=True``.

    Fallback path (scheduler not running or not obtainable): a
    strongly-referenced ``asyncio`` task that sleeps until ``when`` and
    then runs ``_expire_callback``.

    Fallback timers are never cancelled when a module is re-activated,
    because ``_remove_scheduled`` only knows about scheduler jobs. Each
    timer therefore checks, on waking, that the active override still
    carries the ``expires_at`` it was created for. A timer superseded by a
    newer activation exits without touching the logger, so it cannot cut
    the newer window short.
    """
    try:
        from .scheduler import get_scheduler
        scheduler = get_scheduler()
        if scheduler.running:
            scheduler.add_job(
                _expire_callback,
                trigger="date",
                run_date=when,
                args=[module],
                id=_job_id_for(module),
                replace_existing=True,
                misfire_grace_time=60,
            )
            return
    except Exception:  # noqa: BLE001 — fall through to the task path.
        _LOGGER.debug(
            "diagnostic_mode: scheduler unavailable; using asyncio fallback",
            exc_info=True,
        )

    # Fallback: in-process timer. Retain the task in a module-level set so
    # it isn't garbage-collected before the timer fires.
    delay = max(0.0, (when - datetime.now(timezone.utc)).total_seconds())

    async def _wait_and_expire() -> None:
        try:
            await asyncio.sleep(delay)
        except asyncio.CancelledError:
            return
        current = _active.get(module)
        if current is not None and current.expires_at != when:
            # A later activation replaced the window this timer belongs to;
            # that activation owns its own revert.
            _LOGGER.debug(
                "diagnostic_mode: stale fallback timer for %s ignored "
                "(override was re-activated)",
                module,
            )
            return
        # ``current is None`` still reverts: the record may have been swept
        # while the logger level was left untouched.
        await _expire_callback(module)

    try:
        loop = asyncio.get_running_loop()
    except RuntimeError:
        # Nothing can run the timer; say so instead of failing silently.
        _LOGGER.warning(
            "diagnostic_mode: no running event loop; auto-revert for %s "
            "was not scheduled",
            module,
        )
        return
    task = loop.create_task(_wait_and_expire())
    _bg_tasks.add(task)
    task.add_done_callback(_bg_tasks.discard)
async def _expire_callback(module: str) -> None:
    """Revert ``module`` to its baseline when its window ends.

    Invoked by the revert timer. The baseline is derived from a fresh read
    of ``log_levels`` rather than from the snapshot taken at activation,
    so a change made to the setting during the window is honored. The
    level is re-applied even when no override record is found; the log
    line records whether one was.
    """
    was_active = _active.pop(module, None) is not None
    target = _baseline_for(module, await _read_db_log_levels())
    logging.getLogger(module).setLevel(target)
    _LOGGER.info(
        "Diagnostic mode: %s auto-reverted from DEBUG to %s (was active=%s)",
        module, target, was_active,
    )
@@ -0,0 +1,255 @@
"""Aggregate per-target dispatch results into an ``EventLog.details`` summary.
Every dispatch site (``event_dispatch``, ``watcher``, ``deferred_dispatch``,
``scheduled_dispatch``) calls :func:`NotificationDispatcher.dispatch` and
gets back a ``list[dict]`` — one entry per target. Each entry has at minimum
``success: bool`` and (on failure) ``error: str``. Telegram media-group
sends additionally include ``delivered_count``, ``skipped_count``,
``failed_count``, ``errors`` and ``failed_at_chunk`` so a partial delivery
is observable from the result.
Historically the dashboard only saw the per-row ``status`` derived at
EventLog insert time — partial failures (one target out of three failed,
two assets out of ten dropped) showed up as a generic success/failure and
the operator had to read stderr to find the cause. This module collapses
the per-target dicts into a small ``dispatch_summary`` block that's merged
into ``EventLog.details`` after the dispatch completes, so the same
information surfaces in the UI without re-reading logs.
"""
from __future__ import annotations
import asyncio
import logging
from typing import Any
from sqlalchemy.orm.attributes import flag_modified
from sqlmodel.ext.asyncio.session import AsyncSession
from ..database.models import EventLog
_LOGGER = logging.getLogger(__name__)
# Bound the error list we stash on the row. A pathological dispatch (50
# targets, 50 media items each, all failing) would otherwise bloat the
# row past anything useful — and the dashboard renders a fixed-height
# strip anyway. Excess entries are summarized as ``errors_truncated``.
_MAX_ERRORS = 20
_MAX_MEDIA_ERRORS = 20

# Cap error message length to avoid pathological payloads in the row.
_MAX_ERROR_MSG_LEN = 500

# Distinct sentinel so an operator scanning the dashboard can tell our
# clipping apart from a literal ``…`` that often appears in upstream API
# error text (Telegram does this in some Bad Request messages).
_TRUNCATION_MARKER = "…[truncated]"


def _trim(value: Any) -> Any:
    """Truncate string values to keep the persisted summary bounded.

    Non-string values and strings within ``_MAX_ERROR_MSG_LEN`` are returned
    unchanged.
    """
    if isinstance(value, str) and len(value) > _MAX_ERROR_MSG_LEN:
        return value[:_MAX_ERROR_MSG_LEN] + _TRUNCATION_MARKER
    return value


def _coerce_count(value: Any) -> int:
    """Convert a media counter to ``int``; unusable values count as 0."""
    try:
        return int(value or 0)
    except (TypeError, ValueError):
        # A summary is diagnostic data: a garbage counter must not abort
        # the aggregation of everything else.
        return 0


def _first_present(leaf: dict[str, Any], *keys: str) -> Any:
    """Return the first non-``None`` value among ``keys`` in ``leaf``."""
    for key in keys:
        value = leaf.get(key)
        if value is not None:
            return value
    return None


def _target_error(result: dict[str, Any]) -> Any:
    """Pick the most informative error text for a failed target.

    Prefers the singular ``error`` field. When it is absent, falls back to
    the string entries of a target-level ``errors`` list (joined with
    ``"; "``) and finally to ``"unknown"``.
    """
    error = result.get("error")
    if error is not None:
        return _trim(error)
    candidates = result.get("errors")
    if isinstance(candidates, (list, tuple)):
        messages = [m for m in candidates if isinstance(m, str) and m]
        if messages:
            return _trim("; ".join(messages))
    return "unknown"


def summarize_dispatch_results(
    results: list[dict[str, Any]],
) -> dict[str, Any]:
    """Aggregate per-target dispatch results into a compact summary dict.

    The shape is intentionally narrow so it round-trips cleanly through
    SQLite JSON storage and stays cheap to render in the dashboard.

    Returns a dict with keys:

    * ``targets_attempted`` / ``targets_succeeded`` / ``targets_failed``
      — counts across the results list.
    * ``errors`` — per-target failure entries
      (``[{index, error}, ...]``), capped at ``_MAX_ERRORS``.
    * ``media`` — present only when at least one result reports media
      counts. ``{delivered, skipped, failed}``.
    * ``media_errors`` — per-item / per-chunk failure entries from the
      Telegram media-group fallback, capped at ``_MAX_MEDIA_ERRORS``.
    * ``errors_truncated`` / ``media_errors_truncated`` — count of dropped
      entries when the corresponding cap was hit. Present only when > 0.

    Input shape: each entry is what ``NotificationDispatcher._aggregate_results``
    returns for one target — ``{success, receivers, successes, failures,
    results: [per-receiver, ...], errors?, error?}``. Media counts live
    on each per-receiver dict under ``media_delivered_count`` /
    ``media_skipped_count`` / ``media_failed_count`` / ``media_errors``,
    so the walk drills one level deeper than the obvious top-level reads.
    For backward compat with simpler call sites that pass a single leaf
    dict (the Telegram media-group result directly), the leaf shape is
    accepted as a fallback when ``results`` is absent.

    Malformed input is tolerated rather than raised on: a non-dict entry in
    ``results`` is counted as a failed target, a non-numeric media counter
    counts as 0, and a non-list ``errors`` / ``media_errors`` value is
    ignored.
    """
    if not results:
        # Empty results = nothing to summarize. Returning ``{}`` lets the
        # callers' ``if summary`` / ``if results`` guards keep the row
        # clean rather than stamping a misleading zero-counts block.
        return {}

    succeeded = 0
    failed = 0
    errors: list[dict[str, Any]] = []
    errors_dropped = 0
    media_delivered = 0
    media_skipped = 0
    media_failed = 0
    has_media_counts = False
    media_errors: list[dict[str, Any]] = []
    media_errors_dropped = 0

    for index, result in enumerate(results):
        well_formed = isinstance(result, dict)
        if well_formed and result.get("success"):
            succeeded += 1
        else:
            failed += 1
            if len(errors) < _MAX_ERRORS:
                errors.append({
                    "index": index,
                    "error": (
                        _target_error(result)
                        if well_formed
                        else _trim(f"malformed result: {result!r}")
                    ),
                })
            else:
                errors_dropped += 1
        if not well_formed:
            # Nothing to drill into; the failure entry above is all we
            # can say about this target.
            continue

        # Per-receiver detail is bundled under ``results`` by the
        # dispatcher's ``_aggregate_results``. Walk it when present; fall
        # back to reading the leaf shape directly so older callers and
        # direct-test fixtures keep working.
        per_receiver = result.get("results")
        drilled = isinstance(per_receiver, list)
        if drilled:
            # Enumerate BEFORE filtering so ``receiver_index`` is the
            # position in the dispatcher's list even when a non-dict
            # entry is skipped.
            leaves = [
                (position, receiver)
                for position, receiver in enumerate(per_receiver)
                if isinstance(receiver, dict)
            ]
        else:
            leaves = [(0, result)]

        for receiver_index, leaf in leaves:
            # The dispatcher's Telegram path renames the media counters
            # to ``media_*`` to disambiguate them from the surrounding
            # text-message result. Accept both names so a future provider
            # that surfaces top-level counts (single-shot text+media)
            # also gets picked up.
            delivered = _first_present(
                leaf, "media_delivered_count", "delivered_count",
            )
            skipped = _first_present(
                leaf, "media_skipped_count", "skipped_count",
            )
            undelivered = _first_present(
                leaf, "media_failed_count", "failed_count",
            )
            if (
                delivered is not None
                or skipped is not None
                or undelivered is not None
            ):
                has_media_counts = True
                media_delivered += _coerce_count(delivered)
                media_skipped += _coerce_count(skipped)
                media_failed += _coerce_count(undelivered)

            sub_errors = leaf.get("media_errors") or leaf.get("errors") or []
            if not isinstance(sub_errors, (list, tuple)):
                # Only a sequence can carry structured entries.
                continue
            for sub in sub_errors:
                if not isinstance(sub, dict):
                    # ``_aggregate_results`` populates a string list at
                    # the target level; only dict entries carry structured
                    # per-chunk / per-item detail worth keeping here.
                    continue
                if len(media_errors) >= _MAX_MEDIA_ERRORS:
                    media_errors_dropped += 1
                    continue
                entry: dict[str, Any] = {"target_index": index}
                # Only stamp the receiver index when we actually drilled
                # into a per-receiver list — single-leaf fallbacks leave
                # the key off so the one-target shape stays unchanged.
                if drilled:
                    entry["receiver_index"] = receiver_index
                entry.update({k: _trim(v) for k, v in sub.items()})
                media_errors.append(entry)

    summary: dict[str, Any] = {
        "targets_attempted": len(results),
        "targets_succeeded": succeeded,
        "targets_failed": failed,
    }
    if errors:
        summary["errors"] = errors
    if errors_dropped:
        summary["errors_truncated"] = errors_dropped
    if has_media_counts:
        summary["media"] = {
            "delivered": media_delivered,
            "skipped": media_skipped,
            "failed": media_failed,
        }
    if media_errors:
        summary["media_errors"] = media_errors
    if media_errors_dropped:
        summary["media_errors_truncated"] = media_errors_dropped
    return summary
def attach_summary_in_place(
    row: EventLog, results: list[dict[str, Any]],
) -> None:
    """Stamp ``dispatch_summary`` onto ``row.details`` ahead of the commit.

    Intended for an EventLog row that still belongs to an uncommitted
    session: nothing is committed here, the caller's own
    ``session.commit()`` persists the change. The call is a no-op when the
    results produce an empty summary or when ``details`` already carries a
    ``dispatch_summary`` key.
    """
    dispatch_block = summarize_dispatch_results(results)
    if dispatch_block:
        merged = dict(row.details or {})
        # "Caller wins": an explicitly set summary (from the caller or an
        # earlier pass) is never overwritten — the same rule the
        # correlation enricher in ``log_context.py`` follows.
        if "dispatch_summary" not in merged:
            merged["dispatch_summary"] = dispatch_block
            row.details = merged
            # Assigning a fresh dict already marks the column dirty for
            # SQLAlchemy; the explicit flag guards against a later refactor
            # that mutates the dict in place instead.
            flag_modified(row, "details")
async def record_dispatch_summary_async(
    session: AsyncSession,
    event_log_id: int | None,
    results: list[dict[str, Any]],
) -> None:
    """Best-effort update of an already-committed ``EventLog`` row.

    Used by call sites where the row was committed in an earlier
    transaction (the polling watcher commits its EventLog rows before
    invoking the dispatcher, so we need a follow-up update).

    Best-effort: a DB hiccup here must never abort the wider dispatch
    flow — the row keeps its prior status / details and the operator
    can still trace via stderr (via the ``dispatch_id`` correlation
    written at insert time). The summary computation itself runs inside
    the guarded region, so an unexpected result shape is logged rather
    than raised into the caller.

    Args:
        session: Session used for the lookup and the follow-up commit.
        event_log_id: Primary key of the row to update; ``None`` is a no-op.
        results: Per-target dispatch results; empty is a no-op.

    Raises:
        asyncio.CancelledError: Cancellation is always re-raised.
    """
    if event_log_id is None or not results:
        return
    try:
        summary = summarize_dispatch_results(results)
        if not summary:
            return
        row = await session.get(EventLog, event_log_id)
        if row is None:
            return
        details = dict(row.details or {})
        # "Caller wins": never overwrite a summary that is already there.
        if "dispatch_summary" in details:
            return
        details["dispatch_summary"] = summary
        row.details = details
        flag_modified(row, "details")
        session.add(row)
        await session.commit()
    except asyncio.CancelledError:
        # Cancellation must propagate so APScheduler can drain shutdown.
        # Swallowing it here would pin the task and leave the row in an
        # indeterminate state.
        raise
    except Exception:  # noqa: BLE001
        _LOGGER.exception(
            "Failed to record dispatch_summary on event_log %s", event_log_id,
        )
        # A failed flush/commit leaves the session needing a rollback;
        # reset it so a caller that reuses the session is not poisoned.
        # Still best-effort: a failing rollback is only logged.
        try:
            await session.rollback()
        except Exception:  # noqa: BLE001
            _LOGGER.debug(
                "Rollback after failed dispatch_summary write also failed",
                exc_info=True,
            )
@@ -20,6 +20,11 @@ from typing import Any, Awaitable, Callable
from sqlmodel import select
from sqlmodel.ext.asyncio.session import AsyncSession
from notify_bridge_core.log_context import (
bind_log_context,
ensure_dispatch_id,
enrich_details_with_correlation,
)
from notify_bridge_core.models.events import ServiceEvent
from notify_bridge_core.notifications.dispatcher import (
NotificationDispatcher,
@@ -36,6 +41,7 @@ from .dispatch_helpers import (
load_link_data,
resolve_provider_credential,
)
from .dispatch_summary import attach_summary_in_place
_LOGGER = logging.getLogger(__name__)
@@ -141,6 +147,31 @@ async def dispatch_provider_event(
int
Number of successfully dispatched notifications across all trackers.
"""
# Bind a dispatch_id for the whole event so every EventLog row written
# below — and every log line emitted by the inner dispatcher — share the
# same correlation id. The dispatcher's own ``ensure_dispatch_id()`` call
# reuses this id rather than generating its own.
with bind_log_context(dispatch_id=ensure_dispatch_id()):
return await _dispatch_provider_event_impl(
engine, provider_id, provider_name, provider_config,
event, detail_keys, filter_fn,
)
async def _dispatch_provider_event_impl(
engine: Any,
provider_id: int,
provider_name: str,
provider_config: dict[str, Any],
event: ServiceEvent,
detail_keys: tuple[str, ...],
filter_fn: FilterFn,
) -> int:
"""Implementation body for :func:`dispatch_provider_event`.
Split out so the public function can wrap the body in
:func:`bind_log_context` without re-indenting the entire flow.
"""
dispatched = 0
# Drain-scheduling is best-effort: a scheduling failure must not roll
# back the persisted defer rows (startup catch-up re-establishes them).
@@ -188,10 +219,10 @@ async def dispatch_provider_event(
collection_id=event.collection_id,
collection_name=event.collection_name,
assets_count=0,
details={
details=enrich_details_with_correlation({
"provider_type": event.provider_type.value,
**extra_details,
},
}),
)
session.add(event_log_row)
await session.flush()
@@ -294,6 +325,11 @@ async def dispatch_provider_event(
event.provider_type.value != "bridge_self"
)
# Accumulate per-target results across every tracking-config
# group so the EventLog row carries a single ``dispatch_summary``
# covering the full fan-out (not just the last group).
all_results: list[dict[str, Any]] = []
for tc, target_entries in groups.values():
if not target_entries:
continue
@@ -308,6 +344,7 @@ async def dispatch_provider_event(
"Dispatcher raised for tracker %d: %s", tracker.id, err,
)
continue
all_results.extend(results)
for entry, r in zip(target_entries, results):
_, target_id, target_name = entry
if r.get("success"):
@@ -332,6 +369,12 @@ async def dispatch_provider_event(
"bridge_self target-failure emission failed",
)
# Merge the aggregated per-target results onto the EventLog row
# while the session still owns it. The commit below carries the
# ``dispatch_summary`` block alongside the row's original fields.
if all_results:
attach_summary_in_place(event_log_row, all_results)
await session.commit()
# Schedule drain jobs OUTSIDE the DB session so an APScheduler hiccup
@@ -28,6 +28,7 @@ from typing import Any
from sqlmodel import select
from sqlmodel.ext.asyncio.session import AsyncSession
from notify_bridge_core.log_context import enrich_details_with_correlation
from notify_bridge_core.models.events import ServiceEvent
from notify_bridge_core.providers.home_assistant import (
HomeAssistantAuthError,
@@ -139,11 +140,11 @@ async def _record_ha_status(
collection_id="",
collection_name="",
assets_count=0,
details={
details=enrich_details_with_correlation({
"provider_type": "home_assistant",
"ha_status": state,
"ha_status_detail": detail or "",
},
}),
))
await session.commit()
except Exception: # noqa: BLE001
@@ -29,6 +29,11 @@ from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
from sqlmodel import select
from sqlmodel.ext.asyncio.session import AsyncSession
from notify_bridge_core.log_context import (
bind_log_context,
ensure_dispatch_id,
enrich_details_with_correlation,
)
from notify_bridge_core.models.events import EventType
from notify_bridge_core.notifications.dispatcher import (
NotificationDispatcher,
@@ -51,6 +56,7 @@ from .dispatch_helpers import (
load_link_data,
resolve_provider_credential,
)
from .dispatch_summary import summarize_dispatch_results
from .manual_dispatch import build_immich_dispatch_events
_LOGGER = logging.getLogger(__name__)
@@ -135,12 +141,12 @@ async def _log_skip(
collection_id="",
collection_name="",
assets_count=0,
details={
details=enrich_details_with_correlation({
"kind": kind,
"trigger": "cron",
"status": "skipped",
"skip_reason": reason,
},
}),
))
await session.commit()
@@ -164,6 +170,15 @@ async def dispatch_scheduled_for_tracker(
the slot is disabled on the tracker's default tracking config, or no link
has a ``TemplateConfig`` with the corresponding slot row.
"""
# Bind a dispatch_id for the whole cron fire so the EventLog "skipped" /
# "sent" rows AND the inner dispatcher log lines share one correlation id.
with bind_log_context(dispatch_id=ensure_dispatch_id()):
await _dispatch_scheduled_for_tracker_impl(tracker_id, kind)
async def _dispatch_scheduled_for_tracker_impl(
tracker_id: int, kind: ScheduledKind
) -> None:
engine = get_engine()
async with AsyncSession(engine) as session:
tracker = await session.get(NotificationTracker, tracker_id)
@@ -390,6 +405,9 @@ async def dispatch_scheduled_for_tracker(
any_sent = True
successes = sum(1 for r in results if isinstance(r, dict) and r.get("success"))
summary = summarize_dispatch_results(
[r for r in results if isinstance(r, dict)],
)
async with AsyncSession(engine) as session:
session.add(EventLog(
user_id=tracker_user_id,
@@ -401,7 +419,7 @@ async def dispatch_scheduled_for_tracker(
collection_id=event.collection_id,
collection_name=event.collection_name,
assets_count=event.added_count or 0,
details={
details=enrich_details_with_correlation({
"kind": kind,
"slot": slot_name,
"trigger": "cron",
@@ -410,7 +428,8 @@ async def dispatch_scheduled_for_tracker(
"status": "sent",
"targets_dispatched": total_targets,
"targets_succeeded": successes,
},
"dispatch_summary": summary,
}),
))
await session.commit()
@@ -95,6 +95,7 @@ async def send_telegram_media(
chunk_delay: int = 0,
max_asset_data_size: int | None = None,
send_large_photos_as_documents: bool = False,
send_large_videos_as_documents: bool = False,
chat_action: str | None = "typing",
thumbhash_resolver: Callable[[str], str | None] | None = None,
) -> NotificationResult:
@@ -116,6 +117,7 @@ async def send_telegram_media(
chunk_delay=chunk_delay,
max_asset_data_size=max_asset_data_size,
send_large_photos_as_documents=send_large_photos_as_documents,
send_large_videos_as_documents=send_large_videos_as_documents,
chat_action=chat_action,
)
@@ -9,6 +9,11 @@ from typing import Any, Awaitable, Callable
from sqlmodel import select
from sqlmodel.ext.asyncio.session import AsyncSession
from notify_bridge_core.log_context import (
bind_log_context,
ensure_dispatch_id,
enrich_details_with_correlation,
)
from notify_bridge_core.models.events import ServiceEvent
from notify_bridge_core.notifications.dispatcher import NotificationDispatcher, TargetConfig
from notify_bridge_core.notifications.telegram.cache import TelegramFileCache
@@ -30,6 +35,7 @@ from .dispatch_helpers import (
load_link_data,
resolve_provider_credential,
)
from .dispatch_summary import record_dispatch_summary_async
_LOGGER = logging.getLogger(__name__)
@@ -262,6 +268,13 @@ _POLL_FACTORIES: dict[str, PollerFactory] = {
async def check_tracker(tracker_id: int) -> dict[str, Any]:
    """Poll a tracker's provider for changes and dispatch notifications.

    Thin wrapper that binds a ``dispatch_id`` into the log context and
    delegates the actual work to ``_check_tracker_impl``.
    """
    # One dispatch_id per tick, so the EventLog row written for each
    # detected change shares its correlation id with the dispatcher's
    # log lines.
    tick_dispatch_id = ensure_dispatch_id()
    with bind_log_context(dispatch_id=tick_dispatch_id):
        outcome = await _check_tracker_impl(tracker_id)
    return outcome
async def _check_tracker_impl(tracker_id: int) -> dict[str, Any]:
engine = get_engine()
# Load all DB data eagerly before entering aiohttp context
@@ -457,7 +470,7 @@ async def check_tracker(tracker_id: int) -> dict[str, Any]:
collection_id=event.collection_id,
collection_name=event.collection_name,
assets_count=assets_count,
details=details,
details=enrich_details_with_correlation(details),
)
session.add(log)
await session.flush()
@@ -605,6 +618,10 @@ async def check_tracker(tracker_id: int) -> dict[str, Any]:
event.provider_type.value != "bridge_self"
)
# Per-event accumulator so the summary write covers every
# tracking-config group, not just the last one.
event_results: list[dict[str, Any]] = []
for tc, target_entries in groups.values():
if not target_entries:
continue
@@ -616,6 +633,7 @@ async def check_tracker(tracker_id: int) -> dict[str, Any]:
continue
target_configs = [entry[0] for entry in target_entries]
results = await dispatcher.dispatch(shaped_event, target_configs)
event_results.extend(results)
for entry, r in zip(target_entries, results):
_, target_id, target_name = entry
if r.get("success"):
@@ -637,6 +655,15 @@ async def check_tracker(tracker_id: int) -> dict[str, Any]:
"bridge_self target-failure emission failed",
)
# The EventLog row was committed in the earlier session block
# so we run a tiny follow-up UPDATE in a fresh session. Best-
# effort: a failure here logs but does not abort the watcher.
if event_log_id is not None and event_results:
async with AsyncSession(engine) as summary_session:
await record_dispatch_summary_async(
summary_session, event_log_id, event_results,
)
return {
"status": "ok",
"events_detected": len(events),
@@ -0,0 +1,372 @@
"""Temporary per-module DEBUG overrides with auto-revert.
Covers the in-memory service module + a smoke pass over the API layer
using ``dependency_overrides`` to bypass auth. The APScheduler glue is
exercised via the fallback asyncio-timer path since tests run without a
running scheduler.
"""
from __future__ import annotations
import asyncio
import logging
from datetime import datetime, timedelta, timezone
from typing import Any
import pytest
from fastapi.testclient import TestClient
# ---------------------------------------------------------------------------
# Test scaffolding
# ---------------------------------------------------------------------------
def _reset_state() -> None:
    """Empty the service's module-level ``_active`` dict.

    Called at the start of each test so overrides activated by an earlier
    case cannot leak into the next one.
    """
    from notify_bridge_server.services import diagnostic_mode

    diagnostic_mode._active.clear()
@pytest.fixture(autouse=True)
def _stub_db_read(monkeypatch):
    """Pin ``_read_db_log_levels`` to an empty snapshot for every test.

    Tests that need DB-override precedence re-patch the same function
    themselves, which supersedes this default.
    """
    from notify_bridge_server.services import diagnostic_mode as svc

    async def _no_overrides() -> str:
        return ""

    monkeypatch.setattr(svc, "_read_db_log_levels", _no_overrides)
def _patch_db_read(monkeypatch, value: str) -> None:
    """Make ``_read_db_log_levels`` return ``value`` for the current test.

    Replaces the empty default installed by the autouse fixture when a
    test needs a non-empty ``log_levels`` setting.
    """
    from notify_bridge_server.services import diagnostic_mode as svc

    async def _fixed_log_levels() -> str:
        return value

    monkeypatch.setattr(svc, "_read_db_log_levels", _fixed_log_levels)
# ---------------------------------------------------------------------------
# Unit tests — service module
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_set_diagnostic_applies_debug_immediately(tmp_data_dir) -> None:  # noqa: ARG001
    """Activating an override reports DEBUG and sets the logger level."""
    from notify_bridge_server.services import diagnostic_mode as svc

    _reset_state()
    target = "notify_bridge_core.notifications.telegram.client"

    activated = await svc.set_diagnostic(target, duration_minutes=30)

    assert activated["module"] == target
    assert activated["current_level"] == "DEBUG"
    # A 30-minute window must still have more than 29 minutes left.
    assert activated["remaining_seconds"] > 60 * 29
    assert logging.getLogger(target).level == logging.DEBUG
@pytest.mark.asyncio
async def test_set_diagnostic_rejects_unlisted_module(tmp_data_dir) -> None:  # noqa: ARG001
    """A namespace outside the allowlist is refused with ``ValueError``."""
    from notify_bridge_server.services import diagnostic_mode as svc

    _reset_state()
    with pytest.raises(ValueError, match="allowlist"):
        await svc.set_diagnostic("some_random_third_party", 30)
@pytest.mark.asyncio
async def test_set_diagnostic_rejects_root_logger(tmp_data_dir) -> None:  # noqa: ARG001
    """An empty module name (which would mean the root logger) is refused."""
    from notify_bridge_server.services import diagnostic_mode as svc

    _reset_state()
    with pytest.raises(ValueError, match="allowlist"):
        await svc.set_diagnostic("", 30)
@pytest.mark.asyncio
async def test_set_diagnostic_rejects_unreasonable_durations(tmp_data_dir) -> None:  # noqa: ARG001
    """Both a zero and an oversized ``duration_minutes`` raise ``ValueError``."""
    from notify_bridge_server.services import diagnostic_mode as svc

    _reset_state()
    # Lower bound first, then upper bound — same order as the original.
    with pytest.raises(ValueError, match="duration_minutes"):
        await svc.set_diagnostic("notify_bridge_core", 0)
    with pytest.raises(ValueError, match="duration_minutes"):
        await svc.set_diagnostic("notify_bridge_core", 9999)
@pytest.mark.asyncio
async def test_baseline_from_db_override(tmp_data_dir, monkeypatch) -> None:  # noqa: ARG001
    """An explicit ``log_levels`` entry becomes the reported baseline."""
    from notify_bridge_server.services import diagnostic_mode as svc

    _reset_state()
    _patch_db_read(monkeypatch, "sqlalchemy.engine=ERROR")

    activated = await svc.set_diagnostic("sqlalchemy.engine", duration_minutes=15)

    assert activated["baseline_level"] == "ERROR"
@pytest.mark.asyncio
async def test_baseline_from_noisy_default(tmp_data_dir) -> None:  # noqa: ARG001
    """With an empty ``log_levels`` value the baseline is WARNING."""
    from notify_bridge_server.services import diagnostic_mode as svc

    _reset_state()

    activated = await svc.set_diagnostic("sqlalchemy.engine", duration_minutes=15)

    assert activated["baseline_level"] == "WARNING"
@pytest.mark.asyncio
async def test_baseline_prefix_walks_for_submodule(tmp_data_dir, monkeypatch) -> None:  # noqa: ARG001
    """``sqlalchemy.engine.Engine`` reports its parent's WARNING baseline
    rather than the root level."""
    from notify_bridge_server.services import diagnostic_mode as svc

    _reset_state()
    child_logger = "sqlalchemy.engine.Engine"

    activated = await svc.set_diagnostic(child_logger, duration_minutes=15)

    assert activated["baseline_level"] == "WARNING"
@pytest.mark.asyncio
async def test_baseline_prefix_walks_for_db_override(tmp_data_dir, monkeypatch) -> None:  # noqa: ARG001
    """A ``log_levels`` entry on a parent namespace applies to a logger
    nested below it."""
    from notify_bridge_server.services import diagnostic_mode as svc

    _reset_state()
    _patch_db_read(monkeypatch, "notify_bridge_core.notifications=ERROR")
    nested_logger = "notify_bridge_core.notifications.telegram.client"

    activated = await svc.set_diagnostic(nested_logger, duration_minutes=15)

    assert activated["baseline_level"] == "ERROR"
@pytest.mark.asyncio
async def test_set_diagnostic_twice_replaces_schedule(tmp_data_dir) -> None:  # noqa: ARG001
    """Re-activating the same module replaces its entry instead of
    adding a second one."""
    from notify_bridge_server.services import diagnostic_mode as svc

    _reset_state()
    target = "notify_bridge_core"

    await svc.set_diagnostic(target, 5)
    before = svc.list_active()
    assert len(before) == 1
    original_expiry = before[0]["expires_at"]

    # Brief pause so the two activations get distinct timestamps.
    await asyncio.sleep(0.05)

    await svc.set_diagnostic(target, 60)
    after = svc.list_active()
    assert len(after) == 1
    assert after[0]["expires_at"] != original_expiry
    assert after[0]["remaining_seconds"] > 30 * 60
@pytest.mark.asyncio
async def test_manual_revert_restores_baseline(tmp_data_dir) -> None:  # noqa: ARG001
    """An explicit revert returns ``True`` and puts the logger back at
    WARNING."""
    from notify_bridge_server.services import diagnostic_mode as svc

    _reset_state()
    target = "sqlalchemy.engine"
    target_logger = logging.getLogger(target)

    await svc.set_diagnostic(target, 30)
    assert target_logger.level == logging.DEBUG

    was_reverted = await svc.revert_diagnostic(target)

    assert was_reverted is True
    # The noisy-library default is WARNING (numeric level 30).
    assert target_logger.level == logging.WARNING
@pytest.mark.asyncio
async def test_revert_reads_db_at_revert_time(tmp_data_dir, monkeypatch) -> None:  # noqa: ARG001
    """A ``log_levels`` change made while the override is active is the
    level applied on revert, not the value seen at activation."""
    from notify_bridge_server.services import diagnostic_mode as svc

    _reset_state()
    target = "sqlalchemy.engine"

    _patch_db_read(monkeypatch, "")
    await svc.set_diagnostic(target, 30)

    # Simulate the operator raising the configured level mid-window.
    _patch_db_read(monkeypatch, "sqlalchemy.engine=ERROR")
    was_reverted = await svc.revert_diagnostic(target)

    assert was_reverted is True
    assert logging.getLogger(target).level == logging.ERROR
@pytest.mark.asyncio
async def test_manual_revert_no_active_returns_false(tmp_data_dir) -> None:  # noqa: ARG001
    """Reverting a module that has no active override returns ``False``."""
    from notify_bridge_server.services import diagnostic_mode as svc

    _reset_state()
    outcome = await svc.revert_diagnostic("notify_bridge_core")
    assert outcome is False
@pytest.mark.asyncio
async def test_auto_revert_after_window_elapses(tmp_data_dir) -> None:  # noqa: ARG001
    """A scheduled revert removes the override and restores WARNING once
    ``expires_at`` passes.

    ``set_diagnostic`` works in whole minutes, so the override is planted
    in ``_active`` by hand and ``_schedule_revert`` is called directly with
    a sub-second deadline to keep the test fast.
    """
    from notify_bridge_server.services import diagnostic_mode as svc

    _reset_state()
    target = "sqlalchemy.engine"
    baseline_level = svc._baseline_for(target, db_log_levels="")
    started = datetime.now(timezone.utc)
    deadline = started + timedelta(seconds=0.3)

    logging.getLogger(target).setLevel("DEBUG")
    override = svc._Override(
        module=target,
        baseline_level=baseline_level,
        activated_at=started,
        expires_at=deadline,
    )
    svc._active[target] = override
    svc._schedule_revert(target, deadline)

    # Wait past the 0.3 s deadline so the timer has a chance to fire.
    await asyncio.sleep(0.5)

    assert target not in svc._active
    assert logging.getLogger(target).level == logging.WARNING
@pytest.mark.asyncio
async def test_fallback_task_retained_until_fire(tmp_data_dir) -> None:  # noqa: ARG001
    """Scheduling a revert leaves exactly one task in ``_bg_tasks``, i.e.
    the module holds a reference to it while the timer is pending."""
    from notify_bridge_server.services import diagnostic_mode as svc

    _reset_state()
    fire_at = datetime.now(timezone.utc) + timedelta(seconds=10)

    svc._schedule_revert("notify_bridge_core", fire_at)

    # The retainer set should hold just the task queued above.
    assert len(svc._bg_tasks) == 1

    # Clean up: cancel over a snapshot of the set, then yield once to the
    # loop so pending callbacks can run.
    pending = list(svc._bg_tasks)
    for queued in pending:
        queued.cancel()
    await asyncio.sleep(0)
def test_list_active_omits_and_sweeps_expired(tmp_data_dir) -> None:  # noqa: ARG001
    """``list_active`` hides an already-expired override and also deletes
    it from ``_active``."""
    from notify_bridge_server.services import diagnostic_mode as svc

    _reset_state()
    target = "sqlalchemy.engine"
    expired_at = datetime.now(timezone.utc) - timedelta(minutes=1)
    stale = svc._Override(
        module="sqlalchemy.engine",
        baseline_level="WARNING",
        activated_at=expired_at - timedelta(minutes=30),
        expires_at=expired_at,
    )
    svc._active[target] = stale

    assert svc.list_active() == []
    assert target not in svc._active
@pytest.mark.asyncio
async def test_revert_all_clears_every_override(tmp_data_dir) -> None:  # noqa: ARG001
    """``revert_all`` returns the number of overrides it removed and
    leaves none active."""
    from notify_bridge_server.services import diagnostic_mode as svc

    _reset_state()
    await svc.set_diagnostic("notify_bridge_core", 30)
    await svc.set_diagnostic("sqlalchemy.engine", 30)
    assert len(svc.list_active()) == 2

    reverted_count = await svc.revert_all()

    assert reverted_count == 2
    assert svc.list_active() == []
# ---------------------------------------------------------------------------
# API smoke — bypasses auth via dependency_overrides
# ---------------------------------------------------------------------------
@pytest.fixture
def _admin_client(tmp_data_dir):  # noqa: ARG001
    """Yield a TestClient with ``require_admin`` short-circuited.

    Keeps the auth-flow's SQLAlchemy/greenlet issues out of the picture
    while still exercising the FastAPI router, path converters, and the
    ``HTTPException`` paths.

    Teardown runs in a ``finally`` so the dependency override and the
    in-memory override state are removed even if entering or leaving the
    ``TestClient`` context raises; otherwise the admin bypass would stay
    installed on the shared ``app`` for later tests.
    """
    _reset_state()
    from notify_bridge_server.auth.dependencies import require_admin
    from notify_bridge_server.database.models import User
    from notify_bridge_server.main import app

    fake = User(
        id=1, username="admin",
        password_hash="x", role="admin", token_version=0,
    )
    app.dependency_overrides[require_admin] = lambda: fake
    try:
        with TestClient(app) as client:
            yield client
    finally:
        app.dependency_overrides.pop(require_admin, None)
        _reset_state()
def test_api_post_rejects_unlisted_module_with_400(_admin_client: TestClient) -> None:
    """POSTing a module outside the allowlist yields HTTP 400 whose
    ``detail`` mentions the allowlist."""
    payload = {"module": "evil.namespace", "duration_minutes": 15}

    response = _admin_client.post("/api/settings/diagnostic-mode", json=payload)

    assert response.status_code == 400
    assert "allowlist" in response.json().get("detail", "")
def test_api_post_rejects_huge_duration_with_400(_admin_client: TestClient) -> None:
    """POSTing an oversized ``duration_minutes`` yields HTTP 400."""
    payload = {"module": "notify_bridge_core", "duration_minutes": 99999}

    response = _admin_client.post("/api/settings/diagnostic-mode", json=payload)

    assert response.status_code == 400
def test_api_delete_unknown_returns_404(_admin_client: TestClient) -> None:
    """DELETE for a module with no active override yields HTTP 404."""
    url = "/api/settings/diagnostic-mode/notify_bridge_core"

    response = _admin_client.delete(url)

    assert response.status_code == 404
def test_api_delete_handles_dotted_module_path(_admin_client: TestClient) -> None:
    """``{module:path}`` lets dotted names survive URL routing intact."""
    target = "notify_bridge_core.notifications.telegram.client"
    created = _admin_client.post(
        "/api/settings/diagnostic-mode",
        json={"module": target, "duration_minutes": 15},
    )
    # Check the precondition explicitly: if activation fails, the DELETE
    # below would return 404 and point at routing instead of the real
    # cause.
    assert created.status_code < 400, created.text

    resp = _admin_client.delete(f"/api/settings/diagnostic-mode/{target}")
    assert resp.status_code == 200, resp.text
    assert resp.json()["reverted"] == target
@@ -0,0 +1,357 @@
"""Aggregation of per-target dispatch results into ``EventLog.details``.
Covers ``summarize_dispatch_results`` and ``attach_summary_in_place``.
The async ``record_dispatch_summary_async`` is exercised through the
in-process update path; the watcher-style flow is covered indirectly via
the full server tests.
"""
from __future__ import annotations
from typing import Any
import pytest
def test_summarize_empty_returns_empty(tmp_data_dir) -> None:  # noqa: ARG001
    """An empty results list summarizes to ``{}``.

    The falsy return lets callers skip writing a zero-counts block on a
    row that had no dispatches.
    """
    from notify_bridge_server.services import dispatch_summary

    outcome = dispatch_summary.summarize_dispatch_results([])

    assert outcome == {}
def test_summarize_all_success_no_errors_block(tmp_data_dir) -> None:  # noqa: ARG001
    """Two successful targets give plain counts with no ``errors`` and no
    ``media`` key."""
    from notify_bridge_server.services import dispatch_summary

    all_ok = [
        {"success": True, "message_id": 1},
        {"success": True, "message_id": 2},
    ]

    outcome = dispatch_summary.summarize_dispatch_results(all_ok)

    assert outcome["targets_attempted"] == 2
    assert outcome["targets_succeeded"] == 2
    assert outcome["targets_failed"] == 0
    assert "errors" not in outcome
    assert "media" not in outcome
def test_summarize_mixed_records_only_failures(tmp_data_dir) -> None:  # noqa: ARG001
    """With one success and two failures, ``errors`` lists only the
    failures, keyed by their position in the input."""
    from notify_bridge_server.services import dispatch_summary

    mixed = [
        {"success": True},
        {"success": False, "error": "Bad Request: chat not found"},
        {"success": False, "error": "timeout"},
    ]
    expected_errors = [
        {"index": 1, "error": "Bad Request: chat not found"},
        {"index": 2, "error": "timeout"},
    ]

    outcome = dispatch_summary.summarize_dispatch_results(mixed)

    assert outcome["targets_succeeded"] == 1
    assert outcome["targets_failed"] == 2
    assert outcome["errors"] == expected_errors
def test_summarize_media_counts_aggregate(tmp_data_dir) -> None:  # noqa: ARG001
    """Media counters from two leaf-shaped results are summed per field."""
    from notify_bridge_server.services import dispatch_summary

    first_target = {
        "success": True,
        "delivered_count": 5,
        "skipped_count": 1,
        "failed_count": 0,
    }
    second_target = {
        "success": True,
        "delivered_count": 3,
        "skipped_count": 0,
        "failed_count": 0,
    }

    outcome = dispatch_summary.summarize_dispatch_results(
        [first_target, second_target],
    )

    assert outcome["media"] == {"delivered": 8, "skipped": 1, "failed": 0}
def test_summarize_sub_errors_carry_target_index(tmp_data_dir) -> None:  # noqa: ARG001
    """Chunk- and item-level errors of a partially failed media group are
    flattened into ``media_errors`` and tagged with the target's index."""
    from notify_bridge_server.services import dispatch_summary

    clean_target = {
        "success": True,
        "delivered_count": 1,
        "skipped_count": 0,
        "failed_count": 0,
    }
    # The group landed overall, but one item in it failed.
    partial_target = {
        "success": True,
        "delivered_count": 2,
        "skipped_count": 0,
        "failed_count": 1,
        "errors": [
            {"kind": "chunk", "chunk": 1, "error": "Bad Request: ..."},
            {"kind": "item", "chunk": 1, "item_index": 2, "error": "media not found"},
        ],
    }
    expected = [
        {"target_index": 1, "kind": "chunk", "chunk": 1, "error": "Bad Request: ..."},
        {
            "target_index": 1,
            "kind": "item",
            "chunk": 1,
            "item_index": 2,
            "error": "media not found",
        },
    ]

    outcome = dispatch_summary.summarize_dispatch_results(
        [clean_target, partial_target],
    )

    assert outcome["media_errors"] == expected
def test_summarize_caps_errors_and_reports_truncation(tmp_data_dir) -> None:  # noqa: ARG001
    """25 failures keep 20 ``errors`` entries and report 5 as truncated."""
    from notify_bridge_server.services import dispatch_summary

    failures: list[dict[str, Any]] = []
    for n in range(25):
        failures.append({"success": False, "error": f"err {n}"})

    outcome = dispatch_summary.summarize_dispatch_results(failures)

    assert len(outcome["errors"]) == 20
    assert outcome["errors_truncated"] == 5
def test_summarize_trims_long_error_messages(tmp_data_dir) -> None:  # noqa: ARG001
    """A 2000-character error string is cut down and tagged as truncated."""
    from notify_bridge_server.services.dispatch_summary import (
        summarize_dispatch_results,
    )

    marker = "…[truncated]"
    failing_result = {"success": False, "error": "x" * 2000}

    outcome = summarize_dispatch_results([failing_result])
    stored_message = outcome["errors"][0]["error"]

    assert stored_message.endswith(marker)
    # Expected layout: 500 characters of the original text, then the marker.
    assert len(stored_message) == 500 + len(marker)
@pytest.mark.asyncio
async def test_attach_summary_in_place_mutates_details_dict(tmp_data_dir) -> None:  # noqa: ARG001
    """The summary is added under ``dispatch_summary`` and pre-existing
    ``details`` keys are left intact."""
    from notify_bridge_server.database.models import EventLog
    from notify_bridge_server.services.dispatch_summary import (
        attach_summary_in_place,
    )

    event_row = EventLog(
        event_type="assets_added",
        collection_id="abc",
        collection_name="Album",
        details={"provider_type": "immich", "added_count": 3},
    )
    one_ok_one_failed = [{"success": True}, {"success": False, "error": "x"}]

    attach_summary_in_place(event_row, one_ok_one_failed)

    details = event_row.details
    assert details["provider_type"] == "immich"
    assert details["added_count"] == 3
    assert details["dispatch_summary"] == {
        "targets_attempted": 2,
        "targets_succeeded": 1,
        "targets_failed": 1,
        "errors": [{"index": 1, "error": "x"}],
    }
@pytest.mark.asyncio
async def test_attach_summary_in_place_with_no_results_is_noop(tmp_data_dir) -> None:  # noqa: ARG001
    """An empty results list must not add ``dispatch_summary`` nor alter
    the existing ``details``."""
    from notify_bridge_server.database.models import EventLog
    from notify_bridge_server.services.dispatch_summary import (
        attach_summary_in_place,
    )

    event_row = EventLog(
        event_type="assets_added",
        collection_id="abc",
        collection_name="Album",
        details={"k": "v"},
    )

    attach_summary_in_place(event_row, [])

    assert event_row.details == {"k": "v"}
    assert "dispatch_summary" not in event_row.details
def test_summarize_handles_malformed_sub_errors(tmp_data_dir) -> None:  # noqa: ARG001
    """A plain string inside ``errors`` is dropped; the dict entry next to
    it is still reported."""
    from notify_bridge_server.services.dispatch_summary import (
        summarize_dispatch_results,
    )

    mixed_errors = ["not a dict", {"kind": "item", "error": "real"}]
    only_result = {
        "success": True,
        "delivered_count": 1,
        "errors": mixed_errors,
    }

    outcome = summarize_dispatch_results([only_result])

    assert outcome["media_errors"] == [
        {"target_index": 0, "kind": "item", "error": "real"}
    ]
# ---------------------------------------------------------------------------
# Integration: real dispatcher output shape from ``_aggregate_results``
# ---------------------------------------------------------------------------
#
# The dispatcher wraps each Telegram fan-out in a per-target envelope:
#
# {
# "success": True,
# "receivers": 2,
# "successes": 2,
# "failures": 0,
# "results": [<per-receiver dict>, ...], # ← media counts live HERE
# }
#
# These tests use that exact shape so a future refactor of the dispatcher
# doesn't silently zero out the dashboard's ``dispatch_summary.media``
# block. Earlier versions of this file passed leaf dicts directly, which
# masked the wrong-shape read in production.
def test_summarize_drills_into_aggregated_per_receiver_dicts(tmp_data_dir) -> None:  # noqa: ARG001
    """``media_*_count`` values nested under a target's ``results`` list are
    added up across all receivers."""
    from notify_bridge_server.services.dispatch_summary import (
        summarize_dispatch_results,
    )

    receiver_a = {
        "success": True,
        "message_id": 100,
        "media_delivered_count": 5,
        "media_skipped_count": 1,
        "media_failed_count": 0,
    }
    receiver_b = {
        "success": True,
        "message_id": 101,
        "media_delivered_count": 3,
        "media_skipped_count": 0,
        "media_failed_count": 0,
    }
    # A single aggregated target envelope wrapping both receivers.
    target_envelope = {
        "success": True,
        "receivers": 2,
        "successes": 2,
        "failures": 0,
        "results": [receiver_a, receiver_b],
    }

    outcome = summarize_dispatch_results([target_envelope])

    assert outcome["media"] == {"delivered": 8, "skipped": 1, "failed": 0}
def test_summarize_collects_aggregated_media_errors_with_receiver_index(
    tmp_data_dir,  # noqa: ARG001
) -> None:
    """Receiver-level ``media_errors`` are flattened and tagged with both
    ``target_index`` and ``receiver_index``."""
    from notify_bridge_server.services.dispatch_summary import (
        summarize_dispatch_results,
    )

    receiver = {
        "success": True,
        "message_id": 200,
        "media_delivered_count": 2,
        "media_failed_count": 1,
        "media_errors": [
            {"kind": "chunk", "chunk": 1, "error": "Bad Request"},
            {
                "kind": "item",
                "chunk": 1,
                "item_index": 2,
                "error": "media not found",
            },
        ],
    }
    target_envelope = {
        "success": True,
        "receivers": 1,
        "successes": 1,
        "failures": 0,
        "results": [receiver],
    }

    outcome = summarize_dispatch_results([target_envelope])

    expected = [
        {
            "target_index": 0,
            "receiver_index": 0,
            "kind": "chunk",
            "chunk": 1,
            "error": "Bad Request",
        },
        {
            "target_index": 0,
            "receiver_index": 0,
            "kind": "item",
            "chunk": 1,
            "item_index": 2,
            "error": "media not found",
        },
    ]
    assert outcome["media_errors"] == expected
def test_summarize_aggregated_target_errors_list_is_safely_ignored(
    tmp_data_dir,  # noqa: ARG001
) -> None:
    """A target-level ``errors`` list made of plain strings must not be
    interpreted as structured sub-errors; only the target's ``error``
    message is reported."""
    from notify_bridge_server.services.dispatch_summary import (
        summarize_dispatch_results,
    )

    receiver_reasons = ["chat_not_found", "blocked_by_user"]
    failed_target = {
        "success": False,
        "receivers": 2,
        "successes": 0,
        "failures": 2,
        "error": "All receivers failed",
        "errors": list(receiver_reasons),
        "results": [
            {"success": False, "error": reason} for reason in receiver_reasons
        ],
    }

    outcome = summarize_dispatch_results([failed_target])

    assert outcome["targets_failed"] == 1
    assert outcome["errors"] == [
        {"index": 0, "error": "All receivers failed"},
    ]
    # Neither media block appears: the string list is skipped and no media
    # counters were present on any receiver.
    assert "media_errors" not in outcome
    assert "media" not in outcome
@pytest.mark.asyncio
async def test_attach_summary_in_place_skips_when_already_set(
    tmp_data_dir,  # noqa: ARG001
) -> None:
    """A ``dispatch_summary`` already present in ``details`` is kept as-is
    rather than being replaced by a freshly computed one."""
    from notify_bridge_server.database.models import EventLog
    from notify_bridge_server.services.dispatch_summary import (
        attach_summary_in_place,
    )

    pinned_summary = {"pinned": True}
    event_row = EventLog(
        event_type="assets_added",
        collection_id="abc",
        collection_name="Album",
        details={"dispatch_summary": {"pinned": True}},
    )

    attach_summary_in_place(event_row, [{"success": True}])

    assert event_row.details["dispatch_summary"] == pinned_summary
@@ -0,0 +1,158 @@
"""Request-ID middleware + EventLog dispatch_id correlation.
Covers two halves of the same correlation story:
* ``RequestContextMiddleware`` generates / accepts an inbound request id,
binds it onto the log-context ContextVar for the duration of the request,
and echoes it back as the ``X-Request-Id`` response header.
* ``enrich_details_with_correlation`` merges the active ``dispatch_id`` and
``request_id`` into an ``EventLog.details`` dict so the persisted row can
be cross-referenced with the stderr log lines emitted during the same
dispatch.
"""
from __future__ import annotations
import re
import pytest
from fastapi.testclient import TestClient
# Shape the tests below expect for a server-generated request id:
# the literal prefix ``req:`` followed by exactly 12 lowercase hex digits.
_REQ_ID_PATTERN = re.compile(r"^req:[0-9a-f]{12}$")
def test_response_carries_generated_request_id(tmp_data_dir) -> None:  # noqa: ARG001
    """Without an inbound header the response still carries an
    ``X-Request-Id`` of the form ``req:<12 hex>``."""
    from notify_bridge_server.main import app

    with TestClient(app) as client:
        response = client.get("/api/health")

    assert response.status_code == 200
    generated = response.headers.get("X-Request-Id")
    assert generated is not None
    assert _REQ_ID_PATTERN.match(generated), (
        f"generated id {generated!r} should match req:<12 hex>"
    )
def test_response_echoes_safe_inbound_request_id(tmp_data_dir) -> None:  # noqa: ARG001
    """An inbound ``X-Request-Id`` made of letters, digits, ``-`` and ``_``
    comes back unchanged on the response."""
    from notify_bridge_server.main import app

    request_headers = {"X-Request-Id": "abc-123_XYZ_trace"}

    with TestClient(app) as client:
        response = client.get("/api/health", headers=request_headers)

    assert response.status_code == 200
    assert response.headers.get("X-Request-Id") == request_headers["X-Request-Id"]
def test_colon_prefixed_inbound_id_is_replaced(tmp_data_dir) -> None:  # noqa: ARG001
    """An inbound id containing ``:`` is not echoed; the response carries a
    freshly generated ``req:<12 hex>`` id instead, so a client cannot pass
    itself off as a ``disp:...`` id."""
    from notify_bridge_server.main import app

    spoofed = "disp:fake12345678"

    with TestClient(app) as client:
        response = client.get("/api/health", headers={"X-Request-Id": spoofed})

    assert response.status_code == 200
    returned = response.headers.get("X-Request-Id", "")
    assert returned != "disp:fake12345678"
    assert _REQ_ID_PATTERN.match(returned)
@pytest.mark.parametrize(
    "bad_value",
    [
        # Embedded CRLF: could break log lines apart or smuggle a header.
        "abc\r\ninjected: yes",
        # Far beyond any reasonable id length.
        "x" * 256,
        # Characters outside the accepted set.
        "<script>alert(1)</script>",
        # Nothing left once whitespace is stripped.
        " ",
    ],
)
def test_unsafe_inbound_request_id_is_replaced(
    tmp_data_dir, bad_value: str,  # noqa: ARG001
) -> None:
    """Unsafe inbound ids are never echoed back; a generated
    ``req:<12 hex>`` id takes their place."""
    from notify_bridge_server.main import app

    with TestClient(app) as client:
        response = client.get("/api/health", headers={"X-Request-Id": bad_value})

    assert response.status_code == 200
    returned = response.headers.get("X-Request-Id", "")
    assert returned != bad_value, "unsafe id was passed through unchanged"
    assert _REQ_ID_PATTERN.match(returned), (
        f"replacement id {returned!r} should match req:<12 hex>"
    )
def test_enrich_details_merges_active_correlation_ids() -> None:
    """Ids bound through ``bind_log_context`` are added next to the
    caller's own keys."""
    from notify_bridge_core.log_context import (
        bind_log_context,
        enrich_details_with_correlation,
    )

    bound_ids = {
        "dispatch_id": "disp:deadbeef0001",
        "request_id": "req:cafecafe0002",
    }

    with bind_log_context(**bound_ids):
        enriched = enrich_details_with_correlation({"existing": "value"})

    assert enriched == {
        "existing": "value",
        "dispatch_id": "disp:deadbeef0001",
        "request_id": "req:cafecafe0002",
    }
def test_enrich_details_does_not_overwrite_explicit_keys() -> None:
    """A ``dispatch_id`` supplied by the caller wins over the bound one."""
    from notify_bridge_core.log_context import (
        bind_log_context,
        enrich_details_with_correlation,
    )

    caller_details = {"dispatch_id": "disp:pinned"}

    with bind_log_context(dispatch_id="disp:newvalue00001"):
        enriched = enrich_details_with_correlation(caller_details)

    assert enriched["dispatch_id"] == "disp:pinned"
def test_enrich_details_no_context_returns_copy() -> None:
    """With nothing bound the result equals the input but is a separate
    dict, so edits to it do not reach the caller's object."""
    from notify_bridge_core.log_context import enrich_details_with_correlation

    source = {"key": "value"}

    enriched = enrich_details_with_correlation(source)
    assert enriched == source

    # Writing to the returned dict must leave the input untouched.
    enriched["extra"] = "added"
    assert "extra" not in source
def test_enrich_details_handles_none() -> None:
    """Passing ``None`` with nothing bound yields an empty dict."""
    from notify_bridge_core.log_context import enrich_details_with_correlation

    enriched = enrich_details_with_correlation(None)
    assert enriched == {}
def test_ensure_dispatch_id_generates_or_reuses() -> None:
    """Outside a binding a new ``disp:`` id with a 12-character suffix is
    produced; inside one, the bound id is returned."""
    from notify_bridge_core.log_context import (
        bind_log_context,
        ensure_dispatch_id,
    )

    prefix = "disp:"
    generated = ensure_dispatch_id()
    assert generated.startswith("disp:")
    assert len(generated) == len(prefix) + 12

    already_bound = "disp:bound00000001"
    with bind_log_context(dispatch_id=already_bound):
        assert ensure_dispatch_id() == "disp:bound00000001"
@@ -0,0 +1,511 @@
"""Tests for partial-delivery resilience in TelegramClient._send_media_group.
Covers the three independent failure modes that previously aborted the
whole send:
1. **Per-item oversize** — one item over ``max_asset_data_size`` is
silently dropped; siblings still deliver. ``skipped_count`` reflects
the drop.
2. **Combined chunk over Telegram's byte envelope** — pre-flight splits
into byte-budgeted sub-chunks, avoiding the 413 entirely.
3. **Telegram-side chunk rejection after pre-flight** — fall back to
sending each item individually so partial delivery still happens.
"""
from __future__ import annotations
from typing import Any
from unittest.mock import patch
import aiohttp
import pytest
from aioresponses import aioresponses
from notify_bridge_core.notifications.telegram.client import (
TelegramClient,
_MediaItem,
)
from notify_bridge_core.notifications.telegram.media import (
TELEGRAM_MAX_GROUP_TOTAL_BYTES,
)
# Placeholder bot token handed to ``TelegramClient`` in these tests.
BOT_TOKEN = "TEST_TOKEN"
# Bot API base URL for that token; mocked endpoints are registered as
# ``f"{TG}/<method>"``.
TG = f"https://api.telegram.org/bot{BOT_TOKEN}"
# Chat id passed to ``_send_media_group`` in the integration tests.
CHAT_ID = "-1001234567890"
# ---------------------------------------------------------------------------
# Pure unit tests for the new helpers
# ---------------------------------------------------------------------------
def _item(upload_bytes: int, media_type: str = "photo") -> _MediaItem:
    """Create a synthetic ``_MediaItem`` for the splitter tests.

    ``upload_bytes == 0`` yields an item with neither attachment nor cache
    info whose media reference is ``"file_id_cached"``; any other value
    yields an item carrying ``upload_bytes`` zero bytes as its attachment.
    """
    nothing_to_upload = upload_bytes == 0
    media_ref = "file_id_cached" if nothing_to_upload else "attach://x"
    media_json = {"type": media_type, "media": media_ref}

    if nothing_to_upload:
        return _MediaItem(media_json=media_json, cache_info=None, attachment=None)

    body = b"\x00" * upload_bytes
    return _MediaItem(
        media_json=media_json,
        cache_info=("ck", media_type, None, upload_bytes),
        attachment=("x", body, "f.jpg", "image/jpeg"),
    )
def test_split_empty_returns_empty() -> None:
    """Splitting an empty item list gives an empty list of groups."""
    groups = TelegramClient._split_items_by_byte_budget([], 1000)
    assert groups == []
def test_split_fits_in_single_group() -> None:
    """Items totalling 60 bytes stay together under a 100-byte budget."""
    payload = [_item(size) for size in (10, 20, 30)]

    groups = TelegramClient._split_items_by_byte_budget(payload, 100)

    assert len(groups) == 1
    total_bytes = sum(entry.upload_bytes for entry in groups[0])
    assert total_bytes == 60
def test_split_packs_greedily_across_budget() -> None:
    """Three 40-byte items under a 100-byte budget split as [40, 40] + [40]."""
    payload = [_item(40) for _ in range(3)]

    groups = TelegramClient._split_items_by_byte_budget(payload, 100)

    group_sizes = [len(group) for group in groups]
    assert group_sizes == [2, 1]
    first, second = groups[0], groups[1]
    assert sum(entry.upload_bytes for entry in first) == 80
    assert sum(entry.upload_bytes for entry in second) == 40
def test_split_oversized_single_item_kept_alone() -> None:
    """A lone item larger than the budget is not dropped by the splitter.

    It comes back as its own single-item group, which leaves the actual
    rejection (and its error message) to Telegram.
    """
    too_big = _item(200)

    groups = TelegramClient._split_items_by_byte_budget([too_big], 100)

    assert len(groups) == 1
    only_group = groups[0]
    assert only_group[0].upload_bytes == 200
def test_split_cached_items_are_free() -> None:
    """Zero-byte items never trigger a split, even with a tiny budget."""
    payload = [_item(0) for _ in range(3)]

    groups = TelegramClient._split_items_by_byte_budget(payload, 10)

    assert len(groups) == 1
    assert len(groups[0]) == 3
def test_split_mixes_cached_and_fresh_correctly() -> None:
    """Zero-byte items join whichever group is currently open."""
    sizes = (40, 0, 40, 0, 40)
    payload = [_item(size) for size in sizes]

    groups = TelegramClient._split_items_by_byte_budget(payload, 100)

    # 40 + 0 + 40 = 80 fits, the following 0 still fits, the last 40 would
    # push the total to 120 and therefore opens a second group.
    group_sizes = [len(group) for group in groups]
    assert group_sizes == [4, 1]
def test_attach_caption_to_first_idempotent() -> None:
    """The caption lands on the first item only, and a second call
    replaces it rather than adding another."""
    pair = [_item(10), _item(10)]

    TelegramClient._attach_caption_to_first(pair, "Hello", "HTML")
    first_json = pair[0].media_json
    assert first_json["caption"] == "Hello"
    assert first_json["parse_mode"] == "HTML"
    assert "caption" not in pair[1].media_json

    # A repeat call overwrites caption and parse mode in place.
    TelegramClient._attach_caption_to_first(pair, "Bye", "MarkdownV2")
    first_json = pair[0].media_json
    assert first_json["caption"] == "Bye"
    assert first_json["parse_mode"] == "MarkdownV2"
def test_attach_caption_truncates_to_telegram_limit() -> None:
    """A caption 500 characters over the limit is stored at or below
    ``TELEGRAM_MAX_CAPTION_LENGTH``."""
    from notify_bridge_core.notifications.telegram.media import (
        TELEGRAM_MAX_CAPTION_LENGTH,
    )

    single = [_item(10)]
    overlong = "A" * (TELEGRAM_MAX_CAPTION_LENGTH + 500)

    TelegramClient._attach_caption_to_first(single, overlong, "HTML")

    stored_caption = single[0].media_json["caption"]
    assert len(stored_caption) <= TELEGRAM_MAX_CAPTION_LENGTH
def test_attach_caption_no_items_is_noop() -> None:
    """Calling the helper with an empty list must simply not raise."""
    no_items: list = []
    TelegramClient._attach_caption_to_first(no_items, "x", "HTML")
# ---------------------------------------------------------------------------
# Integration tests for the full _send_media_group flow
# ---------------------------------------------------------------------------
def _png_bytes(size: int) -> bytes:
"""Minimal valid PNG header + pad bytes to reach the requested size.
Required so ``check_photo_limits`` can identify the bytes as an
image rather than rejecting them. The PIL inspection only reads the
header so padding with zeros is harmless.
"""
# 8-byte PNG signature + IHDR chunk for a 1x1 image (zero-padded
# to size). Pillow accepts this enough to read dimensions; the
# remaining bytes after IHDR are treated as trailing garbage.
sig = b"\x89PNG\r\n\x1a\n"
ihdr = bytes.fromhex(
# length=13, type=IHDR, w=1, h=1, depth=8, color=2 (RGB),
# compression=0, filter=0, interlace=0, crc=ignored
"0000000d49484452000000010000000108020000009077"
"53de"
)
base = sig + ihdr
if len(base) >= size:
return base[:size]
return base + b"\x00" * (size - len(base))
async def _build_client(session: aiohttp.ClientSession) -> TelegramClient:
    """Construct a ``TelegramClient`` on ``session`` using the test token."""
    client = TelegramClient(session, BOT_TOKEN)
    return client
@pytest.mark.asyncio
async def test_oversized_item_skipped_others_delivered() -> None:
    """An asset above ``max_asset_data_size`` is skipped while the two
    smaller ones are still delivered."""
    size_cap = 1_000_000  # 1 MB cap

    # Asset bytes are supplied inline under "data", so only the Telegram
    # endpoint is mocked (no asset HTTP server is involved).
    byte_sizes = {
        "http://assets.test/big.jpg": 2_000_000,
        "http://assets.test/a.jpg": 50_000,
        "http://assets.test/b.jpg": 50_000,
    }
    assets = [
        {"type": "photo", "url": url, "data": _png_bytes(n_bytes)}
        for url, n_bytes in byte_sizes.items()
    ]
    group_reply = {
        "ok": True,
        "result": [
            {"message_id": 100, "photo": [{"file_id": "fa"}]},
            {"message_id": 101, "photo": [{"file_id": "fb"}]},
        ],
    }

    with aioresponses() as mocked:
        mocked.post(f"{TG}/sendMediaGroup", payload=group_reply)
        async with aiohttp.ClientSession() as sess:
            client = await _build_client(sess)
            result = await client._send_media_group(
                CHAT_ID, assets, max_asset_data_size=size_cap,
            )

    assert result["success"] is True
    assert result["delivered_count"] == 2
    assert result["skipped_count"] == 1
    assert result["failed_count"] == 0
    assert result["message_ids"] == [100, 101]
@pytest.mark.asyncio
async def test_byte_budget_splits_into_sub_chunks() -> None:
    """Three items whose combined size exceeds the group byte budget go
    out as two ``sendMediaGroup`` requests."""
    # Any two items fit under the budget, all three together do not, so
    # the expected split is [2 items, 1 item].
    item_size = TELEGRAM_MAX_GROUP_TOTAL_BYTES // 3 + 1
    assets = []
    for idx in range(3):
        assets.append(
            {
                "type": "photo",
                "url": f"http://t/{idx}.jpg",
                "data": _png_bytes(item_size),
            }
        )

    # The per-request item count is not known up front, so every request
    # is answered with the same 10-entry result list.
    generic_reply: dict[str, Any] = {
        "ok": True,
        "result": [
            {"message_id": 200 + i, "photo": [{"file_id": f"x{i}"}]}
            for i in range(10)
        ],
    }

    with aioresponses() as mocked:
        mocked.post(f"{TG}/sendMediaGroup", payload=generic_reply, repeat=True)
        async with aiohttp.ClientSession() as sess:
            client = await _build_client(sess)
            # check_photo_limits is stubbed out: per the original note,
            # the large zero-padded bodies would trip its checks.
            with patch(
                "notify_bridge_core.notifications.telegram.client.check_photo_limits",
                return_value=(False, None, None, None),
            ):
                result = await client._send_media_group(CHAT_ID, assets)

        # The mock keeps recorded requests in a dict keyed by
        # (method, url); each value is the list of calls for that key.
        recorded = mocked.requests
        group_keys = [
            key for key in recorded if key[1].path.endswith("/sendMediaGroup")
        ]
        assert len(group_keys) >= 1
        calls: list[int] = [len(recorded[key]) for key in group_keys]

    assert result["success"] is True
    # Nothing failed, i.e. the pre-split kept every request acceptable.
    assert result["failed_count"] == 0
    # Two requests in total: one with 2 items, one with 1.
    assert sum(calls) == 2
@pytest.mark.asyncio
async def test_chunk_413_falls_back_to_per_item() -> None:
    """When ``sendMediaGroup`` is answered with 413, every item is retried
    on its own via ``sendPhoto``."""
    assets = []
    for idx in range(2):
        assets.append(
            {"type": "photo", "url": f"http://t/{idx}.jpg", "data": _png_bytes(50_000)}
        )

    rejection = {"ok": False, "error_code": 413, "description": "Request Entity Too Large"}

    with aioresponses() as mocked:
        # The grouped request is rejected outright.
        mocked.post(f"{TG}/sendMediaGroup", status=413, payload=rejection)
        # Both individual retries are accepted (message ids 300 and 301).
        for idx, message_id in enumerate((300, 301)):
            mocked.post(
                f"{TG}/sendPhoto",
                payload={
                    "ok": True,
                    "result": {"message_id": message_id, "photo": [{"file_id": f"z{idx}"}]},
                },
            )
        async with aiohttp.ClientSession() as sess:
            client = await _build_client(sess)
            with patch(
                "notify_bridge_core.notifications.telegram.client.check_photo_limits",
                return_value=(False, None, None, None),
            ):
                result = await client._send_media_group(CHAT_ID, assets)

    assert result["success"] is True
    assert result["delivered_count"] == 2
    assert result["failed_count"] == 0
    # The chunk-level rejection is still reported (kind="chunk") even
    # though every item was delivered by the fallback.
    assert result["errors"] is not None
    chunk_level = [err for err in result["errors"] if err.get("kind") == "chunk"]
    assert len(chunk_level) == 1
    assert "Request Entity Too Large" in str(chunk_level[0]["error"])
@pytest.mark.asyncio
async def test_chunk_failure_with_per_item_partial_failure() -> None:
    """If the per-item fallback itself fails for one item, both the
    delivered and the failed item are reflected in the result."""
    assets = []
    for idx in range(2):
        assets.append(
            {"type": "photo", "url": f"http://t/{idx}.jpg", "data": _png_bytes(50_000)}
        )

    group_rejection = {"ok": False, "error_code": 400, "description": "Bad Request"}
    first_item_ok = {"ok": True, "result": {"message_id": 400, "photo": [{"file_id": "p0"}]}}
    second_item_rejected = {
        "ok": False,
        "error_code": 400,
        "description": "PHOTO_INVALID_DIMENSIONS",
    }

    with aioresponses() as mocked:
        mocked.post(f"{TG}/sendMediaGroup", status=400, payload=group_rejection)
        # Individual retries: the first succeeds, the second is rejected.
        mocked.post(f"{TG}/sendPhoto", payload=first_item_ok)
        mocked.post(f"{TG}/sendPhoto", status=400, payload=second_item_rejected)
        async with aiohttp.ClientSession() as sess:
            client = await _build_client(sess)
            with patch(
                "notify_bridge_core.notifications.telegram.client.check_photo_limits",
                return_value=(False, None, None, None),
            ):
                result = await client._send_media_group(CHAT_ID, assets)

    # One delivered item is enough for the overall call to count as success.
    assert result["success"] is True
    assert result["delivered_count"] == 1
    assert result["failed_count"] == 1
    assert result["message_ids"] == [400]
    # The item-level error records which asset (index 1) failed.
    item_level = [err for err in result["errors"] if err.get("kind") == "item"]
    assert len(item_level) == 1
    assert item_level[0]["item_index"] == 1
@pytest.mark.asyncio
async def test_document_chunk_failure_falls_back_to_sendDocument() -> None:
    """Document items are retried through ``/sendDocument`` after a failed
    group send — never through ``/sendVideo`` or ``/sendPhoto``.

    Regression guard: per the original note, an earlier draft treated every
    non-photo item as a video in the fallback path.
    """
    assets = []
    for idx in range(2):
        assets.append(
            {"type": "document", "url": f"http://t/f{idx}.bin", "data": bytes(50_000)}
        )

    group_rejection = {"ok": False, "error_code": 400, "description": "Bad Request"}

    with aioresponses() as mocked:
        mocked.post(f"{TG}/sendMediaGroup", status=400, payload=group_rejection)
        for idx, message_id in enumerate((500, 501)):
            mocked.post(
                f"{TG}/sendDocument",
                payload={
                    "ok": True,
                    "result": {"message_id": message_id, "document": {"file_id": f"d{idx}"}},
                },
            )
        async with aiohttp.ClientSession() as sess:
            client = await _build_client(sess)
            result = await client._send_media_group(CHAT_ID, assets)

        # Neither the video nor the photo endpoint may have been hit.
        for _method, url in mocked.requests:
            assert "/sendVideo" not in url.path
            assert "/sendPhoto" not in url.path

    assert result["success"] is True
    assert result["delivered_count"] == 2
    assert result["message_ids"] == [500, 501]
@pytest.mark.asyncio
async def test_oversized_video_deferred_as_document_when_opted_in() -> None:
    """With ``send_large_videos_as_documents=True`` an oversized video is
    delivered through ``sendDocument`` instead of being dropped.

    The two photos in the same call are expected to travel through the
    regular ``sendMediaGroup`` request, and ``sendVideo`` must not be used
    at all.
    """
    # 60 MB: above the 50 MB sendVideo cap, well below the document cap.
    big_video = bytes(60 * 1024 * 1024)
    video_asset = {
        "type": "video",
        "url": "http://t/big.mp4",
        "data": big_video,
        "content_type": "video/mp4",
    }
    photo_assets = [
        {"type": "photo", "url": f"http://t/{name}.jpg", "data": _png_bytes(50_000)}
        for name in ("a", "b")
    ]
    assets = [video_asset, *photo_assets]

    photos_reply = {
        "ok": True,
        "result": [
            {"message_id": 700, "photo": [{"file_id": "p0"}]},
            {"message_id": 701, "photo": [{"file_id": "p1"}]},
        ],
    }
    document_reply = {
        "ok": True,
        "result": {"message_id": 702, "document": {"file_id": "d0"}},
    }

    with aioresponses() as mocked:
        # Reply for the grouped photos.
        mocked.post(f"{TG}/sendMediaGroup", payload=photos_reply)
        # Reply for the video that is sent as a document.
        mocked.post(f"{TG}/sendDocument", payload=document_reply)
        async with aiohttp.ClientSession() as sess:
            client = await _build_client(sess)
            with patch(
                "notify_bridge_core.notifications.telegram.client.check_photo_limits",
                return_value=(False, None, None, None),
            ):
                result = await client._send_media_group(
                    CHAT_ID,
                    assets,
                    send_large_videos_as_documents=True,
                )

        # The oversized video must never be attempted via sendVideo.
        for _method, url in mocked.requests:
            assert "/sendVideo" not in url.path

    assert result["success"] is True
    assert result["delivered_count"] == 3
    assert result["skipped_count"] == 0
    assert result["failed_count"] == 0
    assert sorted(result["message_ids"]) == [700, 701, 702]
@pytest.mark.asyncio
async def test_oversized_video_skipped_when_flag_off() -> None:
    """Without the opt-in flag an oversized video is skipped and no
    ``sendDocument`` request is made."""
    big_video = bytes(60 * 1024 * 1024)
    assets = [
        {
            "type": "video",
            "url": "http://t/big.mp4",
            "data": big_video,
            "content_type": "video/mp4",
        },
        {"type": "photo", "url": "http://t/a.jpg", "data": _png_bytes(50_000)},
    ]
    photo_reply = {
        "ok": True,
        "result": [{"message_id": 800, "photo": [{"file_id": "p0"}]}],
    }

    with aioresponses() as mocked:
        mocked.post(f"{TG}/sendMediaGroup", payload=photo_reply)
        async with aiohttp.ClientSession() as sess:
            client = await _build_client(sess)
            with patch(
                "notify_bridge_core.notifications.telegram.client.check_photo_limits",
                return_value=(False, None, None, None),
            ):
                result = await client._send_media_group(CHAT_ID, assets)

        # The video is not rerouted to sendDocument either.
        for _method, url in mocked.requests:
            assert "/sendDocument" not in url.path

    assert result["success"] is True
    assert result["delivered_count"] == 1
    assert result["skipped_count"] == 1
@pytest.mark.asyncio
async def test_all_items_oversized_returns_failure() -> None:
    """If every asset exceeds ``max_asset_data_size`` the call reports
    failure with both items counted as skipped."""
    assets = []
    for _ in range(2):
        assets.append(
            {"type": "photo", "url": "http://t/big.jpg", "data": _png_bytes(5_000_000)}
        )

    async with aiohttp.ClientSession() as sess:
        client = await _build_client(sess)
        # No Telegram mock is registered: no request is expected to be made.
        result = await client._send_media_group(
            CHAT_ID, assets, max_asset_data_size=1_000_000,
        )

    assert result["success"] is False
    assert result["delivered_count"] == 0
    assert result["skipped_count"] == 2
    assert result["failed_count"] == 0
    assert "filtered" in result["error"]
@@ -0,0 +1,249 @@
"""Per-send Telegram options (`disable_notification`, `message_thread_id`).
Verifies the ContextVar-based plumbing inside ``TelegramClient`` so the
two new flags actually land in the request payloads at all four send
paths (sendMessage, single-asset send, media-group, cache-hit POST) and
that concurrent ``asyncio.gather`` fan-outs in the dispatcher don't leak
options between tasks.
"""
from __future__ import annotations
import asyncio
import json
from typing import Any
import pytest
from aiohttp import FormData
def test_telegram_receiver_factory_reads_new_fields() -> None:
    """``build_receiver`` maps the two new config keys onto typed
    ``TelegramReceiver`` attributes."""
    from notify_bridge_core.notifications.receiver import (
        TelegramReceiver, build_receiver,
    )

    config = {
        "chat_id": "12345",
        "disable_notification": True,
        "message_thread_id": "7",  # string form, common from JSON UI
    }

    receiver = build_receiver("telegram", config)

    assert isinstance(receiver, TelegramReceiver)
    assert receiver.chat_id == "12345"
    assert receiver.disable_notification is True
    # The string "7" is expected to come back as the integer 7.
    assert receiver.message_thread_id == 7
def test_telegram_receiver_factory_defaults_when_missing() -> None:
    """With only ``chat_id`` configured, notifications stay enabled and no
    thread id is set."""
    from notify_bridge_core.notifications.receiver import (
        TelegramReceiver, build_receiver,
    )

    minimal_config = {"chat_id": "12345"}

    receiver = build_receiver("telegram", minimal_config)

    assert isinstance(receiver, TelegramReceiver)
    assert receiver.disable_notification is False
    assert receiver.message_thread_id is None
@pytest.mark.parametrize(
    "raw_thread, expected",
    [
        (None, None),
        ("", None),
        ("not-a-number", None),
        ("42", 42),
        (42, 42),
        # Zero and negative values are expected to collapse to None so the
        # field is simply left out of the request (per the original note,
        # this mirrors the frontend's ``<= 0 → unset`` rule).
        ("0", None),
        (0, None),
        (-5, None),
        # Booleans must be rejected explicitly: int(True) == 1 would
        # otherwise be taken for topic #1.
        (True, None),
        (False, None),
    ],
)
def test_telegram_receiver_thread_id_coercion(raw_thread: Any, expected: Any) -> None:
    """``message_thread_id`` is coerced to a positive int or ``None``."""
    from notify_bridge_core.notifications.receiver import build_receiver

    config = {"chat_id": "1", "message_thread_id": raw_thread}
    receiver = build_receiver("telegram", config)

    assert receiver.message_thread_id == expected  # type: ignore[attr-defined]
def test_apply_send_opts_to_payload_merges_when_bound() -> None:
    """While options are bound, the payload helper writes both keys into
    the dict it is given."""
    from notify_bridge_core.notifications.telegram.client import (
        _SendOptions,
        _apply_send_opts_to_payload,
        _bind_send_options,
    )

    request_body: dict[str, Any] = {"chat_id": "1"}
    bound = _SendOptions(disable_notification=True, message_thread_id=7)

    with _bind_send_options(bound):
        _apply_send_opts_to_payload(request_body)

    assert request_body["disable_notification"] is True
    assert request_body["message_thread_id"] == 7
def test_apply_send_opts_to_payload_omits_when_default() -> None:
    """With nothing bound, neither option key is added to the payload."""
    from notify_bridge_core.notifications.telegram.client import (
        _apply_send_opts_to_payload,
    )

    request_body: dict[str, Any] = {"chat_id": "1"}

    _apply_send_opts_to_payload(request_body)

    assert "disable_notification" not in request_body
    assert "message_thread_id" not in request_body
def test_apply_send_opts_to_form_merges_when_bound() -> None:
    """While options are bound, the multipart helper adds both fields to
    the form as strings."""
    from notify_bridge_core.notifications.telegram.client import (
        _SendOptions,
        _apply_send_opts_to_form,
        _bind_send_options,
    )

    form = FormData()
    bound = _SendOptions(disable_notification=True, message_thread_id=42)

    with _bind_send_options(bound):
        _apply_send_opts_to_form(form)

    # Each entry of the private ``_fields`` list unpacks into
    # (options mapping holding "name", headers, value).
    fields_by_name = {
        options.get("name"): value
        for options, _headers, value in form._fields  # type: ignore[attr-defined]
    }
    assert fields_by_name.get("disable_notification") == "true"
    assert fields_by_name.get("message_thread_id") == "42"
def test_bind_send_options_resets_on_exit() -> None:
    """The context variable is back at its previous value after the bound
    block exits through an exception."""
    from notify_bridge_core.notifications.telegram.client import (
        _SendOptions,
        _bind_send_options,
        _send_options_var,
    )

    before = _send_options_var.get()
    silent = _SendOptions(disable_notification=True)

    try:
        with _bind_send_options(silent):
            raise RuntimeError("boom")
    except RuntimeError:
        # Expected: only the state left behind matters here.
        pass

    after = _send_options_var.get()
    assert after == before
@pytest.mark.asyncio
async def test_concurrent_binds_do_not_leak_between_tasks() -> None:
    """Two ``asyncio.gather`` tasks see only their own bound options.

    This is the load-bearing invariant for the dispatcher's per-receiver
    fan-out: one chat with ``disable_notification=True`` must not silence
    a peer chat in the same dispatch.
    """
    from notify_bridge_core.notifications.telegram.client import (
        _SendOptions,
        _apply_send_opts_to_payload,
        _bind_send_options,
    )

    # Payload produced by each task, keyed by the task's label.
    collected: dict[str, dict[str, Any]] = {}

    async def bind_and_apply(opts: _SendOptions, label: str) -> None:
        body: dict[str, Any] = {"label": label}
        with _bind_send_options(opts):
            # Hand control back to the loop so the sibling task gets to run
            # while this task's options are still bound.
            await asyncio.sleep(0)
            _apply_send_opts_to_payload(body)
        collected[label] = body

    silent_opts = _SendOptions(disable_notification=True, message_thread_id=1)
    loud_opts = _SendOptions(disable_notification=False, message_thread_id=2)
    await asyncio.gather(
        bind_and_apply(silent_opts, "silent"),
        bind_and_apply(loud_opts, "loud"),
    )

    silent = collected["silent"]
    loud = collected["loud"]
    assert silent.get("disable_notification") is True
    assert silent.get("message_thread_id") == 1
    assert "disable_notification" not in loud  # False → omitted
    assert loud.get("message_thread_id") == 2
@pytest.mark.asyncio
async def test_send_message_passes_options_into_payload(monkeypatch) -> None:
    """Both per-send options reach the JSON request body.

    ``send_message(disable_notification=True, message_thread_id=N)`` is
    called against a stub session that records what it was asked to POST.
    """
    # NOTE(review): the ``monkeypatch`` fixture is requested but never used
    # in this body.
    from notify_bridge_core.notifications.telegram.client import TelegramClient

    # Filled in by the stub session on each ``post`` call.
    recorded: dict[str, Any] = {}

    class _StubResponse:
        """Async-context-manager response carrying a fixed ``ok`` body."""

        status = 200

        async def __aenter__(self) -> "_StubResponse":
            return self

        async def __aexit__(self, *exc_info: Any) -> None:
            return None

        async def json(self) -> dict[str, Any]:
            return {"ok": True, "result": {"message_id": 99}}

    class _StubSession:
        """Session double that records the URL and JSON body it receives."""

        def post(
            self, url: str, *, json: dict[str, Any] | None = None, **_kw: Any
        ) -> _StubResponse:
            recorded["url"] = url
            recorded["json"] = json
            return _StubResponse()

    client = TelegramClient(_StubSession(), "TEST:token")  # type: ignore[arg-type]
    outcome = await client.send_message(
        chat_id="123",
        text="hello",
        disable_notification=True,
        message_thread_id=5,
    )

    assert outcome["success"] is True
    sent_body = recorded["json"]
    assert sent_body["disable_notification"] is True
    assert sent_body["message_thread_id"] == 5
@pytest.mark.asyncio
async def test_send_message_without_options_omits_keys(monkeypatch) -> None:
    """Default kwargs leave both option keys out of the JSON request body."""
    # NOTE(review): the ``monkeypatch`` fixture is requested but never used
    # in this body.
    from notify_bridge_core.notifications.telegram.client import TelegramClient

    # Filled in by the stub session on each ``post`` call.
    recorded: dict[str, Any] = {}

    class _StubResponse:
        """Async-context-manager response carrying a fixed ``ok`` body."""

        status = 200

        async def __aenter__(self) -> "_StubResponse":
            return self

        async def __aexit__(self, *exc_info: Any) -> None:
            return None

        async def json(self) -> dict[str, Any]:
            return {"ok": True, "result": {"message_id": 1}}

    class _StubSession:
        """Session double that records the JSON body it receives."""

        def post(
            self, url: str, *, json: dict[str, Any] | None = None, **_kw: Any
        ) -> _StubResponse:
            recorded["json"] = json
            return _StubResponse()

    client = TelegramClient(_StubSession(), "TEST:token")  # type: ignore[arg-type]
    await client.send_message(chat_id="123", text="hello")

    sent_body = recorded["json"]
    for option_key in ("disable_notification", "message_thread_id"):
        assert option_key not in sent_body