feat: observability, per-receiver Telegram options, oversized-video fallback

Operability: - Correlation IDs end-to-end: shared dispatch_id between log lines and EventLog rows (event/watcher/scheduled/deferred/action/HA/command paths) and a new X-Request-Id middleware that normalizes inbound ids and binds request_id into log context. - dispatch_summary block merged into EventLog.details: per-target success/failure counts plus Telegram media delivered/skipped/failed and truncated error lists, so partial outcomes surface in the UI. - Diagnostic mode: admin can flip one module to DEBUG for a bounded window with auto-revert (in-memory only; setup_logging() resets on boot, lifespan reverts on shutdown). New /diagnostic-mode endpoints plus DiagnosticsCassette UI on the settings page. Telegram: - Per-receiver options: disable_notification (silent send) and message_thread_id (forum-topic routing), wired through the dispatcher via a ContextVar so all four send sites (sendMessage / sendPhoto/Video/Document / sendMediaGroup / cache-hit POST) pick them up. - send_large_videos_as_documents target setting: bypass the 50 MB sendVideo cap by falling back to sendDocument for oversized videos. - sendMediaGroup byte-budget enforcement (TELEGRAM_MAX_GROUP_TOTAL_BYTES, 45 MB) with per-item fallback on chunk failure so a stale file_id no longer silently drops a cached asset. Tests: - New: diagnostic_mode, dispatch_summary, request_correlation, telegram_media_group_partial, telegram_per_send_options. Docs: - .claude/reviews/: six-axis production-readiness review of v0.8.1. - .claude/docs/functional-review-2026-05-28.md: focused review of Telegram/Immich/logging subsystems.
2026-05-28 15:19:31 +03:00
parent 85a8f1e71c
commit 6a8f374678
39 changed files with 7239 additions and 142 deletions
@@ -14,6 +14,7 @@ Kept in ``notify_bridge_core`` so core modules (``TelegramClient``,

 from __future__ import annotations

+import uuid
 from contextlib import contextmanager
 from contextvars import ContextVar, Token
 from typing import Any, Iterator
@@ -56,6 +57,22 @@ def bind_log_context(**kwargs: Any) -> Iterator[None]:
            var.reset(tok)


+def ensure_dispatch_id() -> str:
+    """Reuse the active ``dispatch_id`` or mint a fresh ``disp:<12 hex>`` one.
+
+    A bound id that is falsy (``None`` or empty) counts as absent. The new
+    id is the first 12 hex digits of a random UUID4 behind a ``disp:``
+    prefix. Nothing is bound here — wrap the handler yourself::
+
+        with bind_log_context(dispatch_id=ensure_dispatch_id()):
+            ...
+    """
+    active = dispatch_id_var.get()
+    if active:
+        return active
+    return f"disp:{uuid.uuid4().hex[:12]}"
+
+
 def current_log_context() -> dict[str, Any]:
    """Return a snapshot of the currently-bound context values (non-None)."""
    snap: dict[str, Any] = {}
@@ -64,3 +81,43 @@ def current_log_context() -> dict[str, Any]:
        if val is not None:
            snap[key] = val
    return snap
+
+
+# Keys copied onto ``EventLog.details`` so an operator can grep stderr for
+# the matching ``disp=``/``req=`` log lines after spotting a row in the UI.
+# Kept narrow on purpose — ``chat_id``/``bot_id``/``command`` are already
+# represented by dedicated EventLog columns.
+_CORRELATION_KEYS = ("dispatch_id", "request_id")
+
+
+def enrich_details_with_correlation(
+    details: dict[str, Any] | None,
+) -> dict[str, Any]:
+    """Copy ``details`` and add whichever correlation IDs are currently bound.
+
+    Intended for building ``EventLog.details``: the stored row then carries
+    the same ``dispatch_id`` / ``request_id`` as the log lines emitted during
+    the same dispatch, so a dashboard row can be matched to its log lines by
+    searching for the id.
+
+    Precedence: a key already present in ``details`` always wins, even when
+    its value is ``None`` — set it before calling to pin a specific value
+    (e.g. a synthetic dispatch_id on a backfilled row). Context values that
+    are ``None`` are never written.
+
+    ``details`` may be ``None`` or empty; the result is then a new dict with
+    only the bound IDs. The input dict itself is never modified.
+
+    Only the top level is copied. Nested lists/dicts stay shared with the
+    input, so ``deepcopy`` first if you plan to mutate them afterwards.
+    """
+    merged: dict[str, Any] = dict(details) if details else {}
+    for name in _CORRELATION_KEYS:
+        # Caller-supplied keys win; unknown keys have no ContextVar to read.
+        source = None if name in merged else _VAR_MAP.get(name)
+        if source is None:
+            continue
+        bound = source.get()
+        if bound is not None:
+            merged[name] = bound
+    return merged
@@ -5,13 +5,12 @@ from __future__ import annotations
 import asyncio
 import contextlib
 import logging
-import uuid
 from dataclasses import dataclass, field
 from typing import Any, AsyncIterator, Awaitable, Callable, Final

 import aiohttp

-from notify_bridge_core.log_context import bind_log_context, dispatch_id_var
+from notify_bridge_core.log_context import bind_log_context, ensure_dispatch_id
 from notify_bridge_core.models.events import ServiceEvent
 from notify_bridge_core.templates.context import build_template_context
 from notify_bridge_core.templates.renderer import render_template
@@ -132,7 +131,7 @@ class NotificationDispatcher:
        Returns one result per target. Per-target failures are isolated;
        a single bad target cannot poison the batch.
        """
-        new_id = dispatch_id_var.get() or f"disp:{uuid.uuid4().hex[:12]}"
+        new_id = ensure_dispatch_id()

        with bind_log_context(dispatch_id=new_id):
            _LOGGER.info(
@@ -341,6 +340,7 @@ class NotificationDispatcher:
        max_size_mb = target.config.get("max_asset_size")
        max_size_bytes = max_size_mb * 1024 * 1024 if max_size_mb else None
        send_large_as_docs = target.config.get("send_large_photos_as_documents", False)
+        send_large_videos_as_docs = target.config.get("send_large_videos_as_documents", False)

        if not bot_token:
            return {"success": False, "error": "Missing bot_token"}
@@ -392,6 +392,8 @@ class NotificationDispatcher:
                    chat_id=receiver.chat_id,
                    text=message,
                    disable_web_page_preview=bool(disable_preview),
+                    disable_notification=receiver.disable_notification,
+                    message_thread_id=receiver.message_thread_id,
                )
                if not text_result.get("success"):
                    _LOGGER.warning(
@@ -409,22 +411,45 @@ class NotificationDispatcher:
                        chunk_delay=chunk_delay,
                        max_asset_data_size=max_size_bytes,
                        send_large_photos_as_documents=send_large_as_docs,
+                        send_large_videos_as_documents=send_large_videos_as_docs,
                        chat_action=chat_action or None,
+                        disable_notification=receiver.disable_notification,
+                        message_thread_id=receiver.message_thread_id,
                    )
-                    if not media_result.get("success"):
+                    delivered = media_result.get("delivered_count", 0)
+                    skipped = media_result.get("skipped_count", 0)
+                    failed = media_result.get("failed_count", 0)
+                    media_success = media_result.get("success", False)
+                    has_partial_loss = skipped > 0 or failed > 0
+
+                    if not media_success:
                        _LOGGER.warning(
-                            "Text sent OK but media failed for chat %s: %s",
-                            receiver.chat_id, media_result.get("error"),
+                            "Text sent OK but media failed for chat %s "
+                            "(delivered=%d skipped=%d failed=%d): %s",
+                            receiver.chat_id, delivered, skipped, failed,
+                            media_result.get("error"),
                        )
+                    elif has_partial_loss:
+                        _LOGGER.warning(
+                            "Partial media delivery for chat %s "
+                            "(delivered=%d skipped=%d failed=%d)",
+                            receiver.chat_id, delivered, skipped, failed,
+                        )
+
+                    if not media_success or has_partial_loss:
                        # Preserve both outcomes — text succeeded, media
-                        # didn't. Operators losing media-failure detail
-                        # in the result dict made root-cause analysis
+                        # partially or fully didn't. Operators losing
+                        # media-failure detail made root-cause analysis
                        # impossible.
                        return {
                            "success": True,
                            "message_id": text_result.get("message_id"),
                            "media_error": media_result.get("error"),
                            "media_failed_at_chunk": media_result.get("failed_at_chunk"),
+                            "media_delivered_count": delivered,
+                            "media_skipped_count": skipped,
+                            "media_failed_count": failed,
+                            "media_errors": media_result.get("errors"),
                        }
                return text_result

@@ -20,9 +20,21 @@ class Receiver:

@dataclass
 class TelegramReceiver(Receiver):
-    """Telegram chat receiver."""
+    """Telegram chat receiver.
+
+    ``disable_notification`` toggles Telegram's ``disable_notification=true``
+    flag — the message is delivered without an audible / vibration alert.
+    Useful for low-priority chats that the user reads but doesn't want to
+    be paged by.
+
+    ``message_thread_id`` routes the send into a specific forum topic on a
+    supergroup with topics enabled. ``None`` means "general topic" (default
+    Telegram behaviour).
+    """

    chat_id: str = ""
+    disable_notification: bool = False
+    message_thread_id: int | None = None


@dataclass
@@ -80,9 +92,30 @@ def _coerce_int(value: Any, default: int) -> int:
        return default


+def _coerce_telegram_thread_id(value: Any) -> int | None:
+    """Coerce a config value to a positive Telegram forum-topic id, else ``None``.
+
+    ``None``, ``""``, booleans, non-numeric input, non-finite floats and
+    anything ``<= 0`` all collapse to ``None`` so the send omits the field.
+    Booleans are rejected up front so ``int(True) == 1`` doesn't silently
+    route a misconfigured chat into topic #1.
+    """
+    if value is None or value == "" or isinstance(value, bool):
+        return None
+    try:
+        n = int(value)
+    # OverflowError: ``int(float("inf"))`` — must not crash receiver building.
+    except (TypeError, ValueError, OverflowError):
+        return None
+    return n if n > 0 else None
+
+
 _RECEIVER_FACTORIES: dict[str, _ReceiverFactory] = {
    "telegram": lambda locale, config: TelegramReceiver(
-        locale=locale, config=config, chat_id=str(config.get("chat_id", "")),
+        locale=locale, config=config,
+        chat_id=str(config.get("chat_id", "")),
+        disable_notification=bool(config.get("disable_notification", False)),
+        message_thread_id=_coerce_telegram_thread_id(config.get("message_thread_id")),
    ),
    "webhook": lambda locale, config: WebhookReceiver(
        locale=locale, config=config,
@@ -3,12 +3,14 @@
 from __future__ import annotations

 import asyncio
+import contextlib
 import json
 import logging
 import mimetypes
 import re
+from contextvars import ContextVar
 from dataclasses import dataclass, field
-from typing import Any, Callable, Final
+from typing import Any, Callable, Final, Iterator

 import aiohttp
 from aiohttp import FormData
@@ -19,6 +21,7 @@ from .cache import TelegramFileCache
 from .media import (
    TELEGRAM_API_BASE_URL,
    TELEGRAM_MAX_CAPTION_LENGTH,
+    TELEGRAM_MAX_GROUP_TOTAL_BYTES,
    TELEGRAM_MAX_PHOTO_SIZE,
    TELEGRAM_MAX_TEXT_LENGTH,
    TELEGRAM_MAX_VIDEO_SIZE,
@@ -27,7 +30,6 @@ from .media import (
    extract_asset_id_from_url,
    is_asset_cache_key,
    is_asset_id,
-    split_media_by_upload_size,
 )

 _LOGGER = logging.getLogger(__name__)
@@ -56,6 +58,68 @@ _UPLOAD_TIMEOUT: Final = aiohttp.ClientTimeout(total=120, connect=10)
 _DOWNLOAD_TIMEOUT: Final = aiohttp.ClientTimeout(total=120, connect=10)


+# ---------------------------------------------------------------------------
+# Per-send options (disable_notification, message_thread_id, …)
+# ---------------------------------------------------------------------------
+#
+# These are properties of a single send, not of the bot or the client, and
+# they fan out into the JSON / multipart payload at four different sites
+# (sendMessage, sendPhoto/Video/Document, sendMediaGroup, cache-hit POST).
+# Rather than threading the kwargs through every internal media helper,
+# ``send_notification`` binds them on a ContextVar that the payload builders
+# read; ``send_message`` is a leaf call and writes its kwargs straight into
+# its JSON body. ContextVar propagation isolates concurrent ``asyncio.gather``
+# fan-outs in the dispatcher (one task per receiver) — each task sees the
+# value its own caller bound.
+
+
+@dataclass(frozen=True)
+class _SendOptions:
+    """Immutable bundle of the Telegram flags that belong to one send.
+
+    ``disable_notification`` — when true, requests carry the Bot API
+    ``disable_notification`` flag so the chat gets the message silently.
+    ``message_thread_id`` — forum topic to route into on supergroups with
+    topics enabled; ``None`` leaves the field out of the request.
+    """
+
+    disable_notification: bool = False
+    message_thread_id: int | None = None
+
+
+_send_options_var: ContextVar[_SendOptions] = ContextVar(
+    "_tg_send_options", default=_SendOptions(),
+)
+
+
+@contextlib.contextmanager
+def _bind_send_options(opts: _SendOptions) -> Iterator[None]:
+    """Bind ``opts`` for the ``with`` block; the prior value is restored on exit."""
+    token = _send_options_var.set(opts)
+    try:
+        yield
+    finally:
+        _send_options_var.reset(token)
+
+
+def _apply_send_opts_to_payload(payload: dict[str, Any]) -> None:
+    """Write the currently bound per-send options into ``payload`` in place."""
+    active = _send_options_var.get()
+    if active.disable_notification:
+        payload["disable_notification"] = True
+    if (thread_id := active.message_thread_id) is not None:
+        payload["message_thread_id"] = thread_id
+
+
+def _apply_send_opts_to_form(form: FormData) -> None:
+    """Append the currently bound per-send options to ``form`` as text fields."""
+    active = _send_options_var.get()
+    if active.disable_notification:
+        form.add_field("disable_notification", "true")
+    if (thread_id := active.message_thread_id) is not None:
+        form.add_field("message_thread_id", str(thread_id))
+
+
 def _extract_retry_after(result: dict[str, Any]) -> int | None:
    """Return the retry_after seconds from a Telegram error response.

@@ -135,10 +199,27 @@ class _MediaItem:
    keyed by position. Bundling these together prevents the
    ``media_json`` and ``cache_info`` lists from drifting out of
    alignment under future edits.
+
+    ``source_url`` and ``download_headers`` let the per-item fallback
+    re-download a cache-hit item if its ``file_id`` POST returns
+    transient errors — without them, a stale ``file_id`` would silently
+    lose a cached asset that the original single-item path would have
+    recovered.
    """
    media_json: dict[str, Any]
    cache_info: tuple[str, str, str | None, int] | None
    attachment: tuple[str, bytes, str, str] | None  # (name, data, filename, content_type)
+    source_url: str | None = None
+    download_headers: dict[str, str] | None = None
+
+    @property
+    def upload_bytes(self) -> int:
+        """Bytes of attachment data this item adds to a multipart upload.
+
+        Measured as the length of the data element of ``attachment``. Items
+        with no attachment (sent by cached ``file_id``) count as 0.
+        """
+        return 0 if not self.attachment else len(self.attachment[1])


 def _truncate(text: str, limit: int, *, marker: str = "…") -> str:
@@ -302,6 +383,7 @@ class TelegramClient:
            payload["caption"] = _truncate(caption, TELEGRAM_MAX_CAPTION_LENGTH)
        if reply_to_message_id is not None:
            payload["reply_parameters"] = {"message_id": reply_to_message_id}
+        _apply_send_opts_to_payload(payload)
        try:
            async with self._session.post(
                self._api_url(kind.api_method), json=payload, timeout=_API_TIMEOUT,
@@ -351,6 +433,7 @@ class TelegramClient:
                f.add_field("caption", capped_caption)
            if reply_to_message_id is not None:
                f.add_field("reply_parameters", json.dumps({"message_id": reply_to_message_id}))
+            _apply_send_opts_to_form(f)
            return f

        for attempt in range(1, _TG_429_MAX_ATTEMPTS + 1):
@@ -415,18 +498,54 @@ class TelegramClient:
        chunk_delay: int = 0,
        max_asset_data_size: int | None = None,
        send_large_photos_as_documents: bool = False,
+        send_large_videos_as_documents: bool = False,
        chat_action: str | None = "typing",
+        *,
+        disable_notification: bool = False,
+        message_thread_id: int | None = None,
    ) -> NotificationResult:
        if not assets:
            return await self.send_message(
                chat_id, caption or "", reply_to_message_id,
                disable_web_page_preview, parse_mode,
+                disable_notification=disable_notification,
+                message_thread_id=message_thread_id,
            )

        keepalive: _KeepaliveHandle | None = None
        if chat_action:
            keepalive = self.start_chat_action_keepalive(chat_id, chat_action)

+        # Bind for the whole media-send fan-out — every internal helper
+        # (_send_photo / _send_video / _send_document / _send_media_group /
+        # _post_media_group / _send_from_cache / _upload_media) reads the
+        # current value when it constructs its request payload.
+        opts = _SendOptions(
+            disable_notification=disable_notification,
+            message_thread_id=message_thread_id,
+        )
+        with _bind_send_options(opts):
+            return await self._send_notification_body(
+                chat_id, assets, caption, reply_to_message_id, parse_mode,
+                max_group_size, chunk_delay, max_asset_data_size,
+                send_large_photos_as_documents, send_large_videos_as_documents,
+                keepalive,
+            )
+
+    async def _send_notification_body(
+        self,
+        chat_id: str,
+        assets: list[dict[str, Any]],
+        caption: str | None,
+        reply_to_message_id: int | None,
+        parse_mode: str,
+        max_group_size: int,
+        chunk_delay: int,
+        max_asset_data_size: int | None,
+        send_large_photos_as_documents: bool,
+        send_large_videos_as_documents: bool,
+        keepalive: _KeepaliveHandle | None,
+    ) -> NotificationResult:
        try:
            if len(assets) == 1 and assets[0].get("type") == "photo":
                return await self._send_photo(
@@ -443,6 +562,7 @@ class TelegramClient:
                    assets[0].get("content_type"), assets[0].get("cache_key"),
                    download_headers=assets[0].get("headers"),
                    preloaded_data=assets[0].get("data"),
+                    send_large_videos_as_documents=send_large_videos_as_documents,
                )
            if len(assets) == 1 and assets[0].get("type", "document") == "document":
                url = assets[0].get("url")
@@ -465,7 +585,7 @@ class TelegramClient:
            return await self._send_media_group(
                chat_id, assets, caption, reply_to_message_id, max_group_size,
                chunk_delay, parse_mode, max_asset_data_size,
-                send_large_photos_as_documents,
+                send_large_photos_as_documents, send_large_videos_as_documents,
            )
        finally:
            await self.stop_keepalive(keepalive)
@@ -477,6 +597,9 @@ class TelegramClient:
        reply_to_message_id: int | None = None,
        disable_web_page_preview: bool | None = None,
        parse_mode: str = "HTML",
+        *,
+        disable_notification: bool = False,
+        message_thread_id: int | None = None,
    ) -> NotificationResult:
        if not text:
            _LOGGER.warning("send_message called with empty text — using placeholder")
@@ -490,7 +613,19 @@ class TelegramClient:
            payload["reply_parameters"] = {"message_id": reply_to_message_id}
        if disable_web_page_preview:
            payload["link_preview_options"] = {"is_disabled": True}
+        # sendMessage is a leaf call — its kwargs go straight into the
+        # JSON body. The ContextVar pattern is reserved for the deeper
+        # media paths (``_upload_media`` / ``_post_media_group`` /
+        # ``_send_from_cache``) that can't easily plumb kwargs through.
+        if disable_notification:
+            payload["disable_notification"] = True
+        if message_thread_id is not None:
+            payload["message_thread_id"] = message_thread_id
+        return await self._post_send_message(payload)

+    async def _post_send_message(
+        self, payload: dict[str, Any],
+    ) -> NotificationResult:
        url = self._api_url("sendMessage")
        try:
            async with self._session.post(url, json=payload, timeout=_API_TIMEOUT) as response:
@@ -651,6 +786,7 @@ class TelegramClient:
        max_asset_data_size: int | None = None, content_type: str | None = None,
        cache_key: str | None = None, download_headers: dict[str, str] | None = None,
        preloaded_data: bytes | None = None,
+        send_large_videos_as_documents: bool = False,
    ) -> NotificationResult:
        if not url:
            return {"success": False, "error": "Missing 'url' for video"}
@@ -672,6 +808,18 @@ class TelegramClient:
        if max_asset_data_size is not None and len(data) > max_asset_data_size:
            return {"success": False, "error": "Video exceeds size limit", "skipped": True}
        if len(data) > TELEGRAM_MAX_VIDEO_SIZE:
+            # Telegram's sendVideo hard-caps at 50 MB. Documents accept
+            # up to 2 GB, so when the operator opts in we deliver the
+            # bytes as a document instead of silently dropping the asset.
+            # Loses inline playback but preserves delivery.
+            if send_large_videos_as_documents:
+                filename = url.split("/")[-1].split("?")[0] or "video.mp4"
+                if "." not in filename:
+                    filename = "video.mp4"
+                return await self._send_document(
+                    chat_id, data, filename, caption, reply_to_message_id,
+                    parse_mode, url, content_type, cache_key,
+                )
            return {
                "success": False,
                "error": f"Video exceeds Telegram's {TELEGRAM_MAX_VIDEO_SIZE // (1024*1024)} MB limit",
@@ -723,6 +871,7 @@ class TelegramClient:
        caption: str | None = None, reply_to_message_id: int | None = None,
        max_group_size: int = 10, chunk_delay: int = 0, parse_mode: str = "HTML",
        max_asset_data_size: int | None = None, send_large_photos_as_documents: bool = False,
+        send_large_videos_as_documents: bool = False,
    ) -> NotificationResult:
        # Telegram rejects mixed photo/video + document in a single
        # sendMediaGroup. Split before chunking so a malformed input
@@ -730,75 +879,293 @@ class TelegramClient:
        partitions = self._partition_media_by_kind(assets)

        all_message_ids: list[int] = []
-        first_chunk_overall = True
+        errors: list[dict[str, Any]] = []
+        delivered = 0
+        skipped = 0
+        failed = 0
+        first_send = True
+        # Oversized videos that the operator wants delivered as
+        # documents. Sent after all media-group chunks finish so
+        # they ride out on their own (Telegram refuses to mix
+        # documents with photo/video in one group).
+        deferred_documents: list[_MediaItem] = []
+        # Caption + reply_to are "spent" on the first send attempt,
+        # mirroring the prior contract. If that first attempt fails
+        # entirely, they're lost — same as before. Tracking these as
+        # standalone flags (rather than deriving from ``chunk_idx==0``)
+        # keeps the semantics right across multiple partitions.
+        caption_pending = bool(caption)
+        reply_pending = reply_to_message_id is not None
+
+        async def maybe_delay() -> None:
+            nonlocal first_send
+            if not first_send and chunk_delay > 0:
+                await asyncio.sleep(chunk_delay / 1000)
+            first_send = False
+
        for partition in partitions:
            chunks = [
                partition[i:i + max_group_size]
                for i in range(0, len(partition), max_group_size)
            ]
            for chunk_idx, chunk in enumerate(chunks):
-                if not first_chunk_overall and chunk_delay > 0:
-                    await asyncio.sleep(chunk_delay / 1000)
-
-                # Single-item chunk → use the simpler send_photo/video path.
-                if len(chunk) == 1:
-                    item = chunk[0]
-                    chunk_caption = caption if first_chunk_overall else None
-                    chunk_reply = reply_to_message_id if first_chunk_overall else None
-                    if item.get("type") == "photo":
-                        result = await self._send_photo(
-                            chat_id, item.get("url"), chunk_caption, chunk_reply, parse_mode,
-                            max_asset_data_size, send_large_photos_as_documents,
-                            item.get("content_type"), item.get("cache_key"),
-                            download_headers=item.get("headers"),
-                            preloaded_data=item.get("data"),
-                        )
-                    elif item.get("type") == "video":
-                        result = await self._send_video(
-                            chat_id, item.get("url"), chunk_caption, chunk_reply, parse_mode,
-                            max_asset_data_size,
-                            item.get("content_type"), item.get("cache_key"),
-                            download_headers=item.get("headers"),
-                            preloaded_data=item.get("data"),
-                        )
-                    else:
-                        first_chunk_overall = False
-                        continue
-                    first_chunk_overall = False
-                    if not result.get("success"):
-                        result["failed_at_chunk"] = chunk_idx + 1
-                        return result
-                    if result.get("message_id") is not None:
-                        all_message_ids.append(result["message_id"])
-                    continue
-
-                items = await self._build_media_items(
-                    chunk, max_asset_data_size, caption if first_chunk_overall else None,
-                    parse_mode,
+                # Fetch + filter the parent chunk. Skipped items
+                # (oversized, bad photo, failed download) never enter
+                # ``items`` — count them so the operator-facing result
+                # reflects what actually went out vs got dropped.
+                # Oversized videos opted into doc-fallback get
+                # deferred — they're delivered (eventually) so they
+                # don't count as skipped.
+                items, chunk_deferred = await self._build_media_items(
+                    chunk, max_asset_data_size, send_large_videos_as_documents,
                )
+                deferred_documents.extend(chunk_deferred)
+                skipped += len(chunk) - len(items) - len(chunk_deferred)
+
                if not items:
                    _LOGGER.warning(
-                        "sendMediaGroup skipped — chunk %d/%d had %d input items but 0 usable (all filtered/failed)",
+                        "sendMediaGroup: chunk %d/%d had %d input items but 0 usable",
                        chunk_idx + 1, len(chunks), len(chunk),
                    )
-                    first_chunk_overall = False
                    continue

-                chunk_msg_ids, chunk_err = await self._post_media_group(
-                    chat_id, items, reply_to_message_id if first_chunk_overall else None,
-                    chunk_idx, len(chunks),
+                # Split the chunk into sub-chunks that each fit under
+                # Telegram's per-request byte cap. Per-item filtering
+                # alone can't prevent 413s when several legal-sized
+                # items together bust the envelope.
+                sub_chunks = self._split_items_by_byte_budget(
+                    items, TELEGRAM_MAX_GROUP_TOTAL_BYTES,
                )
-                first_chunk_overall = False
-                if chunk_err is not None:
-                    return chunk_err
-                all_message_ids.extend(chunk_msg_ids)
+                if len(sub_chunks) > 1:
+                    _LOGGER.info(
+                        "sendMediaGroup: byte-budget split chunk %d/%d into %d sub-chunks",
+                        chunk_idx + 1, len(chunks), len(sub_chunks),
+                    )

-        if not all_message_ids:
-            _LOGGER.warning(
-                "sendMediaGroup completed with 0 message_ids — nothing was delivered",
+                for sub_items in sub_chunks:
+                    await maybe_delay()
+                    sub_caption = caption if caption_pending else None
+                    sub_reply = reply_to_message_id if reply_pending else None
+                    caption_pending = False
+                    reply_pending = False
+                    if sub_caption:
+                        self._attach_caption_to_first(
+                            sub_items, sub_caption, parse_mode,
+                        )
+
+                    msg_ids, err = await self._post_media_group(
+                        chat_id, sub_items, sub_reply, chunk_idx, len(chunks),
+                    )
+                    if err is None:
+                        all_message_ids.extend(msg_ids)
+                        delivered += len(sub_items)
+                        continue
+
+                    # Telegram rejected the sub-chunk after our
+                    # pre-flight passed (content / transient / rate).
+                    # Try each item as its own message so partial
+                    # delivery survives the chunk-level failure.
+                    # Record the chunk-level cause first so the
+                    # operator-visible ``errors`` list reads in
+                    # cause-then-consequence order.
+                    _LOGGER.warning(
+                        "sendMediaGroup chunk %d/%d failed (%s) — falling back to per-item",
+                        chunk_idx + 1, len(chunks), err.get("error"),
+                    )
+                    errors.append({
+                        "kind": "chunk",
+                        "chunk": chunk_idx + 1,
+                        "error": err.get("error", "unknown"),
+                        "code": err.get("error_code"),
+                    })
+                    for item_idx, item in enumerate(sub_items):
+                        item_caption = sub_caption if item_idx == 0 else None
+                        item_reply = sub_reply if item_idx == 0 else None
+                        # No ``maybe_delay()`` here: per-item retries
+                        # are a recovery path where added latency
+                        # only widens the outage window — the
+                        # individual sendPhoto/sendVideo calls have
+                        # their own 429 backoff in ``_upload_media``.
+                        item_result = await self._send_item_individually(
+                            chat_id, item, item_caption, item_reply, parse_mode,
+                        )
+                        if item_result.get("success"):
+                            delivered += 1
+                            mid = item_result.get("message_id")
+                            if mid is not None:
+                                all_message_ids.append(mid)
+                        else:
+                            failed += 1
+                            errors.append({
+                                "kind": "item",
+                                "chunk": chunk_idx + 1,
+                                "item_index": item_idx,
+                                "error": item_result.get("error", "unknown"),
+                            })
+
+        # Deferred oversized-videos-as-documents: send each on its own
+        # via sendDocument. They couldn't ride in the media group
+        # because Telegram refuses to mix document with photo/video,
+        # and per-item failures don't poison siblings.
+        for deferred in deferred_documents:
+            await maybe_delay()
+            d_caption = caption if caption_pending else None
+            d_reply = reply_to_message_id if reply_pending else None
+            caption_pending = False
+            reply_pending = False
+            d_result = await self._send_item_individually(
+                chat_id, deferred, d_caption, d_reply, parse_mode,
            )
-            return {"success": False, "error": "no_items_delivered"}
-        return {"success": True, "message_ids": all_message_ids}
+            if d_result.get("success"):
+                delivered += 1
+                mid = d_result.get("message_id")
+                if mid is not None:
+                    all_message_ids.append(mid)
+            else:
+                failed += 1
+                errors.append({
+                    "kind": "deferred_document",
+                    "error": d_result.get("error", "unknown"),
+                })
+
+        if delivered == 0:
+            if skipped > 0 and not errors:
+                msg = f"all {skipped} item(s) filtered before send"
+            elif errors:
+                msg = errors[0].get("error", "no_items_delivered")
+            else:
+                msg = "no_items_delivered"
+            _LOGGER.warning(
+                "sendMediaGroup delivered 0 items (skipped=%d failed=%d)",
+                skipped, failed,
+            )
+            return {
+                "success": False,
+                "error": msg,
+                "message_ids": [],
+                "delivered_count": 0,
+                "skipped_count": skipped,
+                "failed_count": failed,
+                "errors": errors or None,
+                "failed_at_chunk": errors[0].get("chunk") if errors else None,
+            }
+
+        return {
+            "success": True,
+            "message_ids": all_message_ids,
+            "delivered_count": delivered,
+            "skipped_count": skipped,
+            "failed_count": failed,
+            "errors": errors or None,
+        }
+
+    @staticmethod
+    def _split_items_by_byte_budget(
+        items: list[_MediaItem], max_bytes: int,
+    ) -> list[list[_MediaItem]]:
+        """Partition ``items`` into consecutive batches bounded by ``max_bytes``.
+
+        The list is walked once, in order. A new batch is opened whenever
+        adding the next item's ``upload_bytes`` to the running total of
+        the current batch would exceed ``max_bytes``. Items whose
+        ``upload_bytes`` is 0 add nothing to the running total. An item
+        that is larger than the budget by itself is still emitted, in a
+        batch of its own, rather than being dropped here. Input order is
+        kept, so the first item of the first batch is always ``items[0]``.
+
+        Returns an empty list for empty input.
+        """
+        batches: list[list[_MediaItem]] = []
+        running_total = 0
+        for entry in items:
+            weight = entry.upload_bytes
+            # ``batches[-1]`` is never empty here: every batch receives an
+            # entry right after it is created, so only the budget check
+            # (or the very first entry) can open a new one.
+            if not batches or running_total + weight > max_bytes:
+                batches.append([])
+                running_total = 0
+            batches[-1].append(entry)
+            running_total += weight
+        return batches
+
+    @staticmethod
+    def _attach_caption_to_first(
+        items: list[_MediaItem], caption: str, parse_mode: str,
+    ) -> None:
+        """Stamp ``caption`` and ``parse_mode`` onto the leading item.
+
+        Only ``items[0].media_json`` is modified, in place; Telegram shows
+        the first media-group item's caption and ignores the rest. The
+        caption is cut to ``TELEGRAM_MAX_CAPTION_LENGTH`` via
+        ``_truncate``. Calling this again overwrites the previous values,
+        and an empty ``items`` list is a no-op.
+        """
+        if items:
+            lead_json = items[0].media_json
+            lead_json["caption"] = _truncate(caption, TELEGRAM_MAX_CAPTION_LENGTH)
+            lead_json["parse_mode"] = parse_mode
+
+    async def _send_item_individually(
+        self, chat_id: str, item: _MediaItem,
+        caption: str | None, reply_to_message_id: int | None,
+        parse_mode: str,
+    ) -> NotificationResult:
+        """Deliver a single ``_MediaItem`` outside of a media group.
+
+        The endpoint family (photo / video / document) is chosen from
+        ``item.media_json["type"]``; a missing or empty type is treated
+        as a photo and any other value as a document.
+
+        Two shapes of item are handled:
+
+        * Items carrying an ``attachment`` are uploaded from the bytes
+          already held in memory.
+        * Items without an ``attachment`` are first sent by their stored
+          ``file_id``. When ``_send_from_cache`` returns ``None`` the
+          asset is downloaded again from ``source_url`` and uploaded
+          fresh, so a stale ``file_id`` does not silently lose it.
+
+        Returns the result dict of whichever send ran, or a
+        ``{"success": False, "error": ...}`` dict when there is no
+        ``source_url`` to fall back on or the re-download fails.
+        """
+        declared_type = item.media_json.get("type") or "photo"
+        if declared_type == "video":
+            kind = _VIDEO_KIND
+        elif declared_type == "photo":
+            kind = _PHOTO_KIND
+        else:
+            kind = _DOCUMENT_KIND
+
+        # Cache bookkeeping is optional; without ``cache_info`` the upload
+        # helper receives ``None`` for all three cache arguments.
+        # NOTE(review): a video retagged as a document is uploaded under
+        # the same cache key as the original asset — confirm the cache
+        # keeps media types apart.
+        if item.cache_info is None:
+            cache, cache_key, thumbhash = None, None, None
+        else:
+            cache_key, _kind_label, thumbhash, _size = item.cache_info
+            cache = self._get_cache_for_key(cache_key)
+
+        # Fresh item: the bytes are already in memory, upload them as-is.
+        if item.attachment is not None:
+            _attach_name, payload, filename, content_type = item.attachment
+            return await self._upload_media(
+                kind, chat_id, payload, filename, content_type,
+                caption, reply_to_message_id, parse_mode,
+                cache, cache_key, thumbhash,
+            )
+
+        # Cached item: ``media`` holds a Telegram file_id unless it is an
+        # ``attach://`` placeholder. Try the cheap reference send first.
+        stored_ref = item.media_json.get("media", "")
+        if stored_ref and not stored_ref.startswith("attach://"):
+            outcome = await self._send_from_cache(
+                kind, chat_id, stored_ref, caption, reply_to_message_id, parse_mode,
+            )
+            if outcome is not None:
+                return outcome
+
+        # The reference send produced nothing usable: fetch the bytes again.
+        if not item.source_url:
+            return {"success": False, "error": "Cached fallback send failed (no source URL)"}
+        fresh_bytes, fetch_err = await self._safe_get(
+            self._resolve_url(item.source_url), item.download_headers,
+        )
+        if fresh_bytes is None:
+            return {"success": False, "error": f"Re-download failed: {fetch_err}"}
+        return await self._upload_media(
+            kind, chat_id, fresh_bytes,
+            kind.default_filename, kind.default_content_type,
+            caption, reply_to_message_id, parse_mode,
+            cache, cache_key, thumbhash,
+        )

    @staticmethod
    def _partition_media_by_kind(
@@ -830,23 +1197,40 @@ class TelegramClient:
        self,
        chunk: list[dict[str, Any]],
        max_asset_data_size: int | None,
-        first_caption: str | None,
-        parse_mode: str,
-    ) -> list[_MediaItem]:
+        send_large_videos_as_documents: bool = False,
+    ) -> tuple[list[_MediaItem], list[_MediaItem]]:
        """Fetch + filter a chunk and return aligned media-group items.

+        Returns ``(items, deferred_documents)`` — ``items`` go into
+        sendMediaGroup, ``deferred_documents`` are oversized videos
+        retagged as documents (when the caller opted in) that will be
+        sent individually via ``_send_item_individually`` *after* the
+        group sends. Telegram rejects mixing documents with photo/video
+        in one group, so they have to ride out separately.
+
        Concurrency is bounded by ``_MEDIA_FETCH_CONCURRENCY`` so peak
        memory stays predictable. Per-fetch exceptions are isolated via
        ``return_exceptions=True`` so a single failed download cannot
        cancel its peers.
+
+        Caption injection is intentionally NOT performed here — callers
+        attach the caption after byte-budget sub-splitting so it lands
+        on the first item of the first delivered sub-chunk.
        """
        sem = asyncio.Semaphore(_MEDIA_FETCH_CONCURRENCY)

-        async def fetch(idx: int, item: dict[str, Any]) -> tuple[int, dict | None, bytes | None]:
+        async def fetch(
+            idx: int, item: dict[str, Any],
+        ) -> tuple[int, dict | None, bytes | None, bool]:
+            """Returns ``(idx, cached_entry, data, defer_as_document)``.
+
+            ``defer_as_document=True`` signals "video bytes valid but
+            too big for sendVideo — caller should send as document".
+            """
            url = item.get("url")
            if not url:
                _LOGGER.warning("Media skipped: missing url (idx=%d type=%s)", idx, item.get("type"))
-                return idx, None, None
+                return idx, None, None, False
            media_type = item.get("type", "photo")
            custom_cache_key = item.get("cache_key")

@@ -860,7 +1244,7 @@ class TelegramClient:
            )
            cached = item_cache.get(ck, thumbhash=item_thumbhash) if item_cache else None
            if cached and cached.get("file_id"):
-                return idx, cached, None
+                return idx, cached, None, False

            preloaded = item.get("data")
            data: bytes | None
@@ -874,34 +1258,40 @@ class TelegramClient:
                        "Media skipped: download failed (idx=%d type=%s): %s",
                        idx, media_type, err,
                    )
-                    return idx, None, None
+                    return idx, None, None, False

            if max_asset_data_size and len(data) > max_asset_data_size:
                _LOGGER.warning(
                    "Media skipped: size %d exceeds max_asset_data_size %d (idx=%d type=%s)",
                    len(data), max_asset_data_size, idx, media_type,
                )
-                return idx, None, None
+                return idx, None, None, False
            if media_type == "video" and len(data) > TELEGRAM_MAX_VIDEO_SIZE:
+                if send_large_videos_as_documents:
+                    _LOGGER.info(
+                        "Video %d bytes over Telegram limit (idx=%d) — deferring as document",
+                        len(data), idx,
+                    )
+                    return idx, None, data, True
                _LOGGER.warning(
                    "Media skipped: video %d bytes exceeds Telegram limit %d (idx=%d)",
                    len(data), TELEGRAM_MAX_VIDEO_SIZE, idx,
                )
-                return idx, None, None
+                return idx, None, None, False
            if media_type == "photo":
                exceeds, reason, _, _ = check_photo_limits(data)
                if exceeds:
                    _LOGGER.warning(
                        "Media skipped: photo %s (idx=%d)", reason, idx,
                    )
-                    return idx, None, None
-            return idx, None, data
+                    return idx, None, None, False
+            return idx, None, data, False

        raw = await asyncio.gather(
            *(fetch(i, item) for i, item in enumerate(chunk)),
            return_exceptions=True,
        )
-        results: list[tuple[int, dict | None, bytes | None]] = []
+        results: list[tuple[int, dict | None, bytes | None, bool]] = []
        for entry in raw:
            if isinstance(entry, Exception):
                _LOGGER.warning("Media fetch raised: %s", redact_exc(entry))
@@ -909,8 +1299,9 @@ class TelegramClient:
            results.append(entry)

        items: list[_MediaItem] = []
+        deferred_documents: list[_MediaItem] = []
        upload_idx = 0
-        for idx, cached_entry, data in results:
+        for idx, cached_entry, data, defer_as_document in results:
            item = chunk[idx]
            url = item.get("url")
            if not url:
@@ -918,6 +1309,35 @@ class TelegramClient:
            media_type = item.get("type") or "photo"
            custom_cache_key = item.get("cache_key")

+            # Deferred videos-as-documents are NEVER cache hits (the
+            # cache lookup branch returns early before the size check),
+            # so we always have fresh bytes here. Retag the
+            # media_json so ``_send_item_individually`` routes via
+            # ``_DOCUMENT_KIND`` to /sendDocument.
+            if defer_as_document and data is not None:
+                ct = item.get("content_type") or "video/mp4"
+                # Best-effort filename preserves the original
+                # extension so Telegram clients give it a sensible
+                # icon and the recipient can re-open it.
+                fname = url.split("/")[-1].split("?")[0] or "video.mp4"
+                if "." not in fname:
+                    fname = "video.mp4"
+                ck = custom_cache_key or extract_asset_id_from_url(url) or url
+                ck_is_asset = is_asset_cache_key(ck)
+                bare_ck = asset_id_from_cache_key(ck) if ck_is_asset else ck
+                th = (
+                    self._thumbhash_resolver(bare_ck)
+                    if ck_is_asset and self._thumbhash_resolver else None
+                )
+                deferred_documents.append(_MediaItem(
+                    media_json={"type": "document", "media": "attach://deferred"},
+                    cache_info=(ck, "document", th, len(data)),
+                    attachment=("deferred", data, fname, ct),
+                    source_url=url,
+                    download_headers=item.get("headers"),
+                ))
+                continue
+
            if cached_entry and cached_entry.get("file_id"):
                mij: dict[str, Any] = {"type": media_type, "media": cached_entry["file_id"]}
                cache_info: tuple[str, str, str | None, int] | None = None
@@ -940,14 +1360,14 @@ class TelegramClient:
            else:
                continue

-            if first_caption and not items:
-                # Only the first usable item in the first chunk receives
-                # the caption, per Telegram's media-group semantics.
-                mij["caption"] = _truncate(first_caption, TELEGRAM_MAX_CAPTION_LENGTH)
-                mij["parse_mode"] = parse_mode
-
-            items.append(_MediaItem(media_json=mij, cache_info=cache_info, attachment=attachment))
-        return items
+            items.append(_MediaItem(
+                media_json=mij,
+                cache_info=cache_info,
+                attachment=attachment,
+                source_url=url,
+                download_headers=item.get("headers"),
+            ))
+        return items, deferred_documents

    async def _post_media_group(
        self,
@@ -973,6 +1393,7 @@ class TelegramClient:
            for name, payload, filename, ct in attachments:
                f.add_field(name, payload, filename=filename, content_type=ct)
            f.add_field("media", json.dumps(media_json))
+            _apply_send_opts_to_form(f)
            return f

        for attempt in range(1, _TG_429_MAX_ATTEMPTS + 1):
@@ -13,6 +13,11 @@ _LOGGER = logging.getLogger(__name__)
 TELEGRAM_API_BASE_URL: Final = "https://api.telegram.org/bot"
 TELEGRAM_MAX_PHOTO_SIZE: Final = 10 * 1024 * 1024  # 10 MB
 TELEGRAM_MAX_VIDEO_SIZE: Final = 50 * 1024 * 1024  # 50 MB
+# Telegram's sendMediaGroup envelope tops out near 50 MB total (multipart
+# bytes including form overhead). 45 MB keeps a safety margin so we don't
+# eat 413s when the per-item budget admits items that, summed, would
+# bust Telegram's request cap.
+TELEGRAM_MAX_GROUP_TOTAL_BYTES: Final = 45 * 1024 * 1024  # 45 MB
 TELEGRAM_MAX_DIMENSION_SUM: Final = 10000
 # Telegram message-text limit (sendMessage) and caption limit
 # (sendPhoto/sendVideo/sendDocument/first item of sendMediaGroup).
@@ -126,36 +131,6 @@ def build_telegram_asset_entry(
    return entry


-def split_media_by_upload_size(
-    media_items: list[tuple], max_upload_size: int
-) -> list[list[tuple]]:
-    """Split media items into sub-groups respecting upload size limit."""
-    if not media_items:
-        return []
-
-    groups: list[list[tuple]] = []
-    current_group: list[tuple] = []
-    current_size = 0
-
-    for item in media_items:
-        media_ref = item[1]
-        is_cached = item[4]
-        item_size = 0 if is_cached else (len(media_ref) if isinstance(media_ref, bytes) else 0)
-
-        if current_group and current_size + item_size > max_upload_size:
-            groups.append(current_group)
-            current_group = []
-            current_size = 0
-
-        current_group.append(item)
-        current_size += item_size
-
-    if current_group:
-        groups.append(current_group)
-
-    return groups
-
-
 def check_photo_limits(
    data: bytes,
 ) -> tuple[bool, str | None, int | None, int | None]:
@@ -315,6 +315,63 @@ async def clear_telegram_cache(
    return result


+# Request payload accepted by ``POST /diagnostic-mode``.
+class DiagnosticActivateBody(BaseModel):
+    # Module to flip to DEBUG; passed to ``set_diagnostic`` unchanged.
+    module: str
+    # Length of the override window in minutes; 30 when the client omits it.
+    duration_minutes: int = 30
+
+
+@router.get("/diagnostic-mode")
+async def list_diagnostic_overrides(
+    user: User = Depends(require_admin),
+):
+    """Report the temporary DEBUG overrides that are currently active.
+
+    Admin-only. The response is ``{"active": ...}`` where the value is
+    whatever ``diagnostic_mode.list_active()`` returns, including each
+    override's countdown. Backs the dashboard panel that lets admins
+    switch a module to DEBUG for a bounded, auto-reverting window.
+    """
+    from ..services.diagnostic_mode import list_active
+
+    active_overrides = list_active()
+    return {"active": active_overrides}
+
+
+@router.post("/diagnostic-mode")
+async def activate_diagnostic_override(
+    body: DiagnosticActivateBody,
+    user: User = Depends(require_admin),
+):
+    """Switch ``body.module`` to DEBUG with a scheduled auto-revert.
+
+    Admin-only. Activating a module that already has an override
+    replaces its earlier schedule. The entry returned by
+    ``set_diagnostic`` is sent back as-is so the UI can draw the
+    countdown without issuing a follow-up GET. The service reads the
+    current ``log_levels`` setting both at activation and at revert, so
+    an admin editing overrides mid-window does not get a stale baseline
+    restored.
+
+    Raises:
+        HTTPException: 400 carrying the service's message when
+            ``set_diagnostic`` raises ``ValueError``.
+    """
+    from ..services.diagnostic_mode import set_diagnostic
+
+    try:
+        return await set_diagnostic(body.module, body.duration_minutes)
+    except ValueError as err:
+        raise HTTPException(status_code=400, detail=str(err)) from err
+
+
+@router.delete("/diagnostic-mode/{module:path}")
+async def revert_diagnostic_override(
+    module: str,
+    user: User = Depends(require_admin),
+):
+    """Revert one module's override before its window runs out.
+
+    Admin-only. On success the response is ``{"reverted": module}``.
+
+    Raises:
+        HTTPException: 404 when ``revert_diagnostic`` reports that no
+            override was active, so the caller can show a friendly
+            "nothing to revert" message without parsing booleans.
+    """
+    from ..services.diagnostic_mode import revert_diagnostic
+
+    was_active = await revert_diagnostic(module)
+    if was_active:
+        return {"reverted": module}
+    raise HTTPException(
+        status_code=404, detail=f"No active override for {module!r}",
+    )
+
+
@router.get("/locales")
 async def get_supported_locales(
    user: User = Depends(get_current_user),
@@ -13,6 +13,7 @@ from jinja2.sandbox import SandboxedEnvironment
 from sqlmodel import select
 from sqlmodel.ext.asyncio.session import AsyncSession

+from notify_bridge_core.log_context import enrich_details_with_correlation
 from notify_bridge_core.notifications.telegram.client import TelegramClient
 from ..database.engine import get_engine
 from ..database.models import (
@@ -347,7 +348,7 @@ async def _log_command_event(
                collection_id=str(chat_id),
                collection_name=_format_command_subject(cmd, args),
                assets_count=media_total,
-                details=details,
+                details=enrich_details_with_correlation(details),
            ))
            await session.commit()
    except Exception:  # noqa: BLE001 — diagnostic only, never block reply
@@ -1,6 +1,7 @@
 """Notify Bridge Server — FastAPI application entry point."""

 import logging
+import uuid
 from contextlib import asynccontextmanager

 from fastapi import FastAPI
@@ -8,6 +9,11 @@ from fastapi.middleware.cors import CORSMiddleware
 from slowapi import _rate_limit_exceeded_handler
 from slowapi.errors import RateLimitExceeded
 from slowapi.middleware import SlowAPIMiddleware
+from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint
+from starlette.requests import Request as StarletteRequest
+from starlette.responses import Response as StarletteResponse
+
+from notify_bridge_core.log_context import bind_log_context

 from .config import settings as _log_cfg
 from .logging_setup import setup_logging
@@ -163,6 +169,16 @@ async def lifespan(app: FastAPI):
    _READY = False
    from .services.ha_subscription import stop_all as stop_ha_subscriptions
    await stop_ha_subscriptions()
+    # Restore the DB-configured baseline level for any temporary DEBUG
+    # overrides before the engine is disposed — so even a forced restart
+    # leaves the world tidy and doesn't leak DEBUG state into the next
+    # process (which would also be wiped by setup_logging() at boot, but
+    # being explicit about shutdown is cheaper than relying on a re-init).
+    from .services.diagnostic_mode import revert_all as revert_diagnostics
+    try:
+        await revert_diagnostics()
+    except Exception:  # pragma: no cover — never block shutdown on this.
+        _LOGGER.exception("Failed to revert diagnostic overrides during shutdown")
    scheduler = get_scheduler()
    if scheduler.running:
        scheduler.shutdown(wait=True)
@@ -178,9 +194,55 @@ _APP_VERSION = _resolve_version()
 app = FastAPI(title="Notify Bridge", version=_APP_VERSION, lifespan=lifespan)

 # --- Security headers ---
-from starlette.middleware.base import BaseHTTPMiddleware
-from starlette.requests import Request as StarletteRequest
-from starlette.responses import Response as StarletteResponse
+
+
+# Bounded character set for accepted inbound X-Request-Id values. Anything
+# outside this is replaced with a server-generated id so a malicious header
+# can't smuggle CR/LF into log lines or break grep-by-field parsing.
+# ``:`` is intentionally excluded so an inbound value can't masquerade as a
+# server-minted ``disp:<hex>`` / ``req:<hex>`` id and confuse operator greps.
+_REQUEST_ID_MAX_LEN = 64
+_REQUEST_ID_ALLOWED = set(
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"
+)
+
+
+def _normalize_request_id(raw: str | None) -> str:
+    """Return a log-safe request id: the inbound value or a minted one.
+
+    The inbound header value is accepted only when, after stripping
+    surrounding whitespace, it is non-empty, at most
+    ``_REQUEST_ID_MAX_LEN`` characters long and made up solely of
+    characters in ``_REQUEST_ID_ALLOWED``. Anything else (including a
+    missing header) yields a fresh ``req:<12 hex>`` id.
+    """
+    candidate = (raw or "").strip()
+    acceptable = (
+        0 < len(candidate) <= _REQUEST_ID_MAX_LEN
+        and _REQUEST_ID_ALLOWED.issuperset(candidate)
+    )
+    if acceptable:
+        return candidate
+    return f"req:{uuid.uuid4().hex[:12]}"
+
+
+class RequestContextMiddleware(BaseHTTPMiddleware):
+    """Attach a correlation id to each request and mirror it on the response.
+
+    The id is taken from the inbound ``X-Request-Id`` header when it
+    survives :func:`_normalize_request_id` (so an upstream proxy with its
+    own correlation system can propagate its id); otherwise a short random
+    ``req:<12 hex>`` value is minted. The same id is always written to the
+    response ``X-Request-Id`` header so the SPA can surface it for
+    operator-friendly bug reports.
+
+    The id is bound via :func:`bind_log_context` so it appears on every log
+    line emitted during request handling (``[req=...]``) and is picked up by
+    :func:`notify_bridge_core.log_context.enrich_details_with_correlation`
+    when an ``EventLog`` row is written during the same request.
+    """
+
+    async def dispatch(
+        self,
+        request: StarletteRequest,
+        call_next: RequestResponseEndpoint,
+    ) -> StarletteResponse:
+        """Run the downstream app with ``request_id`` bound, then echo it."""
+        inbound_value = request.headers.get("x-request-id")
+        correlation_id = _normalize_request_id(inbound_value)
+        with bind_log_context(request_id=correlation_id):
+            downstream: StarletteResponse = await call_next(request)
+        downstream.headers["X-Request-Id"] = correlation_id
+        return downstream


 _CSP = (
@@ -238,6 +300,12 @@ app.add_middleware(
    allow_headers=["*"],
 )

+# Request-ID middleware is added LAST so it becomes the outermost wrapper —
+# every other middleware (CORS, rate limit, security headers) then logs with
+# the request_id already bound, and CORS preflight responses also carry the
+# X-Request-Id echo header.
+app.add_middleware(RequestContextMiddleware)
+
 # Register routes — static paths before parameterized
 app.include_router(auth_router)
 app.include_router(template_vars_router)
@@ -9,6 +9,11 @@ from typing import Any
 from sqlmodel import select
 from sqlmodel.ext.asyncio.session import AsyncSession

+from notify_bridge_core.log_context import (
+    bind_log_context,
+    ensure_dispatch_id,
+    enrich_details_with_correlation,
+)
 from notify_bridge_core.providers.action_executor import ActionResult

 from ..database.engine import get_engine
@@ -27,6 +32,15 @@ async def run_action(
    action_id: int, *, trigger: str = "scheduled"
 ) -> ActionResult:
    """Load an action from DB, execute it, and save the execution log."""
+    # One dispatch_id per action run so the EventLog row (and any inner log
+    # lines emitted by the action executor) share a correlation id.
+    with bind_log_context(dispatch_id=ensure_dispatch_id()):
+        return await _run_action_impl(action_id, trigger=trigger)
+
+
+async def _run_action_impl(
+    action_id: int, *, trigger: str = "scheduled"
+) -> ActionResult:
    engine = get_engine()

    # ------------------------------------------------------------------
@@ -142,7 +156,7 @@ async def run_action(
                    # without a separate action_name renderer.
                    collection_name=action.name,
                    assets_count=action_result.total_items_affected,
-                    details={
+                    details=enrich_details_with_correlation({
                        "action_type": action.action_type,
                        "trigger": trigger,
                        "rules_processed": action_result.rules_processed,
@@ -150,7 +164,7 @@ async def run_action(
                        "rules_failed": action_result.rules_failed,
                        "error": action_result.error or "",
                        "execution_id": execution_id,
-                    },
+                    }),
                ))

        await session.commit()
@@ -33,6 +33,11 @@ from sqlalchemy.orm.attributes import flag_modified
 from sqlmodel import select
 from sqlmodel.ext.asyncio.session import AsyncSession

+from notify_bridge_core.log_context import (
+    bind_log_context,
+    ensure_dispatch_id,
+    enrich_details_with_correlation,
+)
 from notify_bridge_core.models.events import EventType, ServiceEvent
 from notify_bridge_core.models.media import MediaAsset, MediaType
 from notify_bridge_core.notifications.dispatcher import (
@@ -56,6 +61,7 @@ from .dispatch_helpers import (
    load_link_data,
    resolve_provider_credential,
 )
+from .dispatch_summary import summarize_dispatch_results

 _LOGGER = logging.getLogger(__name__)

@@ -616,12 +622,12 @@ async def _mark_dropped(
        collection_name=payload.get("collection_name", ""),
        assets_count=int(payload.get("added_count", 0))
            or int(payload.get("removed_count", 0)),
-        details={
+        details=enrich_details_with_correlation({
            "dispatch_status": "deferred_then_dropped",
            "reason": reason,
            "original_event_log_id": row.event_log_id,
            "provider_type": payload.get("provider_type", ""),
-        },
+        }),
    ))


@@ -644,6 +650,28 @@ async def _process_row(
    entry produces its own target_config so a broadcast deferred row fans
    out to all current children at drain time.
    """
+    # Bind a fresh dispatch_id per drained row so the EventLog rows written
+    # by the success/drop paths AND the inner dispatcher's log lines share
+    # one id. Each deferred row is a logically separate dispatch attempt.
+    with bind_log_context(dispatch_id=ensure_dispatch_id()):
+        await _process_row_impl(
+            session, row, tracker, provider_id, provider_name,
+            provider_config, app_tz, link_by_id, dispatcher, stats,
+        )
+
+
+async def _process_row_impl(
+    session: AsyncSession,
+    row: DeferredDispatch,
+    tracker: NotificationTracker,
+    provider_id: int,
+    provider_name: str,
+    provider_config: dict[str, Any],
+    app_tz: str,
+    link_by_id: dict[int, list[dict[str, Any]]],
+    dispatcher: NotificationDispatcher,
+    stats: dict[str, int],
+) -> None:
    expanded = link_by_id.get(row.link_id)
    if not expanded:
        # Link removed/disabled between defer and drain.
@@ -735,6 +763,8 @@ async def _process_row(
    row.fired_at = datetime.now(timezone.utc)
    session.add(row)

+    summary = summarize_dispatch_results(results)
+
    if success:
        stats["fired"] += 1
        session.add(EventLog(
@@ -747,14 +777,15 @@ async def _process_row(
            collection_id=row.collection_id,
            collection_name=event.collection_name,
            assets_count=event.added_count or event.removed_count or 0,
-            details={
+            details=enrich_details_with_correlation({
                "dispatch_status": "delivered_after_quiet_hours",
                "original_event_log_id": row.event_log_id,
                "deferred_for_seconds": int(
                    (row.fired_at - row.created_at).total_seconds()
                ),
                "provider_type": event.provider_type.value,
-            },
+                "dispatch_summary": summary,
+            }),
        ))
    else:
        stats["dropped"] += 1
@@ -769,12 +800,13 @@ async def _process_row(
            collection_id=row.collection_id,
            collection_name=event.collection_name,
            assets_count=event.added_count or event.removed_count or 0,
-            details={
+            details=enrich_details_with_correlation({
                "dispatch_status": "deferred_then_failed",
                "reason": str(first_err)[:200],
                "original_event_log_id": row.event_log_id,
                "provider_type": event.provider_type.value,
-            },
+                "dispatch_summary": summary,
+            }),
        ))


@@ -0,0 +1,381 @@
+"""Temporary per-module DEBUG overrides with auto-revert.
+
+The runtime ``apply_log_levels()`` API in ``logging_setup`` already lets
+admins flip a module to DEBUG, but the existing path requires editing the
+``log_levels`` DB setting and remembering to revert it. Operators end up
+either forgetting (leaving DEBUG-flooded logs in production) or never
+turning it on (debugging through stderr only).
+
+This module gives the dashboard a cheap toggle: "give me DEBUG for
+``notify_bridge_core.notifications.telegram.client`` for 30 minutes" —
+apply immediately, schedule a one-shot job at ``now + 30 min`` that
+reverts to whatever level that module would normally have under the
+current DB-configured ``log_levels``.
+
+State is in-memory only. A server restart wipes every active override,
+which is the right semantic: ``setup_logging`` re-applies the
+DB-configured baseline at boot, so a forgotten override can never
+silently carry across a deploy. The lifespan shutdown also calls
+:func:`revert_all` to cleanly restore baselines before the process
+exits — useful for hot-reload dev loops where the server restarts in
+place.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+from dataclasses import dataclass
+from datetime import datetime, timedelta, timezone
+from typing import Any
+
+from sqlmodel.ext.asyncio.session import AsyncSession
+
+from ..database.engine import get_engine
+from ..logging_setup import (
+    _NOISY_LIBRARY_DEFAULTS,
+    parse_level_overrides,
+)
+
+_LOGGER = logging.getLogger(__name__)
+
+# Limits picked to match what "an operator clicked this button" looks like.
+# One minute is enough to reproduce a single failing dispatch; four hours is
+# long enough for a slow-rolling incident without risking a forgotten
+# override outliving a workday.
+_MIN_DURATION_MINUTES = 1
+_MAX_DURATION_MINUTES = 240
+
+# Allowlist of module namespaces an operator can flip. Lets us catch typos
+# and blocks ``""`` (root) — flipping the root logger to DEBUG floods
+# stderr with stuff the operator probably didn't want (boto3, jinja2,
+# every dependency). Anything matching is accepted, anything else is
+# rejected with a 400.
+_ALLOWED_PREFIXES = (
+    "notify_bridge_core",
+    "notify_bridge_server",
+    "sqlalchemy",
+    "aiohttp",
+    "apscheduler",
+    "urllib3",
+    "httpx",
+    "httpcore",
+    "asyncio",
+    "PIL",
+    "uvicorn",
+    "starlette",
+    "fastapi",
+)
+
+
+@dataclass(frozen=True)
+class _Override:
+    """One active DEBUG override.
+
+    ``baseline_level`` is what the module had at activation time — used
+    for the dashboard's "→ WARNING" display. The actual revert path
+    re-reads the current DB-configured ``log_levels`` so a setting change
+    made *while* the override is active is honored at expiry.
+
+    Instances are immutable (``frozen=True``); re-activating a module
+    replaces the whole record in ``_active`` rather than mutating it.
+    """
+
+    # Dotted logger name the override applies to (also the ``_active`` key).
+    module: str
+    # Level name computed by ``_baseline_for`` when the override was set.
+    baseline_level: str
+    # Timezone-aware UTC timestamp taken when the override was applied.
+    activated_at: datetime
+    # Timezone-aware UTC timestamp at which the auto-revert is scheduled.
+    expires_at: datetime
+
+# Module name → active override. Mutated only from the asyncio thread.
+_active: dict[str, _Override] = {}
+
+# Strong references for background tasks created via the asyncio-timer
+# fallback path. CPython's event loop holds only weak refs, so a task
+# without an external retainer can be GC'd before it fires. Tasks are
+# discarded automatically when they complete.
+_bg_tasks: set[asyncio.Task[None]] = set()
+
+
+def _is_allowed(module: str) -> bool:
+    """Tell whether ``module`` may be flipped to DEBUG.
+
+    A name qualifies when it equals one of ``_ALLOWED_PREFIXES`` or is a
+    dotted descendant of one. The empty name (root logger) never qualifies.
+    """
+    if not module:
+        return False
+    for prefix in _ALLOWED_PREFIXES:
+        if module == prefix or module.startswith(prefix + "."):
+            return True
+    return False
+
+
+def _normalize_level_name(lvl: int) -> str:
+    """Map a numeric logging level to its registered name.
+
+    Levels without a registered name (``logging.getLevelName`` answers
+    ``"Level N"``), or any non-string answer, collapse to ``"INFO"``.
+    """
+    label = logging.getLevelName(lvl)
+    unregistered = (
+        not isinstance(label, str)
+        or not label
+        or label.startswith("Level ")
+    )
+    return "INFO" if unregistered else label
+
+
+def _walk_dotted(name: str) -> list[str]:
+    """Return ``name`` followed by each progressively shorter dotted prefix.
+
+    ``"sqlalchemy.engine.Engine"`` →
+    ``["sqlalchemy.engine.Engine", "sqlalchemy.engine", "sqlalchemy"]``.
+    The order (most specific first) mirrors Python's logger-hierarchy
+    traversal, so a sub-logger picks up its nearest parent's override /
+    noisy default rather than falling through to the root level.
+    """
+    segments = name.split(".")
+    return [
+        ".".join(segments[:end])
+        for end in range(len(segments), 0, -1)
+    ]
+
+
+def _baseline_for(module: str, db_log_levels: str | None) -> str:
+    """Compute the level ``module`` would have with no diagnostic override.
+
+    Walks ``module`` and then each dotted parent, most specific first. At
+    every step the lookup order is:
+      1. Explicit DB ``log_levels`` entry for that name.
+      2. Curated noisy-library default in ``_NOISY_LIBRARY_DEFAULTS``.
+    When no name along the walk matches either table, the root logger's
+    effective level is used.
+    """
+    configured = parse_level_overrides(db_log_levels or "")
+    for ancestor in _walk_dotted(module):
+        for table in (configured, _NOISY_LIBRARY_DEFAULTS):
+            if ancestor in table:
+                return table[ancestor]
+    return _normalize_level_name(logging.getLogger().getEffectiveLevel())
+
+
+async def _read_db_log_levels() -> str:
+    """Fetch the current ``log_levels`` setting using a short-lived session.
+
+    Invoked both when an override is activated and when it is reverted, so
+    the revert sees any setting change made in between. Best-effort: any
+    failure is logged at DEBUG and reported as an empty string (no DB
+    overrides), which makes the caller fall back to noisy-library
+    defaults — safer than crashing the timer.
+    """
+    try:
+        from ..api.app_settings import get_setting
+        async with AsyncSession(get_engine()) as session:
+            stored = await get_setting(session, "log_levels")
+            return stored or ""
+    except Exception:  # noqa: BLE001
+        _LOGGER.debug(
+            "diagnostic_mode: failed to read log_levels from DB; "
+            "revert will use noisy-library defaults",
+            exc_info=True,
+        )
+        return ""
+
+
+def list_active() -> list[dict[str, Any]]:
+    """Snapshot the currently active overrides for the dashboard.
+
+    Also sweeps any entry whose ``expires_at`` is in the past — protects
+    against a scheduler misfire that left a ghost row in ``_active``.
+    A swept entry has its logger put back to the ``baseline_level``
+    snapshot taken at activation: without that, a missed auto-revert
+    would leave the module at DEBUG while the dashboard shows nothing
+    active. This function is synchronous and cannot re-read the DB, so
+    the snapshot is the best available target; if the scheduled revert
+    still fires afterwards it simply re-applies the fresh baseline.
+
+    Returns:
+        One dict per live override with ``module``, ``baseline_level``,
+        ``current_level`` (always ``"DEBUG"``), ISO-formatted
+        ``activated_at`` / ``expires_at`` and ``remaining_seconds``.
+    """
+    now = datetime.now(timezone.utc)
+    out: list[dict[str, Any]] = []
+    expired: list[str] = []
+    for module, ov in _active.items():
+        if ov.expires_at <= now:
+            expired.append(module)
+            continue
+        out.append({
+            "module": ov.module,
+            "baseline_level": ov.baseline_level,
+            "current_level": "DEBUG",
+            "activated_at": ov.activated_at.isoformat(),
+            "expires_at": ov.expires_at.isoformat(),
+            "remaining_seconds": int((ov.expires_at - now).total_seconds()),
+        })
+    # Mutate ``_active`` only after the iteration above has finished.
+    for module in expired:
+        ghost = _active.pop(module, None)
+        if ghost is None:
+            continue
+        try:
+            logging.getLogger(module).setLevel(ghost.baseline_level)
+        except (TypeError, ValueError):
+            # An unusable snapshot must never break the dashboard listing.
+            _LOGGER.warning(
+                "Diagnostic mode: could not restore %s to %r after expiry",
+                module, ghost.baseline_level,
+            )
+            continue
+        _LOGGER.info(
+            "Diagnostic mode: %s swept after expiry, restored to %s",
+            module, ghost.baseline_level,
+        )
+    return out
+
+
+def is_active(module: str) -> bool:
+    """Tell whether ``module`` has an override that has not yet expired."""
+    entry = _active.get(module)
+    return entry is not None and entry.expires_at > datetime.now(timezone.utc)
+
+
+async def set_diagnostic(
+    module: str,
+    duration_minutes: int,
+) -> dict[str, Any]:
+    """Switch ``module`` to DEBUG for ``duration_minutes`` minutes.
+
+    Activating a module that is already active replaces its schedule, so
+    a second click moves the expiry instead of stacking overrides.
+
+    Args:
+        module: Dotted logger name; must pass ``_is_allowed``.
+        duration_minutes: Window length, between
+            ``_MIN_DURATION_MINUTES`` and ``_MAX_DURATION_MINUTES``.
+
+    Returns:
+        The dashboard-ready dict describing the new override.
+
+    Raises:
+        ValueError: On a non-allowlisted module or an out-of-range
+            duration, so the API layer can answer with a precise 400.
+    """
+    if not _is_allowed(module):
+        raise ValueError(
+            f"Module {module!r} is not in the diagnostic allowlist",
+        )
+    if not (_MIN_DURATION_MINUTES <= duration_minutes <= _MAX_DURATION_MINUTES):
+        raise ValueError(
+            f"duration_minutes must be between {_MIN_DURATION_MINUTES} and "
+            f"{_MAX_DURATION_MINUTES}",
+        )
+
+    baseline = _baseline_for(module, await _read_db_log_levels())
+    activated_at = datetime.now(timezone.utc)
+    expires_at = activated_at + timedelta(minutes=duration_minutes)
+
+    # Same primitive ``apply_log_levels`` uses, so both mechanisms agree.
+    logging.getLogger(module).setLevel("DEBUG")
+
+    # Drop any earlier schedule before the new record goes in.
+    _remove_scheduled(module)
+    override = _Override(
+        module=module,
+        baseline_level=baseline,
+        activated_at=activated_at,
+        expires_at=expires_at,
+    )
+    _active[module] = override
+    _schedule_revert(module, expires_at)
+
+    _LOGGER.info(
+        "Diagnostic mode: %s set to DEBUG (was %s) for %d min, expires at %s",
+        module, baseline, duration_minutes, expires_at.isoformat(),
+    )
+    window = override.expires_at - override.activated_at
+    return {
+        "module": override.module,
+        "baseline_level": override.baseline_level,
+        "current_level": "DEBUG",
+        "activated_at": override.activated_at.isoformat(),
+        "expires_at": override.expires_at.isoformat(),
+        "remaining_seconds": int(window.total_seconds()),
+    }
+
+
+async def revert_diagnostic(module: str) -> bool:
+    """End the override for ``module`` right now.
+
+    Returns ``False`` when no override was recorded for ``module`` (so
+    callers can answer 404); otherwise cancels the scheduled revert,
+    re-applies the current DB-derived baseline and returns ``True``.
+    """
+    if _active.pop(module, None) is None:
+        return False
+    _remove_scheduled(module)
+    restored = _baseline_for(module, await _read_db_log_levels())
+    logging.getLogger(module).setLevel(restored)
+    _LOGGER.info(
+        "Diagnostic mode: %s reverted from DEBUG back to %s (manual)",
+        module, restored,
+    )
+    return True
+
+
+async def revert_all() -> int:
+    """Revert every recorded override and report how many were reverted.
+
+    Called from the lifespan shutdown so a server stop / hot-reload
+    leaves loggers at their baselines. Works on a snapshot of the keys
+    because each revert removes its entry from ``_active``; reverts run
+    one after another, not concurrently.
+    """
+    outcomes = [await revert_diagnostic(name) for name in tuple(_active)]
+    return sum(outcomes)
+
+
+# ---------------------------------------------------------------------------
+# APScheduler glue — wired here so the API layer doesn't import scheduler.
+# ---------------------------------------------------------------------------
+
+_JOB_PREFIX = "diag_revert::"
+
+
+def _job_id_for(module: str) -> str:
+    """Build the scheduler job id used for ``module``'s revert job."""
+    return f"{_JOB_PREFIX}{module}"
+
+
+def _remove_scheduled(module: str) -> None:
+    """Remove the scheduler's pending revert job for ``module``, if present.
+
+    Best-effort on both steps: the scheduler may be unobtainable (tests),
+    and a missing job is the normal case on first-time activation. Either
+    failure is logged at DEBUG — so an operator chasing a scheduler
+    problem still sees the trail — and otherwise ignored.
+    """
+    try:
+        from .scheduler import get_scheduler
+        active_scheduler = get_scheduler()
+    except Exception:  # noqa: BLE001
+        _LOGGER.debug(
+            "diagnostic_mode: scheduler not yet available for remove(%s)",
+            module, exc_info=True,
+        )
+        return
+    try:
+        active_scheduler.remove_job(_job_id_for(module))
+    except Exception:  # noqa: BLE001 — JobLookupError or not-running.
+        _LOGGER.debug(
+            "diagnostic_mode: no prior schedule to remove for %s",
+            module, exc_info=True,
+        )
+
+
+def _schedule_revert(module: str, when: datetime) -> None:
+    """Schedule the auto-revert one-shot.
+
+    Falls back to a strongly-referenced ``asyncio`` task if the
+    APScheduler instance isn't running (tests, very early startup) so the
+    revert still happens.
+
+    Args:
+        module: Logger name whose override should be reverted.
+        when: Moment at which ``_expire_callback(module)`` should run.
+
+    If neither a running scheduler nor a running event loop is available,
+    the function returns without scheduling anything.
+    """
+    try:
+        from .scheduler import get_scheduler
+        scheduler = get_scheduler()
+        if scheduler.running:
+            # One-shot "date" job keyed by module; ``replace_existing``
+            # makes a repeat activation overwrite the earlier job.
+            scheduler.add_job(
+                _expire_callback,
+                trigger="date",
+                run_date=when,
+                args=[module],
+                id=_job_id_for(module),
+                replace_existing=True,
+                misfire_grace_time=60,
+            )
+            return
+        # Scheduler exists but is not running: fall through to the task
+        # path below (no log line is emitted for this case).
+    except Exception:  # noqa: BLE001 — fall through to the task path.
+        _LOGGER.debug(
+            "diagnostic_mode: scheduler unavailable; using asyncio fallback",
+            exc_info=True,
+        )
+
+    # Fallback: in-process timer. Retain the task in a module-level set so
+    # CPython doesn't GC it before the timer fires.
+    # The delay is clamped at zero so a ``when`` already in the past fires
+    # immediately.
+    delay = max(0.0, (when - datetime.now(timezone.utc)).total_seconds())
+
+    async def _wait_and_expire() -> None:
+        try:
+            await asyncio.sleep(delay)
+        except asyncio.CancelledError:
+            # Cancelled while waiting: skip the revert entirely.
+            return
+        await _expire_callback(module)
+
+    try:
+        loop = asyncio.get_running_loop()
+    except RuntimeError:
+        # No running loop to host the timer; nothing is scheduled.
+        return
+    # NOTE(review): nothing in this function cancels a fallback task
+    # created by an earlier call for the same module.
+    task = loop.create_task(_wait_and_expire())
+    _bg_tasks.add(task)
+    # Drop the strong reference once the task finishes.
+    task.add_done_callback(_bg_tasks.discard)
+
+
+async def _expire_callback(module: str) -> None:
+    """Fired at ``expires_at`` by the scheduler or the asyncio fallback timer.
+
+    Re-applies the baseline for ``module``. ``log_levels`` is re-read from
+    the DB so a setting change made while the window was active is
+    honored at revert time (instead of using a stale snapshot taken at
+    activation).
+
+    Two situations make the callback a no-op:
+
+    * The recorded override expires noticeably later than now. That means
+      this timer belongs to an earlier activation that has since been
+      replaced (asyncio fallback timers are not cancelled on
+      re-activation); reverting here would cut the newer window short.
+    * The module is activated again while the DB read is awaited. The new
+      activation has already set DEBUG and scheduled its own revert.
+
+    A missing record is still reverted: resetting the level is idempotent,
+    and the record may have been swept elsewhere before this fired.
+    """
+    # Small slack so a timer that wakes marginally early (clock
+    # resolution, wall-clock vs. loop-clock skew) is not mistaken for a
+    # stale one. Far below the one-minute minimum window.
+    stale_slack = timedelta(seconds=5)
+    current = _active.get(module)
+    if current is not None:
+        remaining = current.expires_at - datetime.now(timezone.utc)
+        if remaining > stale_slack:
+            _LOGGER.debug(
+                "diagnostic_mode: ignoring stale revert timer for %s "
+                "(override now expires at %s)",
+                module, current.expires_at.isoformat(),
+            )
+            return
+
+    ov = _active.pop(module, None)
+    db_log_levels = await _read_db_log_levels()
+    if module in _active:
+        # Re-activated during the await above; leave the new window alone.
+        _LOGGER.debug(
+            "diagnostic_mode: %s re-activated during revert; skipping",
+            module,
+        )
+        return
+    target = _baseline_for(module, db_log_levels)
+    logging.getLogger(module).setLevel(target)
+    _LOGGER.info(
+        "Diagnostic mode: %s auto-reverted from DEBUG to %s (was active=%s)",
+        module, target, ov is not None,
+    )
@@ -0,0 +1,255 @@
+"""Aggregate per-target dispatch results into an ``EventLog.details`` summary.
+
+Every dispatch site (``event_dispatch``, ``watcher``, ``deferred_dispatch``,
+``scheduled_dispatch``) calls :func:`NotificationDispatcher.dispatch` and
+gets back a ``list[dict]`` — one entry per target. Each entry has at minimum
+``success: bool`` and (on failure) ``error: str``. Telegram media-group
+sends additionally include ``delivered_count``, ``skipped_count``,
+``failed_count``, ``errors`` and ``failed_at_chunk`` so a partial delivery
+is observable from the result.
+
+Historically the dashboard only saw the per-row ``status`` derived at
+EventLog insert time — partial failures (one target out of three failed,
+two assets out of ten dropped) showed up as a generic success/failure and
+the operator had to read stderr to find the cause. This module collapses
+the per-target dicts into a small ``dispatch_summary`` block that's merged
+into ``EventLog.details`` after the dispatch completes, so the same
+information surfaces in the UI without re-reading logs.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+from typing import Any
+
+from sqlalchemy.orm.attributes import flag_modified
+from sqlmodel.ext.asyncio.session import AsyncSession
+
+from ..database.models import EventLog
+
+_LOGGER = logging.getLogger(__name__)
+
+# Bound the error list we stash on the row. A pathological dispatch (50
+# targets, 50 media items each, all failing) would otherwise bloat the
+# row past anything useful — and the dashboard renders a fixed-height
+# strip anyway. Excess entries are summarized as ``errors_truncated``.
+_MAX_ERRORS = 20
+_MAX_MEDIA_ERRORS = 20
+# Cap error message length to avoid pathological payloads in the row.
+_MAX_ERROR_MSG_LEN = 500
+# Distinct sentinel so an operator scanning the dashboard can tell our
+# clipping apart from a literal ``…`` that often appears in upstream API
+# error text (Telegram does this in some Bad Request messages).
+_TRUNCATION_MARKER = "…[truncated]"
+
+
+def _trim(value: Any) -> Any:
+    """Truncate string values to keep the persisted summary bounded.
+
+    Non-string values and strings of at most ``_MAX_ERROR_MSG_LEN``
+    characters are returned unchanged; longer strings are cut to that
+    length and suffixed with ``_TRUNCATION_MARKER``.
+    """
+    if isinstance(value, str) and len(value) > _MAX_ERROR_MSG_LEN:
+        return value[:_MAX_ERROR_MSG_LEN] + _TRUNCATION_MARKER
+    return value
+
+
+def _coerce_count(raw: Any) -> int:
+    """Best-effort integer conversion for a media counter (junk counts as 0)."""
+    try:
+        return int(raw or 0)
+    except (TypeError, ValueError):
+        return 0
+
+
+def _first_present(leaf: dict[str, Any], *keys: str) -> Any:
+    """Return the first non-``None`` value stored under ``keys``, else ``None``."""
+    for key in keys:
+        value = leaf.get(key)
+        if value is not None:
+            return value
+    return None
+
+
+def _target_error(result: dict[str, Any]) -> Any:
+    """Pick the most informative failure text for one failed target.
+
+    Prefers a truthy ``error``; otherwise joins the string entries of the
+    target-level ``errors`` list; otherwise ``"unknown"``.
+    """
+    error = result.get("error")
+    if error:
+        return error
+    listed = result.get("errors")
+    if isinstance(listed, list):
+        texts = [item for item in listed if isinstance(item, str) and item]
+        if texts:
+            return "; ".join(texts)
+    return "unknown"
+
+
+def summarize_dispatch_results(
+    results: list[dict[str, Any]],
+) -> dict[str, Any]:
+    """Aggregate per-target dispatch results into a compact summary dict.
+
+    The shape is intentionally narrow so it round-trips cleanly through
+    SQLite JSON storage and stays cheap to render in the dashboard.
+
+    Returns a dict with keys:
+
+    * ``targets_attempted`` / ``targets_succeeded`` / ``targets_failed``
+      — counts across the results list.
+    * ``errors`` — per-target failure entries
+      (``[{index, error}, ...]``), capped at ``_MAX_ERRORS``. The text
+      comes from the target's ``error``; when that is absent or empty,
+      the string entries of its ``errors`` list are joined with ``"; "``;
+      when neither exists it is ``"unknown"``.
+    * ``media`` — present only when at least one result reports media
+      counts. ``{delivered, skipped, failed}``. A counter that cannot be
+      converted to ``int`` counts as 0 instead of raising, since this
+      summary is diagnostic output only.
+    * ``media_errors`` — per-item / per-chunk failure entries from the
+      Telegram media-group fallback, capped at ``_MAX_MEDIA_ERRORS``.
+    * ``errors_truncated`` / ``media_errors_truncated`` — count of dropped
+      entries when the corresponding cap was hit. Present only when > 0.
+
+    Input shape: each entry is what ``NotificationDispatcher._aggregate_results``
+    returns for one target — ``{success, receivers, successes, failures,
+    results: [per-receiver, ...], errors?, error?}``. Media counts live
+    on each per-receiver dict under ``media_delivered_count`` /
+    ``media_skipped_count`` / ``media_failed_count`` / ``media_errors``,
+    so the walk drills one level deeper than the obvious top-level reads.
+    For backward compat with simpler call sites that pass a single leaf
+    dict (the Telegram media-group result directly), the leaf shape is
+    accepted as a fallback when ``results`` is absent.
+    """
+    if not results:
+        # Empty results = nothing to summarize. Returning ``{}`` lets the
+        # callers' ``if summary`` / ``if results`` guards keep the row
+        # clean rather than stamping a misleading zero-counts block.
+        return {}
+
+    succeeded = 0
+    failed = 0
+    errors: list[dict[str, Any]] = []
+    media_delivered = 0
+    media_skipped = 0
+    media_failed = 0
+    media_errors: list[dict[str, Any]] = []
+    has_media_counts = False
+    errors_dropped = 0
+    media_errors_dropped = 0
+
+    for index, result in enumerate(results):
+        if result.get("success"):
+            succeeded += 1
+        else:
+            failed += 1
+            if len(errors) < _MAX_ERRORS:
+                errors.append({
+                    "index": index,
+                    "error": _trim(_target_error(result)),
+                })
+            else:
+                errors_dropped += 1
+
+        # Per-receiver detail is bundled under ``results`` by the
+        # dispatcher's ``_aggregate_results``. Walk it when present; fall
+        # back to reading the leaf shape directly so older callers and
+        # direct-test fixtures keep working.
+        per_receiver = result.get("results")
+        leaves: list[dict[str, Any]]
+        if isinstance(per_receiver, list):
+            leaves = [r for r in per_receiver if isinstance(r, dict)]
+        else:
+            leaves = [result]
+
+        for receiver_index, leaf in enumerate(leaves):
+            # The dispatcher's Telegram path renames the media counters
+            # to ``media_*`` to disambiguate them from the surrounding
+            # text-message result. Accept both names so a future provider
+            # that surfaces top-level counts (single-shot text+media)
+            # also gets picked up.
+            d = _first_present(leaf, "media_delivered_count", "delivered_count")
+            s = _first_present(leaf, "media_skipped_count", "skipped_count")
+            f = _first_present(leaf, "media_failed_count", "failed_count")
+            if d is not None or s is not None or f is not None:
+                has_media_counts = True
+                media_delivered += _coerce_count(d)
+                media_skipped += _coerce_count(s)
+                media_failed += _coerce_count(f)
+
+            sub_errors = leaf.get("media_errors") or leaf.get("errors") or []
+            for sub in sub_errors:
+                if not isinstance(sub, dict):
+                    # ``_aggregate_results`` populates a string list at
+                    # the target level; only dict entries carry structured
+                    # per-chunk / per-item detail worth keeping here.
+                    continue
+                if len(media_errors) >= _MAX_MEDIA_ERRORS:
+                    media_errors_dropped += 1
+                    continue
+                entry: dict[str, Any] = {"target_index": index}
+                # Only stamp the receiver index when we actually drilled
+                # into a multi-receiver target — single-leaf fallbacks
+                # leave the key off so the existing one-target tests
+                # stay shape-compatible.
+                if len(leaves) > 1 or isinstance(per_receiver, list):
+                    entry["receiver_index"] = receiver_index
+                entry.update({k: _trim(v) for k, v in sub.items()})
+                media_errors.append(entry)
+
+    summary: dict[str, Any] = {
+        "targets_attempted": len(results),
+        "targets_succeeded": succeeded,
+        "targets_failed": failed,
+    }
+    if errors:
+        summary["errors"] = errors
+    if errors_dropped:
+        summary["errors_truncated"] = errors_dropped
+    if has_media_counts:
+        summary["media"] = {
+            "delivered": media_delivered,
+            "skipped": media_skipped,
+            "failed": media_failed,
+        }
+    if media_errors:
+        summary["media_errors"] = media_errors
+    if media_errors_dropped:
+        summary["media_errors_truncated"] = media_errors_dropped
+    return summary
+
+
+def attach_summary_in_place(
+    row: EventLog, results: list[dict[str, Any]],
+) -> None:
+    """Fold a dispatch summary into ``row.details`` ahead of the commit.
+
+    Intended for an ``EventLog`` row still attached to a session that has
+    not committed yet; the caller's ``session.commit()`` carries the
+    update. Nothing changes when the summary is empty or when
+    ``details`` already holds a ``dispatch_summary`` key — the same
+    "caller wins" rule the correlation enricher follows in
+    ``log_context.py``.
+    """
+    summary = summarize_dispatch_results(results)
+    if not summary:
+        return
+    merged = dict(row.details or {})
+    if "dispatch_summary" not in merged:
+        merged["dispatch_summary"] = summary
+        # Assigning a fresh dict already marks the column dirty for
+        # SQLAlchemy; ``flag_modified`` is belt-and-suspenders against a
+        # future refactor that switches this to in-place mutation.
+        row.details = merged
+        flag_modified(row, "details")
+
+
+async def record_dispatch_summary_async(
+    session: AsyncSession,
+    event_log_id: int | None,
+    results: list[dict[str, Any]],
+) -> None:
+    """Best-effort update of an already-committed ``EventLog`` row.
+
+    Used by call sites where the row was committed in an earlier
+    transaction (the polling watcher commits its EventLog rows before
+    invoking the dispatcher, so we need a follow-up update).
+
+    Best-effort: a DB hiccup here must never abort the wider dispatch
+    flow — the row keeps its prior status / details and the operator
+    can still trace via stderr (via the ``dispatch_id`` correlation
+    written at insert time). After a failure the session is rolled back
+    so the caller can keep using it; a session whose flush/commit failed
+    otherwise rejects further work until ``rollback()`` is issued.
+
+    Args:
+        session: The caller's session; this function commits on it.
+        event_log_id: Primary key of the row to update; ``None`` is a no-op.
+        results: Per-target dispatch results; empty is a no-op.
+    """
+    if event_log_id is None or not results:
+        return
+    summary = summarize_dispatch_results(results)
+    if not summary:
+        return
+    try:
+        row = await session.get(EventLog, event_log_id)
+        if row is None:
+            return
+        details = dict(row.details or {})
+        # "Caller wins": never overwrite a summary that is already there.
+        if "dispatch_summary" in details:
+            return
+        details["dispatch_summary"] = summary
+        row.details = details
+        flag_modified(row, "details")
+        session.add(row)
+        await session.commit()
+    except asyncio.CancelledError:
+        # Cancellation must propagate so APScheduler can drain shutdown.
+        # Swallowing it here would pin the task and leave the row in an
+        # indeterminate state.
+        raise
+    except Exception:  # noqa: BLE001
+        _LOGGER.exception(
+            "Failed to record dispatch_summary on event_log %s", event_log_id,
+        )
+        # Return the shared session to a usable state; without this the
+        # caller's next DB operation would fail on the broken transaction.
+        try:
+            await session.rollback()
+        except asyncio.CancelledError:
+            raise
+        except Exception:  # noqa: BLE001 — still best-effort.
+            _LOGGER.debug(
+                "Rollback after dispatch_summary failure also failed "
+                "(event_log %s)",
+                event_log_id, exc_info=True,
+            )
@@ -20,6 +20,11 @@ from typing import Any, Awaitable, Callable
 from sqlmodel import select
 from sqlmodel.ext.asyncio.session import AsyncSession

+from notify_bridge_core.log_context import (
+    bind_log_context,
+    ensure_dispatch_id,
+    enrich_details_with_correlation,
+)
 from notify_bridge_core.models.events import ServiceEvent
 from notify_bridge_core.notifications.dispatcher import (
    NotificationDispatcher,
@@ -36,6 +41,7 @@ from .dispatch_helpers import (
    load_link_data,
    resolve_provider_credential,
 )
+from .dispatch_summary import attach_summary_in_place

 _LOGGER = logging.getLogger(__name__)

@@ -141,6 +147,31 @@ async def dispatch_provider_event(
    int
        Number of successfully dispatched notifications across all trackers.
    """
+    # Bind a dispatch_id for the whole event so every EventLog row written
+    # below — and every log line emitted by the inner dispatcher — share the
+    # same correlation id. The dispatcher's own ``ensure_dispatch_id()`` call
+    # reuses this id rather than generating its own.
+    with bind_log_context(dispatch_id=ensure_dispatch_id()):
+        return await _dispatch_provider_event_impl(
+            engine, provider_id, provider_name, provider_config,
+            event, detail_keys, filter_fn,
+        )
+
+
+async def _dispatch_provider_event_impl(
+    engine: Any,
+    provider_id: int,
+    provider_name: str,
+    provider_config: dict[str, Any],
+    event: ServiceEvent,
+    detail_keys: tuple[str, ...],
+    filter_fn: FilterFn,
+) -> int:
+    """Implementation body for :func:`dispatch_provider_event`.
+
+    Split out so the public function can wrap the body in
+    :func:`bind_log_context` without re-indenting the entire flow.
+    """
    dispatched = 0
    # Drain-scheduling is best-effort: a scheduling failure must not roll
    # back the persisted defer rows (startup catch-up re-establishes them).
@@ -188,10 +219,10 @@ async def dispatch_provider_event(
                collection_id=event.collection_id,
                collection_name=event.collection_name,
                assets_count=0,
-                details={
+                details=enrich_details_with_correlation({
                    "provider_type": event.provider_type.value,
                    **extra_details,
-                },
+                }),
            )
            session.add(event_log_row)
            await session.flush()
@@ -294,6 +325,11 @@ async def dispatch_provider_event(
                event.provider_type.value != "bridge_self"
            )

+            # Accumulate per-target results across every tracking-config
+            # group so the EventLog row carries a single ``dispatch_summary``
+            # covering the full fan-out (not just the last group).
+            all_results: list[dict[str, Any]] = []
+
            for tc, target_entries in groups.values():
                if not target_entries:
                    continue
@@ -308,6 +344,7 @@ async def dispatch_provider_event(
                        "Dispatcher raised for tracker %d: %s", tracker.id, err,
                    )
                    continue
+                all_results.extend(results)
                for entry, r in zip(target_entries, results):
                    _, target_id, target_name = entry
                    if r.get("success"):
@@ -332,6 +369,12 @@ async def dispatch_provider_event(
                                    "bridge_self target-failure emission failed",
                                )

+            # Merge the aggregated per-target results onto the EventLog row
+            # while the session still owns it. The commit below carries the
+            # ``dispatch_summary`` block alongside the row's original fields.
+            if all_results:
+                attach_summary_in_place(event_log_row, all_results)
+
        await session.commit()

    # Schedule drain jobs OUTSIDE the DB session so an APScheduler hiccup
@@ -28,6 +28,7 @@ from typing import Any
 from sqlmodel import select
 from sqlmodel.ext.asyncio.session import AsyncSession

+from notify_bridge_core.log_context import enrich_details_with_correlation
 from notify_bridge_core.models.events import ServiceEvent
 from notify_bridge_core.providers.home_assistant import (
    HomeAssistantAuthError,
@@ -139,11 +140,11 @@ async def _record_ha_status(
                collection_id="",
                collection_name="",
                assets_count=0,
-                details={
+                details=enrich_details_with_correlation({
                    "provider_type": "home_assistant",
                    "ha_status": state,
                    "ha_status_detail": detail or "",
-                },
+                }),
            ))
            await session.commit()
    except Exception:  # noqa: BLE001
@@ -29,6 +29,11 @@ from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
 from sqlmodel import select
 from sqlmodel.ext.asyncio.session import AsyncSession

+from notify_bridge_core.log_context import (
+    bind_log_context,
+    ensure_dispatch_id,
+    enrich_details_with_correlation,
+)
 from notify_bridge_core.models.events import EventType
 from notify_bridge_core.notifications.dispatcher import (
    NotificationDispatcher,
@@ -51,6 +56,7 @@ from .dispatch_helpers import (
    load_link_data,
    resolve_provider_credential,
 )
+from .dispatch_summary import summarize_dispatch_results
 from .manual_dispatch import build_immich_dispatch_events

 _LOGGER = logging.getLogger(__name__)
@@ -135,12 +141,12 @@ async def _log_skip(
            collection_id="",
            collection_name="",
            assets_count=0,
-            details={
+            details=enrich_details_with_correlation({
                "kind": kind,
                "trigger": "cron",
                "status": "skipped",
                "skip_reason": reason,
-            },
+            }),
        ))
        await session.commit()

@@ -164,6 +170,15 @@ async def dispatch_scheduled_for_tracker(
    the slot is disabled on the tracker's default tracking config, or no link
    has a ``TemplateConfig`` with the corresponding slot row.
    """
+    # Bind a dispatch_id for the whole cron fire so the EventLog "skipped" /
+    # "sent" rows AND the inner dispatcher log lines share one correlation id.
+    with bind_log_context(dispatch_id=ensure_dispatch_id()):
+        await _dispatch_scheduled_for_tracker_impl(tracker_id, kind)
+
+
+async def _dispatch_scheduled_for_tracker_impl(
+    tracker_id: int, kind: ScheduledKind
+) -> None:
    engine = get_engine()
    async with AsyncSession(engine) as session:
        tracker = await session.get(NotificationTracker, tracker_id)
@@ -390,6 +405,9 @@ async def dispatch_scheduled_for_tracker(
        any_sent = True

        successes = sum(1 for r in results if isinstance(r, dict) and r.get("success"))
+        summary = summarize_dispatch_results(
+            [r for r in results if isinstance(r, dict)],
+        )
        async with AsyncSession(engine) as session:
            session.add(EventLog(
                user_id=tracker_user_id,
@@ -401,7 +419,7 @@ async def dispatch_scheduled_for_tracker(
                collection_id=event.collection_id,
                collection_name=event.collection_name,
                assets_count=event.added_count or 0,
-                details={
+                details=enrich_details_with_correlation({
                    "kind": kind,
                    "slot": slot_name,
                    "trigger": "cron",
@@ -410,7 +428,8 @@ async def dispatch_scheduled_for_tracker(
                    "status": "sent",
                    "targets_dispatched": total_targets,
                    "targets_succeeded": successes,
-                },
+                    "dispatch_summary": summary,
+                }),
            ))
            await session.commit()

@@ -95,6 +95,7 @@ async def send_telegram_media(
    chunk_delay: int = 0,
    max_asset_data_size: int | None = None,
    send_large_photos_as_documents: bool = False,
+    send_large_videos_as_documents: bool = False,
    chat_action: str | None = "typing",
    thumbhash_resolver: Callable[[str], str | None] | None = None,
 ) -> NotificationResult:
@@ -116,6 +117,7 @@ async def send_telegram_media(
        chunk_delay=chunk_delay,
        max_asset_data_size=max_asset_data_size,
        send_large_photos_as_documents=send_large_photos_as_documents,
+        send_large_videos_as_documents=send_large_videos_as_documents,
        chat_action=chat_action,
    )

@@ -9,6 +9,11 @@ from typing import Any, Awaitable, Callable
 from sqlmodel import select
 from sqlmodel.ext.asyncio.session import AsyncSession

+from notify_bridge_core.log_context import (
+    bind_log_context,
+    ensure_dispatch_id,
+    enrich_details_with_correlation,
+)
 from notify_bridge_core.models.events import ServiceEvent
 from notify_bridge_core.notifications.dispatcher import NotificationDispatcher, TargetConfig
 from notify_bridge_core.notifications.telegram.cache import TelegramFileCache
@@ -30,6 +35,7 @@ from .dispatch_helpers import (
    load_link_data,
    resolve_provider_credential,
 )
+from .dispatch_summary import record_dispatch_summary_async

 _LOGGER = logging.getLogger(__name__)

@@ -262,6 +268,13 @@ _POLL_FACTORIES: dict[str, PollerFactory] = {

 async def check_tracker(tracker_id: int) -> dict[str, Any]:
    """Poll a tracker's provider for changes and dispatch notifications."""
+    # Bind a per-tick dispatch_id so the EventLog row written for each detected
+    # change carries the same correlation id as the dispatcher's log lines.
+    with bind_log_context(dispatch_id=ensure_dispatch_id()):
+        return await _check_tracker_impl(tracker_id)
+
+
+async def _check_tracker_impl(tracker_id: int) -> dict[str, Any]:
    engine = get_engine()

    # Load all DB data eagerly before entering aiohttp context
@@ -457,7 +470,7 @@ async def check_tracker(tracker_id: int) -> dict[str, Any]:
                collection_id=event.collection_id,
                collection_name=event.collection_name,
                assets_count=assets_count,
-                details=details,
+                details=enrich_details_with_correlation(details),
            )
            session.add(log)
            await session.flush()
@@ -605,6 +618,10 @@ async def check_tracker(tracker_id: int) -> dict[str, Any]:
                event.provider_type.value != "bridge_self"
            )

+            # Per-event accumulator so the summary write covers every
+            # tracking-config group, not just the last one.
+            event_results: list[dict[str, Any]] = []
+
            for tc, target_entries in groups.values():
                if not target_entries:
                    continue
@@ -616,6 +633,7 @@ async def check_tracker(tracker_id: int) -> dict[str, Any]:
                    continue
                target_configs = [entry[0] for entry in target_entries]
                results = await dispatcher.dispatch(shaped_event, target_configs)
+                event_results.extend(results)
                for entry, r in zip(target_entries, results):
                    _, target_id, target_name = entry
                    if r.get("success"):
@@ -637,6 +655,15 @@ async def check_tracker(tracker_id: int) -> dict[str, Any]:
                                    "bridge_self target-failure emission failed",
                                )

+            # The EventLog row was committed in the earlier session block,
+            # so run a small follow-up UPDATE in a fresh session. This is
+            # best-effort: a failure is logged but never aborts the watcher.
+            if event_log_id is not None and event_results:
+                async with AsyncSession(engine) as summary_session:
+                    await record_dispatch_summary_async(
+                        summary_session, event_log_id, event_results,
+                    )
+
    return {
        "status": "ok",
        "events_detected": len(events),
@@ -0,0 +1,372 @@
+"""Tests for temporary per-module DEBUG overrides with auto-revert.
+
+Covers the in-memory service module + a smoke pass over the API layer
+using ``dependency_overrides`` to bypass auth. The APScheduler glue is
+exercised via the fallback asyncio-timer path since tests run without a
+running scheduler.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+from datetime import datetime, timedelta, timezone
+from typing import Any
+
+import pytest
+from fastapi.testclient import TestClient
+
+
+# ---------------------------------------------------------------------------
+# Test scaffolding
+# ---------------------------------------------------------------------------
+
+
+def _reset_state() -> None:
+    """Clear the module-level ``_active`` dict so earlier activations don't bleed
+    across cases. NOTE(review): ``_bg_tasks`` is left untouched — confirm intended."""
+    from notify_bridge_server.services import diagnostic_mode as svc
+
+    svc._active.clear()
+
+
+@pytest.fixture(autouse=True)
+def _stub_db_read(monkeypatch):
+    """Default every test to a fixed empty ``log_levels`` snapshot.
+
+    A test that wants to exercise DB-override precedence overrides this
+    fixture by re-patching the function explicitly.
+    """
+    async def fake() -> str:
+        return ""
+
+    from notify_bridge_server.services import diagnostic_mode as svc
+
+    monkeypatch.setattr(svc, "_read_db_log_levels", fake)
+
+
+def _patch_db_read(monkeypatch, value: str) -> None:
+    """Override the auto-applied fixture for a single test that needs a
+    non-empty ``log_levels`` value."""
+    async def fake() -> str:
+        return value
+
+    from notify_bridge_server.services import diagnostic_mode as svc
+
+    monkeypatch.setattr(svc, "_read_db_log_levels", fake)
+
+
+# ---------------------------------------------------------------------------
+# Unit tests — service module
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_set_diagnostic_applies_debug_immediately(tmp_data_dir) -> None:  # noqa: ARG001
+    from notify_bridge_server.services.diagnostic_mode import set_diagnostic
+
+    _reset_state()
+    module = "notify_bridge_core.notifications.telegram.client"
+
+    entry = await set_diagnostic(module, duration_minutes=30)
+
+    assert entry["module"] == module
+    assert entry["current_level"] == "DEBUG"
+    assert entry["remaining_seconds"] > 60 * 29
+    assert logging.getLogger(module).level == logging.DEBUG
+
+
+@pytest.mark.asyncio
+async def test_set_diagnostic_rejects_unlisted_module(tmp_data_dir) -> None:  # noqa: ARG001
+    """Only the documented namespaces should be flippable from the UI."""
+    from notify_bridge_server.services.diagnostic_mode import set_diagnostic
+
+    _reset_state()
+    with pytest.raises(ValueError, match="allowlist"):
+        await set_diagnostic("some_random_third_party", 30)
+
+
+@pytest.mark.asyncio
+async def test_set_diagnostic_rejects_root_logger(tmp_data_dir) -> None:  # noqa: ARG001
+    """The empty string would target root — explicitly disallowed."""
+    from notify_bridge_server.services.diagnostic_mode import set_diagnostic
+
+    _reset_state()
+    with pytest.raises(ValueError, match="allowlist"):
+        await set_diagnostic("", 30)
+
+
+@pytest.mark.asyncio
+async def test_set_diagnostic_rejects_unreasonable_durations(tmp_data_dir) -> None:  # noqa: ARG001
+    from notify_bridge_server.services.diagnostic_mode import set_diagnostic
+
+    _reset_state()
+    with pytest.raises(ValueError, match="duration_minutes"):
+        await set_diagnostic("notify_bridge_core", 0)
+    with pytest.raises(ValueError, match="duration_minutes"):
+        await set_diagnostic("notify_bridge_core", 9999)
+
+
+@pytest.mark.asyncio
+async def test_baseline_from_db_override(tmp_data_dir, monkeypatch) -> None:  # noqa: ARG001
+    """``log_levels`` setting wins over the noisy-library default."""
+    from notify_bridge_server.services.diagnostic_mode import set_diagnostic
+
+    _reset_state()
+    _patch_db_read(monkeypatch, "sqlalchemy.engine=ERROR")
+    entry = await set_diagnostic("sqlalchemy.engine", duration_minutes=15)
+    assert entry["baseline_level"] == "ERROR"
+
+
+@pytest.mark.asyncio
+async def test_baseline_from_noisy_default(tmp_data_dir) -> None:  # noqa: ARG001
+    """No DB override falls through to the curated noisy-lib quiet list."""
+    from notify_bridge_server.services.diagnostic_mode import set_diagnostic
+
+    _reset_state()
+    entry = await set_diagnostic("sqlalchemy.engine", duration_minutes=15)
+    assert entry["baseline_level"] == "WARNING"
+
+
+@pytest.mark.asyncio
+async def test_baseline_prefix_walks_for_submodule(tmp_data_dir, monkeypatch) -> None:  # noqa: ARG001
+    """A sub-logger like ``sqlalchemy.engine.Engine`` inherits its parent's
+    noisy-default level (WARNING), not the root INFO."""
+    from notify_bridge_server.services.diagnostic_mode import set_diagnostic
+
+    _reset_state()
+    entry = await set_diagnostic(
+        "sqlalchemy.engine.Engine", duration_minutes=15,
+    )
+    assert entry["baseline_level"] == "WARNING"
+
+
+@pytest.mark.asyncio
+async def test_baseline_prefix_walks_for_db_override(tmp_data_dir, monkeypatch) -> None:  # noqa: ARG001
+    """An explicit ``log_levels`` entry covers all sub-loggers below it."""
+    from notify_bridge_server.services.diagnostic_mode import set_diagnostic
+
+    _reset_state()
+    _patch_db_read(
+        monkeypatch, "notify_bridge_core.notifications=ERROR",
+    )
+    entry = await set_diagnostic(
+        "notify_bridge_core.notifications.telegram.client",
+        duration_minutes=15,
+    )
+    assert entry["baseline_level"] == "ERROR"
+
+
+@pytest.mark.asyncio
+async def test_set_diagnostic_twice_replaces_schedule(tmp_data_dir) -> None:  # noqa: ARG001
+    """Clicking the button twice extends, doesn't stack."""
+    from notify_bridge_server.services.diagnostic_mode import (
+        list_active, set_diagnostic,
+    )
+
+    _reset_state()
+    module = "notify_bridge_core"
+    await set_diagnostic(module, 5)
+    first_active = list_active()
+    assert len(first_active) == 1
+    first_expires = first_active[0]["expires_at"]
+
+    # Sleep just long enough to make the timestamps distinct, then re-set.
+    await asyncio.sleep(0.05)
+    await set_diagnostic(module, 60)
+    second_active = list_active()
+    assert len(second_active) == 1
+    assert second_active[0]["expires_at"] != first_expires
+    assert second_active[0]["remaining_seconds"] > 30 * 60
+
+
+@pytest.mark.asyncio
+async def test_manual_revert_restores_baseline(tmp_data_dir) -> None:  # noqa: ARG001
+    from notify_bridge_server.services.diagnostic_mode import (
+        revert_diagnostic, set_diagnostic,
+    )
+
+    _reset_state()
+    module = "sqlalchemy.engine"
+    await set_diagnostic(module, 30)
+    assert logging.getLogger(module).level == logging.DEBUG
+
+    reverted = await revert_diagnostic(module)
+    assert reverted is True
+    # noisy-library default is WARNING (30)
+    assert logging.getLogger(module).level == logging.WARNING
+
+
+@pytest.mark.asyncio
+async def test_revert_reads_db_at_revert_time(tmp_data_dir, monkeypatch) -> None:  # noqa: ARG001
+    """Editing ``log_levels`` while the override is active is honored when
+    the revert fires — not the snapshot taken at activation time."""
+    from notify_bridge_server.services.diagnostic_mode import (
+        revert_diagnostic, set_diagnostic,
+    )
+
+    _reset_state()
+    module = "sqlalchemy.engine"
+    _patch_db_read(monkeypatch, "")
+    await set_diagnostic(module, 30)
+
+    # Operator edits the setting mid-window — bump to ERROR.
+    _patch_db_read(monkeypatch, "sqlalchemy.engine=ERROR")
+
+    assert await revert_diagnostic(module) is True
+    assert logging.getLogger(module).level == logging.ERROR
+
+
+@pytest.mark.asyncio
+async def test_manual_revert_no_active_returns_false(tmp_data_dir) -> None:  # noqa: ARG001
+    from notify_bridge_server.services.diagnostic_mode import revert_diagnostic
+
+    _reset_state()
+    assert await revert_diagnostic("notify_bridge_core") is False
+
+
+@pytest.mark.asyncio
+async def test_auto_revert_after_window_elapses(tmp_data_dir) -> None:  # noqa: ARG001
+    """The asyncio-timer fallback fires near ``expires_at`` and restores
+    the baseline. Uses a sub-second window so the test stays fast.
+
+    Bypasses ``set_diagnostic`` (which clamps to minutes) by populating the
+    ``_active`` dict and calling ``_schedule_revert`` directly.
+    """
+    from notify_bridge_server.services import diagnostic_mode as svc
+
+    _reset_state()
+    module = "sqlalchemy.engine"
+    baseline = svc._baseline_for(module, db_log_levels="")
+    now = datetime.now(timezone.utc)
+    expires = now + timedelta(seconds=0.3)
+    logging.getLogger(module).setLevel("DEBUG")
+    svc._active[module] = svc._Override(
+        module=module,
+        baseline_level=baseline,
+        activated_at=now,
+        expires_at=expires,
+    )
+    svc._schedule_revert(module, expires)
+
+    await asyncio.sleep(0.5)
+
+    assert module not in svc._active
+    assert logging.getLogger(module).level == logging.WARNING
+
+
+@pytest.mark.asyncio
+async def test_fallback_task_retained_until_fire(tmp_data_dir) -> None:  # noqa: ARG001
+    """The asyncio fallback path must keep a strong reference to its task
+    so CPython doesn't GC it before the timer fires."""
+    from notify_bridge_server.services import diagnostic_mode as svc
+
+    _reset_state()
+    when = datetime.now(timezone.utc) + timedelta(seconds=10)
+    svc._schedule_revert("notify_bridge_core", when)
+    # The retainer set should hold exactly the task we just queued.
+    assert len(svc._bg_tasks) == 1
+    # Cancel it to clean up; the done-callback will drop it.
+    for task in list(svc._bg_tasks):
+        task.cancel()
+    await asyncio.sleep(0)
+
+
+def test_list_active_omits_and_sweeps_expired(tmp_data_dir) -> None:  # noqa: ARG001
+    """Expired entries are filtered AND removed so a delayed scheduler
+    fire doesn't leave ghost rows in ``_active`` forever."""
+    from notify_bridge_server.services import diagnostic_mode as svc
+
+    _reset_state()
+    past = datetime.now(timezone.utc) - timedelta(minutes=1)
+    svc._active["sqlalchemy.engine"] = svc._Override(
+        module="sqlalchemy.engine",
+        baseline_level="WARNING",
+        activated_at=past - timedelta(minutes=30),
+        expires_at=past,
+    )
+    assert svc.list_active() == []
+    assert "sqlalchemy.engine" not in svc._active
+
+
+@pytest.mark.asyncio
+async def test_revert_all_clears_every_override(tmp_data_dir) -> None:  # noqa: ARG001
+    from notify_bridge_server.services.diagnostic_mode import (
+        list_active, revert_all, set_diagnostic,
+    )
+
+    _reset_state()
+    await set_diagnostic("notify_bridge_core", 30)
+    await set_diagnostic("sqlalchemy.engine", 30)
+    assert len(list_active()) == 2
+
+    count = await revert_all()
+    assert count == 2
+    assert list_active() == []
+
+
+# ---------------------------------------------------------------------------
+# API smoke — bypasses auth via dependency_overrides
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture
+def _admin_client(tmp_data_dir):  # noqa: ARG001
+    """Yield a TestClient with ``require_admin`` short-circuited.
+
+    Keeps the auth-flow's SQLAlchemy/greenlet issues out of the picture
+    while still exercising the FastAPI router, path converters, and the
+    ``HTTPException`` paths.
+    """
+    _reset_state()
+    from notify_bridge_server.auth.dependencies import require_admin
+    from notify_bridge_server.database.models import User
+    from notify_bridge_server.main import app
+
+    fake = User(
+        id=1, username="admin",
+        password_hash="x", role="admin", token_version=0,
+    )
+    app.dependency_overrides[require_admin] = lambda: fake
+
+    with TestClient(app) as client:
+        yield client
+
+    app.dependency_overrides.pop(require_admin, None)
+    _reset_state()
+
+
+def test_api_post_rejects_unlisted_module_with_400(_admin_client: TestClient) -> None:
+    resp = _admin_client.post(
+        "/api/settings/diagnostic-mode",
+        json={"module": "evil.namespace", "duration_minutes": 15},
+    )
+    assert resp.status_code == 400
+    assert "allowlist" in resp.json().get("detail", "")
+
+
+def test_api_post_rejects_huge_duration_with_400(_admin_client: TestClient) -> None:
+    resp = _admin_client.post(
+        "/api/settings/diagnostic-mode",
+        json={"module": "notify_bridge_core", "duration_minutes": 99999},
+    )
+    assert resp.status_code == 400
+
+
+def test_api_delete_unknown_returns_404(_admin_client: TestClient) -> None:
+    resp = _admin_client.delete(
+        "/api/settings/diagnostic-mode/notify_bridge_core",
+    )
+    assert resp.status_code == 404
+
+
+def test_api_delete_handles_dotted_module_path(_admin_client: TestClient) -> None:
+    """``{module:path}`` lets dotted names survive URL routing intact."""
+    target = "notify_bridge_core.notifications.telegram.client"
+    _admin_client.post(
+        "/api/settings/diagnostic-mode",
+        json={"module": target, "duration_minutes": 15},
+    )
+    resp = _admin_client.delete(f"/api/settings/diagnostic-mode/{target}")
+    assert resp.status_code == 200, resp.text
+    assert resp.json()["reverted"] == target
@@ -0,0 +1,357 @@
+"""Tests for aggregating per-target dispatch results into ``EventLog.details``.
+
+Covers ``summarize_dispatch_results`` and ``attach_summary_in_place``.
+The async ``record_dispatch_summary_async`` is exercised through the
+in-process update path; the watcher-style flow is covered indirectly via
+the full server tests.
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+import pytest
+
+
+def test_summarize_empty_returns_empty(tmp_data_dir) -> None:  # noqa: ARG001
+    """Empty results = nothing to summarize. Callers can short-circuit
+    on the falsy return so a row with zero dispatches doesn't get a
+    misleading zero-counts block."""
+    from notify_bridge_server.services.dispatch_summary import (
+        summarize_dispatch_results,
+    )
+
+    assert summarize_dispatch_results([]) == {}
+
+
+def test_summarize_all_success_no_errors_block(tmp_data_dir) -> None:  # noqa: ARG001
+    from notify_bridge_server.services.dispatch_summary import (
+        summarize_dispatch_results,
+    )
+
+    results = [
+        {"success": True, "message_id": 1},
+        {"success": True, "message_id": 2},
+    ]
+    summary = summarize_dispatch_results(results)
+    assert summary["targets_attempted"] == 2
+    assert summary["targets_succeeded"] == 2
+    assert summary["targets_failed"] == 0
+    assert "errors" not in summary
+    assert "media" not in summary
+
+
+def test_summarize_mixed_records_only_failures(tmp_data_dir) -> None:  # noqa: ARG001
+    from notify_bridge_server.services.dispatch_summary import (
+        summarize_dispatch_results,
+    )
+
+    results = [
+        {"success": True},
+        {"success": False, "error": "Bad Request: chat not found"},
+        {"success": False, "error": "timeout"},
+    ]
+    summary = summarize_dispatch_results(results)
+    assert summary["targets_succeeded"] == 1
+    assert summary["targets_failed"] == 2
+    assert summary["errors"] == [
+        {"index": 1, "error": "Bad Request: chat not found"},
+        {"index": 2, "error": "timeout"},
+    ]
+
+
+def test_summarize_media_counts_aggregate(tmp_data_dir) -> None:  # noqa: ARG001
+    """Media counts from a Telegram media-group success are merged."""
+    from notify_bridge_server.services.dispatch_summary import (
+        summarize_dispatch_results,
+    )
+
+    results = [
+        {
+            "success": True,
+            "delivered_count": 5,
+            "skipped_count": 1,
+            "failed_count": 0,
+        },
+        {
+            "success": True,
+            "delivered_count": 3,
+            "skipped_count": 0,
+            "failed_count": 0,
+        },
+    ]
+    summary = summarize_dispatch_results(results)
+    assert summary["media"] == {"delivered": 8, "skipped": 1, "failed": 0}
+
+
+def test_summarize_sub_errors_carry_target_index(tmp_data_dir) -> None:  # noqa: ARG001
+    """Per-chunk/per-item failures from a partial media-group send are flattened."""
+    from notify_bridge_server.services.dispatch_summary import (
+        summarize_dispatch_results,
+    )
+
+    results = [
+        {"success": True, "delivered_count": 1, "skipped_count": 0, "failed_count": 0},
+        {
+            "success": True,  # group landed but with partial failure
+            "delivered_count": 2,
+            "skipped_count": 0,
+            "failed_count": 1,
+            "errors": [
+                {"kind": "chunk", "chunk": 1, "error": "Bad Request: ..."},
+                {"kind": "item", "chunk": 1, "item_index": 2, "error": "media not found"},
+            ],
+        },
+    ]
+    summary = summarize_dispatch_results(results)
+    assert summary["media_errors"] == [
+        {"target_index": 1, "kind": "chunk", "chunk": 1, "error": "Bad Request: ..."},
+        {
+            "target_index": 1,
+            "kind": "item",
+            "chunk": 1,
+            "item_index": 2,
+            "error": "media not found",
+        },
+    ]
+
+
+def test_summarize_caps_errors_and_reports_truncation(tmp_data_dir) -> None:  # noqa: ARG001
+    from notify_bridge_server.services.dispatch_summary import (
+        summarize_dispatch_results,
+    )
+
+    results: list[dict[str, Any]] = [
+        {"success": False, "error": f"err {i}"} for i in range(25)
+    ]
+    summary = summarize_dispatch_results(results)
+    assert len(summary["errors"]) == 20
+    assert summary["errors_truncated"] == 5
+
+
+def test_summarize_trims_long_error_messages(tmp_data_dir) -> None:  # noqa: ARG001
+    """A pathological multi-KB error string is bounded so the row stays small."""
+    from notify_bridge_server.services.dispatch_summary import (
+        summarize_dispatch_results,
+    )
+
+    long_err = "x" * 2000
+    results = [{"success": False, "error": long_err}]
+    summary = summarize_dispatch_results(results)
+    persisted = summary["errors"][0]["error"]
+    assert persisted.endswith("…[truncated]")
+    # 500 char body + the explicit "…[truncated]" marker.
+    assert len(persisted) == 500 + len("…[truncated]")
+
+
+@pytest.mark.asyncio
+async def test_attach_summary_in_place_mutates_details_dict(tmp_data_dir) -> None:  # noqa: ARG001
+    """In-session call merges the summary without losing original keys."""
+    from notify_bridge_server.database.models import EventLog
+    from notify_bridge_server.services.dispatch_summary import (
+        attach_summary_in_place,
+    )
+
+    row = EventLog(
+        event_type="assets_added",
+        collection_id="abc",
+        collection_name="Album",
+        details={"provider_type": "immich", "added_count": 3},
+    )
+    attach_summary_in_place(row, [{"success": True}, {"success": False, "error": "x"}])
+    assert row.details["provider_type"] == "immich"
+    assert row.details["added_count"] == 3
+    assert row.details["dispatch_summary"] == {
+        "targets_attempted": 2,
+        "targets_succeeded": 1,
+        "targets_failed": 1,
+        "errors": [{"index": 1, "error": "x"}],
+    }
+
+
+@pytest.mark.asyncio
+async def test_attach_summary_in_place_with_no_results_is_noop(tmp_data_dir) -> None:  # noqa: ARG001
+    """An empty result list leaves ``details`` exactly as it was: no
+    ``dispatch_summary`` key is added."""
+    from notify_bridge_server.database.models import EventLog
+    from notify_bridge_server.services.dispatch_summary import (
+        attach_summary_in_place,
+    )
+
+    event = EventLog(
+        event_type="assets_added",
+        collection_id="abc",
+        collection_name="Album",
+        details={"k": "v"},
+    )
+
+    attach_summary_in_place(event, [])
+
+    details_after = event.details
+    assert details_after == {"k": "v"}
+    assert "dispatch_summary" not in details_after
+
+
+def test_summarize_handles_malformed_sub_errors(tmp_data_dir) -> None:  # noqa: ARG001
+    """Sub-error entries that are not dicts are left out of ``media_errors``."""
+    from notify_bridge_server.services.dispatch_summary import (
+        summarize_dispatch_results,
+    )
+
+    # One string entry (malformed) followed by one well-formed entry.
+    sub_errors = ["not a dict", {"kind": "item", "error": "real"}]
+    target = {"success": True, "delivered_count": 1, "errors": sub_errors}
+
+    summary = summarize_dispatch_results([target])
+
+    # Only the well-formed entry survives, tagged with its target position.
+    expected = [{"target_index": 0, "kind": "item", "error": "real"}]
+    assert summary["media_errors"] == expected
+
+
+# ---------------------------------------------------------------------------
+# Integration: real dispatcher output shape from ``_aggregate_results``
+# ---------------------------------------------------------------------------
+#
+# The dispatcher wraps each Telegram fan-out in a per-target envelope:
+#
+#   {
+#     "success": True,
+#     "receivers": 2,
+#     "successes": 2,
+#     "failures": 0,
+#     "results": [<per-receiver dict>, ...],   # ← media counts live HERE
+#   }
+#
+# These tests use that exact shape so a future refactor of the dispatcher
+# doesn't silently zero out the dashboard's ``dispatch_summary.media``
+# block. Earlier versions of this file passed leaf dicts directly, which
+# masked the wrong-shape read in production.
+
+
+def test_summarize_drills_into_aggregated_per_receiver_dicts(tmp_data_dir) -> None:  # noqa: ARG001
+    """Media counts stored on per-receiver leaves are totalled in the summary."""
+    from notify_bridge_server.services.dispatch_summary import (
+        summarize_dispatch_results,
+    )
+
+    # A single target envelope wrapping two Telegram receiver results.
+    first_receiver = {
+        "success": True,
+        "message_id": 100,
+        "media_delivered_count": 5,
+        "media_skipped_count": 1,
+        "media_failed_count": 0,
+    }
+    second_receiver = {
+        "success": True,
+        "message_id": 101,
+        "media_delivered_count": 3,
+        "media_skipped_count": 0,
+        "media_failed_count": 0,
+    }
+    target = {
+        "success": True,
+        "receivers": 2,
+        "successes": 2,
+        "failures": 0,
+        "results": [first_receiver, second_receiver],
+    }
+
+    summary = summarize_dispatch_results([target])
+
+    # 5 + 3 delivered, 1 + 0 skipped, 0 + 0 failed.
+    assert summary["media"] == {"delivered": 8, "skipped": 1, "failed": 0}
+
+
+def test_summarize_collects_aggregated_media_errors_with_receiver_index(
+    tmp_data_dir,  # noqa: ARG001
+) -> None:
+    """Media errors found on a receiver leaf are reported with both the
+    target position and the receiver position."""
+    from notify_bridge_server.services.dispatch_summary import (
+        summarize_dispatch_results,
+    )
+
+    chunk_error = {"kind": "chunk", "chunk": 1, "error": "Bad Request"}
+    item_error = {
+        "kind": "item",
+        "chunk": 1,
+        "item_index": 2,
+        "error": "media not found",
+    }
+    receiver = {
+        "success": True,
+        "message_id": 200,
+        "media_delivered_count": 2,
+        "media_failed_count": 1,
+        "media_errors": [chunk_error, item_error],
+    }
+    target = {
+        "success": True,
+        "receivers": 1,
+        "successes": 1,
+        "failures": 0,
+        "results": [receiver],
+    }
+
+    summary = summarize_dispatch_results([target])
+
+    # Each error keeps its own fields and gains the two position keys.
+    position = {"target_index": 0, "receiver_index": 0}
+    assert summary["media_errors"] == [
+        {**position, **chunk_error},
+        {**position, **item_error},
+    ]
+
+
+def test_summarize_aggregated_target_errors_list_is_safely_ignored(
+    tmp_data_dir,  # noqa: ARG001
+) -> None:
+    """A target-level ``errors`` list of plain strings (the shape the
+    module comment attributes to ``_aggregate_results``) must not be
+    read as structured media sub-errors."""
+    from notify_bridge_server.services.dispatch_summary import (
+        summarize_dispatch_results,
+    )
+
+    receiver_errors = ["chat_not_found", "blocked_by_user"]
+    failed_target = {
+        "success": False,
+        "receivers": 2,
+        "successes": 0,
+        "failures": 2,
+        "error": "All receivers failed",
+        "errors": receiver_errors,
+        "results": [{"success": False, "error": text} for text in receiver_errors],
+    }
+
+    summary = summarize_dispatch_results([failed_target])
+
+    assert summary["targets_failed"] == 1
+    # Only the target-level message is reported for the failed target.
+    assert summary["errors"] == [{"index": 0, "error": "All receivers failed"}]
+    # The flat string list produces neither media errors nor media counts.
+    for absent_key in ("media_errors", "media"):
+        assert absent_key not in summary
+
+
+@pytest.mark.asyncio
+async def test_attach_summary_in_place_skips_when_already_set(
+    tmp_data_dir,  # noqa: ARG001
+) -> None:
+    """A ``dispatch_summary`` already present in ``details`` is kept as-is
+    rather than being replaced by a freshly computed one."""
+    from notify_bridge_server.database.models import EventLog
+    from notify_bridge_server.services.dispatch_summary import (
+        attach_summary_in_place,
+    )
+
+    event = EventLog(
+        event_type="assets_added",
+        collection_id="abc",
+        collection_name="Album",
+        details={"dispatch_summary": {"pinned": True}},
+    )
+
+    attach_summary_in_place(event, [{"success": True}])
+
+    # The caller-provided value wins over the computed summary.
+    assert event.details["dispatch_summary"] == {"pinned": True}
@@ -0,0 +1,158 @@
+"""Request-ID middleware + EventLog dispatch_id correlation.
+
+Covers two halves of the same correlation story:
+
+* ``RequestContextMiddleware`` generates / accepts an inbound request id,
+  binds it onto the log-context ContextVar for the duration of the request,
+  and echoes it back as the ``X-Request-Id`` response header.
+* ``enrich_details_with_correlation`` merges the active ``dispatch_id`` and
+  ``request_id`` into an ``EventLog.details`` dict so the persisted row can
+  be cross-referenced with the stderr log lines emitted during the same
+  dispatch.
+"""
+
+from __future__ import annotations
+
+import re
+
+import pytest
+from fastapi.testclient import TestClient
+
+
+# Shape the tests below expect for a server-generated request id:
+# the literal prefix "req:" followed by 12 lowercase hex digits.
+_REQ_ID_PATTERN = re.compile(r"^req:[0-9a-f]{12}$")
+
+
+def test_response_carries_generated_request_id(tmp_data_dir) -> None:  # noqa: ARG001
+    """With no inbound header, the response carries a ``req:<12 hex>`` id."""
+    from notify_bridge_server.main import app
+
+    with TestClient(app) as http:
+        response = http.get("/api/health")
+        assert response.status_code == 200
+
+        req_id = response.headers.get("X-Request-Id")
+        # The header must be present and have the generated-id shape.
+        assert req_id is not None
+        assert _REQ_ID_PATTERN.match(req_id), (
+            f"generated id {req_id!r} should match req:<12 hex>"
+        )
+
+
+def test_response_echoes_safe_inbound_request_id(tmp_data_dir) -> None:  # noqa: ARG001
+    """An inbound ``X-Request-Id`` of letters, digits, ``-`` and ``_`` comes
+    back unchanged on the response."""
+    from notify_bridge_server.main import app
+
+    supplied = "abc-123_XYZ_trace"
+    request_headers = {"X-Request-Id": supplied}
+    with TestClient(app) as http:
+        response = http.get("/api/health", headers=request_headers)
+        assert response.status_code == 200
+        assert response.headers.get("X-Request-Id") == supplied
+
+
+def test_colon_prefixed_inbound_id_is_replaced(tmp_data_dir) -> None:  # noqa: ARG001
+    """An inbound id containing ``:`` is swapped for a generated one, so a
+    client cannot present a value that looks server-minted (``disp:...``)."""
+    from notify_bridge_server.main import app
+
+    spoofed = "disp:fake12345678"
+    with TestClient(app) as http:
+        response = http.get("/api/health", headers={"X-Request-Id": spoofed})
+        assert response.status_code == 200
+
+        returned = response.headers.get("X-Request-Id", "")
+        # The spoofed value is not echoed; a ``req:<12 hex>`` id is returned.
+        assert returned != spoofed
+        assert _REQ_ID_PATTERN.match(returned)
+
+
+@pytest.mark.parametrize(
+    "bad_value",
+    [
+        # Embedded CR/LF, as a log-line / header-splitting attempt would use.
+        "abc\r\ninjected: yes",
+        # 256 characters: far longer than any id the other tests accept.
+        "x" * 256,
+        # Markup characters outside the accepted alphabet.
+        "<script>alert(1)</script>",
+        # Whitespace only, i.e. nothing left once stripped.
+        "   ",
+    ],
+)
+def test_unsafe_inbound_request_id_is_replaced(
+    tmp_data_dir, bad_value: str,  # noqa: ARG001
+) -> None:
+    """Unsafe inbound ids are never echoed; a generated id is returned."""
+    from notify_bridge_server.main import app
+
+    with TestClient(app) as http:
+        response = http.get("/api/health", headers={"X-Request-Id": bad_value})
+        assert response.status_code == 200
+
+        echoed = response.headers.get("X-Request-Id", "")
+        assert echoed != bad_value, "unsafe id was passed through unchanged"
+        assert _REQ_ID_PATTERN.match(echoed), (
+            f"replacement id {echoed!r} should match req:<12 hex>"
+        )
+
+
+def test_enrich_details_merges_active_correlation_ids() -> None:
+    """Ids bound through ``bind_log_context`` are added to the details dict."""
+    from notify_bridge_core.log_context import (
+        bind_log_context,
+        enrich_details_with_correlation,
+    )
+
+    bound_ids = {
+        "dispatch_id": "disp:deadbeef0001",
+        "request_id": "req:cafecafe0002",
+    }
+    with bind_log_context(**bound_ids):
+        enriched = enrich_details_with_correlation({"existing": "value"})
+
+    # The pre-existing key is kept and both bound ids are merged in.
+    assert enriched == {"existing": "value", **bound_ids}
+
+
+def test_enrich_details_does_not_overwrite_explicit_keys() -> None:
+    """A correlation key already present in the input keeps its value."""
+    from notify_bridge_core.log_context import (
+        bind_log_context,
+        enrich_details_with_correlation,
+    )
+
+    pinned = {"dispatch_id": "disp:pinned"}
+    with bind_log_context(dispatch_id="disp:newvalue00001"):
+        enriched = enrich_details_with_correlation(pinned)
+
+    # The bound id differs from the pinned one; the pinned one must win.
+    assert enriched["dispatch_id"] == "disp:pinned"
+
+
+def test_enrich_details_no_context_returns_copy() -> None:
+    """With nothing bound, the result equals the input but is a separate dict."""
+    from notify_bridge_core.log_context import enrich_details_with_correlation
+
+    source = {"key": "value"}
+    enriched = enrich_details_with_correlation(source)
+    assert enriched == source
+
+    # Writing to the result must leave the caller's dict untouched.
+    enriched["extra"] = "added"
+    assert "extra" not in source
+
+
+def test_enrich_details_handles_none() -> None:
+    """Passing ``None`` instead of a dict yields an empty dict."""
+    from notify_bridge_core.log_context import enrich_details_with_correlation
+
+    enriched = enrich_details_with_correlation(None)
+    assert enriched == {}
+
+
+def test_ensure_dispatch_id_generates_or_reuses() -> None:
+    """Unbound calls mint a ``disp:`` id; bound calls return the bound id."""
+    from notify_bridge_core.log_context import (
+        bind_log_context,
+        ensure_dispatch_id,
+    )
+
+    prefix = "disp:"
+    minted = ensure_dispatch_id()
+    # A minted id is the prefix followed by exactly 12 more characters.
+    assert minted.startswith(prefix)
+    assert len(minted) == len(prefix) + 12
+
+    already_bound = "disp:bound00000001"
+    with bind_log_context(dispatch_id=already_bound):
+        assert ensure_dispatch_id() == already_bound
@@ -0,0 +1,511 @@
+"""Tests for partial-delivery resilience in TelegramClient._send_media_group.
+
+Covers the three independent failure modes that previously aborted the
+whole send:
+
+1. **Per-item oversize** — one item over ``max_asset_data_size`` is
+   silently dropped; siblings still deliver. ``skipped_count`` reflects
+   the drop.
+2. **Combined chunk over Telegram's byte envelope** — pre-flight splits
+   into byte-budgeted sub-chunks, avoiding the 413 entirely.
+3. **Telegram-side chunk rejection after pre-flight** — fall back to
+   sending each item individually so partial delivery still happens.
+"""
+
+from __future__ import annotations
+
+from typing import Any
+from unittest.mock import patch
+
+import aiohttp
+import pytest
+from aioresponses import aioresponses
+
+from notify_bridge_core.notifications.telegram.client import (
+    TelegramClient,
+    _MediaItem,
+)
+from notify_bridge_core.notifications.telegram.media import (
+    TELEGRAM_MAX_GROUP_TOTAL_BYTES,
+)
+
+
+# Placeholder bot token handed to every ``TelegramClient`` built in this module.
+BOT_TOKEN = "TEST_TOKEN"
+# Base URL under which the mocked Bot API endpoints are registered.
+TG = f"https://api.telegram.org/bot{BOT_TOKEN}"
+# Chat id passed to every ``_send_media_group`` call in this module.
+CHAT_ID = "-1001234567890"
+
+
+# ---------------------------------------------------------------------------
+# Pure unit tests for the new helpers
+# ---------------------------------------------------------------------------
+
+
+def _item(upload_bytes: int, media_type: str = "photo") -> _MediaItem:
+    """Create a fake ``_MediaItem`` whose upload costs ``upload_bytes`` bytes.
+
+    A cost of ``0`` yields a cached-style item: a ``file_id`` reference with
+    no attachment and no cache info. Any other cost yields an item carrying a
+    zero-filled attachment of exactly that many bytes.
+    """
+    if upload_bytes != 0:
+        payload = b"\x00" * upload_bytes
+        return _MediaItem(
+            media_json={"type": media_type, "media": "attach://x"},
+            cache_info=("ck", media_type, None, upload_bytes),
+            attachment=("x", payload, "f.jpg", "image/jpeg"),
+        )
+    return _MediaItem(
+        media_json={"type": media_type, "media": "file_id_cached"},
+        cache_info=None,
+        attachment=None,
+    )
+
+
+def test_split_empty_returns_empty() -> None:
+    """Splitting an empty item list yields an empty list of groups."""
+    groups = TelegramClient._split_items_by_byte_budget([], 1000)
+    assert groups == []
+
+
+def test_split_fits_in_single_group() -> None:
+    """Items totalling less than the budget all land in one group."""
+    batch = [_item(cost) for cost in (10, 20, 30)]
+    groups = TelegramClient._split_items_by_byte_budget(batch, 100)
+    assert len(groups) == 1
+    # 10 + 20 + 30 bytes, all inside the single group.
+    assert sum(member.upload_bytes for member in groups[0]) == 60
+
+
+def test_split_packs_greedily_across_budget() -> None:
+    """Three 40-byte items under a 100-byte budget split into [40, 40] + [40]."""
+    batch = [_item(40) for _ in range(3)]
+    groups = TelegramClient._split_items_by_byte_budget(batch, 100)
+    assert [len(group) for group in groups] == [2, 1]
+    # Byte totals per group, in order.
+    totals = [sum(member.upload_bytes for member in group) for group in groups]
+    assert totals == [80, 40]
+
+
+def test_split_oversized_single_item_kept_alone() -> None:
+    """An item larger than the whole budget is kept, alone in its own group."""
+    # The splitter is expected to keep the item rather than drop it
+    # client-side, so the rejection (if any) comes from Telegram itself.
+    oversized = _item(200)
+    groups = TelegramClient._split_items_by_byte_budget([oversized], 100)
+    assert len(groups) == 1
+    assert groups[0][0].upload_bytes == 200
+
+
+def test_split_cached_items_are_free() -> None:
+    """Zero-byte (cached) items stay in one group even under a tiny budget."""
+    cached = [_item(0) for _ in range(3)]
+    groups = TelegramClient._split_items_by_byte_budget(cached, 10)
+    # All three items share the single group; none forced a split.
+    assert len(groups) == 1
+    assert len(groups[0]) == 3
+
+
+def test_split_mixes_cached_and_fresh_correctly() -> None:
+    """Zero-byte items join whichever group is open when they are reached."""
+    costs = (40, 0, 40, 0, 40)
+    batch = [_item(cost) for cost in costs]
+    groups = TelegramClient._split_items_by_byte_budget(batch, 100)
+    # 40 + 0 + 40 + 0 = 80 bytes fits the 100-byte budget; the final 40
+    # would push it to 120, so it opens a second group.
+    assert [len(group) for group in groups] == [4, 1]
+
+
+def test_attach_caption_to_first_idempotent() -> None:
+    """Only the first item is captioned, and a second call overwrites it."""
+    batch = [_item(10), _item(10)]
+
+    TelegramClient._attach_caption_to_first(batch, "Hello", "HTML")
+    head_json = batch[0].media_json
+    assert head_json["caption"] == "Hello"
+    assert head_json["parse_mode"] == "HTML"
+    assert "caption" not in batch[1].media_json
+
+    # A second call replaces the caption and parse mode on the first item.
+    TelegramClient._attach_caption_to_first(batch, "Bye", "MarkdownV2")
+    head_json = batch[0].media_json
+    assert head_json["caption"] == "Bye"
+    assert head_json["parse_mode"] == "MarkdownV2"
+
+
+def test_attach_caption_truncates_to_telegram_limit() -> None:
+    """A caption 500 chars over the limit is stored at or under the limit."""
+    from notify_bridge_core.notifications.telegram.media import (
+        TELEGRAM_MAX_CAPTION_LENGTH,
+    )
+
+    overlong = "A" * (TELEGRAM_MAX_CAPTION_LENGTH + 500)
+    batch = [_item(10)]
+    TelegramClient._attach_caption_to_first(batch, overlong, "HTML")
+    stored_caption = batch[0].media_json["caption"]
+    assert len(stored_caption) <= TELEGRAM_MAX_CAPTION_LENGTH
+
+
+def test_attach_caption_no_items_is_noop() -> None:
+    """Calling the helper with an empty item list must not raise."""
+    no_items: list = []
+    TelegramClient._attach_caption_to_first(no_items, "x", "HTML")
+
+
+# ---------------------------------------------------------------------------
+# Integration tests for the full _send_media_group flow
+# ---------------------------------------------------------------------------
+
+
+def _png_bytes(size: int) -> bytes:
+    """Return ``size`` bytes that start with a 1x1 RGB PNG header.
+
+    The result is the 8-byte PNG signature followed by one IHDR chunk,
+    zero-padded up to ``size``. When ``size`` is smaller than that 33-byte
+    header, the header is cut short instead.
+
+    NOTE(review): the tests use this so image inspection in
+    ``check_photo_limits`` can read a header; whether the zero padding is
+    always tolerated depends on that helper, which is not visible here.
+    """
+    signature = b"\x89PNG\r\n\x1a\n"
+    # IHDR chunk: length=13, type=IHDR, width=1, height=1, depth=8,
+    # colour type=2 (RGB), compression=0, filter=0, interlace=0, then the
+    # 4-byte CRC field.
+    ihdr_chunk = bytes.fromhex(
+        "0000000d49484452000000010000000108020000009077"
+        "53de"
+    )
+    header = signature + ihdr_chunk
+    padding = b"\x00" * max(size - len(header), 0)
+    return (header + padding)[:size]
+
+
+async def _build_client(session: aiohttp.ClientSession) -> TelegramClient:
+    """Return a ``TelegramClient`` built from ``session`` and ``BOT_TOKEN``."""
+    return TelegramClient(session, BOT_TOKEN)
+
+
+@pytest.mark.asyncio
+async def test_oversized_item_skipped_others_delivered() -> None:
+    """One item over max_asset_data_size is dropped; siblings still go."""
+    mock_url_big = "http://assets.test/big.jpg"
+    mock_url_a = "http://assets.test/a.jpg"
+    mock_url_b = "http://assets.test/b.jpg"
+    max_size = 1_000_000  # 1 MB cap
+
+    # We pre-load bytes via the asset dict so we don't have to mock the
+    # asset HTTP server. Telegram side is mocked so sendMediaGroup
+    # returns a clean 200 with two message IDs.
+    # The first asset (2 MB) is over the 1 MB cap; the other two (50 KB
+    # each) are under it.
+    assets = [
+        {"type": "photo", "url": mock_url_big, "data": _png_bytes(2_000_000)},
+        {"type": "photo", "url": mock_url_a, "data": _png_bytes(50_000)},
+        {"type": "photo", "url": mock_url_b, "data": _png_bytes(50_000)},
+    ]
+
+    with aioresponses() as mocked:
+        # A single sendMediaGroup response is registered: the two
+        # surviving items are expected to go out in one call.
+        mocked.post(
+            f"{TG}/sendMediaGroup",
+            payload={
+                "ok": True,
+                "result": [
+                    {"message_id": 100, "photo": [{"file_id": "fa"}]},
+                    {"message_id": 101, "photo": [{"file_id": "fb"}]},
+                ],
+            },
+        )
+        async with aiohttp.ClientSession() as sess:
+            client = await _build_client(sess)
+            result = await client._send_media_group(
+                CHAT_ID, assets, max_asset_data_size=max_size,
+            )
+
+    # Two delivered, one skipped (the oversized item), none failed.
+    assert result["success"] is True
+    assert result["delivered_count"] == 2
+    assert result["skipped_count"] == 1
+    assert result["failed_count"] == 0
+    assert result["message_ids"] == [100, 101]
+
+
+@pytest.mark.asyncio
+async def test_byte_budget_splits_into_sub_chunks() -> None:
+    """Three uploads that overflow the group byte budget go out as two calls.
+
+    Each item is sized just over a third of
+    ``TELEGRAM_MAX_GROUP_TOTAL_BYTES``, so any two fit inside the budget
+    together but all three do not. The test expects exactly two
+    ``sendMediaGroup`` requests and no failed items.
+    """
+    per_item = TELEGRAM_MAX_GROUP_TOTAL_BYTES // 3 + 1
+    assets = [
+        {"type": "photo", "url": f"http://t/{i}.jpg", "data": _png_bytes(per_item)}
+        for i in range(3)
+    ]
+
+    # How many items each outbound call carries is not known up front, so
+    # every call is answered with the same 10-message payload.
+    # NOTE(review): this assumes the client tolerates more result entries
+    # than the number of items it sent; confirm against the client code.
+    ok_payload: dict[str, Any] = {
+        "ok": True,
+        "result": [
+            {"message_id": 200 + i, "photo": [{"file_id": f"x{i}"}]}
+            for i in range(10)
+        ],
+    }
+
+    with aioresponses() as mocked:
+        mocked.post(f"{TG}/sendMediaGroup", payload=ok_payload, repeat=True)
+        async with aiohttp.ClientSession() as sess:
+            client = await _build_client(sess)
+            # Photo-limit inspection is stubbed out so the multi-MB padded
+            # payloads are judged only by the byte-budget logic under test.
+            with patch(
+                "notify_bridge_core.notifications.telegram.client.check_photo_limits",
+                return_value=(False, None, None, None),
+            ):
+                result = await client._send_media_group(CHAT_ID, assets)
+
+        # ``mocked.requests`` groups recorded calls per request key; total
+        # the calls whose URL path is the sendMediaGroup endpoint.
+        send_call_count = sum(
+            len(recorded)
+            for key, recorded in mocked.requests.items()
+            if key[1].path.endswith("/sendMediaGroup")
+        )
+
+    assert result["success"] is True
+    # Pre-splitting means no chunk was rejected.
+    assert result["failed_count"] == 0
+    # 3 items -> sub-chunks of 2 + 1 -> exactly two outbound calls.
+    assert send_call_count == 2
+
+
+@pytest.mark.asyncio
+async def test_chunk_413_falls_back_to_per_item() -> None:
+    """If Telegram 413s a chunk anyway, retry each item individually."""
+    assets = [
+        {"type": "photo", "url": f"http://t/{i}.jpg", "data": _png_bytes(50_000)}
+        for i in range(2)
+    ]
+
+    with aioresponses() as mocked:
+        # The group send fails hard (Telegram-side rejection).
+        mocked.post(
+            f"{TG}/sendMediaGroup",
+            status=413,
+            payload={"ok": False, "error_code": 413, "description": "Request Entity Too Large"},
+        )
+        # Per-item fallback: two sendPhoto calls succeed.
+        # NOTE(review): two separate registrations for the same URL, one
+        # per expected call; this relies on aioresponses consuming them in
+        # registration order.
+        mocked.post(
+            f"{TG}/sendPhoto",
+            payload={"ok": True, "result": {"message_id": 300, "photo": [{"file_id": "z0"}]}},
+        )
+        mocked.post(
+            f"{TG}/sendPhoto",
+            payload={"ok": True, "result": {"message_id": 301, "photo": [{"file_id": "z1"}]}},
+        )
+
+        async with aiohttp.ClientSession() as sess:
+            client = await _build_client(sess)
+            # Photo-limit inspection is stubbed so only the fallback path
+            # decides the outcome.
+            with patch(
+                "notify_bridge_core.notifications.telegram.client.check_photo_limits",
+                return_value=(False, None, None, None),
+            ):
+                result = await client._send_media_group(CHAT_ID, assets)
+
+    # Both items were delivered through the per-item fallback.
+    assert result["success"] is True
+    assert result["delivered_count"] == 2
+    assert result["failed_count"] == 0
+    # We still record the original chunk-level error for diagnostics,
+    # tagged with kind="chunk" so operators can distinguish cause from
+    # per-item consequences.
+    assert result["errors"] is not None
+    chunk_errors = [e for e in result["errors"] if e.get("kind") == "chunk"]
+    assert len(chunk_errors) == 1
+    assert "Request Entity Too Large" in str(chunk_errors[0]["error"])
+
+
+@pytest.mark.asyncio
+async def test_chunk_failure_with_per_item_partial_failure() -> None:
+    """Per-item fallback can itself partially fail; we report both."""
+    assets = [
+        {"type": "photo", "url": f"http://t/{i}.jpg", "data": _png_bytes(50_000)}
+        for i in range(2)
+    ]
+
+    with aioresponses() as mocked:
+        # The group send is rejected, which triggers the per-item fallback.
+        mocked.post(
+            f"{TG}/sendMediaGroup",
+            status=400,
+            payload={"ok": False, "error_code": 400, "description": "Bad Request"},
+        )
+        # First per-item OK, second fails.
+        # NOTE(review): relies on aioresponses consuming same-URL
+        # registrations in the order they were added.
+        mocked.post(
+            f"{TG}/sendPhoto",
+            payload={"ok": True, "result": {"message_id": 400, "photo": [{"file_id": "p0"}]}},
+        )
+        mocked.post(
+            f"{TG}/sendPhoto",
+            status=400,
+            payload={"ok": False, "error_code": 400, "description": "PHOTO_INVALID_DIMENSIONS"},
+        )
+
+        async with aiohttp.ClientSession() as sess:
+            client = await _build_client(sess)
+            # Photo-limit inspection is stubbed so only the mocked Telegram
+            # responses decide the outcome.
+            with patch(
+                "notify_bridge_core.notifications.telegram.client.check_photo_limits",
+                return_value=(False, None, None, None),
+            ):
+                result = await client._send_media_group(CHAT_ID, assets)
+
+    # At least one item delivered → overall success.
+    assert result["success"] is True
+    assert result["delivered_count"] == 1
+    assert result["failed_count"] == 1
+    assert result["message_ids"] == [400]
+    # The failed item carries its index so operators can correlate
+    # with the original asset list.
+    item_errors = [e for e in result["errors"] if e.get("kind") == "item"]
+    assert len(item_errors) == 1
+    assert item_errors[0]["item_index"] == 1
+
+
+@pytest.mark.asyncio
+async def test_document_chunk_failure_falls_back_to_sendDocument() -> None:
+    """Document items must hit /sendDocument in fallback, not /sendVideo.
+
+    Regression guard: an earlier draft routed any non-photo through
+    _VIDEO_KIND, silently misrouting documents to the video endpoint
+    where Telegram would reject them with a confusing error.
+    """
+    # Plain zero bytes: the items are typed ``document`` and this test
+    # does not patch ``check_photo_limits``.
+    assets = [
+        {"type": "document", "url": f"http://t/f{i}.bin", "data": b"\x00" * 50_000}
+        for i in range(2)
+    ]
+
+    with aioresponses() as mocked:
+        # The group send is rejected, which triggers the per-item fallback.
+        mocked.post(
+            f"{TG}/sendMediaGroup",
+            status=400,
+            payload={"ok": False, "error_code": 400, "description": "Bad Request"},
+        )
+        # Two successful sendDocument responses, one per item.
+        mocked.post(
+            f"{TG}/sendDocument",
+            payload={"ok": True, "result": {"message_id": 500, "document": {"file_id": "d0"}}},
+        )
+        mocked.post(
+            f"{TG}/sendDocument",
+            payload={"ok": True, "result": {"message_id": 501, "document": {"file_id": "d1"}}},
+        )
+
+        async with aiohttp.ClientSession() as sess:
+            client = await _build_client(sess)
+            result = await client._send_media_group(CHAT_ID, assets)
+
+        # No /sendVideo or /sendPhoto calls should have been made.
+        # Each recorded request key exposes the URL at index 1.
+        for key in mocked.requests:
+            assert "/sendVideo" not in key[1].path
+            assert "/sendPhoto" not in key[1].path
+
+    assert result["success"] is True
+    assert result["delivered_count"] == 2
+    assert result["message_ids"] == [500, 501]
+
+
+@pytest.mark.asyncio
+async def test_oversized_video_deferred_as_document_when_opted_in() -> None:
+    """Oversized videos are sent as documents post-chunk when the flag is set.
+
+    Telegram caps sendVideo at 50 MB but accepts up to 2 GB via
+    sendDocument. With ``send_large_videos_as_documents=True``, an
+    oversized video should be deferred out of the media group, then
+    delivered as its own document send instead of being silently
+    dropped. Other items in the same group must ride through the
+    normal sendMediaGroup path unaffected.
+    """
+    # 60 MB exceeds the 50 MB sendVideo cap but is under document's 2 GB cap.
+    oversized_video = b"\x00" * (60 * 1024 * 1024)
+    assets = [
+        {"type": "video", "url": "http://t/big.mp4", "data": oversized_video,
+         "content_type": "video/mp4"},
+        {"type": "photo", "url": "http://t/a.jpg", "data": _png_bytes(50_000)},
+        {"type": "photo", "url": "http://t/b.jpg", "data": _png_bytes(50_000)},
+    ]
+
+    with aioresponses() as mocked:
+        # The 2 photos ride out in sendMediaGroup together.
+        mocked.post(
+            f"{TG}/sendMediaGroup",
+            payload={
+                "ok": True,
+                "result": [
+                    {"message_id": 700, "photo": [{"file_id": "p0"}]},
+                    {"message_id": 701, "photo": [{"file_id": "p1"}]},
+                ],
+            },
+        )
+        # The deferred video lands as a document after the chunk.
+        mocked.post(
+            f"{TG}/sendDocument",
+            payload={"ok": True, "result": {"message_id": 702, "document": {"file_id": "d0"}}},
+        )
+
+        async with aiohttp.ClientSession() as sess:
+            client = await _build_client(sess)
+            # Photo-limit inspection is stubbed so only the video-size
+            # handling decides the outcome.
+            with patch(
+                "notify_bridge_core.notifications.telegram.client.check_photo_limits",
+                return_value=(False, None, None, None),
+            ):
+                result = await client._send_media_group(
+                    CHAT_ID, assets,
+                    send_large_videos_as_documents=True,
+                )
+
+        # sendVideo must NOT have been called — the oversized video
+        # bypasses sendVideo entirely and goes straight to sendDocument.
+        for key in mocked.requests:
+            assert "/sendVideo" not in key[1].path
+
+    assert result["success"] is True
+    assert result["delivered_count"] == 3
+    assert result["skipped_count"] == 0
+    assert result["failed_count"] == 0
+    # Sorted because the test does not pin the order in which the group
+    # ids and the document id are reported.
+    assert sorted(result["message_ids"]) == [700, 701, 702]
+
+
+@pytest.mark.asyncio
+async def test_oversized_video_skipped_when_flag_off() -> None:
+    """Without the opt-in flag, oversized videos are dropped (legacy behavior)."""
+    # 60 MB video; ``send_large_videos_as_documents`` is not passed below.
+    oversized_video = b"\x00" * (60 * 1024 * 1024)
+    assets = [
+        {"type": "video", "url": "http://t/big.mp4", "data": oversized_video,
+         "content_type": "video/mp4"},
+        {"type": "photo", "url": "http://t/a.jpg", "data": _png_bytes(50_000)},
+    ]
+
+    with aioresponses() as mocked:
+        # Only the photo is expected to reach Telegram, so the mocked
+        # response carries a single message.
+        mocked.post(
+            f"{TG}/sendMediaGroup",
+            payload={
+                "ok": True,
+                "result": [{"message_id": 800, "photo": [{"file_id": "p0"}]}],
+            },
+        )
+
+        async with aiohttp.ClientSession() as sess:
+            client = await _build_client(sess)
+            # Photo-limit inspection is stubbed so only the video-size
+            # handling decides the outcome.
+            with patch(
+                "notify_bridge_core.notifications.telegram.client.check_photo_limits",
+                return_value=(False, None, None, None),
+            ):
+                result = await client._send_media_group(CHAT_ID, assets)
+
+        # No sendDocument call either — video is simply dropped.
+        for key in mocked.requests:
+            assert "/sendDocument" not in key[1].path
+
+    # One photo delivered, one video skipped.
+    assert result["success"] is True
+    assert result["delivered_count"] == 1
+    assert result["skipped_count"] == 1
+
+
+@pytest.mark.asyncio
+async def test_all_items_oversized_returns_failure() -> None:
+    """When every asset is filtered before send, success is False."""
+    # Both 5 MB items are over the 1 MB cap passed to the call below.
+    assets = [
+        {"type": "photo", "url": "http://t/big.jpg", "data": _png_bytes(5_000_000)}
+        for _ in range(2)
+    ]
+
+    async with aiohttp.ClientSession() as sess:
+        client = await _build_client(sess)
+        # No HTTP mock needed — nothing should reach Telegram.
+        result = await client._send_media_group(
+            CHAT_ID, assets, max_asset_data_size=1_000_000,
+        )
+
+    # Both items count as skipped (not failed), and the error text is
+    # expected to mention that the items were filtered.
+    assert result["success"] is False
+    assert result["delivered_count"] == 0
+    assert result["skipped_count"] == 2
+    assert result["failed_count"] == 0
+    assert "filtered" in result["error"]
@@ -0,0 +1,249 @@
+"""Per-send Telegram options (`disable_notification`, `message_thread_id`).
+
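+Verifies the receiver factory's coercion of the two new fields and the
+ContextVar-based plumbing in the Telegram client module: the JSON-payload
+and multipart-form helpers that write the flags, the ``send_message``
+request body, and that concurrent ``asyncio.gather`` tasks (as in the
+dispatcher's fan-out) don't leak bound options between each other. The
+remaining send paths (single-asset send, media-group, cache-hit POST) are
+not exercised end-to-end in this file.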
+"""
+
+from __future__ import annotations
+
+import asyncio
+import json
+from typing import Any
+
+import pytest
+from aiohttp import FormData
+
+
+def test_telegram_receiver_factory_reads_new_fields() -> None:
+    """``build_receiver`` maps the new config keys onto typed receiver fields."""
+    from notify_bridge_core.notifications.receiver import (
+        TelegramReceiver, build_receiver,
+    )
+
+    config = {
+        "chat_id": "12345",
+        "disable_notification": True,
+        # Thread id supplied as a string, the form a JSON-driven UI commonly sends.
+        "message_thread_id": "7",
+    }
+    receiver = build_receiver("telegram", config)
+
+    assert isinstance(receiver, TelegramReceiver)
+    assert receiver.chat_id == "12345"
+    assert receiver.disable_notification is True
+    # The string "7" must come back as the integer 7.
+    assert receiver.message_thread_id == 7
+
+
+def test_telegram_receiver_factory_defaults_when_missing() -> None:
+    """A config with only ``chat_id`` yields a non-silent receiver with no thread id."""
+    from notify_bridge_core.notifications.receiver import (
+        TelegramReceiver, build_receiver,
+    )
+
+    minimal_config = {"chat_id": "12345"}
+    receiver = build_receiver("telegram", minimal_config)
+
+    assert isinstance(receiver, TelegramReceiver)
+    assert receiver.disable_notification is False
+    assert receiver.message_thread_id is None
+
+
+@pytest.mark.parametrize(
+    "raw_thread, expected",
+    [
+        # Absent / empty / unparsable input leaves the thread id unset.
+        (None, None),
+        ("", None),
+        ("not-a-number", None),
+        # Numeric input is accepted both as a string and as an int.
+        ("42", 42),
+        (42, 42),
+        # Non-positive values collapse to None so the field is omitted
+        # from the Bot API request; ``0`` is treated as the "general
+        # topic" sentinel, matching the frontend's ``<= 0 → unset`` rule.
+        ("0", None),
+        (0, None),
+        (-5, None),
+        # Booleans must be rejected explicitly: int(True) == 1 would
+        # otherwise silently route into topic #1 (and int(False) == 0).
+        (True, None),
+        (False, None),
+    ],
+)
+def test_telegram_receiver_thread_id_coercion(raw_thread: Any, expected: Any) -> None:
+    """``message_thread_id`` is coerced to a positive int or dropped to ``None``."""
+    from notify_bridge_core.notifications.receiver import build_receiver
+
+    config = {"chat_id": "1", "message_thread_id": raw_thread}
+    receiver = build_receiver("telegram", config)
+
+    assert receiver.message_thread_id == expected  # type: ignore[attr-defined]
+
+
+def test_apply_send_opts_to_payload_merges_when_bound() -> None:
+    """With options bound, the payload helper adds both keys to the dict."""
+    from notify_bridge_core.notifications.telegram.client import (
+        _SendOptions,
+        _apply_send_opts_to_payload,
+        _bind_send_options,
+    )
+
+    body: dict[str, Any] = {"chat_id": "1"}
+    bound_opts = _SendOptions(disable_notification=True, message_thread_id=7)
+
+    with _bind_send_options(bound_opts):
+        _apply_send_opts_to_payload(body)
+
+    # The helper mutates the dict in place; check it after the bind has exited.
+    assert body["disable_notification"] is True
+    assert body["message_thread_id"] == 7
+
+
+def test_apply_send_opts_to_payload_omits_when_default() -> None:
+    """Without an active bind the helper writes neither flag.
+
+    Omitted keys leave the Bot API on its defaults.
+    """
+    from notify_bridge_core.notifications.telegram.client import (
+        _apply_send_opts_to_payload,
+    )
+
+    body: dict[str, Any] = {"chat_id": "1"}
+    _apply_send_opts_to_payload(body)
+
+    for option_key in ("disable_notification", "message_thread_id"):
+        assert option_key not in body
+
+
+def test_apply_send_opts_to_form_merges_when_bound() -> None:
+    """With options bound, the multipart helper adds both fields as strings."""
+    from notify_bridge_core.notifications.telegram.client import (
+        _SendOptions,
+        _apply_send_opts_to_form,
+        _bind_send_options,
+    )
+
+    multipart = FormData()
+    bound_opts = _SendOptions(disable_notification=True, message_thread_id=42)
+    with _bind_send_options(bound_opts):
+        _apply_send_opts_to_form(multipart)
+
+    # aiohttp.FormData keeps each field as ``(MultiDict{name, ...}, headers, value)``;
+    # index the values by field name via the private ``_fields`` list.
+    fields_by_name = {
+        type_opts.get("name"): value
+        for type_opts, _headers, value in multipart._fields  # type: ignore[attr-defined]
+    }
+    assert fields_by_name.get("disable_notification") == "true"
+    assert fields_by_name.get("message_thread_id") == "42"
+
+
+def test_bind_send_options_resets_on_exit() -> None:
+    """Token-reset semantics: the var is restored even after a raise.
+
+    Also asserts that the exception raised inside the ``with`` body
+    propagates out of ``_bind_send_options``: a context manager that
+    swallowed it would hide real send failures from callers.
+    """
+    from notify_bridge_core.notifications.telegram.client import (
+        _SendOptions,
+        _bind_send_options,
+        _send_options_var,
+    )
+
+    default = _send_options_var.get()
+    # ``pytest.raises`` (instead of try/except/pass) fails the test when the
+    # RuntimeError does not escape the context manager.
+    with pytest.raises(RuntimeError, match="boom"):
+        with _bind_send_options(_SendOptions(disable_notification=True)):
+            raise RuntimeError("boom")
+    # After the failed block the ContextVar must read as it did before the bind.
+    assert _send_options_var.get() == default
+
+
+@pytest.mark.asyncio
+async def test_concurrent_binds_do_not_leak_between_tasks() -> None:
+    """Each of two ``asyncio.gather`` tasks observes only its own bound options.
+
+    The dispatcher's per-receiver fan-out relies on this isolation: a chat
+    configured with ``disable_notification=True`` must not silence a peer
+    chat handled in the same dispatch.
+    """
+    from notify_bridge_core.notifications.telegram.client import (
+        _SendOptions,
+        _apply_send_opts_to_payload,
+        _bind_send_options,
+    )
+
+    payload_by_label: dict[str, dict[str, Any]] = {}
+
+    async def run_with(opts: _SendOptions, label: str) -> None:
+        body: dict[str, Any] = {"label": label}
+        with _bind_send_options(opts):
+            # Hand control back to the loop so the sibling task binds its
+            # own options before this task reads them.
+            await asyncio.sleep(0)
+            _apply_send_opts_to_payload(body)
+        payload_by_label[label] = body
+
+    silent_opts = _SendOptions(disable_notification=True, message_thread_id=1)
+    loud_opts = _SendOptions(disable_notification=False, message_thread_id=2)
+    await asyncio.gather(
+        run_with(silent_opts, "silent"),
+        run_with(loud_opts, "loud"),
+    )
+
+    silent_body = payload_by_label["silent"]
+    loud_body = payload_by_label["loud"]
+    assert silent_body.get("disable_notification") is True
+    assert silent_body.get("message_thread_id") == 1
+    assert "disable_notification" not in loud_body  # False → omitted
+    assert loud_body.get("message_thread_id") == 2
+
+
+@pytest.mark.asyncio
+async def test_send_message_passes_options_into_payload(monkeypatch) -> None:
+    """Both per-send kwargs of ``send_message`` appear in the JSON request body.
+
+    A stub session records the ``json=`` argument of ``post`` so the body
+    can be inspected without any network traffic.
+    """
+    from notify_bridge_core.notifications.telegram.client import TelegramClient
+
+    recorded: dict[str, Any] = {}
+
+    class _StubResponse:
+        """Async-context-manager response reporting a successful send."""
+
+        status = 200
+
+        async def __aenter__(self) -> "_StubResponse":
+            return self
+
+        async def __aexit__(self, *args: Any) -> None:
+            return None
+
+        async def json(self) -> dict[str, Any]:
+            return {"ok": True, "result": {"message_id": 99}}
+
+    class _StubSession:
+        """Session stand-in that records the URL and JSON body of each POST."""
+
+        def post(self, url: str, *, json: dict[str, Any] | None = None, **_kw: Any) -> _StubResponse:
+            recorded["url"] = url
+            recorded["json"] = json
+            return _StubResponse()
+
+    tg_client = TelegramClient(_StubSession(), "TEST:token")  # type: ignore[arg-type]
+    outcome = await tg_client.send_message(
+        chat_id="123",
+        text="hello",
+        disable_notification=True,
+        message_thread_id=5,
+    )
+
+    assert outcome["success"] is True
+    sent_body = recorded["json"]
+    assert sent_body["disable_notification"] is True
+    assert sent_body["message_thread_id"] == 5
+
+
+@pytest.mark.asyncio
+async def test_send_message_without_options_omits_keys(monkeypatch) -> None:
+    """With default kwargs, neither per-send key appears in the request body."""
+    from notify_bridge_core.notifications.telegram.client import TelegramClient
+
+    recorded: dict[str, Any] = {}
+
+    class _StubResponse:
+        """Async-context-manager response reporting a successful send."""
+
+        status = 200
+
+        async def __aenter__(self) -> "_StubResponse":
+            return self
+
+        async def __aexit__(self, *args: Any) -> None:
+            return None
+
+        async def json(self) -> dict[str, Any]:
+            return {"ok": True, "result": {"message_id": 1}}
+
+    class _StubSession:
+        """Session stand-in that records the JSON body of each POST."""
+
+        def post(self, url: str, *, json: dict[str, Any] | None = None, **_kw: Any) -> _StubResponse:
+            recorded["json"] = json
+            return _StubResponse()
+
+    tg_client = TelegramClient(_StubSession(), "TEST:token")  # type: ignore[arg-type]
+    await tg_client.send_message(chat_id="123", text="hello")
+
+    sent_body = recorded["json"]
+    for option_key in ("disable_notification", "message_thread_id"):
+        assert option_key not in sent_body