Files
notify-bridge/packages/server/tests/test_diagnostic_mode.py
T
alexei.dolgolyov 6a8f374678 feat: observability, per-receiver Telegram options, oversized-video fallback
Operability:
- Correlation IDs end-to-end: shared dispatch_id between log lines and
  EventLog rows (event/watcher/scheduled/deferred/action/HA/command paths)
  and a new X-Request-Id middleware that normalizes inbound ids and binds
  request_id into log context.
- dispatch_summary block merged into EventLog.details: per-target
  success/failure counts plus Telegram media delivered/skipped/failed and
  truncated error lists, so partial outcomes surface in the UI.
- Diagnostic mode: admin can flip one module to DEBUG for a bounded
  window with auto-revert (in-memory only; setup_logging() resets on
  boot, lifespan reverts on shutdown). New /diagnostic-mode endpoints
  plus DiagnosticsCassette UI on the settings page.

Telegram:
- Per-receiver options: disable_notification (silent send) and
  message_thread_id (forum-topic routing), wired through the dispatcher
  via a ContextVar so all four send sites (sendMessage / sendPhoto-Video-
  Document / sendMediaGroup / cache-hit POST) pick them up.
- send_large_videos_as_documents target setting: bypass the 50 MB
  sendVideo cap by falling back to sendDocument for oversized videos.
- sendMediaGroup byte-budget enforcement (TELEGRAM_MAX_GROUP_TOTAL_BYTES,
  45 MB) with per-item fallback on chunk failure so a stale file_id no
  longer silently drops a cached asset.

Tests:
- New: diagnostic_mode, dispatch_summary, request_correlation,
  telegram_media_group_partial, telegram_per_send_options.

Docs:
- .claude/reviews/: six-axis production-readiness review of v0.8.1.
- .claude/docs/functional-review-2026-05-28.md: focused review of
  Telegram/Immich/logging subsystems.
2026-05-28 15:19:31 +03:00

373 lines
13 KiB
Python

"""Temporary per-module DEBUG overrides with auto-revert.
Covers the in-memory service module + a smoke pass over the API layer
using ``dependency_overrides`` to bypass auth. The APScheduler glue is
exercised via the fallback asyncio-timer path since tests run without a
running scheduler.
"""
from __future__ import annotations
import asyncio
import logging
from datetime import datetime, timedelta, timezone
from typing import Any
import pytest
from fastapi.testclient import TestClient
# ---------------------------------------------------------------------------
# Test scaffolding
# ---------------------------------------------------------------------------
def _reset_state() -> None:
    """Empty the service's module-level ``_active`` mapping.

    Called at the start of each test so overrides registered by an
    earlier case cannot bleed into the next one.
    """
    from notify_bridge_server.services.diagnostic_mode import (
        _active as active_overrides,
    )

    active_overrides.clear()
@pytest.fixture(autouse=True)
def _stub_db_read(monkeypatch):
    """Autouse stub: make ``_read_db_log_levels`` return an empty string.

    Every test therefore starts from an empty ``log_levels`` snapshot. A
    test that needs DB-override precedence re-patches the same attribute
    itself.
    """
    from notify_bridge_server.services import diagnostic_mode as diag

    async def _empty_log_levels() -> str:
        return ""

    monkeypatch.setattr(diag, "_read_db_log_levels", _empty_log_levels)
def _patch_db_read(monkeypatch, value: str) -> None:
    """Replace ``_read_db_log_levels`` with a stub returning ``value``.

    Used by individual tests to supersede the autouse empty-string stub
    when they need a non-empty ``log_levels`` setting.
    """
    from notify_bridge_server.services import diagnostic_mode as diag

    async def _fixed_log_levels() -> str:
        return value

    monkeypatch.setattr(diag, "_read_db_log_levels", _fixed_log_levels)
# ---------------------------------------------------------------------------
# Unit tests — service module
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_set_diagnostic_applies_debug_immediately(tmp_data_dir) -> None:  # noqa: ARG001
    """Activating an override reports DEBUG and sets the logger to DEBUG."""
    from notify_bridge_server.services.diagnostic_mode import set_diagnostic

    _reset_state()
    target = "notify_bridge_core.notifications.telegram.client"

    result = await set_diagnostic(target, duration_minutes=30)

    # The returned entry describes the override that was just installed.
    assert result["module"] == target
    assert result["current_level"] == "DEBUG"
    # 30 minutes were requested, so more than 29 minutes must remain.
    assert result["remaining_seconds"] > 60 * 29
    # The stdlib logger itself must already be at DEBUG.
    assert logging.getLogger(target).level == logging.DEBUG
@pytest.mark.asyncio
async def test_set_diagnostic_rejects_unlisted_module(tmp_data_dir) -> None:  # noqa: ARG001
    """A namespace outside the allowlist is refused with ``ValueError``."""
    from notify_bridge_server.services.diagnostic_mode import set_diagnostic

    _reset_state()
    unlisted = "some_random_third_party"

    with pytest.raises(ValueError, match="allowlist"):
        await set_diagnostic(unlisted, 30)
@pytest.mark.asyncio
async def test_set_diagnostic_rejects_root_logger(tmp_data_dir) -> None:  # noqa: ARG001
    """An empty module name (the root logger) fails the allowlist check."""
    from notify_bridge_server.services.diagnostic_mode import set_diagnostic

    _reset_state()
    root_name = ""

    with pytest.raises(ValueError, match="allowlist"):
        await set_diagnostic(root_name, 30)
@pytest.mark.asyncio
async def test_set_diagnostic_rejects_unreasonable_durations(tmp_data_dir) -> None:  # noqa: ARG001
    """Durations of 0 and 9999 minutes are both rejected with ``ValueError``."""
    from notify_bridge_server.services.diagnostic_mode import set_diagnostic

    _reset_state()

    # One value at the low end and one at the high end, checked in that order.
    for bad_minutes in (0, 9999):
        with pytest.raises(ValueError, match="duration_minutes"):
            await set_diagnostic("notify_bridge_core", bad_minutes)
@pytest.mark.asyncio
async def test_baseline_from_db_override(tmp_data_dir, monkeypatch) -> None:  # noqa: ARG001
    """A ``log_levels`` entry for the module is reported as its baseline."""
    from notify_bridge_server.services.diagnostic_mode import set_diagnostic

    _reset_state()
    _patch_db_read(monkeypatch, "sqlalchemy.engine=ERROR")

    result = await set_diagnostic("sqlalchemy.engine", duration_minutes=15)

    assert result["baseline_level"] == "ERROR"
@pytest.mark.asyncio
async def test_baseline_from_noisy_default(tmp_data_dir) -> None:  # noqa: ARG001
    """With an empty ``log_levels`` the baseline for this library is WARNING."""
    from notify_bridge_server.services.diagnostic_mode import set_diagnostic

    _reset_state()

    result = await set_diagnostic("sqlalchemy.engine", duration_minutes=15)

    assert result["baseline_level"] == "WARNING"
@pytest.mark.asyncio
async def test_baseline_prefix_walks_for_submodule(tmp_data_dir, monkeypatch) -> None:  # noqa: ARG001
    """A sub-logger such as ``sqlalchemy.engine.Engine`` resolves to the
    WARNING baseline of its parent namespace."""
    from notify_bridge_server.services.diagnostic_mode import set_diagnostic

    _reset_state()
    sub_logger = "sqlalchemy.engine.Engine"

    result = await set_diagnostic(sub_logger, duration_minutes=15)

    assert result["baseline_level"] == "WARNING"
@pytest.mark.asyncio
async def test_baseline_prefix_walks_for_db_override(tmp_data_dir, monkeypatch) -> None:  # noqa: ARG001
    """A ``log_levels`` entry on a parent namespace sets the baseline of a
    logger nested beneath it."""
    from notify_bridge_server.services.diagnostic_mode import set_diagnostic

    _reset_state()
    _patch_db_read(monkeypatch, "notify_bridge_core.notifications=ERROR")
    nested = "notify_bridge_core.notifications.telegram.client"

    result = await set_diagnostic(nested, duration_minutes=15)

    assert result["baseline_level"] == "ERROR"
@pytest.mark.asyncio
async def test_set_diagnostic_twice_replaces_schedule(tmp_data_dir) -> None:  # noqa: ARG001
    """Re-activating a module replaces its entry rather than adding one."""
    from notify_bridge_server.services.diagnostic_mode import (
        list_active,
        set_diagnostic,
    )

    _reset_state()
    name = "notify_bridge_core"

    # First activation: a short 5-minute window.
    await set_diagnostic(name, 5)
    before = list_active()
    assert len(before) == 1
    original_expiry = before[0]["expires_at"]

    # Pause briefly so the two activations get distinct timestamps.
    await asyncio.sleep(0.05)

    # Second activation: 60 minutes. Still exactly one entry, new expiry.
    await set_diagnostic(name, 60)
    after = list_active()
    assert len(after) == 1
    replacement = after[0]
    assert replacement["expires_at"] != original_expiry
    assert replacement["remaining_seconds"] > 30 * 60
@pytest.mark.asyncio
async def test_manual_revert_restores_baseline(tmp_data_dir) -> None:  # noqa: ARG001
    """Reverting an active override returns True and restores WARNING."""
    from notify_bridge_server.services.diagnostic_mode import (
        revert_diagnostic,
        set_diagnostic,
    )

    _reset_state()
    name = "sqlalchemy.engine"
    target_logger = logging.getLogger(name)

    await set_diagnostic(name, 30)
    assert target_logger.level == logging.DEBUG

    assert await revert_diagnostic(name) is True
    # With an empty ``log_levels`` the baseline for this library is WARNING (30).
    assert target_logger.level == logging.WARNING
@pytest.mark.asyncio
async def test_revert_reads_db_at_revert_time(tmp_data_dir, monkeypatch) -> None:  # noqa: ARG001
    """The level restored on revert reflects ``log_levels`` as it is when
    the revert runs, not the value seen at activation."""
    from notify_bridge_server.services.diagnostic_mode import (
        revert_diagnostic,
        set_diagnostic,
    )

    _reset_state()
    name = "sqlalchemy.engine"

    # Activate while the setting is empty.
    _patch_db_read(monkeypatch, "")
    await set_diagnostic(name, 30)

    # Simulate an operator raising the level to ERROR mid-window.
    _patch_db_read(monkeypatch, "sqlalchemy.engine=ERROR")

    reverted = await revert_diagnostic(name)
    assert reverted is True
    assert logging.getLogger(name).level == logging.ERROR
@pytest.mark.asyncio
async def test_manual_revert_no_active_returns_false(tmp_data_dir) -> None:  # noqa: ARG001
    """Reverting a module with no active override returns ``False``."""
    from notify_bridge_server.services.diagnostic_mode import revert_diagnostic

    _reset_state()

    outcome = await revert_diagnostic("notify_bridge_core")

    assert outcome is False
@pytest.mark.asyncio
async def test_auto_revert_after_window_elapses(tmp_data_dir) -> None:  # noqa: ARG001
    """The scheduled revert fires after ``expires_at`` and restores the baseline.

    Uses a sub-second window so the test stays fast. ``set_diagnostic``
    is bypassed (it works in whole minutes): the ``_active`` entry is
    populated by hand and ``_schedule_revert`` is called directly.

    Rather than sleeping a fixed 0.5 s and hoping the 0.3 s timer has
    already run, the test polls until the revert is observed or a generous
    deadline passes. On a fast machine it finishes as soon as the revert
    lands; on a loaded runner it no longer fails spuriously.
    """
    from notify_bridge_server.services import diagnostic_mode as svc

    _reset_state()
    module = "sqlalchemy.engine"
    target_logger = logging.getLogger(module)
    baseline = svc._baseline_for(module, db_log_levels="")
    now = datetime.now(timezone.utc)
    expires = now + timedelta(seconds=0.3)

    # Reproduce by hand the state an activation would have left behind.
    target_logger.setLevel("DEBUG")
    svc._active[module] = svc._Override(
        module=module,
        baseline_level=baseline,
        activated_at=now,
        expires_at=expires,
    )
    svc._schedule_revert(module, expires)

    # Poll for BOTH observable effects so the assertions below never race
    # a revert that has removed the entry but not yet restored the level.
    loop = asyncio.get_running_loop()
    deadline = loop.time() + 3.0
    while loop.time() < deadline:
        if module not in svc._active and target_logger.level == logging.WARNING:
            break
        await asyncio.sleep(0.05)

    assert module not in svc._active
    assert target_logger.level == logging.WARNING
@pytest.mark.asyncio
async def test_fallback_task_retained_until_fire(tmp_data_dir) -> None:  # noqa: ARG001
    """``_schedule_revert`` must keep a strong reference to its timer task.

    Without a retained reference the task could be garbage-collected
    before the timer fires. The check is made order-independent by
    snapshotting ``_bg_tasks`` first and asserting only on tasks added by
    this call, so leftovers from earlier tests cannot break the count.
    """
    from notify_bridge_server.services import diagnostic_mode as svc

    _reset_state()
    # ``_bg_tasks`` is module-global; record what is already in it.
    preexisting = set(svc._bg_tasks)

    when = datetime.now(timezone.utc) + timedelta(seconds=10)
    svc._schedule_revert("notify_bridge_core", when)

    # The retainer should now hold exactly one new task: the one just queued.
    queued = [task for task in svc._bg_tasks if task not in preexisting]
    assert len(queued) == 1

    # Cancel only the tasks this test created (older ones may belong to a
    # closed loop), then await them so cancellation has fully completed
    # before the event loop is torn down.
    for task in queued:
        task.cancel()
    await asyncio.gather(*queued, return_exceptions=True)
def test_list_active_omits_and_sweeps_expired(tmp_data_dir) -> None:  # noqa: ARG001
    """``list_active`` hides an already-expired entry and also deletes it
    from ``_active``."""
    from notify_bridge_server.services import diagnostic_mode as svc

    _reset_state()
    name = "sqlalchemy.engine"
    expired_at = datetime.now(timezone.utc) - timedelta(minutes=1)
    started_at = expired_at - timedelta(minutes=30)

    # Plant an override whose window ended a minute ago.
    stale = svc._Override(
        module=name,
        baseline_level="WARNING",
        activated_at=started_at,
        expires_at=expired_at,
    )
    svc._active[name] = stale

    assert svc.list_active() == []
    assert name not in svc._active
@pytest.mark.asyncio
async def test_revert_all_clears_every_override(tmp_data_dir) -> None:  # noqa: ARG001
    """``revert_all`` reports how many overrides it removed and leaves
    nothing active."""
    from notify_bridge_server.services.diagnostic_mode import (
        list_active,
        revert_all,
        set_diagnostic,
    )

    _reset_state()

    # Activate two independent overrides, in this order.
    for name in ("notify_bridge_core", "sqlalchemy.engine"):
        await set_diagnostic(name, 30)
    assert len(list_active()) == 2

    reverted_count = await revert_all()

    assert reverted_count == 2
    assert list_active() == []
# ---------------------------------------------------------------------------
# API smoke — bypasses auth via dependency_overrides
# ---------------------------------------------------------------------------
@pytest.fixture
def _admin_client(tmp_data_dir):  # noqa: ARG001
    """Yield a ``TestClient`` with ``require_admin`` short-circuited.

    The dependency override returns a hand-built admin ``User``, so the
    real auth flow is skipped while the FastAPI router, path converters
    and ``HTTPException`` paths are still exercised.

    Teardown runs in a ``finally`` block: if the ``TestClient`` context
    fails to start, the override is still removed from the shared ``app``
    and the in-memory state is still reset, so nothing leaks into later
    tests.
    """
    _reset_state()
    from notify_bridge_server.auth.dependencies import require_admin
    from notify_bridge_server.database.models import User
    from notify_bridge_server.main import app

    fake = User(
        id=1, username="admin",
        password_hash="x", role="admin", token_version=0,
    )
    app.dependency_overrides[require_admin] = lambda: fake
    try:
        with TestClient(app) as client:
            yield client
    finally:
        # Always undo the global mutation, even on startup failure.
        app.dependency_overrides.pop(require_admin, None)
        _reset_state()
def test_api_post_rejects_unlisted_module_with_400(_admin_client: TestClient) -> None:
    """POSTing a module outside the allowlist yields a 400 whose detail
    mentions the allowlist."""
    payload = {"module": "evil.namespace", "duration_minutes": 15}

    response = _admin_client.post("/api/settings/diagnostic-mode", json=payload)

    assert response.status_code == 400
    detail = response.json().get("detail", "")
    assert "allowlist" in detail
def test_api_post_rejects_huge_duration_with_400(_admin_client: TestClient) -> None:
    """POSTing a 99999-minute duration is answered with a 400."""
    payload = {"module": "notify_bridge_core", "duration_minutes": 99999}

    response = _admin_client.post("/api/settings/diagnostic-mode", json=payload)

    assert response.status_code == 400
def test_api_delete_unknown_returns_404(_admin_client: TestClient) -> None:
    """DELETE for a module with no active override returns 404."""
    url = "/api/settings/diagnostic-mode/notify_bridge_core"

    response = _admin_client.delete(url)

    assert response.status_code == 404
def test_api_delete_handles_dotted_module_path(_admin_client: TestClient) -> None:
    """A dotted module name placed in the DELETE URL reaches the handler
    intact and is echoed back as the reverted module."""
    dotted = "notify_bridge_core.notifications.telegram.client"
    base_url = "/api/settings/diagnostic-mode"

    # Activate first so there is something to revert.
    _admin_client.post(
        base_url,
        json={"module": dotted, "duration_minutes": 15},
    )

    response = _admin_client.delete(f"/api/settings/diagnostic-mode/{dotted}")

    assert response.status_code == 200, response.text
    assert response.json()["reverted"] == dotted