"""Regression tests for the 2026-06-18 production-readiness review fixes. Each test maps to a confirmed finding from the review and would fail against the pre-fix code. Grouped by area; see the inline finding tags (#N). """ from __future__ import annotations import threading import uuid from datetime import datetime, timedelta, timezone from unittest.mock import MagicMock, patch import pytest from ledgrab.storage.activity_log import ( ActivityCategory, ActivityLogEntry, ActivityLogFilters, ActivitySeverity, ) from ledgrab.storage.activity_log_repository import ActivityLogRepository from ledgrab.storage.database import Database def _entry(*, ts: datetime | None = None, message: str = "m") -> ActivityLogEntry: return ActivityLogEntry( id="al_" + uuid.uuid4().hex[:8], ts=ts or datetime.now(timezone.utc), category=ActivityCategory.SYSTEM, action="test.action", severity=ActivitySeverity.INFO, actor="system", message=message, ) @pytest.fixture def repo(tmp_db: Database) -> ActivityLogRepository: return ActivityLogRepository(tmp_db) # --------------------------------------------------------------------------- # #8 — activity-log entry id has full 128-bit entropy # --------------------------------------------------------------------------- def test_new_id_uses_full_uuid_hex(): from ledgrab.core.activity_log.recorder import _new_id val = _new_id() assert val.startswith("al_") hex_part = val[3:] assert len(hex_part) == 32 # full uuid4 hex (was 8 → collision-prone) int(hex_part, 16) # parses as hex # --------------------------------------------------------------------------- # #26 — sanitize_display honours its bounded-length contract for tiny maxlen # --------------------------------------------------------------------------- @pytest.mark.parametrize("maxlen", [0, 1, 2, 5]) def test_sanitize_display_respects_bound(maxlen): from ledgrab.core.activity_log.sanitize import sanitize_display result = sanitize_display("abcdef", maxlen=maxlen) assert len(result) <= maxlen def test_sanitize_display_degenerate_maxlen_returns_empty(): from ledgrab.core.activity_log.sanitize import sanitize_display assert sanitize_display("abcdef", maxlen=0) == "" assert sanitize_display("abcdef", maxlen=-3) == "" # --------------------------------------------------------------------------- # #10 — non-ASCII Bearer / WS token does not raise from compare_digest # --------------------------------------------------------------------------- def test_match_api_key_non_ascii_token_returns_none_not_raises(): from ledgrab.api.auth import _match_api_key with patch("ledgrab.api.auth.get_config") as cfg: c = MagicMock() c.auth.api_keys = {"dev": "correct-key"} cfg.return_value = c # café contains a non-ASCII char; must cleanly fail to match, not raise. assert _match_api_key("café") is None assert _match_api_key("correct-key") == "dev" # --------------------------------------------------------------------------- # #15 — game adapters tolerate non-ASCII attacker-controlled tokens # --------------------------------------------------------------------------- def test_generic_webhook_adapter_non_ascii_header_returns_false(): from ledgrab.core.game_integration.adapters.generic_webhook_adapter import ( GenericWebhookAdapter, ) ok = GenericWebhookAdapter.validate_auth( {"Authorization": "Bearer café"}, {}, {"auth_token": "secret123"} ) assert ok is False # no TypeError def test_cs2_adapter_non_ascii_payload_token_returns_false(): from ledgrab.core.game_integration.adapters.cs2_adapter import CS2Adapter ok = CS2Adapter.validate_auth({}, {"auth": {"token": "café"}}, {"auth_token": "secret123"}) assert ok is False # no TypeError # --------------------------------------------------------------------------- # #2 — async auth dependency sets the actor ContextVar visibly to the handler # --------------------------------------------------------------------------- def test_async_auth_dep_actor_visible_to_handler(): import asyncio from ledgrab.api.auth import verify_api_key # Guard against a regression back to a sync dependency (which would run the # contextvar mutation in a throwaway threadpool context the handler can't see). assert asyncio.iscoroutinefunction(verify_api_key) from fastapi import Depends, FastAPI from fastapi.testclient import TestClient from ledgrab.core.activity_log.context import current_actor app = FastAPI() # Use Depends() as a default value rather than the ``AuthRequired`` Annotated # alias: this module has ``from __future__ import annotations`` (stringized # annotations), and the route is defined in a local scope FastAPI can't # resolve the alias from — the default-value form sidesteps that entirely. @app.get("/whoami") async def whoami(auth=Depends(verify_api_key)): return {"label": auth, "actor": current_actor.get()} with patch("ledgrab.api.auth.get_config") as cfg: c = MagicMock() c.auth.api_keys = {"dev": "k"} cfg.return_value = c client = TestClient(app) resp = client.get("/whoami", headers={"Authorization": "Bearer k"}) assert resp.status_code == 200 body = resp.json() assert body["label"] == "dev" # Pre-fix (sync dep) this would be "system"; the async dep makes it visible. assert body["actor"] == "dev" # --------------------------------------------------------------------------- # #9 / #12 — auth-failure throttle dict survives concurrent access # --------------------------------------------------------------------------- def test_should_record_auth_failure_concurrent_no_exception(): from ledgrab.api import auth as auth_mod auth_mod._auth_record_last.clear() # Pre-fill to one below the hard cap so the eviction branch is hot. cap = auth_mod._AUTH_THROTTLE_HARD_CAP for i in range(cap - 1): auth_mod._auth_record_last[f"seed-{i}"] = 0.0 errors: list[BaseException] = [] def hammer(base: int): try: for j in range(500): auth_mod._should_record_auth_failure(f"{base}.{j}") except BaseException as exc: # noqa: BLE001 - capture any escape errors.append(exc) threads = [threading.Thread(target=hammer, args=(t,)) for t in range(16)] for th in threads: th.start() for th in threads: th.join(timeout=30) assert not errors, f"throttle raised under concurrency: {errors[:3]}" assert len(auth_mod._auth_record_last) <= cap auth_mod._auth_record_last.clear() # --------------------------------------------------------------------------- # #1 / #4 — since/until filter normalises naive + non-UTC-offset datetimes # --------------------------------------------------------------------------- def test_until_boundary_includes_entry_at_exact_instant(repo: ActivityLogRepository): # Entry stored at exactly 12:00 UTC. instant = datetime(2026, 6, 18, 12, 0, 0, tzinfo=timezone.utc) repo.record(_entry(ts=instant, message="boundary")) # A naive datetime-local value at the same wall-clock (no offset) — the # realistic frontend path. Pre-fix this lexically excluded the row. naive_until = datetime(2026, 6, 18, 12, 0, 0) # noqa: DTZ001 - intentional naive page = repo.query(ActivityLogFilters(until=naive_until), limit=10) assert len(page) == 1 def test_since_with_non_utc_offset_includes_correct_instant(repo: ActivityLogRepository): # Stored at 13:30 UTC. repo.record(_entry(ts=datetime(2026, 6, 18, 13, 30, tzinfo=timezone.utc), message="x")) # since expressed in +02:00 == 13:00 UTC → row (13:30Z) must be included. since_incl = datetime(2026, 6, 18, 15, 0, tzinfo=timezone(timedelta(hours=2))) assert len(repo.query(ActivityLogFilters(since=since_incl), limit=10)) == 1 # since == 16:00+02:00 == 14:00 UTC → row (13:30Z) must be excluded. since_excl = datetime(2026, 6, 18, 16, 0, tzinfo=timezone(timedelta(hours=2))) assert len(repo.query(ActivityLogFilters(since=since_excl), limit=10)) == 0 # --------------------------------------------------------------------------- # #21 — iter_export advances the keyset cursor correctly across batches # --------------------------------------------------------------------------- def test_iter_export_multi_batch_no_gaps_or_dupes(repo: ActivityLogRepository): n = 7 for i in range(n): repo.record(_entry(message=f"e{i}")) # batch_size=2 over 7 rows → 4 batches, exercising cursor advancement. exported = list(repo.iter_export(batch_size=2)) ids = [e.id for e in exported] assert len(ids) == n assert len(set(ids)) == n # no duplicates across batch boundaries # Identical set to a single-batch run. assert set(ids) == {e.id for e in repo.iter_export(batch_size=1000)} # --------------------------------------------------------------------------- # #13 — undecryptable secret envelope is preserved, not discarded # --------------------------------------------------------------------------- def test_decrypt_failure_preserves_envelope(monkeypatch): from ledgrab.storage import game_integration as gi envelope = "ENC:v1:undecryptable-blob" monkeypatch.setattr(gi.secret_box, "is_encrypted", lambda v: v == envelope) def _boom(_v): raise ValueError("secret key missing") monkeypatch.setattr(gi.secret_box, "decrypt", _boom) result = gi._decrypt_adapter_config({"auth_token": envelope, "other": "x"}) # Pre-fix: result["auth_token"] == "" (data loss on the next write-through). assert result["auth_token"] == envelope assert result["other"] == "x" # --------------------------------------------------------------------------- # #22 — real-thread / real-DB concurrency on the repository # --------------------------------------------------------------------------- def test_repo_concurrent_writes_are_consistent(repo: ActivityLogRepository): threads_n, per_thread = 8, 100 total = threads_n * per_thread def worker(): for _ in range(per_thread): repo.record(_entry()) threads = [threading.Thread(target=worker) for _ in range(threads_n)] for th in threads: th.start() for th in threads: th.join(timeout=60) assert repo.count() == total exported = list(repo.iter_export(batch_size=50)) assert len(exported) == total assert len({e.id for e in exported}) == total # unique, no corruption # --------------------------------------------------------------------------- # #23 — CSV export strips control chars from string cells (defense-in-depth) # --------------------------------------------------------------------------- def test_csv_export_strips_control_chars(repo: ActivityLogRepository): import csv import io from ledgrab.api.routes.activity_log import _CSV_COLUMNS, _export_csv_generator evil = "evil\x00dev\x1b[31mred\x1b[0m\r\ninject" repo.record(_entry(message=evil)) text = b"".join(_export_csv_generator(repo, ActivityLogFilters())).decode("utf-8") rows = list(csv.reader(io.StringIO(text))) assert len(rows) == 2 # header + 1 data row (newline did not split the field) msg_cell = rows[1][_CSV_COLUMNS.index("message")] for bad in ("\x00", "\x1b", "\r", "\n"): assert bad not in msg_cell, f"control char {bad!r} survived into the CSV cell" # --------------------------------------------------------------------------- # #44 — non-serialisable metadata is dropped best-effort, never raises # --------------------------------------------------------------------------- def test_non_serializable_metadata_does_not_raise(): from ledgrab.core.activity_log.recorder import ActivityRecorder persisted: list = [] repo = MagicMock() # Route through to_row() so the real json.dumps codec runs (and raises). repo.record.side_effect = lambda entry: persisted.append(entry.to_row()) recorder = ActivityRecorder(repo, MagicMock(), loop=None) # Must not raise into the caller despite the un-encodable values. recorder.record( category=ActivityCategory.SYSTEM, action="bad.metadata", message="m", metadata={"when": datetime.now(timezone.utc), "tags": {1, 2, 3}}, ) assert persisted == [], "non-serialisable entry must not persist"