feat(activity-log): phase 1 - storage model, migration, repository

- ActivityLogEntry dataclass + ActivityCategory/ActivitySeverity + ActivityLogFilters - additive idempotent migration 002_add_activity_log (indexed activity_log table, seq keyset tiebreaker) - ActivityLogRepository (record/query/count/prune/clear/iter_export), keyset pagination, parameterized SQL - 102 unit + adversarial tests (SQL-injection, pagination, prune, codec, migration idempotency)
2026-06-09 17:40:37 +03:00
parent 1afe7d6fcc
commit 1ac4a0f66d
8 changed files with 2100 additions and 18 deletions
@@ -0,0 +1,171 @@
+"""Activity / audit log data model.
+
+Defines the ``ActivityLogEntry`` dataclass together with its category and
+severity enumerations and the ``ActivityLogFilters`` query object.  Row-level
+codec (``to_row`` / ``from_row``) converts between the dataclass and a flat
+SQLite column dict.
+"""
+
+from __future__ import annotations
+
+import json
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+from typing import Sequence
+
+
+# ---------------------------------------------------------------------------
+# Enumerations (string constants, not enum.Enum, to stay consistent with the
+# rest of the codebase which uses plain string literals)
+# ---------------------------------------------------------------------------
+
+
+class ActivityCategory:
+    """Valid ``category`` values for an ``ActivityLogEntry``."""
+
+    AUTH = "auth"
+    DEVICE = "device"
+    ENTITY = "entity"
+    CAPTURE = "capture"
+    SYSTEM = "system"
+
+    ALL: tuple[str, ...] = (AUTH, DEVICE, ENTITY, CAPTURE, SYSTEM)
+
+
+class ActivitySeverity:
+    """Valid ``severity`` values for an ``ActivityLogEntry``."""
+
+    INFO = "info"
+    WARNING = "warning"
+    ERROR = "error"
+
+    ALL: tuple[str, ...] = (INFO, WARNING, ERROR)
+
+
+# ---------------------------------------------------------------------------
+# Entry dataclass
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class ActivityLogEntry:
+    """One immutable audit record.
+
+    Fields
+    ------
+    id          Stable application-assigned identifier, e.g. ``al_<uuid8>``.
+    ts          UTC timestamp of the event (server-assigned at record time).
+    category    Broad bucket — one of :class:`ActivityCategory`.
+    action      Verb-object label, e.g. ``"entity.created"`` or ``"auth.rejected"``.
+    severity    One of :class:`ActivitySeverity`.
+    actor       Who triggered the action, e.g. an API-key label or ``"system"``.
+    entity_type Optional: kind of entity involved, e.g. ``"output_target"``.
+    entity_id   Optional: stable entity identifier.
+    entity_name Optional: human-readable entity name at the time of the event.
+    message     Human-readable description suitable for display.
+    metadata    Small structured context (device address, error code, …).
+                Must be JSON-serialisable; defaults to empty dict.
+    """
+
+    id: str
+    ts: datetime
+    category: str
+    action: str
+    severity: str
+    actor: str
+    message: str
+    entity_type: str | None = None
+    entity_id: str | None = None
+    entity_name: str | None = None
+    metadata: dict = field(default_factory=dict)
+
+    # -- Row codec -----------------------------------------------------------
+
+    def to_row(self) -> dict:
+        """Return a flat dict suitable for a parameterised SQLite INSERT.
+
+        ``ts`` is stored as an ISO-8601 string (UTC).  ``metadata`` is stored
+        as a JSON string.  ``seq`` is DB-assigned and is NOT included.
+        """
+        return {
+            "id": self.id,
+            "ts": self.ts.isoformat(),
+            "category": self.category,
+            "action": self.action,
+            "severity": self.severity,
+            "actor": self.actor,
+            "entity_type": self.entity_type,
+            "entity_id": self.entity_id,
+            "entity_name": self.entity_name,
+            "message": self.message,
+            "metadata": json.dumps(self.metadata, ensure_ascii=False),
+        }
+
+    @staticmethod
+    def from_row(row: dict) -> "ActivityLogEntry":
+        """Reconstruct an ``ActivityLogEntry`` from a SQLite row dict.
+
+        ``row`` may include the ``seq`` column — it is ignored here (callers
+        that need ``seq`` for keyset pagination access it directly from the
+        row before calling this method).
+        """
+        raw_meta = row.get("metadata") or "{}"
+        try:
+            metadata = json.loads(raw_meta)
+        except (json.JSONDecodeError, TypeError):
+            metadata = {}
+
+        raw_ts = row["ts"]
+        ts = datetime.fromisoformat(raw_ts)
+        # Ensure tz-aware UTC even if stored without offset (legacy rows)
+        if ts.tzinfo is None:
+            ts = ts.replace(tzinfo=timezone.utc)
+
+        return ActivityLogEntry(
+            id=row["id"],
+            ts=ts,
+            category=row["category"],
+            action=row["action"],
+            severity=row["severity"],
+            actor=row["actor"],
+            entity_type=row.get("entity_type"),
+            entity_id=row.get("entity_id"),
+            entity_name=row.get("entity_name"),
+            message=row["message"],
+            metadata=metadata,
+        )
+
+
+# ---------------------------------------------------------------------------
+# Filters dataclass
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class ActivityLogFilters:
+    """Optional query-time filters for :class:`ActivityLogRepository`.
+
+    All fields are optional.  ``None`` / empty sequence means "no restriction
+    on this dimension".
+
+    Fields
+    ------
+    categories      Restrict to entries whose ``category`` is in this set.
+    severities      Restrict to entries whose ``severity`` is in this set.
+    actor           Exact match on the ``actor`` field.
+    entity_type     Exact match on the ``entity_type`` field.
+    entity_id       Exact match on the ``entity_id`` field.
+    since           Inclusive lower bound on ``ts`` (``ts >= since``).
+    until           Inclusive upper bound on ``ts`` (``ts <= until``).
+    message_like    Free-text substring match on ``message`` (LIKE ``%value%``).
+                    The value is escaped — no SQL injection risk.
+    """
+
+    categories: Sequence[str] | None = None
+    severities: Sequence[str] | None = None
+    actor: str | None = None
+    entity_type: str | None = None
+    entity_id: str | None = None
+    since: datetime | None = None
+    until: datetime | None = None
+    message_like: str | None = None
@@ -0,0 +1,293 @@
+"""Append-only repository for the persistent activity / audit log.
+
+Design notes
+------------
+* Does NOT subclass ``BaseSqliteStore`` — that base loads every row into an
+  in-memory cache at construction time, which is wrong for an unbounded,
+  append-heavy log.
+* All SQL parameters are passed as positional ``?`` placeholders — no user
+  input is interpolated into the query string.
+* Keyset pagination is implemented via the ``seq`` INTEGER PRIMARY KEY
+  AUTOINCREMENT column, which is strictly monotonic and survives rows with
+  identical ``ts`` values.
+* The repository takes a ``Database`` instance and calls ``db.execute`` /
+  ``db.transaction`` directly.
+* Thread safety: ``Database.execute`` holds the ``RLock`` for the duration of
+  each call.  The repository itself adds no extra locking.  Callers that
+  originate from non-event-loop threads MUST marshal via
+  ``loop.call_soon_threadsafe`` (that is Phase 2's responsibility).
+"""
+
+from __future__ import annotations
+
+import sqlite3
+from datetime import datetime
+from typing import Iterator
+
+from ledgrab.storage.activity_log import ActivityLogEntry, ActivityLogFilters
+from ledgrab.storage.database import Database
+from ledgrab.utils import get_logger
+
+logger = get_logger(__name__)
+
+_TABLE = "activity_log"
+
+
+def _build_filter_clause(
+    filters: ActivityLogFilters,
+    params: list,
+    *,
+    extra_where: str | None = None,
+) -> str:
+    """Return a WHERE clause string and append bound parameters to *params*.
+
+    The caller is responsible for providing the opening ``WHERE`` keyword when
+    the returned string is non-empty, or ``AND`` when appending to an existing
+    predicate.  This function returns only the condition fragment(s) joined by
+    ``AND``, or an empty string when *filters* has no restrictions.
+
+    ``extra_where`` is prepended (with AND) before the filter conditions if
+    provided (used for the ``seq < ?`` keyset predicate).
+    """
+    conditions: list[str] = []
+
+    if extra_where:
+        conditions.append(extra_where)
+
+    if filters.categories:
+        placeholders = ",".join("?" * len(filters.categories))
+        conditions.append(f"category IN ({placeholders})")
+        params.extend(filters.categories)
+
+    if filters.severities:
+        placeholders = ",".join("?" * len(filters.severities))
+        conditions.append(f"severity IN ({placeholders})")
+        params.extend(filters.severities)
+
+    if filters.actor is not None:
+        conditions.append("actor = ?")
+        params.append(filters.actor)
+
+    if filters.entity_type is not None:
+        conditions.append("entity_type = ?")
+        params.append(filters.entity_type)
+
+    if filters.entity_id is not None:
+        conditions.append("entity_id = ?")
+        params.append(filters.entity_id)
+
+    if filters.since is not None:
+        conditions.append("ts >= ?")
+        params.append(filters.since.isoformat())
+
+    if filters.until is not None:
+        conditions.append("ts <= ?")
+        params.append(filters.until.isoformat())
+
+    if filters.message_like is not None:
+        # Escape LIKE special characters in the user-supplied substring so that
+        # a literal '%' or '_' in the message does not act as a wildcard.
+        escaped = filters.message_like.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
+        conditions.append("message LIKE ? ESCAPE '\\'")
+        params.append(f"%{escaped}%")
+
+    return " AND ".join(conditions)
+
+
+class ActivityLogRepository:
+    """Purpose-built repository for the ``activity_log`` table.
+
+    Parameters
+    ----------
+    db:
+        The shared ``Database`` singleton.  The migration that creates the
+        ``activity_log`` table must have been applied before the first call to
+        :meth:`record`.  Construction triggers migration via
+        ``MigrationRunner`` so users can create the repository directly
+        without a separate startup step.
+    """
+
+    def __init__(self, db: Database) -> None:
+        self._db = db
+        # Apply pending migrations (idempotent; most runs are no-ops)
+        from ledgrab.storage.data_migrations import ALL_MIGRATIONS, MigrationRunner
+
+        MigrationRunner(db).run(ALL_MIGRATIONS)
+
+    # -- Write ---------------------------------------------------------------
+
+    def record(self, entry: ActivityLogEntry) -> None:
+        """Append *entry* to the log.
+
+        The ``seq`` column is assigned by SQLite (AUTOINCREMENT) — ``entry``
+        does not carry a ``seq`` field.
+
+        Caller contract: this must be called from the event-loop thread (or
+        from a context already serialised through the ``Database`` RLock).
+        Cross-thread callers must marshal via ``loop.call_soon_threadsafe``;
+        that is Phase 2's responsibility.
+        """
+        row = entry.to_row()
+        self._db.execute(
+            f"INSERT INTO {_TABLE} "
+            f"(id, ts, category, action, severity, actor, "
+            f" entity_type, entity_id, entity_name, message, metadata) "
+            f"VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
+            (
+                row["id"],
+                row["ts"],
+                row["category"],
+                row["action"],
+                row["severity"],
+                row["actor"],
+                row["entity_type"],
+                row["entity_id"],
+                row["entity_name"],
+                row["message"],
+                row["metadata"],
+            ),
+        )
+
+    # -- Read ----------------------------------------------------------------
+
+    def query(
+        self,
+        filters: ActivityLogFilters,
+        *,
+        before_seq: int | None = None,
+        limit: int = 50,
+    ) -> list[ActivityLogEntry]:
+        """Return the newest matching entries, oldest-first within the page.
+
+        Keyset pagination: pass the smallest ``seq`` seen on the previous page
+        as *before_seq* to get the next page.  Entries are fetched in
+        ``seq DESC`` order from SQLite and then reversed before returning so
+        that the caller receives them in chronological order within the page.
+
+        Parameters
+        ----------
+        filters:
+            Optional filter criteria.
+        before_seq:
+            Exclusive upper bound on ``seq``.  ``None`` returns the first
+            (newest) page.
+        limit:
+            Maximum number of entries to return.
+        """
+        params: list = []
+        keyset = "seq < ?" if before_seq is not None else None
+        if before_seq is not None:
+            params.append(before_seq)
+
+        where_fragment = _build_filter_clause(filters, params, extra_where=keyset)
+        where_clause = f"WHERE {where_fragment}" if where_fragment else ""
+        params.append(limit)
+
+        sql = (
+            f"SELECT seq, id, ts, category, action, severity, actor, "
+            f"entity_type, entity_id, entity_name, message, metadata "
+            f"FROM {_TABLE} "
+            f"{where_clause} "
+            f"ORDER BY seq DESC "
+            f"LIMIT ?"
+        )
+
+        cursor = self._db.execute(sql, tuple(params))
+        rows = cursor.fetchall()
+        # Reverse to return chronological order within the page
+        return [ActivityLogEntry.from_row(dict(row)) for row in reversed(rows)]
+
+    def count(self, filters: ActivityLogFilters | None = None) -> int:
+        """Return the number of entries matching *filters* (or all entries)."""
+        if filters is None:
+            filters = ActivityLogFilters()
+        params: list = []
+        where_fragment = _build_filter_clause(filters, params)
+        where_clause = f"WHERE {where_fragment}" if where_fragment else ""
+
+        sql = f"SELECT COUNT(*) AS cnt FROM {_TABLE} {where_clause}"
+        cursor = self._db.execute(sql, tuple(params))
+        row = cursor.fetchone()
+        return int(row["cnt"])
+
+    # -- Maintenance ---------------------------------------------------------
+
+    def prune(
+        self,
+        *,
+        before_ts: datetime | None = None,
+        max_entries: int | None = None,
+    ) -> int:
+        """Delete old / excess entries; return the total rows deleted.
+
+        Both predicates are applied independently:
+
+        1. Delete all rows where ``ts < before_ts`` (age-based pruning).
+        2. Keep only the newest ``max_entries`` rows, deleting the rest
+           (count-based pruning).
+
+        The two operations run in separate statements so that each can be
+        independently enabled or disabled.
+        """
+        deleted = 0
+
+        if before_ts is not None:
+            cursor = self._db.execute(
+                f"DELETE FROM {_TABLE} WHERE ts < ?",
+                (before_ts.isoformat(),),
+            )
+            deleted += cursor.rowcount
+
+        if max_entries is not None and max_entries >= 0:
+            # Find the seq of the Nth newest row; delete everything older than it.
+            cursor = self._db.execute(
+                f"SELECT seq FROM {_TABLE} ORDER BY seq DESC LIMIT 1 OFFSET ?",
+                (max_entries,),
+            )
+            cutoff_row = cursor.fetchone()
+            if cutoff_row is not None:
+                cutoff_seq = cutoff_row["seq"]
+                cursor = self._db.execute(
+                    f"DELETE FROM {_TABLE} WHERE seq <= ?",
+                    (cutoff_seq,),
+                )
+                deleted += cursor.rowcount
+
+        return deleted
+
+    def clear(self) -> int:
+        """Delete all entries; return the number of rows deleted."""
+        cursor = self._db.execute(f"DELETE FROM {_TABLE}")
+        return cursor.rowcount
+
+    # -- Export --------------------------------------------------------------
+
+    def iter_export(self, filters: ActivityLogFilters | None = None) -> Iterator[ActivityLogEntry]:
+        """Yield all matching entries in ascending ``seq`` order.
+
+        Uses a server-side cursor so the entire result set is never loaded
+        into memory — safe for large tables.  The connection's ``RLock`` is
+        held for the duration of the iteration; callers should consume this
+        iterator promptly.
+        """
+        if filters is None:
+            filters = ActivityLogFilters()
+
+        params: list = []
+        where_fragment = _build_filter_clause(filters, params)
+        where_clause = f"WHERE {where_fragment}" if where_fragment else ""
+
+        sql = (
+            f"SELECT seq, id, ts, category, action, severity, actor, "
+            f"entity_type, entity_id, entity_name, message, metadata "
+            f"FROM {_TABLE} "
+            f"{where_clause} "
+            f"ORDER BY seq ASC"
+        )
+
+        # Use the raw connection directly to get a streaming cursor.
+        # We borrow the lock for the full iteration.
+        with self._db._lock:  # noqa: SLF001 — internal access; no public cursor API
+            cursor: sqlite3.Cursor = self._db._conn.execute(sql, tuple(params))  # noqa: SLF001
+            for row in cursor:
+                yield ActivityLogEntry.from_row(dict(row))
@@ -213,7 +213,57 @@ class StaticToSingleColorMigration(DataMigration):
        return rows_changed


+class AddActivityLogTableMigration(DataMigration):
+    """Create the ``activity_log`` table and its indexes.
+
+    This is a purely additive migration — it does not touch any existing table.
+    ``CREATE TABLE / INDEX IF NOT EXISTS`` ensures idempotency if the migration
+    somehow runs twice (e.g. after a partial restore).
+    """
+
+    name = "002_add_activity_log"
+
+    def apply(self, conn: sqlite3.Connection) -> int:
+        conn.execute(
+            """
+            CREATE TABLE IF NOT EXISTS activity_log (
+                seq         INTEGER PRIMARY KEY AUTOINCREMENT,
+                id          TEXT    UNIQUE NOT NULL,
+                ts          TEXT    NOT NULL,
+                category    TEXT    NOT NULL,
+                action      TEXT    NOT NULL,
+                severity    TEXT    NOT NULL,
+                actor       TEXT    NOT NULL,
+                entity_type TEXT,
+                entity_id   TEXT,
+                entity_name TEXT,
+                message     TEXT    NOT NULL,
+                metadata    TEXT    NOT NULL DEFAULT '{}'
+            )
+            """
+        )
+        # Primary read path: newest-first keyset pagination
+        conn.execute(
+            "CREATE INDEX IF NOT EXISTS idx_activity_log_ts_seq "
+            "ON activity_log (ts DESC, seq DESC)"
+        )
+        # Filter indexes
+        conn.execute(
+            "CREATE INDEX IF NOT EXISTS idx_activity_log_category " "ON activity_log (category)"
+        )
+        conn.execute(
+            "CREATE INDEX IF NOT EXISTS idx_activity_log_severity " "ON activity_log (severity)"
+        )
+        conn.execute("CREATE INDEX IF NOT EXISTS idx_activity_log_actor " "ON activity_log (actor)")
+        conn.execute(
+            "CREATE INDEX IF NOT EXISTS idx_activity_log_entity "
+            "ON activity_log (entity_type, entity_id)"
+        )
+        return 0
+
+
 # Master list — ORDER MATTERS. Append new migrations; never reorder.
 ALL_MIGRATIONS: list[DataMigration] = [
    StaticToSingleColorMigration(),
+    AddActivityLogTableMigration(),
 ]