feat(activity-log): phase 1 - storage model, migration, repository
- ActivityLogEntry dataclass + ActivityCategory/ActivitySeverity + ActivityLogFilters - additive idempotent migration 002_add_activity_log (indexed activity_log table, seq keyset tiebreaker) - ActivityLogRepository (record/query/count/prune/clear/iter_export), keyset pagination, parameterized SQL - 102 unit + adversarial tests (SQL-injection, pagination, prune, codec, migration idempotency)
This commit is contained in:
@@ -0,0 +1,171 @@
|
||||
"""Activity / audit log data model.
|
||||
|
||||
Defines the ``ActivityLogEntry`` dataclass together with its category and
|
||||
severity enumerations and the ``ActivityLogFilters`` query object. Row-level
|
||||
codec (``to_row`` / ``from_row``) converts between the dataclass and a flat
|
||||
SQLite column dict.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from typing import Sequence
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Enumerations (string constants, not enum.Enum, to stay consistent with the
|
||||
# rest of the codebase which uses plain string literals)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class ActivityCategory:
|
||||
"""Valid ``category`` values for an ``ActivityLogEntry``."""
|
||||
|
||||
AUTH = "auth"
|
||||
DEVICE = "device"
|
||||
ENTITY = "entity"
|
||||
CAPTURE = "capture"
|
||||
SYSTEM = "system"
|
||||
|
||||
ALL: tuple[str, ...] = (AUTH, DEVICE, ENTITY, CAPTURE, SYSTEM)
|
||||
|
||||
|
||||
class ActivitySeverity:
|
||||
"""Valid ``severity`` values for an ``ActivityLogEntry``."""
|
||||
|
||||
INFO = "info"
|
||||
WARNING = "warning"
|
||||
ERROR = "error"
|
||||
|
||||
ALL: tuple[str, ...] = (INFO, WARNING, ERROR)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Entry dataclass
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@dataclass
|
||||
class ActivityLogEntry:
|
||||
"""One immutable audit record.
|
||||
|
||||
Fields
|
||||
------
|
||||
id Stable application-assigned identifier, e.g. ``al_<uuid8>``.
|
||||
ts UTC timestamp of the event (server-assigned at record time).
|
||||
category Broad bucket — one of :class:`ActivityCategory`.
|
||||
action Verb-object label, e.g. ``"entity.created"`` or ``"auth.rejected"``.
|
||||
severity One of :class:`ActivitySeverity`.
|
||||
actor Who triggered the action, e.g. an API-key label or ``"system"``.
|
||||
entity_type Optional: kind of entity involved, e.g. ``"output_target"``.
|
||||
entity_id Optional: stable entity identifier.
|
||||
entity_name Optional: human-readable entity name at the time of the event.
|
||||
message Human-readable description suitable for display.
|
||||
metadata Small structured context (device address, error code, …).
|
||||
Must be JSON-serialisable; defaults to empty dict.
|
||||
"""
|
||||
|
||||
id: str
|
||||
ts: datetime
|
||||
category: str
|
||||
action: str
|
||||
severity: str
|
||||
actor: str
|
||||
message: str
|
||||
entity_type: str | None = None
|
||||
entity_id: str | None = None
|
||||
entity_name: str | None = None
|
||||
metadata: dict = field(default_factory=dict)
|
||||
|
||||
# -- Row codec -----------------------------------------------------------
|
||||
|
||||
def to_row(self) -> dict:
|
||||
"""Return a flat dict suitable for a parameterised SQLite INSERT.
|
||||
|
||||
``ts`` is stored as an ISO-8601 string (UTC). ``metadata`` is stored
|
||||
as a JSON string. ``seq`` is DB-assigned and is NOT included.
|
||||
"""
|
||||
return {
|
||||
"id": self.id,
|
||||
"ts": self.ts.isoformat(),
|
||||
"category": self.category,
|
||||
"action": self.action,
|
||||
"severity": self.severity,
|
||||
"actor": self.actor,
|
||||
"entity_type": self.entity_type,
|
||||
"entity_id": self.entity_id,
|
||||
"entity_name": self.entity_name,
|
||||
"message": self.message,
|
||||
"metadata": json.dumps(self.metadata, ensure_ascii=False),
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def from_row(row: dict) -> "ActivityLogEntry":
|
||||
"""Reconstruct an ``ActivityLogEntry`` from a SQLite row dict.
|
||||
|
||||
``row`` may include the ``seq`` column — it is ignored here (callers
|
||||
that need ``seq`` for keyset pagination access it directly from the
|
||||
row before calling this method).
|
||||
"""
|
||||
raw_meta = row.get("metadata") or "{}"
|
||||
try:
|
||||
metadata = json.loads(raw_meta)
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
metadata = {}
|
||||
|
||||
raw_ts = row["ts"]
|
||||
ts = datetime.fromisoformat(raw_ts)
|
||||
# Ensure tz-aware UTC even if stored without offset (legacy rows)
|
||||
if ts.tzinfo is None:
|
||||
ts = ts.replace(tzinfo=timezone.utc)
|
||||
|
||||
return ActivityLogEntry(
|
||||
id=row["id"],
|
||||
ts=ts,
|
||||
category=row["category"],
|
||||
action=row["action"],
|
||||
severity=row["severity"],
|
||||
actor=row["actor"],
|
||||
entity_type=row.get("entity_type"),
|
||||
entity_id=row.get("entity_id"),
|
||||
entity_name=row.get("entity_name"),
|
||||
message=row["message"],
|
||||
metadata=metadata,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Filters dataclass
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@dataclass
|
||||
class ActivityLogFilters:
|
||||
"""Optional query-time filters for :class:`ActivityLogRepository`.
|
||||
|
||||
All fields are optional. ``None`` / empty sequence means "no restriction
|
||||
on this dimension".
|
||||
|
||||
Fields
|
||||
------
|
||||
categories Restrict to entries whose ``category`` is in this set.
|
||||
severities Restrict to entries whose ``severity`` is in this set.
|
||||
actor Exact match on the ``actor`` field.
|
||||
entity_type Exact match on the ``entity_type`` field.
|
||||
entity_id Exact match on the ``entity_id`` field.
|
||||
since Inclusive lower bound on ``ts`` (``ts >= since``).
|
||||
until Inclusive upper bound on ``ts`` (``ts <= until``).
|
||||
message_like Free-text substring match on ``message`` (LIKE ``%value%``).
|
||||
The value is escaped — no SQL injection risk.
|
||||
"""
|
||||
|
||||
categories: Sequence[str] | None = None
|
||||
severities: Sequence[str] | None = None
|
||||
actor: str | None = None
|
||||
entity_type: str | None = None
|
||||
entity_id: str | None = None
|
||||
since: datetime | None = None
|
||||
until: datetime | None = None
|
||||
message_like: str | None = None
|
||||
@@ -0,0 +1,293 @@
|
||||
"""Append-only repository for the persistent activity / audit log.
|
||||
|
||||
Design notes
|
||||
------------
|
||||
* Does NOT subclass ``BaseSqliteStore`` — that base loads every row into an
|
||||
in-memory cache at construction time, which is wrong for an unbounded,
|
||||
append-heavy log.
|
||||
* All SQL parameters are passed as positional ``?`` placeholders — no user
|
||||
input is interpolated into the query string.
|
||||
* Keyset pagination is implemented via the ``seq`` INTEGER PRIMARY KEY
|
||||
AUTOINCREMENT column, which is strictly monotonic and survives rows with
|
||||
identical ``ts`` values.
|
||||
* The repository takes a ``Database`` instance and calls ``db.execute`` /
|
||||
``db.transaction`` directly.
|
||||
* Thread safety: ``Database.execute`` holds the ``RLock`` for the duration of
|
||||
each call. The repository itself adds no extra locking. Callers that
|
||||
originate from non-event-loop threads MUST marshal via
|
||||
``loop.call_soon_threadsafe`` (that is Phase 2's responsibility).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sqlite3
|
||||
from datetime import datetime
|
||||
from typing import Iterator
|
||||
|
||||
from ledgrab.storage.activity_log import ActivityLogEntry, ActivityLogFilters
|
||||
from ledgrab.storage.database import Database
|
||||
from ledgrab.utils import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
_TABLE = "activity_log"
|
||||
|
||||
|
||||
def _build_filter_clause(
|
||||
filters: ActivityLogFilters,
|
||||
params: list,
|
||||
*,
|
||||
extra_where: str | None = None,
|
||||
) -> str:
|
||||
"""Return a WHERE clause string and append bound parameters to *params*.
|
||||
|
||||
The caller is responsible for providing the opening ``WHERE`` keyword when
|
||||
the returned string is non-empty, or ``AND`` when appending to an existing
|
||||
predicate. This function returns only the condition fragment(s) joined by
|
||||
``AND``, or an empty string when *filters* has no restrictions.
|
||||
|
||||
``extra_where`` is prepended (with AND) before the filter conditions if
|
||||
provided (used for the ``seq < ?`` keyset predicate).
|
||||
"""
|
||||
conditions: list[str] = []
|
||||
|
||||
if extra_where:
|
||||
conditions.append(extra_where)
|
||||
|
||||
if filters.categories:
|
||||
placeholders = ",".join("?" * len(filters.categories))
|
||||
conditions.append(f"category IN ({placeholders})")
|
||||
params.extend(filters.categories)
|
||||
|
||||
if filters.severities:
|
||||
placeholders = ",".join("?" * len(filters.severities))
|
||||
conditions.append(f"severity IN ({placeholders})")
|
||||
params.extend(filters.severities)
|
||||
|
||||
if filters.actor is not None:
|
||||
conditions.append("actor = ?")
|
||||
params.append(filters.actor)
|
||||
|
||||
if filters.entity_type is not None:
|
||||
conditions.append("entity_type = ?")
|
||||
params.append(filters.entity_type)
|
||||
|
||||
if filters.entity_id is not None:
|
||||
conditions.append("entity_id = ?")
|
||||
params.append(filters.entity_id)
|
||||
|
||||
if filters.since is not None:
|
||||
conditions.append("ts >= ?")
|
||||
params.append(filters.since.isoformat())
|
||||
|
||||
if filters.until is not None:
|
||||
conditions.append("ts <= ?")
|
||||
params.append(filters.until.isoformat())
|
||||
|
||||
if filters.message_like is not None:
|
||||
# Escape LIKE special characters in the user-supplied substring so that
|
||||
# a literal '%' or '_' in the message does not act as a wildcard.
|
||||
escaped = filters.message_like.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
|
||||
conditions.append("message LIKE ? ESCAPE '\\'")
|
||||
params.append(f"%{escaped}%")
|
||||
|
||||
return " AND ".join(conditions)
|
||||
|
||||
|
||||
class ActivityLogRepository:
|
||||
"""Purpose-built repository for the ``activity_log`` table.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
db:
|
||||
The shared ``Database`` singleton. The migration that creates the
|
||||
``activity_log`` table must have been applied before the first call to
|
||||
:meth:`record`. Construction triggers migration via
|
||||
``MigrationRunner`` so users can create the repository directly
|
||||
without a separate startup step.
|
||||
"""
|
||||
|
||||
def __init__(self, db: Database) -> None:
|
||||
self._db = db
|
||||
# Apply pending migrations (idempotent; most runs are no-ops)
|
||||
from ledgrab.storage.data_migrations import ALL_MIGRATIONS, MigrationRunner
|
||||
|
||||
MigrationRunner(db).run(ALL_MIGRATIONS)
|
||||
|
||||
# -- Write ---------------------------------------------------------------
|
||||
|
||||
def record(self, entry: ActivityLogEntry) -> None:
|
||||
"""Append *entry* to the log.
|
||||
|
||||
The ``seq`` column is assigned by SQLite (AUTOINCREMENT) — ``entry``
|
||||
does not carry a ``seq`` field.
|
||||
|
||||
Caller contract: this must be called from the event-loop thread (or
|
||||
from a context already serialised through the ``Database`` RLock).
|
||||
Cross-thread callers must marshal via ``loop.call_soon_threadsafe``;
|
||||
that is Phase 2's responsibility.
|
||||
"""
|
||||
row = entry.to_row()
|
||||
self._db.execute(
|
||||
f"INSERT INTO {_TABLE} "
|
||||
f"(id, ts, category, action, severity, actor, "
|
||||
f" entity_type, entity_id, entity_name, message, metadata) "
|
||||
f"VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
|
||||
(
|
||||
row["id"],
|
||||
row["ts"],
|
||||
row["category"],
|
||||
row["action"],
|
||||
row["severity"],
|
||||
row["actor"],
|
||||
row["entity_type"],
|
||||
row["entity_id"],
|
||||
row["entity_name"],
|
||||
row["message"],
|
||||
row["metadata"],
|
||||
),
|
||||
)
|
||||
|
||||
# -- Read ----------------------------------------------------------------
|
||||
|
||||
def query(
|
||||
self,
|
||||
filters: ActivityLogFilters,
|
||||
*,
|
||||
before_seq: int | None = None,
|
||||
limit: int = 50,
|
||||
) -> list[ActivityLogEntry]:
|
||||
"""Return the newest matching entries, oldest-first within the page.
|
||||
|
||||
Keyset pagination: pass the smallest ``seq`` seen on the previous page
|
||||
as *before_seq* to get the next page. Entries are fetched in
|
||||
``seq DESC`` order from SQLite and then reversed before returning so
|
||||
that the caller receives them in chronological order within the page.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
filters:
|
||||
Optional filter criteria.
|
||||
before_seq:
|
||||
Exclusive upper bound on ``seq``. ``None`` returns the first
|
||||
(newest) page.
|
||||
limit:
|
||||
Maximum number of entries to return.
|
||||
"""
|
||||
params: list = []
|
||||
keyset = "seq < ?" if before_seq is not None else None
|
||||
if before_seq is not None:
|
||||
params.append(before_seq)
|
||||
|
||||
where_fragment = _build_filter_clause(filters, params, extra_where=keyset)
|
||||
where_clause = f"WHERE {where_fragment}" if where_fragment else ""
|
||||
params.append(limit)
|
||||
|
||||
sql = (
|
||||
f"SELECT seq, id, ts, category, action, severity, actor, "
|
||||
f"entity_type, entity_id, entity_name, message, metadata "
|
||||
f"FROM {_TABLE} "
|
||||
f"{where_clause} "
|
||||
f"ORDER BY seq DESC "
|
||||
f"LIMIT ?"
|
||||
)
|
||||
|
||||
cursor = self._db.execute(sql, tuple(params))
|
||||
rows = cursor.fetchall()
|
||||
# Reverse to return chronological order within the page
|
||||
return [ActivityLogEntry.from_row(dict(row)) for row in reversed(rows)]
|
||||
|
||||
def count(self, filters: ActivityLogFilters | None = None) -> int:
|
||||
"""Return the number of entries matching *filters* (or all entries)."""
|
||||
if filters is None:
|
||||
filters = ActivityLogFilters()
|
||||
params: list = []
|
||||
where_fragment = _build_filter_clause(filters, params)
|
||||
where_clause = f"WHERE {where_fragment}" if where_fragment else ""
|
||||
|
||||
sql = f"SELECT COUNT(*) AS cnt FROM {_TABLE} {where_clause}"
|
||||
cursor = self._db.execute(sql, tuple(params))
|
||||
row = cursor.fetchone()
|
||||
return int(row["cnt"])
|
||||
|
||||
# -- Maintenance ---------------------------------------------------------
|
||||
|
||||
def prune(
|
||||
self,
|
||||
*,
|
||||
before_ts: datetime | None = None,
|
||||
max_entries: int | None = None,
|
||||
) -> int:
|
||||
"""Delete old / excess entries; return the total rows deleted.
|
||||
|
||||
Both predicates are applied independently:
|
||||
|
||||
1. Delete all rows where ``ts < before_ts`` (age-based pruning).
|
||||
2. Keep only the newest ``max_entries`` rows, deleting the rest
|
||||
(count-based pruning).
|
||||
|
||||
The two operations run in separate statements so that each can be
|
||||
independently enabled or disabled.
|
||||
"""
|
||||
deleted = 0
|
||||
|
||||
if before_ts is not None:
|
||||
cursor = self._db.execute(
|
||||
f"DELETE FROM {_TABLE} WHERE ts < ?",
|
||||
(before_ts.isoformat(),),
|
||||
)
|
||||
deleted += cursor.rowcount
|
||||
|
||||
if max_entries is not None and max_entries >= 0:
|
||||
# Find the seq of the Nth newest row; delete everything older than it.
|
||||
cursor = self._db.execute(
|
||||
f"SELECT seq FROM {_TABLE} ORDER BY seq DESC LIMIT 1 OFFSET ?",
|
||||
(max_entries,),
|
||||
)
|
||||
cutoff_row = cursor.fetchone()
|
||||
if cutoff_row is not None:
|
||||
cutoff_seq = cutoff_row["seq"]
|
||||
cursor = self._db.execute(
|
||||
f"DELETE FROM {_TABLE} WHERE seq <= ?",
|
||||
(cutoff_seq,),
|
||||
)
|
||||
deleted += cursor.rowcount
|
||||
|
||||
return deleted
|
||||
|
||||
def clear(self) -> int:
|
||||
"""Delete all entries; return the number of rows deleted."""
|
||||
cursor = self._db.execute(f"DELETE FROM {_TABLE}")
|
||||
return cursor.rowcount
|
||||
|
||||
# -- Export --------------------------------------------------------------
|
||||
|
||||
def iter_export(self, filters: ActivityLogFilters | None = None) -> Iterator[ActivityLogEntry]:
|
||||
"""Yield all matching entries in ascending ``seq`` order.
|
||||
|
||||
Uses a server-side cursor so the entire result set is never loaded
|
||||
into memory — safe for large tables. The connection's ``RLock`` is
|
||||
held for the duration of the iteration; callers should consume this
|
||||
iterator promptly.
|
||||
"""
|
||||
if filters is None:
|
||||
filters = ActivityLogFilters()
|
||||
|
||||
params: list = []
|
||||
where_fragment = _build_filter_clause(filters, params)
|
||||
where_clause = f"WHERE {where_fragment}" if where_fragment else ""
|
||||
|
||||
sql = (
|
||||
f"SELECT seq, id, ts, category, action, severity, actor, "
|
||||
f"entity_type, entity_id, entity_name, message, metadata "
|
||||
f"FROM {_TABLE} "
|
||||
f"{where_clause} "
|
||||
f"ORDER BY seq ASC"
|
||||
)
|
||||
|
||||
# Use the raw connection directly to get a streaming cursor.
|
||||
# We borrow the lock for the full iteration.
|
||||
with self._db._lock: # noqa: SLF001 — internal access; no public cursor API
|
||||
cursor: sqlite3.Cursor = self._db._conn.execute(sql, tuple(params)) # noqa: SLF001
|
||||
for row in cursor:
|
||||
yield ActivityLogEntry.from_row(dict(row))
|
||||
@@ -213,7 +213,57 @@ class StaticToSingleColorMigration(DataMigration):
|
||||
return rows_changed
|
||||
|
||||
|
||||
class AddActivityLogTableMigration(DataMigration):
|
||||
"""Create the ``activity_log`` table and its indexes.
|
||||
|
||||
This is a purely additive migration — it does not touch any existing table.
|
||||
``CREATE TABLE / INDEX IF NOT EXISTS`` ensures idempotency if the migration
|
||||
somehow runs twice (e.g. after a partial restore).
|
||||
"""
|
||||
|
||||
name = "002_add_activity_log"
|
||||
|
||||
def apply(self, conn: sqlite3.Connection) -> int:
|
||||
conn.execute(
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS activity_log (
|
||||
seq INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
id TEXT UNIQUE NOT NULL,
|
||||
ts TEXT NOT NULL,
|
||||
category TEXT NOT NULL,
|
||||
action TEXT NOT NULL,
|
||||
severity TEXT NOT NULL,
|
||||
actor TEXT NOT NULL,
|
||||
entity_type TEXT,
|
||||
entity_id TEXT,
|
||||
entity_name TEXT,
|
||||
message TEXT NOT NULL,
|
||||
metadata TEXT NOT NULL DEFAULT '{}'
|
||||
)
|
||||
"""
|
||||
)
|
||||
# Primary read path: newest-first keyset pagination
|
||||
conn.execute(
|
||||
"CREATE INDEX IF NOT EXISTS idx_activity_log_ts_seq "
|
||||
"ON activity_log (ts DESC, seq DESC)"
|
||||
)
|
||||
# Filter indexes
|
||||
conn.execute(
|
||||
"CREATE INDEX IF NOT EXISTS idx_activity_log_category " "ON activity_log (category)"
|
||||
)
|
||||
conn.execute(
|
||||
"CREATE INDEX IF NOT EXISTS idx_activity_log_severity " "ON activity_log (severity)"
|
||||
)
|
||||
conn.execute("CREATE INDEX IF NOT EXISTS idx_activity_log_actor " "ON activity_log (actor)")
|
||||
conn.execute(
|
||||
"CREATE INDEX IF NOT EXISTS idx_activity_log_entity "
|
||||
"ON activity_log (entity_type, entity_id)"
|
||||
)
|
||||
return 0
|
||||
|
||||
|
||||
# Master list — ORDER MATTERS. Append new migrations; never reorder.
|
||||
ALL_MIGRATIONS: list[DataMigration] = [
|
||||
StaticToSingleColorMigration(),
|
||||
AddActivityLogTableMigration(),
|
||||
]
|
||||
|
||||
Reference in New Issue
Block a user