4a0927521a
- GET /activity-log: filtered, keyset-paginated list (categories/severities/actor/entity/date/q) - GET /activity-log/export: streaming CSV/JSON, chunked keyset (releases DB lock per batch), CSV formula-injection guard - GET/PUT /activity-log/settings: retention config (PUT require_authenticated) - DELETE /activity-log: clear (require_authenticated, self-audited) - security: export DoS fix, settings-PUT auth gate, CSV \t/\r guard, metadata-as-JSON - 122 API tests (auth posture, CSV injection, pagination integrity, filters, settings bounds, clear-audited)
339 lines
12 KiB
Python
339 lines
12 KiB
Python
"""Append-only repository for the persistent activity / audit log.
|
|
|
|
Design notes
|
|
------------
|
|
* Does NOT subclass ``BaseSqliteStore`` — that base loads every row into an
|
|
in-memory cache at construction time, which is wrong for an unbounded,
|
|
append-heavy log.
|
|
* All SQL parameters are passed as positional ``?`` placeholders — no user
|
|
input is interpolated into the query string.
|
|
* Keyset pagination is implemented via the ``seq`` INTEGER PRIMARY KEY
|
|
AUTOINCREMENT column, which is strictly monotonic and survives rows with
|
|
identical ``ts`` values.
|
|
* The repository takes a ``Database`` instance and calls ``db.execute`` /
|
|
``db.transaction`` directly.
|
|
* Thread safety: ``Database.execute`` holds the ``RLock`` for the duration of
|
|
each call. The repository itself adds no extra locking. Callers that
|
|
originate from non-event-loop threads MUST marshal via
|
|
``loop.call_soon_threadsafe`` (that is Phase 2's responsibility).
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from datetime import datetime
|
|
from typing import Iterator
|
|
|
|
from ledgrab.storage.activity_log import ActivityLogEntry, ActivityLogFilters
|
|
from ledgrab.storage.database import Database
|
|
from ledgrab.utils import get_logger
|
|
|
|
# Module-level logger, named after this module via the project's ``get_logger`` helper.
logger = get_logger(__name__)
|
|
|
|
# Name of the table targeted by every SQL statement the repository issues.
# It is interpolated into query text, so it must stay a trusted constant.
_TABLE = "activity_log"
|
|
|
|
|
|
def _build_filter_clause(
    filters: ActivityLogFilters,
    params: list,
    *,
    extra_where: str | None = None,
) -> str:
    """Build the ``AND``-joined predicate fragment for *filters*.

    The returned string never includes the ``WHERE`` keyword; the caller adds
    it (or an ``AND``) when the fragment is non-empty. An empty string means
    there is nothing to restrict on.

    Every value is bound through a ``?`` placeholder and appended to *params*
    in the same order as its placeholder appears in the fragment.

    ``extra_where``, when truthy, becomes the first condition (used for the
    keyset predicate). Its bound value is NOT added here: the caller must have
    appended it to *params* before calling, so that it precedes the filter
    values.
    """
    fragments: list[str] = [extra_where] if extra_where else []

    def require(predicate: str, *bound: object) -> None:
        # Keep the predicate and its bound values in lock-step.
        fragments.append(predicate)
        params.extend(bound)

    # Multi-valued filters become ``column IN (?,?,...)``.
    for column, allowed in (
        ("category", filters.categories),
        ("severity", filters.severities),
    ):
        if allowed:
            marks = ",".join(["?"] * len(allowed))
            require(f"{column} IN ({marks})", *allowed)

    # Exact-match filters; ``None`` means "not filtered".
    for column, wanted in (
        ("actor", filters.actor),
        ("entity_type", filters.entity_type),
        ("entity_id", filters.entity_id),
    ):
        if wanted is not None:
            require(f"{column} = ?", wanted)

    # Inclusive time window, compared against the ISO-8601 text of each bound.
    for operator, bound in ((">=", filters.since), ("<=", filters.until)):
        if bound is not None:
            require(f"ts {operator} ?", bound.isoformat())

    needle = filters.message_like
    if needle is not None:
        # Neutralise LIKE metacharacters so the user's text matches literally.
        # The backslash must be handled first, otherwise the escapes added for
        # '%' and '_' would themselves be doubled.
        for special in ("\\", "%", "_"):
            needle = needle.replace(special, "\\" + special)
        require("message LIKE ? ESCAPE '\\'", f"%{needle}%")

    return " AND ".join(fragments)
|
|
|
|
|
|
class ActivityLogRepository:
    """Purpose-built repository for the ``activity_log`` table.

    Parameters
    ----------
    db:
        The shared ``Database`` instance. Construction runs
        ``MigrationRunner(db).run(ALL_MIGRATIONS)`` so the table exists before
        the first call to :meth:`record`; callers do not need a separate
        startup step.
    """

    # Columns written by :meth:`record`, in INSERT order. ``seq`` is absent
    # because SQLite assigns it.
    _INSERT_COLUMNS = (
        "id",
        "ts",
        "category",
        "action",
        "severity",
        "actor",
        "entity_type",
        "entity_id",
        "entity_name",
        "message",
        "metadata",
    )
    # Columns read back by :meth:`query` and :meth:`iter_export`.
    _SELECT_COLUMNS = ("seq",) + _INSERT_COLUMNS

    def __init__(self, db: Database) -> None:
        self._db = db
        # Apply pending migrations (idempotent; most runs are no-ops).
        # NOTE(review): imported locally rather than at module level —
        # presumably to avoid an import cycle; confirm before hoisting.
        from ledgrab.storage.data_migrations import ALL_MIGRATIONS, MigrationRunner

        MigrationRunner(db).run(ALL_MIGRATIONS)

    @classmethod
    def _page_sql(cls, where_fragment: str, direction: str) -> str:
        """Return the paged SELECT shared by :meth:`query` and :meth:`iter_export`.

        *direction* is an internal literal (``"ASC"`` / ``"DESC"``), never user
        input. The single trailing ``?`` is the LIMIT placeholder.
        """
        where_clause = f"WHERE {where_fragment} " if where_fragment else ""
        return (
            f"SELECT {', '.join(cls._SELECT_COLUMNS)} "
            f"FROM {_TABLE} "
            f"{where_clause}"
            f"ORDER BY seq {direction} "
            f"LIMIT ?"
        )

    # -- Write ---------------------------------------------------------------

    def record(self, entry: ActivityLogEntry) -> None:
        """Append *entry* to the log.

        The ``seq`` column is assigned by SQLite (AUTOINCREMENT) — ``entry``
        does not carry a ``seq`` field.

        Caller contract: this must be called from the event-loop thread (or
        from a context already serialised through the ``Database`` RLock).
        Cross-thread callers must marshal via ``loop.call_soon_threadsafe``;
        that is Phase 2's responsibility.
        """
        row = entry.to_row()
        columns = self._INSERT_COLUMNS
        # Column list, placeholder list and value tuple are all derived from
        # the same tuple so they cannot drift out of sync.
        self._db.execute(
            f"INSERT INTO {_TABLE} "
            f"({', '.join(columns)}) "
            f"VALUES ({', '.join('?' * len(columns))})",
            tuple(row[column] for column in columns),
        )

    # -- Read ----------------------------------------------------------------

    def query(
        self,
        filters: ActivityLogFilters,
        *,
        before_seq: int | None = None,
        limit: int = 50,
    ) -> list[ActivityLogEntry]:
        """Return the newest matching entries, oldest-first within the page.

        Keyset pagination: pass the smallest ``seq`` seen on the previous page
        as *before_seq* to get the next page. Entries are fetched in
        ``seq DESC`` order from SQLite and then reversed before returning so
        that the caller receives them in chronological order within the page.

        Parameters
        ----------
        filters:
            Filter criteria (required; pass an empty ``ActivityLogFilters``
            for "no restriction").
        before_seq:
            Exclusive upper bound on ``seq``. ``None`` returns the first
            (newest) page.
        limit:
            Maximum number of entries to return. ``0`` yields an empty list.

        Raises
        ------
        ValueError
            If *limit* is negative. SQLite treats a negative LIMIT as "no
            upper bound", which would turn a page request into a full-table
            read.
        """
        if limit < 0:
            raise ValueError(f"limit must be >= 0, got {limit}")

        # The keyset value must be bound first: _build_filter_clause places
        # ``extra_where`` ahead of every filter condition.
        params: list = []
        keyset: str | None = None
        if before_seq is not None:
            keyset = "seq < ?"
            params.append(before_seq)

        where_fragment = _build_filter_clause(filters, params, extra_where=keyset)
        params.append(limit)

        cursor = self._db.execute(self._page_sql(where_fragment, "DESC"), tuple(params))
        rows = cursor.fetchall()
        # Reverse to return chronological order within the page.
        return [ActivityLogEntry.from_row(dict(row)) for row in reversed(rows)]

    def count(self, filters: ActivityLogFilters | None = None) -> int:
        """Return the number of entries matching *filters* (or all entries)."""
        if filters is None:
            filters = ActivityLogFilters()
        params: list = []
        where_fragment = _build_filter_clause(filters, params)
        where_clause = f"WHERE {where_fragment}" if where_fragment else ""

        sql = f"SELECT COUNT(*) AS cnt FROM {_TABLE} {where_clause}"
        row = self._db.execute(sql, tuple(params)).fetchone()
        return int(row["cnt"])

    # -- Maintenance ---------------------------------------------------------

    def prune(
        self,
        *,
        before_ts: datetime | None = None,
        max_entries: int | None = None,
    ) -> int:
        """Delete old / excess entries; return the total rows deleted.

        Both predicates are applied independently, in this order:

        1. Delete all rows where ``ts < before_ts`` (age-based pruning). The
           comparison is made against ``before_ts.isoformat()``.
        2. Keep only the newest ``max_entries`` rows, deleting the rest
           (count-based pruning). ``max_entries == 0`` deletes everything;
           a negative value is ignored, exactly like ``None``.

        The two operations run in separate statements so that each can be
        independently enabled or disabled.
        """
        deleted = 0

        if before_ts is not None:
            cursor = self._db.execute(
                f"DELETE FROM {_TABLE} WHERE ts < ?",
                (before_ts.isoformat(),),
            )
            deleted += cursor.rowcount

        if max_entries is not None and max_entries >= 0:
            # OFFSET skips the ``max_entries`` newest rows, so the row found is
            # the (max_entries + 1)-th newest — i.e. the newest row that must
            # go. No row at that offset means the table is within budget.
            cutoff_row = self._db.execute(
                f"SELECT seq FROM {_TABLE} ORDER BY seq DESC LIMIT 1 OFFSET ?",
                (max_entries,),
            ).fetchone()
            if cutoff_row is not None:
                # Rows appended after the SELECT receive larger ``seq`` values,
                # so this range delete can never remove them.
                cursor = self._db.execute(
                    f"DELETE FROM {_TABLE} WHERE seq <= ?",
                    (cutoff_row["seq"],),
                )
                deleted += cursor.rowcount

        return deleted

    def clear(self) -> int:
        """Delete all entries; return the number of rows deleted."""
        cursor = self._db.execute(f"DELETE FROM {_TABLE}")
        return cursor.rowcount

    def get_seq_for_id(self, entry_id: str) -> int | None:
        """Return the ``seq`` value for the entry with *entry_id*, or ``None``.

        Used by the API list endpoint to compute the keyset cursor
        (``next_before_seq``) from the oldest entry on the current page.
        """
        row = self._db.execute(
            f"SELECT seq FROM {_TABLE} WHERE id = ?",
            (entry_id,),
        ).fetchone()
        return None if row is None else int(row["seq"])

    # -- Export --------------------------------------------------------------

    def iter_export(
        self,
        filters: ActivityLogFilters | None = None,
        *,
        batch_size: int = 1000,
    ) -> Iterator[ActivityLogEntry]:
        """Yield all matching entries in ascending ``seq`` order.

        Fetches rows in bounded batches (keyset-paginated by ``seq``), holding
        the DB lock only for the duration of each ``fetchall()`` and releasing
        it before yielding. This prevents a slow/stalled export client from
        blocking all other DB operations (record, config writes, etc.) for the
        full duration of the stream.

        Memory usage is bounded to ``batch_size`` rows at a time.

        Raises
        ------
        ValueError
            If *batch_size* is less than 1. The check runs when this method is
            called, not on first iteration. A batch size of 0 would silently
            export nothing, and a negative one would remove the LIMIT (SQLite
            treats a negative LIMIT as unbounded) and load every row in a
            single locked fetch.
        """
        if batch_size < 1:
            raise ValueError(f"batch_size must be >= 1, got {batch_size}")
        if filters is None:
            filters = ActivityLogFilters()
        return self._iter_export_batches(filters, batch_size)

    def _iter_export_batches(
        self,
        filters: ActivityLogFilters,
        batch_size: int,
    ) -> Iterator[ActivityLogEntry]:
        """Generator behind :meth:`iter_export`; expects validated arguments."""
        # Keyset cursor: largest seq yielded so far. ``None`` means "start from
        # the very beginning"; afterwards each batch resumes with ``seq > ?``.
        last_seq: int | None = None

        while True:
            # The keyset value must be bound first: _build_filter_clause places
            # ``extra_where`` ahead of every filter condition.
            params: list = []
            keyset: str | None = None
            if last_seq is not None:
                keyset = "seq > ?"
                params.append(last_seq)
            where_fragment = _build_filter_clause(filters, params, extra_where=keyset)
            params.append(batch_size)
            sql = self._page_sql(where_fragment, "ASC")

            # Hold the lock only for the bounded fetchall; release before yielding.
            with self._db._lock:  # noqa: SLF001 — internal access; no public cursor API
                rows = self._db._conn.execute(sql, tuple(params)).fetchall()  # noqa: SLF001

            if not rows:
                return

            for row in rows:
                yield ActivityLogEntry.from_row(dict(row))

            if len(rows) < batch_size:
                # Fewer rows than requested → this was the final batch.
                return

            # The last row has the largest seq in this batch (ORDER BY seq ASC).
            last_seq = rows[-1]["seq"]
|