feat(observability): event triggers + log scanner backend
Two paired backends sharing the events.Bus seam:
Event triggers (consumer-side):
- internal/store/event_triggers.go — CRUD with action_secret
redaction on read (placeholder echo treated as "no change" on
PATCH so secrets aren't accidentally wiped).
- internal/events/dispatcher.go — bus subscriber, AND-composed
filters (severity CSV, source CSV, message regex with memoized
compile cache). Structural loop-prevention: never writes to
event_log. Sends via notifier.SendPayload.
- internal/notify: SendPayload + SendSyncForTestPayload methods,
TierEventTrigger constant, doSendRaw shared with the legacy
Event-shaped path.
- internal/api/event_triggers.go — admin-gated CRUD + /test
sending the real TriggerWebhookPayload shape. SSRF guard
rejects loopback / link-local / unspecified targets. PATCH
uses pointer-typed DTO for partial updates.
Log scanner (producer-side):
- internal/logscanner/ — engine (per-rule cooldown +
per-container token bucket, atomic drop counters), tail
(multiplexed docker frame demuxer with TTY fallback + 16 MiB
payload cap + 1 MiB reassembly cap + RFC3339Nano-validated
timestamp strip + UTF-8-safe message truncation), manager
(5s container polling, atomic.Pointer[Snapshot] hot-reload,
HitEmitter writes event_log + publishes EventLog so the
trigger dispatcher picks them up immediately).
- internal/docker/container.go — ContainerLogsOpts exposes
stream selection for stderr-only / stdout-only rules.
- internal/store: log_scan_rules table + CRUD with
EffectiveLogScanRules resolver (globals minus per-workload
overrides plus workload-only additions). Transactional
cascade-delete of overrides when a global rule is removed.
- internal/api/log_scan_rules.go — admin-gated CRUD + /test
(sample_line → matched/captures) + /stats (drop counters +
active tail count + last-snapshot compile errors) +
GET /api/workloads/{id}/effective-rules.
cmd/server/main.go wires both subsystems next to the existing
RegisterPersistentLogger. Tests cover the engine's cooldown, token-
bucket, and drop-counter behavior; snapshot effective-set semantics;
manager compile-error capture; dispatcher matching; store validation
and cascade-delete; and the API URL validator and secret redaction.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -181,6 +181,15 @@ func (s *Store) runMigrations() error {
|
||||
// re-write path; the LEFT JOIN in ListContainersByStageID falls back
|
||||
// to (project_id, role=stage_name) so legacy rows still resolve.
|
||||
`ALTER TABLE containers ADD COLUMN stage_id TEXT NOT NULL DEFAULT ''`,
|
||||
// Workload-first refactor columns (2026-05-10). Land additively so
|
||||
// the legacy kind/ref_id columns continue to serve existing
|
||||
// project/stack/site rows during cutover.
|
||||
`ALTER TABLE workloads ADD COLUMN source_kind TEXT NOT NULL DEFAULT ''`,
|
||||
`ALTER TABLE workloads ADD COLUMN source_config TEXT NOT NULL DEFAULT '{}'`,
|
||||
`ALTER TABLE workloads ADD COLUMN trigger_kind TEXT NOT NULL DEFAULT ''`,
|
||||
`ALTER TABLE workloads ADD COLUMN trigger_config TEXT NOT NULL DEFAULT '{}'`,
|
||||
`ALTER TABLE workloads ADD COLUMN public_faces TEXT NOT NULL DEFAULT '[]'`,
|
||||
`ALTER TABLE workloads ADD COLUMN parent_workload_id TEXT NOT NULL DEFAULT ''`,
|
||||
}
|
||||
|
||||
// Workload refactor tables (2026-05-09). Workload is the unifying primitive
|
||||
@@ -195,6 +204,12 @@ func (s *Store) runMigrations() error {
|
||||
ref_id TEXT NOT NULL,
|
||||
name TEXT NOT NULL,
|
||||
app_id TEXT NOT NULL DEFAULT '',
|
||||
source_kind TEXT NOT NULL DEFAULT '',
|
||||
source_config TEXT NOT NULL DEFAULT '{}',
|
||||
trigger_kind TEXT NOT NULL DEFAULT '',
|
||||
trigger_config TEXT NOT NULL DEFAULT '{}',
|
||||
public_faces TEXT NOT NULL DEFAULT '[]',
|
||||
parent_workload_id TEXT NOT NULL DEFAULT '',
|
||||
notification_url TEXT NOT NULL DEFAULT '',
|
||||
notification_secret TEXT NOT NULL DEFAULT '',
|
||||
webhook_secret TEXT NOT NULL DEFAULT '',
|
||||
@@ -231,6 +246,34 @@ func (s *Store) runMigrations() error {
|
||||
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
||||
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
|
||||
)`,
|
||||
// workload_env: per-workload env overrides (encrypt-at-rest for
|
||||
// secrets). Functional analog of stage_env. Workload deletion
|
||||
// cascades through the FK so orphan rows are impossible.
|
||||
`CREATE TABLE IF NOT EXISTS workload_env (
|
||||
id TEXT PRIMARY KEY,
|
||||
workload_id TEXT NOT NULL REFERENCES workloads(id) ON DELETE CASCADE,
|
||||
key TEXT NOT NULL,
|
||||
value TEXT NOT NULL DEFAULT '',
|
||||
encrypted INTEGER NOT NULL DEFAULT 0,
|
||||
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
||||
updated_at TEXT NOT NULL DEFAULT (datetime('now')),
|
||||
UNIQUE(workload_id, key)
|
||||
)`,
|
||||
// workload_volumes: per-workload mount declarations. Mirrors the
|
||||
// legacy `volumes` table shape (source / target / scope / name)
|
||||
// but keyed on workload_id. UNIQUE on (workload_id, target) so a
|
||||
// re-add overwrites instead of duplicating.
|
||||
`CREATE TABLE IF NOT EXISTS workload_volumes (
|
||||
id TEXT PRIMARY KEY,
|
||||
workload_id TEXT NOT NULL REFERENCES workloads(id) ON DELETE CASCADE,
|
||||
source TEXT NOT NULL DEFAULT '',
|
||||
target TEXT NOT NULL,
|
||||
scope TEXT NOT NULL DEFAULT 'absolute',
|
||||
name TEXT NOT NULL DEFAULT '',
|
||||
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
||||
updated_at TEXT NOT NULL DEFAULT (datetime('now')),
|
||||
UNIQUE(workload_id, target)
|
||||
)`,
|
||||
}
|
||||
for _, t := range workloadTables {
|
||||
if _, err := s.db.Exec(t); err != nil {
|
||||
@@ -312,6 +355,49 @@ func (s *Store) runMigrations() error {
|
||||
}
|
||||
}
|
||||
|
||||
// Observability: event_triggers — consume EventLog entries off the
|
||||
// bus and dispatch webhook actions. Schema kept flat (comma-list
|
||||
// filters, single optional regex) — see LOGSCAN_AND_TRIGGERS_TODO.md.
|
||||
observabilityTables := []string{
|
||||
`CREATE TABLE IF NOT EXISTS event_triggers (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
name TEXT NOT NULL,
|
||||
filter_severity TEXT NOT NULL DEFAULT '',
|
||||
filter_source TEXT NOT NULL DEFAULT '',
|
||||
filter_message_regex TEXT NOT NULL DEFAULT '',
|
||||
action_type TEXT NOT NULL DEFAULT 'webhook',
|
||||
action_target TEXT NOT NULL DEFAULT '',
|
||||
action_secret TEXT NOT NULL DEFAULT '',
|
||||
enabled INTEGER NOT NULL DEFAULT 1,
|
||||
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
||||
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
|
||||
)`,
|
||||
// log_scan_rules: regex patterns the log-scanner manager
|
||||
// applies to container log lines. workload_id is NOT NULL; the empty
|
||||
// string is the sentinel for a global rule. A global rule keeps
|
||||
// overrides_id = 0, and a per-workload override stores the global's id there.
|
||||
`CREATE TABLE IF NOT EXISTS log_scan_rules (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
workload_id TEXT NOT NULL DEFAULT '',
|
||||
overrides_id INTEGER NOT NULL DEFAULT 0,
|
||||
name TEXT NOT NULL,
|
||||
pattern TEXT NOT NULL,
|
||||
severity TEXT NOT NULL DEFAULT 'warn',
|
||||
streams TEXT NOT NULL DEFAULT 'all',
|
||||
cooldown_seconds INTEGER NOT NULL DEFAULT 60,
|
||||
enabled INTEGER NOT NULL DEFAULT 1,
|
||||
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
||||
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
|
||||
)`,
|
||||
`CREATE INDEX IF NOT EXISTS idx_log_scan_rules_workload ON log_scan_rules(workload_id)`,
|
||||
`CREATE INDEX IF NOT EXISTS idx_log_scan_rules_overrides ON log_scan_rules(overrides_id)`,
|
||||
}
|
||||
for _, t := range observabilityTables {
|
||||
if _, err := s.db.Exec(t); err != nil {
|
||||
return fmt.Errorf("create observability table: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
for _, m := range migrations {
|
||||
if _, err := s.db.Exec(m); err != nil {
|
||||
// "duplicate column" / "already exists" are expected when a
|
||||
@@ -366,6 +452,8 @@ func (s *Store) runMigrations() error {
|
||||
`CREATE INDEX IF NOT EXISTS idx_containers_container_id ON containers(container_id) WHERE container_id != ''`,
|
||||
`CREATE INDEX IF NOT EXISTS idx_containers_kind ON containers(workload_kind)`,
|
||||
`CREATE INDEX IF NOT EXISTS idx_containers_stage_id ON containers(stage_id) WHERE stage_id != ''`,
|
||||
`CREATE INDEX IF NOT EXISTS idx_workload_env_workload ON workload_env(workload_id)`,
|
||||
`CREATE INDEX IF NOT EXISTS idx_workload_volumes_workload ON workload_volumes(workload_id)`,
|
||||
}
|
||||
for _, idx := range indexes {
|
||||
if _, err := s.db.Exec(idx); err != nil {
|
||||
|
||||
Reference in New Issue
Block a user