feat(observability): event triggers + log scanner backend
Two paired backends sharing the events.Bus seam:
Event triggers (consumer-side):
- internal/store/event_triggers.go — CRUD with action_secret
redaction on read (placeholder echo treated as "no change" on
PATCH so secrets aren't accidentally wiped).
- internal/events/dispatcher.go — bus subscriber, AND-composed
filters (severity CSV, source CSV, message regex with memoized
compile cache). Structural loop-prevention: never writes to
event_log. Sends via notifier.SendPayload.
- internal/notify: SendPayload + SendSyncForTestPayload methods,
TierEventTrigger constant, doSendRaw shared with the legacy
Event-shaped path.
- internal/api/event_triggers.go — admin-gated CRUD + /test
sending the real TriggerWebhookPayload shape. SSRF guard
rejects loopback / link-local / unspecified targets. PATCH
uses pointer-typed DTO for partial updates.
Log scanner (producer-side):
- internal/logscanner/ — engine (per-rule cooldown +
per-container token bucket, atomic drop counters), tail
(multiplexed docker frame demuxer with TTY fallback + 16 MiB
payload cap + 1 MiB reassembly cap + RFC3339Nano-validated
timestamp strip + UTF-8-safe message truncation), manager
(5s container polling, atomic.Pointer[Snapshot] hot-reload,
HitEmitter writes event_log + publishes EventLog so the
trigger dispatcher picks them up immediately).
- internal/docker/container.go — ContainerLogsOpts exposes
stream selection for stderr-only / stdout-only rules.
- internal/store: log_scan_rules table + CRUD with
EffectiveLogScanRules resolver (globals minus per-workload
overrides plus workload-only additions). Transactional
cascade-delete of overrides when a global rule is removed.
- internal/api/log_scan_rules.go — admin-gated CRUD + /test
(sample_line → matched/captures) + /stats (drop counters +
active tail count + last-snapshot compile errors) +
GET /api/workloads/{id}/effective-rules.
cmd/server/main.go wires both subsystems next to the existing
RegisterPersistentLogger. Coverage spans engine cooldown / bucket
counter tests, snapshot effective-set semantics, manager compile-
error capture, dispatcher matching, store validation +
cascade-delete, API URL validator + secret redaction.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,208 @@
|
||||
package store
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// CreateEventTrigger inserts a new trigger row. ID is assigned by the
|
||||
// auto-increment column and returned on the populated struct.
|
||||
func (s *Store) CreateEventTrigger(t EventTrigger) (EventTrigger, error) {
|
||||
if strings.TrimSpace(t.Name) == "" {
|
||||
return EventTrigger{}, fmt.Errorf("event_trigger: name is required")
|
||||
}
|
||||
if t.ActionType == "" {
|
||||
t.ActionType = EventTriggerActionWebhook
|
||||
}
|
||||
if t.ActionType != EventTriggerActionWebhook {
|
||||
return EventTrigger{}, fmt.Errorf("event_trigger: unsupported action_type %q", t.ActionType)
|
||||
}
|
||||
if strings.TrimSpace(t.ActionTarget) == "" {
|
||||
return EventTrigger{}, fmt.Errorf("event_trigger: action_target is required")
|
||||
}
|
||||
|
||||
now := Now()
|
||||
t.CreatedAt = now
|
||||
t.UpdatedAt = now
|
||||
|
||||
res, err := s.db.Exec(
|
||||
`INSERT INTO event_triggers
|
||||
(name, filter_severity, filter_source, filter_message_regex,
|
||||
action_type, action_target, action_secret, enabled,
|
||||
created_at, updated_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
||||
t.Name, t.FilterSeverity, t.FilterSource, t.FilterMessageRegex,
|
||||
t.ActionType, t.ActionTarget, t.ActionSecret, boolToInt(t.Enabled),
|
||||
t.CreatedAt, t.UpdatedAt,
|
||||
)
|
||||
if err != nil {
|
||||
return EventTrigger{}, fmt.Errorf("insert event trigger: %w", err)
|
||||
}
|
||||
id, err := res.LastInsertId()
|
||||
if err != nil {
|
||||
return EventTrigger{}, fmt.Errorf("get event trigger id: %w", err)
|
||||
}
|
||||
t.ID = id
|
||||
return t, nil
|
||||
}
|
||||
|
||||
// ListEventTriggers returns every trigger row, ordered by id so the UI
|
||||
// rendering is stable across requests. Trigger counts are expected to
|
||||
// be small (operator-curated), so unbounded listing is fine.
|
||||
func (s *Store) ListEventTriggers() ([]EventTrigger, error) {
|
||||
rows, err := s.db.Query(
|
||||
`SELECT id, name, filter_severity, filter_source, filter_message_regex,
|
||||
action_type, action_target, action_secret, enabled,
|
||||
created_at, updated_at
|
||||
FROM event_triggers ORDER BY id`,
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("query event triggers: %w", err)
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
out := []EventTrigger{}
|
||||
for rows.Next() {
|
||||
t, err := scanEventTrigger(rows)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
out = append(out, t)
|
||||
}
|
||||
return out, rows.Err()
|
||||
}
|
||||
|
||||
// ListEnabledEventTriggers returns only the rows with enabled=1. The
|
||||
// dispatcher hot path uses this so a disabled trigger costs nothing.
|
||||
func (s *Store) ListEnabledEventTriggers() ([]EventTrigger, error) {
|
||||
rows, err := s.db.Query(
|
||||
`SELECT id, name, filter_severity, filter_source, filter_message_regex,
|
||||
action_type, action_target, action_secret, enabled,
|
||||
created_at, updated_at
|
||||
FROM event_triggers WHERE enabled = 1 ORDER BY id`,
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("query enabled event triggers: %w", err)
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
out := []EventTrigger{}
|
||||
for rows.Next() {
|
||||
t, err := scanEventTrigger(rows)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
out = append(out, t)
|
||||
}
|
||||
return out, rows.Err()
|
||||
}
|
||||
|
||||
// GetEventTrigger returns one trigger by ID or ErrNotFound.
|
||||
func (s *Store) GetEventTrigger(id int64) (EventTrigger, error) {
|
||||
row := s.db.QueryRow(
|
||||
`SELECT id, name, filter_severity, filter_source, filter_message_regex,
|
||||
action_type, action_target, action_secret, enabled,
|
||||
created_at, updated_at
|
||||
FROM event_triggers WHERE id = ?`, id,
|
||||
)
|
||||
t, err := scanEventTriggerRow(row)
|
||||
if errors.Is(err, sql.ErrNoRows) {
|
||||
return EventTrigger{}, fmt.Errorf("event trigger %d: %w", id, ErrNotFound)
|
||||
}
|
||||
if err != nil {
|
||||
return EventTrigger{}, fmt.Errorf("query event trigger: %w", err)
|
||||
}
|
||||
return t, nil
|
||||
}
|
||||
|
||||
// UpdateEventTrigger overwrites the editable columns of an existing row.
|
||||
// CreatedAt is preserved; UpdatedAt is refreshed.
|
||||
func (s *Store) UpdateEventTrigger(t EventTrigger) (EventTrigger, error) {
|
||||
if t.ID == 0 {
|
||||
return EventTrigger{}, fmt.Errorf("event_trigger: id is required for update")
|
||||
}
|
||||
if strings.TrimSpace(t.Name) == "" {
|
||||
return EventTrigger{}, fmt.Errorf("event_trigger: name is required")
|
||||
}
|
||||
if t.ActionType == "" {
|
||||
t.ActionType = EventTriggerActionWebhook
|
||||
}
|
||||
if t.ActionType != EventTriggerActionWebhook {
|
||||
return EventTrigger{}, fmt.Errorf("event_trigger: unsupported action_type %q", t.ActionType)
|
||||
}
|
||||
if strings.TrimSpace(t.ActionTarget) == "" {
|
||||
return EventTrigger{}, fmt.Errorf("event_trigger: action_target is required")
|
||||
}
|
||||
|
||||
t.UpdatedAt = Now()
|
||||
res, err := s.db.Exec(
|
||||
`UPDATE event_triggers
|
||||
SET name = ?, filter_severity = ?, filter_source = ?,
|
||||
filter_message_regex = ?, action_type = ?, action_target = ?,
|
||||
action_secret = ?, enabled = ?, updated_at = ?
|
||||
WHERE id = ?`,
|
||||
t.Name, t.FilterSeverity, t.FilterSource, t.FilterMessageRegex,
|
||||
t.ActionType, t.ActionTarget, t.ActionSecret, boolToInt(t.Enabled),
|
||||
t.UpdatedAt, t.ID,
|
||||
)
|
||||
if err != nil {
|
||||
return EventTrigger{}, fmt.Errorf("update event trigger: %w", err)
|
||||
}
|
||||
n, _ := res.RowsAffected()
|
||||
if n == 0 {
|
||||
return EventTrigger{}, fmt.Errorf("event trigger %d: %w", t.ID, ErrNotFound)
|
||||
}
|
||||
return s.GetEventTrigger(t.ID)
|
||||
}
|
||||
|
||||
// DeleteEventTrigger removes a trigger by ID. Idempotent on the
|
||||
// caller's side: returns ErrNotFound if the row is already gone so a
|
||||
// double-click in the UI gives a clean error rather than 500.
|
||||
func (s *Store) DeleteEventTrigger(id int64) error {
|
||||
res, err := s.db.Exec(`DELETE FROM event_triggers WHERE id = ?`, id)
|
||||
if err != nil {
|
||||
return fmt.Errorf("delete event trigger: %w", err)
|
||||
}
|
||||
n, _ := res.RowsAffected()
|
||||
if n == 0 {
|
||||
return fmt.Errorf("event trigger %d: %w", id, ErrNotFound)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func scanEventTrigger(rows *sql.Rows) (EventTrigger, error) {
|
||||
var t EventTrigger
|
||||
var enabled int
|
||||
if err := rows.Scan(
|
||||
&t.ID, &t.Name, &t.FilterSeverity, &t.FilterSource, &t.FilterMessageRegex,
|
||||
&t.ActionType, &t.ActionTarget, &t.ActionSecret, &enabled,
|
||||
&t.CreatedAt, &t.UpdatedAt,
|
||||
); err != nil {
|
||||
return EventTrigger{}, fmt.Errorf("scan event trigger: %w", err)
|
||||
}
|
||||
t.Enabled = enabled != 0
|
||||
return t, nil
|
||||
}
|
||||
|
||||
func scanEventTriggerRow(row *sql.Row) (EventTrigger, error) {
|
||||
var t EventTrigger
|
||||
var enabled int
|
||||
if err := row.Scan(
|
||||
&t.ID, &t.Name, &t.FilterSeverity, &t.FilterSource, &t.FilterMessageRegex,
|
||||
&t.ActionType, &t.ActionTarget, &t.ActionSecret, &enabled,
|
||||
&t.CreatedAt, &t.UpdatedAt,
|
||||
); err != nil {
|
||||
return EventTrigger{}, err
|
||||
}
|
||||
t.Enabled = enabled != 0
|
||||
return t, nil
|
||||
}
|
||||
|
||||
// boolToInt maps a Go bool onto the 0/1 integer encoding used for the
// enabled columns: true becomes 1, false becomes 0.
func boolToInt(b bool) int {
	var n int // zero value covers the false case
	if b {
		n = 1
	}
	return n
}
|
||||
@@ -0,0 +1,256 @@
|
||||
package store
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// CreateLogScanRule inserts a new rule row. Validates severity +
|
||||
// streams enum membership and rejects negative cooldowns.
|
||||
func (s *Store) CreateLogScanRule(r LogScanRule) (LogScanRule, error) {
|
||||
if err := validateLogScanRule(r); err != nil {
|
||||
return LogScanRule{}, err
|
||||
}
|
||||
now := Now()
|
||||
r.CreatedAt = now
|
||||
r.UpdatedAt = now
|
||||
res, err := s.db.Exec(
|
||||
`INSERT INTO log_scan_rules
|
||||
(workload_id, overrides_id, name, pattern, severity, streams,
|
||||
cooldown_seconds, enabled, created_at, updated_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
||||
r.WorkloadID, r.OverridesID, r.Name, r.Pattern, r.Severity, r.Streams,
|
||||
r.CooldownSeconds, boolToInt(r.Enabled), r.CreatedAt, r.UpdatedAt,
|
||||
)
|
||||
if err != nil {
|
||||
return LogScanRule{}, fmt.Errorf("insert log scan rule: %w", err)
|
||||
}
|
||||
id, err := res.LastInsertId()
|
||||
if err != nil {
|
||||
return LogScanRule{}, fmt.Errorf("get log scan rule id: %w", err)
|
||||
}
|
||||
r.ID = id
|
||||
return r, nil
|
||||
}
|
||||
|
||||
// ListLogScanRules returns every rule, ordered by id for stable UI
|
||||
// rendering.
|
||||
func (s *Store) ListLogScanRules() ([]LogScanRule, error) {
|
||||
return s.queryLogScanRules(
|
||||
`SELECT id, workload_id, overrides_id, name, pattern, severity, streams,
|
||||
cooldown_seconds, enabled, created_at, updated_at
|
||||
FROM log_scan_rules ORDER BY id`,
|
||||
)
|
||||
}
|
||||
|
||||
// ListLogScanRulesByWorkload returns all rows directly attached to
|
||||
// the workload (workload-only additions and per-workload overrides),
|
||||
// excluding global rules. Useful for the workload detail page.
|
||||
func (s *Store) ListLogScanRulesByWorkload(workloadID string) ([]LogScanRule, error) {
|
||||
return s.queryLogScanRules(
|
||||
`SELECT id, workload_id, overrides_id, name, pattern, severity, streams,
|
||||
cooldown_seconds, enabled, created_at, updated_at
|
||||
FROM log_scan_rules WHERE workload_id = ? ORDER BY id`,
|
||||
workloadID,
|
||||
)
|
||||
}
|
||||
|
||||
// GetLogScanRule fetches one rule by id or returns ErrNotFound.
|
||||
func (s *Store) GetLogScanRule(id int64) (LogScanRule, error) {
|
||||
row := s.db.QueryRow(
|
||||
`SELECT id, workload_id, overrides_id, name, pattern, severity, streams,
|
||||
cooldown_seconds, enabled, created_at, updated_at
|
||||
FROM log_scan_rules WHERE id = ?`, id,
|
||||
)
|
||||
r, err := scanLogScanRuleRow(row)
|
||||
if errors.Is(err, sql.ErrNoRows) {
|
||||
return LogScanRule{}, fmt.Errorf("log scan rule %d: %w", id, ErrNotFound)
|
||||
}
|
||||
if err != nil {
|
||||
return LogScanRule{}, fmt.Errorf("query log scan rule: %w", err)
|
||||
}
|
||||
return r, nil
|
||||
}
|
||||
|
||||
// UpdateLogScanRule overwrites the editable columns of a rule row.
|
||||
// id, workload_id, overrides_id are immutable on update — change the
|
||||
// scope of a rule by deleting + recreating, to keep the
|
||||
// hot-reload-snapshot semantics simple.
|
||||
func (s *Store) UpdateLogScanRule(r LogScanRule) (LogScanRule, error) {
|
||||
if r.ID == 0 {
|
||||
return LogScanRule{}, fmt.Errorf("log scan rule: id is required for update")
|
||||
}
|
||||
if err := validateLogScanRule(r); err != nil {
|
||||
return LogScanRule{}, err
|
||||
}
|
||||
r.UpdatedAt = Now()
|
||||
res, err := s.db.Exec(
|
||||
`UPDATE log_scan_rules
|
||||
SET name = ?, pattern = ?, severity = ?, streams = ?,
|
||||
cooldown_seconds = ?, enabled = ?, updated_at = ?
|
||||
WHERE id = ?`,
|
||||
r.Name, r.Pattern, r.Severity, r.Streams,
|
||||
r.CooldownSeconds, boolToInt(r.Enabled), r.UpdatedAt, r.ID,
|
||||
)
|
||||
if err != nil {
|
||||
return LogScanRule{}, fmt.Errorf("update log scan rule: %w", err)
|
||||
}
|
||||
n, _ := res.RowsAffected()
|
||||
if n == 0 {
|
||||
return LogScanRule{}, fmt.Errorf("log scan rule %d: %w", r.ID, ErrNotFound)
|
||||
}
|
||||
return s.GetLogScanRule(r.ID)
|
||||
}
|
||||
|
||||
// DeleteLogScanRule removes a rule by id. Override rows referencing
|
||||
// this id are cascade-deleted at the application layer because we
|
||||
// don't enforce SQLite FK constraints repo-wide. The two DELETEs run
|
||||
// inside a single transaction so a mid-cascade failure can't leave
|
||||
// overrides orphaned by a vanished global.
|
||||
func (s *Store) DeleteLogScanRule(id int64) error {
|
||||
tx, err := s.db.Begin()
|
||||
if err != nil {
|
||||
return fmt.Errorf("begin delete tx: %w", err)
|
||||
}
|
||||
defer tx.Rollback() //nolint:errcheck // commit path returns nil; rollback after commit is a no-op
|
||||
if _, err := tx.Exec(`DELETE FROM log_scan_rules WHERE overrides_id = ?`, id); err != nil {
|
||||
return fmt.Errorf("delete dependent log scan overrides: %w", err)
|
||||
}
|
||||
res, err := tx.Exec(`DELETE FROM log_scan_rules WHERE id = ?`, id)
|
||||
if err != nil {
|
||||
return fmt.Errorf("delete log scan rule: %w", err)
|
||||
}
|
||||
n, _ := res.RowsAffected()
|
||||
if n == 0 {
|
||||
return fmt.Errorf("log scan rule %d: %w", id, ErrNotFound)
|
||||
}
|
||||
if err := tx.Commit(); err != nil {
|
||||
return fmt.Errorf("commit delete tx: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// EffectiveLogScanRules computes the effective rule set for one
|
||||
// workload according to the spec in docs/LOGSCAN_AND_TRIGGERS_TODO.md:
|
||||
//
|
||||
// 1. All global rules (workload_id == "" AND overrides_id == 0)
|
||||
// minus globals that have a per-workload override row.
|
||||
// 2. Plus workload-only rules (workload_id == X AND overrides_id == 0).
|
||||
// 3. Plus per-workload override rules (workload_id == X AND overrides_id != 0),
|
||||
// which carry the override's own enabled/pattern/severity.
|
||||
//
|
||||
// Computed in Go after two simple SELECTs since rule counts will be
|
||||
// small (operator-curated, dozens not thousands).
|
||||
func (s *Store) EffectiveLogScanRules(workloadID string) ([]LogScanRule, error) {
|
||||
all, err := s.ListLogScanRules()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
overrides := map[int64]LogScanRule{} // globalID -> override row
|
||||
var workloadOnly []LogScanRule
|
||||
var globals []LogScanRule
|
||||
for _, r := range all {
|
||||
switch {
|
||||
case r.WorkloadID == "" && r.OverridesID == 0:
|
||||
globals = append(globals, r)
|
||||
case r.WorkloadID == workloadID && r.OverridesID == 0:
|
||||
workloadOnly = append(workloadOnly, r)
|
||||
case r.WorkloadID == workloadID && r.OverridesID != 0:
|
||||
overrides[r.OverridesID] = r
|
||||
}
|
||||
}
|
||||
out := make([]LogScanRule, 0, len(globals)+len(workloadOnly))
|
||||
for _, g := range globals {
|
||||
if ov, ok := overrides[g.ID]; ok {
|
||||
// Override row's fields win — including enabled=false to
|
||||
// turn off the global for this workload.
|
||||
out = append(out, ov)
|
||||
} else {
|
||||
out = append(out, g)
|
||||
}
|
||||
}
|
||||
out = append(out, workloadOnly...)
|
||||
return out, nil
|
||||
}
|
||||
|
||||
func (s *Store) queryLogScanRules(query string, args ...any) ([]LogScanRule, error) {
|
||||
rows, err := s.db.Query(query, args...)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("query log scan rules: %w", err)
|
||||
}
|
||||
defer rows.Close()
|
||||
out := []LogScanRule{}
|
||||
for rows.Next() {
|
||||
r, err := scanLogScanRuleRows(rows)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
out = append(out, r)
|
||||
}
|
||||
return out, rows.Err()
|
||||
}
|
||||
|
||||
func scanLogScanRuleRows(rows *sql.Rows) (LogScanRule, error) {
|
||||
var r LogScanRule
|
||||
var enabled int
|
||||
if err := rows.Scan(
|
||||
&r.ID, &r.WorkloadID, &r.OverridesID, &r.Name, &r.Pattern, &r.Severity, &r.Streams,
|
||||
&r.CooldownSeconds, &enabled, &r.CreatedAt, &r.UpdatedAt,
|
||||
); err != nil {
|
||||
return LogScanRule{}, fmt.Errorf("scan log scan rule: %w", err)
|
||||
}
|
||||
r.Enabled = enabled != 0
|
||||
return r, nil
|
||||
}
|
||||
|
||||
func scanLogScanRuleRow(row *sql.Row) (LogScanRule, error) {
|
||||
var r LogScanRule
|
||||
var enabled int
|
||||
if err := row.Scan(
|
||||
&r.ID, &r.WorkloadID, &r.OverridesID, &r.Name, &r.Pattern, &r.Severity, &r.Streams,
|
||||
&r.CooldownSeconds, &enabled, &r.CreatedAt, &r.UpdatedAt,
|
||||
); err != nil {
|
||||
return LogScanRule{}, err
|
||||
}
|
||||
r.Enabled = enabled != 0
|
||||
return r, nil
|
||||
}
|
||||
|
||||
// validateLogScanRule enforces the per-row invariants. Regex
|
||||
// compilation is intentionally NOT done here — it's a hot-path
|
||||
// concern owned by the engine snapshot, and engine compile errors
|
||||
// become engine-side warnings rather than store-side rejections to
|
||||
// keep the failure mode operator-debuggable.
|
||||
func validateLogScanRule(r LogScanRule) error {
|
||||
if strings.TrimSpace(r.Name) == "" {
|
||||
return fmt.Errorf("log scan rule: name is required")
|
||||
}
|
||||
if strings.TrimSpace(r.Pattern) == "" {
|
||||
return fmt.Errorf("log scan rule: pattern is required")
|
||||
}
|
||||
switch r.Severity {
|
||||
case LogScanSeverityInfo, LogScanSeverityWarn, LogScanSeverityError:
|
||||
case "":
|
||||
// Default applied at the caller; allow blank.
|
||||
default:
|
||||
return fmt.Errorf("log scan rule: invalid severity %q", r.Severity)
|
||||
}
|
||||
switch r.Streams {
|
||||
case LogScanStreamAll, LogScanStreamStdout, LogScanStreamStderr:
|
||||
case "":
|
||||
default:
|
||||
return fmt.Errorf("log scan rule: invalid streams %q", r.Streams)
|
||||
}
|
||||
if r.CooldownSeconds < 0 {
|
||||
return fmt.Errorf("log scan rule: cooldown_seconds must be >= 0")
|
||||
}
|
||||
// An override row must reference an existing global id and live
|
||||
// under a specific workload. The store doesn't verify the FK
|
||||
// (no PRAGMA foreign_keys), but we can sanity-check the shape.
|
||||
if r.OverridesID != 0 && r.WorkloadID == "" {
|
||||
return fmt.Errorf("log scan rule: override row requires workload_id")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -0,0 +1,155 @@
|
||||
package store
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestCreateLogScanRule_Validates(t *testing.T) {
|
||||
s := newTestStore(t)
|
||||
cases := []struct {
|
||||
name string
|
||||
in LogScanRule
|
||||
wantErr string
|
||||
}{
|
||||
{
|
||||
name: "missing name",
|
||||
in: LogScanRule{Pattern: "x"},
|
||||
wantErr: "name is required",
|
||||
},
|
||||
{
|
||||
name: "missing pattern",
|
||||
in: LogScanRule{Name: "n"},
|
||||
wantErr: "pattern is required",
|
||||
},
|
||||
{
|
||||
name: "bad severity",
|
||||
in: LogScanRule{Name: "n", Pattern: "x", Severity: "loud"},
|
||||
wantErr: "invalid severity",
|
||||
},
|
||||
{
|
||||
name: "bad streams",
|
||||
in: LogScanRule{Name: "n", Pattern: "x", Streams: "both"},
|
||||
wantErr: "invalid streams",
|
||||
},
|
||||
{
|
||||
name: "negative cooldown",
|
||||
in: LogScanRule{Name: "n", Pattern: "x", CooldownSeconds: -1},
|
||||
wantErr: "cooldown_seconds must be",
|
||||
},
|
||||
{
|
||||
name: "override without workload",
|
||||
in: LogScanRule{Name: "n", Pattern: "x", OverridesID: 5},
|
||||
wantErr: "override row requires workload_id",
|
||||
},
|
||||
}
|
||||
for _, c := range cases {
|
||||
t.Run(c.name, func(t *testing.T) {
|
||||
_, err := s.CreateLogScanRule(c.in)
|
||||
if err == nil {
|
||||
t.Fatalf("expected error containing %q, got nil", c.wantErr)
|
||||
}
|
||||
if !strings.Contains(err.Error(), c.wantErr) {
|
||||
t.Fatalf("error mismatch: got %q want substring %q", err.Error(), c.wantErr)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestCreateAndGetLogScanRule(t *testing.T) {
|
||||
s := newTestStore(t)
|
||||
r, err := s.CreateLogScanRule(LogScanRule{
|
||||
Name: "panics", Pattern: `\bpanic\b`, Severity: "error", Streams: "stderr",
|
||||
CooldownSeconds: 30, Enabled: true,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("create: %v", err)
|
||||
}
|
||||
if r.ID == 0 {
|
||||
t.Fatal("id should be set")
|
||||
}
|
||||
got, err := s.GetLogScanRule(r.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("get: %v", err)
|
||||
}
|
||||
if got.Pattern != `\bpanic\b` {
|
||||
t.Errorf("pattern mismatch: %q", got.Pattern)
|
||||
}
|
||||
if !got.Enabled {
|
||||
t.Error("enabled lost on round-trip")
|
||||
}
|
||||
}
|
||||
|
||||
func TestEffectiveLogScanRules(t *testing.T) {
|
||||
s := newTestStore(t)
|
||||
g, _ := s.CreateLogScanRule(LogScanRule{
|
||||
Name: "global", Pattern: "panic", Severity: "warn", Streams: "all", Enabled: true,
|
||||
})
|
||||
_, _ = s.CreateLogScanRule(LogScanRule{
|
||||
Name: "w1-only", Pattern: "slow_query", WorkloadID: "w1", Severity: "info", Streams: "all", Enabled: true,
|
||||
})
|
||||
_, _ = s.CreateLogScanRule(LogScanRule{
|
||||
Name: "override-for-w1", Pattern: "panic", WorkloadID: "w1", OverridesID: g.ID,
|
||||
Severity: "error", Streams: "all", Enabled: true,
|
||||
})
|
||||
|
||||
w1, err := s.EffectiveLogScanRules("w1")
|
||||
if err != nil {
|
||||
t.Fatalf("effective w1: %v", err)
|
||||
}
|
||||
if len(w1) != 2 {
|
||||
t.Fatalf("w1 effective should be 2 (override + addition), got %d", len(w1))
|
||||
}
|
||||
// First entry replaces the global with the override (error severity).
|
||||
if w1[0].Severity != "error" {
|
||||
t.Errorf("override severity not applied: %q", w1[0].Severity)
|
||||
}
|
||||
|
||||
w2, err := s.EffectiveLogScanRules("w2")
|
||||
if err != nil {
|
||||
t.Fatalf("effective w2: %v", err)
|
||||
}
|
||||
if len(w2) != 1 {
|
||||
t.Fatalf("w2 effective should be 1 (just the global), got %d", len(w2))
|
||||
}
|
||||
if w2[0].Severity != "warn" {
|
||||
t.Errorf("w2 should see original severity: %q", w2[0].Severity)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeleteLogScanRule_CascadesOverrides(t *testing.T) {
|
||||
s := newTestStore(t)
|
||||
g, _ := s.CreateLogScanRule(LogScanRule{
|
||||
Name: "global", Pattern: "panic", Severity: "warn", Streams: "all", Enabled: true,
|
||||
})
|
||||
ov, _ := s.CreateLogScanRule(LogScanRule{
|
||||
Name: "override", Pattern: "panic", WorkloadID: "w1", OverridesID: g.ID,
|
||||
Severity: "error", Streams: "all", Enabled: true,
|
||||
})
|
||||
|
||||
if err := s.DeleteLogScanRule(g.ID); err != nil {
|
||||
t.Fatalf("delete: %v", err)
|
||||
}
|
||||
if _, err := s.GetLogScanRule(ov.ID); err == nil {
|
||||
t.Error("override should be cascade-deleted with its global")
|
||||
}
|
||||
}
|
||||
|
||||
func TestUpdateLogScanRule(t *testing.T) {
|
||||
s := newTestStore(t)
|
||||
r, _ := s.CreateLogScanRule(LogScanRule{
|
||||
Name: "n", Pattern: "x", Severity: "warn", Streams: "all", Enabled: true,
|
||||
})
|
||||
r.Pattern = "y"
|
||||
r.Enabled = false
|
||||
got, err := s.UpdateLogScanRule(r)
|
||||
if err != nil {
|
||||
t.Fatalf("update: %v", err)
|
||||
}
|
||||
if got.Pattern != "y" {
|
||||
t.Errorf("pattern not updated: %q", got.Pattern)
|
||||
}
|
||||
if got.Enabled {
|
||||
t.Error("enabled=false not applied")
|
||||
}
|
||||
}
|
||||
+126
-4
@@ -197,6 +197,34 @@ type StageEnv struct {
|
||||
UpdatedAt string `json:"updated_at"`
|
||||
}
|
||||
|
||||
// WorkloadVolume is a per-workload mount declaration — the plugin-shape
// counterpart of the legacy Volume. Scope uses the same values as the
// existing VolumeScope contract so that the legacy resolver can be reused
// once its project_id assumption is loosened.
type WorkloadVolume struct {
	// ID identifies this volume row.
	ID string `json:"id"`
	// WorkloadID is the owning workload.
	WorkloadID string `json:"workload_id"`
	// Source and Target are the two ends of the mount.
	Source string `json:"source"`
	Target string `json:"target"`
	// Scope holds a VolumeScope value, stored as a plain string.
	Scope string `json:"scope"`
	Name  string `json:"name"`
	// CreatedAt and UpdatedAt are stored as strings.
	CreatedAt string `json:"created_at"`
	UpdatedAt string `json:"updated_at"`
}
|
||||
|
||||
// WorkloadEnv is one per-workload environment variable override — the
// plugin-shape counterpart of StageEnv. Values may optionally be encrypted
// at rest (see Encrypted). The Source plugin reads these at deploy time
// and merges them on top of source_config.env.
type WorkloadEnv struct {
	// ID identifies this env row.
	ID string `json:"id"`
	// WorkloadID is the owning workload.
	WorkloadID string `json:"workload_id"`
	// Key and Value are the variable name and its value.
	Key   string `json:"key"`
	Value string `json:"value"`
	// Encrypted marks whether Value is stored encrypted.
	Encrypted bool `json:"encrypted"`
	// CreatedAt and UpdatedAt are stored as strings.
	CreatedAt string `json:"created_at"`
	UpdatedAt string `json:"updated_at"`
}
|
||||
|
||||
// VolumeScope is the string-backed sharing scope of a volume mount.
// The valid scopes are: instance, stage, project, project_named, named
// and ephemeral.
type VolumeScope string
|
||||
@@ -333,6 +361,82 @@ type EventLog struct {
|
||||
CreatedAt string `json:"created_at"`
|
||||
}
|
||||
|
||||
// EventTrigger pairs a set of filters with an action. It is evaluated
// against EventLog entries published on the bus; when every non-empty
// filter matches, the configured action fires (a webhook today — further
// action types can be added through ActionType).
//
// The multi-value filters (severity, source) are stored as comma-separated
// lists to keep the schema flat, and an empty string means "do not filter
// on this dimension". FilterMessageRegex is a single regex applied to
// EventLog.Message.
//
// Loop prevention: deliveries are recorded in webhook_deliveries (the
// existing audit trail). The dispatcher MUST NOT write to event_log, or it
// will recurse.
type EventTrigger struct {
	ID   int64  `json:"id"`
	Name string `json:"name"`
	// FilterSeverity is a comma list such as "warn,error"; "" matches any.
	FilterSeverity string `json:"filter_severity"`
	// FilterSource is a comma list such as "logscan,deploy"; "" matches any.
	FilterSource string `json:"filter_source"`
	// FilterMessageRegex is matched against the message; "" matches any.
	FilterMessageRegex string `json:"filter_message_regex"`
	// ActionType selects the action; only "webhook" exists today.
	ActionType string `json:"action_type"`
	// ActionTarget is the destination — the URL for a webhook.
	ActionTarget string `json:"action_target"`
	// ActionSecret is an optional HMAC secret for signed delivery.
	ActionSecret string `json:"action_secret"`
	Enabled      bool   `json:"enabled"`
	CreatedAt    string `json:"created_at"`
	UpdatedAt    string `json:"updated_at"`
}
|
||||
|
||||
// Supported values for EventTrigger.ActionType. New actions are meant to
// be added here alongside the existing ones: triggers already stored keep
// working, and the dispatcher gains a branch for the new value.
const (
	// EventTriggerActionWebhook delivers the event to a webhook URL.
	EventTriggerActionWebhook = "webhook"
)
|
||||
|
||||
// LogScanRule is a single regex pattern that the log scanner evaluates
// against container log lines. The (WorkloadID, OverridesID) pair encodes
// the "global rule with optional per-workload override" scheme described
// in docs/LOGSCAN_AND_TRIGGERS_TODO.md:
//
//   - WorkloadID == "" && OverridesID == 0: a global rule, applied to
//     every workload unless overridden.
//   - WorkloadID != "" && OverridesID == 0: a rule that exists only for
//     that workload.
//   - WorkloadID != "" && OverridesID != 0: an override of the global rule
//     with that id, for that one workload (set Enabled=false to switch the
//     global off for the workload).
type LogScanRule struct {
	ID int64 `json:"id"`
	// WorkloadID is "" for a global rule.
	WorkloadID string `json:"workload_id"`
	// OverridesID is 0 when the row is not an override.
	OverridesID int64  `json:"overrides_id"`
	Name        string `json:"name"`
	// Pattern is a regex, compiled at load time.
	Pattern string `json:"pattern"`
	// Severity is one of info, warn, error.
	Severity string `json:"severity"`
	// Streams is one of all, stdout, stderr.
	Streams         string `json:"streams"`
	CooldownSeconds int    `json:"cooldown_seconds"`
	Enabled         bool   `json:"enabled"`
	CreatedAt       string `json:"created_at"`
	UpdatedAt       string `json:"updated_at"`
}
|
||||
|
||||
// Stream filter values for LogScanRule.Streams. They are used both when
// validating rules in the store and when reading logs on the docker side.
const (
	// LogScanStreamAll reads both stdout and stderr.
	LogScanStreamAll = "all"
	// LogScanStreamStdout restricts a rule to stdout.
	LogScanStreamStdout = "stdout"
	// LogScanStreamStderr restricts a rule to stderr.
	LogScanStreamStderr = "stderr"
)
|
||||
|
||||
// Severity values for LogScanRule.Severity. They mirror the event_log
// severity enum, so a matched rule is recorded as an event_log row
// carrying the rule's severity unchanged.
const (
	// LogScanSeverityInfo is the "info" level.
	LogScanSeverityInfo = "info"
	// LogScanSeverityWarn is the "warn" level.
	LogScanSeverityWarn = "warn"
	// LogScanSeverityError is the "error" level.
	LogScanSeverityError = "error"
)
|
||||
|
||||
// WorkloadKind is the string-backed kind of thing that owns containers.
// Every kind has a matching row in projects, stacks or static_sites, which
// a workload points at through Workload.RefID.
type WorkloadKind string
|
||||
@@ -346,12 +450,24 @@ const (
|
||||
// Workload is the unifying primitive that abstracts Project, Stack, and StaticSite.
|
||||
// Each row is paired with exactly one project/stack/site via (Kind, RefID).
|
||||
// Notification + webhook config moves here so it lives in one place across kinds.
|
||||
//
|
||||
// SourceKind / SourceConfig / TriggerKind / TriggerConfig / PublicFaces /
|
||||
// ParentWorkloadID populate the unified plugin model from the Workload-first
|
||||
// refactor. Existing rows keep these empty until they are explicitly migrated
|
||||
// or replaced — the legacy Kind/RefID columns continue to point at
|
||||
// project/stack/site rows in parallel during the cutover.
|
||||
type Workload struct {
|
||||
ID string `json:"id"`
|
||||
Kind string `json:"kind"` // project | stack | site
|
||||
Kind string `json:"kind"` // project | stack | site (legacy discriminator)
|
||||
RefID string `json:"ref_id"`
|
||||
Name string `json:"name"`
|
||||
AppID string `json:"app_id"` // nullable; "" = unassigned
|
||||
AppID string `json:"app_id"` // nullable; "" = unassigned (a.k.a. GroupID after rename)
|
||||
SourceKind string `json:"source_kind"` // "" until plugin-mode populated
|
||||
SourceConfig string `json:"source_config"` // JSON-encoded, decoded by the matching Source
|
||||
TriggerKind string `json:"trigger_kind"`
|
||||
TriggerConfig string `json:"trigger_config"` // JSON-encoded, decoded by the matching Trigger
|
||||
PublicFaces string `json:"public_faces"` // JSON-encoded []PublicFace
|
||||
ParentWorkloadID string `json:"parent_workload_id"` // "" = root; non-empty = stage chain
|
||||
NotificationURL string `json:"notification_url"`
|
||||
NotificationSecret string `json:"-"` // never serialized
|
||||
WebhookSecret string `json:"-"` // URL-identifier secret; never serialized
|
||||
@@ -384,8 +500,14 @@ type Container struct {
|
||||
ProxyRouteID string `json:"proxy_route_id"`
|
||||
NpmProxyID int `json:"npm_proxy_id"`
|
||||
LastSeenAt string `json:"last_seen_at"`
|
||||
CreatedAt string `json:"created_at"`
|
||||
UpdatedAt string `json:"updated_at"`
|
||||
// ExtraJSON carries source-specific metadata that isn't promoted to a
|
||||
// first-class column — currently per-face proxy route IDs for
|
||||
// multi-face image deploys. Stored as a JSON object; '{}' on empty
|
||||
// rows. Sources own the shape; consumers should tolerate unknown
|
||||
// keys.
|
||||
ExtraJSON string `json:"extra_json"`
|
||||
CreatedAt string `json:"created_at"`
|
||||
UpdatedAt string `json:"updated_at"`
|
||||
}
|
||||
|
||||
// App is an optional grouping of workloads (e.g., "my-saas" = web project + worker stack + redis stack).
|
||||
|
||||
@@ -181,6 +181,15 @@ func (s *Store) runMigrations() error {
|
||||
// re-write path; the LEFT JOIN in ListContainersByStageID falls back
|
||||
// to (project_id, role=stage_name) so legacy rows still resolve.
|
||||
`ALTER TABLE containers ADD COLUMN stage_id TEXT NOT NULL DEFAULT ''`,
|
||||
// Workload-first refactor columns (2026-05-10). Land additively so
|
||||
// the legacy kind/ref_id columns continue to serve existing
|
||||
// project/stack/site rows during cutover.
|
||||
`ALTER TABLE workloads ADD COLUMN source_kind TEXT NOT NULL DEFAULT ''`,
|
||||
`ALTER TABLE workloads ADD COLUMN source_config TEXT NOT NULL DEFAULT '{}'`,
|
||||
`ALTER TABLE workloads ADD COLUMN trigger_kind TEXT NOT NULL DEFAULT ''`,
|
||||
`ALTER TABLE workloads ADD COLUMN trigger_config TEXT NOT NULL DEFAULT '{}'`,
|
||||
`ALTER TABLE workloads ADD COLUMN public_faces TEXT NOT NULL DEFAULT '[]'`,
|
||||
`ALTER TABLE workloads ADD COLUMN parent_workload_id TEXT NOT NULL DEFAULT ''`,
|
||||
}
|
||||
|
||||
// Workload refactor tables (2026-05-09). Workload is the unifying primitive
|
||||
@@ -195,6 +204,12 @@ func (s *Store) runMigrations() error {
|
||||
ref_id TEXT NOT NULL,
|
||||
name TEXT NOT NULL,
|
||||
app_id TEXT NOT NULL DEFAULT '',
|
||||
source_kind TEXT NOT NULL DEFAULT '',
|
||||
source_config TEXT NOT NULL DEFAULT '{}',
|
||||
trigger_kind TEXT NOT NULL DEFAULT '',
|
||||
trigger_config TEXT NOT NULL DEFAULT '{}',
|
||||
public_faces TEXT NOT NULL DEFAULT '[]',
|
||||
parent_workload_id TEXT NOT NULL DEFAULT '',
|
||||
notification_url TEXT NOT NULL DEFAULT '',
|
||||
notification_secret TEXT NOT NULL DEFAULT '',
|
||||
webhook_secret TEXT NOT NULL DEFAULT '',
|
||||
@@ -231,6 +246,34 @@ func (s *Store) runMigrations() error {
|
||||
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
||||
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
|
||||
)`,
|
||||
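// workload_env: per-workload env overrides (encrypt-at-rest for
|
||||
// secrets). Functional analog of stage_env. Workload deletion
|
||||
// cascades through the FK (ON DELETE CASCADE), so rows cannot outlive
|
||||
// their workload — provided foreign-key enforcement is enabled on the
|
||||
// connection. SQLite leaves it off by default; TODO confirm the store
|
||||
// sets PRAGMA foreign_keys = ON.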
|
||||
`CREATE TABLE IF NOT EXISTS workload_env (
|
||||
id TEXT PRIMARY KEY,
|
||||
workload_id TEXT NOT NULL REFERENCES workloads(id) ON DELETE CASCADE,
|
||||
key TEXT NOT NULL,
|
||||
value TEXT NOT NULL DEFAULT '',
|
||||
encrypted INTEGER NOT NULL DEFAULT 0,
|
||||
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
||||
updated_at TEXT NOT NULL DEFAULT (datetime('now')),
|
||||
UNIQUE(workload_id, key)
|
||||
)`,
|
||||
// workload_volumes: per-workload mount declarations. Mirrors the
|
||||
// legacy `volumes` table shape (source / target / scope / name)
|
||||
// but keyed on workload_id. UNIQUE on (workload_id, target) rejects
|
||||
// a second row for the same mount point, so a re-add must upsert
|
||||
// (INSERT ... ON CONFLICT) to overwrite instead of duplicating.
|
||||
`CREATE TABLE IF NOT EXISTS workload_volumes (
|
||||
id TEXT PRIMARY KEY,
|
||||
workload_id TEXT NOT NULL REFERENCES workloads(id) ON DELETE CASCADE,
|
||||
source TEXT NOT NULL DEFAULT '',
|
||||
target TEXT NOT NULL,
|
||||
scope TEXT NOT NULL DEFAULT 'absolute',
|
||||
name TEXT NOT NULL DEFAULT '',
|
||||
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
||||
updated_at TEXT NOT NULL DEFAULT (datetime('now')),
|
||||
UNIQUE(workload_id, target)
|
||||
)`,
|
||||
}
|
||||
for _, t := range workloadTables {
|
||||
if _, err := s.db.Exec(t); err != nil {
|
||||
@@ -312,6 +355,49 @@ func (s *Store) runMigrations() error {
|
||||
}
|
||||
}
|
||||
|
||||
// Observability: event_triggers — consume EventLog entries off the
|
||||
// bus and dispatch webhook actions. Schema kept flat (comma-list
|
||||
// filters, single optional regex) — see LOGSCAN_AND_TRIGGERS_TODO.md.
|
||||
observabilityTables := []string{
|
||||
`CREATE TABLE IF NOT EXISTS event_triggers (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
name TEXT NOT NULL,
|
||||
filter_severity TEXT NOT NULL DEFAULT '',
|
||||
filter_source TEXT NOT NULL DEFAULT '',
|
||||
filter_message_regex TEXT NOT NULL DEFAULT '',
|
||||
action_type TEXT NOT NULL DEFAULT 'webhook',
|
||||
action_target TEXT NOT NULL DEFAULT '',
|
||||
action_secret TEXT NOT NULL DEFAULT '',
|
||||
enabled INTEGER NOT NULL DEFAULT 1,
|
||||
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
||||
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
|
||||
)`,
|
||||
// log_scan_rules: regex patterns the log-scanner manager
|
||||
// applies to container log lines. workload_id is NOT NULL and uses
|
||||
// "" as the sentinel for a global rule. overrides_id is 0 for rules
|
||||
// that override nothing; a per-workload override stores the id of
|
||||
// the global rule it replaces.
|
||||
`CREATE TABLE IF NOT EXISTS log_scan_rules (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
workload_id TEXT NOT NULL DEFAULT '',
|
||||
overrides_id INTEGER NOT NULL DEFAULT 0,
|
||||
name TEXT NOT NULL,
|
||||
pattern TEXT NOT NULL,
|
||||
severity TEXT NOT NULL DEFAULT 'warn',
|
||||
streams TEXT NOT NULL DEFAULT 'all',
|
||||
cooldown_seconds INTEGER NOT NULL DEFAULT 60,
|
||||
enabled INTEGER NOT NULL DEFAULT 1,
|
||||
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
||||
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
|
||||
)`,
|
||||
`CREATE INDEX IF NOT EXISTS idx_log_scan_rules_workload ON log_scan_rules(workload_id)`,
|
||||
`CREATE INDEX IF NOT EXISTS idx_log_scan_rules_overrides ON log_scan_rules(overrides_id)`,
|
||||
}
|
||||
for _, t := range observabilityTables {
|
||||
if _, err := s.db.Exec(t); err != nil {
|
||||
return fmt.Errorf("create observability table: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
for _, m := range migrations {
|
||||
if _, err := s.db.Exec(m); err != nil {
|
||||
// "duplicate column" / "already exists" are expected when a
|
||||
@@ -366,6 +452,8 @@ func (s *Store) runMigrations() error {
|
||||
`CREATE INDEX IF NOT EXISTS idx_containers_container_id ON containers(container_id) WHERE container_id != ''`,
|
||||
`CREATE INDEX IF NOT EXISTS idx_containers_kind ON containers(workload_kind)`,
|
||||
`CREATE INDEX IF NOT EXISTS idx_containers_stage_id ON containers(stage_id) WHERE stage_id != ''`,
|
||||
`CREATE INDEX IF NOT EXISTS idx_workload_env_workload ON workload_env(workload_id)`,
|
||||
`CREATE INDEX IF NOT EXISTS idx_workload_volumes_workload ON workload_volumes(workload_id)`,
|
||||
}
|
||||
for _, idx := range indexes {
|
||||
if _, err := s.db.Exec(idx); err != nil {
|
||||
|
||||
Reference in New Issue
Block a user