7a9ff7ad54
Two paired backends sharing the events.Bus seam:
Event triggers (consumer-side):
- internal/store/event_triggers.go — CRUD with action_secret
redaction on read (placeholder echo treated as "no change" on
PATCH so secrets aren't accidentally wiped).
- internal/events/dispatcher.go — bus subscriber, AND-composed
filters (severity CSV, source CSV, message regex with memoized
compile cache). Structural loop-prevention: never writes to
event_log. Sends via notifier.SendPayload.
- internal/notify: SendPayload + SendSyncForTestPayload methods,
TierEventTrigger constant, doSendRaw shared with the legacy
Event-shaped path.
- internal/api/event_triggers.go — admin-gated CRUD + /test
sending the real TriggerWebhookPayload shape. SSRF guard
rejects loopback / link-local / unspecified targets. PATCH
uses pointer-typed DTO for partial updates.
Log scanner (producer-side):
- internal/logscanner/ — engine (per-rule cooldown +
per-container token bucket, atomic drop counters), tail
(multiplexed docker frame demuxer with TTY fallback + 16 MiB
payload cap + 1 MiB reassembly cap + RFC3339Nano-validated
timestamp strip + UTF-8-safe message truncation), manager
(5s container polling, atomic.Pointer[Snapshot] hot-reload,
HitEmitter writes event_log + publishes EventLog so the
trigger dispatcher picks them up immediately).
- internal/docker/container.go — ContainerLogsOpts exposes
stream selection for stderr-only / stdout-only rules.
- internal/store: log_scan_rules table + CRUD with
EffectiveLogScanRules resolver (globals minus per-workload
overrides plus workload-only additions). Transactional
cascade-delete of overrides when a global rule is removed.
- internal/api/log_scan_rules.go — admin-gated CRUD + /test
(sample_line → matched/captures) + /stats (drop counters +
active tail count + last-snapshot compile errors) +
GET /api/workloads/{id}/effective-rules.
cmd/server/main.go wires both subsystems next to the existing
RegisterPersistentLogger. Coverage spans engine cooldown / bucket
counter tests, snapshot effective-set semantics, manager compile-
error capture, dispatcher matching, store validation +
cascade-delete, API URL validator + secret redaction.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
257 lines
8.5 KiB
Go
257 lines
8.5 KiB
Go
package store
|
|
|
|
import (
|
|
"database/sql"
|
|
"errors"
|
|
"fmt"
|
|
"strings"
|
|
)
|
|
|
|
// CreateLogScanRule inserts a new rule row. Validates severity +
|
|
// streams enum membership and rejects negative cooldowns.
|
|
func (s *Store) CreateLogScanRule(r LogScanRule) (LogScanRule, error) {
|
|
if err := validateLogScanRule(r); err != nil {
|
|
return LogScanRule{}, err
|
|
}
|
|
now := Now()
|
|
r.CreatedAt = now
|
|
r.UpdatedAt = now
|
|
res, err := s.db.Exec(
|
|
`INSERT INTO log_scan_rules
|
|
(workload_id, overrides_id, name, pattern, severity, streams,
|
|
cooldown_seconds, enabled, created_at, updated_at)
|
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
|
r.WorkloadID, r.OverridesID, r.Name, r.Pattern, r.Severity, r.Streams,
|
|
r.CooldownSeconds, boolToInt(r.Enabled), r.CreatedAt, r.UpdatedAt,
|
|
)
|
|
if err != nil {
|
|
return LogScanRule{}, fmt.Errorf("insert log scan rule: %w", err)
|
|
}
|
|
id, err := res.LastInsertId()
|
|
if err != nil {
|
|
return LogScanRule{}, fmt.Errorf("get log scan rule id: %w", err)
|
|
}
|
|
r.ID = id
|
|
return r, nil
|
|
}
|
|
|
|
// ListLogScanRules returns every rule, ordered by id for stable UI
|
|
// rendering.
|
|
func (s *Store) ListLogScanRules() ([]LogScanRule, error) {
|
|
return s.queryLogScanRules(
|
|
`SELECT id, workload_id, overrides_id, name, pattern, severity, streams,
|
|
cooldown_seconds, enabled, created_at, updated_at
|
|
FROM log_scan_rules ORDER BY id`,
|
|
)
|
|
}
|
|
|
|
// ListLogScanRulesByWorkload returns all rows directly attached to
|
|
// the workload (workload-only additions and per-workload overrides),
|
|
// excluding global rules. Useful for the workload detail page.
|
|
func (s *Store) ListLogScanRulesByWorkload(workloadID string) ([]LogScanRule, error) {
|
|
return s.queryLogScanRules(
|
|
`SELECT id, workload_id, overrides_id, name, pattern, severity, streams,
|
|
cooldown_seconds, enabled, created_at, updated_at
|
|
FROM log_scan_rules WHERE workload_id = ? ORDER BY id`,
|
|
workloadID,
|
|
)
|
|
}
|
|
|
|
// GetLogScanRule fetches one rule by id or returns ErrNotFound.
|
|
func (s *Store) GetLogScanRule(id int64) (LogScanRule, error) {
|
|
row := s.db.QueryRow(
|
|
`SELECT id, workload_id, overrides_id, name, pattern, severity, streams,
|
|
cooldown_seconds, enabled, created_at, updated_at
|
|
FROM log_scan_rules WHERE id = ?`, id,
|
|
)
|
|
r, err := scanLogScanRuleRow(row)
|
|
if errors.Is(err, sql.ErrNoRows) {
|
|
return LogScanRule{}, fmt.Errorf("log scan rule %d: %w", id, ErrNotFound)
|
|
}
|
|
if err != nil {
|
|
return LogScanRule{}, fmt.Errorf("query log scan rule: %w", err)
|
|
}
|
|
return r, nil
|
|
}
|
|
|
|
// UpdateLogScanRule overwrites the editable columns of a rule row.
|
|
// id, workload_id, overrides_id are immutable on update — change the
|
|
// scope of a rule by deleting + recreating, to keep the
|
|
// hot-reload-snapshot semantics simple.
|
|
func (s *Store) UpdateLogScanRule(r LogScanRule) (LogScanRule, error) {
|
|
if r.ID == 0 {
|
|
return LogScanRule{}, fmt.Errorf("log scan rule: id is required for update")
|
|
}
|
|
if err := validateLogScanRule(r); err != nil {
|
|
return LogScanRule{}, err
|
|
}
|
|
r.UpdatedAt = Now()
|
|
res, err := s.db.Exec(
|
|
`UPDATE log_scan_rules
|
|
SET name = ?, pattern = ?, severity = ?, streams = ?,
|
|
cooldown_seconds = ?, enabled = ?, updated_at = ?
|
|
WHERE id = ?`,
|
|
r.Name, r.Pattern, r.Severity, r.Streams,
|
|
r.CooldownSeconds, boolToInt(r.Enabled), r.UpdatedAt, r.ID,
|
|
)
|
|
if err != nil {
|
|
return LogScanRule{}, fmt.Errorf("update log scan rule: %w", err)
|
|
}
|
|
n, _ := res.RowsAffected()
|
|
if n == 0 {
|
|
return LogScanRule{}, fmt.Errorf("log scan rule %d: %w", r.ID, ErrNotFound)
|
|
}
|
|
return s.GetLogScanRule(r.ID)
|
|
}
|
|
|
|
// DeleteLogScanRule removes a rule by id. Override rows referencing
|
|
// this id are cascade-deleted at the application layer because we
|
|
// don't enforce SQLite FK constraints repo-wide. The two DELETEs run
|
|
// inside a single transaction so a mid-cascade failure can't leave
|
|
// overrides orphaned by a vanished global.
|
|
func (s *Store) DeleteLogScanRule(id int64) error {
|
|
tx, err := s.db.Begin()
|
|
if err != nil {
|
|
return fmt.Errorf("begin delete tx: %w", err)
|
|
}
|
|
defer tx.Rollback() //nolint:errcheck // commit path returns nil; rollback after commit is a no-op
|
|
if _, err := tx.Exec(`DELETE FROM log_scan_rules WHERE overrides_id = ?`, id); err != nil {
|
|
return fmt.Errorf("delete dependent log scan overrides: %w", err)
|
|
}
|
|
res, err := tx.Exec(`DELETE FROM log_scan_rules WHERE id = ?`, id)
|
|
if err != nil {
|
|
return fmt.Errorf("delete log scan rule: %w", err)
|
|
}
|
|
n, _ := res.RowsAffected()
|
|
if n == 0 {
|
|
return fmt.Errorf("log scan rule %d: %w", id, ErrNotFound)
|
|
}
|
|
if err := tx.Commit(); err != nil {
|
|
return fmt.Errorf("commit delete tx: %w", err)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// EffectiveLogScanRules computes the effective rule set for one
|
|
// workload according to the spec in docs/LOGSCAN_AND_TRIGGERS_TODO.md:
|
|
//
|
|
// 1. All global rules (workload_id == "" AND overrides_id == 0)
|
|
// minus globals that have a per-workload override row.
|
|
// 2. Plus workload-only rules (workload_id == X AND overrides_id == 0).
|
|
// 3. Plus per-workload override rules (workload_id == X AND overrides_id != 0),
|
|
// which carry the override's own enabled/pattern/severity.
|
|
//
|
|
// Computed in Go after two simple SELECTs since rule counts will be
|
|
// small (operator-curated, dozens not thousands).
|
|
func (s *Store) EffectiveLogScanRules(workloadID string) ([]LogScanRule, error) {
|
|
all, err := s.ListLogScanRules()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
overrides := map[int64]LogScanRule{} // globalID -> override row
|
|
var workloadOnly []LogScanRule
|
|
var globals []LogScanRule
|
|
for _, r := range all {
|
|
switch {
|
|
case r.WorkloadID == "" && r.OverridesID == 0:
|
|
globals = append(globals, r)
|
|
case r.WorkloadID == workloadID && r.OverridesID == 0:
|
|
workloadOnly = append(workloadOnly, r)
|
|
case r.WorkloadID == workloadID && r.OverridesID != 0:
|
|
overrides[r.OverridesID] = r
|
|
}
|
|
}
|
|
out := make([]LogScanRule, 0, len(globals)+len(workloadOnly))
|
|
for _, g := range globals {
|
|
if ov, ok := overrides[g.ID]; ok {
|
|
// Override row's fields win — including enabled=false to
|
|
// turn off the global for this workload.
|
|
out = append(out, ov)
|
|
} else {
|
|
out = append(out, g)
|
|
}
|
|
}
|
|
out = append(out, workloadOnly...)
|
|
return out, nil
|
|
}
|
|
|
|
func (s *Store) queryLogScanRules(query string, args ...any) ([]LogScanRule, error) {
|
|
rows, err := s.db.Query(query, args...)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("query log scan rules: %w", err)
|
|
}
|
|
defer rows.Close()
|
|
out := []LogScanRule{}
|
|
for rows.Next() {
|
|
r, err := scanLogScanRuleRows(rows)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
out = append(out, r)
|
|
}
|
|
return out, rows.Err()
|
|
}
|
|
|
|
func scanLogScanRuleRows(rows *sql.Rows) (LogScanRule, error) {
|
|
var r LogScanRule
|
|
var enabled int
|
|
if err := rows.Scan(
|
|
&r.ID, &r.WorkloadID, &r.OverridesID, &r.Name, &r.Pattern, &r.Severity, &r.Streams,
|
|
&r.CooldownSeconds, &enabled, &r.CreatedAt, &r.UpdatedAt,
|
|
); err != nil {
|
|
return LogScanRule{}, fmt.Errorf("scan log scan rule: %w", err)
|
|
}
|
|
r.Enabled = enabled != 0
|
|
return r, nil
|
|
}
|
|
|
|
func scanLogScanRuleRow(row *sql.Row) (LogScanRule, error) {
|
|
var r LogScanRule
|
|
var enabled int
|
|
if err := row.Scan(
|
|
&r.ID, &r.WorkloadID, &r.OverridesID, &r.Name, &r.Pattern, &r.Severity, &r.Streams,
|
|
&r.CooldownSeconds, &enabled, &r.CreatedAt, &r.UpdatedAt,
|
|
); err != nil {
|
|
return LogScanRule{}, err
|
|
}
|
|
r.Enabled = enabled != 0
|
|
return r, nil
|
|
}
|
|
|
|
// validateLogScanRule enforces the per-row invariants. Regex
|
|
// compilation is intentionally NOT done here — it's a hot-path
|
|
// concern owned by the engine snapshot, and engine compile errors
|
|
// become engine-side warnings rather than store-side rejections to
|
|
// keep the failure mode operator-debuggable.
|
|
func validateLogScanRule(r LogScanRule) error {
|
|
if strings.TrimSpace(r.Name) == "" {
|
|
return fmt.Errorf("log scan rule: name is required")
|
|
}
|
|
if strings.TrimSpace(r.Pattern) == "" {
|
|
return fmt.Errorf("log scan rule: pattern is required")
|
|
}
|
|
switch r.Severity {
|
|
case LogScanSeverityInfo, LogScanSeverityWarn, LogScanSeverityError:
|
|
case "":
|
|
// Default applied at the caller; allow blank.
|
|
default:
|
|
return fmt.Errorf("log scan rule: invalid severity %q", r.Severity)
|
|
}
|
|
switch r.Streams {
|
|
case LogScanStreamAll, LogScanStreamStdout, LogScanStreamStderr:
|
|
case "":
|
|
default:
|
|
return fmt.Errorf("log scan rule: invalid streams %q", r.Streams)
|
|
}
|
|
if r.CooldownSeconds < 0 {
|
|
return fmt.Errorf("log scan rule: cooldown_seconds must be >= 0")
|
|
}
|
|
// An override row must reference an existing global id and live
|
|
// under a specific workload. The store doesn't verify the FK
|
|
// (no PRAGMA foreign_keys), but we can sanity-check the shape.
|
|
if r.OverridesID != 0 && r.WorkloadID == "" {
|
|
return fmt.Errorf("log scan rule: override row requires workload_id")
|
|
}
|
|
return nil
|
|
}
|