Files
tiny-forge/internal/store/log_scan_rules.go
T
alexei.dolgolyov 7a9ff7ad54 feat(observability): event triggers + log scanner backend
Two paired backends sharing the events.Bus seam:

Event triggers (consumer-side):
- internal/store/event_triggers.go — CRUD with action_secret
  redaction on read (placeholder echo treated as "no change" on
  PATCH so secrets aren't accidentally wiped).
- internal/events/dispatcher.go — bus subscriber, AND-composed
  filters (severity CSV, source CSV, message regex with memoized
  compile cache). Structural loop-prevention: never writes to
  event_log. Sends via notifier.SendPayload.
- internal/notify: SendPayload + SendSyncForTestPayload methods,
  TierEventTrigger constant, doSendRaw shared with the legacy
  Event-shaped path.
- internal/api/event_triggers.go — admin-gated CRUD + /test
  sending the real TriggerWebhookPayload shape. SSRF guard
  rejects loopback / link-local / unspecified targets. PATCH
  uses pointer-typed DTO for partial updates.

Log scanner (producer-side):
- internal/logscanner/ — engine (per-rule cooldown +
  per-container token bucket, atomic drop counters), tail
  (multiplexed docker frame demuxer with TTY fallback + 16 MiB
  payload cap + 1 MiB reassembly cap + RFC3339Nano-validated
  timestamp strip + UTF-8-safe message truncation), manager
  (5s container polling, atomic.Pointer[Snapshot] hot-reload,
  HitEmitter writes event_log + publishes EventLog so the
  trigger dispatcher picks them up immediately).
- internal/docker/container.go — ContainerLogsOpts exposes
  stream selection for stderr-only / stdout-only rules.
- internal/store: log_scan_rules table + CRUD with
  EffectiveLogScanRules resolver (globals minus per-workload
  overrides plus workload-only additions). Transactional
  cascade-delete of overrides when a global rule is removed.
- internal/api/log_scan_rules.go — admin-gated CRUD + /test
  (sample_line → matched/captures) + /stats (drop counters +
  active tail count + last-snapshot compile errors) +
  GET /api/workloads/{id}/effective-rules.

cmd/server/main.go wires both subsystems next to the existing
RegisterPersistentLogger. Coverage spans engine cooldown / bucket
counter tests, snapshot effective-set semantics, manager compile-
error capture, dispatcher matching, store validation +
cascade-delete, API URL validator + secret redaction.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-11 22:18:11 +03:00

257 lines
8.5 KiB
Go

package store
import (
"database/sql"
"errors"
"fmt"
"strings"
)
// CreateLogScanRule validates r and inserts it as a new row of
// log_scan_rules.
//
// Validation is delegated to validateLogScanRule and happens before the
// database is touched. CreatedAt and UpdatedAt are both overwritten with
// a single Now() value, and the id reported by the database for the new
// row is copied into the returned rule.
//
// On any failure the zero LogScanRule is returned together with the
// error.
func (s *Store) CreateLogScanRule(r LogScanRule) (LogScanRule, error) {
	var none LogScanRule

	if err := validateLogScanRule(r); err != nil {
		return none, err
	}

	// One timestamp for both columns, so a freshly created row always
	// has created_at == updated_at.
	stamp := Now()
	r.CreatedAt, r.UpdatedAt = stamp, stamp

	const insertSQL = `INSERT INTO log_scan_rules
(workload_id, overrides_id, name, pattern, severity, streams,
cooldown_seconds, enabled, created_at, updated_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`

	result, err := s.db.Exec(
		insertSQL,
		r.WorkloadID, r.OverridesID, r.Name, r.Pattern, r.Severity, r.Streams,
		r.CooldownSeconds, boolToInt(r.Enabled), r.CreatedAt, r.UpdatedAt,
	)
	if err != nil {
		return none, fmt.Errorf("insert log scan rule: %w", err)
	}

	newID, err := result.LastInsertId()
	if err != nil {
		return none, fmt.Errorf("get log scan rule id: %w", err)
	}

	r.ID = newID
	return r, nil
}
// ListLogScanRules returns every row of log_scan_rules in ascending id
// order, so repeated listings come back in the same sequence.
func (s *Store) ListLogScanRules() ([]LogScanRule, error) {
	const listAllSQL = `SELECT id, workload_id, overrides_id, name, pattern, severity, streams,
cooldown_seconds, enabled, created_at, updated_at
FROM log_scan_rules ORDER BY id`

	return s.queryLogScanRules(listAllSQL)
}
// ListLogScanRulesByWorkload returns the rows whose workload_id equals
// workloadID, in ascending id order. For a real workload id that is its
// workload-only additions plus its per-workload overrides; global rules
// (stored with an empty workload_id) are not included.
func (s *Store) ListLogScanRulesByWorkload(workloadID string) ([]LogScanRule, error) {
	const listByWorkloadSQL = `SELECT id, workload_id, overrides_id, name, pattern, severity, streams,
cooldown_seconds, enabled, created_at, updated_at
FROM log_scan_rules WHERE workload_id = ? ORDER BY id`

	return s.queryLogScanRules(listByWorkloadSQL, workloadID)
}
// GetLogScanRule loads the single rule with the given id.
//
// When no such row exists the returned error wraps ErrNotFound (test it
// with errors.Is); any other scan failure is wrapped as a query error.
// The zero LogScanRule accompanies every non-nil error.
func (s *Store) GetLogScanRule(id int64) (LogScanRule, error) {
	const getByIDSQL = `SELECT id, workload_id, overrides_id, name, pattern, severity, streams,
cooldown_seconds, enabled, created_at, updated_at
FROM log_scan_rules WHERE id = ?`

	rule, err := scanLogScanRuleRow(s.db.QueryRow(getByIDSQL, id))
	switch {
	case errors.Is(err, sql.ErrNoRows):
		// Translate the driver-level "no rows" into the store's own
		// sentinel so callers don't depend on database/sql.
		return LogScanRule{}, fmt.Errorf("log scan rule %d: %w", id, ErrNotFound)
	case err != nil:
		return LogScanRule{}, fmt.Errorf("query log scan rule: %w", err)
	}
	return rule, nil
}
// UpdateLogScanRule overwrites the editable columns (name, pattern,
// severity, streams, cooldown_seconds, enabled) of the row identified by
// r.ID and refreshes updated_at with Now().
//
// id, workload_id and overrides_id are immutable on update — change the
// scope of a rule by deleting + recreating, to keep the
// hot-reload-snapshot semantics simple.
//
// r must carry a non-zero ID and pass validateLogScanRule. The returned
// rule is re-read from the database after the write, so it reflects the
// stored row (including the columns this method does not touch). When no
// row has that id the error wraps ErrNotFound. The zero LogScanRule
// accompanies every non-nil error.
func (s *Store) UpdateLogScanRule(r LogScanRule) (LogScanRule, error) {
	if r.ID == 0 {
		return LogScanRule{}, fmt.Errorf("log scan rule: id is required for update")
	}
	if err := validateLogScanRule(r); err != nil {
		return LogScanRule{}, err
	}
	r.UpdatedAt = Now()
	res, err := s.db.Exec(
		`UPDATE log_scan_rules
SET name = ?, pattern = ?, severity = ?, streams = ?,
cooldown_seconds = ?, enabled = ?, updated_at = ?
WHERE id = ?`,
		r.Name, r.Pattern, r.Severity, r.Streams,
		r.CooldownSeconds, boolToInt(r.Enabled), r.UpdatedAt, r.ID,
	)
	if err != nil {
		return LogScanRule{}, fmt.Errorf("update log scan rule: %w", err)
	}
	// Surface a RowsAffected failure as itself: discarding it would
	// leave n == 0 and misreport a driver error as "rule not found".
	n, err := res.RowsAffected()
	if err != nil {
		return LogScanRule{}, fmt.Errorf("update log scan rule: rows affected: %w", err)
	}
	if n == 0 {
		return LogScanRule{}, fmt.Errorf("log scan rule %d: %w", r.ID, ErrNotFound)
	}
	return s.GetLogScanRule(r.ID)
}
// DeleteLogScanRule removes the rule with the given id together with
// every override row whose overrides_id references it.
//
// The cascade is done at the application layer because SQLite FK
// constraints are not enforced repo-wide. Both DELETEs run inside a
// single transaction so a mid-cascade failure can't leave overrides
// orphaned by a vanished global.
//
// When no row has the given id the error wraps ErrNotFound and the
// transaction is rolled back, so dependent rows removed by the first
// DELETE are restored.
func (s *Store) DeleteLogScanRule(id int64) error {
	// overrides_id = 0 marks "not an override", so for id 0 the cascade
	// DELETE below would match every non-override row and rely on the
	// rollback to undo it. Return the same not-found error directly
	// without touching the database.
	if id == 0 {
		return fmt.Errorf("log scan rule %d: %w", id, ErrNotFound)
	}

	tx, err := s.db.Begin()
	if err != nil {
		return fmt.Errorf("begin delete tx: %w", err)
	}
	defer tx.Rollback() //nolint:errcheck // commit path returns nil; rollback after commit is a no-op

	// Dependents first, then the rule itself.
	if _, err := tx.Exec(`DELETE FROM log_scan_rules WHERE overrides_id = ?`, id); err != nil {
		return fmt.Errorf("delete dependent log scan overrides: %w", err)
	}
	res, err := tx.Exec(`DELETE FROM log_scan_rules WHERE id = ?`, id)
	if err != nil {
		return fmt.Errorf("delete log scan rule: %w", err)
	}
	// Surface a RowsAffected failure as itself: discarding it would
	// leave n == 0 and misreport a driver error as "rule not found".
	n, err := res.RowsAffected()
	if err != nil {
		return fmt.Errorf("delete log scan rule: rows affected: %w", err)
	}
	if n == 0 {
		return fmt.Errorf("log scan rule %d: %w", id, ErrNotFound)
	}
	if err := tx.Commit(); err != nil {
		return fmt.Errorf("commit delete tx: %w", err)
	}
	return nil
}
// EffectiveLogScanRules resolves the rule set that applies to one
// workload, following the spec in docs/LOGSCAN_AND_TRIGGERS_TODO.md.
// The result is, in order:
//
//  1. Every global rule (workload_id == "" AND overrides_id == 0). A
//     global that has an override row for this workload
//     (workload_id == workloadID AND overrides_id == the global's id) is
//     replaced in place by that override row, so the override's own
//     fields win — including enabled=false to turn the global off for
//     this workload.
//  2. Then the workload-only rules (workload_id == workloadID AND
//     overrides_id == 0).
//
// Rows attached to other workloads, and override rows whose target is
// not among the globals, never appear in the result. If several override
// rows of this workload target the same global, the one listed last
// wins.
//
// The whole table is loaded with one ListLogScanRules call and resolved
// in Go, on the expectation that rule counts stay small
// (operator-curated, dozens not thousands). A listing error is returned
// unchanged with a nil slice; on success the slice is non-nil even when
// empty.
func (s *Store) EffectiveLogScanRules(workloadID string) ([]LogScanRule, error) {
	all, err := s.ListLogScanRules()
	if err != nil {
		return nil, err
	}

	var (
		globals         []LogScanRule
		scoped          []LogScanRule              // workload-only additions
		overrideByGlobl = map[int64]LogScanRule{} // global id -> override row
	)
	for _, rule := range all {
		if rule.OverridesID == 0 {
			// Not an override: either a global or a plain rule
			// attached to some workload.
			if rule.WorkloadID == "" {
				globals = append(globals, rule)
			} else if rule.WorkloadID == workloadID {
				scoped = append(scoped, rule)
			}
			continue
		}
		if rule.WorkloadID == workloadID {
			overrideByGlobl[rule.OverridesID] = rule
		}
	}

	effective := make([]LogScanRule, 0, len(globals)+len(scoped))
	for _, global := range globals {
		chosen := global
		if replacement, found := overrideByGlobl[global.ID]; found {
			chosen = replacement
		}
		effective = append(effective, chosen)
	}
	return append(effective, scoped...), nil
}
// queryLogScanRules runs query with args and decodes every result row
// into a LogScanRule via scanLogScanRuleRows. The query must select the
// columns in the order that helper scans them.
//
// On success the returned slice is non-nil even when the query matched
// no rows. On any failure — running the query, scanning a row, or an
// error reported by the row iterator — the slice is nil and the error is
// wrapped with context.
func (s *Store) queryLogScanRules(query string, args ...any) ([]LogScanRule, error) {
	rows, err := s.db.Query(query, args...)
	if err != nil {
		return nil, fmt.Errorf("query log scan rules: %w", err)
	}
	defer rows.Close()

	// Start from an empty, non-nil slice so "no rows" is a zero-length
	// result rather than nil.
	out := []LogScanRule{}
	for rows.Next() {
		r, err := scanLogScanRuleRows(rows)
		if err != nil {
			return nil, err
		}
		out = append(out, r)
	}
	// rows.Next returning false can mean "done" or "failed"; check which,
	// and never hand back a partial listing alongside an error.
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("iterate log scan rules: %w", err)
	}
	return out, nil
}
// scanLogScanRuleRows decodes the row that rows is currently positioned
// on into a LogScanRule. Columns are expected in the order id,
// workload_id, overrides_id, name, pattern, severity, streams,
// cooldown_seconds, enabled, created_at, updated_at. The enabled column
// is read as an integer, with any non-zero value meaning true.
//
// A scan failure is returned wrapped, together with the zero
// LogScanRule.
func scanLogScanRuleRows(rows *sql.Rows) (LogScanRule, error) {
	var (
		rule        LogScanRule
		enabledFlag int
	)
	dest := []any{
		&rule.ID, &rule.WorkloadID, &rule.OverridesID, &rule.Name,
		&rule.Pattern, &rule.Severity, &rule.Streams,
		&rule.CooldownSeconds, &enabledFlag, &rule.CreatedAt, &rule.UpdatedAt,
	}
	if err := rows.Scan(dest...); err != nil {
		return LogScanRule{}, fmt.Errorf("scan log scan rule: %w", err)
	}
	rule.Enabled = enabledFlag != 0
	return rule, nil
}
// scanLogScanRuleRow decodes a single-row query result into a
// LogScanRule. Columns are expected in the order id, workload_id,
// overrides_id, name, pattern, severity, streams, cooldown_seconds,
// enabled, created_at, updated_at. The enabled column is read as an
// integer, with any non-zero value meaning true.
//
// The Scan error is returned as-is (not wrapped), together with the zero
// LogScanRule, so the caller sees sql.ErrNoRows directly.
func scanLogScanRuleRow(row *sql.Row) (LogScanRule, error) {
	var (
		rule        LogScanRule
		enabledFlag int
	)
	dest := []any{
		&rule.ID, &rule.WorkloadID, &rule.OverridesID, &rule.Name,
		&rule.Pattern, &rule.Severity, &rule.Streams,
		&rule.CooldownSeconds, &enabledFlag, &rule.CreatedAt, &rule.UpdatedAt,
	}
	if err := row.Scan(dest...); err != nil {
		return LogScanRule{}, err
	}
	rule.Enabled = enabledFlag != 0
	return rule, nil
}
// validateLogScanRule enforces the per-row invariants and returns the
// first violation found, or nil when r is acceptable:
//
//   - name and pattern must contain something other than whitespace;
//   - severity must be one of the LogScanSeverity* values, or blank;
//   - streams must be one of the LogScanStream* values, or blank;
//   - cooldown_seconds must not be negative;
//   - an override row (overrides_id != 0) must name a workload;
//   - overrides_id must not be negative.
//
// Regex compilation is intentionally NOT done here — it's a hot-path
// concern owned by the engine snapshot, and engine compile errors
// become engine-side warnings rather than store-side rejections to
// keep the failure mode operator-debuggable.
func validateLogScanRule(r LogScanRule) error {
	if strings.TrimSpace(r.Name) == "" {
		return fmt.Errorf("log scan rule: name is required")
	}
	if strings.TrimSpace(r.Pattern) == "" {
		return fmt.Errorf("log scan rule: pattern is required")
	}
	switch r.Severity {
	case LogScanSeverityInfo, LogScanSeverityWarn, LogScanSeverityError:
	case "":
		// Default applied at the caller; allow blank.
	default:
		return fmt.Errorf("log scan rule: invalid severity %q", r.Severity)
	}
	switch r.Streams {
	case LogScanStreamAll, LogScanStreamStdout, LogScanStreamStderr:
	case "":
		// Blank is accepted here, same as for severity.
	default:
		return fmt.Errorf("log scan rule: invalid streams %q", r.Streams)
	}
	if r.CooldownSeconds < 0 {
		return fmt.Errorf("log scan rule: cooldown_seconds must be >= 0")
	}
	// An override row is meant to point at a global rule and live under
	// a specific workload. The store doesn't verify that the referenced
	// row exists (no PRAGMA foreign_keys), but it can sanity-check the
	// shape.
	if r.OverridesID != 0 && r.WorkloadID == "" {
		return fmt.Errorf("log scan rule: override row requires workload_id")
	}
	// 0 means "not an override". A negative value would be stored as an
	// override row that the effective-rules resolution can never match
	// against a global, so reject it.
	if r.OverridesID < 0 {
		return fmt.Errorf("log scan rule: overrides_id must be >= 0")
	}
	return nil
}