Files
alexei.dolgolyov 7a9ff7ad54 feat(observability): event triggers + log scanner backend
Two paired backends sharing the events.Bus seam:

Event triggers (consumer-side):
- internal/store/event_triggers.go — CRUD with action_secret
  redaction on read (placeholder echo treated as "no change" on
  PATCH so secrets aren't accidentally wiped).
- internal/events/dispatcher.go — bus subscriber, AND-composed
  filters (severity CSV, source CSV, message regex with memoized
  compile cache). Structural loop-prevention: never writes to
  event_log. Sends via notifier.SendPayload.
- internal/notify: SendPayload + SendSyncForTestPayload methods,
  TierEventTrigger constant, doSendRaw shared with the legacy
  Event-shaped path.
- internal/api/event_triggers.go — admin-gated CRUD + /test
  sending the real TriggerWebhookPayload shape. SSRF guard
  rejects loopback / link-local / unspecified targets. PATCH
  uses pointer-typed DTO for partial updates.

Log scanner (producer-side):
- internal/logscanner/ — engine (per-rule cooldown +
  per-container token bucket, atomic drop counters), tail
  (multiplexed docker frame demuxer with TTY fallback + 16 MiB
  payload cap + 1 MiB reassembly cap + RFC3339Nano-validated
  timestamp strip + UTF-8-safe message truncation), manager
  (5s container polling, atomic.Pointer[Snapshot] hot-reload,
  HitEmitter writes event_log + publishes EventLog so the
  trigger dispatcher picks them up immediately).
- internal/docker/container.go — ContainerLogsOpts exposes
  stream selection for stderr-only / stdout-only rules.
- internal/store: log_scan_rules table + CRUD with
  EffectiveLogScanRules resolver (globals minus per-workload
  overrides plus workload-only additions). Transactional
  cascade-delete of overrides when a global rule is removed.
- internal/api/log_scan_rules.go — admin-gated CRUD + /test
  (sample_line → matched/captures) + /stats (drop counters +
  active tail count + last-snapshot compile errors) +
  GET /api/workloads/{id}/effective-rules.

cmd/server/main.go wires both subsystems next to the existing
RegisterPersistentLogger. Coverage spans engine cooldown / bucket
counter tests, snapshot effective-set semantics, manager compile-
error capture, dispatcher matching, store validation +
cascade-delete, API URL validator + secret redaction.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-11 22:18:11 +03:00

209 lines
6.3 KiB
Go

package store
import (
"database/sql"
"errors"
"fmt"
"strings"
)
// CreateEventTrigger validates t and inserts it as a new event_triggers row.
//
// Name and ActionTarget must be non-blank once surrounding whitespace is
// ignored; both are stored exactly as supplied. An empty ActionType
// defaults to EventTriggerActionWebhook and any other value is rejected.
// CreatedAt and UpdatedAt are both overwritten with one Now() reading.
// The returned struct is the input with those fields and the ID reported
// by LastInsertId filled in.
func (s *Store) CreateEventTrigger(t EventTrigger) (EventTrigger, error) {
	var none EventTrigger

	// Validation runs in a fixed order: name, action type, action target.
	if len(strings.TrimSpace(t.Name)) == 0 {
		return none, fmt.Errorf("event_trigger: name is required")
	}
	actionType := t.ActionType
	if actionType == "" {
		actionType = EventTriggerActionWebhook
	}
	if actionType != EventTriggerActionWebhook {
		return none, fmt.Errorf("event_trigger: unsupported action_type %q", actionType)
	}
	t.ActionType = actionType
	if len(strings.TrimSpace(t.ActionTarget)) == 0 {
		return none, fmt.Errorf("event_trigger: action_target is required")
	}

	// One timestamp for both columns so a fresh row has created == updated.
	stamp := Now()
	t.CreatedAt, t.UpdatedAt = stamp, stamp

	result, err := s.db.Exec(
		`INSERT INTO event_triggers
(name, filter_severity, filter_source, filter_message_regex,
action_type, action_target, action_secret, enabled,
created_at, updated_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
		t.Name, t.FilterSeverity, t.FilterSource, t.FilterMessageRegex,
		t.ActionType, t.ActionTarget, t.ActionSecret, boolToInt(t.Enabled),
		t.CreatedAt, t.UpdatedAt,
	)
	if err != nil {
		return none, fmt.Errorf("insert event trigger: %w", err)
	}

	newID, err := result.LastInsertId()
	if err != nil {
		return none, fmt.Errorf("get event trigger id: %w", err)
	}
	t.ID = newID
	return t, nil
}
// ListEventTriggers loads every row of event_triggers in ascending id
// order, which keeps UI rendering stable between requests. There is no
// paging: the table is expected to stay small because triggers are
// curated by operators.
//
// The returned slice is never nil on the success path, even when the
// table is empty.
func (s *Store) ListEventTriggers() ([]EventTrigger, error) {
	rows, err := s.db.Query(
		`SELECT id, name, filter_severity, filter_source, filter_message_regex,
action_type, action_target, action_secret, enabled,
created_at, updated_at
FROM event_triggers ORDER BY id`,
	)
	if err != nil {
		return nil, fmt.Errorf("query event triggers: %w", err)
	}
	defer rows.Close()

	triggers := make([]EventTrigger, 0)
	for rows.Next() {
		trigger, scanErr := scanEventTrigger(rows)
		if scanErr != nil {
			return nil, scanErr
		}
		triggers = append(triggers, trigger)
	}
	// rows.Err reports any failure that ended the iteration early.
	return triggers, rows.Err()
}
// ListEnabledEventTriggers loads only the rows whose enabled column is 1,
// in ascending id order. Filtering happens in SQL, so disabled triggers
// are never scanned; this is the listing meant for the dispatcher hot
// path.
//
// The returned slice is never nil on the success path, even when no
// trigger is enabled.
func (s *Store) ListEnabledEventTriggers() ([]EventTrigger, error) {
	rows, err := s.db.Query(
		`SELECT id, name, filter_severity, filter_source, filter_message_regex,
action_type, action_target, action_secret, enabled,
created_at, updated_at
FROM event_triggers WHERE enabled = 1 ORDER BY id`,
	)
	if err != nil {
		return nil, fmt.Errorf("query enabled event triggers: %w", err)
	}
	defer rows.Close()

	active := make([]EventTrigger, 0)
	for rows.Next() {
		trigger, scanErr := scanEventTrigger(rows)
		if scanErr != nil {
			return nil, scanErr
		}
		active = append(active, trigger)
	}
	// rows.Err reports any failure that ended the iteration early.
	return active, rows.Err()
}
// GetEventTrigger looks up a single trigger by primary key.
//
// When no row has the given id, the returned error wraps ErrNotFound
// (test with errors.Is). Any other scan or query failure is wrapped
// with "query event trigger".
func (s *Store) GetEventTrigger(id int64) (EventTrigger, error) {
	trigger, err := scanEventTriggerRow(s.db.QueryRow(
		`SELECT id, name, filter_severity, filter_source, filter_message_regex,
action_type, action_target, action_secret, enabled,
created_at, updated_at
FROM event_triggers WHERE id = ?`, id,
	))

	switch {
	case errors.Is(err, sql.ErrNoRows):
		// Translate the driver sentinel into the store's own sentinel.
		return EventTrigger{}, fmt.Errorf("event trigger %d: %w", id, ErrNotFound)
	case err != nil:
		return EventTrigger{}, fmt.Errorf("query event trigger: %w", err)
	}
	return trigger, nil
}
// UpdateEventTrigger overwrites the editable columns of the row
// identified by t.ID and returns the row as re-read from the database.
//
// Validation mirrors CreateEventTrigger: Name and ActionTarget must be
// non-blank once surrounding whitespace is ignored, and an empty
// ActionType defaults to EventTriggerActionWebhook (any other value is
// rejected). t.ID must be non-zero. created_at is not part of the UPDATE
// and is therefore preserved; updated_at is set to Now().
//
// If no row matches t.ID the returned error wraps ErrNotFound. A failure
// to obtain the affected-row count is returned as its own error rather
// than being mistaken for a missing row.
//
// The read-back is a separate query issued after the UPDATE, not part of
// a transaction with it.
func (s *Store) UpdateEventTrigger(t EventTrigger) (EventTrigger, error) {
	if t.ID == 0 {
		return EventTrigger{}, fmt.Errorf("event_trigger: id is required for update")
	}
	if strings.TrimSpace(t.Name) == "" {
		return EventTrigger{}, fmt.Errorf("event_trigger: name is required")
	}
	if t.ActionType == "" {
		t.ActionType = EventTriggerActionWebhook
	}
	if t.ActionType != EventTriggerActionWebhook {
		return EventTrigger{}, fmt.Errorf("event_trigger: unsupported action_type %q", t.ActionType)
	}
	if strings.TrimSpace(t.ActionTarget) == "" {
		return EventTrigger{}, fmt.Errorf("event_trigger: action_target is required")
	}
	t.UpdatedAt = Now()
	res, err := s.db.Exec(
		`UPDATE event_triggers
SET name = ?, filter_severity = ?, filter_source = ?,
filter_message_regex = ?, action_type = ?, action_target = ?,
action_secret = ?, enabled = ?, updated_at = ?
WHERE id = ?`,
		t.Name, t.FilterSeverity, t.FilterSource, t.FilterMessageRegex,
		t.ActionType, t.ActionTarget, t.ActionSecret, boolToInt(t.Enabled),
		t.UpdatedAt, t.ID,
	)
	if err != nil {
		return EventTrigger{}, fmt.Errorf("update event trigger: %w", err)
	}
	// Check the RowsAffected error explicitly: if it were discarded, a
	// driver failure would leave n == 0 and be misreported as ErrNotFound.
	n, err := res.RowsAffected()
	if err != nil {
		return EventTrigger{}, fmt.Errorf("update event trigger: rows affected: %w", err)
	}
	if n == 0 {
		return EventTrigger{}, fmt.Errorf("event trigger %d: %w", t.ID, ErrNotFound)
	}
	return s.GetEventTrigger(t.ID)
}
// DeleteEventTrigger removes the trigger with the given ID.
//
// If no row was deleted (for example the trigger is already gone after a
// double-click in the UI), the returned error wraps ErrNotFound so the
// caller can report a clean "not found" instead of a generic failure. A
// failure to obtain the affected-row count is returned as its own error
// rather than being mistaken for a missing row.
func (s *Store) DeleteEventTrigger(id int64) error {
	res, err := s.db.Exec(`DELETE FROM event_triggers WHERE id = ?`, id)
	if err != nil {
		return fmt.Errorf("delete event trigger: %w", err)
	}
	// Check the RowsAffected error explicitly: if it were discarded, a
	// driver failure would leave n == 0 and be misreported as ErrNotFound.
	n, err := res.RowsAffected()
	if err != nil {
		return fmt.Errorf("delete event trigger: rows affected: %w", err)
	}
	if n == 0 {
		return fmt.Errorf("event trigger %d: %w", id, ErrNotFound)
	}
	return nil
}
// scanEventTrigger decodes the current row of a multi-row result into an
// EventTrigger. The column order must match the SELECT lists used by the
// list queries. The enabled column is read as an integer and mapped to a
// bool (non-zero means true). Scan failures are wrapped with
// "scan event trigger".
func scanEventTrigger(rows *sql.Rows) (EventTrigger, error) {
	var (
		trigger     EventTrigger
		enabledFlag int
	)
	dest := []any{
		&trigger.ID, &trigger.Name, &trigger.FilterSeverity, &trigger.FilterSource, &trigger.FilterMessageRegex,
		&trigger.ActionType, &trigger.ActionTarget, &trigger.ActionSecret, &enabledFlag,
		&trigger.CreatedAt, &trigger.UpdatedAt,
	}
	if err := rows.Scan(dest...); err != nil {
		return EventTrigger{}, fmt.Errorf("scan event trigger: %w", err)
	}
	trigger.Enabled = enabledFlag != 0
	return trigger, nil
}
// scanEventTriggerRow decodes a single-row query result into an
// EventTrigger, using the same column order as scanEventTrigger. The
// Scan error is returned as-is, without wrapping, so the caller receives
// sql.ErrNoRows directly when the query matched nothing.
func scanEventTriggerRow(row *sql.Row) (EventTrigger, error) {
	var (
		trigger     EventTrigger
		enabledFlag int
	)
	dest := []any{
		&trigger.ID, &trigger.Name, &trigger.FilterSeverity, &trigger.FilterSource, &trigger.FilterMessageRegex,
		&trigger.ActionType, &trigger.ActionTarget, &trigger.ActionSecret, &enabledFlag,
		&trigger.CreatedAt, &trigger.UpdatedAt,
	}
	if err := row.Scan(dest...); err != nil {
		return EventTrigger{}, err
	}
	trigger.Enabled = enabledFlag != 0
	return trigger, nil
}
// boolToInt converts a Go bool to the 0/1 integer form used for the
// enabled column: true becomes 1, false becomes 0.
func boolToInt(b bool) int {
	var n int
	if b {
		n = 1
	}
	return n
}