feat(observability): event triggers + log scanner backend
Two paired backends sharing the events.Bus seam:
Event triggers (consumer-side):
- internal/store/event_triggers.go — CRUD with action_secret
redaction on read (placeholder echo treated as "no change" on
PATCH so secrets aren't accidentally wiped).
- internal/events/dispatcher.go — bus subscriber, AND-composed
filters (severity CSV, source CSV, message regex with memoized
compile cache). Structural loop-prevention: never writes to
event_log. Sends via notifier.SendPayload.
- internal/notify: SendPayload + SendSyncForTestPayload methods,
TierEventTrigger constant, doSendRaw shared with the legacy
Event-shaped path.
- internal/api/event_triggers.go — admin-gated CRUD + /test
sending the real TriggerWebhookPayload shape. SSRF guard
rejects loopback / link-local / unspecified targets. PATCH
uses pointer-typed DTO for partial updates.
Log scanner (producer-side):
- internal/logscanner/ — engine (per-rule cooldown +
per-container token bucket, atomic drop counters), tail
(multiplexed docker frame demuxer with TTY fallback + 16 MiB
payload cap + 1 MiB reassembly cap + RFC3339Nano-validated
timestamp strip + UTF-8-safe message truncation), manager
(5s container polling, atomic.Pointer[Snapshot] hot-reload,
HitEmitter writes event_log + publishes EventLog so the
trigger dispatcher picks them up immediately).
- internal/docker/container.go — ContainerLogsOpts exposes
stream selection for stderr-only / stdout-only rules.
- internal/store: log_scan_rules table + CRUD with
EffectiveLogScanRules resolver (globals minus per-workload
overrides plus workload-only additions). Transactional
cascade-delete of overrides when a global rule is removed.
- internal/api/log_scan_rules.go — admin-gated CRUD + /test
(sample_line → matched/captures) + /stats (drop counters +
active tail count + last-snapshot compile errors) +
GET /api/workloads/{id}/effective-rules.
cmd/server/main.go wires both subsystems next to the existing
RegisterPersistentLogger. Coverage spans engine cooldown / bucket
counter tests, snapshot effective-set semantics, manager compile-
error capture, dispatcher matching, store validation +
cascade-delete, API URL validator + secret redaction.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -27,6 +27,7 @@ import (
|
||||
"github.com/alexei/tinyforge/internal/events"
|
||||
"github.com/alexei/tinyforge/internal/health"
|
||||
"github.com/alexei/tinyforge/internal/logging"
|
||||
"github.com/alexei/tinyforge/internal/logscanner"
|
||||
"github.com/alexei/tinyforge/internal/notify"
|
||||
"github.com/alexei/tinyforge/internal/npm"
|
||||
"github.com/alexei/tinyforge/internal/proxy"
|
||||
@@ -38,6 +39,16 @@ import (
|
||||
"github.com/alexei/tinyforge/internal/staticsite"
|
||||
"github.com/alexei/tinyforge/internal/store"
|
||||
"github.com/alexei/tinyforge/internal/webhook"
|
||||
|
||||
// Plugin registrations: each blank-import runs its init() and registers
|
||||
// itself with internal/workload/plugin. Adding a new Source or Trigger
|
||||
// is a matter of dropping a new package and adding it to this list.
|
||||
_ "github.com/alexei/tinyforge/internal/workload/plugin/source/compose"
|
||||
_ "github.com/alexei/tinyforge/internal/workload/plugin/source/image"
|
||||
_ "github.com/alexei/tinyforge/internal/workload/plugin/source/static"
|
||||
_ "github.com/alexei/tinyforge/internal/workload/plugin/trigger/git"
|
||||
_ "github.com/alexei/tinyforge/internal/workload/plugin/trigger/manual"
|
||||
_ "github.com/alexei/tinyforge/internal/workload/plugin/trigger/registry"
|
||||
)
|
||||
|
||||
func main() {
|
||||
@@ -105,6 +116,9 @@ func main() {
|
||||
rec := reconciler.New(db, dockerClient, 30*time.Second)
|
||||
rec.Start(context.Background())
|
||||
defer rec.Stop()
|
||||
// The plugin pass is wired after the deployer is constructed (below);
|
||||
// the reconciler tolerates a nil dispatcher until then. SetPluginReconciler
|
||||
// is safe to call at any time, including mid-tick.
|
||||
|
||||
// Read settings for NPM URL and polling interval.
|
||||
settings, err := db.GetSettings()
|
||||
@@ -166,12 +180,24 @@ func main() {
|
||||
})
|
||||
defer stopLogger()
|
||||
|
||||
// Event-trigger dispatcher: consume EventLog publishes off the bus
|
||||
// and fan out to operator-configured webhook actions. Loop-prevention
|
||||
// is structural — the dispatcher never writes back to event_log; all
|
||||
// delivery outcomes land in notifier audit logging.
|
||||
stopTriggerDispatcher := events.RegisterEventTriggerDispatcher(eventBus, db, notifier)
|
||||
defer stopTriggerDispatcher()
|
||||
|
||||
dep := deployer.New(dockerClient, proxyProvider, db, healthChecker, notifier, eventBus, encKey)
|
||||
rec.SetPluginReconciler(dep)
|
||||
|
||||
// Initialize webhook handler. Per-project and per-site secrets are stored
|
||||
// on their respective rows; the static-site triggerer is wired in below
|
||||
// once the site manager has been constructed.
|
||||
webhookHandler := webhook.NewHandler(db, dep, nil)
|
||||
// Plugin-pipeline dispatcher for /api/webhook/workloads/{secret}.
|
||||
// Wired here so the same *deployer.Deployer serves both legacy and
|
||||
// plugin-native paths from one place.
|
||||
webhookHandler.SetPluginDispatcher(dep)
|
||||
|
||||
// Initialize registry poller.
|
||||
poller := registry.NewPoller(db, dep, encKey)
|
||||
@@ -322,6 +348,11 @@ func main() {
|
||||
// Initialize static site manager and health checker.
|
||||
staticSiteMgr := staticsite.NewManager(db, dockerClient, proxyProvider, eventBus, notifier, encKey)
|
||||
webhookHandler.SetSiteSyncTriggerer(staticSiteMgr)
|
||||
// Wire the plugin static source's backend to the manager. After this
|
||||
// call the "static" kind appears in /api/hooks/kinds and the /apps/new
|
||||
// picker; before it, the source registers no kind, so the frontend
|
||||
// silently omits it.
|
||||
wireStaticBackend(db, staticSiteMgr)
|
||||
staticSiteHealth := staticsite.NewHealthChecker(db, dockerClient, staticSiteMgr)
|
||||
if err := staticSiteHealth.Start("2m"); err != nil {
|
||||
slog.Warn("failed to start static site health checker", "error", err)
|
||||
@@ -339,6 +370,26 @@ func main() {
|
||||
stackMgr = nil
|
||||
}
|
||||
|
||||
// Log-scan manager: tails running containers and emits event_log
|
||||
// entries when log lines match operator-configured regex rules.
|
||||
// Start before the API server is wired so the reload callback can
|
||||
// be plugged in via SetLogScanReloader.
|
||||
logScanMgr := logscanner.NewManager(logscanner.Config{
|
||||
Rules: db,
|
||||
Containers: db,
|
||||
Docker: dockerClient,
|
||||
Events: db,
|
||||
Bus: eventBus,
|
||||
PollInterval: 5 * time.Second,
|
||||
})
|
||||
// Manager owns its own cancellation; Stop() drives the loop and
|
||||
// every tail to exit. Using Background here matches the
|
||||
// reconciler + stale-scanner pattern elsewhere in this file.
|
||||
if err := logScanMgr.Start(context.Background()); err != nil {
|
||||
slog.Warn("logscanner: initial rule load failed", "error", err)
|
||||
}
|
||||
defer logScanMgr.Stop()
|
||||
|
||||
// Build API server.
|
||||
apiServer := api.NewServer(db, dockerClient, npmClient, proxyProvider, dep, notifier, webhookHandler, eventBus, encKey)
|
||||
apiServer.SetStaticSiteManager(staticSiteMgr)
|
||||
@@ -346,6 +397,7 @@ func main() {
|
||||
apiServer.SetStackManager(stackMgr)
|
||||
}
|
||||
apiServer.SetStaleScanner(staleScanner)
|
||||
apiServer.SetLogScanReloader(logScanMgr)
|
||||
apiServer.SetBackupEngine(backupEngine)
|
||||
apiServer.SetDBPath(dbPath)
|
||||
apiServer.SetBackupSettingsChangedCallback(scheduleAutobackup)
|
||||
|
||||
@@ -0,0 +1,325 @@
|
||||
// Package api: event-trigger HTTP handlers. The dispatcher itself
|
||||
// lives in internal/events; this file is the REST surface that lets
|
||||
// operators create, edit, and test triggers from the UI.
|
||||
package api
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/go-chi/chi/v5"
|
||||
|
||||
"github.com/alexei/tinyforge/internal/events"
|
||||
"github.com/alexei/tinyforge/internal/notify"
|
||||
"github.com/alexei/tinyforge/internal/store"
|
||||
)
|
||||
|
||||
// triggerInput is the JSON shape accepted by POST + PATCH. Pointers
|
||||
// distinguish "absent" from a zero/empty value so PATCH can leave a
|
||||
// field unchanged. Required fields on POST are validated explicitly.
|
||||
type triggerInput struct {
|
||||
// Name must end up non-empty; validateTrigger rejects an empty name.
Name *string `json:"name"`
|
||||
// FilterSeverity is copied onto the stored trigger as-is by the handlers in this file.
FilterSeverity *string `json:"filter_severity"`
|
||||
// FilterSource is copied onto the stored trigger as-is by the handlers in this file.
FilterSource *string `json:"filter_source"`
|
||||
// FilterMessageRegex, when non-empty, must compile with regexp.Compile (see validateTrigger).
FilterMessageRegex *string `json:"filter_message_regex"`
|
||||
// ActionType falls back to store.EventTriggerActionWebhook on create; an empty value is ignored on PATCH.
ActionType *string `json:"action_type"`
|
||||
// ActionTarget is required and is checked by validateWebhookURL.
ActionTarget *string `json:"action_target"`
|
||||
ActionSecret *string `json:"action_secret"` // omit = leave unchanged; "" = clear
|
||||
// Enabled defaults to true on create when the field is absent.
Enabled *bool `json:"enabled"`
|
||||
}
|
||||
|
||||
// actionSecretPlaceholder is what we return on read to signal "a secret
|
||||
// is configured" without exposing the actual value. The edit page
|
||||
// preserves this placeholder verbatim (or replaces it with a new value)
|
||||
// — the API treats the placeholder as "no change" on PATCH. This is
|
||||
// the same shape Stripe / GitHub use for their secret read APIs.
|
||||
const actionSecretPlaceholder = "********" // written by redactTriggerSecret; compared verbatim in updateEventTrigger
|
||||
|
||||
// listEventTriggers handles GET /api/event-triggers. Secrets are
|
||||
// redacted to avoid exposing them on read; the edit page shows a
|
||||
// "configured" indicator when a placeholder is present.
|
||||
func (s *Server) listEventTriggers(w http.ResponseWriter, r *http.Request) {
|
||||
out, err := s.store.ListEventTriggers()
|
||||
if err != nil {
|
||||
respondError(w, http.StatusInternalServerError, "list event triggers")
|
||||
return
|
||||
}
|
||||
for i := range out {
|
||||
out[i] = redactTriggerSecret(out[i])
|
||||
}
|
||||
respondJSON(w, http.StatusOK, out)
|
||||
}
|
||||
|
||||
// getEventTrigger handles GET /api/event-triggers/{id}.
|
||||
func (s *Server) getEventTrigger(w http.ResponseWriter, r *http.Request) {
|
||||
id, ok := parseTriggerID(w, r)
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
t, err := s.store.GetEventTrigger(id)
|
||||
if err != nil {
|
||||
mapStoreError(w, err, "event trigger")
|
||||
return
|
||||
}
|
||||
respondJSON(w, http.StatusOK, redactTriggerSecret(t))
|
||||
}
|
||||
|
||||
// createEventTrigger handles POST /api/event-triggers.
|
||||
func (s *Server) createEventTrigger(w http.ResponseWriter, r *http.Request) {
|
||||
var in triggerInput
|
||||
if !decodeJSON(w, r, &in) {
|
||||
return
|
||||
}
|
||||
t := store.EventTrigger{
|
||||
Name: derefString(in.Name),
|
||||
FilterSeverity: derefString(in.FilterSeverity),
|
||||
FilterSource: derefString(in.FilterSource),
|
||||
FilterMessageRegex: derefString(in.FilterMessageRegex),
|
||||
ActionType: firstNonEmpty(derefString(in.ActionType), store.EventTriggerActionWebhook),
|
||||
ActionTarget: derefString(in.ActionTarget),
|
||||
ActionSecret: derefString(in.ActionSecret),
|
||||
Enabled: in.Enabled == nil || *in.Enabled,
|
||||
}
|
||||
if msg := validateTrigger(t); msg != "" {
|
||||
respondError(w, http.StatusBadRequest, msg)
|
||||
return
|
||||
}
|
||||
out, err := s.store.CreateEventTrigger(t)
|
||||
if err != nil {
|
||||
// CreateEventTrigger returns validation-shaped errors plus
|
||||
// raw DB errors. Validation already ran above, so anything
|
||||
// here is a server-side problem — surface as 500 and avoid
|
||||
// echoing driver text to the client.
|
||||
respondError(w, http.StatusInternalServerError, "create event trigger")
|
||||
return
|
||||
}
|
||||
respondJSON(w, http.StatusCreated, redactTriggerSecret(out))
|
||||
}
|
||||
|
||||
// updateEventTrigger handles PATCH /api/event-triggers/{id}. Each
|
||||
// field on the input is optional (pointer); absent fields are left
|
||||
// unchanged. ActionSecret receives special treatment so the read-side
|
||||
// placeholder round-trips safely.
|
||||
func (s *Server) updateEventTrigger(w http.ResponseWriter, r *http.Request) {
|
||||
id, ok := parseTriggerID(w, r)
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
existing, err := s.store.GetEventTrigger(id)
|
||||
if err != nil {
|
||||
mapStoreError(w, err, "event trigger")
|
||||
return
|
||||
}
|
||||
|
||||
var in triggerInput
|
||||
if !decodeJSON(w, r, &in) {
|
||||
return
|
||||
}
|
||||
if in.Name != nil {
|
||||
existing.Name = *in.Name
|
||||
}
|
||||
if in.FilterSeverity != nil {
|
||||
existing.FilterSeverity = *in.FilterSeverity
|
||||
}
|
||||
if in.FilterSource != nil {
|
||||
existing.FilterSource = *in.FilterSource
|
||||
}
|
||||
if in.FilterMessageRegex != nil {
|
||||
existing.FilterMessageRegex = *in.FilterMessageRegex
|
||||
}
|
||||
if in.ActionType != nil && *in.ActionType != "" {
|
||||
existing.ActionType = *in.ActionType
|
||||
}
|
||||
if in.ActionTarget != nil {
|
||||
existing.ActionTarget = *in.ActionTarget
|
||||
}
|
||||
// Secret round-trip: the read API returns a placeholder when a
|
||||
// secret is configured. If the client echoes the placeholder back
|
||||
// unchanged we leave the stored secret alone; any other value
|
||||
// (including the empty string) is treated as a deliberate update.
|
||||
if in.ActionSecret != nil && *in.ActionSecret != actionSecretPlaceholder {
|
||||
existing.ActionSecret = *in.ActionSecret
|
||||
}
|
||||
if in.Enabled != nil {
|
||||
existing.Enabled = *in.Enabled
|
||||
}
|
||||
|
||||
if msg := validateTrigger(existing); msg != "" {
|
||||
respondError(w, http.StatusBadRequest, msg)
|
||||
return
|
||||
}
|
||||
|
||||
out, err := s.store.UpdateEventTrigger(existing)
|
||||
if err != nil {
|
||||
if errors.Is(err, store.ErrNotFound) {
|
||||
respondNotFound(w, "event trigger")
|
||||
return
|
||||
}
|
||||
respondError(w, http.StatusInternalServerError, "update event trigger")
|
||||
return
|
||||
}
|
||||
respondJSON(w, http.StatusOK, redactTriggerSecret(out))
|
||||
}
|
||||
|
||||
// deleteEventTrigger handles DELETE /api/event-triggers/{id}.
|
||||
func (s *Server) deleteEventTrigger(w http.ResponseWriter, r *http.Request) {
|
||||
id, ok := parseTriggerID(w, r)
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
if err := s.store.DeleteEventTrigger(id); err != nil {
|
||||
mapStoreError(w, err, "event trigger")
|
||||
return
|
||||
}
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
}
|
||||
|
||||
// testEventTrigger handles POST /api/event-triggers/{id}/test. Sends
|
||||
// a real TriggerWebhookPayload to the action target so receivers see
|
||||
// the same shape they'll see at runtime. Routes through the dedicated
|
||||
// SendSyncForTestPayload path that preserves the payload through the
|
||||
// HMAC+HTTP core unchanged.
|
||||
func (s *Server) testEventTrigger(w http.ResponseWriter, r *http.Request) {
|
||||
id, ok := parseTriggerID(w, r)
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
t, err := s.store.GetEventTrigger(id)
|
||||
if err != nil {
|
||||
mapStoreError(w, err, "event trigger")
|
||||
return
|
||||
}
|
||||
if t.ActionType != store.EventTriggerActionWebhook {
|
||||
respondError(w, http.StatusBadRequest, "action_type not testable")
|
||||
return
|
||||
}
|
||||
|
||||
now := time.Now().UTC().Format(time.RFC3339)
|
||||
payload := events.TriggerWebhookPayload{
|
||||
Type: "event_trigger",
|
||||
TriggerID: t.ID,
|
||||
Trigger: t.Name,
|
||||
Event: events.EventLogPayload{
|
||||
ID: -1,
|
||||
Source: "test",
|
||||
Severity: "info",
|
||||
Message: "Test event from Tinyforge — trigger=" + t.Name,
|
||||
Metadata: `{"synthetic":true}`,
|
||||
CreatedAt: now,
|
||||
},
|
||||
Timestamp: now,
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(r.Context(), 10*time.Second)
|
||||
defer cancel()
|
||||
result := s.notifier.SendSyncForTestPayload(ctx, t.ActionTarget, t.ActionSecret,
|
||||
notify.TierEventTrigger, "event_trigger", payload)
|
||||
respondJSON(w, http.StatusOK, result)
|
||||
}
|
||||
|
||||
// validateTrigger runs the full set of invariants over a fully-merged
|
||||
// trigger row. Called by both create and update so the contract is
|
||||
// enforced once. Returns an empty string for a valid trigger.
|
||||
func validateTrigger(t store.EventTrigger) string {
|
||||
if t.Name == "" {
|
||||
return "name is required"
|
||||
}
|
||||
if t.ActionType != "" && t.ActionType != store.EventTriggerActionWebhook {
|
||||
return "action_type must be 'webhook'"
|
||||
}
|
||||
if t.ActionTarget == "" {
|
||||
return "action_target is required"
|
||||
}
|
||||
if msg := validateWebhookURL(t.ActionTarget); msg != "" {
|
||||
return msg
|
||||
}
|
||||
if t.FilterMessageRegex != "" {
|
||||
if _, err := regexp.Compile(t.FilterMessageRegex); err != nil {
|
||||
return "filter_message_regex invalid: " + err.Error()
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// validateWebhookURL is a defense-in-depth SSRF guard for the
// admin-controlled webhook target. It returns "" when raw is acceptable
// and a client-facing message otherwise.
//
// Checks, in order:
//   - raw must parse as a URL;
//   - the scheme must be http or https;
//   - the URL must carry a host;
//   - a literal-IP host must not be loopback, link-local (unicast or
//     multicast) or unspecified.
//
// RFC1918 private ranges are intentionally allowed since same-LAN
// receivers are a legitimate Tinyforge deployment pattern. DNS names
// are NOT resolved here — DNS rebinding is out of scope and would
// require enforcement at dispatch time too. Admin gating remains the
// primary control.
func validateWebhookURL(raw string) string {
	u, err := url.Parse(raw)
	if err != nil {
		return "action_target invalid URL: " + err.Error()
	}
	if u.Scheme != "http" && u.Scheme != "https" {
		return "action_target must be http:// or https://"
	}
	host := u.Hostname()
	if host == "" {
		return "action_target missing host"
	}

	// Hostname() keeps an IPv6 zone ("fe80::1%eth0"), and net.ParseIP
	// returns nil for zoned text, which would let a scoped link-local
	// literal slip past the guard below. Drop the zone before parsing.
	literal := host
	for i := 0; i < len(literal); i++ {
		if literal[i] == '%' {
			literal = literal[:i]
			break
		}
	}

	// Literal-IP guard: block loopback / link-local / unspecified
	// addresses outright. Hosts that are not IP literals pass through.
	if ip := net.ParseIP(literal); ip != nil {
		if ip.IsLoopback() || ip.IsLinkLocalUnicast() || ip.IsLinkLocalMulticast() || ip.IsUnspecified() {
			return "action_target points at a reserved/loopback address"
		}
	}
	return ""
}
|
||||
|
||||
// redactTriggerSecret returns a copy of t with ActionSecret replaced
|
||||
// by the placeholder string when a secret is configured. Empty secret
|
||||
// stays empty so the UI can distinguish "no signing" from "signing
|
||||
// configured."
|
||||
func redactTriggerSecret(t store.EventTrigger) store.EventTrigger {
|
||||
if t.ActionSecret != "" {
|
||||
t.ActionSecret = actionSecretPlaceholder
|
||||
}
|
||||
return t
|
||||
}
|
||||
|
||||
// mapStoreError translates a store-layer error into an HTTP status +
|
||||
// generic message. ErrNotFound → 404; everything else → 500 without
|
||||
// echoing driver text to the client (avoids leaking schema details
|
||||
// or transient error states to API consumers).
|
||||
func mapStoreError(w http.ResponseWriter, err error, resource string) {
|
||||
if errors.Is(err, store.ErrNotFound) {
|
||||
respondNotFound(w, resource)
|
||||
return
|
||||
}
|
||||
respondError(w, http.StatusInternalServerError, "get "+resource)
|
||||
}
|
||||
|
||||
func parseTriggerID(w http.ResponseWriter, r *http.Request) (int64, bool) {
|
||||
raw := chi.URLParam(r, "id")
|
||||
id, err := strconv.ParseInt(raw, 10, 64)
|
||||
if err != nil || id <= 0 {
|
||||
respondError(w, http.StatusBadRequest, "invalid event trigger id")
|
||||
return 0, false
|
||||
}
|
||||
return id, true
|
||||
}
|
||||
|
||||
// derefString returns the string p points at, or "" when p is nil.
func derefString(p *string) string {
	var value string
	if p != nil {
		value = *p
	}
	return value
}
|
||||
|
||||
// firstNonEmpty returns a unless it is the empty string, in which case
// it returns b (which may itself be empty).
func firstNonEmpty(a, b string) string {
	if a == "" {
		return b
	}
	return a
}
|
||||
@@ -0,0 +1,143 @@
|
||||
package api
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/alexei/tinyforge/internal/store"
|
||||
)
|
||||
|
||||
func TestValidateWebhookURL(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
url string
|
||||
wantErr string // substring; empty = pass
|
||||
}{
|
||||
{"https valid", "https://example.com/hook", ""},
|
||||
{"http valid", "http://example.com:8080/hook", ""},
|
||||
{"RFC1918 private LAN allowed", "http://192.168.1.50:9090/hook", ""},
|
||||
{"loopback rejected", "http://127.0.0.1:8090/hook", "loopback"},
|
||||
{"ipv6 loopback rejected", "http://[::1]:9000/hook", "loopback"},
|
||||
{"link-local rejected", "http://169.254.169.254/latest/meta-data", "reserved"},
|
||||
{"unspecified rejected", "http://0.0.0.0:9000/hook", "reserved"},
|
||||
{"file scheme rejected", "file:///etc/passwd", "http:// or https://"},
|
||||
{"missing host rejected", "https://", "missing host"},
|
||||
{"malformed url rejected", "://nope", "invalid URL"},
|
||||
}
|
||||
for _, c := range cases {
|
||||
t.Run(c.name, func(t *testing.T) {
|
||||
got := validateWebhookURL(c.url)
|
||||
if c.wantErr == "" {
|
||||
if got != "" {
|
||||
t.Fatalf("expected pass, got error: %q", got)
|
||||
}
|
||||
return
|
||||
}
|
||||
if !strings.Contains(got, c.wantErr) {
|
||||
t.Fatalf("error mismatch:\n got: %q\n want substring: %q", got, c.wantErr)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidateTrigger(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
in store.EventTrigger
|
||||
want string // substring of error; empty = pass
|
||||
}{
|
||||
{
|
||||
name: "missing name",
|
||||
in: store.EventTrigger{ActionTarget: "https://x.example.com/h"},
|
||||
want: "name is required",
|
||||
},
|
||||
{
|
||||
name: "missing target",
|
||||
in: store.EventTrigger{Name: "n"},
|
||||
want: "action_target is required",
|
||||
},
|
||||
{
|
||||
name: "bad scheme",
|
||||
in: store.EventTrigger{Name: "n", ActionTarget: "ftp://x.example.com/h"},
|
||||
want: "http:// or https://",
|
||||
},
|
||||
{
|
||||
name: "loopback target",
|
||||
in: store.EventTrigger{Name: "n", ActionTarget: "http://127.0.0.1/hook"},
|
||||
want: "loopback",
|
||||
},
|
||||
{
|
||||
name: "unsupported action_type",
|
||||
in: store.EventTrigger{Name: "n", ActionType: "email", ActionTarget: "https://x.example.com/h"},
|
||||
want: "action_type must be",
|
||||
},
|
||||
{
|
||||
name: "invalid regex",
|
||||
in: store.EventTrigger{
|
||||
Name: "n", ActionTarget: "https://x.example.com/h",
|
||||
FilterMessageRegex: "([unclosed",
|
||||
},
|
||||
want: "filter_message_regex invalid",
|
||||
},
|
||||
{
|
||||
name: "all valid",
|
||||
in: store.EventTrigger{
|
||||
Name: "n",
|
||||
ActionTarget: "https://x.example.com/h",
|
||||
FilterSeverity: "warn,error",
|
||||
FilterMessageRegex: `\bpanic\b`,
|
||||
},
|
||||
want: "",
|
||||
},
|
||||
}
|
||||
for _, c := range cases {
|
||||
t.Run(c.name, func(t *testing.T) {
|
||||
got := validateTrigger(c.in)
|
||||
if c.want == "" {
|
||||
if got != "" {
|
||||
t.Fatalf("expected pass, got error: %q", got)
|
||||
}
|
||||
return
|
||||
}
|
||||
if !strings.Contains(got, c.want) {
|
||||
t.Fatalf("error mismatch:\n got: %q\n want substring: %q", got, c.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestRedactTriggerSecret(t *testing.T) {
|
||||
withSecret := store.EventTrigger{Name: "n", ActionSecret: "shh-real-secret"}
|
||||
got := redactTriggerSecret(withSecret)
|
||||
if got.ActionSecret != actionSecretPlaceholder {
|
||||
t.Errorf("expected placeholder, got %q", got.ActionSecret)
|
||||
}
|
||||
if withSecret.ActionSecret != "shh-real-secret" {
|
||||
t.Errorf("original mutated: %q", withSecret.ActionSecret)
|
||||
}
|
||||
|
||||
noSecret := store.EventTrigger{Name: "n", ActionSecret: ""}
|
||||
got2 := redactTriggerSecret(noSecret)
|
||||
if got2.ActionSecret != "" {
|
||||
t.Errorf("empty secret should stay empty, got %q", got2.ActionSecret)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDerefString(t *testing.T) {
|
||||
if derefString(nil) != "" {
|
||||
t.Error("nil should deref to empty string")
|
||||
}
|
||||
s := "value"
|
||||
if derefString(&s) != "value" {
|
||||
t.Error("non-nil should deref to value")
|
||||
}
|
||||
}
|
||||
|
||||
func TestFirstNonEmpty(t *testing.T) {
|
||||
if firstNonEmpty("a", "b") != "a" {
|
||||
t.Error("non-empty first wins")
|
||||
}
|
||||
if firstNonEmpty("", "b") != "b" {
|
||||
t.Error("fallback when first empty")
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,350 @@
|
||||
// Package api: log-scan rule HTTP handlers. The scanner manager
|
||||
// lives in internal/logscanner; this file is the REST surface that
|
||||
// lets operators create, edit, and test rules from the UI.
|
||||
package api
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/go-chi/chi/v5"
|
||||
|
||||
"github.com/alexei/tinyforge/internal/logscanner"
|
||||
"github.com/alexei/tinyforge/internal/store"
|
||||
)
|
||||
|
||||
// LogScanReloader is what the API calls after any rule CRUD so the
|
||||
// scanner manager swaps its snapshot, and what the /stats endpoint
|
||||
// queries for runtime counters. Implemented by *logscanner.Manager;
|
||||
// nil-tolerant on the API side so the routes still work in a
|
||||
// scanner-disabled deployment.
|
||||
type LogScanReloader interface {
|
||||
// ReloadRules asks the implementation to pick up the current rule set.
// NOTE(review): what a non-nil error means is not visible in this file — confirm against the manager.
ReloadRules() error
|
||||
// Stats returns the implementation's runtime statistics as a logscanner.Stats value.
Stats() logscanner.Stats
|
||||
}
|
||||
|
||||
// SetLogScanReloader wires the API → manager reload signal. Called
|
||||
// from main after both subsystems are constructed.
|
||||
func (s *Server) SetLogScanReloader(r LogScanReloader) {
|
||||
// Plain field assignment; r is stored as given.
s.logScanReloader = r
|
||||
}
|
||||
|
||||
// ruleInput is the JSON shape accepted by POST + PATCH. Pointers
|
||||
// distinguish "absent" from explicit empty/zero. WorkloadID and
|
||||
// OverridesID are immutable on update (per store.UpdateLogScanRule)
|
||||
// so they only appear here for create.
|
||||
type ruleInput struct {
|
||||
// WorkloadID is read only by createLogScanRule; absent becomes "".
WorkloadID *string `json:"workload_id"`
|
||||
// OverridesID is read only by createLogScanRule.
OverridesID *int64 `json:"overrides_id"`
|
||||
// Name is copied onto the rule as-is by the handlers in this file.
Name *string `json:"name"`
|
||||
// Pattern is checked with validateRulePattern on both create and update.
Pattern *string `json:"pattern"`
|
||||
// Severity falls back to store.LogScanSeverityWarn on create; an empty value is ignored on PATCH.
Severity *string `json:"severity"`
|
||||
// Streams falls back to store.LogScanStreamAll on create; an empty value is ignored on PATCH.
Streams *string `json:"streams"`
|
||||
// CooldownSeconds is passed to derefIntDefault with a default of 60 on create.
CooldownSeconds *int `json:"cooldown_seconds"`
|
||||
// Enabled defaults to true on create when the field is absent.
Enabled *bool `json:"enabled"`
|
||||
}
|
||||
|
||||
// listLogScanRules handles GET /api/log-scan-rules. Optional query
|
||||
// filter `workload_id=...` returns only rules scoped to that
|
||||
// workload (workload-only + override rows, NOT globals).
|
||||
func (s *Server) listLogScanRules(w http.ResponseWriter, r *http.Request) {
|
||||
if wlID := r.URL.Query().Get("workload_id"); wlID != "" {
|
||||
out, err := s.store.ListLogScanRulesByWorkload(wlID)
|
||||
if err != nil {
|
||||
respondError(w, http.StatusInternalServerError, "list log scan rules")
|
||||
return
|
||||
}
|
||||
respondJSON(w, http.StatusOK, out)
|
||||
return
|
||||
}
|
||||
out, err := s.store.ListLogScanRules()
|
||||
if err != nil {
|
||||
respondError(w, http.StatusInternalServerError, "list log scan rules")
|
||||
return
|
||||
}
|
||||
respondJSON(w, http.StatusOK, out)
|
||||
}
|
||||
|
||||
// getLogScanRule handles GET /api/log-scan-rules/{id}.
|
||||
func (s *Server) getLogScanRule(w http.ResponseWriter, r *http.Request) {
|
||||
id, ok := parseRuleID(w, r)
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
rule, err := s.store.GetLogScanRule(id)
|
||||
if err != nil {
|
||||
mapStoreError(w, err, "log scan rule")
|
||||
return
|
||||
}
|
||||
respondJSON(w, http.StatusOK, rule)
|
||||
}
|
||||
|
||||
// createLogScanRule handles POST /api/log-scan-rules.
|
||||
func (s *Server) createLogScanRule(w http.ResponseWriter, r *http.Request) {
|
||||
var in ruleInput
|
||||
if !decodeJSON(w, r, &in) {
|
||||
return
|
||||
}
|
||||
rule := store.LogScanRule{
|
||||
WorkloadID: derefString(in.WorkloadID),
|
||||
OverridesID: derefInt64(in.OverridesID),
|
||||
Name: derefString(in.Name),
|
||||
Pattern: derefString(in.Pattern),
|
||||
Severity: firstNonEmpty(derefString(in.Severity), store.LogScanSeverityWarn),
|
||||
Streams: firstNonEmpty(derefString(in.Streams), store.LogScanStreamAll),
|
||||
CooldownSeconds: derefIntDefault(in.CooldownSeconds, 60),
|
||||
Enabled: in.Enabled == nil || *in.Enabled,
|
||||
}
|
||||
if msg := validateRulePattern(rule.Pattern); msg != "" {
|
||||
respondError(w, http.StatusBadRequest, msg)
|
||||
return
|
||||
}
|
||||
out, err := s.store.CreateLogScanRule(rule)
|
||||
if err != nil {
|
||||
// Store-side validation errors map to 400; anything else
|
||||
// (driver errors) is a 500 without leaking the raw text.
|
||||
if isClientValidationErr(err) {
|
||||
respondError(w, http.StatusBadRequest, err.Error())
|
||||
return
|
||||
}
|
||||
respondError(w, http.StatusInternalServerError, "create log scan rule")
|
||||
return
|
||||
}
|
||||
s.reloadLogScan()
|
||||
respondJSON(w, http.StatusCreated, out)
|
||||
}
|
||||
|
||||
// updateLogScanRule handles PATCH /api/log-scan-rules/{id}. Scope
|
||||
// fields (workload_id, overrides_id) are immutable; pattern/severity/
|
||||
// streams/cooldown/enabled/name are individually overridable.
|
||||
func (s *Server) updateLogScanRule(w http.ResponseWriter, r *http.Request) {
|
||||
id, ok := parseRuleID(w, r)
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
existing, err := s.store.GetLogScanRule(id)
|
||||
if err != nil {
|
||||
mapStoreError(w, err, "log scan rule")
|
||||
return
|
||||
}
|
||||
var in ruleInput
|
||||
if !decodeJSON(w, r, &in) {
|
||||
return
|
||||
}
|
||||
if in.Name != nil {
|
||||
existing.Name = *in.Name
|
||||
}
|
||||
if in.Pattern != nil {
|
||||
existing.Pattern = *in.Pattern
|
||||
}
|
||||
if in.Severity != nil && *in.Severity != "" {
|
||||
existing.Severity = *in.Severity
|
||||
}
|
||||
if in.Streams != nil && *in.Streams != "" {
|
||||
existing.Streams = *in.Streams
|
||||
}
|
||||
if in.CooldownSeconds != nil {
|
||||
existing.CooldownSeconds = *in.CooldownSeconds
|
||||
}
|
||||
if in.Enabled != nil {
|
||||
existing.Enabled = *in.Enabled
|
||||
}
|
||||
if msg := validateRulePattern(existing.Pattern); msg != "" {
|
||||
respondError(w, http.StatusBadRequest, msg)
|
||||
return
|
||||
}
|
||||
out, err := s.store.UpdateLogScanRule(existing)
|
||||
if err != nil {
|
||||
if errors.Is(err, store.ErrNotFound) {
|
||||
respondNotFound(w, "log scan rule")
|
||||
return
|
||||
}
|
||||
if isClientValidationErr(err) {
|
||||
respondError(w, http.StatusBadRequest, err.Error())
|
||||
return
|
||||
}
|
||||
respondError(w, http.StatusInternalServerError, "update log scan rule")
|
||||
return
|
||||
}
|
||||
s.reloadLogScan()
|
||||
respondJSON(w, http.StatusOK, out)
|
||||
}
|
||||
|
||||
// deleteLogScanRule handles DELETE /api/log-scan-rules/{id}. Override
|
||||
// rows that reference this id are cascade-deleted by the store layer.
|
||||
func (s *Server) deleteLogScanRule(w http.ResponseWriter, r *http.Request) {
|
||||
id, ok := parseRuleID(w, r)
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
if err := s.store.DeleteLogScanRule(id); err != nil {
|
||||
mapStoreError(w, err, "log scan rule")
|
||||
return
|
||||
}
|
||||
s.reloadLogScan()
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
}
|
||||
|
||||
// testLogScanRule handles POST /api/log-scan-rules/{id}/test. Body
|
||||
// `{"sample_line": "..."}` returns whether the rule pattern matches +
|
||||
// any captured subgroups. Lets operators iterate on regexes in the
|
||||
// UI without spinning up real container traffic.
|
||||
func (s *Server) testLogScanRule(w http.ResponseWriter, r *http.Request) {
|
||||
id, ok := parseRuleID(w, r)
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
rule, err := s.store.GetLogScanRule(id)
|
||||
if err != nil {
|
||||
mapStoreError(w, err, "log scan rule")
|
||||
return
|
||||
}
|
||||
var body struct {
|
||||
SampleLine string `json:"sample_line"`
|
||||
}
|
||||
if !decodeJSON(w, r, &body) {
|
||||
return
|
||||
}
|
||||
respondJSON(w, http.StatusOK, testRuleAgainstLine(rule, body.SampleLine))
|
||||
}
|
||||
|
||||
// getEffectiveLogScanRules handles GET /api/workloads/{id}/effective-rules.
|
||||
// Returns the resolved effective rule set (globals minus overrides +
|
||||
// workload-only + override-substitutes) that the scanner uses for
|
||||
// this workload's containers.
|
||||
func (s *Server) getEffectiveLogScanRules(w http.ResponseWriter, r *http.Request) {
|
||||
workloadID := chi.URLParam(r, "id")
|
||||
if workloadID == "" {
|
||||
respondError(w, http.StatusBadRequest, "workload id required")
|
||||
return
|
||||
}
|
||||
rules, err := s.store.EffectiveLogScanRules(workloadID)
|
||||
if err != nil {
|
||||
respondError(w, http.StatusInternalServerError, "compute effective rules")
|
||||
return
|
||||
}
|
||||
respondJSON(w, http.StatusOK, rules)
|
||||
}
|
||||
|
||||
// ruleTestResult is the JSON body returned by the rule /test endpoint.
// It is intentionally minimal: a yes/no verdict plus the captures, so
// the caller can iterate on a pattern.
type ruleTestResult struct {
	// Matched reports whether the pattern matched the sample line.
	Matched bool `json:"matched"`
	// Captures maps capture-group names (or "$N" for unnamed groups)
	// to the captured text. Omitted from the JSON when empty.
	Captures map[string]string `json:"captures,omitempty"`
	// Error carries a pattern compile failure. Omitted when empty.
	Error string `json:"error,omitempty"`
}
|
||||
|
||||
func testRuleAgainstLine(rule store.LogScanRule, line string) ruleTestResult {
|
||||
re, err := regexp.Compile(rule.Pattern)
|
||||
if err != nil {
|
||||
return ruleTestResult{Error: "rule pattern is invalid: " + err.Error()}
|
||||
}
|
||||
subs := re.FindStringSubmatch(line)
|
||||
if subs == nil {
|
||||
return ruleTestResult{Matched: false}
|
||||
}
|
||||
captures := map[string]string{}
|
||||
names := re.SubexpNames()
|
||||
for i, s := range subs[1:] {
|
||||
key := names[i+1]
|
||||
if key == "" {
|
||||
key = "$" + strconv.Itoa(i+1)
|
||||
}
|
||||
captures[key] = s
|
||||
}
|
||||
return ruleTestResult{Matched: true, Captures: captures}
|
||||
}
|
||||
|
||||
// validateRulePattern checks that pattern is usable as a log-scan
// regex. It returns an empty string when the pattern is acceptable,
// otherwise a human-readable reason: "pattern is required" for a
// blank/whitespace-only pattern, or "pattern invalid: ..." carrying
// the regexp compile error.
func validateRulePattern(pattern string) string {
	if len(strings.TrimSpace(pattern)) == 0 {
		return "pattern is required"
	}

	_, compileErr := regexp.Compile(pattern)
	if compileErr == nil {
		return ""
	}
	return "pattern invalid: " + compileErr.Error()
}
|
||||
|
||||
// isClientValidationErr reports whether err's message contains one of
// the known validation phrases raised by CreateLogScanRule /
// UpdateLogScanRule (name or pattern missing, invalid enum value,
// bad cooldown, override row without workload_id). Callers use it to
// answer 400 instead of 500 without leaking driver text for other
// errors. A nil error is never a validation error.
func isClientValidationErr(err error) bool {
	if err == nil {
		return false
	}

	text := err.Error()
	// Matching is by substring so wrapped errors are still recognised.
	fragments := [...]string{
		"name is required",
		"pattern is required",
		"invalid severity",
		"invalid streams",
		"cooldown_seconds must be",
		"override row requires workload_id",
	}
	for i := 0; i < len(fragments); i++ {
		if strings.Contains(text, fragments[i]) {
			return true
		}
	}
	return false
}
|
||||
|
||||
func parseRuleID(w http.ResponseWriter, r *http.Request) (int64, bool) {
|
||||
raw := chi.URLParam(r, "id")
|
||||
id, err := strconv.ParseInt(raw, 10, 64)
|
||||
if err != nil || id <= 0 {
|
||||
respondError(w, http.StatusBadRequest, "invalid rule id")
|
||||
return 0, false
|
||||
}
|
||||
return id, true
|
||||
}
|
||||
|
||||
// derefInt64 returns the value p points to, or 0 when p is nil.
func derefInt64(p *int64) int64 {
	var value int64
	if p != nil {
		value = *p
	}
	return value
}
|
||||
|
||||
// derefIntDefault returns the value p points to, or def when p is nil.
func derefIntDefault(p *int, def int) int {
	if p != nil {
		return *p
	}
	return def
}
|
||||
|
||||
// getLogScanStats handles GET /api/log-scan-rules/stats. Returns
|
||||
// engine drop counters + last-snapshot compile errors + active
|
||||
// tail count so operators can see when their patterns are too
|
||||
// greedy or syntactically broken. When the scanner manager is not
|
||||
// wired (scanner-disabled deployment), returns a zero-valued
|
||||
// shape rather than 404 so the frontend can render the panel
|
||||
// uniformly.
|
||||
func (s *Server) getLogScanStats(w http.ResponseWriter, r *http.Request) {
|
||||
if s.logScanReloader == nil {
|
||||
respondJSON(w, http.StatusOK, logscanner.Stats{})
|
||||
return
|
||||
}
|
||||
respondJSON(w, http.StatusOK, s.logScanReloader.Stats())
|
||||
}
|
||||
|
||||
// reloadLogScan fires the manager's snapshot rebuild. Nil-tolerant
|
||||
// so the API can run before the manager is wired (and in
|
||||
// scanner-disabled deployments). Failures are logged at warn —
|
||||
// we don't fail the originating CRUD request because that already
|
||||
// succeeded, but operators need a signal so they don't chase a
|
||||
// "why isn't my rule firing?" mystery.
|
||||
func (s *Server) reloadLogScan() {
|
||||
if s.logScanReloader == nil {
|
||||
return
|
||||
}
|
||||
if err := s.logScanReloader.ReloadRules(); err != nil {
|
||||
slog.Warn("log-scan reload failed; manager snapshot may be stale",
|
||||
"error", err)
|
||||
}
|
||||
}
|
||||
+84
-5
@@ -50,6 +50,7 @@ type Server struct {
|
||||
stackManager *stack.Manager
|
||||
backupEngine *backup.Engine
|
||||
sseGate *sseGate
|
||||
logScanReloader LogScanReloader
|
||||
dbPath string
|
||||
shutdownFunc func() // called after restore to trigger graceful shutdown
|
||||
onBackupSettingsChanged func(enabled bool, intervalHours int) // called when backup settings change
|
||||
@@ -217,13 +218,26 @@ func (s *Server) Router() chi.Router {
|
||||
r.Group(func(r chi.Router) {
|
||||
r.Use(auth.Middleware(s.localAuth))
|
||||
|
||||
// Plugin registry inspection + unified ingress (Workload refactor).
|
||||
// /hooks/kinds is informational and visible to any authenticated
|
||||
// caller. /hooks/generic dispatches deploys and is admin-gated —
|
||||
// vendor-specific webhooks (with their own per-target HMAC
|
||||
// secrets) live under /webhook/* and remain the only ingress
|
||||
// reachable by external CI systems until Phase 5 consolidates them.
|
||||
r.Get("/hooks/kinds", s.listHookKinds)
|
||||
r.Get("/hooks/kinds/{kind}/schema", s.getHookKindSchema)
|
||||
r.With(auth.AdminOnly).Post("/hooks/generic", s.dispatchGeneric)
|
||||
|
||||
// Read-only endpoints (any authenticated user).
|
||||
r.Get("/health", s.getHealth)
|
||||
r.Get("/auth/me", s.currentUser)
|
||||
r.Post("/auth/logout", s.logout)
|
||||
r.Get("/proxies", s.listProxyRoutes)
|
||||
r.Get("/docker/unused-images", s.unusedImageStats)
|
||||
r.Get("/projects", s.listProjects)
|
||||
// Legacy project/stage/site/stack endpoints carry a Deprecation
|
||||
// header pointing at /api/workloads. Functional behavior is
|
||||
// unchanged until the hard cutover removes them.
|
||||
r.With(deprecated("/api/workloads")).Get("/projects", s.listProjects)
|
||||
r.Route("/projects/{id}", func(r chi.Router) {
|
||||
r.Get("/", s.getProject)
|
||||
r.Get("/stages/{stage}/env", s.listStageEnv)
|
||||
@@ -290,7 +304,7 @@ func (s *Server) Router() chi.Router {
|
||||
})
|
||||
})
|
||||
// Stacks (docker-compose).
|
||||
r.Get("/stacks", s.listStacks)
|
||||
r.With(deprecated("/api/workloads?kind=plugin&source_kind=compose")).Get("/stacks", s.listStacks)
|
||||
r.Route("/stacks/{id}", func(r chi.Router) {
|
||||
r.Get("/", s.getStack)
|
||||
r.Get("/revisions", s.listStackRevisions)
|
||||
@@ -311,7 +325,7 @@ func (s *Server) Router() chi.Router {
|
||||
r.With(auth.AdminOnly).Post("/stacks", s.createStack)
|
||||
|
||||
// Static sites.
|
||||
r.Get("/sites", s.listStaticSites)
|
||||
r.With(deprecated("/api/workloads?kind=plugin&source_kind=static")).Get("/sites", s.listStaticSites)
|
||||
r.Route("/sites/{id}", func(r chi.Router) {
|
||||
r.Get("/", s.getStaticSite)
|
||||
r.Get("/secrets", s.listStaticSiteSecrets)
|
||||
@@ -375,13 +389,47 @@ func (s *Server) Router() chi.Router {
|
||||
r.Get("/containers/stale", s.listStaleContainers)
|
||||
|
||||
// Workload-shaped endpoints (the unifying layer over project /
|
||||
// stack / site). Read-only; mutations still go through the
|
||||
// kind-specific endpoints (POST /projects, PUT /stacks/{id}, …).
|
||||
// stack / site). Read endpoints are open to any authenticated
|
||||
// user; create / update / deploy mutate state and are admin-gated.
|
||||
// Plugin-native workloads (source_kind + trigger_kind set) are
|
||||
// created here; legacy project / stack / site mutations remain at
|
||||
// their dedicated endpoints during the cutover.
|
||||
r.Get("/workloads", s.listWorkloads)
|
||||
r.With(auth.AdminOnly).Post("/workloads", s.createPluginWorkload)
|
||||
r.Route("/workloads/{id}", func(r chi.Router) {
|
||||
r.Get("/", s.getWorkload)
|
||||
r.Get("/containers", s.listWorkloadContainers)
|
||||
r.Get("/containers/{cid}/logs", s.streamWorkloadContainerLogs)
|
||||
r.With(auth.AdminOnly).Patch("/app", s.updateWorkloadAppID)
|
||||
r.With(auth.AdminOnly).Put("/plugin", s.updatePluginWorkload)
|
||||
r.With(auth.AdminOnly).Post("/deploy", s.deployPluginWorkload)
|
||||
r.With(auth.AdminOnly).Delete("/", s.deletePluginWorkload)
|
||||
|
||||
// Per-workload env vars (analog of legacy stage_env).
|
||||
// Listing is open to authenticated readers; mutations are
|
||||
// admin-gated. Encrypted values are write-only after store.
|
||||
r.Get("/env", s.listWorkloadEnv)
|
||||
r.With(auth.AdminOnly).Put("/env", s.setWorkloadEnv)
|
||||
r.With(auth.AdminOnly).Delete("/env/{envID}", s.deleteWorkloadEnv)
|
||||
|
||||
// Per-workload inbound webhook URL: rotate the secret + fetch
|
||||
// the canonical URL. Mirrors the project / site webhook UX.
|
||||
r.With(auth.AdminOnly).Get("/webhook", s.getWorkloadWebhook)
|
||||
r.With(auth.AdminOnly).Post("/webhook/regenerate", s.regenerateWorkloadWebhook)
|
||||
|
||||
// Per-workload volume mounts (analog of legacy project volumes).
|
||||
// Reads are open to authenticated users; mutations admin-gated.
|
||||
// Source/target paths are validated for traversal safety here;
|
||||
// host-path allow-listing happens at deploy time.
|
||||
r.Get("/volumes", s.listWorkloadVolumes)
|
||||
r.With(auth.AdminOnly).Put("/volumes", s.setWorkloadVolume)
|
||||
r.With(auth.AdminOnly).Delete("/volumes/{volID}", s.deleteWorkloadVolume)
|
||||
|
||||
// Stages chain: parent + self + direct children, plus a
|
||||
// promote-from action that copies the source workload's
|
||||
// running image tag onto this workload's default_tag.
|
||||
r.Get("/chain", s.getWorkloadChain)
|
||||
r.With(auth.AdminOnly).Post("/promote-from/{sourceID}", s.promoteFromWorkload)
|
||||
})
|
||||
|
||||
// Global container index, joined to workload + app names.
|
||||
@@ -398,6 +446,37 @@ func (s *Server) Router() chi.Router {
|
||||
r.Delete("/apps/{id}", s.deleteApp)
|
||||
})
|
||||
|
||||
// Event triggers: filter+action rules over the event_log
|
||||
// stream. Read endpoints are available to any authenticated
|
||||
// user; mutations + test-dispatch are admin-gated since they
|
||||
// can fire arbitrary outbound webhooks.
|
||||
r.Get("/event-triggers", s.listEventTriggers)
|
||||
r.Get("/event-triggers/{id}", s.getEventTrigger)
|
||||
r.Group(func(r chi.Router) {
|
||||
r.Use(auth.AdminOnly)
|
||||
r.Post("/event-triggers", s.createEventTrigger)
|
||||
r.Patch("/event-triggers/{id}", s.updateEventTrigger)
|
||||
r.Delete("/event-triggers/{id}", s.deleteEventTrigger)
|
||||
r.Post("/event-triggers/{id}/test", s.testEventTrigger)
|
||||
})
|
||||
|
||||
// Log-scan rules: regex patterns the scanner manager
|
||||
// applies to container log lines. Read endpoints are
|
||||
// available to any authenticated user; mutations are
|
||||
// admin-gated since they can change global observability
|
||||
// behavior across every workload.
|
||||
r.Get("/log-scan-rules", s.listLogScanRules)
|
||||
r.Get("/log-scan-rules/stats", s.getLogScanStats)
|
||||
r.Get("/log-scan-rules/{id}", s.getLogScanRule)
|
||||
r.Get("/workloads/{id}/effective-rules", s.getEffectiveLogScanRules)
|
||||
r.Group(func(r chi.Router) {
|
||||
r.Use(auth.AdminOnly)
|
||||
r.Post("/log-scan-rules", s.createLogScanRule)
|
||||
r.Patch("/log-scan-rules/{id}", s.updateLogScanRule)
|
||||
r.Delete("/log-scan-rules/{id}", s.deleteLogScanRule)
|
||||
r.Post("/log-scan-rules/{id}/test", s.testLogScanRule)
|
||||
})
|
||||
|
||||
// System resources (read-only).
|
||||
r.Get("/system/stats", s.getSystemStats)
|
||||
r.Get("/system/stats/history", s.getSystemStatsHistory)
|
||||
|
||||
@@ -359,12 +359,41 @@ func isTinyforgeManaged(labels map[string]string) bool {
|
||||
// ContainerLogs returns a log stream for a container.
|
||||
// If follow is true, the stream stays open for new log lines.
|
||||
// tail specifies the number of lines from the end to return (e.g., "200").
|
||||
// Both stdout and stderr are streamed. For stream-selective reads
|
||||
// (e.g. the log scanner narrowing to stderr-only), use ContainerLogsOpts.
|
||||
func (c *Client) ContainerLogs(ctx context.Context, containerID string, follow bool, tail string) (io.ReadCloser, error) {
|
||||
result, err := c.api.ContainerLogs(ctx, containerID, client.ContainerLogsOptions{
|
||||
ShowStdout: true,
|
||||
ShowStderr: true,
|
||||
return c.ContainerLogsOpts(ctx, containerID, ContainerLogOptions{
|
||||
Follow: follow,
|
||||
Tail: tail,
|
||||
ShowStdout: true,
|
||||
ShowStderr: true,
|
||||
})
|
||||
}
|
||||
|
||||
// ContainerLogOptions selects which container streams are read and how
// the read behaves. It extends the legacy ContainerLogs parameters so
// the log scanner can read stderr-only (or stdout-only) rules without
// post-filtering every line.
type ContainerLogOptions struct {
	// Follow keeps the stream open for new log lines when true.
	Follow bool
	// Tail is the number of lines from the end to return (e.g. "200").
	Tail string
	// ShowStdout includes the container's stdout stream.
	ShowStdout bool
	// ShowStderr includes the container's stderr stream.
	ShowStderr bool
}
|
||||
|
||||
// ContainerLogsOpts is the stream-selectable counterpart to
|
||||
// ContainerLogs. When both ShowStdout and ShowStderr are false the
|
||||
// upstream client returns an empty stream — we treat that as caller
|
||||
// error and return an explicit message rather than a silent no-op.
|
||||
func (c *Client) ContainerLogsOpts(ctx context.Context, containerID string, opts ContainerLogOptions) (io.ReadCloser, error) {
|
||||
if !opts.ShowStdout && !opts.ShowStderr {
|
||||
return nil, fmt.Errorf("container logs %s: at least one of ShowStdout/ShowStderr must be true", containerID)
|
||||
}
|
||||
result, err := c.api.ContainerLogs(ctx, containerID, client.ContainerLogsOptions{
|
||||
ShowStdout: opts.ShowStdout,
|
||||
ShowStderr: opts.ShowStderr,
|
||||
Follow: opts.Follow,
|
||||
Tail: opts.Tail,
|
||||
Timestamps: true,
|
||||
})
|
||||
if err != nil {
|
||||
|
||||
@@ -0,0 +1,169 @@
|
||||
package events
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"regexp"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/alexei/tinyforge/internal/store"
|
||||
)
|
||||
|
||||
// TriggerSource is the read-side seam the dispatcher uses to fetch the
|
||||
// currently-enabled set of triggers. Kept as an interface so tests can
|
||||
// swap in a static list without spinning up SQLite. The dispatcher
|
||||
// re-reads triggers from this source on every event so config edits
|
||||
// take effect within one event without an explicit hot-reload hook.
|
||||
type TriggerSource interface {
|
||||
ListEnabledEventTriggers() ([]store.EventTrigger, error)
|
||||
}
|
||||
|
||||
// TriggerNotifier is the delivery seam of the dispatcher. Production
// wiring passes *notify.Notifier; tests pass a recorder. The method
// mirrors the notifier's own SendPayload one-to-one, so no adapter is
// needed.
type TriggerNotifier interface {
	// SendPayload delivers payload to webhookURL, tagged with
	// eventType and using secret as the webhook secret. It returns
	// nothing; outcomes are the implementation's concern.
	SendPayload(webhookURL, secret, eventType string, payload any)
}
|
||||
|
||||
// TriggerWebhookPayload is the JSON shape sent to action_target webhook
|
||||
// receivers. Includes both the event that fired the trigger and a brief
|
||||
// trigger descriptor so receivers can route by trigger name or filter
|
||||
// shape without re-looking-up the rule.
|
||||
type TriggerWebhookPayload struct {
|
||||
Type string `json:"type"` // "event_trigger" — stable
|
||||
TriggerID int64 `json:"trigger_id"`
|
||||
Trigger string `json:"trigger_name"`
|
||||
Event EventLogPayload `json:"event"`
|
||||
Timestamp string `json:"timestamp"`
|
||||
}
|
||||
|
||||
// RegisterEventTriggerDispatcher subscribes to EventLog events on the
|
||||
// bus and dispatches matching triggers via the supplied notifier.
|
||||
//
|
||||
// Loop-prevention is structural: the dispatcher never writes to
|
||||
// event_log. All delivery outcomes are recorded inside the notifier
|
||||
// implementation (webhook_deliveries audit trail today). Adding a new
|
||||
// EventLog row here would cause the dispatcher to re-fire on its own
|
||||
// emission — a tight feedback loop the design explicitly forbids.
|
||||
//
|
||||
// Returns an unsubscribe function. Safe to call multiple times.
|
||||
func RegisterEventTriggerDispatcher(b *Bus, triggers TriggerSource, notifier TriggerNotifier) func() {
|
||||
sub := b.Subscribe(func(evt Event) bool { return evt.Type == EventLog })
|
||||
d := &dispatcher{
|
||||
triggers: triggers,
|
||||
notifier: notifier,
|
||||
regexCache: map[string]*regexp.Regexp{},
|
||||
}
|
||||
go func() {
|
||||
for evt := range sub {
|
||||
payload, ok := evt.Payload.(EventLogPayload)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
d.handle(payload)
|
||||
}
|
||||
}()
|
||||
return func() { b.Unsubscribe(sub) }
|
||||
}
|
||||
|
||||
type dispatcher struct {
|
||||
triggers TriggerSource
|
||||
notifier TriggerNotifier
|
||||
|
||||
// regexCache memoizes compiled message-regex patterns so the hot
|
||||
// path doesn't re-compile on every event. Bounded by the number of
|
||||
// distinct patterns across all triggers, which is small in practice.
|
||||
mu sync.Mutex
|
||||
regexCache map[string]*regexp.Regexp
|
||||
}
|
||||
|
||||
func (d *dispatcher) handle(p EventLogPayload) {
|
||||
triggers, err := d.triggers.ListEnabledEventTriggers()
|
||||
if err != nil {
|
||||
slog.Warn("event-trigger dispatcher: list failed", "error", err)
|
||||
return
|
||||
}
|
||||
for _, t := range triggers {
|
||||
ok, err := d.matches(t, p)
|
||||
if err != nil {
|
||||
slog.Warn("event-trigger: filter eval failed",
|
||||
"trigger", t.Name, "error", err)
|
||||
continue
|
||||
}
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
switch t.ActionType {
|
||||
case store.EventTriggerActionWebhook:
|
||||
d.notifier.SendPayload(t.ActionTarget, t.ActionSecret, "event_trigger",
|
||||
TriggerWebhookPayload{
|
||||
Type: "event_trigger",
|
||||
TriggerID: t.ID,
|
||||
Trigger: t.Name,
|
||||
Event: p,
|
||||
Timestamp: time.Now().UTC().Format(time.RFC3339),
|
||||
})
|
||||
default:
|
||||
slog.Warn("event-trigger: unsupported action_type",
|
||||
"trigger", t.Name, "action_type", t.ActionType)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// matches evaluates the (severity, source, message-regex) filters
|
||||
// against an event log payload. AND semantics — every non-empty filter
|
||||
// must pass. An empty filter is "any" and silently passes.
|
||||
func (d *dispatcher) matches(t store.EventTrigger, p EventLogPayload) (bool, error) {
|
||||
if !filterMatchCSV(t.FilterSeverity, p.Severity) {
|
||||
return false, nil
|
||||
}
|
||||
if !filterMatchCSV(t.FilterSource, p.Source) {
|
||||
return false, nil
|
||||
}
|
||||
if t.FilterMessageRegex != "" {
|
||||
re, err := d.compile(t.FilterMessageRegex)
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("invalid regex %q: %w", t.FilterMessageRegex, err)
|
||||
}
|
||||
if !re.MatchString(p.Message) {
|
||||
return false, nil
|
||||
}
|
||||
}
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// filterMatchCSV reports whether candidate equals one of the
// comma-separated entries of filter. A filter that is empty (or only
// whitespace) means "no filter" and matches everything. Whitespace
// around each entry is ignored so hand-pasted CSV is tolerated; the
// candidate itself is compared as-is.
func filterMatchCSV(filter, candidate string) bool {
	rest := strings.TrimSpace(filter)
	if rest == "" {
		return true
	}

	// Walk the list one entry at a time, left to right.
	for {
		entry, tail, more := strings.Cut(rest, ",")
		if strings.TrimSpace(entry) == candidate {
			return true
		}
		if !more {
			return false
		}
		rest = tail
	}
}
|
||||
|
||||
func (d *dispatcher) compile(pattern string) (*regexp.Regexp, error) {
|
||||
d.mu.Lock()
|
||||
if cached, ok := d.regexCache[pattern]; ok {
|
||||
d.mu.Unlock()
|
||||
return cached, nil
|
||||
}
|
||||
d.mu.Unlock()
|
||||
re, err := regexp.Compile(pattern)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
d.mu.Lock()
|
||||
d.regexCache[pattern] = re
|
||||
d.mu.Unlock()
|
||||
return re, nil
|
||||
}
|
||||
@@ -0,0 +1,252 @@
|
||||
package events
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"regexp"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/alexei/tinyforge/internal/store"
|
||||
)
|
||||
|
||||
// fakeTriggerSource lets tests inject a static set of enabled triggers
|
||||
// without standing up SQLite. ListEnabledEventTriggers is the only
|
||||
// method the dispatcher uses.
|
||||
type fakeTriggerSource struct {
|
||||
rows []store.EventTrigger
|
||||
err error
|
||||
}
|
||||
|
||||
func (f *fakeTriggerSource) ListEnabledEventTriggers() ([]store.EventTrigger, error) {
|
||||
if f.err != nil {
|
||||
return nil, f.err
|
||||
}
|
||||
return f.rows, nil
|
||||
}
|
||||
|
||||
// fakeNotifier captures dispatches in memory so tests can assert
|
||||
// (URL, secret, eventType, payload) tuples.
|
||||
type fakeNotifier struct {
|
||||
mu sync.Mutex
|
||||
calls []fakeNotifierCall
|
||||
}
|
||||
|
||||
// fakeNotifierCall is one recorded SendPayload invocation.
type fakeNotifierCall struct {
	// URL is the webhook URL argument.
	URL string
	// Secret is the secret argument.
	Secret string
	// EventType is the event type argument.
	EventType string
	// Payload is the payload argument, stored as received.
	Payload any
}
|
||||
|
||||
func (f *fakeNotifier) SendPayload(url, secret, eventType string, payload any) {
|
||||
f.mu.Lock()
|
||||
defer f.mu.Unlock()
|
||||
f.calls = append(f.calls, fakeNotifierCall{URL: url, Secret: secret, EventType: eventType, Payload: payload})
|
||||
}
|
||||
|
||||
func (f *fakeNotifier) Calls() []fakeNotifierCall {
|
||||
f.mu.Lock()
|
||||
defer f.mu.Unlock()
|
||||
out := make([]fakeNotifierCall, len(f.calls))
|
||||
copy(out, f.calls)
|
||||
return out
|
||||
}
|
||||
|
||||
func newDispatcher(rows []store.EventTrigger) (*dispatcher, *fakeNotifier) {
|
||||
n := &fakeNotifier{}
|
||||
return &dispatcher{
|
||||
triggers: &fakeTriggerSource{rows: rows},
|
||||
notifier: n,
|
||||
regexCache: map[string]*regexp.Regexp{},
|
||||
}, n
|
||||
}
|
||||
|
||||
func TestMatches_EmptyFiltersAllowAnything(t *testing.T) {
|
||||
d, _ := newDispatcher(nil)
|
||||
tr := store.EventTrigger{Name: "anything"}
|
||||
got, err := d.matches(tr, EventLogPayload{Severity: "info", Source: "deploy", Message: "hello"})
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected err: %v", err)
|
||||
}
|
||||
if !got {
|
||||
t.Fatal("empty filters should pass")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatches_SeverityCSV(t *testing.T) {
|
||||
d, _ := newDispatcher(nil)
|
||||
tr := store.EventTrigger{FilterSeverity: "warn, error"}
|
||||
cases := []struct {
|
||||
sev string
|
||||
want bool
|
||||
}{
|
||||
{"error", true},
|
||||
{"warn", true},
|
||||
{"info", false},
|
||||
}
|
||||
for _, c := range cases {
|
||||
got, err := d.matches(tr, EventLogPayload{Severity: c.sev})
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if got != c.want {
|
||||
t.Errorf("severity=%q want=%v got=%v", c.sev, c.want, got)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatches_SourceCSV(t *testing.T) {
|
||||
d, _ := newDispatcher(nil)
|
||||
tr := store.EventTrigger{FilterSource: "logscan,deploy"}
|
||||
cases := []struct {
|
||||
src string
|
||||
want bool
|
||||
}{
|
||||
{"logscan", true},
|
||||
{"deploy", true},
|
||||
{"reconciler", false},
|
||||
{"", false},
|
||||
}
|
||||
for _, c := range cases {
|
||||
got, _ := d.matches(tr, EventLogPayload{Source: c.src})
|
||||
if got != c.want {
|
||||
t.Errorf("source=%q want=%v got=%v", c.src, c.want, got)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatches_MessageRegex(t *testing.T) {
|
||||
d, _ := newDispatcher(nil)
|
||||
tr := store.EventTrigger{FilterMessageRegex: `(?i)\bpanic\b`}
|
||||
if got, _ := d.matches(tr, EventLogPayload{Message: "fatal: Panic in worker"}); !got {
|
||||
t.Error("expected case-insensitive panic to match")
|
||||
}
|
||||
if got, _ := d.matches(tr, EventLogPayload{Message: "all good"}); got {
|
||||
t.Error("expected non-matching message to fail")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatches_InvalidRegexReturnsError(t *testing.T) {
|
||||
d, _ := newDispatcher(nil)
|
||||
tr := store.EventTrigger{FilterMessageRegex: "([unclosed"}
|
||||
_, err := d.matches(tr, EventLogPayload{Message: "x"})
|
||||
if err == nil {
|
||||
t.Fatal("expected compile error on invalid regex")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatches_AllFiltersAND(t *testing.T) {
|
||||
d, _ := newDispatcher(nil)
|
||||
tr := store.EventTrigger{
|
||||
FilterSeverity: "error",
|
||||
FilterSource: "logscan",
|
||||
FilterMessageRegex: "timeout",
|
||||
}
|
||||
full := EventLogPayload{Severity: "error", Source: "logscan", Message: "request timeout"}
|
||||
if got, _ := d.matches(tr, full); !got {
|
||||
t.Error("all-match payload should pass")
|
||||
}
|
||||
missMessage := full
|
||||
missMessage.Message = "all good"
|
||||
if got, _ := d.matches(tr, missMessage); got {
|
||||
t.Error("message mismatch should fail despite severity+source match")
|
||||
}
|
||||
missSource := full
|
||||
missSource.Source = "deploy"
|
||||
if got, _ := d.matches(tr, missSource); got {
|
||||
t.Error("source mismatch should fail")
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandle_DispatchesMatchingTrigger(t *testing.T) {
|
||||
rows := []store.EventTrigger{
|
||||
{ID: 1, Name: "T1", FilterSeverity: "error", ActionType: store.EventTriggerActionWebhook,
|
||||
ActionTarget: "https://example.com/hook", ActionSecret: "shh"},
|
||||
{ID: 2, Name: "T2", FilterSeverity: "warn", ActionType: store.EventTriggerActionWebhook,
|
||||
ActionTarget: "https://example.com/other"},
|
||||
}
|
||||
d, notifier := newDispatcher(rows)
|
||||
d.handle(EventLogPayload{Severity: "error", Source: "logscan", Message: "panic", CreatedAt: time.Now().Format(time.RFC3339)})
|
||||
|
||||
calls := notifier.Calls()
|
||||
if len(calls) != 1 {
|
||||
t.Fatalf("expected 1 call, got %d", len(calls))
|
||||
}
|
||||
if calls[0].URL != "https://example.com/hook" {
|
||||
t.Errorf("URL=%q want https://example.com/hook", calls[0].URL)
|
||||
}
|
||||
if calls[0].Secret != "shh" {
|
||||
t.Errorf("Secret=%q want shh", calls[0].Secret)
|
||||
}
|
||||
p, ok := calls[0].Payload.(TriggerWebhookPayload)
|
||||
if !ok {
|
||||
t.Fatalf("payload type=%T want TriggerWebhookPayload", calls[0].Payload)
|
||||
}
|
||||
if p.TriggerID != 1 || p.Trigger != "T1" {
|
||||
t.Errorf("payload trigger metadata wrong: %+v", p)
|
||||
}
|
||||
if p.Event.Severity != "error" {
|
||||
t.Errorf("payload event mismatch: %+v", p.Event)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandle_TriggerSourceErrorLogged(t *testing.T) {
|
||||
n := &fakeNotifier{}
|
||||
d := &dispatcher{
|
||||
triggers: &fakeTriggerSource{err: errors.New("db down")},
|
||||
notifier: n,
|
||||
regexCache: map[string]*regexp.Regexp{},
|
||||
}
|
||||
d.handle(EventLogPayload{Severity: "error"})
|
||||
if len(n.Calls()) != 0 {
|
||||
t.Errorf("dispatcher should not call notifier when trigger source errored")
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandle_UnsupportedActionTypeSkipped(t *testing.T) {
|
||||
rows := []store.EventTrigger{
|
||||
{ID: 1, Name: "T1", ActionType: "future-channel", ActionTarget: "x"},
|
||||
}
|
||||
d, n := newDispatcher(rows)
|
||||
d.handle(EventLogPayload{Severity: "info"})
|
||||
if len(n.Calls()) != 0 {
|
||||
t.Errorf("unsupported action_type should not dispatch")
|
||||
}
|
||||
}
|
||||
|
||||
func TestFilterMatchCSV(t *testing.T) {
|
||||
cases := []struct {
|
||||
filter, cand string
|
||||
want bool
|
||||
}{
|
||||
{"", "anything", true},
|
||||
{" ", "anything", true},
|
||||
{"a", "a", true},
|
||||
{"a", "b", false},
|
||||
{"a,b,c", "b", true},
|
||||
{" a , b , c ", "b", true},
|
||||
{"a,b", "c", false},
|
||||
}
|
||||
for _, c := range cases {
|
||||
got := filterMatchCSV(c.filter, c.cand)
|
||||
if got != c.want {
|
||||
t.Errorf("filterMatchCSV(%q, %q) = %v want %v", c.filter, c.cand, got, c.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestRegexCache_ReusesCompiledPattern(t *testing.T) {
|
||||
d, _ := newDispatcher(nil)
|
||||
re1, err := d.compile(`\bfoo\b`)
|
||||
if err != nil {
|
||||
t.Fatalf("compile: %v", err)
|
||||
}
|
||||
re2, err := d.compile(`\bfoo\b`)
|
||||
if err != nil {
|
||||
t.Fatalf("recompile: %v", err)
|
||||
}
|
||||
if re1 != re2 {
|
||||
t.Error("expected cached compile to return the same regexp pointer")
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,227 @@
|
||||
package logscanner
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Engine evaluates lines against a snapshot, gates by per-rule
|
||||
// cooldown, and rate-limits emissions per container so a noisy regex
|
||||
// can't flood the event_log table.
|
||||
type Engine struct {
|
||||
// cooldownMu guards lastFiredAt. The map is keyed on a
|
||||
// composite (containerID, ruleID) so the same rule firing on two
|
||||
// different containers gets independent cooldowns — matches the
|
||||
// operator intuition that "this container is alerting" doesn't
|
||||
// suppress an alert from a different container.
|
||||
cooldownMu sync.Mutex
|
||||
lastFiredAt map[cooldownKey]time.Time
|
||||
|
||||
// bucketMu guards tokenBuckets. One bucket per container.
|
||||
bucketMu sync.Mutex
|
||||
tokenBuckets map[string]*bucket
|
||||
|
||||
// Drop counters. Incremented when a matching hit is suppressed
|
||||
// before reaching the bus. droppedByBucket covers per-container
|
||||
// rate-limit drops; droppedByCooldown counts cooldown-suppressed
|
||||
// matches so operators can tell whether their patterns are too
|
||||
// greedy vs too tightly cooled. Atomic so the hot path doesn't
|
||||
// take the lock just to bump the counter.
|
||||
droppedByBucket atomic.Int64
|
||||
droppedByCooldown atomic.Int64
|
||||
|
||||
// Configuration knobs. Defaults set in NewEngine.
|
||||
tokensPerWindow int // bucket capacity
|
||||
tokenWindow time.Duration // bucket refill window
|
||||
now func() time.Time
|
||||
}
|
||||
|
||||
// EngineStats is the exported snapshot of the engine's drop counters.
// It is returned by Stats() and surfaced through the manager and the
// API, which is how operators discover that a noisy regex is
// exhausting the rate-limit bucket.
type EngineStats struct {
	// DroppedByBucket is the number of matches dropped by the
	// per-container rate limit.
	DroppedByBucket int64 `json:"dropped_by_bucket"`
	// DroppedByCooldown is the number of matches suppressed by a
	// rule cooldown.
	DroppedByCooldown int64 `json:"dropped_by_cooldown"`
}
|
||||
|
||||
// cooldownKey identifies one (container, rule) pair in the engine's
// cooldown map.
type cooldownKey struct {
	// ContainerID is the container the rule fired on.
	ContainerID string
	// RuleID is the id of the rule that fired.
	RuleID int64
}
|
||||
|
||||
// bucket is the rate-limit state kept for a single container.
type bucket struct {
	// tokens is the bucket's token count.
	tokens int
	// resetsAt is the bucket's reset time.
	resetsAt time.Time
}
|
||||
|
||||
// NewEngine constructs an Engine with sensible defaults: 10
|
||||
// events / 60s per container. Both knobs can be overridden via
|
||||
// the With* options.
|
||||
func NewEngine(opts ...Option) *Engine {
|
||||
e := &Engine{
|
||||
lastFiredAt: map[cooldownKey]time.Time{},
|
||||
tokenBuckets: map[string]*bucket{},
|
||||
tokensPerWindow: 10,
|
||||
tokenWindow: 60 * time.Second,
|
||||
now: time.Now,
|
||||
}
|
||||
for _, opt := range opts {
|
||||
opt(e)
|
||||
}
|
||||
return e
|
||||
}
|
||||
|
||||
// Option mutates an Engine during construction.
|
||||
type Option func(*Engine)
|
||||
|
||||
// WithBucket sets the per-container token bucket capacity and
|
||||
// refill window. Used by tests to make rate-limit assertions
|
||||
// deterministic.
|
||||
func WithBucket(tokens int, window time.Duration) Option {
|
||||
return func(e *Engine) {
|
||||
e.tokensPerWindow = tokens
|
||||
e.tokenWindow = window
|
||||
}
|
||||
}
|
||||
|
||||
// WithNow overrides the clock for tests.
|
||||
func WithNow(now func() time.Time) Option {
|
||||
return func(e *Engine) {
|
||||
e.now = now
|
||||
}
|
||||
}
|
||||
|
||||
// Match is the hot-path evaluation. For every rule whose Streams
|
||||
// covers `stream` and whose pattern matches `line`, returns one
|
||||
// Hit — gated by cooldown and per-container token bucket. Empty
|
||||
// result means "drop this line."
|
||||
//
|
||||
// Side-effect: cooldown timestamps + bucket counters are updated
|
||||
// when a rule fires. Callers must not retry the same line.
|
||||
func (e *Engine) Match(containerID, workloadID, stream, line string, rules []Rule) []Hit {
|
||||
if line == "" || len(rules) == 0 {
|
||||
return nil
|
||||
}
|
||||
var hits []Hit
|
||||
now := e.now()
|
||||
for _, r := range rules {
|
||||
if !streamMatches(r.Streams, stream) {
|
||||
continue
|
||||
}
|
||||
if !r.Pattern.MatchString(line) {
|
||||
continue
|
||||
}
|
||||
if !e.cooledDown(containerID, r, now) {
|
||||
// Matched but inside the cooldown window — bump the
|
||||
// counter so operators can see when their cooldowns
|
||||
// are eating real signal.
|
||||
e.droppedByCooldown.Add(1)
|
||||
continue
|
||||
}
|
||||
if !e.takeToken(containerID, now) {
|
||||
// Bucket exhausted for this container. Bump the
|
||||
// counter so the stats endpoint can surface chatty
|
||||
// patterns; the operator's signal is "Stats says we
|
||||
// dropped N events — your rule is too broad."
|
||||
e.droppedByBucket.Add(1)
|
||||
continue
|
||||
}
|
||||
e.markFired(containerID, r.ID, now)
|
||||
hits = append(hits, Hit{
|
||||
Rule: r,
|
||||
ContainerID: containerID,
|
||||
WorkloadID: workloadID,
|
||||
Stream: stream,
|
||||
Line: line,
|
||||
})
|
||||
}
|
||||
return hits
|
||||
}
|
||||
|
||||
// Stats returns a snapshot of the engine's drop counters. The
|
||||
// counters are monotonic over the lifetime of the engine — useful
|
||||
// for trend observation, but not for instantaneous "is it dropping
|
||||
// right now" alerting. Reset semantics: the engine is recreated on
|
||||
// every process restart so counters reset to zero with the binary.
|
||||
func (e *Engine) Stats() EngineStats {
|
||||
return EngineStats{
|
||||
DroppedByBucket: e.droppedByBucket.Load(),
|
||||
DroppedByCooldown: e.droppedByCooldown.Load(),
|
||||
}
|
||||
}
|
||||
|
||||
// Hit is one rule fire — the engine returns these for the manager
|
||||
// to persist + publish on the bus. Kept narrow on purpose so the
|
||||
// engine has no event_log / bus dependency.
|
||||
type Hit struct {
|
||||
Rule Rule
|
||||
ContainerID string
|
||||
WorkloadID string
|
||||
Stream string
|
||||
Line string
|
||||
}
|
||||
|
||||
// streamMatches reports whether a rule scoped to ruleStreams applies
// to a line read from lineStream. An empty scope and "all" both accept
// every stream; any other scope must equal lineStream exactly.
func streamMatches(ruleStreams, lineStream string) bool {
	switch ruleStreams {
	case "", "all":
		return true
	default:
		return lineStream == ruleStreams
	}
}
|
||||
|
||||
func (e *Engine) cooledDown(containerID string, r Rule, now time.Time) bool {
|
||||
if r.CooldownSeconds <= 0 {
|
||||
return true
|
||||
}
|
||||
e.cooldownMu.Lock()
|
||||
defer e.cooldownMu.Unlock()
|
||||
last, ok := e.lastFiredAt[cooldownKey{containerID, r.ID}]
|
||||
if !ok {
|
||||
return true
|
||||
}
|
||||
return now.Sub(last) >= time.Duration(r.CooldownSeconds)*time.Second
|
||||
}
|
||||
|
||||
func (e *Engine) markFired(containerID string, ruleID int64, now time.Time) {
|
||||
e.cooldownMu.Lock()
|
||||
e.lastFiredAt[cooldownKey{containerID, ruleID}] = now
|
||||
e.cooldownMu.Unlock()
|
||||
}
|
||||
|
||||
func (e *Engine) takeToken(containerID string, now time.Time) bool {
|
||||
e.bucketMu.Lock()
|
||||
defer e.bucketMu.Unlock()
|
||||
b, ok := e.tokenBuckets[containerID]
|
||||
if !ok {
|
||||
b = &bucket{tokens: e.tokensPerWindow, resetsAt: now.Add(e.tokenWindow)}
|
||||
e.tokenBuckets[containerID] = b
|
||||
}
|
||||
if !now.Before(b.resetsAt) {
|
||||
b.tokens = e.tokensPerWindow
|
||||
b.resetsAt = now.Add(e.tokenWindow)
|
||||
}
|
||||
if b.tokens <= 0 {
|
||||
return false
|
||||
}
|
||||
b.tokens--
|
||||
return true
|
||||
}
|
||||
|
||||
// Forget drops cooldown + bucket state for a container that has been
|
||||
// removed. Called by the manager when a tail exits to reclaim memory.
|
||||
func (e *Engine) Forget(containerID string) {
|
||||
e.cooldownMu.Lock()
|
||||
for k := range e.lastFiredAt {
|
||||
if k.ContainerID == containerID {
|
||||
delete(e.lastFiredAt, k)
|
||||
}
|
||||
}
|
||||
e.cooldownMu.Unlock()
|
||||
e.bucketMu.Lock()
|
||||
delete(e.tokenBuckets, containerID)
|
||||
e.bucketMu.Unlock()
|
||||
}
|
||||
@@ -0,0 +1,223 @@
|
||||
package logscanner
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// mustRegexp compiles p and fails the test immediately when the
// pattern is invalid, so callers always receive a usable regexp.
func mustRegexp(t *testing.T, p string) *regexp.Regexp {
	t.Helper()
	compiled, err := regexp.Compile(p)
	if err == nil {
		return compiled
	}
	t.Fatalf("compile %q: %v", p, err)
	return nil // not reached: Fatalf stops the test goroutine
}
|
||||
|
||||
func newRule(t *testing.T, id int64, pattern string, opts ...func(*Rule)) Rule {
|
||||
r := Rule{
|
||||
ID: id,
|
||||
Name: "rule" + pattern,
|
||||
Pattern: mustRegexp(t, pattern),
|
||||
Severity: "warn",
|
||||
Streams: "all",
|
||||
CooldownSeconds: 0,
|
||||
}
|
||||
for _, o := range opts {
|
||||
o(&r)
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
||||
func TestEngineMatch_BasicHit(t *testing.T) {
|
||||
e := NewEngine()
|
||||
rules := []Rule{newRule(t, 1, `panic`)}
|
||||
hits := e.Match("c1", "w1", "stderr", "fatal panic in worker", rules)
|
||||
if len(hits) != 1 {
|
||||
t.Fatalf("want 1 hit, got %d", len(hits))
|
||||
}
|
||||
if hits[0].Rule.ID != 1 {
|
||||
t.Errorf("rule id mismatch: %d", hits[0].Rule.ID)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEngineMatch_StreamFilter(t *testing.T) {
|
||||
e := NewEngine()
|
||||
rules := []Rule{newRule(t, 1, `boom`, func(r *Rule) { r.Streams = "stderr" })}
|
||||
if len(e.Match("c", "w", "stdout", "boom there", rules)) != 0 {
|
||||
t.Error("stdout line should not match stderr-only rule")
|
||||
}
|
||||
if len(e.Match("c", "w", "stderr", "boom there", rules)) != 1 {
|
||||
t.Error("stderr line should match stderr-only rule")
|
||||
}
|
||||
}
|
||||
|
||||
func TestEngineMatch_NoMatchNoHit(t *testing.T) {
|
||||
e := NewEngine()
|
||||
rules := []Rule{newRule(t, 1, `panic`)}
|
||||
if h := e.Match("c", "w", "stdout", "all good", rules); len(h) != 0 {
|
||||
t.Errorf("expected no hit, got %d", len(h))
|
||||
}
|
||||
}
|
||||
|
||||
func TestEngineMatch_Cooldown(t *testing.T) {
|
||||
now := time.Now()
|
||||
clock := now
|
||||
e := NewEngine(WithNow(func() time.Time { return clock }))
|
||||
rules := []Rule{newRule(t, 1, `bad`, func(r *Rule) { r.CooldownSeconds = 30 })}
|
||||
|
||||
if len(e.Match("c", "w", "stdout", "bad event", rules)) != 1 {
|
||||
t.Fatal("first fire expected")
|
||||
}
|
||||
clock = now.Add(10 * time.Second)
|
||||
if h := e.Match("c", "w", "stdout", "bad event 2", rules); len(h) != 0 {
|
||||
t.Errorf("cooled-down rule fired: %+v", h)
|
||||
}
|
||||
clock = now.Add(31 * time.Second)
|
||||
if len(e.Match("c", "w", "stdout", "bad event 3", rules)) != 1 {
|
||||
t.Error("cooldown expired but rule did not fire")
|
||||
}
|
||||
}
|
||||
|
||||
func TestEngineMatch_PerContainerCooldownIndependent(t *testing.T) {
|
||||
// Same rule firing on two different containers should not
|
||||
// share cooldown — operators expect each container's alerts
|
||||
// to be independent.
|
||||
now := time.Now()
|
||||
clock := now
|
||||
e := NewEngine(WithNow(func() time.Time { return clock }))
|
||||
rules := []Rule{newRule(t, 1, `bad`, func(r *Rule) { r.CooldownSeconds = 30 })}
|
||||
|
||||
e.Match("c1", "w", "stdout", "bad on one", rules)
|
||||
if len(e.Match("c2", "w", "stdout", "bad on two", rules)) != 1 {
|
||||
t.Error("second container should fire independently")
|
||||
}
|
||||
}
|
||||
|
||||
func TestEngineMatch_TokenBucket(t *testing.T) {
|
||||
now := time.Now()
|
||||
clock := now
|
||||
e := NewEngine(WithNow(func() time.Time { return clock }), WithBucket(3, time.Minute))
|
||||
rules := []Rule{newRule(t, 1, `noisy`)}
|
||||
for i := 0; i < 3; i++ {
|
||||
if len(e.Match("c", "w", "stdout", "noisy "+string(rune('a'+i)), rules)) != 1 {
|
||||
t.Errorf("hit %d should fire", i)
|
||||
}
|
||||
}
|
||||
// 4th within window should be dropped.
|
||||
if h := e.Match("c", "w", "stdout", "noisy d", rules); len(h) != 0 {
|
||||
t.Errorf("4th hit should be rate-limited, got %d", len(h))
|
||||
}
|
||||
// After the window the bucket refills.
|
||||
clock = now.Add(time.Minute + time.Second)
|
||||
if len(e.Match("c", "w", "stdout", "noisy e", rules)) != 1 {
|
||||
t.Error("bucket should have refilled after window")
|
||||
}
|
||||
}
|
||||
|
||||
func TestEngineForget_DropsState(t *testing.T) {
|
||||
e := NewEngine()
|
||||
rules := []Rule{newRule(t, 1, `bad`, func(r *Rule) { r.CooldownSeconds = 999 })}
|
||||
e.Match("c", "w", "stdout", "bad once", rules)
|
||||
e.Forget("c")
|
||||
// After Forget, the same rule on the same container should
|
||||
// fire again immediately — cooldown state was cleared.
|
||||
if len(e.Match("c", "w", "stdout", "bad again", rules)) != 1 {
|
||||
t.Error("Forget should drop cooldown state")
|
||||
}
|
||||
}
|
||||
|
||||
func TestEngineStats_CooldownCounter(t *testing.T) {
|
||||
// Two matches inside the cooldown window should drop once and
|
||||
// increment DroppedByCooldown by one. Bucket is generous so it
|
||||
// never participates.
|
||||
now := time.Now()
|
||||
clock := now
|
||||
e := NewEngine(
|
||||
WithNow(func() time.Time { return clock }),
|
||||
WithBucket(100, time.Hour),
|
||||
)
|
||||
rule := newRule(t, 1, `bad`, func(r *Rule) { r.CooldownSeconds = 60 })
|
||||
|
||||
if len(e.Match("c", "w", "stdout", "bad a", []Rule{rule})) != 1 {
|
||||
t.Fatal("first fire expected")
|
||||
}
|
||||
if len(e.Match("c", "w", "stdout", "bad b", []Rule{rule})) != 0 {
|
||||
t.Fatal("second fire inside cooldown should drop")
|
||||
}
|
||||
stats := e.Stats()
|
||||
if stats.DroppedByCooldown != 1 {
|
||||
t.Errorf("DroppedByCooldown = %d, want 1", stats.DroppedByCooldown)
|
||||
}
|
||||
if stats.DroppedByBucket != 0 {
|
||||
t.Errorf("DroppedByBucket = %d, want 0 (bucket should not have fired)",
|
||||
stats.DroppedByBucket)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEngineStats_BucketCounter(t *testing.T) {
|
||||
// Drain a small bucket without any cooldown so every drop
|
||||
// after the first is bucket-attributable.
|
||||
now := time.Now()
|
||||
clock := now
|
||||
e := NewEngine(
|
||||
WithNow(func() time.Time { return clock }),
|
||||
WithBucket(2, time.Hour),
|
||||
)
|
||||
rule := newRule(t, 1, `bad`) // cooldown=0
|
||||
|
||||
// 2 fires consume both tokens.
|
||||
for i := 0; i < 2; i++ {
|
||||
if len(e.Match("c", "w", "stdout", "bad", []Rule{rule})) != 1 {
|
||||
t.Fatalf("fire %d expected to succeed", i)
|
||||
}
|
||||
}
|
||||
// 3rd and 4th fires hit an empty bucket.
|
||||
for i := 0; i < 2; i++ {
|
||||
if len(e.Match("c", "w", "stdout", "bad", []Rule{rule})) != 0 {
|
||||
t.Fatalf("fire %d expected to be bucket-dropped", i+2)
|
||||
}
|
||||
}
|
||||
stats := e.Stats()
|
||||
if stats.DroppedByBucket != 2 {
|
||||
t.Errorf("DroppedByBucket = %d, want 2", stats.DroppedByBucket)
|
||||
}
|
||||
if stats.DroppedByCooldown != 0 {
|
||||
t.Errorf("DroppedByCooldown = %d, want 0 (cooldown should not have fired)",
|
||||
stats.DroppedByCooldown)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEngineStats_NoDropsWhenAllowed(t *testing.T) {
|
||||
e := NewEngine(WithBucket(100, time.Minute))
|
||||
rule := newRule(t, 1, `x`)
|
||||
for i := 0; i < 5; i++ {
|
||||
e.Match("c", "w", "stdout", "x", []Rule{rule})
|
||||
}
|
||||
stats := e.Stats()
|
||||
if stats.DroppedByBucket != 0 || stats.DroppedByCooldown != 0 {
|
||||
t.Errorf("expected zero drops, got %+v", stats)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStreamMatches(t *testing.T) {
|
||||
cases := []struct {
|
||||
rule, line string
|
||||
want bool
|
||||
}{
|
||||
{"all", "stdout", true},
|
||||
{"all", "stderr", true},
|
||||
{"", "stdout", true},
|
||||
{"stdout", "stdout", true},
|
||||
{"stdout", "stderr", false},
|
||||
{"stderr", "stderr", true},
|
||||
{"stderr", "stdout", false},
|
||||
}
|
||||
for _, c := range cases {
|
||||
got := streamMatches(c.rule, c.line)
|
||||
if got != c.want {
|
||||
t.Errorf("streamMatches(%q,%q) = %v want %v", c.rule, c.line, got, c.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,345 @@
|
||||
package logscanner
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"log/slog"
|
||||
"strconv"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/alexei/tinyforge/internal/docker"
|
||||
"github.com/alexei/tinyforge/internal/events"
|
||||
"github.com/alexei/tinyforge/internal/store"
|
||||
)
|
||||
|
||||
// RuleSource is the read-side seam for fetching the current rule
|
||||
// rows. Real callers pass *store.Store; tests pass a fake.
|
||||
type RuleSource interface {
|
||||
ListLogScanRules() ([]store.LogScanRule, error)
|
||||
}
|
||||
|
||||
// ContainerLister is what the manager needs from the store to
|
||||
// discover running container rows. The filter is set by the
|
||||
// manager (state="running") so adapters don't have to do that.
|
||||
type ContainerLister interface {
|
||||
ListContainers(filter store.ContainerFilter) ([]store.Container, error)
|
||||
}
|
||||
|
||||
// EventStore writes the matched-line entries into event_log.
|
||||
// Same shape as events.PersistFunc but typed to keep the package
|
||||
// self-contained.
|
||||
type EventStore interface {
|
||||
InsertEvent(evt store.EventLog) (store.EventLog, error)
|
||||
}
|
||||
|
||||
// Manager owns the lifecycle of per-container tails. It polls the
|
||||
// container index every PollInterval (default 5s), starts tails for
|
||||
// new running containers, and stops tails when a container is no
|
||||
// longer running (state != "running" or row deleted).
|
||||
//
|
||||
// Rule changes (CRUD via API) trigger ReloadRules which rebuilds the
|
||||
// snapshot once and atomically swaps the pointer all tails read.
|
||||
type Manager struct {
|
||||
rules RuleSource
|
||||
containers ContainerLister
|
||||
docker dockerLogger
|
||||
events EventStore
|
||||
bus *events.Bus
|
||||
pollInterval time.Duration
|
||||
|
||||
snapshot atomic.Pointer[Snapshot]
|
||||
engine *Engine
|
||||
|
||||
mu sync.Mutex
|
||||
tails map[string]context.CancelFunc // containerID -> cancel
|
||||
tailWG sync.WaitGroup
|
||||
|
||||
// statsMu guards lastCompileErrors. Compile-error visibility
|
||||
// matters because a broken rule silently disappears from the
|
||||
// snapshot — without surfacing the message, the operator has
|
||||
// no signal beyond a warn-level log line.
|
||||
statsMu sync.RWMutex
|
||||
lastCompileErrors []string
|
||||
|
||||
ctx context.Context
|
||||
cancel context.CancelFunc
|
||||
}
|
||||
|
||||
// Stats bundles the operator-facing observability for the log
|
||||
// scanner. EngineStats covers the hot-path drop counters; compile
|
||||
// errors are the last snapshot's invalid-pattern messages (helpful
|
||||
// when a freshly-saved rule "doesn't fire" — the issue is usually
|
||||
// here). ActiveTails reports how many container goroutines the
|
||||
// manager is currently driving.
|
||||
type Stats struct {
|
||||
Engine EngineStats `json:"engine"`
|
||||
ActiveTails int `json:"active_tails"`
|
||||
LastCompileErrors []string `json:"last_compile_errors"`
|
||||
}
|
||||
|
||||
// Config bundles the constructor inputs.
|
||||
type Config struct {
|
||||
Rules RuleSource
|
||||
Containers ContainerLister
|
||||
Docker *docker.Client
|
||||
Events EventStore
|
||||
Bus *events.Bus
|
||||
PollInterval time.Duration // default 5s
|
||||
}
|
||||
|
||||
// NewManager wires a manager with the supplied dependencies.
|
||||
// It does not start polling — call Start to begin the lifecycle.
|
||||
func NewManager(cfg Config) *Manager {
|
||||
poll := cfg.PollInterval
|
||||
if poll <= 0 {
|
||||
poll = 5 * time.Second
|
||||
}
|
||||
return &Manager{
|
||||
rules: cfg.Rules,
|
||||
containers: cfg.Containers,
|
||||
docker: cfg.Docker,
|
||||
events: cfg.Events,
|
||||
bus: cfg.Bus,
|
||||
pollInterval: poll,
|
||||
engine: NewEngine(),
|
||||
tails: map[string]context.CancelFunc{},
|
||||
}
|
||||
}
|
||||
|
||||
// Start kicks off the polling loop and initial snapshot build.
|
||||
// Returns an error only if the initial rule load fails; subsequent
|
||||
// load failures are logged but do not stop the manager.
|
||||
func (m *Manager) Start(ctx context.Context) error {
|
||||
if err := m.ReloadRules(); err != nil {
|
||||
return err
|
||||
}
|
||||
m.ctx, m.cancel = context.WithCancel(ctx)
|
||||
go m.loop()
|
||||
return nil
|
||||
}
|
||||
|
||||
// Stop cancels every tail and waits for them to exit.
|
||||
func (m *Manager) Stop() {
|
||||
if m.cancel != nil {
|
||||
m.cancel()
|
||||
}
|
||||
m.tailWG.Wait()
|
||||
}
|
||||
|
||||
// ReloadRules rebuilds the snapshot from the current store state.
|
||||
// Safe to call concurrently with the polling loop and with tails —
|
||||
// the atomic pointer swap is the only synchronization required.
|
||||
//
|
||||
// Compile errors are both logged and stored on the manager so the
|
||||
// API stats endpoint can surface them to operators. The set is
|
||||
// fully replaced on each reload — there's no "and previously
|
||||
// these other rules were broken" trail.
|
||||
func (m *Manager) ReloadRules() error {
|
||||
rows, err := m.rules.ListLogScanRules()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
snap, compileErrs := BuildSnapshot(rows)
|
||||
errMsgs := make([]string, 0, len(compileErrs))
|
||||
for _, e := range compileErrs {
|
||||
slog.Warn("logscanner: rule compile failed (dropped from snapshot)", "error", e)
|
||||
errMsgs = append(errMsgs, e.Error())
|
||||
}
|
||||
m.snapshot.Store(snap)
|
||||
m.statsMu.Lock()
|
||||
m.lastCompileErrors = errMsgs
|
||||
m.statsMu.Unlock()
|
||||
return nil
|
||||
}
|
||||
|
||||
// Stats returns the current operator-facing observability snapshot.
|
||||
// Read-safe: the mutex protects only the compile-errors slice;
|
||||
// counters and tail-count are atomic / mutex-guarded internally.
|
||||
func (m *Manager) Stats() Stats {
|
||||
m.statsMu.RLock()
|
||||
errs := make([]string, len(m.lastCompileErrors))
|
||||
copy(errs, m.lastCompileErrors)
|
||||
m.statsMu.RUnlock()
|
||||
|
||||
m.mu.Lock()
|
||||
tails := len(m.tails)
|
||||
m.mu.Unlock()
|
||||
|
||||
return Stats{
|
||||
Engine: m.engine.Stats(),
|
||||
ActiveTails: tails,
|
||||
LastCompileErrors: errs,
|
||||
}
|
||||
}
|
||||
|
||||
// EmitHit persists a hit as an event_log row and publishes it on
|
||||
// the bus. Implements the HitEmitter interface so tails depend on
|
||||
// the interface, not the concrete manager — also makes the manager
|
||||
// easy to unit-test by passing a recording emitter.
|
||||
func (m *Manager) EmitHit(ctx context.Context, hit Hit) {
|
||||
const maxMessage = 500 // truncate long lines so one chatty rule can't blow up event_log
|
||||
msg := truncateUTF8(hit.Line, maxMessage)
|
||||
meta := map[string]any{
|
||||
"workload_id": hit.WorkloadID,
|
||||
"container_id": hit.ContainerID,
|
||||
"rule_id": hit.Rule.ID,
|
||||
"rule_name": hit.Rule.Name,
|
||||
"stream": hit.Stream,
|
||||
}
|
||||
if subs := hit.Rule.Pattern.FindStringSubmatch(hit.Line); len(subs) > 1 {
|
||||
captures := map[string]string{}
|
||||
for i, sub := range subs[1:] {
|
||||
captures[indexName(hit.Rule.Pattern, i+1)] = sub
|
||||
}
|
||||
meta["captures"] = captures
|
||||
}
|
||||
metaJSON, _ := json.Marshal(meta)
|
||||
evt, err := m.events.InsertEvent(store.EventLog{
|
||||
Source: "logscan",
|
||||
Severity: nonEmpty(hit.Rule.Severity, store.LogScanSeverityWarn),
|
||||
Message: msg,
|
||||
Metadata: string(metaJSON),
|
||||
})
|
||||
if err != nil {
|
||||
slog.Warn("logscanner: persist event", "rule", hit.Rule.Name, "error", err)
|
||||
return
|
||||
}
|
||||
if m.bus != nil {
|
||||
m.bus.Publish(events.Event{
|
||||
Type: events.EventLog,
|
||||
Payload: events.EventLogPayload{
|
||||
ID: evt.ID,
|
||||
Source: evt.Source,
|
||||
Severity: evt.Severity,
|
||||
Message: evt.Message,
|
||||
Metadata: evt.Metadata,
|
||||
CreatedAt: evt.CreatedAt,
|
||||
},
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// loop runs the lifecycle poll until ctx cancellation. It is single-
|
||||
// threaded over the tails map — start/stop of individual tails
|
||||
// happens here so there's no race on the map.
|
||||
func (m *Manager) loop() {
|
||||
ticker := time.NewTicker(m.pollInterval)
|
||||
defer ticker.Stop()
|
||||
m.reconcile() // run once immediately
|
||||
for {
|
||||
select {
|
||||
case <-m.ctx.Done():
|
||||
m.stopAllTails()
|
||||
return
|
||||
case <-ticker.C:
|
||||
m.reconcile()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// reconcile diffs the desired set of running container IDs against
|
||||
// the live tails and starts/stops as needed.
|
||||
func (m *Manager) reconcile() {
|
||||
rows, err := m.containers.ListContainers(store.ContainerFilter{State: "running"})
|
||||
if err != nil {
|
||||
slog.Warn("logscanner: list containers", "error", err)
|
||||
return
|
||||
}
|
||||
desired := map[string]store.Container{}
|
||||
for _, c := range rows {
|
||||
if c.ContainerID == "" {
|
||||
continue
|
||||
}
|
||||
desired[c.ContainerID] = c
|
||||
}
|
||||
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
|
||||
for id, c := range desired {
|
||||
if _, ok := m.tails[id]; ok {
|
||||
continue
|
||||
}
|
||||
m.startTailLocked(id, c.WorkloadID)
|
||||
}
|
||||
for id, cancel := range m.tails {
|
||||
if _, ok := desired[id]; !ok {
|
||||
cancel()
|
||||
delete(m.tails, id)
|
||||
m.engine.Forget(id)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (m *Manager) startTailLocked(containerID, workloadID string) {
|
||||
tailCtx, cancel := context.WithCancel(m.ctx)
|
||||
t := &tail{
|
||||
containerID: containerID,
|
||||
workloadID: workloadID,
|
||||
docker: m.docker,
|
||||
engine: m.engine,
|
||||
emitter: m,
|
||||
snapshot: &m.snapshot,
|
||||
}
|
||||
if err := t.validate(); err != nil {
|
||||
slog.Warn("logscanner: tail validation failed", "container", containerID, "error", err)
|
||||
cancel()
|
||||
return
|
||||
}
|
||||
m.tails[containerID] = cancel
|
||||
m.tailWG.Add(1)
|
||||
go func() {
|
||||
defer m.tailWG.Done()
|
||||
t.run(tailCtx)
|
||||
}()
|
||||
}
|
||||
|
||||
func (m *Manager) stopAllTails() {
|
||||
m.mu.Lock()
|
||||
for id, cancel := range m.tails {
|
||||
cancel()
|
||||
delete(m.tails, id)
|
||||
}
|
||||
m.mu.Unlock()
|
||||
m.tailWG.Wait()
|
||||
}
|
||||
|
||||
// indexName returns the key under which capture group i (1-based) is
// stored. A named group uses its name. An unnamed group, or an index
// past the end of SubexpNames, uses "$" followed by the index, so
// every group gets its own distinct key.
func indexName(re interface{ SubexpNames() []string }, i int) string {
	if names := re.SubexpNames(); i < len(names) {
		if name := names[i]; name != "" {
			return name
		}
	}
	return "$" + strconv.Itoa(i)
}
|
||||
|
||||
// nonEmpty returns a unless it is the empty string, in which case it
// returns the fallback b (which may itself be empty).
func nonEmpty(a, b string) string {
	if a == "" {
		return b
	}
	return a
}
|
||||
|
||||
// truncateUTF8 shortens s so that at most maxBytes bytes of the
// original text remain, and appends an ellipsis ("…") to mark the
// cut. A string of maxBytes bytes or fewer is returned unchanged.
//
// The cut is moved back to the nearest rune boundary so that a
// multi-byte codepoint is never split; valid UTF-8 input therefore
// yields valid UTF-8 output. The ellipsis is NOT counted against
// maxBytes, so a truncated result can be up to maxBytes+3 bytes long.
// Callers with a hard storage limit must leave room for it.
//
// A negative maxBytes is treated as 0 instead of panicking on the
// slice expression.
func truncateUTF8(s string, maxBytes int) string {
	if maxBytes < 0 {
		maxBytes = 0
	}
	if len(s) <= maxBytes {
		return s
	}
	end := maxBytes
	// s[end] is in range here because end < len(s). Step back while
	// it is a continuation byte so the kept prefix ends on a full rune.
	for end > 0 && !utf8.RuneStart(s[end]) {
		end--
	}
	return s[:end] + "…"
}
|
||||
@@ -0,0 +1,108 @@
|
||||
package logscanner
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
"testing"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
func TestTruncateUTF8(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
in string
|
||||
maxBytes int
|
||||
want string
|
||||
}{
|
||||
{
|
||||
name: "shorter than cap untouched",
|
||||
in: "hello",
|
||||
maxBytes: 100,
|
||||
want: "hello",
|
||||
},
|
||||
{
|
||||
name: "ASCII truncation",
|
||||
in: "abcdefghij",
|
||||
maxBytes: 5,
|
||||
want: "abcde…",
|
||||
},
|
||||
{
|
||||
name: "cuts on rune boundary inside multibyte",
|
||||
in: "abcdé",
|
||||
maxBytes: 5,
|
||||
want: "abcd…",
|
||||
},
|
||||
{
|
||||
name: "preserves valid utf-8 when cap lands mid-codepoint",
|
||||
in: "ééééééé",
|
||||
maxBytes: 5,
|
||||
want: "éé…",
|
||||
},
|
||||
{
|
||||
name: "empty input untouched",
|
||||
in: "",
|
||||
maxBytes: 5,
|
||||
want: "",
|
||||
},
|
||||
}
|
||||
for _, c := range cases {
|
||||
t.Run(c.name, func(t *testing.T) {
|
||||
got := truncateUTF8(c.in, c.maxBytes)
|
||||
if got != c.want {
|
||||
t.Errorf("got %q want %q", got, c.want)
|
||||
}
|
||||
if !utf8.ValidString(got) {
|
||||
t.Errorf("result not valid UTF-8: %q", got)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestNonEmpty(t *testing.T) {
|
||||
if nonEmpty("a", "b") != "a" {
|
||||
t.Error("first non-empty wins")
|
||||
}
|
||||
if nonEmpty("", "b") != "b" {
|
||||
t.Error("fallback when first empty")
|
||||
}
|
||||
if nonEmpty("", "") != "" {
|
||||
t.Error("both empty yields empty")
|
||||
}
|
||||
}
|
||||
|
||||
func TestIndexName_UnamedGroupsStable(t *testing.T) {
|
||||
// Each unnamed group should get a distinct fallback name so
|
||||
// JSON serialization doesn't collapse $4..$N onto a single key.
|
||||
re := mustCompile(t, `(\w+) (\w+) (\w+) (\w+) (\w+)`)
|
||||
seen := map[string]bool{}
|
||||
for i := 1; i <= 5; i++ {
|
||||
name := indexName(re, i)
|
||||
if seen[name] {
|
||||
t.Errorf("indexName(%d) = %q collides with prior group", i, name)
|
||||
}
|
||||
seen[name] = true
|
||||
if !strings.HasPrefix(name, "$") {
|
||||
t.Errorf("unnamed group %d should fall back to $N form, got %q", i, name)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestIndexName_NamedGroupWins(t *testing.T) {
|
||||
re := mustCompile(t, `(?P<code>\d+) (\w+)`)
|
||||
if got := indexName(re, 1); got != "code" {
|
||||
t.Errorf("named group should win: %q", got)
|
||||
}
|
||||
if got := indexName(re, 2); got != "$2" {
|
||||
t.Errorf("second (unnamed) group: %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
// mustCompile compiles pattern and fails the test on error. It returns
// the regexp typed as the narrow interface that indexName accepts.
func mustCompile(t *testing.T, pattern string) interface{ SubexpNames() []string } {
	t.Helper()
	compiled, err := regexp.Compile(pattern)
	if err == nil {
		return compiled
	}
	t.Fatalf("compile %q: %v", pattern, err)
	return compiled // not reached: Fatalf stops the test goroutine
}
|
||||
@@ -0,0 +1,143 @@
|
||||
// Package logscanner tails running container logs, matches lines
|
||||
// against operator-configured regex rules, and emits event_log entries
|
||||
// via the events bus. The package is split into four files:
|
||||
//
|
||||
// - rules.go: the rule snapshot — compiled regexes + per-rule
|
||||
// metadata. Snapshots are immutable; the manager builds a new
|
||||
// snapshot on every rule change and swaps it atomically.
|
||||
// - engine.go: rule evaluation per line + cooldown + token bucket.
|
||||
// - tail.go: per-container goroutine reading docker log stream.
|
||||
// - manager.go: container lifecycle polling + tail lifecycle.
|
||||
package logscanner
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"regexp"
|
||||
|
||||
"github.com/alexei/tinyforge/internal/store"
|
||||
)
|
||||
|
||||
// Rule is the compiled form of one store.LogScanRule row. BuildSnapshot
// creates it once per reload, and it must be treated as read-only from
// then on because tails share it through the snapshot pointer.
type Rule struct {
	ID              int64
	WorkloadID      string // "" means the rule is global
	Name            string
	Pattern         *regexp.Regexp // compiled from the row's pattern string
	Severity        string
	Streams         string // "all" | "stdout" | "stderr"
	CooldownSeconds int    // values <= 0 disable the cooldown
}
|
||||
|
||||
// Snapshot is the rule set as seen by tails. Built from a flat slice
|
||||
// of store.LogScanRule rows: globals are expanded via
|
||||
// store.EffectiveLogScanRules to yield the per-workload effective
|
||||
// set at lookup time.
|
||||
//
|
||||
// The snapshot is immutable. Hot-reload semantics are achieved by
|
||||
// building a fresh snapshot in the manager and swapping the atomic
|
||||
// pointer; in-flight tails finish their current match against the
|
||||
// old snapshot, then pick up the new one on the next line.
|
||||
//
|
||||
// MUST NOT mutate any field (including map values, including slice
|
||||
// elements) after BuildSnapshot returns — tails read these
|
||||
// concurrently with no locking and rely on immutability for safety.
|
||||
type Snapshot struct {
|
||||
// global is the list of compiled global rules (workload_id == "").
|
||||
global []Rule
|
||||
// perWorkload[workloadID] holds workload-only additions AND
|
||||
// per-workload overrides (already resolved against globals).
|
||||
// EffectiveFor(id) merges global + perWorkload[id] minus any
|
||||
// global overridden under that workload.
|
||||
perWorkload map[string][]Rule
|
||||
// overrides[id] is the workload's overriding rule (compiled).
|
||||
// overrides[id][globalID] -> Rule (the override row).
|
||||
overrides map[string]map[int64]Rule
|
||||
}
|
||||
|
||||
// BuildSnapshot compiles every rule's pattern and returns an
|
||||
// immutable Snapshot. Rules that fail to compile are skipped with
|
||||
// their error reported to the caller — store-side validation keeps
|
||||
// the pattern field as a free-form regex string, so engine-time
|
||||
// compile failures are an expected, recoverable mode.
|
||||
//
|
||||
// The returned error slice is informational, not fatal: bad rules
|
||||
// are dropped from the snapshot but the rest still work.
|
||||
func BuildSnapshot(rows []store.LogScanRule) (*Snapshot, []error) {
|
||||
s := &Snapshot{
|
||||
perWorkload: map[string][]Rule{},
|
||||
overrides: map[string]map[int64]Rule{},
|
||||
}
|
||||
var errs []error
|
||||
for _, row := range rows {
|
||||
if !row.Enabled {
|
||||
// Skip outright. Overrides with enabled=false still
|
||||
// need to be recorded so they suppress a global —
|
||||
// handled by tracking disable separately below.
|
||||
}
|
||||
re, err := regexp.Compile(row.Pattern)
|
||||
if err != nil {
|
||||
errs = append(errs, fmt.Errorf("rule #%d %q: %w", row.ID, row.Name, err))
|
||||
continue
|
||||
}
|
||||
r := Rule{
|
||||
ID: row.ID,
|
||||
WorkloadID: row.WorkloadID,
|
||||
Name: row.Name,
|
||||
Pattern: re,
|
||||
Severity: row.Severity,
|
||||
Streams: row.Streams,
|
||||
CooldownSeconds: row.CooldownSeconds,
|
||||
}
|
||||
switch {
|
||||
case row.WorkloadID == "" && row.OverridesID == 0:
|
||||
if row.Enabled {
|
||||
s.global = append(s.global, r)
|
||||
}
|
||||
case row.WorkloadID != "" && row.OverridesID == 0:
|
||||
if row.Enabled {
|
||||
s.perWorkload[row.WorkloadID] = append(s.perWorkload[row.WorkloadID], r)
|
||||
}
|
||||
case row.WorkloadID != "" && row.OverridesID != 0:
|
||||
// Override row: always record so a disabled override
|
||||
// suppresses the global for this workload.
|
||||
if _, ok := s.overrides[row.WorkloadID]; !ok {
|
||||
s.overrides[row.WorkloadID] = map[int64]Rule{}
|
||||
}
|
||||
if row.Enabled {
|
||||
s.overrides[row.WorkloadID][row.OverridesID] = r
|
||||
} else {
|
||||
// Encode "disabled override" as the zero rule so
|
||||
// EffectiveFor can drop it without re-querying.
|
||||
s.overrides[row.WorkloadID][row.OverridesID] = Rule{ID: row.OverridesID}
|
||||
}
|
||||
}
|
||||
}
|
||||
return s, errs
|
||||
}
|
||||
|
||||
// EffectiveFor returns the rule list to evaluate against logs of a
|
||||
// specific workload. Equivalent to store.EffectiveLogScanRules but
|
||||
// operates on the compiled snapshot, so the hot path is allocation
|
||||
// free except for the result slice.
|
||||
func (s *Snapshot) EffectiveFor(workloadID string) []Rule {
|
||||
if s == nil {
|
||||
return nil
|
||||
}
|
||||
overrides := s.overrides[workloadID]
|
||||
out := make([]Rule, 0, len(s.global)+len(s.perWorkload[workloadID]))
|
||||
for _, g := range s.global {
|
||||
if ov, ok := overrides[g.ID]; ok {
|
||||
if ov.Pattern == nil {
|
||||
// Suppressed for this workload — skip.
|
||||
continue
|
||||
}
|
||||
out = append(out, ov)
|
||||
continue
|
||||
}
|
||||
out = append(out, g)
|
||||
}
|
||||
out = append(out, s.perWorkload[workloadID]...)
|
||||
return out
|
||||
}
|
||||
@@ -0,0 +1,109 @@
|
||||
package logscanner
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/alexei/tinyforge/internal/store"
|
||||
)
|
||||
|
||||
func TestBuildSnapshot_CompileErrorsReported(t *testing.T) {
|
||||
rows := []store.LogScanRule{
|
||||
{ID: 1, Pattern: `valid`, Enabled: true},
|
||||
{ID: 2, Pattern: `([unclosed`, Enabled: true},
|
||||
}
|
||||
snap, errs := BuildSnapshot(rows)
|
||||
if snap == nil {
|
||||
t.Fatal("snapshot should not be nil")
|
||||
}
|
||||
if len(errs) != 1 {
|
||||
t.Fatalf("expected 1 compile error, got %d", len(errs))
|
||||
}
|
||||
if len(snap.global) != 1 {
|
||||
t.Errorf("expected 1 global rule (valid one), got %d", len(snap.global))
|
||||
}
|
||||
}
|
||||
|
||||
func TestEffectiveFor_GlobalOnly(t *testing.T) {
|
||||
rows := []store.LogScanRule{
|
||||
{ID: 1, Pattern: `panic`, Enabled: true},
|
||||
{ID: 2, Pattern: `fatal`, Enabled: true},
|
||||
}
|
||||
snap, _ := BuildSnapshot(rows)
|
||||
out := snap.EffectiveFor("w1")
|
||||
if len(out) != 2 {
|
||||
t.Fatalf("expected 2 rules, got %d", len(out))
|
||||
}
|
||||
}
|
||||
|
||||
func TestEffectiveFor_WorkloadAddition(t *testing.T) {
|
||||
rows := []store.LogScanRule{
|
||||
{ID: 1, Pattern: `panic`, Enabled: true},
|
||||
{ID: 2, Pattern: `slow_query`, WorkloadID: "w1", Enabled: true},
|
||||
}
|
||||
snap, _ := BuildSnapshot(rows)
|
||||
out := snap.EffectiveFor("w1")
|
||||
if len(out) != 2 {
|
||||
t.Fatalf("workload w1 should see both: %d", len(out))
|
||||
}
|
||||
out2 := snap.EffectiveFor("w2")
|
||||
if len(out2) != 1 {
|
||||
t.Errorf("workload w2 should see only the global: %d", len(out2))
|
||||
}
|
||||
}
|
||||
|
||||
func TestEffectiveFor_OverrideReplacesGlobal(t *testing.T) {
|
||||
rows := []store.LogScanRule{
|
||||
{ID: 1, Pattern: `panic`, Severity: "warn", Enabled: true},
|
||||
{
|
||||
ID: 2, WorkloadID: "w1", OverridesID: 1,
|
||||
Pattern: `panic`, Severity: "error", Enabled: true,
|
||||
},
|
||||
}
|
||||
snap, _ := BuildSnapshot(rows)
|
||||
out := snap.EffectiveFor("w1")
|
||||
if len(out) != 1 {
|
||||
t.Fatalf("expected 1 rule, got %d", len(out))
|
||||
}
|
||||
if out[0].Severity != "error" {
|
||||
t.Errorf("override severity should win: %q", out[0].Severity)
|
||||
}
|
||||
// Other workloads still see the original.
|
||||
out2 := snap.EffectiveFor("w2")
|
||||
if len(out2) != 1 || out2[0].Severity != "warn" {
|
||||
t.Errorf("w2 should see original severity, got %+v", out2)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEffectiveFor_DisabledOverrideSuppresses(t *testing.T) {
|
||||
rows := []store.LogScanRule{
|
||||
{ID: 1, Pattern: `panic`, Enabled: true},
|
||||
{
|
||||
ID: 2, WorkloadID: "w1", OverridesID: 1,
|
||||
Pattern: `panic`, Enabled: false,
|
||||
},
|
||||
}
|
||||
snap, _ := BuildSnapshot(rows)
|
||||
if len(snap.EffectiveFor("w1")) != 0 {
|
||||
t.Errorf("disabled override should suppress global for w1")
|
||||
}
|
||||
if len(snap.EffectiveFor("w2")) != 1 {
|
||||
t.Errorf("w2 should still see the global")
|
||||
}
|
||||
}
|
||||
|
||||
func TestEffectiveFor_DisabledGlobalSkipped(t *testing.T) {
|
||||
rows := []store.LogScanRule{
|
||||
{ID: 1, Pattern: `panic`, Enabled: false},
|
||||
}
|
||||
snap, _ := BuildSnapshot(rows)
|
||||
if len(snap.EffectiveFor("w1")) != 0 {
|
||||
t.Errorf("disabled global should not appear in effective set")
|
||||
}
|
||||
}
|
||||
|
||||
func TestEffectiveFor_NilSnapshot(t *testing.T) {
|
||||
var snap *Snapshot
|
||||
if out := snap.EffectiveFor("w1"); out != nil {
|
||||
t.Errorf("nil snapshot should return nil, got %+v", out)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,88 @@
|
||||
package logscanner
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/alexei/tinyforge/internal/store"
|
||||
)
|
||||
|
||||
// fakeRuleSource lets us inject rules into ReloadRules without
|
||||
// standing up SQLite. Mirrors the fakeTriggerSource pattern from
|
||||
// the events package.
|
||||
type fakeRuleSource struct {
|
||||
rows []store.LogScanRule
|
||||
err error
|
||||
}
|
||||
|
||||
func (f *fakeRuleSource) ListLogScanRules() ([]store.LogScanRule, error) {
|
||||
if f.err != nil {
|
||||
return nil, f.err
|
||||
}
|
||||
return f.rows, nil
|
||||
}
|
||||
|
||||
func TestManagerStats_CapturesCompileErrors(t *testing.T) {
|
||||
rs := &fakeRuleSource{rows: []store.LogScanRule{
|
||||
{ID: 1, Name: "valid", Pattern: `panic`, Severity: "warn", Streams: "all", Enabled: true},
|
||||
{ID: 2, Name: "broken", Pattern: `([unclosed`, Severity: "warn", Streams: "all", Enabled: true},
|
||||
{ID: 3, Name: "also-broken", Pattern: `[`, Severity: "warn", Streams: "all", Enabled: true},
|
||||
}}
|
||||
m := NewManager(Config{Rules: rs})
|
||||
if err := m.ReloadRules(); err != nil {
|
||||
t.Fatalf("ReloadRules: %v", err)
|
||||
}
|
||||
stats := m.Stats()
|
||||
if len(stats.LastCompileErrors) != 2 {
|
||||
t.Fatalf("expected 2 compile errors, got %d: %+v", len(stats.LastCompileErrors), stats.LastCompileErrors)
|
||||
}
|
||||
// The error messages should mention the rule id/name from the
|
||||
// BuildSnapshot format so operators can find which rule broke.
|
||||
joined := strings.Join(stats.LastCompileErrors, "|")
|
||||
if !strings.Contains(joined, "broken") {
|
||||
t.Errorf("error messages should reference rule name 'broken': %s", joined)
|
||||
}
|
||||
}
|
||||
|
||||
func TestManagerStats_CompileErrorsReplacedOnReload(t *testing.T) {
|
||||
// A broken rule then a reload with all-valid rules should
|
||||
// clear the error list — operators expect the panel to flip
|
||||
// from "2 errors" to "all clean" after they fix things.
|
||||
rs := &fakeRuleSource{rows: []store.LogScanRule{
|
||||
{ID: 1, Name: "broken", Pattern: `([`, Severity: "warn", Streams: "all", Enabled: true},
|
||||
}}
|
||||
m := NewManager(Config{Rules: rs})
|
||||
_ = m.ReloadRules()
|
||||
if len(m.Stats().LastCompileErrors) != 1 {
|
||||
t.Fatal("expected one compile error before fix")
|
||||
}
|
||||
|
||||
rs.rows = []store.LogScanRule{
|
||||
{ID: 1, Name: "fixed", Pattern: `panic`, Severity: "warn", Streams: "all", Enabled: true},
|
||||
}
|
||||
_ = m.ReloadRules()
|
||||
if len(m.Stats().LastCompileErrors) != 0 {
|
||||
t.Errorf("expected zero compile errors after reload, got %d",
|
||||
len(m.Stats().LastCompileErrors))
|
||||
}
|
||||
}
|
||||
|
||||
func TestManagerStats_ReloadErrorPropagates(t *testing.T) {
|
||||
rs := &fakeRuleSource{err: errors.New("db down")}
|
||||
m := NewManager(Config{Rules: rs})
|
||||
if err := m.ReloadRules(); err == nil {
|
||||
t.Fatal("expected ReloadRules to propagate the source error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestManagerStats_ActiveTailsDefaultsZero(t *testing.T) {
|
||||
// Without Start() and without a docker dependency we can't
|
||||
// run real tails, but the counter should be a stable 0 read
|
||||
// rather than panic/uninitialized.
|
||||
rs := &fakeRuleSource{}
|
||||
m := NewManager(Config{Rules: rs})
|
||||
if got := m.Stats().ActiveTails; got != 0 {
|
||||
t.Errorf("ActiveTails on fresh manager = %d, want 0", got)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,245 @@
|
||||
package logscanner
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/alexei/tinyforge/internal/docker"
|
||||
)
|
||||
|
||||
// maxFramePayloadBytes is the largest payload accepted from a single
// docker log frame (16 MiB). The frame header carries a 4-byte length,
// so without this limit a corrupted or hostile stream could request an
// allocation of up to 4 GiB.
const maxFramePayloadBytes = 16 << 20
|
||||
|
||||
// maxLineBufferBytes is the ceiling (1 MiB) on the per-stream buffer
// used to reassemble a line from several frames. It keeps a stream that
// never emits a newline from growing the buffer without bound, and is
// the same size readPlain allows its line scanner.
const maxLineBufferBytes = 1 << 20
|
||||
|
||||
// dockerLogger is the minimum surface the tail needs from the
|
||||
// docker client. Defined as an interface so tests can stand up a
|
||||
// canned log stream without spinning up containerd.
|
||||
type dockerLogger interface {
|
||||
ContainerLogsOpts(ctx context.Context, containerID string, opts docker.ContainerLogOptions) (io.ReadCloser, error)
|
||||
}
|
||||
|
||||
// HitEmitter is what the tail calls when a rule fires. Implemented
|
||||
// by the manager — it writes to event_log and publishes on the bus.
|
||||
// Kept as a single-method interface so the tail has zero coupling to
|
||||
// store/events.
|
||||
type HitEmitter interface {
|
||||
EmitHit(ctx context.Context, hit Hit)
|
||||
}
|
||||
|
||||
// tail watches one container. Lifetime is bound to the supplied
|
||||
// context — cancellation propagates through docker's log stream so
|
||||
// goroutines exit promptly on container stop or manager shutdown.
|
||||
type tail struct {
|
||||
containerID string
|
||||
workloadID string
|
||||
docker dockerLogger
|
||||
engine *Engine
|
||||
emitter HitEmitter
|
||||
snapshot *atomic.Pointer[Snapshot]
|
||||
}
|
||||
|
||||
// run is the goroutine body. Opens a follow=true log stream and
|
||||
// reads lines until the stream EOFs or context is cancelled.
|
||||
// Tails terminate quietly on context cancel; any other error is
|
||||
// logged at warn so the operator sees it without it stopping the
|
||||
// whole manager.
|
||||
func (t *tail) run(ctx context.Context) {
|
||||
stream, err := t.docker.ContainerLogsOpts(ctx, t.containerID, docker.ContainerLogOptions{
|
||||
Follow: true,
|
||||
Tail: "0", // start from the newest line; backfill is out of scope
|
||||
ShowStdout: true,
|
||||
ShowStderr: true,
|
||||
})
|
||||
if err != nil {
|
||||
if !errors.Is(err, context.Canceled) {
|
||||
slog.Warn("logscanner: open log stream", "container", t.containerID, "error", err)
|
||||
}
|
||||
return
|
||||
}
|
||||
defer stream.Close()
|
||||
|
||||
// Demuxing: docker emits multiplexed frames when TTY is off,
|
||||
// where each frame's first byte is the stream type (1=stdout,
|
||||
// 2=stderr) and bytes 4..7 are big-endian length. When TTY is
|
||||
// on the stream is plain bytes. We try to detect the frame
|
||||
// header on the first read; if it parses as a valid frame we
|
||||
// use the demux path, otherwise we fall back to line-by-line.
|
||||
bufStream := bufio.NewReaderSize(stream, 32*1024)
|
||||
if isMultiplexedStream(bufStream) {
|
||||
t.readDemuxed(ctx, bufStream)
|
||||
} else {
|
||||
t.readPlain(ctx, bufStream)
|
||||
}
|
||||
}
|
||||
|
||||
// readDemuxed reads docker's multiplexed log frames. Each frame:
|
||||
//
|
||||
// [type 1 byte][000 3 bytes][len 4 bytes BE][payload len bytes]
|
||||
//
|
||||
// We track the stream type per-frame and feed payload bytes into a
|
||||
// per-stream line buffer so a line split across frames still
|
||||
// reassembles cleanly.
|
||||
func (t *tail) readDemuxed(ctx context.Context, r *bufio.Reader) {
|
||||
header := make([]byte, 8)
|
||||
// Two line buffers — stdout vs stderr — so a partial line read
|
||||
// across multiple frames doesn't bleed into the other stream.
|
||||
buffers := map[string]*bytes.Buffer{
|
||||
"stdout": {},
|
||||
"stderr": {},
|
||||
}
|
||||
for {
|
||||
if ctx.Err() != nil {
|
||||
return
|
||||
}
|
||||
if _, err := io.ReadFull(r, header); err != nil {
|
||||
if !errors.Is(err, io.EOF) && !errors.Is(err, context.Canceled) {
|
||||
slog.Warn("logscanner: read header", "container", t.containerID, "error", err)
|
||||
}
|
||||
return
|
||||
}
|
||||
stream := "stdout"
|
||||
if header[0] == 2 {
|
||||
stream = "stderr"
|
||||
}
|
||||
size := int(uint32(header[4])<<24 | uint32(header[5])<<16 | uint32(header[6])<<8 | uint32(header[7]))
|
||||
if size <= 0 {
|
||||
continue
|
||||
}
|
||||
if size > maxFramePayloadBytes {
|
||||
slog.Warn("logscanner: frame payload exceeds cap, dropping tail",
|
||||
"container", t.containerID, "size", size, "cap", maxFramePayloadBytes)
|
||||
return
|
||||
}
|
||||
payload := make([]byte, size)
|
||||
if _, err := io.ReadFull(r, payload); err != nil {
|
||||
if !errors.Is(err, io.EOF) && !errors.Is(err, context.Canceled) {
|
||||
slog.Warn("logscanner: read payload", "container", t.containerID, "error", err)
|
||||
}
|
||||
return
|
||||
}
|
||||
buf := buffers[stream]
|
||||
buf.Write(payload)
|
||||
// Reassembly buffer should never accumulate beyond
|
||||
// maxLineBufferBytes — a stream with no newline would
|
||||
// otherwise grow unbounded. Drop the buffer (and the partial
|
||||
// line) when the cap is reached so the tail stays healthy.
|
||||
if buf.Len() > maxLineBufferBytes {
|
||||
slog.Warn("logscanner: line buffer exceeded cap, resetting",
|
||||
"container", t.containerID, "stream", stream, "size", buf.Len())
|
||||
buf.Reset()
|
||||
continue
|
||||
}
|
||||
for {
|
||||
line, ok := readLineFromBuffer(buf)
|
||||
if !ok {
|
||||
break
|
||||
}
|
||||
t.processLine(ctx, stream, line)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// readPlain reads a non-multiplexed stream (TTY mode). All lines
|
||||
// are reported as "stdout" since the API doesn't separate the
|
||||
// streams in this mode.
|
||||
func (t *tail) readPlain(ctx context.Context, r *bufio.Reader) {
|
||||
scanner := bufio.NewScanner(r)
|
||||
scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)
|
||||
for scanner.Scan() {
|
||||
if ctx.Err() != nil {
|
||||
return
|
||||
}
|
||||
t.processLine(ctx, "stdout", scanner.Text())
|
||||
}
|
||||
if err := scanner.Err(); err != nil && !errors.Is(err, context.Canceled) {
|
||||
slog.Warn("logscanner: plain read", "container", t.containerID, "error", err)
|
||||
}
|
||||
}
|
||||
|
||||
func (t *tail) processLine(ctx context.Context, stream, line string) {
|
||||
line = strings.TrimRight(line, "\r\n")
|
||||
// Strip the leading RFC3339Nano timestamp docker prepends when
|
||||
// Timestamps=true. We validate the prefix actually parses as a
|
||||
// time rather than blindly stripping at the first space — a
|
||||
// legitimate log line whose first word is 20+ chars (e.g. a long
|
||||
// hash with no whitespace) would otherwise lose data.
|
||||
if idx := strings.IndexByte(line, ' '); idx >= 20 && idx <= 40 {
|
||||
candidate := line[:idx]
|
||||
if _, err := time.Parse(time.RFC3339Nano, candidate); err == nil {
|
||||
line = line[idx+1:]
|
||||
}
|
||||
}
|
||||
snap := t.snapshot.Load()
|
||||
if snap == nil {
|
||||
return
|
||||
}
|
||||
rules := snap.EffectiveFor(t.workloadID)
|
||||
hits := t.engine.Match(t.containerID, t.workloadID, stream, line, rules)
|
||||
for _, h := range hits {
|
||||
t.emitter.EmitHit(ctx, h)
|
||||
}
|
||||
}
|
||||
|
||||
// isMultiplexedStream reports whether the next 8 bytes of r look like a
// docker multiplexed frame header: a type byte in 0..2 followed by
// three zero bytes. It uses Peek, so no bytes are consumed. If fewer
// than 8 bytes can be peeked the stream is treated as not multiplexed.
func isMultiplexedStream(r *bufio.Reader) bool {
	head, err := r.Peek(8)
	if err != nil || len(head) < 8 {
		return false
	}
	validType := head[0] <= 2
	zeroPadding := head[1] == 0 && head[2] == 0 && head[3] == 0
	return validType && zeroPadding
}
|
||||
|
||||
// readLineFromBuffer removes the first newline-terminated line from b
// and returns it without the newline. When b holds no newline it
// returns ("", false) and leaves b untouched, so a partial line stays
// buffered until the rest arrives.
func readLineFromBuffer(b *bytes.Buffer) (string, bool) {
	end := bytes.IndexByte(b.Bytes(), '\n')
	if end < 0 {
		return "", false
	}
	// The string conversion copies, so the result does not alias the
	// buffer's storage.
	line := string(b.Next(end))
	b.Next(1) // discard the newline itself
	return line, true
}
|
||||
|
||||
// validate sanity-checks the tail before launch. Returns an error
|
||||
// the manager can log rather than panicking on a nil dependency.
|
||||
func (t *tail) validate() error {
|
||||
if t.containerID == "" {
|
||||
return fmt.Errorf("tail: container_id required")
|
||||
}
|
||||
if t.docker == nil {
|
||||
return fmt.Errorf("tail: docker client required")
|
||||
}
|
||||
if t.engine == nil {
|
||||
return fmt.Errorf("tail: engine required")
|
||||
}
|
||||
if t.emitter == nil {
|
||||
return fmt.Errorf("tail: emitter required")
|
||||
}
|
||||
if t.snapshot == nil {
|
||||
return fmt.Errorf("tail: snapshot pointer required")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -0,0 +1,156 @@
|
||||
package logscanner
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"context"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
|
||||
"github.com/alexei/tinyforge/internal/store"
|
||||
)
|
||||
|
||||
// recordingEmitter captures hits from a tail without touching the
|
||||
// real store or event bus. Concurrent-safe so we can let the tail
|
||||
// goroutine push hits while the test asserts.
|
||||
type recordingEmitter struct {
|
||||
mu sync.Mutex
|
||||
hits []Hit
|
||||
}
|
||||
|
||||
func (r *recordingEmitter) EmitHit(_ context.Context, hit Hit) {
|
||||
r.mu.Lock()
|
||||
r.hits = append(r.hits, hit)
|
||||
r.mu.Unlock()
|
||||
}
|
||||
|
||||
func (r *recordingEmitter) Hits() []Hit {
|
||||
r.mu.Lock()
|
||||
defer r.mu.Unlock()
|
||||
out := make([]Hit, len(r.hits))
|
||||
copy(out, r.hits)
|
||||
return out
|
||||
}
|
||||
|
||||
// snapshotForRule wraps a single rule into an atomic Snapshot
|
||||
// pointer so tests can drive processLine without rebuilding from
|
||||
// store rows.
|
||||
func snapshotForRule(t *testing.T, pattern string) *atomic.Pointer[Snapshot] {
|
||||
t.Helper()
|
||||
rows := []store.LogScanRule{
|
||||
{ID: 1, Name: "t", Pattern: pattern, Severity: "warn", Streams: "all", Enabled: true},
|
||||
}
|
||||
snap, errs := BuildSnapshot(rows)
|
||||
if len(errs) != 0 {
|
||||
t.Fatalf("BuildSnapshot: %v", errs)
|
||||
}
|
||||
p := &atomic.Pointer[Snapshot]{}
|
||||
p.Store(snap)
|
||||
return p
|
||||
}
|
||||
|
||||
func TestProcessLine_StripsRFC3339Prefix(t *testing.T) {
|
||||
emit := &recordingEmitter{}
|
||||
snap := snapshotForRule(t, `panic`)
|
||||
tl := &tail{
|
||||
containerID: "c1",
|
||||
workloadID: "w1",
|
||||
engine: NewEngine(),
|
||||
emitter: emit,
|
||||
snapshot: snap,
|
||||
}
|
||||
tl.processLine(context.Background(), "stderr", "2026-05-11T12:34:56.123456789Z fatal panic in worker")
|
||||
hits := emit.Hits()
|
||||
if len(hits) != 1 {
|
||||
t.Fatalf("want 1 hit, got %d", len(hits))
|
||||
}
|
||||
if hits[0].Line != "fatal panic in worker" {
|
||||
t.Errorf("timestamp not stripped: %q", hits[0].Line)
|
||||
}
|
||||
}
|
||||
|
||||
func TestProcessLine_LeavesNonTimestampedLineAlone(t *testing.T) {
|
||||
// The previous heuristic stripped the first word of any line
|
||||
// whose first space landed past byte 20. A long-hash line with
|
||||
// no embedded timestamp must now survive intact.
|
||||
emit := &recordingEmitter{}
|
||||
snap := snapshotForRule(t, `(?i)hash`)
|
||||
tl := &tail{
|
||||
containerID: "c1",
|
||||
workloadID: "w1",
|
||||
engine: NewEngine(),
|
||||
emitter: emit,
|
||||
snapshot: snap,
|
||||
}
|
||||
long := "aaaaaaaaaaaaaaaaaaaaaaaa hash payload"
|
||||
tl.processLine(context.Background(), "stdout", long)
|
||||
hits := emit.Hits()
|
||||
if len(hits) != 1 {
|
||||
t.Fatalf("want 1 hit, got %d", len(hits))
|
||||
}
|
||||
if hits[0].Line != long {
|
||||
t.Errorf("non-timestamp prefix incorrectly stripped: %q", hits[0].Line)
|
||||
}
|
||||
}
|
||||
|
||||
func TestProcessLine_NoSnapshotIsSafe(t *testing.T) {
|
||||
// Tail constructed before the manager loads its first snapshot
|
||||
// (or after a transient nil) must not crash — processLine
|
||||
// returns silently when snapshot.Load() is nil.
|
||||
tl := &tail{
|
||||
containerID: "c1",
|
||||
workloadID: "w1",
|
||||
engine: NewEngine(),
|
||||
emitter: &recordingEmitter{},
|
||||
snapshot: &atomic.Pointer[Snapshot]{}, // empty pointer
|
||||
}
|
||||
tl.processLine(context.Background(), "stdout", "anything")
|
||||
}
|
||||
|
||||
func TestReadLineFromBuffer(t *testing.T) {
|
||||
buf := &bytes.Buffer{}
|
||||
buf.WriteString("line one\nline two\npartial")
|
||||
|
||||
got, ok := readLineFromBuffer(buf)
|
||||
if !ok || got != "line one" {
|
||||
t.Fatalf("first read: ok=%v got=%q", ok, got)
|
||||
}
|
||||
got, ok = readLineFromBuffer(buf)
|
||||
if !ok || got != "line two" {
|
||||
t.Fatalf("second read: ok=%v got=%q", ok, got)
|
||||
}
|
||||
// Trailing partial line stays in buffer until more data arrives.
|
||||
if _, ok := readLineFromBuffer(buf); ok {
|
||||
t.Errorf("partial line should NOT yield until newline arrives")
|
||||
}
|
||||
if buf.String() != "partial" {
|
||||
t.Errorf("remainder=%q want %q", buf.String(), "partial")
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsMultiplexedStream(t *testing.T) {
|
||||
// Valid docker frame header: type=2 (stderr), 3 nulls, then 4-byte length.
|
||||
demuxed := []byte{2, 0, 0, 0, 0, 0, 0, 12, 'h', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', '!'}
|
||||
if !isMultiplexedStream(newReader(demuxed)) {
|
||||
t.Error("valid demuxed frame should be detected")
|
||||
}
|
||||
|
||||
// Plain text: first byte is a printable letter, header check fails.
|
||||
plain := []byte("plain log line without framing\n")
|
||||
if isMultiplexedStream(newReader(plain)) {
|
||||
t.Error("plain text should not be detected as multiplexed")
|
||||
}
|
||||
|
||||
// Header with type=3 is invalid (docker uses 0,1,2 only).
|
||||
bad := []byte{3, 0, 0, 0, 0, 0, 0, 1}
|
||||
if isMultiplexedStream(newReader(bad)) {
|
||||
t.Error("type=3 header is not a valid docker frame")
|
||||
}
|
||||
}
|
||||
|
||||
// newReader wraps b in a bufio.Reader with a 32-byte buffer, which is
// large enough for the 8-byte Peek done by isMultiplexedStream.
func newReader(b []byte) *bufio.Reader {
	src := bytes.NewReader(b)
	return bufio.NewReaderSize(src, 32)
}
|
||||
@@ -40,10 +40,11 @@ type Event struct {
|
||||
type Tier string
|
||||
|
||||
const (
|
||||
TierSettings Tier = "settings"
|
||||
TierProject Tier = "project"
|
||||
TierStage Tier = "stage"
|
||||
TierSite Tier = "site"
|
||||
TierSettings Tier = "settings"
|
||||
TierProject Tier = "project"
|
||||
TierStage Tier = "stage"
|
||||
TierSite Tier = "site"
|
||||
TierEventTrigger Tier = "event_trigger"
|
||||
)
|
||||
|
||||
// Header names for outgoing webhooks. The signature header name matches
|
||||
@@ -145,6 +146,43 @@ func (n *Notifier) SendSigned(webhookURL, secret string, tier Tier, event Event)
|
||||
}()
|
||||
}
|
||||
|
||||
// SendPayload dispatches an arbitrary JSON payload to the given URL,
|
||||
// signed with HMAC-SHA256 when secret is non-empty. Used by the
|
||||
// event-trigger dispatcher: event-log → trigger filter → webhook
|
||||
// delivery. The eventType travels in the X-Tinyforge-Event header so
|
||||
// receivers can route by it without parsing the body.
|
||||
//
|
||||
// Fire-and-forget. Failures are logged at warn but never propagate;
|
||||
// trigger reliability is observed via webhook_deliveries (audit trail)
|
||||
// and the dispatcher remaining bus-driven means delivery hiccups
|
||||
// cannot back-pressure event publishing.
|
||||
func (n *Notifier) SendPayload(webhookURL, secret, eventType string, payload any) {
|
||||
if webhookURL == "" {
|
||||
return
|
||||
}
|
||||
delivery := uuid.NewString()
|
||||
timestamp := time.Now().UTC().Format(time.RFC3339)
|
||||
|
||||
n.wg.Add(1)
|
||||
go func() {
|
||||
defer n.wg.Done()
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
|
||||
_, err := n.doSendRaw(ctx, webhookURL, secret, TierEventTrigger, delivery, eventType, timestamp, payload)
|
||||
host := safeHost(webhookURL)
|
||||
if err != nil {
|
||||
slog.Warn("notify: trigger webhook send failed",
|
||||
"tier", TierEventTrigger, "host", host, "delivery", delivery,
|
||||
"event", eventType, "signed", secret != "", "error", err)
|
||||
return
|
||||
}
|
||||
slog.Info("notify: trigger webhook dispatched",
|
||||
"tier", TierEventTrigger, "host", host, "delivery", delivery,
|
||||
"event", eventType, "signed", secret != "")
|
||||
}()
|
||||
}
|
||||
|
||||
// SendSyncForTest performs a synchronous, single-shot send for the "Send
|
||||
// test" UI button. Returns a TestResult describing what the receiver
|
||||
// answered with so the operator can confirm wiring without watching server
|
||||
@@ -183,6 +221,42 @@ func (n *Notifier) SendSyncForTest(ctx context.Context, webhookURL, secret strin
|
||||
return result
|
||||
}
|
||||
|
||||
// SendSyncForTestPayload is the arbitrary-payload counterpart to
|
||||
// SendSyncForTest. Returns the same TestResult shape but sends an
|
||||
// arbitrary payload + event-type pair through the shared HTTP+HMAC
|
||||
// core. Used by the event-trigger /test endpoint so the operator's
|
||||
// receiver sees the same envelope shape it will receive during normal
|
||||
// dispatch — verifying a different payload would defeat the test's
|
||||
// purpose.
|
||||
func (n *Notifier) SendSyncForTestPayload(ctx context.Context, webhookURL, secret string, tier Tier, eventType string, payload any) TestResult {
|
||||
delivery := uuid.NewString()
|
||||
timestamp := time.Now().UTC().Format(time.RFC3339)
|
||||
result := TestResult{
|
||||
URL: webhookURL,
|
||||
Tier: tier,
|
||||
SignatureSent: secret != "",
|
||||
DeliveryID: delivery,
|
||||
}
|
||||
if webhookURL == "" {
|
||||
result.Error = "no webhook URL configured for this tier"
|
||||
return result
|
||||
}
|
||||
start := time.Now()
|
||||
resp, err := n.doSendRaw(ctx, webhookURL, secret, tier, delivery, eventType, timestamp, payload)
|
||||
result.LatencyMs = time.Since(start).Milliseconds()
|
||||
if err != nil {
|
||||
result.Error = err.Error()
|
||||
if resp != nil {
|
||||
result.StatusCode = resp.StatusCode
|
||||
result.ResponseSnippet = resp.BodyPreview
|
||||
}
|
||||
return result
|
||||
}
|
||||
result.StatusCode = resp.StatusCode
|
||||
result.ResponseSnippet = resp.BodyPreview
|
||||
return result
|
||||
}
|
||||
|
||||
// sendResponse captures the small subset of the receiver's response we want
|
||||
// to surface back to the operator (status + a body preview). Distinct from
|
||||
// http.Response so callers don't accidentally hold an unread body.
|
||||
@@ -198,7 +272,16 @@ type sendResponse struct {
|
||||
// exactly what travels on the wire. Receivers MUST verify against the raw
|
||||
// body, not a re-serialised copy.
|
||||
func (n *Notifier) doSend(ctx context.Context, webhookURL, secret string, tier Tier, delivery string, event Event) (*sendResponse, error) {
|
||||
body, err := json.Marshal(event)
|
||||
return n.doSendRaw(ctx, webhookURL, secret, tier, delivery, event.Type, event.Timestamp, event)
|
||||
}
|
||||
|
||||
// doSendRaw is the shared HTTP+HMAC core. It serializes any payload to
|
||||
// JSON, signs the resulting bytes (if a secret is configured) and
|
||||
// dispatches with the same Tinyforge headers as the legacy deploy-event
|
||||
// path. Separated out so SendPayload can reuse it without forcing the
|
||||
// caller to fit into the Event shape.
|
||||
func (n *Notifier) doSendRaw(ctx context.Context, webhookURL, secret string, tier Tier, delivery, eventType, timestamp string, payload any) (*sendResponse, error) {
|
||||
body, err := json.Marshal(payload)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("marshal notification: %w", err)
|
||||
}
|
||||
@@ -209,9 +292,9 @@ func (n *Notifier) doSend(ctx context.Context, webhookURL, secret string, tier T
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
req.Header.Set("User-Agent", userAgent)
|
||||
req.Header.Set(HeaderEvent, event.Type)
|
||||
req.Header.Set(HeaderEvent, eventType)
|
||||
req.Header.Set(HeaderDelivery, delivery)
|
||||
req.Header.Set(HeaderTimestamp, event.Timestamp)
|
||||
req.Header.Set(HeaderTimestamp, timestamp)
|
||||
req.Header.Set(HeaderTier, string(tier))
|
||||
if secret != "" {
|
||||
req.Header.Set(HeaderSignature, "sha256="+sign(secret, body))
|
||||
|
||||
@@ -0,0 +1,208 @@
|
||||
package store
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// CreateEventTrigger inserts a new trigger row. ID is assigned by the
|
||||
// auto-increment column and returned on the populated struct.
|
||||
func (s *Store) CreateEventTrigger(t EventTrigger) (EventTrigger, error) {
|
||||
if strings.TrimSpace(t.Name) == "" {
|
||||
return EventTrigger{}, fmt.Errorf("event_trigger: name is required")
|
||||
}
|
||||
if t.ActionType == "" {
|
||||
t.ActionType = EventTriggerActionWebhook
|
||||
}
|
||||
if t.ActionType != EventTriggerActionWebhook {
|
||||
return EventTrigger{}, fmt.Errorf("event_trigger: unsupported action_type %q", t.ActionType)
|
||||
}
|
||||
if strings.TrimSpace(t.ActionTarget) == "" {
|
||||
return EventTrigger{}, fmt.Errorf("event_trigger: action_target is required")
|
||||
}
|
||||
|
||||
now := Now()
|
||||
t.CreatedAt = now
|
||||
t.UpdatedAt = now
|
||||
|
||||
res, err := s.db.Exec(
|
||||
`INSERT INTO event_triggers
|
||||
(name, filter_severity, filter_source, filter_message_regex,
|
||||
action_type, action_target, action_secret, enabled,
|
||||
created_at, updated_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
||||
t.Name, t.FilterSeverity, t.FilterSource, t.FilterMessageRegex,
|
||||
t.ActionType, t.ActionTarget, t.ActionSecret, boolToInt(t.Enabled),
|
||||
t.CreatedAt, t.UpdatedAt,
|
||||
)
|
||||
if err != nil {
|
||||
return EventTrigger{}, fmt.Errorf("insert event trigger: %w", err)
|
||||
}
|
||||
id, err := res.LastInsertId()
|
||||
if err != nil {
|
||||
return EventTrigger{}, fmt.Errorf("get event trigger id: %w", err)
|
||||
}
|
||||
t.ID = id
|
||||
return t, nil
|
||||
}
|
||||
|
||||
// ListEventTriggers returns every trigger row, ordered by id so the UI
|
||||
// rendering is stable across requests. Trigger counts are expected to
|
||||
// be small (operator-curated), so unbounded listing is fine.
|
||||
func (s *Store) ListEventTriggers() ([]EventTrigger, error) {
|
||||
rows, err := s.db.Query(
|
||||
`SELECT id, name, filter_severity, filter_source, filter_message_regex,
|
||||
action_type, action_target, action_secret, enabled,
|
||||
created_at, updated_at
|
||||
FROM event_triggers ORDER BY id`,
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("query event triggers: %w", err)
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
out := []EventTrigger{}
|
||||
for rows.Next() {
|
||||
t, err := scanEventTrigger(rows)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
out = append(out, t)
|
||||
}
|
||||
return out, rows.Err()
|
||||
}
|
||||
|
||||
// ListEnabledEventTriggers returns only the rows with enabled=1. The
|
||||
// dispatcher hot path uses this so a disabled trigger costs nothing.
|
||||
func (s *Store) ListEnabledEventTriggers() ([]EventTrigger, error) {
|
||||
rows, err := s.db.Query(
|
||||
`SELECT id, name, filter_severity, filter_source, filter_message_regex,
|
||||
action_type, action_target, action_secret, enabled,
|
||||
created_at, updated_at
|
||||
FROM event_triggers WHERE enabled = 1 ORDER BY id`,
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("query enabled event triggers: %w", err)
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
out := []EventTrigger{}
|
||||
for rows.Next() {
|
||||
t, err := scanEventTrigger(rows)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
out = append(out, t)
|
||||
}
|
||||
return out, rows.Err()
|
||||
}
|
||||
|
||||
// GetEventTrigger returns one trigger by ID or ErrNotFound.
|
||||
func (s *Store) GetEventTrigger(id int64) (EventTrigger, error) {
|
||||
row := s.db.QueryRow(
|
||||
`SELECT id, name, filter_severity, filter_source, filter_message_regex,
|
||||
action_type, action_target, action_secret, enabled,
|
||||
created_at, updated_at
|
||||
FROM event_triggers WHERE id = ?`, id,
|
||||
)
|
||||
t, err := scanEventTriggerRow(row)
|
||||
if errors.Is(err, sql.ErrNoRows) {
|
||||
return EventTrigger{}, fmt.Errorf("event trigger %d: %w", id, ErrNotFound)
|
||||
}
|
||||
if err != nil {
|
||||
return EventTrigger{}, fmt.Errorf("query event trigger: %w", err)
|
||||
}
|
||||
return t, nil
|
||||
}
|
||||
|
||||
// UpdateEventTrigger overwrites the editable columns of an existing row.
|
||||
// CreatedAt is preserved; UpdatedAt is refreshed.
|
||||
func (s *Store) UpdateEventTrigger(t EventTrigger) (EventTrigger, error) {
|
||||
if t.ID == 0 {
|
||||
return EventTrigger{}, fmt.Errorf("event_trigger: id is required for update")
|
||||
}
|
||||
if strings.TrimSpace(t.Name) == "" {
|
||||
return EventTrigger{}, fmt.Errorf("event_trigger: name is required")
|
||||
}
|
||||
if t.ActionType == "" {
|
||||
t.ActionType = EventTriggerActionWebhook
|
||||
}
|
||||
if t.ActionType != EventTriggerActionWebhook {
|
||||
return EventTrigger{}, fmt.Errorf("event_trigger: unsupported action_type %q", t.ActionType)
|
||||
}
|
||||
if strings.TrimSpace(t.ActionTarget) == "" {
|
||||
return EventTrigger{}, fmt.Errorf("event_trigger: action_target is required")
|
||||
}
|
||||
|
||||
t.UpdatedAt = Now()
|
||||
res, err := s.db.Exec(
|
||||
`UPDATE event_triggers
|
||||
SET name = ?, filter_severity = ?, filter_source = ?,
|
||||
filter_message_regex = ?, action_type = ?, action_target = ?,
|
||||
action_secret = ?, enabled = ?, updated_at = ?
|
||||
WHERE id = ?`,
|
||||
t.Name, t.FilterSeverity, t.FilterSource, t.FilterMessageRegex,
|
||||
t.ActionType, t.ActionTarget, t.ActionSecret, boolToInt(t.Enabled),
|
||||
t.UpdatedAt, t.ID,
|
||||
)
|
||||
if err != nil {
|
||||
return EventTrigger{}, fmt.Errorf("update event trigger: %w", err)
|
||||
}
|
||||
n, _ := res.RowsAffected()
|
||||
if n == 0 {
|
||||
return EventTrigger{}, fmt.Errorf("event trigger %d: %w", t.ID, ErrNotFound)
|
||||
}
|
||||
return s.GetEventTrigger(t.ID)
|
||||
}
|
||||
|
||||
// DeleteEventTrigger removes a trigger by ID. Idempotent on the
|
||||
// caller's side: returns ErrNotFound if the row is already gone so a
|
||||
// double-click in the UI gives a clean error rather than 500.
|
||||
func (s *Store) DeleteEventTrigger(id int64) error {
|
||||
res, err := s.db.Exec(`DELETE FROM event_triggers WHERE id = ?`, id)
|
||||
if err != nil {
|
||||
return fmt.Errorf("delete event trigger: %w", err)
|
||||
}
|
||||
n, _ := res.RowsAffected()
|
||||
if n == 0 {
|
||||
return fmt.Errorf("event trigger %d: %w", id, ErrNotFound)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func scanEventTrigger(rows *sql.Rows) (EventTrigger, error) {
|
||||
var t EventTrigger
|
||||
var enabled int
|
||||
if err := rows.Scan(
|
||||
&t.ID, &t.Name, &t.FilterSeverity, &t.FilterSource, &t.FilterMessageRegex,
|
||||
&t.ActionType, &t.ActionTarget, &t.ActionSecret, &enabled,
|
||||
&t.CreatedAt, &t.UpdatedAt,
|
||||
); err != nil {
|
||||
return EventTrigger{}, fmt.Errorf("scan event trigger: %w", err)
|
||||
}
|
||||
t.Enabled = enabled != 0
|
||||
return t, nil
|
||||
}
|
||||
|
||||
func scanEventTriggerRow(row *sql.Row) (EventTrigger, error) {
|
||||
var t EventTrigger
|
||||
var enabled int
|
||||
if err := row.Scan(
|
||||
&t.ID, &t.Name, &t.FilterSeverity, &t.FilterSource, &t.FilterMessageRegex,
|
||||
&t.ActionType, &t.ActionTarget, &t.ActionSecret, &enabled,
|
||||
&t.CreatedAt, &t.UpdatedAt,
|
||||
); err != nil {
|
||||
return EventTrigger{}, err
|
||||
}
|
||||
t.Enabled = enabled != 0
|
||||
return t, nil
|
||||
}
|
||||
|
||||
// boolToInt converts a Go bool into the 0/1 integer form used for the
// INTEGER "enabled" columns: true becomes 1, false becomes 0.
func boolToInt(b bool) int {
	flag := 0
	if b {
		flag = 1
	}
	return flag
}
|
||||
@@ -0,0 +1,256 @@
|
||||
package store
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// CreateLogScanRule inserts a new rule row. Validates severity +
|
||||
// streams enum membership and rejects negative cooldowns.
|
||||
func (s *Store) CreateLogScanRule(r LogScanRule) (LogScanRule, error) {
|
||||
if err := validateLogScanRule(r); err != nil {
|
||||
return LogScanRule{}, err
|
||||
}
|
||||
now := Now()
|
||||
r.CreatedAt = now
|
||||
r.UpdatedAt = now
|
||||
res, err := s.db.Exec(
|
||||
`INSERT INTO log_scan_rules
|
||||
(workload_id, overrides_id, name, pattern, severity, streams,
|
||||
cooldown_seconds, enabled, created_at, updated_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
||||
r.WorkloadID, r.OverridesID, r.Name, r.Pattern, r.Severity, r.Streams,
|
||||
r.CooldownSeconds, boolToInt(r.Enabled), r.CreatedAt, r.UpdatedAt,
|
||||
)
|
||||
if err != nil {
|
||||
return LogScanRule{}, fmt.Errorf("insert log scan rule: %w", err)
|
||||
}
|
||||
id, err := res.LastInsertId()
|
||||
if err != nil {
|
||||
return LogScanRule{}, fmt.Errorf("get log scan rule id: %w", err)
|
||||
}
|
||||
r.ID = id
|
||||
return r, nil
|
||||
}
|
||||
|
||||
// ListLogScanRules returns every rule, ordered by id for stable UI
|
||||
// rendering.
|
||||
func (s *Store) ListLogScanRules() ([]LogScanRule, error) {
|
||||
return s.queryLogScanRules(
|
||||
`SELECT id, workload_id, overrides_id, name, pattern, severity, streams,
|
||||
cooldown_seconds, enabled, created_at, updated_at
|
||||
FROM log_scan_rules ORDER BY id`,
|
||||
)
|
||||
}
|
||||
|
||||
// ListLogScanRulesByWorkload returns all rows directly attached to
|
||||
// the workload (workload-only additions and per-workload overrides),
|
||||
// excluding global rules. Useful for the workload detail page.
|
||||
func (s *Store) ListLogScanRulesByWorkload(workloadID string) ([]LogScanRule, error) {
|
||||
return s.queryLogScanRules(
|
||||
`SELECT id, workload_id, overrides_id, name, pattern, severity, streams,
|
||||
cooldown_seconds, enabled, created_at, updated_at
|
||||
FROM log_scan_rules WHERE workload_id = ? ORDER BY id`,
|
||||
workloadID,
|
||||
)
|
||||
}
|
||||
|
||||
// GetLogScanRule fetches one rule by id or returns ErrNotFound.
|
||||
func (s *Store) GetLogScanRule(id int64) (LogScanRule, error) {
|
||||
row := s.db.QueryRow(
|
||||
`SELECT id, workload_id, overrides_id, name, pattern, severity, streams,
|
||||
cooldown_seconds, enabled, created_at, updated_at
|
||||
FROM log_scan_rules WHERE id = ?`, id,
|
||||
)
|
||||
r, err := scanLogScanRuleRow(row)
|
||||
if errors.Is(err, sql.ErrNoRows) {
|
||||
return LogScanRule{}, fmt.Errorf("log scan rule %d: %w", id, ErrNotFound)
|
||||
}
|
||||
if err != nil {
|
||||
return LogScanRule{}, fmt.Errorf("query log scan rule: %w", err)
|
||||
}
|
||||
return r, nil
|
||||
}
|
||||
|
||||
// UpdateLogScanRule overwrites the editable columns of a rule row.
|
||||
// id, workload_id, overrides_id are immutable on update — change the
|
||||
// scope of a rule by deleting + recreating, to keep the
|
||||
// hot-reload-snapshot semantics simple.
|
||||
func (s *Store) UpdateLogScanRule(r LogScanRule) (LogScanRule, error) {
|
||||
if r.ID == 0 {
|
||||
return LogScanRule{}, fmt.Errorf("log scan rule: id is required for update")
|
||||
}
|
||||
if err := validateLogScanRule(r); err != nil {
|
||||
return LogScanRule{}, err
|
||||
}
|
||||
r.UpdatedAt = Now()
|
||||
res, err := s.db.Exec(
|
||||
`UPDATE log_scan_rules
|
||||
SET name = ?, pattern = ?, severity = ?, streams = ?,
|
||||
cooldown_seconds = ?, enabled = ?, updated_at = ?
|
||||
WHERE id = ?`,
|
||||
r.Name, r.Pattern, r.Severity, r.Streams,
|
||||
r.CooldownSeconds, boolToInt(r.Enabled), r.UpdatedAt, r.ID,
|
||||
)
|
||||
if err != nil {
|
||||
return LogScanRule{}, fmt.Errorf("update log scan rule: %w", err)
|
||||
}
|
||||
n, _ := res.RowsAffected()
|
||||
if n == 0 {
|
||||
return LogScanRule{}, fmt.Errorf("log scan rule %d: %w", r.ID, ErrNotFound)
|
||||
}
|
||||
return s.GetLogScanRule(r.ID)
|
||||
}
|
||||
|
||||
// DeleteLogScanRule removes a rule by id. Override rows referencing
|
||||
// this id are cascade-deleted at the application layer because we
|
||||
// don't enforce SQLite FK constraints repo-wide. The two DELETEs run
|
||||
// inside a single transaction so a mid-cascade failure can't leave
|
||||
// overrides orphaned by a vanished global.
|
||||
func (s *Store) DeleteLogScanRule(id int64) error {
|
||||
tx, err := s.db.Begin()
|
||||
if err != nil {
|
||||
return fmt.Errorf("begin delete tx: %w", err)
|
||||
}
|
||||
defer tx.Rollback() //nolint:errcheck // commit path returns nil; rollback after commit is a no-op
|
||||
if _, err := tx.Exec(`DELETE FROM log_scan_rules WHERE overrides_id = ?`, id); err != nil {
|
||||
return fmt.Errorf("delete dependent log scan overrides: %w", err)
|
||||
}
|
||||
res, err := tx.Exec(`DELETE FROM log_scan_rules WHERE id = ?`, id)
|
||||
if err != nil {
|
||||
return fmt.Errorf("delete log scan rule: %w", err)
|
||||
}
|
||||
n, _ := res.RowsAffected()
|
||||
if n == 0 {
|
||||
return fmt.Errorf("log scan rule %d: %w", id, ErrNotFound)
|
||||
}
|
||||
if err := tx.Commit(); err != nil {
|
||||
return fmt.Errorf("commit delete tx: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// EffectiveLogScanRules computes the effective rule set for one
|
||||
// workload according to the spec in docs/LOGSCAN_AND_TRIGGERS_TODO.md:
|
||||
//
|
||||
// 1. All global rules (workload_id == "" AND overrides_id == 0)
|
||||
// minus globals that have a per-workload override row.
|
||||
// 2. Plus workload-only rules (workload_id == X AND overrides_id == 0).
|
||||
// 3. Plus per-workload override rules (workload_id == X AND overrides_id != 0),
|
||||
// which carry the override's own enabled/pattern/severity.
|
||||
//
|
||||
// Computed in Go after two simple SELECTs since rule counts will be
|
||||
// small (operator-curated, dozens not thousands).
|
||||
func (s *Store) EffectiveLogScanRules(workloadID string) ([]LogScanRule, error) {
|
||||
all, err := s.ListLogScanRules()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
overrides := map[int64]LogScanRule{} // globalID -> override row
|
||||
var workloadOnly []LogScanRule
|
||||
var globals []LogScanRule
|
||||
for _, r := range all {
|
||||
switch {
|
||||
case r.WorkloadID == "" && r.OverridesID == 0:
|
||||
globals = append(globals, r)
|
||||
case r.WorkloadID == workloadID && r.OverridesID == 0:
|
||||
workloadOnly = append(workloadOnly, r)
|
||||
case r.WorkloadID == workloadID && r.OverridesID != 0:
|
||||
overrides[r.OverridesID] = r
|
||||
}
|
||||
}
|
||||
out := make([]LogScanRule, 0, len(globals)+len(workloadOnly))
|
||||
for _, g := range globals {
|
||||
if ov, ok := overrides[g.ID]; ok {
|
||||
// Override row's fields win — including enabled=false to
|
||||
// turn off the global for this workload.
|
||||
out = append(out, ov)
|
||||
} else {
|
||||
out = append(out, g)
|
||||
}
|
||||
}
|
||||
out = append(out, workloadOnly...)
|
||||
return out, nil
|
||||
}
|
||||
|
||||
func (s *Store) queryLogScanRules(query string, args ...any) ([]LogScanRule, error) {
|
||||
rows, err := s.db.Query(query, args...)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("query log scan rules: %w", err)
|
||||
}
|
||||
defer rows.Close()
|
||||
out := []LogScanRule{}
|
||||
for rows.Next() {
|
||||
r, err := scanLogScanRuleRows(rows)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
out = append(out, r)
|
||||
}
|
||||
return out, rows.Err()
|
||||
}
|
||||
|
||||
func scanLogScanRuleRows(rows *sql.Rows) (LogScanRule, error) {
|
||||
var r LogScanRule
|
||||
var enabled int
|
||||
if err := rows.Scan(
|
||||
&r.ID, &r.WorkloadID, &r.OverridesID, &r.Name, &r.Pattern, &r.Severity, &r.Streams,
|
||||
&r.CooldownSeconds, &enabled, &r.CreatedAt, &r.UpdatedAt,
|
||||
); err != nil {
|
||||
return LogScanRule{}, fmt.Errorf("scan log scan rule: %w", err)
|
||||
}
|
||||
r.Enabled = enabled != 0
|
||||
return r, nil
|
||||
}
|
||||
|
||||
func scanLogScanRuleRow(row *sql.Row) (LogScanRule, error) {
|
||||
var r LogScanRule
|
||||
var enabled int
|
||||
if err := row.Scan(
|
||||
&r.ID, &r.WorkloadID, &r.OverridesID, &r.Name, &r.Pattern, &r.Severity, &r.Streams,
|
||||
&r.CooldownSeconds, &enabled, &r.CreatedAt, &r.UpdatedAt,
|
||||
); err != nil {
|
||||
return LogScanRule{}, err
|
||||
}
|
||||
r.Enabled = enabled != 0
|
||||
return r, nil
|
||||
}
|
||||
|
||||
// validateLogScanRule enforces the per-row invariants. Regex
|
||||
// compilation is intentionally NOT done here — it's a hot-path
|
||||
// concern owned by the engine snapshot, and engine compile errors
|
||||
// become engine-side warnings rather than store-side rejections to
|
||||
// keep the failure mode operator-debuggable.
|
||||
func validateLogScanRule(r LogScanRule) error {
|
||||
if strings.TrimSpace(r.Name) == "" {
|
||||
return fmt.Errorf("log scan rule: name is required")
|
||||
}
|
||||
if strings.TrimSpace(r.Pattern) == "" {
|
||||
return fmt.Errorf("log scan rule: pattern is required")
|
||||
}
|
||||
switch r.Severity {
|
||||
case LogScanSeverityInfo, LogScanSeverityWarn, LogScanSeverityError:
|
||||
case "":
|
||||
// Default applied at the caller; allow blank.
|
||||
default:
|
||||
return fmt.Errorf("log scan rule: invalid severity %q", r.Severity)
|
||||
}
|
||||
switch r.Streams {
|
||||
case LogScanStreamAll, LogScanStreamStdout, LogScanStreamStderr:
|
||||
case "":
|
||||
default:
|
||||
return fmt.Errorf("log scan rule: invalid streams %q", r.Streams)
|
||||
}
|
||||
if r.CooldownSeconds < 0 {
|
||||
return fmt.Errorf("log scan rule: cooldown_seconds must be >= 0")
|
||||
}
|
||||
// An override row must reference an existing global id and live
|
||||
// under a specific workload. The store doesn't verify the FK
|
||||
// (no PRAGMA foreign_keys), but we can sanity-check the shape.
|
||||
if r.OverridesID != 0 && r.WorkloadID == "" {
|
||||
return fmt.Errorf("log scan rule: override row requires workload_id")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -0,0 +1,155 @@
|
||||
package store
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestCreateLogScanRule_Validates(t *testing.T) {
|
||||
s := newTestStore(t)
|
||||
cases := []struct {
|
||||
name string
|
||||
in LogScanRule
|
||||
wantErr string
|
||||
}{
|
||||
{
|
||||
name: "missing name",
|
||||
in: LogScanRule{Pattern: "x"},
|
||||
wantErr: "name is required",
|
||||
},
|
||||
{
|
||||
name: "missing pattern",
|
||||
in: LogScanRule{Name: "n"},
|
||||
wantErr: "pattern is required",
|
||||
},
|
||||
{
|
||||
name: "bad severity",
|
||||
in: LogScanRule{Name: "n", Pattern: "x", Severity: "loud"},
|
||||
wantErr: "invalid severity",
|
||||
},
|
||||
{
|
||||
name: "bad streams",
|
||||
in: LogScanRule{Name: "n", Pattern: "x", Streams: "both"},
|
||||
wantErr: "invalid streams",
|
||||
},
|
||||
{
|
||||
name: "negative cooldown",
|
||||
in: LogScanRule{Name: "n", Pattern: "x", CooldownSeconds: -1},
|
||||
wantErr: "cooldown_seconds must be",
|
||||
},
|
||||
{
|
||||
name: "override without workload",
|
||||
in: LogScanRule{Name: "n", Pattern: "x", OverridesID: 5},
|
||||
wantErr: "override row requires workload_id",
|
||||
},
|
||||
}
|
||||
for _, c := range cases {
|
||||
t.Run(c.name, func(t *testing.T) {
|
||||
_, err := s.CreateLogScanRule(c.in)
|
||||
if err == nil {
|
||||
t.Fatalf("expected error containing %q, got nil", c.wantErr)
|
||||
}
|
||||
if !strings.Contains(err.Error(), c.wantErr) {
|
||||
t.Fatalf("error mismatch: got %q want substring %q", err.Error(), c.wantErr)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestCreateAndGetLogScanRule(t *testing.T) {
|
||||
s := newTestStore(t)
|
||||
r, err := s.CreateLogScanRule(LogScanRule{
|
||||
Name: "panics", Pattern: `\bpanic\b`, Severity: "error", Streams: "stderr",
|
||||
CooldownSeconds: 30, Enabled: true,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("create: %v", err)
|
||||
}
|
||||
if r.ID == 0 {
|
||||
t.Fatal("id should be set")
|
||||
}
|
||||
got, err := s.GetLogScanRule(r.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("get: %v", err)
|
||||
}
|
||||
if got.Pattern != `\bpanic\b` {
|
||||
t.Errorf("pattern mismatch: %q", got.Pattern)
|
||||
}
|
||||
if !got.Enabled {
|
||||
t.Error("enabled lost on round-trip")
|
||||
}
|
||||
}
|
||||
|
||||
func TestEffectiveLogScanRules(t *testing.T) {
|
||||
s := newTestStore(t)
|
||||
g, _ := s.CreateLogScanRule(LogScanRule{
|
||||
Name: "global", Pattern: "panic", Severity: "warn", Streams: "all", Enabled: true,
|
||||
})
|
||||
_, _ = s.CreateLogScanRule(LogScanRule{
|
||||
Name: "w1-only", Pattern: "slow_query", WorkloadID: "w1", Severity: "info", Streams: "all", Enabled: true,
|
||||
})
|
||||
_, _ = s.CreateLogScanRule(LogScanRule{
|
||||
Name: "override-for-w1", Pattern: "panic", WorkloadID: "w1", OverridesID: g.ID,
|
||||
Severity: "error", Streams: "all", Enabled: true,
|
||||
})
|
||||
|
||||
w1, err := s.EffectiveLogScanRules("w1")
|
||||
if err != nil {
|
||||
t.Fatalf("effective w1: %v", err)
|
||||
}
|
||||
if len(w1) != 2 {
|
||||
t.Fatalf("w1 effective should be 2 (override + addition), got %d", len(w1))
|
||||
}
|
||||
// First entry replaces the global with the override (error severity).
|
||||
if w1[0].Severity != "error" {
|
||||
t.Errorf("override severity not applied: %q", w1[0].Severity)
|
||||
}
|
||||
|
||||
w2, err := s.EffectiveLogScanRules("w2")
|
||||
if err != nil {
|
||||
t.Fatalf("effective w2: %v", err)
|
||||
}
|
||||
if len(w2) != 1 {
|
||||
t.Fatalf("w2 effective should be 1 (just the global), got %d", len(w2))
|
||||
}
|
||||
if w2[0].Severity != "warn" {
|
||||
t.Errorf("w2 should see original severity: %q", w2[0].Severity)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeleteLogScanRule_CascadesOverrides(t *testing.T) {
|
||||
s := newTestStore(t)
|
||||
g, _ := s.CreateLogScanRule(LogScanRule{
|
||||
Name: "global", Pattern: "panic", Severity: "warn", Streams: "all", Enabled: true,
|
||||
})
|
||||
ov, _ := s.CreateLogScanRule(LogScanRule{
|
||||
Name: "override", Pattern: "panic", WorkloadID: "w1", OverridesID: g.ID,
|
||||
Severity: "error", Streams: "all", Enabled: true,
|
||||
})
|
||||
|
||||
if err := s.DeleteLogScanRule(g.ID); err != nil {
|
||||
t.Fatalf("delete: %v", err)
|
||||
}
|
||||
if _, err := s.GetLogScanRule(ov.ID); err == nil {
|
||||
t.Error("override should be cascade-deleted with its global")
|
||||
}
|
||||
}
|
||||
|
||||
func TestUpdateLogScanRule(t *testing.T) {
|
||||
s := newTestStore(t)
|
||||
r, _ := s.CreateLogScanRule(LogScanRule{
|
||||
Name: "n", Pattern: "x", Severity: "warn", Streams: "all", Enabled: true,
|
||||
})
|
||||
r.Pattern = "y"
|
||||
r.Enabled = false
|
||||
got, err := s.UpdateLogScanRule(r)
|
||||
if err != nil {
|
||||
t.Fatalf("update: %v", err)
|
||||
}
|
||||
if got.Pattern != "y" {
|
||||
t.Errorf("pattern not updated: %q", got.Pattern)
|
||||
}
|
||||
if got.Enabled {
|
||||
t.Error("enabled=false not applied")
|
||||
}
|
||||
}
|
||||
+126
-4
@@ -197,6 +197,34 @@ type StageEnv struct {
|
||||
UpdatedAt string `json:"updated_at"`
|
||||
}
|
||||
|
||||
// WorkloadVolume is a per-workload mount declaration: the plugin-shape
// counterpart of the legacy Volume. Its Scope values follow the
// existing VolumeScope contract so that the legacy resolver can be
// reused once its project_id assumption is loosened.
type WorkloadVolume struct {
	ID         string `json:"id"`
	WorkloadID string `json:"workload_id"` // owning workload
	Source     string `json:"source"`
	Target     string `json:"target"`
	Scope      string `json:"scope"`
	Name       string `json:"name"`
	CreatedAt  string `json:"created_at"`
	UpdatedAt  string `json:"updated_at"`
}
|
||||
|
||||
// WorkloadEnv is one per-workload environment variable override: the
// plugin-shape counterpart of StageEnv. Values may be encrypted at
// rest. The Source plugin reads these at deploy time and merges them on
// top of source_config.env.
type WorkloadEnv struct {
	ID         string `json:"id"`
	WorkloadID string `json:"workload_id"` // owning workload
	Key        string `json:"key"`
	Value      string `json:"value"`
	Encrypted  bool   `json:"encrypted"`
	CreatedAt  string `json:"created_at"`
	UpdatedAt  string `json:"updated_at"`
}
|
||||
|
||||
// VolumeScope defines the sharing scope for a volume mount.
|
||||
// Valid scopes: instance, stage, project, project_named, named, ephemeral.
|
||||
type VolumeScope string
|
||||
@@ -333,6 +361,82 @@ type EventLog struct {
|
||||
CreatedAt string `json:"created_at"`
|
||||
}
|
||||
|
||||
// EventTrigger is a filter+action rule evaluated against EventLog
// entries published on the bus. When every non-empty filter matches,
// the trigger fires its configured action (webhook today; further
// action types can be added through ActionType).
//
// Multi-value filters (severity, source) are stored as comma-separated
// lists to keep the schema flat; an empty string means "no filter on
// this dimension".
//
// Loop-prevention: deliveries are recorded in webhook_deliveries (the
// existing audit trail). The dispatcher MUST NOT write to event_log or
// it will recurse.
type EventTrigger struct {
	ID   int64  `json:"id"`
	Name string `json:"name"`

	// FilterSeverity is a comma list such as "warn,error"; "" matches any.
	FilterSeverity string `json:"filter_severity"`

	// FilterSource is a comma list such as "logscan,deploy"; "" matches any.
	FilterSource string `json:"filter_source"`

	// FilterMessageRegex is a single regex evaluated against
	// EventLog.Message; "" matches any.
	FilterMessageRegex string `json:"filter_message_regex"`

	// ActionType selects the action; "webhook" is the only value today.
	ActionType string `json:"action_type"`

	// ActionTarget is the destination of the action: the URL for a webhook.
	ActionTarget string `json:"action_target"`

	// ActionSecret is the optional HMAC secret for signed delivery.
	ActionSecret string `json:"action_secret"`

	Enabled   bool   `json:"enabled"`
	CreatedAt string `json:"created_at"`
	UpdatedAt string `json:"updated_at"`
}
|
||||
|
||||
// EventTriggerActionWebhook is the only EventTrigger.ActionType value
// supported today. Adding a new action is additive — old triggers keep
// working, the dispatcher just learns a new branch.
const EventTriggerActionWebhook = "webhook"
|
||||
|
||||
// LogScanRule is one regex-based pattern that the log scanner evaluates
// against container log lines. The (WorkloadID, OverridesID) pair
// encodes the "global rule with optional per-workload override" scheme
// described in docs/LOGSCAN_AND_TRIGGERS_TODO.md:
//
//   - WorkloadID == "" && OverridesID == 0: a global rule, applied to
//     every workload unless overridden.
//   - WorkloadID != "" && OverridesID == 0: a workload-only addition.
//   - WorkloadID != "" && OverridesID != 0: an override of the
//     referenced global rule for that one workload (set Enabled=false
//     to switch the global off for this workload).
type LogScanRule struct {
	ID int64 `json:"id"`

	// WorkloadID is "" for a global rule.
	WorkloadID string `json:"workload_id"`

	// OverridesID is 0 when the row is not an override.
	OverridesID int64 `json:"overrides_id"`

	Name string `json:"name"`

	// Pattern is a regex, compiled at load.
	Pattern string `json:"pattern"`

	// Severity is one of info|warn|error.
	Severity string `json:"severity"`

	// Streams is one of all|stdout|stderr.
	Streams string `json:"streams"`

	CooldownSeconds int    `json:"cooldown_seconds"`
	Enabled         bool   `json:"enabled"`
	CreatedAt       string `json:"created_at"`
	UpdatedAt       string `json:"updated_at"`
}
|
||||
|
||||
// Values accepted for LogScanRule.Streams. They are used both for store
// validation and at docker-side log read time.
const (
	// LogScanStreamAll reads both stdout and stderr.
	LogScanStreamAll = "all"
	// LogScanStreamStdout restricts a rule to stdout.
	LogScanStreamStdout = "stdout"
	// LogScanStreamStderr restricts a rule to stderr.
	LogScanStreamStderr = "stderr"
)
|
||||
|
||||
// Values accepted for LogScanRule.Severity. They mirror the event_log
// severity enum, so a matched rule lands as an event_log row carrying
// the rule's severity verbatim.
const (
	LogScanSeverityInfo  = "info"
	LogScanSeverityWarn  = "warn"
	LogScanSeverityError = "error"
)
|
||||
|
||||
// WorkloadKind enumerates the kinds of things that own containers.
|
||||
// Each kind has a corresponding row in projects/stacks/static_sites referenced via Workload.RefID.
|
||||
type WorkloadKind string
|
||||
@@ -346,12 +450,24 @@ const (
|
||||
// Workload is the unifying primitive that abstracts Project, Stack, and StaticSite.
|
||||
// Each row is paired with exactly one project/stack/site via (Kind, RefID).
|
||||
// Notification + webhook config moves here so it lives in one place across kinds.
|
||||
//
|
||||
// SourceKind / SourceConfig / TriggerKind / TriggerConfig / PublicFaces /
|
||||
// ParentWorkloadID populate the unified plugin model from the Workload-first
|
||||
// refactor. Existing rows keep these empty until they are explicitly migrated
|
||||
// or replaced — the legacy Kind/RefID columns continue to point at
|
||||
// project/stack/site rows in parallel during the cutover.
|
||||
type Workload struct {
|
||||
ID string `json:"id"`
|
||||
Kind string `json:"kind"` // project | stack | site
|
||||
Kind string `json:"kind"` // project | stack | site (legacy discriminator)
|
||||
RefID string `json:"ref_id"`
|
||||
Name string `json:"name"`
|
||||
AppID string `json:"app_id"` // nullable; "" = unassigned
|
||||
AppID string `json:"app_id"` // nullable; "" = unassigned (a.k.a. GroupID after rename)
|
||||
SourceKind string `json:"source_kind"` // "" until plugin-mode populated
|
||||
SourceConfig string `json:"source_config"` // JSON-encoded, decoded by the matching Source
|
||||
TriggerKind string `json:"trigger_kind"`
|
||||
TriggerConfig string `json:"trigger_config"` // JSON-encoded, decoded by the matching Trigger
|
||||
PublicFaces string `json:"public_faces"` // JSON-encoded []PublicFace
|
||||
ParentWorkloadID string `json:"parent_workload_id"` // "" = root; non-empty = stage chain
|
||||
NotificationURL string `json:"notification_url"`
|
||||
NotificationSecret string `json:"-"` // never serialized
|
||||
WebhookSecret string `json:"-"` // URL-identifier secret; never serialized
|
||||
@@ -384,8 +500,14 @@ type Container struct {
|
||||
ProxyRouteID string `json:"proxy_route_id"`
|
||||
NpmProxyID int `json:"npm_proxy_id"`
|
||||
LastSeenAt string `json:"last_seen_at"`
|
||||
CreatedAt string `json:"created_at"`
|
||||
UpdatedAt string `json:"updated_at"`
|
||||
// ExtraJSON carries source-specific metadata that isn't promoted to a
|
||||
// first-class column — currently per-face proxy route IDs for
|
||||
// multi-face image deploys. Stored as a JSON object; '{}' on empty
|
||||
// rows. Sources own the shape; consumers should tolerate unknown
|
||||
// keys.
|
||||
ExtraJSON string `json:"extra_json"`
|
||||
CreatedAt string `json:"created_at"`
|
||||
UpdatedAt string `json:"updated_at"`
|
||||
}
|
||||
|
||||
// App is an optional grouping of workloads (e.g., "my-saas" = web project + worker stack + redis stack).
|
||||
|
||||
@@ -181,6 +181,15 @@ func (s *Store) runMigrations() error {
|
||||
// re-write path; the LEFT JOIN in ListContainersByStageID falls back
|
||||
// to (project_id, role=stage_name) so legacy rows still resolve.
|
||||
`ALTER TABLE containers ADD COLUMN stage_id TEXT NOT NULL DEFAULT ''`,
|
||||
// Workload-first refactor columns (2026-05-10). Land additively so
|
||||
// the legacy kind/ref_id columns continue to serve existing
|
||||
// project/stack/site rows during cutover.
|
||||
`ALTER TABLE workloads ADD COLUMN source_kind TEXT NOT NULL DEFAULT ''`,
|
||||
`ALTER TABLE workloads ADD COLUMN source_config TEXT NOT NULL DEFAULT '{}'`,
|
||||
`ALTER TABLE workloads ADD COLUMN trigger_kind TEXT NOT NULL DEFAULT ''`,
|
||||
`ALTER TABLE workloads ADD COLUMN trigger_config TEXT NOT NULL DEFAULT '{}'`,
|
||||
`ALTER TABLE workloads ADD COLUMN public_faces TEXT NOT NULL DEFAULT '[]'`,
|
||||
`ALTER TABLE workloads ADD COLUMN parent_workload_id TEXT NOT NULL DEFAULT ''`,
|
||||
}
|
||||
|
||||
// Workload refactor tables (2026-05-09). Workload is the unifying primitive
|
||||
@@ -195,6 +204,12 @@ func (s *Store) runMigrations() error {
|
||||
ref_id TEXT NOT NULL,
|
||||
name TEXT NOT NULL,
|
||||
app_id TEXT NOT NULL DEFAULT '',
|
||||
source_kind TEXT NOT NULL DEFAULT '',
|
||||
source_config TEXT NOT NULL DEFAULT '{}',
|
||||
trigger_kind TEXT NOT NULL DEFAULT '',
|
||||
trigger_config TEXT NOT NULL DEFAULT '{}',
|
||||
public_faces TEXT NOT NULL DEFAULT '[]',
|
||||
parent_workload_id TEXT NOT NULL DEFAULT '',
|
||||
notification_url TEXT NOT NULL DEFAULT '',
|
||||
notification_secret TEXT NOT NULL DEFAULT '',
|
||||
webhook_secret TEXT NOT NULL DEFAULT '',
|
||||
@@ -231,6 +246,34 @@ func (s *Store) runMigrations() error {
|
||||
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
||||
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
|
||||
)`,
|
||||
// workload_env: per-workload env overrides (encrypt-at-rest for
|
||||
// secrets). Functional analog of stage_env. Workload deletion
|
||||
// cascades through the FK so orphan rows are impossible.
|
||||
`CREATE TABLE IF NOT EXISTS workload_env (
|
||||
id TEXT PRIMARY KEY,
|
||||
workload_id TEXT NOT NULL REFERENCES workloads(id) ON DELETE CASCADE,
|
||||
key TEXT NOT NULL,
|
||||
value TEXT NOT NULL DEFAULT '',
|
||||
encrypted INTEGER NOT NULL DEFAULT 0,
|
||||
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
||||
updated_at TEXT NOT NULL DEFAULT (datetime('now')),
|
||||
UNIQUE(workload_id, key)
|
||||
)`,
|
||||
// workload_volumes: per-workload mount declarations. Mirrors the
|
||||
// legacy `volumes` table shape (source / target / scope / name)
|
||||
// but keyed on workload_id. UNIQUE on (workload_id, target) so a
|
||||
// re-add overwrites instead of duplicating.
|
||||
`CREATE TABLE IF NOT EXISTS workload_volumes (
|
||||
id TEXT PRIMARY KEY,
|
||||
workload_id TEXT NOT NULL REFERENCES workloads(id) ON DELETE CASCADE,
|
||||
source TEXT NOT NULL DEFAULT '',
|
||||
target TEXT NOT NULL,
|
||||
scope TEXT NOT NULL DEFAULT 'absolute',
|
||||
name TEXT NOT NULL DEFAULT '',
|
||||
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
||||
updated_at TEXT NOT NULL DEFAULT (datetime('now')),
|
||||
UNIQUE(workload_id, target)
|
||||
)`,
|
||||
}
|
||||
for _, t := range workloadTables {
|
||||
if _, err := s.db.Exec(t); err != nil {
|
||||
@@ -312,6 +355,49 @@ func (s *Store) runMigrations() error {
|
||||
}
|
||||
}
|
||||
|
||||
// Observability: event_triggers — consume EventLog entries off the
|
||||
// bus and dispatch webhook actions. Schema kept flat (comma-list
|
||||
// filters, single optional regex) — see LOGSCAN_AND_TRIGGERS_TODO.md.
|
||||
observabilityTables := []string{
|
||||
`CREATE TABLE IF NOT EXISTS event_triggers (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
name TEXT NOT NULL,
|
||||
filter_severity TEXT NOT NULL DEFAULT '',
|
||||
filter_source TEXT NOT NULL DEFAULT '',
|
||||
filter_message_regex TEXT NOT NULL DEFAULT '',
|
||||
action_type TEXT NOT NULL DEFAULT 'webhook',
|
||||
action_target TEXT NOT NULL DEFAULT '',
|
||||
action_secret TEXT NOT NULL DEFAULT '',
|
||||
enabled INTEGER NOT NULL DEFAULT 1,
|
||||
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
||||
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
|
||||
)`,
|
||||
// log_scan_rules: regex patterns the log-scanner manager
|
||||
// applies to container log lines. workload_id and overrides_id are
|
||||
// NOT NULL and use sentinels instead: "" marks a global rule, 0 means
|
||||
// "overrides nothing"; per-workload overrides reference the global's id.
|
||||
`CREATE TABLE IF NOT EXISTS log_scan_rules (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
workload_id TEXT NOT NULL DEFAULT '',
|
||||
overrides_id INTEGER NOT NULL DEFAULT 0,
|
||||
name TEXT NOT NULL,
|
||||
pattern TEXT NOT NULL,
|
||||
severity TEXT NOT NULL DEFAULT 'warn',
|
||||
streams TEXT NOT NULL DEFAULT 'all',
|
||||
cooldown_seconds INTEGER NOT NULL DEFAULT 60,
|
||||
enabled INTEGER NOT NULL DEFAULT 1,
|
||||
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
||||
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
|
||||
)`,
|
||||
`CREATE INDEX IF NOT EXISTS idx_log_scan_rules_workload ON log_scan_rules(workload_id)`,
|
||||
`CREATE INDEX IF NOT EXISTS idx_log_scan_rules_overrides ON log_scan_rules(overrides_id)`,
|
||||
}
|
||||
for _, t := range observabilityTables {
|
||||
if _, err := s.db.Exec(t); err != nil {
|
||||
return fmt.Errorf("create observability table: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
for _, m := range migrations {
|
||||
if _, err := s.db.Exec(m); err != nil {
|
||||
// "duplicate column" / "already exists" are expected when a
|
||||
@@ -366,6 +452,8 @@ func (s *Store) runMigrations() error {
|
||||
`CREATE INDEX IF NOT EXISTS idx_containers_container_id ON containers(container_id) WHERE container_id != ''`,
|
||||
`CREATE INDEX IF NOT EXISTS idx_containers_kind ON containers(workload_kind)`,
|
||||
`CREATE INDEX IF NOT EXISTS idx_containers_stage_id ON containers(stage_id) WHERE stage_id != ''`,
|
||||
`CREATE INDEX IF NOT EXISTS idx_workload_env_workload ON workload_env(workload_id)`,
|
||||
`CREATE INDEX IF NOT EXISTS idx_workload_volumes_workload ON workload_volumes(workload_id)`,
|
||||
}
|
||||
for _, idx := range indexes {
|
||||
if _, err := s.db.Exec(idx); err != nil {
|
||||
|
||||
Reference in New Issue
Block a user