feat(observability): event triggers + log scanner backend

Two paired backends sharing the events.Bus seam:

Event triggers (consumer-side):
- internal/store/event_triggers.go — CRUD with action_secret
  redaction on read (placeholder echo treated as "no change" on
  PATCH so secrets aren't accidentally wiped).
- internal/events/dispatcher.go — bus subscriber, AND-composed
  filters (severity CSV, source CSV, message regex with memoized
  compile cache). Structural loop-prevention: never writes to
  event_log. Sends via notifier.SendPayload.
- internal/notify: SendPayload + SendSyncForTestPayload methods,
  TierEventTrigger constant, doSendRaw shared with the legacy
  Event-shaped path.
- internal/api/event_triggers.go — admin-gated CRUD + /test
  sending the real TriggerWebhookPayload shape. SSRF guard
  rejects loopback / link-local / unspecified targets. PATCH
  uses pointer-typed DTO for partial updates.

Log scanner (producer-side):
- internal/logscanner/ — engine (per-rule cooldown +
  per-container token bucket, atomic drop counters), tail
  (multiplexed docker frame demuxer with TTY fallback + 16 MiB
  payload cap + 1 MiB reassembly cap + RFC3339Nano-validated
  timestamp strip + UTF-8-safe message truncation), manager
  (5s container polling, atomic.Pointer[Snapshot] hot-reload,
  HitEmitter writes event_log + publishes EventLog so the
  trigger dispatcher picks them up immediately).
- internal/docker/container.go — ContainerLogsOpts exposes
  stream selection for stderr-only / stdout-only rules.
- internal/store: log_scan_rules table + CRUD with
  EffectiveLogScanRules resolver (globals minus per-workload
  overrides plus workload-only additions). Transactional
  cascade-delete of overrides when a global rule is removed.
- internal/api/log_scan_rules.go — admin-gated CRUD + /test
  (sample_line → matched/captures) + /stats (drop counters +
  active tail count + last-snapshot compile errors) +
  GET /api/workloads/{id}/effective-rules.

cmd/server/main.go wires both subsystems next to the existing
RegisterPersistentLogger. Coverage spans engine cooldown / bucket
counter tests, snapshot effective-set semantics, manager compile-
error capture, dispatcher matching, store validation +
cascade-delete, API URL validator + secret redaction.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-11 22:18:11 +03:00
parent 82d32181ba
commit 7a9ff7ad54
23 changed files with 3974 additions and 19 deletions
+325
View File
@@ -0,0 +1,325 @@
// Package api: event-trigger HTTP handlers. The dispatcher itself
// lives in internal/events; this file is the REST surface that lets
// operators create, edit, and test triggers from the UI.
package api
import (
"context"
"errors"
"net"
"net/http"
"net/url"
"regexp"
"strconv"
"time"
"github.com/go-chi/chi/v5"
"github.com/alexei/tinyforge/internal/events"
"github.com/alexei/tinyforge/internal/notify"
"github.com/alexei/tinyforge/internal/store"
)
// triggerInput is the JSON body accepted by both POST and PATCH on
// /api/event-triggers. Every field is a pointer so that a key absent
// from the request (nil) can be told apart from an explicit zero or
// empty value; the PATCH handler relies on this to leave absent fields
// unchanged. Fields that are mandatory on POST are checked explicitly
// by the handlers after decoding.
type triggerInput struct {
Name *string `json:"name"`
FilterSeverity *string `json:"filter_severity"`
FilterSource *string `json:"filter_source"`
FilterMessageRegex *string `json:"filter_message_regex"`
ActionType *string `json:"action_type"`
ActionTarget *string `json:"action_target"`
// On PATCH a value equal to actionSecretPlaceholder is also treated
// as "leave unchanged" so the redacted read value can be echoed back.
ActionSecret *string `json:"action_secret"` // omit = leave unchanged; "" = clear
Enabled *bool `json:"enabled"`
}
// actionSecretPlaceholder is the value API responses carry instead of a
// configured action secret, signalling "a secret is set" without
// exposing it. An empty secret is left empty rather than replaced.
// Clients may send the placeholder back verbatim: the PATCH handler
// treats an action_secret equal to it as "no change" and keeps the
// stored secret.
const actionSecretPlaceholder = "********"
// listEventTriggers serves GET /api/event-triggers. Each returned row
// has its action secret swapped for the placeholder before the list is
// serialized, so the stored secret never appears in the response. A
// store failure is reported as a generic 500.
func (s *Server) listEventTriggers(w http.ResponseWriter, r *http.Request) {
	triggers, err := s.store.ListEventTriggers()
	if err != nil {
		respondError(w, http.StatusInternalServerError, "list event triggers")
		return
	}
	for idx, trigger := range triggers {
		triggers[idx] = redactTriggerSecret(trigger)
	}
	respondJSON(w, http.StatusOK, triggers)
}
// getEventTrigger serves GET /api/event-triggers/{id}. The trigger is
// returned with its action secret redacted; lookup failures are mapped
// to an HTTP status by mapStoreError.
func (s *Server) getEventTrigger(w http.ResponseWriter, r *http.Request) {
	triggerID, valid := parseTriggerID(w, r)
	if !valid {
		// parseTriggerID has already written the 400 response.
		return
	}
	trigger, err := s.store.GetEventTrigger(triggerID)
	if err != nil {
		mapStoreError(w, err, "event trigger")
		return
	}
	redacted := redactTriggerSecret(trigger)
	respondJSON(w, http.StatusOK, redacted)
}
// createEventTrigger serves POST /api/event-triggers. Absent fields
// fall back to their zero value, except action_type (defaults to the
// webhook action) and enabled (defaults to true). The assembled trigger
// is validated before it is handed to the store, and the created row is
// returned with its secret redacted.
func (s *Server) createEventTrigger(w http.ResponseWriter, r *http.Request) {
	var body triggerInput
	if !decodeJSON(w, r, &body) {
		return
	}

	// An omitted enabled flag means "enabled".
	enabled := true
	if body.Enabled != nil {
		enabled = *body.Enabled
	}
	actionType := firstNonEmpty(derefString(body.ActionType), store.EventTriggerActionWebhook)

	candidate := store.EventTrigger{
		Name:               derefString(body.Name),
		FilterSeverity:     derefString(body.FilterSeverity),
		FilterSource:       derefString(body.FilterSource),
		FilterMessageRegex: derefString(body.FilterMessageRegex),
		ActionType:         actionType,
		ActionTarget:       derefString(body.ActionTarget),
		ActionSecret:       derefString(body.ActionSecret),
		Enabled:            enabled,
	}
	if problem := validateTrigger(candidate); problem != "" {
		respondError(w, http.StatusBadRequest, problem)
		return
	}

	created, err := s.store.CreateEventTrigger(candidate)
	if err != nil {
		// Validation has already passed at this point, so a store
		// error is treated as a server-side fault. The response stays
		// generic so driver text is not echoed to the client.
		respondError(w, http.StatusInternalServerError, "create event trigger")
		return
	}
	respondJSON(w, http.StatusCreated, redactTriggerSecret(created))
}
// updateEventTrigger serves PATCH /api/event-triggers/{id}. The stored
// trigger is loaded first, then every field present in the request body
// is merged over it; absent (nil) fields keep their stored value. The
// merged row is validated as a whole before being written back, and the
// response carries the updated trigger with its secret redacted.
func (s *Server) updateEventTrigger(w http.ResponseWriter, r *http.Request) {
	triggerID, valid := parseTriggerID(w, r)
	if !valid {
		return
	}
	merged, err := s.store.GetEventTrigger(triggerID)
	if err != nil {
		mapStoreError(w, err, "event trigger")
		return
	}
	var patch triggerInput
	if !decodeJSON(w, r, &patch) {
		return
	}

	// overwrite copies src into dst only when the field was supplied.
	overwrite := func(dst, src *string) {
		if src != nil {
			*dst = *src
		}
	}
	overwrite(&merged.Name, patch.Name)
	overwrite(&merged.FilterSeverity, patch.FilterSeverity)
	overwrite(&merged.FilterSource, patch.FilterSource)
	overwrite(&merged.FilterMessageRegex, patch.FilterMessageRegex)
	// An empty action_type is ignored rather than clearing the stored one.
	if patch.ActionType != nil {
		if requested := *patch.ActionType; requested != "" {
			merged.ActionType = requested
		}
	}
	overwrite(&merged.ActionTarget, patch.ActionTarget)
	// Reads return a placeholder instead of the real secret. A client
	// echoing that placeholder back leaves the stored secret untouched;
	// any other supplied value, including "", replaces it.
	if secret := patch.ActionSecret; secret != nil && *secret != actionSecretPlaceholder {
		merged.ActionSecret = *secret
	}
	if patch.Enabled != nil {
		merged.Enabled = *patch.Enabled
	}

	if problem := validateTrigger(merged); problem != "" {
		respondError(w, http.StatusBadRequest, problem)
		return
	}

	saved, err := s.store.UpdateEventTrigger(merged)
	switch {
	case err == nil:
		respondJSON(w, http.StatusOK, redactTriggerSecret(saved))
	case errors.Is(err, store.ErrNotFound):
		respondNotFound(w, "event trigger")
	default:
		respondError(w, http.StatusInternalServerError, "update event trigger")
	}
}
// deleteEventTrigger serves DELETE /api/event-triggers/{id} and answers
// 204 No Content on success. Store errors are translated by
// mapStoreError.
func (s *Server) deleteEventTrigger(w http.ResponseWriter, r *http.Request) {
	triggerID, valid := parseTriggerID(w, r)
	if !valid {
		return
	}
	err := s.store.DeleteEventTrigger(triggerID)
	if err != nil {
		mapStoreError(w, err, "event trigger")
		return
	}
	w.WriteHeader(http.StatusNoContent)
}
// testEventTrigger serves POST /api/event-triggers/{id}/test. It builds
// a synthetic TriggerWebhookPayload (event ID -1, source "test",
// severity "info") for the stored trigger and sends it to the trigger's
// action target through the notifier's SendSyncForTestPayload, bounded
// by a 10-second timeout derived from the request context. Whatever the
// notifier reports is returned with status 200. Only webhook triggers
// are testable; any other action type yields 400.
func (s *Server) testEventTrigger(w http.ResponseWriter, r *http.Request) {
	triggerID, valid := parseTriggerID(w, r)
	if !valid {
		return
	}
	trigger, err := s.store.GetEventTrigger(triggerID)
	if err != nil {
		mapStoreError(w, err, "event trigger")
		return
	}
	if trigger.ActionType != store.EventTriggerActionWebhook {
		respondError(w, http.StatusBadRequest, "action_type not testable")
		return
	}

	// One timestamp is shared by the envelope and the embedded event.
	stamp := time.Now().UTC().Format(time.RFC3339)
	syntheticEvent := events.EventLogPayload{
		ID:        -1,
		Source:    "test",
		Severity:  "info",
		Message:   "Test event from Tinyforge — trigger=" + trigger.Name,
		Metadata:  `{"synthetic":true}`,
		CreatedAt: stamp,
	}
	payload := events.TriggerWebhookPayload{
		Type:      "event_trigger",
		TriggerID: trigger.ID,
		Trigger:   trigger.Name,
		Event:     syntheticEvent,
		Timestamp: stamp,
	}

	sendCtx, cancel := context.WithTimeout(r.Context(), 10*time.Second)
	defer cancel()
	outcome := s.notifier.SendSyncForTestPayload(sendCtx, trigger.ActionTarget, trigger.ActionSecret,
		notify.TierEventTrigger, "event_trigger", payload)
	respondJSON(w, http.StatusOK, outcome)
}
// validateTrigger checks a fully-populated trigger row and returns a
// human-readable problem description, or "" when the row is acceptable.
// Both the create and update handlers call it on the final merged row.
// Checks run in order: name present, action type empty or webhook,
// action target present and an acceptable webhook URL, and (when set)
// a message regex that compiles.
func validateTrigger(t store.EventTrigger) string {
	switch {
	case t.Name == "":
		return "name is required"
	case t.ActionType != "" && t.ActionType != store.EventTriggerActionWebhook:
		return "action_type must be 'webhook'"
	case t.ActionTarget == "":
		return "action_target is required"
	}
	if problem := validateWebhookURL(t.ActionTarget); problem != "" {
		return problem
	}
	if t.FilterMessageRegex == "" {
		return ""
	}
	if _, err := regexp.Compile(t.FilterMessageRegex); err != nil {
		return "filter_message_regex invalid: " + err.Error()
	}
	return ""
}
// localhostHostRE matches the reserved name "localhost" and any name
// under the ".localhost" TLD (RFC 6761), case-insensitively and with an
// optional trailing root dot.
var localhostHostRE = regexp.MustCompile(`(?i)^(?:.+\.)?localhost\.?$`)

// validateWebhookURL guards against the most common SSRF vectors that
// admin-controlled webhook URLs enable. It returns "" when raw is
// acceptable, otherwise a message describing the first problem found:
//
//   - the URL does not parse;
//   - the scheme is not http or https;
//   - the host is missing;
//   - the host is "localhost" or a "*.localhost" name;
//   - the host is a literal IP that is loopback, link-local (unicast or
//     multicast) or unspecified. An IPv6 zone suffix ("%eth0") is
//     ignored for this check so a zoned literal cannot slip past it.
//
// RFC1918 private ranges are intentionally allowed since same-LAN
// receivers are a legitimate Tinyforge deployment pattern. Other
// hostnames are NOT resolved here — DNS rebinding is out of scope and
// would require enforcement at dispatch time too. Admin gating remains
// the primary control; this is defense-in-depth.
func validateWebhookURL(raw string) string {
	const reservedMsg = "action_target points at a reserved/loopback address"

	u, err := url.Parse(raw)
	if err != nil {
		return "action_target invalid URL: " + err.Error()
	}
	if u.Scheme != "http" && u.Scheme != "https" {
		return "action_target must be http:// or https://"
	}
	host := u.Hostname()
	if host == "" {
		return "action_target missing host"
	}

	// Name-based loopback: no DNS lookup is needed to know where
	// "localhost" points.
	if localhostHostRE.MatchString(host) {
		return reservedMsg
	}

	// net.ParseIP rejects zoned literals such as "fe80::1%eth0", which
	// would skip the checks below, so drop the zone before parsing.
	literal := host
	for i := 0; i < len(host); i++ {
		if host[i] == '%' {
			literal = host[:i]
			break
		}
	}
	if ip := net.ParseIP(literal); ip != nil {
		if ip.IsLoopback() || ip.IsLinkLocalUnicast() || ip.IsLinkLocalMulticast() || ip.IsUnspecified() {
			return reservedMsg
		}
	}
	return ""
}
// redactTriggerSecret hides a configured action secret. The argument is
// received by value, so the caller's trigger is never modified; the
// returned copy carries the placeholder when a secret was set and an
// empty string otherwise, letting the UI tell "signing configured" from
// "no signing".
func redactTriggerSecret(t store.EventTrigger) store.EventTrigger {
	if t.ActionSecret == "" {
		return t
	}
	t.ActionSecret = actionSecretPlaceholder
	return t
}
// mapStoreError writes the HTTP response for a store-layer error.
// store.ErrNotFound becomes a not-found response for resource; any
// other error becomes a 500 whose body is the generic text
// "get <resource>", so driver details are not echoed to the client.
func mapStoreError(w http.ResponseWriter, err error, resource string) {
	if !errors.Is(err, store.ErrNotFound) {
		respondError(w, http.StatusInternalServerError, "get "+resource)
		return
	}
	respondNotFound(w, resource)
}
// parseTriggerID reads the {id} route parameter as a positive base-10
// int64. On success it returns the id and true. Otherwise it writes a
// 400 response and returns (0, false); the caller should simply return.
func parseTriggerID(w http.ResponseWriter, r *http.Request) (int64, bool) {
	parsed, err := strconv.ParseInt(chi.URLParam(r, "id"), 10, 64)
	if err == nil && parsed > 0 {
		return parsed, true
	}
	respondError(w, http.StatusBadRequest, "invalid event trigger id")
	return 0, false
}
// derefString returns the string p points at, or "" when p is nil.
func derefString(p *string) string {
	if p != nil {
		return *p
	}
	return ""
}
// firstNonEmpty returns a unless it is the empty string, in which case
// it returns b (which may itself be empty).
func firstNonEmpty(a, b string) string {
	if a == "" {
		return b
	}
	return a
}
+143
View File
@@ -0,0 +1,143 @@
package api
import (
"strings"
"testing"
"github.com/alexei/tinyforge/internal/store"
)
// TestValidateWebhookURL is a table test for validateWebhookURL. A case
// with an empty wantErr must validate cleanly; otherwise the returned
// message must contain wantErr as a substring.
func TestValidateWebhookURL(t *testing.T) {
	type testCase struct {
		name    string
		url     string
		wantErr string // substring; empty = pass
	}
	table := []testCase{
		{"https valid", "https://example.com/hook", ""},
		{"http valid", "http://example.com:8080/hook", ""},
		{"RFC1918 private LAN allowed", "http://192.168.1.50:9090/hook", ""},
		{"loopback rejected", "http://127.0.0.1:8090/hook", "loopback"},
		{"ipv6 loopback rejected", "http://[::1]:9000/hook", "loopback"},
		{"link-local rejected", "http://169.254.169.254/latest/meta-data", "reserved"},
		{"unspecified rejected", "http://0.0.0.0:9000/hook", "reserved"},
		{"file scheme rejected", "file:///etc/passwd", "http:// or https://"},
		{"missing host rejected", "https://", "missing host"},
		{"malformed url rejected", "://nope", "invalid URL"},
	}
	for _, tc := range table {
		t.Run(tc.name, func(t *testing.T) {
			got := validateWebhookURL(tc.url)
			switch {
			case tc.wantErr == "" && got != "":
				t.Fatalf("expected pass, got error: %q", got)
			case tc.wantErr != "" && !strings.Contains(got, tc.wantErr):
				t.Fatalf("error mismatch:\n got: %q\n want substring: %q", got, tc.wantErr)
			}
		})
	}
}
// TestValidateTrigger is a table test for validateTrigger. It covers
// each rejection path (missing name, missing target, bad scheme,
// loopback target, unsupported action type, uncompilable regex) plus
// one fully valid trigger. An empty want means the trigger must pass.
func TestValidateTrigger(t *testing.T) {
	type testCase struct {
		name string
		in   store.EventTrigger
		want string // substring of error; empty = pass
	}
	table := []testCase{
		{
			name: "missing name",
			in:   store.EventTrigger{ActionTarget: "https://x.example.com/h"},
			want: "name is required",
		},
		{
			name: "missing target",
			in:   store.EventTrigger{Name: "n"},
			want: "action_target is required",
		},
		{
			name: "bad scheme",
			in:   store.EventTrigger{Name: "n", ActionTarget: "ftp://x.example.com/h"},
			want: "http:// or https://",
		},
		{
			name: "loopback target",
			in:   store.EventTrigger{Name: "n", ActionTarget: "http://127.0.0.1/hook"},
			want: "loopback",
		},
		{
			name: "unsupported action_type",
			in:   store.EventTrigger{Name: "n", ActionType: "email", ActionTarget: "https://x.example.com/h"},
			want: "action_type must be",
		},
		{
			name: "invalid regex",
			in: store.EventTrigger{
				Name:               "n",
				ActionTarget:       "https://x.example.com/h",
				FilterMessageRegex: "([unclosed",
			},
			want: "filter_message_regex invalid",
		},
		{
			name: "all valid",
			in: store.EventTrigger{
				Name:               "n",
				ActionTarget:       "https://x.example.com/h",
				FilterSeverity:     "warn,error",
				FilterMessageRegex: `\bpanic\b`,
			},
			want: "",
		},
	}
	for _, tc := range table {
		t.Run(tc.name, func(t *testing.T) {
			got := validateTrigger(tc.in)
			switch {
			case tc.want == "" && got != "":
				t.Fatalf("expected pass, got error: %q", got)
			case tc.want != "" && !strings.Contains(got, tc.want):
				t.Fatalf("error mismatch:\n got: %q\n want substring: %q", got, tc.want)
			}
		})
	}
}
// TestRedactTriggerSecret checks three properties of
// redactTriggerSecret: a configured secret is replaced by the
// placeholder, the caller's value is not mutated, and an empty secret
// stays empty.
func TestRedactTriggerSecret(t *testing.T) {
	const realSecret = "shh-real-secret"

	original := store.EventTrigger{Name: "n", ActionSecret: realSecret}
	if redacted := redactTriggerSecret(original); redacted.ActionSecret != actionSecretPlaceholder {
		t.Errorf("expected placeholder, got %q", redacted.ActionSecret)
	}
	if original.ActionSecret != realSecret {
		t.Errorf("original mutated: %q", original.ActionSecret)
	}

	unsigned := redactTriggerSecret(store.EventTrigger{Name: "n", ActionSecret: ""})
	if unsigned.ActionSecret != "" {
		t.Errorf("empty secret should stay empty, got %q", unsigned.ActionSecret)
	}
}
// TestDerefString covers both branches of derefString: a nil pointer
// and a pointer to a non-empty string.
func TestDerefString(t *testing.T) {
	if got := derefString(nil); got != "" {
		t.Error("nil should deref to empty string")
	}
	input := "value"
	if got := derefString(&input); got != "value" {
		t.Error("non-nil should deref to value")
	}
}
// TestFirstNonEmpty checks that the first argument wins when it is
// non-empty and that the second is used as the fallback otherwise.
func TestFirstNonEmpty(t *testing.T) {
	if got := firstNonEmpty("a", "b"); got != "a" {
		t.Error("non-empty first wins")
	}
	if got := firstNonEmpty("", "b"); got != "b" {
		t.Error("fallback when first empty")
	}
}
+350
View File
@@ -0,0 +1,350 @@
// Package api: log-scan rule HTTP handlers. The scanner manager
// lives in internal/logscanner; this file is the REST surface that
// lets operators create, edit, and test rules from the UI.
package api
import (
"errors"
"log/slog"
"net/http"
"regexp"
"strconv"
"strings"
"github.com/go-chi/chi/v5"
"github.com/alexei/tinyforge/internal/logscanner"
"github.com/alexei/tinyforge/internal/store"
)
// LogScanReloader is the API's view of the log-scan manager: the hook
// called after any rule CRUD so the manager swaps its snapshot, and the
// source of runtime counters for the /stats endpoint. Implemented by
// *logscanner.Manager. The API tolerates a nil reloader, so the routes
// still work in a scanner-disabled deployment.
type LogScanReloader interface {
// ReloadRules is invoked after each successful rule create, update
// or delete. A returned error is logged by the API, not surfaced to
// the HTTP client.
ReloadRules() error
// Stats supplies the body of GET /api/log-scan-rules/stats.
Stats() logscanner.Stats
}
// SetLogScanReloader stores r as the target of the API → manager reload
// signal and as the source for the stats endpoint. Called from main
// after both subsystems are constructed. Until it is called (or if r is
// nil) reloads are skipped and stats are reported as zero values.
func (s *Server) SetLogScanReloader(r LogScanReloader) {
s.logScanReloader = r
}
// ruleInput is the JSON shape accepted by POST + PATCH on
// /api/log-scan-rules. Pointers distinguish "absent" (nil) from an
// explicit empty/zero value. WorkloadID and OverridesID are immutable
// on update (per store.UpdateLogScanRule): the PATCH handler never
// reads them, so they only take effect on create.
type ruleInput struct {
// Scope fields — create only.
WorkloadID *string `json:"workload_id"`
OverridesID *int64 `json:"overrides_id"`
Name *string `json:"name"`
Pattern *string `json:"pattern"`
// Severity and Streams fall back to store defaults on create when
// absent or empty; on PATCH an empty value is ignored.
Severity *string `json:"severity"`
Streams *string `json:"streams"`
// CooldownSeconds defaults to 60 on create when absent.
CooldownSeconds *int `json:"cooldown_seconds"`
// Enabled defaults to true on create when absent.
Enabled *bool `json:"enabled"`
}
// listLogScanRules serves GET /api/log-scan-rules. Without a query
// filter it returns every rule. With `workload_id=...` it returns only
// the rules scoped to that workload (workload-only + override rows,
// NOT globals), as provided by store.ListLogScanRulesByWorkload.
func (s *Server) listLogScanRules(w http.ResponseWriter, r *http.Request) {
	workloadID := r.URL.Query().Get("workload_id")
	if workloadID == "" {
		all, err := s.store.ListLogScanRules()
		if err != nil {
			respondError(w, http.StatusInternalServerError, "list log scan rules")
			return
		}
		respondJSON(w, http.StatusOK, all)
		return
	}

	scoped, err := s.store.ListLogScanRulesByWorkload(workloadID)
	if err != nil {
		respondError(w, http.StatusInternalServerError, "list log scan rules")
		return
	}
	respondJSON(w, http.StatusOK, scoped)
}
// getLogScanRule serves GET /api/log-scan-rules/{id} and returns the
// stored rule as JSON. Lookup failures are translated by mapStoreError.
func (s *Server) getLogScanRule(w http.ResponseWriter, r *http.Request) {
	ruleID, valid := parseRuleID(w, r)
	if !valid {
		return
	}
	found, err := s.store.GetLogScanRule(ruleID)
	if err != nil {
		mapStoreError(w, err, "log scan rule")
		return
	}
	respondJSON(w, http.StatusOK, found)
}
// createLogScanRule serves POST /api/log-scan-rules. Absent fields take
// defaults: severity warn, streams all, a 60-second cooldown, and
// enabled. The pattern must be non-blank and compile before the rule
// reaches the store. On success the scanner is asked to reload and the
// created rule is returned with 201.
func (s *Server) createLogScanRule(w http.ResponseWriter, r *http.Request) {
	var body ruleInput
	if !decodeJSON(w, r, &body) {
		return
	}

	// An omitted enabled flag means "enabled".
	enabled := true
	if body.Enabled != nil {
		enabled = *body.Enabled
	}
	severity := firstNonEmpty(derefString(body.Severity), store.LogScanSeverityWarn)
	streams := firstNonEmpty(derefString(body.Streams), store.LogScanStreamAll)

	candidate := store.LogScanRule{
		WorkloadID:      derefString(body.WorkloadID),
		OverridesID:     derefInt64(body.OverridesID),
		Name:            derefString(body.Name),
		Pattern:         derefString(body.Pattern),
		Severity:        severity,
		Streams:         streams,
		CooldownSeconds: derefIntDefault(body.CooldownSeconds, 60),
		Enabled:         enabled,
	}
	if problem := validateRulePattern(candidate.Pattern); problem != "" {
		respondError(w, http.StatusBadRequest, problem)
		return
	}

	created, err := s.store.CreateLogScanRule(candidate)
	switch {
	case err == nil:
		s.reloadLogScan()
		respondJSON(w, http.StatusCreated, created)
	case isClientValidationErr(err):
		// Store-side validation failures are the caller's fault: 400
		// with the validation message.
		respondError(w, http.StatusBadRequest, err.Error())
	default:
		// Anything else is reported as a generic 500 so raw driver
		// text is not leaked.
		respondError(w, http.StatusInternalServerError, "create log scan rule")
	}
}
// updateLogScanRule serves PATCH /api/log-scan-rules/{id}. The stored
// rule is loaded and each supplied field — name, pattern, severity,
// streams, cooldown, enabled — is merged over it. Scope fields
// (workload_id, overrides_id) are immutable and never read from the
// request. The merged pattern is re-validated before the rule is
// written; on success the scanner is asked to reload.
func (s *Server) updateLogScanRule(w http.ResponseWriter, r *http.Request) {
	ruleID, valid := parseRuleID(w, r)
	if !valid {
		return
	}
	merged, err := s.store.GetLogScanRule(ruleID)
	if err != nil {
		mapStoreError(w, err, "log scan rule")
		return
	}
	var patch ruleInput
	if !decodeJSON(w, r, &patch) {
		return
	}

	if name := patch.Name; name != nil {
		merged.Name = *name
	}
	if pattern := patch.Pattern; pattern != nil {
		merged.Pattern = *pattern
	}
	// Empty severity / streams values are ignored rather than applied.
	if severity := patch.Severity; severity != nil && *severity != "" {
		merged.Severity = *severity
	}
	if streams := patch.Streams; streams != nil && *streams != "" {
		merged.Streams = *streams
	}
	if cooldown := patch.CooldownSeconds; cooldown != nil {
		merged.CooldownSeconds = *cooldown
	}
	if enabled := patch.Enabled; enabled != nil {
		merged.Enabled = *enabled
	}

	if problem := validateRulePattern(merged.Pattern); problem != "" {
		respondError(w, http.StatusBadRequest, problem)
		return
	}

	saved, err := s.store.UpdateLogScanRule(merged)
	switch {
	case err == nil:
		s.reloadLogScan()
		respondJSON(w, http.StatusOK, saved)
	case errors.Is(err, store.ErrNotFound):
		respondNotFound(w, "log scan rule")
	case isClientValidationErr(err):
		respondError(w, http.StatusBadRequest, err.Error())
	default:
		respondError(w, http.StatusInternalServerError, "update log scan rule")
	}
}
// deleteLogScanRule serves DELETE /api/log-scan-rules/{id}. Override
// rows that reference this id are cascade-deleted by the store layer.
// After a successful delete the scanner is asked to reload and the
// response is 204 No Content.
func (s *Server) deleteLogScanRule(w http.ResponseWriter, r *http.Request) {
	ruleID, valid := parseRuleID(w, r)
	if !valid {
		return
	}
	err := s.store.DeleteLogScanRule(ruleID)
	if err != nil {
		mapStoreError(w, err, "log scan rule")
		return
	}
	s.reloadLogScan()
	w.WriteHeader(http.StatusNoContent)
}
// testLogScanRule serves POST /api/log-scan-rules/{id}/test. The body
// `{"sample_line": "..."}` is matched against the stored rule's pattern
// and the response reports whether it matched plus any captured
// subgroups, so operators can iterate on regexes from the UI without
// generating real container traffic.
func (s *Server) testLogScanRule(w http.ResponseWriter, r *http.Request) {
	ruleID, valid := parseRuleID(w, r)
	if !valid {
		return
	}
	stored, err := s.store.GetLogScanRule(ruleID)
	if err != nil {
		mapStoreError(w, err, "log scan rule")
		return
	}
	var req struct {
		SampleLine string `json:"sample_line"`
	}
	if !decodeJSON(w, r, &req) {
		return
	}
	verdict := testRuleAgainstLine(stored, req.SampleLine)
	respondJSON(w, http.StatusOK, verdict)
}
// getEffectiveLogScanRules serves GET /api/workloads/{id}/effective-rules.
// It returns whatever store.EffectiveLogScanRules resolves for the
// workload — the effective rule set (globals minus overrides +
// workload-only + override-substitutes) that the scanner uses for this
// workload's containers. An empty workload id is rejected with 400.
func (s *Server) getEffectiveLogScanRules(w http.ResponseWriter, r *http.Request) {
	workloadID := chi.URLParam(r, "id")
	if len(workloadID) == 0 {
		respondError(w, http.StatusBadRequest, "workload id required")
		return
	}
	effective, err := s.store.EffectiveLogScanRules(workloadID)
	if err != nil {
		respondError(w, http.StatusInternalServerError, "compute effective rules")
		return
	}
	respondJSON(w, http.StatusOK, effective)
}
// ruleTestResult is the shape returned by the rule /test endpoint.
// Keeping it focused — the caller wants a yes/no + captures so they can
// iterate, nothing more.
type ruleTestResult struct {
// Matched reports whether the rule's pattern matched the sample line.
Matched bool `json:"matched"`
// Captures holds the submatches of a successful match, keyed by the
// group's name, or by "$N" (1-based) for unnamed groups.
Captures map[string]string `json:"captures,omitempty"`
// Error is set when the stored pattern fails to compile.
Error string `json:"error,omitempty"`
}
// testRuleAgainstLine compiles rule.Pattern and matches it against
// line. A pattern that fails to compile yields a result with only Error
// set. No match yields Matched=false. On a match, every capture group is
// reported in Captures, keyed by its name, or by "$N" (1-based group
// index) when the group is unnamed.
func testRuleAgainstLine(rule store.LogScanRule, line string) ruleTestResult {
	re, err := regexp.Compile(rule.Pattern)
	if err != nil {
		return ruleTestResult{Error: "rule pattern is invalid: " + err.Error()}
	}
	match := re.FindStringSubmatch(line)
	if match == nil {
		return ruleTestResult{Matched: false}
	}

	groupNames := re.SubexpNames()
	captures := make(map[string]string)
	// Index 0 is the whole match; capture groups start at 1.
	for idx := 1; idx < len(match); idx++ {
		label := groupNames[idx]
		if label == "" {
			label = "$" + strconv.Itoa(idx)
		}
		captures[label] = match[idx]
	}
	return ruleTestResult{Matched: true, Captures: captures}
}
// validateRulePattern returns "" when pattern is usable as a scan rule,
// otherwise a message for the client: a blank (empty or
// whitespace-only) pattern is "required", and a pattern that does not
// compile as a Go regexp is "invalid" with the compiler's error
// appended.
func validateRulePattern(pattern string) string {
	if len(strings.TrimSpace(pattern)) == 0 {
		return "pattern is required"
	}
	_, err := regexp.Compile(pattern)
	if err == nil {
		return ""
	}
	return "pattern invalid: " + err.Error()
}
// isClientValidationErr reports whether err looks like one of the
// validation failures raised by CreateLogScanRule / UpdateLogScanRule
// (name/pattern required, invalid enum, bad cooldown, override without
// workload). Detection is by substring match on the error text. The
// handlers use it to answer 400 instead of 500 without exposing driver
// text. A nil error is never a validation error.
func isClientValidationErr(err error) bool {
	if err == nil {
		return false
	}
	text := err.Error()
	markers := [...]string{
		"name is required",
		"pattern is required",
		"invalid severity",
		"invalid streams",
		"cooldown_seconds must be",
		"override row requires workload_id",
	}
	for i := range markers {
		if strings.Contains(text, markers[i]) {
			return true
		}
	}
	return false
}
// parseRuleID reads the {id} route parameter as a positive base-10
// int64. On success it returns the id and true. Otherwise it writes a
// 400 response and returns (0, false); the caller should simply return.
func parseRuleID(w http.ResponseWriter, r *http.Request) (int64, bool) {
	parsed, err := strconv.ParseInt(chi.URLParam(r, "id"), 10, 64)
	if err == nil && parsed > 0 {
		return parsed, true
	}
	respondError(w, http.StatusBadRequest, "invalid rule id")
	return 0, false
}
// derefInt64 returns the value p points at, or 0 when p is nil.
func derefInt64(p *int64) int64 {
	if p != nil {
		return *p
	}
	return 0
}
// derefIntDefault returns the value p points at, or def when p is nil.
// An explicit zero is returned as zero, not replaced by def.
func derefIntDefault(p *int, def int) int {
	if p != nil {
		return *p
	}
	return def
}
// getLogScanStats serves GET /api/log-scan-rules/stats with the
// reloader's current Stats (engine drop counters, last-snapshot compile
// errors, active tail count), so operators can see when their patterns
// are too greedy or syntactically broken. When no reloader is wired
// (scanner-disabled deployment) a zero-valued Stats is returned instead
// of a 404, letting the frontend render the panel uniformly.
func (s *Server) getLogScanStats(w http.ResponseWriter, r *http.Request) {
	var stats logscanner.Stats
	if s.logScanReloader != nil {
		stats = s.logScanReloader.Stats()
	}
	respondJSON(w, http.StatusOK, stats)
}
// reloadLogScan asks the wired reloader to rebuild its rule snapshot.
// It is a no-op when no reloader is set, so the API can run before the
// manager is wired and in scanner-disabled deployments. A reload
// failure is only logged at warn level: the CRUD request that triggered
// it has already succeeded, but operators still need a signal that the
// running snapshot may be stale.
func (s *Server) reloadLogScan() {
	reloader := s.logScanReloader
	if reloader == nil {
		return
	}
	err := reloader.ReloadRules()
	if err == nil {
		return
	}
	slog.Warn("log-scan reload failed; manager snapshot may be stale",
		"error", err)
}
+84 -5
View File
@@ -50,6 +50,7 @@ type Server struct {
stackManager *stack.Manager
backupEngine *backup.Engine
sseGate *sseGate
logScanReloader LogScanReloader
dbPath string
shutdownFunc func() // called after restore to trigger graceful shutdown
onBackupSettingsChanged func(enabled bool, intervalHours int) // called when backup settings change
@@ -217,13 +218,26 @@ func (s *Server) Router() chi.Router {
r.Group(func(r chi.Router) {
r.Use(auth.Middleware(s.localAuth))
// Plugin registry inspection + unified ingress (Workload refactor).
// /hooks/kinds is informational and visible to any authenticated
// caller. /hooks/generic dispatches deploys and is admin-gated —
// vendor-specific webhooks (with their own per-target HMAC
// secrets) live under /webhook/* and remain the only ingress
// reachable by external CI systems until Phase 5 consolidates them.
r.Get("/hooks/kinds", s.listHookKinds)
r.Get("/hooks/kinds/{kind}/schema", s.getHookKindSchema)
r.With(auth.AdminOnly).Post("/hooks/generic", s.dispatchGeneric)
// Read-only endpoints (any authenticated user).
r.Get("/health", s.getHealth)
r.Get("/auth/me", s.currentUser)
r.Post("/auth/logout", s.logout)
r.Get("/proxies", s.listProxyRoutes)
r.Get("/docker/unused-images", s.unusedImageStats)
r.Get("/projects", s.listProjects)
// Legacy project/stage/site/stack endpoints carry a Deprecation
// header pointing at /api/workloads. Functional behavior is
// unchanged until the hard cutover removes them.
r.With(deprecated("/api/workloads")).Get("/projects", s.listProjects)
r.Route("/projects/{id}", func(r chi.Router) {
r.Get("/", s.getProject)
r.Get("/stages/{stage}/env", s.listStageEnv)
@@ -290,7 +304,7 @@ func (s *Server) Router() chi.Router {
})
})
// Stacks (docker-compose).
r.Get("/stacks", s.listStacks)
r.With(deprecated("/api/workloads?kind=plugin&source_kind=compose")).Get("/stacks", s.listStacks)
r.Route("/stacks/{id}", func(r chi.Router) {
r.Get("/", s.getStack)
r.Get("/revisions", s.listStackRevisions)
@@ -311,7 +325,7 @@ func (s *Server) Router() chi.Router {
r.With(auth.AdminOnly).Post("/stacks", s.createStack)
// Static sites.
r.Get("/sites", s.listStaticSites)
r.With(deprecated("/api/workloads?kind=plugin&source_kind=static")).Get("/sites", s.listStaticSites)
r.Route("/sites/{id}", func(r chi.Router) {
r.Get("/", s.getStaticSite)
r.Get("/secrets", s.listStaticSiteSecrets)
@@ -375,13 +389,47 @@ func (s *Server) Router() chi.Router {
r.Get("/containers/stale", s.listStaleContainers)
// Workload-shaped endpoints (the unifying layer over project /
// stack / site). Read-only; mutations still go through the
// kind-specific endpoints (POST /projects, PUT /stacks/{id}, …).
// stack / site). Read endpoints are open to any authenticated
// user; create / update / deploy mutate state and are admin-gated.
// Plugin-native workloads (source_kind + trigger_kind set) are
// created here; legacy project / stack / site mutations remain at
// their dedicated endpoints during the cutover.
r.Get("/workloads", s.listWorkloads)
r.With(auth.AdminOnly).Post("/workloads", s.createPluginWorkload)
r.Route("/workloads/{id}", func(r chi.Router) {
r.Get("/", s.getWorkload)
r.Get("/containers", s.listWorkloadContainers)
r.Get("/containers/{cid}/logs", s.streamWorkloadContainerLogs)
r.With(auth.AdminOnly).Patch("/app", s.updateWorkloadAppID)
r.With(auth.AdminOnly).Put("/plugin", s.updatePluginWorkload)
r.With(auth.AdminOnly).Post("/deploy", s.deployPluginWorkload)
r.With(auth.AdminOnly).Delete("/", s.deletePluginWorkload)
// Per-workload env vars (analog of legacy stage_env).
// Listing is open to authenticated readers; mutations are
// admin-gated. Encrypted values are write-only after store.
r.Get("/env", s.listWorkloadEnv)
r.With(auth.AdminOnly).Put("/env", s.setWorkloadEnv)
r.With(auth.AdminOnly).Delete("/env/{envID}", s.deleteWorkloadEnv)
// Per-workload inbound webhook URL: rotate the secret + fetch
// the canonical URL. Mirrors the project / site webhook UX.
r.With(auth.AdminOnly).Get("/webhook", s.getWorkloadWebhook)
r.With(auth.AdminOnly).Post("/webhook/regenerate", s.regenerateWorkloadWebhook)
// Per-workload volume mounts (analog of legacy project volumes).
// Reads are open to authenticated users; mutations admin-gated.
// Source/target paths are validated for traversal safety here;
// host-path allow-listing happens at deploy time.
r.Get("/volumes", s.listWorkloadVolumes)
r.With(auth.AdminOnly).Put("/volumes", s.setWorkloadVolume)
r.With(auth.AdminOnly).Delete("/volumes/{volID}", s.deleteWorkloadVolume)
// Stages chain: parent + self + direct children, plus a
// promote-from action that copies the source workload's
// running image tag onto this workload's default_tag.
r.Get("/chain", s.getWorkloadChain)
r.With(auth.AdminOnly).Post("/promote-from/{sourceID}", s.promoteFromWorkload)
})
// Global container index, joined to workload + app names.
@@ -398,6 +446,37 @@ func (s *Server) Router() chi.Router {
r.Delete("/apps/{id}", s.deleteApp)
})
// Event triggers: filter+action rules over the event_log
// stream. Read endpoints are available to any authenticated
// user; mutations + test-dispatch are admin-gated since they
// can fire arbitrary outbound webhooks.
r.Get("/event-triggers", s.listEventTriggers)
r.Get("/event-triggers/{id}", s.getEventTrigger)
r.Group(func(r chi.Router) {
r.Use(auth.AdminOnly)
r.Post("/event-triggers", s.createEventTrigger)
r.Patch("/event-triggers/{id}", s.updateEventTrigger)
r.Delete("/event-triggers/{id}", s.deleteEventTrigger)
r.Post("/event-triggers/{id}/test", s.testEventTrigger)
})
// Log-scan rules: regex patterns the scanner manager
// applies to container log lines. Read endpoints are
// available to any authenticated user; mutations are
// admin-gated since they can change global observability
// behavior across every workload.
r.Get("/log-scan-rules", s.listLogScanRules)
r.Get("/log-scan-rules/stats", s.getLogScanStats)
r.Get("/log-scan-rules/{id}", s.getLogScanRule)
r.Get("/workloads/{id}/effective-rules", s.getEffectiveLogScanRules)
r.Group(func(r chi.Router) {
r.Use(auth.AdminOnly)
r.Post("/log-scan-rules", s.createLogScanRule)
r.Patch("/log-scan-rules/{id}", s.updateLogScanRule)
r.Delete("/log-scan-rules/{id}", s.deleteLogScanRule)
r.Post("/log-scan-rules/{id}/test", s.testLogScanRule)
})
// System resources (read-only).
r.Get("/system/stats", s.getSystemStats)
r.Get("/system/stats/history", s.getSystemStatsHistory)