feat(observability): event triggers + log scanner backend
Two paired backends sharing the events.Bus seam:
Event triggers (consumer-side):
- internal/store/event_triggers.go — CRUD with action_secret
redaction on read (placeholder echo treated as "no change" on
PATCH so secrets aren't accidentally wiped).
- internal/events/dispatcher.go — bus subscriber, AND-composed
filters (severity CSV, source CSV, message regex with memoized
compile cache). Structural loop-prevention: never writes to
event_log. Sends via notifier.SendPayload.
- internal/notify: SendPayload + SendSyncForTestPayload methods,
TierEventTrigger constant, doSendRaw shared with the legacy
Event-shaped path.
- internal/api/event_triggers.go — admin-gated CRUD + /test
sending the real TriggerWebhookPayload shape. SSRF guard
rejects loopback / link-local / unspecified targets. PATCH
uses pointer-typed DTO for partial updates.
Log scanner (producer-side):
- internal/logscanner/ — engine (per-rule cooldown +
per-container token bucket, atomic drop counters), tail
(multiplexed docker frame demuxer with TTY fallback + 16 MiB
payload cap + 1 MiB reassembly cap + RFC3339Nano-validated
timestamp strip + UTF-8-safe message truncation), manager
(5s container polling, atomic.Pointer[Snapshot] hot-reload,
HitEmitter writes event_log + publishes EventLog so the
trigger dispatcher picks them up immediately).
- internal/docker/container.go — ContainerLogsOpts exposes
stream selection for stderr-only / stdout-only rules.
- internal/store: log_scan_rules table + CRUD with
EffectiveLogScanRules resolver (globals minus per-workload
overrides plus workload-only additions). Transactional
cascade-delete of overrides when a global rule is removed.
- internal/api/log_scan_rules.go — admin-gated CRUD + /test
(sample_line → matched/captures) + /stats (drop counters +
active tail count + last-snapshot compile errors) +
GET /api/workloads/{id}/effective-rules.
cmd/server/main.go wires both subsystems next to the existing
RegisterPersistentLogger. Test coverage spans engine cooldown /
bucket counters, snapshot effective-set semantics, manager
compile-error capture, dispatcher matching, store validation +
cascade-delete, API URL validator + secret redaction.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -27,6 +27,7 @@ import (
|
||||
"github.com/alexei/tinyforge/internal/events"
|
||||
"github.com/alexei/tinyforge/internal/health"
|
||||
"github.com/alexei/tinyforge/internal/logging"
|
||||
"github.com/alexei/tinyforge/internal/logscanner"
|
||||
"github.com/alexei/tinyforge/internal/notify"
|
||||
"github.com/alexei/tinyforge/internal/npm"
|
||||
"github.com/alexei/tinyforge/internal/proxy"
|
||||
@@ -38,6 +39,16 @@ import (
|
||||
"github.com/alexei/tinyforge/internal/staticsite"
|
||||
"github.com/alexei/tinyforge/internal/store"
|
||||
"github.com/alexei/tinyforge/internal/webhook"
|
||||
|
||||
// Plugin registrations: each blank-import runs its init() and registers
|
||||
// itself with internal/workload/plugin. Adding a new Source or Trigger
|
||||
// is a matter of dropping a new package and adding it to this list.
|
||||
_ "github.com/alexei/tinyforge/internal/workload/plugin/source/compose"
|
||||
_ "github.com/alexei/tinyforge/internal/workload/plugin/source/image"
|
||||
_ "github.com/alexei/tinyforge/internal/workload/plugin/source/static"
|
||||
_ "github.com/alexei/tinyforge/internal/workload/plugin/trigger/git"
|
||||
_ "github.com/alexei/tinyforge/internal/workload/plugin/trigger/manual"
|
||||
_ "github.com/alexei/tinyforge/internal/workload/plugin/trigger/registry"
|
||||
)
|
||||
|
||||
func main() {
|
||||
@@ -105,6 +116,9 @@ func main() {
|
||||
rec := reconciler.New(db, dockerClient, 30*time.Second)
|
||||
rec.Start(context.Background())
|
||||
defer rec.Stop()
|
||||
// The plugin pass is wired after the deployer is constructed (below);
|
||||
// the reconciler tolerates a nil dispatcher until then. SetPluginReconciler
|
||||
// is safe to call at any time, including mid-tick.
|
||||
|
||||
// Read settings for NPM URL and polling interval.
|
||||
settings, err := db.GetSettings()
|
||||
@@ -166,12 +180,24 @@ func main() {
|
||||
})
|
||||
defer stopLogger()
|
||||
|
||||
// Event-trigger dispatcher: consume EventLog publishes off the bus
|
||||
// and fan out to operator-configured webhook actions. Loop-prevention
|
||||
// is structural — the dispatcher never writes back to event_log; all
|
||||
// delivery outcomes land in notifier audit logging.
|
||||
stopTriggerDispatcher := events.RegisterEventTriggerDispatcher(eventBus, db, notifier)
|
||||
defer stopTriggerDispatcher()
|
||||
|
||||
dep := deployer.New(dockerClient, proxyProvider, db, healthChecker, notifier, eventBus, encKey)
|
||||
rec.SetPluginReconciler(dep)
|
||||
|
||||
// Initialize webhook handler. Per-project and per-site secrets are stored
|
||||
// on their respective rows; the static-site triggerer is wired in below
|
||||
// once the site manager has been constructed.
|
||||
webhookHandler := webhook.NewHandler(db, dep, nil)
|
||||
// Plugin-pipeline dispatcher for /api/webhook/workloads/{secret}.
|
||||
// Wired here so the same *deployer.Deployer serves both legacy and
|
||||
// plugin-native paths from one place.
|
||||
webhookHandler.SetPluginDispatcher(dep)
|
||||
|
||||
// Initialize registry poller.
|
||||
poller := registry.NewPoller(db, dep, encKey)
|
||||
@@ -322,6 +348,11 @@ func main() {
|
||||
// Initialize static site manager and health checker.
|
||||
staticSiteMgr := staticsite.NewManager(db, dockerClient, proxyProvider, eventBus, notifier, encKey)
|
||||
webhookHandler.SetSiteSyncTriggerer(staticSiteMgr)
|
||||
// Wire the plugin static source's backend to the manager. After this
|
||||
// call the "static" kind appears in /api/hooks/kinds and the /apps/new
|
||||
// picker; before it, the source registers no kind, so the frontend
|
||||
// silently omits it.
|
||||
wireStaticBackend(db, staticSiteMgr)
|
||||
staticSiteHealth := staticsite.NewHealthChecker(db, dockerClient, staticSiteMgr)
|
||||
if err := staticSiteHealth.Start("2m"); err != nil {
|
||||
slog.Warn("failed to start static site health checker", "error", err)
|
||||
@@ -339,6 +370,26 @@ func main() {
|
||||
stackMgr = nil
|
||||
}
|
||||
|
||||
// Log-scan manager: tails running containers and emits event_log
|
||||
// entries when log lines match operator-configured regex rules.
|
||||
// Start before the API server is wired so the reload callback can
|
||||
// be plugged in via SetLogScanReloader.
|
||||
logScanMgr := logscanner.NewManager(logscanner.Config{
|
||||
Rules: db,
|
||||
Containers: db,
|
||||
Docker: dockerClient,
|
||||
Events: db,
|
||||
Bus: eventBus,
|
||||
PollInterval: 5 * time.Second,
|
||||
})
|
||||
// Manager owns its own cancellation; Stop() drives the loop and
|
||||
// every tail to exit. Using Background here matches the
|
||||
// reconciler + stale-scanner pattern elsewhere in this file.
|
||||
if err := logScanMgr.Start(context.Background()); err != nil {
|
||||
slog.Warn("logscanner: initial rule load failed", "error", err)
|
||||
}
|
||||
defer logScanMgr.Stop()
|
||||
|
||||
// Build API server.
|
||||
apiServer := api.NewServer(db, dockerClient, npmClient, proxyProvider, dep, notifier, webhookHandler, eventBus, encKey)
|
||||
apiServer.SetStaticSiteManager(staticSiteMgr)
|
||||
@@ -346,6 +397,7 @@ func main() {
|
||||
apiServer.SetStackManager(stackMgr)
|
||||
}
|
||||
apiServer.SetStaleScanner(staleScanner)
|
||||
apiServer.SetLogScanReloader(logScanMgr)
|
||||
apiServer.SetBackupEngine(backupEngine)
|
||||
apiServer.SetDBPath(dbPath)
|
||||
apiServer.SetBackupSettingsChangedCallback(scheduleAutobackup)
|
||||
|
||||
Reference in New Issue
Block a user