feat(observability): event triggers + log scanner backend
Two paired backends sharing the events.Bus seam:
Event triggers (consumer-side):
- internal/store/event_triggers.go — CRUD with action_secret
redaction on read (placeholder echo treated as "no change" on
PATCH so secrets aren't accidentally wiped).
- internal/events/dispatcher.go — bus subscriber, AND-composed
filters (severity CSV, source CSV, message regex with memoized
compile cache). Structural loop-prevention: never writes to
event_log. Sends via notifier.SendPayload.
- internal/notify: SendPayload + SendSyncForTestPayload methods,
TierEventTrigger constant, doSendRaw shared with the legacy
Event-shaped path.
- internal/api/event_triggers.go — CRUD (list/get open to any
authenticated user; create/update/delete admin-gated) + admin-gated
/test sending the real TriggerWebhookPayload shape. SSRF guard
rejects loopback / link-local / unspecified targets. PATCH
uses pointer-typed DTO for partial updates.
Log scanner (producer-side):
- internal/logscanner/ — engine (per-rule cooldown +
per-container token bucket, atomic drop counters), tail
(multiplexed docker frame demuxer with TTY fallback + 16 MiB
payload cap + 1 MiB reassembly cap + RFC3339Nano-validated
timestamp strip + UTF-8-safe message truncation), manager
(5s container polling, atomic.Pointer[Snapshot] hot-reload,
HitEmitter writes event_log + publishes EventLog so the
trigger dispatcher picks them up immediately).
- internal/docker/container.go — ContainerLogsOpts exposes
stream selection for stderr-only / stdout-only rules.
- internal/store: log_scan_rules table + CRUD with
EffectiveLogScanRules resolver (globals minus per-workload
overrides plus workload-only additions). Transactional
cascade-delete of overrides when a global rule is removed.
- internal/api/log_scan_rules.go — CRUD with admin-gated mutations
+ admin-gated /test (sample_line → matched/captures); read-only
/stats (drop counters + active tail count + last-snapshot compile
errors) + GET /api/workloads/{id}/effective-rules, both open to
any authenticated user.
cmd/server/main.go wires both subsystems next to the existing
RegisterPersistentLogger. Test coverage spans the engine cooldown
and bucket counters, snapshot effective-set semantics, manager
compile-error capture, dispatcher matching, store validation +
cascade-delete, and the API URL validator + secret redaction.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
+84
-5
@@ -50,6 +50,7 @@ type Server struct {
|
||||
stackManager *stack.Manager
|
||||
backupEngine *backup.Engine
|
||||
sseGate *sseGate
|
||||
logScanReloader LogScanReloader
|
||||
dbPath string
|
||||
shutdownFunc func() // called after restore to trigger graceful shutdown
|
||||
onBackupSettingsChanged func(enabled bool, intervalHours int) // called when backup settings change
|
||||
@@ -217,13 +218,26 @@ func (s *Server) Router() chi.Router {
|
||||
r.Group(func(r chi.Router) {
|
||||
r.Use(auth.Middleware(s.localAuth))
|
||||
|
||||
// Plugin registry inspection + unified ingress (Workload refactor).
|
||||
// /hooks/kinds is informational and visible to any authenticated
|
||||
// caller. /hooks/generic dispatches deploys and is admin-gated —
|
||||
// vendor-specific webhooks (with their own per-target HMAC
|
||||
// secrets) live under /webhook/* and remain the only ingress
|
||||
// reachable by external CI systems until Phase 5 consolidates them.
|
||||
r.Get("/hooks/kinds", s.listHookKinds)
|
||||
r.Get("/hooks/kinds/{kind}/schema", s.getHookKindSchema)
|
||||
r.With(auth.AdminOnly).Post("/hooks/generic", s.dispatchGeneric)
|
||||
|
||||
// Read-only endpoints (any authenticated user).
|
||||
r.Get("/health", s.getHealth)
|
||||
r.Get("/auth/me", s.currentUser)
|
||||
r.Post("/auth/logout", s.logout)
|
||||
r.Get("/proxies", s.listProxyRoutes)
|
||||
r.Get("/docker/unused-images", s.unusedImageStats)
|
||||
r.Get("/projects", s.listProjects)
|
||||
// Legacy project/stage/site/stack endpoints carry a Deprecation
|
||||
// header pointing at /api/workloads. Functional behavior is
|
||||
// unchanged until the hard cutover removes them.
|
||||
r.With(deprecated("/api/workloads")).Get("/projects", s.listProjects)
|
||||
r.Route("/projects/{id}", func(r chi.Router) {
|
||||
r.Get("/", s.getProject)
|
||||
r.Get("/stages/{stage}/env", s.listStageEnv)
|
||||
@@ -290,7 +304,7 @@ func (s *Server) Router() chi.Router {
|
||||
})
|
||||
})
|
||||
// Stacks (docker-compose).
|
||||
r.Get("/stacks", s.listStacks)
|
||||
r.With(deprecated("/api/workloads?kind=plugin&source_kind=compose")).Get("/stacks", s.listStacks)
|
||||
r.Route("/stacks/{id}", func(r chi.Router) {
|
||||
r.Get("/", s.getStack)
|
||||
r.Get("/revisions", s.listStackRevisions)
|
||||
@@ -311,7 +325,7 @@ func (s *Server) Router() chi.Router {
|
||||
r.With(auth.AdminOnly).Post("/stacks", s.createStack)
|
||||
|
||||
// Static sites.
|
||||
r.Get("/sites", s.listStaticSites)
|
||||
r.With(deprecated("/api/workloads?kind=plugin&source_kind=static")).Get("/sites", s.listStaticSites)
|
||||
r.Route("/sites/{id}", func(r chi.Router) {
|
||||
r.Get("/", s.getStaticSite)
|
||||
r.Get("/secrets", s.listStaticSiteSecrets)
|
||||
@@ -375,13 +389,47 @@ func (s *Server) Router() chi.Router {
|
||||
r.Get("/containers/stale", s.listStaleContainers)
|
||||
|
||||
// Workload-shaped endpoints (the unifying layer over project /
|
||||
// stack / site). Read endpoints are open to any authenticated
|
||||
// user; create / update / deploy mutate state and are admin-gated.
|
||||
// Plugin-native workloads (source_kind + trigger_kind set) are
|
||||
// created here; legacy project / stack / site mutations still go
|
||||
// through their kind-specific endpoints (POST /projects,
|
||||
// PUT /stacks/{id}, …) during the cutover.
|
||||
r.Get("/workloads", s.listWorkloads)
|
||||
r.With(auth.AdminOnly).Post("/workloads", s.createPluginWorkload)
|
||||
r.Route("/workloads/{id}", func(r chi.Router) {
|
||||
r.Get("/", s.getWorkload)
|
||||
r.Get("/containers", s.listWorkloadContainers)
|
||||
r.Get("/containers/{cid}/logs", s.streamWorkloadContainerLogs)
|
||||
r.With(auth.AdminOnly).Patch("/app", s.updateWorkloadAppID)
|
||||
r.With(auth.AdminOnly).Put("/plugin", s.updatePluginWorkload)
|
||||
r.With(auth.AdminOnly).Post("/deploy", s.deployPluginWorkload)
|
||||
r.With(auth.AdminOnly).Delete("/", s.deletePluginWorkload)
|
||||
|
||||
// Per-workload env vars (analog of legacy stage_env).
|
||||
// Listing is open to authenticated readers; mutations are
|
||||
// admin-gated. Encrypted values are write-only after store.
|
||||
r.Get("/env", s.listWorkloadEnv)
|
||||
r.With(auth.AdminOnly).Put("/env", s.setWorkloadEnv)
|
||||
r.With(auth.AdminOnly).Delete("/env/{envID}", s.deleteWorkloadEnv)
|
||||
|
||||
// Per-workload inbound webhook URL: rotate the secret + fetch
|
||||
// the canonical URL. Mirrors the project / site webhook UX.
|
||||
r.With(auth.AdminOnly).Get("/webhook", s.getWorkloadWebhook)
|
||||
r.With(auth.AdminOnly).Post("/webhook/regenerate", s.regenerateWorkloadWebhook)
|
||||
|
||||
// Per-workload volume mounts (analog of legacy project volumes).
|
||||
// Reads are open to authenticated users; mutations admin-gated.
|
||||
// Source/target paths are validated for traversal safety here;
|
||||
// host-path allow-listing happens at deploy time.
|
||||
r.Get("/volumes", s.listWorkloadVolumes)
|
||||
r.With(auth.AdminOnly).Put("/volumes", s.setWorkloadVolume)
|
||||
r.With(auth.AdminOnly).Delete("/volumes/{volID}", s.deleteWorkloadVolume)
|
||||
|
||||
// Stages chain: parent + self + direct children, plus a
|
||||
// promote-from action that copies the source workload's
|
||||
// running image tag onto this workload's default_tag.
|
||||
r.Get("/chain", s.getWorkloadChain)
|
||||
r.With(auth.AdminOnly).Post("/promote-from/{sourceID}", s.promoteFromWorkload)
|
||||
})
|
||||
|
||||
// Global container index, joined to workload + app names.
|
||||
@@ -398,6 +446,37 @@ func (s *Server) Router() chi.Router {
|
||||
r.Delete("/apps/{id}", s.deleteApp)
|
||||
})
|
||||
|
||||
// Event triggers: filter+action rules over the event_log
|
||||
// stream. Read endpoints are available to any authenticated
|
||||
// user; mutations + test-dispatch are admin-gated since they
|
||||
// can fire arbitrary outbound webhooks.
|
||||
r.Get("/event-triggers", s.listEventTriggers)
|
||||
r.Get("/event-triggers/{id}", s.getEventTrigger)
|
||||
r.Group(func(r chi.Router) {
|
||||
r.Use(auth.AdminOnly)
|
||||
r.Post("/event-triggers", s.createEventTrigger)
|
||||
r.Patch("/event-triggers/{id}", s.updateEventTrigger)
|
||||
r.Delete("/event-triggers/{id}", s.deleteEventTrigger)
|
||||
r.Post("/event-triggers/{id}/test", s.testEventTrigger)
|
||||
})
|
||||
|
||||
// Log-scan rules: regex patterns the scanner manager
|
||||
// applies to container log lines. Read endpoints are
|
||||
// available to any authenticated user; mutations are
|
||||
// admin-gated since they can change global observability
|
||||
// behavior across every workload.
|
||||
r.Get("/log-scan-rules", s.listLogScanRules)
|
||||
r.Get("/log-scan-rules/stats", s.getLogScanStats)
|
||||
r.Get("/log-scan-rules/{id}", s.getLogScanRule)
|
||||
r.Get("/workloads/{id}/effective-rules", s.getEffectiveLogScanRules)
|
||||
r.Group(func(r chi.Router) {
|
||||
r.Use(auth.AdminOnly)
|
||||
r.Post("/log-scan-rules", s.createLogScanRule)
|
||||
r.Patch("/log-scan-rules/{id}", s.updateLogScanRule)
|
||||
r.Delete("/log-scan-rules/{id}", s.deleteLogScanRule)
|
||||
r.Post("/log-scan-rules/{id}/test", s.testLogScanRule)
|
||||
})
|
||||
|
||||
// System resources (read-only).
|
||||
r.Get("/system/stats", s.getSystemStats)
|
||||
r.Get("/system/stats/history", s.getSystemStatsHistory)
|
||||
|
||||
Reference in New Issue
Block a user