refactor(triggers): review followups — fire-now, dedupe trigger pages, hardening
Build / build (push) Failing after 34s
Build / build (push) Failing after 34s
Follow-ups on commit 39e1e36 addressing review feedback from
go-reviewer / security-reviewer / typescript-reviewer.
Backend:
- New POST /api/triggers/{id}/fire (AdminOnly, schedule-only): operator
"Fire now" button — dispatches immediately without waiting for the
next natural interval. Persists last_fired_at BEFORE dispatch, same
ordering as the scheduler. Per-trigger in-flight guard (429 if a
fire is already running) to defend against rapid double-clicks /
runaway scripts. Refuses the request when AdminOnly claims are absent
rather than logging an unattributable deploy.
- SetTriggerLastFired now validates that the timestamp parses as RFC3339 before
writing. Rejects empty string explicitly — empty-clears semantics
were dead (no caller) and would silently re-fire on next tick if
ever accidentally written. A future reset-cadence flow must add a
dedicated ClearTriggerLastFired so the call site is grep-able and
separately auditable.
- Scheduler logs WARN on catch-up fires (now - lastFired > 2× interval)
so the "surprise burst at restart" pattern shows up in audit logs.
- BindingResult reason strings extracted to package consts
(webhook.Reason*) so the scheduler and api fire-now classifications
stay in sync without string-matching drift.
- SECURITY NOTE on FanOutForTrigger documents that the
WebhookRequireSignature gate is ingress-only by design.
Frontend:
- Refactored /triggers/new (770 LOC → 155 LOC) and /triggers/[id]
(~350 LOC dropped) to use the shared TriggerKindForm. Eliminates the
triplicated per-kind state + buildConfig + canSubmit + template that
caused the d-unit regex drift in the prior commit.
- New seedTriggerKindFormState helper on TriggerKindForm primes the
form from a server-returned trigger config with defensive type
guards; resets per-kind slots first so re-seeding across kinds
doesn't inherit stale state.
- /triggers/[id] gains a Schedule status panel with Last Fired + Fire
Now button (gated on binding_count > 0). Confirmation dialog,
result flash, timer cleanup on unmount + new-fire (no stale-closure
race). EN+RU i18n parity.
This commit is contained in:
@@ -318,6 +318,7 @@ func (s *Server) Router() chi.Router {
|
||||
r.Delete("/triggers/{id}", s.deleteTrigger)
|
||||
r.Get("/triggers/{id}/webhook", s.getTriggerWebhook)
|
||||
r.Post("/triggers/{id}/webhook/regenerate", s.regenerateTriggerWebhook)
|
||||
r.Post("/triggers/{id}/fire", s.fireTriggerNow)
|
||||
r.Post("/triggers/{id}/bindings", s.bindWorkloadToTrigger)
|
||||
r.Put("/bindings/{bid}", s.updateBinding)
|
||||
r.Delete("/bindings/{bid}", s.deleteBinding)
|
||||
|
||||
@@ -7,13 +7,27 @@ import (
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/go-chi/chi/v5"
|
||||
|
||||
"github.com/alexei/tinyforge/internal/auth"
|
||||
"github.com/alexei/tinyforge/internal/store"
|
||||
"github.com/alexei/tinyforge/internal/webhook"
|
||||
"github.com/alexei/tinyforge/internal/workload/plugin"
|
||||
)
|
||||
|
||||
// fireInFlight tracks trigger IDs that have a fire-now request actively
|
||||
// running so a runaway script or rapid double-click doesn't queue
|
||||
// duplicate deploys. Keyed by trigger ID; entries are added under the
|
||||
// mutex and removed by the handler's defer. Sufficient for an admin
|
||||
// gate — a real rate limiter belongs at the middleware layer, not here.
|
||||
var (
|
||||
fireInFlightMu sync.Mutex
|
||||
fireInFlight = map[string]struct{}{}
|
||||
)
|
||||
|
||||
// triggerView is the response shape for /api/triggers. Webhook secrets
|
||||
// are never serialized — read them via the dedicated /webhook subresource
|
||||
// where the canonical URL is composed.
|
||||
@@ -251,6 +265,126 @@ func (s *Server) getTriggerWebhook(w http.ResponseWriter, r *http.Request) {
|
||||
respondJSON(w, http.StatusOK, view)
|
||||
}
|
||||
|
||||
// fireTriggerNow dispatches a trigger immediately without waiting for
|
||||
// its next natural fire window. Used by the /triggers/[id] "Fire now"
|
||||
// button so an operator can re-test a fixed broken deploy without
|
||||
// waiting one full schedule interval.
|
||||
//
|
||||
// Scope: schedule triggers only. Other kinds (registry / git / manual)
|
||||
// already have their own dispatch paths — registry/git fire on real
|
||||
// inbound events, manual fires from the workload Deploy button. Adding
|
||||
// "fire-now" for those would duplicate those flows without adding new
|
||||
// capability.
|
||||
//
|
||||
// Side effect: updates last_fired_at to "now" (same persist-before-
|
||||
// dispatch ordering the scheduler uses) so the natural next-fire
|
||||
// window shifts forward by exactly the interval. This is the
|
||||
// principle-of-least-surprise behavior — an operator who fires now
|
||||
// is intentionally resetting the cadence.
|
||||
func (s *Server) fireTriggerNow(w http.ResponseWriter, r *http.Request) {
|
||||
id := chi.URLParam(r, "id")
|
||||
|
||||
// Per-trigger in-flight guard. AdminOnly + UI throttle is the only
|
||||
// gate against rapid double-clicks; without this guard a runaway
|
||||
// script could queue parallel fans-out of the same schedule, each
|
||||
// holding up to maxTriggerFanOutConcurrency deployer slots.
|
||||
// Returning 429 lets the client distinguish "already running" from
|
||||
// a real validation error.
|
||||
fireInFlightMu.Lock()
|
||||
if _, busy := fireInFlight[id]; busy {
|
||||
fireInFlightMu.Unlock()
|
||||
respondError(w, http.StatusTooManyRequests,
|
||||
"a fire is already in progress for this trigger")
|
||||
return
|
||||
}
|
||||
fireInFlight[id] = struct{}{}
|
||||
fireInFlightMu.Unlock()
|
||||
defer func() {
|
||||
fireInFlightMu.Lock()
|
||||
delete(fireInFlight, id)
|
||||
fireInFlightMu.Unlock()
|
||||
}()
|
||||
|
||||
trg, err := s.store.GetTriggerByID(id)
|
||||
if err != nil {
|
||||
if errors.Is(err, store.ErrNotFound) {
|
||||
respondNotFound(w, "trigger")
|
||||
return
|
||||
}
|
||||
respondError(w, http.StatusInternalServerError, "failed to load trigger")
|
||||
return
|
||||
}
|
||||
if trg.Kind != "schedule" {
|
||||
respondError(w, http.StatusBadRequest,
|
||||
"fire-now is only supported for schedule triggers")
|
||||
return
|
||||
}
|
||||
|
||||
// AdminOnly middleware guarantees claims; treat their absence as a
|
||||
// boot-time wiring bug rather than fall back to an unattributable
|
||||
// "manual" string that collides with the `manual` trigger kind in
|
||||
// audit logs.
|
||||
claims, ok := auth.ClaimsFromContext(r.Context())
|
||||
if !ok || claims.Username == "" {
|
||||
slog.Error("fire-now: missing claims under AdminOnly", "trigger", trg.Name)
|
||||
respondError(w, http.StatusInternalServerError, "missing auth context")
|
||||
return
|
||||
}
|
||||
actor := claims.Username
|
||||
|
||||
now := time.Now().UTC()
|
||||
if err := s.store.SetTriggerLastFired(trg.ID, now.Format(time.RFC3339)); err != nil {
|
||||
respondError(w, http.StatusInternalServerError, "persist last_fired_at")
|
||||
return
|
||||
}
|
||||
|
||||
evt := plugin.InboundEvent{
|
||||
Kind: "schedule",
|
||||
Schedule: &plugin.ScheduleEvent{FiredAt: now},
|
||||
}
|
||||
results, err := s.webhook.FanOutForTrigger(r.Context(), trg, evt)
|
||||
if err != nil {
|
||||
slog.Warn("fire-now: fan-out failed",
|
||||
"trigger", trg.Name, "actor", actor, "error", err)
|
||||
// Don't expose the raw error — it can carry registry-auth or
|
||||
// compose-stdout bytes (matches the manual-deploy handler).
|
||||
respondError(w, http.StatusInternalServerError, "fire failed; see server logs")
|
||||
return
|
||||
}
|
||||
|
||||
var deployed, errored int
|
||||
for _, b := range results {
|
||||
switch {
|
||||
case b.Deployed:
|
||||
deployed++
|
||||
case b.Reason == webhook.ReasonBindingDisabled, b.Reason == webhook.ReasonNoMatch:
|
||||
// silent
|
||||
default:
|
||||
errored++
|
||||
}
|
||||
}
|
||||
// Empty fan-out (no bindings) is almost certainly an operator
|
||||
// mistake — the UI button is gated on binding_count>0, but the
|
||||
// counts can change between page load and click. Warn so the
|
||||
// no-op shows up in audit logs.
|
||||
if len(results) == 0 {
|
||||
slog.Warn("fire-now: no bindings to fire",
|
||||
"trigger", trg.Name, "actor", actor)
|
||||
} else {
|
||||
slog.Info("fire-now dispatched",
|
||||
"trigger", trg.Name, "actor", actor,
|
||||
"bindings", len(results), "deployed", deployed, "errored", errored)
|
||||
}
|
||||
|
||||
respondJSON(w, http.StatusAccepted, map[string]any{
|
||||
"trigger": trg.Name,
|
||||
"fired_at": now.Format(time.RFC3339),
|
||||
"bindings": len(results),
|
||||
"deployed": deployed,
|
||||
"errored": errored,
|
||||
})
|
||||
}
|
||||
|
||||
func (s *Server) regenerateTriggerWebhook(w http.ResponseWriter, r *http.Request) {
|
||||
id := chi.URLParam(r, "id")
|
||||
secret := generateWebhookSecret()
|
||||
|
||||
@@ -171,9 +171,31 @@ func (s *Scheduler) shouldFire(t store.Trigger, now time.Time) bool {
|
||||
// require a manual DB poke.
|
||||
return true
|
||||
}
|
||||
return !now.Before(last.Add(interval))
|
||||
if now.Before(last.Add(interval)) {
|
||||
return false
|
||||
}
|
||||
// Catch-up warning: a trigger whose last_fired_at is many intervals
|
||||
// old (paused-then-resumed, restored from backup, or just left
|
||||
// running while the dispatcher was down) WILL fire on this tick.
|
||||
// Log a one-line warning so the operator can recognize the "surprise
|
||||
// burst at restart" pattern in audit logs. We still fire — silent
|
||||
// no-fire would be worse — but the warning explains why.
|
||||
if overdue := now.Sub(last); overdue > catchUpWarnThreshold*interval {
|
||||
slog.Warn("scheduler: catch-up fire (very overdue)",
|
||||
"trigger", t.Name, "overdue", overdue, "interval", interval)
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// catchUpWarnThreshold is the multiplier on `interval` past which a
|
||||
// fire is logged as "catch-up." 2× means a daily schedule whose last
|
||||
// fire was more than 48h ago gets a warning at next tick. Chosen so
|
||||
// the warning fires on "wedged for many intervals" without alerting on
|
||||
// the every-tick lag a healthy 30s-tick scheduler accumulates against
|
||||
// a sub-minute interval. A larger threshold is quieter but flags less;
|
||||
// 2× is the smallest value that excludes single-tick lag.
|
||||
const catchUpWarnThreshold = 2
|
||||
|
||||
// fire dispatches one trigger and records the new last_fired_at.
|
||||
//
|
||||
// We persist last_fired_at BEFORE calling the dispatcher so a panic
|
||||
|
||||
@@ -4,6 +4,7 @@ import (
|
||||
"database/sql"
|
||||
"errors"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/google/uuid"
|
||||
)
|
||||
@@ -307,7 +308,17 @@ func (s *Store) EnsureTriggerWebhookSecret(id string) (string, error) {
|
||||
// so the value is stable across timezones. Updating last_fired_at does
|
||||
// not bump updated_at — last_fired_at is operational state, while
|
||||
// updated_at tracks user-visible config edits.
|
||||
//
|
||||
// ts must parse as RFC3339 — a defense-in-depth check so a careless
|
||||
// caller cannot corrupt the column with a garbage string the scheduler
|
||||
// would refuse to parse on every tick. To clear the column (effectively
|
||||
// "fire on next tick"), use a separate API rather than passing empty
|
||||
// here; the narrow contract keeps the call site grep-able and forces
|
||||
// any reset-cadence flow to be explicitly designed and authorized.
|
||||
func (s *Store) SetTriggerLastFired(id, ts string) error {
|
||||
if _, err := time.Parse(time.RFC3339, ts); err != nil {
|
||||
return fmt.Errorf("invalid last_fired_at %q (want RFC3339): %w", ts, err)
|
||||
}
|
||||
result, err := s.db.Exec(
|
||||
`UPDATE triggers SET last_fired_at = ? WHERE id = ?`,
|
||||
ts, id,
|
||||
|
||||
@@ -34,6 +34,18 @@ type BindingResult struct {
|
||||
Reason string `json:"reason,omitempty"`
|
||||
}
|
||||
|
||||
// Reason strings used in BindingResult.Reason. Exported so callers
|
||||
// classifying fan-out outcomes (e.g. the API fire-now summary log)
|
||||
// don't need to keep string literals in sync with this package.
|
||||
const (
|
||||
ReasonBindingDisabled = "binding disabled"
|
||||
ReasonWorkloadMissing = "workload missing"
|
||||
ReasonNoMatch = "no match"
|
||||
ReasonConfigError = "config merge error"
|
||||
ReasonMatchError = "match error"
|
||||
ReasonDispatchFailed = "dispatch failed"
|
||||
)
|
||||
|
||||
// handleTriggerWebhook processes an inbound webhook for a first-class
|
||||
// Trigger record. The secret resolves to one Trigger; the Trigger then
|
||||
// fans out to every enabled workload binding. Each binding gets its
|
||||
@@ -160,9 +172,9 @@ func (h *Handler) handleTriggerWebhook(w http.ResponseWriter, r *http.Request) {
|
||||
switch {
|
||||
case r.Deployed:
|
||||
deployed++
|
||||
case r.Reason == "binding disabled":
|
||||
case r.Reason == ReasonBindingDisabled:
|
||||
skipped++
|
||||
case r.Reason == "no match":
|
||||
case r.Reason == ReasonNoMatch:
|
||||
noMatch++
|
||||
default:
|
||||
errored++
|
||||
@@ -198,6 +210,14 @@ func (h *Handler) handleTriggerWebhook(w http.ResponseWriter, r *http.Request) {
|
||||
// triggers without a real HTTP request — same dispatch path, same
|
||||
// per-binding isolation, same outcome shape.
|
||||
//
|
||||
// SECURITY NOTE: trg.WebhookSigningSecret + WebhookRequireSignature
|
||||
// gate INBOUND HTTP only (handleTriggerWebhook). This method skips
|
||||
// that check by design because the caller is first-party in-process
|
||||
// code — no untrusted bytes flow in here. If you add a new caller
|
||||
// outside the scheduler / inbound webhook, audit the call site for
|
||||
// authorization first; this is not a generic "fire any trigger"
|
||||
// entry point.
|
||||
//
|
||||
// Returns nil + error only when the trigger plugin is missing or the
|
||||
// bindings query fails — both fatal upstream conditions the caller
|
||||
// should log. A per-binding error becomes a row in the result slice
|
||||
@@ -248,14 +268,14 @@ func (h *Handler) fanOutBindings(
|
||||
var wg sync.WaitGroup
|
||||
for i, b := range bindings {
|
||||
if !b.Enabled {
|
||||
results[i] = BindingResult{Workload: b.WorkloadID, Deployed: false, Reason: "binding disabled"}
|
||||
results[i] = BindingResult{Workload: b.WorkloadID, Deployed: false, Reason: ReasonBindingDisabled}
|
||||
continue
|
||||
}
|
||||
row, lookupErr := h.store.GetWorkloadByID(b.WorkloadID)
|
||||
if lookupErr != nil {
|
||||
slog.Warn("webhook: bound workload missing",
|
||||
"trigger", trg.Name, "workload", b.WorkloadID, "error", lookupErr)
|
||||
results[i] = BindingResult{Workload: b.WorkloadID, Deployed: false, Reason: "workload missing"}
|
||||
results[i] = BindingResult{Workload: b.WorkloadID, Deployed: false, Reason: ReasonWorkloadMissing}
|
||||
continue
|
||||
}
|
||||
wg.Add(1)
|
||||
@@ -289,16 +309,16 @@ func (h *Handler) fireBinding(
|
||||
if err != nil {
|
||||
slog.Warn("webhook: merge effective trigger config failed",
|
||||
"trigger", trg.Name, "workload", row.Name, "error", err)
|
||||
return false, "config merge error"
|
||||
return false, ReasonConfigError
|
||||
}
|
||||
intent, err := trigPlugin.Match(ctx, h.plugins.PluginDeps(), pwl, evt)
|
||||
if err != nil {
|
||||
slog.Warn("webhook: trigger match error",
|
||||
"trigger", trg.Name, "workload", row.Name, "error", err)
|
||||
return false, "match error"
|
||||
return false, ReasonMatchError
|
||||
}
|
||||
if intent == nil {
|
||||
return false, "no match"
|
||||
return false, ReasonNoMatch
|
||||
}
|
||||
if intent.TriggeredAt.IsZero() {
|
||||
intent.TriggeredAt = time.Now().UTC()
|
||||
@@ -309,7 +329,7 @@ func (h *Handler) fireBinding(
|
||||
if err := h.plugins.DispatchPlugin(ctx, pwl, *intent); err != nil {
|
||||
slog.Warn("webhook: dispatch failed",
|
||||
"trigger", trg.Name, "workload", row.Name, "error", err)
|
||||
return false, "dispatch failed"
|
||||
return false, ReasonDispatchFailed
|
||||
}
|
||||
slog.Info("webhook: triggered deploy via trigger fan-out",
|
||||
"trigger", trg.Name, "workload", row.Name, "reason", intent.Reason)
|
||||
|
||||
Reference in New Issue
Block a user