refactor(triggers): review followups — fire-now, dedupe trigger pages, hardening
Build / build (push) Failing after 34s

Follow-ups on commit 39e1e36 addressing review feedback from
go-reviewer / security-reviewer / typescript-reviewer.

Backend:
- New POST /api/triggers/{id}/fire (AdminOnly, schedule-only): operator
  "Fire now" button — dispatches immediately without waiting for the
  next natural interval. Persists last_fired_at BEFORE dispatch, same
  ordering as the scheduler. Per-trigger in-flight guard (429 if a
  fire is already running) to defend against rapid double-clicks /
  runaway scripts. Refuses request when AdminOnly claims are absent
  rather than logging an unattributable deploy.
- SetTriggerLastFired now validates timestamp parses as RFC3339 before
  writing. Rejects empty string explicitly — empty-clears semantics
  were dead (no caller) and would silently re-fire on next tick if
  ever accidentally written. A future reset-cadence flow must add a
  dedicated ClearTriggerLastFired so the call site is grep-able and
  separately auditable.
- Scheduler logs WARN on catch-up fires (now - lastFired > 2× interval)
  so the "surprise burst at restart" pattern shows up in audit logs.
- BindingResult reason strings extracted to package consts
  (webhook.Reason*) so the scheduler and api fire-now classifications
  stay in sync without string-matching drift.
- SECURITY NOTE on FanOutForTrigger documents that the
  WebhookRequireSignature gate is ingress-only by design.

Frontend:
- Refactored /triggers/new (770 LOC → 155 LOC) and /triggers/[id]
  (~350 LOC dropped) to use the shared TriggerKindForm. Eliminates the
  triplicated per-kind state + buildConfig + canSubmit + template that
  caused the d-unit regex drift in the prior commit.
- New seedTriggerKindFormState helper on TriggerKindForm primes the
  form from a server-returned trigger config with defensive type
  guards; resets per-kind slots first so re-seeding across kinds
  doesn't inherit stale state.
- /triggers/[id] gains a Schedule status panel with Last Fired + Fire
  Now button (gated on binding_count > 0). Confirmation dialog,
  result flash, timer cleanup on unmount + new-fire (no stale-closure
  race). EN+RU i18n parity.
This commit is contained in:
2026-05-16 12:16:47 +03:00
parent 39e1e36510
commit 5e78f13e06
12 changed files with 486 additions and 1227 deletions
+1
View File
@@ -318,6 +318,7 @@ func (s *Server) Router() chi.Router {
r.Delete("/triggers/{id}", s.deleteTrigger)
r.Get("/triggers/{id}/webhook", s.getTriggerWebhook)
r.Post("/triggers/{id}/webhook/regenerate", s.regenerateTriggerWebhook)
r.Post("/triggers/{id}/fire", s.fireTriggerNow)
r.Post("/triggers/{id}/bindings", s.bindWorkloadToTrigger)
r.Put("/bindings/{bid}", s.updateBinding)
r.Delete("/bindings/{bid}", s.deleteBinding)
+134
View File
@@ -7,13 +7,27 @@ import (
"log/slog"
"net/http"
"strings"
"sync"
"time"
"github.com/go-chi/chi/v5"
"github.com/alexei/tinyforge/internal/auth"
"github.com/alexei/tinyforge/internal/store"
"github.com/alexei/tinyforge/internal/webhook"
"github.com/alexei/tinyforge/internal/workload/plugin"
)
// fireInFlightMu serializes every read and write of fireInFlight.
var fireInFlightMu sync.Mutex

// fireInFlight is the set of trigger IDs that currently have a
// fire-now request running. It exists so a rapid double-click or a
// runaway script cannot queue duplicate deploys for the same trigger:
// the handler inserts the ID while holding fireInFlightMu and removes
// it again from a defer when the request finishes. This is only an
// admin-side guard — real rate limiting belongs in middleware, not
// here.
var fireInFlight = make(map[string]struct{})
// triggerView is the response shape for /api/triggers. Webhook secrets
// are never serialized — read them via the dedicated /webhook subresource
// where the canonical URL is composed.
@@ -251,6 +265,126 @@ func (s *Server) getTriggerWebhook(w http.ResponseWriter, r *http.Request) {
respondJSON(w, http.StatusOK, view)
}
// fireTriggerNow dispatches a trigger immediately instead of waiting
// for its next natural fire window. It backs the /triggers/[id]
// "Fire now" button so an operator can re-test a fixed deploy without
// sitting out one full schedule interval.
//
// Scope: schedule triggers only; any other kind is rejected with 400.
// Registry / git triggers fire on real inbound events and manual
// triggers fire from the workload Deploy button, so a fire-now for
// those kinds would only duplicate an existing flow.
//
// Side effect: last_fired_at is set to "now" BEFORE the fan-out runs
// (the same persist-before-dispatch ordering the scheduler uses), so
// the next natural fire shifts forward by one interval. An operator
// who fires now is intentionally resetting the cadence.
//
// Responses:
//   - 429 when another fire-now for the same trigger is still running
//   - 404 when the trigger does not exist
//   - 400 when the trigger is not a schedule trigger
//   - 500 on store failures, missing auth claims, or a fan-out error
//   - 202 with a JSON summary (trigger, fired_at, bindings, deployed,
//     errored) otherwise
//
// NOTE(review): the fan-out runs on the request context, so it
// presumably gets cancelled if the client disconnects after
// last_fired_at was already persisted — confirm this is intended.
func (s *Server) fireTriggerNow(w http.ResponseWriter, r *http.Request) {
	id := chi.URLParam(r, "id")

	// Per-trigger in-flight guard. AdminOnly plus the UI throttle are
	// the only other gates against rapid double-clicks; without this a
	// runaway script could queue parallel fan-outs of the same
	// schedule. 429 lets the client tell "already running" apart from a
	// real validation error.
	release, acquired := acquireFireSlot(id)
	if !acquired {
		respondError(w, http.StatusTooManyRequests,
			"a fire is already in progress for this trigger")
		return
	}
	defer release()

	trg, err := s.store.GetTriggerByID(id)
	if err != nil {
		if errors.Is(err, store.ErrNotFound) {
			respondNotFound(w, "trigger")
			return
		}
		// Log the cause: the client only ever sees a generic message.
		slog.Error("fire-now: load trigger failed", "trigger_id", id, "error", err)
		respondError(w, http.StatusInternalServerError, "failed to load trigger")
		return
	}
	if trg.Kind != "schedule" {
		respondError(w, http.StatusBadRequest,
			"fire-now is only supported for schedule triggers")
		return
	}

	// AdminOnly middleware guarantees claims; treat their absence as a
	// boot-time wiring bug rather than fall back to an unattributable
	// "manual" string that collides with the `manual` trigger kind in
	// audit logs.
	claims, ok := auth.ClaimsFromContext(r.Context())
	if !ok || claims.Username == "" {
		slog.Error("fire-now: missing claims under AdminOnly", "trigger", trg.Name)
		respondError(w, http.StatusInternalServerError, "missing auth context")
		return
	}
	actor := claims.Username

	// Format once so the persisted value and the value echoed back to
	// the client are guaranteed to be the same string.
	now := time.Now().UTC()
	firedAt := now.Format(time.RFC3339)
	if err := s.store.SetTriggerLastFired(trg.ID, firedAt); err != nil {
		slog.Error("fire-now: persist last_fired_at failed",
			"trigger", trg.Name, "actor", actor, "error", err)
		respondError(w, http.StatusInternalServerError, "persist last_fired_at")
		return
	}

	evt := plugin.InboundEvent{
		Kind:     "schedule",
		Schedule: &plugin.ScheduleEvent{FiredAt: now},
	}
	results, err := s.webhook.FanOutForTrigger(r.Context(), trg, evt)
	if err != nil {
		slog.Warn("fire-now: fan-out failed",
			"trigger", trg.Name, "actor", actor, "error", err)
		// Don't expose the raw error — it can carry registry-auth or
		// compose-stdout bytes (matches the manual-deploy handler).
		respondError(w, http.StatusInternalServerError, "fire failed; see server logs")
		return
	}

	// Classify outcomes: disabled bindings and non-matching bindings
	// are expected and stay silent; every other non-deployed result
	// counts as an error.
	var deployed, errored int
	for _, b := range results {
		switch {
		case b.Deployed:
			deployed++
		case b.Reason == webhook.ReasonBindingDisabled, b.Reason == webhook.ReasonNoMatch:
			// silent
		default:
			errored++
		}
	}

	// An empty fan-out (no bindings) is almost certainly an operator
	// mistake — the UI button is gated on binding_count>0, but the
	// count can change between page load and click. Warn so the no-op
	// shows up in audit logs.
	switch len(results) {
	case 0:
		slog.Warn("fire-now: no bindings to fire",
			"trigger", trg.Name, "actor", actor)
	default:
		slog.Info("fire-now dispatched",
			"trigger", trg.Name, "actor", actor,
			"bindings", len(results), "deployed", deployed, "errored", errored)
	}

	respondJSON(w, http.StatusAccepted, map[string]any{
		"trigger":  trg.Name,
		"fired_at": firedAt,
		"bindings": len(results),
		"deployed": deployed,
		"errored":  errored,
	})
}

// acquireFireSlot claims the fire-now in-flight slot for trigger id.
// It reports ok=false (and a nil release) when a fire for that trigger
// is already running; otherwise the caller must invoke release exactly
// once to free the slot.
func acquireFireSlot(id string) (release func(), ok bool) {
	fireInFlightMu.Lock()
	defer fireInFlightMu.Unlock()

	if _, busy := fireInFlight[id]; busy {
		return nil, false
	}
	fireInFlight[id] = struct{}{}
	return func() {
		fireInFlightMu.Lock()
		delete(fireInFlight, id)
		fireInFlightMu.Unlock()
	}, true
}
func (s *Server) regenerateTriggerWebhook(w http.ResponseWriter, r *http.Request) {
id := chi.URLParam(r, "id")
secret := generateWebhookSecret()
+23 -1
View File
@@ -171,9 +171,31 @@ func (s *Scheduler) shouldFire(t store.Trigger, now time.Time) bool {
// require a manual DB poke.
return true
}
return !now.Before(last.Add(interval))
if now.Before(last.Add(interval)) {
return false
}
// Catch-up warning: a trigger whose last_fired_at is many intervals
// old (paused-then-resumed, restored from backup, or just left
// running while the dispatcher was down) WILL fire on this tick.
// Log a one-line warning so the operator can recognize the "surprise
// burst at restart" pattern in audit logs. We still fire — silent
// no-fire would be worse — but the warning explains why.
if overdue := now.Sub(last); overdue > catchUpWarnThreshold*interval {
slog.Warn("scheduler: catch-up fire (very overdue)",
"trigger", t.Name, "overdue", overdue, "interval", interval)
}
return true
}
// catchUpWarnThreshold is the multiplier applied to a trigger's
// `interval` to decide when a fire is logged as a "catch-up":
// shouldFire warns when now - last_fired_at exceeds
// catchUpWarnThreshold × interval. With 2×, a daily schedule whose
// last fire was more than 48h ago gets a warning at the next tick.
// The value is chosen so the warning flags "wedged for many intervals"
// without alerting on the ordinary per-tick lag a healthy 30s-tick
// scheduler accumulates against a sub-minute interval. Raising the
// threshold makes the warning quieter but slower to flag a stall;
// lowering it makes it noisier. 2 is the smallest value that excludes
// single-tick lag.
const catchUpWarnThreshold = 2
// fire dispatches one trigger and records the new last_fired_at.
//
// We persist last_fired_at BEFORE calling the dispatcher so a panic
+11
View File
@@ -4,6 +4,7 @@ import (
"database/sql"
"errors"
"fmt"
"time"
"github.com/google/uuid"
)
@@ -307,7 +308,17 @@ func (s *Store) EnsureTriggerWebhookSecret(id string) (string, error) {
// so the value is stable across timezones. Updating last_fired_at does
// not bump updated_at — last_fired_at is operational state, while
// updated_at tracks user-visible config edits.
//
// ts must parse as RFC3339 — a defense-in-depth check so a careless
// caller cannot corrupt the column with a garbage string the scheduler
// would refuse to parse on every tick. The empty string is rejected
// too: clearing the column (effectively "fire on next tick") must go
// through a separate, dedicated API rather than this setter. The narrow
// contract keeps the call site grep-able and forces any reset-cadence
// flow to be explicitly designed and authorized.
func (s *Store) SetTriggerLastFired(id, ts string) error {
if _, err := time.Parse(time.RFC3339, ts); err != nil {
return fmt.Errorf("invalid last_fired_at %q (want RFC3339): %w", ts, err)
}
result, err := s.db.Exec(
`UPDATE triggers SET last_fired_at = ? WHERE id = ?`,
ts, id,
+28 -8
View File
@@ -34,6 +34,18 @@ type BindingResult struct {
Reason string `json:"reason,omitempty"`
}
// Values carried in BindingResult.Reason when a binding did not
// deploy. They are exported so code that classifies fan-out outcomes
// (for example the API fire-now summary log) compares against these
// names instead of duplicating string literals.
const (
	// Expected, non-error skips.

	// ReasonBindingDisabled: the binding is not enabled.
	ReasonBindingDisabled = "binding disabled"
	// ReasonNoMatch: the trigger plugin's Match returned no intent.
	ReasonNoMatch = "no match"

	// Failures.

	// ReasonWorkloadMissing: the bound workload could not be loaded.
	ReasonWorkloadMissing = "workload missing"
	// ReasonConfigError: merging the effective trigger config failed.
	ReasonConfigError = "config merge error"
	// ReasonMatchError: the trigger plugin's Match returned an error.
	ReasonMatchError = "match error"
	// ReasonDispatchFailed: dispatching the deploy intent failed.
	ReasonDispatchFailed = "dispatch failed"
)
// handleTriggerWebhook processes an inbound webhook for a first-class
// Trigger record. The secret resolves to one Trigger; the Trigger then
// fans out to every enabled workload binding. Each binding gets its
@@ -160,9 +172,9 @@ func (h *Handler) handleTriggerWebhook(w http.ResponseWriter, r *http.Request) {
switch {
case r.Deployed:
deployed++
case r.Reason == "binding disabled":
case r.Reason == ReasonBindingDisabled:
skipped++
case r.Reason == "no match":
case r.Reason == ReasonNoMatch:
noMatch++
default:
errored++
@@ -198,6 +210,14 @@ func (h *Handler) handleTriggerWebhook(w http.ResponseWriter, r *http.Request) {
// triggers without a real HTTP request — same dispatch path, same
// per-binding isolation, same outcome shape.
//
// SECURITY NOTE: trg.WebhookSigningSecret + WebhookRequireSignature
// gate INBOUND HTTP only (handleTriggerWebhook). This method skips
// that check by design because the caller is first-party in-process
// code — no untrusted bytes flow in here. If you add a new caller
// outside the scheduler / inbound webhook / AdminOnly fire-now API
// handler, audit the call site for authorization first; this is not a
// generic "fire any trigger" entry point.
//
// Returns nil + error only when the trigger plugin is missing or the
// bindings query fails — both fatal upstream conditions the caller
// should log. A per-binding error becomes a row in the result slice
@@ -248,14 +268,14 @@ func (h *Handler) fanOutBindings(
var wg sync.WaitGroup
for i, b := range bindings {
if !b.Enabled {
results[i] = BindingResult{Workload: b.WorkloadID, Deployed: false, Reason: "binding disabled"}
results[i] = BindingResult{Workload: b.WorkloadID, Deployed: false, Reason: ReasonBindingDisabled}
continue
}
row, lookupErr := h.store.GetWorkloadByID(b.WorkloadID)
if lookupErr != nil {
slog.Warn("webhook: bound workload missing",
"trigger", trg.Name, "workload", b.WorkloadID, "error", lookupErr)
results[i] = BindingResult{Workload: b.WorkloadID, Deployed: false, Reason: "workload missing"}
results[i] = BindingResult{Workload: b.WorkloadID, Deployed: false, Reason: ReasonWorkloadMissing}
continue
}
wg.Add(1)
@@ -289,16 +309,16 @@ func (h *Handler) fireBinding(
if err != nil {
slog.Warn("webhook: merge effective trigger config failed",
"trigger", trg.Name, "workload", row.Name, "error", err)
return false, "config merge error"
return false, ReasonConfigError
}
intent, err := trigPlugin.Match(ctx, h.plugins.PluginDeps(), pwl, evt)
if err != nil {
slog.Warn("webhook: trigger match error",
"trigger", trg.Name, "workload", row.Name, "error", err)
return false, "match error"
return false, ReasonMatchError
}
if intent == nil {
return false, "no match"
return false, ReasonNoMatch
}
if intent.TriggeredAt.IsZero() {
intent.TriggeredAt = time.Now().UTC()
@@ -309,7 +329,7 @@ func (h *Handler) fireBinding(
if err := h.plugins.DispatchPlugin(ctx, pwl, *intent); err != nil {
slog.Warn("webhook: dispatch failed",
"trigger", trg.Name, "workload", row.Name, "error", err)
return false, "dispatch failed"
return false, ReasonDispatchFailed
}
slog.Info("webhook: triggered deploy via trigger fan-out",
"trigger", trg.Name, "workload", row.Name, "reason", intent.Reason)