feat(triggers): add schedule trigger kind + internal scheduler

Fourth trigger kind alongside registry/git/manual. Recurring time-interval fires driven by a new internal/scheduler tick loop (default 30s, capped at 5m). Goes through the same webhook.Handler.FanOutForTrigger seam as inbound HTTP webhooks, so per-binding concurrency, outcome accounting, and config-merge semantics are identical. Schema: triggers.last_fired_at TEXT column (additive ALTER for existing DBs). Scheduler persists last_fired_at BEFORE dispatch so a panicking Match cannot wedge a tight loop; failed deploys wait one full interval before retry — correct trade-off for a periodic refresh trigger. Frontend: TriggerKindForm + /triggers/new + /triggers/[id] gain the schedule kind (4-col card grid, preset chips Hourly/Daily/Weekly, custom interval input matched to Go time.ParseDuration syntax, optional pinned reference). /triggers/[id] surfaces "last fired" on schedule rows. EN+RU i18n in parity. Review fixes from go-reviewer / security-reviewer / typescript-reviewer: - Scheduler Start/Stop wrapped in sync.Once (no goroutine leak / double-cancel panic on shutdown re-entry). - shouldFire rejects sub-MinInterval as defense-in-depth against hand-inserted rows that bypassed Validate. - fire() asserts trigger Kind=="schedule" before dispatching. - Aligned isValidInterval regex across all three frontend sites; reject the unsupported "d" unit (Go time.ParseDuration doesn't accept it). - formatLastFired falls back to lastFiredNever on malformed timestamps rather than leaking raw bytes into the UI. - main.go scheduler closure logs per-fire deployed/errored counts.
2026-05-16 11:24:05 +03:00
parent e3c7b13d58
commit 39e1e36510
19 changed files with 1247 additions and 49 deletions
@@ -0,0 +1,208 @@
+// Package scheduler drives the "schedule" trigger kind. It ticks on a
+// fixed interval, scans every enabled schedule trigger, and dispatches
+// the ones whose next-fire window has elapsed through the same
+// FanOutForTrigger path the inbound HTTP webhook uses.
+//
+// The scheduler is intentionally simple:
+//
+//   - Tick on `tickInterval` (default 30s).
+//   - For every trigger with Kind=="schedule", parse its config to get
+//     the interval, compute (LastFiredAt + interval), and if now >=
+//     that target, fire.
+//   - On fire: build a plugin.InboundEvent{Kind: "schedule"} and call
+//     handler.FanOutForTrigger. last_fired_at is persisted BEFORE the
+//     dispatch runs so a panicking Match cannot wedge the row into a
+//     tight retry loop — a failed deploy waits one full interval
+//     before retry, which is the correct trade-off for a periodic
+//     refresh trigger.
+//   - A never-fired trigger (LastFiredAt == "") fires on the next
+//     tick — operator-friendly for testing "did I configure it right?".
+//
+// Per-trigger errors are logged but do not abort the tick.
+package scheduler
+
+import (
+	"context"
+	"log/slog"
+	"sync"
+	"time"
+
+	"github.com/alexei/tinyforge/internal/store"
+	"github.com/alexei/tinyforge/internal/workload/plugin"
+	"github.com/alexei/tinyforge/internal/workload/plugin/trigger/schedule"
+)
+
+// Scheduler owns the background tick loop.
+type Scheduler struct {
+	store        *store.Store     // source of schedule triggers; also persists last_fired_at
+	dispatcher   fanOutFn         // invoked by fire for every trigger that is due
+	tickInterval time.Duration    // period of the ticker in loop, already clamped by New
+	clock        func() time.Time // overridable for tests
+
+	startOnce sync.Once          // makes Start a one-shot
+	stopOnce  sync.Once          // makes the cancel in Stop a one-shot
+	cancel    context.CancelFunc // set by Start; nil until then
+	wg        sync.WaitGroup     // tracks the loop goroutine so Stop can wait for it
+}
+
+// fanOutFn is the dispatch callback invoked once per fired trigger. It is a
+// plain func type so the wiring in cmd/server/main.go can pass a closure
+// directly; a non-nil error is logged by fire and otherwise ignored.
+type fanOutFn func(ctx context.Context, trg store.Trigger, evt plugin.InboundEvent) error
+
+// New constructs a Scheduler bound to `st` that dispatches via `fanOut`.
+// `tickInterval` controls how often the loop wakes up to check
+// schedules; values <=0 fall back to 30s. Tick intervals longer than 5
+// minutes are clamped so a misconfigured value can't silently disable
+// schedules.
+//
+// `fanOut` should call webhook.Handler.FanOutForTrigger and return its
+// error (or nil); the per-binding result slice is discarded — the
+// scheduler does not need to know per-binding outcomes, only whether
+// the dispatch itself failed.
+func New(st *store.Store, fanOut fanOutFn, tickInterval time.Duration) *Scheduler {
+	clamped := tickInterval
+	if clamped <= 0 {
+		clamped = 30 * time.Second
+	}
+	if clamped > 5*time.Minute {
+		clamped = 5 * time.Minute
+	}
+	if clamped != tickInterval && tickInterval != 0 {
+		slog.Warn("scheduler: tick interval clamped",
+			"requested", tickInterval, "applied", clamped)
+	}
+	return &Scheduler{
+		store:        st,
+		dispatcher:   fanOut,
+		tickInterval: clamped,
+		clock:        func() time.Time { return time.Now().UTC() },
+	}
+}
+
+// Start spawns the background loop under a cancellable child of ctx and
+// remembers the cancel func for Stop. Only the first call does anything;
+// later calls are no-ops, so they cannot leak extra goroutines.
+func (s *Scheduler) Start(ctx context.Context) {
+	launch := func() {
+		loopCtx, stop := context.WithCancel(ctx)
+		s.cancel = stop
+		// Register with the WaitGroup before the goroutine exists so a
+		// Stop that follows immediately still waits for it.
+		s.wg.Add(1)
+		go s.loop(loopCtx)
+	}
+	s.startOnce.Do(launch)
+}
+
+// Stop cancels the loop's context (first call only, guarded by
+// sync.Once) and then blocks until the loop goroutine has returned,
+// which includes any tick that was in flight. Repeat calls skip the
+// cancel and just wait again. If Start was never called there is no
+// cancel func and nothing to wait for.
+func (s *Scheduler) Stop() {
+	s.stopOnce.Do(func() {
+		if stop := s.cancel; stop != nil {
+			stop()
+		}
+	})
+	s.wg.Wait()
+}
+
+// loop is the body of the goroutine started by Start. It sweeps once
+// right away, then once per tickInterval, and returns when ctx is
+// cancelled. The WaitGroup is released on exit so Stop can unblock.
+func (s *Scheduler) loop(ctx context.Context) {
+	defer s.wg.Done()
+
+	// Sweep immediately at boot: without this a daily schedule whose
+	// window elapsed while the process was down would not be looked at
+	// until the first ticker event.
+	s.TickOnce(ctx)
+
+	tk := time.NewTicker(s.tickInterval)
+	defer tk.Stop()
+
+	done := ctx.Done()
+	for {
+		select {
+		case <-tk.C:
+			s.TickOnce(ctx)
+		case <-done:
+			return
+		}
+	}
+}
+
+// TickOnce runs a single sweep: it lists the "schedule" triggers and
+// fires each one that shouldFire reports as due at the current clock
+// reading. Exposed for tests and for the boot kick.
+//
+// A failure to list triggers is logged and ends the sweep. A failure on
+// one trigger is logged inside fire and does not stop the remaining
+// rows.
+//
+// Once ctx is cancelled the sweep stops before firing anything else.
+// fire persists last_fired_at before it dispatches, so firing during
+// shutdown would mark the trigger as fired while handing the dispatcher
+// an already-cancelled context, and the trigger would then sit out a
+// full interval before its next attempt.
+func (s *Scheduler) TickOnce(ctx context.Context) {
+	rows, err := s.store.ListTriggers("schedule")
+	if err != nil {
+		slog.Warn("scheduler: list triggers", "error", err)
+		return
+	}
+	now := s.clock()
+	for _, t := range rows {
+		// Checked per row so a cancel that arrives mid-sweep takes
+		// effect before the next trigger is touched.
+		if ctx.Err() != nil {
+			return
+		}
+		if !s.shouldFire(t, now) {
+			continue
+		}
+		s.fire(ctx, t, now)
+	}
+}
+
+// shouldFire reports whether trigger t is due at `now`.
+//
+// Decision order:
+//
+//  1. The interval in t.Config cannot be parsed -> warn, do not fire.
+//  2. The interval is below schedule.MinInterval -> warn, do not fire.
+//  3. last_fired_at is empty (never fired) -> fire.
+//  4. last_fired_at is not valid RFC3339 -> warn, fire (treated the
+//     same as never-fired).
+//  5. Otherwise fire iff now >= last_fired_at + interval.
+//
+// The warnings are emitted on every call that hits them.
+func (s *Scheduler) shouldFire(t store.Trigger, now time.Time) bool {
+	every, err := schedule.IntervalOfRaw(t.Config)
+	switch {
+	case err != nil:
+		slog.Warn("scheduler: bad interval", "trigger", t.Name, "error", err)
+		return false
+	case every < schedule.MinInterval:
+		// Validate enforces this floor on the create path; the re-check
+		// covers rows that got in some other way (manual SQL, restore,
+		// ad-hoc migration).
+		slog.Warn("scheduler: interval below minimum, ignoring",
+			"trigger", t.Name, "interval", every, "minimum", schedule.MinInterval)
+		return false
+	case t.LastFiredAt == "":
+		return true
+	}
+
+	prev, parseErr := time.Parse(time.RFC3339, t.LastFiredAt)
+	if parseErr != nil {
+		slog.Warn("scheduler: bad last_fired_at", "trigger", t.Name,
+			"value", t.LastFiredAt, "error", parseErr)
+		// Firing here lets the operator's fix-by-redeploy work without
+		// a manual DB poke.
+		return true
+	}
+
+	due := prev.Add(every)
+	return !due.After(now)
+}
+
+// fire records `now` as the trigger's last_fired_at and then hands the
+// trigger to the dispatcher with a "schedule" InboundEvent.
+//
+// The timestamp is written BEFORE the dispatcher runs so a panic inside
+// Match cannot wedge the row into a tight loop. The cost is that a
+// failed deploy waits one full interval before the next attempt, which
+// is acceptable for a periodic refresh that is not a critical-path
+// retry mechanism.
+//
+// Every failure is logged and swallowed: a non-schedule kind, a failed
+// timestamp write (in which case nothing is dispatched), or a dispatcher
+// error.
+func (s *Scheduler) fire(ctx context.Context, t store.Trigger, now time.Time) {
+	// Belt-and-suspenders: TickOnce only lists "schedule" rows, but if a
+	// future caller wires fire() differently this guard keeps the
+	// scheduler from dispatching a kind it isn't designed for.
+	if t.Kind != "schedule" {
+		slog.Warn("scheduler: refusing to fire non-schedule kind",
+			"trigger", t.Name, "kind", t.Kind)
+		return
+	}
+
+	firedAt := now.Format(time.RFC3339)
+	if persistErr := s.store.SetTriggerLastFired(t.ID, firedAt); persistErr != nil {
+		slog.Warn("scheduler: persist last_fired_at", "trigger", t.Name, "error", persistErr)
+		return
+	}
+
+	dispatchErr := s.dispatcher(ctx, t, plugin.InboundEvent{
+		Kind:     "schedule",
+		Schedule: &plugin.ScheduleEvent{FiredAt: now},
+	})
+	if dispatchErr != nil {
+		slog.Warn("scheduler: dispatch", "trigger", t.Name, "error", dispatchErr)
+		return
+	}
+	slog.Info("scheduler: fired", "trigger", t.Name, "kind", t.Kind, "at", firedAt)
+}
@@ -0,0 +1,223 @@
+package scheduler
+
+import (
+	"context"
+	"testing"
+	"time"
+
+	"github.com/alexei/tinyforge/internal/store"
+	"github.com/alexei/tinyforge/internal/workload/plugin"
+)
+
+// newTestStore opens a store on the ":memory:" DSN, fails the test if
+// that does not work, and closes the store when the test ends.
+//
+// NOTE(review): every call passes the same DSN literal, so isolation
+// between tests relies on store.New giving each handle its own private
+// database — confirm against the store package.
+func newTestStore(t *testing.T) *store.Store {
+	t.Helper()
+
+	db, openErr := store.New(":memory:")
+	if openErr != nil {
+		t.Fatalf("open store: %v", openErr)
+	}
+	t.Cleanup(func() {
+		// A Close error is of no interest once the test has finished.
+		_ = db.Close()
+	})
+	return db
+}
+
+// seedScheduleTrigger inserts a "schedule" trigger called `name` whose
+// config is {"interval": <interval>} and whose last_fired_at is
+// `lastFired` (empty means never fired). It returns the row as reported
+// by CreateTrigger and fails the test if the insert errors.
+func seedScheduleTrigger(t *testing.T, st *store.Store, name, interval, lastFired string) store.Trigger {
+	t.Helper()
+
+	cfg := `{"interval":"` + interval + `"}`
+	created, err := st.CreateTrigger(store.Trigger{
+		Kind:        "schedule",
+		Name:        name,
+		Config:      cfg,
+		LastFiredAt: lastFired,
+	})
+	if err != nil {
+		t.Fatalf("CreateTrigger: %v", err)
+	}
+	return created
+}
+
+func TestShouldFire(t *testing.T) {
+	st := newTestStore(t)
+	now := time.Date(2026, 5, 16, 12, 0, 0, 0, time.UTC)
+	s := New(st, func(context.Context, store.Trigger, plugin.InboundEvent) error { return nil }, 0)
+
+	cases := []struct {
+		name      string
+		interval  string
+		lastFired string
+		want      bool
+	}{
+		{"never fired fires", "1h", "", true},
+		{"window not yet elapsed", "1h", now.Add(-30 * time.Minute).Format(time.RFC3339), false},
+		{"window exactly elapsed fires", "1h", now.Add(-1 * time.Hour).Format(time.RFC3339), true},
+		{"window long elapsed fires", "24h", now.Add(-48 * time.Hour).Format(time.RFC3339), true},
+		{"bad interval suppressed", "banana", "", false},
+		{"bad last_fired_at treated as never", "1h", "not-a-timestamp", true},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			trg := store.Trigger{
+				Config:      `{"interval":"` + tc.interval + `"}`,
+				LastFiredAt: tc.lastFired,
+			}
+			got := s.shouldFire(trg, now)
+			if got != tc.want {
+				t.Fatalf("shouldFire = %v, want %v", got, tc.want)
+			}
+		})
+	}
+}
+
+// TestTickOnce_FiresOverdueTriggers seeds an overdue, a not-yet-due and
+// a never-fired trigger, runs one sweep, and checks both who was
+// dispatched and whose last_fired_at was touched.
+func TestTickOnce_FiresOverdueTriggers(t *testing.T) {
+	st := newTestStore(t)
+	now := time.Date(2026, 5, 16, 12, 0, 0, 0, time.UTC)
+	stamp := func(age time.Duration) string { return now.Add(-age).Format(time.RFC3339) }
+
+	overdue := seedScheduleTrigger(t, st, "overdue", "1h", stamp(2*time.Hour))
+	notDue := seedScheduleTrigger(t, st, "notdue", "1h", stamp(30*time.Minute))
+	never := seedScheduleTrigger(t, st, "never", "1h", "")
+
+	// calls counts dispatcher invocations per trigger name.
+	calls := map[string]int{}
+	record := func(_ context.Context, trg store.Trigger, _ plugin.InboundEvent) error {
+		calls[trg.Name]++
+		return nil
+	}
+	sched := New(st, record, 0)
+	sched.clock = func() time.Time { return now }
+
+	sched.TickOnce(context.Background())
+
+	if n := calls["overdue"]; n != 1 {
+		t.Errorf("overdue should fire once, got %d", n)
+	}
+	if n := calls["notdue"]; n != 0 {
+		t.Errorf("notdue should not fire, got %d", n)
+	}
+	if n := calls["never"]; n != 1 {
+		t.Errorf("never should fire once on first tick, got %d", n)
+	}
+
+	// Every dispatched trigger must now carry a last_fired_at.
+	for _, seeded := range []store.Trigger{overdue, never} {
+		got, err := st.GetTriggerByID(seeded.ID)
+		if err != nil {
+			t.Fatalf("GetTriggerByID(%s): %v", seeded.ID, err)
+		}
+		if got.LastFiredAt == "" {
+			t.Errorf("last_fired_at not persisted for %s", got.Name)
+		}
+	}
+
+	// The trigger that was skipped must keep its original timestamp.
+	untouched, err := st.GetTriggerByID(notDue.ID)
+	if err != nil {
+		t.Fatalf("GetTriggerByID(notdue): %v", err)
+	}
+	if untouched.LastFiredAt != notDue.LastFiredAt {
+		t.Errorf("notdue last_fired_at changed: was %q now %q", notDue.LastFiredAt, untouched.LastFiredAt)
+	}
+}
+
+// TestTickOnce_DispatchErrorDoesNotWedgeOthers checks that a dispatcher
+// error on one trigger neither blocks the other trigger in the same
+// sweep nor causes the failing one to be retried on the next sweep.
+func TestTickOnce_DispatchErrorDoesNotWedgeOthers(t *testing.T) {
+	st := newTestStore(t)
+	now := time.Date(2026, 5, 16, 12, 0, 0, 0, time.UTC)
+
+	broken := seedScheduleTrigger(t, st, "broken", "1h", "")
+	seedScheduleTrigger(t, st, "healthy", "1h", "")
+
+	attempts := make(map[string]int)
+	failBroken := func(_ context.Context, trg store.Trigger, _ plugin.InboundEvent) error {
+		attempts[trg.Name]++
+		if trg.Name != "broken" {
+			return nil
+		}
+		return context.Canceled
+	}
+	sched := New(st, failBroken, 0)
+	sched.clock = func() time.Time { return now }
+
+	sched.TickOnce(context.Background())
+
+	if n := attempts["broken"]; n != 1 {
+		t.Errorf("broken should be attempted once, got %d", n)
+	}
+	if n := attempts["healthy"]; n != 1 {
+		t.Errorf("healthy should fire once, got %d", n)
+	}
+
+	// Persist-before-dispatch invariant: the dispatcher error must not
+	// stop last_fired_at from advancing, otherwise the scheduler would
+	// re-fire the broken trigger on every tick.
+	stored, err := st.GetTriggerByID(broken.ID)
+	if err != nil {
+		t.Fatalf("GetTriggerByID(broken): %v", err)
+	}
+	if stored.LastFiredAt == "" {
+		t.Fatalf("broken trigger last_fired_at must advance even on dispatch error")
+	}
+
+	// A second sweep at the same `now` must leave broken alone.
+	sched.TickOnce(context.Background())
+	if n := attempts["broken"]; n != 1 {
+		t.Errorf("broken refired after persist; got %d (want 1)", n)
+	}
+}
+
+func TestTickOnce_PersistsLastFiredBeforeDispatch(t *testing.T) {
+	// Documented behavior: last_fired_at is persisted before the
+	// dispatcher runs so a panicking match cannot wedge a tight loop.
+	st := newTestStore(t)
+	now := time.Date(2026, 5, 16, 12, 0, 0, 0, time.UTC)
+	trg := seedScheduleTrigger(t, st, "tick", "1h", "")
+
+	dispatched := false
+	s := New(st, func(_ context.Context, t store.Trigger, _ plugin.InboundEvent) error {
+		// At dispatch time the column must already be set.
+		row, err := st.GetTriggerByID(t.ID)
+		if err != nil {
+			return err
+		}
+		dispatched = row.LastFiredAt != ""
+		return nil
+	}, 0)
+	s.clock = func() time.Time { return now }
+
+	s.TickOnce(context.Background())
+
+	if !dispatched {
+		t.Fatalf("last_fired_at must be persisted before dispatcher runs")
+	}
+	row, err := st.GetTriggerByID(trg.ID)
+	if err != nil {
+		t.Fatalf("get: %v", err)
+	}
+	if row.LastFiredAt != now.Format(time.RFC3339) {
+		t.Errorf("last_fired_at = %q, want %q", row.LastFiredAt, now.Format(time.RFC3339))
+	}
+}
+
+// TestLifecycle_StartStopIdempotent calls Start twice and then Stop
+// twice on the same Scheduler. Both are guarded by sync.Once, so the
+// repeat calls must be harmless: no second goroutine from the extra
+// Start, no panic or hang from the extra Stop. This guards the shutdown
+// path that runs Stop from both defer and the signal-handler block in
+// cmd/server/main.go.
+func TestLifecycle_StartStopIdempotent(t *testing.T) {
+	noop := func(context.Context, store.Trigger, plugin.InboundEvent) error { return nil }
+	sched := New(newTestStore(t), noop, 100*time.Millisecond)
+
+	ctx := context.Background()
+	for i := 0; i < 2; i++ {
+		sched.Start(ctx) // the second pass must not spawn another loop
+	}
+	for i := 0; i < 2; i++ {
+		sched.Stop() // the second pass must return without panicking
+	}
+}
+
+// TestNew_ClampsInterval checks how New normalises the tick interval:
+// zero and negative requests fall back to 30s, requests above 5m are
+// capped at 5m, and anything in between passes through unchanged.
+func TestNew_ClampsInterval(t *testing.T) {
+	st := newTestStore(t)
+	noop := func(context.Context, store.Trigger, plugin.InboundEvent) error { return nil }
+	if got := New(st, noop, 0).tickInterval; got != 30*time.Second {
+		t.Errorf("default = %s, want 30s", got)
+	}
+	// Negative values take the same `<= 0` branch as zero.
+	if got := New(st, noop, -1*time.Second).tickInterval; got != 30*time.Second {
+		t.Errorf("negative = %s, want 30s", got)
+	}
+	if got := New(st, noop, 1*time.Hour).tickInterval; got != 5*time.Minute {
+		t.Errorf("clamped = %s, want 5m", got)
+	}
+	if got := New(st, noop, 2*time.Minute).tickInterval; got != 2*time.Minute {
+		t.Errorf("passthrough = %s, want 2m", got)
+	}
+}