Files
tiny-forge/internal/webhook/trigger_handler.go
alexei.dolgolyov 410a131cec feat(apps): stepped creation wizard, branch previews, and app-creation fixes
This session (frontend focus):
- Rebuild /apps/new as a 4-step wizard (Basics → Configure → Trigger → Review):
  WizardRail, SourceKindPicker card grid, AppManifest review, per-step validation,
  ConfirmDialog-based unsaved-changes guard.
- Extract lib/workload/sourceForms.ts (single source of truth for source_config)
  + {Image,Compose,Static,Dockerfile}SourceForm + StaticDiscoveryWizard; fold the
  /apps/[id] edit form onto the same components (removes the duplication). Add
  vitest + sourceForms unit tests.
- Branch preview environments UI: /chain is_preview/preview_branch + a Preview
  environments panel on /apps/[id] (per-branch URLs, ConfirmDialog teardown, armed
  state); RegistryImagePicker on the registry trigger and the image source.
- Fixes: image-inspect 404 -> admin-gated POST /api/discovery/image/inspect;
  conflict-panel blur flicker; friendly localized discovery errors; CPU/Memory
  label hints; dashboard + /apps "Total workloads" count only source_kind workloads
  (drop stale trigger_kind gate); NPM cert/access-list name cache; EntityPicker
  empty-list guard.
- Update CLAUDE.md frontend conventions + add a Build & Test section.

Also captures pre-existing in-progress platform work (not from this session):
workload notifications, Prometheus metrics export, store lockfile, health probes,
backup hardening, and related store/webhook/scheduler changes.
2026-05-29 02:09:54 +03:00

435 lines
16 KiB
Go

package webhook
import (
"context"
"encoding/json"
"errors"
"fmt"
"io"
"log/slog"
"net/http"
"sync"
"time"
"github.com/go-chi/chi/v5"
"github.com/alexei/tinyforge/internal/metrics"
"github.com/alexei/tinyforge/internal/store"
"github.com/alexei/tinyforge/internal/workload/plugin"
"github.com/alexei/tinyforge/internal/workload/preview"
)
// maxTriggerFanOutConcurrency caps how many bindings dispatch in
// parallel for a single trigger webhook. Sequential fan-out would hold
// the request goroutine for the sum of every binding's deploy time —
// minutes for an N-binding trigger. Bounding to 4 keeps wall-clock
// roughly N/4 * deploy_time without saturating the docker daemon (which
// already serializes pulls).
//
// fanOutBindings uses this as an upper bound only: the effective pool
// size is the smaller of this value and the number of bindings, and
// never less than 1.
const maxTriggerFanOutConcurrency = 4
// BindingResult is the per-binding entry in the trigger fan-out
// response body. Exported so non-HTTP callers (the scheduler) can
// inspect outcomes after calling FanOutForTrigger.
type BindingResult struct {
// Workload identifies the bound workload. fanOutBindings fills it with
// the binding's WorkloadID when the binding is disabled or the workload
// lookup fails, and with the workload's Name once the row was loaded
// and handed to fireBinding.
Workload string `json:"workload"`
// Deployed is true when a dispatch (deploy or preview teardown)
// was issued successfully for this binding.
Deployed bool `json:"deployed"`
// Reason is one of the Reason* constants for non-deploy outcomes and
// preview teardowns, or the intent's own Reason on a successful deploy.
// Omitted from the JSON when empty.
Reason string `json:"reason,omitempty"`
}
// Reason strings used in BindingResult.Reason. Exported so callers
// classifying fan-out outcomes (e.g. the API fire-now summary log)
// don't need to keep string literals in sync with this package.
const (
// ReasonBindingDisabled: the binding's Enabled flag is false; it is
// skipped before any workload lookup or dispatch.
ReasonBindingDisabled = "binding disabled"
// ReasonWorkloadMissing: looking up the bound workload by ID returned
// an error.
ReasonWorkloadMissing = "workload missing"
// ReasonNoMatch: the trigger plugin's Match returned a nil intent.
ReasonNoMatch = "no match"
// ReasonConfigError: merging trigger config with the binding's
// binding_config failed.
ReasonConfigError = "config merge error"
// ReasonMatchError: the trigger plugin's Match returned an error.
ReasonMatchError = "match error"
// ReasonDispatchFailed: the deploy dispatch or the preview teardown
// dispatch returned an error.
ReasonDispatchFailed = "dispatch failed"
// ReasonPreviewError: looking up or materializing the per-branch
// preview workload failed.
ReasonPreviewError = "preview materialize error"
// ReasonPreviewTorndown: the preview was torn down and its workload row
// deleted. Reported with Deployed=true.
ReasonPreviewTorndown = "preview torn down"
// ReasonPreviewNoop: a branch-delete webhook arrived but no preview was
// ever materialized for that branch — a legitimate clean skip, distinct
// from "no binding matched" so it isn't misreported as a wiring problem.
ReasonPreviewNoop = "preview noop"
// ReasonPreviewOrphaned: the preview container was torn down but its
// workload row could not be deleted, leaving an orphan row. Surfaced
// distinctly so the partial failure is visible rather than masquerading
// as a clean teardown.
ReasonPreviewOrphaned = "preview torn down (row orphaned)"
)
// handleTriggerWebhook processes an inbound webhook for a first-class
// Trigger record. The secret resolves to one Trigger; the Trigger then
// fans out to every enabled workload binding. Each binding gets its
// effective config (trigger.config + binding.binding_config merged) and
// runs through the trigger plugin's Match independently — one binding
// firing does not affect another.
//
// URL: POST /api/webhook/triggers/{secret}
//
// Response shape: aggregate counts so a CI can tell at a glance whether
// any deploys fired (status 200 + deploys=N) without parsing per-binding
// detail. Errors per-binding are logged at warn level but do not fail
// the whole request — one broken workload should not block the others.
//
// Every exit path also records a store.WebhookDelivery via the deferred
// recordDelivery call. For a completed fan-out the delivery detail is,
// in priority order: a deploy summary, an error count, "no bindings"
// (trigger has none), "all bindings disabled", an all-skipped summary
// (disabled + preview noop), or "no binding matched".
func (h *Handler) handleTriggerWebhook(w http.ResponseWriter, r *http.Request) {
	ctx := r.Context()

	// delivery starts as a 200/skip and is overwritten on each exit path.
	// The deferred closure captures the variable, so it records whatever
	// the handler left in it at return time.
	delivery := store.WebhookDelivery{
		TargetType:     "trigger",
		SourceIP:       clientIP(r),
		SignatureState: sigStateUnconfigured,
		StatusCode:     http.StatusOK,
		Outcome:        outcomeSkip,
	}
	defer func() { h.recordDelivery(delivery) }()

	if h.plugins == nil {
		delivery.StatusCode = http.StatusServiceUnavailable
		delivery.Outcome = outcomeError
		delivery.Detail = "plugin dispatcher not wired"
		respondWebhookError(w, http.StatusServiceUnavailable, "plugin dispatcher not wired")
		return
	}

	secret := chi.URLParam(r, "secret")
	if secret == "" {
		delivery.StatusCode = http.StatusNotFound
		delivery.Outcome = outcomeNotFound
		http.NotFound(w, r)
		return
	}

	trg, err := h.store.GetTriggerByWebhookSecret(secret)
	if err != nil {
		if errors.Is(err, store.ErrNotFound) {
			delivery.StatusCode = http.StatusNotFound
			delivery.Outcome = outcomeNotFound
			delivery.Detail = "unknown webhook secret"
			http.NotFound(w, r)
			return
		}
		// Any other store failure is logged and recorded as an error, but
		// the caller still receives a plain 404.
		slog.Error("webhook: trigger lookup failed", "error", err)
		delivery.StatusCode = http.StatusNotFound
		delivery.Outcome = outcomeError
		delivery.Detail = "lookup failed"
		http.NotFound(w, r)
		return
	}
	delivery.TargetID = trg.ID
	delivery.TargetName = trg.Name

	// io.LimitReader stops at the cap without reporting an error, so a
	// payload larger than maxWebhookBodyBytes is seen here truncated.
	body, err := io.ReadAll(io.LimitReader(r.Body, maxWebhookBodyBytes))
	if err != nil {
		delivery.StatusCode = http.StatusBadRequest
		delivery.Outcome = outcomeBadRequest
		delivery.Detail = "failed to read request body"
		respondWebhookError(w, http.StatusBadRequest, "failed to read request body")
		return
	}
	delivery.BodySize = len(body)

	header := r.Header.Get(signatureHeader)
	verified, attempted := verifyHMAC(trg.WebhookSigningSecret, body, header)
	delivery.SignatureState = signatureStateFor(trg.WebhookSigningSecret, header, verified, attempted)

	// The trigger demands a signature and verification did not succeed.
	if trg.WebhookRequireSignature && !verified {
		slog.Warn("webhook: trigger signature required but invalid/missing", "trigger", trg.Name)
		delivery.StatusCode = http.StatusUnauthorized
		delivery.Outcome = outcomeRejected
		delivery.Detail = "invalid or missing signature"
		respondWebhookError(w, http.StatusUnauthorized, "invalid or missing signature")
		return
	}
	// A signature that was attempted but failed verification is rejected
	// even when the trigger does not require one.
	if attempted && !verified {
		slog.Warn("webhook: trigger bad signature", "trigger", trg.Name)
		delivery.StatusCode = http.StatusUnauthorized
		delivery.Outcome = outcomeRejected
		delivery.Detail = "invalid signature"
		respondWebhookError(w, http.StatusUnauthorized, "invalid signature")
		return
	}

	evt, err := buildInboundEvent(body, r.Header)
	if err != nil {
		delivery.StatusCode = http.StatusBadRequest
		delivery.Outcome = outcomeBadRequest
		delivery.Detail = err.Error()
		respondWebhookError(w, http.StatusBadRequest, err.Error())
		return
	}

	trigPlugin, err := plugin.GetTrigger(trg.Kind)
	if err != nil {
		slog.Warn("webhook: trigger plugin not registered",
			"trigger", trg.Name, "kind", trg.Kind, "error", err)
		delivery.StatusCode = http.StatusInternalServerError
		delivery.Outcome = outcomeError
		delivery.Detail = "trigger plugin missing"
		respondWebhookError(w, http.StatusInternalServerError, "trigger plugin missing")
		return
	}

	bindings, err := h.store.ListBindingsForTrigger(trg.ID)
	if err != nil {
		slog.Error("webhook: list bindings failed", "trigger", trg.Name, "error", err)
		delivery.StatusCode = http.StatusInternalServerError
		delivery.Outcome = outcomeError
		delivery.Detail = "list bindings failed"
		respondWebhookError(w, http.StatusInternalServerError, "list bindings failed")
		return
	}

	results := h.fanOutBindings(ctx, trg, trigPlugin, bindings, evt)

	// Bucket every result exactly once. Disabled bindings and preview
	// noops are counted separately so the detail can tell them apart, and
	// summed into skipped for the aggregate figures.
	var deployed, disabled, previewNoop, noMatch, errored int
	for _, res := range results {
		switch {
		case res.Deployed:
			deployed++
		case res.Reason == ReasonBindingDisabled:
			disabled++
		case res.Reason == ReasonPreviewNoop:
			previewNoop++
		case res.Reason == ReasonNoMatch:
			noMatch++
		default:
			errored++
		}
	}
	skipped := disabled + previewNoop

	switch {
	case deployed > 0:
		delivery.Outcome = outcomeDeploy
		delivery.Detail = fmt.Sprintf("deployed=%d of %d (errored=%d, skipped=%d)",
			deployed, len(results), errored, skipped)
	case errored > 0:
		delivery.Outcome = outcomeError
		delivery.Detail = fmt.Sprintf("errored=%d of %d", errored, len(results))
	case len(results) == 0:
		// A trigger with no bindings at all is not the same thing as a
		// trigger whose bindings are all disabled.
		delivery.Detail = "no bindings"
	case disabled == len(results):
		delivery.Detail = "all bindings disabled"
	case skipped == len(results):
		// Everything was skipped, but at least one skip was a preview noop.
		delivery.Detail = fmt.Sprintf("all bindings skipped (disabled=%d, preview noop=%d)",
			disabled, previewNoop)
	case noMatch == len(results)-skipped:
		delivery.Detail = "no binding matched"
	default:
		// Defensive fallback: not reachable while the buckets above are
		// exhaustive, kept so a future bucket cannot leave Detail empty.
		delivery.Detail = fmt.Sprintf("matched=0 skipped=%d errored=%d nomatch=%d",
			skipped, errored, noMatch)
	}

	metrics.WebhookDeliveriesTotal.Inc(delivery.Outcome)
	respondWebhookJSON(w, http.StatusOK, map[string]any{
		"success":  true,
		"trigger":  trg.Name,
		"deployed": deployed,
		"bindings": results,
	})
}
// FanOutForTrigger resolves the trigger plugin and the bindings for trg,
// then pushes evt through fanOutBindings — the same bounded worker pool
// the inbound HTTP webhook uses. It exists so in-process callers (the
// scheduler, on each tick) can fire a trigger without a real HTTP
// request while keeping the same per-binding isolation and the same
// result shape.
//
// SECURITY NOTE: trg.WebhookSigningSecret + WebhookRequireSignature
// gate INBOUND HTTP only (handleTriggerWebhook). This method performs
// no signature check, by design: its callers are first-party in-process
// code and no untrusted bytes flow in here. Before adding a caller
// other than the scheduler / inbound webhook, audit that call site for
// authorization — this is not a generic "fire any trigger" entry point.
//
// A nil slice and a non-nil error are returned when the plugin
// dispatcher is not wired, when no trigger plugin is registered for
// trg.Kind, or when listing the bindings fails. Per-binding failures are
// not errors: they appear as entries with Deployed=false in the returned
// slice, alongside a nil error.
func (h *Handler) FanOutForTrigger(
	ctx context.Context,
	trg store.Trigger,
	evt plugin.InboundEvent,
) ([]BindingResult, error) {
	if h.plugins == nil {
		return nil, errors.New("plugin dispatcher not wired")
	}

	trigPlugin, pluginErr := plugin.GetTrigger(trg.Kind)
	if pluginErr != nil {
		return nil, fmt.Errorf("trigger plugin %q: %w", trg.Kind, pluginErr)
	}

	bindings, listErr := h.store.ListBindingsForTrigger(trg.ID)
	if listErr != nil {
		return nil, fmt.Errorf("list bindings: %w", listErr)
	}

	results := h.fanOutBindings(ctx, trg, trigPlugin, bindings, evt)
	return results, nil
}
// fanOutBindings runs fireBinding for each binding, keeping at most
// maxTriggerFanOutConcurrency dispatches in flight. Entry i of the
// returned slice always describes bindings[i], so callers may correlate
// by position.
//
// Two cases are resolved on the calling goroutine and never occupy a
// worker slot: disabled bindings (ReasonBindingDisabled) and bindings
// whose workload lookup fails (ReasonWorkloadMissing). The function
// returns only after every started worker has finished.
func (h *Handler) fanOutBindings(
	ctx context.Context,
	trg store.Trigger,
	trigPlugin plugin.Trigger,
	bindings []store.WorkloadTriggerBinding,
	evt plugin.InboundEvent,
) []BindingResult {
	// Pool size: no larger than the cap, no larger than the amount of
	// work, and at least one.
	workers := len(bindings)
	if workers > maxTriggerFanOutConcurrency {
		workers = maxTriggerFanOutConcurrency
	}
	if workers < 1 {
		workers = 1
	}

	out := make([]BindingResult, len(bindings))
	slots := make(chan struct{}, workers)
	var pending sync.WaitGroup

	// worker handles one binding; each invocation writes only its own
	// slot of out, so no further locking is needed.
	worker := func(pos int, binding store.WorkloadTriggerBinding, wl store.Workload) {
		defer pending.Done()
		defer func() { <-slots }()

		fired, reason := h.fireBinding(ctx, trg, trigPlugin, wl, binding, evt)
		out[pos] = BindingResult{Workload: wl.Name, Deployed: fired, Reason: reason}
	}

	for pos := range bindings {
		binding := bindings[pos]

		if !binding.Enabled {
			out[pos] = BindingResult{
				Workload: binding.WorkloadID,
				Deployed: false,
				Reason:   ReasonBindingDisabled,
			}
			continue
		}

		wl, err := h.store.GetWorkloadByID(binding.WorkloadID)
		if err != nil {
			slog.Warn("webhook: bound workload missing",
				"trigger", trg.Name, "workload", binding.WorkloadID, "error", err)
			out[pos] = BindingResult{
				Workload: binding.WorkloadID,
				Deployed: false,
				Reason:   ReasonWorkloadMissing,
			}
			continue
		}

		pending.Add(1)
		slots <- struct{}{} // blocks here until a worker slot frees up
		go worker(pos, binding, wl)
	}

	pending.Wait()
	return out
}
// fireBinding evaluates one binding: it merges the trigger config with
// the binding's own config, asks the trigger plugin whether evt matches,
// and dispatches a deploy when it does.
//
// It returns (fired, reason). Failures are logged with full detail, but
// the returned reason is one of the generic Reason* constants so a
// malformed binding does not leak internals onto the wire. On a
// successful deploy the reason is the intent's own Reason.
func (h *Handler) fireBinding(
	ctx context.Context,
	trg store.Trigger,
	trigPlugin plugin.Trigger,
	row store.Workload,
	b store.WorkloadTriggerBinding,
	evt plugin.InboundEvent,
) (bool, string) {
	effective, mergeErr := plugin.WithEffectiveTrigger(
		toPluginWorkload(row),
		trg.Kind,
		json.RawMessage(trg.Config),
		json.RawMessage(b.BindingConfig),
	)
	if mergeErr != nil {
		slog.Warn("webhook: merge effective trigger config failed",
			"trigger", trg.Name, "workload", row.Name, "error", mergeErr)
		return false, ReasonConfigError
	}

	intent, matchErr := trigPlugin.Match(ctx, h.plugins.PluginDeps(), effective, evt)
	switch {
	case matchErr != nil:
		slog.Warn("webhook: trigger match error",
			"trigger", trg.Name, "workload", row.Name, "error", matchErr)
		return false, ReasonMatchError
	case intent == nil:
		return false, ReasonNoMatch
	}

	// Fill in provenance the plugin left blank.
	if intent.TriggeredBy == "" {
		intent.TriggeredBy = "trigger-webhook"
	}
	if intent.TriggeredAt.IsZero() {
		intent.TriggeredAt = time.Now().UTC()
	}

	// Preview fork: the git trigger plugin attaches preview_branch
	// metadata when BranchPattern matches a non-baseline branch. Such an
	// intent is routed to a per-branch child workload. This check sits
	// ahead of the regular dispatch on purpose, so a feature-branch push
	// never redeploys (and clobbers) the parent template's container.
	if branch := intent.Metadata["preview_branch"]; branch != "" {
		return h.handlePreviewIntent(ctx, row, intent, branch)
	}

	if dispatchErr := h.plugins.DispatchPlugin(ctx, effective, *intent); dispatchErr != nil {
		slog.Warn("webhook: dispatch failed",
			"trigger", trg.Name, "workload", row.Name, "error", dispatchErr)
		return false, ReasonDispatchFailed
	}

	slog.Info("webhook: triggered deploy via trigger fan-out",
		"trigger", trg.Name, "workload", row.Name, "reason", intent.Reason)
	return true, intent.Reason
}
// handlePreviewIntent handles an intent aimed at a non-baseline branch
// of a preview-template workload. The template itself is never deployed
// here; all work targets the per-branch preview workload.
//
//   - Branch pushed (preview_deleted metadata is not "1"): materialize
//     (or reuse) the preview workload for the branch and dispatch the
//     deploy against it.
//   - Branch deleted (preview_deleted == "1"): look up the preview
//     workload, dispatch its teardown, then delete its row so the
//     dashboard reflects the upstream state.
//
// It returns (fired, reason). Lookup, materialize and dispatch failures
// are logged and reported as (false, generic Reason* constant). A delete
// for a branch that never had a preview yields (false, ReasonPreviewNoop).
// If the teardown succeeds but the row delete fails, the result is
// (true, ReasonPreviewOrphaned) so the partial failure stays visible.
func (h *Handler) handlePreviewIntent(
	ctx context.Context,
	template store.Workload,
	intent *plugin.DeploymentIntent,
	branch string,
) (bool, string) {
	// Push path: deploy the branch onto its preview workload.
	if intent.Metadata["preview_deleted"] != "1" {
		previewWl, err := preview.MaterializeForBranch(h.store, template, branch)
		if err != nil {
			slog.Warn("webhook: preview materialize failed",
				"template", template.Name, "branch", branch, "error", err)
			return false, ReasonPreviewError
		}

		if err := h.plugins.DispatchPlugin(ctx, toPluginWorkload(previewWl), *intent); err != nil {
			slog.Warn("webhook: preview dispatch failed",
				"template", template.Name, "preview", previewWl.Name, "error", err)
			return false, ReasonDispatchFailed
		}

		slog.Info("webhook: triggered preview deploy",
			"template", template.Name, "branch", branch, "preview", previewWl.Name, "reason", intent.Reason)
		return true, intent.Reason
	}

	// Delete path: tear the preview down, if one exists.
	previewWl, found, err := preview.FindPreviewForBranch(h.store, template.ID, branch)
	switch {
	case err != nil:
		slog.Warn("webhook: preview lookup failed",
			"template", template.Name, "branch", branch, "error", err)
		return false, ReasonPreviewError
	case !found:
		// The branch was deleted upstream but no preview was ever
		// materialized for it. Reported as its own noop reason so it is
		// not bucketed as "no binding matched".
		return false, ReasonPreviewNoop
	}

	if err := h.plugins.DispatchTeardown(ctx, toPluginWorkload(previewWl)); err != nil {
		slog.Warn("webhook: preview teardown dispatch failed",
			"template", template.Name, "preview", previewWl.Name, "error", err)
		return false, ReasonDispatchFailed
	}

	if err := h.store.DeleteWorkload(previewWl.ID); err != nil {
		// Teardown went through but the row is still there. Report it
		// under a distinct reason instead of a clean teardown; the
		// operator can remove the row from the dashboard if it lingers.
		slog.Warn("webhook: preview row delete failed (orphaned row)",
			"template", template.Name, "preview", previewWl.Name, "error", err)
		return true, ReasonPreviewOrphaned
	}

	slog.Info("webhook: preview torn down",
		"template", template.Name, "branch", branch, "preview", previewWl.Name)
	return true, ReasonPreviewTorndown
}