410a131cec
This session (frontend focus):
- Rebuild /apps/new as a 4-step wizard (Basics → Configure → Trigger → Review):
WizardRail, SourceKindPicker card grid, AppManifest review, per-step validation,
ConfirmDialog-based unsaved-changes guard.
- Extract lib/workload/sourceForms.ts (single source of truth for source_config)
+ {Image,Compose,Static,Dockerfile}SourceForm + StaticDiscoveryWizard; fold the
/apps/[id] edit form onto the same components (removes the duplication). Add
vitest + sourceForms unit tests.
- Branch preview environments UI: /chain is_preview/preview_branch + a Preview
environments panel on /apps/[id] (per-branch URLs, ConfirmDialog teardown, armed
state); RegistryImagePicker on the registry trigger and the image source.
- Fixes: image-inspect 404 -> admin-gated POST /api/discovery/image/inspect;
conflict-panel blur flicker; friendly localized discovery errors; CPU/Memory
label hints; dashboard + /apps "Total workloads" count only source_kind workloads
(drop stale trigger_kind gate); NPM cert/access-list name cache; EntityPicker
empty-list guard.
- Update CLAUDE.md frontend conventions + add a Build & Test section.
Also captures pre-existing in-progress platform work (not from this session):
workload notifications, Prometheus metrics export, store lockfile, health probes,
backup hardening, and related store/webhook/scheduler changes.
435 lines
16 KiB
Go
435 lines
16 KiB
Go
package webhook
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"log/slog"
|
|
"net/http"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/go-chi/chi/v5"
|
|
|
|
"github.com/alexei/tinyforge/internal/metrics"
|
|
"github.com/alexei/tinyforge/internal/store"
|
|
"github.com/alexei/tinyforge/internal/workload/plugin"
|
|
"github.com/alexei/tinyforge/internal/workload/preview"
|
|
)
|
|
|
|
// maxTriggerFanOutConcurrency is the upper bound on how many bindings
// of a single trigger delivery are dispatched at the same time.
//
// A strictly sequential fan-out would occupy the calling goroutine for
// the sum of every binding's deploy time, which adds up to minutes for
// a trigger with many bindings. With a cap of 4 the wall-clock time is
// roughly N/4 * deploy_time, and the docker daemon (which already
// serializes pulls) is not saturated.
const maxTriggerFanOutConcurrency = 4
|
|
|
|
// BindingResult describes what happened to one binding during a
// trigger fan-out; the response body carries one of these per binding.
// The type is exported so that non-HTTP callers (the scheduler) can
// look at the outcomes returned by FanOutForTrigger.
type BindingResult struct {
	// Workload names the bound workload. Bindings that never reach a
	// dispatch (disabled, or the workload lookup failed) carry the
	// binding's workload ID here instead of the workload name.
	Workload string `json:"workload"`
	// Deployed reports whether the binding fired.
	Deployed bool `json:"deployed"`
	// Reason is either one of the Reason* constants or the reason
	// supplied by the deployment intent; it is omitted from JSON when
	// empty.
	Reason string `json:"reason,omitempty"`
}
|
|
|
|
// Values that can appear in BindingResult.Reason. They are exported so
// that code classifying fan-out outcomes (for example the API fire-now
// summary log) can compare against these names instead of duplicating
// the string literals.
const (
	ReasonBindingDisabled = "binding disabled"
	ReasonWorkloadMissing = "workload missing"
	ReasonNoMatch         = "no match"
	ReasonConfigError     = "config merge error"
	ReasonMatchError      = "match error"
	ReasonDispatchFailed  = "dispatch failed"
	ReasonPreviewError    = "preview materialize error"
	ReasonPreviewTorndown = "preview torn down"

	// ReasonPreviewNoop marks a branch-delete webhook for a branch that
	// never had a preview materialized. That is a legitimate clean skip
	// and is kept apart from "no binding matched" so it does not look
	// like a wiring problem.
	ReasonPreviewNoop = "preview noop"

	// ReasonPreviewOrphaned marks a teardown where the preview container
	// was removed but deleting its workload row failed, leaving an
	// orphan row behind. It has its own value so the partial failure
	// stays visible instead of passing for a clean teardown.
	ReasonPreviewOrphaned = "preview torn down (row orphaned)"
)
|
|
|
|
// handleTriggerWebhook processes an inbound webhook for a first-class
|
|
// Trigger record. The secret resolves to one Trigger; the Trigger then
|
|
// fans out to every enabled workload binding. Each binding gets its
|
|
// effective config (trigger.config + binding.binding_config merged) and
|
|
// runs through the trigger plugin's Match independently — one binding
|
|
// firing does not affect another.
|
|
//
|
|
// URL: POST /api/webhook/triggers/{secret}
|
|
//
|
|
// Response shape: aggregate counts so a CI can tell at a glance whether
|
|
// any deploys fired (status 200 + deploys=N) without parsing per-binding
|
|
// detail. Errors per-binding are logged at warn level but do not fail
|
|
// the whole request — one broken workload should not block the others.
|
|
func (h *Handler) handleTriggerWebhook(w http.ResponseWriter, r *http.Request) {
|
|
ctx := r.Context()
|
|
|
|
delivery := store.WebhookDelivery{
|
|
TargetType: "trigger",
|
|
SourceIP: clientIP(r),
|
|
SignatureState: sigStateUnconfigured,
|
|
StatusCode: http.StatusOK,
|
|
Outcome: outcomeSkip,
|
|
}
|
|
defer func() { h.recordDelivery(delivery) }()
|
|
|
|
if h.plugins == nil {
|
|
delivery.StatusCode = http.StatusServiceUnavailable
|
|
delivery.Outcome = outcomeError
|
|
delivery.Detail = "plugin dispatcher not wired"
|
|
respondWebhookError(w, http.StatusServiceUnavailable, "plugin dispatcher not wired")
|
|
return
|
|
}
|
|
|
|
secret := chi.URLParam(r, "secret")
|
|
if secret == "" {
|
|
delivery.StatusCode = http.StatusNotFound
|
|
delivery.Outcome = outcomeNotFound
|
|
http.NotFound(w, r)
|
|
return
|
|
}
|
|
|
|
trg, err := h.store.GetTriggerByWebhookSecret(secret)
|
|
if err != nil {
|
|
if errors.Is(err, store.ErrNotFound) {
|
|
delivery.StatusCode = http.StatusNotFound
|
|
delivery.Outcome = outcomeNotFound
|
|
delivery.Detail = "unknown webhook secret"
|
|
http.NotFound(w, r)
|
|
return
|
|
}
|
|
slog.Error("webhook: trigger lookup failed", "error", err)
|
|
delivery.StatusCode = http.StatusNotFound
|
|
delivery.Outcome = outcomeError
|
|
delivery.Detail = "lookup failed"
|
|
http.NotFound(w, r)
|
|
return
|
|
}
|
|
delivery.TargetID = trg.ID
|
|
delivery.TargetName = trg.Name
|
|
|
|
body, err := io.ReadAll(io.LimitReader(r.Body, maxWebhookBodyBytes))
|
|
if err != nil {
|
|
delivery.StatusCode = http.StatusBadRequest
|
|
delivery.Outcome = outcomeBadRequest
|
|
delivery.Detail = "failed to read request body"
|
|
respondWebhookError(w, http.StatusBadRequest, "failed to read request body")
|
|
return
|
|
}
|
|
delivery.BodySize = len(body)
|
|
|
|
header := r.Header.Get(signatureHeader)
|
|
verified, attempted := verifyHMAC(trg.WebhookSigningSecret, body, header)
|
|
delivery.SignatureState = signatureStateFor(trg.WebhookSigningSecret, header, verified, attempted)
|
|
if trg.WebhookRequireSignature && !verified {
|
|
slog.Warn("webhook: trigger signature required but invalid/missing", "trigger", trg.Name)
|
|
delivery.StatusCode = http.StatusUnauthorized
|
|
delivery.Outcome = outcomeRejected
|
|
delivery.Detail = "invalid or missing signature"
|
|
respondWebhookError(w, http.StatusUnauthorized, "invalid or missing signature")
|
|
return
|
|
}
|
|
if attempted && !verified {
|
|
slog.Warn("webhook: trigger bad signature", "trigger", trg.Name)
|
|
delivery.StatusCode = http.StatusUnauthorized
|
|
delivery.Outcome = outcomeRejected
|
|
delivery.Detail = "invalid signature"
|
|
respondWebhookError(w, http.StatusUnauthorized, "invalid signature")
|
|
return
|
|
}
|
|
|
|
evt, err := buildInboundEvent(body, r.Header)
|
|
if err != nil {
|
|
delivery.StatusCode = http.StatusBadRequest
|
|
delivery.Outcome = outcomeBadRequest
|
|
delivery.Detail = err.Error()
|
|
respondWebhookError(w, http.StatusBadRequest, err.Error())
|
|
return
|
|
}
|
|
|
|
trigPlugin, err := plugin.GetTrigger(trg.Kind)
|
|
if err != nil {
|
|
slog.Warn("webhook: trigger plugin not registered",
|
|
"trigger", trg.Name, "kind", trg.Kind, "error", err)
|
|
delivery.StatusCode = http.StatusInternalServerError
|
|
delivery.Outcome = outcomeError
|
|
delivery.Detail = "trigger plugin missing"
|
|
respondWebhookError(w, http.StatusInternalServerError, "trigger plugin missing")
|
|
return
|
|
}
|
|
|
|
bindings, err := h.store.ListBindingsForTrigger(trg.ID)
|
|
if err != nil {
|
|
slog.Error("webhook: list bindings failed", "trigger", trg.Name, "error", err)
|
|
delivery.StatusCode = http.StatusInternalServerError
|
|
delivery.Outcome = outcomeError
|
|
delivery.Detail = "list bindings failed"
|
|
respondWebhookError(w, http.StatusInternalServerError, "list bindings failed")
|
|
return
|
|
}
|
|
|
|
results := h.fanOutBindings(ctx, trg, trigPlugin, bindings, evt)
|
|
var deployed, skipped, noMatch, errored int
|
|
for _, r := range results {
|
|
switch {
|
|
case r.Deployed:
|
|
deployed++
|
|
case r.Reason == ReasonBindingDisabled, r.Reason == ReasonPreviewNoop:
|
|
skipped++
|
|
case r.Reason == ReasonNoMatch:
|
|
noMatch++
|
|
default:
|
|
errored++
|
|
}
|
|
}
|
|
|
|
switch {
|
|
case deployed > 0:
|
|
delivery.Outcome = outcomeDeploy
|
|
delivery.Detail = fmt.Sprintf("deployed=%d of %d (errored=%d, skipped=%d)",
|
|
deployed, len(results), errored, skipped)
|
|
case errored > 0:
|
|
delivery.Outcome = outcomeError
|
|
delivery.Detail = fmt.Sprintf("errored=%d of %d", errored, len(results))
|
|
case skipped == len(results):
|
|
delivery.Detail = "all bindings disabled"
|
|
case noMatch == len(results)-skipped:
|
|
delivery.Detail = "no binding matched"
|
|
default:
|
|
delivery.Detail = fmt.Sprintf("matched=0 skipped=%d errored=%d nomatch=%d",
|
|
skipped, errored, noMatch)
|
|
}
|
|
metrics.WebhookDeliveriesTotal.Inc(delivery.Outcome)
|
|
respondWebhookJSON(w, http.StatusOK, map[string]any{
|
|
"success": true,
|
|
"trigger": trg.Name,
|
|
"deployed": deployed,
|
|
"bindings": results,
|
|
})
|
|
}
|
|
|
|
// FanOutForTrigger looks up the trigger plugin + bindings for trg and
|
|
// dispatches evt through the same bounded worker pool the inbound HTTP
|
|
// webhook uses. The scheduler calls this on each tick to fire schedule
|
|
// triggers without a real HTTP request — same dispatch path, same
|
|
// per-binding isolation, same outcome shape.
|
|
//
|
|
// SECURITY NOTE: trg.WebhookSigningSecret + WebhookRequireSignature
|
|
// gate INBOUND HTTP only (handleTriggerWebhook). This method skips
|
|
// that check by design because the caller is first-party in-process
|
|
// code — no untrusted bytes flow in here. If you add a new caller
|
|
// outside the scheduler / inbound webhook, audit the call site for
|
|
// authorization first; this is not a generic "fire any trigger"
|
|
// entry point.
|
|
//
|
|
// Returns nil + error only when the trigger plugin is missing or the
|
|
// bindings query fails — both fatal upstream conditions the caller
|
|
// should log. A per-binding error becomes a row in the result slice
|
|
// with Deployed=false; that case returns nil error.
|
|
func (h *Handler) FanOutForTrigger(
|
|
ctx context.Context,
|
|
trg store.Trigger,
|
|
evt plugin.InboundEvent,
|
|
) ([]BindingResult, error) {
|
|
if h.plugins == nil {
|
|
return nil, fmt.Errorf("plugin dispatcher not wired")
|
|
}
|
|
trigPlugin, err := plugin.GetTrigger(trg.Kind)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("trigger plugin %q: %w", trg.Kind, err)
|
|
}
|
|
bindings, err := h.store.ListBindingsForTrigger(trg.ID)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("list bindings: %w", err)
|
|
}
|
|
return h.fanOutBindings(ctx, trg, trigPlugin, bindings, evt), nil
|
|
}
|
|
|
|
// fanOutBindings dispatches every binding through fireBinding with at
|
|
// most maxTriggerFanOutConcurrency goroutines in flight. Order of the
|
|
// returned slice matches the input bindings slice so callers can rely
|
|
// on positional correlation.
|
|
//
|
|
// Disabled bindings short-circuit on the orchestrator goroutine — they
|
|
// don't take a worker slot, leaving the pool free for real dispatches.
|
|
// Workload-missing rows are recorded as errors and also skip the pool.
|
|
func (h *Handler) fanOutBindings(
|
|
ctx context.Context,
|
|
trg store.Trigger,
|
|
trigPlugin plugin.Trigger,
|
|
bindings []store.WorkloadTriggerBinding,
|
|
evt plugin.InboundEvent,
|
|
) []BindingResult {
|
|
results := make([]BindingResult, len(bindings))
|
|
concurrency := maxTriggerFanOutConcurrency
|
|
if len(bindings) < concurrency {
|
|
concurrency = len(bindings)
|
|
}
|
|
if concurrency < 1 {
|
|
concurrency = 1
|
|
}
|
|
sem := make(chan struct{}, concurrency)
|
|
var wg sync.WaitGroup
|
|
for i, b := range bindings {
|
|
if !b.Enabled {
|
|
results[i] = BindingResult{Workload: b.WorkloadID, Deployed: false, Reason: ReasonBindingDisabled}
|
|
continue
|
|
}
|
|
row, lookupErr := h.store.GetWorkloadByID(b.WorkloadID)
|
|
if lookupErr != nil {
|
|
slog.Warn("webhook: bound workload missing",
|
|
"trigger", trg.Name, "workload", b.WorkloadID, "error", lookupErr)
|
|
results[i] = BindingResult{Workload: b.WorkloadID, Deployed: false, Reason: ReasonWorkloadMissing}
|
|
continue
|
|
}
|
|
wg.Add(1)
|
|
sem <- struct{}{}
|
|
go func(idx int, binding store.WorkloadTriggerBinding, wl store.Workload) {
|
|
defer wg.Done()
|
|
defer func() { <-sem }()
|
|
fired, reason := h.fireBinding(ctx, trg, trigPlugin, wl, binding, evt)
|
|
results[idx] = BindingResult{Workload: wl.Name, Deployed: fired, Reason: reason}
|
|
}(i, b, row)
|
|
}
|
|
wg.Wait()
|
|
return results
|
|
}
|
|
|
|
// fireBinding runs Match for one binding and dispatches if intent.
|
|
// Returns (fired, human-readable reason). Errors are logged but the
|
|
// reason is kept generic on the wire so a malformed binding does not
|
|
// leak internals.
|
|
func (h *Handler) fireBinding(
|
|
ctx context.Context,
|
|
trg store.Trigger,
|
|
trigPlugin plugin.Trigger,
|
|
row store.Workload,
|
|
b store.WorkloadTriggerBinding,
|
|
evt plugin.InboundEvent,
|
|
) (bool, string) {
|
|
pwl := toPluginWorkload(row)
|
|
pwl, err := plugin.WithEffectiveTrigger(pwl, trg.Kind,
|
|
json.RawMessage(trg.Config), json.RawMessage(b.BindingConfig))
|
|
if err != nil {
|
|
slog.Warn("webhook: merge effective trigger config failed",
|
|
"trigger", trg.Name, "workload", row.Name, "error", err)
|
|
return false, ReasonConfigError
|
|
}
|
|
intent, err := trigPlugin.Match(ctx, h.plugins.PluginDeps(), pwl, evt)
|
|
if err != nil {
|
|
slog.Warn("webhook: trigger match error",
|
|
"trigger", trg.Name, "workload", row.Name, "error", err)
|
|
return false, ReasonMatchError
|
|
}
|
|
if intent == nil {
|
|
return false, ReasonNoMatch
|
|
}
|
|
if intent.TriggeredAt.IsZero() {
|
|
intent.TriggeredAt = time.Now().UTC()
|
|
}
|
|
if intent.TriggeredBy == "" {
|
|
intent.TriggeredBy = "trigger-webhook"
|
|
}
|
|
|
|
// Preview-deploy fork: the git trigger plugin attaches preview_branch
|
|
// metadata when BranchPattern matches a non-baseline branch. Route
|
|
// the dispatch through a per-branch child workload rather than
|
|
// redeploying the parent template. The fork is intentionally before
|
|
// the dispatch so the template's container never gets clobbered by
|
|
// a feature-branch push.
|
|
if previewBranch := intent.Metadata["preview_branch"]; previewBranch != "" {
|
|
fired, reason := h.handlePreviewIntent(ctx, row, intent, previewBranch)
|
|
return fired, reason
|
|
}
|
|
|
|
if err := h.plugins.DispatchPlugin(ctx, pwl, *intent); err != nil {
|
|
slog.Warn("webhook: dispatch failed",
|
|
"trigger", trg.Name, "workload", row.Name, "error", err)
|
|
return false, ReasonDispatchFailed
|
|
}
|
|
slog.Info("webhook: triggered deploy via trigger fan-out",
|
|
"trigger", trg.Name, "workload", row.Name, "reason", intent.Reason)
|
|
return true, intent.Reason
|
|
}
|
|
|
|
// handlePreviewIntent dispatches an intent that targeted a non-baseline
|
|
// branch on a preview-template workload. Two paths:
|
|
//
|
|
// 1. Branch deleted: find the matching preview workload, dispatch
|
|
// Teardown, then delete the workload row so the dashboard reflects
|
|
// the upstream state.
|
|
// 2. Branch pushed: materialize (or reuse) the preview workload, then
|
|
// dispatch the deploy against it. The template workload itself is
|
|
// never deployed against a feature branch.
|
|
//
|
|
// On any error the helper logs and returns a generic reason — the
|
|
// fan-out caller treats these the same as a normal dispatch failure.
|
|
func (h *Handler) handlePreviewIntent(
|
|
ctx context.Context,
|
|
template store.Workload,
|
|
intent *plugin.DeploymentIntent,
|
|
branch string,
|
|
) (bool, string) {
|
|
deleted := intent.Metadata["preview_deleted"] == "1"
|
|
if deleted {
|
|
child, ok, err := preview.FindPreviewForBranch(h.store, template.ID, branch)
|
|
if err != nil {
|
|
slog.Warn("webhook: preview lookup failed",
|
|
"template", template.Name, "branch", branch, "error", err)
|
|
return false, ReasonPreviewError
|
|
}
|
|
if !ok {
|
|
// Branch was deleted upstream but we never materialized a
|
|
// preview for it — nothing to do. Report as a distinct noop so
|
|
// it isn't bucketed as "no binding matched".
|
|
return false, ReasonPreviewNoop
|
|
}
|
|
childPwl := toPluginWorkload(child)
|
|
if err := h.plugins.DispatchTeardown(ctx, childPwl); err != nil {
|
|
slog.Warn("webhook: preview teardown dispatch failed",
|
|
"template", template.Name, "preview", child.Name, "error", err)
|
|
return false, ReasonDispatchFailed
|
|
}
|
|
if err := h.store.DeleteWorkload(child.ID); err != nil {
|
|
// Container is gone but the row is orphaned. Surface this as a
|
|
// distinct reason so the partial failure is visible rather than
|
|
// reported as a clean teardown; the operator can delete the row
|
|
// from the dashboard if it sticks around.
|
|
slog.Warn("webhook: preview row delete failed (orphaned row)",
|
|
"template", template.Name, "preview", child.Name, "error", err)
|
|
return true, ReasonPreviewOrphaned
|
|
}
|
|
slog.Info("webhook: preview torn down",
|
|
"template", template.Name, "branch", branch, "preview", child.Name)
|
|
return true, ReasonPreviewTorndown
|
|
}
|
|
|
|
child, err := preview.MaterializeForBranch(h.store, template, branch)
|
|
if err != nil {
|
|
slog.Warn("webhook: preview materialize failed",
|
|
"template", template.Name, "branch", branch, "error", err)
|
|
return false, ReasonPreviewError
|
|
}
|
|
childPwl := toPluginWorkload(child)
|
|
if err := h.plugins.DispatchPlugin(ctx, childPwl, *intent); err != nil {
|
|
slog.Warn("webhook: preview dispatch failed",
|
|
"template", template.Name, "preview", child.Name, "error", err)
|
|
return false, ReasonDispatchFailed
|
|
}
|
|
slog.Info("webhook: triggered preview deploy",
|
|
"template", template.Name, "branch", branch, "preview", child.Name, "reason", intent.Reason)
|
|
return true, intent.Reason
|
|
}
|
|
|