feat(apps): per-workload deploy history, rollback, and resource metrics

Two additions to the app detail page, each backed by a per-workload endpoint.

Deploy history + rollback:
- New deploy_history table — a structured, version-pinned ledger of every dispatch (success AND failure), distinct from the free-text event_log. Recorded at the single DispatchPlugin choke point so every source kind is covered. The raw deploy error is never persisted (it can carry registry-auth / compose-stdout secrets) — only a generic marker, with detail going to slog. Pruned to the newest N per workload; cascade-deleted with the workload.
- GET /api/workloads/{id}/deploys lists the ledger; POST .../rollback (admin) replays a prior successful deploy's pinned reference as a rollback-reason dispatch. Phase 1 is image-source only (RollbackCapable); git-built sources need checkout-by-commit, a later phase.
- DeployHistoryPanel.svelte renders the ledger with confirm-gated rollback.

Per-workload metrics:
- ListContainerStatsSamplesByWorkload joins the existing container stats samples through the containers index; GET /api/workloads/{id}/stats/history aggregates CPU/memory per timestamp across the workload's containers.
- WorkloadMetricsPanel.svelte reuses ResourceChart (CPU% + memory MiB, windowed, 15s poll).

en/ru i18n added with parity.

Tests: store CRUD + cascade + workload-scoped join, deployer recording (incl. secret-non-leak on failure), API rollback guards, and per-timestamp aggregation.

Plans under docs/plans/.
2026-06-19 16:22:12 +03:00
parent c8e71a0c34
commit 0c4c338bfe
23 changed files with 1828 additions and 0 deletions
@@ -0,0 +1,76 @@
+package deployer
+
+import (
+	"log/slog"
+
+	"github.com/alexei/tinyforge/internal/store"
+	"github.com/alexei/tinyforge/internal/workload/plugin"
+)
+
+// deployHistoryKeepPerWorkload is the per-workload row cap recordDeployHistory
+// passes to PruneDeployHistory. Newer rows always have larger ids, so pruning
+// keeps the most recent N: a useful rollback menu without unbounded growth.
+const deployHistoryKeepPerWorkload = 50
+
+// recordDeployHistory appends one ledger row for a completed dispatch.
+//
+// Best-effort: a store failure is logged and swallowed — recording must
+// never turn a successful deploy into a failed request (same contract as
+// EmitDeployEvent and the pre-deploy backup). The raw deploy error is NEVER
+// persisted: it can carry registry-auth bytes or compose stdout, so only a
+// fixed, secret-free marker lands in the row (raw detail goes to slog at the
+// call site). Called only from DispatchPlugin — reconcile/teardown ticks are
+// not deploys and must not appear in the ledger.
+func (d *Deployer) recordDeployHistory(w plugin.Workload, intent plugin.DeploymentIntent, outcome string, deployErr error, startedAt string) {
+	if d.store == nil {
+		return
+	}
+	entry := store.DeployHistoryEntry{
+		WorkloadID:  w.ID,
+		SourceKind:  w.SourceKind,
+		Reference:   d.effectiveReference(w, intent),
+		Reason:      intent.Reason,
+		TriggeredBy: intent.TriggeredBy,
+		Note:        intent.Metadata["note"], // nil map read is safe
+		Outcome:     outcome,
+		StartedAt:   startedAt,
+		FinishedAt:  store.Now(),
+	}
+	if deployErr != nil {
+		entry.Error = "deploy failed (see server logs)"
+	}
+	if _, err := d.store.InsertDeployHistory(entry); err != nil {
+		slog.Warn("deploy history: insert failed", "workload", w.ID, "error", err)
+		return
+	}
+	// Cheap indexed DELETE — negligible next to a multi-second deploy, so it
+	// stays inline rather than on an untracked goroutine that could outrace
+	// graceful shutdown's db.Close().
+	if err := d.store.PruneDeployHistory(w.ID, deployHistoryKeepPerWorkload); err != nil {
+		slog.Warn("deploy history: prune failed", "workload", w.ID, "error", err)
+	}
+}
+
+// effectiveReference picks the artifact handle that goes into the ledger row
+// (and that a rollback-capable source would later replay).
+//
+// The trigger-supplied intent.Reference is the baseline. Only for the "image"
+// source kind, and only when a store is configured, the ImageTag of the first
+// row from ListContainersByWorkload wins if it is non-empty — this captures
+// the DefaultTag / "latest" resolution the source performs when
+// intent.Reference is empty (e.g. a manual deploy with no override). That
+// listing is expected to be newest-first, so the first row is the
+// just-deployed container on success. A listing error, an empty listing, or
+// an empty tag all fall back to intent.Reference.
+//
+// For static/dockerfile the git trigger already provides the commit SHA as
+// intent.Reference; a manual deploy of those may record an empty reference
+// (acceptable — they are not rollback-capable in this phase). compose has no
+// single artifact handle.
+func (d *Deployer) effectiveReference(w plugin.Workload, intent plugin.DeploymentIntent) string {
+	if w.SourceKind != "image" || d.store == nil {
+		return intent.Reference
+	}
+	containers, err := d.store.ListContainersByWorkload(w.ID)
+	if err != nil || len(containers) == 0 {
+		return intent.Reference
+	}
+	if newestTag := containers[0].ImageTag; newestTag != "" {
+		return newestTag
+	}
+	return intent.Reference
+}
@@ -5,6 +5,7 @@ import (
 	"fmt"

 	"github.com/alexei/tinyforge/internal/metrics"
+	"github.com/alexei/tinyforge/internal/store"
 	"github.com/alexei/tinyforge/internal/workload/plugin"
 )

@@ -33,12 +34,17 @@ func (d *Deployer) DispatchPlugin(ctx context.Context, w plugin.Workload, intent
 	// check (e.g. the image source's same-tag short-circuit), so a same-tag
 	// redeploy still snapshots — "backup before every deploy attempt".
 	d.maybeBackupBeforeDeploy(w.ID)
+	startedAt := store.Now()
 	err = src.Deploy(ctx, d.PluginDeps(), w, intent)
 	outcome := "success"
 	if err != nil {
 		outcome = "failure"
 	}
 	metrics.DeploysTotal.Inc(w.SourceKind, outcome)
+	// Append to the structured deploy ledger (powers the per-app history
+	// panel + rollback). Best-effort and secret-free; see recordDeployHistory.
+	// Only DispatchPlugin records — reconcile/teardown are not deploys.
+	d.recordDeployHistory(w, intent, outcome, err, startedAt)
 	return err
 }

@@ -250,6 +250,84 @@ func TestDispatchReconcile_PropagatesSourceError(t *testing.T) {
 	}
 }

+// ---- Deploy history recording ----------------------------------------------
+
+// seedDispatchWorkload creates a real workloads row through the store, so that
+// deploy_history's foreign key (workload_id REFERENCES workloads) is
+// satisfied, and returns a plugin workload carrying the new row's ID and the
+// fake "dispatchertest" source kind. Any store error aborts the test.
+func seedDispatchWorkload(t *testing.T, d *Deployer) plugin.Workload {
+	t.Helper()
+	seed := store.Workload{Kind: "project", RefID: "dh", Name: "dh"}
+	created, err := d.store.CreateWorkload(seed)
+	if err != nil {
+		t.Fatalf("CreateWorkload: %v", err)
+	}
+	return plugin.Workload{ID: created.ID, Name: "dh", SourceKind: "dispatchertest"}
+}
+
+// TestDispatchPlugin_RecordsSuccessHistory dispatches one successful deploy and
+// checks that exactly one ledger row exists, that it mirrors the intent
+// (reason, reference, triggered-by, note) and that its error field is empty.
+func TestDispatchPlugin_RecordsSuccessHistory(t *testing.T) {
+	resetFake(t)
+	d := newTestDeployer(t)
+	w := seedDispatchWorkload(t, d)
+
+	intent := plugin.DeploymentIntent{
+		Reason:      "manual",
+		Reference:   "v9",
+		TriggeredBy: "alice",
+		Metadata:    map[string]string{"note": "ship it"},
+	}
+	err := d.DispatchPlugin(context.Background(), w, intent)
+	if err != nil {
+		t.Fatalf("DispatchPlugin: %v", err)
+	}
+
+	rows, err := d.store.ListDeployHistory(w.ID, 10, 0)
+	if err != nil {
+		t.Fatalf("ListDeployHistory: %v", err)
+	}
+	if n := len(rows); n != 1 {
+		t.Fatalf("expected 1 history row, got %d", n)
+	}
+
+	got := rows[0]
+	if !(got.Outcome == "success" && got.Reason == "manual" && got.Reference == "v9") {
+		t.Fatalf("unexpected row: %+v", got)
+	}
+	if !(got.TriggeredBy == "alice" && got.Note == "ship it") {
+		t.Fatalf("intent fields not recorded: %+v", got)
+	}
+	if got.Error != "" {
+		t.Fatalf("success row must have empty error, got %q", got.Error)
+	}
+}
+
+// TestDispatchPlugin_RecordsFailureWithoutLeakingError makes the fake source
+// fail with an error that embeds a "secret", then checks that the single
+// ledger row is marked as a failure and that no fragment of the raw error
+// text reached the persisted Error field.
+func TestDispatchPlugin_RecordsFailureWithoutLeakingError(t *testing.T) {
+	resetFake(t)
+	d := newTestDeployer(t)
+	w := seedDispatchWorkload(t, d)
+
+	// The dispatch error is expected here and deliberately ignored; only the
+	// ledger row matters.
+	secretErr := errors.New("compose up failed (output: SUPER_SECRET=hunter2)")
+	dispatchTestSource.setDeployErr(secretErr)
+	_ = d.DispatchPlugin(context.Background(), w, plugin.DeploymentIntent{Reason: "manual"})
+
+	rows, _ := d.store.ListDeployHistory(w.ID, 10, 0)
+	if n := len(rows); n != 1 {
+		t.Fatalf("expected 1 history row, got %d", n)
+	}
+	row := rows[0]
+	if row.Outcome != "failure" {
+		t.Fatalf("expected failure outcome, got %q", row.Outcome)
+	}
+	for _, fragment := range []string{"hunter2", "SECRET"} {
+		if strings.Contains(row.Error, fragment) {
+			t.Fatalf("raw error leaked into history: %q", row.Error)
+		}
+	}
+}
+
+// TestDispatchReconcile_RecordsNoHistory runs a reconcile for a seeded workload
+// and checks that the deploy ledger stays empty: reconcile is not a deploy.
+func TestDispatchReconcile_RecordsNoHistory(t *testing.T) {
+	resetFake(t)
+	d := newTestDeployer(t)
+	w := seedDispatchWorkload(t, d)
+
+	err := d.DispatchReconcile(context.Background(), w)
+	if err != nil {
+		t.Fatalf("DispatchReconcile: %v", err)
+	}
+
+	rows, _ := d.store.ListDeployHistory(w.ID, 10, 0)
+	if n := len(rows); n != 0 {
+		t.Fatalf("reconcile must not write history, got %d rows", n)
+	}
+}
+
 // ---- PluginDeps -------------------------------------------------------------

 func TestPluginDeps_PassesStoreAndEncKey(t *testing.T) {