Files
alexei.dolgolyov 0c4c338bfe feat(apps): per-workload deploy history, rollback, and resource metrics
Two additions to the app detail page, each backed by a per-workload
endpoint.

Deploy history + rollback:
- New deploy_history table — a structured, version-pinned ledger of every
  dispatch (success AND failure), distinct from the free-text event_log.
  Recorded at the single DispatchPlugin choke point so every source kind
  is covered. The raw deploy error is never persisted (it can carry
  registry-auth / compose-stdout secrets) — only a generic marker, with
  detail going to slog. Pruned to the newest N per workload; cascade-
  deleted with the workload.
- GET /api/workloads/{id}/deploys lists the ledger; POST .../rollback
  (admin) replays a prior successful deploy's pinned reference as a
  rollback-reason dispatch. Phase 1 is image-source only (RollbackCapable);
  git-built sources need checkout-by-commit, a later phase.
- DeployHistoryPanel.svelte renders the ledger with confirm-gated rollback.

Per-workload metrics:
- ListContainerStatsSamplesByWorkload joins the existing container stats
  samples through the containers index; GET /api/workloads/{id}/stats/history
  aggregates CPU/memory per timestamp across the workload's containers.
- WorkloadMetricsPanel.svelte reuses ResourceChart (CPU% + memory MiB,
  windowed, 15s poll).

en/ru i18n added with parity. Tests: store CRUD + cascade + workload-scoped
join, deployer recording (incl. secret-non-leak on failure), API rollback
guards, and per-timestamp aggregation. Plans under docs/plans/.
2026-06-19 16:22:12 +03:00

376 lines
12 KiB
Go

package deployer
import (
"context"
"encoding/json"
"errors"
"strings"
"sync"
"sync/atomic"
"testing"
"time"
"github.com/alexei/tinyforge/internal/store"
"github.com/alexei/tinyforge/internal/workload/plugin"
)
// fakeSource is a stub Source used by the dispatch tests. It counts how
// often each lifecycle method runs and remembers the arguments of the most
// recent call, so a test can assert exactly what the dispatcher invoked.
//
// The call counters are atomics and every other mutable field is guarded
// by mu, so a future parallel run should not flake.
type fakeSource struct {
	// kind is the value reported by Kind.
	kind string

	// Per-method invocation counters.
	deployCount    atomic.Int32
	teardownCount  atomic.Int32
	reconcileCount atomic.Int32

	// mu guards all fields declared below it.
	mu           sync.Mutex
	deployErr    error                   // returned by Deploy
	teardownErr  error                   // returned by Teardown
	reconcileErr error                   // returned by Reconcile
	lastIntent   plugin.DeploymentIntent // intent seen by the latest Deploy
	lastDeps     plugin.Deps             // deps seen by the latest lifecycle call
}
// Kind reports the source kind this fake was constructed with.
func (f *fakeSource) Kind() string {
	return f.kind
}
// SchemaSample returns an empty struct value; the fake carries no
// configuration fields to illustrate.
func (f *fakeSource) SchemaSample() any {
	return struct{}{}
}
// Validate accepts every source config: the fake never rejects input.
func (f *fakeSource) Validate(json.RawMessage) error {
	return nil
}
// Deploy bumps the deploy counter, records the intent and deps it was
// called with, and returns whatever error was configured via setDeployErr
// (nil by default).
func (f *fakeSource) Deploy(_ context.Context, deps plugin.Deps, _ plugin.Workload, intent plugin.DeploymentIntent) error {
	f.deployCount.Add(1)

	f.mu.Lock()
	defer f.mu.Unlock()
	f.lastIntent, f.lastDeps = intent, deps
	return f.deployErr
}
// Teardown bumps the teardown counter, records the deps it was called
// with, and returns whatever error was configured via setTeardownErr
// (nil by default).
func (f *fakeSource) Teardown(_ context.Context, deps plugin.Deps, _ plugin.Workload) error {
	f.teardownCount.Add(1)

	f.mu.Lock()
	defer f.mu.Unlock()
	f.lastDeps = deps
	return f.teardownErr
}
// Reconcile bumps the reconcile counter, records the deps it was called
// with, and returns whatever error was configured via setReconcileErr
// (nil by default).
func (f *fakeSource) Reconcile(_ context.Context, deps plugin.Deps, _ plugin.Workload) error {
	f.reconcileCount.Add(1)

	f.mu.Lock()
	defer f.mu.Unlock()
	f.lastDeps = deps
	return f.reconcileErr
}
// setDeployErr configures the error that subsequent Deploy calls return.
func (f *fakeSource) setDeployErr(err error) {
	f.mu.Lock()
	defer f.mu.Unlock()
	f.deployErr = err
}
// setTeardownErr configures the error that subsequent Teardown calls return.
func (f *fakeSource) setTeardownErr(err error) {
	f.mu.Lock()
	defer f.mu.Unlock()
	f.teardownErr = err
}
// setReconcileErr configures the error that subsequent Reconcile calls return.
func (f *fakeSource) setReconcileErr(err error) {
	f.mu.Lock()
	defer f.mu.Unlock()
	f.reconcileErr = err
}
// dispatchTestSource is the one fake shared by every test in this file.
// It is registered into the plugin registry from init, i.e. exactly once
// per test binary; a second registration would panic because
// RegisterSource panics on a duplicate kind.
var dispatchTestSource = &fakeSource{
	kind: "dispatchertest",
}

// init registers the shared fake so dispatch calls for kind
// "dispatchertest" resolve to it.
func init() {
	plugin.RegisterSource(dispatchTestSource)
}
// resetFake returns the shared fake to a pristine state: call counters
// back to zero, no configured errors, and no remembered intent or deps.
// The registry cannot be cleared per test, so the Source instance is
// shared and every test starts by calling this.
func resetFake(t *testing.T) {
	t.Helper()
	src := dispatchTestSource

	src.deployCount.Store(0)
	src.teardownCount.Store(0)
	src.reconcileCount.Store(0)

	src.mu.Lock()
	defer src.mu.Unlock()
	src.deployErr, src.teardownErr, src.reconcileErr = nil, nil, nil
	src.lastIntent = plugin.DeploymentIntent{}
	src.lastDeps = plugin.Deps{}
}
// newTestDeployer returns a Deployer wired to a fresh store opened at
// ":memory:". The store is closed automatically when the test finishes.
func newTestDeployer(t *testing.T) *Deployer {
	t.Helper()

	memStore, err := store.New(":memory:")
	if err != nil {
		t.Fatalf("create store: %v", err)
	}
	t.Cleanup(func() { memStore.Close() })

	// Only the store is supplied; the remaining dependencies are nil and
	// the trailing [32]byte is left zeroed. The fake source ignores them
	// and the dispatch surface itself does not dereference them.
	return New(nil, nil, memStore, nil, nil, nil, [32]byte{})
}
// sampleWorkload returns a workload that targets the fake source
// (kind "dispatchertest") with an empty JSON object as its source config.
func sampleWorkload() plugin.Workload {
	var w plugin.Workload
	w.ID = "wid-dispatch"
	w.Name = "wkl"
	w.SourceKind = "dispatchertest"
	w.SourceConfig = json.RawMessage(`{}`)
	return w
}
// ---- DispatchPlugin ---------------------------------------------------------
// TestDispatchPlugin_HappyPath_CallsDeployOnce checks that a successful
// dispatch calls the source's Deploy exactly once and hands it the
// caller's intent (Reason and TriggeredBy) unchanged.
func TestDispatchPlugin_HappyPath_CallsDeployOnce(t *testing.T) {
	resetFake(t)
	d := newTestDeployer(t)

	err := d.DispatchPlugin(context.Background(), sampleWorkload(), plugin.DeploymentIntent{
		Reason:      "manual",
		TriggeredBy: "alice",
	})
	if err != nil {
		t.Fatalf("DispatchPlugin: %v", err)
	}

	calls := dispatchTestSource.deployCount.Load()
	if calls != 1 {
		t.Fatalf("Deploy called %d times, want 1", calls)
	}

	seen := dispatchTestSource.lastIntent
	if seen.Reason != "manual" {
		t.Fatalf("intent.Reason = %q, want manual", seen.Reason)
	}
	if seen.TriggeredBy != "alice" {
		t.Fatalf("intent.TriggeredBy = %q, want alice", seen.TriggeredBy)
	}
}
// TestDispatchPlugin_UnknownKind_ReturnsRegistryError checks that
// dispatching a workload whose source kind is not registered fails with a
// "no source registered" error and never reaches the fake's Deploy.
func TestDispatchPlugin_UnknownKind_ReturnsRegistryError(t *testing.T) {
	resetFake(t)
	d := newTestDeployer(t)

	unknown := sampleWorkload()
	unknown.SourceKind = "no-such-kind"

	err := d.DispatchPlugin(context.Background(), unknown, plugin.DeploymentIntent{})
	if err == nil {
		t.Fatalf("expected error for unknown kind, got nil")
	}
	if msg := err.Error(); !strings.Contains(msg, "no source registered") {
		t.Fatalf("error = %q, want substring 'no source registered'", msg)
	}

	calls := dispatchTestSource.deployCount.Load()
	if calls != 0 {
		t.Fatalf("Deploy must not be called for unknown kind, got %d", calls)
	}
}
// TestDispatchPlugin_PropagatesSourceError checks that an error returned
// by the source's Deploy is surfaced by DispatchPlugin in a form that
// errors.Is can still match.
func TestDispatchPlugin_PropagatesSourceError(t *testing.T) {
	resetFake(t)
	d := newTestDeployer(t)

	sentinel := errors.New("boom")
	dispatchTestSource.setDeployErr(sentinel)

	got := d.DispatchPlugin(context.Background(), sampleWorkload(), plugin.DeploymentIntent{})
	if matched := errors.Is(got, sentinel); !matched {
		t.Fatalf("expected wrapped error to match %v, got %v", sentinel, got)
	}
}
// ---- DispatchTeardown -------------------------------------------------------
// TestDispatchTeardown_HappyPath_CallsTeardownOnce checks that a
// successful teardown dispatch calls the source's Teardown exactly once
// and does not touch Deploy.
func TestDispatchTeardown_HappyPath_CallsTeardownOnce(t *testing.T) {
	resetFake(t)
	d := newTestDeployer(t)

	err := d.DispatchTeardown(context.Background(), sampleWorkload())
	if err != nil {
		t.Fatalf("DispatchTeardown: %v", err)
	}

	teardowns := dispatchTestSource.teardownCount.Load()
	if teardowns != 1 {
		t.Fatalf("Teardown called %d times, want 1", teardowns)
	}
	deploys := dispatchTestSource.deployCount.Load()
	if deploys != 0 {
		t.Fatalf("Teardown must not call Deploy, got %d Deploy calls", deploys)
	}
}
// TestDispatchTeardown_UnknownKind_ReturnsRegistryError checks that a
// teardown for an unregistered source kind fails with a
// "no source registered" error.
func TestDispatchTeardown_UnknownKind_ReturnsRegistryError(t *testing.T) {
	resetFake(t)
	d := newTestDeployer(t)

	unknown := sampleWorkload()
	unknown.SourceKind = "no-such-kind"

	err := d.DispatchTeardown(context.Background(), unknown)
	isRegistryErr := err != nil && strings.Contains(err.Error(), "no source registered")
	if !isRegistryErr {
		t.Fatalf("expected unknown-kind error, got %v", err)
	}
}
// TestDispatchTeardown_PropagatesSourceError checks that an error returned
// by the source's Teardown is surfaced by DispatchTeardown in a form that
// errors.Is can still match.
func TestDispatchTeardown_PropagatesSourceError(t *testing.T) {
	resetFake(t)
	d := newTestDeployer(t)

	sentinel := errors.New("teardown failed")
	dispatchTestSource.setTeardownErr(sentinel)

	got := d.DispatchTeardown(context.Background(), sampleWorkload())
	if matched := errors.Is(got, sentinel); !matched {
		t.Fatalf("expected wrapped error to match %v, got %v", sentinel, got)
	}
}
// ---- DispatchReconcile ------------------------------------------------------
// TestDispatchReconcile_HappyPath_CallsReconcileOnce checks that a
// successful reconcile dispatch calls the source's Reconcile exactly once.
func TestDispatchReconcile_HappyPath_CallsReconcileOnce(t *testing.T) {
	resetFake(t)
	d := newTestDeployer(t)

	err := d.DispatchReconcile(context.Background(), sampleWorkload())
	if err != nil {
		t.Fatalf("DispatchReconcile: %v", err)
	}

	calls := dispatchTestSource.reconcileCount.Load()
	if calls != 1 {
		t.Fatalf("Reconcile called %d times, want 1", calls)
	}
}
// TestDispatchReconcile_UnknownKind_ReturnsRegistryError checks that a
// reconcile for an unregistered source kind fails with a
// "no source registered" error.
func TestDispatchReconcile_UnknownKind_ReturnsRegistryError(t *testing.T) {
	resetFake(t)
	d := newTestDeployer(t)

	unknown := sampleWorkload()
	unknown.SourceKind = "no-such-kind"

	err := d.DispatchReconcile(context.Background(), unknown)
	isRegistryErr := err != nil && strings.Contains(err.Error(), "no source registered")
	if !isRegistryErr {
		t.Fatalf("expected unknown-kind error, got %v", err)
	}
}
// TestDispatchReconcile_PropagatesSourceError checks that an error
// returned by the source's Reconcile is surfaced by DispatchReconcile in a
// form that errors.Is can still match.
func TestDispatchReconcile_PropagatesSourceError(t *testing.T) {
	resetFake(t)
	d := newTestDeployer(t)

	sentinel := errors.New("reconcile failed")
	dispatchTestSource.setReconcileErr(sentinel)

	got := d.DispatchReconcile(context.Background(), sampleWorkload())
	if matched := errors.Is(got, sentinel); !matched {
		t.Fatalf("expected wrapped error to match %v, got %v", sentinel, got)
	}
}
// ---- Deploy history recording ----------------------------------------------
// seedDispatchWorkload creates a real workloads row in the deployer's
// store, so that deploy_history's foreign key (workload_id REFERENCES
// workloads) is satisfied, and returns a plugin workload carrying that
// row's ID and pointing at the fake source.
func seedDispatchWorkload(t *testing.T, d *Deployer) plugin.Workload {
	t.Helper()

	seed := store.Workload{Kind: "project", RefID: "dh", Name: "dh"}
	created, err := d.store.CreateWorkload(seed)
	if err != nil {
		t.Fatalf("CreateWorkload: %v", err)
	}

	return plugin.Workload{
		ID:         created.ID,
		Name:       "dh",
		SourceKind: "dispatchertest",
	}
}
// TestDispatchPlugin_RecordsSuccessHistory checks that a successful
// dispatch writes exactly one deploy-history row whose outcome is
// "success", whose reason, reference, triggered-by and note mirror the
// intent (the note comes from Metadata["note"]), and whose error is empty.
func TestDispatchPlugin_RecordsSuccessHistory(t *testing.T) {
	resetFake(t)
	d := newTestDeployer(t)
	w := seedDispatchWorkload(t, d)

	intent := plugin.DeploymentIntent{
		Reason:      "manual",
		Reference:   "v9",
		TriggeredBy: "alice",
		Metadata:    map[string]string{"note": "ship it"},
	}
	if err := d.DispatchPlugin(context.Background(), w, intent); err != nil {
		t.Fatalf("DispatchPlugin: %v", err)
	}

	history, err := d.store.ListDeployHistory(w.ID, 10, 0)
	if err != nil {
		t.Fatalf("ListDeployHistory: %v", err)
	}
	if n := len(history); n != 1 {
		t.Fatalf("expected 1 history row, got %d", n)
	}

	entry := history[0]
	coreOK := entry.Outcome == "success" && entry.Reason == "manual" && entry.Reference == "v9"
	if !coreOK {
		t.Fatalf("unexpected row: %+v", entry)
	}
	intentOK := entry.TriggeredBy == "alice" && entry.Note == "ship it"
	if !intentOK {
		t.Fatalf("intent fields not recorded: %+v", entry)
	}
	if entry.Error != "" {
		t.Fatalf("success row must have empty error, got %q", entry.Error)
	}
}
// TestDispatchPlugin_RecordsFailureWithoutLeakingError checks that a
// failed dispatch is still written to deploy history with outcome
// "failure", and that the text of the source's error does not appear in
// the persisted row's Error field.
func TestDispatchPlugin_RecordsFailureWithoutLeakingError(t *testing.T) {
	resetFake(t)
	d := newTestDeployer(t)
	w := seedDispatchWorkload(t, d)

	// A deploy error carrying a "secret" must never reach the persisted row.
	dispatchTestSource.setDeployErr(errors.New("compose up failed (output: SUPER_SECRET=hunter2)"))

	// The dispatch itself must report the failure; asserting it here keeps
	// the test from silently exercising the success path.
	if err := d.DispatchPlugin(context.Background(), w, plugin.DeploymentIntent{Reason: "manual"}); err == nil {
		t.Fatalf("expected DispatchPlugin to return the deploy error, got nil")
	}

	// Check the query error explicitly: a store failure would otherwise
	// surface as a misleading "expected 1 history row, got 0".
	rows, err := d.store.ListDeployHistory(w.ID, 10, 0)
	if err != nil {
		t.Fatalf("ListDeployHistory: %v", err)
	}
	if len(rows) != 1 {
		t.Fatalf("expected 1 history row, got %d", len(rows))
	}
	if rows[0].Outcome != "failure" {
		t.Fatalf("expected failure outcome, got %q", rows[0].Outcome)
	}
	if strings.Contains(rows[0].Error, "hunter2") || strings.Contains(rows[0].Error, "SECRET") {
		t.Fatalf("raw error leaked into history: %q", rows[0].Error)
	}
}
// TestDispatchReconcile_RecordsNoHistory checks that a reconcile dispatch
// does not write any deploy-history rows for the workload.
func TestDispatchReconcile_RecordsNoHistory(t *testing.T) {
	resetFake(t)
	d := newTestDeployer(t)
	w := seedDispatchWorkload(t, d)

	if err := d.DispatchReconcile(context.Background(), w); err != nil {
		t.Fatalf("DispatchReconcile: %v", err)
	}

	// The query error must be checked: a failing ListDeployHistory returns
	// no rows, which would make the "no history" assertion below pass
	// vacuously.
	rows, err := d.store.ListDeployHistory(w.ID, 10, 0)
	if err != nil {
		t.Fatalf("ListDeployHistory: %v", err)
	}
	if len(rows) != 0 {
		t.Fatalf("reconcile must not write history, got %d rows", len(rows))
	}
}
// ---- PluginDeps -------------------------------------------------------------
// TestPluginDeps_PassesStoreAndEncKey checks that the Deps handed to a
// source during dispatch carry the deployer's own store and encKey.
func TestPluginDeps_PassesStoreAndEncKey(t *testing.T) {
	resetFake(t)
	d := newTestDeployer(t)

	err := d.DispatchPlugin(context.Background(), sampleWorkload(), plugin.DeploymentIntent{})
	if err != nil {
		t.Fatalf("dispatch: %v", err)
	}

	seen := dispatchTestSource.lastDeps
	if seen.Store != d.store {
		t.Fatalf("Deps.Store mismatch: got %p want %p", seen.Store, d.store)
	}
	// EncKey is a value type, so != compares its contents.
	if seen.EncKey != d.encKey {
		t.Fatalf("Deps.EncKey not propagated")
	}
}
// TestPluginDeps_DNSReadUnderRWMutex_NoDeadlockOnHotSwap runs PluginDeps
// and SetDNSProvider concurrently and asserts that every call returns.
//
// PluginDeps is expected to take dnsMu.RLock while SetDNSProvider takes
// dnsMu.Lock. A bug where the read path also took the write lock would
// deadlock when a concurrent SetDNSProvider runs.
func TestPluginDeps_DNSReadUnderRWMutex_NoDeadlockOnHotSwap(t *testing.T) {
	d := newTestDeployer(t)

	const rounds = 50
	var pending sync.WaitGroup
	for i := 0; i < rounds; i++ {
		// One reader and one writer per round.
		pending.Add(2)
		go func() {
			defer pending.Done()
			_ = d.PluginDeps()
		}()
		go func() {
			defer pending.Done()
			d.SetDNSProvider(nil)
		}()
	}

	finished := make(chan struct{})
	go func() {
		pending.Wait()
		close(finished)
	}()

	// Real timeout: a deadlock here would hang `go test` for the entire
	// package timeout (default 10 min) and report no useful diagnostic.
	// Bound at 2s so a regression fails this test specifically.
	deadline := time.NewTimer(2 * time.Second)
	defer deadline.Stop()
	select {
	case <-finished:
	case <-deadline.C:
		t.Fatal("deadlock: PluginDeps/SetDNSProvider did not finish within 2s")
	}
}