Files
tiny-forge/internal/api/volume_snapshots_test.go
T
alexei.dolgolyov 1c47030854 feat(volsnap): volume snapshot restore (backlog #6)
Restore a captured volume snapshot onto an image workload's live host-bind
data volumes, then redeploy — the most destructive workload action, built to
the adversarially-reviewed design (C1–C6) with all data-loss guards.

- Engine.Restore (engine-owned): all-or-nothing pre-flight re-resolution from
  the workload's CURRENT config (never the tamperable manifest), per-filesystem
  disk pre-check, per-workload lock, container quiesce, extract-to-tmp, durable
  pre-restore snapshot, write-ahead journal, atomic rename swap, redeploy, and
  crash-recovery sweep (RecoverInterruptedRestores) wired before serving.
- internal/keyedmutex: shared per-key lock; deployer now serializes every
  deploy entrypoint per workload via DispatchPlugin (+ LockWorkload/RedeployLocked
  for the restore re-dispatch, no deadlock).
- Untrusted-archive extractor: zip-slip containment, type allow-list (reg/dir
  only), decompression-bomb cap, manifest-index bounds.
- POST /api/workloads/{id}/snapshots/{sid}/restore: admin, X-Confirm-Restore
  header (CSRF), per-workload single-flight (409).
- WebUI: Restore button + danger ConfirmDialog + busy state + i18n (en/ru).

Scope: image-source only; scopes absolute/stage/project (driven off the same
supportedScopes constant capture uses).

Plan-reviewed before coding; per-phase go/security/ts reviews; final review
READY TO MERGE. Security review caught + fixed a CRITICAL manifest-Source path
traversal (re-derive target from current config + base containment).

Plan: plans/volume-snapshot-restore/
2026-06-22 17:23:52 +03:00

386 lines
13 KiB
Go

package api
import (
"context"
"io"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"sync"
"testing"
"time"
"github.com/alexei/tinyforge/internal/auth"
"github.com/alexei/tinyforge/internal/store"
"github.com/alexei/tinyforge/internal/volsnap"
"github.com/alexei/tinyforge/internal/webhook"
)
// newSnapshotEnv builds an API test env with the volume-snapshot engine wired
// (the shared newAPITestEnv does not wire it).
//
// The snapshot engine is created over its own fresh temp directory, and the
// store's BaseVolumePath setting is pointed at a second temp directory. That
// second directory is returned alongside the env so tests can materialize
// host-bind volume directories under it.
//
// Any setup failure aborts the calling test via t.Fatalf. The store and the
// HTTP test server are closed through t.Cleanup.
func newSnapshotEnv(t *testing.T) (*apiTestEnv, string) {
	t.Helper()

	st, err := store.New(":memory:")
	if err != nil {
		t.Fatalf("create store: %v", err)
	}
	t.Cleanup(func() { st.Close() })

	// Only the first ten bytes are set; the remainder of the key stays zero.
	encKey := [32]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}

	dispatcher := &fakeAPIDispatcher{}
	wh := webhook.NewHandler(st)
	wh.SetPluginDispatcher(dispatcher)
	srv := NewServer(st, nil, nil, nil, dispatcher, nil, wh, nil, encKey)

	snapEng, err := volsnap.New(st, t.TempDir())
	if err != nil {
		t.Fatalf("snapshot engine: %v", err)
	}
	srv.SetSnapshotEngine(snapEng)

	httpsrv := httptest.NewServer(srv.Router())
	t.Cleanup(httpsrv.Close)

	la := auth.NewLocalAuth(encKey)
	tok, err := la.GenerateToken(auth.Claims{UserID: "u-admin", Username: "admin", Role: "admin"})
	if err != nil {
		t.Fatalf("mint token: %v", err)
	}

	baseVol := t.TempDir()
	// Fail loudly if the settings cannot be read: writing back a value obtained
	// from a failed read would hide the real cause of later test failures.
	settings, err := st.GetSettings()
	if err != nil {
		t.Fatalf("get settings: %v", err)
	}
	settings.BaseVolumePath = baseVol
	if err := st.UpdateSettings(settings); err != nil {
		t.Fatalf("update settings: %v", err)
	}

	return &apiTestEnv{srv: httpsrv, store: st, dispatcher: dispatcher, adminToken: tok.Token, encKey: encKey, snapEngine: snapEng}, baseVol
}
// doRestore issues an authenticated restore POST against
// /api/workloads/{workloadID}/snapshots/{sid}/restore and returns the response.
//
// The request always carries the admin bearer token. The X-Confirm-Restore
// header is set to confirm unless confirm is "" (pass "" to omit the header).
// The caller owns the returned response and must close its body.
//
// Building or sending the request failing aborts the test via t.Fatalf, so
// this helper must only be called from the goroutine running the test.
func (e *apiTestEnv) doRestore(t *testing.T, workloadID, sid, confirm string) *http.Response {
	t.Helper()

	// Bound the whole exchange (connect, headers, body) so a handler that never
	// answers fails this test quickly instead of stalling the entire test run.
	const requestTimeout = 30 * time.Second

	url := e.srv.URL + "/api/workloads/" + workloadID + "/snapshots/" + sid + "/restore"
	req, err := http.NewRequest(http.MethodPost, url, nil)
	if err != nil {
		t.Fatalf("new request: %v", err)
	}
	req.Header.Set("Authorization", "Bearer "+e.adminToken)
	if confirm != "" {
		req.Header.Set("X-Confirm-Restore", confirm)
	}

	// A nil Transport means the shared default transport is used, so connection
	// reuse is the same as with http.DefaultClient.
	client := &http.Client{Timeout: requestTimeout}
	resp, err := client.Do(req)
	if err != nil {
		t.Fatalf("do request: %v", err)
	}
	return resp
}
// okLifecycle is a do-nothing volsnap.Lifecycle used by the HTTP-layer
// happy-path tests: every operation succeeds immediately. The deep restore
// behavior is covered by the volsnap engine tests.
type okLifecycle struct {
	// tag is the string StopContainers hands back to its caller.
	tag string
}

// Lock acquires nothing; the unlock function it returns is a no-op.
func (ok *okLifecycle) Lock(_ string) func() {
	unlock := func() {}
	return unlock
}

// StopContainers stops nothing and returns the configured tag with a nil error.
func (ok *okLifecycle) StopContainers(_ context.Context, _ string) (string, error) {
	return ok.tag, nil
}

// Redeploy performs no work and always reports success.
func (ok *okLifecycle) Redeploy(_ context.Context, _ store.Workload, _ string) error {
	return nil
}
// TestRestoreSnapshot_RequiresConfirmHeader verifies that the restore endpoint
// answers 400 both when the X-Confirm-Restore header is absent and when it
// carries a value other than the snapshot id.
func TestRestoreSnapshot_RequiresConfirmHeader(t *testing.T) {
	e, _ := newSnapshotEnv(t)

	// Seed failures are fatal: without valid rows the 400s below could come
	// from an unrelated validation path and the test would pass for the wrong
	// reason.
	w, err := e.store.CreateWorkload(store.Workload{Name: "a", Kind: "project", SourceKind: "image", SourceConfig: `{"image":"x","port":80}`})
	if err != nil {
		t.Fatalf("create workload: %v", err)
	}
	snap, err := e.store.CreateVolumeSnapshot(store.VolumeSnapshot{WorkloadID: w.ID, Filename: "f.tar.gz", Manifest: "[]"})
	if err != nil {
		t.Fatalf("create snapshot: %v", err)
	}

	// Missing header → 400. The body is closed before asserting so it is
	// released even when the assertion aborts the test.
	resp := e.doRestore(t, w.ID, snap.ID, "")
	resp.Body.Close()
	if resp.StatusCode != http.StatusBadRequest {
		t.Fatalf("missing header status = %d, want 400", resp.StatusCode)
	}

	// Mismatched header → 400.
	resp = e.doRestore(t, w.ID, snap.ID, "not-the-sid")
	resp.Body.Close()
	if resp.StatusCode != http.StatusBadRequest {
		t.Fatalf("mismatched header status = %d, want 400", resp.StatusCode)
	}
}
// TestRestoreSnapshot_WrongWorkload verifies that restoring a snapshot through
// a workload id other than the one it was recorded for answers 400, even when
// the confirm header matches the snapshot id.
func TestRestoreSnapshot_WrongWorkload(t *testing.T) {
	e, _ := newSnapshotEnv(t)

	// The snapshot must really exist for the owning workload, otherwise the
	// request below would not be exercising the cross-workload check.
	w, err := e.store.CreateWorkload(store.Workload{Name: "a", Kind: "project", SourceKind: "image", SourceConfig: `{"image":"x","port":80}`})
	if err != nil {
		t.Fatalf("create workload: %v", err)
	}
	snap, err := e.store.CreateVolumeSnapshot(store.VolumeSnapshot{WorkloadID: w.ID, Filename: "f.tar.gz", Manifest: "[]"})
	if err != nil {
		t.Fatalf("create snapshot: %v", err)
	}

	resp := e.doRestore(t, "some-other-workload", snap.ID, snap.ID)
	// Close first so the body is released even if the assertion aborts.
	resp.Body.Close()
	if resp.StatusCode != http.StatusBadRequest {
		t.Fatalf("cross-workload restore status = %d, want 400", resp.StatusCode)
	}
}
// TestRestoreSnapshot_NonImageWorkload verifies that a restore request for a
// workload whose SourceKind is "static" (not "image") answers 400.
func TestRestoreSnapshot_NonImageWorkload(t *testing.T) {
	e, _ := newSnapshotEnv(t)

	// Both rows must exist so the 400 is attributable to the source kind
	// rather than to a missing workload or snapshot.
	w, err := e.store.CreateWorkload(store.Workload{Name: "site", Kind: "project", SourceKind: "static", SourceConfig: `{}`})
	if err != nil {
		t.Fatalf("create workload: %v", err)
	}
	snap, err := e.store.CreateVolumeSnapshot(store.VolumeSnapshot{WorkloadID: w.ID, Filename: "f.tar.gz", Manifest: "[]"})
	if err != nil {
		t.Fatalf("create snapshot: %v", err)
	}

	resp := e.doRestore(t, w.ID, snap.ID, snap.ID)
	// Close first so the body is released even if the assertion aborts.
	resp.Body.Close()
	if resp.StatusCode != http.StatusBadRequest {
		t.Fatalf("non-image restore status = %d, want 400", resp.StatusCode)
	}
}
// TestRestoreSnapshot_NotFound verifies that restoring an unknown snapshot id
// on an existing image workload answers 404.
func TestRestoreSnapshot_NotFound(t *testing.T) {
	e, _ := newSnapshotEnv(t)

	// The workload must exist so the 404 refers to the snapshot, not to the
	// workload lookup.
	w, err := e.store.CreateWorkload(store.Workload{Name: "a", Kind: "project", SourceKind: "image", SourceConfig: `{"image":"x","port":80}`})
	if err != nil {
		t.Fatalf("create workload: %v", err)
	}

	resp := e.doRestore(t, w.ID, "missing-sid", "missing-sid")
	// Close first so the body is released even if the assertion aborts.
	resp.Body.Close()
	if resp.StatusCode != http.StatusNotFound {
		t.Fatalf("unknown snapshot status = %d, want 404", resp.StatusCode)
	}
}
// TestRestoreSnapshot_HappyPath captures a snapshot of a project-scoped volume,
// changes the live file afterwards, restores through the HTTP endpoint, and
// checks that the file content is back to what was captured.
func TestRestoreSnapshot_HappyPath(t *testing.T) {
	e, baseVol := newSnapshotEnv(t)
	e.snapEngine.SetLifecycle(&okLifecycle{tag: "v1"})

	w, err := e.store.CreateWorkload(store.Workload{
		Name: "data-app", Kind: "project", SourceKind: "image",
		SourceConfig: `{"image":"reg/app","port":80,"volumes":[{"source":"data","target":"/data","scope":"project"}]}`,
	})
	if err != nil {
		t.Fatalf("create workload: %v", err)
	}
	if _, err := e.store.SetWorkloadVolume(store.WorkloadVolume{WorkloadID: w.ID, Target: "/data", Source: "data", Scope: "project"}); err != nil {
		t.Fatalf("set volume: %v", err)
	}

	// The host directory name uses at most the first eight characters of the
	// workload id: <baseVol>/<name>-<id8>/<source>.
	id8 := w.ID
	if len(id8) > 8 {
		id8 = id8[:8]
	}
	hostDir := filepath.Join(baseVol, "data-app-"+id8, "data")
	payload := filepath.Join(hostDir, "payload.txt")
	if err := os.MkdirAll(hostDir, 0o755); err != nil {
		t.Fatal(err)
	}
	if err := os.WriteFile(payload, []byte("ORIGINAL"), 0o644); err != nil {
		t.Fatal(err)
	}

	settings, err := e.store.GetSettings()
	if err != nil {
		t.Fatalf("get settings: %v", err)
	}
	snap, err := e.snapEngine.Create(w, settings, "base")
	if err != nil {
		t.Fatalf("create snapshot: %v", err)
	}

	// Drift the live data, then restore.
	if err := os.WriteFile(payload, []byte("CHANGED"), 0o644); err != nil {
		t.Fatal(err)
	}
	resp := e.doRestore(t, w.ID, snap.ID, snap.ID)
	if resp.StatusCode != http.StatusOK {
		// Best-effort read: the body is only used to enrich the failure message.
		body, _ := io.ReadAll(resp.Body)
		resp.Body.Close()
		t.Fatalf("restore status = %d, body=%s", resp.StatusCode, body)
	}
	resp.Body.Close()

	// A read failure is reported as such rather than as a content mismatch
	// against an empty string.
	got, err := os.ReadFile(payload)
	if err != nil {
		t.Fatalf("read restored payload: %v", err)
	}
	if string(got) != "ORIGINAL" {
		t.Errorf("payload.txt = %q, want ORIGINAL (restored)", got)
	}
}
// blockingLifecycle parks every Lock call until the test lets it go, and
// announces the first arrival. This lets a test keep one restore in flight and
// assert that a second one is rejected with 409.
type blockingLifecycle struct {
	entered chan struct{} // closed by the first Lock call
	release chan struct{} // Lock waits on this channel before returning
	once    sync.Once     // ensures entered is closed exactly once
}

// Lock signals entry (first call only), waits on release, and then returns a
// no-op unlock function.
func (b *blockingLifecycle) Lock(_ string) func() {
	announce := func() { close(b.entered) }
	b.once.Do(announce)

	<-b.release

	unlock := func() {}
	return unlock
}

// StopContainers does nothing and returns an empty string with a nil error.
func (b *blockingLifecycle) StopContainers(_ context.Context, _ string) (string, error) {
	return "", nil
}

// Redeploy does nothing and reports success.
func (b *blockingLifecycle) Redeploy(_ context.Context, _ store.Workload, _ string) error {
	return nil
}
// seedRestorable creates an image workload named "sf-app" whose source config
// declares a project-scoped volume, writes one file into the matching host
// directory under baseVol, and captures a snapshot through the engine.
//
// It returns the workload id and the snapshot id. Any failure aborts the
// calling test.
func seedRestorable(t *testing.T, e *apiTestEnv, baseVol string) (workloadID, snapshotID string) {
	t.Helper()

	w, err := e.store.CreateWorkload(store.Workload{
		Name: "sf-app", Kind: "project", SourceKind: "image",
		SourceConfig: `{"image":"reg/app","port":80,"volumes":[{"source":"data","target":"/data","scope":"project"}]}`,
	})
	if err != nil {
		t.Fatalf("create workload: %v", err)
	}

	// The host directory name uses at most the first eight characters of the
	// workload id.
	id8 := w.ID
	if len(id8) > 8 {
		id8 = id8[:8]
	}
	hostDir := filepath.Join(baseVol, "sf-app-"+id8, "data")
	if err := os.MkdirAll(hostDir, 0o755); err != nil {
		t.Fatal(err)
	}
	if err := os.WriteFile(filepath.Join(hostDir, "f.txt"), []byte("data"), 0o644); err != nil {
		t.Fatal(err)
	}

	// Surface a settings read failure directly instead of letting it show up
	// later as a confusing snapshot-creation error.
	settings, err := e.store.GetSettings()
	if err != nil {
		t.Fatalf("get settings: %v", err)
	}
	snap, err := e.snapEngine.Create(w, settings, "base")
	if err != nil {
		t.Fatalf("create snapshot: %v", err)
	}
	return w.ID, snap.ID
}
// TestRestoreSnapshot_SingleFlight409 holds one restore in flight (blocked
// inside the lifecycle's Lock) and verifies that a second restore for the same
// workload is rejected with 409 while the first is still running.
func TestRestoreSnapshot_SingleFlight409(t *testing.T) {
	e, baseVol := newSnapshotEnv(t)
	wid, sid := seedRestorable(t, e, baseVol)
	bl := &blockingLifecycle{entered: make(chan struct{}), release: make(chan struct{})}
	e.snapEngine.SetLifecycle(bl)

	// Restore #1: passes validation, takes the single-flight, then blocks inside
	// the engine's Lock. The request is built inline rather than through
	// doRestore because that helper calls t.Fatalf, which must not be invoked
	// from a goroutine other than the one running the test. The outcome is
	// handed back over a buffered channel so the goroutine can always finish.
	firstDone := make(chan error, 1)
	go func() {
		req, err := http.NewRequest(http.MethodPost,
			e.srv.URL+"/api/workloads/"+wid+"/snapshots/"+sid+"/restore", nil)
		if err != nil {
			firstDone <- err
			return
		}
		req.Header.Set("Authorization", "Bearer "+e.adminToken)
		req.Header.Set("X-Confirm-Restore", sid)
		resp, err := http.DefaultClient.Do(req)
		if err != nil {
			firstDone <- err
			return
		}
		resp.Body.Close()
		firstDone <- nil
	}()

	// On every exit path (including t.Fatal below): unblock restore #1 and wait
	// for it, so no request is still in flight when the test returns and any
	// transport error from it is reported on the test goroutine.
	defer func() {
		close(bl.release)
		select {
		case err := <-firstDone:
			if err != nil {
				t.Errorf("first restore request: %v", err)
			}
		case <-time.After(10 * time.Second):
			t.Error("first restore did not finish after release")
		}
	}()

	select {
	case <-bl.entered:
	case <-time.After(3 * time.Second):
		t.Fatal("first restore never reached the lifecycle lock")
	}

	// Restore #2 for the same workload must be rejected fast with 409.
	resp := e.doRestore(t, wid, sid, sid)
	got := resp.StatusCode
	resp.Body.Close()
	if got != http.StatusConflict {
		t.Fatalf("concurrent restore status = %d, want 409", got)
	}
}
// TestVolumeSnapshots_EndToEnd walks the snapshot HTTP API for one image
// workload with a project-scoped volume: list snapshotable volumes, create a
// snapshot, see it in the list, download it, delete it, and confirm the list
// is empty afterwards.
func TestVolumeSnapshots_EndToEnd(t *testing.T) {
	e, baseVol := newSnapshotEnv(t)

	w, err := e.store.CreateWorkload(store.Workload{
		Name:         "data-app",
		Kind:         "project",
		SourceKind:   "image",
		SourceConfig: `{"image":"registry.example.com/owner/app","port":8080}`,
	})
	if err != nil {
		t.Fatalf("create workload: %v", err)
	}
	if _, err := e.store.SetWorkloadVolume(store.WorkloadVolume{
		WorkloadID: w.ID, Target: "/data", Source: "data", Scope: "project",
	}); err != nil {
		t.Fatalf("set volume: %v", err)
	}

	// Materialize the resolved host-bind dir with a file so there is data to
	// capture. Layout mirrors ResolveWorkloadPath for project scope:
	// <baseVol>/<name>-<id8>/<source>.
	id8 := w.ID
	if len(id8) > 8 {
		id8 = id8[:8]
	}
	hostDir := filepath.Join(baseVol, "data-app-"+id8, "data")
	if err := os.MkdirAll(hostDir, 0o755); err != nil {
		t.Fatal(err)
	}
	if err := os.WriteFile(filepath.Join(hostDir, "payload.txt"), []byte("important"), 0o644); err != nil {
		t.Fatal(err)
	}

	// snapshotable lists the one host-bind volume.
	resp := e.do(t, http.MethodGet, "/api/workloads/"+w.ID+"/snapshotable", nil)
	if resp.StatusCode != http.StatusOK {
		t.Fatalf("snapshotable status = %d", resp.StatusCode)
	}
	var snapable struct {
		Volumes []map[string]string `json:"volumes"`
		Skipped []map[string]string `json:"skipped"`
	}
	decodeEnvelope(t, resp, &snapable)
	if len(snapable.Volumes) != 1 || snapable.Volumes[0]["target"] != "/data" {
		t.Fatalf("expected 1 snapshotable volume /data, got %+v", snapable)
	}

	// Create a snapshot.
	resp = e.do(t, http.MethodPost, "/api/workloads/"+w.ID+"/snapshots", map[string]string{"label": "before upgrade"})
	if resp.StatusCode != http.StatusCreated {
		t.Fatalf("create snapshot status = %d", resp.StatusCode)
	}
	var snap store.VolumeSnapshot
	decodeEnvelope(t, resp, &snap)
	if snap.ID == "" || snap.SizeBytes == 0 || snap.Label != "before upgrade" {
		t.Fatalf("unexpected snapshot: %+v", snap)
	}

	// It appears in the list.
	resp = e.do(t, http.MethodGet, "/api/workloads/"+w.ID+"/snapshots", nil)
	var list []store.VolumeSnapshot
	decodeEnvelope(t, resp, &list)
	if len(list) != 1 || list[0].ID != snap.ID {
		t.Fatalf("expected 1 snapshot in list, got %+v", list)
	}

	// Download streams a non-empty gzip archive (not the JSON envelope).
	resp = e.do(t, http.MethodGet, "/api/snapshots/"+snap.ID+"/download", nil)
	if resp.StatusCode != http.StatusOK {
		resp.Body.Close()
		t.Fatalf("download status = %d", resp.StatusCode)
	}
	if ct := resp.Header.Get("Content-Type"); ct != "application/gzip" {
		t.Errorf("download content-type = %q, want application/gzip", ct)
	}
	data, err := io.ReadAll(resp.Body)
	resp.Body.Close()
	// A truncated or failed stream is reported as a read error instead of
	// being mistaken for (or masking) an empty archive.
	if err != nil {
		t.Fatalf("read download body: %v", err)
	}
	if len(data) == 0 {
		t.Error("download body is empty")
	}

	// Delete removes it. The body is not inspected, so release it right away
	// instead of dropping it when resp is reassigned below.
	resp = e.do(t, http.MethodDelete, "/api/snapshots/"+snap.ID, nil)
	resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		t.Fatalf("delete status = %d", resp.StatusCode)
	}
	resp = e.do(t, http.MethodGet, "/api/workloads/"+w.ID+"/snapshots", nil)
	var after []store.VolumeSnapshot
	decodeEnvelope(t, resp, &after)
	if len(after) != 0 {
		t.Fatalf("expected 0 snapshots after delete, got %d", len(after))
	}
}
// TestCreateSnapshot_NoVolumeData_Returns400 checks that asking for a snapshot
// of an image workload that has no volumes configured is answered with 400.
func TestCreateSnapshot_NoVolumeData_Returns400(t *testing.T) {
	env, _ := newSnapshotEnv(t)

	spec := store.Workload{
		Name:         "no-vol-app",
		Kind:         "project",
		SourceKind:   "image",
		SourceConfig: `{"image":"x","port":80}`,
	}
	created, err := env.store.CreateWorkload(spec)
	if err != nil {
		t.Fatalf("create workload: %v", err)
	}

	endpoint := "/api/workloads/" + created.ID + "/snapshots"
	resp := env.do(t, http.MethodPost, endpoint, nil)
	if status := resp.StatusCode; status != http.StatusBadRequest {
		t.Fatalf("expected 400 for an app with no snapshottable volumes, got %d", status)
	}
	resp.Body.Close()
}
// TestSnapshotEndpoints_RequireWorkload checks that the snapshotable endpoint
// answers 404 when the workload id in the path is unknown.
func TestSnapshotEndpoints_RequireWorkload(t *testing.T) {
	env, _ := newSnapshotEnv(t)

	// No workload has been created, so this id cannot resolve.
	const endpoint = "/api/workloads/does-not-exist/snapshotable"
	resp := env.do(t, http.MethodGet, endpoint, nil)
	if status := resp.StatusCode; status != http.StatusNotFound {
		t.Fatalf("snapshotable unknown workload = %d, want 404", status)
	}
	resp.Body.Close()
}