1c47030854
Restore a captured volume snapshot onto an image workload's live host-bind
data volumes, then redeploy — the most destructive workload action, built to
the adversarially-reviewed design (C1–C6) with all data-loss guards.
- Engine.Restore (engine-owned): all-or-nothing pre-flight re-resolution from
the workload's CURRENT config (never the tamperable manifest), per-filesystem
disk pre-check, per-workload lock, container quiesce, extract-to-tmp, durable
pre-restore snapshot, write-ahead journal, atomic rename swap, redeploy, and
crash-recovery sweep (RecoverInterruptedRestores) wired before serving.
- internal/keyedmutex: shared per-key lock; deployer now serializes every
deploy entrypoint per workload via DispatchPlugin (+ LockWorkload/RedeployLocked
for the restore re-dispatch, no deadlock).
- Untrusted-archive extractor: zip-slip containment, type allow-list (reg/dir
only), decompression-bomb cap, manifest-index bounds.
- POST /api/workloads/{id}/snapshots/{sid}/restore: admin, X-Confirm-Restore
header (CSRF), per-workload single-flight (409).
- WebUI: Restore button + danger ConfirmDialog + busy state + i18n (en/ru).
Scope: image-source only; scopes absolute/stage/project (driven off the same
supportedScopes constant capture uses).
Plan-reviewed before coding; per-phase go/security/ts reviews; final review
READY TO MERGE. Security review caught + fixed a CRITICAL manifest-Source path
traversal (re-derive target from current config + base containment).
Plan: plans/volume-snapshot-restore/
386 lines
13 KiB
Go
386 lines
13 KiB
Go
package api
|
|
|
|
import (
|
|
"context"
|
|
"io"
|
|
"net/http"
|
|
"net/http/httptest"
|
|
"os"
|
|
"path/filepath"
|
|
"sync"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/alexei/tinyforge/internal/auth"
|
|
"github.com/alexei/tinyforge/internal/store"
|
|
"github.com/alexei/tinyforge/internal/volsnap"
|
|
"github.com/alexei/tinyforge/internal/webhook"
|
|
)
|
|
|
|
// newSnapshotEnv builds an API test env with the volume-snapshot engine wired
|
|
// (the shared newAPITestEnv does not wire it). dataDir holds the snapshot
|
|
// archives; baseVol is where host-bind volume directories resolve.
|
|
func newSnapshotEnv(t *testing.T) (*apiTestEnv, string) {
|
|
t.Helper()
|
|
st, err := store.New(":memory:")
|
|
if err != nil {
|
|
t.Fatalf("create store: %v", err)
|
|
}
|
|
t.Cleanup(func() { st.Close() })
|
|
|
|
encKey := [32]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}
|
|
dispatcher := &fakeAPIDispatcher{}
|
|
wh := webhook.NewHandler(st)
|
|
wh.SetPluginDispatcher(dispatcher)
|
|
srv := NewServer(st, nil, nil, nil, dispatcher, nil, wh, nil, encKey)
|
|
|
|
snapEng, err := volsnap.New(st, t.TempDir())
|
|
if err != nil {
|
|
t.Fatalf("snapshot engine: %v", err)
|
|
}
|
|
srv.SetSnapshotEngine(snapEng)
|
|
|
|
httpsrv := httptest.NewServer(srv.Router())
|
|
t.Cleanup(httpsrv.Close)
|
|
|
|
la := auth.NewLocalAuth(encKey)
|
|
tok, err := la.GenerateToken(auth.Claims{UserID: "u-admin", Username: "admin", Role: "admin"})
|
|
if err != nil {
|
|
t.Fatalf("mint token: %v", err)
|
|
}
|
|
|
|
baseVol := t.TempDir()
|
|
settings, _ := st.GetSettings()
|
|
settings.BaseVolumePath = baseVol
|
|
if err := st.UpdateSettings(settings); err != nil {
|
|
t.Fatalf("update settings: %v", err)
|
|
}
|
|
|
|
return &apiTestEnv{srv: httpsrv, store: st, dispatcher: dispatcher, adminToken: tok.Token, encKey: encKey, snapEngine: snapEng}, baseVol
|
|
}
|
|
|
|
// doRestore issues an authenticated restore POST, optionally setting the
|
|
// X-Confirm-Restore header (pass confirm="" to omit it).
|
|
func (e *apiTestEnv) doRestore(t *testing.T, workloadID, sid, confirm string) *http.Response {
|
|
t.Helper()
|
|
req, err := http.NewRequest(http.MethodPost,
|
|
e.srv.URL+"/api/workloads/"+workloadID+"/snapshots/"+sid+"/restore", nil)
|
|
if err != nil {
|
|
t.Fatalf("new request: %v", err)
|
|
}
|
|
req.Header.Set("Authorization", "Bearer "+e.adminToken)
|
|
if confirm != "" {
|
|
req.Header.Set("X-Confirm-Restore", confirm)
|
|
}
|
|
resp, err := http.DefaultClient.Do(req)
|
|
if err != nil {
|
|
t.Fatalf("do request: %v", err)
|
|
}
|
|
return resp
|
|
}
|
|
|
|
// okLifecycle is a no-op volsnap.Lifecycle for HTTP-layer happy-path tests; the
|
|
// deep restore behavior is covered by the volsnap engine tests.
|
|
type okLifecycle struct{ tag string }
|
|
|
|
func (l *okLifecycle) Lock(string) func() { return func() {} }
|
|
func (l *okLifecycle) StopContainers(context.Context, string) (string, error) { return l.tag, nil }
|
|
func (l *okLifecycle) Redeploy(context.Context, store.Workload, string) error { return nil }
|
|
|
|
func TestRestoreSnapshot_RequiresConfirmHeader(t *testing.T) {
|
|
e, _ := newSnapshotEnv(t)
|
|
w, _ := e.store.CreateWorkload(store.Workload{Name: "a", Kind: "project", SourceKind: "image", SourceConfig: `{"image":"x","port":80}`})
|
|
snap, _ := e.store.CreateVolumeSnapshot(store.VolumeSnapshot{WorkloadID: w.ID, Filename: "f.tar.gz", Manifest: "[]"})
|
|
|
|
// Missing header → 400.
|
|
resp := e.doRestore(t, w.ID, snap.ID, "")
|
|
if resp.StatusCode != http.StatusBadRequest {
|
|
t.Fatalf("missing header status = %d, want 400", resp.StatusCode)
|
|
}
|
|
resp.Body.Close()
|
|
// Mismatched header → 400.
|
|
resp = e.doRestore(t, w.ID, snap.ID, "not-the-sid")
|
|
if resp.StatusCode != http.StatusBadRequest {
|
|
t.Fatalf("mismatched header status = %d, want 400", resp.StatusCode)
|
|
}
|
|
resp.Body.Close()
|
|
}
|
|
|
|
func TestRestoreSnapshot_WrongWorkload(t *testing.T) {
|
|
e, _ := newSnapshotEnv(t)
|
|
w, _ := e.store.CreateWorkload(store.Workload{Name: "a", Kind: "project", SourceKind: "image", SourceConfig: `{"image":"x","port":80}`})
|
|
snap, _ := e.store.CreateVolumeSnapshot(store.VolumeSnapshot{WorkloadID: w.ID, Filename: "f.tar.gz", Manifest: "[]"})
|
|
|
|
resp := e.doRestore(t, "some-other-workload", snap.ID, snap.ID)
|
|
if resp.StatusCode != http.StatusBadRequest {
|
|
t.Fatalf("cross-workload restore status = %d, want 400", resp.StatusCode)
|
|
}
|
|
resp.Body.Close()
|
|
}
|
|
|
|
func TestRestoreSnapshot_NonImageWorkload(t *testing.T) {
|
|
e, _ := newSnapshotEnv(t)
|
|
w, _ := e.store.CreateWorkload(store.Workload{Name: "site", Kind: "project", SourceKind: "static", SourceConfig: `{}`})
|
|
snap, _ := e.store.CreateVolumeSnapshot(store.VolumeSnapshot{WorkloadID: w.ID, Filename: "f.tar.gz", Manifest: "[]"})
|
|
|
|
resp := e.doRestore(t, w.ID, snap.ID, snap.ID)
|
|
if resp.StatusCode != http.StatusBadRequest {
|
|
t.Fatalf("non-image restore status = %d, want 400", resp.StatusCode)
|
|
}
|
|
resp.Body.Close()
|
|
}
|
|
|
|
func TestRestoreSnapshot_NotFound(t *testing.T) {
|
|
e, _ := newSnapshotEnv(t)
|
|
w, _ := e.store.CreateWorkload(store.Workload{Name: "a", Kind: "project", SourceKind: "image", SourceConfig: `{"image":"x","port":80}`})
|
|
|
|
resp := e.doRestore(t, w.ID, "missing-sid", "missing-sid")
|
|
if resp.StatusCode != http.StatusNotFound {
|
|
t.Fatalf("unknown snapshot status = %d, want 404", resp.StatusCode)
|
|
}
|
|
resp.Body.Close()
|
|
}
|
|
|
|
func TestRestoreSnapshot_HappyPath(t *testing.T) {
|
|
e, baseVol := newSnapshotEnv(t)
|
|
e.snapEngine.SetLifecycle(&okLifecycle{tag: "v1"})
|
|
|
|
w, err := e.store.CreateWorkload(store.Workload{
|
|
Name: "data-app", Kind: "project", SourceKind: "image",
|
|
SourceConfig: `{"image":"reg/app","port":80,"volumes":[{"source":"data","target":"/data","scope":"project"}]}`,
|
|
})
|
|
if err != nil {
|
|
t.Fatalf("create workload: %v", err)
|
|
}
|
|
if _, err := e.store.SetWorkloadVolume(store.WorkloadVolume{WorkloadID: w.ID, Target: "/data", Source: "data", Scope: "project"}); err != nil {
|
|
t.Fatalf("set volume: %v", err)
|
|
}
|
|
id8 := w.ID
|
|
if len(id8) > 8 {
|
|
id8 = id8[:8]
|
|
}
|
|
hostDir := filepath.Join(baseVol, "data-app-"+id8, "data")
|
|
if err := os.MkdirAll(hostDir, 0o755); err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
if err := os.WriteFile(filepath.Join(hostDir, "payload.txt"), []byte("ORIGINAL"), 0o644); err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
settings, _ := e.store.GetSettings()
|
|
snap, err := e.snapEngine.Create(w, settings, "base")
|
|
if err != nil {
|
|
t.Fatalf("create snapshot: %v", err)
|
|
}
|
|
// Drift the live data, then restore.
|
|
if err := os.WriteFile(filepath.Join(hostDir, "payload.txt"), []byte("CHANGED"), 0o644); err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
|
|
resp := e.doRestore(t, w.ID, snap.ID, snap.ID)
|
|
if resp.StatusCode != http.StatusOK {
|
|
body, _ := io.ReadAll(resp.Body)
|
|
resp.Body.Close()
|
|
t.Fatalf("restore status = %d, body=%s", resp.StatusCode, body)
|
|
}
|
|
resp.Body.Close()
|
|
if got, _ := os.ReadFile(filepath.Join(hostDir, "payload.txt")); string(got) != "ORIGINAL" {
|
|
t.Errorf("payload.txt = %q, want ORIGINAL (restored)", got)
|
|
}
|
|
}
|
|
|
|
// blockingLifecycle blocks in Lock until released, signaling when entered — so
|
|
// a test can hold one restore in-flight and assert a second is rejected 409.
|
|
type blockingLifecycle struct {
|
|
entered chan struct{}
|
|
release chan struct{}
|
|
once sync.Once
|
|
}
|
|
|
|
func (l *blockingLifecycle) Lock(string) func() {
|
|
l.once.Do(func() { close(l.entered) })
|
|
<-l.release
|
|
return func() {}
|
|
}
|
|
func (l *blockingLifecycle) StopContainers(context.Context, string) (string, error) { return "", nil }
|
|
func (l *blockingLifecycle) Redeploy(context.Context, store.Workload, string) error { return nil }
|
|
|
|
// seedRestorable creates an image workload with a project volume + live data and
|
|
// a captured snapshot, returning the workload and snapshot ids.
|
|
func seedRestorable(t *testing.T, e *apiTestEnv, baseVol string) (workloadID, snapshotID string) {
|
|
t.Helper()
|
|
w, err := e.store.CreateWorkload(store.Workload{
|
|
Name: "sf-app", Kind: "project", SourceKind: "image",
|
|
SourceConfig: `{"image":"reg/app","port":80,"volumes":[{"source":"data","target":"/data","scope":"project"}]}`,
|
|
})
|
|
if err != nil {
|
|
t.Fatalf("create workload: %v", err)
|
|
}
|
|
id8 := w.ID
|
|
if len(id8) > 8 {
|
|
id8 = id8[:8]
|
|
}
|
|
hostDir := filepath.Join(baseVol, "sf-app-"+id8, "data")
|
|
if err := os.MkdirAll(hostDir, 0o755); err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
if err := os.WriteFile(filepath.Join(hostDir, "f.txt"), []byte("data"), 0o644); err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
settings, _ := e.store.GetSettings()
|
|
snap, err := e.snapEngine.Create(w, settings, "base")
|
|
if err != nil {
|
|
t.Fatalf("create snapshot: %v", err)
|
|
}
|
|
return w.ID, snap.ID
|
|
}
|
|
|
|
func TestRestoreSnapshot_SingleFlight409(t *testing.T) {
|
|
e, baseVol := newSnapshotEnv(t)
|
|
wid, sid := seedRestorable(t, e, baseVol)
|
|
bl := &blockingLifecycle{entered: make(chan struct{}), release: make(chan struct{})}
|
|
e.snapEngine.SetLifecycle(bl)
|
|
|
|
// Restore #1: passes validation, takes the single-flight, then blocks inside
|
|
// the engine's Lock.
|
|
go func() {
|
|
resp := e.doRestore(t, wid, sid, sid)
|
|
resp.Body.Close()
|
|
}()
|
|
|
|
select {
|
|
case <-bl.entered:
|
|
case <-time.After(3 * time.Second):
|
|
t.Fatal("first restore never reached the lifecycle lock")
|
|
}
|
|
|
|
// Restore #2 for the same workload must be rejected fast with 409.
|
|
resp := e.doRestore(t, wid, sid, sid)
|
|
got := resp.StatusCode
|
|
resp.Body.Close()
|
|
close(bl.release) // let #1 finish
|
|
if got != http.StatusConflict {
|
|
t.Fatalf("concurrent restore status = %d, want 409", got)
|
|
}
|
|
}
|
|
|
|
func TestVolumeSnapshots_EndToEnd(t *testing.T) {
|
|
e, baseVol := newSnapshotEnv(t)
|
|
|
|
w, err := e.store.CreateWorkload(store.Workload{
|
|
Name: "data-app",
|
|
Kind: "project",
|
|
SourceKind: "image",
|
|
SourceConfig: `{"image":"registry.example.com/owner/app","port":8080}`,
|
|
})
|
|
if err != nil {
|
|
t.Fatalf("create workload: %v", err)
|
|
}
|
|
if _, err := e.store.SetWorkloadVolume(store.WorkloadVolume{
|
|
WorkloadID: w.ID, Target: "/data", Source: "data", Scope: "project",
|
|
}); err != nil {
|
|
t.Fatalf("set volume: %v", err)
|
|
}
|
|
|
|
// Materialize the resolved host-bind dir with a file so there is data to
|
|
// capture. Layout mirrors ResolveWorkloadPath for project scope:
|
|
// <baseVol>/<name>-<id8>/<source>.
|
|
id8 := w.ID
|
|
if len(id8) > 8 {
|
|
id8 = id8[:8]
|
|
}
|
|
hostDir := filepath.Join(baseVol, "data-app-"+id8, "data")
|
|
if err := os.MkdirAll(hostDir, 0o755); err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
if err := os.WriteFile(filepath.Join(hostDir, "payload.txt"), []byte("important"), 0o644); err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
|
|
// snapshotable lists the one host-bind volume.
|
|
resp := e.do(t, http.MethodGet, "/api/workloads/"+w.ID+"/snapshotable", nil)
|
|
if resp.StatusCode != http.StatusOK {
|
|
t.Fatalf("snapshotable status = %d", resp.StatusCode)
|
|
}
|
|
var snapable struct {
|
|
Volumes []map[string]string `json:"volumes"`
|
|
Skipped []map[string]string `json:"skipped"`
|
|
}
|
|
decodeEnvelope(t, resp, &snapable)
|
|
if len(snapable.Volumes) != 1 || snapable.Volumes[0]["target"] != "/data" {
|
|
t.Fatalf("expected 1 snapshotable volume /data, got %+v", snapable)
|
|
}
|
|
|
|
// Create a snapshot.
|
|
resp = e.do(t, http.MethodPost, "/api/workloads/"+w.ID+"/snapshots", map[string]string{"label": "before upgrade"})
|
|
if resp.StatusCode != http.StatusCreated {
|
|
t.Fatalf("create snapshot status = %d", resp.StatusCode)
|
|
}
|
|
var snap store.VolumeSnapshot
|
|
decodeEnvelope(t, resp, &snap)
|
|
if snap.ID == "" || snap.SizeBytes == 0 || snap.Label != "before upgrade" {
|
|
t.Fatalf("unexpected snapshot: %+v", snap)
|
|
}
|
|
|
|
// It appears in the list.
|
|
resp = e.do(t, http.MethodGet, "/api/workloads/"+w.ID+"/snapshots", nil)
|
|
var list []store.VolumeSnapshot
|
|
decodeEnvelope(t, resp, &list)
|
|
if len(list) != 1 || list[0].ID != snap.ID {
|
|
t.Fatalf("expected 1 snapshot in list, got %+v", list)
|
|
}
|
|
|
|
// Download streams a non-empty gzip archive (not the JSON envelope).
|
|
resp = e.do(t, http.MethodGet, "/api/snapshots/"+snap.ID+"/download", nil)
|
|
if resp.StatusCode != http.StatusOK {
|
|
t.Fatalf("download status = %d", resp.StatusCode)
|
|
}
|
|
if ct := resp.Header.Get("Content-Type"); ct != "application/gzip" {
|
|
t.Errorf("download content-type = %q, want application/gzip", ct)
|
|
}
|
|
data, _ := io.ReadAll(resp.Body)
|
|
resp.Body.Close()
|
|
if len(data) == 0 {
|
|
t.Error("download body is empty")
|
|
}
|
|
|
|
// Delete removes it.
|
|
resp = e.do(t, http.MethodDelete, "/api/snapshots/"+snap.ID, nil)
|
|
if resp.StatusCode != http.StatusOK {
|
|
t.Fatalf("delete status = %d", resp.StatusCode)
|
|
}
|
|
resp = e.do(t, http.MethodGet, "/api/workloads/"+w.ID+"/snapshots", nil)
|
|
var after []store.VolumeSnapshot
|
|
decodeEnvelope(t, resp, &after)
|
|
if len(after) != 0 {
|
|
t.Fatalf("expected 0 snapshots after delete, got %d", len(after))
|
|
}
|
|
}
|
|
|
|
func TestCreateSnapshot_NoVolumeData_Returns400(t *testing.T) {
|
|
e, _ := newSnapshotEnv(t)
|
|
w, err := e.store.CreateWorkload(store.Workload{
|
|
Name: "no-vol-app",
|
|
Kind: "project",
|
|
SourceKind: "image",
|
|
SourceConfig: `{"image":"x","port":80}`,
|
|
})
|
|
if err != nil {
|
|
t.Fatalf("create workload: %v", err)
|
|
}
|
|
resp := e.do(t, http.MethodPost, "/api/workloads/"+w.ID+"/snapshots", nil)
|
|
if resp.StatusCode != http.StatusBadRequest {
|
|
t.Fatalf("expected 400 for an app with no snapshottable volumes, got %d", resp.StatusCode)
|
|
}
|
|
resp.Body.Close()
|
|
}
|
|
|
|
func TestSnapshotEndpoints_RequireWorkload(t *testing.T) {
|
|
e, _ := newSnapshotEnv(t)
|
|
// snapshotable on an unknown workload → 404.
|
|
resp := e.do(t, http.MethodGet, "/api/workloads/does-not-exist/snapshotable", nil)
|
|
if resp.StatusCode != http.StatusNotFound {
|
|
t.Fatalf("snapshotable unknown workload = %d, want 404", resp.StatusCode)
|
|
}
|
|
resp.Body.Close()
|
|
}
|