package volsnap import ( "archive/tar" "context" "errors" "os" "path/filepath" "strings" "sync" "testing" "github.com/alexei/tinyforge/internal/store" "github.com/alexei/tinyforge/internal/volume" ) // fakeLifecycle records the order of deploy-side calls and lets tests inject // failures, without a real deployer/docker. type fakeLifecycle struct { mu sync.Mutex calls []string tag string stopErr error redeployErr error redeployRef string } func (f *fakeLifecycle) rec(s string) { f.mu.Lock() f.calls = append(f.calls, s) f.mu.Unlock() } func (f *fakeLifecycle) Lock(string) func() { f.rec("lock"); return func() { f.rec("unlock") } } func (f *fakeLifecycle) StopContainers(context.Context, string) (string, error) { f.rec("stop") return f.tag, f.stopErr } func (f *fakeLifecycle) Redeploy(_ context.Context, _ store.Workload, ref string) error { f.rec("redeploy:" + ref) f.redeployRef = ref return f.redeployErr } func (f *fakeLifecycle) saw(s string) bool { f.mu.Lock() defer f.mu.Unlock() for _, c := range f.calls { if c == s { return true } } return false } func newRestoreEngine(t *testing.T) (*Engine, *store.Store, string) { t.Helper() st, err := store.New(":memory:") if err != nil { t.Fatalf("store: %v", err) } t.Cleanup(func() { st.Close() }) base := t.TempDir() s, _ := st.GetSettings() s.BaseVolumePath = base if err := st.UpdateSettings(s); err != nil { t.Fatalf("settings: %v", err) } eng, err := New(st, t.TempDir()) if err != nil { t.Fatalf("engine: %v", err) } return eng, st, base } // seedImageWorkload creates an image workload with one project-scope volume and // returns it plus the resolved live host dir. func seedImageWorkload(t *testing.T, st *store.Store) (store.Workload, string) { t.Helper() w, err := st.CreateWorkload(store.Workload{ Name: "data-app", Kind: "project", SourceKind: "image", SourceConfig: `{"image":"reg/app","port":80,"volumes":[{"source":"data","target":"/data","scope":"project"}]}`, }) if err != nil { t.Fatalf("create workload: %v", err) } settings, _ := st.GetSettings() live, err := volume.ResolveWorkloadPath( store.WorkloadVolume{Source: "data", Target: "/data", Scope: "project"}, volume.ResolveWorkloadParams{BasePath: settings.BaseVolumePath, WorkloadID: w.ID, WorkloadName: w.Name}, ) if err != nil { t.Fatalf("resolve: %v", err) } return w, live } func TestEngineRestore_HappyPath(t *testing.T) { eng, st, _ := newRestoreEngine(t) w, live := seedImageWorkload(t, st) mkDirWith(t, live, "orig.txt", "ORIGINAL") settings, _ := st.GetSettings() snap, err := eng.Create(w, settings, "base") if err != nil { t.Fatalf("create snapshot: %v", err) } // Drift: the live dir now differs from the snapshot. if err := os.WriteFile(filepath.Join(live, "orig.txt"), []byte("CHANGED"), 0o600); err != nil { t.Fatal(err) } mkDirWith(t, live, "extra.txt", "NEW") // not in the snapshot fake := &fakeLifecycle{tag: "v1.2.3"} eng.SetLifecycle(fake) // Uses the REAL eng.Create for the pre-restore capture — if Restore held // e.mu this would deadlock (R1), failing the test instead of production. if err := eng.Restore(context.Background(), snap.ID, w.ID); err != nil { t.Fatalf("restore: %v", err) } if got := readIn(t, live, "orig.txt"); got != "ORIGINAL" { t.Errorf("orig.txt = %q, want ORIGINAL (restored)", got) } if _, err := os.Stat(filepath.Join(live, "extra.txt")); !os.IsNotExist(err) { t.Error("extra.txt should be gone — restore replaces the volume dir wholesale") } for _, want := range []string{"lock", "stop", "redeploy:v1.2.3", "unlock"} { if !fake.saw(want) { t.Errorf("expected lifecycle call %q; calls=%v", want, fake.calls) } } if fake.redeployRef != "v1.2.3" { t.Errorf("redeploy reference = %q, want the running tag v1.2.3", fake.redeployRef) } // A durable pre-restore snapshot was captured (base + pre-restore). snaps, _ := eng.List(w.ID) if len(snaps) != 2 { t.Errorf("expected 2 snapshots (base + pre-restore), got %d", len(snaps)) } // No journal left behind. assertNoJournal(t, eng) } func TestEngineRestore_RedeployFailureKeepsRestoredData(t *testing.T) { eng, st, _ := newRestoreEngine(t) w, live := seedImageWorkload(t, st) mkDirWith(t, live, "orig.txt", "ORIGINAL") settings, _ := st.GetSettings() snap, _ := eng.Create(w, settings, "base") if err := os.WriteFile(filepath.Join(live, "orig.txt"), []byte("CHANGED"), 0o600); err != nil { t.Fatal(err) } fake := &fakeLifecycle{tag: "v1", redeployErr: errors.New("boom")} eng.SetLifecycle(fake) err := eng.Restore(context.Background(), snap.ID, w.ID) if err == nil || !strings.Contains(err.Error(), "redeploy") { t.Fatalf("expected a redeploy error, got %v", err) } // Data is committed despite the redeploy failure — we must NOT roll it back. if got := readIn(t, live, "orig.txt"); got != "ORIGINAL" { t.Errorf("orig.txt = %q, want ORIGINAL (restore committed)", got) } assertNoJournal(t, eng) } func TestEngineRestore_PreflightFailDoesNotLockOrStop(t *testing.T) { eng, st, _ := newRestoreEngine(t) w, _ := seedImageWorkload(t, st) // A snapshot whose manifest names an unsupported scope ⇒ pre-flight aborts. bad, err := st.CreateVolumeSnapshot(store.VolumeSnapshot{ WorkloadID: w.ID, Filename: "bad.tar.gz", Manifest: `[{"index":0,"target":"/x","scope":"named","source":"x"}]`, }) if err != nil { t.Fatalf("seed snapshot: %v", err) } fake := &fakeLifecycle{} eng.SetLifecycle(fake) if err := eng.Restore(context.Background(), bad.ID, w.ID); err == nil { t.Fatal("expected pre-flight to abort on an unsupported scope") } if fake.saw("lock") || fake.saw("stop") { t.Errorf("pre-flight abort must happen BEFORE lock/stop; calls=%v", fake.calls) } } func TestEngineRestore_NilLifecycle(t *testing.T) { eng, _, _ := newRestoreEngine(t) if err := eng.Restore(context.Background(), "s", "w"); err == nil || !strings.Contains(err.Error(), "lifecycle") { t.Fatalf("expected a lifecycle-not-configured error, got %v", err) } } func TestEngineRestore_WrongWorkload(t *testing.T) { eng, st, _ := newRestoreEngine(t) w, live := seedImageWorkload(t, st) mkDirWith(t, live, "f.txt", "x") settings, _ := st.GetSettings() snap, _ := eng.Create(w, settings, "base") fake := &fakeLifecycle{} eng.SetLifecycle(fake) if err := eng.Restore(context.Background(), snap.ID, "some-other-workload"); err == nil { t.Fatal("expected cross-workload restore to be rejected") } if fake.saw("lock") { t.Error("must reject before taking the lock") } } func TestEngineRestore_ExtractFailureAbortsAfterLock(t *testing.T) { eng, st, _ := newRestoreEngine(t) // The workload must CURRENTLY declare both targets so pre-flight passes and // the failure happens during extraction (post-lock), not pre-flight. w, err := st.CreateWorkload(store.Workload{ Name: "two-vol", Kind: "project", SourceKind: "image", SourceConfig: `{"image":"x","port":80,"volumes":[` + `{"source":"data","target":"/data","scope":"project"},` + `{"source":"other","target":"/other","scope":"project"}]}`, }) if err != nil { t.Fatalf("create workload: %v", err) } // Hand-build a 2-volume archive where volume 1 carries a symlink entry the // untrusted extractor rejects — forcing a post-lock extract failure after // volume 0 has already been staged. arc := buildTarGz(t, []tentry{ {name: "0/f.txt", typeflag: tar.TypeReg, body: "x"}, {name: "1/evil", typeflag: tar.TypeSymlink, linkname: "/etc/passwd"}, }) data, err := os.ReadFile(arc) if err != nil { t.Fatal(err) } fname := "extract-fail.tar.gz" if err := os.WriteFile(filepath.Join(eng.snapDir, fname), data, 0o600); err != nil { t.Fatal(err) } snap, err := st.CreateVolumeSnapshot(store.VolumeSnapshot{ WorkloadID: w.ID, Filename: fname, Manifest: `[{"index":0,"target":"/data","scope":"project","source":"data"},` + `{"index":1,"target":"/other","scope":"project","source":"other"}]`, }) if err != nil { t.Fatalf("seed snapshot: %v", err) } fake := &fakeLifecycle{tag: "v1"} eng.SetLifecycle(fake) if err := eng.Restore(context.Background(), snap.ID, w.ID); err == nil { t.Fatal("expected extract failure to abort the restore") } // Post-lock abort: it stopped, then brought the app back (no swaps happened). if !fake.saw("lock") || !fake.saw("stop") || !fake.saw("redeploy:v1") { t.Errorf("expected lock+stop+redeploy after a post-lock abort; calls=%v", fake.calls) } // No staging or journal left behind. assertNoJournal(t, eng) entries, _ := os.ReadDir(eng.snapDir) for _, e := range entries { if strings.Contains(e.Name(), ".tf-restore-") { t.Errorf("leftover staging dir: %s", e.Name()) } } } func TestRecoverInterruptedRestores(t *testing.T) { eng, _, _ := newRestoreEngine(t) root := t.TempDir() // A: swap completed — keep restored live, drop old. liveA := filepath.Join(root, "A") oldA := filepath.Join(root, ".A.old") mkDirWith(t, liveA, "f", "RESTORED-A") mkDirWith(t, oldA, "f", "ORIGINAL-A") // B: not swapped, live present — keep original, drop tmp. liveB := filepath.Join(root, "B") tmpB := filepath.Join(root, ".B.tmp") mkDirWith(t, liveB, "f", "ORIGINAL-B") mkDirWith(t, tmpB, "f", "STAGED-B") // C: crashed mid-rename — live missing, old present — revert from old. liveC := filepath.Join(root, "C") oldC := filepath.Join(root, ".C.old") tmpC := filepath.Join(root, ".C.tmp") mkDirWith(t, oldC, "f", "ORIGINAL-C") mkDirWith(t, tmpC, "f", "STAGED-C") jr := restoreJournal{SnapshotID: "snap", WorkloadID: "wl-recover", Volumes: []journalVolume{ {Live: liveA, Old: oldA, Swapped: true, HadOld: true}, {Live: liveB, Tmp: tmpB, Swapped: false}, {Live: liveC, Old: oldC, Tmp: tmpC, Swapped: false}, }} if err := eng.writeJournal(jr); err != nil { t.Fatalf("write journal: %v", err) } n, err := eng.RecoverInterruptedRestores() if err != nil { t.Fatalf("recover: %v", err) } if n != 1 { t.Fatalf("recovered %d journals, want 1", n) } if got := readIn(t, liveA, "f"); got != "RESTORED-A" { t.Errorf("A live = %q, want RESTORED-A (swap kept)", got) } if _, err := os.Stat(oldA); !os.IsNotExist(err) { t.Error("A old should be removed") } if got := readIn(t, liveB, "f"); got != "ORIGINAL-B" { t.Errorf("B live = %q, want ORIGINAL-B (untouched)", got) } if _, err := os.Stat(tmpB); !os.IsNotExist(err) { t.Error("B tmp should be removed") } if got := readIn(t, liveC, "f"); got != "ORIGINAL-C" { t.Errorf("C live = %q, want ORIGINAL-C (reverted from old)", got) } if _, err := os.Stat(tmpC); !os.IsNotExist(err) { t.Error("C tmp should be removed") } assertNoJournal(t, eng) } func assertNoJournal(t *testing.T, eng *Engine) { t.Helper() entries, err := os.ReadDir(eng.snapDir) if err != nil { t.Fatal(err) } for _, e := range entries { if strings.HasPrefix(e.Name(), "restore-") && strings.HasSuffix(e.Name(), ".json") { t.Errorf("leftover restore journal: %s", e.Name()) } } }