feat(snapshots): capture app data-volume snapshots
Build / build (push) Successful in 10m59s

Add per-workload capture of host-bind data volumes as downloadable tar.gz archives: a new internal/volsnap engine (enumerate host-bind volumes via the computeMounts merge, archive with archive/tar+gzip skipping symlinks/special files, per-workload retention + startup orphan cleanup), a volume_snapshots table + store CRUD, admin-gated API (list/snapshotable/create/download/delete), and a Snapshots panel on /apps/[id] that shows coverage and which volumes are skipped (and why). Scope: image-source apps, host-bind scopes (absolute/stage/project); Docker named volumes, tmpfs, and instance scope are surfaced as not-yet-supported. Restore is a separate later phase. Download/FilePath are containment-checked; create returns a typed no-data error (400) vs generic 500. Covered by archiver unit tests + full API e2e.
This commit is contained in:
2026-06-02 14:56:10 +03:00
parent 2ba49b9bb6
commit 6b45ed62bb
16 changed files with 1565 additions and 4 deletions
+140
View File
@@ -0,0 +1,140 @@
package volsnap
import (
"archive/tar"
"compress/gzip"
"encoding/json"
"fmt"
"io"
"io/fs"
"os"
"path"
"path/filepath"
)
// writeArchive serializes the given host-bind volume directories into a
// gzip-compressed tar at dest and returns the manifest describing what was
// captured.
//
// Archive layout: the files of refs[i] live under the top-level directory
// named by the decimal index i, and a manifest.json entry (written after all
// volume data) makes the archive self-describing.
//
// Only regular files and directories are archived. Symlinks and special files
// (devices, sockets, fifos) are skipped — this keeps capture safe and avoids
// recording links whose targets would be meaningless or escape the volume on a
// later restore. A torn snapshot is possible if the app writes during capture;
// callers should surface that caveat.
//
// dest must not already exist. If creating it fails, nothing is removed (the
// path may belong to an earlier snapshot). On any later failure the partially
// written file is closed and then removed, so no truncated archive is left
// behind.
func writeArchive(dest string, refs []VolumeRef) ([]SnapshotVolume, error) {
	// O_EXCL: never clobber an existing file (filenames are unique per call).
	f, err := os.OpenFile(dest, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0o600)
	if err != nil {
		return nil, fmt.Errorf("create snapshot file: %w", err)
	}
	gz := gzip.NewWriter(f)
	tw := tar.NewWriter(gz)

	// Single failure-cleanup path. The handle is always closed BEFORE the file
	// is removed: removing a file that is still open fails on some platforms
	// (notably Windows) and would leave a partial archive on disk.
	committed := false
	defer func() {
		if committed {
			return
		}
		// Best-effort teardown; the error that got us here is what the caller
		// sees, so secondary close/remove errors are intentionally dropped.
		_ = tw.Close()
		_ = gz.Close()
		_ = f.Close()
		_ = os.Remove(dest)
	}()

	manifest := make([]SnapshotVolume, 0, len(refs))
	for i, ref := range refs {
		manifest = append(manifest, SnapshotVolume{Index: i, Target: ref.Target, Scope: ref.Scope, Source: ref.Source})
		if err := addDir(tw, ref.HostPath, fmt.Sprintf("%d", i)); err != nil {
			return nil, err
		}
	}
	if err := writeManifestEntry(tw, manifest); err != nil {
		return nil, err
	}

	// Finalize innermost-first so each layer flushes into the one below it.
	if err := tw.Close(); err != nil {
		return nil, fmt.Errorf("finalize tar: %w", err)
	}
	if err := gz.Close(); err != nil {
		return nil, fmt.Errorf("finalize gzip: %w", err)
	}
	if err := f.Close(); err != nil {
		return nil, fmt.Errorf("close snapshot file: %w", err)
	}
	committed = true
	return manifest, nil
}
// addDir walks root and writes its regular files and directories into tw under
// the given archive prefix.
//
// Entry names are slash-separated: root itself becomes "<prefix>/", and every
// descendant becomes "<prefix>/<path relative to root>" (directories carry a
// trailing slash). Symlinks and special files are skipped, and because
// filepath.WalkDir does not follow symlinks, nothing outside root is visited.
//
// Each file's content is bounded by the size recorded in its tar header. A
// file that grows while it is being copied is captured up to that size (a torn
// but well-formed entry) instead of aborting the whole snapshot with
// tar.ErrWriteTooLong. A file that shrinks is reported as an error naming that
// file, rather than surfacing later as a "missed writing" error attributed to
// the next entry.
//
// The first walk, stat, header or copy error aborts the walk and is returned
// wrapped with the offending path.
func addDir(tw *tar.Writer, root, prefix string) error {
	return filepath.WalkDir(root, func(p string, d fs.DirEntry, walkErr error) error {
		if walkErr != nil {
			return fmt.Errorf("walk %s: %w", p, walkErr)
		}
		// Skip symlinks and special files; archive only dirs and regular files.
		if d.Type()&fs.ModeSymlink != 0 {
			return nil
		}
		if !d.IsDir() && !d.Type().IsRegular() {
			return nil
		}

		rel, err := filepath.Rel(root, p)
		if err != nil {
			return fmt.Errorf("relativize %s: %w", p, err)
		}
		// Tar names always use forward slashes, whatever the host separator is.
		name := prefix
		if rel != "." {
			name = path.Join(prefix, filepath.ToSlash(rel))
		}

		info, err := d.Info()
		if err != nil {
			return fmt.Errorf("stat %s: %w", p, err)
		}
		hdr, err := tar.FileInfoHeader(info, "")
		if err != nil {
			return fmt.Errorf("tar header %s: %w", p, err)
		}
		hdr.Name = name
		if d.IsDir() {
			hdr.Name += "/"
		}
		if err := tw.WriteHeader(hdr); err != nil {
			return fmt.Errorf("write tar header %s: %w", name, err)
		}
		if d.IsDir() {
			return nil
		}

		src, err := os.Open(p)
		if err != nil {
			return fmt.Errorf("open %s: %w", p, err)
		}
		// The closure runs once per entry, so this defer fires per file.
		defer src.Close()

		// Never hand the tar writer more bytes than the header promised.
		n, err := io.Copy(tw, io.LimitReader(src, hdr.Size))
		if err != nil {
			return fmt.Errorf("copy %s: %w", p, err)
		}
		if n != hdr.Size {
			return fmt.Errorf("copy %s: file shrank during capture (%d of %d bytes read)", p, n, hdr.Size)
		}
		return nil
	})
}
// writeManifestEntry appends a "manifest.json" regular-file entry to tw whose
// body is the indented JSON encoding of manifest. It returns a wrapped error
// if encoding, writing the header, or writing the body fails.
func writeManifestEntry(tw *tar.Writer, manifest []SnapshotVolume) error {
	payload, encErr := json.MarshalIndent(manifest, "", " ")
	if encErr != nil {
		return fmt.Errorf("encode manifest: %w", encErr)
	}

	header := tar.Header{
		Typeflag: tar.TypeReg,
		Name:     "manifest.json",
		Mode:     0o600,
		Size:     int64(len(payload)),
	}
	if hdrErr := tw.WriteHeader(&header); hdrErr != nil {
		return fmt.Errorf("write manifest header: %w", hdrErr)
	}

	_, writeErr := tw.Write(payload)
	if writeErr != nil {
		return fmt.Errorf("write manifest: %w", writeErr)
	}
	return nil
}
+117
View File
@@ -0,0 +1,117 @@
package volsnap
import (
"archive/tar"
"compress/gzip"
"io"
"os"
"path/filepath"
"testing"
)
// TestWriteArchiveRoundTrip archives a small directory tree (one top-level
// file, one nested file) as a single volume and checks the returned manifest,
// the presence of the expected archive entries, and one file's content.
func TestWriteArchiveRoundTrip(t *testing.T) {
	srcDir := t.TempDir()
	subDir := filepath.Join(srcDir, "sub")

	mustWrite(t, filepath.Join(srcDir, "a.txt"), "hello")
	if err := os.MkdirAll(subDir, 0o755); err != nil {
		t.Fatal(err)
	}
	mustWrite(t, filepath.Join(subDir, "b.txt"), "world")

	archivePath := filepath.Join(t.TempDir(), "snap.tar.gz")
	manifest, err := writeArchive(archivePath, []VolumeRef{
		{Target: "/data", Scope: "project", Source: "data", HostPath: srcDir},
	})
	if err != nil {
		t.Fatalf("writeArchive: %v", err)
	}

	// Exactly one volume, stored under index 0 with the ref's target/scope.
	if len(manifest) != 1 {
		t.Fatalf("unexpected manifest: %+v", manifest)
	}
	if first := manifest[0]; first.Index != 0 || first.Target != "/data" || first.Scope != "project" {
		t.Fatalf("unexpected manifest: %+v", manifest)
	}

	contents := readArchive(t, archivePath)
	for _, name := range []string{"0/a.txt", "0/sub/b.txt", "manifest.json"} {
		if _, present := contents[name]; present {
			continue
		}
		// Report what the archive does contain to make the failure actionable.
		names := make([]string, 0, len(contents))
		for k := range contents {
			names = append(names, k)
		}
		t.Fatalf("archive missing %q; got %v", name, names)
	}

	if got, want := contents["0/a.txt"], "hello"; got != want {
		t.Errorf("0/a.txt = %q, want %q", got, want)
	}
}
// TestWriteArchiveRefusesExisting verifies that writeArchive returns an error
// when the destination path is already occupied by a file.
func TestWriteArchiveRefusesExisting(t *testing.T) {
	occupied := filepath.Join(t.TempDir(), "snap.tar.gz")
	mustWrite(t, occupied, "existing")

	_, err := writeArchive(occupied, nil)
	if err != nil {
		return
	}
	t.Fatal("expected error writing over an existing file (O_EXCL)")
}
// TestWriteArchiveSkipsSymlinks verifies that a symlink inside a volume is
// left out of the archive while the regular file next to it is captured. The
// test is skipped when the platform cannot create symlinks.
func TestWriteArchiveSkipsSymlinks(t *testing.T) {
	volDir := t.TempDir()
	realPath := filepath.Join(volDir, "real.txt")
	linkPath := filepath.Join(volDir, "link.txt")

	mustWrite(t, realPath, "data")
	if err := os.Symlink(realPath, linkPath); err != nil {
		t.Skipf("symlinks unavailable on this platform: %v", err)
	}

	archivePath := filepath.Join(t.TempDir(), "snap.tar.gz")
	volumes := []VolumeRef{{Target: "/d", Scope: "project", HostPath: volDir}}
	if _, err := writeArchive(archivePath, volumes); err != nil {
		t.Fatalf("writeArchive: %v", err)
	}

	contents := readArchive(t, archivePath)
	_, hasLink := contents["0/link.txt"]
	_, hasReal := contents["0/real.txt"]
	if hasLink {
		t.Error("symlink should have been skipped, but it is in the archive")
	}
	if !hasReal {
		t.Error("regular file should be archived")
	}
}
// mustWrite writes content to the file at path (mode 0o644), failing the test
// immediately if the write does not succeed.
func mustWrite(t *testing.T, path, content string) {
	t.Helper()
	err := os.WriteFile(path, []byte(content), 0o644)
	if err == nil {
		return
	}
	t.Fatal(err)
}
// readArchive opens the gzip-compressed tar at path and returns a map from
// entry name to entry content. Directory entries are stored with an empty
// string so their presence can still be asserted; every other entry maps to
// the bytes read for it. Any open or read error fails the test.
func readArchive(t *testing.T, path string) map[string]string {
	t.Helper()

	file, err := os.Open(path)
	if err != nil {
		t.Fatal(err)
	}
	defer file.Close()

	zr, err := gzip.NewReader(file)
	if err != nil {
		t.Fatal(err)
	}
	defer zr.Close()

	entries := make(map[string]string)
	for tr := tar.NewReader(zr); ; {
		hdr, err := tr.Next()
		switch {
		case err == io.EOF:
			// End of archive: everything has been collected.
			return entries
		case err != nil:
			t.Fatal(err)
		}

		body := ""
		if hdr.Typeflag != tar.TypeDir {
			raw, err := io.ReadAll(tr)
			if err != nil {
				t.Fatal(err)
			}
			body = string(raw)
		}
		entries[hdr.Name] = body
	}
}
+207
View File
@@ -0,0 +1,207 @@
package volsnap
import (
"encoding/json"
"errors"
"fmt"
"log/slog"
"os"
"path/filepath"
"strings"
"sync"
"time"
"github.com/google/uuid"
"github.com/alexei/tinyforge/internal/store"
)
// maxSnapshotsPerWorkload caps how many snapshots are retained per app. After
// each successful Create, pruneWorkload deletes the oldest snapshots beyond
// this count (best-effort) so volume snapshots cannot grow the data disk
// without bound.
const maxSnapshotsPerWorkload = 20
// ErrNoSnapshotData is returned by Create when volume enumeration yields no
// resolved host-bind volume directory to capture. It is a client-actionable
// condition (intended to map to HTTP 400), distinct from internal failures
// (HTTP 500). Being a sentinel value, callers can match it with errors.Is.
var ErrNoSnapshotData = errors.New("no snapshottable volume data for this app")
// Engine creates and manages volume snapshots under <dataDir>/snapshots.
// Construct it with New; it is used through a pointer because it embeds a
// mutex.
type Engine struct {
// mu is held by Create (from archive write through pruning), Delete and
// CleanOrphans, so those operations never interleave on the snapshot
// directory.
mu sync.Mutex
// store holds the snapshot metadata rows and workload volume definitions.
store *store.Store
// snapDir is the directory that contains the snapshot archive files.
snapDir string
}
// New builds an Engine that keeps its archives in the "snapshots"
// subdirectory of dataDir, creating that directory (and any missing parents)
// if necessary. It returns an error when the directory cannot be created.
func New(st *store.Store, dataDir string) (*Engine, error) {
	snapDir := filepath.Join(dataDir, "snapshots")

	mkErr := os.MkdirAll(snapDir, 0o755)
	if mkErr != nil {
		return nil, fmt.Errorf("create snapshot directory: %w", mkErr)
	}

	eng := &Engine{
		snapDir: snapDir,
		store:   st,
	}
	return eng, nil
}
// SnapDir reports the directory in which this engine stores snapshot
// archives.
func (e *Engine) SnapDir() string {
	return e.snapDir
}
// Create captures a snapshot of the workload's host-bind data volumes and
// records it in the store.
//
// The volumes are enumerated with SnapshotableVolumes; when none are
// capturable the call fails with ErrNoSnapshotData. Otherwise, under the
// engine lock, a uniquely named .tar.gz archive is written into the snapshot
// directory, a metadata row (trimmed label, filename, size, JSON manifest) is
// stored, and snapshots beyond the retention cap are pruned. If anything fails
// after the archive was written, the archive is removed again (best-effort) so
// no orphan file is left behind.
func (e *Engine) Create(w store.Workload, settings store.Settings, label string) (store.VolumeSnapshot, error) {
	var none store.VolumeSnapshot

	refs, _, err := SnapshotableVolumes(e.store, w, settings)
	switch {
	case err != nil:
		return none, fmt.Errorf("enumerate volumes: %w", err)
	case len(refs) == 0:
		return none, ErrNoSnapshotData
	}

	e.mu.Lock()
	defer e.mu.Unlock()

	// <short workload id>-<UTC timestamp>-<random suffix>.tar.gz
	stamp := time.Now().UTC().Format("20060102-150405")
	suffix := uuid.New().String()[:8]
	filename := fmt.Sprintf("%s-%s-%s.tar.gz", idShort(w.ID), stamp, suffix)
	dest := filepath.Join(e.snapDir, filename)

	// discard drops the already-written archive and reports cause.
	discard := func(cause error) (store.VolumeSnapshot, error) {
		os.Remove(dest) // best-effort: don't leak an orphan file
		return none, cause
	}

	// writeArchive cleans up after itself, so dest is not removed here.
	manifest, err := writeArchive(dest, refs)
	if err != nil {
		return none, err
	}

	info, err := os.Stat(dest)
	if err != nil {
		return discard(fmt.Errorf("stat snapshot: %w", err))
	}

	manifestJSON, err := json.Marshal(manifest)
	if err != nil {
		return discard(fmt.Errorf("encode manifest: %w", err))
	}

	pending := store.VolumeSnapshot{
		WorkloadID: w.ID,
		Label:      strings.TrimSpace(label),
		Filename:   filename,
		SizeBytes:  info.Size(),
		Manifest:   string(manifestJSON),
	}
	row, err := e.store.CreateVolumeSnapshot(pending)
	if err != nil {
		return discard(fmt.Errorf("record snapshot: %w", err))
	}

	slog.Info("volume snapshot created", "id", row.ID, "workload", w.ID,
		"volumes", len(manifest), "size", info.Size())

	// Retention runs while the lock is still held.
	e.pruneWorkload(w.ID)
	return row, nil
}
// List returns the snapshots recorded for workloadID, in the order the store
// yields them (documented by the original author as newest first).
func (e *Engine) List(workloadID string) ([]store.VolumeSnapshot, error) {
	snaps, err := e.store.ListVolumeSnapshots(workloadID)
	return snaps, err
}
// Get looks up a single snapshot's metadata row by its id, passing through
// whatever the store returns.
func (e *Engine) Get(id string) (store.VolumeSnapshot, error) {
	snap, err := e.store.GetVolumeSnapshot(id)
	return snap, err
}
// Delete removes a snapshot's archive file and then its metadata row.
//
// The row is looked up first; a lookup error is returned unchanged. Removing
// the file is best-effort: an unresolvable path is skipped, a missing file is
// ignored, and any other removal failure is only logged. The returned error is
// that of deleting the metadata row.
func (e *Engine) Delete(id string) error {
	snap, err := e.store.GetVolumeSnapshot(id)
	if err != nil {
		return err
	}

	e.mu.Lock()
	defer e.mu.Unlock()

	archive, pathErr := e.FilePath(snap)
	if pathErr == nil {
		rmErr := os.Remove(archive)
		if rmErr != nil && !os.IsNotExist(rmErr) {
			slog.Warn("volume snapshot: remove file", "id", id, "error", rmErr)
		}
	}

	return e.store.DeleteVolumeSnapshot(id)
}
// FilePath resolves a snapshot's archive path and verifies it stays within the
// snapshot directory (defence-in-depth against a tampered filename column).
//
// The stored filename must be a bare file name: anything containing a
// directory component is rejected as invalid. The joined path is then made
// absolute and must lie strictly below the absolute snapshot directory, which
// also rejects names such as ".." that survive the first check. On success the
// absolute archive path is returned.
func (e *Engine) FilePath(snap store.VolumeSnapshot) (string, error) {
	base := filepath.Base(snap.Filename)
	if base == "" || base == "." || base != snap.Filename {
		return "", fmt.Errorf("invalid snapshot filename")
	}

	// Resolve the directory first and fail closed: with an empty absDir the
	// prefix test below would degrade to "is under the filesystem root".
	absDir, err := filepath.Abs(e.snapDir)
	if err != nil {
		return "", fmt.Errorf("resolve snapshot directory: %w", err)
	}
	abs, err := filepath.Abs(filepath.Join(e.snapDir, base))
	if err != nil {
		return "", err
	}

	// The trailing separator prevents a sibling such as "<snapDir>-other" from
	// passing as a prefix match.
	if !strings.HasPrefix(abs, absDir+string(filepath.Separator)) {
		return "", fmt.Errorf("snapshot path escapes snapshot directory")
	}
	return abs, nil
}
// CleanOrphans deletes every non-directory entry in the snapshot directory
// whose name is not recorded as a snapshot filename in the store, and returns
// how many files were removed.
//
// Per the original design note, deleting a workload cascades to its
// volume_snapshots rows but cannot reach the archive files; this
// reconciliation (intended to run at startup) reclaims them. A file that
// cannot be removed is logged and skipped rather than aborting the sweep.
func (e *Engine) CleanOrphans() (int, error) {
	e.mu.Lock()
	defer e.mu.Unlock()

	onDisk, err := os.ReadDir(e.snapDir)
	if err != nil {
		return 0, fmt.Errorf("read snapshot dir: %w", err)
	}
	recorded, err := e.store.AllVolumeSnapshotFilenames()
	if err != nil {
		return 0, fmt.Errorf("list snapshot filenames: %w", err)
	}

	// Set of filenames that still have a metadata row.
	known := make(map[string]bool, len(recorded))
	for _, name := range recorded {
		known[name] = true
	}

	removed := 0
	for _, ent := range onDisk {
		if ent.IsDir() {
			continue
		}
		name := ent.Name()
		if known[name] {
			continue
		}
		rmErr := os.Remove(filepath.Join(e.snapDir, name))
		if rmErr != nil {
			slog.Warn("volume snapshot: remove orphan", "file", name, "error", rmErr)
			continue
		}
		removed++
	}
	return removed, nil
}
// pruneWorkload deletes snapshots beyond maxSnapshotsPerWorkload for one
// workload (oldest first). The caller must already hold e.mu.
//
// Pruning is best-effort: it never returns an error, but every failure is
// logged so a retention problem is visible rather than silent. For each
// snapshot selected for removal the archive file is removed first (a missing
// file is fine) and then the metadata row is deleted; a failure on one
// snapshot does not stop the others from being pruned.
func (e *Engine) pruneWorkload(workloadID string) {
	count, err := e.store.CountVolumeSnapshots(workloadID)
	if err != nil {
		slog.Warn("volume snapshot: prune count", "workload", workloadID, "error", err)
		return
	}
	if count <= maxSnapshotsPerWorkload {
		return
	}

	oldest, err := e.store.GetOldestVolumeSnapshots(workloadID, count-maxSnapshotsPerWorkload)
	if err != nil {
		slog.Warn("volume snapshot: prune query", "workload", workloadID, "error", err)
		return
	}

	for _, snap := range oldest {
		if p, perr := e.FilePath(snap); perr == nil {
			if rmErr := os.Remove(p); rmErr != nil && !os.IsNotExist(rmErr) {
				slog.Warn("volume snapshot: prune remove file", "id", snap.ID, "error", rmErr)
			}
		}
		if derr := e.store.DeleteVolumeSnapshot(snap.ID); derr != nil {
			slog.Warn("volume snapshot: prune delete", "id", snap.ID, "error", derr)
		}
	}
}
// idShort returns the first 8 bytes of id, or id unchanged when it is 8 bytes
// or shorter. It is used to build compact snapshot filenames.
func idShort(id string) string {
	const keep = 8
	if len(id) <= keep {
		return id
	}
	return id[:keep]
}
+146
View File
@@ -0,0 +1,146 @@
// Package volsnap captures and manages per-workload snapshots of an app's
// host-bind data volumes. It is deliberately independent of internal/backup
// (which is SQLite-specific): a snapshot here is a tar.gz of the resolved
// volume directories, recorded in the volume_snapshots table.
//
// Phase 2a-i covers CAPTURE only (create/list/delete/download). The restore
// path — which overwrites live data and needs container quiesce + atomic swap
// — is intentionally a separate, later phase.
package volsnap
import (
	"encoding/json"
	"os"
	"sort"

	"github.com/alexei/tinyforge/internal/store"
	"github.com/alexei/tinyforge/internal/volume"
)
// supportedScopes are the host-bind volume scopes phase 2a-i can snapshot.
// Each resolves to a real host directory the running container binds. Excluded
// for now: instance (needs the deployed image tag to resolve a per-tag dir),
// named/project_named (Docker named volumes — need a docker-run-tar primitive),
// and ephemeral (tmpfs — no data to capture).
//
// SnapshotableVolumes consults this set: a volume whose scope is not a key
// here is reported as skipped, with the reason given by skipReason.
var supportedScopes = map[string]bool{
string(store.VolumeScopeAbsolute): true,
string(store.VolumeScopeStage): true,
string(store.VolumeScopeProject): true,
}
// SnapshotVolume is one volume covered by a snapshot. It is persisted in the
// snapshot row's manifest (JSON) and written into the archive so a future
// restore can re-resolve the target even if volume settings drift. Index names
// the archive subdirectory holding that volume's files.
type SnapshotVolume struct {
// Index is the volume's position in the captured list; its decimal form is
// the top-level archive directory that holds this volume's files.
Index int `json:"index"`
// Target, Scope and Source are copied verbatim from the VolumeRef that was
// captured.
Target string `json:"target"`
Scope string `json:"scope"`
Source string `json:"source"`
}
// VolumeRef is a resolved, on-disk host-bind volume eligible for snapshotting.
// SnapshotableVolumes produces one for every declared volume whose scope is
// supported and whose resolved host path is an existing directory.
type VolumeRef struct {
// Target, Scope and Source are taken from the volume declaration.
Target string
Scope string
Source string
// HostPath is the directory on the host that gets archived.
HostPath string
}
// SkippedVolume is a declared volume that cannot be snapshotted, with the
// reason surfaced to the UI so users are never misled into thinking data is
// captured when it is not.
type SkippedVolume struct {
// Target and Scope identify the declared volume.
Target string `json:"target"`
Scope string `json:"scope"`
// Reason is a human-readable explanation: an unsupported-scope message, the
// path-resolution error text, or "no data on disk yet".
Reason string `json:"reason"`
}
// scVolumes is the minimal shape parsed out of an image workload's
// source_config — just enough to learn its declared volumes without importing
// the image source package. Only the "volumes" array is decoded; all other
// keys in the config are ignored by the JSON decoder.
type scVolumes struct {
// Volumes lists the inline volume declarations. Entries with an empty
// Target are dropped by SnapshotableVolumes.
Volumes []struct {
Source string `json:"source"`
Target string `json:"target"`
Scope string `json:"scope"`
Name string `json:"name"`
} `json:"volumes"`
}
// SnapshotableVolumes enumerates a workload's data volumes and splits them into
// those that can be snapshotted now (resolved host-bind dirs that exist on
// disk) and those that are skipped (with a reason). It mirrors the image
// source's computeMounts merge: source_config volumes overlaid by persisted
// workload_volumes rows (persisted wins on a target conflict).
//
// Only image-source workloads declare host-bind data volumes today; for any
// other source kind both slices come back empty.
//
// Both result slices are ordered by volume target. The order of refs decides
// which archive index each volume receives, so a deterministic order keeps
// archive layout, manifests and the UI listing stable across calls for the
// same configuration.
//
// A volume is skipped when its scope is unsupported, when its host path cannot
// be resolved (the resolver's error text becomes the reason), or when the
// resolved path is missing or not a directory. The only error returned is a
// failure to list the persisted workload volumes.
func SnapshotableVolumes(st *store.Store, w store.Workload, settings store.Settings) (refs []VolumeRef, skipped []SkippedVolume, err error) {
	if w.SourceKind != "image" {
		return nil, nil, nil
	}

	byTarget := map[string]store.WorkloadVolume{}
	var cfg scVolumes
	if w.SourceConfig != "" {
		// Best-effort: a malformed config simply yields no inline volumes; the
		// persisted rows below still apply.
		_ = json.Unmarshal([]byte(w.SourceConfig), &cfg)
	}
	for _, v := range cfg.Volumes {
		if v.Target == "" {
			continue
		}
		byTarget[v.Target] = store.WorkloadVolume{Source: v.Source, Target: v.Target, Scope: v.Scope, Name: v.Name}
	}

	persisted, perr := st.ListWorkloadVolumes(w.ID)
	if perr != nil {
		return nil, nil, perr
	}
	// Persisted rows overwrite inline declarations for the same target.
	for _, p := range persisted {
		byTarget[p.Target] = store.WorkloadVolume{Source: p.Source, Target: p.Target, Scope: p.Scope, Name: p.Name}
	}

	params := volume.ResolveWorkloadParams{
		BasePath:           settings.BaseVolumePath,
		WorkloadID:         w.ID,
		WorkloadName:       w.Name,
		AllowedVolumePaths: settings.AllowedVolumePaths,
	}

	// Go randomizes map iteration order; walk the targets sorted instead.
	targets := make([]string, 0, len(byTarget))
	for target := range byTarget {
		targets = append(targets, target)
	}
	sort.Strings(targets)

	for _, target := range targets {
		v := byTarget[target]
		if v.Target == "" {
			continue
		}
		if !supportedScopes[v.Scope] {
			skipped = append(skipped, SkippedVolume{Target: v.Target, Scope: v.Scope, Reason: skipReason(v.Scope)})
			continue
		}
		hostPath, rerr := volume.ResolveWorkloadPath(v, params)
		if rerr != nil {
			skipped = append(skipped, SkippedVolume{Target: v.Target, Scope: v.Scope, Reason: rerr.Error()})
			continue
		}
		info, serr := os.Stat(hostPath)
		if serr != nil || !info.IsDir() {
			skipped = append(skipped, SkippedVolume{Target: v.Target, Scope: v.Scope, Reason: "no data on disk yet"})
			continue
		}
		refs = append(refs, VolumeRef{Target: v.Target, Scope: v.Scope, Source: v.Source, HostPath: hostPath})
	}
	return refs, skipped, nil
}
// skipReason maps an unsupported volume scope to the user-facing explanation
// of why volumes of that scope are not captured. Scopes it does not recognize
// get a generic message.
func skipReason(scope string) string {
	if scope == string(store.VolumeScopeInstance) {
		return "instance-scoped volumes are not yet snapshottable"
	}
	if scope == string(store.VolumeScopeNamed) || scope == string(store.VolumeScopeProjectNamed) {
		return "Docker named volumes are not yet snapshottable"
	}
	if scope == string(store.VolumeScopeEphemeral) {
		return "ephemeral (tmpfs) volumes hold no persistent data"
	}
	return "unsupported volume scope"
}