Add per-workload capture of host-bind data volumes as downloadable tar.gz archives: a new internal/volsnap engine (enumerate host-bind volumes via the computeMounts merge, archive with archive/tar+gzip skipping symlinks/special files, per-workload retention + startup orphan cleanup), a volume_snapshots table + store CRUD, admin-gated API (list/snapshotable/create/download/delete), and a Snapshots panel on /apps/[id] that shows coverage and which volumes are skipped (and why). Scope: image-source apps, host-bind scopes (absolute/stage/project); Docker named volumes, tmpfs, and instance scope are surfaced as not-yet-supported. Restore is a separate later phase. Download/FilePath are containment-checked; create returns a typed no-data error (400) vs generic 500. Covered by archiver unit tests + full API e2e.
This commit is contained in:
@@ -0,0 +1,207 @@
|
||||
package volsnap
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/google/uuid"
|
||||
|
||||
"github.com/alexei/tinyforge/internal/store"
|
||||
)
|
||||
|
||||
// maxSnapshotsPerWorkload is the retention cap: the maximum number of
// snapshots kept for a single workload (app). After every successful Create,
// pruneWorkload deletes the oldest snapshots beyond this count (best-effort)
// so volume snapshots cannot grow the data disk without bound.
const maxSnapshotsPerWorkload = 20
|
||||
|
||||
// ErrNoSnapshotData is returned by Create when the workload has no resolved
// host-bind volume directory to capture, i.e. SnapshotableVolumes produced no
// volume references. It is a client-actionable condition (HTTP 400), distinct
// from internal failures (HTTP 500). Create returns it unwrapped, so callers
// can test for it with errors.Is.
var ErrNoSnapshotData = errors.New("no snapshottable volume data for this app")
|
||||
|
||||
// Engine creates and manages volume snapshots under <dataDir>/snapshots.
//
// An Engine contains a sync.Mutex and must therefore not be copied; obtain one
// from New and share the returned pointer.
type Engine struct {
	mu      sync.Mutex   // held by Create, Delete and CleanOrphans while they touch files and rows
	store   *store.Store // persistence for volume snapshot metadata rows
	snapDir string       // directory that holds the snapshot archive files
}
|
||||
|
||||
// New creates the snapshot engine, ensuring the snapshot directory exists.
|
||||
func New(st *store.Store, dataDir string) (*Engine, error) {
|
||||
dir := filepath.Join(dataDir, "snapshots")
|
||||
if err := os.MkdirAll(dir, 0o755); err != nil {
|
||||
return nil, fmt.Errorf("create snapshot directory: %w", err)
|
||||
}
|
||||
return &Engine{store: st, snapDir: dir}, nil
|
||||
}
|
||||
|
||||
// SnapDir returns the directory holding snapshot archives.
|
||||
func (e *Engine) SnapDir() string { return e.snapDir }
|
||||
|
||||
// Create captures a snapshot of the workload's host-bind data volumes.
|
||||
func (e *Engine) Create(w store.Workload, settings store.Settings, label string) (store.VolumeSnapshot, error) {
|
||||
refs, _, err := SnapshotableVolumes(e.store, w, settings)
|
||||
if err != nil {
|
||||
return store.VolumeSnapshot{}, fmt.Errorf("enumerate volumes: %w", err)
|
||||
}
|
||||
if len(refs) == 0 {
|
||||
return store.VolumeSnapshot{}, ErrNoSnapshotData
|
||||
}
|
||||
|
||||
e.mu.Lock()
|
||||
defer e.mu.Unlock()
|
||||
|
||||
filename := fmt.Sprintf("%s-%s-%s.tar.gz",
|
||||
idShort(w.ID), time.Now().UTC().Format("20060102-150405"), uuid.New().String()[:8])
|
||||
dest := filepath.Join(e.snapDir, filename)
|
||||
|
||||
manifest, err := writeArchive(dest, refs)
|
||||
if err != nil {
|
||||
return store.VolumeSnapshot{}, err
|
||||
}
|
||||
|
||||
info, err := os.Stat(dest)
|
||||
if err != nil {
|
||||
os.Remove(dest)
|
||||
return store.VolumeSnapshot{}, fmt.Errorf("stat snapshot: %w", err)
|
||||
}
|
||||
manifestJSON, err := json.Marshal(manifest)
|
||||
if err != nil {
|
||||
os.Remove(dest)
|
||||
return store.VolumeSnapshot{}, fmt.Errorf("encode manifest: %w", err)
|
||||
}
|
||||
|
||||
row, err := e.store.CreateVolumeSnapshot(store.VolumeSnapshot{
|
||||
WorkloadID: w.ID,
|
||||
Label: strings.TrimSpace(label),
|
||||
Filename: filename,
|
||||
SizeBytes: info.Size(),
|
||||
Manifest: string(manifestJSON),
|
||||
})
|
||||
if err != nil {
|
||||
os.Remove(dest) // best-effort: don't leak an orphan file
|
||||
return store.VolumeSnapshot{}, fmt.Errorf("record snapshot: %w", err)
|
||||
}
|
||||
|
||||
slog.Info("volume snapshot created", "id", row.ID, "workload", w.ID,
|
||||
"volumes", len(manifest), "size", info.Size())
|
||||
|
||||
e.pruneWorkload(w.ID)
|
||||
return row, nil
|
||||
}
|
||||
|
||||
// List returns a workload's snapshots, newest first.
|
||||
func (e *Engine) List(workloadID string) ([]store.VolumeSnapshot, error) {
|
||||
return e.store.ListVolumeSnapshots(workloadID)
|
||||
}
|
||||
|
||||
// Get returns one snapshot by id.
|
||||
func (e *Engine) Get(id string) (store.VolumeSnapshot, error) {
|
||||
return e.store.GetVolumeSnapshot(id)
|
||||
}
|
||||
|
||||
// Delete removes a snapshot's archive file and its metadata row.
|
||||
func (e *Engine) Delete(id string) error {
|
||||
snap, err := e.store.GetVolumeSnapshot(id)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
e.mu.Lock()
|
||||
defer e.mu.Unlock()
|
||||
if p, perr := e.FilePath(snap); perr == nil {
|
||||
if rmErr := os.Remove(p); rmErr != nil && !os.IsNotExist(rmErr) {
|
||||
slog.Warn("volume snapshot: remove file", "id", id, "error", rmErr)
|
||||
}
|
||||
}
|
||||
return e.store.DeleteVolumeSnapshot(id)
|
||||
}
|
||||
|
||||
// FilePath resolves a snapshot's archive path and verifies it stays within the
|
||||
// snapshot directory (defence-in-depth against a tampered filename column).
|
||||
func (e *Engine) FilePath(snap store.VolumeSnapshot) (string, error) {
|
||||
base := filepath.Base(snap.Filename)
|
||||
if base == "" || base == "." || base != snap.Filename {
|
||||
return "", fmt.Errorf("invalid snapshot filename")
|
||||
}
|
||||
p := filepath.Join(e.snapDir, base)
|
||||
abs, err := filepath.Abs(p)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
absDir, _ := filepath.Abs(e.snapDir)
|
||||
if !strings.HasPrefix(abs, absDir+string(filepath.Separator)) {
|
||||
return "", fmt.Errorf("snapshot path escapes snapshot directory")
|
||||
}
|
||||
return abs, nil
|
||||
}
|
||||
|
||||
// CleanOrphans removes snapshot archive files that have no metadata row,
|
||||
// reconciling on-disk files against the DB. Workload deletion CASCADEs the
|
||||
// volume_snapshots rows but cannot reach the files; this (run at startup)
|
||||
// reclaims them. Mirrors backup.Engine.CleanOrphans.
|
||||
func (e *Engine) CleanOrphans() (int, error) {
|
||||
e.mu.Lock()
|
||||
defer e.mu.Unlock()
|
||||
|
||||
entries, err := os.ReadDir(e.snapDir)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("read snapshot dir: %w", err)
|
||||
}
|
||||
filenames, err := e.store.AllVolumeSnapshotFilenames()
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("list snapshot filenames: %w", err)
|
||||
}
|
||||
known := make(map[string]bool, len(filenames))
|
||||
for _, f := range filenames {
|
||||
known[f] = true
|
||||
}
|
||||
|
||||
removed := 0
|
||||
for _, ent := range entries {
|
||||
if ent.IsDir() || known[ent.Name()] {
|
||||
continue
|
||||
}
|
||||
if err := os.Remove(filepath.Join(e.snapDir, ent.Name())); err != nil {
|
||||
slog.Warn("volume snapshot: remove orphan", "file", ent.Name(), "error", err)
|
||||
continue
|
||||
}
|
||||
removed++
|
||||
}
|
||||
return removed, nil
|
||||
}
|
||||
|
||||
// pruneWorkload deletes snapshots beyond maxSnapshotsPerWorkload for one
|
||||
// workload (oldest first). Best-effort: caller already holds e.mu.
|
||||
func (e *Engine) pruneWorkload(workloadID string) {
|
||||
count, err := e.store.CountVolumeSnapshots(workloadID)
|
||||
if err != nil || count <= maxSnapshotsPerWorkload {
|
||||
return
|
||||
}
|
||||
oldest, err := e.store.GetOldestVolumeSnapshots(workloadID, count-maxSnapshotsPerWorkload)
|
||||
if err != nil {
|
||||
slog.Warn("volume snapshot: prune query", "workload", workloadID, "error", err)
|
||||
return
|
||||
}
|
||||
for _, snap := range oldest {
|
||||
if p, perr := e.FilePath(snap); perr == nil {
|
||||
_ = os.Remove(p)
|
||||
}
|
||||
if derr := e.store.DeleteVolumeSnapshot(snap.ID); derr != nil {
|
||||
slog.Warn("volume snapshot: prune delete", "id", snap.ID, "error", derr)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// idShort abbreviates id to its first 8 bytes for use as an archive filename
// prefix. Ids of 8 bytes or fewer are returned unchanged.
func idShort(id string) string {
	const keep = 8
	if len(id) <= keep {
		return id
	}
	return id[:keep]
}
|
||||
Reference in New Issue
Block a user