package volsnap import ( "encoding/json" "errors" "fmt" "log/slog" "os" "path/filepath" "strings" "sync" "time" "github.com/google/uuid" "github.com/alexei/tinyforge/internal/store" ) // maxSnapshotsPerWorkload caps how many snapshots are retained per app. On // create, older snapshots beyond this count are pruned (best-effort) so volume // snapshots cannot grow the data disk without bound. const maxSnapshotsPerWorkload = 20 // ErrNoSnapshotData is returned by Create when the workload has no resolved // host-bind volume directory to capture. It is a client-actionable condition // (HTTP 400), distinct from internal failures (HTTP 500). var ErrNoSnapshotData = errors.New("no snapshottable volume data for this app") // Engine creates and manages volume snapshots under /snapshots. type Engine struct { mu sync.Mutex store *store.Store snapDir string } // New creates the snapshot engine, ensuring the snapshot directory exists. func New(st *store.Store, dataDir string) (*Engine, error) { dir := filepath.Join(dataDir, "snapshots") if err := os.MkdirAll(dir, 0o755); err != nil { return nil, fmt.Errorf("create snapshot directory: %w", err) } return &Engine{store: st, snapDir: dir}, nil } // SnapDir returns the directory holding snapshot archives. func (e *Engine) SnapDir() string { return e.snapDir } // Create captures a snapshot of the workload's host-bind data volumes. func (e *Engine) Create(w store.Workload, settings store.Settings, label string) (store.VolumeSnapshot, error) { refs, _, err := SnapshotableVolumes(e.store, w, settings) if err != nil { return store.VolumeSnapshot{}, fmt.Errorf("enumerate volumes: %w", err) } if len(refs) == 0 { return store.VolumeSnapshot{}, ErrNoSnapshotData } e.mu.Lock() defer e.mu.Unlock() filename := fmt.Sprintf("%s-%s-%s.tar.gz", idShort(w.ID), time.Now().UTC().Format("20060102-150405"), uuid.New().String()[:8]) dest := filepath.Join(e.snapDir, filename) manifest, err := writeArchive(dest, refs) if err != nil { return store.VolumeSnapshot{}, err } info, err := os.Stat(dest) if err != nil { os.Remove(dest) return store.VolumeSnapshot{}, fmt.Errorf("stat snapshot: %w", err) } manifestJSON, err := json.Marshal(manifest) if err != nil { os.Remove(dest) return store.VolumeSnapshot{}, fmt.Errorf("encode manifest: %w", err) } row, err := e.store.CreateVolumeSnapshot(store.VolumeSnapshot{ WorkloadID: w.ID, Label: strings.TrimSpace(label), Filename: filename, SizeBytes: info.Size(), Manifest: string(manifestJSON), }) if err != nil { os.Remove(dest) // best-effort: don't leak an orphan file return store.VolumeSnapshot{}, fmt.Errorf("record snapshot: %w", err) } slog.Info("volume snapshot created", "id", row.ID, "workload", w.ID, "volumes", len(manifest), "size", info.Size()) e.pruneWorkload(w.ID) return row, nil } // List returns a workload's snapshots, newest first. func (e *Engine) List(workloadID string) ([]store.VolumeSnapshot, error) { return e.store.ListVolumeSnapshots(workloadID) } // Get returns one snapshot by id. func (e *Engine) Get(id string) (store.VolumeSnapshot, error) { return e.store.GetVolumeSnapshot(id) } // Delete removes a snapshot's archive file and its metadata row. func (e *Engine) Delete(id string) error { snap, err := e.store.GetVolumeSnapshot(id) if err != nil { return err } e.mu.Lock() defer e.mu.Unlock() if p, perr := e.FilePath(snap); perr == nil { if rmErr := os.Remove(p); rmErr != nil && !os.IsNotExist(rmErr) { slog.Warn("volume snapshot: remove file", "id", id, "error", rmErr) } } return e.store.DeleteVolumeSnapshot(id) } // FilePath resolves a snapshot's archive path and verifies it stays within the // snapshot directory (defence-in-depth against a tampered filename column). func (e *Engine) FilePath(snap store.VolumeSnapshot) (string, error) { base := filepath.Base(snap.Filename) if base == "" || base == "." || base != snap.Filename { return "", fmt.Errorf("invalid snapshot filename") } p := filepath.Join(e.snapDir, base) abs, err := filepath.Abs(p) if err != nil { return "", err } absDir, _ := filepath.Abs(e.snapDir) if !strings.HasPrefix(abs, absDir+string(filepath.Separator)) { return "", fmt.Errorf("snapshot path escapes snapshot directory") } return abs, nil } // CleanOrphans removes snapshot archive files that have no metadata row, // reconciling on-disk files against the DB. Workload deletion CASCADEs the // volume_snapshots rows but cannot reach the files; this (run at startup) // reclaims them. Mirrors backup.Engine.CleanOrphans. func (e *Engine) CleanOrphans() (int, error) { e.mu.Lock() defer e.mu.Unlock() entries, err := os.ReadDir(e.snapDir) if err != nil { return 0, fmt.Errorf("read snapshot dir: %w", err) } filenames, err := e.store.AllVolumeSnapshotFilenames() if err != nil { return 0, fmt.Errorf("list snapshot filenames: %w", err) } known := make(map[string]bool, len(filenames)) for _, f := range filenames { known[f] = true } removed := 0 for _, ent := range entries { if ent.IsDir() || known[ent.Name()] { continue } if err := os.Remove(filepath.Join(e.snapDir, ent.Name())); err != nil { slog.Warn("volume snapshot: remove orphan", "file", ent.Name(), "error", err) continue } removed++ } return removed, nil } // pruneWorkload deletes snapshots beyond maxSnapshotsPerWorkload for one // workload (oldest first). Best-effort: caller already holds e.mu. func (e *Engine) pruneWorkload(workloadID string) { count, err := e.store.CountVolumeSnapshots(workloadID) if err != nil || count <= maxSnapshotsPerWorkload { return } oldest, err := e.store.GetOldestVolumeSnapshots(workloadID, count-maxSnapshotsPerWorkload) if err != nil { slog.Warn("volume snapshot: prune query", "workload", workloadID, "error", err) return } for _, snap := range oldest { if p, perr := e.FilePath(snap); perr == nil { _ = os.Remove(p) } if derr := e.store.DeleteVolumeSnapshot(snap.ID); derr != nil { slog.Warn("volume snapshot: prune delete", "id", snap.ID, "error", derr) } } } func idShort(id string) string { if len(id) > 8 { return id[:8] } return id }