package volsnap

import (
	"archive/tar"
	"compress/gzip"
	"encoding/json"
	"fmt"
	"io"
	"os"
	"path"
	"path/filepath"

	"github.com/alexei/tinyforge/internal/store"
	"github.com/alexei/tinyforge/internal/volume"
)

const (
	// maxRestoreUncompressedBytes is the decompression-bomb ceiling: the most
	// decompressed data a restore will accept from one snapshot archive.
	// 50 GiB is far above any realistic app data volume while still bounding
	// a hostile archive.
	maxRestoreUncompressedBytes int64 = 50 << 30

	// diskFreeHeadroomBytes is the free space required on top of the extracted
	// size so a restore never fills the target filesystem to the brim. The
	// live copy is renamed aside (no new space), so the new allocation is
	// roughly the extracted size; this headroom covers filesystem overhead
	// and metadata.
	diskFreeHeadroomBytes int64 = 256 << 20
)

// resolvedVol pairs a manifest volume with the live host path re-resolved from
// the workload's CURRENT config (all-or-nothing pre-flight, C3).
type resolvedVol struct {
	Index    int
	Target   string
	Scope    string
	LivePath string
}

// parseManifest decodes the manifest JSON stored on the snapshot row into a
// []SnapshotVolume. It fails when the JSON does not decode or when it decodes
// to zero volumes.
func parseManifest(snap store.VolumeSnapshot) ([]SnapshotVolume, error) {
	var volumes []SnapshotVolume
	decodeErr := json.Unmarshal([]byte(snap.Manifest), &volumes)
	switch {
	case decodeErr != nil:
		return nil, fmt.Errorf("parse snapshot manifest: %w", decodeErr)
	case len(volumes) == 0:
		return nil, fmt.Errorf("snapshot manifest is empty")
	}
	return volumes, nil
}

// preflightResolve re-derives every manifest volume's live host path from the
// workload's CURRENT config, ALL-OR-NOTHING (C3): if any snapshotted target is
// no longer declared, its scope is unsupported, or it can't resolve, it returns
// an error and the caller MUST abort BEFORE stopping containers or touching
// disk — config drift mid-restore is silent corruption.
// // SECURITY: the swap target is keyed on the manifest's container Target path but // its host directory is derived from the CURRENT (trusted, operator-set) // Source/Scope — never from the snapshot manifest's persisted Source/Scope. The // manifest column is attacker-influenceable (e.g. a restored/tampered DB), and // trusting its Source for stage/project scope would let `Source:"../../etc"` // redirect the destructive rename-swap outside the volume tree. As defence in // depth, base-relative resolved paths are asserted to stay under BaseVolumePath. func preflightResolve(st *store.Store, w store.Workload, settings store.Settings, manifest []SnapshotVolume) ([]resolvedVol, error) { current, err := volumesByTarget(st, w) if err != nil { return nil, fmt.Errorf("load current volumes: %w", err) } params := volume.ResolveWorkloadParams{ BasePath: settings.BaseVolumePath, WorkloadID: w.ID, WorkloadName: w.Name, AllowedVolumePaths: settings.AllowedVolumePaths, } out := make([]resolvedVol, 0, len(manifest)) for _, mv := range manifest { // A negative index can never name an archive subtree. if mv.Index < 0 { return nil, fmt.Errorf("volume %q has invalid index %d", mv.Target, mv.Index) } cur, ok := current[mv.Target] if !ok { return nil, fmt.Errorf("volume %q is no longer declared by the workload", mv.Target) } if !supportedScopes[cur.Scope] { return nil, fmt.Errorf("volume %q scope %q is not restorable", mv.Target, cur.Scope) } live, err := volume.ResolveWorkloadPath(cur, params) if err != nil { return nil, fmt.Errorf("resolve volume %q (%s): %w", mv.Target, cur.Scope, err) } // Containment: the destructive swap target must stay inside the volume // root. Base-relative scopes must resolve under BaseVolumePath; absolute // scope is already constrained to AllowedVolumePaths by the resolver. 
if cur.Scope != string(store.VolumeScopeAbsolute) { contained, cerr := pathWithinBase(settings.BaseVolumePath, live) if cerr != nil || !contained { return nil, fmt.Errorf("resolved path for volume %q escapes the volume root", mv.Target) } } out = append(out, resolvedVol{Index: mv.Index, Target: mv.Target, Scope: cur.Scope, LivePath: live}) } return out, nil } // pathWithinBase reports whether target resolves to base or a path beneath it. // An empty base is treated as non-containing (refuse rather than allow). func pathWithinBase(base, target string) (bool, error) { if base == "" { return false, nil } absBase, err := filepath.Abs(base) if err != nil { return false, err } absTarget, err := filepath.Abs(target) if err != nil { return false, err } return withinDir(absBase, absTarget), nil } // archiveUncompressedSize scans the archive's tar headers and returns the // per-index and total uncompressed sizes, enforcing bombCap so a hostile // archive can't make the disk pre-check allocate unbounded. Feeds the // per-filesystem free-space pre-check (C5). // // The total is a LOWER-BOUND estimate of on-disk consumption: it sums regular- // file bytes only, ignoring directory entries and per-file inode/block-rounding // overhead, so a volume of many tiny files consumes more than reported. The // real safety net is the staged extract + atomic swap (a mid-extract ENOSPC // discards the staging dir and leaves live untouched), not this pre-check. // // "No body copy" is at the API level only — tar.Next still inflates and // discards each skipped body, so a 50 GiB-of-headers archive does 50 GiB of // gzip work; bombCap bounds that. 
func archiveUncompressedSize(archivePath string, bombCap int64) (perIndex map[int]int64, total int64, err error) { f, err := os.Open(archivePath) if err != nil { return nil, 0, fmt.Errorf("open archive: %w", err) } defer f.Close() gz, err := gzip.NewReader(f) if err != nil { return nil, 0, fmt.Errorf("gzip reader: %w", err) } defer gz.Close() perIndex = map[int]int64{} tr := tar.NewReader(gz) for { hdr, e := tr.Next() if e == io.EOF { break } if e != nil { return nil, 0, fmt.Errorf("read tar: %w", e) } if hdr.Typeflag != tar.TypeReg { continue } name := path.Clean(hdr.Name) if name == "manifest.json" { continue } idx, ok := leadingIndex(name) if !ok { continue } total += hdr.Size if total > bombCap { return nil, 0, fmt.Errorf("archive exceeds decompression cap of %d bytes", bombCap) } perIndex[idx] += hdr.Size } return perIndex, total, nil } // swap records one volume's atomic dir replacement so it can be rolled back. type swap struct { live string old string // where the prior live dir was set aside ("" if live didn't exist) tmp string // staging dir holding the freshly-extracted data hadOld bool // whether a prior live dir existed and was moved to old } // stagingDirs returns the per-volume tmp and old staging paths as SIBLINGS of // the live dir's parent, so every rename in the swap is intra-filesystem and // therefore atomic (R2). A cross-device rename (live is itself a mountpoint) // fails loudly in swapVolumeDir rather than silently degrading to a copy. func stagingDirs(live, token string, index int) (tmp, old string) { parent := filepath.Dir(live) base := fmt.Sprintf(".tf-restore-%s-%d", token, index) return filepath.Join(parent, base+".tmp"), filepath.Join(parent, base+".old") } // swapVolumeDir performs the crash-minimal two-rename swap: set the live dir // aside to old (if it exists), then move the staged tmp into place (C2). On the // second rename failing it reverts the first so live is never left missing. 
// Returns whether a prior live dir was preserved at old (for rollback). func swapVolumeDir(live, tmp, old string) (hadOld bool, err error) { if _, statErr := os.Lstat(live); statErr == nil { if rerr := os.Rename(live, old); rerr != nil { return false, fmt.Errorf("set aside live %s: %w", live, rerr) } hadOld = true } else if !os.IsNotExist(statErr) { return false, fmt.Errorf("stat live %s: %w", live, statErr) } if mkErr := os.MkdirAll(filepath.Dir(live), 0o700); mkErr != nil { if hadOld { _ = os.Rename(old, live) } return hadOld, fmt.Errorf("ensure parent of %s: %w", live, mkErr) } if rerr := os.Rename(tmp, live); rerr != nil { if hadOld { _ = os.Rename(old, live) // revert: live is never left missing } return hadOld, fmt.Errorf("promote restored data into %s: %w", live, rerr) } return hadOld, nil } // rollbackSwaps reverts completed swaps in reverse order: drop the restored // live dir and move the preserved original back. Best-effort — each step is // logged by the caller; rollback must attempt every volume regardless. func rollbackSwaps(done []swap) { for i := len(done) - 1; i >= 0; i-- { s := done[i] _ = os.RemoveAll(s.live) if s.hadOld { _ = os.Rename(s.old, s.live) } } }