// safeExtractIndex extracts the entries archived under the integer
// subdirectory index of the snapshot tar.gz at archivePath into dest. It
// returns the number of file-content bytes written; on error this is the count
// written before the failure.
//
// On restore the archive is treated as untrusted (it may have been downloaded,
// hand-edited, or swapped on disk), so extraction is hardened:
//
//   - zip-slip: entry names are normalised with path.Clean and every resolved
//     target must be dest itself or live beneath it; anything else is rejected.
//   - type allow-list: only regular files and directories inside the selected
//     subtree are materialised. Any other entry type there (symlink, hardlink,
//     device, fifo, ...) aborts the extraction before anything is created for
//     it.
//   - decompression bomb: the declared size of each regular file is checked
//     against the remaining budget before the file is created, so extraction
//     stops as soon as the total would exceed bombCap. A total of exactly
//     bombCap bytes is accepted and zero-length files cost nothing. A negative
//     bombCap rejects every regular file.
//
// Entries that are skipped (manifest.json and other indices' subtrees) are
// neither type-checked nor counted against bombCap.
//
// dest should be a fresh staging directory: files are created with O_EXCL, so
// a pre-existing or duplicated file entry is an error. Directories are created
// with mode 0o700 and files with mode 0o600 regardless of the archived mode.
// Swapping dest onto its final location is left to the caller.
func safeExtractIndex(archivePath string, index int, dest string, bombCap int64) (int64, error) {
	f, err := os.Open(archivePath)
	if err != nil {
		return 0, fmt.Errorf("open archive: %w", err)
	}
	defer f.Close()

	gz, err := gzip.NewReader(f)
	if err != nil {
		return 0, fmt.Errorf("gzip reader: %w", err)
	}
	defer gz.Close()

	cleanDest, err := filepath.Abs(dest)
	if err != nil {
		return 0, fmt.Errorf("resolve dest: %w", err)
	}
	if err := os.MkdirAll(cleanDest, 0o700); err != nil {
		return 0, fmt.Errorf("create dest: %w", err)
	}

	tr := tar.NewReader(gz)
	var written int64
	for {
		hdr, err := tr.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			return written, fmt.Errorf("read tar: %w", err)
		}

		// Archive paths are always forward-slash. path.Clean collapses any
		// "./" and "../" so the prefix and containment checks see a normal form.
		name := path.Clean(hdr.Name)
		if name == "manifest.json" {
			continue
		}
		rel, ok := stripIndexPrefix(name, index)
		if !ok {
			continue // belongs to a different volume's subtree
		}

		switch hdr.Typeflag {
		case tar.TypeReg, tar.TypeDir:
			// allowed
		default:
			return written, fmt.Errorf("archive entry %q has disallowed type %q", hdr.Name, string(hdr.Typeflag))
		}

		target := cleanDest
		if rel != "" {
			target = filepath.Join(cleanDest, filepath.FromSlash(rel))
		}
		if !withinDir(cleanDest, target) {
			return written, fmt.Errorf("archive entry %q escapes destination", hdr.Name)
		}

		if hdr.Typeflag == tar.TypeDir {
			if err := os.MkdirAll(target, 0o700); err != nil {
				return written, fmt.Errorf("mkdir %s: %w", target, err)
			}
			continue
		}

		// Enforce the cap on the declared size before touching the disk.
		// tar.Reader never yields more than hdr.Size bytes for an entry, so
		// this is exact. Comparing against the remaining budget (instead of
		// computing remaining+1) cannot overflow when bombCap is MaxInt64, and
		// it lets an empty file through when the budget is exactly used up.
		remaining := bombCap - written
		if hdr.Size > remaining {
			return written, fmt.Errorf("archive exceeds decompression cap of %d bytes", bombCap)
		}

		if err := os.MkdirAll(filepath.Dir(target), 0o700); err != nil {
			return written, fmt.Errorf("mkdir parent of %s: %w", target, err)
		}
		out, err := os.OpenFile(target, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0o600)
		if err != nil {
			return written, fmt.Errorf("create %s: %w", target, err)
		}
		// The LimitReader is belt and braces: even if the reader misbehaved,
		// no more than the remaining budget can reach the disk.
		n, copyErr := io.Copy(out, io.LimitReader(tr, remaining))
		closeErr := out.Close()
		written += n
		if copyErr != nil {
			return written, fmt.Errorf("write %s: %w", target, copyErr)
		}
		if closeErr != nil {
			return written, fmt.Errorf("close %s: %w", target, closeErr)
		}
	}
	return written, nil
}

// stripIndexPrefix reports whether name belongs to the "<index>/" subtree and,
// if so, returns the path relative to that subtree. A name equal to the index
// itself (the subtree root) yields ("", true). Matching on the trailing "/"
// keeps index 1 from matching "10/...".
func stripIndexPrefix(name string, index int) (string, bool) {
	p := strconv.Itoa(index)
	if name == p {
		return "", true
	}
	if strings.HasPrefix(name, p+"/") {
		return name[len(p)+1:], true
	}
	return "", false
}

// leadingIndex parses the first "/"-separated segment of an archive entry name
// as the volume index. It returns (0, false) when that segment is not accepted
// by strconv.Atoi, which includes "manifest.json".
func leadingIndex(name string) (int, bool) {
	seg := name
	if i := strings.IndexByte(name, '/'); i >= 0 {
		seg = name[:i]
	}
	idx, err := strconv.Atoi(seg)
	if err != nil {
		return 0, false
	}
	return idx, true
}

// withinDir reports whether target is base itself or lives beneath it. Both
// arguments must already be cleaned absolute paths.
func withinDir(base, target string) bool {
	if target == base {
		return true
	}
	// A cleaned path only ends in a separator when it is a filesystem root
	// ("/" or `C:\`); appending another one there would produce "//" and make
	// every child look like it is outside the root.
	sep := string(filepath.Separator)
	if !strings.HasSuffix(base, sep) {
		base += sep
	}
	return strings.HasPrefix(target, base)
}