1c47030854
Restore a captured volume snapshot onto an image workload's live host-bind
data volumes, then redeploy — the most destructive workload action, built to
the adversarially-reviewed design (C1–C6) with all data-loss guards.
- Engine.Restore (engine-owned): all-or-nothing pre-flight re-resolution from
the workload's CURRENT config (never the tamperable manifest), per-filesystem
disk pre-check, per-workload lock, container quiesce, extract-to-tmp, durable
pre-restore snapshot, write-ahead journal, atomic rename swap, redeploy, and
crash-recovery sweep (RecoverInterruptedRestores) wired before serving.
- internal/keyedmutex: shared per-key lock; deployer now serializes every
deploy entrypoint per workload via DispatchPlugin (+ LockWorkload/RedeployLocked
for the restore re-dispatch, no deadlock).
- Untrusted-archive extractor: zip-slip containment, type allow-list (reg/dir
only), decompression-bomb cap, manifest-index bounds.
- POST /api/workloads/{id}/snapshots/{sid}/restore: admin, X-Confirm-Restore
header (CSRF), per-workload single-flight (409).
- WebUI: Restore button + danger ConfirmDialog + busy state + i18n (en/ru).
Scope: image-source only; scopes absolute/stage/project (driven off the same
supportedScopes constant capture uses).
Plan-reviewed before coding; per-phase go/security/ts reviews; final review
READY TO MERGE. Security review caught + fixed a CRITICAL manifest-Source path
traversal (re-derive target from current config + base containment).
Plan: plans/volume-snapshot-restore/
244 lines
7.9 KiB
Go
244 lines
7.9 KiB
Go
package api
|
|
|
|
import (
|
|
"encoding/json"
|
|
"errors"
|
|
"io"
|
|
"log/slog"
|
|
"net/http"
|
|
"os"
|
|
"path/filepath"
|
|
|
|
"github.com/go-chi/chi/v5"
|
|
|
|
"github.com/alexei/tinyforge/internal/store"
|
|
"github.com/alexei/tinyforge/internal/volsnap"
|
|
)
|
|
|
|
// listWorkloadSnapshots handles GET /api/workloads/{id}/snapshots.
|
|
func (s *Server) listWorkloadSnapshots(w http.ResponseWriter, r *http.Request) {
|
|
if s.snapshotEngine == nil {
|
|
respondError(w, http.StatusServiceUnavailable, "snapshot engine not initialized")
|
|
return
|
|
}
|
|
id := chi.URLParam(r, "id")
|
|
snaps, err := s.snapshotEngine.List(id)
|
|
if err != nil {
|
|
slog.Error("snapshots: list", "workload", id, "error", err)
|
|
respondError(w, http.StatusInternalServerError, "internal server error")
|
|
return
|
|
}
|
|
respondJSON(w, http.StatusOK, snaps)
|
|
}
|
|
|
|
// snapshotableVolume is the client-facing shape of one entry in the
// "volumes" array of the snapshotable response. It deliberately carries only
// the three fields below: the resolved host path is left out so the server's
// internal directory layout is never serialized to API consumers.
type snapshotableVolume struct {
	// Target is serialized as "target"; it is copied verbatim from the
	// engine's volume reference.
	Target string `json:"target"`
	// Scope is serialized as "scope"; it is copied verbatim from the
	// engine's volume reference.
	Scope string `json:"scope"`
	// Source is serialized as "source"; it is copied verbatim from the
	// engine's volume reference.
	Source string `json:"source"`
}
|
|
|
|
// getWorkloadSnapshotable handles GET /api/workloads/{id}/snapshotable. It
|
|
// tells the UI which volumes can be snapshotted and which are skipped (and
|
|
// why), so users are never misled about coverage.
|
|
func (s *Server) getWorkloadSnapshotable(w http.ResponseWriter, r *http.Request) {
|
|
if s.snapshotEngine == nil {
|
|
respondError(w, http.StatusServiceUnavailable, "snapshot engine not initialized")
|
|
return
|
|
}
|
|
id := chi.URLParam(r, "id")
|
|
workload, err := s.store.GetWorkloadByID(id)
|
|
if err != nil {
|
|
respondError(w, http.StatusNotFound, "workload not found")
|
|
return
|
|
}
|
|
settings, err := s.store.GetSettings()
|
|
if err != nil {
|
|
respondError(w, http.StatusInternalServerError, "internal server error")
|
|
return
|
|
}
|
|
refs, skipped, err := volsnap.SnapshotableVolumes(s.store, workload, settings)
|
|
if err != nil {
|
|
slog.Error("snapshots: enumerate", "workload", id, "error", err)
|
|
respondError(w, http.StatusInternalServerError, "internal server error")
|
|
return
|
|
}
|
|
|
|
volumes := make([]snapshotableVolume, 0, len(refs))
|
|
for _, ref := range refs {
|
|
volumes = append(volumes, snapshotableVolume{Target: ref.Target, Scope: ref.Scope, Source: ref.Source})
|
|
}
|
|
if skipped == nil {
|
|
skipped = []volsnap.SkippedVolume{}
|
|
}
|
|
respondJSON(w, http.StatusOK, map[string]any{
|
|
"volumes": volumes,
|
|
"skipped": skipped,
|
|
})
|
|
}
|
|
|
|
// createWorkloadSnapshot handles POST /api/workloads/{id}/snapshots.
|
|
func (s *Server) createWorkloadSnapshot(w http.ResponseWriter, r *http.Request) {
|
|
if s.snapshotEngine == nil {
|
|
respondError(w, http.StatusServiceUnavailable, "snapshot engine not initialized")
|
|
return
|
|
}
|
|
id := chi.URLParam(r, "id")
|
|
workload, err := s.store.GetWorkloadByID(id)
|
|
if err != nil {
|
|
respondError(w, http.StatusNotFound, "workload not found")
|
|
return
|
|
}
|
|
settings, err := s.store.GetSettings()
|
|
if err != nil {
|
|
respondError(w, http.StatusInternalServerError, "internal server error")
|
|
return
|
|
}
|
|
|
|
var body struct {
|
|
Label string `json:"label"`
|
|
}
|
|
if r.ContentLength != 0 {
|
|
if err := json.NewDecoder(io.LimitReader(r.Body, 1<<20)).Decode(&body); err != nil && !errors.Is(err, io.EOF) {
|
|
respondError(w, http.StatusBadRequest, "invalid JSON body")
|
|
return
|
|
}
|
|
}
|
|
|
|
snap, err := s.snapshotEngine.Create(workload, settings, body.Label)
|
|
if err != nil {
|
|
// "no snapshottable volume data" is client-actionable (400, safe to
|
|
// echo). Any other error is server-side: log the detail, return a
|
|
// generic 500 so internal paths / DB text never reach the client.
|
|
if errors.Is(err, volsnap.ErrNoSnapshotData) {
|
|
respondError(w, http.StatusBadRequest, err.Error())
|
|
return
|
|
}
|
|
slog.Error("snapshots: create", "workload", id, "error", err)
|
|
respondError(w, http.StatusInternalServerError, "internal server error")
|
|
return
|
|
}
|
|
respondJSON(w, http.StatusCreated, snap)
|
|
}
|
|
|
|
// deleteSnapshot handles DELETE /api/snapshots/{sid}.
|
|
func (s *Server) deleteSnapshot(w http.ResponseWriter, r *http.Request) {
|
|
if s.snapshotEngine == nil {
|
|
respondError(w, http.StatusServiceUnavailable, "snapshot engine not initialized")
|
|
return
|
|
}
|
|
sid := chi.URLParam(r, "sid")
|
|
if err := s.snapshotEngine.Delete(sid); err != nil {
|
|
if errors.Is(err, store.ErrNotFound) {
|
|
respondError(w, http.StatusNotFound, "snapshot not found")
|
|
return
|
|
}
|
|
respondError(w, http.StatusInternalServerError, "failed to delete snapshot")
|
|
return
|
|
}
|
|
respondJSON(w, http.StatusOK, map[string]string{"status": "deleted"})
|
|
}
|
|
|
|
// restoreWorkloadSnapshot handles POST /api/workloads/{id}/snapshots/{sid}/restore.
|
|
//
|
|
// This is the most destructive workload action: it overwrites the app's live
|
|
// volume data with the snapshot and recreates its containers. It is guarded like
|
|
// the DB restore — admin-only, an X-Confirm-Restore header that must echo the
|
|
// snapshot id (defeats CSRF form/img posts, which can't set custom headers), and
|
|
// a per-workload single-flight so a double-click can't stack two restores. All
|
|
// the dangerous lock/stop/swap/redeploy logic lives in Engine.Restore; this
|
|
// handler only validates and delegates.
|
|
func (s *Server) restoreWorkloadSnapshot(w http.ResponseWriter, r *http.Request) {
|
|
if s.snapshotEngine == nil {
|
|
respondError(w, http.StatusServiceUnavailable, "snapshot engine not initialized")
|
|
return
|
|
}
|
|
id := chi.URLParam(r, "id")
|
|
sid := chi.URLParam(r, "sid")
|
|
|
|
if confirm := r.Header.Get("X-Confirm-Restore"); confirm != sid {
|
|
respondError(w, http.StatusBadRequest,
|
|
"missing or mismatched X-Confirm-Restore header (must equal snapshot id)")
|
|
return
|
|
}
|
|
|
|
// Up-front validation for precise client errors (Engine.Restore re-checks
|
|
// ownership + source kind under the lock).
|
|
snap, err := s.snapshotEngine.Get(sid)
|
|
if err != nil {
|
|
respondError(w, http.StatusNotFound, "snapshot not found")
|
|
return
|
|
}
|
|
if snap.WorkloadID != id {
|
|
respondError(w, http.StatusBadRequest, "snapshot does not belong to this workload")
|
|
return
|
|
}
|
|
row, ok := s.loadWorkload(w, id)
|
|
if !ok {
|
|
return
|
|
}
|
|
if row.SourceKind != "image" {
|
|
respondError(w, http.StatusBadRequest, "restore is only supported for image-source workloads")
|
|
return
|
|
}
|
|
|
|
// Per-workload single-flight: reject a concurrent restore of the SAME
|
|
// workload with 409 rather than queuing it behind the deployer lock.
|
|
release, ok := s.volRestoreInFlight.TryLock(id)
|
|
if !ok {
|
|
respondError(w, http.StatusConflict, "a restore is already in progress for this workload")
|
|
return
|
|
}
|
|
defer release()
|
|
|
|
if err := s.snapshotEngine.Restore(r.Context(), sid, id); err != nil {
|
|
// Raw error (which can carry resolved host paths) stays in the log; the
|
|
// client gets a generic message.
|
|
slog.Error("snapshots: restore failed", "workload", id, "snapshot", sid, "error", err)
|
|
respondError(w, http.StatusInternalServerError, "restore failed; see server logs")
|
|
return
|
|
}
|
|
respondJSON(w, http.StatusOK, map[string]any{
|
|
"status": "restored",
|
|
"workload_id": id,
|
|
"snapshot_id": sid,
|
|
})
|
|
}
|
|
|
|
// downloadSnapshot handles GET /api/snapshots/{sid}/download, streaming the
|
|
// tar.gz archive. The resolved path is containment-checked against the
|
|
// snapshot directory.
|
|
func (s *Server) downloadSnapshot(w http.ResponseWriter, r *http.Request) {
|
|
if s.snapshotEngine == nil {
|
|
respondError(w, http.StatusServiceUnavailable, "snapshot engine not initialized")
|
|
return
|
|
}
|
|
sid := chi.URLParam(r, "sid")
|
|
snap, err := s.snapshotEngine.Get(sid)
|
|
if err != nil {
|
|
respondError(w, http.StatusNotFound, "snapshot not found")
|
|
return
|
|
}
|
|
path, err := s.snapshotEngine.FilePath(snap)
|
|
if err != nil {
|
|
respondError(w, http.StatusForbidden, "access denied")
|
|
return
|
|
}
|
|
f, err := os.Open(path)
|
|
if err != nil {
|
|
respondError(w, http.StatusNotFound, "snapshot file not found on disk")
|
|
return
|
|
}
|
|
defer f.Close()
|
|
stat, err := f.Stat()
|
|
if err != nil {
|
|
respondError(w, http.StatusInternalServerError, "failed to read snapshot file")
|
|
return
|
|
}
|
|
name := filepath.Base(snap.Filename)
|
|
w.Header().Set("Content-Type", "application/gzip")
|
|
w.Header().Set("Content-Disposition", "attachment; filename=\""+name+"\"")
|
|
http.ServeContent(w, r, name, stat.ModTime(), f)
|
|
}
|