feat(volsnap): volume snapshot restore (backlog #6)
Restore a captured volume snapshot onto an image workload's live host-bind
data volumes, then redeploy — the most destructive workload action, built to
the adversarially-reviewed design (C1–C6) with all data-loss guards.
- Engine.Restore (engine-owned): all-or-nothing pre-flight re-resolution from
the workload's CURRENT config (never the tamperable manifest), per-filesystem
disk pre-check, per-workload lock, container quiesce, extract-to-tmp, durable
pre-restore snapshot, write-ahead journal, atomic rename swap, redeploy, and
crash-recovery sweep (RecoverInterruptedRestores) wired before serving.
- internal/keyedmutex: shared per-key lock; deployer now serializes every
deploy entrypoint per workload via DispatchPlugin (+ LockWorkload/RedeployLocked
for the restore re-dispatch, no deadlock).
- Untrusted-archive extractor: zip-slip containment, type allow-list (reg/dir
only), decompression-bomb cap, manifest-index bounds.
- POST /api/workloads/{id}/snapshots/{sid}/restore: admin, X-Confirm-Restore
header (CSRF), per-workload single-flight (409).
- WebUI: Restore button + danger ConfirmDialog + busy state + i18n (en/ru).
Scope: image-source workloads only; supported scopes are absolute, stage, and
project (driven off the same supportedScopes constant that capture uses).
Plan-reviewed before coding; per-phase Go/security/TS reviews; final review
verdict: READY TO MERGE. The security review caught and fixed a CRITICAL
manifest-Source path traversal (fix: re-derive the target from the current
config, plus base containment).
Plan: plans/volume-snapshot-restore/
This commit is contained in:
@@ -15,6 +15,18 @@ import (
|
||||
// operator enables auto_backup_before_deploy, a pre-deploy Tinyforge DB
|
||||
// snapshot is taken here, after the source resolves and before it runs.
|
||||
func (d *Deployer) DispatchPlugin(ctx context.Context, w plugin.Workload, intent plugin.DeploymentIntent) error {
|
||||
// C1: serialize all deploy-class work per workload. Held across the whole
|
||||
// deploy so a concurrent deploy/rollback/promote/trigger — or a volume
|
||||
// restore (which redeploys via RedeployLocked while holding this) — can
|
||||
// never interleave container changes for the same workload.
|
||||
unlock := d.workloadLocks.Lock(w.ID)
|
||||
defer unlock() // runs on every return path, after dispatchLocked has returned
|
||||
return d.dispatchLocked(ctx, w, intent) // the per-workload lock is held here, as dispatchLocked assumes
|
||||
}
|
||||
|
||||
// dispatchLocked is DispatchPlugin's body, assuming the per-workload lock is
|
||||
// already held. RedeployLocked calls it directly during restore.
|
||||
func (d *Deployer) dispatchLocked(ctx context.Context, w plugin.Workload, intent plugin.DeploymentIntent) error {
|
||||
if err := d.beginDispatch(); err != nil {
|
||||
metrics.DeploysTotal.Inc(w.SourceKind, "rejected_draining")
|
||||
return err
|
||||
@@ -52,6 +64,12 @@ func (d *Deployer) DispatchPlugin(ctx context.Context, w plugin.Workload, intent
|
||||
// Used when a workload is deleted. Tracked via activeWg so Drain() honours
|
||||
// in-progress teardowns just like deploys.
|
||||
func (d *Deployer) DispatchTeardown(ctx context.Context, w plugin.Workload) error {
|
||||
// Teardown mutates the same containers/routes a deploy does, so it takes the
|
||||
// per-workload lock too (C1). Callers tear down distinct workload ids
|
||||
// sequentially (e.g. preview children then parent), never nested, so no
|
||||
// self-deadlock.
|
||||
unlock := d.workloadLocks.Lock(w.ID)
|
||||
defer unlock()
|
||||
if err := d.beginDispatch(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user