feat(volsnap): volume snapshot restore (backlog #6)
Restore a captured volume snapshot onto an image workload's live host-bind
data volumes, then redeploy — the most destructive workload action, built to
the adversarially-reviewed design (C1–C6) with all data-loss guards.
- Engine.Restore (engine-owned): all-or-nothing pre-flight re-resolution from
the workload's CURRENT config (never the tamperable manifest), per-filesystem
disk pre-check, per-workload lock, container quiesce, extract-to-tmp, durable
pre-restore snapshot, write-ahead journal, atomic rename swap, redeploy, and
crash-recovery sweep (RecoverInterruptedRestores) wired before serving.
- internal/keyedmutex: shared per-key lock; deployer now serializes every
deploy entrypoint per workload via DispatchPlugin (+ LockWorkload/RedeployLocked
for the restore re-dispatch, no deadlock).
- Untrusted-archive extractor: zip-slip containment, type allow-list (reg/dir
only), decompression-bomb cap, manifest-index bounds.
- POST /api/workloads/{id}/snapshots/{sid}/restore: admin, X-Confirm-Restore
header (CSRF), per-workload single-flight (409).
- WebUI: Restore button + danger ConfirmDialog + busy state + i18n (en/ru).
Scope: image-source workloads only; supported volume scopes are
absolute/stage/project (driven off the same supportedScopes constant that
capture uses).
Plan-reviewed before coding; per-phase go/security/ts reviews; final review
READY TO MERGE. Security review caught + fixed a CRITICAL manifest-Source path
traversal (re-derive target from current config + base containment).
Plan: plans/volume-snapshot-restore/
This commit is contained in:
@@ -0,0 +1,48 @@
|
||||
// Package keyedmutex provides a lazily-populated per-key mutex, so a critical
|
||||
// section can be serialized per key (e.g. per workload id) without a global
|
||||
// lock. It is the shared form of the pattern that originated inline in the
|
||||
// GitOps sync handler; the deployer (per-workload deploy serialization) and the
|
||||
// volume-snapshot restore single-flight both use it.
|
||||
package keyedmutex
|
||||
|
||||
import "sync"
|
||||
|
||||
// Mutex is a table of mutexes addressed by string key. The *sync.Mutex for a
// key is allocated the first time that key is requested, and a zero Mutex can
// be used without any initialization. Entries are never evicted, so the table
// holds one mutex per distinct key ever locked; in practice that is bounded by
// the number of workloads.
type Mutex struct {
	guard sync.Mutex             // protects locks
	locks map[string]*sync.Mutex // per-key mutexes, created lazily
}

// lockFor returns the mutex registered for key, creating and registering a
// fresh one when the key has not been seen before. The lookup and the insert
// happen under guard, so concurrent callers asking for the same key always
// receive the same *sync.Mutex.
func (k *Mutex) lockFor(key string) *sync.Mutex {
	k.guard.Lock()
	defer k.guard.Unlock()

	// Reading from a nil map is legal, so the lookup can precede the lazy
	// allocation of the table.
	if existing, found := k.locks[key]; found {
		return existing
	}
	if k.locks == nil {
		k.locks = make(map[string]*sync.Mutex)
	}
	fresh := new(sync.Mutex)
	k.locks[key] = fresh
	return fresh
}

// Lock waits until the mutex for key has been acquired and returns the func
// that releases it.
func (k *Mutex) Lock(key string) func() {
	held := k.lockFor(key)
	held.Lock()
	return held.Unlock
}

// TryLock makes a single non-blocking attempt to acquire the mutex for key.
// When the attempt succeeds it returns the release func and true. When the key
// is currently held it returns nil and false, letting the caller reject the
// request (e.g. HTTP 409) rather than wait in line.
func (k *Mutex) TryLock(key string) (func(), bool) {
	if candidate := k.lockFor(key); candidate.TryLock() {
		return candidate.Unlock, true
	}
	return nil, false
}
|
||||
@@ -0,0 +1,83 @@
|
||||
package keyedmutex
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestLockSerializesSameKey(t *testing.T) {
|
||||
var m Mutex
|
||||
unlock := m.Lock("a")
|
||||
|
||||
acquired := make(chan struct{})
|
||||
go func() {
|
||||
u := m.Lock("a")
|
||||
close(acquired)
|
||||
u()
|
||||
}()
|
||||
|
||||
select {
|
||||
case <-acquired:
|
||||
t.Fatal("second Lock on the same key acquired while the first was held")
|
||||
case <-time.After(50 * time.Millisecond):
|
||||
// expected: blocked
|
||||
}
|
||||
unlock()
|
||||
select {
|
||||
case <-acquired:
|
||||
// expected: now acquired
|
||||
case <-time.After(time.Second):
|
||||
t.Fatal("second Lock did not acquire after release")
|
||||
}
|
||||
}
|
||||
|
||||
func TestLockIndependentKeys(t *testing.T) {
|
||||
var m Mutex
|
||||
unlockA := m.Lock("a")
|
||||
defer unlockA()
|
||||
// A different key must not block.
|
||||
done := make(chan struct{})
|
||||
go func() { u := m.Lock("b"); u(); close(done) }()
|
||||
select {
|
||||
case <-done:
|
||||
case <-time.After(time.Second):
|
||||
t.Fatal("Lock on an independent key blocked")
|
||||
}
|
||||
}
|
||||
|
||||
func TestTryLock(t *testing.T) {
|
||||
var m Mutex
|
||||
unlock, ok := m.TryLock("a")
|
||||
if !ok {
|
||||
t.Fatal("TryLock should succeed on a free key")
|
||||
}
|
||||
if _, ok := m.TryLock("a"); ok {
|
||||
t.Fatal("TryLock should fail while the key is held")
|
||||
}
|
||||
unlock()
|
||||
u2, ok := m.TryLock("a")
|
||||
if !ok {
|
||||
t.Fatal("TryLock should succeed after release")
|
||||
}
|
||||
u2()
|
||||
}
|
||||
|
||||
func TestConcurrentLockNoRace(t *testing.T) {
|
||||
var m Mutex
|
||||
var wg sync.WaitGroup
|
||||
counter := 0
|
||||
for i := 0; i < 50; i++ {
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
u := m.Lock("shared")
|
||||
counter++ // protected by the keyed lock
|
||||
u()
|
||||
}()
|
||||
}
|
||||
wg.Wait()
|
||||
if counter != 50 {
|
||||
t.Errorf("counter = %d, want 50 (lost updates ⇒ lock not serializing)", counter)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user