1c47030854
Restore a captured volume snapshot onto an image workload's live host-bind
data volumes, then redeploy — the most destructive workload action, built to
the adversarially-reviewed design (C1–C6) with all data-loss guards.
- Engine.Restore (engine-owned): all-or-nothing pre-flight re-resolution from
the workload's CURRENT config (never the tamperable manifest), per-filesystem
disk pre-check, per-workload lock, container quiesce, extract-to-tmp, durable
pre-restore snapshot, write-ahead journal, atomic rename swap, redeploy, and
crash-recovery sweep (RecoverInterruptedRestores) wired before serving.
- internal/keyedmutex: shared per-key lock; deployer now serializes every
deploy entrypoint per workload via DispatchPlugin (+ LockWorkload/RedeployLocked
for the restore re-dispatch, no deadlock).
- Untrusted-archive extractor: zip-slip containment, type allow-list (reg/dir
only), decompression-bomb cap, manifest-index bounds.
- POST /api/workloads/{id}/snapshots/{sid}/restore: admin, X-Confirm-Restore
header (CSRF), per-workload single-flight (409).
- WebUI: Restore button + danger ConfirmDialog + busy state + i18n (en/ru).
Scope: image-source workloads only; the supported scopes are absolute/stage/project
(driven off the same supportedScopes constant that capture uses).
Plan-reviewed before coding; per-phase go/security/ts reviews; final review
READY TO MERGE. Security review caught + fixed a CRITICAL manifest-Source path
traversal (re-derive target from current config + base containment).
Plan: plans/volume-snapshot-restore/
128 lines
5.1 KiB
Go
128 lines
5.1 KiB
Go
package deployer
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
|
|
"github.com/alexei/tinyforge/internal/metrics"
|
|
"github.com/alexei/tinyforge/internal/store"
|
|
"github.com/alexei/tinyforge/internal/workload/plugin"
|
|
)
|
|
|
|
// DispatchPlugin routes a DeploymentIntent for w to the matching Source
|
|
// plugin. This is the unified deploy path for every source kind (the legacy
|
|
// executeDeploy pipeline was removed in the workload-first cutover). When the
|
|
// operator enables auto_backup_before_deploy, a pre-deploy Tinyforge DB
|
|
// snapshot is taken here, after the source resolves and before it runs.
|
|
func (d *Deployer) DispatchPlugin(ctx context.Context, w plugin.Workload, intent plugin.DeploymentIntent) error {
|
|
// C1: serialize all deploy-class work per workload. Held across the whole
|
|
// deploy so a concurrent deploy/rollback/promote/trigger — or a volume
|
|
// restore (which redeploys via RedeployLocked while holding this) — can
|
|
// never interleave container changes for the same workload.
|
|
unlock := d.workloadLocks.Lock(w.ID)
|
|
defer unlock()
|
|
return d.dispatchLocked(ctx, w, intent)
|
|
}
|
|
|
|
// dispatchLocked is DispatchPlugin's body, assuming the per-workload lock is
|
|
// already held. RedeployLocked calls it directly during restore.
|
|
func (d *Deployer) dispatchLocked(ctx context.Context, w plugin.Workload, intent plugin.DeploymentIntent) error {
|
|
if err := d.beginDispatch(); err != nil {
|
|
metrics.DeploysTotal.Inc(w.SourceKind, "rejected_draining")
|
|
return err
|
|
}
|
|
defer d.activeWg.Done()
|
|
src, err := plugin.GetSource(w.SourceKind)
|
|
if err != nil {
|
|
// Unknown source: use the constant "unknown" sentinel for the
|
|
// label so a typo-spam attack can't grow the metrics map with
|
|
// one series per bogus source_kind. The actual user-supplied
|
|
// value still surfaces via the wrapped error / event log.
|
|
metrics.DeploysTotal.Inc("unknown", "unknown_source")
|
|
return fmt.Errorf("dispatch %s: %w", w.Name, err)
|
|
}
|
|
// Optional operator-enabled pre-deploy DB snapshot. Fail-open: never
|
|
// blocks shipping a deploy. Runs before any source-internal idempotency
|
|
// check (e.g. the image source's same-tag short-circuit), so a same-tag
|
|
// redeploy still snapshots — "backup before every deploy attempt".
|
|
d.maybeBackupBeforeDeploy(w.ID)
|
|
startedAt := store.Now()
|
|
err = src.Deploy(ctx, d.PluginDeps(), w, intent)
|
|
outcome := "success"
|
|
if err != nil {
|
|
outcome = "failure"
|
|
}
|
|
metrics.DeploysTotal.Inc(w.SourceKind, outcome)
|
|
// Append to the structured deploy ledger (powers the per-app history
|
|
// panel + rollback). Best-effort and secret-free; see recordDeployHistory.
|
|
// Only DispatchPlugin records — reconcile/teardown are not deploys.
|
|
d.recordDeployHistory(w, intent, outcome, err, startedAt)
|
|
return err
|
|
}
|
|
|
|
// DispatchTeardown routes a teardown call to the matching Source plugin.
|
|
// Used when a workload is deleted. Tracked via activeWg so Drain() honours
|
|
// in-progress teardowns just like deploys.
|
|
func (d *Deployer) DispatchTeardown(ctx context.Context, w plugin.Workload) error {
|
|
// Teardown mutates the same containers/routes a deploy does, so it takes the
|
|
// per-workload lock too (C1). Callers tear down distinct workload ids
|
|
// sequentially (e.g. preview children then parent), never nested, so no
|
|
// self-deadlock.
|
|
unlock := d.workloadLocks.Lock(w.ID)
|
|
defer unlock()
|
|
if err := d.beginDispatch(); err != nil {
|
|
return err
|
|
}
|
|
defer d.activeWg.Done()
|
|
src, err := plugin.GetSource(w.SourceKind)
|
|
if err != nil {
|
|
return fmt.Errorf("dispatch teardown %s: %w", w.Name, err)
|
|
}
|
|
return src.Teardown(ctx, d.PluginDeps(), w)
|
|
}
|
|
|
|
// DispatchReconcile routes a Reconcile call. Periodic reconciler iterates
|
|
// every Workload and calls this; idle Sources should make it a cheap
|
|
// no-op. Tracked via activeWg so a long-running reconcile blocks Drain().
|
|
func (d *Deployer) DispatchReconcile(ctx context.Context, w plugin.Workload) error {
|
|
if err := d.beginDispatch(); err != nil {
|
|
// Silent skip — reconcile is a periodic tick, not a user-initiated
|
|
// action, so we don't want to surface "draining" errors back to the
|
|
// reconciler loop. The next tick after restart will catch up. Routing
|
|
// through beginDispatch keeps the activeWg.Add atomic with the drain
|
|
// check (see Drain) instead of a bare shuttingDown.Load + Add race.
|
|
return nil
|
|
}
|
|
defer d.activeWg.Done()
|
|
src, err := plugin.GetSource(w.SourceKind)
|
|
if err != nil {
|
|
return fmt.Errorf("dispatch reconcile %s: %w", w.Name, err)
|
|
}
|
|
return src.Reconcile(ctx, d.PluginDeps(), w)
|
|
}
|
|
|
|
// PluginDeps captures the Deployer's existing dependencies in the bundle
|
|
// shape Sources expect. Reads d.dns under the RWMutex since proxy/DNS
|
|
// can be hot-swapped at runtime when settings change. Exported so the
|
|
// API layer can hand the same Deps to Trigger.Match — passing zero-Deps
|
|
// to triggers would silently nil-panic the moment any Trigger touches
|
|
// deps.Store / deps.Crypto for signature verification.
|
|
func (d *Deployer) PluginDeps() plugin.Deps {
|
|
d.dnsMu.RLock()
|
|
dnsProvider := d.dns
|
|
d.dnsMu.RUnlock()
|
|
d.proxyMu.RLock()
|
|
proxyProvider := d.proxy
|
|
d.proxyMu.RUnlock()
|
|
return plugin.Deps{
|
|
Store: d.store,
|
|
Docker: d.docker,
|
|
Proxy: proxyProvider,
|
|
DNS: dnsProvider,
|
|
Health: d.health,
|
|
Notifier: d.notifier,
|
|
Events: d.eventBus,
|
|
EncKey: d.encKey,
|
|
}
|
|
}
|