package main import ( "context" "fmt" "log/slog" "time" "github.com/alexei/tinyforge/internal/deployer" "github.com/alexei/tinyforge/internal/docker" "github.com/alexei/tinyforge/internal/store" "github.com/alexei/tinyforge/internal/workload/plugin" ) // restoreStopTimeoutSeconds bounds the graceful-stop window per container during // a restore quiesce before Docker kills it. const restoreStopTimeoutSeconds = 10 // restoreLifecycle adapts the deployer + Docker client + store to the // volsnap.Lifecycle seam the volume-snapshot restore flow needs. It lives in the // composition root so the volsnap package stays decoupled from deployer/docker. type restoreLifecycle struct { dep *deployer.Deployer docker *docker.Client store *store.Store } // Lock takes the deployer's per-workload deploy lock so the restore serializes // against every deploy entrypoint (C1). func (l *restoreLifecycle) Lock(workloadID string) func() { return l.dep.LockWorkload(workloadID) } // StopContainers stops every running container for the workload (quiesce before // the volume swap, C4) and returns the image tag the newest running container // was on, so the redeploy brings the SAME version back up. ListContainersByWorkload // returns rows newest-first, so the first running row is the newest. func (l *restoreLifecycle) StopContainers(ctx context.Context, workloadID string) (string, error) { rows, err := l.store.ListContainersByWorkload(workloadID) if err != nil { return "", fmt.Errorf("list containers: %w", err) } tag := "" for _, c := range rows { if c.State != "running" || c.ContainerID == "" { continue } if tag == "" && c.ImageTag != "" { tag = c.ImageTag // newest running container's tag } if err := l.docker.StopContainer(ctx, c.ContainerID, restoreStopTimeoutSeconds); err != nil { return "", fmt.Errorf("stop container %s: %w", c.ContainerID, err) } if err := l.store.UpdateContainerState(c.ID, "stopped"); err != nil { slog.Warn("restore: mark container stopped", "container", c.ID, "error", err) } } return tag, nil } // Redeploy re-dispatches the workload via the deployer's unlocked path (the // restore already holds the per-workload lock). reference pins the image tag. func (l *restoreLifecycle) Redeploy(ctx context.Context, w store.Workload, reference string) error { intent := plugin.DeploymentIntent{ Reason: "restore", Reference: reference, Metadata: map[string]string{"note": "redeploy after volume snapshot restore"}, TriggeredAt: time.Now().UTC(), TriggeredBy: "restore", } return l.dep.RedeployLocked(ctx, plugin.WorkloadFromStore(w), intent) }