feat(deployer): configurable per-workload deploy strategy (blue-green for built sources)
Add a deploy_strategy field to each source's config blob — "" (default), "recreate", or "blue-green" — validated in each source's Validate and read on the deploy path. No new DB column, no migration: the field rides inside the existing SourceConfig JSON, and every existing workload decodes "" to its historical behavior (image -> blue-green, others -> recreate).

The real gap this closes: dockerfile and static stopped the old container before creating the new one on every redeploy — a downtime window image never had. Their blue-green branch now:

- names the new "green" container with a unique suffix so it coexists with the still-serving blue (plumbed into both the container name AND the proxy forwardHost);
- skips the collision teardown that destroyed blue early;
- gates green — an HTTP readiness probe (deps.Health.Check) when a healthcheck is configured, else the existing liveness window;
- swaps the route via a pure upsert (no pre-DeleteRoute) so NPM repoints in place with no gap;
- persists green into the single runtime-state row BEFORE reaping blue, so a crash mid-swap can never orphan green or leave the row pointing at a removed container (state.go/teardown.go/reconcile.go stay untouched).

image honors explicit "recreate" (reap existing containers after pull, before cutover); its default blue-green path is unchanged. compose stays stack-managed and rejects "blue-green" at Validate so the contract is honest. static forces recreate for storage-backed deno sites — blue-green would mount the same RW volume into both containers at once.

Shared helper: internal/workload/plugin/strategy.go (ValidateStrategy + BuildGreenName).

Backend-only (phase 1); the field is usable today via the app's advanced-JSON editor — a friendly toggle + i18n follow in phase 2.

Tests: ValidateStrategy matrix, per-source Validate (incl. the empty-key backward-compat lock), and effectiveStrategy defaults + the deno gate.

Design + adversarial review: docs/plans/DEPLOY_STRATEGY_PLAN.md.
This commit is contained in:
@@ -40,6 +40,21 @@ type Config struct {
|
||||
MemoryLimit int `json:"memory_limit"` // megabytes; 0 = unlimited
|
||||
DefaultTag string `json:"default_tag"` // tag used when intent.Reference is empty
|
||||
MaxInstances int `json:"max_instances"` // simultaneous containers to keep; 0/1 = strict blue-green
|
||||
// DeployStrategy selects how a redeploy cuts over. "" defaults to the
|
||||
// image source's native zero-downtime blue-green; "recreate" reaps the
|
||||
// old container before the new one comes up (opt-in downtime). Validated
|
||||
// via plugin.ValidateStrategy. Orthogonal to MaxInstances.
|
||||
DeployStrategy string `json:"deploy_strategy,omitempty"`
|
||||
}
|
||||
|
||||
// effectiveStrategy resolves the configured strategy for the image source.
|
||||
// Empty maps to blue-green so every existing image workload keeps its
|
||||
// current zero-downtime behavior byte-for-byte.
|
||||
func effectiveStrategy(cfg Config) string {
|
||||
if cfg.DeployStrategy == "" {
|
||||
return plugin.StrategyBlueGreen
|
||||
}
|
||||
return cfg.DeployStrategy
|
||||
}
|
||||
|
||||
// VolumeMount mirrors the existing store.Volume scope shape but as a flat
|
||||
@@ -88,6 +103,9 @@ func (*source) Validate(cfg json.RawMessage) error {
|
||||
if c.Port < 0 || c.Port > 65535 {
|
||||
return fmt.Errorf("image source: port must be 0-65535")
|
||||
}
|
||||
if err := plugin.ValidateStrategy(c.DeployStrategy, true); err != nil {
|
||||
return fmt.Errorf("image source: %w", err)
|
||||
}
|
||||
for i, v := range c.Volumes {
|
||||
if strings.TrimSpace(v.Target) == "" {
|
||||
return fmt.Errorf("image source: volumes[%d].target is required", i)
|
||||
@@ -189,6 +207,33 @@ func (*source) Deploy(ctx context.Context, deps plugin.Deps, w plugin.Workload,
|
||||
return fmt.Errorf("image source: ensure network: %w", err)
|
||||
}
|
||||
|
||||
// recreate strategy (opt-in): tear down the existing containers BEFORE
|
||||
// the new one comes up — the operator chose a downtime window. The
|
||||
// default blue-green path skips this; its new container coexists with
|
||||
// the old and the proxy route swaps atomically (enforceMaxInstances
|
||||
// reaps the old AFTER cutover). Reaped here (after a successful pull, so
|
||||
// a pull failure doesn't take the workload down for nothing). On a
|
||||
// later create/health/route failure the recreate path has no blue to
|
||||
// fall back to — inherent to recreate, distinct from blue-green's
|
||||
// non-disruptive rollbackNew.
|
||||
if effectiveStrategy(cfg) == plugin.StrategyRecreate {
|
||||
for _, c := range existing {
|
||||
if c.ContainerID != "" {
|
||||
_ = deps.Docker.RemoveContainer(ctx, c.ContainerID, true)
|
||||
}
|
||||
if c.ProxyRouteID != "" {
|
||||
_ = deps.Proxy.DeleteRoute(ctx, c.ProxyRouteID)
|
||||
}
|
||||
if delErr := deps.Store.DeleteContainer(c.ID); delErr != nil && !errors.Is(delErr, store.ErrNotFound) {
|
||||
slog.Warn("image source: recreate reap old row", "workload", w.ID, "row", c.ID, "error", delErr)
|
||||
}
|
||||
}
|
||||
if len(existing) > 0 {
|
||||
slog.Info("image source: recreate strategy reaped old containers before cutover",
|
||||
"workload", w.ID, "count", len(existing))
|
||||
}
|
||||
}
|
||||
|
||||
// Unique-per-deploy name so the new container can run alongside the
|
||||
// old one. The suffix is monotonic ms; collisions are not a real
|
||||
// concern for human-driven or webhook-driven deploys.
|
||||
|
||||
Reference in New Issue
Block a user