Files
alexei.dolgolyov e3d140c57a feat(deployer): configurable per-workload deploy strategy (blue-green for built sources)
Add a deploy_strategy field to each source's config blob — "" (default),
"recreate", or "blue-green" — validated in each source's Validate and read on
the deploy path. No new DB column, no migration: the field rides inside the
existing SourceConfig JSON and every existing workload decodes "" to its
historical behavior (image -> blue-green, others -> recreate).

The real gap this closes: dockerfile and static stopped the old container
before creating the new one on every redeploy — a downtime window image never
had. Their blue-green branch now:
- names the new "green" container with a unique suffix so it coexists with the
  still-serving blue (plumbed into both the container name AND the proxy
  forwardHost);
- skips the collision teardown that destroyed blue early;
- gates green — an HTTP readiness probe (deps.Health.Check) when a healthcheck
  is configured, else the existing liveness window;
- swaps the route via a pure upsert (no pre-DeleteRoute) so NPM repoints in
  place with no gap;
- persists green into the single runtime-state row BEFORE reaping blue, so a
  crash mid-swap can never orphan green or leave the row pointing at a removed
  container (state.go/teardown.go/reconcile.go stay untouched).

image honors explicit "recreate" (reap existing containers after pull, before
cutover); its default blue-green path is unchanged. compose stays
stack-managed and rejects "blue-green" at Validate so the contract is honest.
static forces recreate for storage-backed deno sites — blue-green would mount
the same RW volume into both containers at once.

Shared helper internal/workload/plugin/strategy.go (ValidateStrategy +
BuildGreenName). Backend-only (phase 1); the field is usable today via the
app's advanced-JSON editor — a friendly toggle + i18n follow in phase 2.
Tests: ValidateStrategy matrix, per-source Validate (incl. the empty-key
backward-compat lock), and effectiveStrategy defaults + the deno gate. Design
+ adversarial review: docs/plans/DEPLOY_STRATEGY_PLAN.md.
2026-06-19 16:51:20 +03:00

325 lines
12 KiB
Go

// Package compose implements the "compose" source: a docker-compose stack
// deployed as a single logical unit. Multiple service containers may
// result; each becomes one row in the containers index keyed by service
// name in Container.Role.
package compose
import (
"context"
"encoding/json"
"errors"
"fmt"
"log/slog"
"os"
"path/filepath"
"regexp"
"strings"
"github.com/alexei/tinyforge/internal/stack"
"github.com/alexei/tinyforge/internal/store"
"github.com/alexei/tinyforge/internal/workload/plugin"
)
// Config is the per-workload source config blob. ComposeYAML is the
// authoritative spec — either inline (manual / paste-in flow) or fetched
// by a git trigger and stashed here on each deploy. ComposeProjectName
// is the `-p` arg passed to docker compose; defaults to a stable
// workload-derived value when blank.
type Config struct {
// ComposeYAML is the compose file text. Validate rejects a value that
// is blank after trimming whitespace, and Deploy writes it to the
// workload's scratch directory before running `up`.
ComposeYAML string `json:"compose_yaml"`
// ComposeProjectName optionally overrides the compose project name.
// It is sanitized by composeProjectName before use; when blank, a name
// is derived from the workload's name and ID instead.
ComposeProjectName string `json:"compose_project_name"`
// DeployStrategy is accepted for parity with the other sources but a
// compose stack only supports recreate (docker compose up -d
// --remove-orphans). "" and "recreate" are honored; "blue-green" is
// rejected at Validate so the contract is honest in the UI rather than
// silently accepting a value compose can't deliver.
DeployStrategy string `json:"deploy_strategy,omitempty"`
}
// source is the compose plugin implementation. It carries no fields;
// every method reads what it needs from its arguments. A pointer to it
// is handed to plugin.RegisterSource in init.
type source struct{}
// composeRunner is the slice of stack.Compose this plugin actually
// drives. Defined locally per the "interfaces where they are used"
// idiom so the plugin can be unit-tested without a real docker compose
// binary. `*stack.Compose` satisfies it implicitly.
type composeRunner interface {
// Up brings the project up from the YAML at yamlPath. The returned
// string is included (truncated) in Deploy's error message on failure.
Up(ctx context.Context, projectName, yamlPath string) (string, error)
// Down tears the project down; Teardown passes removeVolumes=true.
Down(ctx context.Context, projectName string, removeVolumes bool) (string, error)
// Ps lists the project's services; each entry becomes one container
// row in Deploy and Reconcile.
Ps(ctx context.Context, projectName, yamlPath string) ([]stack.Service, error)
}
// newComposeRunner returns the runner the plugin should call. Tests
// swap this var with a fake; production code never touches it. The
// indirection costs one function-pointer dereference per Deploy /
// Teardown / Reconcile call — negligible against the docker compose
// exec it gates.
//
// NOTE(review): the empty-string argument to stack.NewCompose presumably
// selects that constructor's default — confirm against the stack package.
var newComposeRunner = func() composeRunner { return stack.NewCompose("") }
// init registers the compose source with the plugin registry when the
// package is loaded.
func init() {
	plugin.RegisterSource(new(source))
}
// Kind reports the source kind identifier for this plugin.
func (*source) Kind() string {
	const kind = "compose"
	return kind
}
// SchemaSample returns an example Config holding a minimal single-service
// compose file.
//
// The YAML is spelled out one line per fragment so its nesting is
// explicit: `web` sits under `services`, and `image` / `ports` sit under
// `web`. With flat (single-space) indentation those keys would all become
// siblings under `services`, which is not a valid compose spec.
func (*source) SchemaSample() any {
	return Config{
		ComposeYAML: "services:\n" +
			"  web:\n" +
			"    image: nginx:alpine\n" +
			"    ports:\n" +
			"      - \"80\"\n",
	}
}
// Validate checks a raw compose source config blob. Checks run in this
// order and the first failure is returned:
//
//  1. the blob is non-empty and decodes as JSON into Config;
//  2. compose_yaml is not blank;
//  3. deploy_strategy passes plugin.ValidateStrategy with blue-green
//     disallowed;
//  4. the YAML parses and passes stack.Validate.
func (*source) Validate(cfg json.RawMessage) error {
	if len(cfg) == 0 {
		return errors.New("compose source: config is required")
	}

	var parsed Config
	if decodeErr := json.Unmarshal(cfg, &parsed); decodeErr != nil {
		return fmt.Errorf("compose source: invalid json: %w", decodeErr)
	}
	if strings.TrimSpace(parsed.ComposeYAML) == "" {
		return errors.New("compose source: compose_yaml is required")
	}

	// A whole-stack blue-green is not implemented, so the strategy check
	// runs with allowBlueGreen=false: better to refuse the value here than
	// to accept it and quietly run recreate.
	if strategyErr := plugin.ValidateStrategy(parsed.DeployStrategy, false); strategyErr != nil {
		return fmt.Errorf("compose source: %w", strategyErr)
	}

	spec, parseErr := stack.Parse(parsed.ComposeYAML)
	if parseErr != nil {
		return fmt.Errorf("compose source: parse yaml: %w", parseErr)
	}
	if specErr := stack.Validate(spec); specErr != nil {
		return fmt.Errorf("compose source: validate yaml: %w", specErr)
	}
	return nil
}
// Deploy materializes the workload's compose YAML at its stable scratch
// path, brings the project up through the compose runner, and then
// upserts one Container row per service reported by `ps`. The project
// name is the operator-supplied one when set, otherwise a name derived
// from the workload.
//
// err is a named result so the deferred audit emit can observe the final
// outcome.
func (*source) Deploy(ctx context.Context, deps plugin.Deps, w plugin.Workload, intent plugin.DeploymentIntent) (err error) {
	// Pure config problems are reported to the caller only; they happen
	// before the audit emit is armed, so they never reach the timeline.
	cfg, decodeErr := plugin.SourceConfigOf[Config](w)
	switch {
	case decodeErr != nil:
		return fmt.Errorf("compose source: decode config: %w", decodeErr)
	case strings.TrimSpace(cfg.ComposeYAML) == "":
		return fmt.Errorf("compose source: workload %s has empty compose_yaml", w.ID)
	}

	// There is no "already up" fast path, so from here on every call is a
	// real deploy and exactly one terminal event is emitted for it.
	//
	// SECURITY: the Up failure wraps raw `docker compose` combined output,
	// which can include the deployed app's own stderr (potentially
	// secrets). Deploy events are persisted indefinitely and egress to
	// operator webhooks, so only a fixed, secret-free status word is
	// emitted. The detailed error still reaches the caller through the
	// returned err.
	defer func() {
		status := "deployed"
		if err != nil {
			status = "failed"
		}
		plugin.EmitDeployEvent(deps, w, "compose", status)
	}()

	project := composeProjectName(cfg.ComposeProjectName, w)
	yamlPath, writeErr := writeYAML(w.ID, cfg.ComposeYAML)
	if writeErr != nil {
		return fmt.Errorf("compose source: write yaml: %w", writeErr)
	}

	runner := newComposeRunner()
	output, upErr := runner.Up(ctx, project, yamlPath)
	if upErr != nil {
		return fmt.Errorf("compose source: docker compose up: %w (output: %s)", upErr, truncate(output, 1024))
	}

	// `up` worked; failing to enumerate the containers is still reported
	// so the UI never shows an empty index for a running stack.
	if syncErr := syncContainers(ctx, deps, runner, w, project, yamlPath); syncErr != nil {
		return fmt.Errorf("compose source: sync container rows: %w", syncErr)
	}
	return nil
}
// Teardown runs `docker compose down --remove-orphans -v` and drops the
// container rows. Idempotent: missing compose project is treated as
// already-down. Volume removal is intentional — workload teardown is
// destructive by design (matches `DeleteStack(removeVolumes=true)`).
//
// Teardown is best-effort up to the row listing: a config decode failure
// or a failing `down` is logged and the teardown continues, so DB rows are
// never left orphaned. The only error returned is a failure to list the
// workload's container rows.
func (*source) Teardown(ctx context.Context, deps plugin.Deps, w plugin.Workload) error {
	cfg, decodeErr := plugin.SourceConfigOf[Config](w)
	if decodeErr != nil {
		// Keep going with whatever was decoded: the project name then
		// falls back to the workload-derived one when no explicit name
		// survived. Log it, because `down` may target a different project
		// than the one that was deployed.
		slog.Warn("compose source: decode config during teardown", "workload", w.ID, "error", decodeErr)
	}
	projectName := composeProjectName(cfg.ComposeProjectName, w)

	compose := newComposeRunner()
	if _, downErr := compose.Down(ctx, projectName, true); downErr != nil {
		// Log but proceed — the DB rows must not be orphaned.
		slog.Warn("compose source: docker compose down", "workload", w.ID, "error", downErr)
	}

	// Best-effort: remove the YAML scratch dir. A leftover directory is
	// harmless, so the error is intentionally ignored.
	_ = os.RemoveAll(workloadDir(w.ID))

	rows, listErr := deps.Store.ListContainersByWorkload(w.ID)
	if listErr != nil {
		return fmt.Errorf("compose source: list containers: %w", listErr)
	}
	for _, c := range rows {
		// A row that is already gone counts as deleted; anything else is
		// logged but does not abort the remaining deletions.
		if err := deps.Store.DeleteContainer(c.ID); err != nil && !errors.Is(err, store.ErrNotFound) {
			slog.Warn("compose source: delete container row", "id", c.ID, "error", err)
		}
	}
	return nil
}
// Reconcile refreshes the containers index from `docker compose ps`. If
// the compose project is unknown to Docker, container rows are marked
// missing so the UI flags them. The reconciler hits this on every tick
// per workload, so the YAML is only rewritten when its content has
// actually changed.
//
// Errors: a config decode failure is returned. A cancelled or expired ctx
// is returned as well and leaves the rows untouched, because a `ps` that
// failed only through interruption says nothing about whether the project
// exists. Any other `ps` failure marks the rows "missing" and returns nil.
func (*source) Reconcile(ctx context.Context, deps plugin.Deps, w plugin.Workload) error {
	cfg, err := plugin.SourceConfigOf[Config](w)
	if err != nil {
		return fmt.Errorf("compose source: decode config: %w", err)
	}
	projectName := composeProjectName(cfg.ComposeProjectName, w)

	yamlPath, err := writeYAMLIfChanged(w.ID, cfg.ComposeYAML)
	if err != nil {
		// Still attempt `ps` with whatever path came back, but leave a
		// trace: a scratch dir that cannot be written would otherwise be
		// invisible.
		slog.Warn("compose source: refresh compose yaml", "workload", w.ID, "error", err)
	}

	compose := newComposeRunner()
	services, err := compose.Ps(ctx, projectName, yamlPath)
	if err != nil {
		if ctxErr := ctx.Err(); ctxErr != nil {
			// Shutdown or timeout — do not flip healthy rows to "missing".
			return fmt.Errorf("compose source: reconcile interrupted: %w", ctxErr)
		}
		// Likely no compose project running for this workload. Mark
		// existing rows missing so the UI surfaces it.
		slog.Debug("compose source: docker compose ps", "workload", w.ID, "error", err)
		rows, listErr := deps.Store.ListContainersByWorkload(w.ID)
		if listErr != nil {
			slog.Warn("compose source: list containers", "workload", w.ID, "error", listErr)
		}
		for _, c := range rows {
			if updErr := deps.Store.UpdateContainerState(c.ID, "missing"); updErr != nil {
				slog.Warn("compose source: mark container missing", "id", c.ID, "error", updErr)
			}
		}
		return nil
	}

	for _, svc := range services {
		// Prefer the State field; fall back to Status when it is empty.
		state := svc.State
		if state == "" {
			state = svc.Status
		}
		upsertServiceRow(deps, w, svc, state)
	}
	return nil
}
// syncContainers lists the project's services via `ps` and upserts one
// container row per service. It is the strict counterpart of Reconcile:
// there is no mark-as-missing fallback, because Deploy calls it right
// after a successful `up` and treats a `ps` failure as an error.
func syncContainers(ctx context.Context, deps plugin.Deps, compose composeRunner, w plugin.Workload, projectName, yamlPath string) error {
	listed, psErr := compose.Ps(ctx, projectName, yamlPath)
	if psErr != nil {
		return fmt.Errorf("compose ps: %w", psErr)
	}

	for i := range listed {
		svc := listed[i]
		// Status is the fallback; State wins whenever it is set.
		effective := svc.Status
		if svc.State != "" {
			effective = svc.State
		}
		upsertServiceRow(deps, w, svc, effective)
	}
	return nil
}
// upsertServiceRow writes the container-index row for one compose service.
// The row is keyed "<workload ID>:<role>", where role is the service name,
// or the container name when the service name is empty. A store failure is
// logged and otherwise ignored, so one bad row does not stop the caller's
// loop over the remaining services.
func upsertServiceRow(deps plugin.Deps, w plugin.Workload, svc stack.Service, state string) {
	role := svc.Name
	if svc.Service != "" {
		role = svc.Service
	}

	row := store.Container{
		ID:           w.ID + ":" + role,
		WorkloadID:   w.ID,
		WorkloadKind: "compose",
		Role:         role,
		ContainerID:  "", // reconciler fills via `docker ps` label join
		Host:         "local",
		State:        state,
		LastSeenAt:   store.Now(),
	}

	err := deps.Store.UpsertContainer(row)
	if err == nil {
		return
	}
	slog.Warn("compose source: upsert container row", "workload", w.ID, "service", role, "error", err)
}
// projectNameSanitizer matches every character that is not allowed in a
// compose project name once the input has been lowercased.
var projectNameSanitizer = regexp.MustCompile(`[^a-z0-9_-]`)

// sanitizeComposeProjectName lowercases raw, replaces every disallowed
// character with "-", and trims leading/trailing "-".
func sanitizeComposeProjectName(raw string) string {
	cleaned := projectNameSanitizer.ReplaceAllString(strings.ToLower(raw), "-")
	return strings.Trim(cleaned, "-")
}

// composeProjectName returns the `-p` argument for docker compose.
//
// When the operator supplied a name it is sanitized and used, so a value
// like "--foo" cannot reach the docker CLI and be re-parsed as a flag.
// Leading "_" is stripped as well, because docker compose requires a
// project name to begin with a lowercase letter or digit. If nothing
// usable remains (e.g. "---" or "___"), the derived name is used instead.
//
// The derived name is "tf-<sanitized workload name>-<first 8 bytes of the
// workload ID>", with "wkl" standing in for a name that sanitizes to
// empty. It depends only on the workload, so re-deploys of the same
// workload reuse the same project.
func composeProjectName(explicit string, w plugin.Workload) string {
	if san := strings.TrimLeft(sanitizeComposeProjectName(explicit), "-_"); san != "" {
		return san
	}

	name := sanitizeComposeProjectName(w.Name)
	if name == "" {
		name = "wkl"
	}
	idShort := w.ID
	if len(idShort) > 8 {
		idShort = idShort[:8]
	}
	return fmt.Sprintf("tf-%s-%s", name, idShort)
}
// workloadDir is the per-workload scratch directory for compose YAML,
// located under "<os.TempDir()>/tinyforge-compose".
//
// The result is always a strict descendant of that root. Teardown calls
// os.RemoveAll on it, so an ID that would resolve to the root itself
// ("" or ".") or climb out of it ("..", "../x") must never be joined
// as-is — that would delete every workload's scratch dir, or something
// outside the root. Such IDs are mapped to a hex-encoded leaf inside the
// root instead. Well-formed IDs map exactly as before.
func workloadDir(workloadID string) string {
	root := filepath.Join(os.TempDir(), "tinyforge-compose")
	dir := filepath.Join(root, workloadID)

	rel, err := filepath.Rel(root, dir)
	escapes := err != nil ||
		rel == "." ||
		rel == ".." ||
		strings.HasPrefix(rel, ".."+string(filepath.Separator))
	if escapes {
		// %x on a string hex-encodes its bytes, which yields a single
		// separator-free path element for any input.
		return filepath.Join(root, fmt.Sprintf("id-%x", workloadID))
	}
	return dir
}
// writeYAML writes the current compose YAML to a stable path under the
// workload's scratch dir. Returns the path. Each deploy overwrites the
// file — there are no revisions at the source level (the workload row is
// the single source of truth; git or registry triggers update SourceConfig).
//
// Permissions are owner-only (0o700 / 0o600) because the YAML often
// contains environment-section secrets and the dir lives in shared /tmp.
//
// The content is written to a temporary file in the same directory and
// then renamed over compose.yml, so a concurrent reader (a Reconcile tick
// or a running docker compose) sees either the old file or the new one,
// never a partial write. os.CreateTemp creates the file with mode 0o600,
// so the published file is owner-only even when a previous compose.yml
// had looser permissions.
func writeYAML(workloadID, yamlText string) (string, error) {
	dir := workloadDir(workloadID)
	if err := os.MkdirAll(dir, 0o700); err != nil {
		return "", err
	}
	path := filepath.Join(dir, "compose.yml")

	tmp, err := os.CreateTemp(dir, "compose.yml.*.tmp")
	if err != nil {
		return "", err
	}
	tmpPath := tmp.Name()

	if _, err := tmp.WriteString(yamlText); err != nil {
		// Close and remove are cleanup on an already-failed write; the
		// write error is the one worth reporting.
		_ = tmp.Close()
		_ = os.Remove(tmpPath)
		return "", err
	}
	if err := tmp.Close(); err != nil {
		_ = os.Remove(tmpPath)
		return "", err
	}
	if err := os.Rename(tmpPath, path); err != nil {
		_ = os.Remove(tmpPath)
		return "", err
	}
	return path, nil
}
// writeYAMLIfChanged returns the workload's compose.yml path, rewriting
// the file through writeYAML only when it cannot be read or its content
// differs from yamlText. Reconcile runs once per workload per tick, so
// skipping the redundant write avoids needless disk churn.
func writeYAMLIfChanged(workloadID, yamlText string) (string, error) {
	target := filepath.Join(workloadDir(workloadID), "compose.yml")

	current, readErr := os.ReadFile(target)
	if readErr != nil || string(current) != yamlText {
		return writeYAML(workloadID, yamlText)
	}
	return target, nil
}
// truncate caps s at n bytes and appends "...(truncated)" when anything
// was cut off. Strings of at most n bytes are returned unchanged.
//
// The cut never lands inside a multi-byte UTF-8 sequence: if byte n is a
// continuation byte, the cut moves back to the start of that rune, so the
// result can be slightly shorter than n bytes but stays valid UTF-8 when
// s is. A negative n is treated as 0 rather than panicking on the slice.
func truncate(s string, n int) string {
	if n < 0 {
		n = 0
	}
	if len(s) <= n {
		return s
	}

	cut := n
	// UTF-8 continuation bytes have the form 10xxxxxx. A rune is at most
	// 4 bytes long, so at most 3 steps back reach its leading byte; the
	// bound also keeps malformed input from dragging the cut further.
	for back := 0; back < 3 && cut > 0 && s[cut]&0xC0 == 0x80; back++ {
		cut--
	}
	return s[:cut] + "...(truncated)"
}