Files
tiny-forge/internal/deployer/bluegreen.go
T
alexei.dolgolyov cba2149aa9 refactor(workload): finalize containers index + post-review hardening
Wraps up the workload refactor with the fixes that came out of the multi-agent
code review (see docs/plans/workload-refactor.md "What actually shipped").

Backend:
- store.ReconcileContainer: separate write path so the 30s reconciler tick no
  longer overwrites deployer-owned fields (subdomain, proxy_route_id,
  npm_proxy_id, image_tag).
- Container.stage_id column + index; ListProxyRoutes / ListContainersByStageID
  join via stage_id (survives stage rename), with legacy fallback to
  (project_id, role=stage_name).
- Reconciler: workload-existence check (rejects forged tinyforge.workload.id
  labels), skips inventing project-kind rows, child-context cancel before
  wg.Wait() on shutdown.
- Transactional CRUD across projects / stacks / static_sites: parent UPDATE
  and workload sync land in one transaction so secret rotations are durable.
- Webhook routing reads exclusively through workloads.webhook_secret; legacy
  GetProjectByWebhookSecret / GetStaticSiteByWebhookSecret fallback removed.
- store.GetStackByComposeProjectName + indexed lookup (no more full-table
  stack scan per compose container per tick).
- store.ListMissingSweepRows: filtered query for the missing-sweep.
- /api/instances/* handlers verify (workload_id, role) match URL
  (project_id, stage_name) before mutating — closes the cross-project
  hijack the security review flagged.
- extra_json no longer referenced from Go (column kept on disk for now).

Frontend:
- WorkloadContainers.svelte: generic detail-page panel reusable by stack and
  site detail pages.
- Containers page polish: client-side kind/state filters over an unfiltered
  fetch, URL-synced filters, race-safe loads via sequence number, EN+RU i18n,
  sidebar counter via navCounts.containers.

Misc:
- scripts/dev-server.sh: tolerate empty netstat grep result.
- .gitignore: ignore docker-watcher binaries, .claude/worktrees/, .facts-sync.json.
2026-05-09 15:44:41 +03:00

211 lines
7.2 KiB
Go

package deployer
import (
"context"
"fmt"
"log/slog"
"github.com/alexei/tinyforge/internal/docker"
"github.com/alexei/tinyforge/internal/store"
"github.com/google/uuid"
)
// blueGreenDeploy rolls out imageTag for a project stage by bringing up a new
// ("green") container next to the currently running ("blue") one and retiring
// blue only after green has been started, checked, and wired into the proxy.
//
// Sequence:
//  1. Remember the first container row of the stage whose state is "running"
//     as blue (there may be none).
//  2. Pull the image and ensure the configured Docker network exists.
//  3. Create the green container, record it in the store, and start it.
//  4. If the project defines a health check path, probe green over HTTP.
//  5. If the stage has proxying enabled, configure the proxy route for green
//     and ensure its DNS record.
//  6. Persist the final container row and remove blue.
//
// It returns the green Docker container ID, the proxy route ID (empty when
// proxying is disabled for the stage), and the green container row ID.
//
// On error the returned IDs describe how far the rollout got: all three are
// empty before the Docker container is created; only the generated row ID is
// set if Docker container creation fails; afterwards the container ID and row
// ID are set and the proxy route ID is always empty. This function does not
// remove the green container or touch blue on failure.
// NOTE(review): presumably the caller uses the returned IDs to clean up a
// failed green container — confirm against the call site.
//
// Deploy-status and container-state store updates are best-effort: a failure
// is logged with slog.Warn and the rollout continues.
func (d *Deployer) blueGreenDeploy(
	ctx context.Context,
	project store.Project,
	stage store.Stage,
	settings store.Settings,
	deployID string,
	imageTag string,
) (string, string, string, error) {
	// logInfo appends an info-level line to this deploy's log.
	logInfo := func(msg string) {
		d.logDeploy(deployID, msg, "info")
	}

	// Locate the blue container: the first running row of this stage.
	stageContainers, err := d.store.ListContainersByStageID(stage.ID)
	if err != nil {
		return "", "", "", fmt.Errorf("get existing containers: %w", err)
	}
	var blue *store.Container
	for _, candidate := range stageContainers {
		if candidate.State != "running" {
			continue
		}
		// Copy before taking the address so blue does not alias the loop variable.
		running := candidate
		blue = &running
		break
	}

	// Values reused by both the Docker config and the store row.
	imageRef := project.Image + ":" + imageTag
	workloadKind := string(store.WorkloadKindProject)

	// Step 1: pull the image.
	if statusErr := d.store.UpdateDeployStatus(deployID, "pulling", ""); statusErr != nil {
		slog.Warn("update deploy status", "error", statusErr)
	}
	d.publishDeployStatus(deployID, project.ID, stage.ID, imageTag, "pulling", "")
	logInfo(fmt.Sprintf("Blue-green: pulling image %s:%s", project.Image, imageTag))

	registryAuth, err := d.buildRegistryAuth(project)
	if err != nil {
		return "", "", "", fmt.Errorf("build registry auth: %w", err)
	}
	if pullErr := d.docker.PullImage(ctx, project.Image, imageTag, registryAuth); pullErr != nil {
		return "", "", "", fmt.Errorf("pull image: %w", pullErr)
	}
	logInfo("Image pulled successfully")

	// Step 2: make sure the target network exists.
	netID, err := d.docker.EnsureNetwork(ctx, settings.Network)
	if err != nil {
		return "", "", "", fmt.Errorf("ensure network: %w", err)
	}

	// Step 3: create and start the green container.
	if statusErr := d.store.UpdateDeployStatus(deployID, "starting", ""); statusErr != nil {
		slog.Warn("update deploy status", "error", statusErr)
	}
	d.publishDeployStatus(deployID, project.ID, stage.ID, imageTag, "starting", "")

	instanceID := uuid.New().String()
	subdomain := d.buildSubdomain(project, stage, settings, imageTag)
	workloadID := d.resolveProjectWorkloadID(project.ID)
	containerName := docker.ContainerName(project.Name, stage.Name, imageTag)
	env := d.mergeEnvVars(project, stage.ID)
	volumeMounts := d.computeVolumeMounts(project.ID, project.Name, stage.Name, imageTag, settings.BaseVolumePath)
	// Only consumed when the stage has proxying enabled.
	fqdn := subdomain + "." + settings.Domain

	greenCfg := docker.ContainerConfig{
		Name:         containerName,
		Image:        imageRef,
		Env:          env,
		ExposedPorts: []string{fmt.Sprintf("%d/tcp", project.Port)},
		NetworkName:  settings.Network,
		NetworkID:    netID,
		WorkloadID:   workloadID,
		WorkloadKind: workloadKind,
		Role:         stage.Name,
		Mounts:       volumeMounts,
		CpuLimit:     stage.CpuLimit,
		MemoryLimit:  stage.MemoryLimit,
	}

	// Some proxy providers are driven by container labels; merge whatever
	// labels the provider hands back into the green container's config.
	if stage.EnableProxy {
		providerLabels := d.proxy.ContainerLabels(fqdn, project.Port)
		if providerLabels != nil {
			if greenCfg.Labels == nil {
				greenCfg.Labels = make(map[string]string)
			}
			for key, value := range providerLabels {
				greenCfg.Labels[key] = value
			}
		}
	}

	logInfo(fmt.Sprintf("Blue-green: creating green container %s", containerName))
	containerID, err := d.docker.CreateContainer(ctx, greenCfg)
	if err != nil {
		return "", "", instanceID, fmt.Errorf("create container: %w", err)
	}

	// Record the green container; it is stored as "stopped" until it starts.
	pendingRow := store.Container{
		ID:           instanceID,
		WorkloadID:   workloadID,
		WorkloadKind: workloadKind,
		Role:         stage.Name,
		StageID:      stage.ID,
		ContainerID:  containerID,
		ImageRef:     imageRef,
		ImageTag:     imageTag,
		Host:         "local",
		State:        "stopped",
		Port:         project.Port,
		Subdomain:    subdomain,
	}
	row, err := d.store.CreateContainer(pendingRow)
	if err != nil {
		return containerID, "", instanceID, fmt.Errorf("create container row: %w", err)
	}
	// From here on use the ID of the row the store returned.
	instanceID = row.ID
	if linkErr := d.store.SetDeployInstanceID(deployID, instanceID); linkErr != nil {
		slog.Warn("link deploy to container", "error", linkErr)
	}

	logInfo(fmt.Sprintf("Blue-green: starting green container %s", containerName))
	if startErr := d.docker.StartContainer(ctx, containerID); startErr != nil {
		return containerID, "", instanceID, fmt.Errorf("start container: %w", startErr)
	}
	if stateErr := d.store.UpdateContainerState(instanceID, "running"); stateErr != nil {
		slog.Warn("update container state", "error", stateErr)
	}
	row.State = "running"
	d.publishInstanceStatus(instanceID, project.ID, stage.ID, "running")

	// Step 4: health check green, addressed by container name.
	if project.Healthcheck != "" {
		if statusErr := d.store.UpdateDeployStatus(deployID, "health_checking", ""); statusErr != nil {
			slog.Warn("update deploy status", "error", statusErr)
		}
		d.publishDeployStatus(deployID, project.ID, stage.ID, imageTag, "health_checking", "")

		probeURL := fmt.Sprintf("http://%s:%d%s", containerName, project.Port, project.Healthcheck)
		logInfo(fmt.Sprintf("Blue-green: health checking green at %s", probeURL))
		if probeErr := d.health.Check(ctx, probeURL); probeErr != nil {
			return containerID, "", instanceID, fmt.Errorf("health check green: %w", probeErr)
		}
		logInfo("Blue-green: green health check passed")
	}

	// Step 5: switch the proxy over to green.
	var proxyRouteID string
	if stage.EnableProxy {
		if statusErr := d.store.UpdateDeployStatus(deployID, "configuring_proxy", ""); statusErr != nil {
			slog.Warn("update deploy status", "error", statusErr)
		}
		d.publishDeployStatus(deployID, project.ID, stage.ID, imageTag, "configuring_proxy", "")

		// A positive project-level access list overrides the global one.
		aclID := settings.NpmAccessListID
		if project.NpmAccessListID > 0 {
			aclID = project.NpmAccessListID
		}

		proxyRouteID, err = d.configureProxy(ctx, deployID, settings, containerID, containerName, project.Port, subdomain, aclID)
		if err != nil {
			return containerID, "", instanceID, fmt.Errorf("configure proxy: %w", err)
		}
		row.ProxyRouteID = proxyRouteID
		logInfo("Blue-green: proxy swapped to green container")

		// Create/update the DNS record for green.
		d.ensureDNS(ctx, fqdn, "instance", instanceID, deployID)
	} else {
		logInfo("Blue-green: proxy skipped (disabled for this stage)")
	}

	// Persist the final row (state, subdomain, and proxy route if any).
	row.Subdomain = subdomain
	if saveErr := d.store.UpdateContainer(row); saveErr != nil {
		slog.Warn("update container with proxy ID", "error", saveErr)
	}

	// Step 6: retire blue, if one was running when the deploy began.
	if blue == nil {
		return containerID, proxyRouteID, instanceID, nil
	}
	logInfo(fmt.Sprintf("Blue-green: stopping blue container %s (tag: %s)", blue.ID, blue.ImageTag))
	if removeErr := d.removeContainer(ctx, *blue, settings); removeErr != nil {
		// Non-fatal: the rollout has already moved to green, so only warn.
		d.logDeploy(deployID, fmt.Sprintf("Blue-green: warning: failed to remove blue container: %v", removeErr), "warn")
	} else {
		logInfo("Blue-green: blue container removed")
	}
	return containerID, proxyRouteID, instanceID, nil
}