cba2149aa9
Wraps up the workload refactor with the fixes that came out of the multi-agent code review (see docs/plans/workload-refactor.md "What actually shipped"). Backend: - store.ReconcileContainer: separate write path so the 30s reconciler tick no longer overwrites deployer-owned fields (subdomain, proxy_route_id, npm_proxy_id, image_tag). - Container.stage_id column + index; ListProxyRoutes / ListContainersByStageID join via stage_id (survives stage rename), with legacy fallback to (project_id, role=stage_name). - Reconciler: workload-existence check (rejects forged tinyforge.workload.id labels), skips inventing project-kind rows, child-context cancel before wg.Wait() on shutdown. - Transactional CRUD across projects / stacks / static_sites: parent UPDATE and workload sync land in one transaction so secret rotations are durable. - Webhook routing reads exclusively through workloads.webhook_secret; legacy GetProjectByWebhookSecret / GetStaticSiteByWebhookSecret fallback removed. - store.GetStackByComposeProjectName + indexed lookup (no more full-table stack scan per compose container per tick). - store.ListMissingSweepRows: filtered query for the missing-sweep. - /api/instances/* handlers verify (workload_id, role) match URL (project_id, stage_name) before mutating — closes the cross-project hijack the security review flagged. - extra_json no longer referenced from Go (column kept on disk for now). Frontend: - WorkloadContainers.svelte: generic detail-page panel reusable by stack and site detail pages. - Containers page polish: client-side kind/state filters over an unfiltered fetch, URL-synced filters, race-safe loads via sequence number, EN+RU i18n, sidebar counter via navCounts.containers. Misc: - scripts/dev-server.sh: tolerate empty netstat grep result. - .gitignore: ignore docker-watcher binaries, .claude/worktrees/, .facts-sync.json.
211 lines
7.2 KiB
Go
211 lines
7.2 KiB
Go
package deployer
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"log/slog"
|
|
|
|
"github.com/alexei/tinyforge/internal/docker"
|
|
"github.com/alexei/tinyforge/internal/store"
|
|
"github.com/google/uuid"
|
|
)
|
|
|
|
// blueGreenDeploy performs a zero-downtime deployment:
|
|
// 1. Start new container (green)
|
|
// 2. Health check green
|
|
// 3. Swap NPM proxy to point to green
|
|
// 4. Stop old container (blue)
|
|
//
|
|
// If the new container fails health check, it is removed and the old one stays.
|
|
func (d *Deployer) blueGreenDeploy(
|
|
ctx context.Context,
|
|
project store.Project,
|
|
stage store.Stage,
|
|
settings store.Settings,
|
|
deployID string,
|
|
imageTag string,
|
|
) (string, string, string, error) {
|
|
// Find existing running container for this stage (the "blue" container).
|
|
existing, err := d.store.ListContainersByStageID(stage.ID)
|
|
if err != nil {
|
|
return "", "", "", fmt.Errorf("get existing containers: %w", err)
|
|
}
|
|
|
|
var blueContainer *store.Container
|
|
for _, c := range existing {
|
|
if c.State == "running" {
|
|
cCopy := c
|
|
blueContainer = &cCopy
|
|
break
|
|
}
|
|
}
|
|
|
|
// Step 1: Pull image.
|
|
if err := d.store.UpdateDeployStatus(deployID, "pulling", ""); err != nil {
|
|
slog.Warn("update deploy status", "error", err)
|
|
}
|
|
d.publishDeployStatus(deployID, project.ID, stage.ID, imageTag, "pulling", "")
|
|
d.logDeploy(deployID, fmt.Sprintf("Blue-green: pulling image %s:%s", project.Image, imageTag), "info")
|
|
|
|
authConfig, err := d.buildRegistryAuth(project)
|
|
if err != nil {
|
|
return "", "", "", fmt.Errorf("build registry auth: %w", err)
|
|
}
|
|
|
|
if err := d.docker.PullImage(ctx, project.Image, imageTag, authConfig); err != nil {
|
|
return "", "", "", fmt.Errorf("pull image: %w", err)
|
|
}
|
|
d.logDeploy(deployID, "Image pulled successfully", "info")
|
|
|
|
// Step 2: Ensure network.
|
|
networkID, err := d.docker.EnsureNetwork(ctx, settings.Network)
|
|
if err != nil {
|
|
return "", "", "", fmt.Errorf("ensure network: %w", err)
|
|
}
|
|
|
|
// Step 3: Create and start green container.
|
|
if err := d.store.UpdateDeployStatus(deployID, "starting", ""); err != nil {
|
|
slog.Warn("update deploy status", "error", err)
|
|
}
|
|
d.publishDeployStatus(deployID, project.ID, stage.ID, imageTag, "starting", "")
|
|
|
|
instanceID := uuid.New().String()
|
|
subdomain := d.buildSubdomain(project, stage, settings, imageTag)
|
|
workloadID := d.resolveProjectWorkloadID(project.ID)
|
|
containerName := docker.ContainerName(project.Name, stage.Name, imageTag)
|
|
portStr := fmt.Sprintf("%d/tcp", project.Port)
|
|
envVars := d.mergeEnvVars(project, stage.ID)
|
|
mounts := d.computeVolumeMounts(project.ID, project.Name, stage.Name, imageTag, settings.BaseVolumePath)
|
|
|
|
containerCfg := docker.ContainerConfig{
|
|
Name: containerName,
|
|
Image: project.Image + ":" + imageTag,
|
|
Env: envVars,
|
|
ExposedPorts: []string{portStr},
|
|
NetworkName: settings.Network,
|
|
NetworkID: networkID,
|
|
WorkloadID: workloadID,
|
|
WorkloadKind: string(store.WorkloadKindProject),
|
|
Role: stage.Name,
|
|
Mounts: mounts,
|
|
CpuLimit: stage.CpuLimit,
|
|
MemoryLimit: stage.MemoryLimit,
|
|
}
|
|
|
|
// Set proxy labels for providers that use Docker labels (e.g., Traefik).
|
|
if stage.EnableProxy {
|
|
fqdn := subdomain + "." + settings.Domain
|
|
if proxyLabels := d.proxy.ContainerLabels(fqdn, project.Port); proxyLabels != nil {
|
|
if containerCfg.Labels == nil {
|
|
containerCfg.Labels = make(map[string]string)
|
|
}
|
|
for k, v := range proxyLabels {
|
|
containerCfg.Labels[k] = v
|
|
}
|
|
}
|
|
}
|
|
|
|
d.logDeploy(deployID, fmt.Sprintf("Blue-green: creating green container %s", containerName), "info")
|
|
containerID, err := d.docker.CreateContainer(ctx, containerCfg)
|
|
if err != nil {
|
|
return "", "", instanceID, fmt.Errorf("create container: %w", err)
|
|
}
|
|
|
|
// Create container row.
|
|
row, err := d.store.CreateContainer(store.Container{
|
|
ID: instanceID,
|
|
WorkloadID: workloadID,
|
|
WorkloadKind: string(store.WorkloadKindProject),
|
|
Role: stage.Name,
|
|
StageID: stage.ID,
|
|
ContainerID: containerID,
|
|
ImageRef: project.Image + ":" + imageTag,
|
|
ImageTag: imageTag,
|
|
Host: "local",
|
|
State: "stopped",
|
|
Port: project.Port,
|
|
Subdomain: subdomain,
|
|
})
|
|
if err != nil {
|
|
return containerID, "", instanceID, fmt.Errorf("create container row: %w", err)
|
|
}
|
|
instanceID = row.ID
|
|
|
|
if err := d.store.SetDeployInstanceID(deployID, instanceID); err != nil {
|
|
slog.Warn("link deploy to container", "error", err)
|
|
}
|
|
|
|
d.logDeploy(deployID, fmt.Sprintf("Blue-green: starting green container %s", containerName), "info")
|
|
if err := d.docker.StartContainer(ctx, containerID); err != nil {
|
|
return containerID, "", instanceID, fmt.Errorf("start container: %w", err)
|
|
}
|
|
|
|
if err := d.store.UpdateContainerState(instanceID, "running"); err != nil {
|
|
slog.Warn("update container state", "error", err)
|
|
}
|
|
row.State = "running"
|
|
d.publishInstanceStatus(instanceID, project.ID, stage.ID, "running")
|
|
|
|
// Step 4: Health check the green container.
|
|
if project.Healthcheck != "" {
|
|
if err := d.store.UpdateDeployStatus(deployID, "health_checking", ""); err != nil {
|
|
slog.Warn("update deploy status", "error", err)
|
|
}
|
|
d.publishDeployStatus(deployID, project.ID, stage.ID, imageTag, "health_checking", "")
|
|
|
|
healthURL := fmt.Sprintf("http://%s:%d%s", containerName, project.Port, project.Healthcheck)
|
|
d.logDeploy(deployID, fmt.Sprintf("Blue-green: health checking green at %s", healthURL), "info")
|
|
|
|
if err := d.health.Check(ctx, healthURL); err != nil {
|
|
return containerID, "", instanceID, fmt.Errorf("health check green: %w", err)
|
|
}
|
|
d.logDeploy(deployID, "Blue-green: green health check passed", "info")
|
|
}
|
|
|
|
// Step 5: Swap proxy to green.
|
|
var proxyRouteID string
|
|
if stage.EnableProxy {
|
|
if err := d.store.UpdateDeployStatus(deployID, "configuring_proxy", ""); err != nil {
|
|
slog.Warn("update deploy status", "error", err)
|
|
}
|
|
d.publishDeployStatus(deployID, project.ID, stage.ID, imageTag, "configuring_proxy", "")
|
|
|
|
accessListID := settings.NpmAccessListID
|
|
if project.NpmAccessListID > 0 {
|
|
accessListID = project.NpmAccessListID
|
|
}
|
|
|
|
proxyRouteID, err = d.configureProxy(ctx, deployID, settings, containerID, containerName, project.Port, subdomain, accessListID)
|
|
if err != nil {
|
|
return containerID, "", instanceID, fmt.Errorf("configure proxy: %w", err)
|
|
}
|
|
|
|
row.ProxyRouteID = proxyRouteID
|
|
d.logDeploy(deployID, "Blue-green: proxy swapped to green container", "info")
|
|
|
|
// Create/update DNS record for the green container.
|
|
fqdn := subdomain + "." + settings.Domain
|
|
d.ensureDNS(ctx, fqdn, "instance", instanceID, deployID)
|
|
} else {
|
|
d.logDeploy(deployID, "Blue-green: proxy skipped (disabled for this stage)", "info")
|
|
}
|
|
|
|
row.Subdomain = subdomain
|
|
if err := d.store.UpdateContainer(row); err != nil {
|
|
slog.Warn("update container with proxy ID", "error", err)
|
|
}
|
|
|
|
// Step 6: Stop the blue container.
|
|
if blueContainer != nil {
|
|
d.logDeploy(deployID, fmt.Sprintf("Blue-green: stopping blue container %s (tag: %s)", blueContainer.ID, blueContainer.ImageTag), "info")
|
|
if err := d.removeContainer(ctx, *blueContainer, settings); err != nil {
|
|
// Non-fatal: log but continue. Green is already serving traffic.
|
|
d.logDeploy(deployID, fmt.Sprintf("Blue-green: warning: failed to remove blue container: %v", err), "warn")
|
|
} else {
|
|
d.logDeploy(deployID, "Blue-green: blue container removed", "info")
|
|
}
|
|
}
|
|
|
|
return containerID, proxyRouteID, instanceID, nil
|
|
}
|