package deployer

import (
	"context"
	"fmt"
	"log/slog"

	"github.com/alexei/docker-watcher/internal/docker"
	"github.com/alexei/docker-watcher/internal/store"
	"github.com/google/uuid"
)

// blueGreenDeploy rolls out imageTag for the given stage by bringing up a new
// ("green") container next to the currently running ("blue") one and only
// retiring blue once green is ready.
//
// Sequence:
//  1. Look up the first instance of the stage whose status is "running" (blue).
//  2. Pull the image and make sure the configured network exists.
//  3. Create the green container, persist its instance record, start it.
//  4. If project.Healthcheck is set, probe green over HTTP.
//  5. If stage.EnableProxy is set, configure the proxy route and DNS for green.
//  6. Persist the updated instance record and remove the blue instance, if any.
//
// Returns the green container ID, the proxy route ID (empty when the proxy is
// disabled) and the green instance ID.
//
// On error the blue instance is left untouched and nothing is cleaned up here:
// whatever identifiers already exist (instance ID, container ID) are returned
// together with the error, and the proxy route ID is always empty.
// NOTE(review): presumably the caller uses those identifiers to roll back the
// half-created green instance — confirm against the call site.
func (d *Deployer) blueGreenDeploy(
	ctx context.Context,
	project store.Project,
	stage store.Stage,
	settings store.Settings,
	deployID string,
	imageTag string,
) (string, string, string, error) {
	// Locate the blue instance: the first one reported as running for this stage.
	stageInstances, err := d.store.GetInstancesByStageID(stage.ID)
	if err != nil {
		return "", "", "", fmt.Errorf("get existing instances: %w", err)
	}

	var blue *store.Instance
	for _, candidate := range stageInstances {
		if candidate.Status == "running" {
			// Take a private copy so the pointer does not alias the loop variable.
			snapshot := candidate
			blue = &snapshot
			break
		}
	}

	// Step 1: pull the image. Status bookkeeping is best-effort: a failed store
	// update is only logged and never aborts the deployment.
	if err := d.store.UpdateDeployStatus(deployID, "pulling", ""); err != nil {
		slog.Warn("update deploy status", "error", err)
	}
	d.publishDeployStatus(deployID, project.ID, stage.ID, imageTag, "pulling", "")
	d.logDeploy(deployID, fmt.Sprintf("Blue-green: pulling image %s:%s", project.Image, imageTag), "info")

	registryAuth, err := d.buildRegistryAuth(project)
	if err != nil {
		return "", "", "", fmt.Errorf("build registry auth: %w", err)
	}
	if err := d.docker.PullImage(ctx, project.Image, imageTag, registryAuth); err != nil {
		return "", "", "", fmt.Errorf("pull image: %w", err)
	}
	d.logDeploy(deployID, "Image pulled successfully", "info")

	// Step 2: make sure the target network exists.
	netID, err := d.docker.EnsureNetwork(ctx, settings.Network)
	if err != nil {
		return "", "", "", fmt.Errorf("ensure network: %w", err)
	}

	// Step 3: describe, create and start the green container.
	if err := d.store.UpdateDeployStatus(deployID, "starting", ""); err != nil {
		slog.Warn("update deploy status", "error", err)
	}
	d.publishDeployStatus(deployID, project.ID, stage.ID, imageTag, "starting", "")

	instanceID := uuid.New().String()
	subdomain := d.buildSubdomain(project, stage, settings, imageTag)
	// Fully qualified host name; only consumed when the proxy is enabled.
	fqdn := subdomain + "." + settings.Domain
	containerName := docker.ContainerName(project.Name, stage.Name, imageTag)
	exposedPort := fmt.Sprintf("%d/tcp", project.Port)
	env := d.mergeEnvVars(project, stage.ID)
	volumeMounts := d.computeVolumeMounts(project.ID, project.Name, stage.Name, imageTag, settings.BaseVolumePath)

	greenCfg := docker.ContainerConfig{
		Name:         containerName,
		Image:        project.Image + ":" + imageTag,
		Env:          env,
		ExposedPorts: []string{exposedPort},
		NetworkName:  settings.Network,
		NetworkID:    netID,
		Project:      project.Name,
		Stage:        stage.Name,
		InstanceID:   instanceID,
		Mounts:       volumeMounts,
		CpuLimit:     stage.CpuLimit,
		MemoryLimit:  stage.MemoryLimit,
	}

	// Some proxy providers are driven by container labels (e.g., Traefik);
	// merge whatever labels the provider asks for into the container config.
	if stage.EnableProxy {
		if extra := d.proxy.ContainerLabels(fqdn, project.Port); extra != nil {
			if greenCfg.Labels == nil {
				greenCfg.Labels = make(map[string]string)
			}
			for key, value := range extra {
				greenCfg.Labels[key] = value
			}
		}
	}

	d.logDeploy(deployID, fmt.Sprintf("Blue-green: creating green container %s", containerName), "info")
	containerID, err := d.docker.CreateContainer(ctx, greenCfg)
	if err != nil {
		return "", "", instanceID, fmt.Errorf("create container: %w", err)
	}

	// Persist the instance as "stopped" first; it is flipped to "running" only
	// after the container has actually been started.
	green, err := d.store.CreateInstanceWithID(store.Instance{
		ID:          instanceID,
		StageID:     stage.ID,
		ProjectID:   project.ID,
		ContainerID: containerID,
		ImageTag:    imageTag,
		Subdomain:   subdomain,
		Status:      "stopped",
		Port:        project.Port,
	})
	if err != nil {
		return containerID, "", instanceID, fmt.Errorf("create instance record: %w", err)
	}
	// From here on use the ID the store handed back.
	instanceID = green.ID

	if err := d.store.SetDeployInstanceID(deployID, instanceID); err != nil {
		slog.Warn("link deploy to instance", "error", err)
	}

	d.logDeploy(deployID, fmt.Sprintf("Blue-green: starting green container %s", containerName), "info")
	if err := d.docker.StartContainer(ctx, containerID); err != nil {
		return containerID, "", instanceID, fmt.Errorf("start container: %w", err)
	}

	if err := d.store.UpdateInstanceStatus(instanceID, "running"); err != nil {
		slog.Warn("update instance status", "error", err)
	}
	d.publishInstanceStatus(instanceID, project.ID, stage.ID, "running")

	// Step 4: probe green, but only when the project defines a health check path.
	if project.Healthcheck != "" {
		if err := d.store.UpdateDeployStatus(deployID, "health_checking", ""); err != nil {
			slog.Warn("update deploy status", "error", err)
		}
		d.publishDeployStatus(deployID, project.ID, stage.ID, imageTag, "health_checking", "")

		// The probe addresses the container by name on its own port.
		probeURL := fmt.Sprintf("http://%s:%d%s", containerName, project.Port, project.Healthcheck)
		d.logDeploy(deployID, fmt.Sprintf("Blue-green: health checking green at %s", probeURL), "info")

		if err := d.health.Check(ctx, probeURL); err != nil {
			return containerID, "", instanceID, fmt.Errorf("health check green: %w", err)
		}
		d.logDeploy(deployID, "Blue-green: green health check passed", "info")
	}

	// Step 5: route traffic to green (skipped when the stage has no proxy).
	var proxyRouteID string
	if stage.EnableProxy {
		if err := d.store.UpdateDeployStatus(deployID, "configuring_proxy", ""); err != nil {
			slog.Warn("update deploy status", "error", err)
		}
		d.publishDeployStatus(deployID, project.ID, stage.ID, imageTag, "configuring_proxy", "")

		// A positive project-level access list overrides the global setting.
		accessListID := project.NpmAccessListID
		if accessListID <= 0 {
			accessListID = settings.NpmAccessListID
		}

		routeID, err := d.configureProxy(ctx, deployID, settings, containerID, containerName, project.Port, subdomain, accessListID)
		if err != nil {
			return containerID, "", instanceID, fmt.Errorf("configure proxy: %w", err)
		}
		proxyRouteID = routeID
		green.ProxyRouteID = proxyRouteID
		d.logDeploy(deployID, "Blue-green: proxy swapped to green container", "info")

		// Create/update the DNS record for the green instance.
		d.ensureDNS(ctx, fqdn, "instance", instanceID, deployID)
	} else {
		d.logDeploy(deployID, "Blue-green: proxy skipped (disabled for this stage)", "info")
	}

	// Write the (possibly proxy-enriched) instance record back; best-effort.
	green.Subdomain = subdomain
	if err := d.store.UpdateInstance(green); err != nil {
		slog.Warn("update instance with proxy ID", "error", err)
	}

	// Step 6: retire blue, if there was one.
	if blue == nil {
		return containerID, proxyRouteID, instanceID, nil
	}

	d.logDeploy(deployID, fmt.Sprintf("Blue-green: stopping blue instance %s (tag: %s)", blue.ID, blue.ImageTag), "info")
	if err := d.removeInstance(ctx, *blue, settings); err != nil {
		// Non-fatal: green is already in place, so only record a warning.
		d.logDeploy(deployID, fmt.Sprintf("Blue-green: warning: failed to remove blue instance: %v", err), "warn")
		return containerID, proxyRouteID, instanceID, nil
	}
	d.logDeploy(deployID, "Blue-green: blue instance removed", "info")

	return containerID, proxyRouteID, instanceID, nil
}