Files
tiny-forge/internal/deployer/deployer.go
T
alexei.dolgolyov e94c4f9116 feat: optional NPM proxy per stage
Add enable_proxy boolean to stages (default true). When disabled,
the deployer skips NPM proxy host creation — useful for internal
services, workers, or externally-routed containers. UI shows
toggle in Add Stage form and "No Proxy" badge on stage header.
2026-03-29 12:58:13 +03:00

700 lines
23 KiB
Go

package deployer
import (
"context"
"encoding/json"
"fmt"
"log/slog"
"path/filepath"
"sort"
"sync"
"sync/atomic"
"github.com/alexei/docker-watcher/internal/crypto"
"github.com/alexei/docker-watcher/internal/docker"
"github.com/alexei/docker-watcher/internal/events"
"github.com/alexei/docker-watcher/internal/health"
"github.com/alexei/docker-watcher/internal/notify"
"github.com/alexei/docker-watcher/internal/npm"
"github.com/alexei/docker-watcher/internal/store"
"github.com/moby/moby/api/types/mount"
"github.com/google/uuid"
)
// Deployer orchestrates the full deployment flow: pull image, create container,
// start, configure proxy, health check, and handle rollback on failure.
// It implements both webhook.DeployTriggerer and registry.DeployTriggerer.
//
// A Deployer holds a sync.WaitGroup and an atomic.Bool, so it must not be
// copied after first use; share it by pointer (New returns *Deployer).
type Deployer struct {
	// docker pulls images, ensures the network, and creates/starts/removes containers.
	docker *docker.Client
	// npm authenticates against the proxy manager and finds/creates/updates/deletes proxy hosts.
	npm *npm.Client
	// store persists projects, stages, deploys, instances, settings, and deploy logs.
	store *store.Store
	// health runs the post-start health check against the container.
	health *health.Checker
	// notifier sends deploy_success / deploy_failure notifications.
	notifier *notify.Notifier
	// eventBus receives deploy log/status and instance status events; the
	// publish helpers skip publishing when it is nil.
	eventBus EventPublisher
	// encKey is the key handed to crypto.Decrypt for registry tokens, the
	// NPM password, and encrypted stage env values.
	encKey [32]byte
	// Graceful shutdown: tracks in-progress deploys.
	// activeWg counts running deploys; Drain waits on it.
	activeWg sync.WaitGroup
	// shuttingDown is set by Drain; entry points reject new deploys once it is true.
	shuttingDown atomic.Bool
}
// EventPublisher is the interface for publishing events to the event bus.
// The Deployer only ever calls Publish, so any sink with that single method
// can be supplied to New.
type EventPublisher interface {
	// Publish delivers evt to the bus. The Deployer does not inspect any result.
	Publish(evt events.Event)
}
// New assembles a Deployer from its collaborators. The arguments are stored
// as given; the shutdown bookkeeping (wait group and flag) starts at its zero
// value, i.e. "no deploys running, not shutting down".
func New(
	dockerClient *docker.Client,
	npmClient *npm.Client,
	st *store.Store,
	checker *health.Checker,
	notifier *notify.Notifier,
	eventBus EventPublisher,
	encKey [32]byte,
) *Deployer {
	d := new(Deployer)
	d.docker = dockerClient
	d.npm = npmClient
	d.store = st
	d.health = checker
	d.notifier = notifier
	d.eventBus = eventBus
	d.encKey = encKey
	return d
}
// Drain marks the deployer as shutting down and then blocks until every
// in-progress deploy has finished. Intended for the graceful-shutdown path.
func (d *Deployer) Drain() {
	const (
		drainingMsg = "deployer: draining in-progress deploys"
		drainedMsg  = "deployer: all deploys drained"
	)

	// Raise the flag before waiting so entry points stop accepting work.
	d.shuttingDown.Store(true)

	slog.Info(drainingMsg)
	d.activeWg.Wait()
	slog.Info(drainedMsg)
}
// AsyncTriggerDeploy validates the request, creates a deploy record, and
// returns the deploy ID immediately; the deploy pipeline itself runs in a
// background goroutine. Use this from HTTP handlers to avoid blocking the
// request. Progress is reported through the deploy log and status events.
//
// ctx is intentionally not propagated to the background work: the pipeline
// runs on a detached context so a client disconnect does not abort it.
//
// Returns an error (and an empty ID) when the deployer is shutting down, when
// the project or stage cannot be loaded, when promote validation fails, or
// when the deploy record cannot be created.
func (d *Deployer) AsyncTriggerDeploy(ctx context.Context, projectID, stageID, imageTag string) (string, error) {
	// Cheap fast path: once shutdown has begun, do not touch the wait group.
	if d.shuttingDown.Load() {
		return "", fmt.Errorf("deployer is shutting down, rejecting new deploy")
	}

	// Register with the wait group BEFORE doing any slow work and re-check the
	// flag afterwards. If the re-check still sees "not shutting down", the Add
	// is ordered before Drain's Wait, so Drain cannot return while this deploy
	// is being prepared or run. (Previously Add happened only after several
	// store calls, leaving a window in which Drain could finish early.)
	d.activeWg.Add(1)
	handedOff := false
	defer func() {
		// Release the slot on every early return; once the goroutine is
		// started it owns the matching Done.
		if !handedOff {
			d.activeWg.Done()
		}
	}()
	if d.shuttingDown.Load() {
		return "", fmt.Errorf("deployer is shutting down, rejecting new deploy")
	}

	// Validate inputs synchronously so the caller gets immediate feedback.
	project, err := d.store.GetProjectByID(projectID)
	if err != nil {
		return "", fmt.Errorf("get project: %w", err)
	}
	stage, err := d.store.GetStageByID(stageID)
	if err != nil {
		return "", fmt.Errorf("get stage: %w", err)
	}
	if err := d.validatePromoteFrom(stage, imageTag); err != nil {
		return "", fmt.Errorf("promote validation: %w", err)
	}

	// Create deploy record synchronously so caller gets the ID.
	deploy, err := d.store.CreateDeploy(store.Deploy{
		ProjectID: projectID,
		StageID:   stageID,
		ImageTag:  imageTag,
		Status:    "pending",
	})
	if err != nil {
		return "", fmt.Errorf("create deploy record: %w", err)
	}

	// Run the actual deploy in the background.
	handedOff = true
	go func() {
		defer d.activeWg.Done()
		// Use a detached context so client disconnect doesn't abort the deploy.
		bgCtx := context.Background()
		if err := d.runDeploy(bgCtx, project, stage, deploy.ID, imageTag); err != nil {
			slog.Error("async deploy failed", "deploy_id", deploy.ID, "error", err)
		}
	}()
	return deploy.ID, nil
}
// runDeploy drives one deploy whose record already exists and whose project
// and stage were validated by the caller. It loads settings, trims old
// instances, runs the blue-green pipeline (MaxInstances == 1) or the regular
// pipeline, and then either records success and notifies, or publishes the
// failure, rolls back, notifies, and returns the wrapped error.
func (d *Deployer) runDeploy(ctx context.Context, project store.Project, stage store.Stage, deployID string, imageTag string) error {
	settings, settingsErr := d.store.GetSettings()
	if settingsErr != nil {
		// Nothing has been started yet; just mark the record as failed.
		if err := d.store.UpdateDeployStatus(deployID, "failed", settingsErr.Error()); err != nil {
			slog.Warn("update deploy status", "error", err)
		}
		return fmt.Errorf("get settings: %w", settingsErr)
	}

	slog.Info("starting deploy",
		"deploy_id", deployID,
		"project", project.Name,
		"stage", stage.Name,
		"tag", imageTag,
	)
	startMsg := fmt.Sprintf("Starting deploy of %s:%s for project %s, stage %s", project.Image, imageTag, project.Name, stage.Name)
	d.logDeploy(deployID, startMsg, "info")

	// Make room for the new instance. A failure here is logged but does not
	// stop the deploy.
	// NOTE(review): this also runs when MaxInstances == 1, i.e. before the
	// blue-green path below — confirm that removing the current instance
	// first is intended.
	if err := d.enforceMaxInstances(ctx, stage, deployID, settings); err != nil {
		d.logDeploy(deployID, fmt.Sprintf("Failed to enforce max instances: %v", err), "error")
	}

	var (
		containerID string
		proxyID     int
		instanceID  string
		pipelineErr error
	)
	switch stage.MaxInstances {
	case 1:
		containerID, proxyID, instanceID, pipelineErr = d.blueGreenDeploy(ctx, project, stage, settings, deployID, imageTag)
	default:
		containerID, proxyID, instanceID, pipelineErr = d.executeDeploy(ctx, project, stage, settings, deployID, imageTag)
	}

	if pipelineErr != nil {
		reason := pipelineErr.Error()
		d.logDeploy(deployID, fmt.Sprintf("Deploy failed: %v", pipelineErr), "error")
		d.publishDeployStatus(deployID, project.ID, stage.ID, imageTag, "failed", reason)
		// Whatever IDs the pipeline got as far as producing are handed to rollback.
		d.rollback(ctx, deployID, containerID, proxyID, instanceID)
		d.notifier.Send(settings.NotificationURL, notify.Event{
			Type:     "deploy_failure",
			Project:  project.Name,
			Stage:    stage.Name,
			ImageTag: imageTag,
			Error:    reason,
		})
		return fmt.Errorf("deploy failed: %w", pipelineErr)
	}

	if err := d.store.UpdateDeployStatus(deployID, "success", ""); err != nil {
		slog.Warn("update deploy status to success", "error", err)
	}
	d.publishDeployStatus(deployID, project.ID, stage.ID, imageTag, "success", "")

	sub := d.buildSubdomain(project, stage, settings, imageTag)
	publicURL := fmt.Sprintf("https://%s.%s", sub, settings.Domain)
	d.logDeploy(deployID, fmt.Sprintf("Deploy successful: %s", publicURL), "info")
	d.notifier.Send(settings.NotificationURL, notify.Event{
		Type:      "deploy_success",
		Project:   project.Name,
		Stage:     stage.Name,
		ImageTag:  imageTag,
		Subdomain: sub,
		URL:       publicURL,
	})
	return nil
}
// TriggerDeploy is the synchronous entry point for deployments (used by
// poller and webhook). It validates inputs, creates a deploy record, and
// runs the pipeline via runDeploy on the caller's ctx, returning its result.
//
// Returns an error when the deployer is shutting down, when the project or
// stage cannot be loaded, when promote validation fails, when the deploy
// record cannot be created, or when the deploy itself fails.
func (d *Deployer) TriggerDeploy(ctx context.Context, projectID, stageID, imageTag string) error {
	// Cheap fast path: once shutdown has begun, do not touch the wait group.
	if d.shuttingDown.Load() {
		return fmt.Errorf("deployer is shutting down, rejecting new deploy")
	}

	// Register first, then re-check the flag: if the re-check still sees
	// "not shutting down", the Add is ordered before Drain's Wait, so Drain
	// cannot miss this deploy. Checking only before Add leaves a gap in which
	// Drain could observe a zero counter and return.
	d.activeWg.Add(1)
	defer d.activeWg.Done()
	if d.shuttingDown.Load() {
		return fmt.Errorf("deployer is shutting down, rejecting new deploy")
	}

	project, err := d.store.GetProjectByID(projectID)
	if err != nil {
		return fmt.Errorf("get project: %w", err)
	}
	stage, err := d.store.GetStageByID(stageID)
	if err != nil {
		return fmt.Errorf("get stage: %w", err)
	}
	if err := d.validatePromoteFrom(stage, imageTag); err != nil {
		return fmt.Errorf("promote validation: %w", err)
	}

	deploy, err := d.store.CreateDeploy(store.Deploy{
		ProjectID: projectID,
		StageID:   stageID,
		ImageTag:  imageTag,
		Status:    "pending",
	})
	if err != nil {
		return fmt.Errorf("create deploy record: %w", err)
	}

	// runDeploy already wraps its errors; pass the result straight through.
	return d.runDeploy(ctx, project, stage, deploy.ID, imageTag)
}
// executeDeploy runs the deploy pipeline steps and returns rollback-relevant state.
// It returns (containerID, npmProxyID, instanceID, error).
//
// Steps, in order: pull image, ensure network, create container + instance
// record, start container, configure the NPM proxy (only when
// stage.EnableProxy is set), and run the health check (only when
// project.Healthcheck is non-empty).
//
// On error the IDs gathered so far are still returned (zero values for steps
// not reached) so the caller can undo partial work. Note that instanceID is
// generated before the container is created, so it can be non-empty even when
// no instance record was written. Failures to update deploy/instance status
// in the store are logged as warnings and do not abort the pipeline.
func (d *Deployer) executeDeploy(
	ctx context.Context,
	project store.Project,
	stage store.Stage,
	settings store.Settings,
	deployID string,
	imageTag string,
) (string, int, string, error) {
	// Rollback state; each is filled in as the corresponding step succeeds.
	var containerID string
	var npmProxyID int
	var instanceID string
	// Step 1: Pull image.
	if err := d.store.UpdateDeployStatus(deployID, "pulling", ""); err != nil {
		slog.Warn("update deploy status", "error", err)
	}
	d.publishDeployStatus(deployID, project.ID, stage.ID, imageTag, "pulling", "")
	d.logDeploy(deployID, fmt.Sprintf("Pulling image %s:%s", project.Image, imageTag), "info")
	// authConfig is empty when the project has no registry or the registry has no token.
	authConfig, err := d.buildRegistryAuth(project)
	if err != nil {
		return containerID, npmProxyID, instanceID, fmt.Errorf("build registry auth: %w", err)
	}
	if err := d.docker.PullImage(ctx, project.Image, imageTag, authConfig); err != nil {
		return containerID, npmProxyID, instanceID, fmt.Errorf("pull image: %w", err)
	}
	d.logDeploy(deployID, "Image pulled successfully", "info")
	// Step 2: Ensure network exists.
	networkID, err := d.docker.EnsureNetwork(ctx, settings.Network)
	if err != nil {
		return containerID, npmProxyID, instanceID, fmt.Errorf("ensure network: %w", err)
	}
	d.logDeploy(deployID, fmt.Sprintf("Network %s ready (ID: %s)", settings.Network, truncateID(networkID)), "info")
	// Step 3: Create and start container.
	if err := d.store.UpdateDeployStatus(deployID, "starting", ""); err != nil {
		slog.Warn("update deploy status", "error", err)
	}
	d.publishDeployStatus(deployID, project.ID, stage.ID, imageTag, "starting", "")
	// Pre-generate instance ID so it can be set as a container label.
	instanceID = uuid.New().String()
	subdomain := d.buildSubdomain(project, stage, settings, imageTag)
	containerName := docker.ContainerName(project.Name, stage.Name, imageTag)
	portStr := fmt.Sprintf("%d/tcp", project.Port)
	envVars := d.mergeEnvVars(project, stage.ID)
	mounts := d.computeVolumeMounts(project.ID, stage.Name, imageTag, settings.BaseVolumePath)
	containerCfg := docker.ContainerConfig{
		Name:         containerName,
		Image:        project.Image + ":" + imageTag,
		Env:          envVars,
		ExposedPorts: []string{portStr},
		NetworkName:  settings.Network,
		NetworkID:    networkID,
		Project:      project.Name,
		Stage:        stage.Name,
		InstanceID:   instanceID,
		Mounts:       mounts,
	}
	d.logDeploy(deployID, fmt.Sprintf("Creating container %s", containerName), "info")
	containerID, err = d.docker.CreateContainer(ctx, containerCfg)
	if err != nil {
		return containerID, npmProxyID, instanceID, fmt.Errorf("create container: %w", err)
	}
	d.logDeploy(deployID, fmt.Sprintf("Container created (ID: %s)", truncateID(containerID)), "info")
	// Create instance record in store with the pre-generated ID.
	// The record starts as "stopped" and is flipped to "running" after the
	// container has actually been started below.
	inst, err := d.store.CreateInstanceWithID(store.Instance{
		ID:          instanceID,
		StageID:     stage.ID,
		ProjectID:   project.ID,
		ContainerID: containerID,
		ImageTag:    imageTag,
		Subdomain:   subdomain,
		Status:      "stopped",
		Port:        project.Port,
	})
	if err != nil {
		return containerID, npmProxyID, instanceID, fmt.Errorf("create instance record: %w", err)
	}
	// Use the ID reported back by the store from here on.
	instanceID = inst.ID
	// Link deploy to instance.
	if err := d.store.SetDeployInstanceID(deployID, instanceID); err != nil {
		slog.Warn("link deploy to instance", "error", err)
	}
	d.logDeploy(deployID, fmt.Sprintf("Starting container %s", containerName), "info")
	if err := d.docker.StartContainer(ctx, containerID); err != nil {
		return containerID, npmProxyID, instanceID, fmt.Errorf("start container: %w", err)
	}
	if err := d.store.UpdateInstanceStatus(instanceID, "running"); err != nil {
		slog.Warn("update instance status to running", "error", err)
	}
	d.publishInstanceStatus(instanceID, project.ID, stage.ID, "running")
	d.logDeploy(deployID, "Container started", "info")
	// Step 4: Configure NPM proxy (optional per stage).
	if stage.EnableProxy {
		if err := d.store.UpdateDeployStatus(deployID, "configuring_proxy", ""); err != nil {
			slog.Warn("update deploy status", "error", err)
		}
		d.publishDeployStatus(deployID, project.ID, stage.ID, imageTag, "configuring_proxy", "")
		// The proxy forwards to the container by name on project.Port.
		npmProxyID, err = d.configureProxy(ctx, deployID, settings, containerName, project.Port, subdomain)
		if err != nil {
			return containerID, npmProxyID, instanceID, fmt.Errorf("configure proxy: %w", err)
		}
		// Update instance with NPM proxy ID.
		inst.NpmProxyID = npmProxyID
		inst.Subdomain = subdomain
		if err := d.store.UpdateInstance(inst); err != nil {
			slog.Warn("update instance with proxy ID", "error", err)
		}
	} else {
		// Proxy disabled for this stage: npmProxyID stays 0; the subdomain is
		// still recorded on the instance.
		d.logDeploy(deployID, "Proxy creation skipped (disabled for this stage)", "info")
		inst.Subdomain = subdomain
		if err := d.store.UpdateInstance(inst); err != nil {
			slog.Warn("update instance", "error", err)
		}
	}
	// Step 5: Health check.
	if project.Healthcheck != "" {
		if err := d.store.UpdateDeployStatus(deployID, "health_checking", ""); err != nil {
			slog.Warn("update deploy status", "error", err)
		}
		d.publishDeployStatus(deployID, project.ID, stage.ID, imageTag, "health_checking", "")
		// The check targets the container directly by name, not the proxied domain.
		healthURL := fmt.Sprintf("http://%s:%d%s", containerName, project.Port, project.Healthcheck)
		d.logDeploy(deployID, fmt.Sprintf("Running health check: %s", healthURL), "info")
		if err := d.health.Check(ctx, healthURL); err != nil {
			return containerID, npmProxyID, instanceID, fmt.Errorf("health check: %w", err)
		}
		d.logDeploy(deployID, "Health check passed", "info")
	} else {
		d.logDeploy(deployID, "No health check configured, skipping", "info")
	}
	return containerID, npmProxyID, instanceID, nil
}
// configureProxy points "<subdomain>.<settings.Domain>" at the container.
// It logs in to NPM with the credentials from settings, then updates the
// proxy host already registered for that domain or creates a new one.
// Returns the resulting NPM proxy host ID, or 0 and an error.
func (d *Deployer) configureProxy(
	ctx context.Context,
	deployID string,
	settings store.Settings,
	containerName string,
	containerPort int,
	subdomain string,
) (int, error) {
	// Log in to NPM.
	password, err := d.decryptNpmPassword(settings.NpmPassword)
	if err != nil {
		return 0, fmt.Errorf("decrypt npm password: %w", err)
	}
	if err := d.npm.Authenticate(ctx, settings.NpmEmail, password); err != nil {
		return 0, fmt.Errorf("authenticate to npm: %w", err)
	}

	domain := subdomain + "." + settings.Domain
	d.logDeploy(deployID, fmt.Sprintf("Configuring proxy: %s -> %s:%d", domain, containerName, containerPort), "info")

	// Look for a host that already serves this domain.
	current, exists, err := d.npm.FindProxyHostByDomain(ctx, domain)
	if err != nil {
		return 0, fmt.Errorf("find existing proxy host: %w", err)
	}

	// The same settings are used for both the create and the update path.
	cfg := npm.ProxyHostConfig{
		DomainNames:    []string{domain},
		ForwardScheme:  "http",
		ForwardHost:    containerName,
		ForwardPort:    containerPort,
		BlockExploits:  true,
		AllowWebsocket: true,
		HTTP2Support:   true,
		Meta:           npm.Meta{},
		Locations:      []any{},
	}

	if !exists {
		d.logDeploy(deployID, fmt.Sprintf("Creating new proxy host for %s", domain), "info")
		created, err := d.npm.CreateProxyHost(ctx, cfg)
		if err != nil {
			return 0, fmt.Errorf("create proxy host: %w", err)
		}
		d.logDeploy(deployID, fmt.Sprintf("Proxy host created (ID: %d)", created.ID), "info")
		return created.ID, nil
	}

	d.logDeploy(deployID, fmt.Sprintf("Updating existing proxy host %d for %s", current.ID, domain), "info")
	updated, err := d.npm.UpdateProxyHost(ctx, current.ID, cfg)
	if err != nil {
		return 0, fmt.Errorf("update proxy host: %w", err)
	}
	d.logDeploy(deployID, "Proxy host updated", "info")
	return updated.ID, nil
}
// enforceMaxInstances frees a slot for the upcoming deployment by removing
// the oldest running/stopped instances of the stage until fewer than
// stage.MaxInstances remain. A non-positive MaxInstances means no limit.
// Failures to remove an individual instance are written to the deploy log
// and skipped; only a failure to list the instances is returned as an error.
func (d *Deployer) enforceMaxInstances(ctx context.Context, stage store.Stage, deployID string, settings store.Settings) error {
	limit := stage.MaxInstances
	if limit <= 0 {
		return nil
	}

	all, err := d.store.GetInstancesByStageID(stage.ID)
	if err != nil {
		return fmt.Errorf("get instances for stage: %w", err)
	}

	// Only running/stopped instances count against the limit.
	var candidates []store.Instance
	for _, inst := range all {
		switch inst.Status {
		case "running", "stopped":
			candidates = append(candidates, inst)
		}
	}

	// One extra slot is needed for the instance about to be created.
	excess := len(candidates) - limit + 1
	if excess <= 0 {
		return nil
	}

	// Oldest first, by created_at.
	sort.Slice(candidates, func(a, b int) bool {
		return candidates[a].CreatedAt < candidates[b].CreatedAt
	})
	if excess > len(candidates) {
		excess = len(candidates)
	}

	for _, victim := range candidates[:excess] {
		d.logDeploy(deployID, fmt.Sprintf("Removing oldest instance %s (tag: %s) to enforce max_instances=%d", victim.ID, victim.ImageTag, stage.MaxInstances), "info")
		if rmErr := d.removeInstance(ctx, victim, settings); rmErr != nil {
			d.logDeploy(deployID, fmt.Sprintf("Failed to remove instance %s: %v", victim.ID, rmErr), "warn")
		} else {
			d.logDeploy(deployID, fmt.Sprintf("Removed instance %s", victim.ID), "info")
		}
	}
	return nil
}
// removeInstance tears down one instance: it marks the record as "removing",
// force-removes the Docker container (if any), deletes the NPM proxy host
// (if any), and finally deletes the instance record. Container and proxy
// cleanup are best-effort and only logged; the sole error returned is a
// failure to delete the record itself.
func (d *Deployer) removeInstance(ctx context.Context, inst store.Instance, settings store.Settings) error {
	if err := d.store.UpdateInstanceStatus(inst.ID, "removing"); err != nil {
		slog.Warn("update instance status to removing", "instance_id", inst.ID, "error", err)
	}

	// Docker container.
	if id := inst.ContainerID; id != "" {
		if err := d.docker.RemoveContainer(ctx, id, true); err != nil {
			slog.Warn("remove container", "container_id", inst.ContainerID, "error", err)
		}
	}

	// NPM proxy host: decrypt -> authenticate -> delete, stopping at the
	// first step that fails.
	if inst.NpmProxyID > 0 {
		func() {
			password, err := d.decryptNpmPassword(settings.NpmPassword)
			if err != nil {
				slog.Warn("decrypt npm password for proxy cleanup", "error", err)
				return
			}
			if authErr := d.npm.Authenticate(ctx, settings.NpmEmail, password); authErr != nil {
				slog.Warn("authenticate npm for proxy cleanup", "error", authErr)
				return
			}
			if delErr := d.npm.DeleteProxyHost(ctx, inst.NpmProxyID); delErr != nil {
				slog.Warn("delete proxy host", "proxy_id", inst.NpmProxyID, "error", delErr)
			}
		}()
	}

	// Instance record.
	if err := d.store.DeleteInstance(inst.ID); err != nil {
		return fmt.Errorf("delete instance record: %w", err)
	}
	return nil
}
// buildSubdomain derives an instance's subdomain by handing the configured
// subdomain pattern, the project and stage names, the image tag, and the
// stage's own subdomain setting to GenerateTaggedSubdomain.
func (d *Deployer) buildSubdomain(project store.Project, stage store.Stage, settings store.Settings, imageTag string) string {
	pattern := settings.SubdomainPattern
	stageSubdomain := stage.Subdomain
	return GenerateTaggedSubdomain(pattern, project.Name, stage.Name, imageTag, stageSubdomain)
}
// buildRegistryAuth produces the Docker registry auth string used when
// pulling the project's image. It returns "" (and no error) when the project
// has no registry configured or the registry has no stored token; otherwise
// it looks the registry up by name, decrypts its token, and encodes it.
func (d *Deployer) buildRegistryAuth(project store.Project) (string, error) {
	name := project.Registry
	if name == "" {
		return "", nil
	}

	reg, err := d.store.GetRegistryByName(name)
	if err != nil {
		return "", fmt.Errorf("get registry %s: %w", project.Registry, err)
	}
	if reg.Token == "" {
		return "", nil
	}

	token, err := crypto.Decrypt(d.encKey, reg.Token)
	if err != nil {
		return "", fmt.Errorf("decrypt registry token: %w", err)
	}
	// NOTE(review): the token is passed as both the first and second
	// argument (presumably username and password) — confirm this is intended.
	return docker.EncodeRegistryAuth(token, token, reg.URL)
}
// decryptNpmPassword turns the stored (encrypted) NPM password into plain
// text using the deployer's key. An empty input yields an empty password and
// no error, without calling the decrypter.
func (d *Deployer) decryptNpmPassword(encryptedPassword string) (string, error) {
	if encryptedPassword != "" {
		return crypto.Decrypt(d.encKey, encryptedPassword)
	}
	return "", nil
}
// mergeEnvVars builds the final environment variable list for a container:
//  1. Parse the project-level env JSON (an object of string values).
//  2. Overlay the stage-level overrides (stage wins on key conflict).
//  3. Decrypt stage-level values that are flagged as encrypted.
//
// Project-level values are used as stored; only stage overrides are ever
// decrypted. The function is best-effort: a malformed project env JSON, a
// failure to load stage overrides, or a value that cannot be decrypted is
// logged and skipped (for a failed decrypt, any project-level value for that
// key is left in place).
//
// Returns KEY=VALUE pairs sorted lexicographically so that the same inputs
// always produce the same container configuration (map iteration order is
// otherwise random).
func (d *Deployer) mergeEnvVars(project store.Project, stageID string) []string {
	// Step 1: Parse project-level env.
	envMap := make(map[string]string)
	if project.Env != "" && project.Env != "{}" {
		var projectEnv map[string]string
		if err := json.Unmarshal([]byte(project.Env), &projectEnv); err != nil {
			slog.Warn("parse project env vars", "error", err)
		} else {
			for k, v := range projectEnv {
				envMap[k] = v
			}
		}
	}

	// Step 2: Overlay with stage-level overrides.
	stageEnvs, err := d.store.GetStageEnvByStageID(stageID)
	if err != nil {
		slog.Warn("get stage env overrides", "stage_id", stageID, "error", err)
	} else {
		for _, se := range stageEnvs {
			value := se.Value
			if se.Encrypted {
				// Step 3: Decrypt secret values.
				decrypted, err := crypto.Decrypt(d.encKey, se.Value)
				if err != nil {
					slog.Warn("decrypt stage env value", "key", se.Key, "error", err)
					continue
				}
				value = decrypted
			}
			envMap[se.Key] = value
		}
	}

	vars := make([]string, 0, len(envMap))
	for k, v := range envMap {
		vars = append(vars, k+"="+v)
	}
	// Deterministic order regardless of map iteration.
	sort.Strings(vars)
	return vars
}
// computeVolumeMounts turns the project's volume configuration into Docker
// bind mounts. A non-absolute source is resolved against basePath (when
// basePath is non-empty). Volumes in "isolated" mode additionally get a
// "<stage>-<tag>" sub-directory appended to the source; every other mode
// uses the source as-is. Returns nil when the volumes cannot be loaded
// (logged as a warning) or the project has none.
func (d *Deployer) computeVolumeMounts(projectID, stageName, imageTag, basePath string) []mount.Mount {
	volumes, err := d.store.GetVolumesByProjectID(projectID)
	switch {
	case err != nil:
		slog.Warn("get project volumes", "project_id", projectID, "error", err)
		return nil
	case len(volumes) == 0:
		return nil
	}

	// Per-deployment directory name used by isolated volumes.
	isolatedDir := fmt.Sprintf("%s-%s", stageName, imageTag)

	result := make([]mount.Mount, len(volumes))
	for i, v := range volumes {
		src := v.Source
		if basePath != "" && !filepath.IsAbs(src) {
			src = filepath.Join(basePath, src)
		}
		if v.Mode == "isolated" {
			src = filepath.Join(src, isolatedDir)
		}
		result[i] = mount.Mount{
			Type:   mount.TypeBind,
			Source: src,
			Target: v.Target,
		}
	}
	return result
}
// logDeploy records one log line for a deploy in the store and, when an
// event bus is configured, publishes the same line as a deploy-log event.
// A store failure is logged as a warning and never returned to the caller.
func (d *Deployer) logDeploy(deployID, message, level string) {
	if err := d.store.AppendDeployLog(deployID, message, level); err != nil {
		slog.Warn("append deploy log", "error", err)
	}

	if d.eventBus == nil {
		return
	}
	payload := events.DeployLogPayload{
		DeployID: deployID,
		Message:  message,
		Level:    level,
	}
	d.eventBus.Publish(events.Event{
		Type:    events.EventDeployLog,
		Payload: payload,
	})
}
// publishDeployStatus emits a deploy status change on the event bus.
// It is a no-op when no event bus is configured.
func (d *Deployer) publishDeployStatus(deployID, projectID, stageID, imageTag, status, deployErr string) {
	if d.eventBus == nil {
		return
	}
	payload := events.DeployStatusPayload{
		DeployID:  deployID,
		ProjectID: projectID,
		StageID:   stageID,
		ImageTag:  imageTag,
		Status:    status,
		Error:     deployErr,
	}
	d.eventBus.Publish(events.Event{
		Type:    events.EventDeployStatus,
		Payload: payload,
	})
}
// publishInstanceStatus emits an instance status change on the event bus.
// It is a no-op when no event bus is configured.
func (d *Deployer) publishInstanceStatus(instanceID, projectID, stageID, status string) {
	if d.eventBus == nil {
		return
	}
	payload := events.InstanceStatusPayload{
		InstanceID: instanceID,
		ProjectID:  projectID,
		StageID:    stageID,
		Status:     status,
	}
	d.eventBus.Publish(events.Event{
		Type:    events.EventInstanceStatus,
		Payload: payload,
	})
}
// truncateID shortens a Docker ID to its first 12 bytes for display.
// IDs that are already 12 bytes or shorter (including "") are returned unchanged.
func truncateID(id string) string {
	const displayLen = 12
	if len(id) <= displayLen {
		return id
	}
	return id[:displayLen]
}