refactor(workload): extract Instance entirely; Container is canonical
Build / build (push) Successful in 10m41s

End-to-end extraction (i.e. complete removal) of the Instance concept. After this commit:

  * internal/store/instances.go — DELETED
  * internal/store/models.go — Instance struct gone, ProxyRoute moved here
  * The containers table is the single source of truth for project/stack/site
    container state. The instances table is dropped via a DROP TABLE migration
    (idempotent; re-runnable on every boot).
  * Legacy tinyforge.project / tinyforge.stage / tinyforge.instance-id
    Docker labels are no longer emitted; only tinyforge.workload.{id,kind},
    tinyforge.role, and tinyforge.managed are stamped on new containers.

Backend rewrites:
  - internal/deployer:        executeDeploy + blueGreenDeploy + rollback +
                              promote use store.Container natively. New
                              removeContainer() replaces removeInstance().
                              enforceMaxInstances reads via
                              ListContainersByStageID.
  - internal/reconciler:      legacy tinyforge.instance-id dispatch removed;
                              upsertByWorkloadLabel now finds existing rows
                              by docker container ID first and falls back to
                              the deterministic workloadID:role key.
  - internal/stale/scanner:   Scan + new FindStaleContainers walk the
                              containers table; emit StaleContainer JSON.
  - internal/stats/collector: ListContainers replaces ListAllInstances.
  - internal/webhook/handler: workload-secret lookup tried first; falls back
                              to project / static_site secret column.
  - internal/api: instances.go, stale.go, stats.go, stats_history.go,
                  projects.go, settings.go, docker.go, dns.go all read /
                  write through Container.

Docker layer:
  - ManagedContainer exposes WorkloadID/Kind/Role from the canonical labels.
  - ListContainers filters by tinyforge.managed=true.
  - Network creation uses LabelManaged instead of LabelProject.

Frontend:
  - Instance type is now a Container alias; .status → .state,
    .last_alive_at → .last_seen_at.
  - InstanceCard takes stageId as a prop (no longer derived from Instance).
  - StaleContainer JSON shape rewritten: { container, workload_name, role,
    days_stale }. StaleContainerCard + /containers/stale page updated.
  - ProjectCard / homepage / SystemHealthCard filter by .state.

The migration loop now tolerates "no such table" alongside "duplicate
column" / "already exists" so obsolete ALTER TABLE entries targeting the
dropped instances table no-op cleanly on first boot.

Tests: store + deployer + reconciler + webhook + staticsite + notify all
still pass. Frontend svelte-check: zero errors.
This commit is contained in:
2026-05-09 14:43:12 +03:00
parent d516462750
commit d8ab22876f
32 changed files with 649 additions and 957 deletions
+37 -40
View File
@@ -25,17 +25,17 @@ func (d *Deployer) blueGreenDeploy(
deployID string,
imageTag string,
) (string, string, string, error) {
// Find existing running instance for this stage (the "blue" instance).
existingInstances, err := d.store.GetInstancesByStageID(stage.ID)
// Find existing running container for this stage (the "blue" container).
existing, err := d.store.ListContainersByStageID(stage.ID)
if err != nil {
return "", "", "", fmt.Errorf("get existing instances: %w", err)
return "", "", "", fmt.Errorf("get existing containers: %w", err)
}
var blueInstance *store.Instance
for _, inst := range existingInstances {
if inst.Status == "running" {
instCopy := inst
blueInstance = &instCopy
var blueContainer *store.Container
for _, c := range existing {
if c.State == "running" {
cCopy := c
blueContainer = &cCopy
break
}
}
@@ -84,9 +84,6 @@ func (d *Deployer) blueGreenDeploy(
ExposedPorts: []string{portStr},
NetworkName: settings.Network,
NetworkID: networkID,
Project: project.Name,
Stage: stage.Name,
InstanceID: instanceID,
WorkloadID: workloadID,
WorkloadKind: string(store.WorkloadKindProject),
Role: stage.Name,
@@ -114,25 +111,27 @@ func (d *Deployer) blueGreenDeploy(
return "", "", instanceID, fmt.Errorf("create container: %w", err)
}
// Create instance record.
inst, err := d.store.CreateInstanceWithID(store.Instance{
ID: instanceID,
StageID: stage.ID,
ProjectID: project.ID,
ContainerID: containerID,
ImageTag: imageTag,
Subdomain: subdomain,
Status: "stopped",
Port: project.Port,
// Create container row.
row, err := d.store.CreateContainer(store.Container{
ID: instanceID,
WorkloadID: workloadID,
WorkloadKind: string(store.WorkloadKindProject),
Role: stage.Name,
ContainerID: containerID,
ImageRef: project.Image + ":" + imageTag,
ImageTag: imageTag,
Host: "local",
State: "stopped",
Port: project.Port,
Subdomain: subdomain,
})
if err != nil {
return containerID, "", instanceID, fmt.Errorf("create instance record: %w", err)
return containerID, "", instanceID, fmt.Errorf("create container row: %w", err)
}
instanceID = inst.ID
d.upsertContainerForInstance(project, stage, inst, workloadID)
instanceID = row.ID
if err := d.store.SetDeployInstanceID(deployID, instanceID); err != nil {
slog.Warn("link deploy to instance", "error", err)
slog.Warn("link deploy to container", "error", err)
}
d.logDeploy(deployID, fmt.Sprintf("Blue-green: starting green container %s", containerName), "info")
@@ -140,11 +139,10 @@ func (d *Deployer) blueGreenDeploy(
return containerID, "", instanceID, fmt.Errorf("start container: %w", err)
}
if err := d.store.UpdateInstanceStatus(instanceID, "running"); err != nil {
slog.Warn("update instance status", "error", err)
if err := d.store.UpdateContainerState(instanceID, "running"); err != nil {
slog.Warn("update container state", "error", err)
}
inst.Status = "running"
d.upsertContainerForInstance(project, stage, inst, workloadID)
row.State = "running"
d.publishInstanceStatus(instanceID, project.ID, stage.ID, "running")
// Step 4: Health check the green container.
@@ -181,30 +179,29 @@ func (d *Deployer) blueGreenDeploy(
return containerID, "", instanceID, fmt.Errorf("configure proxy: %w", err)
}
inst.ProxyRouteID = proxyRouteID
row.ProxyRouteID = proxyRouteID
d.logDeploy(deployID, "Blue-green: proxy swapped to green container", "info")
// Create/update DNS record for the green instance.
// Create/update DNS record for the green container.
fqdn := subdomain + "." + settings.Domain
d.ensureDNS(ctx, fqdn, "instance", instanceID, deployID)
} else {
d.logDeploy(deployID, "Blue-green: proxy skipped (disabled for this stage)", "info")
}
inst.Subdomain = subdomain
if err := d.store.UpdateInstance(inst); err != nil {
slog.Warn("update instance with proxy ID", "error", err)
row.Subdomain = subdomain
if err := d.store.UpdateContainer(row); err != nil {
slog.Warn("update container with proxy ID", "error", err)
}
d.upsertContainerForInstance(project, stage, inst, workloadID)
// Step 6: Stop the blue container.
if blueInstance != nil {
d.logDeploy(deployID, fmt.Sprintf("Blue-green: stopping blue instance %s (tag: %s)", blueInstance.ID, blueInstance.ImageTag), "info")
if err := d.removeInstance(ctx, *blueInstance, settings); err != nil {
if blueContainer != nil {
d.logDeploy(deployID, fmt.Sprintf("Blue-green: stopping blue container %s (tag: %s)", blueContainer.ID, blueContainer.ImageTag), "info")
if err := d.removeContainer(ctx, *blueContainer, settings); err != nil {
// Non-fatal: log but continue. Green is already serving traffic.
d.logDeploy(deployID, fmt.Sprintf("Blue-green: warning: failed to remove blue instance: %v", err), "warn")
d.logDeploy(deployID, fmt.Sprintf("Blue-green: warning: failed to remove blue container: %v", err), "warn")
} else {
d.logDeploy(deployID, "Blue-green: blue instance removed", "info")
d.logDeploy(deployID, "Blue-green: blue container removed", "info")
}
}