refactor(workload): extract Instance entirely; Container is canonical
Build / build (push) Successful in 10m41s
Build / build (push) Successful in 10m41s
End-to-end extraction of the Instance concept. After this commit:
* internal/store/instances.go — DELETED
* internal/store/models.go — Instance struct gone, ProxyRoute moved here
* The containers table is the single source of truth for project/stack/site
container state. The instances table is dropped via a DROP TABLE migration
(idempotent; re-runnable on every boot).
* Legacy tinyforge.project / tinyforge.stage / tinyforge.instance-id
Docker labels are no longer emitted; only tinyforge.workload.{id,kind},
tinyforge.role, and tinyforge.managed are stamped on new containers.
Backend rewrites:
- internal/deployer: executeDeploy + blueGreenDeploy + rollback + promote
use store.Container natively. The new removeContainer() replaces
removeInstance(), and enforceMaxInstances reads via ListContainersByStageID.
- internal/reconciler: legacy tinyforge.instance-id dispatch removed;
upsertByWorkloadLabel now finds existing rows by Docker container ID first
and falls back to the deterministic workloadID:role key.
- internal/stale/scanner: Scan + new FindStaleContainers walk the
containers table; emit StaleContainer JSON.
- internal/stats/collector: ListContainers replaces ListAllInstances.
- internal/webhook/handler: the workload-secret lookup is tried first, then
falls back to the project / static_site secret column.
- internal/api: instances.go, stale.go, stats.go, stats_history.go,
projects.go, settings.go, docker.go, dns.go all read and write through
Container.
Docker layer:
- ManagedContainer exposes WorkloadID/Kind/Role from the canonical labels.
- ListContainers filters by tinyforge.managed=true.
- Network creation uses LabelManaged instead of LabelProject.
Frontend:
- Instance type is now a Container alias; .status → .state,
.last_alive_at → .last_seen_at.
- InstanceCard takes stageId as a prop (no longer derived from Instance).
- StaleContainer JSON shape rewritten: { container, workload_name, role,
days_stale }. StaleContainerCard + /containers/stale page updated.
- ProjectCard / homepage / SystemHealthCard filter by .state.
The migration loop now tolerates "no such table" alongside "duplicate
column" / "already exists" so obsolete ALTER TABLE entries targeting the
dropped instances table no-op cleanly on first boot.
Tests: store + deployer + reconciler + webhook + staticsite + notify all
still pass. Frontend svelte-check: zero errors.
This commit is contained in:
+66
-103
@@ -376,9 +376,6 @@ func (d *Deployer) executeDeploy(
|
||||
ExposedPorts: []string{portStr},
|
||||
NetworkName: settings.Network,
|
||||
NetworkID: networkID,
|
||||
Project: project.Name,
|
||||
Stage: stage.Name,
|
||||
InstanceID: instanceID,
|
||||
WorkloadID: workloadID,
|
||||
WorkloadKind: string(store.WorkloadKindProject),
|
||||
Role: stage.Name,
|
||||
@@ -407,26 +404,32 @@ func (d *Deployer) executeDeploy(
|
||||
}
|
||||
d.logDeploy(deployID, fmt.Sprintf("Container created (ID: %s)", truncateID(containerID)), "info")
|
||||
|
||||
// Create instance record in store with the pre-generated ID.
|
||||
inst, err := d.store.CreateInstanceWithID(store.Instance{
|
||||
ID: instanceID,
|
||||
StageID: stage.ID,
|
||||
ProjectID: project.ID,
|
||||
ContainerID: containerID,
|
||||
ImageTag: imageTag,
|
||||
Subdomain: subdomain,
|
||||
Status: "stopped",
|
||||
Port: project.Port,
|
||||
// Create container row with the pre-generated ID. The deployer is the
|
||||
// authoritative writer until the next reconciler tick — it's important
|
||||
// the row exists before StartContainer so a fast tick doesn't see an
|
||||
// orphan and mark it missing.
|
||||
row, err := d.store.CreateContainer(store.Container{
|
||||
ID: instanceID,
|
||||
WorkloadID: workloadID,
|
||||
WorkloadKind: string(store.WorkloadKindProject),
|
||||
Role: stage.Name,
|
||||
ContainerID: containerID,
|
||||
ImageRef: project.Image + ":" + imageTag,
|
||||
ImageTag: imageTag,
|
||||
Host: "local",
|
||||
State: "stopped",
|
||||
Port: project.Port,
|
||||
Subdomain: subdomain,
|
||||
})
|
||||
if err != nil {
|
||||
return containerID, proxyRouteID, instanceID, fmt.Errorf("create instance record: %w", err)
|
||||
return containerID, proxyRouteID, instanceID, fmt.Errorf("create container row: %w", err)
|
||||
}
|
||||
instanceID = inst.ID
|
||||
d.upsertContainerForInstance(project, stage, inst, workloadID)
|
||||
instanceID = row.ID
|
||||
|
||||
// Link deploy to instance.
|
||||
// Link deploy to container row (the existing Deploy.InstanceID column
|
||||
// stores the row ID — same value as before, just a renamed concept).
|
||||
if err := d.store.SetDeployInstanceID(deployID, instanceID); err != nil {
|
||||
slog.Warn("link deploy to instance", "error", err)
|
||||
slog.Warn("link deploy to container", "error", err)
|
||||
}
|
||||
|
||||
d.logDeploy(deployID, fmt.Sprintf("Starting container %s", containerName), "info")
|
||||
@@ -434,15 +437,11 @@ func (d *Deployer) executeDeploy(
|
||||
return containerID, proxyRouteID, instanceID, fmt.Errorf("start container: %w", err)
|
||||
}
|
||||
|
||||
if err := d.store.UpdateInstanceStatus(instanceID, "running"); err != nil {
|
||||
slog.Warn("update instance status to running", "error", err)
|
||||
if err := d.store.UpdateContainerState(instanceID, "running"); err != nil {
|
||||
slog.Warn("update container state to running", "error", err)
|
||||
}
|
||||
if err := d.store.UpdateLastAliveAt(instanceID); err != nil {
|
||||
slog.Warn("update last_alive_at on deploy", "instance_id", instanceID, "error", err)
|
||||
}
|
||||
inst.Status = "running"
|
||||
inst.LastAliveAt = store.Now()
|
||||
d.upsertContainerForInstance(project, stage, inst, workloadID)
|
||||
row.State = "running"
|
||||
row.LastSeenAt = store.Now()
|
||||
d.publishInstanceStatus(instanceID, project.ID, stage.ID, "running")
|
||||
d.logDeploy(deployID, "Container started", "info")
|
||||
|
||||
@@ -463,24 +462,22 @@ func (d *Deployer) executeDeploy(
|
||||
return containerID, proxyRouteID, instanceID, fmt.Errorf("configure proxy: %w", err)
|
||||
}
|
||||
|
||||
// Update instance with proxy route ID.
|
||||
inst.ProxyRouteID = proxyRouteID
|
||||
inst.Subdomain = subdomain
|
||||
if err := d.store.UpdateInstance(inst); err != nil {
|
||||
slog.Warn("update instance with proxy ID", "error", err)
|
||||
// Update container row with proxy route ID.
|
||||
row.ProxyRouteID = proxyRouteID
|
||||
row.Subdomain = subdomain
|
||||
if err := d.store.UpdateContainer(row); err != nil {
|
||||
slog.Warn("update container with proxy ID", "error", err)
|
||||
}
|
||||
d.upsertContainerForInstance(project, stage, inst, workloadID)
|
||||
|
||||
// Create DNS record for this instance.
|
||||
// Create DNS record for this container.
|
||||
fqdn := subdomain + "." + settings.Domain
|
||||
d.ensureDNS(ctx, fqdn, "instance", instanceID, deployID)
|
||||
} else {
|
||||
d.logDeploy(deployID, "Proxy creation skipped (disabled for this stage)", "info")
|
||||
inst.Subdomain = subdomain
|
||||
if err := d.store.UpdateInstance(inst); err != nil {
|
||||
slog.Warn("update instance", "error", err)
|
||||
row.Subdomain = subdomain
|
||||
if err := d.store.UpdateContainer(row); err != nil {
|
||||
slog.Warn("update container", "error", err)
|
||||
}
|
||||
d.upsertContainerForInstance(project, stage, inst, workloadID)
|
||||
}
|
||||
|
||||
// Step 5: Health check.
|
||||
@@ -554,27 +551,27 @@ func (d *Deployer) configureProxy(
|
||||
return routeID, nil
|
||||
}
|
||||
|
||||
// enforceMaxInstances removes the oldest instances when the stage has reached its limit.
|
||||
// This makes room for the new deployment.
|
||||
// enforceMaxInstances removes the oldest container rows when the stage has
|
||||
// reached its instance limit, making room for the new deploy.
|
||||
func (d *Deployer) enforceMaxInstances(ctx context.Context, stage store.Stage, deployID string, settings store.Settings) error {
|
||||
if stage.MaxInstances <= 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
instances, err := d.store.GetInstancesByStageID(stage.ID)
|
||||
containers, err := d.store.ListContainersByStageID(stage.ID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("get instances for stage: %w", err)
|
||||
return fmt.Errorf("get containers for stage: %w", err)
|
||||
}
|
||||
|
||||
// Filter to running/stopped instances (not already failed/removing).
|
||||
var active []store.Instance
|
||||
for _, inst := range instances {
|
||||
if inst.Status == "running" || inst.Status == "stopped" {
|
||||
active = append(active, inst)
|
||||
// Filter to running/stopped containers (not already failed/removing).
|
||||
var active []store.Container
|
||||
for _, c := range containers {
|
||||
if c.State == "running" || c.State == "stopped" {
|
||||
active = append(active, c)
|
||||
}
|
||||
}
|
||||
|
||||
// We need room for one more instance, so remove oldest when at limit.
|
||||
// We need room for one more container, so remove the oldest when at limit.
|
||||
removeCount := len(active) - stage.MaxInstances + 1
|
||||
if removeCount <= 0 {
|
||||
return nil
|
||||
@@ -586,57 +583,50 @@ func (d *Deployer) enforceMaxInstances(ctx context.Context, stage store.Stage, d
|
||||
})
|
||||
|
||||
for i := 0; i < removeCount && i < len(active); i++ {
|
||||
inst := active[i]
|
||||
d.logDeploy(deployID, fmt.Sprintf("Removing oldest instance %s (tag: %s) to enforce max_instances=%d", inst.ID, inst.ImageTag, stage.MaxInstances), "info")
|
||||
c := active[i]
|
||||
d.logDeploy(deployID, fmt.Sprintf("Removing oldest container %s (tag: %s) to enforce max_instances=%d", c.ID, c.ImageTag, stage.MaxInstances), "info")
|
||||
|
||||
if err := d.removeInstance(ctx, inst, settings); err != nil {
|
||||
d.logDeploy(deployID, fmt.Sprintf("Failed to remove instance %s: %v", inst.ID, err), "warn")
|
||||
if err := d.removeContainer(ctx, c, settings); err != nil {
|
||||
d.logDeploy(deployID, fmt.Sprintf("Failed to remove container %s: %v", c.ID, err), "warn")
|
||||
continue
|
||||
}
|
||||
d.logDeploy(deployID, fmt.Sprintf("Removed instance %s", inst.ID), "info")
|
||||
d.logDeploy(deployID, fmt.Sprintf("Removed container %s", c.ID), "info")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// removeInstance stops and removes a container, deletes its NPM proxy host,
|
||||
// and removes the instance record from the store.
|
||||
func (d *Deployer) removeInstance(ctx context.Context, inst store.Instance, settings store.Settings) error {
|
||||
// removeContainer stops + removes the Docker container, deletes its proxy
|
||||
// route, drops the DNS record, and removes the container row from the store.
|
||||
func (d *Deployer) removeContainer(ctx context.Context, c store.Container, settings store.Settings) error {
|
||||
// Mark as removing.
|
||||
if err := d.store.UpdateInstanceStatus(inst.ID, "removing"); err != nil {
|
||||
slog.Warn("update instance status to removing", "instance_id", inst.ID, "error", err)
|
||||
if err := d.store.UpdateContainerState(c.ID, "removing"); err != nil {
|
||||
slog.Warn("update container state to removing", "id", c.ID, "error", err)
|
||||
}
|
||||
|
||||
// Remove Docker container.
|
||||
if inst.ContainerID != "" {
|
||||
if err := d.docker.RemoveContainer(ctx, inst.ContainerID, true); err != nil {
|
||||
slog.Warn("remove container", "container_id", inst.ContainerID, "error", err)
|
||||
if c.ContainerID != "" {
|
||||
if err := d.docker.RemoveContainer(ctx, c.ContainerID, true); err != nil {
|
||||
slog.Warn("remove docker container", "container_id", c.ContainerID, "error", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Delete proxy route.
|
||||
if inst.ProxyRouteID != "" {
|
||||
if err := d.proxy.DeleteRoute(ctx, inst.ProxyRouteID); err != nil {
|
||||
slog.Warn("delete proxy route", "route_id", inst.ProxyRouteID, "error", err)
|
||||
if c.ProxyRouteID != "" {
|
||||
if err := d.proxy.DeleteRoute(ctx, c.ProxyRouteID); err != nil {
|
||||
slog.Warn("delete proxy route", "route_id", c.ProxyRouteID, "error", err)
|
||||
}
|
||||
|
||||
// Remove DNS record for this instance.
|
||||
if inst.Subdomain != "" && settings.Domain != "" {
|
||||
fqdn := inst.Subdomain + "." + settings.Domain
|
||||
// Remove DNS record.
|
||||
if c.Subdomain != "" && settings.Domain != "" {
|
||||
fqdn := c.Subdomain + "." + settings.Domain
|
||||
d.removeDNS(ctx, fqdn, "")
|
||||
}
|
||||
}
|
||||
|
||||
// Delete instance record.
|
||||
if err := d.store.DeleteInstance(inst.ID); err != nil {
|
||||
return fmt.Errorf("delete instance record: %w", err)
|
||||
}
|
||||
|
||||
// Drop the matching container index row. ID matches instance.ID by
|
||||
// construction; ignore NotFound which is harmless if the row predates
|
||||
// this refactor.
|
||||
if err := d.store.DeleteContainer(inst.ID); err != nil && !errors.Is(err, store.ErrNotFound) {
|
||||
slog.Warn("delete container row", "instance_id", inst.ID, "error", err)
|
||||
// Drop the container row.
|
||||
if err := d.store.DeleteContainer(c.ID); err != nil && !errors.Is(err, store.ErrNotFound) {
|
||||
return fmt.Errorf("delete container row: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
@@ -903,33 +893,6 @@ func truncateID(id string) string {
|
||||
return id
|
||||
}
|
||||
|
||||
// upsertContainerForInstance keeps the normalized containers index in sync
|
||||
// with the project-specific instance row. Same UUID is used for both rows so
|
||||
// the reconciler can find them later. Best-effort: a sync failure is logged
|
||||
// but does not abort the deploy — the container is still running and the
|
||||
// reconciler will pick it up on the next tick (once that lands).
|
||||
func (d *Deployer) upsertContainerForInstance(project store.Project, stage store.Stage, inst store.Instance, workloadID string) {
|
||||
c := store.Container{
|
||||
ID: inst.ID,
|
||||
WorkloadID: workloadID,
|
||||
WorkloadKind: string(store.WorkloadKindProject),
|
||||
Role: stage.Name,
|
||||
ContainerID: inst.ContainerID,
|
||||
ImageRef: project.Image + ":" + inst.ImageTag,
|
||||
ImageTag: inst.ImageTag,
|
||||
Host: "local",
|
||||
State: inst.Status,
|
||||
Port: inst.Port,
|
||||
Subdomain: inst.Subdomain,
|
||||
ProxyRouteID: inst.ProxyRouteID,
|
||||
NpmProxyID: inst.NpmProxyID,
|
||||
LastSeenAt: inst.LastAliveAt,
|
||||
}
|
||||
if err := d.store.UpsertContainer(c); err != nil {
|
||||
slog.Warn("upsert container row", "instance_id", inst.ID, "error", err)
|
||||
}
|
||||
}
|
||||
|
||||
// resolveProjectWorkloadID returns the workload ID paired with a project.
|
||||
// Backfill-on-boot guarantees the row exists, so this is essentially a lookup.
|
||||
// On miss (defensive), it logs and returns empty so the caller can decide.
|
||||
|
||||
Reference in New Issue
Block a user