d8ab22876f
Build / build (push) Successful in 10m41s
End-to-end extraction of the Instance concept. After this commit:
* internal/store/instances.go — DELETED
* internal/store/models.go — Instance struct gone, ProxyRoute moved here
* containers table is the single source of truth for project/stack/site
container state. instances table is dropped via DROP TABLE migration
(idempotent; re-runnable on every boot).
* Legacy tinyforge.project / tinyforge.stage / tinyforge.instance-id
Docker labels are no longer emitted; only tinyforge.workload.{id,kind},
tinyforge.role, and tinyforge.managed are stamped on new containers.
Backend rewrites:
- internal/deployer: executeDeploy + blueGreenDeploy + rollback +
promote use store.Container natively. New
removeContainer() replaces removeInstance().
enforceMaxInstances reads via
ListContainersByStageID.
- internal/reconciler: legacy tinyforge.instance-id dispatch removed;
upsertByWorkloadLabel now finds existing rows
by docker container ID first and falls back to
the deterministic workloadID:role key.
- internal/stale/scanner: Scan + new FindStaleContainers walk the
containers table; emit StaleContainer JSON.
- internal/stats/collector: ListContainers replaces ListAllInstances.
- internal/webhook/handler: workload-secret lookup tried first; falls back
to project / static_site secret column.
- internal/api: instances.go, stale.go, stats.go, stats_history.go,
projects.go, settings.go, docker.go, dns.go all read /
write through Container.
Docker layer:
- ManagedContainer exposes WorkloadID/Kind/Role from the canonical labels.
- ListContainers filters by tinyforge.managed=true.
- Network creation uses LabelManaged instead of LabelProject.
Frontend:
- Instance type is now a Container alias; .status → .state,
.last_alive_at → .last_seen_at.
- InstanceCard takes stageId as a prop (no longer derived from Instance).
- StaleContainer JSON shape rewritten: { container, workload_name, role,
days_stale }. StaleContainerCard + /containers/stale page updated.
- ProjectCard / homepage / SystemHealthCard filter by .state.
The migration loop now tolerates "no such table" alongside "duplicate
column" / "already exists" so obsolete ALTER TABLE entries targeting the
dropped instances table no-op cleanly on first boot.
Tests: store + deployer + reconciler + webhook + staticsite + notify all
still pass. Frontend svelte-check: zero errors.
308 lines
9.2 KiB
Go
308 lines
9.2 KiB
Go
// Package reconciler keeps the normalized containers index in sync with the
|
|
// Docker daemon. It runs on a tick (and one-shot at boot) — for every
|
|
// Tinyforge-managed container in `docker ps`, it dispatches to a workload by
|
|
// labels and upserts a Container row. Rows whose Docker container ID is no
|
|
// longer present are flipped to state='missing'.
|
|
//
|
|
// Dispatch precedence:
|
|
// 1. tinyforge.workload.id label (canonical, new)
|
|
// 2. tinyforge.static-site label (legacy site — joins via static_sites)
|
|
// 3. com.docker.compose.project (stack — joins via Stack.ComposeProjectName)
|
|
//
|
|
// The legacy tinyforge.instance-id path was removed when the deployer was
|
|
// rewritten to use Container natively — every Tinyforge-managed project
|
|
// container now carries the workload labels at create time.
|
|
package reconciler
|
|
|
|
import (
|
|
"context"
|
|
"log/slog"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/alexei/tinyforge/internal/docker"
|
|
"github.com/alexei/tinyforge/internal/store"
|
|
)
|
|
|
|
// DockerLister is the subset of docker.Client the reconciler depends on.
|
|
// Defined here (where it's used) so tests can substitute a fake without
|
|
// pulling in the full docker package.
|
|
type DockerLister interface {
|
|
ListAllForReconciler(ctx context.Context) ([]docker.ReconcileItem, error)
|
|
}
|
|
|
|
// Reconciler is the background worker that syncs the containers index.
|
|
type Reconciler struct {
|
|
store *store.Store
|
|
docker DockerLister
|
|
interval time.Duration
|
|
|
|
stop chan struct{}
|
|
wg sync.WaitGroup
|
|
}
|
|
|
|
// New constructs a Reconciler. interval is the tick period; values <=0 fall
|
|
// back to 30s. interval > 5m is clamped to 5m so a manual misconfiguration
|
|
// can't silently disable timely state updates.
|
|
func New(st *store.Store, dockerClient DockerLister, interval time.Duration) *Reconciler {
|
|
if interval <= 0 {
|
|
interval = 30 * time.Second
|
|
}
|
|
if interval > 5*time.Minute {
|
|
interval = 5 * time.Minute
|
|
}
|
|
return &Reconciler{
|
|
store: st,
|
|
docker: dockerClient,
|
|
interval: interval,
|
|
stop: make(chan struct{}),
|
|
}
|
|
}
|
|
|
|
// Start kicks off the background reconciliation loop. Runs one tick
|
|
// immediately so startup populates the index without waiting for the first
|
|
// timer fire. Idempotent: calling Start twice is a programming error.
|
|
func (r *Reconciler) Start(ctx context.Context) {
|
|
r.wg.Add(1)
|
|
go r.loop(ctx)
|
|
}
|
|
|
|
// Stop signals the loop to exit and waits for the in-flight tick to finish.
|
|
func (r *Reconciler) Stop() {
|
|
close(r.stop)
|
|
r.wg.Wait()
|
|
}
|
|
|
|
// ReconcileOnce runs a single reconciliation pass. Exposed for tests and for
|
|
// callers that want to force a sync after a known mutation (e.g., right after
|
|
// a deploy succeeds, before the next tick).
|
|
func (r *Reconciler) ReconcileOnce(ctx context.Context) error {
|
|
items, err := r.docker.ListAllForReconciler(ctx)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
seen := make(map[string]struct{}, len(items)) // container row IDs we touched
|
|
|
|
for _, item := range items {
|
|
rowID := r.upsertFromItem(ctx, item)
|
|
if rowID != "" {
|
|
seen[rowID] = struct{}{}
|
|
}
|
|
}
|
|
|
|
r.markMissingRows(seen)
|
|
return nil
|
|
}
|
|
|
|
func (r *Reconciler) loop(ctx context.Context) {
|
|
defer r.wg.Done()
|
|
|
|
// Boot tick.
|
|
if err := r.ReconcileOnce(ctx); err != nil {
|
|
slog.Warn("reconciler: initial pass", "error", err)
|
|
}
|
|
|
|
ticker := time.NewTicker(r.interval)
|
|
defer ticker.Stop()
|
|
for {
|
|
select {
|
|
case <-ctx.Done():
|
|
return
|
|
case <-r.stop:
|
|
return
|
|
case <-ticker.C:
|
|
if err := r.ReconcileOnce(ctx); err != nil {
|
|
slog.Warn("reconciler: tick", "error", err)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// upsertFromItem dispatches one container to its workload and writes the
|
|
// Container row. Returns the row ID on success or "" if no dispatch matched.
|
|
func (r *Reconciler) upsertFromItem(ctx context.Context, item docker.ReconcileItem) string {
|
|
if id := item.Labels[docker.LabelWorkloadID]; id != "" {
|
|
return r.upsertByWorkloadLabel(item, id)
|
|
}
|
|
if siteID := item.Labels["tinyforge.static-site"]; siteID != "" {
|
|
return r.upsertBySiteLabel(item, siteID)
|
|
}
|
|
if cp := item.Labels["com.docker.compose.project"]; cp != "" && strings.HasPrefix(cp, "tinyforge-") {
|
|
return r.upsertByComposeProject(item, cp)
|
|
}
|
|
return ""
|
|
}
|
|
|
|
// upsertByWorkloadLabel — canonical path. The row may already exist with a
|
|
// deployer-assigned UUID (project deploys do this so each blue-green slot
|
|
// has a stable handle); look it up by docker container ID first and fall
|
|
// back to the deterministic workloadID:role key.
|
|
func (r *Reconciler) upsertByWorkloadLabel(item docker.ReconcileItem, workloadID string) string {
|
|
role := item.Labels[docker.LabelRole]
|
|
kind := item.Labels[docker.LabelWorkloadKind]
|
|
rowID := workloadIDRow(workloadID, kind, role, item.ID)
|
|
if existing, err := r.store.GetContainerByDockerID(item.ID); err == nil {
|
|
rowID = existing.ID
|
|
}
|
|
|
|
port := 0
|
|
if len(item.Ports) > 0 {
|
|
port = int(item.Ports[0])
|
|
}
|
|
if err := r.store.UpsertContainer(store.Container{
|
|
ID: rowID,
|
|
WorkloadID: workloadID,
|
|
WorkloadKind: kind,
|
|
Role: role,
|
|
ContainerID: item.ID,
|
|
ImageRef: item.Image,
|
|
Host: "local",
|
|
State: normalizeState(item.State),
|
|
Port: port,
|
|
LastSeenAt: store.Now(),
|
|
}); err != nil {
|
|
slog.Warn("reconciler: upsert by workload label", "container_id", item.ID, "error", err)
|
|
return ""
|
|
}
|
|
return rowID
|
|
}
|
|
|
|
func (r *Reconciler) upsertBySiteLabel(item docker.ReconcileItem, siteID string) string {
|
|
w, err := r.store.GetWorkloadByRef(store.WorkloadKindSite, siteID)
|
|
if err != nil {
|
|
return ""
|
|
}
|
|
rowID := w.ID + ":site"
|
|
port := 0
|
|
if len(item.Ports) > 0 {
|
|
port = int(item.Ports[0])
|
|
}
|
|
if err := r.store.UpsertContainer(store.Container{
|
|
ID: rowID,
|
|
WorkloadID: w.ID,
|
|
WorkloadKind: string(store.WorkloadKindSite),
|
|
Role: "",
|
|
ContainerID: item.ID,
|
|
ImageRef: item.Image,
|
|
Host: "local",
|
|
State: normalizeState(item.State),
|
|
Port: port,
|
|
LastSeenAt: store.Now(),
|
|
}); err != nil {
|
|
slog.Warn("reconciler: upsert by site label", "container_id", item.ID, "error", err)
|
|
return ""
|
|
}
|
|
return rowID
|
|
}
|
|
|
|
func (r *Reconciler) upsertByComposeProject(item docker.ReconcileItem, composeProject string) string {
|
|
stack, err := r.findStackByComposeProject(composeProject)
|
|
if err != nil {
|
|
return ""
|
|
}
|
|
w, err := r.store.GetWorkloadByRef(store.WorkloadKindStack, stack.ID)
|
|
if err != nil {
|
|
return ""
|
|
}
|
|
role := item.Labels["com.docker.compose.service"]
|
|
if role == "" {
|
|
role = item.Name
|
|
}
|
|
rowID := w.ID + ":" + role
|
|
port := 0
|
|
if len(item.Ports) > 0 {
|
|
port = int(item.Ports[0])
|
|
}
|
|
if err := r.store.UpsertContainer(store.Container{
|
|
ID: rowID,
|
|
WorkloadID: w.ID,
|
|
WorkloadKind: string(store.WorkloadKindStack),
|
|
Role: role,
|
|
ContainerID: item.ID,
|
|
ImageRef: item.Image,
|
|
Host: "local",
|
|
State: normalizeState(item.State),
|
|
Port: port,
|
|
LastSeenAt: store.Now(),
|
|
}); err != nil {
|
|
slog.Warn("reconciler: upsert by compose project", "container_id", item.ID, "error", err)
|
|
return ""
|
|
}
|
|
return rowID
|
|
}
|
|
|
|
// findStackByComposeProject scans all stacks for a matching ComposeProjectName.
|
|
// Linear; the stack count is small in practice.
|
|
func (r *Reconciler) findStackByComposeProject(composeProject string) (store.Stack, error) {
|
|
stacks, err := r.store.GetAllStacks()
|
|
if err != nil {
|
|
return store.Stack{}, err
|
|
}
|
|
for _, s := range stacks {
|
|
if s.ComposeProjectName == composeProject {
|
|
return s, nil
|
|
}
|
|
}
|
|
return store.Stack{}, store.ErrNotFound
|
|
}
|
|
|
|
// markMissingRows flips state to 'missing' for any container row whose Docker
|
|
// container ID was not seen in this pass. Rows with empty container_id are
|
|
// skipped — the deployer creates them ahead of `docker create` so they're
|
|
// transient and shouldn't be marked missing on a tick that races the deploy.
|
|
func (r *Reconciler) markMissingRows(seen map[string]struct{}) {
|
|
rows, err := r.store.ListContainers(store.ContainerFilter{})
|
|
if err != nil {
|
|
slog.Warn("reconciler: list containers for missing-sweep", "error", err)
|
|
return
|
|
}
|
|
for _, row := range rows {
|
|
if _, ok := seen[row.ID]; ok {
|
|
continue
|
|
}
|
|
if row.ContainerID == "" {
|
|
continue // never bound to a real container yet
|
|
}
|
|
if row.State == "missing" {
|
|
continue // already marked
|
|
}
|
|
if err := r.store.MarkContainerMissing(row.ID); err != nil {
|
|
slog.Warn("reconciler: mark missing", "row_id", row.ID, "error", err)
|
|
}
|
|
}
|
|
}
|
|
|
|
// workloadIDRow picks the row ID for a workload-labelled container.
|
|
// Stack rows use the deterministic workloadID:role pattern; sites use
|
|
// workloadID:site. Project rows have a per-deploy UUID assigned by the
|
|
// deployer and ALSO carry the role label (= stage name), so the same
|
|
// pattern resolves to the same row across deployer + reconciler upserts.
|
|
func workloadIDRow(workloadID, kind, role, containerID string) string {
|
|
if role != "" {
|
|
return workloadID + ":" + role
|
|
}
|
|
if kind == string(store.WorkloadKindSite) {
|
|
return workloadID + ":site"
|
|
}
|
|
// Last-resort fallback: container ID. Uncommon path.
|
|
return workloadID + ":" + containerID
|
|
}
|
|
|
|
// normalizeState converts a Docker container state into the value stored
// on a container row. The states "exited", "dead" and "stopped" all
// collapse to "stopped"; every other value, including "running",
// "created", "restarting", "paused", "removing" and anything unrecognised,
// is passed through unchanged.
func normalizeState(dockerState string) string {
	switch dockerState {
	case "exited", "dead", "stopped":
		return "stopped"
	}
	return dockerState
}
|