// Package reconciler keeps the normalized containers index in sync with the // Docker daemon. It runs on a tick (and one-shot at boot) — for every // Tinyforge-managed container in `docker ps`, it dispatches to a workload by // labels and upserts a Container row. Rows whose Docker container ID is no // longer present are flipped to state='missing'. // // Dispatch precedence: // 1. tinyforge.workload.id label (canonical, new) // 2. tinyforge.static-site label (legacy site — joins via static_sites) // 3. com.docker.compose.project (stack — joins via Stack.ComposeProjectName) // // The legacy tinyforge.instance-id path was removed when the deployer was // rewritten to use Container natively — every Tinyforge-managed project // container now carries the workload labels at create time. package reconciler import ( "context" "log/slog" "strings" "sync" "time" "github.com/alexei/tinyforge/internal/docker" "github.com/alexei/tinyforge/internal/store" ) // DockerLister is the subset of docker.Client the reconciler depends on. // Defined here (where it's used) so tests can substitute a fake without // pulling in the full docker package. type DockerLister interface { ListAllForReconciler(ctx context.Context) ([]docker.ReconcileItem, error) } // Reconciler is the background worker that syncs the containers index. type Reconciler struct { store *store.Store docker DockerLister interval time.Duration stop chan struct{} wg sync.WaitGroup } // New constructs a Reconciler. interval is the tick period; values <=0 fall // back to 30s. interval > 5m is clamped to 5m so a manual misconfiguration // can't silently disable timely state updates. func New(st *store.Store, dockerClient DockerLister, interval time.Duration) *Reconciler { if interval <= 0 { interval = 30 * time.Second } if interval > 5*time.Minute { interval = 5 * time.Minute } return &Reconciler{ store: st, docker: dockerClient, interval: interval, stop: make(chan struct{}), } } // Start kicks off the background reconciliation loop. Runs one tick // immediately so startup populates the index without waiting for the first // timer fire. Idempotent: calling Start twice is a programming error. func (r *Reconciler) Start(ctx context.Context) { r.wg.Add(1) go r.loop(ctx) } // Stop signals the loop to exit and waits for the in-flight tick to finish. func (r *Reconciler) Stop() { close(r.stop) r.wg.Wait() } // ReconcileOnce runs a single reconciliation pass. Exposed for tests and for // callers that want to force a sync after a known mutation (e.g., right after // a deploy succeeds, before the next tick). func (r *Reconciler) ReconcileOnce(ctx context.Context) error { items, err := r.docker.ListAllForReconciler(ctx) if err != nil { return err } seen := make(map[string]struct{}, len(items)) // container row IDs we touched for _, item := range items { rowID := r.upsertFromItem(ctx, item) if rowID != "" { seen[rowID] = struct{}{} } } r.markMissingRows(seen) return nil } func (r *Reconciler) loop(ctx context.Context) { defer r.wg.Done() // Boot tick. if err := r.ReconcileOnce(ctx); err != nil { slog.Warn("reconciler: initial pass", "error", err) } ticker := time.NewTicker(r.interval) defer ticker.Stop() for { select { case <-ctx.Done(): return case <-r.stop: return case <-ticker.C: if err := r.ReconcileOnce(ctx); err != nil { slog.Warn("reconciler: tick", "error", err) } } } } // upsertFromItem dispatches one container to its workload and writes the // Container row. Returns the row ID on success or "" if no dispatch matched. func (r *Reconciler) upsertFromItem(ctx context.Context, item docker.ReconcileItem) string { if id := item.Labels[docker.LabelWorkloadID]; id != "" { return r.upsertByWorkloadLabel(item, id) } if siteID := item.Labels["tinyforge.static-site"]; siteID != "" { return r.upsertBySiteLabel(item, siteID) } if cp := item.Labels["com.docker.compose.project"]; cp != "" && strings.HasPrefix(cp, "tinyforge-") { return r.upsertByComposeProject(item, cp) } return "" } // upsertByWorkloadLabel — canonical path. The row may already exist with a // deployer-assigned UUID (project deploys do this so each blue-green slot // has a stable handle); look it up by docker container ID first and fall // back to the deterministic workloadID:role key. func (r *Reconciler) upsertByWorkloadLabel(item docker.ReconcileItem, workloadID string) string { role := item.Labels[docker.LabelRole] kind := item.Labels[docker.LabelWorkloadKind] rowID := workloadIDRow(workloadID, kind, role, item.ID) if existing, err := r.store.GetContainerByDockerID(item.ID); err == nil { rowID = existing.ID } port := 0 if len(item.Ports) > 0 { port = int(item.Ports[0]) } if err := r.store.UpsertContainer(store.Container{ ID: rowID, WorkloadID: workloadID, WorkloadKind: kind, Role: role, ContainerID: item.ID, ImageRef: item.Image, Host: "local", State: normalizeState(item.State), Port: port, LastSeenAt: store.Now(), }); err != nil { slog.Warn("reconciler: upsert by workload label", "container_id", item.ID, "error", err) return "" } return rowID } func (r *Reconciler) upsertBySiteLabel(item docker.ReconcileItem, siteID string) string { w, err := r.store.GetWorkloadByRef(store.WorkloadKindSite, siteID) if err != nil { return "" } rowID := w.ID + ":site" port := 0 if len(item.Ports) > 0 { port = int(item.Ports[0]) } if err := r.store.UpsertContainer(store.Container{ ID: rowID, WorkloadID: w.ID, WorkloadKind: string(store.WorkloadKindSite), Role: "", ContainerID: item.ID, ImageRef: item.Image, Host: "local", State: normalizeState(item.State), Port: port, LastSeenAt: store.Now(), }); err != nil { slog.Warn("reconciler: upsert by site label", "container_id", item.ID, "error", err) return "" } return rowID } func (r *Reconciler) upsertByComposeProject(item docker.ReconcileItem, composeProject string) string { stack, err := r.findStackByComposeProject(composeProject) if err != nil { return "" } w, err := r.store.GetWorkloadByRef(store.WorkloadKindStack, stack.ID) if err != nil { return "" } role := item.Labels["com.docker.compose.service"] if role == "" { role = item.Name } rowID := w.ID + ":" + role port := 0 if len(item.Ports) > 0 { port = int(item.Ports[0]) } if err := r.store.UpsertContainer(store.Container{ ID: rowID, WorkloadID: w.ID, WorkloadKind: string(store.WorkloadKindStack), Role: role, ContainerID: item.ID, ImageRef: item.Image, Host: "local", State: normalizeState(item.State), Port: port, LastSeenAt: store.Now(), }); err != nil { slog.Warn("reconciler: upsert by compose project", "container_id", item.ID, "error", err) return "" } return rowID } // findStackByComposeProject scans all stacks for a matching ComposeProjectName. // Linear; the stack count is small in practice. func (r *Reconciler) findStackByComposeProject(composeProject string) (store.Stack, error) { stacks, err := r.store.GetAllStacks() if err != nil { return store.Stack{}, err } for _, s := range stacks { if s.ComposeProjectName == composeProject { return s, nil } } return store.Stack{}, store.ErrNotFound } // markMissingRows flips state to 'missing' for any container row whose Docker // container ID was not seen in this pass. Rows with empty container_id are // skipped — the deployer creates them ahead of `docker create` so they're // transient and shouldn't be marked missing on a tick that races the deploy. func (r *Reconciler) markMissingRows(seen map[string]struct{}) { rows, err := r.store.ListContainers(store.ContainerFilter{}) if err != nil { slog.Warn("reconciler: list containers for missing-sweep", "error", err) return } for _, row := range rows { if _, ok := seen[row.ID]; ok { continue } if row.ContainerID == "" { continue // never bound to a real container yet } if row.State == "missing" { continue // already marked } if err := r.store.MarkContainerMissing(row.ID); err != nil { slog.Warn("reconciler: mark missing", "row_id", row.ID, "error", err) } } } // workloadIDRow picks the row ID for a workload-labelled container. // Stack rows use the deterministic workloadID:role pattern; sites use // workloadID:site. Project rows have a per-deploy UUID assigned by the // deployer and ALSO carry the role label (= stage name), so the same // pattern resolves to the same row across deployer + reconciler upserts. func workloadIDRow(workloadID, kind, role, containerID string) string { if role != "" { return workloadID + ":" + role } if kind == string(store.WorkloadKindSite) { return workloadID + ":site" } // Last-resort fallback: container ID. Uncommon path. return workloadID + ":" + containerID } // normalizeState maps Docker container states to our condensed set: // running | stopped | failed | removing | missing. func normalizeState(dockerState string) string { switch dockerState { case "running": return "running" case "exited", "dead", "stopped": return "stopped" case "created", "restarting", "paused": return dockerState case "removing": return "removing" default: return dockerState } }