Files
alexei.dolgolyov 5c17885197 perf(reconciler): batch workloads per tick, drop redundant image inspect
Load every workload once per tick into a map instead of a per-container
GetWorkloadByID (N+1) in the upsert loop plus a second ListWorkloads in
the plugin pass: one query per tick, zero GetWorkloadByID. The
ListWorkloads error path returns before the missing-sweep so a failed
load can't flip live container rows to 'missing'.

image.Reconcile is now a no-op: the generic upsert+markMissing pass
already syncs every labeled container's state from the single
ListAllForReconciler (docker ps -a) snapshot earlier in the same tick,
so the former per-container IsContainerRunning loop was N redundant
Docker calls/tick. (Its no-op body sits in image.go, which landed with
the preceding commit; the tests are here.) compose/static reconcile do
non-redundant work and are intentionally untouched.

Reviewed: go APPROVE.
2026-05-29 13:51:27 +03:00

418 lines
14 KiB
Go

package reconciler
import (
"context"
"testing"
"github.com/alexei/tinyforge/internal/docker"
"github.com/alexei/tinyforge/internal/store"
)
// fakeDocker is a tiny stand-in for docker.Client. The reconciler depends on
// the DockerLister interface so we don't need a real daemon for unit tests.
type fakeDocker struct {
// items is the canned snapshot that ListAllForReconciler returns as-is; a
// zero-value fakeDocker therefore reports no containers at all.
items []docker.ReconcileItem
}
// ListAllForReconciler hands back the canned items unchanged and never
// fails. The context is accepted only to match the method signature and is
// not consulted.
func (fd *fakeDocker) ListAllForReconciler(_ context.Context) ([]docker.ReconcileItem, error) {
	snapshot := fd.items
	return snapshot, nil
}
// newTestStore opens a store on the ":memory:" path and registers a cleanup
// that closes it when the test finishes. Any failure to open the store aborts
// the calling test immediately.
func newTestStore(t *testing.T) *store.Store {
	t.Helper()

	st, err := store.New(":memory:")
	if err != nil {
		t.Fatalf("create store: %v", err)
	}

	t.Cleanup(func() {
		st.Close()
	})
	return st
}
// makeWorkload creates a workload of the requested kind in st and returns the
// stored value, so tests can stamp its ID onto container labels. RefID is
// derived from name by appending "-ref". A creation failure aborts the
// calling test.
func makeWorkload(t *testing.T, st *store.Store, name, kind string) store.Workload {
	t.Helper()

	spec := store.Workload{
		Kind:  kind,
		RefID: name + "-ref",
		Name:  name,
	}
	created, err := st.CreateWorkload(spec)
	if err != nil {
		t.Fatalf("CreateWorkload: %v", err)
	}
	return created
}
// TestReconcileWorkloadLabelledStackContainer verifies that a container
// carrying the canonical workload labels of an existing stack workload is
// indexed after one ReconcileOnce, bound to its Docker ID with the role, kind,
// state and port taken from the snapshot.
func TestReconcileWorkloadLabelledStackContainer(t *testing.T) {
	st := newTestStore(t)
	w := makeWorkload(t, st, "wf-stack", "stack")

	// One container with the canonical workload labels stamped.
	fake := &fakeDocker{items: []docker.ReconcileItem{{
		ID:    "docker-abc",
		Name:  "wf-stack-web-1",
		Image: "nginx:1.27",
		State: "running",
		Labels: map[string]string{
			docker.LabelManaged:      "true",
			docker.LabelWorkloadID:   w.ID,
			docker.LabelWorkloadKind: "stack",
			docker.LabelRole:         "web",
		},
		Ports: []uint16{8080},
	}}}

	r := New(st, fake, 0)
	if err := r.ReconcileOnce(context.Background()); err != nil {
		t.Fatalf("ReconcileOnce: %v", err)
	}

	// Surface a failed read as such instead of letting it masquerade as a
	// row-count mismatch.
	rows, err := st.ListContainersByWorkload(w.ID)
	if err != nil {
		t.Fatalf("ListContainersByWorkload: %v", err)
	}
	if len(rows) != 1 {
		t.Fatalf("expected 1 container row, got %d", len(rows))
	}

	got := rows[0]
	if got.ContainerID != "docker-abc" {
		t.Fatalf("container_id not bound: got %q", got.ContainerID)
	}
	if got.Role != "web" || got.WorkloadKind != "stack" {
		t.Fatalf("dispatch wrong: %+v", got)
	}
	if got.State != "running" || got.Port != 8080 {
		t.Fatalf("state/port wrong: %+v", got)
	}
}
// TestReconcileMarksMissingRows verifies that a row bound to a Docker
// container which is absent from the snapshot ends up in state "missing"
// after one ReconcileOnce.
func TestReconcileMarksMissingRows(t *testing.T) {
	st := newTestStore(t)
	w := makeWorkload(t, st, "missing-stack", "stack")
	rowID := w.ID + ":web"

	// Pre-existing row with a real container_id that no longer exists.
	if err := st.UpsertContainer(store.Container{
		ID: rowID, WorkloadID: w.ID, WorkloadKind: "stack",
		Role: "web", ContainerID: "docker-gone", State: "running",
	}); err != nil {
		t.Fatalf("seed: %v", err)
	}

	// Reconciler sees nothing.
	r := New(st, &fakeDocker{}, 0)
	if err := r.ReconcileOnce(context.Background()); err != nil {
		t.Fatalf("ReconcileOnce: %v", err)
	}

	// Check the read error so a lookup failure is reported as a lookup
	// failure, not as a wrong state.
	got, err := st.GetContainerByID(rowID)
	if err != nil {
		t.Fatalf("GetContainerByID(%q): %v", rowID, err)
	}
	if got.State != "missing" {
		t.Fatalf("expected state=missing, got %q", got.State)
	}
}
// TestReconcileSkipsRowsAwaitingDocker verifies that a placeholder row with an
// empty container_id keeps its state across a ReconcileOnce in which Docker
// reports no containers, i.e. it is not swept to "missing".
func TestReconcileSkipsRowsAwaitingDocker(t *testing.T) {
	st := newTestStore(t)
	w := makeWorkload(t, st, "pending", "stack")
	rowID := w.ID + ":web"

	// A row with empty container_id (deployer placeholder, awaiting docker
	// create). Reconciler must not mark this as missing.
	if err := st.UpsertContainer(store.Container{
		ID: rowID, WorkloadID: w.ID, WorkloadKind: "stack",
		Role: "web", ContainerID: "", State: "starting",
	}); err != nil {
		t.Fatalf("seed: %v", err)
	}

	r := New(st, &fakeDocker{}, 0)
	if err := r.ReconcileOnce(context.Background()); err != nil {
		t.Fatalf("ReconcileOnce: %v", err)
	}

	// Check the read error so a lookup failure is not misreported as a
	// state change.
	got, err := st.GetContainerByID(rowID)
	if err != nil {
		t.Fatalf("GetContainerByID(%q): %v", rowID, err)
	}
	if got.State != "starting" {
		t.Fatalf("placeholder row should keep state, got %q", got.State)
	}
}
// TestReconcileIgnoresUnmanagedContainers verifies that a container without
// the canonical workload label is ignored even if it carries other labels —
// only tinyforge.workload.id is honored.
func TestReconcileIgnoresUnmanagedContainers(t *testing.T) {
	st := newTestStore(t)
	fake := &fakeDocker{items: []docker.ReconcileItem{{
		ID: "docker-foreign", Labels: map[string]string{"app": "other"},
	}}}

	r := New(st, fake, 0)
	if err := r.ReconcileOnce(context.Background()); err != nil {
		t.Fatalf("ReconcileOnce: %v", err)
	}

	// The assertion below expects zero rows, so an unchecked query failure
	// (which would leave rows empty) would make this test pass falsely.
	rows, err := st.ListContainers(store.ContainerFilter{})
	if err != nil {
		t.Fatalf("ListContainers: %v", err)
	}
	if len(rows) != 0 {
		t.Fatalf("foreign container should not produce rows, got %d", len(rows))
	}
}
// TestReconcileDoesNotClobberDeployerFields guards against the regression where
// the reconciler's upsert wiped subdomain / proxy_route_id / npm_proxy_id /
// image_tag / stage_id on every tick because those columns were included in
// the ON CONFLICT DO UPDATE SET clause but never populated by the reconciler.
func TestReconcileDoesNotClobberDeployerFields(t *testing.T) {
	st := newTestStore(t)

	// Project workload — exercises the path most affected by the regression
	// (proxies, blue-green slots, image-tag-based stale detection).
	w := makeWorkload(t, st, "p", "project")

	// Deployer wrote the row with proxy / subdomain / image_tag / stage_id.
	deployerRow := store.Container{
		ID: "deploy-uuid-1", WorkloadID: w.ID, WorkloadKind: "project",
		Role: "prod", StageID: "stage-prod-id", ContainerID: "docker-aaa",
		ImageRef: "nginx:1.27", ImageTag: "1.27", State: "running", Port: 8080,
		Subdomain: "prod-p", ProxyRouteID: "route-42", NpmProxyID: 7,
	}
	if err := st.UpsertContainer(deployerRow); err != nil {
		t.Fatalf("seed deployer row: %v", err)
	}

	// Reconciler sees the same docker container — no proxy fields in labels.
	fake := &fakeDocker{items: []docker.ReconcileItem{{
		ID: "docker-aaa", Image: "nginx:1.27", State: "running",
		Labels: map[string]string{
			docker.LabelManaged:      "true",
			docker.LabelWorkloadID:   w.ID,
			docker.LabelWorkloadKind: "project",
			docker.LabelRole:         "prod",
		},
		Ports: []uint16{8080},
	}}}

	r := New(st, fake, 0)
	if err := r.ReconcileOnce(context.Background()); err != nil {
		t.Fatalf("ReconcileOnce: %v", err)
	}

	// Check the read error so a lookup failure is not misreported as a
	// wiped field.
	got, err := st.GetContainerByID("deploy-uuid-1")
	if err != nil {
		t.Fatalf("GetContainerByID: %v", err)
	}

	// The field checks are independent, so report every clobbered column in
	// one run instead of stopping at the first.
	if got.Subdomain != "prod-p" {
		t.Errorf("subdomain wiped: %q", got.Subdomain)
	}
	if got.ProxyRouteID != "route-42" {
		t.Errorf("proxy_route_id wiped: %q", got.ProxyRouteID)
	}
	if got.NpmProxyID != 7 {
		t.Errorf("npm_proxy_id wiped: %d", got.NpmProxyID)
	}
	if got.ImageTag != "1.27" {
		t.Errorf("image_tag wiped: %q", got.ImageTag)
	}
	if got.StageID != "stage-prod-id" {
		t.Errorf("stage_id wiped: %q", got.StageID)
	}
}
// TestReconcileRejectsForgedWorkloadLabel guards C2 — a Docker container
// claiming a non-existent workload_id must be ignored, not adopted into the
// containers index.
func TestReconcileRejectsForgedWorkloadLabel(t *testing.T) {
	st := newTestStore(t)
	fake := &fakeDocker{items: []docker.ReconcileItem{{
		ID: "docker-evil",
		Labels: map[string]string{
			docker.LabelManaged:      "true",
			docker.LabelWorkloadID:   "wl-does-not-exist",
			docker.LabelWorkloadKind: "project",
			docker.LabelRole:         "prod",
		},
	}}}

	r := New(st, fake, 0)
	if err := r.ReconcileOnce(context.Background()); err != nil {
		t.Fatalf("ReconcileOnce: %v", err)
	}

	// The assertion below expects zero rows, so an unchecked query failure
	// (which would leave rows empty) would make this test pass falsely.
	rows, err := st.ListContainers(store.ContainerFilter{})
	if err != nil {
		t.Fatalf("ListContainers: %v", err)
	}
	if len(rows) != 0 {
		t.Fatalf("forged label should produce no row, got %d", len(rows))
	}
}
// TestReconcileSkipsProjectInsertWithoutDeployerRow guards H3 — the reconciler
// must not invent a project container row, since the deployer is the
// authoritative writer and inventing rows races with MaxInstances > 1 deploys.
func TestReconcileSkipsProjectInsertWithoutDeployerRow(t *testing.T) {
	st := newTestStore(t)
	w := makeWorkload(t, st, "p2", "project")

	// Reconciler sees a real container with project labels but no deployer
	// row exists yet (race during deploy).
	fake := &fakeDocker{items: []docker.ReconcileItem{{
		ID: "docker-race", Image: "nginx", State: "running",
		Labels: map[string]string{
			docker.LabelManaged:      "true",
			docker.LabelWorkloadID:   w.ID,
			docker.LabelWorkloadKind: "project",
			docker.LabelRole:         "prod",
		},
	}}}

	r := New(st, fake, 0)
	if err := r.ReconcileOnce(context.Background()); err != nil {
		t.Fatalf("ReconcileOnce: %v", err)
	}

	// The assertion below expects zero rows, so an unchecked query failure
	// (which would leave rows empty) would make this test pass falsely.
	rows, err := st.ListContainersByWorkload(w.ID)
	if err != nil {
		t.Fatalf("ListContainersByWorkload: %v", err)
	}
	if len(rows) != 0 {
		t.Fatalf("project insert without deployer row should be skipped, got %d rows", len(rows))
	}
}
// TestReconcileBatchingPreservesBehavior locks Fix A: loading all workloads
// once per tick (and resolving labels from that in-memory map instead of an
// N+1 GetWorkloadByID) must produce the same outcome as the per-container
// lookup did. With multiple containers across multiple workloads plus a forged
// label and a stale row, after one ReconcileOnce: known-workload containers
// are upserted with the snapshot State, the forged-label container is skipped,
// and the absent stale row is flipped to missing.
func TestReconcileBatchingPreservesBehavior(t *testing.T) {
	st := newTestStore(t)
	w1 := makeWorkload(t, st, "batch-a", "stack")
	w2 := makeWorkload(t, st, "batch-b", "stack")

	// A stale row for w2 whose container is gone — must be marked missing.
	if err := st.UpsertContainer(store.Container{
		ID: w2.ID + ":old", WorkloadID: w2.ID, WorkloadKind: "stack",
		Role: "old", ContainerID: "docker-vanished", State: "running",
	}); err != nil {
		t.Fatalf("seed stale row: %v", err)
	}

	fake := &fakeDocker{items: []docker.ReconcileItem{
		{
			ID: "docker-a1", Name: "batch-a-web-1", Image: "nginx:1.27", State: "running",
			Labels: map[string]string{
				docker.LabelManaged:      "true",
				docker.LabelWorkloadID:   w1.ID,
				docker.LabelWorkloadKind: "stack",
				docker.LabelRole:         "web",
			},
			Ports: []uint16{8080},
		},
		{
			ID: "docker-b1", Name: "batch-b-api-1", Image: "redis:7", State: "exited",
			Labels: map[string]string{
				docker.LabelManaged:      "true",
				docker.LabelWorkloadID:   w2.ID,
				docker.LabelWorkloadKind: "stack",
				docker.LabelRole:         "api",
			},
		},
		{
			// Forged label — no such workload. Must be skipped entirely.
			ID: "docker-evil", Name: "evil", Image: "nginx", State: "running",
			Labels: map[string]string{
				docker.LabelManaged:      "true",
				docker.LabelWorkloadID:   "wl-forged",
				docker.LabelWorkloadKind: "stack",
				docker.LabelRole:         "web",
			},
		},
	}}

	r := New(st, fake, 0)
	if err := r.ReconcileOnce(context.Background()); err != nil {
		t.Fatalf("ReconcileOnce: %v", err)
	}

	// Every store read below checks its error: a failed query must fail the
	// test on its own instead of producing a confusing mismatch or, for the
	// forged-label scan, a vacuous pass over an empty result.

	// w1: one row, bound to docker-a1, running.
	w1Rows, err := st.ListContainersByWorkload(w1.ID)
	if err != nil {
		t.Fatalf("w1: ListContainersByWorkload: %v", err)
	}
	if len(w1Rows) != 1 {
		t.Fatalf("w1: expected 1 row, got %d", len(w1Rows))
	}
	if w1Rows[0].ContainerID != "docker-a1" || w1Rows[0].State != "running" || w1Rows[0].Role != "web" {
		t.Fatalf("w1 row wrong: %+v", w1Rows[0])
	}

	// w2: the new api container is present (exited→stopped); the stale row is missing.
	api, err := st.GetContainerByID(w2.ID + ":api")
	if err != nil {
		t.Fatalf("w2: GetContainerByID(api): %v", err)
	}
	if api.ContainerID != "docker-b1" || api.State != "stopped" {
		t.Fatalf("w2 api row wrong: %+v", api)
	}
	old, err := st.GetContainerByID(w2.ID + ":old")
	if err != nil {
		t.Fatalf("w2: GetContainerByID(old): %v", err)
	}
	if old.State != "missing" {
		t.Fatalf("w2 stale row should be missing, got %q", old.State)
	}

	// Forged label produced no row anywhere.
	all, err := st.ListContainers(store.ContainerFilter{})
	if err != nil {
		t.Fatalf("ListContainers: %v", err)
	}
	for _, c := range all {
		if c.ContainerID == "docker-evil" {
			t.Fatalf("forged-label container was adopted: %+v", c)
		}
	}
}
// TestReconcileSyncsImageContainerState locks the Fix B coupling: the generic
// reconciler upsert pass — NOT image.Reconcile — is what syncs an image
// container's State from the snapshot. An image container carries the
// workload_id / kind=image / role=image labels at create time, so a present
// container's row gets its State written here, proving the per-container
// inspect formerly in image.Reconcile is redundant.
func TestReconcileSyncsImageContainerState(t *testing.T) {
	st := newTestStore(t)
	w := makeWorkload(t, st, "img", "image")

	// Deployer pre-created the image container row (running). Docker now
	// reports it exited — the generic pass must sync it to stopped.
	if err := st.UpsertContainer(store.Container{
		ID: "img-deploy-uuid", WorkloadID: w.ID, WorkloadKind: "image",
		Role: "image", ContainerID: "docker-img", State: "running",
	}); err != nil {
		t.Fatalf("seed image row: %v", err)
	}

	fake := &fakeDocker{items: []docker.ReconcileItem{{
		ID: "docker-img", Image: "ghcr.io/owner/app:v1", State: "exited",
		Labels: map[string]string{
			docker.LabelManaged:      "true",
			docker.LabelWorkloadID:   w.ID,
			docker.LabelWorkloadKind: "image",
			docker.LabelRole:         "image",
		},
		Ports: []uint16{3000},
	}}}

	// No plugin reconciler wired — proves the state sync comes from the
	// generic upsert pass, not from image.Reconcile.
	r := New(st, fake, 0)
	if err := r.ReconcileOnce(context.Background()); err != nil {
		t.Fatalf("ReconcileOnce: %v", err)
	}

	// Check the read error so a lookup failure is not misreported as a
	// missing state sync.
	got, err := st.GetContainerByID("img-deploy-uuid")
	if err != nil {
		t.Fatalf("GetContainerByID: %v", err)
	}
	if got.State != "stopped" {
		t.Fatalf("image container state not synced by generic pass: got %q want stopped", got.State)
	}
	if got.Port != 3000 || got.ImageRef != "ghcr.io/owner/app:v1" {
		t.Fatalf("image container docker fields not synced: %+v", got)
	}
}
// TestReconcileNormalizesState verifies that a container Docker reports as
// "exited" is stored with state "stopped" after one ReconcileOnce.
func TestReconcileNormalizesState(t *testing.T) {
	st := newTestStore(t)
	w := makeWorkload(t, st, "norm", "stack")
	rowID := w.ID + ":web"

	fake := &fakeDocker{items: []docker.ReconcileItem{{
		ID:    "docker-1",
		Image: "nginx",
		State: "exited",
		Labels: map[string]string{
			docker.LabelManaged:      "true",
			docker.LabelWorkloadID:   w.ID,
			docker.LabelWorkloadKind: "stack",
			docker.LabelRole:         "web",
		},
	}}}

	r := New(st, fake, 0)
	if err := r.ReconcileOnce(context.Background()); err != nil {
		t.Fatalf("ReconcileOnce: %v", err)
	}

	// Check the read error so a lookup failure is not misreported as a
	// normalization bug.
	got, err := st.GetContainerByID(rowID)
	if err != nil {
		t.Fatalf("GetContainerByID(%q): %v", rowID, err)
	}
	if got.State != "stopped" {
		t.Fatalf("docker 'exited' should normalize to 'stopped', got %q", got.State)
	}
}