5c17885197
Load every workload once per tick into a map instead of a per-container GetWorkloadByID (N+1) in the upsert loop plus a second ListWorkloads in the plugin pass: one query per tick, zero GetWorkloadByID. The ListWorkloads error path returns before the missing-sweep so a failed load can't flip live container rows to 'missing'. image.Reconcile is now a no-op: the generic upsert+markMissing pass already syncs every labeled container's state from the single ListAllForReconciler (docker ps -a) snapshot earlier in the same tick, so the former per-container IsContainerRunning loop was N redundant Docker calls/tick. (Its no-op body sits in image.go, which landed with the preceding commit; the tests are here.) compose/static reconcile do non-redundant work and are intentionally untouched. Reviewed: go APPROVE.
418 lines
14 KiB
Go
418 lines
14 KiB
Go
package reconciler
|
|
|
|
import (
|
|
"context"
|
|
"testing"
|
|
|
|
"github.com/alexei/tinyforge/internal/docker"
|
|
"github.com/alexei/tinyforge/internal/store"
|
|
)
|
|
|
|
// fakeDocker is a tiny stand-in for docker.Client. The reconciler depends on
|
|
// the DockerLister interface so we don't need a real daemon for unit tests.
|
|
type fakeDocker struct {
|
|
items []docker.ReconcileItem
|
|
}
|
|
|
|
func (f *fakeDocker) ListAllForReconciler(ctx context.Context) ([]docker.ReconcileItem, error) {
|
|
return f.items, nil
|
|
}
|
|
|
|
func newTestStore(t *testing.T) *store.Store {
|
|
t.Helper()
|
|
s, err := store.New(":memory:")
|
|
if err != nil {
|
|
t.Fatalf("create store: %v", err)
|
|
}
|
|
t.Cleanup(func() { s.Close() })
|
|
return s
|
|
}
|
|
|
|
// makeWorkload inserts a workload row with the given kind so reconciler
|
|
// dispatch can resolve it by ID.
|
|
func makeWorkload(t *testing.T, st *store.Store, name, kind string) store.Workload {
|
|
t.Helper()
|
|
w, err := st.CreateWorkload(store.Workload{
|
|
Kind: kind, RefID: name + "-ref", Name: name,
|
|
})
|
|
if err != nil {
|
|
t.Fatalf("CreateWorkload: %v", err)
|
|
}
|
|
return w
|
|
}
|
|
|
|
func TestReconcileWorkloadLabelledStackContainer(t *testing.T) {
|
|
st := newTestStore(t)
|
|
|
|
w := makeWorkload(t, st, "wf-stack", "stack")
|
|
|
|
// One container with the canonical workload labels stamped.
|
|
fake := &fakeDocker{items: []docker.ReconcileItem{{
|
|
ID: "docker-abc",
|
|
Name: "wf-stack-web-1",
|
|
Image: "nginx:1.27",
|
|
State: "running",
|
|
Labels: map[string]string{
|
|
docker.LabelManaged: "true",
|
|
docker.LabelWorkloadID: w.ID,
|
|
docker.LabelWorkloadKind: "stack",
|
|
docker.LabelRole: "web",
|
|
},
|
|
Ports: []uint16{8080},
|
|
}}}
|
|
|
|
r := New(st, fake, 0)
|
|
if err := r.ReconcileOnce(context.Background()); err != nil {
|
|
t.Fatalf("ReconcileOnce: %v", err)
|
|
}
|
|
|
|
rows, _ := st.ListContainersByWorkload(w.ID)
|
|
if len(rows) != 1 {
|
|
t.Fatalf("expected 1 container row, got %d", len(rows))
|
|
}
|
|
got := rows[0]
|
|
if got.ContainerID != "docker-abc" {
|
|
t.Fatalf("container_id not bound: got %q", got.ContainerID)
|
|
}
|
|
if got.Role != "web" || got.WorkloadKind != "stack" {
|
|
t.Fatalf("dispatch wrong: %+v", got)
|
|
}
|
|
if got.State != "running" || got.Port != 8080 {
|
|
t.Fatalf("state/port wrong: %+v", got)
|
|
}
|
|
}
|
|
|
|
func TestReconcileMarksMissingRows(t *testing.T) {
|
|
st := newTestStore(t)
|
|
|
|
w := makeWorkload(t, st, "missing-stack", "stack")
|
|
|
|
// Pre-existing row with a real container_id that no longer exists.
|
|
if err := st.UpsertContainer(store.Container{
|
|
ID: w.ID + ":web", WorkloadID: w.ID, WorkloadKind: "stack",
|
|
Role: "web", ContainerID: "docker-gone", State: "running",
|
|
}); err != nil {
|
|
t.Fatalf("seed: %v", err)
|
|
}
|
|
|
|
// Reconciler sees nothing.
|
|
r := New(st, &fakeDocker{}, 0)
|
|
if err := r.ReconcileOnce(context.Background()); err != nil {
|
|
t.Fatalf("ReconcileOnce: %v", err)
|
|
}
|
|
|
|
got, _ := st.GetContainerByID(w.ID + ":web")
|
|
if got.State != "missing" {
|
|
t.Fatalf("expected state=missing, got %q", got.State)
|
|
}
|
|
}
|
|
|
|
func TestReconcileSkipsRowsAwaitingDocker(t *testing.T) {
|
|
st := newTestStore(t)
|
|
|
|
w := makeWorkload(t, st, "pending", "stack")
|
|
|
|
// A row with empty container_id (deployer placeholder, awaiting docker
|
|
// create). Reconciler must not mark this as missing.
|
|
if err := st.UpsertContainer(store.Container{
|
|
ID: w.ID + ":web", WorkloadID: w.ID, WorkloadKind: "stack",
|
|
Role: "web", ContainerID: "", State: "starting",
|
|
}); err != nil {
|
|
t.Fatalf("seed: %v", err)
|
|
}
|
|
|
|
r := New(st, &fakeDocker{}, 0)
|
|
if err := r.ReconcileOnce(context.Background()); err != nil {
|
|
t.Fatalf("ReconcileOnce: %v", err)
|
|
}
|
|
|
|
got, _ := st.GetContainerByID(w.ID + ":web")
|
|
if got.State != "starting" {
|
|
t.Fatalf("placeholder row should keep state, got %q", got.State)
|
|
}
|
|
}
|
|
|
|
func TestReconcileIgnoresUnmanagedContainers(t *testing.T) {
|
|
// A container without the canonical workload label is ignored even if
|
|
// it carries other labels — only tinyforge.workload.id is honored.
|
|
st := newTestStore(t)
|
|
fake := &fakeDocker{items: []docker.ReconcileItem{{
|
|
ID: "docker-foreign", Labels: map[string]string{"app": "other"},
|
|
}}}
|
|
r := New(st, fake, 0)
|
|
if err := r.ReconcileOnce(context.Background()); err != nil {
|
|
t.Fatalf("ReconcileOnce: %v", err)
|
|
}
|
|
rows, _ := st.ListContainers(store.ContainerFilter{})
|
|
if len(rows) != 0 {
|
|
t.Fatalf("foreign container should not produce rows, got %d", len(rows))
|
|
}
|
|
}
|
|
|
|
// TestReconcileDoesNotClobberDeployerFields guards against the regression where
|
|
// the reconciler's upsert wiped subdomain / proxy_route_id / npm_proxy_id /
|
|
// image_tag / stage_id on every tick because those columns were included in
|
|
// the ON CONFLICT DO UPDATE SET clause but never populated by the reconciler.
|
|
func TestReconcileDoesNotClobberDeployerFields(t *testing.T) {
|
|
st := newTestStore(t)
|
|
|
|
// Project workload — exercises the path most affected by the regression
|
|
// (proxies, blue-green slots, image-tag-based stale detection).
|
|
w := makeWorkload(t, st, "p", "project")
|
|
|
|
// Deployer wrote the row with proxy / subdomain / image_tag / stage_id.
|
|
deployerRow := store.Container{
|
|
ID: "deploy-uuid-1", WorkloadID: w.ID, WorkloadKind: "project",
|
|
Role: "prod", StageID: "stage-prod-id", ContainerID: "docker-aaa",
|
|
ImageRef: "nginx:1.27", ImageTag: "1.27", State: "running", Port: 8080,
|
|
Subdomain: "prod-p", ProxyRouteID: "route-42", NpmProxyID: 7,
|
|
}
|
|
if err := st.UpsertContainer(deployerRow); err != nil {
|
|
t.Fatalf("seed deployer row: %v", err)
|
|
}
|
|
|
|
// Reconciler sees the same docker container — no proxy fields in labels.
|
|
fake := &fakeDocker{items: []docker.ReconcileItem{{
|
|
ID: "docker-aaa", Image: "nginx:1.27", State: "running",
|
|
Labels: map[string]string{
|
|
docker.LabelManaged: "true",
|
|
docker.LabelWorkloadID: w.ID,
|
|
docker.LabelWorkloadKind: "project",
|
|
docker.LabelRole: "prod",
|
|
},
|
|
Ports: []uint16{8080},
|
|
}}}
|
|
r := New(st, fake, 0)
|
|
if err := r.ReconcileOnce(context.Background()); err != nil {
|
|
t.Fatalf("ReconcileOnce: %v", err)
|
|
}
|
|
|
|
got, _ := st.GetContainerByID("deploy-uuid-1")
|
|
if got.Subdomain != "prod-p" {
|
|
t.Fatalf("subdomain wiped: %q", got.Subdomain)
|
|
}
|
|
if got.ProxyRouteID != "route-42" {
|
|
t.Fatalf("proxy_route_id wiped: %q", got.ProxyRouteID)
|
|
}
|
|
if got.NpmProxyID != 7 {
|
|
t.Fatalf("npm_proxy_id wiped: %d", got.NpmProxyID)
|
|
}
|
|
if got.ImageTag != "1.27" {
|
|
t.Fatalf("image_tag wiped: %q", got.ImageTag)
|
|
}
|
|
if got.StageID != "stage-prod-id" {
|
|
t.Fatalf("stage_id wiped: %q", got.StageID)
|
|
}
|
|
}
|
|
|
|
// TestReconcileRejectsForgedWorkloadLabel guards C2 — a Docker container
|
|
// claiming a non-existent workload_id must be ignored, not adopted into the
|
|
// containers index.
|
|
func TestReconcileRejectsForgedWorkloadLabel(t *testing.T) {
|
|
st := newTestStore(t)
|
|
fake := &fakeDocker{items: []docker.ReconcileItem{{
|
|
ID: "docker-evil",
|
|
Labels: map[string]string{
|
|
docker.LabelManaged: "true",
|
|
docker.LabelWorkloadID: "wl-does-not-exist",
|
|
docker.LabelWorkloadKind: "project",
|
|
docker.LabelRole: "prod",
|
|
},
|
|
}}}
|
|
r := New(st, fake, 0)
|
|
if err := r.ReconcileOnce(context.Background()); err != nil {
|
|
t.Fatalf("ReconcileOnce: %v", err)
|
|
}
|
|
rows, _ := st.ListContainers(store.ContainerFilter{})
|
|
if len(rows) != 0 {
|
|
t.Fatalf("forged label should produce no row, got %d", len(rows))
|
|
}
|
|
}
|
|
|
|
// TestReconcileSkipsProjectInsertWithoutDeployerRow guards H3 — the reconciler
|
|
// must not invent a project container row, since the deployer is the
|
|
// authoritative writer and inventing rows races with MaxInstances > 1 deploys.
|
|
func TestReconcileSkipsProjectInsertWithoutDeployerRow(t *testing.T) {
|
|
st := newTestStore(t)
|
|
w := makeWorkload(t, st, "p2", "project")
|
|
|
|
// Reconciler sees a real container with project labels but no deployer
|
|
// row exists yet (race during deploy).
|
|
fake := &fakeDocker{items: []docker.ReconcileItem{{
|
|
ID: "docker-race", Image: "nginx", State: "running",
|
|
Labels: map[string]string{
|
|
docker.LabelManaged: "true",
|
|
docker.LabelWorkloadID: w.ID,
|
|
docker.LabelWorkloadKind: "project",
|
|
docker.LabelRole: "prod",
|
|
},
|
|
}}}
|
|
r := New(st, fake, 0)
|
|
if err := r.ReconcileOnce(context.Background()); err != nil {
|
|
t.Fatalf("ReconcileOnce: %v", err)
|
|
}
|
|
rows, _ := st.ListContainersByWorkload(w.ID)
|
|
if len(rows) != 0 {
|
|
t.Fatalf("project insert without deployer row should be skipped, got %d rows", len(rows))
|
|
}
|
|
}
|
|
|
|
// TestReconcileBatchingPreservesBehavior locks Fix A: loading all workloads
|
|
// once per tick (and resolving labels from that in-memory map instead of an
|
|
// N+1 GetWorkloadByID) must produce the same outcome as the per-container
|
|
// lookup did. With multiple containers across multiple workloads plus a forged
|
|
// label and a stale row, after one ReconcileOnce: known-workload containers
|
|
// are upserted with the snapshot State, the forged-label container is skipped,
|
|
// and the absent stale row is flipped to missing.
|
|
func TestReconcileBatchingPreservesBehavior(t *testing.T) {
|
|
st := newTestStore(t)
|
|
|
|
w1 := makeWorkload(t, st, "batch-a", "stack")
|
|
w2 := makeWorkload(t, st, "batch-b", "stack")
|
|
|
|
// A stale row for w2 whose container is gone — must be marked missing.
|
|
if err := st.UpsertContainer(store.Container{
|
|
ID: w2.ID + ":old", WorkloadID: w2.ID, WorkloadKind: "stack",
|
|
Role: "old", ContainerID: "docker-vanished", State: "running",
|
|
}); err != nil {
|
|
t.Fatalf("seed stale row: %v", err)
|
|
}
|
|
|
|
fake := &fakeDocker{items: []docker.ReconcileItem{
|
|
{
|
|
ID: "docker-a1", Name: "batch-a-web-1", Image: "nginx:1.27", State: "running",
|
|
Labels: map[string]string{
|
|
docker.LabelManaged: "true",
|
|
docker.LabelWorkloadID: w1.ID,
|
|
docker.LabelWorkloadKind: "stack",
|
|
docker.LabelRole: "web",
|
|
},
|
|
Ports: []uint16{8080},
|
|
},
|
|
{
|
|
ID: "docker-b1", Name: "batch-b-api-1", Image: "redis:7", State: "exited",
|
|
Labels: map[string]string{
|
|
docker.LabelManaged: "true",
|
|
docker.LabelWorkloadID: w2.ID,
|
|
docker.LabelWorkloadKind: "stack",
|
|
docker.LabelRole: "api",
|
|
},
|
|
},
|
|
{
|
|
// Forged label — no such workload. Must be skipped entirely.
|
|
ID: "docker-evil", Name: "evil", Image: "nginx", State: "running",
|
|
Labels: map[string]string{
|
|
docker.LabelManaged: "true",
|
|
docker.LabelWorkloadID: "wl-forged",
|
|
docker.LabelWorkloadKind: "stack",
|
|
docker.LabelRole: "web",
|
|
},
|
|
},
|
|
}}
|
|
|
|
r := New(st, fake, 0)
|
|
if err := r.ReconcileOnce(context.Background()); err != nil {
|
|
t.Fatalf("ReconcileOnce: %v", err)
|
|
}
|
|
|
|
// w1: one row, bound to docker-a1, running.
|
|
w1Rows, _ := st.ListContainersByWorkload(w1.ID)
|
|
if len(w1Rows) != 1 {
|
|
t.Fatalf("w1: expected 1 row, got %d", len(w1Rows))
|
|
}
|
|
if w1Rows[0].ContainerID != "docker-a1" || w1Rows[0].State != "running" || w1Rows[0].Role != "web" {
|
|
t.Fatalf("w1 row wrong: %+v", w1Rows[0])
|
|
}
|
|
|
|
// w2: the new api container is present (exited→stopped); the stale row is missing.
|
|
api, _ := st.GetContainerByID(w2.ID + ":api")
|
|
if api.ContainerID != "docker-b1" || api.State != "stopped" {
|
|
t.Fatalf("w2 api row wrong: %+v", api)
|
|
}
|
|
old, _ := st.GetContainerByID(w2.ID + ":old")
|
|
if old.State != "missing" {
|
|
t.Fatalf("w2 stale row should be missing, got %q", old.State)
|
|
}
|
|
|
|
// Forged label produced no row anywhere.
|
|
all, _ := st.ListContainers(store.ContainerFilter{})
|
|
for _, c := range all {
|
|
if c.ContainerID == "docker-evil" {
|
|
t.Fatalf("forged-label container was adopted: %+v", c)
|
|
}
|
|
}
|
|
}
|
|
|
|
// TestReconcileSyncsImageContainerState locks the Fix B coupling: the generic
|
|
// reconciler upsert pass — NOT image.Reconcile — is what syncs an image
|
|
// container's State from the snapshot. An image container carries the
|
|
// workload_id / kind=image / role=image labels at create time, so a present
|
|
// container's row gets its State written here, proving the per-container
|
|
// inspect formerly in image.Reconcile is redundant.
|
|
func TestReconcileSyncsImageContainerState(t *testing.T) {
|
|
st := newTestStore(t)
|
|
w := makeWorkload(t, st, "img", "image")
|
|
|
|
// Deployer pre-created the image container row (running). Docker now
|
|
// reports it exited — the generic pass must sync it to stopped.
|
|
if err := st.UpsertContainer(store.Container{
|
|
ID: "img-deploy-uuid", WorkloadID: w.ID, WorkloadKind: "image",
|
|
Role: "image", ContainerID: "docker-img", State: "running",
|
|
}); err != nil {
|
|
t.Fatalf("seed image row: %v", err)
|
|
}
|
|
|
|
fake := &fakeDocker{items: []docker.ReconcileItem{{
|
|
ID: "docker-img", Image: "ghcr.io/owner/app:v1", State: "exited",
|
|
Labels: map[string]string{
|
|
docker.LabelManaged: "true",
|
|
docker.LabelWorkloadID: w.ID,
|
|
docker.LabelWorkloadKind: "image",
|
|
docker.LabelRole: "image",
|
|
},
|
|
Ports: []uint16{3000},
|
|
}}}
|
|
|
|
// No plugin reconciler wired — proves the state sync comes from the
|
|
// generic upsert pass, not from image.Reconcile.
|
|
r := New(st, fake, 0)
|
|
if err := r.ReconcileOnce(context.Background()); err != nil {
|
|
t.Fatalf("ReconcileOnce: %v", err)
|
|
}
|
|
|
|
got, _ := st.GetContainerByID("img-deploy-uuid")
|
|
if got.State != "stopped" {
|
|
t.Fatalf("image container state not synced by generic pass: got %q want stopped", got.State)
|
|
}
|
|
if got.Port != 3000 || got.ImageRef != "ghcr.io/owner/app:v1" {
|
|
t.Fatalf("image container docker fields not synced: %+v", got)
|
|
}
|
|
}
|
|
|
|
func TestReconcileNormalizesState(t *testing.T) {
|
|
st := newTestStore(t)
|
|
w := makeWorkload(t, st, "norm", "stack")
|
|
|
|
fake := &fakeDocker{items: []docker.ReconcileItem{{
|
|
ID: "docker-1",
|
|
Image: "nginx",
|
|
State: "exited",
|
|
Labels: map[string]string{
|
|
docker.LabelManaged: "true",
|
|
docker.LabelWorkloadID: w.ID,
|
|
docker.LabelWorkloadKind: "stack",
|
|
docker.LabelRole: "web",
|
|
},
|
|
}}}
|
|
|
|
r := New(st, fake, 0)
|
|
if err := r.ReconcileOnce(context.Background()); err != nil {
|
|
t.Fatalf("ReconcileOnce: %v", err)
|
|
}
|
|
|
|
got, _ := st.GetContainerByID(w.ID + ":web")
|
|
if got.State != "stopped" {
|
|
t.Fatalf("docker 'exited' should normalize to 'stopped', got %q", got.State)
|
|
}
|
|
}
|