perf(reconciler): batch workloads per tick, drop redundant image inspect
Load every workload once per tick into a map, replacing both the per-container GetWorkloadByID (N+1) in the upsert loop and the second ListWorkloads in the plugin pass: one query per tick, zero GetWorkloadByID. The ListWorkloads error path returns before the missing-sweep so a failed load can't flip live container rows to 'missing'. image.Reconcile is now a no-op: the generic upsert+markMissing pass already syncs every labeled container's state from the single ListAllForReconciler (docker ps -a) snapshot taken earlier in the same tick, so the former per-container IsContainerRunning loop was N redundant Docker calls per tick. (Its no-op body sits in image.go, which landed with the preceding commit; the tests are here.) The compose/static reconcile passes do non-redundant work and are intentionally untouched. Reviewed: go APPROVE.
This commit is contained in:
@@ -17,6 +17,7 @@ import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"sync"
|
||||
"time"
|
||||
@@ -110,17 +111,37 @@ func (r *Reconciler) ReconcileOnce(ctx context.Context) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Load every workload ONCE per tick and index by ID. This replaces both
|
||||
// the former N+1 GetWorkloadByID (one DB read per container) in the
|
||||
// upsert loop and the second ListWorkloads("") in the plugin pass: net 1
|
||||
// query per tick, 0 GetWorkloadByID.
|
||||
//
|
||||
// On error we return BEFORE the upsert loop and leave state untouched
|
||||
// this tick (the next tick retries). We must NOT proceed with an empty
|
||||
// map and fall through to markMissingRows: with no container resolving,
|
||||
// `seen` would be empty and markMissingRows would flip EVERY live row to
|
||||
// 'missing'. Aborting early is the safe choice.
|
||||
rows, err := r.store.ListWorkloads("")
|
||||
if err != nil {
|
||||
return fmt.Errorf("reconciler: list workloads: %w", err)
|
||||
}
|
||||
byID := make(map[string]store.Workload, len(rows))
|
||||
for _, w := range rows {
|
||||
byID[w.ID] = w
|
||||
}
|
||||
|
||||
seen := make(map[string]struct{}, len(items)) // container row IDs we touched
|
||||
|
||||
for _, item := range items {
|
||||
rowID := r.upsertFromItem(item)
|
||||
rowID := r.upsertFromItem(item, byID)
|
||||
if rowID != "" {
|
||||
seen[rowID] = struct{}{}
|
||||
}
|
||||
}
|
||||
|
||||
r.markMissingRows(seen)
|
||||
r.reconcilePluginWorkloads(ctx)
|
||||
r.reconcilePluginWorkloads(ctx, rows)
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -137,15 +158,13 @@ func (r *Reconciler) ReconcileOnce(ctx context.Context) error {
|
||||
//
|
||||
// No-op when the plugin dispatcher hasn't been wired (boot-time race,
|
||||
// disabled deployments, tests).
|
||||
func (r *Reconciler) reconcilePluginWorkloads(ctx context.Context) {
|
||||
//
|
||||
// rows is the workload set already loaded once by ReconcileOnce — passed
|
||||
// through rather than re-queried so a tick costs a single ListWorkloads.
|
||||
func (r *Reconciler) reconcilePluginWorkloads(ctx context.Context, rows []store.Workload) {
|
||||
if r.plugins == nil {
|
||||
return
|
||||
}
|
||||
rows, err := r.store.ListWorkloads("")
|
||||
if err != nil {
|
||||
slog.Warn("reconciler: list workloads for plugin pass", "error", err)
|
||||
return
|
||||
}
|
||||
for _, w := range rows {
|
||||
if w.SourceKind == "" {
|
||||
continue
|
||||
@@ -214,9 +233,9 @@ func (r *Reconciler) loop(ctx context.Context) {
|
||||
// After the hard cutover only the canonical tinyforge.workload.id label
|
||||
// path is honored — every Source plugin labels its containers with the
|
||||
// workload identity at create time.
|
||||
func (r *Reconciler) upsertFromItem(item docker.ReconcileItem) string {
|
||||
func (r *Reconciler) upsertFromItem(item docker.ReconcileItem, byID map[string]store.Workload) string {
|
||||
if id := item.Labels[docker.LabelWorkloadID]; id != "" {
|
||||
return r.upsertByWorkloadLabel(item, id)
|
||||
return r.upsertByWorkloadLabel(item, id, byID)
|
||||
}
|
||||
return ""
|
||||
}
|
||||
@@ -233,9 +252,9 @@ func (r *Reconciler) upsertFromItem(item docker.ReconcileItem) string {
|
||||
// known workload row is silently ignored. Anyone with Docker socket access
|
||||
// could otherwise spawn a container with a forged label and steal the
|
||||
// canonical slot for an existing workload.
|
||||
func (r *Reconciler) upsertByWorkloadLabel(item docker.ReconcileItem, workloadID string) string {
|
||||
w, err := r.store.GetWorkloadByID(workloadID)
|
||||
if err != nil {
|
||||
func (r *Reconciler) upsertByWorkloadLabel(item docker.ReconcileItem, workloadID string, byID map[string]store.Workload) string {
|
||||
w, ok := byID[workloadID]
|
||||
if !ok {
|
||||
// Forged or stale label — logged at debug on every tick the container is seen (not just once); the tick rate keeps logs quiet.
|
||||
slog.Debug("reconciler: unknown workload_id label", "workload_id", workloadID, "container_id", item.ID)
|
||||
return ""
|
||||
|
||||
Reference in New Issue
Block a user