refactor(workload): finalize containers index + post-review hardening

Wraps up the workload refactor with the fixes that came out of the multi-agent
code review (see docs/plans/workload-refactor.md "What actually shipped").

Backend:
- store.ReconcileContainer: separate write path so the 30s reconciler tick no
  longer overwrites deployer-owned fields (subdomain, proxy_route_id,
  npm_proxy_id, image_tag).
- Container.stage_id column + index; ListProxyRoutes / ListContainersByStageID
  join via stage_id (survives stage rename), with legacy fallback to
  (project_id, role=stage_name).
- Reconciler: workload-existence check (rejects forged tinyforge.workload.id
  labels), skips inventing project-kind rows, child-context cancel before
  wg.Wait() on shutdown.
- Transactional CRUD across projects / stacks / static_sites: parent UPDATE
  and workload sync land in one transaction so secret rotations are durable.
- Webhook routing reads exclusively through workloads.webhook_secret; legacy
  GetProjectByWebhookSecret / GetStaticSiteByWebhookSecret fallback removed.
- store.GetStackByComposeProjectName + indexed lookup (no more full-table
  stack scan per compose container per tick).
- store.ListMissingSweepRows: filtered query for the missing-sweep.
- /api/instances/* handlers verify that (workload_id, role) matches the URL's
  (project_id, stage_name) before mutating — closes the cross-project
  hijack the security review flagged.
- extra_json no longer referenced from Go (column kept on disk for now).

Frontend:
- WorkloadContainers.svelte: generic detail-page panel reusable by stack and
  site detail pages.
- Containers page polish: client-side kind/state filters over an unfiltered
  fetch, URL-synced filters, race-safe loads via sequence number, EN+RU i18n,
  sidebar counter via navCounts.containers.

Misc:
- scripts/dev-server.sh: tolerate empty netstat grep result.
- .gitignore: ignore docker-watcher binaries, .claude/worktrees/, .facts-sync.json.
This commit is contained in:
2026-05-09 15:44:41 +03:00
parent d8ab22876f
commit cba2149aa9
30 changed files with 1227 additions and 509 deletions
+135 -55
View File
@@ -9,18 +9,21 @@ import (
"github.com/google/uuid"
)
const containerColumns = `id, workload_id, workload_kind, role, container_id,
// containerColumns is the canonical column list for `containers` queries.
// stage_id is populated by the deployer for project containers (so ListProxyRoutes
// survives stage renames) and left empty for stacks and sites.
const containerColumns = `id, workload_id, workload_kind, role, stage_id, container_id,
image_ref, image_tag, host, state, port,
subdomain, proxy_route_id, npm_proxy_id,
last_seen_at, extra_json, created_at, updated_at`
last_seen_at, created_at, updated_at`
// scanContainer reads one row into a Container and returns it together with
// the Scan error, if any. scanner is anything exposing a Scan method;
// GetContainerByID passes it the result of s.db.QueryRow. The destination
// fields are listed in the same order as containerColumns, so the two lists
// have to be edited together.
func scanContainer(scanner interface{ Scan(...any) error }) (Container, error) {
var c Container
err := scanner.Scan(
&c.ID, &c.WorkloadID, &c.WorkloadKind, &c.Role, &c.ContainerID,
&c.ID, &c.WorkloadID, &c.WorkloadKind, &c.Role, &c.StageID, &c.ContainerID,
&c.ImageRef, &c.ImageTag, &c.Host, &c.State, &c.Port,
&c.Subdomain, &c.ProxyRouteID, &c.NpmProxyID,
&c.LastSeenAt, &c.ExtraJSON, &c.CreatedAt, &c.UpdatedAt,
&c.LastSeenAt, &c.CreatedAt, &c.UpdatedAt,
)
return c, err
}
@@ -34,19 +37,16 @@ func (s *Store) CreateContainer(c Container) (Container, error) {
if c.Host == "" {
c.Host = "local"
}
if c.ExtraJSON == "" {
c.ExtraJSON = "{}"
}
c.CreatedAt = Now()
c.UpdatedAt = c.CreatedAt
_, err := s.db.Exec(
`INSERT INTO containers (`+containerColumns+`)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
c.ID, c.WorkloadID, c.WorkloadKind, c.Role, c.ContainerID,
c.ID, c.WorkloadID, c.WorkloadKind, c.Role, c.StageID, c.ContainerID,
c.ImageRef, c.ImageTag, c.Host, c.State, c.Port,
c.Subdomain, c.ProxyRouteID, c.NpmProxyID,
c.LastSeenAt, c.ExtraJSON, c.CreatedAt, c.UpdatedAt,
c.LastSeenAt, c.CreatedAt, c.UpdatedAt,
)
if err != nil {
return Container{}, fmt.Errorf("insert container: %w", err)
@@ -54,9 +54,12 @@ func (s *Store) CreateContainer(c Container) (Container, error) {
return c, nil
}
// UpsertContainer is the reconciler's primary write path. It updates an
// existing row (matched by ID) or inserts a new one. Caller is responsible
// for setting ID — use container_id-based lookup before calling this.
// UpsertContainer is the full-write path used by the deployer paths
// (stack manager, static-site manager) that own all fields of a row. Inserts
// if missing, replaces every column on conflict. The reconciler must NOT call
// this — it would clobber deployer-written subdomain / proxy_route_id /
// npm_proxy_id / image_tag with the empty values it doesn't know about. Use
// ReconcileContainer instead.
func (s *Store) UpsertContainer(c Container) error {
if c.ID == "" {
return fmt.Errorf("UpsertContainer: ID is required")
@@ -64,9 +67,6 @@ func (s *Store) UpsertContainer(c Container) error {
if c.Host == "" {
c.Host = "local"
}
if c.ExtraJSON == "" {
c.ExtraJSON = "{}"
}
c.UpdatedAt = Now()
if c.CreatedAt == "" {
c.CreatedAt = c.UpdatedAt
@@ -80,6 +80,7 @@ func (s *Store) UpsertContainer(c Container) error {
workload_id=excluded.workload_id,
workload_kind=excluded.workload_kind,
role=excluded.role,
stage_id=excluded.stage_id,
container_id=excluded.container_id,
image_ref=excluded.image_ref,
image_tag=excluded.image_tag,
@@ -90,12 +91,11 @@ func (s *Store) UpsertContainer(c Container) error {
proxy_route_id=excluded.proxy_route_id,
npm_proxy_id=excluded.npm_proxy_id,
last_seen_at=excluded.last_seen_at,
extra_json=excluded.extra_json,
updated_at=excluded.updated_at`,
c.ID, c.WorkloadID, c.WorkloadKind, c.Role, c.ContainerID,
c.ID, c.WorkloadID, c.WorkloadKind, c.Role, c.StageID, c.ContainerID,
c.ImageRef, c.ImageTag, c.Host, c.State, c.Port,
c.Subdomain, c.ProxyRouteID, c.NpmProxyID,
c.LastSeenAt, c.ExtraJSON, c.CreatedAt, c.UpdatedAt,
c.LastSeenAt, c.CreatedAt, c.UpdatedAt,
)
if err != nil {
return fmt.Errorf("upsert container: %w", err)
@@ -103,6 +103,44 @@ func (s *Store) UpsertContainer(c Container) error {
return nil
}
// ReconcileContainer is the write path reserved for the reconciler.
//
// When no row with c.ID exists, the full row is inserted using every column
// in containerColumns. When a row already exists, only the fields the
// reconciler can observe from Docker are refreshed: container_id, image_ref,
// state, port, last_seen_at and updated_at. Everything else — in particular
// subdomain, proxy_route_id, npm_proxy_id, image_tag and stage_id, which are
// owned by the deployer paths — is left as stored, so an empty value on c
// cannot wipe it.
//
// c.ID is required. An empty Host defaults to "local", UpdatedAt is always
// set to Now(), and an empty CreatedAt defaults to the same timestamp.
func (s *Store) ReconcileContainer(c Container) error {
	if c.ID == "" {
		return fmt.Errorf("ReconcileContainer: ID is required")
	}

	// Fill in defaults before building the statement arguments.
	c.UpdatedAt = Now()
	if c.CreatedAt == "" {
		c.CreatedAt = c.UpdatedAt
	}
	if c.Host == "" {
		c.Host = "local"
	}

	// The SET list deliberately names only Docker-derived columns; the
	// INSERT half still supplies all columns for brand-new rows.
	query := `INSERT INTO containers (` + containerColumns + `)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
ON CONFLICT(id) DO UPDATE SET
container_id=excluded.container_id,
image_ref=excluded.image_ref,
state=excluded.state,
port=excluded.port,
last_seen_at=excluded.last_seen_at,
updated_at=excluded.updated_at`

	// Argument order mirrors containerColumns.
	args := []any{
		c.ID, c.WorkloadID, c.WorkloadKind, c.Role, c.StageID, c.ContainerID,
		c.ImageRef, c.ImageTag, c.Host, c.State, c.Port,
		c.Subdomain, c.ProxyRouteID, c.NpmProxyID,
		c.LastSeenAt, c.CreatedAt, c.UpdatedAt,
	}

	if _, err := s.db.Exec(query, args...); err != nil {
		return fmt.Errorf("reconcile container: %w", err)
	}
	return nil
}
// GetContainerByID returns a single container row.
func (s *Store) GetContainerByID(id string) (Container, error) {
c, err := scanContainer(s.db.QueryRow(
@@ -136,9 +174,8 @@ func (s *Store) GetContainerByDockerID(dockerID string) (Container, error) {
}
// ListProxyRoutes returns proxy-enabled project containers joined with
// project + stage names. Reads from the normalized containers index. Stage
// ID is resolved through a (project_id, role=stage_name) join, which is
// uniquely indexed via UNIQUE(project_id, name) on stages.
// project + stage names. Reads from the normalized containers index and
// joins through stage_id so a stage rename does not orphan the row's view.
//
// Source is reported as "instance" for back-compat with the Proxies page
// filter (the frontend keys off the literal string).
@@ -150,7 +187,7 @@ func (s *Store) ListProxyRoutes(domain string) ([]ProxyRoute, error) {
FROM containers c
JOIN workloads w ON w.id = c.workload_id AND w.kind = 'project'
JOIN projects p ON p.id = w.ref_id
JOIN stages s ON s.project_id = p.id AND s.name = c.role
JOIN stages s ON s.id = c.stage_id OR (c.stage_id = '' AND s.project_id = p.id AND s.name = c.role)
WHERE c.subdomain != '' AND (c.proxy_route_id != '' OR c.npm_proxy_id > 0)
ORDER BY p.name, s.name, c.created_at DESC`,
)
@@ -179,17 +216,23 @@ func (s *Store) ListProxyRoutes(domain string) ([]ProxyRoute, error) {
}
// ListContainersByStageID returns project containers for the given stage,
// newest first. Resolves stage → project_id → workload(kind=project) →
// containers with role = stage.name. Replaces GetInstancesByStageID for
// callers in the deployer / API layer.
// newest first. Resolves via stage_id with a fallback to the legacy
// (stage.name = container.role) join for rows written before the stage_id
// column was populated. Replaces GetInstancesByStageID.
func (s *Store) ListContainersByStageID(stageID string) ([]Container, error) {
rows, err := s.db.Query(`
SELECT `+prefixCols(containerColumns, "c.")+`
FROM containers c
JOIN workloads w ON w.id = c.workload_id AND w.kind = 'project'
JOIN stages s ON s.project_id = w.ref_id AND s.name = c.role
WHERE s.id = ?
ORDER BY c.created_at DESC`, stageID)
LEFT JOIN stages s ON s.id = ?
WHERE c.stage_id = ?
OR (c.stage_id = '' AND s.id IS NOT NULL
AND c.role = s.name
AND EXISTS (
SELECT 1 FROM workloads w
WHERE w.id = c.workload_id
AND w.kind = 'project'
AND w.ref_id = s.project_id))
ORDER BY c.created_at DESC`, stageID, stageID)
if err != nil {
return nil, fmt.Errorf("query containers by stage: %w", err)
}
@@ -244,6 +287,11 @@ func (s *Store) ListContainers(f ContainerFilter) ([]Container, error) {
where []string
args []any
)
needsAppJoin := f.AppID != ""
if needsAppJoin {
where = append(where, "w.app_id = ?")
args = append(args, f.AppID)
}
if f.WorkloadID != "" {
where = append(where, "c.workload_id = ?")
args = append(args, f.WorkloadID)
@@ -256,23 +304,15 @@ func (s *Store) ListContainers(f ContainerFilter) ([]Container, error) {
where = append(where, "c.state = ?")
args = append(args, f.State)
}
var query string
if f.AppID != "" {
query = `SELECT ` + prefixCols(containerColumns, "c.") + `
FROM containers c JOIN workloads w ON w.id = c.workload_id
WHERE w.app_id = ?`
args = append([]any{f.AppID}, args...)
if len(where) > 0 {
query += " AND " + strings.Join(where, " AND ")
}
query += " ORDER BY c.created_at DESC"
} else {
query = `SELECT ` + prefixCols(containerColumns, "c.") + ` FROM containers c`
if len(where) > 0 {
query += " WHERE " + strings.Join(where, " AND ")
}
query += " ORDER BY c.created_at DESC"
query := `SELECT ` + prefixCols(containerColumns, "c.") + ` FROM containers c`
if needsAppJoin {
query += ` JOIN workloads w ON w.id = c.workload_id`
}
if len(where) > 0 {
query += " WHERE " + strings.Join(where, " AND ")
}
query += " ORDER BY c.created_at DESC"
rows, err := s.db.Query(query, args...)
if err != nil {
@@ -295,24 +335,24 @@ func (s *Store) ListContainers(f ContainerFilter) ([]Container, error) {
// Use this from the deployer when proxy / subdomain assignments change.
func (s *Store) UpdateContainer(c Container) error {
c.UpdatedAt = Now()
if c.ExtraJSON == "" {
c.ExtraJSON = "{}"
}
result, err := s.db.Exec(
`UPDATE containers SET workload_id=?, workload_kind=?, role=?, container_id=?,
`UPDATE containers SET workload_id=?, workload_kind=?, role=?, stage_id=?, container_id=?,
image_ref=?, image_tag=?, host=?, state=?, port=?,
subdomain=?, proxy_route_id=?, npm_proxy_id=?,
last_seen_at=?, extra_json=?, updated_at=?
last_seen_at=?, updated_at=?
WHERE id=?`,
c.WorkloadID, c.WorkloadKind, c.Role, c.ContainerID,
c.WorkloadID, c.WorkloadKind, c.Role, c.StageID, c.ContainerID,
c.ImageRef, c.ImageTag, c.Host, c.State, c.Port,
c.Subdomain, c.ProxyRouteID, c.NpmProxyID,
c.LastSeenAt, c.ExtraJSON, c.UpdatedAt, c.ID,
c.LastSeenAt, c.UpdatedAt, c.ID,
)
if err != nil {
return fmt.Errorf("update container: %w", err)
}
n, _ := result.RowsAffected()
n, err := result.RowsAffected()
if err != nil {
return fmt.Errorf("rows affected: %w", err)
}
if n == 0 {
return fmt.Errorf("container %s: %w", c.ID, ErrNotFound)
}
@@ -330,7 +370,10 @@ func (s *Store) UpdateContainerState(id, state string) error {
if err != nil {
return fmt.Errorf("update container state: %w", err)
}
n, _ := result.RowsAffected()
n, err := result.RowsAffected()
if err != nil {
return fmt.Errorf("rows affected: %w", err)
}
if n == 0 {
return fmt.Errorf("container %s: %w", id, ErrNotFound)
}
@@ -350,7 +393,10 @@ func (s *Store) DeleteContainer(id string) error {
if err != nil {
return fmt.Errorf("delete container: %w", err)
}
n, _ := result.RowsAffected()
n, err := result.RowsAffected()
if err != nil {
return fmt.Errorf("rows affected: %w", err)
}
if n == 0 {
return fmt.Errorf("container %s: %w", id, ErrNotFound)
}
@@ -367,6 +413,40 @@ func (s *Store) DeleteContainersByWorkload(workloadID string) error {
return nil
}
// ListMissingSweepRows returns the (id, container_id) pairs the reconciler
// must consider for the missing-state sweep: rows whose container_id is
// non-empty and whose state is not already 'missing'. It is used in place of
// a full ListContainers scan to keep the per-tick query proportional to the
// live set.
//
// On success the result is a non-nil, possibly empty slice. On any failure
// (query, scan, or row iteration) it returns nil and an error wrapping the
// underlying cause, so callers never see a partially filled slice.
func (s *Store) ListMissingSweepRows() ([]struct {
	ID          string
	ContainerID string
}, error) {
	// Alias for the anonymous struct in the signature, so the element type is
	// spelled once inside the body while the return type stays identical.
	type sweepRow = struct {
		ID          string
		ContainerID string
	}

	rows, err := s.db.Query(
		`SELECT id, container_id FROM containers
WHERE container_id != '' AND state != 'missing'`,
	)
	if err != nil {
		return nil, fmt.Errorf("query missing-sweep rows: %w", err)
	}
	defer rows.Close()

	out := []sweepRow{}
	for rows.Next() {
		var r sweepRow
		if err := rows.Scan(&r.ID, &r.ContainerID); err != nil {
			return nil, fmt.Errorf("scan missing-sweep row: %w", err)
		}
		out = append(out, r)
	}
	// rows.Next returning false can mean the iteration was aborted; report
	// that with the same wrapping as the other failure paths.
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("iterate missing-sweep rows: %w", err)
	}
	return out, nil
}
// prefixCols rewrites a comma-separated column list to use a table alias prefix.
// Used by queries that select from containers under the alias `c`
// (ListContainers, ListContainersByStageID).
func prefixCols(cols, prefix string) string {