refactor(workload): finalize containers index + post-review hardening

Wraps up the workload refactor with the fixes that came out of the multi-agent
code review (see docs/plans/workload-refactor.md "What actually shipped").

Backend:
- store.ReconcileContainer: separate write path so the 30s reconciler tick no
  longer overwrites deployer-owned fields (subdomain, proxy_route_id,
  npm_proxy_id, image_tag).
- Container.stage_id column + index; ListProxyRoutes / ListContainersByStageID
  join via stage_id (survives stage rename), with legacy fallback to
  (project_id, role=stage_name).
- Reconciler: workload-existence check (rejects forged tinyforge.workload.id
  labels), skips inventing project-kind rows, child-context cancel before
  wg.Wait() on shutdown.
- Transactional CRUD across projects / stacks / static_sites: parent UPDATE
  and workload sync land in one transaction so secret rotations are durable.
- Webhook routing reads exclusively through workloads.webhook_secret; legacy
  GetProjectByWebhookSecret / GetStaticSiteByWebhookSecret fallback removed.
- store.GetStackByComposeProjectName + indexed lookup (no more full-table
  stack scan per compose container per tick).
- store.ListMissingSweepRows: filtered query for the missing-sweep.
- /api/instances/* handlers verify that the container's (workload_id, role)
  matches the URL's (project_id, stage_name) before mutating — closes the
  cross-project hijack the security review flagged.
- extra_json no longer referenced from Go (column kept on disk for now).

Frontend:
- WorkloadContainers.svelte: generic detail-page panel reusable by stack and
  site detail pages.
- Containers page polish: client-side kind/state filters over an unfiltered
  fetch, URL-synced filters, race-safe loads via sequence number, EN+RU i18n,
  sidebar counter via navCounts.containers.

Misc:
- scripts/dev-server.sh: tolerate empty netstat grep result.
- .gitignore: ignore docker-watcher binaries, .claude/worktrees/, .facts-sync.json.
This commit is contained in:
2026-05-09 15:44:41 +03:00
parent d8ab22876f
commit cba2149aa9
30 changed files with 1227 additions and 509 deletions
+68 -20
View File
@@ -119,19 +119,14 @@ func (s *Server) deployInstance(w http.ResponseWriter, r *http.Request) {
// removeInstance handles DELETE /api/projects/{id}/stages/{stage}/instances/{iid}.
// {iid} is the container row ID (same UUID as the legacy instance ID).
// Verifies that the container belongs to the project + stage in the URL —
// without this check, a stale URL could delete an unrelated stack/site row.
func (s *Server) removeInstance(w http.ResponseWriter, r *http.Request) {
id := chi.URLParam(r, "iid")
c, err := s.store.GetContainerByID(id)
if err != nil {
if errors.Is(err, store.ErrNotFound) {
respondNotFound(w, "container")
return
}
slog.Error("failed to get container", "error", err)
respondError(w, http.StatusInternalServerError, "internal server error")
c, ok := s.resolveAndAuthorizeInstance(w, r)
if !ok {
return
}
id := c.ID
// Remove the Docker container if it has one.
if c.ContainerID != "" {
@@ -171,19 +166,14 @@ func (s *Server) restartInstance(w http.ResponseWriter, r *http.Request) {
}
// controlInstance performs a stop/start/restart action on a container.
// The container's ownership of the URL-provided project + stage is verified
// before any Docker call — see resolveAndAuthorizeInstance for rationale.
func (s *Server) controlInstance(w http.ResponseWriter, r *http.Request, action string) {
id := chi.URLParam(r, "iid")
c, err := s.store.GetContainerByID(id)
if err != nil {
if errors.Is(err, store.ErrNotFound) {
respondNotFound(w, "container")
return
}
slog.Error("failed to get container", "error", err)
respondError(w, http.StatusInternalServerError, "internal server error")
c, ok := s.resolveAndAuthorizeInstance(w, r)
if !ok {
return
}
id := c.ID
if c.ContainerID == "" {
respondError(w, http.StatusBadRequest, "container row has no docker container bound")
@@ -231,3 +221,61 @@ type DeployTriggerer interface {
TriggerDeploy(ctx context.Context, projectID, stageID, imageTag string) error
AsyncTriggerDeploy(ctx context.Context, projectID, stageID, imageTag string) (string, error)
}
// resolveAndAuthorizeInstance loads the container row identified by {iid} and
// verifies it actually belongs to the project + stage in the URL path.
// Without this, a stale or hand-crafted URL like
//
//	DELETE /api/projects/<projectA>/stages/<stageA>/instances/<rowOfStackB>
//
// would happily delete an unrelated stack/site container — admin-only doesn't
// excuse the cross-project bypass.
//
// On success it returns the container and true. On failure it returns a zero
// store.Container and false, and the HTTP response has already been written:
// 404 when the stage, container, or project workload is missing or does not
// match the URL, 500 when a store lookup fails for any other reason.
func (s *Server) resolveAndAuthorizeInstance(w http.ResponseWriter, r *http.Request) (store.Container, bool) {
	projectID := chi.URLParam(r, "id")

	// Resolve the stage (when the route carries one) and make sure it is a
	// stage of the project named in the URL. The stage check against the
	// container is skipped entirely when the {stage} param is empty.
	stageName := ""
	if stageID := chi.URLParam(r, "stage"); stageID != "" {
		st, err := s.store.GetStageByID(stageID)
		if err != nil {
			if errors.Is(err, store.ErrNotFound) {
				respondNotFound(w, "stage")
				return store.Container{}, false
			}
			slog.Error("failed to get stage", "error", err)
			respondError(w, http.StatusInternalServerError, "internal server error")
			return store.Container{}, false
		}
		if st.ProjectID != projectID {
			respondNotFound(w, "stage")
			return store.Container{}, false
		}
		stageName = st.Name
	}

	id := chi.URLParam(r, "iid")
	c, err := s.store.GetContainerByID(id)
	if err != nil {
		if errors.Is(err, store.ErrNotFound) {
			respondNotFound(w, "container")
			return store.Container{}, false
		}
		slog.Error("failed to get container", "error", err)
		respondError(w, http.StatusInternalServerError, "internal server error")
		return store.Container{}, false
	}

	// Look up the project's workload so the container's WorkloadID can be
	// compared against it. Only a genuine "not found" is reported as 404;
	// any other store failure is logged and surfaced as 500, matching the
	// stage and container lookups above, so a database outage is not
	// disguised as a missing container.
	// NOTE(review): assumes GetWorkloadByRef reports a missing row via
	// store.ErrNotFound like the other store getters — confirm.
	wl, err := s.store.GetWorkloadByRef(store.WorkloadKindProject, projectID)
	if err != nil {
		if errors.Is(err, store.ErrNotFound) {
			respondNotFound(w, "container")
			return store.Container{}, false
		}
		slog.Error("failed to get workload", "error", err)
		respondError(w, http.StatusInternalServerError, "internal server error")
		return store.Container{}, false
	}

	// Mismatches are answered with the same 404 as a missing row, so the
	// response does not reveal that the container exists under another
	// workload or stage.
	if c.WorkloadID != wl.ID {
		respondNotFound(w, "container")
		return store.Container{}, false
	}
	if stageName != "" && c.Role != stageName {
		respondNotFound(w, "container")
		return store.Container{}, false
	}
	return c, true
}
+16 -1
View File
@@ -1,6 +1,7 @@
package api
import (
"encoding/json"
"errors"
"log/slog"
"net/http"
@@ -136,11 +137,25 @@ func (s *Server) cleanupContainer(r *http.Request, c store.Container) error {
func (s *Server) emitStaleCleanupEvent(c store.Container) {
msg := "Stale container cleaned up: " + c.ID + " (tag: " + c.ImageTag + ")"
// Use json.Marshal — c.Role is reconciler-derived from a Docker label and
// could contain quotes / control chars that break a hand-built JSON string.
metaBytes, err := json.Marshal(map[string]string{
"container_id": c.ID,
"workload_id": c.WorkloadID,
"role": c.Role,
})
if err != nil {
// json.Marshal is not expected to fail for a map[string]string (invalid
// UTF-8 is replaced, not rejected); the fallback to an empty object is
// purely defensive so the event still records.
metaBytes = []byte(`{}`)
}
evt, err := s.store.InsertEvent(store.EventLog{
Source: "stale_cleanup",
Severity: "info",
Message: msg,
Metadata: `{"container_id":"` + c.ID + `","workload_id":"` + c.WorkloadID + `","role":"` + c.Role + `"}`,
Metadata: string(metaBytes),
})
if err != nil {
slog.Error("stale cleanup: failed to persist event", "error", err)