cba2149aa9
Wraps up the workload refactor with the fixes that came out of the multi-agent code review (see docs/plans/workload-refactor.md "What actually shipped"). Backend: - store.ReconcileContainer: separate write path so the 30s reconciler tick no longer overwrites deployer-owned fields (subdomain, proxy_route_id, npm_proxy_id, image_tag). - Container.stage_id column + index; ListProxyRoutes / ListContainersByStageID join via stage_id (survives stage rename), with legacy fallback to (project_id, role=stage_name). - Reconciler: workload-existence check (rejects forged tinyforge.workload.id labels), skips inventing project-kind rows, child-context cancel before wg.Wait() on shutdown. - Transactional CRUD across projects / stacks / static_sites: parent UPDATE and workload sync land in one transaction so secret rotations are durable. - Webhook routing reads exclusively through workloads.webhook_secret; legacy GetProjectByWebhookSecret / GetStaticSiteByWebhookSecret fallback removed. - store.GetStackByComposeProjectName + indexed lookup (no more full-table stack scan per compose container per tick). - store.ListMissingSweepRows: filtered query for the missing-sweep. - /api/instances/* handlers verify (workload_id, role) match URL (project_id, stage_name) before mutating — closes the cross-project hijack the security review flagged. - extra_json no longer referenced from Go (column kept on disk for now). Frontend: - WorkloadContainers.svelte: generic detail-page panel reusable by stack and site detail pages. - Containers page polish: client-side kind/state filters over an unfiltered fetch, URL-synced filters, race-safe loads via sequence number, EN+RU i18n, sidebar counter via navCounts.containers. Misc: - scripts/dev-server.sh: tolerate empty netstat grep result. - .gitignore: ignore docker-watcher binaries, .claude/worktrees/, .facts-sync.json.
177 lines
5.1 KiB
Go
177 lines
5.1 KiB
Go
package api
|
|
|
|
import (
|
|
"encoding/json"
|
|
"errors"
|
|
"log/slog"
|
|
"net/http"
|
|
|
|
"github.com/go-chi/chi/v5"
|
|
|
|
"github.com/alexei/tinyforge/internal/events"
|
|
"github.com/alexei/tinyforge/internal/stale"
|
|
"github.com/alexei/tinyforge/internal/store"
|
|
)
|
|
|
|
// listStaleContainers handles GET /api/containers/stale.
|
|
func (s *Server) listStaleContainers(w http.ResponseWriter, r *http.Request) {
|
|
if s.staleScanner == nil {
|
|
respondError(w, http.StatusServiceUnavailable, "stale scanner not initialized")
|
|
return
|
|
}
|
|
|
|
staleRows, err := s.staleScanner.FindStaleContainers(r.Context())
|
|
if err != nil {
|
|
slog.Error("failed to find stale containers", "error", err)
|
|
respondError(w, http.StatusInternalServerError, "failed to find stale containers")
|
|
return
|
|
}
|
|
|
|
if staleRows == nil {
|
|
staleRows = []stale.StaleContainer{}
|
|
}
|
|
respondJSON(w, http.StatusOK, staleRows)
|
|
}
|
|
|
|
// cleanupStaleContainer handles POST /api/containers/stale/{id}/cleanup.
|
|
// Stops the Docker container, removes the proxy route, and deletes the
|
|
// container row. {id} is the container row ID.
|
|
func (s *Server) cleanupStaleContainer(w http.ResponseWriter, r *http.Request) {
|
|
id := chi.URLParam(r, "id")
|
|
|
|
c, err := s.store.GetContainerByID(id)
|
|
if err != nil {
|
|
if errors.Is(err, store.ErrNotFound) {
|
|
respondNotFound(w, "container")
|
|
return
|
|
}
|
|
slog.Error("failed to get container", "id", id, "error", err)
|
|
respondError(w, http.StatusInternalServerError, "failed to get container")
|
|
return
|
|
}
|
|
|
|
if c.State == "removing" {
|
|
respondError(w, http.StatusConflict, "container is already being removed")
|
|
return
|
|
}
|
|
|
|
if err := s.cleanupContainer(r, c); err != nil {
|
|
slog.Error("failed to cleanup container", "id", id, "error", err)
|
|
respondError(w, http.StatusInternalServerError, "failed to cleanup container")
|
|
return
|
|
}
|
|
|
|
respondJSON(w, http.StatusOK, map[string]string{"cleaned": id})
|
|
}
|
|
|
|
// bulkCleanupStaleContainers handles POST /api/containers/stale/cleanup.
|
|
func (s *Server) bulkCleanupStaleContainers(w http.ResponseWriter, r *http.Request) {
|
|
if s.staleScanner == nil {
|
|
respondError(w, http.StatusServiceUnavailable, "stale scanner not initialized")
|
|
return
|
|
}
|
|
|
|
staleRows, err := s.staleScanner.FindStaleContainers(r.Context())
|
|
if err != nil {
|
|
slog.Error("failed to find stale containers for bulk cleanup", "error", err)
|
|
respondError(w, http.StatusInternalServerError, "failed to find stale containers")
|
|
return
|
|
}
|
|
|
|
var cleaned []string
|
|
var failed []string
|
|
|
|
for _, sc := range staleRows {
|
|
if sc.Container.State == "removing" {
|
|
continue
|
|
}
|
|
if err := s.cleanupContainer(r, sc.Container); err != nil {
|
|
slog.Error("bulk stale cleanup failed",
|
|
"id", sc.Container.ID, "error", err)
|
|
failed = append(failed, sc.Container.ID)
|
|
continue
|
|
}
|
|
cleaned = append(cleaned, sc.Container.ID)
|
|
}
|
|
|
|
respondJSON(w, http.StatusOK, map[string]any{
|
|
"cleaned": cleaned,
|
|
"failed": failed,
|
|
})
|
|
}
|
|
|
|
// cleanupContainer stops a Docker container, removes its proxy route,
|
|
// deletes the container row, and emits an event.
|
|
func (s *Server) cleanupContainer(r *http.Request, c store.Container) error {
|
|
ctx := r.Context()
|
|
|
|
if err := s.store.UpdateContainerState(c.ID, "removing"); err != nil {
|
|
slog.Warn("stale cleanup: update state to removing", "id", c.ID, "error", err)
|
|
}
|
|
|
|
if c.ContainerID != "" {
|
|
if err := s.docker.StopContainer(ctx, c.ContainerID, 10); err != nil {
|
|
slog.Warn("stale cleanup: stop container", "container_id", c.ContainerID, "error", err)
|
|
}
|
|
if err := s.docker.RemoveContainer(ctx, c.ContainerID, true); err != nil {
|
|
slog.Warn("stale cleanup: remove container", "container_id", c.ContainerID, "error", err)
|
|
}
|
|
}
|
|
|
|
if c.ProxyRouteID != "" {
|
|
if err := s.proxyProvider.DeleteRoute(ctx, c.ProxyRouteID); err != nil {
|
|
slog.Warn("stale cleanup: delete proxy route", "route_id", c.ProxyRouteID, "error", err)
|
|
}
|
|
}
|
|
|
|
if err := s.store.DeleteContainer(c.ID); err != nil {
|
|
return err
|
|
}
|
|
|
|
s.emitStaleCleanupEvent(c)
|
|
|
|
return nil
|
|
}
|
|
|
|
// emitStaleCleanupEvent publishes an event when a stale container is cleaned up.
|
|
func (s *Server) emitStaleCleanupEvent(c store.Container) {
|
|
msg := "Stale container cleaned up: " + c.ID + " (tag: " + c.ImageTag + ")"
|
|
|
|
// Use json.Marshal — c.Role is reconciler-derived from a Docker label and
|
|
// could contain quotes / control chars that break a hand-built JSON string.
|
|
metaBytes, err := json.Marshal(map[string]string{
|
|
"container_id": c.ID,
|
|
"workload_id": c.WorkloadID,
|
|
"role": c.Role,
|
|
})
|
|
if err != nil {
|
|
// json.Marshal on a flat string map can only fail in pathological
|
|
// circumstances (memory exhaustion); fall back to an empty object so
|
|
// the event still records.
|
|
metaBytes = []byte(`{}`)
|
|
}
|
|
|
|
evt, err := s.store.InsertEvent(store.EventLog{
|
|
Source: "stale_cleanup",
|
|
Severity: "info",
|
|
Message: msg,
|
|
Metadata: string(metaBytes),
|
|
})
|
|
if err != nil {
|
|
slog.Error("stale cleanup: failed to persist event", "error", err)
|
|
return
|
|
}
|
|
|
|
s.eventBus.Publish(events.Event{
|
|
Type: events.EventLog,
|
|
Payload: events.EventLogPayload{
|
|
ID: evt.ID,
|
|
Source: "stale_cleanup",
|
|
Severity: "info",
|
|
Message: msg,
|
|
Metadata: evt.Metadata,
|
|
CreatedAt: evt.CreatedAt,
|
|
},
|
|
})
|
|
}
|