refactor(workload): extract Instance entirely; Container is canonical
Build / build (push) Successful in 10m41s

End-to-end removal of the Instance concept. After this commit:

  * internal/store/instances.go — DELETED
  * internal/store/models.go — Instance struct gone, ProxyRoute moved here
  * The containers table is the single source of truth for project/stack/site
    container state. The instances table is dropped via a DROP TABLE migration
    (idempotent; re-runnable on every boot).
  * Legacy tinyforge.project / tinyforge.stage / tinyforge.instance-id
    Docker labels are no longer emitted; only tinyforge.workload.{id,kind},
    tinyforge.role, and tinyforge.managed are stamped on new containers.

Backend rewrites:
  - internal/deployer:        executeDeploy + blueGreenDeploy + rollback +
                              promote use store.Container natively. New
                              removeContainer() replaces removeInstance().
                              enforceMaxInstances reads via
                              ListContainersByStageID.
  - internal/reconciler:      legacy tinyforge.instance-id dispatch removed;
                              upsertByWorkloadLabel now finds existing rows
                              by docker container ID first and falls back to
                              the deterministic workloadID:role key.
  - internal/stale/scanner:   Scan + new FindStaleContainers walk the
                              containers table; emit StaleContainer JSON.
  - internal/stats/collector: ListContainers replaces ListAllInstances.
  - internal/webhook/handler: the workload-secret lookup is tried first, falling
                              back to the project / static_site secret column.
  - internal/api: instances.go, stale.go, stats.go, stats_history.go,
                  projects.go, settings.go, docker.go, dns.go all read /
                  write through Container.

Docker layer:
  - ManagedContainer exposes WorkloadID/Kind/Role from the canonical labels.
  - ListContainers filters by tinyforge.managed=true.
  - Network creation uses LabelManaged instead of LabelProject.

Frontend:
  - Instance type is now a Container alias; .status → .state,
    .last_alive_at → .last_seen_at.
  - InstanceCard takes stageId as a prop (no longer derived from Instance).
  - StaleContainer JSON shape rewritten: { container, workload_name, role,
    days_stale }. StaleContainerCard + /containers/stale page updated.
  - ProjectCard / homepage / SystemHealthCard filter by .state.

The migration loop now tolerates "no such table" errors alongside "duplicate
column" / "already exists" errors, so obsolete ALTER TABLE entries targeting
the dropped instances table become clean no-ops on first boot.

Tests: store + deployer + reconciler + webhook + staticsite + notify all
still pass. Frontend svelte-check: zero errors.
This commit is contained in:
2026-05-09 14:43:12 +03:00
parent d516462750
commit d8ab22876f
32 changed files with 649 additions and 957 deletions
+9 -9
View File
@@ -204,9 +204,9 @@ func (s *Server) buildConsumerNameMap() map[string]string {
for _, p := range projects { for _, p := range projects {
stages, _ := s.store.GetStagesByProjectID(p.ID) stages, _ := s.store.GetStagesByProjectID(p.ID)
for _, st := range stages { for _, st := range stages {
instances, _ := s.store.GetInstancesByStageID(st.ID) rows, _ := s.store.ListContainersByStageID(st.ID)
for _, inst := range instances { for _, c := range rows {
names["instance:"+inst.ID] = p.Name + "/" + st.Name + ":" + inst.ImageTag names["instance:"+c.ID] = p.Name + "/" + st.Name + ":" + c.ImageTag
} }
} }
} }
@@ -362,15 +362,15 @@ func (s *Server) computeExpectedFQDNs(settings store.Settings) (map[string]strin
if !st.EnableProxy { if !st.EnableProxy {
continue continue
} }
instances, err := s.store.GetInstancesByStageID(st.ID) rows, err := s.store.ListContainersByStageID(st.ID)
if err != nil { if err != nil {
slog.Warn("dns: failed to get instances", "stage_id", st.ID, "error", err) slog.Warn("dns: failed to get containers", "stage_id", st.ID, "error", err)
continue continue
} }
for _, inst := range instances { for _, c := range rows {
if inst.NpmProxyID > 0 && inst.Subdomain != "" && inst.Status == "running" { if c.NpmProxyID > 0 && c.Subdomain != "" && c.State == "running" {
fqdn := inst.Subdomain + "." + settings.Domain fqdn := c.Subdomain + "." + settings.Domain
expected[fqdn] = "instance:" + inst.ID expected[fqdn] = "instance:" + c.ID
} }
} }
} }
+22 -15
View File
@@ -69,39 +69,46 @@ func (s *Server) listProjectImages(w http.ResponseWriter, r *http.Request) {
} }
// streamContainerLogs handles GET /api/projects/{id}/stages/{stage}/instances/{iid}/logs. // streamContainerLogs handles GET /api/projects/{id}/stages/{stage}/instances/{iid}/logs.
// Streams container logs via SSE. Supports query params: // Streams container logs via SSE. {iid} is the container row ID. Ownership is
// - tail: number of lines from end (default "200") // verified by joining through workload + stage so an attacker cannot stream
// - follow: "true" to stream new lines in real-time // logs for a foreign container by guessing IDs under the wrong project URL.
func (s *Server) streamContainerLogs(w http.ResponseWriter, r *http.Request) { func (s *Server) streamContainerLogs(w http.ResponseWriter, r *http.Request) {
projectID := chi.URLParam(r, "id") projectID := chi.URLParam(r, "id")
stageID := chi.URLParam(r, "stage") stageID := chi.URLParam(r, "stage")
instanceID := chi.URLParam(r, "iid") containerRowID := chi.URLParam(r, "iid")
inst, err := s.store.GetInstanceByID(instanceID) c, err := s.store.GetContainerByID(containerRowID)
if err != nil { if err != nil {
if errors.Is(err, store.ErrNotFound) { if errors.Is(err, store.ErrNotFound) {
respondNotFound(w, "instance") respondNotFound(w, "container")
return return
} }
slog.Error("failed to get instance", "error", err) slog.Error("failed to get container", "error", err)
respondError(w, http.StatusInternalServerError, "internal server error") respondError(w, http.StatusInternalServerError, "internal server error")
return return
} }
// Verify the instance actually belongs to the project/stage in the path. wl, err := s.store.GetWorkloadByID(c.WorkloadID)
// Without this, a user could stream logs for any instance ID by guessing if err != nil {
// it under the wrong project — defence-in-depth for future per-project ACLs. respondNotFound(w, "container")
if inst.ProjectID != projectID || inst.StageID != stageID { return
respondNotFound(w, "instance") }
stage, err := s.store.GetStageByID(stageID)
if err != nil || stage.ProjectID != projectID {
respondNotFound(w, "container")
return
}
if wl.Kind != string(store.WorkloadKindProject) || wl.RefID != projectID || c.Role != stage.Name {
respondNotFound(w, "container")
return return
} }
if inst.ContainerID == "" { if c.ContainerID == "" {
respondError(w, http.StatusBadRequest, "instance has no container") respondError(w, http.StatusBadRequest, "container row has no docker container bound")
return return
} }
s.streamLogsForContainer(w, r, inst.ContainerID) s.streamLogsForContainer(w, r, c.ContainerID)
} }
// streamLogsForContainer streams logs for an arbitrary container ID using the // streamLogsForContainer streams logs for an arbitrary container ID using the
+54 -58
View File
@@ -13,10 +13,13 @@ import (
) )
// listInstances handles GET /api/projects/{id}/stages/{stage}/instances. // listInstances handles GET /api/projects/{id}/stages/{stage}/instances.
// Reads the normalized container index — the legacy `instances` table is gone.
// JSON shape stays Container-shaped (id, container_id, image_tag, subdomain,
// state, port, etc.), so the frontend type may show some renamed fields
// (status→state, last_alive_at→last_seen_at).
func (s *Server) listInstances(w http.ResponseWriter, r *http.Request) { func (s *Server) listInstances(w http.ResponseWriter, r *http.Request) {
stageID := chi.URLParam(r, "stage") stageID := chi.URLParam(r, "stage")
// Verify stage exists.
if _, err := s.store.GetStageByID(stageID); err != nil { if _, err := s.store.GetStageByID(stageID); err != nil {
if errors.Is(err, store.ErrNotFound) { if errors.Is(err, store.ErrNotFound) {
respondNotFound(w, "stage") respondNotFound(w, "stage")
@@ -27,34 +30,36 @@ func (s *Server) listInstances(w http.ResponseWriter, r *http.Request) {
return return
} }
instances, err := s.store.GetInstancesByStageID(stageID) containers, err := s.store.ListContainersByStageID(stageID)
if err != nil { if err != nil {
slog.Error("failed to list instances", "error", err) slog.Error("failed to list containers", "error", err)
respondError(w, http.StatusInternalServerError, "internal server error") respondError(w, http.StatusInternalServerError, "internal server error")
return return
} }
// Reconcile instance statuses with Docker's actual state. // Reconcile container state with Docker's actual state — covers the
// case where a container was killed externally between deployer writes
// and the next reconciler tick.
ctx := r.Context() ctx := r.Context()
for i, inst := range instances { for i, c := range containers {
if inst.ContainerID == "" || inst.Status == "removing" { if c.ContainerID == "" || c.State == "removing" {
continue continue
} }
running, err := s.docker.IsContainerRunning(ctx, inst.ContainerID) running, err := s.docker.IsContainerRunning(ctx, c.ContainerID)
if err != nil { if err != nil {
continue // Docker unreachable, keep stored status. continue
} }
actualStatus := "stopped" actual := "stopped"
if running { if running {
actualStatus = "running" actual = "running"
} }
if inst.Status != actualStatus { if c.State != actual {
instances[i].Status = actualStatus containers[i].State = actual
_ = s.store.UpdateInstanceStatus(inst.ID, actualStatus) _ = s.store.UpdateContainerState(c.ID, actual)
} }
} }
respondJSON(w, http.StatusOK, instances) respondJSON(w, http.StatusOK, containers)
} }
// deployRequest is the expected JSON body for triggering a deploy. // deployRequest is the expected JSON body for triggering a deploy.
@@ -62,12 +67,11 @@ type deployRequest struct {
ImageTag string `json:"image_tag"` ImageTag string `json:"image_tag"`
} }
// deployInstance handles POST /api/projects/{id}/stages/{stage}/instances (trigger deploy). // deployInstance handles POST /api/projects/{id}/stages/{stage}/instances.
func (s *Server) deployInstance(w http.ResponseWriter, r *http.Request) { func (s *Server) deployInstance(w http.ResponseWriter, r *http.Request) {
projectID := chi.URLParam(r, "id") projectID := chi.URLParam(r, "id")
stageID := chi.URLParam(r, "stage") stageID := chi.URLParam(r, "stage")
// Verify project exists.
if _, err := s.store.GetProjectByID(projectID); err != nil { if _, err := s.store.GetProjectByID(projectID); err != nil {
if errors.Is(err, store.ErrNotFound) { if errors.Is(err, store.ErrNotFound) {
respondNotFound(w, "project") respondNotFound(w, "project")
@@ -78,7 +82,6 @@ func (s *Server) deployInstance(w http.ResponseWriter, r *http.Request) {
return return
} }
// Verify stage exists.
if _, err := s.store.GetStageByID(stageID); err != nil { if _, err := s.store.GetStageByID(stageID); err != nil {
if errors.Is(err, store.ErrNotFound) { if errors.Is(err, store.ErrNotFound) {
respondNotFound(w, "stage") respondNotFound(w, "stage")
@@ -115,40 +118,41 @@ func (s *Server) deployInstance(w http.ResponseWriter, r *http.Request) {
} }
// removeInstance handles DELETE /api/projects/{id}/stages/{stage}/instances/{iid}. // removeInstance handles DELETE /api/projects/{id}/stages/{stage}/instances/{iid}.
// {iid} is the container row ID (same UUID as the legacy instance ID).
func (s *Server) removeInstance(w http.ResponseWriter, r *http.Request) { func (s *Server) removeInstance(w http.ResponseWriter, r *http.Request) {
instanceID := chi.URLParam(r, "iid") id := chi.URLParam(r, "iid")
inst, err := s.store.GetInstanceByID(instanceID) c, err := s.store.GetContainerByID(id)
if err != nil { if err != nil {
if errors.Is(err, store.ErrNotFound) { if errors.Is(err, store.ErrNotFound) {
respondNotFound(w, "instance") respondNotFound(w, "container")
return return
} }
slog.Error("failed to get instance", "error", err) slog.Error("failed to get container", "error", err)
respondError(w, http.StatusInternalServerError, "internal server error") respondError(w, http.StatusInternalServerError, "internal server error")
return return
} }
// Remove the Docker container if it has one. // Remove the Docker container if it has one.
if inst.ContainerID != "" { if c.ContainerID != "" {
if err := s.docker.RemoveContainer(r.Context(), inst.ContainerID, true); err != nil { if err := s.docker.RemoveContainer(r.Context(), c.ContainerID, true); err != nil {
slog.Error("remove container", "container_id", inst.ContainerID, "error", err) slog.Error("remove container", "container_id", c.ContainerID, "error", err)
} }
} }
// Delete proxy route if it has one. // Delete proxy route if it has one.
if inst.ProxyRouteID != "" { if c.ProxyRouteID != "" {
if err := s.proxyProvider.DeleteRoute(r.Context(), inst.ProxyRouteID); err != nil { if err := s.proxyProvider.DeleteRoute(r.Context(), c.ProxyRouteID); err != nil {
slog.Warn("delete proxy route on instance removal", "route_id", inst.ProxyRouteID, "error", err) slog.Warn("delete proxy route on container removal", "route_id", c.ProxyRouteID, "error", err)
} }
} }
// Delete instance record. // Delete container row.
if err := s.store.DeleteInstance(instanceID); err != nil { if err := s.store.DeleteContainer(id); err != nil {
respondError(w, http.StatusInternalServerError, "failed to delete instance") respondError(w, http.StatusInternalServerError, "failed to delete container")
return return
} }
respondJSON(w, http.StatusOK, map[string]string{"deleted": instanceID}) respondJSON(w, http.StatusOK, map[string]string{"deleted": id})
} }
// stopInstance handles POST /api/projects/{id}/stages/{stage}/instances/{iid}/stop. // stopInstance handles POST /api/projects/{id}/stages/{stage}/instances/{iid}/stop.
@@ -166,67 +170,59 @@ func (s *Server) restartInstance(w http.ResponseWriter, r *http.Request) {
s.controlInstance(w, r, "restart") s.controlInstance(w, r, "restart")
} }
// controlInstance performs a stop/start/restart action on an instance's container. // controlInstance performs a stop/start/restart action on a container.
func (s *Server) controlInstance(w http.ResponseWriter, r *http.Request, action string) { func (s *Server) controlInstance(w http.ResponseWriter, r *http.Request, action string) {
instanceID := chi.URLParam(r, "iid") id := chi.URLParam(r, "iid")
inst, err := s.store.GetInstanceByID(instanceID) c, err := s.store.GetContainerByID(id)
if err != nil { if err != nil {
if errors.Is(err, store.ErrNotFound) { if errors.Is(err, store.ErrNotFound) {
respondNotFound(w, "instance") respondNotFound(w, "container")
return return
} }
slog.Error("failed to get instance", "error", err) slog.Error("failed to get container", "error", err)
respondError(w, http.StatusInternalServerError, "internal server error") respondError(w, http.StatusInternalServerError, "internal server error")
return return
} }
if inst.ContainerID == "" { if c.ContainerID == "" {
respondError(w, http.StatusBadRequest, "instance has no container") respondError(w, http.StatusBadRequest, "container row has no docker container bound")
return return
} }
ctx := r.Context() ctx := r.Context()
var controlErr error var controlErr error
var newStatus string var newState string
switch action { switch action {
case "stop": case "stop":
controlErr = s.docker.StopContainer(ctx, inst.ContainerID, 10) controlErr = s.docker.StopContainer(ctx, c.ContainerID, 10)
newStatus = "stopped" newState = "stopped"
case "start": case "start":
controlErr = s.docker.StartContainer(ctx, inst.ContainerID) controlErr = s.docker.StartContainer(ctx, c.ContainerID)
newStatus = "running" newState = "running"
case "restart": case "restart":
controlErr = s.docker.RestartContainer(ctx, inst.ContainerID, 10) controlErr = s.docker.RestartContainer(ctx, c.ContainerID, 10)
newStatus = "running" newState = "running"
default: default:
respondError(w, http.StatusBadRequest, fmt.Sprintf("unknown action: %s", action)) respondError(w, http.StatusBadRequest, fmt.Sprintf("unknown action: %s", action))
return return
} }
if controlErr != nil { if controlErr != nil {
slog.Error("failed to control instance", "action", action, "instance_id", instanceID, "error", controlErr) slog.Error("failed to control container", "action", action, "id", id, "error", controlErr)
respondError(w, http.StatusInternalServerError, "internal server error") respondError(w, http.StatusInternalServerError, "internal server error")
return return
} }
// Update status in store. if err := s.store.UpdateContainerState(id, newState); err != nil {
if err := s.store.UpdateInstanceStatus(instanceID, newStatus); err != nil { slog.Error("update container state", "id", id, "state", newState, "error", err)
slog.Error("update instance status", "instance_id", instanceID, "status", newStatus, "error", err)
}
// Track last_alive_at when container becomes running.
if newStatus == "running" {
if err := s.store.UpdateLastAliveAt(instanceID); err != nil {
slog.Error("update last_alive_at", "instance_id", instanceID, "error", err)
}
} }
respondJSON(w, http.StatusOK, map[string]string{ respondJSON(w, http.StatusOK, map[string]string{
"instance_id": instanceID, "instance_id": id,
"action": action, "action": action,
"status": newStatus, "status": newState,
}) })
} }
+8 -8
View File
@@ -188,16 +188,16 @@ func (s *Server) deleteProject(w http.ResponseWriter, r *http.Request) {
ctx := r.Context() ctx := r.Context()
stages, _ := s.store.GetStagesByProjectID(id) stages, _ := s.store.GetStagesByProjectID(id)
for _, stage := range stages { for _, stage := range stages {
instances, _ := s.store.GetInstancesByStageID(stage.ID) rows, _ := s.store.ListContainersByStageID(stage.ID)
for _, inst := range instances { for _, c := range rows {
if inst.ContainerID != "" { if c.ContainerID != "" {
if err := s.docker.RemoveContainer(ctx, inst.ContainerID, true); err != nil { if err := s.docker.RemoveContainer(ctx, c.ContainerID, true); err != nil {
slog.Warn("delete project: remove container", "container", inst.ContainerID, "error", err) slog.Warn("delete project: remove container", "container", c.ContainerID, "error", err)
} }
} }
if inst.ProxyRouteID != "" { if c.ProxyRouteID != "" {
if err := s.proxyProvider.DeleteRoute(ctx, inst.ProxyRouteID); err != nil { if err := s.proxyProvider.DeleteRoute(ctx, c.ProxyRouteID); err != nil {
slog.Warn("delete project: delete proxy route", "route", inst.ProxyRouteID, "error", err) slog.Warn("delete project: delete proxy route", "route", c.ProxyRouteID, "error", err)
} }
} }
} }
+11 -12
View File
@@ -466,14 +466,14 @@ func (s *Server) resyncAllProxies(oldSettings, newSettings store.Settings) {
// Step 2: If new provider is "none", clear all proxy route IDs and we're done. // Step 2: If new provider is "none", clear all proxy route IDs and we're done.
if newSettings.ProxyProvider == "none" { if newSettings.ProxyProvider == "none" {
for _, route := range routes { for _, route := range routes {
inst, err := s.store.GetInstanceByID(route.InstanceID) c, err := s.store.GetContainerByID(route.InstanceID)
if err != nil { if err != nil {
continue continue
} }
inst.ProxyRouteID = "" c.ProxyRouteID = ""
inst.NpmProxyID = 0 c.NpmProxyID = 0
if err := s.store.UpdateInstance(inst); err != nil { if err := s.store.UpdateContainer(c); err != nil {
slog.Warn("proxy resync: clear route ID", "instance", route.InstanceID, "error", err) slog.Warn("proxy resync: clear route ID", "container", route.InstanceID, "error", err)
} }
} }
slog.Info("proxy resync: cleared all proxy routes (provider set to none)", "count", len(routes)) slog.Info("proxy resync: cleared all proxy routes (provider set to none)", "count", len(routes))
@@ -501,18 +501,17 @@ func (s *Server) resyncAllProxies(oldSettings, newSettings store.Settings) {
continue continue
} }
// Update instance with new route ID. // Update container row with new route ID.
inst, err := s.store.GetInstanceByID(route.InstanceID) c, err := s.store.GetContainerByID(route.InstanceID)
if err != nil { if err != nil {
continue continue
} }
inst.ProxyRouteID = routeID c.ProxyRouteID = routeID
if domainChanged { if domainChanged {
// Subdomain stays the same, but the FQDN in external systems changed. slog.Info("proxy resync: domain updated", "container", route.InstanceID, "domain", fqdn)
slog.Info("proxy resync: domain updated", "instance", route.InstanceID, "domain", fqdn)
} }
if err := s.store.UpdateInstance(inst); err != nil { if err := s.store.UpdateContainer(c); err != nil {
slog.Warn("proxy resync: update instance", "instance", route.InstanceID, "error", err) slog.Warn("proxy resync: update container", "container", route.InstanceID, "error", err)
} }
updated++ updated++
} }
+42 -48
View File
@@ -19,59 +19,58 @@ func (s *Server) listStaleContainers(w http.ResponseWriter, r *http.Request) {
return return
} }
staleInstances, err := s.staleScanner.FindStaleInstances(r.Context()) staleRows, err := s.staleScanner.FindStaleContainers(r.Context())
if err != nil { if err != nil {
slog.Error("failed to find stale containers", "error", err) slog.Error("failed to find stale containers", "error", err)
respondError(w, http.StatusInternalServerError, "failed to find stale containers") respondError(w, http.StatusInternalServerError, "failed to find stale containers")
return return
} }
if staleInstances == nil { if staleRows == nil {
staleInstances = []stale.StaleInstance{} staleRows = []stale.StaleContainer{}
} }
respondJSON(w, http.StatusOK, staleInstances) respondJSON(w, http.StatusOK, staleRows)
} }
// cleanupStaleContainer handles POST /api/containers/stale/{id}/cleanup. // cleanupStaleContainer handles POST /api/containers/stale/{id}/cleanup.
// Stops the Docker container, removes the NPM proxy, and deletes the instance from the store. // Stops the Docker container, removes the proxy route, and deletes the
// container row. {id} is the container row ID.
func (s *Server) cleanupStaleContainer(w http.ResponseWriter, r *http.Request) { func (s *Server) cleanupStaleContainer(w http.ResponseWriter, r *http.Request) {
instanceID := chi.URLParam(r, "id") id := chi.URLParam(r, "id")
inst, err := s.store.GetInstanceByID(instanceID) c, err := s.store.GetContainerByID(id)
if err != nil { if err != nil {
if errors.Is(err, store.ErrNotFound) { if errors.Is(err, store.ErrNotFound) {
respondNotFound(w, "instance") respondNotFound(w, "container")
return return
} }
slog.Error("failed to get instance", "instance_id", instanceID, "error", err) slog.Error("failed to get container", "id", id, "error", err)
respondError(w, http.StatusInternalServerError, "failed to get instance") respondError(w, http.StatusInternalServerError, "failed to get container")
return return
} }
// Don't remove instances already being cleaned up. if c.State == "removing" {
if inst.Status == "removing" { respondError(w, http.StatusConflict, "container is already being removed")
respondError(w, http.StatusConflict, "instance is already being removed")
return return
} }
if err := s.cleanupInstance(r, inst); err != nil { if err := s.cleanupContainer(r, c); err != nil {
slog.Error("failed to cleanup instance", "instance_id", instanceID, "error", err) slog.Error("failed to cleanup container", "id", id, "error", err)
respondError(w, http.StatusInternalServerError, "failed to cleanup instance") respondError(w, http.StatusInternalServerError, "failed to cleanup container")
return return
} }
respondJSON(w, http.StatusOK, map[string]string{"cleaned": instanceID}) respondJSON(w, http.StatusOK, map[string]string{"cleaned": id})
} }
// bulkCleanupStaleContainers handles POST /api/containers/stale/cleanup. // bulkCleanupStaleContainers handles POST /api/containers/stale/cleanup.
// Cleans up all currently stale containers.
func (s *Server) bulkCleanupStaleContainers(w http.ResponseWriter, r *http.Request) { func (s *Server) bulkCleanupStaleContainers(w http.ResponseWriter, r *http.Request) {
if s.staleScanner == nil { if s.staleScanner == nil {
respondError(w, http.StatusServiceUnavailable, "stale scanner not initialized") respondError(w, http.StatusServiceUnavailable, "stale scanner not initialized")
return return
} }
staleInstances, err := s.staleScanner.FindStaleInstances(r.Context()) staleRows, err := s.staleScanner.FindStaleContainers(r.Context())
if err != nil { if err != nil {
slog.Error("failed to find stale containers for bulk cleanup", "error", err) slog.Error("failed to find stale containers for bulk cleanup", "error", err)
respondError(w, http.StatusInternalServerError, "failed to find stale containers") respondError(w, http.StatusInternalServerError, "failed to find stale containers")
@@ -81,17 +80,17 @@ func (s *Server) bulkCleanupStaleContainers(w http.ResponseWriter, r *http.Reque
var cleaned []string var cleaned []string
var failed []string var failed []string
for _, si := range staleInstances { for _, sc := range staleRows {
if si.Instance.Status == "removing" { if sc.Container.State == "removing" {
continue continue
} }
if err := s.cleanupInstance(r, si.Instance); err != nil { if err := s.cleanupContainer(r, sc.Container); err != nil {
slog.Error("bulk stale cleanup failed", slog.Error("bulk stale cleanup failed",
"instance_id", si.Instance.ID, "error", err) "id", sc.Container.ID, "error", err)
failed = append(failed, si.Instance.ID) failed = append(failed, sc.Container.ID)
continue continue
} }
cleaned = append(cleaned, si.Instance.ID) cleaned = append(cleaned, sc.Container.ID)
} }
respondJSON(w, http.StatusOK, map[string]any{ respondJSON(w, http.StatusOK, map[string]any{
@@ -100,53 +99,48 @@ func (s *Server) bulkCleanupStaleContainers(w http.ResponseWriter, r *http.Reque
}) })
} }
// cleanupInstance stops a Docker container, removes the NPM proxy, deletes // cleanupContainer stops a Docker container, removes its proxy route,
// the store record, and emits an event. // deletes the container row, and emits an event.
func (s *Server) cleanupInstance(r *http.Request, inst store.Instance) error { func (s *Server) cleanupContainer(r *http.Request, c store.Container) error {
ctx := r.Context() ctx := r.Context()
// Mark as removing. if err := s.store.UpdateContainerState(c.ID, "removing"); err != nil {
if err := s.store.UpdateInstanceStatus(inst.ID, "removing"); err != nil { slog.Warn("stale cleanup: update state to removing", "id", c.ID, "error", err)
slog.Warn("stale cleanup: update status to removing", "instance_id", inst.ID, "error", err)
} }
// Stop and remove Docker container. if c.ContainerID != "" {
if inst.ContainerID != "" { if err := s.docker.StopContainer(ctx, c.ContainerID, 10); err != nil {
if err := s.docker.StopContainer(ctx, inst.ContainerID, 10); err != nil { slog.Warn("stale cleanup: stop container", "container_id", c.ContainerID, "error", err)
slog.Warn("stale cleanup: stop container", "container_id", inst.ContainerID, "error", err)
} }
if err := s.docker.RemoveContainer(ctx, inst.ContainerID, true); err != nil { if err := s.docker.RemoveContainer(ctx, c.ContainerID, true); err != nil {
slog.Warn("stale cleanup: remove container", "container_id", inst.ContainerID, "error", err) slog.Warn("stale cleanup: remove container", "container_id", c.ContainerID, "error", err)
} }
} }
// Delete proxy route if present. if c.ProxyRouteID != "" {
if inst.ProxyRouteID != "" { if err := s.proxyProvider.DeleteRoute(ctx, c.ProxyRouteID); err != nil {
if err := s.proxyProvider.DeleteRoute(ctx, inst.ProxyRouteID); err != nil { slog.Warn("stale cleanup: delete proxy route", "route_id", c.ProxyRouteID, "error", err)
slog.Warn("stale cleanup: delete proxy route", "route_id", inst.ProxyRouteID, "error", err)
} }
} }
// Delete instance record. if err := s.store.DeleteContainer(c.ID); err != nil {
if err := s.store.DeleteInstance(inst.ID); err != nil {
return err return err
} }
// Emit cleanup event. s.emitStaleCleanupEvent(c)
s.emitStaleCleanupEvent(inst)
return nil return nil
} }
// emitStaleCleanupEvent publishes an event when a stale container is cleaned up. // emitStaleCleanupEvent publishes an event when a stale container is cleaned up.
func (s *Server) emitStaleCleanupEvent(inst store.Instance) { func (s *Server) emitStaleCleanupEvent(c store.Container) {
msg := "Stale container cleaned up: " + inst.ID + " (tag: " + inst.ImageTag + ")" msg := "Stale container cleaned up: " + c.ID + " (tag: " + c.ImageTag + ")"
evt, err := s.store.InsertEvent(store.EventLog{ evt, err := s.store.InsertEvent(store.EventLog{
Source: "stale_cleanup", Source: "stale_cleanup",
Severity: "info", Severity: "info",
Message: msg, Message: msg,
Metadata: `{"instance_id":"` + inst.ID + `","project_id":"` + inst.ProjectID + `","stage_id":"` + inst.StageID + `"}`, Metadata: `{"container_id":"` + c.ID + `","workload_id":"` + c.WorkloadID + `","role":"` + c.Role + `"}`,
}) })
if err != nil { if err != nil {
slog.Error("stale cleanup: failed to persist event", "error", err) slog.Error("stale cleanup: failed to persist event", "error", err)
+10 -10
View File
@@ -11,29 +11,29 @@ import (
) )
// getInstanceStats handles GET /api/projects/{id}/stages/{stage}/instances/{iid}/stats. // getInstanceStats handles GET /api/projects/{id}/stages/{stage}/instances/{iid}/stats.
// Returns CPU and memory stats for the container backing the given instance. // {iid} is the container row ID (same UUID as the legacy instance ID).
func (s *Server) getInstanceStats(w http.ResponseWriter, r *http.Request) { func (s *Server) getInstanceStats(w http.ResponseWriter, r *http.Request) {
instanceID := chi.URLParam(r, "iid") id := chi.URLParam(r, "iid")
inst, err := s.store.GetInstanceByID(instanceID) c, err := s.store.GetContainerByID(id)
if err != nil { if err != nil {
if errors.Is(err, store.ErrNotFound) { if errors.Is(err, store.ErrNotFound) {
respondNotFound(w, "instance") respondNotFound(w, "container")
return return
} }
slog.Error("failed to get instance", "instance_id", instanceID, "error", err) slog.Error("failed to get container", "id", id, "error", err)
respondError(w, http.StatusInternalServerError, "failed to get instance") respondError(w, http.StatusInternalServerError, "failed to get container")
return return
} }
if inst.ContainerID == "" { if c.ContainerID == "" {
respondError(w, http.StatusBadRequest, "instance has no container") respondError(w, http.StatusBadRequest, "container row has no docker container bound")
return return
} }
stats, err := s.docker.GetContainerStats(r.Context(), inst.ContainerID) stats, err := s.docker.GetContainerStats(r.Context(), c.ContainerID)
if err != nil { if err != nil {
slog.Error("failed to get container stats", "container_id", inst.ContainerID, "error", err) slog.Error("failed to get container stats", "container_id", c.ContainerID, "error", err)
respondError(w, http.StatusInternalServerError, "failed to get container stats") respondError(w, http.StatusInternalServerError, "failed to get container stats")
return return
} }
+17 -16
View File
@@ -91,15 +91,16 @@ func (s *Server) getSystemStatsHistory(w http.ResponseWriter, r *http.Request) {
} }
// getInstanceStatsHistory handles GET /api/projects/{id}/stages/{stage}/instances/{iid}/stats/history. // getInstanceStatsHistory handles GET /api/projects/{id}/stages/{stage}/instances/{iid}/stats/history.
// {iid} is the container row ID (same UUID as the legacy instance ID).
func (s *Server) getInstanceStatsHistory(w http.ResponseWriter, r *http.Request) { func (s *Server) getInstanceStatsHistory(w http.ResponseWriter, r *http.Request) {
instanceID := chi.URLParam(r, "iid") instanceID := chi.URLParam(r, "iid")
if _, err := s.store.GetInstanceByID(instanceID); err != nil { if _, err := s.store.GetContainerByID(instanceID); err != nil {
if errors.Is(err, store.ErrNotFound) { if errors.Is(err, store.ErrNotFound) {
respondNotFound(w, "instance") respondNotFound(w, "container")
return return
} }
slog.Error("failed to get instance", "instance_id", instanceID, "error", err) slog.Error("failed to get container", "id", instanceID, "error", err)
respondError(w, http.StatusInternalServerError, "failed to get instance") respondError(w, http.StatusInternalServerError, "failed to get container")
return return
} }
samples, err := s.store.ListContainerStatsSamples(stats.OwnerTypeInstance, instanceID, sinceTimestamp(parseWindow(r))) samples, err := s.store.ListContainerStatsSamples(stats.OwnerTypeInstance, instanceID, sinceTimestamp(parseWindow(r)))
@@ -279,25 +280,25 @@ func (s *Server) enrichWithOwnerNames(samples []store.ContainerStatsSample) []To
return out return out
} }
// lookupInstanceName returns "project/stage" for an instance, or empty on // lookupInstanceName returns "workload/role" for a container row, or empty
// any lookup error so a transient miss does not break the response. // on any lookup error so a transient miss does not break the response.
func (s *Server) lookupInstanceName(instanceID string) string { func (s *Server) lookupInstanceName(instanceID string) string {
inst, err := s.store.GetInstanceByID(instanceID) c, err := s.store.GetContainerByID(instanceID)
if err != nil { if err != nil {
return "" return ""
} }
project, perr := s.store.GetProjectByID(inst.ProjectID) w, err := s.store.GetWorkloadByID(c.WorkloadID)
stage, serr := s.store.GetStageByID(inst.StageID) if err != nil {
switch { if c.Role != "" {
case perr == nil && serr == nil: return c.Role
return project.Name + "/" + stage.Name
case perr == nil:
return project.Name
case serr == nil:
return stage.Name
} }
return "" return ""
} }
if c.Role != "" {
return w.Name + "/" + c.Role
}
return w.Name
}
// lookupSiteName returns the site's display name or empty on lookup error. // lookupSiteName returns the site's display name or empty on lookup error.
func (s *Server) lookupSiteName(siteID string) string { func (s *Server) lookupSiteName(siteID string) string {
+33 -36
View File
@@ -25,17 +25,17 @@ func (d *Deployer) blueGreenDeploy(
deployID string, deployID string,
imageTag string, imageTag string,
) (string, string, string, error) { ) (string, string, string, error) {
// Find existing running instance for this stage (the "blue" instance). // Find existing running container for this stage (the "blue" container).
existingInstances, err := d.store.GetInstancesByStageID(stage.ID) existing, err := d.store.ListContainersByStageID(stage.ID)
if err != nil { if err != nil {
return "", "", "", fmt.Errorf("get existing instances: %w", err) return "", "", "", fmt.Errorf("get existing containers: %w", err)
} }
var blueInstance *store.Instance var blueContainer *store.Container
for _, inst := range existingInstances { for _, c := range existing {
if inst.Status == "running" { if c.State == "running" {
instCopy := inst cCopy := c
blueInstance = &instCopy blueContainer = &cCopy
break break
} }
} }
@@ -84,9 +84,6 @@ func (d *Deployer) blueGreenDeploy(
ExposedPorts: []string{portStr}, ExposedPorts: []string{portStr},
NetworkName: settings.Network, NetworkName: settings.Network,
NetworkID: networkID, NetworkID: networkID,
Project: project.Name,
Stage: stage.Name,
InstanceID: instanceID,
WorkloadID: workloadID, WorkloadID: workloadID,
WorkloadKind: string(store.WorkloadKindProject), WorkloadKind: string(store.WorkloadKindProject),
Role: stage.Name, Role: stage.Name,
@@ -114,25 +111,27 @@ func (d *Deployer) blueGreenDeploy(
return "", "", instanceID, fmt.Errorf("create container: %w", err) return "", "", instanceID, fmt.Errorf("create container: %w", err)
} }
// Create instance record. // Create container row.
inst, err := d.store.CreateInstanceWithID(store.Instance{ row, err := d.store.CreateContainer(store.Container{
ID: instanceID, ID: instanceID,
StageID: stage.ID, WorkloadID: workloadID,
ProjectID: project.ID, WorkloadKind: string(store.WorkloadKindProject),
Role: stage.Name,
ContainerID: containerID, ContainerID: containerID,
ImageRef: project.Image + ":" + imageTag,
ImageTag: imageTag, ImageTag: imageTag,
Subdomain: subdomain, Host: "local",
Status: "stopped", State: "stopped",
Port: project.Port, Port: project.Port,
Subdomain: subdomain,
}) })
if err != nil { if err != nil {
return containerID, "", instanceID, fmt.Errorf("create instance record: %w", err) return containerID, "", instanceID, fmt.Errorf("create container row: %w", err)
} }
instanceID = inst.ID instanceID = row.ID
d.upsertContainerForInstance(project, stage, inst, workloadID)
if err := d.store.SetDeployInstanceID(deployID, instanceID); err != nil { if err := d.store.SetDeployInstanceID(deployID, instanceID); err != nil {
slog.Warn("link deploy to instance", "error", err) slog.Warn("link deploy to container", "error", err)
} }
d.logDeploy(deployID, fmt.Sprintf("Blue-green: starting green container %s", containerName), "info") d.logDeploy(deployID, fmt.Sprintf("Blue-green: starting green container %s", containerName), "info")
@@ -140,11 +139,10 @@ func (d *Deployer) blueGreenDeploy(
return containerID, "", instanceID, fmt.Errorf("start container: %w", err) return containerID, "", instanceID, fmt.Errorf("start container: %w", err)
} }
if err := d.store.UpdateInstanceStatus(instanceID, "running"); err != nil { if err := d.store.UpdateContainerState(instanceID, "running"); err != nil {
slog.Warn("update instance status", "error", err) slog.Warn("update container state", "error", err)
} }
inst.Status = "running" row.State = "running"
d.upsertContainerForInstance(project, stage, inst, workloadID)
d.publishInstanceStatus(instanceID, project.ID, stage.ID, "running") d.publishInstanceStatus(instanceID, project.ID, stage.ID, "running")
// Step 4: Health check the green container. // Step 4: Health check the green container.
@@ -181,30 +179,29 @@ func (d *Deployer) blueGreenDeploy(
return containerID, "", instanceID, fmt.Errorf("configure proxy: %w", err) return containerID, "", instanceID, fmt.Errorf("configure proxy: %w", err)
} }
inst.ProxyRouteID = proxyRouteID row.ProxyRouteID = proxyRouteID
d.logDeploy(deployID, "Blue-green: proxy swapped to green container", "info") d.logDeploy(deployID, "Blue-green: proxy swapped to green container", "info")
// Create/update DNS record for the green instance. // Create/update DNS record for the green container.
fqdn := subdomain + "." + settings.Domain fqdn := subdomain + "." + settings.Domain
d.ensureDNS(ctx, fqdn, "instance", instanceID, deployID) d.ensureDNS(ctx, fqdn, "instance", instanceID, deployID)
} else { } else {
d.logDeploy(deployID, "Blue-green: proxy skipped (disabled for this stage)", "info") d.logDeploy(deployID, "Blue-green: proxy skipped (disabled for this stage)", "info")
} }
inst.Subdomain = subdomain row.Subdomain = subdomain
if err := d.store.UpdateInstance(inst); err != nil { if err := d.store.UpdateContainer(row); err != nil {
slog.Warn("update instance with proxy ID", "error", err) slog.Warn("update container with proxy ID", "error", err)
} }
d.upsertContainerForInstance(project, stage, inst, workloadID)
// Step 6: Stop the blue container. // Step 6: Stop the blue container.
if blueInstance != nil { if blueContainer != nil {
d.logDeploy(deployID, fmt.Sprintf("Blue-green: stopping blue instance %s (tag: %s)", blueInstance.ID, blueInstance.ImageTag), "info") d.logDeploy(deployID, fmt.Sprintf("Blue-green: stopping blue container %s (tag: %s)", blueContainer.ID, blueContainer.ImageTag), "info")
if err := d.removeInstance(ctx, *blueInstance, settings); err != nil { if err := d.removeContainer(ctx, *blueContainer, settings); err != nil {
// Non-fatal: log but continue. Green is already serving traffic. // Non-fatal: log but continue. Green is already serving traffic.
d.logDeploy(deployID, fmt.Sprintf("Blue-green: warning: failed to remove blue instance: %v", err), "warn") d.logDeploy(deployID, fmt.Sprintf("Blue-green: warning: failed to remove blue container: %v", err), "warn")
} else { } else {
d.logDeploy(deployID, "Blue-green: blue instance removed", "info") d.logDeploy(deployID, "Blue-green: blue container removed", "info")
} }
} }
+62 -99
View File
@@ -376,9 +376,6 @@ func (d *Deployer) executeDeploy(
ExposedPorts: []string{portStr}, ExposedPorts: []string{portStr},
NetworkName: settings.Network, NetworkName: settings.Network,
NetworkID: networkID, NetworkID: networkID,
Project: project.Name,
Stage: stage.Name,
InstanceID: instanceID,
WorkloadID: workloadID, WorkloadID: workloadID,
WorkloadKind: string(store.WorkloadKindProject), WorkloadKind: string(store.WorkloadKindProject),
Role: stage.Name, Role: stage.Name,
@@ -407,26 +404,32 @@ func (d *Deployer) executeDeploy(
} }
d.logDeploy(deployID, fmt.Sprintf("Container created (ID: %s)", truncateID(containerID)), "info") d.logDeploy(deployID, fmt.Sprintf("Container created (ID: %s)", truncateID(containerID)), "info")
// Create instance record in store with the pre-generated ID. // Create container row with the pre-generated ID. The deployer is the
inst, err := d.store.CreateInstanceWithID(store.Instance{ // authoritative writer until the next reconciler tick — it's important
// the row exists before StartContainer so a fast tick doesn't see an
// orphan and mark it missing.
row, err := d.store.CreateContainer(store.Container{
ID: instanceID, ID: instanceID,
StageID: stage.ID, WorkloadID: workloadID,
ProjectID: project.ID, WorkloadKind: string(store.WorkloadKindProject),
Role: stage.Name,
ContainerID: containerID, ContainerID: containerID,
ImageRef: project.Image + ":" + imageTag,
ImageTag: imageTag, ImageTag: imageTag,
Subdomain: subdomain, Host: "local",
Status: "stopped", State: "stopped",
Port: project.Port, Port: project.Port,
Subdomain: subdomain,
}) })
if err != nil { if err != nil {
return containerID, proxyRouteID, instanceID, fmt.Errorf("create instance record: %w", err) return containerID, proxyRouteID, instanceID, fmt.Errorf("create container row: %w", err)
} }
instanceID = inst.ID instanceID = row.ID
d.upsertContainerForInstance(project, stage, inst, workloadID)
// Link deploy to instance. // Link deploy to container row (the existing Deploy.InstanceID column
// stores the row ID — same value as before, just a renamed concept).
if err := d.store.SetDeployInstanceID(deployID, instanceID); err != nil { if err := d.store.SetDeployInstanceID(deployID, instanceID); err != nil {
slog.Warn("link deploy to instance", "error", err) slog.Warn("link deploy to container", "error", err)
} }
d.logDeploy(deployID, fmt.Sprintf("Starting container %s", containerName), "info") d.logDeploy(deployID, fmt.Sprintf("Starting container %s", containerName), "info")
@@ -434,15 +437,11 @@ func (d *Deployer) executeDeploy(
return containerID, proxyRouteID, instanceID, fmt.Errorf("start container: %w", err) return containerID, proxyRouteID, instanceID, fmt.Errorf("start container: %w", err)
} }
if err := d.store.UpdateInstanceStatus(instanceID, "running"); err != nil { if err := d.store.UpdateContainerState(instanceID, "running"); err != nil {
slog.Warn("update instance status to running", "error", err) slog.Warn("update container state to running", "error", err)
} }
if err := d.store.UpdateLastAliveAt(instanceID); err != nil { row.State = "running"
slog.Warn("update last_alive_at on deploy", "instance_id", instanceID, "error", err) row.LastSeenAt = store.Now()
}
inst.Status = "running"
inst.LastAliveAt = store.Now()
d.upsertContainerForInstance(project, stage, inst, workloadID)
d.publishInstanceStatus(instanceID, project.ID, stage.ID, "running") d.publishInstanceStatus(instanceID, project.ID, stage.ID, "running")
d.logDeploy(deployID, "Container started", "info") d.logDeploy(deployID, "Container started", "info")
@@ -463,24 +462,22 @@ func (d *Deployer) executeDeploy(
return containerID, proxyRouteID, instanceID, fmt.Errorf("configure proxy: %w", err) return containerID, proxyRouteID, instanceID, fmt.Errorf("configure proxy: %w", err)
} }
// Update instance with proxy route ID. // Update container row with proxy route ID.
inst.ProxyRouteID = proxyRouteID row.ProxyRouteID = proxyRouteID
inst.Subdomain = subdomain row.Subdomain = subdomain
if err := d.store.UpdateInstance(inst); err != nil { if err := d.store.UpdateContainer(row); err != nil {
slog.Warn("update instance with proxy ID", "error", err) slog.Warn("update container with proxy ID", "error", err)
} }
d.upsertContainerForInstance(project, stage, inst, workloadID)
// Create DNS record for this instance. // Create DNS record for this container.
fqdn := subdomain + "." + settings.Domain fqdn := subdomain + "." + settings.Domain
d.ensureDNS(ctx, fqdn, "instance", instanceID, deployID) d.ensureDNS(ctx, fqdn, "instance", instanceID, deployID)
} else { } else {
d.logDeploy(deployID, "Proxy creation skipped (disabled for this stage)", "info") d.logDeploy(deployID, "Proxy creation skipped (disabled for this stage)", "info")
inst.Subdomain = subdomain row.Subdomain = subdomain
if err := d.store.UpdateInstance(inst); err != nil { if err := d.store.UpdateContainer(row); err != nil {
slog.Warn("update instance", "error", err) slog.Warn("update container", "error", err)
} }
d.upsertContainerForInstance(project, stage, inst, workloadID)
} }
// Step 5: Health check. // Step 5: Health check.
@@ -554,27 +551,27 @@ func (d *Deployer) configureProxy(
return routeID, nil return routeID, nil
} }
// enforceMaxInstances removes the oldest instances when the stage has reached its limit. // enforceMaxInstances removes the oldest container rows when the stage has
// This makes room for the new deployment. // reached its instance limit, making room for the new deploy.
func (d *Deployer) enforceMaxInstances(ctx context.Context, stage store.Stage, deployID string, settings store.Settings) error { func (d *Deployer) enforceMaxInstances(ctx context.Context, stage store.Stage, deployID string, settings store.Settings) error {
if stage.MaxInstances <= 0 { if stage.MaxInstances <= 0 {
return nil return nil
} }
instances, err := d.store.GetInstancesByStageID(stage.ID) containers, err := d.store.ListContainersByStageID(stage.ID)
if err != nil { if err != nil {
return fmt.Errorf("get instances for stage: %w", err) return fmt.Errorf("get containers for stage: %w", err)
} }
// Filter to running/stopped instances (not already failed/removing). // Filter to running/stopped containers (not already failed/removing).
var active []store.Instance var active []store.Container
for _, inst := range instances { for _, c := range containers {
if inst.Status == "running" || inst.Status == "stopped" { if c.State == "running" || c.State == "stopped" {
active = append(active, inst) active = append(active, c)
} }
} }
// We need room for one more instance, so remove oldest when at limit. // We need room for one more container, so remove the oldest when at limit.
removeCount := len(active) - stage.MaxInstances + 1 removeCount := len(active) - stage.MaxInstances + 1
if removeCount <= 0 { if removeCount <= 0 {
return nil return nil
@@ -586,57 +583,50 @@ func (d *Deployer) enforceMaxInstances(ctx context.Context, stage store.Stage, d
}) })
for i := 0; i < removeCount && i < len(active); i++ { for i := 0; i < removeCount && i < len(active); i++ {
inst := active[i] c := active[i]
d.logDeploy(deployID, fmt.Sprintf("Removing oldest instance %s (tag: %s) to enforce max_instances=%d", inst.ID, inst.ImageTag, stage.MaxInstances), "info") d.logDeploy(deployID, fmt.Sprintf("Removing oldest container %s (tag: %s) to enforce max_instances=%d", c.ID, c.ImageTag, stage.MaxInstances), "info")
if err := d.removeInstance(ctx, inst, settings); err != nil { if err := d.removeContainer(ctx, c, settings); err != nil {
d.logDeploy(deployID, fmt.Sprintf("Failed to remove instance %s: %v", inst.ID, err), "warn") d.logDeploy(deployID, fmt.Sprintf("Failed to remove container %s: %v", c.ID, err), "warn")
continue continue
} }
d.logDeploy(deployID, fmt.Sprintf("Removed instance %s", inst.ID), "info") d.logDeploy(deployID, fmt.Sprintf("Removed container %s", c.ID), "info")
} }
return nil return nil
} }
// removeInstance stops and removes a container, deletes its NPM proxy host, // removeContainer stops + removes the Docker container, deletes its proxy
// and removes the instance record from the store. // route, drops the DNS record, and removes the container row from the store.
func (d *Deployer) removeInstance(ctx context.Context, inst store.Instance, settings store.Settings) error { func (d *Deployer) removeContainer(ctx context.Context, c store.Container, settings store.Settings) error {
// Mark as removing. // Mark as removing.
if err := d.store.UpdateInstanceStatus(inst.ID, "removing"); err != nil { if err := d.store.UpdateContainerState(c.ID, "removing"); err != nil {
slog.Warn("update instance status to removing", "instance_id", inst.ID, "error", err) slog.Warn("update container state to removing", "id", c.ID, "error", err)
} }
// Remove Docker container. // Remove Docker container.
if inst.ContainerID != "" { if c.ContainerID != "" {
if err := d.docker.RemoveContainer(ctx, inst.ContainerID, true); err != nil { if err := d.docker.RemoveContainer(ctx, c.ContainerID, true); err != nil {
slog.Warn("remove container", "container_id", inst.ContainerID, "error", err) slog.Warn("remove docker container", "container_id", c.ContainerID, "error", err)
} }
} }
// Delete proxy route. // Delete proxy route.
if inst.ProxyRouteID != "" { if c.ProxyRouteID != "" {
if err := d.proxy.DeleteRoute(ctx, inst.ProxyRouteID); err != nil { if err := d.proxy.DeleteRoute(ctx, c.ProxyRouteID); err != nil {
slog.Warn("delete proxy route", "route_id", inst.ProxyRouteID, "error", err) slog.Warn("delete proxy route", "route_id", c.ProxyRouteID, "error", err)
} }
// Remove DNS record for this instance. // Remove DNS record.
if inst.Subdomain != "" && settings.Domain != "" { if c.Subdomain != "" && settings.Domain != "" {
fqdn := inst.Subdomain + "." + settings.Domain fqdn := c.Subdomain + "." + settings.Domain
d.removeDNS(ctx, fqdn, "") d.removeDNS(ctx, fqdn, "")
} }
} }
// Delete instance record. // Drop the container row.
if err := d.store.DeleteInstance(inst.ID); err != nil { if err := d.store.DeleteContainer(c.ID); err != nil && !errors.Is(err, store.ErrNotFound) {
return fmt.Errorf("delete instance record: %w", err) return fmt.Errorf("delete container row: %w", err)
}
// Drop the matching container index row. ID matches instance.ID by
// construction; ignore NotFound which is harmless if the row predates
// this refactor.
if err := d.store.DeleteContainer(inst.ID); err != nil && !errors.Is(err, store.ErrNotFound) {
slog.Warn("delete container row", "instance_id", inst.ID, "error", err)
} }
return nil return nil
@@ -903,33 +893,6 @@ func truncateID(id string) string {
return id return id
} }
// upsertContainerForInstance keeps the normalized containers index in sync
// with the project-specific instance row. Same UUID is used for both rows so
// the reconciler can find them later. Best-effort: a sync failure is logged
// but does not abort the deploy — the container is still running and the
// reconciler will pick it up on the next tick (once that lands).
//
// project supplies the image name, stage supplies the role, inst supplies the
// per-container fields, and workloadID is stored on the row unchanged. Nothing
// is returned; an upsert error is only logged.
func (d *Deployer) upsertContainerForInstance(project store.Project, stage store.Stage, inst store.Instance, workloadID string) {
c := store.Container{
// The container row reuses the instance ID as its own ID.
ID: inst.ID,
WorkloadID: workloadID,
// This helper only ever writes project-kind rows.
WorkloadKind: string(store.WorkloadKindProject),
// The stage name doubles as the container's role.
Role: stage.Name,
ContainerID: inst.ContainerID,
// Full image reference is rebuilt as "<project image>:<tag>".
ImageRef: project.Image + ":" + inst.ImageTag,
ImageTag: inst.ImageTag,
// Host is hard-coded to "local" here.
Host: "local",
// Instance status is copied verbatim into the container state.
State: inst.Status,
Port: inst.Port,
Subdomain: inst.Subdomain,
ProxyRouteID: inst.ProxyRouteID,
NpmProxyID: inst.NpmProxyID,
// The instance's last-alive timestamp becomes the row's last-seen value.
LastSeenAt: inst.LastAliveAt,
}
// Best-effort write: log the failure and carry on rather than propagate it.
if err := d.store.UpsertContainer(c); err != nil {
slog.Warn("upsert container row", "instance_id", inst.ID, "error", err)
}
}
// resolveProjectWorkloadID returns the workload ID paired with a project. // resolveProjectWorkloadID returns the workload ID paired with a project.
// Backfill-on-boot guarantees the row exists, so this is essentially a lookup. // Backfill-on-boot guarantees the row exists, so this is essentially a lookup.
// On miss (defensive), it logs and returns empty so the caller can decide. // On miss (defensive), it logs and returns empty so the caller can decide.
+4 -4
View File
@@ -34,13 +34,13 @@ func (d *Deployer) validatePromoteFrom(stage store.Stage, imageTag string) error
} }
// Check if the tag is running in the source stage. // Check if the tag is running in the source stage.
instances, err := d.store.GetInstancesByStageID(sourceStage.ID) containers, err := d.store.ListContainersByStageID(sourceStage.ID)
if err != nil { if err != nil {
return fmt.Errorf("get instances for source stage: %w", err) return fmt.Errorf("get containers for source stage: %w", err)
} }
for _, inst := range instances { for _, c := range containers {
if inst.ImageTag == imageTag && (inst.Status == "running" || inst.Status == "stopped") { if c.ImageTag == imageTag && (c.State == "running" || c.State == "stopped") {
return nil // Tag found in source stage, promotion is allowed. return nil // Tag found in source stage, promotion is allowed.
} }
} }
+8 -7
View File
@@ -32,24 +32,25 @@ func (d *Deployer) rollback(ctx context.Context, deployID string, containerID st
} }
} }
// Clean up DNS record if the instance had a subdomain. // Clean up DNS record if the container had a subdomain. instanceID is
// the container row ID (same UUID either way) — read from containers.
if instanceID != "" { if instanceID != "" {
inst, err := d.store.GetInstanceByID(instanceID) c, err := d.store.GetContainerByID(instanceID)
if err == nil && inst.Subdomain != "" { if err == nil && c.Subdomain != "" {
settings, settingsErr := d.store.GetSettings() settings, settingsErr := d.store.GetSettings()
if settingsErr != nil { if settingsErr != nil {
slog.Warn("rollback: failed to get settings for DNS cleanup", "error", settingsErr) slog.Warn("rollback: failed to get settings for DNS cleanup", "error", settingsErr)
} else if settings.Domain != "" { } else if settings.Domain != "" {
fqdn := inst.Subdomain + "." + settings.Domain fqdn := c.Subdomain + "." + settings.Domain
d.removeDNS(ctx, fqdn, deployID) d.removeDNS(ctx, fqdn, deployID)
} }
} }
} }
// Update instance status to failed if it was created. // Mark the container row as failed if it was created.
if instanceID != "" { if instanceID != "" {
if err := d.store.UpdateInstanceStatus(instanceID, "failed"); err != nil { if err := d.store.UpdateContainerState(instanceID, "failed"); err != nil {
slog.Warn("rollback: update instance status", "instance_id", instanceID, "error", err) slog.Warn("rollback: update container state", "id", instanceID, "error", err)
} }
} }
+3 -10
View File
@@ -9,17 +9,10 @@ import (
// Labels applied to all containers managed by Tinyforge. // Labels applied to all containers managed by Tinyforge.
// //
// Workload-shaped labels (LabelWorkloadID, LabelWorkloadKind, LabelRole, // The legacy tinyforge.project / tinyforge.stage / tinyforge.instance-id
// LabelManaged) are the canonical set going forward and what the reconciler // labels were removed in the workload refactor — the deployer now stamps
// queries by. The legacy project/stage/instance-id labels are still emitted // only the workload-shaped labels below at create time.
// alongside them for back-compat with anything that selects on them
// (operator runbooks, monitoring scrape rules, ad-hoc shell debugging) — they
// will be removed once the migration soaks.
const ( const (
LabelProject = "tinyforge.project"
LabelStage = "tinyforge.stage"
LabelInstanceID = "tinyforge.instance-id"
LabelManaged = "tinyforge.managed" // present on every Tinyforge-managed container LabelManaged = "tinyforge.managed" // present on every Tinyforge-managed container
LabelWorkloadID = "tinyforge.workload.id" // workload row primary key LabelWorkloadID = "tinyforge.workload.id" // workload row primary key
LabelWorkloadKind = "tinyforge.workload.kind" // 'project' | 'stack' | 'site' LabelWorkloadKind = "tinyforge.workload.kind" // 'project' | 'stack' | 'site'
+15 -32
View File
@@ -39,15 +39,6 @@ type ContainerConfig struct {
// Tinyforge management labels (tinyforge.managed plus the workload labels derived from WorkloadID, WorkloadKind, and Role) are added automatically.
Labels map[string]string Labels map[string]string
// Project is the Tinyforge project name (used for labelling).
Project string
// Stage is the Tinyforge stage name (used for labelling).
Stage string
// InstanceID is the Tinyforge instance ID (used for labelling).
InstanceID string
// WorkloadID is the unifying primitive's row ID (Workload.ID). Future // WorkloadID is the unifying primitive's row ID (Workload.ID). Future
// reconciler / global views key off this label, so it must be set on // reconciler / global views key off this label, so it must be set on
// every Tinyforge-managed container (project, stack, site). // every Tinyforge-managed container (project, stack, site).
@@ -106,12 +97,7 @@ func (c *Client) CreateContainer(ctx context.Context, cfg ContainerConfig) (stri
for k, v := range cfg.Labels { for k, v := range cfg.Labels {
labels[k] = v labels[k] = v
} }
// Legacy labels (kept for back-compat with operator runbooks / // Workload-shaped labels — the canonical Tinyforge label set.
// monitoring scrape rules; will be removed after the workload soak).
labels[LabelProject] = cfg.Project
labels[LabelStage] = cfg.Stage
labels[LabelInstanceID] = cfg.InstanceID
// Workload-shaped labels — canonical going forward.
labels[LabelManaged] = "true" labels[LabelManaged] = "true"
if cfg.WorkloadID != "" { if cfg.WorkloadID != "" {
labels[LabelWorkloadID] = cfg.WorkloadID labels[LabelWorkloadID] = cfg.WorkloadID
@@ -225,26 +211,27 @@ func (c *Client) RestartContainer(ctx context.Context, containerID string, timeo
} }
// ManagedContainer holds summary information about a container managed by
// Tinyforge. WorkloadID, WorkloadKind and Role are pulled from the canonical
// Tinyforge labels, so callers can dispatch or display a container without
// issuing an extra inspect call.
type ManagedContainer struct {
	// ID is the container's identifier.
	ID string
	// Name is the container's name.
	Name string
	// Image is the image the container reports.
	Image string
	// Status is the container's status string.
	Status string
	// State is the container's state, stored as a plain string.
	State string
	// WorkloadID is the value of the LabelWorkloadID label; empty when the
	// label is absent.
	WorkloadID string
	// WorkloadKind is the value of the LabelWorkloadKind label; empty when
	// the label is absent.
	WorkloadKind string
	// Role is the value of the LabelRole label; empty when the label is
	// absent.
	Role string
	// Ports lists the port numbers collected for the container.
	Ports []uint16
}
// ListContainers returns all containers matching the given label filters. // ListContainers returns all Tinyforge-managed containers (label
// Pass nil or an empty map to list all Tinyforge managed containers. // tinyforge.managed=true), optionally narrowed by additional label filters.
// Label filters are key=value pairs applied as Docker label filters. // Returns the workload labels so callers can dispatch / display without an
// extra inspect call.
func (c *Client) ListContainers(ctx context.Context, labelFilters map[string]string) ([]ManagedContainer, error) { func (c *Client) ListContainers(ctx context.Context, labelFilters map[string]string) ([]ManagedContainer, error) {
filterArgs := make(client.Filters) filterArgs := make(client.Filters)
// Always filter by the Tinyforge project label to only return managed containers. filterArgs.Add("label", LabelManaged+"=true")
filterArgs.Add("label", LabelProject)
for k, v := range labelFilters { for k, v := range labelFilters {
if v != "" { if v != "" {
@@ -283,9 +270,9 @@ func (c *Client) ListContainers(ctx context.Context, labelFilters map[string]str
Image: ctr.Image, Image: ctr.Image,
Status: ctr.Status, Status: ctr.Status,
State: string(ctr.State), State: string(ctr.State),
Project: ctr.Labels[LabelProject], WorkloadID: ctr.Labels[LabelWorkloadID],
Stage: ctr.Labels[LabelStage], WorkloadKind: ctr.Labels[LabelWorkloadKind],
InstanceID: ctr.Labels[LabelInstanceID], Role: ctr.Labels[LabelRole],
Ports: ports, Ports: ports,
}) })
} }
@@ -308,9 +295,8 @@ type ReconcileItem struct {
// ListAllForReconciler returns every container the daemon knows about whose // ListAllForReconciler returns every container the daemon knows about whose
// labels mark it as Tinyforge-managed by ANY of the supported schemes: // labels mark it as Tinyforge-managed by ANY of the supported schemes:
// - tinyforge.managed (canonical, new) // - tinyforge.managed (canonical — every project, stack, site we own)
// - tinyforge.project / tinyforge.instance-id (legacy project) // - tinyforge.static-site (sites that predate the workload labels)
// - tinyforge.static-site (legacy site)
// - com.docker.compose.project starting with "tinyforge-" (stacks) // - com.docker.compose.project starting with "tinyforge-" (stacks)
// //
// The Docker API does not support OR'd label filters, so we list everything // The Docker API does not support OR'd label filters, so we list everything
@@ -361,9 +347,6 @@ func isTinyforgeManaged(labels map[string]string) bool {
if labels[LabelManaged] == "true" { if labels[LabelManaged] == "true" {
return true return true
} }
if labels[LabelProject] != "" || labels[LabelInstanceID] != "" {
return true
}
if _, ok := labels["tinyforge.static-site"]; ok { if _, ok := labels["tinyforge.static-site"]; ok {
return true return true
} }
+1 -1
View File
@@ -32,7 +32,7 @@ func (c *Client) EnsureNetwork(ctx context.Context, networkName string) (string,
resp, err := c.api.NetworkCreate(ctx, networkName, client.NetworkCreateOptions{ resp, err := c.api.NetworkCreate(ctx, networkName, client.NetworkCreateOptions{
Driver: "bridge", Driver: "bridge",
Labels: map[string]string{ Labels: map[string]string{
LabelProject: "tinyforge", LabelManaged: "true",
}, },
}) })
if err != nil { if err != nil {
+21 -62
View File
@@ -5,15 +5,17 @@
// longer present are flipped to state='missing'. // longer present are flipped to state='missing'.
// //
// Dispatch precedence: // Dispatch precedence:
// 1. tinyforge.workload.id label (canonical) // 1. tinyforge.workload.id label (canonical, new)
// 2. tinyforge.instance-id label (legacy project — joins via instances row) // 2. tinyforge.static-site label (legacy site — joins via static_sites)
// 3. tinyforge.static-site label (legacy site) // 3. com.docker.compose.project (stack — joins via Stack.ComposeProjectName)
// 4. com.docker.compose.project (stack — joins via Stack.ComposeProjectName) //
// The legacy tinyforge.instance-id path was removed when the deployer was
// rewritten to use Container natively — every Tinyforge-managed project
// container now carries the workload labels at create time.
package reconciler package reconciler
import ( import (
"context" "context"
"errors"
"log/slog" "log/slog"
"strings" "strings"
"sync" "sync"
@@ -123,9 +125,6 @@ func (r *Reconciler) upsertFromItem(ctx context.Context, item docker.ReconcileIt
if id := item.Labels[docker.LabelWorkloadID]; id != "" { if id := item.Labels[docker.LabelWorkloadID]; id != "" {
return r.upsertByWorkloadLabel(item, id) return r.upsertByWorkloadLabel(item, id)
} }
if instanceID := item.Labels[docker.LabelInstanceID]; instanceID != "" {
return r.upsertByInstanceLabel(item, instanceID)
}
if siteID := item.Labels["tinyforge.static-site"]; siteID != "" { if siteID := item.Labels["tinyforge.static-site"]; siteID != "" {
return r.upsertBySiteLabel(item, siteID) return r.upsertBySiteLabel(item, siteID)
} }
@@ -135,12 +134,17 @@ func (r *Reconciler) upsertFromItem(ctx context.Context, item docker.ReconcileIt
return "" return ""
} }
// upsertByWorkloadLabel — canonical path. WorkloadID + Role uniquely // upsertByWorkloadLabel — canonical path. The row may already exist with a
// identifies the row. ID stays deterministic so re-deploys update in place. // deployer-assigned UUID (project deploys do this so each blue-green slot
// has a stable handle); look it up by docker container ID first and fall
// back to the deterministic workloadID:role key.
func (r *Reconciler) upsertByWorkloadLabel(item docker.ReconcileItem, workloadID string) string { func (r *Reconciler) upsertByWorkloadLabel(item docker.ReconcileItem, workloadID string) string {
role := item.Labels[docker.LabelRole] role := item.Labels[docker.LabelRole]
kind := item.Labels[docker.LabelWorkloadKind] kind := item.Labels[docker.LabelWorkloadKind]
rowID := workloadIDRow(workloadID, kind, role, item.Labels[docker.LabelInstanceID], item.ID) rowID := workloadIDRow(workloadID, kind, role, item.ID)
if existing, err := r.store.GetContainerByDockerID(item.ID); err == nil {
rowID = existing.ID
}
port := 0 port := 0
if len(item.Ports) > 0 { if len(item.Ports) > 0 {
@@ -164,49 +168,6 @@ func (r *Reconciler) upsertByWorkloadLabel(item docker.ReconcileItem, workloadID
return rowID return rowID
} }
// upsertByInstanceLabel handles the legacy project path: the container carries
// an instance-id label rather than the workload labels. The instance ID maps
// 1:1 to the container row ID by construction (the deployer uses the same UUID
// for both), so the row is updated directly; the workload ID is still resolved
// from the instance's project.
//
// Returns the upserted row ID, or "" when the instance or workload cannot be
// resolved or the upsert fails.
func (r *Reconciler) upsertByInstanceLabel(item docker.ReconcileItem, instanceID string) string {
	inst, err := r.store.GetInstanceByID(instanceID)
	if err != nil {
		// A missing instance row just means the label is stale; skip quietly.
		// Any other failure is a real store problem worth surfacing.
		if !errors.Is(err, store.ErrNotFound) {
			slog.Warn("reconciler: lookup instance", "instance_id", instanceID, "error", err)
		}
		return ""
	}

	w, err := r.store.GetWorkloadByRef(store.WorkloadKindProject, inst.ProjectID)
	if err != nil {
		// Mirror the instance lookup above instead of swallowing every error.
		// NOTE(review): assumes GetWorkloadByRef wraps store.ErrNotFound the
		// same way GetInstanceByID does — confirm; if it does not, missing
		// workloads will be logged here as well.
		if !errors.Is(err, store.ErrNotFound) {
			slog.Warn("reconciler: lookup workload for instance",
				"instance_id", instanceID, "project_id", inst.ProjectID, "error", err)
		}
		return ""
	}

	// Prefer the port recorded on the instance; fall back to the first port
	// reported for the container only when the instance has none.
	port := inst.Port
	if port == 0 && len(item.Ports) > 0 {
		port = int(item.Ports[0])
	}

	if err := r.store.UpsertContainer(store.Container{
		ID:           inst.ID,
		WorkloadID:   w.ID,
		WorkloadKind: string(store.WorkloadKindProject),
		Role:         item.Labels[docker.LabelStage],
		ContainerID:  item.ID,
		ImageRef:     item.Image,
		ImageTag:     inst.ImageTag,
		Host:         "local",
		State:        normalizeState(item.State),
		Port:         port,
		Subdomain:    inst.Subdomain,
		ProxyRouteID: inst.ProxyRouteID,
		NpmProxyID:   inst.NpmProxyID,
		LastSeenAt:   store.Now(),
	}); err != nil {
		slog.Warn("reconciler: upsert by instance label", "container_id", item.ID, "error", err)
		return ""
	}
	return inst.ID
}
func (r *Reconciler) upsertBySiteLabel(item docker.ReconcileItem, siteID string) string { func (r *Reconciler) upsertBySiteLabel(item docker.ReconcileItem, siteID string) string {
w, err := r.store.GetWorkloadByRef(store.WorkloadKindSite, siteID) w, err := r.store.GetWorkloadByRef(store.WorkloadKindSite, siteID)
if err != nil { if err != nil {
@@ -313,20 +274,18 @@ func (r *Reconciler) markMissingRows(seen map[string]struct{}) {
} }
// workloadIDRow picks the row ID for a workload-labelled container. // workloadIDRow picks the row ID for a workload-labelled container.
// For projects the deployer assigns instance ID = container row ID (via // Stack rows use the deterministic workloadID:role pattern; sites use
// LabelInstanceID), so we honor that to keep IDs stable. For stack/site // workloadID:site. Project rows have a per-deploy UUID assigned by the
// it's the deterministic workloadID:role pattern. // deployer and ALSO carry the role label (= stage name), so the same
func workloadIDRow(workloadID, kind, role, instanceID, containerID string) string { // pattern resolves to the same row across deployer + reconciler upserts.
if instanceID != "" && kind == string(store.WorkloadKindProject) { func workloadIDRow(workloadID, kind, role, containerID string) string {
return instanceID
}
if role != "" { if role != "" {
return workloadID + ":" + role return workloadID + ":" + role
} }
if kind == string(store.WorkloadKindSite) { if kind == string(store.WorkloadKindSite) {
return workloadID + ":site" return workloadID + ":site"
} }
// Last-resort fallback: container ID. Better than ""; uncommon path. // Last-resort fallback: container ID. Uncommon path.
return workloadID + ":" + containerID return workloadID + ":" + containerID
} }
+84 -115
View File
@@ -14,16 +14,21 @@ import (
"github.com/robfig/cron/v3" "github.com/robfig/cron/v3"
) )
// StaleInstance holds enriched info about a stale container for API responses. // StaleContainer is a stale container row enriched with the human-readable
type StaleInstance struct { // labels needed to render the Stale view (workload + role + days).
Instance store.Instance `json:"instance"` //
ProjectName string `json:"project_name"` // JSON shape uses container_id semantics — the frontend type was historically
StageName string `json:"stage_name"` // "Instance"; after the workload refactor it consumes Container fields directly.
type StaleContainer struct {
Container store.Container `json:"container"`
WorkloadID string `json:"workload_id"`
WorkloadName string `json:"workload_name"`
Role string `json:"role"`
DaysStale int `json:"days_stale"` DaysStale int `json:"days_stale"`
} }
// Scanner periodically checks for stale containers that have been // Scanner periodically checks for containers that have been non-running for
// non-running for longer than the configured threshold. // longer than the configured threshold.
type Scanner struct { type Scanner struct {
store *store.Store store *store.Store
docker *docker.Client docker *docker.Client
@@ -34,8 +39,8 @@ type Scanner struct {
entryID cron.EntryID entryID cron.EntryID
running bool running bool
// knownStale tracks instance IDs that have already had a stale event emitted, // knownStale tracks container row IDs that have already had a stale event
// to avoid re-emitting warnings for the same instance. // emitted, to avoid re-emitting the same warning on every tick.
knownStale map[string]struct{} knownStale map[string]struct{}
} }
@@ -101,7 +106,7 @@ func (s *Scanner) Stop() {
} }
// Scan performs a single stale-container scan cycle. // Scan performs a single stale-container scan cycle.
// It updates last_alive_at for running containers and detects newly stale ones. // Updates last_seen_at for running containers and detects newly stale ones.
func (s *Scanner) Scan(ctx context.Context) error { func (s *Scanner) Scan(ctx context.Context) error {
settings, err := s.store.GetSettings() settings, err := s.store.GetSettings()
if err != nil { if err != nil {
@@ -113,67 +118,53 @@ func (s *Scanner) Scan(ctx context.Context) error {
thresholdDays = 7 thresholdDays = 7
} }
// Get all instances from the store. containers, err := s.store.ListContainers(store.ContainerFilter{})
instances, err := s.store.ListAllInstances()
if err != nil { if err != nil {
return fmt.Errorf("list all instances: %w", err) return fmt.Errorf("list containers: %w", err)
} }
if len(containers) == 0 {
if len(instances) == 0 {
return nil return nil
} }
// Get all managed Docker containers to check live state. // Live state from Docker, indexed by container_id label so we can
containers, err := s.docker.ListContainers(ctx, nil) // reconcile on a single pass.
dockerContainers, err := s.docker.ListContainers(ctx, nil)
if err != nil { if err != nil {
return fmt.Errorf("list docker containers: %w", err) return fmt.Errorf("list docker containers: %w", err)
} }
stateByContainerID := make(map[string]string, len(dockerContainers))
// Build a lookup: instance ID -> container state. for _, dc := range dockerContainers {
containerStateByInstanceID := make(map[string]string, len(containers)) stateByContainerID[dc.ID] = dc.State
for _, c := range containers {
if c.InstanceID != "" {
containerStateByInstanceID[c.InstanceID] = c.State
}
} }
now := time.Now().UTC() now := time.Now().UTC()
currentStaleIDs := make(map[string]struct{}) currentStaleIDs := make(map[string]struct{})
for _, inst := range instances { for _, c := range containers {
// Skip instances already being cleaned up. if c.State == "removing" {
if inst.Status == "removing" {
continue continue
} }
dockerState := containerStateByInstanceID[inst.ID] dockerState := stateByContainerID[c.ContainerID]
// If the container is running in Docker, update last_alive_at.
if dockerState == "running" { if dockerState == "running" {
if err := s.store.UpdateLastAliveAt(inst.ID); err != nil { if err := s.store.UpdateContainerState(c.ID, "running"); err != nil {
slog.Warn("stale scanner: failed to update last_alive_at", slog.Warn("stale scanner: failed to update state",
"instance_id", inst.ID, "error", err) "id", c.ID, "error", err)
}
// Also sync store status if it was out of date.
if inst.Status != "running" {
if err := s.store.UpdateInstanceStatus(inst.ID, "running"); err != nil {
slog.Warn("stale scanner: failed to sync instance status",
"instance_id", inst.ID, "error", err)
}
} }
continue continue
} }
// Container is not running. Check if it's stale. // Container is not running. Check staleness against last_seen_at,
if inst.LastAliveAt == "" { // falling back to created_at if it never came up.
// Never been seen running. Use created_at as fallback. ref := c.LastSeenAt
inst.LastAliveAt = inst.CreatedAt if ref == "" {
ref = c.CreatedAt
} }
lastAlive, parseErr := time.Parse("2006-01-02 15:04:05", ref)
lastAlive, parseErr := time.Parse("2006-01-02 15:04:05", inst.LastAliveAt)
if parseErr != nil { if parseErr != nil {
slog.Warn("stale scanner: failed to parse last_alive_at", slog.Warn("stale scanner: failed to parse last_seen_at",
"instance_id", inst.ID, "last_alive_at", inst.LastAliveAt, "error", parseErr) "id", c.ID, "ref", ref, "error", parseErr)
continue continue
} }
@@ -182,23 +173,19 @@ func (s *Scanner) Scan(ctx context.Context) error {
continue continue
} }
// This instance is stale. currentStaleIDs[c.ID] = struct{}{}
currentStaleIDs[inst.ID] = struct{}{} if _, alreadyKnown := s.knownStale[c.ID]; !alreadyKnown {
s.emitStaleEvent(c, daysSinceAlive)
// Emit event only if this is newly detected as stale.
if _, alreadyKnown := s.knownStale[inst.ID]; !alreadyKnown {
s.emitStaleEvent(inst, daysSinceAlive)
} }
} }
// Update known stale set: remove IDs that are no longer stale.
s.knownStale = currentStaleIDs s.knownStale = currentStaleIDs
return nil return nil
} }
// FindStaleInstances returns all currently stale instances with enriched project/stage info. // FindStaleContainers returns all currently stale containers enriched with
func (s *Scanner) FindStaleInstances(ctx context.Context) ([]StaleInstance, error) { // workload + role labels for rendering.
func (s *Scanner) FindStaleContainers(ctx context.Context) ([]StaleContainer, error) {
settings, err := s.store.GetSettings() settings, err := s.store.GetSettings()
if err != nil { if err != nil {
return nil, fmt.Errorf("get settings: %w", err) return nil, fmt.Errorf("get settings: %w", err)
@@ -209,58 +196,45 @@ func (s *Scanner) FindStaleInstances(ctx context.Context) ([]StaleInstance, erro
thresholdDays = 7 thresholdDays = 7
} }
instances, err := s.store.ListAllInstances() containers, err := s.store.ListContainers(store.ContainerFilter{})
if err != nil { if err != nil {
return nil, fmt.Errorf("list all instances: %w", err) return nil, fmt.Errorf("list containers: %w", err)
} }
containers, err := s.docker.ListContainers(ctx, nil) dockerContainers, err := s.docker.ListContainers(ctx, nil)
if err != nil { if err != nil {
// Docker unavailable — fall back to store-only detection (no live state). // Docker unavailable — fall back to store-only detection.
slog.Warn("stale scanner: docker unavailable, using store status only", "error", err) slog.Warn("stale scanner: docker unavailable, using store status only", "error", err)
containers = nil dockerContainers = nil
}
stateByContainerID := make(map[string]string, len(dockerContainers))
for _, dc := range dockerContainers {
stateByContainerID[dc.ID] = dc.State
} }
containerStateByInstanceID := make(map[string]string, len(containers)) // Pre-load workload names so each stale row carries a friendly identifier.
for _, c := range containers { workloads, _ := s.store.ListWorkloads("")
if c.InstanceID != "" { workloadNameByID := make(map[string]string, len(workloads))
containerStateByInstanceID[c.InstanceID] = c.State for _, w := range workloads {
} workloadNameByID[w.ID] = w.Name
}
// Pre-load project and stage names to avoid N+1 queries.
allProjects, _ := s.store.GetAllProjects()
projectNames := make(map[string]string, len(allProjects))
for _, p := range allProjects {
projectNames[p.ID] = p.Name
}
stageNames := make(map[string]string)
for _, p := range allProjects {
stages, _ := s.store.GetStagesByProjectID(p.ID)
for _, st := range stages {
stageNames[st.ID] = st.Name
}
} }
now := time.Now().UTC() now := time.Now().UTC()
var result []StaleInstance var result []StaleContainer
for _, inst := range instances { for _, c := range containers {
if inst.Status == "removing" { if c.State == "removing" {
continue
}
if stateByContainerID[c.ContainerID] == "running" {
continue continue
} }
// If Docker says it's running, it's not stale. ref := c.LastSeenAt
if containerStateByInstanceID[inst.ID] == "running" { if ref == "" {
continue ref = c.CreatedAt
} }
lastAliveTime, parseErr := time.Parse("2006-01-02 15:04:05", ref)
lastAlive := inst.LastAliveAt
if lastAlive == "" {
lastAlive = inst.CreatedAt
}
lastAliveTime, parseErr := time.Parse("2006-01-02 15:04:05", lastAlive)
if parseErr != nil { if parseErr != nil {
continue continue
} }
@@ -270,20 +244,16 @@ func (s *Scanner) FindStaleInstances(ctx context.Context) ([]StaleInstance, erro
continue continue
} }
// Look up project and stage names from pre-loaded maps. name := workloadNameByID[c.WorkloadID]
projectName := projectNames[inst.ProjectID] if name == "" {
if projectName == "" { name = c.WorkloadID
projectName = inst.ProjectID
}
stageName := stageNames[inst.StageID]
if stageName == "" {
stageName = inst.StageID
} }
result = append(result, StaleInstance{ result = append(result, StaleContainer{
Instance: inst, Container: c,
ProjectName: projectName, WorkloadID: c.WorkloadID,
StageName: stageName, WorkloadName: name,
Role: c.Role,
DaysStale: daysSinceAlive, DaysStale: daysSinceAlive,
}) })
} }
@@ -292,20 +262,20 @@ func (s *Scanner) FindStaleInstances(ctx context.Context) ([]StaleInstance, erro
} }
// emitStaleEvent publishes a warning event for a newly detected stale container. // emitStaleEvent publishes a warning event for a newly detected stale container.
func (s *Scanner) emitStaleEvent(inst store.Instance, daysStale int) { func (s *Scanner) emitStaleEvent(c store.Container, daysStale int) {
metadata, _ := json.Marshal(map[string]any{ metadata, _ := json.Marshal(map[string]any{
"instance_id": inst.ID, "container_id": c.ID,
"project_id": inst.ProjectID, "workload_id": c.WorkloadID,
"stage_id": inst.StageID, "workload_kind": c.WorkloadKind,
"image_tag": inst.ImageTag, "role": c.Role,
"last_alive_at": inst.LastAliveAt, "image_tag": c.ImageTag,
"last_seen_at": c.LastSeenAt,
"days_stale": daysStale, "days_stale": daysStale,
}) })
msg := fmt.Sprintf("Container %s (tag: %s) has been non-running for %d days", msg := fmt.Sprintf("Container %s (tag: %s) has been non-running for %d days",
inst.ID, inst.ImageTag, daysStale) c.ID, c.ImageTag, daysStale)
// Persist directly to event log.
evt, err := s.store.InsertEvent(store.EventLog{ evt, err := s.store.InsertEvent(store.EventLog{
Source: "stale_scanner", Source: "stale_scanner",
Severity: "warn", Severity: "warn",
@@ -317,7 +287,6 @@ func (s *Scanner) emitStaleEvent(inst store.Instance, daysStale int) {
return return
} }
// Publish for SSE clients.
s.eventBus.Publish(events.Event{ s.eventBus.Publish(events.Event{
Type: events.EventLog, Type: events.EventLog,
Payload: events.EventLogPayload{ Payload: events.EventLogPayload{
+3 -4
View File
@@ -283,8 +283,6 @@ func (m *Manager) Deploy(ctx context.Context, siteID string, force bool) error {
"tinyforge.static-site": site.ID, "tinyforge.static-site": site.ID,
"tinyforge.static-site-name": site.Name, "tinyforge.static-site-name": site.Name,
}, },
Project: "static-site",
Stage: site.Name,
WorkloadID: m.resolveSiteWorkloadID(site.ID), WorkloadID: m.resolveSiteWorkloadID(site.ID),
WorkloadKind: string(store.WorkloadKindSite), WorkloadKind: string(store.WorkloadKindSite),
Role: "", Role: "",
@@ -310,8 +308,9 @@ func (m *Manager) Deploy(ctx context.Context, siteID string, force bool) error {
"tinyforge.static-site": site.ID, "tinyforge.static-site": site.ID,
"tinyforge.static-site-name": site.Name, "tinyforge.static-site-name": site.Name,
}, },
Project: "static-site", WorkloadID: m.resolveSiteWorkloadID(site.ID),
Stage: site.Name, WorkloadKind: string(store.WorkloadKindSite),
Role: "",
}) })
if err != nil { if err != nil {
m.updateStatus(site.ID, "failed", latestSHA, fmt.Sprintf("create container: %v", err)) m.updateStatus(site.ID, "failed", latestSHA, fmt.Sprintf("create container: %v", err))
+14 -23
View File
@@ -195,38 +195,29 @@ type target struct {
OwnerID string OwnerID string
} }
// buildTargets fetches running instances and sites that have a container ID. // buildTargets fetches container rows that have a docker container_id bound.
// Project containers and stack containers are surfaced as OwnerTypeInstance
// (the stats sample owner_type is kept for back-compat with the persisted
// schema and the dashboard's group-by semantics).
func (c *Collector) buildTargets() []target { func (c *Collector) buildTargets() []target {
var out []target var out []target
instances, err := c.store.ListAllInstances() containers, err := c.store.ListContainers(store.ContainerFilter{})
if err != nil { if err != nil {
slog.Warn("stats collector: list instances", "error", err) slog.Warn("stats collector: list containers", "error", err)
} else { } else {
for _, inst := range instances { for _, row := range containers {
if inst.ContainerID == "" { if row.ContainerID == "" {
continue continue
} }
out = append(out, target{ ownerType := OwnerTypeInstance
ContainerID: inst.ContainerID, if row.WorkloadKind == string(store.WorkloadKindSite) {
OwnerType: OwnerTypeInstance, ownerType = OwnerTypeSite
OwnerID: inst.ID,
})
}
}
sites, err := c.store.GetAllStaticSites()
if err != nil {
slog.Warn("stats collector: list sites", "error", err)
} else {
for _, site := range sites {
if site.ContainerID == "" {
continue
} }
out = append(out, target{ out = append(out, target{
ContainerID: site.ContainerID, ContainerID: row.ContainerID,
OwnerType: OwnerTypeSite, OwnerType: ownerType,
OwnerID: site.ID, OwnerID: row.ID,
}) })
} }
} }
+71
View File
@@ -135,6 +135,77 @@ func (s *Store) GetContainerByDockerID(dockerID string) (Container, error) {
return c, nil return c, nil
} }
// ListProxyRoutes lists every project container that has a subdomain and a
// proxy binding (non-empty proxy_route_id or positive npm_proxy_id), joined
// with its project and stage names. Rows are read from the normalized
// containers index; the stage is resolved through a
// (project_id, role=stage_name) join, which is uniquely indexed via
// UNIQUE(project_id, name) on stages.
//
// When domain is non-empty it is appended to each subdomain to fill Domain.
// Source is always reported as "instance" for back-compat with the Proxies
// page filter (the frontend keys off the literal string).
//
// On success the returned slice is non-nil, ordered by project name, stage
// name, then newest container first.
func (s *Store) ListProxyRoutes(domain string) ([]ProxyRoute, error) {
	const query = `
SELECT c.id, p.id, p.name, s.id, s.name,
c.image_tag, c.subdomain, c.container_id, c.port,
c.proxy_route_id, c.npm_proxy_id, c.state, c.created_at
FROM containers c
JOIN workloads w ON w.id = c.workload_id AND w.kind = 'project'
JOIN projects p ON p.id = w.ref_id
JOIN stages s ON s.project_id = p.id AND s.name = c.role
WHERE c.subdomain != '' AND (c.proxy_route_id != '' OR c.npm_proxy_id > 0)
ORDER BY p.name, s.name, c.created_at DESC`

	rows, err := s.db.Query(query)
	if err != nil {
		return nil, fmt.Errorf("query proxy routes: %w", err)
	}
	defer rows.Close()

	result := []ProxyRoute{}
	for rows.Next() {
		var route ProxyRoute
		scanErr := rows.Scan(
			&route.InstanceID, &route.ProjectID, &route.ProjectName, &route.StageID, &route.StageName,
			&route.ImageTag, &route.Subdomain, &route.ContainerID, &route.Port,
			&route.ProxyRouteID, &route.NpmProxyID, &route.Status, &route.CreatedAt,
		)
		if scanErr != nil {
			return nil, fmt.Errorf("scan proxy route: %w", scanErr)
		}

		route.Source = "instance"
		if domain != "" && route.Subdomain != "" {
			route.Domain = route.Subdomain + "." + domain
		}
		result = append(result, route)
	}
	return result, rows.Err()
}
// ListContainersByStageID returns the project containers that belong to the
// given stage, newest first. The lookup walks stage → project_id →
// workload(kind=project) → containers whose role equals the stage name.
// It replaces GetInstancesByStageID for callers in the deployer / API layer.
//
// On success the returned slice is non-nil, even when no container matches.
func (s *Store) ListContainersByStageID(stageID string) ([]Container, error) {
	query := `
SELECT ` + prefixCols(containerColumns, "c.") + `
FROM containers c
JOIN workloads w ON w.id = c.workload_id AND w.kind = 'project'
JOIN stages s ON s.project_id = w.ref_id AND s.name = c.role
WHERE s.id = ?
ORDER BY c.created_at DESC`

	rows, err := s.db.Query(query, stageID)
	if err != nil {
		return nil, fmt.Errorf("query containers by stage: %w", err)
	}
	defer rows.Close()

	found := []Container{}
	for rows.Next() {
		row, scanErr := scanContainer(rows)
		if scanErr != nil {
			return nil, fmt.Errorf("scan container: %w", scanErr)
		}
		found = append(found, row)
	}
	return found, rows.Err()
}
// ListContainersByWorkload returns all containers for a given workload, newest first. // ListContainersByWorkload returns all containers for a given workload, newest first.
func (s *Store) ListContainersByWorkload(workloadID string) ([]Container, error) { func (s *Store) ListContainersByWorkload(workloadID string) ([]Container, error) {
rows, err := s.db.Query( rows, err := s.db.Query(
-251
View File
@@ -1,251 +0,0 @@
package store
import (
"database/sql"
"errors"
"fmt"
"github.com/google/uuid"
)
// instanceColumns is the canonical, comma-separated column list for instance
// queries. scanInstance reads its destinations in exactly this order, and the
// INSERT statements in this file supply thirteen placeholders to match, so a
// change here must be mirrored in both places.
const instanceColumns = `id, stage_id, project_id, container_id, image_tag, subdomain, npm_proxy_id, proxy_route_id, status, port, last_alive_at, created_at, updated_at`
// scanInstance reads a single result row into an Instance. Destinations are
// listed in the same order as instanceColumns, so the scanner must come from
// a query that selects exactly that column list. Any Scan error is returned
// unchanged alongside whatever was populated so far.
func scanInstance(scanner interface{ Scan(...any) error }) (Instance, error) {
	var row Instance
	dest := []any{
		&row.ID, &row.StageID, &row.ProjectID, &row.ContainerID, &row.ImageTag,
		&row.Subdomain, &row.NpmProxyID, &row.ProxyRouteID, &row.Status, &row.Port,
		&row.LastAliveAt, &row.CreatedAt, &row.UpdatedAt,
	}
	err := scanner.Scan(dest...)
	return row, err
}
// CreateInstance inserts a new instance record under a freshly generated UUID.
// Any ID already set on inst is overwritten.
//
// The timestamps and the INSERT itself are handled by CreateInstanceWithID so
// the column list and placeholder count live in exactly one place.
//
// Returns the stored instance (with ID, CreatedAt and UpdatedAt populated), or
// an error wrapping the underlying insert failure.
func (s *Store) CreateInstance(inst Instance) (Instance, error) {
	inst.ID = uuid.New().String()
	return s.CreateInstanceWithID(inst)
}
// CreateInstanceWithID inserts a new instance whose ID was chosen by the
// caller. Use it when the ID has to be known before the row exists (e.g., to
// stamp it on container labels). CreatedAt and UpdatedAt are both set to the
// current timestamp.
//
// Returns an error when inst.ID is empty or the insert fails.
func (s *Store) CreateInstanceWithID(inst Instance) (Instance, error) {
	if inst.ID == "" {
		return Instance{}, fmt.Errorf("instance ID is required")
	}

	now := Now()
	inst.CreatedAt, inst.UpdatedAt = now, now

	const stmt = `INSERT INTO instances (` + instanceColumns + `)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
	args := []any{
		inst.ID, inst.StageID, inst.ProjectID, inst.ContainerID, inst.ImageTag,
		inst.Subdomain, inst.NpmProxyID, inst.ProxyRouteID, inst.Status, inst.Port,
		inst.LastAliveAt, inst.CreatedAt, inst.UpdatedAt,
	}
	if _, err := s.db.Exec(stmt, args...); err != nil {
		return Instance{}, fmt.Errorf("insert instance: %w", err)
	}
	return inst, nil
}
// GetInstanceByID looks up one instance by its primary key.
//
// A missing row is reported as an error wrapping ErrNotFound; any other
// failure is wrapped as a query error.
func (s *Store) GetInstanceByID(id string) (Instance, error) {
	row := s.db.QueryRow(`SELECT `+instanceColumns+` FROM instances WHERE id = ?`, id)

	found, err := scanInstance(row)
	switch {
	case errors.Is(err, sql.ErrNoRows):
		return Instance{}, fmt.Errorf("instance %s: %w", id, ErrNotFound)
	case err != nil:
		return Instance{}, fmt.Errorf("query instance: %w", err)
	default:
		return found, nil
	}
}
// GetInstancesByStageID lists the instances attached to one stage, newest
// first. On success the returned slice is non-nil even when the stage has no
// instances.
func (s *Store) GetInstancesByStageID(stageID string) ([]Instance, error) {
	const query = `SELECT ` + instanceColumns + ` FROM instances WHERE stage_id = ? ORDER BY created_at DESC`

	rows, err := s.db.Query(query, stageID)
	if err != nil {
		return nil, fmt.Errorf("query instances: %w", err)
	}
	defer rows.Close()

	found := []Instance{}
	for rows.Next() {
		row, scanErr := scanInstance(rows)
		if scanErr != nil {
			return nil, fmt.Errorf("scan instance: %w", scanErr)
		}
		found = append(found, row)
	}
	return found, rows.Err()
}
// ListAllInstances lists every instance across all stages, newest first.
// On success the returned slice is non-nil even when the table is empty.
func (s *Store) ListAllInstances() ([]Instance, error) {
	const query = `SELECT ` + instanceColumns + ` FROM instances ORDER BY created_at DESC`

	rows, err := s.db.Query(query)
	if err != nil {
		return nil, fmt.Errorf("query all instances: %w", err)
	}
	defer rows.Close()

	all := []Instance{}
	for rows.Next() {
		row, scanErr := scanInstance(rows)
		if scanErr != nil {
			return nil, fmt.Errorf("scan instance: %w", scanErr)
		}
		all = append(all, row)
	}
	return all, rows.Err()
}
// ProxyRoute represents a proxy-enabled resource (Docker instance or static site)
// joined with the human-readable names needed to render the Proxies page.
//
// ListProxyRoutes in this file fills it from the containers, projects and
// stages tables; the field notes below describe what that query puts in each
// field.
type ProxyRoute struct {
	Source string `json:"source"` // "instance" or "static_site"; ListProxyRoutes always sets "instance"
	InstanceID string `json:"instance_id"` // scanned from containers.id
	ProjectID string `json:"project_id"`
	ProjectName string `json:"project_name"`
	StageID string `json:"stage_id"`
	StageName string `json:"stage_name"`
	ImageTag string `json:"image_tag"`
	Subdomain string `json:"subdomain"`
	Domain string `json:"domain"` // Subdomain + "." + base domain; left empty when no base domain is supplied
	ContainerID string `json:"container_id"` // scanned from containers.container_id
	Port int `json:"port"`
	ProxyRouteID string `json:"proxy_route_id"`
	NpmProxyID int `json:"npm_proxy_id"`
	Status string `json:"status"` // scanned from containers.state
	CreatedAt string `json:"created_at"`
}
// ListProxyRoutes lists proxy-enabled project containers together with their
// project and stage names. It reads from the normalized containers index —
// the instances table is no longer queried. The stage is resolved through a
// (project_id, role=stage_name) join, which is uniquely indexed.
//
// A non-empty domain is appended to each subdomain to fill Domain. Source is
// reported as "instance" for back-compat with the Proxies page filter (which
// keys off the literal string).
//
// On success the returned slice is non-nil.
func (s *Store) ListProxyRoutes(domain string) ([]ProxyRoute, error) {
	const query = `
SELECT c.id, p.id, p.name, s.id, s.name,
c.image_tag, c.subdomain, c.container_id, c.port,
c.proxy_route_id, c.npm_proxy_id, c.state, c.created_at
FROM containers c
JOIN workloads w ON w.id = c.workload_id AND w.kind = 'project'
JOIN projects p ON p.id = w.ref_id
JOIN stages s ON s.project_id = p.id AND s.name = c.role
WHERE c.subdomain != '' AND (c.proxy_route_id != '' OR c.npm_proxy_id > 0)
ORDER BY p.name, s.name, c.created_at DESC`

	rows, queryErr := s.db.Query(query)
	if queryErr != nil {
		return nil, fmt.Errorf("query proxy routes: %w", queryErr)
	}
	defer rows.Close()

	out := make([]ProxyRoute, 0)
	for rows.Next() {
		var pr ProxyRoute
		targets := []any{
			&pr.InstanceID, &pr.ProjectID, &pr.ProjectName, &pr.StageID, &pr.StageName,
			&pr.ImageTag, &pr.Subdomain, &pr.ContainerID, &pr.Port,
			&pr.ProxyRouteID, &pr.NpmProxyID, &pr.Status, &pr.CreatedAt,
		}
		if err := rows.Scan(targets...); err != nil {
			return nil, fmt.Errorf("scan proxy route: %w", err)
		}

		pr.Source = "instance"
		if domain != "" && pr.Subdomain != "" {
			pr.Domain = pr.Subdomain + "." + domain
		}
		out = append(out, pr)
	}
	return out, rows.Err()
}
// UpdateInstance overwrites the mutable columns of the instance identified by
// inst.ID and refreshes updated_at.
//
// Returns an error wrapping ErrNotFound when no row matches inst.ID. A
// failure to read the affected-row count is reported as its own error rather
// than being mistaken for a missing row.
func (s *Store) UpdateInstance(inst Instance) error {
	inst.UpdatedAt = Now()
	result, err := s.db.Exec(
		`UPDATE instances SET stage_id=?, project_id=?, container_id=?, image_tag=?, subdomain=?, npm_proxy_id=?, proxy_route_id=?, status=?, port=?, last_alive_at=?, updated_at=?
WHERE id=?`,
		inst.StageID, inst.ProjectID, inst.ContainerID, inst.ImageTag,
		inst.Subdomain, inst.NpmProxyID, inst.ProxyRouteID, inst.Status, inst.Port,
		inst.LastAliveAt, inst.UpdatedAt, inst.ID,
	)
	if err != nil {
		return fmt.Errorf("update instance: %w", err)
	}

	n, err := result.RowsAffected()
	if err != nil {
		return fmt.Errorf("update instance: rows affected: %w", err)
	}
	if n == 0 {
		return fmt.Errorf("instance %s: %w", inst.ID, ErrNotFound)
	}
	return nil
}
// UpdateInstanceStatus sets only the status column (and updated_at) on the
// instance identified by id.
//
// Returns an error wrapping ErrNotFound when no row matches id. A failure to
// read the affected-row count is reported as its own error rather than being
// mistaken for a missing row.
func (s *Store) UpdateInstanceStatus(id string, status string) error {
	ts := Now()
	result, err := s.db.Exec(
		`UPDATE instances SET status=?, updated_at=? WHERE id=?`,
		status, ts, id,
	)
	if err != nil {
		return fmt.Errorf("update instance status: %w", err)
	}

	n, err := result.RowsAffected()
	if err != nil {
		return fmt.Errorf("update instance status: rows affected: %w", err)
	}
	if n == 0 {
		return fmt.Errorf("instance %s: %w", id, ErrNotFound)
	}
	return nil
}
// UpdateLastAliveAt stamps last_alive_at (and updated_at) with the current
// timestamp for the instance identified by id. Called when an instance is
// seen running.
//
// Returns an error wrapping ErrNotFound when no row matches id. A failure to
// read the affected-row count is reported as its own error rather than being
// mistaken for a missing row.
func (s *Store) UpdateLastAliveAt(id string) error {
	ts := Now()
	result, err := s.db.Exec(
		`UPDATE instances SET last_alive_at=?, updated_at=? WHERE id=?`,
		ts, ts, id,
	)
	if err != nil {
		return fmt.Errorf("update last_alive_at: %w", err)
	}

	n, err := result.RowsAffected()
	if err != nil {
		return fmt.Errorf("update last_alive_at: rows affected: %w", err)
	}
	if n == 0 {
		return fmt.Errorf("instance %s: %w", id, ErrNotFound)
	}
	return nil
}
// DeleteInstance removes the instance row identified by id.
//
// Returns an error wrapping ErrNotFound when no row matches id. A failure to
// read the affected-row count is reported as its own error rather than being
// mistaken for a missing row.
func (s *Store) DeleteInstance(id string) error {
	result, err := s.db.Exec(`DELETE FROM instances WHERE id = ?`, id)
	if err != nil {
		return fmt.Errorf("delete instance: %w", err)
	}

	n, err := result.RowsAffected()
	if err != nil {
		return fmt.Errorf("delete instance: rows affected: %w", err)
	}
	if n == 0 {
		return fmt.Errorf("instance %s: %w", id, ErrNotFound)
	}
	return nil
}
+15 -10
View File
@@ -142,21 +142,26 @@ type DNSRecord struct {
UpdatedAt string `json:"updated_at"` UpdatedAt string `json:"updated_at"`
} }
// Instance represents a running (or stopped) container for a project stage. // ProxyRoute is a proxy-enabled container row joined with its project + stage
type Instance struct { // names, shaped for the Proxies page. Source is "instance" for project
ID string `json:"id"` // containers and "static_site" for site rows — the names are historical
StageID string `json:"stage_id"` // (the table itself was renamed to containers in the workload refactor).
type ProxyRoute struct {
Source string `json:"source"`
InstanceID string `json:"instance_id"`
ProjectID string `json:"project_id"` ProjectID string `json:"project_id"`
ContainerID string `json:"container_id"` ProjectName string `json:"project_name"`
StageID string `json:"stage_id"`
StageName string `json:"stage_name"`
ImageTag string `json:"image_tag"` ImageTag string `json:"image_tag"`
Subdomain string `json:"subdomain"` Subdomain string `json:"subdomain"`
NpmProxyID int `json:"npm_proxy_id"` Domain string `json:"domain"`
ProxyRouteID string `json:"proxy_route_id"` ContainerID string `json:"container_id"`
Status string `json:"status"` // running, stopped, failed, removing
Port int `json:"port"` Port int `json:"port"`
LastAliveAt string `json:"last_alive_at"` ProxyRouteID string `json:"proxy_route_id"`
NpmProxyID int `json:"npm_proxy_id"`
Status string `json:"status"`
CreatedAt string `json:"created_at"` CreatedAt string `json:"created_at"`
UpdatedAt string `json:"updated_at"`
} }
// Deploy represents a deployment attempt. // Deploy represents a deployment attempt.
+16 -19
View File
@@ -310,12 +310,15 @@ func (s *Store) runMigrations() error {
for _, m := range migrations { for _, m := range migrations {
if _, err := s.db.Exec(m); err != nil { if _, err := s.db.Exec(m); err != nil {
// "duplicate column" / "already exists" are expected when a // "duplicate column" / "already exists" are expected when a
// migration has already been applied. Anything else (typo, FK // migration has already been applied. "no such table" is
// conflict, real schema bug) must surface, otherwise the store // expected for obsolete ALTER TABLE migrations targeting tables
// silently runs against the wrong shape. // the workload refactor dropped (e.g. instances). Anything
// else must surface — silently running against the wrong shape
// is worse than a startup failure.
msg := err.Error() msg := err.Error()
if !strings.Contains(msg, "duplicate column") && if !strings.Contains(msg, "duplicate column") &&
!strings.Contains(msg, "already exists") { !strings.Contains(msg, "already exists") &&
!strings.Contains(msg, "no such table") {
return fmt.Errorf("apply migration %q: %w", m, err) return fmt.Errorf("apply migration %q: %w", m, err)
} }
} }
@@ -323,8 +326,8 @@ func (s *Store) runMigrations() error {
// Create indexes on foreign key columns for query performance. // Create indexes on foreign key columns for query performance.
indexes := []string{ indexes := []string{
`CREATE INDEX IF NOT EXISTS idx_instances_stage_id ON instances(stage_id)`, // instances table dropped 2026-05-09 (workload refactor) — no indexes
`CREATE INDEX IF NOT EXISTS idx_instances_project_id ON instances(project_id)`, // needed; containers replaces it with idx_containers_workload below.
`CREATE INDEX IF NOT EXISTS idx_deploys_project_id ON deploys(project_id)`, `CREATE INDEX IF NOT EXISTS idx_deploys_project_id ON deploys(project_id)`,
`CREATE INDEX IF NOT EXISTS idx_deploys_stage_id ON deploys(stage_id)`, `CREATE INDEX IF NOT EXISTS idx_deploys_stage_id ON deploys(stage_id)`,
`CREATE INDEX IF NOT EXISTS idx_deploy_logs_deploy_id ON deploy_logs(deploy_id)`, `CREATE INDEX IF NOT EXISTS idx_deploy_logs_deploy_id ON deploy_logs(deploy_id)`,
@@ -344,6 +347,10 @@ func (s *Store) runMigrations() error {
`CREATE INDEX IF NOT EXISTS idx_container_stats_container_ts ON container_stats_samples(container_id, ts)`, `CREATE INDEX IF NOT EXISTS idx_container_stats_container_ts ON container_stats_samples(container_id, ts)`,
`CREATE INDEX IF NOT EXISTS idx_container_stats_ts ON container_stats_samples(ts)`, `CREATE INDEX IF NOT EXISTS idx_container_stats_ts ON container_stats_samples(ts)`,
`CREATE INDEX IF NOT EXISTS idx_system_stats_ts ON system_stats_samples(ts)`, `CREATE INDEX IF NOT EXISTS idx_system_stats_ts ON system_stats_samples(ts)`,
// Drop the legacy instances table — containers is the canonical table
// for container state after the workload refactor (2026-05-09). Idempotent:
// DROP TABLE IF EXISTS is a no-op on databases that no longer have it.
`DROP TABLE IF EXISTS instances`,
// Workload refactor indexes (2026-05-09). // Workload refactor indexes (2026-05-09).
`CREATE INDEX IF NOT EXISTS idx_workloads_kind ON workloads(kind)`, `CREATE INDEX IF NOT EXISTS idx_workloads_kind ON workloads(kind)`,
`CREATE INDEX IF NOT EXISTS idx_workloads_app_id ON workloads(app_id) WHERE app_id != ''`, `CREATE INDEX IF NOT EXISTS idx_workloads_app_id ON workloads(app_id) WHERE app_id != ''`,
@@ -449,19 +456,9 @@ CREATE TABLE IF NOT EXISTS settings (
updated_at TEXT NOT NULL DEFAULT (datetime('now')) updated_at TEXT NOT NULL DEFAULT (datetime('now'))
); );
CREATE TABLE IF NOT EXISTS instances ( -- The instances table was removed in the workload refactor (2026-05-09).
id TEXT PRIMARY KEY, -- Container state lives in the containers table; see runMigrations for the
stage_id TEXT NOT NULL REFERENCES stages(id) ON DELETE CASCADE, -- current schema. The DROP TABLE migration runs unconditionally on boot.
project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
container_id TEXT NOT NULL DEFAULT '',
image_tag TEXT NOT NULL,
subdomain TEXT NOT NULL DEFAULT '',
npm_proxy_id INTEGER NOT NULL DEFAULT 0,
status TEXT NOT NULL DEFAULT 'stopped',
port INTEGER NOT NULL DEFAULT 0,
created_at TEXT NOT NULL DEFAULT (datetime('now')),
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
);
CREATE TABLE IF NOT EXISTS deploys ( CREATE TABLE IF NOT EXISTS deploys (
id TEXT PRIMARY KEY, id TEXT PRIMARY KEY,
+24 -2
View File
@@ -319,7 +319,19 @@ func (h *Handler) handleWebhook(w http.ResponseWriter, r *http.Request) {
return return
} }
project, err := h.store.GetProjectByWebhookSecret(secret) // Resolve the secret via the workload row first (canonical path —
// workloads.webhook_secret is kept in sync by the project CRUD path).
// Fall back to the project's own column whenever that lookup fails or
// returns a non-project workload, e.g. for a pre-refactor row that
// does not have its workload yet (defensive belt-and-suspenders).
var (
project store.Project
err error
)
if wl, wErr := h.store.GetWorkloadByWebhookSecret(secret); wErr == nil && wl.Kind == string(store.WorkloadKindProject) {
project, err = h.store.GetProjectByID(wl.RefID)
} else {
project, err = h.store.GetProjectByWebhookSecret(secret)
}
if err != nil { if err != nil {
if errors.Is(err, store.ErrNotFound) { if errors.Is(err, store.ErrNotFound) {
delivery.StatusCode = http.StatusNotFound delivery.StatusCode = http.StatusNotFound
@@ -502,7 +514,17 @@ func (h *Handler) handleSiteWebhook(w http.ResponseWriter, r *http.Request) {
return return
} }
site, err := h.store.GetStaticSiteByWebhookSecret(secret) // Workload-first lookup, mirroring the project handler. Falls back to the
// site's own webhook_secret column for pre-refactor rows.
var (
site store.StaticSite
err error
)
if wl, wErr := h.store.GetWorkloadByWebhookSecret(secret); wErr == nil && wl.Kind == string(store.WorkloadKindSite) {
site, err = h.store.GetStaticSiteByID(wl.RefID)
} else {
site, err = h.store.GetStaticSiteByWebhookSecret(secret)
}
if err != nil { if err != nil {
if errors.Is(err, store.ErrNotFound) { if errors.Is(err, store.ErrNotFound) {
delivery.StatusCode = http.StatusNotFound delivery.StatusCode = http.StatusNotFound
+12 -11
View File
@@ -15,11 +15,12 @@
interface Props { interface Props {
instance: Instance; instance: Instance;
projectId: string; projectId: string;
stageId: string;
domain?: string; domain?: string;
onchange?: () => void; onchange?: () => void;
} }
const { instance, projectId, domain = '', onchange }: Props = $props(); const { instance, projectId, stageId, domain = '', onchange }: Props = $props();
let loading = $state(false); let loading = $state(false);
let error = $state(''); let error = $state('');
@@ -41,16 +42,16 @@
try { try {
switch (action) { switch (action) {
case 'stop': case 'stop':
await api.stopInstance(projectId, instance.stage_id, instance.id); await api.stopInstance(projectId, stageId, instance.id);
break; break;
case 'start': case 'start':
await api.startInstance(projectId, instance.stage_id, instance.id); await api.startInstance(projectId, stageId, instance.id);
break; break;
case 'restart': case 'restart':
await api.restartInstance(projectId, instance.stage_id, instance.id); await api.restartInstance(projectId, stageId, instance.id);
break; break;
case 'remove': case 'remove':
await api.removeInstance(projectId, instance.stage_id, instance.id); await api.removeInstance(projectId, stageId, instance.id);
break; break;
} }
onchange?.(); onchange?.();
@@ -73,7 +74,7 @@
<span class="truncate font-mono text-sm font-medium text-[var(--text-primary)]"> <span class="truncate font-mono text-sm font-medium text-[var(--text-primary)]">
{instance.image_tag} {instance.image_tag}
</span> </span>
<StatusBadge status={instance.status} size="sm" /> <StatusBadge status={instance.state} size="sm" />
</div> </div>
{#if subdomainUrl} {#if subdomainUrl}
@@ -96,7 +97,7 @@
<!-- Action buttons --> <!-- Action buttons -->
<div class="ml-3 flex items-center gap-1"> <div class="ml-3 flex items-center gap-1">
{#if instance.status === 'running'} {#if instance.state === 'running'}
<button <button
type="button" type="button"
class="rounded-lg p-2 text-[var(--text-tertiary)] hover:bg-amber-50 hover:text-amber-600 disabled:opacity-50 transition-all duration-150 active:animate-press" class="rounded-lg p-2 text-[var(--text-tertiary)] hover:bg-amber-50 hover:text-amber-600 disabled:opacity-50 transition-all duration-150 active:animate-press"
@@ -115,7 +116,7 @@
> >
<IconRestart size={16} /> <IconRestart size={16} />
</button> </button>
{:else if instance.status === 'stopped'} {:else if instance.state === 'stopped'}
<button <button
type="button" type="button"
class="rounded-lg p-2 text-[var(--text-tertiary)] hover:bg-emerald-50 hover:text-emerald-600 disabled:opacity-50 transition-all duration-150 active:animate-press" class="rounded-lg p-2 text-[var(--text-tertiary)] hover:bg-emerald-50 hover:text-emerald-600 disabled:opacity-50 transition-all duration-150 active:animate-press"
@@ -146,14 +147,14 @@
</div> </div>
</div> </div>
{#if instance.status === 'running'} {#if instance.state === 'running'}
<ContainerStats source={{ kind: 'instance', projectId, stageId: instance.stage_id, instanceId: instance.id }} /> <ContainerStats source={{ kind: 'instance', projectId, stageId: stageId, instanceId: instance.id }} />
{/if} {/if}
{#if showLogs} {#if showLogs}
<div class="mt-2"> <div class="mt-2">
<ContainerLogs <ContainerLogs
source={{ kind: 'instance', projectId, stageId: instance.stage_id, instanceId: instance.id }} source={{ kind: 'instance', projectId, stageId: stageId, instanceId: instance.id }}
onclose={() => { showLogs = false; }} onclose={() => { showLogs = false; }}
/> />
</div> </div>
+3 -3
View File
@@ -14,9 +14,9 @@
const { project, instances = [] }: Props = $props(); const { project, instances = [] }: Props = $props();
const runningCount = $derived(instances.filter((i) => i.status === 'running').length); const runningCount = $derived(instances.filter((i) => i.state === 'running').length);
const stoppedCount = $derived(instances.filter((i) => i.status === 'stopped').length); const stoppedCount = $derived(instances.filter((i) => i.state === 'stopped').length);
const failedCount = $derived(instances.filter((i) => i.status === 'failed').length); const failedCount = $derived(instances.filter((i) => i.state === 'failed').length);
const totalCount = $derived(instances.length); const totalCount = $derived(instances.length);
const overallStatus = $derived.by<'failed' | 'running' | 'stopped'>(() => { const overallStatus = $derived.by<'failed' | 'running' | 'stopped'>(() => {
@@ -22,7 +22,9 @@
); );
const displayName = $derived( const displayName = $derived(
`${container.project_name}-${container.stage_name}-${container.instance.image_tag}` container.role
? `${container.workload_name}-${container.role}-${container.container.image_tag}`
: `${container.workload_name}-${container.container.image_tag}`
); );
</script> </script>
@@ -36,11 +38,13 @@
</h3> </h3>
<div class="mt-1.5 flex flex-wrap items-center gap-2"> <div class="mt-1.5 flex flex-wrap items-center gap-2">
<span class="inline-flex items-center gap-1 rounded-md bg-[var(--color-brand-50)] px-2 py-0.5 text-xs font-medium text-[var(--color-brand-600)]"> <span class="inline-flex items-center gap-1 rounded-md bg-[var(--color-brand-50)] px-2 py-0.5 text-xs font-medium text-[var(--color-brand-600)]">
{container.project_name} {container.workload_name}
</span> </span>
{#if container.role}
<span class="inline-flex items-center gap-1 rounded-md bg-[var(--surface-card-hover)] px-2 py-0.5 text-xs font-medium text-[var(--text-secondary)]"> <span class="inline-flex items-center gap-1 rounded-md bg-[var(--surface-card-hover)] px-2 py-0.5 text-xs font-medium text-[var(--text-secondary)]">
{container.stage_name} {container.role}
</span> </span>
{/if}
</div> </div>
</div> </div>
@@ -55,14 +59,14 @@
<div class="mt-3 flex flex-wrap items-center gap-x-4 gap-y-1.5 text-xs text-[var(--text-secondary)]"> <div class="mt-3 flex flex-wrap items-center gap-x-4 gap-y-1.5 text-xs text-[var(--text-secondary)]">
<span class="inline-flex items-center gap-1"> <span class="inline-flex items-center gap-1">
<IconTag size={12} /> <IconTag size={12} />
{container.instance.image_tag} {container.container.image_tag}
</span> </span>
<span class="inline-flex items-center gap-1"> <span class="inline-flex items-center gap-1">
<IconClock size={12} /> <IconClock size={12} />
{$t('stale.lastAlive')}: {$fmt.shortDate(container.instance.last_alive_at)} {$t('stale.lastAlive')}: {$fmt.shortDate(container.container.last_seen_at)}
</span> </span>
<span class="rounded bg-[var(--surface-card-hover)] px-1.5 py-0.5 font-mono text-[10px]"> <span class="rounded bg-[var(--surface-card-hover)] px-1.5 py-0.5 font-mono text-[10px]">
{container.instance.status} {container.container.state}
</span> </span>
</div> </div>
@@ -71,7 +75,7 @@
<button <button
type="button" type="button"
disabled={cleaning} disabled={cleaning}
onclick={() => oncleanup(container.instance.id)} onclick={() => oncleanup(container.container.id)}
class="inline-flex items-center gap-1.5 rounded-lg border border-[var(--color-danger)] px-3 py-1.5 text-xs font-medium text-[var(--color-danger)] transition-colors hover:bg-[var(--color-danger-light)] disabled:opacity-50 active:animate-press" class="inline-flex items-center gap-1.5 rounded-lg border border-[var(--color-danger)] px-3 py-1.5 text-xs font-medium text-[var(--color-danger)] transition-colors hover:bg-[var(--color-danger-light)] disabled:opacity-50 active:animate-press"
> >
<IconTrash size={14} /> <IconTrash size={14} />
@@ -37,8 +37,8 @@
} }
if (!cancelled) { if (!cancelled) {
runningCount = allInstances.filter((i) => i.status === 'running').length; runningCount = allInstances.filter((i) => i.state === 'running').length;
stoppedCount = allInstances.filter((i) => i.status !== 'running').length; stoppedCount = allInstances.filter((i) => i.state !== 'running').length;
recentErrors = eventStats.error; recentErrors = eventStats.error;
loading = false; loading = false;
} }
+21 -31
View File
@@ -33,20 +33,14 @@ export interface Stage {
updated_at: string; updated_at: string;
} }
export interface Instance { /**
id: string; * Instance is a back-compat alias: project deploys used to live in a
stage_id: string; * dedicated `instances` table, but after the workload refactor the canonical
project_id: string; * row is a Container. The Instance name is kept on the frontend so existing
container_id: string; * components don't all rename in one change — new code should use Container
image_tag: string; * directly, and `instance.state` (not `.status`) is the current field.
subdomain: string; */
npm_proxy_id: number; export type Instance = Container;
status: InstanceStatus;
port: number;
last_alive_at?: string;
created_at: string;
updated_at: string;
}
export type InstanceStatus = 'running' | 'stopped' | 'failed' | 'removing'; export type InstanceStatus = 'running' | 'stopped' | 'failed' | 'removing';
@@ -349,24 +343,20 @@ export interface EventLogStats {
total: number; total: number;
} }
/** A container detected as stale by the backend poller. */ /**
* A container detected as stale by the backend poller.
*
* Shape matches the Go side after the workload refactor:
* the embedded Container row is the canonical state and the workload/role
* fields decorate it for display. The legacy `instance` / `project_name` /
* `stage_name` fields are no longer part of this interface; consumers read
* `container`, `workload_name`, and `role` instead.
*/
export interface StaleContainer { export interface StaleContainer {
instance: { container: Container;
id: string; workload_id: string;
stage_id: string; workload_name: string;
project_id: string; role: string;
container_id: string;
image_tag: string;
subdomain: string;
npm_proxy_id: number;
status: string;
port: number;
last_alive_at: string;
created_at: string;
updated_at: string;
};
project_name: string;
stage_name: string;
days_stale: number; days_stale: number;
} }
+2 -2
View File
@@ -86,12 +86,12 @@
const totalRunning = $derived( const totalRunning = $derived(
Object.values(instancesByProject) Object.values(instancesByProject)
.flat() .flat()
.filter((i) => i.status === 'running').length .filter((i) => i.state === 'running').length
); );
const totalFailed = $derived( const totalFailed = $derived(
Object.values(instancesByProject) Object.values(instancesByProject)
.flat() .flat()
.filter((i) => i.status === 'failed').length .filter((i) => i.state === 'failed').length
); );
const totalStale = $derived(staleContainers.length); const totalStale = $derived(staleContainers.length);
const totalSites = $derived(sites.length); const totalSites = $derived(sites.length);
+3 -3
View File
@@ -41,7 +41,7 @@
cleaningIds = new Set([...cleaningIds, id]); cleaningIds = new Set([...cleaningIds, id]);
try { try {
await api.cleanupStaleContainer(id); await api.cleanupStaleContainer(id);
containers = containers.filter((c) => c.instance.id !== id); containers = containers.filter((c) => c.container.id !== id);
toasts.success($t('stale.cleanedUp')); toasts.success($t('stale.cleanedUp'));
} catch (e) { } catch (e) {
toasts.error(e instanceof Error ? e.message : $t('stale.cleanupFailed')); toasts.error(e instanceof Error ? e.message : $t('stale.cleanupFailed'));
@@ -124,10 +124,10 @@
/> />
{:else} {:else}
<div class="grid grid-cols-1 gap-4 sm:grid-cols-2 lg:grid-cols-3"> <div class="grid grid-cols-1 gap-4 sm:grid-cols-2 lg:grid-cols-3">
{#each containers as container (container.instance.id)} {#each containers as container (container.container.id)}
<StaleContainerCard <StaleContainerCard
{container} {container}
cleaning={cleaningIds.has(container.instance.id)} cleaning={cleaningIds.has(container.container.id)}
oncleanup={requestCleanup} oncleanup={requestCleanup}
/> />
{/each} {/each}
@@ -757,6 +757,7 @@
<InstanceCard <InstanceCard
{instance} {instance}
{projectId} {projectId}
stageId={stage.id}
domain={settingsDomain} domain={settingsDomain}
onchange={loadProject} onchange={loadProject}
/> />