diff --git a/internal/api/dns.go b/internal/api/dns.go index 2af4c49..442046a 100644 --- a/internal/api/dns.go +++ b/internal/api/dns.go @@ -204,9 +204,9 @@ func (s *Server) buildConsumerNameMap() map[string]string { for _, p := range projects { stages, _ := s.store.GetStagesByProjectID(p.ID) for _, st := range stages { - instances, _ := s.store.GetInstancesByStageID(st.ID) - for _, inst := range instances { - names["instance:"+inst.ID] = p.Name + "/" + st.Name + ":" + inst.ImageTag + rows, _ := s.store.ListContainersByStageID(st.ID) + for _, c := range rows { + names["instance:"+c.ID] = p.Name + "/" + st.Name + ":" + c.ImageTag } } } @@ -362,15 +362,15 @@ func (s *Server) computeExpectedFQDNs(settings store.Settings) (map[string]strin if !st.EnableProxy { continue } - instances, err := s.store.GetInstancesByStageID(st.ID) + rows, err := s.store.ListContainersByStageID(st.ID) if err != nil { - slog.Warn("dns: failed to get instances", "stage_id", st.ID, "error", err) + slog.Warn("dns: failed to get containers", "stage_id", st.ID, "error", err) continue } - for _, inst := range instances { - if inst.NpmProxyID > 0 && inst.Subdomain != "" && inst.Status == "running" { - fqdn := inst.Subdomain + "." + settings.Domain - expected[fqdn] = "instance:" + inst.ID + for _, c := range rows { + if c.NpmProxyID > 0 && c.Subdomain != "" && c.State == "running" { + fqdn := c.Subdomain + "." + settings.Domain + expected[fqdn] = "instance:" + c.ID } } } diff --git a/internal/api/docker.go b/internal/api/docker.go index d831f29..59ab250 100644 --- a/internal/api/docker.go +++ b/internal/api/docker.go @@ -69,39 +69,46 @@ func (s *Server) listProjectImages(w http.ResponseWriter, r *http.Request) { } // streamContainerLogs handles GET /api/projects/{id}/stages/{stage}/instances/{iid}/logs. -// Streams container logs via SSE. 
Supports query params: -// - tail: number of lines from end (default "200") -// - follow: "true" to stream new lines in real-time +// Streams container logs via SSE. {iid} is the container row ID. Ownership is +// verified by joining through workload + stage so an attacker cannot stream +// logs for a foreign container by guessing IDs under the wrong project URL. func (s *Server) streamContainerLogs(w http.ResponseWriter, r *http.Request) { projectID := chi.URLParam(r, "id") stageID := chi.URLParam(r, "stage") - instanceID := chi.URLParam(r, "iid") + containerRowID := chi.URLParam(r, "iid") - inst, err := s.store.GetInstanceByID(instanceID) + c, err := s.store.GetContainerByID(containerRowID) if err != nil { if errors.Is(err, store.ErrNotFound) { - respondNotFound(w, "instance") + respondNotFound(w, "container") return } - slog.Error("failed to get instance", "error", err) + slog.Error("failed to get container", "error", err) respondError(w, http.StatusInternalServerError, "internal server error") return } - // Verify the instance actually belongs to the project/stage in the path. - // Without this, a user could stream logs for any instance ID by guessing - // it under the wrong project — defence-in-depth for future per-project ACLs. 
- if inst.ProjectID != projectID || inst.StageID != stageID { - respondNotFound(w, "instance") + wl, err := s.store.GetWorkloadByID(c.WorkloadID) + if err != nil { + respondNotFound(w, "container") + return + } + stage, err := s.store.GetStageByID(stageID) + if err != nil || stage.ProjectID != projectID { + respondNotFound(w, "container") + return + } + if wl.Kind != string(store.WorkloadKindProject) || wl.RefID != projectID || c.Role != stage.Name { + respondNotFound(w, "container") return } - if inst.ContainerID == "" { - respondError(w, http.StatusBadRequest, "instance has no container") + if c.ContainerID == "" { + respondError(w, http.StatusBadRequest, "container row has no docker container bound") return } - s.streamLogsForContainer(w, r, inst.ContainerID) + s.streamLogsForContainer(w, r, c.ContainerID) } // streamLogsForContainer streams logs for an arbitrary container ID using the diff --git a/internal/api/instances.go b/internal/api/instances.go index 170509f..d7c41bf 100644 --- a/internal/api/instances.go +++ b/internal/api/instances.go @@ -13,48 +13,53 @@ import ( ) // listInstances handles GET /api/projects/{id}/stages/{stage}/instances. +// Reads the normalized container index — the legacy `instances` table is gone. +// JSON shape stays Container-shaped (id, container_id, image_tag, subdomain, +// state, port, etc.), so the frontend type may show some renamed fields +// (status→state, last_alive_at→last_seen_at). func (s *Server) listInstances(w http.ResponseWriter, r *http.Request) { stageID := chi.URLParam(r, "stage") - // Verify stage exists. 
if _, err := s.store.GetStageByID(stageID); err != nil { if errors.Is(err, store.ErrNotFound) { respondNotFound(w, "stage") return } slog.Error("failed to get stage", "error", err) - respondError(w, http.StatusInternalServerError, "internal server error") - return - } - - instances, err := s.store.GetInstancesByStageID(stageID) - if err != nil { - slog.Error("failed to list instances", "error", err) respondError(w, http.StatusInternalServerError, "internal server error") return } - // Reconcile instance statuses with Docker's actual state. + containers, err := s.store.ListContainersByStageID(stageID) + if err != nil { + slog.Error("failed to list containers", "error", err) + respondError(w, http.StatusInternalServerError, "internal server error") + return + } + + // Reconcile container state with Docker's actual state — covers the + // case where a container was killed externally between deployer writes + // and the next reconciler tick. ctx := r.Context() - for i, inst := range instances { - if inst.ContainerID == "" || inst.Status == "removing" { + for i, c := range containers { + if c.ContainerID == "" || c.State == "removing" { continue } - running, err := s.docker.IsContainerRunning(ctx, inst.ContainerID) + running, err := s.docker.IsContainerRunning(ctx, c.ContainerID) if err != nil { - continue // Docker unreachable, keep stored status. + continue } - actualStatus := "stopped" + actual := "stopped" if running { - actualStatus = "running" + actual = "running" } - if inst.Status != actualStatus { - instances[i].Status = actualStatus - _ = s.store.UpdateInstanceStatus(inst.ID, actualStatus) + if c.State != actual { + containers[i].State = actual + _ = s.store.UpdateContainerState(c.ID, actual) } } - respondJSON(w, http.StatusOK, instances) + respondJSON(w, http.StatusOK, containers) } // deployRequest is the expected JSON body for triggering a deploy. 
@@ -62,30 +67,28 @@ type deployRequest struct { ImageTag string `json:"image_tag"` } -// deployInstance handles POST /api/projects/{id}/stages/{stage}/instances (trigger deploy). +// deployInstance handles POST /api/projects/{id}/stages/{stage}/instances. func (s *Server) deployInstance(w http.ResponseWriter, r *http.Request) { projectID := chi.URLParam(r, "id") stageID := chi.URLParam(r, "stage") - // Verify project exists. if _, err := s.store.GetProjectByID(projectID); err != nil { if errors.Is(err, store.ErrNotFound) { respondNotFound(w, "project") return } slog.Error("failed to get project", "error", err) - respondError(w, http.StatusInternalServerError, "internal server error") + respondError(w, http.StatusInternalServerError, "internal server error") return } - // Verify stage exists. if _, err := s.store.GetStageByID(stageID); err != nil { if errors.Is(err, store.ErrNotFound) { respondNotFound(w, "stage") return } slog.Error("failed to get stage", "error", err) - respondError(w, http.StatusInternalServerError, "internal server error") + respondError(w, http.StatusInternalServerError, "internal server error") return } @@ -115,40 +118,41 @@ func (s *Server) deployInstance(w http.ResponseWriter, r *http.Request) { } // removeInstance handles DELETE /api/projects/{id}/stages/{stage}/instances/{iid}. +// {iid} is the container row ID (same UUID as the legacy instance ID). 
func (s *Server) removeInstance(w http.ResponseWriter, r *http.Request) { - instanceID := chi.URLParam(r, "iid") + id := chi.URLParam(r, "iid") - inst, err := s.store.GetInstanceByID(instanceID) + c, err := s.store.GetContainerByID(id) if err != nil { if errors.Is(err, store.ErrNotFound) { - respondNotFound(w, "instance") + respondNotFound(w, "container") return } - slog.Error("failed to get instance", "error", err) - respondError(w, http.StatusInternalServerError, "internal server error") + slog.Error("failed to get container", "error", err) + respondError(w, http.StatusInternalServerError, "internal server error") return } // Remove the Docker container if it has one. - if inst.ContainerID != "" { - if err := s.docker.RemoveContainer(r.Context(), inst.ContainerID, true); err != nil { - slog.Error("remove container", "container_id", inst.ContainerID, "error", err) + if c.ContainerID != "" { + if err := s.docker.RemoveContainer(r.Context(), c.ContainerID, true); err != nil { + slog.Error("remove container", "container_id", c.ContainerID, "error", err) } } // Delete proxy route if it has one. - if inst.ProxyRouteID != "" { - if err := s.proxyProvider.DeleteRoute(r.Context(), inst.ProxyRouteID); err != nil { - slog.Warn("delete proxy route on instance removal", "route_id", inst.ProxyRouteID, "error", err) + if c.ProxyRouteID != "" { + if err := s.proxyProvider.DeleteRoute(r.Context(), c.ProxyRouteID); err != nil { + slog.Warn("delete proxy route on container removal", "route_id", c.ProxyRouteID, "error", err) } } - // Delete instance record. - if err := s.store.DeleteInstance(instanceID); err != nil { - respondError(w, http.StatusInternalServerError, "failed to delete instance") + // Delete container row. 
+ if err := s.store.DeleteContainer(id); err != nil { + respondError(w, http.StatusInternalServerError, "failed to delete container") return } - respondJSON(w, http.StatusOK, map[string]string{"deleted": instanceID}) + respondJSON(w, http.StatusOK, map[string]string{"deleted": id}) } // stopInstance handles POST /api/projects/{id}/stages/{stage}/instances/{iid}/stop. @@ -166,67 +170,59 @@ func (s *Server) restartInstance(w http.ResponseWriter, r *http.Request) { s.controlInstance(w, r, "restart") } -// controlInstance performs a stop/start/restart action on an instance's container. +// controlInstance performs a stop/start/restart action on a container. func (s *Server) controlInstance(w http.ResponseWriter, r *http.Request, action string) { - instanceID := chi.URLParam(r, "iid") + id := chi.URLParam(r, "iid") - inst, err := s.store.GetInstanceByID(instanceID) + c, err := s.store.GetContainerByID(id) if err != nil { if errors.Is(err, store.ErrNotFound) { - respondNotFound(w, "instance") + respondNotFound(w, "container") return } - slog.Error("failed to get instance", "error", err) - respondError(w, http.StatusInternalServerError, "internal server error") + slog.Error("failed to get container", "error", err) + respondError(w, http.StatusInternalServerError, "internal server error") return } - if inst.ContainerID == "" { - respondError(w, http.StatusBadRequest, "instance has no container") + if c.ContainerID == "" { + respondError(w, http.StatusBadRequest, "container row has no docker container bound") return } ctx := r.Context() var controlErr error - var newStatus string + var newState string switch action { case "stop": - controlErr = s.docker.StopContainer(ctx, inst.ContainerID, 10) - newStatus = "stopped" + controlErr = s.docker.StopContainer(ctx, c.ContainerID, 10) + newState = "stopped" case "start": - controlErr = s.docker.StartContainer(ctx, inst.ContainerID) - newStatus = "running" + controlErr = s.docker.StartContainer(ctx, c.ContainerID) + newState = 
"running" case "restart": - controlErr = s.docker.RestartContainer(ctx, inst.ContainerID, 10) - newStatus = "running" + controlErr = s.docker.RestartContainer(ctx, c.ContainerID, 10) + newState = "running" default: respondError(w, http.StatusBadRequest, fmt.Sprintf("unknown action: %s", action)) return } if controlErr != nil { - slog.Error("failed to control instance", "action", action, "instance_id", instanceID, "error", controlErr) + slog.Error("failed to control container", "action", action, "id", id, "error", controlErr) respondError(w, http.StatusInternalServerError, "internal server error") return } - // Update status in store. - if err := s.store.UpdateInstanceStatus(instanceID, newStatus); err != nil { - slog.Error("update instance status", "instance_id", instanceID, "status", newStatus, "error", err) - } - - // Track last_alive_at when container becomes running. - if newStatus == "running" { - if err := s.store.UpdateLastAliveAt(instanceID); err != nil { - slog.Error("update last_alive_at", "instance_id", instanceID, "error", err) - } + if err := s.store.UpdateContainerState(id, newState); err != nil { + slog.Error("update container state", "id", id, "state", newState, "error", err) } respondJSON(w, http.StatusOK, map[string]string{ - "instance_id": instanceID, + "instance_id": id, "action": action, - "status": newStatus, + "status": newState, }) } diff --git a/internal/api/projects.go b/internal/api/projects.go index 8b3b99f..b49f740 100644 --- a/internal/api/projects.go +++ b/internal/api/projects.go @@ -188,16 +188,16 @@ func (s *Server) deleteProject(w http.ResponseWriter, r *http.Request) { ctx := r.Context() stages, _ := s.store.GetStagesByProjectID(id) for _, stage := range stages { - instances, _ := s.store.GetInstancesByStageID(stage.ID) - for _, inst := range instances { - if inst.ContainerID != "" { - if err := s.docker.RemoveContainer(ctx, inst.ContainerID, true); err != nil { - slog.Warn("delete project: remove container", "container", 
inst.ContainerID, "error", err) + rows, _ := s.store.ListContainersByStageID(stage.ID) + for _, c := range rows { + if c.ContainerID != "" { + if err := s.docker.RemoveContainer(ctx, c.ContainerID, true); err != nil { + slog.Warn("delete project: remove container", "container", c.ContainerID, "error", err) } } - if inst.ProxyRouteID != "" { - if err := s.proxyProvider.DeleteRoute(ctx, inst.ProxyRouteID); err != nil { - slog.Warn("delete project: delete proxy route", "route", inst.ProxyRouteID, "error", err) + if c.ProxyRouteID != "" { + if err := s.proxyProvider.DeleteRoute(ctx, c.ProxyRouteID); err != nil { + slog.Warn("delete project: delete proxy route", "route", c.ProxyRouteID, "error", err) } } } diff --git a/internal/api/settings.go b/internal/api/settings.go index 775a2f9..56f30c3 100644 --- a/internal/api/settings.go +++ b/internal/api/settings.go @@ -466,14 +466,14 @@ func (s *Server) resyncAllProxies(oldSettings, newSettings store.Settings) { // Step 2: If new provider is "none", clear all proxy route IDs and we're done. if newSettings.ProxyProvider == "none" { for _, route := range routes { - inst, err := s.store.GetInstanceByID(route.InstanceID) + c, err := s.store.GetContainerByID(route.InstanceID) if err != nil { continue } - inst.ProxyRouteID = "" - inst.NpmProxyID = 0 - if err := s.store.UpdateInstance(inst); err != nil { - slog.Warn("proxy resync: clear route ID", "instance", route.InstanceID, "error", err) + c.ProxyRouteID = "" + c.NpmProxyID = 0 + if err := s.store.UpdateContainer(c); err != nil { + slog.Warn("proxy resync: clear route ID", "container", route.InstanceID, "error", err) } } slog.Info("proxy resync: cleared all proxy routes (provider set to none)", "count", len(routes)) @@ -501,18 +501,17 @@ func (s *Server) resyncAllProxies(oldSettings, newSettings store.Settings) { continue } - // Update instance with new route ID. - inst, err := s.store.GetInstanceByID(route.InstanceID) + // Update container row with new route ID. 
+ c, err := s.store.GetContainerByID(route.InstanceID) if err != nil { continue } - inst.ProxyRouteID = routeID + c.ProxyRouteID = routeID if domainChanged { - // Subdomain stays the same, but the FQDN in external systems changed. - slog.Info("proxy resync: domain updated", "instance", route.InstanceID, "domain", fqdn) + slog.Info("proxy resync: domain updated", "container", route.InstanceID, "domain", fqdn) } - if err := s.store.UpdateInstance(inst); err != nil { - slog.Warn("proxy resync: update instance", "instance", route.InstanceID, "error", err) + if err := s.store.UpdateContainer(c); err != nil { + slog.Warn("proxy resync: update container", "container", route.InstanceID, "error", err) } updated++ } diff --git a/internal/api/stale.go b/internal/api/stale.go index ffa4262..ad3449e 100644 --- a/internal/api/stale.go +++ b/internal/api/stale.go @@ -19,59 +19,58 @@ func (s *Server) listStaleContainers(w http.ResponseWriter, r *http.Request) { return } - staleInstances, err := s.staleScanner.FindStaleInstances(r.Context()) + staleRows, err := s.staleScanner.FindStaleContainers(r.Context()) if err != nil { slog.Error("failed to find stale containers", "error", err) respondError(w, http.StatusInternalServerError, "failed to find stale containers") return } - if staleInstances == nil { - staleInstances = []stale.StaleInstance{} + if staleRows == nil { + staleRows = []stale.StaleContainer{} } - respondJSON(w, http.StatusOK, staleInstances) + respondJSON(w, http.StatusOK, staleRows) } // cleanupStaleContainer handles POST /api/containers/stale/{id}/cleanup. -// Stops the Docker container, removes the NPM proxy, and deletes the instance from the store. +// Stops the Docker container, removes the proxy route, and deletes the +// container row. {id} is the container row ID. 
func (s *Server) cleanupStaleContainer(w http.ResponseWriter, r *http.Request) { - instanceID := chi.URLParam(r, "id") + id := chi.URLParam(r, "id") - inst, err := s.store.GetInstanceByID(instanceID) + c, err := s.store.GetContainerByID(id) if err != nil { if errors.Is(err, store.ErrNotFound) { - respondNotFound(w, "instance") + respondNotFound(w, "container") return } - slog.Error("failed to get instance", "instance_id", instanceID, "error", err) - respondError(w, http.StatusInternalServerError, "failed to get instance") + slog.Error("failed to get container", "id", id, "error", err) + respondError(w, http.StatusInternalServerError, "failed to get container") return } - // Don't remove instances already being cleaned up. - if inst.Status == "removing" { - respondError(w, http.StatusConflict, "instance is already being removed") + if c.State == "removing" { + respondError(w, http.StatusConflict, "container is already being removed") return } - if err := s.cleanupInstance(r, inst); err != nil { - slog.Error("failed to cleanup instance", "instance_id", instanceID, "error", err) - respondError(w, http.StatusInternalServerError, "failed to cleanup instance") + if err := s.cleanupContainer(r, c); err != nil { + slog.Error("failed to cleanup container", "id", id, "error", err) + respondError(w, http.StatusInternalServerError, "failed to cleanup container") return } - respondJSON(w, http.StatusOK, map[string]string{"cleaned": instanceID}) + respondJSON(w, http.StatusOK, map[string]string{"cleaned": id}) } // bulkCleanupStaleContainers handles POST /api/containers/stale/cleanup. -// Cleans up all currently stale containers. 
func (s *Server) bulkCleanupStaleContainers(w http.ResponseWriter, r *http.Request) { if s.staleScanner == nil { respondError(w, http.StatusServiceUnavailable, "stale scanner not initialized") return } - staleInstances, err := s.staleScanner.FindStaleInstances(r.Context()) + staleRows, err := s.staleScanner.FindStaleContainers(r.Context()) if err != nil { slog.Error("failed to find stale containers for bulk cleanup", "error", err) respondError(w, http.StatusInternalServerError, "failed to find stale containers") @@ -81,17 +80,17 @@ func (s *Server) bulkCleanupStaleContainers(w http.ResponseWriter, r *http.Reque var cleaned []string var failed []string - for _, si := range staleInstances { - if si.Instance.Status == "removing" { + for _, sc := range staleRows { + if sc.Container.State == "removing" { continue } - if err := s.cleanupInstance(r, si.Instance); err != nil { + if err := s.cleanupContainer(r, sc.Container); err != nil { slog.Error("bulk stale cleanup failed", - "instance_id", si.Instance.ID, "error", err) - failed = append(failed, si.Instance.ID) + "id", sc.Container.ID, "error", err) + failed = append(failed, sc.Container.ID) continue } - cleaned = append(cleaned, si.Instance.ID) + cleaned = append(cleaned, sc.Container.ID) } respondJSON(w, http.StatusOK, map[string]any{ @@ -100,53 +99,48 @@ func (s *Server) bulkCleanupStaleContainers(w http.ResponseWriter, r *http.Reque }) } -// cleanupInstance stops a Docker container, removes the NPM proxy, deletes -// the store record, and emits an event. -func (s *Server) cleanupInstance(r *http.Request, inst store.Instance) error { +// cleanupContainer stops a Docker container, removes its proxy route, +// deletes the container row, and emits an event. +func (s *Server) cleanupContainer(r *http.Request, c store.Container) error { ctx := r.Context() - // Mark as removing. 
- if err := s.store.UpdateInstanceStatus(inst.ID, "removing"); err != nil { - slog.Warn("stale cleanup: update status to removing", "instance_id", inst.ID, "error", err) + if err := s.store.UpdateContainerState(c.ID, "removing"); err != nil { + slog.Warn("stale cleanup: update state to removing", "id", c.ID, "error", err) } - // Stop and remove Docker container. - if inst.ContainerID != "" { - if err := s.docker.StopContainer(ctx, inst.ContainerID, 10); err != nil { - slog.Warn("stale cleanup: stop container", "container_id", inst.ContainerID, "error", err) + if c.ContainerID != "" { + if err := s.docker.StopContainer(ctx, c.ContainerID, 10); err != nil { + slog.Warn("stale cleanup: stop container", "container_id", c.ContainerID, "error", err) } - if err := s.docker.RemoveContainer(ctx, inst.ContainerID, true); err != nil { - slog.Warn("stale cleanup: remove container", "container_id", inst.ContainerID, "error", err) + if err := s.docker.RemoveContainer(ctx, c.ContainerID, true); err != nil { + slog.Warn("stale cleanup: remove container", "container_id", c.ContainerID, "error", err) } } - // Delete proxy route if present. - if inst.ProxyRouteID != "" { - if err := s.proxyProvider.DeleteRoute(ctx, inst.ProxyRouteID); err != nil { - slog.Warn("stale cleanup: delete proxy route", "route_id", inst.ProxyRouteID, "error", err) + if c.ProxyRouteID != "" { + if err := s.proxyProvider.DeleteRoute(ctx, c.ProxyRouteID); err != nil { + slog.Warn("stale cleanup: delete proxy route", "route_id", c.ProxyRouteID, "error", err) } } - // Delete instance record. - if err := s.store.DeleteInstance(inst.ID); err != nil { + if err := s.store.DeleteContainer(c.ID); err != nil { return err } - // Emit cleanup event. - s.emitStaleCleanupEvent(inst) + s.emitStaleCleanupEvent(c) return nil } // emitStaleCleanupEvent publishes an event when a stale container is cleaned up. 
-func (s *Server) emitStaleCleanupEvent(inst store.Instance) { - msg := "Stale container cleaned up: " + inst.ID + " (tag: " + inst.ImageTag + ")" +func (s *Server) emitStaleCleanupEvent(c store.Container) { + msg := "Stale container cleaned up: " + c.ID + " (tag: " + c.ImageTag + ")" evt, err := s.store.InsertEvent(store.EventLog{ Source: "stale_cleanup", Severity: "info", Message: msg, - Metadata: `{"instance_id":"` + inst.ID + `","project_id":"` + inst.ProjectID + `","stage_id":"` + inst.StageID + `"}`, + Metadata: `{"container_id":"` + c.ID + `","workload_id":"` + c.WorkloadID + `","role":"` + c.Role + `"}`, }) if err != nil { slog.Error("stale cleanup: failed to persist event", "error", err) @@ -158,9 +152,9 @@ func (s *Server) emitStaleCleanupEvent(inst store.Instance) { Payload: events.EventLogPayload{ ID: evt.ID, Source: "stale_cleanup", - Severity: "info", - Message: msg, - Metadata: evt.Metadata, + Severity: "info", + Message: msg, + Metadata: evt.Metadata, CreatedAt: evt.CreatedAt, }, }) diff --git a/internal/api/stats.go b/internal/api/stats.go index 6574ac6..5f0b26c 100644 --- a/internal/api/stats.go +++ b/internal/api/stats.go @@ -11,29 +11,29 @@ import ( ) // getInstanceStats handles GET /api/projects/{id}/stages/{stage}/instances/{iid}/stats. -// Returns CPU and memory stats for the container backing the given instance. +// {iid} is the container row ID (same UUID as the legacy instance ID). 
func (s *Server) getInstanceStats(w http.ResponseWriter, r *http.Request) { - instanceID := chi.URLParam(r, "iid") + id := chi.URLParam(r, "iid") - inst, err := s.store.GetInstanceByID(instanceID) + c, err := s.store.GetContainerByID(id) if err != nil { if errors.Is(err, store.ErrNotFound) { - respondNotFound(w, "instance") + respondNotFound(w, "container") return } - slog.Error("failed to get instance", "instance_id", instanceID, "error", err) - respondError(w, http.StatusInternalServerError, "failed to get instance") + slog.Error("failed to get container", "id", id, "error", err) + respondError(w, http.StatusInternalServerError, "failed to get container") return } - if inst.ContainerID == "" { - respondError(w, http.StatusBadRequest, "instance has no container") + if c.ContainerID == "" { + respondError(w, http.StatusBadRequest, "container row has no docker container bound") return } - stats, err := s.docker.GetContainerStats(r.Context(), inst.ContainerID) + stats, err := s.docker.GetContainerStats(r.Context(), c.ContainerID) if err != nil { - slog.Error("failed to get container stats", "container_id", inst.ContainerID, "error", err) + slog.Error("failed to get container stats", "container_id", c.ContainerID, "error", err) respondError(w, http.StatusInternalServerError, "failed to get container stats") return } diff --git a/internal/api/stats_history.go b/internal/api/stats_history.go index 9d8c3b0..60418d7 100644 --- a/internal/api/stats_history.go +++ b/internal/api/stats_history.go @@ -91,15 +91,16 @@ func (s *Server) getSystemStatsHistory(w http.ResponseWriter, r *http.Request) { } // getInstanceStatsHistory handles GET /api/projects/{id}/stages/{stage}/instances/{iid}/stats/history. +// {iid} is the container row ID (same UUID as the legacy instance ID). 
func (s *Server) getInstanceStatsHistory(w http.ResponseWriter, r *http.Request) { instanceID := chi.URLParam(r, "iid") - if _, err := s.store.GetInstanceByID(instanceID); err != nil { + if _, err := s.store.GetContainerByID(instanceID); err != nil { if errors.Is(err, store.ErrNotFound) { - respondNotFound(w, "instance") + respondNotFound(w, "container") return } - slog.Error("failed to get instance", "instance_id", instanceID, "error", err) - respondError(w, http.StatusInternalServerError, "failed to get instance") + slog.Error("failed to get container", "id", instanceID, "error", err) + respondError(w, http.StatusInternalServerError, "failed to get container") return } samples, err := s.store.ListContainerStatsSamples(stats.OwnerTypeInstance, instanceID, sinceTimestamp(parseWindow(r))) @@ -279,24 +280,24 @@ func (s *Server) enrichWithOwnerNames(samples []store.ContainerStatsSample) []To return out } -// lookupInstanceName returns "project/stage" for an instance, or empty on -// any lookup error so a transient miss does not break the response. +// lookupInstanceName returns "workload/role" for a container row, or empty +// on any lookup error so a transient miss does not break the response. func (s *Server) lookupInstanceName(instanceID string) string { - inst, err := s.store.GetInstanceByID(instanceID) + c, err := s.store.GetContainerByID(instanceID) if err != nil { return "" } - project, perr := s.store.GetProjectByID(inst.ProjectID) - stage, serr := s.store.GetStageByID(inst.StageID) - switch { - case perr == nil && serr == nil: - return project.Name + "/" + stage.Name - case perr == nil: - return project.Name - case serr == nil: - return stage.Name + w, err := s.store.GetWorkloadByID(c.WorkloadID) + if err != nil { + if c.Role != "" { + return c.Role + } + return "" } - return "" + if c.Role != "" { + return w.Name + "/" + c.Role + } + return w.Name } // lookupSiteName returns the site's display name or empty on lookup error. 
diff --git a/internal/deployer/bluegreen.go b/internal/deployer/bluegreen.go index 5435c0b..359647a 100644 --- a/internal/deployer/bluegreen.go +++ b/internal/deployer/bluegreen.go @@ -25,17 +25,17 @@ func (d *Deployer) blueGreenDeploy( deployID string, imageTag string, ) (string, string, string, error) { - // Find existing running instance for this stage (the "blue" instance). - existingInstances, err := d.store.GetInstancesByStageID(stage.ID) + // Find existing running container for this stage (the "blue" container). + existing, err := d.store.ListContainersByStageID(stage.ID) if err != nil { - return "", "", "", fmt.Errorf("get existing instances: %w", err) + return "", "", "", fmt.Errorf("get existing containers: %w", err) } - var blueInstance *store.Instance - for _, inst := range existingInstances { - if inst.Status == "running" { - instCopy := inst - blueInstance = &instCopy + var blueContainer *store.Container + for _, c := range existing { + if c.State == "running" { + cCopy := c + blueContainer = &cCopy break } } @@ -84,9 +84,6 @@ func (d *Deployer) blueGreenDeploy( ExposedPorts: []string{portStr}, NetworkName: settings.Network, NetworkID: networkID, - Project: project.Name, - Stage: stage.Name, - InstanceID: instanceID, WorkloadID: workloadID, WorkloadKind: string(store.WorkloadKindProject), Role: stage.Name, @@ -114,25 +111,27 @@ func (d *Deployer) blueGreenDeploy( return "", "", instanceID, fmt.Errorf("create container: %w", err) } - // Create instance record. - inst, err := d.store.CreateInstanceWithID(store.Instance{ - ID: instanceID, - StageID: stage.ID, - ProjectID: project.ID, - ContainerID: containerID, - ImageTag: imageTag, - Subdomain: subdomain, - Status: "stopped", - Port: project.Port, + // Create container row. 
+ row, err := d.store.CreateContainer(store.Container{ + ID: instanceID, + WorkloadID: workloadID, + WorkloadKind: string(store.WorkloadKindProject), + Role: stage.Name, + ContainerID: containerID, + ImageRef: project.Image + ":" + imageTag, + ImageTag: imageTag, + Host: "local", + State: "stopped", + Port: project.Port, + Subdomain: subdomain, }) if err != nil { - return containerID, "", instanceID, fmt.Errorf("create instance record: %w", err) + return containerID, "", instanceID, fmt.Errorf("create container row: %w", err) } - instanceID = inst.ID - d.upsertContainerForInstance(project, stage, inst, workloadID) + instanceID = row.ID if err := d.store.SetDeployInstanceID(deployID, instanceID); err != nil { - slog.Warn("link deploy to instance", "error", err) + slog.Warn("link deploy to container", "error", err) } d.logDeploy(deployID, fmt.Sprintf("Blue-green: starting green container %s", containerName), "info") @@ -140,11 +139,10 @@ func (d *Deployer) blueGreenDeploy( return containerID, "", instanceID, fmt.Errorf("start container: %w", err) } - if err := d.store.UpdateInstanceStatus(instanceID, "running"); err != nil { - slog.Warn("update instance status", "error", err) + if err := d.store.UpdateContainerState(instanceID, "running"); err != nil { + slog.Warn("update container state", "error", err) } - inst.Status = "running" - d.upsertContainerForInstance(project, stage, inst, workloadID) + row.State = "running" d.publishInstanceStatus(instanceID, project.ID, stage.ID, "running") // Step 4: Health check the green container. @@ -181,30 +179,29 @@ func (d *Deployer) blueGreenDeploy( return containerID, "", instanceID, fmt.Errorf("configure proxy: %w", err) } - inst.ProxyRouteID = proxyRouteID + row.ProxyRouteID = proxyRouteID d.logDeploy(deployID, "Blue-green: proxy swapped to green container", "info") - // Create/update DNS record for the green instance. + // Create/update DNS record for the green container. fqdn := subdomain + "." 
+ settings.Domain d.ensureDNS(ctx, fqdn, "instance", instanceID, deployID) } else { d.logDeploy(deployID, "Blue-green: proxy skipped (disabled for this stage)", "info") } - inst.Subdomain = subdomain - if err := d.store.UpdateInstance(inst); err != nil { - slog.Warn("update instance with proxy ID", "error", err) + row.Subdomain = subdomain + if err := d.store.UpdateContainer(row); err != nil { + slog.Warn("update container with proxy ID", "error", err) } - d.upsertContainerForInstance(project, stage, inst, workloadID) // Step 6: Stop the blue container. - if blueInstance != nil { - d.logDeploy(deployID, fmt.Sprintf("Blue-green: stopping blue instance %s (tag: %s)", blueInstance.ID, blueInstance.ImageTag), "info") - if err := d.removeInstance(ctx, *blueInstance, settings); err != nil { + if blueContainer != nil { + d.logDeploy(deployID, fmt.Sprintf("Blue-green: stopping blue container %s (tag: %s)", blueContainer.ID, blueContainer.ImageTag), "info") + if err := d.removeContainer(ctx, *blueContainer, settings); err != nil { // Non-fatal: log but continue. Green is already serving traffic. 
- d.logDeploy(deployID, fmt.Sprintf("Blue-green: warning: failed to remove blue instance: %v", err), "warn") + d.logDeploy(deployID, fmt.Sprintf("Blue-green: warning: failed to remove blue container: %v", err), "warn") } else { - d.logDeploy(deployID, "Blue-green: blue instance removed", "info") + d.logDeploy(deployID, "Blue-green: blue container removed", "info") } } diff --git a/internal/deployer/deployer.go b/internal/deployer/deployer.go index 109c71c..0e2b8da 100644 --- a/internal/deployer/deployer.go +++ b/internal/deployer/deployer.go @@ -376,9 +376,6 @@ func (d *Deployer) executeDeploy( ExposedPorts: []string{portStr}, NetworkName: settings.Network, NetworkID: networkID, - Project: project.Name, - Stage: stage.Name, - InstanceID: instanceID, WorkloadID: workloadID, WorkloadKind: string(store.WorkloadKindProject), Role: stage.Name, @@ -407,26 +404,32 @@ func (d *Deployer) executeDeploy( } d.logDeploy(deployID, fmt.Sprintf("Container created (ID: %s)", truncateID(containerID)), "info") - // Create instance record in store with the pre-generated ID. - inst, err := d.store.CreateInstanceWithID(store.Instance{ - ID: instanceID, - StageID: stage.ID, - ProjectID: project.ID, - ContainerID: containerID, - ImageTag: imageTag, - Subdomain: subdomain, - Status: "stopped", - Port: project.Port, + // Create container row with the pre-generated ID. The deployer is the + // authoritative writer until the next reconciler tick — it's important + // the row exists before StartContainer so a fast tick doesn't see an + // orphan and mark it missing. 
+ row, err := d.store.CreateContainer(store.Container{ + ID: instanceID, + WorkloadID: workloadID, + WorkloadKind: string(store.WorkloadKindProject), + Role: stage.Name, + ContainerID: containerID, + ImageRef: project.Image + ":" + imageTag, + ImageTag: imageTag, + Host: "local", + State: "stopped", + Port: project.Port, + Subdomain: subdomain, }) if err != nil { - return containerID, proxyRouteID, instanceID, fmt.Errorf("create instance record: %w", err) + return containerID, proxyRouteID, instanceID, fmt.Errorf("create container row: %w", err) } - instanceID = inst.ID - d.upsertContainerForInstance(project, stage, inst, workloadID) + instanceID = row.ID - // Link deploy to instance. + // Link deploy to container row (the existing Deploy.InstanceID column + // stores the row ID — same value as before, just a renamed concept). if err := d.store.SetDeployInstanceID(deployID, instanceID); err != nil { - slog.Warn("link deploy to instance", "error", err) + slog.Warn("link deploy to container", "error", err) } d.logDeploy(deployID, fmt.Sprintf("Starting container %s", containerName), "info") @@ -434,15 +437,11 @@ func (d *Deployer) executeDeploy( return containerID, proxyRouteID, instanceID, fmt.Errorf("start container: %w", err) } - if err := d.store.UpdateInstanceStatus(instanceID, "running"); err != nil { - slog.Warn("update instance status to running", "error", err) + if err := d.store.UpdateContainerState(instanceID, "running"); err != nil { + slog.Warn("update container state to running", "error", err) } - if err := d.store.UpdateLastAliveAt(instanceID); err != nil { - slog.Warn("update last_alive_at on deploy", "instance_id", instanceID, "error", err) - } - inst.Status = "running" - inst.LastAliveAt = store.Now() - d.upsertContainerForInstance(project, stage, inst, workloadID) + row.State = "running" + row.LastSeenAt = store.Now() d.publishInstanceStatus(instanceID, project.ID, stage.ID, "running") d.logDeploy(deployID, "Container started", "info") @@ -463,24 
+462,22 @@ func (d *Deployer) executeDeploy( return containerID, proxyRouteID, instanceID, fmt.Errorf("configure proxy: %w", err) } - // Update instance with proxy route ID. - inst.ProxyRouteID = proxyRouteID - inst.Subdomain = subdomain - if err := d.store.UpdateInstance(inst); err != nil { - slog.Warn("update instance with proxy ID", "error", err) + // Update container row with proxy route ID. + row.ProxyRouteID = proxyRouteID + row.Subdomain = subdomain + if err := d.store.UpdateContainer(row); err != nil { + slog.Warn("update container with proxy ID", "error", err) } - d.upsertContainerForInstance(project, stage, inst, workloadID) - // Create DNS record for this instance. + // Create DNS record for this container. fqdn := subdomain + "." + settings.Domain d.ensureDNS(ctx, fqdn, "instance", instanceID, deployID) } else { d.logDeploy(deployID, "Proxy creation skipped (disabled for this stage)", "info") - inst.Subdomain = subdomain - if err := d.store.UpdateInstance(inst); err != nil { - slog.Warn("update instance", "error", err) + row.Subdomain = subdomain + if err := d.store.UpdateContainer(row); err != nil { + slog.Warn("update container", "error", err) } - d.upsertContainerForInstance(project, stage, inst, workloadID) } // Step 5: Health check. @@ -554,27 +551,27 @@ func (d *Deployer) configureProxy( return routeID, nil } -// enforceMaxInstances removes the oldest instances when the stage has reached its limit. -// This makes room for the new deployment. +// enforceMaxInstances removes the oldest container rows when the stage has +// reached its instance limit, making room for the new deploy. 
func (d *Deployer) enforceMaxInstances(ctx context.Context, stage store.Stage, deployID string, settings store.Settings) error { if stage.MaxInstances <= 0 { return nil } - instances, err := d.store.GetInstancesByStageID(stage.ID) + containers, err := d.store.ListContainersByStageID(stage.ID) if err != nil { - return fmt.Errorf("get instances for stage: %w", err) + return fmt.Errorf("get containers for stage: %w", err) } - // Filter to running/stopped instances (not already failed/removing). - var active []store.Instance - for _, inst := range instances { - if inst.Status == "running" || inst.Status == "stopped" { - active = append(active, inst) + // Filter to running/stopped containers (not already failed/removing). + var active []store.Container + for _, c := range containers { + if c.State == "running" || c.State == "stopped" { + active = append(active, c) } } - // We need room for one more instance, so remove oldest when at limit. + // We need room for one more container, so remove the oldest when at limit. 
removeCount := len(active) - stage.MaxInstances + 1 if removeCount <= 0 { return nil @@ -586,57 +583,50 @@ func (d *Deployer) enforceMaxInstances(ctx context.Context, stage store.Stage, d }) for i := 0; i < removeCount && i < len(active); i++ { - inst := active[i] - d.logDeploy(deployID, fmt.Sprintf("Removing oldest instance %s (tag: %s) to enforce max_instances=%d", inst.ID, inst.ImageTag, stage.MaxInstances), "info") + c := active[i] + d.logDeploy(deployID, fmt.Sprintf("Removing oldest container %s (tag: %s) to enforce max_instances=%d", c.ID, c.ImageTag, stage.MaxInstances), "info") - if err := d.removeInstance(ctx, inst, settings); err != nil { - d.logDeploy(deployID, fmt.Sprintf("Failed to remove instance %s: %v", inst.ID, err), "warn") + if err := d.removeContainer(ctx, c, settings); err != nil { + d.logDeploy(deployID, fmt.Sprintf("Failed to remove container %s: %v", c.ID, err), "warn") continue } - d.logDeploy(deployID, fmt.Sprintf("Removed instance %s", inst.ID), "info") + d.logDeploy(deployID, fmt.Sprintf("Removed container %s", c.ID), "info") } return nil } -// removeInstance stops and removes a container, deletes its NPM proxy host, -// and removes the instance record from the store. -func (d *Deployer) removeInstance(ctx context.Context, inst store.Instance, settings store.Settings) error { +// removeContainer stops + removes the Docker container, deletes its proxy +// route, drops the DNS record, and removes the container row from the store. +func (d *Deployer) removeContainer(ctx context.Context, c store.Container, settings store.Settings) error { // Mark as removing. - if err := d.store.UpdateInstanceStatus(inst.ID, "removing"); err != nil { - slog.Warn("update instance status to removing", "instance_id", inst.ID, "error", err) + if err := d.store.UpdateContainerState(c.ID, "removing"); err != nil { + slog.Warn("update container state to removing", "id", c.ID, "error", err) } // Remove Docker container. 
- if inst.ContainerID != "" { - if err := d.docker.RemoveContainer(ctx, inst.ContainerID, true); err != nil { - slog.Warn("remove container", "container_id", inst.ContainerID, "error", err) + if c.ContainerID != "" { + if err := d.docker.RemoveContainer(ctx, c.ContainerID, true); err != nil { + slog.Warn("remove docker container", "container_id", c.ContainerID, "error", err) } } // Delete proxy route. - if inst.ProxyRouteID != "" { - if err := d.proxy.DeleteRoute(ctx, inst.ProxyRouteID); err != nil { - slog.Warn("delete proxy route", "route_id", inst.ProxyRouteID, "error", err) + if c.ProxyRouteID != "" { + if err := d.proxy.DeleteRoute(ctx, c.ProxyRouteID); err != nil { + slog.Warn("delete proxy route", "route_id", c.ProxyRouteID, "error", err) } - // Remove DNS record for this instance. - if inst.Subdomain != "" && settings.Domain != "" { - fqdn := inst.Subdomain + "." + settings.Domain + // Remove DNS record. + if c.Subdomain != "" && settings.Domain != "" { + fqdn := c.Subdomain + "." + settings.Domain d.removeDNS(ctx, fqdn, "") } } - // Delete instance record. - if err := d.store.DeleteInstance(inst.ID); err != nil { - return fmt.Errorf("delete instance record: %w", err) - } - - // Drop the matching container index row. ID matches instance.ID by - // construction; ignore NotFound which is harmless if the row predates - // this refactor. - if err := d.store.DeleteContainer(inst.ID); err != nil && !errors.Is(err, store.ErrNotFound) { - slog.Warn("delete container row", "instance_id", inst.ID, "error", err) + // Drop the container row. + if err := d.store.DeleteContainer(c.ID); err != nil && !errors.Is(err, store.ErrNotFound) { + return fmt.Errorf("delete container row: %w", err) } return nil @@ -903,33 +893,6 @@ func truncateID(id string) string { return id } -// upsertContainerForInstance keeps the normalized containers index in sync -// with the project-specific instance row. Same UUID is used for both rows so -// the reconciler can find them later. 
Best-effort: a sync failure is logged -// but does not abort the deploy — the container is still running and the -// reconciler will pick it up on the next tick (once that lands). -func (d *Deployer) upsertContainerForInstance(project store.Project, stage store.Stage, inst store.Instance, workloadID string) { - c := store.Container{ - ID: inst.ID, - WorkloadID: workloadID, - WorkloadKind: string(store.WorkloadKindProject), - Role: stage.Name, - ContainerID: inst.ContainerID, - ImageRef: project.Image + ":" + inst.ImageTag, - ImageTag: inst.ImageTag, - Host: "local", - State: inst.Status, - Port: inst.Port, - Subdomain: inst.Subdomain, - ProxyRouteID: inst.ProxyRouteID, - NpmProxyID: inst.NpmProxyID, - LastSeenAt: inst.LastAliveAt, - } - if err := d.store.UpsertContainer(c); err != nil { - slog.Warn("upsert container row", "instance_id", inst.ID, "error", err) - } -} - // resolveProjectWorkloadID returns the workload ID paired with a project. // Backfill-on-boot guarantees the row exists, so this is essentially a lookup. // On miss (defensive), it logs and returns empty so the caller can decide. diff --git a/internal/deployer/promote.go b/internal/deployer/promote.go index a7ae82b..7d53c7f 100644 --- a/internal/deployer/promote.go +++ b/internal/deployer/promote.go @@ -34,13 +34,13 @@ func (d *Deployer) validatePromoteFrom(stage store.Stage, imageTag string) error } // Check if the tag is running in the source stage. 
- instances, err := d.store.GetInstancesByStageID(sourceStage.ID) + containers, err := d.store.ListContainersByStageID(sourceStage.ID) if err != nil { - return fmt.Errorf("get instances for source stage: %w", err) + return fmt.Errorf("get containers for source stage: %w", err) } - for _, inst := range instances { - if inst.ImageTag == imageTag && (inst.Status == "running" || inst.Status == "stopped") { + for _, c := range containers { + if c.ImageTag == imageTag && (c.State == "running" || c.State == "stopped") { return nil // Tag found in source stage, promotion is allowed. } } diff --git a/internal/deployer/rollback.go b/internal/deployer/rollback.go index b678eda..f983dc8 100644 --- a/internal/deployer/rollback.go +++ b/internal/deployer/rollback.go @@ -32,24 +32,25 @@ func (d *Deployer) rollback(ctx context.Context, deployID string, containerID st } } - // Clean up DNS record if the instance had a subdomain. + // Clean up DNS record if the container had a subdomain. instanceID is + // the container row ID (same UUID either way) — read from containers. if instanceID != "" { - inst, err := d.store.GetInstanceByID(instanceID) - if err == nil && inst.Subdomain != "" { + c, err := d.store.GetContainerByID(instanceID) + if err == nil && c.Subdomain != "" { settings, settingsErr := d.store.GetSettings() if settingsErr != nil { slog.Warn("rollback: failed to get settings for DNS cleanup", "error", settingsErr) } else if settings.Domain != "" { - fqdn := inst.Subdomain + "." + settings.Domain + fqdn := c.Subdomain + "." + settings.Domain d.removeDNS(ctx, fqdn, deployID) } } } - // Update instance status to failed if it was created. + // Mark the container row as failed if it was created. 
if instanceID != "" { - if err := d.store.UpdateInstanceStatus(instanceID, "failed"); err != nil { - slog.Warn("rollback: update instance status", "instance_id", instanceID, "error", err) + if err := d.store.UpdateContainerState(instanceID, "failed"); err != nil { + slog.Warn("rollback: update container state", "id", instanceID, "error", err) } } diff --git a/internal/docker/client.go b/internal/docker/client.go index dda483b..5297b7d 100644 --- a/internal/docker/client.go +++ b/internal/docker/client.go @@ -9,17 +9,10 @@ import ( // Labels applied to all containers managed by Tinyforge. // -// Workload-shaped labels (LabelWorkloadID, LabelWorkloadKind, LabelRole, -// LabelManaged) are the canonical set going forward and what the reconciler -// queries by. The legacy project/stage/instance-id labels are still emitted -// alongside them for back-compat with anything that selects on them -// (operator runbooks, monitoring scrape rules, ad-hoc shell debugging) — they -// will be removed once the migration soaks. +// The legacy tinyforge.project / tinyforge.stage / tinyforge.instance-id +// labels were removed in the workload refactor — the deployer now stamps +// only the workload-shaped labels below at create time. const ( - LabelProject = "tinyforge.project" - LabelStage = "tinyforge.stage" - LabelInstanceID = "tinyforge.instance-id" - LabelManaged = "tinyforge.managed" // present on every Tinyforge-managed container LabelWorkloadID = "tinyforge.workload.id" // workload row primary key LabelWorkloadKind = "tinyforge.workload.kind" // 'project' | 'stack' | 'site' diff --git a/internal/docker/container.go b/internal/docker/container.go index 75af8c3..f8d88ab 100644 --- a/internal/docker/container.go +++ b/internal/docker/container.go @@ -39,15 +39,6 @@ type ContainerConfig struct { // Tinyforge management labels are added automatically via Project, Stage, and InstanceID. Labels map[string]string - // Project is the Tinyforge project name (used for labelling). 
- Project string - - // Stage is the Tinyforge stage name (used for labelling). - Stage string - - // InstanceID is the Tinyforge instance ID (used for labelling). - InstanceID string - // WorkloadID is the unifying primitive's row ID (Workload.ID). Future // reconciler / global views key off this label, so it must be set on // every Tinyforge-managed container (project, stack, site). @@ -106,12 +97,7 @@ func (c *Client) CreateContainer(ctx context.Context, cfg ContainerConfig) (stri for k, v := range cfg.Labels { labels[k] = v } - // Legacy labels (kept for back-compat with operator runbooks / - // monitoring scrape rules; will be removed after the workload soak). - labels[LabelProject] = cfg.Project - labels[LabelStage] = cfg.Stage - labels[LabelInstanceID] = cfg.InstanceID - // Workload-shaped labels — canonical going forward. + // Workload-shaped labels — the canonical Tinyforge label set. labels[LabelManaged] = "true" if cfg.WorkloadID != "" { labels[LabelWorkloadID] = cfg.WorkloadID @@ -225,26 +211,27 @@ func (c *Client) RestartContainer(ctx context.Context, containerID string, timeo } // ManagedContainer holds summary information about a container managed by Tinyforge. +// WorkloadID/Kind/Role are pulled from the canonical Tinyforge labels. type ManagedContainer struct { - ID string - Name string - Image string - Status string - State string - Project string - Stage string - InstanceID string - Ports []uint16 + ID string + Name string + Image string + Status string + State string + WorkloadID string + WorkloadKind string + Role string + Ports []uint16 } -// ListContainers returns all containers matching the given label filters. -// Pass nil or an empty map to list all Tinyforge managed containers. -// Label filters are key=value pairs applied as Docker label filters. +// ListContainers returns all Tinyforge-managed containers (label +// tinyforge.managed=true), optionally narrowed by additional label filters. 
+// Returns the workload labels so callers can dispatch / display without an +// extra inspect call. func (c *Client) ListContainers(ctx context.Context, labelFilters map[string]string) ([]ManagedContainer, error) { filterArgs := make(client.Filters) - // Always filter by the Tinyforge project label to only return managed containers. - filterArgs.Add("label", LabelProject) + filterArgs.Add("label", LabelManaged+"=true") for k, v := range labelFilters { if v != "" { @@ -278,15 +265,15 @@ func (c *Client) ListContainers(ctx context.Context, labelFilters map[string]str } result = append(result, ManagedContainer{ - ID: ctr.ID, - Name: name, - Image: ctr.Image, - Status: ctr.Status, - State: string(ctr.State), - Project: ctr.Labels[LabelProject], - Stage: ctr.Labels[LabelStage], - InstanceID: ctr.Labels[LabelInstanceID], - Ports: ports, + ID: ctr.ID, + Name: name, + Image: ctr.Image, + Status: ctr.Status, + State: string(ctr.State), + WorkloadID: ctr.Labels[LabelWorkloadID], + WorkloadKind: ctr.Labels[LabelWorkloadKind], + Role: ctr.Labels[LabelRole], + Ports: ports, }) } @@ -308,9 +295,8 @@ type ReconcileItem struct { // ListAllForReconciler returns every container the daemon knows about whose // labels mark it as Tinyforge-managed by ANY of the supported schemes: -// - tinyforge.managed (canonical, new) -// - tinyforge.project / tinyforge.instance-id (legacy project) -// - tinyforge.static-site (legacy site) +// - tinyforge.managed (canonical — every project, stack, site we own) +// - tinyforge.static-site (sites that predate the workload labels) // - com.docker.compose.project starting with "tinyforge-" (stacks) // // The Docker API does not support OR'd label filters, so we list everything @@ -361,9 +347,6 @@ func isTinyforgeManaged(labels map[string]string) bool { if labels[LabelManaged] == "true" { return true } - if labels[LabelProject] != "" || labels[LabelInstanceID] != "" { - return true - } if _, ok := labels["tinyforge.static-site"]; ok { return true } diff 
--git a/internal/docker/network.go b/internal/docker/network.go index 3bf066e..8fb12c3 100644 --- a/internal/docker/network.go +++ b/internal/docker/network.go @@ -32,7 +32,7 @@ func (c *Client) EnsureNetwork(ctx context.Context, networkName string) (string, resp, err := c.api.NetworkCreate(ctx, networkName, client.NetworkCreateOptions{ Driver: "bridge", Labels: map[string]string{ - LabelProject: "tinyforge", + LabelManaged: "true", }, }) if err != nil { diff --git a/internal/reconciler/reconciler.go b/internal/reconciler/reconciler.go index 216c620..6add527 100644 --- a/internal/reconciler/reconciler.go +++ b/internal/reconciler/reconciler.go @@ -5,15 +5,17 @@ // longer present are flipped to state='missing'. // // Dispatch precedence: -// 1. tinyforge.workload.id label (canonical) -// 2. tinyforge.instance-id label (legacy project — joins via instances row) -// 3. tinyforge.static-site label (legacy site) -// 4. com.docker.compose.project (stack — joins via Stack.ComposeProjectName) +// 1. tinyforge.workload.id label (canonical, new) +// 2. tinyforge.static-site label (legacy site — joins via static_sites) +// 3. com.docker.compose.project (stack — joins via Stack.ComposeProjectName) +// +// The legacy tinyforge.instance-id path was removed when the deployer was +// rewritten to use Container natively — every Tinyforge-managed project +// container now carries the workload labels at create time. 
package reconciler import ( "context" - "errors" "log/slog" "strings" "sync" @@ -123,9 +125,6 @@ func (r *Reconciler) upsertFromItem(ctx context.Context, item docker.ReconcileIt if id := item.Labels[docker.LabelWorkloadID]; id != "" { return r.upsertByWorkloadLabel(item, id) } - if instanceID := item.Labels[docker.LabelInstanceID]; instanceID != "" { - return r.upsertByInstanceLabel(item, instanceID) - } if siteID := item.Labels["tinyforge.static-site"]; siteID != "" { return r.upsertBySiteLabel(item, siteID) } @@ -135,12 +134,17 @@ func (r *Reconciler) upsertFromItem(ctx context.Context, item docker.ReconcileIt return "" } -// upsertByWorkloadLabel — canonical path. WorkloadID + Role uniquely -// identifies the row. ID stays deterministic so re-deploys update in place. +// upsertByWorkloadLabel — canonical path. The row may already exist with a +// deployer-assigned UUID (project deploys do this so each blue-green slot +// has a stable handle); look it up by docker container ID first and fall +// back to the deterministic workloadID:role key. func (r *Reconciler) upsertByWorkloadLabel(item docker.ReconcileItem, workloadID string) string { role := item.Labels[docker.LabelRole] kind := item.Labels[docker.LabelWorkloadKind] - rowID := workloadIDRow(workloadID, kind, role, item.Labels[docker.LabelInstanceID], item.ID) + rowID := workloadIDRow(workloadID, kind, role, item.ID) + if existing, err := r.store.GetContainerByDockerID(item.ID); err == nil { + rowID = existing.ID + } port := 0 if len(item.Ports) > 0 { @@ -164,49 +168,6 @@ func (r *Reconciler) upsertByWorkloadLabel(item docker.ReconcileItem, workloadID return rowID } -// upsertByInstanceLabel — legacy project path. Instance ID maps 1:1 to the -// container row ID by construction (deployer uses the same UUID for both), -// so we can update directly. We still need the workload ID for the row. 
-func (r *Reconciler) upsertByInstanceLabel(item docker.ReconcileItem, instanceID string) string { - inst, err := r.store.GetInstanceByID(instanceID) - if err != nil { - // Container with stale label — instance row gone. Skip silently. - if errors.Is(err, store.ErrNotFound) { - return "" - } - slog.Warn("reconciler: lookup instance", "instance_id", instanceID, "error", err) - return "" - } - w, err := r.store.GetWorkloadByRef(store.WorkloadKindProject, inst.ProjectID) - if err != nil { - return "" - } - port := inst.Port - if port == 0 && len(item.Ports) > 0 { - port = int(item.Ports[0]) - } - if err := r.store.UpsertContainer(store.Container{ - ID: inst.ID, - WorkloadID: w.ID, - WorkloadKind: string(store.WorkloadKindProject), - Role: item.Labels[docker.LabelStage], - ContainerID: item.ID, - ImageRef: item.Image, - ImageTag: inst.ImageTag, - Host: "local", - State: normalizeState(item.State), - Port: port, - Subdomain: inst.Subdomain, - ProxyRouteID: inst.ProxyRouteID, - NpmProxyID: inst.NpmProxyID, - LastSeenAt: store.Now(), - }); err != nil { - slog.Warn("reconciler: upsert by instance label", "container_id", item.ID, "error", err) - return "" - } - return inst.ID -} - func (r *Reconciler) upsertBySiteLabel(item docker.ReconcileItem, siteID string) string { w, err := r.store.GetWorkloadByRef(store.WorkloadKindSite, siteID) if err != nil { @@ -313,20 +274,18 @@ func (r *Reconciler) markMissingRows(seen map[string]struct{}) { } // workloadIDRow picks the row ID for a workload-labelled container. -// For projects the deployer assigns instance ID = container row ID (via -// LabelInstanceID), so we honor that to keep IDs stable. For stack/site -// it's the deterministic workloadID:role pattern. -func workloadIDRow(workloadID, kind, role, instanceID, containerID string) string { - if instanceID != "" && kind == string(store.WorkloadKindProject) { - return instanceID - } +// Stack rows use the deterministic workloadID:role pattern; sites use +// workloadID:site. 
Project rows have a per-deploy UUID assigned by the +// deployer that this key does not reproduce; upsertByWorkloadLabel prefers the +// existing row found by Docker container ID and falls back to this key. +func workloadIDRow(workloadID, kind, role, containerID string) string { if role != "" { return workloadID + ":" + role } if kind == string(store.WorkloadKindSite) { return workloadID + ":site" } - // Last-resort fallback: container ID. Better than ""; uncommon path. + // Last-resort fallback: container ID. Uncommon path. return workloadID + ":" + containerID } diff --git a/internal/stale/scanner.go b/internal/stale/scanner.go index d84979d..0f93f1b 100644 --- a/internal/stale/scanner.go +++ b/internal/stale/scanner.go @@ -14,16 +14,21 @@ import ( "github.com/robfig/cron/v3" ) -// StaleInstance holds enriched info about a stale container for API responses. -type StaleInstance struct { - Instance store.Instance `json:"instance"` - ProjectName string `json:"project_name"` - StageName string `json:"stage_name"` - DaysStale int `json:"days_stale"` +// StaleContainer is a stale container row enriched with the human-readable +// labels needed to render the Stale view (workload + role + days). +// +// JSON shape uses container_id semantics — the frontend type was historically +// "Instance"; after the workload refactor it consumes Container fields directly. +type StaleContainer struct { + Container store.Container `json:"container"` + WorkloadID string `json:"workload_id"` + WorkloadName string `json:"workload_name"` + Role string `json:"role"` + DaysStale int `json:"days_stale"` } -// Scanner periodically checks for stale containers that have been -// non-running for longer than the configured threshold. +// Scanner periodically checks for containers that have been non-running for +// longer than the configured threshold.
type Scanner struct { store *store.Store docker *docker.Client @@ -34,8 +39,8 @@ type Scanner struct { entryID cron.EntryID running bool - // knownStale tracks instance IDs that have already had a stale event emitted, - // to avoid re-emitting warnings for the same instance. + // knownStale tracks container row IDs that have already had a stale event + // emitted, to avoid re-emitting the same warning on every tick. knownStale map[string]struct{} } @@ -101,7 +106,7 @@ func (s *Scanner) Stop() { } // Scan performs a single stale-container scan cycle. -// It updates last_alive_at for running containers and detects newly stale ones. +// Updates last_seen_at for running containers and detects newly stale ones. func (s *Scanner) Scan(ctx context.Context) error { settings, err := s.store.GetSettings() if err != nil { @@ -113,67 +118,53 @@ func (s *Scanner) Scan(ctx context.Context) error { thresholdDays = 7 } - // Get all instances from the store. - instances, err := s.store.ListAllInstances() + containers, err := s.store.ListContainers(store.ContainerFilter{}) if err != nil { - return fmt.Errorf("list all instances: %w", err) + return fmt.Errorf("list containers: %w", err) } - - if len(instances) == 0 { + if len(containers) == 0 { return nil } - // Get all managed Docker containers to check live state. - containers, err := s.docker.ListContainers(ctx, nil) + // Live state from Docker, indexed by Docker container ID so each row + // is matched via its ContainerID in a single pass. + dockerContainers, err := s.docker.ListContainers(ctx, nil) if err != nil { return fmt.Errorf("list docker containers: %w", err) } - - // Build a lookup: instance ID -> container state.
- containerStateByInstanceID := make(map[string]string, len(containers)) - for _, c := range containers { - if c.InstanceID != "" { - containerStateByInstanceID[c.InstanceID] = c.State - } + stateByContainerID := make(map[string]string, len(dockerContainers)) + for _, dc := range dockerContainers { + stateByContainerID[dc.ID] = dc.State } now := time.Now().UTC() currentStaleIDs := make(map[string]struct{}) - for _, inst := range instances { - // Skip instances already being cleaned up. - if inst.Status == "removing" { + for _, c := range containers { + if c.State == "removing" { continue } - dockerState := containerStateByInstanceID[inst.ID] + dockerState := stateByContainerID[c.ContainerID] - // If the container is running in Docker, update last_alive_at. if dockerState == "running" { - if err := s.store.UpdateLastAliveAt(inst.ID); err != nil { - slog.Warn("stale scanner: failed to update last_alive_at", - "instance_id", inst.ID, "error", err) - } - // Also sync store status if it was out of date. - if inst.Status != "running" { - if err := s.store.UpdateInstanceStatus(inst.ID, "running"); err != nil { - slog.Warn("stale scanner: failed to sync instance status", - "instance_id", inst.ID, "error", err) - } + if err := s.store.UpdateContainerState(c.ID, "running"); err != nil { + slog.Warn("stale scanner: failed to update state", + "id", c.ID, "error", err) } continue } - // Container is not running. Check if it's stale. - if inst.LastAliveAt == "" { - // Never been seen running. Use created_at as fallback. - inst.LastAliveAt = inst.CreatedAt + // Container is not running. Check staleness against last_seen_at, + // falling back to created_at if it never came up. 
+ ref := c.LastSeenAt + if ref == "" { + ref = c.CreatedAt } - - lastAlive, parseErr := time.Parse("2006-01-02 15:04:05", inst.LastAliveAt) + lastAlive, parseErr := time.Parse("2006-01-02 15:04:05", ref) if parseErr != nil { - slog.Warn("stale scanner: failed to parse last_alive_at", - "instance_id", inst.ID, "last_alive_at", inst.LastAliveAt, "error", parseErr) + slog.Warn("stale scanner: failed to parse last_seen_at", + "id", c.ID, "ref", ref, "error", parseErr) continue } @@ -182,23 +173,19 @@ func (s *Scanner) Scan(ctx context.Context) error { continue } - // This instance is stale. - currentStaleIDs[inst.ID] = struct{}{} - - // Emit event only if this is newly detected as stale. - if _, alreadyKnown := s.knownStale[inst.ID]; !alreadyKnown { - s.emitStaleEvent(inst, daysSinceAlive) + currentStaleIDs[c.ID] = struct{}{} + if _, alreadyKnown := s.knownStale[c.ID]; !alreadyKnown { + s.emitStaleEvent(c, daysSinceAlive) } } - // Update known stale set: remove IDs that are no longer stale. s.knownStale = currentStaleIDs - return nil } -// FindStaleInstances returns all currently stale instances with enriched project/stage info. -func (s *Scanner) FindStaleInstances(ctx context.Context) ([]StaleInstance, error) { +// FindStaleContainers returns all currently stale containers enriched with +// workload + role labels for rendering. 
+func (s *Scanner) FindStaleContainers(ctx context.Context) ([]StaleContainer, error) { settings, err := s.store.GetSettings() if err != nil { return nil, fmt.Errorf("get settings: %w", err) @@ -209,58 +196,45 @@ func (s *Scanner) FindStaleInstances(ctx context.Context) ([]StaleInstance, erro thresholdDays = 7 } - instances, err := s.store.ListAllInstances() + containers, err := s.store.ListContainers(store.ContainerFilter{}) if err != nil { - return nil, fmt.Errorf("list all instances: %w", err) + return nil, fmt.Errorf("list containers: %w", err) } - containers, err := s.docker.ListContainers(ctx, nil) + dockerContainers, err := s.docker.ListContainers(ctx, nil) if err != nil { - // Docker unavailable — fall back to store-only detection (no live state). + // Docker unavailable — fall back to store-only detection. slog.Warn("stale scanner: docker unavailable, using store status only", "error", err) - containers = nil + dockerContainers = nil + } + stateByContainerID := make(map[string]string, len(dockerContainers)) + for _, dc := range dockerContainers { + stateByContainerID[dc.ID] = dc.State } - containerStateByInstanceID := make(map[string]string, len(containers)) - for _, c := range containers { - if c.InstanceID != "" { - containerStateByInstanceID[c.InstanceID] = c.State - } - } - - // Pre-load project and stage names to avoid N+1 queries. - allProjects, _ := s.store.GetAllProjects() - projectNames := make(map[string]string, len(allProjects)) - for _, p := range allProjects { - projectNames[p.ID] = p.Name - } - stageNames := make(map[string]string) - for _, p := range allProjects { - stages, _ := s.store.GetStagesByProjectID(p.ID) - for _, st := range stages { - stageNames[st.ID] = st.Name - } + // Pre-load workload names so each stale row carries a friendly identifier. 
+ workloads, _ := s.store.ListWorkloads("") + workloadNameByID := make(map[string]string, len(workloads)) + for _, w := range workloads { + workloadNameByID[w.ID] = w.Name } now := time.Now().UTC() - var result []StaleInstance + var result []StaleContainer - for _, inst := range instances { - if inst.Status == "removing" { + for _, c := range containers { + if c.State == "removing" { + continue + } + if stateByContainerID[c.ContainerID] == "running" { continue } - // If Docker says it's running, it's not stale. - if containerStateByInstanceID[inst.ID] == "running" { - continue + ref := c.LastSeenAt + if ref == "" { + ref = c.CreatedAt } - - lastAlive := inst.LastAliveAt - if lastAlive == "" { - lastAlive = inst.CreatedAt - } - - lastAliveTime, parseErr := time.Parse("2006-01-02 15:04:05", lastAlive) + lastAliveTime, parseErr := time.Parse("2006-01-02 15:04:05", ref) if parseErr != nil { continue } @@ -270,21 +244,17 @@ func (s *Scanner) FindStaleInstances(ctx context.Context) ([]StaleInstance, erro continue } - // Look up project and stage names from pre-loaded maps. - projectName := projectNames[inst.ProjectID] - if projectName == "" { - projectName = inst.ProjectID - } - stageName := stageNames[inst.StageID] - if stageName == "" { - stageName = inst.StageID + name := workloadNameByID[c.WorkloadID] + if name == "" { + name = c.WorkloadID } - result = append(result, StaleInstance{ - Instance: inst, - ProjectName: projectName, - StageName: stageName, - DaysStale: daysSinceAlive, + result = append(result, StaleContainer{ + Container: c, + WorkloadID: c.WorkloadID, + WorkloadName: name, + Role: c.Role, + DaysStale: daysSinceAlive, }) } @@ -292,20 +262,20 @@ func (s *Scanner) FindStaleInstances(ctx context.Context) ([]StaleInstance, erro } // emitStaleEvent publishes a warning event for a newly detected stale container. 
-func (s *Scanner) emitStaleEvent(inst store.Instance, daysStale int) { +func (s *Scanner) emitStaleEvent(c store.Container, daysStale int) { metadata, _ := json.Marshal(map[string]any{ - "instance_id": inst.ID, - "project_id": inst.ProjectID, - "stage_id": inst.StageID, - "image_tag": inst.ImageTag, - "last_alive_at": inst.LastAliveAt, - "days_stale": daysStale, + "container_id": c.ID, + "workload_id": c.WorkloadID, + "workload_kind": c.WorkloadKind, + "role": c.Role, + "image_tag": c.ImageTag, + "last_seen_at": c.LastSeenAt, + "days_stale": daysStale, }) msg := fmt.Sprintf("Container %s (tag: %s) has been non-running for %d days", - inst.ID, inst.ImageTag, daysStale) + c.ID, c.ImageTag, daysStale) - // Persist directly to event log. evt, err := s.store.InsertEvent(store.EventLog{ Source: "stale_scanner", Severity: "warn", @@ -317,15 +287,14 @@ func (s *Scanner) emitStaleEvent(inst store.Instance, daysStale int) { return } - // Publish for SSE clients. s.eventBus.Publish(events.Event{ Type: events.EventLog, Payload: events.EventLogPayload{ ID: evt.ID, Source: "stale_scanner", - Severity: "warn", - Message: msg, - Metadata: string(metadata), + Severity: "warn", + Message: msg, + Metadata: string(metadata), CreatedAt: evt.CreatedAt, }, }) diff --git a/internal/staticsite/manager.go b/internal/staticsite/manager.go index eb3fe18..c59de6f 100644 --- a/internal/staticsite/manager.go +++ b/internal/staticsite/manager.go @@ -280,11 +280,9 @@ func (m *Manager) Deploy(ctx context.Context, siteID string, force bool) error { NetworkID: networkID, Mounts: mounts, Labels: map[string]string{ - "tinyforge.static-site": site.ID, + "tinyforge.static-site": site.ID, "tinyforge.static-site-name": site.Name, }, - Project: "static-site", - Stage: site.Name, WorkloadID: m.resolveSiteWorkloadID(site.ID), WorkloadKind: string(store.WorkloadKindSite), Role: "", @@ -307,11 +305,12 @@ func (m *Manager) Deploy(ctx context.Context, siteID string, force bool) error { NetworkID: networkID, 
Mounts: mounts, Labels: map[string]string{ - "tinyforge.static-site": site.ID, + "tinyforge.static-site": site.ID, "tinyforge.static-site-name": site.Name, }, - Project: "static-site", - Stage: site.Name, + WorkloadID: m.resolveSiteWorkloadID(site.ID), + WorkloadKind: string(store.WorkloadKindSite), + Role: "", }) if err != nil { m.updateStatus(site.ID, "failed", latestSHA, fmt.Sprintf("create container: %v", err)) diff --git a/internal/stats/collector.go b/internal/stats/collector.go index f2e5b99..c95ae0d 100644 --- a/internal/stats/collector.go +++ b/internal/stats/collector.go @@ -195,38 +195,29 @@ type target struct { OwnerID string } -// buildTargets fetches running instances and sites that have a container ID. +// buildTargets fetches container rows that have a docker container_id bound. +// Project containers and stack containers are surfaced as OwnerTypeInstance +// (the stats sample owner_type is kept for back-compat with the persisted +// schema and the dashboard's group-by semantics). 
func (c *Collector) buildTargets() []target { var out []target - instances, err := c.store.ListAllInstances() + containers, err := c.store.ListContainers(store.ContainerFilter{}) if err != nil { - slog.Warn("stats collector: list instances", "error", err) + slog.Warn("stats collector: list containers", "error", err) } else { - for _, inst := range instances { - if inst.ContainerID == "" { + for _, row := range containers { + if row.ContainerID == "" { continue } - out = append(out, target{ - ContainerID: inst.ContainerID, - OwnerType: OwnerTypeInstance, - OwnerID: inst.ID, - }) - } - } - - sites, err := c.store.GetAllStaticSites() - if err != nil { - slog.Warn("stats collector: list sites", "error", err) - } else { - for _, site := range sites { - if site.ContainerID == "" { - continue + ownerType := OwnerTypeInstance + if row.WorkloadKind == string(store.WorkloadKindSite) { + ownerType = OwnerTypeSite } out = append(out, target{ - ContainerID: site.ContainerID, - OwnerType: OwnerTypeSite, - OwnerID: site.ID, + ContainerID: row.ContainerID, + OwnerType: ownerType, + OwnerID: row.ID, }) } } diff --git a/internal/store/containers.go b/internal/store/containers.go index 47f5d73..3318e89 100644 --- a/internal/store/containers.go +++ b/internal/store/containers.go @@ -135,6 +135,77 @@ func (s *Store) GetContainerByDockerID(dockerID string) (Container, error) { return c, nil } +// ListProxyRoutes returns proxy-enabled project containers joined with +// project + stage names. Reads from the normalized containers index. Stage +// ID is resolved through a (project_id, role=stage_name) join, which is +// uniquely indexed via UNIQUE(project_id, name) on stages. +// +// Source is reported as "instance" for back-compat with the Proxies page +// filter (the frontend keys off the literal string). 
+func (s *Store) ListProxyRoutes(domain string) ([]ProxyRoute, error) { + rows, err := s.db.Query(` + SELECT c.id, p.id, p.name, s.id, s.name, + c.image_tag, c.subdomain, c.container_id, c.port, + c.proxy_route_id, c.npm_proxy_id, c.state, c.created_at + FROM containers c + JOIN workloads w ON w.id = c.workload_id AND w.kind = 'project' + JOIN projects p ON p.id = w.ref_id + JOIN stages s ON s.project_id = p.id AND s.name = c.role + WHERE c.subdomain != '' AND (c.proxy_route_id != '' OR c.npm_proxy_id > 0) + ORDER BY p.name, s.name, c.created_at DESC`, + ) + if err != nil { + return nil, fmt.Errorf("query proxy routes: %w", err) + } + defer rows.Close() + + routes := []ProxyRoute{} + for rows.Next() { + var r ProxyRoute + if err := rows.Scan( + &r.InstanceID, &r.ProjectID, &r.ProjectName, &r.StageID, &r.StageName, + &r.ImageTag, &r.Subdomain, &r.ContainerID, &r.Port, + &r.ProxyRouteID, &r.NpmProxyID, &r.Status, &r.CreatedAt, + ); err != nil { + return nil, fmt.Errorf("scan proxy route: %w", err) + } + r.Source = "instance" + if domain != "" && r.Subdomain != "" { + r.Domain = r.Subdomain + "." + domain + } + routes = append(routes, r) + } + return routes, rows.Err() +} + +// ListContainersByStageID returns project containers for the given stage, +// newest first. Resolves stage → project_id → workload(kind=project) → +// containers with role = stage.name. Replaces GetInstancesByStageID for +// callers in the deployer / API layer. +func (s *Store) ListContainersByStageID(stageID string) ([]Container, error) { + rows, err := s.db.Query(` + SELECT `+prefixCols(containerColumns, "c.")+` + FROM containers c + JOIN workloads w ON w.id = c.workload_id AND w.kind = 'project' + JOIN stages s ON s.project_id = w.ref_id AND s.name = c.role + WHERE s.id = ? 
+ ORDER BY c.created_at DESC`, stageID) + if err != nil { + return nil, fmt.Errorf("query containers by stage: %w", err) + } + defer rows.Close() + + out := []Container{} + for rows.Next() { + c, err := scanContainer(rows) + if err != nil { + return nil, fmt.Errorf("scan container: %w", err) + } + out = append(out, c) + } + return out, rows.Err() +} + // ListContainersByWorkload returns all containers for a given workload, newest first. func (s *Store) ListContainersByWorkload(workloadID string) ([]Container, error) { rows, err := s.db.Query( diff --git a/internal/store/instances.go b/internal/store/instances.go deleted file mode 100644 index 9280170..0000000 --- a/internal/store/instances.go +++ /dev/null @@ -1,251 +0,0 @@ -package store - -import ( - "database/sql" - "errors" - "fmt" - - "github.com/google/uuid" -) - -// instanceColumns is the canonical column list for instance queries. -const instanceColumns = `id, stage_id, project_id, container_id, image_tag, subdomain, npm_proxy_id, proxy_route_id, status, port, last_alive_at, created_at, updated_at` - -// scanInstance scans a row into an Instance struct using the canonical column order. -func scanInstance(scanner interface{ Scan(...any) error }) (Instance, error) { - var inst Instance - err := scanner.Scan( - &inst.ID, &inst.StageID, &inst.ProjectID, &inst.ContainerID, &inst.ImageTag, - &inst.Subdomain, &inst.NpmProxyID, &inst.ProxyRouteID, &inst.Status, &inst.Port, - &inst.LastAliveAt, &inst.CreatedAt, &inst.UpdatedAt, - ) - return inst, err -} - -// CreateInstance inserts a new instance record. 
-func (s *Store) CreateInstance(inst Instance) (Instance, error) { - inst.ID = uuid.New().String() - inst.CreatedAt = Now() - inst.UpdatedAt = inst.CreatedAt - - _, err := s.db.Exec( - `INSERT INTO instances (`+instanceColumns+`) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, - inst.ID, inst.StageID, inst.ProjectID, inst.ContainerID, inst.ImageTag, - inst.Subdomain, inst.NpmProxyID, inst.ProxyRouteID, inst.Status, inst.Port, - inst.LastAliveAt, inst.CreatedAt, inst.UpdatedAt, - ) - if err != nil { - return Instance{}, fmt.Errorf("insert instance: %w", err) - } - return inst, nil -} - -// CreateInstanceWithID inserts a new instance using a pre-generated ID. -// Use this when the ID must be known before creation (e.g., for container labels). -func (s *Store) CreateInstanceWithID(inst Instance) (Instance, error) { - if inst.ID == "" { - return Instance{}, fmt.Errorf("instance ID is required") - } - inst.CreatedAt = Now() - inst.UpdatedAt = inst.CreatedAt - - _, err := s.db.Exec( - `INSERT INTO instances (`+instanceColumns+`) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, - inst.ID, inst.StageID, inst.ProjectID, inst.ContainerID, inst.ImageTag, - inst.Subdomain, inst.NpmProxyID, inst.ProxyRouteID, inst.Status, inst.Port, - inst.LastAliveAt, inst.CreatedAt, inst.UpdatedAt, - ) - if err != nil { - return Instance{}, fmt.Errorf("insert instance: %w", err) - } - return inst, nil -} - -// GetInstanceByID returns a single instance by its ID. -func (s *Store) GetInstanceByID(id string) (Instance, error) { - inst, err := scanInstance(s.db.QueryRow( - `SELECT `+instanceColumns+` FROM instances WHERE id = ?`, id, - )) - if errors.Is(err, sql.ErrNoRows) { - return Instance{}, fmt.Errorf("instance %s: %w", id, ErrNotFound) - } - if err != nil { - return Instance{}, fmt.Errorf("query instance: %w", err) - } - return inst, nil -} - -// GetInstancesByStageID returns all instances for a given stage. 
-func (s *Store) GetInstancesByStageID(stageID string) ([]Instance, error) { - rows, err := s.db.Query( - `SELECT `+instanceColumns+` FROM instances WHERE stage_id = ? ORDER BY created_at DESC`, stageID, - ) - if err != nil { - return nil, fmt.Errorf("query instances: %w", err) - } - defer rows.Close() - - instances := []Instance{} - for rows.Next() { - inst, err := scanInstance(rows) - if err != nil { - return nil, fmt.Errorf("scan instance: %w", err) - } - instances = append(instances, inst) - } - return instances, rows.Err() -} - -// ListAllInstances returns all instances across all stages. -func (s *Store) ListAllInstances() ([]Instance, error) { - rows, err := s.db.Query( - `SELECT ` + instanceColumns + ` FROM instances ORDER BY created_at DESC`, - ) - if err != nil { - return nil, fmt.Errorf("query all instances: %w", err) - } - defer rows.Close() - - instances := []Instance{} - for rows.Next() { - inst, err := scanInstance(rows) - if err != nil { - return nil, fmt.Errorf("scan instance: %w", err) - } - instances = append(instances, inst) - } - return instances, rows.Err() -} - -// ProxyRoute represents a proxy-enabled resource (Docker instance or static site) -// joined with the human-readable names needed to render the Proxies page. -type ProxyRoute struct { - Source string `json:"source"` // "instance" or "static_site" - InstanceID string `json:"instance_id"` - ProjectID string `json:"project_id"` - ProjectName string `json:"project_name"` - StageID string `json:"stage_id"` - StageName string `json:"stage_name"` - ImageTag string `json:"image_tag"` - Subdomain string `json:"subdomain"` - Domain string `json:"domain"` - ContainerID string `json:"container_id"` - Port int `json:"port"` - ProxyRouteID string `json:"proxy_route_id"` - NpmProxyID int `json:"npm_proxy_id"` - Status string `json:"status"` - CreatedAt string `json:"created_at"` -} - -// ListProxyRoutes returns proxy-enabled project containers joined with -// project + stage names. 
Reads from the normalized containers index — the -// instances table is no longer queried. Stage ID is resolved through a -// (project_id, role=stage_name) join, which is uniquely indexed. -// -// Source is reported as "instance" for back-compat with the Proxies page -// filter (which keys off the literal string). -func (s *Store) ListProxyRoutes(domain string) ([]ProxyRoute, error) { - rows, err := s.db.Query(` - SELECT c.id, p.id, p.name, s.id, s.name, - c.image_tag, c.subdomain, c.container_id, c.port, - c.proxy_route_id, c.npm_proxy_id, c.state, c.created_at - FROM containers c - JOIN workloads w ON w.id = c.workload_id AND w.kind = 'project' - JOIN projects p ON p.id = w.ref_id - JOIN stages s ON s.project_id = p.id AND s.name = c.role - WHERE c.subdomain != '' AND (c.proxy_route_id != '' OR c.npm_proxy_id > 0) - ORDER BY p.name, s.name, c.created_at DESC`, - ) - if err != nil { - return nil, fmt.Errorf("query proxy routes: %w", err) - } - defer rows.Close() - - routes := []ProxyRoute{} - for rows.Next() { - var r ProxyRoute - if err := rows.Scan( - &r.InstanceID, &r.ProjectID, &r.ProjectName, &r.StageID, &r.StageName, - &r.ImageTag, &r.Subdomain, &r.ContainerID, &r.Port, - &r.ProxyRouteID, &r.NpmProxyID, &r.Status, &r.CreatedAt, - ); err != nil { - return nil, fmt.Errorf("scan proxy route: %w", err) - } - r.Source = "instance" - if domain != "" && r.Subdomain != "" { - r.Domain = r.Subdomain + "." + domain - } - routes = append(routes, r) - } - return routes, rows.Err() -} - -// UpdateInstance updates an existing instance's mutable fields. -func (s *Store) UpdateInstance(inst Instance) error { - inst.UpdatedAt = Now() - result, err := s.db.Exec( - `UPDATE instances SET stage_id=?, project_id=?, container_id=?, image_tag=?, subdomain=?, npm_proxy_id=?, proxy_route_id=?, status=?, port=?, last_alive_at=?, updated_at=? 
- WHERE id=?`, - inst.StageID, inst.ProjectID, inst.ContainerID, inst.ImageTag, - inst.Subdomain, inst.NpmProxyID, inst.ProxyRouteID, inst.Status, inst.Port, - inst.LastAliveAt, inst.UpdatedAt, inst.ID, - ) - if err != nil { - return fmt.Errorf("update instance: %w", err) - } - n, _ := result.RowsAffected() - if n == 0 { - return fmt.Errorf("instance %s: %w", inst.ID, ErrNotFound) - } - return nil -} - -// UpdateInstanceStatus sets only the status field on an instance. -func (s *Store) UpdateInstanceStatus(id string, status string) error { - ts := Now() - result, err := s.db.Exec( - `UPDATE instances SET status=?, updated_at=? WHERE id=?`, - status, ts, id, - ) - if err != nil { - return fmt.Errorf("update instance status: %w", err) - } - n, _ := result.RowsAffected() - if n == 0 { - return fmt.Errorf("instance %s: %w", id, ErrNotFound) - } - return nil -} - -// UpdateLastAliveAt sets the last_alive_at timestamp for an instance. -// Called when an instance is seen running. -func (s *Store) UpdateLastAliveAt(id string) error { - ts := Now() - result, err := s.db.Exec( - `UPDATE instances SET last_alive_at=?, updated_at=? WHERE id=?`, - ts, ts, id, - ) - if err != nil { - return fmt.Errorf("update last_alive_at: %w", err) - } - n, _ := result.RowsAffected() - if n == 0 { - return fmt.Errorf("instance %s: %w", id, ErrNotFound) - } - return nil -} - -// DeleteInstance removes an instance by ID. 
-func (s *Store) DeleteInstance(id string) error { - result, err := s.db.Exec(`DELETE FROM instances WHERE id = ?`, id) - if err != nil { - return fmt.Errorf("delete instance: %w", err) - } - n, _ := result.RowsAffected() - if n == 0 { - return fmt.Errorf("instance %s: %w", id, ErrNotFound) - } - return nil -} diff --git a/internal/store/models.go b/internal/store/models.go index 7b9fe49..f590b4f 100644 --- a/internal/store/models.go +++ b/internal/store/models.go @@ -142,21 +142,26 @@ type DNSRecord struct { UpdatedAt string `json:"updated_at"` } -// Instance represents a running (or stopped) container for a project stage. -type Instance struct { - ID string `json:"id"` - StageID string `json:"stage_id"` - ProjectID string `json:"project_id"` - ContainerID string `json:"container_id"` - ImageTag string `json:"image_tag"` - Subdomain string `json:"subdomain"` - NpmProxyID int `json:"npm_proxy_id"` - ProxyRouteID string `json:"proxy_route_id"` - Status string `json:"status"` // running, stopped, failed, removing +// ProxyRoute is a proxy-enabled container row joined with its project + stage +// names, shaped for the Proxies page. Source is "instance" for project +// containers and "static_site" for site rows — the names are historical +// (the table itself was renamed to containers in the workload refactor). 
+type ProxyRoute struct { + Source string `json:"source"` + InstanceID string `json:"instance_id"` + ProjectID string `json:"project_id"` + ProjectName string `json:"project_name"` + StageID string `json:"stage_id"` + StageName string `json:"stage_name"` + ImageTag string `json:"image_tag"` + Subdomain string `json:"subdomain"` + Domain string `json:"domain"` + ContainerID string `json:"container_id"` Port int `json:"port"` - LastAliveAt string `json:"last_alive_at"` - CreatedAt string `json:"created_at"` - UpdatedAt string `json:"updated_at"` + ProxyRouteID string `json:"proxy_route_id"` + NpmProxyID int `json:"npm_proxy_id"` + Status string `json:"status"` + CreatedAt string `json:"created_at"` } // Deploy represents a deployment attempt. diff --git a/internal/store/store.go b/internal/store/store.go index 498d45e..2ac6fcb 100644 --- a/internal/store/store.go +++ b/internal/store/store.go @@ -310,12 +310,15 @@ func (s *Store) runMigrations() error { for _, m := range migrations { if _, err := s.db.Exec(m); err != nil { // "duplicate column" / "already exists" are expected when a - // migration has already been applied. Anything else (typo, FK - // conflict, real schema bug) must surface, otherwise the store - // silently runs against the wrong shape. + // migration has already been applied. "no such table" is + // expected for obsolete ALTER TABLE migrations targeting tables + // the workload refactor dropped (e.g. instances). Anything + // else must surface — silently running against the wrong shape + // is worse than a startup failure. msg := err.Error() if !strings.Contains(msg, "duplicate column") && - !strings.Contains(msg, "already exists") { + !strings.Contains(msg, "already exists") && + !strings.Contains(msg, "no such table") { return fmt.Errorf("apply migration %q: %w", m, err) } } @@ -323,8 +326,8 @@ func (s *Store) runMigrations() error { // Create indexes on foreign key columns for query performance. 
indexes := []string{ - `CREATE INDEX IF NOT EXISTS idx_instances_stage_id ON instances(stage_id)`, - `CREATE INDEX IF NOT EXISTS idx_instances_project_id ON instances(project_id)`, + // instances table dropped 2026-05-09 (workload refactor) — no indexes + // needed; containers replaces it with idx_containers_workload below. `CREATE INDEX IF NOT EXISTS idx_deploys_project_id ON deploys(project_id)`, `CREATE INDEX IF NOT EXISTS idx_deploys_stage_id ON deploys(stage_id)`, `CREATE INDEX IF NOT EXISTS idx_deploy_logs_deploy_id ON deploy_logs(deploy_id)`, @@ -344,6 +347,10 @@ func (s *Store) runMigrations() error { `CREATE INDEX IF NOT EXISTS idx_container_stats_container_ts ON container_stats_samples(container_id, ts)`, `CREATE INDEX IF NOT EXISTS idx_container_stats_ts ON container_stats_samples(ts)`, `CREATE INDEX IF NOT EXISTS idx_system_stats_ts ON system_stats_samples(ts)`, + // Drop the legacy instances table — containers is the canonical index + // after the workload refactor (2026-05-09). Idempotent: SQLite's + // DROP TABLE IF EXISTS is a no-op on databases that already shed it. + `DROP TABLE IF EXISTS instances`, // Workload refactor indexes (2026-05-09). 
`CREATE INDEX IF NOT EXISTS idx_workloads_kind ON workloads(kind)`, `CREATE INDEX IF NOT EXISTS idx_workloads_app_id ON workloads(app_id) WHERE app_id != ''`, @@ -449,19 +456,9 @@ CREATE TABLE IF NOT EXISTS settings ( updated_at TEXT NOT NULL DEFAULT (datetime('now')) ); -CREATE TABLE IF NOT EXISTS instances ( - id TEXT PRIMARY KEY, - stage_id TEXT NOT NULL REFERENCES stages(id) ON DELETE CASCADE, - project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE, - container_id TEXT NOT NULL DEFAULT '', - image_tag TEXT NOT NULL, - subdomain TEXT NOT NULL DEFAULT '', - npm_proxy_id INTEGER NOT NULL DEFAULT 0, - status TEXT NOT NULL DEFAULT 'stopped', - port INTEGER NOT NULL DEFAULT 0, - created_at TEXT NOT NULL DEFAULT (datetime('now')), - updated_at TEXT NOT NULL DEFAULT (datetime('now')) -); +-- The instances table was removed in the workload refactor (2026-05-09). +-- Container state lives in the containers table; see runMigrations for the +-- current schema. The DROP TABLE migration runs unconditionally on boot. CREATE TABLE IF NOT EXISTS deploys ( id TEXT PRIMARY KEY, diff --git a/internal/webhook/handler.go b/internal/webhook/handler.go index 3c9527a..c8c0424 100644 --- a/internal/webhook/handler.go +++ b/internal/webhook/handler.go @@ -319,7 +319,19 @@ func (h *Handler) handleWebhook(w http.ResponseWriter, r *http.Request) { return } - project, err := h.store.GetProjectByWebhookSecret(secret) + // Resolve the secret via the workload row first (canonical path — + // workloads.webhook_secret is kept in sync by the project CRUD path). + // Fall back to the project's own column for any pre-refactor row that + // might not have its workload yet (defensive belt-and-suspenders). 
+ var ( + project store.Project + err error + ) + if wl, wErr := h.store.GetWorkloadByWebhookSecret(secret); wErr == nil && wl.Kind == string(store.WorkloadKindProject) { + project, err = h.store.GetProjectByID(wl.RefID) + } else { + project, err = h.store.GetProjectByWebhookSecret(secret) + } if err != nil { if errors.Is(err, store.ErrNotFound) { delivery.StatusCode = http.StatusNotFound @@ -502,7 +514,17 @@ func (h *Handler) handleSiteWebhook(w http.ResponseWriter, r *http.Request) { return } - site, err := h.store.GetStaticSiteByWebhookSecret(secret) + // Workload-first lookup, mirroring the project handler. Falls back to the + // site's own webhook_secret column for pre-refactor rows. + var ( + site store.StaticSite + err error + ) + if wl, wErr := h.store.GetWorkloadByWebhookSecret(secret); wErr == nil && wl.Kind == string(store.WorkloadKindSite) { + site, err = h.store.GetStaticSiteByID(wl.RefID) + } else { + site, err = h.store.GetStaticSiteByWebhookSecret(secret) + } if err != nil { if errors.Is(err, store.ErrNotFound) { delivery.StatusCode = http.StatusNotFound diff --git a/web/src/lib/components/InstanceCard.svelte b/web/src/lib/components/InstanceCard.svelte index 9d388c6..a564659 100644 --- a/web/src/lib/components/InstanceCard.svelte +++ b/web/src/lib/components/InstanceCard.svelte @@ -15,11 +15,12 @@ interface Props { instance: Instance; projectId: string; + stageId: string; domain?: string; onchange?: () => void; } - const { instance, projectId, domain = '', onchange }: Props = $props(); + const { instance, projectId, stageId, domain = '', onchange }: Props = $props(); let loading = $state(false); let error = $state(''); @@ -41,16 +42,16 @@ try { switch (action) { case 'stop': - await api.stopInstance(projectId, instance.stage_id, instance.id); + await api.stopInstance(projectId, stageId, instance.id); break; case 'start': - await api.startInstance(projectId, instance.stage_id, instance.id); + await api.startInstance(projectId, stageId, instance.id); 
break; case 'restart': - await api.restartInstance(projectId, instance.stage_id, instance.id); + await api.restartInstance(projectId, stageId, instance.id); break; case 'remove': - await api.removeInstance(projectId, instance.stage_id, instance.id); + await api.removeInstance(projectId, stageId, instance.id); break; } onchange?.(); @@ -73,7 +74,7 @@ {instance.image_tag} - + {#if subdomainUrl} @@ -96,7 +97,7 @@
- {#if instance.status === 'running'} + {#if instance.state === 'running'} - {:else if instance.status === 'stopped'} + {:else if instance.state === 'stopped'}
- {#if instance.status === 'running'} - + {#if instance.state === 'running'} + {/if} {#if showLogs}
{ showLogs = false; }} />
diff --git a/web/src/lib/components/ProjectCard.svelte b/web/src/lib/components/ProjectCard.svelte index 914fdb7..2872213 100644 --- a/web/src/lib/components/ProjectCard.svelte +++ b/web/src/lib/components/ProjectCard.svelte @@ -14,9 +14,9 @@ const { project, instances = [] }: Props = $props(); - const runningCount = $derived(instances.filter((i) => i.status === 'running').length); - const stoppedCount = $derived(instances.filter((i) => i.status === 'stopped').length); - const failedCount = $derived(instances.filter((i) => i.status === 'failed').length); + const runningCount = $derived(instances.filter((i) => i.state === 'running').length); + const stoppedCount = $derived(instances.filter((i) => i.state === 'stopped').length); + const failedCount = $derived(instances.filter((i) => i.state === 'failed').length); const totalCount = $derived(instances.length); const overallStatus = $derived.by<'failed' | 'running' | 'stopped'>(() => { diff --git a/web/src/lib/components/StaleContainerCard.svelte b/web/src/lib/components/StaleContainerCard.svelte index c38da9e..2208893 100644 --- a/web/src/lib/components/StaleContainerCard.svelte +++ b/web/src/lib/components/StaleContainerCard.svelte @@ -22,7 +22,9 @@ ); const displayName = $derived( - `${container.project_name}-${container.stage_name}-${container.instance.image_tag}` + container.role + ? `${container.workload_name}-${container.role}-${container.container.image_tag}` + : `${container.workload_name}-${container.container.image_tag}` ); @@ -36,11 +38,13 @@
- {container.project_name} - - - {container.stage_name} + {container.workload_name} + {#if container.role} + + {container.role} + + {/if}
@@ -55,14 +59,14 @@
- {container.instance.image_tag} + {container.container.image_tag} - {$t('stale.lastAlive')}: {$fmt.shortDate(container.instance.last_alive_at)} + {$t('stale.lastAlive')}: {$fmt.shortDate(container.container.last_seen_at)} - {container.instance.status} + {container.container.state}
@@ -71,7 +75,7 @@