refactor(workload): extract Instance entirely; Container is canonical
Build / build (push) Successful in 10m41s
Build / build (push) Successful in 10m41s
End-to-end extraction of the Instance concept. After this commit:
* internal/store/instances.go — DELETED
* internal/store/models.go — Instance struct gone, ProxyRoute moved here
* The containers table is the single source of truth for project/stack/site
container state. The instances table is dropped via a DROP TABLE migration
(idempotent; re-runnable on every boot).
* Legacy tinyforge.project / tinyforge.stage / tinyforge.instance-id
Docker labels are no longer emitted; only tinyforge.workload.{id,kind},
tinyforge.role, and tinyforge.managed are stamped on new containers.
Backend rewrites:
- internal/deployer: executeDeploy + blueGreenDeploy + rollback +
promote use store.Container natively. New
removeContainer() replaces removeInstance().
enforceMaxInstances reads via
ListContainersByStageID.
- internal/reconciler: legacy tinyforge.instance-id dispatch removed;
upsertByWorkloadLabel now finds existing rows
by docker container ID first and falls back to
the deterministic workloadID:role key.
- internal/stale/scanner: Scan + new FindStaleContainers walk the
containers table; emit StaleContainer JSON.
- internal/stats/collector: ListContainers replaces ListAllInstances.
- internal/webhook/handler: the workload-secret lookup is tried first, then
falls back to the project / static_site secret column.
- internal/api: instances.go, stale.go, stats.go, stats_history.go,
projects.go, settings.go, docker.go, dns.go all read /
write through Container.
Docker layer:
- ManagedContainer exposes WorkloadID/Kind/Role from the canonical labels.
- ListContainers filters by tinyforge.managed=true.
- Network creation uses LabelManaged instead of LabelProject.
Frontend:
- Instance type is now a Container alias; .status → .state,
.last_alive_at → .last_seen_at.
- InstanceCard takes stageId as a prop (no longer derived from Instance).
- StaleContainer JSON shape rewritten: { container, workload_name, role,
days_stale }. StaleContainerCard + /containers/stale page updated.
- ProjectCard / homepage / SystemHealthCard filter by .state.
The migration loop now tolerates "no such table" alongside "duplicate
column" / "already exists" so obsolete ALTER TABLE entries targeting the
dropped instances table no-op cleanly on first boot.
Tests: store + deployer + reconciler + webhook + staticsite + notify all
still pass. Frontend svelte-check: zero errors.
This commit is contained in:
+9
-9
@@ -204,9 +204,9 @@ func (s *Server) buildConsumerNameMap() map[string]string {
|
||||
for _, p := range projects {
|
||||
stages, _ := s.store.GetStagesByProjectID(p.ID)
|
||||
for _, st := range stages {
|
||||
instances, _ := s.store.GetInstancesByStageID(st.ID)
|
||||
for _, inst := range instances {
|
||||
names["instance:"+inst.ID] = p.Name + "/" + st.Name + ":" + inst.ImageTag
|
||||
rows, _ := s.store.ListContainersByStageID(st.ID)
|
||||
for _, c := range rows {
|
||||
names["instance:"+c.ID] = p.Name + "/" + st.Name + ":" + c.ImageTag
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -362,15 +362,15 @@ func (s *Server) computeExpectedFQDNs(settings store.Settings) (map[string]strin
|
||||
if !st.EnableProxy {
|
||||
continue
|
||||
}
|
||||
instances, err := s.store.GetInstancesByStageID(st.ID)
|
||||
rows, err := s.store.ListContainersByStageID(st.ID)
|
||||
if err != nil {
|
||||
slog.Warn("dns: failed to get instances", "stage_id", st.ID, "error", err)
|
||||
slog.Warn("dns: failed to get containers", "stage_id", st.ID, "error", err)
|
||||
continue
|
||||
}
|
||||
for _, inst := range instances {
|
||||
if inst.NpmProxyID > 0 && inst.Subdomain != "" && inst.Status == "running" {
|
||||
fqdn := inst.Subdomain + "." + settings.Domain
|
||||
expected[fqdn] = "instance:" + inst.ID
|
||||
for _, c := range rows {
|
||||
if c.NpmProxyID > 0 && c.Subdomain != "" && c.State == "running" {
|
||||
fqdn := c.Subdomain + "." + settings.Domain
|
||||
expected[fqdn] = "instance:" + c.ID
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
+22
-15
@@ -69,39 +69,46 @@ func (s *Server) listProjectImages(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
|
||||
// streamContainerLogs handles GET /api/projects/{id}/stages/{stage}/instances/{iid}/logs.
|
||||
// Streams container logs via SSE. Supports query params:
|
||||
// - tail: number of lines from end (default "200")
|
||||
// - follow: "true" to stream new lines in real-time
|
||||
// Streams container logs via SSE. {iid} is the container row ID. Ownership is
|
||||
// verified by joining through workload + stage so an attacker cannot stream
|
||||
// logs for a foreign container by guessing IDs under the wrong project URL.
|
||||
func (s *Server) streamContainerLogs(w http.ResponseWriter, r *http.Request) {
|
||||
projectID := chi.URLParam(r, "id")
|
||||
stageID := chi.URLParam(r, "stage")
|
||||
instanceID := chi.URLParam(r, "iid")
|
||||
containerRowID := chi.URLParam(r, "iid")
|
||||
|
||||
inst, err := s.store.GetInstanceByID(instanceID)
|
||||
c, err := s.store.GetContainerByID(containerRowID)
|
||||
if err != nil {
|
||||
if errors.Is(err, store.ErrNotFound) {
|
||||
respondNotFound(w, "instance")
|
||||
respondNotFound(w, "container")
|
||||
return
|
||||
}
|
||||
slog.Error("failed to get instance", "error", err)
|
||||
slog.Error("failed to get container", "error", err)
|
||||
respondError(w, http.StatusInternalServerError, "internal server error")
|
||||
return
|
||||
}
|
||||
|
||||
// Verify the instance actually belongs to the project/stage in the path.
|
||||
// Without this, a user could stream logs for any instance ID by guessing
|
||||
// it under the wrong project — defence-in-depth for future per-project ACLs.
|
||||
if inst.ProjectID != projectID || inst.StageID != stageID {
|
||||
respondNotFound(w, "instance")
|
||||
wl, err := s.store.GetWorkloadByID(c.WorkloadID)
|
||||
if err != nil {
|
||||
respondNotFound(w, "container")
|
||||
return
|
||||
}
|
||||
stage, err := s.store.GetStageByID(stageID)
|
||||
if err != nil || stage.ProjectID != projectID {
|
||||
respondNotFound(w, "container")
|
||||
return
|
||||
}
|
||||
if wl.Kind != string(store.WorkloadKindProject) || wl.RefID != projectID || c.Role != stage.Name {
|
||||
respondNotFound(w, "container")
|
||||
return
|
||||
}
|
||||
|
||||
if inst.ContainerID == "" {
|
||||
respondError(w, http.StatusBadRequest, "instance has no container")
|
||||
if c.ContainerID == "" {
|
||||
respondError(w, http.StatusBadRequest, "container row has no docker container bound")
|
||||
return
|
||||
}
|
||||
|
||||
s.streamLogsForContainer(w, r, inst.ContainerID)
|
||||
s.streamLogsForContainer(w, r, c.ContainerID)
|
||||
}
|
||||
|
||||
// streamLogsForContainer streams logs for an arbitrary container ID using the
|
||||
|
||||
+63
-67
@@ -13,48 +13,53 @@ import (
|
||||
)
|
||||
|
||||
// listInstances handles GET /api/projects/{id}/stages/{stage}/instances.
|
||||
// Reads the normalized container index — the legacy `instances` table is gone.
|
||||
// The JSON response is Container-shaped (id, container_id, image_tag, subdomain,
|
||||
// state, port, etc.), so relative to the legacy Instance payload some fields
|
||||
// are renamed (status→state, last_alive_at→last_seen_at).
|
||||
func (s *Server) listInstances(w http.ResponseWriter, r *http.Request) {
|
||||
stageID := chi.URLParam(r, "stage")
|
||||
|
||||
// Verify stage exists.
|
||||
if _, err := s.store.GetStageByID(stageID); err != nil {
|
||||
if errors.Is(err, store.ErrNotFound) {
|
||||
respondNotFound(w, "stage")
|
||||
return
|
||||
}
|
||||
slog.Error("failed to get stage", "error", err)
|
||||
respondError(w, http.StatusInternalServerError, "internal server error")
|
||||
return
|
||||
}
|
||||
|
||||
instances, err := s.store.GetInstancesByStageID(stageID)
|
||||
if err != nil {
|
||||
slog.Error("failed to list instances", "error", err)
|
||||
respondError(w, http.StatusInternalServerError, "internal server error")
|
||||
return
|
||||
}
|
||||
|
||||
// Reconcile instance statuses with Docker's actual state.
|
||||
containers, err := s.store.ListContainersByStageID(stageID)
|
||||
if err != nil {
|
||||
slog.Error("failed to list containers", "error", err)
|
||||
respondError(w, http.StatusInternalServerError, "internal server error")
|
||||
return
|
||||
}
|
||||
|
||||
// Reconcile container state with Docker's actual state — covers the
|
||||
// case where a container was killed externally between deployer writes
|
||||
// and the next reconciler tick.
|
||||
ctx := r.Context()
|
||||
for i, inst := range instances {
|
||||
if inst.ContainerID == "" || inst.Status == "removing" {
|
||||
for i, c := range containers {
|
||||
if c.ContainerID == "" || c.State == "removing" {
|
||||
continue
|
||||
}
|
||||
running, err := s.docker.IsContainerRunning(ctx, inst.ContainerID)
|
||||
running, err := s.docker.IsContainerRunning(ctx, c.ContainerID)
|
||||
if err != nil {
|
||||
continue // Docker unreachable, keep stored status.
|
||||
continue
|
||||
}
|
||||
actualStatus := "stopped"
|
||||
actual := "stopped"
|
||||
if running {
|
||||
actualStatus = "running"
|
||||
actual = "running"
|
||||
}
|
||||
if inst.Status != actualStatus {
|
||||
instances[i].Status = actualStatus
|
||||
_ = s.store.UpdateInstanceStatus(inst.ID, actualStatus)
|
||||
if c.State != actual {
|
||||
containers[i].State = actual
|
||||
_ = s.store.UpdateContainerState(c.ID, actual)
|
||||
}
|
||||
}
|
||||
|
||||
respondJSON(w, http.StatusOK, instances)
|
||||
respondJSON(w, http.StatusOK, containers)
|
||||
}
|
||||
|
||||
// deployRequest is the expected JSON body for triggering a deploy.
|
||||
@@ -62,30 +67,28 @@ type deployRequest struct {
|
||||
ImageTag string `json:"image_tag"`
|
||||
}
|
||||
|
||||
// deployInstance handles POST /api/projects/{id}/stages/{stage}/instances (trigger deploy).
|
||||
// deployInstance handles POST /api/projects/{id}/stages/{stage}/instances.
|
||||
func (s *Server) deployInstance(w http.ResponseWriter, r *http.Request) {
|
||||
projectID := chi.URLParam(r, "id")
|
||||
stageID := chi.URLParam(r, "stage")
|
||||
|
||||
// Verify project exists.
|
||||
if _, err := s.store.GetProjectByID(projectID); err != nil {
|
||||
if errors.Is(err, store.ErrNotFound) {
|
||||
respondNotFound(w, "project")
|
||||
return
|
||||
}
|
||||
slog.Error("failed to get project", "error", err)
|
||||
respondError(w, http.StatusInternalServerError, "internal server error")
|
||||
respondError(w, http.StatusInternalServerError, "internal server error")
|
||||
return
|
||||
}
|
||||
|
||||
// Verify stage exists.
|
||||
if _, err := s.store.GetStageByID(stageID); err != nil {
|
||||
if errors.Is(err, store.ErrNotFound) {
|
||||
respondNotFound(w, "stage")
|
||||
return
|
||||
}
|
||||
slog.Error("failed to get stage", "error", err)
|
||||
respondError(w, http.StatusInternalServerError, "internal server error")
|
||||
respondError(w, http.StatusInternalServerError, "internal server error")
|
||||
return
|
||||
}
|
||||
|
||||
@@ -115,40 +118,41 @@ func (s *Server) deployInstance(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
|
||||
// removeInstance handles DELETE /api/projects/{id}/stages/{stage}/instances/{iid}.
|
||||
// {iid} is the container row ID (same UUID as the legacy instance ID).
|
||||
func (s *Server) removeInstance(w http.ResponseWriter, r *http.Request) {
|
||||
instanceID := chi.URLParam(r, "iid")
|
||||
id := chi.URLParam(r, "iid")
|
||||
|
||||
inst, err := s.store.GetInstanceByID(instanceID)
|
||||
c, err := s.store.GetContainerByID(id)
|
||||
if err != nil {
|
||||
if errors.Is(err, store.ErrNotFound) {
|
||||
respondNotFound(w, "instance")
|
||||
respondNotFound(w, "container")
|
||||
return
|
||||
}
|
||||
slog.Error("failed to get instance", "error", err)
|
||||
respondError(w, http.StatusInternalServerError, "internal server error")
|
||||
slog.Error("failed to get container", "error", err)
|
||||
respondError(w, http.StatusInternalServerError, "internal server error")
|
||||
return
|
||||
}
|
||||
|
||||
// Remove the Docker container if it has one.
|
||||
if inst.ContainerID != "" {
|
||||
if err := s.docker.RemoveContainer(r.Context(), inst.ContainerID, true); err != nil {
|
||||
slog.Error("remove container", "container_id", inst.ContainerID, "error", err)
|
||||
if c.ContainerID != "" {
|
||||
if err := s.docker.RemoveContainer(r.Context(), c.ContainerID, true); err != nil {
|
||||
slog.Error("remove container", "container_id", c.ContainerID, "error", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Delete proxy route if it has one.
|
||||
if inst.ProxyRouteID != "" {
|
||||
if err := s.proxyProvider.DeleteRoute(r.Context(), inst.ProxyRouteID); err != nil {
|
||||
slog.Warn("delete proxy route on instance removal", "route_id", inst.ProxyRouteID, "error", err)
|
||||
if c.ProxyRouteID != "" {
|
||||
if err := s.proxyProvider.DeleteRoute(r.Context(), c.ProxyRouteID); err != nil {
|
||||
slog.Warn("delete proxy route on container removal", "route_id", c.ProxyRouteID, "error", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Delete instance record.
|
||||
if err := s.store.DeleteInstance(instanceID); err != nil {
|
||||
respondError(w, http.StatusInternalServerError, "failed to delete instance")
|
||||
// Delete container row.
|
||||
if err := s.store.DeleteContainer(id); err != nil {
|
||||
respondError(w, http.StatusInternalServerError, "failed to delete container")
|
||||
return
|
||||
}
|
||||
respondJSON(w, http.StatusOK, map[string]string{"deleted": instanceID})
|
||||
respondJSON(w, http.StatusOK, map[string]string{"deleted": id})
|
||||
}
|
||||
|
||||
// stopInstance handles POST /api/projects/{id}/stages/{stage}/instances/{iid}/stop.
|
||||
@@ -166,67 +170,59 @@ func (s *Server) restartInstance(w http.ResponseWriter, r *http.Request) {
|
||||
s.controlInstance(w, r, "restart")
|
||||
}
|
||||
|
||||
// controlInstance performs a stop/start/restart action on an instance's container.
|
||||
// controlInstance performs a stop/start/restart action on a container.
|
||||
func (s *Server) controlInstance(w http.ResponseWriter, r *http.Request, action string) {
|
||||
instanceID := chi.URLParam(r, "iid")
|
||||
id := chi.URLParam(r, "iid")
|
||||
|
||||
inst, err := s.store.GetInstanceByID(instanceID)
|
||||
c, err := s.store.GetContainerByID(id)
|
||||
if err != nil {
|
||||
if errors.Is(err, store.ErrNotFound) {
|
||||
respondNotFound(w, "instance")
|
||||
respondNotFound(w, "container")
|
||||
return
|
||||
}
|
||||
slog.Error("failed to get instance", "error", err)
|
||||
respondError(w, http.StatusInternalServerError, "internal server error")
|
||||
slog.Error("failed to get container", "error", err)
|
||||
respondError(w, http.StatusInternalServerError, "internal server error")
|
||||
return
|
||||
}
|
||||
|
||||
if inst.ContainerID == "" {
|
||||
respondError(w, http.StatusBadRequest, "instance has no container")
|
||||
if c.ContainerID == "" {
|
||||
respondError(w, http.StatusBadRequest, "container row has no docker container bound")
|
||||
return
|
||||
}
|
||||
|
||||
ctx := r.Context()
|
||||
var controlErr error
|
||||
var newStatus string
|
||||
var newState string
|
||||
|
||||
switch action {
|
||||
case "stop":
|
||||
controlErr = s.docker.StopContainer(ctx, inst.ContainerID, 10)
|
||||
newStatus = "stopped"
|
||||
controlErr = s.docker.StopContainer(ctx, c.ContainerID, 10)
|
||||
newState = "stopped"
|
||||
case "start":
|
||||
controlErr = s.docker.StartContainer(ctx, inst.ContainerID)
|
||||
newStatus = "running"
|
||||
controlErr = s.docker.StartContainer(ctx, c.ContainerID)
|
||||
newState = "running"
|
||||
case "restart":
|
||||
controlErr = s.docker.RestartContainer(ctx, inst.ContainerID, 10)
|
||||
newStatus = "running"
|
||||
controlErr = s.docker.RestartContainer(ctx, c.ContainerID, 10)
|
||||
newState = "running"
|
||||
default:
|
||||
respondError(w, http.StatusBadRequest, fmt.Sprintf("unknown action: %s", action))
|
||||
return
|
||||
}
|
||||
|
||||
if controlErr != nil {
|
||||
slog.Error("failed to control instance", "action", action, "instance_id", instanceID, "error", controlErr)
|
||||
slog.Error("failed to control container", "action", action, "id", id, "error", controlErr)
|
||||
respondError(w, http.StatusInternalServerError, "internal server error")
|
||||
return
|
||||
}
|
||||
|
||||
// Update status in store.
|
||||
if err := s.store.UpdateInstanceStatus(instanceID, newStatus); err != nil {
|
||||
slog.Error("update instance status", "instance_id", instanceID, "status", newStatus, "error", err)
|
||||
}
|
||||
|
||||
// Track last_alive_at when container becomes running.
|
||||
if newStatus == "running" {
|
||||
if err := s.store.UpdateLastAliveAt(instanceID); err != nil {
|
||||
slog.Error("update last_alive_at", "instance_id", instanceID, "error", err)
|
||||
}
|
||||
if err := s.store.UpdateContainerState(id, newState); err != nil {
|
||||
slog.Error("update container state", "id", id, "state", newState, "error", err)
|
||||
}
|
||||
|
||||
respondJSON(w, http.StatusOK, map[string]string{
|
||||
"instance_id": instanceID,
|
||||
"instance_id": id,
|
||||
"action": action,
|
||||
"status": newStatus,
|
||||
"status": newState,
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -188,16 +188,16 @@ func (s *Server) deleteProject(w http.ResponseWriter, r *http.Request) {
|
||||
ctx := r.Context()
|
||||
stages, _ := s.store.GetStagesByProjectID(id)
|
||||
for _, stage := range stages {
|
||||
instances, _ := s.store.GetInstancesByStageID(stage.ID)
|
||||
for _, inst := range instances {
|
||||
if inst.ContainerID != "" {
|
||||
if err := s.docker.RemoveContainer(ctx, inst.ContainerID, true); err != nil {
|
||||
slog.Warn("delete project: remove container", "container", inst.ContainerID, "error", err)
|
||||
rows, _ := s.store.ListContainersByStageID(stage.ID)
|
||||
for _, c := range rows {
|
||||
if c.ContainerID != "" {
|
||||
if err := s.docker.RemoveContainer(ctx, c.ContainerID, true); err != nil {
|
||||
slog.Warn("delete project: remove container", "container", c.ContainerID, "error", err)
|
||||
}
|
||||
}
|
||||
if inst.ProxyRouteID != "" {
|
||||
if err := s.proxyProvider.DeleteRoute(ctx, inst.ProxyRouteID); err != nil {
|
||||
slog.Warn("delete project: delete proxy route", "route", inst.ProxyRouteID, "error", err)
|
||||
if c.ProxyRouteID != "" {
|
||||
if err := s.proxyProvider.DeleteRoute(ctx, c.ProxyRouteID); err != nil {
|
||||
slog.Warn("delete project: delete proxy route", "route", c.ProxyRouteID, "error", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
+11
-12
@@ -466,14 +466,14 @@ func (s *Server) resyncAllProxies(oldSettings, newSettings store.Settings) {
|
||||
// Step 2: If new provider is "none", clear all proxy route IDs and we're done.
|
||||
if newSettings.ProxyProvider == "none" {
|
||||
for _, route := range routes {
|
||||
inst, err := s.store.GetInstanceByID(route.InstanceID)
|
||||
c, err := s.store.GetContainerByID(route.InstanceID)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
inst.ProxyRouteID = ""
|
||||
inst.NpmProxyID = 0
|
||||
if err := s.store.UpdateInstance(inst); err != nil {
|
||||
slog.Warn("proxy resync: clear route ID", "instance", route.InstanceID, "error", err)
|
||||
c.ProxyRouteID = ""
|
||||
c.NpmProxyID = 0
|
||||
if err := s.store.UpdateContainer(c); err != nil {
|
||||
slog.Warn("proxy resync: clear route ID", "container", route.InstanceID, "error", err)
|
||||
}
|
||||
}
|
||||
slog.Info("proxy resync: cleared all proxy routes (provider set to none)", "count", len(routes))
|
||||
@@ -501,18 +501,17 @@ func (s *Server) resyncAllProxies(oldSettings, newSettings store.Settings) {
|
||||
continue
|
||||
}
|
||||
|
||||
// Update instance with new route ID.
|
||||
inst, err := s.store.GetInstanceByID(route.InstanceID)
|
||||
// Update container row with new route ID.
|
||||
c, err := s.store.GetContainerByID(route.InstanceID)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
inst.ProxyRouteID = routeID
|
||||
c.ProxyRouteID = routeID
|
||||
if domainChanged {
|
||||
// Subdomain stays the same, but the FQDN in external systems changed.
|
||||
slog.Info("proxy resync: domain updated", "instance", route.InstanceID, "domain", fqdn)
|
||||
slog.Info("proxy resync: domain updated", "container", route.InstanceID, "domain", fqdn)
|
||||
}
|
||||
if err := s.store.UpdateInstance(inst); err != nil {
|
||||
slog.Warn("proxy resync: update instance", "instance", route.InstanceID, "error", err)
|
||||
if err := s.store.UpdateContainer(c); err != nil {
|
||||
slog.Warn("proxy resync: update container", "container", route.InstanceID, "error", err)
|
||||
}
|
||||
updated++
|
||||
}
|
||||
|
||||
+45
-51
@@ -19,59 +19,58 @@ func (s *Server) listStaleContainers(w http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
}
|
||||
|
||||
staleInstances, err := s.staleScanner.FindStaleInstances(r.Context())
|
||||
staleRows, err := s.staleScanner.FindStaleContainers(r.Context())
|
||||
if err != nil {
|
||||
slog.Error("failed to find stale containers", "error", err)
|
||||
respondError(w, http.StatusInternalServerError, "failed to find stale containers")
|
||||
return
|
||||
}
|
||||
|
||||
if staleInstances == nil {
|
||||
staleInstances = []stale.StaleInstance{}
|
||||
if staleRows == nil {
|
||||
staleRows = []stale.StaleContainer{}
|
||||
}
|
||||
respondJSON(w, http.StatusOK, staleInstances)
|
||||
respondJSON(w, http.StatusOK, staleRows)
|
||||
}
|
||||
|
||||
// cleanupStaleContainer handles POST /api/containers/stale/{id}/cleanup.
|
||||
// Stops the Docker container, removes the NPM proxy, and deletes the instance from the store.
|
||||
// Stops the Docker container, removes the proxy route, and deletes the
|
||||
// container row. {id} is the container row ID.
|
||||
func (s *Server) cleanupStaleContainer(w http.ResponseWriter, r *http.Request) {
|
||||
instanceID := chi.URLParam(r, "id")
|
||||
id := chi.URLParam(r, "id")
|
||||
|
||||
inst, err := s.store.GetInstanceByID(instanceID)
|
||||
c, err := s.store.GetContainerByID(id)
|
||||
if err != nil {
|
||||
if errors.Is(err, store.ErrNotFound) {
|
||||
respondNotFound(w, "instance")
|
||||
respondNotFound(w, "container")
|
||||
return
|
||||
}
|
||||
slog.Error("failed to get instance", "instance_id", instanceID, "error", err)
|
||||
respondError(w, http.StatusInternalServerError, "failed to get instance")
|
||||
slog.Error("failed to get container", "id", id, "error", err)
|
||||
respondError(w, http.StatusInternalServerError, "failed to get container")
|
||||
return
|
||||
}
|
||||
|
||||
// Don't remove instances already being cleaned up.
|
||||
if inst.Status == "removing" {
|
||||
respondError(w, http.StatusConflict, "instance is already being removed")
|
||||
if c.State == "removing" {
|
||||
respondError(w, http.StatusConflict, "container is already being removed")
|
||||
return
|
||||
}
|
||||
|
||||
if err := s.cleanupInstance(r, inst); err != nil {
|
||||
slog.Error("failed to cleanup instance", "instance_id", instanceID, "error", err)
|
||||
respondError(w, http.StatusInternalServerError, "failed to cleanup instance")
|
||||
if err := s.cleanupContainer(r, c); err != nil {
|
||||
slog.Error("failed to cleanup container", "id", id, "error", err)
|
||||
respondError(w, http.StatusInternalServerError, "failed to cleanup container")
|
||||
return
|
||||
}
|
||||
|
||||
respondJSON(w, http.StatusOK, map[string]string{"cleaned": instanceID})
|
||||
respondJSON(w, http.StatusOK, map[string]string{"cleaned": id})
|
||||
}
|
||||
|
||||
// bulkCleanupStaleContainers handles POST /api/containers/stale/cleanup.
|
||||
// Cleans up all currently stale containers.
|
||||
func (s *Server) bulkCleanupStaleContainers(w http.ResponseWriter, r *http.Request) {
|
||||
if s.staleScanner == nil {
|
||||
respondError(w, http.StatusServiceUnavailable, "stale scanner not initialized")
|
||||
return
|
||||
}
|
||||
|
||||
staleInstances, err := s.staleScanner.FindStaleInstances(r.Context())
|
||||
staleRows, err := s.staleScanner.FindStaleContainers(r.Context())
|
||||
if err != nil {
|
||||
slog.Error("failed to find stale containers for bulk cleanup", "error", err)
|
||||
respondError(w, http.StatusInternalServerError, "failed to find stale containers")
|
||||
@@ -81,17 +80,17 @@ func (s *Server) bulkCleanupStaleContainers(w http.ResponseWriter, r *http.Reque
|
||||
var cleaned []string
|
||||
var failed []string
|
||||
|
||||
for _, si := range staleInstances {
|
||||
if si.Instance.Status == "removing" {
|
||||
for _, sc := range staleRows {
|
||||
if sc.Container.State == "removing" {
|
||||
continue
|
||||
}
|
||||
if err := s.cleanupInstance(r, si.Instance); err != nil {
|
||||
if err := s.cleanupContainer(r, sc.Container); err != nil {
|
||||
slog.Error("bulk stale cleanup failed",
|
||||
"instance_id", si.Instance.ID, "error", err)
|
||||
failed = append(failed, si.Instance.ID)
|
||||
"id", sc.Container.ID, "error", err)
|
||||
failed = append(failed, sc.Container.ID)
|
||||
continue
|
||||
}
|
||||
cleaned = append(cleaned, si.Instance.ID)
|
||||
cleaned = append(cleaned, sc.Container.ID)
|
||||
}
|
||||
|
||||
respondJSON(w, http.StatusOK, map[string]any{
|
||||
@@ -100,53 +99,48 @@ func (s *Server) bulkCleanupStaleContainers(w http.ResponseWriter, r *http.Reque
|
||||
})
|
||||
}
|
||||
|
||||
// cleanupInstance stops a Docker container, removes the NPM proxy, deletes
|
||||
// the store record, and emits an event.
|
||||
func (s *Server) cleanupInstance(r *http.Request, inst store.Instance) error {
|
||||
// cleanupContainer stops a Docker container, removes its proxy route,
|
||||
// deletes the container row, and emits an event.
|
||||
func (s *Server) cleanupContainer(r *http.Request, c store.Container) error {
|
||||
ctx := r.Context()
|
||||
|
||||
// Mark as removing.
|
||||
if err := s.store.UpdateInstanceStatus(inst.ID, "removing"); err != nil {
|
||||
slog.Warn("stale cleanup: update status to removing", "instance_id", inst.ID, "error", err)
|
||||
if err := s.store.UpdateContainerState(c.ID, "removing"); err != nil {
|
||||
slog.Warn("stale cleanup: update state to removing", "id", c.ID, "error", err)
|
||||
}
|
||||
|
||||
// Stop and remove Docker container.
|
||||
if inst.ContainerID != "" {
|
||||
if err := s.docker.StopContainer(ctx, inst.ContainerID, 10); err != nil {
|
||||
slog.Warn("stale cleanup: stop container", "container_id", inst.ContainerID, "error", err)
|
||||
if c.ContainerID != "" {
|
||||
if err := s.docker.StopContainer(ctx, c.ContainerID, 10); err != nil {
|
||||
slog.Warn("stale cleanup: stop container", "container_id", c.ContainerID, "error", err)
|
||||
}
|
||||
if err := s.docker.RemoveContainer(ctx, inst.ContainerID, true); err != nil {
|
||||
slog.Warn("stale cleanup: remove container", "container_id", inst.ContainerID, "error", err)
|
||||
if err := s.docker.RemoveContainer(ctx, c.ContainerID, true); err != nil {
|
||||
slog.Warn("stale cleanup: remove container", "container_id", c.ContainerID, "error", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Delete proxy route if present.
|
||||
if inst.ProxyRouteID != "" {
|
||||
if err := s.proxyProvider.DeleteRoute(ctx, inst.ProxyRouteID); err != nil {
|
||||
slog.Warn("stale cleanup: delete proxy route", "route_id", inst.ProxyRouteID, "error", err)
|
||||
if c.ProxyRouteID != "" {
|
||||
if err := s.proxyProvider.DeleteRoute(ctx, c.ProxyRouteID); err != nil {
|
||||
slog.Warn("stale cleanup: delete proxy route", "route_id", c.ProxyRouteID, "error", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Delete instance record.
|
||||
if err := s.store.DeleteInstance(inst.ID); err != nil {
|
||||
if err := s.store.DeleteContainer(c.ID); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Emit cleanup event.
|
||||
s.emitStaleCleanupEvent(inst)
|
||||
s.emitStaleCleanupEvent(c)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// emitStaleCleanupEvent publishes an event when a stale container is cleaned up.
|
||||
func (s *Server) emitStaleCleanupEvent(inst store.Instance) {
|
||||
msg := "Stale container cleaned up: " + inst.ID + " (tag: " + inst.ImageTag + ")"
|
||||
func (s *Server) emitStaleCleanupEvent(c store.Container) {
|
||||
msg := "Stale container cleaned up: " + c.ID + " (tag: " + c.ImageTag + ")"
|
||||
|
||||
evt, err := s.store.InsertEvent(store.EventLog{
|
||||
Source: "stale_cleanup",
|
||||
Severity: "info",
|
||||
Message: msg,
|
||||
Metadata: `{"instance_id":"` + inst.ID + `","project_id":"` + inst.ProjectID + `","stage_id":"` + inst.StageID + `"}`,
|
||||
Metadata: `{"container_id":"` + c.ID + `","workload_id":"` + c.WorkloadID + `","role":"` + c.Role + `"}`,
|
||||
})
|
||||
if err != nil {
|
||||
slog.Error("stale cleanup: failed to persist event", "error", err)
|
||||
@@ -158,9 +152,9 @@ func (s *Server) emitStaleCleanupEvent(inst store.Instance) {
|
||||
Payload: events.EventLogPayload{
|
||||
ID: evt.ID,
|
||||
Source: "stale_cleanup",
|
||||
Severity: "info",
|
||||
Message: msg,
|
||||
Metadata: evt.Metadata,
|
||||
Severity: "info",
|
||||
Message: msg,
|
||||
Metadata: evt.Metadata,
|
||||
CreatedAt: evt.CreatedAt,
|
||||
},
|
||||
})
|
||||
|
||||
+10
-10
@@ -11,29 +11,29 @@ import (
|
||||
)
|
||||
|
||||
// getInstanceStats handles GET /api/projects/{id}/stages/{stage}/instances/{iid}/stats.
|
||||
// Returns CPU and memory stats for the container backing the given instance.
|
||||
// {iid} is the container row ID (same UUID as the legacy instance ID).
|
||||
func (s *Server) getInstanceStats(w http.ResponseWriter, r *http.Request) {
|
||||
instanceID := chi.URLParam(r, "iid")
|
||||
id := chi.URLParam(r, "iid")
|
||||
|
||||
inst, err := s.store.GetInstanceByID(instanceID)
|
||||
c, err := s.store.GetContainerByID(id)
|
||||
if err != nil {
|
||||
if errors.Is(err, store.ErrNotFound) {
|
||||
respondNotFound(w, "instance")
|
||||
respondNotFound(w, "container")
|
||||
return
|
||||
}
|
||||
slog.Error("failed to get instance", "instance_id", instanceID, "error", err)
|
||||
respondError(w, http.StatusInternalServerError, "failed to get instance")
|
||||
slog.Error("failed to get container", "id", id, "error", err)
|
||||
respondError(w, http.StatusInternalServerError, "failed to get container")
|
||||
return
|
||||
}
|
||||
|
||||
if inst.ContainerID == "" {
|
||||
respondError(w, http.StatusBadRequest, "instance has no container")
|
||||
if c.ContainerID == "" {
|
||||
respondError(w, http.StatusBadRequest, "container row has no docker container bound")
|
||||
return
|
||||
}
|
||||
|
||||
stats, err := s.docker.GetContainerStats(r.Context(), inst.ContainerID)
|
||||
stats, err := s.docker.GetContainerStats(r.Context(), c.ContainerID)
|
||||
if err != nil {
|
||||
slog.Error("failed to get container stats", "container_id", inst.ContainerID, "error", err)
|
||||
slog.Error("failed to get container stats", "container_id", c.ContainerID, "error", err)
|
||||
respondError(w, http.StatusInternalServerError, "failed to get container stats")
|
||||
return
|
||||
}
|
||||
|
||||
@@ -91,15 +91,16 @@ func (s *Server) getSystemStatsHistory(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
|
||||
// getInstanceStatsHistory handles GET /api/projects/{id}/stages/{stage}/instances/{iid}/stats/history.
|
||||
// {iid} is the container row ID (same UUID as the legacy instance ID).
|
||||
func (s *Server) getInstanceStatsHistory(w http.ResponseWriter, r *http.Request) {
|
||||
instanceID := chi.URLParam(r, "iid")
|
||||
if _, err := s.store.GetInstanceByID(instanceID); err != nil {
|
||||
if _, err := s.store.GetContainerByID(instanceID); err != nil {
|
||||
if errors.Is(err, store.ErrNotFound) {
|
||||
respondNotFound(w, "instance")
|
||||
respondNotFound(w, "container")
|
||||
return
|
||||
}
|
||||
slog.Error("failed to get instance", "instance_id", instanceID, "error", err)
|
||||
respondError(w, http.StatusInternalServerError, "failed to get instance")
|
||||
slog.Error("failed to get container", "id", instanceID, "error", err)
|
||||
respondError(w, http.StatusInternalServerError, "failed to get container")
|
||||
return
|
||||
}
|
||||
samples, err := s.store.ListContainerStatsSamples(stats.OwnerTypeInstance, instanceID, sinceTimestamp(parseWindow(r)))
|
||||
@@ -279,24 +280,24 @@ func (s *Server) enrichWithOwnerNames(samples []store.ContainerStatsSample) []To
|
||||
return out
|
||||
}
|
||||
|
||||
// lookupInstanceName returns "project/stage" for an instance, or empty on
|
||||
// any lookup error so a transient miss does not break the response.
|
||||
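// lookupInstanceName returns "workload/role" for a container row, or just the
|
||||
// workload name when the role is empty. Lookup misses degrade instead of
|
||||
// breaking the response: it returns empty if the container row cannot be
|
||||
// loaded, and the bare role (possibly empty) if only the workload lookup fails.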
|
||||
func (s *Server) lookupInstanceName(instanceID string) string {
|
||||
inst, err := s.store.GetInstanceByID(instanceID)
|
||||
c, err := s.store.GetContainerByID(instanceID)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
project, perr := s.store.GetProjectByID(inst.ProjectID)
|
||||
stage, serr := s.store.GetStageByID(inst.StageID)
|
||||
switch {
|
||||
case perr == nil && serr == nil:
|
||||
return project.Name + "/" + stage.Name
|
||||
case perr == nil:
|
||||
return project.Name
|
||||
case serr == nil:
|
||||
return stage.Name
|
||||
w, err := s.store.GetWorkloadByID(c.WorkloadID)
|
||||
if err != nil {
|
||||
if c.Role != "" {
|
||||
return c.Role
|
||||
}
|
||||
return ""
|
||||
}
|
||||
return ""
|
||||
if c.Role != "" {
|
||||
return w.Name + "/" + c.Role
|
||||
}
|
||||
return w.Name
|
||||
}
|
||||
|
||||
// lookupSiteName returns the site's display name or empty on lookup error.
|
||||
|
||||
Reference in New Issue
Block a user