Files
tiny-forge/internal/api/stale.go
T
alexei.dolgolyov d8ab22876f
Build / build (push) Successful in 10m41s
refactor(workload): extract Instance entirely; Container is canonical
End-to-end extraction of the Instance concept. After this commit:

  * internal/store/instances.go — DELETED
  * internal/store/models.go — Instance struct gone, ProxyRoute moved here
  * containers table is the single source of truth for project/stack/site
    container state. instances table is dropped via DROP TABLE migration
    (idempotent; re-runnable on every boot).
  * Legacy tinyforge.project / tinyforge.stage / tinyforge.instance-id
    Docker labels are no longer emitted; only tinyforge.workload.{id,kind},
    tinyforge.role, and tinyforge.managed are stamped on new containers.

Backend rewrites:
  - internal/deployer:        executeDeploy + blueGreenDeploy + rollback +
                              promote use store.Container natively. New
                              removeContainer() replaces removeInstance().
                              enforceMaxInstances reads via
                              ListContainersByStageID.
  - internal/reconciler:      legacy tinyforge.instance-id dispatch removed;
                              upsertByWorkloadLabel now finds existing rows
                              by docker container ID first and falls back to
                              the deterministic workloadID:role key.
  - internal/stale/scanner:   Scan + new FindStaleContainers walk the
                              containers table; emit StaleContainer JSON.
  - internal/stats/collector: ListContainers replaces ListAllInstances.
  - internal/webhook/handler: workload-secret lookup tried first; falls back
                              to project / static_site secret column.
  - internal/api: instances.go, stale.go, stats.go, stats_history.go,
                  projects.go, settings.go, docker.go, dns.go all read /
                  write through Container.

Docker layer:
  - ManagedContainer exposes WorkloadID/Kind/Role from the canonical labels.
  - ListContainers filters by tinyforge.managed=true.
  - Network creation uses LabelManaged instead of LabelProject.

Frontend:
  - Instance type is now a Container alias; .status → .state,
    .last_alive_at → .last_seen_at.
  - InstanceCard takes stageId as a prop (no longer derived from Instance).
  - StaleContainer JSON shape rewritten: { container, workload_name, role,
    days_stale }. StaleContainerCard + /containers/stale page updated.
  - ProjectCard / homepage / SystemHealthCard filter by .state.

The migration loop now tolerates "no such table" alongside "duplicate
column" / "already exists" so obsolete ALTER TABLE entries targeting the
dropped instances table no-op cleanly on first boot.

Tests: store + deployer + reconciler + webhook + staticsite + notify all
still pass. Frontend svelte-check: zero errors.
2026-05-09 14:43:12 +03:00

162 lines
4.6 KiB
Go

package api
import (
	"context"
	"encoding/json"
	"errors"
	"log/slog"
	"net/http"

	"github.com/go-chi/chi/v5"

	"github.com/alexei/tinyforge/internal/events"
	"github.com/alexei/tinyforge/internal/stale"
	"github.com/alexei/tinyforge/internal/store"
)
// listStaleContainers serves GET /api/containers/stale.
//
// It answers 503 when no stale scanner is configured on the server, 500 when
// the scan itself fails, and otherwise 200 with the scanner's result.
func (s *Server) listStaleContainers(w http.ResponseWriter, r *http.Request) {
	scanner := s.staleScanner
	if scanner == nil {
		respondError(w, http.StatusServiceUnavailable, "stale scanner not initialized")
		return
	}

	found, scanErr := scanner.FindStaleContainers(r.Context())
	switch {
	case scanErr != nil:
		slog.Error("failed to find stale containers", "error", scanErr)
		respondError(w, http.StatusInternalServerError, "failed to find stale containers")
		return
	case found == nil:
		// Swap a nil result for an empty slice; presumably so the response
		// body is `[]` rather than `null` (depends on respondJSON — confirm).
		found = []stale.StaleContainer{}
	}

	respondJSON(w, http.StatusOK, found)
}
// cleanupStaleContainer serves POST /api/containers/stale/{id}/cleanup, where
// {id} is the container row ID (not the Docker container ID).
//
// The row is looked up in the store and handed to cleanupContainer. Responses:
//   - 404 when the store reports store.ErrNotFound,
//   - 409 when the row is already in the "removing" state,
//   - 500 when the lookup or the cleanup fails,
//   - 200 with {"cleaned": <id>} on success.
func (s *Server) cleanupStaleContainer(w http.ResponseWriter, r *http.Request) {
	rowID := chi.URLParam(r, "id")

	row, lookupErr := s.store.GetContainerByID(rowID)
	switch {
	case lookupErr == nil:
		// Row found; continue below.
	case errors.Is(lookupErr, store.ErrNotFound):
		respondNotFound(w, "container")
		return
	default:
		slog.Error("failed to get container", "id", rowID, "error", lookupErr)
		respondError(w, http.StatusInternalServerError, "failed to get container")
		return
	}

	// A row already marked "removing" is rejected rather than cleaned twice.
	if row.State == "removing" {
		respondError(w, http.StatusConflict, "container is already being removed")
		return
	}

	cleanupErr := s.cleanupContainer(r, row)
	if cleanupErr != nil {
		slog.Error("failed to cleanup container", "id", rowID, "error", cleanupErr)
		respondError(w, http.StatusInternalServerError, "failed to cleanup container")
		return
	}

	respondJSON(w, http.StatusOK, map[string]string{"cleaned": rowID})
}
// bulkCleanupStaleContainers serves POST /api/containers/stale/cleanup.
//
// It asks the stale scanner for the current set of stale containers and runs
// cleanupContainer on each one, skipping rows already in the "removing"
// state. A failure on one container is logged and recorded but does not stop
// the loop. The response is always 200 with two lists of container row IDs:
// "cleaned" and "failed". It answers 503 when no scanner is configured and
// 500 when the scan fails.
func (s *Server) bulkCleanupStaleContainers(w http.ResponseWriter, r *http.Request) {
	if s.staleScanner == nil {
		respondError(w, http.StatusServiceUnavailable, "stale scanner not initialized")
		return
	}

	staleRows, err := s.staleScanner.FindStaleContainers(r.Context())
	if err != nil {
		slog.Error("failed to find stale containers for bulk cleanup", "error", err)
		respondError(w, http.StatusInternalServerError, "failed to find stale containers")
		return
	}

	// Both lists start as non-nil slices: a nil slice is encoded by
	// encoding/json as `null`, whereas clients iterating the result expect an
	// array. This mirrors the nil -> empty normalization in listStaleContainers.
	cleaned := make([]string, 0, len(staleRows))
	failed := []string{}

	for _, sc := range staleRows {
		if sc.Container.State == "removing" {
			continue
		}
		if err := s.cleanupContainer(r, sc.Container); err != nil {
			slog.Error("bulk stale cleanup failed",
				"id", sc.Container.ID, "error", err)
			failed = append(failed, sc.Container.ID)
			continue
		}
		cleaned = append(cleaned, sc.Container.ID)
	}

	respondJSON(w, http.StatusOK, map[string]any{
		"cleaned": cleaned,
		"failed":  failed,
	})
}
// cleanupContainer tears down everything associated with a container row:
// it marks the row "removing", stops and removes the Docker container (when
// the row has a Docker container ID), deletes the proxy route (when the row
// has one), deletes the row itself, and finally emits a stale-cleanup event.
//
// Every step before the row deletion is best-effort: failures are logged at
// warn level and the teardown continues. Only a failure to delete the row is
// returned to the caller.
//
// NOTE(review): if the row deletion fails after the state update succeeded,
// the row stays in "removing", and both cleanup handlers in this file refuse
// or skip rows in that state — confirm something else recovers such rows.
func (s *Server) cleanupContainer(r *http.Request, c store.Container) error {
	// Detach from the request's cancellation while keeping its values. The
	// request context is cancelled when the client disconnects; because the
	// Docker and proxy errors below are only logged and the row is deleted
	// regardless, a cancelled context would drop the row while leaving the
	// Docker container and proxy route behind with nothing tracking them.
	ctx := context.WithoutCancel(r.Context())

	if err := s.store.UpdateContainerState(c.ID, "removing"); err != nil {
		slog.Warn("stale cleanup: update state to removing", "id", c.ID, "error", err)
	}

	if c.ContainerID != "" {
		// The 10 is presumably a stop timeout in seconds — confirm against
		// the docker wrapper.
		if err := s.docker.StopContainer(ctx, c.ContainerID, 10); err != nil {
			slog.Warn("stale cleanup: stop container", "container_id", c.ContainerID, "error", err)
		}
		// The trailing true is presumably "force" — confirm against the
		// docker wrapper.
		if err := s.docker.RemoveContainer(ctx, c.ContainerID, true); err != nil {
			slog.Warn("stale cleanup: remove container", "container_id", c.ContainerID, "error", err)
		}
	}

	if c.ProxyRouteID != "" {
		if err := s.proxyProvider.DeleteRoute(ctx, c.ProxyRouteID); err != nil {
			slog.Warn("stale cleanup: delete proxy route", "route_id", c.ProxyRouteID, "error", err)
		}
	}

	if err := s.store.DeleteContainer(c.ID); err != nil {
		return err
	}

	s.emitStaleCleanupEvent(c)
	return nil
}
// emitStaleCleanupEvent records that a stale container was cleaned up.
//
// It first persists an event-log row (source "stale_cleanup", severity
// "info") whose metadata is a JSON object with the keys container_id,
// workload_id and role, then publishes the persisted event on the event bus.
// If the metadata cannot be encoded or the row cannot be persisted, the
// failure is logged and nothing is published.
func (s *Server) emitStaleCleanupEvent(c store.Container) {
	msg := "Stale container cleaned up: " + c.ID + " (tag: " + c.ImageTag + ")"

	// Encode via encoding/json instead of concatenating strings, so quotes,
	// backslashes or control characters in any field cannot produce invalid
	// JSON. Field order matches the previous hand-built object.
	meta, err := json.Marshal(struct {
		ContainerID string `json:"container_id"`
		WorkloadID  string `json:"workload_id"`
		Role        string `json:"role"`
	}{
		ContainerID: c.ID,
		WorkloadID:  c.WorkloadID,
		Role:        c.Role,
	})
	if err != nil {
		slog.Error("stale cleanup: failed to encode event metadata", "error", err)
		return
	}

	evt, err := s.store.InsertEvent(store.EventLog{
		Source:   "stale_cleanup",
		Severity: "info",
		Message:  msg,
		Metadata: string(meta),
	})
	if err != nil {
		slog.Error("stale cleanup: failed to persist event", "error", err)
		return
	}

	// Publish using the ID, metadata and timestamp returned by the store so
	// subscribers see the same record that was persisted.
	s.eventBus.Publish(events.Event{
		Type: events.EventLog,
		Payload: events.EventLogPayload{
			ID:        evt.ID,
			Source:    "stale_cleanup",
			Severity:  "info",
			Message:   msg,
			Metadata:  evt.Metadata,
			CreatedAt: evt.CreatedAt,
		},
	})
}