package api

import (
	"errors"
	"log/slog"
	"net/http"
	"sort"
	"strconv"
	"time"

	"github.com/go-chi/chi/v5"

	"github.com/alexei/tinyforge/internal/auth"
	"github.com/alexei/tinyforge/internal/stats"
	"github.com/alexei/tinyforge/internal/store"
)

// topConsumerMinWindow is the lower bound on how far back a container sample
// may lie and still count toward the "top consumers" list. listTopContainers
// widens it to twice the collector interval from settings when that is
// larger, so the list stays meaningful even when sampling is sparse.
const topConsumerMinWindow = 2 * time.Minute

// TopContainerSample is a container stats sample plus the human-readable
// owner name, so the UI can render "project/stage" or the static-site name
// without an extra round-trip per row.
type TopContainerSample struct {
	store.ContainerStatsSample
	OwnerName string `json:"owner_name"`
}

const (
	// defaultHistoryWindow applies when the ?window= query parameter is
	// absent, unparsable, or not positive. (Originally noted as matching the
	// default sample retention so the "last 2h" view always has data; verify
	// against the collector configuration.)
	defaultHistoryWindow = 2 * time.Hour

	// maxHistoryWindow is the upper bound a caller-supplied window is
	// clamped to.
	maxHistoryWindow = 24 * time.Hour
)

// parseWindow reads the ?window= query parameter (a Go duration string such
// as "1h" or "30m") and returns it bounded to (0, maxHistoryWindow]. Missing,
// malformed, zero, or negative values yield defaultHistoryWindow.
func parseWindow(r *http.Request) time.Duration {
	value := r.URL.Query().Get("window")
	if value == "" {
		return defaultHistoryWindow
	}

	parsed, err := time.ParseDuration(value)
	switch {
	case err != nil, parsed <= 0:
		return defaultHistoryWindow
	case parsed > maxHistoryWindow:
		return maxHistoryWindow
	default:
		return parsed
	}
}

// sinceTimestamp returns the Unix-seconds cutoff that lies window before now.
// Unix time does not depend on the time's location, so no UTC conversion is
// needed.
func sinceTimestamp(window time.Duration) int64 {
	cutoff := time.Now().Add(-window)
	return cutoff.Unix()
}

// getSystemStats handles GET /api/system/stats — current host snapshot.
// When the Docker daemon is unreachable (e.g. Docker Desktop stopped) the
// handler returns 503 so the frontend can show a dedicated unavailable
// state instead of treating it as a generic 5xx failure.
func (s *Server) getSystemStats(w http.ResponseWriter, r *http.Request) { if s.docker == nil { respondError(w, http.StatusServiceUnavailable, "Docker is not available") return } sys, err := s.docker.GetSystemStats(r.Context()) if err != nil { slog.Warn("system stats unavailable", "error", err) respondError(w, http.StatusServiceUnavailable, "Docker is not available") return } respondJSON(w, http.StatusOK, sys) } // getSystemStatsHistory handles GET /api/system/stats/history?window=1h. func (s *Server) getSystemStatsHistory(w http.ResponseWriter, r *http.Request) { samples, err := s.store.ListSystemStatsSamples(sinceTimestamp(parseWindow(r))) if err != nil { slog.Error("failed to list system stats samples", "error", err) respondError(w, http.StatusInternalServerError, "failed to list samples") return } if samples == nil { samples = []store.SystemStatsSample{} } respondJSON(w, http.StatusOK, samples) } // getInstanceStatsHistory handles GET /api/projects/{id}/stages/{stage}/instances/{iid}/stats/history. // {iid} is the container row ID (same UUID as the legacy instance ID). func (s *Server) getInstanceStatsHistory(w http.ResponseWriter, r *http.Request) { instanceID := chi.URLParam(r, "iid") if _, err := s.store.GetContainerByID(instanceID); err != nil { if errors.Is(err, store.ErrNotFound) { respondNotFound(w, "container") return } slog.Error("failed to get container", "id", instanceID, "error", err) respondError(w, http.StatusInternalServerError, "failed to get container") return } samples, err := s.store.ListContainerStatsSamples(stats.OwnerTypeInstance, instanceID, sinceTimestamp(parseWindow(r))) if err != nil { slog.Error("failed to list instance stats samples", "instance_id", instanceID, "error", err) respondError(w, http.StatusInternalServerError, "failed to list samples") return } if samples == nil { samples = []store.ContainerStatsSample{} } respondJSON(w, http.StatusOK, samples) } // getStaticSiteStats handles GET /api/sites/{id}/stats — current snapshot. 
func (s *Server) getStaticSiteStats(w http.ResponseWriter, r *http.Request) { id := chi.URLParam(r, "id") site, err := s.store.GetStaticSiteByID(id) if err != nil { if errors.Is(err, store.ErrNotFound) { respondNotFound(w, "site") return } slog.Error("failed to get site", "site_id", id, "error", err) respondError(w, http.StatusInternalServerError, "failed to get site") return } if site.ContainerID == "" { respondError(w, http.StatusConflict, "site has no container") return } if s.docker == nil { respondError(w, http.StatusServiceUnavailable, "Docker is not available") return } cs, err := s.docker.GetContainerStats(r.Context(), site.ContainerID) if err != nil { slog.Error("failed to get site stats", "site_id", id, "error", err) respondError(w, http.StatusInternalServerError, "failed to get site stats") return } respondJSON(w, http.StatusOK, cs) } // getStaticSiteStatsHistory handles GET /api/sites/{id}/stats/history. func (s *Server) getStaticSiteStatsHistory(w http.ResponseWriter, r *http.Request) { id := chi.URLParam(r, "id") if _, err := s.store.GetStaticSiteByID(id); err != nil { if errors.Is(err, store.ErrNotFound) { respondNotFound(w, "site") return } slog.Error("failed to get site", "site_id", id, "error", err) respondError(w, http.StatusInternalServerError, "failed to get site") return } samples, err := s.store.ListContainerStatsSamples(stats.OwnerTypeSite, id, sinceTimestamp(parseWindow(r))) if err != nil { slog.Error("failed to list site stats samples", "site_id", id, "error", err) respondError(w, http.StatusInternalServerError, "failed to list samples") return } if samples == nil { samples = []store.ContainerStatsSample{} } respondJSON(w, http.StatusOK, samples) } // streamStaticSiteLogs handles GET /api/sites/{id}/logs?tail=200&follow=true. // Reuses the shared container log streamer so the SSE + multiplex handling // matches /api/projects/.../instances/.../logs exactly. 
func (s *Server) streamStaticSiteLogs(w http.ResponseWriter, r *http.Request) { id := chi.URLParam(r, "id") site, err := s.store.GetStaticSiteByID(id) if err != nil { if errors.Is(err, store.ErrNotFound) { respondNotFound(w, "site") return } slog.Error("failed to get site", "site_id", id, "error", err) respondError(w, http.StatusInternalServerError, "failed to get site") return } if site.ContainerID == "" { respondError(w, http.StatusConflict, "site has no container") return } s.streamLogsForContainer(w, r, site.ContainerID) } // listTopContainers handles GET /api/system/stats/top?limit=5&by=cpu. // Returns the top-N most recent samples across containers, sorted by CPU or // memory. Container IDs are stripped for non-admins so a low-privilege viewer // cannot enumerate workloads outside their scope. func (s *Server) listTopContainers(w http.ResponseWriter, r *http.Request) { limit := 5 if raw := r.URL.Query().Get("limit"); raw != "" { if n, err := strconv.Atoi(raw); err == nil && n > 0 && n <= 50 { limit = n } } by := r.URL.Query().Get("by") if by != "memory" { by = "cpu" } // Samples must be at least as recent as max(2*interval, 2 minutes) so the // list reflects near-current load even when collection is sparse. window := topConsumerMinWindow if settings, err := s.store.GetSettings(); err == nil && settings.StatsIntervalSeconds > 0 { if w := time.Duration(settings.StatsIntervalSeconds*2) * time.Second; w > window { window = w } } samples, err := s.store.ListAllRecentContainerStatsSamples(sinceTimestamp(window)) if err != nil { slog.Error("failed to list container samples for top", "error", err) respondError(w, http.StatusInternalServerError, "failed to list samples") return } // Keep only the latest sample per container. 
latest := make(map[string]store.ContainerStatsSample, len(samples)) for _, sm := range samples { if prev, ok := latest[sm.ContainerID]; !ok || sm.TS > prev.TS { latest[sm.ContainerID] = sm } } top := make([]store.ContainerStatsSample, 0, len(latest)) for _, sm := range latest { top = append(top, sm) } sort.Slice(top, func(i, j int) bool { if by == "memory" { return top[i].MemoryUsage > top[j].MemoryUsage } return top[i].CPUPercent > top[j].CPUPercent }) if len(top) > limit { top = top[:limit] } // Resolve owner names so the UI can show "project/stage" or the site name // without a per-row round trip. enriched := s.enrichWithOwnerNames(top) // Scrub container IDs for non-admins. The owner name is the actionable // identifier; the container ID is a host-level handle that reveals // workload existence to viewers who shouldn't have it. claims, _ := auth.ClaimsFromContext(r.Context()) if claims.Role != "admin" { for i := range enriched { enriched[i].ContainerID = "" } } respondJSON(w, http.StatusOK, enriched) } // enrichWithOwnerNames attaches a human-readable owner name to each sample. // Looks up instances and sites in batch so the cost is independent of the // number of samples (which is at most 'limit'). func (s *Server) enrichWithOwnerNames(samples []store.ContainerStatsSample) []TopContainerSample { out := make([]TopContainerSample, len(samples)) for i, sm := range samples { out[i] = TopContainerSample{ContainerStatsSample: sm} switch sm.OwnerType { case stats.OwnerTypeInstance: out[i].OwnerName = s.lookupInstanceName(sm.OwnerID) case stats.OwnerTypeSite: out[i].OwnerName = s.lookupSiteName(sm.OwnerID) } } return out } // lookupInstanceName returns "workload/role" for a container row, or empty // on any lookup error so a transient miss does not break the response. 
func (s *Server) lookupInstanceName(instanceID string) string { c, err := s.store.GetContainerByID(instanceID) if err != nil { return "" } w, err := s.store.GetWorkloadByID(c.WorkloadID) if err != nil { if c.Role != "" { return c.Role } return "" } if c.Role != "" { return w.Name + "/" + c.Role } return w.Name } // lookupSiteName returns the site's display name or empty on lookup error. func (s *Server) lookupSiteName(siteID string) string { site, err := s.store.GetStaticSiteByID(siteID) if err != nil { return "" } return site.Name }