package api import ( "log/slog" "net/http" "sort" "strconv" "time" "github.com/alexei/tinyforge/internal/auth" "github.com/alexei/tinyforge/internal/store" ) // topConsumerMinWindow is how recent a container sample must be to count toward // the "top consumers" list. Scaled with the collector interval (read from // settings) so it stays meaningful even when sampling is sparse. const topConsumerMinWindow = 2 * time.Minute // TopContainerSample augments a stats sample with the human-readable owner // name so the UI can show "workload/role" without an extra round-trip per row. type TopContainerSample struct { store.ContainerStatsSample OwnerName string `json:"owner_name"` } const ( // defaultHistoryWindow is used when no ?window= param is provided or the // value fails to parse. Matches the default retention so the "last 2h" // view always has data when collection is enabled. defaultHistoryWindow = 2 * time.Hour maxHistoryWindow = 24 * time.Hour ) // parseWindow reads the ?window= query (Go duration string, e.g. "1h", "30m") // and returns a bounded duration. func parseWindow(r *http.Request) time.Duration { raw := r.URL.Query().Get("window") if raw == "" { return defaultHistoryWindow } d, err := time.ParseDuration(raw) if err != nil || d <= 0 { return defaultHistoryWindow } if d > maxHistoryWindow { return maxHistoryWindow } return d } // sinceTimestamp converts a duration into a Unix-seconds cutoff. func sinceTimestamp(window time.Duration) int64 { return time.Now().UTC().Add(-window).Unix() } // getSystemStats handles GET /api/system/stats — current host snapshot. // When the Docker daemon is unreachable (e.g. Docker Desktop stopped) the // handler returns 503 so the frontend can show a dedicated unavailable // state instead of treating it as a generic 5xx failure. func (s *Server) getSystemStats(w http.ResponseWriter, r *http.Request) { if s.docker == nil { respondError(w, http.StatusServiceUnavailable, "Docker is not available") return } sys, err := s.docker.GetSystemStats(r.Context()) if err != nil { slog.Warn("system stats unavailable", "error", err) respondError(w, http.StatusServiceUnavailable, "Docker is not available") return } respondJSON(w, http.StatusOK, sys) } // getSystemStatsHistory handles GET /api/system/stats/history?window=1h. func (s *Server) getSystemStatsHistory(w http.ResponseWriter, r *http.Request) { samples, err := s.store.ListSystemStatsSamples(sinceTimestamp(parseWindow(r))) if err != nil { slog.Error("failed to list system stats samples", "error", err) respondError(w, http.StatusInternalServerError, "failed to list samples") return } if samples == nil { samples = []store.SystemStatsSample{} } respondJSON(w, http.StatusOK, samples) } // listTopContainers handles GET /api/system/stats/top?limit=5&by=cpu. // Returns the top-N most recent samples across containers, sorted by CPU or // memory. Container IDs are stripped for non-admins so a low-privilege viewer // cannot enumerate workloads outside their scope. func (s *Server) listTopContainers(w http.ResponseWriter, r *http.Request) { limit := 5 if raw := r.URL.Query().Get("limit"); raw != "" { if n, err := strconv.Atoi(raw); err == nil && n > 0 && n <= 50 { limit = n } } by := r.URL.Query().Get("by") if by != "memory" { by = "cpu" } // Samples must be at least as recent as max(2*interval, 2 minutes) so the // list reflects near-current load even when collection is sparse. window := topConsumerMinWindow if settings, err := s.store.GetSettings(); err == nil && settings.StatsIntervalSeconds > 0 { if w := time.Duration(settings.StatsIntervalSeconds*2) * time.Second; w > window { window = w } } samples, err := s.store.ListAllRecentContainerStatsSamples(sinceTimestamp(window)) if err != nil { slog.Error("failed to list container samples for top", "error", err) respondError(w, http.StatusInternalServerError, "failed to list samples") return } // Keep only the latest sample per container. latest := make(map[string]store.ContainerStatsSample, len(samples)) for _, sm := range samples { if prev, ok := latest[sm.ContainerID]; !ok || sm.TS > prev.TS { latest[sm.ContainerID] = sm } } top := make([]store.ContainerStatsSample, 0, len(latest)) for _, sm := range latest { top = append(top, sm) } sort.Slice(top, func(i, j int) bool { if by == "memory" { return top[i].MemoryUsage > top[j].MemoryUsage } return top[i].CPUPercent > top[j].CPUPercent }) if len(top) > limit { top = top[:limit] } enriched := s.enrichWithOwnerNames(top) // Scrub container IDs for non-admins. The owner name is the actionable // identifier; the container ID is a host-level handle that reveals // workload existence to viewers who shouldn't have it. claims, _ := auth.ClaimsFromContext(r.Context()) if claims.Role != "admin" { for i := range enriched { enriched[i].ContainerID = "" } } respondJSON(w, http.StatusOK, enriched) } // enrichWithOwnerNames attaches a human-readable owner name to each sample. // Names are resolved through the containers index → workloads, which after // the cutover is the only available lookup path. func (s *Server) enrichWithOwnerNames(samples []store.ContainerStatsSample) []TopContainerSample { out := make([]TopContainerSample, len(samples)) for i, sm := range samples { out[i] = TopContainerSample{ContainerStatsSample: sm} out[i].OwnerName = s.lookupInstanceName(sm.OwnerID) } return out } // lookupInstanceName returns "workload/role" for a container row, or empty // on any lookup error so a transient miss does not break the response. func (s *Server) lookupInstanceName(instanceID string) string { c, err := s.store.GetContainerByID(instanceID) if err != nil { return "" } w, err := s.store.GetWorkloadByID(c.WorkloadID) if err != nil { if c.Role != "" { return c.Role } return "" } if c.Role != "" { return w.Name + "/" + c.Role } return w.Name }