Files
tiny-forge/internal/api/stats_history.go
T
alexei.dolgolyov 05440a5f92
Build / build (push) Successful in 10m50s
feat(stats): resource metrics dashboard + sites logs/stats
Background collector samples CPU/memory/network/block I/O for every
instance and site on a configurable interval (default 15s, range
5-300s), persists samples to SQLite with a configurable retention
window (default 2h, range 0-24h), and skips ticks gracefully when
the Docker daemon is unreachable. Settings are reloadable without
a restart — each tick re-reads them.

New API endpoints:
- GET /api/system/stats (host snapshot: info + df)
- GET /api/system/stats/history
- GET /api/system/stats/top?by=cpu|memory
- GET /api/projects/{id}/stages/{s}/instances/{iid}/stats/history
- GET /api/sites/{id}/stats[/history]
- GET /api/sites/{id}/logs (SSE + JSON, reuses instance log streamer)

Frontend:
- ECharts added with tree-shaken imports (~180KB gzip) for
  future-proof time-series/gantt/graph visualizations
- CollapsibleSection wraps all dashboard sections (system health,
  daemons, system resources, static sites, projects) with
  localStorage-persisted open state
- SystemResourcesCard shows capacity tiles, workload utilization
  chart with 30m/2h/6h/24h window picker, disk breakdown with
  reclaimable callouts, and top 5 consumers
- ContainerStats and ContainerLogs take a source discriminated union
  so sites reuse the same components as instances; sites detail page
  embeds both for Deno backend debugging
- Settings › Maintenance exposes collection interval + retention
- Docker-unavailable state returns 503 and renders an amber banner
  instead of a generic 500

Full i18n coverage (en + ru) for all new strings.
2026-04-24 15:02:43 +03:00

246 lines
8.0 KiB
Go

package api
import (
	"errors"
	"log/slog"
	"net/http"
	"sort"
	"strconv"
	"time"

	"github.com/go-chi/chi/v5"

	"github.com/alexei/tinyforge/internal/stats"
	"github.com/alexei/tinyforge/internal/store"
)
const (
// defaultHistoryWindow is the look-back parseWindow falls back to when the
// ?window= param is missing, fails to parse, or is not positive. It matches
// the default retention so the "last 2h" view always has data when
// collection is enabled.
defaultHistoryWindow = 2 * time.Hour
// maxHistoryWindow is the ceiling parseWindow applies: longer requested
// windows are clamped to this value rather than rejected.
maxHistoryWindow = 24 * time.Hour
)
// parseWindow extracts the look-back window from the ?window= query
// parameter, which is expected to be a Go duration string such as "30m" or
// "1h".
//
// A missing, unparseable, zero or negative value yields
// defaultHistoryWindow; a value above maxHistoryWindow is clamped down to
// maxHistoryWindow. Every other value is returned unchanged.
func parseWindow(r *http.Request) time.Duration {
	// An absent parameter reads as "", which ParseDuration rejects, so the
	// empty case is covered by the error branch below.
	window, err := time.ParseDuration(r.URL.Query().Get("window"))

	switch {
	case err != nil, window <= 0:
		return defaultHistoryWindow
	case window > maxHistoryWindow:
		return maxHistoryWindow
	default:
		return window
	}
}
// sinceTimestamp returns the Unix-seconds timestamp of the instant that lies
// window before the current time. Handlers pass it to the store as the lower
// bound when listing samples.
func sinceTimestamp(window time.Duration) int64 {
	// Unix() is independent of the time's location, so no zone conversion is
	// needed before taking it.
	cutoff := time.Now().Add(-window)
	return cutoff.Unix()
}
// getSystemStats serves GET /api/system/stats: a snapshot of the host taken
// at request time via the Docker client.
//
// Both failure modes — no Docker client configured on the server, and the
// stats call itself returning an error (e.g. Docker Desktop stopped) — are
// answered with 503 and the same message, so the frontend can render a
// dedicated "unavailable" state rather than a generic 5xx failure.
func (s *Server) getSystemStats(w http.ResponseWriter, r *http.Request) {
	const unavailable = "Docker is not available"

	if s.docker == nil {
		respondError(w, http.StatusServiceUnavailable, unavailable)
		return
	}

	snapshot, err := s.docker.GetSystemStats(r.Context())
	if err != nil {
		// Logged at Warn: an unreachable daemon is an expected condition.
		slog.Warn("system stats unavailable", "error", err)
		respondError(w, http.StatusServiceUnavailable, unavailable)
		return
	}

	respondJSON(w, http.StatusOK, snapshot)
}
// getSystemStatsHistory serves GET /api/system/stats/history?window=1h.
//
// It responds 200 with the stored system samples whose timestamp is at or
// after the cutoff derived from ?window= (see parseWindow for defaults and
// bounds), or 500 if the store query fails.
func (s *Server) getSystemStatsHistory(w http.ResponseWriter, r *http.Request) {
	cutoff := sinceTimestamp(parseWindow(r))

	history, err := s.store.ListSystemStatsSamples(cutoff)
	if err != nil {
		slog.Error("failed to list system stats samples", "error", err)
		respondError(w, http.StatusInternalServerError, "failed to list samples")
		return
	}

	// Swap a nil result for an empty slice — presumably so the body is
	// serialized as [] rather than null.
	if history == nil {
		history = make([]store.SystemStatsSample, 0)
	}
	respondJSON(w, http.StatusOK, history)
}
// getInstanceStatsHistory serves
// GET /api/projects/{id}/stages/{stage}/instances/{iid}/stats/history.
//
// Responses: 404 when the instance does not exist, 500 when either store
// call fails, otherwise 200 with the instance's samples inside the ?window=
// look-back (see parseWindow).
//
// NOTE(review): only the {iid} URL parameter is read here; {id} and {stage}
// are not cross-checked against the instance — confirm that is intended.
func (s *Server) getInstanceStatsHistory(w http.ResponseWriter, r *http.Request) {
	instanceID := chi.URLParam(r, "iid")

	// Existence check only: the instance record itself is not used.
	_, err := s.store.GetInstanceByID(instanceID)
	switch {
	case errors.Is(err, store.ErrNotFound):
		respondNotFound(w, "instance")
		return
	case err != nil:
		slog.Error("failed to get instance", "instance_id", instanceID, "error", err)
		respondError(w, http.StatusInternalServerError, "failed to get instance")
		return
	}

	cutoff := sinceTimestamp(parseWindow(r))
	history, err := s.store.ListContainerStatsSamples(stats.OwnerTypeInstance, instanceID, cutoff)
	if err != nil {
		slog.Error("failed to list instance stats samples", "instance_id", instanceID, "error", err)
		respondError(w, http.StatusInternalServerError, "failed to list samples")
		return
	}

	// Swap a nil result for an empty slice — presumably so the body is
	// serialized as [] rather than null.
	if history == nil {
		history = make([]store.ContainerStatsSample, 0)
	}
	respondJSON(w, http.StatusOK, history)
}
// getStaticSiteStats serves GET /api/sites/{id}/stats: the current resource
// snapshot of the container backing a static site.
//
// Responses:
//   - 404 when the site does not exist
//   - 409 when the site has no container ID recorded
//   - 503 when the server has no Docker client
//   - 500 when the site lookup or the stats call fails
//   - 200 with the container stats otherwise
func (s *Server) getStaticSiteStats(w http.ResponseWriter, r *http.Request) {
	siteID := chi.URLParam(r, "id")

	site, err := s.store.GetStaticSiteByID(siteID)
	switch {
	case errors.Is(err, store.ErrNotFound):
		respondNotFound(w, "site")
		return
	case err != nil:
		slog.Error("failed to get site", "site_id", siteID, "error", err)
		respondError(w, http.StatusInternalServerError, "failed to get site")
		return
	}

	// Preconditions, checked in this order: the site needs a container, and
	// the server needs a Docker client to query it.
	switch {
	case site.ContainerID == "":
		respondError(w, http.StatusConflict, "site has no container")
		return
	case s.docker == nil:
		respondError(w, http.StatusServiceUnavailable, "Docker is not available")
		return
	}

	snapshot, err := s.docker.GetContainerStats(r.Context(), site.ContainerID)
	if err != nil {
		slog.Error("failed to get site stats", "site_id", siteID, "error", err)
		respondError(w, http.StatusInternalServerError, "failed to get site stats")
		return
	}

	respondJSON(w, http.StatusOK, snapshot)
}
// getStaticSiteStatsHistory serves GET /api/sites/{id}/stats/history.
//
// Responses: 404 when the site does not exist, 500 when either store call
// fails, otherwise 200 with the site's samples inside the ?window= look-back
// (see parseWindow).
func (s *Server) getStaticSiteStatsHistory(w http.ResponseWriter, r *http.Request) {
	siteID := chi.URLParam(r, "id")

	// Existence check only: the site record itself is not used.
	_, err := s.store.GetStaticSiteByID(siteID)
	switch {
	case errors.Is(err, store.ErrNotFound):
		respondNotFound(w, "site")
		return
	case err != nil:
		slog.Error("failed to get site", "site_id", siteID, "error", err)
		respondError(w, http.StatusInternalServerError, "failed to get site")
		return
	}

	cutoff := sinceTimestamp(parseWindow(r))
	history, err := s.store.ListContainerStatsSamples(stats.OwnerTypeSite, siteID, cutoff)
	if err != nil {
		slog.Error("failed to list site stats samples", "site_id", siteID, "error", err)
		respondError(w, http.StatusInternalServerError, "failed to list samples")
		return
	}

	// Swap a nil result for an empty slice — presumably so the body is
	// serialized as [] rather than null.
	if history == nil {
		history = make([]store.ContainerStatsSample, 0)
	}
	respondJSON(w, http.StatusOK, history)
}
// streamStaticSiteLogs serves GET /api/sites/{id}/logs?tail=200&follow=true.
//
// After resolving the site to its container ID, the request is handed to
// s.streamLogsForContainer — the shared container log streamer — so the
// SSE + multiplex handling matches /api/projects/.../instances/.../logs
// exactly. Query parameters are not inspected here; presumably the shared
// streamer reads them.
//
// Responses produced directly by this handler: 404 when the site does not
// exist, 500 when the lookup fails, 409 when the site has no container ID.
func (s *Server) streamStaticSiteLogs(w http.ResponseWriter, r *http.Request) {
	siteID := chi.URLParam(r, "id")

	site, err := s.store.GetStaticSiteByID(siteID)
	switch {
	case errors.Is(err, store.ErrNotFound):
		respondNotFound(w, "site")
		return
	case err != nil:
		slog.Error("failed to get site", "site_id", siteID, "error", err)
		respondError(w, http.StatusInternalServerError, "failed to get site")
		return
	}

	containerID := site.ContainerID
	if containerID == "" {
		respondError(w, http.StatusConflict, "site has no container")
		return
	}

	s.streamLogsForContainer(w, r, containerID)
}
// listTopContainersByCPU serves GET /api/system/stats/top?limit=5&by=cpu.
//
// It answers with the heaviest current consumers so a dashboard "top
// consumers" widget does not have to aggregate per-container history itself.
//
// Query parameters:
//   - limit: integer in 1..50; any other value (or none) means 5
//   - by: "memory" ranks by memory; any other value (or none) ranks by CPU,
//     so despite its name the handler covers both metrics
//
// Responses: 500 when the store query fails, otherwise 200 with at most
// limit samples — one per container — ordered by sortContainerSamples.
func (s *Server) listTopContainersByCPU(w http.ResponseWriter, r *http.Request) {
	const (
		defaultLimit = 5
		maxLimit     = 50
	)
	query := r.URL.Query()

	// An absent limit reads as "", which Atoi rejects, leaving the default.
	limit := defaultLimit
	if n, err := strconv.Atoi(query.Get("limit")); err == nil && n >= 1 && n <= maxLimit {
		limit = n
	}

	metric := "cpu"
	if query.Get("by") == "memory" {
		metric = "memory"
	}

	// Only look two minutes back so the ranking reflects near-current load
	// rather than rows from containers that stopped long ago.
	cutoff := sinceTimestamp(2 * time.Minute)
	recent, err := s.store.ListAllRecentContainerStatsSamples(cutoff)
	if err != nil {
		slog.Error("failed to list container samples for top", "error", err)
		respondError(w, http.StatusInternalServerError, "failed to list samples")
		return
	}

	// Collapse to one entry per container: the sample with the greatest TS.
	// On equal TS the first one encountered wins.
	newest := make(map[string]store.ContainerStatsSample, len(recent))
	for _, sample := range recent {
		if known, seen := newest[sample.ContainerID]; seen && sample.TS <= known.TS {
			continue
		}
		newest[sample.ContainerID] = sample
	}

	ranked := make([]store.ContainerStatsSample, 0, len(newest))
	for _, sample := range newest {
		ranked = append(ranked, sample)
	}

	// Order by the chosen metric, heaviest first, then keep the head.
	sortContainerSamples(ranked, metric)
	if len(ranked) > limit {
		ranked = ranked[:limit]
	}

	respondJSON(w, http.StatusOK, ranked)
}
// sortContainerSamples reorders s in place so the heaviest consumers come
// first: by MemoryUsage when by == "memory", by CPUPercent for any other
// value of by.
//
// Samples whose metric is equal are ordered by ascending ContainerID. The
// caller in this file fills s by ranging over a map, whose iteration order is
// randomized, so without a tie-break two identical requests could return tied
// containers in a different order — or cut a different one out of the top-N.
// Samples that are equal on both the metric and the ContainerID keep their
// relative input order.
func sortContainerSamples(s []store.ContainerStatsSample, by string) {
	byMemory := by == "memory"

	sort.SliceStable(s, func(i, j int) bool {
		a, b := &s[i], &s[j]

		// Primary key: the requested metric, descending.
		if byMemory {
			switch {
			case a.MemoryUsage > b.MemoryUsage:
				return true
			case a.MemoryUsage < b.MemoryUsage:
				return false
			}
		} else {
			switch {
			case a.CPUPercent > b.CPUPercent:
				return true
			case a.CPUPercent < b.CPUPercent:
				return false
			}
		}

		// Secondary key: container ID, ascending, for a reproducible order.
		return a.ContainerID < b.ContainerID
	})
}