feat(stats): resource metrics dashboard + sites logs/stats
Build / build (push) Successful in 10m50s
Build / build (push) Successful in 10m50s
Background collector samples CPU/memory/network/block I/O for every
instance and site on a configurable interval (default 15s, range
5-300s, or 0 to disable collection), persists samples to SQLite with
a configurable retention window (default 2h, range 0-24h), and skips
ticks gracefully when the Docker daemon is unreachable. Settings are
reloadable without a restart — each tick re-reads them.
New API endpoints:
- GET /api/system/stats (host snapshot: info + df)
- GET /api/system/stats/history
- GET /api/system/stats/top?by=cpu|memory&limit=N (N in 1-50, default 5)
- GET /api/projects/{id}/stages/{s}/instances/{iid}/stats/history
- GET /api/sites/{id}/stats[/history]
- GET /api/sites/{id}/logs (SSE + JSON, reuses instance log streamer)
Frontend:
- ECharts added with tree-shaken imports (~180KB gzip) for
future-proof time-series/gantt/graph visualizations
- CollapsibleSection wraps all dashboard sections (system health,
daemons, system resources, static sites, projects) with
localStorage-persisted open state
- SystemResourcesCard shows capacity tiles, workload utilization
chart with 30m/2h/6h/24h window picker, disk breakdown with
reclaimable callouts, and top 5 consumers
- ContainerStats and ContainerLogs take a source discriminated union
so sites reuse the same components as instances; sites detail page
embeds both for Deno backend debugging
- Settings › Maintenance exposes collection interval + retention
- Docker-unavailable state returns 503 and renders an amber banner
instead of a generic 500
Full i18n coverage (en + ru) for all new strings.
This commit is contained in:
@@ -68,6 +68,13 @@ func (s *Server) streamContainerLogs(w http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
}
|
||||
|
||||
s.streamLogsForContainer(w, r, inst.ContainerID)
|
||||
}
|
||||
|
||||
// streamLogsForContainer streams logs for an arbitrary container ID using the
|
||||
// shared SSE/JSON dual-mode pattern. Owner-specific handlers (instance, site)
|
||||
// should validate ownership and then delegate here.
|
||||
func (s *Server) streamLogsForContainer(w http.ResponseWriter, r *http.Request, containerID string) {
|
||||
if s.docker == nil {
|
||||
respondError(w, http.StatusServiceUnavailable, "Docker is not available")
|
||||
return
|
||||
@@ -83,9 +90,9 @@ func (s *Server) streamContainerLogs(w http.ResponseWriter, r *http.Request) {
|
||||
accept := r.Header.Get("Accept")
|
||||
isSSE := strings.Contains(accept, "text/event-stream")
|
||||
|
||||
logReader, err := s.docker.ContainerLogs(r.Context(), inst.ContainerID, follow && isSSE, tail)
|
||||
logReader, err := s.docker.ContainerLogs(r.Context(), containerID, follow && isSSE, tail)
|
||||
if err != nil {
|
||||
slog.Error("failed to get container logs", "instance", instanceID, "error", err)
|
||||
slog.Error("failed to get container logs", "container", containerID, "error", err)
|
||||
respondError(w, http.StatusInternalServerError, "failed to get container logs")
|
||||
return
|
||||
}
|
||||
|
||||
@@ -219,6 +219,7 @@ func (s *Server) Router() chi.Router {
|
||||
r.Get("/stages/{stage}/env", s.listStageEnv)
|
||||
r.Get("/stages/{stage}/instances", s.listInstances)
|
||||
r.Get("/stages/{stage}/instances/{iid}/stats", s.getInstanceStats)
|
||||
r.Get("/stages/{stage}/instances/{iid}/stats/history", s.getInstanceStatsHistory)
|
||||
r.Get("/stages/{stage}/instances/{iid}/logs", s.streamContainerLogs)
|
||||
r.Get("/images", s.listProjectImages)
|
||||
r.Get("/volumes", s.listVolumes)
|
||||
@@ -288,6 +289,9 @@ func (s *Server) Router() chi.Router {
|
||||
r.Get("/", s.getStaticSite)
|
||||
r.Get("/secrets", s.listStaticSiteSecrets)
|
||||
r.Get("/storage", s.getStaticSiteStorage)
|
||||
r.Get("/logs", s.streamStaticSiteLogs)
|
||||
r.Get("/stats", s.getStaticSiteStats)
|
||||
r.Get("/stats/history", s.getStaticSiteStatsHistory)
|
||||
|
||||
// Admin-only mutations.
|
||||
r.Group(func(r chi.Router) {
|
||||
@@ -333,6 +337,11 @@ func (s *Server) Router() chi.Router {
|
||||
// Stale container endpoints (read).
|
||||
r.Get("/containers/stale", s.listStaleContainers)
|
||||
|
||||
// System resources (read-only).
|
||||
r.Get("/system/stats", s.getSystemStats)
|
||||
r.Get("/system/stats/history", s.getSystemStatsHistory)
|
||||
r.Get("/system/stats/top", s.listTopContainersByCPU)
|
||||
|
||||
// Admin-only routes: require admin role.
|
||||
r.Group(func(r chi.Router) {
|
||||
r.Use(auth.AdminOnly)
|
||||
|
||||
@@ -46,6 +46,8 @@ type settingsRequest struct {
|
||||
BackupEnabled *bool `json:"backup_enabled,omitempty"`
|
||||
BackupIntervalHours *int `json:"backup_interval_hours,omitempty"`
|
||||
BackupRetentionCount *int `json:"backup_retention_count,omitempty"`
|
||||
StatsIntervalSeconds *int `json:"stats_interval_seconds,omitempty"`
|
||||
StatsRetentionHours *int `json:"stats_retention_hours,omitempty"`
|
||||
}
|
||||
|
||||
// getSettings handles GET /api/settings.
|
||||
@@ -86,6 +88,8 @@ func (s *Server) getSettings(w http.ResponseWriter, r *http.Request) {
|
||||
"backup_enabled": settings.BackupEnabled,
|
||||
"backup_interval_hours": settings.BackupIntervalHours,
|
||||
"backup_retention_count": settings.BackupRetentionCount,
|
||||
"stats_interval_seconds": settings.StatsIntervalSeconds,
|
||||
"stats_retention_hours": settings.StatsRetentionHours,
|
||||
"updated_at": settings.UpdatedAt,
|
||||
})
|
||||
}
|
||||
@@ -238,6 +242,22 @@ func (s *Server) updateSettings(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
updated.BackupRetentionCount = *req.BackupRetentionCount
|
||||
}
|
||||
if req.StatsIntervalSeconds != nil {
|
||||
v := *req.StatsIntervalSeconds
|
||||
if v != 0 && (v < 5 || v > 300) {
|
||||
respondError(w, http.StatusBadRequest, "stats_interval_seconds must be 0 (disabled) or between 5 and 300")
|
||||
return
|
||||
}
|
||||
updated.StatsIntervalSeconds = v
|
||||
}
|
||||
if req.StatsRetentionHours != nil {
|
||||
v := *req.StatsRetentionHours
|
||||
if v < 0 || v > 24 {
|
||||
respondError(w, http.StatusBadRequest, "stats_retention_hours must be between 0 and 24")
|
||||
return
|
||||
}
|
||||
updated.StatsRetentionHours = v
|
||||
}
|
||||
|
||||
if err := s.store.UpdateSettings(updated); err != nil {
|
||||
respondError(w, http.StatusInternalServerError, "failed to update settings: "+err.Error())
|
||||
|
||||
@@ -0,0 +1,245 @@
|
||||
package api
|
||||
|
||||
import (
	"cmp"
	"errors"
	"log/slog"
	"net/http"
	"slices"
	"strconv"
	"time"

	"github.com/go-chi/chi/v5"

	"github.com/alexei/tinyforge/internal/stats"
	"github.com/alexei/tinyforge/internal/store"
)
|
||||
|
||||
// Bounds applied to the ?window= query parameter of the stats history
// endpoints.
const (
	// defaultHistoryWindow is the fallback used when ?window= is absent or
	// cannot be parsed. It equals the default retention period, so the
	// "last 2h" view always has data while collection is enabled.
	defaultHistoryWindow time.Duration = 2 * time.Hour

	// maxHistoryWindow is the longest window a client may request; larger
	// values are clamped to it. It equals the maximum retention setting.
	maxHistoryWindow time.Duration = 24 * time.Hour
)
|
||||
|
||||
// parseWindow reads the ?window= query (Go duration string, e.g. "1h", "30m")
|
||||
// and returns a bounded duration.
|
||||
func parseWindow(r *http.Request) time.Duration {
|
||||
raw := r.URL.Query().Get("window")
|
||||
if raw == "" {
|
||||
return defaultHistoryWindow
|
||||
}
|
||||
d, err := time.ParseDuration(raw)
|
||||
if err != nil || d <= 0 {
|
||||
return defaultHistoryWindow
|
||||
}
|
||||
if d > maxHistoryWindow {
|
||||
return maxHistoryWindow
|
||||
}
|
||||
return d
|
||||
}
|
||||
|
||||
// sinceTimestamp returns the Unix time, in whole seconds, of the instant
// that lies window before now. Unix seconds do not depend on the time zone,
// so no location conversion is needed.
func sinceTimestamp(window time.Duration) int64 {
	cutoff := time.Now().Add(-window)
	return cutoff.Unix()
}
|
||||
|
||||
// getSystemStats handles GET /api/system/stats — current host snapshot.
|
||||
// When the Docker daemon is unreachable (e.g. Docker Desktop stopped) the
|
||||
// handler returns 503 so the frontend can show a dedicated unavailable
|
||||
// state instead of treating it as a generic 5xx failure.
|
||||
func (s *Server) getSystemStats(w http.ResponseWriter, r *http.Request) {
|
||||
if s.docker == nil {
|
||||
respondError(w, http.StatusServiceUnavailable, "Docker is not available")
|
||||
return
|
||||
}
|
||||
sys, err := s.docker.GetSystemStats(r.Context())
|
||||
if err != nil {
|
||||
slog.Warn("system stats unavailable", "error", err)
|
||||
respondError(w, http.StatusServiceUnavailable, "Docker is not available")
|
||||
return
|
||||
}
|
||||
respondJSON(w, http.StatusOK, sys)
|
||||
}
|
||||
|
||||
// getSystemStatsHistory handles GET /api/system/stats/history?window=1h.
|
||||
func (s *Server) getSystemStatsHistory(w http.ResponseWriter, r *http.Request) {
|
||||
samples, err := s.store.ListSystemStatsSamples(sinceTimestamp(parseWindow(r)))
|
||||
if err != nil {
|
||||
slog.Error("failed to list system stats samples", "error", err)
|
||||
respondError(w, http.StatusInternalServerError, "failed to list samples")
|
||||
return
|
||||
}
|
||||
if samples == nil {
|
||||
samples = []store.SystemStatsSample{}
|
||||
}
|
||||
respondJSON(w, http.StatusOK, samples)
|
||||
}
|
||||
|
||||
// getInstanceStatsHistory handles GET /api/projects/{id}/stages/{stage}/instances/{iid}/stats/history.
|
||||
func (s *Server) getInstanceStatsHistory(w http.ResponseWriter, r *http.Request) {
|
||||
instanceID := chi.URLParam(r, "iid")
|
||||
if _, err := s.store.GetInstanceByID(instanceID); err != nil {
|
||||
if errors.Is(err, store.ErrNotFound) {
|
||||
respondNotFound(w, "instance")
|
||||
return
|
||||
}
|
||||
slog.Error("failed to get instance", "instance_id", instanceID, "error", err)
|
||||
respondError(w, http.StatusInternalServerError, "failed to get instance")
|
||||
return
|
||||
}
|
||||
samples, err := s.store.ListContainerStatsSamples(stats.OwnerTypeInstance, instanceID, sinceTimestamp(parseWindow(r)))
|
||||
if err != nil {
|
||||
slog.Error("failed to list instance stats samples", "instance_id", instanceID, "error", err)
|
||||
respondError(w, http.StatusInternalServerError, "failed to list samples")
|
||||
return
|
||||
}
|
||||
if samples == nil {
|
||||
samples = []store.ContainerStatsSample{}
|
||||
}
|
||||
respondJSON(w, http.StatusOK, samples)
|
||||
}
|
||||
|
||||
// getStaticSiteStats handles GET /api/sites/{id}/stats — current snapshot.
|
||||
func (s *Server) getStaticSiteStats(w http.ResponseWriter, r *http.Request) {
|
||||
id := chi.URLParam(r, "id")
|
||||
site, err := s.store.GetStaticSiteByID(id)
|
||||
if err != nil {
|
||||
if errors.Is(err, store.ErrNotFound) {
|
||||
respondNotFound(w, "site")
|
||||
return
|
||||
}
|
||||
slog.Error("failed to get site", "site_id", id, "error", err)
|
||||
respondError(w, http.StatusInternalServerError, "failed to get site")
|
||||
return
|
||||
}
|
||||
if site.ContainerID == "" {
|
||||
respondError(w, http.StatusConflict, "site has no container")
|
||||
return
|
||||
}
|
||||
if s.docker == nil {
|
||||
respondError(w, http.StatusServiceUnavailable, "Docker is not available")
|
||||
return
|
||||
}
|
||||
cs, err := s.docker.GetContainerStats(r.Context(), site.ContainerID)
|
||||
if err != nil {
|
||||
slog.Error("failed to get site stats", "site_id", id, "error", err)
|
||||
respondError(w, http.StatusInternalServerError, "failed to get site stats")
|
||||
return
|
||||
}
|
||||
respondJSON(w, http.StatusOK, cs)
|
||||
}
|
||||
|
||||
// getStaticSiteStatsHistory handles GET /api/sites/{id}/stats/history.
|
||||
func (s *Server) getStaticSiteStatsHistory(w http.ResponseWriter, r *http.Request) {
|
||||
id := chi.URLParam(r, "id")
|
||||
if _, err := s.store.GetStaticSiteByID(id); err != nil {
|
||||
if errors.Is(err, store.ErrNotFound) {
|
||||
respondNotFound(w, "site")
|
||||
return
|
||||
}
|
||||
slog.Error("failed to get site", "site_id", id, "error", err)
|
||||
respondError(w, http.StatusInternalServerError, "failed to get site")
|
||||
return
|
||||
}
|
||||
samples, err := s.store.ListContainerStatsSamples(stats.OwnerTypeSite, id, sinceTimestamp(parseWindow(r)))
|
||||
if err != nil {
|
||||
slog.Error("failed to list site stats samples", "site_id", id, "error", err)
|
||||
respondError(w, http.StatusInternalServerError, "failed to list samples")
|
||||
return
|
||||
}
|
||||
if samples == nil {
|
||||
samples = []store.ContainerStatsSample{}
|
||||
}
|
||||
respondJSON(w, http.StatusOK, samples)
|
||||
}
|
||||
|
||||
// streamStaticSiteLogs handles GET /api/sites/{id}/logs?tail=200&follow=true.
|
||||
// Reuses the shared container log streamer so the SSE + multiplex handling
|
||||
// matches /api/projects/.../instances/.../logs exactly.
|
||||
func (s *Server) streamStaticSiteLogs(w http.ResponseWriter, r *http.Request) {
|
||||
id := chi.URLParam(r, "id")
|
||||
site, err := s.store.GetStaticSiteByID(id)
|
||||
if err != nil {
|
||||
if errors.Is(err, store.ErrNotFound) {
|
||||
respondNotFound(w, "site")
|
||||
return
|
||||
}
|
||||
slog.Error("failed to get site", "site_id", id, "error", err)
|
||||
respondError(w, http.StatusInternalServerError, "failed to get site")
|
||||
return
|
||||
}
|
||||
if site.ContainerID == "" {
|
||||
respondError(w, http.StatusConflict, "site has no container")
|
||||
return
|
||||
}
|
||||
s.streamLogsForContainer(w, r, site.ContainerID)
|
||||
}
|
||||
|
||||
// listTopContainersByCPU handles GET /api/system/stats/top?limit=5&by=cpu.
|
||||
// Returns the top-N most recent samples across containers, sorted by CPU or
|
||||
// memory. Useful for a system dashboard "top consumers" widget without
|
||||
// requiring the frontend to aggregate per-container history on its own.
|
||||
func (s *Server) listTopContainersByCPU(w http.ResponseWriter, r *http.Request) {
|
||||
limit := 5
|
||||
if raw := r.URL.Query().Get("limit"); raw != "" {
|
||||
if n, err := strconv.Atoi(raw); err == nil && n > 0 && n <= 50 {
|
||||
limit = n
|
||||
}
|
||||
}
|
||||
by := r.URL.Query().Get("by")
|
||||
if by != "memory" {
|
||||
by = "cpu"
|
||||
}
|
||||
|
||||
// Samples from the last 2 minutes window so "top" reflects near-current
|
||||
// load, not long-dead rows.
|
||||
samples, err := s.store.ListAllRecentContainerStatsSamples(sinceTimestamp(2 * time.Minute))
|
||||
if err != nil {
|
||||
slog.Error("failed to list container samples for top", "error", err)
|
||||
respondError(w, http.StatusInternalServerError, "failed to list samples")
|
||||
return
|
||||
}
|
||||
|
||||
// Keep only the latest sample per container.
|
||||
latest := make(map[string]store.ContainerStatsSample, len(samples))
|
||||
for _, sm := range samples {
|
||||
if prev, ok := latest[sm.ContainerID]; !ok || sm.TS > prev.TS {
|
||||
latest[sm.ContainerID] = sm
|
||||
}
|
||||
}
|
||||
|
||||
top := make([]store.ContainerStatsSample, 0, len(latest))
|
||||
for _, sm := range latest {
|
||||
top = append(top, sm)
|
||||
}
|
||||
|
||||
// Partial-sort by the requested metric, descending. For small N a simple
|
||||
// insertion-like approach is plenty.
|
||||
sortContainerSamples(top, by)
|
||||
if len(top) > limit {
|
||||
top = top[:limit]
|
||||
}
|
||||
respondJSON(w, http.StatusOK, top)
|
||||
}
|
||||
|
||||
// sortContainerSamples sorts in place by CPU (or memory) descending.
|
||||
// Note: ListContainerStatsSamples with empty ownerID returns no rows — the
|
||||
// caller uses per-owner-type queries and merges; this helper is applied to
|
||||
// the already-merged slice.
|
||||
func sortContainerSamples(s []store.ContainerStatsSample, by string) {
|
||||
// O(n^2) is fine — N is small (bounded by the number of containers).
|
||||
for i := 1; i < len(s); i++ {
|
||||
for j := i; j > 0; j-- {
|
||||
var less bool
|
||||
if by == "memory" {
|
||||
less = s[j].MemoryUsage > s[j-1].MemoryUsage
|
||||
} else {
|
||||
less = s[j].CPUPercent > s[j-1].CPUPercent
|
||||
}
|
||||
if !less {
|
||||
break
|
||||
}
|
||||
s[j-1], s[j] = s[j], s[j-1]
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user