feat(stats): resource metrics dashboard + sites logs/stats
Build / build (push) Successful in 10m50s
Build / build (push) Successful in 10m50s
Background collector samples CPU/memory/network/block I/O for every
instance and site on a configurable interval (default 15s, range
5-300s), persists samples to SQLite with a configurable retention
window (default 2h, range 0-24h), and skips ticks gracefully when
the Docker daemon is unreachable. Settings are reloadable without
a restart — each tick re-reads them.
New API endpoints:
- GET /api/system/stats (host snapshot: info + df)
- GET /api/system/stats/history
- GET /api/system/stats/top?by=cpu|memory
- GET /api/projects/{id}/stages/{s}/instances/{iid}/stats/history
- GET /api/sites/{id}/stats[/history]
- GET /api/sites/{id}/logs (SSE + JSON, reuses instance log streamer)
Frontend:
- ECharts added with tree-shaken imports (~180KB gzip) for
future-proof time-series/gantt/graph visualizations
- CollapsibleSection wraps all dashboard sections (system health,
daemons, system resources, static sites, projects) with
localStorage-persisted open state
- SystemResourcesCard shows capacity tiles, workload utilization
chart with 30m/2h/6h/24h window picker, disk breakdown with
reclaimable callouts, and top 5 consumers
- ContainerStats and ContainerLogs take a source discriminated union
so sites reuse the same components as instances; sites detail page
embeds both for Deno backend debugging
- Settings › Maintenance exposes collection interval + retention
- Docker-unavailable state returns 503 and renders an amber banner
instead of a generic 500
Full i18n coverage (en + ru) for all new strings.
This commit is contained in:
@@ -68,6 +68,13 @@ func (s *Server) streamContainerLogs(w http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
}
|
||||
|
||||
s.streamLogsForContainer(w, r, inst.ContainerID)
|
||||
}
|
||||
|
||||
// streamLogsForContainer streams logs for an arbitrary container ID using the
|
||||
// shared SSE/JSON dual-mode pattern. Owner-specific handlers (instance, site)
|
||||
// should validate ownership and then delegate here.
|
||||
func (s *Server) streamLogsForContainer(w http.ResponseWriter, r *http.Request, containerID string) {
|
||||
if s.docker == nil {
|
||||
respondError(w, http.StatusServiceUnavailable, "Docker is not available")
|
||||
return
|
||||
@@ -83,9 +90,9 @@ func (s *Server) streamContainerLogs(w http.ResponseWriter, r *http.Request) {
|
||||
accept := r.Header.Get("Accept")
|
||||
isSSE := strings.Contains(accept, "text/event-stream")
|
||||
|
||||
logReader, err := s.docker.ContainerLogs(r.Context(), inst.ContainerID, follow && isSSE, tail)
|
||||
logReader, err := s.docker.ContainerLogs(r.Context(), containerID, follow && isSSE, tail)
|
||||
if err != nil {
|
||||
slog.Error("failed to get container logs", "instance", instanceID, "error", err)
|
||||
slog.Error("failed to get container logs", "container", containerID, "error", err)
|
||||
respondError(w, http.StatusInternalServerError, "failed to get container logs")
|
||||
return
|
||||
}
|
||||
|
||||
@@ -219,6 +219,7 @@ func (s *Server) Router() chi.Router {
|
||||
r.Get("/stages/{stage}/env", s.listStageEnv)
|
||||
r.Get("/stages/{stage}/instances", s.listInstances)
|
||||
r.Get("/stages/{stage}/instances/{iid}/stats", s.getInstanceStats)
|
||||
r.Get("/stages/{stage}/instances/{iid}/stats/history", s.getInstanceStatsHistory)
|
||||
r.Get("/stages/{stage}/instances/{iid}/logs", s.streamContainerLogs)
|
||||
r.Get("/images", s.listProjectImages)
|
||||
r.Get("/volumes", s.listVolumes)
|
||||
@@ -288,6 +289,9 @@ func (s *Server) Router() chi.Router {
|
||||
r.Get("/", s.getStaticSite)
|
||||
r.Get("/secrets", s.listStaticSiteSecrets)
|
||||
r.Get("/storage", s.getStaticSiteStorage)
|
||||
r.Get("/logs", s.streamStaticSiteLogs)
|
||||
r.Get("/stats", s.getStaticSiteStats)
|
||||
r.Get("/stats/history", s.getStaticSiteStatsHistory)
|
||||
|
||||
// Admin-only mutations.
|
||||
r.Group(func(r chi.Router) {
|
||||
@@ -333,6 +337,11 @@ func (s *Server) Router() chi.Router {
|
||||
// Stale container endpoints (read).
|
||||
r.Get("/containers/stale", s.listStaleContainers)
|
||||
|
||||
// System resources (read-only).
|
||||
r.Get("/system/stats", s.getSystemStats)
|
||||
r.Get("/system/stats/history", s.getSystemStatsHistory)
|
||||
r.Get("/system/stats/top", s.listTopContainersByCPU)
|
||||
|
||||
// Admin-only routes: require admin role.
|
||||
r.Group(func(r chi.Router) {
|
||||
r.Use(auth.AdminOnly)
|
||||
|
||||
@@ -46,6 +46,8 @@ type settingsRequest struct {
|
||||
BackupEnabled *bool `json:"backup_enabled,omitempty"`
|
||||
BackupIntervalHours *int `json:"backup_interval_hours,omitempty"`
|
||||
BackupRetentionCount *int `json:"backup_retention_count,omitempty"`
|
||||
StatsIntervalSeconds *int `json:"stats_interval_seconds,omitempty"`
|
||||
StatsRetentionHours *int `json:"stats_retention_hours,omitempty"`
|
||||
}
|
||||
|
||||
// getSettings handles GET /api/settings.
|
||||
@@ -86,6 +88,8 @@ func (s *Server) getSettings(w http.ResponseWriter, r *http.Request) {
|
||||
"backup_enabled": settings.BackupEnabled,
|
||||
"backup_interval_hours": settings.BackupIntervalHours,
|
||||
"backup_retention_count": settings.BackupRetentionCount,
|
||||
"stats_interval_seconds": settings.StatsIntervalSeconds,
|
||||
"stats_retention_hours": settings.StatsRetentionHours,
|
||||
"updated_at": settings.UpdatedAt,
|
||||
})
|
||||
}
|
||||
@@ -238,6 +242,22 @@ func (s *Server) updateSettings(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
updated.BackupRetentionCount = *req.BackupRetentionCount
|
||||
}
|
||||
if req.StatsIntervalSeconds != nil {
|
||||
v := *req.StatsIntervalSeconds
|
||||
if v != 0 && (v < 5 || v > 300) {
|
||||
respondError(w, http.StatusBadRequest, "stats_interval_seconds must be 0 (disabled) or between 5 and 300")
|
||||
return
|
||||
}
|
||||
updated.StatsIntervalSeconds = v
|
||||
}
|
||||
if req.StatsRetentionHours != nil {
|
||||
v := *req.StatsRetentionHours
|
||||
if v < 0 || v > 24 {
|
||||
respondError(w, http.StatusBadRequest, "stats_retention_hours must be between 0 and 24")
|
||||
return
|
||||
}
|
||||
updated.StatsRetentionHours = v
|
||||
}
|
||||
|
||||
if err := s.store.UpdateSettings(updated); err != nil {
|
||||
respondError(w, http.StatusInternalServerError, "failed to update settings: "+err.Error())
|
||||
|
||||
@@ -0,0 +1,245 @@
|
||||
package api
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/go-chi/chi/v5"
|
||||
|
||||
"github.com/alexei/tinyforge/internal/stats"
|
||||
"github.com/alexei/tinyforge/internal/store"
|
||||
)
|
||||
|
||||
const (
|
||||
// defaultHistoryWindow is used when no ?window= param is provided or the
|
||||
// value fails to parse. Matches the default retention so the "last 2h"
|
||||
// view always has data when collection is enabled.
|
||||
defaultHistoryWindow = 2 * time.Hour
|
||||
maxHistoryWindow = 24 * time.Hour
|
||||
)
|
||||
|
||||
// parseWindow reads the ?window= query (Go duration string, e.g. "1h", "30m")
|
||||
// and returns a bounded duration.
|
||||
func parseWindow(r *http.Request) time.Duration {
|
||||
raw := r.URL.Query().Get("window")
|
||||
if raw == "" {
|
||||
return defaultHistoryWindow
|
||||
}
|
||||
d, err := time.ParseDuration(raw)
|
||||
if err != nil || d <= 0 {
|
||||
return defaultHistoryWindow
|
||||
}
|
||||
if d > maxHistoryWindow {
|
||||
return maxHistoryWindow
|
||||
}
|
||||
return d
|
||||
}
|
||||
|
||||
// sinceTimestamp converts a duration into a Unix-seconds cutoff.
|
||||
func sinceTimestamp(window time.Duration) int64 {
|
||||
return time.Now().UTC().Add(-window).Unix()
|
||||
}
|
||||
|
||||
// getSystemStats handles GET /api/system/stats — current host snapshot.
|
||||
// When the Docker daemon is unreachable (e.g. Docker Desktop stopped) the
|
||||
// handler returns 503 so the frontend can show a dedicated unavailable
|
||||
// state instead of treating it as a generic 5xx failure.
|
||||
func (s *Server) getSystemStats(w http.ResponseWriter, r *http.Request) {
|
||||
if s.docker == nil {
|
||||
respondError(w, http.StatusServiceUnavailable, "Docker is not available")
|
||||
return
|
||||
}
|
||||
sys, err := s.docker.GetSystemStats(r.Context())
|
||||
if err != nil {
|
||||
slog.Warn("system stats unavailable", "error", err)
|
||||
respondError(w, http.StatusServiceUnavailable, "Docker is not available")
|
||||
return
|
||||
}
|
||||
respondJSON(w, http.StatusOK, sys)
|
||||
}
|
||||
|
||||
// getSystemStatsHistory handles GET /api/system/stats/history?window=1h.
|
||||
func (s *Server) getSystemStatsHistory(w http.ResponseWriter, r *http.Request) {
|
||||
samples, err := s.store.ListSystemStatsSamples(sinceTimestamp(parseWindow(r)))
|
||||
if err != nil {
|
||||
slog.Error("failed to list system stats samples", "error", err)
|
||||
respondError(w, http.StatusInternalServerError, "failed to list samples")
|
||||
return
|
||||
}
|
||||
if samples == nil {
|
||||
samples = []store.SystemStatsSample{}
|
||||
}
|
||||
respondJSON(w, http.StatusOK, samples)
|
||||
}
|
||||
|
||||
// getInstanceStatsHistory handles GET /api/projects/{id}/stages/{stage}/instances/{iid}/stats/history.
|
||||
func (s *Server) getInstanceStatsHistory(w http.ResponseWriter, r *http.Request) {
|
||||
instanceID := chi.URLParam(r, "iid")
|
||||
if _, err := s.store.GetInstanceByID(instanceID); err != nil {
|
||||
if errors.Is(err, store.ErrNotFound) {
|
||||
respondNotFound(w, "instance")
|
||||
return
|
||||
}
|
||||
slog.Error("failed to get instance", "instance_id", instanceID, "error", err)
|
||||
respondError(w, http.StatusInternalServerError, "failed to get instance")
|
||||
return
|
||||
}
|
||||
samples, err := s.store.ListContainerStatsSamples(stats.OwnerTypeInstance, instanceID, sinceTimestamp(parseWindow(r)))
|
||||
if err != nil {
|
||||
slog.Error("failed to list instance stats samples", "instance_id", instanceID, "error", err)
|
||||
respondError(w, http.StatusInternalServerError, "failed to list samples")
|
||||
return
|
||||
}
|
||||
if samples == nil {
|
||||
samples = []store.ContainerStatsSample{}
|
||||
}
|
||||
respondJSON(w, http.StatusOK, samples)
|
||||
}
|
||||
|
||||
// getStaticSiteStats handles GET /api/sites/{id}/stats — current snapshot.
|
||||
func (s *Server) getStaticSiteStats(w http.ResponseWriter, r *http.Request) {
|
||||
id := chi.URLParam(r, "id")
|
||||
site, err := s.store.GetStaticSiteByID(id)
|
||||
if err != nil {
|
||||
if errors.Is(err, store.ErrNotFound) {
|
||||
respondNotFound(w, "site")
|
||||
return
|
||||
}
|
||||
slog.Error("failed to get site", "site_id", id, "error", err)
|
||||
respondError(w, http.StatusInternalServerError, "failed to get site")
|
||||
return
|
||||
}
|
||||
if site.ContainerID == "" {
|
||||
respondError(w, http.StatusConflict, "site has no container")
|
||||
return
|
||||
}
|
||||
if s.docker == nil {
|
||||
respondError(w, http.StatusServiceUnavailable, "Docker is not available")
|
||||
return
|
||||
}
|
||||
cs, err := s.docker.GetContainerStats(r.Context(), site.ContainerID)
|
||||
if err != nil {
|
||||
slog.Error("failed to get site stats", "site_id", id, "error", err)
|
||||
respondError(w, http.StatusInternalServerError, "failed to get site stats")
|
||||
return
|
||||
}
|
||||
respondJSON(w, http.StatusOK, cs)
|
||||
}
|
||||
|
||||
// getStaticSiteStatsHistory handles GET /api/sites/{id}/stats/history.
|
||||
func (s *Server) getStaticSiteStatsHistory(w http.ResponseWriter, r *http.Request) {
|
||||
id := chi.URLParam(r, "id")
|
||||
if _, err := s.store.GetStaticSiteByID(id); err != nil {
|
||||
if errors.Is(err, store.ErrNotFound) {
|
||||
respondNotFound(w, "site")
|
||||
return
|
||||
}
|
||||
slog.Error("failed to get site", "site_id", id, "error", err)
|
||||
respondError(w, http.StatusInternalServerError, "failed to get site")
|
||||
return
|
||||
}
|
||||
samples, err := s.store.ListContainerStatsSamples(stats.OwnerTypeSite, id, sinceTimestamp(parseWindow(r)))
|
||||
if err != nil {
|
||||
slog.Error("failed to list site stats samples", "site_id", id, "error", err)
|
||||
respondError(w, http.StatusInternalServerError, "failed to list samples")
|
||||
return
|
||||
}
|
||||
if samples == nil {
|
||||
samples = []store.ContainerStatsSample{}
|
||||
}
|
||||
respondJSON(w, http.StatusOK, samples)
|
||||
}
|
||||
|
||||
// streamStaticSiteLogs handles GET /api/sites/{id}/logs?tail=200&follow=true.
|
||||
// Reuses the shared container log streamer so the SSE + multiplex handling
|
||||
// matches /api/projects/.../instances/.../logs exactly.
|
||||
func (s *Server) streamStaticSiteLogs(w http.ResponseWriter, r *http.Request) {
|
||||
id := chi.URLParam(r, "id")
|
||||
site, err := s.store.GetStaticSiteByID(id)
|
||||
if err != nil {
|
||||
if errors.Is(err, store.ErrNotFound) {
|
||||
respondNotFound(w, "site")
|
||||
return
|
||||
}
|
||||
slog.Error("failed to get site", "site_id", id, "error", err)
|
||||
respondError(w, http.StatusInternalServerError, "failed to get site")
|
||||
return
|
||||
}
|
||||
if site.ContainerID == "" {
|
||||
respondError(w, http.StatusConflict, "site has no container")
|
||||
return
|
||||
}
|
||||
s.streamLogsForContainer(w, r, site.ContainerID)
|
||||
}
|
||||
|
||||
// listTopContainersByCPU handles GET /api/system/stats/top?limit=5&by=cpu.
|
||||
// Returns the top-N most recent samples across containers, sorted by CPU or
|
||||
// memory. Useful for a system dashboard "top consumers" widget without
|
||||
// requiring the frontend to aggregate per-container history on its own.
|
||||
func (s *Server) listTopContainersByCPU(w http.ResponseWriter, r *http.Request) {
|
||||
limit := 5
|
||||
if raw := r.URL.Query().Get("limit"); raw != "" {
|
||||
if n, err := strconv.Atoi(raw); err == nil && n > 0 && n <= 50 {
|
||||
limit = n
|
||||
}
|
||||
}
|
||||
by := r.URL.Query().Get("by")
|
||||
if by != "memory" {
|
||||
by = "cpu"
|
||||
}
|
||||
|
||||
// Samples from the last 2 minutes window so "top" reflects near-current
|
||||
// load, not long-dead rows.
|
||||
samples, err := s.store.ListAllRecentContainerStatsSamples(sinceTimestamp(2 * time.Minute))
|
||||
if err != nil {
|
||||
slog.Error("failed to list container samples for top", "error", err)
|
||||
respondError(w, http.StatusInternalServerError, "failed to list samples")
|
||||
return
|
||||
}
|
||||
|
||||
// Keep only the latest sample per container.
|
||||
latest := make(map[string]store.ContainerStatsSample, len(samples))
|
||||
for _, sm := range samples {
|
||||
if prev, ok := latest[sm.ContainerID]; !ok || sm.TS > prev.TS {
|
||||
latest[sm.ContainerID] = sm
|
||||
}
|
||||
}
|
||||
|
||||
top := make([]store.ContainerStatsSample, 0, len(latest))
|
||||
for _, sm := range latest {
|
||||
top = append(top, sm)
|
||||
}
|
||||
|
||||
// Partial-sort by the requested metric, descending. For small N a simple
|
||||
// insertion-like approach is plenty.
|
||||
sortContainerSamples(top, by)
|
||||
if len(top) > limit {
|
||||
top = top[:limit]
|
||||
}
|
||||
respondJSON(w, http.StatusOK, top)
|
||||
}
|
||||
|
||||
// sortContainerSamples sorts in place by CPU (or memory) descending.
|
||||
// Note: ListContainerStatsSamples with empty ownerID returns no rows — the
|
||||
// caller uses per-owner-type queries and merges; this helper is applied to
|
||||
// the already-merged slice.
|
||||
func sortContainerSamples(s []store.ContainerStatsSample, by string) {
|
||||
// O(n^2) is fine — N is small (bounded by the number of containers).
|
||||
for i := 1; i < len(s); i++ {
|
||||
for j := i; j > 0; j-- {
|
||||
var less bool
|
||||
if by == "memory" {
|
||||
less = s[j].MemoryUsage > s[j-1].MemoryUsage
|
||||
} else {
|
||||
less = s[j].CPUPercent > s[j-1].CPUPercent
|
||||
}
|
||||
if !less {
|
||||
break
|
||||
}
|
||||
s[j-1], s[j] = s[j], s[j-1]
|
||||
}
|
||||
}
|
||||
}
|
||||
+58
-10
@@ -4,21 +4,30 @@ import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/moby/moby/api/types/container"
|
||||
"github.com/moby/moby/client"
|
||||
)
|
||||
|
||||
// ContainerStats holds computed CPU and memory usage for a container.
|
||||
// ContainerStats holds computed CPU, memory, network, and block I/O
|
||||
// usage for a container. Network and block I/O values are cumulative
|
||||
// byte counters — compute rates by differencing two samples.
|
||||
type ContainerStats struct {
|
||||
CPUPercent float64 `json:"cpu_percent"`
|
||||
MemoryUsage int64 `json:"memory_usage"`
|
||||
MemoryLimit int64 `json:"memory_limit"`
|
||||
MemoryPercent float64 `json:"memory_percent"`
|
||||
Timestamp time.Time `json:"timestamp"`
|
||||
CPUPercent float64 `json:"cpu_percent"`
|
||||
MemoryUsage int64 `json:"memory_usage"`
|
||||
MemoryLimit int64 `json:"memory_limit"`
|
||||
MemoryPercent float64 `json:"memory_percent"`
|
||||
NetworkRxBytes int64 `json:"network_rx_bytes"`
|
||||
NetworkTxBytes int64 `json:"network_tx_bytes"`
|
||||
BlockReadBytes int64 `json:"block_read_bytes"`
|
||||
BlockWriteBytes int64 `json:"block_write_bytes"`
|
||||
}
|
||||
|
||||
// GetContainerStats retrieves a one-shot stats snapshot for the given container
|
||||
// and computes CPU and memory percentages.
|
||||
// and computes CPU, memory, network, and block I/O metrics.
|
||||
func (c *Client) GetContainerStats(ctx context.Context, containerID string) (ContainerStats, error) {
|
||||
result, err := c.api.ContainerStats(ctx, containerID, client.ContainerStatsOptions{
|
||||
Stream: false,
|
||||
@@ -42,14 +51,53 @@ func (c *Client) GetContainerStats(ctx context.Context, containerID string) (Con
|
||||
memPercent = float64(memUsage) / float64(memLimit) * 100.0
|
||||
}
|
||||
|
||||
rxBytes, txBytes := sumNetworkBytes(stats.Networks)
|
||||
readBytes, writeBytes := sumBlockIOBytes(stats.BlkioStats.IoServiceBytesRecursive)
|
||||
|
||||
ts := stats.Read
|
||||
if ts.IsZero() {
|
||||
ts = time.Now().UTC()
|
||||
}
|
||||
|
||||
return ContainerStats{
|
||||
CPUPercent: cpuPercent,
|
||||
MemoryUsage: memUsage,
|
||||
MemoryLimit: memLimit,
|
||||
MemoryPercent: memPercent,
|
||||
Timestamp: ts,
|
||||
CPUPercent: cpuPercent,
|
||||
MemoryUsage: memUsage,
|
||||
MemoryLimit: memLimit,
|
||||
MemoryPercent: memPercent,
|
||||
NetworkRxBytes: rxBytes,
|
||||
NetworkTxBytes: txBytes,
|
||||
BlockReadBytes: readBytes,
|
||||
BlockWriteBytes: writeBytes,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// sumNetworkBytes aggregates rx/tx byte counters across all network interfaces
|
||||
// for a single container. Missing Networks map (disabled networking) yields zeros.
|
||||
func sumNetworkBytes(nets map[string]container.NetworkStats) (rx, tx int64) {
|
||||
for _, n := range nets {
|
||||
rx += int64(n.RxBytes)
|
||||
tx += int64(n.TxBytes)
|
||||
}
|
||||
return rx, tx
|
||||
}
|
||||
|
||||
// sumBlockIOBytes totals read/write bytes across all block devices from the
|
||||
// cgroup io_service_bytes_recursive entries. The "Op" field is "read"/"write"
|
||||
// on cgroup v2 and "Read"/"Write" on cgroup v1 — match case-insensitively so
|
||||
// either runtime works.
|
||||
func sumBlockIOBytes(entries []container.BlkioStatEntry) (read, write int64) {
|
||||
for _, e := range entries {
|
||||
switch {
|
||||
case strings.EqualFold(e.Op, "read"):
|
||||
read += int64(e.Value)
|
||||
case strings.EqualFold(e.Op, "write"):
|
||||
write += int64(e.Value)
|
||||
}
|
||||
}
|
||||
return read, write
|
||||
}
|
||||
|
||||
// calculateCPUPercent computes CPU usage percentage from a stats response
|
||||
// using the delta between current and previous CPU readings.
|
||||
func calculateCPUPercent(stats container.StatsResponse) float64 {
|
||||
|
||||
@@ -0,0 +1,87 @@
|
||||
package docker
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/moby/moby/client"
|
||||
)
|
||||
|
||||
// SystemStats is a host-level snapshot combining daemon capacity
|
||||
// (NCPU, memory total) with container counts and disk usage broken down
|
||||
// by category. Workload CPU/memory utilization is aggregated from
|
||||
// per-container samples by the stats collector, not here.
|
||||
type SystemStats struct {
|
||||
Timestamp time.Time `json:"timestamp"`
|
||||
|
||||
// Capacity from Docker daemon.
|
||||
NCPU int `json:"ncpu"`
|
||||
MemoryTotal int64 `json:"memory_total"`
|
||||
|
||||
// Container/image counts.
|
||||
Containers int `json:"containers"`
|
||||
Running int `json:"running"`
|
||||
Paused int `json:"paused"`
|
||||
Stopped int `json:"stopped"`
|
||||
Images int `json:"images"`
|
||||
|
||||
// Disk usage by category (bytes).
|
||||
DiskImagesBytes int64 `json:"disk_images_bytes"`
|
||||
DiskContainersBytes int64 `json:"disk_containers_bytes"`
|
||||
DiskVolumesBytes int64 `json:"disk_volumes_bytes"`
|
||||
DiskBuildCacheBytes int64 `json:"disk_build_cache_bytes"`
|
||||
|
||||
// Reclaimable disk space by category (bytes).
|
||||
DiskImagesReclaimable int64 `json:"disk_images_reclaimable"`
|
||||
DiskContainersReclaimable int64 `json:"disk_containers_reclaimable"`
|
||||
DiskVolumesReclaimable int64 `json:"disk_volumes_reclaimable"`
|
||||
DiskBuildCacheReclaimable int64 `json:"disk_build_cache_reclaimable"`
|
||||
|
||||
// DiskTotalBytes is the sum of the category totals.
|
||||
DiskTotalBytes int64 `json:"disk_total_bytes"`
|
||||
}
|
||||
|
||||
// GetSystemStats returns a one-shot host-level snapshot. The Info() call
|
||||
// and disk usage call are made in sequence. Disk usage failures do not
|
||||
// fail the whole call — the result degrades gracefully with zero disk fields.
|
||||
func (c *Client) GetSystemStats(ctx context.Context) (SystemStats, error) {
|
||||
info, err := c.Info(ctx)
|
||||
if err != nil {
|
||||
return SystemStats{}, fmt.Errorf("system stats: %w", err)
|
||||
}
|
||||
|
||||
stats := SystemStats{
|
||||
Timestamp: time.Now().UTC(),
|
||||
NCPU: info.NCPU,
|
||||
MemoryTotal: info.MemoryTotal,
|
||||
Containers: info.Containers,
|
||||
Running: info.Running,
|
||||
Paused: info.Paused,
|
||||
Stopped: info.Stopped,
|
||||
Images: info.Images,
|
||||
}
|
||||
|
||||
du, derr := c.api.DiskUsage(ctx, client.DiskUsageOptions{
|
||||
Containers: true,
|
||||
Images: true,
|
||||
Volumes: true,
|
||||
BuildCache: true,
|
||||
})
|
||||
if derr == nil {
|
||||
stats.DiskImagesBytes = du.Images.TotalSize
|
||||
stats.DiskContainersBytes = du.Containers.TotalSize
|
||||
stats.DiskVolumesBytes = du.Volumes.TotalSize
|
||||
stats.DiskBuildCacheBytes = du.BuildCache.TotalSize
|
||||
stats.DiskImagesReclaimable = du.Images.Reclaimable
|
||||
stats.DiskContainersReclaimable = du.Containers.Reclaimable
|
||||
stats.DiskVolumesReclaimable = du.Volumes.Reclaimable
|
||||
stats.DiskBuildCacheReclaimable = du.BuildCache.Reclaimable
|
||||
stats.DiskTotalBytes = stats.DiskImagesBytes +
|
||||
stats.DiskContainersBytes +
|
||||
stats.DiskVolumesBytes +
|
||||
stats.DiskBuildCacheBytes
|
||||
}
|
||||
|
||||
return stats, nil
|
||||
}
|
||||
@@ -0,0 +1,309 @@
|
||||
// Package stats implements a background goroutine that periodically samples
|
||||
// Docker container and host-level resource usage and persists the samples
|
||||
// into SQLite. It reads its interval and retention from settings on every
|
||||
// tick so configuration changes take effect without a restart.
|
||||
package stats
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log/slog"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/alexei/tinyforge/internal/docker"
|
||||
"github.com/alexei/tinyforge/internal/store"
|
||||
)
|
||||
|
||||
// Defaults applied when settings values are outside their valid range.
|
||||
const (
|
||||
DefaultIntervalSeconds = 15
|
||||
DefaultRetentionHours = 2
|
||||
MinIntervalSeconds = 5
|
||||
MaxIntervalSeconds = 300
|
||||
// Hard cap on parallel container stat requests to avoid overwhelming
|
||||
// the Docker daemon when the user has many containers.
|
||||
maxParallelSamples = 8
|
||||
)
|
||||
|
||||
// OwnerType values for ContainerStatsSample.OwnerType.
|
||||
const (
|
||||
OwnerTypeInstance = "instance"
|
||||
OwnerTypeSite = "site"
|
||||
)
|
||||
|
||||
// Collector runs the background sampling loop.
|
||||
type Collector struct {
|
||||
store *store.Store
|
||||
docker *docker.Client
|
||||
|
||||
stopOnce sync.Once
|
||||
stop chan struct{}
|
||||
done chan struct{}
|
||||
}
|
||||
|
||||
// New creates a new stats collector. Call Start to begin sampling.
|
||||
func New(s *store.Store, d *docker.Client) *Collector {
|
||||
return &Collector{
|
||||
store: s,
|
||||
docker: d,
|
||||
stop: make(chan struct{}),
|
||||
done: make(chan struct{}),
|
||||
}
|
||||
}
|
||||
|
||||
// Start launches the background loop. Returns immediately. The loop exits
|
||||
// when Stop is called.
|
||||
func (c *Collector) Start() {
|
||||
go c.run()
|
||||
}
|
||||
|
||||
// Stop signals the collector to exit and blocks until it has finished the
|
||||
// in-flight tick.
|
||||
func (c *Collector) Stop() {
|
||||
c.stopOnce.Do(func() { close(c.stop) })
|
||||
<-c.done
|
||||
}
|
||||
|
||||
// run is the main loop. It reads the interval from settings on every tick,
|
||||
// which lets configuration changes propagate within one tick without a
|
||||
// dedicated reload mechanism.
|
||||
func (c *Collector) run() {
|
||||
defer close(c.done)
|
||||
|
||||
// Wait a few seconds before the first sample so the app has settled.
|
||||
select {
|
||||
case <-time.After(3 * time.Second):
|
||||
case <-c.stop:
|
||||
return
|
||||
}
|
||||
|
||||
for {
|
||||
interval, retention := c.readConfig()
|
||||
if interval == 0 || retention == 0 {
|
||||
// Collection disabled. Poll settings every minute in case the
|
||||
// user re-enables it.
|
||||
select {
|
||||
case <-time.After(time.Minute):
|
||||
continue
|
||||
case <-c.stop:
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
c.tick(retention)
|
||||
|
||||
select {
|
||||
case <-time.After(time.Duration(interval) * time.Second):
|
||||
case <-c.stop:
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// readConfig reads the current interval + retention from settings, applying
|
||||
// defaults and clamping to the valid range.
|
||||
func (c *Collector) readConfig() (intervalSeconds, retentionHours int) {
|
||||
settings, err := c.store.GetSettings()
|
||||
if err != nil {
|
||||
slog.Warn("stats collector: failed to read settings — using defaults", "error", err)
|
||||
return DefaultIntervalSeconds, DefaultRetentionHours
|
||||
}
|
||||
intervalSeconds = settings.StatsIntervalSeconds
|
||||
retentionHours = settings.StatsRetentionHours
|
||||
if intervalSeconds < 0 || retentionHours < 0 {
|
||||
return 0, 0
|
||||
}
|
||||
if intervalSeconds > 0 && intervalSeconds < MinIntervalSeconds {
|
||||
intervalSeconds = MinIntervalSeconds
|
||||
}
|
||||
if intervalSeconds > MaxIntervalSeconds {
|
||||
intervalSeconds = MaxIntervalSeconds
|
||||
}
|
||||
return intervalSeconds, retentionHours
|
||||
}
|
||||
|
||||
// tick samples all known containers, aggregates workload-level totals,
|
||||
// persists samples, and prunes rows beyond the retention window. When
|
||||
// the Docker daemon is unreachable the whole tick is skipped with a
|
||||
// single debug log instead of one warning per container.
|
||||
func (c *Collector) tick(retentionHours int) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||
defer cancel()
|
||||
|
||||
pingCtx, pingCancel := context.WithTimeout(ctx, 2*time.Second)
|
||||
defer pingCancel()
|
||||
if err := c.docker.Ping(pingCtx); err != nil {
|
||||
slog.Debug("stats collector: docker unreachable, skipping tick", "error", err)
|
||||
return
|
||||
}
|
||||
|
||||
targets := c.buildTargets()
|
||||
if len(targets) == 0 {
|
||||
// No containers to sample, but still record a system sample so the
|
||||
// host history isn't empty.
|
||||
c.recordSystemSample(ctx, 0, 0, 0)
|
||||
c.prune(retentionHours)
|
||||
return
|
||||
}
|
||||
|
||||
samples := c.sampleAll(ctx, targets)
|
||||
|
||||
var (
|
||||
totalCPU float64
|
||||
totalMem int64
|
||||
running int
|
||||
)
|
||||
for _, s := range samples {
|
||||
if err := c.store.InsertContainerStatsSample(s); err != nil {
|
||||
slog.Warn("stats collector: insert container sample",
|
||||
"container", s.ContainerID, "error", err)
|
||||
continue
|
||||
}
|
||||
totalCPU += s.CPUPercent
|
||||
totalMem += s.MemoryUsage
|
||||
running++
|
||||
}
|
||||
|
||||
c.recordSystemSample(ctx, totalCPU, totalMem, running)
|
||||
c.prune(retentionHours)
|
||||
}
|
||||
|
||||
// target describes a single container to sample.
|
||||
type target struct {
|
||||
ContainerID string
|
||||
OwnerType string
|
||||
OwnerID string
|
||||
}
|
||||
|
||||
// buildTargets fetches running instances and sites that have a container ID.
|
||||
func (c *Collector) buildTargets() []target {
|
||||
var out []target
|
||||
|
||||
instances, err := c.store.ListAllInstances()
|
||||
if err != nil {
|
||||
slog.Warn("stats collector: list instances", "error", err)
|
||||
} else {
|
||||
for _, inst := range instances {
|
||||
if inst.ContainerID == "" {
|
||||
continue
|
||||
}
|
||||
out = append(out, target{
|
||||
ContainerID: inst.ContainerID,
|
||||
OwnerType: OwnerTypeInstance,
|
||||
OwnerID: inst.ID,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
sites, err := c.store.GetAllStaticSites()
|
||||
if err != nil {
|
||||
slog.Warn("stats collector: list sites", "error", err)
|
||||
} else {
|
||||
for _, site := range sites {
|
||||
if site.ContainerID == "" {
|
||||
continue
|
||||
}
|
||||
out = append(out, target{
|
||||
ContainerID: site.ContainerID,
|
||||
OwnerType: OwnerTypeSite,
|
||||
OwnerID: site.ID,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
// sampleAll fetches Docker stats for every target in bounded parallelism.
|
||||
// Failed samples are logged and skipped — a missing container must not kill
|
||||
// the whole tick.
|
||||
func (c *Collector) sampleAll(ctx context.Context, targets []target) []store.ContainerStatsSample {
|
||||
sem := make(chan struct{}, maxParallelSamples)
|
||||
results := make([]store.ContainerStatsSample, len(targets))
|
||||
found := make([]bool, len(targets))
|
||||
|
||||
var wg sync.WaitGroup
|
||||
for i, t := range targets {
|
||||
wg.Add(1)
|
||||
go func(i int, t target) {
|
||||
defer wg.Done()
|
||||
sem <- struct{}{}
|
||||
defer func() { <-sem }()
|
||||
|
||||
sampleCtx, cancel := context.WithTimeout(ctx, 10*time.Second)
|
||||
defer cancel()
|
||||
|
||||
stats, err := c.docker.GetContainerStats(sampleCtx, t.ContainerID)
|
||||
if err != nil {
|
||||
slog.Debug("stats collector: get container stats",
|
||||
"container", t.ContainerID, "owner_type", t.OwnerType, "error", err)
|
||||
return
|
||||
}
|
||||
ts := stats.Timestamp.Unix()
|
||||
if ts <= 0 {
|
||||
ts = time.Now().UTC().Unix()
|
||||
}
|
||||
results[i] = store.ContainerStatsSample{
|
||||
ContainerID: t.ContainerID,
|
||||
OwnerType: t.OwnerType,
|
||||
OwnerID: t.OwnerID,
|
||||
TS: ts,
|
||||
CPUPercent: stats.CPUPercent,
|
||||
MemoryUsage: stats.MemoryUsage,
|
||||
MemoryLimit: stats.MemoryLimit,
|
||||
NetworkRxBytes: stats.NetworkRxBytes,
|
||||
NetworkTxBytes: stats.NetworkTxBytes,
|
||||
BlockReadBytes: stats.BlockReadBytes,
|
||||
BlockWriteBytes: stats.BlockWriteBytes,
|
||||
}
|
||||
found[i] = true
|
||||
}(i, t)
|
||||
}
|
||||
wg.Wait()
|
||||
|
||||
out := results[:0]
|
||||
for i := range results {
|
||||
if found[i] {
|
||||
out = append(out, results[i])
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// recordSystemSample fetches host info + disk usage and persists a combined
|
||||
// system-level sample. Failures are warned but do not propagate.
|
||||
func (c *Collector) recordSystemSample(ctx context.Context, workloadCPU float64, workloadMem int64, running int) {
|
||||
sysStats, err := c.docker.GetSystemStats(ctx)
|
||||
if err != nil {
|
||||
slog.Warn("stats collector: get system stats", "error", err)
|
||||
return
|
||||
}
|
||||
sample := store.SystemStatsSample{
|
||||
TS: sysStats.Timestamp.Unix(),
|
||||
NCPU: sysStats.NCPU,
|
||||
MemoryTotal: sysStats.MemoryTotal,
|
||||
WorkloadCPUPercent: workloadCPU,
|
||||
WorkloadMemUsage: workloadMem,
|
||||
ContainersRunning: running,
|
||||
DiskTotalBytes: sysStats.DiskTotalBytes,
|
||||
}
|
||||
// Prefer the Docker-reported running count when we have no running samples
|
||||
// (e.g., very first tick may race with container readiness).
|
||||
if running == 0 && sysStats.Running > 0 {
|
||||
sample.ContainersRunning = sysStats.Running
|
||||
}
|
||||
if err := c.store.InsertSystemStatsSample(sample); err != nil {
|
||||
slog.Warn("stats collector: insert system sample", "error", err)
|
||||
}
|
||||
}
|
||||
|
||||
// prune drops rows older than the retention window.
|
||||
func (c *Collector) prune(retentionHours int) {
|
||||
if retentionHours <= 0 {
|
||||
return
|
||||
}
|
||||
cutoff := time.Now().UTC().Add(-time.Duration(retentionHours) * time.Hour).Unix()
|
||||
if _, err := c.store.PruneStatsSamplesBefore(cutoff); err != nil {
|
||||
slog.Warn("stats collector: prune", "error", err)
|
||||
}
|
||||
}
|
||||
@@ -78,9 +78,40 @@ type Settings struct {
|
||||
BackupEnabled bool `json:"backup_enabled"`
|
||||
BackupIntervalHours int `json:"backup_interval_hours"`
|
||||
BackupRetentionCount int `json:"backup_retention_count"`
|
||||
StatsIntervalSeconds int `json:"stats_interval_seconds"` // 0 disables collection
|
||||
StatsRetentionHours int `json:"stats_retention_hours"` // 0 disables collection
|
||||
UpdatedAt string `json:"updated_at"`
|
||||
}
|
||||
|
||||
// ContainerStatsSample is one persisted sample of container resource usage.
|
||||
// Cumulative counters (network, block I/O) require differencing two samples
|
||||
// to get rates; CPU is already a percent-since-previous-sample value.
|
||||
type ContainerStatsSample struct {
|
||||
ContainerID string `json:"container_id"`
|
||||
OwnerType string `json:"owner_type"` // "instance" or "site"
|
||||
OwnerID string `json:"owner_id"`
|
||||
TS int64 `json:"ts"` // Unix seconds UTC
|
||||
CPUPercent float64 `json:"cpu_percent"`
|
||||
MemoryUsage int64 `json:"memory_usage"`
|
||||
MemoryLimit int64 `json:"memory_limit"`
|
||||
NetworkRxBytes int64 `json:"network_rx_bytes"`
|
||||
NetworkTxBytes int64 `json:"network_tx_bytes"`
|
||||
BlockReadBytes int64 `json:"block_read_bytes"`
|
||||
BlockWriteBytes int64 `json:"block_write_bytes"`
|
||||
}
|
||||
|
||||
// SystemStatsSample is one persisted host-level snapshot that aggregates
|
||||
// workload usage across all containers plus daemon capacity + disk totals.
|
||||
type SystemStatsSample struct {
|
||||
TS int64 `json:"ts"` // Unix seconds UTC
|
||||
NCPU int `json:"ncpu"`
|
||||
MemoryTotal int64 `json:"memory_total"`
|
||||
WorkloadCPUPercent float64 `json:"workload_cpu_percent"`
|
||||
WorkloadMemUsage int64 `json:"workload_mem_usage"`
|
||||
ContainersRunning int `json:"containers_running"`
|
||||
DiskTotalBytes int64 `json:"disk_total_bytes"`
|
||||
}
|
||||
|
||||
// Backup represents a backup metadata record.
|
||||
type Backup struct {
|
||||
ID string `json:"id"`
|
||||
|
||||
@@ -18,6 +18,7 @@ func (s *Store) GetSettings() (Settings, error) {
|
||||
traefik_entrypoint, traefik_cert_resolver, traefik_network, traefik_api_url,
|
||||
image_prune_threshold_mb,
|
||||
backup_enabled, backup_interval_hours, backup_retention_count,
|
||||
stats_interval_seconds, stats_retention_hours,
|
||||
updated_at
|
||||
FROM settings WHERE id = 1`,
|
||||
).Scan(&st.Domain, &st.ServerIP, &st.PublicIP, &st.Network, &st.SubdomainPattern, &st.NotificationURL,
|
||||
@@ -29,6 +30,7 @@ func (s *Store) GetSettings() (Settings, error) {
|
||||
&st.TraefikEntrypoint, &st.TraefikCertResolver, &st.TraefikNetwork, &st.TraefikAPIURL,
|
||||
&st.ImagePruneThresholdMB,
|
||||
&backupEnabled, &st.BackupIntervalHours, &st.BackupRetentionCount,
|
||||
&st.StatsIntervalSeconds, &st.StatsRetentionHours,
|
||||
&st.UpdatedAt)
|
||||
if err != nil {
|
||||
return Settings{}, fmt.Errorf("query settings: %w", err)
|
||||
@@ -65,6 +67,7 @@ func (s *Store) UpdateSettings(st Settings) error {
|
||||
traefik_entrypoint=?, traefik_cert_resolver=?, traefik_network=?, traefik_api_url=?,
|
||||
image_prune_threshold_mb=?,
|
||||
backup_enabled=?, backup_interval_hours=?, backup_retention_count=?,
|
||||
stats_interval_seconds=?, stats_retention_hours=?,
|
||||
updated_at=?
|
||||
WHERE id = 1`,
|
||||
st.Domain, st.ServerIP, st.PublicIP, st.Network, st.SubdomainPattern, st.NotificationURL,
|
||||
@@ -76,6 +79,7 @@ func (s *Store) UpdateSettings(st Settings) error {
|
||||
st.TraefikEntrypoint, st.TraefikCertResolver, st.TraefikNetwork, st.TraefikAPIURL,
|
||||
st.ImagePruneThresholdMB,
|
||||
backupEnabled, st.BackupIntervalHours, st.BackupRetentionCount,
|
||||
st.StatsIntervalSeconds, st.StatsRetentionHours,
|
||||
st.UpdatedAt,
|
||||
)
|
||||
if err != nil {
|
||||
|
||||
@@ -0,0 +1,157 @@
|
||||
package store
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// InsertContainerStatsSample appends a single container sample row.
|
||||
func (s *Store) InsertContainerStatsSample(sample ContainerStatsSample) error {
|
||||
_, err := s.db.Exec(
|
||||
`INSERT INTO container_stats_samples (
|
||||
container_id, owner_type, owner_id, ts,
|
||||
cpu_percent, memory_usage, memory_limit,
|
||||
network_rx, network_tx, block_read, block_write
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
||||
sample.ContainerID, sample.OwnerType, sample.OwnerID, sample.TS,
|
||||
sample.CPUPercent, sample.MemoryUsage, sample.MemoryLimit,
|
||||
sample.NetworkRxBytes, sample.NetworkTxBytes,
|
||||
sample.BlockReadBytes, sample.BlockWriteBytes,
|
||||
)
|
||||
if err != nil {
|
||||
return fmt.Errorf("insert container stats sample: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// InsertSystemStatsSample appends a single host-level sample row.
|
||||
func (s *Store) InsertSystemStatsSample(sample SystemStatsSample) error {
|
||||
_, err := s.db.Exec(
|
||||
`INSERT INTO system_stats_samples (
|
||||
ts, ncpu, memory_total,
|
||||
workload_cpu_percent, workload_mem_usage,
|
||||
containers_running, disk_total_bytes
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?)`,
|
||||
sample.TS, sample.NCPU, sample.MemoryTotal,
|
||||
sample.WorkloadCPUPercent, sample.WorkloadMemUsage,
|
||||
sample.ContainersRunning, sample.DiskTotalBytes,
|
||||
)
|
||||
if err != nil {
|
||||
return fmt.Errorf("insert system stats sample: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// ListContainerStatsSamples returns samples for the given owner since the
|
||||
// given unix timestamp (inclusive), ordered by ts ascending.
|
||||
func (s *Store) ListContainerStatsSamples(ownerType, ownerID string, sinceTS int64) ([]ContainerStatsSample, error) {
|
||||
rows, err := s.db.Query(
|
||||
`SELECT container_id, owner_type, owner_id, ts,
|
||||
cpu_percent, memory_usage, memory_limit,
|
||||
network_rx, network_tx, block_read, block_write
|
||||
FROM container_stats_samples
|
||||
WHERE owner_type = ? AND owner_id = ? AND ts >= ?
|
||||
ORDER BY ts ASC`,
|
||||
ownerType, ownerID, sinceTS,
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("list container stats samples: %w", err)
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
var out []ContainerStatsSample
|
||||
for rows.Next() {
|
||||
var s ContainerStatsSample
|
||||
if err := rows.Scan(
|
||||
&s.ContainerID, &s.OwnerType, &s.OwnerID, &s.TS,
|
||||
&s.CPUPercent, &s.MemoryUsage, &s.MemoryLimit,
|
||||
&s.NetworkRxBytes, &s.NetworkTxBytes,
|
||||
&s.BlockReadBytes, &s.BlockWriteBytes,
|
||||
); err != nil {
|
||||
return nil, fmt.Errorf("scan container stats sample: %w", err)
|
||||
}
|
||||
out = append(out, s)
|
||||
}
|
||||
return out, rows.Err()
|
||||
}
|
||||
|
||||
// ListAllRecentContainerStatsSamples returns samples across every owner since
|
||||
// the given unix timestamp, ordered by ts ascending. Used by the system
|
||||
// dashboard "top containers" widget where the UI wants a mixed pool.
|
||||
func (s *Store) ListAllRecentContainerStatsSamples(sinceTS int64) ([]ContainerStatsSample, error) {
|
||||
rows, err := s.db.Query(
|
||||
`SELECT container_id, owner_type, owner_id, ts,
|
||||
cpu_percent, memory_usage, memory_limit,
|
||||
network_rx, network_tx, block_read, block_write
|
||||
FROM container_stats_samples
|
||||
WHERE ts >= ?
|
||||
ORDER BY ts ASC`,
|
||||
sinceTS,
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("list all recent container stats samples: %w", err)
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
var out []ContainerStatsSample
|
||||
for rows.Next() {
|
||||
var s ContainerStatsSample
|
||||
if err := rows.Scan(
|
||||
&s.ContainerID, &s.OwnerType, &s.OwnerID, &s.TS,
|
||||
&s.CPUPercent, &s.MemoryUsage, &s.MemoryLimit,
|
||||
&s.NetworkRxBytes, &s.NetworkTxBytes,
|
||||
&s.BlockReadBytes, &s.BlockWriteBytes,
|
||||
); err != nil {
|
||||
return nil, fmt.Errorf("scan container stats sample: %w", err)
|
||||
}
|
||||
out = append(out, s)
|
||||
}
|
||||
return out, rows.Err()
|
||||
}
|
||||
|
||||
// ListSystemStatsSamples returns host samples since the given unix timestamp.
|
||||
func (s *Store) ListSystemStatsSamples(sinceTS int64) ([]SystemStatsSample, error) {
|
||||
rows, err := s.db.Query(
|
||||
`SELECT ts, ncpu, memory_total,
|
||||
workload_cpu_percent, workload_mem_usage,
|
||||
containers_running, disk_total_bytes
|
||||
FROM system_stats_samples
|
||||
WHERE ts >= ?
|
||||
ORDER BY ts ASC`,
|
||||
sinceTS,
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("list system stats samples: %w", err)
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
var out []SystemStatsSample
|
||||
for rows.Next() {
|
||||
var s SystemStatsSample
|
||||
if err := rows.Scan(
|
||||
&s.TS, &s.NCPU, &s.MemoryTotal,
|
||||
&s.WorkloadCPUPercent, &s.WorkloadMemUsage,
|
||||
&s.ContainersRunning, &s.DiskTotalBytes,
|
||||
); err != nil {
|
||||
return nil, fmt.Errorf("scan system stats sample: %w", err)
|
||||
}
|
||||
out = append(out, s)
|
||||
}
|
||||
return out, rows.Err()
|
||||
}
|
||||
|
||||
// PruneStatsSamplesBefore deletes all samples older than the given unix timestamp
|
||||
// from both the container and system stats tables. Returns rows deleted across
|
||||
// both tables.
|
||||
func (s *Store) PruneStatsSamplesBefore(ts int64) (int64, error) {
|
||||
r1, err := s.db.Exec(`DELETE FROM container_stats_samples WHERE ts < ?`, ts)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("prune container stats samples: %w", err)
|
||||
}
|
||||
r2, err := s.db.Exec(`DELETE FROM system_stats_samples WHERE ts < ?`, ts)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("prune system stats samples: %w", err)
|
||||
}
|
||||
n1, _ := r1.RowsAffected()
|
||||
n2, _ := r2.RowsAffected()
|
||||
return n1 + n2, nil
|
||||
}
|
||||
@@ -133,10 +133,46 @@ func (s *Store) runMigrations() error {
|
||||
// avoid a destructive migration on SQLite.
|
||||
`ALTER TABLE projects ADD COLUMN webhook_secret TEXT NOT NULL DEFAULT ''`,
|
||||
`ALTER TABLE static_sites ADD COLUMN webhook_secret TEXT NOT NULL DEFAULT ''`,
|
||||
// Resource metrics collection (2026-04-24). Interval in seconds,
|
||||
// retention in hours. 0 in either disables collection.
|
||||
`ALTER TABLE settings ADD COLUMN stats_interval_seconds INTEGER NOT NULL DEFAULT 15`,
|
||||
`ALTER TABLE settings ADD COLUMN stats_retention_hours INTEGER NOT NULL DEFAULT 2`,
|
||||
}
|
||||
|
||||
// Additive stack tables (2026-04-16). Created here rather than in the
|
||||
// schema constant so older databases pick them up on restart.
|
||||
statsTables := []string{
|
||||
`CREATE TABLE IF NOT EXISTS container_stats_samples (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
container_id TEXT NOT NULL,
|
||||
owner_type TEXT NOT NULL,
|
||||
owner_id TEXT NOT NULL,
|
||||
ts INTEGER NOT NULL,
|
||||
cpu_percent REAL NOT NULL DEFAULT 0,
|
||||
memory_usage INTEGER NOT NULL DEFAULT 0,
|
||||
memory_limit INTEGER NOT NULL DEFAULT 0,
|
||||
network_rx INTEGER NOT NULL DEFAULT 0,
|
||||
network_tx INTEGER NOT NULL DEFAULT 0,
|
||||
block_read INTEGER NOT NULL DEFAULT 0,
|
||||
block_write INTEGER NOT NULL DEFAULT 0
|
||||
)`,
|
||||
`CREATE TABLE IF NOT EXISTS system_stats_samples (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
ts INTEGER NOT NULL,
|
||||
ncpu INTEGER NOT NULL DEFAULT 0,
|
||||
memory_total INTEGER NOT NULL DEFAULT 0,
|
||||
workload_cpu_percent REAL NOT NULL DEFAULT 0,
|
||||
workload_mem_usage INTEGER NOT NULL DEFAULT 0,
|
||||
containers_running INTEGER NOT NULL DEFAULT 0,
|
||||
disk_total_bytes INTEGER NOT NULL DEFAULT 0
|
||||
)`,
|
||||
}
|
||||
for _, t := range statsTables {
|
||||
if _, err := s.db.Exec(t); err != nil {
|
||||
return fmt.Errorf("create stats table: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
stackTables := []string{
|
||||
`CREATE TABLE IF NOT EXISTS stacks (
|
||||
id TEXT PRIMARY KEY,
|
||||
@@ -201,6 +237,10 @@ func (s *Store) runMigrations() error {
|
||||
`CREATE INDEX IF NOT EXISTS idx_stack_deploys_stack_id ON stack_deploys(stack_id)`,
|
||||
`CREATE UNIQUE INDEX IF NOT EXISTS idx_projects_webhook_secret ON projects(webhook_secret) WHERE webhook_secret != ''`,
|
||||
`CREATE UNIQUE INDEX IF NOT EXISTS idx_static_sites_webhook_secret ON static_sites(webhook_secret) WHERE webhook_secret != ''`,
|
||||
`CREATE INDEX IF NOT EXISTS idx_container_stats_owner_ts ON container_stats_samples(owner_type, owner_id, ts)`,
|
||||
`CREATE INDEX IF NOT EXISTS idx_container_stats_container_ts ON container_stats_samples(container_id, ts)`,
|
||||
`CREATE INDEX IF NOT EXISTS idx_container_stats_ts ON container_stats_samples(ts)`,
|
||||
`CREATE INDEX IF NOT EXISTS idx_system_stats_ts ON system_stats_samples(ts)`,
|
||||
}
|
||||
for _, idx := range indexes {
|
||||
if _, err := s.db.Exec(idx); err != nil {
|
||||
|
||||
Reference in New Issue
Block a user