feat(observability): phase 8 - container stats, notifications & dashboard

Add container monitoring and notification system:
- Docker Stats API: real-time CPU/memory for running containers
- Webhook notifications for errors (deploy failures, stale, proxy unhealthy)
- Event log auto-pruning (daily, 30-day retention)
- ContainerStats component with auto-polling progress bars
- SystemHealthCard dashboard widget with running/proxy/error counts
- Full EN/RU i18n for stats and system health
This commit is contained in:
2026-03-30 11:37:25 +03:00
parent 79a40f3d9c
commit 7c57c740b4
13 changed files with 436 additions and 0 deletions
+1
View File
@@ -136,6 +136,7 @@ func (s *Server) Router() chi.Router {
r.Get("/", s.getProject)
r.Get("/stages/{stage}/env", s.listStageEnv)
r.Get("/stages/{stage}/instances", s.listInstances)
r.Get("/stages/{stage}/instances/{iid}/stats", s.getInstanceStats)
r.Get("/volumes", s.listVolumes)
})
r.Get("/deploys", s.listDeploys)
+39
View File
@@ -0,0 +1,39 @@
package api
import (
"errors"
"net/http"
"github.com/go-chi/chi/v5"
"github.com/alexei/docker-watcher/internal/store"
)
// getInstanceStats handles GET /api/projects/{id}/stages/{stage}/instances/{iid}/stats.
//
// It looks up the instance named by the {iid} route parameter and writes the
// CPU/memory snapshot of its container as JSON with status 200.
//
// Failure responses:
//   - not found, when the store reports store.ErrNotFound;
//   - 500, when the store lookup fails for any other reason;
//   - 400, when the instance has an empty ContainerID;
//   - 500, when the Docker stats call fails.
//
// NOTE(review): only {iid} is read here; the {id} and {stage} route parameters
// are not checked against the instance in this handler — confirm that is
// enforced elsewhere or intended.
func (s *Server) getInstanceStats(w http.ResponseWriter, r *http.Request) {
	inst, err := s.store.GetInstanceByID(chi.URLParam(r, "iid"))
	switch {
	case errors.Is(err, store.ErrNotFound):
		respondNotFound(w, "instance")
		return
	case err != nil:
		respondError(w, http.StatusInternalServerError, "failed to get instance: "+err.Error())
		return
	case inst.ContainerID == "":
		// Nothing to query: the instance is not backed by a container.
		respondError(w, http.StatusBadRequest, "instance has no container")
		return
	}

	snapshot, statsErr := s.docker.GetContainerStats(r.Context(), inst.ContainerID)
	if statsErr != nil {
		respondError(w, http.StatusInternalServerError, "failed to get container stats: "+statsErr.Error())
		return
	}

	respondJSON(w, http.StatusOK, snapshot)
}
+69
View File
@@ -0,0 +1,69 @@
package docker
import (
"context"
"encoding/json"
"fmt"
"github.com/moby/moby/api/types/container"
"github.com/moby/moby/client"
)
// ContainerStats is the JSON-serialisable summary of a container's resource
// usage, derived from a single Docker stats sample.
type ContainerStats struct {
	// CPUPercent is the CPU usage percentage computed from the delta between
	// the current and previous CPU readings. It is scaled by the number of
	// online CPUs, so it can exceed 100.
	CPUPercent float64 `json:"cpu_percent"`

	// MemoryUsage is the memory usage figure taken from the Docker stats
	// sample (presumably bytes — confirm against the Docker API).
	MemoryUsage int64 `json:"memory_usage"`

	// MemoryLimit is the memory limit taken from the Docker stats sample, in
	// the same unit as MemoryUsage.
	MemoryLimit int64 `json:"memory_limit"`

	// MemoryPercent is MemoryUsage / MemoryLimit * 100, or 0 when the limit
	// is not positive.
	MemoryPercent float64 `json:"memory_percent"`
}
// GetContainerStats retrieves a one-shot stats snapshot for the given container
// and computes CPU and memory percentages.
//
// The request is non-streaming but asks the daemon to include the previous
// sample, which calculateCPUPercent needs to compute a CPU delta.
//
// Memory usage is reported net of inactive file cache (see
// memoryUsageNoCache) so that reclaimable page cache does not inflate the
// figure; this mirrors what the Docker CLI shows in `docker stats`.
//
// It returns a zero ContainerStats and a wrapped error if the stats request
// fails or the response body cannot be decoded.
func (c *Client) GetContainerStats(ctx context.Context, containerID string) (ContainerStats, error) {
	result, err := c.api.ContainerStats(ctx, containerID, client.ContainerStatsOptions{
		Stream:                false,
		IncludePreviousSample: true,
	})
	if err != nil {
		return ContainerStats{}, fmt.Errorf("get container stats %s: %w", containerID, err)
	}
	defer result.Body.Close()

	var stats container.StatsResponse
	if err := json.NewDecoder(result.Body).Decode(&stats); err != nil {
		return ContainerStats{}, fmt.Errorf("decode container stats %s: %w", containerID, err)
	}

	memUsage := int64(memoryUsageNoCache(stats.MemoryStats.Usage, stats.MemoryStats.Stats))
	memLimit := int64(stats.MemoryStats.Limit)

	// Guard against a missing/zero limit to avoid dividing by zero.
	var memPercent float64
	if memLimit > 0 {
		memPercent = float64(memUsage) / float64(memLimit) * 100.0
	}

	return ContainerStats{
		CPUPercent:    calculateCPUPercent(stats),
		MemoryUsage:   memUsage,
		MemoryLimit:   memLimit,
		MemoryPercent: memPercent,
	}, nil
}

// memoryUsageNoCache returns usage minus the inactive file cache recorded in
// the detailed memory counters, falling back to the raw usage when no
// applicable counter is present or the counter is not smaller than usage.
//
// The "total_inactive_file" key is used when present (cgroup v1); otherwise
// "inactive_file" is used (cgroup v2). A nil map is handled like an empty one.
func memoryUsageNoCache(usage uint64, counters map[string]uint64) uint64 {
	if v, ok := counters["total_inactive_file"]; ok && v < usage {
		return usage - v
	}
	if v := counters["inactive_file"]; v < usage {
		return usage - v
	}
	return usage
}
// calculateCPUPercent derives a CPU usage percentage from one stats response
// by comparing its current CPU readings with the previous ones it carries.
//
// The result is (usage delta / system delta) * online CPUs * 100. It is 0 when
// the system delta is not positive or the usage delta is negative. An
// OnlineCPUs value of 0 is treated as a single CPU.
func calculateCPUPercent(stats container.StatsResponse) float64 {
	cur, prev := stats.CPUStats, stats.PreCPUStats

	usageDelta := float64(cur.CPUUsage.TotalUsage) - float64(prev.CPUUsage.TotalUsage)
	systemDelta := float64(cur.SystemUsage) - float64(prev.SystemUsage)
	if usageDelta < 0 || systemDelta <= 0 {
		// No usable interval between the two samples.
		return 0.0
	}

	cpus := float64(cur.OnlineCPUs)
	if cpus == 0 {
		cpus = 1
	}

	return (usageDelta / systemDelta) * cpus * 100.0
}
+9
View File
@@ -0,0 +1,9 @@
package notify
// Event type identifiers used for notifications.
//
// NOTE(review): the per-constant descriptions are inferred from the
// identifiers; confirm against the code that emits these events.
const (
	// EventTypeDeploySuccess identifies a successful deploy.
	EventTypeDeploySuccess = "deploy_success"

	// EventTypeDeployFailure identifies a failed deploy.
	EventTypeDeployFailure = "deploy_failure"

	// EventTypeStaleDetected identifies detection of a stale resource.
	EventTypeStaleDetected = "stale_detected"

	// EventTypeProxyUnhealthy identifies an unhealthy proxy.
	EventTypeProxyUnhealthy = "proxy_unhealthy"
)