feat(observability): phase 8 - container stats, notifications & dashboard
Add a container monitoring and notification system:
- Docker Stats API: real-time CPU/memory for running containers
- Webhook notifications for errors (deploy failures, stale detection, unhealthy proxy)
- Event log auto-pruning (daily, 30-day retention)
- ContainerStats component with auto-polling progress bars
- SystemHealthCard dashboard widget with running/proxy/error counts
- Full EN/RU i18n for stats and system health
This commit is contained in:
@@ -136,6 +136,7 @@ func (s *Server) Router() chi.Router {
|
||||
r.Get("/", s.getProject)
|
||||
r.Get("/stages/{stage}/env", s.listStageEnv)
|
||||
r.Get("/stages/{stage}/instances", s.listInstances)
|
||||
r.Get("/stages/{stage}/instances/{iid}/stats", s.getInstanceStats)
|
||||
r.Get("/volumes", s.listVolumes)
|
||||
})
|
||||
r.Get("/deploys", s.listDeploys)
|
||||
|
||||
@@ -0,0 +1,39 @@
|
||||
package api
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"net/http"
|
||||
|
||||
"github.com/go-chi/chi/v5"
|
||||
|
||||
"github.com/alexei/docker-watcher/internal/store"
|
||||
)
|
||||
|
||||
// getInstanceStats handles GET /api/projects/{id}/stages/{stage}/instances/{iid}/stats.
|
||||
// Returns CPU and memory stats for the container backing the given instance.
|
||||
func (s *Server) getInstanceStats(w http.ResponseWriter, r *http.Request) {
|
||||
instanceID := chi.URLParam(r, "iid")
|
||||
|
||||
inst, err := s.store.GetInstanceByID(instanceID)
|
||||
if err != nil {
|
||||
if errors.Is(err, store.ErrNotFound) {
|
||||
respondNotFound(w, "instance")
|
||||
return
|
||||
}
|
||||
respondError(w, http.StatusInternalServerError, "failed to get instance: "+err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
if inst.ContainerID == "" {
|
||||
respondError(w, http.StatusBadRequest, "instance has no container")
|
||||
return
|
||||
}
|
||||
|
||||
stats, err := s.docker.GetContainerStats(r.Context(), inst.ContainerID)
|
||||
if err != nil {
|
||||
respondError(w, http.StatusInternalServerError, "failed to get container stats: "+err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
respondJSON(w, http.StatusOK, stats)
|
||||
}
|
||||
@@ -0,0 +1,69 @@
|
||||
package docker
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
|
||||
"github.com/moby/moby/api/types/container"
|
||||
"github.com/moby/moby/client"
|
||||
)
|
||||
|
||||
// ContainerStats is the computed CPU and memory usage snapshot for a single
// container, serialized to JSON with snake_case keys.
type ContainerStats struct {
	// CPUPercent is CPU usage expressed as a percentage.
	CPUPercent float64 `json:"cpu_percent"`

	// MemoryUsage is the memory usage figure reported for the container
	// (presumably bytes — confirm against the Docker stats payload).
	MemoryUsage int64 `json:"memory_usage"`

	// MemoryLimit is the memory limit reported for the container, in the
	// same unit as MemoryUsage.
	MemoryLimit int64 `json:"memory_limit"`

	// MemoryPercent is MemoryUsage relative to MemoryLimit, as a percentage.
	MemoryPercent float64 `json:"memory_percent"`
}
|
||||
|
||||
// GetContainerStats retrieves a one-shot stats snapshot for the given container
|
||||
// and computes CPU and memory percentages.
|
||||
func (c *Client) GetContainerStats(ctx context.Context, containerID string) (ContainerStats, error) {
|
||||
result, err := c.api.ContainerStats(ctx, containerID, client.ContainerStatsOptions{
|
||||
Stream: false,
|
||||
IncludePreviousSample: true,
|
||||
})
|
||||
if err != nil {
|
||||
return ContainerStats{}, fmt.Errorf("get container stats %s: %w", containerID, err)
|
||||
}
|
||||
defer result.Body.Close()
|
||||
|
||||
var stats container.StatsResponse
|
||||
if err := json.NewDecoder(result.Body).Decode(&stats); err != nil {
|
||||
return ContainerStats{}, fmt.Errorf("decode container stats %s: %w", containerID, err)
|
||||
}
|
||||
|
||||
cpuPercent := calculateCPUPercent(stats)
|
||||
memUsage := int64(stats.MemoryStats.Usage)
|
||||
memLimit := int64(stats.MemoryStats.Limit)
|
||||
var memPercent float64
|
||||
if memLimit > 0 {
|
||||
memPercent = float64(memUsage) / float64(memLimit) * 100.0
|
||||
}
|
||||
|
||||
return ContainerStats{
|
||||
CPUPercent: cpuPercent,
|
||||
MemoryUsage: memUsage,
|
||||
MemoryLimit: memLimit,
|
||||
MemoryPercent: memPercent,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// calculateCPUPercent computes CPU usage percentage from a stats response
|
||||
// using the delta between current and previous CPU readings.
|
||||
func calculateCPUPercent(stats container.StatsResponse) float64 {
|
||||
cpuDelta := float64(stats.CPUStats.CPUUsage.TotalUsage) - float64(stats.PreCPUStats.CPUUsage.TotalUsage)
|
||||
systemDelta := float64(stats.CPUStats.SystemUsage) - float64(stats.PreCPUStats.SystemUsage)
|
||||
|
||||
if systemDelta <= 0 || cpuDelta < 0 {
|
||||
return 0.0
|
||||
}
|
||||
|
||||
onlineCPUs := float64(stats.CPUStats.OnlineCPUs)
|
||||
if onlineCPUs == 0 {
|
||||
onlineCPUs = 1
|
||||
}
|
||||
|
||||
return (cpuDelta / systemDelta) * onlineCPUs * 100.0
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
package notify
|
||||
|
||||
// Notification event type identifiers. Each value is the string used to
// name the corresponding kind of event.
const (
	// EventTypeDeploySuccess names the event for a deploy that succeeded.
	EventTypeDeploySuccess = "deploy_success"

	// EventTypeDeployFailure names the event for a deploy that failed.
	EventTypeDeployFailure = "deploy_failure"

	// EventTypeStaleDetected names the event raised when staleness is detected.
	EventTypeStaleDetected = "stale_detected"

	// EventTypeProxyUnhealthy names the event raised when the proxy is unhealthy.
	EventTypeProxyUnhealthy = "proxy_unhealthy"
)
|
||||
Reference in New Issue
Block a user