feat(observability): phase 8 - container stats, notifications & dashboard

Add container monitoring and notification system:
- Docker Stats API: real-time CPU/memory for running containers
- Webhook notifications for error-severity events (deploy failures, stale detections, unhealthy proxy)
- Event log auto-pruning (daily, 30-day retention)
- ContainerStats component with auto-polling progress bars
- SystemHealthCard dashboard widget with running/proxy/error counts
- Full EN/RU i18n for stats and system health
This commit is contained in:
2026-03-30 11:37:25 +03:00
parent 79a40f3d9c
commit 7c57c740b4
13 changed files with 436 additions and 0 deletions
+49
View File
@@ -12,6 +12,8 @@ import (
"syscall"
"time"
"github.com/robfig/cron/v3"
dockerwatcher "github.com/alexei/docker-watcher"
"github.com/alexei/docker-watcher/internal/api"
"github.com/alexei/docker-watcher/internal/auth"
@@ -145,6 +147,51 @@ func main() {
slog.Warn("failed to start proxy health monitor", "error", err)
}
// Start daily event log pruning cron job.
cronScheduler := cron.New()
if _, err := cronScheduler.AddFunc("@daily", func() {
pruned, err := db.PruneEvents(30)
if err != nil {
slog.Error("event log prune failed", "error", err)
return
}
if pruned > 0 {
slog.Info("pruned old event log entries", "count", pruned)
}
}); err != nil {
slog.Warn("failed to schedule event prune cron", "error", err)
}
cronScheduler.Start()
// Subscribe to error events and forward notifications.
notifySub := eventBus.Subscribe(func(evt events.Event) bool {
if evt.Type != events.EventLog {
return false
}
p, ok := evt.Payload.(events.EventLogPayload)
if !ok {
return false
}
return p.Severity == "error"
})
go func() {
for evt := range notifySub {
p, ok := evt.Payload.(events.EventLogPayload)
if !ok {
continue
}
currentSettings, err := db.GetSettings()
if err != nil || currentSettings.NotificationURL == "" {
continue
}
notifier.Send(currentSettings.NotificationURL, notify.Event{
Type: p.Source + "_error",
Project: p.Source,
Error: p.Message,
})
}
}()
// Build API server.
apiServer := api.NewServer(db, dockerClient, npmClient, dep, webhookHandler, eventBus, encKey)
apiServer.SetStaleScanner(staleScanner)
@@ -190,6 +237,8 @@ func main() {
slog.Info("shutting down...")
// Stop accepting new work.
cronScheduler.Stop()
eventBus.Unsubscribe(notifySub)
proxyHealth.Stop()
staleScanner.Stop()
poller.Stop()