feat(observability): phase 8 - container stats, notifications & dashboard
Add container monitoring and notification system:
- Docker Stats API: real-time CPU/memory for running containers
- Webhook notifications for errors (deploy failures, stale, proxy unhealthy)
- Event log auto-pruning (daily, 30-day retention)
- ContainerStats component with auto-polling progress bars
- SystemHealthCard dashboard widget with running/proxy/error counts
- Full EN/RU i18n for stats and system health
This commit is contained in:
@@ -12,6 +12,8 @@ import (
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/robfig/cron/v3"
|
||||
|
||||
dockerwatcher "github.com/alexei/docker-watcher"
|
||||
"github.com/alexei/docker-watcher/internal/api"
|
||||
"github.com/alexei/docker-watcher/internal/auth"
|
||||
@@ -145,6 +147,51 @@ func main() {
|
||||
slog.Warn("failed to start proxy health monitor", "error", err)
|
||||
}
|
||||
|
||||
// Start daily event log pruning cron job.
|
||||
cronScheduler := cron.New()
|
||||
if _, err := cronScheduler.AddFunc("@daily", func() {
|
||||
pruned, err := db.PruneEvents(30)
|
||||
if err != nil {
|
||||
slog.Error("event log prune failed", "error", err)
|
||||
return
|
||||
}
|
||||
if pruned > 0 {
|
||||
slog.Info("pruned old event log entries", "count", pruned)
|
||||
}
|
||||
}); err != nil {
|
||||
slog.Warn("failed to schedule event prune cron", "error", err)
|
||||
}
|
||||
cronScheduler.Start()
|
||||
|
||||
// Subscribe to error events and forward notifications.
|
||||
notifySub := eventBus.Subscribe(func(evt events.Event) bool {
|
||||
if evt.Type != events.EventLog {
|
||||
return false
|
||||
}
|
||||
p, ok := evt.Payload.(events.EventLogPayload)
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
return p.Severity == "error"
|
||||
})
|
||||
go func() {
|
||||
for evt := range notifySub {
|
||||
p, ok := evt.Payload.(events.EventLogPayload)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
currentSettings, err := db.GetSettings()
|
||||
if err != nil || currentSettings.NotificationURL == "" {
|
||||
continue
|
||||
}
|
||||
notifier.Send(currentSettings.NotificationURL, notify.Event{
|
||||
Type: p.Source + "_error",
|
||||
Project: p.Source,
|
||||
Error: p.Message,
|
||||
})
|
||||
}
|
||||
}()
|
||||
|
||||
// Build API server.
|
||||
apiServer := api.NewServer(db, dockerClient, npmClient, dep, webhookHandler, eventBus, encKey)
|
||||
apiServer.SetStaleScanner(staleScanner)
|
||||
@@ -190,6 +237,8 @@ func main() {
|
||||
slog.Info("shutting down...")
|
||||
|
||||
// Stop accepting new work.
|
||||
cronScheduler.Stop()
|
||||
eventBus.Unsubscribe(notifySub)
|
||||
proxyHealth.Stop()
|
||||
staleScanner.Stop()
|
||||
poller.Stop()
|
||||
|
||||
Reference in New Issue
Block a user