feat(observability): phase 2 - stale container detection
Add periodic scanner for stale containers:
- Cron-based scanner (hourly) detects non-running containers exceeding threshold
- last_alive_at tracking on instances, updated on deploy/start/restart
- API: GET /api/containers/stale, POST cleanup (single + bulk)
- Event log warnings emitted for newly stale containers
- Graceful handling of externally removed containers
This commit is contained in:
@@ -11,6 +11,7 @@ import (
|
||||
"github.com/alexei/docker-watcher/internal/docker"
|
||||
"github.com/alexei/docker-watcher/internal/events"
|
||||
"github.com/alexei/docker-watcher/internal/npm"
|
||||
"github.com/alexei/docker-watcher/internal/stale"
|
||||
"github.com/alexei/docker-watcher/internal/store"
|
||||
"github.com/alexei/docker-watcher/internal/webhook"
|
||||
)
|
||||
@@ -26,6 +27,7 @@ type Server struct {
|
||||
encKey [32]byte
|
||||
localAuth *auth.LocalAuth
|
||||
oidcProvider *auth.OIDCProvider
|
||||
staleScanner *stale.Scanner
|
||||
}
|
||||
|
||||
// NewServer creates a new API Server with all required dependencies.
|
||||
@@ -60,6 +62,12 @@ func NewServer(
|
||||
return s
|
||||
}
|
||||
|
||||
// SetStaleScanner stores scanner in the server's staleScanner field, overwriting any earlier value.
|
||||
// Call it after both the API server and the scanner are initialized; the assignment is unguarded (no nil check or locking), so presumably it runs once during startup — TODO confirm.
|
||||
func (s *Server) SetStaleScanner(scanner *stale.Scanner) {
|
||||
s.staleScanner = scanner
|
||||
}
|
||||
|
||||
// initOIDCProvider creates an OIDC provider from settings. Errors are logged, not fatal.
|
||||
func (s *Server) initOIDCProvider(ctx context.Context, as store.AuthSettings) {
|
||||
// Decrypt the OIDC client secret if it's encrypted.
|
||||
@@ -135,6 +143,9 @@ func (s *Server) Router() chi.Router {
|
||||
r.Get("/settings", s.getSettings)
|
||||
r.Get("/settings/npm-certificates", s.listNpmCertificates)
|
||||
|
||||
// Stale container endpoints.
|
||||
r.Get("/containers/stale", s.listStaleContainers)
|
||||
|
||||
// Admin-only routes: require admin role.
|
||||
r.Group(func(r chi.Router) {
|
||||
r.Use(auth.AdminOnly)
|
||||
@@ -192,6 +203,11 @@ func (s *Server) Router() chi.Router {
|
||||
r.Post("/test", s.testRegistry)
|
||||
})
|
||||
|
||||
// Stale container cleanup endpoints (admin-only).
|
||||
// Bulk route must be registered before parameterized route.
|
||||
r.Post("/containers/stale/cleanup", s.bulkCleanupStaleContainers)
|
||||
r.Post("/containers/stale/{id}/cleanup", s.cleanupStaleContainer)
|
||||
|
||||
// Settings endpoints.
|
||||
r.Put("/settings", s.updateSettings)
|
||||
r.Get("/settings/webhook-url", s.getWebhookURL)
|
||||
|
||||
Reference in New Issue
Block a user