feat(observability): phase 2 - stale container detection
Add a periodic scanner for stale containers:
- Cron-based scanner (hourly) detects non-running containers exceeding the staleness threshold
- last_alive_at tracking on instances, updated on deploy/start/restart
- API: GET /api/containers/stale, POST cleanup (single + bulk)
- Event log warnings emitted for newly stale containers
- Graceful handling of externally removed containers
This commit is contained in:
@@ -196,6 +196,13 @@ func (s *Server) controlInstance(w http.ResponseWriter, r *http.Request, action
|
||||
slog.Error("update instance status", "instance_id", instanceID, "status", newStatus, "error", err)
|
||||
}
|
||||
|
||||
// Track last_alive_at when container becomes running.
|
||||
if newStatus == "running" {
|
||||
if err := s.store.UpdateLastAliveAt(instanceID); err != nil {
|
||||
slog.Error("update last_alive_at", "instance_id", instanceID, "error", err)
|
||||
}
|
||||
}
|
||||
|
||||
respondJSON(w, http.StatusOK, map[string]string{
|
||||
"instance_id": instanceID,
|
||||
"action": action,
|
||||
|
||||
Reference in New Issue
Block a user