feat(observability): phase 2 - stale container detection
Add periodic scanner for stale containers:
- Cron-based scanner (hourly) detects non-running containers exceeding threshold
- last_alive_at tracking on instances, updated on deploy/start/restart
- API: GET /api/containers/stale, POST cleanup (single + bulk)
- Event log warnings emitted for newly stale containers
- Graceful handling of externally removed containers
This commit is contained in:
@@ -333,6 +333,9 @@ func (d *Deployer) executeDeploy(
|
||||
if err := d.store.UpdateInstanceStatus(instanceID, "running"); err != nil {
|
||||
slog.Warn("update instance status to running", "error", err)
|
||||
}
|
||||
if err := d.store.UpdateLastAliveAt(instanceID); err != nil {
|
||||
slog.Warn("update last_alive_at on deploy", "instance_id", instanceID, "error", err)
|
||||
}
|
||||
d.publishInstanceStatus(instanceID, project.ID, stage.ID, "running")
|
||||
d.logDeploy(deployID, "Container started", "info")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user