Files
tiny-forge/internal/staticsite/healthcheck.go
T
alexei.dolgolyov 791cd4d6af
Build / build (push) Successful in 12m20s
feat: rename Docker Watcher to Tinyforge
Rebrand the project as Tinyforge to reflect its evolution from a Docker
container watcher into a self-hosted mini CI/deployment platform.

Rename covers: Go module path, Docker labels, DB/config filenames,
JWT issuer, Dockerfile binary, docker-compose, CI workflows, frontend
i18n, README with static sites docs, and all code comments.
2026-04-12 21:30:39 +03:00

112 lines
2.8 KiB
Go

package staticsite
import (
"context"
"fmt"
"log/slog"
"sync"
"time"
"github.com/alexei/tinyforge/internal/docker"
"github.com/alexei/tinyforge/internal/store"
"github.com/robfig/cron/v3"
)
// HealthChecker periodically checks that deployed static site containers
// are still running. If a container has crashed, it updates the site status
// to "failed" and optionally triggers a redeploy.
type HealthChecker struct {
store *store.Store
docker *docker.Client
manager *Manager
cron *cron.Cron
mu sync.Mutex
entryID cron.EntryID
running bool
}
// NewHealthChecker creates a new static site health checker.
func NewHealthChecker(st *store.Store, dockerClient *docker.Client, mgr *Manager) *HealthChecker {
return &HealthChecker{
store: st,
docker: dockerClient,
manager: mgr,
cron: cron.New(),
}
}
// Start begins the periodic health check with the given interval (e.g., "5m", "1m").
func (h *HealthChecker) Start(interval string) error {
h.mu.Lock()
defer h.mu.Unlock()
duration, err := time.ParseDuration(interval)
if err != nil {
return fmt.Errorf("parse interval %q: %w", interval, err)
}
if h.running {
h.cron.Remove(h.entryID)
}
spec := fmt.Sprintf("@every %s", duration)
id, err := h.cron.AddFunc(spec, h.check)
if err != nil {
return fmt.Errorf("schedule health check: %w", err)
}
h.entryID = id
h.running = true
h.cron.Start()
slog.Info("static site health checker started", "interval", interval)
return nil
}
// Stop stops the periodic health checker.
func (h *HealthChecker) Stop() {
h.mu.Lock()
defer h.mu.Unlock()
if h.running {
h.cron.Stop()
h.running = false
slog.Info("static site health checker stopped")
}
}
// check runs a single health check pass over all deployed static sites.
func (h *HealthChecker) check() {
sites, err := h.store.GetAllStaticSites()
if err != nil {
slog.Error("static site health check: failed to list sites", "error", err)
return
}
ctx := context.Background()
for _, site := range sites {
if site.Status != "deployed" || site.ContainerID == "" {
continue
}
running, err := h.docker.IsContainerRunning(ctx, site.ContainerID)
if err != nil {
// Container might have been removed externally.
slog.Warn("static site health check: container inspect failed",
"site", site.Name, "container", site.ContainerID[:12], "error", err)
h.manager.updateStatus(site.ID, "failed", site.LastCommitSHA, "container not found")
h.manager.publishEvent(site.ID, site.Name, "failed: container not found")
continue
}
if !running {
slog.Warn("static site health check: container not running",
"site", site.Name, "container", site.ContainerID[:12])
h.manager.updateStatus(site.ID, "failed", site.LastCommitSHA, "container stopped unexpectedly")
h.manager.publishEvent(site.ID, site.Name, "failed: container stopped unexpectedly")
}
}
}