Files
tiny-forge/cmd/server/main.go
T
alexei.dolgolyov aefecdffdf feat(observability): phase 2 - stale container detection
Add periodic scanner for stale containers:
- Cron-based scanner (hourly) detects non-running containers exceeding threshold
- last_alive_at tracking on instances, updated on deploy/start/restart
- API: GET /api/containers/stale, POST cleanup (single + bulk)
- Event log warnings emitted for newly stale containers
- Graceful handling of externally removed containers
2026-03-30 11:12:25 +03:00

253 lines
6.9 KiB
Go

package main
import (
"context"
"errors"
"io/fs"
"log/slog"
"net/http"
"os"
"os/signal"
"path/filepath"
"syscall"
"time"
dockerwatcher "github.com/alexei/docker-watcher"
"github.com/alexei/docker-watcher/internal/api"
"github.com/alexei/docker-watcher/internal/auth"
"github.com/alexei/docker-watcher/internal/config"
"github.com/alexei/docker-watcher/internal/crypto"
"github.com/alexei/docker-watcher/internal/deployer"
"github.com/alexei/docker-watcher/internal/docker"
"github.com/alexei/docker-watcher/internal/events"
"github.com/alexei/docker-watcher/internal/health"
"github.com/alexei/docker-watcher/internal/logging"
"github.com/alexei/docker-watcher/internal/notify"
"github.com/alexei/docker-watcher/internal/npm"
"github.com/alexei/docker-watcher/internal/registry"
"github.com/alexei/docker-watcher/internal/stale"
"github.com/alexei/docker-watcher/internal/store"
"github.com/alexei/docker-watcher/internal/webhook"
)
// main wires together the store, Docker and NPM clients, deployer, webhook
// handler, registry poller, stale-container scanner and HTTP API, serves until
// SIGINT or SIGTERM is received, and then shuts the components down.
//
// Any fatal startup failure is logged and terminates the process with exit
// code 1. os.Exit does not run deferred calls, so the deferred cleanups below
// only take effect on the normal shutdown path.
func main() {
	// Initialize structured JSON logging.
	logging.Setup()

	dataDir := envOrDefault("DATA_DIR", "./data")
	if err := os.MkdirAll(dataDir, 0o755); err != nil {
		slog.Error("create data directory", "error", err)
		os.Exit(1)
	}

	// Open database.
	dbPath := filepath.Join(dataDir, "docker-watcher.db")
	db, err := store.New(dbPath)
	if err != nil {
		slog.Error("open store", "error", err)
		os.Exit(1)
	}
	// Close the store exactly once, and report a failure to do so. Because
	// this is the first deferred call it runs last, i.e. after the persistent
	// event logger registered further down has been stopped.
	defer func() {
		if err := db.Close(); err != nil {
			slog.Error("database close error", "error", err)
		}
	}()

	// Derive encryption key from environment (required).
	encKey, err := crypto.KeyFromEnv()
	if err != nil {
		// Include the underlying error so a malformed key can be told apart
		// from a missing one.
		slog.Error("ENCRYPTION_KEY is required — set it to a random 32+ character string", "error", err)
		os.Exit(1)
	}

	// Import seed config on first launch (idempotent).
	seedPath := envOrDefault("SEED_FILE", "./docker-watcher.yaml")
	if err := config.ImportSeed(db, seedPath); err != nil {
		slog.Error("seed import", "error", err)
		os.Exit(1)
	}

	// Ensure default admin user exists on first launch.
	if err := ensureDefaultAdmin(db); err != nil {
		slog.Error("ensure default admin", "error", err)
		os.Exit(1)
	}

	// Initialize Docker client.
	dockerClient, err := docker.New()
	if err != nil {
		slog.Error("create docker client", "error", err)
		os.Exit(1)
	}
	defer dockerClient.Close()

	// Read settings for NPM URL and polling interval.
	settings, err := db.GetSettings()
	if err != nil {
		slog.Error("get settings", "error", err)
		os.Exit(1)
	}

	// Initialize NPM client; the NPM_URL environment variable overrides the
	// stored setting.
	npmURL := envOrDefault("NPM_URL", settings.NpmURL)
	npmClient := npm.New(npmURL)

	// Initialize services.
	healthChecker := health.New()
	notifier := notify.New()
	eventBus := events.New()

	// Auto-persist warn/error events from the event bus to the database.
	stopLogger := eventBus.RegisterPersistentLogger(func(source, severity, message, metadata string) (int64, string, error) {
		evt, err := db.InsertEvent(store.EventLog{
			Source:   source,
			Severity: severity,
			Message:  message,
			Metadata: metadata,
		})
		if err != nil {
			return 0, "", err
		}
		return evt.ID, evt.CreatedAt, nil
	})
	defer stopLogger()

	dep := deployer.New(dockerClient, npmClient, db, healthChecker, notifier, eventBus, encKey)

	// Initialize webhook handler.
	webhookHandler := webhook.NewHandler(db, dep, dockerClient)

	// Ensure webhook secret exists.
	if _, err := webhook.EnsureWebhookSecret(db); err != nil {
		slog.Error("ensure webhook secret", "error", err)
		os.Exit(1)
	}
	slog.Info("webhook secret configured (use /api/settings/webhook-url to retrieve)")

	// Initialize registry poller. A start failure is only a warning: the
	// server keeps running without polling.
	poller := registry.NewPoller(db, dep, encKey)
	pollingInterval := envOrDefault("POLLING_INTERVAL", settings.PollingInterval)
	if pollingInterval != "" {
		if err := poller.Start(pollingInterval); err != nil {
			slog.Warn("failed to start poller", "error", err)
		}
	}

	// Initialize stale container scanner (non-fatal if it cannot start).
	staleScanner := stale.New(db, dockerClient, eventBus)
	if err := staleScanner.Start("1h"); err != nil {
		slog.Warn("failed to start stale scanner", "error", err)
	}

	// Build API server.
	apiServer := api.NewServer(db, dockerClient, npmClient, dep, webhookHandler, eventBus, encKey)
	apiServer.SetStaleScanner(staleScanner)
	router := apiServer.Router()

	// Serve embedded static files for the SPA frontend.
	// The embed.FS has "web/build" as a prefix, so we sub it to get the root.
	webBuildFS, err := fs.Sub(dockerwatcher.WebBuildFS, "web/build")
	if err != nil {
		slog.Warn("embedded frontend not available", "error", err)
	} else {
		staticHandler := api.StaticHandler(webBuildFS)
		// Handle all non-API routes with the static file server.
		router.NotFound(staticHandler.ServeHTTP)
	}

	// Start HTTP server.
	addr := envOrDefault("LISTEN_ADDR", ":8080")
	httpServer := &http.Server{
		Addr:        addr,
		Handler:     router,
		ReadTimeout: 30 * time.Second,
		// WriteTimeout is disabled (0) to support SSE long-lived connections.
		// Individual non-SSE handlers should use context timeouts as needed.
		WriteTimeout: 0,
		IdleTimeout:  120 * time.Second,
	}

	// Graceful shutdown: buffer one signal so the notification is not lost if
	// it arrives before main reaches the receive below.
	done := make(chan os.Signal, 1)
	signal.Notify(done, os.Interrupt, syscall.SIGTERM)

	go func() {
		slog.Info("Docker Watcher started", "addr", addr)
		// ErrServerClosed is the expected result after Shutdown; anything
		// else means the listener failed and the process cannot serve.
		if err := httpServer.ListenAndServe(); err != nil && !errors.Is(err, http.ErrServerClosed) {
			slog.Error("HTTP server error", "error", err)
			os.Exit(1)
		}
	}()

	<-done
	slog.Info("shutting down...")

	// Stop accepting new work.
	staleScanner.Stop()
	poller.Stop()

	// Drain in-progress deploys and notifications.
	dep.Drain()
	notifier.Drain()

	// Shut down HTTP server, waiting at most 30 seconds for open connections.
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()
	if err := httpServer.Shutdown(ctx); err != nil {
		slog.Error("HTTP server shutdown error", "error", err)
	}

	// The database is closed by the deferred call registered right after it
	// was opened.
	slog.Info("Docker Watcher stopped")
}
// envOrDefault looks up the environment variable named key and returns its
// value. When the variable is unset or set to the empty string, fallback is
// returned instead.
func envOrDefault(key, fallback string) string {
	value := os.Getenv(key)
	if value == "" {
		return fallback
	}
	return value
}
// ensureDefaultAdmin creates the initial "admin" user when the store contains
// no users, and does nothing when at least one user already exists.
//
// The password is taken from the ADMIN_PASSWORD environment variable. There is
// no built-in default: if the variable is unset or empty on first launch, an
// error is returned so the caller can log it and abort startup.
//
// It returns any error from counting users, hashing the password, or creating
// the user.
func ensureDefaultAdmin(db *store.Store) error {
	count, err := db.UserCount()
	if err != nil {
		return err
	}
	if count > 0 {
		return nil // Users already exist, skip.
	}

	password := os.Getenv("ADMIN_PASSWORD")
	if password == "" {
		// Report the problem to the caller instead of exiting here; the
		// caller decides how to log the failure and terminate.
		return errors.New("ADMIN_PASSWORD is required on first launch — set it to a secure password")
	}

	hash, err := auth.HashPassword(password)
	if err != nil {
		return err
	}

	_, err = db.CreateUser(store.User{
		Username:     "admin",
		PasswordHash: hash,
		Email:        "",
		Role:         "admin",
	})
	if err != nil {
		// Ignore duplicate key errors (race condition on concurrent startup).
		// NOTE(review): the sentinel checked here is store.ErrNotFound, which
		// does not look like a duplicate-key error — confirm which error
		// CreateUser returns for an existing username and match on that.
		if errors.Is(err, store.ErrNotFound) {
			return nil
		}
		return err
	}

	slog.Info("default admin user created", "username", "admin")
	return nil
}