cba2149aa9
Wraps up the workload refactor with the fixes that came out of the multi-agent code review (see docs/plans/workload-refactor.md "What actually shipped"). Backend: - store.ReconcileContainer: separate write path so the 30s reconciler tick no longer overwrites deployer-owned fields (subdomain, proxy_route_id, npm_proxy_id, image_tag). - Container.stage_id column + index; ListProxyRoutes / ListContainersByStageID join via stage_id (survives stage rename), with legacy fallback to (project_id, role=stage_name). - Reconciler: workload-existence check (rejects forged tinyforge.workload.id labels), skips inventing project-kind rows, child-context cancel before wg.Wait() on shutdown. - Transactional CRUD across projects / stacks / static_sites: parent UPDATE and workload sync land in one transaction so secret rotations are durable. - Webhook routing reads exclusively through workloads.webhook_secret; legacy GetProjectByWebhookSecret / GetStaticSiteByWebhookSecret fallback removed. - store.GetStackByComposeProjectName + indexed lookup (no more full-table stack scan per compose container per tick). - store.ListMissingSweepRows: filtered query for the missing-sweep. - /api/instances/* handlers verify (workload_id, role) match URL (project_id, stage_name) before mutating — closes the cross-project hijack the security review flagged. - extra_json no longer referenced from Go (column kept on disk for now). Frontend: - WorkloadContainers.svelte: generic detail-page panel reusable by stack and site detail pages. - Containers page polish: client-side kind/state filters over an unfiltered fetch, URL-synced filters, race-safe loads via sequence number, EN+RU i18n, sidebar counter via navCounts.containers. Misc: - scripts/dev-server.sh: tolerate empty netstat grep result. - .gitignore: ignore docker-watcher binaries, .claude/worktrees/, .facts-sync.json.
506 lines
15 KiB
Go
506 lines
15 KiB
Go
package main
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"io/fs"
|
|
"log/slog"
|
|
"net/http"
|
|
"os"
|
|
"os/signal"
|
|
"path/filepath"
|
|
"syscall"
|
|
"time"
|
|
|
|
"github.com/robfig/cron/v3"
|
|
|
|
tinyforge "github.com/alexei/tinyforge"
|
|
"github.com/alexei/tinyforge/internal/api"
|
|
"github.com/alexei/tinyforge/internal/auth"
|
|
"github.com/alexei/tinyforge/internal/config"
|
|
"github.com/alexei/tinyforge/internal/crypto"
|
|
"github.com/alexei/tinyforge/internal/backup"
|
|
"github.com/alexei/tinyforge/internal/deployer"
|
|
"github.com/alexei/tinyforge/internal/dns"
|
|
"github.com/alexei/tinyforge/internal/docker"
|
|
"github.com/alexei/tinyforge/internal/events"
|
|
"github.com/alexei/tinyforge/internal/health"
|
|
"github.com/alexei/tinyforge/internal/logging"
|
|
"github.com/alexei/tinyforge/internal/notify"
|
|
"github.com/alexei/tinyforge/internal/npm"
|
|
"github.com/alexei/tinyforge/internal/proxy"
|
|
"github.com/alexei/tinyforge/internal/reconciler"
|
|
"github.com/alexei/tinyforge/internal/registry"
|
|
"github.com/alexei/tinyforge/internal/stale"
|
|
"github.com/alexei/tinyforge/internal/stack"
|
|
"github.com/alexei/tinyforge/internal/stats"
|
|
"github.com/alexei/tinyforge/internal/staticsite"
|
|
"github.com/alexei/tinyforge/internal/store"
|
|
"github.com/alexei/tinyforge/internal/webhook"
|
|
)
|
|
|
|
func main() {
|
|
// Initialize structured JSON logging.
|
|
logging.Setup()
|
|
|
|
dataDir := envOrDefault("DATA_DIR", "./data")
|
|
|
|
if err := os.MkdirAll(dataDir, 0o755); err != nil {
|
|
slog.Error("create data directory", "error", err)
|
|
os.Exit(1)
|
|
}
|
|
|
|
// Open database.
|
|
dbPath := filepath.Join(dataDir, "tinyforge.db")
|
|
db, err := store.New(dbPath)
|
|
if err != nil {
|
|
slog.Error("open store", "error", err)
|
|
os.Exit(1)
|
|
}
|
|
defer db.Close()
|
|
|
|
// Derive encryption key from environment (required).
|
|
encKey, err := crypto.KeyFromEnv()
|
|
if err != nil {
|
|
slog.Error("ENCRYPTION_KEY is required — set it to a random 32+ character string")
|
|
os.Exit(1)
|
|
}
|
|
|
|
// Import seed config on first launch (idempotent).
|
|
seedPath := envOrDefault("SEED_FILE", "./tinyforge.yaml")
|
|
if err := config.ImportSeed(db, seedPath); err != nil {
|
|
slog.Error("seed import", "error", err)
|
|
os.Exit(1)
|
|
}
|
|
|
|
// Backfill workload rows for any project / stack / static site that
|
|
// predates the workload refactor. Idempotent — safe on every boot.
|
|
if err := db.BackfillWorkloads(); err != nil {
|
|
slog.Error("workload backfill", "error", err)
|
|
os.Exit(1)
|
|
}
|
|
|
|
// Ensure default admin user exists on first launch.
|
|
if err := ensureDefaultAdmin(db); err != nil {
|
|
slog.Error("ensure default admin", "error", err)
|
|
os.Exit(1)
|
|
}
|
|
|
|
// Initialize Docker client.
|
|
dockerClient, err := docker.New()
|
|
if err != nil {
|
|
slog.Error("create docker client", "error", err)
|
|
os.Exit(1)
|
|
}
|
|
defer dockerClient.Close()
|
|
|
|
// Start the container index reconciler. Runs one boot pass and then
|
|
// ticks every 30s. Boot pass populates the containers table from any
|
|
// running containers that predate the workload refactor; subsequent
|
|
// ticks catch state drift the deployer didn't witness (e.g., a stack
|
|
// service that exited on its own). Stop() cancels its own child context
|
|
// before waiting on the goroutine, so a hung `docker ps` doesn't block
|
|
// shutdown.
|
|
rec := reconciler.New(db, dockerClient, 30*time.Second)
|
|
rec.Start(context.Background())
|
|
defer rec.Stop()
|
|
|
|
// Read settings for NPM URL and polling interval.
|
|
settings, err := db.GetSettings()
|
|
if err != nil {
|
|
slog.Error("get settings", "error", err)
|
|
os.Exit(1)
|
|
}
|
|
|
|
// Initialize NPM client (used for NPM-specific endpoints like certificates).
|
|
npmURL := envOrDefault("NPM_URL", settings.NpmURL)
|
|
npmClient := npm.New(npmURL)
|
|
|
|
// Build proxy provider based on settings.
|
|
var proxyProvider proxy.Provider
|
|
switch settings.ProxyProvider {
|
|
case "none":
|
|
proxyProvider = proxy.NewNoneProvider()
|
|
slog.Info("proxy provider: none")
|
|
case "traefik":
|
|
proxyProvider = proxy.NewTraefikProvider(
|
|
settings.TraefikEntrypoint,
|
|
settings.TraefikCertResolver,
|
|
settings.TraefikNetwork,
|
|
settings.TraefikAPIURL,
|
|
)
|
|
slog.Info("proxy provider: traefik", "entrypoint", settings.TraefikEntrypoint)
|
|
default:
|
|
// Default to NPM for backward compatibility (including "npm" and empty string).
|
|
npmPassword := ""
|
|
if settings.NpmPassword != "" {
|
|
decrypted, err := crypto.Decrypt(encKey, settings.NpmPassword)
|
|
if err != nil {
|
|
slog.Warn("failed to decrypt NPM password for proxy provider", "error", err)
|
|
} else {
|
|
npmPassword = decrypted
|
|
}
|
|
}
|
|
proxyProvider = proxy.NewNpmProvider(npmClient, settings.NpmEmail, npmPassword)
|
|
slog.Info("proxy provider: npm", "url", npmURL)
|
|
}
|
|
|
|
// Initialize services.
|
|
healthChecker := health.New()
|
|
notifier := notify.New()
|
|
eventBus := events.New()
|
|
|
|
// Auto-persist warn/error events from the event bus to the database.
|
|
stopLogger := eventBus.RegisterPersistentLogger(func(source, severity, message, metadata string) (int64, string, error) {
|
|
evt, err := db.InsertEvent(store.EventLog{
|
|
Source: source,
|
|
Severity: severity,
|
|
Message: message,
|
|
Metadata: metadata,
|
|
})
|
|
if err != nil {
|
|
return 0, "", err
|
|
}
|
|
return evt.ID, evt.CreatedAt, nil
|
|
})
|
|
defer stopLogger()
|
|
|
|
dep := deployer.New(dockerClient, proxyProvider, db, healthChecker, notifier, eventBus, encKey)
|
|
|
|
// Initialize webhook handler. Per-project and per-site secrets are stored
|
|
// on their respective rows; the static-site triggerer is wired in below
|
|
// once the site manager has been constructed.
|
|
webhookHandler := webhook.NewHandler(db, dep, nil)
|
|
|
|
// Initialize registry poller.
|
|
poller := registry.NewPoller(db, dep, encKey)
|
|
pollingInterval := envOrDefault("POLLING_INTERVAL", settings.PollingInterval)
|
|
if pollingInterval != "" {
|
|
if err := poller.Start(pollingInterval); err != nil {
|
|
slog.Warn("failed to start poller", "error", err)
|
|
}
|
|
}
|
|
|
|
// Initialize stale container scanner.
|
|
staleScanner := stale.New(db, dockerClient, eventBus)
|
|
if err := staleScanner.Start("1h"); err != nil {
|
|
slog.Warn("failed to start stale scanner", "error", err)
|
|
}
|
|
|
|
// Start daily event log pruning cron job.
|
|
cronScheduler := cron.New()
|
|
if _, err := cronScheduler.AddFunc("@daily", func() {
|
|
pruned, err := db.PruneEvents(30)
|
|
if err != nil {
|
|
slog.Error("event log prune failed", "error", err)
|
|
return
|
|
}
|
|
if pruned > 0 {
|
|
slog.Info("pruned old event log entries", "count", pruned)
|
|
}
|
|
}); err != nil {
|
|
slog.Warn("failed to schedule event prune cron", "error", err)
|
|
}
|
|
// Webhook delivery log: keep 14 days of audit trail. Same daily cadence
|
|
// so an admin always has a recent window for debugging without
|
|
// unbounded growth on a noisy CI.
|
|
if _, err := cronScheduler.AddFunc("@daily", func() {
|
|
cutoff := time.Now().UTC().AddDate(0, 0, -14).Format("2006-01-02 15:04:05")
|
|
pruned, err := db.PruneWebhookDeliveriesBefore(cutoff)
|
|
if err != nil {
|
|
slog.Error("webhook delivery prune failed", "error", err)
|
|
return
|
|
}
|
|
if pruned > 0 {
|
|
slog.Info("pruned old webhook deliveries", "count", pruned)
|
|
}
|
|
}); err != nil {
|
|
slog.Warn("failed to schedule webhook delivery prune cron", "error", err)
|
|
}
|
|
cronScheduler.Start()
|
|
|
|
// Subscribe to error events and forward notifications.
|
|
notifySub := eventBus.Subscribe(func(evt events.Event) bool {
|
|
if evt.Type != events.EventLog {
|
|
return false
|
|
}
|
|
p, ok := evt.Payload.(events.EventLogPayload)
|
|
if !ok {
|
|
return false
|
|
}
|
|
return p.Severity == "error"
|
|
})
|
|
go func() {
|
|
for evt := range notifySub {
|
|
p, ok := evt.Payload.(events.EventLogPayload)
|
|
if !ok {
|
|
continue
|
|
}
|
|
currentSettings, err := db.GetSettings()
|
|
if err != nil || currentSettings.NotificationURL == "" {
|
|
continue
|
|
}
|
|
notifier.SendSigned(currentSettings.NotificationURL, currentSettings.NotificationSecret, notify.TierSettings, notify.Event{
|
|
Type: p.Source + "_error",
|
|
Project: p.Source,
|
|
Error: p.Message,
|
|
})
|
|
}
|
|
}()
|
|
|
|
// Initialize DNS provider from settings (nil for wildcard mode).
|
|
dnsProvider := initDNSProvider(settings, encKey)
|
|
if dnsProvider != nil {
|
|
dep.SetDNSProvider(dnsProvider)
|
|
slog.Info("DNS provider initialized", "provider", settings.DNSProvider)
|
|
}
|
|
|
|
// Initialize backup engine.
|
|
backupEngine, err := backup.New(db, dbPath, dataDir)
|
|
if err != nil {
|
|
slog.Error("create backup engine", "error", err)
|
|
os.Exit(1)
|
|
}
|
|
dep.SetPreDeployBackuper(backupEngine)
|
|
|
|
// Clean orphaned backup files and prune on startup.
|
|
if cleaned, err := backupEngine.CleanOrphans(); err != nil {
|
|
slog.Warn("backup: clean orphans on startup", "error", err)
|
|
} else if cleaned > 0 {
|
|
slog.Info("backup: cleaned orphaned files on startup", "count", cleaned)
|
|
}
|
|
if settings.BackupRetentionCount > 0 {
|
|
if pruned, err := backupEngine.Prune(settings.BackupRetentionCount); err != nil {
|
|
slog.Warn("backup: prune on startup", "error", err)
|
|
} else if pruned > 0 {
|
|
slog.Info("backup: pruned old backups on startup", "count", pruned)
|
|
}
|
|
}
|
|
|
|
// Schedule autobackup if enabled. Track entry ID for rescheduling.
|
|
var backupCronID cron.EntryID
|
|
scheduleAutobackup := func(enabled bool, intervalHours int) {
|
|
// Remove existing schedule if any.
|
|
if backupCronID != 0 {
|
|
cronScheduler.Remove(backupCronID)
|
|
backupCronID = 0
|
|
slog.Info("autobackup: removed previous schedule")
|
|
}
|
|
if !enabled || intervalHours <= 0 {
|
|
return
|
|
}
|
|
interval := fmt.Sprintf("@every %dh", intervalHours)
|
|
id, err := cronScheduler.AddFunc(interval, func() {
|
|
b, err := backupEngine.CreateBackup("auto")
|
|
if err != nil {
|
|
slog.Error("autobackup failed", "error", err)
|
|
return
|
|
}
|
|
slog.Info("autobackup completed", "id", b.ID, "filename", b.Filename)
|
|
|
|
currentSettings, err := db.GetSettings()
|
|
if err == nil && currentSettings.BackupRetentionCount > 0 {
|
|
backupEngine.Prune(currentSettings.BackupRetentionCount)
|
|
}
|
|
})
|
|
if err != nil {
|
|
slog.Warn("failed to schedule autobackup", "error", err)
|
|
} else {
|
|
backupCronID = id
|
|
slog.Info("autobackup scheduled", "interval_hours", intervalHours)
|
|
}
|
|
}
|
|
scheduleAutobackup(settings.BackupEnabled, settings.BackupIntervalHours)
|
|
|
|
// Initialize resource stats collector. Interval + retention are read from
|
|
// settings on each tick, so configuration changes take effect within one
|
|
// tick without a restart.
|
|
statsCollector := stats.New(db, dockerClient)
|
|
statsCollector.Start()
|
|
|
|
// Initialize static site manager and health checker.
|
|
staticSiteMgr := staticsite.NewManager(db, dockerClient, proxyProvider, eventBus, notifier, encKey)
|
|
webhookHandler.SetSiteSyncTriggerer(staticSiteMgr)
|
|
staticSiteHealth := staticsite.NewHealthChecker(db, dockerClient, staticSiteMgr)
|
|
if err := staticSiteHealth.Start("2m"); err != nil {
|
|
slog.Warn("failed to start static site health checker", "error", err)
|
|
}
|
|
|
|
// Initialize stack (docker-compose) manager. Disabled gracefully if
|
|
// `docker compose` is not available on the host.
|
|
stackWorkDir := filepath.Join(filepath.Dir(dbPath), "stacks")
|
|
stackMgr, err := stack.NewManager(db, stack.NewCompose(""), eventBus, stackWorkDir)
|
|
if err != nil {
|
|
slog.Warn("failed to init stack manager", "error", err)
|
|
stackMgr = nil
|
|
} else if err := stackMgr.Available(context.Background()); err != nil {
|
|
slog.Warn("docker compose not available — stacks feature disabled", "error", err)
|
|
stackMgr = nil
|
|
}
|
|
|
|
// Build API server.
|
|
apiServer := api.NewServer(db, dockerClient, npmClient, proxyProvider, dep, notifier, webhookHandler, eventBus, encKey)
|
|
apiServer.SetStaticSiteManager(staticSiteMgr)
|
|
if stackMgr != nil {
|
|
apiServer.SetStackManager(stackMgr)
|
|
}
|
|
apiServer.SetStaleScanner(staleScanner)
|
|
apiServer.SetBackupEngine(backupEngine)
|
|
apiServer.SetDBPath(dbPath)
|
|
apiServer.SetBackupSettingsChangedCallback(scheduleAutobackup)
|
|
apiServer.SetDNSProvider(dnsProvider)
|
|
apiServer.SetDNSProviderChangedCallback(func(provider dns.Provider) {
|
|
dep.SetDNSProvider(provider)
|
|
})
|
|
apiServer.SetProxyProviderChangedCallback(func(provider proxy.Provider) {
|
|
dep.SetProxyProvider(provider)
|
|
})
|
|
router := apiServer.Router()
|
|
|
|
// Serve embedded static files for the SPA frontend.
|
|
// The embed.FS has "web/build" as a prefix, so we sub it to get the root.
|
|
webBuildFS, err := fs.Sub(tinyforge.WebBuildFS, "web/build")
|
|
if err != nil {
|
|
slog.Warn("embedded frontend not available", "error", err)
|
|
} else {
|
|
staticHandler := api.StaticHandler(webBuildFS)
|
|
// Handle all non-API routes with the static file server.
|
|
router.NotFound(staticHandler.ServeHTTP)
|
|
}
|
|
|
|
// Start HTTP server.
|
|
addr := envOrDefault("LISTEN_ADDR", ":8080")
|
|
httpServer := &http.Server{
|
|
Addr: addr,
|
|
Handler: router,
|
|
ReadTimeout: 30 * time.Second,
|
|
// WriteTimeout is disabled (0) to support SSE long-lived connections.
|
|
// Individual non-SSE handlers should use context timeouts as needed.
|
|
WriteTimeout: 0,
|
|
IdleTimeout: 120 * time.Second,
|
|
}
|
|
|
|
// Graceful shutdown.
|
|
done := make(chan os.Signal, 1)
|
|
signal.Notify(done, os.Interrupt, syscall.SIGTERM)
|
|
|
|
// Allow restore to trigger shutdown.
|
|
apiServer.SetShutdownFunc(func() {
|
|
done <- syscall.SIGTERM
|
|
})
|
|
|
|
go func() {
|
|
slog.Info("Tinyforge started", "addr", addr)
|
|
if err := httpServer.ListenAndServe(); err != nil && err != http.ErrServerClosed {
|
|
slog.Error("HTTP server error", "error", err)
|
|
os.Exit(1)
|
|
}
|
|
}()
|
|
|
|
<-done
|
|
slog.Info("shutting down...")
|
|
|
|
// Stop accepting new work.
|
|
cronScheduler.Stop()
|
|
eventBus.Unsubscribe(notifySub)
|
|
staticSiteHealth.Stop()
|
|
staleScanner.Stop()
|
|
poller.Stop()
|
|
statsCollector.Stop()
|
|
|
|
// Drain in-progress deploys, site syncs, and notifications.
|
|
dep.Drain()
|
|
webhookHandler.Drain()
|
|
notifier.Drain()
|
|
|
|
// Shut down HTTP server.
|
|
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
|
defer cancel()
|
|
|
|
if err := httpServer.Shutdown(ctx); err != nil {
|
|
slog.Error("HTTP server shutdown error", "error", err)
|
|
}
|
|
|
|
// Close database.
|
|
if err := db.Close(); err != nil {
|
|
slog.Error("database close error", "error", err)
|
|
}
|
|
|
|
slog.Info("Tinyforge stopped")
|
|
}
|
|
|
|
// envOrDefault returns the value of the environment variable named key.
// When the variable is unset or set to the empty string, fallback is
// returned instead.
func envOrDefault(key, fallback string) string {
	value := os.Getenv(key)
	if value == "" {
		return fallback
	}
	return value
}
|
|
|
|
// ensureDefaultAdmin creates a default admin user on first launch if no users exist.
|
|
// The password comes from ADMIN_PASSWORD env var, defaulting to "admin".
|
|
func ensureDefaultAdmin(db *store.Store) error {
|
|
count, err := db.UserCount()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if count > 0 {
|
|
return nil // Users already exist, skip.
|
|
}
|
|
|
|
password := os.Getenv("ADMIN_PASSWORD")
|
|
if password == "" {
|
|
slog.Error("ADMIN_PASSWORD is required on first launch — set it to a secure password")
|
|
os.Exit(1)
|
|
}
|
|
hash, err := auth.HashPassword(password)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
_, err = db.CreateUser(store.User{
|
|
Username: "admin",
|
|
PasswordHash: hash,
|
|
Email: "",
|
|
Role: "admin",
|
|
})
|
|
if err != nil {
|
|
// Ignore duplicate key errors (race condition on concurrent startup).
|
|
if errors.Is(err, store.ErrNotFound) {
|
|
return nil
|
|
}
|
|
return err
|
|
}
|
|
|
|
slog.Info("default admin user created", "username", "admin")
|
|
return nil
|
|
}
|
|
|
|
// initDNSProvider creates a DNS provider from settings. Returns nil for wildcard mode.
|
|
func initDNSProvider(settings store.Settings, encKey [32]byte) dns.Provider {
|
|
if settings.WildcardDNS || settings.DNSProvider == "" {
|
|
return nil
|
|
}
|
|
|
|
token := settings.CloudflareAPIToken
|
|
if token != "" {
|
|
decrypted, err := crypto.Decrypt(encKey, token)
|
|
if err != nil {
|
|
slog.Error("dns: failed to decrypt API token", "error", err)
|
|
return nil
|
|
}
|
|
token = decrypted
|
|
}
|
|
|
|
provider, err := dns.NewProvider(settings.DNSProvider, dns.Config{
|
|
Token: token,
|
|
ZoneID: settings.CloudflareZoneID,
|
|
})
|
|
if err != nil {
|
|
slog.Error("dns: failed to create provider", "error", err)
|
|
return nil
|
|
}
|
|
return provider
|
|
}
|