Files
tiny-forge/cmd/server/main.go
T
alexei.dolgolyov 234c3c711e
Build / build (push) Successful in 10m43s
feat(static): inline static-source plugin; drop phantom-row adapter
Lift the static-site deploy pipeline from internal/staticsite/manager.go
into internal/workload/plugin/source/static/ so plugin-native static
workloads operate directly on plugin.Workload + the containers table +
workload_env. The cmd/server/static_backend.go phantom-row adapter is
gone; the legacy static_sites table is no longer touched on plugin
deploys.

Backend
- new state.go: runtimeState (last_commit_sha, last_sync_at,
  last_error, status) persisted in containers.extra_json under the
  deterministic row id <workloadID>:site
- per-workload sync.Mutex serializes saveState read-modify-write so
  parallel deploys for the same workload can't race container_id /
  proxy_route_id writes
- extra_json round-trips through map[string]json.RawMessage so
  unknown keys survive — typed runtimeStateKeys are stripped before
  merge so clearing a typed field actually drops the key
- new env.go reads workload_env (replaces static_site_secrets for
  plugin-native sites); decrypt-failure logs and skips one entry
  rather than failing the whole deploy
- new build.go ports prepareDenoBuild + prepareStaticBuild + copyDir;
  copyDir uses filepath.WalkDir + Lstat to refuse symlinks and
  non-regular files
- new deploy.go is the ~300-line core; intent.Reason gates force vs
  skip-if-no-changes; success-path saveState failure rolls back
  container + proxy route and writes "failed" state (no orphans)
- new teardown.go combines Remove + Stop; idempotent on
  never-deployed workloads
- new reconcile.go refreshes container state from Docker; flips
  runtimeState.Status to failed when the container is missing/crashed

Hardening (from go-reviewer + security-reviewer subagent passes;
1 CRITICAL + 5 HIGH + 3 MEDIUM addressed before merge)
- path-traversal defense in all 3 providers (gitea_content,
  github_provider, gitlab_provider): reject tree entries whose
  resolved local path escapes destDir
- verifyDownloadInsideRoot walks the build dir post-download as a
  second line of defense
- sanitizeError redacts the access token, collapses to one line, and
  clamps to 240 bytes before persisting to extra_json or fanning out
  to the notification webhook
- container/image/volume names suffixed with workload-id short prefix
  (workload name is not UNIQUE in schema)
- primaryDomain reads settings.Domain to complete a bare subdomain
  face into a full FQDN (matches legacy Manager behavior)
- ctx-aware health-check sleep
- json.Marshal for event metadata (was fmt.Sprintf JSON template)
- strings.HasPrefix for failed-status detection (was brittle slice
  expression)

Wire-up
- cmd/server/main.go: removed wireStaticBackend(...) call; existing
  blank import on _ ".../source/static" drives init() registration
- cmd/server/static_backend.go deleted

Doc
- WORKLOAD_REFACTOR_TODO: static port marked DONE; next focus is
  the hard legacy cutover (drop /api/projects, /api/stacks,
  /api/sites, /api/stages + their tables, internal/stack +
  internal/staticsite packages, frontend /projects /stacks /sites)

Behavior notes for operators
- plugin-native static workloads no longer write to static_sites;
  legacy /api/sites/* still serves original rows unchanged
- legacy tinyforge.static-site / .static-site-name container labels
  dropped on plugin deploys; canonical tinyforge.workload.id / .kind
  cover ownership
- container/image/volume names gained an 8-char ID suffix
  (e.g. dw-site-mysite-a1b2c3d4); legacy-deployed sites keep the
  old shape until redeployed through the plugin path
2026-05-16 02:56:23 +03:00

558 lines
18 KiB
Go

package main
import (
"context"
"errors"
"fmt"
"io/fs"
"log/slog"
"net/http"
"os"
"os/signal"
"path/filepath"
"syscall"
"time"
"github.com/robfig/cron/v3"
tinyforge "github.com/alexei/tinyforge"
"github.com/alexei/tinyforge/internal/api"
"github.com/alexei/tinyforge/internal/auth"
"github.com/alexei/tinyforge/internal/config"
"github.com/alexei/tinyforge/internal/crypto"
"github.com/alexei/tinyforge/internal/backup"
"github.com/alexei/tinyforge/internal/deployer"
"github.com/alexei/tinyforge/internal/dns"
"github.com/alexei/tinyforge/internal/docker"
"github.com/alexei/tinyforge/internal/events"
"github.com/alexei/tinyforge/internal/health"
"github.com/alexei/tinyforge/internal/logging"
"github.com/alexei/tinyforge/internal/logscanner"
"github.com/alexei/tinyforge/internal/notify"
"github.com/alexei/tinyforge/internal/npm"
"github.com/alexei/tinyforge/internal/proxy"
"github.com/alexei/tinyforge/internal/reconciler"
"github.com/alexei/tinyforge/internal/registry"
"github.com/alexei/tinyforge/internal/stale"
"github.com/alexei/tinyforge/internal/stack"
"github.com/alexei/tinyforge/internal/stats"
"github.com/alexei/tinyforge/internal/staticsite"
"github.com/alexei/tinyforge/internal/store"
"github.com/alexei/tinyforge/internal/webhook"
// Plugin registrations: each blank-import runs its init() and registers
// itself with internal/workload/plugin. Adding a new Source or Trigger
// is a matter of dropping a new package and adding it to this list.
_ "github.com/alexei/tinyforge/internal/workload/plugin/source/compose"
_ "github.com/alexei/tinyforge/internal/workload/plugin/source/image"
_ "github.com/alexei/tinyforge/internal/workload/plugin/source/static"
_ "github.com/alexei/tinyforge/internal/workload/plugin/trigger/git"
_ "github.com/alexei/tinyforge/internal/workload/plugin/trigger/manual"
_ "github.com/alexei/tinyforge/internal/workload/plugin/trigger/registry"
)
// main wires every Tinyforge subsystem together, serves the HTTP API until an
// interrupt/SIGTERM (or a restore-triggered shutdown) arrives, and then stops
// the subsystems in order.
//
// Failures in required components (data directory, store, encryption key,
// seed import, workload backfill, default admin, Docker client, settings,
// backup engine) are logged and terminate the process with exit status 1.
// Failures in optional components (poller, stale scanner, cron jobs, static
// site health checker, stack manager, log scanner, embedded frontend) are
// logged as warnings and startup continues.
//
// Note that os.Exit does not run deferred calls, so the defers registered
// below only fire on the normal shutdown path at the end of the function.
func main() {
	// Initialize structured JSON logging.
	logging.Setup()

	dataDir := envOrDefault("DATA_DIR", "./data")
	if err := os.MkdirAll(dataDir, 0o755); err != nil {
		slog.Error("create data directory", "error", err)
		os.Exit(1)
	}

	// Open database.
	dbPath := filepath.Join(dataDir, "tinyforge.db")
	db, err := store.New(dbPath)
	if err != nil {
		slog.Error("open store", "error", err)
		os.Exit(1)
	}
	defer db.Close()

	// Derive encryption key from environment (required). The underlying
	// error is logged too so a malformed key is distinguishable from a
	// missing one.
	encKey, err := crypto.KeyFromEnv()
	if err != nil {
		slog.Error("ENCRYPTION_KEY is required — set it to a random 32+ character string", "error", err)
		os.Exit(1)
	}

	// Import seed config on first launch (idempotent).
	seedPath := envOrDefault("SEED_FILE", "./tinyforge.yaml")
	if err := config.ImportSeed(db, seedPath); err != nil {
		slog.Error("seed import", "error", err)
		os.Exit(1)
	}

	// Backfill workload rows for any project / stack / static site that
	// predates the workload refactor. Idempotent — safe on every boot.
	if err := db.BackfillWorkloads(); err != nil {
		slog.Error("workload backfill", "error", err)
		os.Exit(1)
	}

	// Ensure default admin user exists on first launch.
	if err := ensureDefaultAdmin(db); err != nil {
		slog.Error("ensure default admin", "error", err)
		os.Exit(1)
	}

	// Initialize Docker client.
	dockerClient, err := docker.New()
	if err != nil {
		slog.Error("create docker client", "error", err)
		os.Exit(1)
	}
	defer dockerClient.Close()

	// Start the container index reconciler. Runs one boot pass and then
	// ticks every 30s. Boot pass populates the containers table from any
	// running containers that predate the workload refactor; subsequent
	// ticks catch state drift the deployer didn't witness (e.g., a stack
	// service that exited on its own). Stop() cancels its own child context
	// before waiting on the goroutine, so a hung `docker ps` doesn't block
	// shutdown.
	rec := reconciler.New(db, dockerClient, 30*time.Second)
	rec.Start(context.Background())
	defer rec.Stop()
	// The plugin pass is wired after the deployer is constructed (below);
	// the reconciler tolerates a nil dispatcher until then. SetPluginReconciler
	// is safe to call at any time, including mid-tick.

	// Read settings for NPM URL and polling interval.
	settings, err := db.GetSettings()
	if err != nil {
		slog.Error("get settings", "error", err)
		os.Exit(1)
	}

	// Initialize NPM client (used for NPM-specific endpoints like certificates).
	npmURL := envOrDefault("NPM_URL", settings.NpmURL)
	npmClient := npm.New(npmURL)

	// Build proxy provider based on settings.
	var proxyProvider proxy.Provider
	switch settings.ProxyProvider {
	case "none":
		proxyProvider = proxy.NewNoneProvider()
		slog.Info("proxy provider: none")
	case "traefik":
		proxyProvider = proxy.NewTraefikProvider(
			settings.TraefikEntrypoint,
			settings.TraefikCertResolver,
			settings.TraefikNetwork,
			settings.TraefikAPIURL,
		)
		slog.Info("proxy provider: traefik", "entrypoint", settings.TraefikEntrypoint)
	default:
		// Default to NPM for backward compatibility (including "npm" and empty string).
		// A password that fails to decrypt degrades to an empty password
		// rather than aborting startup.
		npmPassword := ""
		if settings.NpmPassword != "" {
			decrypted, err := crypto.Decrypt(encKey, settings.NpmPassword)
			if err != nil {
				slog.Warn("failed to decrypt NPM password for proxy provider", "error", err)
			} else {
				npmPassword = decrypted
			}
		}
		proxyProvider = proxy.NewNpmProvider(npmClient, settings.NpmEmail, npmPassword)
		slog.Info("proxy provider: npm", "url", npmURL)
	}

	// Initialize services.
	healthChecker := health.New()
	notifier := notify.New()
	eventBus := events.New()

	// Auto-persist warn/error events from the event bus to the database.
	stopLogger := eventBus.RegisterPersistentLogger(func(source, severity, message, metadata string) (int64, string, error) {
		evt, err := db.InsertEvent(store.EventLog{
			Source:   source,
			Severity: severity,
			Message:  message,
			Metadata: metadata,
		})
		if err != nil {
			return 0, "", err
		}
		return evt.ID, evt.CreatedAt, nil
	})
	defer stopLogger()

	// Event-trigger dispatcher: consume EventLog publishes off the bus
	// and fan out to operator-configured webhook actions. Loop-prevention
	// is structural — the dispatcher never writes back to event_log; all
	// delivery outcomes land in notifier audit logging.
	stopTriggerDispatcher := events.RegisterEventTriggerDispatcher(eventBus, db, notifier)
	defer stopTriggerDispatcher()

	dep := deployer.New(dockerClient, proxyProvider, db, healthChecker, notifier, eventBus, encKey)
	rec.SetPluginReconciler(dep)

	// Initialize webhook handler. Per-project and per-site secrets are stored
	// on their respective rows; the static-site triggerer is wired in below
	// once the site manager has been constructed.
	webhookHandler := webhook.NewHandler(db, dep, nil)
	// Plugin-pipeline dispatcher for /api/webhook/workloads/{secret}.
	// Wired here so the same *deployer.Deployer serves both legacy and
	// plugin-native paths from one place.
	webhookHandler.SetPluginDispatcher(dep)

	// Initialize registry poller. An empty interval (neither env nor
	// settings provide one) leaves the poller unstarted.
	poller := registry.NewPoller(db, dep, encKey)
	pollingInterval := envOrDefault("POLLING_INTERVAL", settings.PollingInterval)
	if pollingInterval != "" {
		if err := poller.Start(pollingInterval); err != nil {
			slog.Warn("failed to start poller", "error", err)
		}
	}

	// Initialize stale container scanner.
	staleScanner := stale.New(db, dockerClient, eventBus)
	if err := staleScanner.Start("1h"); err != nil {
		slog.Warn("failed to start stale scanner", "error", err)
	}

	// Start daily event log pruning cron job.
	cronScheduler := cron.New()
	if _, err := cronScheduler.AddFunc("@daily", func() {
		pruned, err := db.PruneEvents(30)
		if err != nil {
			slog.Error("event log prune failed", "error", err)
			return
		}
		if pruned > 0 {
			slog.Info("pruned old event log entries", "count", pruned)
		}
	}); err != nil {
		slog.Warn("failed to schedule event prune cron", "error", err)
	}

	// Webhook delivery log: keep 14 days of audit trail. Same daily cadence
	// so an admin always has a recent window for debugging without
	// unbounded growth on a noisy CI.
	if _, err := cronScheduler.AddFunc("@daily", func() {
		cutoff := time.Now().UTC().AddDate(0, 0, -14).Format("2006-01-02 15:04:05")
		pruned, err := db.PruneWebhookDeliveriesBefore(cutoff)
		if err != nil {
			slog.Error("webhook delivery prune failed", "error", err)
			return
		}
		if pruned > 0 {
			slog.Info("pruned old webhook deliveries", "count", pruned)
		}
	}); err != nil {
		slog.Warn("failed to schedule webhook delivery prune cron", "error", err)
	}
	cronScheduler.Start()

	// Subscribe to error events and forward notifications. The filter only
	// admits EventLog events whose payload severity is "error".
	notifySub := eventBus.Subscribe(func(evt events.Event) bool {
		if evt.Type != events.EventLog {
			return false
		}
		p, ok := evt.Payload.(events.EventLogPayload)
		if !ok {
			return false
		}
		return p.Severity == "error"
	})
	go func() {
		for evt := range notifySub {
			p, ok := evt.Payload.(events.EventLogPayload)
			if !ok {
				continue
			}
			// Settings are re-read per event so a changed notification
			// URL/secret is picked up without a restart.
			currentSettings, err := db.GetSettings()
			if err != nil || currentSettings.NotificationURL == "" {
				continue
			}
			notifier.SendSigned(currentSettings.NotificationURL, currentSettings.NotificationSecret, notify.TierSettings, notify.Event{
				Type:    p.Source + "_error",
				Project: p.Source,
				Error:   p.Message,
			})
		}
	}()

	// Initialize DNS provider from settings (nil for wildcard mode).
	dnsProvider := initDNSProvider(settings, encKey)
	if dnsProvider != nil {
		dep.SetDNSProvider(dnsProvider)
		slog.Info("DNS provider initialized", "provider", settings.DNSProvider)
	}

	// Initialize backup engine.
	backupEngine, err := backup.New(db, dbPath, dataDir)
	if err != nil {
		slog.Error("create backup engine", "error", err)
		os.Exit(1)
	}
	dep.SetPreDeployBackuper(backupEngine)

	// Clean orphaned backup files and prune on startup.
	if cleaned, err := backupEngine.CleanOrphans(); err != nil {
		slog.Warn("backup: clean orphans on startup", "error", err)
	} else if cleaned > 0 {
		slog.Info("backup: cleaned orphaned files on startup", "count", cleaned)
	}
	if settings.BackupRetentionCount > 0 {
		if pruned, err := backupEngine.Prune(settings.BackupRetentionCount); err != nil {
			slog.Warn("backup: prune on startup", "error", err)
		} else if pruned > 0 {
			slog.Info("backup: pruned old backups on startup", "count", pruned)
		}
	}

	// Schedule autobackup if enabled. Track entry ID for rescheduling.
	var backupCronID cron.EntryID
	scheduleAutobackup := func(enabled bool, intervalHours int) {
		// Remove existing schedule if any.
		if backupCronID != 0 {
			cronScheduler.Remove(backupCronID)
			backupCronID = 0
			slog.Info("autobackup: removed previous schedule")
		}
		if !enabled || intervalHours <= 0 {
			return
		}
		interval := fmt.Sprintf("@every %dh", intervalHours)
		id, err := cronScheduler.AddFunc(interval, func() {
			b, err := backupEngine.CreateBackup("auto")
			if err != nil {
				slog.Error("autobackup failed", "error", err)
				return
			}
			slog.Info("autobackup completed", "id", b.ID, "filename", b.Filename)

			// Retention is re-read after every backup so changes apply
			// without rescheduling. Failures here are reported instead of
			// being dropped silently; the backup itself already succeeded.
			currentSettings, err := db.GetSettings()
			if err != nil {
				slog.Warn("autobackup: read settings for prune", "error", err)
				return
			}
			if currentSettings.BackupRetentionCount > 0 {
				if _, err := backupEngine.Prune(currentSettings.BackupRetentionCount); err != nil {
					slog.Warn("autobackup: prune after backup failed", "error", err)
				}
			}
		})
		if err != nil {
			slog.Warn("failed to schedule autobackup", "error", err)
		} else {
			backupCronID = id
			slog.Info("autobackup scheduled", "interval_hours", intervalHours)
		}
	}
	scheduleAutobackup(settings.BackupEnabled, settings.BackupIntervalHours)

	// Initialize resource stats collector. Interval + retention are read from
	// settings on each tick, so configuration changes take effect within one
	// tick without a restart.
	statsCollector := stats.New(db, dockerClient)
	statsCollector.Start()

	// Initialize static site manager and health checker.
	staticSiteMgr := staticsite.NewManager(db, dockerClient, proxyProvider, eventBus, notifier, encKey)
	webhookHandler.SetSiteSyncTriggerer(staticSiteMgr)
	// The plugin static source registers itself eagerly in its init()
	// now that the deploy pipeline is implemented inline (see
	// internal/workload/plugin/source/static). The legacy Manager kept
	// here keeps the /api/sites/* HTTP routes alive during the cutover
	// window.
	staticSiteHealth := staticsite.NewHealthChecker(db, dockerClient, staticSiteMgr)
	if err := staticSiteHealth.Start("2m"); err != nil {
		slog.Warn("failed to start static site health checker", "error", err)
	}

	// Initialize stack (docker-compose) manager. Disabled gracefully if
	// `docker compose` is not available on the host.
	stackWorkDir := filepath.Join(filepath.Dir(dbPath), "stacks")
	stackMgr, err := stack.NewManager(db, stack.NewCompose(""), eventBus, stackWorkDir)
	if err != nil {
		slog.Warn("failed to init stack manager", "error", err)
		stackMgr = nil
	} else if err := stackMgr.Available(context.Background()); err != nil {
		slog.Warn("docker compose not available — stacks feature disabled", "error", err)
		stackMgr = nil
	}

	// Log-scan manager: tails running containers and emits event_log
	// entries when log lines match operator-configured regex rules.
	// Start before the API server is wired so the reload callback can
	// be plugged in via SetLogScanReloader.
	logScanMgr := logscanner.NewManager(logscanner.Config{
		Rules:        db,
		Containers:   db,
		Docker:       dockerClient,
		Events:       db,
		Bus:          eventBus,
		PollInterval: 5 * time.Second,
	})
	// Manager owns its own cancellation; Stop() drives the loop and
	// every tail to exit. Using Background here matches the
	// reconciler + stale-scanner pattern elsewhere in this file.
	if err := logScanMgr.Start(context.Background()); err != nil {
		slog.Warn("logscanner: initial rule load failed", "error", err)
	}
	defer logScanMgr.Stop()

	// Build API server.
	apiServer := api.NewServer(db, dockerClient, npmClient, proxyProvider, dep, notifier, webhookHandler, eventBus, encKey)
	apiServer.SetStaticSiteManager(staticSiteMgr)
	// Only hand over the stack manager when it initialized and
	// `docker compose` is available; otherwise it is left unset.
	if stackMgr != nil {
		apiServer.SetStackManager(stackMgr)
	}
	apiServer.SetStaleScanner(staleScanner)
	apiServer.SetLogScanReloader(logScanMgr)
	apiServer.SetBackupEngine(backupEngine)
	apiServer.SetDBPath(dbPath)
	apiServer.SetBackupSettingsChangedCallback(scheduleAutobackup)
	apiServer.SetDNSProvider(dnsProvider)
	apiServer.SetDNSProviderChangedCallback(func(provider dns.Provider) {
		dep.SetDNSProvider(provider)
	})
	apiServer.SetProxyProviderChangedCallback(func(provider proxy.Provider) {
		dep.SetProxyProvider(provider)
	})
	router := apiServer.Router()

	// Serve embedded static files for the SPA frontend.
	// The embed.FS has "web/build" as a prefix, so we sub it to get the root.
	webBuildFS, err := fs.Sub(tinyforge.WebBuildFS, "web/build")
	if err != nil {
		slog.Warn("embedded frontend not available", "error", err)
	} else {
		staticHandler := api.StaticHandler(webBuildFS)
		// Handle all non-API routes with the static file server.
		router.NotFound(staticHandler.ServeHTTP)
	}

	// Start HTTP server.
	addr := envOrDefault("LISTEN_ADDR", ":8080")
	httpServer := &http.Server{
		Addr:        addr,
		Handler:     router,
		ReadTimeout: 30 * time.Second,
		// WriteTimeout is disabled (0) to support SSE long-lived connections.
		// Individual non-SSE handlers should use context timeouts as needed.
		WriteTimeout: 0,
		IdleTimeout:  120 * time.Second,
	}

	// Graceful shutdown.
	done := make(chan os.Signal, 1)
	signal.Notify(done, os.Interrupt, syscall.SIGTERM)

	// Allow restore to trigger shutdown. The send is non-blocking: when the
	// one-slot buffer is already full a shutdown request is pending anyway,
	// and a blocking send would hang the caller indefinitely.
	apiServer.SetShutdownFunc(func() {
		select {
		case done <- syscall.SIGTERM:
		default:
		}
	})

	go func() {
		slog.Info("Tinyforge started", "addr", addr)
		// ErrServerClosed is the expected result of Shutdown; errors.Is
		// also matches it if it ever arrives wrapped.
		if err := httpServer.ListenAndServe(); err != nil && !errors.Is(err, http.ErrServerClosed) {
			slog.Error("HTTP server error", "error", err)
			os.Exit(1)
		}
	}()

	<-done
	slog.Info("shutting down...")

	// Stop accepting new work.
	cronScheduler.Stop()
	eventBus.Unsubscribe(notifySub)
	staticSiteHealth.Stop()
	staleScanner.Stop()
	poller.Stop()
	statsCollector.Stop()

	// Drain in-progress deploys, site syncs, and notifications.
	// NOTE(review): the HTTP server is still accepting requests at this
	// point — confirm the Drain implementations reject work that arrives
	// after they are called.
	dep.Drain()
	webhookHandler.Drain()
	notifier.Drain()

	// Shut down HTTP server.
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()
	if err := httpServer.Shutdown(ctx); err != nil {
		slog.Error("HTTP server shutdown error", "error", err)
	}

	// Close database explicitly so a close error is logged.
	// NOTE(review): the deferred db.Close registered at startup runs again
	// when main returns — confirm Store.Close tolerates a second call.
	if err := db.Close(); err != nil {
		slog.Error("database close error", "error", err)
	}
	slog.Info("Tinyforge stopped")
}
// envOrDefault returns the value of the environment variable key. When the
// variable is unset or set to the empty string, fallback is returned instead.
func envOrDefault(key, fallback string) string {
	value, found := os.LookupEnv(key)
	if !found || value == "" {
		return fallback
	}
	return value
}
// ensureDefaultAdmin creates the initial "admin" user when the store contains
// no users at all; if any user already exists it does nothing.
//
// The password is taken from the ADMIN_PASSWORD environment variable. There is
// no built-in default: when the variable is unset or empty on first launch an
// error is returned so the caller can log it and abort startup.
//
// It returns any error from counting users, hashing the password, or creating
// the user row.
func ensureDefaultAdmin(db *store.Store) error {
	count, err := db.UserCount()
	if err != nil {
		return err
	}
	if count > 0 {
		return nil // Users already exist, skip.
	}

	password := os.Getenv("ADMIN_PASSWORD")
	if password == "" {
		// Report through the error return instead of exiting from a helper,
		// so the decision to terminate stays with the caller.
		return errors.New("ADMIN_PASSWORD is required on first launch — set it to a secure password")
	}

	hash, err := auth.HashPassword(password)
	if err != nil {
		return err
	}

	_, err = db.CreateUser(store.User{
		Username:     "admin",
		PasswordHash: hash,
		Email:        "",
		Role:         "admin",
	})
	if err != nil {
		// Intended to ignore duplicate key errors (race condition on
		// concurrent startup).
		// NOTE(review): this matches store.ErrNotFound, which does not look
		// like a duplicate-key sentinel — confirm which error CreateUser
		// returns when the username already exists.
		if errors.Is(err, store.ErrNotFound) {
			return nil
		}
		return err
	}

	slog.Info("default admin user created", "username", "admin")
	return nil
}
// initDNSProvider builds the DNS provider described by settings.
//
// It returns nil when wildcard DNS is enabled or no provider name is
// configured. It also returns nil, after logging the cause, when the stored
// API token cannot be decrypted with encKey or when dns.NewProvider fails.
// An empty stored token is passed through as-is without decryption.
func initDNSProvider(settings store.Settings, encKey [32]byte) dns.Provider {
	if settings.DNSProvider == "" || settings.WildcardDNS {
		return nil
	}

	apiToken := settings.CloudflareAPIToken
	if apiToken != "" {
		plain, decErr := crypto.Decrypt(encKey, apiToken)
		if decErr != nil {
			slog.Error("dns: failed to decrypt API token", "error", decErr)
			return nil
		}
		apiToken = plain
	}

	cfg := dns.Config{
		Token:  apiToken,
		ZoneID: settings.CloudflareZoneID,
	}
	created, newErr := dns.NewProvider(settings.DNSProvider, cfg)
	if newErr != nil {
		slog.Error("dns: failed to create provider", "error", newErr)
		return nil
	}
	return created
}