Files
tiny-forge/internal/api/router.go
T
alexei.dolgolyov a4362b842d
Build / build (push) Successful in 11m42s
fix: harden security, fix concurrency bugs, and address review findings
Security:
- rate limit /api/webhook routes per-IP and cap concurrent site syncs
- global SSE connection cap (256) with new sse_gate
- validate ?tail= and cap JSON log responses at 4 MiB
- strip ANSI/CSI/OSC and control bytes from streamed log lines
- redact webhook secret from request log middleware
- scrub host details from /api/health for non-admin viewers
- drop container_id from /api/system/stats/top for non-admins
- generate webhook secrets via crypto/rand; require >=32 chars on insert
- verify iid path consistency in streamContainerLogs
- LimitReader on site webhook body; reject malformed non-empty bodies

Concurrency / correctness:
- stats collector: Stop() no longer hangs without Start(), semaphore
  acquired in parent loop so ctx cancellation short-circuits the queue,
  in-flight tick cancellable via shared base context, zero-ts guard
- webhook handler: replace fire-and-forget goroutine with WaitGroup-tracked
  workers + Drain() wired into graceful shutdown
- $derived(() => ...) mis-idiom fixed in ContainerStats / InstanceCard /
  ProjectCard (returned function instead of value)
- SystemResourcesCard: rename `window` and `t` locals to avoid shadowing
  globalThis.window and the i18n `t` import

Quality / performance:
- replace O(n^2) insertion sort with sort.Slice in stats top
- runMigrations only swallows duplicate-column / already-exists errors
- PruneStatsSamplesBefore wrapped in a transaction
- collapse N+1 in unusedImageStats / pruneImages to one ListAllInstances
  pass; surface DB errors instead of silently treating them as inactive
- run Docker Info + DiskUsage in parallel via errgroup
- container log SSE emits `: ping` heartbeat every 20 s
- imageMatches case-insensitive on registry host (RFC behaviour)
- log warning on invalid stage tag pattern instead of silent skip
- reject malformed non-empty site webhook payloads

Frontend / i18n:
- shared formatBytes utility replaces three local copies
- statsInterval store drives dynamic "no samples / collection disabled"
  copy across ContainerStats and SystemResourcesCard
- top consumers row now shows owner_name (project/stage or site name)
- drop seven `as any` casts on the Settings type; add cloudflare_api_token
  write-only field
- move "Service status", "Docker daemon", "Docker unreachable",
  "Proxy unreachable", "reachable", and "Docker daemon is not reachable."
  strings into en/ru i18n bundles
2026-05-07 00:56:14 +03:00

422 lines
14 KiB
Go

package api
import (
"context"
"log/slog"
"sync"
"github.com/go-chi/chi/v5"
"github.com/alexei/tinyforge/internal/auth"
"github.com/alexei/tinyforge/internal/backup"
"github.com/alexei/tinyforge/internal/crypto"
"github.com/alexei/tinyforge/internal/dns"
"github.com/alexei/tinyforge/internal/docker"
"github.com/alexei/tinyforge/internal/events"
"github.com/alexei/tinyforge/internal/npm"
"github.com/alexei/tinyforge/internal/proxy"
"github.com/alexei/tinyforge/internal/stack"
"github.com/alexei/tinyforge/internal/stale"
"github.com/alexei/tinyforge/internal/staticsite"
"github.com/alexei/tinyforge/internal/store"
"github.com/alexei/tinyforge/internal/webhook"
)
// DNSProviderChangedFunc is called when DNS settings change so the caller can
// update the provider on the deployer.
type DNSProviderChangedFunc func(provider dns.Provider)
// Server holds all dependencies for the API layer.
type Server struct {
store *store.Store
docker *docker.Client
npm *npm.Client // optional: only for NPM-specific endpoints (certificates)
proxyProvider proxy.Provider
deployer DeployTriggerer
webhook *webhook.Handler
eventBus *events.Bus
encKey [32]byte
localAuth *auth.LocalAuth
oidcProvider *auth.OIDCProvider
staleScanner *stale.Scanner
dnsProviderMu sync.RWMutex
dnsProvider dns.Provider
onDNSProviderChanged DNSProviderChangedFunc
staticSiteManager *staticsite.Manager
stackManager *stack.Manager
backupEngine *backup.Engine
sseGate *sseGate
dbPath string
shutdownFunc func() // called after restore to trigger graceful shutdown
onBackupSettingsChanged func(enabled bool, intervalHours int) // called when backup settings change
onProxyProviderChanged func(provider proxy.Provider) // called when proxy provider changes
}
// NewServer creates a new API Server with all required dependencies.
func NewServer(
st *store.Store,
dockerClient *docker.Client,
npmClient *npm.Client,
proxyProvider proxy.Provider,
deployer DeployTriggerer,
webhookHandler *webhook.Handler,
eventBus *events.Bus,
encKey [32]byte,
) *Server {
localAuth := auth.NewLocalAuth(encKey)
s := &Server{
store: st,
docker: dockerClient,
npm: npmClient,
proxyProvider: proxyProvider,
deployer: deployer,
webhook: webhookHandler,
eventBus: eventBus,
encKey: encKey,
localAuth: localAuth,
sseGate: newSSEGate(maxConcurrentSSEStreams),
}
// Try to initialize OIDC provider from stored settings.
authSettings, err := st.GetAuthSettings()
if err == nil && authSettings.AuthMode == "oidc" && authSettings.OIDCIssuerURL != "" {
s.initOIDCProvider(context.Background(), authSettings)
}
return s
}
// SetStaticSiteManager sets the static site manager on the server.
func (s *Server) SetStaticSiteManager(mgr *staticsite.Manager) {
s.staticSiteManager = mgr
}
// SetStackManager sets the docker-compose stack manager on the server.
func (s *Server) SetStackManager(mgr *stack.Manager) {
s.stackManager = mgr
}
// SetStaleScanner sets the stale scanner on the server.
// Called after both the API server and scanner are initialized.
func (s *Server) SetStaleScanner(scanner *stale.Scanner) {
s.staleScanner = scanner
}
// SetBackupEngine sets the backup engine on the server.
func (s *Server) SetBackupEngine(engine *backup.Engine) {
s.backupEngine = engine
}
// SetDBPath sets the database file path (needed for restore).
func (s *Server) SetDBPath(path string) {
s.dbPath = path
}
// SetShutdownFunc sets the function called after a restore to trigger graceful shutdown.
func (s *Server) SetShutdownFunc(fn func()) {
s.shutdownFunc = fn
}
// SetBackupSettingsChangedCallback sets the callback for when backup settings change.
func (s *Server) SetBackupSettingsChangedCallback(fn func(enabled bool, intervalHours int)) {
s.onBackupSettingsChanged = fn
}
// SetProxyProviderChangedCallback sets the callback for when the proxy provider changes.
func (s *Server) SetProxyProviderChangedCallback(fn func(provider proxy.Provider)) {
s.onProxyProviderChanged = fn
}
// SetProxyProvider updates the proxy provider at runtime.
func (s *Server) SetProxyProvider(provider proxy.Provider) {
s.proxyProvider = provider
}
// SetDNSProvider sets the current DNS provider on the server.
func (s *Server) SetDNSProvider(provider dns.Provider) {
s.dnsProviderMu.Lock()
defer s.dnsProviderMu.Unlock()
s.dnsProvider = provider
}
// getDNSProviderLocked returns the current DNS provider under read lock.
func (s *Server) getDNSProviderLocked() dns.Provider {
s.dnsProviderMu.RLock()
defer s.dnsProviderMu.RUnlock()
return s.dnsProvider
}
// SetDNSProviderChangedCallback sets the callback for when DNS settings change.
func (s *Server) SetDNSProviderChangedCallback(fn DNSProviderChangedFunc) {
s.onDNSProviderChanged = fn
}
// initOIDCProvider creates an OIDC provider from settings. Errors are logged, not fatal.
func (s *Server) initOIDCProvider(ctx context.Context, as store.AuthSettings) {
// Decrypt the OIDC client secret if it's encrypted.
clientSecret := as.OIDCClientSecret
if clientSecret != "" {
if decrypted, err := crypto.Decrypt(s.encKey, clientSecret); err == nil {
clientSecret = decrypted
}
// If decrypt fails, assume it's already plaintext (migration scenario).
}
provider, err := auth.NewOIDCProvider(ctx, auth.OIDCConfig{
IssuerURL: as.OIDCIssuerURL,
ClientID: as.OIDCClientID,
ClientSecret: clientSecret,
RedirectURL: as.OIDCRedirectURL,
})
if err != nil {
slog.Warn("failed to initialize OIDC provider", "error", err)
return
}
s.oidcProvider = provider
slog.Info("OIDC provider initialized", "issuer", as.OIDCIssuerURL)
}
// Router returns a chi router with all API routes mounted.
func (s *Server) Router() chi.Router {
r := chi.NewRouter()
// Global middleware.
r.Use(recovery)
r.Use(securityHeaders)
r.Use(logging)
r.Use(cors)
loginLimiter := newRateLimiter()
webhookLimiter := newRateLimiter()
r.Route("/api", func(r chi.Router) {
// JSON content type and body size limit for API routes.
r.Use(jsonContentType)
r.Use(limitBody)
// Public auth endpoints (no auth required).
r.Get("/auth/mode", s.authMode)
r.Post("/auth/login", s.rateLimitedLogin(loginLimiter))
r.Get("/auth/oidc/login", s.oidcLogin)
r.Get("/auth/oidc/callback", s.oidcCallback)
r.Post("/auth/oidc/token", s.oidcExchangeToken)
// Webhook handler (uses its own secret-based auth).
// Per-IP rate limit prevents an attacker who has guessed (or leaked)
// a secret from triggering a deploy storm, and rejects unauthenticated
// brute-force probes over the secret URL space.
r.With(rateLimitMiddleware(webhookLimiter)).Mount("/webhook", s.webhook.Route())
// Protected routes: require valid JWT.
r.Group(func(r chi.Router) {
r.Use(auth.Middleware(s.localAuth))
// Read-only endpoints (any authenticated user).
r.Get("/health", s.getHealth)
r.Get("/auth/me", s.currentUser)
r.Post("/auth/logout", s.logout)
r.Get("/proxies", s.listProxyRoutes)
r.Get("/docker/unused-images", s.unusedImageStats)
r.Get("/projects", s.listProjects)
r.Route("/projects/{id}", func(r chi.Router) {
r.Get("/", s.getProject)
r.Get("/stages/{stage}/env", s.listStageEnv)
r.Get("/stages/{stage}/instances", s.listInstances)
r.Get("/stages/{stage}/instances/{iid}/stats", s.getInstanceStats)
r.Get("/stages/{stage}/instances/{iid}/stats/history", s.getInstanceStatsHistory)
r.Get("/stages/{stage}/instances/{iid}/logs", s.streamContainerLogs)
r.Get("/images", s.listProjectImages)
r.Get("/volumes", s.listVolumes)
r.Get("/volumes/{volId}/browse", s.browseVolume)
r.Get("/volumes/{volId}/download", s.downloadVolume)
// Admin-only project mutations.
r.Group(func(r chi.Router) {
r.Use(auth.AdminOnly)
r.Put("/", s.updateProject)
r.Delete("/", s.deleteProject)
// Per-project webhook URL management.
r.Get("/webhook", s.getProjectWebhook)
r.Post("/webhook/regenerate", s.regenerateProjectWebhook)
// Stage endpoints.
r.Post("/stages", s.createStage)
r.Put("/stages/{stage}", s.updateStage)
r.Delete("/stages/{stage}", s.deleteStage)
// Stage env override endpoints.
r.Post("/stages/{stage}/env", s.createStageEnv)
r.Put("/stages/{stage}/env/{envId}", s.updateStageEnv)
r.Delete("/stages/{stage}/env/{envId}", s.deleteStageEnv)
// Instance endpoints.
r.Post("/stages/{stage}/instances", s.deployInstance)
r.Delete("/stages/{stage}/instances/{iid}", s.removeInstance)
// Instance control endpoints.
r.Post("/stages/{stage}/instances/{iid}/stop", s.stopInstance)
r.Post("/stages/{stage}/instances/{iid}/start", s.startInstance)
r.Post("/stages/{stage}/instances/{iid}/restart", s.restartInstance)
// Volume endpoints.
r.Post("/volumes", s.createVolume)
r.Put("/volumes/{volId}", s.updateVolume)
r.Delete("/volumes/{volId}", s.deleteVolume)
r.Post("/volumes/{volId}/upload", s.uploadToVolume)
})
})
// Stacks (docker-compose).
r.Get("/stacks", s.listStacks)
r.Route("/stacks/{id}", func(r chi.Router) {
r.Get("/", s.getStack)
r.Get("/revisions", s.listStackRevisions)
r.Get("/revisions/{revId}", s.getStackRevision)
r.Get("/services", s.getStackServices)
r.Get("/logs", s.getStackLogs)
r.Group(func(r chi.Router) {
r.Use(auth.AdminOnly)
r.Put("/", s.updateStack)
r.Delete("/", s.deleteStack)
r.Post("/revisions", s.createStackRevision)
r.Post("/rollback/{revId}", s.rollbackStack)
r.Post("/stop", s.stopStack)
r.Post("/start", s.startStack)
})
})
r.With(auth.AdminOnly).Post("/stacks", s.createStack)
// Static sites.
r.Get("/sites", s.listStaticSites)
r.Route("/sites/{id}", func(r chi.Router) {
r.Get("/", s.getStaticSite)
r.Get("/secrets", s.listStaticSiteSecrets)
r.Get("/storage", s.getStaticSiteStorage)
r.Get("/logs", s.streamStaticSiteLogs)
r.Get("/stats", s.getStaticSiteStats)
r.Get("/stats/history", s.getStaticSiteStatsHistory)
// Admin-only mutations.
r.Group(func(r chi.Router) {
r.Use(auth.AdminOnly)
r.Put("/", s.updateStaticSite)
r.Delete("/", s.deleteStaticSite)
r.Post("/deploy", s.deployStaticSite)
r.Post("/stop", s.stopStaticSite)
r.Post("/start", s.startStaticSite)
r.Get("/webhook", s.getStaticSiteWebhook)
r.Post("/webhook/regenerate", s.regenerateStaticSiteWebhook)
r.Post("/secrets", s.createStaticSiteSecret)
r.Put("/secrets/{sid}", s.updateStaticSiteSecret)
r.Delete("/secrets/{sid}", s.deleteStaticSiteSecret)
})
})
r.Get("/deploys", s.listDeploys)
r.Get("/deploys/{id}/logs", s.streamDeployLogs)
r.Get("/events", s.streamEvents)
r.Get("/events/log", s.listEventLog)
r.Get("/events/log/stats", s.getEventLogStats)
r.Get("/registries", s.listRegistries)
r.Route("/registries/{id}", func(r chi.Router) {
r.Get("/tags/*", s.listRegistryTags)
r.Get("/images", s.listRegistryImages)
// Admin-only registry mutations.
r.Group(func(r chi.Router) {
r.Use(auth.AdminOnly)
r.Put("/", s.updateRegistry)
r.Delete("/", s.deleteRegistry)
r.Post("/test", s.testRegistry)
})
})
r.Get("/settings", s.getSettings)
r.Get("/settings/npm-certificates", s.listNpmCertificates)
r.Get("/settings/npm-access-lists", s.listNpmAccessLists)
// Volume scope metadata (read-only).
r.Get("/volumes/scopes", s.listVolumeScopes)
// Stale container endpoints (read).
r.Get("/containers/stale", s.listStaleContainers)
// System resources (read-only).
r.Get("/system/stats", s.getSystemStats)
r.Get("/system/stats/history", s.getSystemStatsHistory)
r.Get("/system/stats/top", s.listTopContainers)
// Admin-only routes: require admin role.
r.Group(func(r chi.Router) {
r.Use(auth.AdminOnly)
// Config export (reveals project/infra details).
r.Get("/config/export", s.exportConfig)
// Event log management.
r.Delete("/events/log/{id}", s.deleteEvent)
r.Delete("/events/log", s.clearEvents)
// Auth management.
r.Get("/auth/settings", s.getAuthSettings)
r.Put("/auth/settings", s.updateAuthSettings)
r.Get("/auth/users", s.listUsers)
r.Post("/auth/users", s.createUser)
r.Put("/auth/users/{uid}", s.updateUser)
r.Put("/auth/users/{uid}/password", s.changePassword)
r.Delete("/auth/users/{uid}", s.deleteUser)
// Project creation.
r.Post("/projects", s.createProject)
// Static site creation and tools.
r.Post("/sites", s.createStaticSite)
r.Post("/sites/test-connection", s.testStaticSiteConnection)
r.Post("/sites/branches", s.listStaticSiteBranches)
r.Post("/sites/tree", s.listStaticSiteTree)
r.Post("/sites/detect-provider", s.detectStaticSiteProvider)
r.Post("/sites/repos", s.listStaticSiteRepos)
// Quick deploy endpoints.
r.Post("/deploy/inspect", s.inspectImage)
r.Post("/deploy/quick", s.quickDeploy)
// Registry creation.
r.Post("/registries", s.createRegistry)
// Stale container cleanup endpoints.
// Bulk route must be registered before parameterized route.
r.Post("/containers/stale/cleanup", s.bulkCleanupStaleContainers)
r.Post("/containers/stale/{id}/cleanup", s.cleanupStaleContainer)
// Settings endpoints.
r.Put("/settings", s.updateSettings)
// Docker management.
r.Post("/docker/prune-images", s.pruneImages)
// NPM connection test.
r.Post("/settings/npm/test", s.testNpmConnection)
// DNS management endpoints.
r.Post("/settings/dns/test", s.testDNSConnection)
r.Post("/settings/dns/zones", s.listDNSZones)
r.Get("/dns/records", s.listDNSRecords)
r.Post("/dns/sync", s.syncDNSRecords)
r.Delete("/dns/records/{fqdn}", s.deleteDNSRecord)
// Backup endpoints.
r.Get("/backups", s.listBackups)
r.Post("/backups", s.triggerBackup)
r.Get("/backups/{id}/download", s.downloadBackup)
r.Delete("/backups/{id}", s.deleteBackup)
r.Post("/backups/{id}/restore", s.restoreBackup)
})
})
})
return r
}