a4362b842d
Build / build (push) Successful in 11m42s
Security: - rate limit /api/webhook routes per-IP and cap concurrent site syncs - global SSE connection cap (256) with new sse_gate - validate ?tail= and cap JSON log responses at 4 MiB - strip ANSI/CSI/OSC and control bytes from streamed log lines - redact webhook secret from request log middleware - scrub host details from /api/health for non-admin viewers - drop container_id from /api/system/stats/top for non-admins - generate webhook secrets via crypto/rand; require >=32 chars on insert - verify iid path consistency in streamContainerLogs - LimitReader on site webhook body; reject malformed non-empty bodies Concurrency / correctness: - stats collector: Stop() no longer hangs without Start(), semaphore acquired in parent loop so ctx cancellation short-circuits the queue, in-flight tick cancellable via shared base context, zero-ts guard - webhook handler: replace fire-and-forget goroutine with WaitGroup-tracked workers + Drain() wired into graceful shutdown - $derived(() => ...) mis-idiom fixed in ContainerStats / InstanceCard / ProjectCard (returned function instead of value) - SystemResourcesCard: rename `window` and `t` locals to avoid shadowing globalThis.window and the i18n `t` import Quality / performance: - replace O(n^2) insertion sort with sort.Slice in stats top - runMigrations only swallows duplicate-column / already-exists errors - PruneStatsSamplesBefore wrapped in a transaction - collapse N+1 in unusedImageStats / pruneImages to one ListAllInstances pass; surface DB errors instead of silently treating them as inactive - run Docker Info + DiskUsage in parallel via errgroup - container log SSE emits `: ping` heartbeat every 20 s - imageMatches case-insensitive on registry host (RFC behaviour) - log warning on invalid stage tag pattern instead of silent skip - reject malformed non-empty site webhook payloads Frontend / i18n: - shared formatBytes utility replaces three local copies - statsInterval store drives dynamic "no samples / collection disabled" 
copy across ContainerStats and SystemResourcesCard - top consumers row now shows owner_name (project/stage or site name) - drop seven `as any` casts on the Settings type; add cloudflare_api_token write-only field - move "Service status", "Docker daemon", "Docker unreachable", "Proxy unreachable", "reachable", and "Docker daemon is not reachable." strings into en/ru i18n bundles
256 lines
7.6 KiB
Go
256 lines
7.6 KiB
Go
package api
|
|
|
|
import (
|
|
"context"
|
|
"net/http"
|
|
"time"
|
|
|
|
"github.com/alexei/tinyforge/internal/auth"
|
|
"github.com/alexei/tinyforge/internal/proxy"
|
|
)
|
|
|
|
// healthProbeTimeout is the deadline getHealth puts on the request context
// before it runs any probe. The database ping, the Docker ping + Info and the
// proxy provider calls all draw from this single budget, so a stuck
// dependency cannot hold the polling endpoint open. The UI polls every 30 s,
// so 8 s stays well inside one polling interval.
const healthProbeTimeout time.Duration = 8 * time.Second
|
|
|
|
// Allow-lists used by getHealth (via filterFields) to trim the payloads sent
// to callers whose role is not "admin".
var (
	// nonAdminDockerFields names the Docker keys any authenticated user may
	// see: connectivity (connected, latency_ms, error), daemon and API
	// versions, container and image counts, and the CPU / memory totals.
	// Host-detail keys (kernel, root_dir, name, os, arch, operating_system,
	// storage_driver) are absent on purpose so they stay admin-only and do
	// not leak recon information.
	nonAdminDockerFields = map[string]bool{
		"connected":    true,
		"latency_ms":   true,
		"error":        true,
		"version":      true,
		"api_version":  true,
		"containers":   true,
		"running":      true,
		"paused":       true,
		"stopped":      true,
		"images":       true,
		"ncpu":         true,
		"memory_total": true,
	}

	// nonAdminProxyFields names the proxy keys safe to share with
	// non-admins. The configured URL and the provider-side aggregates
	// (proxy_hosts, access_lists, certificates) are not listed and are
	// therefore stripped; only the Tinyforge-managed route count remains.
	nonAdminProxyFields = map[string]bool{
		"provider":            true,
		"connected":           true,
		"latency_ms":          true,
		"error":               true,
		"proxy_hosts_managed": true,
	}
)
|
|
|
|
// getHealth handles GET /api/health.
|
|
//
|
|
// Returns the connectivity state and (when connected) diagnostics for the
|
|
// Docker daemon and the active proxy provider. Detailed host information
|
|
// (kernel, root_dir, internal NPM URL, …) is stripped for non-admin users to
|
|
// avoid leaking infrastructure details to read-only viewers.
|
|
func (s *Server) getHealth(w http.ResponseWriter, r *http.Request) {
|
|
ctx, cancel := context.WithTimeout(r.Context(), healthProbeTimeout)
|
|
defer cancel()
|
|
|
|
claims, _ := auth.ClaimsFromContext(r.Context())
|
|
isAdmin := claims.Role == "admin"
|
|
|
|
now := time.Now().UTC().Format(time.RFC3339)
|
|
result := map[string]any{
|
|
"checked_at": now,
|
|
}
|
|
|
|
// ── Database ─────────────────────────────────────────────────────
|
|
if err := s.store.DB().PingContext(ctx); err != nil {
|
|
result["database"] = map[string]any{"connected": false, "error": "database unreachable"}
|
|
} else {
|
|
result["database"] = map[string]any{"connected": true}
|
|
}
|
|
|
|
// ── Docker daemon ────────────────────────────────────────────────
|
|
docker := s.dockerHealth(ctx)
|
|
if !isAdmin {
|
|
docker = filterFields(docker, nonAdminDockerFields)
|
|
}
|
|
result["docker"] = docker
|
|
|
|
// ── Proxy provider ───────────────────────────────────────────────
|
|
if s.proxyProvider != nil {
|
|
proxyInfo := s.proxyHealth(ctx)
|
|
if !isAdmin {
|
|
proxyInfo = filterFields(proxyInfo, nonAdminProxyFields)
|
|
}
|
|
result["proxy"] = proxyInfo
|
|
}
|
|
|
|
respondJSON(w, http.StatusOK, result)
|
|
}
|
|
|
|
// filterFields builds a new map holding only those entries of m whose key is
// mapped to true in allow. The input map is left untouched, and the result is
// never nil, even when m is nil or nothing matches.
func filterFields(m map[string]any, allow map[string]bool) map[string]any {
	kept := make(map[string]any, len(allow))
	for key, value := range m {
		if permitted := allow[key]; !permitted {
			continue
		}
		kept[key] = value
	}
	return kept
}
|
|
|
|
// dockerHealth probes the Docker daemon and, if reachable, attaches a full
|
|
// DaemonInfo snapshot. The caller does not need to error-check the Info()
|
|
// call — if it fails, the connected flag remains true (ping succeeded) but
|
|
// the detail fields are simply omitted.
|
|
func (s *Server) dockerHealth(ctx context.Context) map[string]any {
|
|
if s.docker == nil {
|
|
return map[string]any{
|
|
"connected": false,
|
|
"error": "docker client not initialized",
|
|
}
|
|
}
|
|
|
|
start := time.Now()
|
|
if err := s.docker.Ping(ctx); err != nil {
|
|
return map[string]any{
|
|
"connected": false,
|
|
"error": err.Error(),
|
|
"latency_ms": time.Since(start).Milliseconds(),
|
|
}
|
|
}
|
|
|
|
out := map[string]any{
|
|
"connected": true,
|
|
"latency_ms": time.Since(start).Milliseconds(),
|
|
}
|
|
|
|
// Info enriches the payload; failures are non-fatal.
|
|
info, err := s.docker.Info(ctx)
|
|
if err == nil {
|
|
if info.Version != "" {
|
|
out["version"] = info.Version
|
|
}
|
|
if info.APIVersion != "" {
|
|
out["api_version"] = info.APIVersion
|
|
}
|
|
if info.OS != "" {
|
|
out["os"] = info.OS
|
|
}
|
|
if info.Arch != "" {
|
|
out["arch"] = info.Arch
|
|
}
|
|
if info.Kernel != "" {
|
|
out["kernel"] = info.Kernel
|
|
}
|
|
if info.OperatingSystem != "" {
|
|
out["operating_system"] = info.OperatingSystem
|
|
}
|
|
if info.StorageDriver != "" {
|
|
out["storage_driver"] = info.StorageDriver
|
|
}
|
|
if info.RootDir != "" {
|
|
out["root_dir"] = info.RootDir
|
|
}
|
|
if info.Name != "" {
|
|
out["name"] = info.Name
|
|
}
|
|
if info.NCPU > 0 {
|
|
out["ncpu"] = info.NCPU
|
|
}
|
|
if info.MemoryTotal > 0 {
|
|
out["memory_total"] = info.MemoryTotal
|
|
}
|
|
out["containers"] = info.Containers
|
|
out["running"] = info.Running
|
|
out["paused"] = info.Paused
|
|
out["stopped"] = info.Stopped
|
|
out["images"] = info.Images
|
|
}
|
|
|
|
return out
|
|
}
|
|
|
|
// proxyHealth probes the configured proxy provider. For NPM, attaches
|
|
// aggregate counts (proxy hosts, access lists, certificates) which the
|
|
// dashboard surfaces alongside the connection indicator.
|
|
func (s *Server) proxyHealth(ctx context.Context) map[string]any {
|
|
providerName := s.proxyProvider.Name()
|
|
|
|
start := time.Now()
|
|
err := s.proxyProvider.Ping(ctx)
|
|
latency := time.Since(start).Milliseconds()
|
|
|
|
if err != nil {
|
|
return map[string]any{
|
|
"provider": providerName,
|
|
"connected": false,
|
|
"error": providerName + " unreachable: " + err.Error(),
|
|
"latency_ms": latency,
|
|
}
|
|
}
|
|
|
|
out := map[string]any{
|
|
"provider": providerName,
|
|
"connected": true,
|
|
"latency_ms": latency,
|
|
}
|
|
|
|
// Attach configured URL from settings for both NPM and Traefik.
|
|
if settings, serr := s.store.GetSettings(); serr == nil {
|
|
switch providerName {
|
|
case "npm":
|
|
if settings.NpmURL != "" {
|
|
out["url"] = settings.NpmURL
|
|
}
|
|
case "traefik":
|
|
if settings.TraefikAPIURL != "" {
|
|
out["url"] = settings.TraefikAPIURL
|
|
}
|
|
}
|
|
}
|
|
|
|
// NPM-specific aggregates — a quick glance at route/list/cert counts.
|
|
// These calls require an authenticated NPM session, so we trigger the
|
|
// provider's auth step first (it's cheap: cached JWT is reused for 1h).
|
|
if providerName == "npm" && s.npm != nil {
|
|
if np, ok := s.proxyProvider.(*proxy.NpmProvider); ok {
|
|
if err := np.Authenticate(ctx); err == nil {
|
|
if hosts, herr := s.npm.ListProxyHosts(ctx); herr == nil {
|
|
out["proxy_hosts"] = len(hosts)
|
|
}
|
|
if lists, lerr := s.npm.ListAccessLists(ctx); lerr == nil {
|
|
out["access_lists"] = len(lists)
|
|
}
|
|
if certs, cerr := s.npm.ListCertificates(ctx); cerr == nil {
|
|
out["certificates"] = len(certs)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Managed-route count — how many of the proxy's routes were deployed
|
|
// by Tinyforge itself, counting both Docker instances and static sites.
|
|
// This works for every provider (NPM, Traefik, …) because it reads from
|
|
// our own store, not the external proxy API.
|
|
if managed, merr := s.managedRouteCount(); merr == nil {
|
|
out["proxy_hosts_managed"] = managed
|
|
}
|
|
|
|
return out
|
|
}
|
|
|
|
// managedRouteCount returns the number of proxy routes Tinyforge manages
|
|
// (Docker instances + static sites combined). The domain argument doesn't
|
|
// affect the count so we pass an empty string to skip FQDN rendering.
|
|
func (s *Server) managedRouteCount() (int, error) {
|
|
instanceRoutes, err := s.store.ListProxyRoutes("")
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
siteRoutes, err := s.store.ListStaticSiteProxyRoutes("")
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
return len(instanceRoutes) + len(siteRoutes), nil
|
|
}
|