package api

import (
	"context"
	"net/http"
	"time"

	"github.com/alexei/tinyforge/internal/auth"
	"github.com/alexei/tinyforge/internal/proxy"
)

// healthProbeTimeout bounds one complete health check: getHealth derives a
// single context from it and hands that context to every probe, so a stuck
// dependency cannot hold the polling endpoint open. The UI polls every 30 s,
// so 8 s leaves headroom for the ping + Info + NPM list calls.
const healthProbeTimeout = 8 * time.Second

// nonAdminDockerFields enumerates the Docker fields any authenticated user is
// allowed to see: connectivity (connected, latency, error), daemon/API
// version, container and image counts, and the CPU count and total memory.
// Host-detail fields (kernel, root_dir, hostname, OS, storage driver) are
// admin-only to avoid recon information leaks.
var nonAdminDockerFields = map[string]bool{
	"connected":    true,
	"latency_ms":   true,
	"error":        true,
	"version":      true,
	"api_version":  true,
	"containers":   true,
	"running":      true,
	"paused":       true,
	"stopped":      true,
	"images":       true,
	"ncpu":         true,
	"memory_total": true,
}

// nonAdminProxyFields are the proxy fields safe to share with non-admins:
// provider name, connectivity, and the count of routes managed by Tinyforge.
// Configured URLs and aggregate counts of proxy hosts, access lists and
// certificates are stripped.
var nonAdminProxyFields = map[string]bool{
	"provider":            true,
	"connected":           true,
	"latency_ms":          true,
	"error":               true,
	"proxy_hosts_managed": true,
}

// getHealth handles GET /api/health.
//
// Returns the connectivity state of the database and the connectivity state
// plus (when connected) diagnostics for the Docker daemon and the active
// proxy provider; the proxy section is omitted when no provider is
// configured. Detailed host information (kernel, root_dir, internal NPM URL,
// …) is stripped for non-admin users to avoid leaking infrastructure details
// to read-only viewers.
func (s *Server) getHealth(w http.ResponseWriter, r *http.Request) { ctx, cancel := context.WithTimeout(r.Context(), healthProbeTimeout) defer cancel() claims, _ := auth.ClaimsFromContext(r.Context()) isAdmin := claims.Role == "admin" now := time.Now().UTC().Format(time.RFC3339) result := map[string]any{ "checked_at": now, } // ── Database ───────────────────────────────────────────────────── if err := s.store.DB().PingContext(ctx); err != nil { result["database"] = map[string]any{"connected": false, "error": "database unreachable"} } else { result["database"] = map[string]any{"connected": true} } // ── Docker daemon ──────────────────────────────────────────────── docker := s.dockerHealth(ctx) if !isAdmin { docker = filterFields(docker, nonAdminDockerFields) } result["docker"] = docker // ── Proxy provider ─────────────────────────────────────────────── if s.proxyProvider != nil { proxyInfo := s.proxyHealth(ctx) if !isAdmin { proxyInfo = filterFields(proxyInfo, nonAdminProxyFields) } result["proxy"] = proxyInfo } respondJSON(w, http.StatusOK, result) } // filterFields returns a copy of m containing only the keys present in allow. func filterFields(m map[string]any, allow map[string]bool) map[string]any { out := make(map[string]any, len(allow)) for k, v := range m { if allow[k] { out[k] = v } } return out } // dockerHealth probes the Docker daemon and, if reachable, attaches a full // DaemonInfo snapshot. The caller does not need to error-check the Info() // call — if it fails, the connected flag remains true (ping succeeded) but // the detail fields are simply omitted. 
func (s *Server) dockerHealth(ctx context.Context) map[string]any { if s.docker == nil { return map[string]any{ "connected": false, "error": "docker client not initialized", } } start := time.Now() if err := s.docker.Ping(ctx); err != nil { return map[string]any{ "connected": false, "error": err.Error(), "latency_ms": time.Since(start).Milliseconds(), } } out := map[string]any{ "connected": true, "latency_ms": time.Since(start).Milliseconds(), } // Info enriches the payload; failures are non-fatal. info, err := s.docker.Info(ctx) if err == nil { if info.Version != "" { out["version"] = info.Version } if info.APIVersion != "" { out["api_version"] = info.APIVersion } if info.OS != "" { out["os"] = info.OS } if info.Arch != "" { out["arch"] = info.Arch } if info.Kernel != "" { out["kernel"] = info.Kernel } if info.OperatingSystem != "" { out["operating_system"] = info.OperatingSystem } if info.StorageDriver != "" { out["storage_driver"] = info.StorageDriver } if info.RootDir != "" { out["root_dir"] = info.RootDir } if info.Name != "" { out["name"] = info.Name } if info.NCPU > 0 { out["ncpu"] = info.NCPU } if info.MemoryTotal > 0 { out["memory_total"] = info.MemoryTotal } out["containers"] = info.Containers out["running"] = info.Running out["paused"] = info.Paused out["stopped"] = info.Stopped out["images"] = info.Images } return out } // proxyHealth probes the configured proxy provider. For NPM, attaches // aggregate counts (proxy hosts, access lists, certificates) which the // dashboard surfaces alongside the connection indicator. 
func (s *Server) proxyHealth(ctx context.Context) map[string]any { providerName := s.proxyProvider.Name() start := time.Now() err := s.proxyProvider.Ping(ctx) latency := time.Since(start).Milliseconds() if err != nil { return map[string]any{ "provider": providerName, "connected": false, "error": providerName + " unreachable: " + err.Error(), "latency_ms": latency, } } out := map[string]any{ "provider": providerName, "connected": true, "latency_ms": latency, } // Attach configured URL from settings for both NPM and Traefik. if settings, serr := s.store.GetSettings(); serr == nil { switch providerName { case "npm": if settings.NpmURL != "" { out["url"] = settings.NpmURL } case "traefik": if settings.TraefikAPIURL != "" { out["url"] = settings.TraefikAPIURL } } } // NPM-specific aggregates — a quick glance at route/list/cert counts. // These calls require an authenticated NPM session, so we trigger the // provider's auth step first (it's cheap: cached JWT is reused for 1h). if providerName == "npm" && s.npm != nil { if np, ok := s.proxyProvider.(*proxy.NpmProvider); ok { if err := np.Authenticate(ctx); err == nil { if hosts, herr := s.npm.ListProxyHosts(ctx); herr == nil { out["proxy_hosts"] = len(hosts) } if lists, lerr := s.npm.ListAccessLists(ctx); lerr == nil { out["access_lists"] = len(lists) } if certs, cerr := s.npm.ListCertificates(ctx); cerr == nil { out["certificates"] = len(certs) } } } } // Managed-route count — how many of the proxy's routes were deployed // by Tinyforge itself, counting both Docker instances and static sites. // This works for every provider (NPM, Traefik, …) because it reads from // our own store, not the external proxy API. if managed, merr := s.managedRouteCount(); merr == nil { out["proxy_hosts_managed"] = managed } return out } // managedRouteCount returns the number of proxy routes Tinyforge manages, // reading from the unified containers index. 
The domain argument doesn't // affect the count so we pass an empty string to skip FQDN rendering. func (s *Server) managedRouteCount() (int, error) { routes, err := s.store.ListProxyRoutes("") if err != nil { return 0, err } return len(routes), nil }