feat(observability): phase 3 - direct proxy creation with validation

Add standalone proxy management:
- Multi-step validation pipeline (DNS, TCP, HTTP) with diagnostic hints
- Proxy lifecycle: create/update/delete via NPM API with SSL auto-assign
- Periodic health monitoring (5min) with event log on status transitions
- Unified /api/proxies/all endpoint merging standalone + managed proxies
- Frontend types and API functions for downstream UI phases
This commit is contained in:
2026-03-30 11:19:55 +03:00
parent aefecdffdf
commit 7a85441b81
9 changed files with 1076 additions and 1 deletions
+192
View File
@@ -0,0 +1,192 @@
package api
import (
"context"
"net/http"
"time"
"github.com/go-chi/chi/v5"
"github.com/alexei/docker-watcher/internal/proxy"
)
// validateProxy handles POST /api/proxies/validate.
//
// It decodes a {"host", "port"} body, rejects an empty host or a port outside
// 1-65535 with 400, and otherwise runs the destination validation pipeline
// (bounded to 30 seconds) and returns its result with 200. No proxy is created.
func (s *Server) validateProxy(w http.ResponseWriter, r *http.Request) {
	type validateRequest struct {
		Host string `json:"host"`
		Port int    `json:"port"`
	}

	var body validateRequest
	if ok := decodeJSON(w, r, &body); !ok {
		// NOTE(review): decodeJSON is assumed to have written the error response — confirm.
		return
	}

	switch {
	case body.Host == "":
		respondError(w, http.StatusBadRequest, "host is required")
		return
	case body.Port < 1 || body.Port > 65535:
		respondError(w, http.StatusBadRequest, "port must be between 1 and 65535")
		return
	}

	// Cap the whole pipeline so a slow destination cannot hold the request open.
	probeCtx, stop := context.WithTimeout(r.Context(), 30*time.Second)
	defer stop()

	respondJSON(w, http.StatusOK, proxy.ValidateDestination(probeCtx, body.Host, body.Port))
}
// createProxy handles POST /api/proxies.
//
// Responds 503 when no proxy manager is configured, 400 when a required field
// is missing or the port is out of range, 500 when the manager reports an
// error, and 201 with the stored proxy on success.
func (s *Server) createProxy(w http.ResponseWriter, r *http.Request) {
	if s.proxyManager == nil {
		respondError(w, http.StatusServiceUnavailable, "proxy manager not configured")
		return
	}

	var body proxy.CreateProxyRequest
	if ok := decodeJSON(w, r, &body); !ok {
		return
	}

	// Field checks run in a fixed order so the first problem found is reported.
	switch {
	case body.Domain == "":
		respondError(w, http.StatusBadRequest, "domain is required")
		return
	case body.DestinationURL == "":
		respondError(w, http.StatusBadRequest, "destination_url is required")
		return
	case body.DestinationPort < 1 || body.DestinationPort > 65535:
		respondError(w, http.StatusBadRequest, "destination_port must be between 1 and 65535")
		return
	}

	created, err := s.proxyManager.CreateProxy(r.Context(), body)
	if err != nil {
		respondError(w, http.StatusInternalServerError, err.Error())
		return
	}
	respondJSON(w, http.StatusCreated, created)
}
// listProxies handles GET /api/proxies and returns every standalone proxy.
//
// Responds 503 when no proxy manager is configured and 500 when the manager
// fails to list proxies.
func (s *Server) listProxies(w http.ResponseWriter, r *http.Request) {
	if s.proxyManager == nil {
		respondError(w, http.StatusServiceUnavailable, "proxy manager not configured")
		return
	}

	items, err := s.proxyManager.ListProxies()
	if err == nil {
		respondJSON(w, http.StatusOK, items)
		return
	}
	respondError(w, http.StatusInternalServerError, err.Error())
}
// getProxy handles GET /api/proxies/{id} and returns one standalone proxy.
//
// Responds 503 when no proxy manager is configured, 404 when the manager
// reports the proxy as not found, and 500 for any other error.
func (s *Server) getProxy(w http.ResponseWriter, r *http.Request) {
	if s.proxyManager == nil {
		respondError(w, http.StatusServiceUnavailable, "proxy manager not configured")
		return
	}

	found, err := s.proxyManager.GetProxy(chi.URLParam(r, "id"))
	switch {
	case err == nil:
		respondJSON(w, http.StatusOK, found)
	case proxy.IsNotFound(err):
		respondNotFound(w, "proxy")
	default:
		respondError(w, http.StatusInternalServerError, err.Error())
	}
}
// updateProxy handles PUT /api/proxies/{id}.
//
// The body must carry domain, destination_url and a destination_port within
// 1-65535 (400 otherwise). Responds 503 when no proxy manager is configured,
// 404 when the proxy does not exist, 500 for any other manager error, and 200
// with the updated proxy on success.
func (s *Server) updateProxy(w http.ResponseWriter, r *http.Request) {
	if s.proxyManager == nil {
		respondError(w, http.StatusServiceUnavailable, "proxy manager not configured")
		return
	}

	proxyID := chi.URLParam(r, "id")

	var body proxy.UpdateProxyRequest
	if ok := decodeJSON(w, r, &body); !ok {
		return
	}

	// Field checks run in a fixed order so the first problem found is reported.
	switch {
	case body.Domain == "":
		respondError(w, http.StatusBadRequest, "domain is required")
		return
	case body.DestinationURL == "":
		respondError(w, http.StatusBadRequest, "destination_url is required")
		return
	case body.DestinationPort < 1 || body.DestinationPort > 65535:
		respondError(w, http.StatusBadRequest, "destination_port must be between 1 and 65535")
		return
	}

	updated, err := s.proxyManager.UpdateProxy(r.Context(), proxyID, body)
	switch {
	case err == nil:
		respondJSON(w, http.StatusOK, updated)
	case proxy.IsNotFound(err):
		respondNotFound(w, "proxy")
	default:
		respondError(w, http.StatusInternalServerError, err.Error())
	}
}
// deleteProxy handles DELETE /api/proxies/{id}.
//
// Responds 503 when no proxy manager is configured, 404 when the proxy does
// not exist, 500 for any other manager error, and 200 with {"deleted": id}
// on success.
func (s *Server) deleteProxy(w http.ResponseWriter, r *http.Request) {
	if s.proxyManager == nil {
		respondError(w, http.StatusServiceUnavailable, "proxy manager not configured")
		return
	}

	proxyID := chi.URLParam(r, "id")
	err := s.proxyManager.DeleteProxy(r.Context(), proxyID)
	switch {
	case err == nil:
		respondJSON(w, http.StatusOK, map[string]string{"deleted": proxyID})
	case proxy.IsNotFound(err):
		respondNotFound(w, "proxy")
	default:
		respondError(w, http.StatusInternalServerError, err.Error())
	}
}
// listAllProxies handles GET /api/proxies/all and returns the merged view of
// standalone and deploy-managed proxies produced by the proxy manager.
//
// Responds 503 when no proxy manager is configured and 500 when the manager
// fails to build the view.
func (s *Server) listAllProxies(w http.ResponseWriter, r *http.Request) {
	if s.proxyManager == nil {
		respondError(w, http.StatusServiceUnavailable, "proxy manager not configured")
		return
	}

	merged, err := s.proxyManager.ListAllProxies()
	if err == nil {
		respondJSON(w, http.StatusOK, merged)
		return
	}
	respondError(w, http.StatusInternalServerError, err.Error())
}
+23
View File
@@ -11,6 +11,7 @@ import (
"github.com/alexei/docker-watcher/internal/docker"
"github.com/alexei/docker-watcher/internal/events"
"github.com/alexei/docker-watcher/internal/npm"
"github.com/alexei/docker-watcher/internal/proxy"
"github.com/alexei/docker-watcher/internal/stale"
"github.com/alexei/docker-watcher/internal/store"
"github.com/alexei/docker-watcher/internal/webhook"
@@ -28,6 +29,7 @@ type Server struct {
localAuth *auth.LocalAuth
oidcProvider *auth.OIDCProvider
staleScanner *stale.Scanner
proxyManager *proxy.Manager
}
// NewServer creates a new API Server with all required dependencies.
@@ -68,6 +70,12 @@ func (s *Server) SetStaleScanner(scanner *stale.Scanner) {
s.staleScanner = scanner
}
// SetProxyManager sets the proxy manager on the server.
// Called after both the API server and proxy manager are initialized.
// Until it is called the field is nil; the proxy handlers that need the
// manager check for that and answer 503.
func (s *Server) SetProxyManager(pm *proxy.Manager) {
	s.proxyManager = pm
}
// initOIDCProvider creates an OIDC provider from settings. Errors are logged, not fatal.
func (s *Server) initOIDCProvider(ctx context.Context, as store.AuthSettings) {
// Decrypt the OIDC client secret if it's encrypted.
@@ -146,10 +154,25 @@ func (s *Server) Router() chi.Router {
// Stale container endpoints.
r.Get("/containers/stale", s.listStaleContainers)
// Proxy endpoints (read-only for any authenticated user).
r.Get("/proxies", s.listProxies)
r.Get("/proxies/all", s.listAllProxies)
r.Route("/proxies/{id}", func(r chi.Router) {
r.Get("/", s.getProxy)
})
// Admin-only routes: require admin role.
r.Group(func(r chi.Router) {
r.Use(auth.AdminOnly)
// Proxy mutation endpoints.
r.Post("/proxies/validate", s.validateProxy)
r.Post("/proxies", s.createProxy)
r.Route("/proxies/{id}", func(r chi.Router) {
r.Put("/", s.updateProxy)
r.Delete("/", s.deleteProxy)
})
// Config export (reveals project/infra details).
r.Get("/config/export", s.exportConfig)
+184
View File
@@ -0,0 +1,184 @@
package proxy
import (
"context"
"encoding/json"
"fmt"
"log/slog"
"net/http"
"sync"
"time"
"github.com/alexei/docker-watcher/internal/events"
"github.com/alexei/docker-watcher/internal/store"
"github.com/robfig/cron/v3"
)
// HealthMonitor periodically checks the health of all standalone proxies.
type HealthMonitor struct {
	// store is read for the proxy list and written with health results and events.
	store *store.Store
	// eventBus receives an event whenever a status change is recorded.
	eventBus *events.Bus
	// cron schedules the periodic CheckAll run.
	cron *cron.Cron
	// mu is held by Start and Stop while they read or change entryID and running.
	mu sync.Mutex
	// entryID is the cron entry registered by the most recent Start.
	entryID cron.EntryID
	// running is true between a successful Start and the next Stop.
	running bool
}
// NewHealthMonitor builds a proxy health monitor around the given store and
// event bus with its own cron scheduler. Nothing is scheduled until Start is
// called.
func NewHealthMonitor(st *store.Store, eventBus *events.Bus) *HealthMonitor {
	monitor := new(HealthMonitor)
	monitor.store = st
	monitor.eventBus = eventBus
	monitor.cron = cron.New()
	return monitor
}
// Start begins periodic health checks with the given interval (e.g., "5m", "1m").
//
// The interval must parse with time.ParseDuration and be strictly positive;
// otherwise an error is returned and the current schedule (if any) is left
// untouched. Calling Start again, whether the monitor is running or was
// stopped earlier, replaces the previously registered job so that only one
// health-check job is ever scheduled.
func (h *HealthMonitor) Start(interval string) error {
	h.mu.Lock()
	defer h.mu.Unlock()

	duration, err := time.ParseDuration(interval)
	if err != nil {
		return fmt.Errorf("parse health check interval %q: %w", interval, err)
	}
	// cron's "@every" silently clamps sub-second (including zero and negative)
	// durations to one second, which would hammer every destination.
	if duration <= 0 {
		return fmt.Errorf("health check interval %q must be positive", interval)
	}

	spec := fmt.Sprintf("@every %s", duration.String())
	entryID, err := h.cron.AddFunc(spec, func() {
		// Each cycle gets its own deadline so a hung destination cannot block the scheduler forever.
		ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
		defer cancel()
		if checkErr := h.CheckAll(ctx); checkErr != nil {
			slog.Warn("proxy health monitor: check error", "error", checkErr)
		}
	})
	if err != nil {
		return fmt.Errorf("schedule proxy health monitor: %w", err)
	}

	// Drop the previous job only after the new one is registered. The zero
	// EntryID is never a valid entry, so it marks "nothing registered yet".
	// This must not depend on h.running: Stop leaves the old entry in the
	// scheduler, and restarting would otherwise run two jobs per tick.
	if h.entryID != 0 {
		h.cron.Remove(h.entryID)
	}
	h.entryID = entryID

	if !h.running {
		h.cron.Start()
	}
	h.running = true

	slog.Info("proxy health monitor started", "interval", duration.String())
	return nil
}
// Stop halts the scheduler and blocks until the context returned by the
// scheduler's Stop is done. It is a no-op when the monitor is not running.
func (h *HealthMonitor) Stop() {
	h.mu.Lock()
	defer h.mu.Unlock()

	if !h.running {
		return
	}

	<-h.cron.Stop().Done()
	h.running = false
	slog.Info("proxy health monitor stopped")
}
// CheckAll performs a single health check cycle for all standalone proxies.
//
// Each proxy is probed in turn; its new status is stored and, when the status
// changed from a known previous value, an event is emitted. A failure to store
// one proxy's status is logged and the cycle moves on.
//
// If ctx is cancelled or its deadline passes, the cycle stops and the context
// error is returned. A probe that finished after ctx ended is discarded rather
// than stored, because its "unhealthy" result would reflect the dead context
// and not the destination.
func (h *HealthMonitor) CheckAll(ctx context.Context) error {
	proxies, err := h.store.ListStandaloneProxies()
	if err != nil {
		return fmt.Errorf("list standalone proxies: %w", err)
	}

	for _, proxy := range proxies {
		if err := ctx.Err(); err != nil {
			return fmt.Errorf("proxy health check interrupted: %w", err)
		}

		newStatus := checkProxyHealth(ctx, proxy.DestinationURL, proxy.DestinationPort)
		// The probe inherits ctx; if ctx ended meanwhile the result is not trustworthy.
		if err := ctx.Err(); err != nil {
			return fmt.Errorf("proxy health check interrupted: %w", err)
		}
		oldStatus := proxy.HealthStatus

		if err := h.store.UpdateProxyHealth(proxy.ID, newStatus); err != nil {
			slog.Warn("proxy health monitor: failed to update health",
				"proxy_id", proxy.ID, "error", err)
			continue
		}

		// Emit event on status change; the first result after "unknown" is not a transition.
		if oldStatus != newStatus && oldStatus != "unknown" {
			h.emitHealthEvent(proxy, oldStatus, newStatus)
		}
	}
	return nil
}
// checkProxyHealth issues one HTTP GET to http://host:port/ and classifies the
// destination as "healthy" or "unhealthy".
//
// Redirects are not followed. Any response with a status below 500 counts as
// healthy; a request that cannot be built or completed within 10 seconds, or a
// 5xx response, counts as unhealthy.
func checkProxyHealth(ctx context.Context, host string, port int) string {
	const (
		healthy      = "healthy"
		unhealthy    = "unhealthy"
		probeTimeout = 10 * time.Second
	)

	probeCtx, stop := context.WithTimeout(ctx, probeTimeout)
	defer stop()

	probe, err := http.NewRequestWithContext(probeCtx, http.MethodGet, fmt.Sprintf("http://%s:%d/", host, port), nil)
	if err != nil {
		return unhealthy
	}

	// Returning ErrUseLastResponse hands back the redirect response itself.
	noFollow := func(*http.Request, []*http.Request) error {
		return http.ErrUseLastResponse
	}
	httpClient := http.Client{Timeout: probeTimeout, CheckRedirect: noFollow}

	resp, err := httpClient.Do(probe)
	if err != nil {
		return unhealthy
	}
	resp.Body.Close()

	if resp.StatusCode < 500 {
		return healthy
	}
	return unhealthy
}
// emitHealthEvent records a health status transition for proxy: it inserts an
// event log row and, if that succeeds, publishes the same event on the bus.
//
// Severity is "warn" when the new status is "unhealthy" and "info" otherwise.
// When the insert fails the error is logged and nothing is published.
func (h *HealthMonitor) emitHealthEvent(proxy store.StandaloneProxy, oldStatus, newStatus string) {
	const source = "proxy_health"

	level := "info"
	if newStatus == "unhealthy" {
		level = "warn"
	}

	text := fmt.Sprintf("Proxy %s (%s) health changed: %s -> %s",
		proxy.Domain, proxy.ID, oldStatus, newStatus)

	// The marshal error is ignored, as in the original; the map holds only
	// the proxy's ID and domain plus the two status strings.
	raw, _ := json.Marshal(map[string]any{
		"proxy_id":   proxy.ID,
		"domain":     proxy.Domain,
		"old_status": oldStatus,
		"new_status": newStatus,
	})
	meta := string(raw)

	saved, err := h.store.InsertEvent(store.EventLog{
		Source:   source,
		Severity: level,
		Message:  text,
		Metadata: meta,
	})
	if err != nil {
		slog.Error("proxy health monitor: failed to persist event", "error", err)
		return
	}

	// Publish with the ID and timestamp assigned by the store.
	h.eventBus.Publish(events.Event{
		Type: events.EventLog,
		Payload: events.EventLogPayload{
			ID:        saved.ID,
			Source:    source,
			Severity:  level,
			Message:   text,
			Metadata:  meta,
			CreatedAt: saved.CreatedAt,
		},
	})
}
+74
View File
@@ -0,0 +1,74 @@
package proxy
import (
"errors"
"fmt"
"net"
"strings"
)
// diagnosticHint maps a failed validation step and its error to a
// user-facing suggestion. A nil error yields the empty string; an unrecognised
// step yields a generic message containing the error text.
func diagnosticHint(step string, err error) string {
	if err == nil {
		return ""
	}
	if step == StepDNS {
		return "Domain cannot be resolved. Check DNS settings or use an IP address."
	}
	if step == StepTCP {
		return tcpHintFromError(err)
	}
	if step == StepHTTP {
		return httpHint(err.Error())
	}
	return "Validation failed: " + err.Error()
}
// tcpHintFromError returns a user-facing hint for a failed TCP dial.
//
// It recognises three failure classes by message: connection refused,
// timeout, and unreachable host ("no route to host" / "host is unreachable").
// When err wraps a *net.OpError the inner cause is examined first, then the
// full error text, so the same classes are recognised whether or not the
// error still carries its OpError type. Anything else yields a generic
// message containing the error text. A nil error yields the empty string.
func tcpHintFromError(err error) string {
	if err == nil {
		return ""
	}

	msg := err.Error()

	// Inspect the most specific text first: the OpError cause, then the whole message.
	candidates := make([]string, 0, 2)
	var opErr *net.OpError
	if errors.As(err, &opErr) && opErr.Err != nil {
		candidates = append(candidates, opErr.Err.Error())
	}
	candidates = append(candidates, msg)

	for _, candidate := range candidates {
		lower := strings.ToLower(candidate)
		switch {
		case strings.Contains(lower, "connection refused"):
			return "Port is not accepting connections. Check if the service is running and the port is correct."
		case strings.Contains(lower, "timeout"): // also covers "i/o timeout"
			return "Connection timed out. Possible firewall blocking. Check network/firewall rules."
		case strings.Contains(lower, "no route to host"), strings.Contains(lower, "host is unreachable"):
			return "Host is not reachable. Verify the IP address and network connectivity."
		}
	}

	return fmt.Sprintf("TCP connection failed: %s", msg)
}
// httpHint turns an HTTP probe failure message into a user-facing hint.
//
// A message mentioning "status" (case-insensitive) is returned unchanged, a
// message mentioning "timeout" yields a timeout hint, and anything else is
// prefixed with a generic failure label.
func httpHint(msg string) string {
	lower := strings.ToLower(msg)

	if strings.Contains(lower, "status") {
		// Already formatted by the caller with the status code.
		return msg
	}
	if strings.Contains(lower, "timeout") {
		return "HTTP health probe timed out. The service may be slow or unresponsive."
	}
	return "HTTP health probe failed: " + msg
}
+297
View File
@@ -0,0 +1,297 @@
package proxy
import (
"context"
"errors"
"fmt"
"log/slog"
"github.com/alexei/docker-watcher/internal/npm"
"github.com/alexei/docker-watcher/internal/store"
)
// Manager handles the lifecycle of standalone proxy hosts.
// It keeps the local store and the NPM proxy host definitions in step.
type Manager struct {
	// store persists standalone proxies and provides settings, instances, projects and stages.
	store *store.Store
	// npm is the client used to create, update and delete NPM proxy hosts.
	npm *npm.Client
}
// NewManager returns a proxy manager that uses st for persistence and
// npmClient for NPM proxy host operations.
func NewManager(st *store.Store, npmClient *npm.Client) *Manager {
	mgr := new(Manager)
	mgr.store = st
	mgr.npm = npmClient
	return mgr
}
// CreateProxyRequest is the input for creating a standalone proxy.
type CreateProxyRequest struct {
	// Domain is the public domain name registered on the NPM proxy host.
	Domain string `json:"domain"`
	// DestinationURL is the host to forward to. Despite the name it is used as
	// a bare host (NPM forward host and validation target), not a full URL.
	DestinationURL string `json:"destination_url"`
	// DestinationPort is the port to forward to on the destination host.
	DestinationPort int `json:"destination_port"`
}
// UpdateProxyRequest is the input for updating a standalone proxy.
// All three fields replace the stored values.
type UpdateProxyRequest struct {
	// Domain is the public domain name registered on the NPM proxy host.
	Domain string `json:"domain"`
	// DestinationURL is the host to forward to. Despite the name it is used as
	// a bare host (NPM forward host and validation target), not a full URL.
	DestinationURL string `json:"destination_url"`
	// DestinationPort is the port to forward to on the destination host.
	DestinationPort int `json:"destination_port"`
}
// ProxyView is a unified view of both standalone and deploy-managed proxies.
type ProxyView struct {
	// ID is the standalone proxy ID or, for managed proxies, the instance ID.
	ID string `json:"id"`
	// Domain is the proxied domain (the instance subdomain for managed proxies).
	Domain string `json:"domain"`
	// Destination is the forward target rendered as "host:port".
	Destination string `json:"destination"`
	// Type is "standalone" or "managed".
	Type string `json:"type"`
	// ProjectName is set for managed proxies only.
	ProjectName string `json:"project_name,omitempty"`
	// StageName is set for managed proxies only.
	StageName string `json:"stage_name,omitempty"`
	// HealthStatus is "healthy", "unhealthy" or "unknown".
	HealthStatus string `json:"health_status"`
	// SSLEnabled reports whether the proxy is served with an SSL certificate.
	SSLEnabled bool `json:"ssl_enabled"`
	// NpmProxyID is the ID of the corresponding NPM proxy host.
	NpmProxyID int `json:"npm_proxy_id"`
	// CreatedAt is the creation timestamp as stored.
	CreatedAt string `json:"created_at"`
}
// CreateProxy validates the destination, creates an NPM proxy host, and saves to the store.
//
// The steps run in order and stop at the first failure:
//  1. the destination host/port must pass ValidateDestination;
//  2. settings are loaded to pick up the SSL certificate ID (SSL is forced and
//     HSTS enabled only when that ID is greater than zero);
//  3. the NPM proxy host is created;
//  4. the proxy is stored with health status "unknown".
//
// If step 4 fails, the NPM host created in step 3 is deleted on a best-effort
// basis so it is not left orphaned. On any error the zero StandaloneProxy is
// returned.
func (m *Manager) CreateProxy(ctx context.Context, req CreateProxyRequest) (store.StandaloneProxy, error) {
	// Validate destination.
	result := ValidateDestination(ctx, req.DestinationURL, req.DestinationPort)
	if !result.Valid {
		return store.StandaloneProxy{}, fmt.Errorf("destination validation failed: %s", lastFailedStep(result))
	}

	// Load settings for SSL certificate and domain.
	settings, err := m.store.GetSettings()
	if err != nil {
		return store.StandaloneProxy{}, fmt.Errorf("get settings: %w", err)
	}

	// Build NPM proxy host config.
	config := npm.ProxyHostConfig{
		DomainNames:    []string{req.Domain},
		ForwardScheme:  "http",
		ForwardHost:    req.DestinationURL,
		ForwardPort:    req.DestinationPort,
		CertificateID:  settings.SSLCertificateID,
		SSLForced:      settings.SSLCertificateID > 0,
		BlockExploits:  true,
		AllowWebsocket: true,
		HTTP2Support:   true,
		HSTSEnabled:    settings.SSLCertificateID > 0,
		Locations:      []any{},
	}

	// Create NPM proxy host.
	npmHost, err := m.npm.CreateProxyHost(ctx, config)
	if err != nil {
		return store.StandaloneProxy{}, fmt.Errorf("create NPM proxy host: %w", err)
	}

	slog.Info("created NPM proxy host for standalone proxy",
		"domain", req.Domain, "npm_proxy_id", npmHost.ID)

	// Save to store.
	proxy, err := m.store.CreateStandaloneProxy(store.StandaloneProxy{
		Domain:           req.Domain,
		DestinationURL:   req.DestinationURL,
		DestinationPort:  req.DestinationPort,
		SSLCertificateID: settings.SSLCertificateID,
		NpmProxyID:       npmHost.ID,
		HealthStatus:     "unknown",
	})
	if err != nil {
		// Best effort: clean up the NPM host if store insert fails. The cleanup
		// must still run when the caller's context was cancelled in the
		// meantime (e.g. the HTTP client went away), so detach from it.
		cleanupCtx := context.WithoutCancel(ctx)
		if delErr := m.npm.DeleteProxyHost(cleanupCtx, npmHost.ID); delErr != nil {
			slog.Error("failed to clean up NPM proxy host after store error",
				"npm_proxy_id", npmHost.ID, "error", delErr)
		}
		return store.StandaloneProxy{}, fmt.Errorf("save standalone proxy: %w", err)
	}

	return proxy, nil
}
// UpdateProxy applies req to the standalone proxy identified by id.
//
// It loads the stored proxy, re-validates the new destination, pushes the new
// configuration to the existing NPM proxy host, writes the changed fields to
// the store and finally re-reads the record so the caller sees the stored
// timestamps. The first failing step aborts the update; there is no rollback
// of an NPM update when the later store write fails.
func (m *Manager) UpdateProxy(ctx context.Context, id string, req UpdateProxyRequest) (store.StandaloneProxy, error) {
	var none store.StandaloneProxy

	current, err := m.store.GetStandaloneProxy(id)
	if err != nil {
		return none, fmt.Errorf("get proxy: %w", err)
	}

	// The new destination must be reachable before anything is changed.
	if check := ValidateDestination(ctx, req.DestinationURL, req.DestinationPort); !check.Valid {
		return none, fmt.Errorf("destination validation failed: %s", lastFailedStep(check))
	}

	settings, err := m.store.GetSettings()
	if err != nil {
		return none, fmt.Errorf("get settings: %w", err)
	}
	certID := settings.SSLCertificateID
	sslOn := certID > 0

	hostCfg := npm.ProxyHostConfig{
		DomainNames:    []string{req.Domain},
		ForwardScheme:  "http",
		ForwardHost:    req.DestinationURL,
		ForwardPort:    req.DestinationPort,
		CertificateID:  certID,
		SSLForced:      sslOn,
		BlockExploits:  true,
		AllowWebsocket: true,
		HTTP2Support:   true,
		HSTSEnabled:    sslOn,
		Locations:      []any{},
	}
	_, err = m.npm.UpdateProxyHost(ctx, current.NpmProxyID, hostCfg)
	if err != nil {
		return none, fmt.Errorf("update NPM proxy host: %w", err)
	}

	// current is a local copy of the stored record; overwrite the changed fields.
	current.Domain = req.Domain
	current.DestinationURL = req.DestinationURL
	current.DestinationPort = req.DestinationPort
	current.SSLCertificateID = certID
	if err = m.store.UpdateStandaloneProxy(current); err != nil {
		return none, fmt.Errorf("update standalone proxy: %w", err)
	}

	// Re-read from store to get updated timestamps.
	return m.store.GetStandaloneProxy(id)
}
// DeleteProxy removes the standalone proxy identified by id.
//
// The NPM proxy host is deleted first when the record references one
// (NpmProxyID > 0). A failure there is only logged, and the store record is
// removed regardless.
func (m *Manager) DeleteProxy(ctx context.Context, id string) error {
	target, err := m.store.GetStandaloneProxy(id)
	if err != nil {
		return fmt.Errorf("get proxy: %w", err)
	}

	if npmID := target.NpmProxyID; npmID > 0 {
		if delErr := m.npm.DeleteProxyHost(ctx, npmID); delErr != nil {
			slog.Warn("failed to delete NPM proxy host (continuing with store deletion)",
				"npm_proxy_id", npmID, "error", delErr)
		}
	}

	if err = m.store.DeleteStandaloneProxy(id); err != nil {
		return fmt.Errorf("delete standalone proxy: %w", err)
	}
	return nil
}
// GetProxy looks up one standalone proxy by ID. A store error is wrapped and
// returned together with the zero StandaloneProxy.
func (m *Manager) GetProxy(id string) (store.StandaloneProxy, error) {
	found, err := m.store.GetStandaloneProxy(id)
	if err == nil {
		return found, nil
	}
	return store.StandaloneProxy{}, fmt.Errorf("get proxy: %w", err)
}
// ListProxies returns every standalone proxy in the store. A store error is
// wrapped and returned with a nil slice.
func (m *Manager) ListProxies() ([]store.StandaloneProxy, error) {
	all, err := m.store.ListStandaloneProxies()
	if err == nil {
		return all, nil
	}
	return nil, fmt.Errorf("list proxies: %w", err)
}
// ListAllProxies returns a merged view of standalone and deploy-managed proxies.
//
// Standalone proxies come first, followed by every instance that has an NPM
// proxy host (NpmProxyID > 0). For managed entries:
//   - project and stage names fall back to their IDs when the lookup fails;
//   - the destination host is the subdomain, or the container ID shortened to
//     at most 12 characters when no subdomain is set;
//   - health is derived from the instance status: "running" is healthy,
//     "stopped" and "failed" are unhealthy, anything else is unknown.
//
// The returned slice is never nil, so it encodes as a JSON array even when empty.
func (m *Manager) ListAllProxies() ([]ProxyView, error) {
	views := []ProxyView{}

	// Standalone proxies.
	standalones, err := m.store.ListStandaloneProxies()
	if err != nil {
		return nil, fmt.Errorf("list standalone proxies: %w", err)
	}
	for _, p := range standalones {
		views = append(views, ProxyView{
			ID:           p.ID,
			Domain:       p.Domain,
			Destination:  fmt.Sprintf("%s:%d", p.DestinationURL, p.DestinationPort),
			Type:         "standalone",
			HealthStatus: p.HealthStatus,
			SSLEnabled:   p.SSLCertificateID > 0,
			NpmProxyID:   p.NpmProxyID,
			CreatedAt:    p.CreatedAt,
		})
	}

	// Deploy-managed proxies: instances with npm_proxy_id > 0.
	instances, err := m.store.ListAllInstances()
	if err != nil {
		return nil, fmt.Errorf("list instances: %w", err)
	}
	for _, inst := range instances {
		if inst.NpmProxyID <= 0 {
			continue
		}

		projectName := inst.ProjectID
		stageName := inst.StageID
		if proj, err := m.store.GetProjectByID(inst.ProjectID); err == nil {
			projectName = proj.Name
		}
		if stg, err := m.store.GetStageByID(inst.StageID); err == nil {
			stageName = stg.Name
		}

		// Only fall back to the container ID when there is no subdomain, and
		// never slice past its end: an instance may carry a short or empty ID.
		target := inst.Subdomain
		if target == "" {
			target = inst.ContainerID
			if len(target) > 12 {
				target = target[:12]
			}
		}
		destination := fmt.Sprintf("%s:%d", target, inst.Port)

		healthStatus := "unknown"
		switch inst.Status {
		case "running":
			healthStatus = "healthy"
		case "stopped", "failed":
			healthStatus = "unhealthy"
		}

		views = append(views, ProxyView{
			ID:           inst.ID,
			Domain:       inst.Subdomain,
			Destination:  destination,
			Type:         "managed",
			ProjectName:  projectName,
			StageName:    stageName,
			HealthStatus: healthStatus,
			SSLEnabled:   true, // managed proxies always get SSL from settings
			NpmProxyID:   inst.NpmProxyID,
			CreatedAt:    inst.CreatedAt,
		})
	}

	return views, nil
}
// lastFailedStep describes why validation failed. It returns the message of
// the first step in result that did not pass, followed by " — " and the hint
// when the step has one. When no step failed it returns a generic message.
func lastFailedStep(result ValidationResult) string {
	for i := range result.Steps {
		failed := result.Steps[i]
		if failed.Passed {
			continue
		}
		if failed.Hint == "" {
			return failed.Message
		}
		return failed.Message + " — " + failed.Hint
	}
	return "unknown validation failure"
}
// IsNotFound checks if an error wraps store.ErrNotFound.
// It uses errors.Is, so it also matches errors the manager wrapped with %w.
func IsNotFound(err error) bool {
	return errors.Is(err, store.ErrNotFound)
}
+224
View File
@@ -0,0 +1,224 @@
package proxy
import (
"context"
"fmt"
"net"
"net/http"
"net/url"
"strconv"
"time"
)
// Validation step names. Each value is used as ValidationStep.Name and as the
// step key passed to diagnosticHint.
const (
	// StepSyntax is the host/port syntax check.
	StepSyntax = "syntax"
	// StepDNS is the DNS resolution check (reported as skipped for IP addresses).
	StepDNS = "dns"
	// StepTCP is the TCP connect check.
	StepTCP = "tcp"
	// StepHTTP is the HTTP GET probe.
	StepHTTP = "http"
)
// ValidationStep holds the result of a single validation check.
type ValidationStep struct {
	// Name is one of the Step* constants.
	Name string `json:"name"`
	// Passed reports whether the check succeeded (or was skipped as not applicable).
	Passed bool `json:"passed"`
	// Message describes what was checked and the outcome.
	Message string `json:"message,omitempty"`
	// Hint is a user-facing suggestion, set only on failed steps.
	Hint string `json:"hint,omitempty"`
}
// ValidationResult holds the aggregate result of the validation pipeline.
type ValidationResult struct {
	// Valid is true only when every executed step passed.
	Valid bool `json:"valid"`
	// Steps lists the executed steps in pipeline order; steps after a failure are absent.
	Steps []ValidationStep `json:"steps"`
}
// ValidateDestination checks whether host:port is usable as a proxy target.
//
// It runs four steps in order (syntax, DNS, TCP connect, HTTP probe) and
// records each executed step in the result. The first failing step marks the
// result invalid and ends the pipeline, so later steps are not attempted.
// When host is an IP address the DNS step is recorded as passed/skipped.
func ValidateDestination(ctx context.Context, host string, port int) ValidationResult {
	result := ValidationResult{Valid: true}

	// record stores a step outcome and reports whether the pipeline may continue.
	record := func(step ValidationStep, ok bool) bool {
		result.Steps = append(result.Steps, step)
		if !ok {
			result.Valid = false
		}
		return ok
	}

	// Step 1: syntax.
	if !record(validateSyntax(host, port)) {
		return result
	}

	// Step 2: DNS resolution, which is meaningless for a literal IP address.
	if net.ParseIP(host) != nil {
		record(ValidationStep{
			Name:    StepDNS,
			Passed:  true,
			Message: "Skipped (IP address provided)",
		}, true)
	} else if !record(validateDNS(ctx, host)) {
		return result
	}

	// Step 3: TCP port reachability.
	if !record(validateTCP(ctx, host, port)) {
		return result
	}

	// Step 4: HTTP health probe.
	httpStep := validateHTTP(ctx, host, port)
	record(httpStep, httpStep.Passed)

	return result
}
// validateSyntax checks that the host and port values are syntactically valid.
//
// It fails when host is empty, when port is outside 1-65535, or when host is
// neither an IP address nor a bare hostname. A bare hostname is one that,
// placed after "http://", parses as a URL whose host component is exactly the
// input: values carrying a scheme, port, path, query, fragment or credentials
// are rejected, as are values url.Parse refuses (e.g. containing spaces).
//
// The returned bool mirrors the step's Passed field.
func validateSyntax(host string, port int) (ValidationStep, bool) {
	if host == "" {
		return ValidationStep{
			Name:    StepSyntax,
			Passed:  false,
			Message: "Host is empty",
			Hint:    "Provide a valid hostname or IP address.",
		}, false
	}

	if port < 1 || port > 65535 {
		return ValidationStep{
			Name:    StepSyntax,
			Passed:  false,
			Message: fmt.Sprintf("Port %d is out of range (1-65535)", port),
			Hint:    "Provide a valid port number between 1 and 65535.",
		}, false
	}

	// Reject obviously invalid hostnames (but allow IPs).
	if net.ParseIP(host) == nil {
		const hint = "Provide a valid hostname without scheme (e.g., 'example.com' not 'http://example.com')."

		parsed, err := url.Parse("http://" + host)
		if err != nil {
			return ValidationStep{
				Name:    StepSyntax,
				Passed:  false,
				Message: "Invalid hostname: " + err.Error(),
				Hint:    hint,
			}, false
		}
		// A successful parse is not enough: "http://x", "x/path" or "x:80"
		// all parse, but then the URL's hostname is no longer the whole input.
		if parsed.Hostname() != host {
			return ValidationStep{
				Name:    StepSyntax,
				Passed:  false,
				Message: fmt.Sprintf("Invalid hostname %q: must be a bare hostname without scheme, port, path, or credentials", host),
				Hint:    hint,
			}, false
		}
	}

	return ValidationStep{
		Name:    StepSyntax,
		Passed:  true,
		Message: fmt.Sprintf("Host %q port %d syntax OK", host, port),
	}, true
}
// validateDNS resolves host with the default resolver. The step passes when
// the lookup succeeds and lists the resolved addresses; otherwise it fails
// with the resolver error and a DNS hint. The returned bool mirrors Passed.
func validateDNS(ctx context.Context, host string) (ValidationStep, bool) {
	addrs, err := net.DefaultResolver.LookupHost(ctx, host)
	if err == nil {
		return ValidationStep{
			Name:    StepDNS,
			Passed:  true,
			Message: fmt.Sprintf("Resolved to %v", addrs),
		}, true
	}

	return ValidationStep{
		Name:    StepDNS,
		Passed:  false,
		Message: fmt.Sprintf("DNS resolution failed for %q: %s", host, err.Error()),
		Hint:    diagnosticHint(StepDNS, err),
	}, false
}
// validateTCP dials host:port over TCP, giving up after 5 seconds or when ctx
// ends. The connection is closed immediately on success. The returned bool
// mirrors the step's Passed field.
func validateTCP(ctx context.Context, host string, port int) (ValidationStep, bool) {
	target := net.JoinHostPort(host, strconv.Itoa(port))

	dialCtx, stop := context.WithTimeout(ctx, 5*time.Second)
	defer stop()

	dialer := new(net.Dialer)
	conn, err := dialer.DialContext(dialCtx, "tcp", target)
	if err == nil {
		// Only reachability matters; nothing is sent on the connection.
		conn.Close()
		return ValidationStep{
			Name:    StepTCP,
			Passed:  true,
			Message: fmt.Sprintf("TCP connect to %s succeeded", target),
		}, true
	}

	return ValidationStep{
		Name:    StepTCP,
		Passed:  false,
		Message: fmt.Sprintf("TCP connect to %s failed: %s", target, err.Error()),
		Hint:    diagnosticHint(StepTCP, err),
	}, false
}
// validateHTTP performs a GET request to the destination and checks for a response.
// Non-5xx responses are considered passing (the service is responding).
//
// The probe is limited to 10 seconds and does not follow redirects. The step
// fails when the request cannot be built or completed, or when the service
// answers with a 5xx status.
func validateHTTP(ctx context.Context, host string, port int) ValidationStep {
	// JoinHostPort brackets IPv6 literals, matching the address form used by
	// the TCP step; for hostnames and IPv4 the result is plain "host:port".
	target := "http://" + net.JoinHostPort(host, strconv.Itoa(port)) + "/"

	httpCtx, cancel := context.WithTimeout(ctx, 10*time.Second)
	defer cancel()

	req, err := http.NewRequestWithContext(httpCtx, http.MethodGet, target, nil)
	if err != nil {
		return ValidationStep{
			Name:    StepHTTP,
			Passed:  false,
			Message: fmt.Sprintf("Failed to build HTTP request: %s", err.Error()),
			Hint:    diagnosticHint(StepHTTP, err),
		}
	}

	client := &http.Client{
		Timeout: 10 * time.Second,
		// Do not follow redirects — we just want to see if the port responds to HTTP.
		CheckRedirect: func(*http.Request, []*http.Request) error {
			return http.ErrUseLastResponse
		},
	}

	resp, err := client.Do(req)
	if err != nil {
		return ValidationStep{
			Name:    StepHTTP,
			Passed:  false,
			Message: fmt.Sprintf("HTTP probe to %s failed: %s", target, err.Error()),
			Hint:    diagnosticHint(StepHTTP, err),
		}
	}
	// Only the status line matters; the body is not read.
	resp.Body.Close()

	if resp.StatusCode >= 500 {
		// Message and hint intentionally carry the same text for 5xx answers.
		unhealthy := fmt.Sprintf("Service responded with HTTP %d. The service may not be healthy.", resp.StatusCode)
		return ValidationStep{
			Name:    StepHTTP,
			Passed:  false,
			Message: unhealthy,
			Hint:    unhealthy,
		}
	}

	return ValidationStep{
		Name:    StepHTTP,
		Passed:  true,
		Message: fmt.Sprintf("HTTP probe returned %d", resp.StatusCode),
	}
}