From c38b7d4c78218af1408a5f96a81421baf7d2821d Mon Sep 17 00:00:00 2001 From: "alexei.dolgolyov" Date: Mon, 30 Mar 2026 10:59:13 +0300 Subject: [PATCH 1/7] feat(observability): phase 1 - schema, models & event log backend Add database foundation for observability features: - event_log table with severity/source filtering and pagination - standalone_proxies table for user-created reverse proxies - stale_threshold_days setting (default 7 days) - Auto-persist warn/error events from event bus to database - SSE broadcast of persistent events for real-time UI updates - Frontend types and API functions for downstream UI phases --- cmd/server/main.go | 15 ++ internal/api/eventlog.go | 48 ++++++ internal/api/router.go | 2 + internal/api/settings.go | 33 ++-- internal/api/sse.go | 4 +- internal/events/bus.go | 70 +++++++++ internal/store/eventlog.go | 148 ++++++++++++++++++ internal/store/models.go | 29 +++- internal/store/settings.go | 8 +- internal/store/standalone_proxy.go | 120 ++++++++++++++ internal/store/store.go | 27 ++++ plans/observability-proxy-mgmt/CONTEXT.md | 52 ++++++ plans/observability-proxy-mgmt/PLAN.md | 71 +++++++++ .../phase-1-schema-eventlog.md | 60 +++++++ .../phase-2-stale-detection.md | 55 +++++++ .../phase-3-proxy-creation.md | 81 ++++++++++ .../phase-4-proxy-viewer.md | 56 +++++++ .../phase-5-stale-ui.md | 55 +++++++ .../phase-6-proxy-creation-ui.md | 54 +++++++ .../phase-7-eventlog-ui.md | 54 +++++++ .../phase-8-stats-notifications.md | 67 ++++++++ web/src/lib/api.ts | 27 ++++ web/src/lib/types.ts | 33 ++++ 23 files changed, 1149 insertions(+), 20 deletions(-) create mode 100644 internal/api/eventlog.go create mode 100644 internal/store/eventlog.go create mode 100644 internal/store/standalone_proxy.go create mode 100644 plans/observability-proxy-mgmt/CONTEXT.md create mode 100644 plans/observability-proxy-mgmt/PLAN.md create mode 100644 plans/observability-proxy-mgmt/phase-1-schema-eventlog.md create mode 100644 
plans/observability-proxy-mgmt/phase-2-stale-detection.md create mode 100644 plans/observability-proxy-mgmt/phase-3-proxy-creation.md create mode 100644 plans/observability-proxy-mgmt/phase-4-proxy-viewer.md create mode 100644 plans/observability-proxy-mgmt/phase-5-stale-ui.md create mode 100644 plans/observability-proxy-mgmt/phase-6-proxy-creation-ui.md create mode 100644 plans/observability-proxy-mgmt/phase-7-eventlog-ui.md create mode 100644 plans/observability-proxy-mgmt/phase-8-stats-notifications.md diff --git a/cmd/server/main.go b/cmd/server/main.go index b91e2e6..c369f7d 100644 --- a/cmd/server/main.go +++ b/cmd/server/main.go @@ -93,6 +93,21 @@ func main() { notifier := notify.New() eventBus := events.New() + // Auto-persist warn/error events from the event bus to the database. + stopLogger := eventBus.RegisterPersistentLogger(func(source, severity, message, metadata string) (int64, string, error) { + evt, err := db.InsertEvent(store.EventLog{ + Source: source, + Severity: severity, + Message: message, + Metadata: metadata, + }) + if err != nil { + return 0, "", err + } + return evt.ID, evt.CreatedAt, nil + }) + defer stopLogger() + dep := deployer.New(dockerClient, npmClient, db, healthChecker, notifier, eventBus, encKey) // Initialize webhook handler. diff --git a/internal/api/eventlog.go b/internal/api/eventlog.go new file mode 100644 index 0000000..a4fd025 --- /dev/null +++ b/internal/api/eventlog.go @@ -0,0 +1,48 @@ +package api + +import ( + "log/slog" + "net/http" + "strconv" + + "github.com/alexei/docker-watcher/internal/store" +) + +// listEventLog handles GET /api/events/log. +// Supports query parameters: severity, source, since, until, limit, offset. 
+func (s *Server) listEventLog(w http.ResponseWriter, r *http.Request) { + q := r.URL.Query() + + limit, _ := strconv.Atoi(q.Get("limit")) + offset, _ := strconv.Atoi(q.Get("offset")) + + filter := store.EventLogFilter{ + Severity: q.Get("severity"), + Source: q.Get("source"), + Since: q.Get("since"), + Until: q.Get("until"), + Limit: limit, + Offset: offset, + } + + events, err := s.store.ListEvents(filter) + if err != nil { + slog.Error("failed to list events", "error", err) + respondError(w, http.StatusInternalServerError, "failed to list events") + return + } + + respondJSON(w, http.StatusOK, events) +} + +// getEventLogStats handles GET /api/events/log/stats. +func (s *Server) getEventLogStats(w http.ResponseWriter, r *http.Request) { + stats, err := s.store.GetEventStats() + if err != nil { + slog.Error("failed to get event stats", "error", err) + respondError(w, http.StatusInternalServerError, "failed to get event stats") + return + } + + respondJSON(w, http.StatusOK, stats) +} diff --git a/internal/api/router.go b/internal/api/router.go index dfb0221..8f4e656 100644 --- a/internal/api/router.go +++ b/internal/api/router.go @@ -125,6 +125,8 @@ func (s *Server) Router() chi.Router { r.Get("/deploys", s.listDeploys) r.Get("/deploys/{id}/logs", s.streamDeployLogs) r.Get("/events", s.streamEvents) + r.Get("/events/log", s.listEventLog) + r.Get("/events/log/stats", s.getEventLogStats) r.Get("/registries", s.listRegistries) r.Route("/registries/{id}", func(r chi.Router) { r.Get("/tags/*", s.listRegistryTags) diff --git a/internal/api/settings.go b/internal/api/settings.go index 22c6dbb..276dd45 100644 --- a/internal/api/settings.go +++ b/internal/api/settings.go @@ -24,7 +24,8 @@ type settingsRequest struct { NpmEmail string `json:"npm_email"` NpmPassword string `json:"npm_password"` PollingInterval string `json:"polling_interval"` - SSLCertificateID *int `json:"ssl_certificate_id,omitempty"` + SSLCertificateID *int `json:"ssl_certificate_id,omitempty"` + 
StaleThresholdDays *int `json:"stale_threshold_days,omitempty"` } // getSettings handles GET /api/settings. @@ -37,17 +38,18 @@ func (s *Server) getSettings(w http.ResponseWriter, r *http.Request) { // Return settings without sensitive fields. respondJSON(w, http.StatusOK, map[string]any{ - "domain": settings.Domain, - "server_ip": settings.ServerIP, - "network": settings.Network, - "subdomain_pattern": settings.SubdomainPattern, - "notification_url": settings.NotificationURL, - "npm_url": settings.NpmURL, - "npm_email": settings.NpmEmail, - "has_npm_password": settings.NpmPassword != "", - "polling_interval": settings.PollingInterval, - "ssl_certificate_id": settings.SSLCertificateID, - "updated_at": settings.UpdatedAt, + "domain": settings.Domain, + "server_ip": settings.ServerIP, + "network": settings.Network, + "subdomain_pattern": settings.SubdomainPattern, + "notification_url": settings.NotificationURL, + "npm_url": settings.NpmURL, + "npm_email": settings.NpmEmail, + "has_npm_password": settings.NpmPassword != "", + "polling_interval": settings.PollingInterval, + "ssl_certificate_id": settings.SSLCertificateID, + "stale_threshold_days": settings.StaleThresholdDays, + "updated_at": settings.UpdatedAt, }) } @@ -101,6 +103,13 @@ func (s *Server) updateSettings(w http.ResponseWriter, r *http.Request) { updated.SSLCertificateID = *req.SSLCertificateID sslChanged = true } + if req.StaleThresholdDays != nil { + if *req.StaleThresholdDays < 1 { + respondError(w, http.StatusBadRequest, "stale_threshold_days must be at least 1") + return + } + updated.StaleThresholdDays = *req.StaleThresholdDays + } if err := s.store.UpdateSettings(updated); err != nil { respondError(w, http.StatusInternalServerError, "failed to update settings: "+err.Error()) diff --git a/internal/api/sse.go b/internal/api/sse.go index 4882223..32b1538 100644 --- a/internal/api/sse.go +++ b/internal/api/sse.go @@ -150,9 +150,9 @@ func (s *Server) streamEvents(w http.ResponseWriter, r *http.Request) { 
w.WriteHeader(http.StatusOK) flusher.Flush() - // Subscribe to instance status and deploy status events. + // Subscribe to instance status, deploy status, and persistent event log events. sub := s.eventBus.Subscribe(func(evt events.Event) bool { - return evt.Type == events.EventInstanceStatus || evt.Type == events.EventDeployStatus + return evt.Type == events.EventInstanceStatus || evt.Type == events.EventDeployStatus || evt.Type == events.EventLog }) defer s.eventBus.Unsubscribe(sub) diff --git a/internal/events/bus.go b/internal/events/bus.go index a4097a2..5cc1a4c 100644 --- a/internal/events/bus.go +++ b/internal/events/bus.go @@ -2,6 +2,7 @@ package events import ( "encoding/json" + "log/slog" "sync" ) @@ -17,6 +18,9 @@ const ( // EventDeployStatus is emitted when a deploy status changes. EventDeployStatus EventType = "deploy_status" + + // EventLog is emitted when a persistent event is logged. + EventLog EventType = "event_log" ) // Event is a single event published on the bus. @@ -50,6 +54,72 @@ type DeployStatusPayload struct { Error string `json:"error,omitempty"` } +// EventLogPayload is the payload for EventLog events (persistent event log). +type EventLogPayload struct { + ID int64 `json:"id"` + Source string `json:"source"` + Severity string `json:"severity"` + Message string `json:"message"` + Metadata string `json:"metadata"` + CreatedAt string `json:"created_at"` +} + +// PersistFunc is a callback that persists an event log entry. +// It receives source, severity, message, and metadata (JSON string). +// It returns the persisted entry's ID and created_at timestamp. +type PersistFunc func(source, severity, message, metadata string) (int64, string, error) + +// RegisterPersistentLogger subscribes to the bus and auto-persists warn/error +// events by calling the provided persist function. It also re-publishes the +// persisted event as an EventLog so SSE clients receive it in real-time. +// Call the returned function to unsubscribe. 
+func (b *Bus) RegisterPersistentLogger(persist PersistFunc) func() { + sub := b.Subscribe(func(evt Event) bool { + // Only persist deploy log events with warn/error level. + if evt.Type != EventDeployLog { + return false + } + p, ok := evt.Payload.(DeployLogPayload) + if !ok { + return false + } + return p.Level == "warn" || p.Level == "error" + }) + + go func() { + for evt := range sub { + p, ok := evt.Payload.(DeployLogPayload) + if !ok { + continue + } + metaBytes, _ := json.Marshal(map[string]string{"deploy_id": p.DeployID}) + metadata := string(metaBytes) + id, createdAt, err := persist("deploy", p.Level, p.Message, metadata) + if err != nil { + slog.Error("failed to persist event log", "source", "deploy", "level", p.Level, "error", err) + continue + } + + // Re-publish as EventLog for SSE clients. + b.Publish(Event{ + Type: EventLog, + Payload: EventLogPayload{ + ID: id, + Source: "deploy", + Severity: p.Level, + Message: p.Message, + Metadata: metadata, + CreatedAt: createdAt, + }, + }) + } + }() + + return func() { + b.Unsubscribe(sub) + } +} + // Subscriber is a channel that receives events. type Subscriber chan Event diff --git a/internal/store/eventlog.go b/internal/store/eventlog.go new file mode 100644 index 0000000..6c8a458 --- /dev/null +++ b/internal/store/eventlog.go @@ -0,0 +1,148 @@ +package store + +import ( + "fmt" + "strings" +) + +// EventLogFilter holds optional filters for listing event log entries. +type EventLogFilter struct { + Severity string // Filter by severity (info, warn, error). + Source string // Filter by source. + Since string // Only events created at or after this timestamp. + Until string // Only events created at or before this timestamp. + Limit int // Maximum number of results (default 50). + Offset int // Offset for pagination. +} + +// EventLogStats holds counts of event log entries by severity. 
+type EventLogStats struct { + Info int `json:"info"` + Warn int `json:"warn"` + Error int `json:"error"` + Total int `json:"total"` +} + +// InsertEvent inserts a new event log entry. +func (s *Store) InsertEvent(evt EventLog) (EventLog, error) { + evt.CreatedAt = Now() + if evt.Metadata == "" { + evt.Metadata = "{}" + } + + result, err := s.db.Exec( + `INSERT INTO event_log (source, severity, message, metadata, created_at) + VALUES (?, ?, ?, ?, ?)`, + evt.Source, evt.Severity, evt.Message, evt.Metadata, evt.CreatedAt, + ) + if err != nil { + return EventLog{}, fmt.Errorf("insert event: %w", err) + } + + id, err := result.LastInsertId() + if err != nil { + return EventLog{}, fmt.Errorf("get event id: %w", err) + } + evt.ID = id + + return evt, nil +} + +// ListEvents returns event log entries matching the given filter. +func (s *Store) ListEvents(filter EventLogFilter) ([]EventLog, error) { + var conditions []string + var args []any + + if filter.Severity != "" { + conditions = append(conditions, "severity = ?") + args = append(args, filter.Severity) + } + if filter.Source != "" { + conditions = append(conditions, "source = ?") + args = append(args, filter.Source) + } + if filter.Since != "" { + conditions = append(conditions, "created_at >= ?") + args = append(args, filter.Since) + } + if filter.Until != "" { + conditions = append(conditions, "created_at <= ?") + args = append(args, filter.Until) + } + + query := "SELECT id, source, severity, message, metadata, created_at FROM event_log" + if len(conditions) > 0 { + query += " WHERE " + strings.Join(conditions, " AND ") + } + query += " ORDER BY created_at DESC" + + limit := filter.Limit + if limit <= 0 { + limit = 50 + } + if limit > 500 { + limit = 500 + } + query += fmt.Sprintf(" LIMIT %d OFFSET %d", limit, filter.Offset) + + rows, err := s.db.Query(query, args...) 
+ if err != nil { + return nil, fmt.Errorf("query events: %w", err) + } + defer rows.Close() + + events := []EventLog{} + for rows.Next() { + var evt EventLog + if err := rows.Scan(&evt.ID, &evt.Source, &evt.Severity, &evt.Message, &evt.Metadata, &evt.CreatedAt); err != nil { + return nil, fmt.Errorf("scan event: %w", err) + } + events = append(events, evt) + } + return events, rows.Err() +} + +// GetEventStats returns counts of event log entries grouped by severity. +func (s *Store) GetEventStats() (EventLogStats, error) { + rows, err := s.db.Query( + `SELECT severity, COUNT(*) FROM event_log GROUP BY severity`, + ) + if err != nil { + return EventLogStats{}, fmt.Errorf("query event stats: %w", err) + } + defer rows.Close() + + var stats EventLogStats + for rows.Next() { + var severity string + var count int + if err := rows.Scan(&severity, &count); err != nil { + return EventLogStats{}, fmt.Errorf("scan event stats: %w", err) + } + switch severity { + case "info": + stats.Info = count + case "warn": + stats.Warn = count + case "error": + stats.Error = count + } + stats.Total += count + } + return stats, rows.Err() +} + +// PruneEvents deletes event log entries older than the given number of days. 
+func (s *Store) PruneEvents(olderThanDays int) (int64, error) { + if olderThanDays < 1 { + return 0, fmt.Errorf("prune events: olderThanDays must be >= 1, got %d", olderThanDays) + } + result, err := s.db.Exec( + `DELETE FROM event_log WHERE created_at < datetime('now', ?)`, + fmt.Sprintf("-%d days", olderThanDays), + ) + if err != nil { + return 0, fmt.Errorf("prune events: %w", err) + } + return result.RowsAffected() +} diff --git a/internal/store/models.go b/internal/store/models.go index 72e823f..5b0ed1b 100644 --- a/internal/store/models.go +++ b/internal/store/models.go @@ -55,8 +55,9 @@ type Settings struct { WebhookSecret string `json:"webhook_secret"` PollingInterval string `json:"polling_interval"` BaseVolumePath string `json:"base_volume_path"` - SSLCertificateID int `json:"ssl_certificate_id"` - UpdatedAt string `json:"updated_at"` + SSLCertificateID int `json:"ssl_certificate_id"` + StaleThresholdDays int `json:"stale_threshold_days"` + UpdatedAt string `json:"updated_at"` } // Instance represents a running (or stopped) container for a project stage. @@ -117,3 +118,27 @@ type Volume struct { CreatedAt string `json:"created_at"` UpdatedAt string `json:"updated_at"` } + +// EventLog represents a persistent event log entry. +type EventLog struct { + ID int64 `json:"id"` + Source string `json:"source"` + Severity string `json:"severity"` // info, warn, error + Message string `json:"message"` + Metadata string `json:"metadata"` // JSON-encoded structured data + CreatedAt string `json:"created_at"` +} + +// StandaloneProxy represents a standalone reverse proxy not tied to a project. 
+type StandaloneProxy struct { + ID string `json:"id"` + Domain string `json:"domain"` + DestinationURL string `json:"destination_url"` + DestinationPort int `json:"destination_port"` + SSLCertificateID int `json:"ssl_certificate_id"` + NpmProxyID int `json:"npm_proxy_id"` + HealthStatus string `json:"health_status"` // unknown, healthy, unhealthy + HealthCheckedAt string `json:"health_checked_at"` + CreatedAt string `json:"created_at"` + UpdatedAt string `json:"updated_at"` +} diff --git a/internal/store/settings.go b/internal/store/settings.go index 1580cd8..d9ea761 100644 --- a/internal/store/settings.go +++ b/internal/store/settings.go @@ -9,10 +9,10 @@ func (s *Store) GetSettings() (Settings, error) { var st Settings err := s.db.QueryRow( `SELECT domain, server_ip, network, subdomain_pattern, notification_url, - npm_url, npm_email, npm_password, webhook_secret, polling_interval, base_volume_path, ssl_certificate_id, updated_at + npm_url, npm_email, npm_password, webhook_secret, polling_interval, base_volume_path, ssl_certificate_id, stale_threshold_days, updated_at FROM settings WHERE id = 1`, ).Scan(&st.Domain, &st.ServerIP, &st.Network, &st.SubdomainPattern, &st.NotificationURL, - &st.NpmURL, &st.NpmEmail, &st.NpmPassword, &st.WebhookSecret, &st.PollingInterval, &st.BaseVolumePath, &st.SSLCertificateID, &st.UpdatedAt) + &st.NpmURL, &st.NpmEmail, &st.NpmPassword, &st.WebhookSecret, &st.PollingInterval, &st.BaseVolumePath, &st.SSLCertificateID, &st.StaleThresholdDays, &st.UpdatedAt) if err != nil { return Settings{}, fmt.Errorf("query settings: %w", err) } @@ -25,10 +25,10 @@ func (s *Store) UpdateSettings(st Settings) error { _, err := s.db.Exec( `UPDATE settings SET domain=?, server_ip=?, network=?, subdomain_pattern=?, notification_url=?, - npm_url=?, npm_email=?, npm_password=?, webhook_secret=?, polling_interval=?, base_volume_path=?, ssl_certificate_id=?, updated_at=? 
+ npm_url=?, npm_email=?, npm_password=?, webhook_secret=?, polling_interval=?, base_volume_path=?, ssl_certificate_id=?, stale_threshold_days=?, updated_at=? WHERE id = 1`, st.Domain, st.ServerIP, st.Network, st.SubdomainPattern, st.NotificationURL, - st.NpmURL, st.NpmEmail, st.NpmPassword, st.WebhookSecret, st.PollingInterval, st.BaseVolumePath, st.SSLCertificateID, st.UpdatedAt, + st.NpmURL, st.NpmEmail, st.NpmPassword, st.WebhookSecret, st.PollingInterval, st.BaseVolumePath, st.SSLCertificateID, st.StaleThresholdDays, st.UpdatedAt, ) if err != nil { return fmt.Errorf("update settings: %w", err) diff --git a/internal/store/standalone_proxy.go b/internal/store/standalone_proxy.go new file mode 100644 index 0000000..a1ce46e --- /dev/null +++ b/internal/store/standalone_proxy.go @@ -0,0 +1,120 @@ +package store + +import ( + "database/sql" + "errors" + "fmt" + + "github.com/google/uuid" +) + +// CreateStandaloneProxy inserts a new standalone proxy record. +func (s *Store) CreateStandaloneProxy(p StandaloneProxy) (StandaloneProxy, error) { + p.ID = uuid.New().String() + p.CreatedAt = Now() + p.UpdatedAt = p.CreatedAt + + if p.HealthStatus == "" { + p.HealthStatus = "unknown" + } + + _, err := s.db.Exec( + `INSERT INTO standalone_proxies (id, domain, destination_url, destination_port, ssl_certificate_id, npm_proxy_id, health_status, health_checked_at, created_at, updated_at) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, + p.ID, p.Domain, p.DestinationURL, p.DestinationPort, p.SSLCertificateID, + p.NpmProxyID, p.HealthStatus, p.HealthCheckedAt, p.CreatedAt, p.UpdatedAt, + ) + if err != nil { + return StandaloneProxy{}, fmt.Errorf("insert standalone proxy: %w", err) + } + return p, nil +} + +// GetStandaloneProxy returns a standalone proxy by ID. 
+func (s *Store) GetStandaloneProxy(id string) (StandaloneProxy, error) { + var p StandaloneProxy + err := s.db.QueryRow( + `SELECT id, domain, destination_url, destination_port, ssl_certificate_id, npm_proxy_id, health_status, health_checked_at, created_at, updated_at + FROM standalone_proxies WHERE id = ?`, id, + ).Scan(&p.ID, &p.Domain, &p.DestinationURL, &p.DestinationPort, &p.SSLCertificateID, + &p.NpmProxyID, &p.HealthStatus, &p.HealthCheckedAt, &p.CreatedAt, &p.UpdatedAt) + if errors.Is(err, sql.ErrNoRows) { + return StandaloneProxy{}, fmt.Errorf("standalone proxy %s: %w", id, ErrNotFound) + } + if err != nil { + return StandaloneProxy{}, fmt.Errorf("query standalone proxy: %w", err) + } + return p, nil +} + +// ListStandaloneProxies returns all standalone proxy records ordered by creation time. +func (s *Store) ListStandaloneProxies() ([]StandaloneProxy, error) { + rows, err := s.db.Query( + `SELECT id, domain, destination_url, destination_port, ssl_certificate_id, npm_proxy_id, health_status, health_checked_at, created_at, updated_at + FROM standalone_proxies ORDER BY created_at DESC`, + ) + if err != nil { + return nil, fmt.Errorf("query standalone proxies: %w", err) + } + defer rows.Close() + + proxies := []StandaloneProxy{} + for rows.Next() { + var p StandaloneProxy + if err := rows.Scan(&p.ID, &p.Domain, &p.DestinationURL, &p.DestinationPort, &p.SSLCertificateID, + &p.NpmProxyID, &p.HealthStatus, &p.HealthCheckedAt, &p.CreatedAt, &p.UpdatedAt); err != nil { + return nil, fmt.Errorf("scan standalone proxy: %w", err) + } + proxies = append(proxies, p) + } + return proxies, rows.Err() +} + +// UpdateStandaloneProxy updates an existing standalone proxy's mutable fields. +func (s *Store) UpdateStandaloneProxy(p StandaloneProxy) error { + p.UpdatedAt = Now() + result, err := s.db.Exec( + `UPDATE standalone_proxies SET domain=?, destination_url=?, destination_port=?, ssl_certificate_id=?, npm_proxy_id=?, health_status=?, health_checked_at=?, updated_at=? 
+ WHERE id=?`, + p.Domain, p.DestinationURL, p.DestinationPort, p.SSLCertificateID, + p.NpmProxyID, p.HealthStatus, p.HealthCheckedAt, p.UpdatedAt, p.ID, + ) + if err != nil { + return fmt.Errorf("update standalone proxy: %w", err) + } + n, _ := result.RowsAffected() + if n == 0 { + return fmt.Errorf("standalone proxy %s: %w", p.ID, ErrNotFound) + } + return nil +} + +// DeleteStandaloneProxy removes a standalone proxy by ID. +func (s *Store) DeleteStandaloneProxy(id string) error { + result, err := s.db.Exec(`DELETE FROM standalone_proxies WHERE id = ?`, id) + if err != nil { + return fmt.Errorf("delete standalone proxy: %w", err) + } + n, _ := result.RowsAffected() + if n == 0 { + return fmt.Errorf("standalone proxy %s: %w", id, ErrNotFound) + } + return nil +} + +// UpdateProxyHealth updates the health status and check timestamp for a standalone proxy. +func (s *Store) UpdateProxyHealth(id string, status string) error { + ts := Now() + result, err := s.db.Exec( + `UPDATE standalone_proxies SET health_status=?, health_checked_at=?, updated_at=? WHERE id=?`, + status, ts, ts, id, + ) + if err != nil { + return fmt.Errorf("update proxy health: %w", err) + } + n, _ := result.RowsAffected() + if n == 0 { + return fmt.Errorf("standalone proxy %s: %w", id, ErrNotFound) + } + return nil +} diff --git a/internal/store/store.go b/internal/store/store.go index 9dbda01..e4cf857 100644 --- a/internal/store/store.go +++ b/internal/store/store.go @@ -81,6 +81,8 @@ func (s *Store) runMigrations() error { `ALTER TABLE stages ADD COLUMN enable_proxy INTEGER NOT NULL DEFAULT 1`, // Add ssl_certificate_id to settings (2026-03-29). `ALTER TABLE settings ADD COLUMN ssl_certificate_id INTEGER NOT NULL DEFAULT 0`, + // Add stale_threshold_days to settings (2026-03-30). 
+ `ALTER TABLE settings ADD COLUMN stale_threshold_days INTEGER NOT NULL DEFAULT 7`, } for _, m := range migrations { @@ -98,6 +100,9 @@ func (s *Store) runMigrations() error { `CREATE INDEX IF NOT EXISTS idx_stages_project_id ON stages(project_id)`, `CREATE INDEX IF NOT EXISTS idx_stage_env_stage_id ON stage_env(stage_id)`, `CREATE INDEX IF NOT EXISTS idx_volumes_project_id ON volumes(project_id)`, + `CREATE INDEX IF NOT EXISTS idx_event_log_severity ON event_log(severity)`, + `CREATE INDEX IF NOT EXISTS idx_event_log_source ON event_log(source)`, + `CREATE INDEX IF NOT EXISTS idx_event_log_created_at ON event_log(created_at)`, } for _, idx := range indexes { if _, err := s.db.Exec(idx); err != nil { @@ -250,6 +255,28 @@ CREATE TABLE IF NOT EXISTS volumes ( created_at TEXT NOT NULL DEFAULT (datetime('now')), updated_at TEXT NOT NULL DEFAULT (datetime('now')) ); + +CREATE TABLE IF NOT EXISTS event_log ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + source TEXT NOT NULL DEFAULT '', + severity TEXT NOT NULL DEFAULT 'info', + message TEXT NOT NULL DEFAULT '', + metadata TEXT NOT NULL DEFAULT '{}', + created_at TEXT NOT NULL DEFAULT (datetime('now')) +); + +CREATE TABLE IF NOT EXISTS standalone_proxies ( + id TEXT PRIMARY KEY, + domain TEXT NOT NULL UNIQUE, + destination_url TEXT NOT NULL DEFAULT '', + destination_port INTEGER NOT NULL DEFAULT 0, + ssl_certificate_id INTEGER NOT NULL DEFAULT 0, + npm_proxy_id INTEGER NOT NULL DEFAULT 0, + health_status TEXT NOT NULL DEFAULT 'unknown', + health_checked_at TEXT NOT NULL DEFAULT '', + created_at TEXT NOT NULL DEFAULT (datetime('now')), + updated_at TEXT NOT NULL DEFAULT (datetime('now')) +); ` // Now returns the current time formatted for SQLite storage. 
diff --git a/plans/observability-proxy-mgmt/CONTEXT.md b/plans/observability-proxy-mgmt/CONTEXT.md new file mode 100644 index 0000000..025cb0e --- /dev/null +++ b/plans/observability-proxy-mgmt/CONTEXT.md @@ -0,0 +1,52 @@ +# Feature Context: Observability & Proxy Management + +## Configuration +- **Development mode:** Automated +- **Execution mode:** Orchestrator +- **Strategy:** Incremental +- **Build (full):** `make build` +- **Build (frontend):** `cd web && npm install && npm run build` +- **Build (backend):** `go build -o docker-watcher ./cmd/server` +- **Test:** `go test ./...` +- **Lint (backend):** `go vet ./...` +- **Lint (frontend):** `cd web && npm run check` +- **Dev server:** `make dev` (port: 8080) + +## Current State +Feature branch just created. No implementation yet. Codebase is fully working on main. + +## Temporary Workarounds +(none yet) + +## Cross-Phase Dependencies +- Phases 2 & 3 depend on Phase 1 (schema, event_log table, store methods) +- Phases 4, 5, 6, 7 depend on their respective backend phases (1-3) for API endpoints +- Phase 8 depends on Phases 1-3 for backend infrastructure and event system + +## Deferred Work +(none yet) + +## Failed Approaches +(none yet) + +## Review Findings Log +(none yet) + +## Phase Execution Log +| Phase | Agent Used | Test Writer | Parallel | Notes | +|-------|-----------|-------------|----------|-------| +| (none yet) | | | | | + +## Environment & Runtime Notes +- Build is currently blocked on Go 1.25 transitive dep from Docker SDK — may need to use Go 1.24 toolchain +- SQLite has MaxOpenConns=1, so all DB operations are serialized +- Frontend is embedded into Go binary via embed.FS + +## Implementation Notes +- Event bus (`internal/events/bus.go`) uses buffered channels (64 cap), non-blocking publish +- NPM client (`internal/npm/client.go`) handles JWT auth with auto-refresh +- Store uses additive migrations — new `ALTER TABLE` statements are appended to runMigrations(), errors ignored for idempotency 
+- New tables use `CREATE TABLE IF NOT EXISTS` in the schema constant +- All API responses use envelope pattern: `{success: bool, data?: T, error?: string}` +- Frontend types in `web/src/lib/types.ts` mirror Go models +- API functions centralized in `web/src/lib/api.ts` diff --git a/plans/observability-proxy-mgmt/PLAN.md b/plans/observability-proxy-mgmt/PLAN.md new file mode 100644 index 0000000..8f4ec19 --- /dev/null +++ b/plans/observability-proxy-mgmt/PLAN.md @@ -0,0 +1,71 @@ +# Feature: Observability & Proxy Management + +**Branch:** `feature/observability-proxy-mgmt` +**Base branch:** `main` +**Created:** 2026-03-30 +**Status:** 🟡 In Progress +**Strategy:** Incremental +**Mode:** Automated +**Execution:** Orchestrator + +## Summary + +Extend Docker Watcher with four interconnected features: stale container detection, +standalone proxy management with health monitoring, a unified proxy viewer, and a +persistent event log — plus container stats and notification triggers. + +## Build & Test Commands +- **Build (frontend):** `cd web && npm install && npm run build` +- **Build (backend):** `go build -o docker-watcher ./cmd/server` +- **Build (full):** `make build` +- **Test (backend):** `go test ./...` +- **Lint (backend):** `go vet ./...` +- **Lint (frontend):** `cd web && npm run check` + +## Tech Stack Summary +- **Backend:** Go 1.24, chi v5 router, SQLite (modernc.org/sqlite), Docker SDK (moby/moby/client) +- **Frontend:** SvelteKit 2.15, Svelte 5, TypeScript 5.7, Tailwind CSS 4, Vite 6 +- **Real-time:** Server-Sent Events with auto-reconnect +- **Auth:** JWT + optional OIDC +- **Encryption:** AES-256-GCM for credentials + +## Project Conventions +- **Go:** gofmt, small interfaces, error wrapping with `fmt.Errorf("context: %w", err)`, constructor injection +- **DB:** Single-row settings, additive migrations via `ALTER TABLE` (errors ignored for idempotency), `CREATE TABLE IF NOT EXISTS` for new tables +- **API:** Envelope pattern `{success, data?, error?}`, 
chi route groups, admin middleware for writes +- **Frontend:** Svelte 5 runes ($state, $derived, $effect), TypeScript interfaces mirroring Go models, centralized api.ts, custom components (no UI library) +- **Files:** Feature-organized, small focused files +- **State:** Immutable patterns, no mutation + +## Phases + +- [ ] Phase 1: Schema, Models & Event Log Backend [domain: backend] → [subplan](./phase-1-schema-eventlog.md) +- [ ] Phase 2: Stale Container Detection [domain: backend] → [subplan](./phase-2-stale-detection.md) +- [ ] Phase 3: Direct Proxy Creation with Validation [domain: backend] → [subplan](./phase-3-proxy-creation.md) +- [ ] Phase 4: Unified Proxy Viewer UI [domain: frontend] → [subplan](./phase-4-proxy-viewer.md) +- [ ] Phase 5: Stale Containers UI [domain: frontend] → [subplan](./phase-5-stale-ui.md) +- [ ] Phase 6: Direct Proxy Creation UI [domain: frontend] → [subplan](./phase-6-proxy-creation-ui.md) +- [ ] Phase 7: Event Log UI [domain: frontend] → [subplan](./phase-7-eventlog-ui.md) +- [ ] Phase 8: Container Stats & Notifications [domain: fullstack] → [subplan](./phase-8-stats-notifications.md) + +**Parallelizable phases:** +- Phases 4, 5, 6, 7 are all frontend phases that touch different routes/components and can potentially run in parallel after all backend phases (1-3) complete. 
+ +## Phase Progress Log + +| Phase | Domain | Status | Review | Build | Committed | +|-------|--------|--------|--------|-------|-----------| +| Phase 1: Schema & Event Log | backend | ⬜ Not Started | ⬜ | ⬜ | ⬜ | +| Phase 2: Stale Detection | backend | ⬜ Not Started | ⬜ | ⬜ | ⬜ | +| Phase 3: Proxy Creation | backend | ⬜ Not Started | ⬜ | ⬜ | ⬜ | +| Phase 4: Proxy Viewer UI | frontend | ⬜ Not Started | ⬜ | ⬜ | ⬜ | +| Phase 5: Stale Containers UI | frontend | ⬜ Not Started | ⬜ | ⬜ | ⬜ | +| Phase 6: Proxy Creation UI | frontend | ⬜ Not Started | ⬜ | ⬜ | ⬜ | +| Phase 7: Event Log UI | frontend | ⬜ Not Started | ⬜ | ⬜ | ⬜ | +| Phase 8: Stats & Notifications | fullstack | ⬜ Not Started | ⬜ | ⬜ | ⬜ | + +## Final Review +- [ ] Comprehensive code review +- [ ] Full build passes +- [ ] Full test suite passes +- [ ] Merged to `main` diff --git a/plans/observability-proxy-mgmt/phase-1-schema-eventlog.md b/plans/observability-proxy-mgmt/phase-1-schema-eventlog.md new file mode 100644 index 0000000..247d673 --- /dev/null +++ b/plans/observability-proxy-mgmt/phase-1-schema-eventlog.md @@ -0,0 +1,60 @@ +# Phase 1: Schema, Models & Event Log Backend + +**Status:** ⬜ Not Started +**Parent plan:** [PLAN.md](./PLAN.md) +**Domain:** backend + +## Objective +Lay the database foundation for all new features and implement the persistent event log system. 
+ +## Tasks + +- [ ] Task 1: Add `event_log` table to schema (id INTEGER PK AUTOINCREMENT, source TEXT, severity TEXT, message TEXT, metadata TEXT JSON, created_at TEXT) +- [ ] Task 2: Add `standalone_proxies` table to schema (id TEXT PK, domain TEXT UNIQUE, destination_url TEXT, destination_port INTEGER, ssl_certificate_id INTEGER, npm_proxy_id INTEGER, health_status TEXT, health_checked_at TEXT, created_at TEXT, updated_at TEXT) +- [ ] Task 3: Add `stale_threshold_days` column to settings table (migration, default 7) +- [ ] Task 4: Create `internal/store/eventlog.go` — store methods: InsertEvent, ListEvents (paginated, filterable by severity/source/date range), GetEventStats (counts by severity), PruneEvents (delete old entries) +- [ ] Task 5: Create `internal/store/standalone_proxy.go` — store methods: CreateStandaloneProxy, GetStandaloneProxy, ListStandaloneProxies, UpdateStandaloneProxy, DeleteStandaloneProxy, UpdateProxyHealth +- [ ] Task 6: Create Go models in `internal/store/models.go` — EventLog struct, StandaloneProxy struct +- [ ] Task 7: Update settings model to include stale_threshold_days field; update GetSettings/SaveSettings +- [ ] Task 8: Enhance event bus to auto-persist warn/error events — add a subscriber in events.Bus that writes to store +- [ ] Task 9: Add API endpoints: `GET /api/events/log` (paginated, filterable), `GET /api/events/log/stats` +- [ ] Task 10: Add new SSE event type `event_log` — broadcast persistent events in real-time +- [ ] Task 11: Add frontend types: EventLogEntry, StandaloneProxy interfaces in types.ts +- [ ] Task 12: Add API functions in api.ts: fetchEventLog, fetchEventLogStats + +## Files to Modify/Create +- `internal/store/store.go` — Add schema for event_log, standalone_proxies tables; migration for stale_threshold_days +- `internal/store/models.go` — Add EventLog, StandaloneProxy structs; update Settings struct +- `internal/store/eventlog.go` — NEW: Event log store methods +-
`internal/store/standalone_proxy.go` — NEW: Standalone proxy store methods +- `internal/store/settings.go` — Update GetSettings/SaveSettings for new field +- `internal/events/bus.go` — Add persistent event subscriber +- `internal/api/router.go` — Mount new event log routes +- `internal/api/eventlog.go` — NEW: Event log HTTP handlers +- `web/src/lib/types.ts` — Add EventLogEntry, StandaloneProxy types +- `web/src/lib/api.ts` — Add fetchEventLog, fetchEventLogStats functions + +## Acceptance Criteria +- event_log and standalone_proxies tables created on startup (migration is idempotent) +- stale_threshold_days setting accessible via settings API +- Events with warn/error severity auto-persisted from event bus +- GET /api/events/log returns paginated, filterable results +- GET /api/events/log/stats returns severity counts +- Frontend types and API functions ready for downstream UI phases +- Existing functionality unchanged — all current tests/builds pass + +## Notes +- Follow existing migration pattern: ALTER TABLE errors ignored for idempotency +- event_log metadata is a JSON TEXT column for flexible structured data +- Pagination follows offset/limit pattern (no cursor — SQLite is simple enough) +- Event log pruning can be called from a cron job later (Phase 8) + +## Review Checklist +- [ ] All tasks completed +- [ ] Code follows project conventions +- [ ] No unintended side effects +- [ ] Build passes +- [ ] Tests pass (new + existing) + +## Handoff to Next Phase + diff --git a/plans/observability-proxy-mgmt/phase-2-stale-detection.md b/plans/observability-proxy-mgmt/phase-2-stale-detection.md new file mode 100644 index 0000000..aa10c15 --- /dev/null +++ b/plans/observability-proxy-mgmt/phase-2-stale-detection.md @@ -0,0 +1,55 @@ +# Phase 2: Stale Container Detection + +**Status:** ⬜ Not Started +**Parent plan:** [PLAN.md](./PLAN.md) +**Domain:** backend + +## Objective +Implement a periodic scanner that detects containers managed by docker-watcher
which have been non-running for more than N configurable days, and exposes them via API. + +## Tasks + +- [ ] Task 1: Create `internal/stale/scanner.go` — Scanner struct with dependencies (store, docker client, event bus) +- [ ] Task 2: Implement scan logic: query all instances from store, check Docker container state via Docker SDK, compare against stale_threshold_days from settings +- [ ] Task 3: Add `last_alive_at` column to instances table (migration) — updated when instance is seen running +- [ ] Task 4: Update deployer/instance lifecycle to set last_alive_at when container starts/is seen running +- [ ] Task 5: Implement stale detection: instance is stale if status != 'running' AND (now - last_alive_at) > threshold days +- [ ] Task 6: Emit event_log warnings when containers become newly stale (avoid re-emitting for already-known stale containers) +- [ ] Task 7: Register scanner as cron job (reuse existing robfig/cron infrastructure from registry poller) +- [ ] Task 8: Add API endpoints: `GET /api/containers/stale` (list stale with project/stage info), `POST /api/containers/stale/{id}/cleanup` (remove single), `POST /api/containers/stale/cleanup` (bulk remove) +- [ ] Task 9: Cleanup handler: stop container via Docker SDK, remove instance from store, emit event +- [ ] Task 10: Wire scanner into main.go startup (after store, docker client, event bus init) + +## Files to Modify/Create +- `internal/stale/scanner.go` — NEW: Stale container scanner +- `internal/store/store.go` — Migration for last_alive_at column +- `internal/store/models.go` — Update Instance struct with LastAliveAt field +- `internal/store/instances.go` — Update queries to include last_alive_at; add UpdateLastAliveAt method +- `internal/api/router.go` — Mount stale container routes +- `internal/api/stale.go` — NEW: Stale container HTTP handlers +- `cmd/server/main.go` — Wire scanner with cron + +## Acceptance Criteria +- Scanner runs on configurable interval (e.g., every hour) +-
Stale containers correctly identified based on threshold +- GET /api/containers/stale returns list with project name, stage name, image tag, last alive timestamp, days stale +- Cleanup endpoints properly stop Docker containers and remove from store +- Events emitted when containers become stale +- Existing deploy flow unaffected β€” last_alive_at updated on successful deploy +- Build passes, existing tests pass + +## Notes +- Scanner should handle gracefully: containers that no longer exist in Docker (already removed externally) +- Bulk cleanup should be admin-only +- Consider: scan interval could be derived from stale_threshold_days (e.g., scan every threshold/7 days, min 1h) +- Don't remove containers that are in 'removing' status (already being cleaned up) + +## Review Checklist +- [ ] All tasks completed +- [ ] Code follows project conventions +- [ ] No unintended side effects +- [ ] Build passes +- [ ] Tests pass (new + existing) + +## Handoff to Next Phase + diff --git a/plans/observability-proxy-mgmt/phase-3-proxy-creation.md b/plans/observability-proxy-mgmt/phase-3-proxy-creation.md new file mode 100644 index 0000000..c713044 --- /dev/null +++ b/plans/observability-proxy-mgmt/phase-3-proxy-creation.md @@ -0,0 +1,81 @@ +# Phase 3: Direct Proxy Creation with Validation + +**Status:** ⬜ Not Started +**Parent plan:** [PLAN.md](./PLAN.md) +**Domain:** backend + +## Objective +Implement standalone proxy creation with a multi-step validation pipeline that checks destination reachability, and periodic health monitoring for all standalone proxies. 
+ +## Tasks + +- [ ] Task 1: Create `internal/proxy/validator.go` — validation pipeline: + - URL/port syntax validation + - DNS resolution check + - TCP port reachability (net.DialTimeout, 5s) + - HTTP health probe (GET to destination, 10s timeout) + - Returns structured ValidationResult with per-step pass/fail and diagnostic hints +- [ ] Task 2: Create `internal/proxy/hints.go` — diagnostic hint generator: + - DNS failure → "Domain cannot be resolved. Check DNS settings or use an IP address." + - TCP refused → "Port {port} is not accepting connections. Check if the service is running and the port is correct." + - TCP timeout → "Connection timed out. Possible firewall blocking. Check network/firewall rules." + - Host unreachable → "Host is not reachable. Verify the IP address and network connectivity." + - HTTP error → "Service responded with HTTP {status}. The service may not be healthy." +- [ ] Task 3: Create `internal/proxy/manager.go` — proxy lifecycle: + - CreateProxy: validate destination, create NPM proxy host (using npm.Client), assign SSL cert from settings, save to standalone_proxies table + - UpdateProxy: re-validate, update NPM proxy host, update store + - DeleteProxy: remove NPM proxy host, remove from store + - GetProxy/ListProxies: read from store with health status +- [ ] Task 4: Create `internal/proxy/health.go` — periodic health monitor: + - Cron job that checks all standalone proxies + - HTTP GET to destination URL/port + - Updates health_status (healthy/unhealthy/unknown) and health_checked_at in store + - Emits event_log on status change (healthy→unhealthy or vice versa) +- [ ] Task 5: Add API endpoints: + - `POST /api/proxies/validate` — run validation without creating + - `POST /api/proxies` — create standalone proxy + - `GET /api/proxies` — list standalone proxies + - `GET /api/proxies/{id}` — get single proxy + - `PUT /api/proxies/{id}` — update proxy + - `DELETE /api/proxies/{id}` — delete proxy + - `GET
/api/proxies/all` — merged view: standalone + deploy-managed proxies (for Phase 4 UI) +- [ ] Task 6: Wire health monitor cron job in main.go +- [ ] Task 7: Add frontend API functions in api.ts: validateProxy, createProxy, listProxies, getProxy, updateProxy, deleteProxy, listAllProxies +- [ ] Task 8: Add frontend types: ValidationResult, ValidationStep, ProxyHealthStatus + +## Files to Modify/Create +- `internal/proxy/validator.go` — NEW: Validation pipeline +- `internal/proxy/hints.go` — NEW: Diagnostic hints +- `internal/proxy/manager.go` — NEW: Proxy lifecycle management +- `internal/proxy/health.go` — NEW: Health monitoring +- `internal/api/router.go` — Mount proxy routes +- `internal/api/proxy.go` — NEW: Proxy HTTP handlers +- `cmd/server/main.go` — Wire proxy manager and health monitor +- `web/src/lib/types.ts` — Add ValidationResult, ProxyHealthStatus types +- `web/src/lib/api.ts` — Add proxy API functions + +## Acceptance Criteria +- Validation pipeline returns structured results with specific failure hints +- POST /api/proxies/validate runs full check without side effects +- Proxy creation creates NPM proxy host with SSL cert from global settings +- Health monitor runs periodically and updates proxy status +- Events emitted on health status changes +- GET /api/proxies/all merges standalone and deploy-managed proxy data +- Build passes, existing tests pass + +## Notes +- Validation should be fast (short timeouts) — user waits for results +- Health monitor interval: every 5 minutes (configurable later) +- For /api/proxies/all: query NPM for all proxy hosts, join with instances table for managed proxies, join with standalone_proxies for standalone ones +- SSL cert auto-assigned from settings.ssl_certificate_id +- Consider: proxy domain must be unique across both standalone and managed proxies + +## Review Checklist +- [ ] All tasks completed +- [ ] Code follows project conventions +- [ ] No unintended side effects +- [ ] Build passes +- [
] Tests pass (new + existing) + +## Handoff to Next Phase + diff --git a/plans/observability-proxy-mgmt/phase-4-proxy-viewer.md b/plans/observability-proxy-mgmt/phase-4-proxy-viewer.md new file mode 100644 index 0000000..e77218c --- /dev/null +++ b/plans/observability-proxy-mgmt/phase-4-proxy-viewer.md @@ -0,0 +1,56 @@ +# Phase 4: Unified Proxy Viewer UI + +**Status:** ⬜ Not Started +**Parent plan:** [PLAN.md](./PLAN.md) +**Domain:** frontend + +## Objective +Build a unified proxy viewer page showing ALL proxies (deploy-managed and standalone) with grouping, filtering, and real-time health indicators. + +## Tasks + +- [ ] Task 1: Create route `/proxies` with `+page.svelte` and `+page.ts` data loader +- [ ] Task 2: Create ProxyCard component — displays: domain, destination, SSL badge, health indicator (green/yellow/red dot), proxy type badge (managed/standalone), last health check timestamp +- [ ] Task 3: Create ProxyGroup component — collapsible section with project name header, stage sub-groups, proxy count badge +- [ ] Task 4: Create StandaloneProxyGroup component — separate collapsible section for user-created proxies +- [ ] Task 5: Implement filtering: by project, stage, health status (healthy/unhealthy/unknown), proxy type (managed/standalone), free-text search by domain/destination +- [ ] Task 6: Filter bar component with dropdown selects and search input +- [ ] Task 7: SSE integration — subscribe to proxy health events, update health indicators in real-time +- [ ] Task 8: Empty state — friendly message when no proxies exist, with link to create one +- [ ] Task 9: Add navigation link in sidebar layout (+layout.svelte) +- [ ] Task 10: Add i18n keys for proxy viewer page + +## Files to Modify/Create +- `web/src/routes/proxies/+page.svelte` — NEW: Proxy viewer page +- `web/src/routes/proxies/+page.ts` — NEW: Data loader +- `web/src/lib/components/ProxyCard.svelte` — NEW: Individual proxy display +- `web/src/lib/components/ProxyGroup.svelte` —
NEW: Collapsible project/stage group +- `web/src/lib/components/ProxyFilter.svelte` — NEW: Filter bar +- `web/src/routes/+layout.svelte` — Add proxies nav link +- `web/src/lib/i18n/en.ts` (or equivalent) — Add proxy viewer strings + +## Acceptance Criteria +- All proxies visible: both deploy-managed and standalone +- Proxies grouped by project/stage in collapsible sections +- Health indicators show real-time status (green=healthy, red=unhealthy, yellow=unknown) +- Filtering works: project, stage, health, type, text search +- SSE updates health indicators without page refresh +- Navigation accessible from sidebar +- Responsive layout (mobile-friendly) + +## Notes +- Use existing component patterns (ConfirmDialog, FormField styles, etc.) +- Follow existing Svelte 5 patterns ($state, $derived, $effect) +- The /api/proxies/all endpoint from Phase 3 provides the data source +- Health indicator should pulse/animate briefly on status change +- Consider: show proxy count in sidebar nav badge + +## Review Checklist +- [ ] All tasks completed +- [ ] Code follows project conventions +- [ ] No unintended side effects +- [ ] Build passes +- [ ] Tests pass (new + existing) + +## Handoff to Next Phase + diff --git a/plans/observability-proxy-mgmt/phase-5-stale-ui.md b/plans/observability-proxy-mgmt/phase-5-stale-ui.md new file mode 100644 index 0000000..28adfb6 --- /dev/null +++ b/plans/observability-proxy-mgmt/phase-5-stale-ui.md @@ -0,0 +1,55 @@ +# Phase 5: Stale Containers UI + +**Status:** ⬜ Not Started +**Parent plan:** [PLAN.md](./PLAN.md) +**Domain:** frontend + +## Objective +Build the stale containers dashboard widget and dedicated view, with cleanup actions and settings configuration.
+ +## Tasks + +- [ ] Task 1: Add API functions in api.ts: fetchStaleContainers, cleanupStaleContainer, bulkCleanupStaleContainers +- [ ] Task 2: Create StaleContainerCard component — shows: container name, project, stage, image tag, last alive timestamp, "X days stale" badge (color-coded by severity) +- [ ] Task 3: Create stale containers section on dashboard (+page.svelte) — count badge, mini-list of top 5 offenders, "View all" link +- [ ] Task 4: Create dedicated route `/containers/stale` with full stale container list +- [ ] Task 5: Individual cleanup action — ConfirmDialog with warning, calls cleanup API +- [ ] Task 6: Bulk cleanup action — "Clean up all" button with confirmation, progress indicator +- [ ] Task 7: Settings integration — add stale_threshold_days field to settings page with validation (min 1 day) +- [ ] Task 8: Add navigation link or sub-nav for stale containers +- [ ] Task 9: Add i18n keys for stale containers + +## Files to Modify/Create +- `web/src/lib/api.ts` — Add stale container API functions +- `web/src/lib/types.ts` — Add StaleContainer interface +- `web/src/lib/components/StaleContainerCard.svelte` — NEW: Stale container display +- `web/src/routes/+page.svelte` — Add stale containers dashboard widget +- `web/src/routes/containers/stale/+page.svelte` — NEW: Dedicated stale view +- `web/src/routes/containers/stale/+page.ts` — NEW: Data loader +- `web/src/routes/settings/+page.svelte` — Add stale threshold setting field +- `web/src/routes/+layout.svelte` — Add nav link if needed + +## Acceptance Criteria +- Dashboard shows stale container count and top offenders +- Dedicated page lists all stale containers with details +- Individual cleanup removes container with confirmation +- Bulk cleanup works with progress feedback +- Settings page allows configuring stale threshold +- Severity coloring: 7-14 days = yellow, 14+ days = red +- Responsive layout + +## Notes +- Reuse existing ConfirmDialog for destructive actions
+- Dashboard widget should not slow down initial page load (lazy load or small payload) +- Stale container data comes from GET /api/containers/stale (Phase 2) +- Settings update uses existing PUT /api/settings endpoint + +## Review Checklist +- [ ] All tasks completed +- [ ] Code follows project conventions +- [ ] No unintended side effects +- [ ] Build passes +- [ ] Tests pass (new + existing) + +## Handoff to Next Phase + diff --git a/plans/observability-proxy-mgmt/phase-6-proxy-creation-ui.md b/plans/observability-proxy-mgmt/phase-6-proxy-creation-ui.md new file mode 100644 index 0000000..7ccf7df --- /dev/null +++ b/plans/observability-proxy-mgmt/phase-6-proxy-creation-ui.md @@ -0,0 +1,54 @@ +# Phase 6: Direct Proxy Creation UI + +**Status:** ⬜ Not Started +**Parent plan:** [PLAN.md](./PLAN.md) +**Domain:** frontend + +## Objective +Build the proxy creation form with live validation feedback, diagnostic hints, and management actions (edit/delete). + +## Tasks + +- [ ] Task 1: Create "Create Proxy" form component — fields: destination URL/IP, port, domain (auto-suggested from subdomain pattern), optional custom subdomain override +- [ ] Task 2: Live validation — debounced calls to POST /api/proxies/validate as user types (300ms debounce) +- [ ] Task 3: Validation result display — step-by-step checklist with icons: + - ✅ DNS resolution OK / ❌ DNS resolution failed + - ✅ TCP port reachable / ❌ TCP port not reachable + - ✅ HTTP responding / ❌ HTTP not responding + - Each failure shows the diagnostic hint from the backend +- [ ] Task 4: Create proxy submission — calls POST /api/proxies, shows success toast with health indicator +- [ ] Task 5: Edit proxy — modal or inline form, pre-populated with current values, re-validates on save +- [ ] Task 6: Delete proxy — ConfirmDialog with domain name confirmation +- [ ] Task 7: Integration with proxy viewer page — "Create Proxy" button in the proxy viewer header +- [ ] Task 8: Domain auto-suggestion —
when user enters destination, suggest domain based on subdomain_pattern from settings +- [ ] Task 9: Add i18n keys for proxy creation + +## Files to Modify/Create +- `web/src/lib/components/ProxyForm.svelte` — NEW: Create/edit proxy form +- `web/src/lib/components/ValidationChecklist.svelte` — NEW: Step-by-step validation display +- `web/src/routes/proxies/+page.svelte` — Add "Create Proxy" button and modal/panel +- `web/src/lib/api.ts` — Ensure validateProxy, createProxy, updateProxy, deleteProxy are present (from Phase 3) + +## Acceptance Criteria +- Form validates destination in real-time with debouncing +- Each validation step shows pass/fail with diagnostic hints +- Proxy creation works end-to-end (form → API → NPM → success) +- Edit and delete work for existing standalone proxies +- Domain auto-suggestion works from settings pattern +- Error states handled gracefully (network errors, API failures) + +## Notes +- Validation should show a loading spinner while in progress +- Don't validate on every keystroke — use 300ms debounce +- If all validation steps fail, still allow creation (user might know better — just warn) +- SSL certificate is applied automatically from global settings (no cert picker in form) + +## Review Checklist +- [ ] All tasks completed +- [ ] Code follows project conventions +- [ ] No unintended side effects +- [ ] Build passes +- [ ] Tests pass (new + existing) + +## Handoff to Next Phase + diff --git a/plans/observability-proxy-mgmt/phase-7-eventlog-ui.md b/plans/observability-proxy-mgmt/phase-7-eventlog-ui.md new file mode 100644 index 0000000..d17e39e --- /dev/null +++ b/plans/observability-proxy-mgmt/phase-7-eventlog-ui.md @@ -0,0 +1,54 @@ +# Phase 7: Event Log UI + +**Status:** ⬜ Not Started +**Parent plan:** [PLAN.md](./PLAN.md) +**Domain:** frontend + +## Objective +Build a persistent, searchable event log viewer with real-time streaming, filters, and resource linking.
+ +## Tasks + +- [ ] Task 1: Create route `/events` with `+page.svelte` and `+page.ts` data loader +- [ ] Task 2: Create EventLogEntry component — timestamp, severity badge (info=blue, warn=yellow, error=red), source icon (container/proxy/deploy/system), message text, expandable metadata section +- [ ] Task 3: Create EventLogFilter component — filters: severity multi-select, source multi-select, date range picker (start/end), free-text search +- [ ] Task 4: Implement pagination — "Load more" button at bottom (offset/limit pattern matching API) +- [ ] Task 5: SSE integration — subscribe to event_log events, prepend new entries at top with subtle highlight animation +- [ ] Task 6: Quick actions — clickable links to related resources (e.g., click container name → go to project/stage, click proxy domain → go to proxy viewer) +- [ ] Task 7: Stats header — show counts by severity (from GET /api/events/log/stats), with colored badges +- [ ] Task 8: Add navigation link in sidebar +- [ ] Task 9: Add i18n keys for event log page + +## Files to Modify/Create +- `web/src/routes/events/+page.svelte` — NEW: Event log page +- `web/src/routes/events/+page.ts` — NEW: Data loader +- `web/src/lib/components/EventLogEntry.svelte` — NEW: Event entry display +- `web/src/lib/components/EventLogFilter.svelte` — NEW: Filter controls +- `web/src/routes/+layout.svelte` — Add events nav link +- `web/src/lib/sse.ts` — Add event_log SSE subscription helper (if needed) + +## Acceptance Criteria +- Event log shows all persistent events with severity and source +- Filters work: severity, source, date range, text search +- New events stream in real-time via SSE without page refresh +- Pagination loads older events on demand +- Quick actions link to related resources +- Stats header shows severity distribution +- Responsive layout + +## Notes +- Follow existing SSE patterns from deploy logs viewer +- Date range filter: consider "last hour", "last 24h", "last 7 days"
presets + custom range +- Metadata section is JSON — render as formatted key-value pairs, not raw JSON +- Resource linking: parse source and metadata to construct navigation URLs +- Consider: auto-scroll to top when new event arrives (if user is at top), otherwise show "N new events" badge + +## Review Checklist +- [ ] All tasks completed +- [ ] Code follows project conventions +- [ ] No unintended side effects +- [ ] Build passes +- [ ] Tests pass (new + existing) + +## Handoff to Next Phase + diff --git a/plans/observability-proxy-mgmt/phase-8-stats-notifications.md b/plans/observability-proxy-mgmt/phase-8-stats-notifications.md new file mode 100644 index 0000000..857236b --- /dev/null +++ b/plans/observability-proxy-mgmt/phase-8-stats-notifications.md @@ -0,0 +1,67 @@ +# Phase 8: Container Stats & Notifications + +**Status:** ⬜ Not Started +**Parent plan:** [PLAN.md](./PLAN.md) +**Domain:** fullstack + +## Objective +Add container resource monitoring (CPU/memory), notification triggers for operational events, and a system health dashboard summary.
+ +## Tasks + +- [ ] Task 1: Create `internal/docker/stats.go` — wrapper around Docker Stats API to get CPU %, memory usage/limit for a container +- [ ] Task 2: Add API endpoint: `GET /api/projects/{id}/stages/{stage}/instances/{iid}/stats` — returns current CPU/memory for an instance +- [ ] Task 3: Create SSE event type `container_stats` — periodically broadcast stats for running containers (every 30s) +- [ ] Task 4: Extend notification stub (`internal/notify/`) — implement webhook sender for events: + - Stale container detected + - Proxy health failure + - Deploy failure/rollback + - Format: JSON payload with event type, details, timestamp +- [ ] Task 5: Add notification settings UI — enable/disable per event type in settings page +- [ ] Task 6: Update instance cards in frontend — show CPU % bar and memory usage badge +- [ ] Task 7: Create ContainerStats component — mini CPU/memory visualization (progress bars) +- [ ] Task 8: Dashboard system health summary card — total containers (running/stopped), healthy/unhealthy proxies, recent error count (last 24h) +- [ ] Task 9: Wire notification sender to event bus — subscribe to relevant event types, fire notifications +- [ ] Task 10: Add event log pruning cron job — delete events older than 30 days (configurable) +- [ ] Task 11: Add i18n keys for stats and notifications + +## Files to Modify/Create +- `internal/docker/stats.go` — NEW: Docker Stats API wrapper +- `internal/api/stats.go` — NEW: Stats HTTP handler +- `internal/api/router.go` — Mount stats endpoint +- `internal/notify/sender.go` — Implement webhook notification sender +- `internal/notify/types.go` — NEW: Notification event types and payloads +- `cmd/server/main.go` — Wire notification subscriber and event pruning cron +- `web/src/lib/types.ts` — Add ContainerStats, NotificationSettings types +- `web/src/lib/api.ts` — Add fetchContainerStats function +- `web/src/lib/components/ContainerStats.svelte` — NEW: CPU/memory
display +- `web/src/lib/components/SystemHealthCard.svelte` — NEW: Dashboard summary +- `web/src/routes/+page.svelte` — Add system health card to dashboard +- `web/src/routes/settings/+page.svelte` — Add notification settings section +- `web/src/lib/sse.ts` — Add container_stats SSE handler + +## Acceptance Criteria +- Container stats (CPU/memory) visible on instance cards +- Stats update in real-time via SSE +- Webhook notifications fire for configured event types +- Dashboard shows system health summary +- Event log auto-prunes old entries +- Settings page allows configuring notification preferences +- Build passes, existing tests pass + +## Notes +- Docker Stats API returns a stream — read one snapshot and close, don't hold the connection +- CPU calculation: (container CPU delta / system CPU delta) * 100 — needs two reads +- Memory: usage_bytes / limit_bytes * 100 for percentage +- Notification webhook format should be compatible with common receivers (Slack webhook, Discord webhook, generic HTTP) +- System health card: consider caching aggregated stats to avoid N+1 queries on dashboard load + +## Review Checklist +- [ ] All tasks completed +- [ ] Code follows project conventions +- [ ] No unintended side effects +- [ ] Build passes +- [ ] Tests pass (new + existing) + +## Handoff to Next Phase + diff --git a/web/src/lib/api.ts b/web/src/lib/api.ts index 9bbc5b1..30d9d68 100644 --- a/web/src/lib/api.ts +++ b/web/src/lib/api.ts @@ -2,6 +2,8 @@ import type { ApiEnvelope, Deploy, DeployLog, + EventLogEntry, + EventLogStats, InspectResult, Instance, NpmCertificate, @@ -338,4 +340,29 @@ export function deleteVolume( return del<{ deleted: string }>(`/api/projects/${projectId}/volumes/${volId}`); } +// ── Event Log ─────────────────────────────────────────────────────── + +export function fetchEventLog(params?: { + severity?: string; + source?: string; + since?: string; + until?: string; + limit?: number; + offset?: number; +}): Promise { + const query =
new URLSearchParams(); + if (params?.severity) query.set('severity', params.severity); + if (params?.source) query.set('source', params.source); + if (params?.since) query.set('since', params.since); + if (params?.until) query.set('until', params.until); + if (params?.limit) query.set('limit', String(params.limit)); + if (params?.offset) query.set('offset', String(params.offset)); + const qs = query.toString(); + return get(`/api/events/log${qs ? `?${qs}` : ''}`); +} + +export function fetchEventLogStats(): Promise { + return get('/api/events/log/stats'); +} + export { ApiError }; diff --git a/web/src/lib/types.ts b/web/src/lib/types.ts index c7dedc8..7c78603 100644 --- a/web/src/lib/types.ts +++ b/web/src/lib/types.ts @@ -106,6 +106,7 @@ export interface Settings { polling_interval: string; base_volume_path: string; ssl_certificate_id: number; + stale_threshold_days: number; updated_at: string; } @@ -170,3 +171,35 @@ export interface Volume { created_at: string; updated_at: string; } + +/** A persistent event log entry. */ +export interface EventLogEntry { + id: number; + source: string; + severity: 'info' | 'warn' | 'error'; + message: string; + metadata: string; + created_at: string; +} + +/** Severity counts for the event log. */ +export interface EventLogStats { + info: number; + warn: number; + error: number; + total: number; +} + +/** A standalone reverse proxy not tied to a project. 
*/ +export interface StandaloneProxy { + id: string; + domain: string; + destination_url: string; + destination_port: number; + ssl_certificate_id: number; + npm_proxy_id: number; + health_status: 'unknown' | 'healthy' | 'unhealthy'; + health_checked_at: string; + created_at: string; + updated_at: string; +} From aefecdffdf0c1cc215fcc0a58bb845de7646b5f3 Mon Sep 17 00:00:00 2001 From: "alexei.dolgolyov" Date: Mon, 30 Mar 2026 11:12:25 +0300 Subject: [PATCH 2/7] feat(observability): phase 2 - stale container detection Add periodic scanner for stale containers: - Cron-based scanner (hourly) detects non-running containers exceeding threshold - last_alive_at tracking on instances, updated on deploy/start/restart - API: GET /api/containers/stale, POST cleanup (single + bulk) - Event log warnings emitted for newly stale containers - Graceful handling of externally removed containers --- cmd/server/main.go | 9 + internal/api/instances.go | 7 + internal/api/router.go | 16 ++ internal/api/stale.go | 172 ++++++++++++++++++ internal/deployer/deployer.go | 3 + internal/stale/scanner.go | 316 ++++++++++++++++++++++++++++++++++ internal/store/instances.go | 89 ++++++++-- internal/store/models.go | 1 + internal/store/store.go | 2 + 9 files changed, 596 insertions(+), 19 deletions(-) create mode 100644 internal/api/stale.go create mode 100644 internal/stale/scanner.go diff --git a/cmd/server/main.go b/cmd/server/main.go index c369f7d..cf148cb 100644 --- a/cmd/server/main.go +++ b/cmd/server/main.go @@ -25,6 +25,7 @@ import ( "github.com/alexei/docker-watcher/internal/notify" "github.com/alexei/docker-watcher/internal/npm" "github.com/alexei/docker-watcher/internal/registry" + "github.com/alexei/docker-watcher/internal/stale" "github.com/alexei/docker-watcher/internal/store" "github.com/alexei/docker-watcher/internal/webhook" ) @@ -130,8 +131,15 @@ func main() { } } + // Initialize stale container scanner. 
+ staleScanner := stale.New(db, dockerClient, eventBus) + if err := staleScanner.Start("1h"); err != nil { + slog.Warn("failed to start stale scanner", "error", err) + } + // Build API server. apiServer := api.NewServer(db, dockerClient, npmClient, dep, webhookHandler, eventBus, encKey) + apiServer.SetStaleScanner(staleScanner) router := apiServer.Router() // Serve embedded static files for the SPA frontend. @@ -173,6 +181,7 @@ func main() { slog.Info("shutting down...") // Stop accepting new work. + staleScanner.Stop() poller.Stop() // Drain in-progress deploys and notifications. diff --git a/internal/api/instances.go b/internal/api/instances.go index 4b936bc..26eca47 100644 --- a/internal/api/instances.go +++ b/internal/api/instances.go @@ -196,6 +196,13 @@ func (s *Server) controlInstance(w http.ResponseWriter, r *http.Request, action slog.Error("update instance status", "instance_id", instanceID, "status", newStatus, "error", err) } + // Track last_alive_at when container becomes running. + if newStatus == "running" { + if err := s.store.UpdateLastAliveAt(instanceID); err != nil { + slog.Error("update last_alive_at", "instance_id", instanceID, "error", err) + } + } + respondJSON(w, http.StatusOK, map[string]string{ "instance_id": instanceID, "action": action, diff --git a/internal/api/router.go b/internal/api/router.go index 8f4e656..606045d 100644 --- a/internal/api/router.go +++ b/internal/api/router.go @@ -11,6 +11,7 @@ import ( "github.com/alexei/docker-watcher/internal/docker" "github.com/alexei/docker-watcher/internal/events" "github.com/alexei/docker-watcher/internal/npm" + "github.com/alexei/docker-watcher/internal/stale" "github.com/alexei/docker-watcher/internal/store" "github.com/alexei/docker-watcher/internal/webhook" ) @@ -26,6 +27,7 @@ type Server struct { encKey [32]byte localAuth *auth.LocalAuth oidcProvider *auth.OIDCProvider + staleScanner *stale.Scanner } // NewServer creates a new API Server with all required dependencies. 
@@ -60,6 +62,12 @@ func NewServer( return s } +// SetStaleScanner sets the stale scanner on the server. +// Called after both the API server and scanner are initialized. +func (s *Server) SetStaleScanner(scanner *stale.Scanner) { + s.staleScanner = scanner +} + // initOIDCProvider creates an OIDC provider from settings. Errors are logged, not fatal. func (s *Server) initOIDCProvider(ctx context.Context, as store.AuthSettings) { // Decrypt the OIDC client secret if it's encrypted. @@ -135,6 +143,9 @@ func (s *Server) Router() chi.Router { r.Get("/settings", s.getSettings) r.Get("/settings/npm-certificates", s.listNpmCertificates) + // Stale container endpoints. + r.Get("/containers/stale", s.listStaleContainers) + // Admin-only routes: require admin role. r.Group(func(r chi.Router) { r.Use(auth.AdminOnly) @@ -192,6 +203,11 @@ func (s *Server) Router() chi.Router { r.Post("/test", s.testRegistry) }) + // Stale container cleanup endpoints (admin-only). + // Bulk route must be registered before parameterized route. + r.Post("/containers/stale/cleanup", s.bulkCleanupStaleContainers) + r.Post("/containers/stale/{id}/cleanup", s.cleanupStaleContainer) + // Settings endpoints. r.Put("/settings", s.updateSettings) r.Get("/settings/webhook-url", s.getWebhookURL) diff --git a/internal/api/stale.go b/internal/api/stale.go new file mode 100644 index 0000000..26d9c4e --- /dev/null +++ b/internal/api/stale.go @@ -0,0 +1,172 @@ +package api + +import ( + "errors" + "log/slog" + "net/http" + + "github.com/go-chi/chi/v5" + + "github.com/alexei/docker-watcher/internal/crypto" + "github.com/alexei/docker-watcher/internal/events" + "github.com/alexei/docker-watcher/internal/stale" + "github.com/alexei/docker-watcher/internal/store" +) + +// listStaleContainers handles GET /api/containers/stale. 
+func (s *Server) listStaleContainers(w http.ResponseWriter, r *http.Request) { + if s.staleScanner == nil { + respondError(w, http.StatusServiceUnavailable, "stale scanner not initialized") + return + } + + staleInstances, err := s.staleScanner.FindStaleInstances(r.Context()) + if err != nil { + respondError(w, http.StatusInternalServerError, "failed to find stale containers: "+err.Error()) + return + } + + if staleInstances == nil { + staleInstances = []stale.StaleInstance{} + } + respondJSON(w, http.StatusOK, staleInstances) +} + +// cleanupStaleContainer handles POST /api/containers/stale/{id}/cleanup. +// Stops the Docker container, removes the NPM proxy, and deletes the instance from the store. +func (s *Server) cleanupStaleContainer(w http.ResponseWriter, r *http.Request) { + instanceID := chi.URLParam(r, "id") + + inst, err := s.store.GetInstanceByID(instanceID) + if err != nil { + if errors.Is(err, store.ErrNotFound) { + respondNotFound(w, "instance") + return + } + respondError(w, http.StatusInternalServerError, "failed to get instance: "+err.Error()) + return + } + + // Don't remove instances already being cleaned up. + if inst.Status == "removing" { + respondError(w, http.StatusConflict, "instance is already being removed") + return + } + + if err := s.cleanupInstance(r, inst); err != nil { + respondError(w, http.StatusInternalServerError, "failed to cleanup instance: "+err.Error()) + return + } + + respondJSON(w, http.StatusOK, map[string]string{"cleaned": instanceID}) +} + +// bulkCleanupStaleContainers handles POST /api/containers/stale/cleanup. +// Cleans up all currently stale containers. 
+func (s *Server) bulkCleanupStaleContainers(w http.ResponseWriter, r *http.Request) { + if s.staleScanner == nil { + respondError(w, http.StatusServiceUnavailable, "stale scanner not initialized") + return + } + + staleInstances, err := s.staleScanner.FindStaleInstances(r.Context()) + if err != nil { + respondError(w, http.StatusInternalServerError, "failed to find stale containers: "+err.Error()) + return + } + + var cleaned []string + var failed []string + + for _, si := range staleInstances { + if si.Instance.Status == "removing" { + continue + } + if err := s.cleanupInstance(r, si.Instance); err != nil { + slog.Error("bulk stale cleanup failed", + "instance_id", si.Instance.ID, "error", err) + failed = append(failed, si.Instance.ID) + continue + } + cleaned = append(cleaned, si.Instance.ID) + } + + respondJSON(w, http.StatusOK, map[string]any{ + "cleaned": cleaned, + "failed": failed, + }) +} + +// cleanupInstance stops a Docker container, removes the NPM proxy, deletes +// the store record, and emits an event. +func (s *Server) cleanupInstance(r *http.Request, inst store.Instance) error { + ctx := r.Context() + + // Mark as removing. + if err := s.store.UpdateInstanceStatus(inst.ID, "removing"); err != nil { + slog.Warn("stale cleanup: update status to removing", "instance_id", inst.ID, "error", err) + } + + // Stop and remove Docker container. + if inst.ContainerID != "" { + if err := s.docker.StopContainer(ctx, inst.ContainerID, 10); err != nil { + slog.Warn("stale cleanup: stop container", "container_id", inst.ContainerID, "error", err) + } + if err := s.docker.RemoveContainer(ctx, inst.ContainerID, true); err != nil { + slog.Warn("stale cleanup: remove container", "container_id", inst.ContainerID, "error", err) + } + } + + // Delete NPM proxy host if present. 
+ if inst.NpmProxyID > 0 { + settings, err := s.store.GetSettings() + if err == nil { + npmPassword, err := crypto.Decrypt(s.encKey, settings.NpmPassword) + if err == nil { + if authErr := s.npm.Authenticate(ctx, settings.NpmEmail, npmPassword); authErr == nil { + if delErr := s.npm.DeleteProxyHost(ctx, inst.NpmProxyID); delErr != nil { + slog.Warn("stale cleanup: delete proxy host", "proxy_id", inst.NpmProxyID, "error", delErr) + } + } + } + } + } + + // Delete instance record. + if err := s.store.DeleteInstance(inst.ID); err != nil { + return err + } + + // Emit cleanup event. + s.emitStaleCleanupEvent(inst) + + return nil +} + +// emitStaleCleanupEvent publishes an event when a stale container is cleaned up. +func (s *Server) emitStaleCleanupEvent(inst store.Instance) { + msg := "Stale container cleaned up: " + inst.ID + " (tag: " + inst.ImageTag + ")" + + evt, err := s.store.InsertEvent(store.EventLog{ + Source: "stale_cleanup", + Severity: "info", + Message: msg, + Metadata: `{"instance_id":"` + inst.ID + `","project_id":"` + inst.ProjectID + `","stage_id":"` + inst.StageID + `"}`, + }) + if err != nil { + slog.Error("stale cleanup: failed to persist event", "error", err) + return + } + + s.eventBus.Publish(events.Event{ + Type: events.EventLog, + Payload: events.EventLogPayload{ + ID: evt.ID, + Source: "stale_cleanup", + Severity: "info", + Message: msg, + Metadata: evt.Metadata, + CreatedAt: evt.CreatedAt, + }, + }) +} diff --git a/internal/deployer/deployer.go b/internal/deployer/deployer.go index f8442f9..4b0cda3 100644 --- a/internal/deployer/deployer.go +++ b/internal/deployer/deployer.go @@ -333,6 +333,9 @@ func (d *Deployer) executeDeploy( if err := d.store.UpdateInstanceStatus(instanceID, "running"); err != nil { slog.Warn("update instance status to running", "error", err) } + if err := d.store.UpdateLastAliveAt(instanceID); err != nil { + slog.Warn("update last_alive_at on deploy", "instance_id", instanceID, "error", err) + } 
d.publishInstanceStatus(instanceID, project.ID, stage.ID, "running") d.logDeploy(deployID, "Container started", "info") diff --git a/internal/stale/scanner.go b/internal/stale/scanner.go new file mode 100644 index 0000000..94063fa --- /dev/null +++ b/internal/stale/scanner.go @@ -0,0 +1,316 @@ +package stale + +import ( + "context" + "encoding/json" + "fmt" + "log/slog" + "sync" + "time" + + "github.com/alexei/docker-watcher/internal/docker" + "github.com/alexei/docker-watcher/internal/events" + "github.com/alexei/docker-watcher/internal/store" + "github.com/robfig/cron/v3" +) + +// StaleInstance holds enriched info about a stale container for API responses. +type StaleInstance struct { + Instance store.Instance `json:"instance"` + ProjectName string `json:"project_name"` + StageName string `json:"stage_name"` + DaysStale int `json:"days_stale"` +} + +// Scanner periodically checks for stale containers that have been +// non-running for longer than the configured threshold. +type Scanner struct { + store *store.Store + docker *docker.Client + eventBus *events.Bus + + cron *cron.Cron + mu sync.Mutex + entryID cron.EntryID + running bool + + // knownStale tracks instance IDs that have already had a stale event emitted, + // to avoid re-emitting warnings for the same instance. + knownStale map[string]struct{} +} + +// New creates a new stale container scanner. +func New(st *store.Store, dockerClient *docker.Client, eventBus *events.Bus) *Scanner { + return &Scanner{ + store: st, + docker: dockerClient, + eventBus: eventBus, + cron: cron.New(), + knownStale: make(map[string]struct{}), + } +} + +// Start begins the periodic stale container scan with the given interval (e.g., "1h", "30m"). +// If the scanner is already running, it stops and restarts with the new interval. 
+func (s *Scanner) Start(interval string) error { + s.mu.Lock() + defer s.mu.Unlock() + + duration, err := time.ParseDuration(interval) + if err != nil { + return fmt.Errorf("parse stale scan interval %q: %w", interval, err) + } + + if s.running { + s.cron.Remove(s.entryID) + } + + spec := fmt.Sprintf("@every %s", duration.String()) + entryID, err := s.cron.AddFunc(spec, func() { + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) + defer cancel() + if scanErr := s.Scan(ctx); scanErr != nil { + slog.Warn("stale scanner: scan error", "error", scanErr) + } + }) + if err != nil { + return fmt.Errorf("schedule stale scanner: %w", err) + } + + s.entryID = entryID + if !s.running { + s.cron.Start() + } + s.running = true + + slog.Info("stale scanner started", "interval", duration.String()) + return nil +} + +// Stop gracefully shuts down the scanner. +func (s *Scanner) Stop() { + s.mu.Lock() + defer s.mu.Unlock() + + if s.running { + ctx := s.cron.Stop() + <-ctx.Done() + s.running = false + slog.Info("stale scanner stopped") + } +} + +// Scan performs a single stale-container scan cycle. +// It updates last_alive_at for running containers and detects newly stale ones. +func (s *Scanner) Scan(ctx context.Context) error { + settings, err := s.store.GetSettings() + if err != nil { + return fmt.Errorf("get settings: %w", err) + } + + thresholdDays := settings.StaleThresholdDays + if thresholdDays <= 0 { + thresholdDays = 7 + } + + // Get all instances from the store. + instances, err := s.store.ListAllInstances() + if err != nil { + return fmt.Errorf("list all instances: %w", err) + } + + if len(instances) == 0 { + return nil + } + + // Get all managed Docker containers to check live state. + containers, err := s.docker.ListContainers(ctx, nil) + if err != nil { + return fmt.Errorf("list docker containers: %w", err) + } + + // Build a lookup: instance ID -> container state. 
+ containerStateByInstanceID := make(map[string]string, len(containers)) + for _, c := range containers { + if c.InstanceID != "" { + containerStateByInstanceID[c.InstanceID] = c.State + } + } + + now := time.Now().UTC() + currentStaleIDs := make(map[string]struct{}) + + for _, inst := range instances { + // Skip instances already being cleaned up. + if inst.Status == "removing" { + continue + } + + dockerState := containerStateByInstanceID[inst.ID] + + // If the container is running in Docker, update last_alive_at. + if dockerState == "running" { + if err := s.store.UpdateLastAliveAt(inst.ID); err != nil { + slog.Warn("stale scanner: failed to update last_alive_at", + "instance_id", inst.ID, "error", err) + } + // Also sync store status if it was out of date. + if inst.Status != "running" { + if err := s.store.UpdateInstanceStatus(inst.ID, "running"); err != nil { + slog.Warn("stale scanner: failed to sync instance status", + "instance_id", inst.ID, "error", err) + } + } + continue + } + + // Container is not running. Check if it's stale. + if inst.LastAliveAt == "" { + // Never been seen running. Use created_at as fallback. + inst.LastAliveAt = inst.CreatedAt + } + + lastAlive, parseErr := time.Parse("2006-01-02 15:04:05", inst.LastAliveAt) + if parseErr != nil { + slog.Warn("stale scanner: failed to parse last_alive_at", + "instance_id", inst.ID, "last_alive_at", inst.LastAliveAt, "error", parseErr) + continue + } + + daysSinceAlive := int(now.Sub(lastAlive).Hours() / 24) + if daysSinceAlive < thresholdDays { + continue + } + + // This instance is stale. + currentStaleIDs[inst.ID] = struct{}{} + + // Emit event only if this is newly detected as stale. + if _, alreadyKnown := s.knownStale[inst.ID]; !alreadyKnown { + s.emitStaleEvent(inst, daysSinceAlive) + } + } + + // Update known stale set: remove IDs that are no longer stale. 
+ s.knownStale = currentStaleIDs + + return nil +} + +// FindStaleInstances returns all currently stale instances with enriched project/stage info. +func (s *Scanner) FindStaleInstances(ctx context.Context) ([]StaleInstance, error) { + settings, err := s.store.GetSettings() + if err != nil { + return nil, fmt.Errorf("get settings: %w", err) + } + + thresholdDays := settings.StaleThresholdDays + if thresholdDays <= 0 { + thresholdDays = 7 + } + + instances, err := s.store.ListAllInstances() + if err != nil { + return nil, fmt.Errorf("list all instances: %w", err) + } + + containers, err := s.docker.ListContainers(ctx, nil) + if err != nil { + return nil, fmt.Errorf("list docker containers: %w", err) + } + + containerStateByInstanceID := make(map[string]string, len(containers)) + for _, c := range containers { + if c.InstanceID != "" { + containerStateByInstanceID[c.InstanceID] = c.State + } + } + + now := time.Now().UTC() + var result []StaleInstance + + for _, inst := range instances { + if inst.Status == "removing" { + continue + } + + // If Docker says it's running, it's not stale. + if containerStateByInstanceID[inst.ID] == "running" { + continue + } + + lastAlive := inst.LastAliveAt + if lastAlive == "" { + lastAlive = inst.CreatedAt + } + + lastAliveTime, parseErr := time.Parse("2006-01-02 15:04:05", lastAlive) + if parseErr != nil { + continue + } + + daysSinceAlive := int(now.Sub(lastAliveTime).Hours() / 24) + if daysSinceAlive < thresholdDays { + continue + } + + // Look up project and stage names. 
+ projectName := inst.ProjectID + stageName := inst.StageID + if proj, err := s.store.GetProjectByID(inst.ProjectID); err == nil { + projectName = proj.Name + } + if stg, err := s.store.GetStageByID(inst.StageID); err == nil { + stageName = stg.Name + } + + result = append(result, StaleInstance{ + Instance: inst, + ProjectName: projectName, + StageName: stageName, + DaysStale: daysSinceAlive, + }) + } + + return result, nil +} + +// emitStaleEvent publishes a warning event for a newly detected stale container. +func (s *Scanner) emitStaleEvent(inst store.Instance, daysStale int) { + metadata, _ := json.Marshal(map[string]any{ + "instance_id": inst.ID, + "project_id": inst.ProjectID, + "stage_id": inst.StageID, + "image_tag": inst.ImageTag, + "last_alive_at": inst.LastAliveAt, + "days_stale": daysStale, + }) + + msg := fmt.Sprintf("Container %s (tag: %s) has been non-running for %d days", + inst.ID, inst.ImageTag, daysStale) + + // Persist directly to event log. + evt, err := s.store.InsertEvent(store.EventLog{ + Source: "stale_scanner", + Severity: "warn", + Message: msg, + Metadata: string(metadata), + }) + if err != nil { + slog.Error("stale scanner: failed to persist event", "error", err) + return + } + + // Publish for SSE clients. + s.eventBus.Publish(events.Event{ + Type: events.EventLog, + Payload: events.EventLogPayload{ + ID: evt.ID, + Source: "stale_scanner", + Severity: "warn", + Message: msg, + Metadata: string(metadata), + CreatedAt: evt.CreatedAt, + }, + }) +} diff --git a/internal/store/instances.go b/internal/store/instances.go index 2cb4cfb..d0fb730 100644 --- a/internal/store/instances.go +++ b/internal/store/instances.go @@ -8,6 +8,20 @@ import ( "github.com/google/uuid" ) +// instanceColumns is the canonical column list for instance queries. 
+const instanceColumns = `id, stage_id, project_id, container_id, image_tag, subdomain, npm_proxy_id, status, port, last_alive_at, created_at, updated_at` + +// scanInstance scans a row into an Instance struct using the canonical column order. +func scanInstance(scanner interface{ Scan(...any) error }) (Instance, error) { + var inst Instance + err := scanner.Scan( + &inst.ID, &inst.StageID, &inst.ProjectID, &inst.ContainerID, &inst.ImageTag, + &inst.Subdomain, &inst.NpmProxyID, &inst.Status, &inst.Port, + &inst.LastAliveAt, &inst.CreatedAt, &inst.UpdatedAt, + ) + return inst, err +} + // CreateInstance inserts a new instance record. func (s *Store) CreateInstance(inst Instance) (Instance, error) { inst.ID = uuid.New().String() @@ -15,10 +29,11 @@ func (s *Store) CreateInstance(inst Instance) (Instance, error) { inst.UpdatedAt = inst.CreatedAt _, err := s.db.Exec( - `INSERT INTO instances (id, stage_id, project_id, container_id, image_tag, subdomain, npm_proxy_id, status, port, created_at, updated_at) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, + `INSERT INTO instances (`+instanceColumns+`) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, inst.ID, inst.StageID, inst.ProjectID, inst.ContainerID, inst.ImageTag, - inst.Subdomain, inst.NpmProxyID, inst.Status, inst.Port, inst.CreatedAt, inst.UpdatedAt, + inst.Subdomain, inst.NpmProxyID, inst.Status, inst.Port, + inst.LastAliveAt, inst.CreatedAt, inst.UpdatedAt, ) if err != nil { return Instance{}, fmt.Errorf("insert instance: %w", err) @@ -36,10 +51,11 @@ func (s *Store) CreateInstanceWithID(inst Instance) (Instance, error) { inst.UpdatedAt = inst.CreatedAt _, err := s.db.Exec( - `INSERT INTO instances (id, stage_id, project_id, container_id, image_tag, subdomain, npm_proxy_id, status, port, created_at, updated_at) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, + `INSERT INTO instances (`+instanceColumns+`) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, inst.ID, inst.StageID, inst.ProjectID, inst.ContainerID, 
inst.ImageTag, - inst.Subdomain, inst.NpmProxyID, inst.Status, inst.Port, inst.CreatedAt, inst.UpdatedAt, + inst.Subdomain, inst.NpmProxyID, inst.Status, inst.Port, + inst.LastAliveAt, inst.CreatedAt, inst.UpdatedAt, ) if err != nil { return Instance{}, fmt.Errorf("insert instance: %w", err) @@ -49,12 +65,9 @@ func (s *Store) CreateInstanceWithID(inst Instance) (Instance, error) { // GetInstanceByID returns a single instance by its ID. func (s *Store) GetInstanceByID(id string) (Instance, error) { - var inst Instance - err := s.db.QueryRow( - `SELECT id, stage_id, project_id, container_id, image_tag, subdomain, npm_proxy_id, status, port, created_at, updated_at - FROM instances WHERE id = ?`, id, - ).Scan(&inst.ID, &inst.StageID, &inst.ProjectID, &inst.ContainerID, &inst.ImageTag, - &inst.Subdomain, &inst.NpmProxyID, &inst.Status, &inst.Port, &inst.CreatedAt, &inst.UpdatedAt) + inst, err := scanInstance(s.db.QueryRow( + `SELECT `+instanceColumns+` FROM instances WHERE id = ?`, id, + )) if errors.Is(err, sql.ErrNoRows) { return Instance{}, fmt.Errorf("instance %s: %w", id, ErrNotFound) } @@ -67,8 +80,7 @@ func (s *Store) GetInstanceByID(id string) (Instance, error) { // GetInstancesByStageID returns all instances for a given stage. func (s *Store) GetInstancesByStageID(stageID string) ([]Instance, error) { rows, err := s.db.Query( - `SELECT id, stage_id, project_id, container_id, image_tag, subdomain, npm_proxy_id, status, port, created_at, updated_at - FROM instances WHERE stage_id = ? ORDER BY created_at DESC`, stageID, + `SELECT `+instanceColumns+` FROM instances WHERE stage_id = ? 
ORDER BY created_at DESC`, stageID, ) if err != nil { return nil, fmt.Errorf("query instances: %w", err) @@ -77,9 +89,29 @@ func (s *Store) GetInstancesByStageID(stageID string) ([]Instance, error) { instances := []Instance{} for rows.Next() { - var inst Instance - if err := rows.Scan(&inst.ID, &inst.StageID, &inst.ProjectID, &inst.ContainerID, &inst.ImageTag, - &inst.Subdomain, &inst.NpmProxyID, &inst.Status, &inst.Port, &inst.CreatedAt, &inst.UpdatedAt); err != nil { + inst, err := scanInstance(rows) + if err != nil { + return nil, fmt.Errorf("scan instance: %w", err) + } + instances = append(instances, inst) + } + return instances, rows.Err() +} + +// ListAllInstances returns all instances across all stages. +func (s *Store) ListAllInstances() ([]Instance, error) { + rows, err := s.db.Query( + `SELECT ` + instanceColumns + ` FROM instances ORDER BY created_at DESC`, + ) + if err != nil { + return nil, fmt.Errorf("query all instances: %w", err) + } + defer rows.Close() + + instances := []Instance{} + for rows.Next() { + inst, err := scanInstance(rows) + if err != nil { return nil, fmt.Errorf("scan instance: %w", err) } instances = append(instances, inst) @@ -91,10 +123,11 @@ func (s *Store) GetInstancesByStageID(stageID string) ([]Instance, error) { func (s *Store) UpdateInstance(inst Instance) error { inst.UpdatedAt = Now() result, err := s.db.Exec( - `UPDATE instances SET stage_id=?, project_id=?, container_id=?, image_tag=?, subdomain=?, npm_proxy_id=?, status=?, port=?, updated_at=? + `UPDATE instances SET stage_id=?, project_id=?, container_id=?, image_tag=?, subdomain=?, npm_proxy_id=?, status=?, port=?, last_alive_at=?, updated_at=? 
WHERE id=?`, inst.StageID, inst.ProjectID, inst.ContainerID, inst.ImageTag, - inst.Subdomain, inst.NpmProxyID, inst.Status, inst.Port, inst.UpdatedAt, inst.ID, + inst.Subdomain, inst.NpmProxyID, inst.Status, inst.Port, + inst.LastAliveAt, inst.UpdatedAt, inst.ID, ) if err != nil { return fmt.Errorf("update instance: %w", err) @@ -123,6 +156,24 @@ func (s *Store) UpdateInstanceStatus(id string, status string) error { return nil } +// UpdateLastAliveAt sets the last_alive_at timestamp for an instance. +// Called when an instance is seen running. +func (s *Store) UpdateLastAliveAt(id string) error { + ts := Now() + result, err := s.db.Exec( + `UPDATE instances SET last_alive_at=?, updated_at=? WHERE id=?`, + ts, ts, id, + ) + if err != nil { + return fmt.Errorf("update last_alive_at: %w", err) + } + n, _ := result.RowsAffected() + if n == 0 { + return fmt.Errorf("instance %s: %w", id, ErrNotFound) + } + return nil +} + // DeleteInstance removes an instance by ID. func (s *Store) DeleteInstance(id string) error { result, err := s.db.Exec(`DELETE FROM instances WHERE id = ?`, id) diff --git a/internal/store/models.go b/internal/store/models.go index 5b0ed1b..cd4eedd 100644 --- a/internal/store/models.go +++ b/internal/store/models.go @@ -71,6 +71,7 @@ type Instance struct { NpmProxyID int `json:"npm_proxy_id"` Status string `json:"status"` // running, stopped, failed, removing Port int `json:"port"` + LastAliveAt string `json:"last_alive_at"` CreatedAt string `json:"created_at"` UpdatedAt string `json:"updated_at"` } diff --git a/internal/store/store.go b/internal/store/store.go index e4cf857..0ea811f 100644 --- a/internal/store/store.go +++ b/internal/store/store.go @@ -83,6 +83,8 @@ func (s *Store) runMigrations() error { `ALTER TABLE settings ADD COLUMN ssl_certificate_id INTEGER NOT NULL DEFAULT 0`, // Add stale_threshold_days to settings (2026-03-30). 
`ALTER TABLE settings ADD COLUMN stale_threshold_days INTEGER NOT NULL DEFAULT 7`, + // Add last_alive_at to instances for stale container detection (2026-03-30). + `ALTER TABLE instances ADD COLUMN last_alive_at TEXT NOT NULL DEFAULT ''`, } for _, m := range migrations { From 7a85441b814378955795a0f16a4f0b6f0488142a Mon Sep 17 00:00:00 2001 From: "alexei.dolgolyov" Date: Mon, 30 Mar 2026 11:19:55 +0300 Subject: [PATCH 3/7] feat(observability): phase 3 - direct proxy creation with validation Add standalone proxy management: - Multi-step validation pipeline (DNS, TCP, HTTP) with diagnostic hints - Proxy lifecycle: create/update/delete via NPM API with SSL auto-assign - Periodic health monitoring (5min) with event log on status transitions - Unified /api/proxies/all endpoint merging standalone + managed proxies - Frontend types and API functions for downstream UI phases --- cmd/server/main.go | 10 ++ internal/api/proxy.go | 192 +++++++++++++++++++++++ internal/api/router.go | 23 +++ internal/proxy/health.go | 184 ++++++++++++++++++++++ internal/proxy/hints.go | 74 +++++++++ internal/proxy/manager.go | 297 ++++++++++++++++++++++++++++++++++++ internal/proxy/validator.go | 224 +++++++++++++++++++++++++++ web/src/lib/api.ts | 40 +++++ web/src/lib/types.ts | 33 +++- 9 files changed, 1076 insertions(+), 1 deletion(-) create mode 100644 internal/api/proxy.go create mode 100644 internal/proxy/health.go create mode 100644 internal/proxy/hints.go create mode 100644 internal/proxy/manager.go create mode 100644 internal/proxy/validator.go diff --git a/cmd/server/main.go b/cmd/server/main.go index cf148cb..06d04a4 100644 --- a/cmd/server/main.go +++ b/cmd/server/main.go @@ -24,6 +24,7 @@ import ( "github.com/alexei/docker-watcher/internal/logging" "github.com/alexei/docker-watcher/internal/notify" "github.com/alexei/docker-watcher/internal/npm" + "github.com/alexei/docker-watcher/internal/proxy" "github.com/alexei/docker-watcher/internal/registry" 
"github.com/alexei/docker-watcher/internal/stale" "github.com/alexei/docker-watcher/internal/store" @@ -137,9 +138,17 @@ func main() { slog.Warn("failed to start stale scanner", "error", err) } + // Initialize proxy manager and health monitor. + proxyManager := proxy.NewManager(db, npmClient) + proxyHealth := proxy.NewHealthMonitor(db, eventBus) + if err := proxyHealth.Start("5m"); err != nil { + slog.Warn("failed to start proxy health monitor", "error", err) + } + // Build API server. apiServer := api.NewServer(db, dockerClient, npmClient, dep, webhookHandler, eventBus, encKey) apiServer.SetStaleScanner(staleScanner) + apiServer.SetProxyManager(proxyManager) router := apiServer.Router() // Serve embedded static files for the SPA frontend. @@ -181,6 +190,7 @@ func main() { slog.Info("shutting down...") // Stop accepting new work. + proxyHealth.Stop() staleScanner.Stop() poller.Stop() diff --git a/internal/api/proxy.go b/internal/api/proxy.go new file mode 100644 index 0000000..698f7be --- /dev/null +++ b/internal/api/proxy.go @@ -0,0 +1,192 @@ +package api + +import ( + "context" + "net/http" + "time" + + "github.com/go-chi/chi/v5" + + "github.com/alexei/docker-watcher/internal/proxy" +) + +// validateProxy runs the validation pipeline without creating a proxy. +// POST /api/proxies/validate +func (s *Server) validateProxy(w http.ResponseWriter, r *http.Request) { + var req struct { + Host string `json:"host"` + Port int `json:"port"` + } + if !decodeJSON(w, r, &req) { + return + } + + if req.Host == "" { + respondError(w, http.StatusBadRequest, "host is required") + return + } + if req.Port < 1 || req.Port > 65535 { + respondError(w, http.StatusBadRequest, "port must be between 1 and 65535") + return + } + + ctx, cancel := context.WithTimeout(r.Context(), 30*time.Second) + defer cancel() + + result := proxy.ValidateDestination(ctx, req.Host, req.Port) + respondJSON(w, http.StatusOK, result) +} + +// createProxy creates a new standalone proxy. 
+// POST /api/proxies +func (s *Server) createProxy(w http.ResponseWriter, r *http.Request) { + if s.proxyManager == nil { + respondError(w, http.StatusServiceUnavailable, "proxy manager not configured") + return + } + + var req proxy.CreateProxyRequest + if !decodeJSON(w, r, &req) { + return + } + + if req.Domain == "" { + respondError(w, http.StatusBadRequest, "domain is required") + return + } + if req.DestinationURL == "" { + respondError(w, http.StatusBadRequest, "destination_url is required") + return + } + if req.DestinationPort < 1 || req.DestinationPort > 65535 { + respondError(w, http.StatusBadRequest, "destination_port must be between 1 and 65535") + return + } + + p, err := s.proxyManager.CreateProxy(r.Context(), req) + if err != nil { + respondError(w, http.StatusInternalServerError, err.Error()) + return + } + + respondJSON(w, http.StatusCreated, p) +} + +// listProxies returns all standalone proxies. +// GET /api/proxies +func (s *Server) listProxies(w http.ResponseWriter, r *http.Request) { + if s.proxyManager == nil { + respondError(w, http.StatusServiceUnavailable, "proxy manager not configured") + return + } + + proxies, err := s.proxyManager.ListProxies() + if err != nil { + respondError(w, http.StatusInternalServerError, err.Error()) + return + } + + respondJSON(w, http.StatusOK, proxies) +} + +// getProxy returns a single standalone proxy. +// GET /api/proxies/{id} +func (s *Server) getProxy(w http.ResponseWriter, r *http.Request) { + if s.proxyManager == nil { + respondError(w, http.StatusServiceUnavailable, "proxy manager not configured") + return + } + + id := chi.URLParam(r, "id") + p, err := s.proxyManager.GetProxy(id) + if err != nil { + if proxy.IsNotFound(err) { + respondNotFound(w, "proxy") + return + } + respondError(w, http.StatusInternalServerError, err.Error()) + return + } + + respondJSON(w, http.StatusOK, p) +} + +// updateProxy updates an existing standalone proxy. 
+// PUT /api/proxies/{id} +func (s *Server) updateProxy(w http.ResponseWriter, r *http.Request) { + if s.proxyManager == nil { + respondError(w, http.StatusServiceUnavailable, "proxy manager not configured") + return + } + + id := chi.URLParam(r, "id") + + var req proxy.UpdateProxyRequest + if !decodeJSON(w, r, &req) { + return + } + + if req.Domain == "" { + respondError(w, http.StatusBadRequest, "domain is required") + return + } + if req.DestinationURL == "" { + respondError(w, http.StatusBadRequest, "destination_url is required") + return + } + if req.DestinationPort < 1 || req.DestinationPort > 65535 { + respondError(w, http.StatusBadRequest, "destination_port must be between 1 and 65535") + return + } + + p, err := s.proxyManager.UpdateProxy(r.Context(), id, req) + if err != nil { + if proxy.IsNotFound(err) { + respondNotFound(w, "proxy") + return + } + respondError(w, http.StatusInternalServerError, err.Error()) + return + } + + respondJSON(w, http.StatusOK, p) +} + +// deleteProxy removes a standalone proxy. +// DELETE /api/proxies/{id} +func (s *Server) deleteProxy(w http.ResponseWriter, r *http.Request) { + if s.proxyManager == nil { + respondError(w, http.StatusServiceUnavailable, "proxy manager not configured") + return + } + + id := chi.URLParam(r, "id") + + if err := s.proxyManager.DeleteProxy(r.Context(), id); err != nil { + if proxy.IsNotFound(err) { + respondNotFound(w, "proxy") + return + } + respondError(w, http.StatusInternalServerError, err.Error()) + return + } + + respondJSON(w, http.StatusOK, map[string]string{"deleted": id}) +} + +// listAllProxies returns a merged view of standalone and deploy-managed proxies. 
+// GET /api/proxies/all +func (s *Server) listAllProxies(w http.ResponseWriter, r *http.Request) { + if s.proxyManager == nil { + respondError(w, http.StatusServiceUnavailable, "proxy manager not configured") + return + } + + views, err := s.proxyManager.ListAllProxies() + if err != nil { + respondError(w, http.StatusInternalServerError, err.Error()) + return + } + + respondJSON(w, http.StatusOK, views) +} diff --git a/internal/api/router.go b/internal/api/router.go index 606045d..eecfda2 100644 --- a/internal/api/router.go +++ b/internal/api/router.go @@ -11,6 +11,7 @@ import ( "github.com/alexei/docker-watcher/internal/docker" "github.com/alexei/docker-watcher/internal/events" "github.com/alexei/docker-watcher/internal/npm" + "github.com/alexei/docker-watcher/internal/proxy" "github.com/alexei/docker-watcher/internal/stale" "github.com/alexei/docker-watcher/internal/store" "github.com/alexei/docker-watcher/internal/webhook" @@ -28,6 +29,7 @@ type Server struct { localAuth *auth.LocalAuth oidcProvider *auth.OIDCProvider staleScanner *stale.Scanner + proxyManager *proxy.Manager } // NewServer creates a new API Server with all required dependencies. @@ -68,6 +70,12 @@ func (s *Server) SetStaleScanner(scanner *stale.Scanner) { s.staleScanner = scanner } +// SetProxyManager sets the proxy manager on the server. +// Called after both the API server and proxy manager are initialized. +func (s *Server) SetProxyManager(pm *proxy.Manager) { + s.proxyManager = pm +} + // initOIDCProvider creates an OIDC provider from settings. Errors are logged, not fatal. func (s *Server) initOIDCProvider(ctx context.Context, as store.AuthSettings) { // Decrypt the OIDC client secret if it's encrypted. @@ -146,10 +154,25 @@ func (s *Server) Router() chi.Router { // Stale container endpoints. r.Get("/containers/stale", s.listStaleContainers) + // Proxy endpoints (read-only for any authenticated user). 
+ r.Get("/proxies", s.listProxies) + r.Get("/proxies/all", s.listAllProxies) + r.Route("/proxies/{id}", func(r chi.Router) { + r.Get("/", s.getProxy) + }) + // Admin-only routes: require admin role. r.Group(func(r chi.Router) { r.Use(auth.AdminOnly) + // Proxy mutation endpoints. + r.Post("/proxies/validate", s.validateProxy) + r.Post("/proxies", s.createProxy) + r.Route("/proxies/{id}", func(r chi.Router) { + r.Put("/", s.updateProxy) + r.Delete("/", s.deleteProxy) + }) + // Config export (reveals project/infra details). r.Get("/config/export", s.exportConfig) diff --git a/internal/proxy/health.go b/internal/proxy/health.go new file mode 100644 index 0000000..323d08f --- /dev/null +++ b/internal/proxy/health.go @@ -0,0 +1,184 @@ +package proxy + +import ( + "context" + "encoding/json" + "fmt" + "log/slog" + "net/http" + "sync" + "time" + + "github.com/alexei/docker-watcher/internal/events" + "github.com/alexei/docker-watcher/internal/store" + "github.com/robfig/cron/v3" +) + +// HealthMonitor periodically checks the health of all standalone proxies. +type HealthMonitor struct { + store *store.Store + eventBus *events.Bus + + cron *cron.Cron + mu sync.Mutex + entryID cron.EntryID + running bool +} + +// NewHealthMonitor creates a new proxy health monitor. +func NewHealthMonitor(st *store.Store, eventBus *events.Bus) *HealthMonitor { + return &HealthMonitor{ + store: st, + eventBus: eventBus, + cron: cron.New(), + } +} + +// Start begins periodic health checks with the given interval (e.g., "5m", "1m"). +// If already running, it stops and restarts with the new interval. 
+func (h *HealthMonitor) Start(interval string) error { + h.mu.Lock() + defer h.mu.Unlock() + + duration, err := time.ParseDuration(interval) + if err != nil { + return fmt.Errorf("parse health check interval %q: %w", interval, err) + } + + if h.running { + h.cron.Remove(h.entryID) + } + + spec := fmt.Sprintf("@every %s", duration.String()) + entryID, err := h.cron.AddFunc(spec, func() { + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) + defer cancel() + if checkErr := h.CheckAll(ctx); checkErr != nil { + slog.Warn("proxy health monitor: check error", "error", checkErr) + } + }) + if err != nil { + return fmt.Errorf("schedule proxy health monitor: %w", err) + } + + h.entryID = entryID + if !h.running { + h.cron.Start() + } + h.running = true + + slog.Info("proxy health monitor started", "interval", duration.String()) + return nil +} + +// Stop gracefully shuts down the health monitor. +func (h *HealthMonitor) Stop() { + h.mu.Lock() + defer h.mu.Unlock() + + if h.running { + ctx := h.cron.Stop() + <-ctx.Done() + h.running = false + slog.Info("proxy health monitor stopped") + } +} + +// CheckAll performs a single health check cycle for all standalone proxies. +func (h *HealthMonitor) CheckAll(ctx context.Context) error { + proxies, err := h.store.ListStandaloneProxies() + if err != nil { + return fmt.Errorf("list standalone proxies: %w", err) + } + + for _, proxy := range proxies { + newStatus := checkProxyHealth(ctx, proxy.DestinationURL, proxy.DestinationPort) + oldStatus := proxy.HealthStatus + + if err := h.store.UpdateProxyHealth(proxy.ID, newStatus); err != nil { + slog.Warn("proxy health monitor: failed to update health", + "proxy_id", proxy.ID, "error", err) + continue + } + + // Emit event on status change. + if oldStatus != newStatus && oldStatus != "unknown" { + h.emitHealthEvent(proxy, oldStatus, newStatus) + } + } + + return nil +} + +// checkProxyHealth performs an HTTP GET to the destination and returns the health status. 
+func checkProxyHealth(ctx context.Context, host string, port int) string { + target := fmt.Sprintf("http://%s:%d/", host, port) + + reqCtx, cancel := context.WithTimeout(ctx, 10*time.Second) + defer cancel() + + req, err := http.NewRequestWithContext(reqCtx, http.MethodGet, target, nil) + if err != nil { + return "unhealthy" + } + + client := &http.Client{ + Timeout: 10 * time.Second, + CheckRedirect: func(*http.Request, []*http.Request) error { + return http.ErrUseLastResponse + }, + } + + resp, err := client.Do(req) + if err != nil { + return "unhealthy" + } + resp.Body.Close() + + if resp.StatusCode >= 500 { + return "unhealthy" + } + + return "healthy" +} + +// emitHealthEvent persists and publishes a health status change event. +func (h *HealthMonitor) emitHealthEvent(proxy store.StandaloneProxy, oldStatus, newStatus string) { + severity := "info" + if newStatus == "unhealthy" { + severity = "warn" + } + + msg := fmt.Sprintf("Proxy %s (%s) health changed: %s -> %s", + proxy.Domain, proxy.ID, oldStatus, newStatus) + + metadata, _ := json.Marshal(map[string]any{ + "proxy_id": proxy.ID, + "domain": proxy.Domain, + "old_status": oldStatus, + "new_status": newStatus, + }) + + evt, err := h.store.InsertEvent(store.EventLog{ + Source: "proxy_health", + Severity: severity, + Message: msg, + Metadata: string(metadata), + }) + if err != nil { + slog.Error("proxy health monitor: failed to persist event", "error", err) + return + } + + h.eventBus.Publish(events.Event{ + Type: events.EventLog, + Payload: events.EventLogPayload{ + ID: evt.ID, + Source: "proxy_health", + Severity: severity, + Message: msg, + Metadata: string(metadata), + CreatedAt: evt.CreatedAt, + }, + }) +} diff --git a/internal/proxy/hints.go b/internal/proxy/hints.go new file mode 100644 index 0000000..8c1fbdc --- /dev/null +++ b/internal/proxy/hints.go @@ -0,0 +1,74 @@ +package proxy + +import ( + "errors" + "fmt" + "net" + "strings" +) + +// diagnosticHint returns a user-friendly suggestion for a 
validation failure. +func diagnosticHint(step string, err error) string { + if err == nil { + return "" + } + + switch step { + case StepDNS: + return "Domain cannot be resolved. Check DNS settings or use an IP address." + + case StepTCP: + return tcpHintFromError(err) + + case StepHTTP: + return httpHint(err.Error()) + + default: + return "Validation failed: " + err.Error() + } +} + +// tcpHintFromError returns a specific hint based on the TCP error type. +func tcpHintFromError(err error) string { + if err == nil { + return "" + } + + var opErr *net.OpError + if errors.As(err, &opErr) { + lower := strings.ToLower(opErr.Err.Error()) + switch { + case strings.Contains(lower, "connection refused"): + return "Port is not accepting connections. Check if the service is running and the port is correct." + case strings.Contains(lower, "i/o timeout") || strings.Contains(lower, "timeout"): + return "Connection timed out. Possible firewall blocking. Check network/firewall rules." + case strings.Contains(lower, "no route to host") || strings.Contains(lower, "host is unreachable"): + return "Host is not reachable. Verify the IP address and network connectivity." + } + } + + msg := err.Error() + lower := strings.ToLower(msg) + switch { + case strings.Contains(lower, "connection refused"): + return "Port is not accepting connections. Check if the service is running and the port is correct." + case strings.Contains(lower, "timeout"): + return "Connection timed out. Possible firewall blocking. Check network/firewall rules." + default: + return fmt.Sprintf("TCP connection failed: %s", msg) + } +} + +// httpHint returns a specific hint based on the HTTP probe result. +func httpHint(msg string) string { + lower := strings.ToLower(msg) + + switch { + case strings.Contains(lower, "status"): + return msg // Already formatted by the caller with the status code. + case strings.Contains(lower, "timeout"): + return "HTTP health probe timed out. The service may be slow or unresponsive." 
+ default: + return "HTTP health probe failed: " + msg + } +} diff --git a/internal/proxy/manager.go b/internal/proxy/manager.go new file mode 100644 index 0000000..9a7af02 --- /dev/null +++ b/internal/proxy/manager.go @@ -0,0 +1,297 @@ +package proxy + +import ( + "context" + "errors" + "fmt" + "log/slog" + + "github.com/alexei/docker-watcher/internal/npm" + "github.com/alexei/docker-watcher/internal/store" +) + +// Manager handles the lifecycle of standalone proxy hosts. +type Manager struct { + store *store.Store + npm *npm.Client +} + +// NewManager creates a new proxy manager. +func NewManager(st *store.Store, npmClient *npm.Client) *Manager { + return &Manager{ + store: st, + npm: npmClient, + } +} + +// CreateProxyRequest is the input for creating a standalone proxy. +type CreateProxyRequest struct { + Domain string `json:"domain"` + DestinationURL string `json:"destination_url"` + DestinationPort int `json:"destination_port"` +} + +// UpdateProxyRequest is the input for updating a standalone proxy. +type UpdateProxyRequest struct { + Domain string `json:"domain"` + DestinationURL string `json:"destination_url"` + DestinationPort int `json:"destination_port"` +} + +// ProxyView is a unified view of both standalone and deploy-managed proxies. +type ProxyView struct { + ID string `json:"id"` + Domain string `json:"domain"` + Destination string `json:"destination"` + Type string `json:"type"` // "standalone" or "managed" + ProjectName string `json:"project_name,omitempty"` + StageName string `json:"stage_name,omitempty"` + HealthStatus string `json:"health_status"` + SSLEnabled bool `json:"ssl_enabled"` + NpmProxyID int `json:"npm_proxy_id"` + CreatedAt string `json:"created_at"` +} + +// CreateProxy validates the destination, creates an NPM proxy host, and saves to the store. +func (m *Manager) CreateProxy(ctx context.Context, req CreateProxyRequest) (store.StandaloneProxy, error) { + // Validate destination. 
+ result := ValidateDestination(ctx, req.DestinationURL, req.DestinationPort) + if !result.Valid { + return store.StandaloneProxy{}, fmt.Errorf("destination validation failed: %s", lastFailedStep(result)) + } + + // Load settings for SSL certificate and domain. + settings, err := m.store.GetSettings() + if err != nil { + return store.StandaloneProxy{}, fmt.Errorf("get settings: %w", err) + } + + // Build NPM proxy host config. + config := npm.ProxyHostConfig{ + DomainNames: []string{req.Domain}, + ForwardScheme: "http", + ForwardHost: req.DestinationURL, + ForwardPort: req.DestinationPort, + CertificateID: settings.SSLCertificateID, + SSLForced: settings.SSLCertificateID > 0, + BlockExploits: true, + AllowWebsocket: true, + HTTP2Support: true, + HSTSEnabled: settings.SSLCertificateID > 0, + Locations: []any{}, + } + + // Create NPM proxy host. + npmHost, err := m.npm.CreateProxyHost(ctx, config) + if err != nil { + return store.StandaloneProxy{}, fmt.Errorf("create NPM proxy host: %w", err) + } + + slog.Info("created NPM proxy host for standalone proxy", + "domain", req.Domain, "npm_proxy_id", npmHost.ID) + + // Save to store. + proxy, err := m.store.CreateStandaloneProxy(store.StandaloneProxy{ + Domain: req.Domain, + DestinationURL: req.DestinationURL, + DestinationPort: req.DestinationPort, + SSLCertificateID: settings.SSLCertificateID, + NpmProxyID: npmHost.ID, + HealthStatus: "unknown", + }) + if err != nil { + // Best effort: clean up the NPM host if store insert fails. + if delErr := m.npm.DeleteProxyHost(ctx, npmHost.ID); delErr != nil { + slog.Error("failed to clean up NPM proxy host after store error", + "npm_proxy_id", npmHost.ID, "error", delErr) + } + return store.StandaloneProxy{}, fmt.Errorf("save standalone proxy: %w", err) + } + + return proxy, nil +} + +// UpdateProxy re-validates the destination, updates the NPM proxy host, and updates the store. 
+func (m *Manager) UpdateProxy(ctx context.Context, id string, req UpdateProxyRequest) (store.StandaloneProxy, error) { + existing, err := m.store.GetStandaloneProxy(id) + if err != nil { + return store.StandaloneProxy{}, fmt.Errorf("get proxy: %w", err) + } + + // Validate new destination. + result := ValidateDestination(ctx, req.DestinationURL, req.DestinationPort) + if !result.Valid { + return store.StandaloneProxy{}, fmt.Errorf("destination validation failed: %s", lastFailedStep(result)) + } + + // Load settings for SSL certificate. + settings, err := m.store.GetSettings() + if err != nil { + return store.StandaloneProxy{}, fmt.Errorf("get settings: %w", err) + } + + // Update NPM proxy host. + config := npm.ProxyHostConfig{ + DomainNames: []string{req.Domain}, + ForwardScheme: "http", + ForwardHost: req.DestinationURL, + ForwardPort: req.DestinationPort, + CertificateID: settings.SSLCertificateID, + SSLForced: settings.SSLCertificateID > 0, + BlockExploits: true, + AllowWebsocket: true, + HTTP2Support: true, + HSTSEnabled: settings.SSLCertificateID > 0, + Locations: []any{}, + } + + if _, err := m.npm.UpdateProxyHost(ctx, existing.NpmProxyID, config); err != nil { + return store.StandaloneProxy{}, fmt.Errorf("update NPM proxy host: %w", err) + } + + // Update store. + updated := existing + updated.Domain = req.Domain + updated.DestinationURL = req.DestinationURL + updated.DestinationPort = req.DestinationPort + updated.SSLCertificateID = settings.SSLCertificateID + + if err := m.store.UpdateStandaloneProxy(updated); err != nil { + return store.StandaloneProxy{}, fmt.Errorf("update standalone proxy: %w", err) + } + + // Re-read from store to get updated timestamps. + return m.store.GetStandaloneProxy(id) +} + +// DeleteProxy removes the NPM proxy host and deletes from the store. 
+func (m *Manager) DeleteProxy(ctx context.Context, id string) error { + proxy, err := m.store.GetStandaloneProxy(id) + if err != nil { + return fmt.Errorf("get proxy: %w", err) + } + + // Delete NPM proxy host. + if proxy.NpmProxyID > 0 { + if err := m.npm.DeleteProxyHost(ctx, proxy.NpmProxyID); err != nil { + slog.Warn("failed to delete NPM proxy host (continuing with store deletion)", + "npm_proxy_id", proxy.NpmProxyID, "error", err) + } + } + + if err := m.store.DeleteStandaloneProxy(id); err != nil { + return fmt.Errorf("delete standalone proxy: %w", err) + } + + return nil +} + +// GetProxy returns a single standalone proxy by ID. +func (m *Manager) GetProxy(id string) (store.StandaloneProxy, error) { + proxy, err := m.store.GetStandaloneProxy(id) + if err != nil { + return store.StandaloneProxy{}, fmt.Errorf("get proxy: %w", err) + } + return proxy, nil +} + +// ListProxies returns all standalone proxies. +func (m *Manager) ListProxies() ([]store.StandaloneProxy, error) { + proxies, err := m.store.ListStandaloneProxies() + if err != nil { + return nil, fmt.Errorf("list proxies: %w", err) + } + return proxies, nil +} + +// ListAllProxies returns a merged view of standalone and deploy-managed proxies. +func (m *Manager) ListAllProxies() ([]ProxyView, error) { + views := []ProxyView{} + + // Standalone proxies. + standalones, err := m.store.ListStandaloneProxies() + if err != nil { + return nil, fmt.Errorf("list standalone proxies: %w", err) + } + + for _, p := range standalones { + views = append(views, ProxyView{ + ID: p.ID, + Domain: p.Domain, + Destination: fmt.Sprintf("%s:%d", p.DestinationURL, p.DestinationPort), + Type: "standalone", + HealthStatus: p.HealthStatus, + SSLEnabled: p.SSLCertificateID > 0, + NpmProxyID: p.NpmProxyID, + CreatedAt: p.CreatedAt, + }) + } + + // Deploy-managed proxies: instances with npm_proxy_id > 0. 
+ instances, err := m.store.ListAllInstances() + if err != nil { + return nil, fmt.Errorf("list instances: %w", err) + } + + for _, inst := range instances { + if inst.NpmProxyID <= 0 { + continue + } + + projectName := inst.ProjectID + stageName := inst.StageID + + if proj, err := m.store.GetProjectByID(inst.ProjectID); err == nil { + projectName = proj.Name + } + if stg, err := m.store.GetStageByID(inst.StageID); err == nil { + stageName = stg.Name + } + + destination := fmt.Sprintf("%s:%d", inst.ContainerID[:12], inst.Port) + if inst.Subdomain != "" { + destination = fmt.Sprintf("%s:%d", inst.Subdomain, inst.Port) + } + + healthStatus := "unknown" + if inst.Status == "running" { + healthStatus = "healthy" + } else if inst.Status == "stopped" || inst.Status == "failed" { + healthStatus = "unhealthy" + } + + views = append(views, ProxyView{ + ID: inst.ID, + Domain: inst.Subdomain, + Destination: destination, + Type: "managed", + ProjectName: projectName, + StageName: stageName, + HealthStatus: healthStatus, + SSLEnabled: true, // managed proxies always get SSL from settings + NpmProxyID: inst.NpmProxyID, + CreatedAt: inst.CreatedAt, + }) + } + + return views, nil +} + +// lastFailedStep returns the message of the last failed validation step. +func lastFailedStep(result ValidationResult) string { + for _, step := range result.Steps { + if !step.Passed { + msg := step.Message + if step.Hint != "" { + msg += " β€” " + step.Hint + } + return msg + } + } + return "unknown validation failure" +} + +// IsNotFound checks if an error wraps store.ErrNotFound. +func IsNotFound(err error) bool { + return errors.Is(err, store.ErrNotFound) +} diff --git a/internal/proxy/validator.go b/internal/proxy/validator.go new file mode 100644 index 0000000..a1a5ce5 --- /dev/null +++ b/internal/proxy/validator.go @@ -0,0 +1,224 @@ +package proxy + +import ( + "context" + "fmt" + "net" + "net/http" + "net/url" + "strconv" + "time" +) + +// Validation step names. 
+const ( + StepSyntax = "syntax" + StepDNS = "dns" + StepTCP = "tcp" + StepHTTP = "http" +) + +// ValidationStep holds the result of a single validation check. +type ValidationStep struct { + Name string `json:"name"` + Passed bool `json:"passed"` + Message string `json:"message,omitempty"` + Hint string `json:"hint,omitempty"` +} + +// ValidationResult holds the aggregate result of the validation pipeline. +type ValidationResult struct { + Valid bool `json:"valid"` + Steps []ValidationStep `json:"steps"` +} + +// ValidateDestination runs the multi-step validation pipeline against the given +// destination host and port. It checks syntax, DNS, TCP reachability, and HTTP health. +// The pipeline short-circuits on failure: later steps are skipped if an earlier one fails. +func ValidateDestination(ctx context.Context, host string, port int) ValidationResult { + result := ValidationResult{Valid: true} + + // Step 1: Syntax validation. + if step, ok := validateSyntax(host, port); !ok { + result.Valid = false + result.Steps = append(result.Steps, step) + return result + } else { + result.Steps = append(result.Steps, step) + } + + // Step 2: DNS resolution (skip for IP addresses). + ip := net.ParseIP(host) + if ip == nil { + if step, ok := validateDNS(ctx, host); !ok { + result.Valid = false + result.Steps = append(result.Steps, step) + return result + } else { + result.Steps = append(result.Steps, step) + } + } else { + result.Steps = append(result.Steps, ValidationStep{ + Name: StepDNS, + Passed: true, + Message: "Skipped (IP address provided)", + }) + } + + // Step 3: TCP port reachability. + if step, ok := validateTCP(ctx, host, port); !ok { + result.Valid = false + result.Steps = append(result.Steps, step) + return result + } else { + result.Steps = append(result.Steps, step) + } + + // Step 4: HTTP health probe. 
+ step := validateHTTP(ctx, host, port) + result.Steps = append(result.Steps, step) + if !step.Passed { + result.Valid = false + } + + return result +} + +// validateSyntax checks that the host and port values are syntactically valid. +func validateSyntax(host string, port int) (ValidationStep, bool) { + if host == "" { + return ValidationStep{ + Name: StepSyntax, + Passed: false, + Message: "Host is empty", + Hint: "Provide a valid hostname or IP address.", + }, false + } + + if port < 1 || port > 65535 { + return ValidationStep{ + Name: StepSyntax, + Passed: false, + Message: fmt.Sprintf("Port %d is out of range (1-65535)", port), + Hint: "Provide a valid port number between 1 and 65535.", + }, false + } + + // Reject obviously invalid hostnames (but allow IPs). + if net.ParseIP(host) == nil { + // Basic hostname validation: must not contain spaces or schemes. + if _, err := url.Parse("http://" + host); err != nil { + return ValidationStep{ + Name: StepSyntax, + Passed: false, + Message: "Invalid hostname: " + err.Error(), + Hint: "Provide a valid hostname without scheme (e.g., 'example.com' not 'http://example.com').", + }, false + } + } + + return ValidationStep{ + Name: StepSyntax, + Passed: true, + Message: fmt.Sprintf("Host %q port %d syntax OK", host, port), + }, true +} + +// validateDNS performs a DNS lookup on the given host. +func validateDNS(ctx context.Context, host string) (ValidationStep, bool) { + resolver := net.DefaultResolver + addrs, err := resolver.LookupHost(ctx, host) + if err != nil { + return ValidationStep{ + Name: StepDNS, + Passed: false, + Message: fmt.Sprintf("DNS resolution failed for %q: %s", host, err.Error()), + Hint: diagnosticHint(StepDNS, err), + }, false + } + + return ValidationStep{ + Name: StepDNS, + Passed: true, + Message: fmt.Sprintf("Resolved to %v", addrs), + }, true +} + +// validateTCP attempts a TCP connection to host:port with a 5-second timeout. 
+func validateTCP(ctx context.Context, host string, port int) (ValidationStep, bool) { + addr := net.JoinHostPort(host, strconv.Itoa(port)) + + dialCtx, cancel := context.WithTimeout(ctx, 5*time.Second) + defer cancel() + + var d net.Dialer + conn, err := d.DialContext(dialCtx, "tcp", addr) + if err != nil { + return ValidationStep{ + Name: StepTCP, + Passed: false, + Message: fmt.Sprintf("TCP connect to %s failed: %s", addr, err.Error()), + Hint: diagnosticHint(StepTCP, err), + }, false + } + conn.Close() + + return ValidationStep{ + Name: StepTCP, + Passed: true, + Message: fmt.Sprintf("TCP connect to %s succeeded", addr), + }, true +} + +// validateHTTP performs a GET request to the destination and checks for a response. +// Non-5xx responses are considered passing (the service is responding). +func validateHTTP(ctx context.Context, host string, port int) ValidationStep { + target := fmt.Sprintf("http://%s:%d/", host, port) + + httpCtx, cancel := context.WithTimeout(ctx, 10*time.Second) + defer cancel() + + req, err := http.NewRequestWithContext(httpCtx, http.MethodGet, target, nil) + if err != nil { + return ValidationStep{ + Name: StepHTTP, + Passed: false, + Message: fmt.Sprintf("Failed to build HTTP request: %s", err.Error()), + Hint: diagnosticHint(StepHTTP, err), + } + } + + client := &http.Client{ + Timeout: 10 * time.Second, + // Do not follow redirects β€” we just want to see if the port responds to HTTP. + CheckRedirect: func(*http.Request, []*http.Request) error { + return http.ErrUseLastResponse + }, + } + + resp, err := client.Do(req) + if err != nil { + return ValidationStep{ + Name: StepHTTP, + Passed: false, + Message: fmt.Sprintf("HTTP probe to %s failed: %s", target, err.Error()), + Hint: diagnosticHint(StepHTTP, err), + } + } + resp.Body.Close() + + if resp.StatusCode >= 500 { + return ValidationStep{ + Name: StepHTTP, + Passed: false, + Message: fmt.Sprintf("Service responded with HTTP %d. 
The service may not be healthy.", resp.StatusCode), + Hint: fmt.Sprintf("Service responded with HTTP %d. The service may not be healthy.", resp.StatusCode), + } + } + + return ValidationStep{ + Name: StepHTTP, + Passed: true, + Message: fmt.Sprintf("HTTP probe returned %d", resp.StatusCode), + } +} diff --git a/web/src/lib/api.ts b/web/src/lib/api.ts index 30d9d68..2cdf9c0 100644 --- a/web/src/lib/api.ts +++ b/web/src/lib/api.ts @@ -9,11 +9,14 @@ import type { NpmCertificate, Project, ProjectDetail, + ProxyView, Registry, RegistryImage, Settings, Stage, StageEnv, + StandaloneProxy, + ValidationResult, Volume } from './types'; @@ -365,4 +368,41 @@ export function fetchEventLogStats(): Promise { return get('/api/events/log/stats'); } +// ── Proxies ───────────────────────────────────────────────────────── + +export function validateProxy(host: string, port: number): Promise { + return post('/api/proxies/validate', { host, port }); +} + +export function createProxy(data: { + domain: string; + destination_url: string; + destination_port: number; +}): Promise { + return post('/api/proxies', data); +} + +export function listProxies(): Promise { + return get('/api/proxies'); +} + +export function getProxy(id: string): Promise { + return get(`/api/proxies/${id}`); +} + +export function updateProxy( + id: string, + data: { domain: string; destination_url: string; destination_port: number } +): Promise { + return put(`/api/proxies/${id}`, data); +} + +export function deleteProxy(id: string): Promise<{ deleted: string }> { + return del<{ deleted: string }>(`/api/proxies/${id}`); +} + +export function listAllProxies(): Promise { + return get('/api/proxies/all'); +} + export { ApiError }; diff --git a/web/src/lib/types.ts b/web/src/lib/types.ts index 7c78603..18db92b 100644 --- a/web/src/lib/types.ts +++ b/web/src/lib/types.ts @@ -198,8 +198,39 @@ export interface StandaloneProxy { destination_port: number; ssl_certificate_id: number; npm_proxy_id: number; - health_status: 
'unknown' | 'healthy' | 'unhealthy'; + health_status: ProxyHealthStatus; health_checked_at: string; created_at: string; updated_at: string; } + +/** Health status for a proxy. */ +export type ProxyHealthStatus = 'unknown' | 'healthy' | 'unhealthy'; + +/** A single step in the validation pipeline. */ +export interface ValidationStep { + name: string; + passed: boolean; + message?: string; + hint?: string; +} + +/** Result of the proxy destination validation pipeline. */ +export interface ValidationResult { + valid: boolean; + steps: ValidationStep[]; +} + +/** Unified view of standalone + deploy-managed proxies (from /api/proxies/all). */ +export interface ProxyView { + id: string; + domain: string; + destination: string; + type: 'standalone' | 'managed'; + project_name?: string; + stage_name?: string; + health_status: ProxyHealthStatus; + ssl_enabled: boolean; + npm_proxy_id: number; + created_at: string; +} From 79a40f3d9cc016a84edcb8add5f4c205b7b04c65 Mon Sep 17 00:00:00 2001 From: "alexei.dolgolyov" Date: Mon, 30 Mar 2026 11:29:10 +0300 Subject: [PATCH 4/7] feat(observability): phases 4-7 - complete frontend UI (big bang) Add all frontend pages for observability & proxy management: - Proxy Viewer: /proxies with grouped view, filtering, health indicators - Proxy Creation: form with live validation, diagnostic hints, edit/delete - Stale Containers: /containers/stale with dashboard widget, cleanup actions - Event Log: /events with filters, pagination, real-time SSE streaming - Navigation: proxies and events links in sidebar - i18n: full EN/RU translations for all new features - Settings: stale threshold configuration --- web/src/lib/api.ts | 15 + web/src/lib/components/EventLogEntry.svelte | 161 +++++++++ web/src/lib/components/EventLogFilter.svelte | 167 ++++++++++ web/src/lib/components/ProxyCard.svelte | 129 +++++++ web/src/lib/components/ProxyFilter.svelte | 85 +++++ web/src/lib/components/ProxyForm.svelte | 292 ++++++++++++++++ 
web/src/lib/components/ProxyGroup.svelte | 46 +++ .../lib/components/StaleContainerCard.svelte | 85 +++++ .../lib/components/ValidationChecklist.svelte | 73 ++++ .../lib/components/icons/IconEvents.svelte | 7 + .../lib/components/icons/IconProxies.svelte | 7 + web/src/lib/components/icons/index.ts | 2 + web/src/lib/i18n/en.json | 121 ++++++- web/src/lib/i18n/ru.json | 121 ++++++- web/src/lib/sse.ts | 16 +- web/src/lib/types.ts | 13 + web/src/routes/+layout.svelte | 8 +- web/src/routes/+page.svelte | 23 +- web/src/routes/containers/stale/+page.svelte | 152 +++++++++ web/src/routes/containers/stale/+page.ts | 2 + web/src/routes/events/+page.svelte | 314 ++++++++++++++++++ web/src/routes/events/+page.ts | 2 + web/src/routes/proxies/+page.svelte | 239 +++++++++++++ web/src/routes/proxies/+page.ts | 1 + web/src/routes/proxies/[id]/edit/+page.svelte | 94 ++++++ web/src/routes/proxies/[id]/edit/+page.ts | 1 + web/src/routes/proxies/create/+page.svelte | 52 +++ web/src/routes/proxies/create/+page.ts | 1 + web/src/routes/settings/+page.svelte | 20 +- 29 files changed, 2237 insertions(+), 12 deletions(-) create mode 100644 web/src/lib/components/EventLogEntry.svelte create mode 100644 web/src/lib/components/EventLogFilter.svelte create mode 100644 web/src/lib/components/ProxyCard.svelte create mode 100644 web/src/lib/components/ProxyFilter.svelte create mode 100644 web/src/lib/components/ProxyForm.svelte create mode 100644 web/src/lib/components/ProxyGroup.svelte create mode 100644 web/src/lib/components/StaleContainerCard.svelte create mode 100644 web/src/lib/components/ValidationChecklist.svelte create mode 100644 web/src/lib/components/icons/IconEvents.svelte create mode 100644 web/src/lib/components/icons/IconProxies.svelte create mode 100644 web/src/routes/containers/stale/+page.svelte create mode 100644 web/src/routes/containers/stale/+page.ts create mode 100644 web/src/routes/events/+page.svelte create mode 100644 web/src/routes/events/+page.ts create mode 100644 
web/src/routes/proxies/+page.svelte create mode 100644 web/src/routes/proxies/+page.ts create mode 100644 web/src/routes/proxies/[id]/edit/+page.svelte create mode 100644 web/src/routes/proxies/[id]/edit/+page.ts create mode 100644 web/src/routes/proxies/create/+page.svelte create mode 100644 web/src/routes/proxies/create/+page.ts diff --git a/web/src/lib/api.ts b/web/src/lib/api.ts index 2cdf9c0..9206d17 100644 --- a/web/src/lib/api.ts +++ b/web/src/lib/api.ts @@ -13,6 +13,7 @@ import type { Registry, RegistryImage, Settings, + StaleContainer, Stage, StageEnv, StandaloneProxy, @@ -405,4 +406,18 @@ export function listAllProxies(): Promise { return get('/api/proxies/all'); } +// ── Stale Containers ──────────────────────────────────────────────── + +export function fetchStaleContainers(): Promise { + return get('/api/containers/stale'); +} + +export function cleanupStaleContainer(id: string): Promise<{ deleted: string }> { + return post<{ deleted: string }>(`/api/containers/stale/${id}/cleanup`); +} + +export function bulkCleanupStaleContainers(): Promise<{ deleted: number }> { + return post<{ deleted: number }>('/api/containers/stale/cleanup'); +} + export { ApiError }; diff --git a/web/src/lib/components/EventLogEntry.svelte b/web/src/lib/components/EventLogEntry.svelte new file mode 100644 index 0000000..b3fb727 --- /dev/null +++ b/web/src/lib/components/EventLogEntry.svelte @@ -0,0 +1,161 @@ + + + +
+
+ +
+ {#if entry.source === 'deploy'} + + + + + + + {:else if entry.source === 'container'} + + + + + + {:else if entry.source === 'proxy'} + + + + + {:else} + + + + + + {/if} +
+ + +
+
+ + + {$t(severityLabelKeys[entry.severity] ?? 'events.severity.info')} + + + + + {$t(`events.source.${entry.source}`)} + + + + + {timeAgo(entry.created_at)} + +
+ + +

+ {entry.message} +

+ + + {#if hasMetadata} + + + {#if expanded} +
+ + + {#each Object.entries(parsedMetadata ?? {}) as [key, value]} + + + + + {/each} + +
{key}{typeof value === 'object' ? JSON.stringify(value) : String(value)}
+
+ {/if} + {/if} +
+
+
diff --git a/web/src/lib/components/EventLogFilter.svelte b/web/src/lib/components/EventLogFilter.svelte new file mode 100644 index 0000000..46652a6 --- /dev/null +++ b/web/src/lib/components/EventLogFilter.svelte @@ -0,0 +1,167 @@ + + + +
+
+ +
+ +
+ {#each allSeverities as sev} + + {/each} +
+
+ + +
+ +
+ {#each allSources as src} + + {/each} +
+
+ + +
+ +
+ {#each dateRangeOptions as opt} + + {/each} +
+
+ + +
+ +
+ + + + onsearchchange((e.target as HTMLInputElement).value)} + class="w-full rounded-md border border-[var(--border-primary)] bg-[var(--surface-page)] py-1.5 pl-8 pr-3 text-xs text-[var(--text-primary)] placeholder:text-[var(--text-tertiary)] focus:border-[var(--color-brand-500)] focus:outline-none focus:ring-1 focus:ring-[var(--color-brand-500)]" + /> +
+
+ + +
+ +
+
+
diff --git a/web/src/lib/components/ProxyCard.svelte b/web/src/lib/components/ProxyCard.svelte new file mode 100644 index 0000000..2b2c9b6 --- /dev/null +++ b/web/src/lib/components/ProxyCard.svelte @@ -0,0 +1,129 @@ + + + +
+ +
+
+
+ + + {#if isHealthy} + + {/if} + + + + + + {proxy.domain} + + +
+ + +

+ {proxy.destination} +

+
+ + + + {proxy.type} + +
+ + +
+ + {#if proxy.ssl_enabled} + + + SSL + + {/if} + + + + {healthLabel} + + + + {#if proxy.type === 'managed' && proxy.project_name} + + {proxy.project_name} + + {#if proxy.stage_name} + + {proxy.stage_name} + + {/if} + {/if} +
+ + +
+ {#if proxy.type === 'standalone'} + + + {$t('common.edit')} + + {:else} + + {/if} + + {#if proxy.created_at} +

+ {$t('proxies.lastChecked')}: {formatTimestamp(proxy.created_at)} +

+ {/if} +
+
diff --git a/web/src/lib/components/ProxyFilter.svelte b/web/src/lib/components/ProxyFilter.svelte new file mode 100644 index 0000000..80be70e --- /dev/null +++ b/web/src/lib/components/ProxyFilter.svelte @@ -0,0 +1,85 @@ + + + +
+ +
+ + onsearchchange(e.currentTarget.value)} + placeholder={$t('proxies.filter.search')} + class="w-full rounded-lg border border-[var(--border-primary)] bg-[var(--surface-card)] py-2 pl-9 pr-3 text-sm text-[var(--text-primary)] placeholder:text-[var(--text-tertiary)] transition-colors focus:border-[var(--color-brand-500)] focus:outline-none focus:ring-1 focus:ring-[var(--color-brand-500)]" + /> +
+ + + + + + + + + {#if hasFilters} + + {/if} +
diff --git a/web/src/lib/components/ProxyForm.svelte b/web/src/lib/components/ProxyForm.svelte new file mode 100644 index 0000000..d5a10fa --- /dev/null +++ b/web/src/lib/components/ProxyForm.svelte @@ -0,0 +1,292 @@ + + + +
+ +

{title}

+ + +
{ e.preventDefault(); handleSubmit(); }} class="space-y-4"> + + + + + + + +
+ + + +
+ + + {#if validationResult && !validationResult.valid} +

+ Validation reported issues but you can still create the proxy. +

+ {/if} + + + {#if submitError} +

{submitError}

+ {/if} + + +
+
+ {#if mode === 'edit'} + + {/if} +
+ +
+ + + +
+
+ +
+ + +{#if mode === 'edit'} + { deleteConfirmOpen = false; }} + /> +{/if} diff --git a/web/src/lib/components/ProxyGroup.svelte b/web/src/lib/components/ProxyGroup.svelte new file mode 100644 index 0000000..17cd76d --- /dev/null +++ b/web/src/lib/components/ProxyGroup.svelte @@ -0,0 +1,46 @@ + + + +
+ + + + + {#if expanded} +
+
+ {@render children()} +
+
+ {/if} +
diff --git a/web/src/lib/components/StaleContainerCard.svelte b/web/src/lib/components/StaleContainerCard.svelte new file mode 100644 index 0000000..63dd09b --- /dev/null +++ b/web/src/lib/components/StaleContainerCard.svelte @@ -0,0 +1,85 @@ + + + +
+ +
+
+

+ {displayName} +

+
+ + {container.project_name} + + + {container.stage_name} + +
+
+ + + + + {container.days_stale} {$t('stale.daysStale')} + +
+ + +
+ + + {container.image_tag} + + + + {$t('stale.lastAlive')}: {formatDate(container.last_alive_at)} + + + {container.status} + +
+ + +
+ +
+
diff --git a/web/src/lib/components/ValidationChecklist.svelte b/web/src/lib/components/ValidationChecklist.svelte new file mode 100644 index 0000000..54f9a4d --- /dev/null +++ b/web/src/lib/components/ValidationChecklist.svelte @@ -0,0 +1,73 @@ + + + +{#if loading || result} +
+

+ {$t('proxies.validation.title')} +

+ + {#if loading && !result} +
+ + {$t('proxies.validation.checking')} +
+ {:else if result} +
    + {#each result.steps as step} +
  • +
    + {#if step.passed} + + + + {getStepLabel(step.name)} + {#if step.message} + — {step.message} + {/if} + {:else} + + + + {getStepLabel(step.name)} + {#if step.message} + — {step.message} + {/if} + {/if} +
    + {#if !step.passed && step.hint} +

    {step.hint}

    + {/if} +
  • + {/each} +
+ {/if} +
+{/if} diff --git a/web/src/lib/components/icons/IconEvents.svelte b/web/src/lib/components/icons/IconEvents.svelte new file mode 100644 index 0000000..207772a --- /dev/null +++ b/web/src/lib/components/icons/IconEvents.svelte @@ -0,0 +1,7 @@ + + diff --git a/web/src/lib/components/icons/IconProxies.svelte b/web/src/lib/components/icons/IconProxies.svelte new file mode 100644 index 0000000..b6fdede --- /dev/null +++ b/web/src/lib/components/icons/IconProxies.svelte @@ -0,0 +1,7 @@ + + diff --git a/web/src/lib/components/icons/index.ts b/web/src/lib/components/icons/index.ts index a38f759..049c76b 100644 --- a/web/src/lib/components/icons/index.ts +++ b/web/src/lib/components/icons/index.ts @@ -45,3 +45,5 @@ export { default as IconContainer } from './IconContainer.svelte'; export { default as IconHardDrive } from './IconHardDrive.svelte'; export { default as IconWifi } from './IconWifi.svelte'; export { default as IconRefresh } from './IconRefresh.svelte'; +export { default as IconProxies } from './IconProxies.svelte'; +export { default as IconEvents } from './IconEvents.svelte'; diff --git a/web/src/lib/i18n/en.json b/web/src/lib/i18n/en.json index 424d07d..2560eda 100644 --- a/web/src/lib/i18n/en.json +++ b/web/src/lib/i18n/en.json @@ -7,6 +7,8 @@ "dashboard": "Dashboard", "projects": "Projects", "deploy": "Deploy", + "proxies": "Proxies", + "events": "Events", "settings": "Settings" }, "dashboard": { @@ -19,7 +21,8 @@ "retry": "Retry", "noProjects": "No projects yet.", "addFirst": "Add your first project", - "loadFailed": "Failed to load dashboard" + "loadFailed": "Failed to load dashboard", + "staleContainers": "Stale Containers" }, "projects": { "title": "Projects", @@ -176,7 +179,9 @@ "registries": "Registries", "credentials": "Credentials", "authentication": "Authentication", - "appearance": "Appearance" + "appearance": "Appearance", + "staleThreshold": "Stale threshold (days)", + "staleThresholdHelp": "Containers inactive for longer than this will be 
flagged as stale." }, "settingsGeneral": { "title": "General Settings", @@ -320,6 +325,27 @@ "loginFailed": "Login failed", "networkError": "Network error" }, + "proxies": { + "title": "Proxy Manager", + "create": "Create Proxy", + "standalone": "Standalone Proxies", + "managed": "Managed Proxies", + "noProxies": "No proxies found", + "noProxiesDesc": "Create a standalone proxy or deploy a project with proxy enabled.", + "filter": { + "search": "Search by domain or destination...", + "health": "Health", + "type": "Type", + "all": "All", + "clear": "Clear filters" + }, + "health": { + "healthy": "Healthy", + "unhealthy": "Unhealthy", + "unknown": "Unknown" + }, + "lastChecked": "Last checked" + }, "common": { "cancel": "Cancel", "confirm": "Confirm", @@ -387,6 +413,97 @@ "search": "Search...", "noResults": "No results found" }, + "stale": { + "title": "Stale Containers", + "noStale": "No stale containers", + "noStaleDesc": "All containers are healthy and running.", + "cleanup": "Clean up", + "cleanupAll": "Clean up all", + "confirmCleanup": "This will stop and remove the container. Continue?", + "confirmBulkCleanup": "This will stop and remove all stale containers. 
Continue?", + "daysStale": "days stale", + "lastAlive": "Last alive", + "count": "Stale", + "cleanedUp": "Container cleaned up", + "bulkCleanedUp": "{count} containers cleaned up", + "cleanupFailed": "Cleanup failed", + "loadFailed": "Failed to load stale containers" + }, + "proxies": { + "title": "Proxies", + "create": "Create Proxy", + "noProxies": "No proxies configured yet.", + "noProxiesDesc": "Create a standalone proxy or deploy a project to see proxies here.", + "standalone": "Standalone Proxies", + "managed": "Managed", + "lastChecked": "Last checked", + "health": { + "healthy": "Healthy", + "unhealthy": "Unhealthy", + "unknown": "Unknown" + }, + "filter": { + "search": "Search proxies...", + "health": "Health", + "type": "Type", + "all": "All", + "clear": "Clear filters" + }, + "form": { + "title": "Create Proxy", + "editTitle": "Edit Proxy", + "destination": "Destination URL / IP", + "port": "Port", + "domain": "Domain", + "domainHelp": "The public domain for this proxy.", + "validate": "Validate", + "validating": "Validating...", + "create": "Create Proxy", + "save": "Save Changes", + "cancel": "Cancel", + "delete": "Delete", + "deleteConfirm": "Delete this proxy? This cannot be undone." 
+ }, + "validation": { + "title": "Destination Validation", + "syntax": "URL syntax", + "dns": "DNS resolution", + "tcp": "TCP connection", + "http": "HTTP response", + "checking": "Checking...", + "skipped": "Skipped" + } + }, + "events": { + "title": "Event Log", + "noEvents": "No events found", + "noEventsDesc": "Events will appear here as they occur.", + "loadMore": "Load more", + "newEvents": "new events", + "filter": { + "severity": "Severity", + "source": "Source", + "dateRange": "Date range", + "search": "Search events...", + "lastHour": "Last hour", + "last24h": "Last 24 hours", + "last7d": "Last 7 days", + "allTime": "All time", + "clear": "Clear filters" + }, + "severity": { + "info": "Info", + "warn": "Warning", + "error": "Error" + }, + "source": { + "deploy": "Deploy", + "container": "Container", + "proxy": "Proxy", + "system": "System" + }, + "metadata": "Details" + }, "language": { "en": "English", "ru": "Russian" diff --git a/web/src/lib/i18n/ru.json b/web/src/lib/i18n/ru.json index 082dbf7..a1054a4 100644 --- a/web/src/lib/i18n/ru.json +++ b/web/src/lib/i18n/ru.json @@ -7,6 +7,8 @@ "dashboard": "Панель", "projects": "Проекты", "deploy": "Деплой", + "proxies": "Прокси", + "events": "События", "settings": "Настройки" }, "dashboard": { @@ -19,7 +21,8 @@ "retry": "Повторить", "noProjects": "Проектов пока нет.", "addFirst": "Добавьте первый проект", - "loadFailed": "Не удалось загрузить панель" + "loadFailed": "Не удалось загрузить панель", + "staleContainers": "Устаревшие контейнеры" }, "projects": { "title": "Проекты", @@ -176,7 +179,9 @@ "registries": "Реестры", "credentials": "Учётные данные", "authentication": "Аутентификация", - "appearance": "Внешний вид" + "appearance": "Внешний вид", + "staleThreshold": "Порог устаревания (дни)", + "staleThresholdHelp": "Контейнеры,
неактивные дольше этого срока, будут помечены как устаревшие." }, "settingsGeneral": { "title": "Общие настройки", @@ -320,6 +325,27 @@ "loginFailed": "Ошибка входа", "networkError": "Ошибка сети" }, + "proxies": { + "title": "Менеджер прокси", + "create": "Создать прокси", + "standalone": "Автономные прокси", + "managed": "Управляемые прокси", + "noProxies": "Прокси не найдены", + "noProxiesDesc": "Создайте автономный прокси или разверните проект с включённым прокси.", + "filter": { + "search": "Поиск по домену или назначению...", + "health": "Здоровье", + "type": "Тип", + "all": "Все", + "clear": "Сбросить фильтры" + }, + "health": { + "healthy": "Здоров", + "unhealthy": "Нездоров", + "unknown": "Неизвестно" + }, + "lastChecked": "Последняя проверка" + }, "common": { "cancel": "Отмена", "confirm": "Подтвердить", @@ -387,6 +413,97 @@ "search": "Поиск...", "noResults": "Ничего не найдено" }, + "stale": { + "title": "Устаревшие контейнеры", + "noStale": "Нет устаревших контейнеров", + "noStaleDesc": "Все контейнеры исправны и работают.", + "cleanup": "Очистить", + "cleanupAll": "Очистить все", + "confirmCleanup": "Это остановит и удалит контейнер. Продолжить?", + "confirmBulkCleanup": "Это остановит и удалит все устаревшие контейнеры.
Продолжить?", + "daysStale": "дней устарел", + "lastAlive": "Последний раз жив", + "count": "Устаревшие", + "cleanedUp": "Контейнер очищен", + "bulkCleanedUp": "{count} контейнеров очищено", + "cleanupFailed": "Не удалось очистить", + "loadFailed": "Не удалось загрузить устаревшие контейнеры" + }, + "proxies": { + "title": "Прокси", + "create": "Создать прокси", + "noProxies": "Прокси ещё не настроены.", + "noProxiesDesc": "Создайте автономный прокси или разверните проект, чтобы увидеть прокси здесь.", + "standalone": "Автономные прокси", + "managed": "Управляемые", + "lastChecked": "Последняя проверка", + "health": { + "healthy": "Работает", + "unhealthy": "Недоступен", + "unknown": "Неизвестно" + }, + "filter": { + "search": "Поиск прокси...", + "health": "Здоровье", + "type": "Тип", + "all": "Все", + "clear": "Сбросить фильтры" + }, + "form": { + "title": "Создать прокси", + "editTitle": "Редактировать прокси", + "destination": "URL / IP назначения", + "port": "Порт", + "domain": "Домен", + "domainHelp": "Публичный домен для этого прокси.", + "validate": "Проверить", + "validating": "Проверка...", + "create": "Создать прокси", + "save": "Сохранить изменения", + "cancel": "Отмена", + "delete": "Удалить", + "deleteConfirm": "Удалить этот прокси? Это действие необратимо."
+ }, + "validation": { + "title": "Проверка назначения", + "syntax": "Синтаксис URL", + "dns": "DNS разрешение", + "tcp": "TCP подключение", + "http": "HTTP ответ", + "checking": "Проверка...", + "skipped": "Пропущено" + } + }, + "events": { + "title": "Журнал событий", + "noEvents": "Событий не найдено", + "noEventsDesc": "События будут отображаться здесь по мере их возникновения.", + "loadMore": "Загрузить ещё", + "newEvents": "новых событий", + "filter": { + "severity": "Уровень", + "source": "Источник", + "dateRange": "Период", + "search": "Поиск событий...", + "lastHour": "Последний час", + "last24h": "Последние 24 часа", + "last7d": "Последние 7 дней", + "allTime": "За всё время", + "clear": "Сбросить фильтры" + }, + "severity": { + "info": "Инфо", + "warn": "Предупреждение", + "error": "Ошибка" + }, + "source": { + "deploy": "Развёртывание", + "container": "Контейнер", + "proxy": "Прокси", + "system": "Система" + }, + "metadata": "Подробности" + }, "language": { "en": "Английский", "ru": "Русский" diff --git a/web/src/lib/sse.ts b/web/src/lib/sse.ts index 43926a1..7f04651 100644 --- a/web/src/lib/sse.ts +++ b/web/src/lib/sse.ts @@ -7,7 +7,7 @@ // ── Types ────────────────────────────────────────────────────────── -export type SSEEventType = 'deploy_log' | 'instance_status' | 'deploy_status'; +export type SSEEventType = 'deploy_log' | 'instance_status' | 'deploy_status' | 'event_log'; export interface SSEEvent { type: SSEEventType; @@ -36,7 +36,16 @@ export interface DeployStatusPayload { error?: string; } -type SSEPayload = DeployLogPayload | InstanceStatusPayload | DeployStatusPayload; +export interface EventLogSSEPayload { + id: number; + source: string; + severity: string; + message: string; + metadata: string; + created_at: string; +} + +type
SSEPayload = DeployLogPayload | InstanceStatusPayload | DeployStatusPayload | EventLogSSEPayload; export interface SSEOptions { /** Called for each SSE event received. */ @@ -179,6 +188,7 @@ export function connectDeployLogs( export function connectGlobalEvents(callbacks: { onInstanceStatus?: (payload: InstanceStatusPayload) => void; onDeployStatus?: (payload: DeployStatusPayload) => void; + onEventLog?: (payload: EventLogSSEPayload) => void; onOpen?: () => void; onError?: (attempt: number) => void; }): SSEConnection { @@ -188,6 +198,8 @@ export function connectGlobalEvents(callbacks: { callbacks.onInstanceStatus?.(event.payload as InstanceStatusPayload); } else if (event.type === 'deploy_status') { callbacks.onDeployStatus?.(event.payload as DeployStatusPayload); + } else if (event.type === 'event_log') { + callbacks.onEventLog?.(event.payload as EventLogSSEPayload); } }, onOpen: callbacks.onOpen, diff --git a/web/src/lib/types.ts b/web/src/lib/types.ts index 18db92b..8e7f5ca 100644 --- a/web/src/lib/types.ts +++ b/web/src/lib/types.ts @@ -207,6 +207,19 @@ export interface StandaloneProxy { /** Health status for a proxy. */ export type ProxyHealthStatus = 'unknown' | 'healthy' | 'unhealthy'; +/** A container detected as stale by the backend poller. */ +export interface StaleContainer { + id: string; + project_name: string; + stage_name: string; + image_tag: string; + container_id: string; + status: string; + last_alive_at: string; + days_stale: number; + created_at: string; +} + /** A single step in the validation pipeline. 
*/ export interface ValidationStep { name: string; diff --git a/web/src/routes/+layout.svelte b/web/src/routes/+layout.svelte index f03609a..ad1f695 100644 --- a/web/src/routes/+layout.svelte +++ b/web/src/routes/+layout.svelte @@ -6,7 +6,7 @@ import Toast from '$lib/components/Toast.svelte'; import ThemeToggle from '$lib/components/ThemeToggle.svelte'; import LocaleSwitcher from '$lib/components/LocaleSwitcher.svelte'; - import { IconDashboard, IconProjects, IconDeploy, IconSettings, IconMenu, IconX } from '$lib/components/icons'; + import { IconDashboard, IconProjects, IconDeploy, IconProxies, IconEvents, IconSettings, IconMenu, IconX } from '$lib/components/icons'; import { connectGlobalEvents, type SSEConnection } from '$lib/sse'; import { instanceStatusStore } from '$lib/stores/instance-status'; import { resolvedTheme, applyTheme } from '$lib/stores/theme'; @@ -22,6 +22,8 @@ { href: '/', labelKey: 'nav.dashboard', icon: 'dashboard' }, { href: '/projects', labelKey: 'nav.projects', icon: 'projects' }, { href: '/deploy', labelKey: 'nav.deploy', icon: 'deploy' }, + { href: '/proxies', labelKey: 'nav.proxies', icon: 'proxies' }, + { href: '/events', labelKey: 'nav.events', icon: 'events' }, { href: '/settings', labelKey: 'nav.settings', icon: 'settings' } ] as const; @@ -128,6 +130,10 @@ {:else if item.icon === 'deploy'} + {:else if item.icon === 'proxies'} + + {:else if item.icon === 'events'} + {:else if item.icon === 'settings'} {/if} diff --git a/web/src/routes/+page.svelte b/web/src/routes/+page.svelte index 68fa09b..f7d89ec 100644 --- a/web/src/routes/+page.svelte +++ b/web/src/routes/+page.svelte @@ -1,14 +1,15 @@ @@ -79,7 +85,7 @@ -
+ diff --git a/web/src/routes/containers/stale/+page.svelte b/web/src/routes/containers/stale/+page.svelte new file mode 100644 index 0000000..9d54c2f --- /dev/null +++ b/web/src/routes/containers/stale/+page.svelte @@ -0,0 +1,152 @@ + + + + {$t('stale.title')} - {$t('app.name')} + + +
+ +
+

{$t('stale.title')}

+ {#if containers.length > 0} + + {/if} +
+ + + {#if loading} +
+ {#each Array(3) as _} + + {/each} +
+ {:else if error} +
+

{error}

+ +
+ {:else if containers.length === 0} + + {:else} +
+ {#each containers as container (container.id)} + + {/each} +
+ {/if} +
+ + + { confirmSingleId = ''; }} +/> + + + { confirmBulk = false; }} +/> diff --git a/web/src/routes/containers/stale/+page.ts b/web/src/routes/containers/stale/+page.ts new file mode 100644 index 0000000..161a35d --- /dev/null +++ b/web/src/routes/containers/stale/+page.ts @@ -0,0 +1,2 @@ +// Client-side only — data is fetched in the component. +export const ssr = false; diff --git a/web/src/routes/events/+page.svelte b/web/src/routes/events/+page.svelte new file mode 100644 index 0000000..ba1db48 --- /dev/null +++ b/web/src/routes/events/+page.svelte @@ -0,0 +1,314 @@ + + + +
+ +
+

{$t('events.title')}

+
+ + +
+
+
+ {$t('events.severity.info')} + {stats.info} +
+
+
+ {$t('events.severity.warn')} + {stats.warn} +
+
+
+ {$t('events.severity.error')} + {stats.error} +
+
+ Total + {stats.total} +
+
+ + + + + + {#if pendingNewEvents.length > 0} + + {/if} + + + {#if loading} +
+ + + + +
+ {:else if filteredEvents.length === 0} + + {:else} +
+ {#each filteredEvents as entry (entry.id)} + + {/each} + + + {#if hasMore && searchText.trim() === ''} +
+ +
+ {/if} +
+ {/if} +
+ diff --git a/web/src/routes/events/+page.ts b/web/src/routes/events/+page.ts new file mode 100644 index 0000000..7d860cf --- /dev/null +++ b/web/src/routes/events/+page.ts @@ -0,0 +1,2 @@ +// Event log page — all data loaded client-side. +export const ssr = false; diff --git a/web/src/routes/proxies/+page.svelte b/web/src/routes/proxies/+page.svelte new file mode 100644 index 0000000..0319b34 --- /dev/null +++ b/web/src/routes/proxies/+page.svelte @@ -0,0 +1,239 @@ + + + + + {$t('proxies.title')} - {$t('app.name')} + + + +
+
+
+ +
+
+

{$t('proxies.title')}

+ {#if !loading && proxies.length > 0} +

+ {proxies.length} {proxies.length === 1 ? 'proxy' : 'proxies'} +

+ {/if} +
+
+ + + + + + {$t('proxies.create')} + +
+ + +{#if loading} +
+ + {$t('common.loading')} +
+{:else if error} + +
+

{error}

+ +
+{:else if proxies.length === 0} + + +{:else} + +
+ { search = v; }} + onhealthchange={(v) => { healthFilter = v; }} + ontypechange={(v) => { typeFilter = v; }} + onclear={clearFilters} + /> +
+ + + {#if filtered().length === 0} +
+

{$t('proxies.noProxies')}

+ +
+ {:else} +
+ + {#if standaloneProxies.length > 0} + + {#each standaloneProxies as proxy (proxy.id)} + + {/each} + + {/if} + + + {#if managedGroups().length > 0} + {#each managedGroups() as group (group.projectName)} + + {#each group.stages as stage (stage.stageName)} + {#if group.stages.length > 1} +
+

+ {stage.stageName} +

+
+ {/if} + {#each stage.proxies as proxy (proxy.id)} + + {/each} + {/each} +
+ {/each} + {/if} +
+ {/if} +{/if} diff --git a/web/src/routes/proxies/+page.ts b/web/src/routes/proxies/+page.ts new file mode 100644 index 0000000..0aef742 --- /dev/null +++ b/web/src/routes/proxies/+page.ts @@ -0,0 +1 @@ +// Client-side loading — data is fetched in the component via $effect. diff --git a/web/src/routes/proxies/[id]/edit/+page.svelte b/web/src/routes/proxies/[id]/edit/+page.svelte new file mode 100644 index 0000000..ede08ee --- /dev/null +++ b/web/src/routes/proxies/[id]/edit/+page.svelte @@ -0,0 +1,94 @@ + + + + + {$t('proxies.form.editTitle')} - {$t('app.name')} + + + + + + +
+
+ +
+

{$t('proxies.form.editTitle')}

+
+ +{#if loading} +
+ + {$t('common.loading')} +
+{:else if error} + +{:else if proxy} + +
+ +
+{/if} diff --git a/web/src/routes/proxies/[id]/edit/+page.ts b/web/src/routes/proxies/[id]/edit/+page.ts new file mode 100644 index 0000000..12c4939 --- /dev/null +++ b/web/src/routes/proxies/[id]/edit/+page.ts @@ -0,0 +1 @@ +// Client-side loading — proxy data is fetched in the component. diff --git a/web/src/routes/proxies/create/+page.svelte b/web/src/routes/proxies/create/+page.svelte new file mode 100644 index 0000000..2e32f76 --- /dev/null +++ b/web/src/routes/proxies/create/+page.svelte @@ -0,0 +1,52 @@ + + + + + {$t('proxies.form.title')} - {$t('app.name')} + + + + + + +
+
+ +
+

{$t('proxies.form.title')}

+
+ + +
+ +
+ diff --git a/web/src/routes/proxies/create/+page.ts b/web/src/routes/proxies/create/+page.ts new file mode 100644 index 0000000..c480e82 --- /dev/null +++ b/web/src/routes/proxies/create/+page.ts @@ -0,0 +1 @@ +// Client-side loading — ProxyForm handles data fetching. diff --git a/web/src/routes/settings/+page.svelte b/web/src/routes/settings/+page.svelte index 9cff48f..3ad60a3 100644 --- a/web/src/routes/settings/+page.svelte +++ b/web/src/routes/settings/+page.svelte @@ -20,6 +20,7 @@ let pollingInterval = $state(''); let baseVolumePath = $state(''); let notificationUrl = $state(''); + let staleThresholdDays = $state('7'); let sslCertificateId = $state(0); let sslCertName = $state(''); @@ -79,6 +80,7 @@ baseVolumePath = settings.base_volume_path ?? ''; sslCertificateId = settings.ssl_certificate_id ?? 0; notificationUrl = settings.notification_url ?? ''; + staleThresholdDays = String(settings.stale_threshold_days ?? 7); } catch (err) { toasts.error(err instanceof Error ? err.message : $t('settingsGeneral.loadFailed')); } finally { @@ -101,7 +103,8 @@ domain: domain.trim(), server_ip: serverIp.trim(), network: network.trim(), subdomain_pattern: subdomainPattern.trim(), polling_interval: pollingInterval.trim(), base_volume_path: baseVolumePath.trim(), notification_url: notificationUrl.trim(), - ssl_certificate_id: sslCertificateId + ssl_certificate_id: sslCertificateId, + stale_threshold_days: Math.max(1, parseInt(staleThresholdDays, 10) || 7) }); toasts.success($t('settingsGeneral.saved')); } catch (err) { @@ -242,6 +245,21 @@
+ +
+

{$t('stale.title')}

+
+ +
+
+
+ + +

{$t('dashboard.projects')}

From e0a648fb0c44854c1abadcd2cac72be137b66abe Mon Sep 17 00:00:00 2001 From: "alexei.dolgolyov" Date: Mon, 30 Mar 2026 11:47:16 +0300 Subject: [PATCH 6/7] fix(observability): address final review findings Critical fixes: - Fix StaleContainer frontend type to match nested backend response shape - Guard ContainerID[:12] slice against empty/short IDs in ListAllProxies High-priority fixes: - Support comma-separated severity/source in event log filtering (IN clause) - Eliminate N+1 queries in ListAllProxies and FindStaleInstances (pre-load maps) - Stop leaking internal error messages to API clients (use slog + generic msgs) --- internal/api/proxy.go | 19 +++++++---- internal/api/stale.go | 12 ++++--- internal/api/stats.go | 7 ++-- internal/proxy/manager.go | 33 ++++++++++++++----- internal/stale/scanner.go | 28 ++++++++++++---- internal/store/eventlog.go | 28 +++++++++++++--- .../lib/components/StaleContainerCard.svelte | 10 +++--- web/src/lib/types.ts | 20 +++++++---- 8 files changed, 115 insertions(+), 42 deletions(-) diff --git a/internal/api/proxy.go b/internal/api/proxy.go index 698f7be..e5dd8a0 100644 --- a/internal/api/proxy.go +++ b/internal/api/proxy.go @@ -2,6 +2,7 @@ package api import ( "context" + "log/slog" "net/http" "time" @@ -65,7 +66,8 @@ func (s *Server) createProxy(w http.ResponseWriter, r *http.Request) { p, err := s.proxyManager.CreateProxy(r.Context(), req) if err != nil { - respondError(w, http.StatusInternalServerError, err.Error()) + slog.Error("failed to create proxy", "domain", req.Domain, "error", err) + respondError(w, http.StatusInternalServerError, "failed to create proxy") return } @@ -82,7 +84,8 @@ func (s *Server) listProxies(w http.ResponseWriter, r *http.Request) { proxies, err := s.proxyManager.ListProxies() if err != nil { - respondError(w, http.StatusInternalServerError, err.Error()) + slog.Error("proxy operation failed", "error", err) + respondError(w, http.StatusInternalServerError, "proxy operation failed") return } @@ -104,7 
+107,8 @@ func (s *Server) getProxy(w http.ResponseWriter, r *http.Request) { respondNotFound(w, "proxy") return } - respondError(w, http.StatusInternalServerError, err.Error()) + slog.Error("proxy operation failed", "error", err) + respondError(w, http.StatusInternalServerError, "proxy operation failed") return } @@ -145,7 +149,8 @@ func (s *Server) updateProxy(w http.ResponseWriter, r *http.Request) { respondNotFound(w, "proxy") return } - respondError(w, http.StatusInternalServerError, err.Error()) + slog.Error("proxy operation failed", "error", err) + respondError(w, http.StatusInternalServerError, "proxy operation failed") return } @@ -167,7 +172,8 @@ func (s *Server) deleteProxy(w http.ResponseWriter, r *http.Request) { respondNotFound(w, "proxy") return } - respondError(w, http.StatusInternalServerError, err.Error()) + slog.Error("proxy operation failed", "error", err) + respondError(w, http.StatusInternalServerError, "proxy operation failed") return } @@ -184,7 +190,8 @@ func (s *Server) listAllProxies(w http.ResponseWriter, r *http.Request) { views, err := s.proxyManager.ListAllProxies() if err != nil { - respondError(w, http.StatusInternalServerError, err.Error()) + slog.Error("proxy operation failed", "error", err) + respondError(w, http.StatusInternalServerError, "proxy operation failed") return } diff --git a/internal/api/stale.go b/internal/api/stale.go index 26d9c4e..340041e 100644 --- a/internal/api/stale.go +++ b/internal/api/stale.go @@ -22,7 +22,8 @@ func (s *Server) listStaleContainers(w http.ResponseWriter, r *http.Request) { staleInstances, err := s.staleScanner.FindStaleInstances(r.Context()) if err != nil { - respondError(w, http.StatusInternalServerError, "failed to find stale containers: "+err.Error()) + slog.Error("failed to find stale containers", "error", err) + respondError(w, http.StatusInternalServerError, "failed to find stale containers") return } @@ -43,7 +44,8 @@ func (s *Server) cleanupStaleContainer(w http.ResponseWriter, r 
*http.Request) { respondNotFound(w, "instance") return } - respondError(w, http.StatusInternalServerError, "failed to get instance: "+err.Error()) + slog.Error("failed to get instance", "instance_id", instanceID, "error", err) + respondError(w, http.StatusInternalServerError, "failed to get instance") return } @@ -54,7 +56,8 @@ func (s *Server) cleanupStaleContainer(w http.ResponseWriter, r *http.Request) { } if err := s.cleanupInstance(r, inst); err != nil { - respondError(w, http.StatusInternalServerError, "failed to cleanup instance: "+err.Error()) + slog.Error("failed to cleanup instance", "instance_id", instanceID, "error", err) + respondError(w, http.StatusInternalServerError, "failed to cleanup instance") return } @@ -71,7 +74,8 @@ func (s *Server) bulkCleanupStaleContainers(w http.ResponseWriter, r *http.Reque staleInstances, err := s.staleScanner.FindStaleInstances(r.Context()) if err != nil { - respondError(w, http.StatusInternalServerError, "failed to find stale containers: "+err.Error()) + slog.Error("failed to find stale containers for bulk cleanup", "error", err) + respondError(w, http.StatusInternalServerError, "failed to find stale containers") return } diff --git a/internal/api/stats.go b/internal/api/stats.go index 9f6e088..f1e5ea2 100644 --- a/internal/api/stats.go +++ b/internal/api/stats.go @@ -2,6 +2,7 @@ package api import ( "errors" + "log/slog" "net/http" "github.com/go-chi/chi/v5" @@ -20,7 +21,8 @@ func (s *Server) getInstanceStats(w http.ResponseWriter, r *http.Request) { respondNotFound(w, "instance") return } - respondError(w, http.StatusInternalServerError, "failed to get instance: "+err.Error()) + slog.Error("failed to get instance", "instance_id", instanceID, "error", err) + respondError(w, http.StatusInternalServerError, "failed to get instance") return } @@ -31,7 +33,8 @@ func (s *Server) getInstanceStats(w http.ResponseWriter, r *http.Request) { stats, err := s.docker.GetContainerStats(r.Context(), inst.ContainerID) if err != nil 
{ - respondError(w, http.StatusInternalServerError, "failed to get container stats: "+err.Error()) + slog.Error("failed to get container stats", "container_id", inst.ContainerID, "error", err) + respondError(w, http.StatusInternalServerError, "failed to get container stats") return } diff --git a/internal/proxy/manager.go b/internal/proxy/manager.go index 9a7af02..05189ef 100644 --- a/internal/proxy/manager.go +++ b/internal/proxy/manager.go @@ -233,22 +233,39 @@ func (m *Manager) ListAllProxies() ([]ProxyView, error) { return nil, fmt.Errorf("list instances: %w", err) } + // Pre-load project and stage names to avoid N+1 queries. + allProjects, _ := m.store.GetAllProjects() + projectNames := make(map[string]string, len(allProjects)) + for _, p := range allProjects { + projectNames[p.ID] = p.Name + } + stageNames := make(map[string]string) + for _, p := range allProjects { + stages, _ := m.store.GetStagesByProjectID(p.ID) + for _, s := range stages { + stageNames[s.ID] = s.Name + } + } + for _, inst := range instances { if inst.NpmProxyID <= 0 { continue } - projectName := inst.ProjectID - stageName := inst.StageID - - if proj, err := m.store.GetProjectByID(inst.ProjectID); err == nil { - projectName = proj.Name + projectName := projectNames[inst.ProjectID] + if projectName == "" { + projectName = inst.ProjectID } - if stg, err := m.store.GetStageByID(inst.StageID); err == nil { - stageName = stg.Name + stageName := stageNames[inst.StageID] + if stageName == "" { + stageName = inst.StageID } - destination := fmt.Sprintf("%s:%d", inst.ContainerID[:12], inst.Port) + cid := inst.ContainerID + if len(cid) > 12 { + cid = cid[:12] + } + destination := fmt.Sprintf("%s:%d", cid, inst.Port) if inst.Subdomain != "" { destination = fmt.Sprintf("%s:%d", inst.Subdomain, inst.Port) } diff --git a/internal/stale/scanner.go b/internal/stale/scanner.go index 94063fa..da85e97 100644 --- a/internal/stale/scanner.go +++ b/internal/stale/scanner.go @@ -226,6 +226,20 @@ func (s *Scanner) 
FindStaleInstances(ctx context.Context) ([]StaleInstance, erro } } + // Pre-load project and stage names to avoid N+1 queries. + allProjects, _ := s.store.GetAllProjects() + projectNames := make(map[string]string, len(allProjects)) + for _, p := range allProjects { + projectNames[p.ID] = p.Name + } + stageNames := make(map[string]string) + for _, p := range allProjects { + stages, _ := s.store.GetStagesByProjectID(p.ID) + for _, st := range stages { + stageNames[st.ID] = st.Name + } + } + now := time.Now().UTC() var result []StaleInstance @@ -254,14 +268,14 @@ func (s *Scanner) FindStaleInstances(ctx context.Context) ([]StaleInstance, erro continue } - // Look up project and stage names. - projectName := inst.ProjectID - stageName := inst.StageID - if proj, err := s.store.GetProjectByID(inst.ProjectID); err == nil { - projectName = proj.Name + // Look up project and stage names from pre-loaded maps. + projectName := projectNames[inst.ProjectID] + if projectName == "" { + projectName = inst.ProjectID } - if stg, err := s.store.GetStageByID(inst.StageID); err == nil { - stageName = stg.Name + stageName := stageNames[inst.StageID] + if stageName == "" { + stageName = inst.StageID } result = append(result, StaleInstance{ diff --git a/internal/store/eventlog.go b/internal/store/eventlog.go index 6c8a458..1414348 100644 --- a/internal/store/eventlog.go +++ b/internal/store/eventlog.go @@ -54,12 +54,32 @@ func (s *Store) ListEvents(filter EventLogFilter) ([]EventLog, error) { var args []any if filter.Severity != "" { - conditions = append(conditions, "severity = ?") - args = append(args, filter.Severity) + parts := strings.Split(filter.Severity, ",") + if len(parts) == 1 { + conditions = append(conditions, "severity = ?") + args = append(args, filter.Severity) + } else { + placeholders := make([]string, len(parts)) + for i, p := range parts { + placeholders[i] = "?" 
+ args = append(args, strings.TrimSpace(p)) + } + conditions = append(conditions, "severity IN ("+strings.Join(placeholders, ",")+")") + } } if filter.Source != "" { - conditions = append(conditions, "source = ?") - args = append(args, filter.Source) + parts := strings.Split(filter.Source, ",") + if len(parts) == 1 { + conditions = append(conditions, "source = ?") + args = append(args, filter.Source) + } else { + placeholders := make([]string, len(parts)) + for i, p := range parts { + placeholders[i] = "?" + args = append(args, strings.TrimSpace(p)) + } + conditions = append(conditions, "source IN ("+strings.Join(placeholders, ",")+")") + } } if filter.Since != "" { conditions = append(conditions, "created_at >= ?") diff --git a/web/src/lib/components/StaleContainerCard.svelte b/web/src/lib/components/StaleContainerCard.svelte index 63dd09b..9e5f12c 100644 --- a/web/src/lib/components/StaleContainerCard.svelte +++ b/web/src/lib/components/StaleContainerCard.svelte @@ -21,7 +21,7 @@ ); const displayName = $derived( - `${container.project_name}-${container.stage_name}-${container.image_tag}` + `${container.project_name}-${container.stage_name}-${container.instance.image_tag}` ); function formatDate(iso: string): string { @@ -59,14 +59,14 @@
- {container.image_tag} + {container.instance.image_tag} - {$t('stale.lastAlive')}: {formatDate(container.last_alive_at)} + {$t('stale.lastAlive')}: {formatDate(container.instance.last_alive_at)} - {container.status} + {container.instance.status}
@@ -75,7 +75,7 @@
-

{$t('app.name')} {$t('app.version')}

+
+

{$t('app.name')} {$t('app.version')}

+ +