feat(observability): phase 1 - schema, models & event log backend

Add database foundation for observability features:
- event_log table with severity/source filtering and pagination
- standalone_proxies table for user-created reverse proxies
- stale_threshold_days setting (default 7 days)
- Auto-persist warn/error events from event bus to database
- SSE broadcast of persistent events for real-time UI updates
- Frontend types and API functions for downstream UI phases
This commit is contained in:
2026-03-30 10:59:13 +03:00
parent f71c314262
commit c38b7d4c78
23 changed files with 1149 additions and 20 deletions
+15
View File
@@ -93,6 +93,21 @@ func main() {
notifier := notify.New() notifier := notify.New()
eventBus := events.New() eventBus := events.New()
// Auto-persist warn/error events from the event bus to the database.
stopLogger := eventBus.RegisterPersistentLogger(func(source, severity, message, metadata string) (int64, string, error) {
evt, err := db.InsertEvent(store.EventLog{
Source: source,
Severity: severity,
Message: message,
Metadata: metadata,
})
if err != nil {
return 0, "", err
}
return evt.ID, evt.CreatedAt, nil
})
defer stopLogger()
dep := deployer.New(dockerClient, npmClient, db, healthChecker, notifier, eventBus, encKey) dep := deployer.New(dockerClient, npmClient, db, healthChecker, notifier, eventBus, encKey)
// Initialize webhook handler. // Initialize webhook handler.
+48
View File
@@ -0,0 +1,48 @@
package api
import (
"log/slog"
"net/http"
"strconv"
"github.com/alexei/docker-watcher/internal/store"
)
// listEventLog serves GET /api/events/log.
//
// Recognised query parameters are severity, source, since, until, limit and
// offset. The four string parameters are forwarded to the store untouched.
// limit and offset are parsed as integers; a missing or malformed value is
// forwarded as 0.
func (s *Server) listEventLog(w http.ResponseWriter, r *http.Request) {
	const failure = "failed to list events"

	params := r.URL.Query()

	// Parse errors are ignored: strconv.Atoi returns 0 when it fails and that
	// zero is what reaches the store.
	limit, _ := strconv.Atoi(params.Get("limit"))
	offset, _ := strconv.Atoi(params.Get("offset"))

	events, err := s.store.ListEvents(store.EventLogFilter{
		Severity: params.Get("severity"),
		Source:   params.Get("source"),
		Since:    params.Get("since"),
		Until:    params.Get("until"),
		Limit:    limit,
		Offset:   offset,
	})
	if err != nil {
		slog.Error(failure, "error", err)
		respondError(w, http.StatusInternalServerError, failure)
		return
	}

	respondJSON(w, http.StatusOK, events)
}
// getEventLogStats serves GET /api/events/log/stats by returning whatever the
// store's GetEventStats reports. A store failure is logged and answered with
// HTTP 500.
func (s *Server) getEventLogStats(w http.ResponseWriter, r *http.Request) {
	const failure = "failed to get event stats"

	stats, err := s.store.GetEventStats()
	if err != nil {
		slog.Error(failure, "error", err)
		respondError(w, http.StatusInternalServerError, failure)
		return
	}

	respondJSON(w, http.StatusOK, stats)
}
+2
View File
@@ -125,6 +125,8 @@ func (s *Server) Router() chi.Router {
r.Get("/deploys", s.listDeploys) r.Get("/deploys", s.listDeploys)
r.Get("/deploys/{id}/logs", s.streamDeployLogs) r.Get("/deploys/{id}/logs", s.streamDeployLogs)
r.Get("/events", s.streamEvents) r.Get("/events", s.streamEvents)
r.Get("/events/log", s.listEventLog)
r.Get("/events/log/stats", s.getEventLogStats)
r.Get("/registries", s.listRegistries) r.Get("/registries", s.listRegistries)
r.Route("/registries/{id}", func(r chi.Router) { r.Route("/registries/{id}", func(r chi.Router) {
r.Get("/tags/*", s.listRegistryTags) r.Get("/tags/*", s.listRegistryTags)
+21 -12
View File
@@ -24,7 +24,8 @@ type settingsRequest struct {
NpmEmail string `json:"npm_email"` NpmEmail string `json:"npm_email"`
NpmPassword string `json:"npm_password"` NpmPassword string `json:"npm_password"`
PollingInterval string `json:"polling_interval"` PollingInterval string `json:"polling_interval"`
SSLCertificateID *int `json:"ssl_certificate_id,omitempty"` SSLCertificateID *int `json:"ssl_certificate_id,omitempty"`
StaleThresholdDays *int `json:"stale_threshold_days,omitempty"`
} }
// getSettings handles GET /api/settings. // getSettings handles GET /api/settings.
@@ -37,17 +38,18 @@ func (s *Server) getSettings(w http.ResponseWriter, r *http.Request) {
// Return settings without sensitive fields. // Return settings without sensitive fields.
respondJSON(w, http.StatusOK, map[string]any{ respondJSON(w, http.StatusOK, map[string]any{
"domain": settings.Domain, "domain": settings.Domain,
"server_ip": settings.ServerIP, "server_ip": settings.ServerIP,
"network": settings.Network, "network": settings.Network,
"subdomain_pattern": settings.SubdomainPattern, "subdomain_pattern": settings.SubdomainPattern,
"notification_url": settings.NotificationURL, "notification_url": settings.NotificationURL,
"npm_url": settings.NpmURL, "npm_url": settings.NpmURL,
"npm_email": settings.NpmEmail, "npm_email": settings.NpmEmail,
"has_npm_password": settings.NpmPassword != "", "has_npm_password": settings.NpmPassword != "",
"polling_interval": settings.PollingInterval, "polling_interval": settings.PollingInterval,
"ssl_certificate_id": settings.SSLCertificateID, "ssl_certificate_id": settings.SSLCertificateID,
"updated_at": settings.UpdatedAt, "stale_threshold_days": settings.StaleThresholdDays,
"updated_at": settings.UpdatedAt,
}) })
} }
@@ -101,6 +103,13 @@ func (s *Server) updateSettings(w http.ResponseWriter, r *http.Request) {
updated.SSLCertificateID = *req.SSLCertificateID updated.SSLCertificateID = *req.SSLCertificateID
sslChanged = true sslChanged = true
} }
if req.StaleThresholdDays != nil {
if *req.StaleThresholdDays < 1 {
respondError(w, http.StatusBadRequest, "stale_threshold_days must be at least 1")
return
}
updated.StaleThresholdDays = *req.StaleThresholdDays
}
if err := s.store.UpdateSettings(updated); err != nil { if err := s.store.UpdateSettings(updated); err != nil {
respondError(w, http.StatusInternalServerError, "failed to update settings: "+err.Error()) respondError(w, http.StatusInternalServerError, "failed to update settings: "+err.Error())
+2 -2
View File
@@ -150,9 +150,9 @@ func (s *Server) streamEvents(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK) w.WriteHeader(http.StatusOK)
flusher.Flush() flusher.Flush()
// Subscribe to instance status and deploy status events. // Subscribe to instance status, deploy status, and persistent event log events.
sub := s.eventBus.Subscribe(func(evt events.Event) bool { sub := s.eventBus.Subscribe(func(evt events.Event) bool {
return evt.Type == events.EventInstanceStatus || evt.Type == events.EventDeployStatus return evt.Type == events.EventInstanceStatus || evt.Type == events.EventDeployStatus || evt.Type == events.EventLog
}) })
defer s.eventBus.Unsubscribe(sub) defer s.eventBus.Unsubscribe(sub)
+70
View File
@@ -2,6 +2,7 @@ package events
import ( import (
"encoding/json" "encoding/json"
"log/slog"
"sync" "sync"
) )
@@ -17,6 +18,9 @@ const (
// EventDeployStatus is emitted when a deploy status changes. // EventDeployStatus is emitted when a deploy status changes.
EventDeployStatus EventType = "deploy_status" EventDeployStatus EventType = "deploy_status"
// EventLog is emitted when a persistent event is logged.
EventLog EventType = "event_log"
) )
// Event is a single event published on the bus. // Event is a single event published on the bus.
@@ -50,6 +54,72 @@ type DeployStatusPayload struct {
Error string `json:"error,omitempty"` Error string `json:"error,omitempty"`
} }
// EventLogPayload is carried by EventLog events and describes one entry of
// the persistent event log.
type EventLogPayload struct {
	// ID is the identifier returned by the persistence callback.
	ID int64 `json:"id"`
	// Source names the origin of the entry.
	Source string `json:"source"`
	// Severity is the level of the entry.
	Severity string `json:"severity"`
	// Message is the human-readable text of the entry.
	Message string `json:"message"`
	// Metadata is a JSON document encoded as a string.
	Metadata string `json:"metadata"`
	// CreatedAt is the timestamp returned by the persistence callback.
	CreatedAt string `json:"created_at"`
}
// PersistFunc stores a single event log entry.
//
// The arguments are the entry's source, severity, message and metadata, the
// last being a JSON document passed as a string. On success it reports the
// stored entry's ID and its created_at timestamp.
type PersistFunc func(source, severity, message, metadata string) (id int64, createdAt string, err error)
// RegisterPersistentLogger attaches a subscriber that stores deploy log events
// whose level is "warn" or "error" by calling persist. Each entry that was
// stored is then published back onto the bus as an EventLog event so that
// subscribers of that type (for example SSE clients) receive it.
//
// Entries whose persist call fails are logged and not re-published. The
// returned function unsubscribes the logger from the bus.
//
// NOTE(review): the worker goroutine ends only when the subscription channel
// is closed — confirm that Unsubscribe closes it.
func (b *Bus) RegisterPersistentLogger(persist PersistFunc) func() {
	const source = "deploy"

	// Accept deploy log events only, and of those only warn/error levels.
	wanted := func(evt Event) bool {
		if evt.Type != EventDeployLog {
			return false
		}
		logPayload, ok := evt.Payload.(DeployLogPayload)
		return ok && (logPayload.Level == "warn" || logPayload.Level == "error")
	}
	sub := b.Subscribe(wanted)

	go func() {
		for evt := range sub {
			entry, ok := evt.Payload.(DeployLogPayload)
			if !ok {
				continue
			}

			// The marshal error is dropped; the input is a plain
			// map[string]string.
			raw, _ := json.Marshal(map[string]string{"deploy_id": entry.DeployID})
			metadata := string(raw)

			id, createdAt, err := persist(source, entry.Level, entry.Message, metadata)
			if err != nil {
				slog.Error("failed to persist event log", "source", source, "level", entry.Level, "error", err)
				continue
			}

			// Announce the stored entry to EventLog subscribers.
			b.Publish(Event{
				Type: EventLog,
				Payload: EventLogPayload{
					ID:        id,
					Source:    source,
					Severity:  entry.Level,
					Message:   entry.Message,
					Metadata:  metadata,
					CreatedAt: createdAt,
				},
			})
		}
	}()

	return func() { b.Unsubscribe(sub) }
}
// Subscriber is a channel that receives events. // Subscriber is a channel that receives events.
type Subscriber chan Event type Subscriber chan Event
+148
View File
@@ -0,0 +1,148 @@
package store
import (
"fmt"
"strings"
)
// EventLogFilter narrows and pages the result of ListEvents. String fields
// left empty add no condition to the query.
type EventLogFilter struct {
	// Severity restricts results to one severity (info, warn, error).
	Severity string
	// Source restricts results to one source.
	Source string
	// Since keeps only events created at or after this timestamp.
	Since string
	// Until keeps only events created at or before this timestamp.
	Until string
	// Limit is the maximum number of results (default 50).
	Limit int
	// Offset is the pagination offset.
	Offset int
}
// EventLogStats reports how many event log entries exist per severity.
type EventLogStats struct {
	// Info is the number of entries with severity "info".
	Info int `json:"info"`
	// Warn is the number of entries with severity "warn".
	Warn int `json:"warn"`
	// Error is the number of entries with severity "error".
	Error int `json:"error"`
	// Total is the number of entries across all severities.
	Total int `json:"total"`
}
// InsertEvent writes evt to the event_log table and returns the stored entry.
//
// CreatedAt is overwritten with Now(), an empty Metadata is replaced by "{}",
// and the returned entry carries the ID reported by the database.
func (s *Store) InsertEvent(evt EventLog) (EventLog, error) {
	if evt.Metadata == "" {
		evt.Metadata = "{}"
	}
	evt.CreatedAt = Now()

	res, err := s.db.Exec(
		`INSERT INTO event_log (source, severity, message, metadata, created_at)
VALUES (?, ?, ?, ?, ?)`,
		evt.Source, evt.Severity, evt.Message, evt.Metadata, evt.CreatedAt,
	)
	if err != nil {
		return EventLog{}, fmt.Errorf("insert event: %w", err)
	}

	newID, err := res.LastInsertId()
	if err != nil {
		return EventLog{}, fmt.Errorf("get event id: %w", err)
	}

	evt.ID = newID
	return evt, nil
}
// ListEvents returns the event log entries selected by filter, newest first.
//
// Each non-empty string field of filter adds one condition, and all
// conditions are combined with AND. Limit defaults to 50 when it is zero or
// negative and is capped at 500. A negative Offset is treated as 0.
//
// Entries are ordered by created_at and then by id, both descending. The id
// tie-breaker keeps the order of entries that share a timestamp stable, so
// consecutive pages neither repeat nor skip rows.
//
// The result is never nil: when nothing matches, an empty slice is returned.
func (s *Store) ListEvents(filter EventLogFilter) ([]EventLog, error) {
	const (
		defaultLimit = 50
		maxLimit     = 500
	)

	var (
		conditions []string
		args       []any
	)
	if filter.Severity != "" {
		conditions = append(conditions, "severity = ?")
		args = append(args, filter.Severity)
	}
	if filter.Source != "" {
		conditions = append(conditions, "source = ?")
		args = append(args, filter.Source)
	}
	if filter.Since != "" {
		conditions = append(conditions, "created_at >= ?")
		args = append(args, filter.Since)
	}
	if filter.Until != "" {
		conditions = append(conditions, "created_at <= ?")
		args = append(args, filter.Until)
	}

	query := "SELECT id, source, severity, message, metadata, created_at FROM event_log"
	if len(conditions) > 0 {
		query += " WHERE " + strings.Join(conditions, " AND ")
	}
	// Without the id tie-breaker, rows with equal created_at have no defined
	// order, which makes LIMIT/OFFSET paging unreliable.
	query += " ORDER BY created_at DESC, id DESC LIMIT ? OFFSET ?"

	limit := filter.Limit
	switch {
	case limit <= 0:
		limit = defaultLimit
	case limit > maxLimit:
		limit = maxLimit
	}
	offset := filter.Offset
	if offset < 0 {
		offset = 0
	}
	args = append(args, limit, offset)

	rows, err := s.db.Query(query, args...)
	if err != nil {
		return nil, fmt.Errorf("query events: %w", err)
	}
	defer rows.Close()

	// A non-nil empty slice is used so the result encodes as [] in JSON
	// rather than null.
	events := []EventLog{}
	for rows.Next() {
		var evt EventLog
		if err := rows.Scan(&evt.ID, &evt.Source, &evt.Severity, &evt.Message, &evt.Metadata, &evt.CreatedAt); err != nil {
			return nil, fmt.Errorf("scan event: %w", err)
		}
		events = append(events, evt)
	}
	return events, rows.Err()
}
// GetEventStats counts the entries of event_log per severity.
//
// Counts for "info", "warn" and "error" land in the matching field. Any other
// severity value contributes to Total only.
func (s *Store) GetEventStats() (EventLogStats, error) {
	rows, err := s.db.Query(
		`SELECT severity, COUNT(*) FROM event_log GROUP BY severity`,
	)
	if err != nil {
		return EventLogStats{}, fmt.Errorf("query event stats: %w", err)
	}
	defer rows.Close()

	var stats EventLogStats
	// Maps each known severity to the field that receives its count.
	buckets := map[string]*int{
		"info":  &stats.Info,
		"warn":  &stats.Warn,
		"error": &stats.Error,
	}

	for rows.Next() {
		var (
			severity string
			count    int
		)
		if err := rows.Scan(&severity, &count); err != nil {
			return EventLogStats{}, fmt.Errorf("scan event stats: %w", err)
		}
		if field, known := buckets[severity]; known {
			*field = count
		}
		stats.Total += count
	}
	return stats, rows.Err()
}
// PruneEvents removes event log entries whose created_at lies more than
// olderThanDays days in the past and returns the number of deleted rows.
// olderThanDays must be at least 1; smaller values are rejected with an error
// before the database is touched.
//
// NOTE(review): the cutoff is produced by SQLite's datetime() and compared
// against the stored created_at text — confirm Now() writes timestamps in the
// same layout.
func (s *Store) PruneEvents(olderThanDays int) (int64, error) {
	if olderThanDays < 1 {
		return 0, fmt.Errorf("prune events: olderThanDays must be >= 1, got %d", olderThanDays)
	}

	// datetime() modifier, e.g. "-7 days".
	modifier := fmt.Sprintf("-%d days", olderThanDays)

	res, err := s.db.Exec(
		`DELETE FROM event_log WHERE created_at < datetime('now', ?)`,
		modifier,
	)
	if err != nil {
		return 0, fmt.Errorf("prune events: %w", err)
	}
	return res.RowsAffected()
}
+27 -2
View File
@@ -55,8 +55,9 @@ type Settings struct {
WebhookSecret string `json:"webhook_secret"` WebhookSecret string `json:"webhook_secret"`
PollingInterval string `json:"polling_interval"` PollingInterval string `json:"polling_interval"`
BaseVolumePath string `json:"base_volume_path"` BaseVolumePath string `json:"base_volume_path"`
SSLCertificateID int `json:"ssl_certificate_id"` SSLCertificateID int `json:"ssl_certificate_id"`
UpdatedAt string `json:"updated_at"` StaleThresholdDays int `json:"stale_threshold_days"`
UpdatedAt string `json:"updated_at"`
} }
// Instance represents a running (or stopped) container for a project stage. // Instance represents a running (or stopped) container for a project stage.
@@ -117,3 +118,27 @@ type Volume struct {
CreatedAt string `json:"created_at"` CreatedAt string `json:"created_at"`
UpdatedAt string `json:"updated_at"` UpdatedAt string `json:"updated_at"`
} }
// EventLog is one entry of the persistent event log.
type EventLog struct {
	// ID identifies the entry.
	ID int64 `json:"id"`
	// Source names the origin of the entry.
	Source string `json:"source"`
	// Severity is one of info, warn, error.
	Severity string `json:"severity"`
	// Message is the human-readable text of the entry.
	Message string `json:"message"`
	// Metadata holds JSON-encoded structured data.
	Metadata string `json:"metadata"`
	// CreatedAt is the creation timestamp of the entry.
	CreatedAt string `json:"created_at"`
}
// StandaloneProxy is a reverse proxy record that is not tied to a project.
type StandaloneProxy struct {
	// ID identifies the record.
	ID string `json:"id"`
	// Domain is the domain of the proxy.
	Domain string `json:"domain"`
	// DestinationURL is the destination the proxy points at.
	DestinationURL string `json:"destination_url"`
	// DestinationPort is the destination port.
	DestinationPort int `json:"destination_port"`
	// SSLCertificateID is the ID of the associated SSL certificate.
	SSLCertificateID int `json:"ssl_certificate_id"`
	// NpmProxyID is the associated NPM proxy ID.
	NpmProxyID int `json:"npm_proxy_id"`
	// HealthStatus is one of unknown, healthy, unhealthy.
	HealthStatus string `json:"health_status"`
	// HealthCheckedAt is the timestamp of the last health status update.
	HealthCheckedAt string `json:"health_checked_at"`
	// CreatedAt is the creation timestamp of the record.
	CreatedAt string `json:"created_at"`
	// UpdatedAt is the timestamp of the last modification.
	UpdatedAt string `json:"updated_at"`
}
+4 -4
View File
@@ -9,10 +9,10 @@ func (s *Store) GetSettings() (Settings, error) {
var st Settings var st Settings
err := s.db.QueryRow( err := s.db.QueryRow(
`SELECT domain, server_ip, network, subdomain_pattern, notification_url, `SELECT domain, server_ip, network, subdomain_pattern, notification_url,
npm_url, npm_email, npm_password, webhook_secret, polling_interval, base_volume_path, ssl_certificate_id, updated_at npm_url, npm_email, npm_password, webhook_secret, polling_interval, base_volume_path, ssl_certificate_id, stale_threshold_days, updated_at
FROM settings WHERE id = 1`, FROM settings WHERE id = 1`,
).Scan(&st.Domain, &st.ServerIP, &st.Network, &st.SubdomainPattern, &st.NotificationURL, ).Scan(&st.Domain, &st.ServerIP, &st.Network, &st.SubdomainPattern, &st.NotificationURL,
&st.NpmURL, &st.NpmEmail, &st.NpmPassword, &st.WebhookSecret, &st.PollingInterval, &st.BaseVolumePath, &st.SSLCertificateID, &st.UpdatedAt) &st.NpmURL, &st.NpmEmail, &st.NpmPassword, &st.WebhookSecret, &st.PollingInterval, &st.BaseVolumePath, &st.SSLCertificateID, &st.StaleThresholdDays, &st.UpdatedAt)
if err != nil { if err != nil {
return Settings{}, fmt.Errorf("query settings: %w", err) return Settings{}, fmt.Errorf("query settings: %w", err)
} }
@@ -25,10 +25,10 @@ func (s *Store) UpdateSettings(st Settings) error {
_, err := s.db.Exec( _, err := s.db.Exec(
`UPDATE settings SET `UPDATE settings SET
domain=?, server_ip=?, network=?, subdomain_pattern=?, notification_url=?, domain=?, server_ip=?, network=?, subdomain_pattern=?, notification_url=?,
npm_url=?, npm_email=?, npm_password=?, webhook_secret=?, polling_interval=?, base_volume_path=?, ssl_certificate_id=?, updated_at=? npm_url=?, npm_email=?, npm_password=?, webhook_secret=?, polling_interval=?, base_volume_path=?, ssl_certificate_id=?, stale_threshold_days=?, updated_at=?
WHERE id = 1`, WHERE id = 1`,
st.Domain, st.ServerIP, st.Network, st.SubdomainPattern, st.NotificationURL, st.Domain, st.ServerIP, st.Network, st.SubdomainPattern, st.NotificationURL,
st.NpmURL, st.NpmEmail, st.NpmPassword, st.WebhookSecret, st.PollingInterval, st.BaseVolumePath, st.SSLCertificateID, st.UpdatedAt, st.NpmURL, st.NpmEmail, st.NpmPassword, st.WebhookSecret, st.PollingInterval, st.BaseVolumePath, st.SSLCertificateID, st.StaleThresholdDays, st.UpdatedAt,
) )
if err != nil { if err != nil {
return fmt.Errorf("update settings: %w", err) return fmt.Errorf("update settings: %w", err)
+120
View File
@@ -0,0 +1,120 @@
package store
import (
"database/sql"
"errors"
"fmt"
"github.com/google/uuid"
)
// CreateStandaloneProxy stores p as a new standalone proxy and returns the
// stored record.
//
// The ID is replaced by a freshly generated UUID, CreatedAt and UpdatedAt are
// both set to the same Now() value, and an empty HealthStatus becomes
// "unknown".
func (s *Store) CreateStandaloneProxy(p StandaloneProxy) (StandaloneProxy, error) {
	if p.HealthStatus == "" {
		p.HealthStatus = "unknown"
	}

	now := Now()
	p.ID = uuid.New().String()
	p.CreatedAt = now
	p.UpdatedAt = now

	if _, err := s.db.Exec(
		`INSERT INTO standalone_proxies (id, domain, destination_url, destination_port, ssl_certificate_id, npm_proxy_id, health_status, health_checked_at, created_at, updated_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
		p.ID, p.Domain, p.DestinationURL, p.DestinationPort, p.SSLCertificateID,
		p.NpmProxyID, p.HealthStatus, p.HealthCheckedAt, p.CreatedAt, p.UpdatedAt,
	); err != nil {
		return StandaloneProxy{}, fmt.Errorf("insert standalone proxy: %w", err)
	}
	return p, nil
}
// GetStandaloneProxy loads the standalone proxy with the given ID. When no
// such row exists the returned error wraps ErrNotFound.
func (s *Store) GetStandaloneProxy(id string) (StandaloneProxy, error) {
	row := s.db.QueryRow(
		`SELECT id, domain, destination_url, destination_port, ssl_certificate_id, npm_proxy_id, health_status, health_checked_at, created_at, updated_at
FROM standalone_proxies WHERE id = ?`, id,
	)

	var proxy StandaloneProxy
	err := row.Scan(&proxy.ID, &proxy.Domain, &proxy.DestinationURL, &proxy.DestinationPort, &proxy.SSLCertificateID,
		&proxy.NpmProxyID, &proxy.HealthStatus, &proxy.HealthCheckedAt, &proxy.CreatedAt, &proxy.UpdatedAt)
	switch {
	case errors.Is(err, sql.ErrNoRows):
		return StandaloneProxy{}, fmt.Errorf("standalone proxy %s: %w", id, ErrNotFound)
	case err != nil:
		return StandaloneProxy{}, fmt.Errorf("query standalone proxy: %w", err)
	}
	return proxy, nil
}
// ListStandaloneProxies returns every standalone proxy, most recently created
// first. The result is never nil: with no rows an empty slice is returned.
func (s *Store) ListStandaloneProxies() ([]StandaloneProxy, error) {
	rows, err := s.db.Query(
		`SELECT id, domain, destination_url, destination_port, ssl_certificate_id, npm_proxy_id, health_status, health_checked_at, created_at, updated_at
FROM standalone_proxies ORDER BY created_at DESC`,
	)
	if err != nil {
		return nil, fmt.Errorf("query standalone proxies: %w", err)
	}
	defer rows.Close()

	result := []StandaloneProxy{}
	for rows.Next() {
		var proxy StandaloneProxy
		scanErr := rows.Scan(
			&proxy.ID, &proxy.Domain, &proxy.DestinationURL, &proxy.DestinationPort, &proxy.SSLCertificateID,
			&proxy.NpmProxyID, &proxy.HealthStatus, &proxy.HealthCheckedAt, &proxy.CreatedAt, &proxy.UpdatedAt,
		)
		if scanErr != nil {
			return nil, fmt.Errorf("scan standalone proxy: %w", scanErr)
		}
		result = append(result, proxy)
	}
	return result, rows.Err()
}
// UpdateStandaloneProxy overwrites the mutable columns of the standalone proxy
// identified by p.ID (domain, destination, certificate and NPM proxy IDs,
// health fields) and sets updated_at to Now().
//
// p is received by value, so the caller's copy does not see the refreshed
// UpdatedAt. If no row has the given ID the returned error wraps ErrNotFound.
func (s *Store) UpdateStandaloneProxy(p StandaloneProxy) error {
	p.UpdatedAt = Now()
	result, err := s.db.Exec(
		`UPDATE standalone_proxies SET domain=?, destination_url=?, destination_port=?, ssl_certificate_id=?, npm_proxy_id=?, health_status=?, health_checked_at=?, updated_at=?
WHERE id=?`,
		p.Domain, p.DestinationURL, p.DestinationPort, p.SSLCertificateID,
		p.NpmProxyID, p.HealthStatus, p.HealthCheckedAt, p.UpdatedAt, p.ID,
	)
	if err != nil {
		return fmt.Errorf("update standalone proxy: %w", err)
	}

	// Report a failing row count as such, so a driver error is not mistaken
	// for a missing row.
	affected, err := result.RowsAffected()
	if err != nil {
		return fmt.Errorf("update standalone proxy: rows affected: %w", err)
	}
	if affected == 0 {
		return fmt.Errorf("standalone proxy %s: %w", p.ID, ErrNotFound)
	}
	return nil
}
// DeleteStandaloneProxy deletes the standalone proxy with the given ID. If no
// row has that ID the returned error wraps ErrNotFound.
func (s *Store) DeleteStandaloneProxy(id string) error {
	result, err := s.db.Exec(`DELETE FROM standalone_proxies WHERE id = ?`, id)
	if err != nil {
		return fmt.Errorf("delete standalone proxy: %w", err)
	}

	// Report a failing row count as such, so a driver error is not mistaken
	// for a missing row.
	affected, err := result.RowsAffected()
	if err != nil {
		return fmt.Errorf("delete standalone proxy: rows affected: %w", err)
	}
	if affected == 0 {
		return fmt.Errorf("standalone proxy %s: %w", id, ErrNotFound)
	}
	return nil
}
// UpdateProxyHealth records status as the health status of the standalone
// proxy with the given ID and sets both health_checked_at and updated_at to
// the same Now() value. If no row has that ID the returned error wraps
// ErrNotFound.
func (s *Store) UpdateProxyHealth(id string, status string) error {
	ts := Now()
	result, err := s.db.Exec(
		`UPDATE standalone_proxies SET health_status=?, health_checked_at=?, updated_at=? WHERE id=?`,
		status, ts, ts, id,
	)
	if err != nil {
		return fmt.Errorf("update proxy health: %w", err)
	}

	// Report a failing row count as such, so a driver error is not mistaken
	// for a missing row.
	affected, err := result.RowsAffected()
	if err != nil {
		return fmt.Errorf("update proxy health: rows affected: %w", err)
	}
	if affected == 0 {
		return fmt.Errorf("standalone proxy %s: %w", id, ErrNotFound)
	}
	return nil
}
+27
View File
@@ -81,6 +81,8 @@ func (s *Store) runMigrations() error {
`ALTER TABLE stages ADD COLUMN enable_proxy INTEGER NOT NULL DEFAULT 1`, `ALTER TABLE stages ADD COLUMN enable_proxy INTEGER NOT NULL DEFAULT 1`,
// Add ssl_certificate_id to settings (2026-03-29). // Add ssl_certificate_id to settings (2026-03-29).
`ALTER TABLE settings ADD COLUMN ssl_certificate_id INTEGER NOT NULL DEFAULT 0`, `ALTER TABLE settings ADD COLUMN ssl_certificate_id INTEGER NOT NULL DEFAULT 0`,
// Add stale_threshold_days to settings (2026-03-30).
`ALTER TABLE settings ADD COLUMN stale_threshold_days INTEGER NOT NULL DEFAULT 7`,
} }
for _, m := range migrations { for _, m := range migrations {
@@ -98,6 +100,9 @@ func (s *Store) runMigrations() error {
`CREATE INDEX IF NOT EXISTS idx_stages_project_id ON stages(project_id)`, `CREATE INDEX IF NOT EXISTS idx_stages_project_id ON stages(project_id)`,
`CREATE INDEX IF NOT EXISTS idx_stage_env_stage_id ON stage_env(stage_id)`, `CREATE INDEX IF NOT EXISTS idx_stage_env_stage_id ON stage_env(stage_id)`,
`CREATE INDEX IF NOT EXISTS idx_volumes_project_id ON volumes(project_id)`, `CREATE INDEX IF NOT EXISTS idx_volumes_project_id ON volumes(project_id)`,
`CREATE INDEX IF NOT EXISTS idx_event_log_severity ON event_log(severity)`,
`CREATE INDEX IF NOT EXISTS idx_event_log_source ON event_log(source)`,
`CREATE INDEX IF NOT EXISTS idx_event_log_created_at ON event_log(created_at)`,
} }
for _, idx := range indexes { for _, idx := range indexes {
if _, err := s.db.Exec(idx); err != nil { if _, err := s.db.Exec(idx); err != nil {
@@ -250,6 +255,28 @@ CREATE TABLE IF NOT EXISTS volumes (
created_at TEXT NOT NULL DEFAULT (datetime('now')), created_at TEXT NOT NULL DEFAULT (datetime('now')),
updated_at TEXT NOT NULL DEFAULT (datetime('now')) updated_at TEXT NOT NULL DEFAULT (datetime('now'))
); );
CREATE TABLE IF NOT EXISTS event_log (
id INTEGER PRIMARY KEY AUTOINCREMENT,
source TEXT NOT NULL DEFAULT '',
severity TEXT NOT NULL DEFAULT 'info',
message TEXT NOT NULL DEFAULT '',
metadata TEXT NOT NULL DEFAULT '{}',
created_at TEXT NOT NULL DEFAULT (datetime('now'))
);
CREATE TABLE IF NOT EXISTS standalone_proxies (
id TEXT PRIMARY KEY,
domain TEXT NOT NULL UNIQUE,
destination_url TEXT NOT NULL DEFAULT '',
destination_port INTEGER NOT NULL DEFAULT 0,
ssl_certificate_id INTEGER NOT NULL DEFAULT 0,
npm_proxy_id INTEGER NOT NULL DEFAULT 0,
health_status TEXT NOT NULL DEFAULT 'unknown',
health_checked_at TEXT NOT NULL DEFAULT '',
created_at TEXT NOT NULL DEFAULT (datetime('now')),
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
);
` `
// Now returns the current time formatted for SQLite storage. // Now returns the current time formatted for SQLite storage.
+52
View File
@@ -0,0 +1,52 @@
# Feature Context: Observability & Proxy Management
## Configuration
- **Development mode:** Automated
- **Execution mode:** Orchestrator
- **Strategy:** Incremental
- **Build (full):** `make build`
- **Build (frontend):** `cd web && npm install && npm run build`
- **Build (backend):** `go build -o docker-watcher ./cmd/server`
- **Test:** `go test ./...`
- **Lint (backend):** `go vet ./...`
- **Lint (frontend):** `cd web && npm run check`
- **Dev server:** `make dev` (port: 8080)
## Current State
Feature branch just created. No implementation yet. Codebase is fully working on main.
## Temporary Workarounds
(none yet)
## Cross-Phase Dependencies
- Phases 2 & 3 depend on Phase 1 (schema, event_log table, store methods)
- Phases 4, 5, 6, 7 depend on their respective backend phases (1-3) for API endpoints
- Phase 8 depends on Phases 1-3 for backend infrastructure and event system
## Deferred Work
(none yet)
## Failed Approaches
(none yet)
## Review Findings Log
(none yet)
## Phase Execution Log
| Phase | Agent Used | Test Writer | Parallel | Notes |
|-------|-----------|-------------|----------|-------|
| (none yet) | | | | |
## Environment & Runtime Notes
- Build is currently blocked on Go 1.25 transitive dep from Docker SDK — may need to use Go 1.24 toolchain
- SQLite has MaxOpenConns=1, so all DB operations are serialized
- Frontend is embedded into Go binary via embed.FS
## Implementation Notes
- Event bus (`internal/events/bus.go`) uses buffered channels (64 cap), non-blocking publish
- NPM client (`internal/npm/client.go`) handles JWT auth with auto-refresh
- Store uses additive migrations — new `ALTER TABLE` statements are appended to runMigrations(), errors ignored for idempotency
- New tables use `CREATE TABLE IF NOT EXISTS` in the schema constant
- All API responses use envelope pattern: `{success: bool, data?: T, error?: string}`
- Frontend types in `web/src/lib/types.ts` mirror Go models
- API functions centralized in `web/src/lib/api.ts`
+71
View File
@@ -0,0 +1,71 @@
# Feature: Observability & Proxy Management
**Branch:** `feature/observability-proxy-mgmt`
**Base branch:** `main`
**Created:** 2026-03-30
**Status:** 🟡 In Progress
**Strategy:** Incremental
**Mode:** Automated
**Execution:** Orchestrator
## Summary
Extend Docker Watcher with four interconnected features: stale container detection,
standalone proxy management with health monitoring, a unified proxy viewer, and a
persistent event log — plus container stats and notification triggers.
## Build & Test Commands
- **Build (frontend):** `cd web && npm install && npm run build`
- **Build (backend):** `go build -o docker-watcher ./cmd/server`
- **Build (full):** `make build`
- **Test (backend):** `go test ./...`
- **Lint (backend):** `go vet ./...`
- **Lint (frontend):** `cd web && npm run check`
## Tech Stack Summary
- **Backend:** Go 1.24, chi v5 router, SQLite (modernc.org/sqlite), Docker SDK (moby/moby/client)
- **Frontend:** SvelteKit 2.15, Svelte 5, TypeScript 5.7, Tailwind CSS 4, Vite 6
- **Real-time:** Server-Sent Events with auto-reconnect
- **Auth:** JWT + optional OIDC
- **Encryption:** AES-256-GCM for credentials
## Project Conventions
- **Go:** gofmt, small interfaces, error wrapping with `fmt.Errorf("context: %w", err)`, constructor injection
- **DB:** Single-row settings, additive migrations via `ALTER TABLE` (errors ignored for idempotency), `CREATE TABLE IF NOT EXISTS` for new tables
- **API:** Envelope pattern `{success, data?, error?}`, chi route groups, admin middleware for writes
- **Frontend:** Svelte 5 runes ($state, $derived, $effect), TypeScript interfaces mirroring Go models, centralized api.ts, custom components (no UI library)
- **Files:** Feature-organized, small focused files
- **State:** Immutable patterns, no mutation
## Phases
- [ ] Phase 1: Schema, Models & Event Log Backend [domain: backend] → [subplan](./phase-1-schema-eventlog.md)
- [ ] Phase 2: Stale Container Detection [domain: backend] → [subplan](./phase-2-stale-detection.md)
- [ ] Phase 3: Direct Proxy Creation with Validation [domain: backend] → [subplan](./phase-3-proxy-creation.md)
- [ ] Phase 4: Unified Proxy Viewer UI [domain: frontend] → [subplan](./phase-4-proxy-viewer.md)
- [ ] Phase 5: Stale Containers UI [domain: frontend] → [subplan](./phase-5-stale-ui.md)
- [ ] Phase 6: Direct Proxy Creation UI [domain: frontend] → [subplan](./phase-6-proxy-creation-ui.md)
- [ ] Phase 7: Event Log UI [domain: frontend] → [subplan](./phase-7-eventlog-ui.md)
- [ ] Phase 8: Container Stats & Notifications [domain: fullstack] → [subplan](./phase-8-stats-notifications.md)
**Parallelizable phases:**
- Phases 4, 5, 6, 7 are all frontend phases that touch different routes/components and can potentially run in parallel after all backend phases (1-3) complete.
## Phase Progress Log
| Phase | Domain | Status | Review | Build | Committed |
|-------|--------|--------|--------|-------|-----------|
| Phase 1: Schema & Event Log | backend | ⬜ Not Started | ⬜ | ⬜ | ⬜ |
| Phase 2: Stale Detection | backend | ⬜ Not Started | ⬜ | ⬜ | ⬜ |
| Phase 3: Proxy Creation | backend | ⬜ Not Started | ⬜ | ⬜ | ⬜ |
| Phase 4: Proxy Viewer UI | frontend | ⬜ Not Started | ⬜ | ⬜ | ⬜ |
| Phase 5: Stale Containers UI | frontend | ⬜ Not Started | ⬜ | ⬜ | ⬜ |
| Phase 6: Proxy Creation UI | frontend | ⬜ Not Started | ⬜ | ⬜ | ⬜ |
| Phase 7: Event Log UI | frontend | ⬜ Not Started | ⬜ | ⬜ | ⬜ |
| Phase 8: Stats & Notifications | fullstack | ⬜ Not Started | ⬜ | ⬜ | ⬜ |
## Final Review
- [ ] Comprehensive code review
- [ ] Full build passes
- [ ] Full test suite passes
- [ ] Merged to `main`
@@ -0,0 +1,60 @@
# Phase 1: Schema, Models & Event Log Backend
**Status:** ⬜ Not Started
**Parent plan:** [PLAN.md](./PLAN.md)
**Domain:** backend
## Objective
Lay the database foundation for all new features and implement the persistent event log system.
## Tasks
- [ ] Task 1: Add `event_log` table to schema (id INTEGER PK AUTOINCREMENT, source TEXT, severity TEXT, message TEXT, metadata TEXT JSON, created_at TEXT)
- [ ] Task 2: Add `standalone_proxies` table to schema (id TEXT PK, domain TEXT UNIQUE, destination_url TEXT, destination_port INTEGER, ssl_certificate_id INTEGER, npm_proxy_id INTEGER, health_status TEXT, health_checked_at TEXT, created_at TEXT, updated_at TEXT)
- [ ] Task 3: Add `stale_threshold_days` column to settings table (migration, default 7)
- [ ] Task 4: Create `internal/store/eventlog.go` — store methods: InsertEvent, ListEvents (paginated, filterable by severity/source/date range), GetEventStats (counts by severity), PruneEvents (delete old entries)
- [ ] Task 5: Create `internal/store/standalone_proxy.go` — store methods: CreateStandaloneProxy, GetStandaloneProxy, ListStandaloneProxies, UpdateStandaloneProxy, DeleteStandaloneProxy, UpdateProxyHealth
- [ ] Task 6: Create Go models in `internal/store/models.go` — EventLog struct, StandaloneProxy struct
- [ ] Task 7: Update settings model to include stale_threshold_days field; update GetSettings/UpdateSettings
- [ ] Task 8: Enhance event bus to auto-persist warn/error events — add a subscriber in events.Bus that writes to store
- [ ] Task 9: Add API endpoints: `GET /api/events/log` (paginated, filterable), `GET /api/events/log/stats`
- [ ] Task 10: Add new SSE event type `event_log` — broadcast persistent events in real-time
- [ ] Task 11: Add frontend types: EventLogEntry, StandaloneProxy interfaces in types.ts
- [ ] Task 12: Add API functions in api.ts: fetchEventLog, fetchEventLogStats
## Files to Modify/Create
- `internal/store/store.go` — Add schema for event_log, standalone_proxies tables; migration for stale_threshold_days
- `internal/store/models.go` — Add EventLog, StandaloneProxy structs; update Settings struct
- `internal/store/eventlog.go` — NEW: Event log store methods
- `internal/store/standalone_proxy.go` — NEW: Standalone proxy store methods
- `internal/store/settings.go` — Update GetSettings/UpdateSettings for new field
- `internal/events/bus.go` — Add persistent event subscriber
- `internal/api/router.go` — Mount new event log routes
- `internal/api/eventlog.go` — NEW: Event log HTTP handlers
- `web/src/lib/types.ts` — Add EventLogEntry, StandaloneProxy types
- `web/src/lib/api.ts` — Add fetchEventLog, fetchEventLogStats functions
## Acceptance Criteria
- event_log and standalone_proxies tables created on startup (migration is idempotent)
- stale_threshold_days setting accessible via settings API
- Events with warn/error severity auto-persisted from event bus
- GET /api/events/log returns paginated, filterable results
- GET /api/events/log/stats returns severity counts
- Frontend types and API functions ready for downstream UI phases
- Existing functionality unchanged — all current tests/builds pass
## Notes
- Follow existing migration pattern: ALTER TABLE errors ignored for idempotency
- event_log metadata is a JSON TEXT column for flexible structured data
- Pagination follows offset/limit pattern (no cursor — SQLite is simple enough)
- Event log pruning can be called from a cron job later (Phase 8)
## Review Checklist
- [ ] All tasks completed
- [ ] Code follows project conventions
- [ ] No unintended side effects
- [ ] Build passes
- [ ] Tests pass (new + existing)
## Handoff to Next Phase
<!-- Filled in by the implementation agent after completing this phase. -->
@@ -0,0 +1,55 @@
# Phase 2: Stale Container Detection
**Status:** ⬜ Not Started
**Parent plan:** [PLAN.md](./PLAN.md)
**Domain:** backend
## Objective
Implement a periodic scanner that detects containers managed by docker-watcher that have not been running for more than a configurable number of days (the `stale_threshold_days` setting), and exposes them via the API.
## Tasks
- [ ] Task 1: Create `internal/stale/scanner.go` — Scanner struct with dependencies (store, docker client, event bus)
- [ ] Task 2: Implement scan logic: query all instances from store, check Docker container state via Docker SDK, compare against stale_threshold_days from settings
- [ ] Task 3: Add `last_alive_at` column to instances table (migration) — updated when instance is seen running
- [ ] Task 4: Update deployer/instance lifecycle to set last_alive_at when container starts/is seen running
- [ ] Task 5: Implement stale detection: instance is stale if status != 'running' AND (now - last_alive_at) > threshold days
- [ ] Task 6: Emit event_log warnings when containers become newly stale (avoid re-emitting for already-known stale containers)
- [ ] Task 7: Register scanner as cron job (reuse existing robfig/cron infrastructure from registry poller)
- [ ] Task 8: Add API endpoints: `GET /api/containers/stale` (list stale with project/stage info), `POST /api/containers/stale/{id}/cleanup` (remove single), `POST /api/containers/stale/cleanup` (bulk remove)
- [ ] Task 9: Cleanup handler: stop container via Docker SDK, remove instance from store, emit event
- [ ] Task 10: Wire scanner into main.go startup (after store, docker client, event bus init)
## Files to Modify/Create
- `internal/stale/scanner.go` — NEW: Stale container scanner
- `internal/store/store.go` — Migration for last_alive_at column
- `internal/store/models.go` — Update Instance struct with LastAliveAt field
- `internal/store/instances.go` — Update queries to include last_alive_at; add UpdateLastAliveAt method
- `internal/api/router.go` — Mount stale container routes
- `internal/api/stale.go` — NEW: Stale container HTTP handlers
- `cmd/server/main.go` — Wire scanner with cron
## Acceptance Criteria
- Scanner runs on configurable interval (e.g., every hour)
- Stale containers correctly identified based on threshold
- GET /api/containers/stale returns list with project name, stage name, image tag, last alive timestamp, days stale
- Cleanup endpoints properly stop Docker containers and remove from store
- Events emitted when containers become stale
- Existing deploy flow unaffected — last_alive_at updated on successful deploy
- Build passes, existing tests pass
## Notes
- Scanner should gracefully handle containers that no longer exist in Docker (already removed externally)
- Bulk cleanup should be admin-only
- Consider: scan interval could be derived from stale_threshold_days (e.g., scan every threshold/7 days, min 1h)
- Don't remove containers that are in 'removing' status (already being cleaned up)
## Review Checklist
- [ ] All tasks completed
- [ ] Code follows project conventions
- [ ] No unintended side effects
- [ ] Build passes
- [ ] Tests pass (new + existing)
## Handoff to Next Phase
<!-- Filled in by the implementation agent after completing this phase. -->
@@ -0,0 +1,81 @@
# Phase 3: Direct Proxy Creation with Validation
**Status:** ⬜ Not Started
**Parent plan:** [PLAN.md](./PLAN.md)
**Domain:** backend
## Objective
Implement standalone proxy creation with a multi-step validation pipeline that checks destination reachability, and periodic health monitoring for all standalone proxies.
## Tasks
- [ ] Task 1: Create `internal/proxy/validator.go` — validation pipeline:
- URL/port syntax validation
- DNS resolution check
- TCP port reachability (net.DialTimeout, 5s)
- HTTP health probe (GET to destination, 10s timeout)
- Returns structured ValidationResult with per-step pass/fail and diagnostic hints
- [ ] Task 2: Create `internal/proxy/hints.go` — diagnostic hint generator:
- DNS failure → "Domain cannot be resolved. Check DNS settings or use an IP address."
- TCP refused → "Port {port} is not accepting connections. Check if the service is running and the port is correct."
- TCP timeout → "Connection timed out. Possible firewall blocking. Check network/firewall rules."
- Host unreachable → "Host is not reachable. Verify the IP address and network connectivity."
- HTTP error → "Service responded with HTTP {status}. The service may not be healthy."
- [ ] Task 3: Create `internal/proxy/manager.go` — proxy lifecycle:
- CreateProxy: validate destination, create NPM proxy host (using npm.Client), assign SSL cert from settings, save to standalone_proxies table
- UpdateProxy: re-validate, update NPM proxy host, update store
- DeleteProxy: remove NPM proxy host, remove from store
- GetProxy/ListProxies: read from store with health status
- [ ] Task 4: Create `internal/proxy/health.go` — periodic health monitor:
- Cron job that checks all standalone proxies
- HTTP GET to destination URL/port
- Updates health_status (healthy/unhealthy/unknown) and health_checked_at in store
- Emits event_log on status change (healthy→unhealthy or vice versa)
- [ ] Task 5: Add API endpoints:
- `POST /api/proxies/validate` — run validation without creating
- `POST /api/proxies` — create standalone proxy
- `GET /api/proxies` — list standalone proxies
- `GET /api/proxies/{id}` — get single proxy
- `PUT /api/proxies/{id}` — update proxy
- `DELETE /api/proxies/{id}` — delete proxy
- `GET /api/proxies/all` — merged view: standalone + deploy-managed proxies (for Phase 4 UI)
- [ ] Task 6: Wire health monitor cron job in main.go
- [ ] Task 7: Add frontend API functions in api.ts: validateProxy, createProxy, listProxies, getProxy, updateProxy, deleteProxy, listAllProxies
- [ ] Task 8: Add frontend types: ValidationResult, ValidationStep, ProxyHealthStatus
## Files to Modify/Create
- `internal/proxy/validator.go` — NEW: Validation pipeline
- `internal/proxy/hints.go` — NEW: Diagnostic hints
- `internal/proxy/manager.go` — NEW: Proxy lifecycle management
- `internal/proxy/health.go` — NEW: Health monitoring
- `internal/api/router.go` — Mount proxy routes
- `internal/api/proxy.go` — NEW: Proxy HTTP handlers
- `cmd/server/main.go` — Wire proxy manager and health monitor
- `web/src/lib/types.ts` — Add ValidationResult, ProxyHealthStatus types
- `web/src/lib/api.ts` — Add proxy API functions
## Acceptance Criteria
- Validation pipeline returns structured results with specific failure hints
- POST /api/proxies/validate runs full check without side effects
- Proxy creation creates NPM proxy host with SSL cert from global settings
- Health monitor runs periodically and updates proxy status
- Events emitted on health status changes
- GET /api/proxies/all merges standalone and deploy-managed proxy data
- Build passes, existing tests pass
## Notes
- Validation should be fast (short timeouts) — user waits for results
- Health monitor interval: every 5 minutes (configurable later)
- For /api/proxies/all: query NPM for all proxy hosts, join with instances table for managed proxies, join with standalone_proxies for standalone ones
- SSL cert auto-assigned from settings.ssl_certificate_id
- Consider: proxy domain must be unique across both standalone and managed proxies
## Review Checklist
- [ ] All tasks completed
- [ ] Code follows project conventions
- [ ] No unintended side effects
- [ ] Build passes
- [ ] Tests pass (new + existing)
## Handoff to Next Phase
<!-- Filled in by the implementation agent after completing this phase. -->
@@ -0,0 +1,56 @@
# Phase 4: Unified Proxy Viewer UI
**Status:** ⬜ Not Started
**Parent plan:** [PLAN.md](./PLAN.md)
**Domain:** frontend
## Objective
Build a unified proxy viewer page showing ALL proxies (deploy-managed and standalone) with grouping, filtering, and real-time health indicators.
## Tasks
- [ ] Task 1: Create route `/proxies` with `+page.svelte` and `+page.ts` data loader
- [ ] Task 2: Create ProxyCard component — displays: domain, destination, SSL badge, health indicator (green/yellow/red dot), proxy type badge (managed/standalone), last health check timestamp
- [ ] Task 3: Create ProxyGroup component — collapsible section with project name header, stage sub-groups, proxy count badge
- [ ] Task 4: Create StandaloneProxyGroup component — separate collapsible section for user-created proxies
- [ ] Task 5: Implement filtering: by project, stage, health status (healthy/unhealthy/unknown), proxy type (managed/standalone), free-text search by domain/destination
- [ ] Task 6: Filter bar component with dropdown selects and search input
- [ ] Task 7: SSE integration — subscribe to proxy health events, update health indicators in real-time
- [ ] Task 8: Empty state — friendly message when no proxies exist, with link to create one
- [ ] Task 9: Add navigation link in sidebar layout (+layout.svelte)
- [ ] Task 10: Add i18n keys for proxy viewer page
## Files to Modify/Create
- `web/src/routes/proxies/+page.svelte` — NEW: Proxy viewer page
- `web/src/routes/proxies/+page.ts` — NEW: Data loader
- `web/src/lib/components/ProxyCard.svelte` — NEW: Individual proxy display
- `web/src/lib/components/ProxyGroup.svelte` — NEW: Collapsible project/stage group
- `web/src/lib/components/ProxyFilter.svelte` — NEW: Filter bar
- `web/src/routes/+layout.svelte` — Add proxies nav link
- `web/src/lib/i18n/en.ts` (or equivalent) — Add proxy viewer strings
## Acceptance Criteria
- All proxies visible: both deploy-managed and standalone
- Proxies grouped by project/stage in collapsible sections
- Health indicators show real-time status (green=healthy, red=unhealthy, yellow=unknown)
- Filtering works: project, stage, health, type, text search
- SSE updates health indicators without page refresh
- Navigation accessible from sidebar
- Responsive layout (mobile-friendly)
## Notes
- Use existing component patterns (ConfirmDialog, FormField styles, etc.)
- Follow existing Svelte 5 patterns ($state, $derived, $effect)
- The /api/proxies/all endpoint from Phase 3 provides the data source
- Health indicator should pulse/animate briefly on status change
- Consider: show proxy count in sidebar nav badge
## Review Checklist
- [ ] All tasks completed
- [ ] Code follows project conventions
- [ ] No unintended side effects
- [ ] Build passes
- [ ] Tests pass (new + existing)
## Handoff to Next Phase
<!-- Filled in by the implementation agent after completing this phase. -->
@@ -0,0 +1,55 @@
# Phase 5: Stale Containers UI
**Status:** ⬜ Not Started
**Parent plan:** [PLAN.md](./PLAN.md)
**Domain:** frontend
## Objective
Build the stale containers dashboard widget and dedicated view, with cleanup actions and settings configuration.
## Tasks
- [ ] Task 1: Add API functions in api.ts: fetchStaleContainers, cleanupStaleContainer, bulkCleanupStaleContainers
- [ ] Task 2: Create StaleContainerCard component — shows: container name, project, stage, image tag, last alive timestamp, "X days stale" badge (color-coded by severity)
- [ ] Task 3: Create stale containers section on dashboard (+page.svelte) — count badge, mini-list of top 5 offenders, "View all" link
- [ ] Task 4: Create dedicated route `/containers/stale` with full stale container list
- [ ] Task 5: Individual cleanup action — ConfirmDialog with warning, calls cleanup API
- [ ] Task 6: Bulk cleanup action — "Clean up all" button with confirmation, progress indicator
- [ ] Task 7: Settings integration — add stale_threshold_days field to settings page with validation (min 1 day)
- [ ] Task 8: Add navigation link or sub-nav for stale containers
- [ ] Task 9: Add i18n keys for stale containers
## Files to Modify/Create
- `web/src/lib/api.ts` — Add stale container API functions
- `web/src/lib/types.ts` — Add StaleContainer interface
- `web/src/lib/components/StaleContainerCard.svelte` — NEW: Stale container display
- `web/src/routes/+page.svelte` — Add stale containers dashboard widget
- `web/src/routes/containers/stale/+page.svelte` — NEW: Dedicated stale view
- `web/src/routes/containers/stale/+page.ts` — NEW: Data loader
- `web/src/routes/settings/+page.svelte` — Add stale threshold setting field
- `web/src/routes/+layout.svelte` — Add nav link if needed
## Acceptance Criteria
- Dashboard shows stale container count and top offenders
- Dedicated page lists all stale containers with details
- Individual cleanup removes container with confirmation
- Bulk cleanup works with progress feedback
- Settings page allows configuring stale threshold
- Severity coloring: 7–13 days stale = yellow, 14+ days stale = red
- Responsive layout
## Notes
- Reuse existing ConfirmDialog for destructive actions
- Dashboard widget should not slow down initial page load (lazy load or small payload)
- Stale container data comes from GET /api/containers/stale (Phase 2)
- Settings update uses existing PUT /api/settings endpoint
## Review Checklist
- [ ] All tasks completed
- [ ] Code follows project conventions
- [ ] No unintended side effects
- [ ] Build passes
- [ ] Tests pass (new + existing)
## Handoff to Next Phase
<!-- Filled in by the implementation agent after completing this phase. -->
@@ -0,0 +1,54 @@
# Phase 6: Direct Proxy Creation UI
**Status:** ⬜ Not Started
**Parent plan:** [PLAN.md](./PLAN.md)
**Domain:** frontend
## Objective
Build the proxy creation form with live validation feedback, diagnostic hints, and management actions (edit/delete).
## Tasks
- [ ] Task 1: Create "Create Proxy" form component — fields: destination URL/IP, port, domain (auto-suggested from subdomain pattern), optional custom subdomain override
- [ ] Task 2: Live validation — debounced calls to POST /api/proxies/validate as user types (300ms debounce)
- [ ] Task 3: Validation result display — step-by-step checklist with icons:
- ✅ DNS resolution OK / ❌ DNS resolution failed
- ✅ TCP port reachable / ❌ TCP port not reachable
- ✅ HTTP responding / ❌ HTTP not responding
- Each failure shows the diagnostic hint from the backend
- [ ] Task 4: Create proxy submission — calls POST /api/proxies, shows success toast with health indicator
- [ ] Task 5: Edit proxy — modal or inline form, pre-populated with current values, re-validates on save
- [ ] Task 6: Delete proxy — ConfirmDialog with domain name confirmation
- [ ] Task 7: Integration with proxy viewer page — "Create Proxy" button in the proxy viewer header
- [ ] Task 8: Domain auto-suggestion — when user enters destination, suggest domain based on subdomain_pattern from settings
- [ ] Task 9: Add i18n keys for proxy creation
## Files to Modify/Create
- `web/src/lib/components/ProxyForm.svelte` — NEW: Create/edit proxy form
- `web/src/lib/components/ValidationChecklist.svelte` — NEW: Step-by-step validation display
- `web/src/routes/proxies/+page.svelte` — Add "Create Proxy" button and modal/panel
- `web/src/lib/api.ts` — Ensure validateProxy, createProxy, updateProxy, deleteProxy are present (from Phase 3)
## Acceptance Criteria
- Form validates destination in real-time with debouncing
- Each validation step shows pass/fail with diagnostic hints
- Proxy creation works end-to-end (form → API → NPM → success)
- Edit and delete work for existing standalone proxies
- Domain auto-suggestion works from settings pattern
- Error states handled gracefully (network errors, API failures)
## Notes
- Validation should show a loading spinner while in progress
- Don't validate on every keystroke — use 300ms debounce
- If all validation steps fail, still allow creation (user might know better — just warn)
- SSL certificate is applied automatically from global settings (no cert picker in form)
## Review Checklist
- [ ] All tasks completed
- [ ] Code follows project conventions
- [ ] No unintended side effects
- [ ] Build passes
- [ ] Tests pass (new + existing)
## Handoff to Next Phase
<!-- Filled in by the implementation agent after completing this phase. -->
@@ -0,0 +1,54 @@
# Phase 7: Event Log UI
**Status:** ⬜ Not Started
**Parent plan:** [PLAN.md](./PLAN.md)
**Domain:** frontend
## Objective
Build a persistent, searchable event log viewer with real-time streaming, filters, and resource linking.
## Tasks
- [ ] Task 1: Create route `/events` with `+page.svelte` and `+page.ts` data loader
- [ ] Task 2: Create EventLogEntry component — timestamp, severity badge (info=blue, warn=yellow, error=red), source icon (container/proxy/deploy/system), message text, expandable metadata section
- [ ] Task 3: Create EventLogFilter component — filters: severity multi-select, source multi-select, date range picker (start/end), free-text search
- [ ] Task 4: Implement pagination — "Load more" button at bottom (offset/limit pattern matching API)
- [ ] Task 5: SSE integration — subscribe to event_log events, prepend new entries at top with subtle highlight animation
- [ ] Task 6: Quick actions — clickable links to related resources (e.g., click container name → go to project/stage, click proxy domain → go to proxy viewer)
- [ ] Task 7: Stats header — show counts by severity (from GET /api/events/log/stats), with colored badges
- [ ] Task 8: Add navigation link in sidebar
- [ ] Task 9: Add i18n keys for event log page
## Files to Modify/Create
- `web/src/routes/events/+page.svelte` — NEW: Event log page
- `web/src/routes/events/+page.ts` — NEW: Data loader
- `web/src/lib/components/EventLogEntry.svelte` — NEW: Event entry display
- `web/src/lib/components/EventLogFilter.svelte` — NEW: Filter controls
- `web/src/routes/+layout.svelte` — Add events nav link
- `web/src/lib/sse.ts` — Add event_log SSE subscription helper (if needed)
## Acceptance Criteria
- Event log shows all persistent events with severity and source
- Filters work: severity, source, date range, text search
- New events stream in real-time via SSE without page refresh
- Pagination loads older events on demand
- Quick actions link to related resources
- Stats header shows severity distribution
- Responsive layout
## Notes
- Follow existing SSE patterns from deploy logs viewer
- Date range filter: consider "last hour", "last 24h", "last 7 days" presets + custom range
- Metadata section is JSON — render as formatted key-value pairs, not raw JSON
- Resource linking: parse source and metadata to construct navigation URLs
- Consider: auto-scroll to top when new event arrives (if user is at top), otherwise show "N new events" badge
## Review Checklist
- [ ] All tasks completed
- [ ] Code follows project conventions
- [ ] No unintended side effects
- [ ] Build passes
- [ ] Tests pass (new + existing)
## Handoff to Next Phase
<!-- Filled in by the implementation agent after completing this phase. -->
@@ -0,0 +1,67 @@
# Phase 8: Container Stats & Notifications
**Status:** ⬜ Not Started
**Parent plan:** [PLAN.md](./PLAN.md)
**Domain:** fullstack
## Objective
Add container resource monitoring (CPU/memory), notification triggers for operational events, and a system health dashboard summary.
## Tasks
- [ ] Task 1: Create `internal/docker/stats.go` — wrapper around Docker Stats API to get CPU %, memory usage/limit for a container
- [ ] Task 2: Add API endpoint: `GET /api/projects/{id}/stages/{stage}/instances/{iid}/stats` — returns current CPU/memory for an instance
- [ ] Task 3: Create SSE event type `container_stats` — periodically broadcast stats for running containers (every 30s)
- [ ] Task 4: Extend notification stub (`internal/notify/`) — implement webhook sender for events:
- Stale container detected
- Proxy health failure
- Deploy failure/rollback
- Format: JSON payload with event type, details, timestamp
- [ ] Task 5: Add notification settings UI — enable/disable per event type in settings page
- [ ] Task 6: Update instance cards in frontend — show CPU % bar and memory usage badge
- [ ] Task 7: Create ContainerStats component — mini CPU/memory visualization (progress bars)
- [ ] Task 8: Dashboard system health summary card — total containers (running/stopped), healthy/unhealthy proxies, recent error count (last 24h)
- [ ] Task 9: Wire notification sender to event bus — subscribe to relevant event types, fire notifications
- [ ] Task 10: Add event log pruning cron job — delete events older than 30 days (configurable)
- [ ] Task 11: Add i18n keys for stats and notifications
## Files to Modify/Create
- `internal/docker/stats.go` — NEW: Docker Stats API wrapper
- `internal/api/stats.go` — NEW: Stats HTTP handler
- `internal/api/router.go` — Mount stats endpoint
- `internal/notify/sender.go` — Implement webhook notification sender
- `internal/notify/types.go` — NEW: Notification event types and payloads
- `cmd/server/main.go` — Wire notification subscriber and event pruning cron
- `web/src/lib/types.ts` — Add ContainerStats, NotificationSettings types
- `web/src/lib/api.ts` — Add fetchContainerStats function
- `web/src/lib/components/ContainerStats.svelte` — NEW: CPU/memory display
- `web/src/lib/components/SystemHealthCard.svelte` — NEW: Dashboard summary
- `web/src/routes/+page.svelte` — Add system health card to dashboard
- `web/src/routes/settings/+page.svelte` — Add notification settings section
- `web/src/lib/sse.ts` — Add container_stats SSE handler
## Acceptance Criteria
- Container stats (CPU/memory) visible on instance cards
- Stats update in real-time via SSE
- Webhook notifications fire for configured event types
- Dashboard shows system health summary
- Event log auto-prunes old entries
- Settings page allows configuring notification preferences
- Build passes, existing tests pass
## Notes
- Docker Stats API returns a stream — read one snapshot and close, don't hold the connection
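- CPU calculation: (container CPU delta / system CPU delta) * 100 — needs two samples; a single Docker stats payload carries both as `cpu_stats` (current) and `precpu_stats` (previous), so one snapshot is enough as long as `precpu_stats` is populated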
- Memory: usage_bytes / limit_bytes * 100 for percentage
- Notification webhook format should be compatible with common receivers (Slack webhook, Discord webhook, generic HTTP)
- System health card: consider caching aggregated stats to avoid N+1 queries on dashboard load
## Review Checklist
- [ ] All tasks completed
- [ ] Code follows project conventions
- [ ] No unintended side effects
- [ ] Build passes
- [ ] Tests pass (new + existing)
## Handoff to Next Phase
<!-- Filled in by the implementation agent after completing this phase. -->
+27
View File
@@ -2,6 +2,8 @@ import type {
ApiEnvelope, ApiEnvelope,
Deploy, Deploy,
DeployLog, DeployLog,
EventLogEntry,
EventLogStats,
InspectResult, InspectResult,
Instance, Instance,
NpmCertificate, NpmCertificate,
@@ -338,4 +340,29 @@ export function deleteVolume(
return del<{ deleted: string }>(`/api/projects/${projectId}/volumes/${volId}`); return del<{ deleted: string }>(`/api/projects/${projectId}/volumes/${volId}`);
} }
// ── Event Log ───────────────────────────────────────────────────────
/**
 * Fetch persisted event log entries from GET /api/events/log.
 *
 * Every filter is optional. A filter is only sent when its value is truthy,
 * so empty strings, `0`, and `undefined` are left out of the query string.
 *
 * @param params Optional filters (`severity`, `source`, `since`, `until`)
 *   and paging controls (`limit`, `offset`).
 * @returns The matching event log entries.
 */
export function fetchEventLog(params?: {
	severity?: string;
	source?: string;
	since?: string;
	until?: string;
	limit?: number;
	offset?: number;
}): Promise<EventLogEntry[]> {
	// Listed in the order the parameters should appear in the query string.
	const candidates: [string, string | number | undefined][] = [
		['severity', params?.severity],
		['source', params?.source],
		['since', params?.since],
		['until', params?.until],
		['limit', params?.limit],
		['offset', params?.offset]
	];

	const search = new URLSearchParams();
	for (const [key, value] of candidates) {
		// Falsy values are skipped rather than sent as empty parameters.
		if (value) {
			search.set(key, String(value));
		}
	}

	const encoded = search.toString();
	const path = encoded ? `/api/events/log?${encoded}` : '/api/events/log';
	return get<EventLogEntry[]>(path);
}
/**
 * Fetch event log statistics from GET /api/events/log/stats.
 *
 * @returns The event log counts described by `EventLogStats`.
 */
export function fetchEventLogStats(): Promise<EventLogStats> {
	const statsPath = '/api/events/log/stats';
	return get<EventLogStats>(statsPath);
}
export { ApiError }; export { ApiError };
+33
View File
@@ -106,6 +106,7 @@ export interface Settings {
polling_interval: string; polling_interval: string;
base_volume_path: string; base_volume_path: string;
ssl_certificate_id: number; ssl_certificate_id: number;
stale_threshold_days: number;
updated_at: string; updated_at: string;
} }
@@ -170,3 +171,35 @@ export interface Volume {
created_at: string; created_at: string;
updated_at: string; updated_at: string;
} }
/**
 * A persistent event log entry, as returned by GET /api/events/log.
 */
export interface EventLogEntry {
/** Identifier of the stored event. */
id: number;
/** Origin of the event. NOTE(review): the plan mentions container/proxy/deploy/system — confirm the exact set. */
source: string;
/** Severity level of the event. */
severity: 'info' | 'warn' | 'error';
/** Human-readable event text. */
message: string;
/** Structured data kept as a JSON string; parse it before rendering. */
metadata: string;
/** Creation timestamp as a string — exact format not visible here; confirm against the backend. */
created_at: string;
}
/** Severity counts for the event log, as returned by GET /api/events/log/stats. */
export interface EventLogStats {
/** Number of entries with severity 'info'. */
info: number;
/** Number of entries with severity 'warn'. */
warn: number;
/** Number of entries with severity 'error'. */
error: number;
/** Overall entry count — presumably info + warn + error; confirm against the backend. */
total: number;
}
/** A standalone reverse proxy not tied to a project. */
export interface StandaloneProxy {
/** Proxy identifier (a string, unlike the numeric event log ids). */
id: string;
/** Domain the proxy is reachable under. */
domain: string;
/** Destination the proxy forwards to — presumably a URL or IP address; used together with destination_port. */
destination_url: string;
/** Port on the destination. */
destination_port: number;
/** SSL certificate id. NOTE(review): the plan assigns this from the global ssl_certificate_id setting — confirm. */
ssl_certificate_id: number;
/** Id of the matching proxy host in NPM — presumably set when the proxy is created; confirm. */
npm_proxy_id: number;
/** Last known health of the destination. */
health_status: 'unknown' | 'healthy' | 'unhealthy';
/** Timestamp string of the most recent health check — format not visible here. */
health_checked_at: string;
/** Creation timestamp string. */
created_at: string;
/** Last-modification timestamp string. */
updated_at: string;
}