fix: harden security, fix concurrency bugs, and address review findings
Build / build (push) Successful in 11m42s
Build / build (push) Successful in 11m42s
Security: - rate limit /api/webhook routes per-IP and cap concurrent site syncs - global SSE connection cap (256) with new sse_gate - validate ?tail= and cap JSON log responses at 4 MiB - strip ANSI/CSI/OSC and control bytes from streamed log lines - redact webhook secret from request log middleware - scrub host details from /api/health for non-admin viewers - drop container_id from /api/system/stats/top for non-admins - generate webhook secrets via crypto/rand; require >=32 chars on insert - verify iid path consistency in streamContainerLogs - LimitReader on site webhook body; reject malformed non-empty bodies Concurrency / correctness: - stats collector: Stop() no longer hangs without Start(), semaphore acquired in parent loop so ctx cancellation short-circuits the queue, in-flight tick cancellable via shared base context, zero-ts guard - webhook handler: replace fire-and-forget goroutine with WaitGroup-tracked workers + Drain() wired into graceful shutdown - $derived(() => ...) mis-idiom fixed in ContainerStats / InstanceCard / ProjectCard (returned function instead of value) - SystemResourcesCard: rename `window` and `t` locals to avoid shadowing globalThis.window and the i18n `t` import Quality / performance: - replace O(n^2) insertion sort with sort.Slice in stats top - runMigrations only swallows duplicate-column / already-exists errors - PruneStatsSamplesBefore wrapped in a transaction - collapse N+1 in unusedImageStats / pruneImages to one ListAllInstances pass; surface DB errors instead of silently treating them as inactive - run Docker Info + DiskUsage in parallel via errgroup - container log SSE emits `: ping` heartbeat every 20 s - imageMatches case-insensitive on registry host (RFC behaviour) - log warning on invalid stage tag pattern instead of silent skip - reject malformed non-empty site webhook payloads Frontend / i18n: - shared formatBytes utility replaces three local copies - statsInterval store drives dynamic "no samples / collection disabled" copy across ContainerStats and SystemResourcesCard - top consumers row now shows owner_name (project/stage or site name) - drop seven `as any` casts on the Settings type; add cloudflare_api_token write-only field - move "Service status", "Docker daemon", "Docker unreachable", "Proxy unreachable", "reachable", and "Docker daemon is not reachable." strings into en/ru i18n bundles
This commit is contained in:
+138
-32
@@ -5,15 +5,38 @@ import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/go-chi/chi/v5"
|
||||
|
||||
"github.com/alexei/tinyforge/internal/store"
|
||||
)
|
||||
|
||||
// Limits and constants for the log endpoints.
|
||||
const (
|
||||
defaultLogTail = 200
|
||||
maxLogTail = 5000
|
||||
maxJSONLogBytes = 4 << 20 // 4 MiB cap for non-streaming log responses
|
||||
maxLogLineBytes = 1 << 20 // 1 MiB max line length for the bufio.Scanner
|
||||
logHeartbeatPeriod = 20 * time.Second
|
||||
)
|
||||
|
||||
// ANSI escape sequence patterns. Stripped from streamed log lines so a
|
||||
// hostile container cannot inject terminal control sequences (cursor moves,
|
||||
// hyperlink escapes, screen clears) into operator displays or pasted output.
|
||||
var (
|
||||
ansiCSIPattern = regexp.MustCompile(`\x1b\[[0-9;?]*[ -/]*[@-~]`)
|
||||
ansiOSCPattern = regexp.MustCompile(`\x1b\][^\x07\x1b]*(?:\x07|\x1b\\)`)
|
||||
ctlBytePattern = regexp.MustCompile(`[\x00-\x08\x0b-\x1a\x1c-\x1f\x7f]`)
|
||||
)
|
||||
|
||||
// listProjectImages handles GET /api/projects/{id}/images.
|
||||
// Returns all local Docker images matching the project's image reference.
|
||||
func (s *Server) listProjectImages(w http.ResponseWriter, r *http.Request) {
|
||||
@@ -50,6 +73,8 @@ func (s *Server) listProjectImages(w http.ResponseWriter, r *http.Request) {
|
||||
// - tail: number of lines from end (default "200")
|
||||
// - follow: "true" to stream new lines in real-time
|
||||
func (s *Server) streamContainerLogs(w http.ResponseWriter, r *http.Request) {
|
||||
projectID := chi.URLParam(r, "id")
|
||||
stageID := chi.URLParam(r, "stage")
|
||||
instanceID := chi.URLParam(r, "iid")
|
||||
|
||||
inst, err := s.store.GetInstanceByID(instanceID)
|
||||
@@ -63,6 +88,14 @@ func (s *Server) streamContainerLogs(w http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
}
|
||||
|
||||
// Verify the instance actually belongs to the project/stage in the path.
|
||||
// Without this, a user could stream logs for any instance ID by guessing
|
||||
// it under the wrong project — defence-in-depth for future per-project ACLs.
|
||||
if inst.ProjectID != projectID || inst.StageID != stageID {
|
||||
respondNotFound(w, "instance")
|
||||
return
|
||||
}
|
||||
|
||||
if inst.ContainerID == "" {
|
||||
respondError(w, http.StatusBadRequest, "instance has no container")
|
||||
return
|
||||
@@ -80,10 +113,7 @@ func (s *Server) streamLogsForContainer(w http.ResponseWriter, r *http.Request,
|
||||
return
|
||||
}
|
||||
|
||||
tail := r.URL.Query().Get("tail")
|
||||
if tail == "" {
|
||||
tail = "200"
|
||||
}
|
||||
tail := parseTailParam(r.URL.Query().Get("tail"))
|
||||
follow := r.URL.Query().Get("follow") == "true"
|
||||
|
||||
// Check if client accepts SSE.
|
||||
@@ -99,8 +129,10 @@ func (s *Server) streamLogsForContainer(w http.ResponseWriter, r *http.Request,
|
||||
defer logReader.Close()
|
||||
|
||||
if !isSSE {
|
||||
// JSON mode: read all lines and return as array.
|
||||
scanner := bufio.NewScanner(logReader)
|
||||
// JSON mode: cap the total bytes read so a chatty container with
|
||||
// tail=large cannot exhaust server memory.
|
||||
scanner := bufio.NewScanner(io.LimitReader(logReader, maxJSONLogBytes))
|
||||
scanner.Buffer(make([]byte, 0, 64*1024), maxLogLineBytes)
|
||||
var lines []string
|
||||
for scanner.Scan() {
|
||||
line := sanitizeDockerLogLine(scanner.Text())
|
||||
@@ -116,6 +148,12 @@ func (s *Server) streamLogsForContainer(w http.ResponseWriter, r *http.Request,
|
||||
}
|
||||
|
||||
// SSE mode: stream lines as they arrive.
|
||||
release, ok := acquireSSESlot(w, s.sseGate)
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
defer release()
|
||||
|
||||
flusher, ok := w.(http.Flusher)
|
||||
if !ok {
|
||||
respondError(w, http.StatusInternalServerError, "streaming not supported")
|
||||
@@ -126,7 +164,31 @@ func (s *Server) streamLogsForContainer(w http.ResponseWriter, r *http.Request,
|
||||
w.Header().Set("Cache-Control", "no-cache")
|
||||
w.Header().Set("Connection", "keep-alive")
|
||||
|
||||
// Heartbeat keeps the connection warm through proxies that close idle
|
||||
// streams. Sent as an SSE comment which the EventSource API ignores.
|
||||
heartbeat := time.NewTicker(logHeartbeatPeriod)
|
||||
defer heartbeat.Stop()
|
||||
heartbeatDone := make(chan struct{})
|
||||
defer close(heartbeatDone)
|
||||
var hbMu sync.Mutex
|
||||
go func() {
|
||||
for {
|
||||
select {
|
||||
case <-heartbeat.C:
|
||||
hbMu.Lock()
|
||||
_, _ = io.WriteString(w, ": ping\n\n")
|
||||
flusher.Flush()
|
||||
hbMu.Unlock()
|
||||
case <-heartbeatDone:
|
||||
return
|
||||
case <-r.Context().Done():
|
||||
return
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
scanner := bufio.NewScanner(logReader)
|
||||
scanner.Buffer(make([]byte, 0, 64*1024), maxLogLineBytes)
|
||||
for scanner.Scan() {
|
||||
line := sanitizeDockerLogLine(scanner.Text())
|
||||
if line == "" {
|
||||
@@ -134,8 +196,10 @@ func (s *Server) streamLogsForContainer(w http.ResponseWriter, r *http.Request,
|
||||
}
|
||||
|
||||
data, _ := json.Marshal(map[string]string{"line": line})
|
||||
hbMu.Lock()
|
||||
fmt.Fprintf(w, "data: %s\n\n", data)
|
||||
flusher.Flush()
|
||||
hbMu.Unlock()
|
||||
|
||||
// Check if client disconnected.
|
||||
select {
|
||||
@@ -146,17 +210,67 @@ func (s *Server) streamLogsForContainer(w http.ResponseWriter, r *http.Request,
|
||||
}
|
||||
}
|
||||
|
||||
// parseTailParam validates and clamps the ?tail= query value. Empty/invalid
|
||||
// inputs fall back to the default; values above the cap are clamped down.
|
||||
// "all" is rejected — letting the caller request unbounded log history is a
|
||||
// trivial DoS vector.
|
||||
func parseTailParam(raw string) string {
|
||||
if raw == "" {
|
||||
return strconv.Itoa(defaultLogTail)
|
||||
}
|
||||
n, err := strconv.Atoi(raw)
|
||||
if err != nil || n <= 0 {
|
||||
return strconv.Itoa(defaultLogTail)
|
||||
}
|
||||
if n > maxLogTail {
|
||||
n = maxLogTail
|
||||
}
|
||||
return strconv.Itoa(n)
|
||||
}
|
||||
|
||||
// sanitizeDockerLogLine strips the Docker log stream header (8-byte prefix)
|
||||
// that Docker adds to non-TTY container logs.
|
||||
// that Docker adds to non-TTY container logs, and removes terminal control
|
||||
// sequences so a hostile container cannot inject ANSI escapes that hijack an
|
||||
// operator's terminal when log output is pasted or rendered raw.
|
||||
func sanitizeDockerLogLine(line string) string {
|
||||
// Docker multiplexed stream: first 8 bytes are header (stream type + size).
|
||||
// If the line starts with a non-printable byte followed by 0x00 0x00 0x00, strip 8 bytes.
|
||||
if len(line) > 8 && (line[0] == 1 || line[0] == 2) && line[1] == 0 && line[2] == 0 && line[3] == 0 {
|
||||
return line[8:]
|
||||
line = line[8:]
|
||||
}
|
||||
line = ansiOSCPattern.ReplaceAllString(line, "")
|
||||
line = ansiCSIPattern.ReplaceAllString(line, "")
|
||||
line = ctlBytePattern.ReplaceAllString(line, "")
|
||||
return line
|
||||
}
|
||||
|
||||
// buildActiveImagesSet returns the set of "image:tag" strings currently used
|
||||
// by any instance, computed in a single DB pass instead of N×K queries.
|
||||
// Returning an error (rather than swallowing) prevents prune logic from
|
||||
// treating a transient DB failure as "nothing is active".
|
||||
func buildActiveImagesSet(st *store.Store, projects []store.Project) (map[string]bool, error) {
|
||||
imageByProject := make(map[string]string, len(projects))
|
||||
for _, p := range projects {
|
||||
imageByProject[p.ID] = p.Image
|
||||
}
|
||||
instances, err := st.ListAllInstances()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("list instances: %w", err)
|
||||
}
|
||||
active := make(map[string]bool, len(instances))
|
||||
for _, inst := range instances {
|
||||
if inst.ImageTag == "" {
|
||||
continue
|
||||
}
|
||||
image := imageByProject[inst.ProjectID]
|
||||
if image == "" {
|
||||
continue
|
||||
}
|
||||
active[image+":"+inst.ImageTag] = true
|
||||
}
|
||||
return active, nil
|
||||
}
|
||||
|
||||
// unusedImageStats handles GET /api/docker/unused-images.
|
||||
// Returns the total size of unused project images and whether the threshold is exceeded.
|
||||
func (s *Server) unusedImageStats(w http.ResponseWriter, r *http.Request) {
|
||||
@@ -181,18 +295,14 @@ func (s *Server) unusedImageStats(w http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
}
|
||||
|
||||
// Build set of active image refs.
|
||||
activeImages := make(map[string]bool)
|
||||
for _, p := range projects {
|
||||
stages, _ := s.store.GetStagesByProjectID(p.ID)
|
||||
for _, st := range stages {
|
||||
instances, _ := s.store.GetInstancesByStageID(st.ID)
|
||||
for _, inst := range instances {
|
||||
if inst.ImageTag != "" {
|
||||
activeImages[p.Image+":"+inst.ImageTag] = true
|
||||
}
|
||||
}
|
||||
}
|
||||
// Build set of active image refs in one DB pass instead of N×K queries.
|
||||
// A flaky read here previously masqueraded as "no images are active",
|
||||
// which on the prune endpoint would have deleted *running* images.
|
||||
activeImages, err := buildActiveImagesSet(s.store, projects)
|
||||
if err != nil {
|
||||
slog.Error("unused images: build active set", "error", err)
|
||||
respondError(w, http.StatusInternalServerError, "internal server error")
|
||||
return
|
||||
}
|
||||
|
||||
// Sum unused image sizes.
|
||||
@@ -242,18 +352,14 @@ func (s *Server) pruneImages(w http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
}
|
||||
|
||||
// Build a set of image refs used by active instances.
|
||||
activeImages := make(map[string]bool)
|
||||
for _, p := range projects {
|
||||
stages, _ := s.store.GetStagesByProjectID(p.ID)
|
||||
for _, st := range stages {
|
||||
instances, _ := s.store.GetInstancesByStageID(st.ID)
|
||||
for _, inst := range instances {
|
||||
if inst.ImageTag != "" {
|
||||
activeImages[p.Image+":"+inst.ImageTag] = true
|
||||
}
|
||||
}
|
||||
}
|
||||
// Build a set of image refs used by active instances. Bail out on error
|
||||
// — silently treating a DB blip as "no active images" would prune
|
||||
// images currently in use by running containers.
|
||||
activeImages, err := buildActiveImagesSet(s.store, projects)
|
||||
if err != nil {
|
||||
slog.Error("prune: build active set", "error", err)
|
||||
respondError(w, http.StatusInternalServerError, "internal server error")
|
||||
return
|
||||
}
|
||||
|
||||
// Collect all unique image bases from projects (without tags).
|
||||
|
||||
Reference in New Issue
Block a user