fix: harden security, fix concurrency bugs, and address review findings
Build / build (push) Successful in 11m42s

Security:
- rate limit /api/webhook routes per-IP and cap concurrent site syncs
- global SSE connection cap (256) with new sse_gate
- validate ?tail= and cap JSON log responses at 4 MiB
- strip ANSI/CSI/OSC and control bytes from streamed log lines
- redact webhook secret from request log middleware
- scrub host details from /api/health for non-admin viewers
- drop container_id from /api/system/stats/top for non-admins
- generate webhook secrets via crypto/rand; require >=32 chars on insert
- verify iid path consistency in streamContainerLogs
- LimitReader on site webhook body; reject malformed non-empty bodies

Concurrency / correctness:
- stats collector: Stop() no longer hangs without Start(), semaphore
  acquired in parent loop so ctx cancellation short-circuits the queue,
  in-flight tick cancellable via shared base context, zero-ts guard
- webhook handler: replace fire-and-forget goroutine with WaitGroup-tracked
  workers + Drain() wired into graceful shutdown
- $derived(() => ...) mis-idiom fixed in ContainerStats / InstanceCard /
  ProjectCard (returned function instead of value)
- SystemResourcesCard: rename `window` and `t` locals to avoid shadowing
  globalThis.window and the i18n `t` import

Quality / performance:
- replace O(n^2) insertion sort with sort.Slice in stats top
- runMigrations only swallows duplicate-column / already-exists errors
- PruneStatsSamplesBefore wrapped in a transaction
- collapse N+1 in unusedImageStats / pruneImages to one ListAllInstances
  pass; surface DB errors instead of silently treating them as inactive
- run Docker Info + DiskUsage in parallel via errgroup
- container log SSE emits `: ping` heartbeat every 20 s
- imageMatches case-insensitive on registry host (RFC behaviour)
- log warning on invalid stage tag pattern instead of silent skip
- reject malformed non-empty site webhook payloads

Frontend / i18n:
- shared formatBytes utility replaces three local copies
- statsInterval store drives dynamic "no samples / collection disabled"
  copy across ContainerStats and SystemResourcesCard
- top consumers row now shows owner_name (project/stage or site name)
- drop seven `as any` casts on the Settings type; add cloudflare_api_token
  write-only field
- move "Service status", "Docker daemon", "Docker unreachable",
  "Proxy unreachable", "reachable", and "Docker daemon is not reachable."
  strings into en/ru i18n bundles
This commit is contained in:
2026-05-07 00:56:14 +03:00
parent 05440a5f92
commit a4362b842d
39 changed files with 1249 additions and 213 deletions
+64 -7
View File
@@ -5,15 +5,27 @@ import (
"encoding/json"
"errors"
"fmt"
"io"
"log/slog"
"net/http"
"strings"
"sync"
"github.com/go-chi/chi/v5"
"github.com/alexei/tinyforge/internal/store"
)
// maxSiteConcurrentSyncs caps fan-out of background site syncs triggered by
// webhooks. Above this limit, requests are rejected with 503.
const maxSiteConcurrentSyncs = 4
// maxWebhookBodyBytes caps the request body size for webhook payloads. The
// /api routes already wrap the body with MaxBytesReader, but the webhook
// router relies on its own limit so changes to the parent middleware can't
// silently increase the cap.
const maxWebhookBodyBytes = 256 * 1024 // 256 KiB
// DeployTriggerer is called when a webhook determines a deploy should happen.
// Same interface as registry.DeployTriggerer — kept separate to avoid import cycles.
type DeployTriggerer interface {
@@ -114,12 +126,28 @@ type Handler struct {
store *store.Store
deployer DeployTriggerer
sites SiteSyncTriggerer
// Site sync coordination — webhooks fire syncs in the background; Drain
// blocks until those goroutines finish, so a graceful shutdown does not
// kill an in-flight git fetch + container rebuild.
siteSyncCtx context.Context
siteSyncCancel context.CancelFunc
siteSyncWG sync.WaitGroup
siteSyncSem chan struct{}
}
// NewHandler creates a new webhook Handler. The sites triggerer is optional
// and may be nil (site webhooks will return 404).
func NewHandler(st *store.Store, deployer DeployTriggerer, sites SiteSyncTriggerer) *Handler {
return &Handler{store: st, deployer: deployer, sites: sites}
ctx, cancel := context.WithCancel(context.Background())
return &Handler{
store: st,
deployer: deployer,
sites: sites,
siteSyncCtx: ctx,
siteSyncCancel: cancel,
siteSyncSem: make(chan struct{}, maxSiteConcurrentSyncs),
}
}
// SetSiteSyncTriggerer injects the static-site manager after construction.
@@ -130,6 +158,13 @@ func (h *Handler) SetSiteSyncTriggerer(s SiteSyncTriggerer) {
h.sites = s
}
// Drain cancels in-flight site syncs and waits for their goroutines to exit.
// Safe to call from a graceful-shutdown path.
func (h *Handler) Drain() {
h.siteSyncCancel()
h.siteSyncWG.Wait()
}
// Route returns a chi router with the webhook endpoints mounted.
//
// Routes:
@@ -183,7 +218,8 @@ func (h *Handler) handleWebhook(w http.ResponseWriter, r *http.Request) {
}
var payload Payload
if err := json.NewDecoder(r.Body).Decode(&payload); err != nil {
dec := json.NewDecoder(io.LimitReader(r.Body, maxWebhookBodyBytes))
if err := dec.Decode(&payload); err != nil {
respondWebhookError(w, http.StatusBadRequest, "invalid JSON payload")
return
}
@@ -302,10 +338,20 @@ func (h *Handler) handleSiteWebhook(w http.ResponseWriter, r *http.Request) {
return
}
// Body is optional — decode best-effort.
// Body is optional. We attempt to decode but accept an empty body (no Ref
// filter); a malformed non-empty body is treated as bad-request to avoid
// silently bypassing the branch/tag filter.
var payload SitePayload
if r.ContentLength > 0 {
_ = json.NewDecoder(r.Body).Decode(&payload)
body, err := io.ReadAll(io.LimitReader(r.Body, maxWebhookBodyBytes))
if err != nil {
respondWebhookError(w, http.StatusBadRequest, "failed to read request body")
return
}
if len(body) > 0 {
if err := json.Unmarshal(body, &payload); err != nil {
respondWebhookError(w, http.StatusBadRequest, "invalid JSON payload")
return
}
}
if payload.Ref != "" && !siteRefMatches(site, payload.Ref) {
@@ -320,9 +366,20 @@ func (h *Handler) handleSiteWebhook(w http.ResponseWriter, r *http.Request) {
return
}
// Fire and forget — sync may take a while (git fetch + container rebuild).
// Cap concurrent syncs so a runaway CI cannot fan out unbounded
// git-clone goroutines.
select {
case h.siteSyncSem <- struct{}{}:
default:
respondWebhookError(w, http.StatusServiceUnavailable, "site sync queue full")
return
}
h.siteSyncWG.Add(1)
go func(siteID, siteName string) {
if err := h.sites.Deploy(context.Background(), siteID, false); err != nil {
defer h.siteSyncWG.Done()
defer func() { <-h.siteSyncSem }()
if err := h.sites.Deploy(h.siteSyncCtx, siteID, false); err != nil {
slog.Error("webhook: site sync failed", "site", siteName, "error", err)
}
}(site.ID, site.Name)