fix: harden security, fix concurrency bugs, and address review findings
Build / build (push) Successful in 11m42s
Build / build (push) Successful in 11m42s
Security:
- rate limit /api/webhook routes per-IP and cap concurrent site syncs
- global SSE connection cap (256) with new sse_gate
- validate ?tail= and cap JSON log responses at 4 MiB
- strip ANSI/CSI/OSC and control bytes from streamed log lines
- redact webhook secret from request log middleware
- scrub host details from /api/health for non-admin viewers
- drop container_id from /api/system/stats/top for non-admins
- generate webhook secrets via crypto/rand; require >=32 chars on insert
- verify iid path consistency in streamContainerLogs
- LimitReader on site webhook body; reject malformed non-empty bodies

Concurrency / correctness:
- stats collector: Stop() no longer hangs without Start(), semaphore acquired in parent loop so ctx cancellation short-circuits the queue, in-flight tick cancellable via shared base context, zero-ts guard
- webhook handler: replace fire-and-forget goroutine with WaitGroup-tracked workers + Drain() wired into graceful shutdown
- $derived(() => ...) mis-idiom fixed in ContainerStats / InstanceCard / ProjectCard (returned function instead of value)
- SystemResourcesCard: rename `window` and `t` locals to avoid shadowing globalThis.window and the i18n `t` import

Quality / performance:
- replace O(n^2) insertion sort with sort.Slice in stats top
- runMigrations only swallows duplicate-column / already-exists errors
- PruneStatsSamplesBefore wrapped in a transaction
- collapse N+1 in unusedImageStats / pruneImages to one ListAllInstances pass; surface DB errors instead of silently treating them as inactive
- run Docker Info + DiskUsage in parallel via errgroup
- container log SSE emits `: ping` heartbeat every 20 s
- imageMatches case-insensitive on registry host (RFC behaviour)
- log warning on invalid stage tag pattern instead of silent skip
- reject malformed non-empty site webhook payloads

Frontend / i18n:
- shared formatBytes utility replaces three local copies
- statsInterval store drives dynamic "no samples / collection disabled" copy across ContainerStats and SystemResourcesCard
- top consumers row now shows owner_name (project/stage or site name)
- drop seven `as any` casts on the Settings type; add cloudflare_api_token write-only field
- move "Service status", "Docker daemon", "Docker unreachable", "Proxy unreachable", "reachable", and "Docker daemon is not reachable." strings into en/ru i18n bundles
This commit is contained in:
+46
-23
@@ -3,9 +3,11 @@ package docker
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"time"
|
||||
|
||||
"github.com/moby/moby/client"
|
||||
"golang.org/x/sync/errgroup"
|
||||
)
|
||||
|
||||
// SystemStats is a host-level snapshot combining daemon capacity
|
||||
@@ -42,33 +44,54 @@ type SystemStats struct {
|
||||
DiskTotalBytes int64 `json:"disk_total_bytes"`
|
||||
}
|
||||
|
||||
// GetSystemStats returns a one-shot host-level snapshot. The Info() call
|
||||
// and disk usage call are made in sequence. Disk usage failures do not
|
||||
// fail the whole call — the result degrades gracefully with zero disk fields.
|
||||
// GetSystemStats returns a one-shot host-level snapshot. Info and DiskUsage
|
||||
// are issued in parallel because DiskUsage walks every layer/volume and is
|
||||
// often the slowest call on a busy host (1-3 s); Info typically completes in
|
||||
// ~10 ms. Disk usage failures do not fail the whole call — the result
|
||||
// degrades gracefully with zero disk fields and a warning log.
|
||||
func (c *Client) GetSystemStats(ctx context.Context) (SystemStats, error) {
|
||||
info, err := c.Info(ctx)
|
||||
if err != nil {
|
||||
return SystemStats{}, fmt.Errorf("system stats: %w", err)
|
||||
}
|
||||
stats := SystemStats{Timestamp: time.Now().UTC()}
|
||||
|
||||
stats := SystemStats{
|
||||
Timestamp: time.Now().UTC(),
|
||||
NCPU: info.NCPU,
|
||||
MemoryTotal: info.MemoryTotal,
|
||||
Containers: info.Containers,
|
||||
Running: info.Running,
|
||||
Paused: info.Paused,
|
||||
Stopped: info.Stopped,
|
||||
Images: info.Images,
|
||||
}
|
||||
g, gctx := errgroup.WithContext(ctx)
|
||||
|
||||
du, derr := c.api.DiskUsage(ctx, client.DiskUsageOptions{
|
||||
Containers: true,
|
||||
Images: true,
|
||||
Volumes: true,
|
||||
BuildCache: true,
|
||||
g.Go(func() error {
|
||||
info, err := c.Info(gctx)
|
||||
if err != nil {
|
||||
return fmt.Errorf("system stats info: %w", err)
|
||||
}
|
||||
stats.NCPU = info.NCPU
|
||||
stats.MemoryTotal = info.MemoryTotal
|
||||
stats.Containers = info.Containers
|
||||
stats.Running = info.Running
|
||||
stats.Paused = info.Paused
|
||||
stats.Stopped = info.Stopped
|
||||
stats.Images = info.Images
|
||||
return nil
|
||||
})
|
||||
if derr == nil {
|
||||
|
||||
var du *client.DiskUsageResult
|
||||
g.Go(func() error {
|
||||
usage, err := c.api.DiskUsage(gctx, client.DiskUsageOptions{
|
||||
Containers: true,
|
||||
Images: true,
|
||||
Volumes: true,
|
||||
BuildCache: true,
|
||||
})
|
||||
if err != nil {
|
||||
// Disk usage is best-effort; swallow but log so the dashboard
|
||||
// shows zeroed disk fields rather than failing entirely.
|
||||
slog.Warn("system stats: disk usage failed", "error", err)
|
||||
return nil
|
||||
}
|
||||
du = &usage
|
||||
return nil
|
||||
})
|
||||
|
||||
if err := g.Wait(); err != nil {
|
||||
return SystemStats{}, err
|
||||
}
|
||||
|
||||
if du != nil {
|
||||
stats.DiskImagesBytes = du.Images.TotalSize
|
||||
stats.DiskContainersBytes = du.Containers.TotalSize
|
||||
stats.DiskVolumesBytes = du.Volumes.TotalSize
|
||||
|
||||
Reference in New Issue
Block a user