Files
tiny-forge/internal/store/stats_samples.go
T
alexei.dolgolyov a4362b842d
Build / build (push) Successful in 11m42s
fix: harden security, fix concurrency bugs, and address review findings
Security:
- rate limit /api/webhook routes per-IP and cap concurrent site syncs
- global SSE connection cap (256) with new sse_gate
- validate ?tail= and cap JSON log responses at 4 MiB
- strip ANSI/CSI/OSC and control bytes from streamed log lines
- redact webhook secret from request log middleware
- scrub host details from /api/health for non-admin viewers
- drop container_id from /api/system/stats/top for non-admins
- generate webhook secrets via crypto/rand; require >=32 chars on insert
- verify iid path consistency in streamContainerLogs
- LimitReader on site webhook body; reject malformed non-empty bodies

Concurrency / correctness:
- stats collector: Stop() no longer hangs without Start(), semaphore
  acquired in parent loop so ctx cancellation short-circuits the queue,
  in-flight tick cancellable via shared base context, zero-ts guard
- webhook handler: replace fire-and-forget goroutine with WaitGroup-tracked
  workers + Drain() wired into graceful shutdown
- $derived(() => ...) mis-idiom fixed in ContainerStats / InstanceCard /
  ProjectCard (returned function instead of value)
- SystemResourcesCard: rename `window` and `t` locals to avoid shadowing
  globalThis.window and the i18n `t` import

Quality / performance:
- replace O(n^2) insertion sort with sort.Slice in stats top
- runMigrations only swallows duplicate-column / already-exists errors
- PruneStatsSamplesBefore wrapped in a transaction
- collapse N+1 in unusedImageStats / pruneImages to one ListAllInstances
  pass; surface DB errors instead of silently treating them as inactive
- run Docker Info + DiskUsage in parallel via errgroup
- container log SSE emits `: ping` heartbeat every 20 s
- imageMatches case-insensitive on registry host (RFC behaviour)
- log warning on invalid stage tag pattern instead of silent skip
- reject malformed non-empty site webhook payloads

Frontend / i18n:
- shared formatBytes utility replaces three local copies
- statsInterval store drives dynamic "no samples / collection disabled"
  copy across ContainerStats and SystemResourcesCard
- top consumers row now shows owner_name (project/stage or site name)
- drop seven `as any` casts on the Settings type; add cloudflare_api_token
  write-only field
- move "Service status", "Docker daemon", "Docker unreachable",
  "Proxy unreachable", "reachable", and "Docker daemon is not reachable."
  strings into en/ru i18n bundles
2026-05-07 00:56:14 +03:00

168 lines
5.3 KiB
Go

package store
import (
"fmt"
)
// InsertContainerStatsSample writes one container sample as a new row in the
// container_stats_samples table. A database error is returned wrapped with
// the name of the operation; nil is returned on success.
func (s *Store) InsertContainerStatsSample(sample ContainerStatsSample) error {
	const query = `INSERT INTO container_stats_samples (
container_id, owner_type, owner_id, ts,
cpu_percent, memory_usage, memory_limit,
network_rx, network_tx, block_read, block_write
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`

	// Arguments are bound positionally, so their order must follow the
	// column list in the statement above.
	if _, err := s.db.Exec(query,
		sample.ContainerID,
		sample.OwnerType,
		sample.OwnerID,
		sample.TS,
		sample.CPUPercent,
		sample.MemoryUsage,
		sample.MemoryLimit,
		sample.NetworkRxBytes,
		sample.NetworkTxBytes,
		sample.BlockReadBytes,
		sample.BlockWriteBytes,
	); err != nil {
		return fmt.Errorf("insert container stats sample: %w", err)
	}
	return nil
}
// InsertSystemStatsSample writes one host-level sample as a new row in the
// system_stats_samples table. A database error is returned wrapped with the
// name of the operation; nil is returned on success.
func (s *Store) InsertSystemStatsSample(sample SystemStatsSample) error {
	const query = `INSERT INTO system_stats_samples (
ts, ncpu, memory_total,
workload_cpu_percent, workload_mem_usage,
containers_running, disk_total_bytes
) VALUES (?, ?, ?, ?, ?, ?, ?)`

	// Arguments are bound positionally, so their order must follow the
	// column list in the statement above.
	if _, err := s.db.Exec(query,
		sample.TS,
		sample.NCPU,
		sample.MemoryTotal,
		sample.WorkloadCPUPercent,
		sample.WorkloadMemUsage,
		sample.ContainersRunning,
		sample.DiskTotalBytes,
	); err != nil {
		return fmt.Errorf("insert system stats sample: %w", err)
	}
	return nil
}
// ListContainerStatsSamples returns the samples stored for one owner, matched
// on owner_type and owner_id, whose ts is greater than or equal to the given
// unix timestamp sinceTS. Rows are ordered by ts ascending.
//
// The returned slice is nil when no rows match. If the query, a row scan, or
// the row iteration fails, the slice is nil and the error is wrapped with the
// step that failed, so callers never receive a partially filled result.
func (s *Store) ListContainerStatsSamples(ownerType, ownerID string, sinceTS int64) ([]ContainerStatsSample, error) {
	rows, err := s.db.Query(
		`SELECT container_id, owner_type, owner_id, ts,
cpu_percent, memory_usage, memory_limit,
network_rx, network_tx, block_read, block_write
FROM container_stats_samples
WHERE owner_type = ? AND owner_id = ? AND ts >= ?
ORDER BY ts ASC`,
		ownerType, ownerID, sinceTS,
	)
	if err != nil {
		return nil, fmt.Errorf("list container stats samples: %w", err)
	}
	defer rows.Close()

	var out []ContainerStatsSample
	for rows.Next() {
		// Named "sample" rather than "s" so the method receiver is not shadowed.
		var sample ContainerStatsSample
		if err := rows.Scan(
			&sample.ContainerID, &sample.OwnerType, &sample.OwnerID, &sample.TS,
			&sample.CPUPercent, &sample.MemoryUsage, &sample.MemoryLimit,
			&sample.NetworkRxBytes, &sample.NetworkTxBytes,
			&sample.BlockReadBytes, &sample.BlockWriteBytes,
		); err != nil {
			return nil, fmt.Errorf("scan container stats sample: %w", err)
		}
		out = append(out, sample)
	}
	// The loop ends both when the rows are exhausted and when iteration
	// fails; the error check below distinguishes the two.
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("iterate container stats samples: %w", err)
	}
	return out, nil
}
// ListAllRecentContainerStatsSamples returns container samples for every
// owner whose ts is greater than or equal to the given unix timestamp
// sinceTS, ordered by ts ascending. It is used by the system dashboard
// "top containers" widget, where the UI wants a mixed pool.
//
// The returned slice is nil when no rows match. If the query, a row scan, or
// the row iteration fails, the slice is nil and the error is wrapped with the
// step that failed, so callers never receive a partially filled result.
func (s *Store) ListAllRecentContainerStatsSamples(sinceTS int64) ([]ContainerStatsSample, error) {
	rows, err := s.db.Query(
		`SELECT container_id, owner_type, owner_id, ts,
cpu_percent, memory_usage, memory_limit,
network_rx, network_tx, block_read, block_write
FROM container_stats_samples
WHERE ts >= ?
ORDER BY ts ASC`,
		sinceTS,
	)
	if err != nil {
		return nil, fmt.Errorf("list all recent container stats samples: %w", err)
	}
	defer rows.Close()

	var out []ContainerStatsSample
	for rows.Next() {
		// Named "sample" rather than "s" so the method receiver is not shadowed.
		var sample ContainerStatsSample
		if err := rows.Scan(
			&sample.ContainerID, &sample.OwnerType, &sample.OwnerID, &sample.TS,
			&sample.CPUPercent, &sample.MemoryUsage, &sample.MemoryLimit,
			&sample.NetworkRxBytes, &sample.NetworkTxBytes,
			&sample.BlockReadBytes, &sample.BlockWriteBytes,
		); err != nil {
			return nil, fmt.Errorf("scan container stats sample: %w", err)
		}
		out = append(out, sample)
	}
	// The loop ends both when the rows are exhausted and when iteration
	// fails; the error check below distinguishes the two.
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("iterate all recent container stats samples: %w", err)
	}
	return out, nil
}
// ListSystemStatsSamples returns the host-level samples whose ts is greater
// than or equal to the given unix timestamp sinceTS, ordered by ts ascending.
//
// The returned slice is nil when no rows match. If the query, a row scan, or
// the row iteration fails, the slice is nil and the error is wrapped with the
// step that failed, so callers never receive a partially filled result.
func (s *Store) ListSystemStatsSamples(sinceTS int64) ([]SystemStatsSample, error) {
	rows, err := s.db.Query(
		`SELECT ts, ncpu, memory_total,
workload_cpu_percent, workload_mem_usage,
containers_running, disk_total_bytes
FROM system_stats_samples
WHERE ts >= ?
ORDER BY ts ASC`,
		sinceTS,
	)
	if err != nil {
		return nil, fmt.Errorf("list system stats samples: %w", err)
	}
	defer rows.Close()

	var out []SystemStatsSample
	for rows.Next() {
		// Named "sample" rather than "s" so the method receiver is not shadowed.
		var sample SystemStatsSample
		if err := rows.Scan(
			&sample.TS, &sample.NCPU, &sample.MemoryTotal,
			&sample.WorkloadCPUPercent, &sample.WorkloadMemUsage,
			&sample.ContainersRunning, &sample.DiskTotalBytes,
		); err != nil {
			return nil, fmt.Errorf("scan system stats sample: %w", err)
		}
		out = append(out, sample)
	}
	// The loop ends both when the rows are exhausted and when iteration
	// fails; the error check below distinguishes the two.
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("iterate system stats samples: %w", err)
	}
	return out, nil
}
// PruneStatsSamplesBefore removes every row whose ts is strictly less than
// the given unix timestamp from both container_stats_samples and
// system_stats_samples. The two deletes share one transaction, so a failure
// part-way through cannot leave one table pruned and the other untouched.
//
// It returns the combined number of rows deleted from both tables. On any
// error the count is 0 and the error is wrapped with the step that failed.
func (s *Store) PruneStatsSamplesBefore(ts int64) (int64, error) {
	tx, err := s.db.Begin()
	if err != nil {
		return 0, fmt.Errorf("begin prune tx: %w", err)
	}
	// Covers every early return below. Its error is intentionally ignored:
	// it only runs as cleanup, and there is nothing useful to do with it.
	defer tx.Rollback()

	containerRes, err := tx.Exec(`DELETE FROM container_stats_samples WHERE ts < ?`, ts)
	if err != nil {
		return 0, fmt.Errorf("prune container stats samples: %w", err)
	}

	systemRes, err := tx.Exec(`DELETE FROM system_stats_samples WHERE ts < ?`, ts)
	if err != nil {
		return 0, fmt.Errorf("prune system stats samples: %w", err)
	}

	if err = tx.Commit(); err != nil {
		return 0, fmt.Errorf("commit prune tx: %w", err)
	}

	// RowsAffected errors are deliberately dropped: the deletes are already
	// committed at this point, so a failure to read a count is not reported
	// as a failed prune.
	containerRows, _ := containerRes.RowsAffected()
	systemRows, _ := systemRes.RowsAffected()
	return containerRows + systemRows, nil
}