Files
alexei.dolgolyov 0c4c338bfe feat(apps): per-workload deploy history, rollback, and resource metrics
Two additions to the app detail page, each backed by a per-workload
endpoint.

Deploy history + rollback:
- New deploy_history table — a structured, version-pinned ledger of every
  dispatch (success AND failure), distinct from the free-text event_log.
  Recorded at the single DispatchPlugin choke point so every source kind
  is covered. The raw deploy error is never persisted (it can carry
  registry-auth / compose-stdout secrets) — only a generic marker, with
  detail going to slog. Pruned to the newest N per workload; cascade-
  deleted with the workload.
- GET /api/workloads/{id}/deploys lists the ledger; POST .../rollback
  (admin) replays a prior successful deploy's pinned reference as a
  rollback-reason dispatch. Phase 1 is image-source only (RollbackCapable);
  git-built sources need checkout-by-commit, a later phase.
- DeployHistoryPanel.svelte renders the ledger with confirm-gated rollback.

Per-workload metrics:
- ListContainerStatsSamplesByWorkload joins the existing container stats
  samples through the containers index; GET /api/workloads/{id}/stats/history
  aggregates CPU/memory per timestamp across the workload's containers.
- WorkloadMetricsPanel.svelte reuses ResourceChart (CPU% + memory MiB,
  windowed, 15s poll).

en/ru i18n added with parity. Tests: store CRUD + cascade + workload-scoped
join, deployer recording (incl. secret-non-leak on failure), API rollback
guards, and per-timestamp aggregation. Plans under docs/plans/.
2026-06-19 16:22:12 +03:00

205 lines
6.7 KiB
Go

package store
import (
"fmt"
)
// InsertContainerStatsSample writes one container sample as a new row in the
// container_stats_samples table. All values are bound as query parameters.
// A database failure is returned wrapped with an "insert container stats
// sample" prefix; nil is returned on success.
func (s *Store) InsertContainerStatsSample(sample ContainerStatsSample) error {
	const query = `INSERT INTO container_stats_samples (
container_id, owner_type, owner_id, ts,
cpu_percent, memory_usage, memory_limit,
network_rx, network_tx, block_read, block_write
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`

	// Argument order must follow the column list in the statement above.
	if _, err := s.db.Exec(
		query,
		sample.ContainerID,
		sample.OwnerType,
		sample.OwnerID,
		sample.TS,
		sample.CPUPercent,
		sample.MemoryUsage,
		sample.MemoryLimit,
		sample.NetworkRxBytes,
		sample.NetworkTxBytes,
		sample.BlockReadBytes,
		sample.BlockWriteBytes,
	); err != nil {
		return fmt.Errorf("insert container stats sample: %w", err)
	}
	return nil
}
// InsertSystemStatsSample writes one host-level sample as a new row in the
// system_stats_samples table. All values are bound as query parameters.
// A database failure is returned wrapped with an "insert system stats
// sample" prefix; nil is returned on success.
func (s *Store) InsertSystemStatsSample(sample SystemStatsSample) error {
	const query = `INSERT INTO system_stats_samples (
ts, ncpu, memory_total,
workload_cpu_percent, workload_mem_usage,
containers_running, disk_total_bytes
) VALUES (?, ?, ?, ?, ?, ?, ?)`

	// Argument order must follow the column list in the statement above.
	if _, err := s.db.Exec(
		query,
		sample.TS,
		sample.NCPU,
		sample.MemoryTotal,
		sample.WorkloadCPUPercent,
		sample.WorkloadMemUsage,
		sample.ContainersRunning,
		sample.DiskTotalBytes,
	); err != nil {
		return fmt.Errorf("insert system stats sample: %w", err)
	}
	return nil
}
// ListContainerStatsSamples returns the samples recorded for one owner at or
// after the given unix timestamp (inclusive), ordered by ts ascending.
//
// ownerType and ownerID are matched exactly against the owner_type and
// owner_id columns. The returned slice is nil when no rows match.
//
// On a query, scan, or iteration failure the returned slice is nil and the
// error is wrapped with a message naming the step that failed.
func (s *Store) ListContainerStatsSamples(ownerType, ownerID string, sinceTS int64) ([]ContainerStatsSample, error) {
	rows, err := s.db.Query(
		`SELECT container_id, owner_type, owner_id, ts,
cpu_percent, memory_usage, memory_limit,
network_rx, network_tx, block_read, block_write
FROM container_stats_samples
WHERE owner_type = ? AND owner_id = ? AND ts >= ?
ORDER BY ts ASC`,
		ownerType, ownerID, sinceTS,
	)
	if err != nil {
		return nil, fmt.Errorf("list container stats samples: %w", err)
	}
	defer rows.Close()

	var out []ContainerStatsSample
	for rows.Next() {
		// Named "sample" rather than "s" so the method receiver is not shadowed.
		var sample ContainerStatsSample
		if err := rows.Scan(
			&sample.ContainerID, &sample.OwnerType, &sample.OwnerID, &sample.TS,
			&sample.CPUPercent, &sample.MemoryUsage, &sample.MemoryLimit,
			&sample.NetworkRxBytes, &sample.NetworkTxBytes,
			&sample.BlockReadBytes, &sample.BlockWriteBytes,
		); err != nil {
			return nil, fmt.Errorf("scan container stats sample: %w", err)
		}
		out = append(out, sample)
	}
	// rows.Next returns false both at end of data and when iteration fails, so
	// the error must be checked here; a truncated result is never returned.
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("iterate container stats samples: %w", err)
	}
	return out, nil
}
// ListContainerStatsSamplesByWorkload returns every container sample owned by
// a workload at or after the given unix timestamp, ordered by ts ascending.
// Samples are linked to their workload through the containers index (owner_id
// is the container row id), so this joins through it. Powers the per-workload
// metrics graph on /apps/[id].
//
// The returned slice is nil when no rows match. On a query, scan, or
// iteration failure the returned slice is nil and the error is wrapped with a
// message naming the step that failed.
//
// NOTE(review): the join matches cs.owner_id against containers.id without
// constraining cs.owner_type; this presumes owner_id values of other owner
// types never collide with a container row id — confirm.
func (s *Store) ListContainerStatsSamplesByWorkload(workloadID string, sinceTS int64) ([]ContainerStatsSample, error) {
	rows, err := s.db.Query(
		`SELECT cs.container_id, cs.owner_type, cs.owner_id, cs.ts,
cs.cpu_percent, cs.memory_usage, cs.memory_limit,
cs.network_rx, cs.network_tx, cs.block_read, cs.block_write
FROM container_stats_samples cs
JOIN containers c ON c.id = cs.owner_id
WHERE c.workload_id = ? AND cs.ts >= ?
ORDER BY cs.ts ASC`,
		workloadID, sinceTS,
	)
	if err != nil {
		return nil, fmt.Errorf("list container stats samples by workload: %w", err)
	}
	defer rows.Close()

	var out []ContainerStatsSample
	for rows.Next() {
		// Named "sample" rather than "s" so the method receiver is not shadowed.
		var sample ContainerStatsSample
		if err := rows.Scan(
			&sample.ContainerID, &sample.OwnerType, &sample.OwnerID, &sample.TS,
			&sample.CPUPercent, &sample.MemoryUsage, &sample.MemoryLimit,
			&sample.NetworkRxBytes, &sample.NetworkTxBytes,
			&sample.BlockReadBytes, &sample.BlockWriteBytes,
		); err != nil {
			return nil, fmt.Errorf("scan container stats sample: %w", err)
		}
		out = append(out, sample)
	}
	// rows.Next returns false both at end of data and when iteration fails, so
	// the error must be checked here; a truncated result is never returned.
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("iterate container stats samples by workload: %w", err)
	}
	return out, nil
}
// ListAllRecentContainerStatsSamples returns samples across every owner at or
// after the given unix timestamp, ordered by ts ascending. Used by the system
// dashboard "top containers" widget where the UI wants a mixed pool.
//
// The returned slice is nil when no rows match. On a query, scan, or
// iteration failure the returned slice is nil and the error is wrapped with a
// message naming the step that failed.
func (s *Store) ListAllRecentContainerStatsSamples(sinceTS int64) ([]ContainerStatsSample, error) {
	rows, err := s.db.Query(
		`SELECT container_id, owner_type, owner_id, ts,
cpu_percent, memory_usage, memory_limit,
network_rx, network_tx, block_read, block_write
FROM container_stats_samples
WHERE ts >= ?
ORDER BY ts ASC`,
		sinceTS,
	)
	if err != nil {
		return nil, fmt.Errorf("list all recent container stats samples: %w", err)
	}
	defer rows.Close()

	var out []ContainerStatsSample
	for rows.Next() {
		// Named "sample" rather than "s" so the method receiver is not shadowed.
		var sample ContainerStatsSample
		if err := rows.Scan(
			&sample.ContainerID, &sample.OwnerType, &sample.OwnerID, &sample.TS,
			&sample.CPUPercent, &sample.MemoryUsage, &sample.MemoryLimit,
			&sample.NetworkRxBytes, &sample.NetworkTxBytes,
			&sample.BlockReadBytes, &sample.BlockWriteBytes,
		); err != nil {
			return nil, fmt.Errorf("scan container stats sample: %w", err)
		}
		out = append(out, sample)
	}
	// rows.Next returns false both at end of data and when iteration fails, so
	// the error must be checked here; a truncated result is never returned.
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("iterate all recent container stats samples: %w", err)
	}
	return out, nil
}
// ListSystemStatsSamples returns host samples at or after the given unix
// timestamp, ordered by ts ascending.
//
// The returned slice is nil when no rows match. On a query, scan, or
// iteration failure the returned slice is nil and the error is wrapped with a
// message naming the step that failed.
func (s *Store) ListSystemStatsSamples(sinceTS int64) ([]SystemStatsSample, error) {
	rows, err := s.db.Query(
		`SELECT ts, ncpu, memory_total,
workload_cpu_percent, workload_mem_usage,
containers_running, disk_total_bytes
FROM system_stats_samples
WHERE ts >= ?
ORDER BY ts ASC`,
		sinceTS,
	)
	if err != nil {
		return nil, fmt.Errorf("list system stats samples: %w", err)
	}
	defer rows.Close()

	var out []SystemStatsSample
	for rows.Next() {
		// Named "sample" rather than "s" so the method receiver is not shadowed.
		var sample SystemStatsSample
		if err := rows.Scan(
			&sample.TS, &sample.NCPU, &sample.MemoryTotal,
			&sample.WorkloadCPUPercent, &sample.WorkloadMemUsage,
			&sample.ContainersRunning, &sample.DiskTotalBytes,
		); err != nil {
			return nil, fmt.Errorf("scan system stats sample: %w", err)
		}
		out = append(out, sample)
	}
	// rows.Next returns false both at end of data and when iteration fails, so
	// the error must be checked here; a truncated result is never returned.
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("iterate system stats samples: %w", err)
	}
	return out, nil
}
// PruneStatsSamplesBefore removes every row whose ts is strictly less than
// the given unix timestamp from container_stats_samples and
// system_stats_samples. Both deletes run inside one transaction, so a crash
// between them cannot leave one table pruned and the other not.
//
// It returns the combined number of rows deleted from both tables. If the
// transaction cannot be started, either delete fails, or the commit fails,
// it returns 0 and a wrapped error.
func (s *Store) PruneStatsSamplesBefore(ts int64) (int64, error) {
	tx, err := s.db.Begin()
	if err != nil {
		return 0, fmt.Errorf("begin prune tx: %w", err)
	}
	// Undo any partial work on the early-return paths. The result is
	// discarded, so the call that runs after a successful Commit has no
	// visible effect here.
	defer func() { _ = tx.Rollback() }()

	containerRes, err := tx.Exec(`DELETE FROM container_stats_samples WHERE ts < ?`, ts)
	if err != nil {
		return 0, fmt.Errorf("prune container stats samples: %w", err)
	}

	systemRes, err := tx.Exec(`DELETE FROM system_stats_samples WHERE ts < ?`, ts)
	if err != nil {
		return 0, fmt.Errorf("prune system stats samples: %w", err)
	}

	if err := tx.Commit(); err != nil {
		return 0, fmt.Errorf("commit prune tx: %w", err)
	}

	// Errors from RowsAffected are discarded: the deletes are already
	// committed at this point, so the total is reported on a best-effort basis.
	containerRows, _ := containerRes.RowsAffected()
	systemRows, _ := systemRes.RowsAffected()
	return containerRows + systemRows, nil
}