feat(backup): bundle assets in ZIP + partial-write hardening + restart log
Auto-backups now produce a ZIP containing ledgrab.db plus every file in the assets dir under assets/ — matching the manual GET /api/v1/system/backup format, so restore accepts either output interchangeably. Legacy .db backups remain listable, restorable, and prunable; both extensions count toward max_backups. Writes stage to <name>.partial then os.replace into place — a crash mid-ZIP never leaves a half-written backup that masquerades as valid. Stale .partials from prior crashes are swept on the next run. Symlinks inside the assets dir are skipped so a hostile link can't slurp a target outside the dir into every backup. Backups larger than 500 MB log a warning so operators notice unbounded asset growth before disk fills up. restart.py: redirect the spawned restart script's stdout/stderr to restart.log and bail out early if the script is missing — silent failures (PowerShell off PATH, restart.ps1 erroring) used to vanish into a detached child with no diagnostic trail. Tests cover happy path, asset bytes round-trip, partial cleanup, None/missing assets_dir, failure rollback, stale-partial sweep, symlink rejection, mixed legacy+new listing, and cross-format prune.
This commit is contained in:
@@ -11,6 +11,7 @@ import sys
|
||||
import threading
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from fastapi import APIRouter, Depends, File, HTTPException, UploadFile
|
||||
from fastapi.responses import StreamingResponse
|
||||
@@ -38,28 +39,59 @@ _SERVER_DIR = Path(__file__).resolve().parents[4]
|
||||
|
||||
|
||||
def _schedule_restart() -> None:
|
||||
"""Spawn a restart script after a short delay so the HTTP response completes."""
|
||||
"""Spawn a restart script after a short delay so the HTTP response completes.
|
||||
|
||||
def _restart():
|
||||
stdout/stderr of the spawned script are redirected to ``<server>/restart.log``
|
||||
so a silent failure (PowerShell not on PATH, restart.ps1 erroring, etc.)
|
||||
leaves evidence on disk instead of vanishing into a detached child.
|
||||
"""
|
||||
|
||||
def _restart() -> None:
|
||||
import time
|
||||
|
||||
time.sleep(1)
|
||||
|
||||
# Annotated as ``dict[str, Any]`` because the value union spans
|
||||
# int flags (Windows ``creationflags``) and bool (POSIX
|
||||
# ``start_new_session``); a narrower union confuses ``**`` unpacking.
|
||||
popen_kwargs: dict[str, Any]
|
||||
if sys.platform == "win32":
|
||||
subprocess.Popen(
|
||||
[
|
||||
"powershell",
|
||||
"-ExecutionPolicy",
|
||||
"Bypass",
|
||||
"-File",
|
||||
str(_SERVER_DIR / "restart.ps1"),
|
||||
],
|
||||
creationflags=subprocess.DETACHED_PROCESS | subprocess.CREATE_NEW_PROCESS_GROUP,
|
||||
)
|
||||
script = _SERVER_DIR / "restart.ps1"
|
||||
cmd = ["powershell", "-ExecutionPolicy", "Bypass", "-File", str(script)]
|
||||
popen_kwargs = {
|
||||
"creationflags": (
|
||||
subprocess.DETACHED_PROCESS | subprocess.CREATE_NEW_PROCESS_GROUP
|
||||
),
|
||||
}
|
||||
else:
|
||||
subprocess.Popen(
|
||||
["bash", str(_SERVER_DIR / "restart.sh")],
|
||||
start_new_session=True,
|
||||
)
|
||||
script = _SERVER_DIR / "restart.sh"
|
||||
cmd = ["bash", str(script)]
|
||||
popen_kwargs = {"start_new_session": True}
|
||||
|
||||
if not script.is_file():
|
||||
logger.error("Restart script missing: %s", script)
|
||||
return
|
||||
|
||||
log_path = _SERVER_DIR / "restart.log"
|
||||
try:
|
||||
# Open in append mode so multiple restarts accumulate; the child
|
||||
# owns its own duped handle, so closing here in the parent is safe.
|
||||
with open(log_path, "ab") as log_file:
|
||||
log_file.write(
|
||||
f"\n--- restart spawned at {time.strftime('%Y-%m-%d %H:%M:%S')} ---\n".encode()
|
||||
)
|
||||
log_file.flush()
|
||||
proc = subprocess.Popen(
|
||||
cmd,
|
||||
stdout=log_file,
|
||||
stderr=subprocess.STDOUT,
|
||||
**popen_kwargs,
|
||||
)
|
||||
logger.info("Restart script launched: %s (PID %s, log %s)", cmd[0], proc.pid, log_path)
|
||||
except OSError as e:
|
||||
logger.error("Failed to launch restart script %s: %s", script, e, exc_info=True)
|
||||
except Exception as e:
|
||||
logger.error("Unexpected error launching restart script: %s", e, exc_info=True)
|
||||
|
||||
threading.Thread(target=_restart, daemon=True).start()
|
||||
|
||||
|
||||
@@ -1,10 +1,12 @@
|
||||
"""Auto-backup engine — periodic SQLite snapshot backups."""
|
||||
"""Auto-backup engine — periodic SQLite + assets snapshot backups."""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import tempfile
|
||||
import zipfile
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
from typing import Iterable, List
|
||||
|
||||
from ledgrab.storage.database import Database
|
||||
from ledgrab.utils import get_logger
|
||||
@@ -20,19 +22,35 @@ DEFAULT_SETTINGS = {
|
||||
# Skip the immediate-on-start backup if a recent backup exists within this window.
|
||||
_STARTUP_BACKUP_COOLDOWN = timedelta(minutes=5)
|
||||
|
||||
_BACKUP_EXT = ".db"
|
||||
# Current write format. ``.db`` is still recognised on read so backups taken
|
||||
# by older versions remain listable, restorable, and prunable.
|
||||
_BACKUP_EXT = ".zip"
|
||||
_RECOGNISED_EXTS: tuple[str, ...] = (".zip", ".db")
|
||||
|
||||
# Soft warning threshold — large backups indicate an unbounded assets dir or
|
||||
# bloated DB. We don't refuse to write (user data is theirs), but log loudly
|
||||
# so the operator can investigate before disk fills up over many intervals.
|
||||
_BACKUP_SIZE_WARN_BYTES = 500 * 1024 * 1024 # 500 MB
|
||||
|
||||
|
||||
class AutoBackupEngine:
|
||||
"""Creates periodic SQLite snapshot backups of the database."""
|
||||
"""Creates periodic backups of the database and asset files.
|
||||
|
||||
Each backup is a ZIP archive containing ``ledgrab.db`` plus every file
|
||||
from ``assets_dir`` under ``assets/`` — matching the format produced by
|
||||
the manual ``GET /api/v1/system/backup`` download. The restore endpoint
|
||||
accepts either ``.zip`` or ``.db`` interchangeably.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
backup_dir: Path,
|
||||
db: Database,
|
||||
assets_dir: Path | None = None,
|
||||
):
|
||||
self._backup_dir = Path(backup_dir)
|
||||
self._db = db
|
||||
self._assets_dir = Path(assets_dir) if assets_dir else None
|
||||
self._task: asyncio.Task | None = None
|
||||
self._last_backup_time: datetime | None = None
|
||||
|
||||
@@ -82,9 +100,14 @@ class AutoBackupEngine:
|
||||
self._task.cancel()
|
||||
self._task = None
|
||||
|
||||
def _iter_backup_files(self) -> Iterable[Path]:
|
||||
"""Yield every backup file (both legacy ``.db`` and current ``.zip``)."""
|
||||
for ext in _RECOGNISED_EXTS:
|
||||
yield from self._backup_dir.glob(f"*{ext}")
|
||||
|
||||
def _most_recent_backup_age(self) -> timedelta | None:
|
||||
"""Return the age of the newest backup file, or None if no backups exist."""
|
||||
files = list(self._backup_dir.glob(f"*{_BACKUP_EXT}"))
|
||||
files = list(self._iter_backup_files())
|
||||
if not files:
|
||||
return None
|
||||
newest = max(files, key=lambda p: p.stat().st_mtime)
|
||||
@@ -124,15 +147,72 @@ class AutoBackupEngine:
|
||||
timestamp = now.strftime("%Y-%m-%dT%H%M%S")
|
||||
filename = f"ledgrab-backup-{timestamp}{_BACKUP_EXT}"
|
||||
file_path = self._backup_dir / filename
|
||||
# Stage the ZIP at <name>.partial then os.replace into place once it's
|
||||
# fully written. A crash mid-write leaves a .partial file (cleaned up
|
||||
# on the next backup) but never a half-written backup that would fool
|
||||
# ``_most_recent_backup_age`` / ``_prune_old_backups`` into trusting
|
||||
# corrupt data.
|
||||
partial_path = file_path.with_suffix(file_path.suffix + ".partial")
|
||||
|
||||
self._db.backup_to(file_path)
|
||||
# SQLite backup API → temp .db so we get a consistent snapshot
|
||||
# without holding the DB lock for the ZIP write.
|
||||
tmp = tempfile.NamedTemporaryFile(suffix=".db", delete=False)
|
||||
tmp_path = Path(tmp.name)
|
||||
tmp.close()
|
||||
asset_count = 0
|
||||
try:
|
||||
self._db.backup_to(tmp_path)
|
||||
with zipfile.ZipFile(partial_path, "w", zipfile.ZIP_DEFLATED) as zf:
|
||||
zf.write(tmp_path, "ledgrab.db")
|
||||
if self._assets_dir and self._assets_dir.is_dir():
|
||||
for asset_file in self._assets_dir.iterdir():
|
||||
# Skip symlinks: ``is_file()`` follows them and we
|
||||
# don't want to silently slurp a symlink target that
|
||||
# lives outside the assets dir into every backup.
|
||||
if asset_file.is_symlink():
|
||||
continue
|
||||
if asset_file.is_file():
|
||||
zf.write(asset_file, f"assets/{asset_file.name}")
|
||||
asset_count += 1
|
||||
os.replace(partial_path, file_path)
|
||||
except Exception:
|
||||
# Roll back the staged partial so it doesn't accumulate; the
|
||||
# finally block still removes the SQLite temp file. Re-raise so
|
||||
# the caller (``_backup_loop`` / ``trigger_backup``) sees + logs
|
||||
# the failure instead of silently emitting a missing backup.
|
||||
partial_path.unlink(missing_ok=True)
|
||||
raise
|
||||
finally:
|
||||
tmp_path.unlink(missing_ok=True)
|
||||
|
||||
# Best-effort sweep of any older orphan .partial files left by a
|
||||
# crash on a previous run.
|
||||
for stale in self._backup_dir.glob("*.partial"):
|
||||
try:
|
||||
stale.unlink()
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
size_bytes = file_path.stat().st_size
|
||||
self._last_backup_time = now
|
||||
logger.info(f"Backup created: {filename}")
|
||||
logger.info(
|
||||
"Backup created: %s (%d asset files, %.1f MB)",
|
||||
filename,
|
||||
asset_count,
|
||||
size_bytes / (1024 * 1024),
|
||||
)
|
||||
if size_bytes > _BACKUP_SIZE_WARN_BYTES:
|
||||
logger.warning(
|
||||
"Backup %s is %.1f MB — exceeds %d MB warning threshold; "
|
||||
"consider pruning the assets directory or lowering max_backups",
|
||||
filename,
|
||||
size_bytes / (1024 * 1024),
|
||||
_BACKUP_SIZE_WARN_BYTES // (1024 * 1024),
|
||||
)
|
||||
|
||||
def _prune_old_backups(self) -> None:
|
||||
max_backups = self._settings["max_backups"]
|
||||
files = sorted(self._backup_dir.glob(f"*{_BACKUP_EXT}"), key=lambda p: p.stat().st_mtime)
|
||||
files = sorted(self._iter_backup_files(), key=lambda p: p.stat().st_mtime)
|
||||
excess = len(files) - max_backups
|
||||
if excess > 0:
|
||||
for f in files[:excess]:
|
||||
@@ -179,9 +259,7 @@ class AutoBackupEngine:
|
||||
|
||||
def list_backups(self) -> List[dict]:
|
||||
backups = []
|
||||
for f in sorted(
|
||||
self._backup_dir.glob(f"*{_BACKUP_EXT}"), key=lambda p: p.stat().st_mtime, reverse=True
|
||||
):
|
||||
for f in sorted(self._iter_backup_files(), key=lambda p: p.stat().st_mtime, reverse=True):
|
||||
stat = f.stat()
|
||||
backups.append(
|
||||
{
|
||||
|
||||
@@ -283,6 +283,7 @@ async def lifespan(app: FastAPI):
|
||||
auto_backup_engine = AutoBackupEngine(
|
||||
backup_dir=_data_dir / "backups",
|
||||
db=db,
|
||||
assets_dir=Path(config.assets.assets_dir),
|
||||
)
|
||||
|
||||
# Create update service (checks for new releases)
|
||||
|
||||
@@ -0,0 +1,210 @@
|
||||
"""Regression coverage for :class:`AutoBackupEngine` ZIP format.
|
||||
|
||||
Pins behavior added when auto-backups switched from raw ``.db`` snapshots to
|
||||
``.zip`` archives (DB + assets), and the partial-write hardening that goes
|
||||
with it (stage at ``<name>.partial`` then ``os.replace`` so a crash mid-write
|
||||
never leaves a corrupt file masquerading as a valid backup).
|
||||
"""
|
||||
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from ledgrab.core.backup.auto_backup import AutoBackupEngine
|
||||
from ledgrab.storage.database import Database
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def engine_with_assets(tmp_path):
|
||||
"""Engine wired to a small DB + assets dir with two sample files."""
|
||||
db_path = tmp_path / "ledgrab.db"
|
||||
assets_dir = tmp_path / "assets"
|
||||
assets_dir.mkdir()
|
||||
(assets_dir / "alert.wav").write_bytes(b"WAV_PAYLOAD_1")
|
||||
(assets_dir / "ping.wav").write_bytes(b"WAV_PAYLOAD_2")
|
||||
|
||||
db = Database(db_path)
|
||||
db.upsert("devices", "dev_1", "Living Room", {"id": "dev_1", "type": "mock"})
|
||||
|
||||
backup_dir = tmp_path / "backups"
|
||||
engine = AutoBackupEngine(backup_dir=backup_dir, db=db, assets_dir=assets_dir)
|
||||
yield engine
|
||||
db.close()
|
||||
|
||||
|
||||
def _only_backup(backup_dir: Path) -> Path:
|
||||
"""Return the single .zip backup in ``backup_dir``; assert exactly one."""
|
||||
zips = sorted(backup_dir.glob("*.zip"))
|
||||
assert len(zips) == 1, f"expected exactly one .zip, found {[p.name for p in zips]}"
|
||||
return zips[0]
|
||||
|
||||
|
||||
def test_backup_bundles_db_and_assets(engine_with_assets):
|
||||
"""Happy path: ZIP contains ``ledgrab.db`` plus every assets file."""
|
||||
engine_with_assets._perform_backup_sync()
|
||||
|
||||
backup = _only_backup(engine_with_assets._backup_dir)
|
||||
with zipfile.ZipFile(backup) as zf:
|
||||
names = sorted(zf.namelist())
|
||||
assert names == ["assets/alert.wav", "assets/ping.wav", "ledgrab.db"]
|
||||
|
||||
|
||||
def test_backup_preserves_asset_bytes(engine_with_assets):
|
||||
"""The asset binary inside the ZIP matches the source byte-for-byte."""
|
||||
engine_with_assets._perform_backup_sync()
|
||||
|
||||
backup = _only_backup(engine_with_assets._backup_dir)
|
||||
with zipfile.ZipFile(backup) as zf:
|
||||
assert zf.read("assets/alert.wav") == b"WAV_PAYLOAD_1"
|
||||
assert zf.read("assets/ping.wav") == b"WAV_PAYLOAD_2"
|
||||
|
||||
|
||||
def test_backup_no_partial_file_left_on_success(engine_with_assets):
|
||||
"""After a successful backup, the staging ``.partial`` is renamed away."""
|
||||
engine_with_assets._perform_backup_sync()
|
||||
|
||||
leftovers = list(engine_with_assets._backup_dir.glob("*.partial"))
|
||||
assert leftovers == []
|
||||
|
||||
|
||||
def test_backup_skips_assets_when_dir_none(tmp_path):
|
||||
"""``assets_dir=None`` produces a DB-only ZIP, no error."""
|
||||
db_path = tmp_path / "ledgrab.db"
|
||||
db = Database(db_path)
|
||||
db.upsert("devices", "dev_1", "A", {"id": "dev_1"})
|
||||
backup_dir = tmp_path / "backups"
|
||||
try:
|
||||
engine = AutoBackupEngine(backup_dir=backup_dir, db=db, assets_dir=None)
|
||||
engine._perform_backup_sync()
|
||||
|
||||
backup = _only_backup(backup_dir)
|
||||
with zipfile.ZipFile(backup) as zf:
|
||||
assert zf.namelist() == ["ledgrab.db"]
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
|
||||
def test_backup_skips_assets_when_dir_missing(tmp_path):
|
||||
"""A non-existent ``assets_dir`` is silently skipped, not an error."""
|
||||
db_path = tmp_path / "ledgrab.db"
|
||||
db = Database(db_path)
|
||||
backup_dir = tmp_path / "backups"
|
||||
try:
|
||||
engine = AutoBackupEngine(
|
||||
backup_dir=backup_dir, db=db, assets_dir=tmp_path / "does-not-exist"
|
||||
)
|
||||
engine._perform_backup_sync()
|
||||
|
||||
backup = _only_backup(backup_dir)
|
||||
with zipfile.ZipFile(backup) as zf:
|
||||
assert zf.namelist() == ["ledgrab.db"]
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
|
||||
def test_backup_failure_leaves_no_zip_or_partial(engine_with_assets):
|
||||
"""When ``db.backup_to`` raises, neither the final .zip nor the .partial
|
||||
survives. The exception propagates so the caller can log it.
|
||||
"""
|
||||
with patch.object(engine_with_assets._db, "backup_to", side_effect=RuntimeError("boom")):
|
||||
with pytest.raises(RuntimeError, match="boom"):
|
||||
engine_with_assets._perform_backup_sync()
|
||||
|
||||
assert list(engine_with_assets._backup_dir.glob("*.zip")) == []
|
||||
assert list(engine_with_assets._backup_dir.glob("*.partial")) == []
|
||||
|
||||
|
||||
def test_backup_cleans_stale_partial_from_previous_crash(engine_with_assets):
|
||||
"""A leftover ``.partial`` from a prior crash is swept on the next run."""
|
||||
stale = engine_with_assets._backup_dir
|
||||
stale.mkdir(parents=True, exist_ok=True)
|
||||
(stale / "ledgrab-backup-2025-01-01T000000.zip.partial").write_bytes(b"corrupt")
|
||||
|
||||
engine_with_assets._perform_backup_sync()
|
||||
|
||||
assert list(stale.glob("*.partial")) == []
|
||||
# The successful backup is also present.
|
||||
assert len(list(stale.glob("*.zip"))) == 1
|
||||
|
||||
|
||||
def test_backup_skips_symlinks_in_assets_dir(tmp_path):
|
||||
"""Symlinked files in ``assets_dir`` are not bundled (security guard)."""
|
||||
db_path = tmp_path / "ledgrab.db"
|
||||
assets_dir = tmp_path / "assets"
|
||||
assets_dir.mkdir()
|
||||
(assets_dir / "real.wav").write_bytes(b"REAL")
|
||||
|
||||
# Some test environments (e.g. unprivileged Windows) can't create
|
||||
# symlinks; skip rather than fail spuriously when that's the case.
|
||||
secret_target = tmp_path / "secret.bin"
|
||||
secret_target.write_bytes(b"PRIVATE")
|
||||
link_path = assets_dir / "linked.wav"
|
||||
try:
|
||||
link_path.symlink_to(secret_target)
|
||||
except (OSError, NotImplementedError):
|
||||
pytest.skip("symlinks not supported in this environment")
|
||||
|
||||
db = Database(db_path)
|
||||
try:
|
||||
backup_dir = tmp_path / "backups"
|
||||
engine = AutoBackupEngine(backup_dir=backup_dir, db=db, assets_dir=assets_dir)
|
||||
engine._perform_backup_sync()
|
||||
|
||||
backup = _only_backup(backup_dir)
|
||||
with zipfile.ZipFile(backup) as zf:
|
||||
names = zf.namelist()
|
||||
assert "assets/real.wav" in names
|
||||
assert "assets/linked.wav" not in names
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
|
||||
def test_list_backups_unions_legacy_db_and_new_zip(engine_with_assets):
|
||||
"""``list_backups`` returns both legacy ``.db`` and current ``.zip`` files
|
||||
so users can still restore from auto-backups written by older versions.
|
||||
"""
|
||||
backup_dir = engine_with_assets._backup_dir
|
||||
backup_dir.mkdir(parents=True, exist_ok=True)
|
||||
(backup_dir / "ledgrab-backup-2024-12-31T000000.db").write_bytes(b"legacy")
|
||||
|
||||
engine_with_assets._perform_backup_sync()
|
||||
|
||||
names = {b["filename"] for b in engine_with_assets.list_backups()}
|
||||
assert any(n.endswith(".db") for n in names)
|
||||
assert any(n.endswith(".zip") for n in names)
|
||||
|
||||
|
||||
def test_prune_honors_max_across_formats(tmp_path):
|
||||
"""``_prune_old_backups`` enforces ``max_backups`` across both extensions."""
|
||||
db_path = tmp_path / "ledgrab.db"
|
||||
db = Database(db_path)
|
||||
backup_dir = tmp_path / "backups"
|
||||
backup_dir.mkdir()
|
||||
|
||||
# Two legacy .db files (older), two .zip files (newer) — max=3 should
|
||||
# prune the single oldest .db and keep the rest.
|
||||
import time
|
||||
|
||||
(backup_dir / "ledgrab-backup-2024-01-01T000000.db").write_bytes(b"a")
|
||||
time.sleep(0.02)
|
||||
(backup_dir / "ledgrab-backup-2024-02-01T000000.db").write_bytes(b"b")
|
||||
time.sleep(0.02)
|
||||
(backup_dir / "ledgrab-backup-2024-03-01T000000.zip").write_bytes(b"c")
|
||||
time.sleep(0.02)
|
||||
(backup_dir / "ledgrab-backup-2024-04-01T000000.zip").write_bytes(b"d")
|
||||
|
||||
try:
|
||||
engine = AutoBackupEngine(backup_dir=backup_dir, db=db, assets_dir=None)
|
||||
engine._settings["max_backups"] = 3
|
||||
engine._prune_old_backups()
|
||||
|
||||
remaining = sorted(p.name for p in engine._iter_backup_files())
|
||||
assert remaining == [
|
||||
"ledgrab-backup-2024-02-01T000000.db",
|
||||
"ledgrab-backup-2024-03-01T000000.zip",
|
||||
"ledgrab-backup-2024-04-01T000000.zip",
|
||||
]
|
||||
finally:
|
||||
db.close()
|
||||
Reference in New Issue
Block a user