diff --git a/server/src/ledgrab/api/routes/backup.py b/server/src/ledgrab/api/routes/backup.py index 678eefc..189f2ab 100644 --- a/server/src/ledgrab/api/routes/backup.py +++ b/server/src/ledgrab/api/routes/backup.py @@ -11,6 +11,7 @@ import sys import threading import zipfile from pathlib import Path +from typing import Any from fastapi import APIRouter, Depends, File, HTTPException, UploadFile from fastapi.responses import StreamingResponse @@ -38,28 +39,59 @@ _SERVER_DIR = Path(__file__).resolve().parents[4] def _schedule_restart() -> None: - """Spawn a restart script after a short delay so the HTTP response completes.""" + """Spawn a restart script after a short delay so the HTTP response completes. - def _restart(): + stdout/stderr of the spawned script are redirected to ``/restart.log`` + so a silent failure (PowerShell not on PATH, restart.ps1 erroring, etc.) + leaves evidence on disk instead of vanishing into a detached child. + """ + + def _restart() -> None: import time time.sleep(1) + + # Annotated as ``dict[str, Any]`` because the value union spans + # int flags (Windows ``creationflags``) and bool (POSIX + # ``start_new_session``); a narrower union confuses ``**`` unpacking. + popen_kwargs: dict[str, Any] if sys.platform == "win32": - subprocess.Popen( - [ - "powershell", - "-ExecutionPolicy", - "Bypass", - "-File", - str(_SERVER_DIR / "restart.ps1"), - ], - creationflags=subprocess.DETACHED_PROCESS | subprocess.CREATE_NEW_PROCESS_GROUP, - ) + script = _SERVER_DIR / "restart.ps1" + cmd = ["powershell", "-ExecutionPolicy", "Bypass", "-File", str(script)] + popen_kwargs = { + "creationflags": ( + subprocess.DETACHED_PROCESS | subprocess.CREATE_NEW_PROCESS_GROUP + ), + } else: - subprocess.Popen( - ["bash", str(_SERVER_DIR / "restart.sh")], - start_new_session=True, - ) + script = _SERVER_DIR / "restart.sh" + cmd = ["bash", str(script)] + popen_kwargs = {"start_new_session": True} + + if not script.is_file(): + logger.error("Restart script missing: %s", script) + return + + log_path = _SERVER_DIR / "restart.log" + try: + # Open in append mode so multiple restarts accumulate; the child + # owns its own duped handle, so closing here in the parent is safe. + with open(log_path, "ab") as log_file: + log_file.write( + f"\n--- restart spawned at {time.strftime('%Y-%m-%d %H:%M:%S')} ---\n".encode() + ) + log_file.flush() + proc = subprocess.Popen( + cmd, + stdout=log_file, + stderr=subprocess.STDOUT, + **popen_kwargs, + ) + logger.info("Restart script launched: %s (PID %s, log %s)", cmd[0], proc.pid, log_path) + except OSError as e: + logger.error("Failed to launch restart script %s: %s", script, e, exc_info=True) + except Exception as e: + logger.error("Unexpected error launching restart script: %s", e, exc_info=True) threading.Thread(target=_restart, daemon=True).start() diff --git a/server/src/ledgrab/core/backup/auto_backup.py b/server/src/ledgrab/core/backup/auto_backup.py index 682b186..0ba8833 100644 --- a/server/src/ledgrab/core/backup/auto_backup.py +++ b/server/src/ledgrab/core/backup/auto_backup.py @@ -1,10 +1,12 @@ -"""Auto-backup engine — periodic SQLite snapshot backups.""" +"""Auto-backup engine — periodic SQLite + assets snapshot backups.""" import asyncio import os +import tempfile +import zipfile from datetime import datetime, timedelta, timezone from pathlib import Path -from typing import List +from typing import Iterable, List from ledgrab.storage.database import Database from ledgrab.utils import get_logger @@ -20,19 +22,35 @@ DEFAULT_SETTINGS = { # Skip the immediate-on-start backup if a recent backup exists within this window. _STARTUP_BACKUP_COOLDOWN = timedelta(minutes=5) -_BACKUP_EXT = ".db" +# Current write format. ``.db`` is still recognised on read so backups taken +# by older versions remain listable, restorable, and prunable. +_BACKUP_EXT = ".zip" +_RECOGNISED_EXTS: tuple[str, ...] = (".zip", ".db") + +# Soft warning threshold — large backups indicate an unbounded assets dir or +# bloated DB. We don't refuse to write (user data is theirs), but log loudly +# so the operator can investigate before disk fills up over many intervals. +_BACKUP_SIZE_WARN_BYTES = 500 * 1024 * 1024 # 500 MB class AutoBackupEngine: - """Creates periodic SQLite snapshot backups of the database.""" + """Creates periodic backups of the database and asset files. + + Each backup is a ZIP archive containing ``ledgrab.db`` plus every file + from ``assets_dir`` under ``assets/`` — matching the format produced by + the manual ``GET /api/v1/system/backup`` download. The restore endpoint + accepts either ``.zip`` or ``.db`` interchangeably. + """ def __init__( self, backup_dir: Path, db: Database, + assets_dir: Path | None = None, ): self._backup_dir = Path(backup_dir) self._db = db + self._assets_dir = Path(assets_dir) if assets_dir else None self._task: asyncio.Task | None = None self._last_backup_time: datetime | None = None @@ -82,9 +100,14 @@ class AutoBackupEngine: self._task.cancel() self._task = None + def _iter_backup_files(self) -> Iterable[Path]: + """Yield every backup file (both legacy ``.db`` and current ``.zip``).""" + for ext in _RECOGNISED_EXTS: + yield from self._backup_dir.glob(f"*{ext}") + def _most_recent_backup_age(self) -> timedelta | None: """Return the age of the newest backup file, or None if no backups exist.""" - files = list(self._backup_dir.glob(f"*{_BACKUP_EXT}")) + files = list(self._iter_backup_files()) if not files: return None newest = max(files, key=lambda p: p.stat().st_mtime) @@ -124,15 +147,72 @@ class AutoBackupEngine: timestamp = now.strftime("%Y-%m-%dT%H%M%S") filename = f"ledgrab-backup-{timestamp}{_BACKUP_EXT}" file_path = self._backup_dir / filename + # Stage the ZIP at .partial then os.replace into place once it's + # fully written. A crash mid-write leaves a .partial file (cleaned up + # on the next backup) but never a half-written backup that would fool + # ``_most_recent_backup_age`` / ``_prune_old_backups`` into trusting + # corrupt data. + partial_path = file_path.with_suffix(file_path.suffix + ".partial") - self._db.backup_to(file_path) + # SQLite backup API → temp .db so we get a consistent snapshot + # without holding the DB lock for the ZIP write. + tmp = tempfile.NamedTemporaryFile(suffix=".db", delete=False) + tmp_path = Path(tmp.name) + tmp.close() + asset_count = 0 + try: + self._db.backup_to(tmp_path) + with zipfile.ZipFile(partial_path, "w", zipfile.ZIP_DEFLATED) as zf: + zf.write(tmp_path, "ledgrab.db") + if self._assets_dir and self._assets_dir.is_dir(): + for asset_file in self._assets_dir.iterdir(): + # Skip symlinks: ``is_file()`` follows them and we + # don't want to silently slurp a symlink target that + # lives outside the assets dir into every backup. + if asset_file.is_symlink(): + continue + if asset_file.is_file(): + zf.write(asset_file, f"assets/{asset_file.name}") + asset_count += 1 + os.replace(partial_path, file_path) + except Exception: + # Roll back the staged partial so it doesn't accumulate; the + # finally block still removes the SQLite temp file. Re-raise so + # the caller (``_backup_loop`` / ``trigger_backup``) sees + logs + # the failure instead of silently emitting a missing backup. + partial_path.unlink(missing_ok=True) + raise + finally: + tmp_path.unlink(missing_ok=True) + # Best-effort sweep of any older orphan .partial files left by a + # crash on a previous run. + for stale in self._backup_dir.glob("*.partial"): + try: + stale.unlink() + except OSError: + pass + + size_bytes = file_path.stat().st_size self._last_backup_time = now - logger.info(f"Backup created: {filename}") + logger.info( + "Backup created: %s (%d asset files, %.1f MB)", + filename, + asset_count, + size_bytes / (1024 * 1024), + ) + if size_bytes > _BACKUP_SIZE_WARN_BYTES: + logger.warning( + "Backup %s is %.1f MB — exceeds %d MB warning threshold; " + "consider pruning the assets directory or lowering max_backups", + filename, + size_bytes / (1024 * 1024), + _BACKUP_SIZE_WARN_BYTES // (1024 * 1024), + ) def _prune_old_backups(self) -> None: max_backups = self._settings["max_backups"] - files = sorted(self._backup_dir.glob(f"*{_BACKUP_EXT}"), key=lambda p: p.stat().st_mtime) + files = sorted(self._iter_backup_files(), key=lambda p: p.stat().st_mtime) excess = len(files) - max_backups if excess > 0: for f in files[:excess]: @@ -179,9 +259,7 @@ class AutoBackupEngine: def list_backups(self) -> List[dict]: backups = [] - for f in sorted( - self._backup_dir.glob(f"*{_BACKUP_EXT}"), key=lambda p: p.stat().st_mtime, reverse=True - ): + for f in sorted(self._iter_backup_files(), key=lambda p: p.stat().st_mtime, reverse=True): stat = f.stat() backups.append( { diff --git a/server/src/ledgrab/main.py b/server/src/ledgrab/main.py index 392df5e..3de09dd 100644 --- a/server/src/ledgrab/main.py +++ b/server/src/ledgrab/main.py @@ -283,6 +283,7 @@ async def lifespan(app: FastAPI): auto_backup_engine = AutoBackupEngine( backup_dir=_data_dir / "backups", db=db, + assets_dir=Path(config.assets.assets_dir), ) # Create update service (checks for new releases) diff --git a/server/tests/core/backup/__init__.py b/server/tests/core/backup/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/server/tests/core/backup/test_auto_backup.py b/server/tests/core/backup/test_auto_backup.py new file mode 100644 index 0000000..a5dab7a --- /dev/null +++ b/server/tests/core/backup/test_auto_backup.py @@ -0,0 +1,210 @@ +"""Regression coverage for :class:`AutoBackupEngine` ZIP format. + +Pins behavior added when auto-backups switched from raw ``.db`` snapshots to +``.zip`` archives (DB + assets), and the partial-write hardening that goes +with it (stage at ``.partial`` then ``os.replace`` so a crash mid-write +never leaves a corrupt file masquerading as a valid backup). +""" + +import zipfile +from pathlib import Path +from unittest.mock import patch + +import pytest + +from ledgrab.core.backup.auto_backup import AutoBackupEngine +from ledgrab.storage.database import Database + + +@pytest.fixture() +def engine_with_assets(tmp_path): + """Engine wired to a small DB + assets dir with two sample files.""" + db_path = tmp_path / "ledgrab.db" + assets_dir = tmp_path / "assets" + assets_dir.mkdir() + (assets_dir / "alert.wav").write_bytes(b"WAV_PAYLOAD_1") + (assets_dir / "ping.wav").write_bytes(b"WAV_PAYLOAD_2") + + db = Database(db_path) + db.upsert("devices", "dev_1", "Living Room", {"id": "dev_1", "type": "mock"}) + + backup_dir = tmp_path / "backups" + engine = AutoBackupEngine(backup_dir=backup_dir, db=db, assets_dir=assets_dir) + yield engine + db.close() + + +def _only_backup(backup_dir: Path) -> Path: + """Return the single .zip backup in ``backup_dir``; assert exactly one.""" + zips = sorted(backup_dir.glob("*.zip")) + assert len(zips) == 1, f"expected exactly one .zip, found {[p.name for p in zips]}" + return zips[0] + + +def test_backup_bundles_db_and_assets(engine_with_assets): + """Happy path: ZIP contains ``ledgrab.db`` plus every assets file.""" + engine_with_assets._perform_backup_sync() + + backup = _only_backup(engine_with_assets._backup_dir) + with zipfile.ZipFile(backup) as zf: + names = sorted(zf.namelist()) + assert names == ["assets/alert.wav", "assets/ping.wav", "ledgrab.db"] + + +def test_backup_preserves_asset_bytes(engine_with_assets): + """The asset binary inside the ZIP matches the source byte-for-byte.""" + engine_with_assets._perform_backup_sync() + + backup = _only_backup(engine_with_assets._backup_dir) + with zipfile.ZipFile(backup) as zf: + assert zf.read("assets/alert.wav") == b"WAV_PAYLOAD_1" + assert zf.read("assets/ping.wav") == b"WAV_PAYLOAD_2" + + +def test_backup_no_partial_file_left_on_success(engine_with_assets): + """After a successful backup, the staging ``.partial`` is renamed away.""" + engine_with_assets._perform_backup_sync() + + leftovers = list(engine_with_assets._backup_dir.glob("*.partial")) + assert leftovers == [] + + +def test_backup_skips_assets_when_dir_none(tmp_path): + """``assets_dir=None`` produces a DB-only ZIP, no error.""" + db_path = tmp_path / "ledgrab.db" + db = Database(db_path) + db.upsert("devices", "dev_1", "A", {"id": "dev_1"}) + backup_dir = tmp_path / "backups" + try: + engine = AutoBackupEngine(backup_dir=backup_dir, db=db, assets_dir=None) + engine._perform_backup_sync() + + backup = _only_backup(backup_dir) + with zipfile.ZipFile(backup) as zf: + assert zf.namelist() == ["ledgrab.db"] + finally: + db.close() + + +def test_backup_skips_assets_when_dir_missing(tmp_path): + """A non-existent ``assets_dir`` is silently skipped, not an error.""" + db_path = tmp_path / "ledgrab.db" + db = Database(db_path) + backup_dir = tmp_path / "backups" + try: + engine = AutoBackupEngine( + backup_dir=backup_dir, db=db, assets_dir=tmp_path / "does-not-exist" + ) + engine._perform_backup_sync() + + backup = _only_backup(backup_dir) + with zipfile.ZipFile(backup) as zf: + assert zf.namelist() == ["ledgrab.db"] + finally: + db.close() + + +def test_backup_failure_leaves_no_zip_or_partial(engine_with_assets): + """When ``db.backup_to`` raises, neither the final .zip nor the .partial + survives. The exception propagates so the caller can log it. + """ + with patch.object(engine_with_assets._db, "backup_to", side_effect=RuntimeError("boom")): + with pytest.raises(RuntimeError, match="boom"): + engine_with_assets._perform_backup_sync() + + assert list(engine_with_assets._backup_dir.glob("*.zip")) == [] + assert list(engine_with_assets._backup_dir.glob("*.partial")) == [] + + +def test_backup_cleans_stale_partial_from_previous_crash(engine_with_assets): + """A leftover ``.partial`` from a prior crash is swept on the next run.""" + stale = engine_with_assets._backup_dir + stale.mkdir(parents=True, exist_ok=True) + (stale / "ledgrab-backup-2025-01-01T000000.zip.partial").write_bytes(b"corrupt") + + engine_with_assets._perform_backup_sync() + + assert list(stale.glob("*.partial")) == [] + # The successful backup is also present. + assert len(list(stale.glob("*.zip"))) == 1 + + +def test_backup_skips_symlinks_in_assets_dir(tmp_path): + """Symlinked files in ``assets_dir`` are not bundled (security guard).""" + db_path = tmp_path / "ledgrab.db" + assets_dir = tmp_path / "assets" + assets_dir.mkdir() + (assets_dir / "real.wav").write_bytes(b"REAL") + + # Some test environments (e.g. unprivileged Windows) can't create + # symlinks; skip rather than fail spuriously when that's the case. + secret_target = tmp_path / "secret.bin" + secret_target.write_bytes(b"PRIVATE") + link_path = assets_dir / "linked.wav" + try: + link_path.symlink_to(secret_target) + except (OSError, NotImplementedError): + pytest.skip("symlinks not supported in this environment") + + db = Database(db_path) + try: + backup_dir = tmp_path / "backups" + engine = AutoBackupEngine(backup_dir=backup_dir, db=db, assets_dir=assets_dir) + engine._perform_backup_sync() + + backup = _only_backup(backup_dir) + with zipfile.ZipFile(backup) as zf: + names = zf.namelist() + assert "assets/real.wav" in names + assert "assets/linked.wav" not in names + finally: + db.close() + + +def test_list_backups_unions_legacy_db_and_new_zip(engine_with_assets): + """``list_backups`` returns both legacy ``.db`` and current ``.zip`` files + so users can still restore from auto-backups written by older versions. + """ + backup_dir = engine_with_assets._backup_dir + backup_dir.mkdir(parents=True, exist_ok=True) + (backup_dir / "ledgrab-backup-2024-12-31T000000.db").write_bytes(b"legacy") + + engine_with_assets._perform_backup_sync() + + names = {b["filename"] for b in engine_with_assets.list_backups()} + assert any(n.endswith(".db") for n in names) + assert any(n.endswith(".zip") for n in names) + + +def test_prune_honors_max_across_formats(tmp_path): + """``_prune_old_backups`` enforces ``max_backups`` across both extensions.""" + db_path = tmp_path / "ledgrab.db" + db = Database(db_path) + backup_dir = tmp_path / "backups" + backup_dir.mkdir() + + # Two legacy .db files (older), two .zip files (newer) — max=3 should + # prune the single oldest .db and keep the rest. + import time + + (backup_dir / "ledgrab-backup-2024-01-01T000000.db").write_bytes(b"a") + time.sleep(0.02) + (backup_dir / "ledgrab-backup-2024-02-01T000000.db").write_bytes(b"b") + time.sleep(0.02) + (backup_dir / "ledgrab-backup-2024-03-01T000000.zip").write_bytes(b"c") + time.sleep(0.02) + (backup_dir / "ledgrab-backup-2024-04-01T000000.zip").write_bytes(b"d") + + try: + engine = AutoBackupEngine(backup_dir=backup_dir, db=db, assets_dir=None) + engine._settings["max_backups"] = 3 + engine._prune_old_backups() + + remaining = sorted(p.name for p in engine._iter_backup_files()) + assert remaining == [ + "ledgrab-backup-2024-02-01T000000.db", + "ledgrab-backup-2024-03-01T000000.zip", + "ledgrab-backup-2024-04-01T000000.zip", + ] + finally: + db.close()