Files
ledgrab/server/src/ledgrab/__main__.py
T
alexei.dolgolyov e24f9d33cc fix(shutdown): survive PC restart with WAL fsync + Win32 session-end guard
Two bugs caused user data ('G502' target's color-strip ref, etc.) to
revert after PC restart while persisting fine across normal app
restarts:

1. SQLite was in WAL mode with synchronous=NORMAL and Database.close()
   was never called. On graceful Python exit the sqlite3 finalizer
   checkpoints the WAL, but on an unclean PC shutdown (power loss,
   forced reboot, or Windows force-terminating pythonw.exe) the WAL
   stayed in OS cache, never reached disk, and the next boot rolled the
   DB back to the last checkpoint -- losing recent edits.

2. Nothing handled WM_QUERYENDSESSION / WM_ENDSESSION, so on PC
   shutdown Windows force-killed pythonw.exe after ~5s and the FastAPI
   lifespan never ran. The 'stop_targets' setting was silently ignored
   and devices were left at their last frame.

Changes:
- Database: PRAGMA synchronous=FULL + wal_autocheckpoint=100, plus an
  explicit wal_checkpoint(TRUNCATE) inside Database.close().
- New utils/win_shutdown.py: hidden top-level window in a daemon thread
  with a ctypes WindowProc that catches WM_QUERYENDSESSION (calls
  ShutdownBlockReasonCreate to extend Windows' 5s hung-app timeout up
  to the ~20s GUI ceiling), fires the shutdown callback, then waits in
  WM_ENDSESSION on a completion event before returning. Also raises
  the process shutdown priority via SetProcessShutdownParameters. All
  Win32 argtypes/restypes are bound once at import to avoid LPARAM
  overflow on x64.
- New shutdown_state.py: leaf module owning the cross-thread Event so
  __main__ does not import the heavy ledgrab.main at startup.
- main.py lifespan: per-step asyncio.wait_for budgets (8s for
  processor_manager.stop_all, 1.5s each for HA/MQTT, etc.) so a hung
  device cannot starve the DB checkpoint, then db.close() and
  shutdown_complete.set() always run.
- __main__.py: install the Windows shutdown guard before tray start;
  install SIGINT/SIGTERM/SIGBREAK handlers only on the tray path
  (uvicorn overwrites them on no-tray); raise server_thread.join to 20s.
- Tests cover WM_QUERYENDSESSION (fires callback, returns TRUE,
  idempotent), WM_ENDSESSION (waits on event, times out cleanly,
  cancel-path returns instantly), signal handler installation, and
  that main and shutdown_state share the same Event instance.
2026-05-22 21:43:41 +03:00

246 lines
8.4 KiB
Python

"""Entry point for ``python -m ledgrab``.
Starts the uvicorn server and, when *pystray* is installed, shows a
system-tray icon with **Show UI** / **Exit** actions. The tray is used by
default on Windows, and on other platforms only when ``LEDGRAB_TRAY`` is set.
"""
import asyncio
import os
import signal
import socket
import sys
import threading
import time
import webbrowser
from pathlib import Path
from urllib.error import URLError
from urllib.request import urlopen
def _fix_embedded_tcl_paths() -> None:
    """Export TCL_LIBRARY / TK_LIBRARY for the Tcl/Tk dirs bundled beside Python.

    The Windows installer ships embedded Python with ``tcl8.6/`` and ``tk8.6/``
    next to python.exe, whereas Tcl's auto-detection looks in
    ``<exe>/../lib/tcl8.6`` and similar locations that our layout lacks. When
    the variables are missing, ``tkinter.Tk()`` fails with "Can't find a usable
    init.tcl", breaking both the screen overlay and tray messageboxes.

    A variable is only set when the matching marker file exists, and a value
    already present in the environment is never overwritten.
    """
    install_root = Path(sys.executable).parent
    # (environment variable, bundled directory, file proving the dir is usable)
    bundled_runtimes = (
        ("TCL_LIBRARY", "tcl8.6", "init.tcl"),
        ("TK_LIBRARY", "tk8.6", "tk.tcl"),
    )
    for env_name, dir_name, marker_file in bundled_runtimes:
        runtime_dir = install_root / dir_name
        if (runtime_dir / marker_file).is_file():
            os.environ.setdefault(env_name, str(runtime_dir))
# Runs ahead of the remaining imports (hence their E402 suppressions) --
# presumably so TCL_LIBRARY/TK_LIBRARY are in place before any of them can
# load tkinter; confirm before reordering.
_fix_embedded_tcl_paths()
import uvicorn # noqa: E402
from ledgrab.config import get_config # noqa: E402
from ledgrab.server_ref import set_server, set_tray # noqa: E402
from ledgrab.tray import PYSTRAY_AVAILABLE, TrayManager # noqa: E402
from ledgrab.utils import setup_logging, get_logger # noqa: E402
from ledgrab.utils.platform import is_windows # noqa: E402
from ledgrab.utils.win_shutdown import WindowsShutdownGuard # noqa: E402
# Initialise logging first, then create this module's logger.
setup_logging()
logger = get_logger(__name__)
# Icon image handed to the tray manager; resolved relative to this package.
_ICON_PATH = Path(__file__).parent / "static" / "icons" / "icon-192.png"
def _run_server(server: uvicorn.Server) -> None:
    """Run uvicorn in a dedicated asyncio event loop (background thread).

    Creates a fresh event loop, makes it the current loop for the calling
    thread and blocks until ``server.serve()`` completes.

    Args:
        server: The uvicorn server whose ``serve()`` coroutine is driven.

    Raises:
        Whatever ``server.serve()`` raises; the loop is closed regardless.
    """
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        loop.run_until_complete(server.serve())
    finally:
        # Detach and close the loop so its selector/proactor handles are
        # released deterministically instead of being leaked until the
        # interpreter exits (the same cleanup ``asyncio.run`` performs).
        asyncio.set_event_loop(None)
        loop.close()
def _wait_for_server(port: int, timeout: float = 30.0, interval: float = 0.25) -> bool:
    """Poll /health until the server responds or *timeout* seconds elapse.

    Any HTTP answer below 500 counts as "the server is up", including 4xx
    responses (e.g. the endpoint is protected or missing).

    Args:
        port: Local TCP port the server listens on.
        timeout: Overall polling budget in seconds.
        interval: Pause between two attempts in seconds.

    Returns:
        True as soon as a non-5xx response is received, False when the
        budget runs out first.
    """
    # Local import: HTTPError is only needed here and is not among the
    # module-level imports.
    from urllib.error import HTTPError

    url = f"http://localhost:{port}/health"
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            with urlopen(url, timeout=1) as resp:  # noqa: S310 - localhost only
                if 200 <= resp.status < 500:
                    return True
        except HTTPError as exc:
            # urlopen raises HTTPError for 4xx/5xx instead of returning the
            # response, so the status check above never sees them. A 4xx
            # still proves the server is accepting requests.
            status = exc.code
            exc.close()
            if status < 500:
                return True
        except (URLError, ConnectionError, OSError, TimeoutError):
            # Not listening yet (or the attempt timed out) -- retry.
            pass
        time.sleep(interval)
    return False
def _open_browser(port: int) -> None:
    """Launch the default browser on the local UI after waiting for the server.

    The browser is opened even when the readiness wait gives up; in that
    case a warning is logged first.
    """
    server_ready = _wait_for_server(port)
    if not server_ready:
        logger.warning("Server did not become ready in time; opening browser anyway")
    ui_url = f"http://localhost:{port}"
    webbrowser.open(ui_url)
def _is_restart() -> bool:
    """Return True when ``LEDGRAB_RESTART`` is exactly ``"1"`` (restart, not first launch)."""
    restart_flag = os.getenv("LEDGRAB_RESTART", "")
    return restart_flag == "1"
def _is_autostart() -> bool:
    """Return True when ``LEDGRAB_AUTOSTART`` is exactly ``"1"`` (Windows autostart shortcut)."""
    autostart_flag = os.getenv("LEDGRAB_AUTOSTART", "")
    return autostart_flag == "1"
def _should_skip_browser() -> bool:
    """Tell whether the browser auto-open must be suppressed.

    That is the case for restarts and for launches from the Windows login
    autostart shortcut.
    """
    if _is_restart():
        return True
    return _is_autostart()
def _check_port(host: str, port: int) -> None:
    """Exit with a clear message if the port cannot be bound.

    Performs a throw-away bind on ``(host, port)``; the probe socket is closed
    again before returning.

    Args:
        host: Interface address to probe; IPv4 and IPv6 literals are supported.
        port: TCP port to probe.

    Raises:
        SystemExit: With status 1 when the bind fails.
    """
    # A ':' can only appear in an IPv6 literal, never in an IPv4 address or a
    # hostname. Probing such a host with an AF_INET socket would always fail
    # and be misreported as "port in use".
    family = socket.AF_INET6 if ":" in host else socket.AF_INET
    with socket.socket(family, socket.SOCK_STREAM) as sock:
        sock.settimeout(1)
        try:
            sock.bind((host, port))
        except OSError as exc:
            # Include the OS error: the bind can also fail for reasons other
            # than a busy port (permissions, address not available, ...).
            logger.error("Port %d is already in use on %s (%s)", port, host, exc)
            sys.exit(1)
def main() -> None:
    """Start the uvicorn server and, when available, the system-tray icon.

    With a tray, uvicorn runs on a daemon thread and the tray blocks the main
    thread; without one, ``server.run()`` blocks the main thread. On both
    paths the OS shutdown guard (if one was installed) is stopped on the way
    out, even when the blocking call raises.
    """
    config = get_config()
    _check_port(config.server.host, config.server.port)
    uv_config = uvicorn.Config(
        "ledgrab.main:app",
        host=config.server.host,
        port=config.server.port,
        log_level=config.server.log_level.lower(),
    )
    server = uvicorn.Server(uv_config)
    set_server(server)
    # Wire the OS-shutdown safety net. The lifespan in ``ledgrab.main`` signals
    # ``shutdown_complete`` once it has stopped targets and checkpointed the
    # DB; the Windows guard waits on that event before letting the OS finish
    # ending the session. Without this, the entire shutdown lifespan never
    # runs on PC reboot — devices stay on and the SQLite WAL is lost.
    guard = _install_os_shutdown_guard(server)
    use_tray = PYSTRAY_AVAILABLE and (sys.platform == "win32" or _force_tray())
    if use_tray:
        logger.info("Starting with system tray icon")
        # Install signal handlers BEFORE starting the uvicorn thread so a
        # SIGINT/SIGBREAK during startup still triggers a clean shutdown.
        # We do NOT install them on the no-tray path because uvicorn's
        # ``server.run()`` overwrites SIGINT/SIGTERM with its own handlers.
        _install_signal_handlers(server)
        # Uvicorn in a background thread
        server_thread = threading.Thread(
            target=_run_server,
            args=(server,),
            daemon=True,
        )
        server_thread.start()
        try:
            # Browser after a short delay (skip on restart and on Windows login autostart)
            if not _should_skip_browser():
                threading.Thread(
                    target=_open_browser,
                    args=(config.server.port,),
                    daemon=True,
                ).start()
            # Tray on main thread (blocking)
            tray = TrayManager(
                icon_path=_ICON_PATH,
                port=config.server.port,
                on_exit=lambda: _request_shutdown(server),
            )
            set_tray(tray)
            tray.run()
        finally:
            # The tray can end without ``on_exit`` having fired (it raised, or
            # its loop simply returned). The process is about to exit either
            # way, so always ask uvicorn to stop; otherwise the join below
            # just burns its timeout and the daemon thread is killed without
            # a graceful shutdown. Setting the flag twice is harmless.
            _request_shutdown(server)
            # Tray exited — wait for server to finish its graceful shutdown.
            # Use a longer join than the lifespan's own ~18 s budget so we don't
            # cut the DB checkpoint short on a slow disk.
            server_thread.join(timeout=20)
            if server_thread.is_alive():
                logger.warning("Server thread still running after 20 s; exiting anyway")
            if guard is not None:
                guard.stop()
    else:
        if not PYSTRAY_AVAILABLE:
            logger.info("System tray not available (install pystray for tray support)")
        try:
            server.run()
        finally:
            if guard is not None:
                guard.stop()
def _request_shutdown(server: uvicorn.Server) -> None:
    """Ask uvicorn to shut down gracefully by raising its ``should_exit`` flag.

    Only a flag is set here, so calling this repeatedly is harmless.
    """
    server.should_exit = True
def _install_os_shutdown_guard(server: uvicorn.Server) -> "WindowsShutdownGuard | None":
    """Set up the OS-shutdown safety net, which exists on Windows only.

    Returns:
        The guard, so the caller can ``stop()`` it on normal exit (it is
        returned even when starting it failed), or ``None`` on platforms
        where no guard is needed.
    """
    if not is_windows():
        return None

    # ``shutdown_state`` is a leaf module — importing it does NOT pull in
    # ``ledgrab.main`` and its global stores. uvicorn loads ``main`` lazily
    # via the import string ``"ledgrab.main:app"`` once it starts serving.
    from ledgrab.shutdown_state import shutdown_complete

    def _on_os_shutdown() -> None:
        # Invoked by the guard when the OS announces the session is ending.
        _request_shutdown(server)

    guard = WindowsShutdownGuard(
        on_shutdown=_on_os_shutdown,
        shutdown_complete=shutdown_complete,
    )
    started = guard.start()
    if started:
        logger.info("Windows shutdown guard installed")
    else:
        logger.warning("Windows shutdown guard failed to start")
    return guard
def _install_signal_handlers(server: uvicorn.Server) -> None:
    """Route terminal/admin shutdown signals to a graceful server exit.

    Uvicorn already installs SIGINT/SIGTERM handlers when ``server.run()``
    is called on the main thread (the no-tray path). For the tray path,
    uvicorn runs on a background thread and skips signal installation, so
    we install our own here. SIGBREAK is Windows-specific and fires on
    Ctrl-Break and in some service-stop scenarios.

    Signals missing on this platform are skipped; a failed installation is
    logged at debug level and otherwise ignored.
    """

    def _on_signal(signum, frame):  # noqa: ANN001 - signal handler signature
        logger.warning("Signal %s received — requesting shutdown", signum)
        _request_shutdown(server)

    wanted = ("SIGINT", "SIGTERM", "SIGBREAK")
    # Keep only the signals this platform's ``signal`` module defines.
    available = [(name, getattr(signal, name)) for name in wanted if hasattr(signal, name)]
    for name, sig in available:
        try:
            signal.signal(sig, _on_signal)
        except (ValueError, OSError) as exc:
            # ValueError: not on main thread; OSError: signal not supported here.
            logger.debug("Could not install handler for %s: %s", name, exc)
def _force_tray() -> bool:
    """Allow forcing tray on non-Windows via LEDGRAB_TRAY=1.

    Returns:
        True when ``LEDGRAB_TRAY`` is ``1``, ``true`` or ``yes``; surrounding
        whitespace and letter case are ignored, so ``True`` / ``YES`` work too.
        False when the variable is unset or holds anything else.
    """
    # ``os`` is already imported at module level; no local re-import needed.
    flag = os.environ.get("LEDGRAB_TRAY", "").strip().lower()
    return flag in ("1", "true", "yes")
# Start the application only when this module is executed (e.g. via
# ``python -m ledgrab``), not when it is merely imported.
if __name__ == "__main__":
    main()