Files
media-player-server/media_server/services/browser_url_service.py
T
alexei.dolgolyov 61cdce9b60
Lint & Test / test (push) Failing after 8s
feat(foreground): track topmost process + browser page title
Adds cross-platform foreground-window tracking and exposes it over REST
(/api/foreground) and the existing WebSocket feed.

- foreground_service.py: Windows probe via ctypes (HANDLE-correct argtypes
  to avoid 64-bit handle truncation); macOS via AppKit; Linux via Xlib
  (Wayland returns unavailable). TTL cache + per-platform fallback.
- browser_url_service.py: when foreground is a recognised browser, extract
  the page title from the window title (browser-name suffix stripped) and
  surface `is_browser` + `browser_page_title`. Optional UIA-based URL
  extraction behind MEDIA_SERVER_BROWSER_UIA env flag (off by default —
  Chromium browsers keep their accessibility tree dormant otherwise).
- websocket_manager: poll foreground every 1s inside the existing status
  loop, broadcast `foreground` on connect and `foreground_update` on
  change. Diff only on user-visible fields to avoid geometry spam.
- WebUI: new editorial card rendered under the monitor list on the
  Display tab — process name, window title, fullscreen/minimized/monitor
  chips, browser block when applicable, exe path, PID, started-ago,
  geometry, platform. 16px inter-section gap matches Settings cadence.
- i18n: 25 new keys added to both en.json and ru.json.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-05-18 03:11:59 +03:00

297 lines
9.8 KiB
Python
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Extract page-level metadata from a focused desktop web browser.
The browser's window title is the reliable signal — every major browser
formats it as ``"<page title> - <Browser Name>"``, so stripping the suffix
gives us the page title for free.
URL extraction was attempted via UI Automation (UIA), but Chromium-based
browsers (Chrome/Edge/Brave/Vivaldi) keep their accessibility tree dormant
unless a screen reader is active or ``--force-renderer-accessibility`` is
set — neither is something we want to require from end users. The UIA
machinery is still here behind a feature flag in case a future caller
opts into the accessibility-flag path; by default we just return the
page title and leave ``url=None``.
Other platforms (macOS via AppleScript, Linux via AT-SPI) are out of scope
for this iteration.
"""
from __future__ import annotations
import logging
import os
import platform
import threading
from dataclasses import dataclass
# Module-level logger; every diagnostic in this file goes through it.
logger = logging.getLogger(__name__)
# UIA URL extraction is opt-in because Chromium browsers keep their
# accessibility tree dormant unless the user starts the browser with
# ``--force-renderer-accessibility`` (or a screen reader is running).
# Without that, `FindAll` throws and we'd burn 5s per probe retrying.
# Set MEDIA_SERVER_BROWSER_UIA=1 to enable; default off.
#
# The variable is read once, when this module is imported, so changing the
# environment afterwards has no effect on an already-running process. The
# comparison is case-insensitive ("TRUE", "On", ... all enable the flag);
# the value is not stripped, so surrounding whitespace leaves it disabled.
_UIA_ENABLED = os.environ.get("MEDIA_SERVER_BROWSER_UIA", "").lower() in (
    "1", "true", "yes", "on"
)
# Known browser executables (lowercase, .exe-stripped). Used to decide
# whether to spend the UIA query budget on this foreground process.
# ``is_browser_process`` normalises a process name the same way (lowercase,
# trailing ".exe" removed) and then tests membership in this set, so every
# entry here must already be in that normalised form.
BROWSER_PROCESS_HINTS: frozenset[str] = frozenset({
    "chrome",
    "msedge",
    "firefox",
    "brave",
    "opera",
    "vivaldi",
    "yandex",
    "browser",  # Yandex Browser sometimes reports as browser.exe
    "arc",
    "thorium",
})
@dataclass(frozen=True)
class BrowserPageInfo:
    """Immutable result describing the page shown by the focused browser.

    Both fields default to ``None``, so ``BrowserPageInfo()`` represents
    "nothing known".
    """

    # Address-bar value obtained through UIA; None when UIA extraction is
    # disabled or produced nothing.
    url: str | None = None
    # Window title with the trailing browser-name suffix removed.
    page_title: str | None = None


# Shared "no information" instance. ``get_browser_page`` returns it for
# unsupported inputs and compares against it by identity (``is``).
_EMPTY = BrowserPageInfo()
def is_browser_process(process_name: str | None) -> bool:
    """Tell whether ``process_name`` names one of the supported browsers.

    The name is lower-cased and a trailing ``.exe`` is dropped before it is
    looked up in ``BROWSER_PROCESS_HINTS``. ``None`` and the empty string
    are never browsers.
    """
    if not process_name:
        return False
    lowered = process_name.lower()
    # Executable names arrive as "chrome.exe"; the hint set stores bare stems.
    stem = lowered[:-4] if lowered.endswith(".exe") else lowered
    return stem in BROWSER_PROCESS_HINTS
def _strip_browser_suffix(title: str | None, process_name: str | None) -> str | None:
"""Pull the page title out of the browser's window title.
Most browsers format their window title as ``"<page> - <Browser Name>"``.
We strip the trailing suffix so consumers get the page title alone. If
the suffix can't be matched, return the raw title unchanged.
"""
if not title:
return None
suffixes = (
" - Google Chrome",
" — Google Chrome",
" - MicrosoftEdge",
" - Microsoft Edge",
" — Mozilla Firefox",
" - Mozilla Firefox",
" - Brave",
" - Opera",
" - Vivaldi",
" - Yandex",
)
for s in suffixes:
if title.endswith(s):
return title[: -len(s)].strip() or None
return title
# ─── UIA lookup (Windows) ───────────────────────────────────────────
# UIA control type / property constants we need. Avoiding the full
# UIAutomationClient typelib generation — those constants are stable.
# Of these, only _UIA_EditControlTypeId, _UIA_ControlTypePropertyId,
# _UIA_ValueValuePropertyId and _TreeScope_Descendants are referenced by the
# lookup code in this module; the remaining three are currently unused here.
_UIA_EditControlTypeId = 50004
_UIA_ControlTypePropertyId = 30003
_UIA_ValueValuePropertyId = 30045
_UIA_NamePropertyId = 30005
_UIA_ValuePatternId = 10002
_TreeScope_Descendants = 4
_PropertyConditionFlags_IgnoreCase = 1
# Lazy import + per-thread COM init.
# _uia_lock guards creation of the singleton below.
_uia_lock = threading.Lock()
# Automation object created on first use by _get_uia(); None until then.
_uia_singleton = None
# Text of the exception from a failed creation attempt; once set, _get_uia()
# returns None without trying again.
_uia_load_error: str | None = None
# Holds the per-thread "initialised" flag set by _ensure_com().
_uia_thread_local = threading.local()
def _ensure_com() -> None:
    """Make sure COM has been initialised on the calling thread.

    A per-thread flag records success, so repeated calls on the same thread
    are no-ops. Failures (including ``comtypes`` not being importable) are
    logged at debug level and swallowed; the flag stays unset, so the next
    call tries again.
    """
    already_done = getattr(_uia_thread_local, "initialised", False)
    if already_done:
        return
    try:
        import comtypes  # type: ignore

        # Per the original author's note: UIA needs an apartment-threaded
        # COM init, which is what comtypes' CoInitialize() requests.
        comtypes.CoInitialize()
    except Exception as exc:
        logger.debug("CoInitialize failed: %s", exc)
    else:
        _uia_thread_local.initialised = True
def _get_uia():
    """Return the shared IUIAutomation object, creating it on first use.

    Returns ``None`` when creation fails or has failed before; the failure
    text is remembered in ``_uia_load_error`` so later calls short-circuit
    instead of retrying.
    """
    global _uia_singleton, _uia_load_error

    # Fast paths that need no lock: already created, or already known bad.
    if _uia_singleton is not None:
        return _uia_singleton
    if _uia_load_error is not None:
        return None

    with _uia_lock:
        # Another thread may have finished creation while we waited.
        if _uia_singleton is None:
            try:
                import comtypes.client  # type: ignore

                uia_module = comtypes.client.GetModule("UIAutomationCore.dll")
                # CLSID for CUIAutomation. GetActiveObject would fail, so
                # the object is co-created through comtypes instead.
                _uia_singleton = comtypes.client.CreateObject(
                    "{ff48dba4-60ef-4201-aa87-54103eef594e}",
                    interface=uia_module.IUIAutomation,
                )
            except Exception as exc:
                _uia_load_error = str(exc)
                logger.info(
                    "UIA unavailable; browser URL extraction disabled: %s", exc
                )
                return None
        return _uia_singleton
def _find_address_bar_value(hwnd: int) -> str | None:
    """Search the UIA tree below ``hwnd`` for the browser's address bar.

    Every descendant Edit control is inspected in order. The value of the
    first one whose (lower-cased) Name contains an address-bar hint is
    returned immediately. If no Name matches, the first collected value
    that starts with a known URL scheme is returned instead. Browsers expose
    other Edit controls too (search boxes, find-in-page), which is why the
    Name match is preferred and the scheme test is only a fallback for
    locales/browsers not covered by the hints.

    Returns ``None`` when UIA is unavailable, when nothing matches, or when
    any UIA call raises (the error is logged at debug level).
    """
    _ensure_com()
    automation = _get_uia()
    if automation is None:
        return None

    # Lower-case fragments that identify the address bar by its Name
    # property. Covers en-US plus a few common locales / browsers.
    address_bar_hints = (
        "address",  # Chrome/Edge: "Address and search bar"
        "адрес",  # Chrome ru: "Адресная строка и строка поиска"
        "адресная",
        "search with",  # Firefox: "Search with Google or enter address"
        "поиск или ввод",  # Firefox ru
        "url",
        "location",
    )
    # Prefixes accepted by the fallback pass.
    url_prefixes = (
        "http://",
        "https://",
        "about:",
        "chrome://",
        "edge://",
        "brave://",
        "file://",
        "ftp://",
    )

    try:
        root = automation.ElementFromHandle(hwnd)
        if not root:
            return None

        # Condition: ControlType == Edit; then enumerate all descendants.
        is_edit = automation.CreatePropertyCondition(
            _UIA_ControlTypePropertyId, _UIA_EditControlTypeId
        )
        found = root.FindAll(_TreeScope_Descendants, is_edit)
        total = found.Length if found else 0

        # Pass 1: Name-based match (high confidence). Values of controls
        # that did not match are kept for the fallback pass.
        fallback_values: list[str] = []
        for index in range(total):
            control = found.GetElement(index)
            try:
                label = (control.CurrentName or "").lower()
            except Exception:
                label = ""
            try:
                raw_value = control.GetCurrentPropertyValue(
                    _UIA_ValueValuePropertyId
                )
            except Exception:
                raw_value = None
            if raw_value is None:
                continue
            text = str(raw_value)
            if any(hint in label for hint in address_bar_hints):
                return text
            fallback_values.append(text)

        # Pass 2: first value that looks like a URL, if any.
        return next(
            (
                text
                for text in fallback_values
                if text.lower().startswith(url_prefixes)
            ),
            None,
        )
    except Exception as exc:
        logger.debug("UIA address-bar lookup failed: %s", exc)
        return None
# ─── Per-(hwnd, title) cache ────────────────────────────────────────
# Single-entry cache: only the most recent (hwnd, window_title) pair and its
# result are remembered. _cache_lock guards reads and writes of both globals.
_cache_lock = threading.Lock()
# Key of the cached entry; (None, None) means "nothing cached".
_cache_key: tuple[int | None, str | None] = (None, None)
# Result stored for _cache_key; _EMPTY means "nothing cached".
_cache_value: BrowserPageInfo = _EMPTY
def get_browser_page(
    *,
    hwnd: int | None,
    process_name: str | None,
    window_title: str | None,
) -> BrowserPageInfo:
    """Describe the page shown by the foreground browser window, if any.

    Callers pass the already-resolved foreground HWND/title/process name so
    this service does not have to query Win32 again.

    Returns ``_EMPTY`` when the process is not a recognised browser, when
    the platform is not Windows, or when ``hwnd`` is missing/zero. Otherwise
    returns a ``BrowserPageInfo`` whose ``page_title`` is the window title
    with the browser suffix removed and whose ``url`` is the UIA address-bar
    value — or ``None`` when UIA extraction is disabled or found nothing.

    The most recent result is cached and reused for as long as the
    ``(hwnd, window_title)`` pair stays the same.
    """
    global _cache_key, _cache_value

    # Checks run in this order and short-circuit: browser, platform, handle.
    # macOS/Linux paths are not implemented in this iteration.
    supported = (
        is_browser_process(process_name)
        and platform.system() == "Windows"
        and bool(hwnd)
    )
    if not supported:
        return _EMPTY

    lookup_key = (hwnd, window_title)
    with _cache_lock:
        cached = _cache_value if lookup_key == _cache_key else _EMPTY
    if cached is not _EMPTY:
        return cached

    # The UIA walk is done outside the lock; it is only attempted when the
    # feature flag is on.
    url = None
    if _UIA_ENABLED:
        url = _find_address_bar_value(hwnd)
    page_title = _strip_browser_suffix(window_title, process_name)
    result = BrowserPageInfo(url=url, page_title=page_title)

    with _cache_lock:
        _cache_key, _cache_value = lookup_key, result
    return result
def reset_cache() -> None:
    """Forget the cached browser page result. Useful in tests."""
    global _cache_key, _cache_value
    with _cache_lock:
        # Restore both globals to their "nothing cached" values together.
        _cache_key, _cache_value = (None, None), _EMPTY