feat: Home Assistant provider — WebSocket subscription + bot commands

Adds Home Assistant as a service provider with two coordinated surfaces:

Notifications (subscription):
- Long-lived WebSocket client (aiohttp ws_connect) with auth handshake,
  exponential-backoff reconnect, bounded event queue, and area-registry
  enrichment cached per (re)connect
- ServiceProvider ABC gains an optional `subscribe()` method for push-style
  providers; HomeAssistantServiceProvider uses it via a per-provider
  supervisor task started in the FastAPI lifespan
- 4 event types (state_changed, automation_triggered, call_service,
  event_fired), 4 default Jinja templates (en + ru), HA-specific
  tracker filters (entity_glob, domain_allowlist, exact entity ids)
- Extracted shared dispatch pipeline (api/webhooks.py →
  services/event_dispatch.py) so subscription and webhook ingest share
  the same event_log + deferred-dispatch + quiet-hours code path

Bot commands:
- /status, /entities [glob], /state <entity_id>, /areas
- Multi-command WS session so /status and /areas cost one handshake
- Sensitive-attribute blocklist (camera access_token, entity_picture, etc.)
  and 30-attribute cap to keep /state output safe and within Telegram's
  4096-character message size limit
- Error-message redaction strips URL userinfo before surfacing to chat

Frontend:
- HA descriptor with toggle ConfigField type (new) and tag-input filter
  mode for free-text glob/domain lists (new TagInput component)
- 15 command slots + 4 notification slots wired into the existing
  template-config UI
This commit is contained in:
2026-05-13 14:31:56 +03:00
parent 90f958bdc6
commit 22127e2a59
79 changed files with 4042 additions and 210 deletions
@@ -0,0 +1,98 @@
"""Unit tests for HA bot command helpers — Phase 2.
Focus on the security-sensitive bits the reviewer flagged: attribute
filtering, error-message redaction, and the sample-context shape that
flows through Jinja preview rendering.
"""
from __future__ import annotations
from notify_bridge_server.commands.home_assistant_handler import (
_filter_attributes,
_is_sensitive_attr,
_normalize_state,
)
def test_filter_attributes_drops_credential_keys() -> None:
    """Credential-bearing attributes must never reach the /state reply.

    The fixture mimics a camera entity: ``access_token`` is a raw
    credential and the ``entity_picture`` *value* is a proxy URL carrying a
    ``token`` query parameter. Neither may appear in the filtered dict.
    """
    camera_attrs = {
        "friendly_name": "Front Camera",
        "access_token": "real-camera-proxy-token",
        "entity_picture": "/api/camera_proxy/...?token=abc",
        "brightness": 200,
    }
    visible, hidden_count = _filter_attributes(camera_attrs)

    # access_token / entity_picture are the sensitive pair. friendly_name is
    # expected to be absent too: it is rendered as a top-level field rather
    # than iterated with the other attributes.
    for dropped_key in ("access_token", "entity_picture", "friendly_name"):
        assert dropped_key not in visible
    # An ordinary attribute passes through untouched.
    assert visible["brightness"] == 200
    assert hidden_count == 2
def test_filter_attributes_caps_count() -> None:
    """Fifty harmless attributes in, thirty out, twenty reported as hidden.

    The cap exists so a /state reply for an attribute-heavy entity stays
    inside Telegram's 4096-char message limit.
    """
    attr_names = [f"attr_{idx:03d}" for idx in range(50)]
    oversized = dict(zip(attr_names, range(50)))

    kept, overflow = _filter_attributes(oversized)

    assert len(kept) == 30
    assert overflow == 20
def test_is_sensitive_attr_case_insensitive() -> None:
    """Sensitivity detection must ignore key casing.

    Custom integrations are inconsistent about capitalization, so mixed-
    and upper-case spellings have to be flagged just like lower-case ones.
    """
    expectations = {
        "Access_Token": True,
        "API_KEY": True,
        "password": True,
        "brightness": False,
        "color_mode": False,
    }
    for attr_key, is_sensitive in expectations.items():
        assert _is_sensitive_attr(attr_key) is is_sensitive
def test_normalize_state_filters_attrs() -> None:
    """Push a camera row carrying a credential through ``_normalize_state``.

    Checks that the top-level fields survive, the credential attribute is
    removed from ``attributes``, and ``hidden_attr_count`` reports it.
    """
    stamp = "2026-05-13T12:00:00+00:00"
    camera_row = {
        "entity_id": "camera.front_door",
        "state": "idle",
        "attributes": {
            "friendly_name": "Front Door Camera",
            "access_token": "leaked",
            "brand": "Reolink",
        },
        "last_changed": stamp,
        "last_updated": stamp,
    }

    normalized = _normalize_state(camera_row)

    # Identity and top-level fields are carried over.
    assert normalized["entity_id"] == "camera.front_door"
    assert normalized["friendly_name"] == "Front Door Camera"
    assert normalized["domain"] == "camera"
    assert normalized["state"] == "idle"

    # The attributes dict has been filtered.
    shown_attrs = normalized["attributes"]
    assert "access_token" not in shown_attrs
    assert shown_attrs.get("brand") == "Reolink"

    # Only access_token counts as hidden; friendly_name is surfaced at the
    # top level rather than redacted.
    assert normalized["hidden_attr_count"] == 1
def test_normalize_state_handles_missing_attributes() -> None:
    """A row lacking the ``attributes`` key normalizes to empty attributes."""
    bare_row = {"entity_id": "sensor.x", "state": "1"}

    normalized = _normalize_state(bare_row)

    assert normalized["attributes"] == {}
    assert normalized["hidden_attr_count"] == 0
def test_redact_ha_message_strips_userinfo() -> None:
    """``redact_ha_message`` must be importable from the HA package and
    strip URL userinfo while leaving the host and port readable.

    /entities, /state and /areas rely on this helper before surfacing
    errors to chat, so both the re-export and the contract are pinned here.
    """
    from notify_bridge_core.providers.home_assistant import redact_ha_message

    leaky = "Cannot connect to https://leak-token@homeassistant.local:8123/api/websocket"
    redacted = redact_ha_message(leaky)

    assert "leak-token@" not in redacted
    assert "homeassistant.local:8123" in redacted
@@ -0,0 +1,80 @@
"""Tests for the HA-specific tracker filter (entity_glob, domain_allowlist).
The Gitea filter is an intersection of senders/collections. The HA filter
is intentionally a *union* across the three keys — any match passes — so a
user can mix exact entity ids with glob patterns and domain allowlists
without each one narrowing the others.
"""
from __future__ import annotations
from datetime import datetime, timezone
from notify_bridge_core.models.events import EventType, ServiceEvent
from notify_bridge_core.providers.base import ServiceProviderType
from notify_bridge_server.services.ha_subscription import _ha_passes_filters
def _ha_event(entity_id: str, domain: str | None = None) -> ServiceEvent:
    """Build a minimal ``HA_STATE_CHANGED`` event for filter tests.

    ``extra["domain"]`` is the explicit *domain* when one is given (and
    non-empty); otherwise it is the part of *entity_id* before the first
    dot, or ``""`` when *entity_id* contains no dot.
    """
    if domain:
        resolved_domain = domain
    elif "." in entity_id:
        resolved_domain = entity_id.split(".", 1)[0]
    else:
        resolved_domain = ""

    return ServiceEvent(
        event_type=EventType.HA_STATE_CHANGED,
        provider_type=ServiceProviderType.HOME_ASSISTANT,
        provider_name="HA",
        collection_id=entity_id,
        collection_name=entity_id,
        timestamp=datetime.now(timezone.utc),
        extra={"domain": resolved_domain},
    )
def test_empty_filters_accept_everything() -> None:
    """An empty filter dict lets the event through."""
    no_filters: dict = {}
    kitchen_light = _ha_event("light.kitchen")
    assert _ha_passes_filters(kitchen_light, no_filters) is True
def test_exact_entity_match() -> None:
    """``collections`` matches on exact entity ids only."""
    exact_only = {"collections": ["light.kitchen", "switch.lamp"]}
    cases = (
        ("light.kitchen", True),
        ("light.bedroom", False),
    )
    for entity_id, expected in cases:
        assert _ha_passes_filters(_ha_event(entity_id), exact_only) is expected
def test_entity_glob_match() -> None:
    """``entity_glob`` patterns match with wildcards on either side."""
    glob_only = {"entity_glob": ["binary_sensor.*_motion", "light.kitchen*"]}
    cases = (
        ("binary_sensor.hallway_motion", True),
        ("light.kitchen_main", True),
        ("light.bedroom", False),
    )
    for entity_id, expected in cases:
        assert _ha_passes_filters(_ha_event(entity_id), glob_only) is expected
def test_domain_allowlist() -> None:
    """``domain_allowlist`` admits listed domains and rejects the rest."""
    domains_only = {"domain_allowlist": ["light", "switch"]}
    cases = (
        ("light.kitchen", True),
        ("switch.lamp", True),
        ("sensor.temp", False),
    )
    for entity_id, expected in cases:
        assert _ha_passes_filters(_ha_event(entity_id), domains_only) is expected
def test_union_across_keys() -> None:
    """Filter keys combine as a union, not an intersection.

    With ``collections`` naming one sensor and ``domain_allowlist`` naming
    ``light``, an event matching *either* key must pass — this is what
    distinguishes the HA filter from the Gitea-style intersection filter.
    """
    mixed = {
        "collections": ["sensor.outdoor_temp"],
        "domain_allowlist": ["light"],
    }
    cases = (
        ("sensor.outdoor_temp", True),   # matched by collections
        ("light.kitchen", True),         # matched by domain_allowlist
        ("binary_sensor.door", False),   # matched by neither key
    )
    for entity_id, expected in cases:
        assert _ha_passes_filters(_ha_event(entity_id), mixed) is expected
def test_domain_derived_when_extra_missing() -> None:
    """The filter must infer the domain from the entity id prefix.

    The event is built by hand with an empty ``extra`` (as a malformed
    event might be), so no ``domain`` key is available to the filter.
    """
    lights_only = {"domain_allowlist": ["light"]}
    domainless = ServiceEvent(
        event_type=EventType.HA_STATE_CHANGED,
        provider_type=ServiceProviderType.HOME_ASSISTANT,
        provider_name="HA",
        collection_id="light.kitchen",
        collection_name="light.kitchen",
        timestamp=datetime.now(timezone.utc),
        extra={},  # Deliberately lacks the 'domain' key.
    )

    assert _ha_passes_filters(domainless, lights_only) is True
@@ -0,0 +1,187 @@
"""Unit tests for the Home Assistant event parser.
These tests don't need a database or HA server — the parser is a pure
function from ``ha_event_dict`` to :class:`ServiceEvent`.
"""
from __future__ import annotations
from notify_bridge_core.models.events import EventType
from notify_bridge_core.providers.base import ServiceProviderType
from notify_bridge_core.providers.home_assistant.event_parser import parse_event
def _ha_event_envelope(event_type: str, data: dict) -> dict:
    """Wrap *data* in an HA-style event envelope with a fixed ``time_fired``."""
    envelope = dict(event_type=event_type, data=data)
    envelope["time_fired"] = "2026-05-13T12:34:56.789Z"
    return envelope
def test_state_changed_basic() -> None:
    """A plain off→on ``state_changed`` event maps onto the expected fields."""
    door_attrs = {
        "friendly_name": "Front Door",
        "device_class": "door",
    }
    change = {
        "entity_id": "binary_sensor.front_door",
        "old_state": {"state": "off", "attributes": {}},
        "new_state": {"state": "on", "attributes": door_attrs},
    }

    parsed = parse_event(
        _ha_event_envelope("state_changed", change),
        provider_name="HA",
    )

    assert parsed is not None
    assert parsed.event_type is EventType.HA_STATE_CHANGED
    assert parsed.provider_type is ServiceProviderType.HOME_ASSISTANT
    assert parsed.collection_id == "binary_sensor.front_door"
    assert parsed.collection_name == "Front Door"

    expected_extra = (
        ("old_state", "off"),
        ("new_state", "on"),
        ("domain", "binary_sensor"),
        ("device_class", "door"),
    )
    for key, value in expected_extra:
        assert parsed.extra[key] == value
    # No area_lookup was supplied, so the area stays unset.
    assert parsed.extra["area"] is None
def test_state_changed_with_area_lookup() -> None:
    """An ``area_lookup`` entry for the entity ends up in ``extra["area"]``."""
    change = {
        "entity_id": "light.kitchen",
        "old_state": {"state": "off", "attributes": {}},
        "new_state": {
            "state": "on",
            "attributes": {"friendly_name": "Kitchen Light"},
        },
    }
    envelope = _ha_event_envelope("state_changed", change)
    areas_by_entity = {"light.kitchen": "Kitchen"}

    parsed = parse_event(envelope, provider_name="HA", area_lookup=areas_by_entity)

    assert parsed is not None
    assert parsed.extra["area"] == "Kitchen"
def test_state_changed_entity_removed() -> None:
    """``new_state=None`` (entity removed by HA) is reported as ``'removed'``.

    Templates can branch on that marker; the display name has to come from
    the old state because there is no new one.
    """
    previous = {
        "state": "online",
        "attributes": {"friendly_name": "Dropped Sensor"},
    }
    removal = {
        "entity_id": "sensor.dropped",
        "old_state": previous,
        "new_state": None,
    }

    parsed = parse_event(_ha_event_envelope("state_changed", removal), provider_name="HA")

    assert parsed is not None
    assert parsed.extra["new_state"] == "removed"
    assert parsed.collection_name == "Dropped Sensor"
def test_automation_triggered() -> None:
    """``automation_triggered`` exposes the automation's name, id and source."""
    trigger = {
        "name": "Front door notification",
        "entity_id": "automation.front_door_notify",
        "source": "state of binary_sensor.front_door",
    }

    parsed = parse_event(
        _ha_event_envelope("automation_triggered", trigger),
        provider_name="HA",
    )

    assert parsed is not None
    assert parsed.event_type is EventType.HA_AUTOMATION_TRIGGERED
    assert parsed.collection_name == "Front door notification"
    assert parsed.collection_id == "automation.front_door_notify"
    assert parsed.extra["automation_name"] == "Front door notification"
    assert parsed.extra["trigger_source"] == "state of binary_sensor.front_door"
def test_call_service_with_target() -> None:
    """A single-target ``call_service`` yields ``domain.service`` as the id."""
    call = {
        "domain": "light",
        "service": "turn_on",
        "service_data": {"entity_id": "light.kitchen"},
    }

    parsed = parse_event(_ha_event_envelope("call_service", call), provider_name="HA")

    assert parsed is not None
    assert parsed.event_type is EventType.HA_SERVICE_CALLED
    assert parsed.collection_id == "light.turn_on"

    extra = parsed.extra
    assert extra["target_entity"] == "light.kitchen"
    assert extra["service_domain"] == "light"
    assert extra["service_name"] == "turn_on"
def test_call_service_with_multi_target() -> None:
    """A list of target entities is flattened into one comma-joined string.

    That lets templates render ``{{ target_entity }}`` directly instead of
    iterating over a list.
    """
    targets = ["light.kitchen", "light.living_room"]
    call = {
        "domain": "light",
        "service": "turn_off",
        "service_data": {"entity_id": targets},
    }

    parsed = parse_event(_ha_event_envelope("call_service", call), provider_name="HA")

    assert parsed is not None
    assert parsed.extra["target_entity"] == "light.kitchen, light.living_room"
def test_generic_event_fallback() -> None:
    """Unknown event types fall back to ``HA_EVENT_FIRED``.

    The raw HA event type and its data are kept in ``extra`` so catch-all
    subscriptions can still tell events apart.
    """
    envelope = _ha_event_envelope("custom_event_xyz", {"foo": "bar"})

    parsed = parse_event(envelope, provider_name="HA")

    assert parsed is not None
    assert parsed.event_type is EventType.HA_EVENT_FIRED
    assert parsed.extra["ha_event_type"] == "custom_event_xyz"
    assert parsed.extra["event_data"] == {"foo": "bar"}
def test_malformed_payload_returns_none() -> None:
    """Unrecoverable payloads make ``parse_event`` return ``None``."""
    unparseable = [
        # Empty dict and a non-dict value.
        {},
        "not a dict",
        # state_changed without entity_id is unrecoverable.
        _ha_event_envelope("state_changed", {"new_state": None}),
        # call_service without domain/service is unrecoverable.
        _ha_event_envelope("call_service", {"service": "turn_on"}),
    ]
    for payload in unparseable:
        assert parse_event(payload, provider_name="HA") is None  # type: ignore[arg-type]
def test_time_fired_iso_with_z_suffix_parses() -> None:
    """HA uses a ``Z`` suffix; older Python ``fromisoformat`` rejects it.

    The parser must handle both forms or we'd lose the timestamp. Besides
    checking that the result is timezone-aware UTC, the test pins the exact
    instant: a tz-aware value that is *not* the one in ``time_fired`` would
    otherwise slip through unnoticed.
    """
    from datetime import datetime, timezone

    payload = _ha_event_envelope(
        "state_changed",
        {
            "entity_id": "sensor.temp",
            "old_state": {"state": "20", "attributes": {}},
            "new_state": {"state": "21", "attributes": {}},
        },
    )
    # Set explicitly so this test does not silently depend on whatever
    # default the envelope helper happens to use.
    payload["time_fired"] = "2026-05-13T12:34:56.789Z"

    evt = parse_event(payload, provider_name="HA")

    assert evt is not None
    assert evt.timestamp.tzinfo is not None
    assert evt.timestamp.utcoffset() == timezone.utc.utcoffset(None)
    # Aware datetimes compare by instant, so this holds regardless of which
    # tzinfo object the parser attaches, and fails if the value was replaced
    # by anything other than the parsed ``time_fired``.
    assert evt.timestamp == datetime(
        2026, 5, 13, 12, 34, 56, 789000, tzinfo=timezone.utc
    )
@@ -0,0 +1,193 @@
"""Tests for the HA WS session helper and slice-before-normalize path.
The reviewer flagged two perf-shaped concerns that we've now addressed:
1. ``/status`` and ``/areas`` previously opened 3 and 2 separate WS
connections respectively. With ``HomeAssistantSession`` they share one
socket — these tests pin the contract.
2. ``/entities`` used to normalize every matching entity before slicing to
``count``. For HA installs with 1000+ entities this materialized 1000+
normalized dicts to throw most away. The optimization moves the slice
*before* normalize; this test exercises a 200-entity fixture and
verifies only the ``count`` survivors get normalized.
"""
from __future__ import annotations
import asyncio
from typing import Any
from unittest.mock import patch
import pytest
from notify_bridge_core.providers.home_assistant.client import HomeAssistantSession
from notify_bridge_server.commands import home_assistant_handler as handler
# ---------------------------------------------------------------------------
# Session class — surface contract
# ---------------------------------------------------------------------------
def test_session_class_has_expected_methods() -> None:
    """Pin the public method surface of ``HomeAssistantSession``."""
    expected = {"send", "get_states", "get_area_registry", "get_entity_registry"}
    actual = set()
    for name in dir(HomeAssistantSession):
        if name.startswith("_"):
            continue
        actual.add(name)
    assert expected.issubset(actual), f"missing: {expected - actual}"
@pytest.mark.asyncio
async def test_session_get_states_routes_through_send() -> None:
    """``get_states`` sends the canonical payload and returns the result.

    A recording stand-in for the client captures every payload passed to
    ``_send_command`` and answers ``_await_result`` with one canned row.
    """
    recorded: list[dict[str, Any]] = []

    class _RecordingClient:
        async def _send_command(self, ws: Any, payload: dict[str, Any]) -> int:
            recorded.append(payload)
            return 1

        async def _await_result(self, ws: Any, msg_id: int, timeout: float = 15.0) -> Any:
            return [{"entity_id": "light.kitchen", "state": "on", "attributes": {}}]

    client = _RecordingClient()
    session = HomeAssistantSession(client, ws=object())  # type: ignore[arg-type]

    states = await session.get_states()

    assert recorded == [{"type": "get_states"}]
    assert states == [{"entity_id": "light.kitchen", "state": "on", "attributes": {}}]
@pytest.mark.asyncio
async def test_session_methods_use_distinct_payloads() -> None:
    """Each session helper must send its own HA command ``type``."""
    recorded: list[dict[str, Any]] = []

    class _RecordingClient:
        async def _send_command(self, ws: Any, payload: dict[str, Any]) -> int:
            recorded.append(payload)
            # Message ids grow with every command sent.
            return len(recorded)

        async def _await_result(self, ws: Any, msg_id: int, timeout: float = 15.0) -> Any:
            return []

    session = HomeAssistantSession(_RecordingClient(), ws=object())  # type: ignore[arg-type]

    await session.get_states()
    await session.get_area_registry()
    await session.get_entity_registry()

    command_types = [payload["type"] for payload in recorded]
    assert command_types == [
        "get_states",
        "config/area_registry/list",
        "config/entity_registry/list",
    ]
# ---------------------------------------------------------------------------
# slice-before-normalize — perf contract for /entities
# ---------------------------------------------------------------------------
class _FakeAsyncSession:
    """Stand-in for an HA session; ``get_states`` hands back a preset list."""

    def __init__(self, states: list[dict[str, Any]]) -> None:
        # Kept by reference: the same list object is returned on every call.
        self._canned_states = states

    async def get_states(self) -> list[dict[str, Any]]:
        """Return the state list supplied at construction time."""
        return self._canned_states
class _FakeClient:
    """Stand-in client: ``session()`` yields a ``_FakeAsyncSession``."""

    def __init__(self, states: list[dict[str, Any]]) -> None:
        self._states = states

    def session(self):  # noqa: D401 — mimics real client signature
        """Return an async context manager wrapping the canned states."""
        canned = self._states

        class _SessionContext:
            async def __aenter__(ctx):
                return _FakeAsyncSession(canned)

            async def __aexit__(ctx, *_exc_info):
                # False: never suppress an exception raised in the body.
                return False

        return _SessionContext()
def _state_row(entity_id: str, n_attrs: int = 2) -> dict[str, Any]:
    """Build an ``"on"`` state row with *n_attrs* numbered attributes.

    Attributes are named ``attr_0`` … ``attr_{n_attrs - 1}`` and each maps
    to its own index.
    """
    attributes = {f"attr_{idx}": idx for idx in range(n_attrs)}
    row: dict[str, Any] = {"entity_id": entity_id}
    row["state"] = "on"
    row["attributes"] = attributes
    return row
@pytest.mark.asyncio
async def test_cmd_entities_slices_before_normalizing(monkeypatch: pytest.MonkeyPatch) -> None:
    """With 200 raw entities and ``count=10``, normalize runs exactly 10 times.

    ``_normalize_state`` is wrapped in a counting shim to prove the slice
    happens before the per-row transform. ``total`` must still report all
    200 so the user can tell the listing was truncated.
    """
    bulbs = [_state_row(f"light.bulb_{i:03d}") for i in range(200)]
    fake_client = _FakeClient(bulbs)

    def _fake_make_ws_client(provider: Any, session: Any) -> Any:
        return fake_client

    calls = {"count": 0}
    wrapped_normalize = handler._normalize_state  # captured before patching

    def _counting_normalize(row: dict[str, Any]) -> dict[str, Any]:
        calls["count"] += 1
        return wrapped_normalize(row)

    # The real ``get_http_session`` opens an aiohttp session; the fake
    # client never touches the session argument, so ``None`` is enough.
    async def _fake_http_session() -> Any:
        return None

    replacements = (
        ("_make_ws_client", _fake_make_ws_client),
        ("_normalize_state", _counting_normalize),
        ("get_http_session", _fake_http_session),
    )
    for attr_name, stand_in in replacements:
        monkeypatch.setattr(handler, attr_name, stand_in)

    provider_cls = type("FakeProvider", (), {"config": {}, "name": "HA"})
    provider = provider_cls()

    result = await handler._cmd_entities(provider, args="", count=10)

    assert result["total"] == 200
    assert result["shown"] == 10
    assert len(result["entities"]) == 10
    assert calls["count"] == 10, (
        f"normalize should run once per survivor; ran {calls['count']} times"
    )
@pytest.mark.asyncio
async def test_cmd_entities_glob_filter_still_normalizes_only_survivors(monkeypatch: pytest.MonkeyPatch) -> None:
    """The slice-before-normalize optimization composes with glob filtering.

    The fixture holds 100 ``light.*`` and 100 ``sensor.*`` rows. With the
    glob ``light.*`` and ``count=5``, ``total`` counts every matching light
    while only the 5 shown rows are normalized.
    """
    lights = [_state_row(f"light.bulb_{i:03d}") for i in range(100)]
    sensors = [_state_row(f"sensor.temp_{i:03d}") for i in range(100)]
    fake_client = _FakeClient(lights + sensors)

    def _fake_make_ws_client(provider: Any, session: Any) -> Any:
        return fake_client

    monkeypatch.setattr(handler, "_make_ws_client", _fake_make_ws_client)

    normalize_calls = {"count": 0}
    wrapped_normalize = handler._normalize_state  # captured before patching

    def _counting_normalize(row: dict[str, Any]) -> dict[str, Any]:
        normalize_calls["count"] += 1
        return wrapped_normalize(row)

    monkeypatch.setattr(handler, "_normalize_state", _counting_normalize)

    async def _fake_http_session() -> Any:
        return None

    monkeypatch.setattr(handler, "get_http_session", _fake_http_session)

    provider_cls = type("FakeProvider", (), {"config": {}, "name": "HA"})
    provider = provider_cls()

    result = await handler._cmd_entities(provider, args="light.*", count=5)

    assert result["total"] == 100  # every light.* entity is counted
    assert result["shown"] == 5  # but only 5 are returned
    assert normalize_calls["count"] == 5
    for entity in result["entities"]:
        assert entity["entity_id"].startswith("light.")