From 45f93fd30ebe0e2ababe5f1d22c9c07b638b1755 Mon Sep 17 00:00:00 2001 From: "alexei.dolgolyov" Date: Tue, 21 Apr 2026 17:45:21 +0300 Subject: [PATCH] fix(devices): SP110E vendor handshake + Windows/bleak robustness MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SP110E peripherals silently tear down the GATT link ~1s after connect unless a two-write vendor handshake (01 00 → FFE2, 01 B7 E3 D5 → FFE1) arrives immediately. Without it the first real write hangs 30s then reconnect-loops forever. Adds optional BLEProtocol.init_writes executed on connect, plumbs a per-write char_uuid through both transports, and fixes the SP110E color/power frames from an incorrect 5 bytes to the documented 4 bytes. Windows/WinRT robustness: - asyncio.wait_for hangs on bleak because WinRT IAsyncOperations refuse to cancel. _bounded_await() uses asyncio.wait() instead so timeouts actually return control even when the inner task is uncancellable. - BleakClient connect by raw MAC string times out when WinRT guesses address type wrong; switched to pre-scanning with BleakScanner and passing the resolved BLEDevice, which carries the address type. - Target-start fetch timeout bumped to 30s with retry disabled so the UI doesn't abort during the BLE pre-scan + connect + handshake path. UI: - Settings modal exposes Protocol Family (IconSelect grid, shared with add-device via parameterized ensureBleFamilyIconSelect) so users can fix a wrong family pick without recreating the device. Govee AES key row toggles on/off with family selection. Also turns LAN auth back on in default_config.yaml, logs start_processing requests on entry for easier diagnosis, and captures the full debug trail in docs/BLE_LED_CONTROLLERS.md for future BLE work. Refs the mbullington SP110E protocol gist for the handshake bytes. --- docs/BLE_LED_CONTROLLERS.md | 109 +++++++++++++++++ server/config/default_config.yaml | 4 +- .../api/routes/output_targets_control.py | 1 + .../core/devices/android_ble_transport.py | 21 +++- server/src/ledgrab/core/devices/ble_client.py | 25 ++++ .../core/devices/ble_protocols/__init__.py | 7 ++ .../core/devices/ble_protocols/sp110e.py | 25 +++- .../src/ledgrab/core/devices/ble_transport.py | 110 ++++++++++++++++-- .../static/js/features/device-discovery.ts | 53 ++++++--- .../src/ledgrab/static/js/features/devices.ts | 44 ++++++- .../src/ledgrab/static/js/features/targets.ts | 7 ++ .../templates/modals/device-settings.html | 25 ++++ server/tests/test_ble_client.py | 48 ++++++-- server/tests/test_ble_protocols.py | 24 ++-- 14 files changed, 448 insertions(+), 55 deletions(-) create mode 100644 docs/BLE_LED_CONTROLLERS.md diff --git a/docs/BLE_LED_CONTROLLERS.md b/docs/BLE_LED_CONTROLLERS.md new file mode 100644 index 0000000..ad1eab7 --- /dev/null +++ b/docs/BLE_LED_CONTROLLERS.md @@ -0,0 +1,109 @@ +# BLE LED Controllers — Investigation & Implementation Notes + +Reference for anyone touching the BLE device provider (`server/src/ledgrab/core/devices/ble_*`). Captures the protocol quirks, Windows/bleak traps, and hardware lockdown we hit while bringing up SP110E / Triones / Zengge / Govee support. + +## Architecture + +``` +BLEDeviceProvider → BLEClient → BLETransport (desktop: bleak, Android: Kotlin BleBridge via Chaquopy) + │ + └─ BLEProtocol (family-specific wire bytes: sp110e.py, triones.py, zengge.py, govee.py) +``` + +- One `BLEProtocol` dataclass per controller family. Each supplies GATT UUIDs, write type (with/without response), `encode_color` / `encode_power` functions, name prefixes for discovery, and an optional `init_writes` handshake sequence. +- `BLEClient` is whole-strip only. `send_pixels()` averages incoming pixel arrays and emits one solid color per frame — none of these protocols support per-pixel streaming. +- Discovery auto-detects the family via advertised name prefix first, falls back to service UUID matching. The detected family is returned on `DiscoveredDevice.ble_family` and preselected in the UI. +- The settings modal lets users change the family after creation — wrong family → writes go to a characteristic the device ignores → strip stays dark. + +## Protocol Quirks + +### SP110E / SP108E (critical handshake) + +The controller **silently tears the GATT link down within ~1 second of connect** unless a two-write handshake arrives immediately: + +``` +Write 01 00 → characteristic FFE2 +Write 01 B7 E3 D5 → characteristic FFE1 +``` + +Without this, the first real write later hangs for 30 s because bleak thinks the link is up but the peripheral has already dropped it. We carry these writes in `PROTOCOL.init_writes` and execute them from `BLEClient.connect()` right after GATT open. + +Color frame is **4 bytes** (`RR GG BB 1E`), not 5 — the earlier implementation had a stray `0x00` padding byte that the device tolerated but isn't documented. + +Source: [mbullington's reverse-engineering gist](https://gist.github.com/mbullington/37957501a07ad065b67d4e8d39bfe012). + +### Triones / Zengge / Govee + +No init handshake required. Color frames and command bytes documented inline in each protocol module. Notable: Zengge and SP110E share service UUID `FFE0/FFE1`, so name-based identification is the only reliable way to tell them apart. In `_register_builtins()`, SP110E is registered first so it wins the `identify_family_by_service_uuids` tie by default — change this if the user base flips. + +## bleak + Windows WinRT Traps + +These bit us hard. All are now worked around, but future BLE work should keep them in mind. + +### 1. `asyncio.wait_for` hangs forever on WinRT + +`BleakClient.connect()` / `write_gatt_char()` wrap WinRT `IAsyncOperation`s. When asyncio tries to cancel them (as `wait_for` does on timeout), the WinRT task **never finishes cancelling**, so `wait_for` itself blocks forever while awaiting the cancellation. Symptom: log stops with no timeout error, process is alive but wedged. + +**Fix**: `_bounded_await()` in [ble_transport.py](../server/src/ledgrab/core/devices/ble_transport.py) uses `asyncio.wait()` instead, which returns on timeout without awaiting pending tasks. Orphans the hanging WinRT task but frees the caller. + +### 2. Connect by raw MAC string fails on Windows + +Passing `BleakClient("AA:BB:CC:DD:EE:FF")` makes WinRT guess the address type (public vs random static vs random resolvable). Guesses wrong → connect silently times out. Symptom: `TimeoutError: BLE connect to ... exceeded 10.0s` with no other signal. + +**Fix**: Always pre-scan with `BleakScanner.find_device_by_address()` and pass the returned `BLEDevice` object to `BleakClient`. Costs ~400 ms but makes connect reliable. + +### 3. Client-side fetch timeout too short for BLE target start + +The target-start endpoint does a ~5 s pre-scan + up to 10 s GATT connect + init handshake. Default `fetchWithAuth` has a 10 s timeout and 3× retry, so the UI was aborting and retrying concurrent `/start` requests into the server. + +**Fix**: `startTargetProcessing` overrides `timeout: 30000, retry: false`. + +### 4. `Start-Process -WindowStyle Hidden` from bash/WSL strips handles + +When `restart.ps1` is invoked from Git-Bash / WSL, `Start-Process` inherited handles cause the child uvicorn to exit immediately. Stream redirection fixes it. + +**Fix**: `restart.ps1` always uses `-RedirectStandardOutput`/`-RedirectStandardError` to a temp log. Failed startups dump the stderr tail to the caller so root cause is visible. + +## Vendor Lockdown (the dead end) + +Some controllers — notably the one we tested, advertising as `AlexTable` at `16:61:05:70:68:44` — **only accept connections from the vendor phone app**. Diagnostic sequence: + +| Test | Result | Meaning | +| --- | --- | --- | +| LedGrab `BleakClient.connect()` | 10 s timeout | Windows can't connect | +| Windows "Bluetooth LE Explorer" | Hangs on connect | Same Windows stack as bleak — not our bug | +| Phone **OS** Bluetooth Settings | Can't connect | Phone OS uses generic BLE stack — also fails | +| Phone **LED Hue** app | Connects fine | Vendor app is the *only* working client | + +At this point, further Windows/bleak tweaks have no effect. The peripheral firmware rejects generic GATT connects and only stays connected when the LED Hue app emits its vendor-specific handshake. To unlock such a controller from LedGrab you'd need to: + +1. Enable **Developer Options → Bluetooth HCI snoop log** on Android. +2. Reproduce the LED Hue flow (connect → color change → disconnect). +3. `adb bugreport bugreport.zip`; extract `btsnoop_hci.log`. +4. Open in Wireshark; identify the vendor handshake bytes written during connect. +5. Add them to the protocol's `init_writes`. + +Alternatively, replace the BLE controller hardware with **WLED on ESP32** — $3, fully supported, vastly more capable. + +## Frontend + +- BLE family picker uses the project's shared `IconSelect` grid (project rule — see [CLAUDE.md](../CLAUDE.md): "NEVER use plain HTML ` change fires when IconSelect picks a value, - // which is what toggles the Govee key field. - sel.addEventListener('change', _updateBleGoveeKeyVisibility); +export function ensureBleFamilyIconSelect(selectId: string, onChange?: () => void): any { + const sel = document.getElementById(selectId) as HTMLSelectElement | null; + if (!sel) return null; + if (_bleFamilyIconSelects[selectId]) { + _bleFamilyIconSelects[selectId].updateItems(_buildBleFamilyItems()); + return _bleFamilyIconSelects[selectId]; } + _bleFamilyIconSelects[selectId] = new IconSelect({ + target: sel, + items: _buildBleFamilyItems(), + columns: 2, + } as any); + if (onChange) { + sel.addEventListener('change', onChange); + } + return _bleFamilyIconSelects[selectId]; +} + +// Thin wrappers used by the add-device modal. +function _destroyBleFamilyIconSelect() { + destroyBleFamilyIconSelect('device-ble-family'); +} + +function _ensureBleFamilyIconSelect() { + ensureBleFamilyIconSelect('device-ble-family', _updateBleGoveeKeyVisibility); _updateBleGoveeKeyVisibility(); } diff --git a/server/src/ledgrab/static/js/features/devices.ts b/server/src/ledgrab/static/js/features/devices.ts index e190183..13bc958 100644 --- a/server/src/ledgrab/static/js/features/devices.ts +++ b/server/src/ledgrab/static/js/features/devices.ts @@ -6,9 +6,9 @@ import { _deviceBrightnessCache, updateDeviceBrightness, csptCache, } from '../core/state.ts'; -import { API_BASE, getHeaders, fetchWithAuth, escapeHtml, isSerialDevice, isMockDevice, isMqttDevice, isWsDevice, isOpenrgbDevice, isDmxDevice, isGroupDevice } from '../core/api.ts'; +import { API_BASE, getHeaders, fetchWithAuth, escapeHtml, isSerialDevice, isMockDevice, isMqttDevice, isWsDevice, isOpenrgbDevice, isDmxDevice, isBleDevice, isGroupDevice } from '../core/api.ts'; import { devicesCache } from '../core/state.ts'; -import { _fetchOpenrgbZones, _getCheckedZones, _splitOpenrgbZone, _getZoneMode, ensureDmxProtocolIconSelect, destroyDmxProtocolIconSelect, ensureSpiLedTypeIconSelect, destroySpiLedTypeIconSelect, ensureGameSenseDeviceTypeIconSelect, destroyGameSenseDeviceTypeIconSelect, addGroupChildSettingsWithId as _addGroupChildSettingsWithId, ensureGroupModeIconSelect, destroyGroupModeIconSelect } from './device-discovery.ts'; +import { _fetchOpenrgbZones, _getCheckedZones, _splitOpenrgbZone, _getZoneMode, ensureDmxProtocolIconSelect, destroyDmxProtocolIconSelect, ensureSpiLedTypeIconSelect, destroySpiLedTypeIconSelect, ensureGameSenseDeviceTypeIconSelect, destroyGameSenseDeviceTypeIconSelect, addGroupChildSettingsWithId as _addGroupChildSettingsWithId, ensureGroupModeIconSelect, destroyGroupModeIconSelect, ensureBleFamilyIconSelect, destroyBleFamilyIconSelect } from './device-discovery.ts'; import { t } from '../core/i18n.ts'; import { showToast, showConfirm, desktopFocus } from '../core/ui.ts'; import { Modal } from '../core/modal.ts'; @@ -66,6 +66,8 @@ class DeviceSettingsModal extends Modal { dmxProtocol: (document.getElementById('settings-dmx-protocol') as HTMLSelectElement | null)?.value || 'artnet', dmxStartUniverse: (document.getElementById('settings-dmx-start-universe') as HTMLInputElement | null)?.value || '0', dmxStartChannel: (document.getElementById('settings-dmx-start-channel') as HTMLInputElement | null)?.value || '1', + bleFamily: (document.getElementById('settings-ble-family') as HTMLSelectElement | null)?.value || '', + bleGoveeKey: (document.getElementById('settings-ble-govee-key') as HTMLInputElement | null)?.value || '', csptId: (document.getElementById('settings-css-processing-template') as HTMLSelectElement | null)?.value || '', }; } @@ -443,6 +445,37 @@ export async function showSettings(deviceId: any) { if (dmxStartChannelGroup) (dmxStartChannelGroup as HTMLElement).style.display = 'none'; } + // BLE-specific fields — exposed in the settings modal so the user + // can fix a wrong protocol family pick without deleting+recreating + // the device. Uses the shared IconSelect grid (project rule bans + // plain + + + +