Replace TTL with thumbhash-based cache validation and add Telegram video size limits
Some checks failed
Validate / Hassfest (push) Has been cancelled

- Asset cache now validates entries by comparing stored thumbhash with current
  Immich thumbhash instead of using TTL expiration. This makes cache invalidation
  precise (only when content actually changes) and eliminates unnecessary re-uploads.
  URL-based cache retains TTL for non-Immich URLs.
- Add TELEGRAM_MAX_VIDEO_SIZE (50 MB) check to skip oversized videos in both
  single-video and media-group paths, preventing entire groups from failing.
- Split media groups into sub-groups by cumulative upload size to ensure each
  sendMediaGroup request stays under Telegram's 50 MB upload limit.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-16 12:28:33 +03:00
parent 65ca81a3f3
commit dd7032b411
9 changed files with 637 additions and 257 deletions

View File

@@ -73,40 +73,53 @@ class TelegramFileCache:
When a file is uploaded to Telegram, it returns a file_id that can be reused
to send the same file without re-uploading. This cache stores these file_ids
keyed by the source URL.
keyed by the source URL or asset ID.
Supports two validation modes:
- TTL mode (default): entries expire after a configured time-to-live
- Thumbhash mode: entries are validated by comparing stored thumbhash with
the current asset thumbhash from Immich
"""
def __init__(
self,
hass: HomeAssistant,
album_id: str,
entry_id: str,
ttl_seconds: int = DEFAULT_TELEGRAM_CACHE_TTL,
use_thumbhash: bool = False,
) -> None:
"""Initialize the Telegram file cache.
Args:
hass: Home Assistant instance
album_id: Album ID for scoping the cache
ttl_seconds: Time-to-live for cache entries in seconds (default: 48 hours)
entry_id: Config entry ID for scoping the cache (per hub)
ttl_seconds: Time-to-live for cache entries in seconds (TTL mode only)
use_thumbhash: Use thumbhash-based validation instead of TTL
"""
self._store: Store[dict[str, Any]] = Store(
hass, STORAGE_VERSION, f"{STORAGE_KEY_PREFIX}.telegram_cache.{album_id}"
hass, STORAGE_VERSION, f"{STORAGE_KEY_PREFIX}.telegram_cache.{entry_id}"
)
self._data: dict[str, Any] | None = None
self._ttl_seconds = ttl_seconds
self._use_thumbhash = use_thumbhash
async def async_load(self) -> None:
"""Load cache data from storage."""
self._data = await self._store.async_load() or {"files": {}}
# Clean up expired entries on load
# Clean up expired entries on load (TTL mode only)
await self._cleanup_expired()
mode = "thumbhash" if self._use_thumbhash else "TTL"
_LOGGER.debug(
"Loaded Telegram file cache with %d entries",
"Loaded Telegram file cache with %d entries (mode: %s)",
len(self._data.get("files", {})),
mode,
)
async def _cleanup_expired(self) -> None:
"""Remove expired cache entries."""
"""Remove expired cache entries (TTL mode only)."""
if self._use_thumbhash:
return
if not self._data or "files" not in self._data:
return
@@ -127,53 +140,75 @@ class TelegramFileCache:
await self._store.async_save(self._data)
_LOGGER.debug("Cleaned up %d expired Telegram cache entries", len(expired_keys))
def get(self, url: str) -> dict[str, Any] | None:
"""Get cached file_id for a URL.
def get(self, key: str, thumbhash: str | None = None) -> dict[str, Any] | None:
"""Get cached file_id for a key.
Args:
url: The source URL of the media
key: The cache key (URL or asset ID)
thumbhash: Current thumbhash for validation (thumbhash mode only).
If provided, compares with stored thumbhash. Mismatch = cache miss.
Returns:
Dict with 'file_id' and 'type' if cached and not expired, None otherwise
Dict with 'file_id' and 'type' if cached and valid, None otherwise
"""
if not self._data or "files" not in self._data:
return None
entry = self._data["files"].get(url)
entry = self._data["files"].get(key)
if not entry:
return None
# Check if expired
cached_at_str = entry.get("cached_at")
if cached_at_str:
cached_at = datetime.fromisoformat(cached_at_str)
age_seconds = (datetime.now(timezone.utc) - cached_at).total_seconds()
if age_seconds > self._ttl_seconds:
return None
if self._use_thumbhash:
# Thumbhash-based validation
if thumbhash is not None:
stored_thumbhash = entry.get("thumbhash")
if stored_thumbhash and stored_thumbhash != thumbhash:
_LOGGER.debug(
"Cache miss for %s: thumbhash changed",
key[:36],
)
return None
# If no thumbhash provided (asset not in monitored album),
# return cached entry anyway — self-heals on Telegram rejection
else:
# TTL-based validation
cached_at_str = entry.get("cached_at")
if cached_at_str:
cached_at = datetime.fromisoformat(cached_at_str)
age_seconds = (datetime.now(timezone.utc) - cached_at).total_seconds()
if age_seconds > self._ttl_seconds:
return None
return {
"file_id": entry.get("file_id"),
"type": entry.get("type"),
}
async def async_set(self, url: str, file_id: str, media_type: str) -> None:
"""Store a file_id for a URL.
async def async_set(
self, key: str, file_id: str, media_type: str, thumbhash: str | None = None
) -> None:
"""Store a file_id for a key.
Args:
url: The source URL of the media
key: The cache key (URL or asset ID)
file_id: The Telegram file_id
media_type: The type of media ('photo', 'video', 'document')
thumbhash: Current thumbhash to store alongside file_id (thumbhash mode only)
"""
if self._data is None:
self._data = {"files": {}}
self._data["files"][url] = {
entry_data: dict[str, Any] = {
"file_id": file_id,
"type": media_type,
"cached_at": datetime.now(timezone.utc).isoformat(),
}
if thumbhash is not None:
entry_data["thumbhash"] = thumbhash
self._data["files"][key] = entry_data
await self._store.async_save(self._data)
_LOGGER.debug("Cached Telegram file_id for URL (type: %s)", media_type)
_LOGGER.debug("Cached Telegram file_id for key (type: %s)", media_type)
async def async_remove(self) -> None:
"""Remove all cache data."""