"""Incoming webhook handlers for webhook-based providers (Gitea, etc.).""" from __future__ import annotations import hashlib import hmac import json import logging from typing import Any from fastapi import APIRouter, HTTPException, Request from sqlmodel import select from sqlmodel.ext.asyncio.session import AsyncSession from ..auth.routes import limiter from notify_bridge_core.models.events import ServiceEvent from notify_bridge_core.providers.gitea.event_parser import parse_webhook as parse_gitea_webhook from notify_bridge_core.providers.planka.event_parser import parse_webhook as parse_planka_webhook from notify_bridge_core.providers.webhook.event_parser import parse_webhook as parse_generic_webhook from ..database.engine import get_engine from sqlalchemy import delete as sa_delete, func from ..database.models import ( EventLog, NotificationTracker, ServiceProvider, WebhookPayloadLog, ) from ..services.event_dispatch import dispatch_provider_event _LOGGER = logging.getLogger(__name__) router = APIRouter(prefix="/api/webhooks", tags=["webhooks"]) # Hard cap on inbound webhook body size (1 MiB is far larger than anything # legitimate providers send and keeps the worst-case memory footprint bounded # when a malicious peer lies about Content-Length or streams slowly). _MAX_WEBHOOK_BODY_BYTES = 1_000_000 async def _read_bounded_body(request: Request, limit: int = _MAX_WEBHOOK_BODY_BYTES) -> bytes: """Reject oversized inbound bodies before they exhaust memory. First checks ``Content-Length`` (fast-path for honest peers), then streams the body in chunks enforcing the same cap on actual bytes received so a peer that lies about Content-Length cannot slip through. """ declared = request.headers.get("content-length") if declared: try: if int(declared) > limit: raise HTTPException( status_code=413, detail=f"Payload too large (max {limit} bytes)", ) except ValueError: raise HTTPException(status_code=400, detail="Invalid Content-Length") chunks: list[bytes] = [] size = 0 async for chunk in request.stream(): size += len(chunk) if size > limit: raise HTTPException( status_code=413, detail=f"Payload too large (max {limit} bytes)", ) chunks.append(chunk) return b"".join(chunks) async def _get_provider_by_token( session: AsyncSession, token: str, expected_type: str, ) -> ServiceProvider: """Look up a provider by its webhook_token and expected type.""" result = await session.exec( select(ServiceProvider).where( ServiceProvider.webhook_token == token, ServiceProvider.type == expected_type, ) ) provider = result.first() if not provider: raise HTTPException(status_code=404, detail="Provider not found") return provider # --------------------------------------------------------------------------- # HMAC-SHA256 validation # --------------------------------------------------------------------------- def _verify_gitea_signature(secret: str, body: bytes, signature: str) -> bool: """Verify Gitea X-Gitea-Signature HMAC-SHA256.""" expected = hmac.new(secret.encode(), body, hashlib.sha256).hexdigest() return hmac.compare_digest(expected, signature) # --------------------------------------------------------------------------- # Filter helpers # --------------------------------------------------------------------------- def _passes_filters( event: ServiceEvent, filters: dict[str, Any], ) -> bool: """Check if an event passes the tracker's filters.""" # Collection filter (repo full_name for Gitea) collections = filters.get("collections", []) if collections and event.collection_id not in collections: return False sender = event.extra.get("sender", "") # Sender allowlist senders = filters.get("senders", []) if senders and sender not in senders: return False # Sender blocklist exclude_senders = filters.get("exclude_senders", []) if exclude_senders and sender in exclude_senders: return False return True # --------------------------------------------------------------------------- # Shared dispatch helper (legacy wrapper — body moved to services/event_dispatch.py) # --------------------------------------------------------------------------- async def _dispatch_webhook_event( engine: Any, provider_id: int, provider_name: str, provider_config: dict[str, Any], event: ServiceEvent, detail_keys: tuple[str, ...], ) -> int: """Webhook-flavoured dispatch — thin wrapper over ``dispatch_provider_event``.""" return await dispatch_provider_event( engine=engine, provider_id=provider_id, provider_name=provider_name, provider_config=provider_config, event=event, detail_keys=detail_keys, filter_fn=_passes_filters, ) # --------------------------------------------------------------------------- # Gitea webhook endpoint # --------------------------------------------------------------------------- @router.post("/gitea/{token}") async def gitea_webhook(token: str, request: Request): """Receive a Gitea webhook, parse it, filter, and dispatch notifications.""" engine = get_engine() # --- Load provider and validate signature --- async with AsyncSession(engine) as session: provider = await _get_provider_by_token(session, token, "gitea") webhook_secret = (provider.config or {}).get("webhook_secret", "") # Bail BEFORE reading the body if either the provider is missing a # secret (admin misconfiguration) or the inbound request has no # signature header. Either way the request can never authenticate, # so there's no reason to spend the 1 MiB body read first. if not webhook_secret: raise HTTPException( status_code=403, detail="Webhook secret not configured on this provider", ) signature = request.headers.get("X-Gitea-Signature", "") if not signature: raise HTTPException(status_code=403, detail="Invalid signature") # Body needed for the HMAC check — reads at most _MAX_WEBHOOK_BODY_BYTES. raw_body = await _read_bounded_body(request) if not _verify_gitea_signature(webhook_secret, raw_body, signature): raise HTTPException(status_code=403, detail="Invalid signature") # Parse event header + payload event_header = request.headers.get("X-Gitea-Event", "") if not event_header: return {"ok": True, "skipped": "no event header"} try: payload = json.loads(raw_body.decode("utf-8")) except (UnicodeDecodeError, json.JSONDecodeError, ValueError): raise HTTPException(status_code=400, detail="Invalid JSON") event = parse_gitea_webhook(event_header, payload, provider.name) if event is None: return {"ok": True, "skipped": "unmapped event"} # --- Dispatch --- dispatched = await _dispatch_webhook_event( engine=engine, provider_id=provider.id, provider_name=provider.name, provider_config=provider.config or {}, event=event, detail_keys=( "sender", "branch", "commit_count", "issue_number", "issue_title", "pr_number", "pr_title", "release_tag", "release_name", ), ) return {"ok": True, "dispatched": dispatched} # --------------------------------------------------------------------------- # Planka webhook endpoint # --------------------------------------------------------------------------- def _verify_planka_token(expected_token: str, request: Request) -> bool: """Verify Planka webhook Bearer token.""" auth_header = request.headers.get("Authorization", "") if auth_header.startswith("Bearer "): token = auth_header[7:] return hmac.compare_digest(token, expected_token) return False @router.post("/planka/{token}") async def planka_webhook(token: str, request: Request): """Receive a Planka webhook, parse it, filter, and dispatch notifications.""" engine = get_engine() # --- Load provider and validate token --- async with AsyncSession(engine) as session: provider = await _get_provider_by_token(session, token, "planka") webhook_secret = (provider.config or {}).get("webhook_secret", "") if not webhook_secret: raise HTTPException( status_code=403, detail="Webhook secret not configured on this provider", ) if not _verify_planka_token(webhook_secret, request): raise HTTPException(status_code=403, detail="Invalid token") # Read body AFTER auth check so an attacker without the bearer token # can't force an unbounded read. Token is in the header, not the body. raw_body = await _read_bounded_body(request) # Parse payload from the bounded raw_body we already read. try: payload = json.loads(raw_body.decode("utf-8")) except (UnicodeDecodeError, json.JSONDecodeError, ValueError): raise HTTPException(status_code=400, detail="Invalid JSON") event_type = payload.get("type", "") if not event_type: return {"ok": True, "skipped": "no event type"} base_url = (provider.config or {}).get("url", "") event = parse_planka_webhook(event_type, payload, provider.name, base_url=base_url) if event is None: return {"ok": True, "skipped": "unmapped event"} # --- Dispatch --- dispatched = await _dispatch_webhook_event( engine=engine, provider_id=provider.id, provider_name=provider.name, provider_config=provider.config or {}, event=event, detail_keys=( "sender", "card_name", "board_name", "list_name", "old_list_name", "new_list_name", "comment_text", "task_name", "attachment_name", "label_name", ), ) return {"ok": True, "dispatched": dispatched} # --------------------------------------------------------------------------- # Generic Webhook endpoint # --------------------------------------------------------------------------- def _verify_generic_webhook_auth( config: dict[str, Any], request: Request, raw_body: bytes, ) -> bool: """Verify authentication for a generic webhook based on configured auth_mode.""" auth_mode = config.get("auth_mode", "none") if auth_mode == "none": return True secret = config.get("webhook_secret", "") if not secret: return False if auth_mode == "hmac_sha256": # Support common signature headers signature = ( request.headers.get("X-Hub-Signature-256", "") or request.headers.get("X-Webhook-Signature", "") or request.headers.get("X-Signature-256", "") ) # Strip "sha256=" prefix if present (GitHub-style) if signature.startswith("sha256="): signature = signature[7:] if not signature: return False expected = hmac.new(secret.encode(), raw_body, hashlib.sha256).hexdigest() return hmac.compare_digest(expected, signature) if auth_mode == "bearer_token": auth_header = request.headers.get("Authorization", "") if auth_header.startswith("Bearer "): token = auth_header[7:] return hmac.compare_digest(token, secret) return False return False _SENSITIVE_HEADER_SUBSTR = ( "token", "auth", "key", "secret", "signature", "password", "credential", "cookie", "x-api", "x-hub-signature", # Extended for per-key body redaction; harmless extras for header check. "oauth", "client_secret", "webhook_secret", "csrf", ) def _is_sensitive_header(name: str) -> bool: n = name.lower() return any(s in n for s in _SENSITIVE_HEADER_SUBSTR) _REDACTED_PLACEHOLDER = "[REDACTED]" def _redact_sensitive_body(value: object) -> object: """Walk a parsed JSON body and redact values for sensitive-named keys. Returns a defensively-copied structure so the caller's object is never mutated (callers downstream still consume the original). """ if isinstance(value, dict): cleaned: dict[str, object] = {} for k, v in value.items(): if isinstance(k, str) and _is_sensitive_header(k): cleaned[k] = _REDACTED_PLACEHOLDER else: cleaned[k] = _redact_sensitive_body(v) return cleaned if isinstance(value, list): return [_redact_sensitive_body(v) for v in value] return value def _filter_headers(raw_headers: dict[str, str]) -> dict[str, str]: """Keep only safe headers for logging (strip Authorization, signatures, tokens). Allowlist base set of known-safe headers, accept X-* only if they do not match any sensitive substring (token/auth/key/secret/signature/...). """ safe: dict[str, str] = {} for k, v in raw_headers.items(): kl = k.lower() if _is_sensitive_header(kl): continue if kl in ("content-type", "user-agent", "content-length", "accept") or kl.startswith("x-"): safe[k] = v return safe async def _save_webhook_log( session: AsyncSession, provider_id: int, method: str, headers: dict[str, str], body: dict[str, Any] | str, status: str, extracted_fields: dict[str, Any] | None = None, error_message: str = "", max_count: int = 20, ) -> None: """Insert a webhook payload log entry and prune old ones.""" try: body_json = body if isinstance(body, dict) else {} # Strip sensitive values before persistence — webhook payloads # routinely include OAuth tokens / secrets in the body, and the # log is admin-readable but not need-to-know for the operator. safe_body = _redact_sensitive_body(body_json) if body_json else {} session.add(WebhookPayloadLog( provider_id=provider_id, method=method, headers=headers, body=safe_body, status=status, extracted_fields=extracted_fields or {}, error_message=error_message, )) await session.flush() # Atomic prune: DELETE anything for this provider outside the newest # max_count rows. Avoids the COUNT -> SELECT -> DELETE race. keep_subq = ( select(WebhookPayloadLog.id) .where(WebhookPayloadLog.provider_id == provider_id) .order_by(WebhookPayloadLog.created_at.desc(), WebhookPayloadLog.id.desc()) .limit(max_count) .subquery() ) await session.execute( sa_delete(WebhookPayloadLog) .where(WebhookPayloadLog.provider_id == provider_id) .where(~WebhookPayloadLog.id.in_(select(keep_subq.c.id))) ) except Exception: _LOGGER.warning("Failed to save webhook payload log for provider %d", provider_id, exc_info=True) try: await session.rollback() except Exception: # noqa: BLE001 _LOGGER.exception("Rollback after payload-log save failed") @router.post("/webhook/{token}") @limiter.limit("60/minute") async def generic_webhook(token: str, request: Request): """Receive a generic webhook, extract variables via JSONPath, and dispatch notifications. Per-IP rate limit (60/min) caps blast radius from a single source — legitimate providers send well below this; anything higher is either a misconfigured retry loop or abuse. """ engine = get_engine() # --- Load provider and validate auth --- async with AsyncSession(engine) as session: provider = await _get_provider_by_token(session, token, "webhook") provider_id = provider.id provider_config = provider.config or {} provider_name = provider.name store_payloads = provider_config.get("store_payloads", True) max_stored = min(max(int(provider_config.get("max_stored_payloads", 20)), 1), 100) # Reject misconfigured providers (auth_mode requires a secret but none # set) BEFORE the 1 MiB body read. For non-HMAC modes we can also # verify the credential header up front; HMAC needs the body. auth_mode = provider_config.get("auth_mode", "none") if auth_mode in {"hmac_sha256", "bearer_token"} and not provider_config.get("webhook_secret"): raise HTTPException(status_code=403, detail="Authentication failed") if auth_mode == "bearer_token": auth_header = request.headers.get("Authorization", "") secret = provider_config.get("webhook_secret", "") if not auth_header.startswith("Bearer ") or not hmac.compare_digest(auth_header[7:], secret): raise HTTPException(status_code=403, detail="Authentication failed") raw_body = await _read_bounded_body(request) # Bounded read above already enforces the size cap; no need to re-check. if not _verify_generic_webhook_auth(provider_config, request, raw_body): raise HTTPException(status_code=403, detail="Authentication failed") safe_headers = _filter_headers(dict(request.headers)) # Parse JSON payload from the already-bounded raw_body (request.body() # has been consumed, so request.json() is no longer usable here). try: payload = json.loads(raw_body.decode("utf-8")) if not isinstance(payload, dict): raise ValueError("Payload must be a JSON object") except (UnicodeDecodeError, json.JSONDecodeError, ValueError): if store_payloads: async with AsyncSession(get_engine()) as log_session: await _save_webhook_log( log_session, provider_id, request.method, safe_headers, {}, "error", error_message="Invalid JSON", max_count=max_stored, ) await log_session.commit() raise HTTPException(status_code=400, detail="Invalid JSON") # Parse via JSONPath mappings req_headers = dict(request.headers) event = parse_generic_webhook(payload, provider_name, provider_config, headers=req_headers) if event is None: if store_payloads: async with AsyncSession(get_engine()) as log_session: await _save_webhook_log( log_session, provider_id, request.method, safe_headers, payload, "unmatched", max_count=max_stored, ) await log_session.commit() return {"ok": True, "skipped": "parse failed"} # Inject source IP source_ip = request.client.host if request.client else "" event.extra["source_ip"] = source_ip # --- Dispatch --- dispatched = await _dispatch_webhook_event( engine=engine, provider_id=provider_id, provider_name=provider_name, provider_config=provider_config, event=event, detail_keys=( "event_type_raw", "source_ip", ), ) # Log matched payload (separate session — dispatch already committed) if store_payloads: async with AsyncSession(engine) as log_session: await _save_webhook_log( log_session, provider_id, request.method, safe_headers, payload, "matched" if dispatched > 0 else "unmatched", extracted_fields=dict(event.extra), max_count=max_stored, ) await log_session.commit() return {"ok": True, "dispatched": dispatched}