/**
 * RSS/Atom feed service — fetches and parses RSS/Atom feeds.
 * Uses lightweight, regex-based XML parsing without heavy dependencies.
 */
import { safeFetch } from '$lib/server/utils/safeFetch.js';

const CACHE_TTL_MS = 15 * 60 * 1000; // 15 minutes
const FETCH_TIMEOUT_MS = 10_000;
const DEFAULT_MAX_ITEMS = 10;
const MAX_SUMMARY_LENGTH = 500;

/** A cached parse result together with the timestamp (ms since epoch) at which it goes stale. */
interface CacheEntry {
	readonly data: readonly FeedItem[];
	readonly expiresAt: number;
}

/** One entry of a parsed feed. Fields that are absent in the feed are empty strings. */
export interface FeedItem {
	readonly title: string;
	readonly link: string;
	readonly pubDate: string;
	readonly summary: string;
}

/** In-memory cache keyed by `${feedUrl}:${limit}`. */
const cache = new Map<string, CacheEntry>();

/**
 * Look up a cache entry, dropping it if it has expired.
 *
 * @returns The cached items, or `null` when there is no fresh entry.
 */
function getCached(key: string): readonly FeedItem[] | null {
	const entry = cache.get(key);
	if (!entry) return null;
	if (Date.now() > entry.expiresAt) {
		cache.delete(key);
		return null;
	}
	return entry.data;
}

/** Store `data` under `key` with a lifetime of `CACHE_TTL_MS`. */
function setCache(key: string, data: readonly FeedItem[]): void {
	cache.set(key, { data, expiresAt: Date.now() + CACHE_TTL_MS });
}

/**
 * Extract the text content of the first `<tag>…</tag>` element in `xml`.
 *
 * A body consisting of a single CDATA section is unwrapped; otherwise the raw
 * inner text is returned (entities are NOT decoded here).
 *
 * @param xml - XML fragment to search.
 * @param tag - Element name; interpolated into a regex, so it must not contain
 *   regex metacharacters (all call sites pass fixed literals).
 * @returns The trimmed content, or `''` when the element is not present.
 */
function extractTag(xml: string, tag: string): string {
	// Handle CDATA sections: <tag><![CDATA[ ... ]]></tag>
	const cdataPattern = new RegExp(
		`<${tag}[^>]*>\\s*<!\\[CDATA\\[([\\s\\S]*?)\\]\\]>\\s*</${tag}>`,
		'i'
	);
	const cdataMatch = cdataPattern.exec(xml);
	if (cdataMatch) return (cdataMatch[1] ?? '').trim();

	// Handle regular content: <tag> ... </tag>
	const pattern = new RegExp(`<${tag}[^>]*>([\\s\\S]*?)</${tag}>`, 'i');
	const match = pattern.exec(xml);
	if (match) return (match[1] ?? '').trim();

	return '';
}

/**
 * Extract the target URL from the `<link>` elements of an Atom entry.
 *
 * Preference order: a link with `rel="alternate"`, then a link without any
 * `rel` attribute, then the first link that has an `href` at all. Attributes
 * are inspected per tag, so their order inside the tag does not matter.
 *
 * @returns The chosen `href`, or `''` when no link carries one.
 */
function extractAtomLink(entryXml: string): string {
	const linkTags = entryXml.match(/<link\b[^>]*>/gi) ?? [];
	let withoutRel = '';
	let firstHref = '';

	for (const linkTag of linkTags) {
		const href = /\bhref=["']([^"']+)["']/i.exec(linkTag)?.[1];
		if (!href) continue;

		const rel = /\brel=["']([^"']*)["']/i.exec(linkTag)?.[1];
		if (rel !== undefined && rel.toLowerCase() === 'alternate') return href;

		if (rel === undefined && withoutRel === '') withoutRel = href;
		if (firstHref === '') firstHref = href;
	}

	return withoutRel || firstHref;
}

/**
 * Collect the inner XML of up to `maxItems` `<element>…</element>` blocks.
 * The opening tag may carry attributes (e.g. `<item rdf:about="…">`).
 */
function extractElements(xml: string, element: string, maxItems: number): string[] {
	const blocks: string[] = [];
	if (maxItems <= 0) return blocks;

	const elementRegex = new RegExp(
		`<${element}(?:\\s[^>]*)?>([\\s\\S]*?)</${element}>`,
		'gi'
	);
	for (const match of xml.matchAll(elementRegex)) {
		blocks.push(match[1] ?? '');
		if (blocks.length >= maxItems) break;
	}
	return blocks;
}

/**
 * Parse RSS 2.0 feed XML.
 *
 * @returns At most `maxItems` items in document order; summaries are raw
 *   `<description>` content.
 */
function parseRss(xml: string, maxItems: number): readonly FeedItem[] {
	return extractElements(xml, 'item', maxItems).map((itemXml) => ({
		title: extractTag(itemXml, 'title') || 'Untitled',
		link: extractTag(itemXml, 'link'),
		pubDate: extractTag(itemXml, 'pubDate'),
		summary: extractTag(itemXml, 'description')
	}));
}

/**
 * Parse Atom feed XML.
 *
 * @returns At most `maxItems` entries in document order. `pubDate` prefers
 *   `<published>` over `<updated>`; `summary` prefers `<summary>` over `<content>`.
 */
function parseAtom(xml: string, maxItems: number): readonly FeedItem[] {
	return extractElements(xml, 'entry', maxItems).map((entryXml) => ({
		title: extractTag(entryXml, 'title') || 'Untitled',
		link: extractAtomLink(entryXml),
		pubDate: extractTag(entryXml, 'published') || extractTag(entryXml, 'updated'),
		summary: extractTag(entryXml, 'summary') || extractTag(entryXml, 'content')
	}));
}

/**
 * Strip HTML tags from a string (for summaries) and decode the basic entities.
 */
function stripHtml(html: string): string {
	return (
		html
			.replace(/<[^>]*>/g, '')
			.replace(/&lt;/g, '<')
			.replace(/&gt;/g, '>')
			.replace(/&quot;/g, '"')
			.replace(/&#39;/g, "'")
			// Decode `&amp;` last so that `&amp;lt;` becomes `&lt;`, not `<`.
			.replace(/&amp;/g, '&')
			.trim()
	);
}

/**
 * Fetch and parse an RSS or Atom feed from a URL.
 *
 * Results are cached in memory per `(feedUrl, limit)` pair for `CACHE_TTL_MS`.
 *
 * @param feedUrl - URL of the feed document.
 * @param maxItems - Maximum number of items to return; defaults to `DEFAULT_MAX_ITEMS`.
 * @returns Parsed items whose summaries are tag-stripped and truncated to
 *   `MAX_SUMMARY_LENGTH` characters.
 * @throws Error `Feed returned <status>` on a non-OK response,
 *   `Feed request timed out` when the request is aborted; any other error
 *   from `safeFetch` is rethrown unchanged.
 */
export async function fetchFeed(
	feedUrl: string,
	maxItems?: number
): Promise<readonly FeedItem[]> {
	const limit = maxItems ?? DEFAULT_MAX_ITEMS;
	const cacheKey = `${feedUrl}:${limit}`;

	const cached = getCached(cacheKey);
	if (cached) return cached;

	try {
		const response = await safeFetch(feedUrl, {
			timeoutMs: FETCH_TIMEOUT_MS,
			headers: {
				'User-Agent': 'WebAppLauncher/1.0',
				Accept: 'application/rss+xml, application/atom+xml, application/xml, text/xml'
			}
		});

		if (!response.ok) {
			throw new Error(`Feed returned ${response.status}`);
		}

		const xml = await response.text();

		// Detect feed type and parse. Atom documents have a <feed> root element;
		// requiring whitespace or '>' after the name keeps RSS feeds that merely
		// contain elements such as <feedburner:info> from being treated as Atom.
		// Anything else is parsed as RSS, which yields no items if there are none.
		const isAtom = /<feed[\s>]/i.test(xml);
		const items = isAtom ? parseAtom(xml, limit) : parseRss(xml, limit);

		const cleanItems = items.map((item) => ({
			...item,
			summary: stripHtml(item.summary).substring(0, MAX_SUMMARY_LENGTH)
		}));

		setCache(cacheKey, cleanItems);
		return cleanItems;
	} catch (err) {
		if (err instanceof DOMException && err.name === 'AbortError') {
			throw new Error('Feed request timed out');
		}
		throw err;
	}
}

/**
 * Clear the RSS feed cache.
 */
export function clearCache(): void {
	cache.clear();
}