"""YouTube RSS feed poller for Meet Kevin channel.""" import logging from dataclasses import dataclass from datetime import datetime from xml.etree import ElementTree as ET import httpx logger = logging.getLogger(__name__) # Atom and YouTube namespace mappings _NAMESPACES = { "a": "http://www.w3.org/2005/Atom", "yt": "http://www.youtube.com/xml/schemas/2015", "m": "http://search.yahoo.com/mrss/", } @dataclass(frozen=True) class DiscoveredVideo: """A video discovered from YouTube RSS feed.""" youtube_video_id: str title: str description: str published_at: datetime thumbnail_url: str async def fetch_feed(channel_id: str, client: httpx.AsyncClient) -> bytes: """Fetch YouTube RSS feed for a channel. Args: channel_id: YouTube channel ID (e.g., "UCUvvj5lwue7PspotMDjk5UA") client: httpx AsyncClient for HTTP requests Returns: Raw XML bytes from the feed, or empty bytes on error. HTTP errors are logged but do not raise. """ url = f"https://www.youtube.com/feeds/videos.xml?channel_id={channel_id}" try: response = await client.get(url, timeout=15.0) response.raise_for_status() return response.content except httpx.HTTPError as e: logger.warning("Failed to fetch feed from %s: %s", url, e) return b"" def parse_feed(xml_bytes: bytes) -> list[DiscoveredVideo]: """Parse YouTube RSS feed XML and extract videos. Args: xml_bytes: Raw XML bytes from YouTube RSS feed Returns: List of DiscoveredVideo objects. Returns empty list on parse error, empty input, or if no valid entries found. Individual entries with missing required fields are skipped. """ if not xml_bytes: return [] try: root = ET.fromstring(xml_bytes) except ET.ParseError as e: logger.warning("Failed to parse feed XML: %s", e) return [] videos: list[DiscoveredVideo] = [] for entry in root.findall("a:entry", _NAMESPACES): try: # Extract required fields video_id_elem = entry.find("yt:videoId", _NAMESPACES) title_elem = entry.find("a:title", _NAMESPACES) published_elem = entry.find("a:published", _NAMESPACES) # Extract media group (description and thumbnail) media_group = entry.find("m:group", _NAMESPACES) desc_elem = None thumb_elem = None if media_group is not None: desc_elem = media_group.find("m:description", _NAMESPACES) thumb_elem = media_group.find("m:thumbnail", _NAMESPACES) # Skip entries with missing required fields if ( video_id_elem is None or video_id_elem.text is None or title_elem is None or title_elem.text is None or published_elem is None or published_elem.text is None or thumb_elem is None ): continue # Parse published timestamp (handle Z suffix) published_text = published_elem.text published_text = published_text.replace("Z", "+00:00") published_at = datetime.fromisoformat(published_text) # Extract description (may be missing) description = "" if desc_elem is not None and desc_elem.text is not None: description = desc_elem.text # Extract thumbnail URL thumbnail_url = thumb_elem.get("url", "") if not thumbnail_url: continue video = DiscoveredVideo( youtube_video_id=video_id_elem.text, title=title_elem.text, description=description, published_at=published_at, thumbnail_url=thumbnail_url, ) videos.append(video) except (ValueError, AttributeError) as e: logger.warning("Failed to parse entry in feed: %s", e) continue return videos