feat(meet-kevin): RSS poller for YouTube uploads
This commit is contained in:
parent
8edcb070ed
commit
8ce3ede09c
4 changed files with 1237 additions and 0 deletions
130
services/meet_kevin_watcher/rss_poller.py
Normal file
130
services/meet_kevin_watcher/rss_poller.py
Normal file
|
|
@ -0,0 +1,130 @@
|
|||
"""YouTube RSS feed poller for Meet Kevin channel."""
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from xml.etree import ElementTree as ET
|
||||
|
||||
import httpx
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Prefix -> namespace URI table handed to ElementTree's find()/findall() so the
# feed can be queried with short prefixed paths such as "a:entry" or
# "yt:videoId" instead of fully qualified "{uri}tag" names.
_NAMESPACES = {
    # Atom syndication format (feed/entry/title/published elements).
    "a": "http://www.w3.org/2005/Atom",
    # YouTube-specific extension elements (e.g. videoId).
    "yt": "http://www.youtube.com/xml/schemas/2015",
    # Media RSS elements (group/description/thumbnail).
    "m": "http://search.yahoo.com/mrss/",
}
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class DiscoveredVideo:
    """Immutable record describing one upload found in a YouTube RSS feed.

    Instances are frozen: fields cannot be reassigned after construction, and
    the generated ``__eq__``/``__hash__`` compare by field values.

    Attributes:
        youtube_video_id: The video's YouTube identifier.
        title: The video title.
        description: The video description text.
        published_at: Publication timestamp of the video.
        thumbnail_url: URL of the video's thumbnail image.
    """

    youtube_video_id: str
    title: str
    description: str
    published_at: datetime
    thumbnail_url: str
|
||||
|
||||
|
||||
async def fetch_feed(channel_id: str, client: httpx.AsyncClient) -> bytes:
    """Download the YouTube uploads feed for a single channel.

    Args:
        channel_id: YouTube channel ID (e.g., "UCUvvj5lwue7PspotMDjk5UA").
        client: httpx AsyncClient supplied by the caller; it is used for one
            GET request and is not closed here.

    Returns:
        The raw response body (feed XML) on success. If the request raises
        any ``httpx.HTTPError`` -- including the error ``raise_for_status()``
        raises for an unsuccessful status code -- a warning is logged and
        ``b""`` is returned instead of propagating the exception.
    """
    feed_url = f"https://www.youtube.com/feeds/videos.xml?channel_id={channel_id}"

    try:
        resp = await client.get(feed_url, timeout=15.0)
        resp.raise_for_status()
        body = resp.content
    except httpx.HTTPError as exc:
        # Best-effort poller: report the failure and hand back an empty
        # payload so the caller can simply try again on the next poll.
        logger.warning("Failed to fetch feed from %s: %s", feed_url, exc)
        body = b""

    return body
|
||||
|
||||
|
||||
def _element_text(element: "ET.Element | None") -> str:
    """Return *element*'s text with surrounding whitespace removed.

    A missing element (``None``) or an element without text yields ``""``,
    so callers can treat "absent" and "blank" uniformly with a truth test.
    """
    if element is None or element.text is None:
        return ""
    return element.text.strip()


def _parse_entry(entry: ET.Element) -> "DiscoveredVideo | None":
    """Build a DiscoveredVideo from one Atom ``<entry>`` element.

    Args:
        entry: An ``<entry>`` element from the feed.

    Returns:
        The parsed video, or ``None`` when a required field (video id, title,
        published timestamp, thumbnail element or its ``url`` attribute) is
        missing or empty.

    Raises:
        ValueError: If the published timestamp is not a valid ISO 8601 string.
    """
    # Identifier and timestamp are machine-read tokens: padding around them
    # (e.g. from pretty-printed XML) is never meaningful, so strip it and
    # treat a blank value as missing.
    video_id = _element_text(entry.find("yt:videoId", _NAMESPACES))
    published_text = _element_text(entry.find("a:published", _NAMESPACES))
    title_elem = entry.find("a:title", _NAMESPACES)

    # Description and thumbnail live inside the media group.
    media_group = entry.find("m:group", _NAMESPACES)
    desc_elem = None
    thumb_elem = None
    if media_group is not None:
        desc_elem = media_group.find("m:description", _NAMESPACES)
        thumb_elem = media_group.find("m:thumbnail", _NAMESPACES)

    if (
        not video_id
        or not published_text
        or title_elem is None
        or title_elem.text is None
        or thumb_elem is None
    ):
        return None

    # datetime.fromisoformat() only accepts a trailing "Z" from Python 3.11
    # on, so rewrite it as an explicit UTC offset. Only the suffix is
    # touched; any other "Z" in the string is left for fromisoformat to
    # reject.
    if published_text.endswith("Z"):
        published_text = published_text[:-1] + "+00:00"
    published_at = datetime.fromisoformat(published_text)

    thumbnail_url = thumb_elem.get("url", "")
    if not thumbnail_url:
        return None

    # Free-text fields are returned verbatim; the description is optional.
    description = ""
    if desc_elem is not None and desc_elem.text is not None:
        description = desc_elem.text

    return DiscoveredVideo(
        youtube_video_id=video_id,
        title=title_elem.text,
        description=description,
        published_at=published_at,
        thumbnail_url=thumbnail_url,
    )


def parse_feed(xml_bytes: bytes) -> list[DiscoveredVideo]:
    """Parse YouTube RSS feed XML and extract videos.

    Args:
        xml_bytes: Raw XML bytes from YouTube RSS feed.

    Returns:
        List of DiscoveredVideo objects in feed order. Returns an empty list
        on empty input, on an XML parse error (logged as a warning), or if no
        valid entries are found. Entries with missing required fields are
        skipped silently; entries whose published timestamp cannot be parsed
        are skipped with a warning. This function does not raise for
        malformed feed content.
    """
    if not xml_bytes:
        return []

    # NOTE(review): xml.etree.ElementTree is documented as not secure against
    # maliciously constructed XML. This input is expected to come from
    # YouTube's feed endpoint; revisit if other sources are ever parsed.
    try:
        root = ET.fromstring(xml_bytes)
    except ET.ParseError as e:
        logger.warning("Failed to parse feed XML: %s", e)
        return []

    videos: list[DiscoveredVideo] = []
    for entry in root.findall("a:entry", _NAMESPACES):
        # One bad entry must not discard the rest of the feed.
        try:
            video = _parse_entry(entry)
        except ValueError as e:
            logger.warning("Failed to parse entry in feed: %s", e)
            continue
        if video is not None:
            videos.append(video)

    return videos
|
||||
Loading…
Add table
Add a link
Reference in a new issue