chore: add untracked stacks, scripts, and agent configs

- New stacks: beads-server, hermes-agent - Terragrunt tiers.tf for infra, phpipam, status-page - Secrets symlinks for vault, phpipam, hermes-agent - Scripts: cluster_manager, image_pull, containerd pullthrough setup - Frigate config, audiblez-web app source, n8n workflows dir - Claude agent: service-upgrade, reference: upgrade-config.json - Removed: claudeception skill, excalidraw empty submodule, temp listings [ci skip] Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-15 09:33:06 +00:00 · 2026-04-15 09:33:06 +00:00 · bcad200a23
commit bcad200a23
parent bd41bb9230
44 changed files with 3819 additions and 0 deletions
--- a/modules/kubernetes/ebook2audiobook/audiblez-web/backend/services/init.py
+++ b/modules/kubernetes/ebook2audiobook/audiblez-web/backend/services/init.py
--- a/modules/kubernetes/ebook2audiobook/audiblez-web/backend/services/chapter_embedder.py
+++ b/modules/kubernetes/ebook2audiobook/audiblez-web/backend/services/chapter_embedder.py
@ -0,0 +1,156 @@
+"""M4B chapter metadata embedding service."""
+
+import re
+import subprocess
+import tempfile
+from pathlib import Path
+
+from pydub import AudioSegment
+
+from .epub_parser import Chapter
+
+
+def get_chapter_audio_durations(output_dir: Path) -> list[int]:
+    """Measure every chapter WAV in a directory, in chapter order.
+
+    audiblez names its per-chapter audio ``{bookname}_chapter_{N}.wav``
+    (for example ``mybook_chapter_1.wav``). Files matching that pattern are
+    preferred; if there are none, every ``*.wav`` in the directory is used.
+
+    Args:
+        output_dir: Directory containing the WAV files
+
+    Returns:
+        Durations in milliseconds, ordered by chapter number. Files that
+        cannot be read are reported and left out of the list.
+    """
+    chapter_pattern = re.compile(r'chapter[_-]?(\d+)', re.IGNORECASE)
+
+    def chapter_order(wav_path: Path) -> int:
+        # Prefer an explicit "chapter<N>" marker, then any number in the
+        # stem; names without digits sort first (key 0).
+        stem = wav_path.stem
+        found = chapter_pattern.search(stem) or re.search(r'(\d+)', stem)
+        return int(found.group(1)) if found else 0
+
+    candidates = list(output_dir.glob("*_chapter_*.wav")) or list(output_dir.glob("*.wav"))
+    if not candidates:
+        print(f"No WAV files found in {output_dir}")
+        return []
+
+    ordered = sorted(candidates, key=chapter_order)
+    print(f"Found {len(ordered)} WAV files to process for durations")
+
+    durations: list[int] = []
+    for wav_path in ordered:
+        try:
+            # len() of a pydub AudioSegment is its duration in ms
+            length_ms = len(AudioSegment.from_file(str(wav_path)))
+        except Exception as exc:
+            print(f"  Error reading {wav_path}: {exc}")
+        else:
+            durations.append(length_ms)
+            print(f"  Chapter WAV: {wav_path.name} - {length_ms}ms ({length_ms/1000:.1f}s)")
+
+    return durations
+
+
+def generate_ffmpeg_metadata(chapters: list[Chapter], durations: list[int]) -> str:
+    """Build an FFMETADATA1 document with one [CHAPTER] section per chapter.
+
+    Chapters are laid out back to back starting at 0 ms. As a side effect
+    each paired chapter object gets its ``start_ms``, ``end_ms`` and
+    ``duration_ms`` attributes filled in. If the two lists differ in length,
+    only the leading pairs are used.
+
+    Args:
+        chapters: List of Chapter objects with titles
+        durations: List of durations in milliseconds for each chapter
+
+    Returns:
+        FFMETADATA1 formatted string
+    """
+    sections = [";FFMETADATA1\n"]
+    cursor_ms = 0
+
+    for chapter, length_ms in zip(chapters, durations):
+        chapter.start_ms = cursor_ms
+        chapter.duration_ms = length_ms
+        cursor_ms += length_ms
+        chapter.end_ms = cursor_ms
+
+        sections.append(
+            "\n[CHAPTER]\n"
+            "TIMEBASE=1/1000\n"
+            f"START={chapter.start_ms}\n"
+            f"END={chapter.end_ms}\n"
+            f"title={chapter.title}\n"
+        )
+
+    return "".join(sections)
+
+
+def embed_chapters_in_m4b(input_m4b: Path, metadata_content: str) -> Path:
+    """Re-mux M4B with chapter metadata using FFmpeg.
+
+    The audio stream is copied (not re-encoded) into a sibling
+    ``*.chaptered.m4b`` file, which then replaces the input file.
+
+    Args:
+        input_m4b: Path to the input M4B file
+        metadata_content: FFMETADATA1 formatted string
+
+    Returns:
+        Path to the output M4B with chapters (same as input, replaced)
+
+    Raises:
+        subprocess.CalledProcessError: If FFmpeg exits with a non-zero status.
+            The input file is left untouched in that case.
+    """
+    output_m4b = input_m4b.with_suffix('.chaptered.m4b')
+
+    # Write metadata to temporary file. FFmpeg reads metadata files as UTF-8,
+    # so do not depend on the process locale for the encoding.
+    with tempfile.NamedTemporaryFile(
+        mode='w', suffix='.txt', delete=False, encoding='utf-8'
+    ) as f:
+        f.write(metadata_content)
+        metadata_file = Path(f.name)
+
+    try:
+        cmd = [
+            'ffmpeg', '-y',
+            '-i', str(input_m4b),
+            '-f', 'ffmetadata', '-i', str(metadata_file),
+            '-map', '0:a',
+            '-map_metadata', '1',
+            '-c:a', 'copy',  # Copy audio without re-encoding
+            '-movflags', '+faststart+use_metadata_tags',
+            str(output_m4b)
+        ]
+
+        print(f"Running FFmpeg: {' '.join(cmd)}")
+        # check=True raises CalledProcessError on a non-zero exit status, so
+        # no separate returncode test is needed afterwards.
+        subprocess.run(cmd, check=True, capture_output=True, text=True)
+
+        # Swap the chaptered file into place in a single step; unlike
+        # unlink() + rename(), the original survives if the swap fails.
+        output_m4b.replace(input_m4b)
+
+        print(f"Successfully embedded chapters in {input_m4b}")
+        return input_m4b
+
+    except subprocess.CalledProcessError as e:
+        print(f"FFmpeg error: {e.stderr}")
+        # Remove any partially written output
+        output_m4b.unlink(missing_ok=True)
+        raise
+    finally:
+        # Clean up metadata file
+        metadata_file.unlink(missing_ok=True)
--- a/modules/kubernetes/ebook2audiobook/audiblez-web/backend/services/converter.py
+++ b/modules/kubernetes/ebook2audiobook/audiblez-web/backend/services/converter.py
@ -0,0 +1,291 @@
+import asyncio
+import uuid
+import os
+from datetime import datetime
+from pathlib import Path
+from typing import Callable, Optional
+import subprocess
+import json
+
+from models.schemas import Job, JobStatus, JobProgress, ChapterInfo
+from services.epub_parser import extract_chapters, Chapter
+from services.chapter_embedder import (
+    get_chapter_audio_durations,
+    generate_ffmpeg_metadata,
+    embed_chapters_in_m4b
+)
+
+
+class JobManager:
+    """Manages conversion jobs and their state with user isolation.
+
+    Job records are held only in the in-memory ``jobs`` dict. Files are kept
+    under ``<storage_path>/users/<user_id>/uploads`` and
+    ``<storage_path>/users/<user_id>/outputs/<job_id>``.
+    """
+
+    def __init__(self, storage_path: str = "/mnt"):
+        self.storage_path = Path(storage_path)
+        # job_id -> Job
+        self.jobs: dict[str, Job] = {}
+        # job_id -> callbacks invoked with a JobProgress on every progress update
+        self.progress_callbacks: dict[str, list[Callable]] = {}
+
+    def get_user_uploads_dir(self, user_id: str) -> Path:
+        """Get the uploads directory for a specific user, creating it if needed."""
+        # NOTE(review): user_id is joined into the path as-is; confirm callers
+        # never pass values containing path separators or "..".
+        user_dir = self.storage_path / "users" / user_id / "uploads"
+        user_dir.mkdir(parents=True, exist_ok=True)
+        return user_dir
+
+    def get_user_outputs_dir(self, user_id: str) -> Path:
+        """Get the outputs directory for a specific user, creating it if needed."""
+        user_dir = self.storage_path / "users" / user_id / "outputs"
+        user_dir.mkdir(parents=True, exist_ok=True)
+        return user_dir
+
+    def create_job(self, user_id: str, filename: str, voice: str, speed: float, use_gpu: bool) -> Job:
+        """Create a new PENDING conversion job for a user and register it."""
+        job_id = str(uuid.uuid4())
+        now = datetime.now()
+
+        job = Job(
+            id=job_id,
+            user_id=user_id,
+            filename=filename,
+            voice=voice,
+            speed=speed,
+            use_gpu=use_gpu,
+            status=JobStatus.PENDING,
+            created_at=now,
+            updated_at=now,
+        )
+
+        self.jobs[job_id] = job
+        return job
+
+    def get_job(self, job_id: str, user_id: Optional[str] = None) -> Optional[Job]:
+        """Get a job by ID. If user_id is provided, verify ownership.
+
+        Returns None when the job is unknown or belongs to another user.
+        """
+        job = self.jobs.get(job_id)
+        if job and user_id and job.user_id != user_id:
+            return None  # User doesn't own this job
+        return job
+
+    def get_user_jobs(self, user_id: str) -> list[Job]:
+        """Get all jobs for a specific user."""
+        return [job for job in self.jobs.values() if job.user_id == user_id]
+
+    def get_all_jobs(self) -> list[Job]:
+        """Get all jobs (admin use only)."""
+        return list(self.jobs.values())
+
+    def update_job_status(self, job_id: str, status: JobStatus, error: Optional[str] = None):
+        """Update job status (and error text, if given). Unknown job IDs are ignored."""
+        if job_id in self.jobs:
+            self.jobs[job_id].status = status
+            self.jobs[job_id].updated_at = datetime.now()
+            if error:
+                self.jobs[job_id].error = error
+
+    def update_job_progress(self, job_id: str, progress: float, current_chapter: Optional[str] = None, eta: Optional[str] = None):
+        """Update job progress and notify registered callbacks.
+
+        ``current_chapter`` and ``eta`` are not stored on the job; they are
+        only forwarded to the callbacks inside the JobProgress payload.
+        Unknown job IDs are ignored.
+        """
+        if job_id in self.jobs:
+            self.jobs[job_id].progress = progress
+            self.jobs[job_id].updated_at = datetime.now()
+
+            # Notify callbacks
+            if job_id in self.progress_callbacks:
+                progress_data = JobProgress(
+                    progress=progress,
+                    eta=eta,
+                    current_chapter=current_chapter,
+                    status=self.jobs[job_id].status
+                )
+                for callback in self.progress_callbacks[job_id]:
+                    try:
+                        # Callbacks are scheduled, not awaited, so a slow
+                        # listener does not hold up the conversion.
+                        asyncio.create_task(callback(progress_data))
+                    except Exception as e:
+                        print(f"Error in progress callback: {e}")
+
+    def register_progress_callback(self, job_id: str, callback: Callable):
+        """Register a callback for progress updates."""
+        if job_id not in self.progress_callbacks:
+            self.progress_callbacks[job_id] = []
+        self.progress_callbacks[job_id].append(callback)
+
+    def unregister_progress_callback(self, job_id: str, callback: Callable):
+        """Unregister a progress callback.
+
+        Unknown job IDs and callbacks that were never registered (or were
+        already removed) are ignored.
+        """
+        callbacks = self.progress_callbacks.get(job_id)
+        if callbacks is None:
+            return
+        if callback in callbacks:
+            callbacks.remove(callback)
+        if not callbacks:
+            del self.progress_callbacks[job_id]
+
+    async def run_conversion(self, job_id: str):
+        """Run the audiblez conversion in the background.
+
+        Marks the job PROCESSING, runs audiblez as a subprocess while parsing
+        its output for progress, then (for EPUB input that produced an M4B)
+        tries to embed chapter markers. The job ends as COMPLETED or FAILED;
+        exceptions are recorded on the job rather than raised.
+        """
+        job = self.jobs.get(job_id)
+        if not job:
+            return
+
+        try:
+            self.update_job_status(job_id, JobStatus.PROCESSING)
+
+            # Prepare user-specific paths
+            # NOTE(review): job.filename is joined into the path as-is;
+            # confirm the upload handler sanitizes it.
+            input_path = self.get_user_uploads_dir(job.user_id) / job.filename
+            output_dir = self.get_user_outputs_dir(job.user_id) / job_id
+            output_dir.mkdir(parents=True, exist_ok=True)
+
+            # Extract chapters from EPUB before conversion
+            chapters: list[Chapter] = []
+            if input_path.suffix.lower() == '.epub':
+                chapters = extract_chapters(input_path)
+                self.jobs[job_id].total_chapters = len(chapters)
+                print(f"Extracted {len(chapters)} chapters from EPUB")
+
+            # Build audiblez command - use the venv python/audiblez
+            cmd = [
+                "/app/audiblez/bin/audiblez",
+                str(input_path),
+                "-o", str(output_dir),
+                "-v", job.voice,
+                "-s", str(job.speed),
+            ]
+
+            if job.use_gpu:
+                cmd.append("-c")  # --cuda flag for GPU
+
+            # Run conversion
+            process = await asyncio.create_subprocess_exec(
+                *cmd,
+                stdout=asyncio.subprocess.PIPE,
+                stderr=asyncio.subprocess.PIPE,
+                cwd=str(self.storage_path)
+            )
+
+            # Monitor progress from stderr/stdout
+            async def read_stream(stream, is_stderr=False):
+                while True:
+                    line = await stream.readline()
+                    if not line:
+                        break
+
+                    # Tool output is not guaranteed to be valid UTF-8; a stray
+                    # byte must not abort the reader (and with it the job).
+                    line_str = line.decode(errors="replace").strip()
+                    print(f"{'[STDERR]' if is_stderr else '[STDOUT]'} {line_str}")
+
+                    # Parse progress from output
+                    # Audiblez outputs progress in various formats
+                    # We'll do a simple pattern match
+                    if "%" in line_str:
+                        try:
+                            # Take the whitespace-separated token right
+                            # before the first '%' as the percentage.
+                            num_str = line_str.split("%")[0].split()[-1]
+                            progress = float(num_str)
+                            self.update_job_progress(job_id, progress)
+                        except Exception:
+                            # Best effort: a line that merely contains '%'
+                            # is not an error. (Not a bare except, so task
+                            # cancellation and interpreter exit still
+                            # propagate.)
+                            pass
+
+                    # Check for chapter info
+                    if "Chapter" in line_str or "chapter" in line_str:
+                        self.update_job_progress(
+                            job_id,
+                            job.progress,
+                            current_chapter=line_str
+                        )
+
+            # Read both streams concurrently
+            await asyncio.gather(
+                read_stream(process.stdout),
+                read_stream(process.stderr, is_stderr=True)
+            )
+
+            # Wait for completion
+            returncode = await process.wait()
+
+            if returncode == 0:
+                # Find output file
+                output_files = list(output_dir.glob("*.m4b"))
+                if not output_files:
+                    output_files = list(output_dir.glob("*.mp3"))
+
+                if output_files:
+                    output_file = output_files[0]
+
+                    # Embed chapter metadata if we have chapters and WAV files.
+                    # Only done for M4B output: the embedder re-muxes into an
+                    # M4B container and renames it over the input, which
+                    # would mislabel an MP3 fallback file.
+                    if chapters and output_file.suffix.lower() == ".m4b":
+                        try:
+                            durations = get_chapter_audio_durations(output_dir)
+                            print(f"Found {len(durations)} chapter audio durations")
+
+                            if durations:
+                                # Match chapter count with duration count
+                                num_chapters = min(len(chapters), len(durations))
+                                if num_chapters != len(chapters):
+                                    print(f"Warning: chapter count ({len(chapters)}) != duration count ({len(durations)})")
+
+                                metadata = generate_ffmpeg_metadata(chapters[:num_chapters], durations[:num_chapters])
+                                embed_chapters_in_m4b(output_file, metadata)
+
+                                # Store chapter info in job for API access
+                                self.jobs[job_id].chapters = [
+                                    ChapterInfo(
+                                        title=c.title,
+                                        start_ms=c.start_ms,
+                                        end_ms=c.end_ms
+                                    )
+                                    for c in chapters[:num_chapters]
+                                ]
+                                print(f"Embedded {num_chapters} chapters in M4B")
+                            else:
+                                print("No WAV files found for chapter duration calculation")
+                        except Exception as e:
+                            print(f"Failed to embed chapters: {e}")
+                            # Continue without chapter embedding - non-fatal error
+
+                    self.jobs[job_id].output_file = str(output_file.name)
+                    self.update_job_status(job_id, JobStatus.COMPLETED)
+                    self.update_job_progress(job_id, 100.0)
+                else:
+                    self.update_job_status(job_id, JobStatus.FAILED, "No output file generated")
+            else:
+                self.update_job_status(job_id, JobStatus.FAILED, f"Conversion failed with code {returncode}")
+
+        except Exception as e:
+            print(f"Conversion error: {e}")
+            self.update_job_status(job_id, JobStatus.FAILED, str(e))
+
+    def delete_job(self, job_id: str, user_id: str) -> bool:
+        """Delete a job and its output files.
+
+        Returns False when the job is unknown or not owned by ``user_id``.
+        """
+        job = self.get_job(job_id, user_id)
+        if not job:
+            return False
+
+        # Delete output directory if exists
+        output_dir = self.get_user_outputs_dir(user_id) / job_id
+        if output_dir.exists():
+            import shutil
+            shutil.rmtree(output_dir)
+
+        # Remove from jobs dict, and drop any listeners still attached so
+        # they do not accumulate for jobs that no longer exist.
+        del self.jobs[job_id]
+        self.progress_callbacks.pop(job_id, None)
+        return True
+
+    def get_user_audiobooks(self, user_id: str) -> list[dict]:
+        """List all audiobook files found in a user's output directories.
+
+        Each entry has ``id`` (the job directory name), ``filename``,
+        ``size`` in bytes and ``created_at`` (the file's modification time).
+        """
+        outputs_dir = self.get_user_outputs_dir(user_id)
+        audiobooks = []
+
+        if outputs_dir.exists():
+            for job_dir in outputs_dir.iterdir():
+                if job_dir.is_dir():
+                    # Look for m4b or mp3 files
+                    audio_files = list(job_dir.glob("*.m4b")) + list(job_dir.glob("*.mp3"))
+                    for audio_file in audio_files:
+                        stat = audio_file.stat()
+                        audiobooks.append({
+                            "id": job_dir.name,
+                            "filename": audio_file.name,
+                            "size": stat.st_size,
+                            "created_at": stat.st_mtime,
+                        })
+
+        # Sort by creation time, newest first
+        audiobooks.sort(key=lambda x: x["created_at"], reverse=True)
+        return audiobooks
+
+
+# Global job manager instance, created at import time with the default
+# storage path ("/mnt").
+job_manager = JobManager()
--- a/modules/kubernetes/ebook2audiobook/audiblez-web/backend/services/epub_parser.py
+++ b/modules/kubernetes/ebook2audiobook/audiblez-web/backend/services/epub_parser.py
@ -0,0 +1,166 @@
+"""EPUB chapter extraction service.
+
+This parser attempts to match audiblez's chapter detection logic to ensure
+the extracted chapters align with the WAV files audiblez produces.
+
+audiblez iterates through EPUB ITEM_DOCUMENTs and uses is_chapter() to determine
+if a document is a chapter based on content length (100+ chars) and filename patterns.
+"""
+
+import re
+from dataclasses import dataclass
+from pathlib import Path
+
+from bs4 import BeautifulSoup
+from ebooklib import epub, ITEM_DOCUMENT
+
+
+@dataclass
+class Chapter:
+    """Represents a chapter extracted from an EPUB.
+
+    The three timing fields default to 0; this module never sets them, so
+    they stay 0 until a caller assigns real values.
+    """
+    title: str  # display title of the chapter
+    index: int  # position of the chapter in the extracted list
+    duration_ms: int = 0  # presumably length of the chapter audio in ms (per the name)
+    start_ms: int = 0  # presumably start offset within the audiobook in ms
+    end_ms: int = 0  # presumably end offset within the audiobook in ms
+
+
+def sanitize_title(title: str) -> str:
+    """Make a title safe to write as a value in an FFMETADATA file.
+
+    Characters with special meaning in that format are replaced or dropped
+    rather than escaped: ``=`` and ``;`` become ``-``, a newline becomes a
+    space, and ``#``, backslash and carriage return are removed. Surrounding
+    whitespace is stripped.
+
+    Args:
+        title: Raw title text; may be empty or None.
+
+    Returns:
+        The cleaned title, or "Untitled" if nothing is left after cleaning.
+    """
+    if not title:
+        return "Untitled"
+    # One pass over the string instead of six chained replace() calls.
+    cleaned = title.translate(str.maketrans({
+        '=': '-',
+        ';': '-',
+        '#': None,
+        '\\': None,
+        '\n': ' ',
+        '\r': None,
+    })).strip()
+    # A title made only of removed characters or whitespace would otherwise
+    # come back as an empty string.
+    return cleaned or "Untitled"
+
+
+def is_chapter(text: str, filename: str) -> bool:
+    """Decide whether an EPUB document should count as a chapter.
+
+    Intended to mirror audiblez's is_chapter() logic:
+    - Documents with fewer than 100 characters of text never qualify
+    - Otherwise a chapter-like filename qualifies the document
+    - Otherwise the text must be longer than 1000 characters
+    """
+    text_length = len(text)
+    if text_length < 100:
+        return False
+
+    # Filename fragments that indicate a chapter
+    filename_markers = (
+        r'chapter',
+        r'part[_-]?\d+',
+        r'split[_-]?\d+',
+        r'ch[_-]?\d+',
+        r'chap[_-]?\d+',
+        r'sect',          # section
+        r'content',
+        r'text',
+    )
+    lowered_name = filename.lower()
+    if any(re.search(marker, lowered_name) for marker in filename_markers):
+        return True
+
+    # Substantial content is likely a chapter even without a filename match
+    return text_length > 1000
+
+
+def extract_title_from_content(soup: BeautifulSoup, filename: str, index: int) -> str:
+    """Pick a chapter title from the document, its filename, or its position.
+
+    Order of preference: the first non-empty ``title``/``h1``/``h2``/``h3``
+    element (cut to 100 characters), then a tidied-up filename stem shorter
+    than 50 characters, then "Chapter <index + 1>".
+    """
+    for tag_name in ('title', 'h1', 'h2', 'h3'):
+        node = soup.find(tag_name)
+        if not node:
+            continue
+        heading = node.get_text(strip=True)
+        if not heading:
+            continue
+        # Truncate long titles
+        if len(heading) > 100:
+            return heading[:97] + "..."
+        return heading
+
+    # No usable heading: derive a label from the filename stem instead
+    label = Path(filename).stem
+    label = re.sub(r'^(chapter|chap|ch)[_-]?', 'Chapter ', label, flags=re.IGNORECASE)
+    label = re.sub(r'[_-]', ' ', label)
+
+    if not label or len(label) >= 50:
+        return f"Chapter {index + 1}"
+    return label.title()
+
+
+def extract_chapters(epub_path: Path) -> list[Chapter]:
+    """Extract chapter titles using audiblez-style chapter detection.
+
+    Every ITEM_DOCUMENT in the book is visited in order; a document becomes
+    a chapter when is_chapter() accepts its text and filename. The aim is
+    for the chapter count to line up with the WAV files audiblez produces.
+
+    Args:
+        epub_path: Path to the EPUB file
+
+    Returns:
+        List of Chapter objects with title and index; empty if the EPUB
+        cannot be read. Documents that fail to parse are reported and skipped.
+    """
+    try:
+        book = epub.read_epub(str(epub_path))
+    except Exception as e:
+        print(f"Failed to read EPUB: {e}")
+        return []
+
+    # Tags whose text counts towards a document's length (matching audiblez)
+    text_tags = ['title', 'p', 'h1', 'h2', 'h3', 'h4', 'li']
+    found: list[Chapter] = []
+
+    for document in book.get_items():
+        if document.get_type() != ITEM_DOCUMENT:
+            continue
+
+        try:
+            soup = BeautifulSoup(document.get_content(), features='lxml')
+            snippets = (node.get_text(strip=True) for node in soup.find_all(text_tags))
+            body_text = ' '.join(snippet for snippet in snippets if snippet)
+            doc_name = document.get_name() or ""
+
+            if not is_chapter(body_text, doc_name):
+                continue
+
+            # The next chapter index is simply how many we have so far
+            position = len(found)
+            heading = extract_title_from_content(soup, doc_name, position)
+            found.append(Chapter(title=sanitize_title(heading), index=position))
+
+        except Exception as e:
+            print(f"Error processing document {document.get_name()}: {e}")
+
+    print(f"Extracted {len(found)} chapters from EPUB (audiblez-style detection)")
+
+    # Debug: print first few chapters
+    for number, chapter in enumerate(found[:5], start=1):
+        print(f"  {number}. {chapter.title}")
+    if len(found) > 5:
+        print(f"  ... and {len(found) - 5} more")
+
+    return found
--- a/modules/kubernetes/ebook2audiobook/audiblez-web/backend/services/voices.py
+++ b/modules/kubernetes/ebook2audiobook/audiblez-web/backend/services/voices.py
@ -0,0 +1,97 @@
+from models.schemas import Voice
+
+# Voice catalog from Kokoro-82M (used by audiblez)
+# https://huggingface.co/hexgrad/Kokoro-82M/blob/main/VOICES.md
+def _build_voice_catalog() -> dict:
+    """Build the voice catalog from a compact per-language table.
+
+    Voice ids follow ``<language code><gender letter>_<name>`` (for example
+    ``af_heart``); the display name is the capitalized name part. Entries are
+    inserted language by language, female voices before male ones.
+    """
+    # (language label, id language code, female voice names, male voice names)
+    table = (
+        ("American English", "a",
+         ("heart", "alloy", "aoede", "bella", "jessica", "kore", "nicole",
+          "nova", "river", "sarah", "sky"),
+         ("adam", "echo", "eric", "fenrir", "liam", "michael", "onyx",
+          "puck", "santa")),
+        ("British English", "b",
+         ("alice", "emma", "isabella", "lily"),
+         ("daniel", "fable", "george", "lewis")),
+        ("Japanese", "j",
+         ("alpha", "gongitsune", "nezumi", "tebukuro"),
+         ("kumo",)),
+        ("Mandarin Chinese", "z",
+         ("xiaobei", "xiaoni", "xiaoxiao", "xiaoyi"),
+         ("yunjian", "yunxi", "yunxia", "yunyang")),
+        ("Spanish", "e", ("dora",), ("alex", "santa")),
+        ("French", "f", ("siwis",), ()),
+        ("Hindi", "h", ("alpha", "beta"), ("omega", "psi")),
+        ("Italian", "i", ("sara",), ("nicola",)),
+        ("Brazilian Portuguese", "p", ("dora",), ("alex", "santa")),
+    )
+
+    catalog = {}
+    for language, code, female, male in table:
+        for gender, names in (("F", female), ("M", male)):
+            for short_name in names:
+                voice_id = f"{code}{gender.lower()}_{short_name}"
+                catalog[voice_id] = Voice(
+                    id=voice_id,
+                    name=short_name.capitalize(),
+                    language=language,
+                    gender=gender,
+                )
+    return catalog
+
+
+VOICE_CATALOG = _build_voice_catalog()
+
+
+def get_all_voices() -> list[Voice]:
+    """Return every voice in the catalog, in catalog order."""
+    return [*VOICE_CATALOG.values()]
+
+
+def get_voice(voice_id: str) -> Voice | None:
+    """Look up one voice by its ID; returns None for an unknown ID."""
+    try:
+        return VOICE_CATALOG[voice_id]
+    except KeyError:
+        return None
+
+
+def get_voices_by_language() -> dict[str, list[Voice]]:
+    """Group the catalog's voices under their language label."""
+    by_language: dict[str, list[Voice]] = {}
+    for entry in VOICE_CATALOG.values():
+        by_language.setdefault(entry.language, []).append(entry)
+    return by_language