From e311cbe1032baaecdacb21f3b10368157f515c09 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Fri, 5 Jun 2026 20:38:13 +0000 Subject: [PATCH] chore(modules): remove vestigial audiblez-web copy + fix glossary note [ci skip] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit modules/kubernetes/ebook2audiobook/ held a tracked copy of the audiblez-web app source (24 files), sourced by no stack and built by no CI — audiblez-web is GHA-built from its own repo. Bulk-swept in 2026-04-15; removed. Also corrected CONTEXT.md: the "vestigial per-app dirs (immich/, ollama/, ...)" note was wrong — those were untracked local macOS cruft (._main.tf AppleDouble turds), never in the repo; cleaned from the working tree. modules/kubernetes/ now holds exactly the four factory modules (ingress_factory, nfs_volume, anubis_instance, setup_tls_secret). Co-Authored-By: Claude Opus 4.8 --- CONTEXT.md | 6 +- .../ebook2audiobook/audiblez-web/.gitignore | 53 --- .../ebook2audiobook/audiblez-web/Dockerfile | 35 -- .../ebook2audiobook/audiblez-web/README.md | 52 --- .../audiblez-web/backend/api/__init__.py | 0 .../audiblez-web/backend/api/auth.py | 104 ------ .../audiblez-web/backend/api/routes.py | 307 ------------------ .../audiblez-web/backend/api/websocket.py | 101 ------ .../audiblez-web/backend/main.py | 41 --- .../audiblez-web/backend/models/__init__.py | 0 .../audiblez-web/backend/models/schemas.py | 59 ---- .../audiblez-web/backend/requirements.txt | 11 - .../audiblez-web/backend/services/__init__.py | 0 .../backend/services/chapter_embedder.py | 156 --------- .../backend/services/converter.py | 291 ----------------- .../backend/services/epub_parser.py | 166 ---------- .../audiblez-web/backend/services/voices.py | 97 ------ .../audiblez-web/frontend/index.html | 13 - .../audiblez-web/frontend/src/App.svelte | 279 ---------------- .../audiblez-web/frontend/src/main.js | 6 - .../audiblez-web/frontend/src/stores/jobs.js | 28 -- .../audiblez-web/frontend/svelte.config.js | 5 - .../audiblez-web/frontend/vite.config.js | 15 - .../audiblez-web/generate_samples.py | 136 -------- .../audiblez-web/samples/.gitkeep | 2 - 25 files changed, 3 insertions(+), 1960 deletions(-) delete mode 100644 modules/kubernetes/ebook2audiobook/audiblez-web/.gitignore delete mode 100644 modules/kubernetes/ebook2audiobook/audiblez-web/Dockerfile delete mode 100644 modules/kubernetes/ebook2audiobook/audiblez-web/README.md delete mode 100644 modules/kubernetes/ebook2audiobook/audiblez-web/backend/api/__init__.py delete mode 100644 modules/kubernetes/ebook2audiobook/audiblez-web/backend/api/auth.py delete mode 100644 modules/kubernetes/ebook2audiobook/audiblez-web/backend/api/routes.py delete mode 100644 modules/kubernetes/ebook2audiobook/audiblez-web/backend/api/websocket.py delete mode 100644 modules/kubernetes/ebook2audiobook/audiblez-web/backend/main.py delete mode 100644 modules/kubernetes/ebook2audiobook/audiblez-web/backend/models/__init__.py delete mode 100644 modules/kubernetes/ebook2audiobook/audiblez-web/backend/models/schemas.py delete mode 100644 modules/kubernetes/ebook2audiobook/audiblez-web/backend/requirements.txt delete mode 100644 modules/kubernetes/ebook2audiobook/audiblez-web/backend/services/__init__.py delete mode 100644 modules/kubernetes/ebook2audiobook/audiblez-web/backend/services/chapter_embedder.py delete mode 100644 modules/kubernetes/ebook2audiobook/audiblez-web/backend/services/converter.py delete mode 100644 modules/kubernetes/ebook2audiobook/audiblez-web/backend/services/epub_parser.py delete mode 100644 modules/kubernetes/ebook2audiobook/audiblez-web/backend/services/voices.py delete mode 100644 modules/kubernetes/ebook2audiobook/audiblez-web/frontend/index.html delete mode 100644 modules/kubernetes/ebook2audiobook/audiblez-web/frontend/src/App.svelte delete mode 100644 modules/kubernetes/ebook2audiobook/audiblez-web/frontend/src/main.js delete mode 100644 modules/kubernetes/ebook2audiobook/audiblez-web/frontend/src/stores/jobs.js delete mode 100644 modules/kubernetes/ebook2audiobook/audiblez-web/frontend/svelte.config.js delete mode 100644 modules/kubernetes/ebook2audiobook/audiblez-web/frontend/vite.config.js delete mode 100644 modules/kubernetes/ebook2audiobook/audiblez-web/generate_samples.py delete mode 100644 modules/kubernetes/ebook2audiobook/audiblez-web/samples/.gitkeep diff --git a/CONTEXT.md b/CONTEXT.md index 712e353d..28fe5e83 100644 --- a/CONTEXT.md +++ b/CONTEXT.md @@ -16,11 +16,11 @@ _Avoid_: using "Stack" when you mean the running Service. **Module**: A unit of HCL consumed via `source =`. Two homes, two purposes: **shared** modules under the top-level `modules/` tree (reused across many Stacks) and **Stack-local** modules nested under `stacks//modules/` (one Stack only). Bare "Module" means the shared kind. -_Avoid_: "library", "package"; assuming everything under `modules/kubernetes/` is live — the per-app dirs (`immich/`, `ollama/`, `frigate/`, `crowdsec/`, …) are **vestigial**, sourced by nothing. +_Avoid_: "library", "package". **Factory module**: -A shared **Module** that hides convention (defaults, drift handling, secret wiring) behind a small input surface. The four in live use: `ingress_factory` (103 Stacks), `setup_tls_secret` (93), `nfs_volume` (41), `anubis_instance` (8) — every current shared module is a factory. -_Avoid_: "wrapper"; citing `k8s_app` / `helm_app` / `postgres_app` (these never existed in the repo, though `docs/architecture/overview.md` still names them). +A shared **Module** that hides convention (defaults, drift handling, secret wiring) behind a small input surface. `modules/kubernetes/` holds exactly four, all factories: `ingress_factory` (103 Stacks), `setup_tls_secret` (93), `nfs_volume` (41), `anubis_instance` (8). +_Avoid_: "wrapper"; citing `k8s_app` / `helm_app` / `postgres_app` (these never existed in the repo). **Stack-local module**: A single Stack's implementation factored into a nested `stacks//modules//`, sourced by that one Stack only — organisation, not reuse. ~31 Stacks (authentik, kyverno, dbaas, mailserver, metallb, cloudflared, technitium, …). The alternative to a **flat** Stack. diff --git a/modules/kubernetes/ebook2audiobook/audiblez-web/.gitignore b/modules/kubernetes/ebook2audiobook/audiblez-web/.gitignore deleted file mode 100644 index bfa32681..00000000 --- a/modules/kubernetes/ebook2audiobook/audiblez-web/.gitignore +++ /dev/null @@ -1,53 +0,0 @@ -# Python -__pycache__/ -*.py[cod] -*$py.class -*.so -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -*.egg-info/ -.installed.cfg -*.egg -venv/ -.venv/ -ENV/ - -# Node -node_modules/ -frontend/dist/ -npm-debug.log* -yarn-debug.log* -yarn-error.log* - -# IDE -.idea/ -.vscode/ -*.swp -*.swo -*~ - -# OS -.DS_Store -._* -Thumbs.db - -# Uploads and outputs (runtime data) -uploads/ -outputs/ - -# Environment -.env -.env.local -.env.*.local - diff --git a/modules/kubernetes/ebook2audiobook/audiblez-web/Dockerfile b/modules/kubernetes/ebook2audiobook/audiblez-web/Dockerfile deleted file mode 100644 index 55b11b30..00000000 --- a/modules/kubernetes/ebook2audiobook/audiblez-web/Dockerfile +++ /dev/null @@ -1,35 +0,0 @@ -FROM viktorbarzin/audiblez:latest - -# Install Node.js for building frontend -RUN apt-get update && \ - apt-get install -y curl && \ - curl -fsSL https://deb.nodesource.com/setup_22.x | bash - && \ - apt-get install -y nodejs && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - -WORKDIR /app - -# Build frontend -COPY frontend/package.json frontend/package-lock.json* ./frontend/ -WORKDIR /app/frontend -RUN npm install - -COPY frontend/ ./ -RUN npm run build - -# Install backend dependencies -WORKDIR /app/backend -COPY backend/requirements.txt ./ -RUN pip install --no-cache-dir --break-system-packages -r requirements.txt - -COPY backend/ ./ - -# Copy voice samples -COPY samples/ /app/samples/ - -WORKDIR /app/backend - -EXPOSE 8000 - -CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/modules/kubernetes/ebook2audiobook/audiblez-web/README.md b/modules/kubernetes/ebook2audiobook/audiblez-web/README.md deleted file mode 100644 index 45cf9a84..00000000 --- a/modules/kubernetes/ebook2audiobook/audiblez-web/README.md +++ /dev/null @@ -1,52 +0,0 @@ -# Audiblez Web UI - -Web interface for converting EPUB files to audiobooks using [audiblez](https://github.com/santinic/audiblez). - -image - -## Features - -- Upload EPUB files via drag & drop -- Select from 50+ voices across multiple languages -- Preview voice samples before converting -- Real-time progress updates via WebSocket -- Download completed audiobooks - -## Development - -### Backend - -```bash -cd backend -pip install -r requirements.txt -uvicorn main:app --reload -``` - -### Frontend - -```bash -cd frontend -npm install -npm run dev -``` - -### Voice Samples - -Generate voice samples (requires audiblez environment): - -```bash -python generate_samples.py samples/ -``` - -## Docker Build - -```bash -docker build -t audiblez-web . -docker run -p 8000:8000 -v /path/to/data:/mnt audiblez-web -``` - -## Deployment - -Deployed to Kubernetes via Terraform. The service mounts NFS storage at `/mnt` for: -- `/mnt/uploads` - Uploaded EPUB files -- `/mnt/outputs` - Generated audiobooks diff --git a/modules/kubernetes/ebook2audiobook/audiblez-web/backend/api/__init__.py b/modules/kubernetes/ebook2audiobook/audiblez-web/backend/api/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/modules/kubernetes/ebook2audiobook/audiblez-web/backend/api/auth.py b/modules/kubernetes/ebook2audiobook/audiblez-web/backend/api/auth.py deleted file mode 100644 index 6ce68d8c..00000000 --- a/modules/kubernetes/ebook2audiobook/audiblez-web/backend/api/auth.py +++ /dev/null @@ -1,104 +0,0 @@ -""" -Authentication module for extracting user identity from Authentik headers. - -When nginx ingress is protected with Authentik, these headers are forwarded: -- X-Authentik-Username: The user's username -- X-Authentik-Uid: Unique user ID (used for directory separation) -- X-Authentik-Email: User's email -- X-Authentik-Name: User's display name -- X-Authentik-Groups: Comma-separated group list -""" - -from dataclasses import dataclass -from fastapi import Request, HTTPException -from typing import Optional -import re - - -@dataclass -class User: - """Represents an authenticated user from Authentik.""" - uid: str - username: str - email: Optional[str] = None - name: Optional[str] = None - groups: list[str] = None - - def __post_init__(self): - if self.groups is None: - self.groups = [] - - -def sanitize_user_id(uid: str) -> str: - """ - Sanitize user ID for use as a directory name. - Only allows alphanumeric, hyphens, and underscores. - """ - if not uid: - raise ValueError("User ID cannot be empty") - - # Only allow safe characters for filesystem - safe_uid = re.sub(r'[^a-zA-Z0-9\-_]', '', uid) - - if not safe_uid: - raise ValueError("User ID contains no valid characters") - - # Limit length to prevent path issues - if len(safe_uid) > 64: - safe_uid = safe_uid[:64] - - return safe_uid - - -async def get_current_user(request: Request) -> User: - """ - Extract user information from Authentik headers. - - This is a FastAPI dependency that should be used on protected endpoints. - Raises 401 if user headers are not present (not authenticated). - """ - # Header names are case-insensitive, but commonly forwarded as: - uid = request.headers.get("X-Authentik-Uid") - username = request.headers.get("X-Authentik-Username") - email = request.headers.get("X-Authentik-Email") - name = request.headers.get("X-Authentik-Name") - groups_str = request.headers.get("X-Authentik-Groups", "") - - # For development/testing, check for alternative header names - if not uid: - uid = request.headers.get("X-Authentik-Userid") - if not uid: - uid = request.headers.get("Remote-User") - - if not uid or not username: - raise HTTPException( - status_code=401, - detail="Authentication required. Authentik headers not found." - ) - - try: - safe_uid = sanitize_user_id(uid) - except ValueError as e: - raise HTTPException(status_code=400, detail=str(e)) - - # Parse groups (comma-separated) - groups = [g.strip() for g in groups_str.split(",") if g.strip()] - - return User( - uid=safe_uid, - username=username, - email=email, - name=name, - groups=groups - ) - - -async def get_optional_user(request: Request) -> Optional[User]: - """ - Extract user information if available, or return None. - Use this for endpoints that work with or without authentication. - """ - try: - return await get_current_user(request) - except HTTPException: - return None diff --git a/modules/kubernetes/ebook2audiobook/audiblez-web/backend/api/routes.py b/modules/kubernetes/ebook2audiobook/audiblez-web/backend/api/routes.py deleted file mode 100644 index c474c8fa..00000000 --- a/modules/kubernetes/ebook2audiobook/audiblez-web/backend/api/routes.py +++ /dev/null @@ -1,307 +0,0 @@ -from fastapi import APIRouter, UploadFile, File, HTTPException, Depends -from fastapi.responses import FileResponse -from pydantic import BaseModel -from pathlib import Path -import shutil -import asyncio -import re - -from models.schemas import Voice, JobCreate, Job, JobProgress, ChapterInfo -from services.voices import get_all_voices, get_voices_by_language, get_voice -from services.converter import job_manager -from api.auth import User, get_current_user - -router = APIRouter(prefix="/api") - - -def sanitize_filename(filename: str, max_length: int = 200) -> str: - """ - Sanitize a filename to prevent path traversal and shell injection. - Only allows alphanumeric characters, spaces, hyphens, underscores, parentheses, and dots. - """ - if not filename: - raise ValueError("Filename cannot be empty") - - # Remove any path components (prevent path traversal) - filename = Path(filename).name - - # Only allow safe characters: alphanumeric, space, hyphen, underscore, parentheses, dot - # This regex removes anything that isn't in the allowed set - safe_filename = re.sub(r'[^a-zA-Z0-9\s\-_().]', '', filename) - - # Collapse multiple spaces/dots - safe_filename = re.sub(r'\s+', ' ', safe_filename) - safe_filename = re.sub(r'\.+', '.', safe_filename) - - # Strip leading/trailing whitespace and dots - safe_filename = safe_filename.strip(' .') - - # Limit length - if len(safe_filename) > max_length: - safe_filename = safe_filename[:max_length] - - if not safe_filename: - raise ValueError("Filename contains no valid characters") - - return safe_filename - - -class RenameRequest(BaseModel): - new_name: str - - -# ============================================================================ -# Voice endpoints (no auth required - public info) -# ============================================================================ - -@router.get("/voices", response_model=list[Voice]) -async def list_voices(): - """Get all available voices.""" - return get_all_voices() - - -@router.get("/voices/grouped") -async def list_voices_grouped(): - """Get voices grouped by language.""" - return get_voices_by_language() - - -@router.get("/voices/{voice_id}/sample") -async def get_voice_sample(voice_id: str): - """Get voice sample audio file.""" - voice = get_voice(voice_id) - if not voice: - raise HTTPException(status_code=404, detail="Voice not found") - - # Try NFS storage first (persistent), then bundled samples - sample_path = Path("/mnt/samples") / f"{voice_id}.mp3" - if not sample_path.exists(): - sample_path = Path("/app/samples") / f"{voice_id}.mp3" - if not sample_path.exists(): - raise HTTPException(status_code=404, detail="Sample not available") - - return FileResponse(sample_path, media_type="audio/mpeg") - - -# ============================================================================ -# User info endpoint -# ============================================================================ - -@router.get("/me") -async def get_current_user_info(user: User = Depends(get_current_user)): - """Get current authenticated user info.""" - return { - "uid": user.uid, - "username": user.username, - "email": user.email, - "name": user.name, - "groups": user.groups - } - - -# ============================================================================ -# Upload endpoints (user-scoped) -# ============================================================================ - -@router.post("/upload") -async def upload_file(file: UploadFile = File(...), user: User = Depends(get_current_user)): - """Upload an EPUB file to user's directory.""" - if not file.filename.endswith(".epub"): - raise HTTPException(status_code=400, detail="Only EPUB files are supported") - - # Save file to user's uploads directory - upload_dir = job_manager.get_user_uploads_dir(user.uid) - - # Sanitize the filename - try: - safe_filename = sanitize_filename(file.filename) - except ValueError as e: - raise HTTPException(status_code=400, detail=str(e)) - - file_path = upload_dir / safe_filename - - with file_path.open("wb") as buffer: - shutil.copyfileobj(file.file, buffer) - - return {"filename": safe_filename, "size": file_path.stat().st_size} - - -# ============================================================================ -# Job endpoints (user-scoped) -# ============================================================================ - -@router.post("/jobs", response_model=Job) -async def create_job(job_create: JobCreate, user: User = Depends(get_current_user)): - """Create a new conversion job.""" - # Verify file exists in user's uploads - file_path = job_manager.get_user_uploads_dir(user.uid) / job_create.filename - if not file_path.exists(): - raise HTTPException(status_code=404, detail="File not found") - - # Verify voice exists - voice = get_voice(job_create.voice) - if not voice: - raise HTTPException(status_code=404, detail="Voice not found") - - # Create job with user ownership - job = job_manager.create_job( - user_id=user.uid, - filename=job_create.filename, - voice=job_create.voice, - speed=job_create.speed, - use_gpu=job_create.use_gpu - ) - - # Start conversion in background - asyncio.create_task(job_manager.run_conversion(job.id)) - - return job - - -@router.get("/jobs", response_model=list[Job]) -async def list_jobs(user: User = Depends(get_current_user)): - """Get all jobs for current user.""" - return job_manager.get_user_jobs(user.uid) - - -@router.get("/jobs/{job_id}", response_model=Job) -async def get_job(job_id: str, user: User = Depends(get_current_user)): - """Get a specific job (must be owned by user).""" - job = job_manager.get_job(job_id, user.uid) - if not job: - raise HTTPException(status_code=404, detail="Job not found") - return job - - -@router.get("/jobs/{job_id}/download") -async def download_job(job_id: str, user: User = Depends(get_current_user)): - """Download the completed audiobook.""" - job = job_manager.get_job(job_id, user.uid) - if not job: - raise HTTPException(status_code=404, detail="Job not found") - - if job.status != "completed": - raise HTTPException(status_code=400, detail="Job not completed") - - if not job.output_file: - raise HTTPException(status_code=404, detail="Output file not found") - - output_path = job_manager.get_user_outputs_dir(user.uid) / job_id / job.output_file - if not output_path.exists(): - raise HTTPException(status_code=404, detail="Output file not found") - - return FileResponse( - output_path, - media_type="audio/mp4", - filename=job.output_file - ) - - -@router.get("/jobs/{job_id}/chapters", response_model=list[ChapterInfo]) -async def get_job_chapters(job_id: str, user: User = Depends(get_current_user)): - """Get chapter metadata for a job's audiobook.""" - job = job_manager.get_job(job_id, user.uid) - if not job: - raise HTTPException(status_code=404, detail="Job not found") - - if job.status != "completed": - raise HTTPException(status_code=400, detail="Job not completed") - - return job.chapters - - -@router.delete("/jobs/{job_id}") -async def delete_job(job_id: str, user: User = Depends(get_current_user)): - """Delete a job (must be owned by user).""" - if not job_manager.delete_job(job_id, user.uid): - raise HTTPException(status_code=404, detail="Job not found") - - return {"status": "deleted"} - - -# ============================================================================ -# Audiobook endpoints (user-scoped) -# ============================================================================ - -@router.get("/audiobooks") -async def list_audiobooks(user: User = Depends(get_current_user)): - """List all completed audiobooks for current user.""" - return job_manager.get_user_audiobooks(user.uid) - - -@router.get("/audiobooks/{audiobook_id}/download") -async def download_audiobook(audiobook_id: str, user: User = Depends(get_current_user)): - """Download an audiobook by its ID (job folder name).""" - output_dir = job_manager.get_user_outputs_dir(user.uid) / audiobook_id - - if not output_dir.exists(): - raise HTTPException(status_code=404, detail="Audiobook not found") - - # Find the audio file - audio_files = list(output_dir.glob("*.m4b")) + list(output_dir.glob("*.mp3")) - if not audio_files: - raise HTTPException(status_code=404, detail="Audio file not found") - - audio_file = audio_files[0] - media_type = "audio/mp4" if audio_file.suffix == ".m4b" else "audio/mpeg" - - return FileResponse( - audio_file, - media_type=media_type, - filename=audio_file.name - ) - - -@router.delete("/audiobooks/{audiobook_id}") -async def delete_audiobook(audiobook_id: str, user: User = Depends(get_current_user)): - """Delete an audiobook and its folder.""" - output_dir = job_manager.get_user_outputs_dir(user.uid) / audiobook_id - - if not output_dir.exists(): - raise HTTPException(status_code=404, detail="Audiobook not found") - - # Delete all files in the directory and the directory itself - for file in output_dir.iterdir(): - file.unlink() - output_dir.rmdir() - - return {"status": "deleted"} - - -@router.patch("/audiobooks/{audiobook_id}/rename") -async def rename_audiobook(audiobook_id: str, rename_request: RenameRequest, user: User = Depends(get_current_user)): - """Rename an audiobook file. Input is sanitized to prevent path traversal and injection.""" - output_dir = job_manager.get_user_outputs_dir(user.uid) / audiobook_id - - if not output_dir.exists(): - raise HTTPException(status_code=404, detail="Audiobook not found") - - # Find the audio file - audio_files = list(output_dir.glob("*.m4b")) + list(output_dir.glob("*.mp3")) - if not audio_files: - raise HTTPException(status_code=404, detail="Audio file not found") - - current_file = audio_files[0] - current_extension = current_file.suffix # .m4b or .mp3 - - # Sanitize the new name - try: - safe_name = sanitize_filename(rename_request.new_name) - except ValueError as e: - raise HTTPException(status_code=400, detail=str(e)) - - # Ensure the new name has the correct extension - if not safe_name.lower().endswith(current_extension.lower()): - safe_name = safe_name + current_extension - - # Create the new path (same directory, new filename) - new_file = output_dir / safe_name - - # Check if target already exists - if new_file.exists() and new_file != current_file: - raise HTTPException(status_code=400, detail="A file with that name already exists") - - # Rename the file using pathlib (no shell commands) - current_file.rename(new_file) - - return {"status": "renamed", "new_filename": safe_name} diff --git a/modules/kubernetes/ebook2audiobook/audiblez-web/backend/api/websocket.py b/modules/kubernetes/ebook2audiobook/audiblez-web/backend/api/websocket.py deleted file mode 100644 index c8239afb..00000000 --- a/modules/kubernetes/ebook2audiobook/audiblez-web/backend/api/websocket.py +++ /dev/null @@ -1,101 +0,0 @@ -from fastapi import WebSocket, WebSocketDisconnect, HTTPException -from services.converter import job_manager -from models.schemas import JobProgress -from api.auth import sanitize_user_id - - -class ConnectionManager: - """Manages WebSocket connections for job progress updates.""" - - def __init__(self): - self.active_connections: dict[str, list[WebSocket]] = {} - - async def connect(self, job_id: str, websocket: WebSocket): - """Connect a websocket for a specific job.""" - await websocket.accept() - - if job_id not in self.active_connections: - self.active_connections[job_id] = [] - self.active_connections[job_id].append(websocket) - - def disconnect(self, job_id: str, websocket: WebSocket): - """Disconnect a websocket.""" - if job_id in self.active_connections: - if websocket in self.active_connections[job_id]: - self.active_connections[job_id].remove(websocket) - if not self.active_connections[job_id]: - del self.active_connections[job_id] - - async def send_progress(self, job_id: str, progress: JobProgress): - """Send progress update to all connected clients for a job.""" - if job_id in self.active_connections: - disconnected = [] - for connection in self.active_connections[job_id]: - try: - await connection.send_json(progress.model_dump()) - except: - disconnected.append(connection) - - # Remove disconnected clients - for conn in disconnected: - self.disconnect(job_id, conn) - - -manager = ConnectionManager() - - -def get_user_from_websocket(websocket: WebSocket) -> str | None: - """ - Extract user ID from websocket headers. - WebSocket connections receive HTTP headers during the upgrade handshake. - """ - # Try various header name formats - uid = websocket.headers.get("x-authentik-uid") - if not uid: - uid = websocket.headers.get("X-Authentik-Uid") - if not uid: - uid = websocket.headers.get("x-authentik-userid") - if not uid: - uid = websocket.headers.get("remote-user") - - if uid: - try: - return sanitize_user_id(uid) - except ValueError: - return None - return None - - -async def websocket_endpoint(websocket: WebSocket, job_id: str): - """WebSocket endpoint for job progress updates.""" - # Extract user from headers - user_id = get_user_from_websocket(websocket) - - # Verify job exists and user has access - job = job_manager.get_job(job_id, user_id) - if not job: - # Close connection if job not found or not owned by user - await websocket.close(code=4004, reason="Job not found or access denied") - return - - await manager.connect(job_id, websocket) - - # Register progress callback - async def progress_callback(progress: JobProgress): - await manager.send_progress(job_id, progress) - - job_manager.register_progress_callback(job_id, progress_callback) - - try: - # Send initial status - await websocket.send_json({ - "progress": job.progress, - "status": job.status, - }) - - # Wait for messages (keep-alive) - while True: - await websocket.receive_text() - except WebSocketDisconnect: - manager.disconnect(job_id, websocket) - job_manager.unregister_progress_callback(job_id, progress_callback) diff --git a/modules/kubernetes/ebook2audiobook/audiblez-web/backend/main.py b/modules/kubernetes/ebook2audiobook/audiblez-web/backend/main.py deleted file mode 100644 index 9ac643b4..00000000 --- a/modules/kubernetes/ebook2audiobook/audiblez-web/backend/main.py +++ /dev/null @@ -1,41 +0,0 @@ -from fastapi import FastAPI -from fastapi.staticfiles import StaticFiles -from fastapi.middleware.cors import CORSMiddleware -from pathlib import Path - -from api.routes import router -from api.websocket import websocket_endpoint - -app = FastAPI(title="Audiblez Web API", version="1.0.0") - -# CORS middleware for development -app.add_middleware( - CORSMiddleware, - allow_origins=["*"], - allow_credentials=True, - allow_methods=["*"], - allow_headers=["*"], -) - -# Health check - must be before static mount -@app.get("/health") -async def health_check(): - return {"status": "healthy"} - -# Include API routes -app.include_router(router) - -# WebSocket endpoint -@app.websocket("/ws/jobs/{job_id}") -async def websocket_route(websocket, job_id: str): - await websocket_endpoint(websocket, job_id) - -# Serve static frontend files - MUST BE LAST as it catches all routes -static_dir = Path("/app/frontend/dist") -if static_dir.exists(): - app.mount("/", StaticFiles(directory=str(static_dir), html=True), name="static") - - -if __name__ == "__main__": - import uvicorn - uvicorn.run(app, host="0.0.0.0", port=8000) diff --git a/modules/kubernetes/ebook2audiobook/audiblez-web/backend/models/__init__.py b/modules/kubernetes/ebook2audiobook/audiblez-web/backend/models/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/modules/kubernetes/ebook2audiobook/audiblez-web/backend/models/schemas.py b/modules/kubernetes/ebook2audiobook/audiblez-web/backend/models/schemas.py deleted file mode 100644 index 8fd53740..00000000 --- a/modules/kubernetes/ebook2audiobook/audiblez-web/backend/models/schemas.py +++ /dev/null @@ -1,59 +0,0 @@ -from pydantic import BaseModel, Field -from typing import Optional, Literal -from datetime import datetime -from enum import Enum - - -class JobStatus(str, Enum): - PENDING = "pending" - PROCESSING = "processing" - COMPLETED = "completed" - FAILED = "failed" - CANCELLED = "cancelled" - - -class Voice(BaseModel): - id: str - name: str - language: str - gender: Literal["M", "F"] - quality: str = "medium" - - -class JobCreate(BaseModel): - filename: str - voice: str - speed: float = Field(default=1.0, ge=0.5, le=2.0) - use_gpu: bool = True - - -class ChapterInfo(BaseModel): - """Chapter metadata extracted from EPUB and embedded in M4B.""" - title: str - start_ms: int - end_ms: int - - -class JobProgress(BaseModel): - progress: float = Field(ge=0, le=100) - eta: Optional[str] = None - current_chapter: Optional[str] = None - total_chapters: Optional[int] = None - status: JobStatus - - -class Job(BaseModel): - id: str - user_id: str # User who owns this job - filename: str - voice: str - speed: float - use_gpu: bool - status: JobStatus - progress: float = 0 - created_at: datetime - updated_at: datetime - error: Optional[str] = None - output_file: Optional[str] = None - total_chapters: int = 0 - chapters: list[ChapterInfo] = [] diff --git a/modules/kubernetes/ebook2audiobook/audiblez-web/backend/requirements.txt b/modules/kubernetes/ebook2audiobook/audiblez-web/backend/requirements.txt deleted file mode 100644 index 484ec2c6..00000000 --- a/modules/kubernetes/ebook2audiobook/audiblez-web/backend/requirements.txt +++ /dev/null @@ -1,11 +0,0 @@ -fastapi==0.115.0 -uvicorn[standard]==0.32.0 -python-multipart==0.0.12 -websockets==13.1 -aiofiles==24.1.0 -pydantic==2.9.2 -pydantic-settings==2.6.0 -ebooklib>=0.18 -pydub>=0.25.1 -beautifulsoup4>=4.12.0 -lxml>=5.0.0 diff --git a/modules/kubernetes/ebook2audiobook/audiblez-web/backend/services/__init__.py b/modules/kubernetes/ebook2audiobook/audiblez-web/backend/services/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/modules/kubernetes/ebook2audiobook/audiblez-web/backend/services/chapter_embedder.py b/modules/kubernetes/ebook2audiobook/audiblez-web/backend/services/chapter_embedder.py deleted file mode 100644 index 89b4f652..00000000 --- a/modules/kubernetes/ebook2audiobook/audiblez-web/backend/services/chapter_embedder.py +++ /dev/null @@ -1,156 +0,0 @@ -"""M4B chapter metadata embedding service.""" - -import re -import subprocess -import tempfile -from pathlib import Path - -from pydub import AudioSegment - -from .epub_parser import Chapter - - -def get_chapter_audio_durations(output_dir: Path) -> list[int]: - """Calculate duration of each chapter WAV file in milliseconds. - - audiblez produces files like: {bookname}_chapter_{N}.wav - e.g., mybook_chapter_1.wav, mybook_chapter_2.wav - - Args: - output_dir: Directory containing the WAV files - - Returns: - List of durations in milliseconds, ordered by chapter number - """ - durations = [] - - # Find all chapter WAV files - audiblez uses {name}_chapter_{N}.wav - wav_files = list(output_dir.glob("*_chapter_*.wav")) - - if not wav_files: - # Fallback: try any WAV files - wav_files = list(output_dir.glob("*.wav")) - - if not wav_files: - print(f"No WAV files found in {output_dir}") - return durations - - # Sort by extracting chapter number from filename using regex - # Pattern: look for _chapter_N or chapter_N in filename - def extract_chapter_num(path: Path) -> int: - name = path.stem - # Try to find chapter number with regex - handles various patterns - # e.g., "book_chapter_1", "mybook_chapter_12", "chapter_3_voice" - match = re.search(r'chapter[_-]?(\d+)', name, re.IGNORECASE) - if match: - return int(match.group(1)) - # Fallback: find any number in the filename - match = re.search(r'(\d+)', name) - if match: - return int(match.group(1)) - return 0 - - wav_files.sort(key=extract_chapter_num) - - print(f"Found {len(wav_files)} WAV files to process for durations") - for wav_file in wav_files: - try: - audio = AudioSegment.from_file(str(wav_file)) - durations.append(len(audio)) # duration in ms - print(f" Chapter WAV: {wav_file.name} - {len(audio)}ms ({len(audio)/1000:.1f}s)") - except Exception as e: - print(f" Error reading {wav_file}: {e}") - continue - - return durations - - -def generate_ffmpeg_metadata(chapters: list[Chapter], durations: list[int]) -> str: - """Generate FFmpeg FFMETADATA1 format string with chapter markers. - - Args: - chapters: List of Chapter objects with titles - durations: List of durations in milliseconds for each chapter - - Returns: - FFMETADATA1 formatted string - """ - metadata = ";FFMETADATA1\n" - - current_time_ms = 0 - - # Match chapters with durations - num_chapters = min(len(chapters), len(durations)) - - for i in range(num_chapters): - chapter = chapters[i] - duration = durations[i] - - chapter.start_ms = current_time_ms - chapter.end_ms = current_time_ms + duration - chapter.duration_ms = duration - - metadata += f"\n[CHAPTER]\n" - metadata += f"TIMEBASE=1/1000\n" - metadata += f"START={chapter.start_ms}\n" - metadata += f"END={chapter.end_ms}\n" - metadata += f"title={chapter.title}\n" - - current_time_ms = chapter.end_ms - - return metadata - - -def embed_chapters_in_m4b(input_m4b: Path, metadata_content: str) -> Path: - """Re-mux M4B with chapter metadata using FFmpeg. - - Args: - input_m4b: Path to the input M4B file - metadata_content: FFMETADATA1 formatted string - - Returns: - Path to the output M4B with chapters (same as input, replaced) - """ - output_m4b = input_m4b.with_suffix('.chaptered.m4b') - - # Write metadata to temporary file - with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as f: - f.write(metadata_content) - metadata_file = Path(f.name) - - try: - cmd = [ - 'ffmpeg', '-y', - '-i', str(input_m4b), - '-f', 'ffmetadata', '-i', str(metadata_file), - '-map', '0:a', - '-map_metadata', '1', - '-c:a', 'copy', # Copy audio without re-encoding - '-movflags', '+faststart+use_metadata_tags', - str(output_m4b) - ] - - print(f"Running FFmpeg: {' '.join(cmd)}") - result = subprocess.run(cmd, check=True, capture_output=True, text=True) - - if result.returncode != 0: - print(f"FFmpeg stderr: {result.stderr}") - raise RuntimeError(f"FFmpeg failed: {result.stderr}") - - # Replace original with chaptered version - input_m4b.unlink() - output_m4b.rename(input_m4b) - - print(f"Successfully embedded chapters in {input_m4b}") - return input_m4b - - except subprocess.CalledProcessError as e: - print(f"FFmpeg error: {e.stderr}") - # Clean up temp file - if output_m4b.exists(): - output_m4b.unlink() - raise - finally: - # Clean up metadata file - if metadata_file.exists(): - metadata_file.unlink() diff --git a/modules/kubernetes/ebook2audiobook/audiblez-web/backend/services/converter.py b/modules/kubernetes/ebook2audiobook/audiblez-web/backend/services/converter.py deleted file mode 100644 index ed7d54c1..00000000 --- a/modules/kubernetes/ebook2audiobook/audiblez-web/backend/services/converter.py +++ /dev/null @@ -1,291 +0,0 @@ -import asyncio -import uuid -import os -from datetime import datetime -from pathlib import Path -from typing import Callable, Optional -import subprocess -import json - -from models.schemas import Job, JobStatus, JobProgress, ChapterInfo -from services.epub_parser import extract_chapters, Chapter -from services.chapter_embedder import ( - get_chapter_audio_durations, - generate_ffmpeg_metadata, - embed_chapters_in_m4b -) - - -class JobManager: - """Manages conversion jobs and their state with user isolation.""" - - def __init__(self, storage_path: str = "/mnt"): - self.storage_path = Path(storage_path) - self.jobs: dict[str, Job] = {} - self.progress_callbacks: dict[str, list[Callable]] = {} - - def get_user_uploads_dir(self, user_id: str) -> Path: - """Get the uploads directory for a specific user.""" - user_dir = self.storage_path / "users" / user_id / "uploads" - user_dir.mkdir(parents=True, exist_ok=True) - return user_dir - - def get_user_outputs_dir(self, user_id: str) -> Path: - """Get the outputs directory for a specific user.""" - user_dir = self.storage_path / "users" / user_id / "outputs" - user_dir.mkdir(parents=True, exist_ok=True) - return user_dir - - def create_job(self, user_id: str, filename: str, voice: str, speed: float, use_gpu: bool) -> Job: - """Create a new conversion job for a user.""" - job_id = str(uuid.uuid4()) - now = datetime.now() - - job = Job( - id=job_id, - user_id=user_id, - filename=filename, - voice=voice, - speed=speed, - use_gpu=use_gpu, - status=JobStatus.PENDING, - created_at=now, - updated_at=now, - ) - - self.jobs[job_id] = job - return job - - def get_job(self, job_id: str, user_id: Optional[str] = None) -> Optional[Job]: - """Get a job by ID. If user_id is provided, verify ownership.""" - job = self.jobs.get(job_id) - if job and user_id and job.user_id != user_id: - return None # User doesn't own this job - return job - - def get_user_jobs(self, user_id: str) -> list[Job]: - """Get all jobs for a specific user.""" - return [job for job in self.jobs.values() if job.user_id == user_id] - - def get_all_jobs(self) -> list[Job]: - """Get all jobs (admin use only).""" - return list(self.jobs.values()) - - def update_job_status(self, job_id: str, status: JobStatus, error: Optional[str] = None): - """Update job status.""" - if job_id in self.jobs: - self.jobs[job_id].status = status - self.jobs[job_id].updated_at = datetime.now() - if error: - self.jobs[job_id].error = error - - def update_job_progress(self, job_id: str, progress: float, current_chapter: Optional[str] = None, eta: Optional[str] = None): - """Update job progress.""" - if job_id in self.jobs: - self.jobs[job_id].progress = progress - self.jobs[job_id].updated_at = datetime.now() - - # Notify callbacks - if job_id in self.progress_callbacks: - progress_data = JobProgress( - progress=progress, - eta=eta, - current_chapter=current_chapter, - status=self.jobs[job_id].status - ) - for callback in self.progress_callbacks[job_id]: - try: - asyncio.create_task(callback(progress_data)) - except Exception as e: - print(f"Error in progress callback: {e}") - - def register_progress_callback(self, job_id: str, callback: Callable): - """Register a callback for progress updates.""" - if job_id not in self.progress_callbacks: - self.progress_callbacks[job_id] = [] - self.progress_callbacks[job_id].append(callback) - - def unregister_progress_callback(self, job_id: str, callback: Callable): - """Unregister a progress callback.""" - if job_id in self.progress_callbacks: - self.progress_callbacks[job_id].remove(callback) - if not self.progress_callbacks[job_id]: - del self.progress_callbacks[job_id] - - async def run_conversion(self, job_id: str): - """Run the audiblez conversion in the background.""" - job = self.jobs.get(job_id) - if not job: - return - - try: - self.update_job_status(job_id, JobStatus.PROCESSING) - - # Prepare user-specific paths - input_path = self.get_user_uploads_dir(job.user_id) / job.filename - output_dir = self.get_user_outputs_dir(job.user_id) / job_id - output_dir.mkdir(parents=True, exist_ok=True) - - # Extract chapters from EPUB before conversion - chapters: list[Chapter] = [] - if input_path.suffix.lower() == '.epub': - chapters = extract_chapters(input_path) - self.jobs[job_id].total_chapters = len(chapters) - print(f"Extracted {len(chapters)} chapters from EPUB") - - # Build audiblez command - use the venv python/audiblez - cmd = [ - "/app/audiblez/bin/audiblez", - str(input_path), - "-o", str(output_dir), - "-v", job.voice, - "-s", str(job.speed), - ] - - if job.use_gpu: - cmd.append("-c") # --cuda flag for GPU - - # Run conversion - process = await asyncio.create_subprocess_exec( - *cmd, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - cwd=str(self.storage_path) - ) - - # Monitor progress from stderr/stdout - async def read_stream(stream, is_stderr=False): - while True: - line = await stream.readline() - if not line: - break - - line_str = line.decode().strip() - print(f"{'[STDERR]' if is_stderr else '[STDOUT]'} {line_str}") - - # Parse progress from output - # Audiblez outputs progress in various formats - # We'll do a simple pattern match - if "%" in line_str: - try: - # Try to extract percentage - parts = line_str.split("%") - if len(parts) > 1: - # Find the number before % - num_str = parts[0].split()[-1] - progress = float(num_str) - self.update_job_progress(job_id, progress) - except: - pass - - # Check for chapter info - if "Chapter" in line_str or "chapter" in line_str: - self.update_job_progress( - job_id, - job.progress, - current_chapter=line_str - ) - - # Read both streams concurrently - await asyncio.gather( - read_stream(process.stdout), - read_stream(process.stderr, is_stderr=True) - ) - - # Wait for completion - returncode = await process.wait() - - if returncode == 0: - # Find output file - output_files = list(output_dir.glob("*.m4b")) - if not output_files: - output_files = list(output_dir.glob("*.mp3")) - - if output_files: - output_file = output_files[0] - - # Embed chapter metadata if we have chapters and WAV files - if chapters: - try: - durations = get_chapter_audio_durations(output_dir) - print(f"Found {len(durations)} chapter audio durations") - - if durations: - # Match chapter count with duration count - num_chapters = min(len(chapters), len(durations)) - if num_chapters != len(chapters): - print(f"Warning: chapter count ({len(chapters)}) != duration count ({len(durations)})") - - metadata = generate_ffmpeg_metadata(chapters[:num_chapters], durations[:num_chapters]) - embed_chapters_in_m4b(output_file, metadata) - - # Store chapter info in job for API access - self.jobs[job_id].chapters = [ - ChapterInfo( - title=c.title, - start_ms=c.start_ms, - end_ms=c.end_ms - ) - for c in chapters[:num_chapters] - ] - print(f"Embedded {num_chapters} chapters in M4B") - else: - print("No WAV files found for chapter duration calculation") - except Exception as e: - print(f"Failed to embed chapters: {e}") - # Continue without chapter embedding - non-fatal error - - self.jobs[job_id].output_file = str(output_file.name) - self.update_job_status(job_id, JobStatus.COMPLETED) - self.update_job_progress(job_id, 100.0) - else: - self.update_job_status(job_id, JobStatus.FAILED, "No output file generated") - else: - self.update_job_status(job_id, JobStatus.FAILED, f"Conversion failed with code {returncode}") - - except Exception as e: - print(f"Conversion error: {e}") - self.update_job_status(job_id, JobStatus.FAILED, str(e)) - - def delete_job(self, job_id: str, user_id: str) -> bool: - """Delete a job and its output files.""" - job = self.get_job(job_id, user_id) - if not job: - return False - - # Delete output directory if exists - output_dir = self.get_user_outputs_dir(user_id) / job_id - if output_dir.exists(): - import shutil - shutil.rmtree(output_dir) - - # Remove from jobs dict - del self.jobs[job_id] - return True - - def get_user_audiobooks(self, user_id: str) -> list[dict]: - """List all completed audiobooks for a user.""" - outputs_dir = self.get_user_outputs_dir(user_id) - audiobooks = [] - - if outputs_dir.exists(): - for job_dir in outputs_dir.iterdir(): - if job_dir.is_dir(): - # Look for m4b or mp3 files - audio_files = list(job_dir.glob("*.m4b")) + list(job_dir.glob("*.mp3")) - for audio_file in audio_files: - stat = audio_file.stat() - audiobooks.append({ - "id": job_dir.name, - "filename": audio_file.name, - "size": stat.st_size, - "created_at": stat.st_mtime, - }) - - # Sort by creation time, newest first - audiobooks.sort(key=lambda x: x["created_at"], reverse=True) - return audiobooks - - -# Global job manager instance -job_manager = JobManager() diff --git a/modules/kubernetes/ebook2audiobook/audiblez-web/backend/services/epub_parser.py b/modules/kubernetes/ebook2audiobook/audiblez-web/backend/services/epub_parser.py deleted file mode 100644 index 5bd8f37c..00000000 --- a/modules/kubernetes/ebook2audiobook/audiblez-web/backend/services/epub_parser.py +++ /dev/null @@ -1,166 +0,0 @@ -"""EPUB chapter extraction service. - -This parser attempts to match audiblez's chapter detection logic to ensure -the extracted chapters align with the WAV files audiblez produces. - -audiblez iterates through EPUB ITEM_DOCUMENTs and uses is_chapter() to determine -if a document is a chapter based on content length (100+ chars) and filename patterns. -""" - -import re -from dataclasses import dataclass -from pathlib import Path - -from bs4 import BeautifulSoup -from ebooklib import epub, ITEM_DOCUMENT - - -@dataclass -class Chapter: - """Represents a chapter extracted from an EPUB.""" - title: str - index: int - duration_ms: int = 0 - start_ms: int = 0 - end_ms: int = 0 - - -def sanitize_title(title: str) -> str: - """Remove characters that break FFmpeg metadata format.""" - if not title: - return "Untitled" - # Escape special chars for FFmpeg FFMETADATA format - return (title - .replace('=', '-') - .replace(';', '-') - .replace('#', '') - .replace('\\', '') - .replace('\n', ' ') - .replace('\r', '') - .strip()) - - -def is_chapter(text: str, filename: str) -> bool: - """Determine if a document is a chapter. - - Matches audiblez's is_chapter() logic: - - Content must be over 100 characters - - Filename should match common chapter patterns - """ - if len(text) < 100: - return False - - # Check filename patterns that indicate a chapter - filename_lower = filename.lower() - chapter_patterns = [ - r'chapter', - r'part[_-]?\d+', - r'split[_-]?\d+', - r'ch[_-]?\d+', - r'chap[_-]?\d+', - r'sect', # section - r'content', - r'text', - ] - - for pattern in chapter_patterns: - if re.search(pattern, filename_lower): - return True - - # If content is substantial (1000+ chars), likely a chapter even without pattern match - if len(text) > 1000: - return True - - return False - - -def extract_title_from_content(soup: BeautifulSoup, filename: str, index: int) -> str: - """Extract a chapter title from the document content.""" - # Try to find title in common heading tags - for tag in ['title', 'h1', 'h2', 'h3']: - element = soup.find(tag) - if element and element.get_text(strip=True): - title = element.get_text(strip=True) - # Truncate long titles - if len(title) > 100: - title = title[:97] + "..." - return title - - # Fallback: use filename without extension - stem = Path(filename).stem - # Clean up common patterns - stem = re.sub(r'^(chapter|chap|ch)[_-]?', 'Chapter ', stem, flags=re.IGNORECASE) - stem = re.sub(r'[_-]', ' ', stem) - - if stem and len(stem) < 50: - return stem.title() - - return f"Chapter {index + 1}" - - -def extract_chapters(epub_path: Path) -> list[Chapter]: - """Extract chapter titles matching audiblez's chapter detection logic. - - audiblez determines chapters by: - 1. Iterating through ITEM_DOCUMENT items - 2. Checking is_chapter() based on content length and filename patterns - - This ensures our chapter count matches the WAV files audiblez produces. - - Args: - epub_path: Path to the EPUB file - - Returns: - List of Chapter objects with title and index - """ - try: - book = epub.read_epub(str(epub_path)) - except Exception as e: - print(f"Failed to read EPUB: {e}") - return [] - - chapters: list[Chapter] = [] - chapter_index = 0 - - # Iterate through documents like audiblez does - for item in book.get_items(): - if item.get_type() != ITEM_DOCUMENT: - continue - - try: - # Get content and parse with BeautifulSoup - content = item.get_content() - soup = BeautifulSoup(content, features='lxml') - - # Extract text from relevant tags (matching audiblez) - text_parts = [] - for tag in soup.find_all(['title', 'p', 'h1', 'h2', 'h3', 'h4', 'li']): - text = tag.get_text(strip=True) - if text: - text_parts.append(text) - - full_text = ' '.join(text_parts) - filename = item.get_name() or "" - - # Check if this document is a chapter - if is_chapter(full_text, filename): - title = extract_title_from_content(soup, filename, chapter_index) - chapters.append(Chapter( - title=sanitize_title(title), - index=chapter_index - )) - chapter_index += 1 - - except Exception as e: - print(f"Error processing document {item.get_name()}: {e}") - continue - - print(f"Extracted {len(chapters)} chapters from EPUB (audiblez-style detection)") - - # Debug: print first few chapters - for i, ch in enumerate(chapters[:5]): - print(f" {i+1}. {ch.title}") - if len(chapters) > 5: - print(f" ... and {len(chapters) - 5} more") - - return chapters diff --git a/modules/kubernetes/ebook2audiobook/audiblez-web/backend/services/voices.py b/modules/kubernetes/ebook2audiobook/audiblez-web/backend/services/voices.py deleted file mode 100644 index 5406888f..00000000 --- a/modules/kubernetes/ebook2audiobook/audiblez-web/backend/services/voices.py +++ /dev/null @@ -1,97 +0,0 @@ -from models.schemas import Voice - -# Voice catalog from Kokoro-82M (used by audiblez) -# https://huggingface.co/hexgrad/Kokoro-82M/blob/main/VOICES.md -VOICE_CATALOG = { - # American English - "af_heart": Voice(id="af_heart", name="Heart", language="American English", gender="F"), - "af_alloy": Voice(id="af_alloy", name="Alloy", language="American English", gender="F"), - "af_aoede": Voice(id="af_aoede", name="Aoede", language="American English", gender="F"), - "af_bella": Voice(id="af_bella", name="Bella", language="American English", gender="F"), - "af_jessica": Voice(id="af_jessica", name="Jessica", language="American English", gender="F"), - "af_kore": Voice(id="af_kore", name="Kore", language="American English", gender="F"), - "af_nicole": Voice(id="af_nicole", name="Nicole", language="American English", gender="F"), - "af_nova": Voice(id="af_nova", name="Nova", language="American English", gender="F"), - "af_river": Voice(id="af_river", name="River", language="American English", gender="F"), - "af_sarah": Voice(id="af_sarah", name="Sarah", language="American English", gender="F"), - "af_sky": Voice(id="af_sky", name="Sky", language="American English", gender="F"), - "am_adam": Voice(id="am_adam", name="Adam", language="American English", gender="M"), - "am_echo": Voice(id="am_echo", name="Echo", language="American English", gender="M"), - "am_eric": Voice(id="am_eric", name="Eric", language="American English", gender="M"), - "am_fenrir": Voice(id="am_fenrir", name="Fenrir", language="American English", gender="M"), - "am_liam": Voice(id="am_liam", name="Liam", language="American English", gender="M"), - "am_michael": Voice(id="am_michael", name="Michael", language="American English", gender="M"), - "am_onyx": Voice(id="am_onyx", name="Onyx", language="American English", gender="M"), - "am_puck": Voice(id="am_puck", name="Puck", language="American English", gender="M"), - "am_santa": Voice(id="am_santa", name="Santa", language="American English", gender="M"), - - # British English - "bf_alice": Voice(id="bf_alice", name="Alice", language="British English", gender="F"), - "bf_emma": Voice(id="bf_emma", name="Emma", language="British English", gender="F"), - "bf_isabella": Voice(id="bf_isabella", name="Isabella", language="British English", gender="F"), - "bf_lily": Voice(id="bf_lily", name="Lily", language="British English", gender="F"), - "bm_daniel": Voice(id="bm_daniel", name="Daniel", language="British English", gender="M"), - "bm_fable": Voice(id="bm_fable", name="Fable", language="British English", gender="M"), - "bm_george": Voice(id="bm_george", name="George", language="British English", gender="M"), - "bm_lewis": Voice(id="bm_lewis", name="Lewis", language="British English", gender="M"), - - # Japanese - "jf_alpha": Voice(id="jf_alpha", name="Alpha", language="Japanese", gender="F"), - "jf_gongitsune": Voice(id="jf_gongitsune", name="Gongitsune", language="Japanese", gender="F"), - "jf_nezumi": Voice(id="jf_nezumi", name="Nezumi", language="Japanese", gender="F"), - "jf_tebukuro": Voice(id="jf_tebukuro", name="Tebukuro", language="Japanese", gender="F"), - "jm_kumo": Voice(id="jm_kumo", name="Kumo", language="Japanese", gender="M"), - - # Mandarin Chinese - "zf_xiaobei": Voice(id="zf_xiaobei", name="Xiaobei", language="Mandarin Chinese", gender="F"), - "zf_xiaoni": Voice(id="zf_xiaoni", name="Xiaoni", language="Mandarin Chinese", gender="F"), - "zf_xiaoxiao": Voice(id="zf_xiaoxiao", name="Xiaoxiao", language="Mandarin Chinese", gender="F"), - "zf_xiaoyi": Voice(id="zf_xiaoyi", name="Xiaoyi", language="Mandarin Chinese", gender="F"), - "zm_yunjian": Voice(id="zm_yunjian", name="Yunjian", language="Mandarin Chinese", gender="M"), - "zm_yunxi": Voice(id="zm_yunxi", name="Yunxi", language="Mandarin Chinese", gender="M"), - "zm_yunxia": Voice(id="zm_yunxia", name="Yunxia", language="Mandarin Chinese", gender="M"), - "zm_yunyang": Voice(id="zm_yunyang", name="Yunyang", language="Mandarin Chinese", gender="M"), - - # Spanish - "ef_dora": Voice(id="ef_dora", name="Dora", language="Spanish", gender="F"), - "em_alex": Voice(id="em_alex", name="Alex", language="Spanish", gender="M"), - "em_santa": Voice(id="em_santa", name="Santa", language="Spanish", gender="M"), - - # French - "ff_siwis": Voice(id="ff_siwis", name="Siwis", language="French", gender="F"), - - # Hindi - "hf_alpha": Voice(id="hf_alpha", name="Alpha", language="Hindi", gender="F"), - "hf_beta": Voice(id="hf_beta", name="Beta", language="Hindi", gender="F"), - "hm_omega": Voice(id="hm_omega", name="Omega", language="Hindi", gender="M"), - "hm_psi": Voice(id="hm_psi", name="Psi", language="Hindi", gender="M"), - - # Italian - "if_sara": Voice(id="if_sara", name="Sara", language="Italian", gender="F"), - "im_nicola": Voice(id="im_nicola", name="Nicola", language="Italian", gender="M"), - - # Brazilian Portuguese - "pf_dora": Voice(id="pf_dora", name="Dora", language="Brazilian Portuguese", gender="F"), - "pm_alex": Voice(id="pm_alex", name="Alex", language="Brazilian Portuguese", gender="M"), - "pm_santa": Voice(id="pm_santa", name="Santa", language="Brazilian Portuguese", gender="M"), -} - - -def get_all_voices() -> list[Voice]: - """Get all available voices.""" - return list(VOICE_CATALOG.values()) - - -def get_voice(voice_id: str) -> Voice | None: - """Get a specific voice by ID.""" - return VOICE_CATALOG.get(voice_id) - - -def get_voices_by_language() -> dict[str, list[Voice]]: - """Get voices grouped by language.""" - grouped = {} - for voice in VOICE_CATALOG.values(): - if voice.language not in grouped: - grouped[voice.language] = [] - grouped[voice.language].append(voice) - return grouped diff --git a/modules/kubernetes/ebook2audiobook/audiblez-web/frontend/index.html b/modules/kubernetes/ebook2audiobook/audiblez-web/frontend/index.html deleted file mode 100644 index 3cab72f1..00000000 --- a/modules/kubernetes/ebook2audiobook/audiblez-web/frontend/index.html +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - Audiblez Web - EPUB to Audiobook - - - -
- - - diff --git a/modules/kubernetes/ebook2audiobook/audiblez-web/frontend/src/App.svelte b/modules/kubernetes/ebook2audiobook/audiblez-web/frontend/src/App.svelte deleted file mode 100644 index f23584da..00000000 --- a/modules/kubernetes/ebook2audiobook/audiblez-web/frontend/src/App.svelte +++ /dev/null @@ -1,279 +0,0 @@ - - -
-
-
-
-

Audiblez Web

-

Convert EPUB to Audiobook

-
- {#if currentUser} - - {/if} -
-
- -
-
-
- -
- -
- -
-
- -
-
- - -
- -
- -
- - - - {#if error} -

{error}

- {/if} -
- - - -
-
- - diff --git a/modules/kubernetes/ebook2audiobook/audiblez-web/frontend/src/main.js b/modules/kubernetes/ebook2audiobook/audiblez-web/frontend/src/main.js deleted file mode 100644 index e283a988..00000000 --- a/modules/kubernetes/ebook2audiobook/audiblez-web/frontend/src/main.js +++ /dev/null @@ -1,6 +0,0 @@ -import App from './App.svelte' -import { mount } from 'svelte' - -const app = mount(App, { target: document.getElementById('app') }) - -export default app diff --git a/modules/kubernetes/ebook2audiobook/audiblez-web/frontend/src/stores/jobs.js b/modules/kubernetes/ebook2audiobook/audiblez-web/frontend/src/stores/jobs.js deleted file mode 100644 index 6b1623d8..00000000 --- a/modules/kubernetes/ebook2audiobook/audiblez-web/frontend/src/stores/jobs.js +++ /dev/null @@ -1,28 +0,0 @@ -import { writable } from 'svelte/store'; - -function createJobsStore() { - const { subscribe, set, update } = writable([]); - - return { - subscribe, - set, - add: (job) => update(jobs => [...jobs, job]), - updateJob: (jobId, updates) => update(jobs => - jobs.map(j => j.id === jobId ? { ...j, ...updates } : j) - ), - remove: (jobId) => update(jobs => jobs.filter(j => j.id !== jobId)), - refresh: async () => { - try { - const response = await fetch('/api/jobs'); - if (response.ok) { - const jobs = await response.json(); - set(jobs); - } - } catch (e) { - console.error('Failed to fetch jobs:', e); - } - } - }; -} - -export const jobs = createJobsStore(); diff --git a/modules/kubernetes/ebook2audiobook/audiblez-web/frontend/svelte.config.js b/modules/kubernetes/ebook2audiobook/audiblez-web/frontend/svelte.config.js deleted file mode 100644 index 09a1bf7a..00000000 --- a/modules/kubernetes/ebook2audiobook/audiblez-web/frontend/svelte.config.js +++ /dev/null @@ -1,5 +0,0 @@ -import { vitePreprocess } from '@sveltejs/vite-plugin-svelte' - -export default { - preprocess: vitePreprocess() -} diff --git a/modules/kubernetes/ebook2audiobook/audiblez-web/frontend/vite.config.js b/modules/kubernetes/ebook2audiobook/audiblez-web/frontend/vite.config.js deleted file mode 100644 index 6bc60d2c..00000000 --- a/modules/kubernetes/ebook2audiobook/audiblez-web/frontend/vite.config.js +++ /dev/null @@ -1,15 +0,0 @@ -import { defineConfig } from 'vite' -import { svelte } from '@sveltejs/vite-plugin-svelte' - -export default defineConfig({ - plugins: [svelte()], - server: { - proxy: { - '/api': 'http://localhost:8000', - '/ws': { - target: 'ws://localhost:8000', - ws: true - } - } - } -}) diff --git a/modules/kubernetes/ebook2audiobook/audiblez-web/generate_samples.py b/modules/kubernetes/ebook2audiobook/audiblez-web/generate_samples.py deleted file mode 100644 index 1b0befd1..00000000 --- a/modules/kubernetes/ebook2audiobook/audiblez-web/generate_samples.py +++ /dev/null @@ -1,136 +0,0 @@ -#!/usr/bin/env python3 -""" -Generate voice samples for all available voices. -Run this script in an environment with audiblez installed. - -Usage: - python generate_samples.py [output_dir] -""" - -import os -import sys -from pathlib import Path - -# Sample text for voice preview -SAMPLE_TEXT = "The quick brown fox jumps over the lazy dog. This is a sample of my voice for audiobook narration." - -# All voices from Kokoro-82M (audiblez) -VOICES = [ - # American English (20 voices) - "af_alloy", "af_aoede", "af_bella", "af_heart", "af_jessica", "af_kore", - "af_nicole", "af_nova", "af_river", "af_sarah", "af_sky", - "am_adam", "am_echo", "am_eric", "am_fenrir", "am_liam", "am_michael", - "am_onyx", "am_puck", "am_santa", - # British English (8 voices) - "bf_alice", "bf_emma", "bf_isabella", "bf_lily", - "bm_daniel", "bm_fable", "bm_george", "bm_lewis", - # Spanish (3 voices) - "ef_dora", "em_alex", "em_santa", - # French (1 voice) - "ff_siwis", - # Hindi (4 voices) - "hf_alpha", "hf_beta", "hm_omega", "hm_psi", - # Italian (2 voices) - "if_sara", "im_nicola", - # Japanese (5 voices) - "jf_alpha", "jf_gongitsune", "jf_nezumi", "jf_tebukuro", "jm_kumo", - # Brazilian Portuguese (3 voices) - "pf_dora", "pm_alex", "pm_santa", - # Mandarin Chinese (8 voices) - "zf_xiaobei", "zf_xiaoni", "zf_xiaoxiao", "zf_xiaoyi", - "zm_yunjian", "zm_yunxi", "zm_yunxia", "zm_yunyang", -] - - -def generate_sample(voice: str, output_dir: Path): - """Generate a voice sample using kokoro TTS.""" - try: - from kokoro import KPipeline - - output_file = output_dir / f"{voice}.mp3" - if output_file.exists(): - print(f"Skipping {voice} - already exists") - return True - - print(f"Generating sample for {voice}...") - - # Map voice prefix to language code - lang_map = { - 'a': 'a', # American English - 'b': 'b', # British English - 'e': 'e', # Spanish - 'f': 'f', # French - 'h': 'h', # Hindi - 'i': 'i', # Italian - 'j': 'j', # Japanese - 'p': 'p', # Portuguese - 'z': 'z', # Chinese - } - - # Extract language code from voice (first letter) - lang_code = lang_map.get(voice[0], 'a') - - # Initialize the Kokoro pipeline - pipeline = KPipeline(lang_code=lang_code) - - # Generate audio - generator = pipeline(SAMPLE_TEXT, voice=voice, speed=1.0) - - # Collect all audio chunks - audio_chunks = [] - for _, _, audio in generator: - audio_chunks.append(audio) - - if audio_chunks: - import soundfile as sf - import numpy as np - - # Concatenate audio - audio = np.concatenate(audio_chunks) - - # Save as WAV first, then convert to MP3 - wav_file = output_dir / f"{voice}.wav" - sf.write(str(wav_file), audio, 24000) - - # Convert to MP3 using ffmpeg - import subprocess - result = subprocess.run([ - "ffmpeg", "-y", "-i", str(wav_file), - "-codec:a", "libmp3lame", "-qscale:a", "5", - str(output_file) - ], capture_output=True) - - # Remove WAV file - if wav_file.exists(): - wav_file.unlink() - - if result.returncode == 0: - print(f"Generated {output_file}") - return True - else: - print(f"FFmpeg failed for {voice}: {result.stderr.decode()}") - return False - else: - print(f"Failed to generate audio for {voice}") - return False - - except Exception as e: - print(f"Error generating sample for {voice}: {e}") - return False - - -def main(): - output_dir = Path(sys.argv[1]) if len(sys.argv) > 1 else Path("samples") - output_dir.mkdir(parents=True, exist_ok=True) - - print(f"Generating voice samples to {output_dir}") - print(f"Total voices: {len(VOICES)}") - - for voice in VOICES: - generate_sample(voice, output_dir) - - print("Done!") - - -if __name__ == "__main__": - main() diff --git a/modules/kubernetes/ebook2audiobook/audiblez-web/samples/.gitkeep b/modules/kubernetes/ebook2audiobook/audiblez-web/samples/.gitkeep deleted file mode 100644 index b046185d..00000000 --- a/modules/kubernetes/ebook2audiobook/audiblez-web/samples/.gitkeep +++ /dev/null @@ -1,2 +0,0 @@ -# Voice samples go here -# Run generate_samples.py to generate voice samples for all voices