chore: add untracked stacks, scripts, and agent configs

- New stacks: beads-server, hermes-agent
- Terragrunt tiers.tf for infra, phpipam, status-page
- Secrets symlinks for vault, phpipam, hermes-agent
- Scripts: cluster_manager, image_pull, containerd pullthrough setup
- Frigate config, audiblez-web app source, n8n workflows dir
- Claude agent: service-upgrade, reference: upgrade-config.json
- Removed: claudeception skill, excalidraw empty submodule, temp listings

[ci skip]

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Viktor Barzin 2026-04-15 09:33:06 +00:00
parent bd41bb9230
commit bcad200a23
44 changed files with 3819 additions and 0 deletions

View file

@ -0,0 +1,53 @@
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
venv/
.venv/
ENV/
# Node
node_modules/
frontend/dist/
npm-debug.log*
yarn-debug.log*
yarn-error.log*
# IDE
.idea/
.vscode/
*.swp
*.swo
*~
# OS
.DS_Store
._*
Thumbs.db
# Uploads and outputs (runtime data)
uploads/
outputs/
# Environment
.env
.env.local
.env.*.local

View file

@ -0,0 +1,35 @@
# Image for the Audiblez web UI: starts from the audiblez base image, builds
# the frontend bundle with Node.js, installs the backend's Python
# dependencies, and serves the FastAPI app with uvicorn on port 8000.
FROM viktorbarzin/audiblez:latest
# Install Node.js for building frontend
# (Node 22 via the NodeSource setup script; apt caches are removed in the same
# layer so they do not end up in the image.)
RUN apt-get update && \
apt-get install -y curl && \
curl -fsSL https://deb.nodesource.com/setup_22.x | bash - && \
apt-get install -y nodejs && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
WORKDIR /app
# Build frontend
# Only the package manifests are copied before `npm install`, so the install
# layer is reused until they change. The trailing `*` makes the lockfile optional.
COPY frontend/package.json frontend/package-lock.json* ./frontend/
WORKDIR /app/frontend
RUN npm install
COPY frontend/ ./
RUN npm run build
# Install backend dependencies
# requirements.txt is copied on its own first for the same layer-caching reason.
WORKDIR /app/backend
COPY backend/requirements.txt ./
# --break-system-packages lets pip install into the base image's
# externally-managed Python environment.
RUN pip install --no-cache-dir --break-system-packages -r requirements.txt
COPY backend/ ./
# Copy voice samples
COPY samples/ /app/samples/
# NOTE(review): this WORKDIR repeats the one set above and has no further effect.
WORKDIR /app/backend
EXPOSE 8000
# `main:app` resolves relative to the working directory (/app/backend).
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]

View file

@ -0,0 +1,52 @@
# Audiblez Web UI
Web interface for converting EPUB files to audiobooks using [audiblez](https://github.com/santinic/audiblez).
<img width="1702" height="1145" alt="image" src="https://github.com/user-attachments/assets/ba0f9090-a8e9-4550-9c9b-473058f19cbb" />
## Features
- Upload EPUB files via drag & drop
- Select from 50+ voices across multiple languages
- Preview voice samples before converting
- Real-time progress updates via WebSocket
- Download completed audiobooks
## Development
### Backend
```bash
cd backend
pip install -r requirements.txt
uvicorn main:app --reload
```
### Frontend
```bash
cd frontend
npm install
npm run dev
```
### Voice Samples
Generate voice samples (requires audiblez environment):
```bash
python generate_samples.py samples/
```
## Docker Build
```bash
docker build -t audiblez-web .
docker run -p 8000:8000 -v /path/to/data:/mnt audiblez-web
```
## Deployment
Deployed to Kubernetes via Terraform. The service mounts NFS storage at `/mnt` for:
- `/mnt/uploads` - Uploaded EPUB files
- `/mnt/outputs` - Generated audiobooks

View file

@ -0,0 +1,104 @@
"""
Authentication module for extracting user identity from Authentik headers.
When nginx ingress is protected with Authentik, these headers are forwarded:
- X-Authentik-Username: The user's username
- X-Authentik-Uid: Unique user ID (used for directory separation)
- X-Authentik-Email: User's email
- X-Authentik-Name: User's display name
- X-Authentik-Groups: Comma-separated group list
"""
from dataclasses import dataclass
from fastapi import Request, HTTPException
from typing import Optional
import re
@dataclass
class User:
    """Represents an authenticated user from Authentik.

    Attributes:
        uid: User identifier (the auth dependency stores the sanitized value).
        username: Login name taken from the Authentik headers.
        email: E-mail address, if the header was forwarded.
        name: Display name, if the header was forwarded.
        groups: Group names; always a list after construction.
    """
    uid: str
    username: str
    email: Optional[str] = None
    name: Optional[str] = None
    # Annotated Optional because the default really is None; __post_init__
    # replaces it with a fresh list so instances never share one.
    groups: Optional[list[str]] = None

    def __post_init__(self):
        """Normalise a missing ``groups`` value to an empty list."""
        if self.groups is None:
            self.groups = []
def sanitize_user_id(uid: str) -> str:
    """Reduce a user ID to a string that is safe to use as a directory name.

    Every character other than ASCII letters, digits, ``-`` and ``_`` is
    dropped, and the result is capped at 64 characters.

    Raises:
        ValueError: If *uid* is empty or nothing survives the filtering.
    """
    if not uid:
        raise ValueError("User ID cannot be empty")

    cleaned = re.sub(r'[^a-zA-Z0-9\-_]', '', uid)
    if cleaned:
        # Slicing is a no-op for values already within the limit.
        return cleaned[:64]

    raise ValueError("User ID contains no valid characters")
async def get_current_user(request: Request) -> User:
    """
    Build a ``User`` from the Authentik headers on *request*.

    Intended as a FastAPI dependency for protected endpoints.

    Raises:
        HTTPException: 401 when no user id or username header is present,
            400 when the user id cannot be sanitized.
    """
    headers = request.headers

    # Preferred header first, then the alternative names; the first
    # non-empty value wins.
    uid = (
        headers.get("X-Authentik-Uid")
        or headers.get("X-Authentik-Userid")
        or headers.get("Remote-User")
    )
    username = headers.get("X-Authentik-Username")

    if not (uid and username):
        raise HTTPException(
            status_code=401,
            detail="Authentication required. Authentik headers not found."
        )

    try:
        safe_uid = sanitize_user_id(uid)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))

    # Groups arrive as one comma-separated header; blank entries are dropped.
    raw_groups = headers.get("X-Authentik-Groups", "")
    stripped_groups = (part.strip() for part in raw_groups.split(","))
    groups = [group for group in stripped_groups if group]

    return User(
        uid=safe_uid,
        username=username,
        email=headers.get("X-Authentik-Email"),
        name=headers.get("X-Authentik-Name"),
        groups=groups
    )
async def get_optional_user(request: Request) -> Optional[User]:
    """
    Return the authenticated user, or ``None`` when authentication fails.

    Any ``HTTPException`` raised by ``get_current_user`` is turned into
    ``None``, so endpoints can work with or without authentication.
    """
    try:
        user = await get_current_user(request)
    except HTTPException:
        user = None
    return user

View file

@ -0,0 +1,307 @@
from fastapi import APIRouter, UploadFile, File, HTTPException, Depends
from fastapi.responses import FileResponse
from pydantic import BaseModel
from pathlib import Path
import shutil
import asyncio
import re
from models.schemas import Voice, JobCreate, Job, JobProgress, ChapterInfo
from services.voices import get_all_voices, get_voices_by_language, get_voice
from services.converter import job_manager
from api.auth import User, get_current_user
# Router shared by every endpoint in this module; all paths are served under /api.
router = APIRouter(prefix="/api")
def sanitize_filename(filename: str, max_length: int = 200) -> str:
    """
    Turn an untrusted file name into one that is safe to store on disk.

    Directory components are discarded, and only letters, digits, whitespace,
    ``-``, ``_``, parentheses and dots are kept. Whitespace runs become a
    single space, dot runs become a single dot, leading/trailing spaces and
    dots are stripped, and the result is cut to *max_length* characters.

    Raises:
        ValueError: If *filename* is empty or nothing usable remains.
    """
    if not filename:
        raise ValueError("Filename cannot be empty")

    # Keep only the last path component (prevents path traversal).
    base_name = Path(filename).name

    cleaned = re.sub(r'[^a-zA-Z0-9\s\-_().]', '', base_name)
    for pattern, replacement in ((r'\s+', ' '), (r'\.+', '.')):
        cleaned = re.sub(pattern, replacement, cleaned)

    # Slicing leaves names that are already short enough untouched.
    cleaned = cleaned.strip(' .')[:max_length]

    if cleaned:
        return cleaned
    raise ValueError("Filename contains no valid characters")
# Request body for the audiobook rename endpoint.
class RenameRequest(BaseModel):
    # Desired file name as sent by the client; the endpoint sanitizes it and
    # appends the current audio extension when it is missing.
    new_name: str
# ============================================================================
# Voice endpoints (no auth required - public info)
# ============================================================================
@router.get("/voices", response_model=list[Voice])
async def list_voices():
    """Get all available voices."""
    # Public endpoint: no user dependency.
    voices = get_all_voices()
    return voices
@router.get("/voices/grouped")
async def list_voices_grouped():
    """Get voices grouped by language."""
    # Public endpoint: no user dependency.
    grouped = get_voices_by_language()
    return grouped
@router.get("/voices/{voice_id}/sample")
async def get_voice_sample(voice_id: str):
    """Get voice sample audio file."""
    # Only ids of known voices are accepted, so voice_id is safe to use in a
    # file name below.
    if not get_voice(voice_id):
        raise HTTPException(status_code=404, detail="Voice not found")

    sample_name = f"{voice_id}.mp3"

    # NFS storage (persistent) takes precedence over the samples bundled
    # into the image.
    for samples_root in (Path("/mnt/samples"), Path("/app/samples")):
        candidate = samples_root / sample_name
        if candidate.exists():
            return FileResponse(candidate, media_type="audio/mpeg")

    raise HTTPException(status_code=404, detail="Sample not available")
# ============================================================================
# User info endpoint
# ============================================================================
@router.get("/me")
async def get_current_user_info(user: User = Depends(get_current_user)):
    """Get current authenticated user info."""
    # Expose exactly these attributes of the authenticated user, in this order.
    exposed_fields = ("uid", "username", "email", "name", "groups")
    return {field_name: getattr(user, field_name) for field_name in exposed_fields}
# ============================================================================
# Upload endpoints (user-scoped)
# ============================================================================
@router.post("/upload")
async def upload_file(file: UploadFile = File(...), user: User = Depends(get_current_user)):
    """Upload an EPUB file to user's directory."""
    # Returns {"filename", "size"}; the returned (sanitized) name is the one
    # the client must use when creating a job.
    # Raises 400 for a missing/non-EPUB file name or a name that cannot be
    # sanitized.

    # A multipart part may carry no filename at all; answer 400 instead of
    # failing on None.endswith().
    if not file.filename or not file.filename.endswith(".epub"):
        raise HTTPException(status_code=400, detail="Only EPUB files are supported")

    # Sanitize the filename
    try:
        safe_filename = sanitize_filename(file.filename)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e

    # Sanitizing can remove or truncate the extension (e.g. "###.epub" becomes
    # "epub"); such a file would be stored without the .epub suffix.
    if not safe_filename.endswith(".epub"):
        raise HTTPException(status_code=400, detail="Only EPUB files are supported")

    # Save file to user's uploads directory (created on demand, only once the
    # request has been validated).
    upload_dir = job_manager.get_user_uploads_dir(user.uid)
    file_path = upload_dir / safe_filename
    with file_path.open("wb") as buffer:
        shutil.copyfileobj(file.file, buffer)

    return {"filename": safe_filename, "size": file_path.stat().st_size}
# ============================================================================
# Job endpoints (user-scoped)
# ============================================================================
# Strong references to running conversion tasks. The event loop keeps only
# weak references, so an otherwise unreferenced task can be garbage-collected
# before it finishes.
_background_conversions: set = set()


@router.post("/jobs", response_model=Job)
async def create_job(job_create: JobCreate, user: User = Depends(get_current_user)):
    """Create a new conversion job."""
    # Raises 400 when the file name contains path components, 404 when the
    # file or the voice does not exist.

    # The file name comes straight from the request body. Accept only a bare
    # name: anything with a directory part ("../x", "/etc/passwd") would
    # escape the user's uploads directory once joined below.
    requested_name = job_create.filename
    if (
        not requested_name
        or requested_name in {".", ".."}
        or Path(requested_name).name != requested_name
    ):
        raise HTTPException(status_code=400, detail="Invalid filename")

    # Verify file exists in user's uploads (is_file() also rejects directories)
    file_path = job_manager.get_user_uploads_dir(user.uid) / requested_name
    if not file_path.is_file():
        raise HTTPException(status_code=404, detail="File not found")

    # Verify voice exists
    voice = get_voice(job_create.voice)
    if not voice:
        raise HTTPException(status_code=404, detail="Voice not found")

    # Create job with user ownership
    job = job_manager.create_job(
        user_id=user.uid,
        filename=requested_name,
        voice=job_create.voice,
        speed=job_create.speed,
        use_gpu=job_create.use_gpu
    )

    # Start conversion in background, keeping a reference until it is done.
    conversion = asyncio.create_task(job_manager.run_conversion(job.id))
    _background_conversions.add(conversion)
    conversion.add_done_callback(_background_conversions.discard)

    return job
@router.get("/jobs", response_model=list[Job])
async def list_jobs(user: User = Depends(get_current_user)):
    """Get all jobs for current user."""
    # Scoped to the caller's uid; other users' jobs are never returned.
    own_jobs = job_manager.get_user_jobs(user.uid)
    return own_jobs
@router.get("/jobs/{job_id}", response_model=Job)
async def get_job(job_id: str, user: User = Depends(get_current_user)):
    """Get a specific job (must be owned by user)."""
    # An unknown id and a job owned by someone else both yield 404.
    job = job_manager.get_job(job_id, user.uid)
    if job:
        return job
    raise HTTPException(status_code=404, detail="Job not found")
@router.get("/jobs/{job_id}/download")
async def download_job(job_id: str, user: User = Depends(get_current_user)):
    """Download the completed audiobook."""
    job = job_manager.get_job(job_id, user.uid)
    if not job:
        raise HTTPException(status_code=404, detail="Job not found")

    if job.status != "completed":
        raise HTTPException(status_code=400, detail="Job not completed")

    # A missing output name and a missing file on disk are reported the same way.
    output_name = job.output_file
    output_path = None
    if output_name:
        output_path = job_manager.get_user_outputs_dir(user.uid) / job_id / output_name
    if output_path is None or not output_path.exists():
        raise HTTPException(status_code=404, detail="Output file not found")

    return FileResponse(
        output_path,
        media_type="audio/mp4",
        filename=output_name
    )
@router.get("/jobs/{job_id}/chapters", response_model=list[ChapterInfo])
async def get_job_chapters(job_id: str, user: User = Depends(get_current_user)):
    """Get chapter metadata for a job's audiobook."""
    job = job_manager.get_job(job_id, user.uid)
    if not job:
        raise HTTPException(status_code=404, detail="Job not found")

    # Chapters are only served once the job has completed.
    if job.status == "completed":
        return job.chapters
    raise HTTPException(status_code=400, detail="Job not completed")
@router.delete("/jobs/{job_id}")
async def delete_job(job_id: str, user: User = Depends(get_current_user)):
    """Delete a job (must be owned by user)."""
    # The manager returns False when the job is unknown or owned by someone else.
    was_deleted = job_manager.delete_job(job_id, user.uid)
    if was_deleted:
        return {"status": "deleted"}
    raise HTTPException(status_code=404, detail="Job not found")
# ============================================================================
# Audiobook endpoints (user-scoped)
# ============================================================================
@router.get("/audiobooks")
async def list_audiobooks(user: User = Depends(get_current_user)):
    """List all completed audiobooks for current user."""
    # Built from the files in the user's outputs directory.
    audiobooks = job_manager.get_user_audiobooks(user.uid)
    return audiobooks
@router.get("/audiobooks/{audiobook_id}/download")
async def download_audiobook(audiobook_id: str, user: User = Depends(get_current_user)):
    """Download an audiobook by its ID (job folder name)."""
    book_dir = job_manager.get_user_outputs_dir(user.uid) / audiobook_id
    if not book_dir.exists():
        raise HTTPException(status_code=404, detail="Audiobook not found")

    # .m4b files are preferred over .mp3; the first match is served.
    candidates = [*book_dir.glob("*.m4b"), *book_dir.glob("*.mp3")]
    if not candidates:
        raise HTTPException(status_code=404, detail="Audio file not found")
    audio_file = candidates[0]

    # Anything that is not .m4b is served as MPEG audio.
    media_type = {".m4b": "audio/mp4"}.get(audio_file.suffix, "audio/mpeg")

    return FileResponse(
        audio_file,
        media_type=media_type,
        filename=audio_file.name
    )
@router.delete("/audiobooks/{audiobook_id}")
async def delete_audiobook(audiobook_id: str, user: User = Depends(get_current_user)):
    """Delete an audiobook and its folder."""
    # Raises 404 when the id is not a plain folder name or the folder does
    # not exist.

    # The id is joined onto the user's outputs directory, so it must be a
    # single path component; "." and ".." would point at the outputs
    # directory itself or at its parent.
    if audiobook_id in {".", ".."} or Path(audiobook_id).name != audiobook_id:
        raise HTTPException(status_code=404, detail="Audiobook not found")

    output_dir = job_manager.get_user_outputs_dir(user.uid) / audiobook_id
    if not output_dir.is_dir():
        raise HTTPException(status_code=404, detail="Audiobook not found")

    # rmtree also copes with nested directories, which unlink()-ing every
    # entry does not.
    shutil.rmtree(output_dir)

    return {"status": "deleted"}
@router.patch("/audiobooks/{audiobook_id}/rename")
async def rename_audiobook(audiobook_id: str, rename_request: RenameRequest, user: User = Depends(get_current_user)):
    """Rename an audiobook file. Input is sanitized to prevent path traversal and injection."""
    book_dir = job_manager.get_user_outputs_dir(user.uid) / audiobook_id
    if not book_dir.exists():
        raise HTTPException(status_code=404, detail="Audiobook not found")

    # The file being renamed is the first .m4b, or failing that the first .mp3.
    candidates = [*book_dir.glob("*.m4b"), *book_dir.glob("*.mp3")]
    if not candidates:
        raise HTTPException(status_code=404, detail="Audio file not found")
    source = candidates[0]
    extension = source.suffix  # .m4b or .mp3

    try:
        target_name = sanitize_filename(rename_request.new_name)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))

    # Keep the audio extension even if the client left it out.
    if not target_name.lower().endswith(extension.lower()):
        target_name += extension

    # The file stays in the same directory; refuse to overwrite another file.
    target = book_dir / target_name
    if target != source and target.exists():
        raise HTTPException(status_code=400, detail="A file with that name already exists")

    # pathlib rename, no shell involved.
    source.rename(target)

    return {"status": "renamed", "new_filename": target_name}

View file

@ -0,0 +1,101 @@
from fastapi import WebSocket, WebSocketDisconnect, HTTPException
from services.converter import job_manager
from models.schemas import JobProgress
from api.auth import sanitize_user_id
class ConnectionManager:
    """Manages WebSocket connections for job progress updates.

    ``active_connections`` maps a job id to the sockets currently subscribed
    to it; a job id is dropped from the mapping once its last socket is removed.
    """

    def __init__(self):
        self.active_connections: dict[str, list[WebSocket]] = {}

    async def connect(self, job_id: str, websocket: WebSocket):
        """Accept *websocket* and subscribe it to updates for *job_id*."""
        await websocket.accept()
        self.active_connections.setdefault(job_id, []).append(websocket)

    def disconnect(self, job_id: str, websocket: WebSocket):
        """Unsubscribe *websocket* from *job_id*; unknown pairs are ignored."""
        connections = self.active_connections.get(job_id)
        if connections is None:
            return
        if websocket in connections:
            connections.remove(websocket)
        if not connections:
            del self.active_connections[job_id]

    async def send_progress(self, job_id: str, progress: JobProgress):
        """Send *progress* to every socket subscribed to *job_id*.

        Sockets whose send fails are dropped from the registry.
        """
        # Iterate over a snapshot: other coroutines may connect or disconnect
        # sockets while this one is suspended in send_json().
        connections = list(self.active_connections.get(job_id, ()))
        if not connections:
            return

        failed = []
        for connection in connections:
            try:
                await connection.send_json(progress.model_dump())
            except Exception:
                # Not a bare except: task cancellation (a BaseException) must
                # propagate instead of being mistaken for a dead socket.
                failed.append(connection)

        # Remove disconnected clients
        for connection in failed:
            self.disconnect(job_id, connection)
# Module-level registry of open progress sockets, used by websocket_endpoint below.
manager = ConnectionManager()
def get_user_from_websocket(websocket: WebSocket) -> str | None:
"""
Extract user ID from websocket headers.
WebSocket connections receive HTTP headers during the upgrade handshake.
"""
# Try various header name formats
uid = websocket.headers.get("x-authentik-uid")
if not uid:
uid = websocket.headers.get("X-Authentik-Uid")
if not uid:
uid = websocket.headers.get("x-authentik-userid")
if not uid:
uid = websocket.headers.get("remote-user")
if uid:
try:
return sanitize_user_id(uid)
except ValueError:
return None
return None
async def websocket_endpoint(websocket: WebSocket, job_id: str):
    """WebSocket endpoint for job progress updates.

    The socket is closed with code 4004 unless the handshake headers identify
    a user who owns *job_id*. Otherwise the current status is sent once, and
    every later progress update is pushed until the client disconnects.
    """
    # Extract user from headers
    user_id = get_user_from_websocket(websocket)

    # Verify job exists and user has access. get_job() only checks ownership
    # when it is given a user id, so a socket without one must be refused
    # here rather than passed through as "no user".
    job = job_manager.get_job(job_id, user_id) if user_id else None
    if not job:
        # Close connection if job not found or not owned by user
        await websocket.close(code=4004, reason="Job not found or access denied")
        return

    await manager.connect(job_id, websocket)

    # Every socket registers its own callback, so the callback writes to this
    # socket only. Broadcasting from each callback would deliver one copy per
    # connected client to every client.
    async def progress_callback(progress: JobProgress):
        try:
            await websocket.send_json(progress.model_dump())
        except Exception:
            # The socket can no longer be written to; stop tracking it.
            manager.disconnect(job_id, websocket)

    job_manager.register_progress_callback(job_id, progress_callback)

    try:
        # Send initial status
        await websocket.send_json({
            "progress": job.progress,
            "status": job.status,
        })

        # Wait for messages (keep-alive)
        while True:
            await websocket.receive_text()
    except WebSocketDisconnect:
        pass
    finally:
        # Runs for a clean disconnect and for any other error, so neither the
        # socket nor its callback stays registered.
        manager.disconnect(job_id, websocket)
        job_manager.unregister_progress_callback(job_id, progress_callback)

View file

@ -0,0 +1,41 @@
from fastapi import FastAPI, WebSocket
from fastapi.staticfiles import StaticFiles
from fastapi.middleware.cors import CORSMiddleware
from pathlib import Path

from api.routes import router
from api.websocket import websocket_endpoint

# Application entry point: wires the REST router, the job-progress WebSocket
# and (when present) the built frontend into one FastAPI app.
app = FastAPI(title="Audiblez Web API", version="1.0.0")

# CORS middleware for development
# NOTE(review): wildcard origins together with allow_credentials=True allows
# credentialed requests from any site; restrict allow_origins before relying
# on this outside development.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


# Health check - must be before static mount
@app.get("/health")
async def health_check():
    return {"status": "healthy"}


# Include API routes
app.include_router(router)


# WebSocket endpoint
# The WebSocket annotation is what makes FastAPI inject the connection;
# without it the parameter is treated as an ordinary request parameter.
@app.websocket("/ws/jobs/{job_id}")
async def websocket_route(websocket: WebSocket, job_id: str):
    await websocket_endpoint(websocket, job_id)


# Serve static frontend files - MUST BE LAST as it catches all routes
static_dir = Path("/app/frontend/dist")
if static_dir.exists():
    app.mount("/", StaticFiles(directory=str(static_dir), html=True), name="static")


if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)

View file

@ -0,0 +1,59 @@
from pydantic import BaseModel, Field
from typing import Optional, Literal
from datetime import datetime
from enum import Enum
# Lifecycle states of a conversion job. The str mixin makes each member
# compare equal to its plain string value (e.g. status == "completed").
class JobStatus(str, Enum):
    PENDING = "pending"
    PROCESSING = "processing"
    COMPLETED = "completed"
    FAILED = "failed"
    CANCELLED = "cancelled"
# Description of one selectable voice, as returned by the /voices endpoint.
class Voice(BaseModel):
    id: str  # identifier used in API paths and as the sample file's base name
    name: str
    language: str
    gender: Literal["M", "F"]
    quality: str = "medium"
# Request body for creating a conversion job.
class JobCreate(BaseModel):
    filename: str  # name of a file in the caller's uploads directory
    voice: str  # must be the id of a known voice
    speed: float = Field(default=1.0, ge=0.5, le=2.0)  # validated to 0.5-2.0
    use_gpu: bool = True  # when true the converter adds its CUDA flag
class ChapterInfo(BaseModel):
    """Chapter metadata extracted from EPUB and embedded in M4B."""
    title: str
    start_ms: int  # chapter start offset in milliseconds
    end_ms: int  # chapter end offset in milliseconds
# Progress snapshot handed to the registered progress callbacks.
class JobProgress(BaseModel):
    progress: float = Field(ge=0, le=100)  # percentage, validated to 0-100
    eta: Optional[str] = None
    current_chapter: Optional[str] = None
    total_chapters: Optional[int] = None
    status: JobStatus
# Server-side record of one conversion job; also the response model of the
# job endpoints.
class Job(BaseModel):
    id: str
    user_id: str # User who owns this job
    filename: str  # name of the uploaded file being converted
    voice: str
    speed: float
    use_gpu: bool
    status: JobStatus
    progress: float = 0
    created_at: datetime
    updated_at: datetime
    error: Optional[str] = None  # failure message, set when the job fails
    output_file: Optional[str] = None  # file name (not path) of the produced audio
    total_chapters: int = 0
    chapters: list[ChapterInfo] = []

View file

@ -0,0 +1,11 @@
fastapi==0.115.0
uvicorn[standard]==0.32.0
python-multipart==0.0.12
websockets==13.1
aiofiles==24.1.0
pydantic==2.9.2
pydantic-settings==2.6.0
ebooklib>=0.18
pydub>=0.25.1
beautifulsoup4>=4.12.0
lxml>=5.0.0

View file

@ -0,0 +1,156 @@
"""M4B chapter metadata embedding service."""
import re
import subprocess
import tempfile
from pathlib import Path
from pydub import AudioSegment
from .epub_parser import Chapter
def get_chapter_audio_durations(output_dir: Path) -> list[int]:
    """Measure every chapter WAV in *output_dir*, in chapter order.

    Files named ``*_chapter_*.wav`` are used when present; otherwise every
    ``*.wav`` file is considered. Files are ordered by the number following
    "chapter" in their name (or the first number found, or 0).

    Args:
        output_dir: Directory containing the WAV files

    Returns:
        Durations in milliseconds, one per readable file. Files that cannot
        be read are reported and left out.
    """
    def chapter_order(wav_path: Path) -> int:
        # Prefer an explicit "chapter<N>" marker, then any number at all.
        stem = wav_path.stem
        for pattern, flags in ((r'chapter[_-]?(\d+)', re.IGNORECASE), (r'(\d+)', 0)):
            found = re.search(pattern, stem, flags)
            if found:
                return int(found.group(1))
        return 0

    candidates = list(output_dir.glob("*_chapter_*.wav")) or list(output_dir.glob("*.wav"))
    if not candidates:
        print(f"No WAV files found in {output_dir}")
        return []

    ordered = sorted(candidates, key=chapter_order)
    print(f"Found {len(ordered)} WAV files to process for durations")

    durations = []
    for wav_file in ordered:
        try:
            audio = AudioSegment.from_file(str(wav_file))
            durations.append(len(audio))  # duration in ms
            print(f" Chapter WAV: {wav_file.name} - {len(audio)}ms ({len(audio)/1000:.1f}s)")
        except Exception as e:
            print(f" Error reading {wav_file}: {e}")
    return durations
def generate_ffmpeg_metadata(chapters: list[Chapter], durations: list[int]) -> str:
    """Build an FFMETADATA1 document with one [CHAPTER] section per chapter.

    Chapters and durations are paired positionally; extra entries on either
    side are ignored. As a side effect each paired chapter gets its
    ``start_ms``, ``end_ms`` and ``duration_ms`` attributes set.

    Args:
        chapters: List of Chapter objects with titles
        durations: List of durations in milliseconds for each chapter

    Returns:
        FFMETADATA1 formatted string
    """
    sections = [";FFMETADATA1\n"]
    cursor_ms = 0

    for chapter, duration in zip(chapters, durations):
        # Chapters are laid out back to back, starting at 0.
        chapter.start_ms = cursor_ms
        chapter.duration_ms = duration
        cursor_ms += duration
        chapter.end_ms = cursor_ms

        sections.append(
            "\n[CHAPTER]\n"
            "TIMEBASE=1/1000\n"
            f"START={chapter.start_ms}\n"
            f"END={chapter.end_ms}\n"
            f"title={chapter.title}\n"
        )

    return "".join(sections)
def embed_chapters_in_m4b(input_m4b: Path, metadata_content: str) -> Path:
    """Re-mux M4B with chapter metadata using FFmpeg.

    FFmpeg writes a sibling ``*.chaptered.m4b`` file (audio stream copied, not
    re-encoded), which then replaces the original in a single step.

    Args:
        input_m4b: Path to the input M4B file
        metadata_content: FFMETADATA1 formatted string

    Returns:
        Path to the output M4B with chapters (same as input, replaced)

    Raises:
        subprocess.CalledProcessError: If FFmpeg exits with a non-zero status.
        OSError: If FFmpeg cannot be started or the files cannot be written.
    """
    output_m4b = input_m4b.with_suffix('.chaptered.m4b')

    # Write metadata to temporary file. Explicit UTF-8 so non-ASCII chapter
    # titles do not depend on the process locale.
    with tempfile.NamedTemporaryFile(
        mode='w', suffix='.txt', delete=False, encoding='utf-8'
    ) as f:
        f.write(metadata_content)
        metadata_file = Path(f.name)

    try:
        cmd = [
            'ffmpeg', '-y',
            '-i', str(input_m4b),
            '-f', 'ffmetadata', '-i', str(metadata_file),
            '-map', '0:a',
            '-map_metadata', '1',
            '-c:a', 'copy',  # Copy audio without re-encoding
            '-movflags', '+faststart+use_metadata_tags',
            str(output_m4b)
        ]

        print(f"Running FFmpeg: {' '.join(cmd)}")
        # check=True raises on a non-zero exit status, so no separate
        # returncode test is needed afterwards.
        subprocess.run(cmd, check=True, capture_output=True, text=True)

        # Replace original with chaptered version in one step, so the
        # original is never missing while the new file is moved into place.
        output_m4b.replace(input_m4b)

        print(f"Successfully embedded chapters in {input_m4b}")
        return input_m4b

    except subprocess.CalledProcessError as e:
        print(f"FFmpeg error: {e.stderr}")
        raise
    finally:
        # After a failure of any kind a partial output may be left behind;
        # after success it has already been moved, so this is a no-op.
        output_m4b.unlink(missing_ok=True)
        # Clean up metadata file
        metadata_file.unlink(missing_ok=True)

View file

@ -0,0 +1,291 @@
import asyncio
import uuid
import os
from datetime import datetime
from pathlib import Path
from typing import Callable, Optional
import subprocess
import json
from models.schemas import Job, JobStatus, JobProgress, ChapterInfo
from services.epub_parser import extract_chapters, Chapter
from services.chapter_embedder import (
get_chapter_audio_durations,
generate_ffmpeg_metadata,
embed_chapters_in_m4b
)
class JobManager:
"""Manages conversion jobs and their state with user isolation."""
def __init__(self, storage_path: str = "/mnt"):
self.storage_path = Path(storage_path)
self.jobs: dict[str, Job] = {}
self.progress_callbacks: dict[str, list[Callable]] = {}
def get_user_uploads_dir(self, user_id: str) -> Path:
"""Get the uploads directory for a specific user."""
user_dir = self.storage_path / "users" / user_id / "uploads"
user_dir.mkdir(parents=True, exist_ok=True)
return user_dir
def get_user_outputs_dir(self, user_id: str) -> Path:
"""Get the outputs directory for a specific user."""
user_dir = self.storage_path / "users" / user_id / "outputs"
user_dir.mkdir(parents=True, exist_ok=True)
return user_dir
def create_job(self, user_id: str, filename: str, voice: str, speed: float, use_gpu: bool) -> Job:
"""Create a new conversion job for a user."""
job_id = str(uuid.uuid4())
now = datetime.now()
job = Job(
id=job_id,
user_id=user_id,
filename=filename,
voice=voice,
speed=speed,
use_gpu=use_gpu,
status=JobStatus.PENDING,
created_at=now,
updated_at=now,
)
self.jobs[job_id] = job
return job
def get_job(self, job_id: str, user_id: Optional[str] = None) -> Optional[Job]:
"""Get a job by ID. If user_id is provided, verify ownership."""
job = self.jobs.get(job_id)
if job and user_id and job.user_id != user_id:
return None # User doesn't own this job
return job
def get_user_jobs(self, user_id: str) -> list[Job]:
"""Get all jobs for a specific user."""
return [job for job in self.jobs.values() if job.user_id == user_id]
def get_all_jobs(self) -> list[Job]:
"""Get all jobs (admin use only)."""
return list(self.jobs.values())
def update_job_status(self, job_id: str, status: JobStatus, error: Optional[str] = None):
"""Update job status."""
if job_id in self.jobs:
self.jobs[job_id].status = status
self.jobs[job_id].updated_at = datetime.now()
if error:
self.jobs[job_id].error = error
def update_job_progress(self, job_id: str, progress: float, current_chapter: Optional[str] = None, eta: Optional[str] = None):
"""Update job progress."""
if job_id in self.jobs:
self.jobs[job_id].progress = progress
self.jobs[job_id].updated_at = datetime.now()
# Notify callbacks
if job_id in self.progress_callbacks:
progress_data = JobProgress(
progress=progress,
eta=eta,
current_chapter=current_chapter,
status=self.jobs[job_id].status
)
for callback in self.progress_callbacks[job_id]:
try:
asyncio.create_task(callback(progress_data))
except Exception as e:
print(f"Error in progress callback: {e}")
def register_progress_callback(self, job_id: str, callback: Callable):
"""Register a callback for progress updates."""
if job_id not in self.progress_callbacks:
self.progress_callbacks[job_id] = []
self.progress_callbacks[job_id].append(callback)
def unregister_progress_callback(self, job_id: str, callback: Callable):
"""Unregister a progress callback."""
if job_id in self.progress_callbacks:
self.progress_callbacks[job_id].remove(callback)
if not self.progress_callbacks[job_id]:
del self.progress_callbacks[job_id]
async def run_conversion(self, job_id: str):
"""Run the audiblez conversion in the background."""
job = self.jobs.get(job_id)
if not job:
return
try:
self.update_job_status(job_id, JobStatus.PROCESSING)
# Prepare user-specific paths
input_path = self.get_user_uploads_dir(job.user_id) / job.filename
output_dir = self.get_user_outputs_dir(job.user_id) / job_id
output_dir.mkdir(parents=True, exist_ok=True)
# Extract chapters from EPUB before conversion
chapters: list[Chapter] = []
if input_path.suffix.lower() == '.epub':
chapters = extract_chapters(input_path)
self.jobs[job_id].total_chapters = len(chapters)
print(f"Extracted {len(chapters)} chapters from EPUB")
# Build audiblez command - use the venv python/audiblez
cmd = [
"/app/audiblez/bin/audiblez",
str(input_path),
"-o", str(output_dir),
"-v", job.voice,
"-s", str(job.speed),
]
if job.use_gpu:
cmd.append("-c") # --cuda flag for GPU
# Run conversion
process = await asyncio.create_subprocess_exec(
*cmd,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
cwd=str(self.storage_path)
)
# Monitor progress from stderr/stdout
async def read_stream(stream, is_stderr=False):
while True:
line = await stream.readline()
if not line:
break
line_str = line.decode().strip()
print(f"{'[STDERR]' if is_stderr else '[STDOUT]'} {line_str}")
# Parse progress from output
# Audiblez outputs progress in various formats
# We'll do a simple pattern match
if "%" in line_str:
try:
# Try to extract percentage
parts = line_str.split("%")
if len(parts) > 1:
# Find the number before %
num_str = parts[0].split()[-1]
progress = float(num_str)
self.update_job_progress(job_id, progress)
except:
pass
# Check for chapter info
if "Chapter" in line_str or "chapter" in line_str:
self.update_job_progress(
job_id,
job.progress,
current_chapter=line_str
)
# Read both streams concurrently
await asyncio.gather(
read_stream(process.stdout),
read_stream(process.stderr, is_stderr=True)
)
# Wait for completion
returncode = await process.wait()
if returncode == 0:
# Find output file
output_files = list(output_dir.glob("*.m4b"))
if not output_files:
output_files = list(output_dir.glob("*.mp3"))
if output_files:
output_file = output_files[0]
# Embed chapter metadata if we have chapters and WAV files
if chapters:
try:
durations = get_chapter_audio_durations(output_dir)
print(f"Found {len(durations)} chapter audio durations")
if durations:
# Match chapter count with duration count
num_chapters = min(len(chapters), len(durations))
if num_chapters != len(chapters):
print(f"Warning: chapter count ({len(chapters)}) != duration count ({len(durations)})")
metadata = generate_ffmpeg_metadata(chapters[:num_chapters], durations[:num_chapters])
embed_chapters_in_m4b(output_file, metadata)
# Store chapter info in job for API access
self.jobs[job_id].chapters = [
ChapterInfo(
title=c.title,
start_ms=c.start_ms,
end_ms=c.end_ms
)
for c in chapters[:num_chapters]
]
print(f"Embedded {num_chapters} chapters in M4B")
else:
print("No WAV files found for chapter duration calculation")
except Exception as e:
print(f"Failed to embed chapters: {e}")
# Continue without chapter embedding - non-fatal error
self.jobs[job_id].output_file = str(output_file.name)
self.update_job_status(job_id, JobStatus.COMPLETED)
self.update_job_progress(job_id, 100.0)
else:
self.update_job_status(job_id, JobStatus.FAILED, "No output file generated")
else:
self.update_job_status(job_id, JobStatus.FAILED, f"Conversion failed with code {returncode}")
except Exception as e:
print(f"Conversion error: {e}")
self.update_job_status(job_id, JobStatus.FAILED, str(e))
def delete_job(self, job_id: str, user_id: str) -> bool:
    """Remove a job record together with its output directory.

    Args:
        job_id: Identifier of the job; also the name of its output folder.
        user_id: User the job is looked up for.

    Returns:
        False when ``self.get_job(job_id, user_id)`` yields nothing,
        True once the outputs (if any) and the job entry are removed.
    """
    import shutil

    if not self.get_job(job_id, user_id):
        return False

    # Outputs live in <user outputs dir>/<job_id>; the folder may not exist
    job_outputs = self.get_user_outputs_dir(user_id) / job_id
    if job_outputs.exists():
        shutil.rmtree(job_outputs)

    # Drop the in-memory record last
    del self.jobs[job_id]
    return True
def get_user_audiobooks(self, user_id: str) -> list[dict]:
    """Collect the audio files found in a user's per-job output folders.

    Args:
        user_id: User whose outputs directory is scanned.

    Returns:
        One dict per ``*.m4b`` / ``*.mp3`` file with the keys ``id`` (job
        folder name), ``filename``, ``size`` (bytes) and ``created_at``
        (file mtime), ordered newest first. Empty when the outputs
        directory does not exist.
    """
    root = self.get_user_outputs_dir(user_id)
    if not root.exists():
        return []

    found = []
    for entry in root.iterdir():
        # Only job sub-directories are scanned; loose files are ignored
        if not entry.is_dir():
            continue
        for audio_path in [*entry.glob("*.m4b"), *entry.glob("*.mp3")]:
            info = audio_path.stat()
            found.append({
                "id": entry.name,
                "filename": audio_path.name,
                "size": info.st_size,
                "created_at": info.st_mtime,
            })

    # Newest first, keyed on modification time
    return sorted(found, key=lambda item: item["created_at"], reverse=True)
# Global job manager instance, constructed once when this module is imported.
# Presumably shared by the API layer - verify against the importing modules.
job_manager = JobManager()

View file

@ -0,0 +1,166 @@
"""EPUB chapter extraction service.
This parser attempts to match audiblez's chapter detection logic to ensure
the extracted chapters align with the WAV files audiblez produces.
audiblez iterates through EPUB ITEM_DOCUMENTs and uses is_chapter() to determine
if a document is a chapter based on content length (100+ chars) and filename patterns.
"""
import re
from dataclasses import dataclass
from pathlib import Path
from bs4 import BeautifulSoup
from ebooklib import epub, ITEM_DOCUMENT
@dataclass
class Chapter:
    """Represents a chapter extracted from an EPUB."""
    # Display title; extract_chapters() stores the sanitize_title() output here.
    title: str
    # Zero-based position among the documents accepted as chapters.
    index: int
    # Timing fields default to 0 and are not populated anywhere in this module;
    # presumably filled in later from per-chapter audio durations - TODO confirm.
    duration_ms: int = 0
    start_ms: int = 0
    end_ms: int = 0
# Characters that are special in FFmpeg's FFMETADATA format, mapped to a safe
# replacement (None removes the character). Applied in a single pass.
_FFMETADATA_TRANSLATION = str.maketrans({
    '=': '-',
    ';': '-',
    '#': None,
    '\\': None,
    '\n': ' ',
    '\r': None,
})


def sanitize_title(title: str) -> str:
    """Make a chapter title safe to embed in an FFmpeg FFMETADATA file.

    Special characters are replaced or dropped rather than backslash-escaped:
    ``=`` and ``;`` become ``-``, ``#``, ``\\`` and carriage returns are
    removed, and newlines become spaces. Surrounding whitespace is trimmed.

    Args:
        title: Raw title text; may be empty or None.

    Returns:
        The cleaned title, or ``"Untitled"`` when the input is empty or
        nothing printable is left after cleaning.
    """
    if not title:
        return "Untitled"
    cleaned = title.translate(_FFMETADATA_TRANSLATION).strip()
    # A title such as "###" or "\n" would otherwise collapse to an empty string
    return cleaned or "Untitled"
def is_chapter(text: str, filename: str) -> bool:
    """Decide whether an EPUB document should count as a chapter.

    Intended to mirror audiblez's is_chapter() heuristic:
    - Documents with fewer than 100 characters of text are never chapters
    - Otherwise a chapter-like filename is enough
    - Failing that, more than 1000 characters of text is enough

    Args:
        text: Extracted text of the document.
        filename: Document name inside the EPUB; matched case-insensitively.

    Returns:
        True if the document is treated as a chapter.
    """
    size = len(text)
    if size < 100:
        return False

    # Filename fragments that indicate a chapter
    name_hints = (
        r'chapter',
        r'part[_-]?\d+',
        r'split[_-]?\d+',
        r'ch[_-]?\d+',
        r'chap[_-]?\d+',
        r'sect',  # section
        r'content',
        r'text',
    )
    lowered = filename.lower()
    if any(re.search(hint, lowered) for hint in name_hints):
        return True

    # Substantial content is likely a chapter even without a filename match
    return size > 1000
def extract_title_from_content(soup: BeautifulSoup, filename: str, index: int) -> str:
    """Pick a chapter title from the document, its filename, or its position.

    Lookup order:
    1. Text of the first non-empty ``<title>``, ``<h1>``, ``<h2>`` or ``<h3>``
       (in that order), truncated to 100 characters with a trailing "...".
    2. The filename stem, with a leading ``chapter``/``chap``/``ch`` prefix
       expanded to "Chapter " and ``_``/``-`` turned into spaces, title-cased.
       Used only when the result is non-empty and shorter than 50 characters.
    3. ``"Chapter <index + 1>"``.

    Args:
        soup: Parsed document.
        filename: Document name inside the EPUB.
        index: Zero-based chapter index, used for the last-resort title.

    Returns:
        The chosen title (not yet sanitized for FFmpeg metadata).
    """
    # Try to find a title in common heading tags
    for tag in ('title', 'h1', 'h2', 'h3'):
        element = soup.find(tag)
        if not element:
            continue
        title = element.get_text(strip=True)
        if title:
            # Truncate long titles
            return title[:97] + "..." if len(title) > 100 else title

    # Fallback: use the filename without its extension
    stem = Path(filename).stem
    # Expand the prefix only when it stands alone: followed by a separator,
    # a digit, or the end of the stem. Without this guard any name starting
    # with "ch" was rewritten ("characters" -> "Chapter aracters").
    stem = re.sub(
        r'^(?:chapter|chap|ch)(?:[_-]|(?=\d)|$)',
        'Chapter ',
        stem,
        flags=re.IGNORECASE,
    )
    stem = re.sub(r'[_-]', ' ', stem).strip()
    if stem and len(stem) < 50:
        return stem.title()
    return f"Chapter {index + 1}"
def extract_chapters(epub_path: Path) -> list[Chapter]:
    """Extract chapter titles using audiblez-style chapter detection.

    Every ITEM_DOCUMENT in the book is parsed; its text is gathered from
    title/paragraph/heading/list tags and handed to is_chapter() together
    with the document name. Accepted documents become Chapter objects whose
    index is their position among the accepted documents. The goal is for the
    chapter count to line up with the WAV files audiblez produces.

    Args:
        epub_path: Path to the EPUB file

    Returns:
        List of Chapter objects with title and index. Empty if the EPUB
        cannot be read; documents that fail to parse are skipped.
    """
    try:
        book = epub.read_epub(str(epub_path))
    except Exception as e:
        print(f"Failed to read EPUB: {e}")
        return []

    # Tags whose text counts towards the chapter-length heuristic
    text_tags = ['title', 'p', 'h1', 'h2', 'h3', 'h4', 'li']
    found: list[Chapter] = []

    for document in book.get_items():
        if document.get_type() != ITEM_DOCUMENT:
            continue
        try:
            soup = BeautifulSoup(document.get_content(), features='lxml')
            pieces = (tag.get_text(strip=True) for tag in soup.find_all(text_tags))
            full_text = ' '.join(piece for piece in pieces if piece)
            filename = document.get_name() or ""
            if not is_chapter(full_text, filename):
                continue
            # The next index is always the number of chapters accepted so far
            position = len(found)
            raw_title = extract_title_from_content(soup, filename, position)
            found.append(Chapter(title=sanitize_title(raw_title), index=position))
        except Exception as e:
            print(f"Error processing document {document.get_name()}: {e}")
            continue

    print(f"Extracted {len(found)} chapters from EPUB (audiblez-style detection)")
    # Debug: show the first few chapters
    for number, chapter in enumerate(found[:5], start=1):
        print(f" {number}. {chapter.title}")
    remaining = len(found) - 5
    if remaining > 0:
        print(f" ... and {remaining} more")
    return found

View file

@ -0,0 +1,97 @@
from models.schemas import Voice
# Voice catalog from Kokoro-82M (used by audiblez)
# https://huggingface.co/hexgrad/Kokoro-82M/blob/main/VOICES.md
#
# Each row is (id prefix, language, gender, voice names). A voice id is
# "<prefix>_<name>" and its display name is the capitalized name. Row and
# name order define the catalog's iteration order.
_VOICE_GROUPS = (
    ("af", "American English", "F", (
        "heart", "alloy", "aoede", "bella", "jessica", "kore",
        "nicole", "nova", "river", "sarah", "sky",
    )),
    ("am", "American English", "M", (
        "adam", "echo", "eric", "fenrir", "liam", "michael",
        "onyx", "puck", "santa",
    )),
    ("bf", "British English", "F", ("alice", "emma", "isabella", "lily")),
    ("bm", "British English", "M", ("daniel", "fable", "george", "lewis")),
    ("jf", "Japanese", "F", ("alpha", "gongitsune", "nezumi", "tebukuro")),
    ("jm", "Japanese", "M", ("kumo",)),
    ("zf", "Mandarin Chinese", "F", ("xiaobei", "xiaoni", "xiaoxiao", "xiaoyi")),
    ("zm", "Mandarin Chinese", "M", ("yunjian", "yunxi", "yunxia", "yunyang")),
    ("ef", "Spanish", "F", ("dora",)),
    ("em", "Spanish", "M", ("alex", "santa")),
    ("ff", "French", "F", ("siwis",)),
    ("hf", "Hindi", "F", ("alpha", "beta")),
    ("hm", "Hindi", "M", ("omega", "psi")),
    ("if", "Italian", "F", ("sara",)),
    ("im", "Italian", "M", ("nicola",)),
    ("pf", "Brazilian Portuguese", "F", ("dora",)),
    ("pm", "Brazilian Portuguese", "M", ("alex", "santa")),
)

VOICE_CATALOG = {
    f"{prefix}_{name}": Voice(
        id=f"{prefix}_{name}",
        name=name.capitalize(),
        language=language,
        gender=gender,
    )
    for prefix, language, gender, names in _VOICE_GROUPS
    for name in names
}
def get_all_voices() -> list[Voice]:
    """Return every catalog voice as a new list, in catalog order."""
    return [*VOICE_CATALOG.values()]
def get_voice(voice_id: str) -> Voice | None:
    """Look up a single voice in VOICE_CATALOG.

    Args:
        voice_id: Catalog key, e.g. ``"af_sky"``.

    Returns:
        The matching Voice, or None when the id is not in the catalog.
    """
    match = VOICE_CATALOG.get(voice_id, None)
    return match
def get_voices_by_language() -> dict[str, list[Voice]]:
    """Group the catalog's voices by their ``language`` attribute.

    Returns:
        A new dict mapping each language name to its voices. Languages and
        voices keep the order in which they appear in VOICE_CATALOG.
    """
    by_language: dict[str, list[Voice]] = {}
    for entry in VOICE_CATALOG.values():
        by_language.setdefault(entry.language, []).append(entry)
    return by_language

View file

@ -0,0 +1,13 @@
<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>Audiblez Web - EPUB to Audiobook</title>
    <!-- Favicon is an inline SVG data URI that renders the headphones emoji -->
    <link rel="icon" type="image/svg+xml" href="data:image/svg+xml,<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 100 100'><text y='.9em' font-size='90'>🎧</text></svg>" />
  </head>
  <body>
    <!-- Mount target looked up by id in /src/main.js -->
    <div id="app"></div>
    <script type="module" src="/src/main.js"></script>
  </body>
</html>

View file

@ -0,0 +1,279 @@
<script>
import FileUpload from './lib/FileUpload.svelte';
import VoicePicker from './lib/VoicePicker.svelte';
import JobsList from './lib/JobsList.svelte';
import AudiobooksList from './lib/AudiobooksList.svelte';
import { jobs } from './stores/jobs.js';
// Form and session state. `$state` makes each value reactive.
let uploadedFilename = $state(null); // set by handleFileUpload; reset to null once a job is created
let selectedVoice = $state('af_sky'); // voice id bound to <VoicePicker>; 'af_sky' is the initial selection
let speed = $state(1.0); // bound to the range input (0.5 to 2 in 0.1 steps)
let useGpu = $state(true); // bound to the checkbox; sent to the API as `use_gpu`
let isStarting = $state(false); // true while the POST /api/jobs request is in flight
let error = $state(null); // message rendered below the start button, or null
let currentUser = $state(null); // JSON payload of GET /api/me, or null when unavailable
// Fetch current user on mount
$effect(() => {
  fetchCurrentUser();
});
/**
 * Load the signed-in user from GET /api/me into `currentUser`.
 * A non-OK response leaves `currentUser` unchanged; network or parse
 * failures are logged to the console and otherwise ignored.
 */
async function fetchCurrentUser() {
  try {
    const res = await fetch('/api/me');
    if (!res.ok) {
      return;
    }
    currentUser = await res.json();
  } catch (err) {
    console.error('Failed to fetch user:', err);
  }
}
/**
 * Callback passed to <FileUpload> as `onUpload`: remembers the uploaded
 * file's name, which enables the start button and is sent with the job request.
 * @param {string} filename - presumably the server-side name of the upload; verify in FileUpload.svelte
 */
function handleFileUpload(filename) {
  uploadedFilename = filename;
}
/**
 * Create a conversion job for the uploaded file via POST /api/jobs.
 *
 * Requires an uploaded file and a selected voice; otherwise sets `error`
 * and returns. On success the returned job is added to the `jobs` store and
 * the uploaded filename is cleared. On failure `error` holds a readable
 * message. `isStarting` is true for the duration of the request.
 */
async function startConversion() {
  if (!uploadedFilename || !selectedVoice) {
    error = 'Please upload a file and select a voice';
    return;
  }
  error = null;
  isStarting = true;
  try {
    const response = await fetch('/api/jobs', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        filename: uploadedFilename,
        voice: selectedVoice,
        speed: speed,
        use_gpu: useGpu
      })
    });
    if (!response.ok) {
      // The error body is not guaranteed to be JSON (e.g. a proxy error
      // page), so a parse failure must not replace the real error.
      const data = await response.json().catch(() => null);
      // Only show `detail` when it is plain text; a structured detail would
      // otherwise render as "[object Object]".
      const detail = typeof data?.detail === 'string' ? data.detail : null;
      throw new Error(detail || 'Failed to start conversion');
    }
    const job = await response.json();
    jobs.add(job);
    // Reset form
    uploadedFilename = null;
  } catch (e) {
    error = e.message;
  } finally {
    isStarting = false;
  }
}
// Truthy only when a file is uploaded, a voice is selected and no request is in flight;
// drives the start button's `disabled` attribute.
let canStart = $derived(uploadedFilename && selectedVoice && !isStarting);
</script>
<main>
<header>
<div class="header-content">
<div>
<h1>Audiblez Web</h1>
<p class="subtitle">Convert EPUB to Audiobook</p>
</div>
{#if currentUser}
<div class="user-info">
<span class="user-name">{currentUser.name || currentUser.username}</span>
<span class="user-email">{currentUser.email}</span>
</div>
{/if}
</div>
</header>
<div class="content">
<div class="form-section">
<div class="upload-section">
<FileUpload onUpload={handleFileUpload} />
</div>
<div class="voice-section">
<VoicePicker bind:selectedVoice />
</div>
</div>
<div class="options-section">
<div class="option">
<label for="speed">Speed: {speed.toFixed(1)}x</label>
<input
type="range"
id="speed"
min="0.5"
max="2"
step="0.1"
bind:value={speed}
/>
</div>
<div class="option">
<label>
<input type="checkbox" bind:checked={useGpu} />
Use GPU (faster)
</label>
</div>
<button
class="start-btn"
disabled={!canStart}
onclick={startConversion}
>
{#if isStarting}
Starting...
{:else}
Start Conversion
{/if}
</button>
{#if error}
<p class="error">{error}</p>
{/if}
</div>
<JobsList />
<AudiobooksList />
</div>
</main>
<style>
:global(body) {
margin: 0;
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, sans-serif;
background: #f5f5f5;
}
main {
max-width: 900px;
margin: 0 auto;
padding: 2rem;
}
header {
text-align: center;
margin-bottom: 2rem;
}
.header-content {
display: flex;
justify-content: space-between;
align-items: center;
text-align: left;
}
.user-info {
display: flex;
flex-direction: column;
align-items: flex-end;
padding: 0.5rem 1rem;
background: #e8f0fe;
border-radius: 8px;
}
.user-name {
font-weight: 500;
color: #333;
}
.user-email {
font-size: 0.75rem;
color: #666;
}
h1 {
margin: 0;
color: #333;
font-size: 2rem;
}
.subtitle {
color: #666;
margin: 0.25rem 0 0;
}
.content {
background: white;
border-radius: 12px;
padding: 1.5rem;
box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1);
}
.form-section {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 1.5rem;
}
@media (max-width: 768px) {
.form-section {
grid-template-columns: 1fr;
}
}
.options-section {
margin-top: 1.5rem;
padding-top: 1.5rem;
border-top: 1px solid #e0e0e0;
display: flex;
flex-wrap: wrap;
gap: 1rem;
align-items: center;
}
.option {
display: flex;
align-items: center;
gap: 0.5rem;
}
.option label {
font-size: 0.875rem;
color: #666;
}
.option input[type="range"] {
width: 120px;
}
.option input[type="checkbox"] {
width: 16px;
height: 16px;
}
.start-btn {
margin-left: auto;
padding: 0.75rem 1.5rem;
background: #4a90d9;
color: white;
border: none;
border-radius: 8px;
font-size: 1rem;
font-weight: 500;
cursor: pointer;
transition: background 0.2s;
}
.start-btn:hover:not(:disabled) {
background: #3a7fc9;
}
.start-btn:disabled {
background: #ccc;
cursor: not-allowed;
}
.error {
color: #d32f2f;
font-size: 0.875rem;
width: 100%;
margin-top: 0.5rem;
}
</style>

View file

@ -0,0 +1,6 @@
import App from './App.svelte'
import { mount } from 'svelte'
// Mount the root App component into the element with id "app".
const app = mount(App, { target: document.getElementById('app') })
export default app

View file

@ -0,0 +1,28 @@
import { writable } from 'svelte/store';
/**
 * Build the jobs store: a writable list of job objects plus helpers to
 * add, patch, remove and reload entries. Jobs are matched by their `id`.
 */
function createJobsStore() {
  const store = writable([]);

  // Replace the whole list with GET /api/jobs. A non-OK response leaves the
  // list untouched; network or parse errors are logged and swallowed.
  const refresh = async () => {
    try {
      const response = await fetch('/api/jobs');
      if (!response.ok) {
        return;
      }
      store.set(await response.json());
    } catch (e) {
      console.error('Failed to fetch jobs:', e);
    }
  };

  return {
    subscribe: store.subscribe,
    set: store.set,
    // Append a job to the end of the list
    add(job) {
      store.update((list) => list.concat([job]));
    },
    // Shallow-merge `updates` into the job whose id matches
    updateJob(jobId, updates) {
      store.update((list) =>
        list.map((item) => (item.id === jobId ? { ...item, ...updates } : item))
      );
    },
    // Drop the job whose id matches
    remove(jobId) {
      store.update((list) => list.filter((item) => item.id !== jobId));
    },
    refresh
  };
}
export const jobs = createJobsStore();

View file

@ -0,0 +1,5 @@
import { vitePreprocess } from '@sveltejs/vite-plugin-svelte'
// Svelte configuration: components are preprocessed with vitePreprocess().
export default {
  preprocess: vitePreprocess()
}

View file

@ -0,0 +1,15 @@
import { defineConfig } from 'vite'
import { svelte } from '@sveltejs/vite-plugin-svelte'
// Vite configuration: Svelte plugin plus dev-server proxy rules.
export default defineConfig({
  plugins: [svelte()],
  server: {
    proxy: {
      // Forward API requests to the backend listening on localhost:8000
      '/api': 'http://localhost:8000',
      // Same backend for /ws; `ws: true` turns on WebSocket proxying
      '/ws': {
        target: 'ws://localhost:8000',
        ws: true
      }
    }
  }
})

View file

@ -0,0 +1,136 @@
#!/usr/bin/env python3
"""
Generate voice samples for all available voices.
Run this script in an environment with audiblez installed.
Usage:
python generate_samples.py [output_dir]
"""
import os
import sys
from pathlib import Path
# Sample text for voice preview; the same sentence is synthesized for every voice
SAMPLE_TEXT = "The quick brown fox jumps over the lazy dog. This is a sample of my voice for audiobook narration."
# All voices from Kokoro-82M (audiblez)
# The first letter of each id is used by generate_sample() as the Kokoro
# language code, so ids must keep the "<lang><gender>_<name>" shape.
VOICES = [
    # American English (20 voices)
    "af_alloy", "af_aoede", "af_bella", "af_heart", "af_jessica", "af_kore",
    "af_nicole", "af_nova", "af_river", "af_sarah", "af_sky",
    "am_adam", "am_echo", "am_eric", "am_fenrir", "am_liam", "am_michael",
    "am_onyx", "am_puck", "am_santa",
    # British English (8 voices)
    "bf_alice", "bf_emma", "bf_isabella", "bf_lily",
    "bm_daniel", "bm_fable", "bm_george", "bm_lewis",
    # Spanish (3 voices)
    "ef_dora", "em_alex", "em_santa",
    # French (1 voice)
    "ff_siwis",
    # Hindi (4 voices)
    "hf_alpha", "hf_beta", "hm_omega", "hm_psi",
    # Italian (2 voices)
    "if_sara", "im_nicola",
    # Japanese (5 voices)
    "jf_alpha", "jf_gongitsune", "jf_nezumi", "jf_tebukuro", "jm_kumo",
    # Brazilian Portuguese (3 voices)
    "pf_dora", "pm_alex", "pm_santa",
    # Mandarin Chinese (8 voices)
    "zf_xiaobei", "zf_xiaoni", "zf_xiaoxiao", "zf_xiaoyi",
    "zm_yunjian", "zm_yunxi", "zm_yunxia", "zm_yunyang",
]
def generate_sample(voice: str, output_dir: Path):
    """Generate ``<voice>.mp3`` in *output_dir* using Kokoro TTS and ffmpeg.

    The sample text is synthesized to a temporary WAV, which ffmpeg converts
    to MP3. The WAV is always removed afterwards, and a partial MP3 left by a
    failed ffmpeg run is deleted so a later run retries the voice instead of
    skipping it.

    Args:
        voice: Kokoro voice id; its first letter selects the language code.
        output_dir: Existing directory that receives the MP3.

    Returns:
        True if the MP3 already existed or was created, False on any failure.
        Errors are printed, never raised.
    """
    wav_file = None
    try:
        output_file = output_dir / f"{voice}.mp3"
        # Check before importing kokoro so finished samples are skipped even
        # where the TTS stack is not installed.
        if output_file.exists():
            print(f"Skipping {voice} - already exists")
            return True

        from kokoro import KPipeline

        print(f"Generating sample for {voice}...")
        # The first letter of the voice id is the Kokoro language code;
        # unknown letters fall back to 'a' (American English).
        lang_code = voice[0] if voice[0] in "abefhijpz" else 'a'
        pipeline = KPipeline(lang_code=lang_code)
        # The pipeline yields 3-tuples; only the last item (audio) is kept
        audio_chunks = [
            audio for _, _, audio in pipeline(SAMPLE_TEXT, voice=voice, speed=1.0)
        ]
        if not audio_chunks:
            print(f"Failed to generate audio for {voice}")
            return False

        import subprocess

        import numpy as np
        import soundfile as sf

        # Save as WAV first (written with a 24000 sample rate), then convert to MP3
        wav_file = output_dir / f"{voice}.wav"
        sf.write(str(wav_file), np.concatenate(audio_chunks), 24000)
        result = subprocess.run([
            "ffmpeg", "-y", "-i", str(wav_file),
            "-codec:a", "libmp3lame", "-qscale:a", "5",
            str(output_file)
        ], capture_output=True)
        if result.returncode == 0:
            print(f"Generated {output_file}")
            return True

        print(f"FFmpeg failed for {voice}: {result.stderr.decode(errors='replace')}")
        # Drop any partial MP3 so the next run does not treat it as finished
        output_file.unlink(missing_ok=True)
        return False
    except Exception as e:
        print(f"Error generating sample for {voice}: {e}")
        return False
    finally:
        # Remove the intermediate WAV on every exit path
        if wav_file is not None:
            wav_file.unlink(missing_ok=True)
def main():
    """Generate a sample for every voice in VOICES.

    The output directory is the first command-line argument, defaulting to
    ``samples``; it is created if missing. Every voice is attempted even if
    earlier ones fail, and the failed voices are listed at the end. The exit
    status is not affected by failures.
    """
    output_dir = Path(sys.argv[1]) if len(sys.argv) > 1 else Path("samples")
    output_dir.mkdir(parents=True, exist_ok=True)
    print(f"Generating voice samples to {output_dir}")
    print(f"Total voices: {len(VOICES)}")

    failed = []
    for voice in VOICES:
        if not generate_sample(voice, output_dir):
            failed.append(voice)

    # Without this summary, failures are only visible by scrolling the log
    if failed:
        print(f"Failed voices ({len(failed)}/{len(VOICES)}): {', '.join(failed)}")
    print("Done!")
if __name__ == "__main__":
main()

View file

@ -0,0 +1,2 @@
# Voice samples go here
# Run generate_samples.py to generate voice samples for all voices

View file

@ -0,0 +1,229 @@
mqtt:
enabled: false
birdseye:
quality: 25
detect:
fps: 1
enabled: true
go2rtc:
streams:
vermont-1:
- rtsp://admin:REDACTED_RTSP_PW@192.168.1.10:554/Streaming/Channels/101/3
cameras:
# NOTE(review): this was marked "temp disabled until valchedrym is back up", but both valchedrym cameras below are enabled - confirm and update.
valchedrym-cam-1:
enabled: true
ffmpeg:
inputs:
#- path: rtsp://admin:REDACTED_RTSP_PW@192.168.0.11:554/Streaming/Channels/101 # <----- The stream you want to use for detection
- path: rtsp://admin:REDACTED_RTSP_PW@valchedrym.ddns.net:554/Streaming/Channels/101 # <----- The stream you want to use for detection
detect:
enabled: false # <---- disable detection until you have a working camera feed
width: 704 # <---- update for your camera's resolution
height: 576 # <---- update for your camera's resolution
rtmp:
enabled: false
record:
enabled: false
snapshots:
enabled: false
objects:
# Optional: list of objects to track from labelmap.txt (full list - https://docs.frigate.video/configuration/objects)
track:
- person
- bicycle
- car
- bird
- cat
- dog
- horse
valchedrym-cam-2:
enabled: true
ffmpeg:
inputs:
#- path: rtsp://admin:REDACTED_RTSP_PW@192.168.0.11:554/Streaming/Channels/201 # <----- The stream you want to use for detection
- path: rtsp://admin:REDACTED_RTSP_PW@valchedrym.ddns.net:554/Streaming/Channels/201 # <----- The stream you want to use for detection
detect:
enabled: false # <---- disable detection until you have a working camera feed
width: 704 # <---- update for your camera's resolution
height: 576 # <---- update for your camera's resolution
rtmp:
enabled: false
record:
enabled: false
snapshots:
enabled: false
objects:
# Optional: list of objects to track from labelmap.txt (full list - https://docs.frigate.video/configuration/objects)
track:
- person
- bicycle
- car
- bird
- cat
- dog
- horse
vermont-1:
enabled: true
ffmpeg:
inputs:
- path: rtsp://admin:REDACTED_RTSP_PW@192.168.1.10:554/Streaming/Channels/101/3 # <----- The stream you want to use for detection
roles:
- record
rtmp:
enabled: false
record:
enabled: false
snapshots:
enabled: false
detect:
enabled: false
vermont-2:
enabled: true
ffmpeg:
inputs:
- path: rtsp://admin:REDACTED_RTSP_PW@192.168.1.10:554/Streaming/Channels/201/1 # <----- The stream you want to use for detection
detect:
enabled: false # <---- disable detection until you have a working camera feed
width: 704 # <---- update for your camera's resolution
height: 576 # <---- update for your camera's resolution
rtmp:
enabled: false
record:
enabled: false
snapshots:
enabled: false
vermont-3:
enabled: true
ffmpeg:
inputs:
- path: rtsp://admin:REDACTED_RTSP_PW@192.168.1.10:554/Streaming/Channels/301/1 # <----- The stream you want to use for detection
detect:
enabled: false # <---- disable detection until you have a working camera feed
width: 704 # <---- update for your camera's resolution
height: 576 # <---- update for your camera's resolution
rtmp:
enabled: false
record:
enabled: false
snapshots:
enabled: false
vermont-4:
enabled: true
ffmpeg:
inputs:
- path: rtsp://admin:REDACTED_RTSP_PW@192.168.1.10:554/Streaming/Channels/401/1 # <----- The stream you want to use for detection
detect:
enabled: false # <---- disable detection until you have a working camera feed
width: 704 # <---- update for your camera's resolution
height: 576 # <---- update for your camera's resolution
rtmp:
enabled: false
record:
enabled: false
snapshots:
enabled: false
vermont-5:
enabled: true
ffmpeg:
inputs:
- path: rtsp://admin:REDACTED_RTSP_PW@192.168.1.10:554/Streaming/Channels/501/1 # <----- The stream you want to use for detection
detect:
enabled: false # <---- disable detection until you have a working camera feed
width: 704 # <---- update for your camera's resolution
height: 576 # <---- update for your camera's resolution
rtmp:
enabled: false
record:
enabled: false
snapshots:
enabled: false
vermont-6:
enabled: true
ffmpeg:
inputs:
- path: rtsp://admin:REDACTED_RTSP_PW@192.168.1.10:554/Streaming/Channels/601/1 # <----- The stream you want to use for detection
detect:
enabled: false # <---- disable detection until you have a working camera feed
width: 704 # <---- update for your camera's resolution
height: 576 # <---- update for your camera's resolution
rtmp:
enabled: false
record:
enabled: false
snapshots:
enabled: false
vermont-7:
enabled: true
ffmpeg:
inputs:
- path: rtsp://admin:REDACTED_RTSP_PW@192.168.1.10:554/Streaming/Channels/701/1 # <----- The stream you want to use for detection
detect:
enabled: false # <---- disable detection until you have a working camera feed
width: 704 # <---- update for your camera's resolution
height: 576 # <---- update for your camera's resolution
rtmp:
enabled: false
record:
enabled: false
snapshots:
enabled: false
vermont-8:
enabled: true
ffmpeg:
inputs:
- path: rtsp://admin:REDACTED_RTSP_PW@192.168.1.10:554/Streaming/Channels/801/1 # <----- The stream you want to use for detection
detect:
enabled: false # <---- disable detection until you have a working camera feed
width: 704 # <---- update for your camera's resolution
height: 576 # <---- update for your camera's resolution
rtmp:
enabled: false
record:
enabled: false
snapshots:
enabled: false
vermont-9:
enabled: true
ffmpeg:
inputs:
- path: rtsp://admin:REDACTED_RTSP_PW@192.168.1.10:554/Streaming/Channels/901/1 # <----- The stream you want to use for detection
detect:
enabled: false # <---- disable detection until you have a working camera feed
width: 704 # <---- update for your camera's resolution
height: 576 # <---- update for your camera's resolution
rtmp:
enabled: false
record:
enabled: false
snapshots:
enabled: false
# london-ipcam:
# enabled: false
# ffmpeg:
# inputs:
# - path: rtsp://192.168.2.2:8554/london_cam # <----- The stream you want to use for detection
# roles:
# - rtmp
# - record
# - detect
# detect:
# enabled: False
# width: 1280
# height: 720
# record:
# enabled: False # Not needed for this camera but keeping for reference
# events:
# retain:
# default: 10
# objects:
# # Optional: list of objects to track from labelmap.txt (full list - https://docs.frigate.video/configuration/objects)
# track:
# - person
# - shoe
# - handbag
# - wine glass
# - knife
# - pizza
# - laptop
# - book