#!/usr/bin/env python3
# Timestamp: "2025-12-11 (ywatanabe)"
# File: /home/ywatanabe/proj/scitex-code/src/scitex/audio/engines/elevenlabs_engine.py
# ----------------------------------------
"""
ElevenLabs TTS backend - High quality, requires API key and payment.
"""
from __future__ import annotations

import logging
import os
from pathlib import Path
from typing import List, Optional

from ._base import BaseTTS
__all__ = ["ElevenLabsTTS"]
class ElevenLabsTTS(BaseTTS):
    """ElevenLabs TTS backend.

    High-quality voices but requires API key and has usage costs.

    Environment:
        SCITEX_AUDIO_ELEVENLABS_API_KEY: Your ElevenLabs API key (checked
            first).
        ELEVENLABS_API_KEY: Fallback used when the variable above is unset
            or empty.
    """

    # Friendly (lower-case) voice name -> ElevenLabs voice ID.
    VOICES = {
        "adam": "pNInz6obpgDQGcFmaJgB",
        "sarah": "EXAVITQu4vr4xnSDxMaL",
        "laura": "FGY2WhTYpPnrIDTdsKH5",
        "charlie": "IKne3meq5aSn9XLyUdCD",
        "george": "JBFqnCBsd6RMkjVDRZzb",
        "callum": "N2lVS1w4EtoT3dr4eOWO",
        "river": "SAz9YHcvj6GT2YYXdXww",
        "liam": "TX3LPaxmHKxFdv7VOQHJ",
        "alice": "Xb7hH8MSUJpSbSDYk0k2",
        "matilda": "XrExE9yKIg1WjnnlVkGX",
        "will": "bIHbv24MWmeRgasZH58o",
        "jessica": "cgSgspJ2msm6clMCkdW9",
        "eric": "cjVigY5qzO86Huf0OWal",
        "bella": "hpp4J3VqNfWAUOO0d1Us",
        "chris": "iP95p4xoKVk53GoZ742B",
        "brian": "nPczCjzI2devNBz1zQrb",
        "daniel": "onwK4e9ZLuTAKqWW03F9",
        "lily": "pFZP5JQG7iQjIQuC4Bku",
        "roger": "CwhRBWXzGAHq8TQ4Fs17",
        "harry": "SOYHLrjzK2X1ezoPC6cr",
        "rachel": "21m00Tcm4TlvDq8ikWAM",
        "antoni": "ErXwobaYiN019PkySvjV",
        "domi": "AZnzlk1XvdvUeBnXmlld",
        "elli": "MF3mGyEYCl7XYWbV9V6O",
        "josh": "TxGEqnHWrfWFTfGW9XjX",
        "sam": "yoZ06aMxZJJ28mfd3POQ",
    }

    # ElevenLabs API speed limits
    MIN_SPEED = 0.7
    MAX_SPEED = 1.2

    def __init__(
        self,
        api_key: Optional[str] = None,
        voice: str = "adam",
        model_id: str = "eleven_multilingual_v2",
        stability: float = 0.5,
        similarity_boost: float = 0.75,
        speed: float = 1.0,
        client=None,
        **kwargs,
    ):
        """Store the synthesis settings; no network access happens here.

        Args:
            api_key: ElevenLabs API key. When falsy, falls back to the
                SCITEX_AUDIO_ELEVENLABS_API_KEY and then the
                ELEVENLABS_API_KEY environment variables.
            voice: Default voice, either a key of ``VOICES`` or a raw
                ElevenLabs voice ID.
            model_id: Model identifier forwarded to the API.
            stability: ``stability`` entry of the voice settings.
            similarity_boost: ``similarity_boost`` entry of the voice
                settings.
            speed: Speaking speed; clamped into
                ``[MIN_SPEED, MAX_SPEED]``.
            client: Optional pre-built client (testing / custom
                transport). When None, the ElevenLabs SDK client is
                created on first access of ``client``.
            **kwargs: Forwarded unchanged to ``BaseTTS.__init__``.
        """
        super().__init__(**kwargs)
        self.api_key = (
            api_key
            or os.environ.get("SCITEX_AUDIO_ELEVENLABS_API_KEY")
            or os.environ.get("ELEVENLABS_API_KEY")
        )
        self.voice = voice
        self.model_id = model_id
        self.stability = stability
        self.similarity_boost = similarity_boost
        # Clamp speed to ElevenLabs API limits (0.7-1.2)
        self.speed = max(self.MIN_SPEED, min(self.MAX_SPEED, speed))
        # Optional injected client (testing / custom transport). When None,
        # the real ElevenLabs SDK is lazy-loaded on first `client` access.
        self._client = client

    @property
    def name(self) -> str:
        """Backend identifier."""
        return "elevenlabs"

    @property
    def requires_api_key(self) -> bool:
        """This backend always needs an API key."""
        return True

    @property
    def requires_internet(self) -> bool:
        """This backend always needs network access."""
        return True

    @property
    def client(self):
        """Return the API client, creating the SDK client on first use.

        Raises:
            ImportError: If the ``elevenlabs`` package is not installed
                and no client was injected.
        """
        if self._client is None:
            # Only the import is guarded, so an ImportError raised while
            # constructing the client is not misreported as a missing package.
            try:
                from elevenlabs.client import ElevenLabs
            except ImportError as err:
                raise ImportError(
                    "elevenlabs package not installed. "
                    "Install with: pip install elevenlabs"
                ) from err
            self._client = ElevenLabs(api_key=self.api_key)
        return self._client

    def _get_voice_id(self, voice: Optional[str] = None) -> str:
        """Get voice ID from name or return as-is if already an ID.

        Args:
            voice: Voice name or ID; when falsy, ``self.voice`` is used.

        Returns:
            The mapped ID when the lower-cased value is a key of
            ``VOICES``; otherwise the value unchanged (original case kept).
        """
        v = voice or self.voice
        normalized = v.lower()
        return self.VOICES.get(normalized, v)

    def synthesize(self, text: str, output_path: str) -> Path:
        """Synthesize text using ElevenLabs API.

        Args:
            text: Text to speak.
            output_path: Destination file; the audio is requested with
                ``output_format="mp3_44100_128"``.

        Returns:
            ``output_path`` as a ``Path``.
        """
        # NOTE(review): `self.config` is not defined in this class;
        # presumably BaseTTS populates it from **kwargs - confirm.
        voice_id = self._get_voice_id(self.config.get("voice"))
        audio = self.client.text_to_speech.convert(
            text=text,
            voice_id=voice_id,
            model_id=self.model_id,
            voice_settings={
                "stability": self.stability,
                "similarity_boost": self.similarity_boost,
                "speed": self.speed,
            },
            output_format="mp3_44100_128",
        )
        out_path = Path(output_path)
        with open(out_path, "wb") as f:
            if isinstance(audio, (bytes, bytearray, memoryview)):
                # A client may hand back the whole payload at once; iterating
                # a bytes object would yield ints and break f.write().
                f.write(audio)
            else:
                for chunk in audio:
                    f.write(chunk)
        return out_path

    def get_voices(self) -> List[dict]:
        """Get available voices.

        Returns:
            One dict per preset in ``VOICES`` (``type`` = ``"preset"``),
            followed by the voices reported by the API
            (``type`` = ``"custom"``, plus ``labels``). The API lookup is
            best-effort: on any failure the voices gathered so far are
            returned and the error is logged at debug level.
        """
        # Start with preset voices
        voices = [
            {"name": name, "id": vid, "type": "preset"}
            for name, vid in self.VOICES.items()
        ]
        # Try to get custom voices
        try:
            response = self.client.voices.get_all()
            for v in response.voices:
                voices.append(
                    {
                        "name": v.name,
                        "id": v.voice_id,
                        "type": "custom",
                        "labels": v.labels,
                    }
                )
        except Exception:
            # Deliberately non-fatal (missing package, missing key, network
            # error, ...), but leave a trace instead of failing silently.
            logging.getLogger(__name__).debug(
                "Could not fetch ElevenLabs voices from the API; "
                "returning the voices collected so far.",
                exc_info=True,
            )
        return voices
# EOF