# Source code for scitex_audio._engines._elevenlabs_engine

#!/usr/bin/env python3
# Timestamp: "2025-12-11 (ywatanabe)"
# File: /home/ywatanabe/proj/scitex-code/src/scitex/audio/engines/elevenlabs_engine.py
# ----------------------------------------

"""
ElevenLabs TTS backend - High quality, requires API key and payment.
"""

from __future__ import annotations

import logging
import os
from pathlib import Path
from typing import List, Optional

from ._base import BaseTTS

__all__ = ["ElevenLabsTTS"]


class ElevenLabsTTS(BaseTTS):
    """ElevenLabs TTS backend.

    High-quality voices but requires API key and has usage costs.

    Environment:
        SCITEX_AUDIO_ELEVENLABS_API_KEY: Your ElevenLabs API key (checked first)
        ELEVENLABS_API_KEY: Your ElevenLabs API key (fallback)
    """

    # Preset voice names mapped to ElevenLabs voice IDs.
    VOICES = {
        "adam": "pNInz6obpgDQGcFmaJgB",
        "sarah": "EXAVITQu4vr4xnSDxMaL",
        "laura": "FGY2WhTYpPnrIDTdsKH5",
        "charlie": "IKne3meq5aSn9XLyUdCD",
        "george": "JBFqnCBsd6RMkjVDRZzb",
        "callum": "N2lVS1w4EtoT3dr4eOWO",
        "river": "SAz9YHcvj6GT2YYXdXww",
        "liam": "TX3LPaxmHKxFdv7VOQHJ",
        "alice": "Xb7hH8MSUJpSbSDYk0k2",
        "matilda": "XrExE9yKIg1WjnnlVkGX",
        "will": "bIHbv24MWmeRgasZH58o",
        "jessica": "cgSgspJ2msm6clMCkdW9",
        "eric": "cjVigY5qzO86Huf0OWal",
        "bella": "hpp4J3VqNfWAUOO0d1Us",
        "chris": "iP95p4xoKVk53GoZ742B",
        "brian": "nPczCjzI2devNBz1zQrb",
        "daniel": "onwK4e9ZLuTAKqWW03F9",
        "lily": "pFZP5JQG7iQjIQuC4Bku",
        "roger": "CwhRBWXzGAHq8TQ4Fs17",
        "harry": "SOYHLrjzK2X1ezoPC6cr",
        "rachel": "21m00Tcm4TlvDq8ikWAM",
        "antoni": "ErXwobaYiN019PkySvjV",
        "domi": "AZnzlk1XvdvUeBnXmlld",
        "elli": "MF3mGyEYCl7XYWbV9V6O",
        "josh": "TxGEqnHWrfWFTfGW9XjX",
        "sam": "yoZ06aMxZJJ28mfd3POQ",
    }

    # ElevenLabs API speed limits
    MIN_SPEED = 0.7
    MAX_SPEED = 1.2

    def __init__(
        self,
        api_key: Optional[str] = None,
        voice: str = "adam",
        model_id: str = "eleven_multilingual_v2",
        stability: float = 0.5,
        similarity_boost: float = 0.75,
        speed: float = 1.0,
        client=None,
        **kwargs,
    ):
        """Configure the backend.

        Args:
            api_key: API key. When falsy, falls back to the
                ``SCITEX_AUDIO_ELEVENLABS_API_KEY`` and then the
                ``ELEVENLABS_API_KEY`` environment variable.
            voice: Default voice, either a key of ``VOICES`` or a voice ID.
            model_id: Model identifier sent with each synthesis request.
            stability: Sent as the ``stability`` voice setting.
            similarity_boost: Sent as the ``similarity_boost`` voice setting.
            speed: Sent as the ``speed`` voice setting after being clamped
                to ``[MIN_SPEED, MAX_SPEED]``.
            client: Optional pre-built client (testing / custom transport).
            **kwargs: Forwarded unchanged to ``BaseTTS.__init__``.
        """
        super().__init__(**kwargs)
        self.api_key = (
            api_key
            or os.environ.get("SCITEX_AUDIO_ELEVENLABS_API_KEY")
            or os.environ.get("ELEVENLABS_API_KEY")
        )
        self.voice = voice
        self.model_id = model_id
        self.stability = stability
        self.similarity_boost = similarity_boost
        # Clamp speed to ElevenLabs API limits (0.7-1.2)
        self.speed = max(self.MIN_SPEED, min(self.MAX_SPEED, speed))
        # Optional injected client (testing / custom transport). When None,
        # the real ElevenLabs SDK is lazy-loaded on first `client` access.
        self._client = client

    @property
    def name(self) -> str:
        """Return the backend identifier, ``"elevenlabs"``."""
        return "elevenlabs"

    @property
    def requires_api_key(self) -> bool:
        """Return True: this backend needs an API key."""
        return True

    @property
    def requires_internet(self) -> bool:
        """Return True: this backend calls a remote API."""
        return True

    @property
    def client(self):
        """Lazy-load ElevenLabs client.

        Returns the injected client when one was given to ``__init__``;
        otherwise builds the SDK client on first access and caches it.

        Raises:
            ImportError: If the ``elevenlabs`` package is not installed.
        """
        if self._client is None:
            # Only the import is guarded so that an unrelated ImportError
            # raised while constructing the client is not misreported.
            try:
                from elevenlabs.client import ElevenLabs
            except ImportError as err:
                raise ImportError(
                    "elevenlabs package not installed. "
                    "Install with: pip install elevenlabs"
                ) from err
            self._client = ElevenLabs(api_key=self.api_key)
        return self._client

    def _get_voice_id(self, voice: Optional[str] = None) -> str:
        """Get voice ID from name or return as-is if already an ID.

        Args:
            voice: Voice name or ID; falls back to ``self.voice`` when falsy.

        Returns:
            The ID from ``VOICES`` when the lower-cased value is a known
            preset name, otherwise the value unchanged.
        """
        v = voice or self.voice
        normalized = v.lower()
        return self.VOICES.get(normalized, v)

    def synthesize(self, text: str, output_path: str) -> Path:
        """Synthesize text using ElevenLabs API.

        Args:
            text: Text to convert to speech.
            output_path: Destination file; written as MP3 (44.1 kHz, 128 kbps
                per the requested ``mp3_44100_128`` output format).

        Returns:
            ``output_path`` as a ``Path``.
        """
        # NOTE(review): ``self.config`` is not set in this class; presumably
        # provided by BaseTTS - confirm.
        voice_id = self._get_voice_id(self.config.get("voice"))

        audio = self.client.text_to_speech.convert(
            text=text,
            voice_id=voice_id,
            model_id=self.model_id,
            voice_settings={
                "stability": self.stability,
                "similarity_boost": self.similarity_boost,
                "speed": self.speed,
            },
            output_format="mp3_44100_128",
        )

        out_path = Path(output_path)
        with open(out_path, "wb") as f:
            if isinstance(audio, (bytes, bytearray)):
                # A client may hand back the whole payload at once; iterating
                # bytes would yield ints, which cannot be written.
                f.write(audio)
            else:
                for chunk in audio:
                    f.write(chunk)

        return out_path

    def get_voices(self) -> List[dict]:
        """Get available voices.

        Returns:
            The preset voices from ``VOICES`` (``type`` = ``"preset"``),
            followed by the voices reported by the API (``type`` =
            ``"custom"``) when that lookup succeeds. The API lookup is
            best-effort: on any failure only the presets are returned.
        """
        # Start with preset voices
        voices = [
            {"name": name, "id": vid, "type": "preset"}
            for name, vid in self.VOICES.items()
        ]

        # Try to get custom voices
        try:
            response = self.client.voices.get_all()
            custom = [
                {
                    "name": v.name,
                    "id": v.voice_id,
                    "type": "custom",
                    "labels": v.labels,
                }
                for v in response.voices
            ]
        except Exception:
            # Deliberately non-fatal (no key, no network, SDK missing), but
            # leave a trace instead of discarding the error silently.
            logging.getLogger(__name__).debug(
                "Could not fetch ElevenLabs voices; returning presets only",
                exc_info=True,
            )
        else:
            voices.extend(custom)

        return voices
# EOF