#!/usr/bin/env python3
# Timestamp: "2025-12-11 (ywatanabe)"
# File: /home/ywatanabe/proj/scitex-code/src/scitex/audio/engines/gtts_engine.py
# ----------------------------------------
"""
Google Text-to-Speech (gTTS) backend - Free, requires internet.
"""
from __future__ import annotations
import io
from pathlib import Path
from typing import List
from ._base import BaseTTS
__all__ = ["GoogleTTS"]
class GoogleTTS(BaseTTS):
    """Google Text-to-Speech backend using gTTS.

    Free to use, requires internet connection.
    Good quality voices with multi-language support.
    Supports speed control via pydub (requires ffmpeg).

    Install: pip install gTTS pydub

    Parameters
    ----------
    lang : str
        Default language, given as a code (``"en"``) or as one of the
        names listed in ``LANGUAGES`` (``"English"``).
    slow : bool
        Forwarded to gTTS as its ``slow`` flag.
    speed : float
        Default playback-speed multiplier: 1.0 = normal, >1.0 = faster,
        <1.0 = slower. Any value other than 1.0 routes synthesis through
        pydub.
    gtts_factory : callable, optional
        Injected replacement for the ``gTTS`` class (testing). Must accept
        ``text=``, ``lang=`` and ``slow=`` keyword arguments and return an
        object exposing ``.save(path)`` and ``.write_to_fp(buffer)``.
        When None, the real gTTS class is imported lazily at synthesis time.
    **kwargs
        Forwarded unchanged to ``BaseTTS.__init__``.
    """

    # Supported languages (subset of most common): code -> display name.
    LANGUAGES = {
        "en": "English",
        "es": "Spanish",
        "fr": "French",
        "de": "German",
        "it": "Italian",
        "pt": "Portuguese",
        "ru": "Russian",
        "ja": "Japanese",
        "ko": "Korean",
        "zh-CN": "Chinese (Simplified)",
        "zh-TW": "Chinese (Traditional)",
        "ar": "Arabic",
        "hi": "Hindi",
        "nl": "Dutch",
        "pl": "Polish",
        "sv": "Swedish",
        "tr": "Turkish",
        "vi": "Vietnamese",
    }

    def __init__(
        self,
        lang: str = "en",
        slow: bool = False,
        speed: float = 1.5,
        gtts_factory=None,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.lang = lang
        self.slow = slow
        self.speed = speed  # 1.0 = normal, >1.0 = faster, <1.0 = slower
        # Optional injected gTTS factory (testing). A callable with the
        # gTTS(text=..., lang=..., slow=...) signature returning an object
        # exposing .save(path) and .write_to_fp(buffer). When None, the real
        # gTTS class is imported lazily at synthesis time.
        self._gtts_factory = gtts_factory

    def _resolve_gtts(self):
        """Return the gTTS factory: the injected one, or the real SDK class.

        Raises
        ------
        ImportError
            If no factory was injected and the ``gtts`` package is missing.
        """
        if self._gtts_factory is not None:
            return self._gtts_factory
        try:
            from gtts import gTTS
        except ImportError as e:
            # Chain the cause, matching the pydub import error below.
            raise ImportError(
                "gTTS package not installed. Install with: pip install gTTS"
            ) from e
        return gTTS

    def _normalize_lang(self, lang: str) -> str:
        """Map a language code or language name onto a ``LANGUAGES`` code.

        Codes and names are matched case-insensitively, ignoring surrounding
        whitespace. Values that match nothing are returned unchanged so that
        codes outside the ``LANGUAGES`` subset are still handed to gTTS.
        """
        if lang in self.LANGUAGES:
            return lang  # Exact code: nothing to do.
        wanted = lang.strip().lower()
        for code, name in self.LANGUAGES.items():
            if wanted in (code.lower(), name.lower()):
                return code
        return lang

    @property
    def name(self) -> str:
        """Identifier of this backend."""
        return "gtts"

    @property
    def requires_internet(self) -> bool:
        """Whether this backend needs network access (always True)."""
        return True

    def synthesize(self, text: str, output_path: str) -> Path:
        """Synthesize text using Google TTS with optional speed control.

        The language is read from ``self.config["voice"]`` and the speed from
        ``self.config["speed"]``; a missing or ``None`` entry falls back to
        the instance defaults ``self.lang`` / ``self.speed``.

        Parameters
        ----------
        text : str
            Text to synthesize.
        output_path : str
            Destination file; the audio is written as MP3.

        Returns
        -------
        Path
            ``output_path`` as a ``Path``.

        Raises
        ------
        ImportError
            If gTTS is unavailable, or pydub is unavailable while a speed
            other than 1.0 is requested.
        ValueError
            If the requested speed is not positive.
        """
        # Resolve first so a missing gTTS install is reported before
        # anything else is attempted.
        gtts_cls = self._resolve_gtts()

        # An explicit None in the config means "not set", same as a
        # missing key.
        lang = self.config.get("voice")
        if lang is None:
            lang = self.lang
        lang = self._normalize_lang(lang)

        speed = self.config.get("speed")
        if speed is None:
            speed = self.speed

        out_path = Path(output_path)
        if speed != 1.0:
            # Speed change requested: post-process through pydub.
            sound = self._synthesize_with_speed(text, lang, speed)
            sound.export(str(out_path), format="mp3")
        else:
            # Normal speed: let gTTS write the file directly (no pydub).
            gtts_cls(text=text, lang=lang, slow=self.slow).save(str(out_path))
        return out_path

    def _synthesize_with_speed(self, text: str, lang: str, speed: float):
        """Synthesize with speed control using pydub.

        Parameters
        ----------
        text : str
            Text to synthesize.
        lang : str
            Language code.
        speed : float
            Speed multiplier (>1.0 faster, <1.0 slower). Must be positive.

        Returns
        -------
        AudioSegment
            The synthesized audio with adjusted speed.

        Raises
        ------
        ValueError
            If ``speed`` is zero or negative.
        ImportError
            If pydub (or gTTS) is not installed.
        """
        # Reject nonsensical speeds before spending a network request; a
        # non-positive multiplier would otherwise yield an invalid frame rate.
        if speed <= 0:
            raise ValueError(f"speed must be positive, got {speed!r}")

        try:
            from pydub import AudioSegment
        except ImportError as e:
            raise ImportError(
                "pydub package required for speed control. "
                "Install with: pip install pydub"
            ) from e

        gtts_cls = self._resolve_gtts()

        # Generate speech into an in-memory buffer and decode it while the
        # buffer is still open.
        with io.BytesIO() as buffer:
            gtts_cls(text=text, lang=lang, slow=self.slow).write_to_fp(buffer)
            buffer.seek(0)
            sound = AudioSegment.from_file(buffer, format="mp3")

        if speed > 1.0:
            # Faster playback via pydub's speedup effect.
            sound = sound.speedup(
                playback_speed=speed, chunk_size=150, crossfade=25
            )
        elif speed < 1.0:
            # Slower playback: reinterpret the raw samples at a lower frame
            # rate, then resample back to the original rate (this also
            # lowers the pitch).
            new_frame_rate = int(sound.frame_rate * speed)
            sound = sound._spawn(
                sound.raw_data, overrides={"frame_rate": new_frame_rate}
            ).set_frame_rate(sound.frame_rate)
        return sound

    def get_voices(self) -> List[dict]:
        """Get available languages as 'voices'.

        Returns
        -------
        list of dict
            One ``{"name", "id", "type"}`` entry per ``LANGUAGES`` item,
            where ``id`` is the language code and ``type`` is ``"language"``.
        """
        return [
            {"name": name, "id": code, "type": "language"}
            for code, name in self.LANGUAGES.items()
        ]
# EOF