#!/usr/bin/env python3
# Timestamp: "2026-03-14 (ywatanabe)"
# File: scitex-audio/src/scitex_audio/__init__.py
"""
SciTeX Audio - Text-to-Speech with Multiple Backends

Backends (fallback order):
    - elevenlabs: ElevenLabs (paid, high quality, speed=1.2)
    - luxtts: LuxTTS (open-source, offline, voice-cloning, speed=2.0)
    - gtts: Google TTS (free, requires internet, speed=1.5)
    - pyttsx3: System TTS (offline, free, uses espeak/SAPI5)

Usage:
    from scitex_audio import speak
    speak("Hello, world!")

    from scitex_audio import get_tts, LuxTTS
    tts = get_tts("luxtts")
    tts.speak("Hello!")
"""
from __future__ import annotations as _annotations
import subprocess as _subprocess
# Resolve the installed distribution's version. When the distribution
# metadata cannot be found (e.g. running from a source checkout), or
# importlib.metadata itself is unavailable, use a local placeholder.
try:
    import importlib.metadata as _metadata

    try:
        __version__ = _metadata.version("scitex-audio")
    except _metadata.PackageNotFoundError:
        __version__ = "0.0.0+local"
    # Keep the helper alias out of the package namespace.
    del _metadata
except ImportError:  # pragma: no cover — only on ancient Pythons
    __version__ = "0.0.0+local"
# Import from engines subpackage (hide module from dir())
from ._engines import ElevenLabsTTS, GoogleTTS, LuxTTS, SystemTTS
from ._engines._base import BaseTTS as _BaseTTS
from ._engines._base import TTSBackend as _TTSBackend
def stop_speech() -> None:
    """Stop any speech that is currently playing.

    Runs ``pkill -9 espeak`` to kill running espeak processes. The call
    is best-effort: if ``pkill`` cannot be run, the error is ignored and
    the function still returns ``None``.
    """
    kill_command = ["pkill", "-9", "espeak"]
    try:
        _subprocess.run(kill_command, capture_output=True)
    except Exception:
        # Nothing useful can be done if the kill attempt itself fails.
        return None
    return None
def check_wsl_audio() -> dict:
    """Inspect whether audio can be played from a WSL environment.

    The host is treated as WSL when ``/mnt/c/Windows`` exists. In that
    case the WSLg mount (``/mnt/wslg``) and its PulseAudio server path
    (``/mnt/wslg/PulseServer``) are checked, ``pactl info`` is run against
    that server (5 second timeout), and ``powershell.exe`` is looked up
    on ``PATH`` as a Windows-side fallback.

    Returns
    -------
    dict
        Boolean flags ``is_wsl``, ``wslg_available``,
        ``pulse_server_exists``, ``pulse_connected`` and
        ``windows_fallback_available``, plus ``recommended``:
        ``"linux"`` outside WSL or when ``pactl info`` succeeds,
        ``"windows"`` when only PowerShell was found, ``"none"`` otherwise.
    """
    import os
    import shutil

    status = {
        "is_wsl": False,
        "wslg_available": False,
        "pulse_server_exists": False,
        "pulse_connected": False,
        "windows_fallback_available": False,
        "recommended": "linux",
    }

    # Outside WSL there is nothing further to probe.
    if not os.path.exists("/mnt/c/Windows"):
        status["recommended"] = "linux"
        return status

    status["is_wsl"] = True

    if os.path.exists("/mnt/wslg"):
        status["wslg_available"] = True

    if os.path.exists("/mnt/wslg/PulseServer"):
        status["pulse_server_exists"] = True
        try:
            # Point pactl at the WSLg server without touching os.environ.
            pulse_env = os.environ.copy()
            pulse_env["PULSE_SERVER"] = "unix:/mnt/wslg/PulseServer"
            probe = _subprocess.run(
                ["pactl", "info"],
                capture_output=True,
                timeout=5,
                env=pulse_env,
            )
            if probe.returncode == 0:
                status["pulse_connected"] = True
        except Exception:
            # A missing pactl or a timeout simply means "not connected".
            pass

    if shutil.which("powershell.exe"):
        status["windows_fallback_available"] = True

    # Prefer the Linux audio path, then the Windows fallback.
    if status["pulse_connected"]:
        choice = "linux"
    elif status["windows_fallback_available"]:
        choice = "windows"
    else:
        choice = "none"
    status["recommended"] = choice
    return status
from ._audio_check import check_local_audio_available
from ._env_registry import generate_template as generate_env_template
from ._stt import available_models, find_whisper_cli, find_whisper_model, transcribe
from ._tts import TTS
# Public API of the package: the names exported by
# ``from scitex_audio import *``, grouped by purpose.
__all__ = [
    "__version__",
    # Core
    "speak",
    "generate_bytes",
    "stop_speech",
    "get_tts",
    "available_backends",
    "announce_context",
    # Engine classes
    "TTS",
    "GoogleTTS",
    "ElevenLabsTTS",
    "SystemTTS",
    "LuxTTS",
    # Diagnostics
    "check_wsl_audio",
    "check_local_audio_available",
    # Configuration
    "generate_env_template",
    # STT (Speech-to-Text)
    "transcribe",
    "find_whisper_cli",
    "find_whisper_model",
    "available_models",
]
# Backend preference order: when get_tts() is called without a backend,
# it picks the first entry here that available_backends() reports.
FALLBACK_ORDER = ["elevenlabs", "luxtts", "gtts", "pyttsx3"]
def available_backends() -> list[str]:
    """List the TTS backends usable in the current environment.

    Backends are probed, and reported, in this order:

    - ``"pyttsx3"``: ``SystemTTS`` is truthy and a pyttsx3 engine can be
      initialised and stopped without raising.
    - ``"gtts"``: ``GoogleTTS`` is truthy.
    - ``"luxtts"``: ``LuxTTS`` is truthy and ``zipvoice`` can be imported.
    - ``"elevenlabs"``: ``ElevenLabsTTS`` is truthy and an API key is set
      in ``SCITEX_AUDIO_ELEVENLABS_API_KEY`` or ``ELEVENLABS_API_KEY``.

    Returns
    -------
    list[str]
        Names of the backends that passed their check; may be empty.
    """
    found = []

    if SystemTTS:
        try:
            import pyttsx3

            # Initialising the engine confirms a system driver is present.
            pyttsx3.init().stop()
        except Exception:
            pass
        else:
            found.append("pyttsx3")

    if GoogleTTS:
        found.append("gtts")

    if LuxTTS:
        try:
            import zipvoice  # noqa: F401
        except ImportError:
            pass
        else:
            found.append("luxtts")

    if ElevenLabsTTS:
        import os

        environ = os.environ
        # The package-specific variable takes precedence over the generic one.
        key = environ.get("SCITEX_AUDIO_ELEVENLABS_API_KEY") or environ.get(
            "ELEVENLABS_API_KEY"
        )
        if key:
            found.append("elevenlabs")

    return found
def get_tts(backend: str | None = None, **kwargs) -> _BaseTTS:
    """Build a TTS engine instance for the requested backend.

    Parameters
    ----------
    backend : str, optional
        One of ``"pyttsx3"``, ``"gtts"``, ``"elevenlabs"`` or ``"luxtts"``.
        When ``None``, the first entry of ``FALLBACK_ORDER`` that
        ``available_backends()`` reports is used.
    **kwargs
        Forwarded unchanged to the engine class constructor.

    Returns
    -------
    The constructed engine instance.

    Raises
    ------
    ValueError
        If no backend is available at all, or the requested backend
        cannot be served.
    """
    usable = available_backends()
    if not usable:
        raise ValueError(
            "No TTS backends available. Install one of:\n"
            "  pip install pyttsx3     # System TTS (offline, free)\n"
            "  pip install gTTS        # Google TTS (free, needs internet)\n"
            "  pip install scitex-audio[luxtts]  # LuxTTS (open-source, offline)\n"
            "  pip install elevenlabs  # ElevenLabs (paid, best quality)"
        )

    if backend is None:
        # Auto-select by preference order among the usable backends.
        backend = next((name for name in FALLBACK_ORDER if name in usable), None)

    # pyttsx3 additionally requires that its availability probe succeeded.
    if backend == "pyttsx3" and SystemTTS and "pyttsx3" in usable:
        return SystemTTS(**kwargs)
    if backend == "gtts" and GoogleTTS:
        return GoogleTTS(**kwargs)
    if backend == "elevenlabs" and ElevenLabsTTS:
        return ElevenLabsTTS(**kwargs)
    if backend == "luxtts" and LuxTTS:
        return LuxTTS(**kwargs)

    raise ValueError(f"Backend '{backend}' not available. Available: {usable}")
from ._speak import speak
def generate_bytes(
    text: str,
    backend: str | None = None,
    voice: str | None = None,
    **kwargs,
) -> bytes:
    """Synthesize ``text`` and return the audio as bytes, without playback.

    Parameters
    ----------
    text : str
        Text to synthesize.
    backend : str, optional
        Backend name passed to ``get_tts``; ``None`` lets it choose.
    voice : str, optional
        Passed to the engine's ``to_bytes`` as ``voice``.
    **kwargs
        Forwarded to ``get_tts`` when the engine is constructed.

    Returns
    -------
    bytes
        Whatever the engine's ``to_bytes`` returns.
    """
    engine = get_tts(backend, **kwargs)
    audio = engine.to_bytes(text, voice=voice)
    return audio
def announce_context(
    include_full_path: bool = False,
    speak_aloud: bool = True,
    branch_resolver=None,
    speak_fn=None,
) -> dict:
    """Announce the current working directory and git branch.

    Builds an orientation sentence (e.g. ``"Working in scitex-audio, on
    branch develop"``) and, by default, speaks it aloud. Useful when
    starting work in a new session.

    Parameters
    ----------
    include_full_path : bool
        Include the absolute path instead of just the directory name.
    speak_aloud : bool
        Speak the announcement (default True). When False, only the
        context dict is returned.
    branch_resolver : callable, optional
        Injectable callable ``(cwd: str) -> str | None`` returning the
        git branch name (testing seam). Its result is used as-is.
        Defaults to a real ``git rev-parse`` subprocess.
    speak_fn : callable, optional
        Injectable speak function (testing seam). Defaults to
        :func:`speak`.

    Returns
    -------
    dict
        ``{"directory": str, "directory_name": str, "git_branch": str | None,
        "announced_text": str, "spoke": bool}``. ``directory_name`` is
        always the base name of the directory, even when
        ``include_full_path`` is True. With the default resolver,
        ``git_branch`` is ``None`` when git is unavailable, the command
        fails, or HEAD is detached.
    """
    import os

    cwd = os.getcwd()
    dir_name = cwd if include_full_path else os.path.basename(cwd)

    if branch_resolver is not None:
        git_branch = branch_resolver(cwd)
    else:
        git_branch = None
        try:
            result = _subprocess.run(
                ["git", "rev-parse", "--abbrev-ref", "HEAD"],
                capture_output=True,
                text=True,
                cwd=cwd,
                timeout=5,
            )
            if result.returncode == 0:
                branch = result.stdout.strip()
                # On a detached HEAD git prints the literal "HEAD", which
                # is not a branch name; report "no branch" instead of
                # announcing "on branch HEAD".
                if branch and branch != "HEAD":
                    git_branch = branch
        except Exception:
            # Best-effort: git missing, not a repository, or a timeout
            # must never prevent the announcement itself.
            pass

    if git_branch:
        text = f"Working in {dir_name}, on branch {git_branch}"
    else:
        text = f"Working in {dir_name}"

    spoke = False
    if speak_aloud:
        _speak = speak_fn if speak_fn is not None else speak
        _speak(text)
        spoke = True

    return {
        "directory": cwd,
        "directory_name": os.path.basename(cwd),
        "git_branch": git_branch,
        "announced_text": text,
        "spoke": spoke,
    }
# EOF