Source code for scitex_audio

#!/usr/bin/env python3
# Timestamp: "2026-03-14 (ywatanabe)"
# File: scitex-audio/src/scitex_audio/__init__.py
"""
SciTeX Audio - Text-to-Speech with Multiple Backends

Backends (fallback order):
    - elevenlabs: ElevenLabs (paid, high quality, speed=1.2)
    - luxtts: LuxTTS (open-source, offline, voice-cloning, speed=2.0)
    - gtts: Google TTS (free, requires internet, speed=1.5)
    - pyttsx3: System TTS (offline, free, uses espeak/SAPI5)

Usage:
    from scitex_audio import speak
    speak("Hello, world!")

    from scitex_audio import get_tts, LuxTTS
    tts = get_tts("luxtts")
    tts.speak("Hello!")
"""

from __future__ import annotations as _annotations

import subprocess as _subprocess

try:
    # Bind the metadata module under one private alias so that a single
    # ``del`` is enough to keep the package namespace clean afterwards.
    import importlib.metadata as _metadata

    try:
        # Version recorded in the installed distribution's metadata.
        __version__ = _metadata.version("scitex-audio")
    except _metadata.PackageNotFoundError:
        # No distribution metadata found (e.g. an uninstalled checkout).
        __version__ = "0.0.0+local"
    del _metadata
except ImportError:  # pragma: no cover — only on ancient Pythons
    __version__ = "0.0.0+local"
# Import from engines subpackage (hide module from dir())
from ._engines import ElevenLabsTTS, GoogleTTS, LuxTTS, SystemTTS
from ._engines._base import BaseTTS as _BaseTTS
from ._engines._base import TTSBackend as _TTSBackend



[docs]
def stop_speech() -> None:
    """Halt speech playback by force-killing ``espeak`` processes.

    Runs ``pkill -9 espeak`` with its output captured. This is a best-effort
    operation: any exception raised while launching the command is ignored,
    and the command's exit status is not inspected.
    """
    kill_command = ["pkill", "-9", "espeak"]
    try:
        _subprocess.run(kill_command, capture_output=True)
    except Exception:
        # Nothing useful can be done if the kill attempt itself fails
        # (for instance when ``pkill`` is not installed).
        return None




[docs]
def check_wsl_audio() -> dict:
    """Probe the WSL audio setup and report what is usable.

    Returns
    -------
    dict
        ``is_wsl``: ``/mnt/c/Windows`` exists.
        ``wslg_available``: ``/mnt/wslg`` exists.
        ``pulse_server_exists``: ``/mnt/wslg/PulseServer`` exists.
        ``pulse_connected``: ``pactl info`` exited with status 0 when pointed
        at that socket.
        ``windows_fallback_available``: ``powershell.exe`` is on ``PATH``.
        ``recommended``: ``"linux"``, ``"windows"`` or ``"none"``.

        Outside WSL every flag stays ``False`` and ``recommended`` is
        ``"linux"``.
    """
    import os
    import shutil

    status = {
        "is_wsl": False,
        "wslg_available": False,
        "pulse_server_exists": False,
        "pulse_connected": False,
        "windows_fallback_available": False,
        "recommended": "linux",
    }

    # Not WSL: no further probing is performed.
    if not os.path.exists("/mnt/c/Windows"):
        status["recommended"] = "linux"
        return status

    status["is_wsl"] = True
    status["wslg_available"] = os.path.exists("/mnt/wslg")

    if os.path.exists("/mnt/wslg/PulseServer"):
        status["pulse_server_exists"] = True
        try:
            # Query the WSLg PulseAudio socket explicitly, regardless of
            # what PULSE_SERVER is set to in the calling environment.
            pulse_env = os.environ.copy()
            pulse_env["PULSE_SERVER"] = "unix:/mnt/wslg/PulseServer"
            completed = _subprocess.run(
                ["pactl", "info"],
                capture_output=True,
                timeout=5,
                env=pulse_env,
            )
            status["pulse_connected"] = completed.returncode == 0
        except Exception:
            # A missing ``pactl``, a timeout, etc. all count as "not connected".
            pass

    status["windows_fallback_available"] = bool(shutil.which("powershell.exe"))

    # Prefer native PulseAudio, then PowerShell playback, otherwise nothing.
    if status["pulse_connected"]:
        status["recommended"] = "linux"
    elif status["windows_fallback_available"]:
        status["recommended"] = "windows"
    else:
        status["recommended"] = "none"

    return status



from ._audio_check import check_local_audio_available
from ._env_registry import generate_template as generate_env_template
from ._stt import available_models, find_whisper_cli, find_whisper_model, transcribe
from ._tts import TTS

# Public API of the package: the names exported by ``from scitex_audio import *``.
__all__ = [
    "__version__",
    # Core
    "speak",
    "generate_bytes",
    "stop_speech",
    "get_tts",
    "available_backends",
    "announce_context",
    # Engine classes
    "TTS",
    "GoogleTTS",
    "ElevenLabsTTS",
    "SystemTTS",
    "LuxTTS",
    # Diagnostics
    "check_wsl_audio",
    "check_local_audio_available",
    # Configuration
    "generate_env_template",
    # STT (Speech-to-Text)
    "transcribe",
    "find_whisper_cli",
    "find_whisper_model",
    "available_models",
]

# Backend preference used by ``get_tts`` when no backend is requested: the
# first entry that ``available_backends`` also reports is selected.
FALLBACK_ORDER = ["elevenlabs", "luxtts", "gtts", "pyttsx3"]



[docs]
def available_backends() -> list[str]:
    """Detect which TTS backends can be used right now.

    Returns
    -------
    list[str]
        Backend names in detection order (``"pyttsx3"``, ``"gtts"``,
        ``"luxtts"``, ``"elevenlabs"``), which is not the fallback order.
        A backend is listed only when its engine class is truthy and:

        - ``pyttsx3``: an engine can be initialised and stopped without error;
        - ``gtts``: no further condition;
        - ``luxtts``: the ``zipvoice`` package is importable;
        - ``elevenlabs``: ``SCITEX_AUDIO_ELEVENLABS_API_KEY`` or
          ``ELEVENLABS_API_KEY`` is set to a non-empty value.
    """
    found = []

    if SystemTTS:
        try:
            import pyttsx3

            # Importing is not enough: initialising the engine can still
            # fail, so exercise it once before advertising the backend.
            probe = pyttsx3.init()
            probe.stop()
        except Exception:
            pass
        else:
            found.append("pyttsx3")

    if GoogleTTS:
        found.append("gtts")

    if LuxTTS:
        try:
            import zipvoice  # noqa: F401
        except ImportError:
            pass
        else:
            found.append("luxtts")

    if ElevenLabsTTS:
        import os

        key_variables = ("SCITEX_AUDIO_ELEVENLABS_API_KEY", "ELEVENLABS_API_KEY")
        if any(os.environ.get(name) for name in key_variables):
            found.append("elevenlabs")

    return found




[docs]
def get_tts(backend: str | None = None, **kwargs) -> _BaseTTS:
    """Build a TTS engine for ``backend``.

    Parameters
    ----------
    backend : str, optional
        One of ``"pyttsx3"``, ``"gtts"``, ``"elevenlabs"`` or ``"luxtts"``.
        When omitted, the first entry of ``FALLBACK_ORDER`` that
        ``available_backends()`` reports is used.
    **kwargs
        Passed unchanged to the engine class constructor.

    Returns
    -------
    BaseTTS
        An instance of the selected engine class.

    Raises
    ------
    ValueError
        If no backend at all is detected, or the requested backend cannot
        be served.

    Notes
    -----
    Only ``"pyttsx3"`` is additionally required to appear in the detected
    list; the other backends are served whenever their engine class is
    truthy.
    """
    usable = available_backends()

    if not usable:
        raise ValueError(
            "No TTS backends available. Install one of:\n"
            "  pip install pyttsx3       # System TTS (offline, free)\n"
            "  pip install gTTS          # Google TTS (free, needs internet)\n"
            "  pip install scitex-audio[luxtts]  # LuxTTS (open-source, offline)\n"
            "  pip install elevenlabs    # ElevenLabs (paid, best quality)"
        )

    if backend is None:
        # Highest-priority backend that was actually detected.
        backend = next((name for name in FALLBACK_ORDER if name in usable), None)

    if backend == "pyttsx3" and SystemTTS and "pyttsx3" in usable:
        return SystemTTS(**kwargs)
    if backend == "gtts" and GoogleTTS:
        return GoogleTTS(**kwargs)
    if backend == "elevenlabs" and ElevenLabsTTS:
        return ElevenLabsTTS(**kwargs)
    if backend == "luxtts" and LuxTTS:
        return LuxTTS(**kwargs)

    raise ValueError(f"Backend '{backend}' not available. Available: {usable}")



from ._speak import speak



[docs]
def generate_bytes(
    text: str,
    backend: str | None = None,
    voice: str | None = None,
    **kwargs,
) -> bytes:
    """Synthesize ``text`` and return the audio bytes without playing them.

    Parameters
    ----------
    text : str
        Text to synthesize.
    backend : str, optional
        Backend name forwarded to :func:`get_tts`.
    voice : str, optional
        Voice forwarded to the engine's ``to_bytes`` method.
    **kwargs
        Extra options forwarded to :func:`get_tts`.

    Returns
    -------
    bytes
        Whatever the engine's ``to_bytes`` returns.
    """
    return get_tts(backend, **kwargs).to_bytes(text, voice=voice)




[docs]
def announce_context(
    include_full_path: bool = False,
    speak_aloud: bool = True,
    branch_resolver=None,
    speak_fn=None,
) -> dict:
    """Announce the current working directory and git branch.

    Builds an orientation sentence (e.g. ``"Working in scitex-audio, on
    branch develop"``) and, by default, speaks it aloud. Useful when
    starting work in a new session.

    Parameters
    ----------
    include_full_path : bool
        Include the absolute path instead of just the directory name.
    speak_aloud : bool
        Speak the announcement (default True). When False, only the
        context dict is returned.
    branch_resolver : callable, optional
        Injectable callable ``(cwd: str) -> str | None`` returning the
        git branch name (testing seam). Defaults to a real
        ``git rev-parse`` subprocess.
    speak_fn : callable, optional
        Injectable speak function (testing seam). Defaults to
        :func:`speak`.

    Returns
    -------
    dict
        ``{"directory": str, "directory_name": str, "git_branch": str | None,
        "announced_text": str, "spoke": bool}``.

        ``directory_name`` is always ``os.path.basename`` of the working
        directory, so it is an empty string at the filesystem root.

    Notes
    -----
    Branch detection is best-effort: if ``git`` cannot be run, times out
    (5 s) or exits non-zero, ``git_branch`` is ``None`` and the sentence
    omits the branch. Exceptions raised by ``branch_resolver`` or the speak
    function are not caught.
    """
    import os

    cwd = os.getcwd()
    base_name = os.path.basename(cwd)
    # At the filesystem root the basename is empty; fall back to the path so
    # the announcement never degenerates to "Working in ".
    dir_name = cwd if include_full_path else (base_name or cwd)

    if branch_resolver is not None:
        git_branch = branch_resolver(cwd)
    else:
        git_branch = None
        try:
            result = _subprocess.run(
                ["git", "rev-parse", "--abbrev-ref", "HEAD"],
                capture_output=True,
                text=True,
                cwd=cwd,
                timeout=5,
            )
            if result.returncode == 0:
                # Empty output is treated as "no branch" so the result keeps
                # to the documented ``str | None`` contract.
                # NOTE(review): on a detached HEAD git is expected to print
                # "HEAD" here, which is then announced as the branch name.
                git_branch = result.stdout.strip() or None
        except Exception:
            # Deliberately broad: a missing git binary, a timeout or an
            # undecodable reply must not prevent the announcement.
            pass

    if git_branch:
        text = f"Working in {dir_name}, on branch {git_branch}"
    else:
        text = f"Working in {dir_name}"

    spoke = False
    if speak_aloud:
        _speak = speak_fn if speak_fn is not None else speak
        _speak(text)
        spoke = True

    return {
        "directory": cwd,
        "directory_name": base_name,
        "git_branch": git_branch,
        "announced_text": text,
        "spoke": spoke,
    }



# EOF