Text-to-Speech Service
The TTS Service is a standalone FastAPI application that generates speech from text using various providers.
Overview
The service listens on port 8002 by default. It supports a pluggable provider architecture.
Supported Providers
- Coqui TTS: Local, high-quality TTS (Default).
- ElevenLabs: Cloud-based, ultra-realistic TTS.
API Reference
Data Models
ai_term.tts.main.TTSRequest
Bases: BaseModel
TTS generation request.
Source code in src/ai_term/tts/main.py
class TTSRequest(BaseModel):
    """Request body for a TTS generation call.

    Only ``text`` is required; every other field has a default.
    """

    # Text to synthesize.
    text: str

    # Optional preceding text; None when the client does not send it.
    previous_text: str | None = None

    # Optional, for multi-speaker models
    speaker_id: str = ""

    # Optional
    language_id: str = ""

    # Optional per-request provider selection; None when not supplied.
    provider_config: ProviderConfigRequest | None = None
ai_term.tts.main.ProviderConfigRequest
Bases: BaseModel
Provider configuration passed from client.
Source code in src/ai_term/tts/main.py
class ProviderConfigRequest(BaseModel):
    """Provider selection and credentials supplied by the client."""

    # Provider identifier: "coqui" or "elevenlabs"
    provider: str = "coqui"

    # Optional API key for the selected provider.
    api_key: str | None = None

    # Optional voice identifier.
    voice_id: str | None = None

    # Optional model identifier.
    model_id: str | None = None
Endpoints
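POST /generate: Generate audio from text. Returns the raw audio bytes as audio/mpeg for the ElevenLabs provider and audio/wav otherwise; responds with 400 when the text is empty or the request is rejected with a validation error, and 500 on any other failure.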
GET /health: Health check.
Implementation
ai_term.tts.main
TTS FastAPI Service with Provider Adapter Pattern.
generate_speech(request)
Generate speech from text using the specified provider.
Source code in src/ai_term/tts/main.py
@app.post("/generate")
def generate_speech(request: TTSRequest):
    """Generate speech from text using the specified provider.

    Args:
        request: Parsed request body. ``text`` is required; ``previous_text``
            is forwarded to the provider, and ``provider_config`` selects the
            provider (the default provider is used when it is omitted).

    Returns:
        A ``Response`` carrying the generated audio bytes, typed
        ``audio/mpeg`` for the "elevenlabs" provider and ``audio/wav``
        otherwise.

    Raises:
        HTTPException: 400 if the text is empty after stripping, or if a
            ``ValueError`` is raised while selecting or running the
            provider; 500 for any other failure.
    """
    text = request.text.strip()
    previous_text = request.previous_text.strip() if request.previous_text else ""

    if not text:
        raise HTTPException(status_code=400, detail="Text cannot be empty")

    logger.debug(f"Generating speech for: {text}")

    try:
        # Determine which provider to use
        if request.provider_config:
            provider = create_provider(request.provider_config)
        else:
            provider = get_default_provider()

        logger.info(f"Using provider: {provider.name}")

        # Generate audio
        audio_bytes = provider.generate(text, previous_text=previous_text)

        # Determine media type based on provider
        media_type = "audio/mpeg" if provider.name == "elevenlabs" else "audio/wav"

        return Response(content=audio_bytes, media_type=media_type)
    except HTTPException:
        # A deliberate HTTP error must keep its own status code rather than
        # being rewrapped as a 500 by the generic handler below.
        raise
    except ValueError as e:
        # Chain the cause so the original traceback stays attached.
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        # logger.exception records the traceback through the logger instead
        # of printing it straight to stderr.
        logger.exception(f"Error generating speech: {e}")
        # NOTE(review): the detail exposes the internal error text to the
        # client; kept as-is for compatibility, confirm this is intended.
        raise HTTPException(status_code=500, detail=str(e)) from e
get_default_provider()
Get or create the default (Coqui) provider.
Source code in src/ai_term/tts/main.py
def get_default_provider() -> TTSProvider:
    """Return the shared default (Coqui) provider, building it on first use.

    The instance is stored in the module-level ``_default_provider`` and
    reused by every later call. No lock guards the first construction.
    """
    global _default_provider

    # Fast path: the provider was already constructed by an earlier call.
    if _default_provider is not None:
        return _default_provider

    _default_provider = CoquiTTSProvider()
    return _default_provider
create_provider(config)
Create a provider based on configuration.
Source code in src/ai_term/tts/main.py
def create_provider(config: ProviderConfigRequest) -> TTSProvider:
    """Build the provider named by ``config.provider``.

    Args:
        config: Client-supplied provider selection and credentials.

    Returns:
        The shared default provider for "coqui", or a new
        ``ElevenLabsTTSProvider`` for "elevenlabs".

    Raises:
        ValueError: If the provider name is not recognised, or if
            "elevenlabs" is requested without an API key.
    """
    selected = config.provider

    # Coqui needs nothing from the request; reuse the shared instance.
    if selected == "coqui":
        return get_default_provider()

    if selected != "elevenlabs":
        raise ValueError(f"Unknown provider: {config.provider}")

    # From here on the request is for ElevenLabs, which requires a key.
    if not config.api_key:
        raise ValueError("ElevenLabs requires an API key")

    return ElevenLabsTTSProvider(
        api_key=config.api_key,
        voice_id=config.voice_id,
        model_id=config.model_id,
    )