# indic-tts / app.py
# Uploaded by akshathmangudi ("Upload 4 files", commit 9470ace, verified)
# app.py
import streamlit as st
import json
import soundfile as sf
import io
from tts_utils import load_model, generate_speech
import torch
# Release cached CUDA allocator memory up front to reduce the chance of memory issues.
torch.cuda.empty_cache()
# Turn on cuDNN benchmark mode for performance.
torch.backends.cudnn.benchmark = True
# Keep the loaded model around instead of reloading it on every call.
@st.cache_resource
def cached_load_model():
    """Return the result of ``load_model()``, memoized by ``st.cache_resource``.

    The call site in this script unpacks the result into three values:
    the model, its tokenizer, and a description tokenizer.
    """
    loaded = load_model()
    return loaded
# Load voice database from JSON
def load_voice_database(path: str = 'indian_voices.json') -> dict:
    """Load the voice catalogue and index it by voice name.

    Args:
        path: Location of the JSON file. The file must contain a list of
            objects, each with a ``name`` key. Defaults to
            ``indian_voices.json``, resolved against the current working
            directory.

    Returns:
        A dict mapping each record's ``name`` to the full record. When two
        records share a name, the later one replaces the earlier one.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If a record has no ``name`` key.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    with open(path, encoding='utf-8') as f:
        voices = json.load(f)
    return {voice['name']: voice for voice in voices}
# Name -> record lookup for every selectable voice.
INDIAN_FEMALE_VOICES = load_voice_database()

# Streamlit UI
st.title("🎙️ Indian English TTS Voice Selector")
text_input = st.text_area("Enter your text:", "Hey, how are you doing today?")

# Voice selection with warnings
selected_voice = st.selectbox(
    "Choose voice:",
    options=list(INDIAN_FEMALE_VOICES),
    format_func=lambda x: f"{x} 🔊",  # Add speaker icon
)

# Show voice description and warning
voice_info = INDIAN_FEMALE_VOICES[selected_voice]
st.caption(f"**Description:** {voice_info['description']}")
# A record may omit the 'warning' key or leave it empty; show the banner only
# when there is actual text to display.
if voice_info.get('warning'):
    st.warning(f"⚠️ Note: {voice_info['warning']}")
# Synthesize and play the audio once the user clicks the button.
if st.button("Generate Speech", type="primary"):
    if not text_input.strip():
        # Nothing to synthesize: skip model loading and generation entirely.
        st.warning("Please enter some text before generating speech.")
    else:
        model, tokenizer, desc_tokenizer = cached_load_model()
        with st.spinner(f"Generating {selected_voice}'s voice..."):
            try:
                audio_array = generate_speech(
                    text_input,
                    voice_info['prompt'],
                    model,
                    tokenizer,
                    desc_tokenizer,
                )
                # Audio playback: encode to WAV in memory, then rewind the
                # buffer so the player reads it from the start.
                # NOTE(review): the 16000 Hz rate is hard-coded; confirm it
                # matches the rate of the audio generate_speech returns.
                buffer = io.BytesIO()
                sf.write(buffer, audio_array, 16000, format="WAV")
                buffer.seek(0)
                st.audio(buffer, format="audio/wav")
                st.success("Audio generated successfully!")
            except RuntimeError as e:
                st.error(f"GPU Error: {e}. Try a different voice or shorter text.")