# app.py
import io
import json

import soundfile as sf
import streamlit as st
import torch

from tts_utils import load_model, generate_speech

torch.backends.cudnn.benchmark = True  # Enable cuDNN autotuning for performance
if torch.cuda.is_available():
    torch.cuda.empty_cache()  # Clear the CUDA cache to prevent memory issues

# Cache model loading so the weights are loaded once per session, not on every rerun
@st.cache_resource
def cached_load_model():
    return load_model()

# Load the voice database from JSON and index it by voice name
def load_voice_database():
    with open('indian_voices.json') as f:
        voices = json.load(f)
    return {voice['name']: voice for voice in voices}

INDIAN_FEMALE_VOICES = load_voice_database()

# Streamlit UI
st.title("🎙️ Indian English TTS Voice Selector")
text_input = st.text_area("Enter your text:", "Hey, how are you doing today?")

# Voice selection with warnings
selected_voice = st.selectbox(
    "Choose voice:",
    options=list(INDIAN_FEMALE_VOICES.keys()),
    format_func=lambda x: f"{x} 🔊"  # Add a speaker icon to each option
)

# Show the voice description and any associated warning
voice_info = INDIAN_FEMALE_VOICES[selected_voice]
st.caption(f"**Description:** {voice_info['description']}")
if voice_info['warning']:
    st.warning(f"⚠️ Note: {voice_info['warning']}")

if st.button("Generate Speech", type="primary"):
    model, tokenizer, desc_tokenizer = cached_load_model()
    with st.spinner(f"Generating {selected_voice}'s voice..."):
        try:
            audio_array = generate_speech(
                text_input,
                voice_info['prompt'],
                model,
                tokenizer,
                desc_tokenizer
            )
            # Audio playback: write the samples to an in-memory WAV buffer.
            # The sample rate passed here must match the rate of the audio
            # returned by generate_speech.
            buffer = io.BytesIO()
            sf.write(buffer, audio_array, 16000, format="WAV")
            buffer.seek(0)
            st.audio(buffer, format="audio/wav")
            st.success("Audio generated successfully!")
        except RuntimeError as e:
            st.error(f"GPU Error: {str(e)}. Try a different voice or shorter text.")
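
# ---------------------------------------------------------------------------
# Illustrative supporting files (sketches only, not the project's actual code)
# ---------------------------------------------------------------------------
#
# indian_voices.json: app.py only assumes this file is a JSON list of voice
# records carrying the fields read above ('name', 'description', 'warning',
# 'prompt'). The entry below is a hypothetical example of that shape; the
# voice name and wording are made up for illustration:
#
# [
#   {
#     "name": "Divya",
#     "description": "Warm, conversational Indian English female voice.",
#     "warning": "",
#     "prompt": "Divya speaks in a warm, clear voice with an Indian English accent. The recording is close-up and very high quality."
#   }
# ]
#
# tts_utils.py: a minimal sketch of the backend interface app.py expects,
# i.e. load_model() returning (model, tokenizer, desc_tokenizer) and
# generate_speech() returning a 1-D audio array. It is written against a
# Parler-TTS-style model with a separate description tokenizer (for example
# ai4bharat/indic-parler-tts); the real tts_utils.py may use a different
# model or API, so treat the checkpoint name and calls below as assumptions.

import torch
from transformers import AutoTokenizer
from parler_tts import ParlerTTSForConditionalGeneration

MODEL_NAME = "ai4bharat/indic-parler-tts"  # assumed checkpoint
DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"


def load_model():
    """Load the TTS model plus the prompt and description tokenizers."""
    model = ParlerTTSForConditionalGeneration.from_pretrained(MODEL_NAME).to(DEVICE)
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    # Indic Parler-TTS uses a separate tokenizer for the voice description.
    desc_tokenizer = AutoTokenizer.from_pretrained(model.config.text_encoder._name_or_path)
    return model, tokenizer, desc_tokenizer


def generate_speech(text, voice_description, model, tokenizer, desc_tokenizer):
    """Return a 1-D numpy array of speech samples for `text` in the described voice."""
    desc_inputs = desc_tokenizer(voice_description, return_tensors="pt").to(DEVICE)
    prompt_inputs = tokenizer(text, return_tensors="pt").to(DEVICE)
    generation = model.generate(
        input_ids=desc_inputs.input_ids,
        attention_mask=desc_inputs.attention_mask,
        prompt_input_ids=prompt_inputs.input_ids,
        prompt_attention_mask=prompt_inputs.attention_mask,
    )
    # Parler-TTS checkpoints typically emit audio at model.config.sampling_rate
    # (often 44.1 kHz); the rate used with sf.write in app.py must match this.
    return generation.cpu().numpy().squeeze()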