Spaces:
Build error
Build error
import soundfile as sf
import streamlit as st
import torch
from datasets import load_dataset
from transformers import SpeechT5ForTextToSpeech, SpeechT5HifiGan, SpeechT5Processor
@st.cache_resource
def _load_tts_components():
    """Load the SpeechT5 processor, acoustic model and vocoder once.

    Streamlit re-executes this script on every widget interaction;
    ``st.cache_resource`` keeps the loaded objects alive across reruns so
    the checkpoints are not re-read each time.

    Returns:
        A ``(processor, model, vocoder)`` tuple.
    """
    tts_processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
    tts_model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
    # Use the HiFi-GAN checkpoint published alongside SpeechT5: it is the
    # vocoder documented for turning this model's spectrograms into a
    # waveform. The previous torch.hub "s3prl/s3prl" / "mb_melgan" entry is
    # not part of that pairing.
    hifigan = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
    return tts_processor, tts_model, hifigan


processor, model, vocoder = _load_tts_components()

# Seed the input text on the first run of a session so later reruns keep
# whatever the user typed.
if "text" not in st.session_state:
    st.session_state["text"] = "Hello, my dog is cooler than you!"
def update_text():
    """Render the input text area bound to ``st.session_state["text"]``.

    The widget is tied to session state through ``key="text"``: a value
    already stored under that key is shown as the initial content, and
    Streamlit writes the user's edits back under the same key.

    Passing the stored value as the widget's default and assigning the
    return value back (the previous approach) changes the widget's default
    after every edit, which can make Streamlit treat it as a new widget and
    drop the user's next edit.
    """
    st.text_area("Text", key="text")
# Row of the x-vector dataset used as the voice; this is the index used in
# the SpeechT5 model card example.
_SPEAKER_INDEX = 7306


@st.cache_resource
def _load_speaker_embeddings():
    """Return a single speaker x-vector with a leading batch dimension.

    SpeechT5 needs a speaker embedding to synthesise speech; calling
    ``generate_speech`` with ``speaker_embeddings=None`` raises. The result
    is cached so the dataset is only loaded once per server process.
    """
    xvectors = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
    return torch.tensor(xvectors[_SPEAKER_INDEX]["xvector"]).unsqueeze(0)


st.title("Text-to-Speech with SpeechT5")
st.write("Enter the text you want to convert to speech:")

# Render the text box; its content lives in st.session_state["text"]
update_text()

if st.button("Generate Speech"):
    text = st.session_state["text"]
    if not text.strip():
        # Nothing to synthesise; skip the model call entirely.
        st.warning("Please enter some text to convert to speech.")
    else:
        st.write("Generating speech...")

        # Tokenize the input text
        inputs = processor(text=text, return_tensors="pt")

        # Generate the spectrogram for the chosen speaker
        speech = model.generate_speech(
            inputs["input_ids"],
            speaker_embeddings=_load_speaker_embeddings(),
        )

        # Convert the spectrogram to a waveform; no gradients are needed
        with torch.no_grad():
            audio = vocoder(speech)

        # Save the audio to a file
        sf.write("output.wav", audio.cpu().numpy(), samplerate=16000)

        # Embed an audio player for the generated speech
        st.audio("output.wav", format="audio/wav")
        st.write("Speech generation complete. You can listen to the generated speech above.")