Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,52 +1,46 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
-
from transformers import
|
| 3 |
-
from datasets import load_dataset
|
| 4 |
import torch
|
| 5 |
import soundfile as sf
|
| 6 |
-
import
|
| 7 |
-
|
| 8 |
-
#
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
sf.write(output_path, audio, samplerate=samplerate)
|
| 49 |
-
st.audio(output_path)
|
| 50 |
-
|
| 51 |
-
if __name__ == "__main__":
|
| 52 |
-
main()
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
+
from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
|
|
|
|
| 3 |
import torch
|
| 4 |
import soundfile as sf
|
| 5 |
+
from datasets import load_dataset
|
| 6 |
+
|
| 7 |
+
# Initialize the processor and model
|
| 8 |
+
processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
|
| 9 |
+
model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
|
| 10 |
+
|
| 11 |
+
# Load the vocoder
|
| 12 |
+
# SpeechT5 emits mel spectrograms; the matching vocoder is the HiFi-GAN checkpoint published alongside it.
vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
|
| 13 |
+
|
| 14 |
+
# Initialize session state
|
| 15 |
+
if 'text' not in st.session_state:
|
| 16 |
+
st.session_state['text'] = "Hello, my dog is cooler than you!"
|
| 17 |
+
|
| 18 |
+
# Function to update session state
|
| 19 |
+
def update_text():
|
| 20 |
+
st.session_state['text'] = st.text_area("Text", st.session_state['text'])
|
| 21 |
+
|
| 22 |
+
st.title("Text-to-Speech with SpeechT5")
|
| 23 |
+
st.write("Enter the text you want to convert to speech:")
|
| 24 |
+
|
| 25 |
+
# Use session state to store text
|
| 26 |
+
update_text()
|
| 27 |
+
|
| 28 |
+
if st.button("Generate Speech"):
|
| 29 |
+
st.write("Generating speech...")
|
| 30 |
+
|
| 31 |
+
# Process the input text
|
| 32 |
+
inputs = processor(text=st.session_state['text'], return_tensors="pt")
|
| 33 |
+
|
| 34 |
+
# Generate speech
|
| 35 |
+
# SpeechT5 is a multi-speaker model: it needs an x-vector speaker embedding of shape (1, 512).
# Index 7306 is the speaker used in the model card's example; any row of the dataset works.
embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
# Without a vocoder argument this returns a mel spectrogram, which the vocoder call below converts.
speech = model.generate_speech(inputs["input_ids"], speaker_embeddings)
|
| 36 |
+
|
| 37 |
+
# Use the vocoder to convert the generated mel spectrogram into an audio waveform
|
| 38 |
+
with torch.no_grad():
|
| 39 |
+
audio = vocoder(speech)
|
| 40 |
+
|
| 41 |
+
# Save the audio to a file
|
| 42 |
+
sf.write("output.wav", audio.cpu().numpy(), samplerate=16000)
|
| 43 |
+
|
| 44 |
+
# Embed an audio player so the generated speech can be played back in the page
|
| 45 |
+
st.audio("output.wav", format="audio/wav")
|
| 46 |
+
st.write("Speech generation complete. You can listen to the generated speech above.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|