Streamline spaces app
Browse files
- spaces_app.py +1 -1
- tts.py +12 -2
spaces_app.py
CHANGED
@@ -41,7 +41,7 @@ def create_gradio_interface():
|
|
41 |
|
42 |
with gr.Row():
|
43 |
with gr.Column():
|
44 |
-
input_text = gr.Textbox(label="Input Text")
|
45 |
generate_btn = gr.Button("Generate Speech")
|
46 |
|
47 |
with gr.Column():
|
|
|
41 |
|
42 |
with gr.Row():
|
43 |
with gr.Column():
|
44 |
+
input_text = gr.Textbox(label="Input Text", lines=5)
|
45 |
generate_btn = gr.Button("Generate Speech")
|
46 |
|
47 |
with gr.Column():
|
tts.py
CHANGED
@@ -9,6 +9,11 @@ import json
|
|
9 |
import string
|
10 |
from IPython.display import Audio
|
11 |
import soundfile as sf
|
|
|
|
|
|
|
|
|
|
|
12 |
|
13 |
# Load models
|
14 |
lightspeech = ort.InferenceSession("./models/lightspeech_quant.onnx")
|
@@ -45,6 +50,11 @@ class TTS:
|
|
45 |
# Remove empty sections
|
46 |
sections = [section for section in sections if section]
|
47 |
|
|
|
|
|
|
|
|
|
|
|
48 |
return sections
|
49 |
|
50 |
@staticmethod
|
@@ -53,13 +63,13 @@ class TTS:
|
|
53 |
for section in sections:
|
54 |
if section == '**':
|
55 |
# Long pause
|
56 |
-
pause_duration =
|
57 |
sample_rate = 44100
|
58 |
pause = np.zeros(int(pause_duration * sample_rate))
|
59 |
audio_sections.append(pause)
|
60 |
elif section == '*':
|
61 |
# Short pause
|
62 |
-
pause_duration = 0.
|
63 |
sample_rate = 44100
|
64 |
pause = np.zeros(int(pause_duration * sample_rate))
|
65 |
audio_sections.append(pause)
|
|
|
9 |
import string
|
10 |
from IPython.display import Audio
|
11 |
import soundfile as sf
|
12 |
+
import logging
|
13 |
+
|
14 |
+
# Configure logger
|
15 |
+
logging.basicConfig(level=logging.INFO)
|
16 |
+
logger = logging.getLogger(__name__)
|
17 |
|
18 |
# Load models
|
19 |
lightspeech = ort.InferenceSession("./models/lightspeech_quant.onnx")
|
|
|
50 |
# Remove empty sections
|
51 |
sections = [section for section in sections if section]
|
52 |
|
53 |
+
# Trim last long pause marker
|
54 |
+
if sections[-1] == '**':
|
55 |
+
sections = sections[:-1]
|
56 |
+
|
57 |
+
logger.info(f"Split text into sections: {sections}")
|
58 |
return sections
|
59 |
|
60 |
@staticmethod
|
|
|
63 |
for section in sections:
|
64 |
if section == '**':
|
65 |
# Long pause
|
66 |
+
pause_duration = 0.4
|
67 |
sample_rate = 44100
|
68 |
pause = np.zeros(int(pause_duration * sample_rate))
|
69 |
audio_sections.append(pause)
|
70 |
elif section == '*':
|
71 |
# Short pause
|
72 |
+
pause_duration = 0.2
|
73 |
sample_rate = 44100
|
74 |
pause = np.zeros(int(pause_duration * sample_rate))
|
75 |
audio_sections.append(pause)
|