mosha255 committed on
Commit
19556fd
·
unverified ·
1 Parent(s): e088c4e

Streamline spaces app

Browse files
Files changed (2) hide show
  1. spaces_app.py +1 -1
  2. tts.py +12 -2
spaces_app.py CHANGED
@@ -41,7 +41,7 @@ def create_gradio_interface():
41
 
42
  with gr.Row():
43
  with gr.Column():
44
- input_text = gr.Textbox(label="Input Text")
45
  generate_btn = gr.Button("Generate Speech")
46
 
47
  with gr.Column():
 
41
 
42
  with gr.Row():
43
  with gr.Column():
44
+ input_text = gr.Textbox(label="Input Text", lines=5)
45
  generate_btn = gr.Button("Generate Speech")
46
 
47
  with gr.Column():
tts.py CHANGED
@@ -9,6 +9,11 @@ import json
9
  import string
10
  from IPython.display import Audio
11
  import soundfile as sf
 
 
 
 
 
12
 
13
  # Load models
14
  lightspeech = ort.InferenceSession("./models/lightspeech_quant.onnx")
@@ -45,6 +50,11 @@ class TTS:
45
  # Remove empty sections
46
  sections = [section for section in sections if section]
47
 
 
 
 
 
 
48
  return sections
49
 
50
  @staticmethod
@@ -53,13 +63,13 @@ class TTS:
53
  for section in sections:
54
  if section == '**':
55
  # Long pause
56
- pause_duration = 1.0
57
  sample_rate = 44100
58
  pause = np.zeros(int(pause_duration * sample_rate))
59
  audio_sections.append(pause)
60
  elif section == '*':
61
  # Short pause
62
- pause_duration = 0.4
63
  sample_rate = 44100
64
  pause = np.zeros(int(pause_duration * sample_rate))
65
  audio_sections.append(pause)
 
9
  import string
10
  from IPython.display import Audio
11
  import soundfile as sf
12
+ import logging
13
+
14
+ # Configure logger
15
+ logging.basicConfig(level=logging.INFO)
16
+ logger = logging.getLogger(__name__)
17
 
18
  # Load models
19
  lightspeech = ort.InferenceSession("./models/lightspeech_quant.onnx")
 
50
  # Remove empty sections
51
  sections = [section for section in sections if section]
52
 
53
+ # Trim last long pause marker
54
+ if sections[-1] == '**':
55
+ sections = sections[:-1]
56
+
57
+ logger.info(f"Split text into sections: {sections}")
58
  return sections
59
 
60
  @staticmethod
 
63
  for section in sections:
64
  if section == '**':
65
  # Long pause
66
+ pause_duration = 0.4
67
  sample_rate = 44100
68
  pause = np.zeros(int(pause_duration * sample_rate))
69
  audio_sections.append(pause)
70
  elif section == '*':
71
  # Short pause
72
+ pause_duration = 0.2
73
  sample_rate = 44100
74
  pause = np.zeros(int(pause_duration * sample_rate))
75
  audio_sections.append(pause)