Update app.py
app.py CHANGED

@@ -25,7 +25,7 @@ def get_text(text, hps, is_phoneme):
 def create_tts_fn(model, hps, speaker_ids):
     def tts_fn(text, speaker, speed, is_phoneme):
         if limitation and ((len(text) > 60 and not is_phoneme) or (len(text) > 120 and is_phoneme)):
-
+            return "Error: Text is too long", None
         speaker_id = speaker_ids[speaker]
         stn_tst = get_text(text, hps, is_phoneme)
         with no_grad():
@@ -35,7 +35,7 @@ def create_tts_fn(model, hps, speaker_ids):
             audio = model.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=.667, noise_scale_w=0.8,
                                 length_scale=1.0 / speed)[0][0, 0].data.cpu().float().numpy()
         del stn_tst, x_tst, x_tst_lengths, sid
-        return hps.data.sampling_rate, audio
+        return "Success", (hps.data.sampling_rate, audio)

     return tts_fn

@@ -43,11 +43,11 @@ def create_tts_fn(model, hps, speaker_ids):
 def create_vc_fn(model, hps, speaker_ids):
     def vc_fn(original_speaker, target_speaker, input_audio):
         if input_audio is None:
-
+            return "You need to upload an audio", None
         sampling_rate, audio = input_audio
         duration = audio.shape[0] / sampling_rate
         if limitation and duration > 15:
-
+            return "Error: Audio is too long", None
         original_speaker_id = speaker_ids[original_speaker]
         target_speaker_id = speaker_ids[target_speaker]

@@ -68,7 +68,7 @@ def create_vc_fn(model, hps, speaker_ids):
             audio = model.voice_conversion(spec, spec_lengths, sid_src=sid_src, sid_tgt=sid_tgt)[0][
                 0, 0].data.cpu().float().numpy()
         del y, spec, spec_lengths, sid_src, sid_tgt
-        return hps.data.sampling_rate, audio
+        return "Success", (hps.data.sampling_rate, audio)

     return vc_fn

@@ -145,7 +145,8 @@ if __name__ == '__main__':
                                               samples=[[x] for x in symbols])
                 phoneme_list_json = gr.Json(value=symbols, visible=False)
                 tts_submit = gr.Button("Generate", variant="primary")
-
+                tts_output1 = gr.Textbox(label="Output Message")
+                tts_output2 = gr.Audio(label="Output Audio")
                 advanced_button.click(None, [], [], _js="""
                 () => {
                     let options = document.querySelector("body > gradio-app");
@@ -155,7 +156,7 @@ if __name__ == '__main__':
                     options.style.display = ["none", ""].includes(options.style.display) ? "flex" : "none";
                 }""")
                 tts_submit.click(tts_fn, [tts_input1, tts_input2, tts_input3, phoneme_input],
-                                 [
+                                 [tts_output1, tts_output2])
                 to_phoneme_btn.click(lambda x: _clean_text(x, hps.data.text_cleaners) if x != "" else x,
                                      [tts_input1], [tts_input1])
                 phoneme_list.click(None, [phoneme_list, phoneme_list_json, tts_input1], [tts_input1],
@@ -173,6 +174,7 @@ if __name__ == '__main__':
                                       value=speakers[1])
                 vc_input3 = gr.Audio(label="Input Audio (15s limitation)")
                 vc_submit = gr.Button("Convert", variant="primary")
-
-
+                vc_output1 = gr.Textbox(label="Output Message")
+                vc_output2 = gr.Audio(label="Output Audio")
+                vc_submit.click(vc_fn, [vc_input1, vc_input2, vc_input3], [vc_output1, vc_output2])
     app.launch()
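The net effect of the commit is a new output contract: tts_fn and vc_fn now return a status message together with the audio (or None on failure), and the Blocks UI gains a matching Textbox/Audio pair wired as the click outputs. Below is a minimal, self-contained sketch of that same pattern, not the Space's actual code; fake_tts, demo, and the fixed SAMPLING_RATE are placeholder names, whereas the real app passes hps.data.sampling_rate and the model.infer output.

import numpy as np
import gradio as gr

SAMPLING_RATE = 22050  # placeholder; app.py uses hps.data.sampling_rate


def fake_tts(text):
    # Mirrors the updated tts_fn contract: return (message, audio),
    # where audio is (sampling_rate, waveform) on success or None on error.
    if len(text) > 60:
        return "Error: Text is too long", None
    waveform = np.zeros(SAMPLING_RATE, dtype=np.float32)  # stand-in for the synthesized audio
    return "Success", (SAMPLING_RATE, waveform)


with gr.Blocks() as demo:
    text_in = gr.Textbox(label="Text")
    btn = gr.Button("Generate", variant="primary")
    msg_out = gr.Textbox(label="Output Message")
    audio_out = gr.Audio(label="Output Audio")
    # Two outputs to match the two return values, as in the updated tts_submit.click(...)
    btn.click(fake_tts, [text_in], [msg_out, audio_out])

demo.launch()

The voice-conversion tab follows the same wiring in the diff via vc_submit.click(vc_fn, [vc_input1, vc_input2, vc_input3], [vc_output1, vc_output2]).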