update
Browse files
app.py
CHANGED
@@ -46,22 +46,22 @@ model = load_model(
|
|
46 |
|
47 |
@spaces.GPU
|
48 |
def infer_tts(
|
49 |
-
|
50 |
nfe_steps: float = 64.0, target_rms: float = 0.1,
|
51 |
cross_fade_duration: float = 0,
|
52 |
sway_sampling_coef: float = -1,
|
53 |
request: gr.Request = None
|
54 |
):
|
55 |
|
56 |
-
if not ref_audio_orig:
|
57 |
-
|
58 |
if not gen_text.strip():
|
59 |
raise gr.Error("Please enter the text content to generate voice.")
|
60 |
if len(gen_text.split()) > 1000:
|
61 |
raise gr.Error("Please enter text content with less than 1000 words.")
|
62 |
|
63 |
try:
|
64 |
-
ref_audio, ref_text = preprocess_ref_audio_text(
|
65 |
final_wave, final_sample_rate, spectrogram = infer_process(
|
66 |
ref_audio, ref_text.lower(), post_process(TTSnorm(gen_text)).lower(), model, vocoder, speed=speed,
|
67 |
nfe_steps=nfe_steps, target_rms=target_rms, cross_fade_duration=cross_fade_duration,
|
@@ -84,8 +84,8 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
84 |
""")
|
85 |
|
86 |
with gr.Row():
|
87 |
-
ref_audio = gr.Audio(label="🔊 Sample Voice", type="filepath")
|
88 |
-
gen_text = gr.Textbox(label="📝 Text", placeholder="
|
89 |
|
90 |
speed = gr.Slider(0.3, 2.0, value=1.0, step=0.1, label="⚡ Speed")
|
91 |
nfe_steps = gr.Slider(16, 64, value=64, step=16, label="NFE Steps")
|
@@ -110,7 +110,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
110 |
)
|
111 |
|
112 |
btn_synthesize.click(infer_tts,
|
113 |
-
inputs=[
|
114 |
outputs=[output_audio, output_spectrogram]
|
115 |
)
|
116 |
|
|
|
46 |
|
47 |
@spaces.GPU
|
48 |
def infer_tts(
|
49 |
+
gen_text: str, speed: float = 1.0,
|
50 |
nfe_steps: float = 64.0, target_rms: float = 0.1,
|
51 |
cross_fade_duration: float = 0,
|
52 |
sway_sampling_coef: float = -1,
|
53 |
request: gr.Request = None
|
54 |
):
|
55 |
|
56 |
+
# if not ref_audio_orig:
|
57 |
+
# raise gr.Error("Please upload a sample audio file.")
|
58 |
if not gen_text.strip():
|
59 |
raise gr.Error("Please enter the text content to generate voice.")
|
60 |
if len(gen_text.split()) > 1000:
|
61 |
raise gr.Error("Please enter text content with less than 1000 words.")
|
62 |
|
63 |
try:
|
64 |
+
ref_audio, ref_text = preprocess_ref_audio_text(cached_path("hf://jackkie99/f5-tts-vnese/segment_59.wav"), "")
|
65 |
final_wave, final_sample_rate, spectrogram = infer_process(
|
66 |
ref_audio, ref_text.lower(), post_process(TTSnorm(gen_text)).lower(), model, vocoder, speed=speed,
|
67 |
nfe_steps=nfe_steps, target_rms=target_rms, cross_fade_duration=cross_fade_duration,
|
|
|
84 |
""")
|
85 |
|
86 |
with gr.Row():
|
87 |
+
# ref_audio = gr.Audio(label="🔊 Sample Voice", type="filepath")
|
88 |
+
gen_text = gr.Textbox(label="📝 Text", placeholder="Nhập văn bản để tổng hợp giọng", lines=3)
|
89 |
|
90 |
speed = gr.Slider(0.3, 2.0, value=1.0, step=0.1, label="⚡ Speed")
|
91 |
nfe_steps = gr.Slider(16, 64, value=64, step=16, label="NFE Steps")
|
|
|
110 |
)
|
111 |
|
112 |
btn_synthesize.click(infer_tts,
|
113 |
+
inputs=[gen_text, speed, nfe_steps, target_rms, cross_fade_duration, sway_sampling_coef],
|
114 |
outputs=[output_audio, output_spectrogram]
|
115 |
)
|
116 |
|