jackkie99 committed on
Commit
fd98fad
·
1 Parent(s): 874c079
Files changed (1) hide show
  1. app.py +7 -7
app.py CHANGED
@@ -46,22 +46,22 @@ model = load_model(
46
 
47
  @spaces.GPU
48
  def infer_tts(
49
- ref_audio_orig: str, gen_text: str, speed: float = 1.0,
50
  nfe_steps: float = 64.0, target_rms: float = 0.1,
51
  cross_fade_duration: float = 0,
52
  sway_sampling_coef: float = -1,
53
  request: gr.Request = None
54
  ):
55
 
56
- if not ref_audio_orig:
57
- raise gr.Error("Please upload a sample audio file.")
58
  if not gen_text.strip():
59
  raise gr.Error("Please enter the text content to generate voice.")
60
  if len(gen_text.split()) > 1000:
61
  raise gr.Error("Please enter text content with less than 1000 words.")
62
 
63
  try:
64
- ref_audio, ref_text = preprocess_ref_audio_text(ref_audio_orig, "")
65
  final_wave, final_sample_rate, spectrogram = infer_process(
66
  ref_audio, ref_text.lower(), post_process(TTSnorm(gen_text)).lower(), model, vocoder, speed=speed,
67
  nfe_steps=nfe_steps, target_rms=target_rms, cross_fade_duration=cross_fade_duration,
@@ -84,8 +84,8 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
84
  """)
85
 
86
  with gr.Row():
87
- ref_audio = gr.Audio(label="🔊 Sample Voice", type="filepath")
88
- gen_text = gr.Textbox(label="📝 Text", placeholder="Enter the text to generate voice...", lines=3)
89
 
90
  speed = gr.Slider(0.3, 2.0, value=1.0, step=0.1, label="⚡ Speed")
91
  nfe_steps = gr.Slider(16, 64, value=64, step=16, label="NFE Steps")
@@ -110,7 +110,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
110
  )
111
 
112
  btn_synthesize.click(infer_tts,
113
- inputs=[ref_audio, gen_text, speed, nfe_steps, target_rms, cross_fade_duration, sway_sampling_coef],
114
  outputs=[output_audio, output_spectrogram]
115
  )
116
 
 
46
 
47
  @spaces.GPU
48
  def infer_tts(
49
+ gen_text: str, speed: float = 1.0,
50
  nfe_steps: float = 64.0, target_rms: float = 0.1,
51
  cross_fade_duration: float = 0,
52
  sway_sampling_coef: float = -1,
53
  request: gr.Request = None
54
  ):
55
 
56
+ # if not ref_audio_orig:
57
+ # raise gr.Error("Please upload a sample audio file.")
58
  if not gen_text.strip():
59
  raise gr.Error("Please enter the text content to generate voice.")
60
  if len(gen_text.split()) > 1000:
61
  raise gr.Error("Please enter text content with less than 1000 words.")
62
 
63
  try:
64
+ ref_audio, ref_text = preprocess_ref_audio_text(cached_path("hf://jackkie99/f5-tts-vnese/segment_59.wav"), "")
65
  final_wave, final_sample_rate, spectrogram = infer_process(
66
  ref_audio, ref_text.lower(), post_process(TTSnorm(gen_text)).lower(), model, vocoder, speed=speed,
67
  nfe_steps=nfe_steps, target_rms=target_rms, cross_fade_duration=cross_fade_duration,
 
84
  """)
85
 
86
  with gr.Row():
87
+ # ref_audio = gr.Audio(label="🔊 Sample Voice", type="filepath")
88
+ gen_text = gr.Textbox(label="📝 Text", placeholder="Nhập văn bản để tổng hợp giọng", lines=3)
89
 
90
  speed = gr.Slider(0.3, 2.0, value=1.0, step=0.1, label="⚡ Speed")
91
  nfe_steps = gr.Slider(16, 64, value=64, step=16, label="NFE Steps")
 
110
  )
111
 
112
  btn_synthesize.click(infer_tts,
113
+ inputs=[gen_text, speed, nfe_steps, target_rms, cross_fade_duration, sway_sampling_coef],
114
  outputs=[output_audio, output_spectrogram]
115
  )
116