txya900619 committed on
Commit
c3d752c
·
verified ·
1 Parent(s): f357dd6

feat: use speed in infer_process

Browse files
Files changed (1) hide show
  1. app.py +3 -15
app.py CHANGED
@@ -73,7 +73,7 @@ def infer(
73
  remove_silence,
74
  cross_fade_duration=0.15,
75
  nfe_step=32,
76
- fix_duration=1,
77
  show_info=gr.Info,
78
  ):
79
  if not ref_audio_orig:
@@ -96,7 +96,7 @@ def infer(
96
  vocoder,
97
  cross_fade_duration=cross_fade_duration,
98
  nfe_step=nfe_step,
99
- fix_duration=fix_duration,
100
  show_info=show_info,
101
  progress=gr.Progress(),
102
  )
@@ -109,7 +109,6 @@ def infer(
109
  final_wave, _ = torchaudio.load(f.name)
110
  final_wave = final_wave.squeeze().cpu().numpy()
111
 
112
- print(f"Final wave duration: {final_wave.shape[0] / final_sample_rate:.2f}s")
113
  # Save the spectrogram
114
  with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp_spectrogram:
115
  spectrogram_path = tmp_spectrogram.name
@@ -228,17 +227,6 @@ with demo:
228
  if len(gen_text_input) == 0:
229
  raise gr.Error("請勿輸入空字串。")
230
 
231
- ref_audio_info = torchaudio.info(ref_audio_input)
232
- ref_duration = ref_audio_info.num_frames / ref_audio_info.sample_rate
233
- target_duration = (
234
- ref_duration
235
- * len(gen_text_input.replace(" ", ""))
236
- / len(ref_text_input.replace(" ", ""))
237
- / speed_slider
238
- )
239
- print(f"Reference duration: {ref_duration}")
240
- print(f"Target duration: {target_duration}")
241
-
242
  ignore_punctuation = False
243
  ipa_with_ng = False
244
 
@@ -257,7 +245,7 @@ with demo:
257
  remove_silence,
258
  cross_fade_duration=cross_fade_duration_slider,
259
  nfe_step=nfe_slider,
260
- fix_duration=ref_duration + target_duration,
261
  )
262
  return audio_out, spectrogram_path
263
 
 
73
  remove_silence,
74
  cross_fade_duration=0.15,
75
  nfe_step=32,
76
+ speed=1,
77
  show_info=gr.Info,
78
  ):
79
  if not ref_audio_orig:
 
96
  vocoder,
97
  cross_fade_duration=cross_fade_duration,
98
  nfe_step=nfe_step,
99
+ speed=speed,
100
  show_info=show_info,
101
  progress=gr.Progress(),
102
  )
 
109
  final_wave, _ = torchaudio.load(f.name)
110
  final_wave = final_wave.squeeze().cpu().numpy()
111
 
 
112
  # Save the spectrogram
113
  with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp_spectrogram:
114
  spectrogram_path = tmp_spectrogram.name
 
227
  if len(gen_text_input) == 0:
228
  raise gr.Error("請勿輸入空字串。")
229
 
 
 
 
 
 
 
 
 
 
 
 
230
  ignore_punctuation = False
231
  ipa_with_ng = False
232
 
 
245
  remove_silence,
246
  cross_fade_duration=cross_fade_duration_slider,
247
  nfe_step=nfe_slider,
248
+ speed=speed_slider,
249
  )
250
  return audio_out, spectrogram_path
251