Athspi committed on
Commit
dff986d
·
verified ·
1 Parent(s): 6bfef72

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -7
app.py CHANGED
@@ -185,12 +185,8 @@ def remove_silence(audio_file, silence_threshold=-40, min_silence_len=500):
185
 
186
  return output_path
187
 
188
- def transcribe_audio(audio_file, language="Auto Detect", model_size="Faster Whisper Large v3", remove_silence_flag=False):
189
  """Transcribe the audio file."""
190
- # Remove silence if the flag is enabled
191
- if remove_silence_flag:
192
- audio_file = remove_silence(audio_file)
193
-
194
  # Convert audio to 16kHz mono for better compatibility
195
  audio = AudioSegment.from_file(audio_file)
196
  audio = audio.set_frame_rate(16000).set_channels(1)
@@ -261,17 +257,37 @@ with gr.Blocks() as demo:
261
  value="Faster Whisper Large v3", # Default to "Faster Whisper Large v3"
262
  interactive=True # Allow model selection by default
263
  )
264
- remove_silence_checkbox = gr.Checkbox(label="Remove Silence", value=False)
265
  transcribe_output = gr.Textbox(label="Transcription and Detected Language")
266
  transcribe_button = gr.Button("Transcribe Audio")
267
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
268
  # Link buttons to functions
269
  detect_button.click(detect_language, inputs=detect_audio_input, outputs=detect_language_output)
270
  transcribe_button.click(
271
  transcribe_audio,
272
- inputs=[transcribe_audio_input, language_dropdown, model_dropdown, remove_silence_checkbox],
273
  outputs=transcribe_output
274
  )
 
 
 
 
 
275
 
276
  # Launch the Gradio interface
277
  demo.launch()
 
185
 
186
  return output_path
187
 
188
+ def transcribe_audio(audio_file, language="Auto Detect", model_size="Faster Whisper Large v3"):
189
  """Transcribe the audio file."""
 
 
 
 
190
  # Convert audio to 16kHz mono for better compatibility
191
  audio = AudioSegment.from_file(audio_file)
192
  audio = audio.set_frame_rate(16000).set_channels(1)
 
257
  value="Faster Whisper Large v3", # Default to "Faster Whisper Large v3"
258
  interactive=True # Allow model selection by default
259
  )
 
260
  transcribe_output = gr.Textbox(label="Transcription and Detected Language")
261
  transcribe_button = gr.Button("Transcribe Audio")
262
 
263
+ with gr.Tab("Remove Silence"):
264
+ gr.Markdown("Upload an audio file to remove silence.")
265
+ silence_audio_input = gr.Audio(type="filepath", label="Upload Audio File")
266
+ silence_threshold_slider = gr.Slider(
267
+ minimum=-60, maximum=-20, value=-40, step=1,
268
+ label="Silence Threshold (dB)",
269
+ info="Lower values detect quieter sounds as silence."
270
+ )
271
+ min_silence_len_slider = gr.Slider(
272
+ minimum=100, maximum=2000, value=500, step=100,
273
+ label="Minimum Silence Length (ms)",
274
+ info="Minimum duration of silence to remove."
275
+ )
276
+ silence_output = gr.Audio(label="Processed Audio (Silence Removed)", type="filepath")
277
+ silence_button = gr.Button("Remove Silence")
278
+
279
  # Link buttons to functions
280
  detect_button.click(detect_language, inputs=detect_audio_input, outputs=detect_language_output)
281
  transcribe_button.click(
282
  transcribe_audio,
283
+ inputs=[transcribe_audio_input, language_dropdown, model_dropdown],
284
  outputs=transcribe_output
285
  )
286
+ silence_button.click(
287
+ remove_silence,
288
+ inputs=[silence_audio_input, silence_threshold_slider, min_silence_len_slider],
289
+ outputs=silence_output
290
+ )
291
 
292
  # Launch the Gradio interface
293
  demo.launch()