Nick088 committed on
Commit
02d3e15
·
verified ·
1 Parent(s): 8c9701f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -11
app.py CHANGED
@@ -216,7 +216,7 @@ def check_file(audio_file_path):
216
  return audio_file_path, None
217
 
218
 
219
- def transcribe_audio(audio_file_path, prompt, language, auto_detect_language):
220
  # Check and process the file first
221
  processed_path, error_message = check_file(audio_file_path)
222
 
@@ -227,7 +227,7 @@ def transcribe_audio(audio_file_path, prompt, language, auto_detect_language):
227
  with open(processed_path, "rb") as file:
228
  transcription = client.audio.transcriptions.create(
229
  file=(os.path.basename(processed_path), file.read()),
230
- model="whisper-large-v3",
231
  prompt=prompt,
232
  response_format="json",
233
  language=None if auto_detect_language else language,
@@ -236,7 +236,7 @@ def transcribe_audio(audio_file_path, prompt, language, auto_detect_language):
236
  return transcription.text
237
 
238
 
239
- def translate_audio(audio_file_path, prompt):
240
  # Check and process the file first
241
  processed_path, error_message = check_file(audio_file_path)
242
 
@@ -247,7 +247,7 @@ def translate_audio(audio_file_path, prompt):
247
  with open(processed_path, "rb") as file:
248
  translation = client.audio.translations.create(
249
  file=(os.path.basename(processed_path), file.read()),
250
- model="whisper-large-v3",
251
  prompt=prompt,
252
  response_format="json",
253
  temperature=0.0,
@@ -274,7 +274,7 @@ def create_srt_from_text(transcription_text):
274
  return "".join(srt_lines)
275
 
276
 
277
- def generate_subtitles(audio_file_path, prompt, language, auto_detect_language):
278
  """Converts Whisper JSON transcription to SRT format."""
279
  # Check and process the file first
280
  processed_path, error_message = check_file(audio_file_path)
@@ -286,7 +286,7 @@ def generate_subtitles(audio_file_path, prompt, language, auto_detect_language):
286
  with open(processed_path, "rb") as file:
287
  transcription_json = client.audio.transcriptions.create(
288
  file=(os.path.basename(processed_path), file.read()),
289
- model="whisper-large-v3",
290
  prompt=prompt,
291
  response_format="json",
292
  language=None if auto_detect_language else language, # Conditional language parameter
@@ -305,10 +305,36 @@ def generate_subtitles(audio_file_path, prompt, language, auto_detect_language):
305
  temp_srt_file.write(srt_content)
306
 
307
  # Generate subtitles and add to video if MP4
308
- if audio_file_path.lower().endswith(".mp4"):
309
  try:
310
- # ... (ffmpeg code to add subtitles) ...
311
- return None, audio_file_path.replace(".mp4", "_with_subs.mp4"), None, temp_srt_path # Return the temp_srt_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
312
  except subprocess.CalledProcessError as e:
313
  return None, None, f"Error during subtitle addition: {e}", temp_srt_path # Return the temp_srt_path
314
 
@@ -386,6 +412,11 @@ with gr.Blocks() as demo:
386
  audio_input = gr.File(
387
  type="filepath", label="Upload File containing Audio", file_types=[f".{ext}" for ext in ALLOWED_FILE_EXTENSIONS]
388
  )
 
 
 
 
 
389
  with gr.Row():
390
  transcribe_prompt = gr.Textbox(
391
  label="Prompt (Optional)",
@@ -402,7 +433,7 @@ with gr.Blocks() as demo:
402
  transcription_output = gr.Textbox(label="Transcription")
403
  transcribe_button.click(
404
  transcribe_audio,
405
- inputs=[audio_input, transcribe_prompt, language, auto_detect_language],
406
  outputs=transcription_output,
407
  )
408
  with gr.TabItem("Translation"):
@@ -411,6 +442,11 @@ with gr.Blocks() as demo:
411
  audio_input_translate = gr.File(
412
  type="filepath", label="Upload File containing Audio", file_types=[f".{ext}" for ext in ALLOWED_FILE_EXTENSIONS]
413
  )
 
 
 
 
 
414
  with gr.Row():
415
  translate_prompt = gr.Textbox(
416
  label="Prompt (Optional)",
@@ -420,7 +456,7 @@ with gr.Blocks() as demo:
420
  translation_output = gr.Textbox(label="Translation")
421
  translate_button.click(
422
  translate_audio,
423
- inputs=[audio_input_translate, translate_prompt],
424
  outputs=translation_output,
425
  )
426
  with gr.TabItem("Subtitle Maker"):
@@ -429,6 +465,11 @@ with gr.Blocks() as demo:
429
  label="Upload Audio/Video",
430
  file_types=[f".{ext}" for ext in ALLOWED_FILE_EXTENSIONS],
431
  )
 
 
 
 
 
432
  transcribe_prompt_subtitles = gr.Textbox(
433
  label="Prompt (Optional)",
434
  info="Specify any context or spelling corrections.",
@@ -452,6 +493,7 @@ with gr.Blocks() as demo:
452
  transcribe_prompt_subtitles,
453
  language_subtitles,
454
  auto_detect_language_subtitles,
 
455
  ],
456
  outputs=[srt_output, video_output, gr.Textbox(label="Error")],
457
  )
 
216
  return audio_file_path, None
217
 
218
 
219
+ def transcribe_audio(audio_file_path, prompt, language, auto_detect_language, model):
220
  # Check and process the file first
221
  processed_path, error_message = check_file(audio_file_path)
222
 
 
227
  with open(processed_path, "rb") as file:
228
  transcription = client.audio.transcriptions.create(
229
  file=(os.path.basename(processed_path), file.read()),
230
+ model=model,
231
  prompt=prompt,
232
  response_format="json",
233
  language=None if auto_detect_language else language,
 
236
  return transcription.text
237
 
238
 
239
+ def translate_audio(audio_file_path, prompt, model):
240
  # Check and process the file first
241
  processed_path, error_message = check_file(audio_file_path)
242
 
 
247
  with open(processed_path, "rb") as file:
248
  translation = client.audio.translations.create(
249
  file=(os.path.basename(processed_path), file.read()),
250
+ model=model,
251
  prompt=prompt,
252
  response_format="json",
253
  temperature=0.0,
 
274
  return "".join(srt_lines)
275
 
276
 
277
+ def generate_subtitles(audio_file_path, prompt, language, auto_detect_language, model):
278
  """Converts Whisper JSON transcription to SRT format."""
279
  # Check and process the file first
280
  processed_path, error_message = check_file(audio_file_path)
 
286
  with open(processed_path, "rb") as file:
287
  transcription_json = client.audio.transcriptions.create(
288
  file=(os.path.basename(processed_path), file.read()),
289
+ model=model,
290
  prompt=prompt,
291
  response_format="json",
292
  language=None if auto_detect_language else language, # Conditional language parameter
 
305
  temp_srt_file.write(srt_content)
306
 
307
  # Generate subtitles and add to video if MP4
308
+ if audio_file_path.lower().endswith((".mp4", ".webm")):
309
  try:
310
+ # Use ffmpeg to add subtitles to the video
311
+ output_file_path = audio_file_path.replace(os.path.splitext(audio_file_path)[1], "_with_subs" + os.path.splitext(audio_file_path)[1])
312
+ subprocess.run(
313
+ [
314
+ "ffmpeg",
315
+ "-i",
316
+ audio_file_path,
317
+ "-i",
318
+ temp_srt_path,
319
+ "-map",
320
+ "0:v",
321
+ "-map",
322
+ "0:a",
323
+ "-map",
324
+ "1",
325
+ "-c:v",
326
+ "copy",
327
+ "-c:a",
328
+ "copy",
329
+ "-c:s",
330
+ "mov_text",
331
+ "-metadata:s:s:0",
332
+ "language=eng",
333
+ output_file_path,
334
+ ],
335
+ check=True,
336
+ )
337
+ return None, output_file_path, None, temp_srt_path # Return the temp_srt_path
338
  except subprocess.CalledProcessError as e:
339
  return None, None, f"Error during subtitle addition: {e}", temp_srt_path # Return the temp_srt_path
340
 
 
412
  audio_input = gr.File(
413
  type="filepath", label="Upload File containing Audio", file_types=[f".{ext}" for ext in ALLOWED_FILE_EXTENSIONS]
414
  )
415
+ model_choice_transcribe = gr.Dropdown(
416
+ choices=["whisper-large-v3"], # Only include 'whisper-large-v3'
417
+ value="whisper-large-v3",
418
+ label="Model",
419
+ )
420
  with gr.Row():
421
  transcribe_prompt = gr.Textbox(
422
  label="Prompt (Optional)",
 
433
  transcription_output = gr.Textbox(label="Transcription")
434
  transcribe_button.click(
435
  transcribe_audio,
436
+ inputs=[audio_input, transcribe_prompt, language, auto_detect_language, model_choice_transcribe],
437
  outputs=transcription_output,
438
  )
439
  with gr.TabItem("Translation"):
 
442
  audio_input_translate = gr.File(
443
  type="filepath", label="Upload File containing Audio", file_types=[f".{ext}" for ext in ALLOWED_FILE_EXTENSIONS]
444
  )
445
+ model_choice_translate = gr.Dropdown(
446
+ choices=["whisper-large-v3"], # Only include 'whisper-large-v3'
447
+ value="whisper-large-v3",
448
+ label="Model",
449
+ )
450
  with gr.Row():
451
  translate_prompt = gr.Textbox(
452
  label="Prompt (Optional)",
 
456
  translation_output = gr.Textbox(label="Translation")
457
  translate_button.click(
458
  translate_audio,
459
+ inputs=[audio_input_translate, translate_prompt, model_choice_translate],
460
  outputs=translation_output,
461
  )
462
  with gr.TabItem("Subtitle Maker"):
 
465
  label="Upload Audio/Video",
466
  file_types=[f".{ext}" for ext in ALLOWED_FILE_EXTENSIONS],
467
  )
468
+ model_choice_subtitles = gr.Dropdown(
469
+ choices=["whisper-large-v3"], # Only include 'whisper-large-v3'
470
+ value="whisper-large-v3",
471
+ label="Model",
472
+ )
473
  transcribe_prompt_subtitles = gr.Textbox(
474
  label="Prompt (Optional)",
475
  info="Specify any context or spelling corrections.",
 
493
  transcribe_prompt_subtitles,
494
  language_subtitles,
495
  auto_detect_language_subtitles,
496
+ model_choice_subtitles,
497
  ],
498
  outputs=[srt_output, video_output, gr.Textbox(label="Error")],
499
  )