Blane187 committed (verified)
Commit 8c03956 · Parent: 1557ed1

Update app.py

Files changed (1):
  app.py (+133, -132)
app.py CHANGED
@@ -340,6 +340,7 @@ def generate_subtitles(audio_file_path, prompt, language, auto_detect_language,
 
 
 
+
 with gr.Blocks() as demo:
     gr.Markdown(
         """
@@ -350,148 +351,148 @@ with gr.Blocks() as demo:
     )
     with gr.Tabs():
         with gr.TabItem("select option here:"):
-            with gr.Tabs():
-                with gr.TabItem("Speech To Text"):
-
-                with gr.TabItem("Transcription"):
-                    gr.Markdown("Transcript audio from files to text!")
-                    with gr.Column():
-                        audio_input = gr.File(
-                            type="filepath", label="Upload File containing Audio", file_types=[f".{ext}" for ext in ALLOWED_FILE_EXTENSIONS]
-                        )
-                        model_choice_transcribe = gr.Dropdown(
-                            choices=["whisper-large-v3"], # Only include 'whisper-large-v3'
-                            value="whisper-large-v3",
-                            label="Model",
-                        )
-                        transcribe_prompt = gr.Textbox(
-                            label="Prompt (Optional)",
-                            info="Specify any context or spelling corrections.",
-                        )
-                        language = gr.Dropdown(
-                            choices=[(lang, code) for lang, code in LANGUAGE_CODES.items()],
-                            value="en",
-                            label="Language",
-                        )
-                        auto_detect_language = gr.Checkbox(label="Auto Detect Language")
-                        transcribe_button = gr.Button("Transcribe")
-                        transcription_output = gr.Textbox(label="Transcription")
-                        transcribe_button.click(
-                            transcribe_audio,
-                            inputs=[audio_input, transcribe_prompt, language, auto_detect_language, model_choice_transcribe],
-                            outputs=transcription_output,
+            with gr.Tabs():
+                with gr.TabItem("Speech To Text"):
+                    gr.Markdown("Speech to Text coming soon!")
+                with gr.TabItem("Transcription"):
+                    gr.Markdown("Transcript audio from files to text!")
+                    with gr.Column():
+                        audio_input = gr.File(
+                            type="filepath", label="Upload File containing Audio", file_types=[f".{ext}" for ext in ALLOWED_FILE_EXTENSIONS]
+                        )
+                        model_choice_transcribe = gr.Dropdown(
+                            choices=["whisper-large-v3"], # Only include 'whisper-large-v3'
+                            value="whisper-large-v3",
+                            label="Model",
+                        )
+                        transcribe_prompt = gr.Textbox(
+                            label="Prompt (Optional)",
+                            info="Specify any context or spelling corrections.",
+                        )
+                        language = gr.Dropdown(
+                            choices=[(lang, code) for lang, code in LANGUAGE_CODES.items()],
+                            value="en",
+                            label="Language",
+                        )
+                        auto_detect_language = gr.Checkbox(label="Auto Detect Language")
+                        transcribe_button = gr.Button("Transcribe")
+                        transcription_output = gr.Textbox(label="Transcription")
+                        transcribe_button.click(
+                            transcribe_audio,
+                            inputs=[audio_input, transcribe_prompt, language, auto_detect_language, model_choice_transcribe],
+                            outputs=transcription_output,
                         )
-                with gr.TabItem("Translation"):
-                    gr.Markdown("Transcript audio from files and translate them to English text!")
-                    with gr.Column():
-                        audio_input_translate = gr.File(
-                            type="filepath", label="Upload File containing Audio", file_types=[f".{ext}" for ext in ALLOWED_FILE_EXTENSIONS]
-                        )
-                        model_choice_translate = gr.Dropdown(
-                            choices=["whisper-large-v3"], # Only include 'whisper-large-v3'
-                            value="whisper-large-v3",
-                            label="Model",
-                        )
-                        translate_prompt = gr.Textbox(
-                            label="Prompt (Optional)",
-                            info="Specify any context or spelling corrections.",
-                        )
-                        translate_button = gr.Button("Translate")
+                with gr.TabItem("Translation"):
+                    gr.Markdown("Transcript audio from files and translate them to English text!")
+                    with gr.Column():
+                        audio_input_translate = gr.File(
+                            type="filepath", label="Upload File containing Audio", file_types=[f".{ext}" for ext in ALLOWED_FILE_EXTENSIONS]
+                        )
+                        model_choice_translate = gr.Dropdown(
+                            choices=["whisper-large-v3"], # Only include 'whisper-large-v3'
+                            value="whisper-large-v3",
+                            label="Model",
+                        )
+                        translate_prompt = gr.Textbox(
+                            label="Prompt (Optional)",
+                            info="Specify any context or spelling corrections.",
+                        )
+                        translate_button = gr.Button("Translate")
                         translation_output = gr.Textbox(label="Translation")
                         translate_button.click(
                             translate_audio,
                             inputs=[audio_input_translate, translate_prompt, model_choice_translate],
                             outputs=translation_output,
                         )
-                with gr.TabItem("Subtitle Maker"):
-                    with gr.Column():
-                        audio_input_subtitles = gr.File(
-                            label="Upload Audio/Video",
-                            file_types=[f".{ext}" for ext in ALLOWED_FILE_EXTENSIONS],
-                        )
-                        model_choice_subtitles = gr.Dropdown(
-                            choices=["whisper-large-v3"], # Only include 'whisper-large-v3'
-                            value="whisper-large-v3",
-                            label="Model",
-                        )
-                        transcribe_prompt_subtitles = gr.Textbox(
-                            label="Prompt (Optional)",
-                            info="Specify any context or spelling corrections.",
-                        )
-                        language_subtitles = gr.Dropdown(
-                            choices=[(lang, code) for lang, code in LANGUAGE_CODES.items()],
-                            value="en",
-                            label="Language",
-                        )
-                        auto_detect_language_subtitles = gr.Checkbox(
-                            label="Auto Detect Language"
-                        )
-                        transcribe_button_subtitles = gr.Button("Generate Subtitles")
-                        srt_output = gr.File(label="SRT Output File")
-                        video_output = gr.File(label="Output Video with Subtitles")
-                        transcribe_button_subtitles.click(
-                            generate_subtitles,
-                            inputs=[
-                                audio_input_subtitles,
-                                transcribe_prompt_subtitles,
-                                language_subtitles,
-                                auto_detect_language_subtitles,
-                                model_choice_subtitles,
-                            ],
-                            outputs=[srt_output, video_output, gr.Textbox(label="Error")]
-                        )
-                with gr.TabItem("LLMs"):
-                    with gr.Column():
-                        model = gr.Dropdown(
-                            choices=[
-                                "llama3-70b-8192",
-                                "llama3-8b-8192",
-                                "mixtral-8x7b-32768",
-                                "gemma-7b-it",
-                                "gemma2-9b-it",
-                            ],
-                            value="llama3-70b-8192",
-                            label="Model",
-                        )
-                        temperature = gr.Slider(
-                            minimum=0.0,
-                            maximum=1.0,
-                            step=0.01,
-                            value=0.5,
-                            label="Temperature",
-                            info="Controls diversity of the generated text. Lower is more deterministic, higher is more creative.",
-                        )
-                        max_tokens = gr.Slider(
-                            minimum=1,
-                            maximum=8192,
-                            step=1,
-                            value=4096,
-                            label="Max Tokens",
-                            info="The maximum number of tokens that the model can process in a single response.<br>Maximums: 8k for gemma 7b it, gemma2 9b it, llama 7b & 70b, 32k for mixtral 8x7b.",
-                        )
-                        top_p = gr.Slider(
-                            minimum=0.0,
-                            maximum=1.0,
-                            step=0.01,
-                            value=0.5,
-                            label="Top P",
-                            info="A method of text generation where a model will only consider the most probable next tokens that make up the probability p.",
-                        )
-                        seed = gr.Number(
-                            precision=0, value=42, label="Seed", info="A starting point to initiate generation, use 0 for random"
-                        )
-                        model.change(update_max_tokens, inputs=[model], outputs=max_tokens)
-                        chatbot = gr.ChatInterface(
-                            fn=generate_response,
-                            chatbot=None,
-                            additional_inputs=[
-                                model,
-                                temperature,
+                with gr.TabItem("Subtitle Maker"):
+                    with gr.Column():
+                        audio_input_subtitles = gr.File(
+                            label="Upload Audio/Video",
+                            file_types=[f".{ext}" for ext in ALLOWED_FILE_EXTENSIONS],
+                        )
+                        model_choice_subtitles = gr.Dropdown(
+                            choices=["whisper-large-v3"], # Only include 'whisper-large-v3'
+                            value="whisper-large-v3",
+                            label="Model",
+                        )
+                        transcribe_prompt_subtitles = gr.Textbox(
+                            label="Prompt (Optional)",
+                            info="Specify any context or spelling corrections.",
+                        )
+                        language_subtitles = gr.Dropdown(
+                            choices=[(lang, code) for lang, code in LANGUAGE_CODES.items()],
+                            value="en",
+                            label="Language",
+                        )
+                        auto_detect_language_subtitles = gr.Checkbox(
+                            label="Auto Detect Language"
+                        )
+                        transcribe_button_subtitles = gr.Button("Generate Subtitles")
+                        srt_output = gr.File(label="SRT Output File")
+                        video_output = gr.File(label="Output Video with Subtitles")
+                        transcribe_button_subtitles.click(
+                            generate_subtitles,
+                            inputs=[
+                                audio_input_subtitles,
+                                transcribe_prompt_subtitles,
+                                language_subtitles,
+                                auto_detect_language_subtitles,
+                                model_choice_subtitles,
+                            ],
+                            outputs=[srt_output, video_output, gr.Textbox(label="Error")]
+                        )
+                with gr.TabItem("LLMs"):
+                    with gr.Column():
+                        model = gr.Dropdown(
+                            choices=[
+                                "llama3-70b-8192",
+                                "llama3-8b-8192",
+                                "mixtral-8x7b-32768",
+                                "gemma-7b-it",
+                                "gemma2-9b-it",
+                            ],
+                            value="llama3-70b-8192",
+                            label="Model",
+                        )
+                        temperature = gr.Slider(
+                            minimum=0.0,
+                            maximum=1.0,
+                            step=0.01,
+                            value=0.5,
+                            label="Temperature",
+                            info="Controls diversity of the generated text. Lower is more deterministic, higher is more creative.",
+                        )
+                        max_tokens = gr.Slider(
+                            minimum=1,
+                            maximum=8192,
+                            step=1,
+                            value=4096,
+                            label="Max Tokens",
+                            info="The maximum number of tokens that the model can process in a single response.<br>Maximums: 8k for gemma 7b it, gemma2 9b it, llama 7b & 70b, 32k for mixtral 8x7b.",
+                        )
+                        top_p = gr.Slider(
+                            minimum=0.0,
+                            maximum=1.0,
+                            step=0.01,
+                            value=0.5,
+                            label="Top P",
+                            info="A method of text generation where a model will only consider the most probable next tokens that make up the probability p.",
+                        )
+                        seed = gr.Number(
+                            precision=0, value=42, label="Seed", info="A starting point to initiate generation, use 0 for random"
+                        )
+                        model.change(update_max_tokens, inputs=[model], outputs=max_tokens)
+                        chatbot = gr.ChatInterface(
+                            fn=generate_response,
+                            chatbot=None,
+                            additional_inputs=[
+                                model,
+                                temperature,
                                 max_tokens,
                                 top_p,
                                 seed,
-                            ],
-                        )
-                        model.change(update_max_tokens, inputs=[model], outputs=max_tokens)
+                            ],
+                        )
+                        model.change(update_max_tokens, inputs=[model], outputs=max_tokens)
 
 demo.launch()
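
For orientation, here is a minimal, self-contained sketch (not the repository's code) of the nested-tab layout the updated app.py builds: an outer "select option here:" tab wraps the inner feature tabs, and the Speech To Text tab now shows a placeholder message instead of an empty body. It assumes only that the gradio package is installed; the real event handlers, the other tabs, and the app's own header text are omitted.

import gradio as gr

# Layout-only sketch of the tab structure introduced by this commit.
with gr.Blocks() as demo:
    gr.Markdown("Demo header goes here")  # the real app renders its own Markdown header
    with gr.Tabs():
        with gr.TabItem("select option here:"):
            with gr.Tabs():
                with gr.TabItem("Speech To Text"):
                    gr.Markdown("Speech to Text coming soon!")
                with gr.TabItem("Transcription"):
                    gr.Markdown("Transcript audio from files to text!")
                    audio_input = gr.File(type="filepath", label="Upload File containing Audio")
                    transcribe_button = gr.Button("Transcribe")  # click handler omitted in this sketch
                    transcription_output = gr.Textbox(label="Transcription")

if __name__ == "__main__":
    demo.launch()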
 
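The LLMs tab feeds its controls to a gr.ChatInterface through additional_inputs and resizes the Max Tokens slider whenever the model dropdown changes. The sketch below illustrates that wiring pattern under stated assumptions: respond and rescale_max_tokens are hypothetical stand-ins for the app's generate_response and update_max_tokens helpers, whose definitions are outside this diff, and the token limits are illustrative.

import gradio as gr

# Assumed, illustrative per-model context limits (not taken from app.py).
MODEL_LIMITS = {"llama3-70b-8192": 8192, "mixtral-8x7b-32768": 32768}

def rescale_max_tokens(model_name):
    # Cap the Max Tokens slider at the selected model's assumed limit.
    limit = MODEL_LIMITS.get(model_name, 8192)
    return gr.update(maximum=limit, value=min(4096, limit))

def respond(message, history, model_name, temperature, max_tokens):
    # Placeholder chat handler; the real app calls its LLM backend here.
    return f"[{model_name} | T={temperature} | max={max_tokens}] echo: {message}"

with gr.Blocks() as demo:
    model = gr.Dropdown(choices=list(MODEL_LIMITS), value="llama3-70b-8192", label="Model")
    temperature = gr.Slider(0.0, 1.0, value=0.5, step=0.01, label="Temperature")
    max_tokens = gr.Slider(1, 8192, value=4096, step=1, label="Max Tokens")
    # Dropdown change updates the slider, mirroring model.change(update_max_tokens, ...) in the diff.
    model.change(rescale_max_tokens, inputs=[model], outputs=max_tokens)
    # Extra controls reach the chat handler as trailing arguments via additional_inputs.
    gr.ChatInterface(fn=respond, additional_inputs=[model, temperature, max_tokens])

demo.launch()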