Spaces:

Blane187
/

TESTGroq-API-Chat

Paused

App Files Community

Nick088 committed on Jul 9, 2024

Commit

02d3e15

verified ·

1 Parent(s): 8c9701f

Update app.py

Browse files

Files changed (1) hide show

app.py +53 -11

app.py CHANGED Viewed

@@ -216,7 +216,7 @@ def check_file(audio_file_path):
     return audio_file_path, None
-def transcribe_audio(audio_file_path, prompt, language, auto_detect_language):
     # Check and process the file first
     processed_path, error_message = check_file(audio_file_path)
@@ -227,7 +227,7 @@ def transcribe_audio(audio_file_path, prompt, language, auto_detect_language):
     with open(processed_path, "rb") as file:
         transcription = client.audio.transcriptions.create(
             file=(os.path.basename(processed_path), file.read()),
-            model="whisper-large-v3",
             prompt=prompt,
             response_format="json",
             language=None if auto_detect_language else language,
@@ -236,7 +236,7 @@ def transcribe_audio(audio_file_path, prompt, language, auto_detect_language):
     return transcription.text
-def translate_audio(audio_file_path, prompt):
     # Check and process the file first
     processed_path, error_message = check_file(audio_file_path)
@@ -247,7 +247,7 @@ def translate_audio(audio_file_path, prompt):
     with open(processed_path, "rb") as file:
         translation = client.audio.translations.create(
             file=(os.path.basename(processed_path), file.read()),
-            model="whisper-large-v3",
             prompt=prompt,
             response_format="json",
             temperature=0.0,
@@ -274,7 +274,7 @@ def create_srt_from_text(transcription_text):
     return "".join(srt_lines)
-def generate_subtitles(audio_file_path, prompt, language, auto_detect_language):
     """Converts Whisper JSON transcription to SRT format."""
     # Check and process the file first
     processed_path, error_message = check_file(audio_file_path)
@@ -286,7 +286,7 @@ def generate_subtitles(audio_file_path, prompt, language, auto_detect_language):
     with open(processed_path, "rb") as file:
         transcription_json = client.audio.transcriptions.create(
             file=(os.path.basename(processed_path), file.read()),
-            model="whisper-large-v3",
             prompt=prompt,
             response_format="json",
             language=None if auto_detect_language else language,  # Conditional language parameter
@@ -305,10 +305,36 @@ def generate_subtitles(audio_file_path, prompt, language, auto_detect_language):
         temp_srt_file.write(srt_content)
     # Generate subtitles and add to video if MP4
-    if audio_file_path.lower().endswith(".mp4"):
         try:
-            # ... (ffmpeg code to add subtitles) ...
-            return None, audio_file_path.replace(".mp4", "_with_subs.mp4"), None, temp_srt_path  # Return the temp_srt_path
         except subprocess.CalledProcessError as e:
             return None, None, f"Error during subtitle addition: {e}", temp_srt_path  # Return the temp_srt_path
@@ -386,6 +412,11 @@ with gr.Blocks() as demo:
                         audio_input = gr.File(
                             type="filepath", label="Upload File containing Audio", file_types=[f".{ext}" for ext in ALLOWED_FILE_EXTENSIONS]
                         )
                     with gr.Row():
                         transcribe_prompt = gr.Textbox(
                             label="Prompt (Optional)",
@@ -402,7 +433,7 @@ with gr.Blocks() as demo:
                     transcription_output = gr.Textbox(label="Transcription")
                     transcribe_button.click(
                         transcribe_audio,
-                        inputs=[audio_input, transcribe_prompt, language, auto_detect_language],
                         outputs=transcription_output,
                     )
                 with gr.TabItem("Translation"):
@@ -411,6 +442,11 @@ with gr.Blocks() as demo:
                         audio_input_translate = gr.File(
                             type="filepath", label="Upload File containing Audio", file_types=[f".{ext}" for ext in ALLOWED_FILE_EXTENSIONS]
                         )
                     with gr.Row():
                         translate_prompt = gr.Textbox(
                             label="Prompt (Optional)",
@@ -420,7 +456,7 @@ with gr.Blocks() as demo:
                     translation_output = gr.Textbox(label="Translation")
                     translate_button.click(
                         translate_audio,
-                        inputs=[audio_input_translate, translate_prompt],
                         outputs=translation_output,
                     )
                 with gr.TabItem("Subtitle Maker"):
@@ -429,6 +465,11 @@ with gr.Blocks() as demo:
                             label="Upload Audio/Video",
                             file_types=[f".{ext}" for ext in ALLOWED_FILE_EXTENSIONS],
                         )
                         transcribe_prompt_subtitles = gr.Textbox(
                             label="Prompt (Optional)",
                             info="Specify any context or spelling corrections.",
@@ -452,6 +493,7 @@ with gr.Blocks() as demo:
                             transcribe_prompt_subtitles,
                             language_subtitles,
                             auto_detect_language_subtitles,
                         ],
                         outputs=[srt_output, video_output, gr.Textbox(label="Error")],
                     )

     return audio_file_path, None
+def transcribe_audio(audio_file_path, prompt, language, auto_detect_language, model):
     # Check and process the file first
     processed_path, error_message = check_file(audio_file_path)
     with open(processed_path, "rb") as file:
         transcription = client.audio.transcriptions.create(
             file=(os.path.basename(processed_path), file.read()),
+            model=model,
             prompt=prompt,
             response_format="json",
             language=None if auto_detect_language else language,
     return transcription.text
+def translate_audio(audio_file_path, prompt, model):
     # Check and process the file first
     processed_path, error_message = check_file(audio_file_path)
     with open(processed_path, "rb") as file:
         translation = client.audio.translations.create(
             file=(os.path.basename(processed_path), file.read()),
+            model=model,
             prompt=prompt,
             response_format="json",
             temperature=0.0,
     return "".join(srt_lines)
+def generate_subtitles(audio_file_path, prompt, language, auto_detect_language, model):
     """Converts Whisper JSON transcription to SRT format."""
     # Check and process the file first
     processed_path, error_message = check_file(audio_file_path)
     with open(processed_path, "rb") as file:
         transcription_json = client.audio.transcriptions.create(
             file=(os.path.basename(processed_path), file.read()),
+            model=model,
             prompt=prompt,
             response_format="json",
             language=None if auto_detect_language else language,  # Conditional language parameter
         temp_srt_file.write(srt_content)
     # Generate subtitles and add to video if MP4
+    if audio_file_path.lower().endswith((".mp4", ".webm")):
         try:
+            # Use ffmpeg to add subtitles to the video
+            output_file_path = audio_file_path.replace(os.path.splitext(audio_file_path)[1], "_with_subs" + os.path.splitext(audio_file_path)[1])
+            subprocess.run(
+                [
+                    "ffmpeg",
+                    "-i",
+                    audio_file_path,
+                    "-i",
+                    temp_srt_path,
+                    "-map",
+                    "0:v",
+                    "-map",
+                    "0:a",
+                    "-map",
+                    "1",
+                    "-c:v",
+                    "copy",
+                    "-c:a",
+                    "copy",
+                    "-c:s",
+                    "mov_text",
+                    "-metadata:s:s:0",
+                    "language=eng",
+                    output_file_path,
+                ],
+                check=True,
+            )
+            return None, output_file_path, None, temp_srt_path  # Return the temp_srt_path
         except subprocess.CalledProcessError as e:
             return None, None, f"Error during subtitle addition: {e}", temp_srt_path  # Return the temp_srt_path
                         audio_input = gr.File(
                             type="filepath", label="Upload File containing Audio", file_types=[f".{ext}" for ext in ALLOWED_FILE_EXTENSIONS]
                         )
+                        model_choice_transcribe = gr.Dropdown(
+                            choices=["whisper-large-v3"],  # Only include 'whisper-large-v3'
+                            value="whisper-large-v3",
+                            label="Model",
+                        )
                     with gr.Row():
                         transcribe_prompt = gr.Textbox(
                             label="Prompt (Optional)",
                     transcription_output = gr.Textbox(label="Transcription")
                     transcribe_button.click(
                         transcribe_audio,
+                        inputs=[audio_input, transcribe_prompt, language, auto_detect_language, model_choice_transcribe],
                         outputs=transcription_output,
                     )
                 with gr.TabItem("Translation"):
                         audio_input_translate = gr.File(
                             type="filepath", label="Upload File containing Audio", file_types=[f".{ext}" for ext in ALLOWED_FILE_EXTENSIONS]
                         )
+                        model_choice_translate = gr.Dropdown(
+                            choices=["whisper-large-v3"],  # Only include 'whisper-large-v3'
+                            value="whisper-large-v3",
+                            label="Model",
+                        )
                     with gr.Row():
                         translate_prompt = gr.Textbox(
                             label="Prompt (Optional)",
                     translation_output = gr.Textbox(label="Translation")
                     translate_button.click(
                         translate_audio,
+                        inputs=[audio_input_translate, translate_prompt, model_choice_translate],
                         outputs=translation_output,
                     )
                 with gr.TabItem("Subtitle Maker"):
                             label="Upload Audio/Video",
                             file_types=[f".{ext}" for ext in ALLOWED_FILE_EXTENSIONS],
                         )
+                        model_choice_subtitles = gr.Dropdown(
+                            choices=["whisper-large-v3"],  # Only include 'whisper-large-v3'
+                            value="whisper-large-v3",
+                            label="Model",
+                        )
                         transcribe_prompt_subtitles = gr.Textbox(
                             label="Prompt (Optional)",
                             info="Specify any context or spelling corrections.",
                             transcribe_prompt_subtitles,
                             language_subtitles,
                             auto_detect_language_subtitles,
+                            model_choice_subtitles,
                         ],
                         outputs=[srt_output, video_output, gr.Textbox(label="Error")],
                     )