Spaces:

Blane187
/

TESTGroq-API-Chat

Paused

App Files Files Community

Nick088 committed on Jul 8, 2024

Commit

5d2df50

verified ·

1 Parent(s): abb3da6

Update app.py

Browse files

Files changed (1) hide show

app.py +76 -4

app.py CHANGED Viewed

@@ -44,10 +44,75 @@ def generate_response(prompt, history, model, temperature, max_tokens, top_p, se
     return response
 def transcribe_audio(audio_file_path, prompt, language):
-    with open(audio_file_path, "rb") as file:
         transcription = client.audio.transcriptions.create(
-            file=(os.path.basename(audio_file_path), file.read()),
             model="whisper-large-v3",
             prompt=prompt,
             response_format="json",
@@ -58,9 +123,16 @@ def transcribe_audio(audio_file_path, prompt, language):
 def translate_audio(audio_file_path, prompt):
-    with open(audio_file_path, "rb") as file:
         translation = client.audio.translations.create(
-            file=(os.path.basename(audio_file_path), file.read()),
             model="whisper-large-v3",
             prompt=prompt,
             response_format="json",

     return response
+# Audio container/extension names accepted by check_file (compared lower-cased, without the dot).
+ALLOWED_FILE_EXTENSIONS = ["mp3", "mp4", "mpeg", "mpga", "m4a", "wav", "webm"]
+# Upload size limit in megabytes; larger files are downsampled before being rejected.
+MAX_FILE_SIZE_MB = 25
+def check_file(audio_file_path):
+    """Validate an uploaded audio file, downsampling it when it is too large.
+
+    Checks that a path was given, that its extension is listed in
+    ALLOWED_FILE_EXTENSIONS, and that its size does not exceed
+    MAX_FILE_SIZE_MB. Oversized files are re-encoded with ffmpeg to a
+    16 kHz mono WAV written next to the original, and the size is re-checked.
+
+    Args:
+        audio_file_path: Path to the uploaded audio file; may be empty or None.
+
+    Returns:
+        A ``(path, error)`` tuple. On success ``path`` is the file to use
+        (the original, or the downsampled copy) and ``error`` is None. On
+        failure ``path`` is None and ``error`` is a ``gr.Error`` instance
+        (returned, not raised).
+    """
+    if not audio_file_path:
+        return None, gr.Error("Please upload an audio file.")
+    # splitext only inspects the final path component, so a dot in a directory
+    # name or a file without any extension is not mistaken for an extension.
+    file_extension = os.path.splitext(audio_file_path)[1][1:].lower()
+    if file_extension not in ALLOWED_FILE_EXTENSIONS:
+        return (
+            None,
+            gr.Error(
+                f"Invalid file type (.{file_extension}). Allowed types: {', '.join(ALLOWED_FILE_EXTENSIONS)}"
+            ),
+        )
+    file_size_mb = os.path.getsize(audio_file_path) / (1024 * 1024)
+    if file_size_mb <= MAX_FILE_SIZE_MB:
+        return audio_file_path, None
+    gr.Warning(
+        f"File size too large ({file_size_mb:.2f} MB). Attempting to downsample to 16kHz. Maximum allowed: {MAX_FILE_SIZE_MB} MB"
+    )
+    output_file_path = os.path.splitext(audio_file_path)[0] + "_downsampled.wav"
+    try:
+        subprocess.run(
+            [
+                "ffmpeg",
+                # Never wait on stdin and overwrite a stale output from an
+                # earlier run instead of prompting (which would stall or fail).
+                "-nostdin",
+                "-y",
+                "-i",
+                audio_file_path,
+                "-ar",
+                "16000",
+                "-ac",
+                "1",
+                "-map",
+                "0:a:",
+                output_file_path,
+            ],
+            check=True,
+        )
+    except (subprocess.CalledProcessError, OSError) as e:
+        # OSError covers a missing or non-executable ffmpeg binary, which
+        # would otherwise escape as an unhandled exception.
+        return None, gr.Error(f"Error during downsampling: {e}")
+    # Check size after downsampling
+    downsampled_size_mb = os.path.getsize(output_file_path) / (1024 * 1024)
+    if downsampled_size_mb > MAX_FILE_SIZE_MB:
+        # Best-effort cleanup: the oversized copy is useless, and a failure to
+        # delete it must not hide the size error reported to the user.
+        try:
+            os.remove(output_file_path)
+        except OSError:
+            pass
+        return (
+            None,
+            gr.Error(
+                f"File size still too large after downsampling ({downsampled_size_mb:.2f} MB). Maximum allowed: {MAX_FILE_SIZE_MB} MB"
+            ),
+        )
+    return output_file_path, None
 def transcribe_audio(audio_file_path, prompt, language):
+    # Check and process the file first
+    processed_path, error_message = check_file(audio_file_path)
+    # If there's an error during file check
+    if error_message:
+        return error_message
+    with open(processed_path, "rb") as file:
         transcription = client.audio.transcriptions.create(
+            file=(os.path.basename(processed_path), file.read()),
             model="whisper-large-v3",
             prompt=prompt,
             response_format="json",
 def translate_audio(audio_file_path, prompt):
+    # Check and process the file first
+    processed_path, error_message = check_file(audio_file_path)
+    # If there's an error during file check
+    if error_message:
+        return error_message
+    with open(processed_path, "rb") as file:
         translation = client.audio.translations.create(
+            file=(os.path.basename(processed_path), file.read()),
             model="whisper-large-v3",
             prompt=prompt,
             response_format="json",