Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
@@ -44,10 +44,75 @@ def generate_response(prompt, history, model, temperature, max_tokens, top_p, se
|
|
44 |
return response
|
45 |
|
46 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
def transcribe_audio(audio_file_path, prompt, language):
|
48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
transcription = client.audio.transcriptions.create(
|
50 |
-
file=(os.path.basename(
|
51 |
model="whisper-large-v3",
|
52 |
prompt=prompt,
|
53 |
response_format="json",
|
@@ -58,9 +123,16 @@ def transcribe_audio(audio_file_path, prompt, language):
|
|
58 |
|
59 |
|
60 |
def translate_audio(audio_file_path, prompt):
|
61 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
translation = client.audio.translations.create(
|
63 |
-
file=(os.path.basename(
|
64 |
model="whisper-large-v3",
|
65 |
prompt=prompt,
|
66 |
response_format="json",
|
|
|
44 |
return response
|
45 |
|
46 |
|
47 |
+
# Audio container/codec extensions accepted for upload (lower-case, no dot).
ALLOWED_FILE_EXTENSIONS = ["mp3", "mp4", "mpeg", "mpga", "m4a", "wav", "webm"]
# Largest file, in MiB, that is passed on without downsampling.
MAX_FILE_SIZE_MB = 25

_BYTES_PER_MB = 1024 * 1024


def _remove_quietly(path):
    """Delete *path* if it exists, ignoring filesystem errors."""
    try:
        os.remove(path)
    except OSError:
        pass


def check_file(audio_file_path):
    """Validate an uploaded audio file and downsample it when it is too large.

    The file must have one of ``ALLOWED_FILE_EXTENSIONS``. If it is larger
    than ``MAX_FILE_SIZE_MB`` it is re-encoded with ``ffmpeg`` to a 16 kHz
    mono WAV written next to the input as ``<name>_downsampled.wav``.

    Args:
        audio_file_path: Path of the uploaded file; may be empty/``None``.

    Returns:
        A ``(path, error)`` tuple. On success ``path`` is the file to use
        (the original, or the downsampled copy) and ``error`` is ``None``.
        On failure ``path`` is ``None`` and ``error`` is a ``gr.Error``
        instance. The error is returned, not raised.
    """
    if not audio_file_path:
        return None, gr.Error("Please upload an audio file.")

    file_size_mb = os.path.getsize(audio_file_path) / _BYTES_PER_MB
    # Take the extension from the file name only, so that dots in directory
    # names or an extension-less file called e.g. "wav" are not misread.
    file_extension = os.path.splitext(os.path.basename(audio_file_path))[1]
    file_extension = file_extension.lstrip(".").lower()

    if file_extension not in ALLOWED_FILE_EXTENSIONS:
        return (
            None,
            gr.Error(
                f"Invalid file type (.{file_extension}). Allowed types: {', '.join(ALLOWED_FILE_EXTENSIONS)}"
            ),
        )

    if file_size_mb <= MAX_FILE_SIZE_MB:
        return audio_file_path, None

    gr.Warning(
        f"File size too large ({file_size_mb:.2f} MB). Attempting to downsample to 16kHz. Maximum allowed: {MAX_FILE_SIZE_MB} MB"
    )

    output_file_path = os.path.splitext(audio_file_path)[0] + "_downsampled.wav"
    try:
        subprocess.run(
            [
                "ffmpeg",
                # Overwrite a stale output from an earlier attempt instead of
                # prompting on stdin.
                "-y",
                "-i",
                audio_file_path,
                "-ar",
                "16000",
                "-ac",
                "1",
                "-map",
                "0:a:",
                output_file_path,
            ],
            check=True,
        )
        # Check size after downsampling
        downsampled_size_mb = os.path.getsize(output_file_path) / _BYTES_PER_MB
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError also covers a missing ffmpeg executable.
        _remove_quietly(output_file_path)
        return None, gr.Error(f"Error during downsampling: {e}")

    if downsampled_size_mb > MAX_FILE_SIZE_MB:
        # The rejected copy is of no further use; do not leave it on disk.
        _remove_quietly(output_file_path)
        return (
            None,
            gr.Error(
                f"File size still too large after downsampling ({downsampled_size_mb:.2f} MB). Maximum allowed: {MAX_FILE_SIZE_MB} MB"
            ),
        )

    return output_file_path, None
|
103 |
+
|
104 |
+
|
105 |
def transcribe_audio(audio_file_path, prompt, language):
|
106 |
+
# Check and process the file first
|
107 |
+
processed_path, error_message = check_file(audio_file_path)
|
108 |
+
|
109 |
+
# If there's an error during file check
|
110 |
+
if error_message:
|
111 |
+
return error_message
|
112 |
+
|
113 |
+
with open(processed_path, "rb") as file:
|
114 |
transcription = client.audio.transcriptions.create(
|
115 |
+
file=(os.path.basename(processed_path), file.read()),
|
116 |
model="whisper-large-v3",
|
117 |
prompt=prompt,
|
118 |
response_format="json",
|
|
|
123 |
|
124 |
|
125 |
def translate_audio(audio_file_path, prompt):
|
126 |
+
# Check and process the file first
|
127 |
+
processed_path, error_message = check_file(audio_file_path)
|
128 |
+
|
129 |
+
# If there's an error during file check
|
130 |
+
if error_message:
|
131 |
+
return error_message
|
132 |
+
|
133 |
+
with open(processed_path, "rb") as file:
|
134 |
translation = client.audio.translations.create(
|
135 |
+
file=(os.path.basename(processed_path), file.read()),
|
136 |
model="whisper-large-v3",
|
137 |
prompt=prompt,
|
138 |
response_format="json",
|