Nick088 committed on
Commit
5d2df50
·
verified ·
1 Parent(s): abb3da6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +76 -4
app.py CHANGED
@@ -44,10 +44,75 @@ def generate_response(prompt, history, model, temperature, max_tokens, top_p, se
44
  return response
45
 
46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  def transcribe_audio(audio_file_path, prompt, language):
48
- with open(audio_file_path, "rb") as file:
 
 
 
 
 
 
 
49
  transcription = client.audio.transcriptions.create(
50
- file=(os.path.basename(audio_file_path), file.read()),
51
  model="whisper-large-v3",
52
  prompt=prompt,
53
  response_format="json",
@@ -58,9 +123,16 @@ def transcribe_audio(audio_file_path, prompt, language):
58
 
59
 
60
  def translate_audio(audio_file_path, prompt):
61
- with open(audio_file_path, "rb") as file:
 
 
 
 
 
 
 
62
  translation = client.audio.translations.create(
63
- file=(os.path.basename(audio_file_path), file.read()),
64
  model="whisper-large-v3",
65
  prompt=prompt,
66
  response_format="json",
 
44
  return response
45
 
46
 
47
# Audio container/codec extensions accepted for upload (compared lower-case, without the dot).
ALLOWED_FILE_EXTENSIONS = ["mp3", "mp4", "mpeg", "mpga", "m4a", "wav", "webm"]
# Upper bound on the size of a file passed on to the caller, in mebibytes.
MAX_FILE_SIZE_MB = 25


# Checks file extension, size, and downsamples if needed.
def check_file(audio_file_path):
    """Validate an audio file and downsample it when it is over the size limit.

    The extension must be one of ``ALLOWED_FILE_EXTENSIONS``. A file larger
    than ``MAX_FILE_SIZE_MB`` is re-encoded with ffmpeg to 16 kHz mono WAV
    (written next to the input as ``<name>_downsampled.wav``); the result is
    rejected if it is still over the limit.

    Args:
        audio_file_path: Path of the uploaded audio file; may be empty/None.

    Returns:
        A ``(path, error)`` tuple. On success ``path`` is the file to use
        (the original or the downsampled copy) and ``error`` is ``None``.
        On failure ``path`` is ``None`` and ``error`` is a ``gr.Error``
        instance, which is returned rather than raised.
    """
    if not audio_file_path:
        return None, gr.Error("Please upload an audio file.")

    # splitext only inspects the final path component, so a dot in a directory
    # name or a file without any extension is never mistaken for a valid type.
    file_extension = os.path.splitext(audio_file_path)[1].lstrip(".").lower()
    if file_extension not in ALLOWED_FILE_EXTENSIONS:
        return (
            None,
            gr.Error(
                f"Invalid file type (.{file_extension}). Allowed types: {', '.join(ALLOWED_FILE_EXTENSIONS)}"
            ),
        )

    try:
        file_size_mb = os.path.getsize(audio_file_path) / (1024 * 1024)
    except OSError as e:
        # Keep the (path, error) contract instead of leaking a raw OSError.
        return None, gr.Error(f"Could not read audio file: {e}")

    if file_size_mb <= MAX_FILE_SIZE_MB:
        return audio_file_path, None

    gr.Warning(
        f"File size too large ({file_size_mb:.2f} MB). Attempting to downsample to 16kHz. Maximum allowed: {MAX_FILE_SIZE_MB} MB"
    )

    output_file_path = os.path.splitext(audio_file_path)[0] + "_downsampled.wav"
    try:
        subprocess.run(
            [
                "ffmpeg",
                # Overwrite a leftover output from an earlier run instead of
                # failing or waiting on an interactive prompt.
                "-y",
                "-i",
                audio_file_path,
                "-ar",
                "16000",
                "-ac",
                "1",
                "-map",
                "0:a:",
                output_file_path,
            ],
            check=True,
        )
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers the ffmpeg executable being missing or not runnable.
        return None, gr.Error(f"Error during downsampling: {e}")

    # Check size after downsampling
    try:
        downsampled_size_mb = os.path.getsize(output_file_path) / (1024 * 1024)
    except OSError as e:
        return None, gr.Error(f"Error during downsampling: {e}")

    if downsampled_size_mb > MAX_FILE_SIZE_MB:
        return (
            None,
            gr.Error(
                f"File size still too large after downsampling ({downsampled_size_mb:.2f} MB). Maximum allowed: {MAX_FILE_SIZE_MB} MB"
            ),
        )

    return output_file_path, None
103
+
104
+
105
  def transcribe_audio(audio_file_path, prompt, language):
106
+ # Check and process the file first
107
+ processed_path, error_message = check_file(audio_file_path)
108
+
109
+ # If there's an error during file check
110
+ if error_message:
111
+ return error_message
112
+
113
+ with open(processed_path, "rb") as file:
114
  transcription = client.audio.transcriptions.create(
115
+ file=(os.path.basename(processed_path), file.read()),
116
  model="whisper-large-v3",
117
  prompt=prompt,
118
  response_format="json",
 
123
 
124
 
125
  def translate_audio(audio_file_path, prompt):
126
+ # Check and process the file first
127
+ processed_path, error_message = check_file(audio_file_path)
128
+
129
+ # If there's an error during file check
130
+ if error_message:
131
+ return error_message
132
+
133
+ with open(processed_path, "rb") as file:
134
  translation = client.audio.translations.create(
135
+ file=(os.path.basename(processed_path), file.read()),
136
  model="whisper-large-v3",
137
  prompt=prompt,
138
  response_format="json",