Athspi committed · verified
Commit 6b2690e · 1 Parent(s): 3d4cc8c

Update app.py

Files changed (1)
  1. app.py +29 -16
app.py CHANGED
@@ -5,9 +5,6 @@ import os
 from pydub import AudioSegment
 from transformers import pipeline
 
-# Ensure compatible versions of torch and transformers are installed
-# Run: pip install torch==1.13.1 transformers==4.26.1
-
 # Mapping of model names to Whisper model sizes
 MODELS = {
     "Tiny (Fastest)": "tiny",
@@ -130,6 +127,26 @@ LANGUAGE_NAME_TO_CODE = {
     "Sundanese": "su",
 }
 
+def detect_language(audio_file):
+    """Detect the language of the audio file."""
+    # Load the Whisper model (use "base" for faster detection)
+    model = whisper.load_model("base")
+
+    # Convert audio to 16kHz mono for better compatibility with Whisper
+    audio = AudioSegment.from_file(audio_file)
+    audio = audio.set_frame_rate(16000).set_channels(1)
+    processed_audio_path = "processed_audio.wav"
+    audio.export(processed_audio_path, format="wav")
+
+    # Transcribe once; Whisper auto-detects the language and reports it in the result
+    result = model.transcribe(processed_audio_path, fp16=False)
+    detected_language = result.get("language", "unknown")
+
+    # Clean up the processed audio file
+    os.remove(processed_audio_path)
+
+    return f"Detected Language: {detected_language}"
+
 def transcribe_audio(audio_file, language="Auto Detect", model_size="Base (Faster)"):
     """Transcribe the audio file."""
     # Convert audio to 16kHz mono for better compatibility
@@ -178,7 +195,13 @@ def transcribe_audio(audio_file, language="Auto Detect", model_size="Base (Faster)"):
 
 # Define the Gradio interface
 with gr.Blocks() as demo:
-    gr.Markdown("# Audio Transcription with Fine-Tuned Models")
+    gr.Markdown("# Audio Transcription and Language Detection")
+
+    with gr.Tab("Detect Language"):
+        gr.Markdown("Upload an audio file to detect its language.")
+        detect_audio_input = gr.Audio(type="filepath", label="Upload Audio File")
+        detect_language_output = gr.Textbox(label="Detected Language")
+        detect_button = gr.Button("Detect Language")
 
     with gr.Tab("Transcribe Audio"):
         gr.Markdown("Upload an audio file, select a language (or choose 'Auto Detect'), and choose a model for transcription.")
@@ -197,18 +220,8 @@ with gr.Blocks() as demo:
     transcribe_output = gr.Textbox(label="Transcription and Detected Language")
     transcribe_button = gr.Button("Transcribe Audio")
 
-    # Update model dropdown based on language selection
-    def update_model_dropdown(language):
-        if language in FINE_TUNED_MODELS:
-            # Add "Fine-Tuned Model" to the dropdown choices and disable it
-            return gr.Dropdown(choices=["Fine-Tuned Model"], value="Fine-Tuned Model", interactive=False)
-        else:
-            # Reset the dropdown to standard Whisper models
-            return gr.Dropdown(choices=list(MODELS.keys()), value="Base (Faster)", interactive=True)
-
-    language_dropdown.change(update_model_dropdown, inputs=language_dropdown, outputs=model_dropdown)
-
-    # Link button to function
+    # Link buttons to functions
+    detect_button.click(detect_language, inputs=detect_audio_input, outputs=detect_language_output)
     transcribe_button.click(transcribe_audio, inputs=[transcribe_audio_input, language_dropdown, model_dropdown], outputs=transcribe_output)
 
 # Launch the Gradio interface
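For reference, the new `detect_language` helper identifies the language by running a full transcription pass; openai-whisper also exposes a dedicated detection API that scores languages on a single 30-second log-Mel window, which is cheaper for long files. A minimal sketch, assuming the openai-whisper package and ffmpeg are installed; "audio.wav" is a placeholder path, not a file from this repo:

```python
import whisper

model = whisper.load_model("base")

# Load the audio and pad/trim it to the 30-second window Whisper scores
audio = whisper.load_audio("audio.wav")
audio = whisper.pad_or_trim(audio)

# Compute the log-Mel spectrogram on the model's device
mel = whisper.log_mel_spectrogram(audio).to(model.device)

# detect_language returns (language_tokens, language_probs)
_, probs = model.detect_language(mel)
print(f"Detected language: {max(probs, key=probs.get)}")
```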
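Both `detect_language` and `transcribe_audio` export to the fixed path "processed_audio.wav", which can collide when two Gradio requests overlap. A minimal sketch of the same pydub preprocessing with a per-request temporary file instead; `to_16k_mono_wav` is a hypothetical helper name, not part of this commit:

```python
import os
import tempfile

from pydub import AudioSegment

def to_16k_mono_wav(audio_file):
    """Resample any input to the 16 kHz mono WAV Whisper expects."""
    audio = AudioSegment.from_file(audio_file)
    audio = audio.set_frame_rate(16000).set_channels(1)
    # mkstemp gives each request its own file, unlike a shared fixed name
    fd, path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    audio.export(path, format="wav")
    return path  # caller cleans up with os.remove(path) when done
```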