Spaces:

Athspi
/

Ai-audio

Running

App Files Files Community

Athspi committed on Jan 12

Commit

868debc

verified ·

1 Parent(s): fa03377

Update app.py

Browse files

Files changed (1) hide show

app.py +43 -13

app.py CHANGED Viewed

@@ -116,7 +116,28 @@ LANGUAGE_NAME_TO_CODE = {
     "Sundanese": "su",
 }
 def transcribe_audio(audio_file, language="Auto Detect", model_size="Base (Faster)"):
     # Load the selected Whisper model
     model = whisper.load_model(MODELS[model_size])
@@ -142,25 +163,34 @@ def transcribe_audio(audio_file, language="Auto Detect", model_size="Base (Faste
     return f"Detected Language: {detected_language}\n\nTranscription:\n{result['text']}"
 # Define the Gradio interface
-iface = gr.Interface(
-    fn=transcribe_audio,
-    inputs=[
-        gr.Audio(type="filepath", label="Upload Audio File"),
-        gr.Dropdown(
             choices=list(LANGUAGE_NAME_TO_CODE.keys()),  # Full language names
             label="Select Language",
             value="Auto Detect"
-        ),
-        gr.Dropdown(
             choices=list(MODELS.keys()),  # Model options
             label="Select Model",
             value="Base (Faster)"  # Default to "Base" model
         )
-    ],
-    outputs=gr.Textbox(label="Transcription and Detected Language"),
-    title="Audio Transcription with Language and Model Selection",
-    description="Upload an audio file, select a language (or choose 'Auto Detect'), and choose a model for transcription."
-)
 # Launch the Gradio interface
-iface.launch()

     "Sundanese": "su",
 }
+def detect_language(audio_file):
+    """Detect the language of the audio file."""
+    # Load the Whisper model (use "base" for faster detection)
+    model = whisper.load_model("base")
+    # Convert audio to 16kHz mono for better compatibility with Whisper
+    audio = AudioSegment.from_file(audio_file)
+    audio = audio.set_frame_rate(16000).set_channels(1)
+    processed_audio_path = "processed_audio.wav"
+    audio.export(processed_audio_path, format="wav")
+    # Detect the language
+    result = model.transcribe(processed_audio_path, task="detect_language", fp16=False)
+    detected_language = result.get("language", "unknown")
+    # Clean up processed audio file
+    os.remove(processed_audio_path)
+    return f"Detected Language: {detected_language}"
 def transcribe_audio(audio_file, language="Auto Detect", model_size="Base (Faster)"):
+    """Transcribe the audio file."""
     # Load the selected Whisper model
     model = whisper.load_model(MODELS[model_size])
     return f"Detected Language: {detected_language}\n\nTranscription:\n{result['text']}"
 # Define the Gradio interface
+with gr.Blocks() as demo:  # Two-tab UI: language detection and transcription
+    gr.Markdown("# Audio Transcription and Language Detection")
+    with gr.Tab("Detect Language"):  # Tab 1: language detection only
+        gr.Markdown("Upload an audio file to detect its language.")
+        detect_audio_input = gr.Audio(type="filepath", label="Upload Audio File")  # presumably hands the handler a file path - confirm against Gradio docs
+        detect_language_output = gr.Textbox(label="Detected Language")
+        detect_button = gr.Button("Detect Language")
+    with gr.Tab("Transcribe Audio"):  # Tab 2: transcription with language and model selection
+        gr.Markdown("Upload an audio file, select a language (or choose 'Auto Detect'), and choose a model for transcription.")
+        transcribe_audio_input = gr.Audio(type="filepath", label="Upload Audio File")
+        language_dropdown = gr.Dropdown(
             choices=list(LANGUAGE_NAME_TO_CODE.keys()),  # Full language names
             label="Select Language",
             value="Auto Detect"
+        )
+        model_dropdown = gr.Dropdown(
             choices=list(MODELS.keys()),  # Model options
             label="Select Model",
             value="Base (Faster)"  # Default to "Base" model
         )
+        transcribe_output = gr.Textbox(label="Transcription and Detected Language")
+        transcribe_button = gr.Button("Transcribe Audio")
+    # Wire each button to its handler; the handler's return value fills the output textbox
+    detect_button.click(detect_language, inputs=detect_audio_input, outputs=detect_language_output)  # single input -> detect_language(audio_file)
+    transcribe_button.click(transcribe_audio, inputs=[transcribe_audio_input, language_dropdown, model_dropdown], outputs=transcribe_output)  # list order matches transcribe_audio(audio_file, language, model_size)
 # Launch the Gradio interface (runs at module level; there is no __main__ guard)
+demo.launch()