zanemotiwala committed on
Commit
b3e5e27
·
verified ·
1 Parent(s): 053d920

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -15
app.py CHANGED
@@ -1,37 +1,36 @@
1
  import gradio as gr
2
  import logging
3
  from transformers import pipeline
4
- import torch
5
 
 
6
# Configure the root logger so INFO-level messages from this app are emitted.
logging.basicConfig(level=logging.INFO)

# Load the ASR pipeline once at import time (tiny English-only Whisper
# checkpoint — small download, acceptable CPU inference speed).
asr = pipeline(task="automatic-speech-recognition", model="openai/whisper-tiny.en")
10
 
11
def transcribe_speech(audio):
    """Transcribe a Gradio microphone/upload recording with the Whisper pipeline.

    Args:
        audio: ``(sample_rate, data)`` tuple as produced by a
            ``gr.Audio(type="numpy")`` component, or ``None`` when the user
            submitted nothing.

    Returns:
        The transcribed text on success, otherwise a user-facing error message.
    """
    if audio is None:
        logging.error("No audio provided.")
        return "No audio found, please retry."

    try:
        logging.info("Received audio input")
        # BUG FIX: Gradio's numpy audio format is (sample_rate, data),
        # not (data, sample_rate) as the previous unpack assumed.
        sample_rate, audio_array = audio
        # Whisper expects mono float32 samples; Gradio records int16 PCM.
        if audio_array.dtype.kind in "iu":
            audio_array = audio_array.astype("float32") / 32768.0
        if audio_array.ndim > 1:
            # Stereo recording: average channels down to mono.
            audio_array = audio_array.mean(axis=1)
        # BUG FIX: the ASR pipeline takes raw samples plus rate as a dict;
        # it has no sampling_rate= keyword argument.
        output = asr({"raw": audio_array, "sampling_rate": sample_rate})
        return output["text"]
    except Exception as e:
        logging.error(f"Error during transcription: {str(e)}")
        return f"Error processing the audio file: {str(e)}"
25
 
26
# Gradio UI: record/upload audio, press the button, see the transcript.
with gr.Blocks() as demo:
    gr.Markdown("# Simple Speech Recognition App")
    gr.Markdown("### Record or upload audio, then click 'Transcribe Audio'")

    # NOTE(review): type="numpy" is documented to deliver the recording as a
    # (sample_rate, data) tuple — confirm transcribe_speech unpacks it that way.
    mic = gr.Audio(label="Microphone or Upload", type="numpy")
    transcribe_button = gr.Button("Transcribe Audio")
    transcription = gr.Textbox(label="Transcription", lines=3, placeholder="Transcription will appear here...")

    # Wire the button to the transcription function.
    transcribe_button.click(fn=transcribe_speech, inputs=mic, outputs=transcription)

# share=True additionally exposes a temporary public URL.
demo.launch(share=True)
 
1
  import gradio as gr
2
  import logging
3
  from transformers import pipeline
 
4
 
5
# Set up logging so INFO-level messages are visible in the Space logs.
logging.basicConfig(level=logging.INFO)

# Load Whisper model once at startup (tiny English-only checkpoint for speed).
asr = pipeline(task="automatic-speech-recognition", model="openai/whisper-tiny.en")
10
 
11
def transcribe_speech(audio_path):
    """Run Whisper ASR on a recorded or uploaded audio file.

    Args:
        audio_path: Filesystem path handed over by a
            ``gr.Audio(type="filepath")`` component, or ``None`` when the
            user submitted nothing.

    Returns:
        The transcribed text on success, otherwise a user-facing message.
    """
    # Guard clause: nothing was recorded or uploaded.
    if audio_path is None:
        logging.error("No audio provided.")
        return "No audio found, please retry."

    try:
        logging.info(f"Received audio file path: {audio_path}")
        # The pipeline accepts a file path directly and loads/resamples it.
        text = asr(audio_path)["text"]
    except Exception as e:
        # Surface the failure to the UI instead of crashing the app.
        logging.error(f"Error during transcription: {str(e)}")
        return f"Error processing the audio file: {str(e)}"
    else:
        return text
24
 
25
# Build the Gradio front end: one audio input, one button, one text output.
with gr.Blocks() as demo:
    gr.Markdown("# 🎀 Simple Speech Recognition App")
    gr.Markdown("Record or upload audio, then click **Transcribe Audio**")

    # type="filepath" hands transcribe_speech a temp-file path on disk.
    audio_input = gr.Audio(label="πŸŽ™οΈ Microphone or Upload", type="filepath")
    run_button = gr.Button("πŸ“ Transcribe Audio")
    transcript_box = gr.Textbox(label="πŸ—’οΈ Transcription", lines=3, placeholder="Transcription will appear here...")

    # Button click -> run ASR on the recording -> show the text.
    run_button.click(fn=transcribe_speech, inputs=audio_input, outputs=transcript_box)

# share=True also serves a temporary public gradio.live URL.
demo.launch(share=True)