"""Gradio demo for multilingual automatic speech recognition (ASR).

The user records or uploads audio and picks a language; the matching
Hugging Face ASR pipeline is loaded on first use, cached, and used to
transcribe the audio.
"""

import traceback

import gradio as gr
import soundfile as sf
import torch
from transformers import pipeline

# Run on the first CUDA device when one is available, otherwise on CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Model checkpoint used for each supported language. The insertion order
# is also the order of the choices shown in the language dropdown.
MODEL_NAMES = {
    "English": "openai/whisper-small",
    "Hindi": "vasista22/whisper-hindi-small",
    "Tamil": "vasista22/whisper-tamil-small",
    "Malayalam": "vrclc/W2V2-BERT-withLM-Malayalam-Studio",
}

# Cache of loaded pipelines, keyed by language name, so each model is
# loaded at most once per process.
pipelines = {}


def get_pipeline(language):
    """Return the ASR pipeline for *language*, loading it on first use.

    Args:
        language: One of the keys of ``MODEL_NAMES``.

    Returns:
        The cached ``transformers`` pipeline for that language.

    Raises:
        ValueError: If *language* is not a supported language.
    """
    if language in pipelines:
        return pipelines[language]

    model_name = MODEL_NAMES.get(language)
    if model_name is None:
        raise ValueError("Unsupported language")

    print(f"[INFO] Loading model for {language}: {model_name}")
    pipelines[language] = pipeline(
        "automatic-speech-recognition",
        model=model_name,
        device=device,
        chunk_length_s=10,
    )
    return pipelines[language]


def transcribe(audio, language):
    """Transcribe speech from an audio file in the selected language.

    Args:
        audio: Path to the audio file, or a dict carrying the path under
            the ``"name"`` key; ``None`` when nothing was provided.
        language: Language name chosen in the dropdown; ``None`` when
            nothing was selected.

    Returns:
        The transcribed text, or a human-readable message when the input
        is missing or transcription fails. Exceptions are never
        propagated to the UI; they are printed and returned as
        ``"Error: ..."`` text.
    """
    try:
        if audio is None:
            return "Please record or upload an audio file."
        # The dropdown has no default value, so the language can be unset.
        if not language:
            return "Please select a language."

        print(f"[DEBUG] Language: {language}")
        print(f"[DEBUG] Received audio: {audio}")

        audio_path = audio if isinstance(audio, str) else audio.get("name", None)
        if audio_path is None:
            return "Could not read audio file."

        # float32 avoids handing float64 samples to the model.
        audio_data, sample_rate = sf.read(audio_path, dtype="float32")
        print(f"[DEBUG] Sample rate: {sample_rate}, shape: {audio_data.shape}")

        # soundfile returns (frames, channels) for multi-channel files,
        # but the ASR pipeline accepts single-channel audio only, so
        # average the channels down to mono.
        if audio_data.ndim > 1:
            audio_data = audio_data.mean(axis=1)

        pipe = get_pipeline(language)

        # Raw-audio input format of the transformers ASR pipeline.
        input_data = {"array": audio_data, "sampling_rate": sample_rate}
        result = pipe(input_data)["text"]

        print(f"[DEBUG] Transcription: {result}")
        return result

    except Exception as e:
        # UI boundary: log the full traceback, show a short message.
        print("[ERROR] Exception during transcription:")
        traceback.print_exc()
        return f"Error: {e}"


iface = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(
            sources=["microphone", "upload"],
            type="filepath",
            label="Record or Upload Audio",
        ),
        gr.Dropdown(choices=list(MODEL_NAMES), label="Select Language"),
    ],
    outputs=gr.Textbox(label="Transcribed Text"),
    title="Multilingual Speech Recognition",
    description="Select a language and provide speech input to get transcription.",
)

iface.launch()