"""Voice-to-voice chatbot demo: Whisper speech-to-text, gTTS text-to-speech, Gradio UI."""

import logging
import os
import tempfile

import gradio as gr
import whisper
from gtts import gTTS

logger = logging.getLogger(__name__)

# Load the Whisper model once at import time so every request reuses it.
model = whisper.load_model("base")


def _transcribe(audio_file):
    """Transcribe the audio file at *audio_file* and return the text.

    The path is handed straight to ``model.transcribe``, which loads the
    audio and computes its own spectrogram; feeding it a pre-computed
    log-mel spectrogram (as an earlier version did) is not a supported input.

    Raises:
        Exception: whatever Whisper raises on a failed load or decode.
    """
    result = model.transcribe(audio_file)
    return result["text"]


def _synthesize(text):
    """Synthesize *text* with gTTS and return the path of the new MP3 file.

    Each call writes to its own temporary file so that concurrent requests
    cannot overwrite each other's audio. The file is intentionally left on
    disk because the caller (Gradio) reads it after this function returns.

    Raises:
        Exception: whatever gTTS raises (e.g. empty text, network failure).
    """
    fd, output_file = tempfile.mkstemp(prefix="response_", suffix=".mp3")
    os.close(fd)  # gTTS reopens the path itself; only the unique name is needed.
    try:
        gTTS(text).save(output_file)
    except Exception:
        # Do not leave an empty placeholder file behind on failure.
        try:
            os.remove(output_file)
        except OSError:
            pass
        raise
    return output_file


def transcribe_audio(audio_file):
    """Transcribe an audio file to text.

    Args:
        audio_file: Path to the audio file to transcribe.

    Returns:
        The transcribed text, or a string starting with
        ``"Error in transcription: "`` if transcription failed.
    """
    try:
        return _transcribe(audio_file)
    except Exception as e:
        return f"Error in transcription: {e}"


def generate_speech(text):
    """Convert text to speech.

    Args:
        text: The text to speak.

    Returns:
        Path to the generated MP3 file, or a string starting with
        ``"Error in TTS: "`` if synthesis failed.
    """
    try:
        return _synthesize(text)
    except Exception as e:
        return f"Error in TTS: {e}"


def voice_to_voice(audio_file):
    """Transcribe spoken input and reply with synthesized speech.

    Args:
        audio_file: Path to the recorded/uploaded audio, or ``None`` when the
            user submitted without providing audio.

    Returns:
        A ``(text, audio_path)`` tuple. ``text`` is the response text, or an
        error message when the input was missing or transcription failed.
        ``audio_path`` is the path of the spoken response, or ``None`` when
        no audio could be produced.
    """
    if not audio_file:
        return "Error in processing: no audio input received", None

    # Failures are detected via exceptions rather than by searching the
    # result for the substring "Error", which would misfire whenever the
    # user actually says that word (or a file path happens to contain it).
    try:
        transcribed_text = _transcribe(audio_file)
    except Exception as e:
        return f"Error in transcription: {e}", None

    # Generate a response (mock response for now).
    response_text = f"You said: {transcribed_text}"

    try:
        audio_response = _synthesize(response_text)
    except Exception:
        # Still show the text response; just record why audio is missing.
        logger.exception("Text-to-speech failed")
        return response_text, None

    return response_text, audio_response


# Gradio interface
iface = gr.Interface(
    fn=voice_to_voice,
    inputs=gr.Audio(type="filepath"),  # Accepts audio input
    outputs=[
        gr.Textbox(label="Transcription"),  # Displays the response text
        gr.Audio(type="filepath"),  # Returns audio response
    ],
    title="Voice-to-Voice Chatbot",
    description="Speak into the microphone, and the chatbot will respond with speech.",
)

# Launch the app
if __name__ == "__main__":
    # 0.0.0.0 binds every network interface (needed inside containers).
    iface.launch(server_name="0.0.0.0", server_port=7860)