import os
import tempfile

import gradio as gr
import whisper
from gtts import gTTS

# Load the Whisper "base" model once at import time; transcribe_audio()
# reuses this single module-level instance on every call.
model = whisper.load_model("base")

# Function to transcribe audio to text
def transcribe_audio(audio_file):
    """Transcribe an audio file to text with the module-level Whisper model.

    Args:
        audio_file: Path to the audio file to transcribe.

    Returns:
        The transcribed text on success. On failure, a string that starts
        with ``"Error in transcription:"`` followed by the reason; this
        function never raises.
    """
    if not audio_file:
        return "Error in transcription: No audio file provided"

    try:
        # transcribe() accepts a path (or raw waveform) and performs its own
        # loading, mel-spectrogram conversion and 30-second windowing.
        # Handing it a precomputed mel spectrogram makes it treat the
        # spectrogram as a waveform, and trimming beforehand would drop
        # everything past the first 30 seconds.
        result = model.transcribe(audio_file)
        return result["text"]
    except Exception as e:
        return f"Error in transcription: {e}"

# Function to generate text-to-speech
def generate_speech(text):
    """Synthesize ``text`` to an MP3 file with gTTS.

    Args:
        text: The text to speak.

    Returns:
        The path of the generated MP3 file on success. On failure, a string
        that starts with ``"Error in TTS:"`` followed by the reason; this
        function never raises.
    """
    # Reject empty input up front instead of relying on gTTS to fail.
    if not isinstance(text, str) or not text.strip():
        return "Error in TTS: No text to speak"

    output_file = None
    try:
        tts = gTTS(text)
        # Use a unique file per call: a fixed name such as "response.mp3"
        # would be overwritten when two requests are handled concurrently.
        # The file is created closed so that tts.save() can reopen it by name.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
            output_file = tmp.name
        tts.save(output_file)
        return output_file
    except Exception as e:
        # Do not leave an empty or partial file behind when synthesis fails.
        if output_file is not None and os.path.exists(output_file):
            os.remove(output_file)
        return f"Error in TTS: {e}"

# Voice-to-Voice chatbot function
def voice_to_voice(audio_file):
    """Run one chatbot turn: transcribe the audio, build a reply, speak it.

    Args:
        audio_file: Path to the input audio, or a falsy value (``None`` or
            ``""``) when no audio was supplied.

    Returns:
        A ``(text, audio_path)`` tuple. ``text`` is the reply text, or an
        error message if processing failed. ``audio_path`` is the path of
        the spoken reply, or ``None`` when no audio could be produced.
    """
    if not audio_file:
        return "Error in processing: No audio input provided", None

    try:
        # Transcribe the audio input
        transcribed_text = transcribe_audio(audio_file)
        # Match the helper's exact error prefix. A plain substring test for
        # "Error" would misclassify any utterance that contains that word.
        if transcribed_text.startswith("Error in transcription:"):
            return transcribed_text, None

        # Generate a response (mock response for now)
        response_text = f"You said: {transcribed_text}"

        # Convert response text to speech
        audio_response = generate_speech(response_text)
        # Same reasoning as above: a file path may legitimately contain
        # "Error", so only the helper's own prefix counts as a failure.
        if audio_response.startswith("Error in TTS:"):
            return response_text, None

        return response_text, audio_response
    except Exception as e:
        return f"Error in processing: {e}", None

# Gradio interface: wires voice_to_voice() to one audio input and to a
# text + audio output pair, matching the (text, audio_path) tuple it returns.
iface = gr.Interface(
    fn=voice_to_voice,
    inputs=gr.Audio(type="filepath"),  # Handler receives the audio as a file path
    outputs=[
        gr.Textbox(label="Transcription"),  # Shows the first return value (reply text or error message)
        gr.Audio(type="filepath")  # Plays the second return value (path to the spoken reply, or None)
    ],
    title="Voice-to-Voice Chatbot",
    description="Speak into the microphone, and the chatbot will respond with speech."
)

# Launch the app only when executed as a script (not when imported).
# server_name="0.0.0.0" listens on all network interfaces, so the UI is
# reachable from other machines on port 7860.
if __name__ == "__main__":
    iface.launch(server_name="0.0.0.0", server_port=7860)