"""Voice chatbot: speech in -> Whisper transcript -> Groq LLM reply -> gTTS speech out."""

import io
import logging
import os
import tempfile
from typing import Optional, Tuple

import gradio as gr
import whisper
from groq import Groq
from gtts import gTTS

logger = logging.getLogger(__name__)

# Whisper checkpoint to load; other options include "small", "medium", "large".
WHISPER_MODEL_NAME = "base"

# Groq chat model used to generate the reply.
# NOTE(review): confirm this model id is still served by Groq before deploying.
GROQ_MODEL_NAME = "llama3-8b-8192"

# SECURITY: the API key is read from the GROQ_API_KEY environment variable and
# must never be written into this file. Any key that has been committed to
# source control should be treated as compromised and revoked.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# Load the Whisper model once at start-up so every request reuses it.
model = whisper.load_model(WHISPER_MODEL_NAME)


def _synthesize_speech(text: str) -> str:
    """Convert *text* to speech with gTTS and return the path of the MP3 file.

    The audio is rendered into memory first and only then written to disk, so
    a synthesis failure does not leave a truncated file behind. Each call
    writes to its own uniquely named temporary file, which keeps simultaneous
    requests from overwriting one another's output.
    """
    buffer = io.BytesIO()
    gTTS(text).write_to_fp(buffer)

    # delete=False: the file has to outlive this function so Gradio can serve it.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as audio_file:
        audio_file.write(buffer.getvalue())
    return audio_file.name


def process_audio(file_path: Optional[str]) -> Tuple[str, Optional[str]]:
    """Transcribe an audio file, ask the LLM for a reply, and voice that reply.

    Args:
        file_path: Path to the recorded/uploaded audio, or ``None`` when the
            audio input is empty (for example after the user clears it).

    Returns:
        A ``(text, audio_path)`` tuple. On success ``text`` is the model's
        reply and ``audio_path`` points to an MP3 rendering of it. When there
        is nothing to process, or any step fails, ``text`` carries a
        human-readable message and ``audio_path`` is ``None``.
    """
    if not file_path:
        return "No audio input received.", None

    try:
        # Transcribe the audio using Whisper.
        audio = whisper.load_audio(file_path)
        transcript = model.transcribe(audio)["text"].strip()
        if not transcript:
            # Nothing was said; skip the LLM and TTS round trips.
            return "No speech was detected in the audio.", None

        # Generate a response using Groq.
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": transcript}],
            model=GROQ_MODEL_NAME,
        )
        response_message = (chat_completion.choices[0].message.content or "").strip()

        # Convert the response text to speech.
        return response_message, _synthesize_speech(response_message)
    except Exception as e:
        # UI boundary: show the failure in the text box instead of crashing
        # the app, but keep the traceback in the logs for debugging.
        logger.exception("Failed to process audio input")
        return f"An error occurred: {e}", None


# Custom CSS for improved appearance.
custom_css = """
.gradio-container {
    background-color: #87ceeb; /* Sky blue background */
    color: #333; /* Dark text color for contrast */
    font-family: Arial, sans-serif; /* Better font */
    text-align: center; /* Center align text */
}
.gradio-title {
    color: #333; /* Title color */
    font-size: 36px; /* Large font size */
    font-weight: bold; /* Bold text */
    margin-bottom: 20px; /* Space below the title */
}
.gradio-input, .gradio-output {
    background-color: #e0f7fa; /* Slightly lighter background for inputs/outputs */
    color: #333; /* Text color for inputs/outputs */
    border: 1px solid #80deea; /* Border color */
    border-radius: 5px; /* Rounded corners */
    padding: 10px; /* Padding */
}
.gradio-button {
    background-color: #4fc3f7; /* Button background color */
    color: white; /* Button text color */
    border: none; /* Remove default border */
    border-radius: 5px; /* Rounded corners */
    padding: 10px 20px; /* Padding */
    cursor: pointer; /* Pointer cursor on hover */
}
.gradio-button:hover {
    background-color: #29b6f6; /* Button hover color */
}
"""

# Gradio interface: one audio input, text + audio outputs, styled with custom_css.
iface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(type="filepath"),  # hand process_audio a path on disk
    outputs=[gr.Textbox(label="Response Text"), gr.Audio(label="Response Audio")],
    live=True,
    title="Audio to Audio ChatBot",
    description="Convert audio input to text, generate a response, and convert it back to audio.",
    css=custom_css,
)

if __name__ == "__main__":
    # Only start the web server when run as a script, not when imported.
    iface.launch()