File size: 3,261 Bytes
4cd8562
 
 
 
 
 
 
4d88240
 
4cd8562
8caa19b
4d88240
4cd8562
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import io
import os
import tempfile

import gradio as gr
import whisper
from groq import Groq
from gtts import gTTS

# The Groq API key is a secret and must be supplied through the environment
# (for example a deployment/Space secret), never committed to source control.
# NOTE: a key was previously hard-coded here; it should be revoked and rotated.
_groq_api_key = os.environ.get("GROQ_API_KEY")
if not _groq_api_key:
    raise RuntimeError(
        "The GROQ_API_KEY environment variable is not set; "
        "configure it before starting the app."
    )

# Initialize the Groq client
client = Groq(api_key=_groq_api_key)

# Load the Whisper model
model = whisper.load_model("base")  # You can choose other models like "small", "medium", "large"

def process_audio(file_path):
    """Transcribe a spoken prompt, ask the LLM for a reply, and voice the reply.

    Args:
        file_path: Path to the input audio file, as supplied by the
            ``gr.Audio(type="filepath")`` input. ``None`` or an empty string
            means there is no audio to process.

    Returns:
        A ``(text, audio_path)`` tuple. On success ``text`` is the model's
        reply and ``audio_path`` is the path of an MP3 file containing the
        spoken reply. When there is nothing to process, or any step fails,
        ``text`` is a human-readable message and ``audio_path`` is ``None``.
    """
    # The audio input may be empty (e.g. cleared by the user); in that case
    # there is nothing to transcribe, so answer without touching the models.
    if not file_path:
        return "No audio input was provided.", None

    try:
        # Load the audio file and transcribe it using Whisper
        audio = whisper.load_audio(file_path)
        text = model.transcribe(audio)["text"]

        # Do not spend an LLM call on silence / unintelligible audio.
        if not text.strip():
            return "No speech was detected in the audio.", None

        # Generate a response using Groq
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": text}],
            model="llama3-8b-8192",  # Replace with the correct model if necessary
        )

        # The message content may be missing; treat that as an empty reply
        # instead of failing on ``None.strip()``.
        response_message = (chat_completion.choices[0].message.content or "").strip()
        if not response_message:
            return "The model returned an empty response.", None

        # Synthesize the speech fully in memory first, so that a failed
        # synthesis does not leave a partial file on disk.
        speech_buffer = io.BytesIO()
        gTTS(response_message).write_to_fp(speech_buffer)

        # Write to a unique temporary file: a fixed file name would be shared
        # by every request and concurrent users would overwrite each other's
        # audio. ``delete=False`` keeps the file so the caller can serve it.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as audio_file:
            audio_file.write(speech_buffer.getvalue())

        # Return the response text and the path to the saved audio file
        return response_message, audio_file.name

    except Exception as e:
        # UI boundary: report the failure as text rather than crashing the app.
        return f"An error occurred: {e}", None

# Custom CSS for the app: a sky-blue page background, a large bold title,
# light boxed inputs/outputs and blue buttons with a darker hover state.
# The string is passed verbatim to gr.Interface(css=...) below.
# NOTE(review): the selectors (.gradio-title, .gradio-input, .gradio-output,
# .gradio-button) are assumed to match the class names rendered by the
# installed Gradio version -- TODO confirm, otherwise those rules have no effect.
custom_css = """
.gradio-container {
    background-color: #87ceeb; /* Sky blue background */
    color: #333; /* Dark text color for contrast */
    font-family: Arial, sans-serif; /* Better font */
    text-align: center; /* Center align text */
}

.gradio-title {
    color: #333; /* Title color */
    font-size: 36px; /* Large font size */
    font-weight: bold; /* Bold text */
    margin-bottom: 20px; /* Space below the title */
}

.gradio-input, .gradio-output {
    background-color: #e0f7fa; /* Slightly lighter background for inputs/outputs */
    color: #333; /* Text color for inputs/outputs */
    border: 1px solid #80deea; /* Border color */
    border-radius: 5px; /* Rounded corners */
    padding: 10px; /* Padding */
}

.gradio-button {
    background-color: #4fc3f7; /* Button background color */
    color: white; /* Button text color */
    border: none; /* Remove default border */
    border-radius: 5px; /* Rounded corners */
    padding: 10px 20px; /* Padding */
    cursor: pointer; /* Pointer cursor on hover */
}

.gradio-button:hover {
    background-color: #29b6f6; /* Button hover color */
}
"""

# Define the Gradio interface with custom CSS.
# Input: one audio file path; outputs: the reply text and the reply audio.
iface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(type="filepath"),  # Use type="filepath"
    outputs=[gr.Textbox(label="Response Text"), gr.Audio(label="Response Audio")],
    live=True,
    title="Audio to Audio ChatBot",  # Add title here
    description="Convert audio input to text, generate a response, and convert it back to audio.",
    css=custom_css,  # Apply custom CSS
)

# Start the web server only when this file is executed as a script, so that
# importing the module (e.g. to reuse process_audio) does not launch the app.
if __name__ == "__main__":
    iface.launch()