|
import os |
|
import time |
|
|
|
import gradio as gr |
|
import numpy as np |
|
from dotenv import load_dotenv |
|
from elevenlabs import ElevenLabs |
|
from fastapi import FastAPI |
|
from fastrtc import ( |
|
AdditionalOutputs, |
|
ReplyOnPause, |
|
Stream, |
|
get_stt_model, |
|
get_tts_model, |
|
get_twilio_turn_credentials, |
|
) |
|
from gradio.utils import get_space |
|
from groq import Groq |
|
from numpy.typing import NDArray |
|
|
|
# Load variables from a .env file into the process environment first, so the
# clients built below can see them (presumably where Groq() finds its API
# key — confirm against the deployment's configuration).
load_dotenv()

# Chat-completion client used by `response` to generate assistant replies.
groq_client = Groq()

# Speech-to-text / text-to-speech models, created once at import time and
# shared by every call to `response`.
stt_model = get_stt_model()
tts_model = get_tts_model()
|
|
|
|
|
|
|
|
|
def response(
    audio: tuple[int, NDArray[np.int16 | np.float32]],
    chatbot: list[dict] | None = None,
):
    """Handle one user utterance: transcribe it, ask the LLM, and speak the reply.

    Args:
        audio: Captured audio, passed unchanged to ``stt_model.stt``
            (presumably a ``(sample_rate, samples)`` pair — confirm against
            the fastrtc handler contract).
        chatbot: Conversation so far as a list of dicts carrying at least
            ``"role"`` and ``"content"`` keys. ``None`` or an empty list
            starts a fresh history.

    Yields:
        ``AdditionalOutputs(chatbot)`` right after transcription (history now
        includes the user's turn), then every chunk produced by
        ``tts_model.stream_tts_sync`` for the reply, and finally
        ``AdditionalOutputs(chatbot)`` again with the assistant's turn added.
    """
    chatbot = chatbot or []
    # Only the role/content pair is forwarded to the LLM; any other keys that
    # the history entries may carry are dropped.
    messages = [{"role": d["role"], "content": d["content"]} for d in chatbot]

    # perf_counter is monotonic, so the reported duration cannot be skewed by
    # system clock adjustments.
    start = time.perf_counter()
    text = stt_model.stt(audio)
    print("transcription", time.perf_counter() - start)
    print("prompt", text)

    # Publish the user's turn before the LLM call is made.
    chatbot.append({"role": "user", "content": text})
    yield AdditionalOutputs(chatbot)

    messages.append({"role": "user", "content": text})
    completion = groq_client.chat.completions.create(
        model="llama-3.1-8b-instant",
        max_tokens=512,
        messages=messages,
    )
    response_text = completion.choices[0].message.content

    chatbot.append({"role": "assistant", "content": response_text})

    # Stream synthesized audio chunk by chunk as the TTS model produces it.
    for chunk in tts_model.stream_tts_sync(response_text):
        yield chunk

    # Final history update, now including the assistant's reply.
    yield AdditionalOutputs(chatbot)
|
|
|
|
|
# Chat history component: handed to `response` as its extra input and
# refreshed from the AdditionalOutputs that `response` yields.
chatbot = gr.Chatbot(type="messages")

# Looked up once; the TURN credentials, concurrency cap and time limit below
# are only set when this is truthy (presumably a Hugging Face Spaces
# deployment — confirm) and are None otherwise.
_on_space = get_space()

stream = Stream(
    handler=ReplyOnPause(response, input_sample_rate=16000),
    modality="audio",
    mode="send-receive",
    additional_inputs=[chatbot],
    additional_outputs=[chatbot],
    # Keep the second argument (the value emitted by the handler) and
    # discard the first.
    additional_outputs_handler=lambda _previous, latest: latest,
    rtc_configuration=get_twilio_turn_credentials() if _on_space else None,
    concurrency_limit=5 if _on_space else None,
    time_limit=90 if _on_space else None,
    ui_args={"title": "LLM Instant Voice Chat (Powered by Groq, Kokoro, and WebRTC ⚡️)"},
)
|
|
|
|
|
|
|
# FastAPI application with the stream's Gradio UI mounted at the root path;
# exposed as the module-level name `app`.
app = gr.mount_gradio_app(FastAPI(), stream.ui, path="/")
|
|
|
|
|
if __name__ == "__main__":
    # Set before any launch call below (presumably turns off Gradio's
    # server-side rendering — confirm against the Gradio docs).
    os.environ["GRADIO_SSR_MODE"] = "false"

    # MODE=PHONE starts the fastphone entry point; every other value
    # (including MODE=UI or MODE unset) launches the Gradio UI.
    if os.getenv("MODE") == "PHONE":
        stream.fastphone(host="0.0.0.0", port=7860)
    else:
        stream.ui.launch(server_port=7860)
|
|