import os

import gradio as gr
from langchain_openai import ChatOpenAI

# Featherless serves an OpenAI-compatible API; the key is read from the
# FEATHERLESS_API_KEY environment variable (a Space secret).
api_key = os.environ.get("FEATHERLESS_API_KEY")
MODEL_CHOICES = [
    "Qwen/Qwen3-32B",
    "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
    "meta-llama/Llama-3.3-70B-Instruct",
    "mistralai/Magistral-Small-2506",
    "unsloth/DeepSeek-R1-Distill-Llama-70B",
    "unsloth/Qwen2.5-72B-Instruct",
    "unsloth/Llama-3.3-70B-Instruct",
]
def create_llm(model_name: str, max_tokens: int, temperature: float, top_p: float):
    # Build the client with the sampling parameters up front rather than
    # mutating attributes on the instance afterwards.
    return ChatOpenAI(
        base_url="https://api.featherless.ai/v1/",
        api_key=api_key,
        streaming=True,
        model=model_name,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
    )
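# The same endpoint also works with the plain OpenAI SDK, if you prefer it over
# LangChain (a minimal sketch, assuming the `openai` package is installed):
#
#     from openai import OpenAI
#     client = OpenAI(base_url="https://api.featherless.ai/v1/", api_key=api_key)
#     out = client.chat.completions.create(
#         model=MODEL_CHOICES[0],
#         messages=[{"role": "user", "content": "Hi"}],
#     )
#     print(out.choices[0].message.content)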
def respond(message, history, system_message, max_tokens, temperature, top_p, model_name):
    llm = create_llm(model_name, max_tokens, temperature, top_p)

    # Rebuild the OpenAI-style message list from the (user, assistant) history pairs.
    messages = [{"role": "system", "content": system_message}]
    for u, a in history:
        if u:
            messages.append({"role": "user", "content": u})
        if a:
            messages.append({"role": "assistant", "content": a})
    messages.append({"role": "user", "content": message})

    # Stream chunks and yield the accumulated text so the UI can render partial output.
    response = ""
    for chunk in llm.stream(messages):
        response += chunk.content
        yield response
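# respond() can be smoke-tested outside the UI (a sketch, assuming
# FEATHERLESS_API_KEY is set and the chosen model is live on Featherless):
#
#     for partial in respond("Hello!", [], "You are a friendly Chatbot.",
#                            256, 0.7, 0.95, MODEL_CHOICES[0]):
#         pass
#     print(partial)  # the final accumulated reply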
with gr.Blocks() as demo:
    with gr.Row():
        model_dropdown = gr.Dropdown(choices=MODEL_CHOICES, value=MODEL_CHOICES[0], label="Select model")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Message")
    system_msg = gr.Textbox("You are a friendly Chatbot.", label="System message")
    max_t = gr.Slider(1, 16384, value=2048, step=1, label="Max new tokens")
    temp = gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature")
    top_p = gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p")

    def user_submit(user_message, history):
        # Append the new user turn and clear the input box.
        return "", history + [[user_message, None]]

    def bot_respond(history, system_message, max_tokens, temperature, top_p, model_name):
        # Stream the reply into the last history entry so the chat updates live.
        user_message = history[-1][0]
        for partial in respond(user_message, history[:-1], system_message,
                               max_tokens, temperature, top_p, model_name):
            history[-1][1] = partial
            yield history

    # Wire the flow: add the user turn first, then stream the model's reply.
    # The dropdown's value is read at submit time, so no change-handler is needed.
    msg.submit(user_submit, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot_respond,
        [chatbot, system_msg, max_t, temp, top_p, model_dropdown],
        chatbot,
    )

demo.launch()
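# Note: streamed (generator) handlers rely on Gradio's queue. Recent Gradio
# versions enable it by default; on older versions you may need to call
# demo.queue() before demo.launch().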