"""Gradio chat UI that streams completions from the Featherless API."""

import os

import gradio as gr
from langchain_openai import ChatOpenAI

# Read once at import time; this is None when the variable is unset.
api_key = os.environ.get("FEATHERLESS_API_KEY")

# Model identifiers offered in the "Models" dropdown.
MODEL_CHOICES = [
    "mistralai/Magistral-Small-2506",
    "Qwen/Qwen2.5-72B-Instruct",
    "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
    "unsloth/DeepSeek-R1-Distill-Llama-70B",
    "unsloth/Qwen2.5-72B-Instruct",
    "unsloth/Llama-3.3-70B-Instruct",
]


def _build_messages(system_message, history, message):
    """Return the role/content message list for one chat request.

    ``history`` entries may be ``(user, assistant)`` pairs or dicts carrying
    ``"role"`` and ``"content"`` keys. Empty or missing parts are skipped.
    """
    messages = [{"role": "system", "content": system_message}]
    for turn in history or []:
        if isinstance(turn, dict):
            role = turn.get("role")
            content = turn.get("content")
            # Only plain-text content is forwarded; other payloads are
            # dropped rather than sent in a shape the API may reject.
            if role and isinstance(content, str) and content:
                messages.append({"role": role, "content": content})
            continue
        user_text, assistant_text = turn[0], turn[1]
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})
    messages.append({"role": "user", "content": message})
    return messages


def respond(
    message,
    history: list,
    model,
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat reply, yielding the accumulated text after each chunk.

    Args:
        message: The new user message.
        history: Earlier turns, as ``(user, assistant)`` pairs or as
            ``{"role": ..., "content": ...}`` dicts.
        model: Model identifier passed to the chat client.
        system_message: Content of the leading system message.
        max_tokens: Completion token limit; coerced to ``int``.
        temperature: Sampling temperature passed to the chat client.
        top_p: Nucleus-sampling value passed to the chat client.

    Yields:
        The response text received so far, once per streamed chunk.
    """
    # Configure sampling at construction time instead of mutating the
    # client afterwards.
    llm = ChatOpenAI(
        base_url="https://api.featherless.ai/v1/",
        api_key=api_key,
        streaming=True,
        model=model,
        max_tokens=int(max_tokens),
        temperature=temperature,
        top_p=top_p,
    )

    messages = _build_messages(system_message, history, message)

    response = ""
    for chunk in llm.stream(messages):
        token = chunk.content
        # Guard the concatenation: non-string content would raise TypeError.
        if isinstance(token, str):
            response += token
        yield response


demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        # Explicit default so a request never arrives without a model.
        gr.Dropdown(MODEL_CHOICES, value=MODEL_CHOICES[0], label="Models"),
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=16384, value=2048, step=1, label="Max tokens"),
        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)


if __name__ == "__main__":
    demo.launch()