HF-LLMs / app.py
SkyNetWalker's picture
Update app.py
fc98e77 verified
raw
history blame
2.88 kB
# Refer to the Hugging Face llama recipes for more info on the inference API:
# https://github.com/huggingface/huggingface-llama-recipes/blob/main/inference-api.ipynb
# Repository root: https://github.com/huggingface/huggingface-llama-recipes/tree/main
import gradio as gr
from openai import OpenAI
import os
# Access token for the inference endpoint, read from the "myHFtoken"
# environment variable. os.getenv returns None when the variable is unset,
# so only report success when a value was actually found.
ACCESS_TOKEN = os.getenv("myHFtoken")
if ACCESS_TOKEN:
    print("Access token loaded.")
else:
    # NOTE(review): without a token the client below may refuse to initialize
    # or its requests may be rejected -- confirm the desired behavior.
    print("Warning: environment variable 'myHFtoken' is not set; no access token loaded.")

# OpenAI-compatible client pointed at the Hugging Face inference endpoint.
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)
print("OpenAI client initialized.")
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    model_name,  # Added a parameter for model selection
):
    """Stream a chat completion and yield the reply accumulated so far.

    The conversation sent to the API is the system message, followed by every
    non-empty user/assistant entry of ``history`` in order, followed by
    ``message``.

    Args:
        message: The new user message.
        history: Earlier turns as ``(user_text, assistant_text)`` pairs;
            falsy entries are left out of the context.
        system_message: Content of the leading ``system`` message.
        max_tokens: Forwarded as ``max_tokens`` to the completion request.
        temperature: Forwarded as ``temperature`` to the completion request.
        top_p: Forwarded as ``top_p`` to the completion request.
        model_name: Model identifier forwarded as ``model``.

    Yields:
        The response text received so far, once per non-empty streamed token.
    """
    print(f"Received message: {message}")
    print(f"History: {history}")
    print(f"System message: {system_message}")
    print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
    print(f"Selected model: {model_name}")

    messages = [{"role": "system", "content": system_message}]
    for turn in history:
        user_text, assistant_text = turn[0], turn[1]
        if user_text:
            messages.append({"role": "user", "content": user_text})
            print(f"Added user message to context: {user_text}")
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})
            print(f"Added assistant message to context: {assistant_text}")
    messages.append({"role": "user", "content": message})

    response = ""
    print("Sending request to OpenAI API.")
    stream = client.chat.completions.create(
        model=model_name,  # Use the selected model here
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
        messages=messages,
    )
    # The loop variable is deliberately not called `message`, so the
    # parameter of that name is not rebound while streaming.
    for chunk in stream:
        # Some streamed chunks carry no choices at all; skip them instead of
        # raising IndexError.
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        print(f"Received token: {token}")
        # `content` is None on chunks without text (e.g. role-only or finish
        # chunks); concatenating None to a str would raise TypeError.
        if not token:
            continue
        response += token
        yield response
    print("Completed response generation.")
# Models offered in the "Select Model" dropdown.
model_options = ["microsoft/phi-4", "PowerInfer/SmallThinker-3B-Preview"]

# Chat display component handed to the ChatInterface.
chatbot = gr.Chatbot(height=600)
print("Chatbot interface created.")
# Extra controls for the chat UI. Gradio passes their values to `respond`
# positionally, after `message` and `history`, so the order of this list must
# match the function signature:
#   system_message, max_tokens, temperature, top_p, model_name
# Listing the model dropdown first would shift every argument by one.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="", label="System message"),
        gr.Slider(minimum=1, maximum=4096, value=1024, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.3, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-P",
        ),
        gr.Dropdown(
            choices=model_options,
            value="microsoft/phi-4",
            label="Select Model",
        ),
    ],
    fill_height=True,
    chatbot=chatbot,
    #theme="Nymbo/Nymbo_Theme",
)
print("Gradio interface initialized.")

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    print("Launching the demo application.")
    demo.launch()