# Scraped Space-page header (kept for provenance, commented out so the file parses):
# AiCoderv2 — "Update app.py" — commit 5d154a6 (verified)
# ------------------------------------------------------------
# Gradio demo that talks to the HF Inference API (no provider)
# Model: openai/gpt-oss-120b
# UI built from low‑level components (Chatbot + Textbox + Button)
# ------------------------------------------------------------
import os
import gradio as gr
from huggingface_hub import InferenceClient
# ------------------------------------------------------------
# 1️⃣ Inference ------------------------------------------------------------
def hf_chat(message: str, history: list, token: str):
"""
Called every time the user clicks **Submit**.
Parameters
----------
message : str # newest user utterance
history : list[(str,str)] # chat history that Gradio keeps for us
token : str # HF access token obtained from LoginButton
Returns
-------
tuple (updated_history, reply)
"""
# ---- build the prompt in the format the model expects ----
system_prompt = (
"You are a helpful, knowledgeable assistant. "
"Answer the user’s questions concisely but clearly."
)
chat = [{"role": "system", "content": system_prompt}]
for usr, bot in history:
chat.append({"role": "user", "content": usr})
chat.append({"role": "assistant", "content": bot})
chat.append({"role": "user", "content": message})
# ---- call the HF Inference API ----
client = InferenceClient(
model="openai/gpt-oss-120b",
token=token,
timeout=120,
)
response = client.chat_completion(
messages=chat,
max_tokens=512,
temperature=0.7,
top_p=0.9,
)
reply = response.choices[0].message.content.strip()
# ---- return the new history (Gradio will display it) ----
# Gradio expects a list of (user, bot) tuples.
new_history = history + [(message, reply)]
return new_history, reply
# ------------------------------------------------------------
# 2️⃣ UI definition
# ------------------------------------------------------------
# ------------------------------------------------------------
# 2️⃣ UI definition
# ------------------------------------------------------------
with gr.Blocks(fill_height=True) as demo:
    # ---- Sidebar -------------------------------------------------
    with gr.Sidebar():
        gr.Markdown("# Inference Demo")
        gr.Markdown(
            """
            This Space runs **openai/gpt-oss-120b** directly via the
            Hugging Face Inference API.
            Sign in with your Hugging Face account to obtain a token that will be
            sent securely to the API.
            """
        )
        # The button injects a `token` variable into the session.
        login_btn = gr.LoginButton("Sign in with Hugging Face")

    # ---- Main chat area -----------------------------------------
    # Conversation history display.
    chatbot = gr.Chatbot(label="GPT‑OSS‑120B")
    # Single-line input; Enter submits.
    txt_input = gr.Textbox(
        placeholder="Type a message and press ⏎ or click Submit...",
        lines=1,
        show_label=False,
    )
    # Explicit submit button (Enter in the textbox works too).
    submit_btn = gr.Button("Submit", variant="primary")

    # ------------------------------------------------------------
    # Wire-up: one handler, two triggers (button click + textbox Enter),
    # consolidated with gr.on instead of two identical wirings.
    # Output #2 routes back into the textbox to update/clear it.
    #
    # NOTE(review): passing `login_btn` as an event *input* forwards the
    # button's value (its label string), not an OAuth access token. The
    # documented pattern is a fn parameter annotated `gr.OAuthToken`,
    # which Gradio injects automatically — verify against the Gradio
    # OAuth docs before relying on this wiring for authentication.
    gr.on(
        triggers=[submit_btn.click, txt_input.submit],
        fn=hf_chat,
        inputs=[txt_input, chatbot, login_btn],
        outputs=[chatbot, txt_input],  # update the history and clear the textbox
    )

demo.launch()