# ------------------------------------------------------------
# Gradio demo that talks to the HF Inference API (no provider)
# Model: openai/gpt-oss-120b
# UI built from low-level components (Chatbot + Textbox + Button)
# ------------------------------------------------------------
import gradio as gr
from huggingface_hub import InferenceClient
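
# Runtime dependencies for this Space (a minimal requirements.txt sketch;
# the version pins are assumptions, noting that `gr.Sidebar` needs a
# recent Gradio 5.x release):
#     gradio>=5
#     huggingface_hub>=0.24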

# ------------------------------------------------------------
# 1️⃣ Inference
# ------------------------------------------------------------
def hf_chat(message: str, history: list, oauth_token: gr.OAuthToken | None):
    """
    Called every time the user clicks **Submit**.

    Parameters
    ----------
    message : str                        # newest user utterance
    history : list[(str, str)]           # chat history that Gradio keeps for us
    oauth_token : gr.OAuthToken | None   # HF access token; Gradio injects it via
                                         # the type hint after LoginButton sign-in

    Returns
    -------
    tuple (updated_history, textbox_value)   # textbox_value is "" to clear the box
    """
    # Without a login there is no token, so fail early with a visible message.
    if oauth_token is None:
        raise gr.Error("Please sign in with Hugging Face first.")
    # ---- build the prompt in the format the model expects ----
    system_prompt = (
        "You are a helpful, knowledgeable assistant. "
        "Answer the user’s questions concisely but clearly."
    )
    chat = [{"role": "system", "content": system_prompt}]
    for usr, bot in history:
        chat.append({"role": "user", "content": usr})
        chat.append({"role": "assistant", "content": bot})
    chat.append({"role": "user", "content": message})
    # ---- call the HF Inference API ----
    client = InferenceClient(
        model="openai/gpt-oss-120b",
        token=oauth_token.token,
        timeout=120,
    )
    response = client.chat_completion(
        messages=chat,
        max_tokens=512,
        temperature=0.7,
        top_p=0.9,
    )
    reply = response.choices[0].message.content.strip()
    # ---- return the new history (Gradio will display it) ----
    # Gradio expects a list of (user, bot) tuples; the empty string
    # clears the input textbox.
    new_history = history + [(message, reply)]
    return new_history, ""
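
# A streaming variant (optional sketch, not wired into the UI below). It
# assumes `chat_completion(..., stream=True)` from huggingface_hub, which
# yields chunks whose `choices[0].delta.content` holds the next fragment;
# system prompt and history handling are omitted here for brevity.
def hf_chat_stream(message: str, history: list, oauth_token: gr.OAuthToken | None):
    """Yield the chat history with the reply growing chunk by chunk."""
    if oauth_token is None:
        raise gr.Error("Please sign in with Hugging Face first.")
    client = InferenceClient(model="openai/gpt-oss-120b", token=oauth_token.token)
    chat = [{"role": "user", "content": message}]
    partial = ""
    for chunk in client.chat_completion(messages=chat, max_tokens=512, stream=True):
        partial += chunk.choices[0].delta.content or ""
        # Each yield updates the Chatbot in place and keeps the textbox empty.
        yield history + [(message, partial)], ""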

# ------------------------------------------------------------
# 2️⃣ UI definition
# ------------------------------------------------------------
with gr.Blocks(fill_height=True) as demo:
    # ---- Sidebar -------------------------------------------------
    with gr.Sidebar():
        gr.Markdown("# Inference Demo")
        gr.Markdown(
            """
            This Space runs **openai/gpt-oss-120b** directly via the
            Hugging Face Inference API.
            Sign in with your Hugging Face account to obtain a token that will be
            sent securely to the API.
            """
        )
        # The button starts the OAuth flow; the resulting token reaches
        # `hf_chat` through its `gr.OAuthToken` type hint.
        login_btn = gr.LoginButton("Sign in with Hugging Face")
    # ---- Main chat area -----------------------------------------
    # 1️⃣ a `gr.Chatbot` that shows the conversation history
    chatbot = gr.Chatbot(label="GPT-OSS-120B")
    # 2️⃣ a single-line textbox where the user types
    txt_input = gr.Textbox(
        placeholder="Type a message and press ⏎ or click Submit...",
        lines=1,
        show_label=False,
    )
    # 3️⃣ a submit button (optional – hitting Enter in the textbox works too)
    submit_btn = gr.Button("Submit", variant="primary")
    # ------------------------------------------------------------
    # 4️⃣ Wire up the logic
    # ------------------------------------------------------------
    # The function signature is (message, history, oauth_token) → (history, "").
    # `chatbot` doubles as state, so the function receives the current history.
    # The OAuth token is injected from the type hint, so it is NOT listed
    # in `inputs`.
    submit_btn.click(
        fn=hf_chat,
        inputs=[txt_input, chatbot],
        outputs=[chatbot, txt_input],  # update the history and clear the textbox
    )
    # Allow hitting Enter in the textbox to trigger the same call
    txt_input.submit(
        fn=hf_chat,
        inputs=[txt_input, chatbot],
        outputs=[chatbot, txt_input],
    )
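
    # High-level alternative (untested sketch): `gr.ChatInterface` bundles the
    # Chatbot + Textbox + Submit wiring into a single component. It expects a
    # function of (message, history) returning just the reply string, e.g.
    #     demo = gr.ChatInterface(fn=my_reply_fn)
    # so `hf_chat` would need a thin wrapper; the low-level layout is kept
    # here on purpose, as the header says.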

demo.launch()
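
# Deployment note: for the LoginButton's OAuth flow to work on Spaces, the
# Space's README.md metadata must include `hf_oauth: true`; without it no
# token is issued and `oauth_token` stays None.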