# ------------------------------------------------------------
# Gradio demo that talks to the HF Inference API (no provider)
# Model: openai/gpt-oss-120b
# UI built from low-level components (Chatbot + Textbox + Button)
# ------------------------------------------------------------
import os
import gradio as gr
from huggingface_hub import InferenceClient


# ------------------------------------------------------------
# 1️⃣ Inference
# ------------------------------------------------------------
def hf_chat(message: str, history: list, token: str):
    """
    Handle one chat turn: send the conversation to the HF Inference API
    and append the model's reply to the history.

    Called every time the user clicks **Submit** (or presses Enter).

    Parameters
    ----------
    message : str
        Newest user utterance.
    history : list[tuple[str, str]]
        Chat history that Gradio keeps for us, as (user, bot) pairs.
    token : str
        HF access token obtained from LoginButton.  Depending on the
        Gradio version this may arrive as an OAuth token object instead
        of a raw string; both are accepted (see unwrap below).

    Returns
    -------
    tuple
        (updated_history, "") — the empty string is routed to the input
        textbox, clearing it for the next message.
    """
    # Ignore blank submissions instead of burning an API call on them.
    if not message or not message.strip():
        return history, ""

    # gr.LoginButton may hand us an OAuthToken-like object rather than a
    # plain string; unwrap it so InferenceClient always receives a str.
    # A raw str has no `.token` attribute and passes through unchanged.
    hf_token = getattr(token, "token", token)

    # ---- build the prompt in the format the model expects ----
    system_prompt = (
        "You are a helpful, knowledgeable assistant. "
        "Answer the user’s questions concisely but clearly."
    )
    chat = [{"role": "system", "content": system_prompt}]
    for usr, bot in history:
        chat.append({"role": "user", "content": usr})
        chat.append({"role": "assistant", "content": bot})
    chat.append({"role": "user", "content": message})

    # ---- call the HF Inference API ----
    client = InferenceClient(
        model="openai/gpt-oss-120b",
        token=hf_token,
        timeout=120,
    )
    response = client.chat_completion(
        messages=chat,
        max_tokens=512,
        temperature=0.7,
        top_p=0.9,
    )
    reply = response.choices[0].message.content.strip()

    # ---- return the new history (Gradio will display it) ----
    # Gradio expects a list of (user, bot) tuples.  The second return
    # value feeds the textbox output; returning "" clears it (the
    # original code returned `reply`, which pasted the bot's answer
    # back into the user's input box).
    new_history = history + [(message, reply)]
    return new_history, ""


# ------------------------------------------------------------
# 2️⃣ UI definition
# ------------------------------------------------------------
with gr.Blocks(fill_height=True) as demo:
    # ---- Sidebar -------------------------------------------------
    with gr.Sidebar():
        gr.Markdown("# Inference Demo")
        gr.Markdown(
            """
            This Space runs **openai/gpt-oss-120b** directly via the
            Hugging Face Inference API.
            Sign in with your Hugging Face account to obtain a token
            that will be sent securely to the API.
            """
        )
        # The button injects a `token` variable into the session.
        login_btn = gr.LoginButton("Sign in with Hugging Face")

    # ---- Main chat area -----------------------------------------
    # 1️⃣ a `gr.Chatbot` that shows the conversation history
    chatbot = gr.Chatbot(label="GPT‑OSS‑120B")

    # 2️⃣ a single-line textbox where the user types
    txt_input = gr.Textbox(
        placeholder="Type a message and press ⏎ or click Submit...",
        lines=1,
        show_label=False,
    )

    # 3️⃣ a submit button (optional – hitting Enter on the textbox works too)
    submit_btn = gr.Button("Submit", variant="primary")

    # ------------------------------------------------------------
    # 4️⃣ Wire-up the logic
    # ------------------------------------------------------------
    # The function signature is (message, history, token) → (history, "").
    # `chatbot` is passed as a *state* component, so we get the current
    # history.  The token comes from the login button (it is added to the
    # inputs list).
    submit_btn.click(
        fn=hf_chat,
        inputs=[txt_input, chatbot, login_btn],
        outputs=[chatbot, txt_input],  # update the history and clear the textbox
    )

    # Allow hitting Enter in the textbox to trigger the same call
    txt_input.submit(
        fn=hf_chat,
        inputs=[txt_input, chatbot, login_btn],
        outputs=[chatbot, txt_input],
    )

demo.launch()