# ------------------------------------------------------------
# Gradio demo that talks to the HF Inference API (no provider)
# Model: openai/gpt-oss-120b
# UI built from low-level components (Chatbot + Textbox + Button)
# ------------------------------------------------------------
import os
import gradio as gr
from huggingface_hub import InferenceClient


# ------------------------------------------------------------
# 1️⃣ Inference
# ------------------------------------------------------------
def hf_chat(message: str, history: list, token: str):
    """
    Handle one chat turn: send the conversation to the HF Inference API
    and append the model's reply to the history.

    Called every time the user clicks **Submit** (or presses Enter).

    Parameters
    ----------
    message : str
        Newest user utterance.
    history : list[tuple[str, str]]
        Chat history that Gradio keeps for us, as (user, bot) pairs.
    token : str
        HF access token obtained from LoginButton.  Depending on the
        Gradio version this may arrive as an OAuth token object instead
        of a raw string; both are accepted (see unwrap below).

    Returns
    -------
    tuple
        (updated_history, "") — the empty string is routed to the input
        textbox, clearing it for the next message.
    """
    # Ignore blank submissions instead of burning an API call on them.
    if not message or not message.strip():
        return history, ""

    # gr.LoginButton may hand us an OAuthToken-like object rather than a
    # plain string; unwrap it so InferenceClient always receives a str.
    # A raw str has no `.token` attribute and passes through unchanged.
    hf_token = getattr(token, "token", token)

    # ---- build the prompt in the format the model expects ----
    system_prompt = (
        "You are a helpful, knowledgeable assistant. "
        "Answer the user’s questions concisely but clearly."
    )
    chat = [{"role": "system", "content": system_prompt}]
    for usr, bot in history:
        chat.append({"role": "user", "content": usr})
        chat.append({"role": "assistant", "content": bot})
    chat.append({"role": "user", "content": message})

    # ---- call the HF Inference API ----
    client = InferenceClient(
        model="openai/gpt-oss-120b",
        token=hf_token,
        timeout=120,
    )
    response = client.chat_completion(
        messages=chat,
        max_tokens=512,
        temperature=0.7,
        top_p=0.9,
    )
    reply = response.choices[0].message.content.strip()

    # ---- return the new history (Gradio will display it) ----
    # Gradio expects a list of (user, bot) tuples.  The second return
    # value feeds the textbox output; returning "" clears it (the
    # original code returned `reply`, which pasted the bot's answer
    # back into the user's input box).
    new_history = history + [(message, reply)]
    return new_history, ""


# ------------------------------------------------------------
# 2️⃣ UI definition
# ------------------------------------------------------------
with gr.Blocks(fill_height=True) as demo:
    # ---- Sidebar -------------------------------------------------
    with gr.Sidebar():
        gr.Markdown("# Inference Demo")
        gr.Markdown(
            """
            This Space runs **openai/gpt-oss-120b** directly via the
            Hugging Face Inference API.
            Sign in with your Hugging Face account to obtain a token
            that will be sent securely to the API.
            """
        )
        # The button injects a `token` variable into the session.
        login_btn = gr.LoginButton("Sign in with Hugging Face")

    # ---- Main chat area -----------------------------------------
    # 1️⃣ a `gr.Chatbot` that shows the conversation history
    chatbot = gr.Chatbot(label="GPT‑OSS‑120B")

    # 2️⃣ a single-line textbox where the user types
    txt_input = gr.Textbox(
        placeholder="Type a message and press ⏎ or click Submit...",
        lines=1,
        show_label=False,
    )

    # 3️⃣ a submit button (optional – hitting Enter on the textbox works too)
    submit_btn = gr.Button("Submit", variant="primary")

    # ------------------------------------------------------------
    # 4️⃣ Wire-up the logic
    # ------------------------------------------------------------
    # The function signature is (message, history, token) → (history, "").
    # `chatbot` is passed as a *state* component, so we get the current
    # history.  The token comes from the login button (it is added to the
    # inputs list).
    submit_btn.click(
        fn=hf_chat,
        inputs=[txt_input, chatbot, login_btn],
        outputs=[chatbot, txt_input],  # update the history and clear the textbox
    )

    # Allow hitting Enter in the textbox to trigger the same call
    txt_input.submit(
        fn=hf_chat,
        inputs=[txt_input, chatbot, login_btn],
        outputs=[chatbot, txt_input],
    )

demo.launch()