import os
import gradio as gr
from huggingface_hub import InferenceClient
# ----------------------------------------------------------------------
# Helper: read a secret with a safe fallback (useful when you run the
# script locally without a secrets file).
# ----------------------------------------------------------------------
def _secret(key: str, fallback: str) -> str:
"""Return the value of a secret or the supplied fallback."""
return os.getenv(key, fallback)
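

# A minimal usage sketch for the helper (assumption: you export the variables
# yourself before running the script locally, since `_secret` only wraps
# `os.getenv`):
#
#     os.environ.setdefault("prec_chat", "You are a terse assistant.")
#     _secret("prec_chat", "You are a helpful assistant.")   # -> "You are a terse assistant."
#     _secret("MISSING_KEY", "fallback")                     # -> "fallback"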
# ----------------------------------------------------------------------
# Core chat logic – the system prompt now comes from the secret `prec_chat`.
# ----------------------------------------------------------------------
def respond(
    message: str,
    history: list[dict[str, str]],
    max_tokens: int,
    temperature: float,
    top_p: float,
    hf_token: gr.OAuthToken,
):
"""
Generate a response using the HuggingFace Inference API.
The system prompt is taken from the secret **prec_chat**.
Users cannot edit it from the UI.
"""
# 1️⃣ Load the system prompt (fallback = generic assistant)
system_message = _secret("prec_chat", "You are a helpful assistant.")
# 2️⃣ Initialise the HF inference client.
client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")
# 3️⃣ Build the message list for the chat completion endpoint.
messages = [{"role": "system", "content": system_message}]
messages.extend(history) # previous conversation
messages.append({"role": "user", "content": message}) # current query
# 4️⃣ Stream the response back to the UI.
response = ""
for chunk in client.chat_completion(
messages,
max_tokens=max_tokens,
stream=True,
temperature=temperature,
top_p=top_p,
):
choices = chunk.choices
token = ""
if choices and choices[0].delta.content:
token = choices[0].delta.content
response += token
yield response
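

# Rough sketch of exercising `respond` outside of Gradio (assumptions: any
# object exposing a `.token` attribute can stand in for gr.OAuthToken, and an
# `HF_TOKEN` environment variable holds a valid access token):
#
#     from types import SimpleNamespace
#     fake_token = SimpleNamespace(token=os.environ["HF_TOKEN"])
#     for partial in respond("Hello!", [], 64, 0.7, 0.95, fake_token):
#         print(partial)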
# ----------------------------------------------------------------------
# UI definition – the system‑prompt textbox has been removed.
# ----------------------------------------------------------------------
chatbot = gr.ChatInterface(
    respond,
    type="messages",
    additional_inputs=[
        # Only generation parameters are exposed now.
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)
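
# To expose another generation parameter later, the hypothetical sketch below
# shows the pattern: append a slider to `additional_inputs` and add a matching
# positional argument to `respond`; Gradio passes additional inputs to the
# function in the order they are listed.
#
#     gr.Slider(minimum=1.0, maximum=2.0, value=1.1, step=0.05,
#               label="Repetition penalty"),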
# ----------------------------------------------------------------------
# Build the Blocks layout (no LoginButton – we use our own auth).
# ----------------------------------------------------------------------
with gr.Blocks() as demo:
    chatbot.render()
# ----------------------------------------------------------------------
# Launch with **basic authentication**.
# ----------------------------------------------------------------------
if __name__ == "__main__":
    # Pull the allowed credentials from secrets (fallback = no access).
    allowed_user = _secret("CHAT_USER", "")
    allowed_pass = _secret("CHAT_PASS", "")

    # If either is missing we refuse to start – this prevents an accidental
    # open-access deployment.
    if not allowed_user or not allowed_pass:
        raise RuntimeError(
            "Authentication credentials not found in secrets. "
            "Add CHAT_USER and CHAT_PASS to the Space secrets (they are read as environment variables)."
        )
    demo.launch(
        auth=(allowed_user, allowed_pass),  # <-- Gradio's built-in basic auth
        # optional: you can also set `auth_message="Please log in"` or
        # `prevent_thread_lock=True` depending on your deployment.
    )
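
# Example of a local run (assumed workflow: the secrets are supplied as plain
# environment variables, since `_secret` falls back to `os.getenv`, and the
# file is saved under the usual Space entry-point name app.py):
#
#     CHAT_USER=alice CHAT_PASS=change-me prec_chat="You are a helpful assistant." python app.py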