import gradio as gr
from openai import OpenAI
import os
# Retrieve the access token from the environment variable
ACCESS_TOKEN = os.getenv("HF_TOKEN")
print("Access token loaded.")
# Initialize the OpenAI client with the Hugging Face Inference API endpoint
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)
print("OpenAI client initialized.")
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    frequency_penalty,
    seed,
    custom_model,
):
"""
This function handles the chatbot response. It takes in:
- message: the user's new message
- history: the list of previous messages, each as a tuple (user_msg, assistant_msg)
- system_message: the system prompt
- max_tokens: the maximum number of tokens to generate in the response
- temperature: sampling temperature
- top_p: top-p (nucleus) sampling
- frequency_penalty: penalize repeated tokens in the output
- seed: a fixed seed for reproducibility; -1 will mean 'random'
- custom_model: the final model name in use, which may be set by selecting from the Featured Models radio or by typing a custom model
"""
print(f"Received message: {message}")
print(f"History: {history}")
print(f"System message: {system_message}")
print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
print(f"Selected model (custom_model): {custom_model}")
# Convert seed to None if -1 (meaning random)
if seed == -1:
seed = None
    # Construct the messages array required by the API
    messages = [{"role": "system", "content": system_message}]
    print("Initial messages array constructed.")

    # Add the conversation history to the context
    for user_part, assistant_part in history:
        if user_part:
            messages.append({"role": "user", "content": user_part})
            print(f"Added user message to context: {user_part}")
        if assistant_part:
            messages.append({"role": "assistant", "content": assistant_part})
            print(f"Added assistant message to context: {assistant_part}")

    # Append the latest user message
    messages.append({"role": "user", "content": message})
    print("Latest user message appended.")
    # If the user provided a model, use it; otherwise fall back to the default
    model_to_use = custom_model.strip() or "meta-llama/Llama-3.3-70B-Instruct"
    print(f"Model selected for inference: {model_to_use}")

    # Start with an empty string and build the response as tokens stream in
    response = ""
    print("Sending request to OpenAI API.")
    # Make the streaming request to the HF Inference API via the OpenAI-compatible client
    for message_chunk in client.chat.completions.create(
        model=model_to_use,                   # Either the user-provided or the default model
        max_tokens=max_tokens,                # Maximum tokens for the response
        stream=True,                          # Enable streaming responses
        temperature=temperature,              # Adjust randomness in the response
        top_p=top_p,                          # Control diversity in response generation
        frequency_penalty=frequency_penalty,  # Penalize repeated phrases
        seed=seed,                            # Random seed for reproducibility
        messages=messages,                    # Contextual conversation messages
    ):
        # Extract the token text from the response chunk; some chunks (e.g. the
        # final one) carry no content, so guard against None
        token_text = message_chunk.choices[0].delta.content
        if token_text is None:
            continue
        print(f"Received token: {token_text}")
        response += token_text
        # Yield the partial response so Gradio can display it in real time
        yield response

    print("Completed response generation.")
# -------------------------
# GRADIO UI CONFIGURATION
# -------------------------
# Create a Chatbot component with a specified height
chatbot = gr.Chatbot(height=600) # Define the height of the chatbot interface
print("Chatbot interface created.")
# Create textboxes and sliders for system prompt, tokens, and other parameters
system_message_box = gr.Textbox(value="", label="System message") # Input box for system message
max_tokens_slider = gr.Slider(
    minimum=1,               # Minimum allowable tokens
    maximum=4096,            # Maximum allowable tokens
    value=512,               # Default value
    step=1,                  # Increment step size
    label="Max new tokens",  # Slider label
)
temperature_slider = gr.Slider(
    minimum=0.1,             # Minimum temperature
    maximum=4.0,             # Maximum temperature
    value=0.7,               # Default value
    step=0.1,                # Increment step size
    label="Temperature",     # Slider label
)
top_p_slider = gr.Slider(
    minimum=0.1,             # Minimum top-p value
    maximum=1.0,             # Maximum top-p value
    value=0.95,              # Default value
    step=0.05,               # Increment step size
    label="Top-P",           # Slider label
)
frequency_penalty_slider = gr.Slider(
    minimum=-2.0,            # Minimum penalty
    maximum=2.0,             # Maximum penalty
    value=0.0,               # Default value
    step=0.1,                # Increment step size
    label="Frequency Penalty",  # Slider label
)
seed_slider = gr.Slider(
    minimum=-1,              # -1 means a random seed
    maximum=65535,           # Maximum seed value
    value=-1,                # Default value
    step=1,                  # Increment step size
    label="Seed (-1 for random)",  # Slider label
)
# The custom_model_box is what the respond function sees as "custom_model"
custom_model_box = gr.Textbox(
    value="",
    label="Custom Model",
    info="(Optional) Provide a custom Hugging Face model path. Overrides any selected featured model.",
)
# Update the custom model box when a featured model is selected
def set_custom_model_from_radio(selected):
    """
    Triggered whenever someone picks a model from the 'Featured Models' radio.
    Updates the Custom Model text box with that selection automatically.
    """
    print(f"Featured model selected: {selected}")
    return selected
# Create the main ChatInterface object
demo = gr.ChatInterface(
    fn=respond,                    # The function that handles responses
    additional_inputs=[
        system_message_box,        # System message input
        max_tokens_slider,         # Max tokens slider
        temperature_slider,        # Temperature slider
        top_p_slider,              # Top-P slider
        frequency_penalty_slider,  # Frequency penalty slider
        seed_slider,               # Seed slider
        custom_model_box,          # Custom model input
    ],
    fill_height=True,              # Allow the chatbot to fill the container height
    chatbot=chatbot,               # Chatbot UI component
    theme="Nymbo/Nymbo_Theme",     # Theme for the interface
)
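
# Note: Gradio passes additional_inputs positionally, so the list above must stay
# in the same order as respond()'s parameters after (message, history).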
print("ChatInterface object created.")
# -----------
# ADDING THE "FEATURED MODELS" ACCORDION
# -----------
with demo:
    with gr.Accordion("Featured Models", open=False):  # Collapsible section for featured models
        model_search_box = gr.Textbox(
            label="Filter Models",                         # Label for the search box
            placeholder="Search for a featured model...",  # Placeholder text
            lines=1,                                       # Single-line input
        )
        print("Model search box created.")

        # Sample list of popular text models
        models_list = [
            "meta-llama/Llama-3.3-70B-Instruct",
            "meta-llama/Llama-3.2-3B-Instruct",
            "meta-llama/Llama-3.2-1B-Instruct",
            "meta-llama/Llama-3.1-8B-Instruct",
            "NousResearch/Hermes-3-Llama-3.1-8B",
            "google/gemma-2-27b-it",
            "google/gemma-2-9b-it",
            "google/gemma-2-2b-it",
            "mistralai/Mistral-Nemo-Instruct-2407",
            "mistralai/Mixtral-8x7B-Instruct-v0.1",
            "mistralai/Mistral-7B-Instruct-v0.3",
            "Qwen/Qwen2.5-72B-Instruct",
            "Qwen/QwQ-32B-Preview",
            "PowerInfer/SmallThinker-3B-Preview",
            "HuggingFaceTB/SmolLM2-1.7B-Instruct",
            "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
            "microsoft/Phi-3.5-mini-instruct",
        ]
        print("Models list initialized.")

        featured_model_radio = gr.Radio(
            label="Select a model below",               # Label for the radio buttons
            choices=models_list,                        # List of available models
            value="meta-llama/Llama-3.3-70B-Instruct",  # Default selection
            interactive=True,                           # Allow user interaction
        )
        print("Featured models radio button created.")

        # Filter function for the radio button list
        def filter_models(search_term):
            print(f"Filtering models with search term: {search_term}")
            # Keep models whose names contain the search term (case-insensitive)
            filtered = [m for m in models_list if search_term.lower() in m.lower()]
            print(f"Filtered models: {filtered}")
            return gr.update(choices=filtered)
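
        # For example, typing "qwen" narrows the radio choices to
        # "Qwen/Qwen2.5-72B-Instruct" and "Qwen/QwQ-32B-Preview";
        # matching is a case-insensitive substring test.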
        # Update the radio list when the search box value changes
        model_search_box.change(
            fn=filter_models,              # Function that filters the models
            inputs=model_search_box,       # Input: search box value
            outputs=featured_model_radio,  # Output: updated radio button list
        )
        print("Model search box change event linked.")

        # Update the custom model textbox when a featured model is selected
        featured_model_radio.change(
            fn=set_custom_model_from_radio,  # Function that sets the custom model
            inputs=featured_model_radio,     # Input: selected model
            outputs=custom_model_box,        # Output: updated custom model textbox
        )
        print("Featured model radio button change event linked.")
    # -----------
    # ADDING THE "INFORMATION" TAB
    # -----------
    with gr.Tab("Information"):
        with gr.Row():
            # Accordion for Featured Models
            with gr.Accordion("Featured Models", open=False):
                gr.HTML(
                    """
                    <table style="width:100%; text-align:center; margin:auto;">
                        <tr>
                            <th>Model Name</th>
                            <th>Status</th>
                            <th>Notes</th>
                        </tr>
                        <tr>
                            <td>meta-llama/Llama-3.3-70B-Instruct</td>
                            <td>✅</td>
                            <td></td>
                        </tr>
                        <tr>
                            <td>meta-llama/Llama-3.2-3B-Instruct</td>
                            <td>✅</td>
                            <td></td>
                        </tr>
                        <tr>
                            <td>meta-llama/Llama-3.2-1B-Instruct</td>
                            <td>✅</td>
                            <td></td>
                        </tr>
                        <tr>
                            <td>meta-llama/Llama-3.1-8B-Instruct</td>
                            <td>✅</td>
                            <td></td>
                        </tr>
                        <tr>
                            <td>NousResearch/Hermes-3-Llama-3.1-8B</td>
                            <td>✅</td>
                            <td></td>
                        </tr>
                        <tr>
                            <td>google/gemma-2-27b-it</td>
                            <td>✅</td>
                            <td></td>
                        </tr>
                        <tr>
                            <td>google/gemma-2-9b-it</td>
                            <td>✅</td>
                            <td></td>
                        </tr>
                        <tr>
                            <td>google/gemma-2-2b-it</td>
                            <td>✅</td>
                            <td></td>
                        </tr>
                        <tr>
                            <td>mistralai/Mistral-Nemo-Instruct-2407</td>
                            <td>✅</td>
                            <td></td>
                        </tr>
                        <tr>
                            <td>mistralai/Mixtral-8x7B-Instruct-v0.1</td>
                            <td>✅</td>
                            <td></td>
                        </tr>
                        <tr>
                            <td>mistralai/Mistral-7B-Instruct-v0.3</td>
                            <td>✅</td>
                            <td></td>
                        </tr>
                        <tr>
                            <td>Qwen/Qwen2.5-72B-Instruct</td>
                            <td>✅</td>
                            <td></td>
                        </tr>
                        <tr>
                            <td>Qwen/QwQ-32B-Preview</td>
                            <td>✅</td>
                            <td></td>
                        </tr>
                        <tr>
                            <td>PowerInfer/SmallThinker-3B-Preview</td>
                            <td>✅</td>
                            <td></td>
                        </tr>
                        <tr>
                            <td>HuggingFaceTB/SmolLM2-1.7B-Instruct</td>
                            <td>✅</td>
                            <td></td>
                        </tr>
                        <tr>
                            <td>TinyLlama/TinyLlama-1.1B-Chat-v1.0</td>
                            <td>✅</td>
                            <td></td>
                        </tr>
                        <tr>
                            <td>microsoft/Phi-3.5-mini-instruct</td>
                            <td>✅</td>
                            <td></td>
                        </tr>
                    </table>
                    """
                )
            # Accordion for Parameters Overview
            with gr.Accordion("Parameters Overview", open=False):
                gr.Markdown(
                    """
                    ## System Message
                    ###### This box sets the initial context or instructions for the AI, guiding how it responds to your inputs.

                    ## Max New Tokens
                    ###### This slider sets the maximum number of tokens (words or parts of words) the AI can generate in a single response. The default value is 512, and the maximum is 4096.

                    ## Temperature
                    ###### Temperature controls the randomness of the AI's responses. A higher temperature makes responses more creative and varied, while a lower temperature makes them more predictable and focused. The default value is 0.7.

                    ## Top-P (Nucleus Sampling)
                    ###### Top-P sampling is another way to control the diversity of the AI's responses. It restricts the AI to the most likely tokens up to a cumulative probability of P. The default value is 0.95.

                    ## Frequency Penalty
                    ###### This penalty discourages the AI from repeating the same tokens (words or phrases) in its responses. A higher penalty reduces repetition. The default value is 0.0.

                    ## Seed
                    ###### The seed makes the AI's responses reproducible: with a specific seed, the AI generates the same response every time for the same input. Set it to -1 to use a random seed on each request.

                    ## Custom Model
                    ###### You can specify a custom Hugging Face model path here; it overrides any selected featured model. This is optional and lets you use models not listed among the featured ones.

                    ### Remember, these settings are all about giving you control over the text generation process. Feel free to experiment and see what each one does. And if you're ever in doubt, the default settings are a great place to start. Happy creating!
                    """
                )
print("Gradio interface initialized.")
if __name__ == "__main__":
print("Launching the demo application.")
demo.launch() |