Nymbo committed on
Commit e7683ca · verified · 1 Parent(s): 6a2e496

Update app.py

Files changed (1)
  1. app.py +224 -172
app.py CHANGED
@@ -13,25 +13,6 @@ client = OpenAI(
 )
 print("OpenAI client initialized.")
 
-# We'll define a list of placeholder featured models for demonstration.
-# In real usage, replace them with actual model names available on Hugging Face.
-models_list = [
-    "meta-llama/Llama-3.1-8B-Instruct",
-    "microsoft/Phi-3.5-mini-instruct",
-    "mistralai/Mistral-7B-Instruct-v0.3",
-    "Qwen/Qwen2.5-72B-Instruct"
-]
-
-def filter_featured_models(search_term):
-    """
-    Filters the 'models_list' based on text entered in the search box.
-    Returns a gr.update object that changes the choices available
-    in the 'featured_models_radio'.
-    """
-    filtered = [m for m in models_list if search_term.lower() in m.lower()]
-    return gr.update(choices=filtered)
-
-
 def respond(
     message,
     history: list[tuple[str, str]],
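Review note: the hunk header above points at the `client = OpenAI(` call whose opening lines sit outside this diff. For orientation only, a minimal sketch of what such an initialization typically looks like against Hugging Face's OpenAI-compatible serverless endpoint; the exact base_url and token variable are assumptions, since the real values are not shown in this commit:

    import os
    from openai import OpenAI

    # Assumed setup (not part of this diff): an OpenAI-compatible client
    # pointed at Hugging Face's serverless Inference API.
    client = OpenAI(
        base_url="https://api-inference.huggingface.co/v1/",
        api_key=os.environ.get("HF_TOKEN"),  # assumed env var name
    )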
@@ -42,7 +23,7 @@ def respond(
     frequency_penalty,
     seed,
     custom_model,
-    selected_model
 ):
     """
     This function handles the chatbot response. It takes in:
@@ -54,8 +35,8 @@ def respond(
     - top_p: top-p (nucleus) sampling
     - frequency_penalty: penalize repeated tokens in the output
     - seed: a fixed seed for reproducibility; -1 will mean 'random'
-    - custom_model: a custom Hugging Face model name (if any)
-    - selected_model: a model name chosen from the featured models radio button
     """
 
     print(f"Received message: {message}")
@@ -64,12 +45,20 @@ def respond(
     print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
     print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
     print(f"Custom model: {custom_model}")
-    print(f"Selected featured model: {selected_model}")
 
     # Convert seed to None if -1 (meaning random)
     if seed == -1:
         seed = None
 
     # Construct the messages array required by the API
     messages = [{"role": "system", "content": system_message}]
 
@@ -87,171 +76,234 @@ def respond(
     # Append the latest user message
     messages.append({"role": "user", "content": message})
 
-    # Decide which model to use:
-    # 1) If the user provided a custom model, use it.
-    # 2) Else if they chose a featured model, use it.
-    # 3) Otherwise, fall back to a default model.
-    if custom_model.strip() != "":
-        model_to_use = custom_model.strip()
-    elif selected_model is not None and selected_model.strip() != "":
-        model_to_use = selected_model.strip()
-    else:
-        model_to_use = "meta-llama/Llama-3.3-70B-Instruct"  # Default fallback
-
-    print(f"Model selected for inference: {model_to_use}")
-
     # Start with an empty string to build the response as tokens stream in
     response = ""
     print("Sending request to OpenAI API.")
 
-    # Make the streaming request to the HF Inference API via openai-like client
-    for message_chunk in client.chat.completions.create(
-        model=model_to_use,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-        frequency_penalty=frequency_penalty,
-        seed=seed,
-        messages=messages,
-    ):
-        # Extract the token text from the response chunk
-        token_text = message_chunk.choices[0].delta.content
-        print(f"Received token: {token_text}")
-        response += token_text
-        # Yield the partial response to Gradio so it can display in real-time
-        yield response
 
     print("Completed response generation.")
 
 
-########################
-# GRADIO APP LAYOUT
-########################
-
-# We’ll build a custom Blocks layout so we can have:
-#  - A Featured Models accordion with a search box
-#  - Our ChatInterface to handle the conversation
-#  - Additional sliders and textboxes for settings (like the original code)
-########################
-
 with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
-    gr.Markdown("## Serverless Text Generation Hub")
     gr.Markdown(
-        "An all-in-one UI for chatting with text-generation models on Hugging Face's Inference API."
-    )
-
-    # We keep a Chatbot component for the conversation display
-    chatbot = gr.Chatbot(height=600, label="Chat Preview")
-
-    # Textbox for system message
-    system_message_box = gr.Textbox(
-        value="",
-        label="System Message",
-        placeholder="Enter a system prompt if you want (optional).",
-    )
-
-    # Slider for max_tokens
-    max_tokens_slider = gr.Slider(
-        minimum=1,
-        maximum=4096,
-        value=512,
-        step=1,
-        label="Max new tokens",
-    )
-
-    # Slider for temperature
-    temperature_slider = gr.Slider(
-        minimum=0.1,
-        maximum=4.0,
-        value=0.7,
-        step=0.1,
-        label="Temperature",
-    )
-
-    # Slider for top_p
-    top_p_slider = gr.Slider(
-        minimum=0.1,
-        maximum=1.0,
-        value=0.95,
-        step=0.05,
-        label="Top-P",
-    )
-
-    # Slider for frequency penalty
-    freq_penalty_slider = gr.Slider(
-        minimum=-2.0,
-        maximum=2.0,
-        value=0.0,
-        step=0.1,
-        label="Frequency Penalty",
-    )
-
-    # Slider for seed
-    seed_slider = gr.Slider(
-        minimum=-1,
-        maximum=65535,  # Arbitrary upper limit for demonstration
-        value=-1,
-        step=1,
-        label="Seed (-1 for random)",
-    )
-
-    # Custom Model textbox
-    custom_model_box = gr.Textbox(
-        value="",
-        label="Custom Model",
-        info="(Optional) Provide a custom Hugging Face model path. This will override the selected Featured Model if not empty."
     )
-
-    # Accordion for featured models
-    with gr.Accordion("Featured Models", open=False):
-        # Textbox for filtering the featured models
-        model_search_box = gr.Textbox(
-            label="Filter Models",
-            placeholder="Search for a featured model...",
-            lines=1,
         )
-        # Radio for selecting the desired model
-        featured_models_radio = gr.Radio(
-            label="Select a featured model below",
-            choices=models_list,  # Start with the entire list
-            value=None,  # No default
-            interactive=True
         )
 
-    # We connect the model_search_box to the filter function
-    model_search_box.change(
-        filter_featured_models,
-        inputs=model_search_box,
-        outputs=featured_models_radio
     )
 
-    # Now we create our ChatInterface
-    # We pass all the extra components as additional_inputs
-    interface = gr.ChatInterface(
-        fn=respond,
-        chatbot=chatbot,
-        additional_inputs=[
-            system_message_box,
-            max_tokens_slider,
-            temperature_slider,
-            top_p_slider,
-            freq_penalty_slider,
-            seed_slider,
-            custom_model_box,
-            featured_models_radio
         ],
-        theme="Nymbo/Nymbo_Theme",
-        title="Serverless TextGen Hub with Featured Models",
-        description=(
-            "Use the sliders and textboxes to control generation parameters. "
-            "Pick a model from 'Featured Models' or specify a custom model path."
-        ),
-        # Fill the screen height
-        fill_height=True
     )
 
-# If you want the script to be directly executable, launch the demo here:
-if __name__ == "__main__":
-    print("Launching the demo application...")
-    demo.launch()
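Review note: the hunks above are the old side of this change and drop the gr.ChatInterface-based layout; the hunks below are the new side, which wires the chat loop manually with Blocks events instead. For reference, a minimal sketch of the ChatInterface pattern being removed (standard Gradio API, trimmed to essentials; the echo fn is a placeholder):

    import gradio as gr

    def echo(message, history, system_message):
        # Placeholder fn; the real app streamed tokens from respond()
        return message

    gr.ChatInterface(
        fn=echo,
        additional_inputs=[gr.Textbox(label="System Message")],
    ).launch()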
@@ -13,25 +13,6 @@ client = OpenAI(
 )
 print("OpenAI client initialized.")
 
 def respond(
     message,
     history: list[tuple[str, str]],
@@ -42,7 +23,7 @@ def respond(
     frequency_penalty,
     seed,
     custom_model,
+    selected_featured_model
 ):
     """
     This function handles the chatbot response. It takes in:
@@ -54,8 +35,8 @@ def respond(
     - top_p: top-p (nucleus) sampling
     - frequency_penalty: penalize repeated tokens in the output
     - seed: a fixed seed for reproducibility; -1 will mean 'random'
+    - custom_model: the user-provided custom model name (if any)
+    - selected_featured_model: the model selected from featured models
     """
 
     print(f"Received message: {message}")
@@ -64,12 +45,20 @@ def respond(
     print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
     print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
     print(f"Custom model: {custom_model}")
+    print(f"Selected featured model: {selected_featured_model}")
 
     # Convert seed to None if -1 (meaning random)
     if seed == -1:
         seed = None
 
+    # Determine which model to use: either custom_model or selected featured model
+    if custom_model.strip() != "":
+        model_to_use = custom_model.strip()
+        print(f"Using Custom Model: {model_to_use}")
+    else:
+        model_to_use = selected_featured_model
+        print(f"Using Featured Model: {model_to_use}")
+
     # Construct the messages array required by the API
     messages = [{"role": "system", "content": system_message}]
 
@@ -87,171 +76,234 @@ def respond(
     # Append the latest user message
     messages.append({"role": "user", "content": message})
 
     # Start with an empty string to build the response as tokens stream in
     response = ""
     print("Sending request to OpenAI API.")
 
+    try:
+        # Make the streaming request to the HF Inference API via openai-like client
+        for message_chunk in client.chat.completions.create(
+            model=model_to_use,  # Use either the user-provided custom model or selected featured model
+            max_tokens=max_tokens,
+            stream=True,  # Stream the response
+            temperature=temperature,
+            top_p=top_p,
+            frequency_penalty=frequency_penalty,
+            seed=seed,
+            messages=messages,
+        ):
+            # Extract the token text from the response chunk
+            token_text = message_chunk.choices[0].delta.content
+            print(f"Received token: {token_text}")
+            response += token_text
+            # Yield the partial response to Gradio so it can display in real-time
+            yield response
+    except Exception as e:
+        print(f"Error during API call: {e}")
+        yield f"An error occurred: {e}"
 
     print("Completed response generation.")
 
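Review note: with the OpenAI streaming API, a chunk's `delta.content` can be `None` (the final chunk usually carries no content), so `response += token_text` can raise a TypeError that the new try/except would then surface as an error message. A minimal sketch of the same loop with a guard, assuming everything else stays as committed:

    for message_chunk in client.chat.completions.create(
        model=model_to_use,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
        frequency_penalty=frequency_penalty,
        seed=seed,
        messages=messages,
    ):
        token_text = message_chunk.choices[0].delta.content
        if token_text is None:  # skip empty deltas instead of crashing
            continue
        response += token_text
        yield response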
+# Create a Chatbot component with a specified height
+chatbot = gr.Chatbot(height=600)
+print("Chatbot interface created.")
+
+# Placeholder featured models list
+FEATURED_MODELS_LIST = [
+    "gpt-3.5-turbo",
+    "gpt-4",
+    "bert-base-uncased",
+    "facebook/blenderbot-3B",
+    "EleutherAI/gpt-neo-2.7B",
+    "google/flan-t5-xxl",
+    "microsoft/DialoGPT-large",
+    "Salesforce/codegen-16B-multi",
+    "stabilityai/stablelm-tuned-alpha-7b",
+    "bigscience/bloom-560m",
+]
 
+# Define the Gradio Blocks interface
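Review note: several of these placeholder entries are either OpenAI-hosted models (gpt-3.5-turbo, gpt-4) or not chat models at all (bert-base-uncased), so they would fail against the Hugging Face endpoint. A small sketch for sanity-checking the list against the Hub, assuming huggingface_hub is installed:

    from huggingface_hub import model_info

    for name in FEATURED_MODELS_LIST:
        try:
            print(name, "->", model_info(name).pipeline_tag)
        except Exception:
            # e.g., gpt-3.5-turbo / gpt-4 are not Hub repos
            print(name, "-> not found on the Hugging Face Hub")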
 with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
+    gr.Markdown("# Serverless-TextGen-Hub 📝🤖")
     gr.Markdown(
+        """
+        Welcome to the **Serverless-TextGen-Hub**! Chat with your favorite models seamlessly.
+        """
     )
+
+    with gr.Row():
+        # Chatbot component
+        chatbot_component = gr.Chatbot(height=600)
+
+    with gr.Row():
+        # System message input
+        system_message = gr.Textbox(
+            value="You are a helpful assistant.",
+            label="System Message",
+            placeholder="Enter system message here...",
+            lines=2,
         )
+
+    with gr.Row():
+        # User message input
+        user_message = gr.Textbox(
+            label="Your Message",
+            placeholder="Type your message here...",
+            lines=2,
         )
+        # Run button
+        run_button = gr.Button("Send", variant="primary")
+
+    with gr.Row():
+        # Additional settings
+        with gr.Column(scale=1):
+            max_tokens = gr.Slider(
+                minimum=1,
+                maximum=4096,
+                value=512,
+                step=1,
+                label="Max New Tokens",
+            )
+            temperature = gr.Slider(
+                minimum=0.1,
+                maximum=4.0,
+                value=0.7,
+                step=0.1,
+                label="Temperature",
+            )
+            top_p = gr.Slider(
+                minimum=0.1,
+                maximum=1.0,
+                value=0.95,
+                step=0.05,
+                label="Top-P",
+            )
+            frequency_penalty = gr.Slider(
+                minimum=-2.0,
+                maximum=2.0,
+                value=0.0,
+                step=0.1,
+                label="Frequency Penalty",
+            )
+            seed = gr.Slider(
+                minimum=-1,
+                maximum=65535,  # Arbitrary upper limit for demonstration
+                value=-1,
+                step=1,
+                label="Seed (-1 for random)",
+            )
+            custom_model = gr.Textbox(
+                value="",
+                label="Custom Model",
+                info="(Optional) Provide a custom Hugging Face model path. This will override the selected featured model if not empty.",
+                placeholder="e.g., meta-llama/Llama-3.3-70B-Instruct",
+            )
+
+    with gr.Accordion("Featured Models", open=True):
+        with gr.Column():
+            model_search = gr.Textbox(
+                label="Filter Models",
+                placeholder="Search for a featured model...",
+                lines=1,
+            )
+            featured_model = gr.Radio(
+                label="Select a model below",
+                value=FEATURED_MODELS_LIST[0],
+                choices=FEATURED_MODELS_LIST,
+                interactive=True,
+            )
+
+    # Function to filter featured models based on search input
+    def filter_featured_models(search_term):
+        if not search_term:
+            return gr.update(choices=FEATURED_MODELS_LIST, value=FEATURED_MODELS_LIST[0])
+        filtered = [model for model in FEATURED_MODELS_LIST if search_term.lower() in model.lower()]
+        if not filtered:
+            return gr.update(choices=[], value=None)
+        return gr.update(choices=filtered, value=filtered[0])
+
+    # Update featured_model choices based on search
+    model_search.change(
+        fn=filter_featured_models,
+        inputs=model_search,
+        outputs=featured_model,
+    )
 
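Review note: since filter_featured_models returns a gr.update, it can be exercised outside the UI; assuming Gradio's dict-style update objects, filtering for "gpt" should narrow the radio to the case-insensitive matches:

    upd = filter_featured_models("gpt")
    # Expected under dict-style gr.update (an assumption; version-dependent):
    # choices = ["gpt-3.5-turbo", "gpt-4", "EleutherAI/gpt-neo-2.7B",
    #            "microsoft/DialoGPT-large"], value = "gpt-3.5-turbo"
    print(upd["choices"], upd["value"])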
+    # Function to handle the chatbot response
+    def handle_response(message, history, system_msg, max_tok, temp, tp, freq_pen, sd, custom_mod, selected_feat_mod):
+        # Append user message to history
+        history = history or []
+        history.append((message, None))
+        # Generate response using the respond function
+        response = respond(
+            message=message,
+            history=history,
+            system_message=system_msg,
+            max_tokens=max_tok,
+            temperature=temp,
+            top_p=tp,
+            frequency_penalty=freq_pen,
+            seed=sd,
+            custom_model=custom_mod,
+            selected_featured_model=selected_feat_mod,
         )
+        return response, history + [(message, response)]
+
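Review note: respond() is a generator (it yields partial responses), so `response` here is a generator object, not a string; returning it and concatenating it into history will not produce readable text. A hedged sketch of the tail of handle_response that drains the stream into a final string instead, keeping the committed signature:

    final_text = ""
    for partial in respond(
        message=message,
        history=history,
        system_message=system_msg,
        max_tokens=max_tok,
        temperature=temp,
        top_p=tp,
        frequency_penalty=freq_pen,
        seed=sd,
        custom_model=custom_mod,
        selected_featured_model=selected_feat_mod,
    ):
        final_text = partial  # each yield is the accumulated text so far
    return final_text, history + [(message, final_text)]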
+    # Handle button click
+    run_button.click(
+        fn=handle_response,
+        inputs=[
+            user_message,
+            chatbot_component,  # history
+            system_message,
+            max_tokens,
+            temperature,
+            top_p,
+            frequency_penalty,
+            seed,
+            custom_model,
+            featured_model,
+        ],
+        outputs=[
+            chatbot_component,
+            chatbot_component,  # Updated history
+        ],
+    )
 
+    # Allow pressing Enter to send the message
+    user_message.submit(
+        fn=handle_response,
+        inputs=[
+            user_message,
+            chatbot_component,  # history
+            system_message,
+            max_tokens,
+            temperature,
+            top_p,
+            frequency_penalty,
+            seed,
+            custom_model,
+            featured_model,
+        ],
+        outputs=[
+            chatbot_component,
+            chatbot_component,  # Updated history
         ],
     )
 
+    # Custom CSS to enhance the UI
+    demo.load(lambda: None, None, None, _js="""
+        () => {
+            const style = document.createElement('style');
+            style.innerHTML = `
+                footer {visibility: hidden !important;}
+                .gradio-container {background-color: #f9f9f9;}
+            `;
+            document.head.appendChild(style);
+        }
+    """)
+
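Review note: `_js` is the Gradio 3.x spelling of this hook; Gradio 4.x renames the parameter to `js`. The same styling can also be achieved without JavaScript via the css argument that Blocks accepts, as in this sketch:

    with gr.Blocks(
        theme="Nymbo/Nymbo_Theme",
        css="""
            footer {visibility: hidden !important;}
            .gradio-container {background-color: #f9f9f9;}
        """,
    ) as demo:
        ...  # same layout as above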
+print("Launching Gradio interface...")  # Debug log
+
+# Launch the Gradio interface without showing the API or sharing externally
+demo.launch(show_api=False, share=False)