Nymbo committed on
Commit
c5a20a4
·
verified ·
1 Parent(s): fab24df

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -187
app.py CHANGED
@@ -1,6 +1,6 @@
1
  import gradio as gr
2
- import os
3
  from openai import OpenAI
 
4
 
5
  # Retrieve the access token from the environment variable
6
  ACCESS_TOKEN = os.getenv("HF_TOKEN")
@@ -22,8 +22,7 @@ def respond(
22
  top_p,
23
  frequency_penalty,
24
  seed,
25
- custom_model,
26
- featured_model
27
  ):
28
  """
29
  This function handles the chatbot response. It takes in:
@@ -35,8 +34,7 @@ def respond(
35
  - top_p: top-p (nucleus) sampling
36
  - frequency_penalty: penalize repeated tokens in the output
37
  - seed: a fixed seed for reproducibility; -1 will mean 'random'
38
- - custom_model: a user-provided custom model name (if any)
39
- - featured_model: the user-selected model from the radio
40
  """
41
 
42
  print(f"Received message: {message}")
@@ -45,16 +43,15 @@ def respond(
45
  print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
46
  print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
47
  print(f"Custom model: {custom_model}")
48
- print(f"Featured model: {featured_model}")
49
 
50
- # Convert seed to None if -1 (meaning "random")
51
  if seed == -1:
52
  seed = None
53
 
54
- # Construct the conversation array required by the HF Inference API
55
- messages = [{"role": "system", "content": system_message or ""}]
56
 
57
- # Add conversation history
58
  for val in history:
59
  user_part = val[0]
60
  assistant_part = val[1]
@@ -65,26 +62,22 @@ def respond(
65
  messages.append({"role": "assistant", "content": assistant_part})
66
  print(f"Added assistant message to context: {assistant_part}")
67
 
68
- # The latest user message
69
  messages.append({"role": "user", "content": message})
70
 
71
- # If custom_model is not empty, it overrides the featured model
72
- model_to_use = custom_model.strip() if custom_model.strip() != "" else featured_model.strip()
73
- # If somehow both are empty, default to an example model
74
- if model_to_use == "":
75
- model_to_use = "meta-llama/Llama-3.3-70B-Instruct"
76
-
77
  print(f"Model selected for inference: {model_to_use}")
78
 
79
- # Build the response from the streaming tokens
80
  response = ""
81
  print("Sending request to OpenAI API.")
82
 
83
- # Streaming request to the HF Inference API
84
  for message_chunk in client.chat.completions.create(
85
- model=model_to_use,
86
  max_tokens=max_tokens,
87
- stream=True,
88
  temperature=temperature,
89
  top_p=top_p,
90
  frequency_penalty=frequency_penalty,
@@ -95,178 +88,68 @@ def respond(
95
  token_text = message_chunk.choices[0].delta.content
96
  print(f"Received token: {token_text}")
97
  response += token_text
98
- # Yield partial response so Gradio can display in real-time
99
  yield response
100
 
101
  print("Completed response generation.")
102
 
103
- #
104
- # Building the Gradio interface below
105
- #
106
- print("Building the Gradio interface with advanced features...")
107
-
108
- # --- Create a list of 'Featured Models' for demonstration. You can customize as you wish. ---
109
- models_list = (
110
- "meta-llama/Llama-3.3-70B-Instruct",
111
- "BigScience/bloom",
112
- "openai/gpt-4",
113
- "google/flan-t5-xxl",
114
- "EleutherAI/gpt-j-6B",
115
- "YourSpecialModel/awesome-13B",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  )
 
117
 
118
- # This function filters the above models_list by a given search term:
119
- def filter_models(search_term):
120
- filtered = [m for m in models_list if search_term.lower() in m.lower()]
121
- return gr.update(choices=filtered)
122
-
123
- # We’ll create a Chatbot in a Blocks layout to incorporate an Accordion for "Featured Models"
124
- with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
125
- gr.Markdown("## Serverless-TextGen-Hub\nA comprehensive UI for text generation, including featured models and custom model overrides.")
126
-
127
- # The Chatbot itself
128
- chatbot = gr.Chatbot(label="TextGen Chatbot", height=600)
129
-
130
- with gr.Row():
131
- with gr.Column(scale=1):
132
- # We create interactive UI elements that will feed into the 'respond' function
133
-
134
- # System message
135
- system_message = gr.Textbox(label="System message", placeholder="Set the system role instructions here.")
136
-
137
- # Accordion for selecting the model
138
- with gr.Accordion("Featured Models", open=True):
139
- model_search = gr.Textbox(
140
- label="Filter Models",
141
- placeholder="Search for a featured model...",
142
- lines=1
143
- )
144
- featured_model = gr.Radio(
145
- label="Select a Featured Model Below",
146
- choices=models_list,
147
- value="meta-llama/Llama-3.3-70B-Instruct", # default
148
- interactive=True,
149
- )
150
- # Link the search box to filter the radio model choices
151
- model_search.change(filter_models, inputs=model_search, outputs=featured_model)
152
-
153
- # A text box to optionally override the featured model
154
- custom_model = gr.Textbox(
155
- label="Custom Model",
156
- info="(Optional) Provide a custom HF model path. If non-empty, it overrides your featured model choice."
157
- )
158
-
159
- # Sliders
160
- max_tokens = gr.Slider(
161
- minimum=1,
162
- maximum=4096,
163
- value=512,
164
- step=1,
165
- label="Max new tokens"
166
- )
167
- temperature = gr.Slider(
168
- minimum=0.1,
169
- maximum=4.0,
170
- value=0.7,
171
- step=0.1,
172
- label="Temperature"
173
- )
174
- top_p = gr.Slider(
175
- minimum=0.1,
176
- maximum=1.0,
177
- value=0.95,
178
- step=0.05,
179
- label="Top-P"
180
- )
181
- frequency_penalty = gr.Slider(
182
- minimum=-2.0,
183
- maximum=2.0,
184
- value=0.0,
185
- step=0.1,
186
- label="Frequency Penalty"
187
- )
188
- seed = gr.Slider(
189
- minimum=-1,
190
- maximum=65535,
191
- value=-1,
192
- step=1,
193
- label="Seed (-1 for random)"
194
- )
195
-
196
- # The "chat" Column
197
- with gr.Column(scale=2):
198
- # We store the conversation history in a state variable
199
- state = gr.State([]) # Each element in state is (user_message, assistant_message)
200
-
201
- # Chat input box for the user
202
- with gr.Row():
203
- txt = gr.Textbox(
204
- label="Enter your message",
205
- placeholder="Type your request here, then press 'Submit'",
206
- lines=3
207
- )
208
-
209
- # Button to submit the message
210
- submit_btn = gr.Button("Submit", variant="primary")
211
-
212
- #
213
- # The 'respond' function is tied to the chatbot display.
214
- # We'll define a small wrapper that updates the 'history' (state) each time.
215
- #
216
-
217
- def user_submit(user_message, chat_history):
218
- """
219
- This function just adds the user message to the history and returns it.
220
- The actual text generation will come from 'bot_respond' next.
221
- """
222
- # Append new user message to the existing conversation
223
- chat_history = chat_history + [(user_message, None)]
224
- return "", chat_history
225
-
226
- def bot_respond(chat_history, sys_msg, max_t, temp, top, freq_pen, s, custom_mod, feat_model):
227
- """
228
- This function calls our 'respond' generator to get the text.
229
- It updates the last message in chat_history with the bot's response as it streams.
230
- """
231
- user_message = chat_history[-1][0] if len(chat_history) > 0 else ""
232
- # We call the generator
233
- bot_messages = respond(
234
- user_message,
235
- chat_history[:-1], # all but the last user message
236
- sys_msg,
237
- max_t,
238
- temp,
239
- top,
240
- freq_pen,
241
- s,
242
- custom_mod,
243
- feat_model,
244
- )
245
-
246
- # Stream the tokens back
247
- final_bot_msg = ""
248
- for token_text in bot_messages:
249
- final_bot_msg = token_text
250
- # We'll update the chatbot in real-time
251
- chat_history[-1] = (user_message, final_bot_msg)
252
- yield chat_history
253
-
254
- # Tie the Submit button to the user_submit function, and then to bot_respond
255
- submit_btn.click(
256
- user_submit,
257
- inputs=[txt, state],
258
- outputs=[txt, state],
259
- queue=False
260
- ).then(
261
- bot_respond,
262
- inputs=[state, system_message, max_tokens, temperature, top_p, frequency_penalty, seed, custom_model, featured_model],
263
- outputs=[chatbot],
264
- queue=True
265
- )
266
-
267
- print("Interface construction complete. Ready to launch!")
268
-
269
- # Launch the Gradio Blocks interface
270
  if __name__ == "__main__":
271
  print("Launching the demo application.")
272
  demo.launch()
 
1
  import gradio as gr
 
2
  from openai import OpenAI
3
+ import os
4
 
5
  # Retrieve the access token from the environment variable
6
  ACCESS_TOKEN = os.getenv("HF_TOKEN")
 
22
  top_p,
23
  frequency_penalty,
24
  seed,
25
+ custom_model
 
26
  ):
27
  """
28
  This function handles the chatbot response. It takes in:
 
34
  - top_p: top-p (nucleus) sampling
35
  - frequency_penalty: penalize repeated tokens in the output
36
  - seed: a fixed seed for reproducibility; -1 will mean 'random'
37
+ - custom_model: the user-provided custom model name (if any)
 
38
  """
39
 
40
  print(f"Received message: {message}")
 
43
  print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
44
  print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
45
  print(f"Custom model: {custom_model}")
 
46
 
47
+ # Convert seed to None if -1 (meaning random)
48
  if seed == -1:
49
  seed = None
50
 
51
+ # Construct the messages array required by the API
52
+ messages = [{"role": "system", "content": system_message}]
53
 
54
+ # Add conversation history to the context
55
  for val in history:
56
  user_part = val[0]
57
  assistant_part = val[1]
 
62
  messages.append({"role": "assistant", "content": assistant_part})
63
  print(f"Added assistant message to context: {assistant_part}")
64
 
65
+ # Append the latest user message
66
  messages.append({"role": "user", "content": message})
67
 
68
+ # Determine which model to use: either custom_model or a default
69
+ model_to_use = custom_model.strip() if custom_model.strip() != "" else "meta-llama/Llama-3.3-70B-Instruct"
 
 
 
 
70
  print(f"Model selected for inference: {model_to_use}")
71
 
72
+ # Start with an empty string to build the response as tokens stream in
73
  response = ""
74
  print("Sending request to OpenAI API.")
75
 
76
+ # Make the streaming request to the HF Inference API via openai-like client
77
  for message_chunk in client.chat.completions.create(
78
+ model=model_to_use, # Use either the user-provided custom model or default
79
  max_tokens=max_tokens,
80
+ stream=True, # Stream the response
81
  temperature=temperature,
82
  top_p=top_p,
83
  frequency_penalty=frequency_penalty,
 
88
  token_text = message_chunk.choices[0].delta.content
89
  print(f"Received token: {token_text}")
90
  response += token_text
91
+ # Yield the partial response to Gradio so it can display in real-time
92
  yield response
93
 
94
  print("Completed response generation.")
95
 
96
+ # Create a Chatbot component with a specified height
97
+ chatbot = gr.Chatbot(height=600)
98
+ print("Chatbot interface created.")
99
+
100
+ # Create the Gradio ChatInterface
101
+ # We add two new sliders for Frequency Penalty, Seed, and now a new "Custom Model" text box.
102
+ demo = gr.ChatInterface(
103
+ fn=respond,
104
+ additional_inputs=[
105
+ gr.Textbox(value="", label="System message"),
106
+ gr.Slider(
107
+ minimum=1,
108
+ maximum=4096,
109
+ value=512,
110
+ step=1,
111
+ label="Max new tokens"
112
+ ),
113
+ gr.Slider(
114
+ minimum=0.1,
115
+ maximum=4.0,
116
+ value=0.7,
117
+ step=0.1,
118
+ label="Temperature"
119
+ ),
120
+ gr.Slider(
121
+ minimum=0.1,
122
+ maximum=1.0,
123
+ value=0.95,
124
+ step=0.05,
125
+ label="Top-P"
126
+ ),
127
+ gr.Slider(
128
+ minimum=-2.0,
129
+ maximum=2.0,
130
+ value=0.0,
131
+ step=0.1,
132
+ label="Frequency Penalty"
133
+ ),
134
+ gr.Slider(
135
+ minimum=-1,
136
+ maximum=65535, # Arbitrary upper limit for demonstration
137
+ value=-1,
138
+ step=1,
139
+ label="Seed (-1 for random)"
140
+ ),
141
+ gr.Textbox(
142
+ value="",
143
+ label="Custom Model",
144
+ info="(Optional) Provide a custom Hugging Face model path. This will override the default model if not empty."
145
+ ),
146
+ ],
147
+ fill_height=True,
148
+ chatbot=chatbot,
149
+ theme="Nymbo/Nymbo_Theme",
150
  )
151
+ print("Gradio interface initialized.")
152
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
  if __name__ == "__main__":
154
  print("Launching the demo application.")
155
  demo.launch()