Commit 8696822 · verified · committed by Nymbo · Parent(s): f523c40

Update app.py

Files changed (1):
  1. app.py +203 -73
app.py CHANGED
@@ -2,6 +2,10 @@ import gradio as gr
 from openai import OpenAI
 import os
 
+# =============================
+# GLOBAL SETUP / CLIENT
+# =============================
+
 # Retrieve the access token from the environment variable
 ACCESS_TOKEN = os.getenv("HF_TOKEN")
 print("Access token loaded.")
@@ -13,6 +17,28 @@ client = OpenAI(
 )
 print("OpenAI client initialized.")
 
+# =============================
+# MODEL CONFIG / LOGIC
+# =============================
+
+# Sample placeholder list of "featured" models for demonstration
+featured_models_list = [
+    "meta-llama/Llama-2-13B-chat-hf",
+    "bigscience/bloom",
+    "microsoft/DialoGPT-large",
+    "OpenAssistant/oasst-sft-1-pythia-12b",
+    "tiiuae/falcon-7b-instruct",
+    "meta-llama/Llama-3.3-70B-Instruct"
+]
+
+def filter_featured_models(search_term: str):
+    """
+    Return a gr.update() that narrows the radio choices to the models
+    containing the search term (case-insensitive).
+    """
+    filtered = [m for m in featured_models_list if search_term.lower() in m.lower()]
+    return gr.update(choices=filtered)
+
+
 def respond(
     message,
     history: list[tuple[str, str]],
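Between these hunks: `filter_featured_models` returns a `gr.update`, so Gradio swaps the Radio's choices in place rather than rebuilding the component. The pure-Python part of the filter behaves like this (a quick illustrative check, not part of the commit):

    terms = ["llama", "BLOOM", "gpt"]
    for t in terms:
        print(t, [m for m in featured_models_list if t.lower() in m.lower()])
    # "llama" -> both meta-llama entries
    # "BLOOM" -> ["bigscience/bloom"]         (matching is case-insensitive)
    # "gpt"   -> ["microsoft/DialoGPT-large"]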
@@ -22,34 +48,32 @@ def respond(
     top_p,
     frequency_penalty,
     seed,
-    custom_model
+    custom_model,
+    selected_featured_model
 ):
     """
     This function handles the chatbot response. It takes in:
     - message: the user's new message
     - history: the list of previous messages, each as a tuple (user_msg, assistant_msg)
     - system_message: the system prompt
-    - max_tokens: the maximum number of tokens to generate in the response
-    - temperature: sampling temperature
-    - top_p: top-p (nucleus) sampling
-    - frequency_penalty: penalize repeated tokens in the output
-    - seed: a fixed seed for reproducibility; -1 will mean 'random'
-    - custom_model: the user-provided custom model name (if any)
+    - max_tokens, temperature, top_p, frequency_penalty, seed: generation params
+    - custom_model: user-provided custom model path/name
+    - selected_featured_model: model chosen from the featured radio list
     """
-
     print(f"Received message: {message}")
     print(f"History: {history}")
     print(f"System message: {system_message}")
     print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
     print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
     print(f"Custom model: {custom_model}")
+    print(f"Selected featured model: {selected_featured_model}")
 
     # Convert seed to None if -1 (meaning random)
     if seed == -1:
         seed = None
 
     # Construct the messages array required by the API
-    messages = [{"role": "system", "content": system_message}]
+    messages = [{"role": "system", "content": system_message}] if system_message.strip() else []
 
     # Add conversation history to the context
     for val in history:
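The body of this history loop falls between hunks, so the commit leaves it unchanged. Its typical shape, assuming the `(user_msg, assistant_msg)` tuples the docstring describes (a reconstruction for context, not text from the diff):

    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})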
@@ -65,19 +89,27 @@ def respond(
     # Append the latest user message
     messages.append({"role": "user", "content": message})
 
-    # Determine which model to use: either custom_model or a default
-    model_to_use = custom_model.strip() if custom_model.strip() != "" else "meta-llama/Llama-3.3-70B-Instruct"
+    # Determine which model to use:
+    # 1) If custom_model is non-empty, it overrides everything.
+    # 2) Otherwise, use the selected featured model from the radio button if available.
+    # 3) If both are empty, fall back to the default.
+    model_to_use = "meta-llama/Llama-3.3-70B-Instruct"  # Default
+    if custom_model.strip() != "":
+        model_to_use = custom_model.strip()
+    elif selected_featured_model.strip() != "":
+        model_to_use = selected_featured_model.strip()
+
     print(f"Model selected for inference: {model_to_use}")
 
-    # Start with an empty string to build the response as tokens stream in
+    # Start building the streaming response
     response = ""
     print("Sending request to OpenAI API.")
 
     # Make the streaming request to the HF Inference API via openai-like client
     for message_chunk in client.chat.completions.create(
-        model=model_to_use,  # Use either the user-provided custom model or default
+        model=model_to_use,
         max_tokens=max_tokens,
-        stream=True,  # Stream the response
+        stream=True,  # Stream the response
         temperature=temperature,
         top_p=top_p,
         frequency_penalty=frequency_penalty,
@@ -86,70 +118,168 @@
     ):
         # Extract the token text from the response chunk
         token_text = message_chunk.choices[0].delta.content
-        print(f"Received token: {token_text}")
+        print(f"Received token: {token_text}", flush=True)
         response += token_text
         # Yield the partial response to Gradio so it can display in real-time
         yield response
 
     print("Completed response generation.")
 
-# Create a Chatbot component with a specified height
-chatbot = gr.Chatbot(height=600)
-print("Chatbot interface created.")
-
-# Create the Gradio ChatInterface
-# We add two new sliders for Frequency Penalty, Seed, and now a new "Custom Model" text box.
-demo = gr.ChatInterface(
-    fn=respond,
-    additional_inputs=[
-        gr.Textbox(value="", label="System message"),
-        gr.Slider(
-            minimum=1,
-            maximum=4096,
-            value=512,
-            step=1,
-            label="Max new tokens"
-        ),
-        gr.Slider(
-            minimum=0.1,
-            maximum=4.0,
-            value=0.7,
-            step=0.1,
-            label="Temperature"
-        ),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-P"
-        ),
-        gr.Slider(
-            minimum=-2.0,
-            maximum=2.0,
-            value=0.0,
-            step=0.1,
-            label="Frequency Penalty"
-        ),
-        gr.Slider(
-            minimum=-1,
-            maximum=65535,  # Arbitrary upper limit for demonstration
-            value=-1,
-            step=1,
-            label="Seed (-1 for random)"
-        ),
-        gr.Textbox(
-            value="",
-            label="Custom Model",
-            info="(Optional) Provide a custom Hugging Face model path. This will override the default model if not empty."
-        ),
-    ],
-    fill_height=True,
-    chatbot=chatbot,
-    theme="Nymbo/Nymbo_Theme",
-)
-print("Gradio interface initialized.")
+# =============================
+# MAIN UI
+# =============================
+
+def build_app():
+    """
+    Build the Gradio Blocks interface containing:
+    - A Chat tab (ChatInterface)
+    - A Featured Models tab
+    - An Information tab
+    """
+    with gr.Blocks(theme="Nymbo/Nymbo_Theme") as main_interface:
+
+        # We define a gr.State to hold the user's chosen featured model
+        selected_featured_model_state = gr.State("")
+
+        with gr.Tab("Chat Interface"):
+            gr.Markdown("## Serverless-TextGen-Hub")
+
+            # Here we embed the ChatInterface for streaming conversation.
+            # We add an extra hidden input for "Selected Featured Model",
+            # so the user can't edit it directly but it still flows into respond().
+            demo = gr.ChatInterface(
+                fn=respond,
+                additional_inputs=[
+                    gr.Textbox(value="", label="System message", lines=2),
+                    gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max new tokens"),
+                    gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+                    gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"),
+                    gr.Slider(minimum=-2.0, maximum=2.0, value=0.0, step=0.1, label="Frequency Penalty"),
+                    gr.Slider(minimum=-1, maximum=65535, value=-1, step=1, label="Seed (-1 for random)"),
+                    gr.Textbox(value="", label="Custom Model", info="(Optional) Provide a custom HF model path"),
+                    gr.Textbox(value="", label="Selected Featured Model (from tab)", visible=False),
+                ],
+                fill_height=True,
+                chatbot=gr.Chatbot(height=600),
+                theme="Nymbo/Nymbo_Theme",
+            )
+
+            # Connect the selected_featured_model_state to that hidden text box
+            def set_featured_model_in_chatbox(val):
+                return val
+
+            # Whenever the state changes, update the hidden field in the ChatInterface
+            selected_featured_model_state.change(
+                fn=set_featured_model_in_chatbox,
+                inputs=selected_featured_model_state,
+                outputs=demo.additional_inputs[-1],  # The last additional input is "Selected Featured Model"
+            )
+
+        # ==========================
+        # Featured Models Tab
+        # ==========================
+        with gr.Tab("Featured Models"):
+            gr.Markdown("### Choose from our Featured Models")
+
+            # A text box for searching/filtering
+            model_search = gr.Textbox(
+                label="Filter Models",
+                placeholder="Search for a featured model..."
+            )
+
+            # A radio component listing the featured models (default to first)
+            model_radio = gr.Radio(
+                choices=featured_models_list,
+                label="Select a model below",
+                value=featured_models_list[0],
+                interactive=True
+            )
+
+            # Update the radio choices when the search box changes
+            model_search.change(
+                fn=filter_featured_models,
+                inputs=model_search,
+                outputs=model_radio
+            )
+
+            # Button to confirm the selection
+            def select_featured_model(radio_val):
+                """
+                Updates the hidden state with the user-chosen featured model.
+                """
+                return radio_val
+
+            choose_btn = gr.Button("Use this Featured Model", variant="primary")
+
+            choose_btn.click(
+                fn=select_featured_model,
+                inputs=model_radio,
+                outputs=selected_featured_model_state
+            )
+
+            gr.Markdown(
+                """
+                **Tip**: If you type a Custom Model in the "Chat Interface" tab, it overrides the
+                featured model you selected here.
+                """
+            )
+
+        # ==========================
+        # Information Tab
+        # ==========================
+        with gr.Tab("Information"):
+            gr.Markdown("## Learn More About These Models and Parameters")
+
+            with gr.Accordion("Featured Models (Table)", open=False):
+                gr.HTML(
+                    """
+                    <p>Below is a small sample table showing some featured models.</p>
+                    <table style="width:100%; text-align:center; margin:auto;">
+                      <tr>
+                        <th>Model Name</th>
+                        <th>Type</th>
+                        <th>Notes</th>
+                      </tr>
+                      <tr>
+                        <td>meta-llama/Llama-2-13B-chat-hf</td>
+                        <td>Chat</td>
+                        <td>Good for multi-turn dialogue.</td>
+                      </tr>
+                      <tr>
+                        <td>bigscience/bloom</td>
+                        <td>Language Model</td>
+                        <td>Large multilingual model.</td>
+                      </tr>
+                      <tr>
+                        <td>microsoft/DialoGPT-large</td>
+                        <td>Chat</td>
+                        <td>Well-known smaller chat model.</td>
+                      </tr>
+                    </table>
+                    """
+                )
+
+            with gr.Accordion("Parameters Overview", open=False):
+                gr.Markdown(
+                    """
+                    ### Explanation of Key Parameters
+
+                    - **System Message**: Provides context or initial instructions to the model.
+                    - **Max Tokens**: The maximum number of tokens (roughly pieces of words) in the generated response.
+                    - **Temperature**: Higher values produce more random/creative outputs, while lower values make the output more focused and deterministic.
+                    - **Top-P**: Controls nucleus sampling. The model considers only the smallest set of tokens whose cumulative probability reaches this value.
+                    - **Frequency Penalty**: Penalizes repeated tokens. Positive values (like 1.0) reduce repetition in the output; negative values can increase it.
+                    - **Seed**: Determines reproducibility. Set it to a fixed integer for consistent results; `-1` is random each time.
+                    - **Custom Model**: Overrides the featured model. Provide a Hugging Face model path (e.g., `meta-llama/Llama-3.3-70B-Instruct`).
+
+                    Use these settings to guide how the model generates text. If in doubt, stick to defaults and experiment in small increments.
+                    """
+                )
+
+    return main_interface
 
+# If run as a standalone script, just launch.
 if __name__ == "__main__":
-    print("Launching the demo application.")
-    demo.launch()
+    print("Building and launching the Serverless-TextGen-Hub interface...")
+    ui = build_app()
+    ui.launch()
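The Chat tab feeds the featured-model choice through a `gr.State` into a hidden `Textbox` via `selected_featured_model_state.change(...)`. Note this relies on `gr.State` emitting change events, which only newer Gradio 4.x releases support. A minimal self-contained sketch of that wiring pattern (hypothetical demo, not part of the commit):

    import gradio as gr

    with gr.Blocks() as sketch:
        state = gr.State("")
        hidden = gr.Textbox(visible=False)  # mirrors the State for handlers that read components
        radio = gr.Radio(["model-a", "model-b"], value="model-a", label="Pick one")
        choose = gr.Button("Use this model")

        # Button copies the radio value into the State; the State change
        # then propagates into the hidden Textbox.
        choose.click(fn=lambda v: v, inputs=radio, outputs=state)
        state.change(fn=lambda v: v, inputs=state, outputs=hidden)

    if __name__ == "__main__":
        sketch.launch()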