Update app.py
app.py CHANGED
@@ -10,14 +10,19 @@ client = Groq(api_key=os.environ.get("Groq_Api_Key"))
 
 # llms
 
+MAX_SEED = np.iinfo(np.int32).max
+
+def update_max_tokens(model):
+    if model in ["llama3-70b-8192", "llama3-8b-8192", "gemma-7b-it", "gemma2-9b-it"]:
+        return gr.update(maximum=8192)
+    elif model == "mixtral-8x7b-32768":
+        return gr.update(maximum=32768)
+
 def create_history_messages(history):
     history_messages = [{"role": "user", "content": m[0]} for m in history]
     history_messages.extend([{"role": "assistant", "content": m[1]} for m in history])
     return history_messages
 
-
-MAX_SEED = np.iinfo(np.int32).max
-
 def generate_response(prompt, history, model, temperature, max_tokens, top_p, seed):
     messages = create_history_messages(history)
     messages.append({"role": "user", "content": prompt})
@@ -257,7 +262,19 @@ with gr.Blocks() as demo:
     with gr.Tabs():
         with gr.TabItem("LLMs"):
             with gr.Row():
-                with gr.Column():
+                with gr.Column(scale=1, min_width=400):
+                    chatbot = gr.ChatInterface(
+                        fn=generate_response,
+                        chatbot=None,
+                        additional_inputs=[
+                            model,
+                            temperature,
+                            max_tokens,
+                            top_p,
+                            seed,
+                        ],
+                    )
+                with gr.Column(scale=1, min_width=250):
                     model = gr.Dropdown(
                         choices=[
                             "llama3-70b-8192",
@@ -279,7 +296,7 @@ with gr.Blocks() as demo:
                     )
                     max_tokens = gr.Slider(
                         minimum=1,
-                        maximum=
+                        maximum=8192,
                         step=1,
                         value=4096,
                         label="Max Tokens",
@@ -296,12 +313,7 @@ with gr.Blocks() as demo:
                     seed = gr.Number(
                         precision=0, value=42, label="Seed", info="A starting point to initiate generation, use 0 for random"
                     )
-
-                chatbot_ui = gr.ChatInterface(
-                    fn=generate_response,
-                    chatbot=None,
-                    additional_inputs=[model, temperature, max_tokens, top_p, seed],
-                )
+                    model.change(update_max_tokens, inputs=[model], outputs=max_tokens)
         with gr.TabItem("Speech To Text"):
             with gr.Tabs():
                 with gr.TabItem("Transcription"):
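Note: the pattern this commit introduces is a dropdown change handler that returns gr.update(maximum=...) so the Max Tokens slider tracks the selected model's context window. The sketch below is a minimal, self-contained illustration of that wiring, not part of the Space's code; the model names and limits are taken from the diff, while MODEL_MAX_TOKENS and the demo layout are assumptions added for the example.

import gradio as gr

# Context-window limits per model, copied from the choices shown in the diff above.
MODEL_MAX_TOKENS = {
    "llama3-70b-8192": 8192,
    "llama3-8b-8192": 8192,
    "gemma-7b-it": 8192,
    "gemma2-9b-it": 8192,
    "mixtral-8x7b-32768": 32768,
}

def update_max_tokens(model):
    # Return a component update that resizes the slider to the selected
    # model's context window (same idea as the handler added in the commit,
    # restructured as a dict lookup for this example).
    return gr.update(maximum=MODEL_MAX_TOKENS.get(model, 8192))

with gr.Blocks() as demo:
    model = gr.Dropdown(
        choices=list(MODEL_MAX_TOKENS),
        value="llama3-70b-8192",
        label="Model",
    )
    max_tokens = gr.Slider(minimum=1, maximum=8192, step=1, value=4096, label="Max Tokens")
    # Re-run update_max_tokens whenever the dropdown selection changes.
    model.change(update_max_tokens, inputs=[model], outputs=max_tokens)

if __name__ == "__main__":
    demo.launch()

One difference from the commit's if/elif version: an unmatched model name there would return None, while the dict lookup here falls back to 8192.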