Switch to inference API and extend max_new_tokens
Browse files
app.py
CHANGED
@@ -18,7 +18,7 @@ DIALOGUES_DATASET = "HuggingFaceH4/starchat_playground_dialogues"
|
|
18 |
|
19 |
model2endpoint = {
|
20 |
"starchat-alpha": "https://api-inference.huggingface.co/models/HuggingFaceH4/starcoderbase-finetuned-oasst1",
|
21 |
-
"starchat-beta": "https://… [old endpoint URL truncated in page extraction — replaced by the inference-API URL added below]",
|
22 |
}
|
23 |
model_names = list(model2endpoint.keys())
|
24 |
|
@@ -136,7 +136,7 @@ def generate(
|
|
136 |
top_p=top_p,
|
137 |
repetition_penalty=repetition_penalty,
|
138 |
do_sample=True,
|
139 |
-
truncate=… [previous value truncated in page extraction — replaced by truncate=4096 below],
|
140 |
seed=42,
|
141 |
stop_sequences=["<|end|>"],
|
142 |
)
|
@@ -295,9 +295,9 @@ with gr.Blocks(analytics_enabled=False, css=custom_css) as demo:
|
|
295 |
)
|
296 |
max_new_tokens = gr.Slider(
|
297 |
label="Max new tokens",
|
298 |
-
value=… [previous slider default truncated in page extraction — replaced by value=1024 below],
|
299 |
minimum=0,
|
300 |
-
maximum=… [previous slider maximum truncated in page extraction — replaced by maximum=2048 below],
|
301 |
step=4,
|
302 |
interactive=True,
|
303 |
info="The maximum numbers of new tokens",
|
|
|
18 |
|
19 |
model2endpoint = {
|
20 |
"starchat-alpha": "https://api-inference.huggingface.co/models/HuggingFaceH4/starcoderbase-finetuned-oasst1",
|
21 |
+
"starchat-beta": "https://api-inference.huggingface.co/models/HuggingFaceH4/starchat-beta",
|
22 |
}
|
23 |
model_names = list(model2endpoint.keys())
|
24 |
|
|
|
136 |
top_p=top_p,
|
137 |
repetition_penalty=repetition_penalty,
|
138 |
do_sample=True,
|
139 |
+
truncate=4096,
|
140 |
seed=42,
|
141 |
stop_sequences=["<|end|>"],
|
142 |
)
|
|
|
295 |
)
|
296 |
max_new_tokens = gr.Slider(
|
297 |
label="Max new tokens",
|
298 |
+
value=1024,
|
299 |
minimum=0,
|
300 |
+
maximum=2048,
|
301 |
step=4,
|
302 |
interactive=True,
|
303 |
info="The maximum numbers of new tokens",
|