Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -77,8 +77,8 @@ def respond(
|
|
77 |
model_path=f"models/{model}",
|
78 |
flash_attn=False,
|
79 |
n_gpu_layers=0,
|
80 |
-
n_batch=
|
81 |
-
n_ctx=
|
82 |
)
|
83 |
llm_model = model
|
84 |
provider = LlamaCppPythonProvider(llm)
|
@@ -158,9 +158,9 @@ demo = gr.ChatInterface(
|
|
158 |
),
|
159 |
gr.Slider(
|
160 |
minimum=512,
|
161 |
-
maximum=
|
162 |
-
value=
|
163 |
-
step=
|
164 |
label="Max Tokens",
|
165 |
info="Maximum length of response (higher = longer replies)",
|
166 |
),
|
|
|
77 |
model_path=f"models/{model}",
|
78 |
flash_attn=False,
|
79 |
n_gpu_layers=0,
|
80 |
+
n_batch=16,
|
81 |
+
n_ctx=2048,
|
82 |
)
|
83 |
llm_model = model
|
84 |
provider = LlamaCppPythonProvider(llm)
|
|
|
158 |
),
|
159 |
gr.Slider(
|
160 |
minimum=512,
|
161 |
+
maximum=2048,
|
162 |
+
value=1024,
|
163 |
+
step=1,
|
164 |
label="Max Tokens",
|
165 |
info="Maximum length of response (higher = longer replies)",
|
166 |
),
|