Update app.py
app.py CHANGED
@@ -24,12 +24,12 @@ llm_model = None
 
 hf_hub_download(
     repo_id="bartowski/SmolLM2-135M-Instruct-GGUF",
-    filename="SmolLM2-135M-Instruct-
+    filename="SmolLM2-135M-Instruct-Q4_K_M.gguf",
     local_dir="./models",
 )
 hf_hub_download(
     repo_id="bartowski/SmolLM2-360M-Instruct-GGUF",
-    filename="SmolLM2-360M-Instruct-
+    filename="SmolLM2-360M-Instruct-Q4_K_M.gguf",
     local_dir="./models",
 )
 
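For anyone reproducing this step outside the Space: the hunk above pins specific quantized GGUF files via huggingface_hub's hf_hub_download. A minimal standalone sketch, where the repo_id, filename, and local_dir values mirror the new lines of the diff and the print is purely illustrative:

from huggingface_hub import hf_hub_download

# Fetch the 4-bit (Q4_K_M) quantized GGUF weights into ./models.
# hf_hub_download caches the file and returns its local path.
model_path = hf_hub_download(
    repo_id="bartowski/SmolLM2-135M-Instruct-GGUF",
    filename="SmolLM2-135M-Instruct-Q4_K_M.gguf",
    local_dir="./models",
)
print(model_path)  # e.g. ./models/SmolLM2-135M-Instruct-Q4_K_M.gguf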
@@ -77,8 +77,10 @@ def respond(
         model_path=f"models/{model}",
         flash_attn=False,
         n_gpu_layers=0,
-        n_batch=
+        n_batch=8,
         n_ctx=2048,
+        n_threads=2,
+        n_threads_batch=2,
     )
     llm_model = model
     provider = LlamaCppPythonProvider(llm)
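Taken together, the new arguments configure a CPU-only llama.cpp model. A minimal sketch of how they land in llama_cpp.Llama, assuming the Space constructs the model this way before handing it to llama-cpp-agent's LlamaCppPythonProvider (the filename here is one of the two pinned in this commit):

from llama_cpp import Llama

# CPU-only settings matching the diff: no offloaded layers, a small
# prompt batch, and two threads for generation and batch processing.
llm = Llama(
    model_path="models/SmolLM2-135M-Instruct-Q4_K_M.gguf",
    flash_attn=False,
    n_gpu_layers=0,      # run entirely on CPU
    n_batch=8,           # prompt tokens processed per batch
    n_ctx=2048,          # context window size
    n_threads=2,         # threads used for token generation
    n_threads_batch=2,   # threads used for prompt processing
)

The small n_batch and two-thread settings are consistent with the limited CPU allocation of a free Space, though the commit itself does not state the motivation.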
@@ -143,10 +145,10 @@ demo = gr.ChatInterface(
     additional_inputs=[
         gr.Dropdown(
             choices=[
-                "SmolLM2-135M-Instruct-
-                "SmolLM2-360M-Instruct-
+                "SmolLM2-135M-Instruct-Q4_K_M.gguf",
+                "SmolLM2-360M-Instruct-Q4_K_M.gguf",
             ],
-            value="SmolLM2-135M-Instruct-
+            value="SmolLM2-135M-Instruct-Q4_K_M.gguf",
             label="Model",
             info="Select the AI model to use for chat",
         ),
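This last hunk wires the pinned filenames into the UI. A self-contained sketch of the same gr.ChatInterface pattern, with a placeholder respond function standing in for the real llama.cpp-backed one (Gradio passes each additional input as an extra argument to the chat function):

import gradio as gr

def respond(message, history, model):
    # Placeholder for the llama.cpp-backed chat function in app.py;
    # `model` receives the filename selected in the dropdown.
    return f"[{model}] echo: {message}"

demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Dropdown(
            choices=[
                "SmolLM2-135M-Instruct-Q4_K_M.gguf",
                "SmolLM2-360M-Instruct-Q4_K_M.gguf",
            ],
            value="SmolLM2-135M-Instruct-Q4_K_M.gguf",
            label="Model",
            info="Select the AI model to use for chat",
        ),
    ],
)

demo.launch()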