sitammeur committed
Commit d47afee · verified · 1 Parent(s): f465c77

Update app.py
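In short: both GGUF files move from the Q6_K quantization to the smaller Q4_K_M, `n_batch` drops from 16 to 8, `n_threads=2` and `n_threads_batch=2` are added to the llama.cpp settings, and the Gradio model dropdown is updated to match the new filenames.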

Files changed (1):
  1. app.py +8 -6
app.py CHANGED
@@ -24,12 +24,12 @@ llm_model = None
 
 hf_hub_download(
     repo_id="bartowski/SmolLM2-135M-Instruct-GGUF",
-    filename="SmolLM2-135M-Instruct-Q6_K.gguf",
+    filename="SmolLM2-135M-Instruct-Q4_K_M.gguf",
     local_dir="./models",
 )
 hf_hub_download(
     repo_id="bartowski/SmolLM2-360M-Instruct-GGUF",
-    filename="SmolLM2-360M-Instruct-Q6_K.gguf",
+    filename="SmolLM2-360M-Instruct-Q4_K_M.gguf",
     local_dir="./models",
 )
 
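Assembled from the hunk above, the download step after this commit amounts to the following. This is a minimal sketch that only assumes app.py already imports `hf_hub_download` from `huggingface_hub`; the loop is a condensation for illustration, not the file's literal layout.

```python
from huggingface_hub import hf_hub_download

# Fetch the two Q4_K_M quantizations this commit switches to into ./models.
for repo_id, filename in [
    ("bartowski/SmolLM2-135M-Instruct-GGUF", "SmolLM2-135M-Instruct-Q4_K_M.gguf"),
    ("bartowski/SmolLM2-360M-Instruct-GGUF", "SmolLM2-360M-Instruct-Q4_K_M.gguf"),
]:
    hf_hub_download(repo_id=repo_id, filename=filename, local_dir="./models")
```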
 
@@ -77,8 +77,10 @@ def respond(
         model_path=f"models/{model}",
         flash_attn=False,
         n_gpu_layers=0,
-        n_batch=16,
+        n_batch=8,
         n_ctx=2048,
+        n_threads=2,
+        n_threads_batch=2,
     )
     llm_model = model
     provider = LlamaCppPythonProvider(llm)
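The loader inside `respond` then builds on these settings. Below is a sketch wrapped in a hypothetical `load_provider` helper; the constructor arguments are exactly those in the hunk, while the imports are assumptions inferred from the `Llama` and `LlamaCppPythonProvider` identifiers (the latter from the llama-cpp-agent package).

```python
from llama_cpp import Llama
from llama_cpp_agent.providers import LlamaCppPythonProvider  # import path assumed

def load_provider(model: str) -> LlamaCppPythonProvider:
    """Hypothetical helper mirroring the post-commit settings in respond()."""
    llm = Llama(
        model_path=f"models/{model}",
        flash_attn=False,
        n_gpu_layers=0,     # stay entirely on CPU
        n_batch=8,          # prompt-processing batch size, reduced from 16
        n_ctx=2048,         # context window
        n_threads=2,        # generation threads, new in this commit
        n_threads_batch=2,  # batch-processing threads, new in this commit
    )
    return LlamaCppPythonProvider(llm)
```

Capping both thread pools at 2 and halving `n_batch` reads like tuning for a small CPU-only Space, where oversubscribing cores tends to hurt token throughput.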
@@ -143,10 +145,10 @@ demo = gr.ChatInterface(
     additional_inputs=[
         gr.Dropdown(
             choices=[
-                "SmolLM2-135M-Instruct-Q6_K.gguf",
-                "SmolLM2-360M-Instruct-Q6_K.gguf",
+                "SmolLM2-135M-Instruct-Q4_K_M.gguf",
+                "SmolLM2-360M-Instruct-Q4_K_M.gguf",
             ],
-            value="SmolLM2-135M-Instruct-Q6_K.gguf",
+            value="SmolLM2-135M-Instruct-Q4_K_M.gguf",
             label="Model",
             info="Select the AI model to use for chat",
         ),
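Finally, the dropdown has to list the same filenames the download step fetched, since the loader builds the path as `models/{model}`. A runnable sketch of the updated wiring; `respond` here is only a stub, as the diff doesn't show the real handler's full signature.

```python
import gradio as gr

def respond(message, history, model):
    # Stub for the real respond() in app.py; values from additional_inputs
    # (here the dropdown selection) arrive after (message, history).
    yield f"[{model}] {message}"

demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Dropdown(
            choices=[
                "SmolLM2-135M-Instruct-Q4_K_M.gguf",
                "SmolLM2-360M-Instruct-Q4_K_M.gguf",
            ],
            value="SmolLM2-135M-Instruct-Q4_K_M.gguf",
            label="Model",
            info="Select the AI model to use for chat",
        ),
    ],
)
```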
 