sitammeur committed
Commit d47afee · verified · 1 Parent(s): f465c77

Update app.py
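In short: both GGUF files move from the Q6_K quantization to the smaller Q4_K_M, `n_batch` drops from 16 to 8, `n_threads=2` and `n_threads_batch=2` are added to the llama.cpp settings, and the Gradio model dropdown is updated to match the new filenames.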

Files changed (1):
  1. app.py +8 -6
app.py CHANGED
@@ -24,12 +24,12 @@ llm_model = None
 
 hf_hub_download(
     repo_id="bartowski/SmolLM2-135M-Instruct-GGUF",
-    filename="SmolLM2-135M-Instruct-Q6_K.gguf",
+    filename="SmolLM2-135M-Instruct-Q4_K_M.gguf",
     local_dir="./models",
 )
 hf_hub_download(
     repo_id="bartowski/SmolLM2-360M-Instruct-GGUF",
-    filename="SmolLM2-360M-Instruct-Q6_K.gguf",
+    filename="SmolLM2-360M-Instruct-Q4_K_M.gguf",
     local_dir="./models",
 )
 
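Assembled from the hunk above, the download step after this commit amounts to the following. This is a minimal sketch that only assumes app.py already imports `hf_hub_download` from `huggingface_hub`; the loop is a condensation for illustration, not the file's literal layout.

```python
from huggingface_hub import hf_hub_download

# Fetch the two Q4_K_M quantizations this commit switches to into ./models.
for repo_id, filename in [
    ("bartowski/SmolLM2-135M-Instruct-GGUF", "SmolLM2-135M-Instruct-Q4_K_M.gguf"),
    ("bartowski/SmolLM2-360M-Instruct-GGUF", "SmolLM2-360M-Instruct-Q4_K_M.gguf"),
]:
    hf_hub_download(repo_id=repo_id, filename=filename, local_dir="./models")
```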
 
@@ -77,8 +77,10 @@ def respond(
         model_path=f"models/{model}",
         flash_attn=False,
         n_gpu_layers=0,
-        n_batch=16,
+        n_batch=8,
         n_ctx=2048,
+        n_threads=2,
+        n_threads_batch=2,
     )
     llm_model = model
     provider = LlamaCppPythonProvider(llm)
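The loader inside `respond` then builds on these settings. Below is a sketch wrapped in a hypothetical `load_provider` helper; the constructor arguments are exactly those in the hunk, while the imports are assumptions inferred from the `Llama` and `LlamaCppPythonProvider` identifiers (the latter from the llama-cpp-agent package).

```python
from llama_cpp import Llama
from llama_cpp_agent.providers import LlamaCppPythonProvider  # import path assumed

def load_provider(model: str) -> LlamaCppPythonProvider:
    """Hypothetical helper mirroring the post-commit settings in respond()."""
    llm = Llama(
        model_path=f"models/{model}",
        flash_attn=False,
        n_gpu_layers=0,     # stay entirely on CPU
        n_batch=8,          # prompt-processing batch size, reduced from 16
        n_ctx=2048,         # context window
        n_threads=2,        # generation threads, new in this commit
        n_threads_batch=2,  # batch-processing threads, new in this commit
    )
    return LlamaCppPythonProvider(llm)
```

Capping both thread pools at 2 and halving `n_batch` reads like tuning for a small CPU-only Space, where oversubscribing cores tends to hurt token throughput.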
@@ -143,10 +145,10 @@ demo = gr.ChatInterface(
     additional_inputs=[
         gr.Dropdown(
             choices=[
-                "SmolLM2-135M-Instruct-Q6_K.gguf",
-                "SmolLM2-360M-Instruct-Q6_K.gguf",
+                "SmolLM2-135M-Instruct-Q4_K_M.gguf",
+                "SmolLM2-360M-Instruct-Q4_K_M.gguf",
             ],
-            value="SmolLM2-135M-Instruct-Q6_K.gguf",
+            value="SmolLM2-135M-Instruct-Q4_K_M.gguf",
             label="Model",
             info="Select the AI model to use for chat",
         ),
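Finally, the dropdown has to list the same filenames the download step fetched, since the loader builds the path as `models/{model}`. A runnable sketch of the updated wiring; `respond` here is only a stub, as the diff doesn't show the real handler's full signature.

```python
import gradio as gr

def respond(message, history, model):
    # Stub for the real respond() in app.py; values from additional_inputs
    # (here the dropdown selection) arrive after (message, history).
    yield f"[{model}] {message}"

demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Dropdown(
            choices=[
                "SmolLM2-135M-Instruct-Q4_K_M.gguf",
                "SmolLM2-360M-Instruct-Q4_K_M.gguf",
            ],
            value="SmolLM2-135M-Instruct-Q4_K_M.gguf",
            label="Model",
            info="Select the AI model to use for chat",
        ),
    ],
)
```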
 