sitammeur committed on
Commit
2678864
·
verified ·
1 Parent(s): b9d04ca

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -5
app.py CHANGED
@@ -19,9 +19,6 @@ from exception import CustomExceptionHandling
19
 
20
 
21
  # Download gguf model files
22
- llm = None
23
- llm_model = None
24
-
25
  hf_hub_download(
26
  repo_id="bartowski/SmolLM2-135M-Instruct-GGUF",
27
  filename="SmolLM2-135M-Instruct-Q6_K.gguf",
@@ -33,11 +30,15 @@ hf_hub_download(
33
  local_dir="./models",
34
  )
35
 
 
36
  # Set the title and description
37
  title = "SmolLM🤗 Llama.cpp"
38
  description = """SmolLM2, a family of three small language models, performs well in instruction following and reasoning. The largest model significantly improves over its predecessor through advanced training techniques."""
39
 
40
 
 
 
 
41
  def respond(
42
  message: str,
43
  history: List[Tuple[str, str]],
@@ -79,8 +80,8 @@ def respond(
79
  n_gpu_layers=0,
80
  n_batch=8,
81
  n_ctx=2048,
82
- n_threads=2,
83
- n_threads_batch=2,
84
  )
85
  llm_model = model
86
  provider = LlamaCppPythonProvider(llm)
 
19
 
20
 
21
  # Download gguf model files
 
 
 
22
  hf_hub_download(
23
  repo_id="bartowski/SmolLM2-135M-Instruct-GGUF",
24
  filename="SmolLM2-135M-Instruct-Q6_K.gguf",
 
30
  local_dir="./models",
31
  )
32
 
33
+
34
  # Set the title and description
35
  title = "SmolLM🤗 Llama.cpp"
36
  description = """SmolLM2, a family of three small language models, performs well in instruction following and reasoning. The largest model significantly improves over its predecessor through advanced training techniques."""
37
 
38
 
39
+ llm = None
40
+ llm_model = None
41
+
42
  def respond(
43
  message: str,
44
  history: List[Tuple[str, str]],
 
80
  n_gpu_layers=0,
81
  n_batch=8,
82
  n_ctx=2048,
83
+ n_threads=8,
84
+ n_threads_batch=8,
85
  )
86
  llm_model = model
87
  provider = LlamaCppPythonProvider(llm)