Spaces: Runtime error
Update app.py
app.py CHANGED
@@ -2,16 +2,16 @@ import gradio as gr
 from huggingface_hub import hf_hub_download
 from llama_cpp import Llama
 
-# Download the
+# Download the Q3_K_M quantized GGUF from Unsloth’s repo
 model_path = hf_hub_download(
-    repo_id="
-    filename="
+    repo_id="unsloth/Llama-3.2-1B-Instruct-GGUF",
+    filename="Llama-3.2-1B-Instruct-Q3_K_M.gguf"
 )
 llm = Llama(model_path=model_path)
 
-def generate(prompt: str, temperature: float, max_tokens: int):
-
-    return
+def generate(prompt: str, temperature: float = 0.7, max_tokens: int = 128):
+    out = llm(prompt, temperature=temperature, max_tokens=max_tokens)
+    return out["choices"][0]["text"]
 
 iface = gr.Interface(
     fn=generate,
@@ -21,7 +21,7 @@ iface = gr.Interface(
         gr.Slider(16, 512, 16, label="Max Tokens", value=128),
     ],
     outputs="text",
-    title="
+    title="unsloth Llama-3.2-1B (Q3_K_M, CPU)"
 )
 
 if __name__ == "__main__":
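For context, the committed change matches llama-cpp-python's completion API: calling the Llama object directly returns an OpenAI-style response dict, so out["choices"][0]["text"] is the generated text. A minimal local smoke test of the changed path, as a sketch (assumes llama-cpp-python and huggingface_hub are installed; the prompt string is illustrative):

from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Same download as the committed app.py: fetch the Q3_K_M GGUF once;
# hf_hub_download caches the file and returns its local path.
model_path = hf_hub_download(
    repo_id="unsloth/Llama-3.2-1B-Instruct-GGUF",
    filename="Llama-3.2-1B-Instruct-Q3_K_M.gguf",
)
llm = Llama(model_path=model_path)

# Calling the Llama object runs a completion and returns a dict with
# a "choices" list, which is exactly what the Space's generate() indexes.
out = llm("Q: What is the capital of France? A:", temperature=0.2, max_tokens=32)
print(out["choices"][0]["text"])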
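Note: the unchanged context line gr.Slider(16, 512, 16, label="Max Tokens", value=128) passes value twice. In current Gradio releases the Slider's positional parameters are (minimum, maximum, value, ...), so the third positional 16 already binds value, and the keyword value=128 raises a TypeError when the interface is built, which would leave the Space in the Runtime error state shown above. A sketch of the likely intended call (assumption: the third 16 was meant as the step size):

# Assumed intent: step=16 with a single initial value of 128.
gr.Slider(minimum=16, maximum=512, step=16, value=128, label="Max Tokens")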