Spaces: Runtime error
Update app.py
app.py CHANGED
@@ -2,16 +2,16 @@ import gradio as gr
 from huggingface_hub import hf_hub_download
 from llama_cpp import Llama
 
-# Download the
+# Download the Q3_K_M quantized GGUF from Unsloth’s repo
 model_path = hf_hub_download(
-    repo_id="
-    filename="
+    repo_id="unsloth/Llama-3.2-1B-Instruct-GGUF",
+    filename="Llama-3.2-1B-Instruct-Q3_K_M.gguf"
 )
 llm = Llama(model_path=model_path)
 
-def generate(prompt: str, temperature: float, max_tokens: int):
-
-    return
+def generate(prompt: str, temperature: float = 0.7, max_tokens: int = 128):
+    out = llm(prompt, temperature=temperature, max_tokens=max_tokens)
+    return out["choices"][0]["text"]
 
 iface = gr.Interface(
     fn=generate,
@@ -21,7 +21,7 @@ iface = gr.Interface(
         gr.Slider(16, 512, 16, label="Max Tokens", value=128),
     ],
     outputs="text",
-    title="
+    title="unsloth Llama-3.2-1B (Q3_K_M, CPU)"
 )
 
 if __name__ == "__main__":
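For context, the committed change matches llama-cpp-python's completion API: calling the Llama object directly returns an OpenAI-style response dict, so out["choices"][0]["text"] is the generated text. A minimal local smoke test of the changed path, as a sketch (assumes llama-cpp-python and huggingface_hub are installed; the prompt string is illustrative):

from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Same download as the committed app.py: fetch the Q3_K_M GGUF once;
# hf_hub_download caches the file and returns its local path.
model_path = hf_hub_download(
    repo_id="unsloth/Llama-3.2-1B-Instruct-GGUF",
    filename="Llama-3.2-1B-Instruct-Q3_K_M.gguf",
)
llm = Llama(model_path=model_path)

# Calling the Llama object runs a completion and returns a dict with
# a "choices" list, which is exactly what the Space's generate() indexes.
out = llm("Q: What is the capital of France? A:", temperature=0.2, max_tokens=32)
print(out["choices"][0]["text"])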
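Note: the unchanged context line gr.Slider(16, 512, 16, label="Max Tokens", value=128) passes value twice. In current Gradio releases the Slider's positional parameters are (minimum, maximum, value, ...), so the third positional 16 already binds value, and the keyword value=128 raises a TypeError when the interface is built, which would leave the Space in the Runtime error state shown above. A sketch of the likely intended call (assumption: the third 16 was meant as the step size):

# Assumed intent: step=16 with a single initial value of 128.
gr.Slider(minimum=16, maximum=512, step=16, value=128, label="Max Tokens")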