akashD22 committed on
Commit 4869bc6 · verified · 1 Parent(s): 2cad397

Update app.py

Files changed (1): app.py (+7 -7)
app.py CHANGED
@@ -2,16 +2,16 @@ import gradio as gr
 from huggingface_hub import hf_hub_download
 from llama_cpp import Llama
 
-# Download the openLLaMA-3B GGUF into the Space cache
+# Download the Q3_K_M quantized GGUF from Unsloth's repo
 model_path = hf_hub_download(
-    repo_id="openlm-research/openLLaMA-3B",
-    filename="openLLaMA-3B-GGUF.q4_0.gguf"
+    repo_id="unsloth/Llama-3.2-1B-Instruct-GGUF",
+    filename="Llama-3.2-1B-Instruct-Q3_K_M.gguf"
 )
 llm = Llama(model_path=model_path)
 
-def generate(prompt: str, temperature: float, max_tokens: int):
-    resp = llm(prompt, temperature=temperature, max_tokens=max_tokens)
-    return resp["choices"][0]["text"]
+def generate(prompt: str, temperature: float = 0.7, max_tokens: int = 128):
+    out = llm(prompt, temperature=temperature, max_tokens=max_tokens)
+    return out["choices"][0]["text"]
 
 iface = gr.Interface(
     fn=generate,
@@ -21,7 +21,7 @@ iface = gr.Interface(
         gr.Slider(16, 512, 16, label="Max Tokens", value=128),
     ],
     outputs="text",
-    title="openLLaMA-3B (Q4_0 on CPU)"
+    title="unsloth Llama-3.2-1B (Q3_K_M, CPU)"
 )
 
 if __name__ == "__main__":
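
For reference, below is a minimal sketch of app.py as it should read after this commit. The diff elides the first two input widgets (file lines 18-20) and everything past the __main__ guard, so the prompt textbox, the temperature slider's range, and the closing iface.launch() call are assumptions filled in to match generate()'s signature, not part of the recorded change. The Max Tokens slider is also adjusted to pass step as a keyword, since in current Gradio releases gr.Slider takes value as its third positional argument, and the committed call, which passes 16 positionally and value=128 together, would raise a TypeError.

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Download the Q3_K_M quantized GGUF from Unsloth's repo
model_path = hf_hub_download(
    repo_id="unsloth/Llama-3.2-1B-Instruct-GGUF",
    filename="Llama-3.2-1B-Instruct-Q3_K_M.gguf"
)
llm = Llama(model_path=model_path)

def generate(prompt: str, temperature: float = 0.7, max_tokens: int = 128):
    out = llm(prompt, temperature=temperature, max_tokens=max_tokens)
    return out["choices"][0]["text"]

iface = gr.Interface(
    fn=generate,
    inputs=[
        # Assumed widgets: the diff does not show file lines 18-20.
        gr.Textbox(label="Prompt"),
        gr.Slider(0.0, 2.0, value=0.7, step=0.1, label="Temperature"),
        # step passed as keyword here: gr.Slider's third positional
        # argument is value, so the committed call would clash.
        gr.Slider(16, 512, value=128, step=16, label="Max Tokens"),
    ],
    outputs="text",
    title="unsloth Llama-3.2-1B (Q3_K_M, CPU)"
)

if __name__ == "__main__":
    iface.launch()  # assumed launch call; the diff ends at the guard line

Running python app.py then serves the interface on Gradio's default local port (7860).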