prakhardoneria committed (verified)
Commit 1c98a01 · 1 Parent(s): b1e49c5

Update app.py

Files changed (1)
  1. app.py +22 -15
app.py CHANGED
@@ -1,32 +1,39 @@
 import gradio as gr
 from llama_cpp import Llama
-import torch
+from huggingface_hub import hf_hub_download
 
-# Load model
-model_path = "deepseek-coder-1.3b-instruct.Q4_K_M.gguf"  # adjust if hosted elsewhere
-n_gpu_layers = 35 if torch.cuda.is_available() else 0
+# Lazy global model
+llm = None
 
-llm = Llama(
-    model_path=model_path,
-    n_ctx=2048,
-    n_threads=8,
-    n_gpu_layers=n_gpu_layers,
-    use_mlock=False
-)
+def load_model():
+    global llm
+    if llm is None:
+        # Download GGUF model from HF Hub
+        model_path = hf_hub_download(
+            repo_id="TheBloke/deepseek-coder-1.3b-instruct-GGUF",
+            filename="deepseek-coder-1.3b-instruct.Q4_K_M.gguf"
+        )
+        # Load LLaMA model
+        llm = Llama(
+            model_path=model_path,
+            n_ctx=1024,
+            n_threads=4,
+            n_gpu_layers=0,  # Set 0 for CPU-only
+            use_mlock=False
+        )
 
-# Inference function
 def generate_response(prompt):
+    load_model()
     full_prompt = f"### Instruction:\n{prompt}\n\n### Response:\n"
     output = llm(full_prompt, max_tokens=512, stop=["###"])
     return output["choices"][0]["text"]
 
-# Gradio UI
 demo = gr.Interface(
     fn=generate_response,
     inputs=gr.Textbox(lines=5, label="Enter your instruction"),
     outputs=gr.Textbox(lines=10, label="Model Response"),
-    title="Viber Ai",
-    description="Ask the model to write or modify HTML or code with instructions."
+    title="Viber AI",
+    description="Ask the model to generate or modify code, HTML, or general text via instructions."
 )
 
 demo.launch()
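
Once the Space rebuilds with this change, the gr.Interface endpoint can be exercised remotely with the gradio_client library; a minimal sketch follows, assuming a Space id of "prakhardoneria/viber-ai" (a placeholder not taken from this commit) and the default "/predict" endpoint that gr.Interface exposes.

# Minimal sketch: query the updated app from a client.
# "prakhardoneria/viber-ai" is a hypothetical Space id; replace with the real one.
from gradio_client import Client

client = Client("prakhardoneria/viber-ai")        # hypothetical Space id
result = client.predict(
    "Write an HTML page with a centered heading",  # maps to the instruction Textbox
    api_name="/predict",                           # default endpoint for gr.Interface
)
print(result)

The first call will be slow because load_model() downloads the GGUF file and initializes llama.cpp on demand; subsequent calls reuse the cached global llm.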