prakhardoneria committed (verified)
Commit 1c98a01 · 1 Parent(s): b1e49c5

Update app.py

Files changed (1)
  1. app.py +22 -15
app.py CHANGED
@@ -1,32 +1,39 @@
 import gradio as gr
 from llama_cpp import Llama
-import torch
+from huggingface_hub import hf_hub_download
 
-# Load model
-model_path = "deepseek-coder-1.3b-instruct.Q4_K_M.gguf"  # adjust if hosted elsewhere
-n_gpu_layers = 35 if torch.cuda.is_available() else 0
+# Lazy global model
+llm = None
 
-llm = Llama(
-    model_path=model_path,
-    n_ctx=2048,
-    n_threads=8,
-    n_gpu_layers=n_gpu_layers,
-    use_mlock=False
-)
+def load_model():
+    global llm
+    if llm is None:
+        # Download GGUF model from HF Hub
+        model_path = hf_hub_download(
+            repo_id="TheBloke/deepseek-coder-1.3b-instruct-GGUF",
+            filename="deepseek-coder-1.3b-instruct.Q4_K_M.gguf"
+        )
+        # Load LLaMA model
+        llm = Llama(
+            model_path=model_path,
+            n_ctx=1024,
+            n_threads=4,
+            n_gpu_layers=0,  # Set 0 for CPU-only
+            use_mlock=False
+        )
 
-# Inference function
 def generate_response(prompt):
+    load_model()
     full_prompt = f"### Instruction:\n{prompt}\n\n### Response:\n"
     output = llm(full_prompt, max_tokens=512, stop=["###"])
     return output["choices"][0]["text"]
 
-# Gradio UI
 demo = gr.Interface(
     fn=generate_response,
     inputs=gr.Textbox(lines=5, label="Enter your instruction"),
     outputs=gr.Textbox(lines=10, label="Model Response"),
-    title="Viber Ai",
-    description="Ask the model to write or modify HTML or code with instructions."
+    title="Viber AI",
+    description="Ask the model to generate or modify code, HTML, or general text via instructions."
 )
 
 demo.launch()
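
Once the Space rebuilds with this change, the gr.Interface endpoint can be exercised remotely with the gradio_client library; a minimal sketch follows, assuming a Space id of "prakhardoneria/viber-ai" (a placeholder not taken from this commit) and the default "/predict" endpoint that gr.Interface exposes.

# Minimal sketch: query the updated app from a client.
# "prakhardoneria/viber-ai" is a hypothetical Space id; replace with the real one.
from gradio_client import Client

client = Client("prakhardoneria/viber-ai")        # hypothetical Space id
result = client.predict(
    "Write an HTML page with a centered heading",  # maps to the instruction Textbox
    api_name="/predict",                           # default endpoint for gr.Interface
)
print(result)

The first call will be slow because load_model() downloads the GGUF file and initializes llama.cpp on demand; subsequent calls reuse the cached global llm.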