omaryasserhassan committed on
Commit
778f3aa
·
verified ·
1 Parent(s): 8b2ac0c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -10
app.py CHANGED
@@ -3,7 +3,7 @@ import traceback
3
  from fastapi import FastAPI, HTTPException
4
  from pydantic import BaseModel
5
  from huggingface_hub import hf_hub_download
6
- from ctransformers import AutoModelForCausalLM
7
 
8
  # --- Config ---
9
  REPO_ID = "bartowski/Llama-3.2-3B-Instruct-GGUF"
@@ -24,25 +24,19 @@ def get_model():
24
  if _model is not None:
25
  return _model
26
 
27
- print("📥 Downloading model...")
28
  local_file = hf_hub_download(
29
  repo_id=REPO_ID,
30
  filename=FILENAME,
31
  cache_dir=CACHE_DIR,
32
  local_dir_use_symlinks=False,
33
  )
34
- local_dir = os.path.dirname(local_file)
35
- print("✅ Model downloaded at", local_file)
36
 
37
- print("🔄 Loading model into memory...")
38
- _model = AutoModelForCausalLM.from_pretrained(
39
- local_dir,
40
- model_file=FILENAME,
41
  model_type=MODEL_TYPE,
42
- gpu_layers=0, # CPU only
43
  threads=os.cpu_count() or 2
44
  )
45
- print("✅ Model loaded")
46
  return _model
47
 
48
  # --- Request Schema ---
 
3
  from fastapi import FastAPI, HTTPException
4
  from pydantic import BaseModel
5
  from huggingface_hub import hf_hub_download
6
+ from ctransformers import LLM
7
 
8
  # --- Config ---
9
  REPO_ID = "bartowski/Llama-3.2-3B-Instruct-GGUF"
 
24
  if _model is not None:
25
  return _model
26
 
 
27
  local_file = hf_hub_download(
28
  repo_id=REPO_ID,
29
  filename=FILENAME,
30
  cache_dir=CACHE_DIR,
31
  local_dir_use_symlinks=False,
32
  )
 
 
33
 
34
+ _model = LLM(
35
+ model=local_file, # direct file path
 
 
36
  model_type=MODEL_TYPE,
37
+ gpu_layers=0,
38
  threads=os.cpu_count() or 2
39
  )
 
40
  return _model
41
 
42
  # --- Request Schema ---