Update app.py
app.py (changed)
@@ -40,6 +40,9 @@ def load_model():
 
     # Load base model - can't do this on the free tier - not enough memory
     # model = AutoModelForCausalLM.from_pretrained(base_model_name)
+
+    # ✅ Force CPU placement before moving anything to CUDA
+    torch.cuda.is_available = lambda: False  # 👈 Trick PyTorch to think CUDA isn't available at first
 
     model = AutoModelForCausalLM.from_pretrained(
         base_model_name,
@@ -50,8 +53,11 @@ def load_model():
     )
 
     # Load LoRA adapter
-    model = PeftModel.from_pretrained(model, lora_model_name)
+    model = PeftModel.from_pretrained(model, lora_model_name, device_map={"": "cpu"})
 
+    # ✅ Now, allow CUDA again and move everything to GPU
+    torch.cuda.is_available = lambda: True
+
     # Move model to GPU *AFTER* loading LoRA to avoid CUDA init errors
     model = model.to("cuda" if torch.cuda.is_available() else "cpu")
 
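Taken together, the commit hides CUDA from PyTorch while the base model and the LoRA adapter are instantiated, so every weight lands on the CPU first, and only afterwards moves the combined model to the GPU. Below is a minimal sketch of the resulting load_model(); the model identifiers and the torch_dtype are assumptions standing in for the kwargs hidden between the two hunks, and unlike the commit it restores the original torch.cuda.is_available instead of hard-coding it to True, so the final .to(...) still falls back to CPU on machines without a GPU.

import torch
from transformers import AutoModelForCausalLM
from peft import PeftModel

base_model_name = "base-model-id"    # assumption: placeholder for the repo's base model
lora_model_name = "lora-adapter-id"  # assumption: placeholder for the repo's LoRA adapter


def load_model():
    # Hide CUDA while the base model and adapter are instantiated, so no CUDA
    # context is created and everything is placed on the CPU first.
    real_is_available = torch.cuda.is_available
    torch.cuda.is_available = lambda: False
    try:
        model = AutoModelForCausalLM.from_pretrained(
            base_model_name,
            torch_dtype=torch.float16,  # assumption: stands in for the kwargs hidden between the hunks
        )

        # Load the LoRA adapter with an explicit CPU device map, as in the commit.
        model = PeftModel.from_pretrained(model, lora_model_name, device_map={"": "cpu"})
    finally:
        # Restore the real check before deciding where the model should live.
        torch.cuda.is_available = real_is_available

    # Move the combined model to the GPU *after* the adapter is attached.
    model = model.to("cuda" if torch.cuda.is_available() else "cpu")
    return model

Hard-coding torch.cuda.is_available = lambda: True, as the commit does, is fine on a GPU-backed Space but would make the final .to(...) attempt CUDA on a CPU-only machine, which is why the sketch restores the saved function instead.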