Update app.py
app.py (changed)
@@ -40,6 +40,9 @@ def load_model():
 
     # Load base model - can't do this on the free tier - not enough memory
     # model = AutoModelForCausalLM.from_pretrained(base_model_name)
+
+    # ✅ Force CPU placement before moving anything to CUDA
+    torch.cuda.is_available = lambda: False  # 👈 Trick PyTorch to think CUDA isn't available at first
 
     model = AutoModelForCausalLM.from_pretrained(
         base_model_name,
@@ -50,8 +53,11 @@ def load_model():
     )
 
     # Load LoRA adapter
-    model = PeftModel.from_pretrained(model, lora_model_name)
+    model = PeftModel.from_pretrained(model, lora_model_name, device_map={"": "cpu"})
 
+    # ✅ Now, allow CUDA again and move everything to GPU
+    torch.cuda.is_available = lambda: True
+
     # Move model to GPU *AFTER* loading LoRA to avoid CUDA init errors
     model = model.to("cuda" if torch.cuda.is_available() else "cpu")
 
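Taken together, the commit hides CUDA from PyTorch while the base model and the LoRA adapter are instantiated, so every weight lands on the CPU first, and only afterwards moves the combined model to the GPU. Below is a minimal sketch of the resulting load_model(); the model identifiers and the torch_dtype are assumptions standing in for the kwargs hidden between the two hunks, and unlike the commit it restores the original torch.cuda.is_available instead of hard-coding it to True, so the final .to(...) still falls back to CPU on machines without a GPU.

import torch
from transformers import AutoModelForCausalLM
from peft import PeftModel

base_model_name = "base-model-id"    # assumption: placeholder for the repo's base model
lora_model_name = "lora-adapter-id"  # assumption: placeholder for the repo's LoRA adapter


def load_model():
    # Hide CUDA while the base model and adapter are instantiated, so no CUDA
    # context is created and everything is placed on the CPU first.
    real_is_available = torch.cuda.is_available
    torch.cuda.is_available = lambda: False
    try:
        model = AutoModelForCausalLM.from_pretrained(
            base_model_name,
            torch_dtype=torch.float16,  # assumption: stands in for the kwargs hidden between the hunks
        )

        # Load the LoRA adapter with an explicit CPU device map, as in the commit.
        model = PeftModel.from_pretrained(model, lora_model_name, device_map={"": "cpu"})
    finally:
        # Restore the real check before deciding where the model should live.
        torch.cuda.is_available = real_is_available

    # Move the combined model to the GPU *after* the adapter is attached.
    model = model.to("cuda" if torch.cuda.is_available() else "cpu")
    return model

Hard-coding torch.cuda.is_available = lambda: True, as the commit does, is fine on a GPU-backed Space but would make the final .to(...) attempt CUDA on a CPU-only machine, which is why the sketch restores the saved function instead.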