starnernj committed on
Commit 4dbc1c3 · verified · 1 Parent(s): b96a955

Update app.py

Files changed (1)
  1. app.py +7 -1
app.py CHANGED
@@ -40,6 +40,9 @@ def load_model():
 
     # Load base model - can't do this on the free tier - not enough memory
     # model = AutoModelForCausalLM.from_pretrained(base_model_name)
+
+    # ✅ Force CPU placement before moving anything to CUDA
+    torch.cuda.is_available = lambda: False  # 👈 Trick PyTorch to think CUDA isn't available at first
 
     model = AutoModelForCausalLM.from_pretrained(
         base_model_name,
@@ -50,8 +53,11 @@ def load_model():
     )
 
     # Load LoRA adapter
-    model = PeftModel.from_pretrained(model, lora_model_name)
+    model = PeftModel.from_pretrained(model, lora_model_name, device_map={"": "cpu"})
 
+    # ✅ Now, allow CUDA again and move everything to GPU
+    torch.cuda.is_available = lambda: True
+
     # Move model to GPU *AFTER* loading LoRA to avoid CUDA init errors
     model = model.to("cuda" if torch.cuda.is_available() else "cpu")
 
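
For readers who only see this diff, below is a minimal sketch of how load_model() reads after the patch. It is an approximation, not the full app.py: the repo IDs, the remaining from_pretrained kwargs elided by the diff, and the imports are assumed placeholders. It also saves and restores the original torch.cuda.is_available instead of hard-coding lambda: True, a small deviation from the patch so the final .to(...) still falls back to CPU when no GPU is present.

import torch
from transformers import AutoModelForCausalLM
from peft import PeftModel

# Placeholders: the real repo IDs are defined elsewhere in app.py
base_model_name = "<base-model-repo-id>"
lora_model_name = "<lora-adapter-repo-id>"

def load_model():
    # Keep a handle on the real check so it can be restored later
    # (the commit itself hard-codes `lambda: True` instead)
    real_is_available = torch.cuda.is_available

    # Force CPU placement before anything touches CUDA
    torch.cuda.is_available = lambda: False

    model = AutoModelForCausalLM.from_pretrained(
        base_model_name,
        # ... the remaining kwargs from app.py (not shown in the diff) go here
    )

    # Load the LoRA adapter onto the CPU copy of the base model
    model = PeftModel.from_pretrained(model, lora_model_name, device_map={"": "cpu"})

    # Restore the real CUDA check, then move everything to GPU if one exists
    torch.cuda.is_available = real_is_available
    model = model.to("cuda" if torch.cuda.is_available() else "cpu")

    return model

The point of the monkey-patch is to keep both the base model and the adapter on CPU while they are being loaded, so CUDA is only touched by the final .to(...) call, in line with the existing comment about moving to GPU after loading LoRA to avoid CUDA init errors.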