starnernj committed on
Commit
7b951c9
·
verified ·
1 Parent(s): 76ad082

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -4
app.py CHANGED
@@ -46,10 +46,12 @@ def load_model():
46
 
47
  # ✅ Configure BitsAndBytes to use CPU first
48
  quantization_config = BitsAndBytesConfig(
49
- load_in_4bit=True,
50
- bnb_4bit_compute_dtype=torch.float16,
51
- bnb_4bit_use_double_quant=True,
52
- bnb_4bit_quant_type="nf4"
 
 
53
  )
54
 
55
  model = AutoModelForCausalLM.from_pretrained(
 
46
 
47
  # ✅ Configure BitsAndBytes to use CPU first
48
  quantization_config = BitsAndBytesConfig(
49
+ load_in_8bit=True, # ✅ Uses 8-bit instead of 4-bit
50
+ device_map={"": "cpu"},
51
+ # load_in_4bit=True,
52
+ # bnb_4bit_compute_dtype=torch.float16,
53
+ # bnb_4bit_use_double_quant=True,
54
+ # bnb_4bit_quant_type="nf4"
55
  )
56
 
57
  model = AutoModelForCausalLM.from_pretrained(