starnernj committed (verified)
Commit 07be1a6 · Parent: 7b951c9

Update app.py

Files changed (1):
  1. app.py +4 -3
app.py CHANGED
@@ -44,6 +44,7 @@ def load_model():
     # ✅ Force CPU placement before moving anything to CUDA
     torch.cuda.is_available = lambda: False  # 👈 Trick PyTorch to think CUDA isn't available at first
 
+    """
     # ✅ Configure BitsAndBytes to use CPU first
     quantization_config = BitsAndBytesConfig(
         load_in_8bit=True,  # ✅ Uses 8-bit instead of 4-bit
@@ -53,12 +54,12 @@ def load_model():
         # bnb_4bit_use_double_quant=True,
         # bnb_4bit_quant_type="nf4"
     )
-
+    """
     model = AutoModelForCausalLM.from_pretrained(
         base_model_name,
-        quantization_config=quantization_config,
+        # quantization_config=quantization_config,
         # load_in_4bit=True,  # Reduces memory, but requires a GPU
-        # torch_dtype=torch.float16,
+        torch_dtype=torch.float16,
         # llm_int8_enable_fp32_cpu_offload=True,  # Offload some layers to CPU
         device_map={"": "cpu"}  # Load everything on CPU first
     )
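
In effect, this commit disables the BitsAndBytes 8-bit quantization path (the config block is fenced off in a triple-quoted string and the quantization_config argument is commented out) and instead loads the base model in float16 directly on the CPU. A minimal sketch of the resulting load path is below; the model id and the surrounding function shape are assumptions, since only the diff hunk is visible, not the rest of app.py.

import torch
from transformers import AutoModelForCausalLM

# Hypothetical model id for illustration only; the real base_model_name is
# defined elsewhere in app.py and does not appear in this diff.
base_model_name = "meta-llama/Llama-2-7b-hf"

def load_model():
    # Pretend CUDA is absent so nothing gets placed on a GPU (the diff's trick).
    torch.cuda.is_available = lambda: False

    # BitsAndBytes 8-bit loading is disabled by this commit; the model is
    # loaded in half precision on the CPU instead.
    model = AutoModelForCausalLM.from_pretrained(
        base_model_name,
        torch_dtype=torch.float16,  # half precision: roughly 2x smaller than float32
        device_map={"": "cpu"},     # keep every layer on the CPU
    )
    return model

One caveat with this choice: PyTorch's CPU kernels for float16 are considerably more limited than those for float32 or bfloat16, so float16 inference on CPU can be slow or fail on unsupported ops for some models; torch.bfloat16 is often the more robust dtype for CPU-only loading.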