Update app.py
app.py (changed):

```diff
@@ -44,6 +44,7 @@ def load_model():
     # ✅ Force CPU placement before moving anything to CUDA
     torch.cuda.is_available = lambda: False  # 🔥 Trick PyTorch to think CUDA isn't available at first
 
+    """
     # ✅ Configure BitsAndBytes to use CPU first
     quantization_config = BitsAndBytesConfig(
         load_in_8bit=True,  # ✅ Uses 8-bit instead of 4-bit
@@ -53,12 +54,12 @@ def load_model():
         # bnb_4bit_use_double_quant=True,
         # bnb_4bit_quant_type="nf4"
     )
-
+    """
     model = AutoModelForCausalLM.from_pretrained(
         base_model_name,
-        quantization_config=quantization_config,
+        # quantization_config=quantization_config,
         # load_in_4bit=True,  # Reduces memory, but requires a GPU
-
+        torch_dtype=torch.float16,
         # llm_int8_enable_fp32_cpu_offload=True,  # Offload some layers to CPU
         device_map={"": "cpu"}  # Load everything on CPU first
     )
```
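In effect, the commit swaps bitsandbytes 8-bit quantization for plain fp16 weights loaded on the CPU: the `BitsAndBytesConfig` block is fenced off inside a triple-quoted string, `quantization_config` is commented out of `from_pretrained`, and `torch_dtype=torch.float16` takes its place. Below is a minimal sketch of what `load_model()` plausibly looks like after this commit; the function name comes from the hunk header, while the imports and the `base_model_name` value are assumptions, since the diff shows only the function body.

```python
# Sketch of load_model() after this commit. The imports and
# base_model_name are hypothetical; the diff shows only the body.
import torch
from transformers import AutoModelForCausalLM

base_model_name = "some-org/some-model"  # placeholder; the real repo id is not in the diff

def load_model():
    # Force CPU placement before anything touches CUDA: later
    # availability checks in library code will now report False.
    torch.cuda.is_available = lambda: False

    # fp16 weights on CPU instead of bitsandbytes 8-bit quantization;
    # roughly halves memory versus fp32 without needing a GPU.
    model = AutoModelForCausalLM.from_pretrained(
        base_model_name,
        torch_dtype=torch.float16,
        device_map={"": "cpu"},  # load everything on CPU first
    )
    return model
```

Two caveats on this pattern: monkeypatching `torch.cuda.is_available` only fools code that checks availability after the patch runs, so it must execute before any library inspects the device, and it does not prevent an explicit `.to("cuda")` elsewhere; and passing `device_map` to `from_pretrained` requires the Accelerate package to be installed.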