Yadukrishnan committed on
Commit
8eed7bd
·
verified ·
1 Parent(s): 9668836

Update src/model_loader.py

Browse files
Files changed (1) hide show
  1. src/model_loader.py +6 -6
src/model_loader.py CHANGED
@@ -9,12 +9,12 @@ cached_tokenizer = None
9
def load_model():
    """Return the (model, tokenizer) pair for MODEL_NAME, loading them at most once.

    Uses the module-level caches ``cached_model`` / ``cached_tokenizer`` so
    repeated calls reuse the already-loaded objects instead of reloading
    from disk/hub.

    Returns:
        tuple: (AutoModelForCausalLM, AutoTokenizer) for MODEL_NAME.
    """
    global cached_model, cached_tokenizer
    if cached_model is None or cached_tokenizer is None:
        # Dead commented-out BitsAndBytesConfig block removed: it referenced
        # a bnb_config that was never created, and the stale trailing comment
        # on from_pretrained suggested an argument that did not exist.
        cached_model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
        cached_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    return cached_model, cached_tokenizer
 
9
def load_model():
    """Return the (model, tokenizer) pair for MODEL_NAME, loading them at most once.

    The model is loaded with 4-bit NF4 quantization (double quantization,
    bfloat16 compute dtype) via ``BitsAndBytesConfig``. Module-level caches
    ``cached_model`` / ``cached_tokenizer`` ensure repeated calls reuse the
    same objects instead of reloading.

    Returns:
        tuple: (AutoModelForCausalLM, AutoTokenizer) for MODEL_NAME.
    """
    global cached_model, cached_tokenizer
    if cached_model is None or cached_tokenizer is None:
        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.bfloat16,
        )
        # BUG FIX: bnb_config was constructed but the quantization_config
        # argument was still commented out, so 4-bit quantization was
        # silently never applied. Pass the config through.
        cached_model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME, quantization_config=bnb_config
        )
        cached_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    return cached_model, cached_tokenizer