Nac31 committed on
Commit
50a2b44
·
1 Parent(s): 744b744

Add quantization

Browse files
Files changed (2) hide show
  1. app.py +11 -3
  2. requirements.txt +1 -1
app.py CHANGED
@@ -4,6 +4,7 @@ import torch
4
  import os
5
  from dotenv import load_dotenv
6
  from huggingface_hub import login
 
7
 
8
  load_dotenv()
9
 
@@ -13,14 +14,21 @@ login(hf_token)
13
 
14
  # Configuration du modèle
15
  model_path = "mistralai/Mistral-Large-Instruct-2411"
16
- dtype = torch.bfloat16 if torch.cuda.get_device_capability()[0] == 8 else torch.float16
17
 
18
- # Initialisation du modèle
 
 
 
 
 
 
 
 
19
  tokenizer = AutoTokenizer.from_pretrained(model_path)
20
  model = AutoModelForCausalLM.from_pretrained(
21
  model_path,
22
  device_map="auto",
23
- torch_dtype=dtype
24
  )
25
  pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
26
 
 
4
  import os
5
  from dotenv import load_dotenv
6
  from huggingface_hub import login
7
+ from transformers import BitsAndBytesConfig
8
 
9
  load_dotenv()
10
 
 
14
 
15
  # Configuration du modèle
16
  model_path = "mistralai/Mistral-Large-Instruct-2411"
 
17
 
18
+ # Configuration de la quantification 4-bits
19
+ quantization_config = BitsAndBytesConfig(
20
+ load_in_4bit=True,
21
+ bnb_4bit_compute_dtype=torch.float16,
22
+ bnb_4bit_quant_type="nf4",
23
+ bnb_4bit_use_double_quant=True
24
+ )
25
+
26
+ # Initialisation du modèle avec quantification
27
  tokenizer = AutoTokenizer.from_pretrained(model_path)
28
  model = AutoModelForCausalLM.from_pretrained(
29
  model_path,
30
  device_map="auto",
31
+ quantization_config=quantization_config
32
  )
33
  pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
34
 
requirements.txt CHANGED
@@ -5,7 +5,7 @@ datasets
5
  sentencepiece
6
  tokenizers
7
  gradio
8
- bitsandbytes
9
  openai
10
  langchain
11
  python-dotenv
 
5
  sentencepiece
6
  tokenizers
7
  gradio
8
+ bitsandbytes>=0.41.1
9
  openai
10
  langchain
11
  python-dotenv