Add quantize
Browse files
- app.py +11 -3
- requirements.txt +1 -1
app.py
CHANGED
@@ -4,6 +4,7 @@ import torch
|
|
4 |
import os
|
5 |
from dotenv import load_dotenv
|
6 |
from huggingface_hub import login
|
|
|
7 |
|
8 |
load_dotenv()
|
9 |
|
@@ -13,14 +14,21 @@ login(hf_token)
|
|
13 |
|
14 |
# Configuration du modèle
|
15 |
model_path = "mistralai/Mistral-Large-Instruct-2411"
|
16 |
-
dtype = torch.bfloat16 if torch.cuda.get_device_capability()[0] == 8 else torch.float16
|
17 |
|
18 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
tokenizer = AutoTokenizer.from_pretrained(model_path)
|
20 |
model = AutoModelForCausalLM.from_pretrained(
|
21 |
model_path,
|
22 |
device_map="auto",
|
23 |
-
|
24 |
)
|
25 |
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
|
26 |
|
|
|
4 |
import os
|
5 |
from dotenv import load_dotenv
|
6 |
from huggingface_hub import login
|
7 |
+
from transformers import BitsAndBytesConfig
|
8 |
|
9 |
load_dotenv()
|
10 |
|
|
|
14 |
|
15 |
# Configuration du modèle
|
16 |
model_path = "mistralai/Mistral-Large-Instruct-2411"
|
|
|
17 |
|
18 |
+
# Configuration de la quantification 4-bits
|
19 |
+
quantization_config = BitsAndBytesConfig(
|
20 |
+
load_in_4bit=True,
|
21 |
+
bnb_4bit_compute_dtype=torch.float16,
|
22 |
+
bnb_4bit_quant_type="nf4",
|
23 |
+
bnb_4bit_use_double_quant=True
|
24 |
+
)
|
25 |
+
|
26 |
+
# Initialisation du modèle avec quantification
|
27 |
tokenizer = AutoTokenizer.from_pretrained(model_path)
|
28 |
model = AutoModelForCausalLM.from_pretrained(
|
29 |
model_path,
|
30 |
device_map="auto",
|
31 |
+
quantization_config=quantization_config
|
32 |
)
|
33 |
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
|
34 |
|
requirements.txt
CHANGED
@@ -5,7 +5,7 @@ datasets
|
|
5 |
sentencepiece
|
6 |
tokenizers
|
7 |
gradio
|
8 |
-
bitsandbytes
|
9 |
openai
|
10 |
langchain
|
11 |
python-dotenv
|
|
|
5 |
sentencepiece
|
6 |
tokenizers
|
7 |
gradio
|
8 |
+
bitsandbytes>=0.41.1
|
9 |
openai
|
10 |
langchain
|
11 |
python-dotenv
|