Spaces:

Nac31
/

Sacha-1

Paused

Nac31 committed on Mar 4

Commit

50a2b44

1 Parent(s): 744b744

Add quantize

Files changed (2) hide show

app.py CHANGED Viewed

@@ -4,6 +4,7 @@ import torch
 import os
 from dotenv import load_dotenv
 from huggingface_hub import login
 load_dotenv()
@@ -13,14 +14,21 @@ login(hf_token)
 # Configuration du modèle
 model_path = "mistralai/Mistral-Large-Instruct-2411"
-dtype = torch.bfloat16 if torch.cuda.get_device_capability()[0] == 8 else torch.float16
-# Initialisation du modèle
 tokenizer = AutoTokenizer.from_pretrained(model_path)
 model = AutoModelForCausalLM.from_pretrained(
     model_path,
     device_map="auto",
-    torch_dtype=dtype
 )
 pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

 import os
 from dotenv import load_dotenv
 from huggingface_hub import login
+from transformers import BitsAndBytesConfig
 load_dotenv()
 # Configuration du modèle
 model_path = "mistralai/Mistral-Large-Instruct-2411"
+# Configuration de la quantification 4-bits
+quantization_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_compute_dtype=torch.float16,
+    bnb_4bit_quant_type="nf4",
+    bnb_4bit_use_double_quant=True
+)
+# Initialisation du modèle avec quantification
 tokenizer = AutoTokenizer.from_pretrained(model_path)
 model = AutoModelForCausalLM.from_pretrained(
     model_path,
     device_map="auto",
+    quantization_config=quantization_config
 )
 pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

requirements.txt CHANGED Viewed

@@ -5,7 +5,7 @@ datasets
 sentencepiece
 tokenizers
 gradio
-bitsandbytes
 openai
 langchain
 python-dotenv

 sentencepiece
 tokenizers
 gradio
+bitsandbytes>=0.41.1
 openai
 langchain
 python-dotenv