yamatazen committed on
Commit d8aebac · verified · 1 Parent(s): d3ea24a

Update README.md

Files changed (1): README.md +34 -1
README.md CHANGED
@@ -5,4 +5,37 @@ tags:
  - bitsandbytes
  - bnb
  - chatml
- ---
+ ---
+ # Code for quantization (Generated by Grok with manual editing)
+ ```python
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+ import torch
+ import sys
+
+ # Define model ID
+ model_id = sys.argv[1]
+
+ # Configure quantization
+ quantization_config = BitsAndBytesConfig(
+     load_in_4bit=True,  # Use 4-bit quantization (or load_in_8bit=True for 8-bit)
+     bnb_4bit_quant_type="nf4",  # Normal Float 4-bit (nf4) for better precision
+     bnb_4bit_compute_dtype=torch.float16,  # Compute in float16 for efficiency
+     bnb_4bit_use_double_quant=True  # Double quantization for further memory savings
+ )
+
+ # Load tokenizer
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
+
+ # Load quantized model
+ model = AutoModelForCausalLM.from_pretrained(
+     model_id,
+     quantization_config=quantization_config,
+     device_map="auto",  # Automatically map layers to GPU/CPU
+     torch_dtype=torch.float16
+ )
+
+ # Save model and tokenizer
+ save_path = sys.argv[2]
+ model.save_pretrained(save_path)
+ tokenizer.save_pretrained(save_path)
+ ```
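
For reference, a minimal sketch of how the committed script might be run and its output reloaded; the filename `quantize.py` and the save path are hypothetical, and the reload works because `save_pretrained` serializes the `quantization_config` into the saved `config.json`:

```python
# Hypothetical invocation of the script committed above:
#   python quantize.py <model_id> <save_path>
from transformers import AutoModelForCausalLM, AutoTokenizer

save_path = "./model-bnb-4bit"  # hypothetical path, i.e. the sys.argv[2] passed to the script

# The saved config.json carries the quantization_config, so the model
# reloads in 4-bit without re-specifying BitsAndBytesConfig
# (bitsandbytes must still be installed).
model = AutoModelForCausalLM.from_pretrained(save_path, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(save_path)

# Quick smoke test of the reloaded quantized model
inputs = tokenizer("Hello!", return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```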