wanzhenchn committed on
Commit
583dacf
·
verified ·
1 Parent(s): 863a5a6

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +101 -3
README.md CHANGED
@@ -1,3 +1,101 @@
1
- ---
2
- license: mit
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ language:
4
+ - en
5
+ - zh
6
+ base_model:
7
+ - Qwen/Qwen2.5-VL-7B-Instruct
8
+ pipeline_tag: image-text-to-text
9
+ library_name: transformers
10
+ tags:
11
+ - text-generation-inference
12
+ ---
13
+
14
+ # Qwen2.5-VL-7B-Instruct-gptqmodel-int8
15
+
16
+ This is a GPTQ-INT8 quantized version of [Qwen2.5-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct), produced with the [GPTQModel](https://github.com/ModelCloud/GPTQModel) toolkit.
17
+
18
+ ## How to quantize
19
+
20
+ ### Install
21
+
22
+ ```bash
23
+ # Python 3.10.x or above
24
+ pip3 install -v "gptqmodel>=2.2.0" --no-build-isolation
25
+
26
+ ```
27
+
28
+ ### Quantize
29
+
30
+ ```bash
31
+ python3 gptqmodel_quantize.py /path/to/Qwen2.5-VL-7B-Instruct/ /path/to/Qwen2.5-VL-7B-Instruct-gptqmodel-int8 8
32
+
33
+ ```
34
+
35
+ ```python
36
# gptqmodel_quantize.py
"""Quantize a Qwen2-VL-family checkpoint with GPTQModel.

Usage:
    python3 gptqmodel_quantize.py <model_path> <output_path> <bit>
"""

import os

import fire
from datasets import load_dataset

from gptqmodel import GPTQModel, QuantizeConfig
from gptqmodel.models.definitions.base_qwen2_vl import BaseQwen2VLGPTQ

# Process-wide environment tweaks, applied once when the module is loaded.
# Ask CUDA to enumerate devices in PCI bus order.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
# Opt in to PyTorch's expandable-segments CUDA allocator mode.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
# NOTE(review): PYTHONUTF8 is normally read at interpreter start-up, so setting
# it here presumably only affects child processes -- confirm this is intended.
os.environ["PYTHONUTF8"] = "1"
47
+
48
def format_qwen2_vl_dataset(image, assistant, prompt="generate a caption for this image"):
    """Build one two-turn chat sample (user image + prompt, assistant reply).

    Args:
        image: Value stored under the ``"image"`` key of the user turn
            (the caller in this script passes the sample's ``url`` field).
        assistant: Content of the assistant turn (the reference caption).
        prompt: Text instruction that accompanies the image in the user turn.
            Defaults to the caption request used by the original script.

    Returns:
        A list with two message dicts: the user turn, whose ``content`` is an
        image entry followed by a text entry, and the assistant turn.
    """
    return [
        {
            "role": "user",
            "content": [
                {"type": "image", "image": image},
                {"type": "text", "text": prompt},
            ],
        },
        {"role": "assistant", "content": assistant},
    ]
59
+
60
+
61
def prepare_dataset(
    format_func,
    n_sample: int = 20,
    dataset_name: str = "laion/220k-GPT4Vision-captions-from-LIVIS",
) -> list[list[dict]]:
    """Load the first ``n_sample`` training rows and format them as chat samples.

    Args:
        format_func: Callable invoked as ``format_func(url, caption)`` for each
            row; its return value becomes one calibration sample.
        n_sample: Number of rows taken from the start of the ``train`` split.
            Must be positive.
        dataset_name: Hugging Face dataset id to load. Rows are expected to
            expose ``"url"`` and ``"caption"`` fields.

    Returns:
        A list with one formatted sample per loaded row.

    Raises:
        ValueError: If ``n_sample`` is not positive. A negative value would
            turn the split expression into ``train[:-k]`` and silently pull
            in almost the whole dataset; zero yields no calibration data.
    """
    if n_sample <= 0:
        raise ValueError(f"n_sample must be a positive integer, got {n_sample!r}")

    # Kept local so this helper stays usable when copied on its own.
    from datasets import load_dataset

    dataset = load_dataset(dataset_name, split=f"train[:{n_sample}]")
    return [format_func(sample["url"], sample["caption"]) for sample in dataset]
71
+
72
+
73
def get_calib_dataset(model, n_sample: int = 256):
    """Return the calibration samples matching the loaded model's family.

    Args:
        model: Model object returned by ``GPTQModel.load``. Only instances of
            ``BaseQwen2VLGPTQ`` are handled.
        n_sample: Number of calibration samples to prepare. Defaults to 256,
            the value the script originally hard-coded.

    Returns:
        The list produced by ``prepare_dataset`` with the Qwen2-VL formatter.

    Raises:
        NotImplementedError: If ``model`` is not a ``BaseQwen2VLGPTQ``.
    """
    if isinstance(model, BaseQwen2VLGPTQ):
        return prepare_dataset(format_qwen2_vl_dataset, n_sample=n_sample)
    raise NotImplementedError(f"Unsupported MODEL: {model.__class__}")
77
+
78
+
79
def quantize(model_path: str,
             output_path: str,
             bit: int,
             group_size: int = 128,
             batch_size: int = 8):
    """Quantize a checkpoint with GPTQ, save it, and run a smoke-test generation.

    Args:
        model_path: Path of the source (unquantized) model.
        output_path: Directory the quantized model is saved to.
        bit: Bit width passed to ``QuantizeConfig(bits=...)``.
        group_size: Group size passed to ``QuantizeConfig``. Defaults to 128,
            the value the script originally hard-coded.
        batch_size: Calibration batch size passed to ``model.quantize``.
            Increase it to match GPU/VRAM specs to speed up quantization.
    """
    quant_config = QuantizeConfig(bits=bit, group_size=group_size)

    model = GPTQModel.load(model_path, quant_config)
    calibration_dataset = get_calib_dataset(model)

    model.quantize(calibration_dataset, batch_size=batch_size)
    model.save(output_path)

    # Drop the reference to the in-memory model before reloading from disk so
    # both copies need not be alive at the same time.
    del model

    # Test post-quant inference on the saved artifact.
    model = GPTQModel.load(output_path)
    result = model.generate("Uncovering deep insights begins with")[0]  # tokens
    print(model.tokenizer.decode(result))  # string output


if __name__ == "__main__":
    # Expose `quantize` as a CLI: positional args map to its parameters.
    fire.Fire(quantize)
100
+
101
+ ```