cicdatopea committed: update to iter200 results

README.md (changed)

---
datasets:
- NeelNanda/pile-10k
base_model:
- MiniMaxAI/MiniMax-Text-01
---

## Model Details

This model is an int4 model with group_size 128 and symmetric quantization of [MiniMaxAI/MiniMax-Text-01](https://huggingface.co/MiniMaxAI/MiniMax-Text-01), generated by the [intel/auto-round](https://github.com/intel/auto-round) algorithm. The model is in AutoRound format, which is **NOT** supported by other serving frameworks such as vLLM.
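
For a quick sanity check that a local download really carries these settings, the quantization metadata is stored in the checkpoint's `config.json`; a minimal sketch (the directory name is a placeholder, and the field names follow the usual AutoRound/GPTQ-style layout, so treat both as assumptions):

```python
import json

# Placeholder path: point this at your local copy of this repository.
with open("MiniMax-Text-01-int4-sym-w4g128/config.json") as f:
    cfg = json.load(f)

# Field names assumed from the common AutoRound/GPTQ config layout.
qcfg = cfg.get("quantization_config", {})
print(qcfg.get("bits"), qcfg.get("group_size"), qcfg.get("sym"))  # expect 4, 128, True
```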

Please follow the [license](https://huggingface.co/MiniMaxAI/MiniMax-Text-01/blob/main/LICENSE) of the original model.

## INT4 Inference on CUDA (4×80G)

Requirements:

```bash
pip3 install git+https://github.com/intel/auto-round.git@bf16_inference
pip3 install auto-gptq
```
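
Because auto-round is installed from the `bf16_inference` branch rather than a released wheel, it is worth confirming that both packages import cleanly before committing to the long model load; a minimal check (the `__version__` attributes are assumed to exist, hence the `getattr` guards):

```python
# Quick import check for the two packages installed above.
import auto_round
import auto_gptq

print(getattr(auto_round, "__version__", "unknown"))
print(getattr(auto_gptq, "__version__", "unknown"))
```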

~~~python
from auto_round import AutoRoundConfig  ## must import for autoround format
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

quantized_model_dir = "/data3/wenhuach/MiniMax-Text-01-int4-sym-w4g128"

tokenizer = AutoTokenizer.from_pretrained(quantized_model_dir, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(quantized_model_dir,
                                             trust_remote_code=True,
                                             torch_dtype=torch.bfloat16,  ## must use bf16
                                             device_map="auto")


def forward_hook(module, input, output):
    # Clamp activations into the float16 range to avoid inf/-inf, then keep them in bf16.
    return torch.clamp(output, -65504, 65504).to(torch.bfloat16)


def register_fp16_pre_hooks(model):
    for name, module in model.named_modules():
        # NOTE: the body of this loop is elided in the diff; attaching the clamp
        # hook to every leaf module is an assumption about the original code.
        if len(list(module.children())) == 0:
            module.register_forward_hook(forward_hook)


register_fp16_pre_hooks(model)
tokenizer.pad_token = tokenizer.eos_token

prompts = [
    "为什么企鹅没有被北极熊吃掉?",
    "树枝上有十只鸟,如果你射杀了一只,还剩下几只?请用中文回答",
    "How many r in strawberry.",
    "There is a girl who likes adventure,",
    "hello"
]

texts = []
for prompt in prompts:
    messages = [
        {"role": "system", "content": [{"type": "text",
                                        "text": "You are a helpful assistant created by MiniMax based on MiniMax-Text-01 model."}]},
        {"role": "user", "content": [{"type": "text", "text": prompt}]},
    ]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    texts.append(text)
inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True, padding_side='left')

outputs = model.generate(
    input_ids=inputs["input_ids"].to(model.device),
    attention_mask=inputs["attention_mask"].to(model.device),
    max_new_tokens=512,
    num_return_sequences=1,
    do_sample=False,
    eos_token_id=200020,
)
generated_ids = [
    output_ids[len(input_ids):] for input_ids, output_ids in zip(inputs["input_ids"], outputs)
]

decoded_outputs = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)

for i, prompt in enumerate(prompts):
    print(f"Prompt: {prompt}")
    print(f"Generated: {decoded_outputs[i]}")
    print("-" * 50)

"""
Prompt: 为什么企鹅没有被北极熊吃掉?
Generated: ### 1. **地理分布差异**
- **企鹅**:主要生活在**南半球**,例如**南极洲**。在南极洲,企鹅没有天敌,因为这里的环境非常恶劣,食物资源有限,动物数量也有限,企鹅是这里的顶级掠食者之一。
- **北极熊**:主要生活在**北半球**,例如**北极地区**。北极熊是北极地区的顶级掠食者之一,它们以海豹等动物为食。
- **结论**:由于**地理分布**的差异,**企鹅和北极熊**在自然界中**无法相遇**,因此**北极熊无法吃掉企鹅**。

### 2. **人为因素**
- **动物园或水族馆**:在**人为因素**的影响
--------------------------------------------------
Prompt: 树枝上有十只鸟,如果你射杀了一只,还剩下几只?请用中文回答
Generated: 让我一步步思考这个问题:

1. 首先,树枝上有10只鸟
2. 射杀1只后,还剩9只
3. 但实际上,当枪声响起,其他鸟会因惊吓而飞走
4. 所以,当射杀1只后,树上不会剩下任何鸟

因此,答案是:0只

因为鸟会因枪声而飞走,不会继续停留在树上。
--------------------------------------------------
Prompt: How many r in strawberry.
Generated: Let me solve this step by step.

1. First, let me count the r's in "strawberry" as I say it
* s (not r)
* t (not r)
* r (1st r)
* a (not r)
* w (not r)
* b (not r)
* b (not r)
* e (not r)
* r (2nd r)
* r (3rd r)
* y (not r)

2. Counting the r's: 3 r's

Therefore, there is 3 r in strawberry.

The answer is 3.
--------------------------------------------------
Prompt: There is a girl who likes adventure,
Generated: There is a girl who likes adventure, and her name is Emily. Emily has always been drawn to the thrill of the unknown, the excitement of stepping into uncharted territory. Here is a story about
--------------------------------------------------
Prompt: hello
Generated: Hello! How can I assist you today?
--------------------------------------------------
"""
~~~
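
A note on the clamp in `forward_hook`: 65504 is the largest finite float16 value, so the clamp keeps any activation that would overflow fp16 finite before the cast back to bf16, whose exponent range is far wider. The constant can be derived rather than hard-coded, as this small self-contained sketch shows:

```python
import torch

fp16_max = torch.finfo(torch.float16).max   # 65504.0, the literal used in forward_hook
bf16_max = torch.finfo(torch.bfloat16).max  # ~3.39e38, far beyond the clamp range

# Same operation forward_hook applies to a module's output:
x = torch.tensor([1e6, -1e6, 3.0])
print(torch.clamp(x, -fp16_max, fp16_max).to(torch.bfloat16))  # out-of-range values pinned to ±65504, then rounded to bf16
```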

## Generate the model

```bash
pip3 install git+https://github.com/intel/auto-round.git@bf16_inference
```

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig

model_name = "MiniMaxAI/MiniMax-Text-01"
config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)  # supplies num_hidden_layers below
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True, torch_dtype=torch.bfloat16)

# Keep the MoE routing gates in 16 bits; everything else is quantized to int4.
fp_layers = [f"model.layers.{i}.block_sparse_moe.gate" for i in range(config.num_hidden_layers)]
layer_config = {}
for fp_layer in fp_layers:
    layer_config[fp_layer] = {"bits": 16}

# Regex keys: pin each expert index (across all layers) to one of two GPUs.
device_map = {}
for i in range(32):
    key = fr"model\.layers\.\d+\.block_sparse_moe\.experts\.{str(i)}\..*$"
    if i < 16:  # NOTE: the split condition is elided in the diff; an even 16/16 split is assumed
        device_map[key] = 0
    else:
        device_map[key] = 1

from auto_round import AutoRound

autoround = AutoRound(model=model, tokenizer=tokenizer, layer_config=layer_config, device_map=device_map,
                      batch_size=1, gradient_accumulate_steps=4, seqlen=512)
autoround.quantize()
autoround.save_quantized(format="auto_round", output_dir="tmp_autoround")
```
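
The `device_map` built above uses regular-expression strings as keys: each pattern matches one expert index across every layer and pins it to a single GPU, splitting the 32 experts between device 0 and device 1. The matching itself is plain Python regex, as this standalone sketch shows (the module names are illustrative):

```python
import re

# Same pattern shape as in the script, fixed to expert index 3.
key = r"model\.layers\.\d+\.block_sparse_moe\.experts\.3\..*$"

# Any layer index matches the \d+ wildcard; only expert 3 matches the fixed index.
print(bool(re.match(key, "model.layers.17.block_sparse_moe.experts.3.w1")))  # True
print(bool(re.match(key, "model.layers.17.block_sparse_moe.experts.4.w1")))  # False
```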