nm-research committed on
Commit
959e47c
·
verified ·
1 Parent(s): 24d919d

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +41 -26
README.md CHANGED
@@ -93,8 +93,8 @@ processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
93
 
94
  # Oneshot arguments
95
  DATASET_ID = "neuralmagic/calibration"
96
- DATASET_SPLIT = {"LLM": "train[:512]"}
97
- NUM_CALIBRATION_SAMPLES = 512
98
  MAX_SEQUENCE_LENGTH = 2048
99
 
100
  # Load dataset and preprocess.
@@ -135,12 +135,27 @@ def data_collator(batch):
135
  recipe = [
136
  GPTQModifier(
137
  targets="Linear",
138
- scheme="W4A16",
139
- ignore: ["re:.*lm_head.*", "re:.*embed_tokens.*", "re:vision_tower.*", "re:multi_modal_projector.*"],
140
- sequential_update: True,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
  )
142
  ]
143
 
 
144
  SAVE_DIR=f"{model_id.split('/')[1]}-quantized.w4a16"
145
 
146
  # Perform oneshot
@@ -192,45 +207,45 @@ lm_eval \
192
  <tr>
193
  <td rowspan="7"><b>OpenLLM V1</b></td>
194
  <td>ARC Challenge</td>
195
- <td>%</td>
196
- <td>%</td>
197
- <td>%</td>
198
  </tr>
199
  <tr>
200
  <td>GSM8K</td>
201
- <td>%</td>
202
- <td>%</td>
203
- <td>%</td>
204
  </tr>
205
  <tr>
206
  <td>Hellaswag</td>
207
- <td>%</td>
208
- <td>%</td>
209
- <td>%</td>
210
  </tr>
211
  <tr>
212
  <td>MMLU</td>
213
- <td>%</td>
214
- <td>%</td>
215
- <td>%</td>
216
  </tr>
217
  <tr>
218
  <td>Truthfulqa (mc2)</td>
219
- <td>%</td>
220
- <td>%</td>
221
- <td>%</td>
222
  </tr>
223
  <tr>
224
  <td>Winogrande</td>
225
- <td>%</td>
226
- <td>%</td>
227
- <td>%</td>
228
  </tr>
229
  <tr>
230
  <td><b>Average Score</b></td>
231
- <td><b>%</b></td>
232
- <td><b>%</b></td>
233
- <td><b>%</b></td>
234
  </tr>
235
  </tbody>
236
  </table>
 
93
 
94
  # Oneshot arguments
95
  DATASET_ID = "neuralmagic/calibration"
96
+ DATASET_SPLIT = {"LLM": "train[:1024]"}
97
+ NUM_CALIBRATION_SAMPLES = 1024
98
  MAX_SEQUENCE_LENGTH = 2048
99
 
100
  # Load dataset and preprocess.
 
135
  recipe = [
136
  GPTQModifier(
137
  targets="Linear",
138
+ ignore=["re:.*lm_head.*", "re:.*embed_tokens.*", "re:vision_tower.*", "re:multi_modal_projector.*"],
139
+ sequential_update=True,
140
+ sequential_targets=["Gemma3DecoderLayer"],
141
+ dampening_frac=dampening_frac,
142
+ config_groups={
143
+ "group_0": {
144
+ "targets": ["Linear"],
145
+ "weights": {
146
+ "num_bits": 4,
147
+ "group_size": 128,
148
+ "type": "int",
149
+ "symmetric": False,
150
+ "strategy": "group",
151
+ "actorder": "weight",
152
+ },
153
+ },
154
+ },
155
  )
156
  ]
157
 
158
+
159
  SAVE_DIR=f"{model_id.split('/')[1]}-quantized.w4a16"
160
 
161
  # Perform oneshot
 
207
  <tr>
208
  <td rowspan="7"><b>OpenLLM V1</b></td>
209
  <td>ARC Challenge</td>
210
+ <td>36.86%</td>
211
+ <td>33.96%</td>
212
+ <td>92.13%</td>
213
  </tr>
214
  <tr>
215
  <td>GSM8K</td>
216
+ <td>25.17%</td>
217
+ <td>22.14%</td>
218
+ <td>87.95%</td>
219
  </tr>
220
  <tr>
221
  <td>Hellaswag</td>
222
+ <td>56.03%</td>
223
+ <td>53.62%</td>
224
+ <td>95.70%</td>
225
  </tr>
226
  <tr>
227
  <td>MMLU</td>
228
+ <td>39.99%</td>
229
+ <td>37.00%</td>
230
+ <td>92.52%</td>
231
  </tr>
232
  <tr>
233
  <td>Truthfulqa (mc2)</td>
234
+ <td>38.54%</td>
235
+ <td>39.94%</td>
236
+ <td>103.64%</td>
237
  </tr>
238
  <tr>
239
  <td>Winogrande</td>
240
+ <td>58.88%</td>
241
+ <td>57.54%</td>
242
+ <td>97.72%</td>
243
  </tr>
244
  <tr>
245
  <td><b>Average Score</b></td>
246
+ <td><b>42.58%</b></td>
247
+ <td><b>40.70%</b></td>
248
+ <td><b>95.59%</b></td>
249
  </tr>
250
  </tbody>
251
  </table>