Update README.md
Browse files
README.md
CHANGED
@@ -93,8 +93,8 @@ processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
|
|
93 |
|
94 |
# Oneshot arguments
|
95 |
DATASET_ID = "neuralmagic/calibration"
|
96 |
-
DATASET_SPLIT = {"LLM": "train[:…]"}
|
97 |
-
NUM_CALIBRATION_SAMPLES = …
|
98 |
MAX_SEQUENCE_LENGTH = 2048
|
99 |
|
100 |
# Load dataset and preprocess.
|
@@ -135,12 +135,27 @@ def data_collator(batch):
|
|
135 |
recipe = [
|
136 |
GPTQModifier(
|
137 |
targets="Linear",
|
138 |
-
|
139 |
-
|
140 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
141 |
)
|
142 |
]
|
143 |
|
|
|
144 |
SAVE_DIR=f"{model_id.split('/')[1]}-quantized.w4a16"
|
145 |
|
146 |
# Perform oneshot
|
@@ -192,45 +207,45 @@ lm_eval \
|
|
192 |
<tr>
|
193 |
<td rowspan="7"><b>OpenLLM V1</b></td>
|
194 |
<td>ARC Challenge</td>
|
195 |
-
<td>…</td>
|
196 |
-
<td>…</td>
|
197 |
-
<td>…</td>
|
198 |
</tr>
|
199 |
<tr>
|
200 |
<td>GSM8K</td>
|
201 |
-
<td>…</td>
|
202 |
-
<td>…</td>
|
203 |
-
<td>…</td>
|
204 |
</tr>
|
205 |
<tr>
|
206 |
<td>Hellaswag</td>
|
207 |
-
<td>…</td>
|
208 |
-
<td>…</td>
|
209 |
-
<td>…</td>
|
210 |
</tr>
|
211 |
<tr>
|
212 |
<td>MMLU</td>
|
213 |
-
<td>…</td>
|
214 |
-
<td>…</td>
|
215 |
-
<td>…</td>
|
216 |
</tr>
|
217 |
<tr>
|
218 |
<td>Truthfulqa (mc2)</td>
|
219 |
-
<td>…</td>
|
220 |
-
<td>…</td>
|
221 |
-
<td>…</td>
|
222 |
</tr>
|
223 |
<tr>
|
224 |
<td>Winogrande</td>
|
225 |
-
<td>…</td>
|
226 |
-
<td>…</td>
|
227 |
-
<td>…</td>
|
228 |
</tr>
|
229 |
<tr>
|
230 |
<td><b>Average Score</b></td>
|
231 |
-
<td><b>…</b></td>
|
232 |
-
<td><b>…</b></td>
|
233 |
-
<td><b>…</b></td>
|
234 |
</tr>
|
235 |
</tbody>
|
236 |
</table>
|
|
|
93 |
|
94 |
# Oneshot arguments
|
95 |
DATASET_ID = "neuralmagic/calibration"
|
96 |
+
DATASET_SPLIT = {"LLM": "train[:1024]"}
|
97 |
+
NUM_CALIBRATION_SAMPLES = 1024
|
98 |
MAX_SEQUENCE_LENGTH = 2048
|
99 |
|
100 |
# Load dataset and preprocess.
|
|
|
135 |
recipe = [
|
136 |
GPTQModifier(
|
137 |
targets="Linear",
|
138 |
+
ignore=["re:.*lm_head.*", "re:.*embed_tokens.*", "re:vision_tower.*", "re:multi_modal_projector.*"],
|
139 |
+
sequential_update=True,
|
140 |
+
sequential_targets=["Gemma3DecoderLayer"],
|
141 |
+
dampening_frac=dampening_frac,
|
142 |
+
config_groups={
|
143 |
+
"group_0": {
|
144 |
+
"targets": ["Linear"],
|
145 |
+
"weights": {
|
146 |
+
"num_bits": 4,
|
147 |
+
"group_size": 128,
|
148 |
+
"type": "int",
|
149 |
+
"symmetric": False,
|
150 |
+
"strategy": "group",
|
151 |
+
"actorder": "weight",
|
152 |
+
},
|
153 |
+
},
|
154 |
+
},
|
155 |
)
|
156 |
]
|
157 |
|
158 |
+
|
159 |
SAVE_DIR=f"{model_id.split('/')[1]}-quantized.w4a16"
|
160 |
|
161 |
# Perform oneshot
|
|
|
207 |
<tr>
|
208 |
<td rowspan="7"><b>OpenLLM V1</b></td>
|
209 |
<td>ARC Challenge</td>
|
210 |
+
<td>36.86%</td>
|
211 |
+
<td>33.96%</td>
|
212 |
+
<td>92.13%</td>
|
213 |
</tr>
|
214 |
<tr>
|
215 |
<td>GSM8K</td>
|
216 |
+
<td>25.17%</td>
|
217 |
+
<td>22.14%</td>
|
218 |
+
<td>87.95%</td>
|
219 |
</tr>
|
220 |
<tr>
|
221 |
<td>Hellaswag</td>
|
222 |
+
<td>56.03%</td>
|
223 |
+
<td>53.62%</td>
|
224 |
+
<td>95.70%</td>
|
225 |
</tr>
|
226 |
<tr>
|
227 |
<td>MMLU</td>
|
228 |
+
<td>39.99%</td>
|
229 |
+
<td>37.00%</td>
|
230 |
+
<td>92.52%</td>
|
231 |
</tr>
|
232 |
<tr>
|
233 |
<td>Truthfulqa (mc2)</td>
|
234 |
+
<td>38.54%</td>
|
235 |
+
<td>39.94%</td>
|
236 |
+
<td>103.64%</td>
|
237 |
</tr>
|
238 |
<tr>
|
239 |
<td>Winogrande</td>
|
240 |
+
<td>58.88%</td>
|
241 |
+
<td>57.54%</td>
|
242 |
+
<td>97.72%</td>
|
243 |
</tr>
|
244 |
<tr>
|
245 |
<td><b>Average Score</b></td>
|
246 |
+
<td><b>42.58%</b></td>
|
247 |
+
<td><b>40.70%</b></td>
|
248 |
+
<td><b>95.59%</b></td>
|
249 |
</tr>
|
250 |
</tbody>
|
251 |
</table>
|