Training in progress, step 20, checkpoint
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:fd748236b4ef4e42b47b6ff97597d1978c0e9acbb1dd052e1d0533f43480a821
 size 167832240
last-checkpoint/optimizer.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:7434f9551b74d332180f0173a27eeb45c296284bf42c192abf06341c04101219
 size 335945362
last-checkpoint/pytorch_model_fsdp.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:7d512715415b0bdfe254311c5cbd8beef790a280954b3415d76b8e480262dca5
 size 167939550
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:015707cb16790250630febca682498cb5d3456d5a13443b953687f19dc7d59ed
 size 1064
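The four checkpoint binaries above are stored as Git LFS pointer files: each pointer records only the LFS spec version, the sha256 oid of the payload, and its size in bytes. Below is a minimal sketch (not part of this repository) for checking a locally downloaded checkpoint file against the oid and size shown in this commit; the local path is a placeholder and the expected values are copied from the adapter_model.safetensors pointer.

```python
import hashlib
from pathlib import Path

# Values copied from the new LFS pointer for last-checkpoint/adapter_model.safetensors.
EXPECTED_OID = "fd748236b4ef4e42b47b6ff97597d1978c0e9acbb1dd052e1d0533f43480a821"
EXPECTED_SIZE = 167832240

# Hypothetical local path; adjust to wherever the checkpoint was downloaded.
path = Path("last-checkpoint/adapter_model.safetensors")

digest = hashlib.sha256()
with path.open("rb") as f:
    # Hash in chunks so large checkpoint files do not need to fit in memory.
    for chunk in iter(lambda: f.read(1 << 20), b""):
        digest.update(chunk)

assert path.stat().st_size == EXPECTED_SIZE, "size mismatch with LFS pointer"
assert digest.hexdigest() == EXPECTED_OID, "sha256 mismatch with LFS pointer"
print("checkpoint file matches its LFS pointer")
```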
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.0030452988199467074,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 20,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -157,6 +157,156 @@
       "rewards/margins": 0.6743541359901428,
       "rewards/rejected": -0.9285954833030701,
       "step": 10
+    },
+    {
+      "epoch": 0.001674914350970689,
+      "grad_norm": 12.694933891296387,
+      "learning_rate": 0.00017289686274214118,
+      "logits/chosen": 0.028933856636285782,
+      "logits/rejected": 0.00032558292150497437,
+      "logps/chosen": -156.97647094726562,
+      "logps/rejected": -182.00267028808594,
+      "loss": 0.6836,
+      "rewards/accuracies": 0.375,
+      "rewards/chosen": -0.747052788734436,
+      "rewards/margins": 0.1833851933479309,
+      "rewards/rejected": -0.9304379820823669,
+      "step": 11
+    },
+    {
+      "epoch": 0.0018271792919680244,
+      "grad_norm": 14.31529712677002,
+      "learning_rate": 0.000163742398974869,
+      "logits/chosen": 0.08384992182254791,
+      "logits/rejected": 0.1143774539232254,
+      "logps/chosen": -187.43304443359375,
+      "logps/rejected": -182.16583251953125,
+      "loss": 0.7032,
+      "rewards/accuracies": 0.625,
+      "rewards/chosen": -0.8268721103668213,
+      "rewards/margins": 0.055836014449596405,
+      "rewards/rejected": -0.8827080726623535,
+      "step": 12
+    },
+    {
+      "epoch": 0.0019794442329653596,
+      "grad_norm": 10.662793159484863,
+      "learning_rate": 0.00015358267949789966,
+      "logits/chosen": 0.12315154075622559,
+      "logits/rejected": 0.15528245270252228,
+      "logps/chosen": -157.62741088867188,
+      "logps/rejected": -142.5953826904297,
+      "loss": 0.6688,
+      "rewards/accuracies": 0.75,
+      "rewards/chosen": -0.5131796002388,
+      "rewards/margins": 0.14580143988132477,
+      "rewards/rejected": -0.6589810848236084,
+      "step": 13
+    },
+    {
+      "epoch": 0.0021317091739626952,
+      "grad_norm": 13.018126487731934,
+      "learning_rate": 0.00014257792915650728,
+      "logits/chosen": 0.1431768834590912,
+      "logits/rejected": 0.03350931778550148,
+      "logps/chosen": -146.06101989746094,
+      "logps/rejected": -152.32742309570312,
+      "loss": 0.6922,
+      "rewards/accuracies": 0.375,
+      "rewards/chosen": -0.8030464053153992,
+      "rewards/margins": -0.06302566081285477,
+      "rewards/rejected": -0.740020751953125,
+      "step": 14
+    },
+    {
+      "epoch": 0.0022839741149600305,
+      "grad_norm": 12.851974487304688,
+      "learning_rate": 0.00013090169943749476,
+      "logits/chosen": 0.10366199910640717,
+      "logits/rejected": 0.04498244822025299,
+      "logps/chosen": -153.73416137695312,
+      "logps/rejected": -168.11575317382812,
+      "loss": 0.721,
+      "rewards/accuracies": 0.5,
+      "rewards/chosen": -0.8541444540023804,
+      "rewards/margins": 0.09350776672363281,
+      "rewards/rejected": -0.947652280330658,
+      "step": 15
+    },
+    {
+      "epoch": 0.0024362390559573657,
+      "grad_norm": 10.859753608703613,
+      "learning_rate": 0.00011873813145857249,
+      "logits/chosen": 0.018513256683945656,
+      "logits/rejected": 0.020192591473460197,
+      "logps/chosen": -149.4884033203125,
+      "logps/rejected": -158.08319091796875,
+      "loss": 0.646,
+      "rewards/accuracies": 0.75,
+      "rewards/chosen": -0.997456967830658,
+      "rewards/margins": 0.2520313262939453,
+      "rewards/rejected": -1.249488353729248,
+      "step": 16
+    },
+    {
+      "epoch": 0.0025885039969547013,
+      "grad_norm": 17.59671974182129,
+      "learning_rate": 0.00010627905195293135,
+      "logits/chosen": -0.13020677864551544,
+      "logits/rejected": -0.03429074585437775,
+      "logps/chosen": -142.9901123046875,
+      "logps/rejected": -151.025634765625,
+      "loss": 0.7414,
+      "rewards/accuracies": 0.5,
+      "rewards/chosen": -0.9945340156555176,
+      "rewards/margins": 0.11265383660793304,
+      "rewards/rejected": -1.1071878671646118,
+      "step": 17
+    },
+    {
+      "epoch": 0.0027407689379520365,
+      "grad_norm": 12.00832462310791,
+      "learning_rate": 9.372094804706867e-05,
+      "logits/chosen": 0.015887008979916573,
+      "logits/rejected": 0.015174375846982002,
+      "logps/chosen": -148.59466552734375,
+      "logps/rejected": -164.22711181640625,
+      "loss": 0.5364,
+      "rewards/accuracies": 0.75,
+      "rewards/chosen": -1.0675632953643799,
+      "rewards/margins": 0.4369748830795288,
+      "rewards/rejected": -1.5045380592346191,
+      "step": 18
+    },
+    {
+      "epoch": 0.0028930338789493718,
+      "grad_norm": 14.49167251586914,
+      "learning_rate": 8.126186854142752e-05,
+      "logits/chosen": 0.026431191712617874,
+      "logits/rejected": -0.0005289912223815918,
+      "logps/chosen": -171.24415588378906,
+      "logps/rejected": -193.31686401367188,
+      "loss": 0.627,
+      "rewards/accuracies": 0.75,
+      "rewards/chosen": -1.253060221672058,
+      "rewards/margins": 0.13999736309051514,
+      "rewards/rejected": -1.3930575847625732,
+      "step": 19
+    },
+    {
+      "epoch": 0.0030452988199467074,
+      "grad_norm": 16.078474044799805,
+      "learning_rate": 6.909830056250527e-05,
+      "logits/chosen": 0.06809265166521072,
+      "logits/rejected": 0.034393489360809326,
+      "logps/chosen": -155.27926635742188,
+      "logps/rejected": -175.18618774414062,
+      "loss": 0.726,
+      "rewards/accuracies": 0.625,
+      "rewards/chosen": -1.043820858001709,
+      "rewards/margins": 0.27667587995529175,
+      "rewards/rejected": -1.320496678352356,
+      "step": 20
     }
   ],
   "logging_steps": 1,
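The added log_history entries carry DPO-style preference metrics (rewards/chosen, rewards/rejected, rewards/margins, rewards/accuracies) for steps 11 through 20. Below is a small sketch, assuming the checkpoint directory from this commit has been downloaded locally, that loads trainer_state.json and summarizes those metrics over the logged steps; the path is a placeholder.

```python
import json
from statistics import mean

# Hypothetical local path to the checkpoint directory from this commit.
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print(f"global_step={state['global_step']}, epoch={state['epoch']:.6f}")

# Keep only training-log entries that actually carry the preference metrics.
logs = [entry for entry in state["log_history"] if "rewards/margins" in entry]

for key in ("loss", "rewards/margins", "rewards/accuracies"):
    values = [entry[key] for entry in logs]
    print(f"{key}: mean={mean(values):.4f}, last={values[-1]:.4f}")
```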