PhoenixB committed on
Commit
53641e0
·
verified ·
1 Parent(s): 371c383

Training in progress, step 20, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:88894e9b0bafcf88835cd631c0d09cb2387fde8e8e05a50e5038262c8b4f19f7
3
  size 167832240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd748236b4ef4e42b47b6ff97597d1978c0e9acbb1dd052e1d0533f43480a821
3
  size 167832240
last-checkpoint/optimizer.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a7b0dc1e79fe23b2921d631590bc7563486342c9c074e95da180c914c96f220
3
  size 335945362
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7434f9551b74d332180f0173a27eeb45c296284bf42c192abf06341c04101219
3
  size 335945362
last-checkpoint/pytorch_model_fsdp.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7fad6cb68d83c9131414ad19bbf5a21e6e10131ba2e63d5bf44bec020d78411a
3
  size 167939550
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d512715415b0bdfe254311c5cbd8beef790a280954b3415d76b8e480262dca5
3
  size 167939550
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c188a6a4749e6ca627bb6d536eb7443f499d5b1b88d98a78f9c713443e010d9c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:015707cb16790250630febca682498cb5d3456d5a13443b953687f19dc7d59ed
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.0015226494099733537,
5
  "eval_steps": 500,
6
- "global_step": 10,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -157,6 +157,156 @@
157
  "rewards/margins": 0.6743541359901428,
158
  "rewards/rejected": -0.9285954833030701,
159
  "step": 10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
  }
161
  ],
162
  "logging_steps": 1,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.0030452988199467074,
5
  "eval_steps": 500,
6
+ "global_step": 20,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
157
  "rewards/margins": 0.6743541359901428,
158
  "rewards/rejected": -0.9285954833030701,
159
  "step": 10
160
+ },
161
+ {
162
+ "epoch": 0.001674914350970689,
163
+ "grad_norm": 12.694933891296387,
164
+ "learning_rate": 0.00017289686274214118,
165
+ "logits/chosen": 0.028933856636285782,
166
+ "logits/rejected": 0.00032558292150497437,
167
+ "logps/chosen": -156.97647094726562,
168
+ "logps/rejected": -182.00267028808594,
169
+ "loss": 0.6836,
170
+ "rewards/accuracies": 0.375,
171
+ "rewards/chosen": -0.747052788734436,
172
+ "rewards/margins": 0.1833851933479309,
173
+ "rewards/rejected": -0.9304379820823669,
174
+ "step": 11
175
+ },
176
+ {
177
+ "epoch": 0.0018271792919680244,
178
+ "grad_norm": 14.31529712677002,
179
+ "learning_rate": 0.000163742398974869,
180
+ "logits/chosen": 0.08384992182254791,
181
+ "logits/rejected": 0.1143774539232254,
182
+ "logps/chosen": -187.43304443359375,
183
+ "logps/rejected": -182.16583251953125,
184
+ "loss": 0.7032,
185
+ "rewards/accuracies": 0.625,
186
+ "rewards/chosen": -0.8268721103668213,
187
+ "rewards/margins": 0.055836014449596405,
188
+ "rewards/rejected": -0.8827080726623535,
189
+ "step": 12
190
+ },
191
+ {
192
+ "epoch": 0.0019794442329653596,
193
+ "grad_norm": 10.662793159484863,
194
+ "learning_rate": 0.00015358267949789966,
195
+ "logits/chosen": 0.12315154075622559,
196
+ "logits/rejected": 0.15528245270252228,
197
+ "logps/chosen": -157.62741088867188,
198
+ "logps/rejected": -142.5953826904297,
199
+ "loss": 0.6688,
200
+ "rewards/accuracies": 0.75,
201
+ "rewards/chosen": -0.5131796002388,
202
+ "rewards/margins": 0.14580143988132477,
203
+ "rewards/rejected": -0.6589810848236084,
204
+ "step": 13
205
+ },
206
+ {
207
+ "epoch": 0.0021317091739626952,
208
+ "grad_norm": 13.018126487731934,
209
+ "learning_rate": 0.00014257792915650728,
210
+ "logits/chosen": 0.1431768834590912,
211
+ "logits/rejected": 0.03350931778550148,
212
+ "logps/chosen": -146.06101989746094,
213
+ "logps/rejected": -152.32742309570312,
214
+ "loss": 0.6922,
215
+ "rewards/accuracies": 0.375,
216
+ "rewards/chosen": -0.8030464053153992,
217
+ "rewards/margins": -0.06302566081285477,
218
+ "rewards/rejected": -0.740020751953125,
219
+ "step": 14
220
+ },
221
+ {
222
+ "epoch": 0.0022839741149600305,
223
+ "grad_norm": 12.851974487304688,
224
+ "learning_rate": 0.00013090169943749476,
225
+ "logits/chosen": 0.10366199910640717,
226
+ "logits/rejected": 0.04498244822025299,
227
+ "logps/chosen": -153.73416137695312,
228
+ "logps/rejected": -168.11575317382812,
229
+ "loss": 0.721,
230
+ "rewards/accuracies": 0.5,
231
+ "rewards/chosen": -0.8541444540023804,
232
+ "rewards/margins": 0.09350776672363281,
233
+ "rewards/rejected": -0.947652280330658,
234
+ "step": 15
235
+ },
236
+ {
237
+ "epoch": 0.0024362390559573657,
238
+ "grad_norm": 10.859753608703613,
239
+ "learning_rate": 0.00011873813145857249,
240
+ "logits/chosen": 0.018513256683945656,
241
+ "logits/rejected": 0.020192591473460197,
242
+ "logps/chosen": -149.4884033203125,
243
+ "logps/rejected": -158.08319091796875,
244
+ "loss": 0.646,
245
+ "rewards/accuracies": 0.75,
246
+ "rewards/chosen": -0.997456967830658,
247
+ "rewards/margins": 0.2520313262939453,
248
+ "rewards/rejected": -1.249488353729248,
249
+ "step": 16
250
+ },
251
+ {
252
+ "epoch": 0.0025885039969547013,
253
+ "grad_norm": 17.59671974182129,
254
+ "learning_rate": 0.00010627905195293135,
255
+ "logits/chosen": -0.13020677864551544,
256
+ "logits/rejected": -0.03429074585437775,
257
+ "logps/chosen": -142.9901123046875,
258
+ "logps/rejected": -151.025634765625,
259
+ "loss": 0.7414,
260
+ "rewards/accuracies": 0.5,
261
+ "rewards/chosen": -0.9945340156555176,
262
+ "rewards/margins": 0.11265383660793304,
263
+ "rewards/rejected": -1.1071878671646118,
264
+ "step": 17
265
+ },
266
+ {
267
+ "epoch": 0.0027407689379520365,
268
+ "grad_norm": 12.00832462310791,
269
+ "learning_rate": 9.372094804706867e-05,
270
+ "logits/chosen": 0.015887008979916573,
271
+ "logits/rejected": 0.015174375846982002,
272
+ "logps/chosen": -148.59466552734375,
273
+ "logps/rejected": -164.22711181640625,
274
+ "loss": 0.5364,
275
+ "rewards/accuracies": 0.75,
276
+ "rewards/chosen": -1.0675632953643799,
277
+ "rewards/margins": 0.4369748830795288,
278
+ "rewards/rejected": -1.5045380592346191,
279
+ "step": 18
280
+ },
281
+ {
282
+ "epoch": 0.0028930338789493718,
283
+ "grad_norm": 14.49167251586914,
284
+ "learning_rate": 8.126186854142752e-05,
285
+ "logits/chosen": 0.026431191712617874,
286
+ "logits/rejected": -0.0005289912223815918,
287
+ "logps/chosen": -171.24415588378906,
288
+ "logps/rejected": -193.31686401367188,
289
+ "loss": 0.627,
290
+ "rewards/accuracies": 0.75,
291
+ "rewards/chosen": -1.253060221672058,
292
+ "rewards/margins": 0.13999736309051514,
293
+ "rewards/rejected": -1.3930575847625732,
294
+ "step": 19
295
+ },
296
+ {
297
+ "epoch": 0.0030452988199467074,
298
+ "grad_norm": 16.078474044799805,
299
+ "learning_rate": 6.909830056250527e-05,
300
+ "logits/chosen": 0.06809265166521072,
301
+ "logits/rejected": 0.034393489360809326,
302
+ "logps/chosen": -155.27926635742188,
303
+ "logps/rejected": -175.18618774414062,
304
+ "loss": 0.726,
305
+ "rewards/accuracies": 0.625,
306
+ "rewards/chosen": -1.043820858001709,
307
+ "rewards/margins": 0.27667587995529175,
308
+ "rewards/rejected": -1.320496678352356,
309
+ "step": 20
310
  }
311
  ],
312
  "logging_steps": 1,