adammandic87 commited on
Commit
4892336
·
verified ·
1 Parent(s): 0058ad3

Training in progress, step 57, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:40978b6045d435860bdc653f2c1ad8723d28aa9333e29555d66768df70c763bb
3
  size 50899792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b838e37c6acc3226cb23a05e8da8e401052de7ec8d63320c4dea86d5b9791f8
3
  size 50899792
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d6d6739e3b80f0c91dc11e94b1399835f20bb5b0a93df5bd3138f9f3a59d2d13
3
  size 26231300
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44ab86fafce39c2b8346177e57d4e4368a643eb8bc217de77945ce8f27c78395
3
  size 26231300
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d606d31d447120bc1b4de5890ffaff6e62d8521d8976078b55323f24cb5690d3
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f489b33e4ce6b4592438133f0f8030528235eaffcbc7196d436c439dd611d61
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c4f0183aec085119f27cd46c60ab3f231930ae66c7ca01d0adff96b44d5e0e2
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be205ee2b4780d487b513e30ac9c545724d6f8bbbacf91c0e0793417bf18e789
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.5049833887043189,
5
  "eval_steps": 19,
6
- "global_step": 38,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -297,6 +297,147 @@
297
  "eval_samples_per_second": 34.029,
298
  "eval_steps_per_second": 17.015,
299
  "step": 38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
300
  }
301
  ],
302
  "logging_steps": 1,
@@ -316,7 +457,7 @@
316
  "attributes": {}
317
  }
318
  },
319
- "total_flos": 3157187360194560.0,
320
  "train_batch_size": 2,
321
  "trial_name": null,
322
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.7574750830564784,
5
  "eval_steps": 19,
6
+ "global_step": 57,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
297
  "eval_samples_per_second": 34.029,
298
  "eval_steps_per_second": 17.015,
299
  "step": 38
300
+ },
301
+ {
302
+ "epoch": 0.5182724252491694,
303
+ "grad_norm": 0.3961353003978729,
304
+ "learning_rate": 0.00011892512443604102,
305
+ "loss": 0.6232,
306
+ "step": 39
307
+ },
308
+ {
309
+ "epoch": 0.53156146179402,
310
+ "grad_norm": 0.4052596986293793,
311
+ "learning_rate": 0.00011423148382732853,
312
+ "loss": 0.5982,
313
+ "step": 40
314
+ },
315
+ {
316
+ "epoch": 0.5448504983388704,
317
+ "grad_norm": 0.2967623472213745,
318
+ "learning_rate": 0.00010950560433041826,
319
+ "loss": 0.2795,
320
+ "step": 41
321
+ },
322
+ {
323
+ "epoch": 0.5581395348837209,
324
+ "grad_norm": 0.3998267352581024,
325
+ "learning_rate": 0.00010475819158237425,
326
+ "loss": 0.6561,
327
+ "step": 42
328
+ },
329
+ {
330
+ "epoch": 0.5714285714285714,
331
+ "grad_norm": 0.3726865351200104,
332
+ "learning_rate": 0.0001,
333
+ "loss": 0.4405,
334
+ "step": 43
335
+ },
336
+ {
337
+ "epoch": 0.584717607973422,
338
+ "grad_norm": 0.33510908484458923,
339
+ "learning_rate": 9.524180841762577e-05,
340
+ "loss": 0.5434,
341
+ "step": 44
342
+ },
343
+ {
344
+ "epoch": 0.5980066445182725,
345
+ "grad_norm": 0.49725764989852905,
346
+ "learning_rate": 9.049439566958175e-05,
347
+ "loss": 0.5958,
348
+ "step": 45
349
+ },
350
+ {
351
+ "epoch": 0.6112956810631229,
352
+ "grad_norm": 0.4520473778247833,
353
+ "learning_rate": 8.57685161726715e-05,
354
+ "loss": 0.5766,
355
+ "step": 46
356
+ },
357
+ {
358
+ "epoch": 0.6245847176079734,
359
+ "grad_norm": 0.4540534019470215,
360
+ "learning_rate": 8.107487556395901e-05,
361
+ "loss": 0.5456,
362
+ "step": 47
363
+ },
364
+ {
365
+ "epoch": 0.6378737541528239,
366
+ "grad_norm": 0.41648876667022705,
367
+ "learning_rate": 7.642410644905726e-05,
368
+ "loss": 0.5473,
369
+ "step": 48
370
+ },
371
+ {
372
+ "epoch": 0.6511627906976745,
373
+ "grad_norm": 0.4036474823951721,
374
+ "learning_rate": 7.182674431585704e-05,
375
+ "loss": 0.5227,
376
+ "step": 49
377
+ },
378
+ {
379
+ "epoch": 0.6644518272425249,
380
+ "grad_norm": 0.4946843385696411,
381
+ "learning_rate": 6.729320366825784e-05,
382
+ "loss": 0.5725,
383
+ "step": 50
384
+ },
385
+ {
386
+ "epoch": 0.6777408637873754,
387
+ "grad_norm": 0.4965102970600128,
388
+ "learning_rate": 6.283375443396726e-05,
389
+ "loss": 0.5691,
390
+ "step": 51
391
+ },
392
+ {
393
+ "epoch": 0.6910299003322259,
394
+ "grad_norm": 0.4677567183971405,
395
+ "learning_rate": 5.845849869981137e-05,
396
+ "loss": 0.5973,
397
+ "step": 52
398
+ },
399
+ {
400
+ "epoch": 0.7043189368770764,
401
+ "grad_norm": 0.42778217792510986,
402
+ "learning_rate": 5.417734782725896e-05,
403
+ "loss": 0.5476,
404
+ "step": 53
405
+ },
406
+ {
407
+ "epoch": 0.717607973421927,
408
+ "grad_norm": 0.6459518671035767,
409
+ "learning_rate": 5.000000000000002e-05,
410
+ "loss": 0.7396,
411
+ "step": 54
412
+ },
413
+ {
414
+ "epoch": 0.7308970099667774,
415
+ "grad_norm": 0.4538530707359314,
416
+ "learning_rate": 4.593591825444028e-05,
417
+ "loss": 0.5768,
418
+ "step": 55
419
+ },
420
+ {
421
+ "epoch": 0.7441860465116279,
422
+ "grad_norm": 0.4008188247680664,
423
+ "learning_rate": 4.19943090428802e-05,
424
+ "loss": 0.5298,
425
+ "step": 56
426
+ },
427
+ {
428
+ "epoch": 0.7574750830564784,
429
+ "grad_norm": 0.3514866232872009,
430
+ "learning_rate": 3.8184101377939476e-05,
431
+ "loss": 0.3634,
432
+ "step": 57
433
+ },
434
+ {
435
+ "epoch": 0.7574750830564784,
436
+ "eval_loss": 0.679520845413208,
437
+ "eval_runtime": 0.9423,
438
+ "eval_samples_per_second": 33.96,
439
+ "eval_steps_per_second": 16.98,
440
+ "step": 57
441
  }
442
  ],
443
  "logging_steps": 1,
 
457
  "attributes": {}
458
  }
459
  },
460
+ "total_flos": 4715279823667200.0,
461
  "train_batch_size": 2,
462
  "trial_name": null,
463
  "trial_params": null