iamnguyen commited on
Commit
cb5d659
1 Parent(s): b8184e4

Training in progress, step 10272, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be8f064d453a57ea7273970350761d964c9807f1c078006d860d03a3b99aecd4
3
  size 903834408
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99051c9584bc9cf7cf02f0a70766948ec8d3bb914e9129d5783113d3e96f9711
3
  size 903834408
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:12874bf81eb0524d19891bca2ddcc85a7634979c700dd291ce0217fc58255786
3
  size 1807824186
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b58379566e5031d4e2d8c3c67b1e419d250cca642eb33faffa6a577c925d01b2
3
  size 1807824186
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed797abbb6e575252328647d2519975ca5213c166e33893bf2b6cbc2d2b0579d
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2a0ce74b1f7310eca571ed8255d5a50d3cd32ef3818ed5a2757371e935be02c
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:075aea1ae34d22fd6b751b1b0ff5783a336ded95b0d65058f0b4391e6f395a77
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dfc0802b39b927f4d86b253bd10451131aa381a8fdc16ce3375396d233b5782b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.6630141414141414,
5
  "eval_steps": 16,
6
- "global_step": 10256,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -77568,6 +77568,127 @@
77568
  "eval_samples_per_second": 11.414,
77569
  "eval_steps_per_second": 1.427,
77570
  "step": 10256
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77571
  }
77572
  ],
77573
  "logging_steps": 1,
@@ -77587,7 +77708,7 @@
77587
  "attributes": {}
77588
  }
77589
  },
77590
- "total_flos": 1.9985510801866752e+17,
77591
  "train_batch_size": 8,
77592
  "trial_name": null,
77593
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.6640484848484849,
5
  "eval_steps": 16,
6
+ "global_step": 10272,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
77568
  "eval_samples_per_second": 11.414,
77569
  "eval_steps_per_second": 1.427,
77570
  "step": 10256
77571
+ },
77572
+ {
77573
+ "epoch": 0.6630787878787879,
77574
+ "grad_norm": 0.041810378432273865,
77575
+ "learning_rate": 0.00017842589380483555,
77576
+ "loss": 0.0573,
77577
+ "step": 10257
77578
+ },
77579
+ {
77580
+ "epoch": 0.6631434343434344,
77581
+ "grad_norm": 0.04772426187992096,
77582
+ "learning_rate": 0.0001784216507127977,
77583
+ "loss": 0.0701,
77584
+ "step": 10258
77585
+ },
77586
+ {
77587
+ "epoch": 0.6632080808080808,
77588
+ "grad_norm": 0.057533323764801025,
77589
+ "learning_rate": 0.0001784174072540077,
77590
+ "loss": 0.0991,
77591
+ "step": 10259
77592
+ },
77593
+ {
77594
+ "epoch": 0.6632727272727272,
77595
+ "grad_norm": 0.08679311722517014,
77596
+ "learning_rate": 0.00017841316342848539,
77597
+ "loss": 0.0696,
77598
+ "step": 10260
77599
+ },
77600
+ {
77601
+ "epoch": 0.6633373737373738,
77602
+ "grad_norm": 0.05634527653455734,
77603
+ "learning_rate": 0.00017840891923625064,
77604
+ "loss": 0.0831,
77605
+ "step": 10261
77606
+ },
77607
+ {
77608
+ "epoch": 0.6634020202020202,
77609
+ "grad_norm": 0.055026594549417496,
77610
+ "learning_rate": 0.00017840467467732332,
77611
+ "loss": 0.0806,
77612
+ "step": 10262
77613
+ },
77614
+ {
77615
+ "epoch": 0.6634666666666666,
77616
+ "grad_norm": 0.04825066402554512,
77617
+ "learning_rate": 0.00017840042975172328,
77618
+ "loss": 0.073,
77619
+ "step": 10263
77620
+ },
77621
+ {
77622
+ "epoch": 0.6635313131313131,
77623
+ "grad_norm": 0.05237942561507225,
77624
+ "learning_rate": 0.00017839618445947029,
77625
+ "loss": 0.081,
77626
+ "step": 10264
77627
+ },
77628
+ {
77629
+ "epoch": 0.6635959595959596,
77630
+ "grad_norm": 0.0542687326669693,
77631
+ "learning_rate": 0.0001783919388005843,
77632
+ "loss": 0.078,
77633
+ "step": 10265
77634
+ },
77635
+ {
77636
+ "epoch": 0.6636606060606061,
77637
+ "grad_norm": 0.055040229111909866,
77638
+ "learning_rate": 0.0001783876927750851,
77639
+ "loss": 0.0889,
77640
+ "step": 10266
77641
+ },
77642
+ {
77643
+ "epoch": 0.6637252525252525,
77644
+ "grad_norm": 0.05319792032241821,
77645
+ "learning_rate": 0.0001783834463829926,
77646
+ "loss": 0.0881,
77647
+ "step": 10267
77648
+ },
77649
+ {
77650
+ "epoch": 0.663789898989899,
77651
+ "grad_norm": 0.040569208562374115,
77652
+ "learning_rate": 0.00017837919962432664,
77653
+ "loss": 0.0637,
77654
+ "step": 10268
77655
+ },
77656
+ {
77657
+ "epoch": 0.6638545454545455,
77658
+ "grad_norm": 0.0480937659740448,
77659
+ "learning_rate": 0.00017837495249910706,
77660
+ "loss": 0.0747,
77661
+ "step": 10269
77662
+ },
77663
+ {
77664
+ "epoch": 0.6639191919191919,
77665
+ "grad_norm": 0.05075068771839142,
77666
+ "learning_rate": 0.00017837070500735373,
77667
+ "loss": 0.0812,
77668
+ "step": 10270
77669
+ },
77670
+ {
77671
+ "epoch": 0.6639838383838383,
77672
+ "grad_norm": 0.053749457001686096,
77673
+ "learning_rate": 0.00017836645714908652,
77674
+ "loss": 0.0915,
77675
+ "step": 10271
77676
+ },
77677
+ {
77678
+ "epoch": 0.6640484848484849,
77679
+ "grad_norm": 0.055688828229904175,
77680
+ "learning_rate": 0.00017836220892432532,
77681
+ "loss": 0.0856,
77682
+ "step": 10272
77683
+ },
77684
+ {
77685
+ "epoch": 0.6640484848484849,
77686
+ "eval_bleu": 17.997460933013013,
77687
+ "eval_loss": 0.08846329897642136,
77688
+ "eval_runtime": 2.6835,
77689
+ "eval_samples_per_second": 11.925,
77690
+ "eval_steps_per_second": 1.491,
77691
+ "step": 10272
77692
  }
77693
  ],
77694
  "logging_steps": 1,
 
77708
  "attributes": {}
77709
  }
77710
  },
77711
+ "total_flos": 2.0016689445863424e+17,
77712
  "train_batch_size": 8,
77713
  "trial_name": null,
77714
  "trial_params": null