alicegoesdown committed on
Commit 36dc2bc · verified · 1 Parent(s): 483a5b7

Training in progress, step 4950, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7603902b76dfb153996cd58830ff2244b1c85eb06a0dbd52e2c83aa4db919240
+oid sha256:9010627970909f58b28a32a9e1357ab8654f5b0425229dfbd38b99ab4cfdad18
 size 144748392
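These checkpoint files are tracked with Git LFS, so the repository stores only a small pointer file (version, oid sha256, size) and this commit simply swaps the pointer to a new object of the same size. A minimal sketch for verifying a downloaded file against the sha256 oid recorded in its pointer (the local path is an assumption, not part of this commit):

```python
# Minimal sketch: check a downloaded LFS object against the sha256 oid from its
# pointer file. The local path below is an assumption, not part of this commit.
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Hash the file in chunks so large checkpoints need not fit in memory."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

expected = "9010627970909f58b28a32a9e1357ab8654f5b0425229dfbd38b99ab4cfdad18"
actual = sha256_of("last-checkpoint/adapter_model.safetensors")
print("OK" if actual == expected else f"mismatch: {actual}")
```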
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5a7049d10ee3c694bd2470145a49ede9bec92ae46b8a0cfd24b342730ad05e3c
+oid sha256:0ba0b229ad267c947c651ea9d2e1d896a6032cb0fd1c50c5267d958d12c3f73f
 size 289690562
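Note that optimizer.pt (289,690,562 bytes) is roughly twice the size of adapter_model.safetensors (144,748,392 bytes), which is what an Adam-family optimizer keeping two moment buffers per trainable parameter would produce. The commit does not state which optimizer is used, so treat this as a plausibility check rather than a documented fact:

```python
# Rough plausibility check (assumes an Adam-family optimizer, which this commit
# does not state): two moment buffers per parameter put the optimizer state
# near 2x the adapter weights, plus a little metadata and param-group overhead.
adapter_bytes = 144_748_392    # last-checkpoint/adapter_model.safetensors
optimizer_bytes = 289_690_562  # last-checkpoint/optimizer.pt
print(f"{optimizer_bytes / adapter_bytes:.3f}")  # ~2.0
```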
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:84e8894b47240b5e938409bdde12ef850338a078ac443b39608ef1f61c6d50e9
+oid sha256:1f055733d5cde189e3e76b84ea53a9d0b33684eaf5a7ed4900b4ec7409362096
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:741e5a74b4b8a7e18ee94356eb03e28c998d3e32cf81a6bc98c8f2bfb8fb01d7
+oid sha256:10a0f55d24fa7f397f288a11330d31c57855416af9f1e0cdf88c93033ba0b0e8
 size 1256
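Together, adapter_model.safetensors, optimizer.pt, scheduler.pt, rng_state.pth, and trainer_state.json form a standard Hugging Face Transformers Trainer checkpoint directory, so a run can usually be resumed from it rather than restarted. A hedged sketch, assuming a Trainer-based setup; the model and dataset below are placeholders, since this commit does not include the training script:

```python
# Sketch only: resuming from a checkpoint directory like this one with the
# Hugging Face Trainer. `model` and `train_dataset` are placeholders; the
# training script, base model, and dataset are not part of this commit.
from transformers import Trainer, TrainingArguments

model = ...          # e.g. the adapter-wrapped base model used for this run
train_dataset = ...  # the training dataset used for this run

args = TrainingArguments(output_dir="./output", per_device_train_batch_size=32)
trainer = Trainer(model=model, args=args, train_dataset=train_dataset)

# resume_from_checkpoint restores optimizer.pt, scheduler.pt, rng_state.pth and
# trainer_state.json, so training continues from step 4950 instead of step 0.
trainer.train(resume_from_checkpoint="last-checkpoint")
```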
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 0.8962866067886353,
   "best_model_checkpoint": "./output/checkpoint-4800",
-  "epoch": 0.30592734225621415,
+  "epoch": 0.3154875717017208,
   "eval_steps": 150,
-  "global_step": 4800,
+  "global_step": 4950,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3623,6 +3623,119 @@
       "eval_samples_per_second": 11.228,
       "eval_steps_per_second": 11.228,
       "step": 4800
+    },
+    {
+      "epoch": 0.3065646908859146,
+      "grad_norm": 0.6696013808250427,
+      "learning_rate": 4.631564202709354e-07,
+      "loss": 0.8851,
+      "step": 4810
+    },
+    {
+      "epoch": 0.30720203951561503,
+      "grad_norm": 0.6419965028762817,
+      "learning_rate": 4.1573879615262185e-07,
+      "loss": 0.8941,
+      "step": 4820
+    },
+    {
+      "epoch": 0.3078393881453155,
+      "grad_norm": 0.6661477088928223,
+      "learning_rate": 3.708732128449785e-07,
+      "loss": 0.8848,
+      "step": 4830
+    },
+    {
+      "epoch": 0.3084767367750159,
+      "grad_norm": 0.6392946839332581,
+      "learning_rate": 3.2856151459641216e-07,
+      "loss": 0.8847,
+      "step": 4840
+    },
+    {
+      "epoch": 0.30911408540471635,
+      "grad_norm": 0.6320896744728088,
+      "learning_rate": 2.888054406751106e-07,
+      "loss": 0.8589,
+      "step": 4850
+    },
+    {
+      "epoch": 0.30975143403441685,
+      "grad_norm": 0.6730213761329651,
+      "learning_rate": 2.5160662529755823e-07,
+      "loss": 0.901,
+      "step": 4860
+    },
+    {
+      "epoch": 0.3103887826641173,
+      "grad_norm": 0.6767361164093018,
+      "learning_rate": 2.169665975613605e-07,
+      "loss": 0.9146,
+      "step": 4870
+    },
+    {
+      "epoch": 0.31102613129381773,
+      "grad_norm": 0.6694763898849487,
+      "learning_rate": 1.8488678138238456e-07,
+      "loss": 0.8884,
+      "step": 4880
+    },
+    {
+      "epoch": 0.31166347992351817,
+      "grad_norm": 0.6107333302497864,
+      "learning_rate": 1.5536849543621584e-07,
+      "loss": 0.9127,
+      "step": 4890
+    },
+    {
+      "epoch": 0.3123008285532186,
+      "grad_norm": 0.6185479164123535,
+      "learning_rate": 1.2841295310397905e-07,
+      "loss": 0.9108,
+      "step": 4900
+    },
+    {
+      "epoch": 0.31293817718291905,
+      "grad_norm": 0.6723112463951111,
+      "learning_rate": 1.0402126242244764e-07,
+      "loss": 0.8672,
+      "step": 4910
+    },
+    {
+      "epoch": 0.3135755258126195,
+      "grad_norm": 0.6152687668800354,
+      "learning_rate": 8.219442603847605e-08,
+      "loss": 0.8967,
+      "step": 4920
+    },
+    {
+      "epoch": 0.31421287444231993,
+      "grad_norm": 0.6387733817100525,
+      "learning_rate": 6.293334116783817e-08,
+      "loss": 0.9095,
+      "step": 4930
+    },
+    {
+      "epoch": 0.3148502230720204,
+      "grad_norm": 0.6692641377449036,
+      "learning_rate": 4.623879955827082e-08,
+      "loss": 0.8861,
+      "step": 4940
+    },
+    {
+      "epoch": 0.3154875717017208,
+      "grad_norm": 0.6291835904121399,
+      "learning_rate": 3.211148745700665e-08,
+      "loss": 0.8824,
+      "step": 4950
+    },
+    {
+      "epoch": 0.3154875717017208,
+      "eval_loss": 0.8967909216880798,
+      "eval_runtime": 43.4282,
+      "eval_samples_per_second": 11.513,
+      "eval_steps_per_second": 11.513,
+      "step": 4950
     }
   ],
   "logging_steps": 10,
@@ -3642,7 +3755,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.3399278300168192e+17,
+  "total_flos": 1.3818563054272512e+17,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null