apriasmoro committed
Commit f7e6ab1 · verified · 1 parent: 7a7b621

Training in progress, step 500, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:971a2f59249d55a8b48186cd9295cb594f33af73df45f87bf14fbbcafe8fa76a
+oid sha256:77e076f7213cd541801b56e598732e42d7aaa49f322189b3825f935f5e1a9284
 size 349243752
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5c1c4f17c0acd80bfb8240e71fa6a14ad3d00c291599f75e0c43d07631c9ed35
+oid sha256:47e1d75dde1eaa357bbd53fa9529a45fd976f9322d969b161d305861e4d4d4f6
 size 177909253
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cdd06785c427f9c2a97b75ee5b71e93815ab2666e5c88a6c7a1e96a2bf4caaf0
+oid sha256:96e967965c983a20a302f5bd4e11508247b969959098eb0b66c2fdc8d23296fa
 size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:118821fc5b8206c24fc92c01c7602ae0213214e4c85db903243af594ef0e4c50
+oid sha256:7258fe24b7785a5fa76614aef57f913158962a01367bf1fe11174ca5bb4f2704
 size 1465
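
Each file above is tracked with Git LFS, so the diff only records the pointer text (version, oid, size): the sha256 oid changes while the byte size stays the same. A minimal sketch of how one might check that a locally fetched checkpoint file matches the new pointer, assuming the LFS object has already been downloaded; the path, oid, and size below are copied from the adapter_model.safetensors diff above:

import hashlib
from pathlib import Path

# Values copied from the new last-checkpoint/adapter_model.safetensors pointer above.
EXPECTED_OID = "77e076f7213cd541801b56e598732e42d7aaa49f322189b3825f935f5e1a9284"
EXPECTED_SIZE = 349243752

path = Path("last-checkpoint/adapter_model.safetensors")  # assumes `git lfs pull` already ran
digest = hashlib.sha256()
with path.open("rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        digest.update(chunk)

assert path.stat().st_size == EXPECTED_SIZE, "size does not match the LFS pointer"
assert digest.hexdigest() == EXPECTED_OID, "sha256 does not match the LFS pointer"
print("local file matches the new LFS pointer")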
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.39800995024875624,
+  "epoch": 0.4975124378109453,
   "eval_steps": 500,
-  "global_step": 400,
+  "global_step": 500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -708,6 +708,181 @@
       "learning_rate": 7.68782851756094e-06,
       "loss": 1.3148,
       "step": 400
+    },
+    {
+      "epoch": 0.4019900497512438,
+      "grad_norm": 0.17020876705646515,
+      "learning_rate": 7.633781445683757e-06,
+      "loss": 1.4201,
+      "step": 404
+    },
+    {
+      "epoch": 0.4059701492537313,
+      "grad_norm": 0.17484496533870697,
+      "learning_rate": 7.578917304062244e-06,
+      "loss": 1.3405,
+      "step": 408
+    },
+    {
+      "epoch": 0.4099502487562189,
+      "grad_norm": 0.1632368564605713,
+      "learning_rate": 7.523250629339467e-06,
+      "loss": 1.4,
+      "step": 412
+    },
+    {
+      "epoch": 0.41393034825870645,
+      "grad_norm": 0.17625893652439117,
+      "learning_rate": 7.4667961707953255e-06,
+      "loss": 1.4348,
+      "step": 416
+    },
+    {
+      "epoch": 0.417910447761194,
+      "grad_norm": 0.15959715843200684,
+      "learning_rate": 7.409568886438621e-06,
+      "loss": 1.4332,
+      "step": 420
+    },
+    {
+      "epoch": 0.4218905472636816,
+      "grad_norm": 0.17473378777503967,
+      "learning_rate": 7.351583939043828e-06,
+      "loss": 1.4602,
+      "step": 424
+    },
+    {
+      "epoch": 0.42587064676616915,
+      "grad_norm": 0.18552260100841522,
+      "learning_rate": 7.292856692133618e-06,
+      "loss": 1.3272,
+      "step": 428
+    },
+    {
+      "epoch": 0.4298507462686567,
+      "grad_norm": 0.15143436193466187,
+      "learning_rate": 7.233402705908171e-06,
+      "loss": 1.2945,
+      "step": 432
+    },
+    {
+      "epoch": 0.4338308457711443,
+      "grad_norm": 0.17919780313968658,
+      "learning_rate": 7.173237733122405e-06,
+      "loss": 1.3907,
+      "step": 436
+    },
+    {
+      "epoch": 0.43781094527363185,
+      "grad_norm": 0.18790380656719208,
+      "learning_rate": 7.11237771491216e-06,
+      "loss": 1.3891,
+      "step": 440
+    },
+    {
+      "epoch": 0.4417910447761194,
+      "grad_norm": 0.17522069811820984,
+      "learning_rate": 7.050838776570487e-06,
+      "loss": 1.4066,
+      "step": 444
+    },
+    {
+      "epoch": 0.445771144278607,
+      "grad_norm": 0.13809643685817719,
+      "learning_rate": 6.9886372232751395e-06,
+      "loss": 1.3792,
+      "step": 448
+    },
+    {
+      "epoch": 0.44975124378109455,
+      "grad_norm": 0.16660109162330627,
+      "learning_rate": 6.925789535768393e-06,
+      "loss": 1.4093,
+      "step": 452
+    },
+    {
+      "epoch": 0.4537313432835821,
+      "grad_norm": 0.1820070743560791,
+      "learning_rate": 6.862312365990363e-06,
+      "loss": 1.38,
+      "step": 456
+    },
+    {
+      "epoch": 0.4577114427860697,
+      "grad_norm": 0.13574036955833435,
+      "learning_rate": 6.798222532666956e-06,
+      "loss": 1.3845,
+      "step": 460
+    },
+    {
+      "epoch": 0.4616915422885572,
+      "grad_norm": 0.18035098910331726,
+      "learning_rate": 6.73353701685362e-06,
+      "loss": 1.4684,
+      "step": 464
+    },
+    {
+      "epoch": 0.46567164179104475,
+      "grad_norm": 0.13899867236614227,
+      "learning_rate": 6.668272957436101e-06,
+      "loss": 1.4925,
+      "step": 468
+    },
+    {
+      "epoch": 0.4696517412935323,
+      "grad_norm": 0.16560381650924683,
+      "learning_rate": 6.602447646589379e-06,
+      "loss": 1.4543,
+      "step": 472
+    },
+    {
+      "epoch": 0.4736318407960199,
+      "grad_norm": 0.19061513245105743,
+      "learning_rate": 6.536078525195966e-06,
+      "loss": 1.4835,
+      "step": 476
+    },
+    {
+      "epoch": 0.47761194029850745,
+      "grad_norm": 0.20418591797351837,
+      "learning_rate": 6.46918317822484e-06,
+      "loss": 1.3522,
+      "step": 480
+    },
+    {
+      "epoch": 0.481592039800995,
+      "grad_norm": 0.14897631108760834,
+      "learning_rate": 6.401779330072171e-06,
+      "loss": 1.4015,
+      "step": 484
+    },
+    {
+      "epoch": 0.4855721393034826,
+      "grad_norm": 0.15127280354499817,
+      "learning_rate": 6.33388483986512e-06,
+      "loss": 1.3304,
+      "step": 488
+    },
+    {
+      "epoch": 0.48955223880597015,
+      "grad_norm": 0.16975510120391846,
+      "learning_rate": 6.265517696729937e-06,
+      "loss": 1.4004,
+      "step": 492
+    },
+    {
+      "epoch": 0.4935323383084577,
+      "grad_norm": 0.17138828337192535,
+      "learning_rate": 6.196696015025615e-06,
+      "loss": 1.3479,
+      "step": 496
+    },
+    {
+      "epoch": 0.4975124378109453,
+      "grad_norm": 0.16889625787734985,
+      "learning_rate": 6.1274380295443624e-06,
+      "loss": 1.345,
+      "step": 500
     }
   ],
   "logging_steps": 4,
@@ -727,7 +902,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.99858436292608e+17,
+  "total_flos": 3.748818411375821e+17,
   "train_batch_size": 24,
   "trial_name": null,
   "trial_params": null