lesso02 commited on
Commit
497893d
·
verified ·
1 Parent(s): f2c16de

Training in progress, step 141, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3da08353f199053e68f42ba3883449e2dd6f0282330e508b01d0b777eb1467f4
3
  size 389074464
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fde7902b6d7bff637d29597652a6b56ef95c5e1dd6da9c1dab080e2e6a85d377
3
  size 389074464
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e0ae10a35807f6134231bcc430fa722565fde0652dbec87b967720fc35713b96
3
  size 198011252
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5ae170ed1a15355e349e50927ed30d7df573fedf3137707fcfd954426a88c0c
3
  size 198011252
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:63add0586146b851f47d881f8a2c86d2e7bbd1031b34991ea727eda767e3ba6f
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9faa80e895f2ee441f65233c3b9d99d52a69600429d38bbddd6eead4f9c541b
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8187a6c88d7933f4442806aa816104ffa2abe40157d1b5941b707067b0f91484
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ef808943930759c29df7d6cb43c7ce42f18630028d7057fa5027d9913fbda00
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:428d17df5bbcb9c5bba8928ed07c3ac1e2473387421762fc2dd2a18dad7163cb
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:400862bf97811c06d931cbea4873b2c4f76e79cd2ca67b0903bcbae23651c690
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:10e76a88b495596517a8c5e8dd0c9cf73e2fa8d302cb53089a3a0a19398d0705
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c9ee43196afab265d60dcd4c1c900179ab22f775dc75282af2f09c2068eaf05
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7e07c2149652b1186dc91873797838ca755d72668f5e0d5315a9746f87efcc58
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2bc9793c1138e05078b265d5eaf89968ed1a8a6e30d8d2e6c8cd400956edb28
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ea6c246d355698e0da22b783baa4cd562f3bf8472a265e83f06d63d516cc95c7
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d6e96260fd16c14c9f9f12f3b8f9c8c35af2268f7c6f573e7123e9610bf0097
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:55aa132a1f6a464d42b97932e211287aa777c051c2c25ed9e36ac7ddda94bf95
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad3f1ce05ff8747c562da4063aca1d62d228d34ae20324fd91268b964a6f1fd5
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:15a7d871d8fc1dcb9a693c26cb0b989af2ab985ab337aecb1daa8032f9df0a10
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aec81ebc5fec1f650ab3e9c5c8e2325a8fbc2b8c2c20649882b77b60f6d6de60
3
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:90a8ac56c83b98807975f58ce50f3f82ea8b0179e01258b4bbfc026cbfdfe2db
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b4b05240aaf058fc01b05a9ba4d8dfa7cad8002a61cd9b25680b47dc809e03c
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.9274308681488037,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
4
- "epoch": 2.127659574468085,
5
  "eval_steps": 50,
6
- "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -731,6 +731,293 @@
731
  "eval_samples_per_second": 195.604,
732
  "eval_steps_per_second": 6.18,
733
  "step": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
734
  }
735
  ],
736
  "logging_steps": 1,
@@ -754,12 +1041,12 @@
754
  "should_evaluate": false,
755
  "should_log": false,
756
  "should_save": true,
757
- "should_training_stop": false
758
  },
759
  "attributes": {}
760
  }
761
  },
762
- "total_flos": 4.586475853185024e+17,
763
  "train_batch_size": 8,
764
  "trial_name": null,
765
  "trial_params": null
 
1
  {
2
  "best_metric": 0.9274308681488037,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
4
+ "epoch": 3.0,
5
  "eval_steps": 50,
6
+ "global_step": 141,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
731
  "eval_samples_per_second": 195.604,
732
  "eval_steps_per_second": 6.18,
733
  "step": 100
734
+ },
735
+ {
736
+ "epoch": 2.148936170212766,
737
+ "grad_norm": 0.2774420976638794,
738
+ "learning_rate": 2.1719778394743813e-05,
739
+ "loss": 0.212,
740
+ "step": 101
741
+ },
742
+ {
743
+ "epoch": 2.1702127659574466,
744
+ "grad_norm": 0.3814822733402252,
745
+ "learning_rate": 2.0726886931772476e-05,
746
+ "loss": 0.7554,
747
+ "step": 102
748
+ },
749
+ {
750
+ "epoch": 2.1914893617021276,
751
+ "grad_norm": 0.423093318939209,
752
+ "learning_rate": 1.9751405254395587e-05,
753
+ "loss": 1.3008,
754
+ "step": 103
755
+ },
756
+ {
757
+ "epoch": 2.2127659574468086,
758
+ "grad_norm": 0.38136741518974304,
759
+ "learning_rate": 1.879389435304766e-05,
760
+ "loss": 1.1388,
761
+ "step": 104
762
+ },
763
+ {
764
+ "epoch": 2.2340425531914896,
765
+ "grad_norm": 0.39356493949890137,
766
+ "learning_rate": 1.7854904883337184e-05,
767
+ "loss": 0.9796,
768
+ "step": 105
769
+ },
770
+ {
771
+ "epoch": 2.25531914893617,
772
+ "grad_norm": 0.47382405400276184,
773
+ "learning_rate": 1.693497684936963e-05,
774
+ "loss": 0.8731,
775
+ "step": 106
776
+ },
777
+ {
778
+ "epoch": 2.276595744680851,
779
+ "grad_norm": 0.4495427906513214,
780
+ "learning_rate": 1.6034639293196224e-05,
781
+ "loss": 0.4196,
782
+ "step": 107
783
+ },
784
+ {
785
+ "epoch": 2.297872340425532,
786
+ "grad_norm": 0.26799333095550537,
787
+ "learning_rate": 1.515440999056669e-05,
788
+ "loss": 0.338,
789
+ "step": 108
790
+ },
791
+ {
792
+ "epoch": 2.3191489361702127,
793
+ "grad_norm": 0.4940812587738037,
794
+ "learning_rate": 1.429479515316127e-05,
795
+ "loss": 1.1992,
796
+ "step": 109
797
+ },
798
+ {
799
+ "epoch": 2.3404255319148937,
800
+ "grad_norm": 0.4212472140789032,
801
+ "learning_rate": 1.3456289137473124e-05,
802
+ "loss": 1.153,
803
+ "step": 110
804
+ },
805
+ {
806
+ "epoch": 2.3617021276595747,
807
+ "grad_norm": 0.4149324893951416,
808
+ "learning_rate": 1.263937416050847e-05,
809
+ "loss": 1.05,
810
+ "step": 111
811
+ },
812
+ {
813
+ "epoch": 2.382978723404255,
814
+ "grad_norm": 0.4490218460559845,
815
+ "learning_rate": 1.1844520022468092e-05,
816
+ "loss": 0.9362,
817
+ "step": 112
818
+ },
819
+ {
820
+ "epoch": 2.404255319148936,
821
+ "grad_norm": 0.5155778527259827,
822
+ "learning_rate": 1.1072183836569599e-05,
823
+ "loss": 0.615,
824
+ "step": 113
825
+ },
826
+ {
827
+ "epoch": 2.425531914893617,
828
+ "grad_norm": 0.1996319442987442,
829
+ "learning_rate": 1.0322809766165916e-05,
830
+ "loss": 0.1577,
831
+ "step": 114
832
+ },
833
+ {
834
+ "epoch": 2.4468085106382977,
835
+ "grad_norm": 0.5449104905128479,
836
+ "learning_rate": 9.596828769311028e-06,
837
+ "loss": 1.2252,
838
+ "step": 115
839
+ },
840
+ {
841
+ "epoch": 2.4680851063829787,
842
+ "grad_norm": 0.4638878405094147,
843
+ "learning_rate": 8.894658350919999e-06,
844
+ "loss": 1.2128,
845
+ "step": 116
846
+ },
847
+ {
848
+ "epoch": 2.4893617021276597,
849
+ "grad_norm": 0.4165002107620239,
850
+ "learning_rate": 8.216702322665849e-06,
851
+ "loss": 1.0802,
852
+ "step": 117
853
+ },
854
+ {
855
+ "epoch": 2.5106382978723403,
856
+ "grad_norm": 0.42553117871284485,
857
+ "learning_rate": 7.563350570751137e-06,
858
+ "loss": 0.9043,
859
+ "step": 118
860
+ },
861
+ {
862
+ "epoch": 2.5319148936170213,
863
+ "grad_norm": 0.5580489039421082,
864
+ "learning_rate": 6.934978831688112e-06,
865
+ "loss": 0.7406,
866
+ "step": 119
867
+ },
868
+ {
869
+ "epoch": 2.5531914893617023,
870
+ "grad_norm": 0.14457367360591888,
871
+ "learning_rate": 6.331948476216073e-06,
872
+ "loss": 0.0878,
873
+ "step": 120
874
+ },
875
+ {
876
+ "epoch": 2.574468085106383,
877
+ "grad_norm": 0.46198827028274536,
878
+ "learning_rate": 5.754606301480452e-06,
879
+ "loss": 1.0055,
880
+ "step": 121
881
+ },
882
+ {
883
+ "epoch": 2.595744680851064,
884
+ "grad_norm": 0.4651603102684021,
885
+ "learning_rate": 5.2032843315930305e-06,
886
+ "loss": 1.2232,
887
+ "step": 122
888
+ },
889
+ {
890
+ "epoch": 2.617021276595745,
891
+ "grad_norm": 0.41709625720977783,
892
+ "learning_rate": 4.678299626687903e-06,
893
+ "loss": 1.0644,
894
+ "step": 123
895
+ },
896
+ {
897
+ "epoch": 2.6382978723404253,
898
+ "grad_norm": 0.42545390129089355,
899
+ "learning_rate": 4.179954100583199e-06,
900
+ "loss": 0.9916,
901
+ "step": 124
902
+ },
903
+ {
904
+ "epoch": 2.6595744680851063,
905
+ "grad_norm": 0.5188893675804138,
906
+ "learning_rate": 3.708534347153212e-06,
907
+ "loss": 0.8641,
908
+ "step": 125
909
+ },
910
+ {
911
+ "epoch": 2.6808510638297873,
912
+ "grad_norm": 0.30658212304115295,
913
+ "learning_rate": 3.26431147551097e-06,
914
+ "loss": 0.1864,
915
+ "step": 126
916
+ },
917
+ {
918
+ "epoch": 2.702127659574468,
919
+ "grad_norm": 0.4011896252632141,
920
+ "learning_rate": 2.8475409540958616e-06,
921
+ "loss": 0.6371,
922
+ "step": 127
923
+ },
924
+ {
925
+ "epoch": 2.723404255319149,
926
+ "grad_norm": 0.4772135615348816,
927
+ "learning_rate": 2.45846246375617e-06,
928
+ "loss": 1.2719,
929
+ "step": 128
930
+ },
931
+ {
932
+ "epoch": 2.74468085106383,
933
+ "grad_norm": 0.44009700417518616,
934
+ "learning_rate": 2.097299759910797e-06,
935
+ "loss": 1.1568,
936
+ "step": 129
937
+ },
938
+ {
939
+ "epoch": 2.7659574468085104,
940
+ "grad_norm": 0.42015552520751953,
941
+ "learning_rate": 1.7642605438696306e-06,
942
+ "loss": 1.0539,
943
+ "step": 130
944
+ },
945
+ {
946
+ "epoch": 2.7872340425531914,
947
+ "grad_norm": 0.4982571303844452,
948
+ "learning_rate": 1.4595363433864484e-06,
949
+ "loss": 0.8517,
950
+ "step": 131
951
+ },
952
+ {
953
+ "epoch": 2.8085106382978724,
954
+ "grad_norm": 0.4780231714248657,
955
+ "learning_rate": 1.1833024025130858e-06,
956
+ "loss": 0.3794,
957
+ "step": 132
958
+ },
959
+ {
960
+ "epoch": 2.829787234042553,
961
+ "grad_norm": 0.3601129949092865,
962
+ "learning_rate": 9.357175808182305e-07,
963
+ "loss": 0.5229,
964
+ "step": 133
965
+ },
966
+ {
967
+ "epoch": 2.851063829787234,
968
+ "grad_norm": 0.5078785419464111,
969
+ "learning_rate": 7.169242620287227e-07,
970
+ "loss": 1.2511,
971
+ "step": 134
972
+ },
973
+ {
974
+ "epoch": 2.872340425531915,
975
+ "grad_norm": 0.42775991559028625,
976
+ "learning_rate": 5.270482721460563e-07,
977
+ "loss": 1.1546,
978
+ "step": 135
979
+ },
980
+ {
981
+ "epoch": 2.8936170212765955,
982
+ "grad_norm": 0.43165627121925354,
983
+ "learning_rate": 3.6619880708494724e-07,
984
+ "loss": 1.0596,
985
+ "step": 136
986
+ },
987
+ {
988
+ "epoch": 2.9148936170212765,
989
+ "grad_norm": 0.4631091356277466,
990
+ "learning_rate": 2.3446836987585295e-07,
991
+ "loss": 0.9146,
992
+ "step": 137
993
+ },
994
+ {
995
+ "epoch": 2.9361702127659575,
996
+ "grad_norm": 0.5159528851509094,
997
+ "learning_rate": 1.319327174672832e-07,
998
+ "loss": 0.5763,
999
+ "step": 138
1000
+ },
1001
+ {
1002
+ "epoch": 2.9574468085106385,
1003
+ "grad_norm": 0.426923006772995,
1004
+ "learning_rate": 5.865081715870424e-08,
1005
+ "loss": 0.7757,
1006
+ "step": 139
1007
+ },
1008
+ {
1009
+ "epoch": 2.978723404255319,
1010
+ "grad_norm": 0.4215574264526367,
1011
+ "learning_rate": 1.4664812689001438e-08,
1012
+ "loss": 1.14,
1013
+ "step": 140
1014
+ },
1015
+ {
1016
+ "epoch": 3.0,
1017
+ "grad_norm": 0.5064499378204346,
1018
+ "learning_rate": 0.0,
1019
+ "loss": 0.8367,
1020
+ "step": 141
1021
  }
1022
  ],
1023
  "logging_steps": 1,
 
1041
  "should_evaluate": false,
1042
  "should_log": false,
1043
  "should_save": true,
1044
+ "should_training_stop": true
1045
  },
1046
  "attributes": {}
1047
  }
1048
  },
1049
+ "total_flos": 6.466930952990884e+17,
1050
  "train_batch_size": 8,
1051
  "trial_name": null,
1052
  "trial_params": null