lesso02 committed
Commit fbf920c · verified · 1 Parent(s): 71d35aa

Training in progress, step 150, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b1e9350840587eb203432716b84bf38f0892199b78864473311d41ad9e304e04
+oid sha256:824f09925e06dade0980b7b6f2673ad9313a1860d3955d0f6ec2c24b8d19dac8
 size 140815952
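
Each of the `CHANGED` entries in this commit is a Git LFS pointer file: the repository itself only stores the `version` / `oid sha256` / `size` triplet, while the actual checkpoint blob lives in LFS storage. As a minimal sketch (not part of this commit; the local path is an assumption about where the file was downloaded), one might verify that a pulled blob matches the `oid` recorded in its pointer like this:

```python
# Sketch only: verify a locally downloaded checkpoint file against the sha256
# oid recorded in its Git LFS pointer. The expected digest below is the new
# oid for adapter_model.safetensors in this commit; the path is an assumption.
import hashlib


def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream the file in chunks and return its hex-encoded SHA-256 digest."""
    digest = hashlib.sha256()
    with open(path, "rb") as handle:
        for chunk in iter(lambda: handle.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()


expected = "824f09925e06dade0980b7b6f2673ad9313a1860d3955d0f6ec2c24b8d19dac8"
actual = sha256_of("last-checkpoint/adapter_model.safetensors")  # assumed local path
print("match" if actual == expected else f"mismatch: {actual}")
```
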
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3f8fef6ccb06ff3c24bb7a0686a8734b67b5f93291fcb3385ab55b27e3c096e4
+oid sha256:0385716b1e421bc04c934b8f7557055a72735142692c3a1dba5e7471da2b24ea
 size 71878612
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a83b8c4488deb8cd397da2096d8a0daa40c8ecb31d14ce690fee69bab3faeb3f
+oid sha256:254216cd1bfa077aac79ddf3aa991dd86be3d924b5693193fb57c269a3229fc5
 size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:438c47e09e1ad6945f15507fd53bac021b1f1a78602b0a94b2e541835c3eba00
+oid sha256:98e93359c59ca311edbe89f16d5f0231bbab585603f1b0be35494b54b91a1828
 size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:89916923dbb9b79cee22798a2a03c6f663b43ba506ccad761f7801136c1713a7
+oid sha256:4653009b10dd48c6a46766f75f0f4c3beaaf299dba0329a995450677bc18ca49
 size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:91d44a2d9c0d9e7d0fb61828c4526a2b0b47d002e00fca9f553c830082e53225
+oid sha256:5415a05b9817c8d92f7f2ef1613eb04335e657362fb7e72f840691f4ab31dc13
 size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:48dfea6e4df90d66d1d45eb69f4399fe83524036c5ef42f89902d236419b6483
+oid sha256:dfa835537978680c561926622f8191526b11e7115b2608526fb7b423c2761adb
 size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:efe5762349e9b46e27b422d2cc9cd7a3d3f39fa7cd45dc918677cc202d1e0c26
+oid sha256:1d99df497585478105a47995bca143a42ccb608ae08ea70b6fa51f3ac15d28c3
 size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8b5bcced87ebe766f4445d1c89030bd9f26b7866183b87f288b52eabfbf6b1b6
+oid sha256:975a341ff81520c4a9e0bf9eb1d42a63cc63f204e2529f8679b8c5258e9f3765
 size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:73eace61c3943c39badd29a3257047b16266bd145243edbcfb32202a868ef070
+oid sha256:168986a5fc9ca577afbaa24ced94bae1c6aeb7a2e5577615a8f9fb71b2478a65
 size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:49be4161dc02d935809d37b98d9b10201fb1865fd2dcf2549c1f46447b26bb3a
+oid sha256:4a1d4966bbb3ac4136a17b84f52768cf52a77159d3c183b2a8af35a019ee542b
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 2.160346269607544,
-  "best_model_checkpoint": "miner_id_24/checkpoint-100",
-  "epoch": 0.03457216940363008,
+  "best_metric": 2.138585329055786,
+  "best_model_checkpoint": "miner_id_24/checkpoint-150",
+  "epoch": 0.05185825410544512,
   "eval_steps": 50,
-  "global_step": 100,
+  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -731,6 +731,364 @@
       "eval_samples_per_second": 302.956,
       "eval_steps_per_second": 9.468,
       "step": 100
+    },
+    {
+      "epoch": 0.03491789109766638,
+      "grad_norm": 0.2299177050590515,
+      "learning_rate": 5.4370619800610666e-05,
+      "loss": 1.318,
+      "step": 101
+    },
+    {
+      "epoch": 0.03526361279170268,
+      "grad_norm": 0.3703376352787018,
+      "learning_rate": 5.352877148833434e-05,
+      "loss": 1.7293,
+      "step": 102
+    },
+    {
+      "epoch": 0.03560933448573898,
+      "grad_norm": 0.5616604685783386,
+      "learning_rate": 5.2686231835182714e-05,
+      "loss": 2.084,
+      "step": 103
+    },
+    {
+      "epoch": 0.035955056179775284,
+      "grad_norm": 0.6388251781463623,
+      "learning_rate": 5.184323118308429e-05,
+      "loss": 2.2635,
+      "step": 104
+    },
+    {
+      "epoch": 0.036300777873811585,
+      "grad_norm": 0.859301745891571,
+      "learning_rate": 5.1e-05,
+      "loss": 2.3282,
+      "step": 105
+    },
+    {
+      "epoch": 0.036646499567847886,
+      "grad_norm": 2.1892261505126953,
+      "learning_rate": 5.0156768816915715e-05,
+      "loss": 2.7334,
+      "step": 106
+    },
+    {
+      "epoch": 0.03699222126188419,
+      "grad_norm": 0.24318625032901764,
+      "learning_rate": 4.931376816481728e-05,
+      "loss": 1.3745,
+      "step": 107
+    },
+    {
+      "epoch": 0.03733794295592048,
+      "grad_norm": 0.3018660247325897,
+      "learning_rate": 4.8471228511665655e-05,
+      "loss": 1.4975,
+      "step": 108
+    },
+    {
+      "epoch": 0.03768366464995678,
+      "grad_norm": 0.42744195461273193,
+      "learning_rate": 4.7629380199389334e-05,
+      "loss": 2.0573,
+      "step": 109
+    },
+    {
+      "epoch": 0.03802938634399308,
+      "grad_norm": 0.5142998099327087,
+      "learning_rate": 4.6788453380911054e-05,
+      "loss": 2.2087,
+      "step": 110
+    },
+    {
+      "epoch": 0.03837510803802938,
+      "grad_norm": 0.7566731572151184,
+      "learning_rate": 4.5948677957226404e-05,
+      "loss": 2.3133,
+      "step": 111
+    },
+    {
+      "epoch": 0.038720829732065684,
+      "grad_norm": 1.5625238418579102,
+      "learning_rate": 4.511028351455111e-05,
+      "loss": 2.5176,
+      "step": 112
+    },
+    {
+      "epoch": 0.039066551426101985,
+      "grad_norm": 0.27059802412986755,
+      "learning_rate": 4.427349926155448e-05,
+      "loss": 1.447,
+      "step": 113
+    },
+    {
+      "epoch": 0.039412273120138286,
+      "grad_norm": 0.2744463086128235,
+      "learning_rate": 4.3438553966696056e-05,
+      "loss": 1.4258,
+      "step": 114
+    },
+    {
+      "epoch": 0.03975799481417459,
+      "grad_norm": 0.373234361410141,
+      "learning_rate": 4.260567589568256e-05,
+      "loss": 1.9323,
+      "step": 115
+    },
+    {
+      "epoch": 0.04010371650821089,
+      "grad_norm": 0.4722805917263031,
+      "learning_rate": 4.177509274906241e-05,
+      "loss": 2.1376,
+      "step": 116
+    },
+    {
+      "epoch": 0.04044943820224719,
+      "grad_norm": 0.6444010734558105,
+      "learning_rate": 4.094703159997451e-05,
+      "loss": 2.2428,
+      "step": 117
+    },
+    {
+      "epoch": 0.04079515989628349,
+      "grad_norm": 1.2310961484909058,
+      "learning_rate": 4.012171883206892e-05,
+      "loss": 2.5802,
+      "step": 118
+    },
+    {
+      "epoch": 0.04114088159031979,
+      "grad_norm": 0.37185004353523254,
+      "learning_rate": 3.929938007761567e-05,
+      "loss": 1.5409,
+      "step": 119
+    },
+    {
+      "epoch": 0.04148660328435609,
+      "grad_norm": 0.2401726394891739,
+      "learning_rate": 3.8480240155819247e-05,
+      "loss": 1.3984,
+      "step": 120
+    },
+    {
+      "epoch": 0.04183232497839239,
+      "grad_norm": 0.3750150501728058,
+      "learning_rate": 3.766452301135519e-05,
+      "loss": 1.8374,
+      "step": 121
+    },
+    {
+      "epoch": 0.04217804667242869,
+      "grad_norm": 0.4811207354068756,
+      "learning_rate": 3.685245165314594e-05,
+      "loss": 2.0736,
+      "step": 122
+    },
+    {
+      "epoch": 0.042523768366464994,
+      "grad_norm": 0.5793203115463257,
+      "learning_rate": 3.60442480933923e-05,
+      "loss": 2.2211,
+      "step": 123
+    },
+    {
+      "epoch": 0.042869490060501295,
+      "grad_norm": 0.9860462546348572,
+      "learning_rate": 3.5240133286877684e-05,
+      "loss": 2.3716,
+      "step": 124
+    },
+    {
+      "epoch": 0.043215211754537596,
+      "grad_norm": 4.702469825744629,
+      "learning_rate": 3.444032707056115e-05,
+      "loss": 3.0878,
+      "step": 125
+    },
+    {
+      "epoch": 0.043560933448573896,
+      "grad_norm": 0.2194783091545105,
+      "learning_rate": 3.36450481034762e-05,
+      "loss": 1.426,
+      "step": 126
+    },
+    {
+      "epoch": 0.0439066551426102,
+      "grad_norm": 0.3231513202190399,
+      "learning_rate": 3.285451380695165e-05,
+      "loss": 1.683,
+      "step": 127
+    },
+    {
+      "epoch": 0.0442523768366465,
+      "grad_norm": 0.4826495051383972,
+      "learning_rate": 3.2068940305170734e-05,
+      "loss": 2.0538,
+      "step": 128
+    },
+    {
+      "epoch": 0.0445980985306828,
+      "grad_norm": 0.6153631806373596,
+      "learning_rate": 3.128854236608495e-05,
+      "loss": 2.2823,
+      "step": 129
+    },
+    {
+      "epoch": 0.0449438202247191,
+      "grad_norm": 0.8221266269683838,
+      "learning_rate": 3.051353334269857e-05,
+      "loss": 2.3001,
+      "step": 130
+    },
+    {
+      "epoch": 0.0452895419187554,
+      "grad_norm": 2.2605419158935547,
+      "learning_rate": 2.9744125114740198e-05,
+      "loss": 2.8268,
+      "step": 131
+    },
+    {
+      "epoch": 0.0456352636127917,
+      "grad_norm": 0.25618210434913635,
+      "learning_rate": 2.898052803073687e-05,
+      "loss": 1.4537,
+      "step": 132
+    },
+    {
+      "epoch": 0.045980985306828,
+      "grad_norm": 0.3234250545501709,
+      "learning_rate": 2.822295085050687e-05,
+      "loss": 1.6086,
+      "step": 133
+    },
+    {
+      "epoch": 0.0463267070008643,
+      "grad_norm": 0.472940057516098,
+      "learning_rate": 2.747160068808693e-05,
+      "loss": 2.1081,
+      "step": 134
+    },
+    {
+      "epoch": 0.046672428694900604,
+      "grad_norm": 0.5351232290267944,
+      "learning_rate": 2.672668295510925e-05,
+      "loss": 2.1168,
+      "step": 135
+    },
+    {
+      "epoch": 0.047018150388936905,
+      "grad_norm": 0.6955279111862183,
+      "learning_rate": 2.5988401304644103e-05,
+      "loss": 2.2151,
+      "step": 136
+    },
+    {
+      "epoch": 0.047363872082973206,
+      "grad_norm": 1.3634015321731567,
+      "learning_rate": 2.5256957575522985e-05,
+      "loss": 2.5617,
+      "step": 137
+    },
+    {
+      "epoch": 0.04770959377700951,
+      "grad_norm": 0.2829039990901947,
+      "learning_rate": 2.453255173715804e-05,
+      "loss": 1.3504,
+      "step": 138
+    },
+    {
+      "epoch": 0.04805531547104581,
+      "grad_norm": 0.2663942277431488,
+      "learning_rate": 2.381538183487233e-05,
+      "loss": 1.4263,
+      "step": 139
+    },
+    {
+      "epoch": 0.04840103716508211,
+      "grad_norm": 0.3799867331981659,
+      "learning_rate": 2.310564393575624e-05,
+      "loss": 2.0021,
+      "step": 140
+    },
+    {
+      "epoch": 0.04874675885911841,
+      "grad_norm": 0.4954136908054352,
+      "learning_rate": 2.2403532075064698e-05,
+      "loss": 2.2683,
+      "step": 141
+    },
+    {
+      "epoch": 0.04909248055315471,
+      "grad_norm": 0.6524636149406433,
+      "learning_rate": 2.1709238203169878e-05,
+      "loss": 2.2982,
+      "step": 142
+    },
+    {
+      "epoch": 0.04943820224719101,
+      "grad_norm": 1.1989296674728394,
+      "learning_rate": 2.1022952133083876e-05,
+      "loss": 2.5617,
+      "step": 143
+    },
+    {
+      "epoch": 0.04978392394122731,
+      "grad_norm": 0.36062389612197876,
+      "learning_rate": 2.034486148856572e-05,
+      "loss": 1.3403,
+      "step": 144
+    },
+    {
+      "epoch": 0.05012964563526361,
+      "grad_norm": 0.2494451403617859,
+      "learning_rate": 1.967515165282694e-05,
+      "loss": 1.3838,
+      "step": 145
+    },
+    {
+      "epoch": 0.050475367329299914,
+      "grad_norm": 0.35722383856773376,
+      "learning_rate": 1.9014005717849613e-05,
+      "loss": 1.8768,
+      "step": 146
+    },
+    {
+      "epoch": 0.050821089023336215,
+      "grad_norm": 0.4523886740207672,
+      "learning_rate": 1.836160443433085e-05,
+      "loss": 2.1016,
+      "step": 147
+    },
+    {
+      "epoch": 0.051166810717372516,
+      "grad_norm": 0.5969461798667908,
+      "learning_rate": 1.7718126162267396e-05,
+      "loss": 2.223,
+      "step": 148
+    },
+    {
+      "epoch": 0.051512532411408817,
+      "grad_norm": 0.998309850692749,
+      "learning_rate": 1.7083746822193733e-05,
+      "loss": 2.3714,
+      "step": 149
+    },
+    {
+      "epoch": 0.05185825410544512,
+      "grad_norm": 3.694688558578491,
+      "learning_rate": 1.6458639847087217e-05,
+      "loss": 2.797,
+      "step": 150
+    },
+    {
+      "epoch": 0.05185825410544512,
+      "eval_loss": 2.138585329055786,
+      "eval_runtime": 129.102,
+      "eval_samples_per_second": 301.878,
+      "eval_steps_per_second": 9.434,
+      "step": 150
     }
   ],
   "logging_steps": 1,
@@ -759,7 +1117,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 6.343151930939802e+16,
+  "total_flos": 9.515693955350528e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null