Training in progress, step 744, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 167832240
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2b3fa76795aac7ce31b919a79ce0da184e31cd62feb3a8b1a2eb838957202798
|
3 |
size 167832240
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 85723732
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e6b0f55b2c27d4899beec2319887ce2031f6991a2c282f12dc8403a58f0d4431
|
3 |
size 85723732
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:390f0dbac2add99b9de0bdfc18171c87964e9eac7b5a9d79ded1784da24c00e8
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e22f1029354fc2004de93e539443b580b540e5122ae66043f3e66f977d6dd066
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": 1.267533302307129,
|
3 |
"best_model_checkpoint": "miner_id_24/checkpoint-700",
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -4971,6 +4971,314 @@
|
|
4971 |
"eval_samples_per_second": 6.272,
|
4972 |
"eval_steps_per_second": 1.568,
|
4973 |
"step": 700
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4974 |
}
|
4975 |
],
|
4976 |
"logging_steps": 1,
|
@@ -4994,12 +5302,12 @@
|
|
4994 |
"should_evaluate": false,
|
4995 |
"should_log": false,
|
4996 |
"should_save": true,
|
4997 |
-
"should_training_stop":
|
4998 |
},
|
4999 |
"attributes": {}
|
5000 |
}
|
5001 |
},
|
5002 |
-
"total_flos": 2.
|
5003 |
"train_batch_size": 4,
|
5004 |
"trial_name": null,
|
5005 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": 1.267533302307129,
|
3 |
"best_model_checkpoint": "miner_id_24/checkpoint-700",
|
4 |
+
"epoch": 0.0776081259045806,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 744,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
4971 |
"eval_samples_per_second": 6.272,
|
4972 |
"eval_steps_per_second": 1.568,
|
4973 |
"step": 700
|
4974 |
+
},
|
4975 |
+
{
|
4976 |
+
"epoch": 0.07312271002568684,
|
4977 |
+
"grad_norm": 0.5641259551048279,
|
4978 |
+
"learning_rate": 1.688839258059971e-06,
|
4979 |
+
"loss": 1.3232,
|
4980 |
+
"step": 701
|
4981 |
+
},
|
4982 |
+
{
|
4983 |
+
"epoch": 0.0732270220228704,
|
4984 |
+
"grad_norm": 0.5656439661979675,
|
4985 |
+
"learning_rate": 1.6114111754051974e-06,
|
4986 |
+
"loss": 1.2328,
|
4987 |
+
"step": 702
|
4988 |
+
},
|
4989 |
+
{
|
4990 |
+
"epoch": 0.07333133402005398,
|
4991 |
+
"grad_norm": 0.5426310896873474,
|
4992 |
+
"learning_rate": 1.5357854948895634e-06,
|
4993 |
+
"loss": 1.251,
|
4994 |
+
"step": 703
|
4995 |
+
},
|
4996 |
+
{
|
4997 |
+
"epoch": 0.07343564601723755,
|
4998 |
+
"grad_norm": 0.5175634622573853,
|
4999 |
+
"learning_rate": 1.4619636019164606e-06,
|
5000 |
+
"loss": 1.1482,
|
5001 |
+
"step": 704
|
5002 |
+
},
|
5003 |
+
{
|
5004 |
+
"epoch": 0.07353995801442113,
|
5005 |
+
"grad_norm": 0.5635023713111877,
|
5006 |
+
"learning_rate": 1.3899468488453583e-06,
|
5007 |
+
"loss": 1.2063,
|
5008 |
+
"step": 705
|
5009 |
+
},
|
5010 |
+
{
|
5011 |
+
"epoch": 0.07364427001160471,
|
5012 |
+
"grad_norm": 0.518434464931488,
|
5013 |
+
"learning_rate": 1.319736554966955e-06,
|
5014 |
+
"loss": 1.4171,
|
5015 |
+
"step": 706
|
5016 |
+
},
|
5017 |
+
{
|
5018 |
+
"epoch": 0.07374858200878828,
|
5019 |
+
"grad_norm": 0.5022286176681519,
|
5020 |
+
"learning_rate": 1.2513340064790102e-06,
|
5021 |
+
"loss": 1.1245,
|
5022 |
+
"step": 707
|
5023 |
+
},
|
5024 |
+
{
|
5025 |
+
"epoch": 0.07385289400597186,
|
5026 |
+
"grad_norm": 0.5017629265785217,
|
5027 |
+
"learning_rate": 1.1847404564628185e-06,
|
5028 |
+
"loss": 1.144,
|
5029 |
+
"step": 708
|
5030 |
+
},
|
5031 |
+
{
|
5032 |
+
"epoch": 0.07395720600315543,
|
5033 |
+
"grad_norm": 0.5079241991043091,
|
5034 |
+
"learning_rate": 1.1199571248602382e-06,
|
5035 |
+
"loss": 1.3892,
|
5036 |
+
"step": 709
|
5037 |
+
},
|
5038 |
+
{
|
5039 |
+
"epoch": 0.07406151800033901,
|
5040 |
+
"grad_norm": 0.558506190776825,
|
5041 |
+
"learning_rate": 1.0569851984513103e-06,
|
5042 |
+
"loss": 1.2448,
|
5043 |
+
"step": 710
|
5044 |
+
},
|
5045 |
+
{
|
5046 |
+
"epoch": 0.0741658299975226,
|
5047 |
+
"grad_norm": 0.5008535981178284,
|
5048 |
+
"learning_rate": 9.958258308325973e-07,
|
5049 |
+
"loss": 1.2267,
|
5050 |
+
"step": 711
|
5051 |
+
},
|
5052 |
+
{
|
5053 |
+
"epoch": 0.07427014199470616,
|
5054 |
+
"grad_norm": 0.53139728307724,
|
5055 |
+
"learning_rate": 9.364801423959235e-07,
|
5056 |
+
"loss": 1.2265,
|
5057 |
+
"step": 712
|
5058 |
+
},
|
5059 |
+
{
|
5060 |
+
"epoch": 0.07437445399188974,
|
5061 |
+
"grad_norm": 0.5449005961418152,
|
5062 |
+
"learning_rate": 8.78949220308023e-07,
|
5063 |
+
"loss": 1.3256,
|
5064 |
+
"step": 713
|
5065 |
+
},
|
5066 |
+
{
|
5067 |
+
"epoch": 0.07447876598907331,
|
5068 |
+
"grad_norm": 0.5834050178527832,
|
5069 |
+
"learning_rate": 8.232341184904457e-07,
|
5070 |
+
"loss": 1.2626,
|
5071 |
+
"step": 714
|
5072 |
+
},
|
5073 |
+
{
|
5074 |
+
"epoch": 0.0745830779862569,
|
5075 |
+
"grad_norm": 0.5395832657814026,
|
5076 |
+
"learning_rate": 7.693358576003617e-07,
|
5077 |
+
"loss": 1.2074,
|
5078 |
+
"step": 715
|
5079 |
+
},
|
5080 |
+
{
|
5081 |
+
"epoch": 0.07468738998344047,
|
5082 |
+
"grad_norm": 0.5254425406455994,
|
5083 |
+
"learning_rate": 7.172554250118535e-07,
|
5084 |
+
"loss": 1.2065,
|
5085 |
+
"step": 716
|
5086 |
+
},
|
5087 |
+
{
|
5088 |
+
"epoch": 0.07479170198062404,
|
5089 |
+
"grad_norm": 0.5222790837287903,
|
5090 |
+
"learning_rate": 6.66993774797775e-07,
|
5091 |
+
"loss": 1.1553,
|
5092 |
+
"step": 717
|
5093 |
+
},
|
5094 |
+
{
|
5095 |
+
"epoch": 0.07489601397780762,
|
5096 |
+
"grad_norm": 0.5531771183013916,
|
5097 |
+
"learning_rate": 6.185518277123214e-07,
|
5098 |
+
"loss": 1.1405,
|
5099 |
+
"step": 718
|
5100 |
+
},
|
5101 |
+
{
|
5102 |
+
"epoch": 0.0750003259749912,
|
5103 |
+
"grad_norm": 0.5848060250282288,
|
5104 |
+
"learning_rate": 5.719304711741535e-07,
|
5105 |
+
"loss": 1.283,
|
5106 |
+
"step": 719
|
5107 |
+
},
|
5108 |
+
{
|
5109 |
+
"epoch": 0.07510463797217477,
|
5110 |
+
"grad_norm": 0.6206434965133667,
|
5111 |
+
"learning_rate": 5.271305592501108e-07,
|
5112 |
+
"loss": 1.2983,
|
5113 |
+
"step": 720
|
5114 |
+
},
|
5115 |
+
{
|
5116 |
+
"epoch": 0.07520894996935835,
|
5117 |
+
"grad_norm": 0.5380674600601196,
|
5118 |
+
"learning_rate": 4.841529126396238e-07,
|
5119 |
+
"loss": 1.4001,
|
5120 |
+
"step": 721
|
5121 |
+
},
|
5122 |
+
{
|
5123 |
+
"epoch": 0.07531326196654192,
|
5124 |
+
"grad_norm": 0.5470457673072815,
|
5125 |
+
"learning_rate": 4.429983186596265e-07,
|
5126 |
+
"loss": 1.2379,
|
5127 |
+
"step": 722
|
5128 |
+
},
|
5129 |
+
{
|
5130 |
+
"epoch": 0.0754175739637255,
|
5131 |
+
"grad_norm": 0.598996102809906,
|
5132 |
+
"learning_rate": 4.036675312301452e-07,
|
5133 |
+
"loss": 1.2304,
|
5134 |
+
"step": 723
|
5135 |
+
},
|
5136 |
+
{
|
5137 |
+
"epoch": 0.07552188596090909,
|
5138 |
+
"grad_norm": 0.521056056022644,
|
5139 |
+
"learning_rate": 3.6616127086051e-07,
|
5140 |
+
"loss": 1.3609,
|
5141 |
+
"step": 724
|
5142 |
+
},
|
5143 |
+
{
|
5144 |
+
"epoch": 0.07562619795809265,
|
5145 |
+
"grad_norm": 0.5366652011871338,
|
5146 |
+
"learning_rate": 3.3048022463612047e-07,
|
5147 |
+
"loss": 1.2715,
|
5148 |
+
"step": 725
|
5149 |
+
},
|
5150 |
+
{
|
5151 |
+
"epoch": 0.07573050995527623,
|
5152 |
+
"grad_norm": 0.5772047638893127,
|
5153 |
+
"learning_rate": 2.9662504620588947e-07,
|
5154 |
+
"loss": 1.1976,
|
5155 |
+
"step": 726
|
5156 |
+
},
|
5157 |
+
{
|
5158 |
+
"epoch": 0.0758348219524598,
|
5159 |
+
"grad_norm": 0.547029435634613,
|
5160 |
+
"learning_rate": 2.6459635577026353e-07,
|
5161 |
+
"loss": 1.3886,
|
5162 |
+
"step": 727
|
5163 |
+
},
|
5164 |
+
{
|
5165 |
+
"epoch": 0.07593913394964338,
|
5166 |
+
"grad_norm": 0.5879443287849426,
|
5167 |
+
"learning_rate": 2.343947400698432e-07,
|
5168 |
+
"loss": 1.3782,
|
5169 |
+
"step": 728
|
5170 |
+
},
|
5171 |
+
{
|
5172 |
+
"epoch": 0.07604344594682697,
|
5173 |
+
"grad_norm": 0.5763773322105408,
|
5174 |
+
"learning_rate": 2.0602075237465823e-07,
|
5175 |
+
"loss": 1.2558,
|
5176 |
+
"step": 729
|
5177 |
+
},
|
5178 |
+
{
|
5179 |
+
"epoch": 0.07614775794401053,
|
5180 |
+
"grad_norm": 0.7472733855247498,
|
5181 |
+
"learning_rate": 1.7947491247399806e-07,
|
5182 |
+
"loss": 1.4806,
|
5183 |
+
"step": 730
|
5184 |
+
},
|
5185 |
+
{
|
5186 |
+
"epoch": 0.07625206994119411,
|
5187 |
+
"grad_norm": 0.54900723695755,
|
5188 |
+
"learning_rate": 1.5475770666694144e-07,
|
5189 |
+
"loss": 1.1513,
|
5190 |
+
"step": 731
|
5191 |
+
},
|
5192 |
+
{
|
5193 |
+
"epoch": 0.07635638193837768,
|
5194 |
+
"grad_norm": 0.5599141120910645,
|
5195 |
+
"learning_rate": 1.318695877533971e-07,
|
5196 |
+
"loss": 1.1123,
|
5197 |
+
"step": 732
|
5198 |
+
},
|
5199 |
+
{
|
5200 |
+
"epoch": 0.07646069393556126,
|
5201 |
+
"grad_norm": 0.5251076817512512,
|
5202 |
+
"learning_rate": 1.1081097502584348e-07,
|
5203 |
+
"loss": 1.1586,
|
5204 |
+
"step": 733
|
5205 |
+
},
|
5206 |
+
{
|
5207 |
+
"epoch": 0.07656500593274485,
|
5208 |
+
"grad_norm": 0.5731672644615173,
|
5209 |
+
"learning_rate": 9.158225426160183e-08,
|
5210 |
+
"loss": 1.1249,
|
5211 |
+
"step": 734
|
5212 |
+
},
|
5213 |
+
{
|
5214 |
+
"epoch": 0.07666931792992841,
|
5215 |
+
"grad_norm": 0.5583503842353821,
|
5216 |
+
"learning_rate": 7.418377771585273e-08,
|
5217 |
+
"loss": 1.184,
|
5218 |
+
"step": 735
|
5219 |
+
},
|
5220 |
+
{
|
5221 |
+
"epoch": 0.076773629927112,
|
5222 |
+
"grad_norm": 0.7562082409858704,
|
5223 |
+
"learning_rate": 5.86158641150969e-08,
|
5224 |
+
"loss": 1.3864,
|
5225 |
+
"step": 736
|
5226 |
+
},
|
5227 |
+
{
|
5228 |
+
"epoch": 0.07687794192429556,
|
5229 |
+
"grad_norm": 0.492931604385376,
|
5230 |
+
"learning_rate": 4.487879865133771e-08,
|
5231 |
+
"loss": 1.2668,
|
5232 |
+
"step": 737
|
5233 |
+
},
|
5234 |
+
{
|
5235 |
+
"epoch": 0.07698225392147914,
|
5236 |
+
"grad_norm": 0.5797216296195984,
|
5237 |
+
"learning_rate": 3.2972832976918554e-08,
|
5238 |
+
"loss": 1.2089,
|
5239 |
+
"step": 738
|
5240 |
+
},
|
5241 |
+
{
|
5242 |
+
"epoch": 0.07708656591866273,
|
5243 |
+
"grad_norm": 0.5604730844497681,
|
5244 |
+
"learning_rate": 2.2898185199826673e-08,
|
5245 |
+
"loss": 1.404,
|
5246 |
+
"step": 739
|
5247 |
+
},
|
5248 |
+
{
|
5249 |
+
"epoch": 0.07719087791584629,
|
5250 |
+
"grad_norm": 0.5185406804084778,
|
5251 |
+
"learning_rate": 1.4655039879740706e-08,
|
5252 |
+
"loss": 1.2627,
|
5253 |
+
"step": 740
|
5254 |
+
},
|
5255 |
+
{
|
5256 |
+
"epoch": 0.07729518991302987,
|
5257 |
+
"grad_norm": 0.5420083999633789,
|
5258 |
+
"learning_rate": 8.243548024655656e-09,
|
5259 |
+
"loss": 1.1388,
|
5260 |
+
"step": 741
|
5261 |
+
},
|
5262 |
+
{
|
5263 |
+
"epoch": 0.07739950191021344,
|
5264 |
+
"grad_norm": 0.5024222731590271,
|
5265 |
+
"learning_rate": 3.663827088085103e-09,
|
5266 |
+
"loss": 1.2322,
|
5267 |
+
"step": 742
|
5268 |
+
},
|
5269 |
+
{
|
5270 |
+
"epoch": 0.07750381390739702,
|
5271 |
+
"grad_norm": 0.6090103387832642,
|
5272 |
+
"learning_rate": 9.159609669406877e-10,
|
5273 |
+
"loss": 1.3192,
|
5274 |
+
"step": 743
|
5275 |
+
},
|
5276 |
+
{
|
5277 |
+
"epoch": 0.0776081259045806,
|
5278 |
+
"grad_norm": 0.53780597448349,
|
5279 |
+
"learning_rate": 0.0,
|
5280 |
+
"loss": 1.13,
|
5281 |
+
"step": 744
|
5282 |
}
|
5283 |
],
|
5284 |
"logging_steps": 1,
|
|
|
5302 |
"should_evaluate": false,
|
5303 |
"should_log": false,
|
5304 |
"should_save": true,
|
5305 |
+
"should_training_stop": true
|
5306 |
},
|
5307 |
"attributes": {}
|
5308 |
}
|
5309 |
},
|
5310 |
+
"total_flos": 2.381458180229038e+18,
|
5311 |
"train_batch_size": 4,
|
5312 |
"trial_name": null,
|
5313 |
"trial_params": null
|