Training in progress, step 769, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 78480072
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bf532bff9ad6147dd88328720e03b44878aa6b0931a77cd997d677bdba34e871
|
3 |
size 78480072
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 40131524
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:21819e721894f2463b4a3a69c9c1c6f71462d4e01d9fe1b76da32267867a5726
|
3 |
size 40131524
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:72cb262ec7962a2bec51b38ffd60bf00b445c7c161ad40a55d35f9ba16677aa8
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3bcc6b391be8ff7ed73c430f9b92a688f34c68358531300f1a61ed95351f819a
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": 1.4688050746917725,
|
3 |
"best_model_checkpoint": "miner_id_24/checkpoint-700",
|
4 |
-
"epoch":
|
5 |
"eval_steps": 50,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -5027,6 +5027,497 @@
|
|
5027 |
"eval_samples_per_second": 27.933,
|
5028 |
"eval_steps_per_second": 6.986,
|
5029 |
"step": 700
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5030 |
}
|
5031 |
],
|
5032 |
"logging_steps": 1,
|
@@ -5050,12 +5541,12 @@
|
|
5050 |
"should_evaluate": false,
|
5051 |
"should_log": false,
|
5052 |
"should_save": true,
|
5053 |
-
"should_training_stop":
|
5054 |
},
|
5055 |
"attributes": {}
|
5056 |
}
|
5057 |
},
|
5058 |
-
"total_flos":
|
5059 |
"train_batch_size": 4,
|
5060 |
"trial_name": null,
|
5061 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": 1.4688050746917725,
|
3 |
"best_model_checkpoint": "miner_id_24/checkpoint-700",
|
4 |
+
"epoch": 1.0004064049418842,
|
5 |
"eval_steps": 50,
|
6 |
+
"global_step": 769,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
5027 |
"eval_samples_per_second": 27.933,
|
5028 |
"eval_steps_per_second": 6.986,
|
5029 |
"step": 700
|
5030 |
+
},
|
5031 |
+
{
|
5032 |
+
"epoch": 0.9116475656343981,
|
5033 |
+
"grad_norm": 0.7602055668830872,
|
5034 |
+
"learning_rate": 3.934912380055289e-06,
|
5035 |
+
"loss": 1.4222,
|
5036 |
+
"step": 701
|
5037 |
+
},
|
5038 |
+
{
|
5039 |
+
"epoch": 0.9129480614484272,
|
5040 |
+
"grad_norm": 0.7412660717964172,
|
5041 |
+
"learning_rate": 3.820767937093095e-06,
|
5042 |
+
"loss": 1.4024,
|
5043 |
+
"step": 702
|
5044 |
+
},
|
5045 |
+
{
|
5046 |
+
"epoch": 0.9142485572624564,
|
5047 |
+
"grad_norm": 0.6979288458824158,
|
5048 |
+
"learning_rate": 3.7082712652200867e-06,
|
5049 |
+
"loss": 1.4652,
|
5050 |
+
"step": 703
|
5051 |
+
},
|
5052 |
+
{
|
5053 |
+
"epoch": 0.9155490530764854,
|
5054 |
+
"grad_norm": 0.659394383430481,
|
5055 |
+
"learning_rate": 3.5974242917625187e-06,
|
5056 |
+
"loss": 1.4534,
|
5057 |
+
"step": 704
|
5058 |
+
},
|
5059 |
+
{
|
5060 |
+
"epoch": 0.9168495488905145,
|
5061 |
+
"grad_norm": 0.7618691325187683,
|
5062 |
+
"learning_rate": 3.488228915783631e-06,
|
5063 |
+
"loss": 1.3859,
|
5064 |
+
"step": 705
|
5065 |
+
},
|
5066 |
+
{
|
5067 |
+
"epoch": 0.9181500447045436,
|
5068 |
+
"grad_norm": 0.6013392806053162,
|
5069 |
+
"learning_rate": 3.380687008050909e-06,
|
5070 |
+
"loss": 1.4909,
|
5071 |
+
"step": 706
|
5072 |
+
},
|
5073 |
+
{
|
5074 |
+
"epoch": 0.9194505405185727,
|
5075 |
+
"grad_norm": 0.6869235634803772,
|
5076 |
+
"learning_rate": 3.2748004110041863e-06,
|
5077 |
+
"loss": 1.3593,
|
5078 |
+
"step": 707
|
5079 |
+
},
|
5080 |
+
{
|
5081 |
+
"epoch": 0.9207510363326018,
|
5082 |
+
"grad_norm": 0.7875847816467285,
|
5083 |
+
"learning_rate": 3.1705709387239934e-06,
|
5084 |
+
"loss": 1.5395,
|
5085 |
+
"step": 708
|
5086 |
+
},
|
5087 |
+
{
|
5088 |
+
"epoch": 0.9220515321466309,
|
5089 |
+
"grad_norm": 0.7080655097961426,
|
5090 |
+
"learning_rate": 3.068000376900515e-06,
|
5091 |
+
"loss": 1.4826,
|
5092 |
+
"step": 709
|
5093 |
+
},
|
5094 |
+
{
|
5095 |
+
"epoch": 0.92335202796066,
|
5096 |
+
"grad_norm": 0.7241777181625366,
|
5097 |
+
"learning_rate": 2.9670904828030033e-06,
|
5098 |
+
"loss": 1.4333,
|
5099 |
+
"step": 710
|
5100 |
+
},
|
5101 |
+
{
|
5102 |
+
"epoch": 0.9246525237746891,
|
5103 |
+
"grad_norm": 0.745314359664917,
|
5104 |
+
"learning_rate": 2.8678429852496467e-06,
|
5105 |
+
"loss": 1.3511,
|
5106 |
+
"step": 711
|
5107 |
+
},
|
5108 |
+
{
|
5109 |
+
"epoch": 0.9259530195887182,
|
5110 |
+
"grad_norm": 0.7951449751853943,
|
5111 |
+
"learning_rate": 2.770259584577972e-06,
|
5112 |
+
"loss": 1.4815,
|
5113 |
+
"step": 712
|
5114 |
+
},
|
5115 |
+
{
|
5116 |
+
"epoch": 0.9272535154027473,
|
5117 |
+
"grad_norm": 0.6265352964401245,
|
5118 |
+
"learning_rate": 2.6743419526157e-06,
|
5119 |
+
"loss": 1.511,
|
5120 |
+
"step": 713
|
5121 |
+
},
|
5122 |
+
{
|
5123 |
+
"epoch": 0.9285540112167764,
|
5124 |
+
"grad_norm": 0.6809277534484863,
|
5125 |
+
"learning_rate": 2.580091732652101e-06,
|
5126 |
+
"loss": 1.4647,
|
5127 |
+
"step": 714
|
5128 |
+
},
|
5129 |
+
{
|
5130 |
+
"epoch": 0.9298545070308055,
|
5131 |
+
"grad_norm": 1.0379259586334229,
|
5132 |
+
"learning_rate": 2.4875105394098654e-06,
|
5133 |
+
"loss": 1.6214,
|
5134 |
+
"step": 715
|
5135 |
+
},
|
5136 |
+
{
|
5137 |
+
"epoch": 0.9311550028448345,
|
5138 |
+
"grad_norm": 0.8921117782592773,
|
5139 |
+
"learning_rate": 2.3965999590174095e-06,
|
5140 |
+
"loss": 1.5787,
|
5141 |
+
"step": 716
|
5142 |
+
},
|
5143 |
+
{
|
5144 |
+
"epoch": 0.9324554986588637,
|
5145 |
+
"grad_norm": 0.8291761875152588,
|
5146 |
+
"learning_rate": 2.3073615489817235e-06,
|
5147 |
+
"loss": 1.3749,
|
5148 |
+
"step": 717
|
5149 |
+
},
|
5150 |
+
{
|
5151 |
+
"epoch": 0.9337559944728928,
|
5152 |
+
"grad_norm": 0.734061598777771,
|
5153 |
+
"learning_rate": 2.219796838161681e-06,
|
5154 |
+
"loss": 1.3417,
|
5155 |
+
"step": 718
|
5156 |
+
},
|
5157 |
+
{
|
5158 |
+
"epoch": 0.9350564902869218,
|
5159 |
+
"grad_norm": 0.7294670343399048,
|
5160 |
+
"learning_rate": 2.1339073267418464e-06,
|
5161 |
+
"loss": 1.42,
|
5162 |
+
"step": 719
|
5163 |
+
},
|
5164 |
+
{
|
5165 |
+
"epoch": 0.936356986100951,
|
5166 |
+
"grad_norm": 0.7059687972068787,
|
5167 |
+
"learning_rate": 2.0496944862067656e-06,
|
5168 |
+
"loss": 1.5228,
|
5169 |
+
"step": 720
|
5170 |
+
},
|
5171 |
+
{
|
5172 |
+
"epoch": 0.9376574819149801,
|
5173 |
+
"grad_norm": 0.7029430270195007,
|
5174 |
+
"learning_rate": 1.967159759315751e-06,
|
5175 |
+
"loss": 1.4259,
|
5176 |
+
"step": 721
|
5177 |
+
},
|
5178 |
+
{
|
5179 |
+
"epoch": 0.9389579777290091,
|
5180 |
+
"grad_norm": 0.7115610241889954,
|
5181 |
+
"learning_rate": 1.8863045600782003e-06,
|
5182 |
+
"loss": 1.5353,
|
5183 |
+
"step": 722
|
5184 |
+
},
|
5185 |
+
{
|
5186 |
+
"epoch": 0.9402584735430383,
|
5187 |
+
"grad_norm": 0.6966084241867065,
|
5188 |
+
"learning_rate": 1.8071302737293295e-06,
|
5189 |
+
"loss": 1.4112,
|
5190 |
+
"step": 723
|
5191 |
+
},
|
5192 |
+
{
|
5193 |
+
"epoch": 0.9415589693570674,
|
5194 |
+
"grad_norm": 0.6711810827255249,
|
5195 |
+
"learning_rate": 1.7296382567064672e-06,
|
5196 |
+
"loss": 1.5192,
|
5197 |
+
"step": 724
|
5198 |
+
},
|
5199 |
+
{
|
5200 |
+
"epoch": 0.9428594651710965,
|
5201 |
+
"grad_norm": 0.7636929154396057,
|
5202 |
+
"learning_rate": 1.6538298366257976e-06,
|
5203 |
+
"loss": 1.3406,
|
5204 |
+
"step": 725
|
5205 |
+
},
|
5206 |
+
{
|
5207 |
+
"epoch": 0.9441599609851256,
|
5208 |
+
"grad_norm": 0.7429101467132568,
|
5209 |
+
"learning_rate": 1.57970631225961e-06,
|
5210 |
+
"loss": 1.4707,
|
5211 |
+
"step": 726
|
5212 |
+
},
|
5213 |
+
{
|
5214 |
+
"epoch": 0.9454604567991547,
|
5215 |
+
"grad_norm": 0.7741535902023315,
|
5216 |
+
"learning_rate": 1.5072689535141072e-06,
|
5217 |
+
"loss": 1.5783,
|
5218 |
+
"step": 727
|
5219 |
+
},
|
5220 |
+
{
|
5221 |
+
"epoch": 0.9467609526131838,
|
5222 |
+
"grad_norm": 0.7151786684989929,
|
5223 |
+
"learning_rate": 1.4365190014075437e-06,
|
5224 |
+
"loss": 1.5202,
|
5225 |
+
"step": 728
|
5226 |
+
},
|
5227 |
+
{
|
5228 |
+
"epoch": 0.9480614484272128,
|
5229 |
+
"grad_norm": 0.793623685836792,
|
5230 |
+
"learning_rate": 1.3674576680490659e-06,
|
5231 |
+
"loss": 1.4622,
|
5232 |
+
"step": 729
|
5233 |
+
},
|
5234 |
+
{
|
5235 |
+
"epoch": 0.949361944241242,
|
5236 |
+
"grad_norm": 0.6933899521827698,
|
5237 |
+
"learning_rate": 1.3000861366179062e-06,
|
5238 |
+
"loss": 1.4376,
|
5239 |
+
"step": 730
|
5240 |
+
},
|
5241 |
+
{
|
5242 |
+
"epoch": 0.9506624400552711,
|
5243 |
+
"grad_norm": 0.826909065246582,
|
5244 |
+
"learning_rate": 1.234405561343066e-06,
|
5245 |
+
"loss": 1.6414,
|
5246 |
+
"step": 731
|
5247 |
+
},
|
5248 |
+
{
|
5249 |
+
"epoch": 0.9519629358693001,
|
5250 |
+
"grad_norm": 0.7606424689292908,
|
5251 |
+
"learning_rate": 1.1704170674836313e-06,
|
5252 |
+
"loss": 1.474,
|
5253 |
+
"step": 732
|
5254 |
+
},
|
5255 |
+
{
|
5256 |
+
"epoch": 0.9532634316833293,
|
5257 |
+
"grad_norm": 0.7522351145744324,
|
5258 |
+
"learning_rate": 1.1081217513094212e-06,
|
5259 |
+
"loss": 1.353,
|
5260 |
+
"step": 733
|
5261 |
+
},
|
5262 |
+
{
|
5263 |
+
"epoch": 0.9545639274973584,
|
5264 |
+
"grad_norm": 0.6792885661125183,
|
5265 |
+
"learning_rate": 1.047520680082248e-06,
|
5266 |
+
"loss": 1.4632,
|
5267 |
+
"step": 734
|
5268 |
+
},
|
5269 |
+
{
|
5270 |
+
"epoch": 0.9558644233113874,
|
5271 |
+
"grad_norm": 0.7526161074638367,
|
5272 |
+
"learning_rate": 9.886148920376203e-07,
|
5273 |
+
"loss": 1.4683,
|
5274 |
+
"step": 735
|
5275 |
+
},
|
5276 |
+
{
|
5277 |
+
"epoch": 0.9571649191254166,
|
5278 |
+
"grad_norm": 0.7043469548225403,
|
5279 |
+
"learning_rate": 9.314053963669245e-07,
|
5280 |
+
"loss": 1.64,
|
5281 |
+
"step": 736
|
5282 |
+
},
|
5283 |
+
{
|
5284 |
+
"epoch": 0.9584654149394457,
|
5285 |
+
"grad_norm": 0.6877920627593994,
|
5286 |
+
"learning_rate": 8.75893173200204e-07,
|
5287 |
+
"loss": 1.4917,
|
5288 |
+
"step": 737
|
5289 |
+
},
|
5290 |
+
{
|
5291 |
+
"epoch": 0.9597659107534747,
|
5292 |
+
"grad_norm": 0.7135722637176514,
|
5293 |
+
"learning_rate": 8.220791735892964e-07,
|
5294 |
+
"loss": 1.4555,
|
5295 |
+
"step": 738
|
5296 |
+
},
|
5297 |
+
{
|
5298 |
+
"epoch": 0.9610664065675039,
|
5299 |
+
"grad_norm": 0.7366563677787781,
|
5300 |
+
"learning_rate": 7.699643194915784e-07,
|
5301 |
+
"loss": 1.4801,
|
5302 |
+
"step": 739
|
5303 |
+
},
|
5304 |
+
{
|
5305 |
+
"epoch": 0.962366902381533,
|
5306 |
+
"grad_norm": 0.600986123085022,
|
5307 |
+
"learning_rate": 7.1954950375418e-07,
|
5308 |
+
"loss": 1.4188,
|
5309 |
+
"step": 740
|
5310 |
+
},
|
5311 |
+
{
|
5312 |
+
"epoch": 0.963667398195562,
|
5313 |
+
"grad_norm": 0.682475745677948,
|
5314 |
+
"learning_rate": 6.708355900986396e-07,
|
5315 |
+
"loss": 1.4593,
|
5316 |
+
"step": 741
|
5317 |
+
},
|
5318 |
+
{
|
5319 |
+
"epoch": 0.9649678940095912,
|
5320 |
+
"grad_norm": 0.8102880120277405,
|
5321 |
+
"learning_rate": 6.238234131061616e-07,
|
5322 |
+
"loss": 1.4449,
|
5323 |
+
"step": 742
|
5324 |
+
},
|
5325 |
+
{
|
5326 |
+
"epoch": 0.9662683898236203,
|
5327 |
+
"grad_norm": 0.6956253051757812,
|
5328 |
+
"learning_rate": 5.785137782032824e-07,
|
5329 |
+
"loss": 1.3999,
|
5330 |
+
"step": 743
|
5331 |
+
},
|
5332 |
+
{
|
5333 |
+
"epoch": 0.9675688856376493,
|
5334 |
+
"grad_norm": 0.7653059363365173,
|
5335 |
+
"learning_rate": 5.349074616480931e-07,
|
5336 |
+
"loss": 1.5632,
|
5337 |
+
"step": 744
|
5338 |
+
},
|
5339 |
+
{
|
5340 |
+
"epoch": 0.9688693814516784,
|
5341 |
+
"grad_norm": 0.784920334815979,
|
5342 |
+
"learning_rate": 4.93005210516928e-07,
|
5343 |
+
"loss": 1.5078,
|
5344 |
+
"step": 745
|
5345 |
+
},
|
5346 |
+
{
|
5347 |
+
"epoch": 0.9701698772657076,
|
5348 |
+
"grad_norm": 0.8322311043739319,
|
5349 |
+
"learning_rate": 4.5280774269154115e-07,
|
5350 |
+
"loss": 1.5513,
|
5351 |
+
"step": 746
|
5352 |
+
},
|
5353 |
+
{
|
5354 |
+
"epoch": 0.9714703730797366,
|
5355 |
+
"grad_norm": 0.8491717576980591,
|
5356 |
+
"learning_rate": 4.143157468468717e-07,
|
5357 |
+
"loss": 1.4737,
|
5358 |
+
"step": 747
|
5359 |
+
},
|
5360 |
+
{
|
5361 |
+
"epoch": 0.9727708688937657,
|
5362 |
+
"grad_norm": 0.6789981722831726,
|
5363 |
+
"learning_rate": 3.775298824391982e-07,
|
5364 |
+
"loss": 1.5004,
|
5365 |
+
"step": 748
|
5366 |
+
},
|
5367 |
+
{
|
5368 |
+
"epoch": 0.9740713647077949,
|
5369 |
+
"grad_norm": 0.7747517824172974,
|
5370 |
+
"learning_rate": 3.424507796948362e-07,
|
5371 |
+
"loss": 1.5806,
|
5372 |
+
"step": 749
|
5373 |
+
},
|
5374 |
+
{
|
5375 |
+
"epoch": 0.9753718605218239,
|
5376 |
+
"grad_norm": 0.8059523105621338,
|
5377 |
+
"learning_rate": 3.090790395993692e-07,
|
5378 |
+
"loss": 1.4772,
|
5379 |
+
"step": 750
|
5380 |
+
},
|
5381 |
+
{
|
5382 |
+
"epoch": 0.9753718605218239,
|
5383 |
+
"eval_loss": 1.4685416221618652,
|
5384 |
+
"eval_runtime": 92.6813,
|
5385 |
+
"eval_samples_per_second": 27.956,
|
5386 |
+
"eval_steps_per_second": 6.992,
|
5387 |
+
"step": 750
|
5388 |
+
},
|
5389 |
+
{
|
5390 |
+
"epoch": 0.976672356335853,
|
5391 |
+
"grad_norm": 0.6508041024208069,
|
5392 |
+
"learning_rate": 2.774152338873126e-07,
|
5393 |
+
"loss": 1.4536,
|
5394 |
+
"step": 751
|
5395 |
+
},
|
5396 |
+
{
|
5397 |
+
"epoch": 0.9779728521498822,
|
5398 |
+
"grad_norm": 0.7037675380706787,
|
5399 |
+
"learning_rate": 2.474599050323989e-07,
|
5400 |
+
"loss": 1.4824,
|
5401 |
+
"step": 752
|
5402 |
+
},
|
5403 |
+
{
|
5404 |
+
"epoch": 0.9792733479639112,
|
5405 |
+
"grad_norm": 0.7295902967453003,
|
5406 |
+
"learning_rate": 2.1921356623816336e-07,
|
5407 |
+
"loss": 1.3991,
|
5408 |
+
"step": 753
|
5409 |
+
},
|
5410 |
+
{
|
5411 |
+
"epoch": 0.9805738437779403,
|
5412 |
+
"grad_norm": 0.6926706433296204,
|
5413 |
+
"learning_rate": 1.9267670142926187e-07,
|
5414 |
+
"loss": 1.3409,
|
5415 |
+
"step": 754
|
5416 |
+
},
|
5417 |
+
{
|
5418 |
+
"epoch": 0.9818743395919695,
|
5419 |
+
"grad_norm": 0.8004979491233826,
|
5420 |
+
"learning_rate": 1.6784976524312213e-07,
|
5421 |
+
"loss": 1.4575,
|
5422 |
+
"step": 755
|
5423 |
+
},
|
5424 |
+
{
|
5425 |
+
"epoch": 0.9831748354059985,
|
5426 |
+
"grad_norm": 0.6473626494407654,
|
5427 |
+
"learning_rate": 1.4473318302216098e-07,
|
5428 |
+
"loss": 1.2736,
|
5429 |
+
"step": 756
|
5430 |
+
},
|
5431 |
+
{
|
5432 |
+
"epoch": 0.9844753312200276,
|
5433 |
+
"grad_norm": 0.7723749876022339,
|
5434 |
+
"learning_rate": 1.2332735080651248e-07,
|
5435 |
+
"loss": 1.386,
|
5436 |
+
"step": 757
|
5437 |
+
},
|
5438 |
+
{
|
5439 |
+
"epoch": 0.9857758270340568,
|
5440 |
+
"grad_norm": 0.6859511733055115,
|
5441 |
+
"learning_rate": 1.0363263532724432e-07,
|
5442 |
+
"loss": 1.4629,
|
5443 |
+
"step": 758
|
5444 |
+
},
|
5445 |
+
{
|
5446 |
+
"epoch": 0.9870763228480859,
|
5447 |
+
"grad_norm": 0.7101691961288452,
|
5448 |
+
"learning_rate": 8.564937400004081e-08,
|
5449 |
+
"loss": 1.534,
|
5450 |
+
"step": 759
|
5451 |
+
},
|
5452 |
+
{
|
5453 |
+
"epoch": 0.9883768186621149,
|
5454 |
+
"grad_norm": 0.7447732090950012,
|
5455 |
+
"learning_rate": 6.9377874919474e-08,
|
5456 |
+
"loss": 1.3709,
|
5457 |
+
"step": 760
|
5458 |
+
},
|
5459 |
+
{
|
5460 |
+
"epoch": 0.989677314476144,
|
5461 |
+
"grad_norm": 0.6958511471748352,
|
5462 |
+
"learning_rate": 5.4818416853674726e-08,
|
5463 |
+
"loss": 1.3528,
|
5464 |
+
"step": 761
|
5465 |
+
},
|
5466 |
+
{
|
5467 |
+
"epoch": 0.9909778102901732,
|
5468 |
+
"grad_norm": 0.8112965226173401,
|
5469 |
+
"learning_rate": 4.1971249239591834e-08,
|
5470 |
+
"loss": 1.6432,
|
5471 |
+
"step": 762
|
5472 |
+
},
|
5473 |
+
{
|
5474 |
+
"epoch": 0.9922783061042022,
|
5475 |
+
"grad_norm": 0.7658011317253113,
|
5476 |
+
"learning_rate": 3.0836592178717926e-08,
|
5477 |
+
"loss": 1.5357,
|
5478 |
+
"step": 763
|
5479 |
+
},
|
5480 |
+
{
|
5481 |
+
"epoch": 0.9935788019182313,
|
5482 |
+
"grad_norm": 0.7664233446121216,
|
5483 |
+
"learning_rate": 2.141463643328123e-08,
|
5484 |
+
"loss": 1.5152,
|
5485 |
+
"step": 764
|
5486 |
+
},
|
5487 |
+
{
|
5488 |
+
"epoch": 0.9948792977322605,
|
5489 |
+
"grad_norm": 0.6668312549591064,
|
5490 |
+
"learning_rate": 1.370554342302599e-08,
|
5491 |
+
"loss": 1.4316,
|
5492 |
+
"step": 765
|
5493 |
+
},
|
5494 |
+
{
|
5495 |
+
"epoch": 0.9961797935462895,
|
5496 |
+
"grad_norm": 0.7010061144828796,
|
5497 |
+
"learning_rate": 7.709445222403577e-09,
|
5498 |
+
"loss": 1.4363,
|
5499 |
+
"step": 766
|
5500 |
+
},
|
5501 |
+
{
|
5502 |
+
"epoch": 0.9974802893603186,
|
5503 |
+
"grad_norm": 0.7528097629547119,
|
5504 |
+
"learning_rate": 3.4264445583631622e-09,
|
5505 |
+
"loss": 1.6157,
|
5506 |
+
"step": 767
|
5507 |
+
},
|
5508 |
+
{
|
5509 |
+
"epoch": 0.9987807851743478,
|
5510 |
+
"grad_norm": 0.662280261516571,
|
5511 |
+
"learning_rate": 8.566148085309423e-10,
|
5512 |
+
"loss": 1.3786,
|
5513 |
+
"step": 768
|
5514 |
+
},
|
5515 |
+
{
|
5516 |
+
"epoch": 1.0004064049418842,
|
5517 |
+
"grad_norm": 0.988018274307251,
|
5518 |
+
"learning_rate": 0.0,
|
5519 |
+
"loss": 1.7631,
|
5520 |
+
"step": 769
|
5521 |
}
|
5522 |
],
|
5523 |
"logging_steps": 1,
|
|
|
5541 |
"should_evaluate": false,
|
5542 |
"should_log": false,
|
5543 |
"should_save": true,
|
5544 |
+
"should_training_stop": true
|
5545 |
},
|
5546 |
"attributes": {}
|
5547 |
}
|
5548 |
},
|
5549 |
+
"total_flos": 6.052677812433715e+17,
|
5550 |
"train_batch_size": 4,
|
5551 |
"trial_name": null,
|
5552 |
"trial_params": null
|