Upload checkpoint 8702
Browse files- model-00001-of-00002.safetensors +1 -1
- model-00002-of-00002.safetensors +1 -1
- optimizer.pt +1 -1
- scheduler.pt +1 -1
- trainer_state.json +144 -4
model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4957560304
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:89b6ed8f166125c4e0ccb4438463feac8c4befdcfa3b5fc23df50b931dd37964
|
3 |
size 4957560304
|
model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3989163248
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dfeea3b480b0ac3a7be7edb3e3d45b0b6eed0bc230ddbdd298ef3463ce89ffd9
|
3 |
size 3989163248
|
optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17893865224
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fc5cc673934837b111c47f04c42854cbbf4155db979be7dde3c8474b55635ed6
|
3 |
size 17893865224
|
scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bb0516760d88d4baa388b6e998401e0078c7ad3407932309df513a21a23fcf7a
|
3 |
size 1064
|
trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -20167,6 +20167,146 @@
|
|
20167 |
"learning_rate": 2.5107040290095474e-08,
|
20168 |
"loss": 1.6549,
|
20169 |
"step": 8640
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20170 |
}
|
20171 |
],
|
20172 |
"logging_steps": 3,
|
@@ -20181,12 +20321,12 @@
|
|
20181 |
"should_evaluate": false,
|
20182 |
"should_log": false,
|
20183 |
"should_save": true,
|
20184 |
-
"should_training_stop":
|
20185 |
},
|
20186 |
"attributes": {}
|
20187 |
}
|
20188 |
},
|
20189 |
-
"total_flos": 3.
|
20190 |
"train_batch_size": 4,
|
20191 |
"trial_name": null,
|
20192 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.9999425452456191,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 8702,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
20167 |
"learning_rate": 2.5107040290095474e-08,
|
20168 |
"loss": 1.6549,
|
20169 |
"step": 8640
|
20170 |
+
},
|
20171 |
+
{
|
20172 |
+
"epoch": 0.9931628842286699,
|
20173 |
+
"grad_norm": 0.38671875,
|
20174 |
+
"learning_rate": 2.2736199954154213e-08,
|
20175 |
+
"loss": 1.6457,
|
20176 |
+
"step": 8643
|
20177 |
+
},
|
20178 |
+
{
|
20179 |
+
"epoch": 0.9935076127549555,
|
20180 |
+
"grad_norm": 0.3828125,
|
20181 |
+
"learning_rate": 2.0482904581231588e-08,
|
20182 |
+
"loss": 1.6282,
|
20183 |
+
"step": 8646
|
20184 |
+
},
|
20185 |
+
{
|
20186 |
+
"epoch": 0.993852341281241,
|
20187 |
+
"grad_norm": 0.376953125,
|
20188 |
+
"learning_rate": 1.834715682056398e-08,
|
20189 |
+
"loss": 1.654,
|
20190 |
+
"step": 8649
|
20191 |
+
},
|
20192 |
+
{
|
20193 |
+
"epoch": 0.9941970698075265,
|
20194 |
+
"grad_norm": 0.388671875,
|
20195 |
+
"learning_rate": 1.632895918319832e-08,
|
20196 |
+
"loss": 1.6449,
|
20197 |
+
"step": 8652
|
20198 |
+
},
|
20199 |
+
{
|
20200 |
+
"epoch": 0.9945417983338122,
|
20201 |
+
"grad_norm": 0.376953125,
|
20202 |
+
"learning_rate": 1.4428314041958767e-08,
|
20203 |
+
"loss": 1.6957,
|
20204 |
+
"step": 8655
|
20205 |
+
},
|
20206 |
+
{
|
20207 |
+
"epoch": 0.9948865268600977,
|
20208 |
+
"grad_norm": 0.380859375,
|
20209 |
+
"learning_rate": 1.2645223631457815e-08,
|
20210 |
+
"loss": 1.6494,
|
20211 |
+
"step": 8658
|
20212 |
+
},
|
20213 |
+
{
|
20214 |
+
"epoch": 0.9952312553863832,
|
20215 |
+
"grad_norm": 0.39453125,
|
20216 |
+
"learning_rate": 1.0979690048107394e-08,
|
20217 |
+
"loss": 1.6152,
|
20218 |
+
"step": 8661
|
20219 |
+
},
|
20220 |
+
{
|
20221 |
+
"epoch": 0.9955759839126688,
|
20222 |
+
"grad_norm": 0.37109375,
|
20223 |
+
"learning_rate": 9.431715250118878e-09,
|
20224 |
+
"loss": 1.5803,
|
20225 |
+
"step": 8664
|
20226 |
+
},
|
20227 |
+
{
|
20228 |
+
"epoch": 0.9959207124389543,
|
20229 |
+
"grad_norm": 0.400390625,
|
20230 |
+
"learning_rate": 8.001301057447563e-09,
|
20231 |
+
"loss": 1.6867,
|
20232 |
+
"step": 8667
|
20233 |
+
},
|
20234 |
+
{
|
20235 |
+
"epoch": 0.9962654409652398,
|
20236 |
+
"grad_norm": 0.392578125,
|
20237 |
+
"learning_rate": 6.688449151881493e-09,
|
20238 |
+
"loss": 1.6015,
|
20239 |
+
"step": 8670
|
20240 |
+
},
|
20241 |
+
{
|
20242 |
+
"epoch": 0.9966101694915255,
|
20243 |
+
"grad_norm": 0.390625,
|
20244 |
+
"learning_rate": 5.493161076941533e-09,
|
20245 |
+
"loss": 1.5957,
|
20246 |
+
"step": 8673
|
20247 |
+
},
|
20248 |
+
{
|
20249 |
+
"epoch": 0.996954898017811,
|
20250 |
+
"grad_norm": 0.373046875,
|
20251 |
+
"learning_rate": 4.415438237959091e-09,
|
20252 |
+
"loss": 1.6479,
|
20253 |
+
"step": 8676
|
20254 |
+
},
|
20255 |
+
{
|
20256 |
+
"epoch": 0.9972996265440965,
|
20257 |
+
"grad_norm": 0.37890625,
|
20258 |
+
"learning_rate": 3.455281902031704e-09,
|
20259 |
+
"loss": 1.6768,
|
20260 |
+
"step": 8679
|
20261 |
+
},
|
20262 |
+
{
|
20263 |
+
"epoch": 0.9976443550703821,
|
20264 |
+
"grad_norm": 0.3671875,
|
20265 |
+
"learning_rate": 2.612693198023042e-09,
|
20266 |
+
"loss": 1.6631,
|
20267 |
+
"step": 8682
|
20268 |
+
},
|
20269 |
+
{
|
20270 |
+
"epoch": 0.9979890835966676,
|
20271 |
+
"grad_norm": 0.380859375,
|
20272 |
+
"learning_rate": 1.8876731165962115e-09,
|
20273 |
+
"loss": 1.6653,
|
20274 |
+
"step": 8685
|
20275 |
+
},
|
20276 |
+
{
|
20277 |
+
"epoch": 0.9983338121229531,
|
20278 |
+
"grad_norm": 0.369140625,
|
20279 |
+
"learning_rate": 1.2802225101471444e-09,
|
20280 |
+
"loss": 1.5957,
|
20281 |
+
"step": 8688
|
20282 |
+
},
|
20283 |
+
{
|
20284 |
+
"epoch": 0.9986785406492388,
|
20285 |
+
"grad_norm": 0.392578125,
|
20286 |
+
"learning_rate": 7.903420928823124e-10,
|
20287 |
+
"loss": 1.6107,
|
20288 |
+
"step": 8691
|
20289 |
+
},
|
20290 |
+
{
|
20291 |
+
"epoch": 0.9990232691755243,
|
20292 |
+
"grad_norm": 0.384765625,
|
20293 |
+
"learning_rate": 4.1803244075211414e-10,
|
20294 |
+
"loss": 1.6701,
|
20295 |
+
"step": 8694
|
20296 |
+
},
|
20297 |
+
{
|
20298 |
+
"epoch": 0.9993679977018098,
|
20299 |
+
"grad_norm": 0.384765625,
|
20300 |
+
"learning_rate": 1.6329399149528356e-10,
|
20301 |
+
"loss": 1.6652,
|
20302 |
+
"step": 8697
|
20303 |
+
},
|
20304 |
+
{
|
20305 |
+
"epoch": 0.9997127262280954,
|
20306 |
+
"grad_norm": 0.384765625,
|
20307 |
+
"learning_rate": 2.6127044616686137e-11,
|
20308 |
+
"loss": 1.7509,
|
20309 |
+
"step": 8700
|
20310 |
}
|
20311 |
],
|
20312 |
"logging_steps": 3,
|
|
|
20321 |
"should_evaluate": false,
|
20322 |
"should_log": false,
|
20323 |
"should_save": true,
|
20324 |
+
"should_training_stop": true
|
20325 |
},
|
20326 |
"attributes": {}
|
20327 |
}
|
20328 |
},
|
20329 |
+
"total_flos": 3.1626194748539142e+19,
|
20330 |
"train_batch_size": 4,
|
20331 |
"trial_name": null,
|
20332 |
"trial_params": null
|