Training in progress, epoch 12
Browse files- last-checkpoint/{global_step764203 β global_step833676}/mp_rank_00_model_states.pt +1 -1
- last-checkpoint/{global_step764203 β global_step833676}/zero_pp_rank_0_mp_rank_00_optim_states.pt +1 -1
- last-checkpoint/{global_step764203 β global_step833676}/zero_pp_rank_1_mp_rank_00_optim_states.pt +1 -1
- last-checkpoint/{global_step764203 β global_step833676}/zero_pp_rank_2_mp_rank_00_optim_states.pt +1 -1
- last-checkpoint/latest +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/trainer_state.json +846 -3
- pytorch_model.bin +1 -1
- runs/May29_03-16-06_user-SYS-5049A-TR/events.out.tfevents.1685297788.user-SYS-5049A-TR.557399.0 +2 -2
last-checkpoint/{global_step764203 β global_step833676}/mp_rank_00_model_states.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 59134503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a10bd8e1bdaf252f74bea2291d5cca8f62eb7c148658b26368a2d6b0ba7c8fa0
|
3 |
size 59134503
|
last-checkpoint/{global_step764203 β global_step833676}/zero_pp_rank_0_mp_rank_00_optim_states.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 118216675
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d5357b0ddb95f9e6f7bf04d666fc26a1073ddfb37c1db9f1fa161e5b0aa236f0
|
3 |
size 118216675
|
last-checkpoint/{global_step764203 β global_step833676}/zero_pp_rank_1_mp_rank_00_optim_states.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 118217955
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9897318fe2bb26a0e6506e150b5359f333188a81a5641fb093924b256734118f
|
3 |
size 118217955
|
last-checkpoint/{global_step764203 β global_step833676}/zero_pp_rank_2_mp_rank_00_optim_states.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 118221091
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:edddfd234d5e9f057274e50758e3fad9d9059344c52a509d6c1acb72db8dc867
|
3 |
size 118221091
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step833676
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 59121639
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cc2ca3b5ddf381486e7d313cce4d992c3c21fc5091bd64a28c07dff9fdb4bb4a
|
3 |
size 59121639
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0bc140d0e95ef9cbc743cac8a7f9d0435dd673e76216c4b818af7bf560901baa
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c3943c64e1378ed4c3abc5aebafe6c70e5628fb6a86eb5487b1b382b95394539
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ac632b7a956eded5feca7f7c4414c5ae7b352c7f8180c312ce5d70adc950be46
|
3 |
size 14503
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -9273,11 +9273,854 @@
|
|
9273 |
"eval_samples_per_second": 723.769,
|
9274 |
"eval_steps_per_second": 30.157,
|
9275 |
"step": 764203
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9276 |
}
|
9277 |
],
|
9278 |
"max_steps": 972622,
|
9279 |
"num_train_epochs": 14,
|
9280 |
-
"total_flos":
|
9281 |
"trial_name": null,
|
9282 |
"trial_params": null
|
9283 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 12.0,
|
5 |
+
"global_step": 833676,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
9273 |
"eval_samples_per_second": 723.769,
|
9274 |
"eval_steps_per_second": 30.157,
|
9275 |
"step": 764203
|
9276 |
+
},
|
9277 |
+
{
|
9278 |
+
"epoch": 11.0,
|
9279 |
+
"learning_rate": 2.177277896343838e-05,
|
9280 |
+
"loss": 1.275,
|
9281 |
+
"step": 764500
|
9282 |
+
},
|
9283 |
+
{
|
9284 |
+
"epoch": 11.01,
|
9285 |
+
"learning_rate": 2.1721059928652658e-05,
|
9286 |
+
"loss": 1.2762,
|
9287 |
+
"step": 765000
|
9288 |
+
},
|
9289 |
+
{
|
9290 |
+
"epoch": 11.02,
|
9291 |
+
"learning_rate": 2.1669133186899923e-05,
|
9292 |
+
"loss": 1.2754,
|
9293 |
+
"step": 765500
|
9294 |
+
},
|
9295 |
+
{
|
9296 |
+
"epoch": 11.03,
|
9297 |
+
"learning_rate": 2.1617414152114197e-05,
|
9298 |
+
"loss": 1.2784,
|
9299 |
+
"step": 766000
|
9300 |
+
},
|
9301 |
+
{
|
9302 |
+
"epoch": 11.03,
|
9303 |
+
"learning_rate": 2.1565487410361462e-05,
|
9304 |
+
"loss": 1.2729,
|
9305 |
+
"step": 766500
|
9306 |
+
},
|
9307 |
+
{
|
9308 |
+
"epoch": 11.04,
|
9309 |
+
"learning_rate": 2.1513768375575737e-05,
|
9310 |
+
"loss": 1.2745,
|
9311 |
+
"step": 767000
|
9312 |
+
},
|
9313 |
+
{
|
9314 |
+
"epoch": 11.05,
|
9315 |
+
"learning_rate": 2.1461841633823005e-05,
|
9316 |
+
"loss": 1.2754,
|
9317 |
+
"step": 767500
|
9318 |
+
},
|
9319 |
+
{
|
9320 |
+
"epoch": 11.05,
|
9321 |
+
"learning_rate": 2.141012259903728e-05,
|
9322 |
+
"loss": 1.2776,
|
9323 |
+
"step": 768000
|
9324 |
+
},
|
9325 |
+
{
|
9326 |
+
"epoch": 11.06,
|
9327 |
+
"learning_rate": 2.1358195857284544e-05,
|
9328 |
+
"loss": 1.2776,
|
9329 |
+
"step": 768500
|
9330 |
+
},
|
9331 |
+
{
|
9332 |
+
"epoch": 11.07,
|
9333 |
+
"learning_rate": 2.130647682249882e-05,
|
9334 |
+
"loss": 1.2742,
|
9335 |
+
"step": 769000
|
9336 |
+
},
|
9337 |
+
{
|
9338 |
+
"epoch": 11.08,
|
9339 |
+
"learning_rate": 2.1254550080746087e-05,
|
9340 |
+
"loss": 1.2744,
|
9341 |
+
"step": 769500
|
9342 |
+
},
|
9343 |
+
{
|
9344 |
+
"epoch": 11.08,
|
9345 |
+
"learning_rate": 2.120283104596036e-05,
|
9346 |
+
"loss": 1.2783,
|
9347 |
+
"step": 770000
|
9348 |
+
},
|
9349 |
+
{
|
9350 |
+
"epoch": 11.09,
|
9351 |
+
"learning_rate": 2.1150904304207626e-05,
|
9352 |
+
"loss": 1.2766,
|
9353 |
+
"step": 770500
|
9354 |
+
},
|
9355 |
+
{
|
9356 |
+
"epoch": 11.1,
|
9357 |
+
"learning_rate": 2.10991852694219e-05,
|
9358 |
+
"loss": 1.2728,
|
9359 |
+
"step": 771000
|
9360 |
+
},
|
9361 |
+
{
|
9362 |
+
"epoch": 11.11,
|
9363 |
+
"learning_rate": 2.1047258527669165e-05,
|
9364 |
+
"loss": 1.2722,
|
9365 |
+
"step": 771500
|
9366 |
+
},
|
9367 |
+
{
|
9368 |
+
"epoch": 11.11,
|
9369 |
+
"learning_rate": 2.099553949288344e-05,
|
9370 |
+
"loss": 1.2745,
|
9371 |
+
"step": 772000
|
9372 |
+
},
|
9373 |
+
{
|
9374 |
+
"epoch": 11.12,
|
9375 |
+
"learning_rate": 2.0943612751130705e-05,
|
9376 |
+
"loss": 1.2762,
|
9377 |
+
"step": 772500
|
9378 |
+
},
|
9379 |
+
{
|
9380 |
+
"epoch": 11.13,
|
9381 |
+
"learning_rate": 2.0891893716344982e-05,
|
9382 |
+
"loss": 1.2718,
|
9383 |
+
"step": 773000
|
9384 |
+
},
|
9385 |
+
{
|
9386 |
+
"epoch": 11.13,
|
9387 |
+
"learning_rate": 2.0839966974592244e-05,
|
9388 |
+
"loss": 1.2739,
|
9389 |
+
"step": 773500
|
9390 |
+
},
|
9391 |
+
{
|
9392 |
+
"epoch": 11.14,
|
9393 |
+
"learning_rate": 2.0788247939806522e-05,
|
9394 |
+
"loss": 1.2741,
|
9395 |
+
"step": 774000
|
9396 |
+
},
|
9397 |
+
{
|
9398 |
+
"epoch": 11.15,
|
9399 |
+
"learning_rate": 2.0736321198053787e-05,
|
9400 |
+
"loss": 1.2727,
|
9401 |
+
"step": 774500
|
9402 |
+
},
|
9403 |
+
{
|
9404 |
+
"epoch": 11.16,
|
9405 |
+
"learning_rate": 2.0684602163268064e-05,
|
9406 |
+
"loss": 1.2749,
|
9407 |
+
"step": 775000
|
9408 |
+
},
|
9409 |
+
{
|
9410 |
+
"epoch": 11.16,
|
9411 |
+
"learning_rate": 2.0632675421515326e-05,
|
9412 |
+
"loss": 1.2738,
|
9413 |
+
"step": 775500
|
9414 |
+
},
|
9415 |
+
{
|
9416 |
+
"epoch": 11.17,
|
9417 |
+
"learning_rate": 2.0580956386729604e-05,
|
9418 |
+
"loss": 1.276,
|
9419 |
+
"step": 776000
|
9420 |
+
},
|
9421 |
+
{
|
9422 |
+
"epoch": 11.18,
|
9423 |
+
"learning_rate": 2.052902964497687e-05,
|
9424 |
+
"loss": 1.2733,
|
9425 |
+
"step": 776500
|
9426 |
+
},
|
9427 |
+
{
|
9428 |
+
"epoch": 11.18,
|
9429 |
+
"learning_rate": 2.0477310610191143e-05,
|
9430 |
+
"loss": 1.2752,
|
9431 |
+
"step": 777000
|
9432 |
+
},
|
9433 |
+
{
|
9434 |
+
"epoch": 11.19,
|
9435 |
+
"learning_rate": 2.0425383868438408e-05,
|
9436 |
+
"loss": 1.2737,
|
9437 |
+
"step": 777500
|
9438 |
+
},
|
9439 |
+
{
|
9440 |
+
"epoch": 11.2,
|
9441 |
+
"learning_rate": 2.0373664833652682e-05,
|
9442 |
+
"loss": 1.2759,
|
9443 |
+
"step": 778000
|
9444 |
+
},
|
9445 |
+
{
|
9446 |
+
"epoch": 11.21,
|
9447 |
+
"learning_rate": 2.0321738091899947e-05,
|
9448 |
+
"loss": 1.2737,
|
9449 |
+
"step": 778500
|
9450 |
+
},
|
9451 |
+
{
|
9452 |
+
"epoch": 11.21,
|
9453 |
+
"learning_rate": 2.027001905711422e-05,
|
9454 |
+
"loss": 1.2762,
|
9455 |
+
"step": 779000
|
9456 |
+
},
|
9457 |
+
{
|
9458 |
+
"epoch": 11.22,
|
9459 |
+
"learning_rate": 2.021809231536149e-05,
|
9460 |
+
"loss": 1.2741,
|
9461 |
+
"step": 779500
|
9462 |
+
},
|
9463 |
+
{
|
9464 |
+
"epoch": 11.23,
|
9465 |
+
"learning_rate": 2.0166373280575764e-05,
|
9466 |
+
"loss": 1.2751,
|
9467 |
+
"step": 780000
|
9468 |
+
},
|
9469 |
+
{
|
9470 |
+
"epoch": 11.23,
|
9471 |
+
"learning_rate": 2.011444653882303e-05,
|
9472 |
+
"loss": 1.2703,
|
9473 |
+
"step": 780500
|
9474 |
+
},
|
9475 |
+
{
|
9476 |
+
"epoch": 11.24,
|
9477 |
+
"learning_rate": 2.0062727504037303e-05,
|
9478 |
+
"loss": 1.2712,
|
9479 |
+
"step": 781000
|
9480 |
+
},
|
9481 |
+
{
|
9482 |
+
"epoch": 11.25,
|
9483 |
+
"learning_rate": 2.001080076228457e-05,
|
9484 |
+
"loss": 1.2725,
|
9485 |
+
"step": 781500
|
9486 |
+
},
|
9487 |
+
{
|
9488 |
+
"epoch": 11.26,
|
9489 |
+
"learning_rate": 1.9959081727498846e-05,
|
9490 |
+
"loss": 1.2758,
|
9491 |
+
"step": 782000
|
9492 |
+
},
|
9493 |
+
{
|
9494 |
+
"epoch": 11.26,
|
9495 |
+
"learning_rate": 1.990715498574611e-05,
|
9496 |
+
"loss": 1.2721,
|
9497 |
+
"step": 782500
|
9498 |
+
},
|
9499 |
+
{
|
9500 |
+
"epoch": 11.27,
|
9501 |
+
"learning_rate": 1.9855435950960385e-05,
|
9502 |
+
"loss": 1.2733,
|
9503 |
+
"step": 783000
|
9504 |
+
},
|
9505 |
+
{
|
9506 |
+
"epoch": 11.28,
|
9507 |
+
"learning_rate": 1.980350920920765e-05,
|
9508 |
+
"loss": 1.2716,
|
9509 |
+
"step": 783500
|
9510 |
+
},
|
9511 |
+
{
|
9512 |
+
"epoch": 11.28,
|
9513 |
+
"learning_rate": 1.9751790174421928e-05,
|
9514 |
+
"loss": 1.2729,
|
9515 |
+
"step": 784000
|
9516 |
+
},
|
9517 |
+
{
|
9518 |
+
"epoch": 11.29,
|
9519 |
+
"learning_rate": 1.969986343266919e-05,
|
9520 |
+
"loss": 1.2715,
|
9521 |
+
"step": 784500
|
9522 |
+
},
|
9523 |
+
{
|
9524 |
+
"epoch": 11.3,
|
9525 |
+
"learning_rate": 1.9648144397883467e-05,
|
9526 |
+
"loss": 1.2722,
|
9527 |
+
"step": 785000
|
9528 |
+
},
|
9529 |
+
{
|
9530 |
+
"epoch": 11.31,
|
9531 |
+
"learning_rate": 1.959621765613073e-05,
|
9532 |
+
"loss": 1.2745,
|
9533 |
+
"step": 785500
|
9534 |
+
},
|
9535 |
+
{
|
9536 |
+
"epoch": 11.31,
|
9537 |
+
"learning_rate": 1.954449862134501e-05,
|
9538 |
+
"loss": 1.2719,
|
9539 |
+
"step": 786000
|
9540 |
+
},
|
9541 |
+
{
|
9542 |
+
"epoch": 11.32,
|
9543 |
+
"learning_rate": 1.949257187959227e-05,
|
9544 |
+
"loss": 1.2755,
|
9545 |
+
"step": 786500
|
9546 |
+
},
|
9547 |
+
{
|
9548 |
+
"epoch": 11.33,
|
9549 |
+
"learning_rate": 1.944085284480655e-05,
|
9550 |
+
"loss": 1.2732,
|
9551 |
+
"step": 787000
|
9552 |
+
},
|
9553 |
+
{
|
9554 |
+
"epoch": 11.34,
|
9555 |
+
"learning_rate": 1.938892610305381e-05,
|
9556 |
+
"loss": 1.2701,
|
9557 |
+
"step": 787500
|
9558 |
+
},
|
9559 |
+
{
|
9560 |
+
"epoch": 11.34,
|
9561 |
+
"learning_rate": 1.933720706826809e-05,
|
9562 |
+
"loss": 1.2736,
|
9563 |
+
"step": 788000
|
9564 |
+
},
|
9565 |
+
{
|
9566 |
+
"epoch": 11.35,
|
9567 |
+
"learning_rate": 1.9285280326515353e-05,
|
9568 |
+
"loss": 1.2753,
|
9569 |
+
"step": 788500
|
9570 |
+
},
|
9571 |
+
{
|
9572 |
+
"epoch": 11.36,
|
9573 |
+
"learning_rate": 1.9233561291729628e-05,
|
9574 |
+
"loss": 1.2701,
|
9575 |
+
"step": 789000
|
9576 |
+
},
|
9577 |
+
{
|
9578 |
+
"epoch": 11.36,
|
9579 |
+
"learning_rate": 1.9181634549976893e-05,
|
9580 |
+
"loss": 1.2725,
|
9581 |
+
"step": 789500
|
9582 |
+
},
|
9583 |
+
{
|
9584 |
+
"epoch": 11.37,
|
9585 |
+
"learning_rate": 1.9129915515191167e-05,
|
9586 |
+
"loss": 1.2725,
|
9587 |
+
"step": 790000
|
9588 |
+
},
|
9589 |
+
{
|
9590 |
+
"epoch": 11.38,
|
9591 |
+
"learning_rate": 1.9077988773438435e-05,
|
9592 |
+
"loss": 1.2705,
|
9593 |
+
"step": 790500
|
9594 |
+
},
|
9595 |
+
{
|
9596 |
+
"epoch": 11.39,
|
9597 |
+
"learning_rate": 1.902626973865271e-05,
|
9598 |
+
"loss": 1.2739,
|
9599 |
+
"step": 791000
|
9600 |
+
},
|
9601 |
+
{
|
9602 |
+
"epoch": 11.39,
|
9603 |
+
"learning_rate": 1.8974342996899974e-05,
|
9604 |
+
"loss": 1.2696,
|
9605 |
+
"step": 791500
|
9606 |
+
},
|
9607 |
+
{
|
9608 |
+
"epoch": 11.4,
|
9609 |
+
"learning_rate": 1.892262396211425e-05,
|
9610 |
+
"loss": 1.2732,
|
9611 |
+
"step": 792000
|
9612 |
+
},
|
9613 |
+
{
|
9614 |
+
"epoch": 11.41,
|
9615 |
+
"learning_rate": 1.8870697220361517e-05,
|
9616 |
+
"loss": 1.2682,
|
9617 |
+
"step": 792500
|
9618 |
+
},
|
9619 |
+
{
|
9620 |
+
"epoch": 11.41,
|
9621 |
+
"learning_rate": 1.881897818557579e-05,
|
9622 |
+
"loss": 1.2717,
|
9623 |
+
"step": 793000
|
9624 |
+
},
|
9625 |
+
{
|
9626 |
+
"epoch": 11.42,
|
9627 |
+
"learning_rate": 1.8767051443823056e-05,
|
9628 |
+
"loss": 1.2715,
|
9629 |
+
"step": 793500
|
9630 |
+
},
|
9631 |
+
{
|
9632 |
+
"epoch": 11.43,
|
9633 |
+
"learning_rate": 1.871533240903733e-05,
|
9634 |
+
"loss": 1.2696,
|
9635 |
+
"step": 794000
|
9636 |
+
},
|
9637 |
+
{
|
9638 |
+
"epoch": 11.44,
|
9639 |
+
"learning_rate": 1.8663405667284596e-05,
|
9640 |
+
"loss": 1.2723,
|
9641 |
+
"step": 794500
|
9642 |
+
},
|
9643 |
+
{
|
9644 |
+
"epoch": 11.44,
|
9645 |
+
"learning_rate": 1.861168663249887e-05,
|
9646 |
+
"loss": 1.274,
|
9647 |
+
"step": 795000
|
9648 |
+
},
|
9649 |
+
{
|
9650 |
+
"epoch": 11.45,
|
9651 |
+
"learning_rate": 1.855975989074614e-05,
|
9652 |
+
"loss": 1.2716,
|
9653 |
+
"step": 795500
|
9654 |
+
},
|
9655 |
+
{
|
9656 |
+
"epoch": 11.46,
|
9657 |
+
"learning_rate": 1.8508040855960413e-05,
|
9658 |
+
"loss": 1.2727,
|
9659 |
+
"step": 796000
|
9660 |
+
},
|
9661 |
+
{
|
9662 |
+
"epoch": 11.46,
|
9663 |
+
"learning_rate": 1.8456114114207678e-05,
|
9664 |
+
"loss": 1.2708,
|
9665 |
+
"step": 796500
|
9666 |
+
},
|
9667 |
+
{
|
9668 |
+
"epoch": 11.47,
|
9669 |
+
"learning_rate": 1.8404395079421952e-05,
|
9670 |
+
"loss": 1.271,
|
9671 |
+
"step": 797000
|
9672 |
+
},
|
9673 |
+
{
|
9674 |
+
"epoch": 11.48,
|
9675 |
+
"learning_rate": 1.8352468337669217e-05,
|
9676 |
+
"loss": 1.2711,
|
9677 |
+
"step": 797500
|
9678 |
+
},
|
9679 |
+
{
|
9680 |
+
"epoch": 11.49,
|
9681 |
+
"learning_rate": 1.8300749302883495e-05,
|
9682 |
+
"loss": 1.2709,
|
9683 |
+
"step": 798000
|
9684 |
+
},
|
9685 |
+
{
|
9686 |
+
"epoch": 11.49,
|
9687 |
+
"learning_rate": 1.8248822561130756e-05,
|
9688 |
+
"loss": 1.2735,
|
9689 |
+
"step": 798500
|
9690 |
+
},
|
9691 |
+
{
|
9692 |
+
"epoch": 11.5,
|
9693 |
+
"learning_rate": 1.8197103526345034e-05,
|
9694 |
+
"loss": 1.2738,
|
9695 |
+
"step": 799000
|
9696 |
+
},
|
9697 |
+
{
|
9698 |
+
"epoch": 11.51,
|
9699 |
+
"learning_rate": 1.81451767845923e-05,
|
9700 |
+
"loss": 1.2711,
|
9701 |
+
"step": 799500
|
9702 |
+
},
|
9703 |
+
{
|
9704 |
+
"epoch": 11.52,
|
9705 |
+
"learning_rate": 1.8093457749806577e-05,
|
9706 |
+
"loss": 1.2698,
|
9707 |
+
"step": 800000
|
9708 |
+
},
|
9709 |
+
{
|
9710 |
+
"epoch": 11.52,
|
9711 |
+
"learning_rate": 1.8041531008053838e-05,
|
9712 |
+
"loss": 1.2724,
|
9713 |
+
"step": 800500
|
9714 |
+
},
|
9715 |
+
{
|
9716 |
+
"epoch": 11.53,
|
9717 |
+
"learning_rate": 1.7989811973268116e-05,
|
9718 |
+
"loss": 1.2722,
|
9719 |
+
"step": 801000
|
9720 |
+
},
|
9721 |
+
{
|
9722 |
+
"epoch": 11.54,
|
9723 |
+
"learning_rate": 1.7937885231515377e-05,
|
9724 |
+
"loss": 1.2702,
|
9725 |
+
"step": 801500
|
9726 |
+
},
|
9727 |
+
{
|
9728 |
+
"epoch": 11.54,
|
9729 |
+
"learning_rate": 1.7886166196729655e-05,
|
9730 |
+
"loss": 1.2709,
|
9731 |
+
"step": 802000
|
9732 |
+
},
|
9733 |
+
{
|
9734 |
+
"epoch": 11.55,
|
9735 |
+
"learning_rate": 1.783423945497692e-05,
|
9736 |
+
"loss": 1.2713,
|
9737 |
+
"step": 802500
|
9738 |
+
},
|
9739 |
+
{
|
9740 |
+
"epoch": 11.56,
|
9741 |
+
"learning_rate": 1.7782520420191194e-05,
|
9742 |
+
"loss": 1.2681,
|
9743 |
+
"step": 803000
|
9744 |
+
},
|
9745 |
+
{
|
9746 |
+
"epoch": 11.57,
|
9747 |
+
"learning_rate": 1.773059367843846e-05,
|
9748 |
+
"loss": 1.2699,
|
9749 |
+
"step": 803500
|
9750 |
+
},
|
9751 |
+
{
|
9752 |
+
"epoch": 11.57,
|
9753 |
+
"learning_rate": 1.7678874643652734e-05,
|
9754 |
+
"loss": 1.2735,
|
9755 |
+
"step": 804000
|
9756 |
+
},
|
9757 |
+
{
|
9758 |
+
"epoch": 11.58,
|
9759 |
+
"learning_rate": 1.7626947901900002e-05,
|
9760 |
+
"loss": 1.2735,
|
9761 |
+
"step": 804500
|
9762 |
+
},
|
9763 |
+
{
|
9764 |
+
"epoch": 11.59,
|
9765 |
+
"learning_rate": 1.7575228867114276e-05,
|
9766 |
+
"loss": 1.2681,
|
9767 |
+
"step": 805000
|
9768 |
+
},
|
9769 |
+
{
|
9770 |
+
"epoch": 11.59,
|
9771 |
+
"learning_rate": 1.752330212536154e-05,
|
9772 |
+
"loss": 1.2676,
|
9773 |
+
"step": 805500
|
9774 |
+
},
|
9775 |
+
{
|
9776 |
+
"epoch": 11.6,
|
9777 |
+
"learning_rate": 1.7471583090575816e-05,
|
9778 |
+
"loss": 1.2735,
|
9779 |
+
"step": 806000
|
9780 |
+
},
|
9781 |
+
{
|
9782 |
+
"epoch": 11.61,
|
9783 |
+
"learning_rate": 1.7419656348823084e-05,
|
9784 |
+
"loss": 1.2731,
|
9785 |
+
"step": 806500
|
9786 |
+
},
|
9787 |
+
{
|
9788 |
+
"epoch": 11.62,
|
9789 |
+
"learning_rate": 1.7367937314037358e-05,
|
9790 |
+
"loss": 1.2707,
|
9791 |
+
"step": 807000
|
9792 |
+
},
|
9793 |
+
{
|
9794 |
+
"epoch": 11.62,
|
9795 |
+
"learning_rate": 1.7316010572284623e-05,
|
9796 |
+
"loss": 1.2702,
|
9797 |
+
"step": 807500
|
9798 |
+
},
|
9799 |
+
{
|
9800 |
+
"epoch": 11.63,
|
9801 |
+
"learning_rate": 1.7264291537498897e-05,
|
9802 |
+
"loss": 1.2704,
|
9803 |
+
"step": 808000
|
9804 |
+
},
|
9805 |
+
{
|
9806 |
+
"epoch": 11.64,
|
9807 |
+
"learning_rate": 1.7212364795746162e-05,
|
9808 |
+
"loss": 1.2714,
|
9809 |
+
"step": 808500
|
9810 |
+
},
|
9811 |
+
{
|
9812 |
+
"epoch": 11.64,
|
9813 |
+
"learning_rate": 1.7160645760960437e-05,
|
9814 |
+
"loss": 1.2696,
|
9815 |
+
"step": 809000
|
9816 |
+
},
|
9817 |
+
{
|
9818 |
+
"epoch": 11.65,
|
9819 |
+
"learning_rate": 1.71087190192077e-05,
|
9820 |
+
"loss": 1.2687,
|
9821 |
+
"step": 809500
|
9822 |
+
},
|
9823 |
+
{
|
9824 |
+
"epoch": 11.66,
|
9825 |
+
"learning_rate": 1.705699998442198e-05,
|
9826 |
+
"loss": 1.2699,
|
9827 |
+
"step": 810000
|
9828 |
+
},
|
9829 |
+
{
|
9830 |
+
"epoch": 11.67,
|
9831 |
+
"learning_rate": 1.700507324266924e-05,
|
9832 |
+
"loss": 1.2696,
|
9833 |
+
"step": 810500
|
9834 |
+
},
|
9835 |
+
{
|
9836 |
+
"epoch": 11.67,
|
9837 |
+
"learning_rate": 1.695335420788352e-05,
|
9838 |
+
"loss": 1.2726,
|
9839 |
+
"step": 811000
|
9840 |
+
},
|
9841 |
+
{
|
9842 |
+
"epoch": 11.68,
|
9843 |
+
"learning_rate": 1.6901427466130784e-05,
|
9844 |
+
"loss": 1.2691,
|
9845 |
+
"step": 811500
|
9846 |
+
},
|
9847 |
+
{
|
9848 |
+
"epoch": 11.69,
|
9849 |
+
"learning_rate": 1.684970843134506e-05,
|
9850 |
+
"loss": 1.2685,
|
9851 |
+
"step": 812000
|
9852 |
+
},
|
9853 |
+
{
|
9854 |
+
"epoch": 11.7,
|
9855 |
+
"learning_rate": 1.6797781689592323e-05,
|
9856 |
+
"loss": 1.2709,
|
9857 |
+
"step": 812500
|
9858 |
+
},
|
9859 |
+
{
|
9860 |
+
"epoch": 11.7,
|
9861 |
+
"learning_rate": 1.67460626548066e-05,
|
9862 |
+
"loss": 1.2718,
|
9863 |
+
"step": 813000
|
9864 |
+
},
|
9865 |
+
{
|
9866 |
+
"epoch": 11.71,
|
9867 |
+
"learning_rate": 1.6694135913053865e-05,
|
9868 |
+
"loss": 1.269,
|
9869 |
+
"step": 813500
|
9870 |
+
},
|
9871 |
+
{
|
9872 |
+
"epoch": 11.72,
|
9873 |
+
"learning_rate": 1.664241687826814e-05,
|
9874 |
+
"loss": 1.2669,
|
9875 |
+
"step": 814000
|
9876 |
+
},
|
9877 |
+
{
|
9878 |
+
"epoch": 11.72,
|
9879 |
+
"learning_rate": 1.6590490136515405e-05,
|
9880 |
+
"loss": 1.2679,
|
9881 |
+
"step": 814500
|
9882 |
+
},
|
9883 |
+
{
|
9884 |
+
"epoch": 11.73,
|
9885 |
+
"learning_rate": 1.653877110172968e-05,
|
9886 |
+
"loss": 1.2695,
|
9887 |
+
"step": 815000
|
9888 |
+
},
|
9889 |
+
{
|
9890 |
+
"epoch": 11.74,
|
9891 |
+
"learning_rate": 1.6486844359976944e-05,
|
9892 |
+
"loss": 1.2757,
|
9893 |
+
"step": 815500
|
9894 |
+
},
|
9895 |
+
{
|
9896 |
+
"epoch": 11.75,
|
9897 |
+
"learning_rate": 1.6435125325191222e-05,
|
9898 |
+
"loss": 1.2692,
|
9899 |
+
"step": 816000
|
9900 |
+
},
|
9901 |
+
{
|
9902 |
+
"epoch": 11.75,
|
9903 |
+
"learning_rate": 1.6383198583438487e-05,
|
9904 |
+
"loss": 1.2675,
|
9905 |
+
"step": 816500
|
9906 |
+
},
|
9907 |
+
{
|
9908 |
+
"epoch": 11.76,
|
9909 |
+
"learning_rate": 1.633147954865276e-05,
|
9910 |
+
"loss": 1.2689,
|
9911 |
+
"step": 817000
|
9912 |
+
},
|
9913 |
+
{
|
9914 |
+
"epoch": 11.77,
|
9915 |
+
"learning_rate": 1.6279552806900026e-05,
|
9916 |
+
"loss": 1.2712,
|
9917 |
+
"step": 817500
|
9918 |
+
},
|
9919 |
+
{
|
9920 |
+
"epoch": 11.77,
|
9921 |
+
"learning_rate": 1.62278337721143e-05,
|
9922 |
+
"loss": 1.269,
|
9923 |
+
"step": 818000
|
9924 |
+
},
|
9925 |
+
{
|
9926 |
+
"epoch": 11.78,
|
9927 |
+
"learning_rate": 1.617590703036157e-05,
|
9928 |
+
"loss": 1.2699,
|
9929 |
+
"step": 818500
|
9930 |
+
},
|
9931 |
+
{
|
9932 |
+
"epoch": 11.79,
|
9933 |
+
"learning_rate": 1.6124187995575843e-05,
|
9934 |
+
"loss": 1.27,
|
9935 |
+
"step": 819000
|
9936 |
+
},
|
9937 |
+
{
|
9938 |
+
"epoch": 11.8,
|
9939 |
+
"learning_rate": 1.6072261253823108e-05,
|
9940 |
+
"loss": 1.2709,
|
9941 |
+
"step": 819500
|
9942 |
+
},
|
9943 |
+
{
|
9944 |
+
"epoch": 11.8,
|
9945 |
+
"learning_rate": 1.6020542219037382e-05,
|
9946 |
+
"loss": 1.2704,
|
9947 |
+
"step": 820000
|
9948 |
+
},
|
9949 |
+
{
|
9950 |
+
"epoch": 11.81,
|
9951 |
+
"learning_rate": 1.5968615477284647e-05,
|
9952 |
+
"loss": 1.2693,
|
9953 |
+
"step": 820500
|
9954 |
+
},
|
9955 |
+
{
|
9956 |
+
"epoch": 11.82,
|
9957 |
+
"learning_rate": 1.5916896442498925e-05,
|
9958 |
+
"loss": 1.2687,
|
9959 |
+
"step": 821000
|
9960 |
+
},
|
9961 |
+
{
|
9962 |
+
"epoch": 11.82,
|
9963 |
+
"learning_rate": 1.5864969700746186e-05,
|
9964 |
+
"loss": 1.272,
|
9965 |
+
"step": 821500
|
9966 |
+
},
|
9967 |
+
{
|
9968 |
+
"epoch": 11.83,
|
9969 |
+
"learning_rate": 1.5813250665960464e-05,
|
9970 |
+
"loss": 1.2649,
|
9971 |
+
"step": 822000
|
9972 |
+
},
|
9973 |
+
{
|
9974 |
+
"epoch": 11.84,
|
9975 |
+
"learning_rate": 1.576132392420773e-05,
|
9976 |
+
"loss": 1.2684,
|
9977 |
+
"step": 822500
|
9978 |
+
},
|
9979 |
+
{
|
9980 |
+
"epoch": 11.85,
|
9981 |
+
"learning_rate": 1.5709604889422007e-05,
|
9982 |
+
"loss": 1.2676,
|
9983 |
+
"step": 823000
|
9984 |
+
},
|
9985 |
+
{
|
9986 |
+
"epoch": 11.85,
|
9987 |
+
"learning_rate": 1.565767814766927e-05,
|
9988 |
+
"loss": 1.2697,
|
9989 |
+
"step": 823500
|
9990 |
+
},
|
9991 |
+
{
|
9992 |
+
"epoch": 11.86,
|
9993 |
+
"learning_rate": 1.5605959112883546e-05,
|
9994 |
+
"loss": 1.2692,
|
9995 |
+
"step": 824000
|
9996 |
+
},
|
9997 |
+
{
|
9998 |
+
"epoch": 11.87,
|
9999 |
+
"learning_rate": 1.5554032371130808e-05,
|
10000 |
+
"loss": 1.2675,
|
10001 |
+
"step": 824500
|
10002 |
+
},
|
10003 |
+
{
|
10004 |
+
"epoch": 11.88,
|
10005 |
+
"learning_rate": 1.5502313336345085e-05,
|
10006 |
+
"loss": 1.2674,
|
10007 |
+
"step": 825000
|
10008 |
+
},
|
10009 |
+
{
|
10010 |
+
"epoch": 11.88,
|
10011 |
+
"learning_rate": 1.545038659459235e-05,
|
10012 |
+
"loss": 1.2704,
|
10013 |
+
"step": 825500
|
10014 |
+
},
|
10015 |
+
{
|
10016 |
+
"epoch": 11.89,
|
10017 |
+
"learning_rate": 1.5398667559806625e-05,
|
10018 |
+
"loss": 1.266,
|
10019 |
+
"step": 826000
|
10020 |
+
},
|
10021 |
+
{
|
10022 |
+
"epoch": 11.9,
|
10023 |
+
"learning_rate": 1.534674081805389e-05,
|
10024 |
+
"loss": 1.2701,
|
10025 |
+
"step": 826500
|
10026 |
+
},
|
10027 |
+
{
|
10028 |
+
"epoch": 11.9,
|
10029 |
+
"learning_rate": 1.5295021783268164e-05,
|
10030 |
+
"loss": 1.2666,
|
10031 |
+
"step": 827000
|
10032 |
+
},
|
10033 |
+
{
|
10034 |
+
"epoch": 11.91,
|
10035 |
+
"learning_rate": 1.5243095041515432e-05,
|
10036 |
+
"loss": 1.2715,
|
10037 |
+
"step": 827500
|
10038 |
+
},
|
10039 |
+
{
|
10040 |
+
"epoch": 11.92,
|
10041 |
+
"learning_rate": 1.5191376006729707e-05,
|
10042 |
+
"loss": 1.2691,
|
10043 |
+
"step": 828000
|
10044 |
+
},
|
10045 |
+
{
|
10046 |
+
"epoch": 11.93,
|
10047 |
+
"learning_rate": 1.5139449264976971e-05,
|
10048 |
+
"loss": 1.2685,
|
10049 |
+
"step": 828500
|
10050 |
+
},
|
10051 |
+
{
|
10052 |
+
"epoch": 11.93,
|
10053 |
+
"learning_rate": 1.5087730230191246e-05,
|
10054 |
+
"loss": 1.2707,
|
10055 |
+
"step": 829000
|
10056 |
+
},
|
10057 |
+
{
|
10058 |
+
"epoch": 11.94,
|
10059 |
+
"learning_rate": 1.5035803488438512e-05,
|
10060 |
+
"loss": 1.2687,
|
10061 |
+
"step": 829500
|
10062 |
+
},
|
10063 |
+
{
|
10064 |
+
"epoch": 11.95,
|
10065 |
+
"learning_rate": 1.4984084453652788e-05,
|
10066 |
+
"loss": 1.2697,
|
10067 |
+
"step": 830000
|
10068 |
+
},
|
10069 |
+
{
|
10070 |
+
"epoch": 11.95,
|
10071 |
+
"learning_rate": 1.4932157711900052e-05,
|
10072 |
+
"loss": 1.2665,
|
10073 |
+
"step": 830500
|
10074 |
+
},
|
10075 |
+
{
|
10076 |
+
"epoch": 11.96,
|
10077 |
+
"learning_rate": 1.4880438677114328e-05,
|
10078 |
+
"loss": 1.2661,
|
10079 |
+
"step": 831000
|
10080 |
+
},
|
10081 |
+
{
|
10082 |
+
"epoch": 11.97,
|
10083 |
+
"learning_rate": 1.4828511935361591e-05,
|
10084 |
+
"loss": 1.2696,
|
10085 |
+
"step": 831500
|
10086 |
+
},
|
10087 |
+
{
|
10088 |
+
"epoch": 11.98,
|
10089 |
+
"learning_rate": 1.4776792900575867e-05,
|
10090 |
+
"loss": 1.2692,
|
10091 |
+
"step": 832000
|
10092 |
+
},
|
10093 |
+
{
|
10094 |
+
"epoch": 11.98,
|
10095 |
+
"learning_rate": 1.4724866158823134e-05,
|
10096 |
+
"loss": 1.2665,
|
10097 |
+
"step": 832500
|
10098 |
+
},
|
10099 |
+
{
|
10100 |
+
"epoch": 11.99,
|
10101 |
+
"learning_rate": 1.467314712403741e-05,
|
10102 |
+
"loss": 1.2682,
|
10103 |
+
"step": 833000
|
10104 |
+
},
|
10105 |
+
{
|
10106 |
+
"epoch": 12.0,
|
10107 |
+
"learning_rate": 1.4621220382284673e-05,
|
10108 |
+
"loss": 1.2694,
|
10109 |
+
"step": 833500
|
10110 |
+
},
|
10111 |
+
{
|
10112 |
+
"epoch": 12.0,
|
10113 |
+
"eval_accuracy": 0.7431053375139824,
|
10114 |
+
"eval_loss": 1.1376953125,
|
10115 |
+
"eval_runtime": 747.4572,
|
10116 |
+
"eval_samples_per_second": 721.029,
|
10117 |
+
"eval_steps_per_second": 30.043,
|
10118 |
+
"step": 833676
|
10119 |
}
|
10120 |
],
|
10121 |
"max_steps": 972622,
|
10122 |
"num_train_epochs": 14,
|
10123 |
+
"total_flos": 6.296629362397544e+18,
|
10124 |
"trial_name": null,
|
10125 |
"trial_params": null
|
10126 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 59121639
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cc2ca3b5ddf381486e7d313cce4d992c3c21fc5091bd64a28c07dff9fdb4bb4a
|
3 |
size 59121639
|
runs/May29_03-16-06_user-SYS-5049A-TR/events.out.tfevents.1685297788.user-SYS-5049A-TR.557399.0
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c999829c90a5cf8554f15412e37f8e4a6d962f8db875b9ed57de0c61a7b22a46
|
3 |
+
size 274025
|