Training in progress, epoch 11
Browse files- last-checkpoint/{global_step694730 β global_step764203}/mp_rank_00_model_states.pt +1 -1
- last-checkpoint/{global_step694730 β global_step764203}/zero_pp_rank_0_mp_rank_00_optim_states.pt +1 -1
- last-checkpoint/{global_step694730 β global_step764203}/zero_pp_rank_1_mp_rank_00_optim_states.pt +1 -1
- last-checkpoint/{global_step694730 β global_step764203}/zero_pp_rank_2_mp_rank_00_optim_states.pt +1 -1
- last-checkpoint/latest +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/trainer_state.json +846 -3
- pytorch_model.bin +1 -1
- runs/Feb22_11-16-27_user-SYS-5049A-TR/events.out.tfevents.1677032209.user-SYS-5049A-TR.55703.0 +2 -2
last-checkpoint/{global_step694730 β global_step764203}/mp_rank_00_model_states.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 59134503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:957ee02a261b6bfc99fc73ea67bd87b9a19090647bf92442249930ffd0120616
|
3 |
size 59134503
|
last-checkpoint/{global_step694730 β global_step764203}/zero_pp_rank_0_mp_rank_00_optim_states.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 118216675
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f27d7eb472bc3f4e910186c454447d51ae61ae3e5bc5705aed01117f21e4f439
|
3 |
size 118216675
|
last-checkpoint/{global_step694730 β global_step764203}/zero_pp_rank_1_mp_rank_00_optim_states.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 118217955
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4e6b6fed03b1341c0bb208a6f9ae54cc64bd7ebea666b859caf3c7f9bcf96c16
|
3 |
size 118217955
|
last-checkpoint/{global_step694730 β global_step764203}/zero_pp_rank_2_mp_rank_00_optim_states.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 118221091
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:799b982154df0374c7243ebd969031c761789dd5cb606eebc1903009c444c1be
|
3 |
size 118221091
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step764203
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 59121639
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eda519537384a51d41a4a58b603f16ba211d4cdc516840c5fe985d5f26ea19e0
|
3 |
size 59121639
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5326b7d97861f628b803fe440b0a6b1644e36579b2582f5db7027e77542c648c
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bd99ee4ba032a3620b579bb7224c1482125e732862d1ed96ef21acfe546d242c
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:af18f0195f365b86261ebeb1b8c35c2ae70733f2059a39025bf1bd0e07744a7c
|
3 |
size 14503
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -8430,11 +8430,854 @@
|
|
8430 |
"eval_samples_per_second": 823.146,
|
8431 |
"eval_steps_per_second": 34.298,
|
8432 |
"step": 694730
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8433 |
}
|
8434 |
],
|
8435 |
"max_steps": 972622,
|
8436 |
"num_train_epochs": 14,
|
8437 |
-
"total_flos":
|
8438 |
"trial_name": null,
|
8439 |
"trial_params": null
|
8440 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 11.0,
|
5 |
+
"global_step": 764203,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
8430 |
"eval_samples_per_second": 823.146,
|
8431 |
"eval_steps_per_second": 34.298,
|
8432 |
"step": 694730
|
8433 |
+
},
|
8434 |
+
{
|
8435 |
+
"epoch": 10.0,
|
8436 |
+
"learning_rate": 2.8976264286344828e-05,
|
8437 |
+
"loss": 1.7335,
|
8438 |
+
"step": 695000
|
8439 |
+
},
|
8440 |
+
{
|
8441 |
+
"epoch": 10.01,
|
8442 |
+
"learning_rate": 2.8924545251559106e-05,
|
8443 |
+
"loss": 1.729,
|
8444 |
+
"step": 695500
|
8445 |
+
},
|
8446 |
+
{
|
8447 |
+
"epoch": 10.02,
|
8448 |
+
"learning_rate": 2.8872618509806364e-05,
|
8449 |
+
"loss": 1.7314,
|
8450 |
+
"step": 696000
|
8451 |
+
},
|
8452 |
+
{
|
8453 |
+
"epoch": 10.03,
|
8454 |
+
"learning_rate": 2.8820899475020642e-05,
|
8455 |
+
"loss": 1.7345,
|
8456 |
+
"step": 696500
|
8457 |
+
},
|
8458 |
+
{
|
8459 |
+
"epoch": 10.03,
|
8460 |
+
"learning_rate": 2.8768972733267907e-05,
|
8461 |
+
"loss": 1.7342,
|
8462 |
+
"step": 697000
|
8463 |
+
},
|
8464 |
+
{
|
8465 |
+
"epoch": 10.04,
|
8466 |
+
"learning_rate": 2.8717253698482184e-05,
|
8467 |
+
"loss": 1.7326,
|
8468 |
+
"step": 697500
|
8469 |
+
},
|
8470 |
+
{
|
8471 |
+
"epoch": 10.05,
|
8472 |
+
"learning_rate": 2.866532695672945e-05,
|
8473 |
+
"loss": 1.7301,
|
8474 |
+
"step": 698000
|
8475 |
+
},
|
8476 |
+
{
|
8477 |
+
"epoch": 10.05,
|
8478 |
+
"learning_rate": 2.8613607921943724e-05,
|
8479 |
+
"loss": 1.7292,
|
8480 |
+
"step": 698500
|
8481 |
+
},
|
8482 |
+
{
|
8483 |
+
"epoch": 10.06,
|
8484 |
+
"learning_rate": 2.8561681180190985e-05,
|
8485 |
+
"loss": 1.7291,
|
8486 |
+
"step": 699000
|
8487 |
+
},
|
8488 |
+
{
|
8489 |
+
"epoch": 10.07,
|
8490 |
+
"learning_rate": 2.8509962145405263e-05,
|
8491 |
+
"loss": 1.7321,
|
8492 |
+
"step": 699500
|
8493 |
+
},
|
8494 |
+
{
|
8495 |
+
"epoch": 10.08,
|
8496 |
+
"learning_rate": 2.8458035403652528e-05,
|
8497 |
+
"loss": 1.7314,
|
8498 |
+
"step": 700000
|
8499 |
+
},
|
8500 |
+
{
|
8501 |
+
"epoch": 10.08,
|
8502 |
+
"learning_rate": 2.8406316368866802e-05,
|
8503 |
+
"loss": 1.7333,
|
8504 |
+
"step": 700500
|
8505 |
+
},
|
8506 |
+
{
|
8507 |
+
"epoch": 10.09,
|
8508 |
+
"learning_rate": 2.835438962711407e-05,
|
8509 |
+
"loss": 1.7329,
|
8510 |
+
"step": 701000
|
8511 |
+
},
|
8512 |
+
{
|
8513 |
+
"epoch": 10.1,
|
8514 |
+
"learning_rate": 2.8302670592328345e-05,
|
8515 |
+
"loss": 1.7337,
|
8516 |
+
"step": 701500
|
8517 |
+
},
|
8518 |
+
{
|
8519 |
+
"epoch": 10.1,
|
8520 |
+
"learning_rate": 2.8250743850575613e-05,
|
8521 |
+
"loss": 1.7346,
|
8522 |
+
"step": 702000
|
8523 |
+
},
|
8524 |
+
{
|
8525 |
+
"epoch": 10.11,
|
8526 |
+
"learning_rate": 2.8199024815789887e-05,
|
8527 |
+
"loss": 1.734,
|
8528 |
+
"step": 702500
|
8529 |
+
},
|
8530 |
+
{
|
8531 |
+
"epoch": 10.12,
|
8532 |
+
"learning_rate": 2.814709807403715e-05,
|
8533 |
+
"loss": 1.736,
|
8534 |
+
"step": 703000
|
8535 |
+
},
|
8536 |
+
{
|
8537 |
+
"epoch": 10.13,
|
8538 |
+
"learning_rate": 2.8095379039251423e-05,
|
8539 |
+
"loss": 1.7305,
|
8540 |
+
"step": 703500
|
8541 |
+
},
|
8542 |
+
{
|
8543 |
+
"epoch": 10.13,
|
8544 |
+
"learning_rate": 2.804345229749869e-05,
|
8545 |
+
"loss": 1.7273,
|
8546 |
+
"step": 704000
|
8547 |
+
},
|
8548 |
+
{
|
8549 |
+
"epoch": 10.14,
|
8550 |
+
"learning_rate": 2.7991733262712966e-05,
|
8551 |
+
"loss": 1.734,
|
8552 |
+
"step": 704500
|
8553 |
+
},
|
8554 |
+
{
|
8555 |
+
"epoch": 10.15,
|
8556 |
+
"learning_rate": 2.793980652096023e-05,
|
8557 |
+
"loss": 1.7322,
|
8558 |
+
"step": 705000
|
8559 |
+
},
|
8560 |
+
{
|
8561 |
+
"epoch": 10.16,
|
8562 |
+
"learning_rate": 2.788808748617451e-05,
|
8563 |
+
"loss": 1.7312,
|
8564 |
+
"step": 705500
|
8565 |
+
},
|
8566 |
+
{
|
8567 |
+
"epoch": 10.16,
|
8568 |
+
"learning_rate": 2.783616074442177e-05,
|
8569 |
+
"loss": 1.7309,
|
8570 |
+
"step": 706000
|
8571 |
+
},
|
8572 |
+
{
|
8573 |
+
"epoch": 10.17,
|
8574 |
+
"learning_rate": 2.7784441709636045e-05,
|
8575 |
+
"loss": 1.7292,
|
8576 |
+
"step": 706500
|
8577 |
+
},
|
8578 |
+
{
|
8579 |
+
"epoch": 10.18,
|
8580 |
+
"learning_rate": 2.773251496788331e-05,
|
8581 |
+
"loss": 1.7313,
|
8582 |
+
"step": 707000
|
8583 |
+
},
|
8584 |
+
{
|
8585 |
+
"epoch": 10.18,
|
8586 |
+
"learning_rate": 2.7680795933097587e-05,
|
8587 |
+
"loss": 1.7328,
|
8588 |
+
"step": 707500
|
8589 |
+
},
|
8590 |
+
{
|
8591 |
+
"epoch": 10.19,
|
8592 |
+
"learning_rate": 2.7628869191344852e-05,
|
8593 |
+
"loss": 1.7298,
|
8594 |
+
"step": 708000
|
8595 |
+
},
|
8596 |
+
{
|
8597 |
+
"epoch": 10.2,
|
8598 |
+
"learning_rate": 2.757715015655913e-05,
|
8599 |
+
"loss": 1.73,
|
8600 |
+
"step": 708500
|
8601 |
+
},
|
8602 |
+
{
|
8603 |
+
"epoch": 10.21,
|
8604 |
+
"learning_rate": 2.7525223414806395e-05,
|
8605 |
+
"loss": 1.7338,
|
8606 |
+
"step": 709000
|
8607 |
+
},
|
8608 |
+
{
|
8609 |
+
"epoch": 10.21,
|
8610 |
+
"learning_rate": 2.747350438002067e-05,
|
8611 |
+
"loss": 1.7327,
|
8612 |
+
"step": 709500
|
8613 |
+
},
|
8614 |
+
{
|
8615 |
+
"epoch": 10.22,
|
8616 |
+
"learning_rate": 2.742157763826793e-05,
|
8617 |
+
"loss": 1.7283,
|
8618 |
+
"step": 710000
|
8619 |
+
},
|
8620 |
+
{
|
8621 |
+
"epoch": 10.23,
|
8622 |
+
"learning_rate": 2.736985860348221e-05,
|
8623 |
+
"loss": 1.7319,
|
8624 |
+
"step": 710500
|
8625 |
+
},
|
8626 |
+
{
|
8627 |
+
"epoch": 10.23,
|
8628 |
+
"learning_rate": 2.7317931861729473e-05,
|
8629 |
+
"loss": 1.729,
|
8630 |
+
"step": 711000
|
8631 |
+
},
|
8632 |
+
{
|
8633 |
+
"epoch": 10.24,
|
8634 |
+
"learning_rate": 2.7266212826943748e-05,
|
8635 |
+
"loss": 1.7318,
|
8636 |
+
"step": 711500
|
8637 |
+
},
|
8638 |
+
{
|
8639 |
+
"epoch": 10.25,
|
8640 |
+
"learning_rate": 2.7214286085191016e-05,
|
8641 |
+
"loss": 1.7307,
|
8642 |
+
"step": 712000
|
8643 |
+
},
|
8644 |
+
{
|
8645 |
+
"epoch": 10.26,
|
8646 |
+
"learning_rate": 2.716256705040529e-05,
|
8647 |
+
"loss": 1.7321,
|
8648 |
+
"step": 712500
|
8649 |
+
},
|
8650 |
+
{
|
8651 |
+
"epoch": 10.26,
|
8652 |
+
"learning_rate": 2.7110640308652552e-05,
|
8653 |
+
"loss": 1.7313,
|
8654 |
+
"step": 713000
|
8655 |
+
},
|
8656 |
+
{
|
8657 |
+
"epoch": 10.27,
|
8658 |
+
"learning_rate": 2.7058921273866826e-05,
|
8659 |
+
"loss": 1.7283,
|
8660 |
+
"step": 713500
|
8661 |
+
},
|
8662 |
+
{
|
8663 |
+
"epoch": 10.28,
|
8664 |
+
"learning_rate": 2.7006994532114095e-05,
|
8665 |
+
"loss": 1.7248,
|
8666 |
+
"step": 714000
|
8667 |
+
},
|
8668 |
+
{
|
8669 |
+
"epoch": 10.28,
|
8670 |
+
"learning_rate": 2.695527549732837e-05,
|
8671 |
+
"loss": 1.7267,
|
8672 |
+
"step": 714500
|
8673 |
+
},
|
8674 |
+
{
|
8675 |
+
"epoch": 10.29,
|
8676 |
+
"learning_rate": 2.6903348755575637e-05,
|
8677 |
+
"loss": 1.7297,
|
8678 |
+
"step": 715000
|
8679 |
+
},
|
8680 |
+
{
|
8681 |
+
"epoch": 10.3,
|
8682 |
+
"learning_rate": 2.685162972078991e-05,
|
8683 |
+
"loss": 1.7297,
|
8684 |
+
"step": 715500
|
8685 |
+
},
|
8686 |
+
{
|
8687 |
+
"epoch": 10.31,
|
8688 |
+
"learning_rate": 2.6799702979037176e-05,
|
8689 |
+
"loss": 1.7281,
|
8690 |
+
"step": 716000
|
8691 |
+
},
|
8692 |
+
{
|
8693 |
+
"epoch": 10.31,
|
8694 |
+
"learning_rate": 2.6747983944251454e-05,
|
8695 |
+
"loss": 1.7306,
|
8696 |
+
"step": 716500
|
8697 |
+
},
|
8698 |
+
{
|
8699 |
+
"epoch": 10.32,
|
8700 |
+
"learning_rate": 2.6696057202498716e-05,
|
8701 |
+
"loss": 1.7297,
|
8702 |
+
"step": 717000
|
8703 |
+
},
|
8704 |
+
{
|
8705 |
+
"epoch": 10.33,
|
8706 |
+
"learning_rate": 2.664433816771299e-05,
|
8707 |
+
"loss": 1.7319,
|
8708 |
+
"step": 717500
|
8709 |
+
},
|
8710 |
+
{
|
8711 |
+
"epoch": 10.33,
|
8712 |
+
"learning_rate": 2.6592411425960255e-05,
|
8713 |
+
"loss": 1.7312,
|
8714 |
+
"step": 718000
|
8715 |
+
},
|
8716 |
+
{
|
8717 |
+
"epoch": 10.34,
|
8718 |
+
"learning_rate": 2.6540692391174533e-05,
|
8719 |
+
"loss": 1.7296,
|
8720 |
+
"step": 718500
|
8721 |
+
},
|
8722 |
+
{
|
8723 |
+
"epoch": 10.35,
|
8724 |
+
"learning_rate": 2.6488765649421798e-05,
|
8725 |
+
"loss": 1.7301,
|
8726 |
+
"step": 719000
|
8727 |
+
},
|
8728 |
+
{
|
8729 |
+
"epoch": 10.36,
|
8730 |
+
"learning_rate": 2.6437046614636075e-05,
|
8731 |
+
"loss": 1.7252,
|
8732 |
+
"step": 719500
|
8733 |
+
},
|
8734 |
+
{
|
8735 |
+
"epoch": 10.36,
|
8736 |
+
"learning_rate": 2.6385119872883333e-05,
|
8737 |
+
"loss": 1.7303,
|
8738 |
+
"step": 720000
|
8739 |
+
},
|
8740 |
+
{
|
8741 |
+
"epoch": 10.37,
|
8742 |
+
"learning_rate": 2.633340083809761e-05,
|
8743 |
+
"loss": 1.7276,
|
8744 |
+
"step": 720500
|
8745 |
+
},
|
8746 |
+
{
|
8747 |
+
"epoch": 10.38,
|
8748 |
+
"learning_rate": 2.6281474096344876e-05,
|
8749 |
+
"loss": 1.7308,
|
8750 |
+
"step": 721000
|
8751 |
+
},
|
8752 |
+
{
|
8753 |
+
"epoch": 10.39,
|
8754 |
+
"learning_rate": 2.6229755061559154e-05,
|
8755 |
+
"loss": 1.7275,
|
8756 |
+
"step": 721500
|
8757 |
+
},
|
8758 |
+
{
|
8759 |
+
"epoch": 10.39,
|
8760 |
+
"learning_rate": 2.617782831980642e-05,
|
8761 |
+
"loss": 1.7294,
|
8762 |
+
"step": 722000
|
8763 |
+
},
|
8764 |
+
{
|
8765 |
+
"epoch": 10.4,
|
8766 |
+
"learning_rate": 2.6126109285020693e-05,
|
8767 |
+
"loss": 1.7296,
|
8768 |
+
"step": 722500
|
8769 |
+
},
|
8770 |
+
{
|
8771 |
+
"epoch": 10.41,
|
8772 |
+
"learning_rate": 2.607418254326796e-05,
|
8773 |
+
"loss": 1.7331,
|
8774 |
+
"step": 723000
|
8775 |
+
},
|
8776 |
+
{
|
8777 |
+
"epoch": 10.41,
|
8778 |
+
"learning_rate": 2.6022463508482236e-05,
|
8779 |
+
"loss": 1.7316,
|
8780 |
+
"step": 723500
|
8781 |
+
},
|
8782 |
+
{
|
8783 |
+
"epoch": 10.42,
|
8784 |
+
"learning_rate": 2.5970536766729497e-05,
|
8785 |
+
"loss": 1.7348,
|
8786 |
+
"step": 724000
|
8787 |
+
},
|
8788 |
+
{
|
8789 |
+
"epoch": 10.43,
|
8790 |
+
"learning_rate": 2.5918817731943772e-05,
|
8791 |
+
"loss": 1.7302,
|
8792 |
+
"step": 724500
|
8793 |
+
},
|
8794 |
+
{
|
8795 |
+
"epoch": 10.44,
|
8796 |
+
"learning_rate": 2.586689099019104e-05,
|
8797 |
+
"loss": 1.7292,
|
8798 |
+
"step": 725000
|
8799 |
+
},
|
8800 |
+
{
|
8801 |
+
"epoch": 10.44,
|
8802 |
+
"learning_rate": 2.5815171955405314e-05,
|
8803 |
+
"loss": 1.7273,
|
8804 |
+
"step": 725500
|
8805 |
+
},
|
8806 |
+
{
|
8807 |
+
"epoch": 10.45,
|
8808 |
+
"learning_rate": 2.5763245213652583e-05,
|
8809 |
+
"loss": 1.7286,
|
8810 |
+
"step": 726000
|
8811 |
+
},
|
8812 |
+
{
|
8813 |
+
"epoch": 10.46,
|
8814 |
+
"learning_rate": 2.5711526178866857e-05,
|
8815 |
+
"loss": 1.7255,
|
8816 |
+
"step": 726500
|
8817 |
+
},
|
8818 |
+
{
|
8819 |
+
"epoch": 10.46,
|
8820 |
+
"learning_rate": 2.565959943711412e-05,
|
8821 |
+
"loss": 1.7281,
|
8822 |
+
"step": 727000
|
8823 |
+
},
|
8824 |
+
{
|
8825 |
+
"epoch": 10.47,
|
8826 |
+
"learning_rate": 2.5607880402328393e-05,
|
8827 |
+
"loss": 1.7314,
|
8828 |
+
"step": 727500
|
8829 |
+
},
|
8830 |
+
{
|
8831 |
+
"epoch": 10.48,
|
8832 |
+
"learning_rate": 2.555595366057566e-05,
|
8833 |
+
"loss": 1.7246,
|
8834 |
+
"step": 728000
|
8835 |
+
},
|
8836 |
+
{
|
8837 |
+
"epoch": 10.49,
|
8838 |
+
"learning_rate": 2.5504234625789936e-05,
|
8839 |
+
"loss": 1.729,
|
8840 |
+
"step": 728500
|
8841 |
+
},
|
8842 |
+
{
|
8843 |
+
"epoch": 10.49,
|
8844 |
+
"learning_rate": 2.54523078840372e-05,
|
8845 |
+
"loss": 1.7246,
|
8846 |
+
"step": 729000
|
8847 |
+
},
|
8848 |
+
{
|
8849 |
+
"epoch": 10.5,
|
8850 |
+
"learning_rate": 2.5400588849251478e-05,
|
8851 |
+
"loss": 1.73,
|
8852 |
+
"step": 729500
|
8853 |
+
},
|
8854 |
+
{
|
8855 |
+
"epoch": 10.51,
|
8856 |
+
"learning_rate": 2.5348662107498743e-05,
|
8857 |
+
"loss": 1.7267,
|
8858 |
+
"step": 730000
|
8859 |
+
},
|
8860 |
+
{
|
8861 |
+
"epoch": 10.51,
|
8862 |
+
"learning_rate": 2.529694307271302e-05,
|
8863 |
+
"loss": 1.7278,
|
8864 |
+
"step": 730500
|
8865 |
+
},
|
8866 |
+
{
|
8867 |
+
"epoch": 10.52,
|
8868 |
+
"learning_rate": 2.524501633096028e-05,
|
8869 |
+
"loss": 1.7264,
|
8870 |
+
"step": 731000
|
8871 |
+
},
|
8872 |
+
{
|
8873 |
+
"epoch": 10.53,
|
8874 |
+
"learning_rate": 2.5193297296174557e-05,
|
8875 |
+
"loss": 1.7308,
|
8876 |
+
"step": 731500
|
8877 |
+
},
|
8878 |
+
{
|
8879 |
+
"epoch": 10.54,
|
8880 |
+
"learning_rate": 2.514137055442182e-05,
|
8881 |
+
"loss": 1.7262,
|
8882 |
+
"step": 732000
|
8883 |
+
},
|
8884 |
+
{
|
8885 |
+
"epoch": 10.54,
|
8886 |
+
"learning_rate": 2.50896515196361e-05,
|
8887 |
+
"loss": 1.7284,
|
8888 |
+
"step": 732500
|
8889 |
+
},
|
8890 |
+
{
|
8891 |
+
"epoch": 10.55,
|
8892 |
+
"learning_rate": 2.5037724777883364e-05,
|
8893 |
+
"loss": 1.727,
|
8894 |
+
"step": 733000
|
8895 |
+
},
|
8896 |
+
{
|
8897 |
+
"epoch": 10.56,
|
8898 |
+
"learning_rate": 2.498600574309764e-05,
|
8899 |
+
"loss": 1.7257,
|
8900 |
+
"step": 733500
|
8901 |
+
},
|
8902 |
+
{
|
8903 |
+
"epoch": 10.57,
|
8904 |
+
"learning_rate": 2.4934079001344904e-05,
|
8905 |
+
"loss": 1.7264,
|
8906 |
+
"step": 734000
|
8907 |
+
},
|
8908 |
+
{
|
8909 |
+
"epoch": 10.57,
|
8910 |
+
"learning_rate": 2.4882359966559178e-05,
|
8911 |
+
"loss": 1.7271,
|
8912 |
+
"step": 734500
|
8913 |
+
},
|
8914 |
+
{
|
8915 |
+
"epoch": 10.58,
|
8916 |
+
"learning_rate": 2.4830433224806443e-05,
|
8917 |
+
"loss": 1.7238,
|
8918 |
+
"step": 735000
|
8919 |
+
},
|
8920 |
+
{
|
8921 |
+
"epoch": 10.59,
|
8922 |
+
"learning_rate": 2.4778714190020717e-05,
|
8923 |
+
"loss": 1.7282,
|
8924 |
+
"step": 735500
|
8925 |
+
},
|
8926 |
+
{
|
8927 |
+
"epoch": 10.59,
|
8928 |
+
"learning_rate": 2.4726787448267985e-05,
|
8929 |
+
"loss": 1.728,
|
8930 |
+
"step": 736000
|
8931 |
+
},
|
8932 |
+
{
|
8933 |
+
"epoch": 10.6,
|
8934 |
+
"learning_rate": 2.467506841348226e-05,
|
8935 |
+
"loss": 1.7236,
|
8936 |
+
"step": 736500
|
8937 |
+
},
|
8938 |
+
{
|
8939 |
+
"epoch": 10.61,
|
8940 |
+
"learning_rate": 2.4623141671729525e-05,
|
8941 |
+
"loss": 1.7285,
|
8942 |
+
"step": 737000
|
8943 |
+
},
|
8944 |
+
{
|
8945 |
+
"epoch": 10.62,
|
8946 |
+
"learning_rate": 2.45714226369438e-05,
|
8947 |
+
"loss": 1.7228,
|
8948 |
+
"step": 737500
|
8949 |
+
},
|
8950 |
+
{
|
8951 |
+
"epoch": 10.62,
|
8952 |
+
"learning_rate": 2.4519495895191067e-05,
|
8953 |
+
"loss": 1.7276,
|
8954 |
+
"step": 738000
|
8955 |
+
},
|
8956 |
+
{
|
8957 |
+
"epoch": 10.63,
|
8958 |
+
"learning_rate": 2.4467776860405342e-05,
|
8959 |
+
"loss": 1.7287,
|
8960 |
+
"step": 738500
|
8961 |
+
},
|
8962 |
+
{
|
8963 |
+
"epoch": 10.64,
|
8964 |
+
"learning_rate": 2.4415850118652607e-05,
|
8965 |
+
"loss": 1.7278,
|
8966 |
+
"step": 739000
|
8967 |
+
},
|
8968 |
+
{
|
8969 |
+
"epoch": 10.64,
|
8970 |
+
"learning_rate": 2.436413108386688e-05,
|
8971 |
+
"loss": 1.728,
|
8972 |
+
"step": 739500
|
8973 |
+
},
|
8974 |
+
{
|
8975 |
+
"epoch": 10.65,
|
8976 |
+
"learning_rate": 2.431220434211415e-05,
|
8977 |
+
"loss": 1.7278,
|
8978 |
+
"step": 740000
|
8979 |
+
},
|
8980 |
+
{
|
8981 |
+
"epoch": 10.66,
|
8982 |
+
"learning_rate": 2.4260485307328424e-05,
|
8983 |
+
"loss": 1.7244,
|
8984 |
+
"step": 740500
|
8985 |
+
},
|
8986 |
+
{
|
8987 |
+
"epoch": 10.67,
|
8988 |
+
"learning_rate": 2.4208558565575685e-05,
|
8989 |
+
"loss": 1.7253,
|
8990 |
+
"step": 741000
|
8991 |
+
},
|
8992 |
+
{
|
8993 |
+
"epoch": 10.67,
|
8994 |
+
"learning_rate": 2.4156839530789963e-05,
|
8995 |
+
"loss": 1.7291,
|
8996 |
+
"step": 741500
|
8997 |
+
},
|
8998 |
+
{
|
8999 |
+
"epoch": 10.68,
|
9000 |
+
"learning_rate": 2.4104912789037224e-05,
|
9001 |
+
"loss": 1.7234,
|
9002 |
+
"step": 742000
|
9003 |
+
},
|
9004 |
+
{
|
9005 |
+
"epoch": 10.69,
|
9006 |
+
"learning_rate": 2.4053193754251502e-05,
|
9007 |
+
"loss": 1.7239,
|
9008 |
+
"step": 742500
|
9009 |
+
},
|
9010 |
+
{
|
9011 |
+
"epoch": 10.69,
|
9012 |
+
"learning_rate": 2.4001267012498767e-05,
|
9013 |
+
"loss": 1.726,
|
9014 |
+
"step": 743000
|
9015 |
+
},
|
9016 |
+
{
|
9017 |
+
"epoch": 10.7,
|
9018 |
+
"learning_rate": 2.3949547977713045e-05,
|
9019 |
+
"loss": 1.729,
|
9020 |
+
"step": 743500
|
9021 |
+
},
|
9022 |
+
{
|
9023 |
+
"epoch": 10.71,
|
9024 |
+
"learning_rate": 2.3897621235960306e-05,
|
9025 |
+
"loss": 1.7244,
|
9026 |
+
"step": 744000
|
9027 |
+
},
|
9028 |
+
{
|
9029 |
+
"epoch": 10.72,
|
9030 |
+
"learning_rate": 2.3845902201174584e-05,
|
9031 |
+
"loss": 1.724,
|
9032 |
+
"step": 744500
|
9033 |
+
},
|
9034 |
+
{
|
9035 |
+
"epoch": 10.72,
|
9036 |
+
"learning_rate": 2.379397545942185e-05,
|
9037 |
+
"loss": 1.7243,
|
9038 |
+
"step": 745000
|
9039 |
+
},
|
9040 |
+
{
|
9041 |
+
"epoch": 10.73,
|
9042 |
+
"learning_rate": 2.3742256424636123e-05,
|
9043 |
+
"loss": 1.7262,
|
9044 |
+
"step": 745500
|
9045 |
+
},
|
9046 |
+
{
|
9047 |
+
"epoch": 10.74,
|
9048 |
+
"learning_rate": 2.369032968288339e-05,
|
9049 |
+
"loss": 1.7221,
|
9050 |
+
"step": 746000
|
9051 |
+
},
|
9052 |
+
{
|
9053 |
+
"epoch": 10.75,
|
9054 |
+
"learning_rate": 2.3638610648097663e-05,
|
9055 |
+
"loss": 1.7231,
|
9056 |
+
"step": 746500
|
9057 |
+
},
|
9058 |
+
{
|
9059 |
+
"epoch": 10.75,
|
9060 |
+
"learning_rate": 2.358668390634493e-05,
|
9061 |
+
"loss": 1.7281,
|
9062 |
+
"step": 747000
|
9063 |
+
},
|
9064 |
+
{
|
9065 |
+
"epoch": 10.76,
|
9066 |
+
"learning_rate": 2.3534964871559205e-05,
|
9067 |
+
"loss": 1.728,
|
9068 |
+
"step": 747500
|
9069 |
+
},
|
9070 |
+
{
|
9071 |
+
"epoch": 10.77,
|
9072 |
+
"learning_rate": 2.348303812980647e-05,
|
9073 |
+
"loss": 1.7238,
|
9074 |
+
"step": 748000
|
9075 |
+
},
|
9076 |
+
{
|
9077 |
+
"epoch": 10.77,
|
9078 |
+
"learning_rate": 2.3431319095020745e-05,
|
9079 |
+
"loss": 1.7245,
|
9080 |
+
"step": 748500
|
9081 |
+
},
|
9082 |
+
{
|
9083 |
+
"epoch": 10.78,
|
9084 |
+
"learning_rate": 2.337939235326801e-05,
|
9085 |
+
"loss": 1.7276,
|
9086 |
+
"step": 749000
|
9087 |
+
},
|
9088 |
+
{
|
9089 |
+
"epoch": 10.79,
|
9090 |
+
"learning_rate": 2.3327673318482287e-05,
|
9091 |
+
"loss": 1.7243,
|
9092 |
+
"step": 749500
|
9093 |
+
},
|
9094 |
+
{
|
9095 |
+
"epoch": 10.8,
|
9096 |
+
"learning_rate": 2.3275746576729552e-05,
|
9097 |
+
"loss": 1.7242,
|
9098 |
+
"step": 750000
|
9099 |
+
},
|
9100 |
+
{
|
9101 |
+
"epoch": 10.8,
|
9102 |
+
"learning_rate": 2.3224027541943827e-05,
|
9103 |
+
"loss": 1.7268,
|
9104 |
+
"step": 750500
|
9105 |
+
},
|
9106 |
+
{
|
9107 |
+
"epoch": 10.81,
|
9108 |
+
"learning_rate": 2.317210080019109e-05,
|
9109 |
+
"loss": 1.7224,
|
9110 |
+
"step": 751000
|
9111 |
+
},
|
9112 |
+
{
|
9113 |
+
"epoch": 10.82,
|
9114 |
+
"learning_rate": 2.3120174058438356e-05,
|
9115 |
+
"loss": 1.7241,
|
9116 |
+
"step": 751500
|
9117 |
+
},
|
9118 |
+
{
|
9119 |
+
"epoch": 10.82,
|
9120 |
+
"learning_rate": 2.3068455023652634e-05,
|
9121 |
+
"loss": 1.7226,
|
9122 |
+
"step": 752000
|
9123 |
+
},
|
9124 |
+
{
|
9125 |
+
"epoch": 10.83,
|
9126 |
+
"learning_rate": 2.3016528281899896e-05,
|
9127 |
+
"loss": 1.7251,
|
9128 |
+
"step": 752500
|
9129 |
+
},
|
9130 |
+
{
|
9131 |
+
"epoch": 10.84,
|
9132 |
+
"learning_rate": 2.2964809247114173e-05,
|
9133 |
+
"loss": 1.7182,
|
9134 |
+
"step": 753000
|
9135 |
+
},
|
9136 |
+
{
|
9137 |
+
"epoch": 10.85,
|
9138 |
+
"learning_rate": 2.2912882505361438e-05,
|
9139 |
+
"loss": 1.7232,
|
9140 |
+
"step": 753500
|
9141 |
+
},
|
9142 |
+
{
|
9143 |
+
"epoch": 10.85,
|
9144 |
+
"learning_rate": 2.2861163470575713e-05,
|
9145 |
+
"loss": 1.7256,
|
9146 |
+
"step": 754000
|
9147 |
+
},
|
9148 |
+
{
|
9149 |
+
"epoch": 10.86,
|
9150 |
+
"learning_rate": 2.2809236728822978e-05,
|
9151 |
+
"loss": 1.7254,
|
9152 |
+
"step": 754500
|
9153 |
+
},
|
9154 |
+
{
|
9155 |
+
"epoch": 10.87,
|
9156 |
+
"learning_rate": 2.2757517694037252e-05,
|
9157 |
+
"loss": 1.7269,
|
9158 |
+
"step": 755000
|
9159 |
+
},
|
9160 |
+
{
|
9161 |
+
"epoch": 10.87,
|
9162 |
+
"learning_rate": 2.2705590952284517e-05,
|
9163 |
+
"loss": 1.7254,
|
9164 |
+
"step": 755500
|
9165 |
+
},
|
9166 |
+
{
|
9167 |
+
"epoch": 10.88,
|
9168 |
+
"learning_rate": 2.2653871917498795e-05,
|
9169 |
+
"loss": 1.7247,
|
9170 |
+
"step": 756000
|
9171 |
+
},
|
9172 |
+
{
|
9173 |
+
"epoch": 10.89,
|
9174 |
+
"learning_rate": 2.260194517574606e-05,
|
9175 |
+
"loss": 1.7232,
|
9176 |
+
"step": 756500
|
9177 |
+
},
|
9178 |
+
{
|
9179 |
+
"epoch": 10.9,
|
9180 |
+
"learning_rate": 2.2550226140960334e-05,
|
9181 |
+
"loss": 1.7252,
|
9182 |
+
"step": 757000
|
9183 |
+
},
|
9184 |
+
{
|
9185 |
+
"epoch": 10.9,
|
9186 |
+
"learning_rate": 2.24982993992076e-05,
|
9187 |
+
"loss": 1.7227,
|
9188 |
+
"step": 757500
|
9189 |
+
},
|
9190 |
+
{
|
9191 |
+
"epoch": 10.91,
|
9192 |
+
"learning_rate": 2.2446580364421873e-05,
|
9193 |
+
"loss": 1.7207,
|
9194 |
+
"step": 758000
|
9195 |
+
},
|
9196 |
+
{
|
9197 |
+
"epoch": 10.92,
|
9198 |
+
"learning_rate": 2.239465362266914e-05,
|
9199 |
+
"loss": 1.7222,
|
9200 |
+
"step": 758500
|
9201 |
+
},
|
9202 |
+
{
|
9203 |
+
"epoch": 10.93,
|
9204 |
+
"learning_rate": 2.2342934587883416e-05,
|
9205 |
+
"loss": 1.7244,
|
9206 |
+
"step": 759000
|
9207 |
+
},
|
9208 |
+
{
|
9209 |
+
"epoch": 10.93,
|
9210 |
+
"learning_rate": 2.229100784613068e-05,
|
9211 |
+
"loss": 1.7237,
|
9212 |
+
"step": 759500
|
9213 |
+
},
|
9214 |
+
{
|
9215 |
+
"epoch": 10.94,
|
9216 |
+
"learning_rate": 2.2239288811344955e-05,
|
9217 |
+
"loss": 1.7208,
|
9218 |
+
"step": 760000
|
9219 |
+
},
|
9220 |
+
{
|
9221 |
+
"epoch": 10.95,
|
9222 |
+
"learning_rate": 2.218736206959222e-05,
|
9223 |
+
"loss": 1.7214,
|
9224 |
+
"step": 760500
|
9225 |
+
},
|
9226 |
+
{
|
9227 |
+
"epoch": 10.95,
|
9228 |
+
"learning_rate": 2.2135643034806498e-05,
|
9229 |
+
"loss": 1.7169,
|
9230 |
+
"step": 761000
|
9231 |
+
},
|
9232 |
+
{
|
9233 |
+
"epoch": 10.96,
|
9234 |
+
"learning_rate": 2.208371629305376e-05,
|
9235 |
+
"loss": 1.7241,
|
9236 |
+
"step": 761500
|
9237 |
+
},
|
9238 |
+
{
|
9239 |
+
"epoch": 10.97,
|
9240 |
+
"learning_rate": 2.2031997258268037e-05,
|
9241 |
+
"loss": 1.7228,
|
9242 |
+
"step": 762000
|
9243 |
+
},
|
9244 |
+
{
|
9245 |
+
"epoch": 10.98,
|
9246 |
+
"learning_rate": 2.1980070516515302e-05,
|
9247 |
+
"loss": 1.72,
|
9248 |
+
"step": 762500
|
9249 |
+
},
|
9250 |
+
{
|
9251 |
+
"epoch": 10.98,
|
9252 |
+
"learning_rate": 2.192835148172958e-05,
|
9253 |
+
"loss": 1.7202,
|
9254 |
+
"step": 763000
|
9255 |
+
},
|
9256 |
+
{
|
9257 |
+
"epoch": 10.99,
|
9258 |
+
"learning_rate": 2.187642473997684e-05,
|
9259 |
+
"loss": 1.7232,
|
9260 |
+
"step": 763500
|
9261 |
+
},
|
9262 |
+
{
|
9263 |
+
"epoch": 11.0,
|
9264 |
+
"learning_rate": 2.182470570519112e-05,
|
9265 |
+
"loss": 1.7225,
|
9266 |
+
"step": 764000
|
9267 |
+
},
|
9268 |
+
{
|
9269 |
+
"epoch": 11.0,
|
9270 |
+
"eval_accuracy": 0.6647001699746609,
|
9271 |
+
"eval_loss": 1.5908203125,
|
9272 |
+
"eval_runtime": 653.6993,
|
9273 |
+
"eval_samples_per_second": 824.442,
|
9274 |
+
"eval_steps_per_second": 34.352,
|
9275 |
+
"step": 764203
|
9276 |
}
|
9277 |
],
|
9278 |
"max_steps": 972622,
|
9279 |
"num_train_epochs": 14,
|
9280 |
+
"total_flos": 5.477391803378303e+18,
|
9281 |
"trial_name": null,
|
9282 |
"trial_params": null
|
9283 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 59121639
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eda519537384a51d41a4a58b603f16ba211d4cdc516840c5fe985d5f26ea19e0
|
3 |
size 59121639
|
runs/Feb22_11-16-27_user-SYS-5049A-TR/events.out.tfevents.1677032209.user-SYS-5049A-TR.55703.0
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3bdf33703fbf3b6aa16fbab3def8fe9ed924d0ad3c9b789ba3b9b80a46cf39cc
|
3 |
+
size 251444
|