Training in progress, step 2400, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 500770656
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0bc773204153173a13c1ac40b0d299d63826a9009d800e65a16ac4dff721fee9
|
3 |
size 500770656
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 254918356
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2354a9b4460be38c2facc081861e1d39817d9e3f7d6d7818671513775a0f21bd
|
3 |
size 254918356
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a8e8df32598dfacb12011daa77172ba188bcb85dc5dfb5c57bf90f20875c1ee3
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dfa45a2010848f8ba6bd00a9aefaa39f18e6a555b04b4e25c9be094c299a3176
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": 0.635880708694458,
|
3 |
"best_model_checkpoint": "miner_id_24/checkpoint-1800",
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 200,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -15503,6 +15503,1414 @@
|
|
15503 |
"eval_samples_per_second": 2.51,
|
15504 |
"eval_steps_per_second": 2.51,
|
15505 |
"step": 2200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15506 |
}
|
15507 |
],
|
15508 |
"logging_steps": 1,
|
@@ -15517,7 +16925,7 @@
|
|
15517 |
"early_stopping_threshold": 0.0
|
15518 |
},
|
15519 |
"attributes": {
|
15520 |
-
"early_stopping_patience_counter":
|
15521 |
}
|
15522 |
},
|
15523 |
"TrainerControl": {
|
@@ -15526,12 +16934,12 @@
|
|
15526 |
"should_evaluate": false,
|
15527 |
"should_log": false,
|
15528 |
"should_save": true,
|
15529 |
-
"should_training_stop":
|
15530 |
},
|
15531 |
"attributes": {}
|
15532 |
}
|
15533 |
},
|
15534 |
-
"total_flos": 3.
|
15535 |
"train_batch_size": 1,
|
15536 |
"trial_name": null,
|
15537 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": 0.635880708694458,
|
3 |
"best_model_checkpoint": "miner_id_24/checkpoint-1800",
|
4 |
+
"epoch": 0.33105731429753776,
|
5 |
"eval_steps": 200,
|
6 |
+
"global_step": 2400,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
15503 |
"eval_samples_per_second": 2.51,
|
15504 |
"eval_steps_per_second": 2.51,
|
15505 |
"step": 2200
|
15506 |
+
},
|
15507 |
+
{
|
15508 |
+
"epoch": 0.3036071453203669,
|
15509 |
+
"grad_norm": 0.7370956540107727,
|
15510 |
+
"learning_rate": 0.000195108780321177,
|
15511 |
+
"loss": 0.4832,
|
15512 |
+
"step": 2201
|
15513 |
+
},
|
15514 |
+
{
|
15515 |
+
"epoch": 0.30374508586799087,
|
15516 |
+
"grad_norm": 0.6172298192977905,
|
15517 |
+
"learning_rate": 0.00019510431046312185,
|
15518 |
+
"loss": 0.5685,
|
15519 |
+
"step": 2202
|
15520 |
+
},
|
15521 |
+
{
|
15522 |
+
"epoch": 0.3038830264156149,
|
15523 |
+
"grad_norm": 0.4689820408821106,
|
15524 |
+
"learning_rate": 0.0001950998386148504,
|
15525 |
+
"loss": 0.3635,
|
15526 |
+
"step": 2203
|
15527 |
+
},
|
15528 |
+
{
|
15529 |
+
"epoch": 0.3040209669632388,
|
15530 |
+
"grad_norm": 0.8951042294502258,
|
15531 |
+
"learning_rate": 0.00019509536477645617,
|
15532 |
+
"loss": 0.8364,
|
15533 |
+
"step": 2204
|
15534 |
+
},
|
15535 |
+
{
|
15536 |
+
"epoch": 0.30415890751086283,
|
15537 |
+
"grad_norm": 0.6719712018966675,
|
15538 |
+
"learning_rate": 0.00019509088894803286,
|
15539 |
+
"loss": 0.2531,
|
15540 |
+
"step": 2205
|
15541 |
+
},
|
15542 |
+
{
|
15543 |
+
"epoch": 0.3042968480584868,
|
15544 |
+
"grad_norm": 0.730803370475769,
|
15545 |
+
"learning_rate": 0.00019508641112967408,
|
15546 |
+
"loss": 0.5159,
|
15547 |
+
"step": 2206
|
15548 |
+
},
|
15549 |
+
{
|
15550 |
+
"epoch": 0.3044347886061108,
|
15551 |
+
"grad_norm": 0.7379736304283142,
|
15552 |
+
"learning_rate": 0.0001950819313214736,
|
15553 |
+
"loss": 0.4162,
|
15554 |
+
"step": 2207
|
15555 |
+
},
|
15556 |
+
{
|
15557 |
+
"epoch": 0.30457272915373473,
|
15558 |
+
"grad_norm": 0.8285558223724365,
|
15559 |
+
"learning_rate": 0.00019507744952352508,
|
15560 |
+
"loss": 0.7966,
|
15561 |
+
"step": 2208
|
15562 |
+
},
|
15563 |
+
{
|
15564 |
+
"epoch": 0.30471066970135874,
|
15565 |
+
"grad_norm": 0.8864738941192627,
|
15566 |
+
"learning_rate": 0.00019507296573592235,
|
15567 |
+
"loss": 0.7326,
|
15568 |
+
"step": 2209
|
15569 |
+
},
|
15570 |
+
{
|
15571 |
+
"epoch": 0.3048486102489827,
|
15572 |
+
"grad_norm": 0.7778903841972351,
|
15573 |
+
"learning_rate": 0.00019506847995875924,
|
15574 |
+
"loss": 0.4939,
|
15575 |
+
"step": 2210
|
15576 |
+
},
|
15577 |
+
{
|
15578 |
+
"epoch": 0.30498655079660664,
|
15579 |
+
"grad_norm": 0.7180725932121277,
|
15580 |
+
"learning_rate": 0.00019506399219212966,
|
15581 |
+
"loss": 0.5479,
|
15582 |
+
"step": 2211
|
15583 |
+
},
|
15584 |
+
{
|
15585 |
+
"epoch": 0.30512449134423064,
|
15586 |
+
"grad_norm": 0.9998635053634644,
|
15587 |
+
"learning_rate": 0.00019505950243612746,
|
15588 |
+
"loss": 1.02,
|
15589 |
+
"step": 2212
|
15590 |
+
},
|
15591 |
+
{
|
15592 |
+
"epoch": 0.3052624318918546,
|
15593 |
+
"grad_norm": 1.4325881004333496,
|
15594 |
+
"learning_rate": 0.00019505501069084659,
|
15595 |
+
"loss": 0.6919,
|
15596 |
+
"step": 2213
|
15597 |
+
},
|
15598 |
+
{
|
15599 |
+
"epoch": 0.3054003724394786,
|
15600 |
+
"grad_norm": 0.7900728583335876,
|
15601 |
+
"learning_rate": 0.00019505051695638113,
|
15602 |
+
"loss": 0.3652,
|
15603 |
+
"step": 2214
|
15604 |
+
},
|
15605 |
+
{
|
15606 |
+
"epoch": 0.30553831298710254,
|
15607 |
+
"grad_norm": 0.8904551863670349,
|
15608 |
+
"learning_rate": 0.00019504602123282508,
|
15609 |
+
"loss": 0.8051,
|
15610 |
+
"step": 2215
|
15611 |
+
},
|
15612 |
+
{
|
15613 |
+
"epoch": 0.30567625353472655,
|
15614 |
+
"grad_norm": 0.5742565989494324,
|
15615 |
+
"learning_rate": 0.00019504152352027245,
|
15616 |
+
"loss": 0.3562,
|
15617 |
+
"step": 2216
|
15618 |
+
},
|
15619 |
+
{
|
15620 |
+
"epoch": 0.3058141940823505,
|
15621 |
+
"grad_norm": 0.8754223585128784,
|
15622 |
+
"learning_rate": 0.00019503702381881745,
|
15623 |
+
"loss": 0.7154,
|
15624 |
+
"step": 2217
|
15625 |
+
},
|
15626 |
+
{
|
15627 |
+
"epoch": 0.3059521346299745,
|
15628 |
+
"grad_norm": 0.834255576133728,
|
15629 |
+
"learning_rate": 0.00019503252212855422,
|
15630 |
+
"loss": 0.8241,
|
15631 |
+
"step": 2218
|
15632 |
+
},
|
15633 |
+
{
|
15634 |
+
"epoch": 0.30609007517759845,
|
15635 |
+
"grad_norm": 0.8959856033325195,
|
15636 |
+
"learning_rate": 0.00019502801844957697,
|
15637 |
+
"loss": 1.1416,
|
15638 |
+
"step": 2219
|
15639 |
+
},
|
15640 |
+
{
|
15641 |
+
"epoch": 0.3062280157252224,
|
15642 |
+
"grad_norm": 0.76212078332901,
|
15643 |
+
"learning_rate": 0.00019502351278197994,
|
15644 |
+
"loss": 0.5501,
|
15645 |
+
"step": 2220
|
15646 |
+
},
|
15647 |
+
{
|
15648 |
+
"epoch": 0.3063659562728464,
|
15649 |
+
"grad_norm": 1.0702933073043823,
|
15650 |
+
"learning_rate": 0.0001950190051258574,
|
15651 |
+
"loss": 0.5158,
|
15652 |
+
"step": 2221
|
15653 |
+
},
|
15654 |
+
{
|
15655 |
+
"epoch": 0.30650389682047036,
|
15656 |
+
"grad_norm": 0.9771005511283875,
|
15657 |
+
"learning_rate": 0.00019501449548130372,
|
15658 |
+
"loss": 0.6492,
|
15659 |
+
"step": 2222
|
15660 |
+
},
|
15661 |
+
{
|
15662 |
+
"epoch": 0.30664183736809436,
|
15663 |
+
"grad_norm": 0.6449692845344543,
|
15664 |
+
"learning_rate": 0.00019500998384841322,
|
15665 |
+
"loss": 0.581,
|
15666 |
+
"step": 2223
|
15667 |
+
},
|
15668 |
+
{
|
15669 |
+
"epoch": 0.3067797779157183,
|
15670 |
+
"grad_norm": 0.6486768126487732,
|
15671 |
+
"learning_rate": 0.00019500547022728034,
|
15672 |
+
"loss": 0.6896,
|
15673 |
+
"step": 2224
|
15674 |
+
},
|
15675 |
+
{
|
15676 |
+
"epoch": 0.3069177184633423,
|
15677 |
+
"grad_norm": 0.570933997631073,
|
15678 |
+
"learning_rate": 0.00019500095461799955,
|
15679 |
+
"loss": 0.4472,
|
15680 |
+
"step": 2225
|
15681 |
+
},
|
15682 |
+
{
|
15683 |
+
"epoch": 0.30705565901096626,
|
15684 |
+
"grad_norm": 0.6124463081359863,
|
15685 |
+
"learning_rate": 0.00019499643702066536,
|
15686 |
+
"loss": 0.49,
|
15687 |
+
"step": 2226
|
15688 |
+
},
|
15689 |
+
{
|
15690 |
+
"epoch": 0.30719359955859027,
|
15691 |
+
"grad_norm": 1.030892014503479,
|
15692 |
+
"learning_rate": 0.00019499191743537224,
|
15693 |
+
"loss": 0.6116,
|
15694 |
+
"step": 2227
|
15695 |
+
},
|
15696 |
+
{
|
15697 |
+
"epoch": 0.3073315401062142,
|
15698 |
+
"grad_norm": 0.7422316670417786,
|
15699 |
+
"learning_rate": 0.00019498739586221482,
|
15700 |
+
"loss": 0.4349,
|
15701 |
+
"step": 2228
|
15702 |
+
},
|
15703 |
+
{
|
15704 |
+
"epoch": 0.3074694806538382,
|
15705 |
+
"grad_norm": 1.2078644037246704,
|
15706 |
+
"learning_rate": 0.00019498287230128775,
|
15707 |
+
"loss": 0.8739,
|
15708 |
+
"step": 2229
|
15709 |
+
},
|
15710 |
+
{
|
15711 |
+
"epoch": 0.3076074212014622,
|
15712 |
+
"grad_norm": 0.6796876788139343,
|
15713 |
+
"learning_rate": 0.0001949783467526856,
|
15714 |
+
"loss": 0.4402,
|
15715 |
+
"step": 2230
|
15716 |
+
},
|
15717 |
+
{
|
15718 |
+
"epoch": 0.3077453617490861,
|
15719 |
+
"grad_norm": 0.9108544588088989,
|
15720 |
+
"learning_rate": 0.00019497381921650318,
|
15721 |
+
"loss": 0.8838,
|
15722 |
+
"step": 2231
|
15723 |
+
},
|
15724 |
+
{
|
15725 |
+
"epoch": 0.3078833022967101,
|
15726 |
+
"grad_norm": 0.9964629411697388,
|
15727 |
+
"learning_rate": 0.00019496928969283517,
|
15728 |
+
"loss": 0.7255,
|
15729 |
+
"step": 2232
|
15730 |
+
},
|
15731 |
+
{
|
15732 |
+
"epoch": 0.3080212428443341,
|
15733 |
+
"grad_norm": 1.5495188236236572,
|
15734 |
+
"learning_rate": 0.00019496475818177634,
|
15735 |
+
"loss": 1.264,
|
15736 |
+
"step": 2233
|
15737 |
+
},
|
15738 |
+
{
|
15739 |
+
"epoch": 0.3081591833919581,
|
15740 |
+
"grad_norm": 0.8140445351600647,
|
15741 |
+
"learning_rate": 0.0001949602246834216,
|
15742 |
+
"loss": 0.9636,
|
15743 |
+
"step": 2234
|
15744 |
+
},
|
15745 |
+
{
|
15746 |
+
"epoch": 0.30829712393958203,
|
15747 |
+
"grad_norm": 0.6906377077102661,
|
15748 |
+
"learning_rate": 0.0001949556891978658,
|
15749 |
+
"loss": 0.553,
|
15750 |
+
"step": 2235
|
15751 |
+
},
|
15752 |
+
{
|
15753 |
+
"epoch": 0.30843506448720603,
|
15754 |
+
"grad_norm": 0.8340548872947693,
|
15755 |
+
"learning_rate": 0.00019495115172520378,
|
15756 |
+
"loss": 0.5792,
|
15757 |
+
"step": 2236
|
15758 |
+
},
|
15759 |
+
{
|
15760 |
+
"epoch": 0.30857300503483,
|
15761 |
+
"grad_norm": 1.0296357870101929,
|
15762 |
+
"learning_rate": 0.00019494661226553055,
|
15763 |
+
"loss": 0.971,
|
15764 |
+
"step": 2237
|
15765 |
+
},
|
15766 |
+
{
|
15767 |
+
"epoch": 0.308710945582454,
|
15768 |
+
"grad_norm": 0.7610672116279602,
|
15769 |
+
"learning_rate": 0.0001949420708189411,
|
15770 |
+
"loss": 0.5375,
|
15771 |
+
"step": 2238
|
15772 |
+
},
|
15773 |
+
{
|
15774 |
+
"epoch": 0.30884888613007794,
|
15775 |
+
"grad_norm": 0.722172200679779,
|
15776 |
+
"learning_rate": 0.00019493752738553046,
|
15777 |
+
"loss": 0.479,
|
15778 |
+
"step": 2239
|
15779 |
+
},
|
15780 |
+
{
|
15781 |
+
"epoch": 0.3089868266777019,
|
15782 |
+
"grad_norm": 0.8141410946846008,
|
15783 |
+
"learning_rate": 0.00019493298196539375,
|
15784 |
+
"loss": 0.8384,
|
15785 |
+
"step": 2240
|
15786 |
+
},
|
15787 |
+
{
|
15788 |
+
"epoch": 0.3091247672253259,
|
15789 |
+
"grad_norm": 0.7743800282478333,
|
15790 |
+
"learning_rate": 0.000194928434558626,
|
15791 |
+
"loss": 0.9943,
|
15792 |
+
"step": 2241
|
15793 |
+
},
|
15794 |
+
{
|
15795 |
+
"epoch": 0.30926270777294984,
|
15796 |
+
"grad_norm": 0.6680206656455994,
|
15797 |
+
"learning_rate": 0.00019492388516532247,
|
15798 |
+
"loss": 0.4103,
|
15799 |
+
"step": 2242
|
15800 |
+
},
|
15801 |
+
{
|
15802 |
+
"epoch": 0.30940064832057385,
|
15803 |
+
"grad_norm": 0.9488325715065002,
|
15804 |
+
"learning_rate": 0.0001949193337855783,
|
15805 |
+
"loss": 0.8465,
|
15806 |
+
"step": 2243
|
15807 |
+
},
|
15808 |
+
{
|
15809 |
+
"epoch": 0.3095385888681978,
|
15810 |
+
"grad_norm": 0.5857890248298645,
|
15811 |
+
"learning_rate": 0.00019491478041948877,
|
15812 |
+
"loss": 0.395,
|
15813 |
+
"step": 2244
|
15814 |
+
},
|
15815 |
+
{
|
15816 |
+
"epoch": 0.3096765294158218,
|
15817 |
+
"grad_norm": 0.5725042223930359,
|
15818 |
+
"learning_rate": 0.00019491022506714912,
|
15819 |
+
"loss": 0.3626,
|
15820 |
+
"step": 2245
|
15821 |
+
},
|
15822 |
+
{
|
15823 |
+
"epoch": 0.30981446996344575,
|
15824 |
+
"grad_norm": 0.7076693773269653,
|
15825 |
+
"learning_rate": 0.00019490566772865475,
|
15826 |
+
"loss": 0.5949,
|
15827 |
+
"step": 2246
|
15828 |
+
},
|
15829 |
+
{
|
15830 |
+
"epoch": 0.30995241051106975,
|
15831 |
+
"grad_norm": 0.8544387817382812,
|
15832 |
+
"learning_rate": 0.00019490110840410097,
|
15833 |
+
"loss": 1.0608,
|
15834 |
+
"step": 2247
|
15835 |
+
},
|
15836 |
+
{
|
15837 |
+
"epoch": 0.3100903510586937,
|
15838 |
+
"grad_norm": 0.832599937915802,
|
15839 |
+
"learning_rate": 0.00019489654709358323,
|
15840 |
+
"loss": 0.807,
|
15841 |
+
"step": 2248
|
15842 |
+
},
|
15843 |
+
{
|
15844 |
+
"epoch": 0.31022829160631765,
|
15845 |
+
"grad_norm": 1.0049424171447754,
|
15846 |
+
"learning_rate": 0.00019489198379719696,
|
15847 |
+
"loss": 0.794,
|
15848 |
+
"step": 2249
|
15849 |
+
},
|
15850 |
+
{
|
15851 |
+
"epoch": 0.31036623215394166,
|
15852 |
+
"grad_norm": 0.6564392447471619,
|
15853 |
+
"learning_rate": 0.00019488741851503765,
|
15854 |
+
"loss": 0.5557,
|
15855 |
+
"step": 2250
|
15856 |
+
},
|
15857 |
+
{
|
15858 |
+
"epoch": 0.3105041727015656,
|
15859 |
+
"grad_norm": 0.5619440078735352,
|
15860 |
+
"learning_rate": 0.00019488285124720086,
|
15861 |
+
"loss": 0.4077,
|
15862 |
+
"step": 2251
|
15863 |
+
},
|
15864 |
+
{
|
15865 |
+
"epoch": 0.3106421132491896,
|
15866 |
+
"grad_norm": 0.5860351920127869,
|
15867 |
+
"learning_rate": 0.00019487828199378214,
|
15868 |
+
"loss": 0.4018,
|
15869 |
+
"step": 2252
|
15870 |
+
},
|
15871 |
+
{
|
15872 |
+
"epoch": 0.31078005379681356,
|
15873 |
+
"grad_norm": 0.7864125370979309,
|
15874 |
+
"learning_rate": 0.00019487371075487713,
|
15875 |
+
"loss": 0.6525,
|
15876 |
+
"step": 2253
|
15877 |
+
},
|
15878 |
+
{
|
15879 |
+
"epoch": 0.31091799434443756,
|
15880 |
+
"grad_norm": 0.6421269178390503,
|
15881 |
+
"learning_rate": 0.00019486913753058148,
|
15882 |
+
"loss": 0.4446,
|
15883 |
+
"step": 2254
|
15884 |
+
},
|
15885 |
+
{
|
15886 |
+
"epoch": 0.3110559348920615,
|
15887 |
+
"grad_norm": 1.2416633367538452,
|
15888 |
+
"learning_rate": 0.0001948645623209909,
|
15889 |
+
"loss": 0.5695,
|
15890 |
+
"step": 2255
|
15891 |
+
},
|
15892 |
+
{
|
15893 |
+
"epoch": 0.3111938754396855,
|
15894 |
+
"grad_norm": 1.3990689516067505,
|
15895 |
+
"learning_rate": 0.00019485998512620113,
|
15896 |
+
"loss": 0.8486,
|
15897 |
+
"step": 2256
|
15898 |
+
},
|
15899 |
+
{
|
15900 |
+
"epoch": 0.31133181598730947,
|
15901 |
+
"grad_norm": 0.8644762635231018,
|
15902 |
+
"learning_rate": 0.00019485540594630794,
|
15903 |
+
"loss": 0.5197,
|
15904 |
+
"step": 2257
|
15905 |
+
},
|
15906 |
+
{
|
15907 |
+
"epoch": 0.3114697565349334,
|
15908 |
+
"grad_norm": 0.7197523713111877,
|
15909 |
+
"learning_rate": 0.0001948508247814072,
|
15910 |
+
"loss": 0.4854,
|
15911 |
+
"step": 2258
|
15912 |
+
},
|
15913 |
+
{
|
15914 |
+
"epoch": 0.3116076970825574,
|
15915 |
+
"grad_norm": 0.7777307033538818,
|
15916 |
+
"learning_rate": 0.00019484624163159474,
|
15917 |
+
"loss": 0.8011,
|
15918 |
+
"step": 2259
|
15919 |
+
},
|
15920 |
+
{
|
15921 |
+
"epoch": 0.31174563763018137,
|
15922 |
+
"grad_norm": 3.498762369155884,
|
15923 |
+
"learning_rate": 0.00019484165649696648,
|
15924 |
+
"loss": 1.2415,
|
15925 |
+
"step": 2260
|
15926 |
+
},
|
15927 |
+
{
|
15928 |
+
"epoch": 0.3118835781778054,
|
15929 |
+
"grad_norm": 0.8177916407585144,
|
15930 |
+
"learning_rate": 0.00019483706937761837,
|
15931 |
+
"loss": 0.6254,
|
15932 |
+
"step": 2261
|
15933 |
+
},
|
15934 |
+
{
|
15935 |
+
"epoch": 0.3120215187254293,
|
15936 |
+
"grad_norm": 0.8077528476715088,
|
15937 |
+
"learning_rate": 0.0001948324802736464,
|
15938 |
+
"loss": 1.1841,
|
15939 |
+
"step": 2262
|
15940 |
+
},
|
15941 |
+
{
|
15942 |
+
"epoch": 0.31215945927305333,
|
15943 |
+
"grad_norm": 0.7529622316360474,
|
15944 |
+
"learning_rate": 0.00019482788918514664,
|
15945 |
+
"loss": 0.5046,
|
15946 |
+
"step": 2263
|
15947 |
+
},
|
15948 |
+
{
|
15949 |
+
"epoch": 0.3122973998206773,
|
15950 |
+
"grad_norm": 0.6038236618041992,
|
15951 |
+
"learning_rate": 0.0001948232961122151,
|
15952 |
+
"loss": 0.5598,
|
15953 |
+
"step": 2264
|
15954 |
+
},
|
15955 |
+
{
|
15956 |
+
"epoch": 0.3124353403683013,
|
15957 |
+
"grad_norm": 0.6496687531471252,
|
15958 |
+
"learning_rate": 0.00019481870105494796,
|
15959 |
+
"loss": 0.3127,
|
15960 |
+
"step": 2265
|
15961 |
+
},
|
15962 |
+
{
|
15963 |
+
"epoch": 0.31257328091592523,
|
15964 |
+
"grad_norm": 0.8372655510902405,
|
15965 |
+
"learning_rate": 0.00019481410401344133,
|
15966 |
+
"loss": 0.7623,
|
15967 |
+
"step": 2266
|
15968 |
+
},
|
15969 |
+
{
|
15970 |
+
"epoch": 0.3127112214635492,
|
15971 |
+
"grad_norm": 0.9408671855926514,
|
15972 |
+
"learning_rate": 0.00019480950498779144,
|
15973 |
+
"loss": 0.913,
|
15974 |
+
"step": 2267
|
15975 |
+
},
|
15976 |
+
{
|
15977 |
+
"epoch": 0.3128491620111732,
|
15978 |
+
"grad_norm": 1.2297847270965576,
|
15979 |
+
"learning_rate": 0.00019480490397809456,
|
15980 |
+
"loss": 0.7727,
|
15981 |
+
"step": 2268
|
15982 |
+
},
|
15983 |
+
{
|
15984 |
+
"epoch": 0.31298710255879714,
|
15985 |
+
"grad_norm": 0.8657265305519104,
|
15986 |
+
"learning_rate": 0.0001948003009844469,
|
15987 |
+
"loss": 0.7712,
|
15988 |
+
"step": 2269
|
15989 |
+
},
|
15990 |
+
{
|
15991 |
+
"epoch": 0.31312504310642114,
|
15992 |
+
"grad_norm": 0.6789664030075073,
|
15993 |
+
"learning_rate": 0.00019479569600694486,
|
15994 |
+
"loss": 0.5377,
|
15995 |
+
"step": 2270
|
15996 |
+
},
|
15997 |
+
{
|
15998 |
+
"epoch": 0.3132629836540451,
|
15999 |
+
"grad_norm": 0.8153241872787476,
|
16000 |
+
"learning_rate": 0.00019479108904568474,
|
16001 |
+
"loss": 0.438,
|
16002 |
+
"step": 2271
|
16003 |
+
},
|
16004 |
+
{
|
16005 |
+
"epoch": 0.3134009242016691,
|
16006 |
+
"grad_norm": 0.820363461971283,
|
16007 |
+
"learning_rate": 0.00019478648010076298,
|
16008 |
+
"loss": 0.5774,
|
16009 |
+
"step": 2272
|
16010 |
+
},
|
16011 |
+
{
|
16012 |
+
"epoch": 0.31353886474929304,
|
16013 |
+
"grad_norm": 0.9345502853393555,
|
16014 |
+
"learning_rate": 0.00019478186917227605,
|
16015 |
+
"loss": 0.7403,
|
16016 |
+
"step": 2273
|
16017 |
+
},
|
16018 |
+
{
|
16019 |
+
"epoch": 0.31367680529691705,
|
16020 |
+
"grad_norm": 0.6386396884918213,
|
16021 |
+
"learning_rate": 0.00019477725626032043,
|
16022 |
+
"loss": 0.5016,
|
16023 |
+
"step": 2274
|
16024 |
+
},
|
16025 |
+
{
|
16026 |
+
"epoch": 0.313814745844541,
|
16027 |
+
"grad_norm": 1.081990122795105,
|
16028 |
+
"learning_rate": 0.00019477264136499262,
|
16029 |
+
"loss": 0.7868,
|
16030 |
+
"step": 2275
|
16031 |
+
},
|
16032 |
+
{
|
16033 |
+
"epoch": 0.313952686392165,
|
16034 |
+
"grad_norm": 0.7201882600784302,
|
16035 |
+
"learning_rate": 0.00019476802448638924,
|
16036 |
+
"loss": 0.488,
|
16037 |
+
"step": 2276
|
16038 |
+
},
|
16039 |
+
{
|
16040 |
+
"epoch": 0.31409062693978895,
|
16041 |
+
"grad_norm": 0.7955479621887207,
|
16042 |
+
"learning_rate": 0.00019476340562460688,
|
16043 |
+
"loss": 0.7676,
|
16044 |
+
"step": 2277
|
16045 |
+
},
|
16046 |
+
{
|
16047 |
+
"epoch": 0.3142285674874129,
|
16048 |
+
"grad_norm": 0.731919527053833,
|
16049 |
+
"learning_rate": 0.0001947587847797422,
|
16050 |
+
"loss": 0.579,
|
16051 |
+
"step": 2278
|
16052 |
+
},
|
16053 |
+
{
|
16054 |
+
"epoch": 0.3143665080350369,
|
16055 |
+
"grad_norm": 1.8228474855422974,
|
16056 |
+
"learning_rate": 0.00019475416195189192,
|
16057 |
+
"loss": 0.8461,
|
16058 |
+
"step": 2279
|
16059 |
+
},
|
16060 |
+
{
|
16061 |
+
"epoch": 0.31450444858266086,
|
16062 |
+
"grad_norm": 0.5661347508430481,
|
16063 |
+
"learning_rate": 0.00019474953714115274,
|
16064 |
+
"loss": 0.3593,
|
16065 |
+
"step": 2280
|
16066 |
+
},
|
16067 |
+
{
|
16068 |
+
"epoch": 0.31464238913028486,
|
16069 |
+
"grad_norm": 0.747999370098114,
|
16070 |
+
"learning_rate": 0.00019474491034762145,
|
16071 |
+
"loss": 0.6878,
|
16072 |
+
"step": 2281
|
16073 |
+
},
|
16074 |
+
{
|
16075 |
+
"epoch": 0.3147803296779088,
|
16076 |
+
"grad_norm": 0.9928996562957764,
|
16077 |
+
"learning_rate": 0.0001947402815713949,
|
16078 |
+
"loss": 0.8761,
|
16079 |
+
"step": 2282
|
16080 |
+
},
|
16081 |
+
{
|
16082 |
+
"epoch": 0.3149182702255328,
|
16083 |
+
"grad_norm": 0.7003133893013,
|
16084 |
+
"learning_rate": 0.00019473565081256996,
|
16085 |
+
"loss": 0.4855,
|
16086 |
+
"step": 2283
|
16087 |
+
},
|
16088 |
+
{
|
16089 |
+
"epoch": 0.31505621077315676,
|
16090 |
+
"grad_norm": 0.6472734808921814,
|
16091 |
+
"learning_rate": 0.00019473101807124352,
|
16092 |
+
"loss": 0.511,
|
16093 |
+
"step": 2284
|
16094 |
+
},
|
16095 |
+
{
|
16096 |
+
"epoch": 0.31519415132078077,
|
16097 |
+
"grad_norm": 0.723513662815094,
|
16098 |
+
"learning_rate": 0.0001947263833475125,
|
16099 |
+
"loss": 0.4892,
|
16100 |
+
"step": 2285
|
16101 |
+
},
|
16102 |
+
{
|
16103 |
+
"epoch": 0.3153320918684047,
|
16104 |
+
"grad_norm": 1.6176047325134277,
|
16105 |
+
"learning_rate": 0.00019472174664147393,
|
16106 |
+
"loss": 0.5581,
|
16107 |
+
"step": 2286
|
16108 |
+
},
|
16109 |
+
{
|
16110 |
+
"epoch": 0.31547003241602867,
|
16111 |
+
"grad_norm": 0.9376205801963806,
|
16112 |
+
"learning_rate": 0.00019471710795322485,
|
16113 |
+
"loss": 1.091,
|
16114 |
+
"step": 2287
|
16115 |
+
},
|
16116 |
+
{
|
16117 |
+
"epoch": 0.31560797296365267,
|
16118 |
+
"grad_norm": 1.0848584175109863,
|
16119 |
+
"learning_rate": 0.00019471246728286227,
|
16120 |
+
"loss": 0.6718,
|
16121 |
+
"step": 2288
|
16122 |
+
},
|
16123 |
+
{
|
16124 |
+
"epoch": 0.3157459135112766,
|
16125 |
+
"grad_norm": 1.0394634008407593,
|
16126 |
+
"learning_rate": 0.00019470782463048336,
|
16127 |
+
"loss": 0.4477,
|
16128 |
+
"step": 2289
|
16129 |
+
},
|
16130 |
+
{
|
16131 |
+
"epoch": 0.3158838540589006,
|
16132 |
+
"grad_norm": 0.8964745998382568,
|
16133 |
+
"learning_rate": 0.00019470317999618523,
|
16134 |
+
"loss": 0.4769,
|
16135 |
+
"step": 2290
|
16136 |
+
},
|
16137 |
+
{
|
16138 |
+
"epoch": 0.3160217946065246,
|
16139 |
+
"grad_norm": 0.6246095299720764,
|
16140 |
+
"learning_rate": 0.00019469853338006514,
|
16141 |
+
"loss": 0.2479,
|
16142 |
+
"step": 2291
|
16143 |
+
},
|
16144 |
+
{
|
16145 |
+
"epoch": 0.3161597351541486,
|
16146 |
+
"grad_norm": 0.878368079662323,
|
16147 |
+
"learning_rate": 0.0001946938847822203,
|
16148 |
+
"loss": 0.3964,
|
16149 |
+
"step": 2292
|
16150 |
+
},
|
16151 |
+
{
|
16152 |
+
"epoch": 0.31629767570177253,
|
16153 |
+
"grad_norm": 0.6446416974067688,
|
16154 |
+
"learning_rate": 0.00019468923420274797,
|
16155 |
+
"loss": 0.6782,
|
16156 |
+
"step": 2293
|
16157 |
+
},
|
16158 |
+
{
|
16159 |
+
"epoch": 0.31643561624939653,
|
16160 |
+
"grad_norm": 0.8462199568748474,
|
16161 |
+
"learning_rate": 0.0001946845816417455,
|
16162 |
+
"loss": 0.6378,
|
16163 |
+
"step": 2294
|
16164 |
+
},
|
16165 |
+
{
|
16166 |
+
"epoch": 0.3165735567970205,
|
16167 |
+
"grad_norm": 0.7193346619606018,
|
16168 |
+
"learning_rate": 0.00019467992709931017,
|
16169 |
+
"loss": 0.5933,
|
16170 |
+
"step": 2295
|
16171 |
+
},
|
16172 |
+
{
|
16173 |
+
"epoch": 0.31671149734464443,
|
16174 |
+
"grad_norm": 1.4028959274291992,
|
16175 |
+
"learning_rate": 0.00019467527057553952,
|
16176 |
+
"loss": 1.1746,
|
16177 |
+
"step": 2296
|
16178 |
+
},
|
16179 |
+
{
|
16180 |
+
"epoch": 0.31684943789226844,
|
16181 |
+
"grad_norm": 0.8412365913391113,
|
16182 |
+
"learning_rate": 0.00019467061207053087,
|
16183 |
+
"loss": 0.5632,
|
16184 |
+
"step": 2297
|
16185 |
+
},
|
16186 |
+
{
|
16187 |
+
"epoch": 0.3169873784398924,
|
16188 |
+
"grad_norm": 0.6352449655532837,
|
16189 |
+
"learning_rate": 0.0001946659515843818,
|
16190 |
+
"loss": 0.4559,
|
16191 |
+
"step": 2298
|
16192 |
+
},
|
16193 |
+
{
|
16194 |
+
"epoch": 0.3171253189875164,
|
16195 |
+
"grad_norm": 0.48701727390289307,
|
16196 |
+
"learning_rate": 0.00019466128911718982,
|
16197 |
+
"loss": 0.2398,
|
16198 |
+
"step": 2299
|
16199 |
+
},
|
16200 |
+
{
|
16201 |
+
"epoch": 0.31726325953514034,
|
16202 |
+
"grad_norm": 0.5449528098106384,
|
16203 |
+
"learning_rate": 0.00019465662466905243,
|
16204 |
+
"loss": 0.6206,
|
16205 |
+
"step": 2300
|
16206 |
+
},
|
16207 |
+
{
|
16208 |
+
"epoch": 0.31740120008276435,
|
16209 |
+
"grad_norm": 1.2383208274841309,
|
16210 |
+
"learning_rate": 0.00019465195824006732,
|
16211 |
+
"loss": 0.9354,
|
16212 |
+
"step": 2301
|
16213 |
+
},
|
16214 |
+
{
|
16215 |
+
"epoch": 0.3175391406303883,
|
16216 |
+
"grad_norm": 0.9451349377632141,
|
16217 |
+
"learning_rate": 0.00019464728983033212,
|
16218 |
+
"loss": 0.9349,
|
16219 |
+
"step": 2302
|
16220 |
+
},
|
16221 |
+
{
|
16222 |
+
"epoch": 0.3176770811780123,
|
16223 |
+
"grad_norm": 0.7076907753944397,
|
16224 |
+
"learning_rate": 0.0001946426194399445,
|
16225 |
+
"loss": 0.667,
|
16226 |
+
"step": 2303
|
16227 |
+
},
|
16228 |
+
{
|
16229 |
+
"epoch": 0.31781502172563625,
|
16230 |
+
"grad_norm": 0.6356270909309387,
|
16231 |
+
"learning_rate": 0.00019463794706900224,
|
16232 |
+
"loss": 0.2469,
|
16233 |
+
"step": 2304
|
16234 |
+
},
|
16235 |
+
{
|
16236 |
+
"epoch": 0.3179529622732602,
|
16237 |
+
"grad_norm": 0.8059444427490234,
|
16238 |
+
"learning_rate": 0.00019463327271760308,
|
16239 |
+
"loss": 0.6322,
|
16240 |
+
"step": 2305
|
16241 |
+
},
|
16242 |
+
{
|
16243 |
+
"epoch": 0.3180909028208842,
|
16244 |
+
"grad_norm": 0.7126657366752625,
|
16245 |
+
"learning_rate": 0.00019462859638584484,
|
16246 |
+
"loss": 0.4607,
|
16247 |
+
"step": 2306
|
16248 |
+
},
|
16249 |
+
{
|
16250 |
+
"epoch": 0.31822884336850815,
|
16251 |
+
"grad_norm": 1.20512855052948,
|
16252 |
+
"learning_rate": 0.0001946239180738254,
|
16253 |
+
"loss": 0.7065,
|
16254 |
+
"step": 2307
|
16255 |
+
},
|
16256 |
+
{
|
16257 |
+
"epoch": 0.31836678391613216,
|
16258 |
+
"grad_norm": 1.0039737224578857,
|
16259 |
+
"learning_rate": 0.00019461923778164267,
|
16260 |
+
"loss": 0.7817,
|
16261 |
+
"step": 2308
|
16262 |
+
},
|
16263 |
+
{
|
16264 |
+
"epoch": 0.3185047244637561,
|
16265 |
+
"grad_norm": 0.8472278118133545,
|
16266 |
+
"learning_rate": 0.00019461455550939455,
|
16267 |
+
"loss": 0.7392,
|
16268 |
+
"step": 2309
|
16269 |
+
},
|
16270 |
+
{
|
16271 |
+
"epoch": 0.3186426650113801,
|
16272 |
+
"grad_norm": 0.8026204109191895,
|
16273 |
+
"learning_rate": 0.00019460987125717905,
|
16274 |
+
"loss": 0.6547,
|
16275 |
+
"step": 2310
|
16276 |
+
},
|
16277 |
+
{
|
16278 |
+
"epoch": 0.31878060555900406,
|
16279 |
+
"grad_norm": 0.985788881778717,
|
16280 |
+
"learning_rate": 0.00019460518502509422,
|
16281 |
+
"loss": 0.3619,
|
16282 |
+
"step": 2311
|
16283 |
+
},
|
16284 |
+
{
|
16285 |
+
"epoch": 0.31891854610662806,
|
16286 |
+
"grad_norm": 0.913837194442749,
|
16287 |
+
"learning_rate": 0.00019460049681323808,
|
16288 |
+
"loss": 0.8376,
|
16289 |
+
"step": 2312
|
16290 |
+
},
|
16291 |
+
{
|
16292 |
+
"epoch": 0.319056486654252,
|
16293 |
+
"grad_norm": 0.6265845894813538,
|
16294 |
+
"learning_rate": 0.0001945958066217088,
|
16295 |
+
"loss": 0.579,
|
16296 |
+
"step": 2313
|
16297 |
+
},
|
16298 |
+
{
|
16299 |
+
"epoch": 0.319194427201876,
|
16300 |
+
"grad_norm": 0.9424504637718201,
|
16301 |
+
"learning_rate": 0.00019459111445060444,
|
16302 |
+
"loss": 0.5184,
|
16303 |
+
"step": 2314
|
16304 |
+
},
|
16305 |
+
{
|
16306 |
+
"epoch": 0.31933236774949997,
|
16307 |
+
"grad_norm": 0.5835946202278137,
|
16308 |
+
"learning_rate": 0.00019458642030002326,
|
16309 |
+
"loss": 0.4495,
|
16310 |
+
"step": 2315
|
16311 |
+
},
|
16312 |
+
{
|
16313 |
+
"epoch": 0.3194703082971239,
|
16314 |
+
"grad_norm": 0.7594127058982849,
|
16315 |
+
"learning_rate": 0.00019458172417006347,
|
16316 |
+
"loss": 0.7142,
|
16317 |
+
"step": 2316
|
16318 |
+
},
|
16319 |
+
{
|
16320 |
+
"epoch": 0.3196082488447479,
|
16321 |
+
"grad_norm": 0.6176849007606506,
|
16322 |
+
"learning_rate": 0.00019457702606082337,
|
16323 |
+
"loss": 0.3594,
|
16324 |
+
"step": 2317
|
16325 |
+
},
|
16326 |
+
{
|
16327 |
+
"epoch": 0.31974618939237187,
|
16328 |
+
"grad_norm": 1.6596888303756714,
|
16329 |
+
"learning_rate": 0.00019457232597240126,
|
16330 |
+
"loss": 0.9118,
|
16331 |
+
"step": 2318
|
16332 |
+
},
|
16333 |
+
{
|
16334 |
+
"epoch": 0.3198841299399959,
|
16335 |
+
"grad_norm": 0.8690287470817566,
|
16336 |
+
"learning_rate": 0.00019456762390489548,
|
16337 |
+
"loss": 0.566,
|
16338 |
+
"step": 2319
|
16339 |
+
},
|
16340 |
+
{
|
16341 |
+
"epoch": 0.3200220704876198,
|
16342 |
+
"grad_norm": 1.1131110191345215,
|
16343 |
+
"learning_rate": 0.0001945629198584044,
|
16344 |
+
"loss": 1.1,
|
16345 |
+
"step": 2320
|
16346 |
+
},
|
16347 |
+
{
|
16348 |
+
"epoch": 0.32016001103524383,
|
16349 |
+
"grad_norm": 0.7218566536903381,
|
16350 |
+
"learning_rate": 0.00019455821383302657,
|
16351 |
+
"loss": 0.5501,
|
16352 |
+
"step": 2321
|
16353 |
+
},
|
16354 |
+
{
|
16355 |
+
"epoch": 0.3202979515828678,
|
16356 |
+
"grad_norm": 0.5688751339912415,
|
16357 |
+
"learning_rate": 0.00019455350582886038,
|
16358 |
+
"loss": 0.5373,
|
16359 |
+
"step": 2322
|
16360 |
+
},
|
16361 |
+
{
|
16362 |
+
"epoch": 0.3204358921304918,
|
16363 |
+
"grad_norm": 1.2792819738388062,
|
16364 |
+
"learning_rate": 0.00019454879584600437,
|
16365 |
+
"loss": 0.733,
|
16366 |
+
"step": 2323
|
16367 |
+
},
|
16368 |
+
{
|
16369 |
+
"epoch": 0.32057383267811573,
|
16370 |
+
"grad_norm": 0.9383312463760376,
|
16371 |
+
"learning_rate": 0.0001945440838845571,
|
16372 |
+
"loss": 0.4367,
|
16373 |
+
"step": 2324
|
16374 |
+
},
|
16375 |
+
{
|
16376 |
+
"epoch": 0.3207117732257397,
|
16377 |
+
"grad_norm": 0.9324066042900085,
|
16378 |
+
"learning_rate": 0.00019453936994461718,
|
16379 |
+
"loss": 0.9925,
|
16380 |
+
"step": 2325
|
16381 |
+
},
|
16382 |
+
{
|
16383 |
+
"epoch": 0.3208497137733637,
|
16384 |
+
"grad_norm": 1.0629867315292358,
|
16385 |
+
"learning_rate": 0.0001945346540262833,
|
16386 |
+
"loss": 0.7296,
|
16387 |
+
"step": 2326
|
16388 |
+
},
|
16389 |
+
{
|
16390 |
+
"epoch": 0.32098765432098764,
|
16391 |
+
"grad_norm": 0.7863196730613708,
|
16392 |
+
"learning_rate": 0.0001945299361296541,
|
16393 |
+
"loss": 0.936,
|
16394 |
+
"step": 2327
|
16395 |
+
},
|
16396 |
+
{
|
16397 |
+
"epoch": 0.32112559486861164,
|
16398 |
+
"grad_norm": 0.6948659420013428,
|
16399 |
+
"learning_rate": 0.0001945252162548283,
|
16400 |
+
"loss": 0.4759,
|
16401 |
+
"step": 2328
|
16402 |
+
},
|
16403 |
+
{
|
16404 |
+
"epoch": 0.3212635354162356,
|
16405 |
+
"grad_norm": 0.908307671546936,
|
16406 |
+
"learning_rate": 0.00019452049440190473,
|
16407 |
+
"loss": 0.8042,
|
16408 |
+
"step": 2329
|
16409 |
+
},
|
16410 |
+
{
|
16411 |
+
"epoch": 0.3214014759638596,
|
16412 |
+
"grad_norm": 0.814140796661377,
|
16413 |
+
"learning_rate": 0.00019451577057098213,
|
16414 |
+
"loss": 0.7884,
|
16415 |
+
"step": 2330
|
16416 |
+
},
|
16417 |
+
{
|
16418 |
+
"epoch": 0.32153941651148354,
|
16419 |
+
"grad_norm": 0.752573549747467,
|
16420 |
+
"learning_rate": 0.0001945110447621594,
|
16421 |
+
"loss": 0.8405,
|
16422 |
+
"step": 2331
|
16423 |
+
},
|
16424 |
+
{
|
16425 |
+
"epoch": 0.32167735705910755,
|
16426 |
+
"grad_norm": 0.8677518963813782,
|
16427 |
+
"learning_rate": 0.00019450631697553542,
|
16428 |
+
"loss": 0.8891,
|
16429 |
+
"step": 2332
|
16430 |
+
},
|
16431 |
+
{
|
16432 |
+
"epoch": 0.3218152976067315,
|
16433 |
+
"grad_norm": 0.7212129831314087,
|
16434 |
+
"learning_rate": 0.00019450158721120916,
|
16435 |
+
"loss": 0.5369,
|
16436 |
+
"step": 2333
|
16437 |
+
},
|
16438 |
+
{
|
16439 |
+
"epoch": 0.32195323815435545,
|
16440 |
+
"grad_norm": 0.6805658936500549,
|
16441 |
+
"learning_rate": 0.00019449685546927954,
|
16442 |
+
"loss": 0.4181,
|
16443 |
+
"step": 2334
|
16444 |
+
},
|
16445 |
+
{
|
16446 |
+
"epoch": 0.32209117870197945,
|
16447 |
+
"grad_norm": 0.8572118878364563,
|
16448 |
+
"learning_rate": 0.0001944921217498456,
|
16449 |
+
"loss": 0.4678,
|
16450 |
+
"step": 2335
|
16451 |
+
},
|
16452 |
+
{
|
16453 |
+
"epoch": 0.3222291192496034,
|
16454 |
+
"grad_norm": 0.7739250063896179,
|
16455 |
+
"learning_rate": 0.00019448738605300645,
|
16456 |
+
"loss": 0.8138,
|
16457 |
+
"step": 2336
|
16458 |
+
},
|
16459 |
+
{
|
16460 |
+
"epoch": 0.3223670597972274,
|
16461 |
+
"grad_norm": 0.9221212863922119,
|
16462 |
+
"learning_rate": 0.00019448264837886113,
|
16463 |
+
"loss": 0.6867,
|
16464 |
+
"step": 2337
|
16465 |
+
},
|
16466 |
+
{
|
16467 |
+
"epoch": 0.32250500034485136,
|
16468 |
+
"grad_norm": 0.5943915247917175,
|
16469 |
+
"learning_rate": 0.0001944779087275088,
|
16470 |
+
"loss": 0.4269,
|
16471 |
+
"step": 2338
|
16472 |
+
},
|
16473 |
+
{
|
16474 |
+
"epoch": 0.32264294089247536,
|
16475 |
+
"grad_norm": 0.7601683735847473,
|
16476 |
+
"learning_rate": 0.00019447316709904865,
|
16477 |
+
"loss": 0.4699,
|
16478 |
+
"step": 2339
|
16479 |
+
},
|
16480 |
+
{
|
16481 |
+
"epoch": 0.3227808814400993,
|
16482 |
+
"grad_norm": 0.8653863072395325,
|
16483 |
+
"learning_rate": 0.0001944684234935799,
|
16484 |
+
"loss": 0.6408,
|
16485 |
+
"step": 2340
|
16486 |
+
},
|
16487 |
+
{
|
16488 |
+
"epoch": 0.3229188219877233,
|
16489 |
+
"grad_norm": 0.8126456141471863,
|
16490 |
+
"learning_rate": 0.00019446367791120186,
|
16491 |
+
"loss": 0.7773,
|
16492 |
+
"step": 2341
|
16493 |
+
},
|
16494 |
+
{
|
16495 |
+
"epoch": 0.32305676253534726,
|
16496 |
+
"grad_norm": 0.6638123393058777,
|
16497 |
+
"learning_rate": 0.00019445893035201383,
|
16498 |
+
"loss": 0.4854,
|
16499 |
+
"step": 2342
|
16500 |
+
},
|
16501 |
+
{
|
16502 |
+
"epoch": 0.3231947030829712,
|
16503 |
+
"grad_norm": 1.3545905351638794,
|
16504 |
+
"learning_rate": 0.00019445418081611506,
|
16505 |
+
"loss": 0.9794,
|
16506 |
+
"step": 2343
|
16507 |
+
},
|
16508 |
+
{
|
16509 |
+
"epoch": 0.3233326436305952,
|
16510 |
+
"grad_norm": 0.8681669235229492,
|
16511 |
+
"learning_rate": 0.00019444942930360503,
|
16512 |
+
"loss": 0.8998,
|
16513 |
+
"step": 2344
|
16514 |
+
},
|
16515 |
+
{
|
16516 |
+
"epoch": 0.32347058417821917,
|
16517 |
+
"grad_norm": 1.0023455619812012,
|
16518 |
+
"learning_rate": 0.00019444467581458322,
|
16519 |
+
"loss": 0.7062,
|
16520 |
+
"step": 2345
|
16521 |
+
},
|
16522 |
+
{
|
16523 |
+
"epoch": 0.32360852472584317,
|
16524 |
+
"grad_norm": 0.8101288676261902,
|
16525 |
+
"learning_rate": 0.00019443992034914897,
|
16526 |
+
"loss": 0.6581,
|
16527 |
+
"step": 2346
|
16528 |
+
},
|
16529 |
+
{
|
16530 |
+
"epoch": 0.3237464652734671,
|
16531 |
+
"grad_norm": 1.2586729526519775,
|
16532 |
+
"learning_rate": 0.00019443516290740194,
|
16533 |
+
"loss": 0.7804,
|
16534 |
+
"step": 2347
|
16535 |
+
},
|
16536 |
+
{
|
16537 |
+
"epoch": 0.3238844058210911,
|
16538 |
+
"grad_norm": 0.9507285356521606,
|
16539 |
+
"learning_rate": 0.00019443040348944156,
|
16540 |
+
"loss": 0.6049,
|
16541 |
+
"step": 2348
|
16542 |
+
},
|
16543 |
+
{
|
16544 |
+
"epoch": 0.3240223463687151,
|
16545 |
+
"grad_norm": 0.6528936624526978,
|
16546 |
+
"learning_rate": 0.00019442564209536754,
|
16547 |
+
"loss": 0.4616,
|
16548 |
+
"step": 2349
|
16549 |
+
},
|
16550 |
+
{
|
16551 |
+
"epoch": 0.3241602869163391,
|
16552 |
+
"grad_norm": 0.7113572359085083,
|
16553 |
+
"learning_rate": 0.00019442087872527944,
|
16554 |
+
"loss": 0.6116,
|
16555 |
+
"step": 2350
|
16556 |
+
},
|
16557 |
+
{
|
16558 |
+
"epoch": 0.32429822746396303,
|
16559 |
+
"grad_norm": 0.5419871807098389,
|
16560 |
+
"learning_rate": 0.00019441611337927696,
|
16561 |
+
"loss": 0.2321,
|
16562 |
+
"step": 2351
|
16563 |
+
},
|
16564 |
+
{
|
16565 |
+
"epoch": 0.324436168011587,
|
16566 |
+
"grad_norm": 0.679607629776001,
|
16567 |
+
"learning_rate": 0.00019441134605745986,
|
16568 |
+
"loss": 0.459,
|
16569 |
+
"step": 2352
|
16570 |
+
},
|
16571 |
+
{
|
16572 |
+
"epoch": 0.324574108559211,
|
16573 |
+
"grad_norm": 0.9691960215568542,
|
16574 |
+
"learning_rate": 0.00019440657675992787,
|
16575 |
+
"loss": 0.4727,
|
16576 |
+
"step": 2353
|
16577 |
+
},
|
16578 |
+
{
|
16579 |
+
"epoch": 0.32471204910683493,
|
16580 |
+
"grad_norm": 0.8125988841056824,
|
16581 |
+
"learning_rate": 0.0001944018054867808,
|
16582 |
+
"loss": 0.8017,
|
16583 |
+
"step": 2354
|
16584 |
+
},
|
16585 |
+
{
|
16586 |
+
"epoch": 0.32484998965445894,
|
16587 |
+
"grad_norm": 1.649573802947998,
|
16588 |
+
"learning_rate": 0.00019439703223811847,
|
16589 |
+
"loss": 0.829,
|
16590 |
+
"step": 2355
|
16591 |
+
},
|
16592 |
+
{
|
16593 |
+
"epoch": 0.3249879302020829,
|
16594 |
+
"grad_norm": 0.745305061340332,
|
16595 |
+
"learning_rate": 0.00019439225701404085,
|
16596 |
+
"loss": 0.4651,
|
16597 |
+
"step": 2356
|
16598 |
+
},
|
16599 |
+
{
|
16600 |
+
"epoch": 0.3251258707497069,
|
16601 |
+
"grad_norm": 0.6748473048210144,
|
16602 |
+
"learning_rate": 0.00019438747981464775,
|
16603 |
+
"loss": 0.5996,
|
16604 |
+
"step": 2357
|
16605 |
+
},
|
16606 |
+
{
|
16607 |
+
"epoch": 0.32526381129733084,
|
16608 |
+
"grad_norm": 1.0531598329544067,
|
16609 |
+
"learning_rate": 0.00019438270064003926,
|
16610 |
+
"loss": 0.9084,
|
16611 |
+
"step": 2358
|
16612 |
+
},
|
16613 |
+
{
|
16614 |
+
"epoch": 0.32540175184495485,
|
16615 |
+
"grad_norm": 0.9223348498344421,
|
16616 |
+
"learning_rate": 0.00019437791949031535,
|
16617 |
+
"loss": 0.7866,
|
16618 |
+
"step": 2359
|
16619 |
+
},
|
16620 |
+
{
|
16621 |
+
"epoch": 0.3255396923925788,
|
16622 |
+
"grad_norm": 0.7526196837425232,
|
16623 |
+
"learning_rate": 0.00019437313636557602,
|
16624 |
+
"loss": 0.4975,
|
16625 |
+
"step": 2360
|
16626 |
+
},
|
16627 |
+
{
|
16628 |
+
"epoch": 0.3256776329402028,
|
16629 |
+
"grad_norm": 1.6201496124267578,
|
16630 |
+
"learning_rate": 0.00019436835126592143,
|
16631 |
+
"loss": 0.7395,
|
16632 |
+
"step": 2361
|
16633 |
+
},
|
16634 |
+
{
|
16635 |
+
"epoch": 0.32581557348782675,
|
16636 |
+
"grad_norm": 0.7340310215950012,
|
16637 |
+
"learning_rate": 0.00019436356419145166,
|
16638 |
+
"loss": 0.6325,
|
16639 |
+
"step": 2362
|
16640 |
+
},
|
16641 |
+
{
|
16642 |
+
"epoch": 0.3259535140354507,
|
16643 |
+
"grad_norm": 1.1777743101119995,
|
16644 |
+
"learning_rate": 0.00019435877514226697,
|
16645 |
+
"loss": 0.4779,
|
16646 |
+
"step": 2363
|
16647 |
+
},
|
16648 |
+
{
|
16649 |
+
"epoch": 0.3260914545830747,
|
16650 |
+
"grad_norm": 0.9242397546768188,
|
16651 |
+
"learning_rate": 0.00019435398411846752,
|
16652 |
+
"loss": 0.4674,
|
16653 |
+
"step": 2364
|
16654 |
+
},
|
16655 |
+
{
|
16656 |
+
"epoch": 0.32622939513069865,
|
16657 |
+
"grad_norm": 0.6935853958129883,
|
16658 |
+
"learning_rate": 0.00019434919112015355,
|
16659 |
+
"loss": 0.3949,
|
16660 |
+
"step": 2365
|
16661 |
+
},
|
16662 |
+
{
|
16663 |
+
"epoch": 0.32636733567832266,
|
16664 |
+
"grad_norm": 0.7134401202201843,
|
16665 |
+
"learning_rate": 0.00019434439614742543,
|
16666 |
+
"loss": 0.5659,
|
16667 |
+
"step": 2366
|
16668 |
+
},
|
16669 |
+
{
|
16670 |
+
"epoch": 0.3265052762259466,
|
16671 |
+
"grad_norm": 0.9489606618881226,
|
16672 |
+
"learning_rate": 0.00019433959920038345,
|
16673 |
+
"loss": 0.7124,
|
16674 |
+
"step": 2367
|
16675 |
+
},
|
16676 |
+
{
|
16677 |
+
"epoch": 0.3266432167735706,
|
16678 |
+
"grad_norm": 0.6194107532501221,
|
16679 |
+
"learning_rate": 0.000194334800279128,
|
16680 |
+
"loss": 0.8171,
|
16681 |
+
"step": 2368
|
16682 |
+
},
|
16683 |
+
{
|
16684 |
+
"epoch": 0.32678115732119456,
|
16685 |
+
"grad_norm": 0.8815126419067383,
|
16686 |
+
"learning_rate": 0.00019432999938375953,
|
16687 |
+
"loss": 0.5195,
|
16688 |
+
"step": 2369
|
16689 |
+
},
|
16690 |
+
{
|
16691 |
+
"epoch": 0.32691909786881856,
|
16692 |
+
"grad_norm": 0.5797806978225708,
|
16693 |
+
"learning_rate": 0.0001943251965143785,
|
16694 |
+
"loss": 0.4952,
|
16695 |
+
"step": 2370
|
16696 |
+
},
|
16697 |
+
{
|
16698 |
+
"epoch": 0.3270570384164425,
|
16699 |
+
"grad_norm": 0.9306840300559998,
|
16700 |
+
"learning_rate": 0.00019432039167108537,
|
16701 |
+
"loss": 0.476,
|
16702 |
+
"step": 2371
|
16703 |
+
},
|
16704 |
+
{
|
16705 |
+
"epoch": 0.32719497896406646,
|
16706 |
+
"grad_norm": 0.6784822344779968,
|
16707 |
+
"learning_rate": 0.00019431558485398076,
|
16708 |
+
"loss": 0.641,
|
16709 |
+
"step": 2372
|
16710 |
+
},
|
16711 |
+
{
|
16712 |
+
"epoch": 0.32733291951169047,
|
16713 |
+
"grad_norm": 0.7142674922943115,
|
16714 |
+
"learning_rate": 0.00019431077606316523,
|
16715 |
+
"loss": 0.6712,
|
16716 |
+
"step": 2373
|
16717 |
+
},
|
16718 |
+
{
|
16719 |
+
"epoch": 0.3274708600593144,
|
16720 |
+
"grad_norm": 1.0263147354125977,
|
16721 |
+
"learning_rate": 0.00019430596529873938,
|
16722 |
+
"loss": 0.8278,
|
16723 |
+
"step": 2374
|
16724 |
+
},
|
16725 |
+
{
|
16726 |
+
"epoch": 0.3276088006069384,
|
16727 |
+
"grad_norm": 0.672478199005127,
|
16728 |
+
"learning_rate": 0.00019430115256080394,
|
16729 |
+
"loss": 0.5935,
|
16730 |
+
"step": 2375
|
16731 |
+
},
|
16732 |
+
{
|
16733 |
+
"epoch": 0.32774674115456237,
|
16734 |
+
"grad_norm": 0.9333507418632507,
|
16735 |
+
"learning_rate": 0.0001942963378494596,
|
16736 |
+
"loss": 0.664,
|
16737 |
+
"step": 2376
|
16738 |
+
},
|
16739 |
+
{
|
16740 |
+
"epoch": 0.3278846817021864,
|
16741 |
+
"grad_norm": 0.8227028250694275,
|
16742 |
+
"learning_rate": 0.0001942915211648071,
|
16743 |
+
"loss": 0.3793,
|
16744 |
+
"step": 2377
|
16745 |
+
},
|
16746 |
+
{
|
16747 |
+
"epoch": 0.3280226222498103,
|
16748 |
+
"grad_norm": 0.8363267183303833,
|
16749 |
+
"learning_rate": 0.00019428670250694728,
|
16750 |
+
"loss": 0.534,
|
16751 |
+
"step": 2378
|
16752 |
+
},
|
16753 |
+
{
|
16754 |
+
"epoch": 0.32816056279743433,
|
16755 |
+
"grad_norm": 0.6801791787147522,
|
16756 |
+
"learning_rate": 0.00019428188187598094,
|
16757 |
+
"loss": 0.5693,
|
16758 |
+
"step": 2379
|
16759 |
+
},
|
16760 |
+
{
|
16761 |
+
"epoch": 0.3282985033450583,
|
16762 |
+
"grad_norm": 0.9937869310379028,
|
16763 |
+
"learning_rate": 0.00019427705927200896,
|
16764 |
+
"loss": 0.4011,
|
16765 |
+
"step": 2380
|
16766 |
+
},
|
16767 |
+
{
|
16768 |
+
"epoch": 0.32843644389268223,
|
16769 |
+
"grad_norm": 0.7679700255393982,
|
16770 |
+
"learning_rate": 0.00019427223469513228,
|
16771 |
+
"loss": 0.4928,
|
16772 |
+
"step": 2381
|
16773 |
+
},
|
16774 |
+
{
|
16775 |
+
"epoch": 0.32857438444030623,
|
16776 |
+
"grad_norm": 1.2696233987808228,
|
16777 |
+
"learning_rate": 0.00019426740814545185,
|
16778 |
+
"loss": 0.3716,
|
16779 |
+
"step": 2382
|
16780 |
+
},
|
16781 |
+
{
|
16782 |
+
"epoch": 0.3287123249879302,
|
16783 |
+
"grad_norm": 0.816831648349762,
|
16784 |
+
"learning_rate": 0.00019426257962306868,
|
16785 |
+
"loss": 0.469,
|
16786 |
+
"step": 2383
|
16787 |
+
},
|
16788 |
+
{
|
16789 |
+
"epoch": 0.3288502655355542,
|
16790 |
+
"grad_norm": 1.172206163406372,
|
16791 |
+
"learning_rate": 0.0001942577491280838,
|
16792 |
+
"loss": 0.7011,
|
16793 |
+
"step": 2384
|
16794 |
+
},
|
16795 |
+
{
|
16796 |
+
"epoch": 0.32898820608317814,
|
16797 |
+
"grad_norm": 0.8468907475471497,
|
16798 |
+
"learning_rate": 0.00019425291666059832,
|
16799 |
+
"loss": 0.2813,
|
16800 |
+
"step": 2385
|
16801 |
+
},
|
16802 |
+
{
|
16803 |
+
"epoch": 0.32912614663080214,
|
16804 |
+
"grad_norm": 0.9245859980583191,
|
16805 |
+
"learning_rate": 0.00019424808222071337,
|
16806 |
+
"loss": 0.5006,
|
16807 |
+
"step": 2386
|
16808 |
+
},
|
16809 |
+
{
|
16810 |
+
"epoch": 0.3292640871784261,
|
16811 |
+
"grad_norm": 1.3314694166183472,
|
16812 |
+
"learning_rate": 0.00019424324580853006,
|
16813 |
+
"loss": 0.3318,
|
16814 |
+
"step": 2387
|
16815 |
+
},
|
16816 |
+
{
|
16817 |
+
"epoch": 0.3294020277260501,
|
16818 |
+
"grad_norm": 0.6868737936019897,
|
16819 |
+
"learning_rate": 0.00019423840742414968,
|
16820 |
+
"loss": 0.4828,
|
16821 |
+
"step": 2388
|
16822 |
+
},
|
16823 |
+
{
|
16824 |
+
"epoch": 0.32953996827367404,
|
16825 |
+
"grad_norm": 0.5695831775665283,
|
16826 |
+
"learning_rate": 0.00019423356706767343,
|
16827 |
+
"loss": 0.4117,
|
16828 |
+
"step": 2389
|
16829 |
+
},
|
16830 |
+
{
|
16831 |
+
"epoch": 0.329677908821298,
|
16832 |
+
"grad_norm": 0.8199607729911804,
|
16833 |
+
"learning_rate": 0.00019422872473920264,
|
16834 |
+
"loss": 0.8271,
|
16835 |
+
"step": 2390
|
16836 |
+
},
|
16837 |
+
{
|
16838 |
+
"epoch": 0.329815849368922,
|
16839 |
+
"grad_norm": 0.9360648989677429,
|
16840 |
+
"learning_rate": 0.0001942238804388386,
|
16841 |
+
"loss": 0.8707,
|
16842 |
+
"step": 2391
|
16843 |
+
},
|
16844 |
+
{
|
16845 |
+
"epoch": 0.32995378991654595,
|
16846 |
+
"grad_norm": 0.7775169610977173,
|
16847 |
+
"learning_rate": 0.00019421903416668273,
|
16848 |
+
"loss": 0.5637,
|
16849 |
+
"step": 2392
|
16850 |
+
},
|
16851 |
+
{
|
16852 |
+
"epoch": 0.33009173046416995,
|
16853 |
+
"grad_norm": 0.8939715027809143,
|
16854 |
+
"learning_rate": 0.0001942141859228364,
|
16855 |
+
"loss": 0.681,
|
16856 |
+
"step": 2393
|
16857 |
+
},
|
16858 |
+
{
|
16859 |
+
"epoch": 0.3302296710117939,
|
16860 |
+
"grad_norm": 0.7903376221656799,
|
16861 |
+
"learning_rate": 0.00019420933570740112,
|
16862 |
+
"loss": 0.6019,
|
16863 |
+
"step": 2394
|
16864 |
+
},
|
16865 |
+
{
|
16866 |
+
"epoch": 0.3303676115594179,
|
16867 |
+
"grad_norm": 0.5653364062309265,
|
16868 |
+
"learning_rate": 0.00019420448352047833,
|
16869 |
+
"loss": 0.4377,
|
16870 |
+
"step": 2395
|
16871 |
+
},
|
16872 |
+
{
|
16873 |
+
"epoch": 0.33050555210704186,
|
16874 |
+
"grad_norm": 0.6574212312698364,
|
16875 |
+
"learning_rate": 0.0001941996293621696,
|
16876 |
+
"loss": 0.3995,
|
16877 |
+
"step": 2396
|
16878 |
+
},
|
16879 |
+
{
|
16880 |
+
"epoch": 0.33064349265466586,
|
16881 |
+
"grad_norm": 0.9487119913101196,
|
16882 |
+
"learning_rate": 0.00019419477323257654,
|
16883 |
+
"loss": 0.652,
|
16884 |
+
"step": 2397
|
16885 |
+
},
|
16886 |
+
{
|
16887 |
+
"epoch": 0.3307814332022898,
|
16888 |
+
"grad_norm": 0.8530499339103699,
|
16889 |
+
"learning_rate": 0.0001941899151318007,
|
16890 |
+
"loss": 0.7224,
|
16891 |
+
"step": 2398
|
16892 |
+
},
|
16893 |
+
{
|
16894 |
+
"epoch": 0.3309193737499138,
|
16895 |
+
"grad_norm": 0.8137893676757812,
|
16896 |
+
"learning_rate": 0.0001941850550599438,
|
16897 |
+
"loss": 0.5038,
|
16898 |
+
"step": 2399
|
16899 |
+
},
|
16900 |
+
{
|
16901 |
+
"epoch": 0.33105731429753776,
|
16902 |
+
"grad_norm": 0.8479599356651306,
|
16903 |
+
"learning_rate": 0.00019418019301710757,
|
16904 |
+
"loss": 0.4543,
|
16905 |
+
"step": 2400
|
16906 |
+
},
|
16907 |
+
{
|
16908 |
+
"epoch": 0.33105731429753776,
|
16909 |
+
"eval_loss": 0.6588593125343323,
|
16910 |
+
"eval_runtime": 23.4746,
|
16911 |
+
"eval_samples_per_second": 2.513,
|
16912 |
+
"eval_steps_per_second": 2.513,
|
16913 |
+
"step": 2400
|
16914 |
}
|
16915 |
],
|
16916 |
"logging_steps": 1,
|
|
|
16925 |
"early_stopping_threshold": 0.0
|
16926 |
},
|
16927 |
"attributes": {
|
16928 |
+
"early_stopping_patience_counter": 3
|
16929 |
}
|
16930 |
},
|
16931 |
"TrainerControl": {
|
|
|
16934 |
"should_evaluate": false,
|
16935 |
"should_log": false,
|
16936 |
"should_save": true,
|
16937 |
+
"should_training_stop": true
|
16938 |
},
|
16939 |
"attributes": {}
|
16940 |
}
|
16941 |
},
|
16942 |
+
"total_flos": 3.8002757108760576e+17,
|
16943 |
"train_batch_size": 1,
|
16944 |
"trial_name": null,
|
16945 |
"trial_params": null
|