Training in progress, step 264, checkpoint
- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step264/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step264/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step264/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step264/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step264/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step264/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step264/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step264/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step264/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step264/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step264/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step264/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step264/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step264/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step264/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step264/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +473 -3
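
Taken together, the commit refreshes every piece of the last-checkpoint directory: the LoRA adapter weights, the step-264 DeepSpeed ZeRO shards (optimizer and model states for eight data-parallel ranks), the per-rank RNG states, the LR scheduler, and the trainer state. The sketch below is a minimal, hypothetical consistency check over such a layout before resuming from it; the paths and the world size of 8 are read off this file list, while the check itself is an assumption, not part of the training code.

from pathlib import Path

# Hypothetical sanity check for a checkpoint laid out like the file list above.
ckpt = Path("last-checkpoint")
world_size = 8  # eight *_rank_{0..7}_* shards appear in this commit

tag = (ckpt / "latest").read_text().strip()  # expected to read "global_step264"
step_dir = ckpt / tag

expected = (
    [ckpt / "adapter_model.safetensors", ckpt / "scheduler.pt", ckpt / "trainer_state.json"]
    + [ckpt / f"rng_state_{r}.pth" for r in range(world_size)]
    + [step_dir / f"bf16_zero_pp_rank_{r}_mp_rank_00_optim_states.pt" for r in range(world_size)]
    + [step_dir / f"zero_pp_rank_{r}_mp_rank_00_model_states.pt" for r in range(world_size)]
)
missing = [str(p) for p in expected if not p.exists()]
print(f"tag={tag}, missing={missing or 'none'}")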
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:b6b7fbaf2d6a6e1654728bf2b64ff7a097f615d5247c146dd31d3eccfa8fc30f
 size 763470136
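
Every binary in the commit is stored as a Git LFS pointer like the three-line version/oid/size stub above, so the diff records only the new SHA-256 and byte size, never the tensors themselves. As a hedged sketch (the repo id is a placeholder; hf_hub_download and safetensors are the usual Hugging Face tooling, not anything pinned by this commit), the actual ~763 MB adapter file could be resolved and opened like this:

from huggingface_hub import hf_hub_download
from safetensors.torch import load_file

# Placeholder repo id; substitute the repository this commit belongs to.
path = hf_hub_download(
    repo_id="user/repo",
    filename="last-checkpoint/adapter_model.safetensors",
)
adapter = load_file(path)  # LoRA adapter tensors behind the LFS pointer above
print(f"{len(adapter)} tensors, {sum(t.numel() for t in adapter.values()):,} values")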
last-checkpoint/global_step264/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8ca144ba18752225282c0f4544d978254c2ae4f35d68cae745a609c918846e1f
+size 289065424

last-checkpoint/global_step264/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:614e9ee88e9bcb3a45429c59b2bf2769ba433567f51363c7a4253b475b21e6bc
+size 289065424

last-checkpoint/global_step264/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d655ebb2146d9487a8d09cd971ceb207ee79d6d7c42ab932971bb75ba93e940c
+size 289065424

last-checkpoint/global_step264/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:89eae13e8c0ba60e40a9c3d3f11b76646b83ba197f6abf0aba73b689661edd6c
+size 289065424

last-checkpoint/global_step264/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d0557f6a14c69ef21c2ed3a5c48be9f7a3a47f102c4b16e6db9df1e8036a73a5
+size 289065424

last-checkpoint/global_step264/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8d0052e580fe2d1f70992b2fcc233627549397786188980f2ec6a856d84d45f9
+size 289065424

last-checkpoint/global_step264/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:67db40a4c668372a5d51fa8aad0bf99b67dda6f1acdcfc128369378fd063a309
+size 289065424

last-checkpoint/global_step264/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ca273d1927a1a34aa37630afe3fae7e5b729674eeb60cf491b164d2415105d78
+size 289065424
last-checkpoint/global_step264/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ef56386f8cf47768c633eb8b8ab10076925c3b5e457f1548072d50346f8b468d
+size 348711830

last-checkpoint/global_step264/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d5bfd7e5ba0884a15f626b0c26345601fddc4900a760abdb231c701069e165d2
+size 348711830

last-checkpoint/global_step264/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:67d112660240266bc61d3417f0745f1145af0c63963c37f516b89c36b4985ece
+size 348711830

last-checkpoint/global_step264/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:36d966bcd023af6eda0fb4a38472fa53c83aa9a27937caf21dd846c3fd6f9274
+size 348711830

last-checkpoint/global_step264/zero_pp_rank_4_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c28b986c53d2c4dc45b78474b2e13f40f55e7da315b0b108f035a72e86cd9308
+size 348711830

last-checkpoint/global_step264/zero_pp_rank_5_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e59b35e6b3d5165fdaae19137b371d5a6458b844c4a65494b02eba621e681844
+size 348711830

last-checkpoint/global_step264/zero_pp_rank_6_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:881ffb5534b69c57a2e00bbfd8c40881c2f08386b7a27b1f3c4179356f366e18
+size 348711830

last-checkpoint/global_step264/zero_pp_rank_7_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:63a9082d03b443436f3f7c312f3eae46a679986eef5d0a24630c726e3b8afa34
+size 348711830
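
The sixteen files added under global_step264/ are the DeepSpeed ZeRO shards for this step: one bf16 optimizer-state partition (about 289 MB) and one model-states file (about 349 MB) per rank, eight ranks each, roughly 5.1 GB on top of the 763 MB adapter. If a single consolidated fp32 state dict is ever needed, DeepSpeed's stock zero_to_fp32 helper can reassemble these shards; the sketch below assumes the checkpoint has been pulled locally and is only an illustration, not a script from this repository.

from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint

# Merge the per-rank ZeRO shards added above into a single fp32 state dict.
# "last-checkpoint" and the "global_step264" tag come straight from this commit.
state_dict = get_fp32_state_dict_from_zero_checkpoint("last-checkpoint", tag="global_step264")
print(f"reconstructed {sum(t.numel() for t in state_dict.values()):,} parameters")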
last-checkpoint/latest CHANGED
@@ -1 +1 @@
-
+global_step264
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:756188867614fe144ce7bb4100b8fdc4a53793718efdbfd597ab9a7af1127cb3
 size 15920

last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:9326dda8ccb88256fea16bdb08bf3d8ee2d7890d74941621ea0ae79baad53127
 size 15920

last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:dc7ea8107c02800ceda5d3219d8139cc0c46423c770369f8d482750d2ee66b59
 size 15920

last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:5100775819feb4598b355aaf5ae7a2d05f1e6c33d82585848692501430716b79
 size 15920

last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:375d7beb01cab64b2715fb3d805593967127e2433072776577d1a22535bc71f6
 size 15920

last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:be4bc162636adeba1331e40da73f3fb1fde2fb44472545ff46bc3e2a6588d115
 size 15920

last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:9e4ec9613f9c318e718457c34ba482fb1b487745cd80d6e26c4479f47030f964
 size 15920

last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:e6c5785e3656da35a0034b82ee38c2b260ac87d57dc93498957445739f27c017
 size 15920

last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:26eca587873b25805521ebb406b132a4ba3e54d5f099d35d9e497769da91dcd6
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
 "best_metric": null,
 "best_model_checkpoint": null,
-"epoch": 0.
+"epoch": 0.8098159509202454,
 "eval_steps": 66,
-"global_step":
+"global_step": 264,
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
@@ -1425,6 +1425,476 @@
 "eval_samples_per_second": 1.793,
 "eval_steps_per_second": 0.126,
 "step": 198
+},
+{
+"epoch": 0.6104294478527608,
+"grad_norm": 0.31755022515076264,
+"learning_rate": 4.313016905898286e-05,
+"loss": 1.8861,
+"step": 199
+},
+{
+"epoch": 0.6134969325153374,
+"grad_norm": 0.37514333178831394,
+"learning_rate": 4.268516544675628e-05,
+"loss": 1.9366,
+"step": 200
+},
+{
+"epoch": 0.6165644171779141,
+"grad_norm": 0.2768732078613857,
+"learning_rate": 4.224145985535202e-05,
+"loss": 1.8781,
+"step": 201
+},
+{
+"epoch": 0.6196319018404908,
+"grad_norm": 0.385983235346578,
+"learning_rate": 4.1799099052681934e-05,
+"loss": 1.9089,
+"step": 202
+},
+{
+"epoch": 0.6226993865030674,
+"grad_norm": 0.34929147929166254,
+"learning_rate": 4.135812966491305e-05,
+"loss": 1.9409,
+"step": 203
+},
+{
+"epoch": 0.6257668711656442,
+"grad_norm": 0.3448745967701562,
+"learning_rate": 4.091859817155307e-05,
+"loss": 1.8935,
+"step": 204
+},
+{
+"epoch": 0.6288343558282209,
+"grad_norm": 0.24777573443198542,
+"learning_rate": 4.048055090055125e-05,
+"loss": 1.9007,
+"step": 205
+},
+{
+"epoch": 0.6319018404907976,
+"grad_norm": 0.33163324355956286,
+"learning_rate": 4.004403402341532e-05,
+"loss": 1.8816,
+"step": 206
+},
+{
+"epoch": 0.6349693251533742,
+"grad_norm": 0.6161345209342699,
+"learning_rate": 3.960909355034491e-05,
+"loss": 1.8952,
+"step": 207
+},
+{
+"epoch": 0.6380368098159509,
+"grad_norm": 0.29863513222265725,
+"learning_rate": 3.917577532538185e-05,
+"loss": 1.8622,
+"step": 208
+},
+{
+"epoch": 0.6411042944785276,
+"grad_norm": 0.23544641297651625,
+"learning_rate": 3.8744125021578126e-05,
+"loss": 1.9098,
+"step": 209
+},
+{
+"epoch": 0.6441717791411042,
+"grad_norm": 0.29701664972205183,
+"learning_rate": 3.831418813618177e-05,
+"loss": 1.8963,
+"step": 210
+},
+{
+"epoch": 0.647239263803681,
+"grad_norm": 0.2608462550147094,
+"learning_rate": 3.788600998584135e-05,
+"loss": 1.9425,
+"step": 211
+},
+{
+"epoch": 0.6503067484662577,
+"grad_norm": 0.2753794235571961,
+"learning_rate": 3.7459635701829435e-05,
+"loss": 1.9312,
+"step": 212
+},
+{
+"epoch": 0.6533742331288344,
+"grad_norm": 0.40974803557689143,
+"learning_rate": 3.703511022528562e-05,
+"loss": 1.8992,
+"step": 213
+},
+{
+"epoch": 0.656441717791411,
+"grad_norm": 0.24030236007607908,
+"learning_rate": 3.6612478302479594e-05,
+"loss": 1.9326,
+"step": 214
+},
+{
+"epoch": 0.6595092024539877,
+"grad_norm": 0.4383608820045659,
+"learning_rate": 3.619178448009477e-05,
+"loss": 1.932,
+"step": 215
+},
+{
+"epoch": 0.6625766871165644,
+"grad_norm": 0.5102560092350799,
+"learning_rate": 3.5773073100532874e-05,
+"loss": 1.8956,
+"step": 216
+},
+{
+"epoch": 0.6656441717791411,
+"grad_norm": 0.36274812580727284,
+"learning_rate": 3.535638829724019e-05,
+"loss": 1.8919,
+"step": 217
+},
+{
+"epoch": 0.6687116564417178,
+"grad_norm": 0.23488730500365318,
+"learning_rate": 3.494177399005578e-05,
+"loss": 1.9158,
+"step": 218
+},
+{
+"epoch": 0.6717791411042945,
+"grad_norm": 0.3741689726256645,
+"learning_rate": 3.452927388058206e-05,
+"loss": 1.9423,
+"step": 219
+},
+{
+"epoch": 0.6748466257668712,
+"grad_norm": 0.25651456348082824,
+"learning_rate": 3.411893144757866e-05,
+"loss": 1.8415,
+"step": 220
+},
+{
+"epoch": 0.6779141104294478,
+"grad_norm": 0.2612828905023135,
+"learning_rate": 3.3710789942379556e-05,
+"loss": 1.9472,
+"step": 221
+},
+{
+"epoch": 0.6809815950920245,
+"grad_norm": 0.25469935789428655,
+"learning_rate": 3.33048923843343e-05,
+"loss": 1.949,
+"step": 222
+},
+{
+"epoch": 0.6840490797546013,
+"grad_norm": 0.23410106434735667,
+"learning_rate": 3.2901281556273646e-05,
+"loss": 1.8963,
+"step": 223
+},
+{
+"epoch": 0.6871165644171779,
+"grad_norm": 0.25811790889112224,
+"learning_rate": 3.250000000000001e-05,
+"loss": 1.8488,
+"step": 224
+},
+{
+"epoch": 0.6901840490797546,
+"grad_norm": 0.2701258126507899,
+"learning_rate": 3.210109001180358e-05,
+"loss": 1.9429,
+"step": 225
+},
+{
+"epoch": 0.6932515337423313,
+"grad_norm": 0.27336206551312103,
+"learning_rate": 3.170459363800409e-05,
+"loss": 1.9063,
+"step": 226
+},
+{
+"epoch": 0.696319018404908,
+"grad_norm": 0.30139160569284024,
+"learning_rate": 3.1310552670518986e-05,
+"loss": 1.9182,
+"step": 227
+},
+{
+"epoch": 0.6993865030674846,
+"grad_norm": 0.23370917590561624,
+"learning_rate": 3.0919008642458494e-05,
+"loss": 1.9541,
+"step": 228
+},
+{
+"epoch": 0.7024539877300614,
+"grad_norm": 0.22222235132591592,
+"learning_rate": 3.053000282374781e-05,
+"loss": 1.8864,
+"step": 229
+},
+{
+"epoch": 0.7055214723926381,
+"grad_norm": 0.27873390973935386,
+"learning_rate": 3.014357621677724e-05,
+"loss": 1.8852,
+"step": 230
+},
+{
+"epoch": 0.7085889570552147,
+"grad_norm": 0.3108583744507131,
+"learning_rate": 2.9759769552080376e-05,
+"loss": 1.8663,
+"step": 231
+},
+{
+"epoch": 0.7116564417177914,
+"grad_norm": 0.30913975922284836,
+"learning_rate": 2.93786232840409e-05,
+"loss": 1.9404,
+"step": 232
+},
+{
+"epoch": 0.7147239263803681,
+"grad_norm": 0.28385532690084997,
+"learning_rate": 2.90001775866287e-05,
+"loss": 1.9023,
+"step": 233
+},
+{
+"epoch": 0.7177914110429447,
+"grad_norm": 0.2808200803737186,
+"learning_rate": 2.8624472349165355e-05,
+"loss": 1.9192,
+"step": 234
+},
+{
+"epoch": 0.7208588957055214,
+"grad_norm": 0.23648694756886077,
+"learning_rate": 2.8251547172119603e-05,
+"loss": 2.0132,
+"step": 235
+},
+{
+"epoch": 0.7239263803680982,
+"grad_norm": 0.6069490067148141,
+"learning_rate": 2.7881441362933468e-05,
+"loss": 1.8395,
+"step": 236
+},
+{
+"epoch": 0.7269938650306749,
+"grad_norm": 0.3350257794257116,
+"learning_rate": 2.751419393187905e-05,
+"loss": 1.8667,
+"step": 237
+},
+{
+"epoch": 0.7300613496932515,
+"grad_norm": 0.232164276820369,
+"learning_rate": 2.7149843587946744e-05,
+"loss": 1.8656,
+"step": 238
+},
+{
+"epoch": 0.7331288343558282,
+"grad_norm": 0.38356734047420593,
+"learning_rate": 2.6788428734765224e-05,
+"loss": 1.9048,
+"step": 239
+},
+{
+"epoch": 0.7361963190184049,
+"grad_norm": 0.2618731826165273,
+"learning_rate": 2.642998746655348e-05,
+"loss": 1.9783,
+"step": 240
+},
+{
+"epoch": 0.7392638036809815,
+"grad_norm": 0.6648822511934657,
+"learning_rate": 2.6074557564105727e-05,
+"loss": 1.9043,
+"step": 241
+},
+{
+"epoch": 0.7423312883435583,
+"grad_norm": 0.27175163581016115,
+"learning_rate": 2.5722176490809118e-05,
+"loss": 1.9585,
+"step": 242
+},
+{
+"epoch": 0.745398773006135,
+"grad_norm": 0.3925966681047075,
+"learning_rate": 2.5372881388694912e-05,
+"loss": 1.8515,
+"step": 243
+},
+{
+"epoch": 0.7484662576687117,
+"grad_norm": 0.37190935188206453,
+"learning_rate": 2.5026709074523748e-05,
+"loss": 1.9688,
+"step": 244
+},
+{
+"epoch": 0.7515337423312883,
+"grad_norm": 0.2257138379202953,
+"learning_rate": 2.4683696035904928e-05,
+"loss": 1.9486,
+"step": 245
+},
+{
+"epoch": 0.754601226993865,
+"grad_norm": 0.2274145468605237,
+"learning_rate": 2.434387842745056e-05,
+"loss": 1.9302,
+"step": 246
+},
+{
+"epoch": 0.7576687116564417,
+"grad_norm": 0.5126959359452324,
+"learning_rate": 2.400729206696477e-05,
+"loss": 1.9443,
+"step": 247
+},
+{
+"epoch": 0.7607361963190185,
+"grad_norm": 0.2551304692334095,
+"learning_rate": 2.3673972431668306e-05,
+"loss": 2.009,
+"step": 248
+},
+{
+"epoch": 0.7638036809815951,
+"grad_norm": 0.4447523876477682,
+"learning_rate": 2.334395465445926e-05,
+"loss": 1.8468,
+"step": 249
+},
+{
+"epoch": 0.7668711656441718,
+"grad_norm": 0.2657558360669318,
+"learning_rate": 2.3017273520209882e-05,
+"loss": 1.8886,
+"step": 250
+},
+{
+"epoch": 0.7699386503067485,
+"grad_norm": 0.37573420755761094,
+"learning_rate": 2.2693963462100117e-05,
+"loss": 1.8663,
+"step": 251
+},
+{
+"epoch": 0.7730061349693251,
+"grad_norm": 0.26075506564879214,
+"learning_rate": 2.2374058557988336e-05,
+"loss": 1.909,
+"step": 252
+},
+{
+"epoch": 0.7760736196319018,
+"grad_norm": 0.2951446457265513,
+"learning_rate": 2.2057592526819353e-05,
+"loss": 1.9362,
+"step": 253
+},
+{
+"epoch": 0.7791411042944786,
+"grad_norm": 0.24420003456766767,
+"learning_rate": 2.1744598725070347e-05,
+"loss": 1.9134,
+"step": 254
+},
+{
+"epoch": 0.7822085889570553,
+"grad_norm": 0.2563261666147908,
+"learning_rate": 2.143511014323506e-05,
+"loss": 1.9569,
+"step": 255
+},
+{
+"epoch": 0.7852760736196319,
+"grad_norm": 0.27427716272900493,
+"learning_rate": 2.11291594023464e-05,
+"loss": 1.8982,
+"step": 256
+},
+{
+"epoch": 0.7883435582822086,
+"grad_norm": 0.4685271777395839,
+"learning_rate": 2.082677875053818e-05,
+"loss": 1.9256,
+"step": 257
+},
+{
+"epoch": 0.7914110429447853,
+"grad_norm": 0.3080424306042412,
+"learning_rate": 2.0528000059645997e-05,
+"loss": 1.9154,
+"step": 258
+},
+{
+"epoch": 0.7944785276073619,
+"grad_norm": 0.2672783439075976,
+"learning_rate": 2.023285482184785e-05,
+"loss": 1.9574,
+"step": 259
+},
+{
+"epoch": 0.7975460122699386,
+"grad_norm": 0.3311914465278651,
+"learning_rate": 1.994137414634483e-05,
+"loss": 1.9133,
+"step": 260
+},
+{
+"epoch": 0.8006134969325154,
+"grad_norm": 0.7675438620825049,
+"learning_rate": 1.9653588756082064e-05,
+"loss": 1.892,
+"step": 261
+},
+{
+"epoch": 0.803680981595092,
+"grad_norm": 0.2757310062776552,
+"learning_rate": 1.9369528984510394e-05,
+"loss": 1.9087,
+"step": 262
+},
+{
+"epoch": 0.8067484662576687,
+"grad_norm": 0.24797296946202665,
+"learning_rate": 1.9089224772389225e-05,
+"loss": 1.8836,
+"step": 263
+},
+{
+"epoch": 0.8098159509202454,
+"grad_norm": 0.41244928985184576,
+"learning_rate": 1.881270566463062e-05,
+"loss": 1.9094,
+"step": 264
+},
+{
+"epoch": 0.8098159509202454,
+"eval_loss": 2.593792200088501,
+"eval_runtime": 55.7303,
+"eval_samples_per_second": 1.794,
+"eval_steps_per_second": 0.126,
+"step": 264
 }
 ],
 "logging_steps": 1,
@@ -1444,7 +1914,7 @@
 "attributes": {}
 }
 },
-"total_flos":
+"total_flos": 288286794842112.0,
 "train_batch_size": 2,
 "trial_name": null,
 "trial_params": null
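
The trainer_state.json hunk appends the step-by-step log history from step 199 through step 264 (training loss moving between roughly 1.84 and 2.01 while the learning rate decays from about 4.31e-05 to 1.88e-05) plus the evaluation entry at step 264 (eval_loss 2.5938; with eval_steps of 66, evaluation lands at step 264 = 4 x 66). The recorded epoch of 0.8098159509202454 at global step 264 implies roughly 264 / 0.8098 ≈ 326 optimizer steps per epoch. As a hedged sketch (the file name and keys are the ones visible in the diff; log_history is the standard Transformers field that holds these entries), the loss curve can be pulled back out like this:

import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

for e in train_logs[-3:]:  # last few training steps recorded in this checkpoint
    print(f'step {e["step"]:>3}  loss {e["loss"]:.4f}  lr {e["learning_rate"]:.3e}')
if eval_logs:
    last = eval_logs[-1]
    print(f'eval_loss {last["eval_loss"]:.4f} at step {last["step"]}')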