Training in progress, epoch 12

Changed files:

- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +846 -3
- pytorch_model.bin +1 -1
- runs/Aug02_00-40-05_user-SYS-5049A-TR/events.out.tfevents.1659368419.user-SYS-5049A-TR.4008140.0 +2 -2
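
For reference, the files above are the standard checkpoint layout written by the transformers Trainer: model weights (pytorch_model.bin), optimizer and LR-scheduler state (optimizer.pt, scheduler.pt), the AMP grad scaler (scaler.pt), one RNG snapshot per process (rng_state_0.pth through rng_state_2.pth), and the training log (trainer_state.json). A minimal sketch of inspecting the checkpoint, assuming it has been downloaded locally; the printed values are the ones recorded in this commit:

    import json
    import torch

    # trainer_state.json records where training stands after this commit.
    with open("last-checkpoint/trainer_state.json") as f:
        state = json.load(f)
    print(state["epoch"], state["global_step"])  # 12.0 833676

    # One rng_state_<rank>.pth is saved per process; three files suggest
    # three training processes.
    rng = torch.load("last-checkpoint/rng_state_0.pth")
    print(sorted(rng))  # python/numpy/cpu(/cuda) RNG states, per Trainer

    # Resuming picks all of these artifacts up in one call:
    # trainer.train(resume_from_checkpoint="last-checkpoint")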
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:ee64dad6d55d6511beb6ac0b9d53d67463b43f31b3c562840789a31fb7f3ab68
 size 236469913
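
Every change in this commit is a Git LFS pointer update: the repository stores only the blob's SHA-256 (oid) and byte size, and the diff swaps in the hash of the new blob. A minimal sketch for checking a downloaded artifact against its pointer, using the optimizer.pt values above (the local path is an assumption):

    import hashlib

    EXPECTED_OID = "ee64dad6d55d6511beb6ac0b9d53d67463b43f31b3c562840789a31fb7f3ab68"

    h = hashlib.sha256()
    with open("last-checkpoint/optimizer.pt", "rb") as f:
        # Stream in 1 MiB chunks; the blob is ~236 MB (size 236469913).
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)

    assert h.hexdigest() == EXPECTED_OID, "blob does not match LFS pointer"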
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:30d2075d628d49b7e8ef70e8eea7597d65c9743427dd0d8f469ad8d7e3fe87e1
 size 118242180
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:da377c1b16e0291f1d5c1d80cc85b79a3d05b723665175d2784787456eb041f1
 size 14503
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:82531253b364cd83aa3acd17616fa6b687da8d3637e43c753c627947983b45a3
 size 14503
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:2bfe7c18952ddc5f5995dc7a21d9ebb622f29d27e920708e7de9b94f5fbe46f3
 size 14503
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:e8373a1388c2e6c546588d79c969bd60bd5ee55c12004454fea447aefb01b11a
 size 559
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:31d6c65356079d9822a9fd489b5c7372ee32f1df23966bba618ffdc93860f84c
 size 623
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch":
-  "global_step":
+  "epoch": 12.0,
+  "global_step": 833676,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -9273,11 +9273,854 @@
       "eval_samples_per_second": 832.052,
       "eval_steps_per_second": 34.669,
       "step": 764203
+    },
+    {
+      "epoch": 11.0,
+      "learning_rate": 2.1649089464583368e-05,
+      "loss": 1.837,
+      "step": 764500
+    },
+    {
+      "epoch": 11.01,
+      "learning_rate": 2.1597266576314137e-05,
+      "loss": 1.8367,
+      "step": 765000
+    },
+    {
+      "epoch": 11.02,
+      "learning_rate": 2.1545339834561402e-05,
+      "loss": 1.8372,
+      "step": 765500
+    },
+    {
+      "epoch": 11.03,
+      "learning_rate": 2.1493413092808667e-05,
+      "loss": 1.8376,
+      "step": 766000
+    },
+    {
+      "epoch": 11.03,
+      "learning_rate": 2.1441486351055932e-05,
+      "loss": 1.8358,
+      "step": 766500
+    },
+    {
+      "epoch": 11.04,
+      "learning_rate": 2.13896634627867e-05,
+      "loss": 1.8376,
+      "step": 767000
+    },
+    {
+      "epoch": 11.05,
+      "learning_rate": 2.1337736721033966e-05,
+      "loss": 1.8382,
+      "step": 767500
+    },
+    {
+      "epoch": 11.05,
+      "learning_rate": 2.128580997928123e-05,
+      "loss": 1.8359,
+      "step": 768000
+    },
+    {
+      "epoch": 11.06,
+      "learning_rate": 2.1233883237528496e-05,
+      "loss": 1.8354,
+      "step": 768500
+    },
+    {
+      "epoch": 11.07,
+      "learning_rate": 2.1181956495775758e-05,
+      "loss": 1.8305,
+      "step": 769000
+    },
+    {
+      "epoch": 11.08,
+      "learning_rate": 2.1130029754023026e-05,
+      "loss": 1.8408,
+      "step": 769500
+    },
+    {
+      "epoch": 11.08,
+      "learning_rate": 2.107810301227029e-05,
+      "loss": 1.8375,
+      "step": 770000
+    },
+    {
+      "epoch": 11.09,
+      "learning_rate": 2.1026176270517556e-05,
+      "loss": 1.8367,
+      "step": 770500
+    },
+    {
+      "epoch": 11.1,
+      "learning_rate": 2.0974353382248325e-05,
+      "loss": 1.8384,
+      "step": 771000
+    },
+    {
+      "epoch": 11.11,
+      "learning_rate": 2.092242664049559e-05,
+      "loss": 1.8399,
+      "step": 771500
+    },
+    {
+      "epoch": 11.11,
+      "learning_rate": 2.0870499898742855e-05,
+      "loss": 1.8376,
+      "step": 772000
+    },
+    {
+      "epoch": 11.12,
+      "learning_rate": 2.081857315699012e-05,
+      "loss": 1.8338,
+      "step": 772500
+    },
+    {
+      "epoch": 11.13,
+      "learning_rate": 2.0766646415237385e-05,
+      "loss": 1.839,
+      "step": 773000
+    },
+    {
+      "epoch": 11.13,
+      "learning_rate": 2.0714823526968155e-05,
+      "loss": 1.8359,
+      "step": 773500
+    },
+    {
+      "epoch": 11.14,
+      "learning_rate": 2.066289678521542e-05,
+      "loss": 1.8366,
+      "step": 774000
+    },
+    {
+      "epoch": 11.15,
+      "learning_rate": 2.0610970043462685e-05,
+      "loss": 1.8367,
+      "step": 774500
+    },
+    {
+      "epoch": 11.16,
+      "learning_rate": 2.055904330170995e-05,
+      "loss": 1.8327,
+      "step": 775000
+    },
+    {
+      "epoch": 11.16,
+      "learning_rate": 2.050711655995721e-05,
+      "loss": 1.8384,
+      "step": 775500
+    },
+    {
+      "epoch": 11.17,
+      "learning_rate": 2.0455293671687984e-05,
+      "loss": 1.839,
+      "step": 776000
+    },
+    {
+      "epoch": 11.18,
+      "learning_rate": 2.0403366929935245e-05,
+      "loss": 1.8345,
+      "step": 776500
+    },
+    {
+      "epoch": 11.18,
+      "learning_rate": 2.0351440188182514e-05,
+      "loss": 1.8379,
+      "step": 777000
+    },
+    {
+      "epoch": 11.19,
+      "learning_rate": 2.029951344642978e-05,
+      "loss": 1.834,
+      "step": 777500
+    },
+    {
+      "epoch": 11.2,
+      "learning_rate": 2.0247586704677044e-05,
+      "loss": 1.8389,
+      "step": 778000
+    },
+    {
+      "epoch": 11.21,
+      "learning_rate": 2.0195763816407813e-05,
+      "loss": 1.8309,
+      "step": 778500
+    },
+    {
+      "epoch": 11.21,
+      "learning_rate": 2.0143837074655078e-05,
+      "loss": 1.8367,
+      "step": 779000
+    },
+    {
+      "epoch": 11.22,
+      "learning_rate": 2.0091910332902343e-05,
+      "loss": 1.8367,
+      "step": 779500
+    },
+    {
+      "epoch": 11.23,
+      "learning_rate": 2.0039983591149608e-05,
+      "loss": 1.834,
+      "step": 780000
+    },
+    {
+      "epoch": 11.23,
+      "learning_rate": 1.9988056849396873e-05,
+      "loss": 1.837,
+      "step": 780500
+    },
+    {
+      "epoch": 11.24,
+      "learning_rate": 1.9936130107644134e-05,
+      "loss": 1.8344,
+      "step": 781000
+    },
+    {
+      "epoch": 11.25,
+      "learning_rate": 1.9884203365891403e-05,
+      "loss": 1.8388,
+      "step": 781500
+    },
+    {
+      "epoch": 11.26,
+      "learning_rate": 1.9832380477622172e-05,
+      "loss": 1.8317,
+      "step": 782000
+    },
+    {
+      "epoch": 11.26,
+      "learning_rate": 1.9780453735869437e-05,
+      "loss": 1.8334,
+      "step": 782500
+    },
+    {
+      "epoch": 11.27,
+      "learning_rate": 1.97285269941167e-05,
+      "loss": 1.8407,
+      "step": 783000
+    },
+    {
+      "epoch": 11.28,
+      "learning_rate": 1.9676600252363967e-05,
+      "loss": 1.8296,
+      "step": 783500
+    },
+    {
+      "epoch": 11.28,
+      "learning_rate": 1.962467351061123e-05,
+      "loss": 1.8319,
+      "step": 784000
+    },
+    {
+      "epoch": 11.29,
+      "learning_rate": 1.9572850622342e-05,
+      "loss": 1.8387,
+      "step": 784500
+    },
+    {
+      "epoch": 11.3,
+      "learning_rate": 1.9520923880589266e-05,
+      "loss": 1.8324,
+      "step": 785000
+    },
+    {
+      "epoch": 11.31,
+      "learning_rate": 1.946899713883653e-05,
+      "loss": 1.83,
+      "step": 785500
+    },
+    {
+      "epoch": 11.31,
+      "learning_rate": 1.9417070397083796e-05,
+      "loss": 1.8346,
+      "step": 786000
+    },
+    {
+      "epoch": 11.32,
+      "learning_rate": 1.936514365533106e-05,
+      "loss": 1.832,
+      "step": 786500
+    },
+    {
+      "epoch": 11.33,
+      "learning_rate": 1.9313216913578326e-05,
+      "loss": 1.8318,
+      "step": 787000
+    },
+    {
+      "epoch": 11.34,
+      "learning_rate": 1.9261290171825588e-05,
+      "loss": 1.8377,
+      "step": 787500
+    },
+    {
+      "epoch": 11.34,
+      "learning_rate": 1.920946728355636e-05,
+      "loss": 1.8335,
+      "step": 788000
+    },
+    {
+      "epoch": 11.35,
+      "learning_rate": 1.9157540541803622e-05,
+      "loss": 1.8374,
+      "step": 788500
+    },
+    {
+      "epoch": 11.36,
+      "learning_rate": 1.910561380005089e-05,
+      "loss": 1.8371,
+      "step": 789000
+    },
+    {
+      "epoch": 11.36,
+      "learning_rate": 1.9053687058298152e-05,
+      "loss": 1.8323,
+      "step": 789500
+    },
+    {
+      "epoch": 11.37,
+      "learning_rate": 1.900176031654542e-05,
+      "loss": 1.835,
+      "step": 790000
+    },
+    {
+      "epoch": 11.38,
+      "learning_rate": 1.8949937428276186e-05,
+      "loss": 1.8342,
+      "step": 790500
+    },
+    {
+      "epoch": 11.39,
+      "learning_rate": 1.8898010686523455e-05,
+      "loss": 1.8323,
+      "step": 791000
+    },
+    {
+      "epoch": 11.39,
+      "learning_rate": 1.8846083944770716e-05,
+      "loss": 1.8341,
+      "step": 791500
+    },
+    {
+      "epoch": 11.4,
+      "learning_rate": 1.8794157203017984e-05,
+      "loss": 1.8349,
+      "step": 792000
+    },
+    {
+      "epoch": 11.41,
+      "learning_rate": 1.874223046126525e-05,
+      "loss": 1.8314,
+      "step": 792500
+    },
+    {
+      "epoch": 11.41,
+      "learning_rate": 1.869040757299602e-05,
+      "loss": 1.8342,
+      "step": 793000
+    },
+    {
+      "epoch": 11.42,
+      "learning_rate": 1.8638480831243284e-05,
+      "loss": 1.8299,
+      "step": 793500
+    },
+    {
+      "epoch": 11.43,
+      "learning_rate": 1.858655408949055e-05,
+      "loss": 1.8336,
+      "step": 794000
+    },
+    {
+      "epoch": 11.44,
+      "learning_rate": 1.8534627347737814e-05,
+      "loss": 1.8342,
+      "step": 794500
+    },
+    {
+      "epoch": 11.44,
+      "learning_rate": 1.8482700605985075e-05,
+      "loss": 1.8341,
+      "step": 795000
+    },
+    {
+      "epoch": 11.45,
+      "learning_rate": 1.8430877717715848e-05,
+      "loss": 1.8299,
+      "step": 795500
+    },
+    {
+      "epoch": 11.46,
+      "learning_rate": 1.8378950975963113e-05,
+      "loss": 1.8331,
+      "step": 796000
+    },
+    {
+      "epoch": 11.46,
+      "learning_rate": 1.8327024234210378e-05,
+      "loss": 1.8351,
+      "step": 796500
+    },
+    {
+      "epoch": 11.47,
+      "learning_rate": 1.827509749245764e-05,
+      "loss": 1.8326,
+      "step": 797000
+    },
+    {
+      "epoch": 11.48,
+      "learning_rate": 1.8223170750704908e-05,
+      "loss": 1.8316,
+      "step": 797500
+    },
+    {
+      "epoch": 11.49,
+      "learning_rate": 1.8171347862435674e-05,
+      "loss": 1.8373,
+      "step": 798000
+    },
+    {
+      "epoch": 11.49,
+      "learning_rate": 1.8119421120682942e-05,
+      "loss": 1.8335,
+      "step": 798500
+    },
+    {
+      "epoch": 11.5,
+      "learning_rate": 1.8067494378930204e-05,
+      "loss": 1.8332,
+      "step": 799000
+    },
+    {
+      "epoch": 11.51,
+      "learning_rate": 1.8015567637177472e-05,
+      "loss": 1.8335,
+      "step": 799500
+    },
+    {
+      "epoch": 11.52,
+      "learning_rate": 1.7963640895424734e-05,
+      "loss": 1.834,
+      "step": 800000
+    },
+    {
+      "epoch": 11.52,
+      "learning_rate": 1.7911818007155507e-05,
+      "loss": 1.8336,
+      "step": 800500
+    },
+    {
+      "epoch": 11.53,
+      "learning_rate": 1.785989126540277e-05,
+      "loss": 1.8333,
+      "step": 801000
+    },
+    {
+      "epoch": 11.54,
+      "learning_rate": 1.7807964523650037e-05,
+      "loss": 1.8331,
+      "step": 801500
+    },
+    {
+      "epoch": 11.54,
+      "learning_rate": 1.77560377818973e-05,
+      "loss": 1.8332,
+      "step": 802000
+    },
+    {
+      "epoch": 11.55,
+      "learning_rate": 1.7704111040144563e-05,
+      "loss": 1.831,
+      "step": 802500
+    },
+    {
+      "epoch": 11.56,
+      "learning_rate": 1.765218429839183e-05,
+      "loss": 1.8317,
+      "step": 803000
+    },
+    {
+      "epoch": 11.57,
+      "learning_rate": 1.7600257556639093e-05,
+      "loss": 1.8344,
+      "step": 803500
+    },
+    {
+      "epoch": 11.57,
+      "learning_rate": 1.7548434668369866e-05,
+      "loss": 1.8334,
+      "step": 804000
+    },
+    {
+      "epoch": 11.58,
+      "learning_rate": 1.7496507926617127e-05,
+      "loss": 1.8333,
+      "step": 804500
+    },
+    {
+      "epoch": 11.59,
+      "learning_rate": 1.7444581184864396e-05,
+      "loss": 1.8317,
+      "step": 805000
+    },
+    {
+      "epoch": 11.59,
+      "learning_rate": 1.7392654443111657e-05,
+      "loss": 1.8368,
+      "step": 805500
+    },
+    {
+      "epoch": 11.6,
+      "learning_rate": 1.734083155484243e-05,
+      "loss": 1.8312,
+      "step": 806000
+    },
+    {
+      "epoch": 11.61,
+      "learning_rate": 1.728890481308969e-05,
+      "loss": 1.8325,
+      "step": 806500
+    },
+    {
+      "epoch": 11.62,
+      "learning_rate": 1.723697807133696e-05,
+      "loss": 1.8306,
+      "step": 807000
+    },
+    {
+      "epoch": 11.62,
+      "learning_rate": 1.718505132958422e-05,
+      "loss": 1.8335,
+      "step": 807500
+    },
+    {
+      "epoch": 11.63,
+      "learning_rate": 1.7133228441314994e-05,
+      "loss": 1.833,
+      "step": 808000
+    },
+    {
+      "epoch": 11.64,
+      "learning_rate": 1.7081301699562256e-05,
+      "loss": 1.8344,
+      "step": 808500
+    },
+    {
+      "epoch": 11.64,
+      "learning_rate": 1.7029374957809524e-05,
+      "loss": 1.8356,
+      "step": 809000
+    },
+    {
+      "epoch": 11.65,
+      "learning_rate": 1.697744821605679e-05,
+      "loss": 1.8329,
+      "step": 809500
+    },
+    {
+      "epoch": 11.66,
+      "learning_rate": 1.692552147430405e-05,
+      "loss": 1.8343,
+      "step": 810000
+    },
+    {
+      "epoch": 11.67,
+      "learning_rate": 1.6873698586034824e-05,
+      "loss": 1.8342,
+      "step": 810500
+    },
+    {
+      "epoch": 11.67,
+      "learning_rate": 1.682177184428209e-05,
+      "loss": 1.8328,
+      "step": 811000
+    },
+    {
+      "epoch": 11.68,
+      "learning_rate": 1.6769845102529354e-05,
+      "loss": 1.8328,
+      "step": 811500
+    },
+    {
+      "epoch": 11.69,
+      "learning_rate": 1.6717918360776615e-05,
+      "loss": 1.832,
+      "step": 812000
+    },
+    {
+      "epoch": 11.7,
+      "learning_rate": 1.6665991619023883e-05,
+      "loss": 1.8305,
+      "step": 812500
+    },
+    {
+      "epoch": 11.7,
+      "learning_rate": 1.6614064877271145e-05,
+      "loss": 1.8287,
+      "step": 813000
+    },
+    {
+      "epoch": 11.71,
+      "learning_rate": 1.6562241989001918e-05,
+      "loss": 1.8324,
+      "step": 813500
+    },
+    {
+      "epoch": 11.72,
+      "learning_rate": 1.651031524724918e-05,
+      "loss": 1.8325,
+      "step": 814000
+    },
+    {
+      "epoch": 11.72,
+      "learning_rate": 1.6458388505496448e-05,
+      "loss": 1.8304,
+      "step": 814500
+    },
+    {
+      "epoch": 11.73,
+      "learning_rate": 1.640646176374371e-05,
+      "loss": 1.8321,
+      "step": 815000
+    },
+    {
+      "epoch": 11.74,
+      "learning_rate": 1.6354638875474482e-05,
+      "loss": 1.8324,
+      "step": 815500
+    },
+    {
+      "epoch": 11.75,
+      "learning_rate": 1.6302712133721744e-05,
+      "loss": 1.8286,
+      "step": 816000
+    },
+    {
+      "epoch": 11.75,
+      "learning_rate": 1.6250785391969012e-05,
+      "loss": 1.8319,
+      "step": 816500
+    },
+    {
+      "epoch": 11.76,
+      "learning_rate": 1.6198858650216277e-05,
+      "loss": 1.8282,
+      "step": 817000
+    },
+    {
+      "epoch": 11.77,
+      "learning_rate": 1.6147035761947047e-05,
+      "loss": 1.8265,
+      "step": 817500
+    },
+    {
+      "epoch": 11.77,
+      "learning_rate": 1.609510902019431e-05,
+      "loss": 1.8288,
+      "step": 818000
+    },
+    {
+      "epoch": 11.78,
+      "learning_rate": 1.6043182278441576e-05,
+      "loss": 1.8334,
+      "step": 818500
+    },
+    {
+      "epoch": 11.79,
+      "learning_rate": 1.599125553668884e-05,
+      "loss": 1.8322,
+      "step": 819000
+    },
+    {
+      "epoch": 11.8,
+      "learning_rate": 1.5939328794936103e-05,
+      "loss": 1.8318,
+      "step": 819500
+    },
+    {
+      "epoch": 11.8,
+      "learning_rate": 1.5887505906666876e-05,
+      "loss": 1.8281,
+      "step": 820000
+    },
+    {
+      "epoch": 11.81,
+      "learning_rate": 1.583557916491414e-05,
+      "loss": 1.8279,
+      "step": 820500
+    },
+    {
+      "epoch": 11.82,
+      "learning_rate": 1.5783652423161406e-05,
+      "loss": 1.8319,
+      "step": 821000
+    },
+    {
+      "epoch": 11.82,
+      "learning_rate": 1.5731725681408667e-05,
+      "loss": 1.8281,
+      "step": 821500
+    },
+    {
+      "epoch": 11.83,
+      "learning_rate": 1.5679798939655935e-05,
+      "loss": 1.83,
+      "step": 822000
+    },
+    {
+      "epoch": 11.84,
+      "learning_rate": 1.5627976051386705e-05,
+      "loss": 1.831,
+      "step": 822500
+    },
+    {
+      "epoch": 11.85,
+      "learning_rate": 1.557604930963397e-05,
+      "loss": 1.8308,
+      "step": 823000
+    },
+    {
+      "epoch": 11.85,
+      "learning_rate": 1.552412256788123e-05,
+      "loss": 1.8302,
+      "step": 823500
+    },
+    {
+      "epoch": 11.86,
+      "learning_rate": 1.54721958261285e-05,
+      "loss": 1.8306,
+      "step": 824000
+    },
+    {
+      "epoch": 11.87,
+      "learning_rate": 1.5420372937859266e-05,
+      "loss": 1.828,
+      "step": 824500
+    },
+    {
+      "epoch": 11.88,
+      "learning_rate": 1.5368446196106534e-05,
+      "loss": 1.8306,
+      "step": 825000
+    },
+    {
+      "epoch": 11.88,
+      "learning_rate": 1.53165194543538e-05,
+      "loss": 1.8288,
+      "step": 825500
+    },
+    {
+      "epoch": 11.89,
+      "learning_rate": 1.5264592712601064e-05,
+      "loss": 1.8304,
+      "step": 826000
+    },
+    {
+      "epoch": 11.9,
+      "learning_rate": 1.5212769824331832e-05,
+      "loss": 1.8285,
+      "step": 826500
+    },
+    {
+      "epoch": 11.9,
+      "learning_rate": 1.5160843082579099e-05,
+      "loss": 1.8287,
+      "step": 827000
+    },
+    {
+      "epoch": 11.91,
+      "learning_rate": 1.5108916340826362e-05,
+      "loss": 1.8291,
+      "step": 827500
+    },
+    {
+      "epoch": 11.92,
+      "learning_rate": 1.5056989599073628e-05,
+      "loss": 1.8314,
+      "step": 828000
+    },
+    {
+      "epoch": 11.93,
+      "learning_rate": 1.5005062857320892e-05,
+      "loss": 1.8258,
+      "step": 828500
+    },
+    {
+      "epoch": 11.93,
+      "learning_rate": 1.4953239969051663e-05,
+      "loss": 1.8287,
+      "step": 829000
+    },
+    {
+      "epoch": 11.94,
+      "learning_rate": 1.4901313227298926e-05,
+      "loss": 1.8294,
+      "step": 829500
+    },
+    {
+      "epoch": 11.95,
+      "learning_rate": 1.4849386485546193e-05,
+      "loss": 1.8328,
+      "step": 830000
+    },
+    {
+      "epoch": 11.95,
+      "learning_rate": 1.4797459743793458e-05,
+      "loss": 1.8287,
+      "step": 830500
+    },
+    {
+      "epoch": 11.96,
+      "learning_rate": 1.4745636855524227e-05,
+      "loss": 1.8279,
+      "step": 831000
+    },
+    {
+      "epoch": 11.97,
+      "learning_rate": 1.4693710113771492e-05,
+      "loss": 1.8286,
+      "step": 831500
+    },
+    {
+      "epoch": 11.98,
+      "learning_rate": 1.4641783372018755e-05,
+      "loss": 1.8254,
+      "step": 832000
+    },
+    {
+      "epoch": 11.98,
+      "learning_rate": 1.4589856630266022e-05,
+      "loss": 1.8274,
+      "step": 832500
+    },
+    {
+      "epoch": 11.99,
+      "learning_rate": 1.4538033741996793e-05,
+      "loss": 1.8279,
+      "step": 833000
+    },
+    {
+      "epoch": 12.0,
+      "learning_rate": 1.4486107000244057e-05,
+      "loss": 1.8245,
+      "step": 833500
+    },
+    {
+      "epoch": 12.0,
+      "eval_accuracy": 0.6503763935317559,
+      "eval_loss": 1.6876965761184692,
+      "eval_runtime": 647.389,
+      "eval_samples_per_second": 832.478,
+      "eval_steps_per_second": 34.687,
+      "step": 833676
     }
   ],
   "max_steps": 972622,
   "num_train_epochs": 14,
-  "total_flos": 5.
+  "total_flos": 5.832190387963298e+18,
   "trial_name": null,
   "trial_params": null
 }
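
The trainer_state.json diff above appends the epoch-11 log history: training loss drifts from 1.837 at step 764500 down to 1.8245 at step 833500, and the epoch-12 evaluation reaches eval_accuracy 0.6504 (eval_loss 1.6877) at step 833676. A minimal sketch for pulling that curve back out of the file (local path assumed):

    import json

    with open("last-checkpoint/trainer_state.json") as f:
        state = json.load(f)

    train_logs = [e for e in state["log_history"] if "loss" in e]
    eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

    print(train_logs[-1])  # {"epoch": 12.0, ..., "loss": 1.8245, "step": 833500}
    print(eval_logs[-1])   # the step-833676 evaluation record

    # The learning rate drops by a roughly constant ~5.19e-08 per 500-step
    # logging interval, consistent with Trainer's default linear LR schedule.
    lrs = [e["learning_rate"] for e in train_logs[-3:]]
    print([earlier - later for earlier, later in zip(lrs, lrs[1:])])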
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:30d2075d628d49b7e8ef70e8eea7597d65c9743427dd0d8f469ad8d7e3fe87e1
 size 118242180
runs/Aug02_00-40-05_user-SYS-5049A-TR/events.out.tfevents.1659368419.user-SYS-5049A-TR.4008140.0
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:af05549d74148c78a30fded743641173f0fcc77b72e6135712a94988aef723ba
+size 273998
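
The final change updates the TensorBoard event file for this run. A minimal sketch for reading it back, assuming the tensorboard package is installed; the scalar tag names are an assumption, so inspect the tag list for what was actually logged:

    from tensorboard.backend.event_processing.event_accumulator import (
        EventAccumulator,
    )

    # Point the accumulator at the run directory from this commit.
    acc = EventAccumulator("runs/Aug02_00-40-05_user-SYS-5049A-TR")
    acc.Reload()

    print(acc.Tags()["scalars"])  # discover the logged scalar tags
    for event in acc.Scalars("train/loss")[-3:]:  # tag name is an assumption
        print(event.step, event.value)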