Training in progress, step 678, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 73911112
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4c5e63de907b19c7910db799e3b78a42b2e9f9179854322d92c712e5afde4b89
|
3 |
size 73911112
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 37965684
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a14e009833d53fbe9c82d489fe0e9b4ad66c19f8ea8adae05db1259a8aef663d
|
3 |
size 37965684
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c6b4a91354537b8cb9fcab809682ea0aa63e50d33b8c1b98a541a3219c1729c7
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f12e5684e3c4181284222b5bfd802e16a9d019f6d061509f1ae13f7ad0785a15
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": 2.048067808151245,
|
3 |
"best_model_checkpoint": "miner_id_24/checkpoint-600",
|
4 |
-
"epoch": 1.
|
5 |
"eval_steps": 50,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -4311,6 +4311,560 @@
|
|
4311 |
"eval_samples_per_second": 34.057,
|
4312 |
"eval_steps_per_second": 8.514,
|
4313 |
"step": 600
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4314 |
}
|
4315 |
],
|
4316 |
"logging_steps": 1,
|
@@ -4334,12 +4888,12 @@
|
|
4334 |
"should_evaluate": false,
|
4335 |
"should_log": false,
|
4336 |
"should_save": true,
|
4337 |
-
"should_training_stop":
|
4338 |
},
|
4339 |
"attributes": {}
|
4340 |
}
|
4341 |
},
|
4342 |
-
"total_flos": 3.
|
4343 |
"train_batch_size": 4,
|
4344 |
"trial_name": null,
|
4345 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": 2.048067808151245,
|
3 |
"best_model_checkpoint": "miner_id_24/checkpoint-600",
|
4 |
+
"epoch": 1.9977900552486187,
|
5 |
"eval_steps": 50,
|
6 |
+
"global_step": 678,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
4311 |
"eval_samples_per_second": 34.057,
|
4312 |
"eval_steps_per_second": 8.514,
|
4313 |
"step": 600
|
4314 |
+
},
|
4315 |
+
{
|
4316 |
+
"epoch": 1.770902394106814,
|
4317 |
+
"grad_norm": 0.3590349853038788,
|
4318 |
+
"learning_rate": 6.485553382910026e-06,
|
4319 |
+
"loss": 1.9617,
|
4320 |
+
"step": 601
|
4321 |
+
},
|
4322 |
+
{
|
4323 |
+
"epoch": 1.7738489871086556,
|
4324 |
+
"grad_norm": 0.35265564918518066,
|
4325 |
+
"learning_rate": 6.319977191187232e-06,
|
4326 |
+
"loss": 2.0291,
|
4327 |
+
"step": 602
|
4328 |
+
},
|
4329 |
+
{
|
4330 |
+
"epoch": 1.7767955801104973,
|
4331 |
+
"grad_norm": 0.34210672974586487,
|
4332 |
+
"learning_rate": 6.156473015002029e-06,
|
4333 |
+
"loss": 1.9461,
|
4334 |
+
"step": 603
|
4335 |
+
},
|
4336 |
+
{
|
4337 |
+
"epoch": 1.7797421731123388,
|
4338 |
+
"grad_norm": 0.36943700909614563,
|
4339 |
+
"learning_rate": 5.995044470741151e-06,
|
4340 |
+
"loss": 1.9437,
|
4341 |
+
"step": 604
|
4342 |
+
},
|
4343 |
+
{
|
4344 |
+
"epoch": 1.7826887661141804,
|
4345 |
+
"grad_norm": 0.3616660237312317,
|
4346 |
+
"learning_rate": 5.835695128882513e-06,
|
4347 |
+
"loss": 1.9987,
|
4348 |
+
"step": 605
|
4349 |
+
},
|
4350 |
+
{
|
4351 |
+
"epoch": 1.7856353591160221,
|
4352 |
+
"grad_norm": 0.35595205426216125,
|
4353 |
+
"learning_rate": 5.678428513916212e-06,
|
4354 |
+
"loss": 2.0232,
|
4355 |
+
"step": 606
|
4356 |
+
},
|
4357 |
+
{
|
4358 |
+
"epoch": 1.7885819521178639,
|
4359 |
+
"grad_norm": 0.3314264416694641,
|
4360 |
+
"learning_rate": 5.5232481042665764e-06,
|
4361 |
+
"loss": 1.7963,
|
4362 |
+
"step": 607
|
4363 |
+
},
|
4364 |
+
{
|
4365 |
+
"epoch": 1.7915285451197054,
|
4366 |
+
"grad_norm": 0.3526301681995392,
|
4367 |
+
"learning_rate": 5.370157332215209e-06,
|
4368 |
+
"loss": 2.0898,
|
4369 |
+
"step": 608
|
4370 |
+
},
|
4371 |
+
{
|
4372 |
+
"epoch": 1.794475138121547,
|
4373 |
+
"grad_norm": 0.3451070785522461,
|
4374 |
+
"learning_rate": 5.219159583825106e-06,
|
4375 |
+
"loss": 1.8876,
|
4376 |
+
"step": 609
|
4377 |
+
},
|
4378 |
+
{
|
4379 |
+
"epoch": 1.7974217311233884,
|
4380 |
+
"grad_norm": 0.34398043155670166,
|
4381 |
+
"learning_rate": 5.07025819886574e-06,
|
4382 |
+
"loss": 1.9447,
|
4383 |
+
"step": 610
|
4384 |
+
},
|
4385 |
+
{
|
4386 |
+
"epoch": 1.8003683241252302,
|
4387 |
+
"grad_norm": 0.33297380805015564,
|
4388 |
+
"learning_rate": 4.923456470739219e-06,
|
4389 |
+
"loss": 1.9111,
|
4390 |
+
"step": 611
|
4391 |
+
},
|
4392 |
+
{
|
4393 |
+
"epoch": 1.803314917127072,
|
4394 |
+
"grad_norm": 0.35569652915000916,
|
4395 |
+
"learning_rate": 4.778757646407362e-06,
|
4396 |
+
"loss": 2.052,
|
4397 |
+
"step": 612
|
4398 |
+
},
|
4399 |
+
{
|
4400 |
+
"epoch": 1.8062615101289135,
|
4401 |
+
"grad_norm": 0.3721804618835449,
|
4402 |
+
"learning_rate": 4.636164926320064e-06,
|
4403 |
+
"loss": 2.1146,
|
4404 |
+
"step": 613
|
4405 |
+
},
|
4406 |
+
{
|
4407 |
+
"epoch": 1.809208103130755,
|
4408 |
+
"grad_norm": 0.3460250198841095,
|
4409 |
+
"learning_rate": 4.495681464344259e-06,
|
4410 |
+
"loss": 1.9138,
|
4411 |
+
"step": 614
|
4412 |
+
},
|
4413 |
+
{
|
4414 |
+
"epoch": 1.8121546961325967,
|
4415 |
+
"grad_norm": 0.38758331537246704,
|
4416 |
+
"learning_rate": 4.357310367694378e-06,
|
4417 |
+
"loss": 1.975,
|
4418 |
+
"step": 615
|
4419 |
+
},
|
4420 |
+
{
|
4421 |
+
"epoch": 1.8151012891344385,
|
4422 |
+
"grad_norm": 0.37770211696624756,
|
4423 |
+
"learning_rate": 4.22105469686348e-06,
|
4424 |
+
"loss": 2.0591,
|
4425 |
+
"step": 616
|
4426 |
+
},
|
4427 |
+
{
|
4428 |
+
"epoch": 1.81804788213628,
|
4429 |
+
"grad_norm": 0.3766396641731262,
|
4430 |
+
"learning_rate": 4.086917465555662e-06,
|
4431 |
+
"loss": 2.0694,
|
4432 |
+
"step": 617
|
4433 |
+
},
|
4434 |
+
{
|
4435 |
+
"epoch": 1.8209944751381215,
|
4436 |
+
"grad_norm": 0.3475191593170166,
|
4437 |
+
"learning_rate": 3.954901640619368e-06,
|
4438 |
+
"loss": 1.9057,
|
4439 |
+
"step": 618
|
4440 |
+
},
|
4441 |
+
{
|
4442 |
+
"epoch": 1.823941068139963,
|
4443 |
+
"grad_norm": 0.3547375798225403,
|
4444 |
+
"learning_rate": 3.825010141981677e-06,
|
4445 |
+
"loss": 2.0484,
|
4446 |
+
"step": 619
|
4447 |
+
},
|
4448 |
+
{
|
4449 |
+
"epoch": 1.8268876611418048,
|
4450 |
+
"grad_norm": 0.35701024532318115,
|
4451 |
+
"learning_rate": 3.6972458425838962e-06,
|
4452 |
+
"loss": 2.084,
|
4453 |
+
"step": 620
|
4454 |
+
},
|
4455 |
+
{
|
4456 |
+
"epoch": 1.8298342541436465,
|
4457 |
+
"grad_norm": 0.3376360535621643,
|
4458 |
+
"learning_rate": 3.571611568317856e-06,
|
4459 |
+
"loss": 1.8885,
|
4460 |
+
"step": 621
|
4461 |
+
},
|
4462 |
+
{
|
4463 |
+
"epoch": 1.832780847145488,
|
4464 |
+
"grad_norm": 0.34869226813316345,
|
4465 |
+
"learning_rate": 3.4481100979635306e-06,
|
4466 |
+
"loss": 2.1957,
|
4467 |
+
"step": 622
|
4468 |
+
},
|
4469 |
+
{
|
4470 |
+
"epoch": 1.8357274401473296,
|
4471 |
+
"grad_norm": 0.3375036418437958,
|
4472 |
+
"learning_rate": 3.32674416312746e-06,
|
4473 |
+
"loss": 1.8859,
|
4474 |
+
"step": 623
|
4475 |
+
},
|
4476 |
+
{
|
4477 |
+
"epoch": 1.838674033149171,
|
4478 |
+
"grad_norm": 0.3811117708683014,
|
4479 |
+
"learning_rate": 3.207516448182435e-06,
|
4480 |
+
"loss": 2.147,
|
4481 |
+
"step": 624
|
4482 |
+
},
|
4483 |
+
{
|
4484 |
+
"epoch": 1.8416206261510129,
|
4485 |
+
"grad_norm": 0.3357301652431488,
|
4486 |
+
"learning_rate": 3.0904295902080326e-06,
|
4487 |
+
"loss": 1.7973,
|
4488 |
+
"step": 625
|
4489 |
+
},
|
4490 |
+
{
|
4491 |
+
"epoch": 1.8445672191528546,
|
4492 |
+
"grad_norm": 0.38550111651420593,
|
4493 |
+
"learning_rate": 2.9754861789324073e-06,
|
4494 |
+
"loss": 2.1196,
|
4495 |
+
"step": 626
|
4496 |
+
},
|
4497 |
+
{
|
4498 |
+
"epoch": 1.8475138121546961,
|
4499 |
+
"grad_norm": 0.3680182099342346,
|
4500 |
+
"learning_rate": 2.8626887566748807e-06,
|
4501 |
+
"loss": 2.0742,
|
4502 |
+
"step": 627
|
4503 |
+
},
|
4504 |
+
{
|
4505 |
+
"epoch": 1.8504604051565376,
|
4506 |
+
"grad_norm": 0.3618917763233185,
|
4507 |
+
"learning_rate": 2.752039818289809e-06,
|
4508 |
+
"loss": 2.0812,
|
4509 |
+
"step": 628
|
4510 |
+
},
|
4511 |
+
{
|
4512 |
+
"epoch": 1.8534069981583794,
|
4513 |
+
"grad_norm": 0.3539152443408966,
|
4514 |
+
"learning_rate": 2.6435418111113276e-06,
|
4515 |
+
"loss": 1.9961,
|
4516 |
+
"step": 629
|
4517 |
+
},
|
4518 |
+
{
|
4519 |
+
"epoch": 1.8563535911602211,
|
4520 |
+
"grad_norm": 0.35489708185195923,
|
4521 |
+
"learning_rate": 2.537197134899294e-06,
|
4522 |
+
"loss": 2.0572,
|
4523 |
+
"step": 630
|
4524 |
+
},
|
4525 |
+
{
|
4526 |
+
"epoch": 1.8593001841620627,
|
4527 |
+
"grad_norm": 0.34133121371269226,
|
4528 |
+
"learning_rate": 2.433008141786153e-06,
|
4529 |
+
"loss": 1.9718,
|
4530 |
+
"step": 631
|
4531 |
+
},
|
4532 |
+
{
|
4533 |
+
"epoch": 1.8622467771639042,
|
4534 |
+
"grad_norm": 0.34101560711860657,
|
4535 |
+
"learning_rate": 2.330977136224932e-06,
|
4536 |
+
"loss": 1.881,
|
4537 |
+
"step": 632
|
4538 |
+
},
|
4539 |
+
{
|
4540 |
+
"epoch": 1.8651933701657457,
|
4541 |
+
"grad_norm": 0.3772835433483124,
|
4542 |
+
"learning_rate": 2.2311063749382742e-06,
|
4543 |
+
"loss": 2.0331,
|
4544 |
+
"step": 633
|
4545 |
+
},
|
4546 |
+
{
|
4547 |
+
"epoch": 1.8681399631675875,
|
4548 |
+
"grad_norm": 0.3728832006454468,
|
4549 |
+
"learning_rate": 2.1333980668685414e-06,
|
4550 |
+
"loss": 1.9764,
|
4551 |
+
"step": 634
|
4552 |
+
},
|
4553 |
+
{
|
4554 |
+
"epoch": 1.8710865561694292,
|
4555 |
+
"grad_norm": 0.36289656162261963,
|
4556 |
+
"learning_rate": 2.037854373128889e-06,
|
4557 |
+
"loss": 2.006,
|
4558 |
+
"step": 635
|
4559 |
+
},
|
4560 |
+
{
|
4561 |
+
"epoch": 1.8740331491712707,
|
4562 |
+
"grad_norm": 0.36972489953041077,
|
4563 |
+
"learning_rate": 1.9444774069555694e-06,
|
4564 |
+
"loss": 2.1053,
|
4565 |
+
"step": 636
|
4566 |
+
},
|
4567 |
+
{
|
4568 |
+
"epoch": 1.8769797421731123,
|
4569 |
+
"grad_norm": 0.34683194756507874,
|
4570 |
+
"learning_rate": 1.8532692336611035e-06,
|
4571 |
+
"loss": 2.0312,
|
4572 |
+
"step": 637
|
4573 |
+
},
|
4574 |
+
{
|
4575 |
+
"epoch": 1.879926335174954,
|
4576 |
+
"grad_norm": 0.3639572858810425,
|
4577 |
+
"learning_rate": 1.7642318705886286e-06,
|
4578 |
+
"loss": 1.9183,
|
4579 |
+
"step": 638
|
4580 |
+
},
|
4581 |
+
{
|
4582 |
+
"epoch": 1.8828729281767957,
|
4583 |
+
"grad_norm": 0.3998357355594635,
|
4584 |
+
"learning_rate": 1.6773672870673218e-06,
|
4585 |
+
"loss": 2.0961,
|
4586 |
+
"step": 639
|
4587 |
+
},
|
4588 |
+
{
|
4589 |
+
"epoch": 1.8858195211786373,
|
4590 |
+
"grad_norm": 0.3396947383880615,
|
4591 |
+
"learning_rate": 1.5926774043687365e-06,
|
4592 |
+
"loss": 1.9543,
|
4593 |
+
"step": 640
|
4594 |
+
},
|
4595 |
+
{
|
4596 |
+
"epoch": 1.8887661141804788,
|
4597 |
+
"grad_norm": 0.3566475510597229,
|
4598 |
+
"learning_rate": 1.510164095664457e-06,
|
4599 |
+
"loss": 2.0163,
|
4600 |
+
"step": 641
|
4601 |
+
},
|
4602 |
+
{
|
4603 |
+
"epoch": 1.8917127071823203,
|
4604 |
+
"grad_norm": 0.33658653497695923,
|
4605 |
+
"learning_rate": 1.4298291859845214e-06,
|
4606 |
+
"loss": 1.9524,
|
4607 |
+
"step": 642
|
4608 |
+
},
|
4609 |
+
{
|
4610 |
+
"epoch": 1.894659300184162,
|
4611 |
+
"grad_norm": 0.35298457741737366,
|
4612 |
+
"learning_rate": 1.351674452177143e-06,
|
4613 |
+
"loss": 2.0429,
|
4614 |
+
"step": 643
|
4615 |
+
},
|
4616 |
+
{
|
4617 |
+
"epoch": 1.8976058931860038,
|
4618 |
+
"grad_norm": 0.353605180978775,
|
4619 |
+
"learning_rate": 1.2757016228693964e-06,
|
4620 |
+
"loss": 2.0201,
|
4621 |
+
"step": 644
|
4622 |
+
},
|
4623 |
+
{
|
4624 |
+
"epoch": 1.9005524861878453,
|
4625 |
+
"grad_norm": 0.3473069667816162,
|
4626 |
+
"learning_rate": 1.2019123784289488e-06,
|
4627 |
+
"loss": 2.054,
|
4628 |
+
"step": 645
|
4629 |
+
},
|
4630 |
+
{
|
4631 |
+
"epoch": 1.9034990791896869,
|
4632 |
+
"grad_norm": 0.373976469039917,
|
4633 |
+
"learning_rate": 1.1303083509269452e-06,
|
4634 |
+
"loss": 1.9479,
|
4635 |
+
"step": 646
|
4636 |
+
},
|
4637 |
+
{
|
4638 |
+
"epoch": 1.9064456721915284,
|
4639 |
+
"grad_norm": 0.3614707887172699,
|
4640 |
+
"learning_rate": 1.0608911241018594e-06,
|
4641 |
+
"loss": 1.9979,
|
4642 |
+
"step": 647
|
4643 |
+
},
|
4644 |
+
{
|
4645 |
+
"epoch": 1.9093922651933701,
|
4646 |
+
"grad_norm": 0.35335227847099304,
|
4647 |
+
"learning_rate": 9.936622333245104e-07,
|
4648 |
+
"loss": 1.9355,
|
4649 |
+
"step": 648
|
4650 |
+
},
|
4651 |
+
{
|
4652 |
+
"epoch": 1.9123388581952119,
|
4653 |
+
"grad_norm": 0.3640158474445343,
|
4654 |
+
"learning_rate": 9.286231655640799e-07,
|
4655 |
+
"loss": 2.0179,
|
4656 |
+
"step": 649
|
4657 |
+
},
|
4658 |
+
{
|
4659 |
+
"epoch": 1.9152854511970534,
|
4660 |
+
"grad_norm": 0.3631095290184021,
|
4661 |
+
"learning_rate": 8.657753593552143e-07,
|
4662 |
+
"loss": 1.9934,
|
4663 |
+
"step": 650
|
4664 |
+
},
|
4665 |
+
{
|
4666 |
+
"epoch": 1.9152854511970534,
|
4667 |
+
"eval_loss": 2.0474448204040527,
|
4668 |
+
"eval_runtime": 33.5976,
|
4669 |
+
"eval_samples_per_second": 34.05,
|
4670 |
+
"eval_steps_per_second": 8.513,
|
4671 |
+
"step": 650
|
4672 |
+
},
|
4673 |
+
{
|
4674 |
+
"epoch": 1.918232044198895,
|
4675 |
+
"grad_norm": 0.3535584509372711,
|
4676 |
+
"learning_rate": 8.051202047662187e-07,
|
4677 |
+
"loss": 1.9061,
|
4678 |
+
"step": 651
|
4679 |
+
},
|
4680 |
+
{
|
4681 |
+
"epoch": 1.9211786372007367,
|
4682 |
+
"grad_norm": 0.3503694534301758,
|
4683 |
+
"learning_rate": 7.466590433683251e-07,
|
4684 |
+
"loss": 1.9775,
|
4685 |
+
"step": 652
|
4686 |
+
},
|
4687 |
+
{
|
4688 |
+
"epoch": 1.9241252302025784,
|
4689 |
+
"grad_norm": 0.3380708396434784,
|
4690 |
+
"learning_rate": 6.903931682059827e-07,
|
4691 |
+
"loss": 1.8295,
|
4692 |
+
"step": 653
|
4693 |
+
},
|
4694 |
+
{
|
4695 |
+
"epoch": 1.92707182320442,
|
4696 |
+
"grad_norm": 0.3362259268760681,
|
4697 |
+
"learning_rate": 6.363238237683033e-07,
|
4698 |
+
"loss": 1.7838,
|
4699 |
+
"step": 654
|
4700 |
+
},
|
4701 |
+
{
|
4702 |
+
"epoch": 1.9300184162062615,
|
4703 |
+
"grad_norm": 0.3610450327396393,
|
4704 |
+
"learning_rate": 5.844522059614943e-07,
|
4705 |
+
"loss": 2.1624,
|
4706 |
+
"step": 655
|
4707 |
+
},
|
4708 |
+
{
|
4709 |
+
"epoch": 1.932965009208103,
|
4710 |
+
"grad_norm": 0.34263625741004944,
|
4711 |
+
"learning_rate": 5.347794620824576e-07,
|
4712 |
+
"loss": 1.9548,
|
4713 |
+
"step": 656
|
4714 |
+
},
|
4715 |
+
{
|
4716 |
+
"epoch": 1.9359116022099447,
|
4717 |
+
"grad_norm": 0.35191041231155396,
|
4718 |
+
"learning_rate": 4.873066907933543e-07,
|
4719 |
+
"loss": 2.0965,
|
4720 |
+
"step": 657
|
4721 |
+
},
|
4722 |
+
{
|
4723 |
+
"epoch": 1.9388581952117865,
|
4724 |
+
"grad_norm": 0.361122727394104,
|
4725 |
+
"learning_rate": 4.4203494209733576e-07,
|
4726 |
+
"loss": 1.9781,
|
4727 |
+
"step": 658
|
4728 |
+
},
|
4729 |
+
{
|
4730 |
+
"epoch": 1.941804788213628,
|
4731 |
+
"grad_norm": 0.35918036103248596,
|
4732 |
+
"learning_rate": 3.9896521731532797e-07,
|
4733 |
+
"loss": 1.8921,
|
4734 |
+
"step": 659
|
4735 |
+
},
|
4736 |
+
{
|
4737 |
+
"epoch": 1.9447513812154695,
|
4738 |
+
"grad_norm": 0.37808507680892944,
|
4739 |
+
"learning_rate": 3.580984690638611e-07,
|
4740 |
+
"loss": 2.2418,
|
4741 |
+
"step": 660
|
4742 |
+
},
|
4743 |
+
{
|
4744 |
+
"epoch": 1.9476979742173113,
|
4745 |
+
"grad_norm": 0.34101665019989014,
|
4746 |
+
"learning_rate": 3.194356012340305e-07,
|
4747 |
+
"loss": 1.8401,
|
4748 |
+
"step": 661
|
4749 |
+
},
|
4750 |
+
{
|
4751 |
+
"epoch": 1.9506445672191528,
|
4752 |
+
"grad_norm": 0.360929936170578,
|
4753 |
+
"learning_rate": 2.8297746897146816e-07,
|
4754 |
+
"loss": 2.0246,
|
4755 |
+
"step": 662
|
4756 |
+
},
|
4757 |
+
{
|
4758 |
+
"epoch": 1.9535911602209945,
|
4759 |
+
"grad_norm": 0.36307036876678467,
|
4760 |
+
"learning_rate": 2.48724878657447e-07,
|
4761 |
+
"loss": 2.0241,
|
4762 |
+
"step": 663
|
4763 |
+
},
|
4764 |
+
{
|
4765 |
+
"epoch": 1.956537753222836,
|
4766 |
+
"grad_norm": 0.3703291118144989,
|
4767 |
+
"learning_rate": 2.1667858789105043e-07,
|
4768 |
+
"loss": 2.1115,
|
4769 |
+
"step": 664
|
4770 |
+
},
|
4771 |
+
{
|
4772 |
+
"epoch": 1.9594843462246776,
|
4773 |
+
"grad_norm": 0.3611985445022583,
|
4774 |
+
"learning_rate": 1.8683930547243045e-07,
|
4775 |
+
"loss": 1.9565,
|
4776 |
+
"step": 665
|
4777 |
+
},
|
4778 |
+
{
|
4779 |
+
"epoch": 1.9624309392265193,
|
4780 |
+
"grad_norm": 0.351419061422348,
|
4781 |
+
"learning_rate": 1.5920769138706438e-07,
|
4782 |
+
"loss": 1.8914,
|
4783 |
+
"step": 666
|
4784 |
+
},
|
4785 |
+
{
|
4786 |
+
"epoch": 1.965377532228361,
|
4787 |
+
"grad_norm": 0.36377424001693726,
|
4788 |
+
"learning_rate": 1.3378435679122226e-07,
|
4789 |
+
"loss": 2.0512,
|
4790 |
+
"step": 667
|
4791 |
+
},
|
4792 |
+
{
|
4793 |
+
"epoch": 1.9683241252302026,
|
4794 |
+
"grad_norm": 0.36848321557044983,
|
4795 |
+
"learning_rate": 1.1056986399845537e-07,
|
4796 |
+
"loss": 2.1475,
|
4797 |
+
"step": 668
|
4798 |
+
},
|
4799 |
+
{
|
4800 |
+
"epoch": 1.9712707182320441,
|
4801 |
+
"grad_norm": 0.3410709500312805,
|
4802 |
+
"learning_rate": 8.956472646710623e-08,
|
4803 |
+
"loss": 1.9034,
|
4804 |
+
"step": 669
|
4805 |
+
},
|
4806 |
+
{
|
4807 |
+
"epoch": 1.9742173112338857,
|
4808 |
+
"grad_norm": 0.37526410818099976,
|
4809 |
+
"learning_rate": 7.07694087889621e-08,
|
4810 |
+
"loss": 2.1324,
|
4811 |
+
"step": 670
|
4812 |
+
},
|
4813 |
+
{
|
4814 |
+
"epoch": 1.9771639042357274,
|
4815 |
+
"grad_norm": 0.3631035089492798,
|
4816 |
+
"learning_rate": 5.418432667905204e-08,
|
4817 |
+
"loss": 1.9301,
|
4818 |
+
"step": 671
|
4819 |
+
},
|
4820 |
+
{
|
4821 |
+
"epoch": 1.9801104972375692,
|
4822 |
+
"grad_norm": 0.3520108759403229,
|
4823 |
+
"learning_rate": 3.980984696634327e-08,
|
4824 |
+
"loss": 1.9756,
|
4825 |
+
"step": 672
|
4826 |
+
},
|
4827 |
+
{
|
4828 |
+
"epoch": 1.9830570902394107,
|
4829 |
+
"grad_norm": 0.35726797580718994,
|
4830 |
+
"learning_rate": 2.764628758570309e-08,
|
4831 |
+
"loss": 1.9707,
|
4832 |
+
"step": 673
|
4833 |
+
},
|
4834 |
+
{
|
4835 |
+
"epoch": 1.9860036832412522,
|
4836 |
+
"grad_norm": 0.35579240322113037,
|
4837 |
+
"learning_rate": 1.7693917570837936e-08,
|
4838 |
+
"loss": 1.9214,
|
4839 |
+
"step": 674
|
4840 |
+
},
|
4841 |
+
{
|
4842 |
+
"epoch": 1.988950276243094,
|
4843 |
+
"grad_norm": 0.3749707043170929,
|
4844 |
+
"learning_rate": 9.95295704835364e-09,
|
4845 |
+
"loss": 2.0993,
|
4846 |
+
"step": 675
|
4847 |
+
},
|
4848 |
+
{
|
4849 |
+
"epoch": 1.9918968692449357,
|
4850 |
+
"grad_norm": 0.35399675369262695,
|
4851 |
+
"learning_rate": 4.42357723288156e-09,
|
4852 |
+
"loss": 1.9808,
|
4853 |
+
"step": 676
|
4854 |
+
},
|
4855 |
+
{
|
4856 |
+
"epoch": 1.9948434622467772,
|
4857 |
+
"grad_norm": 0.34812989830970764,
|
4858 |
+
"learning_rate": 1.1059004233038296e-09,
|
4859 |
+
"loss": 1.9791,
|
4860 |
+
"step": 677
|
4861 |
+
},
|
4862 |
+
{
|
4863 |
+
"epoch": 1.9977900552486187,
|
4864 |
+
"grad_norm": 0.3504377603530884,
|
4865 |
+
"learning_rate": 0.0,
|
4866 |
+
"loss": 1.9639,
|
4867 |
+
"step": 678
|
4868 |
}
|
4869 |
],
|
4870 |
"logging_steps": 1,
|
|
|
4888 |
"should_evaluate": false,
|
4889 |
"should_log": false,
|
4890 |
"should_save": true,
|
4891 |
+
"should_training_stop": true
|
4892 |
},
|
4893 |
"attributes": {}
|
4894 |
}
|
4895 |
},
|
4896 |
+
"total_flos": 3.544397205114716e+17,
|
4897 |
"train_batch_size": 4,
|
4898 |
"trial_name": null,
|
4899 |
"trial_params": null
|