Romain-XV commited on
Commit
0fbfda0
·
verified ·
1 Parent(s): 7446102

Training in progress, step 678, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:798495212ffbe2d661f45c58b4dde2b470a5d9405f2ca836ceffeb3519f87b9f
3
  size 73911112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c5e63de907b19c7910db799e3b78a42b2e9f9179854322d92c712e5afde4b89
3
  size 73911112
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e527d5c564e6baad9c3626c20566561c3f3796e8bd110aee53f3262f7443ad6b
3
  size 37965684
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a14e009833d53fbe9c82d489fe0e9b4ad66c19f8ea8adae05db1259a8aef663d
3
  size 37965684
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d50938b0c074c46e5ebfab86bc5496d4da624a1aed174c37e59cd548461c1665
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6b4a91354537b8cb9fcab809682ea0aa63e50d33b8c1b98a541a3219c1729c7
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:afbdecfe8917cda87c4dc409742f9fdbc81109fe8f3de28716a1b9d03463baff
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f12e5684e3c4181284222b5bfd802e16a9d019f6d061509f1ae13f7ad0785a15
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 2.048067808151245,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-600",
4
- "epoch": 1.7679558011049723,
5
  "eval_steps": 50,
6
- "global_step": 600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4311,6 +4311,560 @@
4311
  "eval_samples_per_second": 34.057,
4312
  "eval_steps_per_second": 8.514,
4313
  "step": 600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4314
  }
4315
  ],
4316
  "logging_steps": 1,
@@ -4334,12 +4888,12 @@
4334
  "should_evaluate": false,
4335
  "should_log": false,
4336
  "should_save": true,
4337
- "should_training_stop": false
4338
  },
4339
  "attributes": {}
4340
  }
4341
  },
4342
- "total_flos": 3.136841328025928e+17,
4343
  "train_batch_size": 4,
4344
  "trial_name": null,
4345
  "trial_params": null
 
1
  {
2
  "best_metric": 2.048067808151245,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-600",
4
+ "epoch": 1.9977900552486187,
5
  "eval_steps": 50,
6
+ "global_step": 678,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4311
  "eval_samples_per_second": 34.057,
4312
  "eval_steps_per_second": 8.514,
4313
  "step": 600
4314
+ },
4315
+ {
4316
+ "epoch": 1.770902394106814,
4317
+ "grad_norm": 0.3590349853038788,
4318
+ "learning_rate": 6.485553382910026e-06,
4319
+ "loss": 1.9617,
4320
+ "step": 601
4321
+ },
4322
+ {
4323
+ "epoch": 1.7738489871086556,
4324
+ "grad_norm": 0.35265564918518066,
4325
+ "learning_rate": 6.319977191187232e-06,
4326
+ "loss": 2.0291,
4327
+ "step": 602
4328
+ },
4329
+ {
4330
+ "epoch": 1.7767955801104973,
4331
+ "grad_norm": 0.34210672974586487,
4332
+ "learning_rate": 6.156473015002029e-06,
4333
+ "loss": 1.9461,
4334
+ "step": 603
4335
+ },
4336
+ {
4337
+ "epoch": 1.7797421731123388,
4338
+ "grad_norm": 0.36943700909614563,
4339
+ "learning_rate": 5.995044470741151e-06,
4340
+ "loss": 1.9437,
4341
+ "step": 604
4342
+ },
4343
+ {
4344
+ "epoch": 1.7826887661141804,
4345
+ "grad_norm": 0.3616660237312317,
4346
+ "learning_rate": 5.835695128882513e-06,
4347
+ "loss": 1.9987,
4348
+ "step": 605
4349
+ },
4350
+ {
4351
+ "epoch": 1.7856353591160221,
4352
+ "grad_norm": 0.35595205426216125,
4353
+ "learning_rate": 5.678428513916212e-06,
4354
+ "loss": 2.0232,
4355
+ "step": 606
4356
+ },
4357
+ {
4358
+ "epoch": 1.7885819521178639,
4359
+ "grad_norm": 0.3314264416694641,
4360
+ "learning_rate": 5.5232481042665764e-06,
4361
+ "loss": 1.7963,
4362
+ "step": 607
4363
+ },
4364
+ {
4365
+ "epoch": 1.7915285451197054,
4366
+ "grad_norm": 0.3526301681995392,
4367
+ "learning_rate": 5.370157332215209e-06,
4368
+ "loss": 2.0898,
4369
+ "step": 608
4370
+ },
4371
+ {
4372
+ "epoch": 1.794475138121547,
4373
+ "grad_norm": 0.3451070785522461,
4374
+ "learning_rate": 5.219159583825106e-06,
4375
+ "loss": 1.8876,
4376
+ "step": 609
4377
+ },
4378
+ {
4379
+ "epoch": 1.7974217311233884,
4380
+ "grad_norm": 0.34398043155670166,
4381
+ "learning_rate": 5.07025819886574e-06,
4382
+ "loss": 1.9447,
4383
+ "step": 610
4384
+ },
4385
+ {
4386
+ "epoch": 1.8003683241252302,
4387
+ "grad_norm": 0.33297380805015564,
4388
+ "learning_rate": 4.923456470739219e-06,
4389
+ "loss": 1.9111,
4390
+ "step": 611
4391
+ },
4392
+ {
4393
+ "epoch": 1.803314917127072,
4394
+ "grad_norm": 0.35569652915000916,
4395
+ "learning_rate": 4.778757646407362e-06,
4396
+ "loss": 2.052,
4397
+ "step": 612
4398
+ },
4399
+ {
4400
+ "epoch": 1.8062615101289135,
4401
+ "grad_norm": 0.3721804618835449,
4402
+ "learning_rate": 4.636164926320064e-06,
4403
+ "loss": 2.1146,
4404
+ "step": 613
4405
+ },
4406
+ {
4407
+ "epoch": 1.809208103130755,
4408
+ "grad_norm": 0.3460250198841095,
4409
+ "learning_rate": 4.495681464344259e-06,
4410
+ "loss": 1.9138,
4411
+ "step": 614
4412
+ },
4413
+ {
4414
+ "epoch": 1.8121546961325967,
4415
+ "grad_norm": 0.38758331537246704,
4416
+ "learning_rate": 4.357310367694378e-06,
4417
+ "loss": 1.975,
4418
+ "step": 615
4419
+ },
4420
+ {
4421
+ "epoch": 1.8151012891344385,
4422
+ "grad_norm": 0.37770211696624756,
4423
+ "learning_rate": 4.22105469686348e-06,
4424
+ "loss": 2.0591,
4425
+ "step": 616
4426
+ },
4427
+ {
4428
+ "epoch": 1.81804788213628,
4429
+ "grad_norm": 0.3766396641731262,
4430
+ "learning_rate": 4.086917465555662e-06,
4431
+ "loss": 2.0694,
4432
+ "step": 617
4433
+ },
4434
+ {
4435
+ "epoch": 1.8209944751381215,
4436
+ "grad_norm": 0.3475191593170166,
4437
+ "learning_rate": 3.954901640619368e-06,
4438
+ "loss": 1.9057,
4439
+ "step": 618
4440
+ },
4441
+ {
4442
+ "epoch": 1.823941068139963,
4443
+ "grad_norm": 0.3547375798225403,
4444
+ "learning_rate": 3.825010141981677e-06,
4445
+ "loss": 2.0484,
4446
+ "step": 619
4447
+ },
4448
+ {
4449
+ "epoch": 1.8268876611418048,
4450
+ "grad_norm": 0.35701024532318115,
4451
+ "learning_rate": 3.6972458425838962e-06,
4452
+ "loss": 2.084,
4453
+ "step": 620
4454
+ },
4455
+ {
4456
+ "epoch": 1.8298342541436465,
4457
+ "grad_norm": 0.3376360535621643,
4458
+ "learning_rate": 3.571611568317856e-06,
4459
+ "loss": 1.8885,
4460
+ "step": 621
4461
+ },
4462
+ {
4463
+ "epoch": 1.832780847145488,
4464
+ "grad_norm": 0.34869226813316345,
4465
+ "learning_rate": 3.4481100979635306e-06,
4466
+ "loss": 2.1957,
4467
+ "step": 622
4468
+ },
4469
+ {
4470
+ "epoch": 1.8357274401473296,
4471
+ "grad_norm": 0.3375036418437958,
4472
+ "learning_rate": 3.32674416312746e-06,
4473
+ "loss": 1.8859,
4474
+ "step": 623
4475
+ },
4476
+ {
4477
+ "epoch": 1.838674033149171,
4478
+ "grad_norm": 0.3811117708683014,
4479
+ "learning_rate": 3.207516448182435e-06,
4480
+ "loss": 2.147,
4481
+ "step": 624
4482
+ },
4483
+ {
4484
+ "epoch": 1.8416206261510129,
4485
+ "grad_norm": 0.3357301652431488,
4486
+ "learning_rate": 3.0904295902080326e-06,
4487
+ "loss": 1.7973,
4488
+ "step": 625
4489
+ },
4490
+ {
4491
+ "epoch": 1.8445672191528546,
4492
+ "grad_norm": 0.38550111651420593,
4493
+ "learning_rate": 2.9754861789324073e-06,
4494
+ "loss": 2.1196,
4495
+ "step": 626
4496
+ },
4497
+ {
4498
+ "epoch": 1.8475138121546961,
4499
+ "grad_norm": 0.3680182099342346,
4500
+ "learning_rate": 2.8626887566748807e-06,
4501
+ "loss": 2.0742,
4502
+ "step": 627
4503
+ },
4504
+ {
4505
+ "epoch": 1.8504604051565376,
4506
+ "grad_norm": 0.3618917763233185,
4507
+ "learning_rate": 2.752039818289809e-06,
4508
+ "loss": 2.0812,
4509
+ "step": 628
4510
+ },
4511
+ {
4512
+ "epoch": 1.8534069981583794,
4513
+ "grad_norm": 0.3539152443408966,
4514
+ "learning_rate": 2.6435418111113276e-06,
4515
+ "loss": 1.9961,
4516
+ "step": 629
4517
+ },
4518
+ {
4519
+ "epoch": 1.8563535911602211,
4520
+ "grad_norm": 0.35489708185195923,
4521
+ "learning_rate": 2.537197134899294e-06,
4522
+ "loss": 2.0572,
4523
+ "step": 630
4524
+ },
4525
+ {
4526
+ "epoch": 1.8593001841620627,
4527
+ "grad_norm": 0.34133121371269226,
4528
+ "learning_rate": 2.433008141786153e-06,
4529
+ "loss": 1.9718,
4530
+ "step": 631
4531
+ },
4532
+ {
4533
+ "epoch": 1.8622467771639042,
4534
+ "grad_norm": 0.34101560711860657,
4535
+ "learning_rate": 2.330977136224932e-06,
4536
+ "loss": 1.881,
4537
+ "step": 632
4538
+ },
4539
+ {
4540
+ "epoch": 1.8651933701657457,
4541
+ "grad_norm": 0.3772835433483124,
4542
+ "learning_rate": 2.2311063749382742e-06,
4543
+ "loss": 2.0331,
4544
+ "step": 633
4545
+ },
4546
+ {
4547
+ "epoch": 1.8681399631675875,
4548
+ "grad_norm": 0.3728832006454468,
4549
+ "learning_rate": 2.1333980668685414e-06,
4550
+ "loss": 1.9764,
4551
+ "step": 634
4552
+ },
4553
+ {
4554
+ "epoch": 1.8710865561694292,
4555
+ "grad_norm": 0.36289656162261963,
4556
+ "learning_rate": 2.037854373128889e-06,
4557
+ "loss": 2.006,
4558
+ "step": 635
4559
+ },
4560
+ {
4561
+ "epoch": 1.8740331491712707,
4562
+ "grad_norm": 0.36972489953041077,
4563
+ "learning_rate": 1.9444774069555694e-06,
4564
+ "loss": 2.1053,
4565
+ "step": 636
4566
+ },
4567
+ {
4568
+ "epoch": 1.8769797421731123,
4569
+ "grad_norm": 0.34683194756507874,
4570
+ "learning_rate": 1.8532692336611035e-06,
4571
+ "loss": 2.0312,
4572
+ "step": 637
4573
+ },
4574
+ {
4575
+ "epoch": 1.879926335174954,
4576
+ "grad_norm": 0.3639572858810425,
4577
+ "learning_rate": 1.7642318705886286e-06,
4578
+ "loss": 1.9183,
4579
+ "step": 638
4580
+ },
4581
+ {
4582
+ "epoch": 1.8828729281767957,
4583
+ "grad_norm": 0.3998357355594635,
4584
+ "learning_rate": 1.6773672870673218e-06,
4585
+ "loss": 2.0961,
4586
+ "step": 639
4587
+ },
4588
+ {
4589
+ "epoch": 1.8858195211786373,
4590
+ "grad_norm": 0.3396947383880615,
4591
+ "learning_rate": 1.5926774043687365e-06,
4592
+ "loss": 1.9543,
4593
+ "step": 640
4594
+ },
4595
+ {
4596
+ "epoch": 1.8887661141804788,
4597
+ "grad_norm": 0.3566475510597229,
4598
+ "learning_rate": 1.510164095664457e-06,
4599
+ "loss": 2.0163,
4600
+ "step": 641
4601
+ },
4602
+ {
4603
+ "epoch": 1.8917127071823203,
4604
+ "grad_norm": 0.33658653497695923,
4605
+ "learning_rate": 1.4298291859845214e-06,
4606
+ "loss": 1.9524,
4607
+ "step": 642
4608
+ },
4609
+ {
4610
+ "epoch": 1.894659300184162,
4611
+ "grad_norm": 0.35298457741737366,
4612
+ "learning_rate": 1.351674452177143e-06,
4613
+ "loss": 2.0429,
4614
+ "step": 643
4615
+ },
4616
+ {
4617
+ "epoch": 1.8976058931860038,
4618
+ "grad_norm": 0.353605180978775,
4619
+ "learning_rate": 1.2757016228693964e-06,
4620
+ "loss": 2.0201,
4621
+ "step": 644
4622
+ },
4623
+ {
4624
+ "epoch": 1.9005524861878453,
4625
+ "grad_norm": 0.3473069667816162,
4626
+ "learning_rate": 1.2019123784289488e-06,
4627
+ "loss": 2.054,
4628
+ "step": 645
4629
+ },
4630
+ {
4631
+ "epoch": 1.9034990791896869,
4632
+ "grad_norm": 0.373976469039917,
4633
+ "learning_rate": 1.1303083509269452e-06,
4634
+ "loss": 1.9479,
4635
+ "step": 646
4636
+ },
4637
+ {
4638
+ "epoch": 1.9064456721915284,
4639
+ "grad_norm": 0.3614707887172699,
4640
+ "learning_rate": 1.0608911241018594e-06,
4641
+ "loss": 1.9979,
4642
+ "step": 647
4643
+ },
4644
+ {
4645
+ "epoch": 1.9093922651933701,
4646
+ "grad_norm": 0.35335227847099304,
4647
+ "learning_rate": 9.936622333245104e-07,
4648
+ "loss": 1.9355,
4649
+ "step": 648
4650
+ },
4651
+ {
4652
+ "epoch": 1.9123388581952119,
4653
+ "grad_norm": 0.3640158474445343,
4654
+ "learning_rate": 9.286231655640799e-07,
4655
+ "loss": 2.0179,
4656
+ "step": 649
4657
+ },
4658
+ {
4659
+ "epoch": 1.9152854511970534,
4660
+ "grad_norm": 0.3631095290184021,
4661
+ "learning_rate": 8.657753593552143e-07,
4662
+ "loss": 1.9934,
4663
+ "step": 650
4664
+ },
4665
+ {
4666
+ "epoch": 1.9152854511970534,
4667
+ "eval_loss": 2.0474448204040527,
4668
+ "eval_runtime": 33.5976,
4669
+ "eval_samples_per_second": 34.05,
4670
+ "eval_steps_per_second": 8.513,
4671
+ "step": 650
4672
+ },
4673
+ {
4674
+ "epoch": 1.918232044198895,
4675
+ "grad_norm": 0.3535584509372711,
4676
+ "learning_rate": 8.051202047662187e-07,
4677
+ "loss": 1.9061,
4678
+ "step": 651
4679
+ },
4680
+ {
4681
+ "epoch": 1.9211786372007367,
4682
+ "grad_norm": 0.3503694534301758,
4683
+ "learning_rate": 7.466590433683251e-07,
4684
+ "loss": 1.9775,
4685
+ "step": 652
4686
+ },
4687
+ {
4688
+ "epoch": 1.9241252302025784,
4689
+ "grad_norm": 0.3380708396434784,
4690
+ "learning_rate": 6.903931682059827e-07,
4691
+ "loss": 1.8295,
4692
+ "step": 653
4693
+ },
4694
+ {
4695
+ "epoch": 1.92707182320442,
4696
+ "grad_norm": 0.3362259268760681,
4697
+ "learning_rate": 6.363238237683033e-07,
4698
+ "loss": 1.7838,
4699
+ "step": 654
4700
+ },
4701
+ {
4702
+ "epoch": 1.9300184162062615,
4703
+ "grad_norm": 0.3610450327396393,
4704
+ "learning_rate": 5.844522059614943e-07,
4705
+ "loss": 2.1624,
4706
+ "step": 655
4707
+ },
4708
+ {
4709
+ "epoch": 1.932965009208103,
4710
+ "grad_norm": 0.34263625741004944,
4711
+ "learning_rate": 5.347794620824576e-07,
4712
+ "loss": 1.9548,
4713
+ "step": 656
4714
+ },
4715
+ {
4716
+ "epoch": 1.9359116022099447,
4717
+ "grad_norm": 0.35191041231155396,
4718
+ "learning_rate": 4.873066907933543e-07,
4719
+ "loss": 2.0965,
4720
+ "step": 657
4721
+ },
4722
+ {
4723
+ "epoch": 1.9388581952117865,
4724
+ "grad_norm": 0.361122727394104,
4725
+ "learning_rate": 4.4203494209733576e-07,
4726
+ "loss": 1.9781,
4727
+ "step": 658
4728
+ },
4729
+ {
4730
+ "epoch": 1.941804788213628,
4731
+ "grad_norm": 0.35918036103248596,
4732
+ "learning_rate": 3.9896521731532797e-07,
4733
+ "loss": 1.8921,
4734
+ "step": 659
4735
+ },
4736
+ {
4737
+ "epoch": 1.9447513812154695,
4738
+ "grad_norm": 0.37808507680892944,
4739
+ "learning_rate": 3.580984690638611e-07,
4740
+ "loss": 2.2418,
4741
+ "step": 660
4742
+ },
4743
+ {
4744
+ "epoch": 1.9476979742173113,
4745
+ "grad_norm": 0.34101665019989014,
4746
+ "learning_rate": 3.194356012340305e-07,
4747
+ "loss": 1.8401,
4748
+ "step": 661
4749
+ },
4750
+ {
4751
+ "epoch": 1.9506445672191528,
4752
+ "grad_norm": 0.360929936170578,
4753
+ "learning_rate": 2.8297746897146816e-07,
4754
+ "loss": 2.0246,
4755
+ "step": 662
4756
+ },
4757
+ {
4758
+ "epoch": 1.9535911602209945,
4759
+ "grad_norm": 0.36307036876678467,
4760
+ "learning_rate": 2.48724878657447e-07,
4761
+ "loss": 2.0241,
4762
+ "step": 663
4763
+ },
4764
+ {
4765
+ "epoch": 1.956537753222836,
4766
+ "grad_norm": 0.3703291118144989,
4767
+ "learning_rate": 2.1667858789105043e-07,
4768
+ "loss": 2.1115,
4769
+ "step": 664
4770
+ },
4771
+ {
4772
+ "epoch": 1.9594843462246776,
4773
+ "grad_norm": 0.3611985445022583,
4774
+ "learning_rate": 1.8683930547243045e-07,
4775
+ "loss": 1.9565,
4776
+ "step": 665
4777
+ },
4778
+ {
4779
+ "epoch": 1.9624309392265193,
4780
+ "grad_norm": 0.351419061422348,
4781
+ "learning_rate": 1.5920769138706438e-07,
4782
+ "loss": 1.8914,
4783
+ "step": 666
4784
+ },
4785
+ {
4786
+ "epoch": 1.965377532228361,
4787
+ "grad_norm": 0.36377424001693726,
4788
+ "learning_rate": 1.3378435679122226e-07,
4789
+ "loss": 2.0512,
4790
+ "step": 667
4791
+ },
4792
+ {
4793
+ "epoch": 1.9683241252302026,
4794
+ "grad_norm": 0.36848321557044983,
4795
+ "learning_rate": 1.1056986399845537e-07,
4796
+ "loss": 2.1475,
4797
+ "step": 668
4798
+ },
4799
+ {
4800
+ "epoch": 1.9712707182320441,
4801
+ "grad_norm": 0.3410709500312805,
4802
+ "learning_rate": 8.956472646710623e-08,
4803
+ "loss": 1.9034,
4804
+ "step": 669
4805
+ },
4806
+ {
4807
+ "epoch": 1.9742173112338857,
4808
+ "grad_norm": 0.37526410818099976,
4809
+ "learning_rate": 7.07694087889621e-08,
4810
+ "loss": 2.1324,
4811
+ "step": 670
4812
+ },
4813
+ {
4814
+ "epoch": 1.9771639042357274,
4815
+ "grad_norm": 0.3631035089492798,
4816
+ "learning_rate": 5.418432667905204e-08,
4817
+ "loss": 1.9301,
4818
+ "step": 671
4819
+ },
4820
+ {
4821
+ "epoch": 1.9801104972375692,
4822
+ "grad_norm": 0.3520108759403229,
4823
+ "learning_rate": 3.980984696634327e-08,
4824
+ "loss": 1.9756,
4825
+ "step": 672
4826
+ },
4827
+ {
4828
+ "epoch": 1.9830570902394107,
4829
+ "grad_norm": 0.35726797580718994,
4830
+ "learning_rate": 2.764628758570309e-08,
4831
+ "loss": 1.9707,
4832
+ "step": 673
4833
+ },
4834
+ {
4835
+ "epoch": 1.9860036832412522,
4836
+ "grad_norm": 0.35579240322113037,
4837
+ "learning_rate": 1.7693917570837936e-08,
4838
+ "loss": 1.9214,
4839
+ "step": 674
4840
+ },
4841
+ {
4842
+ "epoch": 1.988950276243094,
4843
+ "grad_norm": 0.3749707043170929,
4844
+ "learning_rate": 9.95295704835364e-09,
4845
+ "loss": 2.0993,
4846
+ "step": 675
4847
+ },
4848
+ {
4849
+ "epoch": 1.9918968692449357,
4850
+ "grad_norm": 0.35399675369262695,
4851
+ "learning_rate": 4.42357723288156e-09,
4852
+ "loss": 1.9808,
4853
+ "step": 676
4854
+ },
4855
+ {
4856
+ "epoch": 1.9948434622467772,
4857
+ "grad_norm": 0.34812989830970764,
4858
+ "learning_rate": 1.1059004233038296e-09,
4859
+ "loss": 1.9791,
4860
+ "step": 677
4861
+ },
4862
+ {
4863
+ "epoch": 1.9977900552486187,
4864
+ "grad_norm": 0.3504377603530884,
4865
+ "learning_rate": 0.0,
4866
+ "loss": 1.9639,
4867
+ "step": 678
4868
  }
4869
  ],
4870
  "logging_steps": 1,
 
4888
  "should_evaluate": false,
4889
  "should_log": false,
4890
  "should_save": true,
4891
+ "should_training_stop": true
4892
  },
4893
  "attributes": {}
4894
  }
4895
  },
4896
+ "total_flos": 3.544397205114716e+17,
4897
  "train_batch_size": 4,
4898
  "trial_name": null,
4899
  "trial_params": null