Training in progress, step 1600, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +713 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:12855c6edb373afc263d5adc1cce5e474e55e27e895db3ac4e9eda092ba6ba05
 size 73911112

 version https://git-lfs.github.com/spec/v1
+oid sha256:dd8edd4b23894593994dd1597dbc694ccc81001cb7faf035867a3339f3d6ca28
 size 73911112

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8638b300d1710a0a91eddee1842313beb0132dc808a74bdec0a8ed907abcb85b
 size 37965684

 version https://git-lfs.github.com/spec/v1
+oid sha256:e3fa9ed80396e8f083b3d374dcff89bc7c9037cfc4e5585d635d883ef34faa3f
 size 37965684

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6cecffa649477cde6b40bd260b6359859772b91b5d750fffad2796693b388ef2
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:e9ec4f7c75fc8dc73feec982dc49c1db4c92d75f32040d95e28ccfad97fcf3c4
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1235d31d1a147c77b30bd4cbd6d7709eb22d609c0b8e11beebf69778ce556673
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:0a342d756ed2deadb7604676fd81f55043100e7cbb57f36d28b57fa6ef3c9fda
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.04516534134745598,
   "best_model_checkpoint": "miner_id_24/checkpoint-1400",
-  "epoch": 0.1311088531253073,
   "eval_steps": 100,
-  "global_step": 1500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -10635,6 +10635,714 @@
       "eval_samples_per_second": 29.681,
       "eval_steps_per_second": 7.42,
       "step": 1500
     }
   ],
   "logging_steps": 1,
@@ -10649,7 +11357,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
@@ -10658,12 +11366,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 4.5330794309851546e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.04516534134745598,
   "best_model_checkpoint": "miner_id_24/checkpoint-1400",
+  "epoch": 0.13984944333366112,
   "eval_steps": 100,
+  "global_step": 1600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 29.681,
       "eval_steps_per_second": 7.42,
       "step": 1500
+    },
+    {
+      "epoch": 0.13119625902739082,
+      "grad_norm": 0.08697141706943512,
+      "learning_rate": 0.00010350138745749725,
+      "loss": 0.0364,
+      "step": 1501
+    },
+    {
+      "epoch": 0.13128366492947435,
+      "grad_norm": 0.11794944107532501,
+      "learning_rate": 0.00010339844573921038,
+      "loss": 0.0406,
+      "step": 1502
+    },
+    {
+      "epoch": 0.1313710708315579,
+      "grad_norm": 0.20966710150241852,
+      "learning_rate": 0.00010329550041530005,
+      "loss": 0.0717,
+      "step": 1503
+    },
+    {
+      "epoch": 0.13145847673364144,
+      "grad_norm": 0.15292756259441376,
+      "learning_rate": 0.0001031925515949874,
+      "loss": 0.0329,
+      "step": 1504
+    },
+    {
+      "epoch": 0.13154588263572498,
+      "grad_norm": 0.06401754915714264,
+      "learning_rate": 0.00010308959938749729,
+      "loss": 0.024,
+      "step": 1505
+    },
+    {
+      "epoch": 0.1316332885378085,
+      "grad_norm": 0.0975053682923317,
+      "learning_rate": 0.00010298664390205812,
+      "loss": 0.0453,
+      "step": 1506
+    },
+    {
+      "epoch": 0.13172069443989207,
+      "grad_norm": 0.07814626395702362,
+      "learning_rate": 0.00010288368524790182,
+      "loss": 0.029,
+      "step": 1507
+    },
+    {
+      "epoch": 0.1318081003419756,
+      "grad_norm": 0.08279038220643997,
+      "learning_rate": 0.00010278072353426365,
+      "loss": 0.0267,
+      "step": 1508
+    },
+    {
+      "epoch": 0.13189550624405913,
+      "grad_norm": 0.10478601604700089,
+      "learning_rate": 0.0001026777588703821,
+      "loss": 0.0389,
+      "step": 1509
+    },
+    {
+      "epoch": 0.13198291214614266,
+      "grad_norm": 0.0825965404510498,
+      "learning_rate": 0.00010257479136549889,
+      "loss": 0.0278,
+      "step": 1510
+    },
+    {
+      "epoch": 0.1320703180482262,
+      "grad_norm": 0.09572532027959824,
+      "learning_rate": 0.00010247182112885861,
+      "loss": 0.0352,
+      "step": 1511
+    },
+    {
+      "epoch": 0.13215772395030975,
+      "grad_norm": 0.06361362338066101,
+      "learning_rate": 0.00010236884826970878,
+      "loss": 0.0197,
+      "step": 1512
+    },
+    {
+      "epoch": 0.13224512985239328,
+      "grad_norm": 0.08282926678657532,
+      "learning_rate": 0.00010226587289729988,
+      "loss": 0.0324,
+      "step": 1513
+    },
+    {
+      "epoch": 0.13233253575447682,
+      "grad_norm": 0.13065838813781738,
+      "learning_rate": 0.00010216289512088479,
+      "loss": 0.0547,
+      "step": 1514
+    },
+    {
+      "epoch": 0.13241994165656035,
+      "grad_norm": 0.060922764241695404,
+      "learning_rate": 0.00010205991504971912,
+      "loss": 0.0205,
+      "step": 1515
+    },
+    {
+      "epoch": 0.1325073475586439,
+      "grad_norm": 0.07022725045681,
+      "learning_rate": 0.00010195693279306089,
+      "loss": 0.0221,
+      "step": 1516
+    },
+    {
+      "epoch": 0.13259475346072744,
+      "grad_norm": 0.10593367367982864,
+      "learning_rate": 0.00010185394846017036,
+      "loss": 0.036,
+      "step": 1517
+    },
+    {
+      "epoch": 0.13268215936281097,
+      "grad_norm": 0.07627231627702713,
+      "learning_rate": 0.00010175096216031006,
+      "loss": 0.0322,
+      "step": 1518
+    },
+    {
+      "epoch": 0.1327695652648945,
+      "grad_norm": 0.05789932608604431,
+      "learning_rate": 0.0001016479740027446,
+      "loss": 0.0289,
+      "step": 1519
+    },
+    {
+      "epoch": 0.13285697116697806,
+      "grad_norm": 0.09948685020208359,
+      "learning_rate": 0.00010154498409674051,
+      "loss": 0.0331,
+      "step": 1520
+    },
+    {
+      "epoch": 0.1329443770690616,
+      "grad_norm": 0.07831980288028717,
+      "learning_rate": 0.00010144199255156631,
+      "loss": 0.0306,
+      "step": 1521
+    },
+    {
+      "epoch": 0.13303178297114512,
+      "grad_norm": 0.10856369137763977,
+      "learning_rate": 0.00010133899947649209,
+      "loss": 0.0355,
+      "step": 1522
+    },
+    {
+      "epoch": 0.13311918887322866,
+      "grad_norm": 0.16798873245716095,
+      "learning_rate": 0.0001012360049807896,
+      "loss": 0.0508,
+      "step": 1523
+    },
+    {
+      "epoch": 0.13320659477531221,
+      "grad_norm": 0.11936990171670914,
+      "learning_rate": 0.00010113300917373226,
+      "loss": 0.0575,
+      "step": 1524
+    },
+    {
+      "epoch": 0.13329400067739575,
+      "grad_norm": 0.09081139415502548,
+      "learning_rate": 0.00010103001216459469,
+      "loss": 0.0288,
+      "step": 1525
+    },
+    {
+      "epoch": 0.13338140657947928,
+      "grad_norm": 0.08312739431858063,
+      "learning_rate": 0.0001009270140626528,
+      "loss": 0.0196,
+      "step": 1526
+    },
+    {
+      "epoch": 0.1334688124815628,
+      "grad_norm": 0.08712684363126755,
+      "learning_rate": 0.0001008240149771838,
+      "loss": 0.0302,
+      "step": 1527
+    },
+    {
+      "epoch": 0.13355621838364637,
+      "grad_norm": 0.17785540223121643,
+      "learning_rate": 0.0001007210150174658,
+      "loss": 0.0321,
+      "step": 1528
+    },
+    {
+      "epoch": 0.1336436242857299,
+      "grad_norm": 0.119221031665802,
+      "learning_rate": 0.00010061801429277796,
+      "loss": 0.0481,
+      "step": 1529
+    },
+    {
+      "epoch": 0.13373103018781343,
+      "grad_norm": 0.04911545664072037,
+      "learning_rate": 0.00010051501291240008,
+      "loss": 0.0181,
+      "step": 1530
+    },
+    {
+      "epoch": 0.13381843608989696,
+      "grad_norm": 0.05027123540639877,
+      "learning_rate": 0.00010041201098561286,
+      "loss": 0.0183,
+      "step": 1531
+    },
+    {
+      "epoch": 0.1339058419919805,
+      "grad_norm": 0.13207297027111053,
+      "learning_rate": 0.00010030900862169744,
+      "loss": 0.0185,
+      "step": 1532
+    },
+    {
+      "epoch": 0.13399324789406405,
+      "grad_norm": 0.05222494527697563,
+      "learning_rate": 0.00010020600592993548,
+      "loss": 0.0165,
+      "step": 1533
+    },
+    {
+      "epoch": 0.1340806537961476,
+      "grad_norm": 0.07455245405435562,
+      "learning_rate": 0.00010010300301960888,
+      "loss": 0.0299,
+      "step": 1534
+    },
+    {
+      "epoch": 0.13416805969823112,
+      "grad_norm": 0.07873023301362991,
+      "learning_rate": 0.0001,
+      "loss": 0.0283,
+      "step": 1535
+    },
+    {
+      "epoch": 0.13425546560031465,
+      "grad_norm": 0.07464205473661423,
+      "learning_rate": 9.989699698039111e-05,
+      "loss": 0.0312,
+      "step": 1536
+    },
+    {
+      "epoch": 0.1343428715023982,
+      "grad_norm": 0.08801527321338654,
+      "learning_rate": 9.979399407006458e-05,
+      "loss": 0.0233,
+      "step": 1537
+    },
+    {
+      "epoch": 0.13443027740448174,
+      "grad_norm": 0.104781374335289,
+      "learning_rate": 9.969099137830259e-05,
+      "loss": 0.0353,
+      "step": 1538
+    },
+    {
+      "epoch": 0.13451768330656527,
+      "grad_norm": 0.0791974663734436,
+      "learning_rate": 9.958798901438715e-05,
+      "loss": 0.0283,
+      "step": 1539
+    },
+    {
+      "epoch": 0.1346050892086488,
+      "grad_norm": 0.060504913330078125,
+      "learning_rate": 9.948498708759993e-05,
+      "loss": 0.0192,
+      "step": 1540
+    },
+    {
+      "epoch": 0.13469249511073236,
+      "grad_norm": 0.04931584745645523,
+      "learning_rate": 9.938198570722207e-05,
+      "loss": 0.0225,
+      "step": 1541
+    },
+    {
+      "epoch": 0.1347799010128159,
+      "grad_norm": 0.10144636034965515,
+      "learning_rate": 9.927898498253422e-05,
+      "loss": 0.0259,
+      "step": 1542
+    },
+    {
+      "epoch": 0.13486730691489943,
+      "grad_norm": 0.06569879502058029,
+      "learning_rate": 9.917598502281621e-05,
+      "loss": 0.0258,
+      "step": 1543
+    },
+    {
+      "epoch": 0.13495471281698296,
+      "grad_norm": 0.06734506040811539,
+      "learning_rate": 9.90729859373472e-05,
+      "loss": 0.0259,
+      "step": 1544
+    },
+    {
+      "epoch": 0.13504211871906652,
+      "grad_norm": 0.07036790996789932,
+      "learning_rate": 9.896998783540536e-05,
+      "loss": 0.0289,
+      "step": 1545
+    },
+    {
+      "epoch": 0.13512952462115005,
+      "grad_norm": 0.05877295881509781,
+      "learning_rate": 9.886699082626775e-05,
+      "loss": 0.0193,
+      "step": 1546
+    },
+    {
+      "epoch": 0.13521693052323358,
+      "grad_norm": 0.0712662935256958,
+      "learning_rate": 9.87639950192104e-05,
+      "loss": 0.0256,
+      "step": 1547
+    },
+    {
+      "epoch": 0.1353043364253171,
+      "grad_norm": 0.10854820907115936,
+      "learning_rate": 9.866100052350796e-05,
+      "loss": 0.0423,
+      "step": 1548
+    },
+    {
+      "epoch": 0.13539174232740067,
+      "grad_norm": 0.10757939517498016,
+      "learning_rate": 9.855800744843372e-05,
+      "loss": 0.0378,
+      "step": 1549
+    },
+    {
+      "epoch": 0.1354791482294842,
+      "grad_norm": 0.07478786259889603,
+      "learning_rate": 9.845501590325948e-05,
+      "loss": 0.0251,
+      "step": 1550
+    },
+    {
+      "epoch": 0.13556655413156773,
+      "grad_norm": 0.08897072076797485,
+      "learning_rate": 9.835202599725544e-05,
+      "loss": 0.027,
+      "step": 1551
+    },
+    {
+      "epoch": 0.13565396003365127,
+      "grad_norm": 0.07537046819925308,
+      "learning_rate": 9.824903783968996e-05,
+      "loss": 0.0223,
+      "step": 1552
+    },
+    {
+      "epoch": 0.1357413659357348,
+      "grad_norm": 0.0578019842505455,
+      "learning_rate": 9.814605153982967e-05,
+      "loss": 0.0197,
+      "step": 1553
+    },
+    {
+      "epoch": 0.13582877183781836,
+      "grad_norm": 0.0902201235294342,
+      "learning_rate": 9.804306720693913e-05,
+      "loss": 0.0264,
+      "step": 1554
+    },
+    {
+      "epoch": 0.1359161777399019,
+      "grad_norm": 0.1797989010810852,
+      "learning_rate": 9.794008495028087e-05,
+      "loss": 0.0301,
+      "step": 1555
+    },
+    {
+      "epoch": 0.13600358364198542,
+      "grad_norm": 0.1257629692554474,
+      "learning_rate": 9.783710487911523e-05,
+      "loss": 0.0292,
+      "step": 1556
+    },
+    {
+      "epoch": 0.13609098954406895,
+      "grad_norm": 0.0894462838768959,
+      "learning_rate": 9.773412710270016e-05,
+      "loss": 0.0302,
+      "step": 1557
+    },
+    {
+      "epoch": 0.1361783954461525,
+      "grad_norm": 0.1309974491596222,
+      "learning_rate": 9.763115173029121e-05,
+      "loss": 0.038,
+      "step": 1558
+    },
+    {
+      "epoch": 0.13626580134823604,
+      "grad_norm": 0.15583781898021698,
+      "learning_rate": 9.752817887114146e-05,
+      "loss": 0.0389,
+      "step": 1559
+    },
+    {
+      "epoch": 0.13635320725031957,
+      "grad_norm": 0.055186927318573,
+      "learning_rate": 9.742520863450115e-05,
+      "loss": 0.0215,
+      "step": 1560
+    },
+    {
+      "epoch": 0.1364406131524031,
+      "grad_norm": 0.06592228263616562,
+      "learning_rate": 9.73222411296179e-05,
+      "loss": 0.0205,
+      "step": 1561
+    },
+    {
+      "epoch": 0.13652801905448667,
+      "grad_norm": 0.16731403768062592,
+      "learning_rate": 9.721927646573639e-05,
+      "loss": 0.0476,
+      "step": 1562
+    },
+    {
+      "epoch": 0.1366154249565702,
+      "grad_norm": 0.07706723362207413,
+      "learning_rate": 9.71163147520982e-05,
+      "loss": 0.0264,
+      "step": 1563
+    },
+    {
+      "epoch": 0.13670283085865373,
+      "grad_norm": 0.0804000049829483,
+      "learning_rate": 9.70133560979419e-05,
+      "loss": 0.0284,
+      "step": 1564
+    },
+    {
+      "epoch": 0.13679023676073726,
+      "grad_norm": 0.1061941385269165,
+      "learning_rate": 9.691040061250273e-05,
+      "loss": 0.0333,
+      "step": 1565
+    },
+    {
+      "epoch": 0.13687764266282082,
+      "grad_norm": 0.06735522300004959,
+      "learning_rate": 9.680744840501261e-05,
+      "loss": 0.0242,
+      "step": 1566
+    },
+    {
+      "epoch": 0.13696504856490435,
+      "grad_norm": 0.16169671714305878,
+      "learning_rate": 9.670449958469999e-05,
+      "loss": 0.0599,
+      "step": 1567
+    },
+    {
+      "epoch": 0.13705245446698788,
+      "grad_norm": 0.054925642907619476,
+      "learning_rate": 9.660155426078964e-05,
+      "loss": 0.0177,
+      "step": 1568
+    },
+    {
+      "epoch": 0.13713986036907141,
+      "grad_norm": 0.10755883902311325,
+      "learning_rate": 9.649861254250275e-05,
+      "loss": 0.0349,
+      "step": 1569
+    },
+    {
+      "epoch": 0.13722726627115495,
+      "grad_norm": 0.06160590425133705,
+      "learning_rate": 9.639567453905661e-05,
+      "loss": 0.024,
+      "step": 1570
+    },
+    {
+      "epoch": 0.1373146721732385,
+      "grad_norm": 0.20590682327747345,
+      "learning_rate": 9.629274035966457e-05,
+      "loss": 0.0551,
+      "step": 1571
+    },
+    {
+      "epoch": 0.13740207807532204,
+      "grad_norm": 0.1019708439707756,
+      "learning_rate": 9.618981011353592e-05,
+      "loss": 0.0335,
+      "step": 1572
+    },
+    {
+      "epoch": 0.13748948397740557,
+      "grad_norm": 0.06255567073822021,
+      "learning_rate": 9.608688390987584e-05,
+      "loss": 0.0234,
+      "step": 1573
+    },
+    {
+      "epoch": 0.1375768898794891,
+      "grad_norm": 0.10318184643983841,
+      "learning_rate": 9.59839618578851e-05,
+      "loss": 0.039,
+      "step": 1574
+    },
+    {
+      "epoch": 0.13766429578157266,
+      "grad_norm": 0.1166694313287735,
+      "learning_rate": 9.58810440667602e-05,
+      "loss": 0.0457,
+      "step": 1575
+    },
+    {
+      "epoch": 0.1377517016836562,
+      "grad_norm": 0.1220555454492569,
+      "learning_rate": 9.577813064569301e-05,
+      "loss": 0.0469,
+      "step": 1576
+    },
+    {
+      "epoch": 0.13783910758573972,
+      "grad_norm": 0.06090294197201729,
+      "learning_rate": 9.567522170387082e-05,
+      "loss": 0.0246,
+      "step": 1577
+    },
+    {
+      "epoch": 0.13792651348782325,
+      "grad_norm": 0.14577405154705048,
+      "learning_rate": 9.557231735047619e-05,
+      "loss": 0.0258,
+      "step": 1578
+    },
+    {
+      "epoch": 0.13801391938990681,
+      "grad_norm": 0.10201065242290497,
+      "learning_rate": 9.546941769468673e-05,
+      "loss": 0.0313,
+      "step": 1579
+    },
+    {
+      "epoch": 0.13810132529199035,
+      "grad_norm": 0.10618524253368378,
+      "learning_rate": 9.536652284567513e-05,
+      "loss": 0.0301,
+      "step": 1580
+    },
+    {
+      "epoch": 0.13818873119407388,
+      "grad_norm": 0.1036718562245369,
+      "learning_rate": 9.5263632912609e-05,
+      "loss": 0.0289,
+      "step": 1581
+    },
+    {
+      "epoch": 0.1382761370961574,
+      "grad_norm": 0.10260613262653351,
+      "learning_rate": 9.516074800465064e-05,
+      "loss": 0.0358,
+      "step": 1582
+    },
+    {
+      "epoch": 0.13836354299824097,
+      "grad_norm": 0.09655741602182388,
+      "learning_rate": 9.505786823095712e-05,
+      "loss": 0.0363,
+      "step": 1583
+    },
+    {
+      "epoch": 0.1384509489003245,
+      "grad_norm": 0.17335118353366852,
+      "learning_rate": 9.495499370068004e-05,
+      "loss": 0.029,
+      "step": 1584
+    },
+    {
+      "epoch": 0.13853835480240803,
+      "grad_norm": 0.07364212721586227,
+      "learning_rate": 9.485212452296535e-05,
+      "loss": 0.0333,
+      "step": 1585
+    },
+    {
+      "epoch": 0.13862576070449156,
+      "grad_norm": 0.1601741462945938,
+      "learning_rate": 9.474926080695345e-05,
+      "loss": 0.0691,
+      "step": 1586
+    },
+    {
+      "epoch": 0.13871316660657512,
+      "grad_norm": 0.06292049586772919,
+      "learning_rate": 9.464640266177883e-05,
+      "loss": 0.0215,
+      "step": 1587
+    },
+    {
+      "epoch": 0.13880057250865865,
+      "grad_norm": 0.09334374964237213,
+      "learning_rate": 9.454355019657017e-05,
+      "loss": 0.0269,
+      "step": 1588
+    },
+    {
+      "epoch": 0.13888797841074219,
+      "grad_norm": 0.07139234989881516,
+      "learning_rate": 9.444070352045006e-05,
+      "loss": 0.025,
+      "step": 1589
+    },
+    {
+      "epoch": 0.13897538431282572,
+      "grad_norm": 0.07587097585201263,
+      "learning_rate": 9.433786274253495e-05,
+      "loss": 0.0256,
+      "step": 1590
+    },
+    {
+      "epoch": 0.13906279021490925,
+      "grad_norm": 0.1509503573179245,
+      "learning_rate": 9.423502797193508e-05,
+      "loss": 0.0459,
+      "step": 1591
+    },
+    {
+      "epoch": 0.1391501961169928,
+      "grad_norm": 0.15848566591739655,
+      "learning_rate": 9.413219931775426e-05,
+      "loss": 0.0469,
+      "step": 1592
+    },
+    {
+      "epoch": 0.13923760201907634,
+      "grad_norm": 0.126972034573555,
+      "learning_rate": 9.402937688908983e-05,
+      "loss": 0.0427,
+      "step": 1593
+    },
+    {
+      "epoch": 0.13932500792115987,
+      "grad_norm": 0.10220225900411606,
+      "learning_rate": 9.392656079503256e-05,
+      "loss": 0.0349,
+      "step": 1594
+    },
+    {
+      "epoch": 0.1394124138232434,
+      "grad_norm": 0.07176347076892853,
+      "learning_rate": 9.382375114466644e-05,
+      "loss": 0.0295,
+      "step": 1595
+    },
+    {
+      "epoch": 0.13949981972532696,
+      "grad_norm": 0.13212533295154572,
+      "learning_rate": 9.372094804706867e-05,
+      "loss": 0.0551,
+      "step": 1596
+    },
+    {
+      "epoch": 0.1395872256274105,
+      "grad_norm": 0.10252740979194641,
+      "learning_rate": 9.36181516113095e-05,
+      "loss": 0.0403,
+      "step": 1597
+    },
+    {
+      "epoch": 0.13967463152949403,
+      "grad_norm": 0.06956984102725983,
+      "learning_rate": 9.351536194645198e-05,
+      "loss": 0.0216,
+      "step": 1598
+    },
+    {
+      "epoch": 0.13976203743157756,
+      "grad_norm": 0.11074226349592209,
+      "learning_rate": 9.341257916155222e-05,
+      "loss": 0.0274,
+      "step": 1599
+    },
+    {
+      "epoch": 0.13984944333366112,
+      "grad_norm": 0.14012524485588074,
+      "learning_rate": 9.330980336565887e-05,
+      "loss": 0.0365,
+      "step": 1600
+    },
+    {
+      "epoch": 0.13984944333366112,
+      "eval_loss": 0.04779767617583275,
+      "eval_runtime": 168.5729,
+      "eval_samples_per_second": 29.661,
+      "eval_steps_per_second": 7.415,
+      "step": 1600
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 2
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 4.835480666709688e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null