phospho-app
/

nebo1337-GetTheRubber-l5ydy3nt7g

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 19.931506849315067,
+  "eval_steps": 500,
+  "global_step": 1455,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.136986301369863,
+      "grad_norm": 7.088261127471924,
+      "learning_rate": 2.7397260273972603e-05,
+      "loss": 1.4774,
+      "step": 10
+    },
+    {
+      "epoch": 0.273972602739726,
+      "grad_norm": 3.0658137798309326,
+      "learning_rate": 5.479452054794521e-05,
+      "loss": 0.5535,
+      "step": 20
+    },
+    {
+      "epoch": 0.410958904109589,
+      "grad_norm": 1.415347695350647,
+      "learning_rate": 8.219178082191781e-05,
+      "loss": 0.2769,
+      "step": 30
+    },
+    {
+      "epoch": 0.547945205479452,
+      "grad_norm": 1.2462260723114014,
+      "learning_rate": 0.00010958904109589041,
+      "loss": 0.2071,
+      "step": 40
+    },
+    {
+      "epoch": 0.684931506849315,
+      "grad_norm": 1.1219278573989868,
+      "learning_rate": 0.000136986301369863,
+      "loss": 0.179,
+      "step": 50
+    },
+    {
+      "epoch": 0.821917808219178,
+      "grad_norm": 2.4104228019714355,
+      "learning_rate": 0.00016438356164383562,
+      "loss": 0.1587,
+      "step": 60
+    },
+    {
+      "epoch": 0.958904109589041,
+      "grad_norm": 1.2239787578582764,
+      "learning_rate": 0.0001917808219178082,
+      "loss": 0.1366,
+      "step": 70
+    },
+    {
+      "epoch": 1.095890410958904,
+      "grad_norm": 0.9942715167999268,
+      "learning_rate": 0.00019998733979961563,
+      "loss": 0.1218,
+      "step": 80
+    },
+    {
+      "epoch": 1.2328767123287672,
+      "grad_norm": 0.6293880939483643,
+      "learning_rate": 0.0001999253383717226,
+      "loss": 0.1168,
+      "step": 90
+    },
+    {
+      "epoch": 1.36986301369863,
+      "grad_norm": 0.7170248031616211,
+      "learning_rate": 0.00019981170237143067,
+      "loss": 0.1052,
+      "step": 100
+    },
+    {
+      "epoch": 1.5068493150684932,
+      "grad_norm": 0.7464343905448914,
+      "learning_rate": 0.00019964649051804355,
+      "loss": 0.1066,
+      "step": 110
+    },
+    {
+      "epoch": 1.643835616438356,
+      "grad_norm": 0.6828764081001282,
+      "learning_rate": 0.000199429788181734,
+      "loss": 0.1057,
+      "step": 120
+    },
+    {
+      "epoch": 1.7808219178082192,
+      "grad_norm": 0.6028720736503601,
+      "learning_rate": 0.0001991617073394306,
+      "loss": 0.0843,
+      "step": 130
+    },
+    {
+      "epoch": 1.9178082191780823,
+      "grad_norm": 0.5440357327461243,
+      "learning_rate": 0.00019884238651695556,
+      "loss": 0.0948,
+      "step": 140
+    },
+    {
+      "epoch": 2.0547945205479454,
+      "grad_norm": 0.8612964749336243,
+      "learning_rate": 0.00019847199071744415,
+      "loss": 0.085,
+      "step": 150
+    },
+    {
+      "epoch": 2.191780821917808,
+      "grad_norm": 0.889124870300293,
+      "learning_rate": 0.00019805071133608242,
+      "loss": 0.0962,
+      "step": 160
+    },
+    {
+      "epoch": 2.328767123287671,
+      "grad_norm": 0.45466411113739014,
+      "learning_rate": 0.0001975787660612072,
+      "loss": 0.0763,
+      "step": 170
+    },
+    {
+      "epoch": 2.4657534246575343,
+      "grad_norm": 0.42088282108306885,
+      "learning_rate": 0.00019705639876181969,
+      "loss": 0.0635,
+      "step": 180
+    },
+    {
+      "epoch": 2.602739726027397,
+      "grad_norm": 0.5170985460281372,
+      "learning_rate": 0.00019648387936157068,
+      "loss": 0.0726,
+      "step": 190
+    },
+    {
+      "epoch": 2.73972602739726,
+      "grad_norm": 0.4313249886035919,
+      "learning_rate": 0.00019586150369928245,
+      "loss": 0.0669,
+      "step": 200
+    },
+    {
+      "epoch": 2.8767123287671232,
+      "grad_norm": 0.3355115056037903,
+      "learning_rate": 0.00019518959337607957,
+      "loss": 0.0682,
+      "step": 210
+    },
+    {
+      "epoch": 3.0136986301369864,
+      "grad_norm": 0.34427109360694885,
+      "learning_rate": 0.0001944684955892075,
+      "loss": 0.0638,
+      "step": 220
+    },
+    {
+      "epoch": 3.1506849315068495,
+      "grad_norm": 0.2929873466491699,
+      "learning_rate": 0.0001936985829526247,
+      "loss": 0.0632,
+      "step": 230
+    },
+    {
+      "epoch": 3.287671232876712,
+      "grad_norm": 0.3884938657283783,
+      "learning_rate": 0.00019288025330446126,
+      "loss": 0.0655,
+      "step": 240
+    },
+    {
+      "epoch": 3.4246575342465753,
+      "grad_norm": 0.27399152517318726,
+      "learning_rate": 0.00019201392950144363,
+      "loss": 0.0533,
+      "step": 250
+    },
+    {
+      "epoch": 3.5616438356164384,
+      "grad_norm": 0.2924444079399109,
+      "learning_rate": 0.0001911000592003909,
+      "loss": 0.0589,
+      "step": 260
+    },
+    {
+      "epoch": 3.6986301369863015,
+      "grad_norm": 0.43013861775398254,
+      "learning_rate": 0.00019013911462689668,
+      "loss": 0.0615,
+      "step": 270
+    },
+    {
+      "epoch": 3.8356164383561646,
+      "grad_norm": 0.5247001647949219,
+      "learning_rate": 0.000189131592331315,
+      "loss": 0.0583,
+      "step": 280
+    },
+    {
+      "epoch": 3.9726027397260273,
+      "grad_norm": 0.5796880722045898,
+      "learning_rate": 0.00018807801293217735,
+      "loss": 0.0556,
+      "step": 290
+    },
+    {
+      "epoch": 4.109589041095891,
+      "grad_norm": 0.5179729461669922,
+      "learning_rate": 0.00018697892084717238,
+      "loss": 0.056,
+      "step": 300
+    },
+    {
+      "epoch": 4.2465753424657535,
+      "grad_norm": 0.42960262298583984,
+      "learning_rate": 0.00018583488401182843,
+      "loss": 0.0637,
+      "step": 310
+    },
+    {
+      "epoch": 4.383561643835616,
+      "grad_norm": 0.3196163773536682,
+      "learning_rate": 0.0001846464935860431,
+      "loss": 0.0518,
+      "step": 320
+    },
+    {
+      "epoch": 4.52054794520548,
+      "grad_norm": 0.4424096643924713,
+      "learning_rate": 0.0001834143636486124,
+      "loss": 0.0524,
+      "step": 330
+    },
+    {
+      "epoch": 4.657534246575342,
+      "grad_norm": 0.50010746717453,
+      "learning_rate": 0.00018213913087991685,
+      "loss": 0.0629,
+      "step": 340
+    },
+    {
+      "epoch": 4.794520547945205,
+      "grad_norm": 0.4036540389060974,
+      "learning_rate": 0.00018082145423292868,
+      "loss": 0.0531,
+      "step": 350
+    },
+    {
+      "epoch": 4.931506849315069,
+      "grad_norm": 0.36036092042922974,
+      "learning_rate": 0.0001794620145927101,
+      "loss": 0.0556,
+      "step": 360
+    },
+    {
+      "epoch": 5.068493150684931,
+      "grad_norm": 0.22472509741783142,
+      "learning_rate": 0.00017806151442457827,
+      "loss": 0.0446,
+      "step": 370
+    },
+    {
+      "epoch": 5.205479452054795,
+      "grad_norm": 0.3514921963214874,
+      "learning_rate": 0.00017662067741111974,
+      "loss": 0.0443,
+      "step": 380
+    },
+    {
+      "epoch": 5.342465753424658,
+      "grad_norm": 0.2920095920562744,
+      "learning_rate": 0.00017514024807824055,
+      "loss": 0.0451,
+      "step": 390
+    },
+    {
+      "epoch": 5.47945205479452,
+      "grad_norm": 0.21051590144634247,
+      "learning_rate": 0.00017362099141044626,
+      "loss": 0.0476,
+      "step": 400
+    },
+    {
+      "epoch": 5.616438356164384,
+      "grad_norm": 0.36196619272232056,
+      "learning_rate": 0.00017206369245555036,
+      "loss": 0.0521,
+      "step": 410
+    },
+    {
+      "epoch": 5.7534246575342465,
+      "grad_norm": 0.3503723442554474,
+      "learning_rate": 0.0001704691559190155,
+      "loss": 0.0472,
+      "step": 420
+    },
+    {
+      "epoch": 5.890410958904109,
+      "grad_norm": 0.3881896734237671,
+      "learning_rate": 0.0001688382057481364,
+      "loss": 0.0537,
+      "step": 430
+    },
+    {
+      "epoch": 6.027397260273973,
+      "grad_norm": 0.29409492015838623,
+      "learning_rate": 0.00016717168470628077,
+      "loss": 0.0436,
+      "step": 440
+    },
+    {
+      "epoch": 6.164383561643835,
+      "grad_norm": 0.2455558031797409,
+      "learning_rate": 0.0001654704539374066,
+      "loss": 0.0429,
+      "step": 450
+    },
+    {
+      "epoch": 6.301369863013699,
+      "grad_norm": 0.30749672651290894,
+      "learning_rate": 0.00016373539252108202,
+      "loss": 0.042,
+      "step": 460
+    },
+    {
+      "epoch": 6.438356164383562,
+      "grad_norm": 0.4117829501628876,
+      "learning_rate": 0.00016196739701823716,
+      "loss": 0.0422,
+      "step": 470
+    },
+    {
+      "epoch": 6.575342465753424,
+      "grad_norm": 0.3047957718372345,
+      "learning_rate": 0.00016016738100788297,
+      "loss": 0.0456,
+      "step": 480
+    },
+    {
+      "epoch": 6.712328767123288,
+      "grad_norm": 0.3104310631752014,
+      "learning_rate": 0.00015833627461503595,
+      "loss": 0.0405,
+      "step": 490
+    },
+    {
+      "epoch": 6.8493150684931505,
+      "grad_norm": 0.3713166415691376,
+      "learning_rate": 0.0001564750240300934,
+      "loss": 0.0451,
+      "step": 500
+    },
+    {
+      "epoch": 6.986301369863014,
+      "grad_norm": 0.23804673552513123,
+      "learning_rate": 0.00015458459101990693,
+      "loss": 0.0387,
+      "step": 510
+    },
+    {
+      "epoch": 7.123287671232877,
+      "grad_norm": 0.4476951062679291,
+      "learning_rate": 0.00015266595243080714,
+      "loss": 0.0406,
+      "step": 520
+    },
+    {
+      "epoch": 7.260273972602739,
+      "grad_norm": 0.27973777055740356,
+      "learning_rate": 0.00015072009968383656,
+      "loss": 0.0464,
+      "step": 530
+    },
+    {
+      "epoch": 7.397260273972603,
+      "grad_norm": 0.3597777783870697,
+      "learning_rate": 0.00014874803826245089,
+      "loss": 0.0459,
+      "step": 540
+    },
+    {
+      "epoch": 7.534246575342466,
+      "grad_norm": 0.27027377486228943,
+      "learning_rate": 0.00014675078719295415,
+      "loss": 0.0375,
+      "step": 550
+    },
+    {
+      "epoch": 7.671232876712329,
+      "grad_norm": 0.27681443095207214,
+      "learning_rate": 0.00014472937851793557,
+      "loss": 0.0421,
+      "step": 560
+    },
+    {
+      "epoch": 7.808219178082192,
+      "grad_norm": 0.3312411904335022,
+      "learning_rate": 0.00014268485676298078,
+      "loss": 0.048,
+      "step": 570
+    },
+    {
+      "epoch": 7.945205479452055,
+      "grad_norm": 0.2358381599187851,
+      "learning_rate": 0.0001406182783969324,
+      "loss": 0.0409,
+      "step": 580
+    },
+    {
+      "epoch": 8.082191780821917,
+      "grad_norm": 0.19072838127613068,
+      "learning_rate": 0.00013853071128597924,
+      "loss": 0.0417,
+      "step": 590
+    },
+    {
+      "epoch": 8.219178082191782,
+      "grad_norm": 0.3328644931316376,
+      "learning_rate": 0.0001364232341418564,
+      "loss": 0.0397,
+      "step": 600
+    },
+    {
+      "epoch": 8.356164383561644,
+      "grad_norm": 0.27157458662986755,
+      "learning_rate": 0.00013429693596444067,
+      "loss": 0.0395,
+      "step": 610
+    },
+    {
+      "epoch": 8.493150684931507,
+      "grad_norm": 0.2969032824039459,
+      "learning_rate": 0.00013215291547903006,
+      "loss": 0.0406,
+      "step": 620
+    },
+    {
+      "epoch": 8.63013698630137,
+      "grad_norm": 0.2864357829093933,
+      "learning_rate": 0.00012999228056859784,
+      "loss": 0.0424,
+      "step": 630
+    },
+    {
+      "epoch": 8.767123287671232,
+      "grad_norm": 0.25885725021362305,
+      "learning_rate": 0.00012781614770131442,
+      "loss": 0.0392,
+      "step": 640
+    },
+    {
+      "epoch": 8.904109589041095,
+      "grad_norm": 0.2456735372543335,
+      "learning_rate": 0.00012562564135363313,
+      "loss": 0.0415,
+      "step": 650
+    },
+    {
+      "epoch": 9.04109589041096,
+      "grad_norm": 0.41431066393852234,
+      "learning_rate": 0.0001234218934292376,
+      "loss": 0.0407,
+      "step": 660
+    },
+    {
+      "epoch": 9.178082191780822,
+      "grad_norm": 0.260213702917099,
+      "learning_rate": 0.00012120604267415172,
+      "loss": 0.0393,
+      "step": 670
+    },
+    {
+      "epoch": 9.315068493150685,
+      "grad_norm": 0.3395901322364807,
+      "learning_rate": 0.00011897923408831346,
+      "loss": 0.035,
+      "step": 680
+    },
+    {
+      "epoch": 9.452054794520548,
+      "grad_norm": 0.3405311405658722,
+      "learning_rate": 0.0001167426183339174,
+      "loss": 0.0342,
+      "step": 690
+    },
+    {
+      "epoch": 9.58904109589041,
+      "grad_norm": 0.20802819728851318,
+      "learning_rate": 0.00011449735114083127,
+      "loss": 0.0347,
+      "step": 700
+    },
+    {
+      "epoch": 9.726027397260275,
+      "grad_norm": 0.5094506144523621,
+      "learning_rate": 0.00011224459270939384,
+      "loss": 0.0373,
+      "step": 710
+    },
+    {
+      "epoch": 9.863013698630137,
+      "grad_norm": 0.21799403429031372,
+      "learning_rate": 0.000109985507110903,
+      "loss": 0.0392,
+      "step": 720
+    },
+    {
+      "epoch": 10.0,
+      "grad_norm": 0.28433603048324585,
+      "learning_rate": 0.00010772126168610325,
+      "loss": 0.0373,
+      "step": 730
+    },
+    {
+      "epoch": 10.136986301369863,
+      "grad_norm": 0.3425813913345337,
+      "learning_rate": 0.00010545302644198405,
+      "loss": 0.0385,
+      "step": 740
+    },
+    {
+      "epoch": 10.273972602739725,
+      "grad_norm": 0.2662697434425354,
+      "learning_rate": 0.00010318197344720018,
+      "loss": 0.0347,
+      "step": 750
+    },
+    {
+      "epoch": 10.41095890410959,
+      "grad_norm": 0.2841816842556,
+      "learning_rate": 0.0001009092762264271,
+      "loss": 0.04,
+      "step": 760
+    },
+    {
+      "epoch": 10.547945205479452,
+      "grad_norm": 0.2933363914489746,
+      "learning_rate": 9.863610915396365e-05,
+      "loss": 0.0363,
+      "step": 770
+    },
+    {
+      "epoch": 10.684931506849315,
+      "grad_norm": 0.20692330598831177,
+      "learning_rate": 9.63636468468959e-05,
+      "loss": 0.0361,
+      "step": 780
+    },
+    {
+      "epoch": 10.821917808219178,
+      "grad_norm": 0.24741721153259277,
+      "learning_rate": 9.409306355813529e-05,
+      "loss": 0.0341,
+      "step": 790
+    },
+    {
+      "epoch": 10.95890410958904,
+      "grad_norm": 0.1948077529668808,
+      "learning_rate": 9.18255325696454e-05,
+      "loss": 0.0349,
+      "step": 800
+    },
+    {
+      "epoch": 11.095890410958905,
+      "grad_norm": 0.16165360808372498,
+      "learning_rate": 8.956222558616998e-05,
+      "loss": 0.0318,
+      "step": 810
+    },
+    {
+      "epoch": 11.232876712328768,
+      "grad_norm": 0.25702184438705444,
+      "learning_rate": 8.730431212977625e-05,
+      "loss": 0.0281,
+      "step": 820
+    },
+    {
+      "epoch": 11.36986301369863,
+      "grad_norm": 0.27587395906448364,
+      "learning_rate": 8.505295893552594e-05,
+      "loss": 0.0349,
+      "step": 830
+    },
+    {
+      "epoch": 11.506849315068493,
+      "grad_norm": 0.3140430152416229,
+      "learning_rate": 8.280932934858652e-05,
+      "loss": 0.0305,
+      "step": 840
+    },
+    {
+      "epoch": 11.643835616438356,
+      "grad_norm": 0.21165433526039124,
+      "learning_rate": 8.05745827230941e-05,
+      "loss": 0.0314,
+      "step": 850
+    },
+    {
+      "epoch": 11.780821917808218,
+      "grad_norm": 0.20445489883422852,
+      "learning_rate": 7.834987382307861e-05,
+      "loss": 0.0319,
+      "step": 860
+    },
+    {
+      "epoch": 11.917808219178083,
+      "grad_norm": 0.27832481265068054,
+      "learning_rate": 7.613635222576072e-05,
+      "loss": 0.0334,
+      "step": 870
+    },
+    {
+      "epoch": 12.054794520547945,
+      "grad_norm": 0.25728923082351685,
+      "learning_rate": 7.393516172752919e-05,
+      "loss": 0.033,
+      "step": 880
+    },
+    {
+      "epoch": 12.191780821917808,
+      "grad_norm": 0.2254086136817932,
+      "learning_rate": 7.174743975290513e-05,
+      "loss": 0.0346,
+      "step": 890
+    },
+    {
+      "epoch": 12.32876712328767,
+      "grad_norm": 0.31018713116645813,
+      "learning_rate": 6.957431676679896e-05,
+      "loss": 0.0329,
+      "step": 900
+    },
+    {
+      "epoch": 12.465753424657533,
+      "grad_norm": 0.32662343978881836,
+      "learning_rate": 6.741691569036338e-05,
+      "loss": 0.0342,
+      "step": 910
+    },
+    {
+      "epoch": 12.602739726027398,
+      "grad_norm": 0.2533169984817505,
+      "learning_rate": 6.527635132074493e-05,
+      "loss": 0.0264,
+      "step": 920
+    },
+    {
+      "epoch": 12.73972602739726,
+      "grad_norm": 0.27445635199546814,
+      "learning_rate": 6.315372975503285e-05,
+      "loss": 0.0281,
+      "step": 930
+    },
+    {
+      "epoch": 12.876712328767123,
+      "grad_norm": 0.21471256017684937,
+      "learning_rate": 6.1050147818704e-05,
+      "loss": 0.0321,
+      "step": 940
+    },
+    {
+      "epoch": 13.013698630136986,
+      "grad_norm": 0.19105984270572662,
+      "learning_rate": 5.896669249885851e-05,
+      "loss": 0.0273,
+      "step": 950
+    },
+    {
+      "epoch": 13.150684931506849,
+      "grad_norm": 0.3308360278606415,
+      "learning_rate": 5.690444038253935e-05,
+      "loss": 0.0343,
+      "step": 960
+    },
+    {
+      "epoch": 13.287671232876713,
+      "grad_norm": 0.1988590806722641,
+      "learning_rate": 5.4864457100425783e-05,
+      "loss": 0.028,
+      "step": 970
+    },
+    {
+      "epoch": 13.424657534246576,
+      "grad_norm": 0.1858794391155243,
+      "learning_rate": 5.284779677618841e-05,
+      "loss": 0.0273,
+      "step": 980
+    },
+    {
+      "epoch": 13.561643835616438,
+      "grad_norm": 0.29671627283096313,
+      "learning_rate": 5.0855501481790305e-05,
+      "loss": 0.0271,
+      "step": 990
+    },
+    {
+      "epoch": 13.698630136986301,
+      "grad_norm": 0.17693527042865753,
+      "learning_rate": 4.8888600699015496e-05,
+      "loss": 0.034,
+      "step": 1000
+    },
+    {
+      "epoch": 13.835616438356164,
+      "grad_norm": 0.31038013100624084,
+      "learning_rate": 4.694811078750338e-05,
+      "loss": 0.0251,
+      "step": 1010
+    },
+    {
+      "epoch": 13.972602739726028,
+      "grad_norm": 0.3317829668521881,
+      "learning_rate": 4.50350344595635e-05,
+      "loss": 0.0334,
+      "step": 1020
+    },
+    {
+      "epoch": 14.10958904109589,
+      "grad_norm": 0.1818408966064453,
+      "learning_rate": 4.315036026204262e-05,
+      "loss": 0.0272,
+      "step": 1030
+    },
+    {
+      "epoch": 14.246575342465754,
+      "grad_norm": 0.2105715572834015,
+      "learning_rate": 4.129506206551138e-05,
+      "loss": 0.025,
+      "step": 1040
+    },
+    {
+      "epoch": 14.383561643835616,
+      "grad_norm": 0.18613150715827942,
+      "learning_rate": 3.947009856103465e-05,
+      "loss": 0.0238,
+      "step": 1050
+    },
+    {
+      "epoch": 14.520547945205479,
+      "grad_norm": 0.2959461212158203,
+      "learning_rate": 3.767641276478563e-05,
+      "loss": 0.0249,
+      "step": 1060
+    },
+    {
+      "epoch": 14.657534246575342,
+      "grad_norm": 0.18495745956897736,
+      "learning_rate": 3.591493153075966e-05,
+      "loss": 0.0214,
+      "step": 1070
+    },
+    {
+      "epoch": 14.794520547945206,
+      "grad_norm": 0.1501263529062271,
+      "learning_rate": 3.41865650718396e-05,
+      "loss": 0.0266,
+      "step": 1080
+    },
+    {
+      "epoch": 14.931506849315069,
+      "grad_norm": 0.3387095332145691,
+      "learning_rate": 3.24922064894601e-05,
+      "loss": 0.0268,
+      "step": 1090
+    },
+    {
+      "epoch": 15.068493150684931,
+      "grad_norm": 0.23434942960739136,
+      "learning_rate": 3.083273131211382e-05,
+      "loss": 0.0272,
+      "step": 1100
+    },
+    {
+      "epoch": 15.205479452054794,
+      "grad_norm": 0.163187175989151,
+      "learning_rate": 2.920899704293849e-05,
+      "loss": 0.0232,
+      "step": 1110
+    },
+    {
+      "epoch": 15.342465753424657,
+      "grad_norm": 0.20000265538692474,
+      "learning_rate": 2.762184271661785e-05,
+      "loss": 0.0261,
+      "step": 1120
+    },
+    {
+      "epoch": 15.479452054794521,
+      "grad_norm": 0.18943333625793457,
+      "learning_rate": 2.6072088465826038e-05,
+      "loss": 0.0246,
+      "step": 1130
+    },
+    {
+      "epoch": 15.616438356164384,
+      "grad_norm": 0.2833252251148224,
+      "learning_rate": 2.4560535097439108e-05,
+      "loss": 0.0253,
+      "step": 1140
+    },
+    {
+      "epoch": 15.753424657534246,
+      "grad_norm": 0.1302843540906906,
+      "learning_rate": 2.308796367873296e-05,
+      "loss": 0.0246,
+      "step": 1150
+    },
+    {
+      "epoch": 15.89041095890411,
+      "grad_norm": 0.16615238785743713,
+      "learning_rate": 2.165513513378121e-05,
+      "loss": 0.0254,
+      "step": 1160
+    },
+    {
+      "epoch": 16.027397260273972,
+      "grad_norm": 0.17113815248012543,
+      "learning_rate": 2.0262789850261798e-05,
+      "loss": 0.0288,
+      "step": 1170
+    },
+    {
+      "epoch": 16.164383561643834,
+      "grad_norm": 0.21394069492816925,
+      "learning_rate": 1.8911647296875147e-05,
+      "loss": 0.025,
+      "step": 1180
+    },
+    {
+      "epoch": 16.301369863013697,
+      "grad_norm": 0.2763649523258209,
+      "learning_rate": 1.7602405651572275e-05,
+      "loss": 0.0219,
+      "step": 1190
+    },
+    {
+      "epoch": 16.438356164383563,
+      "grad_norm": 0.13925646245479584,
+      "learning_rate": 1.6335741440784035e-05,
+      "loss": 0.0217,
+      "step": 1200
+    },
+    {
+      "epoch": 16.575342465753426,
+      "grad_norm": 0.20826192200183868,
+      "learning_rate": 1.511230918983867e-05,
+      "loss": 0.023,
+      "step": 1210
+    },
+    {
+      "epoch": 16.71232876712329,
+      "grad_norm": 0.2256271094083786,
+      "learning_rate": 1.3932741084747913e-05,
+      "loss": 0.023,
+      "step": 1220
+    },
+    {
+      "epoch": 16.84931506849315,
+      "grad_norm": 0.27016547322273254,
+      "learning_rate": 1.2797646645536566e-05,
+      "loss": 0.0211,
+      "step": 1230
+    },
+    {
+      "epoch": 16.986301369863014,
+      "grad_norm": 0.26627489924430847,
+      "learning_rate": 1.1707612411284253e-05,
+      "loss": 0.0235,
+      "step": 1240
+    },
+    {
+      "epoch": 17.123287671232877,
+      "grad_norm": 0.18498767912387848,
+      "learning_rate": 1.0663201637042252e-05,
+      "loss": 0.022,
+      "step": 1250
+    },
+    {
+      "epoch": 17.26027397260274,
+      "grad_norm": 0.23852607607841492,
+      "learning_rate": 9.664954002781745e-06,
+      "loss": 0.0228,
+      "step": 1260
+    },
+    {
+      "epoch": 17.397260273972602,
+      "grad_norm": 0.15411531925201416,
+      "learning_rate": 8.713385334524283e-06,
+      "loss": 0.0198,
+      "step": 1270
+    },
+    {
+      "epoch": 17.534246575342465,
+      "grad_norm": 0.25403866171836853,
+      "learning_rate": 7.808987337798158e-06,
+      "loss": 0.0257,
+      "step": 1280
+    },
+    {
+      "epoch": 17.671232876712327,
+      "grad_norm": 0.14403975009918213,
+      "learning_rate": 6.952227343558671e-06,
+      "loss": 0.0215,
+      "step": 1290
+    },
+    {
+      "epoch": 17.80821917808219,
+      "grad_norm": 0.188527911901474,
+      "learning_rate": 6.143548066703475e-06,
+      "loss": 0.0224,
+      "step": 1300
+    },
+    {
+      "epoch": 17.945205479452056,
+      "grad_norm": 0.1309424489736557,
+      "learning_rate": 5.383367377307857e-06,
+      "loss": 0.0215,
+      "step": 1310
+    },
+    {
+      "epoch": 18.08219178082192,
+      "grad_norm": 0.11233002692461014,
+      "learning_rate": 4.672078084698095e-06,
+      "loss": 0.0211,
+      "step": 1320
+    },
+    {
+      "epoch": 18.21917808219178,
+      "grad_norm": 0.22869743406772614,
+      "learning_rate": 4.010047734474454e-06,
+      "loss": 0.0215,
+      "step": 1330
+    },
+    {
+      "epoch": 18.356164383561644,
+      "grad_norm": 0.11979719996452332,
+      "learning_rate": 3.397618418588877e-06,
+      "loss": 0.0273,
+      "step": 1340
+    },
+    {
+      "epoch": 18.493150684931507,
+      "grad_norm": 0.2112375795841217,
+      "learning_rate": 2.8351065985751766e-06,
+      "loss": 0.0228,
+      "step": 1350
+    },
+    {
+      "epoch": 18.63013698630137,
+      "grad_norm": 0.14134034514427185,
+      "learning_rate": 2.322802942023461e-06,
+      "loss": 0.0247,
+      "step": 1360
+    },
+    {
+      "epoch": 18.767123287671232,
+      "grad_norm": 0.09884881973266602,
+      "learning_rate": 1.8609721723830132e-06,
+      "loss": 0.0196,
+      "step": 1370
+    },
+    {
+      "epoch": 18.904109589041095,
+      "grad_norm": 0.14044946432113647,
+      "learning_rate": 1.4498529321713584e-06,
+      "loss": 0.0198,
+      "step": 1380
+    },
+    {
+      "epoch": 19.041095890410958,
+      "grad_norm": 0.13853876292705536,
+      "learning_rate": 1.0896576596600705e-06,
+      "loss": 0.0182,
+      "step": 1390
+    },
+    {
+      "epoch": 19.17808219178082,
+      "grad_norm": 0.1654110848903656,
+      "learning_rate": 7.80572479101327e-07,
+      "loss": 0.0229,
+      "step": 1400
+    },
+    {
+      "epoch": 19.315068493150687,
+      "grad_norm": 0.15151838958263397,
+      "learning_rate": 5.227571045515633e-07,
+      "loss": 0.0202,
+      "step": 1410
+    },
+    {
+      "epoch": 19.45205479452055,
+      "grad_norm": 0.2258201688528061,
+      "learning_rate": 3.163447573422351e-07,
+      "loss": 0.0197,
+      "step": 1420
+    },
+    {
+      "epoch": 19.589041095890412,
+      "grad_norm": 0.24640779197216034,
+      "learning_rate": 1.614420972401165e-07,
+      "loss": 0.0187,
+      "step": 1430
+    },
+    {
+      "epoch": 19.726027397260275,
+      "grad_norm": 0.21181590855121613,
+      "learning_rate": 5.812916733284324e-08,
+      "loss": 0.0198,
+      "step": 1440
+    },
+    {
+      "epoch": 19.863013698630137,
+      "grad_norm": 0.14787183701992035,
+      "learning_rate": 6.459352668164442e-09,
+      "loss": 0.0186,
+      "step": 1450
+    },
+    {
+      "epoch": 19.931506849315067,
+      "step": 1455,
+      "total_flos": 1.1504025698630573e+17,
+      "train_loss": 0.05927258820058554,
+      "train_runtime": 1048.7401,
+      "train_samples_per_second": 88.792,
+      "train_steps_per_second": 1.387
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 1455,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 20,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.1504025698630573e+17,
+  "train_batch_size": 64,
+  "trial_name": null,
+  "trial_params": null
+}