{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 200, "global_step": 729, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.004117344312918168, "grad_norm": 3.3125, "learning_rate": 0.0, "loss": 1.7239, "step": 1 }, { "epoch": 0.020586721564590838, "grad_norm": 2.265625, "learning_rate": 1.0958904109589042e-05, "loss": 1.725, "step": 5 }, { "epoch": 0.041173443129181676, "grad_norm": 1.6640625, "learning_rate": 2.4657534246575342e-05, "loss": 1.6554, "step": 10 }, { "epoch": 0.06176016469377252, "grad_norm": 1.65625, "learning_rate": 3.8356164383561644e-05, "loss": 1.5948, "step": 15 }, { "epoch": 0.08234688625836335, "grad_norm": 1.8203125, "learning_rate": 5.2054794520547945e-05, "loss": 1.5621, "step": 20 }, { "epoch": 0.1029336078229542, "grad_norm": 1.8359375, "learning_rate": 6.575342465753424e-05, "loss": 1.5373, "step": 25 }, { "epoch": 0.12352032938754504, "grad_norm": 1.625, "learning_rate": 7.945205479452055e-05, "loss": 1.4909, "step": 30 }, { "epoch": 0.14410705095213588, "grad_norm": 1.3046875, "learning_rate": 9.315068493150684e-05, "loss": 1.4503, "step": 35 }, { "epoch": 0.1646937725167267, "grad_norm": 1.125, "learning_rate": 0.00010684931506849317, "loss": 1.4406, "step": 40 }, { "epoch": 0.18528049408131755, "grad_norm": 1.09375, "learning_rate": 0.00012054794520547945, "loss": 1.446, "step": 45 }, { "epoch": 0.2058672156459084, "grad_norm": 1.5078125, "learning_rate": 0.00013424657534246576, "loss": 1.4431, "step": 50 }, { "epoch": 0.22645393721049922, "grad_norm": 1.0078125, "learning_rate": 0.00014794520547945205, "loss": 1.4659, "step": 55 }, { "epoch": 0.24704065877509007, "grad_norm": 1.359375, "learning_rate": 0.00016164383561643837, "loss": 1.4371, "step": 60 }, { "epoch": 0.2676273803396809, "grad_norm": 1.2109375, "learning_rate": 0.00017534246575342467, "loss": 1.4631, "step": 65 }, { "epoch": 0.28821410190427177, "grad_norm": 1.4765625, "learning_rate": 0.00018904109589041096, "loss": 1.4515, "step": 70 }, { "epoch": 0.30880082346886256, "grad_norm": 1.1953125, "learning_rate": 0.00019999885326982427, "loss": 1.4602, "step": 75 }, { "epoch": 0.3293875450334534, "grad_norm": 0.9609375, "learning_rate": 0.00019995872047508514, "loss": 1.4369, "step": 80 }, { "epoch": 0.34997426659804426, "grad_norm": 0.953125, "learning_rate": 0.00019986127746865, "loss": 1.454, "step": 85 }, { "epoch": 0.3705609881626351, "grad_norm": 0.8671875, "learning_rate": 0.00019970658011837404, "loss": 1.4406, "step": 90 }, { "epoch": 0.39114770972722596, "grad_norm": 0.8046875, "learning_rate": 0.00019949471711824872, "loss": 1.445, "step": 95 }, { "epoch": 0.4117344312918168, "grad_norm": 0.83203125, "learning_rate": 0.0001992258099375498, "loss": 1.4431, "step": 100 }, { "epoch": 0.4323211528564076, "grad_norm": 0.68359375, "learning_rate": 0.00019890001275119453, "loss": 1.46, "step": 105 }, { "epoch": 0.45290787442099845, "grad_norm": 0.86328125, "learning_rate": 0.000198517512351347, "loss": 1.4477, "step": 110 }, { "epoch": 0.4734945959855893, "grad_norm": 0.70703125, "learning_rate": 0.00019807852804032305, "loss": 1.4202, "step": 115 }, { "epoch": 0.49408131755018014, "grad_norm": 0.69921875, "learning_rate": 0.00019758331150485575, "loss": 1.4393, "step": 120 }, { "epoch": 0.514668039114771, "grad_norm": 0.69921875, "learning_rate": 0.00019703214667179353, "loss": 1.4277, "step": 125 }, { "epoch": 0.5352547606793618, "grad_norm": 0.7578125, "learning_rate": 0.0001964253495453141, "loss": 1.4421, "step": 130 }, { "epoch": 0.5558414822439527, "grad_norm": 0.61328125, "learning_rate": 0.0001957632680257468, "loss": 1.4553, "step": 135 }, { "epoch": 0.5764282038085435, "grad_norm": 0.6640625, "learning_rate": 0.0001950462817101079, "loss": 1.4364, "step": 140 }, { "epoch": 0.5970149253731343, "grad_norm": 0.6171875, "learning_rate": 0.000194274801674463, "loss": 1.4269, "step": 145 }, { "epoch": 0.6176016469377251, "grad_norm": 0.625, "learning_rate": 0.0001934492702382411, "loss": 1.4327, "step": 150 }, { "epoch": 0.638188368502316, "grad_norm": 0.6640625, "learning_rate": 0.0001925701607106357, "loss": 1.4219, "step": 155 }, { "epoch": 0.6587750900669068, "grad_norm": 0.6796875, "learning_rate": 0.00019163797711923823, "loss": 1.417, "step": 160 }, { "epoch": 0.6793618116314977, "grad_norm": 0.64453125, "learning_rate": 0.0001906532539210597, "loss": 1.4277, "step": 165 }, { "epoch": 0.6999485331960885, "grad_norm": 0.62109375, "learning_rate": 0.00018961655569610557, "loss": 1.407, "step": 170 }, { "epoch": 0.7205352547606794, "grad_norm": 0.640625, "learning_rate": 0.0001885284768236801, "loss": 1.4221, "step": 175 }, { "epoch": 0.7411219763252702, "grad_norm": 0.58984375, "learning_rate": 0.00018738964114160583, "loss": 1.4035, "step": 180 }, { "epoch": 0.7617086978898611, "grad_norm": 0.58984375, "learning_rate": 0.00018620070158855301, "loss": 1.4082, "step": 185 }, { "epoch": 0.7822954194544519, "grad_norm": 0.59375, "learning_rate": 0.00018496233982968457, "loss": 1.408, "step": 190 }, { "epoch": 0.8028821410190428, "grad_norm": 0.58984375, "learning_rate": 0.00018367526586583098, "loss": 1.4207, "step": 195 }, { "epoch": 0.8234688625836336, "grad_norm": 0.62109375, "learning_rate": 0.00018234021762641945, "loss": 1.4151, "step": 200 }, { "epoch": 0.8234688625836336, "eval_loss": 1.4270434379577637, "eval_runtime": 300.5446, "eval_samples_per_second": 22.978, "eval_steps_per_second": 1.437, "step": 200 }, { "epoch": 0.8440555841482243, "grad_norm": 0.65234375, "learning_rate": 0.00018095796054639013, "loss": 1.4259, "step": 205 }, { "epoch": 0.8646423057128152, "grad_norm": 0.57421875, "learning_rate": 0.00017952928712734268, "loss": 1.3941, "step": 210 }, { "epoch": 0.885229027277406, "grad_norm": 0.59765625, "learning_rate": 0.00017805501648316424, "loss": 1.3959, "step": 215 }, { "epoch": 0.9058157488419969, "grad_norm": 0.609375, "learning_rate": 0.00017653599387039992, "loss": 1.4122, "step": 220 }, { "epoch": 0.9264024704065877, "grad_norm": 0.62109375, "learning_rate": 0.00017497309020363417, "loss": 1.4074, "step": 225 }, { "epoch": 0.9469891919711786, "grad_norm": 0.54296875, "learning_rate": 0.00017336720155616185, "loss": 1.421, "step": 230 }, { "epoch": 0.9675759135357694, "grad_norm": 0.61328125, "learning_rate": 0.00017171924864623454, "loss": 1.4146, "step": 235 }, { "epoch": 0.9881626351003603, "grad_norm": 0.52734375, "learning_rate": 0.0001700301763091771, "loss": 1.3834, "step": 240 }, { "epoch": 1.0082346886258364, "grad_norm": 0.7890625, "learning_rate": 0.00016830095295567693, "loss": 1.3598, "step": 245 }, { "epoch": 1.0288214101904272, "grad_norm": 0.875, "learning_rate": 0.00016653257001655652, "loss": 1.2955, "step": 250 }, { "epoch": 1.0494081317550181, "grad_norm": 0.5703125, "learning_rate": 0.00016472604137434784, "loss": 1.2763, "step": 255 }, { "epoch": 1.0699948533196089, "grad_norm": 0.6015625, "learning_rate": 0.00016288240278199395, "loss": 1.2821, "step": 260 }, { "epoch": 1.0905815748841996, "grad_norm": 0.5390625, "learning_rate": 0.00016100271126901175, "loss": 1.2932, "step": 265 }, { "epoch": 1.1111682964487906, "grad_norm": 0.54296875, "learning_rate": 0.00015908804453545607, "loss": 1.2985, "step": 270 }, { "epoch": 1.1317550180133813, "grad_norm": 0.54296875, "learning_rate": 0.0001571395003340323, "loss": 1.2692, "step": 275 }, { "epoch": 1.1523417395779723, "grad_norm": 0.5546875, "learning_rate": 0.00015515819584071216, "loss": 1.2829, "step": 280 }, { "epoch": 1.172928461142563, "grad_norm": 0.50390625, "learning_rate": 0.0001531452670142135, "loss": 1.2881, "step": 285 }, { "epoch": 1.193515182707154, "grad_norm": 0.53125, "learning_rate": 0.00015110186794471103, "loss": 1.2853, "step": 290 }, { "epoch": 1.2141019042717447, "grad_norm": 0.578125, "learning_rate": 0.00014902917019215164, "loss": 1.2629, "step": 295 }, { "epoch": 1.2346886258363357, "grad_norm": 0.53515625, "learning_rate": 0.00014692836211455373, "loss": 1.284, "step": 300 }, { "epoch": 1.2552753474009264, "grad_norm": 0.498046875, "learning_rate": 0.00014480064818667528, "loss": 1.2776, "step": 305 }, { "epoch": 1.2758620689655173, "grad_norm": 0.5546875, "learning_rate": 0.000142647248309442, "loss": 1.2677, "step": 310 }, { "epoch": 1.296448790530108, "grad_norm": 0.5625, "learning_rate": 0.0001404693971105306, "loss": 1.2838, "step": 315 }, { "epoch": 1.3170355120946988, "grad_norm": 0.58203125, "learning_rate": 0.000138268343236509, "loss": 1.2875, "step": 320 }, { "epoch": 1.3376222336592898, "grad_norm": 0.5234375, "learning_rate": 0.00013604534863693868, "loss": 1.2793, "step": 325 }, { "epoch": 1.3582089552238805, "grad_norm": 0.52734375, "learning_rate": 0.00013380168784085027, "loss": 1.289, "step": 330 }, { "epoch": 1.3787956767884715, "grad_norm": 0.51953125, "learning_rate": 0.0001315386472260063, "loss": 1.2902, "step": 335 }, { "epoch": 1.3993823983530622, "grad_norm": 0.50390625, "learning_rate": 0.00012925752428137125, "loss": 1.2931, "step": 340 }, { "epoch": 1.4199691199176532, "grad_norm": 0.470703125, "learning_rate": 0.0001269596268632105, "loss": 1.2757, "step": 345 }, { "epoch": 1.440555841482244, "grad_norm": 0.52734375, "learning_rate": 0.00012464627244524593, "loss": 1.2884, "step": 350 }, { "epoch": 1.4611425630468349, "grad_norm": 0.51171875, "learning_rate": 0.00012231878736329683, "loss": 1.2945, "step": 355 }, { "epoch": 1.4817292846114256, "grad_norm": 0.49609375, "learning_rate": 0.00011997850605484033, "loss": 1.2876, "step": 360 }, { "epoch": 1.5023160061760166, "grad_norm": 0.51171875, "learning_rate": 0.00011762677029392652, "loss": 1.2849, "step": 365 }, { "epoch": 1.5229027277406073, "grad_norm": 0.482421875, "learning_rate": 0.00011526492842188745, "loss": 1.2768, "step": 370 }, { "epoch": 1.543489449305198, "grad_norm": 0.49609375, "learning_rate": 0.0001128943345742806, "loss": 1.2785, "step": 375 }, { "epoch": 1.564076170869789, "grad_norm": 0.494140625, "learning_rate": 0.0001105163479045106, "loss": 1.2818, "step": 380 }, { "epoch": 1.58466289243438, "grad_norm": 0.462890625, "learning_rate": 0.00010813233180457367, "loss": 1.2722, "step": 385 }, { "epoch": 1.6052496139989707, "grad_norm": 0.470703125, "learning_rate": 0.00010574365312337235, "loss": 1.2864, "step": 390 }, { "epoch": 1.6258363355635614, "grad_norm": 0.451171875, "learning_rate": 0.00010335168138304776, "loss": 1.2681, "step": 395 }, { "epoch": 1.6464230571281524, "grad_norm": 0.5, "learning_rate": 0.00010095778799377959, "loss": 1.2891, "step": 400 }, { "epoch": 1.6464230571281524, "eval_loss": 1.3904892206192017, "eval_runtime": 300.5338, "eval_samples_per_second": 22.979, "eval_steps_per_second": 1.437, "step": 400 }, { "epoch": 1.6670097786927431, "grad_norm": 0.478515625, "learning_rate": 9.856334546750349e-05, "loss": 1.2842, "step": 405 }, { "epoch": 1.6875965002573339, "grad_norm": 0.458984375, "learning_rate": 9.616972663099647e-05, "loss": 1.2671, "step": 410 }, { "epoch": 1.7081832218219248, "grad_norm": 0.45703125, "learning_rate": 9.377830383878246e-05, "loss": 1.2617, "step": 415 }, { "epoch": 1.7287699433865158, "grad_norm": 0.453125, "learning_rate": 9.139044818630784e-05, "loss": 1.2609, "step": 420 }, { "epoch": 1.7493566649511065, "grad_norm": 0.435546875, "learning_rate": 8.90075287238395e-05, "loss": 1.2686, "step": 425 }, { "epoch": 1.7699433865156973, "grad_norm": 0.458984375, "learning_rate": 8.663091167153516e-05, "loss": 1.2603, "step": 430 }, { "epoch": 1.7905301080802882, "grad_norm": 0.4375, "learning_rate": 8.426195963613626e-05, "loss": 1.2728, "step": 435 }, { "epoch": 1.8111168296448792, "grad_norm": 0.4375, "learning_rate": 8.190203082973272e-05, "loss": 1.2503, "step": 440 }, { "epoch": 1.83170355120947, "grad_norm": 0.462890625, "learning_rate": 7.955247829104738e-05, "loss": 1.2781, "step": 445 }, { "epoch": 1.8522902727740607, "grad_norm": 0.435546875, "learning_rate": 7.721464910968627e-05, "loss": 1.2635, "step": 450 }, { "epoch": 1.8728769943386516, "grad_norm": 0.45703125, "learning_rate": 7.488988365379984e-05, "loss": 1.262, "step": 455 }, { "epoch": 1.8934637159032424, "grad_norm": 0.423828125, "learning_rate": 7.25795148015982e-05, "loss": 1.24, "step": 460 }, { "epoch": 1.914050437467833, "grad_norm": 0.431640625, "learning_rate": 7.028486717715993e-05, "loss": 1.2687, "step": 465 }, { "epoch": 1.934637159032424, "grad_norm": 0.443359375, "learning_rate": 6.800725639097411e-05, "loss": 1.2644, "step": 470 }, { "epoch": 1.955223880597015, "grad_norm": 0.4375, "learning_rate": 6.574798828564948e-05, "loss": 1.2602, "step": 475 }, { "epoch": 1.9758106021616058, "grad_norm": 0.4375, "learning_rate": 6.35083581872245e-05, "loss": 1.271, "step": 480 }, { "epoch": 1.9963973237261965, "grad_norm": 0.42578125, "learning_rate": 6.128965016250637e-05, "loss": 1.253, "step": 485 }, { "epoch": 2.016469377251673, "grad_norm": 0.6875, "learning_rate": 5.909313628286601e-05, "loss": 1.1933, "step": 490 }, { "epoch": 2.0370560988162634, "grad_norm": 0.54296875, "learning_rate": 5.692007589491014e-05, "loss": 1.179, "step": 495 }, { "epoch": 2.0576428203808543, "grad_norm": 0.5390625, "learning_rate": 5.477171489844881e-05, "loss": 1.1669, "step": 500 }, { "epoch": 2.0782295419454453, "grad_norm": 0.443359375, "learning_rate": 5.2649285032173045e-05, "loss": 1.1673, "step": 505 }, { "epoch": 2.0988162635100363, "grad_norm": 0.4375, "learning_rate": 5.055400316745096e-05, "loss": 1.1703, "step": 510 }, { "epoch": 2.1194029850746268, "grad_norm": 0.427734375, "learning_rate": 4.848707061064849e-05, "loss": 1.1856, "step": 515 }, { "epoch": 2.1399897066392177, "grad_norm": 0.41015625, "learning_rate": 4.64496724143736e-05, "loss": 1.1569, "step": 520 }, { "epoch": 2.1605764282038087, "grad_norm": 0.419921875, "learning_rate": 4.444297669803981e-05, "loss": 1.1611, "step": 525 }, { "epoch": 2.181163149768399, "grad_norm": 0.421875, "learning_rate": 4.2468133978137945e-05, "loss": 1.1738, "step": 530 }, { "epoch": 2.20174987133299, "grad_norm": 0.40234375, "learning_rate": 4.052627650860057e-05, "loss": 1.1708, "step": 535 }, { "epoch": 2.222336592897581, "grad_norm": 0.39453125, "learning_rate": 3.861851763163665e-05, "loss": 1.1789, "step": 540 }, { "epoch": 2.242923314462172, "grad_norm": 0.416015625, "learning_rate": 3.6745951139409395e-05, "loss": 1.1662, "step": 545 }, { "epoch": 2.2635100360267626, "grad_norm": 0.41015625, "learning_rate": 3.4909650646922896e-05, "loss": 1.1598, "step": 550 }, { "epoch": 2.2840967575913536, "grad_norm": 0.396484375, "learning_rate": 3.3110668976476746e-05, "loss": 1.1604, "step": 555 }, { "epoch": 2.3046834791559445, "grad_norm": 0.40234375, "learning_rate": 3.1350037554042446e-05, "loss": 1.1548, "step": 560 }, { "epoch": 2.325270200720535, "grad_norm": 0.388671875, "learning_rate": 2.9628765817906667e-05, "loss": 1.1779, "step": 565 }, { "epoch": 2.345856922285126, "grad_norm": 0.3984375, "learning_rate": 2.794784063992131e-05, "loss": 1.1664, "step": 570 }, { "epoch": 2.366443643849717, "grad_norm": 0.404296875, "learning_rate": 2.6308225759691428e-05, "loss": 1.1706, "step": 575 }, { "epoch": 2.387030365414308, "grad_norm": 0.40625, "learning_rate": 2.471086123202602e-05, "loss": 1.1739, "step": 580 }, { "epoch": 2.4076170869788984, "grad_norm": 0.392578125, "learning_rate": 2.3156662887968207e-05, "loss": 1.1613, "step": 585 }, { "epoch": 2.4282038085434894, "grad_norm": 0.390625, "learning_rate": 2.164652180971358e-05, "loss": 1.1611, "step": 590 }, { "epoch": 2.4487905301080803, "grad_norm": 0.392578125, "learning_rate": 2.0181303819718457e-05, "loss": 1.1646, "step": 595 }, { "epoch": 2.4693772516726713, "grad_norm": 0.388671875, "learning_rate": 1.8761848984290064e-05, "loss": 1.1677, "step": 600 }, { "epoch": 2.4693772516726713, "eval_loss": 1.3902530670166016, "eval_runtime": 300.5428, "eval_samples_per_second": 22.978, "eval_steps_per_second": 1.437, "step": 600 }, { "epoch": 2.489963973237262, "grad_norm": 0.380859375, "learning_rate": 1.7388971131944033e-05, "loss": 1.1783, "step": 605 }, { "epoch": 2.510550694801853, "grad_norm": 0.39453125, "learning_rate": 1.6063457386805004e-05, "loss": 1.1652, "step": 610 }, { "epoch": 2.5311374163664437, "grad_norm": 0.396484375, "learning_rate": 1.4786067717317743e-05, "loss": 1.1578, "step": 615 }, { "epoch": 2.5517241379310347, "grad_norm": 0.390625, "learning_rate": 1.3557534500527769e-05, "loss": 1.1665, "step": 620 }, { "epoch": 2.572310859495625, "grad_norm": 0.3828125, "learning_rate": 1.2378562102181179e-05, "loss": 1.1662, "step": 625 }, { "epoch": 2.592897581060216, "grad_norm": 0.3828125, "learning_rate": 1.1249826472884572e-05, "loss": 1.1713, "step": 630 }, { "epoch": 2.613484302624807, "grad_norm": 0.39453125, "learning_rate": 1.0171974760556114e-05, "loss": 1.1724, "step": 635 }, { "epoch": 2.6340710241893976, "grad_norm": 0.396484375, "learning_rate": 9.145624939390762e-06, "loss": 1.1559, "step": 640 }, { "epoch": 2.6546577457539886, "grad_norm": 0.380859375, "learning_rate": 8.171365455551506e-06, "loss": 1.1768, "step": 645 }, { "epoch": 2.6752444673185796, "grad_norm": 0.388671875, "learning_rate": 7.249754889790539e-06, "loss": 1.1638, "step": 650 }, { "epoch": 2.6958311888831705, "grad_norm": 0.3828125, "learning_rate": 6.38132163719305e-06, "loss": 1.1572, "step": 655 }, { "epoch": 2.716417910447761, "grad_norm": 0.384765625, "learning_rate": 5.56656360422797e-06, "loss": 1.1696, "step": 660 }, { "epoch": 2.737004632012352, "grad_norm": 0.376953125, "learning_rate": 4.805947923278864e-06, "loss": 1.1778, "step": 665 }, { "epoch": 2.757591353576943, "grad_norm": 0.38671875, "learning_rate": 4.099910684818698e-06, "loss": 1.177, "step": 670 }, { "epoch": 2.7781780751415335, "grad_norm": 0.380859375, "learning_rate": 3.4488566873824513e-06, "loss": 1.176, "step": 675 }, { "epoch": 2.7987647967061244, "grad_norm": 0.3828125, "learning_rate": 2.853159205480216e-06, "loss": 1.1635, "step": 680 }, { "epoch": 2.8193515182707154, "grad_norm": 0.380859375, "learning_rate": 2.3131597755845946e-06, "loss": 1.1825, "step": 685 }, { "epoch": 2.8399382398353064, "grad_norm": 0.3828125, "learning_rate": 1.8291680003145073e-06, "loss": 1.1639, "step": 690 }, { "epoch": 2.8605249613998973, "grad_norm": 0.380859375, "learning_rate": 1.4014613709280145e-06, "loss": 1.1742, "step": 695 }, { "epoch": 2.881111682964488, "grad_norm": 0.384765625, "learning_rate": 1.0302851082258369e-06, "loss": 1.1423, "step": 700 }, { "epoch": 2.901698404529079, "grad_norm": 0.384765625, "learning_rate": 7.158520219567533e-07, "loss": 1.1591, "step": 705 }, { "epoch": 2.9222851260936697, "grad_norm": 0.39453125, "learning_rate": 4.583423888055105e-07, "loss": 1.1736, "step": 710 }, { "epoch": 2.9428718476582603, "grad_norm": 0.384765625, "learning_rate": 2.57903849033192e-07, "loss": 1.1783, "step": 715 }, { "epoch": 2.9634585692228512, "grad_norm": 0.3828125, "learning_rate": 1.1465132182936212e-07, "loss": 1.173, "step": 720 }, { "epoch": 2.984045290787442, "grad_norm": 0.384765625, "learning_rate": 2.8666939424415452e-08, "loss": 1.1552, "step": 725 }, { "epoch": 3.0, "step": 729, "total_flos": 1.0094624425786737e+18, "train_loss": 1.2982249011391638, "train_runtime": 26293.112, "train_samples_per_second": 7.093, "train_steps_per_second": 0.028 } ], "logging_steps": 5, "max_steps": 729, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 1000000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.0094624425786737e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }