{ "best_metric": 0.9274308681488037, "best_model_checkpoint": "miner_id_24/checkpoint-100", "epoch": 3.0, "eval_steps": 50, "global_step": 141, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02127659574468085, "grad_norm": 13.848388671875, "learning_rate": 1.02e-05, "loss": 4.4616, "step": 1 }, { "epoch": 0.02127659574468085, "eval_loss": 3.8836147785186768, "eval_runtime": 3.5795, "eval_samples_per_second": 176.841, "eval_steps_per_second": 5.587, "step": 1 }, { "epoch": 0.0425531914893617, "grad_norm": 6.909480094909668, "learning_rate": 2.04e-05, "loss": 3.8433, "step": 2 }, { "epoch": 0.06382978723404255, "grad_norm": 5.8276777267456055, "learning_rate": 3.06e-05, "loss": 3.5629, "step": 3 }, { "epoch": 0.0851063829787234, "grad_norm": 5.752169132232666, "learning_rate": 4.08e-05, "loss": 3.5849, "step": 4 }, { "epoch": 0.10638297872340426, "grad_norm": 6.22681999206543, "learning_rate": 5.1e-05, "loss": 3.1647, "step": 5 }, { "epoch": 0.1276595744680851, "grad_norm": 9.792737007141113, "learning_rate": 6.12e-05, "loss": 2.7336, "step": 6 }, { "epoch": 0.14893617021276595, "grad_norm": 8.102272033691406, "learning_rate": 7.14e-05, "loss": 2.2142, "step": 7 }, { "epoch": 0.1702127659574468, "grad_norm": 5.091486930847168, "learning_rate": 8.16e-05, "loss": 1.9189, "step": 8 }, { "epoch": 0.19148936170212766, "grad_norm": 2.87353515625, "learning_rate": 9.18e-05, "loss": 1.9533, "step": 9 }, { "epoch": 0.2127659574468085, "grad_norm": 2.3648200035095215, "learning_rate": 0.000102, "loss": 1.5369, "step": 10 }, { "epoch": 0.23404255319148937, "grad_norm": 1.6724998950958252, "learning_rate": 0.00010198533518731099, "loss": 1.4629, "step": 11 }, { "epoch": 0.2553191489361702, "grad_norm": 1.2147119045257568, "learning_rate": 0.0001019413491828413, "loss": 1.1376, "step": 12 }, { "epoch": 0.2765957446808511, "grad_norm": 7.048776626586914, "learning_rate": 0.00010186806728253272, "loss": 0.8492, "step": 13 }, { "epoch": 0.2978723404255319, "grad_norm": 5.654258728027344, "learning_rate": 0.00010176553163012415, "loss": 0.5646, "step": 14 }, { "epoch": 0.3191489361702128, "grad_norm": 4.756352424621582, "learning_rate": 0.00010163380119291505, "loss": 1.6847, "step": 15 }, { "epoch": 0.3404255319148936, "grad_norm": 1.8449368476867676, "learning_rate": 0.00010147295172785395, "loss": 1.4681, "step": 16 }, { "epoch": 0.3617021276595745, "grad_norm": 0.6776601672172546, "learning_rate": 0.00010128307573797129, "loss": 1.244, "step": 17 }, { "epoch": 0.3829787234042553, "grad_norm": 0.5467591285705566, "learning_rate": 0.00010106428241918177, "loss": 1.1554, "step": 18 }, { "epoch": 0.40425531914893614, "grad_norm": 0.8386530876159668, "learning_rate": 0.00010081669759748692, "loss": 0.7783, "step": 19 }, { "epoch": 0.425531914893617, "grad_norm": 0.9994120597839355, "learning_rate": 0.00010054046365661356, "loss": 0.1474, "step": 20 }, { "epoch": 0.44680851063829785, "grad_norm": 0.9148034453392029, "learning_rate": 0.00010023573945613038, "loss": 1.3979, "step": 21 }, { "epoch": 0.46808510638297873, "grad_norm": 0.6885150074958801, "learning_rate": 9.99027002400892e-05, "loss": 1.4452, "step": 22 }, { "epoch": 0.48936170212765956, "grad_norm": 0.5044928193092346, "learning_rate": 9.954153753624383e-05, "loss": 1.2974, "step": 23 }, { "epoch": 0.5106382978723404, "grad_norm": 0.517201840877533, "learning_rate": 9.915245904590414e-05, "loss": 1.2173, "step": 24 }, { "epoch": 0.5319148936170213, "grad_norm": 0.7127615809440613, "learning_rate": 9.873568852448903e-05, "loss": 1.0097, "step": 25 }, { "epoch": 0.5531914893617021, "grad_norm": 1.4452153444290161, "learning_rate": 9.829146565284679e-05, "loss": 0.0585, "step": 26 }, { "epoch": 0.574468085106383, "grad_norm": 0.6424712538719177, "learning_rate": 9.782004589941682e-05, "loss": 1.1097, "step": 27 }, { "epoch": 0.5957446808510638, "grad_norm": 0.4880043864250183, "learning_rate": 9.732170037331209e-05, "loss": 1.4546, "step": 28 }, { "epoch": 0.6170212765957447, "grad_norm": 0.4043918251991272, "learning_rate": 9.679671566840698e-05, "loss": 1.2941, "step": 29 }, { "epoch": 0.6382978723404256, "grad_norm": 0.4223072826862335, "learning_rate": 9.624539369851954e-05, "loss": 1.14, "step": 30 }, { "epoch": 0.6595744680851063, "grad_norm": 0.5029892325401306, "learning_rate": 9.566805152378394e-05, "loss": 0.9826, "step": 31 }, { "epoch": 0.6808510638297872, "grad_norm": 0.5004958510398865, "learning_rate": 9.50650211683119e-05, "loss": 0.3189, "step": 32 }, { "epoch": 0.7021276595744681, "grad_norm": 0.46429964900016785, "learning_rate": 9.443664942924885e-05, "loss": 0.9003, "step": 33 }, { "epoch": 0.723404255319149, "grad_norm": 0.5286682844161987, "learning_rate": 9.378329767733415e-05, "loss": 1.4447, "step": 34 }, { "epoch": 0.7446808510638298, "grad_norm": 0.4269276559352875, "learning_rate": 9.310534164908e-05, "loss": 1.3137, "step": 35 }, { "epoch": 0.7659574468085106, "grad_norm": 0.370991587638855, "learning_rate": 9.240317123068899e-05, "loss": 1.1297, "step": 36 }, { "epoch": 0.7872340425531915, "grad_norm": 0.4747011065483093, "learning_rate": 9.167719023383408e-05, "loss": 1.0179, "step": 37 }, { "epoch": 0.8085106382978723, "grad_norm": 0.4905516803264618, "learning_rate": 9.09278161634304e-05, "loss": 0.4583, "step": 38 }, { "epoch": 0.8297872340425532, "grad_norm": 0.43988707661628723, "learning_rate": 9.015547997753193e-05, "loss": 0.6616, "step": 39 }, { "epoch": 0.851063829787234, "grad_norm": 0.45796748995780945, "learning_rate": 8.936062583949154e-05, "loss": 1.4275, "step": 40 }, { "epoch": 0.8723404255319149, "grad_norm": 0.36554864048957825, "learning_rate": 8.854371086252688e-05, "loss": 1.2779, "step": 41 }, { "epoch": 0.8936170212765957, "grad_norm": 0.3677642345428467, "learning_rate": 8.770520484683873e-05, "loss": 1.1917, "step": 42 }, { "epoch": 0.9148936170212766, "grad_norm": 0.35996830463409424, "learning_rate": 8.68455900094333e-05, "loss": 1.0847, "step": 43 }, { "epoch": 0.9361702127659575, "grad_norm": 0.4477192163467407, "learning_rate": 8.596536070680378e-05, "loss": 0.7112, "step": 44 }, { "epoch": 0.9574468085106383, "grad_norm": 0.4123137891292572, "learning_rate": 8.506502315063037e-05, "loss": 0.9964, "step": 45 }, { "epoch": 0.9787234042553191, "grad_norm": 0.358024001121521, "learning_rate": 8.414509511666283e-05, "loss": 1.2649, "step": 46 }, { "epoch": 1.0, "grad_norm": 0.37631550431251526, "learning_rate": 8.320610564695234e-05, "loss": 0.9995, "step": 47 }, { "epoch": 1.0212765957446808, "grad_norm": 0.22452251613140106, "learning_rate": 8.224859474560443e-05, "loss": 0.1898, "step": 48 }, { "epoch": 1.0425531914893618, "grad_norm": 0.4229590594768524, "learning_rate": 8.127311306822753e-05, "loss": 1.1368, "step": 49 }, { "epoch": 1.0638297872340425, "grad_norm": 0.37615370750427246, "learning_rate": 8.028022160525618e-05, "loss": 1.3321, "step": 50 }, { "epoch": 1.0638297872340425, "eval_loss": 0.9616568088531494, "eval_runtime": 3.2062, "eval_samples_per_second": 197.427, "eval_steps_per_second": 6.238, "step": 50 }, { "epoch": 1.0851063829787233, "grad_norm": 0.32277727127075195, "learning_rate": 7.927049135933059e-05, "loss": 1.1556, "step": 51 }, { "epoch": 1.1063829787234043, "grad_norm": 0.4011160135269165, "learning_rate": 7.82445030169183e-05, "loss": 1.0646, "step": 52 }, { "epoch": 1.127659574468085, "grad_norm": 0.5565645098686218, "learning_rate": 7.720284661436687e-05, "loss": 0.8884, "step": 53 }, { "epoch": 1.148936170212766, "grad_norm": 0.30222636461257935, "learning_rate": 7.614612119857942e-05, "loss": 0.2506, "step": 54 }, { "epoch": 1.1702127659574468, "grad_norm": 0.3559470772743225, "learning_rate": 7.507493448250836e-05, "loss": 0.722, "step": 55 }, { "epoch": 1.1914893617021276, "grad_norm": 0.4607730805873871, "learning_rate": 7.398990249566532e-05, "loss": 1.3531, "step": 56 }, { "epoch": 1.2127659574468086, "grad_norm": 0.39202919602394104, "learning_rate": 7.289164922984824e-05, "loss": 1.2476, "step": 57 }, { "epoch": 1.2340425531914894, "grad_norm": 0.35366523265838623, "learning_rate": 7.178080628028965e-05, "loss": 1.1099, "step": 58 }, { "epoch": 1.2553191489361701, "grad_norm": 0.40545791387557983, "learning_rate": 7.065801248243196e-05, "loss": 0.9506, "step": 59 }, { "epoch": 1.2765957446808511, "grad_norm": 0.3662566542625427, "learning_rate": 6.952391354453924e-05, "loss": 0.4258, "step": 60 }, { "epoch": 1.297872340425532, "grad_norm": 0.28656280040740967, "learning_rate": 6.837916167635644e-05, "loss": 0.4806, "step": 61 }, { "epoch": 1.3191489361702127, "grad_norm": 0.47951042652130127, "learning_rate": 6.722441521402946e-05, "loss": 1.3409, "step": 62 }, { "epoch": 1.3404255319148937, "grad_norm": 0.4036313593387604, "learning_rate": 6.606033824150241e-05, "loss": 1.272, "step": 63 }, { "epoch": 1.3617021276595744, "grad_norm": 0.372051477432251, "learning_rate": 6.48876002086089e-05, "loss": 1.0842, "step": 64 }, { "epoch": 1.3829787234042552, "grad_norm": 0.4357682466506958, "learning_rate": 6.37068755460778e-05, "loss": 1.0105, "step": 65 }, { "epoch": 1.4042553191489362, "grad_norm": 0.5092247128486633, "learning_rate": 6.251884327767429e-05, "loss": 0.6371, "step": 66 }, { "epoch": 1.425531914893617, "grad_norm": 0.202036052942276, "learning_rate": 6.132418662969977e-05, "loss": 0.2384, "step": 67 }, { "epoch": 1.4468085106382977, "grad_norm": 0.4590073227882385, "learning_rate": 6.012359263807463e-05, "loss": 1.2431, "step": 68 }, { "epoch": 1.4680851063829787, "grad_norm": 0.395398885011673, "learning_rate": 5.891775175323035e-05, "loss": 1.2642, "step": 69 }, { "epoch": 1.4893617021276595, "grad_norm": 0.4025956690311432, "learning_rate": 5.770735744303787e-05, "loss": 1.1548, "step": 70 }, { "epoch": 1.5106382978723403, "grad_norm": 0.402270644903183, "learning_rate": 5.6493105794000665e-05, "loss": 1.0174, "step": 71 }, { "epoch": 1.5319148936170213, "grad_norm": 0.5136646032333374, "learning_rate": 5.52756951109419e-05, "loss": 0.8706, "step": 72 }, { "epoch": 1.5531914893617023, "grad_norm": 0.0806485190987587, "learning_rate": 5.405582551541579e-05, "loss": 0.0626, "step": 73 }, { "epoch": 1.574468085106383, "grad_norm": 0.42770785093307495, "learning_rate": 5.283419854307425e-05, "loss": 1.0383, "step": 74 }, { "epoch": 1.5957446808510638, "grad_norm": 0.4656476080417633, "learning_rate": 5.16115167402202e-05, "loss": 1.293, "step": 75 }, { "epoch": 1.6170212765957448, "grad_norm": 0.38593193888664246, "learning_rate": 5.0388483259779815e-05, "loss": 1.2081, "step": 76 }, { "epoch": 1.6382978723404256, "grad_norm": 0.3994680643081665, "learning_rate": 4.916580145692577e-05, "loss": 1.0481, "step": 77 }, { "epoch": 1.6595744680851063, "grad_norm": 0.506732702255249, "learning_rate": 4.794417448458422e-05, "loss": 0.8817, "step": 78 }, { "epoch": 1.6808510638297873, "grad_norm": 0.272098571062088, "learning_rate": 4.67243048890581e-05, "loss": 0.2088, "step": 79 }, { "epoch": 1.702127659574468, "grad_norm": 0.35723525285720825, "learning_rate": 4.5506894205999334e-05, "loss": 0.6819, "step": 80 }, { "epoch": 1.7234042553191489, "grad_norm": 0.4605822265148163, "learning_rate": 4.429264255696214e-05, "loss": 1.3524, "step": 81 }, { "epoch": 1.7446808510638299, "grad_norm": 0.38222458958625793, "learning_rate": 4.308224824676965e-05, "loss": 1.1625, "step": 82 }, { "epoch": 1.7659574468085106, "grad_norm": 0.3701620399951935, "learning_rate": 4.187640736192537e-05, "loss": 1.064, "step": 83 }, { "epoch": 1.7872340425531914, "grad_norm": 0.4499792456626892, "learning_rate": 4.067581337030022e-05, "loss": 0.9158, "step": 84 }, { "epoch": 1.8085106382978724, "grad_norm": 0.3936365842819214, "learning_rate": 3.948115672232572e-05, "loss": 0.4121, "step": 85 }, { "epoch": 1.8297872340425532, "grad_norm": 0.27606216073036194, "learning_rate": 3.8293124453922226e-05, "loss": 0.4227, "step": 86 }, { "epoch": 1.851063829787234, "grad_norm": 0.48591378331184387, "learning_rate": 3.711239979139111e-05, "loss": 1.3174, "step": 87 }, { "epoch": 1.872340425531915, "grad_norm": 0.3974682092666626, "learning_rate": 3.593966175849759e-05, "loss": 1.2122, "step": 88 }, { "epoch": 1.8936170212765957, "grad_norm": 0.3816875219345093, "learning_rate": 3.477558478597054e-05, "loss": 1.1615, "step": 89 }, { "epoch": 1.9148936170212765, "grad_norm": 0.4059462547302246, "learning_rate": 3.362083832364357e-05, "loss": 0.9824, "step": 90 }, { "epoch": 1.9361702127659575, "grad_norm": 0.4912261366844177, "learning_rate": 3.247608645546074e-05, "loss": 0.683, "step": 91 }, { "epoch": 1.9574468085106385, "grad_norm": 0.4084428548812866, "learning_rate": 3.134198751756804e-05, "loss": 1.0533, "step": 92 }, { "epoch": 1.978723404255319, "grad_norm": 0.38703203201293945, "learning_rate": 3.0219193719710368e-05, "loss": 1.1633, "step": 93 }, { "epoch": 2.0, "grad_norm": 0.4213656783103943, "learning_rate": 2.910835077015177e-05, "loss": 0.9266, "step": 94 }, { "epoch": 2.021276595744681, "grad_norm": 0.08342910557985306, "learning_rate": 2.8010097504334692e-05, "loss": 0.0614, "step": 95 }, { "epoch": 2.0425531914893615, "grad_norm": 0.46658873558044434, "learning_rate": 2.692506551749165e-05, "loss": 1.0568, "step": 96 }, { "epoch": 2.0638297872340425, "grad_norm": 0.4580381512641907, "learning_rate": 2.5853878801420582e-05, "loss": 1.2753, "step": 97 }, { "epoch": 2.0851063829787235, "grad_norm": 0.3629004955291748, "learning_rate": 2.4797153385633147e-05, "loss": 1.0902, "step": 98 }, { "epoch": 2.106382978723404, "grad_norm": 0.3911716938018799, "learning_rate": 2.3755496983081708e-05, "loss": 1.0178, "step": 99 }, { "epoch": 2.127659574468085, "grad_norm": 0.475111186504364, "learning_rate": 2.2729508640669428e-05, "loss": 0.8545, "step": 100 }, { "epoch": 2.127659574468085, "eval_loss": 0.9274308681488037, "eval_runtime": 3.2361, "eval_samples_per_second": 195.604, "eval_steps_per_second": 6.18, "step": 100 }, { "epoch": 2.148936170212766, "grad_norm": 0.2774420976638794, "learning_rate": 2.1719778394743813e-05, "loss": 0.212, "step": 101 }, { "epoch": 2.1702127659574466, "grad_norm": 0.3814822733402252, "learning_rate": 2.0726886931772476e-05, "loss": 0.7554, "step": 102 }, { "epoch": 2.1914893617021276, "grad_norm": 0.423093318939209, "learning_rate": 1.9751405254395587e-05, "loss": 1.3008, "step": 103 }, { "epoch": 2.2127659574468086, "grad_norm": 0.38136741518974304, "learning_rate": 1.879389435304766e-05, "loss": 1.1388, "step": 104 }, { "epoch": 2.2340425531914896, "grad_norm": 0.39356493949890137, "learning_rate": 1.7854904883337184e-05, "loss": 0.9796, "step": 105 }, { "epoch": 2.25531914893617, "grad_norm": 0.47382405400276184, "learning_rate": 1.693497684936963e-05, "loss": 0.8731, "step": 106 }, { "epoch": 2.276595744680851, "grad_norm": 0.4495427906513214, "learning_rate": 1.6034639293196224e-05, "loss": 0.4196, "step": 107 }, { "epoch": 2.297872340425532, "grad_norm": 0.26799333095550537, "learning_rate": 1.515440999056669e-05, "loss": 0.338, "step": 108 }, { "epoch": 2.3191489361702127, "grad_norm": 0.4940812587738037, "learning_rate": 1.429479515316127e-05, "loss": 1.1992, "step": 109 }, { "epoch": 2.3404255319148937, "grad_norm": 0.4212472140789032, "learning_rate": 1.3456289137473124e-05, "loss": 1.153, "step": 110 }, { "epoch": 2.3617021276595747, "grad_norm": 0.4149324893951416, "learning_rate": 1.263937416050847e-05, "loss": 1.05, "step": 111 }, { "epoch": 2.382978723404255, "grad_norm": 0.4490218460559845, "learning_rate": 1.1844520022468092e-05, "loss": 0.9362, "step": 112 }, { "epoch": 2.404255319148936, "grad_norm": 0.5155778527259827, "learning_rate": 1.1072183836569599e-05, "loss": 0.615, "step": 113 }, { "epoch": 2.425531914893617, "grad_norm": 0.1996319442987442, "learning_rate": 1.0322809766165916e-05, "loss": 0.1577, "step": 114 }, { "epoch": 2.4468085106382977, "grad_norm": 0.5449104905128479, "learning_rate": 9.596828769311028e-06, "loss": 1.2252, "step": 115 }, { "epoch": 2.4680851063829787, "grad_norm": 0.4638878405094147, "learning_rate": 8.894658350919999e-06, "loss": 1.2128, "step": 116 }, { "epoch": 2.4893617021276597, "grad_norm": 0.4165002107620239, "learning_rate": 8.216702322665849e-06, "loss": 1.0802, "step": 117 }, { "epoch": 2.5106382978723403, "grad_norm": 0.42553117871284485, "learning_rate": 7.563350570751137e-06, "loss": 0.9043, "step": 118 }, { "epoch": 2.5319148936170213, "grad_norm": 0.5580489039421082, "learning_rate": 6.934978831688112e-06, "loss": 0.7406, "step": 119 }, { "epoch": 2.5531914893617023, "grad_norm": 0.14457367360591888, "learning_rate": 6.331948476216073e-06, "loss": 0.0878, "step": 120 }, { "epoch": 2.574468085106383, "grad_norm": 0.46198827028274536, "learning_rate": 5.754606301480452e-06, "loss": 1.0055, "step": 121 }, { "epoch": 2.595744680851064, "grad_norm": 0.4651603102684021, "learning_rate": 5.2032843315930305e-06, "loss": 1.2232, "step": 122 }, { "epoch": 2.617021276595745, "grad_norm": 0.41709625720977783, "learning_rate": 4.678299626687903e-06, "loss": 1.0644, "step": 123 }, { "epoch": 2.6382978723404253, "grad_norm": 0.42545390129089355, "learning_rate": 4.179954100583199e-06, "loss": 0.9916, "step": 124 }, { "epoch": 2.6595744680851063, "grad_norm": 0.5188893675804138, "learning_rate": 3.708534347153212e-06, "loss": 0.8641, "step": 125 }, { "epoch": 2.6808510638297873, "grad_norm": 0.30658212304115295, "learning_rate": 3.26431147551097e-06, "loss": 0.1864, "step": 126 }, { "epoch": 2.702127659574468, "grad_norm": 0.4011896252632141, "learning_rate": 2.8475409540958616e-06, "loss": 0.6371, "step": 127 }, { "epoch": 2.723404255319149, "grad_norm": 0.4772135615348816, "learning_rate": 2.45846246375617e-06, "loss": 1.2719, "step": 128 }, { "epoch": 2.74468085106383, "grad_norm": 0.44009700417518616, "learning_rate": 2.097299759910797e-06, "loss": 1.1568, "step": 129 }, { "epoch": 2.7659574468085104, "grad_norm": 0.42015552520751953, "learning_rate": 1.7642605438696306e-06, "loss": 1.0539, "step": 130 }, { "epoch": 2.7872340425531914, "grad_norm": 0.4982571303844452, "learning_rate": 1.4595363433864484e-06, "loss": 0.8517, "step": 131 }, { "epoch": 2.8085106382978724, "grad_norm": 0.4780231714248657, "learning_rate": 1.1833024025130858e-06, "loss": 0.3794, "step": 132 }, { "epoch": 2.829787234042553, "grad_norm": 0.3601129949092865, "learning_rate": 9.357175808182305e-07, "loss": 0.5229, "step": 133 }, { "epoch": 2.851063829787234, "grad_norm": 0.5078785419464111, "learning_rate": 7.169242620287227e-07, "loss": 1.2511, "step": 134 }, { "epoch": 2.872340425531915, "grad_norm": 0.42775991559028625, "learning_rate": 5.270482721460563e-07, "loss": 1.1546, "step": 135 }, { "epoch": 2.8936170212765955, "grad_norm": 0.43165627121925354, "learning_rate": 3.6619880708494724e-07, "loss": 1.0596, "step": 136 }, { "epoch": 2.9148936170212765, "grad_norm": 0.4631091356277466, "learning_rate": 2.3446836987585295e-07, "loss": 0.9146, "step": 137 }, { "epoch": 2.9361702127659575, "grad_norm": 0.5159528851509094, "learning_rate": 1.319327174672832e-07, "loss": 0.5763, "step": 138 }, { "epoch": 2.9574468085106385, "grad_norm": 0.426923006772995, "learning_rate": 5.865081715870424e-08, "loss": 0.7757, "step": 139 }, { "epoch": 2.978723404255319, "grad_norm": 0.4215574264526367, "learning_rate": 1.4664812689001438e-08, "loss": 1.14, "step": 140 }, { "epoch": 3.0, "grad_norm": 0.5064499378204346, "learning_rate": 0.0, "loss": 0.8367, "step": 141 } ], "logging_steps": 1, "max_steps": 141, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.466930952990884e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }