{ "best_metric": 11.5, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 3.0042194092827006, "eval_steps": 50, "global_step": 178, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.016877637130801686, "grad_norm": 2.967948057630565e-05, "learning_rate": 1e-05, "loss": 46.0, "step": 1 }, { "epoch": 0.016877637130801686, "eval_loss": 11.5, "eval_runtime": 0.4602, "eval_samples_per_second": 217.308, "eval_steps_per_second": 54.327, "step": 1 }, { "epoch": 0.03375527426160337, "grad_norm": 3.292097244411707e-05, "learning_rate": 2e-05, "loss": 46.0, "step": 2 }, { "epoch": 0.05063291139240506, "grad_norm": 3.594873123802245e-05, "learning_rate": 3e-05, "loss": 46.0, "step": 3 }, { "epoch": 0.06751054852320675, "grad_norm": 2.2968290068092756e-05, "learning_rate": 4e-05, "loss": 46.0, "step": 4 }, { "epoch": 0.08438818565400844, "grad_norm": 4.390135291032493e-05, "learning_rate": 5e-05, "loss": 46.0, "step": 5 }, { "epoch": 0.10126582278481013, "grad_norm": 3.81232712243218e-05, "learning_rate": 6e-05, "loss": 46.0, "step": 6 }, { "epoch": 0.11814345991561181, "grad_norm": 4.6551958803320304e-05, "learning_rate": 7e-05, "loss": 46.0, "step": 7 }, { "epoch": 0.1350210970464135, "grad_norm": 3.0438888643402606e-05, "learning_rate": 8e-05, "loss": 46.0, "step": 8 }, { "epoch": 0.1518987341772152, "grad_norm": 3.3523647289257497e-05, "learning_rate": 9e-05, "loss": 46.0, "step": 9 }, { "epoch": 0.16877637130801687, "grad_norm": 3.665711119538173e-05, "learning_rate": 0.0001, "loss": 46.0, "step": 10 }, { "epoch": 0.18565400843881857, "grad_norm": 4.438432733877562e-05, "learning_rate": 9.999125804563732e-05, "loss": 46.0, "step": 11 }, { "epoch": 0.20253164556962025, "grad_norm": 3.8814559957245365e-05, "learning_rate": 9.996503523941994e-05, "loss": 46.0, "step": 12 }, { "epoch": 0.21940928270042195, "grad_norm": 3.1020721507957205e-05, "learning_rate": 9.992134075089084e-05, "loss": 46.0, "step": 13 }, { "epoch": 0.23628691983122363, "grad_norm": 5.652836262015626e-05, "learning_rate": 9.986018985905901e-05, "loss": 46.0, "step": 14 }, { "epoch": 0.25316455696202533, "grad_norm": 2.6881582016358152e-05, "learning_rate": 9.978160394705668e-05, "loss": 46.0, "step": 15 }, { "epoch": 0.270042194092827, "grad_norm": 4.152996916673146e-05, "learning_rate": 9.968561049466214e-05, "loss": 46.0, "step": 16 }, { "epoch": 0.2869198312236287, "grad_norm": 3.717005529324524e-05, "learning_rate": 9.957224306869053e-05, "loss": 46.0, "step": 17 }, { "epoch": 0.3037974683544304, "grad_norm": 3.7945897929603234e-05, "learning_rate": 9.944154131125642e-05, "loss": 46.0, "step": 18 }, { "epoch": 0.3206751054852321, "grad_norm": 4.0988223190652207e-05, "learning_rate": 9.92935509259118e-05, "loss": 46.0, "step": 19 }, { "epoch": 0.33755274261603374, "grad_norm": 3.8232275983318686e-05, "learning_rate": 9.912832366166442e-05, "loss": 46.0, "step": 20 }, { "epoch": 0.35443037974683544, "grad_norm": 4.0358921978622675e-05, "learning_rate": 9.894591729488242e-05, "loss": 46.0, "step": 21 }, { "epoch": 0.37130801687763715, "grad_norm": 4.8853878979571164e-05, "learning_rate": 9.874639560909117e-05, "loss": 46.0, "step": 22 }, { "epoch": 0.3881856540084388, "grad_norm": 4.539159999694675e-05, "learning_rate": 9.852982837266955e-05, "loss": 46.0, "step": 23 }, { "epoch": 0.4050632911392405, "grad_norm": 3.974886203650385e-05, "learning_rate": 9.829629131445342e-05, "loss": 46.0, "step": 24 }, { "epoch": 0.4219409282700422, "grad_norm": 5.6305820180568844e-05, "learning_rate": 9.804586609725499e-05, "loss": 46.0, "step": 25 }, { "epoch": 0.4388185654008439, "grad_norm": 6.0451780882431194e-05, "learning_rate": 9.777864028930705e-05, "loss": 46.0, "step": 26 }, { "epoch": 0.45569620253164556, "grad_norm": 7.924340752651915e-05, "learning_rate": 9.74947073336423e-05, "loss": 46.0, "step": 27 }, { "epoch": 0.47257383966244726, "grad_norm": 5.868041262147017e-05, "learning_rate": 9.719416651541839e-05, "loss": 46.0, "step": 28 }, { "epoch": 0.48945147679324896, "grad_norm": 4.6550754632335156e-05, "learning_rate": 9.687712292719997e-05, "loss": 46.0, "step": 29 }, { "epoch": 0.5063291139240507, "grad_norm": 3.241928789066151e-05, "learning_rate": 9.654368743221022e-05, "loss": 46.0, "step": 30 }, { "epoch": 0.5232067510548524, "grad_norm": 5.7375815231353045e-05, "learning_rate": 9.619397662556435e-05, "loss": 46.0, "step": 31 }, { "epoch": 0.540084388185654, "grad_norm": 5.515938755706884e-05, "learning_rate": 9.582811279349882e-05, "loss": 46.0, "step": 32 }, { "epoch": 0.5569620253164557, "grad_norm": 6.320214015431702e-05, "learning_rate": 9.544622387061055e-05, "loss": 46.0, "step": 33 }, { "epoch": 0.5738396624472574, "grad_norm": 5.135314495419152e-05, "learning_rate": 9.504844339512095e-05, "loss": 46.0, "step": 34 }, { "epoch": 0.5907172995780591, "grad_norm": 8.740446355659515e-05, "learning_rate": 9.463491046218058e-05, "loss": 46.0, "step": 35 }, { "epoch": 0.6075949367088608, "grad_norm": 4.9349069740856066e-05, "learning_rate": 9.420576967523049e-05, "loss": 46.0, "step": 36 }, { "epoch": 0.6244725738396625, "grad_norm": 7.72976636653766e-05, "learning_rate": 9.376117109543769e-05, "loss": 46.0, "step": 37 }, { "epoch": 0.6413502109704642, "grad_norm": 8.270151738543063e-05, "learning_rate": 9.330127018922194e-05, "loss": 46.0, "step": 38 }, { "epoch": 0.6582278481012658, "grad_norm": 8.696226723259315e-05, "learning_rate": 9.282622777389258e-05, "loss": 46.0, "step": 39 }, { "epoch": 0.6751054852320675, "grad_norm": 0.00012761405378114432, "learning_rate": 9.233620996141421e-05, "loss": 46.0, "step": 40 }, { "epoch": 0.6919831223628692, "grad_norm": 0.00010962937085423619, "learning_rate": 9.183138810032099e-05, "loss": 46.0, "step": 41 }, { "epoch": 0.7088607594936709, "grad_norm": 9.749073797138408e-05, "learning_rate": 9.131193871579975e-05, "loss": 46.0, "step": 42 }, { "epoch": 0.7257383966244726, "grad_norm": 6.206992111401632e-05, "learning_rate": 9.077804344796302e-05, "loss": 46.0, "step": 43 }, { "epoch": 0.7426160337552743, "grad_norm": 9.596747986506671e-05, "learning_rate": 9.022988898833342e-05, "loss": 46.0, "step": 44 }, { "epoch": 0.759493670886076, "grad_norm": 8.014837658265606e-05, "learning_rate": 8.966766701456177e-05, "loss": 46.0, "step": 45 }, { "epoch": 0.7763713080168776, "grad_norm": 7.777752034598961e-05, "learning_rate": 8.90915741234015e-05, "loss": 46.0, "step": 46 }, { "epoch": 0.7932489451476793, "grad_norm": 7.10346648702398e-05, "learning_rate": 8.850181176196315e-05, "loss": 46.0, "step": 47 }, { "epoch": 0.810126582278481, "grad_norm": 8.740553312236443e-05, "learning_rate": 8.789858615727265e-05, "loss": 46.0, "step": 48 }, { "epoch": 0.8270042194092827, "grad_norm": 8.364198583876714e-05, "learning_rate": 8.728210824415827e-05, "loss": 46.0, "step": 49 }, { "epoch": 0.8438818565400844, "grad_norm": 8.315607556141913e-05, "learning_rate": 8.665259359149132e-05, "loss": 46.0, "step": 50 }, { "epoch": 0.8438818565400844, "eval_loss": 11.5, "eval_runtime": 0.4558, "eval_samples_per_second": 219.398, "eval_steps_per_second": 54.849, "step": 50 }, { "epoch": 0.8607594936708861, "grad_norm": 0.00010916520841419697, "learning_rate": 8.601026232680634e-05, "loss": 46.0, "step": 51 }, { "epoch": 0.8776371308016878, "grad_norm": 0.00012586795492097735, "learning_rate": 8.535533905932738e-05, "loss": 46.0, "step": 52 }, { "epoch": 0.8945147679324894, "grad_norm": 0.00015618074394296855, "learning_rate": 8.468805280142709e-05, "loss": 46.0, "step": 53 }, { "epoch": 0.9113924050632911, "grad_norm": 0.00016819369920995086, "learning_rate": 8.400863688854597e-05, "loss": 46.0, "step": 54 }, { "epoch": 0.9282700421940928, "grad_norm": 0.000174582441104576, "learning_rate": 8.33173288976002e-05, "loss": 46.0, "step": 55 }, { "epoch": 0.9451476793248945, "grad_norm": 0.00016325573960784823, "learning_rate": 8.261437056390606e-05, "loss": 46.0, "step": 56 }, { "epoch": 0.9620253164556962, "grad_norm": 0.00010732583177741617, "learning_rate": 8.190000769665044e-05, "loss": 46.0, "step": 57 }, { "epoch": 0.9789029535864979, "grad_norm": 0.00013193223276175559, "learning_rate": 8.117449009293668e-05, "loss": 46.0, "step": 58 }, { "epoch": 0.9957805907172996, "grad_norm": 0.00013810786185786128, "learning_rate": 8.043807145043604e-05, "loss": 46.0, "step": 59 }, { "epoch": 1.0126582278481013, "grad_norm": 0.0001639191759750247, "learning_rate": 7.969100927867507e-05, "loss": 46.0, "step": 60 }, { "epoch": 1.029535864978903, "grad_norm": 0.00013262128049973398, "learning_rate": 7.89335648089903e-05, "loss": 46.0, "step": 61 }, { "epoch": 1.0464135021097047, "grad_norm": 0.00011318427277728915, "learning_rate": 7.81660029031811e-05, "loss": 46.0, "step": 62 }, { "epoch": 1.0632911392405062, "grad_norm": 9.013406815938652e-05, "learning_rate": 7.738859196089358e-05, "loss": 46.0, "step": 63 }, { "epoch": 1.080168776371308, "grad_norm": 0.00016967093688435853, "learning_rate": 7.660160382576683e-05, "loss": 46.0, "step": 64 }, { "epoch": 1.0970464135021096, "grad_norm": 9.265363769372925e-05, "learning_rate": 7.580531369037533e-05, "loss": 46.0, "step": 65 }, { "epoch": 1.1139240506329113, "grad_norm": 0.00015064820763655007, "learning_rate": 7.500000000000001e-05, "loss": 46.0, "step": 66 }, { "epoch": 1.130801687763713, "grad_norm": 0.00017271583783440292, "learning_rate": 7.4185944355262e-05, "loss": 46.0, "step": 67 }, { "epoch": 1.1476793248945147, "grad_norm": 0.0001652558712521568, "learning_rate": 7.33634314136531e-05, "loss": 46.0, "step": 68 }, { "epoch": 1.1645569620253164, "grad_norm": 0.0001693817030172795, "learning_rate": 7.253274878999727e-05, "loss": 46.0, "step": 69 }, { "epoch": 1.1814345991561181, "grad_norm": 0.0001843701465986669, "learning_rate": 7.169418695587791e-05, "loss": 46.0, "step": 70 }, { "epoch": 1.1983122362869199, "grad_norm": 0.0002148488420061767, "learning_rate": 7.084803913806641e-05, "loss": 46.0, "step": 71 }, { "epoch": 1.2151898734177216, "grad_norm": 0.00015176553279161453, "learning_rate": 6.999460121598704e-05, "loss": 46.0, "step": 72 }, { "epoch": 1.2320675105485233, "grad_norm": 0.00032408779952675104, "learning_rate": 6.91341716182545e-05, "loss": 46.0, "step": 73 }, { "epoch": 1.248945147679325, "grad_norm": 0.00020304243662394583, "learning_rate": 6.826705121831976e-05, "loss": 46.0, "step": 74 }, { "epoch": 1.2658227848101267, "grad_norm": 0.00017582262807991356, "learning_rate": 6.739354322926136e-05, "loss": 46.0, "step": 75 }, { "epoch": 1.2827004219409281, "grad_norm": 0.00014730200928170234, "learning_rate": 6.651395309775837e-05, "loss": 46.0, "step": 76 }, { "epoch": 1.29957805907173, "grad_norm": 0.00017154941451735795, "learning_rate": 6.562858839728223e-05, "loss": 46.0, "step": 77 }, { "epoch": 1.3164556962025316, "grad_norm": 0.0001536866184324026, "learning_rate": 6.473775872054521e-05, "loss": 46.0, "step": 78 }, { "epoch": 1.3333333333333333, "grad_norm": 0.00021167498198337853, "learning_rate": 6.384177557124247e-05, "loss": 46.0, "step": 79 }, { "epoch": 1.350210970464135, "grad_norm": 0.00025016022846102715, "learning_rate": 6.294095225512603e-05, "loss": 46.0, "step": 80 }, { "epoch": 1.3670886075949367, "grad_norm": 0.00016790356312412769, "learning_rate": 6.203560377044866e-05, "loss": 46.0, "step": 81 }, { "epoch": 1.3839662447257384, "grad_norm": 0.00023659625730942935, "learning_rate": 6.112604669781572e-05, "loss": 46.0, "step": 82 }, { "epoch": 1.40084388185654, "grad_norm": 0.00020904654229525477, "learning_rate": 6.021259908948402e-05, "loss": 46.0, "step": 83 }, { "epoch": 1.4177215189873418, "grad_norm": 0.00021072087110951543, "learning_rate": 5.9295580358145744e-05, "loss": 46.0, "step": 84 }, { "epoch": 1.4345991561181435, "grad_norm": 0.0002617282443679869, "learning_rate": 5.837531116523682e-05, "loss": 46.0, "step": 85 }, { "epoch": 1.4514767932489452, "grad_norm": 0.0003239940560888499, "learning_rate": 5.745211330880872e-05, "loss": 46.0, "step": 86 }, { "epoch": 1.4683544303797469, "grad_norm": 0.0002726827224250883, "learning_rate": 5.6526309611002594e-05, "loss": 46.0, "step": 87 }, { "epoch": 1.4852320675105486, "grad_norm": 0.00014856709458399564, "learning_rate": 5.559822380516539e-05, "loss": 46.0, "step": 88 }, { "epoch": 1.50210970464135, "grad_norm": 0.0001399767934344709, "learning_rate": 5.466818042264753e-05, "loss": 46.0, "step": 89 }, { "epoch": 1.518987341772152, "grad_norm": 0.00017444368859287351, "learning_rate": 5.373650467932122e-05, "loss": 46.0, "step": 90 }, { "epoch": 1.5358649789029535, "grad_norm": 0.00011793487647082657, "learning_rate": 5.2803522361859594e-05, "loss": 46.0, "step": 91 }, { "epoch": 1.5527426160337554, "grad_norm": 0.0001796389406081289, "learning_rate": 5.18695597138163e-05, "loss": 46.0, "step": 92 }, { "epoch": 1.5696202531645569, "grad_norm": 0.00024031831708271056, "learning_rate": 5.0934943321545115e-05, "loss": 46.0, "step": 93 }, { "epoch": 1.5864978902953588, "grad_norm": 0.00025376968551427126, "learning_rate": 5e-05, "loss": 46.0, "step": 94 }, { "epoch": 1.6033755274261603, "grad_norm": 0.00026438775239512324, "learning_rate": 4.9065056678454904e-05, "loss": 46.0, "step": 95 }, { "epoch": 1.620253164556962, "grad_norm": 0.0002723085635807365, "learning_rate": 4.813044028618373e-05, "loss": 46.0, "step": 96 }, { "epoch": 1.6371308016877637, "grad_norm": 0.00025567712145857513, "learning_rate": 4.7196477638140404e-05, "loss": 46.0, "step": 97 }, { "epoch": 1.6540084388185654, "grad_norm": 0.00031465181382372975, "learning_rate": 4.626349532067879e-05, "loss": 46.0, "step": 98 }, { "epoch": 1.6708860759493671, "grad_norm": 0.00022951647406443954, "learning_rate": 4.5331819577352474e-05, "loss": 46.0, "step": 99 }, { "epoch": 1.6877637130801688, "grad_norm": 0.0003478522994555533, "learning_rate": 4.4401776194834613e-05, "loss": 46.0, "step": 100 }, { "epoch": 1.6877637130801688, "eval_loss": 11.5, "eval_runtime": 0.4548, "eval_samples_per_second": 219.873, "eval_steps_per_second": 54.968, "step": 100 }, { "epoch": 1.7046413502109705, "grad_norm": 0.00026516575599089265, "learning_rate": 4.347369038899744e-05, "loss": 46.0, "step": 101 }, { "epoch": 1.721518987341772, "grad_norm": 0.00024240474158432335, "learning_rate": 4.254788669119127e-05, "loss": 46.0, "step": 102 }, { "epoch": 1.738396624472574, "grad_norm": 0.00023591986973769963, "learning_rate": 4.162468883476319e-05, "loss": 46.0, "step": 103 }, { "epoch": 1.7552742616033754, "grad_norm": 0.0003183831868227571, "learning_rate": 4.0704419641854274e-05, "loss": 46.0, "step": 104 }, { "epoch": 1.7721518987341773, "grad_norm": 0.0002121532306773588, "learning_rate": 3.978740091051599e-05, "loss": 46.0, "step": 105 }, { "epoch": 1.7890295358649788, "grad_norm": 0.0003402355359867215, "learning_rate": 3.887395330218429e-05, "loss": 46.0, "step": 106 }, { "epoch": 1.8059071729957807, "grad_norm": 0.00024602783378213644, "learning_rate": 3.7964396229551364e-05, "loss": 46.0, "step": 107 }, { "epoch": 1.8227848101265822, "grad_norm": 0.0003652887826319784, "learning_rate": 3.705904774487396e-05, "loss": 46.0, "step": 108 }, { "epoch": 1.839662447257384, "grad_norm": 0.00026453068130649626, "learning_rate": 3.6158224428757535e-05, "loss": 46.0, "step": 109 }, { "epoch": 1.8565400843881856, "grad_norm": 0.000420587370172143, "learning_rate": 3.5262241279454785e-05, "loss": 46.0, "step": 110 }, { "epoch": 1.8734177215189873, "grad_norm": 0.00035577849484980106, "learning_rate": 3.4371411602717784e-05, "loss": 46.0, "step": 111 }, { "epoch": 1.890295358649789, "grad_norm": 0.00026039552176371217, "learning_rate": 3.3486046902241664e-05, "loss": 46.0, "step": 112 }, { "epoch": 1.9071729957805907, "grad_norm": 0.0004721936129499227, "learning_rate": 3.2606456770738636e-05, "loss": 46.0, "step": 113 }, { "epoch": 1.9240506329113924, "grad_norm": 0.00035611429484561086, "learning_rate": 3.173294878168025e-05, "loss": 46.0, "step": 114 }, { "epoch": 1.9409282700421941, "grad_norm": 0.0003153131401631981, "learning_rate": 3.086582838174551e-05, "loss": 46.0, "step": 115 }, { "epoch": 1.9578059071729959, "grad_norm": 0.00016873011190909892, "learning_rate": 3.000539878401296e-05, "loss": 46.0, "step": 116 }, { "epoch": 1.9746835443037973, "grad_norm": 0.0004188524326309562, "learning_rate": 2.9151960861933614e-05, "loss": 46.0, "step": 117 }, { "epoch": 1.9915611814345993, "grad_norm": 0.00036706108949147165, "learning_rate": 2.8305813044122097e-05, "loss": 46.0, "step": 118 }, { "epoch": 2.0084388185654007, "grad_norm": 0.00030106233316473663, "learning_rate": 2.746725121000273e-05, "loss": 46.0, "step": 119 }, { "epoch": 2.0253164556962027, "grad_norm": 0.000258777872659266, "learning_rate": 2.66365685863469e-05, "loss": 46.0, "step": 120 }, { "epoch": 2.042194092827004, "grad_norm": 0.00025581257068552077, "learning_rate": 2.581405564473801e-05, "loss": 46.0, "step": 121 }, { "epoch": 2.059071729957806, "grad_norm": 0.00021439410920720547, "learning_rate": 2.500000000000001e-05, "loss": 46.0, "step": 122 }, { "epoch": 2.0759493670886076, "grad_norm": 0.0002857085200957954, "learning_rate": 2.4194686309624663e-05, "loss": 46.0, "step": 123 }, { "epoch": 2.0928270042194095, "grad_norm": 0.00017036408826243132, "learning_rate": 2.3398396174233178e-05, "loss": 46.0, "step": 124 }, { "epoch": 2.109704641350211, "grad_norm": 0.0003021091397386044, "learning_rate": 2.261140803910644e-05, "loss": 46.0, "step": 125 }, { "epoch": 2.1265822784810124, "grad_norm": 0.0003933632397092879, "learning_rate": 2.1833997096818898e-05, "loss": 46.0, "step": 126 }, { "epoch": 2.1434599156118144, "grad_norm": 0.00024831638438627124, "learning_rate": 2.1066435191009715e-05, "loss": 46.0, "step": 127 }, { "epoch": 2.160337552742616, "grad_norm": 0.00022113206796348095, "learning_rate": 2.0308990721324927e-05, "loss": 46.0, "step": 128 }, { "epoch": 2.1772151898734178, "grad_norm": 0.0004507877165451646, "learning_rate": 1.9561928549563968e-05, "loss": 46.0, "step": 129 }, { "epoch": 2.1940928270042193, "grad_norm": 0.0003287454601377249, "learning_rate": 1.8825509907063327e-05, "loss": 46.0, "step": 130 }, { "epoch": 2.210970464135021, "grad_norm": 0.0005602631135843694, "learning_rate": 1.8099992303349577e-05, "loss": 46.0, "step": 131 }, { "epoch": 2.2278481012658227, "grad_norm": 0.00033776002237573266, "learning_rate": 1.738562943609396e-05, "loss": 46.0, "step": 132 }, { "epoch": 2.2447257383966246, "grad_norm": 0.00023615958343725652, "learning_rate": 1.6682671102399805e-05, "loss": 46.0, "step": 133 }, { "epoch": 2.261603375527426, "grad_norm": 0.0002480424882378429, "learning_rate": 1.599136311145402e-05, "loss": 46.0, "step": 134 }, { "epoch": 2.278481012658228, "grad_norm": 0.00016537895135115832, "learning_rate": 1.531194719857292e-05, "loss": 46.0, "step": 135 }, { "epoch": 2.2953586497890295, "grad_norm": 0.00034641518141143024, "learning_rate": 1.4644660940672627e-05, "loss": 46.0, "step": 136 }, { "epoch": 2.3122362869198314, "grad_norm": 0.0002024546847678721, "learning_rate": 1.398973767319368e-05, "loss": 46.0, "step": 137 }, { "epoch": 2.329113924050633, "grad_norm": 0.00029308354714885354, "learning_rate": 1.3347406408508695e-05, "loss": 46.0, "step": 138 }, { "epoch": 2.3459915611814344, "grad_norm": 0.00040009443182498217, "learning_rate": 1.2717891755841722e-05, "loss": 46.0, "step": 139 }, { "epoch": 2.3628691983122363, "grad_norm": 0.00043195937178097665, "learning_rate": 1.2101413842727345e-05, "loss": 46.0, "step": 140 }, { "epoch": 2.379746835443038, "grad_norm": 0.00037425121990963817, "learning_rate": 1.1498188238036861e-05, "loss": 46.0, "step": 141 }, { "epoch": 2.3966244725738397, "grad_norm": 0.0004133072798140347, "learning_rate": 1.090842587659851e-05, "loss": 46.0, "step": 142 }, { "epoch": 2.413502109704641, "grad_norm": 0.0004287827177904546, "learning_rate": 1.0332332985438248e-05, "loss": 46.0, "step": 143 }, { "epoch": 2.430379746835443, "grad_norm": 0.0003258808283135295, "learning_rate": 9.770111011666583e-06, "loss": 46.0, "step": 144 }, { "epoch": 2.4472573839662446, "grad_norm": 0.0005163907189853489, "learning_rate": 9.221956552036992e-06, "loss": 46.0, "step": 145 }, { "epoch": 2.4641350210970465, "grad_norm": 0.0003076877328567207, "learning_rate": 8.688061284200266e-06, "loss": 46.0, "step": 146 }, { "epoch": 2.481012658227848, "grad_norm": 0.00027283711824566126, "learning_rate": 8.168611899679013e-06, "loss": 46.0, "step": 147 }, { "epoch": 2.49789029535865, "grad_norm": 0.00025697125238366425, "learning_rate": 7.663790038585793e-06, "loss": 46.0, "step": 148 }, { "epoch": 2.5147679324894514, "grad_norm": 0.0003032241074834019, "learning_rate": 7.173772226107434e-06, "loss": 46.0, "step": 149 }, { "epoch": 2.5316455696202533, "grad_norm": 0.0003130268305540085, "learning_rate": 6.698729810778065e-06, "loss": 46.0, "step": 150 }, { "epoch": 2.5316455696202533, "eval_loss": 11.5, "eval_runtime": 0.4549, "eval_samples_per_second": 219.836, "eval_steps_per_second": 54.959, "step": 150 }, { "epoch": 2.548523206751055, "grad_norm": 0.0002957537362817675, "learning_rate": 6.238828904562316e-06, "loss": 46.0, "step": 151 }, { "epoch": 2.5654008438818563, "grad_norm": 0.0003422394802328199, "learning_rate": 5.794230324769517e-06, "loss": 46.0, "step": 152 }, { "epoch": 2.5822784810126582, "grad_norm": 0.0002922121493611485, "learning_rate": 5.365089537819434e-06, "loss": 46.0, "step": 153 }, { "epoch": 2.59915611814346, "grad_norm": 0.0002365944819757715, "learning_rate": 4.951556604879048e-06, "loss": 46.0, "step": 154 }, { "epoch": 2.6160337552742616, "grad_norm": 0.00023224019969347864, "learning_rate": 4.5537761293894535e-06, "loss": 46.0, "step": 155 }, { "epoch": 2.632911392405063, "grad_norm": 0.0006254777545109391, "learning_rate": 4.1718872065011904e-06, "loss": 46.0, "step": 156 }, { "epoch": 2.649789029535865, "grad_norm": 0.0002028813469223678, "learning_rate": 3.8060233744356633e-06, "loss": 46.0, "step": 157 }, { "epoch": 2.6666666666666665, "grad_norm": 0.0004217730602249503, "learning_rate": 3.4563125677897932e-06, "loss": 46.0, "step": 158 }, { "epoch": 2.6835443037974684, "grad_norm": 0.00040401105070486665, "learning_rate": 3.1228770728000455e-06, "loss": 46.0, "step": 159 }, { "epoch": 2.70042194092827, "grad_norm": 0.0005044445861130953, "learning_rate": 2.8058334845816213e-06, "loss": 46.0, "step": 160 }, { "epoch": 2.717299578059072, "grad_norm": 0.00028344604652374983, "learning_rate": 2.5052926663577e-06, "loss": 46.0, "step": 161 }, { "epoch": 2.7341772151898733, "grad_norm": 0.00025991967413574457, "learning_rate": 2.221359710692961e-06, "loss": 46.0, "step": 162 }, { "epoch": 2.7510548523206753, "grad_norm": 0.00036968718632124364, "learning_rate": 1.9541339027450256e-06, "loss": 46.0, "step": 163 }, { "epoch": 2.7679324894514767, "grad_norm": 0.0002495154330972582, "learning_rate": 1.70370868554659e-06, "loss": 46.0, "step": 164 }, { "epoch": 2.7848101265822782, "grad_norm": 0.00027114481781609356, "learning_rate": 1.4701716273304521e-06, "loss": 46.0, "step": 165 }, { "epoch": 2.80168776371308, "grad_norm": 0.00027105191838927567, "learning_rate": 1.2536043909088191e-06, "loss": 46.0, "step": 166 }, { "epoch": 2.818565400843882, "grad_norm": 0.00027963874163106084, "learning_rate": 1.0540827051175818e-06, "loss": 46.0, "step": 167 }, { "epoch": 2.8354430379746836, "grad_norm": 0.0003596085589379072, "learning_rate": 8.716763383355864e-07, "loss": 46.0, "step": 168 }, { "epoch": 2.852320675105485, "grad_norm": 0.0002700402110349387, "learning_rate": 7.064490740882057e-07, "loss": 46.0, "step": 169 }, { "epoch": 2.869198312236287, "grad_norm": 0.0003367900208104402, "learning_rate": 5.584586887435739e-07, "loss": 46.0, "step": 170 }, { "epoch": 2.8860759493670884, "grad_norm": 0.0005412886966951191, "learning_rate": 4.277569313094809e-07, "loss": 46.0, "step": 171 }, { "epoch": 2.9029535864978904, "grad_norm": 0.0004628577153198421, "learning_rate": 3.143895053378698e-07, "loss": 46.0, "step": 172 }, { "epoch": 2.919831223628692, "grad_norm": 0.00022919590992387384, "learning_rate": 2.1839605294330933e-07, "loss": 46.0, "step": 173 }, { "epoch": 2.9367088607594938, "grad_norm": 0.00035376212326809764, "learning_rate": 1.3981014094099353e-07, "loss": 46.0, "step": 174 }, { "epoch": 2.9535864978902953, "grad_norm": 0.0006031348602846265, "learning_rate": 7.865924910916977e-08, "loss": 46.0, "step": 175 }, { "epoch": 2.970464135021097, "grad_norm": 0.0004883770016022027, "learning_rate": 3.496476058006959e-08, "loss": 46.0, "step": 176 }, { "epoch": 2.9873417721518987, "grad_norm": 0.00046055944403633475, "learning_rate": 8.741954362678772e-09, "loss": 46.0, "step": 177 }, { "epoch": 3.0042194092827006, "grad_norm": 0.0002950435155071318, "learning_rate": 0.0, "loss": 46.0, "step": 178 } ], "logging_steps": 1, "max_steps": 178, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 2 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 28621031866368.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }