{ "best_metric": 0.4131671190261841, "best_model_checkpoint": "miner_id_24/checkpoint-200", "epoch": 0.005134458635517617, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.5672293177588086e-05, "grad_norm": 1.026159644126892, "learning_rate": 1.0060000000000002e-05, "loss": 1.2569, "step": 1 }, { "epoch": 2.5672293177588086e-05, "eval_loss": 1.1921963691711426, "eval_runtime": 1183.8788, "eval_samples_per_second": 13.854, "eval_steps_per_second": 3.464, "step": 1 }, { "epoch": 5.134458635517617e-05, "grad_norm": 1.255319595336914, "learning_rate": 2.0120000000000004e-05, "loss": 1.2297, "step": 2 }, { "epoch": 7.701687953276427e-05, "grad_norm": 1.6948285102844238, "learning_rate": 3.018e-05, "loss": 1.6159, "step": 3 }, { "epoch": 0.00010268917271035235, "grad_norm": 1.570288062095642, "learning_rate": 4.024000000000001e-05, "loss": 1.5701, "step": 4 }, { "epoch": 0.00012836146588794044, "grad_norm": 1.4998438358306885, "learning_rate": 5.03e-05, "loss": 1.7115, "step": 5 }, { "epoch": 0.00015403375906552854, "grad_norm": 1.805379867553711, "learning_rate": 6.036e-05, "loss": 1.7774, "step": 6 }, { "epoch": 0.00017970605224311662, "grad_norm": 2.1143808364868164, "learning_rate": 7.042e-05, "loss": 2.1946, "step": 7 }, { "epoch": 0.0002053783454207047, "grad_norm": 2.083022356033325, "learning_rate": 8.048000000000002e-05, "loss": 1.9931, "step": 8 }, { "epoch": 0.0002310506385982928, "grad_norm": 2.2490415573120117, "learning_rate": 9.054000000000001e-05, "loss": 2.7258, "step": 9 }, { "epoch": 0.00025672293177588087, "grad_norm": 1.9612363576889038, "learning_rate": 0.0001006, "loss": 1.7879, "step": 10 }, { "epoch": 0.00028239522495346895, "grad_norm": 2.7282867431640625, "learning_rate": 0.00010007052631578948, "loss": 2.3426, "step": 11 }, { "epoch": 0.0003080675181310571, "grad_norm": 2.3359768390655518, "learning_rate": 9.954105263157895e-05, "loss": 1.8971, "step": 12 }, { "epoch": 0.00033373981130864515, "grad_norm": 2.0164449214935303, "learning_rate": 9.901157894736842e-05, "loss": 1.8824, "step": 13 }, { "epoch": 0.00035941210448623323, "grad_norm": 2.7772645950317383, "learning_rate": 9.84821052631579e-05, "loss": 2.6385, "step": 14 }, { "epoch": 0.0003850843976638213, "grad_norm": 2.503121852874756, "learning_rate": 9.795263157894737e-05, "loss": 1.9396, "step": 15 }, { "epoch": 0.0004107566908414094, "grad_norm": 2.5371038913726807, "learning_rate": 9.742315789473686e-05, "loss": 2.1417, "step": 16 }, { "epoch": 0.0004364289840189975, "grad_norm": 3.1160120964050293, "learning_rate": 9.689368421052633e-05, "loss": 2.4972, "step": 17 }, { "epoch": 0.0004621012771965856, "grad_norm": 3.7234385013580322, "learning_rate": 9.63642105263158e-05, "loss": 2.569, "step": 18 }, { "epoch": 0.00048777357037417367, "grad_norm": 3.364891290664673, "learning_rate": 9.583473684210527e-05, "loss": 2.4822, "step": 19 }, { "epoch": 0.0005134458635517617, "grad_norm": 9.361184120178223, "learning_rate": 9.530526315789474e-05, "loss": 1.3435, "step": 20 }, { "epoch": 0.0005391181567293499, "grad_norm": 5.888495445251465, "learning_rate": 9.477578947368422e-05, "loss": 0.8447, "step": 21 }, { "epoch": 0.0005647904499069379, "grad_norm": 4.862771511077881, "learning_rate": 9.424631578947369e-05, "loss": 0.4383, "step": 22 }, { "epoch": 0.000590462743084526, "grad_norm": 1.6988191604614258, "learning_rate": 9.371684210526316e-05, "loss": 0.1793, "step": 23 }, { "epoch": 0.0006161350362621142, "grad_norm": 0.5420213341712952, "learning_rate": 9.318736842105263e-05, "loss": 0.0468, "step": 24 }, { "epoch": 0.0006418073294397022, "grad_norm": 0.26038220524787903, "learning_rate": 9.26578947368421e-05, "loss": 0.0137, "step": 25 }, { "epoch": 0.0006674796226172903, "grad_norm": 0.11139990389347076, "learning_rate": 9.212842105263159e-05, "loss": 0.004, "step": 26 }, { "epoch": 0.0006931519157948783, "grad_norm": 0.030027125030755997, "learning_rate": 9.159894736842107e-05, "loss": 0.0012, "step": 27 }, { "epoch": 0.0007188242089724665, "grad_norm": 0.015038474462926388, "learning_rate": 9.106947368421054e-05, "loss": 0.0005, "step": 28 }, { "epoch": 0.0007444965021500546, "grad_norm": 0.008915350772440434, "learning_rate": 9.054000000000001e-05, "loss": 0.0002, "step": 29 }, { "epoch": 0.0007701687953276426, "grad_norm": 0.006986498832702637, "learning_rate": 9.001052631578948e-05, "loss": 0.0002, "step": 30 }, { "epoch": 0.0007958410885052307, "grad_norm": 0.0063736108131706715, "learning_rate": 8.948105263157895e-05, "loss": 0.0001, "step": 31 }, { "epoch": 0.0008215133816828188, "grad_norm": 0.007324839010834694, "learning_rate": 8.895157894736842e-05, "loss": 0.0001, "step": 32 }, { "epoch": 0.0008471856748604069, "grad_norm": 0.010746892541646957, "learning_rate": 8.842210526315789e-05, "loss": 0.0002, "step": 33 }, { "epoch": 0.000872857968037995, "grad_norm": 0.014329284429550171, "learning_rate": 8.789263157894738e-05, "loss": 0.0002, "step": 34 }, { "epoch": 0.000898530261215583, "grad_norm": 0.012248692102730274, "learning_rate": 8.736315789473685e-05, "loss": 0.0002, "step": 35 }, { "epoch": 0.0009242025543931712, "grad_norm": 0.009668469429016113, "learning_rate": 8.683368421052632e-05, "loss": 0.0001, "step": 36 }, { "epoch": 0.0009498748475707593, "grad_norm": 0.007297456730157137, "learning_rate": 8.63042105263158e-05, "loss": 0.0001, "step": 37 }, { "epoch": 0.0009755471407483473, "grad_norm": 0.005695601459592581, "learning_rate": 8.577473684210527e-05, "loss": 0.0001, "step": 38 }, { "epoch": 0.0010012194339259354, "grad_norm": 0.004724627826362848, "learning_rate": 8.524526315789474e-05, "loss": 0.0001, "step": 39 }, { "epoch": 0.0010268917271035235, "grad_norm": 0.0041098096407949924, "learning_rate": 8.471578947368421e-05, "loss": 0.0001, "step": 40 }, { "epoch": 0.0010525640202811116, "grad_norm": 0.003558989381417632, "learning_rate": 8.41863157894737e-05, "loss": 0.0001, "step": 41 }, { "epoch": 0.0010782363134586997, "grad_norm": 0.00332538690418005, "learning_rate": 8.365684210526317e-05, "loss": 0.0001, "step": 42 }, { "epoch": 0.0011039086066362879, "grad_norm": 0.0027524020988494158, "learning_rate": 8.312736842105264e-05, "loss": 0.0001, "step": 43 }, { "epoch": 0.0011295808998138758, "grad_norm": 0.002535044215619564, "learning_rate": 8.259789473684211e-05, "loss": 0.0001, "step": 44 }, { "epoch": 0.001155253192991464, "grad_norm": 0.00231631426140666, "learning_rate": 8.206842105263158e-05, "loss": 0.0, "step": 45 }, { "epoch": 0.001180925486169052, "grad_norm": 0.0019382525933906436, "learning_rate": 8.153894736842105e-05, "loss": 0.0, "step": 46 }, { "epoch": 0.0012065977793466402, "grad_norm": 0.022770335897803307, "learning_rate": 8.100947368421053e-05, "loss": 0.0001, "step": 47 }, { "epoch": 0.0012322700725242283, "grad_norm": 0.0018689745338633657, "learning_rate": 8.048000000000002e-05, "loss": 0.0, "step": 48 }, { "epoch": 0.0012579423657018162, "grad_norm": 20.690397262573242, "learning_rate": 7.995052631578949e-05, "loss": 4.2531, "step": 49 }, { "epoch": 0.0012836146588794044, "grad_norm": 18.80559730529785, "learning_rate": 7.942105263157896e-05, "loss": 3.8468, "step": 50 }, { "epoch": 0.0012836146588794044, "eval_loss": 0.5789145231246948, "eval_runtime": 1182.6905, "eval_samples_per_second": 13.868, "eval_steps_per_second": 3.468, "step": 50 }, { "epoch": 0.0013092869520569925, "grad_norm": 2.402365207672119, "learning_rate": 7.889157894736843e-05, "loss": 1.2955, "step": 51 }, { "epoch": 0.0013349592452345806, "grad_norm": 2.3085319995880127, "learning_rate": 7.83621052631579e-05, "loss": 1.9171, "step": 52 }, { "epoch": 0.0013606315384121687, "grad_norm": 1.7359583377838135, "learning_rate": 7.783263157894737e-05, "loss": 1.5831, "step": 53 }, { "epoch": 0.0013863038315897567, "grad_norm": 1.780971884727478, "learning_rate": 7.730315789473684e-05, "loss": 1.9551, "step": 54 }, { "epoch": 0.0014119761247673448, "grad_norm": 1.7606279850006104, "learning_rate": 7.677368421052632e-05, "loss": 1.7301, "step": 55 }, { "epoch": 0.001437648417944933, "grad_norm": 1.6475731134414673, "learning_rate": 7.624421052631579e-05, "loss": 1.8897, "step": 56 }, { "epoch": 0.001463320711122521, "grad_norm": 1.541284203529358, "learning_rate": 7.571473684210526e-05, "loss": 1.8631, "step": 57 }, { "epoch": 0.0014889930043001092, "grad_norm": 1.9083011150360107, "learning_rate": 7.518526315789475e-05, "loss": 1.9604, "step": 58 }, { "epoch": 0.001514665297477697, "grad_norm": 1.855704665184021, "learning_rate": 7.465578947368422e-05, "loss": 2.3663, "step": 59 }, { "epoch": 0.0015403375906552852, "grad_norm": 1.8971003293991089, "learning_rate": 7.412631578947369e-05, "loss": 2.0379, "step": 60 }, { "epoch": 0.0015660098838328734, "grad_norm": 1.7835102081298828, "learning_rate": 7.359684210526317e-05, "loss": 2.1501, "step": 61 }, { "epoch": 0.0015916821770104615, "grad_norm": 1.6390843391418457, "learning_rate": 7.306736842105264e-05, "loss": 2.1538, "step": 62 }, { "epoch": 0.0016173544701880496, "grad_norm": 1.7119994163513184, "learning_rate": 7.253789473684211e-05, "loss": 1.7663, "step": 63 }, { "epoch": 0.0016430267633656375, "grad_norm": 1.6038694381713867, "learning_rate": 7.200842105263158e-05, "loss": 2.2391, "step": 64 }, { "epoch": 0.0016686990565432257, "grad_norm": 1.7166171073913574, "learning_rate": 7.147894736842105e-05, "loss": 2.1004, "step": 65 }, { "epoch": 0.0016943713497208138, "grad_norm": 1.7185120582580566, "learning_rate": 7.094947368421052e-05, "loss": 1.732, "step": 66 }, { "epoch": 0.001720043642898402, "grad_norm": 1.7218092679977417, "learning_rate": 7.042e-05, "loss": 1.92, "step": 67 }, { "epoch": 0.00174571593607599, "grad_norm": 1.944508671760559, "learning_rate": 6.989052631578948e-05, "loss": 1.8669, "step": 68 }, { "epoch": 0.001771388229253578, "grad_norm": 2.296661615371704, "learning_rate": 6.936105263157896e-05, "loss": 1.5057, "step": 69 }, { "epoch": 0.001797060522431166, "grad_norm": 2.606893539428711, "learning_rate": 6.883157894736843e-05, "loss": 2.3624, "step": 70 }, { "epoch": 0.0018227328156087542, "grad_norm": 5.274344444274902, "learning_rate": 6.83021052631579e-05, "loss": 2.6868, "step": 71 }, { "epoch": 0.0018484051087863424, "grad_norm": 2.480908155441284, "learning_rate": 6.777263157894737e-05, "loss": 1.9085, "step": 72 }, { "epoch": 0.0018740774019639305, "grad_norm": 1.2263275384902954, "learning_rate": 6.724315789473684e-05, "loss": 0.1152, "step": 73 }, { "epoch": 0.0018997496951415186, "grad_norm": 0.09067212790250778, "learning_rate": 6.671368421052631e-05, "loss": 0.0036, "step": 74 }, { "epoch": 0.0019254219883191065, "grad_norm": 0.04749782010912895, "learning_rate": 6.61842105263158e-05, "loss": 0.002, "step": 75 }, { "epoch": 0.0019510942814966947, "grad_norm": 0.03309526666998863, "learning_rate": 6.565473684210527e-05, "loss": 0.0014, "step": 76 }, { "epoch": 0.0019767665746742826, "grad_norm": 0.021400053054094315, "learning_rate": 6.512526315789474e-05, "loss": 0.0009, "step": 77 }, { "epoch": 0.0020024388678518707, "grad_norm": 0.017963755875825882, "learning_rate": 6.459578947368421e-05, "loss": 0.0008, "step": 78 }, { "epoch": 0.002028111161029459, "grad_norm": 1.1963677406311035, "learning_rate": 6.406631578947369e-05, "loss": 0.2937, "step": 79 }, { "epoch": 0.002053783454207047, "grad_norm": 0.010341464541852474, "learning_rate": 6.353684210526316e-05, "loss": 0.0005, "step": 80 }, { "epoch": 0.002079455747384635, "grad_norm": 0.00791017897427082, "learning_rate": 6.300736842105263e-05, "loss": 0.0004, "step": 81 }, { "epoch": 0.0021051280405622232, "grad_norm": 0.00704535935074091, "learning_rate": 6.247789473684212e-05, "loss": 0.0003, "step": 82 }, { "epoch": 0.0021308003337398114, "grad_norm": 0.005926445592194796, "learning_rate": 6.194842105263159e-05, "loss": 0.0003, "step": 83 }, { "epoch": 0.0021564726269173995, "grad_norm": 0.005512417294085026, "learning_rate": 6.141894736842106e-05, "loss": 0.0003, "step": 84 }, { "epoch": 0.0021821449200949876, "grad_norm": 0.005944954231381416, "learning_rate": 6.088947368421053e-05, "loss": 0.0003, "step": 85 }, { "epoch": 0.0022078172132725758, "grad_norm": 0.005636818241328001, "learning_rate": 6.036e-05, "loss": 0.0003, "step": 86 }, { "epoch": 0.0022334895064501635, "grad_norm": 0.005355035420507193, "learning_rate": 5.9830526315789475e-05, "loss": 0.0003, "step": 87 }, { "epoch": 0.0022591617996277516, "grad_norm": 0.004906694870442152, "learning_rate": 5.9301052631578946e-05, "loss": 0.0002, "step": 88 }, { "epoch": 0.0022848340928053397, "grad_norm": 0.00455674109980464, "learning_rate": 5.877157894736843e-05, "loss": 0.0002, "step": 89 }, { "epoch": 0.002310506385982928, "grad_norm": 0.004359242040663958, "learning_rate": 5.82421052631579e-05, "loss": 0.0002, "step": 90 }, { "epoch": 0.002336178679160516, "grad_norm": 0.004265242256224155, "learning_rate": 5.771263157894737e-05, "loss": 0.0002, "step": 91 }, { "epoch": 0.002361850972338104, "grad_norm": 0.00403448985889554, "learning_rate": 5.718315789473685e-05, "loss": 0.0002, "step": 92 }, { "epoch": 0.0023875232655156922, "grad_norm": 0.003738979110494256, "learning_rate": 5.665368421052632e-05, "loss": 0.0002, "step": 93 }, { "epoch": 0.0024131955586932804, "grad_norm": 0.0035066159907728434, "learning_rate": 5.612421052631579e-05, "loss": 0.0002, "step": 94 }, { "epoch": 0.0024388678518708685, "grad_norm": 0.0033452173229306936, "learning_rate": 5.559473684210527e-05, "loss": 0.0001, "step": 95 }, { "epoch": 0.0024645401450484566, "grad_norm": 0.0031449589878320694, "learning_rate": 5.506526315789474e-05, "loss": 0.0001, "step": 96 }, { "epoch": 0.0024902124382260448, "grad_norm": 0.8770474791526794, "learning_rate": 5.453578947368421e-05, "loss": 0.1372, "step": 97 }, { "epoch": 0.0025158847314036325, "grad_norm": 0.006977991200983524, "learning_rate": 5.400631578947369e-05, "loss": 0.0002, "step": 98 }, { "epoch": 0.0025415570245812206, "grad_norm": 2.6762304306030273, "learning_rate": 5.347684210526316e-05, "loss": 1.5861, "step": 99 }, { "epoch": 0.0025672293177588087, "grad_norm": 6.186676502227783, "learning_rate": 5.294736842105263e-05, "loss": 3.1351, "step": 100 }, { "epoch": 0.0025672293177588087, "eval_loss": 0.444609671831131, "eval_runtime": 1180.4892, "eval_samples_per_second": 13.894, "eval_steps_per_second": 3.474, "step": 100 }, { "epoch": 0.002592901610936397, "grad_norm": 1.0906559228897095, "learning_rate": 5.24178947368421e-05, "loss": 1.5745, "step": 101 }, { "epoch": 0.002618573904113985, "grad_norm": 1.057900071144104, "learning_rate": 5.1888421052631585e-05, "loss": 1.359, "step": 102 }, { "epoch": 0.002644246197291573, "grad_norm": 1.1936546564102173, "learning_rate": 5.135894736842106e-05, "loss": 1.8506, "step": 103 }, { "epoch": 0.0026699184904691612, "grad_norm": 1.1742337942123413, "learning_rate": 5.082947368421053e-05, "loss": 1.4235, "step": 104 }, { "epoch": 0.0026955907836467494, "grad_norm": 1.4636642932891846, "learning_rate": 5.03e-05, "loss": 1.9341, "step": 105 }, { "epoch": 0.0027212630768243375, "grad_norm": 1.366550087928772, "learning_rate": 4.977052631578947e-05, "loss": 1.7735, "step": 106 }, { "epoch": 0.0027469353700019256, "grad_norm": 1.4252630472183228, "learning_rate": 4.924105263157895e-05, "loss": 1.8694, "step": 107 }, { "epoch": 0.0027726076631795133, "grad_norm": 1.5163335800170898, "learning_rate": 4.871157894736843e-05, "loss": 1.8035, "step": 108 }, { "epoch": 0.0027982799563571015, "grad_norm": 1.6696662902832031, "learning_rate": 4.81821052631579e-05, "loss": 2.4422, "step": 109 }, { "epoch": 0.0028239522495346896, "grad_norm": 1.823583960533142, "learning_rate": 4.765263157894737e-05, "loss": 2.0223, "step": 110 }, { "epoch": 0.0028496245427122777, "grad_norm": 1.7153486013412476, "learning_rate": 4.7123157894736845e-05, "loss": 2.3425, "step": 111 }, { "epoch": 0.002875296835889866, "grad_norm": 1.597476601600647, "learning_rate": 4.6593684210526316e-05, "loss": 1.9461, "step": 112 }, { "epoch": 0.002900969129067454, "grad_norm": 1.6382873058319092, "learning_rate": 4.606421052631579e-05, "loss": 1.7633, "step": 113 }, { "epoch": 0.002926641422245042, "grad_norm": 1.7374053001403809, "learning_rate": 4.553473684210527e-05, "loss": 2.0122, "step": 114 }, { "epoch": 0.0029523137154226302, "grad_norm": 1.976195216178894, "learning_rate": 4.500526315789474e-05, "loss": 2.218, "step": 115 }, { "epoch": 0.0029779860086002184, "grad_norm": 2.0346622467041016, "learning_rate": 4.447578947368421e-05, "loss": 2.3013, "step": 116 }, { "epoch": 0.0030036583017778065, "grad_norm": 1.756709337234497, "learning_rate": 4.394631578947369e-05, "loss": 1.9596, "step": 117 }, { "epoch": 0.003029330594955394, "grad_norm": 1.9882556200027466, "learning_rate": 4.341684210526316e-05, "loss": 2.2595, "step": 118 }, { "epoch": 0.0030550028881329823, "grad_norm": 2.60469388961792, "learning_rate": 4.2887368421052636e-05, "loss": 1.5153, "step": 119 }, { "epoch": 0.0030806751813105705, "grad_norm": 0.9778649806976318, "learning_rate": 4.2357894736842106e-05, "loss": 0.0113, "step": 120 }, { "epoch": 0.0031063474744881586, "grad_norm": 0.011567816138267517, "learning_rate": 4.182842105263158e-05, "loss": 0.0004, "step": 121 }, { "epoch": 0.0031320197676657467, "grad_norm": 0.010046404786407948, "learning_rate": 4.1298947368421053e-05, "loss": 0.0004, "step": 122 }, { "epoch": 0.003157692060843335, "grad_norm": 0.009395002387464046, "learning_rate": 4.0769473684210524e-05, "loss": 0.0003, "step": 123 }, { "epoch": 0.003183364354020923, "grad_norm": 0.008893662132322788, "learning_rate": 4.024000000000001e-05, "loss": 0.0003, "step": 124 }, { "epoch": 0.003209036647198511, "grad_norm": 0.008500020019710064, "learning_rate": 3.971052631578948e-05, "loss": 0.0003, "step": 125 }, { "epoch": 0.0032347089403760992, "grad_norm": 0.0076544624753296375, "learning_rate": 3.918105263157895e-05, "loss": 0.0003, "step": 126 }, { "epoch": 0.0032603812335536874, "grad_norm": 0.007298609241843224, "learning_rate": 3.865157894736842e-05, "loss": 0.0003, "step": 127 }, { "epoch": 0.003286053526731275, "grad_norm": 0.006563248578459024, "learning_rate": 3.8122105263157896e-05, "loss": 0.0002, "step": 128 }, { "epoch": 0.003311725819908863, "grad_norm": 0.006430802401155233, "learning_rate": 3.759263157894737e-05, "loss": 0.0002, "step": 129 }, { "epoch": 0.0033373981130864513, "grad_norm": 0.006386774126440287, "learning_rate": 3.7063157894736844e-05, "loss": 0.0002, "step": 130 }, { "epoch": 0.0033630704062640395, "grad_norm": 0.6408913731575012, "learning_rate": 3.653368421052632e-05, "loss": 0.0931, "step": 131 }, { "epoch": 0.0033887426994416276, "grad_norm": 0.005842825397849083, "learning_rate": 3.600421052631579e-05, "loss": 0.0002, "step": 132 }, { "epoch": 0.0034144149926192157, "grad_norm": 0.005725574214011431, "learning_rate": 3.547473684210526e-05, "loss": 0.0002, "step": 133 }, { "epoch": 0.003440087285796804, "grad_norm": 0.006068081129342318, "learning_rate": 3.494526315789474e-05, "loss": 0.0002, "step": 134 }, { "epoch": 0.003465759578974392, "grad_norm": 0.00514700124040246, "learning_rate": 3.4415789473684216e-05, "loss": 0.0002, "step": 135 }, { "epoch": 0.00349143187215198, "grad_norm": 0.005522563587874174, "learning_rate": 3.3886315789473686e-05, "loss": 0.0002, "step": 136 }, { "epoch": 0.0035171041653295682, "grad_norm": 0.0057330005802214146, "learning_rate": 3.3356842105263156e-05, "loss": 0.0002, "step": 137 }, { "epoch": 0.003542776458507156, "grad_norm": 0.0057954080402851105, "learning_rate": 3.2827368421052634e-05, "loss": 0.0002, "step": 138 }, { "epoch": 0.003568448751684744, "grad_norm": 0.005006751511245966, "learning_rate": 3.2297894736842104e-05, "loss": 0.0002, "step": 139 }, { "epoch": 0.003594121044862332, "grad_norm": 0.005032096989452839, "learning_rate": 3.176842105263158e-05, "loss": 0.0002, "step": 140 }, { "epoch": 0.0036197933380399203, "grad_norm": 0.004935144912451506, "learning_rate": 3.123894736842106e-05, "loss": 0.0002, "step": 141 }, { "epoch": 0.0036454656312175085, "grad_norm": 0.00508884247392416, "learning_rate": 3.070947368421053e-05, "loss": 0.0002, "step": 142 }, { "epoch": 0.0036711379243950966, "grad_norm": 0.004769055638462305, "learning_rate": 3.018e-05, "loss": 0.0002, "step": 143 }, { "epoch": 0.0036968102175726847, "grad_norm": 0.004287198651582003, "learning_rate": 2.9650526315789473e-05, "loss": 0.0002, "step": 144 }, { "epoch": 0.003722482510750273, "grad_norm": 0.004471136257052422, "learning_rate": 2.912105263157895e-05, "loss": 0.0002, "step": 145 }, { "epoch": 0.003748154803927861, "grad_norm": 0.004509101156145334, "learning_rate": 2.8591578947368424e-05, "loss": 0.0002, "step": 146 }, { "epoch": 0.003773827097105449, "grad_norm": 0.005288159940391779, "learning_rate": 2.8062105263157894e-05, "loss": 0.0002, "step": 147 }, { "epoch": 0.0037994993902830372, "grad_norm": 1.4431769847869873, "learning_rate": 2.753263157894737e-05, "loss": 0.4431, "step": 148 }, { "epoch": 0.003825171683460625, "grad_norm": 2.757993459701538, "learning_rate": 2.7003157894736845e-05, "loss": 1.9569, "step": 149 }, { "epoch": 0.003850843976638213, "grad_norm": 6.777806758880615, "learning_rate": 2.6473684210526315e-05, "loss": 2.2696, "step": 150 }, { "epoch": 0.003850843976638213, "eval_loss": 0.41922199726104736, "eval_runtime": 1184.8981, "eval_samples_per_second": 13.843, "eval_steps_per_second": 3.461, "step": 150 }, { "epoch": 0.003876516269815801, "grad_norm": 0.7326381802558899, "learning_rate": 2.5944210526315793e-05, "loss": 1.3282, "step": 151 }, { "epoch": 0.0039021885629933893, "grad_norm": 0.931328535079956, "learning_rate": 2.5414736842105266e-05, "loss": 1.2657, "step": 152 }, { "epoch": 0.003927860856170978, "grad_norm": 0.7979090213775635, "learning_rate": 2.4885263157894737e-05, "loss": 1.0431, "step": 153 }, { "epoch": 0.003953533149348565, "grad_norm": 0.8406013250350952, "learning_rate": 2.4355789473684214e-05, "loss": 1.5444, "step": 154 }, { "epoch": 0.003979205442526153, "grad_norm": 1.172977328300476, "learning_rate": 2.3826315789473684e-05, "loss": 2.2357, "step": 155 }, { "epoch": 0.004004877735703741, "grad_norm": 1.081085443496704, "learning_rate": 2.3296842105263158e-05, "loss": 1.9574, "step": 156 }, { "epoch": 0.0040305500288813296, "grad_norm": 1.213394284248352, "learning_rate": 2.2767368421052635e-05, "loss": 1.7966, "step": 157 }, { "epoch": 0.004056222322058918, "grad_norm": 1.3481943607330322, "learning_rate": 2.2237894736842105e-05, "loss": 2.1099, "step": 158 }, { "epoch": 0.004081894615236506, "grad_norm": 1.1967196464538574, "learning_rate": 2.170842105263158e-05, "loss": 1.7427, "step": 159 }, { "epoch": 0.004107566908414094, "grad_norm": 1.6480809450149536, "learning_rate": 2.1178947368421053e-05, "loss": 2.2222, "step": 160 }, { "epoch": 0.004133239201591682, "grad_norm": 1.4550552368164062, "learning_rate": 2.0649473684210527e-05, "loss": 1.5513, "step": 161 }, { "epoch": 0.00415891149476927, "grad_norm": 1.5787893533706665, "learning_rate": 2.0120000000000004e-05, "loss": 2.0812, "step": 162 }, { "epoch": 0.004184583787946858, "grad_norm": 1.6700499057769775, "learning_rate": 1.9590526315789474e-05, "loss": 2.0889, "step": 163 }, { "epoch": 0.0042102560811244465, "grad_norm": 1.2719978094100952, "learning_rate": 1.9061052631578948e-05, "loss": 1.6051, "step": 164 }, { "epoch": 0.004235928374302035, "grad_norm": 2.337200164794922, "learning_rate": 1.8531578947368422e-05, "loss": 2.2032, "step": 165 }, { "epoch": 0.004261600667479623, "grad_norm": 2.0356595516204834, "learning_rate": 1.8002105263157896e-05, "loss": 2.3998, "step": 166 }, { "epoch": 0.004287272960657211, "grad_norm": 1.9695961475372314, "learning_rate": 1.747263157894737e-05, "loss": 2.3592, "step": 167 }, { "epoch": 0.004312945253834799, "grad_norm": 2.0183303356170654, "learning_rate": 1.6943157894736843e-05, "loss": 2.1365, "step": 168 }, { "epoch": 0.004338617547012387, "grad_norm": 2.894932270050049, "learning_rate": 1.6413684210526317e-05, "loss": 2.4704, "step": 169 }, { "epoch": 0.004364289840189975, "grad_norm": 1.7332582473754883, "learning_rate": 1.588421052631579e-05, "loss": 0.8739, "step": 170 }, { "epoch": 0.004389962133367563, "grad_norm": 0.0018881710711866617, "learning_rate": 1.5354736842105264e-05, "loss": 0.0001, "step": 171 }, { "epoch": 0.0044156344265451515, "grad_norm": 0.0019908491522073746, "learning_rate": 1.4825263157894736e-05, "loss": 0.0001, "step": 172 }, { "epoch": 0.00444130671972274, "grad_norm": 0.0019480012124404311, "learning_rate": 1.4295789473684212e-05, "loss": 0.0001, "step": 173 }, { "epoch": 0.004466979012900327, "grad_norm": 0.0019311723299324512, "learning_rate": 1.3766315789473686e-05, "loss": 0.0001, "step": 174 }, { "epoch": 0.004492651306077915, "grad_norm": 0.0019914451986551285, "learning_rate": 1.3236842105263158e-05, "loss": 0.0001, "step": 175 }, { "epoch": 0.004518323599255503, "grad_norm": 0.001993614248931408, "learning_rate": 1.2707368421052633e-05, "loss": 0.0001, "step": 176 }, { "epoch": 0.004543995892433091, "grad_norm": 0.0019910179544240236, "learning_rate": 1.2177894736842107e-05, "loss": 0.0001, "step": 177 }, { "epoch": 0.004569668185610679, "grad_norm": 0.002013832563534379, "learning_rate": 1.1648421052631579e-05, "loss": 0.0001, "step": 178 }, { "epoch": 0.0045953404787882676, "grad_norm": 0.0020234170369803905, "learning_rate": 1.1118947368421053e-05, "loss": 0.0001, "step": 179 }, { "epoch": 0.004621012771965856, "grad_norm": 0.0019844514317810535, "learning_rate": 1.0589473684210526e-05, "loss": 0.0001, "step": 180 }, { "epoch": 0.004646685065143444, "grad_norm": 0.00206298241391778, "learning_rate": 1.0060000000000002e-05, "loss": 0.0001, "step": 181 }, { "epoch": 0.004672357358321032, "grad_norm": 0.0020082283299416304, "learning_rate": 9.530526315789474e-06, "loss": 0.0001, "step": 182 }, { "epoch": 0.00469802965149862, "grad_norm": 0.0020704076159745455, "learning_rate": 9.001052631578948e-06, "loss": 0.0001, "step": 183 }, { "epoch": 0.004723701944676208, "grad_norm": 0.002045375294983387, "learning_rate": 8.471578947368422e-06, "loss": 0.0001, "step": 184 }, { "epoch": 0.004749374237853796, "grad_norm": 0.0021098172292113304, "learning_rate": 7.942105263157895e-06, "loss": 0.0001, "step": 185 }, { "epoch": 0.0047750465310313845, "grad_norm": 0.0021194566506892443, "learning_rate": 7.412631578947368e-06, "loss": 0.0001, "step": 186 }, { "epoch": 0.004800718824208973, "grad_norm": 0.0020953835919499397, "learning_rate": 6.883157894736843e-06, "loss": 0.0001, "step": 187 }, { "epoch": 0.004826391117386561, "grad_norm": 0.0021119611337780952, "learning_rate": 6.3536842105263166e-06, "loss": 0.0001, "step": 188 }, { "epoch": 0.004852063410564149, "grad_norm": 0.0021464722231030464, "learning_rate": 5.8242105263157895e-06, "loss": 0.0001, "step": 189 }, { "epoch": 0.004877735703741737, "grad_norm": 0.0022363984026014805, "learning_rate": 5.294736842105263e-06, "loss": 0.0001, "step": 190 }, { "epoch": 0.004903407996919325, "grad_norm": 0.002060087164863944, "learning_rate": 4.765263157894737e-06, "loss": 0.0001, "step": 191 }, { "epoch": 0.004929080290096913, "grad_norm": 0.0019431081600487232, "learning_rate": 4.235789473684211e-06, "loss": 0.0001, "step": 192 }, { "epoch": 0.004954752583274501, "grad_norm": 0.0021071808878332376, "learning_rate": 3.706315789473684e-06, "loss": 0.0001, "step": 193 }, { "epoch": 0.0049804248764520895, "grad_norm": 0.0020984155125916004, "learning_rate": 3.1768421052631583e-06, "loss": 0.0001, "step": 194 }, { "epoch": 0.005006097169629677, "grad_norm": 0.002161442069336772, "learning_rate": 2.6473684210526316e-06, "loss": 0.0001, "step": 195 }, { "epoch": 0.005031769462807265, "grad_norm": 0.002335567260161042, "learning_rate": 2.1178947368421054e-06, "loss": 0.0001, "step": 196 }, { "epoch": 0.005057441755984853, "grad_norm": 1.0611015558242798, "learning_rate": 1.5884210526315791e-06, "loss": 0.2117, "step": 197 }, { "epoch": 0.005083114049162441, "grad_norm": 0.39899060130119324, "learning_rate": 1.0589473684210527e-06, "loss": 0.089, "step": 198 }, { "epoch": 0.005108786342340029, "grad_norm": 3.193357467651367, "learning_rate": 5.294736842105263e-07, "loss": 1.4026, "step": 199 }, { "epoch": 0.005134458635517617, "grad_norm": 3.7854483127593994, "learning_rate": 0.0, "loss": 1.5641, "step": 200 }, { "epoch": 0.005134458635517617, "eval_loss": 0.4131671190261841, "eval_runtime": 1189.4105, "eval_samples_per_second": 13.79, "eval_steps_per_second": 3.448, "step": 200 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.795839081879962e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }