diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,92530 +1,99460 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 0.9999621656388332, + "epoch": 1.0, "eval_steps": 500, - "global_step": 13215, + "global_step": 14205, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { - "epoch": 7.56687223336234e-05, - "grad_norm": 272.8875732421875, - "learning_rate": 2.7231467473524962e-08, - "loss": 2.6021, + "epoch": 7.039774727208729e-05, + "grad_norm": 327.84918212890625, + "learning_rate": 3.278688524590164e-08, + "loss": 3.1585, "step": 1 }, { - "epoch": 0.0001513374446672468, - "grad_norm": 513.3167114257812, - "learning_rate": 5.4462934947049924e-08, - "loss": 2.8401, + "epoch": 0.00014079549454417458, + "grad_norm": 324.16375732421875, + "learning_rate": 6.557377049180328e-08, + "loss": 3.0366, "step": 2 }, { - "epoch": 0.0002270061670008702, - "grad_norm": 390.2225036621094, - "learning_rate": 8.169440242057489e-08, - "loss": 2.7988, + "epoch": 0.00021119324181626187, + "grad_norm": 310.46563720703125, + "learning_rate": 9.836065573770492e-08, + "loss": 2.8635, "step": 3 }, { - "epoch": 0.0003026748893344936, - "grad_norm": 272.265625, - "learning_rate": 1.0892586989409985e-07, - "loss": 2.912, + "epoch": 0.00028159098908834917, + "grad_norm": 307.29510498046875, + "learning_rate": 1.3114754098360656e-07, + "loss": 3.0714, "step": 4 }, { - "epoch": 0.000378343611668117, - "grad_norm": 306.4111633300781, - "learning_rate": 1.3615733736762482e-07, - "loss": 2.8548, + "epoch": 0.00035198873636043646, + "grad_norm": 289.9696044921875, + "learning_rate": 1.639344262295082e-07, + "loss": 2.8635, "step": 5 }, { - "epoch": 0.0004540123340017404, - "grad_norm": 251.80809020996094, - "learning_rate": 1.6338880484114979e-07, - "loss": 2.885, + "epoch": 0.00042238648363252375, + "grad_norm": 277.79486083984375, + "learning_rate": 1.9672131147540984e-07, + "loss": 3.0361, "step": 6 }, { - "epoch": 0.0005296810563353637, - "grad_norm": 884.6312866210938, - "learning_rate": 1.9062027231467473e-07, - "loss": 2.9442, + "epoch": 0.0004927842309046111, + "grad_norm": 325.2139587402344, + "learning_rate": 2.295081967213115e-07, + "loss": 2.984, "step": 7 }, { - "epoch": 0.0006053497786689872, - "grad_norm": 391.5308837890625, - "learning_rate": 2.178517397881997e-07, - "loss": 3.0565, + "epoch": 0.0005631819781766983, + "grad_norm": 327.0364990234375, + "learning_rate": 2.622950819672131e-07, + "loss": 2.8488, "step": 8 }, { - "epoch": 0.0006810185010026105, - "grad_norm": 295.6712646484375, - "learning_rate": 2.4508320726172467e-07, - "loss": 2.9165, + "epoch": 0.0006335797254487857, + "grad_norm": 276.72918701171875, + "learning_rate": 2.950819672131147e-07, + "loss": 2.751, "step": 9 }, { - "epoch": 0.000756687223336234, - "grad_norm": 733.3089599609375, - "learning_rate": 2.7231467473524963e-07, - "loss": 2.7356, + "epoch": 0.0007039774727208729, + "grad_norm": 292.3111267089844, + "learning_rate": 3.278688524590164e-07, + "loss": 2.7861, "step": 10 }, { - "epoch": 0.0008323559456698573, - "grad_norm": 363.34002685546875, - "learning_rate": 2.995461422087746e-07, - "loss": 2.6017, + "epoch": 0.0007743752199929603, + "grad_norm": 243.3485107421875, + "learning_rate": 3.60655737704918e-07, + "loss": 2.8307, "step": 11 }, { - "epoch": 0.0009080246680034808, - "grad_norm": 256.9432373046875, - "learning_rate": 3.2677760968229957e-07, - "loss": 2.4895, + "epoch": 0.0008447729672650475, + "grad_norm": 371.9922180175781, + "learning_rate": 3.9344262295081967e-07, + "loss": 2.9013, "step": 12 }, { - "epoch": 0.0009836933903371041, - "grad_norm": 314.6168212890625, - "learning_rate": 3.5400907715582454e-07, - "loss": 2.8149, + "epoch": 0.0009151707145371348, + "grad_norm": 308.1575927734375, + "learning_rate": 4.2622950819672127e-07, + "loss": 2.8596, "step": 13 }, { - "epoch": 0.0010593621126707275, - "grad_norm": 266.22314453125, - "learning_rate": 3.8124054462934946e-07, - "loss": 2.6049, + "epoch": 0.0009855684618092222, + "grad_norm": 298.6134033203125, + "learning_rate": 4.59016393442623e-07, + "loss": 2.8469, "step": 14 }, { - "epoch": 0.001135030835004351, - "grad_norm": 263.47576904296875, - "learning_rate": 4.084720121028744e-07, - "loss": 2.5644, + "epoch": 0.0010559662090813093, + "grad_norm": 265.7218322753906, + "learning_rate": 4.918032786885245e-07, + "loss": 2.7987, "step": 15 }, { - "epoch": 0.0012106995573379744, - "grad_norm": 223.0679473876953, - "learning_rate": 4.357034795763994e-07, - "loss": 2.6094, + "epoch": 0.0011263639563533967, + "grad_norm": 264.1418151855469, + "learning_rate": 5.245901639344262e-07, + "loss": 2.6814, "step": 16 }, { - "epoch": 0.0012863682796715977, - "grad_norm": 248.41636657714844, - "learning_rate": 4.629349470499244e-07, - "loss": 2.4842, + "epoch": 0.001196761703625484, + "grad_norm": 198.11727905273438, + "learning_rate": 5.573770491803279e-07, + "loss": 2.4296, "step": 17 }, { - "epoch": 0.001362037002005221, - "grad_norm": 826.5869750976562, - "learning_rate": 4.901664145234493e-07, - "loss": 2.6023, + "epoch": 0.0012671594508975714, + "grad_norm": 207.57676696777344, + "learning_rate": 5.901639344262294e-07, + "loss": 2.2371, "step": 18 }, { - "epoch": 0.0014377057243388446, - "grad_norm": 265.9122009277344, - "learning_rate": 5.173978819969742e-07, - "loss": 2.4341, + "epoch": 0.0013375571981696585, + "grad_norm": 159.0576629638672, + "learning_rate": 6.229508196721311e-07, + "loss": 2.3463, "step": 19 }, { - "epoch": 0.001513374446672468, - "grad_norm": 190.68650817871094, - "learning_rate": 5.446293494704993e-07, - "loss": 2.3036, + "epoch": 0.0014079549454417458, + "grad_norm": 173.71910095214844, + "learning_rate": 6.557377049180328e-07, + "loss": 2.2895, "step": 20 }, { - "epoch": 0.0015890431690060913, - "grad_norm": 184.1608123779297, - "learning_rate": 5.718608169440242e-07, - "loss": 2.3596, + "epoch": 0.0014783526927138332, + "grad_norm": 184.00210571289062, + "learning_rate": 6.885245901639344e-07, + "loss": 2.2514, "step": 21 }, { - "epoch": 0.0016647118913397146, - "grad_norm": 425.2568054199219, - "learning_rate": 5.990922844175492e-07, - "loss": 2.4074, + "epoch": 0.0015487504399859205, + "grad_norm": 165.90907287597656, + "learning_rate": 7.21311475409836e-07, + "loss": 2.2558, "step": 22 }, { - "epoch": 0.0017403806136733382, - "grad_norm": 203.991455078125, - "learning_rate": 6.263237518910741e-07, - "loss": 2.2616, + "epoch": 0.0016191481872580077, + "grad_norm": 148.08236694335938, + "learning_rate": 7.540983606557376e-07, + "loss": 2.2918, "step": 23 }, { - "epoch": 0.0018160493360069615, - "grad_norm": 135.3365020751953, - "learning_rate": 6.535552193645991e-07, - "loss": 2.1286, + "epoch": 0.001689545934530095, + "grad_norm": 72.54096221923828, + "learning_rate": 7.868852459016393e-07, + "loss": 1.794, "step": 24 }, { - "epoch": 0.0018917180583405849, - "grad_norm": 136.6835174560547, - "learning_rate": 6.807866868381241e-07, - "loss": 2.404, + "epoch": 0.0017599436818021823, + "grad_norm": 69.06537628173828, + "learning_rate": 8.19672131147541e-07, + "loss": 1.9379, "step": 25 }, { - "epoch": 0.0019673867806742082, - "grad_norm": 105.73369598388672, - "learning_rate": 7.080181543116491e-07, - "loss": 2.2063, + "epoch": 0.0018303414290742697, + "grad_norm": 49.08950424194336, + "learning_rate": 8.524590163934425e-07, + "loss": 1.9297, "step": 26 }, { - "epoch": 0.002043055503007832, - "grad_norm": 86.31427001953125, - "learning_rate": 7.352496217851739e-07, - "loss": 1.9829, + "epoch": 0.0019007391763463568, + "grad_norm": 52.08783721923828, + "learning_rate": 8.852459016393443e-07, + "loss": 1.7796, "step": 27 }, { - "epoch": 0.002118724225341455, - "grad_norm": 124.91061401367188, - "learning_rate": 7.624810892586989e-07, - "loss": 1.7351, + "epoch": 0.0019711369236184444, + "grad_norm": 63.28836441040039, + "learning_rate": 9.18032786885246e-07, + "loss": 1.8632, "step": 28 }, { - "epoch": 0.0021943929476750785, - "grad_norm": 48.06635284423828, - "learning_rate": 7.897125567322239e-07, - "loss": 2.0282, + "epoch": 0.0020415346708905313, + "grad_norm": 47.32307052612305, + "learning_rate": 9.508196721311477e-07, + "loss": 1.8497, "step": 29 }, { - "epoch": 0.002270061670008702, - "grad_norm": 85.16302490234375, - "learning_rate": 8.169440242057488e-07, - "loss": 1.6746, + "epoch": 0.0021119324181626186, + "grad_norm": 63.913455963134766, + "learning_rate": 9.83606557377049e-07, + "loss": 1.6733, "step": 30 }, { - "epoch": 0.002345730392342325, - "grad_norm": 54.819854736328125, - "learning_rate": 8.441754916792739e-07, - "loss": 1.9421, + "epoch": 0.002182330165434706, + "grad_norm": 36.255428314208984, + "learning_rate": 1.0163934426229509e-06, + "loss": 1.672, "step": 31 }, { - "epoch": 0.0024213991146759487, - "grad_norm": 44.45909881591797, - "learning_rate": 8.714069591527988e-07, - "loss": 1.6696, + "epoch": 0.0022527279127067933, + "grad_norm": 33.048282623291016, + "learning_rate": 1.0491803278688525e-06, + "loss": 1.6959, "step": 32 }, { - "epoch": 0.0024970678370095723, - "grad_norm": 46.482994079589844, - "learning_rate": 8.986384266263238e-07, - "loss": 1.7952, + "epoch": 0.0023231256599788807, + "grad_norm": 28.955873489379883, + "learning_rate": 1.081967213114754e-06, + "loss": 1.7657, "step": 33 }, { - "epoch": 0.0025727365593431954, - "grad_norm": 117.33851623535156, - "learning_rate": 9.258698940998488e-07, - "loss": 1.6118, + "epoch": 0.002393523407250968, + "grad_norm": 36.223793029785156, + "learning_rate": 1.1147540983606559e-06, + "loss": 1.6015, "step": 34 }, { - "epoch": 0.002648405281676819, - "grad_norm": 40.68963623046875, - "learning_rate": 9.531013615733736e-07, - "loss": 2.1438, + "epoch": 0.0024639211545230554, + "grad_norm": 33.806453704833984, + "learning_rate": 1.1475409836065573e-06, + "loss": 1.6152, "step": 35 }, { - "epoch": 0.002724074004010442, - "grad_norm": 50.864559173583984, - "learning_rate": 9.803328290468987e-07, - "loss": 1.9518, + "epoch": 0.0025343189017951427, + "grad_norm": 21.79129981994629, + "learning_rate": 1.1803278688524589e-06, + "loss": 1.3907, "step": 36 }, { - "epoch": 0.0027997427263440657, - "grad_norm": 55.14411544799805, - "learning_rate": 1.0075642965204236e-06, - "loss": 1.8224, + "epoch": 0.00260471664906723, + "grad_norm": 17.227123260498047, + "learning_rate": 1.2131147540983607e-06, + "loss": 1.4542, "step": 37 }, { - "epoch": 0.002875411448677689, - "grad_norm": 112.16168975830078, - "learning_rate": 1.0347957639939485e-06, - "loss": 1.5419, + "epoch": 0.002675114396339317, + "grad_norm": 21.50402069091797, + "learning_rate": 1.2459016393442623e-06, + "loss": 1.3454, "step": 38 }, { - "epoch": 0.0029510801710113123, - "grad_norm": 37.99906539916992, - "learning_rate": 1.0620272314674736e-06, - "loss": 1.5378, + "epoch": 0.0027455121436114043, + "grad_norm": 21.000303268432617, + "learning_rate": 1.2786885245901639e-06, + "loss": 1.4508, "step": 39 }, { - "epoch": 0.003026748893344936, - "grad_norm": 85.35050964355469, - "learning_rate": 1.0892586989409985e-06, - "loss": 1.6139, + "epoch": 0.0028159098908834917, + "grad_norm": 28.696334838867188, + "learning_rate": 1.3114754098360657e-06, + "loss": 1.4662, "step": 40 }, { - "epoch": 0.0031024176156785595, - "grad_norm": 50.46384048461914, - "learning_rate": 1.1164901664145235e-06, - "loss": 1.6364, + "epoch": 0.002886307638155579, + "grad_norm": 29.718914031982422, + "learning_rate": 1.344262295081967e-06, + "loss": 1.3502, "step": 41 }, { - "epoch": 0.0031780863380121826, - "grad_norm": 74.24533081054688, - "learning_rate": 1.1437216338880484e-06, - "loss": 1.8635, + "epoch": 0.0029567053854276664, + "grad_norm": 17.292678833007812, + "learning_rate": 1.3770491803278689e-06, + "loss": 1.4447, "step": 42 }, { - "epoch": 0.003253755060345806, - "grad_norm": 75.34041595458984, - "learning_rate": 1.1709531013615733e-06, - "loss": 1.6906, + "epoch": 0.0030271031326997537, + "grad_norm": 18.584470748901367, + "learning_rate": 1.4098360655737705e-06, + "loss": 1.3633, "step": 43 }, { - "epoch": 0.0033294237826794293, - "grad_norm": 63.062156677246094, - "learning_rate": 1.1981845688350984e-06, - "loss": 1.5042, + "epoch": 0.003097500879971841, + "grad_norm": 12.572699546813965, + "learning_rate": 1.442622950819672e-06, + "loss": 1.2521, "step": 44 }, { - "epoch": 0.003405092505013053, - "grad_norm": 80.37957763671875, - "learning_rate": 1.2254160363086233e-06, - "loss": 1.5012, + "epoch": 0.0031678986272439284, + "grad_norm": 13.638853073120117, + "learning_rate": 1.4754098360655739e-06, + "loss": 1.2253, "step": 45 }, { - "epoch": 0.0034807612273466764, - "grad_norm": 35.123077392578125, - "learning_rate": 1.2526475037821482e-06, - "loss": 1.3791, + "epoch": 0.0032382963745160153, + "grad_norm": 16.544931411743164, + "learning_rate": 1.5081967213114753e-06, + "loss": 1.3009, "step": 46 }, { - "epoch": 0.0035564299496802995, - "grad_norm": 64.57160949707031, - "learning_rate": 1.2798789712556734e-06, - "loss": 1.5318, + "epoch": 0.0033086941217881027, + "grad_norm": 54.511104583740234, + "learning_rate": 1.540983606557377e-06, + "loss": 1.248, "step": 47 }, { - "epoch": 0.003632098672013923, - "grad_norm": 44.77900314331055, - "learning_rate": 1.3071104387291983e-06, - "loss": 1.4773, + "epoch": 0.00337909186906019, + "grad_norm": 10.137621879577637, + "learning_rate": 1.5737704918032787e-06, + "loss": 1.2674, "step": 48 }, { - "epoch": 0.0037077673943475466, - "grad_norm": 29.615036010742188, - "learning_rate": 1.3343419062027232e-06, - "loss": 1.3712, + "epoch": 0.0034494896163322773, + "grad_norm": 16.869211196899414, + "learning_rate": 1.6065573770491803e-06, + "loss": 1.3589, "step": 49 }, { - "epoch": 0.0037834361166811698, - "grad_norm": 45.1904411315918, - "learning_rate": 1.3615733736762481e-06, - "loss": 1.539, + "epoch": 0.0035198873636043647, + "grad_norm": 10.89695930480957, + "learning_rate": 1.639344262295082e-06, + "loss": 1.2846, "step": 50 }, { - "epoch": 0.0038591048390147933, - "grad_norm": 26.724008560180664, - "learning_rate": 1.3888048411497732e-06, - "loss": 1.2884, + "epoch": 0.003590285110876452, + "grad_norm": 23.282859802246094, + "learning_rate": 1.6721311475409835e-06, + "loss": 1.2672, "step": 51 }, { - "epoch": 0.0039347735613484165, - "grad_norm": 26.483842849731445, - "learning_rate": 1.4160363086232982e-06, - "loss": 1.2467, + "epoch": 0.0036606828581485394, + "grad_norm": 12.657679557800293, + "learning_rate": 1.704918032786885e-06, + "loss": 1.2303, "step": 52 }, { - "epoch": 0.00401044228368204, - "grad_norm": 25.459138870239258, - "learning_rate": 1.443267776096823e-06, - "loss": 1.376, + "epoch": 0.0037310806054206267, + "grad_norm": 11.95226764678955, + "learning_rate": 1.737704918032787e-06, + "loss": 1.1187, "step": 53 }, { - "epoch": 0.004086111006015664, - "grad_norm": 612.6622314453125, - "learning_rate": 1.4704992435703478e-06, - "loss": 1.3352, + "epoch": 0.0038014783526927136, + "grad_norm": 9.12307357788086, + "learning_rate": 1.7704918032786885e-06, + "loss": 1.2917, "step": 54 }, { - "epoch": 0.004161779728349287, - "grad_norm": 12.120221138000488, - "learning_rate": 1.497730711043873e-06, - "loss": 1.1906, + "epoch": 0.003871876099964801, + "grad_norm": 11.277148246765137, + "learning_rate": 1.80327868852459e-06, + "loss": 1.2548, "step": 55 }, { - "epoch": 0.00423744845068291, - "grad_norm": 49.24264144897461, - "learning_rate": 1.5249621785173978e-06, - "loss": 1.23, + "epoch": 0.003942273847236889, + "grad_norm": 9.948272705078125, + "learning_rate": 1.836065573770492e-06, + "loss": 1.3304, "step": 56 }, { - "epoch": 0.004313117173016533, - "grad_norm": 22.598411560058594, - "learning_rate": 1.5521936459909227e-06, - "loss": 1.2773, + "epoch": 0.004012671594508976, + "grad_norm": 7.373676776885986, + "learning_rate": 1.8688524590163935e-06, + "loss": 1.2114, "step": 57 }, { - "epoch": 0.004388785895350157, - "grad_norm": 22.006301879882812, - "learning_rate": 1.5794251134644479e-06, - "loss": 1.381, + "epoch": 0.004083069341781063, + "grad_norm": 9.533021926879883, + "learning_rate": 1.9016393442622953e-06, + "loss": 1.2734, "step": 58 }, { - "epoch": 0.0044644546176837805, - "grad_norm": 20.625085830688477, - "learning_rate": 1.6066565809379728e-06, - "loss": 1.3473, + "epoch": 0.00415346708905315, + "grad_norm": 8.02818489074707, + "learning_rate": 1.9344262295081967e-06, + "loss": 1.0933, "step": 59 }, { - "epoch": 0.004540123340017404, - "grad_norm": 29.52547264099121, - "learning_rate": 1.6338880484114977e-06, - "loss": 1.3312, + "epoch": 0.004223864836325237, + "grad_norm": 14.541219711303711, + "learning_rate": 1.967213114754098e-06, + "loss": 1.127, "step": 60 }, { - "epoch": 0.004615792062351028, - "grad_norm": 13.293269157409668, - "learning_rate": 1.6611195158850228e-06, - "loss": 1.0988, + "epoch": 0.004294262583597325, + "grad_norm": 10.511273384094238, + "learning_rate": 2e-06, + "loss": 1.0691, "step": 61 }, { - "epoch": 0.00469146078468465, - "grad_norm": 9.618423461914062, - "learning_rate": 1.6883509833585477e-06, - "loss": 1.1596, + "epoch": 0.004364660330869412, + "grad_norm": 10.930693626403809, + "learning_rate": 2.0327868852459017e-06, + "loss": 1.1183, "step": 62 }, { - "epoch": 0.004767129507018274, - "grad_norm": 9.537832260131836, - "learning_rate": 1.7155824508320727e-06, - "loss": 1.1767, + "epoch": 0.0044350580781415, + "grad_norm": 7.912240982055664, + "learning_rate": 2.065573770491803e-06, + "loss": 1.1607, "step": 63 }, { - "epoch": 0.0048427982293518974, - "grad_norm": 17.054410934448242, - "learning_rate": 1.7428139183055976e-06, - "loss": 1.2305, + "epoch": 0.004505455825413587, + "grad_norm": 11.118265151977539, + "learning_rate": 2.098360655737705e-06, + "loss": 1.1515, "step": 64 }, { - "epoch": 0.004918466951685521, - "grad_norm": 23.415481567382812, - "learning_rate": 1.7700453857791227e-06, - "loss": 1.1254, + "epoch": 0.0045758535726856744, + "grad_norm": 12.548416137695312, + "learning_rate": 2.1311475409836067e-06, + "loss": 1.2582, "step": 65 }, { - "epoch": 0.0049941356740191446, - "grad_norm": 6.73915958404541, - "learning_rate": 1.7972768532526476e-06, - "loss": 1.049, + "epoch": 0.004646251319957761, + "grad_norm": 16.8477840423584, + "learning_rate": 2.163934426229508e-06, + "loss": 1.035, "step": 66 }, { - "epoch": 0.005069804396352767, - "grad_norm": 21.562658309936523, - "learning_rate": 1.8245083207261725e-06, - "loss": 1.1231, + "epoch": 0.004716649067229848, + "grad_norm": 7.983528137207031, + "learning_rate": 2.19672131147541e-06, + "loss": 1.0744, "step": 67 }, { - "epoch": 0.005145473118686391, - "grad_norm": 16.345674514770508, - "learning_rate": 1.8517397881996977e-06, - "loss": 1.0867, + "epoch": 0.004787046814501936, + "grad_norm": 8.137669563293457, + "learning_rate": 2.2295081967213117e-06, + "loss": 1.3031, "step": 68 }, { - "epoch": 0.005221141841020014, - "grad_norm": 10.502206802368164, - "learning_rate": 1.8789712556732226e-06, - "loss": 1.3118, + "epoch": 0.004857444561774023, + "grad_norm": 7.361748218536377, + "learning_rate": 2.262295081967213e-06, + "loss": 1.1622, "step": 69 }, { - "epoch": 0.005296810563353638, - "grad_norm": 8.155364036560059, - "learning_rate": 1.9062027231467473e-06, - "loss": 1.2176, + "epoch": 0.004927842309046111, + "grad_norm": 5.179004192352295, + "learning_rate": 2.2950819672131145e-06, + "loss": 0.9968, "step": 70 }, { - "epoch": 0.0053724792856872615, - "grad_norm": 18.021318435668945, - "learning_rate": 1.933434190620272e-06, - "loss": 1.1159, + "epoch": 0.004998240056318198, + "grad_norm": 6.043445110321045, + "learning_rate": 2.3278688524590163e-06, + "loss": 1.0931, "step": 71 }, { - "epoch": 0.005448148008020884, - "grad_norm": 10.645997047424316, - "learning_rate": 1.9606656580937973e-06, - "loss": 1.3196, + "epoch": 0.005068637803590285, + "grad_norm": 11.305768966674805, + "learning_rate": 2.3606557377049177e-06, + "loss": 1.1006, "step": 72 }, { - "epoch": 0.005523816730354508, - "grad_norm": 12.845873832702637, - "learning_rate": 1.987897125567322e-06, - "loss": 1.1641, + "epoch": 0.005139035550862372, + "grad_norm": 8.141422271728516, + "learning_rate": 2.3934426229508195e-06, + "loss": 1.0072, "step": 73 }, { - "epoch": 0.005599485452688131, - "grad_norm": 7.4295148849487305, - "learning_rate": 2.015128593040847e-06, - "loss": 1.2317, + "epoch": 0.00520943329813446, + "grad_norm": 19.329275131225586, + "learning_rate": 2.4262295081967213e-06, + "loss": 1.0971, "step": 74 }, { - "epoch": 0.005675154175021755, - "grad_norm": 12.633703231811523, - "learning_rate": 2.0423600605143723e-06, - "loss": 1.207, + "epoch": 0.005279831045406547, + "grad_norm": 12.364167213439941, + "learning_rate": 2.4590163934426227e-06, + "loss": 1.1431, "step": 75 }, { - "epoch": 0.005750822897355378, - "grad_norm": 12.621756553649902, - "learning_rate": 2.069591527987897e-06, - "loss": 1.2415, + "epoch": 0.005350228792678634, + "grad_norm": 7.520205020904541, + "learning_rate": 2.4918032786885245e-06, + "loss": 1.1131, "step": 76 }, { - "epoch": 0.005826491619689002, - "grad_norm": 7.364112377166748, - "learning_rate": 2.096822995461422e-06, - "loss": 1.2141, + "epoch": 0.005420626539950722, + "grad_norm": 5.590424060821533, + "learning_rate": 2.5245901639344264e-06, + "loss": 1.0403, "step": 77 }, { - "epoch": 0.005902160342022625, - "grad_norm": 7.511165142059326, - "learning_rate": 2.1240544629349472e-06, - "loss": 1.0121, + "epoch": 0.005491024287222809, + "grad_norm": 8.368648529052734, + "learning_rate": 2.5573770491803277e-06, + "loss": 1.177, "step": 78 }, { - "epoch": 0.005977829064356248, - "grad_norm": 14.060636520385742, - "learning_rate": 2.151285930408472e-06, - "loss": 1.1539, + "epoch": 0.005561422034494896, + "grad_norm": 34.531700134277344, + "learning_rate": 2.5901639344262296e-06, + "loss": 1.1472, "step": 79 }, { - "epoch": 0.006053497786689872, - "grad_norm": 49.88886642456055, - "learning_rate": 2.178517397881997e-06, - "loss": 1.0104, + "epoch": 0.005631819781766983, + "grad_norm": 5.946082592010498, + "learning_rate": 2.6229508196721314e-06, + "loss": 1.136, "step": 80 }, { - "epoch": 0.006129166509023495, - "grad_norm": 6.6888017654418945, - "learning_rate": 2.205748865355522e-06, - "loss": 0.9701, + "epoch": 0.005702217529039071, + "grad_norm": 7.393290042877197, + "learning_rate": 2.6557377049180328e-06, + "loss": 1.0315, "step": 81 }, { - "epoch": 0.006204835231357119, - "grad_norm": 7.409695148468018, - "learning_rate": 2.232980332829047e-06, - "loss": 1.0943, + "epoch": 0.005772615276311158, + "grad_norm": 5.287744998931885, + "learning_rate": 2.688524590163934e-06, + "loss": 1.0025, "step": 82 }, { - "epoch": 0.006280503953690742, - "grad_norm": 9.27059268951416, - "learning_rate": 2.260211800302572e-06, - "loss": 1.1012, + "epoch": 0.005843013023583245, + "grad_norm": 6.3712592124938965, + "learning_rate": 2.721311475409836e-06, + "loss": 1.0929, "step": 83 }, { - "epoch": 0.006356172676024365, - "grad_norm": 8.880078315734863, - "learning_rate": 2.2874432677760967e-06, - "loss": 1.0485, + "epoch": 0.005913410770855333, + "grad_norm": 8.836492538452148, + "learning_rate": 2.7540983606557378e-06, + "loss": 1.0585, "step": 84 }, { - "epoch": 0.006431841398357989, - "grad_norm": 8.912999153137207, - "learning_rate": 2.314674735249622e-06, - "loss": 1.0787, + "epoch": 0.00598380851812742, + "grad_norm": 7.777792930603027, + "learning_rate": 2.786885245901639e-06, + "loss": 1.0443, "step": 85 }, { - "epoch": 0.006507510120691612, - "grad_norm": 6.5734028816223145, - "learning_rate": 2.3419062027231466e-06, - "loss": 1.1154, + "epoch": 0.006054206265399507, + "grad_norm": 5.140045166015625, + "learning_rate": 2.819672131147541e-06, + "loss": 1.0398, "step": 86 }, { - "epoch": 0.006583178843025236, - "grad_norm": 6.327483654022217, - "learning_rate": 2.369137670196672e-06, - "loss": 1.1694, + "epoch": 0.006124604012671594, + "grad_norm": 4.603422164916992, + "learning_rate": 2.8524590163934428e-06, + "loss": 1.1361, "step": 87 }, { - "epoch": 0.0066588475653588585, - "grad_norm": 12.942300796508789, - "learning_rate": 2.396369137670197e-06, - "loss": 0.9813, + "epoch": 0.006195001759943682, + "grad_norm": 6.970488548278809, + "learning_rate": 2.885245901639344e-06, + "loss": 0.9699, "step": 88 }, { - "epoch": 0.006734516287692482, - "grad_norm": 10.303013801574707, - "learning_rate": 2.423600605143722e-06, - "loss": 1.0833, + "epoch": 0.006265399507215769, + "grad_norm": 4.829523086547852, + "learning_rate": 2.918032786885246e-06, + "loss": 1.042, "step": 89 }, { - "epoch": 0.006810185010026106, - "grad_norm": 7.25071907043457, - "learning_rate": 2.4508320726172467e-06, - "loss": 1.1512, + "epoch": 0.006335797254487857, + "grad_norm": 4.901027202606201, + "learning_rate": 2.9508196721311478e-06, + "loss": 1.1206, "step": 90 }, { - "epoch": 0.006885853732359729, - "grad_norm": 4.276927471160889, - "learning_rate": 2.4780635400907718e-06, - "loss": 1.1131, + "epoch": 0.006406195001759944, + "grad_norm": 6.397693157196045, + "learning_rate": 2.9836065573770487e-06, + "loss": 1.1563, "step": 91 }, { - "epoch": 0.006961522454693353, - "grad_norm": 11.979231834411621, - "learning_rate": 2.5052950075642965e-06, - "loss": 1.1562, + "epoch": 0.006476592749032031, + "grad_norm": 4.759715557098389, + "learning_rate": 3.0163934426229506e-06, + "loss": 1.0057, "step": 92 }, { - "epoch": 0.0070371911770269755, - "grad_norm": 6.277972221374512, - "learning_rate": 2.532526475037821e-06, - "loss": 1.1771, + "epoch": 0.006546990496304118, + "grad_norm": 5.691864967346191, + "learning_rate": 3.0491803278688524e-06, + "loss": 0.9445, "step": 93 }, { - "epoch": 0.007112859899360599, - "grad_norm": 7.063577175140381, - "learning_rate": 2.5597579425113467e-06, - "loss": 0.9926, + "epoch": 0.006617388243576205, + "grad_norm": 5.3856425285339355, + "learning_rate": 3.081967213114754e-06, + "loss": 1.0753, "step": 94 }, { - "epoch": 0.007188528621694223, - "grad_norm": 8.788790702819824, - "learning_rate": 2.5869894099848714e-06, - "loss": 0.9004, + "epoch": 0.006687785990848293, + "grad_norm": 5.01511287689209, + "learning_rate": 3.1147540983606556e-06, + "loss": 0.9226, "step": 95 }, { - "epoch": 0.007264197344027846, - "grad_norm": 7.295443058013916, - "learning_rate": 2.6142208774583966e-06, - "loss": 1.1524, + "epoch": 0.00675818373812038, + "grad_norm": 5.576282978057861, + "learning_rate": 3.1475409836065574e-06, + "loss": 0.9603, "step": 96 }, { - "epoch": 0.00733986606636147, - "grad_norm": 9.583985328674316, - "learning_rate": 2.6414523449319213e-06, - "loss": 1.1887, + "epoch": 0.006828581485392468, + "grad_norm": 4.554950714111328, + "learning_rate": 3.180327868852459e-06, + "loss": 1.1536, "step": 97 }, { - "epoch": 0.007415534788695093, - "grad_norm": 7.807141304016113, - "learning_rate": 2.6686838124054464e-06, - "loss": 1.0794, + "epoch": 0.006898979232664555, + "grad_norm": 6.626781463623047, + "learning_rate": 3.2131147540983606e-06, + "loss": 1.0018, "step": 98 }, { - "epoch": 0.007491203511028716, - "grad_norm": 9.609314918518066, - "learning_rate": 2.695915279878971e-06, - "loss": 1.0667, + "epoch": 0.006969376979936642, + "grad_norm": 4.615559101104736, + "learning_rate": 3.2459016393442624e-06, + "loss": 1.1572, "step": 99 }, { - "epoch": 0.0075668722333623395, - "grad_norm": 7.190148830413818, - "learning_rate": 2.7231467473524962e-06, - "loss": 1.1177, + "epoch": 0.007039774727208729, + "grad_norm": 5.9900126457214355, + "learning_rate": 3.278688524590164e-06, + "loss": 0.9231, "step": 100 }, { - "epoch": 0.007642540955695963, - "grad_norm": 5.9306206703186035, - "learning_rate": 2.750378214826021e-06, - "loss": 1.0946, + "epoch": 0.007110172474480816, + "grad_norm": 5.481687068939209, + "learning_rate": 3.3114754098360656e-06, + "loss": 1.0406, "step": 101 }, { - "epoch": 0.007718209678029587, - "grad_norm": 6.257357597351074, - "learning_rate": 2.7776096822995465e-06, - "loss": 1.1852, + "epoch": 0.007180570221752904, + "grad_norm": 4.90478515625, + "learning_rate": 3.344262295081967e-06, + "loss": 1.1233, "step": 102 }, { - "epoch": 0.00779387840036321, - "grad_norm": 4.176384449005127, - "learning_rate": 2.804841149773071e-06, - "loss": 1.1085, + "epoch": 0.007250967969024991, + "grad_norm": 5.622109889984131, + "learning_rate": 3.377049180327869e-06, + "loss": 1.0167, "step": 103 }, { - "epoch": 0.007869547122696833, - "grad_norm": 5.795867919921875, - "learning_rate": 2.8320726172465963e-06, - "loss": 1.064, + "epoch": 0.007321365716297079, + "grad_norm": 7.457308769226074, + "learning_rate": 3.40983606557377e-06, + "loss": 0.8555, "step": 104 }, { - "epoch": 0.007945215845030457, - "grad_norm": 87.45132446289062, - "learning_rate": 2.859304084720121e-06, - "loss": 1.0562, + "epoch": 0.007391763463569166, + "grad_norm": 7.173100471496582, + "learning_rate": 3.442622950819672e-06, + "loss": 1.0897, "step": 105 }, { - "epoch": 0.00802088456736408, - "grad_norm": 5.800858020782471, - "learning_rate": 2.886535552193646e-06, - "loss": 1.213, + "epoch": 0.0074621612108412535, + "grad_norm": 5.910191535949707, + "learning_rate": 3.475409836065574e-06, + "loss": 1.0607, "step": 106 }, { - "epoch": 0.008096553289697703, - "grad_norm": 6.449529647827148, - "learning_rate": 2.913767019667171e-06, - "loss": 1.2756, + "epoch": 0.00753255895811334, + "grad_norm": 6.937999725341797, + "learning_rate": 3.508196721311475e-06, + "loss": 1.0604, "step": 107 }, { - "epoch": 0.008172222012031327, - "grad_norm": 15.208488464355469, - "learning_rate": 2.9409984871406956e-06, - "loss": 1.1593, + "epoch": 0.007602956705385427, + "grad_norm": 5.2612385749816895, + "learning_rate": 3.540983606557377e-06, + "loss": 0.9314, "step": 108 }, { - "epoch": 0.00824789073436495, - "grad_norm": 6.07266902923584, - "learning_rate": 2.968229954614221e-06, - "loss": 0.9689, + "epoch": 0.007673354452657515, + "grad_norm": 5.505253791809082, + "learning_rate": 3.573770491803279e-06, + "loss": 0.8598, "step": 109 }, { - "epoch": 0.008323559456698574, - "grad_norm": 5.045470714569092, - "learning_rate": 2.995461422087746e-06, - "loss": 1.088, + "epoch": 0.007743752199929602, + "grad_norm": 4.654024124145508, + "learning_rate": 3.60655737704918e-06, + "loss": 1.0895, "step": 110 }, { - "epoch": 0.008399228179032197, - "grad_norm": 8.590188980102539, - "learning_rate": 3.022692889561271e-06, - "loss": 1.0692, + "epoch": 0.00781414994720169, + "grad_norm": 4.011298179626465, + "learning_rate": 3.639344262295082e-06, + "loss": 0.9924, "step": 111 }, { - "epoch": 0.00847489690136582, - "grad_norm": 9.020132064819336, - "learning_rate": 3.0499243570347956e-06, - "loss": 1.0444, + "epoch": 0.007884547694473778, + "grad_norm": 6.011845111846924, + "learning_rate": 3.672131147540984e-06, + "loss": 0.9868, "step": 112 }, { - "epoch": 0.008550565623699444, - "grad_norm": 5.601812362670898, - "learning_rate": 3.0771558245083208e-06, - "loss": 1.0266, + "epoch": 0.007954945441745864, + "grad_norm": 4.440524578094482, + "learning_rate": 3.704918032786885e-06, + "loss": 0.9508, "step": 113 }, { - "epoch": 0.008626234346033067, - "grad_norm": 3.23660945892334, - "learning_rate": 3.1043872919818455e-06, - "loss": 1.0381, + "epoch": 0.008025343189017951, + "grad_norm": 4.9957966804504395, + "learning_rate": 3.737704918032787e-06, + "loss": 0.9013, "step": 114 }, { - "epoch": 0.008701903068366691, - "grad_norm": 4.900957107543945, - "learning_rate": 3.131618759455371e-06, - "loss": 1.1345, + "epoch": 0.008095740936290039, + "grad_norm": 5.244853496551514, + "learning_rate": 3.770491803278689e-06, + "loss": 0.9226, "step": 115 }, { - "epoch": 0.008777571790700314, - "grad_norm": 7.415119171142578, - "learning_rate": 3.1588502269288957e-06, - "loss": 0.9433, + "epoch": 0.008166138683562125, + "grad_norm": 5.495327949523926, + "learning_rate": 3.8032786885245906e-06, + "loss": 1.0457, "step": 116 }, { - "epoch": 0.008853240513033937, - "grad_norm": 4.038257122039795, - "learning_rate": 3.186081694402421e-06, - "loss": 1.1984, + "epoch": 0.008236536430834213, + "grad_norm": 4.33501672744751, + "learning_rate": 3.8360655737704925e-06, + "loss": 0.9441, "step": 117 }, { - "epoch": 0.008928909235367561, - "grad_norm": 4.441867828369141, - "learning_rate": 3.2133131618759456e-06, - "loss": 1.0773, + "epoch": 0.0083069341781063, + "grad_norm": 4.686830997467041, + "learning_rate": 3.868852459016393e-06, + "loss": 0.912, "step": 118 }, { - "epoch": 0.009004577957701184, - "grad_norm": 5.363611698150635, - "learning_rate": 3.2405446293494707e-06, - "loss": 1.1884, + "epoch": 0.008377331925378389, + "grad_norm": 7.1909708976745605, + "learning_rate": 3.901639344262294e-06, + "loss": 1.0878, "step": 119 }, { - "epoch": 0.009080246680034808, - "grad_norm": 6.429077625274658, - "learning_rate": 3.2677760968229954e-06, - "loss": 1.0864, + "epoch": 0.008447729672650475, + "grad_norm": 4.669498443603516, + "learning_rate": 3.934426229508196e-06, + "loss": 1.0657, "step": 120 }, { - "epoch": 0.00915591540236843, - "grad_norm": 4.815485000610352, - "learning_rate": 3.2950075642965205e-06, - "loss": 1.0554, + "epoch": 0.008518127419922562, + "grad_norm": 5.245838165283203, + "learning_rate": 3.967213114754098e-06, + "loss": 1.0839, "step": 121 }, { - "epoch": 0.009231584124702055, - "grad_norm": 11.878281593322754, - "learning_rate": 3.3222390317700457e-06, - "loss": 1.2129, + "epoch": 0.00858852516719465, + "grad_norm": 4.338572025299072, + "learning_rate": 4e-06, + "loss": 0.9384, "step": 122 }, { - "epoch": 0.009307252847035678, - "grad_norm": 6.188513278961182, - "learning_rate": 3.3494704992435704e-06, - "loss": 1.0672, + "epoch": 0.008658922914466738, + "grad_norm": 5.470822811126709, + "learning_rate": 4.032786885245902e-06, + "loss": 1.0732, "step": 123 }, { - "epoch": 0.0093829215693693, - "grad_norm": 4.279908657073975, - "learning_rate": 3.3767019667170955e-06, - "loss": 1.1063, + "epoch": 0.008729320661738824, + "grad_norm": 4.264244079589844, + "learning_rate": 4.0655737704918034e-06, + "loss": 1.0364, "step": 124 }, { - "epoch": 0.009458590291702925, - "grad_norm": 8.566632270812988, - "learning_rate": 3.40393343419062e-06, - "loss": 0.9051, + "epoch": 0.008799718409010912, + "grad_norm": 4.813968181610107, + "learning_rate": 4.098360655737704e-06, + "loss": 1.0919, "step": 125 }, { - "epoch": 0.009534259014036548, - "grad_norm": 10.101946830749512, - "learning_rate": 3.4311649016641453e-06, - "loss": 1.1801, + "epoch": 0.008870116156283, + "grad_norm": 5.334741592407227, + "learning_rate": 4.131147540983606e-06, + "loss": 1.1235, "step": 126 }, { - "epoch": 0.009609927736370172, - "grad_norm": 3.696890115737915, - "learning_rate": 3.45839636913767e-06, - "loss": 1.1499, + "epoch": 0.008940513903555086, + "grad_norm": 5.502646446228027, + "learning_rate": 4.163934426229508e-06, + "loss": 0.9513, "step": 127 }, { - "epoch": 0.009685596458703795, - "grad_norm": 4.319789409637451, - "learning_rate": 3.485627836611195e-06, - "loss": 1.1783, + "epoch": 0.009010911650827173, + "grad_norm": 5.643490791320801, + "learning_rate": 4.19672131147541e-06, + "loss": 1.0333, "step": 128 }, { - "epoch": 0.009761265181037418, - "grad_norm": 3.6111180782318115, - "learning_rate": 3.51285930408472e-06, - "loss": 1.2035, + "epoch": 0.009081309398099261, + "grad_norm": 6.624214172363281, + "learning_rate": 4.229508196721312e-06, + "loss": 0.9192, "step": 129 }, { - "epoch": 0.009836933903371042, - "grad_norm": 6.946579933166504, - "learning_rate": 3.5400907715582454e-06, - "loss": 1.429, + "epoch": 0.009151707145371349, + "grad_norm": 4.946504592895508, + "learning_rate": 4.2622950819672135e-06, + "loss": 1.0762, "step": 130 }, { - "epoch": 0.009912602625704665, - "grad_norm": 4.380500316619873, - "learning_rate": 3.56732223903177e-06, - "loss": 0.9982, + "epoch": 0.009222104892643435, + "grad_norm": 4.4725871086120605, + "learning_rate": 4.2950819672131144e-06, + "loss": 1.0471, "step": 131 }, { - "epoch": 0.009988271348038289, - "grad_norm": 110.80493927001953, - "learning_rate": 3.5945537065052952e-06, - "loss": 1.053, + "epoch": 0.009292502639915523, + "grad_norm": 7.486828327178955, + "learning_rate": 4.327868852459016e-06, + "loss": 1.0653, "step": 132 }, { - "epoch": 0.010063940070371912, - "grad_norm": 4.770391941070557, - "learning_rate": 3.62178517397882e-06, - "loss": 0.9609, + "epoch": 0.00936290038718761, + "grad_norm": 9.454997062683105, + "learning_rate": 4.360655737704918e-06, + "loss": 1.0513, "step": 133 }, { - "epoch": 0.010139608792705535, - "grad_norm": 4.621954441070557, - "learning_rate": 3.649016641452345e-06, - "loss": 1.0428, + "epoch": 0.009433298134459697, + "grad_norm": 5.494566917419434, + "learning_rate": 4.39344262295082e-06, + "loss": 0.9125, "step": 134 }, { - "epoch": 0.010215277515039159, - "grad_norm": 4.873030185699463, - "learning_rate": 3.6762481089258698e-06, - "loss": 1.0355, + "epoch": 0.009503695881731784, + "grad_norm": 5.223452091217041, + "learning_rate": 4.426229508196722e-06, + "loss": 1.1218, "step": 135 }, { - "epoch": 0.010290946237372782, - "grad_norm": 5.315187931060791, - "learning_rate": 3.7034795763993953e-06, - "loss": 1.2877, + "epoch": 0.009574093629003872, + "grad_norm": 4.113816261291504, + "learning_rate": 4.4590163934426235e-06, + "loss": 0.9365, "step": 136 }, { - "epoch": 0.010366614959706406, - "grad_norm": 3.4662725925445557, - "learning_rate": 3.73071104387292e-06, - "loss": 0.9696, + "epoch": 0.00964449137627596, + "grad_norm": 4.752868175506592, + "learning_rate": 4.4918032786885244e-06, + "loss": 0.9606, "step": 137 }, { - "epoch": 0.010442283682040029, - "grad_norm": 6.4335784912109375, - "learning_rate": 3.757942511346445e-06, - "loss": 1.0649, + "epoch": 0.009714889123548046, + "grad_norm": 4.174314498901367, + "learning_rate": 4.524590163934426e-06, + "loss": 0.8709, "step": 138 }, { - "epoch": 0.010517952404373651, - "grad_norm": 5.575058937072754, - "learning_rate": 3.78517397881997e-06, - "loss": 1.2124, + "epoch": 0.009785286870820134, + "grad_norm": 4.687844753265381, + "learning_rate": 4.557377049180327e-06, + "loss": 1.1115, "step": 139 }, { - "epoch": 0.010593621126707276, - "grad_norm": 4.741557598114014, - "learning_rate": 3.8124054462934946e-06, - "loss": 1.0165, + "epoch": 0.009855684618092221, + "grad_norm": 4.23372745513916, + "learning_rate": 4.590163934426229e-06, + "loss": 1.0595, "step": 140 }, { - "epoch": 0.010669289849040899, - "grad_norm": 6.094121932983398, - "learning_rate": 3.83963691376702e-06, - "loss": 1.1381, + "epoch": 0.009926082365364308, + "grad_norm": 4.5904459953308105, + "learning_rate": 4.622950819672131e-06, + "loss": 1.0146, "step": 141 }, { - "epoch": 0.010744958571374523, - "grad_norm": 3.2143642902374268, - "learning_rate": 3.866868381240544e-06, - "loss": 1.1538, + "epoch": 0.009996480112636395, + "grad_norm": 3.6803858280181885, + "learning_rate": 4.655737704918033e-06, + "loss": 0.9218, "step": 142 }, { - "epoch": 0.010820627293708146, - "grad_norm": 8.476461410522461, - "learning_rate": 3.89409984871407e-06, - "loss": 0.9853, + "epoch": 0.010066877859908483, + "grad_norm": 4.220322132110596, + "learning_rate": 4.6885245901639345e-06, + "loss": 0.9692, "step": 143 }, { - "epoch": 0.010896296016041768, - "grad_norm": 5.929498672485352, - "learning_rate": 3.921331316187595e-06, - "loss": 1.2708, + "epoch": 0.01013727560718057, + "grad_norm": 5.108364105224609, + "learning_rate": 4.7213114754098354e-06, + "loss": 1.091, "step": 144 }, { - "epoch": 0.010971964738375393, - "grad_norm": 3.594865560531616, - "learning_rate": 3.948562783661119e-06, - "loss": 1.077, + "epoch": 0.010207673354452657, + "grad_norm": 5.284567356109619, + "learning_rate": 4.754098360655737e-06, + "loss": 0.944, "step": 145 }, { - "epoch": 0.011047633460709015, - "grad_norm": 5.143126964569092, - "learning_rate": 3.975794251134644e-06, - "loss": 0.8743, + "epoch": 0.010278071101724745, + "grad_norm": 3.9610142707824707, + "learning_rate": 4.786885245901639e-06, + "loss": 1.1799, "step": 146 }, { - "epoch": 0.01112330218304264, - "grad_norm": 3.9312331676483154, - "learning_rate": 4.00302571860817e-06, - "loss": 1.0474, + "epoch": 0.010348468848996832, + "grad_norm": 5.0468597412109375, + "learning_rate": 4.819672131147541e-06, + "loss": 1.0112, "step": 147 }, { - "epoch": 0.011198970905376263, - "grad_norm": 8.739069938659668, - "learning_rate": 4.030257186081694e-06, - "loss": 1.1, + "epoch": 0.01041886659626892, + "grad_norm": 6.582139492034912, + "learning_rate": 4.852459016393443e-06, + "loss": 0.9559, "step": 148 }, { - "epoch": 0.011274639627709885, - "grad_norm": 4.785227298736572, - "learning_rate": 4.05748865355522e-06, - "loss": 1.0795, + "epoch": 0.010489264343541006, + "grad_norm": 3.8485188484191895, + "learning_rate": 4.8852459016393445e-06, + "loss": 0.9447, "step": 149 }, { - "epoch": 0.01135030835004351, - "grad_norm": 4.471089839935303, - "learning_rate": 4.0847201210287446e-06, - "loss": 1.0992, + "epoch": 0.010559662090813094, + "grad_norm": 4.572494029998779, + "learning_rate": 4.9180327868852455e-06, + "loss": 1.0548, "step": 150 }, { - "epoch": 0.011425977072377132, - "grad_norm": 9.019781112670898, - "learning_rate": 4.111951588502269e-06, - "loss": 1.0866, + "epoch": 0.010630059838085182, + "grad_norm": 4.870029449462891, + "learning_rate": 4.950819672131147e-06, + "loss": 1.0466, "step": 151 }, { - "epoch": 0.011501645794710757, - "grad_norm": 5.071846961975098, - "learning_rate": 4.139183055975794e-06, - "loss": 1.1729, + "epoch": 0.010700457585357268, + "grad_norm": 4.742334842681885, + "learning_rate": 4.983606557377049e-06, + "loss": 0.9156, "step": 152 }, { - "epoch": 0.01157731451704438, - "grad_norm": 4.376003742218018, - "learning_rate": 4.1664145234493195e-06, - "loss": 1.203, + "epoch": 0.010770855332629356, + "grad_norm": 4.570158004760742, + "learning_rate": 5.016393442622951e-06, + "loss": 0.8503, "step": 153 }, { - "epoch": 0.011652983239378004, - "grad_norm": 6.377297878265381, - "learning_rate": 4.193645990922844e-06, - "loss": 1.1541, + "epoch": 0.010841253079901443, + "grad_norm": 4.887471675872803, + "learning_rate": 5.049180327868853e-06, + "loss": 0.9545, "step": 154 }, { - "epoch": 0.011728651961711627, - "grad_norm": 7.824789047241211, - "learning_rate": 4.220877458396369e-06, - "loss": 1.0479, + "epoch": 0.010911650827173531, + "grad_norm": 4.630635738372803, + "learning_rate": 5.0819672131147545e-06, + "loss": 0.8893, "step": 155 }, { - "epoch": 0.01180432068404525, - "grad_norm": 4.779434680938721, - "learning_rate": 4.2481089258698945e-06, - "loss": 1.0529, + "epoch": 0.010982048574445617, + "grad_norm": 4.824710845947266, + "learning_rate": 5.1147540983606555e-06, + "loss": 0.8591, "step": 156 }, { - "epoch": 0.011879989406378874, - "grad_norm": 8.425029754638672, - "learning_rate": 4.275340393343419e-06, - "loss": 1.0992, + "epoch": 0.011052446321717705, + "grad_norm": 3.679910659790039, + "learning_rate": 5.147540983606557e-06, + "loss": 0.9802, "step": 157 }, { - "epoch": 0.011955658128712496, - "grad_norm": 5.336421966552734, - "learning_rate": 4.302571860816944e-06, - "loss": 1.0226, + "epoch": 0.011122844068989793, + "grad_norm": 5.713979244232178, + "learning_rate": 5.180327868852459e-06, + "loss": 0.989, "step": 158 }, { - "epoch": 0.012031326851046121, - "grad_norm": 7.749419212341309, - "learning_rate": 4.329803328290469e-06, - "loss": 1.0299, + "epoch": 0.011193241816261879, + "grad_norm": 4.592340469360352, + "learning_rate": 5.213114754098361e-06, + "loss": 0.9028, "step": 159 }, { - "epoch": 0.012106995573379744, - "grad_norm": 8.775020599365234, - "learning_rate": 4.357034795763994e-06, - "loss": 1.2269, + "epoch": 0.011263639563533967, + "grad_norm": 3.9298043251037598, + "learning_rate": 5.245901639344263e-06, + "loss": 1.0798, "step": 160 }, { - "epoch": 0.012182664295713366, - "grad_norm": 6.738064289093018, - "learning_rate": 4.384266263237519e-06, - "loss": 1.0477, + "epoch": 0.011334037310806054, + "grad_norm": 3.508730888366699, + "learning_rate": 5.278688524590164e-06, + "loss": 0.9157, "step": 161 }, { - "epoch": 0.01225833301804699, - "grad_norm": 4.490780353546143, - "learning_rate": 4.411497730711044e-06, - "loss": 1.0986, + "epoch": 0.011404435058078142, + "grad_norm": 5.089319705963135, + "learning_rate": 5.3114754098360655e-06, + "loss": 0.9711, "step": 162 }, { - "epoch": 0.012334001740380613, - "grad_norm": 4.881013870239258, - "learning_rate": 4.438729198184569e-06, - "loss": 1.0979, + "epoch": 0.011474832805350228, + "grad_norm": 4.619134426116943, + "learning_rate": 5.3442622950819665e-06, + "loss": 0.9138, "step": 163 }, { - "epoch": 0.012409670462714238, - "grad_norm": 3.8033907413482666, - "learning_rate": 4.465960665658094e-06, - "loss": 1.2036, + "epoch": 0.011545230552622316, + "grad_norm": 6.311204433441162, + "learning_rate": 5.377049180327868e-06, + "loss": 0.9143, "step": 164 }, { - "epoch": 0.01248533918504786, - "grad_norm": 4.627386093139648, - "learning_rate": 4.4931921331316185e-06, - "loss": 1.0643, + "epoch": 0.011615628299894404, + "grad_norm": 4.683346271514893, + "learning_rate": 5.40983606557377e-06, + "loss": 0.9999, "step": 165 }, { - "epoch": 0.012561007907381483, - "grad_norm": 4.1467108726501465, - "learning_rate": 4.520423600605144e-06, - "loss": 1.0594, + "epoch": 0.01168602604716649, + "grad_norm": 5.870517253875732, + "learning_rate": 5.442622950819672e-06, + "loss": 0.8818, "step": 166 }, { - "epoch": 0.012636676629715108, - "grad_norm": 3.9435312747955322, - "learning_rate": 4.54765506807867e-06, - "loss": 1.0522, + "epoch": 0.011756423794438578, + "grad_norm": 5.734021186828613, + "learning_rate": 5.475409836065574e-06, + "loss": 0.9859, "step": 167 }, { - "epoch": 0.01271234535204873, - "grad_norm": 7.734802722930908, - "learning_rate": 4.5748865355521935e-06, - "loss": 1.1185, + "epoch": 0.011826821541710665, + "grad_norm": 4.360768795013428, + "learning_rate": 5.5081967213114755e-06, + "loss": 0.9062, "step": 168 }, { - "epoch": 0.012788014074382355, - "grad_norm": 11.209372520446777, - "learning_rate": 4.602118003025719e-06, - "loss": 1.0703, + "epoch": 0.011897219288982753, + "grad_norm": 4.667993068695068, + "learning_rate": 5.5409836065573765e-06, + "loss": 0.9653, "step": 169 }, { - "epoch": 0.012863682796715977, - "grad_norm": 4.650092124938965, - "learning_rate": 4.629349470499244e-06, - "loss": 0.9494, + "epoch": 0.01196761703625484, + "grad_norm": 4.329316139221191, + "learning_rate": 5.573770491803278e-06, + "loss": 0.9568, "step": 170 }, { - "epoch": 0.0129393515190496, - "grad_norm": 3.6348683834075928, - "learning_rate": 4.6565809379727684e-06, - "loss": 0.9492, + "epoch": 0.012038014783526927, + "grad_norm": 4.864582538604736, + "learning_rate": 5.60655737704918e-06, + "loss": 0.9355, "step": 171 }, { - "epoch": 0.013015020241383225, - "grad_norm": 4.223328590393066, - "learning_rate": 4.683812405446293e-06, - "loss": 1.0867, + "epoch": 0.012108412530799015, + "grad_norm": 4.687588691711426, + "learning_rate": 5.639344262295082e-06, + "loss": 1.0508, "step": 172 }, { - "epoch": 0.013090688963716847, - "grad_norm": 3.262395143508911, - "learning_rate": 4.711043872919819e-06, - "loss": 1.0776, + "epoch": 0.012178810278071103, + "grad_norm": 4.791179656982422, + "learning_rate": 5.672131147540984e-06, + "loss": 0.8908, "step": 173 }, { - "epoch": 0.013166357686050472, - "grad_norm": 8.45308780670166, - "learning_rate": 4.738275340393344e-06, - "loss": 0.9743, + "epoch": 0.012249208025343189, + "grad_norm": 4.379640579223633, + "learning_rate": 5.7049180327868855e-06, + "loss": 0.8989, "step": 174 }, { - "epoch": 0.013242026408384094, - "grad_norm": 6.289074420928955, - "learning_rate": 4.765506807866868e-06, - "loss": 1.0345, + "epoch": 0.012319605772615276, + "grad_norm": 4.857367038726807, + "learning_rate": 5.737704918032787e-06, + "loss": 0.9819, "step": 175 }, { - "epoch": 0.013317695130717717, - "grad_norm": 3.5306406021118164, - "learning_rate": 4.792738275340394e-06, - "loss": 0.9317, + "epoch": 0.012390003519887364, + "grad_norm": 3.231602668762207, + "learning_rate": 5.770491803278688e-06, + "loss": 0.9106, "step": 176 }, { - "epoch": 0.013393363853051342, - "grad_norm": 2.8451457023620605, - "learning_rate": 4.819969742813918e-06, - "loss": 1.1339, + "epoch": 0.01246040126715945, + "grad_norm": 5.948474407196045, + "learning_rate": 5.80327868852459e-06, + "loss": 0.9429, "step": 177 }, { - "epoch": 0.013469032575384964, - "grad_norm": 4.440673351287842, - "learning_rate": 4.847201210287444e-06, - "loss": 1.1597, + "epoch": 0.012530799014431538, + "grad_norm": 3.734452724456787, + "learning_rate": 5.836065573770492e-06, + "loss": 0.9505, "step": 178 }, { - "epoch": 0.013544701297718589, - "grad_norm": 4.818862438201904, - "learning_rate": 4.874432677760968e-06, - "loss": 1.2254, + "epoch": 0.012601196761703626, + "grad_norm": 4.1793131828308105, + "learning_rate": 5.868852459016394e-06, + "loss": 0.8195, "step": 179 }, { - "epoch": 0.013620370020052211, - "grad_norm": 28.1450138092041, - "learning_rate": 4.901664145234493e-06, - "loss": 1.0227, + "epoch": 0.012671594508975714, + "grad_norm": 4.072559356689453, + "learning_rate": 5.9016393442622956e-06, + "loss": 1.0033, "step": 180 }, { - "epoch": 0.013696038742385834, - "grad_norm": 5.457899570465088, - "learning_rate": 4.928895612708019e-06, - "loss": 0.9296, + "epoch": 0.0127419922562478, + "grad_norm": 4.6122565269470215, + "learning_rate": 5.934426229508197e-06, + "loss": 0.881, "step": 181 }, { - "epoch": 0.013771707464719458, - "grad_norm": 7.308279514312744, - "learning_rate": 4.9561270801815436e-06, - "loss": 1.0794, + "epoch": 0.012812390003519887, + "grad_norm": 4.698636531829834, + "learning_rate": 5.9672131147540975e-06, + "loss": 0.8632, "step": 182 }, { - "epoch": 0.013847376187053081, - "grad_norm": 4.073877811431885, - "learning_rate": 4.983358547655068e-06, - "loss": 1.0149, + "epoch": 0.012882787750791975, + "grad_norm": 4.806349277496338, + "learning_rate": 5.999999999999999e-06, + "loss": 0.8217, "step": 183 }, { - "epoch": 0.013923044909386706, - "grad_norm": 4.441009044647217, - "learning_rate": 5.010590015128593e-06, - "loss": 1.1169, + "epoch": 0.012953185498064061, + "grad_norm": 3.4926156997680664, + "learning_rate": 6.032786885245901e-06, + "loss": 0.9821, "step": 184 }, { - "epoch": 0.013998713631720328, - "grad_norm": 3.17683744430542, - "learning_rate": 5.0378214826021185e-06, - "loss": 0.9667, + "epoch": 0.013023583245336149, + "grad_norm": 5.424435615539551, + "learning_rate": 6.065573770491803e-06, + "loss": 0.924, "step": 185 }, { - "epoch": 0.014074382354053951, - "grad_norm": 5.486913204193115, - "learning_rate": 5.065052950075642e-06, - "loss": 0.9504, + "epoch": 0.013093980992608237, + "grad_norm": 3.5752687454223633, + "learning_rate": 6.098360655737705e-06, + "loss": 0.9442, "step": 186 }, { - "epoch": 0.014150051076387575, - "grad_norm": 3.611471176147461, - "learning_rate": 5.092284417549168e-06, - "loss": 1.0478, + "epoch": 0.013164378739880325, + "grad_norm": 3.6515116691589355, + "learning_rate": 6.1311475409836066e-06, + "loss": 0.8173, "step": 187 }, { - "epoch": 0.014225719798721198, - "grad_norm": 4.8300981521606445, - "learning_rate": 5.1195158850226935e-06, - "loss": 1.0817, + "epoch": 0.01323477648715241, + "grad_norm": 6.96682596206665, + "learning_rate": 6.163934426229508e-06, + "loss": 0.8514, "step": 188 }, { - "epoch": 0.014301388521054823, - "grad_norm": 13.371825218200684, - "learning_rate": 5.146747352496218e-06, - "loss": 1.046, + "epoch": 0.013305174234424498, + "grad_norm": 3.709293842315674, + "learning_rate": 6.196721311475409e-06, + "loss": 0.9514, "step": 189 }, { - "epoch": 0.014377057243388445, - "grad_norm": 4.5194621086120605, - "learning_rate": 5.173978819969743e-06, - "loss": 0.9503, + "epoch": 0.013375571981696586, + "grad_norm": 3.450347423553467, + "learning_rate": 6.229508196721311e-06, + "loss": 1.0049, "step": 190 }, { - "epoch": 0.01445272596572207, - "grad_norm": 4.531895637512207, - "learning_rate": 5.201210287443268e-06, - "loss": 0.8728, + "epoch": 0.013445969728968672, + "grad_norm": 3.825087785720825, + "learning_rate": 6.262295081967213e-06, + "loss": 0.9766, "step": 191 }, { - "epoch": 0.014528394688055692, - "grad_norm": 4.822807788848877, - "learning_rate": 5.228441754916793e-06, - "loss": 0.9806, + "epoch": 0.01351636747624076, + "grad_norm": 3.5818939208984375, + "learning_rate": 6.295081967213115e-06, + "loss": 1.0047, "step": 192 }, { - "epoch": 0.014604063410389315, - "grad_norm": 2.7862422466278076, - "learning_rate": 5.255673222390318e-06, - "loss": 1.0006, + "epoch": 0.013586765223512848, + "grad_norm": 5.840765476226807, + "learning_rate": 6.3278688524590166e-06, + "loss": 0.9427, "step": 193 }, { - "epoch": 0.01467973213272294, - "grad_norm": 4.66079044342041, - "learning_rate": 5.2829046898638426e-06, - "loss": 1.0805, + "epoch": 0.013657162970784936, + "grad_norm": 4.033573150634766, + "learning_rate": 6.360655737704918e-06, + "loss": 1.0539, "step": 194 }, { - "epoch": 0.014755400855056562, - "grad_norm": 3.729022264480591, - "learning_rate": 5.310136157337367e-06, - "loss": 1.1511, + "epoch": 0.013727560718057022, + "grad_norm": 6.832907199859619, + "learning_rate": 6.393442622950819e-06, + "loss": 0.9193, "step": 195 }, { - "epoch": 0.014831069577390187, - "grad_norm": 2.492928981781006, - "learning_rate": 5.337367624810893e-06, - "loss": 1.0107, + "epoch": 0.01379795846532911, + "grad_norm": 4.365390300750732, + "learning_rate": 6.426229508196721e-06, + "loss": 1.1125, "step": 196 }, { - "epoch": 0.01490673829972381, - "grad_norm": 5.297464370727539, - "learning_rate": 5.364599092284418e-06, - "loss": 1.0515, + "epoch": 0.013868356212601197, + "grad_norm": 3.70200514793396, + "learning_rate": 6.459016393442623e-06, + "loss": 0.9829, "step": 197 }, { - "epoch": 0.014982407022057432, - "grad_norm": 5.455732345581055, - "learning_rate": 5.391830559757942e-06, - "loss": 0.9973, + "epoch": 0.013938753959873283, + "grad_norm": 12.645018577575684, + "learning_rate": 6.491803278688525e-06, + "loss": 0.8635, "step": 198 }, { - "epoch": 0.015058075744391056, - "grad_norm": 10.293705940246582, - "learning_rate": 5.419062027231468e-06, - "loss": 1.0379, + "epoch": 0.014009151707145371, + "grad_norm": 3.4400744438171387, + "learning_rate": 6.524590163934427e-06, + "loss": 0.9068, "step": 199 }, { - "epoch": 0.015133744466724679, - "grad_norm": 6.046285152435303, - "learning_rate": 5.4462934947049925e-06, - "loss": 1.0908, + "epoch": 0.014079549454417459, + "grad_norm": 3.9763290882110596, + "learning_rate": 6.557377049180328e-06, + "loss": 0.8948, "step": 200 }, { - "epoch": 0.015209413189058303, - "grad_norm": 19.921154022216797, - "learning_rate": 5.473524962178517e-06, - "loss": 0.9654, + "epoch": 0.014149947201689547, + "grad_norm": 4.3906121253967285, + "learning_rate": 6.590163934426229e-06, + "loss": 0.89, "step": 201 }, { - "epoch": 0.015285081911391926, - "grad_norm": 3.6806235313415527, - "learning_rate": 5.500756429652042e-06, - "loss": 1.1749, + "epoch": 0.014220344948961633, + "grad_norm": 3.3688299655914307, + "learning_rate": 6.622950819672131e-06, + "loss": 0.8262, "step": 202 }, { - "epoch": 0.015360750633725549, - "grad_norm": 17.536718368530273, - "learning_rate": 5.5279878971255674e-06, - "loss": 0.9876, + "epoch": 0.01429074269623372, + "grad_norm": 3.9636080265045166, + "learning_rate": 6.655737704918032e-06, + "loss": 1.0216, "step": 203 }, { - "epoch": 0.015436419356059173, - "grad_norm": 6.348948955535889, - "learning_rate": 5.555219364599093e-06, - "loss": 1.1173, + "epoch": 0.014361140443505808, + "grad_norm": 3.3210840225219727, + "learning_rate": 6.688524590163934e-06, + "loss": 0.8529, "step": 204 }, { - "epoch": 0.015512088078392796, - "grad_norm": 3.97938871383667, - "learning_rate": 5.582450832072617e-06, - "loss": 0.9842, + "epoch": 0.014431538190777896, + "grad_norm": 3.6048660278320312, + "learning_rate": 6.721311475409836e-06, + "loss": 0.9815, "step": 205 }, { - "epoch": 0.01558775680072642, - "grad_norm": 22.651533126831055, - "learning_rate": 5.609682299546142e-06, - "loss": 1.2029, + "epoch": 0.014501935938049982, + "grad_norm": 4.036753177642822, + "learning_rate": 6.754098360655738e-06, + "loss": 0.9306, "step": 206 }, { - "epoch": 0.015663425523060045, - "grad_norm": 2.902076005935669, - "learning_rate": 5.636913767019667e-06, - "loss": 0.9287, + "epoch": 0.01457233368532207, + "grad_norm": 6.482748985290527, + "learning_rate": 6.786885245901639e-06, + "loss": 0.8143, "step": 207 }, { - "epoch": 0.015739094245393666, - "grad_norm": 4.252765655517578, - "learning_rate": 5.664145234493193e-06, - "loss": 1.1368, + "epoch": 0.014642731432594158, + "grad_norm": 3.839012384414673, + "learning_rate": 6.81967213114754e-06, + "loss": 0.8622, "step": 208 }, { - "epoch": 0.01581476296772729, - "grad_norm": 7.302497386932373, - "learning_rate": 5.6913767019667165e-06, - "loss": 0.909, + "epoch": 0.014713129179866244, + "grad_norm": 3.51278018951416, + "learning_rate": 6.852459016393442e-06, + "loss": 0.7603, "step": 209 }, { - "epoch": 0.015890431690060915, - "grad_norm": 3.428435802459717, - "learning_rate": 5.718608169440242e-06, - "loss": 0.8431, + "epoch": 0.014783526927138331, + "grad_norm": 3.8590409755706787, + "learning_rate": 6.885245901639344e-06, + "loss": 0.9236, "step": 210 }, { - "epoch": 0.015966100412394536, - "grad_norm": 26.798280715942383, - "learning_rate": 5.745839636913768e-06, - "loss": 1.055, + "epoch": 0.014853924674410419, + "grad_norm": 4.040726184844971, + "learning_rate": 6.918032786885246e-06, + "loss": 0.867, "step": 211 }, { - "epoch": 0.01604176913472816, - "grad_norm": 2.7646429538726807, - "learning_rate": 5.773071104387292e-06, - "loss": 0.9447, + "epoch": 0.014924322421682507, + "grad_norm": 3.7332282066345215, + "learning_rate": 6.950819672131148e-06, + "loss": 0.9563, "step": 212 }, { - "epoch": 0.016117437857061784, - "grad_norm": 41.349822998046875, - "learning_rate": 5.800302571860817e-06, - "loss": 0.9701, + "epoch": 0.014994720168954593, + "grad_norm": 4.816858291625977, + "learning_rate": 6.983606557377049e-06, + "loss": 1.0263, "step": 213 }, { - "epoch": 0.016193106579395405, - "grad_norm": 3.406937599182129, - "learning_rate": 5.827534039334342e-06, - "loss": 0.8292, + "epoch": 0.01506511791622668, + "grad_norm": 3.9906628131866455, + "learning_rate": 7.01639344262295e-06, + "loss": 0.9658, "step": 214 }, { - "epoch": 0.01626877530172903, - "grad_norm": 4.102505207061768, - "learning_rate": 5.854765506807867e-06, - "loss": 1.0225, + "epoch": 0.015135515663498769, + "grad_norm": 3.294687509536743, + "learning_rate": 7.049180327868852e-06, + "loss": 0.9832, "step": 215 }, { - "epoch": 0.016344444024062654, - "grad_norm": 3.140641689300537, - "learning_rate": 5.881996974281391e-06, - "loss": 1.0587, + "epoch": 0.015205913410770855, + "grad_norm": 4.467609405517578, + "learning_rate": 7.081967213114754e-06, + "loss": 0.9228, "step": 216 }, { - "epoch": 0.01642011274639628, - "grad_norm": 3.22949481010437, - "learning_rate": 5.909228441754917e-06, - "loss": 0.9173, + "epoch": 0.015276311158042942, + "grad_norm": 3.532924175262451, + "learning_rate": 7.114754098360656e-06, + "loss": 0.8478, "step": 217 }, { - "epoch": 0.0164957814687299, - "grad_norm": 3.503279209136963, - "learning_rate": 5.936459909228442e-06, - "loss": 1.0667, + "epoch": 0.01534670890531503, + "grad_norm": 4.039727210998535, + "learning_rate": 7.147540983606558e-06, + "loss": 0.828, "step": 218 }, { - "epoch": 0.016571450191063524, - "grad_norm": 3.5009310245513916, - "learning_rate": 5.963691376701967e-06, - "loss": 0.9895, + "epoch": 0.015417106652587118, + "grad_norm": 3.7719645500183105, + "learning_rate": 7.1803278688524594e-06, + "loss": 1.0205, "step": 219 }, { - "epoch": 0.01664711891339715, - "grad_norm": 2.58160400390625, - "learning_rate": 5.990922844175492e-06, - "loss": 0.995, + "epoch": 0.015487504399859204, + "grad_norm": 6.250553607940674, + "learning_rate": 7.21311475409836e-06, + "loss": 0.9531, "step": 220 }, { - "epoch": 0.01672278763573077, - "grad_norm": 3.4536960124969482, - "learning_rate": 6.018154311649016e-06, - "loss": 0.965, + "epoch": 0.015557902147131292, + "grad_norm": 3.9546256065368652, + "learning_rate": 7.245901639344262e-06, + "loss": 0.8949, "step": 221 }, { - "epoch": 0.016798456358064394, - "grad_norm": 4.148844242095947, - "learning_rate": 6.045385779122542e-06, - "loss": 1.1884, + "epoch": 0.01562829989440338, + "grad_norm": 33.228485107421875, + "learning_rate": 7.278688524590164e-06, + "loss": 0.868, "step": 222 }, { - "epoch": 0.01687412508039802, - "grad_norm": 3.2756500244140625, - "learning_rate": 6.0726172465960674e-06, - "loss": 1.0833, + "epoch": 0.015698697641675467, + "grad_norm": 4.165818691253662, + "learning_rate": 7.311475409836066e-06, + "loss": 0.9377, "step": 223 }, { - "epoch": 0.01694979380273164, - "grad_norm": 11.904067993164062, - "learning_rate": 6.099848714069591e-06, - "loss": 0.9768, + "epoch": 0.015769095388947555, + "grad_norm": 3.811441659927368, + "learning_rate": 7.344262295081968e-06, + "loss": 1.024, "step": 224 }, { - "epoch": 0.017025462525065264, - "grad_norm": 3.4661612510681152, - "learning_rate": 6.127080181543117e-06, - "loss": 1.0659, + "epoch": 0.01583949313621964, + "grad_norm": 11.727596282958984, + "learning_rate": 7.3770491803278695e-06, + "loss": 0.9551, "step": 225 }, { - "epoch": 0.017101131247398888, - "grad_norm": 4.6939167976379395, - "learning_rate": 6.1543116490166416e-06, - "loss": 1.0258, + "epoch": 0.015909890883491727, + "grad_norm": 3.6691038608551025, + "learning_rate": 7.40983606557377e-06, + "loss": 0.7604, "step": 226 }, { - "epoch": 0.017176799969732513, - "grad_norm": 2.9636013507843018, - "learning_rate": 6.181543116490167e-06, - "loss": 1.0982, + "epoch": 0.015980288630763815, + "grad_norm": 4.284067153930664, + "learning_rate": 7.442622950819672e-06, + "loss": 0.9928, "step": 227 }, { - "epoch": 0.017252468692066134, - "grad_norm": 4.7203826904296875, - "learning_rate": 6.208774583963691e-06, - "loss": 1.1271, + "epoch": 0.016050686378035903, + "grad_norm": 3.348663568496704, + "learning_rate": 7.475409836065574e-06, + "loss": 0.8041, "step": 228 }, { - "epoch": 0.017328137414399758, - "grad_norm": 16.543560028076172, - "learning_rate": 6.2360060514372165e-06, - "loss": 1.0417, + "epoch": 0.01612108412530799, + "grad_norm": 3.5675337314605713, + "learning_rate": 7.508196721311476e-06, + "loss": 0.9229, "step": 229 }, { - "epoch": 0.017403806136733382, - "grad_norm": 3.9379353523254395, - "learning_rate": 6.263237518910742e-06, - "loss": 1.0619, + "epoch": 0.016191481872580078, + "grad_norm": 3.7404117584228516, + "learning_rate": 7.540983606557378e-06, + "loss": 0.9307, "step": 230 }, { - "epoch": 0.017479474859067003, - "grad_norm": 3.562490701675415, - "learning_rate": 6.290468986384266e-06, - "loss": 0.8995, + "epoch": 0.016261879619852166, + "grad_norm": 3.531076669692993, + "learning_rate": 7.5737704918032795e-06, + "loss": 0.8148, "step": 231 }, { - "epoch": 0.017555143581400628, - "grad_norm": 3.5367138385772705, - "learning_rate": 6.3177004538577915e-06, - "loss": 1.0685, + "epoch": 0.01633227736712425, + "grad_norm": 5.2189435958862305, + "learning_rate": 7.606557377049181e-06, + "loss": 0.813, "step": 232 }, { - "epoch": 0.017630812303734252, - "grad_norm": 4.708889961242676, - "learning_rate": 6.344931921331316e-06, - "loss": 0.9722, + "epoch": 0.016402675114396338, + "grad_norm": 9.95646858215332, + "learning_rate": 7.639344262295082e-06, + "loss": 0.7263, "step": 233 }, { - "epoch": 0.017706481026067873, - "grad_norm": 3.517542600631714, - "learning_rate": 6.372163388804842e-06, - "loss": 1.0182, + "epoch": 0.016473072861668426, + "grad_norm": 4.791502475738525, + "learning_rate": 7.672131147540985e-06, + "loss": 0.8592, "step": 234 }, { - "epoch": 0.017782149748401498, - "grad_norm": 5.070290565490723, - "learning_rate": 6.399394856278366e-06, - "loss": 0.9933, + "epoch": 0.016543470608940514, + "grad_norm": 3.484861135482788, + "learning_rate": 7.704918032786884e-06, + "loss": 0.8775, "step": 235 }, { - "epoch": 0.017857818470735122, - "grad_norm": 3.898589849472046, - "learning_rate": 6.426626323751891e-06, - "loss": 0.8593, + "epoch": 0.0166138683562126, + "grad_norm": 3.6990580558776855, + "learning_rate": 7.737704918032787e-06, + "loss": 0.9638, "step": 236 }, { - "epoch": 0.017933487193068746, - "grad_norm": 3.1231884956359863, - "learning_rate": 6.453857791225417e-06, - "loss": 1.2586, + "epoch": 0.01668426610348469, + "grad_norm": 4.778743743896484, + "learning_rate": 7.770491803278688e-06, + "loss": 0.921, "step": 237 }, { - "epoch": 0.018009155915402367, - "grad_norm": 4.742455005645752, - "learning_rate": 6.481089258698941e-06, - "loss": 1.0591, + "epoch": 0.016754663850756777, + "grad_norm": 3.448641061782837, + "learning_rate": 7.803278688524589e-06, + "loss": 0.9816, "step": 238 }, { - "epoch": 0.018084824637735992, - "grad_norm": 5.456655502319336, - "learning_rate": 6.508320726172466e-06, - "loss": 0.7356, + "epoch": 0.016825061598028865, + "grad_norm": 4.0377888679504395, + "learning_rate": 7.836065573770491e-06, + "loss": 0.8806, "step": 239 }, { - "epoch": 0.018160493360069616, - "grad_norm": 3.905632495880127, - "learning_rate": 6.535552193645991e-06, - "loss": 0.881, + "epoch": 0.01689545934530095, + "grad_norm": 5.805827617645264, + "learning_rate": 7.868852459016392e-06, + "loss": 0.9271, "step": 240 }, { - "epoch": 0.018236162082403237, - "grad_norm": 2.6957733631134033, - "learning_rate": 6.562783661119516e-06, - "loss": 1.0049, + "epoch": 0.016965857092573037, + "grad_norm": 3.366772174835205, + "learning_rate": 7.901639344262295e-06, + "loss": 0.9154, "step": 241 }, { - "epoch": 0.01831183080473686, - "grad_norm": 3.1716785430908203, - "learning_rate": 6.590015128593041e-06, - "loss": 0.981, + "epoch": 0.017036254839845125, + "grad_norm": 4.433914661407471, + "learning_rate": 7.934426229508196e-06, + "loss": 0.8839, "step": 242 }, { - "epoch": 0.018387499527070486, - "grad_norm": 3.713045597076416, - "learning_rate": 6.617246596066566e-06, - "loss": 0.9511, + "epoch": 0.017106652587117212, + "grad_norm": 3.0600433349609375, + "learning_rate": 7.967213114754097e-06, + "loss": 1.113, "step": 243 }, { - "epoch": 0.01846316824940411, - "grad_norm": 6.078882694244385, - "learning_rate": 6.644478063540091e-06, - "loss": 0.8243, + "epoch": 0.0171770503343893, + "grad_norm": 3.755241632461548, + "learning_rate": 8e-06, + "loss": 0.9123, "step": 244 }, { - "epoch": 0.01853883697173773, - "grad_norm": 3.7608482837677, - "learning_rate": 6.671709531013616e-06, - "loss": 0.8829, + "epoch": 0.017247448081661388, + "grad_norm": 4.450098514556885, + "learning_rate": 8.0327868852459e-06, + "loss": 0.8917, "step": 245 }, { - "epoch": 0.018614505694071356, - "grad_norm": 6.1873321533203125, - "learning_rate": 6.698940998487141e-06, - "loss": 1.0227, + "epoch": 0.017317845828933476, + "grad_norm": 3.9600069522857666, + "learning_rate": 8.065573770491803e-06, + "loss": 0.8284, "step": 246 }, { - "epoch": 0.01869017441640498, - "grad_norm": 3.6704089641571045, - "learning_rate": 6.726172465960665e-06, - "loss": 0.9248, + "epoch": 0.01738824357620556, + "grad_norm": 3.405292510986328, + "learning_rate": 8.098360655737704e-06, + "loss": 0.8444, "step": 247 }, { - "epoch": 0.0187658431387386, - "grad_norm": 3.1460700035095215, - "learning_rate": 6.753403933434191e-06, - "loss": 1.0313, + "epoch": 0.017458641323477648, + "grad_norm": 5.175904273986816, + "learning_rate": 8.131147540983607e-06, + "loss": 0.9082, "step": 248 }, { - "epoch": 0.018841511861072226, - "grad_norm": 3.855139970779419, - "learning_rate": 6.780635400907716e-06, - "loss": 0.9423, + "epoch": 0.017529039070749736, + "grad_norm": 3.744906187057495, + "learning_rate": 8.163934426229508e-06, + "loss": 0.9623, "step": 249 }, { - "epoch": 0.01891718058340585, - "grad_norm": 3.8225889205932617, - "learning_rate": 6.80786686838124e-06, - "loss": 0.9772, + "epoch": 0.017599436818021823, + "grad_norm": 4.443304061889648, + "learning_rate": 8.196721311475409e-06, + "loss": 0.9636, "step": 250 }, { - "epoch": 0.01899284930573947, - "grad_norm": 2.7261159420013428, - "learning_rate": 6.835098335854766e-06, - "loss": 0.9998, + "epoch": 0.01766983456529391, + "grad_norm": 4.138010501861572, + "learning_rate": 8.229508196721311e-06, + "loss": 0.99, "step": 251 }, { - "epoch": 0.019068518028073096, - "grad_norm": 2.887666702270508, - "learning_rate": 6.862329803328291e-06, - "loss": 1.052, + "epoch": 0.017740232312566, + "grad_norm": 3.523056983947754, + "learning_rate": 8.262295081967212e-06, + "loss": 0.7728, "step": 252 }, { - "epoch": 0.01914418675040672, - "grad_norm": 3.3819353580474854, - "learning_rate": 6.889561270801816e-06, - "loss": 0.9431, + "epoch": 0.017810630059838087, + "grad_norm": 3.575166940689087, + "learning_rate": 8.295081967213115e-06, + "loss": 0.8997, "step": 253 }, { - "epoch": 0.019219855472740344, - "grad_norm": 3.0159807205200195, - "learning_rate": 6.91679273827534e-06, - "loss": 0.9702, + "epoch": 0.01788102780711017, + "grad_norm": 4.717628479003906, + "learning_rate": 8.327868852459016e-06, + "loss": 0.8844, "step": 254 }, { - "epoch": 0.019295524195073965, - "grad_norm": 3.413630962371826, - "learning_rate": 6.944024205748866e-06, - "loss": 0.8845, + "epoch": 0.01795142555438226, + "grad_norm": 4.759855270385742, + "learning_rate": 8.360655737704917e-06, + "loss": 0.8421, "step": 255 }, { - "epoch": 0.01937119291740759, - "grad_norm": 5.078720569610596, - "learning_rate": 6.97125567322239e-06, - "loss": 0.9984, + "epoch": 0.018021823301654347, + "grad_norm": 3.1284937858581543, + "learning_rate": 8.39344262295082e-06, + "loss": 0.7817, "step": 256 }, { - "epoch": 0.019446861639741214, - "grad_norm": 3.1097350120544434, - "learning_rate": 6.998487140695916e-06, - "loss": 1.0355, + "epoch": 0.018092221048926434, + "grad_norm": 4.886071681976318, + "learning_rate": 8.42622950819672e-06, + "loss": 0.9239, "step": 257 }, { - "epoch": 0.019522530362074835, - "grad_norm": 4.542701721191406, - "learning_rate": 7.02571860816944e-06, - "loss": 1.1622, + "epoch": 0.018162618796198522, + "grad_norm": 5.257235527038574, + "learning_rate": 8.459016393442623e-06, + "loss": 0.8524, "step": 258 }, { - "epoch": 0.01959819908440846, - "grad_norm": 10.559003829956055, - "learning_rate": 7.052950075642965e-06, - "loss": 1.2439, + "epoch": 0.01823301654347061, + "grad_norm": 3.9936141967773438, + "learning_rate": 8.491803278688524e-06, + "loss": 1.1224, "step": 259 }, { - "epoch": 0.019673867806742084, - "grad_norm": 5.9329609870910645, - "learning_rate": 7.080181543116491e-06, - "loss": 1.0442, + "epoch": 0.018303414290742698, + "grad_norm": 4.840516567230225, + "learning_rate": 8.524590163934427e-06, + "loss": 0.8795, "step": 260 }, { - "epoch": 0.019749536529075705, - "grad_norm": 3.9243202209472656, - "learning_rate": 7.1074130105900155e-06, - "loss": 0.9679, + "epoch": 0.018373812038014782, + "grad_norm": 4.416502952575684, + "learning_rate": 8.557377049180328e-06, + "loss": 0.9806, "step": 261 }, { - "epoch": 0.01982520525140933, - "grad_norm": 4.9199910163879395, - "learning_rate": 7.13464447806354e-06, - "loss": 1.1109, + "epoch": 0.01844420978528687, + "grad_norm": 3.600187063217163, + "learning_rate": 8.590163934426229e-06, + "loss": 0.7993, "step": 262 }, { - "epoch": 0.019900873973742954, - "grad_norm": 2.8583781719207764, - "learning_rate": 7.161875945537065e-06, - "loss": 1.0152, + "epoch": 0.018514607532558958, + "grad_norm": 6.5911149978637695, + "learning_rate": 8.622950819672132e-06, + "loss": 0.8129, "step": 263 }, { - "epoch": 0.019976542696076578, - "grad_norm": 3.566678524017334, - "learning_rate": 7.1891074130105905e-06, - "loss": 1.0698, + "epoch": 0.018585005279831045, + "grad_norm": 3.085052490234375, + "learning_rate": 8.655737704918032e-06, + "loss": 1.0117, "step": 264 }, { - "epoch": 0.0200522114184102, - "grad_norm": 4.623547554016113, - "learning_rate": 7.216338880484114e-06, - "loss": 1.0657, + "epoch": 0.018655403027103133, + "grad_norm": 3.9560866355895996, + "learning_rate": 8.688524590163935e-06, + "loss": 0.9242, "step": 265 }, { - "epoch": 0.020127880140743824, - "grad_norm": 3.9932198524475098, - "learning_rate": 7.24357034795764e-06, - "loss": 0.9034, + "epoch": 0.01872580077437522, + "grad_norm": 3.763780117034912, + "learning_rate": 8.721311475409836e-06, + "loss": 0.8646, "step": 266 }, { - "epoch": 0.020203548863077448, - "grad_norm": 2.8435165882110596, - "learning_rate": 7.2708018154311654e-06, - "loss": 0.8764, + "epoch": 0.01879619852164731, + "grad_norm": 5.71022891998291, + "learning_rate": 8.754098360655739e-06, + "loss": 0.9488, "step": 267 }, { - "epoch": 0.02027921758541107, - "grad_norm": 3.262338161468506, - "learning_rate": 7.29803328290469e-06, - "loss": 1.1094, + "epoch": 0.018866596268919393, + "grad_norm": 3.3141403198242188, + "learning_rate": 8.78688524590164e-06, + "loss": 0.9574, "step": 268 }, { - "epoch": 0.020354886307744693, - "grad_norm": 8.468903541564941, - "learning_rate": 7.325264750378215e-06, - "loss": 0.9228, + "epoch": 0.01893699401619148, + "grad_norm": 3.565404176712036, + "learning_rate": 8.81967213114754e-06, + "loss": 0.9155, "step": 269 }, { - "epoch": 0.020430555030078318, - "grad_norm": 3.0359139442443848, - "learning_rate": 7.3524962178517395e-06, - "loss": 0.8587, + "epoch": 0.01900739176346357, + "grad_norm": 3.527479648590088, + "learning_rate": 8.852459016393443e-06, + "loss": 0.8826, "step": 270 }, { - "epoch": 0.02050622375241194, - "grad_norm": 8.242533683776855, - "learning_rate": 7.379727685325265e-06, - "loss": 1.0187, + "epoch": 0.019077789510735656, + "grad_norm": 4.499534606933594, + "learning_rate": 8.885245901639344e-06, + "loss": 0.8084, "step": 271 }, { - "epoch": 0.020581892474745563, - "grad_norm": 2.938859224319458, - "learning_rate": 7.406959152798791e-06, - "loss": 1.0385, + "epoch": 0.019148187258007744, + "grad_norm": 4.082296371459961, + "learning_rate": 8.918032786885247e-06, + "loss": 0.8621, "step": 272 }, { - "epoch": 0.020657561197079188, - "grad_norm": 3.197413921356201, - "learning_rate": 7.4341906202723145e-06, - "loss": 0.9181, + "epoch": 0.019218585005279832, + "grad_norm": 3.5235462188720703, + "learning_rate": 8.950819672131148e-06, + "loss": 0.7596, "step": 273 }, { - "epoch": 0.020733229919412812, - "grad_norm": 5.897491931915283, - "learning_rate": 7.46142208774584e-06, - "loss": 1.0107, + "epoch": 0.01928898275255192, + "grad_norm": 4.5780110359191895, + "learning_rate": 8.983606557377049e-06, + "loss": 0.9243, "step": 274 }, { - "epoch": 0.020808898641746433, - "grad_norm": 3.680340528488159, - "learning_rate": 7.488653555219365e-06, - "loss": 1.0152, + "epoch": 0.019359380499824004, + "grad_norm": 5.595239162445068, + "learning_rate": 9.016393442622952e-06, + "loss": 0.968, "step": 275 }, { - "epoch": 0.020884567364080057, - "grad_norm": 6.313503265380859, - "learning_rate": 7.51588502269289e-06, - "loss": 1.0487, + "epoch": 0.019429778247096092, + "grad_norm": 5.420657157897949, + "learning_rate": 9.049180327868853e-06, + "loss": 0.8049, "step": 276 }, { - "epoch": 0.020960236086413682, - "grad_norm": 3.8063981533050537, - "learning_rate": 7.543116490166414e-06, - "loss": 1.0217, + "epoch": 0.01950017599436818, + "grad_norm": 3.4581263065338135, + "learning_rate": 9.081967213114755e-06, + "loss": 0.7713, "step": 277 }, { - "epoch": 0.021035904808747303, - "grad_norm": 9.276975631713867, - "learning_rate": 7.57034795763994e-06, - "loss": 0.9587, + "epoch": 0.019570573741640267, + "grad_norm": 4.157036781311035, + "learning_rate": 9.114754098360654e-06, + "loss": 0.9512, "step": 278 }, { - "epoch": 0.021111573531080927, - "grad_norm": 4.729788303375244, - "learning_rate": 7.597579425113465e-06, - "loss": 0.8946, + "epoch": 0.019640971488912355, + "grad_norm": 3.5737931728363037, + "learning_rate": 9.147540983606557e-06, + "loss": 0.8692, "step": 279 }, { - "epoch": 0.02118724225341455, - "grad_norm": 3.3282487392425537, - "learning_rate": 7.624810892586989e-06, - "loss": 1.1086, + "epoch": 0.019711369236184443, + "grad_norm": 4.260402202606201, + "learning_rate": 9.180327868852458e-06, + "loss": 0.9243, "step": 280 }, { - "epoch": 0.021262910975748176, - "grad_norm": 2.672250509262085, - "learning_rate": 7.652042360060515e-06, - "loss": 0.8411, + "epoch": 0.01978176698345653, + "grad_norm": 4.510676383972168, + "learning_rate": 9.213114754098359e-06, + "loss": 0.7892, "step": 281 }, { - "epoch": 0.021338579698081797, - "grad_norm": 4.054312705993652, - "learning_rate": 7.67927382753404e-06, - "loss": 1.0374, + "epoch": 0.019852164730728615, + "grad_norm": 4.167537212371826, + "learning_rate": 9.245901639344262e-06, + "loss": 1.0038, "step": 282 }, { - "epoch": 0.02141424842041542, - "grad_norm": 4.272651195526123, - "learning_rate": 7.706505295007564e-06, - "loss": 1.039, + "epoch": 0.019922562478000703, + "grad_norm": 3.8265209197998047, + "learning_rate": 9.278688524590163e-06, + "loss": 0.9305, "step": 283 }, { - "epoch": 0.021489917142749046, - "grad_norm": 3.3986735343933105, - "learning_rate": 7.733736762481089e-06, - "loss": 0.9995, + "epoch": 0.01999296022527279, + "grad_norm": 3.835582733154297, + "learning_rate": 9.311475409836065e-06, + "loss": 0.9405, "step": 284 }, { - "epoch": 0.021565585865082667, - "grad_norm": 4.5488481521606445, - "learning_rate": 7.760968229954613e-06, - "loss": 1.1762, + "epoch": 0.02006335797254488, + "grad_norm": 3.796865224838257, + "learning_rate": 9.344262295081966e-06, + "loss": 0.9191, "step": 285 }, { - "epoch": 0.02164125458741629, - "grad_norm": 4.396289348602295, - "learning_rate": 7.78819969742814e-06, - "loss": 0.9458, + "epoch": 0.020133755719816966, + "grad_norm": 5.346733570098877, + "learning_rate": 9.377049180327869e-06, + "loss": 1.1239, "step": 286 }, { - "epoch": 0.021716923309749916, - "grad_norm": 4.582161903381348, - "learning_rate": 7.815431164901665e-06, - "loss": 1.0327, + "epoch": 0.020204153467089054, + "grad_norm": 3.646650552749634, + "learning_rate": 9.40983606557377e-06, + "loss": 0.7792, "step": 287 }, { - "epoch": 0.021792592032083537, - "grad_norm": 4.647852420806885, - "learning_rate": 7.84266263237519e-06, - "loss": 1.0392, + "epoch": 0.02027455121436114, + "grad_norm": 6.542603492736816, + "learning_rate": 9.442622950819671e-06, + "loss": 0.839, "step": 288 }, { - "epoch": 0.02186826075441716, - "grad_norm": 5.218382358551025, - "learning_rate": 7.869894099848714e-06, - "loss": 0.9462, + "epoch": 0.020344948961633226, + "grad_norm": 12.629976272583008, + "learning_rate": 9.475409836065574e-06, + "loss": 0.8477, "step": 289 }, { - "epoch": 0.021943929476750786, - "grad_norm": 5.8986029624938965, - "learning_rate": 7.897125567322239e-06, - "loss": 0.975, + "epoch": 0.020415346708905314, + "grad_norm": 3.32816743850708, + "learning_rate": 9.508196721311474e-06, + "loss": 0.9066, "step": 290 }, { - "epoch": 0.02201959819908441, - "grad_norm": 3.1991119384765625, - "learning_rate": 7.924357034795765e-06, - "loss": 0.8724, + "epoch": 0.0204857444561774, + "grad_norm": 4.204385757446289, + "learning_rate": 9.540983606557377e-06, + "loss": 0.8649, "step": 291 }, { - "epoch": 0.02209526692141803, - "grad_norm": 3.476820230484009, - "learning_rate": 7.951588502269288e-06, - "loss": 0.9873, + "epoch": 0.02055614220344949, + "grad_norm": 4.297039985656738, + "learning_rate": 9.573770491803278e-06, + "loss": 0.9547, "step": 292 }, { - "epoch": 0.022170935643751655, - "grad_norm": 3.6004443168640137, - "learning_rate": 7.978819969742815e-06, - "loss": 0.8335, + "epoch": 0.020626539950721577, + "grad_norm": 4.393106937408447, + "learning_rate": 9.60655737704918e-06, + "loss": 0.8204, "step": 293 }, { - "epoch": 0.02224660436608528, - "grad_norm": 2.7738335132598877, - "learning_rate": 8.00605143721634e-06, - "loss": 1.0594, + "epoch": 0.020696937697993665, + "grad_norm": 3.8621678352355957, + "learning_rate": 9.639344262295082e-06, + "loss": 0.8103, "step": 294 }, { - "epoch": 0.0223222730884189, - "grad_norm": 4.626110076904297, - "learning_rate": 8.033282904689864e-06, - "loss": 1.0716, + "epoch": 0.020767335445265753, + "grad_norm": 4.772561073303223, + "learning_rate": 9.672131147540983e-06, + "loss": 0.8503, "step": 295 }, { - "epoch": 0.022397941810752525, - "grad_norm": 4.319602966308594, - "learning_rate": 8.060514372163389e-06, - "loss": 0.9551, + "epoch": 0.02083773319253784, + "grad_norm": 4.562353610992432, + "learning_rate": 9.704918032786885e-06, + "loss": 1.0322, "step": 296 }, { - "epoch": 0.02247361053308615, - "grad_norm": 4.208806991577148, - "learning_rate": 8.087745839636913e-06, - "loss": 0.8445, + "epoch": 0.020908130939809925, + "grad_norm": 3.6378495693206787, + "learning_rate": 9.737704918032786e-06, + "loss": 0.8998, "step": 297 }, { - "epoch": 0.02254927925541977, - "grad_norm": 4.937840938568115, - "learning_rate": 8.11497730711044e-06, - "loss": 1.0403, + "epoch": 0.020978528687082013, + "grad_norm": 4.551540374755859, + "learning_rate": 9.770491803278689e-06, + "loss": 0.8743, "step": 298 }, { - "epoch": 0.022624947977753395, - "grad_norm": 6.741675853729248, - "learning_rate": 8.142208774583963e-06, - "loss": 0.9795, + "epoch": 0.0210489264343541, + "grad_norm": 5.512988090515137, + "learning_rate": 9.80327868852459e-06, + "loss": 0.9397, "step": 299 }, { - "epoch": 0.02270061670008702, - "grad_norm": 4.731930255889893, - "learning_rate": 8.169440242057489e-06, - "loss": 0.9626, + "epoch": 0.021119324181626188, + "grad_norm": 4.027811050415039, + "learning_rate": 9.836065573770491e-06, + "loss": 0.8617, "step": 300 }, { - "epoch": 0.022776285422420644, - "grad_norm": 3.032463550567627, - "learning_rate": 8.196671709531014e-06, - "loss": 0.8131, + "epoch": 0.021189721928898276, + "grad_norm": 6.318881511688232, + "learning_rate": 9.868852459016394e-06, + "loss": 0.9393, "step": 301 }, { - "epoch": 0.022851954144754265, - "grad_norm": 3.7094199657440186, - "learning_rate": 8.223903177004539e-06, - "loss": 0.9966, + "epoch": 0.021260119676170364, + "grad_norm": 12.178510665893555, + "learning_rate": 9.901639344262295e-06, + "loss": 0.7923, "step": 302 }, { - "epoch": 0.02292762286708789, - "grad_norm": 4.705551624298096, - "learning_rate": 8.251134644478063e-06, - "loss": 1.0382, + "epoch": 0.02133051742344245, + "grad_norm": 6.0396223068237305, + "learning_rate": 9.934426229508197e-06, + "loss": 0.868, "step": 303 }, { - "epoch": 0.023003291589421514, - "grad_norm": 4.8940510749816895, - "learning_rate": 8.278366111951588e-06, - "loss": 0.9043, + "epoch": 0.021400915170714536, + "grad_norm": 3.185314178466797, + "learning_rate": 9.967213114754098e-06, + "loss": 0.8724, "step": 304 }, { - "epoch": 0.023078960311755135, - "grad_norm": 5.167042255401611, - "learning_rate": 8.305597579425114e-06, - "loss": 0.7667, + "epoch": 0.021471312917986624, + "grad_norm": 3.8482203483581543, + "learning_rate": 1e-05, + "loss": 0.853, "step": 305 }, { - "epoch": 0.02315462903408876, - "grad_norm": 5.086777687072754, - "learning_rate": 8.332829046898639e-06, - "loss": 0.8992, + "epoch": 0.02154171066525871, + "grad_norm": 3.8758749961853027, + "learning_rate": 1.0032786885245902e-05, + "loss": 0.8002, "step": 306 }, { - "epoch": 0.023230297756422384, - "grad_norm": 12.379859924316406, - "learning_rate": 8.360060514372164e-06, - "loss": 1.0104, + "epoch": 0.0216121084125308, + "grad_norm": 3.7108590602874756, + "learning_rate": 1.0065573770491803e-05, + "loss": 0.7803, "step": 307 }, { - "epoch": 0.023305966478756008, - "grad_norm": 5.195709228515625, - "learning_rate": 8.387291981845688e-06, - "loss": 0.9138, + "epoch": 0.021682506159802887, + "grad_norm": 3.105503559112549, + "learning_rate": 1.0098360655737705e-05, + "loss": 0.9925, "step": 308 }, { - "epoch": 0.02338163520108963, - "grad_norm": 4.756507396697998, - "learning_rate": 8.414523449319213e-06, - "loss": 0.9112, + "epoch": 0.021752903907074975, + "grad_norm": 3.8915374279022217, + "learning_rate": 1.0131147540983606e-05, + "loss": 0.955, "step": 309 }, { - "epoch": 0.023457303923423253, - "grad_norm": 4.002053737640381, - "learning_rate": 8.441754916792738e-06, - "loss": 0.8504, + "epoch": 0.021823301654347062, + "grad_norm": 3.0151827335357666, + "learning_rate": 1.0163934426229509e-05, + "loss": 0.8564, "step": 310 }, { - "epoch": 0.023532972645756878, - "grad_norm": 4.4022064208984375, - "learning_rate": 8.468986384266263e-06, - "loss": 0.9997, + "epoch": 0.021893699401619147, + "grad_norm": 3.0709047317504883, + "learning_rate": 1.019672131147541e-05, + "loss": 0.7904, "step": 311 }, { - "epoch": 0.0236086413680905, - "grad_norm": 6.589466571807861, - "learning_rate": 8.496217851739789e-06, - "loss": 0.9104, + "epoch": 0.021964097148891235, + "grad_norm": 3.9401955604553223, + "learning_rate": 1.0229508196721311e-05, + "loss": 0.8529, "step": 312 }, { - "epoch": 0.023684310090424123, - "grad_norm": 4.6508989334106445, - "learning_rate": 8.523449319213314e-06, - "loss": 0.9674, + "epoch": 0.022034494896163322, + "grad_norm": 3.1581814289093018, + "learning_rate": 1.0262295081967214e-05, + "loss": 0.8227, "step": 313 }, { - "epoch": 0.023759978812757748, - "grad_norm": 4.066075325012207, - "learning_rate": 8.550680786686838e-06, - "loss": 0.8831, + "epoch": 0.02210489264343541, + "grad_norm": 4.276463031768799, + "learning_rate": 1.0295081967213115e-05, + "loss": 0.8545, "step": 314 }, { - "epoch": 0.02383564753509137, - "grad_norm": 8.613035202026367, - "learning_rate": 8.577912254160363e-06, - "loss": 0.8986, + "epoch": 0.022175290390707498, + "grad_norm": 2.8280324935913086, + "learning_rate": 1.0327868852459017e-05, + "loss": 0.7905, "step": 315 }, { - "epoch": 0.023911316257424993, - "grad_norm": 3.7301228046417236, - "learning_rate": 8.605143721633888e-06, - "loss": 0.7915, + "epoch": 0.022245688137979586, + "grad_norm": 3.7594704627990723, + "learning_rate": 1.0360655737704918e-05, + "loss": 0.9257, "step": 316 }, { - "epoch": 0.023986984979758617, - "grad_norm": 8.060126304626465, - "learning_rate": 8.632375189107414e-06, - "loss": 0.9269, + "epoch": 0.022316085885251673, + "grad_norm": 3.823127508163452, + "learning_rate": 1.0393442622950821e-05, + "loss": 0.817, "step": 317 }, { - "epoch": 0.024062653702092242, - "grad_norm": 4.43103551864624, - "learning_rate": 8.659606656580937e-06, - "loss": 0.9495, + "epoch": 0.022386483632523758, + "grad_norm": 3.0090863704681396, + "learning_rate": 1.0426229508196722e-05, + "loss": 0.8526, "step": 318 }, { - "epoch": 0.024138322424425863, - "grad_norm": 3.900829553604126, - "learning_rate": 8.686838124054464e-06, - "loss": 0.8808, + "epoch": 0.022456881379795846, + "grad_norm": 8.69205379486084, + "learning_rate": 1.0459016393442623e-05, + "loss": 0.8943, "step": 319 }, { - "epoch": 0.024213991146759487, - "grad_norm": 4.360715866088867, - "learning_rate": 8.714069591527988e-06, - "loss": 1.0735, + "epoch": 0.022527279127067933, + "grad_norm": 3.4876506328582764, + "learning_rate": 1.0491803278688525e-05, + "loss": 0.8231, "step": 320 }, { - "epoch": 0.02428965986909311, - "grad_norm": 5.8079633712768555, - "learning_rate": 8.741301059001513e-06, - "loss": 1.0089, + "epoch": 0.02259767687434002, + "grad_norm": 4.229576110839844, + "learning_rate": 1.0524590163934425e-05, + "loss": 0.9239, "step": 321 }, { - "epoch": 0.024365328591426733, - "grad_norm": 31.655710220336914, - "learning_rate": 8.768532526475038e-06, - "loss": 1.0929, + "epoch": 0.02266807462161211, + "grad_norm": 4.177773952484131, + "learning_rate": 1.0557377049180327e-05, + "loss": 0.8573, "step": 322 }, { - "epoch": 0.024440997313760357, - "grad_norm": 5.170853137969971, - "learning_rate": 8.795763993948562e-06, - "loss": 1.066, + "epoch": 0.022738472368884197, + "grad_norm": 3.3738083839416504, + "learning_rate": 1.0590163934426228e-05, + "loss": 1.0935, "step": 323 }, { - "epoch": 0.02451666603609398, - "grad_norm": 3.7530641555786133, - "learning_rate": 8.822995461422089e-06, - "loss": 0.8663, + "epoch": 0.022808870116156284, + "grad_norm": 4.092806816101074, + "learning_rate": 1.0622950819672131e-05, + "loss": 0.8651, "step": 324 }, { - "epoch": 0.024592334758427602, - "grad_norm": 4.36927604675293, - "learning_rate": 8.850226928895612e-06, - "loss": 0.8544, + "epoch": 0.02287926786342837, + "grad_norm": 3.2100119590759277, + "learning_rate": 1.0655737704918032e-05, + "loss": 0.9224, "step": 325 }, { - "epoch": 0.024668003480761227, - "grad_norm": 3.9863076210021973, - "learning_rate": 8.877458396369138e-06, - "loss": 0.935, + "epoch": 0.022949665610700457, + "grad_norm": 2.491509437561035, + "learning_rate": 1.0688524590163933e-05, + "loss": 0.8787, "step": 326 }, { - "epoch": 0.02474367220309485, - "grad_norm": 3.438127040863037, - "learning_rate": 8.904689863842663e-06, - "loss": 0.9287, + "epoch": 0.023020063357972544, + "grad_norm": 3.2033119201660156, + "learning_rate": 1.0721311475409836e-05, + "loss": 0.8584, "step": 327 }, { - "epoch": 0.024819340925428476, - "grad_norm": 5.561075210571289, - "learning_rate": 8.931921331316188e-06, - "loss": 1.0388, + "epoch": 0.023090461105244632, + "grad_norm": 4.005705833435059, + "learning_rate": 1.0754098360655737e-05, + "loss": 0.8808, "step": 328 }, { - "epoch": 0.024895009647762097, - "grad_norm": 4.477010726928711, - "learning_rate": 8.959152798789712e-06, - "loss": 0.9875, + "epoch": 0.02316085885251672, + "grad_norm": 4.587125301361084, + "learning_rate": 1.078688524590164e-05, + "loss": 0.92, "step": 329 }, { - "epoch": 0.02497067837009572, - "grad_norm": 4.917139053344727, - "learning_rate": 8.986384266263237e-06, - "loss": 0.8978, + "epoch": 0.023231256599788808, + "grad_norm": 3.5061874389648438, + "learning_rate": 1.081967213114754e-05, + "loss": 0.9886, "step": 330 }, { - "epoch": 0.025046347092429345, - "grad_norm": 4.206781387329102, - "learning_rate": 9.013615733736763e-06, - "loss": 0.96, + "epoch": 0.023301654347060895, + "grad_norm": 3.692006826400757, + "learning_rate": 1.0852459016393443e-05, + "loss": 0.87, "step": 331 }, { - "epoch": 0.025122015814762966, - "grad_norm": 2.9883878231048584, - "learning_rate": 9.040847201210288e-06, - "loss": 0.857, + "epoch": 0.02337205209433298, + "grad_norm": 3.919563055038452, + "learning_rate": 1.0885245901639344e-05, + "loss": 0.7871, "step": 332 }, { - "epoch": 0.02519768453709659, - "grad_norm": 9.420825958251953, - "learning_rate": 9.068078668683813e-06, - "loss": 1.0236, + "epoch": 0.023442449841605068, + "grad_norm": 3.279864549636841, + "learning_rate": 1.0918032786885245e-05, + "loss": 0.9709, "step": 333 }, { - "epoch": 0.025273353259430215, - "grad_norm": 4.354033470153809, - "learning_rate": 9.09531013615734e-06, - "loss": 0.9295, + "epoch": 0.023512847588877155, + "grad_norm": 3.615349531173706, + "learning_rate": 1.0950819672131147e-05, + "loss": 0.8734, "step": 334 }, { - "epoch": 0.025349021981763836, - "grad_norm": 4.949868679046631, - "learning_rate": 9.122541603630862e-06, - "loss": 0.7853, + "epoch": 0.023583245336149243, + "grad_norm": 4.019484996795654, + "learning_rate": 1.0983606557377048e-05, + "loss": 0.9027, "step": 335 }, { - "epoch": 0.02542469070409746, - "grad_norm": 4.656820297241211, - "learning_rate": 9.149773071104387e-06, - "loss": 1.096, + "epoch": 0.02365364308342133, + "grad_norm": 4.335453987121582, + "learning_rate": 1.1016393442622951e-05, + "loss": 0.8611, "step": 336 }, { - "epoch": 0.025500359426431085, - "grad_norm": 4.696194171905518, - "learning_rate": 9.177004538577912e-06, - "loss": 0.8964, + "epoch": 0.02372404083069342, + "grad_norm": 4.048886299133301, + "learning_rate": 1.1049180327868852e-05, + "loss": 0.9317, "step": 337 }, { - "epoch": 0.02557602814876471, - "grad_norm": 4.845102310180664, - "learning_rate": 9.204236006051438e-06, - "loss": 1.1736, + "epoch": 0.023794438577965506, + "grad_norm": 3.2750508785247803, + "learning_rate": 1.1081967213114753e-05, + "loss": 0.9007, "step": 338 }, { - "epoch": 0.02565169687109833, - "grad_norm": 6.742333889007568, - "learning_rate": 9.231467473524963e-06, - "loss": 0.9915, + "epoch": 0.02386483632523759, + "grad_norm": 2.9000089168548584, + "learning_rate": 1.1114754098360656e-05, + "loss": 0.8333, "step": 339 }, { - "epoch": 0.025727365593431955, - "grad_norm": 4.6622538566589355, - "learning_rate": 9.258698940998487e-06, - "loss": 1.0591, + "epoch": 0.02393523407250968, + "grad_norm": 7.008626937866211, + "learning_rate": 1.1147540983606557e-05, + "loss": 0.8901, "step": 340 }, { - "epoch": 0.02580303431576558, - "grad_norm": 4.871918201446533, - "learning_rate": 9.285930408472014e-06, - "loss": 1.0679, + "epoch": 0.024005631819781766, + "grad_norm": 4.288931369781494, + "learning_rate": 1.118032786885246e-05, + "loss": 0.8171, "step": 341 }, { - "epoch": 0.0258787030380992, - "grad_norm": 3.380262851715088, - "learning_rate": 9.313161875945537e-06, - "loss": 0.7519, + "epoch": 0.024076029567053854, + "grad_norm": 2.9231760501861572, + "learning_rate": 1.121311475409836e-05, + "loss": 0.807, "step": 342 }, { - "epoch": 0.025954371760432825, - "grad_norm": 4.895992755889893, - "learning_rate": 9.340393343419062e-06, - "loss": 0.9666, + "epoch": 0.024146427314325942, + "grad_norm": 4.322519302368164, + "learning_rate": 1.1245901639344263e-05, + "loss": 0.9253, "step": 343 }, { - "epoch": 0.02603004048276645, - "grad_norm": 5.649062633514404, - "learning_rate": 9.367624810892586e-06, - "loss": 0.9477, + "epoch": 0.02421682506159803, + "grad_norm": 5.361907005310059, + "learning_rate": 1.1278688524590164e-05, + "loss": 0.9289, "step": 344 }, { - "epoch": 0.026105709205100074, - "grad_norm": 5.298853397369385, - "learning_rate": 9.394856278366113e-06, - "loss": 0.8321, + "epoch": 0.024287222808870117, + "grad_norm": 5.50483512878418, + "learning_rate": 1.1311475409836065e-05, + "loss": 1.0301, "step": 345 }, { - "epoch": 0.026181377927433695, - "grad_norm": 15.001054763793945, - "learning_rate": 9.422087745839637e-06, - "loss": 0.8249, + "epoch": 0.024357620556142205, + "grad_norm": 3.138530969619751, + "learning_rate": 1.1344262295081967e-05, + "loss": 0.9246, "step": 346 }, { - "epoch": 0.02625704664976732, - "grad_norm": 7.537627220153809, - "learning_rate": 9.449319213313162e-06, - "loss": 0.8955, + "epoch": 0.02442801830341429, + "grad_norm": 3.2169291973114014, + "learning_rate": 1.1377049180327868e-05, + "loss": 0.8365, "step": 347 }, { - "epoch": 0.026332715372100943, - "grad_norm": 6.6606245040893555, - "learning_rate": 9.476550680786688e-06, - "loss": 0.9237, + "epoch": 0.024498416050686377, + "grad_norm": 3.394390344619751, + "learning_rate": 1.1409836065573771e-05, + "loss": 0.9969, "step": 348 }, { - "epoch": 0.026408384094434564, - "grad_norm": 7.1370673179626465, - "learning_rate": 9.503782148260213e-06, - "loss": 1.0608, + "epoch": 0.024568813797958465, + "grad_norm": 4.191263675689697, + "learning_rate": 1.1442622950819672e-05, + "loss": 0.9901, "step": 349 }, { - "epoch": 0.02648405281676819, - "grad_norm": 4.019873142242432, - "learning_rate": 9.531013615733736e-06, - "loss": 0.9431, + "epoch": 0.024639211545230553, + "grad_norm": 3.2689449787139893, + "learning_rate": 1.1475409836065575e-05, + "loss": 0.8752, "step": 350 }, { - "epoch": 0.026559721539101813, - "grad_norm": 3.8298895359039307, - "learning_rate": 9.558245083207261e-06, - "loss": 0.9514, + "epoch": 0.02470960929250264, + "grad_norm": 3.0763604640960693, + "learning_rate": 1.1508196721311476e-05, + "loss": 0.8122, "step": 351 }, { - "epoch": 0.026635390261435434, - "grad_norm": 3.851069688796997, - "learning_rate": 9.585476550680787e-06, - "loss": 1.044, + "epoch": 0.02478000703977473, + "grad_norm": 3.4816110134124756, + "learning_rate": 1.1540983606557377e-05, + "loss": 0.9626, "step": 352 }, { - "epoch": 0.02671105898376906, - "grad_norm": 5.827500343322754, - "learning_rate": 9.612708018154312e-06, - "loss": 0.8259, + "epoch": 0.024850404787046816, + "grad_norm": 3.3246467113494873, + "learning_rate": 1.157377049180328e-05, + "loss": 0.9586, "step": 353 }, { - "epoch": 0.026786727706102683, - "grad_norm": 4.617655277252197, - "learning_rate": 9.639939485627837e-06, - "loss": 0.8898, + "epoch": 0.0249208025343189, + "grad_norm": 4.295650482177734, + "learning_rate": 1.160655737704918e-05, + "loss": 0.9201, "step": 354 }, { - "epoch": 0.026862396428436307, - "grad_norm": 7.916740417480469, - "learning_rate": 9.667170953101363e-06, - "loss": 0.8235, + "epoch": 0.024991200281590988, + "grad_norm": 3.224130153656006, + "learning_rate": 1.1639344262295083e-05, + "loss": 0.8959, "step": 355 }, { - "epoch": 0.02693806515076993, - "grad_norm": 4.243466377258301, - "learning_rate": 9.694402420574888e-06, - "loss": 0.9921, + "epoch": 0.025061598028863076, + "grad_norm": 2.9787838459014893, + "learning_rate": 1.1672131147540984e-05, + "loss": 0.7972, "step": 356 }, { - "epoch": 0.027013733873103553, - "grad_norm": 6.203061580657959, - "learning_rate": 9.72163388804841e-06, - "loss": 1.0662, + "epoch": 0.025131995776135164, + "grad_norm": 4.881450176239014, + "learning_rate": 1.1704918032786885e-05, + "loss": 0.9564, "step": 357 }, { - "epoch": 0.027089402595437177, - "grad_norm": 4.784158229827881, - "learning_rate": 9.748865355521936e-06, - "loss": 0.8327, + "epoch": 0.02520239352340725, + "grad_norm": 3.2482476234436035, + "learning_rate": 1.1737704918032788e-05, + "loss": 0.8253, "step": 358 }, { - "epoch": 0.027165071317770798, - "grad_norm": 4.381805896759033, - "learning_rate": 9.776096822995462e-06, - "loss": 1.0241, + "epoch": 0.02527279127067934, + "grad_norm": 3.149916172027588, + "learning_rate": 1.1770491803278688e-05, + "loss": 0.9232, "step": 359 }, { - "epoch": 0.027240740040104423, - "grad_norm": 4.59453821182251, - "learning_rate": 9.803328290468987e-06, - "loss": 1.1991, + "epoch": 0.025343189017951427, + "grad_norm": 2.9563393592834473, + "learning_rate": 1.1803278688524591e-05, + "loss": 0.8205, "step": 360 }, { - "epoch": 0.027316408762438047, - "grad_norm": 4.59682035446167, - "learning_rate": 9.830559757942511e-06, - "loss": 0.9785, + "epoch": 0.02541358676522351, + "grad_norm": 3.6794357299804688, + "learning_rate": 1.1836065573770492e-05, + "loss": 1.0042, "step": 361 }, { - "epoch": 0.027392077484771668, - "grad_norm": 3.2296361923217773, - "learning_rate": 9.857791225416038e-06, - "loss": 0.8848, + "epoch": 0.0254839845124956, + "grad_norm": 4.08628511428833, + "learning_rate": 1.1868852459016395e-05, + "loss": 0.8463, "step": 362 }, { - "epoch": 0.027467746207105292, - "grad_norm": 4.408949375152588, - "learning_rate": 9.885022692889562e-06, - "loss": 1.1622, + "epoch": 0.025554382259767687, + "grad_norm": 3.9274065494537354, + "learning_rate": 1.1901639344262294e-05, + "loss": 0.8509, "step": 363 }, { - "epoch": 0.027543414929438917, - "grad_norm": 4.724997520446777, - "learning_rate": 9.912254160363087e-06, - "loss": 0.7884, + "epoch": 0.025624780007039775, + "grad_norm": 3.9656147956848145, + "learning_rate": 1.1934426229508195e-05, + "loss": 0.8812, "step": 364 }, { - "epoch": 0.02761908365177254, - "grad_norm": 3.5149667263031006, - "learning_rate": 9.93948562783661e-06, - "loss": 0.9649, + "epoch": 0.025695177754311863, + "grad_norm": 2.792161226272583, + "learning_rate": 1.1967213114754098e-05, + "loss": 0.924, "step": 365 }, { - "epoch": 0.027694752374106162, - "grad_norm": 3.3947033882141113, - "learning_rate": 9.966717095310137e-06, - "loss": 0.8381, + "epoch": 0.02576557550158395, + "grad_norm": 3.3945376873016357, + "learning_rate": 1.1999999999999999e-05, + "loss": 0.8569, "step": 366 }, { - "epoch": 0.027770421096439787, - "grad_norm": 7.352261066436768, - "learning_rate": 9.993948562783661e-06, - "loss": 0.8653, + "epoch": 0.025835973248856038, + "grad_norm": 2.9763596057891846, + "learning_rate": 1.2032786885245901e-05, + "loss": 0.8601, "step": 367 }, { - "epoch": 0.02784608981877341, - "grad_norm": 5.134012699127197, - "learning_rate": 1.0021180030257186e-05, - "loss": 0.8702, + "epoch": 0.025906370996128122, + "grad_norm": 3.2409420013427734, + "learning_rate": 1.2065573770491802e-05, + "loss": 0.7626, "step": 368 }, { - "epoch": 0.027921758541107032, - "grad_norm": 4.905878067016602, - "learning_rate": 1.0048411497730712e-05, - "loss": 0.9004, + "epoch": 0.02597676874340021, + "grad_norm": 3.584272623062134, + "learning_rate": 1.2098360655737705e-05, + "loss": 0.8841, "step": 369 }, { - "epoch": 0.027997427263440657, - "grad_norm": 6.044192790985107, - "learning_rate": 1.0075642965204237e-05, - "loss": 1.0201, + "epoch": 0.026047166490672298, + "grad_norm": 8.443313598632812, + "learning_rate": 1.2131147540983606e-05, + "loss": 0.8161, "step": 370 }, { - "epoch": 0.02807309598577428, - "grad_norm": 4.332431316375732, - "learning_rate": 1.0102874432677762e-05, - "loss": 0.9866, + "epoch": 0.026117564237944386, + "grad_norm": 4.192190170288086, + "learning_rate": 1.2163934426229507e-05, + "loss": 0.9495, "step": 371 }, { - "epoch": 0.028148764708107902, - "grad_norm": 5.870851516723633, - "learning_rate": 1.0130105900151285e-05, - "loss": 0.8737, + "epoch": 0.026187961985216474, + "grad_norm": 3.1835973262786865, + "learning_rate": 1.219672131147541e-05, + "loss": 0.8923, "step": 372 }, { - "epoch": 0.028224433430441526, - "grad_norm": 4.059363842010498, - "learning_rate": 1.0157337367624811e-05, - "loss": 0.851, + "epoch": 0.02625835973248856, + "grad_norm": 8.168956756591797, + "learning_rate": 1.222950819672131e-05, + "loss": 0.8354, "step": 373 }, { - "epoch": 0.02830010215277515, - "grad_norm": 5.465144634246826, - "learning_rate": 1.0184568835098336e-05, - "loss": 1.0343, + "epoch": 0.02632875747976065, + "grad_norm": 3.3233137130737305, + "learning_rate": 1.2262295081967213e-05, + "loss": 0.8309, "step": 374 }, { - "epoch": 0.028375770875108775, - "grad_norm": 4.673175811767578, - "learning_rate": 1.021180030257186e-05, - "loss": 0.9237, + "epoch": 0.026399155227032733, + "grad_norm": 4.14939546585083, + "learning_rate": 1.2295081967213114e-05, + "loss": 1.0437, "step": 375 }, { - "epoch": 0.028451439597442396, - "grad_norm": 3.5958478450775146, - "learning_rate": 1.0239031770045387e-05, - "loss": 0.9103, + "epoch": 0.02646955297430482, + "grad_norm": 3.9036550521850586, + "learning_rate": 1.2327868852459017e-05, + "loss": 0.8834, "step": 376 }, { - "epoch": 0.02852710831977602, - "grad_norm": 9.658095359802246, - "learning_rate": 1.0266263237518912e-05, - "loss": 1.0272, + "epoch": 0.02653995072157691, + "grad_norm": 2.8705637454986572, + "learning_rate": 1.2360655737704918e-05, + "loss": 0.9176, "step": 377 }, { - "epoch": 0.028602777042109645, - "grad_norm": 4.175169944763184, - "learning_rate": 1.0293494704992436e-05, - "loss": 0.8041, + "epoch": 0.026610348468848997, + "grad_norm": 3.9401586055755615, + "learning_rate": 1.2393442622950819e-05, + "loss": 0.9489, "step": 378 }, { - "epoch": 0.028678445764443266, - "grad_norm": 3.949751853942871, - "learning_rate": 1.0320726172465961e-05, - "loss": 0.8977, + "epoch": 0.026680746216121085, + "grad_norm": 4.097420692443848, + "learning_rate": 1.2426229508196721e-05, + "loss": 0.8844, "step": 379 }, { - "epoch": 0.02875411448677689, - "grad_norm": 4.572116374969482, - "learning_rate": 1.0347957639939486e-05, - "loss": 0.8524, + "epoch": 0.026751143963393172, + "grad_norm": 2.4673445224761963, + "learning_rate": 1.2459016393442622e-05, + "loss": 1.0373, "step": 380 }, { - "epoch": 0.028829783209110515, - "grad_norm": 4.106285095214844, - "learning_rate": 1.037518910741301e-05, - "loss": 1.0366, + "epoch": 0.02682154171066526, + "grad_norm": 3.5751023292541504, + "learning_rate": 1.2491803278688525e-05, + "loss": 1.0651, "step": 381 }, { - "epoch": 0.02890545193144414, - "grad_norm": 3.8881635665893555, - "learning_rate": 1.0402420574886535e-05, - "loss": 0.8127, + "epoch": 0.026891939457937344, + "grad_norm": 3.6647045612335205, + "learning_rate": 1.2524590163934426e-05, + "loss": 0.9208, "step": 382 }, { - "epoch": 0.02898112065377776, - "grad_norm": 6.574056625366211, - "learning_rate": 1.0429652042360062e-05, - "loss": 0.805, + "epoch": 0.026962337205209432, + "grad_norm": 3.4178709983825684, + "learning_rate": 1.2557377049180327e-05, + "loss": 0.7167, "step": 383 }, { - "epoch": 0.029056789376111385, - "grad_norm": 4.1317057609558105, - "learning_rate": 1.0456883509833586e-05, - "loss": 1.1139, + "epoch": 0.02703273495248152, + "grad_norm": 3.1912715435028076, + "learning_rate": 1.259016393442623e-05, + "loss": 0.8717, "step": 384 }, { - "epoch": 0.02913245809844501, - "grad_norm": 6.96987771987915, - "learning_rate": 1.0484114977307111e-05, - "loss": 1.0626, + "epoch": 0.027103132699753608, + "grad_norm": 4.120757102966309, + "learning_rate": 1.262295081967213e-05, + "loss": 0.8786, "step": 385 }, { - "epoch": 0.02920812682077863, - "grad_norm": 5.030163764953613, - "learning_rate": 1.0511346444780636e-05, - "loss": 0.7772, + "epoch": 0.027173530447025696, + "grad_norm": 3.8534224033355713, + "learning_rate": 1.2655737704918033e-05, + "loss": 0.8859, "step": 386 }, { - "epoch": 0.029283795543112254, - "grad_norm": 8.177231788635254, - "learning_rate": 1.053857791225416e-05, - "loss": 0.9079, + "epoch": 0.027243928194297783, + "grad_norm": 4.2396674156188965, + "learning_rate": 1.2688524590163934e-05, + "loss": 0.8825, "step": 387 }, { - "epoch": 0.02935946426544588, - "grad_norm": 4.530209541320801, - "learning_rate": 1.0565809379727685e-05, - "loss": 0.8447, + "epoch": 0.02731432594156987, + "grad_norm": 6.677631378173828, + "learning_rate": 1.2721311475409837e-05, + "loss": 0.9759, "step": 388 }, { - "epoch": 0.0294351329877795, - "grad_norm": 12.534080505371094, - "learning_rate": 1.059304084720121e-05, - "loss": 0.9899, + "epoch": 0.027384723688841955, + "grad_norm": 3.362023115158081, + "learning_rate": 1.2754098360655738e-05, + "loss": 0.8176, "step": 389 }, { - "epoch": 0.029510801710113124, - "grad_norm": 4.852429389953613, - "learning_rate": 1.0620272314674735e-05, - "loss": 1.0398, + "epoch": 0.027455121436114043, + "grad_norm": 3.2810916900634766, + "learning_rate": 1.2786885245901639e-05, + "loss": 0.7615, "step": 390 }, { - "epoch": 0.02958647043244675, - "grad_norm": 4.2400054931640625, - "learning_rate": 1.0647503782148261e-05, - "loss": 0.8936, + "epoch": 0.02752551918338613, + "grad_norm": 4.447373867034912, + "learning_rate": 1.2819672131147541e-05, + "loss": 0.9375, "step": 391 }, { - "epoch": 0.029662139154780373, - "grad_norm": 5.989685535430908, - "learning_rate": 1.0674735249621786e-05, - "loss": 0.8694, + "epoch": 0.02759591693065822, + "grad_norm": 3.0620594024658203, + "learning_rate": 1.2852459016393442e-05, + "loss": 0.8822, "step": 392 }, { - "epoch": 0.029737807877113994, - "grad_norm": 4.045843124389648, - "learning_rate": 1.070196671709531e-05, - "loss": 0.9632, + "epoch": 0.027666314677930307, + "grad_norm": 3.624209403991699, + "learning_rate": 1.2885245901639345e-05, + "loss": 0.831, "step": 393 }, { - "epoch": 0.02981347659944762, - "grad_norm": 4.707093238830566, - "learning_rate": 1.0729198184568837e-05, - "loss": 0.8046, + "epoch": 0.027736712425202394, + "grad_norm": 4.090682506561279, + "learning_rate": 1.2918032786885246e-05, + "loss": 0.8782, "step": 394 }, { - "epoch": 0.029889145321781243, - "grad_norm": 5.141930103302002, - "learning_rate": 1.075642965204236e-05, - "loss": 0.9606, + "epoch": 0.027807110172474482, + "grad_norm": 3.303088903427124, + "learning_rate": 1.2950819672131147e-05, + "loss": 0.9747, "step": 395 }, { - "epoch": 0.029964814044114864, - "grad_norm": 9.92322826385498, - "learning_rate": 1.0783661119515884e-05, - "loss": 0.9988, + "epoch": 0.027877507919746566, + "grad_norm": 3.6377437114715576, + "learning_rate": 1.298360655737705e-05, + "loss": 0.8018, "step": 396 }, { - "epoch": 0.03004048276644849, - "grad_norm": 5.097169399261475, - "learning_rate": 1.0810892586989409e-05, - "loss": 0.8988, + "epoch": 0.027947905667018654, + "grad_norm": 3.304927349090576, + "learning_rate": 1.301639344262295e-05, + "loss": 0.7309, "step": 397 }, { - "epoch": 0.030116151488782113, - "grad_norm": 4.876684665679932, - "learning_rate": 1.0838124054462936e-05, - "loss": 0.874, + "epoch": 0.028018303414290742, + "grad_norm": 3.411590337753296, + "learning_rate": 1.3049180327868853e-05, + "loss": 0.8462, "step": 398 }, { - "epoch": 0.030191820211115734, - "grad_norm": 4.846562385559082, - "learning_rate": 1.086535552193646e-05, - "loss": 0.8658, + "epoch": 0.02808870116156283, + "grad_norm": 2.687112331390381, + "learning_rate": 1.3081967213114754e-05, + "loss": 0.8583, "step": 399 }, { - "epoch": 0.030267488933449358, - "grad_norm": 5.538702011108398, - "learning_rate": 1.0892586989409985e-05, - "loss": 1.0144, + "epoch": 0.028159098908834918, + "grad_norm": 3.0755984783172607, + "learning_rate": 1.3114754098360657e-05, + "loss": 0.8903, "step": 400 }, { - "epoch": 0.030343157655782983, - "grad_norm": 4.698038578033447, - "learning_rate": 1.0919818456883511e-05, - "loss": 0.9173, + "epoch": 0.028229496656107005, + "grad_norm": 4.091688632965088, + "learning_rate": 1.3147540983606558e-05, + "loss": 0.8205, "step": 401 }, { - "epoch": 0.030418826378116607, - "grad_norm": 6.059201717376709, - "learning_rate": 1.0947049924357034e-05, - "loss": 0.7875, + "epoch": 0.028299894403379093, + "grad_norm": 3.68048095703125, + "learning_rate": 1.3180327868852459e-05, + "loss": 0.9744, "step": 402 }, { - "epoch": 0.030494495100450228, - "grad_norm": 6.118393421173096, - "learning_rate": 1.0974281391830559e-05, - "loss": 0.9391, + "epoch": 0.02837029215065118, + "grad_norm": 3.4734582901000977, + "learning_rate": 1.3213114754098361e-05, + "loss": 0.7506, "step": 403 }, { - "epoch": 0.030570163822783852, - "grad_norm": 4.088007926940918, - "learning_rate": 1.1001512859304084e-05, - "loss": 0.7948, + "epoch": 0.028440689897923265, + "grad_norm": 4.7491631507873535, + "learning_rate": 1.3245901639344262e-05, + "loss": 0.9155, "step": 404 }, { - "epoch": 0.030645832545117477, - "grad_norm": 4.4451799392700195, - "learning_rate": 1.102874432677761e-05, - "loss": 0.9371, + "epoch": 0.028511087645195353, + "grad_norm": 2.956101417541504, + "learning_rate": 1.3278688524590165e-05, + "loss": 0.8446, "step": 405 }, { - "epoch": 0.030721501267451098, - "grad_norm": 4.529284477233887, - "learning_rate": 1.1055975794251135e-05, - "loss": 0.9817, + "epoch": 0.02858148539246744, + "grad_norm": 4.001038074493408, + "learning_rate": 1.3311475409836064e-05, + "loss": 0.9084, "step": 406 }, { - "epoch": 0.030797169989784722, - "grad_norm": 7.541872978210449, - "learning_rate": 1.108320726172466e-05, - "loss": 0.8889, + "epoch": 0.02865188313973953, + "grad_norm": 2.7428414821624756, + "learning_rate": 1.3344262295081967e-05, + "loss": 0.8285, "step": 407 }, { - "epoch": 0.030872838712118347, - "grad_norm": 3.850817918777466, - "learning_rate": 1.1110438729198186e-05, - "loss": 0.8753, + "epoch": 0.028722280887011616, + "grad_norm": 2.962327480316162, + "learning_rate": 1.3377049180327868e-05, + "loss": 0.7576, "step": 408 }, { - "epoch": 0.030948507434451968, - "grad_norm": 3.5445756912231445, - "learning_rate": 1.113767019667171e-05, - "loss": 0.8393, + "epoch": 0.028792678634283704, + "grad_norm": 2.857193946838379, + "learning_rate": 1.3409836065573769e-05, + "loss": 0.8189, "step": 409 }, { - "epoch": 0.031024176156785592, - "grad_norm": 5.169709205627441, - "learning_rate": 1.1164901664145234e-05, - "loss": 0.9982, + "epoch": 0.028863076381555792, + "grad_norm": 2.9832582473754883, + "learning_rate": 1.3442622950819672e-05, + "loss": 0.8578, "step": 410 }, { - "epoch": 0.031099844879119216, - "grad_norm": 3.5694003105163574, - "learning_rate": 1.1192133131618758e-05, - "loss": 0.7693, + "epoch": 0.028933474128827876, + "grad_norm": 3.54685115814209, + "learning_rate": 1.3475409836065573e-05, + "loss": 0.8826, "step": 411 }, { - "epoch": 0.03117551360145284, - "grad_norm": 5.7016921043396, - "learning_rate": 1.1219364599092285e-05, - "loss": 0.8983, + "epoch": 0.029003871876099964, + "grad_norm": 3.376476287841797, + "learning_rate": 1.3508196721311475e-05, + "loss": 0.8494, "step": 412 }, { - "epoch": 0.031251182323786465, - "grad_norm": 5.174305438995361, - "learning_rate": 1.124659606656581e-05, - "loss": 0.9126, + "epoch": 0.02907426962337205, + "grad_norm": 2.993804931640625, + "learning_rate": 1.3540983606557376e-05, + "loss": 0.8651, "step": 413 }, { - "epoch": 0.03132685104612009, - "grad_norm": 4.78248929977417, - "learning_rate": 1.1273827534039334e-05, - "loss": 1.0237, + "epoch": 0.02914466737064414, + "grad_norm": 3.983947277069092, + "learning_rate": 1.3573770491803279e-05, + "loss": 0.786, "step": 414 }, { - "epoch": 0.03140251976845371, - "grad_norm": 4.276739120483398, - "learning_rate": 1.130105900151286e-05, - "loss": 0.953, + "epoch": 0.029215065117916227, + "grad_norm": 3.155109167098999, + "learning_rate": 1.360655737704918e-05, + "loss": 0.8423, "step": 415 }, { - "epoch": 0.03147818849078733, - "grad_norm": 5.136653900146484, - "learning_rate": 1.1328290468986385e-05, - "loss": 0.8138, + "epoch": 0.029285462865188315, + "grad_norm": 2.866457223892212, + "learning_rate": 1.363934426229508e-05, + "loss": 0.8535, "step": 416 }, { - "epoch": 0.031553857213120956, - "grad_norm": 3.566028356552124, - "learning_rate": 1.1355521936459908e-05, - "loss": 1.0327, + "epoch": 0.029355860612460403, + "grad_norm": 3.9705259799957275, + "learning_rate": 1.3672131147540983e-05, + "loss": 0.8879, "step": 417 }, { - "epoch": 0.03162952593545458, - "grad_norm": 3.272423267364502, - "learning_rate": 1.1382753403933433e-05, - "loss": 1.0262, + "epoch": 0.029426258359732487, + "grad_norm": 2.7610883712768555, + "learning_rate": 1.3704918032786884e-05, + "loss": 0.8251, "step": 418 }, { - "epoch": 0.031705194657788205, - "grad_norm": 4.595939636230469, - "learning_rate": 1.140998487140696e-05, - "loss": 0.8515, + "epoch": 0.029496656107004575, + "grad_norm": 4.710381984710693, + "learning_rate": 1.3737704918032787e-05, + "loss": 0.8721, "step": 419 }, { - "epoch": 0.03178086338012183, - "grad_norm": 3.546163320541382, - "learning_rate": 1.1437216338880484e-05, - "loss": 0.7262, + "epoch": 0.029567053854276663, + "grad_norm": 3.6030678749084473, + "learning_rate": 1.3770491803278688e-05, + "loss": 0.8505, "step": 420 }, { - "epoch": 0.03185653210245545, - "grad_norm": 4.943700313568115, - "learning_rate": 1.1464447806354009e-05, - "loss": 0.9381, + "epoch": 0.02963745160154875, + "grad_norm": 4.483442783355713, + "learning_rate": 1.3803278688524589e-05, + "loss": 0.974, "step": 421 }, { - "epoch": 0.03193220082478907, - "grad_norm": 5.774724960327148, - "learning_rate": 1.1491679273827535e-05, - "loss": 0.9081, + "epoch": 0.029707849348820838, + "grad_norm": 2.9787371158599854, + "learning_rate": 1.3836065573770492e-05, + "loss": 0.8912, "step": 422 }, { - "epoch": 0.032007869547122696, - "grad_norm": 4.097910404205322, - "learning_rate": 1.151891074130106e-05, - "loss": 0.8876, + "epoch": 0.029778247096092926, + "grad_norm": 10.03062629699707, + "learning_rate": 1.3868852459016393e-05, + "loss": 0.8466, "step": 423 }, { - "epoch": 0.03208353826945632, - "grad_norm": 4.992226600646973, - "learning_rate": 1.1546142208774585e-05, - "loss": 0.8628, + "epoch": 0.029848644843365014, + "grad_norm": 3.2609753608703613, + "learning_rate": 1.3901639344262295e-05, + "loss": 1.019, "step": 424 }, { - "epoch": 0.032159206991789945, - "grad_norm": 4.852366924285889, - "learning_rate": 1.1573373676248108e-05, - "loss": 0.9476, + "epoch": 0.029919042590637098, + "grad_norm": 3.382953405380249, + "learning_rate": 1.3934426229508196e-05, + "loss": 0.7309, "step": 425 }, { - "epoch": 0.03223487571412357, - "grad_norm": 5.32084321975708, - "learning_rate": 1.1600605143721634e-05, - "loss": 0.8845, + "epoch": 0.029989440337909186, + "grad_norm": 3.458962917327881, + "learning_rate": 1.3967213114754099e-05, + "loss": 0.7334, "step": 426 }, { - "epoch": 0.03231054443645719, - "grad_norm": 5.613223552703857, - "learning_rate": 1.1627836611195159e-05, - "loss": 0.756, + "epoch": 0.030059838085181274, + "grad_norm": 2.909278392791748, + "learning_rate": 1.4e-05, + "loss": 0.8875, "step": 427 }, { - "epoch": 0.03238621315879081, - "grad_norm": 4.22434139251709, - "learning_rate": 1.1655068078668683e-05, - "loss": 0.9494, + "epoch": 0.03013023583245336, + "grad_norm": 2.987887144088745, + "learning_rate": 1.3999999818031829e-05, + "loss": 0.8224, "step": 428 }, { - "epoch": 0.032461881881124435, - "grad_norm": 4.021113395690918, - "learning_rate": 1.168229954614221e-05, - "loss": 0.7561, + "epoch": 0.03020063357972545, + "grad_norm": 4.058437824249268, + "learning_rate": 1.399999927212733e-05, + "loss": 0.8063, "step": 429 }, { - "epoch": 0.03253755060345806, - "grad_norm": 4.726623058319092, - "learning_rate": 1.1709531013615735e-05, - "loss": 0.9944, + "epoch": 0.030271031326997537, + "grad_norm": 3.4275295734405518, + "learning_rate": 1.399999836228653e-05, + "loss": 0.7084, "step": 430 }, { - "epoch": 0.032613219325791684, - "grad_norm": 5.192655563354492, - "learning_rate": 1.173676248108926e-05, - "loss": 0.9211, + "epoch": 0.030341429074269625, + "grad_norm": 2.714033842086792, + "learning_rate": 1.3999997088509473e-05, + "loss": 0.9475, "step": 431 }, { - "epoch": 0.03268888804812531, - "grad_norm": 3.9181580543518066, - "learning_rate": 1.1763993948562782e-05, - "loss": 0.9154, + "epoch": 0.03041182682154171, + "grad_norm": 3.0211703777313232, + "learning_rate": 1.3999995450796226e-05, + "loss": 0.9334, "step": 432 }, { - "epoch": 0.03276455677045893, - "grad_norm": 6.171100616455078, - "learning_rate": 1.1791225416036309e-05, - "loss": 0.9509, + "epoch": 0.030482224568813797, + "grad_norm": 3.760977268218994, + "learning_rate": 1.399999344914688e-05, + "loss": 0.9017, "step": 433 }, { - "epoch": 0.03284022549279256, - "grad_norm": 3.8820559978485107, - "learning_rate": 1.1818456883509833e-05, - "loss": 1.0268, + "epoch": 0.030552622316085885, + "grad_norm": 3.030567169189453, + "learning_rate": 1.3999991083561535e-05, + "loss": 0.8015, "step": 434 }, { - "epoch": 0.032915894215126175, - "grad_norm": 4.744935512542725, - "learning_rate": 1.1845688350983358e-05, - "loss": 0.9251, + "epoch": 0.030623020063357972, + "grad_norm": 2.8472259044647217, + "learning_rate": 1.399998835404031e-05, + "loss": 0.9097, "step": 435 }, { - "epoch": 0.0329915629374598, - "grad_norm": 3.204756736755371, - "learning_rate": 1.1872919818456884e-05, - "loss": 0.8481, + "epoch": 0.03069341781063006, + "grad_norm": 3.631469249725342, + "learning_rate": 1.3999985260583356e-05, + "loss": 0.9332, "step": 436 }, { - "epoch": 0.033067231659793424, - "grad_norm": 4.440789699554443, - "learning_rate": 1.190015128593041e-05, - "loss": 1.072, + "epoch": 0.030763815557902148, + "grad_norm": 3.273707151412964, + "learning_rate": 1.3999981803190826e-05, + "loss": 0.8974, "step": 437 }, { - "epoch": 0.03314290038212705, - "grad_norm": 4.594890594482422, - "learning_rate": 1.1927382753403934e-05, - "loss": 0.8416, + "epoch": 0.030834213305174236, + "grad_norm": 3.247506618499756, + "learning_rate": 1.3999977981862903e-05, + "loss": 0.8162, "step": 438 }, { - "epoch": 0.03321856910446067, - "grad_norm": 8.748790740966797, - "learning_rate": 1.195461422087746e-05, - "loss": 0.7003, + "epoch": 0.03090461105244632, + "grad_norm": 2.9269092082977295, + "learning_rate": 1.3999973796599787e-05, + "loss": 0.7554, "step": 439 }, { - "epoch": 0.0332942378267943, - "grad_norm": 6.574450969696045, - "learning_rate": 1.1981845688350983e-05, - "loss": 0.9899, + "epoch": 0.030975008799718408, + "grad_norm": 4.147475719451904, + "learning_rate": 1.399996924740169e-05, + "loss": 0.7695, "step": 440 }, { - "epoch": 0.033369906549127915, - "grad_norm": 3.3959763050079346, - "learning_rate": 1.2009077155824508e-05, - "loss": 0.8483, + "epoch": 0.031045406546990496, + "grad_norm": 2.865320920944214, + "learning_rate": 1.3999964334268854e-05, + "loss": 0.7519, "step": 441 }, { - "epoch": 0.03344557527146154, - "grad_norm": 9.578702926635742, - "learning_rate": 1.2036308623298033e-05, - "loss": 0.6854, + "epoch": 0.031115804294262583, + "grad_norm": 3.071187973022461, + "learning_rate": 1.3999959057201534e-05, + "loss": 0.8914, "step": 442 }, { - "epoch": 0.03352124399379516, - "grad_norm": 10.351158142089844, - "learning_rate": 1.2063540090771559e-05, - "loss": 0.9046, + "epoch": 0.03118620204153467, + "grad_norm": 3.1855711936950684, + "learning_rate": 1.39999534162e-05, + "loss": 0.836, "step": 443 }, { - "epoch": 0.03359691271612879, - "grad_norm": 3.339411497116089, - "learning_rate": 1.2090771558245084e-05, - "loss": 0.8759, + "epoch": 0.03125659978880676, + "grad_norm": 3.908602714538574, + "learning_rate": 1.3999947411264552e-05, + "loss": 0.7964, "step": 444 }, { - "epoch": 0.03367258143846241, - "grad_norm": 3.9380717277526855, - "learning_rate": 1.2118003025718608e-05, - "loss": 0.869, + "epoch": 0.03132699753607884, + "grad_norm": 2.958383798599243, + "learning_rate": 1.3999941042395497e-05, + "loss": 0.7704, "step": 445 }, { - "epoch": 0.03374825016079604, - "grad_norm": 3.6196255683898926, - "learning_rate": 1.2145234493192135e-05, - "loss": 0.853, + "epoch": 0.031397395283350935, + "grad_norm": 3.568150043487549, + "learning_rate": 1.3999934309593166e-05, + "loss": 0.852, "step": 446 }, { - "epoch": 0.03382391888312966, - "grad_norm": 3.6324236392974854, - "learning_rate": 1.2172465960665658e-05, - "loss": 0.9892, + "epoch": 0.03146779303062302, + "grad_norm": 3.233139991760254, + "learning_rate": 1.3999927212857913e-05, + "loss": 0.6286, "step": 447 }, { - "epoch": 0.03389958760546328, - "grad_norm": 4.3121185302734375, - "learning_rate": 1.2199697428139183e-05, - "loss": 0.8917, + "epoch": 0.03153819077789511, + "grad_norm": 3.2236971855163574, + "learning_rate": 1.3999919752190102e-05, + "loss": 0.8857, "step": 448 }, { - "epoch": 0.0339752563277969, - "grad_norm": 6.199253559112549, - "learning_rate": 1.2226928895612707e-05, - "loss": 0.8886, + "epoch": 0.031608588525167194, + "grad_norm": 3.905930995941162, + "learning_rate": 1.3999911927590125e-05, + "loss": 0.8415, "step": 449 }, { - "epoch": 0.03405092505013053, - "grad_norm": 5.536099433898926, - "learning_rate": 1.2254160363086234e-05, - "loss": 0.8908, + "epoch": 0.03167898627243928, + "grad_norm": 2.910520076751709, + "learning_rate": 1.3999903739058389e-05, + "loss": 0.9067, "step": 450 }, { - "epoch": 0.03412659377246415, - "grad_norm": 4.678923606872559, - "learning_rate": 1.2281391830559758e-05, - "loss": 1.0336, + "epoch": 0.03174938401971137, + "grad_norm": 2.934964895248413, + "learning_rate": 1.3999895186595318e-05, + "loss": 0.7156, "step": 451 }, { - "epoch": 0.034202262494797776, - "grad_norm": 5.405990123748779, - "learning_rate": 1.2308623298033283e-05, - "loss": 0.8555, + "epoch": 0.031819781766983454, + "grad_norm": 2.808274030685425, + "learning_rate": 1.3999886270201355e-05, + "loss": 0.8309, "step": 452 }, { - "epoch": 0.0342779312171314, - "grad_norm": 5.1637749671936035, - "learning_rate": 1.233585476550681e-05, - "loss": 0.8824, + "epoch": 0.031890179514255546, + "grad_norm": 4.40629243850708, + "learning_rate": 1.3999876989876965e-05, + "loss": 0.6986, "step": 453 }, { - "epoch": 0.034353599939465025, - "grad_norm": 8.398664474487305, - "learning_rate": 1.2363086232980334e-05, - "loss": 0.993, + "epoch": 0.03196057726152763, + "grad_norm": 7.634444713592529, + "learning_rate": 1.3999867345622634e-05, + "loss": 0.9843, "step": 454 }, { - "epoch": 0.03442926866179864, - "grad_norm": 4.705966472625732, - "learning_rate": 1.2390317700453857e-05, - "loss": 0.9114, + "epoch": 0.03203097500879972, + "grad_norm": 2.8840482234954834, + "learning_rate": 1.3999857337438856e-05, + "loss": 0.8994, "step": 455 }, { - "epoch": 0.03450493738413227, - "grad_norm": 4.132481575012207, - "learning_rate": 1.2417549167927382e-05, - "loss": 1.108, + "epoch": 0.032101372756071805, + "grad_norm": 3.249030113220215, + "learning_rate": 1.3999846965326159e-05, + "loss": 0.8732, "step": 456 }, { - "epoch": 0.03458060610646589, - "grad_norm": 6.405203342437744, - "learning_rate": 1.2444780635400908e-05, - "loss": 1.0141, + "epoch": 0.03217177050334389, + "grad_norm": 2.768885374069214, + "learning_rate": 1.3999836229285078e-05, + "loss": 0.7642, "step": 457 }, { - "epoch": 0.034656274828799516, - "grad_norm": 4.2582597732543945, - "learning_rate": 1.2472012102874433e-05, - "loss": 0.8596, + "epoch": 0.03224216825061598, + "grad_norm": 2.7615644931793213, + "learning_rate": 1.399982512931617e-05, + "loss": 0.8237, "step": 458 }, { - "epoch": 0.03473194355113314, - "grad_norm": 4.002652168273926, - "learning_rate": 1.2499243570347958e-05, - "loss": 0.9137, + "epoch": 0.032312565997888065, + "grad_norm": 7.379369258880615, + "learning_rate": 1.3999813665420017e-05, + "loss": 0.9239, "step": 459 }, { - "epoch": 0.034807612273466765, - "grad_norm": 4.454577445983887, - "learning_rate": 1.2526475037821484e-05, - "loss": 0.8705, + "epoch": 0.032382963745160157, + "grad_norm": 2.8263051509857178, + "learning_rate": 1.399980183759721e-05, + "loss": 0.7436, "step": 460 }, { - "epoch": 0.03488328099580039, - "grad_norm": 4.909870147705078, - "learning_rate": 1.2553706505295009e-05, - "loss": 1.1714, + "epoch": 0.03245336149243224, + "grad_norm": 3.3926925659179688, + "learning_rate": 1.3999789645848368e-05, + "loss": 0.8874, "step": 461 }, { - "epoch": 0.03495894971813401, - "grad_norm": 7.202528953552246, - "learning_rate": 1.2580937972768532e-05, - "loss": 0.9013, + "epoch": 0.03252375923970433, + "grad_norm": 4.752262115478516, + "learning_rate": 1.3999777090174121e-05, + "loss": 0.8465, "step": 462 }, { - "epoch": 0.03503461844046763, - "grad_norm": 4.110122203826904, - "learning_rate": 1.2608169440242057e-05, - "loss": 0.8812, + "epoch": 0.032594156986976416, + "grad_norm": 2.965500593185425, + "learning_rate": 1.3999764170575125e-05, + "loss": 0.894, "step": 463 }, { - "epoch": 0.035110287162801256, - "grad_norm": 3.5475730895996094, - "learning_rate": 1.2635400907715583e-05, - "loss": 0.9704, + "epoch": 0.0326645547342485, + "grad_norm": 3.2397937774658203, + "learning_rate": 1.399975088705205e-05, + "loss": 0.8432, "step": 464 }, { - "epoch": 0.03518595588513488, - "grad_norm": 3.4889214038848877, - "learning_rate": 1.2662632375189108e-05, - "loss": 0.9656, + "epoch": 0.03273495248152059, + "grad_norm": 3.501350164413452, + "learning_rate": 1.3999737239605587e-05, + "loss": 0.8014, "step": 465 }, { - "epoch": 0.035261624607468504, - "grad_norm": 3.9124395847320557, - "learning_rate": 1.2689863842662632e-05, - "loss": 0.7515, + "epoch": 0.032805350228792676, + "grad_norm": 2.5790843963623047, + "learning_rate": 1.3999723228236445e-05, + "loss": 0.7864, "step": 466 }, { - "epoch": 0.03533729332980213, - "grad_norm": 6.498013496398926, - "learning_rate": 1.2717095310136159e-05, - "loss": 0.8926, + "epoch": 0.03287574797606477, + "grad_norm": 3.581630229949951, + "learning_rate": 1.3999708852945353e-05, + "loss": 0.7955, "step": 467 }, { - "epoch": 0.035412962052135746, - "grad_norm": 3.9321653842926025, - "learning_rate": 1.2744326777609683e-05, - "loss": 0.7346, + "epoch": 0.03294614572333685, + "grad_norm": 3.182894229888916, + "learning_rate": 1.399969411373306e-05, + "loss": 0.8419, "step": 468 }, { - "epoch": 0.03548863077446937, - "grad_norm": 5.16299295425415, - "learning_rate": 1.2771558245083208e-05, - "loss": 0.8197, + "epoch": 0.03301654347060894, + "grad_norm": 2.7621166706085205, + "learning_rate": 1.399967901060033e-05, + "loss": 0.8543, "step": 469 }, { - "epoch": 0.035564299496802995, - "grad_norm": 4.675112247467041, - "learning_rate": 1.2798789712556731e-05, - "loss": 0.8971, + "epoch": 0.03308694121788103, + "grad_norm": 3.957764148712158, + "learning_rate": 1.3999663543547949e-05, + "loss": 0.8113, "step": 470 }, { - "epoch": 0.03563996821913662, - "grad_norm": 2.9948925971984863, - "learning_rate": 1.2826021180030258e-05, - "loss": 0.8531, + "epoch": 0.03315733896515311, + "grad_norm": 3.3380677700042725, + "learning_rate": 1.399964771257672e-05, + "loss": 0.8188, "step": 471 }, { - "epoch": 0.035715636941470244, - "grad_norm": 4.1595354080200195, - "learning_rate": 1.2853252647503782e-05, - "loss": 0.8913, + "epoch": 0.0332277367124252, + "grad_norm": 3.0422704219818115, + "learning_rate": 1.3999631517687468e-05, + "loss": 0.9065, "step": 472 }, { - "epoch": 0.03579130566380387, - "grad_norm": 2.9230337142944336, - "learning_rate": 1.2880484114977307e-05, - "loss": 1.1192, + "epoch": 0.03329813445969729, + "grad_norm": 3.3505020141601562, + "learning_rate": 1.3999614958881036e-05, + "loss": 1.0004, "step": 473 }, { - "epoch": 0.03586697438613749, - "grad_norm": 6.0981831550598145, - "learning_rate": 1.2907715582450833e-05, - "loss": 0.8437, + "epoch": 0.03336853220696938, + "grad_norm": 3.649298906326294, + "learning_rate": 1.399959803615828e-05, + "loss": 0.7714, "step": 474 }, { - "epoch": 0.03594264310847111, - "grad_norm": 6.705804824829102, - "learning_rate": 1.2934947049924358e-05, - "loss": 0.9358, + "epoch": 0.03343892995424146, + "grad_norm": 3.2996115684509277, + "learning_rate": 1.3999580749520084e-05, + "loss": 0.8265, "step": 475 }, { - "epoch": 0.036018311830804735, - "grad_norm": 3.546778440475464, - "learning_rate": 1.2962178517397883e-05, - "loss": 1.0628, + "epoch": 0.033509327701513554, + "grad_norm": 2.788752555847168, + "learning_rate": 1.3999563098967344e-05, + "loss": 0.8221, "step": 476 }, { - "epoch": 0.03609398055313836, - "grad_norm": 3.6508278846740723, - "learning_rate": 1.2989409984871406e-05, - "loss": 0.8347, + "epoch": 0.03357972544878564, + "grad_norm": 3.599248170852661, + "learning_rate": 1.3999545084500983e-05, + "loss": 0.8316, "step": 477 }, { - "epoch": 0.036169649275471984, - "grad_norm": 5.718278884887695, - "learning_rate": 1.3016641452344932e-05, - "loss": 0.9513, + "epoch": 0.03365012319605773, + "grad_norm": 2.5447847843170166, + "learning_rate": 1.3999526706121932e-05, + "loss": 0.7904, "step": 478 }, { - "epoch": 0.03624531799780561, - "grad_norm": 9.246580123901367, - "learning_rate": 1.3043872919818457e-05, - "loss": 0.9638, + "epoch": 0.033720520943329814, + "grad_norm": 2.77638578414917, + "learning_rate": 1.3999507963831148e-05, + "loss": 0.9359, "step": 479 }, { - "epoch": 0.03632098672013923, - "grad_norm": 5.03000545501709, - "learning_rate": 1.3071104387291982e-05, - "loss": 0.9532, + "epoch": 0.0337909186906019, + "grad_norm": 2.4172489643096924, + "learning_rate": 1.3999488857629606e-05, + "loss": 0.8059, "step": 480 }, { - "epoch": 0.03639665544247286, - "grad_norm": 4.656915187835693, - "learning_rate": 1.3098335854765508e-05, - "loss": 0.9497, + "epoch": 0.03386131643787399, + "grad_norm": 7.59112024307251, + "learning_rate": 1.3999469387518299e-05, + "loss": 0.788, "step": 481 }, { - "epoch": 0.036472324164806474, - "grad_norm": 4.1055731773376465, - "learning_rate": 1.3125567322239033e-05, - "loss": 0.782, + "epoch": 0.033931714185146074, + "grad_norm": 3.0883567333221436, + "learning_rate": 1.3999449553498239e-05, + "loss": 0.8457, "step": 482 }, { - "epoch": 0.0365479928871401, - "grad_norm": 3.7302215099334717, - "learning_rate": 1.3152798789712557e-05, - "loss": 0.8432, + "epoch": 0.034002111932418165, + "grad_norm": 3.146864414215088, + "learning_rate": 1.399942935557046e-05, + "loss": 0.745, "step": 483 }, { - "epoch": 0.03662366160947372, - "grad_norm": 4.635787487030029, - "learning_rate": 1.3180030257186082e-05, - "loss": 0.857, + "epoch": 0.03407250967969025, + "grad_norm": 6.104170799255371, + "learning_rate": 1.3999408793736007e-05, + "loss": 0.8233, "step": 484 }, { - "epoch": 0.03669933033180735, - "grad_norm": 4.681149482727051, - "learning_rate": 1.3207261724659607e-05, - "loss": 0.7984, + "epoch": 0.03414290742696234, + "grad_norm": 3.1644911766052246, + "learning_rate": 1.3999387867995954e-05, + "loss": 0.9187, "step": 485 }, { - "epoch": 0.03677499905414097, - "grad_norm": 6.412924289703369, - "learning_rate": 1.3234493192133132e-05, - "loss": 0.8818, + "epoch": 0.034213305174234425, + "grad_norm": 3.0731852054595947, + "learning_rate": 1.3999366578351386e-05, + "loss": 0.8721, "step": 486 }, { - "epoch": 0.0368506677764746, - "grad_norm": 3.8682901859283447, - "learning_rate": 1.3261724659606656e-05, - "loss": 0.7964, + "epoch": 0.03428370292150651, + "grad_norm": 3.164170742034912, + "learning_rate": 1.399934492480341e-05, + "loss": 0.8133, "step": 487 }, { - "epoch": 0.03692633649880822, - "grad_norm": 5.080738544464111, - "learning_rate": 1.3288956127080183e-05, - "loss": 0.8436, + "epoch": 0.0343541006687786, + "grad_norm": 3.7331066131591797, + "learning_rate": 1.3999322907353152e-05, + "loss": 0.8101, "step": 488 }, { - "epoch": 0.03700200522114184, - "grad_norm": 3.049335241317749, - "learning_rate": 1.3316187594553707e-05, - "loss": 0.9249, + "epoch": 0.034424498416050685, + "grad_norm": 3.648189067840576, + "learning_rate": 1.3999300526001758e-05, + "loss": 0.9061, "step": 489 }, { - "epoch": 0.03707767394347546, - "grad_norm": 4.670293807983398, - "learning_rate": 1.3343419062027232e-05, - "loss": 0.729, + "epoch": 0.034494896163322776, + "grad_norm": 3.298630714416504, + "learning_rate": 1.399927778075039e-05, + "loss": 0.824, "step": 490 }, { - "epoch": 0.03715334266580909, - "grad_norm": 4.936186790466309, - "learning_rate": 1.3370650529500757e-05, - "loss": 0.8145, + "epoch": 0.03456529391059486, + "grad_norm": 2.7077085971832275, + "learning_rate": 1.399925467160023e-05, + "loss": 0.7211, "step": 491 }, { - "epoch": 0.03722901138814271, - "grad_norm": 2.468773365020752, - "learning_rate": 1.3397881996974281e-05, - "loss": 1.0231, + "epoch": 0.03463569165786695, + "grad_norm": 3.1308085918426514, + "learning_rate": 1.3999231198552484e-05, + "loss": 0.8887, "step": 492 }, { - "epoch": 0.037304680110476336, - "grad_norm": 3.9304311275482178, - "learning_rate": 1.3425113464447806e-05, - "loss": 0.9577, + "epoch": 0.034706089405139036, + "grad_norm": 3.9145584106445312, + "learning_rate": 1.399920736160837e-05, + "loss": 0.8735, "step": 493 }, { - "epoch": 0.03738034883280996, - "grad_norm": 3.941254138946533, - "learning_rate": 1.345234493192133e-05, - "loss": 0.8663, + "epoch": 0.03477648715241112, + "grad_norm": 3.065737009048462, + "learning_rate": 1.3999183160769123e-05, + "loss": 0.842, "step": 494 }, { - "epoch": 0.03745601755514358, - "grad_norm": 3.897300958633423, - "learning_rate": 1.3479576399394857e-05, - "loss": 0.9065, + "epoch": 0.03484688489968321, + "grad_norm": 2.5276222229003906, + "learning_rate": 1.3999158596036004e-05, + "loss": 0.9675, "step": 495 }, { - "epoch": 0.0375316862774772, - "grad_norm": 5.854770183563232, - "learning_rate": 1.3506807866868382e-05, - "loss": 0.8533, + "epoch": 0.034917282646955296, + "grad_norm": 3.0536723136901855, + "learning_rate": 1.3999133667410293e-05, + "loss": 0.7658, "step": 496 }, { - "epoch": 0.03760735499981083, - "grad_norm": 5.508477210998535, - "learning_rate": 1.3534039334341907e-05, - "loss": 0.7829, + "epoch": 0.03498768039422739, + "grad_norm": 3.9542236328125, + "learning_rate": 1.3999108374893284e-05, + "loss": 0.7336, "step": 497 }, { - "epoch": 0.03768302372214445, - "grad_norm": 3.432650566101074, - "learning_rate": 1.3561270801815431e-05, - "loss": 0.9835, + "epoch": 0.03505807814149947, + "grad_norm": 6.914444923400879, + "learning_rate": 1.399908271848629e-05, + "loss": 0.7613, "step": 498 }, { - "epoch": 0.037758692444478076, - "grad_norm": 4.13020133972168, - "learning_rate": 1.3588502269288958e-05, - "loss": 0.9426, + "epoch": 0.03512847588877156, + "grad_norm": 3.5489625930786133, + "learning_rate": 1.3999056698190648e-05, + "loss": 0.8326, "step": 499 }, { - "epoch": 0.0378343611668117, - "grad_norm": 3.107402801513672, - "learning_rate": 1.361573373676248e-05, - "loss": 0.7618, + "epoch": 0.03519887363604365, + "grad_norm": 3.487492084503174, + "learning_rate": 1.399903031400771e-05, + "loss": 0.8483, "step": 500 }, { - "epoch": 0.037910029889145325, - "grad_norm": 6.790006637573242, - "learning_rate": 1.3642965204236005e-05, - "loss": 1.0121, + "epoch": 0.03526927138331573, + "grad_norm": 3.68188738822937, + "learning_rate": 1.3999003565938846e-05, + "loss": 0.8374, "step": 501 }, { - "epoch": 0.03798569861147894, - "grad_norm": 4.519580841064453, - "learning_rate": 1.3670196671709532e-05, - "loss": 0.887, + "epoch": 0.03533966913058782, + "grad_norm": 3.024183750152588, + "learning_rate": 1.3998976453985448e-05, + "loss": 0.917, "step": 502 }, { - "epoch": 0.03806136733381257, - "grad_norm": 4.3927903175354, - "learning_rate": 1.3697428139183057e-05, - "loss": 0.848, + "epoch": 0.03541006687785991, + "grad_norm": 3.220881462097168, + "learning_rate": 1.3998948978148927e-05, + "loss": 0.9516, "step": 503 }, { - "epoch": 0.03813703605614619, - "grad_norm": 4.329632759094238, - "learning_rate": 1.3724659606656581e-05, - "loss": 0.8182, + "epoch": 0.035480464625132, + "grad_norm": 3.5055460929870605, + "learning_rate": 1.3998921138430708e-05, + "loss": 0.8359, "step": 504 }, { - "epoch": 0.038212704778479815, - "grad_norm": 5.273471355438232, - "learning_rate": 1.3751891074130106e-05, - "loss": 0.9141, + "epoch": 0.03555086237240408, + "grad_norm": 3.837888717651367, + "learning_rate": 1.3998892934832241e-05, + "loss": 0.8461, "step": 505 }, { - "epoch": 0.03828837350081344, - "grad_norm": 3.8324403762817383, - "learning_rate": 1.3779122541603632e-05, - "loss": 0.9766, + "epoch": 0.035621260119676174, + "grad_norm": 3.272800922393799, + "learning_rate": 1.3998864367354991e-05, + "loss": 0.8408, "step": 506 }, { - "epoch": 0.038364042223147064, - "grad_norm": 3.876749038696289, - "learning_rate": 1.3806354009077157e-05, - "loss": 0.8484, + "epoch": 0.03569165786694826, + "grad_norm": 5.356675624847412, + "learning_rate": 1.3998835436000448e-05, + "loss": 0.8245, "step": 507 }, { - "epoch": 0.03843971094548069, - "grad_norm": 4.648043155670166, - "learning_rate": 1.383358547655068e-05, - "loss": 0.9291, + "epoch": 0.03576205561422034, + "grad_norm": 2.628054618835449, + "learning_rate": 1.3998806140770108e-05, + "loss": 0.887, "step": 508 }, { - "epoch": 0.038515379667814306, - "grad_norm": 4.072823524475098, - "learning_rate": 1.3860816944024205e-05, - "loss": 1.0211, + "epoch": 0.03583245336149243, + "grad_norm": 3.076587677001953, + "learning_rate": 1.39987764816655e-05, + "loss": 0.8769, "step": 509 }, { - "epoch": 0.03859104839014793, - "grad_norm": 7.409148216247559, - "learning_rate": 1.3888048411497731e-05, - "loss": 0.9665, + "epoch": 0.03590285110876452, + "grad_norm": 3.016392707824707, + "learning_rate": 1.3998746458688163e-05, + "loss": 0.9284, "step": 510 }, { - "epoch": 0.038666717112481555, - "grad_norm": 5.668654441833496, - "learning_rate": 1.3915279878971256e-05, - "loss": 0.8603, + "epoch": 0.03597324885603661, + "grad_norm": 2.8268473148345947, + "learning_rate": 1.3998716071839662e-05, + "loss": 0.8744, "step": 511 }, { - "epoch": 0.03874238583481518, - "grad_norm": 4.457876205444336, - "learning_rate": 1.394251134644478e-05, - "loss": 0.8055, + "epoch": 0.03604364660330869, + "grad_norm": 2.404001235961914, + "learning_rate": 1.399868532112157e-05, + "loss": 0.9969, "step": 512 }, { - "epoch": 0.038818054557148804, - "grad_norm": 4.217092514038086, - "learning_rate": 1.3969742813918307e-05, - "loss": 0.893, + "epoch": 0.036114044350580785, + "grad_norm": 3.0389187335968018, + "learning_rate": 1.3998654206535492e-05, + "loss": 0.9624, "step": 513 }, { - "epoch": 0.03889372327948243, - "grad_norm": 4.033523082733154, - "learning_rate": 1.3996974281391832e-05, - "loss": 0.8748, + "epoch": 0.03618444209785287, + "grad_norm": 3.059232473373413, + "learning_rate": 1.3998622728083044e-05, + "loss": 0.8563, "step": 514 }, { - "epoch": 0.03896939200181605, - "grad_norm": 3.2417023181915283, - "learning_rate": 1.4024205748865355e-05, - "loss": 0.7512, + "epoch": 0.03625483984512495, + "grad_norm": 2.895965099334717, + "learning_rate": 1.3998590885765861e-05, + "loss": 0.774, "step": 515 }, { - "epoch": 0.03904506072414967, - "grad_norm": 4.064194679260254, - "learning_rate": 1.405143721633888e-05, - "loss": 0.991, + "epoch": 0.036325237592397044, + "grad_norm": 3.0412559509277344, + "learning_rate": 1.39985586795856e-05, + "loss": 0.765, "step": 516 }, { - "epoch": 0.039120729446483295, - "grad_norm": 5.263235569000244, - "learning_rate": 1.4078668683812406e-05, - "loss": 0.8356, + "epoch": 0.03639563533966913, + "grad_norm": 3.577714681625366, + "learning_rate": 1.3998526109543935e-05, + "loss": 0.7684, "step": 517 }, { - "epoch": 0.03919639816881692, - "grad_norm": 3.2027482986450195, - "learning_rate": 1.410590015128593e-05, - "loss": 1.0551, + "epoch": 0.03646603308694122, + "grad_norm": 3.3580434322357178, + "learning_rate": 1.399849317564256e-05, + "loss": 0.8338, "step": 518 }, { - "epoch": 0.039272066891150544, - "grad_norm": 6.763327121734619, - "learning_rate": 1.4133131618759455e-05, - "loss": 0.8429, + "epoch": 0.036536430834213304, + "grad_norm": 3.4461493492126465, + "learning_rate": 1.3998459877883186e-05, + "loss": 0.7974, "step": 519 }, { - "epoch": 0.03934773561348417, - "grad_norm": 4.308533668518066, - "learning_rate": 1.4160363086232982e-05, - "loss": 0.8884, + "epoch": 0.036606828581485396, + "grad_norm": 2.9838409423828125, + "learning_rate": 1.3998426216267543e-05, + "loss": 0.7476, "step": 520 }, { - "epoch": 0.03942340433581779, - "grad_norm": 4.909972667694092, - "learning_rate": 1.4187594553706506e-05, - "loss": 0.7806, + "epoch": 0.03667722632875748, + "grad_norm": 3.0507543087005615, + "learning_rate": 1.3998392190797386e-05, + "loss": 0.9019, "step": 521 }, { - "epoch": 0.03949907305815141, - "grad_norm": 3.7141098976135254, - "learning_rate": 1.4214826021180031e-05, - "loss": 0.7759, + "epoch": 0.036747624076029564, + "grad_norm": 3.39325213432312, + "learning_rate": 1.3998357801474478e-05, + "loss": 0.8099, "step": 522 }, { - "epoch": 0.039574741780485034, - "grad_norm": 4.333841800689697, - "learning_rate": 1.4242057488653554e-05, - "loss": 0.9594, + "epoch": 0.036818021823301655, + "grad_norm": 3.2275614738464355, + "learning_rate": 1.3998323048300611e-05, + "loss": 0.833, "step": 523 }, { - "epoch": 0.03965041050281866, - "grad_norm": 7.9005866050720215, - "learning_rate": 1.426928895612708e-05, - "loss": 0.9144, + "epoch": 0.03688841957057374, + "grad_norm": 2.5024795532226562, + "learning_rate": 1.3998287931277591e-05, + "loss": 0.9081, "step": 524 }, { - "epoch": 0.03972607922515228, - "grad_norm": 4.86323881149292, - "learning_rate": 1.4296520423600605e-05, - "loss": 0.85, + "epoch": 0.03695881731784583, + "grad_norm": 2.9611589908599854, + "learning_rate": 1.3998252450407244e-05, + "loss": 0.8463, "step": 525 }, { - "epoch": 0.03980174794748591, - "grad_norm": 2.843881130218506, - "learning_rate": 1.432375189107413e-05, - "loss": 0.9132, + "epoch": 0.037029215065117915, + "grad_norm": 3.1987977027893066, + "learning_rate": 1.3998216605691412e-05, + "loss": 0.7821, "step": 526 }, { - "epoch": 0.03987741666981953, - "grad_norm": 3.5814990997314453, - "learning_rate": 1.4350983358547656e-05, - "loss": 0.8243, + "epoch": 0.037099612812390007, + "grad_norm": 3.1313233375549316, + "learning_rate": 1.3998180397131961e-05, + "loss": 0.8094, "step": 527 }, { - "epoch": 0.039953085392153156, - "grad_norm": 3.7590556144714355, - "learning_rate": 1.4378214826021181e-05, - "loss": 0.8779, + "epoch": 0.03717001055966209, + "grad_norm": 2.824742317199707, + "learning_rate": 1.3998143824730776e-05, + "loss": 0.8074, "step": 528 }, { - "epoch": 0.040028754114486774, - "grad_norm": 4.117438316345215, - "learning_rate": 1.4405446293494706e-05, - "loss": 0.9465, + "epoch": 0.037240408306934175, + "grad_norm": 2.761240243911743, + "learning_rate": 1.3998106888489754e-05, + "loss": 0.8212, "step": 529 }, { - "epoch": 0.0401044228368204, - "grad_norm": 6.806588649749756, - "learning_rate": 1.4432677760968229e-05, - "loss": 0.8587, + "epoch": 0.037310806054206266, + "grad_norm": 3.0550966262817383, + "learning_rate": 1.3998069588410818e-05, + "loss": 0.8611, "step": 530 }, { - "epoch": 0.04018009155915402, - "grad_norm": 3.3301045894622803, - "learning_rate": 1.4459909228441755e-05, - "loss": 0.8867, + "epoch": 0.03738120380147835, + "grad_norm": 2.5256669521331787, + "learning_rate": 1.3998031924495905e-05, + "loss": 0.7901, "step": 531 }, { - "epoch": 0.04025576028148765, - "grad_norm": 3.395404577255249, - "learning_rate": 1.448714069591528e-05, - "loss": 0.9337, + "epoch": 0.03745160154875044, + "grad_norm": 2.9315972328186035, + "learning_rate": 1.3997993896746972e-05, + "loss": 0.8549, "step": 532 }, { - "epoch": 0.04033142900382127, - "grad_norm": 6.818991184234619, - "learning_rate": 1.4514372163388804e-05, - "loss": 1.0161, + "epoch": 0.037521999296022526, + "grad_norm": 2.3416874408721924, + "learning_rate": 1.3997955505166002e-05, + "loss": 0.7122, "step": 533 }, { - "epoch": 0.040407097726154896, - "grad_norm": 3.7646358013153076, - "learning_rate": 1.4541603630862331e-05, - "loss": 0.9049, + "epoch": 0.03759239704329462, + "grad_norm": 2.7199528217315674, + "learning_rate": 1.3997916749754988e-05, + "loss": 0.7902, "step": 534 }, { - "epoch": 0.04048276644848852, - "grad_norm": 3.209998369216919, - "learning_rate": 1.4568835098335856e-05, - "loss": 0.9199, + "epoch": 0.0376627947905667, + "grad_norm": 3.4181931018829346, + "learning_rate": 1.3997877630515943e-05, + "loss": 0.8113, "step": 535 }, { - "epoch": 0.04055843517082214, - "grad_norm": 4.078510761260986, - "learning_rate": 1.459606656580938e-05, - "loss": 0.9277, + "epoch": 0.037733192537838786, + "grad_norm": 2.9263803958892822, + "learning_rate": 1.3997838147450903e-05, + "loss": 0.9299, "step": 536 }, { - "epoch": 0.04063410389315576, - "grad_norm": 4.5334153175354, - "learning_rate": 1.4623298033282907e-05, - "loss": 0.9826, + "epoch": 0.03780359028511088, + "grad_norm": 2.4479856491088867, + "learning_rate": 1.399779830056192e-05, + "loss": 0.8647, "step": 537 }, { - "epoch": 0.04070977261548939, - "grad_norm": 4.209270000457764, - "learning_rate": 1.465052950075643e-05, - "loss": 0.8722, + "epoch": 0.03787398803238296, + "grad_norm": 3.3899643421173096, + "learning_rate": 1.3997758089851065e-05, + "loss": 0.793, "step": 538 }, { - "epoch": 0.04078544133782301, - "grad_norm": 4.012211799621582, - "learning_rate": 1.4677760968229954e-05, - "loss": 0.7578, + "epoch": 0.03794438577965505, + "grad_norm": 2.6238455772399902, + "learning_rate": 1.3997717515320428e-05, + "loss": 0.8028, "step": 539 }, { - "epoch": 0.040861110060156636, - "grad_norm": 3.805192232131958, - "learning_rate": 1.4704992435703479e-05, - "loss": 0.7765, + "epoch": 0.03801478352692714, + "grad_norm": 3.8376450538635254, + "learning_rate": 1.3997676576972122e-05, + "loss": 0.898, "step": 540 }, { - "epoch": 0.04093677878249026, - "grad_norm": 6.301825046539307, - "learning_rate": 1.4732223903177005e-05, - "loss": 0.8551, + "epoch": 0.03808518127419923, + "grad_norm": 2.825827121734619, + "learning_rate": 1.3997635274808273e-05, + "loss": 0.9195, "step": 541 }, { - "epoch": 0.04101244750482388, - "grad_norm": 3.2638895511627197, - "learning_rate": 1.475945537065053e-05, - "loss": 0.81, + "epoch": 0.03815557902147131, + "grad_norm": 2.6318182945251465, + "learning_rate": 1.3997593608831028e-05, + "loss": 0.7487, "step": 542 }, { - "epoch": 0.0410881162271575, - "grad_norm": 4.314562797546387, - "learning_rate": 1.4786686838124055e-05, - "loss": 1.0944, + "epoch": 0.0382259767687434, + "grad_norm": 2.5382261276245117, + "learning_rate": 1.3997551579042555e-05, + "loss": 0.7593, "step": 543 }, { - "epoch": 0.041163784949491126, - "grad_norm": 3.090569496154785, - "learning_rate": 1.4813918305597581e-05, - "loss": 1.0042, + "epoch": 0.03829637451601549, + "grad_norm": 3.178757667541504, + "learning_rate": 1.3997509185445037e-05, + "loss": 0.958, "step": 544 }, { - "epoch": 0.04123945367182475, - "grad_norm": 3.7688913345336914, - "learning_rate": 1.4841149773071104e-05, - "loss": 0.9544, + "epoch": 0.03836677226328757, + "grad_norm": 2.946504831314087, + "learning_rate": 1.3997466428040681e-05, + "loss": 0.8427, "step": 545 }, { - "epoch": 0.041315122394158375, - "grad_norm": 4.588676929473877, - "learning_rate": 1.4868381240544629e-05, - "loss": 0.7449, + "epoch": 0.038437170010559664, + "grad_norm": 3.6611249446868896, + "learning_rate": 1.3997423306831707e-05, + "loss": 0.8035, "step": 546 }, { - "epoch": 0.041390791116492, - "grad_norm": 6.916925430297852, - "learning_rate": 1.4895612708018154e-05, - "loss": 0.9825, + "epoch": 0.03850756775783175, + "grad_norm": 2.8863515853881836, + "learning_rate": 1.3997379821820358e-05, + "loss": 0.8334, "step": 547 }, { - "epoch": 0.041466459838825624, - "grad_norm": 3.3256642818450928, - "learning_rate": 1.492284417549168e-05, - "loss": 0.9232, + "epoch": 0.03857796550510384, + "grad_norm": 3.2965848445892334, + "learning_rate": 1.3997335973008894e-05, + "loss": 0.843, "step": 548 }, { - "epoch": 0.04154212856115924, - "grad_norm": 5.033417224884033, - "learning_rate": 1.4950075642965205e-05, - "loss": 0.9566, + "epoch": 0.038648363252375924, + "grad_norm": 2.537041425704956, + "learning_rate": 1.3997291760399596e-05, + "loss": 0.7059, "step": 549 }, { - "epoch": 0.041617797283492866, - "grad_norm": 3.85809063911438, - "learning_rate": 1.497730711043873e-05, - "loss": 0.993, + "epoch": 0.03871876099964801, + "grad_norm": 2.905217170715332, + "learning_rate": 1.3997247183994762e-05, + "loss": 0.8397, "step": 550 }, { - "epoch": 0.04169346600582649, - "grad_norm": 5.949283599853516, - "learning_rate": 1.5004538577912256e-05, - "loss": 0.9769, + "epoch": 0.0387891587469201, + "grad_norm": 3.3437349796295166, + "learning_rate": 1.3997202243796712e-05, + "loss": 0.8168, "step": 551 }, { - "epoch": 0.041769134728160115, - "grad_norm": 4.104134559631348, - "learning_rate": 1.503177004538578e-05, - "loss": 0.8464, + "epoch": 0.038859556494192184, + "grad_norm": 3.1850955486297607, + "learning_rate": 1.3997156939807778e-05, + "loss": 0.7065, "step": 552 }, { - "epoch": 0.04184480345049374, - "grad_norm": 5.211521148681641, - "learning_rate": 1.5059001512859304e-05, - "loss": 0.8422, + "epoch": 0.038929954241464275, + "grad_norm": 4.86928653717041, + "learning_rate": 1.3997111272030317e-05, + "loss": 0.8004, "step": 553 }, { - "epoch": 0.041920472172827364, - "grad_norm": 3.4157001972198486, - "learning_rate": 1.5086232980332828e-05, - "loss": 0.8472, + "epoch": 0.03900035198873636, + "grad_norm": 2.6666688919067383, + "learning_rate": 1.3997065240466707e-05, + "loss": 0.7804, "step": 554 }, { - "epoch": 0.04199614089516099, - "grad_norm": 3.895693778991699, - "learning_rate": 1.5113464447806355e-05, - "loss": 0.7614, + "epoch": 0.03907074973600845, + "grad_norm": 4.539239883422852, + "learning_rate": 1.3997018845119336e-05, + "loss": 0.741, "step": 555 }, { - "epoch": 0.042071809617494606, - "grad_norm": 4.627487659454346, - "learning_rate": 1.514069591527988e-05, - "loss": 0.9064, + "epoch": 0.039141147483280535, + "grad_norm": 3.263653516769409, + "learning_rate": 1.399697208599062e-05, + "loss": 0.8334, "step": 556 }, { - "epoch": 0.04214747833982823, - "grad_norm": 3.6824750900268555, - "learning_rate": 1.5167927382753404e-05, - "loss": 0.8688, + "epoch": 0.03921154523055262, + "grad_norm": 3.2612195014953613, + "learning_rate": 1.3996924963082987e-05, + "loss": 0.8353, "step": 557 }, { - "epoch": 0.042223147062161855, - "grad_norm": 3.035003185272217, - "learning_rate": 1.519515885022693e-05, - "loss": 0.9319, + "epoch": 0.03928194297782471, + "grad_norm": 2.8736414909362793, + "learning_rate": 1.3996877476398889e-05, + "loss": 0.8151, "step": 558 }, { - "epoch": 0.04229881578449548, - "grad_norm": 3.1040902137756348, - "learning_rate": 1.5222390317700455e-05, - "loss": 0.8281, + "epoch": 0.039352340725096795, + "grad_norm": 3.4466845989227295, + "learning_rate": 1.3996829625940792e-05, + "loss": 0.7471, "step": 559 }, { - "epoch": 0.0423744845068291, - "grad_norm": 3.9689624309539795, - "learning_rate": 1.5249621785173978e-05, - "loss": 0.8638, + "epoch": 0.039422738472368886, + "grad_norm": 2.3491859436035156, + "learning_rate": 1.3996781411711188e-05, + "loss": 0.8278, "step": 560 }, { - "epoch": 0.04245015322916273, - "grad_norm": 4.4839701652526855, - "learning_rate": 1.5276853252647503e-05, - "loss": 0.7902, + "epoch": 0.03949313621964097, + "grad_norm": 2.6061103343963623, + "learning_rate": 1.3996732833712582e-05, + "loss": 1.0888, "step": 561 }, { - "epoch": 0.04252582195149635, - "grad_norm": 5.034473419189453, - "learning_rate": 1.530408472012103e-05, - "loss": 0.8222, + "epoch": 0.03956353396691306, + "grad_norm": 3.4924848079681396, + "learning_rate": 1.39966838919475e-05, + "loss": 0.8635, "step": 562 }, { - "epoch": 0.04260149067382997, - "grad_norm": 3.7882745265960693, - "learning_rate": 1.5331316187594552e-05, - "loss": 0.8985, + "epoch": 0.039633931714185146, + "grad_norm": 3.603238582611084, + "learning_rate": 1.3996634586418486e-05, + "loss": 0.7736, "step": 563 }, { - "epoch": 0.042677159396163594, - "grad_norm": 4.125184059143066, - "learning_rate": 1.535854765506808e-05, - "loss": 0.9292, + "epoch": 0.03970432946145723, + "grad_norm": 3.2938106060028076, + "learning_rate": 1.3996584917128102e-05, + "loss": 0.7416, "step": 564 }, { - "epoch": 0.04275282811849722, - "grad_norm": 4.87890625, - "learning_rate": 1.5385779122541605e-05, - "loss": 0.7701, + "epoch": 0.03977472720872932, + "grad_norm": 2.842083215713501, + "learning_rate": 1.3996534884078931e-05, + "loss": 0.6779, "step": 565 }, { - "epoch": 0.04282849684083084, - "grad_norm": 4.0733513832092285, - "learning_rate": 1.5413010590015128e-05, - "loss": 0.9535, + "epoch": 0.039845124956001406, + "grad_norm": 3.6062729358673096, + "learning_rate": 1.3996484487273575e-05, + "loss": 0.8641, "step": 566 }, { - "epoch": 0.04290416556316447, - "grad_norm": 5.096096515655518, - "learning_rate": 1.5440242057488655e-05, - "loss": 0.7682, + "epoch": 0.0399155227032735, + "grad_norm": 2.9615612030029297, + "learning_rate": 1.3996433726714657e-05, + "loss": 0.8178, "step": 567 }, { - "epoch": 0.04297983428549809, - "grad_norm": 3.4975779056549072, - "learning_rate": 1.5467473524962178e-05, - "loss": 0.9259, + "epoch": 0.03998592045054558, + "grad_norm": 3.1762547492980957, + "learning_rate": 1.399638260240481e-05, + "loss": 0.8065, "step": 568 }, { - "epoch": 0.04305550300783171, - "grad_norm": 4.021080493927002, - "learning_rate": 1.5494704992435704e-05, - "loss": 0.7498, + "epoch": 0.04005631819781767, + "grad_norm": 2.8781039714813232, + "learning_rate": 1.3996331114346697e-05, + "loss": 0.8916, "step": 569 }, { - "epoch": 0.043131171730165334, - "grad_norm": 4.798002243041992, - "learning_rate": 1.5521936459909227e-05, - "loss": 0.9342, + "epoch": 0.04012671594508976, + "grad_norm": 3.3186848163604736, + "learning_rate": 1.3996279262542994e-05, + "loss": 0.7611, "step": 570 }, { - "epoch": 0.04320684045249896, - "grad_norm": 3.3058738708496094, - "learning_rate": 1.5549167927382753e-05, - "loss": 0.8086, + "epoch": 0.04019711369236184, + "grad_norm": 3.444287061691284, + "learning_rate": 1.3996227046996394e-05, + "loss": 0.6326, "step": 571 }, { - "epoch": 0.04328250917483258, - "grad_norm": 4.445735931396484, - "learning_rate": 1.557639939485628e-05, - "loss": 0.8993, + "epoch": 0.04026751143963393, + "grad_norm": 3.2134804725646973, + "learning_rate": 1.3996174467709613e-05, + "loss": 0.8539, "step": 572 }, { - "epoch": 0.04335817789716621, - "grad_norm": 3.855530023574829, - "learning_rate": 1.5603630862329803e-05, - "loss": 0.8861, + "epoch": 0.04033790918690602, + "grad_norm": 2.9034829139709473, + "learning_rate": 1.3996121524685388e-05, + "loss": 0.7416, "step": 573 }, { - "epoch": 0.04343384661949983, - "grad_norm": 3.9461214542388916, - "learning_rate": 1.563086232980333e-05, - "loss": 0.8456, + "epoch": 0.04040830693417811, + "grad_norm": 2.5816683769226074, + "learning_rate": 1.3996068217926468e-05, + "loss": 0.785, "step": 574 }, { - "epoch": 0.043509515341833456, - "grad_norm": 2.9989559650421143, - "learning_rate": 1.5658093797276852e-05, - "loss": 0.9882, + "epoch": 0.04047870468145019, + "grad_norm": 2.8911941051483154, + "learning_rate": 1.3996014547435623e-05, + "loss": 0.8308, "step": 575 }, { - "epoch": 0.043585184064167073, - "grad_norm": 6.7230916023254395, - "learning_rate": 1.568532526475038e-05, - "loss": 0.8234, + "epoch": 0.04054910242872228, + "grad_norm": 3.1707026958465576, + "learning_rate": 1.3995960513215648e-05, + "loss": 0.8582, "step": 576 }, { - "epoch": 0.0436608527865007, - "grad_norm": 4.629927635192871, - "learning_rate": 1.57125567322239e-05, - "loss": 0.8634, + "epoch": 0.04061950017599437, + "grad_norm": 2.6814775466918945, + "learning_rate": 1.399590611526935e-05, + "loss": 0.775, "step": 577 }, { - "epoch": 0.04373652150883432, - "grad_norm": 6.619353294372559, - "learning_rate": 1.5739788199697428e-05, - "loss": 0.8626, + "epoch": 0.04068989792326645, + "grad_norm": 3.1916022300720215, + "learning_rate": 1.3995851353599557e-05, + "loss": 0.8466, "step": 578 }, { - "epoch": 0.04381219023116795, - "grad_norm": 2.4462685585021973, - "learning_rate": 1.5767019667170954e-05, - "loss": 0.7996, + "epoch": 0.04076029567053854, + "grad_norm": 3.254413604736328, + "learning_rate": 1.3995796228209116e-05, + "loss": 0.8542, "step": 579 }, { - "epoch": 0.04388785895350157, - "grad_norm": 3.107055902481079, - "learning_rate": 1.5794251134644477e-05, - "loss": 1.1027, + "epoch": 0.04083069341781063, + "grad_norm": 2.639108180999756, + "learning_rate": 1.3995740739100894e-05, + "loss": 0.8417, "step": 580 }, { - "epoch": 0.043963527675835196, - "grad_norm": 3.176931858062744, - "learning_rate": 1.5821482602118004e-05, - "loss": 0.8589, + "epoch": 0.04090109116508272, + "grad_norm": 3.2948527336120605, + "learning_rate": 1.3995684886277773e-05, + "loss": 0.9285, "step": 581 }, { - "epoch": 0.04403919639816882, - "grad_norm": 6.571891784667969, - "learning_rate": 1.584871406959153e-05, - "loss": 0.8045, + "epoch": 0.0409714889123548, + "grad_norm": 4.132096767425537, + "learning_rate": 1.399562866974266e-05, + "loss": 0.8599, "step": 582 }, { - "epoch": 0.04411486512050244, - "grad_norm": 4.314690589904785, - "learning_rate": 1.5875945537065053e-05, - "loss": 0.8888, + "epoch": 0.041041886659626894, + "grad_norm": 3.2728514671325684, + "learning_rate": 1.3995572089498479e-05, + "loss": 0.7564, "step": 583 }, { - "epoch": 0.04419053384283606, - "grad_norm": 3.6380622386932373, - "learning_rate": 1.5903177004538576e-05, - "loss": 0.7593, + "epoch": 0.04111228440689898, + "grad_norm": 2.8726067543029785, + "learning_rate": 1.3995515145548168e-05, + "loss": 0.8545, "step": 584 }, { - "epoch": 0.044266202565169686, - "grad_norm": 5.430633544921875, - "learning_rate": 1.5930408472012103e-05, - "loss": 0.884, + "epoch": 0.04118268215417107, + "grad_norm": 3.138972043991089, + "learning_rate": 1.3995457837894689e-05, + "loss": 0.7385, "step": 585 }, { - "epoch": 0.04434187128750331, - "grad_norm": 3.1226465702056885, - "learning_rate": 1.595763993948563e-05, - "loss": 0.9011, + "epoch": 0.041253079901443154, + "grad_norm": 2.5554850101470947, + "learning_rate": 1.3995400166541021e-05, + "loss": 0.6948, "step": 586 }, { - "epoch": 0.044417540009836935, - "grad_norm": 3.8268587589263916, - "learning_rate": 1.5984871406959152e-05, - "loss": 0.8305, + "epoch": 0.04132347764871524, + "grad_norm": 2.983220100402832, + "learning_rate": 1.3995342131490164e-05, + "loss": 0.8357, "step": 587 }, { - "epoch": 0.04449320873217056, - "grad_norm": 5.864771842956543, - "learning_rate": 1.601210287443268e-05, - "loss": 0.7973, + "epoch": 0.04139387539598733, + "grad_norm": 3.447674512863159, + "learning_rate": 1.3995283732745133e-05, + "loss": 0.8964, "step": 588 }, { - "epoch": 0.044568877454504184, - "grad_norm": 4.280256748199463, - "learning_rate": 1.6039334341906205e-05, - "loss": 0.7368, + "epoch": 0.041464273143259414, + "grad_norm": 2.6571645736694336, + "learning_rate": 1.3995224970308967e-05, + "loss": 0.7378, "step": 589 }, { - "epoch": 0.0446445461768378, - "grad_norm": 4.382325649261475, - "learning_rate": 1.6066565809379728e-05, - "loss": 0.8988, + "epoch": 0.041534670890531505, + "grad_norm": 2.5773870944976807, + "learning_rate": 1.3995165844184718e-05, + "loss": 0.974, "step": 590 }, { - "epoch": 0.044720214899171426, - "grad_norm": 4.108618259429932, - "learning_rate": 1.609379727685325e-05, - "loss": 0.8688, + "epoch": 0.04160506863780359, + "grad_norm": 2.931352138519287, + "learning_rate": 1.3995106354375463e-05, + "loss": 0.8262, "step": 591 }, { - "epoch": 0.04479588362150505, - "grad_norm": 3.6567695140838623, - "learning_rate": 1.6121028744326777e-05, - "loss": 0.8829, + "epoch": 0.04167546638507568, + "grad_norm": 3.0011138916015625, + "learning_rate": 1.3995046500884293e-05, + "loss": 0.8139, "step": 592 }, { - "epoch": 0.044871552343838675, - "grad_norm": 3.6836469173431396, - "learning_rate": 1.6148260211800304e-05, - "loss": 1.0172, + "epoch": 0.041745864132347765, + "grad_norm": 4.662199974060059, + "learning_rate": 1.399498628371432e-05, + "loss": 0.8194, "step": 593 }, { - "epoch": 0.0449472210661723, - "grad_norm": 3.5387299060821533, - "learning_rate": 1.6175491679273827e-05, - "loss": 0.7189, + "epoch": 0.04181626187961985, + "grad_norm": 3.69881272315979, + "learning_rate": 1.3994925702868674e-05, + "loss": 0.8952, "step": 594 }, { - "epoch": 0.045022889788505924, - "grad_norm": 13.61294174194336, - "learning_rate": 1.6202723146747353e-05, - "loss": 0.7605, + "epoch": 0.04188665962689194, + "grad_norm": 3.118802070617676, + "learning_rate": 1.3994864758350507e-05, + "loss": 0.8823, "step": 595 }, { - "epoch": 0.04509855851083954, - "grad_norm": 4.561513900756836, - "learning_rate": 1.622995461422088e-05, - "loss": 0.7954, + "epoch": 0.041957057374164025, + "grad_norm": 3.833556652069092, + "learning_rate": 1.3994803450162988e-05, + "loss": 0.9066, "step": 596 }, { - "epoch": 0.045174227233173166, - "grad_norm": 4.984888553619385, - "learning_rate": 1.6257186081694402e-05, - "loss": 1.0385, + "epoch": 0.042027455121436116, + "grad_norm": 2.8065719604492188, + "learning_rate": 1.39947417783093e-05, + "loss": 0.8227, "step": 597 }, { - "epoch": 0.04524989595550679, - "grad_norm": 3.820335865020752, - "learning_rate": 1.6284417549167925e-05, - "loss": 0.8676, + "epoch": 0.0420978528687082, + "grad_norm": 2.7200613021850586, + "learning_rate": 1.3994679742792654e-05, + "loss": 0.8565, "step": 598 }, { - "epoch": 0.045325564677840414, - "grad_norm": 3.2544524669647217, - "learning_rate": 1.6311649016641452e-05, - "loss": 0.9514, + "epoch": 0.04216825061598029, + "grad_norm": 2.7900142669677734, + "learning_rate": 1.3994617343616272e-05, + "loss": 0.7952, "step": 599 }, { - "epoch": 0.04540123340017404, - "grad_norm": 5.345118999481201, - "learning_rate": 1.6338880484114978e-05, - "loss": 0.7714, + "epoch": 0.042238648363252376, + "grad_norm": 3.8182265758514404, + "learning_rate": 1.3994554580783401e-05, + "loss": 0.788, "step": 600 }, { - "epoch": 0.04547690212250766, - "grad_norm": 3.907956123352051, - "learning_rate": 1.63661119515885e-05, - "loss": 0.8939, + "epoch": 0.04230904611052446, + "grad_norm": 3.8539509773254395, + "learning_rate": 1.3994491454297302e-05, + "loss": 0.9168, "step": 601 }, { - "epoch": 0.04555257084484129, - "grad_norm": 6.510712146759033, - "learning_rate": 1.6393343419062028e-05, - "loss": 0.8035, + "epoch": 0.04237944385779655, + "grad_norm": 2.8815250396728516, + "learning_rate": 1.3994427964161258e-05, + "loss": 0.6446, "step": 602 }, { - "epoch": 0.045628239567174905, - "grad_norm": 3.6979787349700928, - "learning_rate": 1.6420574886535554e-05, - "loss": 0.8553, + "epoch": 0.042449841605068636, + "grad_norm": 3.436572313308716, + "learning_rate": 1.3994364110378567e-05, + "loss": 0.92, "step": 603 }, { - "epoch": 0.04570390828950853, - "grad_norm": 4.597548007965088, - "learning_rate": 1.6447806354009077e-05, - "loss": 0.803, + "epoch": 0.04252023935234073, + "grad_norm": 2.9643971920013428, + "learning_rate": 1.3994299892952555e-05, + "loss": 0.8225, "step": 604 }, { - "epoch": 0.045779577011842154, - "grad_norm": 4.338045120239258, - "learning_rate": 1.64750378214826e-05, - "loss": 0.878, + "epoch": 0.04259063709961281, + "grad_norm": 6.986907958984375, + "learning_rate": 1.3994235311886555e-05, + "loss": 0.8363, "step": 605 }, { - "epoch": 0.04585524573417578, - "grad_norm": 4.208822727203369, - "learning_rate": 1.6502269288956126e-05, - "loss": 0.807, + "epoch": 0.0426610348468849, + "grad_norm": 2.9424030780792236, + "learning_rate": 1.3994170367183928e-05, + "loss": 0.9139, "step": 606 }, { - "epoch": 0.0459309144565094, - "grad_norm": 3.9648923873901367, - "learning_rate": 1.6529500756429653e-05, - "loss": 0.8748, + "epoch": 0.04273143259415699, + "grad_norm": 4.638256549835205, + "learning_rate": 1.3994105058848048e-05, + "loss": 0.9338, "step": 607 }, { - "epoch": 0.04600658317884303, - "grad_norm": 2.76554536819458, - "learning_rate": 1.6556732223903176e-05, - "loss": 0.8559, + "epoch": 0.04280183034142907, + "grad_norm": 2.68192195892334, + "learning_rate": 1.3994039386882312e-05, + "loss": 0.9014, "step": 608 }, { - "epoch": 0.04608225190117665, - "grad_norm": 5.518862724304199, - "learning_rate": 1.6583963691376702e-05, - "loss": 0.8451, + "epoch": 0.04287222808870116, + "grad_norm": 3.1741292476654053, + "learning_rate": 1.3993973351290136e-05, + "loss": 0.8264, "step": 609 }, { - "epoch": 0.04615792062351027, - "grad_norm": 4.203677177429199, - "learning_rate": 1.661119515885023e-05, - "loss": 0.9215, + "epoch": 0.04294262583597325, + "grad_norm": 3.42448353767395, + "learning_rate": 1.399390695207495e-05, + "loss": 0.9273, "step": 610 }, { - "epoch": 0.046233589345843894, - "grad_norm": 3.4287822246551514, - "learning_rate": 1.6638426626323752e-05, - "loss": 0.8132, + "epoch": 0.04301302358324534, + "grad_norm": 5.154784202575684, + "learning_rate": 1.3993840189240207e-05, + "loss": 0.7445, "step": 611 }, { - "epoch": 0.04630925806817752, - "grad_norm": 4.197726726531982, - "learning_rate": 1.6665658093797278e-05, - "loss": 0.8095, + "epoch": 0.04308342133051742, + "grad_norm": 3.2171409130096436, + "learning_rate": 1.399377306278938e-05, + "loss": 0.8539, "step": 612 }, { - "epoch": 0.04638492679051114, - "grad_norm": 4.408070087432861, - "learning_rate": 1.66928895612708e-05, - "loss": 1.0774, + "epoch": 0.043153819077789514, + "grad_norm": 3.340319871902466, + "learning_rate": 1.3993705572725957e-05, + "loss": 0.7391, "step": 613 }, { - "epoch": 0.04646059551284477, - "grad_norm": 3.8713626861572266, - "learning_rate": 1.6720121028744328e-05, - "loss": 0.7434, + "epoch": 0.0432242168250616, + "grad_norm": 2.962698221206665, + "learning_rate": 1.3993637719053447e-05, + "loss": 0.8524, "step": 614 }, { - "epoch": 0.04653626423517839, - "grad_norm": 3.408956527709961, - "learning_rate": 1.674735249621785e-05, - "loss": 0.8511, + "epoch": 0.04329461457233368, + "grad_norm": 2.737208604812622, + "learning_rate": 1.3993569501775377e-05, + "loss": 0.8582, "step": 615 }, { - "epoch": 0.046611932957512016, - "grad_norm": 2.916395902633667, - "learning_rate": 1.6774583963691377e-05, - "loss": 1.0129, + "epoch": 0.043365012319605774, + "grad_norm": 3.2467129230499268, + "learning_rate": 1.3993500920895297e-05, + "loss": 0.863, "step": 616 }, { - "epoch": 0.04668760167984563, - "grad_norm": 3.563767671585083, - "learning_rate": 1.6801815431164903e-05, - "loss": 0.927, + "epoch": 0.04343541006687786, + "grad_norm": 2.6415817737579346, + "learning_rate": 1.399343197641677e-05, + "loss": 0.8093, "step": 617 }, { - "epoch": 0.04676327040217926, - "grad_norm": 12.874147415161133, - "learning_rate": 1.6829046898638426e-05, - "loss": 0.7429, + "epoch": 0.04350580781414995, + "grad_norm": 2.807705879211426, + "learning_rate": 1.399336266834338e-05, + "loss": 0.7733, "step": 618 }, { - "epoch": 0.04683893912451288, - "grad_norm": 4.559039115905762, - "learning_rate": 1.6856278366111953e-05, - "loss": 0.9711, + "epoch": 0.043576205561422034, + "grad_norm": 3.71528697013855, + "learning_rate": 1.3993292996678733e-05, + "loss": 0.8476, "step": 619 }, { - "epoch": 0.04691460784684651, - "grad_norm": 5.084630489349365, - "learning_rate": 1.6883509833585476e-05, - "loss": 0.87, + "epoch": 0.043646603308694125, + "grad_norm": 4.940652370452881, + "learning_rate": 1.3993222961426446e-05, + "loss": 0.9715, "step": 620 }, { - "epoch": 0.04699027656918013, - "grad_norm": 4.294825553894043, - "learning_rate": 1.6910741301059002e-05, - "loss": 1.1267, + "epoch": 0.04371700105596621, + "grad_norm": 3.358454465866089, + "learning_rate": 1.3993152562590168e-05, + "loss": 0.8072, "step": 621 }, { - "epoch": 0.047065945291513755, - "grad_norm": 14.884833335876465, - "learning_rate": 1.6937972768532525e-05, - "loss": 0.9987, + "epoch": 0.043787398803238294, + "grad_norm": 2.5002331733703613, + "learning_rate": 1.3993081800173553e-05, + "loss": 0.8363, "step": 622 }, { - "epoch": 0.04714161401384737, - "grad_norm": 3.429875373840332, - "learning_rate": 1.696520423600605e-05, - "loss": 0.8943, + "epoch": 0.043857796550510385, + "grad_norm": 3.0807623863220215, + "learning_rate": 1.3993010674180281e-05, + "loss": 0.8641, "step": 623 }, { - "epoch": 0.047217282736181, - "grad_norm": 4.022202014923096, - "learning_rate": 1.6992435703479578e-05, - "loss": 0.9646, + "epoch": 0.04392819429778247, + "grad_norm": 3.0479373931884766, + "learning_rate": 1.3992939184614052e-05, + "loss": 0.7353, "step": 624 }, { - "epoch": 0.04729295145851462, - "grad_norm": 3.040421962738037, - "learning_rate": 1.70196671709531e-05, - "loss": 0.9784, + "epoch": 0.04399859204505456, + "grad_norm": 2.978351593017578, + "learning_rate": 1.3992867331478581e-05, + "loss": 0.8863, "step": 625 }, { - "epoch": 0.047368620180848246, - "grad_norm": 4.135276794433594, - "learning_rate": 1.7046898638426627e-05, - "loss": 0.793, + "epoch": 0.044068989792326645, + "grad_norm": 2.8319127559661865, + "learning_rate": 1.3992795114777605e-05, + "loss": 0.8966, "step": 626 }, { - "epoch": 0.04744428890318187, - "grad_norm": 3.7351131439208984, - "learning_rate": 1.7074130105900154e-05, - "loss": 0.7567, + "epoch": 0.044139387539598736, + "grad_norm": 4.638989448547363, + "learning_rate": 1.3992722534514879e-05, + "loss": 0.9038, "step": 627 }, { - "epoch": 0.047519957625515495, - "grad_norm": 3.320626974105835, - "learning_rate": 1.7101361573373677e-05, - "loss": 0.6704, + "epoch": 0.04420978528687082, + "grad_norm": 3.836480140686035, + "learning_rate": 1.3992649590694173e-05, + "loss": 0.8008, "step": 628 }, { - "epoch": 0.04759562634784912, - "grad_norm": 4.5212178230285645, - "learning_rate": 1.71285930408472e-05, - "loss": 0.8622, + "epoch": 0.044280183034142905, + "grad_norm": 3.361671209335327, + "learning_rate": 1.3992576283319283e-05, + "loss": 0.9475, "step": 629 }, { - "epoch": 0.04767129507018274, - "grad_norm": 4.007808208465576, - "learning_rate": 1.7155824508320726e-05, - "loss": 0.8503, + "epoch": 0.044350580781414996, + "grad_norm": 2.2469801902770996, + "learning_rate": 1.399250261239402e-05, + "loss": 0.8169, "step": 630 }, { - "epoch": 0.04774696379251636, - "grad_norm": 4.011386394500732, - "learning_rate": 1.7183055975794253e-05, - "loss": 0.9573, + "epoch": 0.04442097852868708, + "grad_norm": 2.9453229904174805, + "learning_rate": 1.3992428577922211e-05, + "loss": 0.7529, "step": 631 }, { - "epoch": 0.047822632514849986, - "grad_norm": 4.2028937339782715, - "learning_rate": 1.7210287443267776e-05, - "loss": 0.8327, + "epoch": 0.04449137627595917, + "grad_norm": 2.7885899543762207, + "learning_rate": 1.399235417990771e-05, + "loss": 0.839, "step": 632 }, { - "epoch": 0.04789830123718361, - "grad_norm": 3.389353036880493, - "learning_rate": 1.7237518910741302e-05, - "loss": 0.8626, + "epoch": 0.044561774023231256, + "grad_norm": 2.331615924835205, + "learning_rate": 1.3992279418354381e-05, + "loss": 0.7937, "step": 633 }, { - "epoch": 0.047973969959517235, - "grad_norm": 3.483424663543701, - "learning_rate": 1.726475037821483e-05, - "loss": 1.0075, + "epoch": 0.04463217177050335, + "grad_norm": 3.755197286605835, + "learning_rate": 1.3992204293266114e-05, + "loss": 0.885, "step": 634 }, { - "epoch": 0.04804963868185086, - "grad_norm": 2.878598213195801, - "learning_rate": 1.729198184568835e-05, - "loss": 0.8132, + "epoch": 0.04470256951777543, + "grad_norm": 2.7593140602111816, + "learning_rate": 1.3992128804646811e-05, + "loss": 0.7841, "step": 635 }, { - "epoch": 0.048125307404184484, - "grad_norm": 4.429380893707275, - "learning_rate": 1.7319213313161874e-05, - "loss": 0.8764, + "epoch": 0.044772967265047516, + "grad_norm": 2.3185527324676514, + "learning_rate": 1.3992052952500402e-05, + "loss": 0.8011, "step": 636 }, { - "epoch": 0.0482009761265181, - "grad_norm": 3.748349189758301, - "learning_rate": 1.73464447806354e-05, - "loss": 0.7818, + "epoch": 0.04484336501231961, + "grad_norm": 3.883204221725464, + "learning_rate": 1.3991976736830825e-05, + "loss": 0.9378, "step": 637 }, { - "epoch": 0.048276644848851726, - "grad_norm": 2.982710838317871, - "learning_rate": 1.7373676248108927e-05, - "loss": 0.8131, + "epoch": 0.04491376275959169, + "grad_norm": 2.983015537261963, + "learning_rate": 1.3991900157642048e-05, + "loss": 0.8947, "step": 638 }, { - "epoch": 0.04835231357118535, - "grad_norm": 3.4813013076782227, - "learning_rate": 1.740090771558245e-05, - "loss": 1.0064, + "epoch": 0.04498416050686378, + "grad_norm": 3.223975658416748, + "learning_rate": 1.3991823214938046e-05, + "loss": 0.7582, "step": 639 }, { - "epoch": 0.048427982293518974, - "grad_norm": 4.015783309936523, - "learning_rate": 1.7428139183055977e-05, - "loss": 0.775, + "epoch": 0.04505455825413587, + "grad_norm": 3.5113606452941895, + "learning_rate": 1.3991745908722824e-05, + "loss": 0.7585, "step": 640 }, { - "epoch": 0.0485036510158526, - "grad_norm": 5.063205242156982, - "learning_rate": 1.7455370650529503e-05, - "loss": 0.8112, + "epoch": 0.04512495600140796, + "grad_norm": 3.5058138370513916, + "learning_rate": 1.3991668239000401e-05, + "loss": 0.9325, "step": 641 }, { - "epoch": 0.04857931973818622, - "grad_norm": 3.2861835956573486, - "learning_rate": 1.7482602118003026e-05, - "loss": 0.8949, + "epoch": 0.04519535374868004, + "grad_norm": 2.8188700675964355, + "learning_rate": 1.3991590205774816e-05, + "loss": 0.8776, "step": 642 }, { - "epoch": 0.04865498846051984, - "grad_norm": 4.188798904418945, - "learning_rate": 1.750983358547655e-05, - "loss": 0.7644, + "epoch": 0.045265751495952126, + "grad_norm": 2.6184709072113037, + "learning_rate": 1.399151180905012e-05, + "loss": 0.8521, "step": 643 }, { - "epoch": 0.048730657182853465, - "grad_norm": 2.6496074199676514, - "learning_rate": 1.7537065052950075e-05, - "loss": 0.7834, + "epoch": 0.04533614924322422, + "grad_norm": 8.33962345123291, + "learning_rate": 1.3991433048830394e-05, + "loss": 0.7851, "step": 644 }, { - "epoch": 0.04880632590518709, - "grad_norm": 3.977748155593872, - "learning_rate": 1.7564296520423602e-05, - "loss": 0.7691, + "epoch": 0.0454065469904963, + "grad_norm": 2.7237563133239746, + "learning_rate": 1.3991353925119735e-05, + "loss": 0.7464, "step": 645 }, { - "epoch": 0.048881994627520714, - "grad_norm": 4.396695613861084, - "learning_rate": 1.7591527987897125e-05, - "loss": 0.7951, + "epoch": 0.04547694473776839, + "grad_norm": 2.90704083442688, + "learning_rate": 1.3991274437922251e-05, + "loss": 0.808, "step": 646 }, { - "epoch": 0.04895766334985434, - "grad_norm": 3.3221042156219482, - "learning_rate": 1.761875945537065e-05, - "loss": 0.69, + "epoch": 0.04554734248504048, + "grad_norm": 2.781816244125366, + "learning_rate": 1.3991194587242079e-05, + "loss": 0.8226, "step": 647 }, { - "epoch": 0.04903333207218796, - "grad_norm": 4.295675754547119, - "learning_rate": 1.7645990922844178e-05, - "loss": 0.6565, + "epoch": 0.04561774023231257, + "grad_norm": 3.1352083683013916, + "learning_rate": 1.3991114373083367e-05, + "loss": 0.8892, "step": 648 }, { - "epoch": 0.04910900079452159, - "grad_norm": 3.0245003700256348, - "learning_rate": 1.76732223903177e-05, - "loss": 0.9869, + "epoch": 0.04568813797958465, + "grad_norm": 2.9643239974975586, + "learning_rate": 1.399103379545029e-05, + "loss": 0.8932, "step": 649 }, { - "epoch": 0.049184669516855205, - "grad_norm": 3.450180768966675, - "learning_rate": 1.7700453857791224e-05, - "loss": 0.894, + "epoch": 0.04575853572685674, + "grad_norm": 3.1343560218811035, + "learning_rate": 1.3990952854347032e-05, + "loss": 0.6376, "step": 650 }, { - "epoch": 0.04926033823918883, - "grad_norm": 3.598787546157837, - "learning_rate": 1.772768532526475e-05, - "loss": 0.8114, + "epoch": 0.04582893347412883, + "grad_norm": 2.53393292427063, + "learning_rate": 1.3990871549777804e-05, + "loss": 0.8668, "step": 651 }, { - "epoch": 0.049336006961522454, - "grad_norm": 3.394605875015259, - "learning_rate": 1.7754916792738276e-05, - "loss": 0.913, + "epoch": 0.04589933122140091, + "grad_norm": 2.611707925796509, + "learning_rate": 1.3990789881746834e-05, + "loss": 0.7879, "step": 652 }, { - "epoch": 0.04941167568385608, - "grad_norm": 3.7939605712890625, - "learning_rate": 1.77821482602118e-05, - "loss": 0.9026, + "epoch": 0.045969728968673004, + "grad_norm": 2.382315158843994, + "learning_rate": 1.3990707850258365e-05, + "loss": 0.9378, "step": 653 }, { - "epoch": 0.0494873444061897, - "grad_norm": 3.1907098293304443, - "learning_rate": 1.7809379727685326e-05, - "loss": 0.8377, + "epoch": 0.04604012671594509, + "grad_norm": 2.7706809043884277, + "learning_rate": 1.3990625455316667e-05, + "loss": 0.8717, "step": 654 }, { - "epoch": 0.04956301312852333, - "grad_norm": 3.942924976348877, - "learning_rate": 1.7836611195158852e-05, - "loss": 0.8688, + "epoch": 0.04611052446321718, + "grad_norm": 3.0526013374328613, + "learning_rate": 1.3990542696926018e-05, + "loss": 0.8795, "step": 655 }, { - "epoch": 0.04963868185085695, - "grad_norm": 3.0986690521240234, - "learning_rate": 1.7863842662632375e-05, - "loss": 0.8261, + "epoch": 0.046180922210489264, + "grad_norm": 3.222999095916748, + "learning_rate": 1.3990459575090723e-05, + "loss": 0.8207, "step": 656 }, { - "epoch": 0.04971435057319057, - "grad_norm": 4.000396728515625, - "learning_rate": 1.78910741301059e-05, - "loss": 0.8151, + "epoch": 0.04625131995776135, + "grad_norm": 3.0456573963165283, + "learning_rate": 1.3990376089815105e-05, + "loss": 0.8873, "step": 657 }, { - "epoch": 0.04979001929552419, - "grad_norm": 4.246333122253418, - "learning_rate": 1.7918305597579425e-05, - "loss": 0.7004, + "epoch": 0.04632171770503344, + "grad_norm": 2.870551824569702, + "learning_rate": 1.3990292241103502e-05, + "loss": 0.931, "step": 658 }, { - "epoch": 0.04986568801785782, - "grad_norm": 3.094942092895508, - "learning_rate": 1.794553706505295e-05, - "loss": 0.8833, + "epoch": 0.046392115452305524, + "grad_norm": 3.0117945671081543, + "learning_rate": 1.3990208028960276e-05, + "loss": 0.8932, "step": 659 }, { - "epoch": 0.04994135674019144, - "grad_norm": 2.5228271484375, - "learning_rate": 1.7972768532526474e-05, - "loss": 0.786, + "epoch": 0.046462513199577615, + "grad_norm": 3.116360664367676, + "learning_rate": 1.39901234533898e-05, + "loss": 0.9275, "step": 660 }, { - "epoch": 0.05001702546252507, - "grad_norm": 3.9577856063842773, - "learning_rate": 1.8e-05, - "loss": 0.9358, + "epoch": 0.0465329109468497, + "grad_norm": 3.6685383319854736, + "learning_rate": 1.399003851439648e-05, + "loss": 0.8892, "step": 661 }, { - "epoch": 0.05009269418485869, - "grad_norm": 2.605454444885254, - "learning_rate": 1.7999999718195446e-05, - "loss": 0.7314, + "epoch": 0.04660330869412179, + "grad_norm": 2.819368839263916, + "learning_rate": 1.3989953211984725e-05, + "loss": 0.9099, "step": 662 }, { - "epoch": 0.050168362907192315, - "grad_norm": 4.23893928527832, - "learning_rate": 1.79999988727818e-05, - "loss": 0.7816, + "epoch": 0.046673706441393875, + "grad_norm": 3.810616970062256, + "learning_rate": 1.3989867546158971e-05, + "loss": 0.7366, "step": 663 }, { - "epoch": 0.05024403162952593, - "grad_norm": 3.738476037979126, - "learning_rate": 1.7999997463759113e-05, - "loss": 0.7877, + "epoch": 0.04674410418866596, + "grad_norm": 2.8623907566070557, + "learning_rate": 1.3989781516923675e-05, + "loss": 0.8477, "step": 664 }, { - "epoch": 0.05031970035185956, - "grad_norm": 3.7416157722473145, - "learning_rate": 1.7999995491127477e-05, - "loss": 0.9857, + "epoch": 0.04681450193593805, + "grad_norm": 2.969770669937134, + "learning_rate": 1.3989695124283307e-05, + "loss": 0.7092, "step": 665 }, { - "epoch": 0.05039536907419318, - "grad_norm": 2.7435741424560547, - "learning_rate": 1.7999992954887013e-05, - "loss": 0.683, + "epoch": 0.046884899683210135, + "grad_norm": 2.634084939956665, + "learning_rate": 1.3989608368242359e-05, + "loss": 0.8062, "step": 666 }, { - "epoch": 0.050471037796526806, - "grad_norm": 2.920893430709839, - "learning_rate": 1.7999989855037883e-05, - "loss": 0.9957, + "epoch": 0.046955297430482226, + "grad_norm": 2.4992165565490723, + "learning_rate": 1.3989521248805342e-05, + "loss": 0.9245, "step": 667 }, { - "epoch": 0.05054670651886043, - "grad_norm": 3.039703607559204, - "learning_rate": 1.7999986191580278e-05, - "loss": 0.7383, + "epoch": 0.04702569517775431, + "grad_norm": 2.5653514862060547, + "learning_rate": 1.3989433765976784e-05, + "loss": 0.8038, "step": 668 }, { - "epoch": 0.050622375241194055, - "grad_norm": 3.4982380867004395, - "learning_rate": 1.7999981964514427e-05, - "loss": 0.9463, + "epoch": 0.0470960929250264, + "grad_norm": 2.589076042175293, + "learning_rate": 1.3989345919761236e-05, + "loss": 0.8655, "step": 669 }, { - "epoch": 0.05069804396352767, - "grad_norm": 6.600189208984375, - "learning_rate": 1.7999977173840594e-05, - "loss": 0.9587, + "epoch": 0.047166490672298486, + "grad_norm": 2.902453899383545, + "learning_rate": 1.3989257710163262e-05, + "loss": 0.8627, "step": 670 }, { - "epoch": 0.0507737126858613, - "grad_norm": 4.686428070068359, - "learning_rate": 1.7999971819559082e-05, - "loss": 0.8895, + "epoch": 0.04723688841957057, + "grad_norm": 2.7680416107177734, + "learning_rate": 1.398916913718745e-05, + "loss": 0.8249, "step": 671 }, { - "epoch": 0.05084938140819492, - "grad_norm": 3.403703451156616, - "learning_rate": 1.799996590167023e-05, - "loss": 0.9366, + "epoch": 0.04730728616684266, + "grad_norm": 3.3486061096191406, + "learning_rate": 1.3989080200838407e-05, + "loss": 0.8974, "step": 672 }, { - "epoch": 0.050925050130528546, - "grad_norm": 3.352269411087036, - "learning_rate": 1.7999959420174395e-05, - "loss": 0.9894, + "epoch": 0.047377683914114746, + "grad_norm": 2.5028817653656006, + "learning_rate": 1.3988990901120753e-05, + "loss": 0.8272, "step": 673 }, { - "epoch": 0.05100071885286217, - "grad_norm": 3.1470065116882324, - "learning_rate": 1.7999952375072e-05, - "loss": 0.8608, + "epoch": 0.04744808166138684, + "grad_norm": 5.2081217765808105, + "learning_rate": 1.3988901238039133e-05, + "loss": 0.9207, "step": 674 }, { - "epoch": 0.051076387575195795, - "grad_norm": 5.2624897956848145, - "learning_rate": 1.7999944766363475e-05, - "loss": 0.8582, + "epoch": 0.04751847940865892, + "grad_norm": 3.142010450363159, + "learning_rate": 1.3988811211598208e-05, + "loss": 0.8575, "step": 675 }, { - "epoch": 0.05115205629752942, - "grad_norm": 4.0696187019348145, - "learning_rate": 1.7999936594049297e-05, - "loss": 0.8385, + "epoch": 0.04758887715593101, + "grad_norm": 3.288538694381714, + "learning_rate": 1.3988720821802658e-05, + "loss": 0.8307, "step": 676 }, { - "epoch": 0.05122772501986304, - "grad_norm": 4.101423740386963, - "learning_rate": 1.7999927858129984e-05, - "loss": 0.8864, + "epoch": 0.0476592749032031, + "grad_norm": 2.5837507247924805, + "learning_rate": 1.3988630068657184e-05, + "loss": 0.6858, "step": 677 }, { - "epoch": 0.05130339374219666, - "grad_norm": 4.04774284362793, - "learning_rate": 1.7999918558606075e-05, - "loss": 0.8205, + "epoch": 0.04772967265047518, + "grad_norm": 2.890468120574951, + "learning_rate": 1.3988538952166504e-05, + "loss": 0.9703, "step": 678 }, { - "epoch": 0.051379062464530285, - "grad_norm": 4.320160388946533, - "learning_rate": 1.7999908695478162e-05, - "loss": 1.0081, + "epoch": 0.04780007039774727, + "grad_norm": 2.7492434978485107, + "learning_rate": 1.3988447472335353e-05, + "loss": 0.9226, "step": 679 }, { - "epoch": 0.05145473118686391, - "grad_norm": 4.122174263000488, - "learning_rate": 1.7999898268746852e-05, - "loss": 0.6311, + "epoch": 0.04787046814501936, + "grad_norm": 2.8699605464935303, + "learning_rate": 1.3988355629168489e-05, + "loss": 0.8332, "step": 680 }, { - "epoch": 0.051530399909197534, - "grad_norm": 4.779628276824951, - "learning_rate": 1.7999887278412806e-05, - "loss": 0.9552, + "epoch": 0.04794086589229145, + "grad_norm": 2.451011896133423, + "learning_rate": 1.3988263422670688e-05, + "loss": 0.8303, "step": 681 }, { - "epoch": 0.05160606863153116, - "grad_norm": 3.8603785037994385, - "learning_rate": 1.7999875724476707e-05, - "loss": 0.889, + "epoch": 0.04801126363956353, + "grad_norm": 2.8877081871032715, + "learning_rate": 1.3988170852846743e-05, + "loss": 0.9855, "step": 682 }, { - "epoch": 0.05168173735386478, - "grad_norm": 2.222905158996582, - "learning_rate": 1.7999863606939286e-05, - "loss": 1.0504, + "epoch": 0.048081661386835624, + "grad_norm": 2.841970682144165, + "learning_rate": 1.3988077919701465e-05, + "loss": 0.8673, "step": 683 }, { - "epoch": 0.0517574060761984, - "grad_norm": 2.802685260772705, - "learning_rate": 1.7999850925801292e-05, - "loss": 0.9105, + "epoch": 0.04815205913410771, + "grad_norm": 3.1847903728485107, + "learning_rate": 1.3987984623239687e-05, + "loss": 0.792, "step": 684 }, { - "epoch": 0.051833074798532025, - "grad_norm": 3.3969781398773193, - "learning_rate": 1.7999837681063527e-05, - "loss": 0.7931, + "epoch": 0.04822245688137979, + "grad_norm": 2.6723263263702393, + "learning_rate": 1.3987890963466262e-05, + "loss": 0.8908, "step": 685 }, { - "epoch": 0.05190874352086565, - "grad_norm": 3.549208641052246, - "learning_rate": 1.7999823872726814e-05, - "loss": 0.9147, + "epoch": 0.048292854628651884, + "grad_norm": 3.3318393230438232, + "learning_rate": 1.3987796940386055e-05, + "loss": 0.7154, "step": 686 }, { - "epoch": 0.051984412243199274, - "grad_norm": 2.738788366317749, - "learning_rate": 1.7999809500792023e-05, - "loss": 0.6898, + "epoch": 0.04836325237592397, + "grad_norm": 3.527179718017578, + "learning_rate": 1.3987702554003958e-05, + "loss": 0.9761, "step": 687 }, { - "epoch": 0.0520600809655329, - "grad_norm": 5.0861711502075195, - "learning_rate": 1.799979456526005e-05, - "loss": 0.9764, + "epoch": 0.04843365012319606, + "grad_norm": 2.9489657878875732, + "learning_rate": 1.3987607804324876e-05, + "loss": 0.8084, "step": 688 }, { - "epoch": 0.05213574968786652, - "grad_norm": 3.7050702571868896, - "learning_rate": 1.799977906613184e-05, - "loss": 0.8309, + "epoch": 0.048504047870468144, + "grad_norm": 3.057199478149414, + "learning_rate": 1.3987512691353734e-05, + "loss": 0.8009, "step": 689 }, { - "epoch": 0.05221141841020015, - "grad_norm": 2.8487472534179688, - "learning_rate": 1.7999763003408348e-05, - "loss": 0.798, + "epoch": 0.048574445617740235, + "grad_norm": 2.946714401245117, + "learning_rate": 1.3987417215095481e-05, + "loss": 0.8358, "step": 690 }, { - "epoch": 0.052287087132533765, - "grad_norm": 3.320040464401245, - "learning_rate": 1.7999746377090593e-05, - "loss": 0.9132, + "epoch": 0.04864484336501232, + "grad_norm": 2.3823678493499756, + "learning_rate": 1.3987321375555081e-05, + "loss": 0.9048, "step": 691 }, { - "epoch": 0.05236275585486739, - "grad_norm": 2.664503574371338, - "learning_rate": 1.7999729187179606e-05, - "loss": 0.8706, + "epoch": 0.04871524111228441, + "grad_norm": 2.932769298553467, + "learning_rate": 1.3987225172737512e-05, + "loss": 0.7162, "step": 692 }, { - "epoch": 0.052438424577201014, - "grad_norm": 5.23117208480835, - "learning_rate": 1.7999711433676474e-05, - "loss": 0.7535, + "epoch": 0.048785638859556495, + "grad_norm": 2.647247076034546, + "learning_rate": 1.3987128606647777e-05, + "loss": 0.8488, "step": 693 }, { - "epoch": 0.05251409329953464, - "grad_norm": 4.686688423156738, - "learning_rate": 1.7999693116582302e-05, - "loss": 0.9761, + "epoch": 0.04885603660682858, + "grad_norm": 2.645198106765747, + "learning_rate": 1.39870316772909e-05, + "loss": 0.821, "step": 694 }, { - "epoch": 0.05258976202186826, - "grad_norm": 2.983670234680176, - "learning_rate": 1.7999674235898237e-05, - "loss": 0.8898, + "epoch": 0.04892643435410067, + "grad_norm": 2.560230255126953, + "learning_rate": 1.3986934384671917e-05, + "loss": 0.8415, "step": 695 }, { - "epoch": 0.05266543074420189, - "grad_norm": 3.057015895843506, - "learning_rate": 1.7999654791625463e-05, - "loss": 0.7925, + "epoch": 0.048996832101372755, + "grad_norm": 3.8510472774505615, + "learning_rate": 1.3986836728795889e-05, + "loss": 0.7878, "step": 696 }, { - "epoch": 0.052741099466535504, - "grad_norm": 3.9914684295654297, - "learning_rate": 1.79996347837652e-05, - "loss": 0.7957, + "epoch": 0.049067229848644846, + "grad_norm": 3.201700448989868, + "learning_rate": 1.3986738709667891e-05, + "loss": 0.7937, "step": 697 }, { - "epoch": 0.05281676818886913, - "grad_norm": 3.987391233444214, - "learning_rate": 1.7999614212318696e-05, - "loss": 0.9454, + "epoch": 0.04913762759591693, + "grad_norm": 2.4279398918151855, + "learning_rate": 1.398664032729302e-05, + "loss": 0.7355, "step": 698 }, { - "epoch": 0.05289243691120275, - "grad_norm": 4.3634138107299805, - "learning_rate": 1.7999593077287244e-05, - "loss": 1.1532, + "epoch": 0.04920802534318902, + "grad_norm": 2.3685619831085205, + "learning_rate": 1.3986541581676391e-05, + "loss": 0.898, "step": 699 }, { - "epoch": 0.05296810563353638, - "grad_norm": 3.055154800415039, - "learning_rate": 1.799957137867216e-05, - "loss": 0.8241, + "epoch": 0.049278423090461106, + "grad_norm": 2.8274645805358887, + "learning_rate": 1.3986442472823138e-05, + "loss": 0.8004, "step": 700 }, { - "epoch": 0.05304377435587, - "grad_norm": 3.825345277786255, - "learning_rate": 1.7999549116474813e-05, - "loss": 0.8979, + "epoch": 0.04934882083773319, + "grad_norm": 3.1318671703338623, + "learning_rate": 1.3986343000738412e-05, + "loss": 0.859, "step": 701 }, { - "epoch": 0.053119443078203626, - "grad_norm": 4.292139530181885, - "learning_rate": 1.7999526290696592e-05, - "loss": 0.9344, + "epoch": 0.04941921858500528, + "grad_norm": 2.432746648788452, + "learning_rate": 1.3986243165427389e-05, + "loss": 0.814, "step": 702 }, { - "epoch": 0.05319511180053725, - "grad_norm": 4.645684719085693, - "learning_rate": 1.7999502901338925e-05, - "loss": 0.9731, + "epoch": 0.049489616332277366, + "grad_norm": 3.2953317165374756, + "learning_rate": 1.3986142966895255e-05, + "loss": 0.9016, "step": 703 }, { - "epoch": 0.05327078052287087, - "grad_norm": 2.999361753463745, - "learning_rate": 1.7999478948403278e-05, - "loss": 0.9273, + "epoch": 0.04956001407954946, + "grad_norm": 2.799640417098999, + "learning_rate": 1.398604240514722e-05, + "loss": 0.8283, "step": 704 }, { - "epoch": 0.05334644924520449, - "grad_norm": 2.876819610595703, - "learning_rate": 1.7999454431891153e-05, - "loss": 0.8832, + "epoch": 0.04963041182682154, + "grad_norm": 2.985623359680176, + "learning_rate": 1.3985941480188516e-05, + "loss": 0.7764, "step": 705 }, { - "epoch": 0.05342211796753812, - "grad_norm": 3.5443317890167236, - "learning_rate": 1.7999429351804084e-05, - "loss": 0.792, + "epoch": 0.04970080957409363, + "grad_norm": 2.5629806518554688, + "learning_rate": 1.3985840192024385e-05, + "loss": 0.793, "step": 706 }, { - "epoch": 0.05349778668987174, - "grad_norm": 2.4923086166381836, - "learning_rate": 1.799940370814364e-05, - "loss": 0.8596, + "epoch": 0.04977120732136572, + "grad_norm": 2.641219139099121, + "learning_rate": 1.3985738540660098e-05, + "loss": 0.9018, "step": 707 }, { - "epoch": 0.053573455412205366, - "grad_norm": 3.5171520709991455, - "learning_rate": 1.799937750091143e-05, - "loss": 0.8311, + "epoch": 0.0498416050686378, + "grad_norm": 2.911142587661743, + "learning_rate": 1.3985636526100934e-05, + "loss": 0.8143, "step": 708 }, { - "epoch": 0.05364912413453899, - "grad_norm": 3.3235208988189697, - "learning_rate": 1.799935073010909e-05, - "loss": 0.7817, + "epoch": 0.04991200281590989, + "grad_norm": 2.908027410507202, + "learning_rate": 1.3985534148352206e-05, + "loss": 0.7829, "step": 709 }, { - "epoch": 0.053724792856872615, - "grad_norm": 3.130582809448242, - "learning_rate": 1.79993233957383e-05, - "loss": 0.8418, + "epoch": 0.049982400563181976, + "grad_norm": 3.7878053188323975, + "learning_rate": 1.3985431407419227e-05, + "loss": 0.934, "step": 710 }, { - "epoch": 0.05380046157920623, - "grad_norm": 2.8573694229125977, - "learning_rate": 1.7999295497800774e-05, - "loss": 0.7832, + "epoch": 0.05005279831045407, + "grad_norm": 2.39341402053833, + "learning_rate": 1.3985328303307345e-05, + "loss": 0.8724, "step": 711 }, { - "epoch": 0.05387613030153986, - "grad_norm": 3.514740467071533, - "learning_rate": 1.7999267036298257e-05, - "loss": 0.8998, + "epoch": 0.05012319605772615, + "grad_norm": 3.2111599445343018, + "learning_rate": 1.3985224836021916e-05, + "loss": 0.83, "step": 712 }, { - "epoch": 0.05395179902387348, - "grad_norm": 4.873480796813965, - "learning_rate": 1.799923801123253e-05, - "loss": 0.9173, + "epoch": 0.05019359380499824, + "grad_norm": 2.432382106781006, + "learning_rate": 1.3985121005568324e-05, + "loss": 0.8139, "step": 713 }, { - "epoch": 0.054027467746207106, - "grad_norm": 3.092484951019287, - "learning_rate": 1.7999208422605412e-05, - "loss": 0.922, + "epoch": 0.05026399155227033, + "grad_norm": 3.898714065551758, + "learning_rate": 1.3985016811951963e-05, + "loss": 0.7808, "step": 714 }, { - "epoch": 0.05410313646854073, - "grad_norm": 3.5271174907684326, - "learning_rate": 1.7999178270418757e-05, - "loss": 0.8321, + "epoch": 0.05033438929954241, + "grad_norm": 2.8255977630615234, + "learning_rate": 1.3984912255178252e-05, + "loss": 0.7893, "step": 715 }, { - "epoch": 0.054178805190874355, - "grad_norm": 3.2208545207977295, - "learning_rate": 1.799914755467445e-05, - "loss": 0.9818, + "epoch": 0.0504047870468145, + "grad_norm": 2.616126537322998, + "learning_rate": 1.3984807335252628e-05, + "loss": 0.728, "step": 716 }, { - "epoch": 0.05425447391320797, - "grad_norm": 3.0340662002563477, - "learning_rate": 1.7999116275374415e-05, - "loss": 0.7099, + "epoch": 0.05047518479408659, + "grad_norm": 2.8033738136291504, + "learning_rate": 1.3984702052180544e-05, + "loss": 0.8237, "step": 717 }, { - "epoch": 0.054330142635541596, - "grad_norm": 3.023000717163086, - "learning_rate": 1.799908443252061e-05, - "loss": 0.835, + "epoch": 0.05054558254135868, + "grad_norm": 2.8952503204345703, + "learning_rate": 1.3984596405967475e-05, + "loss": 0.7724, "step": 718 }, { - "epoch": 0.05440581135787522, - "grad_norm": 4.08595609664917, - "learning_rate": 1.799905202611504e-05, - "loss": 0.8734, + "epoch": 0.05061598028863076, + "grad_norm": 2.75644588470459, + "learning_rate": 1.3984490396618913e-05, + "loss": 0.7862, "step": 719 }, { - "epoch": 0.054481480080208845, - "grad_norm": 3.454214572906494, - "learning_rate": 1.799901905615972e-05, - "loss": 1.0036, + "epoch": 0.050686378035902854, + "grad_norm": 2.79675555229187, + "learning_rate": 1.3984384024140369e-05, + "loss": 0.7688, "step": 720 }, { - "epoch": 0.05455714880254247, - "grad_norm": 3.4939661026000977, - "learning_rate": 1.799898552265672e-05, - "loss": 0.7938, + "epoch": 0.05075677578317494, + "grad_norm": 2.7456278800964355, + "learning_rate": 1.3984277288537374e-05, + "loss": 0.6981, "step": 721 }, { - "epoch": 0.054632817524876094, - "grad_norm": 4.215449333190918, - "learning_rate": 1.799895142560814e-05, - "loss": 0.9008, + "epoch": 0.05082717353044702, + "grad_norm": 2.5601515769958496, + "learning_rate": 1.3984170189815481e-05, + "loss": 0.8164, "step": 722 }, { - "epoch": 0.05470848624720972, - "grad_norm": 3.8113982677459717, - "learning_rate": 1.799891676501612e-05, - "loss": 0.8452, + "epoch": 0.050897571277719114, + "grad_norm": 3.6979687213897705, + "learning_rate": 1.398406272798025e-05, + "loss": 0.6856, "step": 723 }, { - "epoch": 0.054784154969543336, - "grad_norm": 3.6723668575286865, - "learning_rate": 1.7998881540882822e-05, - "loss": 0.8416, + "epoch": 0.0509679690249912, + "grad_norm": 2.6759891510009766, + "learning_rate": 1.3983954903037274e-05, + "loss": 0.871, "step": 724 }, { - "epoch": 0.05485982369187696, - "grad_norm": 3.342585325241089, - "learning_rate": 1.7998845753210456e-05, - "loss": 0.9927, + "epoch": 0.05103836677226329, + "grad_norm": 2.295236110687256, + "learning_rate": 1.3983846714992158e-05, + "loss": 0.6784, "step": 725 }, { - "epoch": 0.054935492414210585, - "grad_norm": 3.9180972576141357, - "learning_rate": 1.7998809402001267e-05, - "loss": 0.9583, + "epoch": 0.051108764519535374, + "grad_norm": 4.108501434326172, + "learning_rate": 1.3983738163850526e-05, + "loss": 0.8216, "step": 726 }, { - "epoch": 0.05501116113654421, - "grad_norm": 3.55850887298584, - "learning_rate": 1.7998772487257524e-05, - "loss": 0.8274, + "epoch": 0.051179162266807465, + "grad_norm": 3.1564605236053467, + "learning_rate": 1.3983629249618025e-05, + "loss": 0.7545, "step": 727 }, { - "epoch": 0.055086829858877834, - "grad_norm": 3.635193347930908, - "learning_rate": 1.799873500898154e-05, - "loss": 0.851, + "epoch": 0.05124956001407955, + "grad_norm": 3.203169584274292, + "learning_rate": 1.398351997230031e-05, + "loss": 0.8203, "step": 728 }, { - "epoch": 0.05516249858121146, - "grad_norm": 2.7853517532348633, - "learning_rate": 1.799869696717567e-05, - "loss": 0.778, + "epoch": 0.051319957761351634, + "grad_norm": 3.2010984420776367, + "learning_rate": 1.398341033190307e-05, + "loss": 0.8009, "step": 729 }, { - "epoch": 0.05523816730354508, - "grad_norm": 3.3416101932525635, - "learning_rate": 1.799865836184229e-05, - "loss": 0.8563, + "epoch": 0.051390355508623725, + "grad_norm": 3.5716562271118164, + "learning_rate": 1.3983300328432003e-05, + "loss": 0.7984, "step": 730 }, { - "epoch": 0.0553138360258787, - "grad_norm": 3.1914992332458496, - "learning_rate": 1.7998619192983812e-05, - "loss": 0.7991, + "epoch": 0.05146075325589581, + "grad_norm": 2.6651852130889893, + "learning_rate": 1.3983189961892826e-05, + "loss": 0.8903, "step": 731 }, { - "epoch": 0.055389504748212325, - "grad_norm": 3.1963469982147217, - "learning_rate": 1.79985794606027e-05, - "loss": 0.9286, + "epoch": 0.0515311510031679, + "grad_norm": 3.229210376739502, + "learning_rate": 1.3983079232291279e-05, + "loss": 0.8723, "step": 732 }, { - "epoch": 0.05546517347054595, - "grad_norm": 3.363598346710205, - "learning_rate": 1.7998539164701437e-05, - "loss": 0.8608, + "epoch": 0.051601548750439985, + "grad_norm": 3.8051159381866455, + "learning_rate": 1.3982968139633116e-05, + "loss": 0.754, "step": 733 }, { - "epoch": 0.05554084219287957, - "grad_norm": 3.9688327312469482, - "learning_rate": 1.7998498305282548e-05, - "loss": 0.947, + "epoch": 0.051671946497712076, + "grad_norm": 2.803358793258667, + "learning_rate": 1.3982856683924116e-05, + "loss": 0.8331, "step": 734 }, { - "epoch": 0.0556165109152132, - "grad_norm": 3.629190683364868, - "learning_rate": 1.7998456882348587e-05, - "loss": 0.7682, + "epoch": 0.05174234424498416, + "grad_norm": 2.683288097381592, + "learning_rate": 1.3982744865170075e-05, + "loss": 0.8174, "step": 735 }, { - "epoch": 0.05569217963754682, - "grad_norm": 2.6202425956726074, - "learning_rate": 1.7998414895902153e-05, - "loss": 0.8611, + "epoch": 0.051812741992256245, + "grad_norm": 2.5379276275634766, + "learning_rate": 1.3982632683376805e-05, + "loss": 0.9519, "step": 736 }, { - "epoch": 0.05576784835988045, - "grad_norm": 3.04758882522583, - "learning_rate": 1.7998372345945874e-05, - "loss": 1.0072, + "epoch": 0.051883139739528336, + "grad_norm": 2.5294148921966553, + "learning_rate": 1.3982520138550136e-05, + "loss": 0.811, "step": 737 }, { - "epoch": 0.055843517082214064, - "grad_norm": 3.110172748565674, - "learning_rate": 1.7998329232482415e-05, - "loss": 0.7794, + "epoch": 0.05195353748680042, + "grad_norm": 3.005319595336914, + "learning_rate": 1.3982407230695923e-05, + "loss": 0.7679, "step": 738 }, { - "epoch": 0.05591918580454769, - "grad_norm": 3.5827243328094482, - "learning_rate": 1.7998285555514472e-05, - "loss": 0.8902, + "epoch": 0.05202393523407251, + "grad_norm": 3.278287649154663, + "learning_rate": 1.398229395982003e-05, + "loss": 0.7206, "step": 739 }, { - "epoch": 0.05599485452688131, - "grad_norm": 3.689215898513794, - "learning_rate": 1.799824131504479e-05, - "loss": 0.9457, + "epoch": 0.052094332981344596, + "grad_norm": 2.724275588989258, + "learning_rate": 1.3982180325928356e-05, + "loss": 0.7438, "step": 740 }, { - "epoch": 0.05607052324921494, - "grad_norm": 3.847498893737793, - "learning_rate": 1.799819651107613e-05, - "loss": 0.9951, + "epoch": 0.05216473072861669, + "grad_norm": 2.333674907684326, + "learning_rate": 1.39820663290268e-05, + "loss": 0.7723, "step": 741 }, { - "epoch": 0.05614619197154856, - "grad_norm": 3.818758249282837, - "learning_rate": 1.7998151143611298e-05, - "loss": 0.8568, + "epoch": 0.05223512847588877, + "grad_norm": 2.563398599624634, + "learning_rate": 1.3981951969121293e-05, + "loss": 0.8379, "step": 742 }, { - "epoch": 0.056221860693882186, - "grad_norm": 4.948990821838379, - "learning_rate": 1.799810521265314e-05, - "loss": 0.7821, + "epoch": 0.052305526223160856, + "grad_norm": 2.8131086826324463, + "learning_rate": 1.398183724621778e-05, + "loss": 0.762, "step": 743 }, { - "epoch": 0.056297529416215804, - "grad_norm": 2.994140625, - "learning_rate": 1.799805871820453e-05, - "loss": 0.8261, + "epoch": 0.05237592397043295, + "grad_norm": 3.1646552085876465, + "learning_rate": 1.3981722160322228e-05, + "loss": 0.7743, "step": 744 }, { - "epoch": 0.05637319813854943, - "grad_norm": 3.428760528564453, - "learning_rate": 1.799801166026838e-05, - "loss": 0.9666, + "epoch": 0.05244632171770503, + "grad_norm": 2.68544864654541, + "learning_rate": 1.3981606711440614e-05, + "loss": 0.7779, "step": 745 }, { - "epoch": 0.05644886686088305, - "grad_norm": 3.410270929336548, - "learning_rate": 1.7997964038847636e-05, - "loss": 0.7529, + "epoch": 0.05251671946497712, + "grad_norm": 8.268613815307617, + "learning_rate": 1.3981490899578947e-05, + "loss": 0.7663, "step": 746 }, { - "epoch": 0.05652453558321668, - "grad_norm": 2.595470428466797, - "learning_rate": 1.7997915853945282e-05, - "loss": 0.9564, + "epoch": 0.05258711721224921, + "grad_norm": 2.4277842044830322, + "learning_rate": 1.3981374724743243e-05, + "loss": 0.8292, "step": 747 }, { - "epoch": 0.0566002043055503, - "grad_norm": 2.552440881729126, - "learning_rate": 1.7997867105564336e-05, - "loss": 0.925, + "epoch": 0.0526575149595213, + "grad_norm": 2.7284724712371826, + "learning_rate": 1.3981258186939546e-05, + "loss": 0.8414, "step": 748 }, { - "epoch": 0.056675873027883926, - "grad_norm": 3.9681804180145264, - "learning_rate": 1.7997817793707845e-05, - "loss": 1.0332, + "epoch": 0.05272791270679338, + "grad_norm": 3.6003355979919434, + "learning_rate": 1.3981141286173913e-05, + "loss": 0.786, "step": 749 }, { - "epoch": 0.05675154175021755, - "grad_norm": 2.687912940979004, - "learning_rate": 1.7997767918378904e-05, - "loss": 0.8711, + "epoch": 0.05279831045406547, + "grad_norm": 1.644735336303711, + "learning_rate": 1.3981024022452419e-05, + "loss": 0.9029, "step": 750 }, { - "epoch": 0.05682721047255117, - "grad_norm": 3.232062578201294, - "learning_rate": 1.799771747958063e-05, - "loss": 0.6525, + "epoch": 0.05286870820133756, + "grad_norm": 3.3569185733795166, + "learning_rate": 1.3980906395781168e-05, + "loss": 0.9093, "step": 751 }, { - "epoch": 0.05690287919488479, - "grad_norm": 3.3690457344055176, - "learning_rate": 1.7997666477316194e-05, - "loss": 0.9147, + "epoch": 0.05293910594860964, + "grad_norm": 3.2259087562561035, + "learning_rate": 1.398078840616627e-05, + "loss": 0.7531, "step": 752 }, { - "epoch": 0.05697854791721842, - "grad_norm": 3.5086419582366943, - "learning_rate": 1.7997614911588774e-05, - "loss": 0.9292, + "epoch": 0.053009503695881734, + "grad_norm": 2.7807788848876953, + "learning_rate": 1.398067005361386e-05, + "loss": 0.9643, "step": 753 }, { - "epoch": 0.05705421663955204, - "grad_norm": 2.7476987838745117, - "learning_rate": 1.7997562782401604e-05, - "loss": 0.7515, + "epoch": 0.05307990144315382, + "grad_norm": 3.459033250808716, + "learning_rate": 1.3980551338130091e-05, + "loss": 0.7065, "step": 754 }, { - "epoch": 0.057129885361885666, - "grad_norm": 2.3388469219207764, - "learning_rate": 1.7997510089757956e-05, - "loss": 1.0614, + "epoch": 0.05315029919042591, + "grad_norm": 3.129061698913574, + "learning_rate": 1.3980432259721137e-05, + "loss": 0.8084, "step": 755 }, { - "epoch": 0.05720555408421929, - "grad_norm": 3.508303165435791, - "learning_rate": 1.7997456833661124e-05, - "loss": 0.7057, + "epoch": 0.053220696937697994, + "grad_norm": 2.747666358947754, + "learning_rate": 1.3980312818393188e-05, + "loss": 0.8485, "step": 756 }, { - "epoch": 0.057281222806552914, - "grad_norm": 4.021640300750732, - "learning_rate": 1.7997403014114445e-05, - "loss": 1.0216, + "epoch": 0.05329109468497008, + "grad_norm": 2.5373542308807373, + "learning_rate": 1.3980193014152455e-05, + "loss": 0.9374, "step": 757 }, { - "epoch": 0.05735689152888653, - "grad_norm": 5.258941173553467, - "learning_rate": 1.7997348631121287e-05, - "loss": 0.8469, + "epoch": 0.05336149243224217, + "grad_norm": 2.4282190799713135, + "learning_rate": 1.3980072847005163e-05, + "loss": 0.7267, "step": 758 }, { - "epoch": 0.057432560251220156, - "grad_norm": 3.1040396690368652, - "learning_rate": 1.7997293684685055e-05, - "loss": 0.8839, + "epoch": 0.05343189017951425, + "grad_norm": 2.6626877784729004, + "learning_rate": 1.3979952316957564e-05, + "loss": 0.8298, "step": 759 }, { - "epoch": 0.05750822897355378, - "grad_norm": 3.224198341369629, - "learning_rate": 1.7997238174809194e-05, - "loss": 0.8264, + "epoch": 0.053502287926786345, + "grad_norm": 2.56164288520813, + "learning_rate": 1.3979831424015923e-05, + "loss": 1.0345, "step": 760 }, { - "epoch": 0.057583897695887405, - "grad_norm": 3.097722291946411, - "learning_rate": 1.7997182101497175e-05, - "loss": 0.7879, + "epoch": 0.05357268567405843, + "grad_norm": 2.4681146144866943, + "learning_rate": 1.3979710168186523e-05, + "loss": 0.8936, "step": 761 }, { - "epoch": 0.05765956641822103, - "grad_norm": 3.591596841812134, - "learning_rate": 1.7997125464752517e-05, - "loss": 0.8322, + "epoch": 0.05364308342133052, + "grad_norm": 3.349982738494873, + "learning_rate": 1.3979588549475671e-05, + "loss": 0.959, "step": 762 }, { - "epoch": 0.057735235140554654, - "grad_norm": 3.280409336090088, - "learning_rate": 1.7997068264578757e-05, - "loss": 0.8275, + "epoch": 0.053713481168602605, + "grad_norm": 3.5909135341644287, + "learning_rate": 1.397946656788969e-05, + "loss": 0.9173, "step": 763 }, { - "epoch": 0.05781090386288828, - "grad_norm": 3.701860189437866, - "learning_rate": 1.7997010500979488e-05, - "loss": 0.8116, + "epoch": 0.05378387891587469, + "grad_norm": 2.757384777069092, + "learning_rate": 1.397934422343492e-05, + "loss": 0.7083, "step": 764 }, { - "epoch": 0.057886572585221896, - "grad_norm": 3.2338805198669434, - "learning_rate": 1.7996952173958317e-05, - "loss": 0.8088, + "epoch": 0.05385427666314678, + "grad_norm": 2.7169158458709717, + "learning_rate": 1.3979221516117723e-05, + "loss": 0.7742, "step": 765 }, { - "epoch": 0.05796224130755552, - "grad_norm": 3.3278093338012695, - "learning_rate": 1.79968932835189e-05, - "loss": 0.7139, + "epoch": 0.053924674410418864, + "grad_norm": 2.4077370166778564, + "learning_rate": 1.397909844594448e-05, + "loss": 0.7265, "step": 766 }, { - "epoch": 0.058037910029889145, - "grad_norm": 2.84871768951416, - "learning_rate": 1.799683382966493e-05, - "loss": 0.8951, + "epoch": 0.053995072157690956, + "grad_norm": 2.8874993324279785, + "learning_rate": 1.3978975012921587e-05, + "loss": 0.8733, "step": 767 }, { - "epoch": 0.05811357875222277, - "grad_norm": 3.250761032104492, - "learning_rate": 1.7996773812400124e-05, - "loss": 0.834, + "epoch": 0.05406546990496304, + "grad_norm": 3.2958602905273438, + "learning_rate": 1.3978851217055463e-05, + "loss": 0.8807, "step": 768 }, { - "epoch": 0.058189247474556394, - "grad_norm": 3.869211435317993, - "learning_rate": 1.7996713231728244e-05, - "loss": 0.9022, + "epoch": 0.05413586765223513, + "grad_norm": 3.138025999069214, + "learning_rate": 1.3978727058352545e-05, + "loss": 0.7914, "step": 769 }, { - "epoch": 0.05826491619689002, - "grad_norm": 3.068364143371582, - "learning_rate": 1.7996652087653082e-05, - "loss": 0.882, + "epoch": 0.054206265399507216, + "grad_norm": 3.454031229019165, + "learning_rate": 1.3978602536819286e-05, + "loss": 0.9262, "step": 770 }, { - "epoch": 0.058340584919223636, - "grad_norm": 3.9008500576019287, - "learning_rate": 1.7996590380178466e-05, - "loss": 0.956, + "epoch": 0.0542766631467793, + "grad_norm": 3.2560415267944336, + "learning_rate": 1.3978477652462158e-05, + "loss": 0.8609, "step": 771 }, { - "epoch": 0.05841625364155726, - "grad_norm": 5.665666580200195, - "learning_rate": 1.7996528109308266e-05, - "loss": 0.8128, + "epoch": 0.05434706089405139, + "grad_norm": 2.585158348083496, + "learning_rate": 1.3978352405287659e-05, + "loss": 0.8444, "step": 772 }, { - "epoch": 0.058491922363890884, - "grad_norm": 3.024960517883301, - "learning_rate": 1.7996465275046374e-05, - "loss": 0.9174, + "epoch": 0.054417458641323475, + "grad_norm": 2.9122416973114014, + "learning_rate": 1.39782267953023e-05, + "loss": 0.7026, "step": 773 }, { - "epoch": 0.05856759108622451, - "grad_norm": 3.01311993598938, - "learning_rate": 1.7996401877396733e-05, - "loss": 0.8168, + "epoch": 0.05448785638859557, + "grad_norm": 2.9291064739227295, + "learning_rate": 1.3978100822512608e-05, + "loss": 0.7923, "step": 774 }, { - "epoch": 0.05864325980855813, - "grad_norm": 3.073803186416626, - "learning_rate": 1.7996337916363302e-05, - "loss": 0.7588, + "epoch": 0.05455825413586765, + "grad_norm": 3.025038242340088, + "learning_rate": 1.3977974486925136e-05, + "loss": 0.8966, "step": 775 }, { - "epoch": 0.05871892853089176, - "grad_norm": 3.6292426586151123, - "learning_rate": 1.7996273391950095e-05, - "loss": 1.1097, + "epoch": 0.05462865188313974, + "grad_norm": 3.147413730621338, + "learning_rate": 1.3977847788546451e-05, + "loss": 0.8772, "step": 776 }, { - "epoch": 0.05879459725322538, - "grad_norm": 3.8415868282318115, - "learning_rate": 1.7996208304161153e-05, - "loss": 0.9531, + "epoch": 0.054699049630411826, + "grad_norm": 3.3828623294830322, + "learning_rate": 1.397772072738314e-05, + "loss": 0.8226, "step": 777 }, { - "epoch": 0.058870265975559, - "grad_norm": 2.900418996810913, - "learning_rate": 1.799614265300055e-05, - "loss": 0.8801, + "epoch": 0.05476944737768391, + "grad_norm": 2.7313575744628906, + "learning_rate": 1.3977593303441807e-05, + "loss": 0.8902, "step": 778 }, { - "epoch": 0.058945934697892624, - "grad_norm": 3.2337486743927, - "learning_rate": 1.7996076438472395e-05, - "loss": 0.92, + "epoch": 0.054839845124956, + "grad_norm": 3.092282295227051, + "learning_rate": 1.3977465516729079e-05, + "loss": 0.753, "step": 779 }, { - "epoch": 0.05902160342022625, - "grad_norm": 2.9472317695617676, - "learning_rate": 1.7996009660580836e-05, - "loss": 0.8633, + "epoch": 0.054910242872228086, + "grad_norm": 2.471165657043457, + "learning_rate": 1.39773373672516e-05, + "loss": 0.7202, "step": 780 }, { - "epoch": 0.05909727214255987, - "grad_norm": 2.4730706214904785, - "learning_rate": 1.7995942319330056e-05, - "loss": 0.8554, + "epoch": 0.05498064061950018, + "grad_norm": 3.506514310836792, + "learning_rate": 1.3977208855016035e-05, + "loss": 0.9916, "step": 781 }, { - "epoch": 0.0591729408648935, - "grad_norm": 5.070908546447754, - "learning_rate": 1.7995874414724272e-05, - "loss": 0.7889, + "epoch": 0.05505103836677226, + "grad_norm": 2.498566150665283, + "learning_rate": 1.397707998002906e-05, + "loss": 0.718, "step": 782 }, { - "epoch": 0.05924860958722712, - "grad_norm": 3.5135512351989746, - "learning_rate": 1.7995805946767734e-05, - "loss": 0.802, + "epoch": 0.05512143611404435, + "grad_norm": 2.9316225051879883, + "learning_rate": 1.397695074229738e-05, + "loss": 0.6682, "step": 783 }, { - "epoch": 0.059324278309560746, - "grad_norm": 3.356902599334717, - "learning_rate": 1.7995736915464735e-05, - "loss": 0.8238, + "epoch": 0.05519183386131644, + "grad_norm": 2.7276298999786377, + "learning_rate": 1.3976821141827709e-05, + "loss": 0.9103, "step": 784 }, { - "epoch": 0.059399947031894364, - "grad_norm": 3.2595343589782715, - "learning_rate": 1.7995667320819595e-05, - "loss": 0.8915, + "epoch": 0.05526223160858852, + "grad_norm": 3.407465934753418, + "learning_rate": 1.397669117862679e-05, + "loss": 0.6751, "step": 785 }, { - "epoch": 0.05947561575422799, - "grad_norm": 2.725177526473999, - "learning_rate": 1.799559716283667e-05, - "loss": 0.7803, + "epoch": 0.05533262935586061, + "grad_norm": 2.7540299892425537, + "learning_rate": 1.3976560852701376e-05, + "loss": 0.8264, "step": 786 }, { - "epoch": 0.05955128447656161, - "grad_norm": 3.298215389251709, - "learning_rate": 1.7995526441520354e-05, - "loss": 0.9538, + "epoch": 0.0554030271031327, + "grad_norm": 2.8935627937316895, + "learning_rate": 1.397643016405825e-05, + "loss": 0.7803, "step": 787 }, { - "epoch": 0.05962695319889524, - "grad_norm": 4.367799758911133, - "learning_rate": 1.7995455156875077e-05, - "loss": 0.9063, + "epoch": 0.05547342485040479, + "grad_norm": 2.9142093658447266, + "learning_rate": 1.3976299112704197e-05, + "loss": 0.809, "step": 788 }, { - "epoch": 0.05970262192122886, - "grad_norm": 2.9157984256744385, - "learning_rate": 1.7995383308905307e-05, - "loss": 0.8681, + "epoch": 0.05554382259767687, + "grad_norm": 2.8878235816955566, + "learning_rate": 1.3976167698646038e-05, + "loss": 0.8731, "step": 789 }, { - "epoch": 0.059778290643562486, - "grad_norm": 3.340041399002075, - "learning_rate": 1.7995310897615537e-05, - "loss": 0.9215, + "epoch": 0.055614220344948964, + "grad_norm": 2.626134157180786, + "learning_rate": 1.3976035921890599e-05, + "loss": 0.8549, "step": 790 }, { - "epoch": 0.05985395936589611, - "grad_norm": 3.1033027172088623, - "learning_rate": 1.7995237923010306e-05, - "loss": 0.8081, + "epoch": 0.05568461809222105, + "grad_norm": 2.4057083129882812, + "learning_rate": 1.3975903782444738e-05, + "loss": 0.8847, "step": 791 }, { - "epoch": 0.05992962808822973, - "grad_norm": 3.03116774559021, - "learning_rate": 1.799516438509418e-05, - "loss": 0.9414, + "epoch": 0.05575501583949313, + "grad_norm": 2.7630984783172607, + "learning_rate": 1.397577128031532e-05, + "loss": 0.8382, "step": 792 }, { - "epoch": 0.06000529681056335, - "grad_norm": 3.3425679206848145, - "learning_rate": 1.7995090283871765e-05, - "loss": 0.8291, + "epoch": 0.055825413586765224, + "grad_norm": 2.34981107711792, + "learning_rate": 1.3975638415509235e-05, + "loss": 0.7721, "step": 793 }, { - "epoch": 0.06008096553289698, - "grad_norm": 2.8552069664001465, - "learning_rate": 1.7995015619347707e-05, - "loss": 0.9352, + "epoch": 0.05589581133403731, + "grad_norm": 2.826700210571289, + "learning_rate": 1.3975505188033392e-05, + "loss": 0.8582, "step": 794 }, { - "epoch": 0.0601566342552306, - "grad_norm": 3.53585147857666, - "learning_rate": 1.7994940391526674e-05, - "loss": 0.8699, + "epoch": 0.0559662090813094, + "grad_norm": 2.648467540740967, + "learning_rate": 1.3975371597894717e-05, + "loss": 0.9139, "step": 795 }, { - "epoch": 0.060232302977564225, - "grad_norm": 2.6548848152160645, - "learning_rate": 1.7994864600413383e-05, - "loss": 0.806, + "epoch": 0.056036606828581484, + "grad_norm": 3.2171995639801025, + "learning_rate": 1.3975237645100155e-05, + "loss": 0.8077, "step": 796 }, { - "epoch": 0.06030797169989785, - "grad_norm": 2.734811782836914, - "learning_rate": 1.7994788246012578e-05, - "loss": 0.73, + "epoch": 0.056107004575853575, + "grad_norm": 2.8273582458496094, + "learning_rate": 1.397510332965667e-05, + "loss": 0.8328, "step": 797 }, { - "epoch": 0.06038364042223147, - "grad_norm": 3.6536951065063477, - "learning_rate": 1.7994711328329038e-05, - "loss": 0.7225, + "epoch": 0.05617740232312566, + "grad_norm": 3.3063392639160156, + "learning_rate": 1.3974968651571248e-05, + "loss": 0.7679, "step": 798 }, { - "epoch": 0.06045930914456509, - "grad_norm": 2.3973493576049805, - "learning_rate": 1.7994633847367582e-05, - "loss": 0.661, + "epoch": 0.05624780007039775, + "grad_norm": 2.952239751815796, + "learning_rate": 1.3974833610850887e-05, + "loss": 0.6983, "step": 799 }, { - "epoch": 0.060534977866898716, - "grad_norm": 4.086350917816162, - "learning_rate": 1.7994555803133065e-05, - "loss": 0.8949, + "epoch": 0.056318197817669835, + "grad_norm": 2.5223381519317627, + "learning_rate": 1.3974698207502611e-05, + "loss": 0.8215, "step": 800 }, { - "epoch": 0.06061064658923234, - "grad_norm": 2.1674516201019287, - "learning_rate": 1.799447719563037e-05, - "loss": 1.0709, + "epoch": 0.05638859556494192, + "grad_norm": 2.665891408920288, + "learning_rate": 1.3974562441533458e-05, + "loss": 0.7619, "step": 801 }, { - "epoch": 0.060686315311565965, - "grad_norm": 3.184936761856079, - "learning_rate": 1.799439802486442e-05, - "loss": 0.7324, + "epoch": 0.05645899331221401, + "grad_norm": 3.7186219692230225, + "learning_rate": 1.3974426312950485e-05, + "loss": 0.8978, "step": 802 }, { - "epoch": 0.06076198403389959, - "grad_norm": 2.968808889389038, - "learning_rate": 1.7994318290840178e-05, - "loss": 0.84, + "epoch": 0.056529391059486095, + "grad_norm": 2.9490206241607666, + "learning_rate": 1.3974289821760775e-05, + "loss": 0.8617, "step": 803 }, { - "epoch": 0.060837652756233214, - "grad_norm": 3.6430764198303223, - "learning_rate": 1.799423799356263e-05, - "loss": 0.8638, + "epoch": 0.056599788806758186, + "grad_norm": 2.8732612133026123, + "learning_rate": 1.3974152967971418e-05, + "loss": 0.8459, "step": 804 }, { - "epoch": 0.06091332147856683, - "grad_norm": 2.8016927242279053, - "learning_rate": 1.799415713303681e-05, - "loss": 0.7604, + "epoch": 0.05667018655403027, + "grad_norm": 2.5365946292877197, + "learning_rate": 1.3974015751589534e-05, + "loss": 0.7412, "step": 805 }, { - "epoch": 0.060988990200900456, - "grad_norm": 7.259315013885498, - "learning_rate": 1.799407570926778e-05, - "loss": 0.9089, + "epoch": 0.05674058430130236, + "grad_norm": 3.166045665740967, + "learning_rate": 1.3973878172622255e-05, + "loss": 0.8991, "step": 806 }, { - "epoch": 0.06106465892323408, - "grad_norm": 4.342022895812988, - "learning_rate": 1.7993993722260635e-05, - "loss": 0.7734, + "epoch": 0.056810982048574446, + "grad_norm": 2.456942319869995, + "learning_rate": 1.3973740231076735e-05, + "loss": 0.7869, "step": 807 }, { - "epoch": 0.061140327645567705, - "grad_norm": 4.369460582733154, - "learning_rate": 1.7993911172020517e-05, - "loss": 0.8225, + "epoch": 0.05688137979584653, + "grad_norm": 2.7324979305267334, + "learning_rate": 1.397360192696014e-05, + "loss": 0.7298, "step": 808 }, { - "epoch": 0.06121599636790133, - "grad_norm": 3.1216466426849365, - "learning_rate": 1.7993828058552593e-05, - "loss": 1.0397, + "epoch": 0.05695177754311862, + "grad_norm": 2.9507203102111816, + "learning_rate": 1.397346326027967e-05, + "loss": 0.8127, "step": 809 }, { - "epoch": 0.061291665090234954, - "grad_norm": 3.13508677482605, - "learning_rate": 1.799374438186206e-05, - "loss": 0.9094, + "epoch": 0.057022175290390706, + "grad_norm": 2.5356690883636475, + "learning_rate": 1.3973324231042529e-05, + "loss": 0.8101, "step": 810 }, { - "epoch": 0.06136733381256858, - "grad_norm": 11.32715892791748, - "learning_rate": 1.799366014195417e-05, - "loss": 0.9524, + "epoch": 0.0570925730376628, + "grad_norm": 2.731660842895508, + "learning_rate": 1.3973184839255945e-05, + "loss": 0.8733, "step": 811 }, { - "epoch": 0.061443002534902195, - "grad_norm": 2.600041389465332, - "learning_rate": 1.799357533883419e-05, - "loss": 0.7271, + "epoch": 0.05716297078493488, + "grad_norm": 2.4052608013153076, + "learning_rate": 1.3973045084927163e-05, + "loss": 0.8011, "step": 812 }, { - "epoch": 0.06151867125723582, - "grad_norm": 3.431683301925659, - "learning_rate": 1.7993489972507434e-05, - "loss": 0.8767, + "epoch": 0.05723336853220697, + "grad_norm": 2.607543468475342, + "learning_rate": 1.3972904968063455e-05, + "loss": 0.8972, "step": 813 }, { - "epoch": 0.061594339979569444, - "grad_norm": 3.009431838989258, - "learning_rate": 1.799340404297925e-05, - "loss": 0.8367, + "epoch": 0.05730376627947906, + "grad_norm": 2.789609909057617, + "learning_rate": 1.3972764488672102e-05, + "loss": 0.8017, "step": 814 }, { - "epoch": 0.06167000870190307, - "grad_norm": 3.2158117294311523, - "learning_rate": 1.7993317550255014e-05, - "loss": 0.8516, + "epoch": 0.05737416402675114, + "grad_norm": 2.291703224182129, + "learning_rate": 1.397262364676041e-05, + "loss": 0.8318, "step": 815 }, { - "epoch": 0.06174567742423669, - "grad_norm": 3.753148317337036, - "learning_rate": 1.7993230494340145e-05, - "loss": 0.8619, + "epoch": 0.05744456177402323, + "grad_norm": 2.660813570022583, + "learning_rate": 1.3972482442335697e-05, + "loss": 0.7756, "step": 816 }, { - "epoch": 0.06182134614657032, - "grad_norm": 3.217808485031128, - "learning_rate": 1.7993142875240097e-05, - "loss": 0.7954, + "epoch": 0.05751495952129532, + "grad_norm": 2.9769861698150635, + "learning_rate": 1.3972340875405308e-05, + "loss": 0.798, "step": 817 }, { - "epoch": 0.061897014868903935, - "grad_norm": 6.548532009124756, - "learning_rate": 1.7993054692960354e-05, - "loss": 0.8667, + "epoch": 0.05758535726856741, + "grad_norm": 2.650627374649048, + "learning_rate": 1.3972198945976603e-05, + "loss": 0.7363, "step": 818 }, { - "epoch": 0.06197268359123756, - "grad_norm": 3.1087334156036377, - "learning_rate": 1.7992965947506437e-05, - "loss": 0.9301, + "epoch": 0.05765575501583949, + "grad_norm": 2.342474937438965, + "learning_rate": 1.3972056654056959e-05, + "loss": 0.7422, "step": 819 }, { - "epoch": 0.062048352313571184, - "grad_norm": 4.297720432281494, - "learning_rate": 1.7992876638883907e-05, - "loss": 0.753, + "epoch": 0.057726152763111584, + "grad_norm": 3.2747371196746826, + "learning_rate": 1.3971913999653776e-05, + "loss": 0.7691, "step": 820 }, { - "epoch": 0.06212402103590481, - "grad_norm": 3.4362447261810303, - "learning_rate": 1.7992786767098353e-05, - "loss": 0.7636, + "epoch": 0.05779655051038367, + "grad_norm": 2.545027732849121, + "learning_rate": 1.3971770982774472e-05, + "loss": 0.8188, "step": 821 }, { - "epoch": 0.06219968975823843, - "grad_norm": 2.7632527351379395, - "learning_rate": 1.799269633215541e-05, - "loss": 0.8575, + "epoch": 0.05786694825765575, + "grad_norm": 2.148822546005249, + "learning_rate": 1.3971627603426477e-05, + "loss": 0.8305, "step": 822 }, { - "epoch": 0.06227535848057206, - "grad_norm": 3.267557144165039, - "learning_rate": 1.7992605334060736e-05, - "loss": 0.7376, + "epoch": 0.057937346004927844, + "grad_norm": 4.2569661140441895, + "learning_rate": 1.3971483861617252e-05, + "loss": 0.6319, "step": 823 }, { - "epoch": 0.06235102720290568, - "grad_norm": 3.381315231323242, - "learning_rate": 1.7992513772820027e-05, - "loss": 0.9032, + "epoch": 0.05800774375219993, + "grad_norm": 2.091796875, + "learning_rate": 1.3971339757354267e-05, + "loss": 0.8086, "step": 824 }, { - "epoch": 0.0624266959252393, - "grad_norm": 3.6174585819244385, - "learning_rate": 1.7992421648439024e-05, - "loss": 0.7052, + "epoch": 0.05807814149947202, + "grad_norm": 3.198848247528076, + "learning_rate": 1.3971195290645013e-05, + "loss": 0.8679, "step": 825 }, { - "epoch": 0.06250236464757293, - "grad_norm": 3.082953929901123, - "learning_rate": 1.799232896092349e-05, - "loss": 0.9377, + "epoch": 0.0581485392467441, + "grad_norm": 2.404658555984497, + "learning_rate": 1.3971050461497003e-05, + "loss": 0.7893, "step": 826 }, { - "epoch": 0.06257803336990655, - "grad_norm": 5.397732734680176, - "learning_rate": 1.7992235710279233e-05, - "loss": 0.8913, + "epoch": 0.058218936994016195, + "grad_norm": 2.7750935554504395, + "learning_rate": 1.3970905269917766e-05, + "loss": 0.7891, "step": 827 }, { - "epoch": 0.06265370209224018, - "grad_norm": 3.0445351600646973, - "learning_rate": 1.799214189651209e-05, - "loss": 0.9004, + "epoch": 0.05828933474128828, + "grad_norm": 5.766189098358154, + "learning_rate": 1.3970759715914852e-05, + "loss": 0.8984, "step": 828 }, { - "epoch": 0.06272937081457379, - "grad_norm": 3.1507112979888916, - "learning_rate": 1.799204751962794e-05, - "loss": 1.0471, + "epoch": 0.05835973248856036, + "grad_norm": 2.7002551555633545, + "learning_rate": 1.3970613799495825e-05, + "loss": 0.7683, "step": 829 }, { - "epoch": 0.06280503953690741, - "grad_norm": 4.134524822235107, - "learning_rate": 1.7991952579632688e-05, - "loss": 0.8125, + "epoch": 0.058430130235832455, + "grad_norm": 2.4011354446411133, + "learning_rate": 1.3970467520668275e-05, + "loss": 0.7126, "step": 830 }, { - "epoch": 0.06288070825924104, - "grad_norm": 2.9399423599243164, - "learning_rate": 1.799185707653228e-05, - "loss": 0.7965, + "epoch": 0.05850052798310454, + "grad_norm": 2.474435567855835, + "learning_rate": 1.3970320879439807e-05, + "loss": 0.7117, "step": 831 }, { - "epoch": 0.06295637698157466, - "grad_norm": 4.048933506011963, - "learning_rate": 1.7991761010332704e-05, - "loss": 0.8824, + "epoch": 0.05857092573037663, + "grad_norm": 2.8345251083374023, + "learning_rate": 1.397017387581804e-05, + "loss": 0.6874, "step": 832 }, { - "epoch": 0.06303204570390829, - "grad_norm": 2.8442611694335938, - "learning_rate": 1.7991664381039968e-05, - "loss": 0.6825, + "epoch": 0.058641323477648714, + "grad_norm": 2.2286150455474854, + "learning_rate": 1.3970026509810625e-05, + "loss": 0.8337, "step": 833 }, { - "epoch": 0.06310771442624191, - "grad_norm": 3.6340487003326416, - "learning_rate": 1.7991567188660125e-05, - "loss": 0.8944, + "epoch": 0.058711721224920806, + "grad_norm": 2.337022066116333, + "learning_rate": 1.3969878781425217e-05, + "loss": 0.8015, "step": 834 }, { - "epoch": 0.06318338314857554, - "grad_norm": 3.6592376232147217, - "learning_rate": 1.7991469433199264e-05, - "loss": 0.8148, + "epoch": 0.05878211897219289, + "grad_norm": 2.785576343536377, + "learning_rate": 1.3969730690669501e-05, + "loss": 0.8072, "step": 835 }, { - "epoch": 0.06325905187090916, - "grad_norm": 3.6150155067443848, - "learning_rate": 1.7991371114663503e-05, - "loss": 0.6471, + "epoch": 0.058852516719464974, + "grad_norm": 2.2109322547912598, + "learning_rate": 1.3969582237551171e-05, + "loss": 0.7654, "step": 836 }, { - "epoch": 0.06333472059324279, - "grad_norm": 3.5820491313934326, - "learning_rate": 1.7991272233059003e-05, - "loss": 0.9492, + "epoch": 0.058922914466737066, + "grad_norm": 2.5643649101257324, + "learning_rate": 1.396943342207795e-05, + "loss": 0.7154, "step": 837 }, { - "epoch": 0.06341038931557641, - "grad_norm": 3.483809471130371, - "learning_rate": 1.7991172788391953e-05, - "loss": 0.7662, + "epoch": 0.05899331221400915, + "grad_norm": 2.2703800201416016, + "learning_rate": 1.3969284244257575e-05, + "loss": 0.7333, "step": 838 }, { - "epoch": 0.06348605803791003, - "grad_norm": 3.0306272506713867, - "learning_rate": 1.7991072780668585e-05, - "loss": 0.9009, + "epoch": 0.05906370996128124, + "grad_norm": 2.5668768882751465, + "learning_rate": 1.3969134704097797e-05, + "loss": 0.8526, "step": 839 }, { - "epoch": 0.06356172676024366, - "grad_norm": 3.402259588241577, - "learning_rate": 1.7990972209895155e-05, - "loss": 0.7558, + "epoch": 0.059134107708553325, + "grad_norm": 4.667150497436523, + "learning_rate": 1.3968984801606395e-05, + "loss": 0.8812, "step": 840 }, { - "epoch": 0.06363739548257728, - "grad_norm": 2.673753023147583, - "learning_rate": 1.7990871076077967e-05, - "loss": 0.7811, + "epoch": 0.05920450545582542, + "grad_norm": 2.428640365600586, + "learning_rate": 1.3968834536791164e-05, + "loss": 0.7127, "step": 841 }, { - "epoch": 0.0637130642049109, - "grad_norm": 4.777785778045654, - "learning_rate": 1.799076937922335e-05, - "loss": 0.8718, + "epoch": 0.0592749032030975, + "grad_norm": 2.8358137607574463, + "learning_rate": 1.3968683909659913e-05, + "loss": 0.8006, "step": 842 }, { - "epoch": 0.06378873292724452, - "grad_norm": 3.1430823802948, - "learning_rate": 1.799066711933768e-05, - "loss": 0.7224, + "epoch": 0.059345300950369585, + "grad_norm": 2.904513359069824, + "learning_rate": 1.3968532920220474e-05, + "loss": 0.7269, "step": 843 }, { - "epoch": 0.06386440164957814, - "grad_norm": 3.542694568634033, - "learning_rate": 1.799056429642735e-05, - "loss": 0.7901, + "epoch": 0.059415698697641676, + "grad_norm": 2.365891933441162, + "learning_rate": 1.3968381568480697e-05, + "loss": 0.9274, "step": 844 }, { - "epoch": 0.06394007037191177, - "grad_norm": 3.038499116897583, - "learning_rate": 1.7990460910498806e-05, - "loss": 0.79, + "epoch": 0.05948609644491376, + "grad_norm": 2.6593594551086426, + "learning_rate": 1.3968229854448452e-05, + "loss": 0.6971, "step": 845 }, { - "epoch": 0.06401573909424539, - "grad_norm": 3.5024659633636475, - "learning_rate": 1.7990356961558523e-05, - "loss": 0.9269, + "epoch": 0.05955649419218585, + "grad_norm": 2.6830294132232666, + "learning_rate": 1.3968077778131624e-05, + "loss": 0.8179, "step": 846 }, { - "epoch": 0.06409140781657902, - "grad_norm": 4.1338067054748535, - "learning_rate": 1.7990252449613008e-05, - "loss": 0.9418, + "epoch": 0.059626891939457936, + "grad_norm": 2.8241679668426514, + "learning_rate": 1.3967925339538126e-05, + "loss": 0.8688, "step": 847 }, { - "epoch": 0.06416707653891264, - "grad_norm": 3.374940872192383, - "learning_rate": 1.7990147374668806e-05, - "loss": 0.9184, + "epoch": 0.05969728968673003, + "grad_norm": 3.442892551422119, + "learning_rate": 1.3967772538675875e-05, + "loss": 0.8506, "step": 848 }, { - "epoch": 0.06424274526124626, - "grad_norm": 3.2009170055389404, - "learning_rate": 1.7990041736732497e-05, - "loss": 0.7091, + "epoch": 0.05976768743400211, + "grad_norm": 2.785004138946533, + "learning_rate": 1.396761937555282e-05, + "loss": 0.8459, "step": 849 }, { - "epoch": 0.06431841398357989, - "grad_norm": 4.257187366485596, - "learning_rate": 1.79899355358107e-05, - "loss": 0.958, + "epoch": 0.059838085181274196, + "grad_norm": 2.54880428314209, + "learning_rate": 1.3967465850176926e-05, + "loss": 0.8746, "step": 850 }, { - "epoch": 0.06439408270591351, - "grad_norm": 3.449984550476074, - "learning_rate": 1.798982877191006e-05, - "loss": 0.7824, + "epoch": 0.05990848292854629, + "grad_norm": 2.7987897396087646, + "learning_rate": 1.396731196255617e-05, + "loss": 0.8557, "step": 851 }, { - "epoch": 0.06446975142824714, - "grad_norm": 4.163568019866943, - "learning_rate": 1.798972144503727e-05, - "loss": 0.8174, + "epoch": 0.05997888067581837, + "grad_norm": 3.092935085296631, + "learning_rate": 1.3967157712698556e-05, + "loss": 0.8721, "step": 852 }, { - "epoch": 0.06454542015058076, - "grad_norm": 4.116754531860352, - "learning_rate": 1.7989613555199045e-05, - "loss": 0.69, + "epoch": 0.06004927842309046, + "grad_norm": 2.317570686340332, + "learning_rate": 1.3967003100612103e-05, + "loss": 0.7147, "step": 853 }, { - "epoch": 0.06462108887291439, - "grad_norm": 4.340511322021484, - "learning_rate": 1.798950510240214e-05, - "loss": 0.8673, + "epoch": 0.06011967617036255, + "grad_norm": 2.254296064376831, + "learning_rate": 1.3966848126304848e-05, + "loss": 0.8274, "step": 854 }, { - "epoch": 0.06469675759524801, - "grad_norm": 4.204843521118164, - "learning_rate": 1.798939608665335e-05, - "loss": 0.7685, + "epoch": 0.06019007391763464, + "grad_norm": 2.559051513671875, + "learning_rate": 1.3966692789784851e-05, + "loss": 0.8253, "step": 855 }, { - "epoch": 0.06477242631758162, - "grad_norm": 5.48193359375, - "learning_rate": 1.7989286507959505e-05, - "loss": 0.8221, + "epoch": 0.06026047166490672, + "grad_norm": 3.6009714603424072, + "learning_rate": 1.3966537091060184e-05, + "loss": 0.8067, "step": 856 }, { - "epoch": 0.06484809503991525, - "grad_norm": 3.1936569213867188, - "learning_rate": 1.7989176366327463e-05, - "loss": 0.8692, + "epoch": 0.06033086941217881, + "grad_norm": 2.2450320720672607, + "learning_rate": 1.3966381030138945e-05, + "loss": 0.783, "step": 857 }, { - "epoch": 0.06492376376224887, - "grad_norm": 3.160611867904663, - "learning_rate": 1.7989065661764122e-05, - "loss": 0.7909, + "epoch": 0.0604012671594509, + "grad_norm": 2.143674612045288, + "learning_rate": 1.3966224607029247e-05, + "loss": 0.8417, "step": 858 }, { - "epoch": 0.0649994324845825, - "grad_norm": 2.972747564315796, - "learning_rate": 1.7988954394276416e-05, - "loss": 0.9906, + "epoch": 0.06047166490672298, + "grad_norm": 2.7825140953063965, + "learning_rate": 1.3966067821739225e-05, + "loss": 0.7448, "step": 859 }, { - "epoch": 0.06507510120691612, - "grad_norm": 3.2030298709869385, - "learning_rate": 1.798884256387131e-05, - "loss": 0.8621, + "epoch": 0.060542062653995074, + "grad_norm": 2.6882572174072266, + "learning_rate": 1.3965910674277027e-05, + "loss": 0.8692, "step": 860 }, { - "epoch": 0.06515076992924974, - "grad_norm": 3.12058162689209, - "learning_rate": 1.7988730170555808e-05, - "loss": 0.9119, + "epoch": 0.06061246040126716, + "grad_norm": 2.4077882766723633, + "learning_rate": 1.3965753164650822e-05, + "loss": 0.7442, "step": 861 }, { - "epoch": 0.06522643865158337, - "grad_norm": 3.048793077468872, - "learning_rate": 1.7988617214336953e-05, - "loss": 0.8322, + "epoch": 0.06068285814853925, + "grad_norm": 2.590895652770996, + "learning_rate": 1.3965595292868804e-05, + "loss": 0.7469, "step": 862 }, { - "epoch": 0.06530210737391699, - "grad_norm": 3.0921437740325928, - "learning_rate": 1.7988503695221814e-05, - "loss": 0.8441, + "epoch": 0.060753255895811334, + "grad_norm": 2.3315577507019043, + "learning_rate": 1.3965437058939178e-05, + "loss": 0.8921, "step": 863 }, { - "epoch": 0.06537777609625062, - "grad_norm": 2.826828718185425, - "learning_rate": 1.7988389613217504e-05, - "loss": 0.9022, + "epoch": 0.06082365364308342, + "grad_norm": 2.2158522605895996, + "learning_rate": 1.3965278462870171e-05, + "loss": 0.8957, "step": 864 }, { - "epoch": 0.06545344481858424, - "grad_norm": 2.6267917156219482, - "learning_rate": 1.798827496833116e-05, - "loss": 0.8584, + "epoch": 0.06089405139035551, + "grad_norm": 2.5753707885742188, + "learning_rate": 1.3965119504670028e-05, + "loss": 0.8768, "step": 865 }, { - "epoch": 0.06552911354091787, - "grad_norm": 2.9729437828063965, - "learning_rate": 1.7988159760569968e-05, - "loss": 0.9119, + "epoch": 0.060964449137627594, + "grad_norm": 2.806683301925659, + "learning_rate": 1.3964960184347014e-05, + "loss": 0.714, "step": 866 }, { - "epoch": 0.06560478226325149, - "grad_norm": 2.4244964122772217, - "learning_rate": 1.798804398994114e-05, - "loss": 0.7932, + "epoch": 0.061034846884899685, + "grad_norm": 2.431647539138794, + "learning_rate": 1.3964800501909412e-05, + "loss": 0.7876, "step": 867 }, { - "epoch": 0.06568045098558511, - "grad_norm": 4.761054515838623, - "learning_rate": 1.7987927656451928e-05, - "loss": 0.9412, + "epoch": 0.06110524463217177, + "grad_norm": 2.6119256019592285, + "learning_rate": 1.3964640457365526e-05, + "loss": 0.6665, "step": 868 }, { - "epoch": 0.06575611970791873, - "grad_norm": 2.717557191848755, - "learning_rate": 1.7987810760109615e-05, - "loss": 0.7506, + "epoch": 0.06117564237944386, + "grad_norm": 2.294191598892212, + "learning_rate": 1.3964480050723674e-05, + "loss": 0.8057, "step": 869 }, { - "epoch": 0.06583178843025235, - "grad_norm": 3.001830577850342, - "learning_rate": 1.798769330092152e-05, - "loss": 0.8709, + "epoch": 0.061246040126715945, + "grad_norm": 2.7430543899536133, + "learning_rate": 1.3964319281992194e-05, + "loss": 0.7695, "step": 870 }, { - "epoch": 0.06590745715258597, - "grad_norm": 3.638742208480835, - "learning_rate": 1.7987575278895005e-05, - "loss": 0.7777, + "epoch": 0.06131643787398803, + "grad_norm": 2.3632636070251465, + "learning_rate": 1.3964158151179451e-05, + "loss": 0.7169, "step": 871 }, { - "epoch": 0.0659831258749196, - "grad_norm": 2.8013672828674316, - "learning_rate": 1.798745669403745e-05, - "loss": 0.7571, + "epoch": 0.06138683562126012, + "grad_norm": 2.2400379180908203, + "learning_rate": 1.3963996658293816e-05, + "loss": 0.811, "step": 872 }, { - "epoch": 0.06605879459725322, - "grad_norm": 2.839331865310669, - "learning_rate": 1.7987337546356293e-05, - "loss": 0.7515, + "epoch": 0.061457233368532205, + "grad_norm": 2.3603732585906982, + "learning_rate": 1.396383480334369e-05, + "loss": 0.8874, "step": 873 }, { - "epoch": 0.06613446331958685, - "grad_norm": 3.6502885818481445, - "learning_rate": 1.798721783585899e-05, - "loss": 0.7892, + "epoch": 0.061527631115804296, + "grad_norm": 2.2423322200775146, + "learning_rate": 1.3963672586337485e-05, + "loss": 0.9054, "step": 874 }, { - "epoch": 0.06621013204192047, - "grad_norm": 2.118971347808838, - "learning_rate": 1.7987097562553037e-05, - "loss": 0.9736, + "epoch": 0.06159802886307638, + "grad_norm": 2.2417402267456055, + "learning_rate": 1.3963510007283636e-05, + "loss": 0.7709, "step": 875 }, { - "epoch": 0.0662858007642541, - "grad_norm": 2.8734261989593506, - "learning_rate": 1.7986976726445966e-05, - "loss": 0.783, + "epoch": 0.06166842661034847, + "grad_norm": 2.196552276611328, + "learning_rate": 1.3963347066190593e-05, + "loss": 0.8201, "step": 876 }, { - "epoch": 0.06636146948658772, - "grad_norm": 2.609933376312256, - "learning_rate": 1.7986855327545346e-05, - "loss": 0.7125, + "epoch": 0.061738824357620556, + "grad_norm": 3.877890110015869, + "learning_rate": 1.3963183763066833e-05, + "loss": 0.7808, "step": 877 }, { - "epoch": 0.06643713820892135, - "grad_norm": 3.158010721206665, - "learning_rate": 1.798673336585878e-05, - "loss": 0.7234, + "epoch": 0.06180922210489264, + "grad_norm": 2.4083194732666016, + "learning_rate": 1.3963020097920842e-05, + "loss": 0.7178, "step": 878 }, { - "epoch": 0.06651280693125497, - "grad_norm": 3.257824182510376, - "learning_rate": 1.7986610841393902e-05, - "loss": 0.9167, + "epoch": 0.06187961985216473, + "grad_norm": 2.343318223953247, + "learning_rate": 1.3962856070761128e-05, + "loss": 0.9155, "step": 879 }, { - "epoch": 0.0665884756535886, - "grad_norm": 2.97019100189209, - "learning_rate": 1.7986487754158386e-05, - "loss": 0.7206, + "epoch": 0.061950017599436816, + "grad_norm": 3.645878791809082, + "learning_rate": 1.3962691681596223e-05, + "loss": 0.772, "step": 880 }, { - "epoch": 0.06666414437592222, - "grad_norm": 2.5462703704833984, - "learning_rate": 1.7986364104159942e-05, - "loss": 0.8476, + "epoch": 0.06202041534670891, + "grad_norm": 2.770340919494629, + "learning_rate": 1.396252693043467e-05, + "loss": 0.7787, "step": 881 }, { - "epoch": 0.06673981309825583, - "grad_norm": 3.024618625640869, - "learning_rate": 1.7986239891406314e-05, - "loss": 0.929, + "epoch": 0.06209081309398099, + "grad_norm": 2.5928235054016113, + "learning_rate": 1.3962361817285039e-05, + "loss": 0.7318, "step": 882 }, { - "epoch": 0.06681548182058945, - "grad_norm": 4.000933647155762, - "learning_rate": 1.7986115115905276e-05, - "loss": 0.8126, + "epoch": 0.06216121084125308, + "grad_norm": 3.6422841548919678, + "learning_rate": 1.3962196342155907e-05, + "loss": 0.7569, "step": 883 }, { - "epoch": 0.06689115054292308, - "grad_norm": 2.9372494220733643, - "learning_rate": 1.798598977766465e-05, - "loss": 0.8261, + "epoch": 0.06223160858852517, + "grad_norm": 2.1934292316436768, + "learning_rate": 1.3962030505055887e-05, + "loss": 0.7066, "step": 884 }, { - "epoch": 0.0669668192652567, - "grad_norm": 2.813204526901245, - "learning_rate": 1.7985863876692276e-05, - "loss": 0.8327, + "epoch": 0.06230200633579725, + "grad_norm": 2.57673716545105, + "learning_rate": 1.3961864305993592e-05, + "loss": 0.8142, "step": 885 }, { - "epoch": 0.06704248798759033, - "grad_norm": 3.385720729827881, - "learning_rate": 1.798573741299604e-05, - "loss": 0.7472, + "epoch": 0.06237240408306934, + "grad_norm": 2.441199541091919, + "learning_rate": 1.3961697744977668e-05, + "loss": 0.8148, "step": 886 }, { - "epoch": 0.06711815670992395, - "grad_norm": 2.617894172668457, - "learning_rate": 1.798561038658387e-05, - "loss": 0.781, + "epoch": 0.06244280183034143, + "grad_norm": 2.782151699066162, + "learning_rate": 1.3961530822016773e-05, + "loss": 0.8528, "step": 887 }, { - "epoch": 0.06719382543225758, - "grad_norm": 3.153611660003662, - "learning_rate": 1.798548279746371e-05, - "loss": 0.7154, + "epoch": 0.06251319957761352, + "grad_norm": 2.274099588394165, + "learning_rate": 1.3961363537119586e-05, + "loss": 0.7547, "step": 888 }, { - "epoch": 0.0672694941545912, - "grad_norm": 2.9759254455566406, - "learning_rate": 1.7985354645643556e-05, - "loss": 0.7758, + "epoch": 0.06258359732488561, + "grad_norm": 2.8162405490875244, + "learning_rate": 1.3961195890294807e-05, + "loss": 0.7486, "step": 889 }, { - "epoch": 0.06734516287692482, - "grad_norm": 3.233285665512085, - "learning_rate": 1.798522593113143e-05, - "loss": 0.8326, + "epoch": 0.06265399507215769, + "grad_norm": 2.324718713760376, + "learning_rate": 1.3961027881551145e-05, + "loss": 0.8116, "step": 890 }, { - "epoch": 0.06742083159925845, - "grad_norm": 3.2557930946350098, - "learning_rate": 1.7985096653935396e-05, - "loss": 0.7994, + "epoch": 0.06272439281942978, + "grad_norm": 2.2138397693634033, + "learning_rate": 1.3960859510897341e-05, + "loss": 0.9492, "step": 891 }, { - "epoch": 0.06749650032159207, - "grad_norm": 3.4396860599517822, - "learning_rate": 1.7984966814063547e-05, - "loss": 0.8146, + "epoch": 0.06279479056670187, + "grad_norm": 2.5554590225219727, + "learning_rate": 1.3960690778342147e-05, + "loss": 0.8364, "step": 892 }, { - "epoch": 0.0675721690439257, - "grad_norm": 2.9307057857513428, - "learning_rate": 1.7984836411524018e-05, - "loss": 0.8404, + "epoch": 0.06286518831397395, + "grad_norm": 2.6081950664520264, + "learning_rate": 1.3960521683894334e-05, + "loss": 0.8786, "step": 893 }, { - "epoch": 0.06764783776625932, - "grad_norm": 3.1052684783935547, - "learning_rate": 1.798470544632497e-05, - "loss": 0.9605, + "epoch": 0.06293558606124604, + "grad_norm": 2.758977174758911, + "learning_rate": 1.3960352227562696e-05, + "loss": 0.7312, "step": 894 }, { - "epoch": 0.06772350648859295, - "grad_norm": 3.313931465148926, - "learning_rate": 1.798457391847461e-05, - "loss": 0.7114, + "epoch": 0.06300598380851813, + "grad_norm": 2.5388519763946533, + "learning_rate": 1.3960182409356042e-05, + "loss": 0.9258, "step": 895 }, { - "epoch": 0.06779917521092656, - "grad_norm": 3.335641860961914, - "learning_rate": 1.7984441827981166e-05, - "loss": 0.9155, + "epoch": 0.06307638155579022, + "grad_norm": 2.8218441009521484, + "learning_rate": 1.3960012229283202e-05, + "loss": 0.8577, "step": 896 }, { - "epoch": 0.06787484393326018, - "grad_norm": 2.163098096847534, - "learning_rate": 1.7984309174852918e-05, - "loss": 0.807, + "epoch": 0.0631467793030623, + "grad_norm": 2.5186567306518555, + "learning_rate": 1.395984168735302e-05, + "loss": 0.6195, "step": 897 }, { - "epoch": 0.0679505126555938, - "grad_norm": 3.4636337757110596, - "learning_rate": 1.7984175959098172e-05, - "loss": 0.7748, + "epoch": 0.06321717705033439, + "grad_norm": 2.2771897315979004, + "learning_rate": 1.395967078357437e-05, + "loss": 0.7535, "step": 898 }, { - "epoch": 0.06802618137792743, - "grad_norm": 2.3630826473236084, - "learning_rate": 1.798404218072527e-05, - "loss": 0.7619, + "epoch": 0.06328757479760648, + "grad_norm": 2.7869532108306885, + "learning_rate": 1.3959499517956129e-05, + "loss": 0.8432, "step": 899 }, { - "epoch": 0.06810185010026105, - "grad_norm": 2.6266446113586426, - "learning_rate": 1.7983907839742587e-05, - "loss": 0.9418, + "epoch": 0.06335797254487856, + "grad_norm": 2.3160929679870605, + "learning_rate": 1.3959327890507207e-05, + "loss": 0.8497, "step": 900 }, { - "epoch": 0.06817751882259468, - "grad_norm": 2.760838747024536, - "learning_rate": 1.798377293615854e-05, - "loss": 0.836, + "epoch": 0.06342837029215065, + "grad_norm": 2.9046618938446045, + "learning_rate": 1.3959155901236526e-05, + "loss": 0.8242, "step": 901 }, { - "epoch": 0.0682531875449283, - "grad_norm": 3.108145236968994, - "learning_rate": 1.798363746998157e-05, - "loss": 0.7683, + "epoch": 0.06349876803942274, + "grad_norm": 2.5057413578033447, + "learning_rate": 1.3958983550153024e-05, + "loss": 0.8951, "step": 902 }, { - "epoch": 0.06832885626726193, - "grad_norm": 2.807042360305786, - "learning_rate": 1.7983501441220168e-05, - "loss": 0.9376, + "epoch": 0.06356916578669483, + "grad_norm": 2.5708539485931396, + "learning_rate": 1.3958810837265666e-05, + "loss": 0.7396, "step": 903 }, { - "epoch": 0.06840452498959555, - "grad_norm": 3.531285047531128, - "learning_rate": 1.798336484988285e-05, - "loss": 0.7328, + "epoch": 0.06363956353396691, + "grad_norm": 2.965043306350708, + "learning_rate": 1.3958637762583432e-05, + "loss": 0.8626, "step": 904 }, { - "epoch": 0.06848019371192918, - "grad_norm": 3.469963788986206, - "learning_rate": 1.7983227695978168e-05, - "loss": 0.7034, + "epoch": 0.063709961281239, + "grad_norm": 2.5182418823242188, + "learning_rate": 1.3958464326115317e-05, + "loss": 0.8784, "step": 905 }, { - "epoch": 0.0685558624342628, - "grad_norm": 3.210841417312622, - "learning_rate": 1.798308997951471e-05, - "loss": 0.6951, + "epoch": 0.06378035902851109, + "grad_norm": 2.505825996398926, + "learning_rate": 1.395829052787034e-05, + "loss": 0.8719, "step": 906 }, { - "epoch": 0.06863153115659643, - "grad_norm": 2.795273542404175, - "learning_rate": 1.798295170050111e-05, - "loss": 0.7853, + "epoch": 0.06385075677578317, + "grad_norm": 2.88649320602417, + "learning_rate": 1.3958116367857537e-05, + "loss": 0.8756, "step": 907 }, { - "epoch": 0.06870719987893005, - "grad_norm": 4.241882801055908, - "learning_rate": 1.7982812858946015e-05, - "loss": 0.8056, + "epoch": 0.06392115452305526, + "grad_norm": 2.516996383666992, + "learning_rate": 1.3957941846085964e-05, + "loss": 0.8033, "step": 908 }, { - "epoch": 0.06878286860126366, - "grad_norm": 2.5910651683807373, - "learning_rate": 1.7982673454858125e-05, - "loss": 0.6758, + "epoch": 0.06399155227032735, + "grad_norm": 2.2492072582244873, + "learning_rate": 1.395776696256469e-05, + "loss": 0.555, "step": 909 }, { - "epoch": 0.06885853732359729, - "grad_norm": 3.2898170948028564, - "learning_rate": 1.798253348824617e-05, - "loss": 0.8563, + "epoch": 0.06406195001759944, + "grad_norm": 2.584751844406128, + "learning_rate": 1.3957591717302811e-05, + "loss": 0.7872, "step": 910 }, { - "epoch": 0.06893420604593091, - "grad_norm": 3.170915126800537, - "learning_rate": 1.7982392959118914e-05, - "loss": 0.9903, + "epoch": 0.06413234776487152, + "grad_norm": 2.3776657581329346, + "learning_rate": 1.3957416110309438e-05, + "loss": 0.7906, "step": 911 }, { - "epoch": 0.06900987476826453, - "grad_norm": 2.6784350872039795, - "learning_rate": 1.7982251867485162e-05, - "loss": 0.82, + "epoch": 0.06420274551214361, + "grad_norm": 2.716198682785034, + "learning_rate": 1.39572401415937e-05, + "loss": 0.9037, "step": 912 }, { - "epoch": 0.06908554349059816, - "grad_norm": 2.870120048522949, - "learning_rate": 1.798211021335374e-05, - "loss": 0.7205, + "epoch": 0.0642731432594157, + "grad_norm": 2.3903534412384033, + "learning_rate": 1.3957063811164747e-05, + "loss": 0.7966, "step": 913 }, { - "epoch": 0.06916121221293178, - "grad_norm": 3.627228260040283, - "learning_rate": 1.798196799673353e-05, - "loss": 0.7787, + "epoch": 0.06434354100668778, + "grad_norm": 3.060811996459961, + "learning_rate": 1.3956887119031746e-05, + "loss": 0.862, "step": 914 }, { - "epoch": 0.06923688093526541, - "grad_norm": 3.563584089279175, - "learning_rate": 1.7981825217633433e-05, - "loss": 0.9949, + "epoch": 0.06441393875395987, + "grad_norm": 2.5334577560424805, + "learning_rate": 1.3956710065203883e-05, + "loss": 0.8558, "step": 915 }, { - "epoch": 0.06931254965759903, - "grad_norm": 3.695765495300293, - "learning_rate": 1.7981681876062388e-05, - "loss": 0.673, + "epoch": 0.06448433650123196, + "grad_norm": 3.0273704528808594, + "learning_rate": 1.3956532649690363e-05, + "loss": 0.7837, "step": 916 }, { - "epoch": 0.06938821837993266, - "grad_norm": 3.3603649139404297, - "learning_rate": 1.798153797202937e-05, - "loss": 0.9694, + "epoch": 0.06455473424850405, + "grad_norm": 2.472519636154175, + "learning_rate": 1.3956354872500409e-05, + "loss": 0.758, "step": 917 }, { - "epoch": 0.06946388710226628, - "grad_norm": 3.820831537246704, - "learning_rate": 1.7981393505543403e-05, - "loss": 0.9224, + "epoch": 0.06462513199577613, + "grad_norm": 2.5911266803741455, + "learning_rate": 1.3956176733643265e-05, + "loss": 0.8698, "step": 918 }, { - "epoch": 0.0695395558245999, - "grad_norm": 3.589085102081299, - "learning_rate": 1.798124847661352e-05, - "loss": 0.8123, + "epoch": 0.06469552974304822, + "grad_norm": 3.0595457553863525, + "learning_rate": 1.3955998233128195e-05, + "loss": 0.8511, "step": 919 }, { - "epoch": 0.06961522454693353, - "grad_norm": 3.0185937881469727, - "learning_rate": 1.798110288524881e-05, - "loss": 0.8082, + "epoch": 0.06476592749032031, + "grad_norm": 3.6815438270568848, + "learning_rate": 1.3955819370964476e-05, + "loss": 0.8021, "step": 920 }, { - "epoch": 0.06969089326926715, - "grad_norm": 3.6897995471954346, - "learning_rate": 1.7980956731458387e-05, - "loss": 0.9175, + "epoch": 0.06483632523759239, + "grad_norm": 2.643683433532715, + "learning_rate": 1.3955640147161409e-05, + "loss": 0.8538, "step": 921 }, { - "epoch": 0.06976656199160078, - "grad_norm": 3.113912582397461, - "learning_rate": 1.7980810015251407e-05, - "loss": 0.888, + "epoch": 0.06490672298486448, + "grad_norm": 2.6022913455963135, + "learning_rate": 1.395546056172831e-05, + "loss": 0.8202, "step": 922 }, { - "epoch": 0.06984223071393439, - "grad_norm": 2.264333486557007, - "learning_rate": 1.7980662736637054e-05, - "loss": 0.6739, + "epoch": 0.06497712073213657, + "grad_norm": 2.823735475540161, + "learning_rate": 1.3955280614674517e-05, + "loss": 0.7663, "step": 923 }, { - "epoch": 0.06991789943626801, - "grad_norm": 1.843481421470642, - "learning_rate": 1.7980514895624558e-05, - "loss": 1.0251, + "epoch": 0.06504751847940866, + "grad_norm": 2.8828020095825195, + "learning_rate": 1.3955100306009388e-05, + "loss": 0.7653, "step": 924 }, { - "epoch": 0.06999356815860164, - "grad_norm": 3.6482927799224854, - "learning_rate": 1.798036649222317e-05, - "loss": 0.7277, + "epoch": 0.06511791622668074, + "grad_norm": 2.5093917846679688, + "learning_rate": 1.3954919635742294e-05, + "loss": 0.8627, "step": 925 }, { - "epoch": 0.07006923688093526, - "grad_norm": 11.99400520324707, - "learning_rate": 1.7980217526442186e-05, - "loss": 0.9066, + "epoch": 0.06518831397395283, + "grad_norm": 2.5644984245300293, + "learning_rate": 1.3954738603882629e-05, + "loss": 0.8325, "step": 926 }, { - "epoch": 0.07014490560326889, - "grad_norm": 2.5644752979278564, - "learning_rate": 1.7980067998290935e-05, - "loss": 0.887, + "epoch": 0.06525871172122492, + "grad_norm": 2.372300624847412, + "learning_rate": 1.3954557210439807e-05, + "loss": 0.7594, "step": 927 }, { - "epoch": 0.07022057432560251, - "grad_norm": 3.71718692779541, - "learning_rate": 1.797991790777878e-05, - "loss": 0.8685, + "epoch": 0.065329109468497, + "grad_norm": 2.7516050338745117, + "learning_rate": 1.3954375455423254e-05, + "loss": 0.7429, "step": 928 }, { - "epoch": 0.07029624304793614, - "grad_norm": 2.8822622299194336, - "learning_rate": 1.797976725491512e-05, - "loss": 0.8336, + "epoch": 0.06539950721576909, + "grad_norm": 2.5907065868377686, + "learning_rate": 1.3954193338842425e-05, + "loss": 0.7783, "step": 929 }, { - "epoch": 0.07037191177026976, - "grad_norm": 2.9357829093933105, - "learning_rate": 1.7979616039709396e-05, - "loss": 0.8856, + "epoch": 0.06546990496304118, + "grad_norm": 2.336833953857422, + "learning_rate": 1.3954010860706786e-05, + "loss": 0.9542, "step": 930 }, { - "epoch": 0.07044758049260338, - "grad_norm": 2.640735387802124, - "learning_rate": 1.7979464262171067e-05, - "loss": 0.7398, + "epoch": 0.06554030271031328, + "grad_norm": 2.9466209411621094, + "learning_rate": 1.3953828021025824e-05, + "loss": 0.7254, "step": 931 }, { - "epoch": 0.07052324921493701, - "grad_norm": 3.1476693153381348, - "learning_rate": 1.7979311922309645e-05, - "loss": 0.9748, + "epoch": 0.06561070045758535, + "grad_norm": 2.976130723953247, + "learning_rate": 1.3953644819809048e-05, + "loss": 0.8004, "step": 932 }, { - "epoch": 0.07059891793727063, - "grad_norm": 2.6864423751831055, - "learning_rate": 1.7979159020134668e-05, - "loss": 0.7716, + "epoch": 0.06568109820485744, + "grad_norm": 2.1289875507354736, + "learning_rate": 1.3953461257065976e-05, + "loss": 0.8877, "step": 933 }, { - "epoch": 0.07067458665960426, - "grad_norm": 2.750220537185669, - "learning_rate": 1.797900555565571e-05, - "loss": 0.8742, + "epoch": 0.06575149595212954, + "grad_norm": 2.133739709854126, + "learning_rate": 1.395327733280616e-05, + "loss": 0.6603, "step": 934 }, { - "epoch": 0.07075025538193788, - "grad_norm": 2.5933568477630615, - "learning_rate": 1.7978851528882382e-05, - "loss": 0.833, + "epoch": 0.06582189369940161, + "grad_norm": 2.2336537837982178, + "learning_rate": 1.3953093047039154e-05, + "loss": 0.802, "step": 935 }, { - "epoch": 0.07082592410427149, - "grad_norm": 2.8534131050109863, - "learning_rate": 1.7978696939824333e-05, - "loss": 0.8054, + "epoch": 0.0658922914466737, + "grad_norm": 2.4950695037841797, + "learning_rate": 1.3952908399774547e-05, + "loss": 0.8386, "step": 936 }, { - "epoch": 0.07090159282660512, - "grad_norm": 3.7665860652923584, - "learning_rate": 1.7978541788491237e-05, - "loss": 0.9409, + "epoch": 0.0659626891939458, + "grad_norm": 2.5089645385742188, + "learning_rate": 1.3952723391021932e-05, + "loss": 0.7866, "step": 937 }, { - "epoch": 0.07097726154893874, - "grad_norm": 2.939113140106201, - "learning_rate": 1.7978386074892816e-05, - "loss": 0.8041, + "epoch": 0.06603308694121789, + "grad_norm": 2.930619716644287, + "learning_rate": 1.3952538020790935e-05, + "loss": 0.8368, "step": 938 }, { - "epoch": 0.07105293027127237, - "grad_norm": 3.101107597351074, - "learning_rate": 1.7978229799038816e-05, - "loss": 0.8247, + "epoch": 0.06610348468848996, + "grad_norm": 2.3006300926208496, + "learning_rate": 1.3952352289091189e-05, + "loss": 0.914, "step": 939 }, { - "epoch": 0.07112859899360599, - "grad_norm": 2.7688238620758057, - "learning_rate": 1.7978072960939034e-05, - "loss": 0.8326, + "epoch": 0.06617388243576205, + "grad_norm": 2.725554943084717, + "learning_rate": 1.3952166195932348e-05, + "loss": 0.7082, "step": 940 }, { - "epoch": 0.07120426771593961, - "grad_norm": 3.1420252323150635, - "learning_rate": 1.797791556060328e-05, - "loss": 0.8231, + "epoch": 0.06624428018303415, + "grad_norm": 2.626526355743408, + "learning_rate": 1.3951979741324091e-05, + "loss": 0.8396, "step": 941 }, { - "epoch": 0.07127993643827324, - "grad_norm": 2.776109218597412, - "learning_rate": 1.7977757598041417e-05, - "loss": 0.7977, + "epoch": 0.06631467793030622, + "grad_norm": 2.8915317058563232, + "learning_rate": 1.3951792925276112e-05, + "loss": 0.7729, "step": 942 }, { - "epoch": 0.07135560516060686, - "grad_norm": 4.262285232543945, - "learning_rate": 1.7977599073263335e-05, - "loss": 0.7962, + "epoch": 0.06638507567757831, + "grad_norm": 2.6034092903137207, + "learning_rate": 1.3951605747798124e-05, + "loss": 0.8324, "step": 943 }, { - "epoch": 0.07143127388294049, - "grad_norm": 3.1178438663482666, - "learning_rate": 1.7977439986278962e-05, - "loss": 0.8491, + "epoch": 0.0664554734248504, + "grad_norm": 2.2596564292907715, + "learning_rate": 1.3951418208899856e-05, + "loss": 0.9193, "step": 944 }, { - "epoch": 0.07150694260527411, - "grad_norm": 3.3614895343780518, - "learning_rate": 1.797728033709826e-05, - "loss": 0.9089, + "epoch": 0.0665258711721225, + "grad_norm": 3.0044050216674805, + "learning_rate": 1.3951230308591062e-05, + "loss": 0.8466, "step": 945 }, { - "epoch": 0.07158261132760774, - "grad_norm": 2.6752171516418457, - "learning_rate": 1.797712012573123e-05, - "loss": 0.9587, + "epoch": 0.06659626891939457, + "grad_norm": 3.2138712406158447, + "learning_rate": 1.3951042046881506e-05, + "loss": 0.8262, "step": 946 }, { - "epoch": 0.07165828004994136, - "grad_norm": 3.405928373336792, - "learning_rate": 1.79769593521879e-05, - "loss": 0.8991, + "epoch": 0.06666666666666667, + "grad_norm": 2.708765983581543, + "learning_rate": 1.395085342378098e-05, + "loss": 0.7262, "step": 947 }, { - "epoch": 0.07173394877227499, - "grad_norm": 2.2228682041168213, - "learning_rate": 1.7976798016478336e-05, - "loss": 1.106, + "epoch": 0.06673706441393876, + "grad_norm": 2.777843713760376, + "learning_rate": 1.395066443929929e-05, + "loss": 0.747, "step": 948 }, { - "epoch": 0.07180961749460861, - "grad_norm": 2.7371156215667725, - "learning_rate": 1.797663611861265e-05, - "loss": 0.8074, + "epoch": 0.06680746216121083, + "grad_norm": 2.318438768386841, + "learning_rate": 1.3950475093446258e-05, + "loss": 0.6696, "step": 949 }, { - "epoch": 0.07188528621694222, - "grad_norm": 3.274010181427002, - "learning_rate": 1.7976473658600977e-05, - "loss": 0.8784, + "epoch": 0.06687785990848293, + "grad_norm": 2.8077874183654785, + "learning_rate": 1.3950285386231735e-05, + "loss": 0.8441, "step": 950 }, { - "epoch": 0.07196095493927585, - "grad_norm": 3.2630934715270996, - "learning_rate": 1.797631063645349e-05, - "loss": 0.929, + "epoch": 0.06694825765575502, + "grad_norm": 2.369778871536255, + "learning_rate": 1.395009531766558e-05, + "loss": 0.8648, "step": 951 }, { - "epoch": 0.07203662366160947, - "grad_norm": 3.075411796569824, - "learning_rate": 1.7976147052180395e-05, - "loss": 0.7251, + "epoch": 0.06701865540302711, + "grad_norm": 3.0671184062957764, + "learning_rate": 1.3949904887757672e-05, + "loss": 0.7907, "step": 952 }, { - "epoch": 0.0721122923839431, - "grad_norm": 2.965583324432373, - "learning_rate": 1.797598290579194e-05, - "loss": 0.7216, + "epoch": 0.06708905315029919, + "grad_norm": 2.6719393730163574, + "learning_rate": 1.3949714096517917e-05, + "loss": 0.7698, "step": 953 }, { - "epoch": 0.07218796110627672, - "grad_norm": 2.7841546535491943, - "learning_rate": 1.797581819729841e-05, - "loss": 0.8072, + "epoch": 0.06715945089757128, + "grad_norm": 2.499262809753418, + "learning_rate": 1.394952294395623e-05, + "loss": 0.6852, "step": 954 }, { - "epoch": 0.07226362982861034, - "grad_norm": 3.408371686935425, - "learning_rate": 1.7975652926710108e-05, - "loss": 0.7652, + "epoch": 0.06722984864484337, + "grad_norm": 3.1569724082946777, + "learning_rate": 1.3949331430082553e-05, + "loss": 0.8786, "step": 955 }, { - "epoch": 0.07233929855094397, - "grad_norm": 3.180001974105835, - "learning_rate": 1.7975487094037386e-05, - "loss": 0.9272, + "epoch": 0.06730024639211546, + "grad_norm": 2.5234477519989014, + "learning_rate": 1.394913955490684e-05, + "loss": 0.7482, "step": 956 }, { - "epoch": 0.07241496727327759, - "grad_norm": 3.346219301223755, - "learning_rate": 1.7975320699290637e-05, - "loss": 0.9778, + "epoch": 0.06737064413938754, + "grad_norm": 2.2858223915100098, + "learning_rate": 1.394894731843907e-05, + "loss": 0.8151, "step": 957 }, { - "epoch": 0.07249063599561122, - "grad_norm": 2.9968905448913574, - "learning_rate": 1.7975153742480274e-05, - "loss": 0.8965, + "epoch": 0.06744104188665963, + "grad_norm": 2.7205429077148438, + "learning_rate": 1.3948754720689235e-05, + "loss": 0.7942, "step": 958 }, { - "epoch": 0.07256630471794484, - "grad_norm": 3.1787264347076416, - "learning_rate": 1.7974986223616754e-05, - "loss": 0.7344, + "epoch": 0.06751143963393172, + "grad_norm": 3.3332679271698, + "learning_rate": 1.3948561761667348e-05, + "loss": 0.8172, "step": 959 }, { - "epoch": 0.07264197344027847, - "grad_norm": 3.266357898712158, - "learning_rate": 1.797481814271057e-05, - "loss": 0.8381, + "epoch": 0.0675818373812038, + "grad_norm": 2.451005458831787, + "learning_rate": 1.3948368441383442e-05, + "loss": 0.8328, "step": 960 }, { - "epoch": 0.07271764216261209, - "grad_norm": 3.33705472946167, - "learning_rate": 1.7974649499772244e-05, - "loss": 0.745, + "epoch": 0.06765223512847589, + "grad_norm": 2.8664941787719727, + "learning_rate": 1.394817475984757e-05, + "loss": 0.9442, "step": 961 }, { - "epoch": 0.07279331088494571, - "grad_norm": 3.2236170768737793, - "learning_rate": 1.797448029481234e-05, - "loss": 0.7465, + "epoch": 0.06772263287574798, + "grad_norm": 2.4130663871765137, + "learning_rate": 1.3947980717069798e-05, + "loss": 0.7826, "step": 962 }, { - "epoch": 0.07286897960727932, - "grad_norm": 3.4352869987487793, - "learning_rate": 1.797431052784145e-05, - "loss": 1.004, + "epoch": 0.06779303062302007, + "grad_norm": 2.7792553901672363, + "learning_rate": 1.3947786313060217e-05, + "loss": 0.8956, "step": 963 }, { - "epoch": 0.07294464832961295, - "grad_norm": 3.1209468841552734, - "learning_rate": 1.797414019887021e-05, - "loss": 0.9475, + "epoch": 0.06786342837029215, + "grad_norm": 2.4310874938964844, + "learning_rate": 1.394759154782893e-05, + "loss": 0.8183, "step": 964 }, { - "epoch": 0.07302031705194657, - "grad_norm": 7.6214823722839355, - "learning_rate": 1.7973969307909286e-05, - "loss": 0.8257, + "epoch": 0.06793382611756424, + "grad_norm": 2.2132728099823, + "learning_rate": 1.394739642138607e-05, + "loss": 0.8695, "step": 965 }, { - "epoch": 0.0730959857742802, - "grad_norm": 3.500762939453125, - "learning_rate": 1.797379785496938e-05, - "loss": 0.8723, + "epoch": 0.06800422386483633, + "grad_norm": 2.590578556060791, + "learning_rate": 1.3947200933741775e-05, + "loss": 0.8263, "step": 966 }, { - "epoch": 0.07317165449661382, - "grad_norm": 3.0872161388397217, - "learning_rate": 1.7973625840061224e-05, - "loss": 0.8551, + "epoch": 0.06807462161210841, + "grad_norm": 3.2390189170837402, + "learning_rate": 1.3947005084906214e-05, + "loss": 0.7805, "step": 967 }, { - "epoch": 0.07324732321894745, - "grad_norm": 3.6307787895202637, - "learning_rate": 1.7973453263195595e-05, - "loss": 0.8331, + "epoch": 0.0681450193593805, + "grad_norm": 2.4756510257720947, + "learning_rate": 1.3946808874889567e-05, + "loss": 0.7554, "step": 968 }, { - "epoch": 0.07332299194128107, - "grad_norm": 3.153038501739502, - "learning_rate": 1.79732801243833e-05, - "loss": 0.8229, + "epoch": 0.06821541710665259, + "grad_norm": 2.925624370574951, + "learning_rate": 1.3946612303702034e-05, + "loss": 0.76, "step": 969 }, { - "epoch": 0.0733986606636147, - "grad_norm": 1.7069755792617798, - "learning_rate": 1.797310642363518e-05, - "loss": 0.9515, + "epoch": 0.06828581485392468, + "grad_norm": 2.5858829021453857, + "learning_rate": 1.3946415371353837e-05, + "loss": 0.8139, "step": 970 }, { - "epoch": 0.07347432938594832, - "grad_norm": 2.6972126960754395, - "learning_rate": 1.797293216096211e-05, - "loss": 0.7397, + "epoch": 0.06835621260119676, + "grad_norm": 2.9992947578430176, + "learning_rate": 1.3946218077855214e-05, + "loss": 0.7655, "step": 971 }, { - "epoch": 0.07354999810828194, - "grad_norm": 2.9356179237365723, - "learning_rate": 1.7972757336375012e-05, - "loss": 0.8123, + "epoch": 0.06842661034846885, + "grad_norm": 2.6279289722442627, + "learning_rate": 1.3946020423216422e-05, + "loss": 0.8007, "step": 972 }, { - "epoch": 0.07362566683061557, - "grad_norm": 2.5552573204040527, - "learning_rate": 1.7972581949884823e-05, - "loss": 0.8397, + "epoch": 0.06849700809574094, + "grad_norm": 3.1828341484069824, + "learning_rate": 1.3945822407447737e-05, + "loss": 0.8132, "step": 973 }, { - "epoch": 0.0737013355529492, - "grad_norm": 2.462688684463501, - "learning_rate": 1.7972406001502535e-05, - "loss": 0.8085, + "epoch": 0.06856740584301302, + "grad_norm": 2.6185081005096436, + "learning_rate": 1.3945624030559456e-05, + "loss": 0.7004, "step": 974 }, { - "epoch": 0.07377700427528282, - "grad_norm": 2.716464042663574, - "learning_rate": 1.797222949123916e-05, - "loss": 1.0184, + "epoch": 0.06863780359028511, + "grad_norm": 2.731748342514038, + "learning_rate": 1.3945425292561889e-05, + "loss": 0.6964, "step": 975 }, { - "epoch": 0.07385267299761644, - "grad_norm": 2.534637451171875, - "learning_rate": 1.797205241910576e-05, - "loss": 0.769, + "epoch": 0.0687082013375572, + "grad_norm": 2.537097454071045, + "learning_rate": 1.3945226193465372e-05, + "loss": 0.8247, "step": 976 }, { - "epoch": 0.07392834171995005, - "grad_norm": 2.6971538066864014, - "learning_rate": 1.797187478511341e-05, - "loss": 0.8612, + "epoch": 0.06877859908482929, + "grad_norm": 2.258087158203125, + "learning_rate": 1.3945026733280255e-05, + "loss": 0.6482, "step": 977 }, { - "epoch": 0.07400401044228368, - "grad_norm": 2.319307565689087, - "learning_rate": 1.797169658927325e-05, - "loss": 0.6711, + "epoch": 0.06884899683210137, + "grad_norm": 2.617506265640259, + "learning_rate": 1.394482691201691e-05, + "loss": 0.7944, "step": 978 }, { - "epoch": 0.0740796791646173, - "grad_norm": 3.083146333694458, - "learning_rate": 1.7971517831596428e-05, - "loss": 0.9988, + "epoch": 0.06891939457937346, + "grad_norm": 2.6988179683685303, + "learning_rate": 1.3944626729685724e-05, + "loss": 0.7084, "step": 979 }, { - "epoch": 0.07415534788695093, - "grad_norm": 3.323866367340088, - "learning_rate": 1.7971338512094144e-05, - "loss": 0.83, + "epoch": 0.06898979232664555, + "grad_norm": 2.727134943008423, + "learning_rate": 1.3944426186297103e-05, + "loss": 0.8194, "step": 980 }, { - "epoch": 0.07423101660928455, - "grad_norm": 2.6332504749298096, - "learning_rate": 1.7971158630777623e-05, - "loss": 0.8075, + "epoch": 0.06906019007391763, + "grad_norm": 2.4431238174438477, + "learning_rate": 1.3944225281861477e-05, + "loss": 0.8124, "step": 981 }, { - "epoch": 0.07430668533161817, - "grad_norm": 3.7535693645477295, - "learning_rate": 1.797097818765813e-05, - "loss": 0.7579, + "epoch": 0.06913058782118972, + "grad_norm": 2.46311092376709, + "learning_rate": 1.3944024016389288e-05, + "loss": 0.9192, "step": 982 }, { - "epoch": 0.0743823540539518, - "grad_norm": 3.424109697341919, - "learning_rate": 1.797079718274697e-05, - "loss": 0.9592, + "epoch": 0.06920098556846181, + "grad_norm": 2.7169837951660156, + "learning_rate": 1.3943822389891002e-05, + "loss": 0.841, "step": 983 }, { - "epoch": 0.07445802277628542, - "grad_norm": 2.7965245246887207, - "learning_rate": 1.797061561605548e-05, - "loss": 0.8751, + "epoch": 0.0692713833157339, + "grad_norm": 2.5774283409118652, + "learning_rate": 1.39436204023771e-05, + "loss": 0.8443, "step": 984 }, { - "epoch": 0.07453369149861905, - "grad_norm": 2.6444272994995117, - "learning_rate": 1.7970433487595018e-05, - "loss": 0.8987, + "epoch": 0.06934178106300598, + "grad_norm": 2.657177448272705, + "learning_rate": 1.3943418053858087e-05, + "loss": 0.755, "step": 985 }, { - "epoch": 0.07460936022095267, - "grad_norm": 2.68102765083313, - "learning_rate": 1.7970250797377002e-05, - "loss": 0.8993, + "epoch": 0.06941217881027807, + "grad_norm": 2.262249708175659, + "learning_rate": 1.3943215344344482e-05, + "loss": 0.8641, "step": 986 }, { - "epoch": 0.0746850289432863, - "grad_norm": 2.6379127502441406, - "learning_rate": 1.7970067545412865e-05, - "loss": 0.7778, + "epoch": 0.06948257655755016, + "grad_norm": 2.59678053855896, + "learning_rate": 1.394301227384682e-05, + "loss": 0.7825, "step": 987 }, { - "epoch": 0.07476069766561992, - "grad_norm": 4.525475025177002, - "learning_rate": 1.796988373171409e-05, - "loss": 0.8909, + "epoch": 0.06955297430482224, + "grad_norm": 4.437134265899658, + "learning_rate": 1.3942808842375664e-05, + "loss": 0.8252, "step": 988 }, { - "epoch": 0.07483636638795355, - "grad_norm": 2.7560689449310303, - "learning_rate": 1.7969699356292177e-05, - "loss": 0.8144, + "epoch": 0.06962337205209433, + "grad_norm": 2.383888006210327, + "learning_rate": 1.3942605049941587e-05, + "loss": 0.7879, "step": 989 }, { - "epoch": 0.07491203511028716, - "grad_norm": 2.7288384437561035, - "learning_rate": 1.7969514419158682e-05, - "loss": 0.802, + "epoch": 0.06969376979936642, + "grad_norm": 2.4319252967834473, + "learning_rate": 1.3942400896555186e-05, + "loss": 0.9268, "step": 990 }, { - "epoch": 0.07498770383262078, - "grad_norm": 3.1117804050445557, - "learning_rate": 1.7969328920325184e-05, - "loss": 0.7979, + "epoch": 0.06976416754663851, + "grad_norm": 2.6267049312591553, + "learning_rate": 1.3942196382227078e-05, + "loss": 0.8338, "step": 991 }, { - "epoch": 0.0750633725549544, - "grad_norm": 3.190317392349243, - "learning_rate": 1.79691428598033e-05, - "loss": 0.773, + "epoch": 0.06983456529391059, + "grad_norm": 2.6093499660491943, + "learning_rate": 1.3941991506967891e-05, + "loss": 0.7347, "step": 992 }, { - "epoch": 0.07513904127728803, - "grad_norm": 4.079197883605957, - "learning_rate": 1.7968956237604678e-05, - "loss": 0.6914, + "epoch": 0.06990496304118268, + "grad_norm": 2.0639071464538574, + "learning_rate": 1.3941786270788276e-05, + "loss": 0.7907, "step": 993 }, { - "epoch": 0.07521470999962165, - "grad_norm": 2.5737321376800537, - "learning_rate": 1.796876905374101e-05, - "loss": 0.752, + "epoch": 0.06997536078845477, + "grad_norm": 3.0175085067749023, + "learning_rate": 1.3941580673698909e-05, + "loss": 0.7355, "step": 994 }, { - "epoch": 0.07529037872195528, - "grad_norm": 3.0443410873413086, - "learning_rate": 1.796858130822401e-05, - "loss": 0.8317, + "epoch": 0.07004575853572685, + "grad_norm": 2.684511423110962, + "learning_rate": 1.3941374715710476e-05, + "loss": 0.6537, "step": 995 }, { - "epoch": 0.0753660474442889, - "grad_norm": 3.2446975708007812, - "learning_rate": 1.7968393001065445e-05, - "loss": 0.7763, + "epoch": 0.07011615628299894, + "grad_norm": 2.2390151023864746, + "learning_rate": 1.3941168396833684e-05, + "loss": 0.698, "step": 996 }, { - "epoch": 0.07544171616662253, - "grad_norm": 3.4776625633239746, - "learning_rate": 1.79682041322771e-05, - "loss": 0.6775, + "epoch": 0.07018655403027103, + "grad_norm": 2.383263111114502, + "learning_rate": 1.3940961717079261e-05, + "loss": 0.7268, "step": 997 }, { - "epoch": 0.07551738488895615, - "grad_norm": 2.343702554702759, - "learning_rate": 1.796801470187081e-05, - "loss": 0.9111, + "epoch": 0.07025695177754313, + "grad_norm": 2.4326331615448, + "learning_rate": 1.394075467645795e-05, + "loss": 0.7216, "step": 998 }, { - "epoch": 0.07559305361128978, - "grad_norm": 2.4391534328460693, - "learning_rate": 1.7967824709858428e-05, - "loss": 1.1345, + "epoch": 0.0703273495248152, + "grad_norm": 2.4525201320648193, + "learning_rate": 1.3940547274980522e-05, + "loss": 0.7209, "step": 999 }, { - "epoch": 0.0756687223336234, - "grad_norm": 2.1746954917907715, - "learning_rate": 1.796763415625186e-05, - "loss": 0.6839, + "epoch": 0.0703977472720873, + "grad_norm": 2.4150009155273438, + "learning_rate": 1.3940339512657751e-05, + "loss": 0.7812, "step": 1000 }, { - "epoch": 0.07574439105595702, - "grad_norm": 2.8759877681732178, - "learning_rate": 1.7967443041063037e-05, - "loss": 0.793, + "epoch": 0.07046814501935939, + "grad_norm": 2.7470998764038086, + "learning_rate": 1.3940131389500446e-05, + "loss": 0.9454, "step": 1001 }, { - "epoch": 0.07582005977829065, - "grad_norm": 2.7766287326812744, - "learning_rate": 1.7967251364303927e-05, - "loss": 0.9124, + "epoch": 0.07053854276663146, + "grad_norm": 2.7927896976470947, + "learning_rate": 1.3939922905519424e-05, + "loss": 0.852, "step": 1002 }, { - "epoch": 0.07589572850062427, - "grad_norm": 3.971047878265381, - "learning_rate": 1.796705912598653e-05, - "loss": 0.9387, + "epoch": 0.07060894051390355, + "grad_norm": 2.1819887161254883, + "learning_rate": 1.3939714060725525e-05, + "loss": 0.7457, "step": 1003 }, { - "epoch": 0.07597139722295788, - "grad_norm": 3.4732584953308105, - "learning_rate": 1.796686632612289e-05, - "loss": 0.7853, + "epoch": 0.07067933826117564, + "grad_norm": 2.189953327178955, + "learning_rate": 1.3939504855129605e-05, + "loss": 0.7914, "step": 1004 }, { - "epoch": 0.07604706594529151, - "grad_norm": 2.530043125152588, - "learning_rate": 1.7966672964725074e-05, - "loss": 0.7116, + "epoch": 0.07074973600844774, + "grad_norm": 2.948251485824585, + "learning_rate": 1.3939295288742546e-05, + "loss": 0.7367, "step": 1005 }, { - "epoch": 0.07612273466762513, - "grad_norm": 3.753622531890869, - "learning_rate": 1.79664790418052e-05, - "loss": 0.7789, + "epoch": 0.07082013375571981, + "grad_norm": 2.8013503551483154, + "learning_rate": 1.393908536157524e-05, + "loss": 0.8736, "step": 1006 }, { - "epoch": 0.07619840338995876, - "grad_norm": 2.8898422718048096, - "learning_rate": 1.7966284557375405e-05, - "loss": 0.8084, + "epoch": 0.0708905315029919, + "grad_norm": 2.2470145225524902, + "learning_rate": 1.39388750736386e-05, + "loss": 0.8063, "step": 1007 }, { - "epoch": 0.07627407211229238, - "grad_norm": 3.0014569759368896, - "learning_rate": 1.7966089511447872e-05, - "loss": 0.9103, + "epoch": 0.070960929250264, + "grad_norm": 2.463219165802002, + "learning_rate": 1.393866442494356e-05, + "loss": 0.814, "step": 1008 }, { - "epoch": 0.076349740834626, - "grad_norm": 3.0454745292663574, - "learning_rate": 1.7965893904034813e-05, - "loss": 0.841, + "epoch": 0.07103132699753607, + "grad_norm": 2.3860814571380615, + "learning_rate": 1.3938453415501076e-05, + "loss": 0.8458, "step": 1009 }, { - "epoch": 0.07642540955695963, - "grad_norm": 6.323338031768799, - "learning_rate": 1.7965697735148482e-05, - "loss": 0.7438, + "epoch": 0.07110172474480816, + "grad_norm": 2.907344102859497, + "learning_rate": 1.3938242045322112e-05, + "loss": 0.8431, "step": 1010 }, { - "epoch": 0.07650107827929326, - "grad_norm": 2.459744930267334, - "learning_rate": 1.7965501004801158e-05, - "loss": 0.7822, + "epoch": 0.07117212249208026, + "grad_norm": 1.891729712486267, + "learning_rate": 1.3938030314417662e-05, + "loss": 0.9015, "step": 1011 }, { - "epoch": 0.07657674700162688, - "grad_norm": 2.981001138687134, - "learning_rate": 1.796530371300516e-05, - "loss": 0.9066, + "epoch": 0.07124252023935235, + "grad_norm": 2.1517698764801025, + "learning_rate": 1.3937818222798732e-05, + "loss": 0.823, "step": 1012 }, { - "epoch": 0.0766524157239605, - "grad_norm": 2.747135639190674, - "learning_rate": 1.7965105859772847e-05, - "loss": 0.6591, + "epoch": 0.07131291798662442, + "grad_norm": 2.2781972885131836, + "learning_rate": 1.393760577047635e-05, + "loss": 0.7062, "step": 1013 }, { - "epoch": 0.07672808444629413, - "grad_norm": 2.3893380165100098, - "learning_rate": 1.796490744511661e-05, - "loss": 0.7462, + "epoch": 0.07138331573389652, + "grad_norm": 2.3424575328826904, + "learning_rate": 1.3937392957461559e-05, + "loss": 0.7659, "step": 1014 }, { - "epoch": 0.07680375316862775, - "grad_norm": 3.2017297744750977, - "learning_rate": 1.796470846904887e-05, - "loss": 0.9729, + "epoch": 0.07145371348116861, + "grad_norm": 2.90763783454895, + "learning_rate": 1.3937179783765427e-05, + "loss": 0.665, "step": 1015 }, { - "epoch": 0.07687942189096138, - "grad_norm": 3.246903896331787, - "learning_rate": 1.7964508931582095e-05, - "loss": 0.6984, + "epoch": 0.07152411122844068, + "grad_norm": 2.2444393634796143, + "learning_rate": 1.3936966249399035e-05, + "loss": 0.9269, "step": 1016 }, { - "epoch": 0.07695509061329499, - "grad_norm": 2.975456953048706, - "learning_rate": 1.7964308832728775e-05, - "loss": 0.8159, + "epoch": 0.07159450897571278, + "grad_norm": 2.8298094272613525, + "learning_rate": 1.3936752354373485e-05, + "loss": 0.8599, "step": 1017 }, { - "epoch": 0.07703075933562861, - "grad_norm": 2.729341506958008, - "learning_rate": 1.796410817250144e-05, - "loss": 0.8509, + "epoch": 0.07166490672298487, + "grad_norm": 2.283485174179077, + "learning_rate": 1.3936538098699898e-05, + "loss": 0.9594, "step": 1018 }, { - "epoch": 0.07710642805796224, - "grad_norm": 3.228543758392334, - "learning_rate": 1.7963906950912657e-05, - "loss": 0.9637, + "epoch": 0.07173530447025696, + "grad_norm": 2.510566473007202, + "learning_rate": 1.3936323482389413e-05, + "loss": 0.7105, "step": 1019 }, { - "epoch": 0.07718209678029586, - "grad_norm": 2.6817281246185303, - "learning_rate": 1.7963705167975032e-05, - "loss": 0.9073, + "epoch": 0.07180570221752904, + "grad_norm": 2.8578338623046875, + "learning_rate": 1.3936108505453188e-05, + "loss": 0.8545, "step": 1020 }, { - "epoch": 0.07725776550262949, - "grad_norm": 2.673149585723877, - "learning_rate": 1.7963502823701195e-05, - "loss": 0.7813, + "epoch": 0.07187609996480113, + "grad_norm": 2.3185393810272217, + "learning_rate": 1.39358931679024e-05, + "loss": 0.7891, "step": 1021 }, { - "epoch": 0.07733343422496311, - "grad_norm": 2.8436264991760254, - "learning_rate": 1.7963299918103818e-05, - "loss": 0.7875, + "epoch": 0.07194649771207322, + "grad_norm": 2.1223130226135254, + "learning_rate": 1.3935677469748245e-05, + "loss": 0.7416, "step": 1022 }, { - "epoch": 0.07740910294729673, - "grad_norm": 3.4467597007751465, - "learning_rate": 1.796309645119561e-05, - "loss": 0.7861, + "epoch": 0.0720168954593453, + "grad_norm": 3.4597904682159424, + "learning_rate": 1.3935461411001937e-05, + "loss": 0.7913, "step": 1023 }, { - "epoch": 0.07748477166963036, - "grad_norm": 4.416311740875244, - "learning_rate": 1.7962892422989313e-05, - "loss": 0.8662, + "epoch": 0.07208729320661739, + "grad_norm": 2.9353513717651367, + "learning_rate": 1.3935244991674708e-05, + "loss": 0.7688, "step": 1024 }, { - "epoch": 0.07756044039196398, - "grad_norm": 2.7716546058654785, - "learning_rate": 1.79626878334977e-05, - "loss": 0.6516, + "epoch": 0.07215769095388948, + "grad_norm": 2.6923165321350098, + "learning_rate": 1.3935028211777813e-05, + "loss": 0.8269, "step": 1025 }, { - "epoch": 0.07763610911429761, - "grad_norm": 2.379066228866577, - "learning_rate": 1.796248268273359e-05, - "loss": 0.7032, + "epoch": 0.07222808870116157, + "grad_norm": 2.4170939922332764, + "learning_rate": 1.393481107132252e-05, + "loss": 0.867, "step": 1026 }, { - "epoch": 0.07771177783663123, - "grad_norm": 3.2015442848205566, - "learning_rate": 1.7962276970709827e-05, - "loss": 0.8266, + "epoch": 0.07229848644843365, + "grad_norm": 2.5160932540893555, + "learning_rate": 1.3934593570320117e-05, + "loss": 0.8418, "step": 1027 }, { - "epoch": 0.07778744655896486, - "grad_norm": 2.301879405975342, - "learning_rate": 1.796207069743929e-05, - "loss": 0.8037, + "epoch": 0.07236888419570574, + "grad_norm": 2.5388927459716797, + "learning_rate": 1.3934375708781917e-05, + "loss": 0.8047, "step": 1028 }, { - "epoch": 0.07786311528129848, - "grad_norm": 2.7185168266296387, - "learning_rate": 1.7961863862934897e-05, - "loss": 0.8516, + "epoch": 0.07243928194297783, + "grad_norm": 2.607654094696045, + "learning_rate": 1.3934157486719242e-05, + "loss": 0.7792, "step": 1029 }, { - "epoch": 0.0779387840036321, - "grad_norm": 3.952467679977417, - "learning_rate": 1.796165646720961e-05, - "loss": 0.8924, + "epoch": 0.0725096796902499, + "grad_norm": 2.4479799270629883, + "learning_rate": 1.393393890414344e-05, + "loss": 0.7874, "step": 1030 }, { - "epoch": 0.07801445272596572, - "grad_norm": 2.7374305725097656, - "learning_rate": 1.79614485102764e-05, - "loss": 0.8953, + "epoch": 0.072580077437522, + "grad_norm": 2.4575772285461426, + "learning_rate": 1.3933719961065873e-05, + "loss": 0.8379, "step": 1031 }, { - "epoch": 0.07809012144829934, - "grad_norm": 3.123100996017456, - "learning_rate": 1.7961239992148306e-05, - "loss": 0.9221, + "epoch": 0.07265047518479409, + "grad_norm": 2.1588051319122314, + "learning_rate": 1.3933500657497927e-05, + "loss": 0.7105, "step": 1032 }, { - "epoch": 0.07816579017063296, - "grad_norm": 2.811434507369995, - "learning_rate": 1.7961030912838376e-05, - "loss": 0.7309, + "epoch": 0.07272087293206618, + "grad_norm": 2.3705193996429443, + "learning_rate": 1.3933280993451003e-05, + "loss": 0.8423, "step": 1033 }, { - "epoch": 0.07824145889296659, - "grad_norm": 3.855139970779419, - "learning_rate": 1.796082127235971e-05, - "loss": 0.7561, + "epoch": 0.07279127067933826, + "grad_norm": 2.5429670810699463, + "learning_rate": 1.393306096893652e-05, + "loss": 0.8712, "step": 1034 }, { - "epoch": 0.07831712761530021, - "grad_norm": 3.214775562286377, - "learning_rate": 1.796061107072543e-05, - "loss": 0.965, + "epoch": 0.07286166842661035, + "grad_norm": 2.8496360778808594, + "learning_rate": 1.3932840583965918e-05, + "loss": 0.7684, "step": 1035 }, { - "epoch": 0.07839279633763384, - "grad_norm": 2.650777578353882, - "learning_rate": 1.7960400307948706e-05, - "loss": 0.9342, + "epoch": 0.07293206617388244, + "grad_norm": 3.180054187774658, + "learning_rate": 1.3932619838550656e-05, + "loss": 0.7796, "step": 1036 }, { - "epoch": 0.07846846505996746, - "grad_norm": 2.863734722137451, - "learning_rate": 1.796018898404273e-05, - "loss": 0.8635, + "epoch": 0.07300246392115452, + "grad_norm": 2.1675944328308105, + "learning_rate": 1.393239873270221e-05, + "loss": 0.8011, "step": 1037 }, { - "epoch": 0.07854413378230109, - "grad_norm": 5.087371349334717, - "learning_rate": 1.795997709902074e-05, - "loss": 0.6543, + "epoch": 0.07307286166842661, + "grad_norm": 2.1751883029937744, + "learning_rate": 1.3932177266432075e-05, + "loss": 0.7815, "step": 1038 }, { - "epoch": 0.07861980250463471, - "grad_norm": 2.6036596298217773, - "learning_rate": 1.7959764652896006e-05, - "loss": 0.8956, + "epoch": 0.0731432594156987, + "grad_norm": 2.3250463008880615, + "learning_rate": 1.3931955439751768e-05, + "loss": 0.7085, "step": 1039 }, { - "epoch": 0.07869547122696834, - "grad_norm": 2.6661086082458496, - "learning_rate": 1.7959551645681827e-05, - "loss": 0.9456, + "epoch": 0.07321365716297079, + "grad_norm": 2.6485543251037598, + "learning_rate": 1.3931733252672818e-05, + "loss": 0.8421, "step": 1040 }, { - "epoch": 0.07877113994930196, - "grad_norm": 2.790140390396118, - "learning_rate": 1.7959338077391547e-05, - "loss": 0.8146, + "epoch": 0.07328405491024287, + "grad_norm": 2.3346197605133057, + "learning_rate": 1.3931510705206778e-05, + "loss": 0.7984, "step": 1041 }, { - "epoch": 0.07884680867163558, - "grad_norm": 3.7499725818634033, - "learning_rate": 1.795912394803854e-05, - "loss": 0.7346, + "epoch": 0.07335445265751496, + "grad_norm": 2.98880672454834, + "learning_rate": 1.393128779736522e-05, + "loss": 0.8117, "step": 1042 }, { - "epoch": 0.07892247739396921, - "grad_norm": 2.917370080947876, - "learning_rate": 1.7958909257636214e-05, - "loss": 0.8348, + "epoch": 0.07342485040478705, + "grad_norm": 2.919203042984009, + "learning_rate": 1.393106452915973e-05, + "loss": 0.9268, "step": 1043 }, { - "epoch": 0.07899814611630282, - "grad_norm": 2.5935680866241455, - "learning_rate": 1.795869400619801e-05, - "loss": 0.6081, + "epoch": 0.07349524815205913, + "grad_norm": 3.87673020362854, + "learning_rate": 1.393084090060192e-05, + "loss": 0.8939, "step": 1044 }, { - "epoch": 0.07907381483863644, - "grad_norm": 2.097604990005493, - "learning_rate": 1.7958478193737412e-05, - "loss": 0.7379, + "epoch": 0.07356564589933122, + "grad_norm": 2.200892448425293, + "learning_rate": 1.3930616911703412e-05, + "loss": 0.7958, "step": 1045 }, { - "epoch": 0.07914948356097007, - "grad_norm": 2.9579460620880127, - "learning_rate": 1.7958261820267936e-05, - "loss": 0.9227, + "epoch": 0.07363604364660331, + "grad_norm": 2.1954853534698486, + "learning_rate": 1.3930392562475857e-05, + "loss": 0.8652, "step": 1046 }, { - "epoch": 0.0792251522833037, - "grad_norm": 4.755364418029785, - "learning_rate": 1.7958044885803133e-05, - "loss": 0.8909, + "epoch": 0.0737064413938754, + "grad_norm": 2.35556960105896, + "learning_rate": 1.3930167852930916e-05, + "loss": 0.7573, "step": 1047 }, { - "epoch": 0.07930082100563732, - "grad_norm": 2.9622743129730225, - "learning_rate": 1.7957827390356577e-05, - "loss": 0.6475, + "epoch": 0.07377683914114748, + "grad_norm": 2.7508351802825928, + "learning_rate": 1.3929942783080271e-05, + "loss": 0.8458, "step": 1048 }, { - "epoch": 0.07937648972797094, - "grad_norm": 2.9405174255371094, - "learning_rate": 1.7957609333941906e-05, - "loss": 0.8427, + "epoch": 0.07384723688841957, + "grad_norm": 2.088918685913086, + "learning_rate": 1.3929717352935623e-05, + "loss": 0.644, "step": 1049 }, { - "epoch": 0.07945215845030457, - "grad_norm": 2.8209495544433594, - "learning_rate": 1.795739071657276e-05, - "loss": 0.7966, + "epoch": 0.07391763463569166, + "grad_norm": 2.0359601974487305, + "learning_rate": 1.3929491562508697e-05, + "loss": 0.7599, "step": 1050 }, { - "epoch": 0.07952782717263819, - "grad_norm": 2.4763989448547363, - "learning_rate": 1.795717153826284e-05, - "loss": 0.6992, + "epoch": 0.07398803238296374, + "grad_norm": 2.542320728302002, + "learning_rate": 1.3929265411811227e-05, + "loss": 0.8338, "step": 1051 }, { - "epoch": 0.07960349589497182, - "grad_norm": 2.8910422325134277, - "learning_rate": 1.7956951799025865e-05, - "loss": 0.8601, + "epoch": 0.07405843013023583, + "grad_norm": 2.520738363265991, + "learning_rate": 1.3929038900854975e-05, + "loss": 0.8378, "step": 1052 }, { - "epoch": 0.07967916461730544, - "grad_norm": 2.9164462089538574, - "learning_rate": 1.7956731498875598e-05, - "loss": 0.8017, + "epoch": 0.07412882787750792, + "grad_norm": 2.5593581199645996, + "learning_rate": 1.3928812029651712e-05, + "loss": 0.6889, "step": 1053 }, { - "epoch": 0.07975483333963906, - "grad_norm": 2.7864677906036377, - "learning_rate": 1.7956510637825835e-05, - "loss": 0.8465, + "epoch": 0.07419922562478001, + "grad_norm": 2.6799166202545166, + "learning_rate": 1.3928584798213238e-05, + "loss": 0.8063, "step": 1054 }, { - "epoch": 0.07983050206197269, - "grad_norm": 3.374191999435425, - "learning_rate": 1.7956289215890405e-05, - "loss": 0.8502, + "epoch": 0.07426962337205209, + "grad_norm": 2.3922972679138184, + "learning_rate": 1.3928357206551365e-05, + "loss": 0.8827, "step": 1055 }, { - "epoch": 0.07990617078430631, - "grad_norm": 4.065507411956787, - "learning_rate": 1.795606723308318e-05, - "loss": 0.8219, + "epoch": 0.07434002111932418, + "grad_norm": 2.9360947608947754, + "learning_rate": 1.3928129254677927e-05, + "loss": 0.8623, "step": 1056 }, { - "epoch": 0.07998183950663992, - "grad_norm": 3.0881083011627197, - "learning_rate": 1.7955844689418055e-05, - "loss": 0.8383, + "epoch": 0.07441041886659627, + "grad_norm": 2.47277569770813, + "learning_rate": 1.3927900942604773e-05, + "loss": 0.866, "step": 1057 }, { - "epoch": 0.08005750822897355, - "grad_norm": 2.8912618160247803, - "learning_rate": 1.7955621584908968e-05, - "loss": 1.0209, + "epoch": 0.07448081661386835, + "grad_norm": 2.4367527961730957, + "learning_rate": 1.3927672270343776e-05, + "loss": 0.7611, "step": 1058 }, { - "epoch": 0.08013317695130717, - "grad_norm": 2.972893714904785, - "learning_rate": 1.7955397919569894e-05, - "loss": 0.7862, + "epoch": 0.07455121436114044, + "grad_norm": 2.37290096282959, + "learning_rate": 1.3927443237906826e-05, + "loss": 0.7717, "step": 1059 }, { - "epoch": 0.0802088456736408, - "grad_norm": 3.151890277862549, - "learning_rate": 1.7955173693414835e-05, - "loss": 0.9524, + "epoch": 0.07462161210841253, + "grad_norm": 2.6298134326934814, + "learning_rate": 1.3927213845305825e-05, + "loss": 0.8921, "step": 1060 }, { - "epoch": 0.08028451439597442, - "grad_norm": 3.1022751331329346, - "learning_rate": 1.7954948906457836e-05, - "loss": 0.9726, + "epoch": 0.07469200985568462, + "grad_norm": 3.458796977996826, + "learning_rate": 1.3926984092552704e-05, + "loss": 0.7997, "step": 1061 }, { - "epoch": 0.08036018311830805, - "grad_norm": 3.058262825012207, - "learning_rate": 1.7954723558712973e-05, - "loss": 0.8667, + "epoch": 0.0747624076029567, + "grad_norm": 2.196821451187134, + "learning_rate": 1.3926753979659404e-05, + "loss": 0.7017, "step": 1062 }, { - "epoch": 0.08043585184064167, - "grad_norm": 3.0045084953308105, - "learning_rate": 1.7954497650194356e-05, - "loss": 0.7895, + "epoch": 0.07483280535022879, + "grad_norm": 2.338628053665161, + "learning_rate": 1.3926523506637893e-05, + "loss": 0.8007, "step": 1063 }, { - "epoch": 0.0805115205629753, - "grad_norm": 2.1319167613983154, - "learning_rate": 1.7954271180916137e-05, - "loss": 0.6551, + "epoch": 0.07490320309750088, + "grad_norm": 2.6149370670318604, + "learning_rate": 1.3926292673500153e-05, + "loss": 0.7721, "step": 1064 }, { - "epoch": 0.08058718928530892, - "grad_norm": 4.05554723739624, - "learning_rate": 1.795404415089249e-05, - "loss": 1.0026, + "epoch": 0.07497360084477296, + "grad_norm": 2.2722558975219727, + "learning_rate": 1.3926061480258183e-05, + "loss": 0.8677, "step": 1065 }, { - "epoch": 0.08066285800764254, - "grad_norm": 2.6859283447265625, - "learning_rate": 1.795381656013764e-05, - "loss": 0.7522, + "epoch": 0.07504399859204505, + "grad_norm": 2.337745428085327, + "learning_rate": 1.3925829926924005e-05, + "loss": 0.7921, "step": 1066 }, { - "epoch": 0.08073852672997617, - "grad_norm": 2.984954833984375, - "learning_rate": 1.795358840866584e-05, - "loss": 0.9283, + "epoch": 0.07511439633931714, + "grad_norm": 2.441837787628174, + "learning_rate": 1.3925598013509656e-05, + "loss": 0.7871, "step": 1067 }, { - "epoch": 0.08081419545230979, - "grad_norm": 5.049993991851807, - "learning_rate": 1.7953359696491368e-05, - "loss": 0.883, + "epoch": 0.07518479408658924, + "grad_norm": 2.2845687866210938, + "learning_rate": 1.3925365740027192e-05, + "loss": 0.7445, "step": 1068 }, { - "epoch": 0.08088986417464342, - "grad_norm": 3.801880359649658, - "learning_rate": 1.7953130423628558e-05, - "loss": 0.8939, + "epoch": 0.07525519183386131, + "grad_norm": 2.469635009765625, + "learning_rate": 1.3925133106488694e-05, + "loss": 0.8387, "step": 1069 }, { - "epoch": 0.08096553289697704, - "grad_norm": 4.104866981506348, - "learning_rate": 1.795290059009176e-05, - "loss": 0.725, + "epoch": 0.0753255895811334, + "grad_norm": 2.2762258052825928, + "learning_rate": 1.3924900112906252e-05, + "loss": 0.8014, "step": 1070 }, { - "epoch": 0.08104120161931065, - "grad_norm": 3.0002243518829346, - "learning_rate": 1.7952670195895373e-05, - "loss": 0.9259, + "epoch": 0.0753959873284055, + "grad_norm": 2.3023054599761963, + "learning_rate": 1.3924666759291983e-05, + "loss": 0.8065, "step": 1071 }, { - "epoch": 0.08111687034164428, - "grad_norm": 4.523970127105713, - "learning_rate": 1.7952439241053818e-05, - "loss": 0.8686, + "epoch": 0.07546638507567757, + "grad_norm": 2.5582659244537354, + "learning_rate": 1.3924433045658017e-05, + "loss": 0.8776, "step": 1072 }, { - "epoch": 0.0811925390639779, - "grad_norm": 4.293980598449707, - "learning_rate": 1.7952207725581565e-05, - "loss": 0.9891, + "epoch": 0.07553678282294966, + "grad_norm": 2.3534300327301025, + "learning_rate": 1.3924198972016508e-05, + "loss": 0.7587, "step": 1073 }, { - "epoch": 0.08126820778631152, - "grad_norm": 6.949241638183594, - "learning_rate": 1.7951975649493112e-05, - "loss": 0.7205, + "epoch": 0.07560718057022175, + "grad_norm": 2.9335005283355713, + "learning_rate": 1.392396453837962e-05, + "loss": 0.8025, "step": 1074 }, { - "epoch": 0.08134387650864515, - "grad_norm": 2.5196433067321777, - "learning_rate": 1.795174301280298e-05, - "loss": 0.7382, + "epoch": 0.07567757831749385, + "grad_norm": 2.695983409881592, + "learning_rate": 1.3923729744759548e-05, + "loss": 0.7857, "step": 1075 }, { - "epoch": 0.08141954523097877, - "grad_norm": 3.6562304496765137, - "learning_rate": 1.7951509815525758e-05, - "loss": 0.7558, + "epoch": 0.07574797606476592, + "grad_norm": 2.541840076446533, + "learning_rate": 1.3923494591168495e-05, + "loss": 0.8693, "step": 1076 }, { - "epoch": 0.0814952139533124, - "grad_norm": 2.6832971572875977, - "learning_rate": 1.7951276057676035e-05, - "loss": 0.7999, + "epoch": 0.07581837381203801, + "grad_norm": 4.434484004974365, + "learning_rate": 1.3923259077618688e-05, + "loss": 0.8389, "step": 1077 }, { - "epoch": 0.08157088267564602, - "grad_norm": 2.6821744441986084, - "learning_rate": 1.795104173926845e-05, - "loss": 0.7422, + "epoch": 0.0758887715593101, + "grad_norm": 3.152869462966919, + "learning_rate": 1.392302320412237e-05, + "loss": 0.7761, "step": 1078 }, { - "epoch": 0.08164655139797965, - "grad_norm": 3.0677764415740967, - "learning_rate": 1.795080686031768e-05, - "loss": 0.9085, + "epoch": 0.07595916930658218, + "grad_norm": 2.712646245956421, + "learning_rate": 1.3922786970691809e-05, + "loss": 0.7359, "step": 1079 }, { - "epoch": 0.08172222012031327, - "grad_norm": 2.698085069656372, - "learning_rate": 1.7950571420838438e-05, - "loss": 0.8755, + "epoch": 0.07602956705385427, + "grad_norm": 3.388559341430664, + "learning_rate": 1.3922550377339281e-05, + "loss": 0.9247, "step": 1080 }, { - "epoch": 0.0817978888426469, - "grad_norm": 2.651939630508423, - "learning_rate": 1.7950335420845463e-05, - "loss": 0.7144, + "epoch": 0.07609996480112637, + "grad_norm": 2.246995687484741, + "learning_rate": 1.392231342407709e-05, + "loss": 0.7172, "step": 1081 }, { - "epoch": 0.08187355756498052, - "grad_norm": 3.2069571018218994, - "learning_rate": 1.7950098860353534e-05, - "loss": 0.834, + "epoch": 0.07617036254839846, + "grad_norm": 2.3917324542999268, + "learning_rate": 1.3922076110917556e-05, + "loss": 0.8817, "step": 1082 }, { - "epoch": 0.08194922628731414, - "grad_norm": 2.9685559272766113, - "learning_rate": 1.7949861739377464e-05, - "loss": 0.9, + "epoch": 0.07624076029567053, + "grad_norm": 2.307525873184204, + "learning_rate": 1.3921838437873013e-05, + "loss": 0.7499, "step": 1083 }, { - "epoch": 0.08202489500964776, - "grad_norm": 3.860081434249878, - "learning_rate": 1.7949624057932108e-05, - "loss": 0.8808, + "epoch": 0.07631115804294263, + "grad_norm": 2.585801362991333, + "learning_rate": 1.3921600404955824e-05, + "loss": 0.7765, "step": 1084 }, { - "epoch": 0.08210056373198138, - "grad_norm": 3.6514697074890137, - "learning_rate": 1.7949385816032348e-05, - "loss": 0.8848, + "epoch": 0.07638155579021472, + "grad_norm": 2.4935460090637207, + "learning_rate": 1.392136201217836e-05, + "loss": 0.8588, "step": 1085 }, { - "epoch": 0.082176232454315, - "grad_norm": 2.7582156658172607, - "learning_rate": 1.79491470136931e-05, - "loss": 0.8554, + "epoch": 0.0764519535374868, + "grad_norm": 2.640094518661499, + "learning_rate": 1.3921123259553017e-05, + "loss": 0.8392, "step": 1086 }, { - "epoch": 0.08225190117664863, - "grad_norm": 3.2017011642456055, - "learning_rate": 1.7948907650929322e-05, - "loss": 0.9358, + "epoch": 0.07652235128475889, + "grad_norm": 2.3773410320281982, + "learning_rate": 1.3920884147092208e-05, + "loss": 0.7789, "step": 1087 }, { - "epoch": 0.08232756989898225, - "grad_norm": 2.749764919281006, - "learning_rate": 1.7948667727756e-05, - "loss": 0.7399, + "epoch": 0.07659274903203098, + "grad_norm": 2.4625816345214844, + "learning_rate": 1.3920644674808364e-05, + "loss": 0.8092, "step": 1088 }, { - "epoch": 0.08240323862131588, - "grad_norm": 3.237679958343506, - "learning_rate": 1.7948427244188163e-05, - "loss": 1.0332, + "epoch": 0.07666314677930307, + "grad_norm": 2.482403039932251, + "learning_rate": 1.3920404842713935e-05, + "loss": 0.8263, "step": 1089 }, { - "epoch": 0.0824789073436495, - "grad_norm": 3.3725643157958984, - "learning_rate": 1.794818620024087e-05, - "loss": 0.8071, + "epoch": 0.07673354452657515, + "grad_norm": 2.3082869052886963, + "learning_rate": 1.3920164650821391e-05, + "loss": 0.7719, "step": 1090 }, { - "epoch": 0.08255457606598313, - "grad_norm": 3.030790090560913, - "learning_rate": 1.7947944595929215e-05, - "loss": 0.973, + "epoch": 0.07680394227384724, + "grad_norm": 2.3619649410247803, + "learning_rate": 1.3919924099143219e-05, + "loss": 0.766, "step": 1091 }, { - "epoch": 0.08263024478831675, - "grad_norm": 3.141195058822632, - "learning_rate": 1.794770243126833e-05, - "loss": 0.8654, + "epoch": 0.07687434002111933, + "grad_norm": 5.365654945373535, + "learning_rate": 1.3919683187691927e-05, + "loss": 0.7163, "step": 1092 }, { - "epoch": 0.08270591351065038, - "grad_norm": 2.7971293926239014, - "learning_rate": 1.7947459706273376e-05, - "loss": 0.9006, + "epoch": 0.0769447377683914, + "grad_norm": 2.377168893814087, + "learning_rate": 1.391944191648004e-05, + "loss": 0.875, "step": 1093 }, { - "epoch": 0.082781582232984, - "grad_norm": 2.6309781074523926, - "learning_rate": 1.7947216420959556e-05, - "loss": 0.8958, + "epoch": 0.0770151355156635, + "grad_norm": 2.7836620807647705, + "learning_rate": 1.3919200285520099e-05, + "loss": 0.7892, "step": 1094 }, { - "epoch": 0.08285725095531762, - "grad_norm": 2.7773690223693848, - "learning_rate": 1.7946972575342104e-05, - "loss": 0.7116, + "epoch": 0.07708553326293559, + "grad_norm": 2.541724681854248, + "learning_rate": 1.3918958294824671e-05, + "loss": 0.8978, "step": 1095 }, { - "epoch": 0.08293291967765125, - "grad_norm": 3.7063984870910645, - "learning_rate": 1.7946728169436292e-05, - "loss": 0.8178, + "epoch": 0.07715593101020768, + "grad_norm": 2.6473701000213623, + "learning_rate": 1.3918715944406332e-05, + "loss": 0.8525, "step": 1096 }, { - "epoch": 0.08300858839998487, - "grad_norm": 2.698293685913086, - "learning_rate": 1.7946483203257426e-05, - "loss": 0.7834, + "epoch": 0.07722632875747976, + "grad_norm": 2.8525781631469727, + "learning_rate": 1.3918473234277689e-05, + "loss": 0.9435, "step": 1097 }, { - "epoch": 0.08308425712231848, - "grad_norm": 3.520792245864868, - "learning_rate": 1.7946237676820842e-05, - "loss": 0.9203, + "epoch": 0.07729672650475185, + "grad_norm": 2.5585198402404785, + "learning_rate": 1.3918230164451354e-05, + "loss": 0.6999, "step": 1098 }, { - "epoch": 0.08315992584465211, - "grad_norm": 5.286828517913818, - "learning_rate": 1.794599159014192e-05, - "loss": 0.9913, + "epoch": 0.07736712425202394, + "grad_norm": 3.036473035812378, + "learning_rate": 1.3917986734939968e-05, + "loss": 0.8294, "step": 1099 }, { - "epoch": 0.08323559456698573, - "grad_norm": 2.7940480709075928, - "learning_rate": 1.7945744943236073e-05, - "loss": 0.8903, + "epoch": 0.07743752199929602, + "grad_norm": 2.722968578338623, + "learning_rate": 1.3917742945756186e-05, + "loss": 0.813, "step": 1100 }, { - "epoch": 0.08331126328931936, - "grad_norm": 3.531196355819702, - "learning_rate": 1.794549773611874e-05, - "loss": 0.9732, + "epoch": 0.07750791974656811, + "grad_norm": 2.578220844268799, + "learning_rate": 1.3917498796912684e-05, + "loss": 0.7201, "step": 1101 }, { - "epoch": 0.08338693201165298, - "grad_norm": 2.8770856857299805, - "learning_rate": 1.7945249968805412e-05, - "loss": 0.7176, + "epoch": 0.0775783174938402, + "grad_norm": 2.4114596843719482, + "learning_rate": 1.3917254288422154e-05, + "loss": 0.7861, "step": 1102 }, { - "epoch": 0.0834626007339866, - "grad_norm": 2.6823272705078125, - "learning_rate": 1.794500164131159e-05, - "loss": 0.7169, + "epoch": 0.07764871524111229, + "grad_norm": 2.184274673461914, + "learning_rate": 1.391700942029731e-05, + "loss": 0.8199, "step": 1103 }, { - "epoch": 0.08353826945632023, - "grad_norm": 14.177001953125, - "learning_rate": 1.794475275365284e-05, - "loss": 0.7207, + "epoch": 0.07771911298838437, + "grad_norm": 2.7250559329986572, + "learning_rate": 1.391676419255088e-05, + "loss": 0.7585, "step": 1104 }, { - "epoch": 0.08361393817865385, - "grad_norm": 3.2320713996887207, - "learning_rate": 1.7944503305844738e-05, - "loss": 0.799, + "epoch": 0.07778951073565646, + "grad_norm": 2.0673868656158447, + "learning_rate": 1.3916518605195617e-05, + "loss": 0.6973, "step": 1105 }, { - "epoch": 0.08368960690098748, - "grad_norm": 3.453160524368286, - "learning_rate": 1.794425329790291e-05, - "loss": 0.6669, + "epoch": 0.07785990848292855, + "grad_norm": 2.280029535293579, + "learning_rate": 1.3916272658244288e-05, + "loss": 0.876, "step": 1106 }, { - "epoch": 0.0837652756233211, - "grad_norm": 2.6871984004974365, - "learning_rate": 1.794400272984301e-05, - "loss": 0.8554, + "epoch": 0.07793030623020063, + "grad_norm": 2.3047807216644287, + "learning_rate": 1.3916026351709677e-05, + "loss": 0.793, "step": 1107 }, { - "epoch": 0.08384094434565473, - "grad_norm": 3.1583449840545654, - "learning_rate": 1.7943751601680732e-05, - "loss": 0.8331, + "epoch": 0.07800070397747272, + "grad_norm": 2.5945513248443604, + "learning_rate": 1.3915779685604596e-05, + "loss": 0.8831, "step": 1108 }, { - "epoch": 0.08391661306798835, - "grad_norm": 2.431509256362915, - "learning_rate": 1.79434999134318e-05, - "loss": 0.9261, + "epoch": 0.07807110172474481, + "grad_norm": 2.213367223739624, + "learning_rate": 1.3915532659941863e-05, + "loss": 0.7468, "step": 1109 }, { - "epoch": 0.08399228179032198, - "grad_norm": 5.224951267242432, - "learning_rate": 1.7943247665111978e-05, - "loss": 0.8823, + "epoch": 0.0781414994720169, + "grad_norm": 2.7465169429779053, + "learning_rate": 1.3915285274734326e-05, + "loss": 0.9056, "step": 1110 }, { - "epoch": 0.08406795051265559, - "grad_norm": 3.4264092445373535, - "learning_rate": 1.7942994856737063e-05, - "loss": 0.7422, + "epoch": 0.07821189721928898, + "grad_norm": 3.006166696548462, + "learning_rate": 1.3915037529994845e-05, + "loss": 0.7478, "step": 1111 }, { - "epoch": 0.08414361923498921, - "grad_norm": 3.036907196044922, - "learning_rate": 1.7942741488322882e-05, - "loss": 0.8846, + "epoch": 0.07828229496656107, + "grad_norm": 2.5754997730255127, + "learning_rate": 1.3914789425736299e-05, + "loss": 0.8475, "step": 1112 }, { - "epoch": 0.08421928795732284, - "grad_norm": 2.8463785648345947, - "learning_rate": 1.7942487559885306e-05, - "loss": 0.8043, + "epoch": 0.07835269271383316, + "grad_norm": 2.2889387607574463, + "learning_rate": 1.3914540961971588e-05, + "loss": 0.8606, "step": 1113 }, { - "epoch": 0.08429495667965646, - "grad_norm": 3.0611727237701416, - "learning_rate": 1.7942233071440235e-05, - "loss": 0.7897, + "epoch": 0.07842309046110524, + "grad_norm": 2.606018304824829, + "learning_rate": 1.391429213871363e-05, + "loss": 0.7655, "step": 1114 }, { - "epoch": 0.08437062540199008, - "grad_norm": 3.0145342350006104, - "learning_rate": 1.7941978023003604e-05, - "loss": 0.8423, + "epoch": 0.07849348820837733, + "grad_norm": 2.458618402481079, + "learning_rate": 1.3914042955975363e-05, + "loss": 0.828, "step": 1115 }, { - "epoch": 0.08444629412432371, - "grad_norm": 2.9739110469818115, - "learning_rate": 1.794172241459139e-05, - "loss": 0.9366, + "epoch": 0.07856388595564942, + "grad_norm": 2.7074198722839355, + "learning_rate": 1.391379341376974e-05, + "loss": 0.7365, "step": 1116 }, { - "epoch": 0.08452196284665733, - "grad_norm": 3.091020345687866, - "learning_rate": 1.7941466246219597e-05, - "loss": 0.7762, + "epoch": 0.07863428370292151, + "grad_norm": 2.198666572570801, + "learning_rate": 1.3913543512109736e-05, + "loss": 0.799, "step": 1117 }, { - "epoch": 0.08459763156899096, - "grad_norm": 2.4491589069366455, - "learning_rate": 1.7941209517904267e-05, - "loss": 0.791, + "epoch": 0.07870468145019359, + "grad_norm": 3.0352747440338135, + "learning_rate": 1.3913293251008345e-05, + "loss": 0.7889, "step": 1118 }, { - "epoch": 0.08467330029132458, - "grad_norm": 3.116558313369751, - "learning_rate": 1.794095222966148e-05, - "loss": 0.8275, + "epoch": 0.07877507919746568, + "grad_norm": 2.559067487716675, + "learning_rate": 1.3913042630478575e-05, + "loss": 0.8505, "step": 1119 }, { - "epoch": 0.0847489690136582, - "grad_norm": 3.4584758281707764, - "learning_rate": 1.7940694381507345e-05, - "loss": 0.9236, + "epoch": 0.07884547694473777, + "grad_norm": 2.5808472633361816, + "learning_rate": 1.3912791650533457e-05, + "loss": 0.9135, "step": 1120 }, { - "epoch": 0.08482463773599183, - "grad_norm": 3.4700706005096436, - "learning_rate": 1.794043597345801e-05, - "loss": 0.9343, + "epoch": 0.07891587469200985, + "grad_norm": 2.842872142791748, + "learning_rate": 1.3912540311186044e-05, + "loss": 0.8751, "step": 1121 }, { - "epoch": 0.08490030645832546, - "grad_norm": 2.679157257080078, - "learning_rate": 1.7940177005529653e-05, - "loss": 0.908, + "epoch": 0.07898627243928194, + "grad_norm": 2.315624475479126, + "learning_rate": 1.3912288612449396e-05, + "loss": 0.7087, "step": 1122 }, { - "epoch": 0.08497597518065908, - "grad_norm": 4.534050941467285, - "learning_rate": 1.7939917477738502e-05, - "loss": 0.761, + "epoch": 0.07905667018655403, + "grad_norm": 4.027956962585449, + "learning_rate": 1.3912036554336606e-05, + "loss": 0.8411, "step": 1123 }, { - "epoch": 0.0850516439029927, - "grad_norm": 3.0127124786376953, - "learning_rate": 1.79396573901008e-05, - "loss": 0.7555, + "epoch": 0.07912706793382612, + "grad_norm": 2.1606645584106445, + "learning_rate": 1.3911784136860776e-05, + "loss": 0.7693, "step": 1124 }, { - "epoch": 0.08512731262532632, - "grad_norm": 2.5386579036712646, - "learning_rate": 1.793939674263284e-05, - "loss": 0.8402, + "epoch": 0.0791974656810982, + "grad_norm": 2.377943515777588, + "learning_rate": 1.3911531360035027e-05, + "loss": 0.8472, "step": 1125 }, { - "epoch": 0.08520298134765994, - "grad_norm": 3.1187853813171387, - "learning_rate": 1.793913553535094e-05, - "loss": 0.7755, + "epoch": 0.07926786342837029, + "grad_norm": 2.6446452140808105, + "learning_rate": 1.3911278223872502e-05, + "loss": 0.851, "step": 1126 }, { - "epoch": 0.08527865006999356, - "grad_norm": 3.3859164714813232, - "learning_rate": 1.793887376827146e-05, - "loss": 0.7611, + "epoch": 0.07933826117564238, + "grad_norm": 2.9320027828216553, + "learning_rate": 1.3911024728386364e-05, + "loss": 0.8202, "step": 1127 }, { - "epoch": 0.08535431879232719, - "grad_norm": 2.509925127029419, - "learning_rate": 1.7938611441410795e-05, - "loss": 0.7936, + "epoch": 0.07940865892291446, + "grad_norm": 2.632298469543457, + "learning_rate": 1.3910770873589794e-05, + "loss": 0.8758, "step": 1128 }, { - "epoch": 0.08542998751466081, - "grad_norm": 2.7846434116363525, - "learning_rate": 1.793834855478537e-05, - "loss": 0.6917, + "epoch": 0.07947905667018655, + "grad_norm": 2.4498450756073, + "learning_rate": 1.3910516659495986e-05, + "loss": 0.7561, "step": 1129 }, { - "epoch": 0.08550565623699444, - "grad_norm": 3.471247673034668, - "learning_rate": 1.7938085108411648e-05, - "loss": 0.8178, + "epoch": 0.07954945441745864, + "grad_norm": 2.2349772453308105, + "learning_rate": 1.3910262086118157e-05, + "loss": 0.7609, "step": 1130 }, { - "epoch": 0.08558132495932806, - "grad_norm": 2.7567524909973145, - "learning_rate": 1.7937821102306127e-05, - "loss": 0.8146, + "epoch": 0.07961985216473073, + "grad_norm": 2.0929770469665527, + "learning_rate": 1.3910007153469544e-05, + "loss": 0.7206, "step": 1131 }, { - "epoch": 0.08565699368166169, - "grad_norm": 3.3898468017578125, - "learning_rate": 1.793755653648534e-05, - "loss": 0.7553, + "epoch": 0.07969024991200281, + "grad_norm": 2.680788993835449, + "learning_rate": 1.3909751861563402e-05, + "loss": 0.6386, "step": 1132 }, { - "epoch": 0.08573266240399531, - "grad_norm": 7.726632595062256, - "learning_rate": 1.7937291410965855e-05, - "loss": 0.803, + "epoch": 0.0797606476592749, + "grad_norm": 2.9313316345214844, + "learning_rate": 1.3909496210413001e-05, + "loss": 0.789, "step": 1133 }, { - "epoch": 0.08580833112632894, - "grad_norm": 3.307058811187744, - "learning_rate": 1.7937025725764273e-05, - "loss": 0.9423, + "epoch": 0.079831045406547, + "grad_norm": 2.3499743938446045, + "learning_rate": 1.3909240200031637e-05, + "loss": 0.9106, "step": 1134 }, { - "epoch": 0.08588399984866256, - "grad_norm": 3.0027151107788086, - "learning_rate": 1.793675948089724e-05, - "loss": 0.8663, + "epoch": 0.07990144315381907, + "grad_norm": 2.4001412391662598, + "learning_rate": 1.3908983830432618e-05, + "loss": 0.8043, "step": 1135 }, { - "epoch": 0.08595966857099618, - "grad_norm": 2.042656898498535, - "learning_rate": 1.793649267638142e-05, - "loss": 1.0062, + "epoch": 0.07997184090109116, + "grad_norm": 2.4667415618896484, + "learning_rate": 1.390872710162927e-05, + "loss": 0.838, "step": 1136 }, { - "epoch": 0.08603533729332981, - "grad_norm": 4.205798625946045, - "learning_rate": 1.7936225312233523e-05, - "loss": 0.7688, + "epoch": 0.08004223864836325, + "grad_norm": 3.071204423904419, + "learning_rate": 1.3908470013634942e-05, + "loss": 0.7939, "step": 1137 }, { - "epoch": 0.08611100601566342, - "grad_norm": 2.7063050270080566, - "learning_rate": 1.7935957388470297e-05, - "loss": 0.7372, + "epoch": 0.08011263639563534, + "grad_norm": 4.286511421203613, + "learning_rate": 1.3908212566463004e-05, + "loss": 0.938, "step": 1138 }, { - "epoch": 0.08618667473799704, - "grad_norm": 2.97853422164917, - "learning_rate": 1.7935688905108513e-05, - "loss": 0.8734, + "epoch": 0.08018303414290742, + "grad_norm": 2.4984350204467773, + "learning_rate": 1.3907954760126839e-05, + "loss": 0.7742, "step": 1139 }, { - "epoch": 0.08626234346033067, - "grad_norm": 2.819689989089966, - "learning_rate": 1.793541986216499e-05, - "loss": 0.8523, + "epoch": 0.08025343189017951, + "grad_norm": 2.583301544189453, + "learning_rate": 1.3907696594639846e-05, + "loss": 0.8492, "step": 1140 }, { - "epoch": 0.08633801218266429, - "grad_norm": 3.4364449977874756, - "learning_rate": 1.7935150259656575e-05, - "loss": 0.9452, + "epoch": 0.0803238296374516, + "grad_norm": 2.0510408878326416, + "learning_rate": 1.3907438070015454e-05, + "loss": 0.7681, "step": 1141 }, { - "epoch": 0.08641368090499792, - "grad_norm": 3.2552027702331543, - "learning_rate": 1.7934880097600153e-05, - "loss": 0.955, + "epoch": 0.08039422738472368, + "grad_norm": 2.0713818073272705, + "learning_rate": 1.3907179186267103e-05, + "loss": 0.8141, "step": 1142 }, { - "epoch": 0.08648934962733154, - "grad_norm": 2.6701014041900635, - "learning_rate": 1.7934609376012637e-05, - "loss": 0.909, + "epoch": 0.08046462513199577, + "grad_norm": 1.994084119796753, + "learning_rate": 1.3906919943408247e-05, + "loss": 0.7366, "step": 1143 }, { - "epoch": 0.08656501834966517, - "grad_norm": 3.5823423862457275, - "learning_rate": 1.7934338094910986e-05, - "loss": 0.7366, + "epoch": 0.08053502287926786, + "grad_norm": 2.5985875129699707, + "learning_rate": 1.390666034145237e-05, + "loss": 0.7806, "step": 1144 }, { - "epoch": 0.08664068707199879, - "grad_norm": 4.914628505706787, - "learning_rate": 1.7934066254312185e-05, - "loss": 0.7957, + "epoch": 0.08060542062653996, + "grad_norm": 2.1617910861968994, + "learning_rate": 1.3906400380412965e-05, + "loss": 0.9237, "step": 1145 }, { - "epoch": 0.08671635579433241, - "grad_norm": 3.194420576095581, - "learning_rate": 1.7933793854233258e-05, - "loss": 0.8123, + "epoch": 0.08067581837381203, + "grad_norm": 2.5079469680786133, + "learning_rate": 1.390614006030355e-05, + "loss": 0.6956, "step": 1146 }, { - "epoch": 0.08679202451666604, - "grad_norm": 3.094132900238037, - "learning_rate": 1.7933520894691268e-05, - "loss": 0.9446, + "epoch": 0.08074621612108412, + "grad_norm": 2.830551862716675, + "learning_rate": 1.390587938113766e-05, + "loss": 0.8772, "step": 1147 }, { - "epoch": 0.08686769323899966, - "grad_norm": 3.0485928058624268, - "learning_rate": 1.7933247375703302e-05, - "loss": 0.8139, + "epoch": 0.08081661386835622, + "grad_norm": 3.30703067779541, + "learning_rate": 1.3905618342928843e-05, + "loss": 0.756, "step": 1148 }, { - "epoch": 0.08694336196133329, - "grad_norm": 2.725642681121826, - "learning_rate": 1.7932973297286493e-05, - "loss": 0.8204, + "epoch": 0.0808870116156283, + "grad_norm": 2.7300477027893066, + "learning_rate": 1.3905356945690678e-05, + "loss": 0.7942, "step": 1149 }, { - "epoch": 0.08701903068366691, - "grad_norm": 2.9291388988494873, - "learning_rate": 1.7932698659458002e-05, - "loss": 0.7845, + "epoch": 0.08095740936290038, + "grad_norm": 2.952061891555786, + "learning_rate": 1.3905095189436748e-05, + "loss": 0.82, "step": 1150 }, { - "epoch": 0.08709469940600054, - "grad_norm": 3.9883525371551514, - "learning_rate": 1.793242346223503e-05, - "loss": 0.9038, + "epoch": 0.08102780711017248, + "grad_norm": 2.432748794555664, + "learning_rate": 1.390483307418067e-05, + "loss": 0.7844, "step": 1151 }, { - "epoch": 0.08717036812833415, - "grad_norm": 2.8218777179718018, - "learning_rate": 1.7932147705634813e-05, - "loss": 0.9964, + "epoch": 0.08109820485744457, + "grad_norm": 2.337480306625366, + "learning_rate": 1.3904570599936064e-05, + "loss": 0.8442, "step": 1152 }, { - "epoch": 0.08724603685066777, - "grad_norm": 3.1680803298950195, - "learning_rate": 1.7931871389674615e-05, - "loss": 0.8961, + "epoch": 0.08116860260471664, + "grad_norm": 2.1601502895355225, + "learning_rate": 1.390430776671658e-05, + "loss": 0.798, "step": 1153 }, { - "epoch": 0.0873217055730014, - "grad_norm": 3.147364616394043, - "learning_rate": 1.7931594514371738e-05, - "loss": 0.8748, + "epoch": 0.08123900035198874, + "grad_norm": 2.622744083404541, + "learning_rate": 1.3904044574535883e-05, + "loss": 0.7757, "step": 1154 }, { - "epoch": 0.08739737429533502, - "grad_norm": 2.9208531379699707, - "learning_rate": 1.7931317079743526e-05, - "loss": 0.8665, + "epoch": 0.08130939809926083, + "grad_norm": 2.1422293186187744, + "learning_rate": 1.3903781023407656e-05, + "loss": 0.5373, "step": 1155 }, { - "epoch": 0.08747304301766864, - "grad_norm": 3.3694779872894287, - "learning_rate": 1.793103908580735e-05, - "loss": 0.955, + "epoch": 0.0813797958465329, + "grad_norm": 1.9106401205062866, + "learning_rate": 1.3903517113345598e-05, + "loss": 0.7232, "step": 1156 }, { - "epoch": 0.08754871174000227, - "grad_norm": 2.678873300552368, - "learning_rate": 1.793076053258062e-05, - "loss": 0.7752, + "epoch": 0.081450193593805, + "grad_norm": 2.0706582069396973, + "learning_rate": 1.3903252844363438e-05, + "loss": 0.9279, "step": 1157 }, { - "epoch": 0.0876243804623359, - "grad_norm": 2.5789995193481445, - "learning_rate": 1.793048142008078e-05, - "loss": 0.8021, + "epoch": 0.08152059134107709, + "grad_norm": 2.421539068222046, + "learning_rate": 1.3902988216474909e-05, + "loss": 0.7756, "step": 1158 }, { - "epoch": 0.08770004918466952, - "grad_norm": 2.937720537185669, - "learning_rate": 1.793020174832531e-05, - "loss": 0.7064, + "epoch": 0.08159098908834918, + "grad_norm": 2.4386990070343018, + "learning_rate": 1.3902723229693771e-05, + "loss": 0.7715, "step": 1159 }, { - "epoch": 0.08777571790700314, - "grad_norm": 3.5758986473083496, - "learning_rate": 1.7929921517331725e-05, - "loss": 0.7418, + "epoch": 0.08166138683562126, + "grad_norm": 2.156053066253662, + "learning_rate": 1.39024578840338e-05, + "loss": 0.6423, "step": 1160 }, { - "epoch": 0.08785138662933677, - "grad_norm": 3.9661405086517334, - "learning_rate": 1.792964072711757e-05, - "loss": 0.9024, + "epoch": 0.08173178458289335, + "grad_norm": 2.495974540710449, + "learning_rate": 1.390219217950879e-05, + "loss": 0.9415, "step": 1161 }, { - "epoch": 0.08792705535167039, - "grad_norm": 2.9074490070343018, - "learning_rate": 1.792935937770043e-05, - "loss": 1.0127, + "epoch": 0.08180218233016544, + "grad_norm": 2.8849143981933594, + "learning_rate": 1.3901926116132562e-05, + "loss": 0.8068, "step": 1162 }, { - "epoch": 0.08800272407400402, - "grad_norm": 3.0278260707855225, - "learning_rate": 1.7929077469097923e-05, - "loss": 0.8291, + "epoch": 0.08187258007743752, + "grad_norm": 2.169585943222046, + "learning_rate": 1.390165969391894e-05, + "loss": 0.7219, "step": 1163 }, { - "epoch": 0.08807839279633764, - "grad_norm": 2.941364049911499, - "learning_rate": 1.792879500132771e-05, - "loss": 0.7886, + "epoch": 0.0819429778247096, + "grad_norm": 2.262244701385498, + "learning_rate": 1.3901392912881783e-05, + "loss": 0.7655, "step": 1164 }, { - "epoch": 0.08815406151867125, - "grad_norm": 3.2918078899383545, - "learning_rate": 1.7928511974407468e-05, - "loss": 0.8103, + "epoch": 0.0820133755719817, + "grad_norm": 2.673832893371582, + "learning_rate": 1.3901125773034958e-05, + "loss": 0.7721, "step": 1165 }, { - "epoch": 0.08822973024100488, - "grad_norm": 3.120277166366577, - "learning_rate": 1.7928228388354932e-05, - "loss": 0.8215, + "epoch": 0.08208377331925379, + "grad_norm": 2.455653190612793, + "learning_rate": 1.3900858274392355e-05, + "loss": 0.8008, "step": 1166 }, { - "epoch": 0.0883053989633385, - "grad_norm": 3.4401562213897705, - "learning_rate": 1.7927944243187857e-05, - "loss": 0.8355, + "epoch": 0.08215417106652587, + "grad_norm": 2.270050048828125, + "learning_rate": 1.390059041696788e-05, + "loss": 0.8389, "step": 1167 }, { - "epoch": 0.08838106768567212, - "grad_norm": 3.9332945346832275, - "learning_rate": 1.7927659538924037e-05, - "loss": 0.966, + "epoch": 0.08222456881379796, + "grad_norm": 1.9465999603271484, + "learning_rate": 1.3900322200775458e-05, + "loss": 0.7993, "step": 1168 }, { - "epoch": 0.08845673640800575, - "grad_norm": 3.2182464599609375, - "learning_rate": 1.7927374275581298e-05, - "loss": 0.8184, + "epoch": 0.08229496656107005, + "grad_norm": 3.4678690433502197, + "learning_rate": 1.3900053625829035e-05, + "loss": 0.833, "step": 1169 }, { - "epoch": 0.08853240513033937, - "grad_norm": 4.130304336547852, - "learning_rate": 1.792708845317751e-05, - "loss": 0.8465, + "epoch": 0.08236536430834214, + "grad_norm": 2.734975814819336, + "learning_rate": 1.3899784692142576e-05, + "loss": 0.7439, "step": 1170 }, { - "epoch": 0.088608073852673, - "grad_norm": 3.9958245754241943, - "learning_rate": 1.792680207173057e-05, - "loss": 0.7846, + "epoch": 0.08243576205561422, + "grad_norm": 2.213425397872925, + "learning_rate": 1.3899515399730065e-05, + "loss": 0.7798, "step": 1171 }, { - "epoch": 0.08868374257500662, - "grad_norm": 3.126950740814209, - "learning_rate": 1.792651513125841e-05, - "loss": 0.9448, + "epoch": 0.08250615980288631, + "grad_norm": 2.859841823577881, + "learning_rate": 1.3899245748605495e-05, + "loss": 0.8909, "step": 1172 }, { - "epoch": 0.08875941129734025, - "grad_norm": 2.5483791828155518, - "learning_rate": 1.7926227631779e-05, - "loss": 0.818, + "epoch": 0.0825765575501584, + "grad_norm": 2.0889432430267334, + "learning_rate": 1.3898975738782894e-05, + "loss": 0.8147, "step": 1173 }, { - "epoch": 0.08883508001967387, - "grad_norm": 3.175549268722534, - "learning_rate": 1.7925939573310348e-05, - "loss": 0.8815, + "epoch": 0.08264695529743048, + "grad_norm": 2.589876413345337, + "learning_rate": 1.3898705370276294e-05, + "loss": 0.7111, "step": 1174 }, { - "epoch": 0.0889107487420075, - "grad_norm": 3.0271804332733154, - "learning_rate": 1.7925650955870484e-05, - "loss": 0.8213, + "epoch": 0.08271735304470257, + "grad_norm": 2.3769683837890625, + "learning_rate": 1.3898434643099754e-05, + "loss": 0.681, "step": 1175 }, { - "epoch": 0.08898641746434112, - "grad_norm": 4.855120658874512, - "learning_rate": 1.7925361779477492e-05, - "loss": 0.7978, + "epoch": 0.08278775079197466, + "grad_norm": 2.908860683441162, + "learning_rate": 1.3898163557267349e-05, + "loss": 0.7034, "step": 1176 }, { - "epoch": 0.08906208618667474, - "grad_norm": 2.6035373210906982, - "learning_rate": 1.792507204414948e-05, - "loss": 0.7594, + "epoch": 0.08285814853924675, + "grad_norm": 2.564044237136841, + "learning_rate": 1.3897892112793175e-05, + "loss": 0.8255, "step": 1177 }, { - "epoch": 0.08913775490900837, - "grad_norm": 2.7867379188537598, - "learning_rate": 1.7924781749904583e-05, - "loss": 0.9074, + "epoch": 0.08292854628651883, + "grad_norm": 2.162842035293579, + "learning_rate": 1.3897620309691343e-05, + "loss": 0.706, "step": 1178 }, { - "epoch": 0.08921342363134198, - "grad_norm": 2.4809141159057617, - "learning_rate": 1.792449089676099e-05, - "loss": 0.7016, + "epoch": 0.08299894403379092, + "grad_norm": 2.9402880668640137, + "learning_rate": 1.3897348147975985e-05, + "loss": 0.8585, "step": 1179 }, { - "epoch": 0.0892890923536756, - "grad_norm": 3.8243072032928467, - "learning_rate": 1.7924199484736912e-05, - "loss": 0.9437, + "epoch": 0.08306934178106301, + "grad_norm": 2.2693889141082764, + "learning_rate": 1.3897075627661247e-05, + "loss": 0.7962, "step": 1180 }, { - "epoch": 0.08936476107600923, - "grad_norm": 2.4355239868164062, - "learning_rate": 1.7923907513850598e-05, - "loss": 0.7985, + "epoch": 0.08313973952833509, + "grad_norm": 3.39579176902771, + "learning_rate": 1.3896802748761302e-05, + "loss": 0.8012, "step": 1181 }, { - "epoch": 0.08944042979834285, - "grad_norm": 3.5210628509521484, - "learning_rate": 1.792361498412033e-05, - "loss": 0.9034, + "epoch": 0.08321013727560718, + "grad_norm": 2.2999424934387207, + "learning_rate": 1.3896529511290336e-05, + "loss": 0.8648, "step": 1182 }, { - "epoch": 0.08951609852067648, - "grad_norm": 3.205606460571289, - "learning_rate": 1.7923321895564434e-05, - "loss": 0.836, + "epoch": 0.08328053502287927, + "grad_norm": 2.5290372371673584, + "learning_rate": 1.3896255915262555e-05, + "loss": 0.681, "step": 1183 }, { - "epoch": 0.0895917672430101, - "grad_norm": 2.4469003677368164, - "learning_rate": 1.7923028248201254e-05, - "loss": 0.8001, + "epoch": 0.08335093277015136, + "grad_norm": 2.6502726078033447, + "learning_rate": 1.3895981960692185e-05, + "loss": 0.8042, "step": 1184 }, { - "epoch": 0.08966743596534373, - "grad_norm": 3.101621150970459, - "learning_rate": 1.792273404204919e-05, - "loss": 0.9058, + "epoch": 0.08342133051742344, + "grad_norm": 2.231309413909912, + "learning_rate": 1.3895707647593465e-05, + "loss": 0.8956, "step": 1185 }, { - "epoch": 0.08974310468767735, - "grad_norm": 2.8717565536499023, - "learning_rate": 1.7922439277126656e-05, - "loss": 0.8039, + "epoch": 0.08349172826469553, + "grad_norm": 2.863345146179199, + "learning_rate": 1.389543297598066e-05, + "loss": 0.885, "step": 1186 }, { - "epoch": 0.08981877341001097, - "grad_norm": 2.7401671409606934, - "learning_rate": 1.7922143953452117e-05, - "loss": 0.8037, + "epoch": 0.08356212601196762, + "grad_norm": 3.0012638568878174, + "learning_rate": 1.3895157945868051e-05, + "loss": 0.8302, "step": 1187 }, { - "epoch": 0.0898944421323446, - "grad_norm": 3.1042513847351074, - "learning_rate": 1.7921848071044065e-05, - "loss": 0.8021, + "epoch": 0.0836325237592397, + "grad_norm": 2.793842077255249, + "learning_rate": 1.3894882557269934e-05, + "loss": 0.9294, "step": 1188 }, { - "epoch": 0.08997011085467822, - "grad_norm": 3.410513162612915, - "learning_rate": 1.7921551629921033e-05, - "loss": 0.7713, + "epoch": 0.08370292150651179, + "grad_norm": 2.2975356578826904, + "learning_rate": 1.3894606810200628e-05, + "loss": 0.7988, "step": 1189 }, { - "epoch": 0.09004577957701185, - "grad_norm": 2.913480758666992, - "learning_rate": 1.792125463010158e-05, - "loss": 0.6799, + "epoch": 0.08377331925378388, + "grad_norm": 2.162510871887207, + "learning_rate": 1.3894330704674472e-05, + "loss": 0.7758, "step": 1190 }, { - "epoch": 0.09012144829934547, - "grad_norm": 3.294175863265991, - "learning_rate": 1.792095707160431e-05, - "loss": 0.9056, + "epoch": 0.08384371700105597, + "grad_norm": 2.535217046737671, + "learning_rate": 1.3894054240705819e-05, + "loss": 0.7795, "step": 1191 }, { - "epoch": 0.09019711702167908, - "grad_norm": 3.1891300678253174, - "learning_rate": 1.792065895444785e-05, - "loss": 0.8954, + "epoch": 0.08391411474832805, + "grad_norm": 2.562312126159668, + "learning_rate": 1.389377741830904e-05, + "loss": 0.7146, "step": 1192 }, { - "epoch": 0.0902727857440127, - "grad_norm": 2.7054443359375, - "learning_rate": 1.7920360278650874e-05, - "loss": 0.8146, + "epoch": 0.08398451249560014, + "grad_norm": 2.719789505004883, + "learning_rate": 1.3893500237498531e-05, + "loss": 0.8078, "step": 1193 }, { - "epoch": 0.09034845446634633, - "grad_norm": 2.5543649196624756, - "learning_rate": 1.7920061044232086e-05, - "loss": 0.7524, + "epoch": 0.08405491024287223, + "grad_norm": 2.3458685874938965, + "learning_rate": 1.3893222698288703e-05, + "loss": 0.8022, "step": 1194 }, { - "epoch": 0.09042412318867996, - "grad_norm": 2.7355525493621826, - "learning_rate": 1.7919761251210227e-05, - "loss": 0.8009, + "epoch": 0.08412530799014431, + "grad_norm": 3.404025077819824, + "learning_rate": 1.3892944800693981e-05, + "loss": 0.6935, "step": 1195 }, { - "epoch": 0.09049979191101358, - "grad_norm": 2.5173544883728027, - "learning_rate": 1.7919460899604065e-05, - "loss": 0.8508, + "epoch": 0.0841957057374164, + "grad_norm": 2.1639485359191895, + "learning_rate": 1.389266654472882e-05, + "loss": 0.7435, "step": 1196 }, { - "epoch": 0.0905754606333472, - "grad_norm": 4.627716064453125, - "learning_rate": 1.7919159989432417e-05, - "loss": 0.7769, + "epoch": 0.08426610348468849, + "grad_norm": 2.474079132080078, + "learning_rate": 1.389238793040768e-05, + "loss": 0.8526, "step": 1197 }, { - "epoch": 0.09065112935568083, - "grad_norm": 3.5971720218658447, - "learning_rate": 1.7918858520714118e-05, - "loss": 0.7218, + "epoch": 0.08433650123196058, + "grad_norm": 2.1720364093780518, + "learning_rate": 1.389210895774505e-05, + "loss": 0.7507, "step": 1198 }, { - "epoch": 0.09072679807801445, - "grad_norm": 3.731093168258667, - "learning_rate": 1.791855649346805e-05, - "loss": 0.9134, + "epoch": 0.08440689897923266, + "grad_norm": 2.9660544395446777, + "learning_rate": 1.3891829626755432e-05, + "loss": 0.8288, "step": 1199 }, { - "epoch": 0.09080246680034808, - "grad_norm": 3.367734670639038, - "learning_rate": 1.7918253907713133e-05, - "loss": 0.7829, + "epoch": 0.08447729672650475, + "grad_norm": 2.234060049057007, + "learning_rate": 1.3891549937453353e-05, + "loss": 0.8511, "step": 1200 }, { - "epoch": 0.0908781355226817, - "grad_norm": 2.7320973873138428, - "learning_rate": 1.791795076346831e-05, - "loss": 0.8174, + "epoch": 0.08454769447377684, + "grad_norm": 2.2961580753326416, + "learning_rate": 1.389126988985335e-05, + "loss": 0.7524, "step": 1201 }, { - "epoch": 0.09095380424501533, - "grad_norm": 2.977246046066284, - "learning_rate": 1.7917647060752562e-05, - "loss": 0.9902, + "epoch": 0.08461809222104892, + "grad_norm": 2.5138049125671387, + "learning_rate": 1.3890989483969984e-05, + "loss": 0.8859, "step": 1202 }, { - "epoch": 0.09102947296734895, - "grad_norm": 2.8450841903686523, - "learning_rate": 1.7917342799584916e-05, - "loss": 0.7602, + "epoch": 0.08468848996832101, + "grad_norm": 2.0798654556274414, + "learning_rate": 1.3890708719817834e-05, + "loss": 0.7693, "step": 1203 }, { - "epoch": 0.09110514168968258, - "grad_norm": 2.729694128036499, - "learning_rate": 1.791703797998442e-05, - "loss": 0.7567, + "epoch": 0.0847588877155931, + "grad_norm": 3.122779607772827, + "learning_rate": 1.3890427597411498e-05, + "loss": 0.8716, "step": 1204 }, { - "epoch": 0.0911808104120162, - "grad_norm": 2.8043293952941895, - "learning_rate": 1.7916732601970166e-05, - "loss": 0.8673, + "epoch": 0.0848292854628652, + "grad_norm": 2.847696542739868, + "learning_rate": 1.3890146116765588e-05, + "loss": 0.8266, "step": 1205 }, { - "epoch": 0.09125647913434981, - "grad_norm": 2.3596503734588623, - "learning_rate": 1.7916426665561276e-05, - "loss": 0.6582, + "epoch": 0.08489968321013727, + "grad_norm": 2.395873785018921, + "learning_rate": 1.3889864277894744e-05, + "loss": 0.911, "step": 1206 }, { - "epoch": 0.09133214785668343, - "grad_norm": 3.7093918323516846, - "learning_rate": 1.791612017077691e-05, - "loss": 0.8625, + "epoch": 0.08497008095740936, + "grad_norm": 2.2786810398101807, + "learning_rate": 1.3889582080813615e-05, + "loss": 0.8183, "step": 1207 }, { - "epoch": 0.09140781657901706, - "grad_norm": 3.144490957260132, - "learning_rate": 1.791581311763626e-05, - "loss": 0.7741, + "epoch": 0.08504047870468145, + "grad_norm": 2.9216792583465576, + "learning_rate": 1.3889299525536876e-05, + "loss": 0.8303, "step": 1208 }, { - "epoch": 0.09148348530135068, - "grad_norm": 2.924307346343994, - "learning_rate": 1.7915505506158553e-05, - "loss": 0.7959, + "epoch": 0.08511087645195353, + "grad_norm": 2.7165164947509766, + "learning_rate": 1.3889016612079214e-05, + "loss": 0.8358, "step": 1209 }, { - "epoch": 0.09155915402368431, - "grad_norm": 3.3152287006378174, - "learning_rate": 1.7915197336363054e-05, - "loss": 0.928, + "epoch": 0.08518127419922562, + "grad_norm": 2.5285568237304688, + "learning_rate": 1.388873334045534e-05, + "loss": 0.7461, "step": 1210 }, { - "epoch": 0.09163482274601793, - "grad_norm": 3.4837348461151123, - "learning_rate": 1.7914888608269068e-05, - "loss": 0.8039, + "epoch": 0.08525167194649771, + "grad_norm": 2.4055800437927246, + "learning_rate": 1.3888449710679979e-05, + "loss": 0.8048, "step": 1211 }, { - "epoch": 0.09171049146835156, - "grad_norm": 3.198897361755371, - "learning_rate": 1.791457932189592e-05, - "loss": 0.8471, + "epoch": 0.0853220696937698, + "grad_norm": 2.3476266860961914, + "learning_rate": 1.388816572276788e-05, + "loss": 0.7908, "step": 1212 }, { - "epoch": 0.09178616019068518, - "grad_norm": 2.604722023010254, - "learning_rate": 1.791426947726298e-05, - "loss": 0.7698, + "epoch": 0.08539246744104188, + "grad_norm": 2.1052346229553223, + "learning_rate": 1.3887881376733808e-05, + "loss": 0.7113, "step": 1213 }, { - "epoch": 0.0918618289130188, - "grad_norm": 5.606075286865234, - "learning_rate": 1.7913959074389655e-05, - "loss": 0.7748, + "epoch": 0.08546286518831397, + "grad_norm": 2.121363401412964, + "learning_rate": 1.3887596672592544e-05, + "loss": 0.9373, "step": 1214 }, { - "epoch": 0.09193749763535243, - "grad_norm": 3.001539707183838, - "learning_rate": 1.7913648113295382e-05, - "loss": 0.8456, + "epoch": 0.08553326293558607, + "grad_norm": 2.3110511302948, + "learning_rate": 1.3887311610358892e-05, + "loss": 0.8173, "step": 1215 }, { - "epoch": 0.09201316635768605, - "grad_norm": 2.6370272636413574, - "learning_rate": 1.7913336593999634e-05, - "loss": 0.8696, + "epoch": 0.08560366068285814, + "grad_norm": 2.68574857711792, + "learning_rate": 1.3887026190047672e-05, + "loss": 0.8506, "step": 1216 }, { - "epoch": 0.09208883508001968, - "grad_norm": 3.1904518604278564, - "learning_rate": 1.791302451652192e-05, - "loss": 0.7573, + "epoch": 0.08567405843013023, + "grad_norm": 2.5414986610412598, + "learning_rate": 1.3886740411673721e-05, + "loss": 0.8525, "step": 1217 }, { - "epoch": 0.0921645038023533, - "grad_norm": 4.082500457763672, - "learning_rate": 1.791271188088178e-05, - "loss": 0.6788, + "epoch": 0.08574445617740233, + "grad_norm": 2.2566280364990234, + "learning_rate": 1.38864542752519e-05, + "loss": 0.8529, "step": 1218 }, { - "epoch": 0.09224017252468691, - "grad_norm": 3.6102075576782227, - "learning_rate": 1.7912398687098794e-05, - "loss": 0.867, + "epoch": 0.08581485392467442, + "grad_norm": 2.4427168369293213, + "learning_rate": 1.3886167780797087e-05, + "loss": 0.7075, "step": 1219 }, { - "epoch": 0.09231584124702054, - "grad_norm": 2.7773969173431396, - "learning_rate": 1.7912084935192577e-05, - "loss": 0.7679, + "epoch": 0.0858852516719465, + "grad_norm": 3.031726360321045, + "learning_rate": 1.388588092832417e-05, + "loss": 0.7759, "step": 1220 }, { - "epoch": 0.09239150996935416, - "grad_norm": 2.8499839305877686, - "learning_rate": 1.791177062518278e-05, - "loss": 0.819, + "epoch": 0.08595564941921859, + "grad_norm": 4.495028018951416, + "learning_rate": 1.3885593717848072e-05, + "loss": 0.7418, "step": 1221 }, { - "epoch": 0.09246717869168779, - "grad_norm": 2.839503765106201, - "learning_rate": 1.7911455757089076e-05, - "loss": 0.8847, + "epoch": 0.08602604716649068, + "grad_norm": 2.4878880977630615, + "learning_rate": 1.3885306149383719e-05, + "loss": 0.6506, "step": 1222 }, { - "epoch": 0.09254284741402141, - "grad_norm": 3.260828733444214, - "learning_rate": 1.7911140330931193e-05, - "loss": 0.8336, + "epoch": 0.08609644491376275, + "grad_norm": 2.5579073429107666, + "learning_rate": 1.388501822294606e-05, + "loss": 0.8879, "step": 1223 }, { - "epoch": 0.09261851613635504, - "grad_norm": 2.5245110988616943, - "learning_rate": 1.7910824346728882e-05, - "loss": 1.0465, + "epoch": 0.08616684266103485, + "grad_norm": 2.3285892009735107, + "learning_rate": 1.3884729938550072e-05, + "loss": 0.6988, "step": 1224 }, { - "epoch": 0.09269418485868866, - "grad_norm": 3.2489395141601562, - "learning_rate": 1.7910507804501925e-05, - "loss": 0.7982, + "epoch": 0.08623724040830694, + "grad_norm": 7.200527191162109, + "learning_rate": 1.3884441296210738e-05, + "loss": 0.7056, "step": 1225 }, { - "epoch": 0.09276985358102229, - "grad_norm": 2.756540060043335, - "learning_rate": 1.7910190704270155e-05, - "loss": 0.818, + "epoch": 0.08630763815557903, + "grad_norm": 3.112534284591675, + "learning_rate": 1.3884152295943068e-05, + "loss": 0.6785, "step": 1226 }, { - "epoch": 0.09284552230335591, - "grad_norm": 2.708287477493286, - "learning_rate": 1.7909873046053417e-05, - "loss": 0.7818, + "epoch": 0.0863780359028511, + "grad_norm": 2.8253750801086426, + "learning_rate": 1.3883862937762082e-05, + "loss": 0.9163, "step": 1227 }, { - "epoch": 0.09292119102568953, - "grad_norm": 3.1949870586395264, - "learning_rate": 1.7909554829871615e-05, - "loss": 0.7934, + "epoch": 0.0864484336501232, + "grad_norm": 3.5740163326263428, + "learning_rate": 1.3883573221682832e-05, + "loss": 0.856, "step": 1228 }, { - "epoch": 0.09299685974802316, - "grad_norm": 3.080357074737549, - "learning_rate": 1.7909236055744675e-05, - "loss": 0.7607, + "epoch": 0.08651883139739529, + "grad_norm": 2.1805641651153564, + "learning_rate": 1.3883283147720374e-05, + "loss": 0.8387, "step": 1229 }, { - "epoch": 0.09307252847035678, - "grad_norm": 3.3811452388763428, - "learning_rate": 1.790891672369255e-05, - "loss": 0.9556, + "epoch": 0.08658922914466737, + "grad_norm": 2.7790706157684326, + "learning_rate": 1.388299271588979e-05, + "loss": 0.8236, "step": 1230 }, { - "epoch": 0.09314819719269041, - "grad_norm": 2.7672924995422363, - "learning_rate": 1.790859683373525e-05, - "loss": 0.7895, + "epoch": 0.08665962689193946, + "grad_norm": 2.7124087810516357, + "learning_rate": 1.3882701926206182e-05, + "loss": 0.6686, "step": 1231 }, { - "epoch": 0.09322386591502403, - "grad_norm": 2.8778274059295654, - "learning_rate": 1.7908276385892802e-05, - "loss": 0.7614, + "epoch": 0.08673002463921155, + "grad_norm": 2.5937278270721436, + "learning_rate": 1.3882410778684666e-05, + "loss": 0.6733, "step": 1232 }, { - "epoch": 0.09329953463735764, - "grad_norm": 3.135817289352417, - "learning_rate": 1.7907955380185276e-05, - "loss": 0.7486, + "epoch": 0.08680042238648364, + "grad_norm": 2.4791929721832275, + "learning_rate": 1.3882119273340384e-05, + "loss": 0.7743, "step": 1233 }, { - "epoch": 0.09337520335969127, - "grad_norm": 2.9601290225982666, - "learning_rate": 1.790763381663277e-05, - "loss": 0.7852, + "epoch": 0.08687082013375572, + "grad_norm": 2.413642644882202, + "learning_rate": 1.3881827410188485e-05, + "loss": 0.8598, "step": 1234 }, { - "epoch": 0.09345087208202489, - "grad_norm": 2.9267449378967285, - "learning_rate": 1.790731169525542e-05, - "loss": 0.9109, + "epoch": 0.08694121788102781, + "grad_norm": 2.8802154064178467, + "learning_rate": 1.3881535189244148e-05, + "loss": 0.7586, "step": 1235 }, { - "epoch": 0.09352654080435852, - "grad_norm": 2.473426342010498, - "learning_rate": 1.7906989016073405e-05, - "loss": 0.8989, + "epoch": 0.0870116156282999, + "grad_norm": 2.4883174896240234, + "learning_rate": 1.3881242610522565e-05, + "loss": 0.8073, "step": 1236 }, { - "epoch": 0.09360220952669214, - "grad_norm": 3.5370841026306152, - "learning_rate": 1.790666577910693e-05, - "loss": 0.7684, + "epoch": 0.08708201337557198, + "grad_norm": 2.4571712017059326, + "learning_rate": 1.3880949674038945e-05, + "loss": 0.8557, "step": 1237 }, { - "epoch": 0.09367787824902576, - "grad_norm": 2.9519424438476562, - "learning_rate": 1.7906341984376237e-05, - "loss": 0.8645, + "epoch": 0.08715241112284407, + "grad_norm": 2.7038302421569824, + "learning_rate": 1.388065637980852e-05, + "loss": 0.8538, "step": 1238 }, { - "epoch": 0.09375354697135939, - "grad_norm": 3.7128260135650635, - "learning_rate": 1.79060176319016e-05, - "loss": 0.903, + "epoch": 0.08722280887011616, + "grad_norm": 4.072126388549805, + "learning_rate": 1.3880362727846538e-05, + "loss": 0.6583, "step": 1239 }, { - "epoch": 0.09382921569369301, - "grad_norm": 2.769270420074463, - "learning_rate": 1.7905692721703332e-05, - "loss": 0.7397, + "epoch": 0.08729320661738825, + "grad_norm": 2.840019941329956, + "learning_rate": 1.3880068718168267e-05, + "loss": 0.752, "step": 1240 }, { - "epoch": 0.09390488441602664, - "grad_norm": 2.524073600769043, - "learning_rate": 1.7905367253801784e-05, - "loss": 0.6321, + "epoch": 0.08736360436466033, + "grad_norm": 2.644291639328003, + "learning_rate": 1.3879774350788994e-05, + "loss": 0.827, "step": 1241 }, { - "epoch": 0.09398055313836026, - "grad_norm": 2.9674630165100098, - "learning_rate": 1.7905041228217335e-05, - "loss": 0.8209, + "epoch": 0.08743400211193242, + "grad_norm": 2.6969783306121826, + "learning_rate": 1.3879479625724018e-05, + "loss": 0.8139, "step": 1242 }, { - "epoch": 0.09405622186069389, - "grad_norm": 2.350693941116333, - "learning_rate": 1.79047146449704e-05, - "loss": 0.8143, + "epoch": 0.08750439985920451, + "grad_norm": 2.3683059215545654, + "learning_rate": 1.387918454298867e-05, + "loss": 0.7872, "step": 1243 }, { - "epoch": 0.09413189058302751, - "grad_norm": 2.93047833442688, - "learning_rate": 1.7904387504081435e-05, - "loss": 0.8576, + "epoch": 0.08757479760647659, + "grad_norm": 2.622821092605591, + "learning_rate": 1.3878889102598285e-05, + "loss": 0.8955, "step": 1244 }, { - "epoch": 0.09420755930536114, - "grad_norm": 2.9106175899505615, - "learning_rate": 1.7904059805570923e-05, - "loss": 0.7653, + "epoch": 0.08764519535374868, + "grad_norm": 2.576616048812866, + "learning_rate": 1.3878593304568225e-05, + "loss": 0.8943, "step": 1245 }, { - "epoch": 0.09428322802769475, - "grad_norm": 2.6790316104888916, - "learning_rate": 1.7903731549459388e-05, - "loss": 0.8273, + "epoch": 0.08771559310102077, + "grad_norm": 2.552034616470337, + "learning_rate": 1.3878297148913871e-05, + "loss": 0.9009, "step": 1246 }, { - "epoch": 0.09435889675002837, - "grad_norm": 2.455004930496216, - "learning_rate": 1.7903402735767385e-05, - "loss": 0.7973, + "epoch": 0.08778599084829286, + "grad_norm": 2.806102752685547, + "learning_rate": 1.387800063565062e-05, + "loss": 0.7493, "step": 1247 }, { - "epoch": 0.094434565472362, - "grad_norm": 3.4603185653686523, - "learning_rate": 1.7903073364515504e-05, - "loss": 0.7254, + "epoch": 0.08785638859556494, + "grad_norm": 2.813443422317505, + "learning_rate": 1.3877703764793886e-05, + "loss": 0.8974, "step": 1248 }, { - "epoch": 0.09451023419469562, - "grad_norm": 3.0400404930114746, - "learning_rate": 1.790274343572437e-05, - "loss": 0.8121, + "epoch": 0.08792678634283703, + "grad_norm": 2.6058084964752197, + "learning_rate": 1.3877406536359102e-05, + "loss": 0.9062, "step": 1249 }, { - "epoch": 0.09458590291702924, - "grad_norm": 2.6459450721740723, - "learning_rate": 1.7902412949414652e-05, - "loss": 0.871, + "epoch": 0.08799718409010912, + "grad_norm": 2.30733585357666, + "learning_rate": 1.3877108950361726e-05, + "loss": 0.7061, "step": 1250 }, { - "epoch": 0.09466157163936287, - "grad_norm": 3.258230686187744, - "learning_rate": 1.790208190560704e-05, - "loss": 0.8936, + "epoch": 0.0880675818373812, + "grad_norm": 2.7863380908966064, + "learning_rate": 1.3876811006817227e-05, + "loss": 0.8113, "step": 1251 }, { - "epoch": 0.09473724036169649, - "grad_norm": 3.0523667335510254, - "learning_rate": 1.7901750304322267e-05, - "loss": 0.8872, + "epoch": 0.08813797958465329, + "grad_norm": 2.764026641845703, + "learning_rate": 1.3876512705741095e-05, + "loss": 0.8435, "step": 1252 }, { - "epoch": 0.09481290908403012, - "grad_norm": 3.323202610015869, - "learning_rate": 1.79014181455811e-05, - "loss": 0.9111, + "epoch": 0.08820837733192538, + "grad_norm": 2.821798324584961, + "learning_rate": 1.3876214047148842e-05, + "loss": 0.8674, "step": 1253 }, { - "epoch": 0.09488857780636374, - "grad_norm": 2.76963210105896, - "learning_rate": 1.7901085429404335e-05, - "loss": 0.8893, + "epoch": 0.08827877507919747, + "grad_norm": 2.3681561946868896, + "learning_rate": 1.387591503105599e-05, + "loss": 0.7847, "step": 1254 }, { - "epoch": 0.09496424652869737, - "grad_norm": 2.809248685836792, - "learning_rate": 1.790075215581281e-05, - "loss": 0.8977, + "epoch": 0.08834917282646955, + "grad_norm": 2.340742349624634, + "learning_rate": 1.387561565747809e-05, + "loss": 0.7261, "step": 1255 }, { - "epoch": 0.09503991525103099, - "grad_norm": 3.7128524780273438, - "learning_rate": 1.79004183248274e-05, - "loss": 0.846, + "epoch": 0.08841957057374164, + "grad_norm": 2.657803773880005, + "learning_rate": 1.3875315926430704e-05, + "loss": 0.878, "step": 1256 }, { - "epoch": 0.09511558397336461, - "grad_norm": 2.7078073024749756, - "learning_rate": 1.7900083936469003e-05, - "loss": 0.8137, + "epoch": 0.08848996832101373, + "grad_norm": 2.602449893951416, + "learning_rate": 1.3875015837929418e-05, + "loss": 0.8129, "step": 1257 }, { - "epoch": 0.09519125269569824, - "grad_norm": 3.0570812225341797, - "learning_rate": 1.7899748990758564e-05, - "loss": 0.9453, + "epoch": 0.08856036606828581, + "grad_norm": 2.1301064491271973, + "learning_rate": 1.3874715391989833e-05, + "loss": 0.7257, "step": 1258 }, { - "epoch": 0.09526692141803185, - "grad_norm": 3.7988598346710205, - "learning_rate": 1.789941348771706e-05, - "loss": 0.6545, + "epoch": 0.0886307638155579, + "grad_norm": 2.774780035018921, + "learning_rate": 1.3874414588627566e-05, + "loss": 0.7082, "step": 1259 }, { - "epoch": 0.09534259014036547, - "grad_norm": 2.1266133785247803, - "learning_rate": 1.7899077427365496e-05, - "loss": 0.7383, + "epoch": 0.08870116156282999, + "grad_norm": 2.597869396209717, + "learning_rate": 1.387411342785826e-05, + "loss": 0.7646, "step": 1260 }, { - "epoch": 0.0954182588626991, - "grad_norm": 2.4982497692108154, - "learning_rate": 1.7898740809724925e-05, - "loss": 0.6417, + "epoch": 0.08877155931010208, + "grad_norm": 2.3135950565338135, + "learning_rate": 1.3873811909697572e-05, + "loss": 0.914, "step": 1261 }, { - "epoch": 0.09549392758503272, - "grad_norm": 3.788729190826416, - "learning_rate": 1.789840363481642e-05, - "loss": 0.7754, + "epoch": 0.08884195705737416, + "grad_norm": 3.342412233352661, + "learning_rate": 1.3873510034161175e-05, + "loss": 0.8342, "step": 1262 }, { - "epoch": 0.09556959630736635, - "grad_norm": 2.876657009124756, - "learning_rate": 1.78980659026611e-05, - "loss": 0.8848, + "epoch": 0.08891235480464625, + "grad_norm": 2.6504201889038086, + "learning_rate": 1.387320780126477e-05, + "loss": 0.7169, "step": 1263 }, { - "epoch": 0.09564526502969997, - "grad_norm": 2.669171094894409, - "learning_rate": 1.789772761328011e-05, - "loss": 0.7351, + "epoch": 0.08898275255191834, + "grad_norm": 2.190500020980835, + "learning_rate": 1.3872905211024065e-05, + "loss": 0.8503, "step": 1264 }, { - "epoch": 0.0957209337520336, - "grad_norm": 2.671163558959961, - "learning_rate": 1.7897388766694643e-05, - "loss": 0.9625, + "epoch": 0.08905315029919042, + "grad_norm": 2.3023252487182617, + "learning_rate": 1.3872602263454793e-05, + "loss": 0.8676, "step": 1265 }, { - "epoch": 0.09579660247436722, - "grad_norm": 3.019002676010132, - "learning_rate": 1.789704936292591e-05, - "loss": 0.6518, + "epoch": 0.08912354804646251, + "grad_norm": 2.9510107040405273, + "learning_rate": 1.3872298958572704e-05, + "loss": 0.8212, "step": 1266 }, { - "epoch": 0.09587227119670085, - "grad_norm": 2.8676607608795166, - "learning_rate": 1.789670940199517e-05, - "loss": 0.8793, + "epoch": 0.0891939457937346, + "grad_norm": 2.6536686420440674, + "learning_rate": 1.387199529639357e-05, + "loss": 0.8426, "step": 1267 }, { - "epoch": 0.09594793991903447, - "grad_norm": 2.384054660797119, - "learning_rate": 1.789636888392371e-05, - "loss": 0.8888, + "epoch": 0.0892643435410067, + "grad_norm": 2.8836700916290283, + "learning_rate": 1.3871691276933177e-05, + "loss": 0.8276, "step": 1268 }, { - "epoch": 0.0960236086413681, - "grad_norm": 4.045263290405273, - "learning_rate": 1.789602780873286e-05, - "loss": 0.8233, + "epoch": 0.08933474128827877, + "grad_norm": 2.256439208984375, + "learning_rate": 1.387138690020733e-05, + "loss": 0.7493, "step": 1269 }, { - "epoch": 0.09609927736370172, - "grad_norm": 2.7333438396453857, - "learning_rate": 1.7895686176443973e-05, - "loss": 0.8029, + "epoch": 0.08940513903555086, + "grad_norm": 2.5551016330718994, + "learning_rate": 1.3871082166231855e-05, + "loss": 0.7489, "step": 1270 }, { - "epoch": 0.09617494608603534, - "grad_norm": 2.8120603561401367, - "learning_rate": 1.7895343987078446e-05, - "loss": 0.9133, + "epoch": 0.08947553678282295, + "grad_norm": 2.937544107437134, + "learning_rate": 1.3870777075022597e-05, + "loss": 0.7872, "step": 1271 }, { - "epoch": 0.09625061480836897, - "grad_norm": 2.5713160037994385, - "learning_rate": 1.789500124065771e-05, - "loss": 0.8908, + "epoch": 0.08954593453009503, + "grad_norm": 2.5060267448425293, + "learning_rate": 1.3870471626595416e-05, + "loss": 0.7802, "step": 1272 }, { - "epoch": 0.09632628353070258, - "grad_norm": 3.9488155841827393, - "learning_rate": 1.7894657937203222e-05, - "loss": 0.8737, + "epoch": 0.08961633227736712, + "grad_norm": 2.519090175628662, + "learning_rate": 1.3870165820966192e-05, + "loss": 0.7205, "step": 1273 }, { - "epoch": 0.0964019522530362, - "grad_norm": 4.038736820220947, - "learning_rate": 1.7894314076736486e-05, - "loss": 0.9211, + "epoch": 0.08968673002463921, + "grad_norm": 2.4442453384399414, + "learning_rate": 1.3869859658150824e-05, + "loss": 0.8573, "step": 1274 }, { - "epoch": 0.09647762097536983, - "grad_norm": 2.971062660217285, - "learning_rate": 1.789396965927904e-05, - "loss": 0.8733, + "epoch": 0.0897571277719113, + "grad_norm": 3.242283344268799, + "learning_rate": 1.3869553138165233e-05, + "loss": 0.7878, "step": 1275 }, { - "epoch": 0.09655328969770345, - "grad_norm": 2.516186237335205, - "learning_rate": 1.789362468485244e-05, - "loss": 0.7466, + "epoch": 0.08982752551918338, + "grad_norm": 2.114166259765625, + "learning_rate": 1.3869246261025351e-05, + "loss": 0.7206, "step": 1276 }, { - "epoch": 0.09662895842003708, - "grad_norm": 3.4188225269317627, - "learning_rate": 1.78932791534783e-05, - "loss": 0.9705, + "epoch": 0.08989792326645547, + "grad_norm": 2.6408743858337402, + "learning_rate": 1.3868939026747136e-05, + "loss": 0.8894, "step": 1277 }, { - "epoch": 0.0967046271423707, - "grad_norm": 2.534454822540283, - "learning_rate": 1.7892933065178257e-05, - "loss": 0.8904, + "epoch": 0.08996832101372756, + "grad_norm": 3.6767542362213135, + "learning_rate": 1.3868631435346559e-05, + "loss": 0.7697, "step": 1278 }, { - "epoch": 0.09678029586470432, - "grad_norm": 2.6995832920074463, - "learning_rate": 1.789258641997398e-05, - "loss": 0.6556, + "epoch": 0.09003871876099964, + "grad_norm": 5.262668132781982, + "learning_rate": 1.3868323486839615e-05, + "loss": 0.8461, "step": 1279 }, { - "epoch": 0.09685596458703795, - "grad_norm": 3.069735050201416, - "learning_rate": 1.789223921788718e-05, - "loss": 0.9564, + "epoch": 0.09010911650827173, + "grad_norm": 2.3567543029785156, + "learning_rate": 1.386801518124231e-05, + "loss": 0.7171, "step": 1280 }, { - "epoch": 0.09693163330937157, - "grad_norm": 4.048869609832764, - "learning_rate": 1.7891891458939597e-05, - "loss": 0.846, + "epoch": 0.09017951425554382, + "grad_norm": 2.3477931022644043, + "learning_rate": 1.3867706518570676e-05, + "loss": 0.715, "step": 1281 }, { - "epoch": 0.0970073020317052, - "grad_norm": 2.709456443786621, - "learning_rate": 1.7891543143153014e-05, - "loss": 0.7941, + "epoch": 0.09024991200281592, + "grad_norm": 2.534640073776245, + "learning_rate": 1.3867397498840761e-05, + "loss": 0.7449, "step": 1282 }, { - "epoch": 0.09708297075403882, - "grad_norm": 3.2407002449035645, - "learning_rate": 1.7891194270549238e-05, - "loss": 0.7715, + "epoch": 0.090320309750088, + "grad_norm": 2.312380790710449, + "learning_rate": 1.3867088122068632e-05, + "loss": 0.7646, "step": 1283 }, { - "epoch": 0.09715863947637245, - "grad_norm": 4.350454807281494, - "learning_rate": 1.7890844841150122e-05, - "loss": 0.8406, + "epoch": 0.09039070749736008, + "grad_norm": 2.9777767658233643, + "learning_rate": 1.3866778388270371e-05, + "loss": 0.7833, "step": 1284 }, { - "epoch": 0.09723430819870607, - "grad_norm": 3.1144919395446777, - "learning_rate": 1.789049485497754e-05, - "loss": 0.9677, + "epoch": 0.09046110524463218, + "grad_norm": 2.1043152809143066, + "learning_rate": 1.3866468297462083e-05, + "loss": 0.7406, "step": 1285 }, { - "epoch": 0.09730997692103968, - "grad_norm": 5.615466594696045, - "learning_rate": 1.7890144312053423e-05, - "loss": 0.8997, + "epoch": 0.09053150299190425, + "grad_norm": 2.614577293395996, + "learning_rate": 1.386615784965989e-05, + "loss": 0.806, "step": 1286 }, { - "epoch": 0.0973856456433733, - "grad_norm": 3.7363674640655518, - "learning_rate": 1.788979321239971e-05, - "loss": 0.8649, + "epoch": 0.09060190073917634, + "grad_norm": 2.661571741104126, + "learning_rate": 1.3865847044879931e-05, + "loss": 0.9052, "step": 1287 }, { - "epoch": 0.09746131436570693, - "grad_norm": 2.72586989402771, - "learning_rate": 1.7889441556038394e-05, - "loss": 0.8034, + "epoch": 0.09067229848644844, + "grad_norm": 2.3111274242401123, + "learning_rate": 1.3865535883138366e-05, + "loss": 0.8733, "step": 1288 }, { - "epoch": 0.09753698308804055, - "grad_norm": 3.393902540206909, - "learning_rate": 1.7889089342991495e-05, - "loss": 1.0079, + "epoch": 0.09074269623372053, + "grad_norm": 1.9842638969421387, + "learning_rate": 1.3865224364451372e-05, + "loss": 0.8459, "step": 1289 }, { - "epoch": 0.09761265181037418, - "grad_norm": 2.7981932163238525, - "learning_rate": 1.7888736573281073e-05, - "loss": 0.7422, + "epoch": 0.0908130939809926, + "grad_norm": 2.4144270420074463, + "learning_rate": 1.3864912488835148e-05, + "loss": 0.8015, "step": 1290 }, { - "epoch": 0.0976883205327078, - "grad_norm": 3.0770058631896973, - "learning_rate": 1.7888383246929213e-05, - "loss": 0.8174, + "epoch": 0.0908834917282647, + "grad_norm": 2.203235387802124, + "learning_rate": 1.3864600256305906e-05, + "loss": 0.8799, "step": 1291 }, { - "epoch": 0.09776398925504143, - "grad_norm": 3.3533740043640137, - "learning_rate": 1.7888029363958048e-05, - "loss": 0.753, + "epoch": 0.09095388947553679, + "grad_norm": 2.2370188236236572, + "learning_rate": 1.386428766687988e-05, + "loss": 0.825, "step": 1292 }, { - "epoch": 0.09783965797737505, - "grad_norm": 3.7199461460113525, - "learning_rate": 1.788767492438974e-05, - "loss": 0.7974, + "epoch": 0.09102428722280886, + "grad_norm": 2.5947988033294678, + "learning_rate": 1.386397472057332e-05, + "loss": 0.7476, "step": 1293 }, { - "epoch": 0.09791532669970868, - "grad_norm": 3.1728827953338623, - "learning_rate": 1.788731992824648e-05, - "loss": 0.6211, + "epoch": 0.09109468497008096, + "grad_norm": 2.7820844650268555, + "learning_rate": 1.3863661417402497e-05, + "loss": 0.868, "step": 1294 }, { - "epoch": 0.0979909954220423, - "grad_norm": 3.2378153800964355, - "learning_rate": 1.7886964375550497e-05, - "loss": 0.8974, + "epoch": 0.09116508271735305, + "grad_norm": 2.434504747390747, + "learning_rate": 1.3863347757383704e-05, + "loss": 0.8229, "step": 1295 }, { - "epoch": 0.09806666414437593, - "grad_norm": 2.729747772216797, - "learning_rate": 1.7886608266324063e-05, - "loss": 0.8367, + "epoch": 0.09123548046462514, + "grad_norm": 2.003082752227783, + "learning_rate": 1.3863033740533243e-05, + "loss": 0.8127, "step": 1296 }, { - "epoch": 0.09814233286670955, - "grad_norm": 3.429668664932251, - "learning_rate": 1.7886251600589478e-05, - "loss": 0.8581, + "epoch": 0.09130587821189722, + "grad_norm": 2.8632137775421143, + "learning_rate": 1.3862719366867443e-05, + "loss": 0.815, "step": 1297 }, { - "epoch": 0.09821800158904317, - "grad_norm": 2.603367567062378, - "learning_rate": 1.7885894378369077e-05, - "loss": 0.789, + "epoch": 0.0913762759591693, + "grad_norm": 2.8333075046539307, + "learning_rate": 1.3862404636402647e-05, + "loss": 0.8728, "step": 1298 }, { - "epoch": 0.0982936703113768, - "grad_norm": 3.1779685020446777, - "learning_rate": 1.7885536599685227e-05, - "loss": 0.8841, + "epoch": 0.0914466737064414, + "grad_norm": 3.1794545650482178, + "learning_rate": 1.386208954915522e-05, + "loss": 0.8344, "step": 1299 }, { - "epoch": 0.09836933903371041, - "grad_norm": 2.518272638320923, - "learning_rate": 1.7885178264560335e-05, - "loss": 0.7745, + "epoch": 0.09151707145371347, + "grad_norm": 2.5439274311065674, + "learning_rate": 1.3861774105141543e-05, + "loss": 0.8246, "step": 1300 }, { - "epoch": 0.09844500775604403, - "grad_norm": 2.8336172103881836, - "learning_rate": 1.7884819373016844e-05, - "loss": 0.7445, + "epoch": 0.09158746920098557, + "grad_norm": 3.2485299110412598, + "learning_rate": 1.3861458304378014e-05, + "loss": 0.8567, "step": 1301 }, { - "epoch": 0.09852067647837766, - "grad_norm": 3.497591972351074, - "learning_rate": 1.7884459925077227e-05, - "loss": 0.7151, + "epoch": 0.09165786694825766, + "grad_norm": 2.0220799446105957, + "learning_rate": 1.3861142146881056e-05, + "loss": 0.6762, "step": 1302 }, { - "epoch": 0.09859634520071128, - "grad_norm": 1.6441137790679932, - "learning_rate": 1.7884099920763995e-05, - "loss": 0.9654, + "epoch": 0.09172826469552975, + "grad_norm": 2.6894168853759766, + "learning_rate": 1.3860825632667102e-05, + "loss": 0.7731, "step": 1303 }, { - "epoch": 0.09867201392304491, - "grad_norm": 2.5127291679382324, - "learning_rate": 1.788373936009969e-05, - "loss": 0.625, + "epoch": 0.09179866244280183, + "grad_norm": 2.2226099967956543, + "learning_rate": 1.3860508761752612e-05, + "loss": 0.806, "step": 1304 }, { - "epoch": 0.09874768264537853, - "grad_norm": 2.6683413982391357, - "learning_rate": 1.788337824310689e-05, - "loss": 0.9659, + "epoch": 0.09186906019007392, + "grad_norm": 2.5051088333129883, + "learning_rate": 1.3860191534154057e-05, + "loss": 0.783, "step": 1305 }, { - "epoch": 0.09882335136771216, - "grad_norm": 2.3852410316467285, - "learning_rate": 1.7883016569808213e-05, - "loss": 0.8527, + "epoch": 0.09193945793734601, + "grad_norm": 2.3245034217834473, + "learning_rate": 1.385987394988793e-05, + "loss": 0.7509, "step": 1306 }, { - "epoch": 0.09889902009004578, - "grad_norm": 3.4505207538604736, - "learning_rate": 1.788265434022631e-05, - "loss": 0.797, + "epoch": 0.09200985568461809, + "grad_norm": 2.097900867462158, + "learning_rate": 1.3859556008970746e-05, + "loss": 0.8158, "step": 1307 }, { - "epoch": 0.0989746888123794, - "grad_norm": 3.126436471939087, - "learning_rate": 1.7882291554383862e-05, - "loss": 0.8493, + "epoch": 0.09208025343189018, + "grad_norm": 2.423870325088501, + "learning_rate": 1.3859237711419033e-05, + "loss": 0.8729, "step": 1308 }, { - "epoch": 0.09905035753471303, - "grad_norm": 2.6196725368499756, - "learning_rate": 1.7881928212303586e-05, - "loss": 0.8203, + "epoch": 0.09215065117916227, + "grad_norm": 2.469750165939331, + "learning_rate": 1.3858919057249338e-05, + "loss": 0.6643, "step": 1309 }, { - "epoch": 0.09912602625704665, - "grad_norm": 3.3748865127563477, - "learning_rate": 1.788156431400824e-05, - "loss": 0.9412, + "epoch": 0.09222104892643436, + "grad_norm": 2.652338743209839, + "learning_rate": 1.3858600046478229e-05, + "loss": 0.8066, "step": 1310 }, { - "epoch": 0.09920169497938028, - "grad_norm": 3.1734228134155273, - "learning_rate": 1.788119985952061e-05, - "loss": 0.8705, + "epoch": 0.09229144667370644, + "grad_norm": 2.964411973953247, + "learning_rate": 1.385828067912229e-05, + "loss": 0.9045, "step": 1311 }, { - "epoch": 0.0992773637017139, - "grad_norm": 2.6753692626953125, - "learning_rate": 1.7880834848863517e-05, - "loss": 0.7506, + "epoch": 0.09236184442097853, + "grad_norm": 2.3329355716705322, + "learning_rate": 1.385796095519813e-05, + "loss": 0.803, "step": 1312 }, { - "epoch": 0.09935303242404751, - "grad_norm": 2.6498630046844482, - "learning_rate": 1.788046928205982e-05, - "loss": 0.7981, + "epoch": 0.09243224216825062, + "grad_norm": 2.5097787380218506, + "learning_rate": 1.385764087472237e-05, + "loss": 0.7405, "step": 1313 }, { - "epoch": 0.09942870114638114, - "grad_norm": 3.271476984024048, - "learning_rate": 1.788010315913242e-05, - "loss": 0.8675, + "epoch": 0.0925026399155227, + "grad_norm": 2.2900328636169434, + "learning_rate": 1.3857320437711648e-05, + "loss": 0.7038, "step": 1314 }, { - "epoch": 0.09950436986871476, - "grad_norm": 2.3523943424224854, - "learning_rate": 1.7879736480104234e-05, - "loss": 0.8247, + "epoch": 0.09257303766279479, + "grad_norm": 2.4353079795837402, + "learning_rate": 1.3856999644182628e-05, + "loss": 0.7825, "step": 1315 }, { - "epoch": 0.09958003859104839, - "grad_norm": 2.718277931213379, - "learning_rate": 1.787936924499823e-05, - "loss": 0.8838, + "epoch": 0.09264343541006688, + "grad_norm": 1.9602270126342773, + "learning_rate": 1.3856678494151985e-05, + "loss": 0.8439, "step": 1316 }, { - "epoch": 0.09965570731338201, - "grad_norm": 2.883328676223755, - "learning_rate": 1.7879001453837406e-05, - "loss": 0.8652, + "epoch": 0.09271383315733897, + "grad_norm": 2.6522114276885986, + "learning_rate": 1.3856356987636416e-05, + "loss": 0.7998, "step": 1317 }, { - "epoch": 0.09973137603571564, - "grad_norm": 4.095880508422852, - "learning_rate": 1.787863310664479e-05, - "loss": 0.8405, + "epoch": 0.09278423090461105, + "grad_norm": 2.130891799926758, + "learning_rate": 1.3856035124652641e-05, + "loss": 0.6967, "step": 1318 }, { - "epoch": 0.09980704475804926, - "grad_norm": 3.1553285121917725, - "learning_rate": 1.7878264203443453e-05, - "loss": 0.8698, + "epoch": 0.09285462865188314, + "grad_norm": 2.1575052738189697, + "learning_rate": 1.3855712905217388e-05, + "loss": 0.7556, "step": 1319 }, { - "epoch": 0.09988271348038288, - "grad_norm": 3.0702428817749023, - "learning_rate": 1.7877894744256494e-05, - "loss": 0.8055, + "epoch": 0.09292502639915523, + "grad_norm": 2.6170058250427246, + "learning_rate": 1.3855390329347415e-05, + "loss": 0.7389, "step": 1320 }, { - "epoch": 0.09995838220271651, - "grad_norm": 3.2465262413024902, - "learning_rate": 1.7877524729107054e-05, - "loss": 0.8476, + "epoch": 0.09299542414642731, + "grad_norm": 2.095670700073242, + "learning_rate": 1.3855067397059488e-05, + "loss": 0.7532, "step": 1321 }, { - "epoch": 0.10003405092505013, - "grad_norm": 2.630358934402466, - "learning_rate": 1.7877154158018306e-05, - "loss": 0.7447, + "epoch": 0.0930658218936994, + "grad_norm": 2.4671149253845215, + "learning_rate": 1.38547441083704e-05, + "loss": 0.7577, "step": 1322 }, { - "epoch": 0.10010971964738376, - "grad_norm": 3.00877046585083, - "learning_rate": 1.7876783031013445e-05, - "loss": 0.7267, + "epoch": 0.09313621964097149, + "grad_norm": 2.939295530319214, + "learning_rate": 1.3854420463296956e-05, + "loss": 0.8789, "step": 1323 }, { - "epoch": 0.10018538836971738, - "grad_norm": 3.247757911682129, - "learning_rate": 1.7876411348115726e-05, - "loss": 0.9357, + "epoch": 0.09320661738824358, + "grad_norm": 2.3854010105133057, + "learning_rate": 1.3854096461855986e-05, + "loss": 0.8467, "step": 1324 }, { - "epoch": 0.100261057092051, - "grad_norm": 2.484332323074341, - "learning_rate": 1.7876039109348413e-05, - "loss": 0.8055, + "epoch": 0.09327701513551566, + "grad_norm": 2.4174675941467285, + "learning_rate": 1.3853772104064333e-05, + "loss": 0.8978, "step": 1325 }, { - "epoch": 0.10033672581438463, - "grad_norm": 2.731320858001709, - "learning_rate": 1.7875666314734823e-05, - "loss": 0.7875, + "epoch": 0.09334741288278775, + "grad_norm": 2.2650229930877686, + "learning_rate": 1.385344738993886e-05, + "loss": 0.7247, "step": 1326 }, { - "epoch": 0.10041239453671824, - "grad_norm": 2.6516387462615967, - "learning_rate": 1.7875292964298306e-05, - "loss": 0.8813, + "epoch": 0.09341781063005984, + "grad_norm": 2.561947822570801, + "learning_rate": 1.385312231949645e-05, + "loss": 0.8488, "step": 1327 }, { - "epoch": 0.10048806325905187, - "grad_norm": 3.460632085800171, - "learning_rate": 1.7874919058062234e-05, - "loss": 0.8705, + "epoch": 0.09348820837733192, + "grad_norm": 2.5747900009155273, + "learning_rate": 1.3852796892754007e-05, + "loss": 0.8617, "step": 1328 }, { - "epoch": 0.10056373198138549, - "grad_norm": 3.08870792388916, - "learning_rate": 1.7874544596050024e-05, - "loss": 0.9075, + "epoch": 0.09355860612460401, + "grad_norm": 2.3968632221221924, + "learning_rate": 1.3852471109728445e-05, + "loss": 0.859, "step": 1329 }, { - "epoch": 0.10063940070371911, - "grad_norm": 2.5822556018829346, - "learning_rate": 1.787416957828513e-05, - "loss": 0.7953, + "epoch": 0.0936290038718761, + "grad_norm": 2.723215341567993, + "learning_rate": 1.3852144970436706e-05, + "loss": 0.8457, "step": 1330 }, { - "epoch": 0.10071506942605274, - "grad_norm": 2.9101879596710205, - "learning_rate": 1.7873794004791034e-05, - "loss": 0.7639, + "epoch": 0.09369940161914819, + "grad_norm": 2.238443374633789, + "learning_rate": 1.3851818474895742e-05, + "loss": 0.8388, "step": 1331 }, { - "epoch": 0.10079073814838636, - "grad_norm": 3.263343095779419, - "learning_rate": 1.7873417875591257e-05, - "loss": 0.789, + "epoch": 0.09376979936642027, + "grad_norm": 2.9781036376953125, + "learning_rate": 1.3851491623122532e-05, + "loss": 0.7862, "step": 1332 }, { - "epoch": 0.10086640687071999, - "grad_norm": 3.0501604080200195, - "learning_rate": 1.7873041190709348e-05, - "loss": 0.8689, + "epoch": 0.09384019711369236, + "grad_norm": 2.152423858642578, + "learning_rate": 1.3851164415134068e-05, + "loss": 0.7846, "step": 1333 }, { - "epoch": 0.10094207559305361, - "grad_norm": 10.238188743591309, - "learning_rate": 1.7872663950168907e-05, - "loss": 0.8529, + "epoch": 0.09391059486096445, + "grad_norm": 5.138025760650635, + "learning_rate": 1.3850836850947357e-05, + "loss": 0.6571, "step": 1334 }, { - "epoch": 0.10101774431538724, - "grad_norm": 2.7818658351898193, - "learning_rate": 1.7872286153993548e-05, - "loss": 0.883, + "epoch": 0.09398099260823653, + "grad_norm": 2.105922222137451, + "learning_rate": 1.3850508930579436e-05, + "loss": 0.7917, "step": 1335 }, { - "epoch": 0.10109341303772086, - "grad_norm": 3.551182985305786, - "learning_rate": 1.7871907802206934e-05, - "loss": 0.7372, + "epoch": 0.09405139035550862, + "grad_norm": 2.361989974975586, + "learning_rate": 1.3850180654047353e-05, + "loss": 0.8194, "step": 1336 }, { - "epoch": 0.10116908176005449, - "grad_norm": 3.187056541442871, - "learning_rate": 1.7871528894832758e-05, - "loss": 0.8921, + "epoch": 0.09412178810278071, + "grad_norm": 2.111793279647827, + "learning_rate": 1.3849852021368174e-05, + "loss": 0.7655, "step": 1337 }, { - "epoch": 0.10124475048238811, - "grad_norm": 3.0303001403808594, - "learning_rate": 1.7871149431894747e-05, - "loss": 0.9956, + "epoch": 0.0941921858500528, + "grad_norm": 2.6305441856384277, + "learning_rate": 1.3849523032558984e-05, + "loss": 0.9304, "step": 1338 }, { - "epoch": 0.10132041920472173, - "grad_norm": 2.6252822875976562, - "learning_rate": 1.787076941341667e-05, - "loss": 0.6815, + "epoch": 0.09426258359732488, + "grad_norm": 2.5034306049346924, + "learning_rate": 1.3849193687636888e-05, + "loss": 0.7168, "step": 1339 }, { - "epoch": 0.10139608792705535, - "grad_norm": 2.8354198932647705, - "learning_rate": 1.787038883942232e-05, - "loss": 0.8485, + "epoch": 0.09433298134459697, + "grad_norm": 2.2680504322052, + "learning_rate": 1.3848863986619008e-05, + "loss": 0.7562, "step": 1340 }, { - "epoch": 0.10147175664938897, - "grad_norm": 2.798238754272461, - "learning_rate": 1.787000770993553e-05, - "loss": 0.8172, + "epoch": 0.09440337909186906, + "grad_norm": 2.807737350463867, + "learning_rate": 1.3848533929522489e-05, + "loss": 0.7224, "step": 1341 }, { - "epoch": 0.1015474253717226, - "grad_norm": 3.534174680709839, - "learning_rate": 1.7869626024980167e-05, - "loss": 0.7315, + "epoch": 0.09447377683914114, + "grad_norm": 2.4741384983062744, + "learning_rate": 1.3848203516364487e-05, + "loss": 0.7647, "step": 1342 }, { - "epoch": 0.10162309409405622, - "grad_norm": 2.5657639503479004, - "learning_rate": 1.7869243784580133e-05, - "loss": 0.6961, + "epoch": 0.09454417458641323, + "grad_norm": 2.344810962677002, + "learning_rate": 1.3847872747162183e-05, + "loss": 0.7944, "step": 1343 }, { - "epoch": 0.10169876281638984, - "grad_norm": 3.29061222076416, - "learning_rate": 1.7868860988759372e-05, - "loss": 0.7886, + "epoch": 0.09461457233368532, + "grad_norm": 2.4946224689483643, + "learning_rate": 1.3847541621932774e-05, + "loss": 0.7797, "step": 1344 }, { - "epoch": 0.10177443153872347, - "grad_norm": 2.4909753799438477, - "learning_rate": 1.7868477637541845e-05, - "loss": 0.8066, + "epoch": 0.09468497008095741, + "grad_norm": 2.278512477874756, + "learning_rate": 1.3847210140693473e-05, + "loss": 0.8775, "step": 1345 }, { - "epoch": 0.10185010026105709, - "grad_norm": 3.2499606609344482, - "learning_rate": 1.7868093730951568e-05, - "loss": 0.8099, + "epoch": 0.09475536782822949, + "grad_norm": 2.3273086547851562, + "learning_rate": 1.3846878303461514e-05, + "loss": 0.7515, "step": 1346 }, { - "epoch": 0.10192576898339072, - "grad_norm": 2.9574480056762695, - "learning_rate": 1.7867709269012575e-05, - "loss": 0.7153, + "epoch": 0.09482576557550158, + "grad_norm": 2.5070323944091797, + "learning_rate": 1.3846546110254153e-05, + "loss": 0.7492, "step": 1347 }, { - "epoch": 0.10200143770572434, - "grad_norm": 2.3955531120300293, - "learning_rate": 1.786732425174895e-05, - "loss": 1.0746, + "epoch": 0.09489616332277367, + "grad_norm": 2.062927484512329, + "learning_rate": 1.384621356108866e-05, + "loss": 0.8259, "step": 1348 }, { - "epoch": 0.10207710642805796, - "grad_norm": 2.7731571197509766, - "learning_rate": 1.7866938679184797e-05, - "loss": 0.8378, + "epoch": 0.09496656107004575, + "grad_norm": 2.5209076404571533, + "learning_rate": 1.3845880655982322e-05, + "loss": 0.7886, "step": 1349 }, { - "epoch": 0.10215277515039159, - "grad_norm": 2.637190818786621, - "learning_rate": 1.7866552551344267e-05, - "loss": 0.9882, + "epoch": 0.09503695881731784, + "grad_norm": 2.4856977462768555, + "learning_rate": 1.3845547394952451e-05, + "loss": 0.8099, "step": 1350 }, { - "epoch": 0.10222844387272521, - "grad_norm": 3.307260036468506, - "learning_rate": 1.7866165868251535e-05, - "loss": 0.8507, + "epoch": 0.09510735656458993, + "grad_norm": 2.125852346420288, + "learning_rate": 1.3845213778016367e-05, + "loss": 0.7939, "step": 1351 }, { - "epoch": 0.10230411259505884, - "grad_norm": 3.339919090270996, - "learning_rate": 1.786577862993082e-05, - "loss": 0.8656, + "epoch": 0.09517775431186203, + "grad_norm": 2.2064595222473145, + "learning_rate": 1.3844879805191423e-05, + "loss": 0.7733, "step": 1352 }, { - "epoch": 0.10237978131739246, - "grad_norm": 3.161778211593628, - "learning_rate": 1.7865390836406373e-05, - "loss": 0.8829, + "epoch": 0.0952481520591341, + "grad_norm": 2.861234188079834, + "learning_rate": 1.3844545476494977e-05, + "loss": 0.7291, "step": 1353 }, { - "epoch": 0.10245545003972607, - "grad_norm": 7.813992500305176, - "learning_rate": 1.786500248770248e-05, - "loss": 0.7528, + "epoch": 0.0953185498064062, + "grad_norm": 2.4675984382629395, + "learning_rate": 1.3844210791944414e-05, + "loss": 0.6809, "step": 1354 }, { - "epoch": 0.1025311187620597, - "grad_norm": 3.0543980598449707, - "learning_rate": 1.7864613583843453e-05, - "loss": 0.7212, + "epoch": 0.09538894755367829, + "grad_norm": 2.3248350620269775, + "learning_rate": 1.3843875751557133e-05, + "loss": 0.718, "step": 1355 }, { - "epoch": 0.10260678748439332, - "grad_norm": 5.032805442810059, - "learning_rate": 1.7864224124853656e-05, - "loss": 0.772, + "epoch": 0.09545934530095036, + "grad_norm": 2.4369735717773438, + "learning_rate": 1.3843540355350554e-05, + "loss": 0.7312, "step": 1356 }, { - "epoch": 0.10268245620672695, - "grad_norm": 2.9874513149261475, - "learning_rate": 1.7863834110757476e-05, - "loss": 0.7245, + "epoch": 0.09552974304822245, + "grad_norm": 2.5247199535369873, + "learning_rate": 1.3843204603342115e-05, + "loss": 0.7104, "step": 1357 }, { - "epoch": 0.10275812492906057, - "grad_norm": 3.358482837677002, - "learning_rate": 1.786344354157933e-05, - "loss": 0.9143, + "epoch": 0.09560014079549455, + "grad_norm": 4.219001293182373, + "learning_rate": 1.3842868495549268e-05, + "loss": 0.7432, "step": 1358 }, { - "epoch": 0.1028337936513942, - "grad_norm": 2.869976758956909, - "learning_rate": 1.7863052417343684e-05, - "loss": 0.7532, + "epoch": 0.09567053854276664, + "grad_norm": 2.19252610206604, + "learning_rate": 1.3842532031989493e-05, + "loss": 0.8241, "step": 1359 }, { - "epoch": 0.10290946237372782, - "grad_norm": 3.7541520595550537, - "learning_rate": 1.7862660738075028e-05, - "loss": 0.8099, + "epoch": 0.09574093629003871, + "grad_norm": 4.109724044799805, + "learning_rate": 1.384219521268028e-05, + "loss": 0.8959, "step": 1360 }, { - "epoch": 0.10298513109606144, - "grad_norm": 2.752075433731079, - "learning_rate": 1.7862268503797893e-05, - "loss": 0.7319, + "epoch": 0.0958113340373108, + "grad_norm": 3.523434638977051, + "learning_rate": 1.384185803763914e-05, + "loss": 0.7549, "step": 1361 }, { - "epoch": 0.10306079981839507, - "grad_norm": 3.171396017074585, - "learning_rate": 1.786187571453684e-05, - "loss": 0.7254, + "epoch": 0.0958817317845829, + "grad_norm": 2.586801052093506, + "learning_rate": 1.3841520506883607e-05, + "loss": 0.682, "step": 1362 }, { - "epoch": 0.10313646854072869, - "grad_norm": 2.9250547885894775, - "learning_rate": 1.7861482370316464e-05, - "loss": 0.7121, + "epoch": 0.09595212953185497, + "grad_norm": 3.636446237564087, + "learning_rate": 1.3841182620431225e-05, + "loss": 0.7714, "step": 1363 }, { - "epoch": 0.10321213726306232, - "grad_norm": 3.4453530311584473, - "learning_rate": 1.78610884711614e-05, - "loss": 0.8577, + "epoch": 0.09602252727912707, + "grad_norm": 2.432814359664917, + "learning_rate": 1.3840844378299565e-05, + "loss": 0.7042, "step": 1364 }, { - "epoch": 0.10328780598539594, - "grad_norm": 3.4727487564086914, - "learning_rate": 1.7860694017096323e-05, - "loss": 0.7081, + "epoch": 0.09609292502639916, + "grad_norm": 2.7302401065826416, + "learning_rate": 1.3840505780506206e-05, + "loss": 0.716, "step": 1365 }, { - "epoch": 0.10336347470772957, - "grad_norm": 2.7783381938934326, - "learning_rate": 1.7860299008145922e-05, - "loss": 0.8319, + "epoch": 0.09616332277367125, + "grad_norm": 3.0137319564819336, + "learning_rate": 1.3840166827068759e-05, + "loss": 0.8235, "step": 1366 }, { - "epoch": 0.10343914343006318, - "grad_norm": 2.9331109523773193, - "learning_rate": 1.785990344433494e-05, - "loss": 0.772, + "epoch": 0.09623372052094332, + "grad_norm": 2.7418832778930664, + "learning_rate": 1.3839827518004845e-05, + "loss": 0.7196, "step": 1367 }, { - "epoch": 0.1035148121523968, - "grad_norm": 2.4165093898773193, - "learning_rate": 1.7859507325688146e-05, - "loss": 0.8665, + "epoch": 0.09630411826821542, + "grad_norm": 2.4431426525115967, + "learning_rate": 1.38394878533321e-05, + "loss": 0.7854, "step": 1368 }, { - "epoch": 0.10359048087473043, - "grad_norm": 3.596440553665161, - "learning_rate": 1.7859110652230352e-05, - "loss": 0.8207, + "epoch": 0.09637451601548751, + "grad_norm": 2.243542432785034, + "learning_rate": 1.3839147833068192e-05, + "loss": 0.8738, "step": 1369 }, { - "epoch": 0.10366614959706405, - "grad_norm": 3.142336368560791, - "learning_rate": 1.7858713423986392e-05, - "loss": 0.8451, + "epoch": 0.09644491376275958, + "grad_norm": 2.922335386276245, + "learning_rate": 1.3838807457230792e-05, + "loss": 0.8736, "step": 1370 }, { - "epoch": 0.10374181831939767, - "grad_norm": 3.1580209732055664, - "learning_rate": 1.7858315640981147e-05, - "loss": 0.8636, + "epoch": 0.09651531151003168, + "grad_norm": 2.514521360397339, + "learning_rate": 1.3838466725837598e-05, + "loss": 0.8892, "step": 1371 }, { - "epoch": 0.1038174870417313, - "grad_norm": 2.814429998397827, - "learning_rate": 1.7857917303239527e-05, - "loss": 0.8564, + "epoch": 0.09658570925730377, + "grad_norm": 2.7302026748657227, + "learning_rate": 1.3838125638906328e-05, + "loss": 0.8144, "step": 1372 }, { - "epoch": 0.10389315576406492, - "grad_norm": 2.6497771739959717, - "learning_rate": 1.7857518410786472e-05, - "loss": 0.7462, + "epoch": 0.09665610700457586, + "grad_norm": 3.0278453826904297, + "learning_rate": 1.383778419645471e-05, + "loss": 0.7362, "step": 1373 }, { - "epoch": 0.10396882448639855, - "grad_norm": 2.9846158027648926, - "learning_rate": 1.7857118963646963e-05, - "loss": 0.706, + "epoch": 0.09672650475184794, + "grad_norm": 2.437964916229248, + "learning_rate": 1.38374423985005e-05, + "loss": 0.9273, "step": 1374 }, { - "epoch": 0.10404449320873217, - "grad_norm": 3.386066436767578, - "learning_rate": 1.785671896184602e-05, - "loss": 0.7739, + "epoch": 0.09679690249912003, + "grad_norm": 2.7410168647766113, + "learning_rate": 1.383710024506147e-05, + "loss": 0.7983, "step": 1375 }, { - "epoch": 0.1041201619310658, - "grad_norm": 4.174465179443359, - "learning_rate": 1.7856318405408694e-05, - "loss": 0.8237, + "epoch": 0.09686730024639212, + "grad_norm": 2.2270326614379883, + "learning_rate": 1.3836757736155403e-05, + "loss": 0.6236, "step": 1376 }, { - "epoch": 0.10419583065339942, - "grad_norm": 3.151991844177246, - "learning_rate": 1.785591729436006e-05, - "loss": 0.8827, + "epoch": 0.0969376979936642, + "grad_norm": 2.5478107929229736, + "learning_rate": 1.3836414871800111e-05, + "loss": 0.8237, "step": 1377 }, { - "epoch": 0.10427149937573305, - "grad_norm": 3.0265588760375977, - "learning_rate": 1.785551562872524e-05, - "loss": 0.7439, + "epoch": 0.09700809574093629, + "grad_norm": 2.2321324348449707, + "learning_rate": 1.3836071652013418e-05, + "loss": 0.8182, "step": 1378 }, { - "epoch": 0.10434716809806667, - "grad_norm": 2.958704710006714, - "learning_rate": 1.7855113408529395e-05, - "loss": 0.7563, + "epoch": 0.09707849348820838, + "grad_norm": 2.3184924125671387, + "learning_rate": 1.3835728076813168e-05, + "loss": 0.865, "step": 1379 }, { - "epoch": 0.1044228368204003, - "grad_norm": 2.7380552291870117, - "learning_rate": 1.7854710633797703e-05, - "loss": 0.6439, + "epoch": 0.09714889123548047, + "grad_norm": 2.6252634525299072, + "learning_rate": 1.3835384146217225e-05, + "loss": 0.8602, "step": 1380 }, { - "epoch": 0.1044985055427339, - "grad_norm": 2.067690134048462, - "learning_rate": 1.785430730455539e-05, - "loss": 0.8905, + "epoch": 0.09721928898275255, + "grad_norm": 2.3333702087402344, + "learning_rate": 1.383503986024347e-05, + "loss": 0.7972, "step": 1381 }, { - "epoch": 0.10457417426506753, - "grad_norm": 2.9177439212799072, - "learning_rate": 1.785390342082772e-05, - "loss": 0.9292, + "epoch": 0.09728968673002464, + "grad_norm": 2.43939208984375, + "learning_rate": 1.3834695218909803e-05, + "loss": 0.8496, "step": 1382 }, { - "epoch": 0.10464984298740115, - "grad_norm": 3.5004894733428955, - "learning_rate": 1.7853498982639977e-05, - "loss": 0.7962, + "epoch": 0.09736008447729673, + "grad_norm": 2.213832378387451, + "learning_rate": 1.3834350222234141e-05, + "loss": 0.7533, "step": 1383 }, { - "epoch": 0.10472551170973478, - "grad_norm": 2.974620819091797, - "learning_rate": 1.7853093990017494e-05, - "loss": 0.8304, + "epoch": 0.09743048222456882, + "grad_norm": 2.013314723968506, + "learning_rate": 1.3834004870234422e-05, + "loss": 0.7306, "step": 1384 }, { - "epoch": 0.1048011804320684, - "grad_norm": 3.4205729961395264, - "learning_rate": 1.785268844298563e-05, - "loss": 0.8792, + "epoch": 0.0975008799718409, + "grad_norm": 2.263777732849121, + "learning_rate": 1.3833659162928599e-05, + "loss": 0.8346, "step": 1385 }, { - "epoch": 0.10487684915440203, - "grad_norm": 3.7999086380004883, - "learning_rate": 1.7852282341569782e-05, - "loss": 0.9034, + "epoch": 0.09757127771911299, + "grad_norm": 2.3456687927246094, + "learning_rate": 1.383331310033465e-05, + "loss": 0.9148, "step": 1386 }, { - "epoch": 0.10495251787673565, - "grad_norm": 3.3099162578582764, - "learning_rate": 1.7851875685795383e-05, - "loss": 0.9594, + "epoch": 0.09764167546638508, + "grad_norm": 2.3890085220336914, + "learning_rate": 1.3832966682470563e-05, + "loss": 0.7463, "step": 1387 }, { - "epoch": 0.10502818659906928, - "grad_norm": 3.3331620693206787, - "learning_rate": 1.78514684756879e-05, - "loss": 0.9426, + "epoch": 0.09771207321365716, + "grad_norm": 3.2109227180480957, + "learning_rate": 1.3832619909354347e-05, + "loss": 0.7994, "step": 1388 }, { - "epoch": 0.1051038553214029, - "grad_norm": 2.9932851791381836, - "learning_rate": 1.7851060711272827e-05, - "loss": 0.7063, + "epoch": 0.09778247096092925, + "grad_norm": 2.240267515182495, + "learning_rate": 1.3832272781004037e-05, + "loss": 0.7914, "step": 1389 }, { - "epoch": 0.10517952404373652, - "grad_norm": 3.7370762825012207, - "learning_rate": 1.7850652392575712e-05, - "loss": 0.9669, + "epoch": 0.09785286870820134, + "grad_norm": 2.224663019180298, + "learning_rate": 1.3831925297437678e-05, + "loss": 0.7649, "step": 1390 }, { - "epoch": 0.10525519276607015, - "grad_norm": 3.106455087661743, - "learning_rate": 1.785024351962211e-05, - "loss": 0.7777, + "epoch": 0.09792326645547343, + "grad_norm": 2.4558374881744385, + "learning_rate": 1.3831577458673334e-05, + "loss": 0.7993, "step": 1391 }, { - "epoch": 0.10533086148840377, - "grad_norm": 3.7146058082580566, - "learning_rate": 1.784983409243764e-05, - "loss": 0.9436, + "epoch": 0.09799366420274551, + "grad_norm": 2.7335100173950195, + "learning_rate": 1.3831229264729092e-05, + "loss": 0.8467, "step": 1392 }, { - "epoch": 0.1054065302107374, - "grad_norm": 2.6527533531188965, - "learning_rate": 1.784942411104793e-05, - "loss": 0.7476, + "epoch": 0.0980640619500176, + "grad_norm": 2.355727434158325, + "learning_rate": 1.3830880715623052e-05, + "loss": 0.8278, "step": 1393 }, { - "epoch": 0.10548219893307101, - "grad_norm": 2.8467373847961426, - "learning_rate": 1.7849013575478664e-05, - "loss": 0.6969, + "epoch": 0.09813445969728969, + "grad_norm": 2.302319049835205, + "learning_rate": 1.3830531811373339e-05, + "loss": 0.8303, "step": 1394 }, { - "epoch": 0.10555786765540463, - "grad_norm": 3.833505153656006, - "learning_rate": 1.7848602485755542e-05, - "loss": 0.8751, + "epoch": 0.09820485744456177, + "grad_norm": 2.192305564880371, + "learning_rate": 1.3830182551998088e-05, + "loss": 0.7256, "step": 1395 }, { - "epoch": 0.10563353637773826, - "grad_norm": 3.4765021800994873, - "learning_rate": 1.7848190841904314e-05, - "loss": 0.7033, + "epoch": 0.09827525519183386, + "grad_norm": 2.150691509246826, + "learning_rate": 1.3829832937515463e-05, + "loss": 0.7992, "step": 1396 }, { - "epoch": 0.10570920510007188, - "grad_norm": 2.6900827884674072, - "learning_rate": 1.784777864395076e-05, - "loss": 0.7621, + "epoch": 0.09834565293910595, + "grad_norm": 2.6273810863494873, + "learning_rate": 1.3829482967943637e-05, + "loss": 0.8328, "step": 1397 }, { - "epoch": 0.1057848738224055, - "grad_norm": 4.700630187988281, - "learning_rate": 1.7847365891920688e-05, - "loss": 0.9304, + "epoch": 0.09841605068637804, + "grad_norm": 2.681490421295166, + "learning_rate": 1.3829132643300807e-05, + "loss": 0.7747, "step": 1398 }, { - "epoch": 0.10586054254473913, - "grad_norm": 3.0544216632843018, - "learning_rate": 1.7846952585839946e-05, - "loss": 0.8242, + "epoch": 0.09848644843365012, + "grad_norm": 2.4854586124420166, + "learning_rate": 1.3828781963605188e-05, + "loss": 0.8758, "step": 1399 }, { - "epoch": 0.10593621126707276, - "grad_norm": 2.659796714782715, - "learning_rate": 1.784653872573442e-05, - "loss": 0.8753, + "epoch": 0.09855684618092221, + "grad_norm": 2.2144296169281006, + "learning_rate": 1.3828430928875007e-05, + "loss": 0.8129, "step": 1400 }, { - "epoch": 0.10601187998940638, - "grad_norm": 2.934483289718628, - "learning_rate": 1.784612431163003e-05, - "loss": 0.7843, + "epoch": 0.0986272439281943, + "grad_norm": 2.1386542320251465, + "learning_rate": 1.3828079539128519e-05, + "loss": 0.7519, "step": 1401 }, { - "epoch": 0.10608754871174, - "grad_norm": 2.5334622859954834, - "learning_rate": 1.784570934355272e-05, - "loss": 0.8974, + "epoch": 0.09869764167546638, + "grad_norm": 3.1108200550079346, + "learning_rate": 1.3827727794383992e-05, + "loss": 0.8049, "step": 1402 }, { - "epoch": 0.10616321743407363, - "grad_norm": 1.8454688787460327, - "learning_rate": 1.784529382152848e-05, - "loss": 0.999, + "epoch": 0.09876803942273847, + "grad_norm": 2.401463747024536, + "learning_rate": 1.3827375694659713e-05, + "loss": 0.9259, "step": 1403 }, { - "epoch": 0.10623888615640725, - "grad_norm": 3.1255669593811035, - "learning_rate": 1.7844877745583333e-05, - "loss": 0.8522, + "epoch": 0.09883843717001056, + "grad_norm": 2.5965259075164795, + "learning_rate": 1.382702323997399e-05, + "loss": 0.7451, "step": 1404 }, { - "epoch": 0.10631455487874088, - "grad_norm": 2.9718644618988037, - "learning_rate": 1.7844461115743334e-05, - "loss": 0.8519, + "epoch": 0.09890883491728265, + "grad_norm": 2.4389536380767822, + "learning_rate": 1.3826670430345144e-05, + "loss": 0.7561, "step": 1405 }, { - "epoch": 0.1063902236010745, - "grad_norm": 2.7671189308166504, - "learning_rate": 1.7844043932034572e-05, - "loss": 0.9112, + "epoch": 0.09897923266455473, + "grad_norm": 2.2094004154205322, + "learning_rate": 1.3826317265791519e-05, + "loss": 0.8405, "step": 1406 }, { - "epoch": 0.10646589232340813, - "grad_norm": 3.579397678375244, - "learning_rate": 1.7843626194483174e-05, - "loss": 0.8709, + "epoch": 0.09904963041182682, + "grad_norm": 2.685638189315796, + "learning_rate": 1.382596374633148e-05, + "loss": 0.8458, "step": 1407 }, { - "epoch": 0.10654156104574174, - "grad_norm": 2.603875160217285, - "learning_rate": 1.78432079031153e-05, - "loss": 0.7613, + "epoch": 0.09912002815909891, + "grad_norm": 2.936405658721924, + "learning_rate": 1.3825609871983402e-05, + "loss": 0.7925, "step": 1408 }, { - "epoch": 0.10661722976807536, - "grad_norm": 2.835737466812134, - "learning_rate": 1.7842789057957146e-05, - "loss": 0.821, + "epoch": 0.09919042590637099, + "grad_norm": 3.0183722972869873, + "learning_rate": 1.3825255642765685e-05, + "loss": 0.7976, "step": 1409 }, { - "epoch": 0.10669289849040899, - "grad_norm": 2.738487958908081, - "learning_rate": 1.784236965903494e-05, - "loss": 0.7394, + "epoch": 0.09926082365364308, + "grad_norm": 2.388864040374756, + "learning_rate": 1.3824901058696747e-05, + "loss": 0.6759, "step": 1410 }, { - "epoch": 0.10676856721274261, - "grad_norm": 2.9330735206604004, - "learning_rate": 1.7841949706374944e-05, - "loss": 0.9316, + "epoch": 0.09933122140091517, + "grad_norm": 2.0609161853790283, + "learning_rate": 1.382454611979502e-05, + "loss": 0.6893, "step": 1411 }, { - "epoch": 0.10684423593507623, - "grad_norm": 3.281982898712158, - "learning_rate": 1.784152920000346e-05, - "loss": 0.7128, + "epoch": 0.09940161914818726, + "grad_norm": 2.0833375453948975, + "learning_rate": 1.3824190826078961e-05, + "loss": 0.8141, "step": 1412 }, { - "epoch": 0.10691990465740986, - "grad_norm": 2.7696800231933594, - "learning_rate": 1.7841108139946824e-05, - "loss": 0.8896, + "epoch": 0.09947201689545934, + "grad_norm": 2.0329315662384033, + "learning_rate": 1.382383517756704e-05, + "loss": 0.6997, "step": 1413 }, { - "epoch": 0.10699557337974348, - "grad_norm": 6.206698894500732, - "learning_rate": 1.7840686526231394e-05, - "loss": 0.8619, + "epoch": 0.09954241464273143, + "grad_norm": 2.3665215969085693, + "learning_rate": 1.3823479174277747e-05, + "loss": 0.8026, "step": 1414 }, { - "epoch": 0.10707124210207711, - "grad_norm": 2.468066453933716, - "learning_rate": 1.7840264358883585e-05, - "loss": 0.5857, + "epoch": 0.09961281239000352, + "grad_norm": 2.459826946258545, + "learning_rate": 1.3823122816229595e-05, + "loss": 0.6956, "step": 1415 }, { - "epoch": 0.10714691082441073, - "grad_norm": 3.581237316131592, - "learning_rate": 1.7839841637929827e-05, - "loss": 0.8797, + "epoch": 0.0996832101372756, + "grad_norm": 2.236638069152832, + "learning_rate": 1.3822766103441106e-05, + "loss": 0.8378, "step": 1416 }, { - "epoch": 0.10722257954674436, - "grad_norm": 2.6938111782073975, - "learning_rate": 1.7839418363396596e-05, - "loss": 0.6231, + "epoch": 0.0997536078845477, + "grad_norm": 2.592947244644165, + "learning_rate": 1.382240903593083e-05, + "loss": 0.7812, "step": 1417 }, { - "epoch": 0.10729824826907798, - "grad_norm": 3.101189613342285, - "learning_rate": 1.7838994535310393e-05, - "loss": 0.9891, + "epoch": 0.09982400563181978, + "grad_norm": 2.330080986022949, + "learning_rate": 1.382205161371733e-05, + "loss": 0.8787, "step": 1418 }, { - "epoch": 0.1073739169914116, - "grad_norm": 3.6879820823669434, - "learning_rate": 1.7838570153697767e-05, - "loss": 0.7532, + "epoch": 0.09989440337909188, + "grad_norm": 2.26477313041687, + "learning_rate": 1.3821693836819186e-05, + "loss": 0.9215, "step": 1419 }, { - "epoch": 0.10744958571374523, - "grad_norm": 3.3243961334228516, - "learning_rate": 1.783814521858529e-05, - "loss": 0.7972, + "epoch": 0.09996480112636395, + "grad_norm": 2.4268507957458496, + "learning_rate": 1.3821335705255002e-05, + "loss": 0.827, "step": 1420 }, { - "epoch": 0.10752525443607884, - "grad_norm": 3.053708076477051, - "learning_rate": 1.783771972999957e-05, - "loss": 0.8763, + "epoch": 0.10003519887363604, + "grad_norm": 2.1846697330474854, + "learning_rate": 1.3820977219043397e-05, + "loss": 0.7865, "step": 1421 }, { - "epoch": 0.10760092315841246, - "grad_norm": 3.4024465084075928, - "learning_rate": 1.783729368796726e-05, - "loss": 0.8386, + "epoch": 0.10010559662090814, + "grad_norm": 3.0725224018096924, + "learning_rate": 1.382061837820301e-05, + "loss": 0.8126, "step": 1422 }, { - "epoch": 0.10767659188074609, - "grad_norm": 3.4709556102752686, - "learning_rate": 1.7836867092515034e-05, - "loss": 0.8942, + "epoch": 0.10017599436818021, + "grad_norm": 2.04606294631958, + "learning_rate": 1.3820259182752493e-05, + "loss": 0.8874, "step": 1423 }, { - "epoch": 0.10775226060307971, - "grad_norm": 2.8416600227355957, - "learning_rate": 1.783643994366961e-05, - "loss": 0.9081, + "epoch": 0.1002463921154523, + "grad_norm": 2.1336867809295654, + "learning_rate": 1.3819899632710528e-05, + "loss": 0.6933, "step": 1424 }, { - "epoch": 0.10782792932541334, - "grad_norm": 2.391524076461792, - "learning_rate": 1.7836012241457736e-05, - "loss": 0.6704, + "epoch": 0.1003167898627244, + "grad_norm": 2.678637742996216, + "learning_rate": 1.3819539728095802e-05, + "loss": 0.795, "step": 1425 }, { - "epoch": 0.10790359804774696, - "grad_norm": 2.441361665725708, - "learning_rate": 1.7835583985906197e-05, - "loss": 0.6307, + "epoch": 0.10038718760999649, + "grad_norm": 2.6219325065612793, + "learning_rate": 1.381917946892703e-05, + "loss": 0.7864, "step": 1426 }, { - "epoch": 0.10797926677008059, - "grad_norm": 2.4388327598571777, - "learning_rate": 1.7835155177041807e-05, - "loss": 0.7966, + "epoch": 0.10045758535726856, + "grad_norm": 2.3786487579345703, + "learning_rate": 1.3818818855222944e-05, + "loss": 0.7588, "step": 1427 }, { - "epoch": 0.10805493549241421, - "grad_norm": 2.3517658710479736, - "learning_rate": 1.7834725814891427e-05, - "loss": 0.7591, + "epoch": 0.10052798310454066, + "grad_norm": 2.474106788635254, + "learning_rate": 1.3818457887002286e-05, + "loss": 0.701, "step": 1428 }, { - "epoch": 0.10813060421474784, - "grad_norm": 2.327765941619873, - "learning_rate": 1.783429589948194e-05, - "loss": 0.8393, + "epoch": 0.10059838085181275, + "grad_norm": 2.1290125846862793, + "learning_rate": 1.381809656428383e-05, + "loss": 0.8068, "step": 1429 }, { - "epoch": 0.10820627293708146, - "grad_norm": 2.1608386039733887, - "learning_rate": 1.7833865430840273e-05, - "loss": 0.6654, + "epoch": 0.10066877859908482, + "grad_norm": 3.7950210571289062, + "learning_rate": 1.3817734887086358e-05, + "loss": 0.7795, "step": 1430 }, { - "epoch": 0.10828194165941508, - "grad_norm": 2.7661025524139404, - "learning_rate": 1.783343440899338e-05, - "loss": 0.6717, + "epoch": 0.10073917634635692, + "grad_norm": 2.3376305103302, + "learning_rate": 1.3817372855428678e-05, + "loss": 0.8429, "step": 1431 }, { - "epoch": 0.10835761038174871, - "grad_norm": 3.8667588233947754, - "learning_rate": 1.783300283396825e-05, - "loss": 0.8499, + "epoch": 0.100809574093629, + "grad_norm": 2.2078070640563965, + "learning_rate": 1.3817010469329607e-05, + "loss": 0.7091, "step": 1432 }, { - "epoch": 0.10843327910408233, - "grad_norm": 2.459967851638794, - "learning_rate": 1.7832570705791915e-05, - "loss": 0.9147, + "epoch": 0.1008799718409011, + "grad_norm": 2.5158956050872803, + "learning_rate": 1.3816647728807986e-05, + "loss": 0.7847, "step": 1433 }, { - "epoch": 0.10850894782641594, - "grad_norm": 2.2012739181518555, - "learning_rate": 1.7832138024491435e-05, - "loss": 0.787, + "epoch": 0.10095036958817317, + "grad_norm": 3.1038427352905273, + "learning_rate": 1.3816284633882678e-05, + "loss": 0.7041, "step": 1434 }, { - "epoch": 0.10858461654874957, - "grad_norm": 2.780473232269287, - "learning_rate": 1.7831704790093903e-05, - "loss": 0.9463, + "epoch": 0.10102076733544527, + "grad_norm": 2.0924556255340576, + "learning_rate": 1.3815921184572557e-05, + "loss": 0.7322, "step": 1435 }, { - "epoch": 0.10866028527108319, - "grad_norm": 2.5738842487335205, - "learning_rate": 1.7831271002626457e-05, - "loss": 0.7366, + "epoch": 0.10109116508271736, + "grad_norm": 2.1860690116882324, + "learning_rate": 1.3815557380896521e-05, + "loss": 0.7041, "step": 1436 }, { - "epoch": 0.10873595399341682, - "grad_norm": 2.993759870529175, - "learning_rate": 1.7830836662116253e-05, - "loss": 0.7384, + "epoch": 0.10116156282998943, + "grad_norm": 2.4287257194519043, + "learning_rate": 1.3815193222873485e-05, + "loss": 0.7524, "step": 1437 }, { - "epoch": 0.10881162271575044, - "grad_norm": 2.661965847015381, - "learning_rate": 1.7830401768590494e-05, - "loss": 0.7393, + "epoch": 0.10123196057726153, + "grad_norm": 2.60332989692688, + "learning_rate": 1.3814828710522382e-05, + "loss": 0.8885, "step": 1438 }, { - "epoch": 0.10888729143808407, - "grad_norm": 2.5119550228118896, - "learning_rate": 1.782996632207642e-05, - "loss": 0.8387, + "epoch": 0.10130235832453362, + "grad_norm": 2.408057451248169, + "learning_rate": 1.381446384386216e-05, + "loss": 0.7395, "step": 1439 }, { - "epoch": 0.10896296016041769, - "grad_norm": 2.4007089138031006, - "learning_rate": 1.7829530322601288e-05, - "loss": 0.8684, + "epoch": 0.10137275607180571, + "grad_norm": 2.122253894805908, + "learning_rate": 1.3814098622911794e-05, + "loss": 0.827, "step": 1440 }, { - "epoch": 0.10903862888275132, - "grad_norm": 2.434774398803711, - "learning_rate": 1.7829093770192415e-05, - "loss": 0.746, + "epoch": 0.10144315381907779, + "grad_norm": 2.693152666091919, + "learning_rate": 1.3813733047690267e-05, + "loss": 0.8107, "step": 1441 }, { - "epoch": 0.10911429760508494, - "grad_norm": 3.004561185836792, - "learning_rate": 1.782865666487713e-05, - "loss": 0.7922, + "epoch": 0.10151355156634988, + "grad_norm": 2.1799581050872803, + "learning_rate": 1.3813367118216589e-05, + "loss": 0.7079, "step": 1442 }, { - "epoch": 0.10918996632741856, - "grad_norm": 2.905332565307617, - "learning_rate": 1.7828219006682814e-05, - "loss": 0.7966, + "epoch": 0.10158394931362197, + "grad_norm": 2.427865982055664, + "learning_rate": 1.3813000834509785e-05, + "loss": 0.7561, "step": 1443 }, { - "epoch": 0.10926563504975219, - "grad_norm": 2.940967559814453, - "learning_rate": 1.7827780795636866e-05, - "loss": 0.8431, + "epoch": 0.10165434706089405, + "grad_norm": 2.6954433917999268, + "learning_rate": 1.3812634196588895e-05, + "loss": 0.7484, "step": 1444 }, { - "epoch": 0.10934130377208581, - "grad_norm": 2.504350185394287, - "learning_rate": 1.782734203176673e-05, - "loss": 0.938, + "epoch": 0.10172474480816614, + "grad_norm": 2.0755398273468018, + "learning_rate": 1.3812267204472986e-05, + "loss": 0.9217, "step": 1445 }, { - "epoch": 0.10941697249441944, - "grad_norm": 2.725872278213501, - "learning_rate": 1.782690271509989e-05, - "loss": 0.9283, + "epoch": 0.10179514255543823, + "grad_norm": 2.3625447750091553, + "learning_rate": 1.3811899858181134e-05, + "loss": 0.8219, "step": 1446 }, { - "epoch": 0.10949264121675306, - "grad_norm": 2.9516894817352295, - "learning_rate": 1.7826462845663853e-05, - "loss": 0.8293, + "epoch": 0.10186554030271032, + "grad_norm": 3.6413323879241943, + "learning_rate": 1.381153215773244e-05, + "loss": 0.7096, "step": 1447 }, { - "epoch": 0.10956830993908667, - "grad_norm": 3.0764172077178955, - "learning_rate": 1.782602242348616e-05, - "loss": 0.9247, + "epoch": 0.1019359380499824, + "grad_norm": 2.14170241355896, + "learning_rate": 1.3811164103146018e-05, + "loss": 0.818, "step": 1448 }, { - "epoch": 0.1096439786614203, - "grad_norm": 3.033979892730713, - "learning_rate": 1.7825581448594394e-05, - "loss": 0.7406, + "epoch": 0.10200633579725449, + "grad_norm": 2.2434394359588623, + "learning_rate": 1.3810795694441008e-05, + "loss": 0.7065, "step": 1449 }, { - "epoch": 0.10971964738375392, - "grad_norm": 2.9168546199798584, - "learning_rate": 1.782513992101618e-05, - "loss": 0.8797, + "epoch": 0.10207673354452658, + "grad_norm": 2.6800942420959473, + "learning_rate": 1.3810426931636562e-05, + "loss": 0.8615, "step": 1450 }, { - "epoch": 0.10979531610608755, - "grad_norm": 2.965071201324463, - "learning_rate": 1.782469784077915e-05, - "loss": 0.7152, + "epoch": 0.10214713129179866, + "grad_norm": 1.9535430669784546, + "learning_rate": 1.3810057814751848e-05, + "loss": 0.7508, "step": 1451 }, { - "epoch": 0.10987098482842117, - "grad_norm": 2.7454051971435547, - "learning_rate": 1.7824255207911008e-05, - "loss": 0.8399, + "epoch": 0.10221752903907075, + "grad_norm": 2.622714042663574, + "learning_rate": 1.3809688343806065e-05, + "loss": 0.9419, "step": 1452 }, { - "epoch": 0.1099466535507548, - "grad_norm": 3.45354962348938, - "learning_rate": 1.782381202243946e-05, - "loss": 0.8285, + "epoch": 0.10228792678634284, + "grad_norm": 2.1764702796936035, + "learning_rate": 1.3809318518818418e-05, + "loss": 0.724, "step": 1453 }, { - "epoch": 0.11002232227308842, - "grad_norm": 2.291821002960205, - "learning_rate": 1.7823368284392266e-05, - "loss": 0.8612, + "epoch": 0.10235832453361493, + "grad_norm": 15.88931655883789, + "learning_rate": 1.380894833980813e-05, + "loss": 0.9554, "step": 1454 }, { - "epoch": 0.11009799099542204, - "grad_norm": 2.7993972301483154, - "learning_rate": 1.782292399379721e-05, - "loss": 0.7609, + "epoch": 0.10242872228088701, + "grad_norm": 2.414395570755005, + "learning_rate": 1.3808577806794456e-05, + "loss": 0.8448, "step": 1455 }, { - "epoch": 0.11017365971775567, - "grad_norm": 2.7965731620788574, - "learning_rate": 1.7822479150682113e-05, - "loss": 0.8857, + "epoch": 0.1024991200281591, + "grad_norm": 2.182896614074707, + "learning_rate": 1.3808206919796653e-05, + "loss": 0.7506, "step": 1456 }, { - "epoch": 0.11024932844008929, - "grad_norm": 2.9071121215820312, - "learning_rate": 1.782203375507484e-05, - "loss": 0.6945, + "epoch": 0.10256951777543119, + "grad_norm": 1.814693808555603, + "learning_rate": 1.3807835678834006e-05, + "loss": 0.7709, "step": 1457 }, { - "epoch": 0.11032499716242292, - "grad_norm": 6.042922496795654, - "learning_rate": 1.7821587807003278e-05, - "loss": 0.7653, + "epoch": 0.10263991552270327, + "grad_norm": 2.3296077251434326, + "learning_rate": 1.3807464083925818e-05, + "loss": 0.8634, "step": 1458 }, { - "epoch": 0.11040066588475654, - "grad_norm": 2.6927385330200195, - "learning_rate": 1.782114130649536e-05, - "loss": 0.9095, + "epoch": 0.10271031326997536, + "grad_norm": 2.0389816761016846, + "learning_rate": 1.3807092135091406e-05, + "loss": 0.8545, "step": 1459 }, { - "epoch": 0.11047633460709017, - "grad_norm": 2.8487069606781006, - "learning_rate": 1.7820694253579036e-05, - "loss": 0.8508, + "epoch": 0.10278071101724745, + "grad_norm": 2.178295373916626, + "learning_rate": 1.380671983235011e-05, + "loss": 0.675, "step": 1460 }, { - "epoch": 0.11055200332942378, - "grad_norm": 3.1788697242736816, - "learning_rate": 1.782024664828231e-05, - "loss": 0.7621, + "epoch": 0.10285110876451954, + "grad_norm": 2.246753215789795, + "learning_rate": 1.3806347175721283e-05, + "loss": 0.8762, "step": 1461 }, { - "epoch": 0.1106276720517574, - "grad_norm": 3.724763870239258, - "learning_rate": 1.7819798490633212e-05, - "loss": 0.6952, + "epoch": 0.10292150651179162, + "grad_norm": 2.3613970279693604, + "learning_rate": 1.3805974165224307e-05, + "loss": 0.8966, "step": 1462 }, { - "epoch": 0.11070334077409102, - "grad_norm": 2.963629961013794, - "learning_rate": 1.7819349780659806e-05, - "loss": 0.7546, + "epoch": 0.10299190425906371, + "grad_norm": 2.319946050643921, + "learning_rate": 1.3805600800878565e-05, + "loss": 0.7722, "step": 1463 }, { - "epoch": 0.11077900949642465, - "grad_norm": 2.979599952697754, - "learning_rate": 1.781890051839019e-05, - "loss": 0.8406, + "epoch": 0.1030623020063358, + "grad_norm": 1.9336450099945068, + "learning_rate": 1.3805227082703475e-05, + "loss": 0.7318, "step": 1464 }, { - "epoch": 0.11085467821875827, - "grad_norm": 2.474740505218506, - "learning_rate": 1.78184507038525e-05, - "loss": 0.7787, + "epoch": 0.10313269975360788, + "grad_norm": 2.513247489929199, + "learning_rate": 1.3804853010718469e-05, + "loss": 0.9151, "step": 1465 }, { - "epoch": 0.1109303469410919, - "grad_norm": 2.5070388317108154, - "learning_rate": 1.7818000337074906e-05, - "loss": 0.7781, + "epoch": 0.10320309750087997, + "grad_norm": 2.2665843963623047, + "learning_rate": 1.3804478584942989e-05, + "loss": 0.7256, "step": 1466 }, { - "epoch": 0.11100601566342552, - "grad_norm": 2.9093334674835205, - "learning_rate": 1.7817549418085607e-05, - "loss": 0.7751, + "epoch": 0.10327349524815206, + "grad_norm": 2.424797534942627, + "learning_rate": 1.3804103805396506e-05, + "loss": 0.863, "step": 1467 }, { - "epoch": 0.11108168438575915, - "grad_norm": 2.6724863052368164, - "learning_rate": 1.7817097946912847e-05, - "loss": 0.8846, + "epoch": 0.10334389299542415, + "grad_norm": 2.3860042095184326, + "learning_rate": 1.3803728672098505e-05, + "loss": 0.7132, "step": 1468 }, { - "epoch": 0.11115735310809277, - "grad_norm": 2.9973912239074707, - "learning_rate": 1.7816645923584896e-05, - "loss": 0.701, + "epoch": 0.10341429074269623, + "grad_norm": 2.0085227489471436, + "learning_rate": 1.3803353185068488e-05, + "loss": 0.7155, "step": 1469 }, { - "epoch": 0.1112330218304264, - "grad_norm": 2.5031442642211914, - "learning_rate": 1.781619334813006e-05, - "loss": 0.7866, + "epoch": 0.10348468848996832, + "grad_norm": 2.5789105892181396, + "learning_rate": 1.3802977344325976e-05, + "loss": 0.8819, "step": 1470 }, { - "epoch": 0.11130869055276002, - "grad_norm": 2.907050609588623, - "learning_rate": 1.781574022057668e-05, - "loss": 0.7756, + "epoch": 0.10355508623724041, + "grad_norm": 1.8085596561431885, + "learning_rate": 1.3802601149890512e-05, + "loss": 0.73, "step": 1471 }, { - "epoch": 0.11138435927509364, - "grad_norm": 2.397503137588501, - "learning_rate": 1.7815286540953133e-05, - "loss": 0.7306, + "epoch": 0.10362548398451249, + "grad_norm": 2.3451921939849854, + "learning_rate": 1.3802224601781653e-05, + "loss": 0.7507, "step": 1472 }, { - "epoch": 0.11146002799742727, - "grad_norm": 2.7645323276519775, - "learning_rate": 1.7814832309287835e-05, - "loss": 0.81, + "epoch": 0.10369588173178458, + "grad_norm": 2.8347837924957275, + "learning_rate": 1.3801847700018977e-05, + "loss": 0.8573, "step": 1473 }, { - "epoch": 0.1115356967197609, - "grad_norm": 2.5474066734313965, - "learning_rate": 1.7814377525609223e-05, - "loss": 1.0083, + "epoch": 0.10376627947905667, + "grad_norm": 2.3351364135742188, + "learning_rate": 1.380147044462208e-05, + "loss": 0.6663, "step": 1474 }, { - "epoch": 0.1116113654420945, - "grad_norm": 3.7379724979400635, - "learning_rate": 1.7813922189945782e-05, - "loss": 0.8414, + "epoch": 0.10383667722632876, + "grad_norm": 2.3278415203094482, + "learning_rate": 1.3801092835610571e-05, + "loss": 0.758, "step": 1475 }, { - "epoch": 0.11168703416442813, - "grad_norm": 2.205005645751953, - "learning_rate": 1.7813466302326027e-05, - "loss": 0.8559, + "epoch": 0.10390707497360084, + "grad_norm": 2.6337335109710693, + "learning_rate": 1.3800714873004087e-05, + "loss": 0.8577, "step": 1476 }, { - "epoch": 0.11176270288676175, - "grad_norm": 2.9247653484344482, - "learning_rate": 1.7813009862778505e-05, - "loss": 0.7688, + "epoch": 0.10397747272087293, + "grad_norm": 2.395157814025879, + "learning_rate": 1.380033655682228e-05, + "loss": 0.8075, "step": 1477 }, { - "epoch": 0.11183837160909538, - "grad_norm": 2.9259767532348633, - "learning_rate": 1.7812552871331803e-05, - "loss": 0.8447, + "epoch": 0.10404787046814502, + "grad_norm": 2.215142011642456, + "learning_rate": 1.3799957887084816e-05, + "loss": 0.7872, "step": 1478 }, { - "epoch": 0.111914040331429, - "grad_norm": 2.8542733192443848, - "learning_rate": 1.7812095328014533e-05, - "loss": 0.7469, + "epoch": 0.1041182682154171, + "grad_norm": 3.155498504638672, + "learning_rate": 1.3799578863811383e-05, + "loss": 0.7143, "step": 1479 }, { - "epoch": 0.11198970905376263, - "grad_norm": 2.260713577270508, - "learning_rate": 1.7811637232855356e-05, - "loss": 0.6106, + "epoch": 0.10418866596268919, + "grad_norm": 2.5961904525756836, + "learning_rate": 1.3799199487021687e-05, + "loss": 0.737, "step": 1480 }, { - "epoch": 0.11206537777609625, - "grad_norm": 2.205512046813965, - "learning_rate": 1.7811178585882952e-05, - "loss": 0.8235, + "epoch": 0.10425906370996128, + "grad_norm": 2.1150755882263184, + "learning_rate": 1.379881975673545e-05, + "loss": 0.7873, "step": 1481 }, { - "epoch": 0.11214104649842987, - "grad_norm": 2.5569989681243896, - "learning_rate": 1.781071938712605e-05, - "loss": 0.8225, + "epoch": 0.10432946145723337, + "grad_norm": 2.1122090816497803, + "learning_rate": 1.3798439672972418e-05, + "loss": 0.7582, "step": 1482 }, { - "epoch": 0.1122167152207635, - "grad_norm": 2.4361040592193604, - "learning_rate": 1.7810259636613398e-05, - "loss": 0.8132, + "epoch": 0.10439985920450545, + "grad_norm": 2.2658681869506836, + "learning_rate": 1.3798059235752351e-05, + "loss": 0.7638, "step": 1483 }, { - "epoch": 0.11229238394309712, - "grad_norm": 3.278949737548828, - "learning_rate": 1.7809799334373792e-05, - "loss": 0.909, + "epoch": 0.10447025695177754, + "grad_norm": 2.445270299911499, + "learning_rate": 1.3797678445095027e-05, + "loss": 0.9052, "step": 1484 }, { - "epoch": 0.11236805266543075, - "grad_norm": 3.451547145843506, - "learning_rate": 1.780933848043606e-05, - "loss": 0.8415, + "epoch": 0.10454065469904963, + "grad_norm": 3.452242612838745, + "learning_rate": 1.3797297301020245e-05, + "loss": 0.711, "step": 1485 }, { - "epoch": 0.11244372138776437, - "grad_norm": 2.9717342853546143, - "learning_rate": 1.7808877074829058e-05, - "loss": 0.8156, + "epoch": 0.10461105244632171, + "grad_norm": 5.248964309692383, + "learning_rate": 1.379691580354782e-05, + "loss": 0.8683, "step": 1486 }, { - "epoch": 0.112519390110098, - "grad_norm": 2.911635398864746, - "learning_rate": 1.7808415117581683e-05, - "loss": 0.892, + "epoch": 0.1046814501935938, + "grad_norm": 2.1563470363616943, + "learning_rate": 1.3796533952697584e-05, + "loss": 0.7775, "step": 1487 }, { - "epoch": 0.11259505883243161, - "grad_norm": 2.9125287532806396, - "learning_rate": 1.7807952608722862e-05, - "loss": 0.8326, + "epoch": 0.1047518479408659, + "grad_norm": 2.463484048843384, + "learning_rate": 1.3796151748489396e-05, + "loss": 0.8441, "step": 1488 }, { - "epoch": 0.11267072755476523, - "grad_norm": 2.8065741062164307, - "learning_rate": 1.7807489548281562e-05, - "loss": 0.888, + "epoch": 0.10482224568813799, + "grad_norm": 2.6832313537597656, + "learning_rate": 1.3795769190943122e-05, + "loss": 0.6716, "step": 1489 }, { - "epoch": 0.11274639627709886, - "grad_norm": 2.326284408569336, - "learning_rate": 1.780702593628678e-05, - "loss": 0.6024, + "epoch": 0.10489264343541006, + "grad_norm": 2.9963252544403076, + "learning_rate": 1.3795386280078654e-05, + "loss": 0.7348, "step": 1490 }, { - "epoch": 0.11282206499943248, - "grad_norm": 2.677926778793335, - "learning_rate": 1.7806561772767548e-05, - "loss": 0.7457, + "epoch": 0.10496304118268215, + "grad_norm": 2.4201712608337402, + "learning_rate": 1.3795003015915899e-05, + "loss": 0.6824, "step": 1491 }, { - "epoch": 0.1128977337217661, - "grad_norm": 2.430309534072876, - "learning_rate": 1.7806097057752933e-05, - "loss": 0.7384, + "epoch": 0.10503343892995425, + "grad_norm": 2.5559449195861816, + "learning_rate": 1.3794619398474783e-05, + "loss": 0.798, "step": 1492 }, { - "epoch": 0.11297340244409973, - "grad_norm": 2.58219838142395, - "learning_rate": 1.780563179127204e-05, - "loss": 0.8198, + "epoch": 0.10510383667722632, + "grad_norm": 2.4412336349487305, + "learning_rate": 1.3794235427775252e-05, + "loss": 0.8827, "step": 1493 }, { - "epoch": 0.11304907116643335, - "grad_norm": 2.838965892791748, - "learning_rate": 1.7805165973354e-05, - "loss": 0.9538, + "epoch": 0.10517423442449841, + "grad_norm": 6.1311469078063965, + "learning_rate": 1.3793851103837264e-05, + "loss": 0.8787, "step": 1494 }, { - "epoch": 0.11312473988876698, - "grad_norm": 2.168802499771118, - "learning_rate": 1.780469960402799e-05, - "loss": 0.9857, + "epoch": 0.1052446321717705, + "grad_norm": 2.130945920944214, + "learning_rate": 1.3793466426680807e-05, + "loss": 0.8718, "step": 1495 }, { - "epoch": 0.1132004086111006, - "grad_norm": 3.0226144790649414, - "learning_rate": 1.7804232683323212e-05, - "loss": 0.8795, + "epoch": 0.1053150299190426, + "grad_norm": 2.796344757080078, + "learning_rate": 1.379308139632588e-05, + "loss": 0.7159, "step": 1496 }, { - "epoch": 0.11327607733343423, - "grad_norm": 2.2193140983581543, - "learning_rate": 1.7803765211268907e-05, - "loss": 0.8259, + "epoch": 0.10538542766631467, + "grad_norm": 2.3017640113830566, + "learning_rate": 1.3792696012792496e-05, + "loss": 0.8439, "step": 1497 }, { - "epoch": 0.11335174605576785, - "grad_norm": 2.614348888397217, - "learning_rate": 1.7803297187894352e-05, - "loss": 0.7653, + "epoch": 0.10545582541358677, + "grad_norm": 1.9842846393585205, + "learning_rate": 1.3792310276100697e-05, + "loss": 0.7138, "step": 1498 }, { - "epoch": 0.11342741477810148, - "grad_norm": 2.413205862045288, - "learning_rate": 1.780282861322885e-05, - "loss": 0.8608, + "epoch": 0.10552622316085886, + "grad_norm": 2.2421340942382812, + "learning_rate": 1.3791924186270532e-05, + "loss": 0.7605, "step": 1499 }, { - "epoch": 0.1135030835004351, - "grad_norm": 2.941840648651123, - "learning_rate": 1.780235948730175e-05, - "loss": 0.7904, + "epoch": 0.10559662090813093, + "grad_norm": 2.492703914642334, + "learning_rate": 1.3791537743322079e-05, + "loss": 0.6555, "step": 1500 }, { - "epoch": 0.11357875222276873, - "grad_norm": 2.9199554920196533, - "learning_rate": 1.780188981014243e-05, - "loss": 0.8542, + "epoch": 0.10566701865540302, + "grad_norm": 2.4917867183685303, + "learning_rate": 1.3791150947275428e-05, + "loss": 0.8293, "step": 1501 }, { - "epoch": 0.11365442094510234, - "grad_norm": 2.8440537452697754, - "learning_rate": 1.7801419581780295e-05, - "loss": 0.766, + "epoch": 0.10573741640267512, + "grad_norm": 2.1704835891723633, + "learning_rate": 1.3790763798150688e-05, + "loss": 0.6777, "step": 1502 }, { - "epoch": 0.11373008966743596, - "grad_norm": 2.193862199783325, - "learning_rate": 1.7800948802244805e-05, - "loss": 0.8432, + "epoch": 0.10580781414994721, + "grad_norm": 4.156985282897949, + "learning_rate": 1.379037629596799e-05, + "loss": 0.7612, "step": 1503 }, { - "epoch": 0.11380575838976958, - "grad_norm": 2.660568952560425, - "learning_rate": 1.7800477471565435e-05, - "loss": 0.8334, + "epoch": 0.10587821189721928, + "grad_norm": 2.530759334564209, + "learning_rate": 1.3789988440747477e-05, + "loss": 0.6595, "step": 1504 }, { - "epoch": 0.11388142711210321, - "grad_norm": 2.565652847290039, - "learning_rate": 1.78000055897717e-05, - "loss": 0.7056, + "epoch": 0.10594860964449138, + "grad_norm": 2.3515915870666504, + "learning_rate": 1.3789600232509315e-05, + "loss": 0.7781, "step": 1505 }, { - "epoch": 0.11395709583443683, - "grad_norm": 2.698594808578491, - "learning_rate": 1.7799533156893153e-05, - "loss": 0.8236, + "epoch": 0.10601900739176347, + "grad_norm": 2.176265239715576, + "learning_rate": 1.378921167127369e-05, + "loss": 0.729, "step": 1506 }, { - "epoch": 0.11403276455677046, - "grad_norm": 2.662174940109253, - "learning_rate": 1.779906017295938e-05, - "loss": 0.7694, + "epoch": 0.10608940513903554, + "grad_norm": 2.4852330684661865, + "learning_rate": 1.37888227570608e-05, + "loss": 0.8698, "step": 1507 }, { - "epoch": 0.11410843327910408, - "grad_norm": 2.9940743446350098, - "learning_rate": 1.7798586637999993e-05, - "loss": 1.1411, + "epoch": 0.10615980288630764, + "grad_norm": 2.5232508182525635, + "learning_rate": 1.3788433489890868e-05, + "loss": 0.8116, "step": 1508 }, { - "epoch": 0.1141841020014377, - "grad_norm": 2.8996222019195557, - "learning_rate": 1.7798112552044658e-05, - "loss": 0.715, + "epoch": 0.10623020063357973, + "grad_norm": 2.1038448810577393, + "learning_rate": 1.378804386978413e-05, + "loss": 0.8661, "step": 1509 }, { - "epoch": 0.11425977072377133, - "grad_norm": 2.917023181915283, - "learning_rate": 1.7797637915123058e-05, - "loss": 0.5476, + "epoch": 0.10630059838085182, + "grad_norm": 2.3320393562316895, + "learning_rate": 1.3787653896760842e-05, + "loss": 0.7679, "step": 1510 }, { - "epoch": 0.11433543944610496, - "grad_norm": 2.769496440887451, - "learning_rate": 1.7797162727264917e-05, - "loss": 0.8295, + "epoch": 0.1063709961281239, + "grad_norm": 2.31546950340271, + "learning_rate": 1.3787263570841283e-05, + "loss": 0.6911, "step": 1511 }, { - "epoch": 0.11441110816843858, - "grad_norm": 2.2324085235595703, - "learning_rate": 1.779668698849999e-05, - "loss": 0.7346, + "epoch": 0.10644139387539599, + "grad_norm": 2.821732759475708, + "learning_rate": 1.3786872892045744e-05, + "loss": 0.808, "step": 1512 }, { - "epoch": 0.1144867768907722, - "grad_norm": 3.295725107192993, - "learning_rate": 1.7796210698858077e-05, - "loss": 0.7722, + "epoch": 0.10651179162266808, + "grad_norm": 2.292588233947754, + "learning_rate": 1.3786481860394535e-05, + "loss": 0.8586, "step": 1513 }, { - "epoch": 0.11456244561310583, - "grad_norm": 2.2366225719451904, - "learning_rate": 1.7795733858368992e-05, - "loss": 0.7013, + "epoch": 0.10658218936994016, + "grad_norm": 2.3611974716186523, + "learning_rate": 1.378609047590799e-05, + "loss": 0.7076, "step": 1514 }, { - "epoch": 0.11463811433543944, - "grad_norm": 3.1166555881500244, - "learning_rate": 1.7795256467062612e-05, - "loss": 0.8173, + "epoch": 0.10665258711721225, + "grad_norm": 2.450965404510498, + "learning_rate": 1.3785698738606454e-05, + "loss": 0.8565, "step": 1515 }, { - "epoch": 0.11471378305777306, - "grad_norm": 2.2865703105926514, - "learning_rate": 1.779477852496882e-05, - "loss": 0.745, + "epoch": 0.10672298486448434, + "grad_norm": 2.2162728309631348, + "learning_rate": 1.3785306648510295e-05, + "loss": 0.718, "step": 1516 }, { - "epoch": 0.11478945178010669, - "grad_norm": 2.9082911014556885, - "learning_rate": 1.779430003211755e-05, - "loss": 0.9206, + "epoch": 0.10679338261175643, + "grad_norm": 1.9335877895355225, + "learning_rate": 1.37849142056399e-05, + "loss": 0.8567, "step": 1517 }, { - "epoch": 0.11486512050244031, - "grad_norm": 2.4655425548553467, - "learning_rate": 1.779382098853877e-05, - "loss": 0.7449, + "epoch": 0.1068637803590285, + "grad_norm": 2.0152080059051514, + "learning_rate": 1.3784521410015669e-05, + "loss": 0.6725, "step": 1518 }, { - "epoch": 0.11494078922477394, - "grad_norm": 3.6643152236938477, - "learning_rate": 1.7793341394262476e-05, - "loss": 0.8769, + "epoch": 0.1069341781063006, + "grad_norm": 2.3049638271331787, + "learning_rate": 1.3784128261658024e-05, + "loss": 0.821, "step": 1519 }, { - "epoch": 0.11501645794710756, - "grad_norm": 2.4818716049194336, - "learning_rate": 1.7792861249318704e-05, - "loss": 0.7565, + "epoch": 0.10700457585357269, + "grad_norm": 2.1999385356903076, + "learning_rate": 1.3783734760587409e-05, + "loss": 0.7963, "step": 1520 }, { - "epoch": 0.11509212666944119, - "grad_norm": 2.0801405906677246, - "learning_rate": 1.779238055373752e-05, - "loss": 0.5753, + "epoch": 0.10707497360084477, + "grad_norm": 2.3560354709625244, + "learning_rate": 1.378334090682428e-05, + "loss": 0.8099, "step": 1521 }, { - "epoch": 0.11516779539177481, - "grad_norm": 3.2640860080718994, - "learning_rate": 1.779189930754903e-05, - "loss": 0.6951, + "epoch": 0.10714537134811686, + "grad_norm": 2.474320650100708, + "learning_rate": 1.3782946700389112e-05, + "loss": 0.7399, "step": 1522 }, { - "epoch": 0.11524346411410843, - "grad_norm": 3.1288063526153564, - "learning_rate": 1.7791417510783368e-05, - "loss": 0.8465, + "epoch": 0.10721576909538895, + "grad_norm": 2.6907835006713867, + "learning_rate": 1.3782552141302403e-05, + "loss": 0.845, "step": 1523 }, { - "epoch": 0.11531913283644206, - "grad_norm": 3.1117849349975586, - "learning_rate": 1.7790935163470706e-05, - "loss": 0.685, + "epoch": 0.10728616684266104, + "grad_norm": 2.3242878913879395, + "learning_rate": 1.3782157229584666e-05, + "loss": 0.8766, "step": 1524 }, { - "epoch": 0.11539480155877568, - "grad_norm": 2.816326379776001, - "learning_rate": 1.779045226564125e-05, - "loss": 0.7518, + "epoch": 0.10735656458993312, + "grad_norm": 2.5410642623901367, + "learning_rate": 1.3781761965256433e-05, + "loss": 0.8534, "step": 1525 }, { - "epoch": 0.11547047028110931, - "grad_norm": 3.014407157897949, - "learning_rate": 1.7789968817325242e-05, - "loss": 0.8803, + "epoch": 0.10742696233720521, + "grad_norm": 2.1979005336761475, + "learning_rate": 1.3781366348338253e-05, + "loss": 0.7058, "step": 1526 }, { - "epoch": 0.11554613900344293, - "grad_norm": 3.064116954803467, - "learning_rate": 1.7789484818552954e-05, - "loss": 0.7059, + "epoch": 0.1074973600844773, + "grad_norm": 2.142456531524658, + "learning_rate": 1.3780970378850695e-05, + "loss": 0.6504, "step": 1527 }, { - "epoch": 0.11562180772577656, - "grad_norm": 2.1914854049682617, - "learning_rate": 1.77890002693547e-05, - "loss": 0.7042, + "epoch": 0.10756775783174938, + "grad_norm": 2.778135299682617, + "learning_rate": 1.3780574056814344e-05, + "loss": 0.8209, "step": 1528 }, { - "epoch": 0.11569747644811017, - "grad_norm": 3.057530403137207, - "learning_rate": 1.7788515169760824e-05, - "loss": 0.876, + "epoch": 0.10763815557902147, + "grad_norm": 2.399174928665161, + "learning_rate": 1.378017738224981e-05, + "loss": 0.8494, "step": 1529 }, { - "epoch": 0.11577314517044379, - "grad_norm": 2.713554859161377, - "learning_rate": 1.7788029519801703e-05, - "loss": 0.8374, + "epoch": 0.10770855332629356, + "grad_norm": 2.568596839904785, + "learning_rate": 1.3779780355177712e-05, + "loss": 0.7679, "step": 1530 }, { - "epoch": 0.11584881389277742, - "grad_norm": 2.849468231201172, - "learning_rate": 1.7787543319507743e-05, - "loss": 0.924, + "epoch": 0.10777895107356565, + "grad_norm": 2.2742397785186768, + "learning_rate": 1.3779382975618693e-05, + "loss": 0.7192, "step": 1531 }, { - "epoch": 0.11592448261511104, - "grad_norm": 3.1437432765960693, - "learning_rate": 1.7787056568909405e-05, - "loss": 0.8471, + "epoch": 0.10784934882083773, + "grad_norm": 2.569723129272461, + "learning_rate": 1.3778985243593413e-05, + "loss": 0.7805, "step": 1532 }, { - "epoch": 0.11600015133744467, - "grad_norm": 2.3561949729919434, - "learning_rate": 1.778656926803716e-05, - "loss": 0.8902, + "epoch": 0.10791974656810982, + "grad_norm": 2.9943783283233643, + "learning_rate": 1.377858715912255e-05, + "loss": 0.7614, "step": 1533 }, { - "epoch": 0.11607582005977829, - "grad_norm": 1.9011698961257935, - "learning_rate": 1.778608141692153e-05, - "loss": 0.8698, + "epoch": 0.10799014431538191, + "grad_norm": 1.8934288024902344, + "learning_rate": 1.3778188722226803e-05, + "loss": 0.7107, "step": 1534 }, { - "epoch": 0.11615148878211191, - "grad_norm": 3.898846387863159, - "learning_rate": 1.7785593015593066e-05, - "loss": 0.7568, + "epoch": 0.10806054206265399, + "grad_norm": 1.9892622232437134, + "learning_rate": 1.3777789932926884e-05, + "loss": 0.8463, "step": 1535 }, { - "epoch": 0.11622715750444554, - "grad_norm": 3.060079574584961, - "learning_rate": 1.7785104064082347e-05, - "loss": 0.863, + "epoch": 0.10813093980992608, + "grad_norm": 2.344217300415039, + "learning_rate": 1.3777390791243531e-05, + "loss": 0.7441, "step": 1536 }, { - "epoch": 0.11630282622677916, - "grad_norm": 3.5187714099884033, - "learning_rate": 1.7784614562419998e-05, - "loss": 0.8006, + "epoch": 0.10820133755719817, + "grad_norm": 2.1213929653167725, + "learning_rate": 1.3776991297197492e-05, + "loss": 0.7516, "step": 1537 }, { - "epoch": 0.11637849494911279, - "grad_norm": 2.9314115047454834, - "learning_rate": 1.7784124510636672e-05, - "loss": 0.9548, + "epoch": 0.10827173530447026, + "grad_norm": 2.3346328735351562, + "learning_rate": 1.3776591450809535e-05, + "loss": 0.795, "step": 1538 }, { - "epoch": 0.11645416367144641, - "grad_norm": 2.3972244262695312, - "learning_rate": 1.7783633908763062e-05, - "loss": 0.6688, + "epoch": 0.10834213305174234, + "grad_norm": 2.819348096847534, + "learning_rate": 1.3776191252100455e-05, + "loss": 0.7661, "step": 1539 }, { - "epoch": 0.11652983239378004, - "grad_norm": 2.985501766204834, - "learning_rate": 1.7783142756829882e-05, - "loss": 0.7211, + "epoch": 0.10841253079901443, + "grad_norm": 2.0938241481781006, + "learning_rate": 1.3775790701091053e-05, + "loss": 0.6065, "step": 1540 }, { - "epoch": 0.11660550111611366, - "grad_norm": 2.532233476638794, - "learning_rate": 1.7782651054867895e-05, - "loss": 0.8695, + "epoch": 0.10848292854628652, + "grad_norm": 2.23346209526062, + "learning_rate": 1.3775389797802154e-05, + "loss": 0.7844, "step": 1541 }, { - "epoch": 0.11668116983844727, - "grad_norm": 3.1398353576660156, - "learning_rate": 1.7782158802907893e-05, - "loss": 0.796, + "epoch": 0.1085533262935586, + "grad_norm": 2.686587333679199, + "learning_rate": 1.3774988542254607e-05, + "loss": 0.8275, "step": 1542 }, { - "epoch": 0.1167568385607809, - "grad_norm": 3.156766414642334, - "learning_rate": 1.7781666000980705e-05, - "loss": 0.8581, + "epoch": 0.10862372404083069, + "grad_norm": 2.984802484512329, + "learning_rate": 1.3774586934469269e-05, + "loss": 0.7932, "step": 1543 }, { - "epoch": 0.11683250728311452, - "grad_norm": 3.025268793106079, - "learning_rate": 1.7781172649117186e-05, - "loss": 0.7749, + "epoch": 0.10869412178810278, + "grad_norm": 2.589205503463745, + "learning_rate": 1.377418497446702e-05, + "loss": 0.8327, "step": 1544 }, { - "epoch": 0.11690817600544814, - "grad_norm": 3.071802854537964, - "learning_rate": 1.7780678747348236e-05, - "loss": 0.7598, + "epoch": 0.10876451953537487, + "grad_norm": 2.5902462005615234, + "learning_rate": 1.3773782662268759e-05, + "loss": 0.6885, "step": 1545 }, { - "epoch": 0.11698384472778177, - "grad_norm": 4.598686218261719, - "learning_rate": 1.7780184295704778e-05, - "loss": 0.8049, + "epoch": 0.10883491728264695, + "grad_norm": 2.215656042098999, + "learning_rate": 1.3773379997895402e-05, + "loss": 0.7258, "step": 1546 }, { - "epoch": 0.1170595134501154, - "grad_norm": 3.0059025287628174, - "learning_rate": 1.7779689294217784e-05, - "loss": 0.7546, + "epoch": 0.10890531502991904, + "grad_norm": 2.6093668937683105, + "learning_rate": 1.3772976981367885e-05, + "loss": 0.7529, "step": 1547 }, { - "epoch": 0.11713518217244902, - "grad_norm": 2.655482292175293, - "learning_rate": 1.777919374291825e-05, - "loss": 0.941, + "epoch": 0.10897571277719113, + "grad_norm": 2.5215015411376953, + "learning_rate": 1.3772573612707161e-05, + "loss": 0.8452, "step": 1548 }, { - "epoch": 0.11721085089478264, - "grad_norm": 2.7230942249298096, - "learning_rate": 1.7778697641837208e-05, - "loss": 0.8749, + "epoch": 0.10904611052446321, + "grad_norm": 2.0720107555389404, + "learning_rate": 1.3772169891934201e-05, + "loss": 0.7325, "step": 1549 }, { - "epoch": 0.11728651961711627, - "grad_norm": 3.8638458251953125, - "learning_rate": 1.7778200991005724e-05, - "loss": 0.7645, + "epoch": 0.1091165082717353, + "grad_norm": 2.2003824710845947, + "learning_rate": 1.3771765819069998e-05, + "loss": 0.8077, "step": 1550 }, { - "epoch": 0.11736218833944989, - "grad_norm": 2.8020644187927246, - "learning_rate": 1.7777703790454906e-05, - "loss": 0.6915, + "epoch": 0.1091869060190074, + "grad_norm": 1.852858543395996, + "learning_rate": 1.3771361394135554e-05, + "loss": 0.9487, "step": 1551 }, { - "epoch": 0.11743785706178352, - "grad_norm": 4.486051559448242, - "learning_rate": 1.777720604021588e-05, - "loss": 0.7654, + "epoch": 0.10925730376627948, + "grad_norm": 2.7708635330200195, + "learning_rate": 1.3770956617151899e-05, + "loss": 0.7743, "step": 1552 }, { - "epoch": 0.11751352578411714, - "grad_norm": 3.036688804626465, - "learning_rate": 1.7776707740319828e-05, - "loss": 0.9693, + "epoch": 0.10932770151355156, + "grad_norm": 2.2521536350250244, + "learning_rate": 1.3770551488140079e-05, + "loss": 0.8309, "step": 1553 }, { - "epoch": 0.11758919450645076, - "grad_norm": 2.724858045578003, - "learning_rate": 1.7776208890797947e-05, - "loss": 0.6755, + "epoch": 0.10939809926082365, + "grad_norm": 2.516474962234497, + "learning_rate": 1.3770146007121153e-05, + "loss": 0.7085, "step": 1554 }, { - "epoch": 0.11766486322878439, - "grad_norm": 3.0751144886016846, - "learning_rate": 1.7775709491681482e-05, - "loss": 0.9963, + "epoch": 0.10946849700809574, + "grad_norm": 2.722050428390503, + "learning_rate": 1.3769740174116205e-05, + "loss": 0.7666, "step": 1555 }, { - "epoch": 0.117740531951118, - "grad_norm": 2.686180591583252, - "learning_rate": 1.7775209543001703e-05, - "loss": 0.8259, + "epoch": 0.10953889475536782, + "grad_norm": 2.39557147026062, + "learning_rate": 1.3769333989146335e-05, + "loss": 0.9074, "step": 1556 }, { - "epoch": 0.11781620067345162, - "grad_norm": 2.430630683898926, - "learning_rate": 1.777470904478992e-05, - "loss": 0.8329, + "epoch": 0.10960929250263991, + "grad_norm": 2.4584810733795166, + "learning_rate": 1.3768927452232663e-05, + "loss": 0.7519, "step": 1557 }, { - "epoch": 0.11789186939578525, - "grad_norm": 2.6584362983703613, - "learning_rate": 1.7774207997077477e-05, - "loss": 0.8525, + "epoch": 0.109679690249912, + "grad_norm": 1.8734687566757202, + "learning_rate": 1.3768520563396318e-05, + "loss": 0.7593, "step": 1558 }, { - "epoch": 0.11796753811811887, - "grad_norm": 2.8905189037323, - "learning_rate": 1.777370639989575e-05, - "loss": 0.8348, + "epoch": 0.1097500879971841, + "grad_norm": 2.6431589126586914, + "learning_rate": 1.376811332265846e-05, + "loss": 0.8385, "step": 1559 }, { - "epoch": 0.1180432068404525, - "grad_norm": 2.841679334640503, - "learning_rate": 1.777320425327615e-05, - "loss": 0.8827, + "epoch": 0.10982048574445617, + "grad_norm": 2.065969705581665, + "learning_rate": 1.3767705730040263e-05, + "loss": 0.6954, "step": 1560 }, { - "epoch": 0.11811887556278612, - "grad_norm": 2.7715628147125244, - "learning_rate": 1.777270155725012e-05, - "loss": 0.8298, + "epoch": 0.10989088349172826, + "grad_norm": 2.441608190536499, + "learning_rate": 1.3767297785562914e-05, + "loss": 0.6235, "step": 1561 }, { - "epoch": 0.11819454428511975, - "grad_norm": 3.1917660236358643, - "learning_rate": 1.777219831184915e-05, - "loss": 0.8082, + "epoch": 0.10996128123900036, + "grad_norm": 2.069265842437744, + "learning_rate": 1.3766889489247625e-05, + "loss": 0.8317, "step": 1562 }, { - "epoch": 0.11827021300745337, - "grad_norm": 3.6017658710479736, - "learning_rate": 1.7771694517104746e-05, - "loss": 0.7245, + "epoch": 0.11003167898627243, + "grad_norm": 2.5596110820770264, + "learning_rate": 1.3766480841115622e-05, + "loss": 0.7313, "step": 1563 }, { - "epoch": 0.118345881729787, - "grad_norm": 3.7225780487060547, - "learning_rate": 1.777119017304846e-05, - "loss": 0.7443, + "epoch": 0.11010207673354452, + "grad_norm": 2.2286272048950195, + "learning_rate": 1.3766071841188153e-05, + "loss": 0.8558, "step": 1564 }, { - "epoch": 0.11842155045212062, - "grad_norm": 3.468682289123535, - "learning_rate": 1.7770685279711877e-05, - "loss": 0.7181, + "epoch": 0.11017247448081662, + "grad_norm": 2.110929012298584, + "learning_rate": 1.3765662489486481e-05, + "loss": 0.7445, "step": 1565 }, { - "epoch": 0.11849721917445424, - "grad_norm": 3.647542715072632, - "learning_rate": 1.7770179837126613e-05, - "loss": 0.7155, + "epoch": 0.1102428722280887, + "grad_norm": 2.2707436084747314, + "learning_rate": 1.376525278603189e-05, + "loss": 0.8032, "step": 1566 }, { - "epoch": 0.11857288789678787, - "grad_norm": 3.232402801513672, - "learning_rate": 1.7769673845324322e-05, - "loss": 0.7418, + "epoch": 0.11031326997536078, + "grad_norm": 2.361915349960327, + "learning_rate": 1.3764842730845677e-05, + "loss": 0.7057, "step": 1567 }, { - "epoch": 0.11864855661912149, - "grad_norm": 2.8265175819396973, - "learning_rate": 1.776916730433669e-05, - "loss": 0.7867, + "epoch": 0.11038366772263287, + "grad_norm": 2.2197439670562744, + "learning_rate": 1.3764432323949166e-05, + "loss": 0.751, "step": 1568 }, { - "epoch": 0.1187242253414551, - "grad_norm": 2.74609637260437, - "learning_rate": 1.7768660214195437e-05, - "loss": 0.7622, + "epoch": 0.11045406546990497, + "grad_norm": 2.6382789611816406, + "learning_rate": 1.376402156536369e-05, + "loss": 0.7405, "step": 1569 }, { - "epoch": 0.11879989406378873, - "grad_norm": 2.700554609298706, - "learning_rate": 1.7768152574932323e-05, - "loss": 0.9818, + "epoch": 0.11052446321717704, + "grad_norm": 2.2375335693359375, + "learning_rate": 1.376361045511061e-05, + "loss": 0.751, "step": 1570 }, { - "epoch": 0.11887556278612235, - "grad_norm": 2.617316722869873, - "learning_rate": 1.776764438657913e-05, - "loss": 0.8502, + "epoch": 0.11059486096444913, + "grad_norm": 2.525139093399048, + "learning_rate": 1.3763198993211295e-05, + "loss": 0.7353, "step": 1571 }, { - "epoch": 0.11895123150845598, - "grad_norm": 2.603131055831909, - "learning_rate": 1.7767135649167694e-05, - "loss": 0.7249, + "epoch": 0.11066525871172123, + "grad_norm": 2.719688892364502, + "learning_rate": 1.376278717968714e-05, + "loss": 0.7547, "step": 1572 }, { - "epoch": 0.1190269002307896, - "grad_norm": 2.8606648445129395, - "learning_rate": 1.7766626362729864e-05, - "loss": 0.7766, + "epoch": 0.11073565645899332, + "grad_norm": 3.9968318939208984, + "learning_rate": 1.3762375014559555e-05, + "loss": 0.813, "step": 1573 }, { - "epoch": 0.11910256895312323, - "grad_norm": 3.8220481872558594, - "learning_rate": 1.776611652729754e-05, - "loss": 0.7473, + "epoch": 0.1108060542062654, + "grad_norm": 1.9999995231628418, + "learning_rate": 1.3761962497849967e-05, + "loss": 0.7629, "step": 1574 }, { - "epoch": 0.11917823767545685, - "grad_norm": 2.6276204586029053, - "learning_rate": 1.7765606142902642e-05, - "loss": 0.8983, + "epoch": 0.11087645195353749, + "grad_norm": 2.2618777751922607, + "learning_rate": 1.3761549629579826e-05, + "loss": 0.8747, "step": 1575 }, { - "epoch": 0.11925390639779047, - "grad_norm": 1.8426728248596191, - "learning_rate": 1.7765095209577137e-05, - "loss": 1.0027, + "epoch": 0.11094684970080958, + "grad_norm": 2.2160682678222656, + "learning_rate": 1.3761136409770596e-05, + "loss": 0.7841, "step": 1576 }, { - "epoch": 0.1193295751201241, - "grad_norm": 3.0207700729370117, - "learning_rate": 1.776458372735302e-05, - "loss": 0.7059, + "epoch": 0.11101724744808165, + "grad_norm": 2.665341854095459, + "learning_rate": 1.376072283844376e-05, + "loss": 0.8087, "step": 1577 }, { - "epoch": 0.11940524384245772, - "grad_norm": 2.4392471313476562, - "learning_rate": 1.776407169626232e-05, - "loss": 0.6909, + "epoch": 0.11108764519535375, + "grad_norm": 2.1624488830566406, + "learning_rate": 1.3760308915620823e-05, + "loss": 0.7127, "step": 1578 }, { - "epoch": 0.11948091256479135, - "grad_norm": 3.044403076171875, - "learning_rate": 1.7763559116337107e-05, - "loss": 0.7622, + "epoch": 0.11115804294262584, + "grad_norm": 2.41340970993042, + "learning_rate": 1.37598946413233e-05, + "loss": 0.7059, "step": 1579 }, { - "epoch": 0.11955658128712497, - "grad_norm": 2.991702079772949, - "learning_rate": 1.776304598760948e-05, - "loss": 0.7587, + "epoch": 0.11122844068989793, + "grad_norm": 2.4131994247436523, + "learning_rate": 1.3759480015572736e-05, + "loss": 0.7686, "step": 1580 }, { - "epoch": 0.1196322500094586, - "grad_norm": 2.4249448776245117, - "learning_rate": 1.7762532310111565e-05, - "loss": 0.6753, + "epoch": 0.11129883843717, + "grad_norm": 2.199885129928589, + "learning_rate": 1.3759065038390682e-05, + "loss": 0.934, "step": 1581 }, { - "epoch": 0.11970791873179222, - "grad_norm": 2.8478915691375732, - "learning_rate": 1.7762018083875536e-05, - "loss": 0.6938, + "epoch": 0.1113692361844421, + "grad_norm": 2.820788621902466, + "learning_rate": 1.3758649709798714e-05, + "loss": 0.8383, "step": 1582 }, { - "epoch": 0.11978358745412583, - "grad_norm": 2.7412543296813965, - "learning_rate": 1.7761503308933594e-05, - "loss": 0.6663, + "epoch": 0.11143963393171419, + "grad_norm": 2.6545708179473877, + "learning_rate": 1.3758234029818429e-05, + "loss": 0.7904, "step": 1583 }, { - "epoch": 0.11985925617645946, - "grad_norm": 2.393305540084839, - "learning_rate": 1.776098798531798e-05, - "loss": 0.9015, + "epoch": 0.11151003167898627, + "grad_norm": 2.1028177738189697, + "learning_rate": 1.3757817998471434e-05, + "loss": 0.7631, "step": 1584 }, { - "epoch": 0.11993492489879308, - "grad_norm": 3.5225398540496826, - "learning_rate": 1.776047211306096e-05, - "loss": 0.7928, + "epoch": 0.11158042942625836, + "grad_norm": 2.7426469326019287, + "learning_rate": 1.3757401615779364e-05, + "loss": 0.8399, "step": 1585 }, { - "epoch": 0.1200105936211267, - "grad_norm": 2.1863973140716553, - "learning_rate": 1.7759955692194843e-05, - "loss": 0.8247, + "epoch": 0.11165082717353045, + "grad_norm": 2.337364435195923, + "learning_rate": 1.3756984881763861e-05, + "loss": 0.692, "step": 1586 }, { - "epoch": 0.12008626234346033, - "grad_norm": 2.749263048171997, - "learning_rate": 1.7759438722751962e-05, - "loss": 0.7481, + "epoch": 0.11172122492080254, + "grad_norm": 3.7064905166625977, + "learning_rate": 1.3756567796446597e-05, + "loss": 0.7916, "step": 1587 }, { - "epoch": 0.12016193106579395, - "grad_norm": 2.8146281242370605, - "learning_rate": 1.7758921204764704e-05, - "loss": 0.8196, + "epoch": 0.11179162266807462, + "grad_norm": 2.885540246963501, + "learning_rate": 1.3756150359849253e-05, + "loss": 0.7648, "step": 1588 }, { - "epoch": 0.12023759978812758, - "grad_norm": 2.485360622406006, - "learning_rate": 1.7758403138265465e-05, - "loss": 0.7325, + "epoch": 0.11186202041534671, + "grad_norm": 2.3998348712921143, + "learning_rate": 1.3755732571993531e-05, + "loss": 0.726, "step": 1589 }, { - "epoch": 0.1203132685104612, - "grad_norm": 2.7749016284942627, - "learning_rate": 1.7757884523286697e-05, - "loss": 0.9098, + "epoch": 0.1119324181626188, + "grad_norm": 2.584264039993286, + "learning_rate": 1.3755314432901158e-05, + "loss": 0.7897, "step": 1590 }, { - "epoch": 0.12038893723279483, - "grad_norm": 2.742647647857666, - "learning_rate": 1.775736535986087e-05, - "loss": 0.8401, + "epoch": 0.11200281590989089, + "grad_norm": 2.650606632232666, + "learning_rate": 1.3754895942593868e-05, + "loss": 0.6567, "step": 1591 }, { - "epoch": 0.12046460595512845, - "grad_norm": 2.826692581176758, - "learning_rate": 1.7756845648020502e-05, - "loss": 0.8449, + "epoch": 0.11207321365716297, + "grad_norm": 2.9833645820617676, + "learning_rate": 1.3754477101093422e-05, + "loss": 0.7043, "step": 1592 }, { - "epoch": 0.12054027467746208, - "grad_norm": 3.2634994983673096, - "learning_rate": 1.7756325387798138e-05, - "loss": 0.7922, + "epoch": 0.11214361140443506, + "grad_norm": 2.1243815422058105, + "learning_rate": 1.375405790842159e-05, + "loss": 0.7933, "step": 1593 }, { - "epoch": 0.1206159433997957, - "grad_norm": 2.3694705963134766, - "learning_rate": 1.7755804579226352e-05, - "loss": 0.8471, + "epoch": 0.11221400915170715, + "grad_norm": 2.1083154678344727, + "learning_rate": 1.3753638364600176e-05, + "loss": 0.6333, "step": 1594 }, { - "epoch": 0.12069161212212932, - "grad_norm": 2.5997414588928223, - "learning_rate": 1.775528322233777e-05, - "loss": 0.7086, + "epoch": 0.11228440689897923, + "grad_norm": 2.7171194553375244, + "learning_rate": 1.3753218469650982e-05, + "loss": 0.653, "step": 1595 }, { - "epoch": 0.12076728084446293, - "grad_norm": 2.655085802078247, - "learning_rate": 1.775476131716503e-05, - "loss": 0.746, + "epoch": 0.11235480464625132, + "grad_norm": 2.327781915664673, + "learning_rate": 1.3752798223595845e-05, + "loss": 0.7474, "step": 1596 }, { - "epoch": 0.12084294956679656, - "grad_norm": 2.6947407722473145, - "learning_rate": 1.7754238863740822e-05, - "loss": 0.8031, + "epoch": 0.11242520239352341, + "grad_norm": 2.116194725036621, + "learning_rate": 1.3752377626456616e-05, + "loss": 0.7419, "step": 1597 }, { - "epoch": 0.12091861828913018, - "grad_norm": 3.005265951156616, - "learning_rate": 1.775371586209786e-05, - "loss": 0.7431, + "epoch": 0.1124956001407955, + "grad_norm": 2.2473554611206055, + "learning_rate": 1.3751956678255156e-05, + "loss": 0.7368, "step": 1598 }, { - "epoch": 0.12099428701146381, - "grad_norm": 2.7612268924713135, - "learning_rate": 1.7753192312268897e-05, - "loss": 0.8103, + "epoch": 0.11256599788806758, + "grad_norm": 2.37587308883667, + "learning_rate": 1.3751535379013354e-05, + "loss": 0.7844, "step": 1599 }, { - "epoch": 0.12106995573379743, - "grad_norm": 2.702098846435547, - "learning_rate": 1.775266821428672e-05, - "loss": 0.7749, + "epoch": 0.11263639563533967, + "grad_norm": 2.624990463256836, + "learning_rate": 1.3751113728753112e-05, + "loss": 0.9343, "step": 1600 }, { - "epoch": 0.12114562445613106, - "grad_norm": 3.0759353637695312, - "learning_rate": 1.7752143568184155e-05, - "loss": 0.7911, + "epoch": 0.11270679338261176, + "grad_norm": 2.6170895099639893, + "learning_rate": 1.3750691727496353e-05, + "loss": 0.7902, "step": 1601 }, { - "epoch": 0.12122129317846468, - "grad_norm": 2.7693514823913574, - "learning_rate": 1.7751618373994046e-05, - "loss": 0.7506, + "epoch": 0.11277719112988384, + "grad_norm": 2.4804720878601074, + "learning_rate": 1.3750269375265021e-05, + "loss": 0.7791, "step": 1602 }, { - "epoch": 0.1212969619007983, - "grad_norm": 2.7553870677948, - "learning_rate": 1.775109263174929e-05, - "loss": 0.6945, + "epoch": 0.11284758887715593, + "grad_norm": 2.3533356189727783, + "learning_rate": 1.3749846672081068e-05, + "loss": 0.7054, "step": 1603 }, { - "epoch": 0.12137263062313193, - "grad_norm": 3.5849199295043945, - "learning_rate": 1.7750566341482813e-05, - "loss": 0.9296, + "epoch": 0.11291798662442802, + "grad_norm": 2.214348316192627, + "learning_rate": 1.3749423617966472e-05, + "loss": 0.668, "step": 1604 }, { - "epoch": 0.12144829934546555, - "grad_norm": 2.122450828552246, - "learning_rate": 1.7750039503227564e-05, - "loss": 0.8155, + "epoch": 0.11298838437170011, + "grad_norm": 3.3927958011627197, + "learning_rate": 1.374900021294323e-05, + "loss": 0.7077, "step": 1605 }, { - "epoch": 0.12152396806779918, - "grad_norm": 2.7267143726348877, - "learning_rate": 1.774951211701654e-05, - "loss": 0.8056, + "epoch": 0.11305878211897219, + "grad_norm": 2.6119022369384766, + "learning_rate": 1.3748576457033356e-05, + "loss": 0.78, "step": 1606 }, { - "epoch": 0.1215996367901328, - "grad_norm": 2.3027350902557373, - "learning_rate": 1.774898418288277e-05, - "loss": 0.923, + "epoch": 0.11312917986624428, + "grad_norm": 2.0686051845550537, + "learning_rate": 1.3748152350258881e-05, + "loss": 0.9245, "step": 1607 }, { - "epoch": 0.12167530551246643, - "grad_norm": 2.8576319217681885, - "learning_rate": 1.774845570085931e-05, - "loss": 0.7195, + "epoch": 0.11319957761351637, + "grad_norm": 1.9094802141189575, + "learning_rate": 1.3747727892641852e-05, + "loss": 0.824, "step": 1608 }, { - "epoch": 0.12175097423480005, - "grad_norm": 2.155660390853882, - "learning_rate": 1.7747926670979264e-05, - "loss": 0.6233, + "epoch": 0.11326997536078845, + "grad_norm": 2.0125341415405273, + "learning_rate": 1.374730308420434e-05, + "loss": 0.7698, "step": 1609 }, { - "epoch": 0.12182664295713366, - "grad_norm": 6.00687837600708, - "learning_rate": 1.774739709327575e-05, - "loss": 0.8258, + "epoch": 0.11334037310806054, + "grad_norm": 2.095142126083374, + "learning_rate": 1.374687792496843e-05, + "loss": 0.8276, "step": 1610 }, { - "epoch": 0.12190231167946729, - "grad_norm": 2.7313666343688965, - "learning_rate": 1.774686696778194e-05, - "loss": 0.7564, + "epoch": 0.11341077085533263, + "grad_norm": 1.9614841938018799, + "learning_rate": 1.3746452414956225e-05, + "loss": 0.7378, "step": 1611 }, { - "epoch": 0.12197798040180091, - "grad_norm": 2.975884199142456, - "learning_rate": 1.774633629453103e-05, - "loss": 0.8001, + "epoch": 0.11348116860260472, + "grad_norm": 2.2347846031188965, + "learning_rate": 1.3746026554189849e-05, + "loss": 0.7507, "step": 1612 }, { - "epoch": 0.12205364912413454, - "grad_norm": 2.4056174755096436, - "learning_rate": 1.7745805073556252e-05, - "loss": 0.6628, + "epoch": 0.1135515663498768, + "grad_norm": 3.453697681427002, + "learning_rate": 1.3745600342691444e-05, + "loss": 0.7648, "step": 1613 }, { - "epoch": 0.12212931784646816, - "grad_norm": 2.666964292526245, - "learning_rate": 1.7745273304890872e-05, - "loss": 0.7826, + "epoch": 0.11362196409714889, + "grad_norm": 2.3130042552948, + "learning_rate": 1.3745173780483166e-05, + "loss": 0.7792, "step": 1614 }, { - "epoch": 0.12220498656880179, - "grad_norm": 2.7258975505828857, - "learning_rate": 1.7744740988568195e-05, - "loss": 0.6598, + "epoch": 0.11369236184442098, + "grad_norm": 2.4547555446624756, + "learning_rate": 1.3744746867587196e-05, + "loss": 0.6616, "step": 1615 }, { - "epoch": 0.12228065529113541, - "grad_norm": 3.982149600982666, - "learning_rate": 1.774420812462155e-05, - "loss": 0.8036, + "epoch": 0.11376275959169306, + "grad_norm": 2.9933316707611084, + "learning_rate": 1.3744319604025727e-05, + "loss": 0.8676, "step": 1616 }, { - "epoch": 0.12235632401346903, - "grad_norm": 2.672240734100342, - "learning_rate": 1.7743674713084312e-05, - "loss": 0.7409, + "epoch": 0.11383315733896515, + "grad_norm": 2.3520171642303467, + "learning_rate": 1.3743891989820975e-05, + "loss": 0.7445, "step": 1617 }, { - "epoch": 0.12243199273580266, - "grad_norm": 2.278903007507324, - "learning_rate": 1.774314075398988e-05, - "loss": 0.7339, + "epoch": 0.11390355508623724, + "grad_norm": 2.0795040130615234, + "learning_rate": 1.3743464024995171e-05, + "loss": 0.8499, "step": 1618 }, { - "epoch": 0.12250766145813628, - "grad_norm": 3.2077767848968506, - "learning_rate": 1.7742606247371698e-05, - "loss": 0.859, + "epoch": 0.11397395283350933, + "grad_norm": 1.9858736991882324, + "learning_rate": 1.3743035709570565e-05, + "loss": 0.7188, "step": 1619 }, { - "epoch": 0.12258333018046991, - "grad_norm": 2.292569398880005, - "learning_rate": 1.7742071193263233e-05, - "loss": 0.73, + "epoch": 0.11404435058078141, + "grad_norm": 2.459197998046875, + "learning_rate": 1.3742607043569425e-05, + "loss": 0.708, "step": 1620 }, { - "epoch": 0.12265899890280353, - "grad_norm": 2.9495203495025635, - "learning_rate": 1.7741535591697998e-05, - "loss": 0.8434, + "epoch": 0.1141147483280535, + "grad_norm": 2.6868722438812256, + "learning_rate": 1.3742178027014039e-05, + "loss": 0.8647, "step": 1621 }, { - "epoch": 0.12273466762513716, - "grad_norm": 2.9479892253875732, - "learning_rate": 1.7740999442709528e-05, - "loss": 0.7948, + "epoch": 0.1141851460753256, + "grad_norm": 2.1901774406433105, + "learning_rate": 1.374174865992671e-05, + "loss": 0.8568, "step": 1622 }, { - "epoch": 0.12281033634747077, - "grad_norm": 2.773385763168335, - "learning_rate": 1.7740462746331402e-05, - "loss": 0.8904, + "epoch": 0.11425554382259767, + "grad_norm": 5.0182037353515625, + "learning_rate": 1.3741318942329764e-05, + "loss": 0.8051, "step": 1623 }, { - "epoch": 0.12288600506980439, - "grad_norm": 3.8508312702178955, - "learning_rate": 1.773992550259723e-05, - "loss": 0.7778, + "epoch": 0.11432594156986976, + "grad_norm": 2.520148515701294, + "learning_rate": 1.374088887424554e-05, + "loss": 0.7025, "step": 1624 }, { - "epoch": 0.12296167379213802, - "grad_norm": 3.088562488555908, - "learning_rate": 1.7739387711540655e-05, - "loss": 0.7213, + "epoch": 0.11439633931714185, + "grad_norm": 1.9115002155303955, + "learning_rate": 1.3740458455696399e-05, + "loss": 0.7787, "step": 1625 }, { - "epoch": 0.12303734251447164, - "grad_norm": 2.9575798511505127, - "learning_rate": 1.7738849373195352e-05, - "loss": 0.8504, + "epoch": 0.11446673706441395, + "grad_norm": 2.243359327316284, + "learning_rate": 1.3740027686704717e-05, + "loss": 0.7948, "step": 1626 }, { - "epoch": 0.12311301123680526, - "grad_norm": 4.4509596824646, - "learning_rate": 1.7738310487595038e-05, - "loss": 0.8436, + "epoch": 0.11453713481168602, + "grad_norm": 2.48187255859375, + "learning_rate": 1.3739596567292895e-05, + "loss": 0.6856, "step": 1627 }, { - "epoch": 0.12318867995913889, - "grad_norm": 3.1381912231445312, - "learning_rate": 1.773777105477346e-05, - "loss": 0.7903, + "epoch": 0.11460753255895811, + "grad_norm": 2.1901326179504395, + "learning_rate": 1.373916509748334e-05, + "loss": 0.8855, "step": 1628 }, { - "epoch": 0.12326434868147251, - "grad_norm": 2.663259267807007, - "learning_rate": 1.773723107476439e-05, - "loss": 0.8887, + "epoch": 0.1146779303062302, + "grad_norm": 2.011791467666626, + "learning_rate": 1.373873327729849e-05, + "loss": 0.6408, "step": 1629 }, { - "epoch": 0.12334001740380614, - "grad_norm": 2.6209001541137695, - "learning_rate": 1.773669054760166e-05, - "loss": 0.8118, + "epoch": 0.11474832805350228, + "grad_norm": 2.9995710849761963, + "learning_rate": 1.3738301106760791e-05, + "loss": 0.9366, "step": 1630 }, { - "epoch": 0.12341568612613976, - "grad_norm": 2.6906800270080566, - "learning_rate": 1.7736149473319102e-05, - "loss": 0.8902, + "epoch": 0.11481872580077437, + "grad_norm": 2.4127495288848877, + "learning_rate": 1.373786858589272e-05, + "loss": 0.8328, "step": 1631 }, { - "epoch": 0.12349135484847339, - "grad_norm": 2.746408700942993, - "learning_rate": 1.7735607851950613e-05, - "loss": 0.8419, + "epoch": 0.11488912354804647, + "grad_norm": 2.3442323207855225, + "learning_rate": 1.3737435714716754e-05, + "loss": 0.7791, "step": 1632 }, { - "epoch": 0.12356702357080701, - "grad_norm": 4.190911293029785, - "learning_rate": 1.7735065683530103e-05, - "loss": 0.8135, + "epoch": 0.11495952129531856, + "grad_norm": 1.901787519454956, + "learning_rate": 1.3737002493255408e-05, + "loss": 0.7827, "step": 1633 }, { - "epoch": 0.12364269229314064, - "grad_norm": 4.614360332489014, - "learning_rate": 1.7734522968091528e-05, - "loss": 0.8491, + "epoch": 0.11502991904259063, + "grad_norm": 2.2320051193237305, + "learning_rate": 1.3736568921531198e-05, + "loss": 0.7933, "step": 1634 }, { - "epoch": 0.12371836101547426, - "grad_norm": 3.484330892562866, - "learning_rate": 1.7733979705668877e-05, - "loss": 0.8769, + "epoch": 0.11510031678986272, + "grad_norm": 2.824383497238159, + "learning_rate": 1.3736134999566668e-05, + "loss": 0.6985, "step": 1635 }, { - "epoch": 0.12379402973780787, - "grad_norm": 2.535391092300415, - "learning_rate": 1.7733435896296164e-05, - "loss": 0.8932, + "epoch": 0.11517071453713482, + "grad_norm": 2.056641101837158, + "learning_rate": 1.3735700727384381e-05, + "loss": 0.6575, "step": 1636 }, { - "epoch": 0.1238696984601415, - "grad_norm": 2.774635076522827, - "learning_rate": 1.773289154000745e-05, - "loss": 1.014, + "epoch": 0.1152411122844069, + "grad_norm": 1.8938333988189697, + "learning_rate": 1.3735266105006912e-05, + "loss": 0.7672, "step": 1637 }, { - "epoch": 0.12394536718247512, - "grad_norm": 2.7426695823669434, - "learning_rate": 1.773234663683682e-05, - "loss": 0.6978, + "epoch": 0.11531151003167898, + "grad_norm": 2.126490354537964, + "learning_rate": 1.373483113245686e-05, + "loss": 0.7045, "step": 1638 }, { - "epoch": 0.12402103590480874, - "grad_norm": 2.296440362930298, - "learning_rate": 1.77318011868184e-05, - "loss": 0.8189, + "epoch": 0.11538190777895108, + "grad_norm": 2.0757298469543457, + "learning_rate": 1.3734395809756836e-05, + "loss": 0.836, "step": 1639 }, { - "epoch": 0.12409670462714237, - "grad_norm": 2.881760835647583, - "learning_rate": 1.773125518998635e-05, - "loss": 0.8082, + "epoch": 0.11545230552622317, + "grad_norm": 2.2375311851501465, + "learning_rate": 1.3733960136929474e-05, + "loss": 0.7888, "step": 1640 }, { - "epoch": 0.12417237334947599, - "grad_norm": 2.310656785964966, - "learning_rate": 1.773070864637486e-05, - "loss": 0.8178, + "epoch": 0.11552270327349524, + "grad_norm": 1.913554310798645, + "learning_rate": 1.3733524113997428e-05, + "loss": 0.684, "step": 1641 }, { - "epoch": 0.12424804207180962, - "grad_norm": 3.410867691040039, - "learning_rate": 1.7730161556018154e-05, - "loss": 0.8674, + "epoch": 0.11559310102076734, + "grad_norm": 2.55549693107605, + "learning_rate": 1.3733087740983365e-05, + "loss": 0.8235, "step": 1642 }, { - "epoch": 0.12432371079414324, - "grad_norm": 2.032334327697754, - "learning_rate": 1.7729613918950496e-05, - "loss": 0.8109, + "epoch": 0.11566349876803943, + "grad_norm": 2.5618345737457275, + "learning_rate": 1.373265101790997e-05, + "loss": 0.8475, "step": 1643 }, { - "epoch": 0.12439937951647687, - "grad_norm": 2.293539047241211, - "learning_rate": 1.7729065735206177e-05, - "loss": 0.8798, + "epoch": 0.1157338965153115, + "grad_norm": 2.6319262981414795, + "learning_rate": 1.3732213944799956e-05, + "loss": 0.7127, "step": 1644 }, { - "epoch": 0.12447504823881049, - "grad_norm": 2.2878317832946777, - "learning_rate": 1.7728517004819527e-05, - "loss": 0.7187, + "epoch": 0.1158042942625836, + "grad_norm": 2.0036065578460693, + "learning_rate": 1.3731776521676038e-05, + "loss": 0.7844, "step": 1645 }, { - "epoch": 0.12455071696114411, - "grad_norm": 2.5122296810150146, - "learning_rate": 1.772796772782492e-05, - "loss": 0.8013, + "epoch": 0.11587469200985569, + "grad_norm": 5.534562110900879, + "learning_rate": 1.3731338748560963e-05, + "loss": 0.7247, "step": 1646 }, { - "epoch": 0.12462638568347774, - "grad_norm": 2.389878511428833, - "learning_rate": 1.7727417904256734e-05, - "loss": 0.8499, + "epoch": 0.11594508975712778, + "grad_norm": 2.531480312347412, + "learning_rate": 1.373090062547749e-05, + "loss": 0.813, "step": 1647 }, { - "epoch": 0.12470205440581136, - "grad_norm": 2.666120767593384, - "learning_rate": 1.7726867534149413e-05, - "loss": 0.723, + "epoch": 0.11601548750439986, + "grad_norm": 2.298062324523926, + "learning_rate": 1.3730462152448396e-05, + "loss": 0.7269, "step": 1648 }, { - "epoch": 0.12477772312814499, - "grad_norm": 3.555640697479248, - "learning_rate": 1.7726316617537424e-05, - "loss": 0.8265, + "epoch": 0.11608588525167195, + "grad_norm": 2.3988592624664307, + "learning_rate": 1.373002332949648e-05, + "loss": 0.771, "step": 1649 }, { - "epoch": 0.1248533918504786, - "grad_norm": 2.2089498043060303, - "learning_rate": 1.7725765154455262e-05, - "loss": 0.8063, + "epoch": 0.11615628299894404, + "grad_norm": 2.940329074859619, + "learning_rate": 1.372958415664456e-05, + "loss": 0.8466, "step": 1650 }, { - "epoch": 0.12492906057281222, - "grad_norm": 4.1747918128967285, - "learning_rate": 1.7725213144937464e-05, - "loss": 0.9545, + "epoch": 0.11622668074621612, + "grad_norm": 2.16636323928833, + "learning_rate": 1.3729144633915462e-05, + "loss": 0.8145, "step": 1651 }, { - "epoch": 0.12500472929514586, - "grad_norm": 2.3813788890838623, - "learning_rate": 1.7724660589018597e-05, - "loss": 0.8837, + "epoch": 0.1162970784934882, + "grad_norm": 1.6889487504959106, + "learning_rate": 1.3728704761332041e-05, + "loss": 0.8518, "step": 1652 }, { - "epoch": 0.12508039801747947, - "grad_norm": 3.1693472862243652, - "learning_rate": 1.7724107486733268e-05, - "loss": 0.8958, + "epoch": 0.1163674762407603, + "grad_norm": 2.1010234355926514, + "learning_rate": 1.3728264538917164e-05, + "loss": 0.6811, "step": 1653 }, { - "epoch": 0.1251560667398131, - "grad_norm": 2.4706733226776123, - "learning_rate": 1.772355383811611e-05, - "loss": 0.8077, + "epoch": 0.11643787398803239, + "grad_norm": 2.281613349914551, + "learning_rate": 1.372782396669372e-05, + "loss": 0.7079, "step": 1654 }, { - "epoch": 0.12523173546214672, - "grad_norm": 2.8894009590148926, - "learning_rate": 1.7722999643201794e-05, - "loss": 0.7148, + "epoch": 0.11650827173530447, + "grad_norm": 2.1578688621520996, + "learning_rate": 1.3727383044684617e-05, + "loss": 0.8109, "step": 1655 }, { - "epoch": 0.12530740418448036, - "grad_norm": 2.7157387733459473, - "learning_rate": 1.7722444902025025e-05, - "loss": 0.72, + "epoch": 0.11657866948257656, + "grad_norm": 1.805970311164856, + "learning_rate": 1.3726941772912775e-05, + "loss": 0.7356, "step": 1656 }, { - "epoch": 0.12538307290681397, - "grad_norm": 4.078158855438232, - "learning_rate": 1.7721889614620548e-05, - "loss": 0.7737, + "epoch": 0.11664906722984865, + "grad_norm": 2.5441226959228516, + "learning_rate": 1.3726500151401139e-05, + "loss": 0.7993, "step": 1657 }, { - "epoch": 0.12545874162914758, - "grad_norm": 2.9151313304901123, - "learning_rate": 1.772133378102313e-05, - "loss": 0.8545, + "epoch": 0.11671946497712073, + "grad_norm": 2.2186944484710693, + "learning_rate": 1.3726058180172669e-05, + "loss": 0.7953, "step": 1658 }, { - "epoch": 0.12553441035148122, - "grad_norm": 5.656601905822754, - "learning_rate": 1.7720777401267586e-05, - "loss": 0.8819, + "epoch": 0.11678986272439282, + "grad_norm": 2.434722900390625, + "learning_rate": 1.3725615859250341e-05, + "loss": 0.7261, "step": 1659 }, { - "epoch": 0.12561007907381483, - "grad_norm": 2.7481420040130615, - "learning_rate": 1.7720220475388756e-05, - "loss": 0.7213, + "epoch": 0.11686026047166491, + "grad_norm": 2.1667368412017822, + "learning_rate": 1.3725173188657154e-05, + "loss": 0.7615, "step": 1660 }, { - "epoch": 0.12568574779614847, - "grad_norm": 3.2132253646850586, - "learning_rate": 1.771966300342151e-05, - "loss": 0.8183, + "epoch": 0.116930658218937, + "grad_norm": 2.457986354827881, + "learning_rate": 1.3724730168416121e-05, + "loss": 0.6927, "step": 1661 }, { - "epoch": 0.12576141651848208, - "grad_norm": 2.4579145908355713, - "learning_rate": 1.771910498540077e-05, - "loss": 0.8897, + "epoch": 0.11700105596620908, + "grad_norm": 7.772386074066162, + "learning_rate": 1.3724286798550277e-05, + "loss": 0.7993, "step": 1662 }, { - "epoch": 0.12583708524081572, - "grad_norm": 2.608400344848633, - "learning_rate": 1.7718546421361465e-05, - "loss": 0.8401, + "epoch": 0.11707145371348117, + "grad_norm": 2.232818841934204, + "learning_rate": 1.3723843079082675e-05, + "loss": 0.7459, "step": 1663 }, { - "epoch": 0.12591275396314933, - "grad_norm": 2.6007118225097656, - "learning_rate": 1.771798731133859e-05, - "loss": 0.906, + "epoch": 0.11714185146075326, + "grad_norm": 3.3516647815704346, + "learning_rate": 1.372339901003638e-05, + "loss": 0.7055, "step": 1664 }, { - "epoch": 0.12598842268548296, - "grad_norm": 2.439805746078491, - "learning_rate": 1.7717427655367153e-05, - "loss": 0.9405, + "epoch": 0.11721224920802534, + "grad_norm": 1.9879977703094482, + "learning_rate": 1.372295459143448e-05, + "loss": 0.74, "step": 1665 }, { - "epoch": 0.12606409140781658, - "grad_norm": 2.689601421356201, - "learning_rate": 1.7716867453482198e-05, - "loss": 0.8398, + "epoch": 0.11728264695529743, + "grad_norm": 2.4007697105407715, + "learning_rate": 1.3722509823300084e-05, + "loss": 0.7927, "step": 1666 }, { - "epoch": 0.1261397601301502, - "grad_norm": 2.9567224979400635, - "learning_rate": 1.7716306705718814e-05, - "loss": 0.8278, + "epoch": 0.11735304470256952, + "grad_norm": 2.414088726043701, + "learning_rate": 1.3722064705656313e-05, + "loss": 0.8708, "step": 1667 }, { - "epoch": 0.12621542885248382, - "grad_norm": 3.0706992149353027, - "learning_rate": 1.7715745412112107e-05, - "loss": 0.7353, + "epoch": 0.11742344244984161, + "grad_norm": 2.484210968017578, + "learning_rate": 1.372161923852631e-05, + "loss": 0.7892, "step": 1668 }, { - "epoch": 0.12629109757481746, - "grad_norm": 2.521843910217285, - "learning_rate": 1.7715183572697234e-05, - "loss": 0.91, + "epoch": 0.11749384019711369, + "grad_norm": 2.1371328830718994, + "learning_rate": 1.3721173421933238e-05, + "loss": 0.7773, "step": 1669 }, { - "epoch": 0.12636676629715107, - "grad_norm": 2.7420334815979004, - "learning_rate": 1.771462118750938e-05, - "loss": 0.7666, + "epoch": 0.11756423794438578, + "grad_norm": 1.9878405332565308, + "learning_rate": 1.3720727255900269e-05, + "loss": 0.9082, "step": 1670 }, { - "epoch": 0.12644243501948468, - "grad_norm": 2.520315647125244, - "learning_rate": 1.7714058256583758e-05, - "loss": 0.8706, + "epoch": 0.11763463569165787, + "grad_norm": 2.158313751220703, + "learning_rate": 1.3720280740450607e-05, + "loss": 0.7632, "step": 1671 }, { - "epoch": 0.12651810374181832, - "grad_norm": 2.752073287963867, - "learning_rate": 1.7713494779955625e-05, - "loss": 0.9659, + "epoch": 0.11770503343892995, + "grad_norm": 1.7978370189666748, + "learning_rate": 1.3719833875607459e-05, + "loss": 0.7664, "step": 1672 }, { - "epoch": 0.12659377246415193, - "grad_norm": 2.854764223098755, - "learning_rate": 1.771293075766026e-05, - "loss": 0.8267, + "epoch": 0.11777543118620204, + "grad_norm": 2.7071845531463623, + "learning_rate": 1.3719386661394064e-05, + "loss": 0.6922, "step": 1673 }, { - "epoch": 0.12666944118648557, - "grad_norm": 2.201077938079834, - "learning_rate": 1.7712366189732995e-05, - "loss": 0.7964, + "epoch": 0.11784582893347413, + "grad_norm": 2.3808670043945312, + "learning_rate": 1.3718939097833673e-05, + "loss": 0.7241, "step": 1674 }, { - "epoch": 0.12674510990881918, - "grad_norm": 3.6953394412994385, - "learning_rate": 1.7711801076209182e-05, - "loss": 0.7227, + "epoch": 0.11791622668074622, + "grad_norm": 1.8843724727630615, + "learning_rate": 1.3718491184949549e-05, + "loss": 0.7683, "step": 1675 }, { - "epoch": 0.12682077863115282, - "grad_norm": 2.4317433834075928, - "learning_rate": 1.7711235417124207e-05, - "loss": 0.8278, + "epoch": 0.1179866244280183, + "grad_norm": 2.229771614074707, + "learning_rate": 1.3718042922764987e-05, + "loss": 0.8175, "step": 1676 }, { - "epoch": 0.12689644735348643, - "grad_norm": 3.292365550994873, - "learning_rate": 1.771066921251349e-05, - "loss": 0.8908, + "epoch": 0.11805702217529039, + "grad_norm": 2.474583864212036, + "learning_rate": 1.3717594311303288e-05, + "loss": 0.6857, "step": 1677 }, { - "epoch": 0.12697211607582007, - "grad_norm": 2.3354146480560303, - "learning_rate": 1.7710102462412498e-05, - "loss": 0.7178, + "epoch": 0.11812741992256248, + "grad_norm": 2.1939141750335693, + "learning_rate": 1.3717145350587776e-05, + "loss": 0.8505, "step": 1678 }, { - "epoch": 0.12704778479815368, - "grad_norm": 2.138141393661499, - "learning_rate": 1.7709535166856718e-05, - "loss": 0.7817, + "epoch": 0.11819781766983456, + "grad_norm": 2.1721243858337402, + "learning_rate": 1.3716696040641796e-05, + "loss": 0.8009, "step": 1679 }, { - "epoch": 0.12712345352048732, - "grad_norm": 2.597642183303833, - "learning_rate": 1.7708967325881675e-05, - "loss": 0.7315, + "epoch": 0.11826821541710665, + "grad_norm": 2.215710163116455, + "learning_rate": 1.3716246381488703e-05, + "loss": 0.8138, "step": 1680 }, { - "epoch": 0.12719912224282093, - "grad_norm": 2.2730774879455566, - "learning_rate": 1.7708398939522927e-05, - "loss": 0.7304, + "epoch": 0.11833861316437874, + "grad_norm": 2.1449291706085205, + "learning_rate": 1.3715796373151882e-05, + "loss": 0.8522, "step": 1681 }, { - "epoch": 0.12727479096515457, - "grad_norm": 2.7624247074127197, - "learning_rate": 1.7707830007816073e-05, - "loss": 0.8055, + "epoch": 0.11840901091165083, + "grad_norm": 2.0194499492645264, + "learning_rate": 1.3715346015654722e-05, + "loss": 0.7902, "step": 1682 }, { - "epoch": 0.12735045968748818, - "grad_norm": 2.6193227767944336, - "learning_rate": 1.770726053079674e-05, - "loss": 0.8424, + "epoch": 0.11847940865892291, + "grad_norm": 2.434890031814575, + "learning_rate": 1.3714895309020643e-05, + "loss": 0.7413, "step": 1683 }, { - "epoch": 0.1274261284098218, - "grad_norm": 2.64697265625, - "learning_rate": 1.770669050850059e-05, - "loss": 0.8105, + "epoch": 0.118549806406195, + "grad_norm": 1.8053114414215088, + "learning_rate": 1.3714444253273072e-05, + "loss": 0.7361, "step": 1684 }, { - "epoch": 0.12750179713215543, - "grad_norm": 2.303603172302246, - "learning_rate": 1.770611994096332e-05, - "loss": 0.8021, + "epoch": 0.1186202041534671, + "grad_norm": 2.92336368560791, + "learning_rate": 1.3713992848435469e-05, + "loss": 0.8394, "step": 1685 }, { - "epoch": 0.12757746585448904, - "grad_norm": 2.714512348175049, - "learning_rate": 1.7705548828220657e-05, - "loss": 0.7102, + "epoch": 0.11869060190073917, + "grad_norm": 2.0443735122680664, + "learning_rate": 1.3713541094531293e-05, + "loss": 0.7837, "step": 1686 }, { - "epoch": 0.12765313457682267, - "grad_norm": 3.6520884037017822, - "learning_rate": 1.7704977170308372e-05, - "loss": 0.7449, + "epoch": 0.11876099964801126, + "grad_norm": 2.306548833847046, + "learning_rate": 1.3713088991584036e-05, + "loss": 0.739, "step": 1687 }, { - "epoch": 0.12772880329915628, - "grad_norm": 3.0547521114349365, - "learning_rate": 1.770440496726226e-05, - "loss": 0.8081, + "epoch": 0.11883139739528335, + "grad_norm": 2.546522855758667, + "learning_rate": 1.3712636539617203e-05, + "loss": 0.694, "step": 1688 }, { - "epoch": 0.12780447202148992, - "grad_norm": 2.003868579864502, - "learning_rate": 1.770383221911816e-05, - "loss": 0.9698, + "epoch": 0.11890179514255544, + "grad_norm": 2.7044475078582764, + "learning_rate": 1.3712183738654317e-05, + "loss": 0.864, "step": 1689 }, { - "epoch": 0.12788014074382353, - "grad_norm": 2.442770481109619, - "learning_rate": 1.7703258925911927e-05, - "loss": 0.6974, + "epoch": 0.11897219288982752, + "grad_norm": 2.1581878662109375, + "learning_rate": 1.3711730588718918e-05, + "loss": 0.7442, "step": 1690 }, { - "epoch": 0.12795580946615717, - "grad_norm": 4.01525354385376, - "learning_rate": 1.7702685087679475e-05, - "loss": 0.8011, + "epoch": 0.11904259063709961, + "grad_norm": 2.402418613433838, + "learning_rate": 1.371127708983457e-05, + "loss": 0.823, "step": 1691 }, { - "epoch": 0.12803147818849078, - "grad_norm": 2.3456971645355225, - "learning_rate": 1.7702110704456735e-05, - "loss": 0.7804, + "epoch": 0.1191129883843717, + "grad_norm": 2.248704433441162, + "learning_rate": 1.3710823242024847e-05, + "loss": 0.8254, "step": 1692 }, { - "epoch": 0.12810714691082442, - "grad_norm": 3.353431224822998, - "learning_rate": 1.7701535776279678e-05, - "loss": 0.648, + "epoch": 0.11918338613164378, + "grad_norm": 2.4577865600585938, + "learning_rate": 1.3710369045313347e-05, + "loss": 0.7927, "step": 1693 }, { - "epoch": 0.12818281563315803, - "grad_norm": 3.051726818084717, - "learning_rate": 1.7700960303184303e-05, - "loss": 0.6802, + "epoch": 0.11925378387891587, + "grad_norm": 3.4288063049316406, + "learning_rate": 1.370991449972368e-05, + "loss": 0.791, "step": 1694 }, { - "epoch": 0.12825848435549167, - "grad_norm": 2.835416316986084, - "learning_rate": 1.7700384285206653e-05, - "loss": 0.734, + "epoch": 0.11932418162618796, + "grad_norm": 2.7118136882781982, + "learning_rate": 1.3709459605279484e-05, + "loss": 0.7797, "step": 1695 }, { - "epoch": 0.12833415307782528, - "grad_norm": 2.5643491744995117, - "learning_rate": 1.7699807722382798e-05, - "loss": 0.6028, + "epoch": 0.11939457937346006, + "grad_norm": 2.2333576679229736, + "learning_rate": 1.3709004362004405e-05, + "loss": 0.7694, "step": 1696 }, { - "epoch": 0.1284098218001589, - "grad_norm": 2.56653094291687, - "learning_rate": 1.7699230614748846e-05, - "loss": 0.7887, + "epoch": 0.11946497712073213, + "grad_norm": 2.468637466430664, + "learning_rate": 1.3708548769922114e-05, + "loss": 0.8167, "step": 1697 }, { - "epoch": 0.12848549052249253, - "grad_norm": 3.0402891635894775, - "learning_rate": 1.7698652962340934e-05, - "loss": 0.7655, + "epoch": 0.11953537486800422, + "grad_norm": 2.214554786682129, + "learning_rate": 1.3708092829056297e-05, + "loss": 0.77, "step": 1698 }, { - "epoch": 0.12856115924482614, - "grad_norm": 2.826277017593384, - "learning_rate": 1.769807476519524e-05, - "loss": 0.9516, + "epoch": 0.11960577261527632, + "grad_norm": 2.2729156017303467, + "learning_rate": 1.3707636539430658e-05, + "loss": 0.7675, "step": 1699 }, { - "epoch": 0.12863682796715978, - "grad_norm": 2.662139415740967, - "learning_rate": 1.7697496023347972e-05, - "loss": 0.6862, + "epoch": 0.11967617036254839, + "grad_norm": 2.5307085514068604, + "learning_rate": 1.3707179901068922e-05, + "loss": 0.9011, "step": 1700 }, { - "epoch": 0.1287124966894934, - "grad_norm": 2.36557936668396, - "learning_rate": 1.769691673683537e-05, - "loss": 0.6471, + "epoch": 0.11974656810982048, + "grad_norm": 2.4170241355895996, + "learning_rate": 1.3706722913994826e-05, + "loss": 0.7173, "step": 1701 }, { - "epoch": 0.12878816541182703, - "grad_norm": 2.5000391006469727, - "learning_rate": 1.7696336905693713e-05, - "loss": 0.7876, + "epoch": 0.11981696585709257, + "grad_norm": 2.164560317993164, + "learning_rate": 1.3706265578232134e-05, + "loss": 1.0667, "step": 1702 }, { - "epoch": 0.12886383413416064, - "grad_norm": 2.8562798500061035, - "learning_rate": 1.7695756529959313e-05, - "loss": 0.7194, + "epoch": 0.11988736360436467, + "grad_norm": 2.319335699081421, + "learning_rate": 1.370580789380462e-05, + "loss": 0.6794, "step": 1703 }, { - "epoch": 0.12893950285649428, - "grad_norm": 2.4133670330047607, - "learning_rate": 1.7695175609668516e-05, - "loss": 0.816, + "epoch": 0.11995776135163674, + "grad_norm": 2.3606350421905518, + "learning_rate": 1.3705349860736082e-05, + "loss": 0.83, "step": 1704 }, { - "epoch": 0.1290151715788279, - "grad_norm": 2.8379642963409424, - "learning_rate": 1.7694594144857696e-05, - "loss": 0.8176, + "epoch": 0.12002815909890883, + "grad_norm": 2.196242570877075, + "learning_rate": 1.3704891479050329e-05, + "loss": 0.8052, "step": 1705 }, { - "epoch": 0.12909084030116152, - "grad_norm": 2.2420616149902344, - "learning_rate": 1.769401213556327e-05, - "loss": 0.7886, + "epoch": 0.12009855684618093, + "grad_norm": 2.6347365379333496, + "learning_rate": 1.37044327487712e-05, + "loss": 0.7924, "step": 1706 }, { - "epoch": 0.12916650902349514, - "grad_norm": 2.5862045288085938, - "learning_rate": 1.769342958182168e-05, - "loss": 0.723, + "epoch": 0.120168954593453, + "grad_norm": 2.2286832332611084, + "learning_rate": 1.3703973669922538e-05, + "loss": 0.8611, "step": 1707 }, { - "epoch": 0.12924217774582877, - "grad_norm": 2.9615068435668945, - "learning_rate": 1.7692846483669416e-05, - "loss": 0.8738, + "epoch": 0.1202393523407251, + "grad_norm": 2.0836708545684814, + "learning_rate": 1.3703514242528211e-05, + "loss": 0.6764, "step": 1708 }, { - "epoch": 0.12931784646816238, - "grad_norm": 2.824129581451416, - "learning_rate": 1.7692262841142988e-05, - "loss": 0.7763, + "epoch": 0.12030975008799719, + "grad_norm": 2.0547873973846436, + "learning_rate": 1.370305446661211e-05, + "loss": 0.8295, "step": 1709 }, { - "epoch": 0.12939351519049602, - "grad_norm": 2.8640074729919434, - "learning_rate": 1.7691678654278947e-05, - "loss": 0.7885, + "epoch": 0.12038014783526928, + "grad_norm": 2.275881767272949, + "learning_rate": 1.3702594342198138e-05, + "loss": 0.8162, "step": 1710 }, { - "epoch": 0.12946918391282963, - "grad_norm": 3.950695753097534, - "learning_rate": 1.7691093923113875e-05, - "loss": 0.8261, + "epoch": 0.12045054558254135, + "grad_norm": 1.9025455713272095, + "learning_rate": 1.3702133869310213e-05, + "loss": 0.7354, "step": 1711 }, { - "epoch": 0.12954485263516324, - "grad_norm": 2.261723041534424, - "learning_rate": 1.769050864768439e-05, - "loss": 0.806, + "epoch": 0.12052094332981345, + "grad_norm": 2.205639600753784, + "learning_rate": 1.370167304797228e-05, + "loss": 0.7218, "step": 1712 }, { - "epoch": 0.12962052135749688, - "grad_norm": 2.690190076828003, - "learning_rate": 1.768992282802715e-05, - "loss": 0.7319, + "epoch": 0.12059134107708554, + "grad_norm": 2.573308229446411, + "learning_rate": 1.3701211878208297e-05, + "loss": 0.8593, "step": 1713 }, { - "epoch": 0.1296961900798305, - "grad_norm": 3.1310863494873047, - "learning_rate": 1.768933646417883e-05, - "loss": 0.6613, + "epoch": 0.12066173882435761, + "grad_norm": 2.389561653137207, + "learning_rate": 1.3700750360042236e-05, + "loss": 0.7506, "step": 1714 }, { - "epoch": 0.12977185880216413, - "grad_norm": 2.7063982486724854, - "learning_rate": 1.768874955617616e-05, - "loss": 0.8432, + "epoch": 0.1207321365716297, + "grad_norm": 1.9726464748382568, + "learning_rate": 1.3700288493498097e-05, + "loss": 0.7932, "step": 1715 }, { - "epoch": 0.12984752752449774, - "grad_norm": 2.001281499862671, - "learning_rate": 1.768816210405589e-05, - "loss": 0.6304, + "epoch": 0.1208025343189018, + "grad_norm": 2.191450357437134, + "learning_rate": 1.3699826278599892e-05, + "loss": 0.8635, "step": 1716 }, { - "epoch": 0.12992319624683138, - "grad_norm": 2.623138189315796, - "learning_rate": 1.7687574107854808e-05, - "loss": 0.8827, + "epoch": 0.12087293206617389, + "grad_norm": 2.0865025520324707, + "learning_rate": 1.3699363715371652e-05, + "loss": 0.7982, "step": 1717 }, { - "epoch": 0.129998864969165, - "grad_norm": 3.3270182609558105, - "learning_rate": 1.7686985567609735e-05, - "loss": 0.6884, + "epoch": 0.12094332981344597, + "grad_norm": 1.8518706560134888, + "learning_rate": 1.3698900803837422e-05, + "loss": 0.642, "step": 1718 }, { - "epoch": 0.13007453369149863, - "grad_norm": 3.246429204940796, - "learning_rate": 1.7686396483357528e-05, - "loss": 0.8164, + "epoch": 0.12101372756071806, + "grad_norm": 2.526310920715332, + "learning_rate": 1.3698437544021273e-05, + "loss": 0.8494, "step": 1719 }, { - "epoch": 0.13015020241383224, - "grad_norm": 2.8202688694000244, - "learning_rate": 1.7685806855135077e-05, - "loss": 0.7462, + "epoch": 0.12108412530799015, + "grad_norm": 2.230039119720459, + "learning_rate": 1.3697973935947291e-05, + "loss": 0.6433, "step": 1720 }, { - "epoch": 0.13022587113616588, - "grad_norm": 1.7720720767974854, - "learning_rate": 1.768521668297931e-05, - "loss": 0.9133, + "epoch": 0.12115452305526223, + "grad_norm": 1.886214256286621, + "learning_rate": 1.3697509979639579e-05, + "loss": 0.7352, "step": 1721 }, { - "epoch": 0.1303015398584995, - "grad_norm": 2.7071969509124756, - "learning_rate": 1.768462596692718e-05, - "loss": 0.857, + "epoch": 0.12122492080253432, + "grad_norm": 2.7028634548187256, + "learning_rate": 1.3697045675122257e-05, + "loss": 0.7563, "step": 1722 }, { - "epoch": 0.13037720858083313, - "grad_norm": 2.4455504417419434, - "learning_rate": 1.7684034707015686e-05, - "loss": 0.7251, + "epoch": 0.12129531854980641, + "grad_norm": 2.1451802253723145, + "learning_rate": 1.3696581022419461e-05, + "loss": 0.7101, "step": 1723 }, { - "epoch": 0.13045287730316674, - "grad_norm": 2.843379020690918, - "learning_rate": 1.768344290328185e-05, - "loss": 0.9646, + "epoch": 0.1213657162970785, + "grad_norm": 3.1332104206085205, + "learning_rate": 1.3696116021555357e-05, + "loss": 0.6775, "step": 1724 }, { - "epoch": 0.13052854602550035, - "grad_norm": 3.5226151943206787, - "learning_rate": 1.7682850555762735e-05, - "loss": 0.7941, + "epoch": 0.12143611404435058, + "grad_norm": 2.4279415607452393, + "learning_rate": 1.3695650672554114e-05, + "loss": 0.7864, "step": 1725 }, { - "epoch": 0.13060421474783399, - "grad_norm": 2.8279130458831787, - "learning_rate": 1.768225766449543e-05, - "loss": 0.7988, + "epoch": 0.12150651179162267, + "grad_norm": 2.1416592597961426, + "learning_rate": 1.3695184975439932e-05, + "loss": 0.7818, "step": 1726 }, { - "epoch": 0.1306798834701676, - "grad_norm": 3.3851380348205566, - "learning_rate": 1.7681664229517074e-05, - "loss": 0.7569, + "epoch": 0.12157690953889476, + "grad_norm": 2.28859543800354, + "learning_rate": 1.3694718930237018e-05, + "loss": 0.8247, "step": 1727 }, { - "epoch": 0.13075555219250123, - "grad_norm": 2.9672915935516357, - "learning_rate": 1.7681070250864817e-05, - "loss": 0.8753, + "epoch": 0.12164730728616684, + "grad_norm": 2.4603631496429443, + "learning_rate": 1.3694252536969605e-05, + "loss": 0.7613, "step": 1728 }, { - "epoch": 0.13083122091483484, - "grad_norm": 2.609323740005493, - "learning_rate": 1.768047572857587e-05, - "loss": 0.7996, + "epoch": 0.12171770503343893, + "grad_norm": 2.2283670902252197, + "learning_rate": 1.3693785795661935e-05, + "loss": 0.7244, "step": 1729 }, { - "epoch": 0.13090688963716848, - "grad_norm": 2.4190008640289307, - "learning_rate": 1.7679880662687453e-05, - "loss": 0.7647, + "epoch": 0.12178810278071102, + "grad_norm": 3.2336344718933105, + "learning_rate": 1.3693318706338283e-05, + "loss": 0.9057, "step": 1730 }, { - "epoch": 0.1309825583595021, - "grad_norm": 3.3898887634277344, - "learning_rate": 1.7679285053236838e-05, - "loss": 0.7903, + "epoch": 0.12185850052798311, + "grad_norm": 1.9941760301589966, + "learning_rate": 1.369285126902293e-05, + "loss": 0.7498, "step": 1731 }, { - "epoch": 0.13105822708183573, - "grad_norm": 2.466832160949707, - "learning_rate": 1.767868890026132e-05, - "loss": 0.6835, + "epoch": 0.12192889827525519, + "grad_norm": 2.0819497108459473, + "learning_rate": 1.3692383483740179e-05, + "loss": 0.7401, "step": 1732 }, { - "epoch": 0.13113389580416934, - "grad_norm": 3.120836019515991, - "learning_rate": 1.767809220379823e-05, - "loss": 0.7485, + "epoch": 0.12199929602252728, + "grad_norm": 2.2932868003845215, + "learning_rate": 1.3691915350514349e-05, + "loss": 0.7393, "step": 1733 }, { - "epoch": 0.13120956452650298, - "grad_norm": 2.3976681232452393, - "learning_rate": 1.7677494963884935e-05, - "loss": 0.7093, + "epoch": 0.12206969376979937, + "grad_norm": 2.5256993770599365, + "learning_rate": 1.3691446869369777e-05, + "loss": 0.8674, "step": 1734 }, { - "epoch": 0.1312852332488366, - "grad_norm": 3.236889600753784, - "learning_rate": 1.7676897180558844e-05, - "loss": 0.7506, + "epoch": 0.12214009151707145, + "grad_norm": 2.2602732181549072, + "learning_rate": 1.3690978040330825e-05, + "loss": 0.7654, "step": 1735 }, { - "epoch": 0.13136090197117023, - "grad_norm": 3.0290706157684326, - "learning_rate": 1.7676298853857387e-05, - "loss": 0.7999, + "epoch": 0.12221048926434354, + "grad_norm": 2.0076351165771484, + "learning_rate": 1.3690508863421861e-05, + "loss": 0.6942, "step": 1736 }, { - "epoch": 0.13143657069350384, - "grad_norm": 2.8244903087615967, - "learning_rate": 1.767569998381803e-05, - "loss": 0.6637, + "epoch": 0.12228088701161563, + "grad_norm": 2.7057385444641113, + "learning_rate": 1.3690039338667287e-05, + "loss": 0.7423, "step": 1737 }, { - "epoch": 0.13151223941583745, - "grad_norm": 2.6339778900146484, - "learning_rate": 1.7675100570478282e-05, - "loss": 0.81, + "epoch": 0.12235128475888772, + "grad_norm": 2.248765230178833, + "learning_rate": 1.3689569466091508e-05, + "loss": 0.722, "step": 1738 }, { - "epoch": 0.1315879081381711, - "grad_norm": 3.118966817855835, - "learning_rate": 1.7674500613875678e-05, - "loss": 0.8709, + "epoch": 0.1224216825061598, + "grad_norm": 2.634721517562866, + "learning_rate": 1.368909924571895e-05, + "loss": 0.7147, "step": 1739 }, { - "epoch": 0.1316635768605047, - "grad_norm": 3.025437593460083, - "learning_rate": 1.767390011404779e-05, - "loss": 0.8147, + "epoch": 0.12249208025343189, + "grad_norm": 1.789183259010315, + "learning_rate": 1.3688628677574067e-05, + "loss": 0.6733, "step": 1740 }, { - "epoch": 0.13173924558283834, - "grad_norm": 3.028900623321533, - "learning_rate": 1.767329907103222e-05, - "loss": 0.8358, + "epoch": 0.12256247800070398, + "grad_norm": 2.236478328704834, + "learning_rate": 1.3688157761681323e-05, + "loss": 0.9276, "step": 1741 }, { - "epoch": 0.13181491430517195, - "grad_norm": 2.833244800567627, - "learning_rate": 1.767269748486661e-05, - "loss": 0.7973, + "epoch": 0.12263287574797606, + "grad_norm": 2.4731874465942383, + "learning_rate": 1.3687686498065196e-05, + "loss": 0.6293, "step": 1742 }, { - "epoch": 0.1318905830275056, - "grad_norm": 2.8717784881591797, - "learning_rate": 1.7672095355588632e-05, - "loss": 0.8679, + "epoch": 0.12270327349524815, + "grad_norm": 2.5843875408172607, + "learning_rate": 1.3687214886750195e-05, + "loss": 0.7737, "step": 1743 }, { - "epoch": 0.1319662517498392, - "grad_norm": 2.449739456176758, - "learning_rate": 1.7671492683235993e-05, - "loss": 0.9128, + "epoch": 0.12277367124252024, + "grad_norm": 1.8833657503128052, + "learning_rate": 1.3686742927760834e-05, + "loss": 0.6721, "step": 1744 }, { - "epoch": 0.13204192047217284, - "grad_norm": 4.743024826049805, - "learning_rate": 1.7670889467846435e-05, - "loss": 0.884, + "epoch": 0.12284406898979233, + "grad_norm": 2.26562237739563, + "learning_rate": 1.3686270621121653e-05, + "loss": 0.8258, "step": 1745 }, { - "epoch": 0.13211758919450645, - "grad_norm": 2.698247194290161, - "learning_rate": 1.7670285709457732e-05, - "loss": 0.7641, + "epoch": 0.12291446673706441, + "grad_norm": 2.5318357944488525, + "learning_rate": 1.3685797966857205e-05, + "loss": 0.6698, "step": 1746 }, { - "epoch": 0.13219325791684008, - "grad_norm": 2.852203607559204, - "learning_rate": 1.76696814081077e-05, - "loss": 0.7957, + "epoch": 0.1229848644843365, + "grad_norm": 2.6742749214172363, + "learning_rate": 1.3685324964992067e-05, + "loss": 0.8127, "step": 1747 }, { - "epoch": 0.1322689266391737, - "grad_norm": 2.8371152877807617, - "learning_rate": 1.7669076563834174e-05, - "loss": 0.8086, + "epoch": 0.12305526223160859, + "grad_norm": 2.2961127758026123, + "learning_rate": 1.368485161555083e-05, + "loss": 0.795, "step": 1748 }, { - "epoch": 0.13234459536150733, - "grad_norm": 2.7854745388031006, - "learning_rate": 1.7668471176675033e-05, - "loss": 0.9564, + "epoch": 0.12312565997888067, + "grad_norm": 2.0450546741485596, + "learning_rate": 1.3684377918558103e-05, + "loss": 0.7856, "step": 1749 }, { - "epoch": 0.13242026408384094, - "grad_norm": 4.4708051681518555, - "learning_rate": 1.7667865246668193e-05, - "loss": 0.7811, + "epoch": 0.12319605772615276, + "grad_norm": 2.219982147216797, + "learning_rate": 1.3683903874038514e-05, + "loss": 0.6871, "step": 1750 }, { - "epoch": 0.13249593280617455, - "grad_norm": 3.00451922416687, - "learning_rate": 1.7667258773851596e-05, - "loss": 0.9362, + "epoch": 0.12326645547342485, + "grad_norm": 2.5235648155212402, + "learning_rate": 1.3683429482016709e-05, + "loss": 0.6545, "step": 1751 }, { - "epoch": 0.1325716015285082, - "grad_norm": 2.8037302494049072, - "learning_rate": 1.7666651758263218e-05, - "loss": 0.8856, + "epoch": 0.12333685322069694, + "grad_norm": 2.68070125579834, + "learning_rate": 1.3682954742517354e-05, + "loss": 0.5686, "step": 1752 }, { - "epoch": 0.1326472702508418, - "grad_norm": 2.7416181564331055, - "learning_rate": 1.7666044199941077e-05, - "loss": 0.7625, + "epoch": 0.12340725096796902, + "grad_norm": 2.7134644985198975, + "learning_rate": 1.3682479655565126e-05, + "loss": 0.9332, "step": 1753 }, { - "epoch": 0.13272293897317544, - "grad_norm": 3.8341903686523438, - "learning_rate": 1.766543609892322e-05, - "loss": 0.7551, + "epoch": 0.12347764871524111, + "grad_norm": 2.51389479637146, + "learning_rate": 1.368200422118473e-05, + "loss": 0.8313, "step": 1754 }, { - "epoch": 0.13279860769550905, - "grad_norm": 2.7719779014587402, - "learning_rate": 1.7664827455247725e-05, - "loss": 0.6949, + "epoch": 0.1235480464625132, + "grad_norm": 2.3755791187286377, + "learning_rate": 1.3681528439400885e-05, + "loss": 0.7192, "step": 1755 }, { - "epoch": 0.1328742764178427, - "grad_norm": 11.289454460144043, - "learning_rate": 1.766421826895271e-05, - "loss": 0.7725, + "epoch": 0.12361844420978528, + "grad_norm": 2.498749017715454, + "learning_rate": 1.3681052310238325e-05, + "loss": 0.729, "step": 1756 }, { - "epoch": 0.1329499451401763, - "grad_norm": 4.495529651641846, - "learning_rate": 1.7663608540076325e-05, - "loss": 0.935, + "epoch": 0.12368884195705737, + "grad_norm": 1.7530375719070435, + "learning_rate": 1.3680575833721803e-05, + "loss": 0.7343, "step": 1757 }, { - "epoch": 0.13302561386250994, - "grad_norm": 3.4977807998657227, - "learning_rate": 1.766299826865675e-05, - "loss": 0.7704, + "epoch": 0.12375923970432946, + "grad_norm": 2.370089530944824, + "learning_rate": 1.3680099009876094e-05, + "loss": 0.814, "step": 1758 }, { - "epoch": 0.13310128258484355, - "grad_norm": 3.455537796020508, - "learning_rate": 1.7662387454732206e-05, - "loss": 0.7805, + "epoch": 0.12382963745160155, + "grad_norm": 2.9836509227752686, + "learning_rate": 1.3679621838725988e-05, + "loss": 0.7956, "step": 1759 }, { - "epoch": 0.1331769513071772, - "grad_norm": 2.7036778926849365, - "learning_rate": 1.766177609834094e-05, - "loss": 0.7823, + "epoch": 0.12390003519887363, + "grad_norm": 3.9581139087677, + "learning_rate": 1.3679144320296295e-05, + "loss": 0.7087, "step": 1760 }, { - "epoch": 0.1332526200295108, - "grad_norm": 2.983051061630249, - "learning_rate": 1.7661164199521238e-05, - "loss": 0.8118, + "epoch": 0.12397043294614572, + "grad_norm": 2.209245204925537, + "learning_rate": 1.3678666454611838e-05, + "loss": 0.7089, "step": 1761 }, { - "epoch": 0.13332828875184444, - "grad_norm": 2.9861648082733154, - "learning_rate": 1.7660551758311424e-05, - "loss": 0.755, + "epoch": 0.12404083069341781, + "grad_norm": 5.0127458572387695, + "learning_rate": 1.3678188241697463e-05, + "loss": 0.7511, "step": 1762 }, { - "epoch": 0.13340395747417805, - "grad_norm": 3.221959352493286, - "learning_rate": 1.7659938774749843e-05, - "loss": 0.8239, + "epoch": 0.12411122844068989, + "grad_norm": 2.542032480239868, + "learning_rate": 1.3677709681578033e-05, + "loss": 0.7977, "step": 1763 }, { - "epoch": 0.13347962619651166, - "grad_norm": 2.7048370838165283, - "learning_rate": 1.765932524887489e-05, - "loss": 0.7978, + "epoch": 0.12418162618796198, + "grad_norm": 2.505225419998169, + "learning_rate": 1.3677230774278428e-05, + "loss": 0.821, "step": 1764 }, { - "epoch": 0.1335552949188453, - "grad_norm": 3.0908560752868652, - "learning_rate": 1.765871118072498e-05, - "loss": 0.951, + "epoch": 0.12425202393523407, + "grad_norm": 2.4881508350372314, + "learning_rate": 1.3676751519823549e-05, + "loss": 0.8916, "step": 1765 }, { - "epoch": 0.1336309636411789, - "grad_norm": 2.1981539726257324, - "learning_rate": 1.765809657033857e-05, - "loss": 0.6185, + "epoch": 0.12432242168250617, + "grad_norm": 2.3500194549560547, + "learning_rate": 1.3676271918238312e-05, + "loss": 0.7423, "step": 1766 }, { - "epoch": 0.13370663236351255, - "grad_norm": 2.284193277359009, - "learning_rate": 1.765748141775415e-05, - "loss": 1.0244, + "epoch": 0.12439281942977824, + "grad_norm": 2.2898924350738525, + "learning_rate": 1.3675791969547651e-05, + "loss": 0.6642, "step": 1767 }, { - "epoch": 0.13378230108584616, - "grad_norm": 3.8819425106048584, - "learning_rate": 1.7656865723010242e-05, - "loss": 0.7626, + "epoch": 0.12446321717705033, + "grad_norm": 2.7802574634552, + "learning_rate": 1.367531167377652e-05, + "loss": 0.7727, "step": 1768 }, { - "epoch": 0.1338579698081798, - "grad_norm": 2.6897873878479004, - "learning_rate": 1.7656249486145405e-05, - "loss": 0.666, + "epoch": 0.12453361492432242, + "grad_norm": 2.9698970317840576, + "learning_rate": 1.3674831030949889e-05, + "loss": 0.8203, "step": 1769 }, { - "epoch": 0.1339336385305134, - "grad_norm": 3.00016713142395, - "learning_rate": 1.7655632707198225e-05, - "loss": 0.8359, + "epoch": 0.1246040126715945, + "grad_norm": 2.263657331466675, + "learning_rate": 1.3674350041092749e-05, + "loss": 0.7665, "step": 1770 }, { - "epoch": 0.13400930725284704, - "grad_norm": 2.38024640083313, - "learning_rate": 1.7655015386207326e-05, - "loss": 0.7616, + "epoch": 0.1246744104188666, + "grad_norm": 2.315575361251831, + "learning_rate": 1.3673868704230103e-05, + "loss": 0.7683, "step": 1771 }, { - "epoch": 0.13408497597518065, - "grad_norm": 2.9403862953186035, - "learning_rate": 1.7654397523211374e-05, - "loss": 0.804, + "epoch": 0.12474480816613868, + "grad_norm": 2.316565990447998, + "learning_rate": 1.3673387020386982e-05, + "loss": 0.7948, "step": 1772 }, { - "epoch": 0.1341606446975143, - "grad_norm": 3.072727680206299, - "learning_rate": 1.7653779118249055e-05, - "loss": 0.7256, + "epoch": 0.12481520591341078, + "grad_norm": 2.2005763053894043, + "learning_rate": 1.3672904989588425e-05, + "loss": 0.8106, "step": 1773 }, { - "epoch": 0.1342363134198479, - "grad_norm": 3.713294267654419, - "learning_rate": 1.76531601713591e-05, - "loss": 0.842, + "epoch": 0.12488560366068285, + "grad_norm": 2.4389989376068115, + "learning_rate": 1.3672422611859493e-05, + "loss": 0.7609, "step": 1774 }, { - "epoch": 0.13431198214218154, - "grad_norm": 2.764293909072876, - "learning_rate": 1.7652540682580267e-05, - "loss": 0.7639, + "epoch": 0.12495600140795494, + "grad_norm": 2.227780818939209, + "learning_rate": 1.3671939887225267e-05, + "loss": 0.7493, "step": 1775 }, { - "epoch": 0.13438765086451515, - "grad_norm": 2.774491310119629, - "learning_rate": 1.765192065195135e-05, - "loss": 0.8338, + "epoch": 0.12502639915522704, + "grad_norm": 2.284590244293213, + "learning_rate": 1.3671456815710845e-05, + "loss": 0.7383, "step": 1776 }, { - "epoch": 0.1344633195868488, - "grad_norm": 2.9168760776519775, - "learning_rate": 1.765130007951118e-05, - "loss": 0.7613, + "epoch": 0.1250967969024991, + "grad_norm": 2.3576862812042236, + "learning_rate": 1.3670973397341339e-05, + "loss": 0.8226, "step": 1777 }, { - "epoch": 0.1345389883091824, - "grad_norm": 2.8239054679870605, - "learning_rate": 1.7650678965298615e-05, - "loss": 0.908, + "epoch": 0.12516719464977122, + "grad_norm": 2.285094738006592, + "learning_rate": 1.3670489632141887e-05, + "loss": 1.0223, "step": 1778 }, { - "epoch": 0.134614657031516, - "grad_norm": 2.766092538833618, - "learning_rate": 1.7650057309352554e-05, - "loss": 0.8853, + "epoch": 0.1252375923970433, + "grad_norm": 2.1608047485351562, + "learning_rate": 1.3670005520137637e-05, + "loss": 0.7904, "step": 1779 }, { - "epoch": 0.13469032575384965, - "grad_norm": 2.6427292823791504, - "learning_rate": 1.7649435111711926e-05, - "loss": 0.7613, + "epoch": 0.12530799014431537, + "grad_norm": 3.7611827850341797, + "learning_rate": 1.3669521061353758e-05, + "loss": 0.8047, "step": 1780 }, { - "epoch": 0.13476599447618326, - "grad_norm": 3.0190093517303467, - "learning_rate": 1.7648812372415697e-05, - "loss": 0.7216, + "epoch": 0.12537838789158748, + "grad_norm": 2.2425339221954346, + "learning_rate": 1.366903625581544e-05, + "loss": 0.7349, "step": 1781 }, { - "epoch": 0.1348416631985169, - "grad_norm": 2.665872573852539, - "learning_rate": 1.7648189091502863e-05, - "loss": 0.9648, + "epoch": 0.12544878563885956, + "grad_norm": 2.3334922790527344, + "learning_rate": 1.3668551103547886e-05, + "loss": 0.8375, "step": 1782 }, { - "epoch": 0.1349173319208505, - "grad_norm": 2.2282044887542725, - "learning_rate": 1.7647565269012458e-05, - "loss": 0.8866, + "epoch": 0.12551918338613163, + "grad_norm": 2.2295351028442383, + "learning_rate": 1.3668065604576322e-05, + "loss": 0.7182, "step": 1783 }, { - "epoch": 0.13499300064318415, - "grad_norm": 2.46911883354187, - "learning_rate": 1.7646940904983545e-05, - "loss": 0.7876, + "epoch": 0.12558958113340374, + "grad_norm": 2.0622599124908447, + "learning_rate": 1.3667579758925988e-05, + "loss": 0.6562, "step": 1784 }, { - "epoch": 0.13506866936551776, - "grad_norm": 2.569694757461548, - "learning_rate": 1.7646315999455224e-05, - "loss": 0.5898, + "epoch": 0.12565997888067582, + "grad_norm": 2.1802384853363037, + "learning_rate": 1.3667093566622144e-05, + "loss": 0.6353, "step": 1785 }, { - "epoch": 0.1351443380878514, - "grad_norm": 2.6884071826934814, - "learning_rate": 1.764569055246663e-05, - "loss": 0.8368, + "epoch": 0.1257303766279479, + "grad_norm": 2.2342283725738525, + "learning_rate": 1.3666607027690067e-05, + "loss": 0.8221, "step": 1786 }, { - "epoch": 0.135220006810185, - "grad_norm": 2.3804450035095215, - "learning_rate": 1.764506456405693e-05, - "loss": 0.8086, + "epoch": 0.12580077437522, + "grad_norm": 3.9241135120391846, + "learning_rate": 1.3666120142155052e-05, + "loss": 0.6842, "step": 1787 }, { - "epoch": 0.13529567553251864, - "grad_norm": 2.71864914894104, - "learning_rate": 1.7644438034265326e-05, - "loss": 0.6779, + "epoch": 0.12587117212249208, + "grad_norm": 3.1805524826049805, + "learning_rate": 1.3665632910042416e-05, + "loss": 0.7361, "step": 1788 }, { - "epoch": 0.13537134425485225, - "grad_norm": 2.8667616844177246, - "learning_rate": 1.7643810963131053e-05, - "loss": 0.9573, + "epoch": 0.12594156986976418, + "grad_norm": 2.025632858276367, + "learning_rate": 1.3665145331377485e-05, + "loss": 0.8175, "step": 1789 }, { - "epoch": 0.1354470129771859, - "grad_norm": 2.3643083572387695, - "learning_rate": 1.764318335069338e-05, - "loss": 0.9273, + "epoch": 0.12601196761703626, + "grad_norm": 2.3465023040771484, + "learning_rate": 1.3664657406185616e-05, + "loss": 0.8467, "step": 1790 }, { - "epoch": 0.1355226816995195, - "grad_norm": 2.5259487628936768, - "learning_rate": 1.764255519699161e-05, - "loss": 0.6811, + "epoch": 0.12608236536430834, + "grad_norm": 2.5781002044677734, + "learning_rate": 1.366416913449217e-05, + "loss": 0.7824, "step": 1791 }, { - "epoch": 0.13559835042185311, - "grad_norm": 3.6173150539398193, - "learning_rate": 1.764192650206508e-05, - "loss": 0.9071, + "epoch": 0.12615276311158044, + "grad_norm": 2.270998477935791, + "learning_rate": 1.3663680516322536e-05, + "loss": 0.7427, "step": 1792 }, { - "epoch": 0.13567401914418675, - "grad_norm": 3.5628244876861572, - "learning_rate": 1.7641297265953158e-05, - "loss": 0.7724, + "epoch": 0.12622316085885252, + "grad_norm": 2.3521499633789062, + "learning_rate": 1.3663191551702117e-05, + "loss": 0.7755, "step": 1793 }, { - "epoch": 0.13574968786652036, - "grad_norm": 2.5445380210876465, - "learning_rate": 1.7640667488695258e-05, - "loss": 0.8766, + "epoch": 0.1262935586061246, + "grad_norm": 2.192549228668213, + "learning_rate": 1.3662702240656335e-05, + "loss": 0.8267, "step": 1794 }, { - "epoch": 0.135825356588854, - "grad_norm": 2.36080002784729, - "learning_rate": 1.764003717033081e-05, - "loss": 0.7998, + "epoch": 0.1263639563533967, + "grad_norm": 3.4676694869995117, + "learning_rate": 1.3662212583210626e-05, + "loss": 0.8215, "step": 1795 }, { - "epoch": 0.1359010253111876, - "grad_norm": 2.7486581802368164, - "learning_rate": 1.763940631089929e-05, - "loss": 0.7066, + "epoch": 0.12643435410066878, + "grad_norm": 1.8905069828033447, + "learning_rate": 1.3661722579390457e-05, + "loss": 0.705, "step": 1796 }, { - "epoch": 0.13597669403352125, - "grad_norm": 3.2052993774414062, - "learning_rate": 1.7638774910440197e-05, - "loss": 0.7667, + "epoch": 0.12650475184794086, + "grad_norm": 1.945239782333374, + "learning_rate": 1.3661232229221293e-05, + "loss": 0.8094, "step": 1797 }, { - "epoch": 0.13605236275585486, - "grad_norm": 2.3594470024108887, - "learning_rate": 1.7638142968993086e-05, - "loss": 0.8154, + "epoch": 0.12657514959521296, + "grad_norm": 1.8961021900177002, + "learning_rate": 1.3660741532728634e-05, + "loss": 0.7127, "step": 1798 }, { - "epoch": 0.1361280314781885, - "grad_norm": 3.531343936920166, - "learning_rate": 1.7637510486597517e-05, - "loss": 0.7966, + "epoch": 0.12664554734248504, + "grad_norm": 2.2346982955932617, + "learning_rate": 1.3660250489937992e-05, + "loss": 0.7724, "step": 1799 }, { - "epoch": 0.1362037002005221, - "grad_norm": 2.1436774730682373, - "learning_rate": 1.7636877463293108e-05, - "loss": 0.9163, + "epoch": 0.12671594508975711, + "grad_norm": 2.88657283782959, + "learning_rate": 1.3659759100874894e-05, + "loss": 0.7675, "step": 1800 }, { - "epoch": 0.13627936892285575, - "grad_norm": 2.2388010025024414, - "learning_rate": 1.76362438991195e-05, - "loss": 0.8786, + "epoch": 0.12678634283702922, + "grad_norm": 2.903137445449829, + "learning_rate": 1.365926736556489e-05, + "loss": 0.7079, "step": 1801 }, { - "epoch": 0.13635503764518936, - "grad_norm": 2.846320390701294, - "learning_rate": 1.7635609794116362e-05, - "loss": 0.8172, + "epoch": 0.1268567405843013, + "grad_norm": 2.1306254863739014, + "learning_rate": 1.3658775284033543e-05, + "loss": 0.8583, "step": 1802 }, { - "epoch": 0.136430706367523, - "grad_norm": 2.405848264694214, - "learning_rate": 1.7634975148323405e-05, - "loss": 0.9217, + "epoch": 0.1269271383315734, + "grad_norm": 2.0538322925567627, + "learning_rate": 1.3658282856306439e-05, + "loss": 0.7504, "step": 1803 }, { - "epoch": 0.1365063750898566, - "grad_norm": 2.645883321762085, - "learning_rate": 1.763433996178038e-05, - "loss": 0.8546, + "epoch": 0.12699753607884548, + "grad_norm": 2.036475896835327, + "learning_rate": 1.3657790082409178e-05, + "loss": 0.7124, "step": 1804 }, { - "epoch": 0.13658204381219022, - "grad_norm": 2.8361809253692627, - "learning_rate": 1.763370423452706e-05, - "loss": 0.8501, + "epoch": 0.12706793382611756, + "grad_norm": 2.062352418899536, + "learning_rate": 1.3657296962367384e-05, + "loss": 0.8698, "step": 1805 }, { - "epoch": 0.13665771253452386, - "grad_norm": 2.7826128005981445, - "learning_rate": 1.7633067966603254e-05, - "loss": 0.9248, + "epoch": 0.12713833157338966, + "grad_norm": 2.619528293609619, + "learning_rate": 1.3656803496206689e-05, + "loss": 0.6982, "step": 1806 }, { - "epoch": 0.13673338125685747, - "grad_norm": 2.6363344192504883, - "learning_rate": 1.7632431158048808e-05, - "loss": 0.8338, + "epoch": 0.12720872932066174, + "grad_norm": 2.4403038024902344, + "learning_rate": 1.3656309683952753e-05, + "loss": 0.7763, "step": 1807 }, { - "epoch": 0.1368090499791911, - "grad_norm": 3.479905366897583, - "learning_rate": 1.7631793808903604e-05, - "loss": 0.8466, + "epoch": 0.12727912706793382, + "grad_norm": 2.480867624282837, + "learning_rate": 1.3655815525631247e-05, + "loss": 0.7699, "step": 1808 }, { - "epoch": 0.13688471870152472, - "grad_norm": 3.4462170600891113, - "learning_rate": 1.7631155919207556e-05, - "loss": 0.9066, + "epoch": 0.12734952481520592, + "grad_norm": 2.3262274265289307, + "learning_rate": 1.3655321021267866e-05, + "loss": 0.6855, "step": 1809 }, { - "epoch": 0.13696038742385835, - "grad_norm": 3.2740397453308105, - "learning_rate": 1.76305174890006e-05, - "loss": 0.7991, + "epoch": 0.127419922562478, + "grad_norm": 2.6729824542999268, + "learning_rate": 1.3654826170888316e-05, + "loss": 0.8057, "step": 1810 }, { - "epoch": 0.13703605614619196, - "grad_norm": 2.6029398441314697, - "learning_rate": 1.7629878518322732e-05, - "loss": 0.7337, + "epoch": 0.12749032030975008, + "grad_norm": 2.2517542839050293, + "learning_rate": 1.3654330974518325e-05, + "loss": 0.7157, "step": 1811 }, { - "epoch": 0.1371117248685256, - "grad_norm": 2.4479711055755615, - "learning_rate": 1.7629239007213957e-05, - "loss": 0.7885, + "epoch": 0.12756071805702218, + "grad_norm": 2.434354543685913, + "learning_rate": 1.3653835432183644e-05, + "loss": 0.7105, "step": 1812 }, { - "epoch": 0.1371873935908592, - "grad_norm": 2.370789051055908, - "learning_rate": 1.7628598955714322e-05, - "loss": 0.8362, + "epoch": 0.12763111580429426, + "grad_norm": 2.0128586292266846, + "learning_rate": 1.3653339543910029e-05, + "loss": 0.6885, "step": 1813 }, { - "epoch": 0.13726306231319285, - "grad_norm": 2.8538105487823486, - "learning_rate": 1.7627958363863914e-05, - "loss": 0.6775, + "epoch": 0.12770151355156634, + "grad_norm": 2.305865526199341, + "learning_rate": 1.3652843309723267e-05, + "loss": 0.616, "step": 1814 }, { - "epoch": 0.13733873103552646, - "grad_norm": 2.297853469848633, - "learning_rate": 1.7627317231702847e-05, - "loss": 0.597, + "epoch": 0.12777191129883844, + "grad_norm": 2.304600477218628, + "learning_rate": 1.3652346729649157e-05, + "loss": 0.7827, "step": 1815 }, { - "epoch": 0.1374143997578601, - "grad_norm": 3.4059054851531982, - "learning_rate": 1.762667555927127e-05, - "loss": 0.6599, + "epoch": 0.12784230904611052, + "grad_norm": 2.375638961791992, + "learning_rate": 1.3651849803713515e-05, + "loss": 0.8232, "step": 1816 }, { - "epoch": 0.1374900684801937, - "grad_norm": 4.349469184875488, - "learning_rate": 1.762603334660937e-05, - "loss": 0.715, + "epoch": 0.12791270679338262, + "grad_norm": 2.7710015773773193, + "learning_rate": 1.3651352531942176e-05, + "loss": 0.7432, "step": 1817 }, { - "epoch": 0.13756573720252732, - "grad_norm": 2.5945630073547363, - "learning_rate": 1.762539059375736e-05, - "loss": 0.7752, + "epoch": 0.1279831045406547, + "grad_norm": 2.4607651233673096, + "learning_rate": 1.3650854914360997e-05, + "loss": 0.7505, "step": 1818 }, { - "epoch": 0.13764140592486096, - "grad_norm": 2.1834769248962402, - "learning_rate": 1.7624747300755493e-05, - "loss": 0.7783, + "epoch": 0.12805350228792678, + "grad_norm": 2.492663860321045, + "learning_rate": 1.3650356950995847e-05, + "loss": 0.8469, "step": 1819 }, { - "epoch": 0.13771707464719457, - "grad_norm": 2.6315038204193115, - "learning_rate": 1.7624103467644055e-05, - "loss": 0.7731, + "epoch": 0.12812390003519888, + "grad_norm": 2.140880584716797, + "learning_rate": 1.3649858641872617e-05, + "loss": 0.8327, "step": 1820 }, { - "epoch": 0.1377927433695282, - "grad_norm": 2.684382915496826, - "learning_rate": 1.7623459094463363e-05, - "loss": 0.6977, + "epoch": 0.12819429778247096, + "grad_norm": 2.491886854171753, + "learning_rate": 1.3649359987017213e-05, + "loss": 0.7072, "step": 1821 }, { - "epoch": 0.13786841209186182, - "grad_norm": 2.139249086380005, - "learning_rate": 1.762281418125377e-05, - "loss": 0.6902, + "epoch": 0.12826469552974304, + "grad_norm": 2.2673513889312744, + "learning_rate": 1.3648860986455561e-05, + "loss": 0.7884, "step": 1822 }, { - "epoch": 0.13794408081419546, - "grad_norm": 5.037784099578857, - "learning_rate": 1.7622168728055665e-05, - "loss": 0.823, + "epoch": 0.12833509327701514, + "grad_norm": 2.0024282932281494, + "learning_rate": 1.3648361640213607e-05, + "loss": 0.748, "step": 1823 }, { - "epoch": 0.13801974953652907, - "grad_norm": 3.9115712642669678, - "learning_rate": 1.762152273490947e-05, - "loss": 0.7472, + "epoch": 0.12840549102428722, + "grad_norm": 2.7802116870880127, + "learning_rate": 1.3647861948317307e-05, + "loss": 0.8078, "step": 1824 }, { - "epoch": 0.1380954182588627, - "grad_norm": 2.827516555786133, - "learning_rate": 1.7620876201855633e-05, - "loss": 0.8842, + "epoch": 0.1284758887715593, + "grad_norm": 2.1268422603607178, + "learning_rate": 1.3647361910792647e-05, + "loss": 0.8124, "step": 1825 }, { - "epoch": 0.13817108698119632, - "grad_norm": 2.3763670921325684, - "learning_rate": 1.7620229128934644e-05, - "loss": 0.7502, + "epoch": 0.1285462865188314, + "grad_norm": 2.3044674396514893, + "learning_rate": 1.3646861527665617e-05, + "loss": 0.761, "step": 1826 }, { - "epoch": 0.13824675570352996, - "grad_norm": 2.8924078941345215, - "learning_rate": 1.7619581516187026e-05, - "loss": 0.8482, + "epoch": 0.12861668426610348, + "grad_norm": 2.2514801025390625, + "learning_rate": 1.364636079896224e-05, + "loss": 0.8815, "step": 1827 }, { - "epoch": 0.13832242442586357, - "grad_norm": 2.8543429374694824, - "learning_rate": 1.7618933363653333e-05, - "loss": 0.7102, + "epoch": 0.12868708201337556, + "grad_norm": 2.3364713191986084, + "learning_rate": 1.3645859724708542e-05, + "loss": 0.889, "step": 1828 }, { - "epoch": 0.1383980931481972, - "grad_norm": 2.7342612743377686, - "learning_rate": 1.7618284671374157e-05, - "loss": 0.7424, + "epoch": 0.12875747976064766, + "grad_norm": 2.194827079772949, + "learning_rate": 1.364535830493058e-05, + "loss": 0.7427, "step": 1829 }, { - "epoch": 0.13847376187053081, - "grad_norm": 3.0927932262420654, - "learning_rate": 1.7617635439390123e-05, - "loss": 0.8179, + "epoch": 0.12882787750791974, + "grad_norm": 2.7342145442962646, + "learning_rate": 1.3644856539654423e-05, + "loss": 0.861, "step": 1830 }, { - "epoch": 0.13854943059286445, - "grad_norm": 2.353637933731079, - "learning_rate": 1.761698566774188e-05, - "loss": 0.7647, + "epoch": 0.12889827525519185, + "grad_norm": 2.5517847537994385, + "learning_rate": 1.3644354428906155e-05, + "loss": 0.7664, "step": 1831 }, { - "epoch": 0.13862509931519806, - "grad_norm": 2.7687482833862305, - "learning_rate": 1.7616335356470128e-05, - "loss": 0.9106, + "epoch": 0.12896867300246392, + "grad_norm": 2.11301589012146, + "learning_rate": 1.3643851972711884e-05, + "loss": 0.7276, "step": 1832 }, { - "epoch": 0.13870076803753167, - "grad_norm": 4.123755931854248, - "learning_rate": 1.7615684505615587e-05, - "loss": 0.9028, + "epoch": 0.129039070749736, + "grad_norm": 2.1110448837280273, + "learning_rate": 1.364334917109773e-05, + "loss": 0.6611, "step": 1833 }, { - "epoch": 0.1387764367598653, - "grad_norm": 2.1787283420562744, - "learning_rate": 1.7615033115219012e-05, - "loss": 0.6567, + "epoch": 0.1291094684970081, + "grad_norm": 1.6364364624023438, + "learning_rate": 1.3642846024089837e-05, + "loss": 0.8619, "step": 1834 }, { - "epoch": 0.13885210548219892, - "grad_norm": 2.4294352531433105, - "learning_rate": 1.76143811853212e-05, - "loss": 0.8595, + "epoch": 0.12917986624428018, + "grad_norm": 2.0771231651306152, + "learning_rate": 1.3642342531714361e-05, + "loss": 0.7425, "step": 1835 }, { - "epoch": 0.13892777420453256, - "grad_norm": 2.3959708213806152, - "learning_rate": 1.7613728715962978e-05, - "loss": 0.7286, + "epoch": 0.12925026399155226, + "grad_norm": 2.2103233337402344, + "learning_rate": 1.3641838693997485e-05, + "loss": 0.7834, "step": 1836 }, { - "epoch": 0.13900344292686617, - "grad_norm": 1.9494025707244873, - "learning_rate": 1.7613075707185203e-05, - "loss": 0.721, + "epoch": 0.12932066173882437, + "grad_norm": 2.305102825164795, + "learning_rate": 1.36413345109654e-05, + "loss": 0.8091, "step": 1837 }, { - "epoch": 0.1390791116491998, - "grad_norm": 1.5588613748550415, - "learning_rate": 1.7612422159028767e-05, - "loss": 0.9273, + "epoch": 0.12939105948609644, + "grad_norm": 2.3371663093566895, + "learning_rate": 1.3640829982644316e-05, + "loss": 0.657, "step": 1838 }, { - "epoch": 0.13915478037153342, - "grad_norm": 3.5548200607299805, - "learning_rate": 1.7611768071534604e-05, - "loss": 0.7202, + "epoch": 0.12946145723336852, + "grad_norm": 2.1166188716888428, + "learning_rate": 1.3640325109060466e-05, + "loss": 0.7913, "step": 1839 }, { - "epoch": 0.13923044909386706, - "grad_norm": 2.5746283531188965, - "learning_rate": 1.7611113444743665e-05, - "loss": 0.708, + "epoch": 0.12953185498064063, + "grad_norm": 2.5462310314178467, + "learning_rate": 1.3639819890240103e-05, + "loss": 0.7464, "step": 1840 }, { - "epoch": 0.13930611781620067, - "grad_norm": 2.695033073425293, - "learning_rate": 1.7610458278696955e-05, - "loss": 0.8412, + "epoch": 0.1296022527279127, + "grad_norm": 2.9634149074554443, + "learning_rate": 1.363931432620949e-05, + "loss": 0.7995, "step": 1841 }, { - "epoch": 0.1393817865385343, - "grad_norm": 2.7120509147644043, - "learning_rate": 1.7609802573435495e-05, - "loss": 0.8491, + "epoch": 0.12967265047518478, + "grad_norm": 3.758740186691284, + "learning_rate": 1.363880841699491e-05, + "loss": 0.7269, "step": 1842 }, { - "epoch": 0.13945745526086792, - "grad_norm": 2.5624072551727295, - "learning_rate": 1.7609146329000353e-05, - "loss": 0.8119, + "epoch": 0.12974304822245689, + "grad_norm": 2.2347915172576904, + "learning_rate": 1.363830216262267e-05, + "loss": 0.6948, "step": 1843 }, { - "epoch": 0.13953312398320156, - "grad_norm": 2.7809038162231445, - "learning_rate": 1.760848954543262e-05, - "loss": 0.7659, + "epoch": 0.12981344596972896, + "grad_norm": 2.3621180057525635, + "learning_rate": 1.3637795563119089e-05, + "loss": 0.8325, "step": 1844 }, { - "epoch": 0.13960879270553517, - "grad_norm": 2.43100905418396, - "learning_rate": 1.760783222277343e-05, - "loss": 0.6677, + "epoch": 0.12988384371700107, + "grad_norm": 2.2070670127868652, + "learning_rate": 1.3637288618510503e-05, + "loss": 0.6647, "step": 1845 }, { - "epoch": 0.13968446142786878, - "grad_norm": 2.619065999984741, - "learning_rate": 1.7607174361063944e-05, - "loss": 0.9192, + "epoch": 0.12995424146427315, + "grad_norm": 2.2238829135894775, + "learning_rate": 1.3636781328823273e-05, + "loss": 0.6848, "step": 1846 }, { - "epoch": 0.13976013015020242, - "grad_norm": 2.997462511062622, - "learning_rate": 1.7606515960345362e-05, - "loss": 0.7037, + "epoch": 0.13002463921154522, + "grad_norm": 3.487086772918701, + "learning_rate": 1.363627369408377e-05, + "loss": 0.9438, "step": 1847 }, { - "epoch": 0.13983579887253603, - "grad_norm": 2.8004891872406006, - "learning_rate": 1.7605857020658913e-05, - "loss": 0.6762, + "epoch": 0.13009503695881733, + "grad_norm": 2.861480236053467, + "learning_rate": 1.3635765714318387e-05, + "loss": 0.8817, "step": 1848 }, { - "epoch": 0.13991146759486967, - "grad_norm": 2.8649933338165283, - "learning_rate": 1.760519754204586e-05, - "loss": 0.6628, + "epoch": 0.1301654347060894, + "grad_norm": 2.2094192504882812, + "learning_rate": 1.3635257389553536e-05, + "loss": 0.7206, "step": 1849 }, { - "epoch": 0.13998713631720328, - "grad_norm": 2.591527223587036, - "learning_rate": 1.760453752454751e-05, - "loss": 0.8484, + "epoch": 0.13023583245336148, + "grad_norm": 2.120537042617798, + "learning_rate": 1.3634748719815644e-05, + "loss": 0.8532, "step": 1850 }, { - "epoch": 0.14006280503953691, - "grad_norm": 2.9254584312438965, - "learning_rate": 1.7603876968205185e-05, - "loss": 0.9029, + "epoch": 0.1303062302006336, + "grad_norm": 2.262815237045288, + "learning_rate": 1.3634239705131155e-05, + "loss": 0.8488, "step": 1851 }, { - "epoch": 0.14013847376187052, - "grad_norm": 2.7631466388702393, - "learning_rate": 1.7603215873060256e-05, - "loss": 0.8673, + "epoch": 0.13037662794790567, + "grad_norm": 2.104053258895874, + "learning_rate": 1.3633730345526538e-05, + "loss": 0.8025, "step": 1852 }, { - "epoch": 0.14021414248420416, - "grad_norm": 2.2092506885528564, - "learning_rate": 1.7602554239154126e-05, - "loss": 0.7803, + "epoch": 0.13044702569517774, + "grad_norm": 2.100344181060791, + "learning_rate": 1.3633220641028271e-05, + "loss": 0.6646, "step": 1853 }, { - "epoch": 0.14028981120653777, - "grad_norm": 5.1182026863098145, - "learning_rate": 1.7601892066528224e-05, - "loss": 0.7412, + "epoch": 0.13051742344244985, + "grad_norm": 3.003621816635132, + "learning_rate": 1.3632710591662857e-05, + "loss": 0.8229, "step": 1854 }, { - "epoch": 0.1403654799288714, - "grad_norm": 2.7302863597869873, - "learning_rate": 1.7601229355224018e-05, - "loss": 0.7575, + "epoch": 0.13058782118972193, + "grad_norm": 2.574700117111206, + "learning_rate": 1.3632200197456812e-05, + "loss": 0.7211, "step": 1855 }, { - "epoch": 0.14044114865120502, - "grad_norm": 2.1949663162231445, - "learning_rate": 1.7600566105283013e-05, - "loss": 0.5413, + "epoch": 0.130658218936994, + "grad_norm": 2.1832973957061768, + "learning_rate": 1.363168945843667e-05, + "loss": 0.7285, "step": 1856 }, { - "epoch": 0.14051681737353866, - "grad_norm": 2.7179486751556396, - "learning_rate": 1.7599902316746737e-05, - "loss": 0.7399, + "epoch": 0.1307286166842661, + "grad_norm": 2.831162214279175, + "learning_rate": 1.363117837462899e-05, + "loss": 0.774, "step": 1857 }, { - "epoch": 0.14059248609587227, - "grad_norm": 2.4774746894836426, - "learning_rate": 1.7599237989656765e-05, - "loss": 0.7259, + "epoch": 0.13079901443153819, + "grad_norm": 2.2083566188812256, + "learning_rate": 1.363066694606034e-05, + "loss": 0.82, "step": 1858 }, { - "epoch": 0.14066815481820588, - "grad_norm": 2.5634522438049316, - "learning_rate": 1.7598573124054694e-05, - "loss": 0.7805, + "epoch": 0.1308694121788103, + "grad_norm": 2.231173038482666, + "learning_rate": 1.363015517275731e-05, + "loss": 0.7601, "step": 1859 }, { - "epoch": 0.14074382354053952, - "grad_norm": 2.349278450012207, - "learning_rate": 1.7597907719982165e-05, - "loss": 0.8274, + "epoch": 0.13093980992608237, + "grad_norm": 2.01057767868042, + "learning_rate": 1.3629643054746508e-05, + "loss": 0.7534, "step": 1860 }, { - "epoch": 0.14081949226287313, - "grad_norm": 2.56119441986084, - "learning_rate": 1.7597241777480846e-05, - "loss": 1.0099, + "epoch": 0.13101020767335445, + "grad_norm": 2.2020952701568604, + "learning_rate": 1.3629130592054558e-05, + "loss": 0.8813, "step": 1861 }, { - "epoch": 0.14089516098520677, - "grad_norm": 2.2470288276672363, - "learning_rate": 1.759657529659244e-05, - "loss": 0.7354, + "epoch": 0.13108060542062655, + "grad_norm": 2.7038967609405518, + "learning_rate": 1.3628617784708106e-05, + "loss": 0.7683, "step": 1862 }, { - "epoch": 0.14097082970754038, - "grad_norm": 2.7131481170654297, - "learning_rate": 1.7595908277358683e-05, - "loss": 0.8058, + "epoch": 0.13115100316789863, + "grad_norm": 2.4637300968170166, + "learning_rate": 1.3628104632733813e-05, + "loss": 0.8272, "step": 1863 }, { - "epoch": 0.14104649842987402, - "grad_norm": 2.9659440517425537, - "learning_rate": 1.7595240719821348e-05, - "loss": 0.6039, + "epoch": 0.1312214009151707, + "grad_norm": 3.190948486328125, + "learning_rate": 1.3627591136158356e-05, + "loss": 0.8552, "step": 1864 }, { - "epoch": 0.14112216715220763, - "grad_norm": 2.83231782913208, - "learning_rate": 1.7594572624022236e-05, - "loss": 1.0244, + "epoch": 0.1312917986624428, + "grad_norm": 2.393796443939209, + "learning_rate": 1.3627077295008435e-05, + "loss": 0.7554, "step": 1865 }, { - "epoch": 0.14119783587454127, - "grad_norm": 2.453878402709961, - "learning_rate": 1.7593903990003194e-05, - "loss": 0.9862, + "epoch": 0.1313621964097149, + "grad_norm": 3.1703975200653076, + "learning_rate": 1.362656310931076e-05, + "loss": 0.7323, "step": 1866 }, { - "epoch": 0.14127350459687488, - "grad_norm": 2.553098678588867, - "learning_rate": 1.7593234817806085e-05, - "loss": 0.8229, + "epoch": 0.13143259415698696, + "grad_norm": 2.4044559001922607, + "learning_rate": 1.3626048579092071e-05, + "loss": 0.8052, "step": 1867 }, { - "epoch": 0.14134917331920852, - "grad_norm": 2.8726489543914795, - "learning_rate": 1.7592565107472817e-05, - "loss": 0.8612, + "epoch": 0.13150299190425907, + "grad_norm": 2.2382147312164307, + "learning_rate": 1.3625533704379115e-05, + "loss": 0.8178, "step": 1868 }, { - "epoch": 0.14142484204154213, - "grad_norm": 2.7454302310943604, - "learning_rate": 1.759189485904533e-05, - "loss": 0.912, + "epoch": 0.13157338965153115, + "grad_norm": 3.024627447128296, + "learning_rate": 1.3625018485198661e-05, + "loss": 0.7555, "step": 1869 }, { - "epoch": 0.14150051076387576, - "grad_norm": 2.6439144611358643, - "learning_rate": 1.7591224072565598e-05, - "loss": 0.6824, + "epoch": 0.13164378739880322, + "grad_norm": 2.018017292022705, + "learning_rate": 1.3624502921577497e-05, + "loss": 0.7941, "step": 1870 }, { - "epoch": 0.14157617948620937, - "grad_norm": 3.527799367904663, - "learning_rate": 1.7590552748075626e-05, - "loss": 0.9554, + "epoch": 0.13171418514607533, + "grad_norm": 2.8861565589904785, + "learning_rate": 1.3623987013542425e-05, + "loss": 0.7562, "step": 1871 }, { - "epoch": 0.14165184820854299, - "grad_norm": 3.4755446910858154, - "learning_rate": 1.7589880885617457e-05, - "loss": 0.7975, + "epoch": 0.1317845828933474, + "grad_norm": 2.131601333618164, + "learning_rate": 1.362347076112027e-05, + "loss": 0.7117, "step": 1872 }, { - "epoch": 0.14172751693087662, - "grad_norm": 2.0089948177337646, - "learning_rate": 1.7589208485233164e-05, - "loss": 0.7643, + "epoch": 0.1318549806406195, + "grad_norm": 2.5671279430389404, + "learning_rate": 1.362295416433787e-05, + "loss": 0.7809, "step": 1873 }, { - "epoch": 0.14180318565321023, - "grad_norm": 2.9593629837036133, - "learning_rate": 1.7588535546964853e-05, - "loss": 0.8146, + "epoch": 0.1319253783878916, + "grad_norm": 2.495081663131714, + "learning_rate": 1.3622437223222087e-05, + "loss": 0.7405, "step": 1874 }, { - "epoch": 0.14187885437554387, - "grad_norm": 2.3689467906951904, - "learning_rate": 1.758786207085467e-05, - "loss": 0.9881, + "epoch": 0.13199577613516367, + "grad_norm": 2.019268751144409, + "learning_rate": 1.3621919937799795e-05, + "loss": 0.8142, "step": 1875 }, { - "epoch": 0.14195452309787748, - "grad_norm": 2.658514976501465, - "learning_rate": 1.758718805694479e-05, - "loss": 0.5528, + "epoch": 0.13206617388243577, + "grad_norm": 2.019686460494995, + "learning_rate": 1.3621402308097887e-05, + "loss": 0.8295, "step": 1876 }, { - "epoch": 0.14203019182021112, - "grad_norm": 2.965433359146118, - "learning_rate": 1.7586513505277414e-05, - "loss": 0.6984, + "epoch": 0.13213657162970785, + "grad_norm": 2.1739399433135986, + "learning_rate": 1.3620884334143278e-05, + "loss": 0.8134, "step": 1877 }, { - "epoch": 0.14210586054254473, - "grad_norm": 3.3098106384277344, - "learning_rate": 1.758583841589479e-05, - "loss": 0.9566, + "epoch": 0.13220696937697993, + "grad_norm": 2.3830435276031494, + "learning_rate": 1.3620366015962896e-05, + "loss": 0.7498, "step": 1878 }, { - "epoch": 0.14218152926487837, - "grad_norm": 2.4268958568573, - "learning_rate": 1.7585162788839197e-05, - "loss": 0.8622, + "epoch": 0.13227736712425203, + "grad_norm": 2.5044844150543213, + "learning_rate": 1.3619847353583687e-05, + "loss": 0.7337, "step": 1879 }, { - "epoch": 0.14225719798721198, - "grad_norm": 3.331698417663574, - "learning_rate": 1.7584486624152943e-05, - "loss": 0.8862, + "epoch": 0.1323477648715241, + "grad_norm": 2.208521604537964, + "learning_rate": 1.3619328347032619e-05, + "loss": 0.7802, "step": 1880 }, { - "epoch": 0.14233286670954562, - "grad_norm": 2.746612787246704, - "learning_rate": 1.758380992187837e-05, - "loss": 0.8097, + "epoch": 0.1324181626187962, + "grad_norm": 2.6792333126068115, + "learning_rate": 1.3618808996336676e-05, + "loss": 0.8057, "step": 1881 }, { - "epoch": 0.14240853543187923, - "grad_norm": 2.4068593978881836, - "learning_rate": 1.7583132682057857e-05, - "loss": 0.8202, + "epoch": 0.1324885603660683, + "grad_norm": 1.9486831426620483, + "learning_rate": 1.361828930152286e-05, + "loss": 0.7195, "step": 1882 }, { - "epoch": 0.14248420415421287, - "grad_norm": 2.4909160137176514, - "learning_rate": 1.7582454904733815e-05, - "loss": 0.7272, + "epoch": 0.13255895811334037, + "grad_norm": 2.2408807277679443, + "learning_rate": 1.3617769262618188e-05, + "loss": 0.6922, "step": 1883 }, { - "epoch": 0.14255987287654648, - "grad_norm": 2.3360512256622314, - "learning_rate": 1.7581776589948686e-05, - "loss": 0.7754, + "epoch": 0.13262935586061245, + "grad_norm": 2.4251515865325928, + "learning_rate": 1.3617248879649701e-05, + "loss": 0.8559, "step": 1884 }, { - "epoch": 0.14263554159888012, - "grad_norm": 2.4334325790405273, - "learning_rate": 1.758109773774495e-05, - "loss": 0.7806, + "epoch": 0.13269975360788455, + "grad_norm": 2.0979878902435303, + "learning_rate": 1.361672815264445e-05, + "loss": 0.6283, "step": 1885 }, { - "epoch": 0.14271121032121373, - "grad_norm": 3.528743028640747, - "learning_rate": 1.758041834816512e-05, - "loss": 0.7452, + "epoch": 0.13277015135515663, + "grad_norm": 2.894254207611084, + "learning_rate": 1.361620708162951e-05, + "loss": 0.8755, "step": 1886 }, { - "epoch": 0.14278687904354734, - "grad_norm": 2.863650321960449, - "learning_rate": 1.757973842125174e-05, - "loss": 0.908, + "epoch": 0.13284054910242873, + "grad_norm": 2.005594491958618, + "learning_rate": 1.3615685666631972e-05, + "loss": 0.7578, "step": 1887 }, { - "epoch": 0.14286254776588098, - "grad_norm": 2.7504138946533203, - "learning_rate": 1.757905795704739e-05, - "loss": 0.8345, + "epoch": 0.1329109468497008, + "grad_norm": 2.0733625888824463, + "learning_rate": 1.3615163907678944e-05, + "loss": 0.7767, "step": 1888 }, { - "epoch": 0.1429382164882146, - "grad_norm": 2.4951789379119873, - "learning_rate": 1.7578376955594682e-05, - "loss": 0.7721, + "epoch": 0.1329813445969729, + "grad_norm": 2.7533702850341797, + "learning_rate": 1.3614641804797554e-05, + "loss": 0.8469, "step": 1889 }, { - "epoch": 0.14301388521054823, - "grad_norm": 2.6636242866516113, - "learning_rate": 1.7577695416936263e-05, - "loss": 0.9099, + "epoch": 0.133051742344245, + "grad_norm": 2.415290355682373, + "learning_rate": 1.3614119358014946e-05, + "loss": 0.761, "step": 1890 }, { - "epoch": 0.14308955393288184, - "grad_norm": 2.8234310150146484, - "learning_rate": 1.7577013341114815e-05, - "loss": 1.0285, + "epoch": 0.13312214009151707, + "grad_norm": 2.079442262649536, + "learning_rate": 1.3613596567358283e-05, + "loss": 0.7533, "step": 1891 }, { - "epoch": 0.14316522265521547, - "grad_norm": 3.015465259552002, - "learning_rate": 1.7576330728173047e-05, - "loss": 0.6027, + "epoch": 0.13319253783878915, + "grad_norm": 2.0873489379882812, + "learning_rate": 1.3613073432854744e-05, + "loss": 0.8722, "step": 1892 }, { - "epoch": 0.14324089137754908, - "grad_norm": 2.617048740386963, - "learning_rate": 1.7575647578153716e-05, - "loss": 0.9196, + "epoch": 0.13326293558606125, + "grad_norm": 2.010880708694458, + "learning_rate": 1.3612549954531528e-05, + "loss": 0.7316, "step": 1893 }, { - "epoch": 0.14331656009988272, - "grad_norm": 3.0418808460235596, - "learning_rate": 1.757496389109959e-05, - "loss": 0.68, + "epoch": 0.13333333333333333, + "grad_norm": 2.19970965385437, + "learning_rate": 1.3612026132415851e-05, + "loss": 0.7758, "step": 1894 }, { - "epoch": 0.14339222882221633, - "grad_norm": 2.8369641304016113, - "learning_rate": 1.7574279667053494e-05, - "loss": 0.8018, + "epoch": 0.1334037310806054, + "grad_norm": 2.8357908725738525, + "learning_rate": 1.3611501966534948e-05, + "loss": 0.7816, "step": 1895 }, { - "epoch": 0.14346789754454997, - "grad_norm": 2.903010129928589, - "learning_rate": 1.7573594906058273e-05, - "loss": 0.7899, + "epoch": 0.13347412882787751, + "grad_norm": 2.0144994258880615, + "learning_rate": 1.361097745691607e-05, + "loss": 0.721, "step": 1896 }, { - "epoch": 0.14354356626688358, - "grad_norm": 3.224677324295044, - "learning_rate": 1.7572909608156805e-05, - "loss": 0.8495, + "epoch": 0.1335445265751496, + "grad_norm": 2.2422001361846924, + "learning_rate": 1.3610452603586486e-05, + "loss": 0.8258, "step": 1897 }, { - "epoch": 0.14361923498921722, - "grad_norm": 3.112607955932617, - "learning_rate": 1.7572223773392012e-05, - "loss": 0.7727, + "epoch": 0.13361492432242167, + "grad_norm": 1.9337773323059082, + "learning_rate": 1.3609927406573484e-05, + "loss": 0.9022, "step": 1898 }, { - "epoch": 0.14369490371155083, - "grad_norm": 2.8533096313476562, - "learning_rate": 1.757153740180684e-05, - "loss": 0.7837, + "epoch": 0.13368532206969377, + "grad_norm": 2.263606548309326, + "learning_rate": 1.3609401865904372e-05, + "loss": 0.785, "step": 1899 }, { - "epoch": 0.14377057243388444, - "grad_norm": 7.023125648498535, - "learning_rate": 1.7570850493444273e-05, - "loss": 0.7611, + "epoch": 0.13375571981696585, + "grad_norm": 2.3631277084350586, + "learning_rate": 1.360887598160647e-05, + "loss": 0.6474, "step": 1900 }, { - "epoch": 0.14384624115621808, - "grad_norm": 3.051293134689331, - "learning_rate": 1.7570163048347325e-05, - "loss": 0.8792, + "epoch": 0.13382611756423796, + "grad_norm": 2.476577043533325, + "learning_rate": 1.360834975370712e-05, + "loss": 0.7258, "step": 1901 }, { - "epoch": 0.1439219098785517, - "grad_norm": 2.489708185195923, - "learning_rate": 1.7569475066559046e-05, - "loss": 0.7576, + "epoch": 0.13389651531151003, + "grad_norm": 2.17185378074646, + "learning_rate": 1.3607823182233682e-05, + "loss": 0.7332, "step": 1902 }, { - "epoch": 0.14399757860088533, - "grad_norm": 3.0184719562530518, - "learning_rate": 1.7568786548122527e-05, - "loss": 0.704, + "epoch": 0.1339669130587821, + "grad_norm": 2.0851991176605225, + "learning_rate": 1.3607296267213531e-05, + "loss": 0.746, "step": 1903 }, { - "epoch": 0.14407324732321894, - "grad_norm": 2.3036086559295654, - "learning_rate": 1.7568097493080874e-05, - "loss": 0.622, + "epoch": 0.13403731080605422, + "grad_norm": 2.326796770095825, + "learning_rate": 1.3606769008674066e-05, + "loss": 0.7286, "step": 1904 }, { - "epoch": 0.14414891604555258, - "grad_norm": 2.6877403259277344, - "learning_rate": 1.7567407901477243e-05, - "loss": 0.6003, + "epoch": 0.1341077085533263, + "grad_norm": 2.218841314315796, + "learning_rate": 1.3606241406642692e-05, + "loss": 0.8739, "step": 1905 }, { - "epoch": 0.1442245847678862, - "grad_norm": 2.290517568588257, - "learning_rate": 1.7566717773354822e-05, - "loss": 0.7039, + "epoch": 0.13417810630059837, + "grad_norm": 2.5065817832946777, + "learning_rate": 1.3605713461146849e-05, + "loss": 0.8621, "step": 1906 }, { - "epoch": 0.14430025349021983, - "grad_norm": 3.5468292236328125, - "learning_rate": 1.7566027108756826e-05, - "loss": 0.9343, + "epoch": 0.13424850404787048, + "grad_norm": 2.5060646533966064, + "learning_rate": 1.3605185172213977e-05, + "loss": 0.7049, "step": 1907 }, { - "epoch": 0.14437592221255344, - "grad_norm": 2.674797296524048, - "learning_rate": 1.7565335907726505e-05, - "loss": 0.7771, + "epoch": 0.13431890179514255, + "grad_norm": 2.225355863571167, + "learning_rate": 1.360465653987155e-05, + "loss": 0.7553, "step": 1908 }, { - "epoch": 0.14445159093488708, - "grad_norm": 2.3369529247283936, - "learning_rate": 1.7564644170307146e-05, - "loss": 0.7907, + "epoch": 0.13438929954241463, + "grad_norm": 2.3275983333587646, + "learning_rate": 1.3604127564147044e-05, + "loss": 0.7382, "step": 1909 }, { - "epoch": 0.14452725965722069, - "grad_norm": 2.8003242015838623, - "learning_rate": 1.756395189654207e-05, - "loss": 0.9282, + "epoch": 0.13445969728968674, + "grad_norm": 2.308629035949707, + "learning_rate": 1.3603598245067968e-05, + "loss": 0.7007, "step": 1910 }, { - "epoch": 0.14460292837955432, - "grad_norm": 3.076770305633545, - "learning_rate": 1.7563259086474627e-05, - "loss": 0.8343, + "epoch": 0.1345300950369588, + "grad_norm": 2.909109592437744, + "learning_rate": 1.3603068582661838e-05, + "loss": 0.7541, "step": 1911 }, { - "epoch": 0.14467859710188793, - "grad_norm": 2.4851486682891846, - "learning_rate": 1.7562565740148202e-05, - "loss": 0.813, + "epoch": 0.13460049278423092, + "grad_norm": 2.424083948135376, + "learning_rate": 1.360253857695619e-05, + "loss": 0.8619, "step": 1912 }, { - "epoch": 0.14475426582422155, - "grad_norm": 2.4978880882263184, - "learning_rate": 1.756187185760621e-05, - "loss": 0.7405, + "epoch": 0.134670890531503, + "grad_norm": 2.4796924591064453, + "learning_rate": 1.3602008227978584e-05, + "loss": 0.758, "step": 1913 }, { - "epoch": 0.14482993454655518, - "grad_norm": 2.255244255065918, - "learning_rate": 1.7561177438892118e-05, - "loss": 0.6685, + "epoch": 0.13474128827877507, + "grad_norm": 2.1611218452453613, + "learning_rate": 1.3601477535756591e-05, + "loss": 0.7379, "step": 1914 }, { - "epoch": 0.1449056032688888, - "grad_norm": 2.5343151092529297, - "learning_rate": 1.7560482484049402e-05, - "loss": 0.8281, + "epoch": 0.13481168602604718, + "grad_norm": 2.286999225616455, + "learning_rate": 1.3600946500317803e-05, + "loss": 0.7163, "step": 1915 }, { - "epoch": 0.14498127199122243, - "grad_norm": 2.053952693939209, - "learning_rate": 1.7559786993121583e-05, - "loss": 0.6369, + "epoch": 0.13488208377331926, + "grad_norm": 2.0519180297851562, + "learning_rate": 1.3600415121689826e-05, + "loss": 0.8041, "step": 1916 }, { - "epoch": 0.14505694071355604, - "grad_norm": 2.417632818222046, - "learning_rate": 1.755909096615222e-05, - "loss": 0.8845, + "epoch": 0.13495248152059133, + "grad_norm": 2.749436855316162, + "learning_rate": 1.3599883399900289e-05, + "loss": 0.8025, "step": 1917 }, { - "epoch": 0.14513260943588968, - "grad_norm": 2.182724714279175, - "learning_rate": 1.7558394403184892e-05, - "loss": 0.7699, + "epoch": 0.13502287926786344, + "grad_norm": 2.311777353286743, + "learning_rate": 1.359935133497684e-05, + "loss": 0.8143, "step": 1918 }, { - "epoch": 0.1452082781582233, - "grad_norm": 2.5295472145080566, - "learning_rate": 1.755769730426323e-05, - "loss": 0.8116, + "epoch": 0.13509327701513552, + "grad_norm": 2.494694948196411, + "learning_rate": 1.3598818926947135e-05, + "loss": 0.6401, "step": 1919 }, { - "epoch": 0.14528394688055693, - "grad_norm": 2.3989028930664062, - "learning_rate": 1.7556999669430882e-05, - "loss": 0.7237, + "epoch": 0.1351636747624076, + "grad_norm": 3.227447986602783, + "learning_rate": 1.3598286175838858e-05, + "loss": 0.7004, "step": 1920 }, { - "epoch": 0.14535961560289054, - "grad_norm": 2.519035816192627, - "learning_rate": 1.755630149873154e-05, - "loss": 0.9275, + "epoch": 0.1352340725096797, + "grad_norm": 2.2471258640289307, + "learning_rate": 1.359775308167971e-05, + "loss": 0.7483, "step": 1921 }, { - "epoch": 0.14543528432522418, - "grad_norm": 2.8320114612579346, - "learning_rate": 1.755560279220892e-05, - "loss": 0.8483, + "epoch": 0.13530447025695178, + "grad_norm": 1.8263351917266846, + "learning_rate": 1.3597219644497401e-05, + "loss": 0.7589, "step": 1922 }, { - "epoch": 0.1455109530475578, - "grad_norm": 2.433471202850342, - "learning_rate": 1.755490354990678e-05, - "loss": 0.7238, + "epoch": 0.13537486800422385, + "grad_norm": 2.466553211212158, + "learning_rate": 1.3596685864319669e-05, + "loss": 0.855, "step": 1923 }, { - "epoch": 0.14558662176989143, - "grad_norm": 2.112417697906494, - "learning_rate": 1.7554203771868918e-05, - "loss": 0.8088, + "epoch": 0.13544526575149596, + "grad_norm": 1.9169102907180786, + "learning_rate": 1.3596151741174264e-05, + "loss": 0.8291, "step": 1924 }, { - "epoch": 0.14566229049222504, - "grad_norm": 3.250727891921997, - "learning_rate": 1.755350345813914e-05, - "loss": 0.7521, + "epoch": 0.13551566349876804, + "grad_norm": 2.401426076889038, + "learning_rate": 1.3595617275088956e-05, + "loss": 0.7603, "step": 1925 }, { - "epoch": 0.14573795921455865, - "grad_norm": 2.6410152912139893, - "learning_rate": 1.7552802608761317e-05, - "loss": 0.8002, + "epoch": 0.13558606124604014, + "grad_norm": 2.499768018722534, + "learning_rate": 1.3595082466091533e-05, + "loss": 0.8387, "step": 1926 }, { - "epoch": 0.1458136279368923, - "grad_norm": 2.4262301921844482, - "learning_rate": 1.7552101223779325e-05, - "loss": 0.7778, + "epoch": 0.13565645899331222, + "grad_norm": 2.067671537399292, + "learning_rate": 1.3594547314209801e-05, + "loss": 0.7345, "step": 1927 }, { - "epoch": 0.1458892966592259, - "grad_norm": 2.871870517730713, - "learning_rate": 1.7551399303237097e-05, - "loss": 0.8634, + "epoch": 0.1357268567405843, + "grad_norm": 2.19545316696167, + "learning_rate": 1.359401181947158e-05, + "loss": 0.8727, "step": 1928 }, { - "epoch": 0.14596496538155954, - "grad_norm": 2.4270362854003906, - "learning_rate": 1.7550696847178586e-05, - "loss": 0.8465, + "epoch": 0.1357972544878564, + "grad_norm": 1.876368761062622, + "learning_rate": 1.3593475981904714e-05, + "loss": 0.6973, "step": 1929 }, { - "epoch": 0.14604063410389315, - "grad_norm": 2.422614097595215, - "learning_rate": 1.7549993855647778e-05, - "loss": 0.7685, + "epoch": 0.13586765223512848, + "grad_norm": 2.378824472427368, + "learning_rate": 1.3592939801537061e-05, + "loss": 0.8822, "step": 1930 }, { - "epoch": 0.14611630282622678, - "grad_norm": 2.4596493244171143, - "learning_rate": 1.7549290328688707e-05, - "loss": 0.8147, + "epoch": 0.13593804998240056, + "grad_norm": 3.0910491943359375, + "learning_rate": 1.3592403278396494e-05, + "loss": 0.6914, "step": 1931 }, { - "epoch": 0.1461919715485604, - "grad_norm": 2.994337558746338, - "learning_rate": 1.754858626634542e-05, - "loss": 0.7431, + "epoch": 0.13600844772967266, + "grad_norm": 2.362002372741699, + "learning_rate": 1.3591866412510913e-05, + "loss": 0.7972, "step": 1932 }, { - "epoch": 0.14626764027089403, - "grad_norm": 3.324469804763794, - "learning_rate": 1.754788166866201e-05, - "loss": 0.9868, + "epoch": 0.13607884547694474, + "grad_norm": 2.2485787868499756, + "learning_rate": 1.3591329203908224e-05, + "loss": 0.8505, "step": 1933 }, { - "epoch": 0.14634330899322764, - "grad_norm": 2.912229061126709, - "learning_rate": 1.7547176535682607e-05, - "loss": 0.8108, + "epoch": 0.13614924322421681, + "grad_norm": 2.3771824836730957, + "learning_rate": 1.3590791652616362e-05, + "loss": 0.8222, "step": 1934 }, { - "epoch": 0.14641897771556128, - "grad_norm": 3.092162847518921, - "learning_rate": 1.754647086745136e-05, - "loss": 0.7094, + "epoch": 0.13621964097148892, + "grad_norm": 2.6254820823669434, + "learning_rate": 1.3590253758663273e-05, + "loss": 0.7714, "step": 1935 }, { - "epoch": 0.1464946464378949, - "grad_norm": 2.593982219696045, - "learning_rate": 1.754576466401247e-05, - "loss": 0.7324, + "epoch": 0.136290038718761, + "grad_norm": 2.1136019229888916, + "learning_rate": 1.3589715522076925e-05, + "loss": 0.7104, "step": 1936 }, { - "epoch": 0.14657031516022853, - "grad_norm": 2.84027361869812, - "learning_rate": 1.7545057925410154e-05, - "loss": 0.707, + "epoch": 0.13636043646603307, + "grad_norm": 2.489468812942505, + "learning_rate": 1.3589176942885294e-05, + "loss": 0.7752, "step": 1937 }, { - "epoch": 0.14664598388256214, - "grad_norm": 2.8210887908935547, - "learning_rate": 1.754435065168867e-05, - "loss": 0.8568, + "epoch": 0.13643083421330518, + "grad_norm": 2.2035090923309326, + "learning_rate": 1.3588638021116389e-05, + "loss": 0.8186, "step": 1938 }, { - "epoch": 0.14672165260489575, - "grad_norm": 2.999987840652466, - "learning_rate": 1.754364284289232e-05, - "loss": 0.768, + "epoch": 0.13650123196057726, + "grad_norm": 1.7516924142837524, + "learning_rate": 1.3588098756798225e-05, + "loss": 0.6657, "step": 1939 }, { - "epoch": 0.1467973213272294, - "grad_norm": 3.40769362449646, - "learning_rate": 1.7542934499065413e-05, - "loss": 0.896, + "epoch": 0.13657162970784936, + "grad_norm": 2.3919460773468018, + "learning_rate": 1.3587559149958843e-05, + "loss": 0.7708, "step": 1940 }, { - "epoch": 0.146872990049563, - "grad_norm": 2.4607155323028564, - "learning_rate": 1.7542225620252318e-05, - "loss": 0.7203, + "epoch": 0.13664202745512144, + "grad_norm": 2.4444799423217773, + "learning_rate": 1.3587019200626292e-05, + "loss": 0.8476, "step": 1941 }, { - "epoch": 0.14694865877189664, - "grad_norm": 2.867579460144043, - "learning_rate": 1.754151620649743e-05, - "loss": 0.7944, + "epoch": 0.13671242520239352, + "grad_norm": 2.5161588191986084, + "learning_rate": 1.3586478908828649e-05, + "loss": 0.6873, "step": 1942 }, { - "epoch": 0.14702432749423025, - "grad_norm": 2.73762583732605, - "learning_rate": 1.7540806257845167e-05, - "loss": 0.8689, + "epoch": 0.13678282294966562, + "grad_norm": 2.228607654571533, + "learning_rate": 1.3585938274594003e-05, + "loss": 0.7474, "step": 1943 }, { - "epoch": 0.1470999962165639, - "grad_norm": 2.8091366291046143, - "learning_rate": 1.7540095774339995e-05, - "loss": 0.85, + "epoch": 0.1368532206969377, + "grad_norm": 2.4045674800872803, + "learning_rate": 1.358539729795046e-05, + "loss": 0.7818, "step": 1944 }, { - "epoch": 0.1471756649388975, - "grad_norm": 2.514357089996338, - "learning_rate": 1.75393847560264e-05, - "loss": 0.9253, + "epoch": 0.13692361844420978, + "grad_norm": 1.9466960430145264, + "learning_rate": 1.3584855978926146e-05, + "loss": 0.7054, "step": 1945 }, { - "epoch": 0.14725133366123114, - "grad_norm": 2.452388286590576, - "learning_rate": 1.7538673202948913e-05, - "loss": 0.5905, + "epoch": 0.13699401619148188, + "grad_norm": 2.2940680980682373, + "learning_rate": 1.3584314317549208e-05, + "loss": 0.7498, "step": 1946 }, { - "epoch": 0.14732700238356475, - "grad_norm": 2.8314881324768066, - "learning_rate": 1.7537961115152093e-05, - "loss": 0.7482, + "epoch": 0.13706441393875396, + "grad_norm": 2.4398934841156006, + "learning_rate": 1.3583772313847805e-05, + "loss": 0.7194, "step": 1947 }, { - "epoch": 0.1474026711058984, - "grad_norm": 3.156912088394165, - "learning_rate": 1.7537248492680532e-05, - "loss": 0.7742, + "epoch": 0.13713481168602604, + "grad_norm": 1.9971987009048462, + "learning_rate": 1.3583229967850117e-05, + "loss": 0.8609, "step": 1948 }, { - "epoch": 0.147478339828232, - "grad_norm": 2.562253713607788, - "learning_rate": 1.7536535335578858e-05, - "loss": 0.8122, + "epoch": 0.13720520943329814, + "grad_norm": 2.4983556270599365, + "learning_rate": 1.3582687279584341e-05, + "loss": 0.8667, "step": 1949 }, { - "epoch": 0.14755400855056564, - "grad_norm": 2.4131200313568115, - "learning_rate": 1.7535821643891732e-05, - "loss": 0.8995, + "epoch": 0.13727560718057022, + "grad_norm": 1.9318809509277344, + "learning_rate": 1.3582144249078692e-05, + "loss": 0.7655, "step": 1950 }, { - "epoch": 0.14762967727289925, - "grad_norm": 2.465721368789673, - "learning_rate": 1.7535107417663845e-05, - "loss": 0.7528, + "epoch": 0.1373460049278423, + "grad_norm": 1.9988089799880981, + "learning_rate": 1.3581600876361401e-05, + "loss": 0.808, "step": 1951 }, { - "epoch": 0.14770534599523288, - "grad_norm": 2.287510633468628, - "learning_rate": 1.7534392656939927e-05, - "loss": 0.7947, + "epoch": 0.1374164026751144, + "grad_norm": 2.1849348545074463, + "learning_rate": 1.3581057161460723e-05, + "loss": 0.7175, "step": 1952 }, { - "epoch": 0.1477810147175665, - "grad_norm": 2.6098928451538086, - "learning_rate": 1.7533677361764738e-05, - "loss": 0.8785, + "epoch": 0.13748680042238648, + "grad_norm": 2.3738396167755127, + "learning_rate": 1.3580513104404919e-05, + "loss": 0.7448, "step": 1953 }, { - "epoch": 0.1478566834399001, - "grad_norm": 2.5681772232055664, - "learning_rate": 1.7532961532183065e-05, - "loss": 0.7377, + "epoch": 0.13755719816965858, + "grad_norm": 2.326361894607544, + "learning_rate": 1.357996870522228e-05, + "loss": 0.6477, "step": 1954 }, { - "epoch": 0.14793235216223374, - "grad_norm": 2.2554850578308105, - "learning_rate": 1.753224516823975e-05, - "loss": 0.7856, + "epoch": 0.13762759591693066, + "grad_norm": 1.6295857429504395, + "learning_rate": 1.3579423963941111e-05, + "loss": 0.7371, "step": 1955 }, { - "epoch": 0.14800802088456735, - "grad_norm": 2.409834146499634, - "learning_rate": 1.7531528269979642e-05, - "loss": 0.7473, + "epoch": 0.13769799366420274, + "grad_norm": 2.1350173950195312, + "learning_rate": 1.3578878880589729e-05, + "loss": 0.8107, "step": 1956 }, { - "epoch": 0.148083689606901, - "grad_norm": 4.173964023590088, - "learning_rate": 1.753081083744764e-05, - "loss": 0.9151, + "epoch": 0.13776839141147484, + "grad_norm": 2.0894556045532227, + "learning_rate": 1.3578333455196477e-05, + "loss": 0.7772, "step": 1957 }, { - "epoch": 0.1481593583292346, - "grad_norm": 3.092050790786743, - "learning_rate": 1.753009287068867e-05, - "loss": 0.8214, + "epoch": 0.13783878915874692, + "grad_norm": 2.438178300857544, + "learning_rate": 1.3577787687789711e-05, + "loss": 0.7477, "step": 1958 }, { - "epoch": 0.14823502705156824, - "grad_norm": 2.639125108718872, - "learning_rate": 1.7529374369747697e-05, - "loss": 0.6853, + "epoch": 0.137909186906019, + "grad_norm": 2.361396312713623, + "learning_rate": 1.3577241578397805e-05, + "loss": 0.7835, "step": 1959 }, { - "epoch": 0.14831069577390185, - "grad_norm": 2.0184338092803955, - "learning_rate": 1.7528655334669715e-05, - "loss": 0.7324, + "epoch": 0.1379795846532911, + "grad_norm": 2.260019063949585, + "learning_rate": 1.3576695127049153e-05, + "loss": 0.743, "step": 1960 }, { - "epoch": 0.1483863644962355, - "grad_norm": 2.531182289123535, - "learning_rate": 1.7527935765499746e-05, - "loss": 0.8407, + "epoch": 0.13804998240056318, + "grad_norm": 1.9856444597244263, + "learning_rate": 1.3576148333772167e-05, + "loss": 0.7094, "step": 1961 }, { - "epoch": 0.1484620332185691, - "grad_norm": 2.4710707664489746, - "learning_rate": 1.7527215662282862e-05, - "loss": 0.7486, + "epoch": 0.13812038014783526, + "grad_norm": 2.427976131439209, + "learning_rate": 1.357560119859527e-05, + "loss": 0.68, "step": 1962 }, { - "epoch": 0.14853770194090274, - "grad_norm": 2.496640920639038, - "learning_rate": 1.7526495025064147e-05, - "loss": 0.7771, + "epoch": 0.13819077789510736, + "grad_norm": 2.2321524620056152, + "learning_rate": 1.3575053721546913e-05, + "loss": 0.7702, "step": 1963 }, { - "epoch": 0.14861337066323635, - "grad_norm": 2.6928977966308594, - "learning_rate": 1.752577385388874e-05, - "loss": 0.7551, + "epoch": 0.13826117564237944, + "grad_norm": 1.902010440826416, + "learning_rate": 1.357450590265556e-05, + "loss": 0.8051, "step": 1964 }, { - "epoch": 0.14868903938557, - "grad_norm": 3.124969720840454, - "learning_rate": 1.75250521488018e-05, - "loss": 0.8272, + "epoch": 0.13833157338965152, + "grad_norm": 2.2959933280944824, + "learning_rate": 1.3573957741949689e-05, + "loss": 0.8841, "step": 1965 }, { - "epoch": 0.1487647081079036, - "grad_norm": 2.917346477508545, - "learning_rate": 1.7524329909848514e-05, - "loss": 0.8293, + "epoch": 0.13840197113692362, + "grad_norm": 2.3663811683654785, + "learning_rate": 1.35734092394578e-05, + "loss": 0.758, "step": 1966 }, { - "epoch": 0.1488403768302372, - "grad_norm": 4.432890892028809, - "learning_rate": 1.7523607137074124e-05, - "loss": 0.6315, + "epoch": 0.1384723688841957, + "grad_norm": 2.561871290206909, + "learning_rate": 1.3572860395208415e-05, + "loss": 0.7589, "step": 1967 }, { - "epoch": 0.14891604555257085, - "grad_norm": 2.767793893814087, - "learning_rate": 1.7522883830523887e-05, - "loss": 0.8295, + "epoch": 0.1385427666314678, + "grad_norm": 2.1267249584198, + "learning_rate": 1.3572311209230063e-05, + "loss": 0.6337, "step": 1968 }, { - "epoch": 0.14899171427490446, - "grad_norm": 2.901761531829834, - "learning_rate": 1.7522159990243096e-05, - "loss": 0.8094, + "epoch": 0.13861316437873988, + "grad_norm": 2.2744343280792236, + "learning_rate": 1.35717616815513e-05, + "loss": 0.7442, "step": 1969 }, { - "epoch": 0.1490673829972381, - "grad_norm": 3.11002254486084, - "learning_rate": 1.7521435616277083e-05, - "loss": 0.801, + "epoch": 0.13868356212601196, + "grad_norm": 2.9091222286224365, + "learning_rate": 1.3571211812200694e-05, + "loss": 0.8805, "step": 1970 }, { - "epoch": 0.1491430517195717, - "grad_norm": 2.691927433013916, - "learning_rate": 1.7520710708671207e-05, - "loss": 0.8218, + "epoch": 0.13875395987328407, + "grad_norm": 5.055299282073975, + "learning_rate": 1.3570661601206836e-05, + "loss": 0.8747, "step": 1971 }, { - "epoch": 0.14921872044190534, - "grad_norm": 2.540382146835327, - "learning_rate": 1.751998526747087e-05, - "loss": 0.8464, + "epoch": 0.13882435762055614, + "grad_norm": 2.4621737003326416, + "learning_rate": 1.3570111048598332e-05, + "loss": 0.6658, "step": 1972 }, { - "epoch": 0.14929438916423896, - "grad_norm": 2.418942451477051, - "learning_rate": 1.75192592927215e-05, - "loss": 0.7234, + "epoch": 0.13889475536782822, + "grad_norm": 2.569411039352417, + "learning_rate": 1.3569560154403802e-05, + "loss": 0.8446, "step": 1973 }, { - "epoch": 0.1493700578865726, - "grad_norm": 3.039180278778076, - "learning_rate": 1.7518532784468555e-05, - "loss": 0.7202, + "epoch": 0.13896515311510033, + "grad_norm": 2.068253517150879, + "learning_rate": 1.356900891865189e-05, + "loss": 0.8468, "step": 1974 }, { - "epoch": 0.1494457266089062, - "grad_norm": 3.523127555847168, - "learning_rate": 1.7517805742757537e-05, - "loss": 0.83, + "epoch": 0.1390355508623724, + "grad_norm": 2.315368175506592, + "learning_rate": 1.3568457341371255e-05, + "loss": 0.7799, "step": 1975 }, { - "epoch": 0.14952139533123984, - "grad_norm": 2.5864298343658447, - "learning_rate": 1.751707816763397e-05, - "loss": 0.9138, + "epoch": 0.13910594860964448, + "grad_norm": 2.039508819580078, + "learning_rate": 1.3567905422590573e-05, + "loss": 0.8208, "step": 1976 }, { - "epoch": 0.14959706405357345, - "grad_norm": 2.7911880016326904, - "learning_rate": 1.7516350059143425e-05, - "loss": 0.7544, + "epoch": 0.13917634635691659, + "grad_norm": 2.2159430980682373, + "learning_rate": 1.3567353162338543e-05, + "loss": 0.7069, "step": 1977 }, { - "epoch": 0.1496727327759071, - "grad_norm": 3.0859878063201904, - "learning_rate": 1.7515621417331493e-05, - "loss": 0.77, + "epoch": 0.13924674410418866, + "grad_norm": 2.021608591079712, + "learning_rate": 1.3566800560643873e-05, + "loss": 0.8039, "step": 1978 }, { - "epoch": 0.1497484014982407, - "grad_norm": 2.647671699523926, - "learning_rate": 1.7514892242243805e-05, - "loss": 0.842, + "epoch": 0.13931714185146074, + "grad_norm": 2.191493034362793, + "learning_rate": 1.3566247617535295e-05, + "loss": 0.7746, "step": 1979 }, { - "epoch": 0.1498240702205743, - "grad_norm": 2.811793804168701, - "learning_rate": 1.7514162533926024e-05, - "loss": 0.8385, + "epoch": 0.13938753959873285, + "grad_norm": 2.211601734161377, + "learning_rate": 1.3565694333041556e-05, + "loss": 0.7813, "step": 1980 }, { - "epoch": 0.14989973894290795, - "grad_norm": 2.701957941055298, - "learning_rate": 1.7513432292423846e-05, - "loss": 0.726, + "epoch": 0.13945793734600492, + "grad_norm": 2.5179941654205322, + "learning_rate": 1.3565140707191422e-05, + "loss": 0.8092, "step": 1981 }, { - "epoch": 0.14997540766524156, - "grad_norm": 2.3943700790405273, - "learning_rate": 1.7512701517783006e-05, - "loss": 0.8246, + "epoch": 0.13952833509327703, + "grad_norm": 1.8915427923202515, + "learning_rate": 1.3564586740013677e-05, + "loss": 0.559, "step": 1982 }, { - "epoch": 0.1500510763875752, - "grad_norm": 2.8560993671417236, - "learning_rate": 1.751197021004926e-05, - "loss": 0.8316, + "epoch": 0.1395987328405491, + "grad_norm": 2.2617223262786865, + "learning_rate": 1.356403243153712e-05, + "loss": 0.7995, "step": 1983 }, { - "epoch": 0.1501267451099088, - "grad_norm": 2.4664223194122314, - "learning_rate": 1.7511238369268408e-05, - "loss": 0.8168, + "epoch": 0.13966913058782118, + "grad_norm": 2.1160953044891357, + "learning_rate": 1.3563477781790575e-05, + "loss": 0.7255, "step": 1984 }, { - "epoch": 0.15020241383224245, - "grad_norm": 2.2594258785247803, - "learning_rate": 1.7510505995486278e-05, - "loss": 0.6974, + "epoch": 0.1397395283350933, + "grad_norm": 2.628541946411133, + "learning_rate": 1.3562922790802877e-05, + "loss": 0.7983, "step": 1985 }, { - "epoch": 0.15027808255457606, - "grad_norm": 2.1898341178894043, - "learning_rate": 1.7509773088748744e-05, - "loss": 0.7319, + "epoch": 0.13980992608236537, + "grad_norm": 1.8303766250610352, + "learning_rate": 1.3562367458602878e-05, + "loss": 0.621, "step": 1986 }, { - "epoch": 0.1503537512769097, - "grad_norm": 2.4452483654022217, - "learning_rate": 1.7509039649101688e-05, - "loss": 0.9508, + "epoch": 0.13988032382963744, + "grad_norm": 2.4398915767669678, + "learning_rate": 1.356181178521945e-05, + "loss": 0.7953, "step": 1987 }, { - "epoch": 0.1504294199992433, - "grad_norm": 2.685222864151001, - "learning_rate": 1.750830567659105e-05, - "loss": 0.6802, + "epoch": 0.13995072157690955, + "grad_norm": 2.1239535808563232, + "learning_rate": 1.3561255770681487e-05, + "loss": 0.8115, "step": 1988 }, { - "epoch": 0.15050508872157695, - "grad_norm": 2.3486924171447754, - "learning_rate": 1.7507571171262793e-05, - "loss": 0.745, + "epoch": 0.14002111932418163, + "grad_norm": 2.0967984199523926, + "learning_rate": 1.3560699415017893e-05, + "loss": 0.7181, "step": 1989 }, { - "epoch": 0.15058075744391056, - "grad_norm": 2.269319534301758, - "learning_rate": 1.7506836133162912e-05, - "loss": 0.621, + "epoch": 0.1400915170714537, + "grad_norm": 2.217789888381958, + "learning_rate": 1.3560142718257594e-05, + "loss": 0.7811, "step": 1990 }, { - "epoch": 0.1506564261662442, - "grad_norm": 2.322559118270874, - "learning_rate": 1.7506100562337433e-05, - "loss": 0.706, + "epoch": 0.1401619148187258, + "grad_norm": 1.9287405014038086, + "learning_rate": 1.3559585680429536e-05, + "loss": 0.6843, "step": 1991 }, { - "epoch": 0.1507320948885778, - "grad_norm": 2.2499871253967285, - "learning_rate": 1.7505364458832433e-05, - "loss": 0.8762, + "epoch": 0.14023231256599789, + "grad_norm": 2.2977802753448486, + "learning_rate": 1.3559028301562676e-05, + "loss": 0.8362, "step": 1992 }, { - "epoch": 0.15080776361091142, - "grad_norm": 2.313094139099121, - "learning_rate": 1.7504627822693997e-05, - "loss": 0.8429, + "epoch": 0.14030271031326996, + "grad_norm": 2.161982536315918, + "learning_rate": 1.3558470581685996e-05, + "loss": 0.7367, "step": 1993 }, { - "epoch": 0.15088343233324505, - "grad_norm": 2.01436710357666, - "learning_rate": 1.750389065396826e-05, - "loss": 0.7233, + "epoch": 0.14037310806054207, + "grad_norm": 2.167020797729492, + "learning_rate": 1.3557912520828488e-05, + "loss": 0.7284, "step": 1994 }, { - "epoch": 0.15095910105557866, - "grad_norm": 3.0272583961486816, - "learning_rate": 1.7503152952701382e-05, - "loss": 0.7201, + "epoch": 0.14044350580781415, + "grad_norm": 2.2795302867889404, + "learning_rate": 1.355735411901917e-05, + "loss": 0.7943, "step": 1995 }, { - "epoch": 0.1510347697779123, - "grad_norm": 2.1522340774536133, - "learning_rate": 1.7502414718939565e-05, - "loss": 0.6485, + "epoch": 0.14051390355508625, + "grad_norm": 2.1784658432006836, + "learning_rate": 1.3556795376287075e-05, + "loss": 0.7391, "step": 1996 }, { - "epoch": 0.1511104385002459, - "grad_norm": 2.177858829498291, - "learning_rate": 1.750167595272904e-05, - "loss": 0.7589, + "epoch": 0.14058430130235833, + "grad_norm": 2.34366774559021, + "learning_rate": 1.3556236292661248e-05, + "loss": 0.8839, "step": 1997 }, { - "epoch": 0.15118610722257955, - "grad_norm": 2.149120569229126, - "learning_rate": 1.750093665411607e-05, - "loss": 0.6136, + "epoch": 0.1406546990496304, + "grad_norm": 2.379434108734131, + "learning_rate": 1.3555676868170758e-05, + "loss": 0.6602, "step": 1998 }, { - "epoch": 0.15126177594491316, - "grad_norm": 2.5754570960998535, - "learning_rate": 1.7500196823146948e-05, - "loss": 0.6516, + "epoch": 0.1407250967969025, + "grad_norm": 2.2524821758270264, + "learning_rate": 1.3555117102844692e-05, + "loss": 0.7744, "step": 1999 }, { - "epoch": 0.1513374446672468, - "grad_norm": 2.3540103435516357, - "learning_rate": 1.749945645986801e-05, - "loss": 0.7556, + "epoch": 0.1407954945441746, + "grad_norm": 1.9590221643447876, + "learning_rate": 1.3554556996712148e-05, + "loss": 0.6463, "step": 2000 }, { - "epoch": 0.1514131133895804, - "grad_norm": 2.42387318611145, - "learning_rate": 1.7498715564325618e-05, - "loss": 0.7252, + "epoch": 0.14086589229144666, + "grad_norm": 1.9231648445129395, + "learning_rate": 1.3553996549802252e-05, + "loss": 0.669, "step": 2001 }, { - "epoch": 0.15148878211191405, - "grad_norm": 2.679372549057007, - "learning_rate": 1.749797413656617e-05, - "loss": 0.7455, + "epoch": 0.14093629003871877, + "grad_norm": 2.07080078125, + "learning_rate": 1.3553435762144141e-05, + "loss": 0.6993, "step": 2002 }, { - "epoch": 0.15156445083424766, - "grad_norm": 2.259680986404419, - "learning_rate": 1.7497232176636094e-05, - "loss": 0.7875, + "epoch": 0.14100668778599085, + "grad_norm": 1.9978841543197632, + "learning_rate": 1.3552874633766967e-05, + "loss": 0.7055, "step": 2003 }, { - "epoch": 0.1516401195565813, - "grad_norm": 2.2387616634368896, - "learning_rate": 1.7496489684581854e-05, - "loss": 0.7815, + "epoch": 0.14107708553326292, + "grad_norm": 2.1399142742156982, + "learning_rate": 1.3552313164699908e-05, + "loss": 0.6853, "step": 2004 }, { - "epoch": 0.1517157882789149, - "grad_norm": 3.6523263454437256, - "learning_rate": 1.7495746660449954e-05, - "loss": 0.626, + "epoch": 0.14114748328053503, + "grad_norm": 1.893038034439087, + "learning_rate": 1.3551751354972154e-05, + "loss": 0.7835, "step": 2005 }, { - "epoch": 0.15179145700124855, - "grad_norm": 2.6934144496917725, - "learning_rate": 1.7495003104286916e-05, - "loss": 0.7533, + "epoch": 0.1412178810278071, + "grad_norm": 2.2094383239746094, + "learning_rate": 1.3551189204612912e-05, + "loss": 0.6954, "step": 2006 }, { - "epoch": 0.15186712572358216, - "grad_norm": 2.329314947128296, - "learning_rate": 1.749425901613931e-05, - "loss": 0.7858, + "epoch": 0.14128827877507918, + "grad_norm": 2.437511444091797, + "learning_rate": 1.355062671365141e-05, + "loss": 0.7354, "step": 2007 }, { - "epoch": 0.15194279444591577, - "grad_norm": 3.2107553482055664, - "learning_rate": 1.7493514396053727e-05, - "loss": 0.7217, + "epoch": 0.1413586765223513, + "grad_norm": 2.0128026008605957, + "learning_rate": 1.3550063882116892e-05, + "loss": 0.7522, "step": 2008 }, { - "epoch": 0.1520184631682494, - "grad_norm": 2.6318206787109375, - "learning_rate": 1.7492769244076804e-05, - "loss": 0.7701, + "epoch": 0.14142907426962337, + "grad_norm": 2.251699686050415, + "learning_rate": 1.3549500710038624e-05, + "loss": 0.7726, "step": 2009 }, { - "epoch": 0.15209413189058302, - "grad_norm": 2.870945453643799, - "learning_rate": 1.7492023560255202e-05, - "loss": 0.8018, + "epoch": 0.14149947201689547, + "grad_norm": 2.379786252975464, + "learning_rate": 1.354893719744588e-05, + "loss": 0.7993, "step": 2010 }, { - "epoch": 0.15216980061291666, - "grad_norm": 2.799070119857788, - "learning_rate": 1.7491277344635616e-05, - "loss": 0.7906, + "epoch": 0.14156986976416755, + "grad_norm": 2.4828431606292725, + "learning_rate": 1.3548373344367962e-05, + "loss": 0.8145, "step": 2011 }, { - "epoch": 0.15224546933525027, - "grad_norm": 3.5722224712371826, - "learning_rate": 1.7490530597264778e-05, - "loss": 0.7233, + "epoch": 0.14164026751143963, + "grad_norm": 1.9900507926940918, + "learning_rate": 1.354780915083418e-05, + "loss": 0.7595, "step": 2012 }, { - "epoch": 0.1523211380575839, - "grad_norm": 2.482611894607544, - "learning_rate": 1.7489783318189455e-05, - "loss": 0.7542, + "epoch": 0.14171066525871173, + "grad_norm": 1.7197569608688354, + "learning_rate": 1.3547244616873872e-05, + "loss": 0.682, "step": 2013 }, { - "epoch": 0.15239680677991752, - "grad_norm": 2.59255051612854, - "learning_rate": 1.748903550745644e-05, - "loss": 0.7671, + "epoch": 0.1417810630059838, + "grad_norm": 2.0535356998443604, + "learning_rate": 1.3546679742516386e-05, + "loss": 0.7058, "step": 2014 }, { - "epoch": 0.15247247550225115, - "grad_norm": 3.2076199054718018, - "learning_rate": 1.7488287165112564e-05, - "loss": 0.8936, + "epoch": 0.1418514607532559, + "grad_norm": 2.3813140392303467, + "learning_rate": 1.3546114527791092e-05, + "loss": 0.7853, "step": 2015 }, { - "epoch": 0.15254814422458476, - "grad_norm": 3.4623420238494873, - "learning_rate": 1.748753829120469e-05, - "loss": 0.7874, + "epoch": 0.141921858500528, + "grad_norm": 2.0817389488220215, + "learning_rate": 1.3545548972727374e-05, + "loss": 0.8953, "step": 2016 }, { - "epoch": 0.1526238129469184, - "grad_norm": 2.2293150424957275, - "learning_rate": 1.748678888577972e-05, - "loss": 0.8013, + "epoch": 0.14199225624780007, + "grad_norm": 2.2234771251678467, + "learning_rate": 1.354498307735464e-05, + "loss": 0.8031, "step": 2017 }, { - "epoch": 0.152699481669252, - "grad_norm": 2.1837499141693115, - "learning_rate": 1.748603894888458e-05, - "loss": 0.8155, + "epoch": 0.14206265399507215, + "grad_norm": 2.1697850227355957, + "learning_rate": 1.3544416841702305e-05, + "loss": 0.8644, "step": 2018 }, { - "epoch": 0.15277515039158565, - "grad_norm": 2.574540138244629, - "learning_rate": 1.748528848056623e-05, - "loss": 0.6882, + "epoch": 0.14213305174234425, + "grad_norm": 2.5696005821228027, + "learning_rate": 1.3543850265799814e-05, + "loss": 0.8473, "step": 2019 }, { - "epoch": 0.15285081911391926, - "grad_norm": 2.3444480895996094, - "learning_rate": 1.7484537480871676e-05, - "loss": 0.9241, + "epoch": 0.14220344948961633, + "grad_norm": 2.407658338546753, + "learning_rate": 1.3543283349676618e-05, + "loss": 0.7064, "step": 2020 }, { - "epoch": 0.15292648783625287, - "grad_norm": 2.1523497104644775, - "learning_rate": 1.7483785949847937e-05, - "loss": 0.7816, + "epoch": 0.1422738472368884, + "grad_norm": 2.0372283458709717, + "learning_rate": 1.3542716093362198e-05, + "loss": 0.9041, "step": 2021 }, { - "epoch": 0.1530021565585865, - "grad_norm": 2.120872735977173, - "learning_rate": 1.7483033887542087e-05, - "loss": 0.7581, + "epoch": 0.1423442449841605, + "grad_norm": 2.496581792831421, + "learning_rate": 1.354214849688604e-05, + "loss": 0.7259, "step": 2022 }, { - "epoch": 0.15307782528092012, - "grad_norm": 2.379638910293579, - "learning_rate": 1.7482281294001218e-05, - "loss": 0.7142, + "epoch": 0.1424146427314326, + "grad_norm": 2.0073697566986084, + "learning_rate": 1.3541580560277659e-05, + "loss": 0.6762, "step": 2023 }, { - "epoch": 0.15315349400325376, - "grad_norm": 1.9186440706253052, - "learning_rate": 1.7481528169272455e-05, - "loss": 0.6981, + "epoch": 0.1424850404787047, + "grad_norm": 2.1163651943206787, + "learning_rate": 1.3541012283566579e-05, + "loss": 0.6834, "step": 2024 }, { - "epoch": 0.15322916272558737, - "grad_norm": 2.0575826168060303, - "learning_rate": 1.7480774513402966e-05, - "loss": 0.6741, + "epoch": 0.14255543822597677, + "grad_norm": 2.4263651371002197, + "learning_rate": 1.3540443666782347e-05, + "loss": 0.8514, "step": 2025 }, { - "epoch": 0.153304831447921, - "grad_norm": 2.492513656616211, - "learning_rate": 1.7480020326439945e-05, - "loss": 0.845, + "epoch": 0.14262583597324885, + "grad_norm": 2.0010266304016113, + "learning_rate": 1.3539874709954525e-05, + "loss": 0.7415, "step": 2026 }, { - "epoch": 0.15338050017025462, - "grad_norm": 2.5875332355499268, - "learning_rate": 1.7479265608430632e-05, - "loss": 0.7312, + "epoch": 0.14269623372052095, + "grad_norm": 2.3978538513183594, + "learning_rate": 1.3539305413112693e-05, + "loss": 0.6778, "step": 2027 }, { - "epoch": 0.15345616889258826, - "grad_norm": 3.0655195713043213, - "learning_rate": 1.7478510359422273e-05, - "loss": 0.8206, + "epoch": 0.14276663146779303, + "grad_norm": 2.4727344512939453, + "learning_rate": 1.3538735776286452e-05, + "loss": 0.7823, "step": 2028 }, { - "epoch": 0.15353183761492187, - "grad_norm": 2.516611337661743, - "learning_rate": 1.7477754579462173e-05, - "loss": 0.6667, + "epoch": 0.1428370292150651, + "grad_norm": 2.0970239639282227, + "learning_rate": 1.3538165799505416e-05, + "loss": 0.674, "step": 2029 }, { - "epoch": 0.1536075063372555, - "grad_norm": 2.1421144008636475, - "learning_rate": 1.7476998268597665e-05, - "loss": 0.7155, + "epoch": 0.14290742696233721, + "grad_norm": 2.273420572280884, + "learning_rate": 1.3537595482799217e-05, + "loss": 0.7748, "step": 2030 }, { - "epoch": 0.15368317505958912, - "grad_norm": 2.9190337657928467, - "learning_rate": 1.7476241426876104e-05, - "loss": 0.756, + "epoch": 0.1429778247096093, + "grad_norm": 2.014585494995117, + "learning_rate": 1.353702482619751e-05, + "loss": 0.7278, "step": 2031 }, { - "epoch": 0.15375884378192275, - "grad_norm": 3.1374433040618896, - "learning_rate": 1.747548405434489e-05, - "loss": 0.7129, + "epoch": 0.14304822245688137, + "grad_norm": 1.9579099416732788, + "learning_rate": 1.3536453829729963e-05, + "loss": 0.6075, "step": 2032 }, { - "epoch": 0.15383451250425637, - "grad_norm": 3.1990249156951904, - "learning_rate": 1.747472615105145e-05, - "loss": 0.7409, + "epoch": 0.14311862020415347, + "grad_norm": 1.848496913909912, + "learning_rate": 1.3535882493426261e-05, + "loss": 0.7648, "step": 2033 }, { - "epoch": 0.15391018122658998, - "grad_norm": 2.4455344676971436, - "learning_rate": 1.7473967717043255e-05, - "loss": 0.6613, + "epoch": 0.14318901795142555, + "grad_norm": 1.948378324508667, + "learning_rate": 1.3535310817316107e-05, + "loss": 0.7598, "step": 2034 }, { - "epoch": 0.15398584994892361, - "grad_norm": 2.024739980697632, - "learning_rate": 1.747320875236779e-05, - "loss": 0.8867, + "epoch": 0.14325941569869763, + "grad_norm": 2.6666452884674072, + "learning_rate": 1.3534738801429225e-05, + "loss": 0.82, "step": 2035 }, { - "epoch": 0.15406151867125722, - "grad_norm": 2.6114304065704346, - "learning_rate": 1.747244925707258e-05, - "loss": 0.8703, + "epoch": 0.14332981344596973, + "grad_norm": 2.0397098064422607, + "learning_rate": 1.3534166445795355e-05, + "loss": 0.8616, "step": 2036 }, { - "epoch": 0.15413718739359086, - "grad_norm": 3.779912233352661, - "learning_rate": 1.7471689231205206e-05, - "loss": 0.8262, + "epoch": 0.1434002111932418, + "grad_norm": 1.5949040651321411, + "learning_rate": 1.3533593750444255e-05, + "loss": 0.9202, "step": 2037 }, { - "epoch": 0.15421285611592447, - "grad_norm": 3.2516801357269287, - "learning_rate": 1.7470928674813242e-05, - "loss": 0.7587, + "epoch": 0.14347060894051392, + "grad_norm": 2.1729824542999268, + "learning_rate": 1.3533020715405696e-05, + "loss": 0.7757, "step": 2038 }, { - "epoch": 0.1542885248382581, - "grad_norm": 2.9008138179779053, - "learning_rate": 1.7470167587944333e-05, - "loss": 0.7588, + "epoch": 0.143541006687786, + "grad_norm": 1.9796372652053833, + "learning_rate": 1.3532447340709477e-05, + "loss": 0.7831, "step": 2039 }, { - "epoch": 0.15436419356059172, - "grad_norm": 2.617128610610962, - "learning_rate": 1.7469405970646126e-05, - "loss": 0.6199, + "epoch": 0.14361140443505807, + "grad_norm": 2.455202341079712, + "learning_rate": 1.3531873626385404e-05, + "loss": 0.7406, "step": 2040 }, { - "epoch": 0.15443986228292536, - "grad_norm": 3.0137505531311035, - "learning_rate": 1.746864382296633e-05, - "loss": 0.7195, + "epoch": 0.14368180218233018, + "grad_norm": 2.6734743118286133, + "learning_rate": 1.3531299572463303e-05, + "loss": 0.753, "step": 2041 }, { - "epoch": 0.15451553100525897, - "grad_norm": 2.683501720428467, - "learning_rate": 1.7467881144952664e-05, - "loss": 0.8571, + "epoch": 0.14375219992960225, + "grad_norm": 2.151664972305298, + "learning_rate": 1.3530725178973024e-05, + "loss": 0.7391, "step": 2042 }, { - "epoch": 0.1545911997275926, - "grad_norm": 2.612112283706665, - "learning_rate": 1.7467117936652896e-05, - "loss": 0.8931, + "epoch": 0.14382259767687433, + "grad_norm": 1.8828901052474976, + "learning_rate": 1.3530150445944428e-05, + "loss": 0.6766, "step": 2043 }, { - "epoch": 0.15466686844992622, - "grad_norm": 3.511695384979248, - "learning_rate": 1.7466354198114813e-05, - "loss": 0.7837, + "epoch": 0.14389299542414644, + "grad_norm": 2.1596450805664062, + "learning_rate": 1.3529575373407397e-05, + "loss": 0.7485, "step": 2044 }, { - "epoch": 0.15474253717225986, - "grad_norm": 2.829535961151123, - "learning_rate": 1.7465589929386248e-05, - "loss": 0.8148, + "epoch": 0.1439633931714185, + "grad_norm": 2.6462273597717285, + "learning_rate": 1.3528999961391827e-05, + "loss": 0.7524, "step": 2045 }, { - "epoch": 0.15481820589459347, - "grad_norm": 2.3426201343536377, - "learning_rate": 1.746482513051506e-05, - "loss": 0.6724, + "epoch": 0.1440337909186906, + "grad_norm": 2.6336793899536133, + "learning_rate": 1.3528424209927637e-05, + "loss": 0.8082, "step": 2046 }, { - "epoch": 0.15489387461692708, - "grad_norm": 2.5401344299316406, - "learning_rate": 1.7464059801549144e-05, - "loss": 0.9651, + "epoch": 0.1441041886659627, + "grad_norm": 2.2029147148132324, + "learning_rate": 1.3527848119044759e-05, + "loss": 0.6921, "step": 2047 }, { - "epoch": 0.15496954333926072, - "grad_norm": 2.8630175590515137, - "learning_rate": 1.7463293942536427e-05, - "loss": 0.8498, + "epoch": 0.14417458641323477, + "grad_norm": 2.041109800338745, + "learning_rate": 1.3527271688773148e-05, + "loss": 0.6488, "step": 2048 }, { - "epoch": 0.15504521206159433, - "grad_norm": 2.4896228313446045, - "learning_rate": 1.746252755352487e-05, - "loss": 0.862, + "epoch": 0.14424498416050685, + "grad_norm": 2.4333689212799072, + "learning_rate": 1.3526694919142769e-05, + "loss": 0.9409, "step": 2049 }, { - "epoch": 0.15512088078392797, - "grad_norm": 2.259605646133423, - "learning_rate": 1.7461760634562468e-05, - "loss": 0.633, + "epoch": 0.14431538190777896, + "grad_norm": 2.4270708560943604, + "learning_rate": 1.352611781018361e-05, + "loss": 0.714, "step": 2050 }, { - "epoch": 0.15519654950626158, - "grad_norm": 2.4651870727539062, - "learning_rate": 1.7460993185697244e-05, - "loss": 0.7007, + "epoch": 0.14438577965505103, + "grad_norm": 2.2437524795532227, + "learning_rate": 1.3525540361925673e-05, + "loss": 0.7448, "step": 2051 }, { - "epoch": 0.15527221822859522, - "grad_norm": 2.3934268951416016, - "learning_rate": 1.7460225206977262e-05, - "loss": 0.9508, + "epoch": 0.14445617740232314, + "grad_norm": 2.15356183052063, + "learning_rate": 1.3524962574398986e-05, + "loss": 0.7838, "step": 2052 }, { - "epoch": 0.15534788695092883, - "grad_norm": 2.429025650024414, - "learning_rate": 1.7459456698450613e-05, - "loss": 0.6615, + "epoch": 0.14452657514959522, + "grad_norm": 2.0781919956207275, + "learning_rate": 1.3524384447633585e-05, + "loss": 0.7417, "step": 2053 }, { - "epoch": 0.15542355567326246, - "grad_norm": 2.19804310798645, - "learning_rate": 1.7458687660165425e-05, - "loss": 0.8376, + "epoch": 0.1445969728968673, + "grad_norm": 1.841194987297058, + "learning_rate": 1.3523805981659528e-05, + "loss": 0.7822, "step": 2054 }, { - "epoch": 0.15549922439559608, - "grad_norm": 2.211962938308716, - "learning_rate": 1.7457918092169857e-05, - "loss": 0.8152, + "epoch": 0.1446673706441394, + "grad_norm": 1.8723855018615723, + "learning_rate": 1.3523227176506889e-05, + "loss": 0.7409, "step": 2055 }, { - "epoch": 0.1555748931179297, - "grad_norm": 2.254776954650879, - "learning_rate": 1.74571479945121e-05, - "loss": 0.8516, + "epoch": 0.14473776839141148, + "grad_norm": 2.0017099380493164, + "learning_rate": 1.3522648032205761e-05, + "loss": 0.7193, "step": 2056 }, { - "epoch": 0.15565056184026332, - "grad_norm": 2.47976016998291, - "learning_rate": 1.7456377367240385e-05, - "loss": 0.8315, + "epoch": 0.14480816613868355, + "grad_norm": 2.3983771800994873, + "learning_rate": 1.3522068548786256e-05, + "loss": 0.9911, "step": 2057 }, { - "epoch": 0.15572623056259696, - "grad_norm": 2.8059258460998535, - "learning_rate": 1.7455606210402966e-05, - "loss": 0.7777, + "epoch": 0.14487856388595566, + "grad_norm": 1.853318452835083, + "learning_rate": 1.3521488726278499e-05, + "loss": 0.8539, "step": 2058 }, { - "epoch": 0.15580189928493057, - "grad_norm": 2.620880603790283, - "learning_rate": 1.7454834524048138e-05, - "loss": 0.6418, + "epoch": 0.14494896163322774, + "grad_norm": 2.0474300384521484, + "learning_rate": 1.3520908564712637e-05, + "loss": 0.7732, "step": 2059 }, { - "epoch": 0.1558775680072642, - "grad_norm": 2.414295196533203, - "learning_rate": 1.7454062308224226e-05, - "loss": 0.7401, + "epoch": 0.1450193593804998, + "grad_norm": 2.006171226501465, + "learning_rate": 1.3520328064118832e-05, + "loss": 0.7238, "step": 2060 }, { - "epoch": 0.15595323672959782, - "grad_norm": 2.7423794269561768, - "learning_rate": 1.7453289562979585e-05, - "loss": 0.8576, + "epoch": 0.14508975712777192, + "grad_norm": 2.271385908126831, + "learning_rate": 1.3519747224527268e-05, + "loss": 0.7742, "step": 2061 }, { - "epoch": 0.15602890545193143, - "grad_norm": 3.214839458465576, - "learning_rate": 1.7452516288362612e-05, - "loss": 0.8235, + "epoch": 0.145160154875044, + "grad_norm": 2.356719732284546, + "learning_rate": 1.351916604596814e-05, + "loss": 0.8425, "step": 2062 }, { - "epoch": 0.15610457417426507, - "grad_norm": 2.462529182434082, - "learning_rate": 1.7451742484421733e-05, - "loss": 0.8605, + "epoch": 0.14523055262231607, + "grad_norm": 2.22481632232666, + "learning_rate": 1.3518584528471665e-05, + "loss": 0.7367, "step": 2063 }, { - "epoch": 0.15618024289659868, - "grad_norm": 2.2474400997161865, - "learning_rate": 1.7450968151205402e-05, - "loss": 0.7083, + "epoch": 0.14530095036958818, + "grad_norm": 2.057844400405884, + "learning_rate": 1.3518002672068077e-05, + "loss": 0.7517, "step": 2064 }, { - "epoch": 0.15625591161893232, - "grad_norm": 2.528843879699707, - "learning_rate": 1.7450193288762116e-05, - "loss": 0.8239, + "epoch": 0.14537134811686026, + "grad_norm": 2.453312635421753, + "learning_rate": 1.3517420476787628e-05, + "loss": 0.7135, "step": 2065 }, { - "epoch": 0.15633158034126593, - "grad_norm": 2.497174024581909, - "learning_rate": 1.7449417897140387e-05, - "loss": 0.7607, + "epoch": 0.14544174586413236, + "grad_norm": 2.274406909942627, + "learning_rate": 1.3516837942660584e-05, + "loss": 0.7223, "step": 2066 }, { - "epoch": 0.15640724906359957, - "grad_norm": 3.976351261138916, - "learning_rate": 1.7448641976388783e-05, - "loss": 0.7265, + "epoch": 0.14551214361140444, + "grad_norm": 2.0773041248321533, + "learning_rate": 1.3516255069717235e-05, + "loss": 0.7632, "step": 2067 }, { - "epoch": 0.15648291778593318, - "grad_norm": 2.027620792388916, - "learning_rate": 1.7447865526555894e-05, - "loss": 0.8558, + "epoch": 0.14558254135867651, + "grad_norm": 2.1532270908355713, + "learning_rate": 1.3515671857987884e-05, + "loss": 0.8107, "step": 2068 }, { - "epoch": 0.15655858650826682, - "grad_norm": 2.824955701828003, - "learning_rate": 1.7447088547690343e-05, - "loss": 0.9394, + "epoch": 0.14565293910594862, + "grad_norm": 2.115966558456421, + "learning_rate": 1.3515088307502851e-05, + "loss": 0.7769, "step": 2069 }, { - "epoch": 0.15663425523060043, - "grad_norm": 2.474083185195923, - "learning_rate": 1.7446311039840784e-05, - "loss": 0.8471, + "epoch": 0.1457233368532207, + "grad_norm": 2.081608772277832, + "learning_rate": 1.3514504418292476e-05, + "loss": 0.7526, "step": 2070 }, { - "epoch": 0.15670992395293407, - "grad_norm": 2.226369619369507, - "learning_rate": 1.744553300305591e-05, - "loss": 0.7453, + "epoch": 0.14579373460049277, + "grad_norm": 2.567854881286621, + "learning_rate": 1.3513920190387117e-05, + "loss": 0.7062, "step": 2071 }, { - "epoch": 0.15678559267526768, - "grad_norm": 2.525721788406372, - "learning_rate": 1.7444754437384443e-05, - "loss": 1.0301, + "epoch": 0.14586413234776488, + "grad_norm": 2.0827255249023438, + "learning_rate": 1.3513335623817147e-05, + "loss": 0.8408, "step": 2072 }, { - "epoch": 0.15686126139760131, - "grad_norm": 2.348961591720581, - "learning_rate": 1.7443975342875138e-05, - "loss": 0.7909, + "epoch": 0.14593453009503696, + "grad_norm": 2.1328165531158447, + "learning_rate": 1.3512750718612961e-05, + "loss": 0.7661, "step": 2073 }, { - "epoch": 0.15693693011993493, - "grad_norm": 2.763505697250366, - "learning_rate": 1.7443195719576785e-05, - "loss": 0.8576, + "epoch": 0.14600492784230903, + "grad_norm": 2.272878885269165, + "learning_rate": 1.3512165474804967e-05, + "loss": 0.6991, "step": 2074 }, { - "epoch": 0.15701259884226854, - "grad_norm": 2.2634928226470947, - "learning_rate": 1.7442415567538213e-05, - "loss": 0.9044, + "epoch": 0.14607532558958114, + "grad_norm": 2.15435528755188, + "learning_rate": 1.351157989242359e-05, + "loss": 0.8042, "step": 2075 }, { - "epoch": 0.15708826756460217, - "grad_norm": 2.370476722717285, - "learning_rate": 1.7441634886808265e-05, - "loss": 0.8432, + "epoch": 0.14614572333685322, + "grad_norm": 2.0213234424591064, + "learning_rate": 1.3510993971499278e-05, + "loss": 0.8067, "step": 2076 }, { - "epoch": 0.15716393628693578, - "grad_norm": 2.5229246616363525, - "learning_rate": 1.7440853677435842e-05, - "loss": 0.7714, + "epoch": 0.1462161210841253, + "grad_norm": 1.9892933368682861, + "learning_rate": 1.3510407712062494e-05, + "loss": 0.7274, "step": 2077 }, { - "epoch": 0.15723960500926942, - "grad_norm": 2.90761137008667, - "learning_rate": 1.744007193946986e-05, - "loss": 0.6313, + "epoch": 0.1462865188313974, + "grad_norm": 2.292405128479004, + "learning_rate": 1.3509821114143713e-05, + "loss": 0.7592, "step": 2078 }, { - "epoch": 0.15731527373160303, - "grad_norm": 2.4619617462158203, - "learning_rate": 1.7439289672959275e-05, - "loss": 0.7495, + "epoch": 0.14635691657866948, + "grad_norm": 2.3261070251464844, + "learning_rate": 1.3509234177773443e-05, + "loss": 0.6977, "step": 2079 }, { - "epoch": 0.15739094245393667, - "grad_norm": 3.04870867729187, - "learning_rate": 1.743850687795307e-05, - "loss": 0.8112, + "epoch": 0.14642731432594158, + "grad_norm": 1.928760051727295, + "learning_rate": 1.3508646902982186e-05, + "loss": 0.6311, "step": 2080 }, { - "epoch": 0.15746661117627028, - "grad_norm": 2.4026286602020264, - "learning_rate": 1.7437723554500277e-05, - "loss": 0.772, + "epoch": 0.14649771207321366, + "grad_norm": 1.8416416645050049, + "learning_rate": 1.3508059289800486e-05, + "loss": 0.7095, "step": 2081 }, { - "epoch": 0.15754227989860392, - "grad_norm": 2.5476691722869873, - "learning_rate": 1.743693970264994e-05, - "loss": 0.8682, + "epoch": 0.14656810982048574, + "grad_norm": 1.9654814004898071, + "learning_rate": 1.350747133825889e-05, + "loss": 0.5989, "step": 2082 }, { - "epoch": 0.15761794862093753, - "grad_norm": 2.528425931930542, - "learning_rate": 1.7436155322451153e-05, - "loss": 0.9005, + "epoch": 0.14663850756775784, + "grad_norm": 2.245328903198242, + "learning_rate": 1.3506883048387965e-05, + "loss": 0.7371, "step": 2083 }, { - "epoch": 0.15769361734327117, - "grad_norm": 2.272146224975586, - "learning_rate": 1.743537041395303e-05, - "loss": 0.6577, + "epoch": 0.14670890531502992, + "grad_norm": 2.523977279663086, + "learning_rate": 1.3506294420218297e-05, + "loss": 0.8938, "step": 2084 }, { - "epoch": 0.15776928606560478, - "grad_norm": 2.186119794845581, - "learning_rate": 1.743458497720473e-05, - "loss": 0.7003, + "epoch": 0.146779303062302, + "grad_norm": 2.5129518508911133, + "learning_rate": 1.3505705453780489e-05, + "loss": 0.8447, "step": 2085 }, { - "epoch": 0.15784495478793842, - "grad_norm": 2.385634660720825, - "learning_rate": 1.743379901225544e-05, - "loss": 0.8375, + "epoch": 0.1468497008095741, + "grad_norm": 1.8679391145706177, + "learning_rate": 1.3505116149105162e-05, + "loss": 0.7379, "step": 2086 }, { - "epoch": 0.15792062351027203, - "grad_norm": 3.0107641220092773, - "learning_rate": 1.7433012519154378e-05, - "loss": 0.8261, + "epoch": 0.14692009855684618, + "grad_norm": 2.142808437347412, + "learning_rate": 1.3504526506222958e-05, + "loss": 0.6876, "step": 2087 }, { - "epoch": 0.15799629223260564, - "grad_norm": 2.3825418949127197, - "learning_rate": 1.7432225497950792e-05, - "loss": 0.729, + "epoch": 0.14699049630411826, + "grad_norm": 2.4002599716186523, + "learning_rate": 1.3503936525164527e-05, + "loss": 0.915, "step": 2088 }, { - "epoch": 0.15807196095493928, - "grad_norm": 2.1834664344787598, - "learning_rate": 1.7431437948693975e-05, - "loss": 0.6568, + "epoch": 0.14706089405139036, + "grad_norm": 2.4998977184295654, + "learning_rate": 1.3503346205960546e-05, + "loss": 0.6752, "step": 2089 }, { - "epoch": 0.1581476296772729, - "grad_norm": 2.4563395977020264, - "learning_rate": 1.7430649871433245e-05, - "loss": 0.7753, + "epoch": 0.14713129179866244, + "grad_norm": 2.238266706466675, + "learning_rate": 1.3502755548641706e-05, + "loss": 0.8425, "step": 2090 }, { - "epoch": 0.15822329839960653, - "grad_norm": 2.7324671745300293, - "learning_rate": 1.742986126621795e-05, - "loss": 0.7523, + "epoch": 0.14720168954593452, + "grad_norm": 1.77357017993927, + "learning_rate": 1.3502164553238716e-05, + "loss": 0.66, "step": 2091 }, { - "epoch": 0.15829896712194014, - "grad_norm": 2.3517651557922363, - "learning_rate": 1.7429072133097478e-05, - "loss": 0.7389, + "epoch": 0.14727208729320662, + "grad_norm": 2.413443088531494, + "learning_rate": 1.3501573219782304e-05, + "loss": 0.7532, "step": 2092 }, { - "epoch": 0.15837463584427378, - "grad_norm": 2.4391419887542725, - "learning_rate": 1.7428282472121245e-05, - "loss": 0.6748, + "epoch": 0.1473424850404787, + "grad_norm": 2.1906399726867676, + "learning_rate": 1.3500981548303212e-05, + "loss": 0.7516, "step": 2093 }, { - "epoch": 0.1584503045666074, - "grad_norm": 3.3053195476531982, - "learning_rate": 1.7427492283338704e-05, - "loss": 0.7699, + "epoch": 0.1474128827877508, + "grad_norm": 1.9895716905593872, + "learning_rate": 1.35003895388322e-05, + "loss": 0.6415, "step": 2094 }, { - "epoch": 0.15852597328894102, - "grad_norm": 2.2691550254821777, - "learning_rate": 1.7426701566799337e-05, - "loss": 0.8406, + "epoch": 0.14748328053502288, + "grad_norm": 2.1136491298675537, + "learning_rate": 1.349979719140005e-05, + "loss": 0.8296, "step": 2095 }, { - "epoch": 0.15860164201127464, - "grad_norm": 2.280519723892212, - "learning_rate": 1.7425910322552666e-05, - "loss": 0.8129, + "epoch": 0.14755367828229496, + "grad_norm": 1.871099829673767, + "learning_rate": 1.3499204506037556e-05, + "loss": 0.8159, "step": 2096 }, { - "epoch": 0.15867731073360827, - "grad_norm": 2.6433169841766357, - "learning_rate": 1.7425118550648234e-05, - "loss": 0.7612, + "epoch": 0.14762407602956706, + "grad_norm": 2.1271631717681885, + "learning_rate": 1.3498611482775534e-05, + "loss": 0.8054, "step": 2097 }, { - "epoch": 0.15875297945594188, - "grad_norm": 2.356234550476074, - "learning_rate": 1.742432625113563e-05, - "loss": 0.7845, + "epoch": 0.14769447377683914, + "grad_norm": 2.406583309173584, + "learning_rate": 1.3498018121644817e-05, + "loss": 0.7479, "step": 2098 }, { - "epoch": 0.15882864817827552, - "grad_norm": 2.7373485565185547, - "learning_rate": 1.742353342406447e-05, - "loss": 0.6441, + "epoch": 0.14776487152411122, + "grad_norm": 1.696142315864563, + "learning_rate": 1.3497424422676252e-05, + "loss": 0.7971, "step": 2099 }, { - "epoch": 0.15890431690060913, - "grad_norm": 2.2492804527282715, - "learning_rate": 1.7422740069484397e-05, - "loss": 0.6834, + "epoch": 0.14783526927138332, + "grad_norm": 2.1341099739074707, + "learning_rate": 1.3496830385900707e-05, + "loss": 0.8204, "step": 2100 }, { - "epoch": 0.15897998562294274, - "grad_norm": 2.219045639038086, - "learning_rate": 1.7421946187445104e-05, - "loss": 0.7691, + "epoch": 0.1479056670186554, + "grad_norm": 2.368579864501953, + "learning_rate": 1.3496236011349066e-05, + "loss": 0.757, "step": 2101 }, { - "epoch": 0.15905565434527638, - "grad_norm": 2.9211206436157227, - "learning_rate": 1.7421151777996297e-05, - "loss": 0.629, + "epoch": 0.14797606476592748, + "grad_norm": 4.759779453277588, + "learning_rate": 1.3495641299052231e-05, + "loss": 0.7203, "step": 2102 }, { - "epoch": 0.15913132306761, - "grad_norm": 3.131239414215088, - "learning_rate": 1.7420356841187732e-05, - "loss": 0.7825, + "epoch": 0.14804646251319958, + "grad_norm": 2.6825926303863525, + "learning_rate": 1.3495046249041123e-05, + "loss": 0.7415, "step": 2103 }, { - "epoch": 0.15920699178994363, - "grad_norm": 2.3928134441375732, - "learning_rate": 1.7419561377069183e-05, - "loss": 0.8064, + "epoch": 0.14811686026047166, + "grad_norm": 1.8315502405166626, + "learning_rate": 1.3494450861346677e-05, + "loss": 0.7361, "step": 2104 }, { - "epoch": 0.15928266051227724, - "grad_norm": 2.74513840675354, - "learning_rate": 1.741876538569047e-05, - "loss": 0.8204, + "epoch": 0.14818725800774374, + "grad_norm": 2.150681495666504, + "learning_rate": 1.349385513599985e-05, + "loss": 0.7164, "step": 2105 }, { - "epoch": 0.15935832923461088, - "grad_norm": 2.3963100910186768, - "learning_rate": 1.741796886710144e-05, - "loss": 1.0466, + "epoch": 0.14825765575501584, + "grad_norm": 2.04199481010437, + "learning_rate": 1.3493259073031614e-05, + "loss": 0.8232, "step": 2106 }, { - "epoch": 0.1594339979569445, - "grad_norm": 2.196810007095337, - "learning_rate": 1.7417171821351973e-05, - "loss": 0.6747, + "epoch": 0.14832805350228792, + "grad_norm": 2.420827627182007, + "learning_rate": 1.3492662672472957e-05, + "loss": 0.7505, "step": 2107 }, { - "epoch": 0.15950966667927813, - "grad_norm": 2.6434783935546875, - "learning_rate": 1.741637424849198e-05, - "loss": 0.8187, + "epoch": 0.14839845124956003, + "grad_norm": 1.9811785221099854, + "learning_rate": 1.3492065934354888e-05, + "loss": 0.7801, "step": 2108 }, { - "epoch": 0.15958533540161174, - "grad_norm": 2.148426055908203, - "learning_rate": 1.741557614857141e-05, - "loss": 0.7376, + "epoch": 0.1484688489968321, + "grad_norm": 1.921058177947998, + "learning_rate": 1.3491468858708431e-05, + "loss": 0.7536, "step": 2109 }, { - "epoch": 0.15966100412394538, - "grad_norm": 3.2016146183013916, - "learning_rate": 1.741477752164024e-05, - "loss": 0.7088, + "epoch": 0.14853924674410418, + "grad_norm": 2.300747871398926, + "learning_rate": 1.349087144556463e-05, + "loss": 0.8029, "step": 2110 }, { - "epoch": 0.159736672846279, - "grad_norm": 2.8447532653808594, - "learning_rate": 1.7413978367748488e-05, - "loss": 0.8271, + "epoch": 0.14860964449137629, + "grad_norm": 1.9932628870010376, + "learning_rate": 1.3490273694954543e-05, + "loss": 0.7183, "step": 2111 }, { - "epoch": 0.15981234156861263, - "grad_norm": 2.411562919616699, - "learning_rate": 1.7413178686946198e-05, - "loss": 0.801, + "epoch": 0.14868004223864836, + "grad_norm": 2.3676726818084717, + "learning_rate": 1.348967560690925e-05, + "loss": 0.7235, "step": 2112 }, { - "epoch": 0.15988801029094624, - "grad_norm": 2.4940292835235596, - "learning_rate": 1.7412378479283445e-05, - "loss": 0.838, + "epoch": 0.14875043998592044, + "grad_norm": 2.330206871032715, + "learning_rate": 1.3489077181459843e-05, + "loss": 0.7896, "step": 2113 }, { - "epoch": 0.15996367901327985, - "grad_norm": 2.510559320449829, - "learning_rate": 1.7411577744810343e-05, - "loss": 0.7729, + "epoch": 0.14882083773319255, + "grad_norm": 1.9547412395477295, + "learning_rate": 1.3488478418637438e-05, + "loss": 0.7492, "step": 2114 }, { - "epoch": 0.16003934773561349, - "grad_norm": 2.928279399871826, - "learning_rate": 1.7410776483577036e-05, - "loss": 0.9162, + "epoch": 0.14889123548046462, + "grad_norm": 1.8493527173995972, + "learning_rate": 1.3487879318473163e-05, + "loss": 0.8735, "step": 2115 }, { - "epoch": 0.1601150164579471, - "grad_norm": 2.311375617980957, - "learning_rate": 1.7409974695633702e-05, - "loss": 0.7684, + "epoch": 0.1489616332277367, + "grad_norm": 1.8976885080337524, + "learning_rate": 1.3487279880998167e-05, + "loss": 0.6917, "step": 2116 }, { - "epoch": 0.16019068518028073, - "grad_norm": 2.5286309719085693, - "learning_rate": 1.740917238103055e-05, - "loss": 0.7551, + "epoch": 0.1490320309750088, + "grad_norm": 2.3138837814331055, + "learning_rate": 1.3486680106243613e-05, + "loss": 0.7224, "step": 2117 }, { - "epoch": 0.16026635390261434, - "grad_norm": 2.7157998085021973, - "learning_rate": 1.740836953981783e-05, - "loss": 0.9096, + "epoch": 0.14910242872228088, + "grad_norm": 2.1206371784210205, + "learning_rate": 1.3486079994240687e-05, + "loss": 0.7869, "step": 2118 }, { - "epoch": 0.16034202262494798, - "grad_norm": 2.3941192626953125, - "learning_rate": 1.7407566172045808e-05, - "loss": 0.7154, + "epoch": 0.14917282646955296, + "grad_norm": 1.9168246984481812, + "learning_rate": 1.3485479545020588e-05, + "loss": 0.8178, "step": 2119 }, { - "epoch": 0.1604176913472816, - "grad_norm": 3.259812355041504, - "learning_rate": 1.74067622777648e-05, - "loss": 0.7483, + "epoch": 0.14924322421682507, + "grad_norm": 2.4285600185394287, + "learning_rate": 1.3484878758614534e-05, + "loss": 0.7171, "step": 2120 }, { - "epoch": 0.16049336006961523, - "grad_norm": 2.8680622577667236, - "learning_rate": 1.740595785702515e-05, - "loss": 0.7112, + "epoch": 0.14931362196409714, + "grad_norm": 2.0414702892303467, + "learning_rate": 1.348427763505376e-05, + "loss": 0.7704, "step": 2121 }, { - "epoch": 0.16056902879194884, - "grad_norm": 2.4445548057556152, - "learning_rate": 1.7405152909877228e-05, - "loss": 0.7903, + "epoch": 0.14938401971136925, + "grad_norm": 1.9084241390228271, + "learning_rate": 1.3483676174369518e-05, + "loss": 0.8843, "step": 2122 }, { - "epoch": 0.16064469751428248, - "grad_norm": 2.763958215713501, - "learning_rate": 1.7404347436371446e-05, - "loss": 0.7796, + "epoch": 0.14945441745864133, + "grad_norm": 2.732853651046753, + "learning_rate": 1.3483074376593081e-05, + "loss": 0.8176, "step": 2123 }, { - "epoch": 0.1607203662366161, - "grad_norm": 2.0752618312835693, - "learning_rate": 1.7403541436558246e-05, - "loss": 0.7199, + "epoch": 0.1495248152059134, + "grad_norm": 2.675346851348877, + "learning_rate": 1.3482472241755736e-05, + "loss": 0.713, "step": 2124 }, { - "epoch": 0.16079603495894973, - "grad_norm": 2.1711764335632324, - "learning_rate": 1.74027349104881e-05, - "loss": 0.6946, + "epoch": 0.1495952129531855, + "grad_norm": 2.0974748134613037, + "learning_rate": 1.3481869769888789e-05, + "loss": 0.7289, "step": 2125 }, { - "epoch": 0.16087170368128334, - "grad_norm": 2.151015043258667, - "learning_rate": 1.7401927858211516e-05, - "loss": 0.7603, + "epoch": 0.14966561070045759, + "grad_norm": 2.0021018981933594, + "learning_rate": 1.3481266961023563e-05, + "loss": 0.7983, "step": 2126 }, { - "epoch": 0.16094737240361698, - "grad_norm": 4.292178153991699, - "learning_rate": 1.7401120279779035e-05, - "loss": 0.7286, + "epoch": 0.14973600844772966, + "grad_norm": 5.459007263183594, + "learning_rate": 1.3480663815191395e-05, + "loss": 0.794, "step": 2127 }, { - "epoch": 0.1610230411259506, - "grad_norm": 4.497610569000244, - "learning_rate": 1.7400312175241226e-05, - "loss": 0.8232, + "epoch": 0.14980640619500177, + "grad_norm": 1.8780583143234253, + "learning_rate": 1.3480060332423647e-05, + "loss": 0.8177, "step": 2128 }, { - "epoch": 0.1610987098482842, - "grad_norm": 2.505603551864624, - "learning_rate": 1.73995035446487e-05, - "loss": 0.7859, + "epoch": 0.14987680394227385, + "grad_norm": 1.9384963512420654, + "learning_rate": 1.3479456512751693e-05, + "loss": 0.7537, "step": 2129 }, { - "epoch": 0.16117437857061784, - "grad_norm": 2.1637277603149414, - "learning_rate": 1.73986943880521e-05, - "loss": 0.7861, + "epoch": 0.14994720168954592, + "grad_norm": 2.3309268951416016, + "learning_rate": 1.3478852356206929e-05, + "loss": 0.7708, "step": 2130 }, { - "epoch": 0.16125004729295145, - "grad_norm": 3.3411808013916016, - "learning_rate": 1.7397884705502088e-05, - "loss": 0.8564, + "epoch": 0.15001759943681803, + "grad_norm": 2.0053508281707764, + "learning_rate": 1.3478247862820762e-05, + "loss": 0.7254, "step": 2131 }, { - "epoch": 0.1613257160152851, - "grad_norm": 2.2842612266540527, - "learning_rate": 1.7397074497049378e-05, - "loss": 0.799, + "epoch": 0.1500879971840901, + "grad_norm": 2.656656503677368, + "learning_rate": 1.3477643032624621e-05, + "loss": 0.7553, "step": 2132 }, { - "epoch": 0.1614013847376187, - "grad_norm": 3.669283628463745, - "learning_rate": 1.73962637627447e-05, - "loss": 0.7311, + "epoch": 0.1501583949313622, + "grad_norm": 2.056612491607666, + "learning_rate": 1.3477037865649952e-05, + "loss": 0.6966, "step": 2133 }, { - "epoch": 0.16147705345995234, - "grad_norm": 2.489490032196045, - "learning_rate": 1.7395452502638826e-05, - "loss": 0.8819, + "epoch": 0.1502287926786343, + "grad_norm": 2.0227396488189697, + "learning_rate": 1.3476432361928221e-05, + "loss": 0.6852, "step": 2134 }, { - "epoch": 0.16155272218228595, - "grad_norm": 2.164292097091675, - "learning_rate": 1.7394640716782564e-05, - "loss": 0.8138, + "epoch": 0.15029919042590636, + "grad_norm": 2.26312255859375, + "learning_rate": 1.3475826521490904e-05, + "loss": 0.8027, "step": 2135 }, { - "epoch": 0.16162839090461958, - "grad_norm": 2.452188014984131, - "learning_rate": 1.739382840522675e-05, - "loss": 0.7811, + "epoch": 0.15036958817317847, + "grad_norm": 2.1140329837799072, + "learning_rate": 1.3475220344369502e-05, + "loss": 0.7035, "step": 2136 }, { - "epoch": 0.1617040596269532, - "grad_norm": 2.700749397277832, - "learning_rate": 1.739301556802225e-05, - "loss": 0.7128, + "epoch": 0.15043998592045055, + "grad_norm": 2.215404987335205, + "learning_rate": 1.347461383059553e-05, + "loss": 0.7965, "step": 2137 }, { - "epoch": 0.16177972834928683, - "grad_norm": 3.382140874862671, - "learning_rate": 1.7392202205219974e-05, - "loss": 0.9743, + "epoch": 0.15051038366772262, + "grad_norm": 2.0065503120422363, + "learning_rate": 1.347400698020052e-05, + "loss": 0.7142, "step": 2138 }, { - "epoch": 0.16185539707162044, - "grad_norm": 2.4139597415924072, - "learning_rate": 1.739138831687085e-05, - "loss": 0.7896, + "epoch": 0.15058078141499473, + "grad_norm": 1.9836153984069824, + "learning_rate": 1.3473399793216024e-05, + "loss": 0.7518, "step": 2139 }, { - "epoch": 0.16193106579395408, - "grad_norm": 2.2004284858703613, - "learning_rate": 1.7390573903025845e-05, - "loss": 0.7469, + "epoch": 0.1506511791622668, + "grad_norm": 2.292107343673706, + "learning_rate": 1.3472792269673612e-05, + "loss": 0.8337, "step": 2140 }, { - "epoch": 0.1620067345162877, - "grad_norm": 2.8019471168518066, - "learning_rate": 1.7389758963735967e-05, - "loss": 0.7453, + "epoch": 0.15072157690953888, + "grad_norm": 3.4108846187591553, + "learning_rate": 1.3472184409604866e-05, + "loss": 0.672, "step": 2141 }, { - "epoch": 0.1620824032386213, - "grad_norm": 2.7168564796447754, - "learning_rate": 1.7388943499052246e-05, - "loss": 0.8727, + "epoch": 0.150791974656811, + "grad_norm": 2.554661512374878, + "learning_rate": 1.3471576213041393e-05, + "loss": 0.8581, "step": 2142 }, { - "epoch": 0.16215807196095494, - "grad_norm": 2.5624351501464844, - "learning_rate": 1.7388127509025748e-05, - "loss": 0.7883, + "epoch": 0.15086237240408307, + "grad_norm": 2.2160611152648926, + "learning_rate": 1.3470967680014811e-05, + "loss": 0.7696, "step": 2143 }, { - "epoch": 0.16223374068328855, - "grad_norm": 2.199237585067749, - "learning_rate": 1.738731099370758e-05, - "loss": 0.8322, + "epoch": 0.15093277015135514, + "grad_norm": 2.0324320793151855, + "learning_rate": 1.3470358810556757e-05, + "loss": 0.7195, "step": 2144 }, { - "epoch": 0.1623094094056222, - "grad_norm": 2.3310837745666504, - "learning_rate": 1.7386493953148867e-05, - "loss": 0.7478, + "epoch": 0.15100316789862725, + "grad_norm": 2.519993782043457, + "learning_rate": 1.346974960469889e-05, + "loss": 0.702, "step": 2145 }, { - "epoch": 0.1623850781279558, - "grad_norm": 2.901883363723755, - "learning_rate": 1.7385676387400777e-05, - "loss": 0.7515, + "epoch": 0.15107356564589933, + "grad_norm": 2.2141342163085938, + "learning_rate": 1.3469140062472882e-05, + "loss": 0.674, "step": 2146 }, { - "epoch": 0.16246074685028944, - "grad_norm": 2.0343589782714844, - "learning_rate": 1.7384858296514507e-05, - "loss": 0.6157, + "epoch": 0.15114396339317143, + "grad_norm": 2.0812056064605713, + "learning_rate": 1.346853018391042e-05, + "loss": 0.7779, "step": 2147 }, { - "epoch": 0.16253641557262305, - "grad_norm": 2.7100648880004883, - "learning_rate": 1.7384039680541295e-05, - "loss": 0.8054, + "epoch": 0.1512143611404435, + "grad_norm": 2.650134563446045, + "learning_rate": 1.346791996904322e-05, + "loss": 0.7948, "step": 2148 }, { - "epoch": 0.1626120842949567, - "grad_norm": 2.2112245559692383, - "learning_rate": 1.7383220539532396e-05, - "loss": 0.7847, + "epoch": 0.1512847588877156, + "grad_norm": 2.4188122749328613, + "learning_rate": 1.3467309417903002e-05, + "loss": 0.7203, "step": 2149 }, { - "epoch": 0.1626877530172903, - "grad_norm": 2.090649127960205, - "learning_rate": 1.7382400873539117e-05, - "loss": 0.7328, + "epoch": 0.1513551566349877, + "grad_norm": 1.969528317451477, + "learning_rate": 1.3466698530521508e-05, + "loss": 0.7485, "step": 2150 }, { - "epoch": 0.16276342173962394, - "grad_norm": 3.082857847213745, - "learning_rate": 1.738158068261278e-05, - "loss": 0.6708, + "epoch": 0.15142555438225977, + "grad_norm": 2.253983974456787, + "learning_rate": 1.3466087306930504e-05, + "loss": 0.724, "step": 2151 }, { - "epoch": 0.16283909046195755, - "grad_norm": 2.9798154830932617, - "learning_rate": 1.7380759966804754e-05, - "loss": 0.9559, + "epoch": 0.15149595212953185, + "grad_norm": 2.2035083770751953, + "learning_rate": 1.3465475747161762e-05, + "loss": 0.784, "step": 2152 }, { - "epoch": 0.16291475918429119, - "grad_norm": 2.2334978580474854, - "learning_rate": 1.7379938726166428e-05, - "loss": 0.6963, + "epoch": 0.15156634987680395, + "grad_norm": 2.592100143432617, + "learning_rate": 1.3464863851247083e-05, + "loss": 0.7148, "step": 2153 }, { - "epoch": 0.1629904279066248, - "grad_norm": 3.03721022605896, - "learning_rate": 1.737911696074924e-05, - "loss": 0.7875, + "epoch": 0.15163674762407603, + "grad_norm": 2.2732951641082764, + "learning_rate": 1.3464251619218274e-05, + "loss": 0.8334, "step": 2154 }, { - "epoch": 0.1630660966289584, - "grad_norm": 2.496274471282959, - "learning_rate": 1.7378294670604644e-05, - "loss": 0.8951, + "epoch": 0.1517071453713481, + "grad_norm": 2.508040189743042, + "learning_rate": 1.3463639051107172e-05, + "loss": 0.8046, "step": 2155 }, { - "epoch": 0.16314176535129205, - "grad_norm": 2.7649779319763184, - "learning_rate": 1.7377471855784138e-05, - "loss": 0.8018, + "epoch": 0.1517775431186202, + "grad_norm": 2.2785561084747314, + "learning_rate": 1.3463026146945621e-05, + "loss": 0.6839, "step": 2156 }, { - "epoch": 0.16321743407362566, - "grad_norm": 5.206967353820801, - "learning_rate": 1.7376648516339247e-05, - "loss": 0.7761, + "epoch": 0.1518479408658923, + "grad_norm": 2.246443033218384, + "learning_rate": 1.3462412906765488e-05, + "loss": 0.7419, "step": 2157 }, { - "epoch": 0.1632931027959593, - "grad_norm": 2.439495086669922, - "learning_rate": 1.7375824652321533e-05, - "loss": 0.6983, + "epoch": 0.15191833861316437, + "grad_norm": 2.75984525680542, + "learning_rate": 1.3461799330598654e-05, + "loss": 0.6685, "step": 2158 }, { - "epoch": 0.1633687715182929, - "grad_norm": 2.5603365898132324, - "learning_rate": 1.737500026378259e-05, - "loss": 0.7642, + "epoch": 0.15198873636043647, + "grad_norm": 2.513911008834839, + "learning_rate": 1.346118541847702e-05, + "loss": 0.8143, "step": 2159 }, { - "epoch": 0.16344444024062654, - "grad_norm": 2.957632303237915, - "learning_rate": 1.7374175350774042e-05, - "loss": 0.829, + "epoch": 0.15205913410770855, + "grad_norm": 2.6519956588745117, + "learning_rate": 1.3460571170432507e-05, + "loss": 0.7598, "step": 2160 }, { - "epoch": 0.16352010896296015, - "grad_norm": 2.3568167686462402, - "learning_rate": 1.7373349913347546e-05, - "loss": 0.8891, + "epoch": 0.15212953185498065, + "grad_norm": 2.294637441635132, + "learning_rate": 1.3459956586497047e-05, + "loss": 0.7726, "step": 2161 }, { - "epoch": 0.1635957776852938, - "grad_norm": 2.483896017074585, - "learning_rate": 1.7372523951554797e-05, - "loss": 0.6859, + "epoch": 0.15219992960225273, + "grad_norm": 2.025163173675537, + "learning_rate": 1.3459341666702595e-05, + "loss": 0.8848, "step": 2162 }, { - "epoch": 0.1636714464076274, - "grad_norm": 2.430391788482666, - "learning_rate": 1.737169746544752e-05, - "loss": 0.8047, + "epoch": 0.1522703273495248, + "grad_norm": 2.119497299194336, + "learning_rate": 1.3458726411081117e-05, + "loss": 0.6976, "step": 2163 }, { - "epoch": 0.16374711512996104, - "grad_norm": 2.568268060684204, - "learning_rate": 1.7370870455077468e-05, - "loss": 0.6092, + "epoch": 0.15234072509679691, + "grad_norm": 1.855493426322937, + "learning_rate": 1.3458110819664604e-05, + "loss": 0.8753, "step": 2164 }, { - "epoch": 0.16382278385229465, - "grad_norm": 2.6781516075134277, - "learning_rate": 1.7370042920496433e-05, - "loss": 0.7879, + "epoch": 0.152411122844069, + "grad_norm": 2.4184579849243164, + "learning_rate": 1.3457494892485062e-05, + "loss": 0.7751, "step": 2165 }, { - "epoch": 0.1638984525746283, - "grad_norm": 2.247915506362915, - "learning_rate": 1.7369214861756238e-05, - "loss": 0.7788, + "epoch": 0.15248152059134107, + "grad_norm": 2.121934652328491, + "learning_rate": 1.345687862957451e-05, + "loss": 0.7568, "step": 2166 }, { - "epoch": 0.1639741212969619, - "grad_norm": 2.176671028137207, - "learning_rate": 1.7368386278908742e-05, - "loss": 0.8544, + "epoch": 0.15255191833861317, + "grad_norm": 2.356076240539551, + "learning_rate": 1.3456262030964993e-05, + "loss": 0.7548, "step": 2167 }, { - "epoch": 0.1640497900192955, - "grad_norm": 2.515249013900757, - "learning_rate": 1.7367557172005827e-05, - "loss": 0.7041, + "epoch": 0.15262231608588525, + "grad_norm": 2.0751259326934814, + "learning_rate": 1.3455645096688564e-05, + "loss": 0.7796, "step": 2168 }, { - "epoch": 0.16412545874162915, - "grad_norm": 2.1374387741088867, - "learning_rate": 1.736672754109942e-05, - "loss": 0.8569, + "epoch": 0.15269271383315733, + "grad_norm": 2.3328893184661865, + "learning_rate": 1.3455027826777303e-05, + "loss": 0.815, "step": 2169 }, { - "epoch": 0.16420112746396276, - "grad_norm": 2.5364511013031006, - "learning_rate": 1.7365897386241472e-05, - "loss": 0.7735, + "epoch": 0.15276311158042943, + "grad_norm": 2.4695825576782227, + "learning_rate": 1.3454410221263293e-05, + "loss": 0.7275, "step": 2170 }, { - "epoch": 0.1642767961862964, - "grad_norm": 2.0026803016662598, - "learning_rate": 1.7365066707483972e-05, - "loss": 0.8604, + "epoch": 0.1528335093277015, + "grad_norm": 2.650050401687622, + "learning_rate": 1.3453792280178656e-05, + "loss": 0.7968, "step": 2171 }, { - "epoch": 0.16435246490863, - "grad_norm": 20.266813278198242, - "learning_rate": 1.736423550487894e-05, - "loss": 0.871, + "epoch": 0.1529039070749736, + "grad_norm": 1.8130273818969727, + "learning_rate": 1.3453174003555511e-05, + "loss": 0.967, "step": 2172 }, { - "epoch": 0.16442813363096365, - "grad_norm": 1.7573026418685913, - "learning_rate": 1.736340377847843e-05, - "loss": 0.6222, + "epoch": 0.1529743048222457, + "grad_norm": 2.562685489654541, + "learning_rate": 1.3452555391426006e-05, + "loss": 0.7806, "step": 2173 }, { - "epoch": 0.16450380235329726, - "grad_norm": 2.631108522415161, - "learning_rate": 1.736257152833452e-05, - "loss": 0.7496, + "epoch": 0.15304470256951777, + "grad_norm": 2.0534095764160156, + "learning_rate": 1.3451936443822304e-05, + "loss": 0.732, "step": 2174 }, { - "epoch": 0.1645794710756309, - "grad_norm": 2.149601459503174, - "learning_rate": 1.7361738754499332e-05, - "loss": 0.7281, + "epoch": 0.15311510031678988, + "grad_norm": 1.8753949403762817, + "learning_rate": 1.3451317160776578e-05, + "loss": 0.6831, "step": 2175 }, { - "epoch": 0.1646551397979645, - "grad_norm": 2.6889591217041016, - "learning_rate": 1.736090545702502e-05, - "loss": 0.7323, + "epoch": 0.15318549806406195, + "grad_norm": 1.9486125707626343, + "learning_rate": 1.3450697542321035e-05, + "loss": 0.6278, "step": 2176 }, { - "epoch": 0.16473080852029814, - "grad_norm": 2.2632665634155273, - "learning_rate": 1.736007163596377e-05, - "loss": 0.8867, + "epoch": 0.15325589581133403, + "grad_norm": 2.0178604125976562, + "learning_rate": 1.3450077588487882e-05, + "loss": 0.9266, "step": 2177 }, { - "epoch": 0.16480647724263175, - "grad_norm": 2.988801956176758, - "learning_rate": 1.735923729136779e-05, - "loss": 0.8742, + "epoch": 0.15332629355860614, + "grad_norm": 3.1256306171417236, + "learning_rate": 1.3449457299309354e-05, + "loss": 0.7522, "step": 2178 }, { - "epoch": 0.1648821459649654, - "grad_norm": 2.2661144733428955, - "learning_rate": 1.7358402423289332e-05, - "loss": 0.6946, + "epoch": 0.1533966913058782, + "grad_norm": 2.821002244949341, + "learning_rate": 1.3448836674817702e-05, + "loss": 0.7076, "step": 2179 }, { - "epoch": 0.164957814687299, - "grad_norm": 2.401752233505249, - "learning_rate": 1.735756703178068e-05, - "loss": 0.8024, + "epoch": 0.1534670890531503, + "grad_norm": 2.4704196453094482, + "learning_rate": 1.3448215715045189e-05, + "loss": 0.7397, "step": 2180 }, { - "epoch": 0.16503348340963264, - "grad_norm": 2.7266244888305664, - "learning_rate": 1.7356731116894153e-05, - "loss": 0.7484, + "epoch": 0.1535374868004224, + "grad_norm": 2.1623754501342773, + "learning_rate": 1.3447594420024101e-05, + "loss": 0.6148, "step": 2181 }, { - "epoch": 0.16510915213196625, - "grad_norm": 3.7801167964935303, - "learning_rate": 1.7355894678682094e-05, - "loss": 0.7794, + "epoch": 0.15360788454769447, + "grad_norm": 2.4597678184509277, + "learning_rate": 1.3446972789786742e-05, + "loss": 0.7348, "step": 2182 }, { - "epoch": 0.16518482085429986, - "grad_norm": 2.931405544281006, - "learning_rate": 1.7355057717196883e-05, - "loss": 0.6981, + "epoch": 0.15367828229496655, + "grad_norm": 2.5130789279937744, + "learning_rate": 1.3446350824365426e-05, + "loss": 0.776, "step": 2183 }, { - "epoch": 0.1652604895766335, - "grad_norm": 3.314436435699463, - "learning_rate": 1.7354220232490932e-05, - "loss": 0.8774, + "epoch": 0.15374868004223866, + "grad_norm": 2.5441150665283203, + "learning_rate": 1.3445728523792495e-05, + "loss": 0.7392, "step": 2184 }, { - "epoch": 0.1653361582989671, - "grad_norm": 2.7740743160247803, - "learning_rate": 1.735338222461669e-05, - "loss": 0.6553, + "epoch": 0.15381907778951073, + "grad_norm": 2.057218074798584, + "learning_rate": 1.34451058881003e-05, + "loss": 0.6171, "step": 2185 }, { - "epoch": 0.16541182702130075, - "grad_norm": 2.309906482696533, - "learning_rate": 1.735254369362664e-05, - "loss": 0.7038, + "epoch": 0.1538894755367828, + "grad_norm": 2.3416008949279785, + "learning_rate": 1.3444482917321212e-05, + "loss": 0.7609, "step": 2186 }, { - "epoch": 0.16548749574363436, - "grad_norm": 2.7351341247558594, - "learning_rate": 1.7351704639573284e-05, - "loss": 0.7777, + "epoch": 0.15395987328405492, + "grad_norm": 1.9904509782791138, + "learning_rate": 1.344385961148762e-05, + "loss": 0.756, "step": 2187 }, { - "epoch": 0.165563164465968, - "grad_norm": 3.7266592979431152, - "learning_rate": 1.735086506250917e-05, - "loss": 0.7223, + "epoch": 0.154030271031327, + "grad_norm": 2.3506217002868652, + "learning_rate": 1.3443235970631932e-05, + "loss": 0.7757, "step": 2188 }, { - "epoch": 0.1656388331883016, - "grad_norm": 7.991847038269043, - "learning_rate": 1.7350024962486876e-05, - "loss": 0.6462, + "epoch": 0.1541006687785991, + "grad_norm": 2.0688605308532715, + "learning_rate": 1.344261199478657e-05, + "loss": 0.6806, "step": 2189 }, { - "epoch": 0.16571450191063525, - "grad_norm": 2.2966339588165283, - "learning_rate": 1.7349184339559015e-05, - "loss": 0.8108, + "epoch": 0.15417106652587118, + "grad_norm": 3.3171520233154297, + "learning_rate": 1.3441987683983976e-05, + "loss": 0.7951, "step": 2190 }, { - "epoch": 0.16579017063296886, - "grad_norm": 2.601431131362915, - "learning_rate": 1.7348343193778223e-05, - "loss": 0.7492, + "epoch": 0.15424146427314325, + "grad_norm": 2.2091095447540283, + "learning_rate": 1.344136303825661e-05, + "loss": 0.7331, "step": 2191 }, { - "epoch": 0.1658658393553025, - "grad_norm": 3.003119945526123, - "learning_rate": 1.7347501525197177e-05, - "loss": 0.7404, + "epoch": 0.15431186202041536, + "grad_norm": 2.05800199508667, + "learning_rate": 1.344073805763694e-05, + "loss": 0.7561, "step": 2192 }, { - "epoch": 0.1659415080776361, - "grad_norm": 2.2687859535217285, - "learning_rate": 1.734665933386859e-05, - "loss": 0.7979, + "epoch": 0.15438225976768744, + "grad_norm": 2.123255729675293, + "learning_rate": 1.3440112742157471e-05, + "loss": 0.8071, "step": 2193 }, { - "epoch": 0.16601717679996975, - "grad_norm": 2.463181257247925, - "learning_rate": 1.73458166198452e-05, - "loss": 0.8188, + "epoch": 0.1544526575149595, + "grad_norm": 2.015413522720337, + "learning_rate": 1.3439487091850706e-05, + "loss": 0.7592, "step": 2194 }, { - "epoch": 0.16609284552230336, - "grad_norm": 2.8143796920776367, - "learning_rate": 1.7344973383179776e-05, - "loss": 0.8257, + "epoch": 0.15452305526223162, + "grad_norm": 2.4797780513763428, + "learning_rate": 1.3438861106749176e-05, + "loss": 0.8585, "step": 2195 }, { - "epoch": 0.16616851424463697, - "grad_norm": 2.4394776821136475, - "learning_rate": 1.7344129623925128e-05, - "loss": 0.7174, + "epoch": 0.1545934530095037, + "grad_norm": 1.9205869436264038, + "learning_rate": 1.3438234786885425e-05, + "loss": 0.785, "step": 2196 }, { - "epoch": 0.1662441829669706, - "grad_norm": 2.9498252868652344, - "learning_rate": 1.7343285342134096e-05, - "loss": 0.7468, + "epoch": 0.15466385075677577, + "grad_norm": 2.130974292755127, + "learning_rate": 1.3437608132292016e-05, + "loss": 0.7876, "step": 2197 }, { - "epoch": 0.16631985168930422, - "grad_norm": 2.7809300422668457, - "learning_rate": 1.734244053785955e-05, - "loss": 0.917, + "epoch": 0.15473424850404788, + "grad_norm": 2.844980239868164, + "learning_rate": 1.3436981143001532e-05, + "loss": 0.814, "step": 2198 }, { - "epoch": 0.16639552041163785, - "grad_norm": 2.0343682765960693, - "learning_rate": 1.7341595211154397e-05, - "loss": 0.8867, + "epoch": 0.15480464625131996, + "grad_norm": 1.8679320812225342, + "learning_rate": 1.3436353819046566e-05, + "loss": 0.7283, "step": 2199 }, { - "epoch": 0.16647118913397146, - "grad_norm": 2.406003713607788, - "learning_rate": 1.7340749362071567e-05, - "loss": 0.7902, + "epoch": 0.15487504399859203, + "grad_norm": 2.1953063011169434, + "learning_rate": 1.3435726160459738e-05, + "loss": 0.6428, "step": 2200 }, { - "epoch": 0.1665468578563051, - "grad_norm": 3.3123080730438232, - "learning_rate": 1.733990299066404e-05, - "loss": 0.8509, + "epoch": 0.15494544174586414, + "grad_norm": 2.0487847328186035, + "learning_rate": 1.3435098167273676e-05, + "loss": 0.6696, "step": 2201 }, { - "epoch": 0.1666225265786387, - "grad_norm": 2.0788116455078125, - "learning_rate": 1.733905609698481e-05, - "loss": 0.6832, + "epoch": 0.15501583949313621, + "grad_norm": 2.10386061668396, + "learning_rate": 1.3434469839521036e-05, + "loss": 0.6764, "step": 2202 }, { - "epoch": 0.16669819530097235, - "grad_norm": 2.732825517654419, - "learning_rate": 1.7338208681086916e-05, - "loss": 0.789, + "epoch": 0.15508623724040832, + "grad_norm": 2.0979483127593994, + "learning_rate": 1.3433841177234479e-05, + "loss": 0.764, "step": 2203 }, { - "epoch": 0.16677386402330596, - "grad_norm": 2.7512941360473633, - "learning_rate": 1.7337360743023425e-05, - "loss": 0.9523, + "epoch": 0.1551566349876804, + "grad_norm": 2.217282295227051, + "learning_rate": 1.3433212180446693e-05, + "loss": 0.6789, "step": 2204 }, { - "epoch": 0.1668495327456396, - "grad_norm": 2.181548833847046, - "learning_rate": 1.733651228284744e-05, - "loss": 0.7516, + "epoch": 0.15522703273495247, + "grad_norm": 2.3624212741851807, + "learning_rate": 1.3432582849190379e-05, + "loss": 0.7321, "step": 2205 }, { - "epoch": 0.1669252014679732, - "grad_norm": 2.5979111194610596, - "learning_rate": 1.733566330061209e-05, - "loss": 0.861, + "epoch": 0.15529743048222458, + "grad_norm": 2.4162209033966064, + "learning_rate": 1.3431953183498257e-05, + "loss": 0.715, "step": 2206 }, { - "epoch": 0.16700087019030685, - "grad_norm": 2.890141248703003, - "learning_rate": 1.7334813796370546e-05, - "loss": 0.8529, + "epoch": 0.15536782822949666, + "grad_norm": 2.024955987930298, + "learning_rate": 1.3431323183403066e-05, + "loss": 0.7963, "step": 2207 }, { - "epoch": 0.16707653891264046, - "grad_norm": 2.580782175064087, - "learning_rate": 1.7333963770176002e-05, - "loss": 0.8297, + "epoch": 0.15543822597676873, + "grad_norm": 2.0557663440704346, + "learning_rate": 1.3430692848937555e-05, + "loss": 0.7879, "step": 2208 }, { - "epoch": 0.16715220763497407, - "grad_norm": 3.2536733150482178, - "learning_rate": 1.7333113222081692e-05, - "loss": 0.723, + "epoch": 0.15550862372404084, + "grad_norm": 3.1832854747772217, + "learning_rate": 1.34300621801345e-05, + "loss": 0.7804, "step": 2209 }, { - "epoch": 0.1672278763573077, - "grad_norm": 2.6812384128570557, - "learning_rate": 1.733226215214088e-05, - "loss": 0.84, + "epoch": 0.15557902147131292, + "grad_norm": 2.731889486312866, + "learning_rate": 1.3429431177026691e-05, + "loss": 0.7807, "step": 2210 }, { - "epoch": 0.16730354507964132, - "grad_norm": 4.129171371459961, - "learning_rate": 1.733141056040686e-05, - "loss": 0.8535, + "epoch": 0.155649419218585, + "grad_norm": 2.2495880126953125, + "learning_rate": 1.342879983964693e-05, + "loss": 0.6807, "step": 2211 }, { - "epoch": 0.16737921380197496, - "grad_norm": 2.4980597496032715, - "learning_rate": 1.7330558446932965e-05, - "loss": 0.8225, + "epoch": 0.1557198169658571, + "grad_norm": 2.3592417240142822, + "learning_rate": 1.3428168168028044e-05, + "loss": 0.7953, "step": 2212 }, { - "epoch": 0.16745488252430857, - "grad_norm": 2.615471839904785, - "learning_rate": 1.7329705811772556e-05, - "loss": 0.7097, + "epoch": 0.15579021471312918, + "grad_norm": 1.8323405981063843, + "learning_rate": 1.342753616220287e-05, + "loss": 0.7125, "step": 2213 }, { - "epoch": 0.1675305512466422, - "grad_norm": 2.604362964630127, - "learning_rate": 1.7328852654979026e-05, - "loss": 0.8121, + "epoch": 0.15586061246040125, + "grad_norm": 1.968479871749878, + "learning_rate": 1.3426903822204274e-05, + "loss": 0.7996, "step": 2214 }, { - "epoch": 0.16760621996897582, - "grad_norm": 2.1432902812957764, - "learning_rate": 1.732799897660581e-05, - "loss": 0.5405, + "epoch": 0.15593101020767336, + "grad_norm": 2.2477004528045654, + "learning_rate": 1.3426271148065126e-05, + "loss": 0.7637, "step": 2215 }, { - "epoch": 0.16768188869130946, - "grad_norm": 2.9844110012054443, - "learning_rate": 1.7327144776706355e-05, - "loss": 0.8734, + "epoch": 0.15600140795494544, + "grad_norm": 1.9478635787963867, + "learning_rate": 1.3425638139818322e-05, + "loss": 0.7581, "step": 2216 }, { - "epoch": 0.16775755741364307, - "grad_norm": 2.605469226837158, - "learning_rate": 1.7326290055334162e-05, - "loss": 0.8101, + "epoch": 0.15607180570221754, + "grad_norm": 2.189944267272949, + "learning_rate": 1.342500479749677e-05, + "loss": 0.6811, "step": 2217 }, { - "epoch": 0.1678332261359767, - "grad_norm": 3.859015464782715, - "learning_rate": 1.7325434812542757e-05, - "loss": 0.7934, + "epoch": 0.15614220344948962, + "grad_norm": 2.154630661010742, + "learning_rate": 1.3424371121133401e-05, + "loss": 0.8533, "step": 2218 }, { - "epoch": 0.16790889485831031, - "grad_norm": 2.154299020767212, - "learning_rate": 1.7324579048385696e-05, - "loss": 0.7312, + "epoch": 0.1562126011967617, + "grad_norm": 1.8565149307250977, + "learning_rate": 1.342373711076116e-05, + "loss": 0.7078, "step": 2219 }, { - "epoch": 0.16798456358064395, - "grad_norm": 3.647308111190796, - "learning_rate": 1.732372276291657e-05, - "loss": 0.7668, + "epoch": 0.1562829989440338, + "grad_norm": 2.362338066101074, + "learning_rate": 1.3423102766413006e-05, + "loss": 0.8049, "step": 2220 }, { - "epoch": 0.16806023230297756, - "grad_norm": 2.0847365856170654, - "learning_rate": 1.7322865956189003e-05, - "loss": 0.7016, + "epoch": 0.15635339669130588, + "grad_norm": 2.048003911972046, + "learning_rate": 1.3422468088121926e-05, + "loss": 0.8115, "step": 2221 }, { - "epoch": 0.16813590102531117, - "grad_norm": 2.722703695297241, - "learning_rate": 1.732200862825665e-05, - "loss": 0.7803, + "epoch": 0.15642379443857796, + "grad_norm": 1.9707751274108887, + "learning_rate": 1.3421833075920911e-05, + "loss": 0.6435, "step": 2222 }, { - "epoch": 0.1682115697476448, - "grad_norm": 2.674581527709961, - "learning_rate": 1.7321150779173197e-05, - "loss": 0.854, + "epoch": 0.15649419218585006, + "grad_norm": 2.0584957599639893, + "learning_rate": 1.3421197729842979e-05, + "loss": 0.7382, "step": 2223 }, { - "epoch": 0.16828723846997842, - "grad_norm": 3.1795260906219482, - "learning_rate": 1.732029240899237e-05, - "loss": 0.7935, + "epoch": 0.15656458993312214, + "grad_norm": 2.646688938140869, + "learning_rate": 1.3420562049921162e-05, + "loss": 0.8087, "step": 2224 }, { - "epoch": 0.16836290719231206, - "grad_norm": 2.396897792816162, - "learning_rate": 1.7319433517767923e-05, - "loss": 0.7769, + "epoch": 0.15663498768039422, + "grad_norm": 2.012413501739502, + "learning_rate": 1.3419926036188506e-05, + "loss": 0.8742, "step": 2225 }, { - "epoch": 0.16843857591464567, - "grad_norm": 2.776615619659424, - "learning_rate": 1.731857410555364e-05, - "loss": 0.6984, + "epoch": 0.15670538542766632, + "grad_norm": 2.937718391418457, + "learning_rate": 1.3419289688678085e-05, + "loss": 0.6868, "step": 2226 }, { - "epoch": 0.1685142446369793, - "grad_norm": 2.690028429031372, - "learning_rate": 1.731771417240334e-05, - "loss": 0.7615, + "epoch": 0.1567757831749384, + "grad_norm": 2.1267971992492676, + "learning_rate": 1.3418653007422977e-05, + "loss": 0.8054, "step": 2227 }, { - "epoch": 0.16858991335931292, - "grad_norm": 2.915459156036377, - "learning_rate": 1.731685371837088e-05, - "loss": 0.9331, + "epoch": 0.15684618092221048, + "grad_norm": 2.001481294631958, + "learning_rate": 1.3418015992456289e-05, + "loss": 0.7436, "step": 2228 }, { - "epoch": 0.16866558208164656, - "grad_norm": 2.515017509460449, - "learning_rate": 1.7315992743510135e-05, - "loss": 0.7996, + "epoch": 0.15691657866948258, + "grad_norm": 2.16641902923584, + "learning_rate": 1.3417378643811137e-05, + "loss": 0.7993, "step": 2229 }, { - "epoch": 0.16874125080398017, - "grad_norm": 2.5439369678497314, - "learning_rate": 1.7315131247875028e-05, - "loss": 0.873, + "epoch": 0.15698697641675466, + "grad_norm": 2.2185988426208496, + "learning_rate": 1.3416740961520656e-05, + "loss": 0.6478, "step": 2230 }, { - "epoch": 0.1688169195263138, - "grad_norm": 3.400592803955078, - "learning_rate": 1.7314269231519512e-05, - "loss": 0.8382, + "epoch": 0.15705737416402676, + "grad_norm": 2.1004598140716553, + "learning_rate": 1.3416102945617998e-05, + "loss": 0.7673, "step": 2231 }, { - "epoch": 0.16889258824864742, - "grad_norm": 3.8131964206695557, - "learning_rate": 1.7313406694497562e-05, - "loss": 0.786, + "epoch": 0.15712777191129884, + "grad_norm": 1.9330203533172607, + "learning_rate": 1.3415464596136342e-05, + "loss": 0.685, "step": 2232 }, { - "epoch": 0.16896825697098106, - "grad_norm": 2.194751501083374, - "learning_rate": 1.7312543636863197e-05, - "loss": 0.7376, + "epoch": 0.15719816965857092, + "grad_norm": 3.6383349895477295, + "learning_rate": 1.3414825913108868e-05, + "loss": 0.7829, "step": 2233 }, { - "epoch": 0.16904392569331467, - "grad_norm": 2.373616933822632, - "learning_rate": 1.731168005867046e-05, - "loss": 0.6248, + "epoch": 0.15726856740584302, + "grad_norm": 2.0717153549194336, + "learning_rate": 1.3414186896568789e-05, + "loss": 0.7173, "step": 2234 }, { - "epoch": 0.1691195944156483, - "grad_norm": 2.5149641036987305, - "learning_rate": 1.731081595997344e-05, - "loss": 0.8259, + "epoch": 0.1573389651531151, + "grad_norm": 2.290285587310791, + "learning_rate": 1.3413547546549322e-05, + "loss": 0.7528, "step": 2235 }, { - "epoch": 0.16919526313798192, - "grad_norm": 2.6134889125823975, - "learning_rate": 1.730995134082624e-05, - "loss": 0.804, + "epoch": 0.15740936290038718, + "grad_norm": 2.3462352752685547, + "learning_rate": 1.3412907863083707e-05, + "loss": 0.7825, "step": 2236 }, { - "epoch": 0.16927093186031553, - "grad_norm": 2.909189462661743, - "learning_rate": 1.730908620128301e-05, - "loss": 0.8914, + "epoch": 0.15747976064765928, + "grad_norm": 2.1603755950927734, + "learning_rate": 1.3412267846205208e-05, + "loss": 0.707, "step": 2237 }, { - "epoch": 0.16934660058264916, - "grad_norm": 2.5435116291046143, - "learning_rate": 1.7308220541397926e-05, - "loss": 0.8368, + "epoch": 0.15755015839493136, + "grad_norm": 1.8994009494781494, + "learning_rate": 1.3411627495947095e-05, + "loss": 0.7562, "step": 2238 }, { - "epoch": 0.16942226930498278, - "grad_norm": 2.7107224464416504, - "learning_rate": 1.7307354361225204e-05, - "loss": 0.9474, + "epoch": 0.15762055614220344, + "grad_norm": 2.3450326919555664, + "learning_rate": 1.3410986812342663e-05, + "loss": 0.7931, "step": 2239 }, { - "epoch": 0.1694979380273164, - "grad_norm": 2.7715609073638916, - "learning_rate": 1.730648766081908e-05, - "loss": 0.7627, + "epoch": 0.15769095388947554, + "grad_norm": 2.3198678493499756, + "learning_rate": 1.341034579542522e-05, + "loss": 0.8035, "step": 2240 }, { - "epoch": 0.16957360674965002, - "grad_norm": 2.654773473739624, - "learning_rate": 1.730562044023383e-05, - "loss": 0.7729, + "epoch": 0.15776135163674762, + "grad_norm": 1.978468656539917, + "learning_rate": 1.340970444522809e-05, + "loss": 0.7396, "step": 2241 }, { - "epoch": 0.16964927547198366, - "grad_norm": 2.386650800704956, - "learning_rate": 1.730475269952377e-05, - "loss": 0.6608, + "epoch": 0.1578317493840197, + "grad_norm": 2.1059083938598633, + "learning_rate": 1.3409062761784625e-05, + "loss": 0.703, "step": 2242 }, { - "epoch": 0.16972494419431727, - "grad_norm": 2.307753086090088, - "learning_rate": 1.730388443874323e-05, - "loss": 0.7689, + "epoch": 0.1579021471312918, + "grad_norm": 2.486313581466675, + "learning_rate": 1.3408420745128184e-05, + "loss": 0.6329, "step": 2243 }, { - "epoch": 0.1698006129166509, - "grad_norm": 2.196772336959839, - "learning_rate": 1.7303015657946592e-05, - "loss": 0.7859, + "epoch": 0.15797254487856388, + "grad_norm": 2.99828839302063, + "learning_rate": 1.3407778395292139e-05, + "loss": 0.7217, "step": 2244 }, { - "epoch": 0.16987628163898452, - "grad_norm": 2.593203544616699, - "learning_rate": 1.730214635718826e-05, - "loss": 0.7733, + "epoch": 0.15804294262583599, + "grad_norm": 2.048733949661255, + "learning_rate": 1.3407135712309897e-05, + "loss": 0.7864, "step": 2245 }, { - "epoch": 0.16995195036131816, - "grad_norm": 2.494314193725586, - "learning_rate": 1.7301276536522664e-05, - "loss": 0.7156, + "epoch": 0.15811334037310806, + "grad_norm": 2.0664103031158447, + "learning_rate": 1.3406492696214864e-05, + "loss": 0.8935, "step": 2246 }, { - "epoch": 0.17002761908365177, - "grad_norm": 2.721299648284912, - "learning_rate": 1.7300406196004286e-05, - "loss": 0.738, + "epoch": 0.15818373812038014, + "grad_norm": 2.1033244132995605, + "learning_rate": 1.3405849347040475e-05, + "loss": 0.7721, "step": 2247 }, { - "epoch": 0.1701032878059854, - "grad_norm": 2.1940648555755615, - "learning_rate": 1.7299535335687622e-05, - "loss": 0.7942, + "epoch": 0.15825413586765225, + "grad_norm": 1.955871820449829, + "learning_rate": 1.3405205664820177e-05, + "loss": 0.739, "step": 2248 }, { - "epoch": 0.17017895652831902, - "grad_norm": 2.7148277759552, - "learning_rate": 1.7298663955627216e-05, - "loss": 0.8078, + "epoch": 0.15832453361492432, + "grad_norm": 5.485831260681152, + "learning_rate": 1.3404561649587433e-05, + "loss": 0.6948, "step": 2249 }, { - "epoch": 0.17025462525065263, - "grad_norm": 3.0322153568267822, - "learning_rate": 1.729779205587763e-05, - "loss": 0.9257, + "epoch": 0.1583949313621964, + "grad_norm": 2.0183870792388916, + "learning_rate": 1.340391730137573e-05, + "loss": 0.7003, "step": 2250 }, { - "epoch": 0.17033029397298627, - "grad_norm": 2.168626546859741, - "learning_rate": 1.7296919636493464e-05, - "loss": 0.8326, + "epoch": 0.1584653291094685, + "grad_norm": 1.9296573400497437, + "learning_rate": 1.3403272620218569e-05, + "loss": 0.6879, "step": 2251 }, { - "epoch": 0.17040596269531988, - "grad_norm": 2.8294248580932617, - "learning_rate": 1.729604669752936e-05, - "loss": 0.8557, + "epoch": 0.15853572685674058, + "grad_norm": 2.3384599685668945, + "learning_rate": 1.3402627606149464e-05, + "loss": 0.8641, "step": 2252 }, { - "epoch": 0.17048163141765352, - "grad_norm": 2.4835293292999268, - "learning_rate": 1.7295173239039975e-05, - "loss": 0.7724, + "epoch": 0.15860612460401266, + "grad_norm": 2.263939380645752, + "learning_rate": 1.3401982259201952e-05, + "loss": 0.7736, "step": 2253 }, { - "epoch": 0.17055730013998713, - "grad_norm": 3.3324198722839355, - "learning_rate": 1.7294299261080015e-05, - "loss": 0.7424, + "epoch": 0.15867652235128477, + "grad_norm": 1.8373514413833618, + "learning_rate": 1.340133657940958e-05, + "loss": 0.7518, "step": 2254 }, { - "epoch": 0.17063296886232077, - "grad_norm": 2.104118824005127, - "learning_rate": 1.7293424763704206e-05, - "loss": 0.7898, + "epoch": 0.15874692009855684, + "grad_norm": 3.5653927326202393, + "learning_rate": 1.3400690566805927e-05, + "loss": 0.7721, "step": 2255 }, { - "epoch": 0.17070863758465438, - "grad_norm": 2.7089343070983887, - "learning_rate": 1.7292549746967316e-05, - "loss": 0.9443, + "epoch": 0.15881731784582892, + "grad_norm": 2.4890785217285156, + "learning_rate": 1.3400044221424574e-05, + "loss": 0.7006, "step": 2256 }, { - "epoch": 0.17078430630698802, - "grad_norm": 3.223379611968994, - "learning_rate": 1.7291674210924138e-05, - "loss": 0.7764, + "epoch": 0.15888771559310103, + "grad_norm": 2.1866347789764404, + "learning_rate": 1.3399397543299124e-05, + "loss": 0.6805, "step": 2257 }, { - "epoch": 0.17085997502932163, - "grad_norm": 2.709465980529785, - "learning_rate": 1.7290798155629502e-05, - "loss": 0.7964, + "epoch": 0.1589581133403731, + "grad_norm": 2.399622917175293, + "learning_rate": 1.33987505324632e-05, + "loss": 0.6942, "step": 2258 }, { - "epoch": 0.17093564375165526, - "grad_norm": 2.5600428581237793, - "learning_rate": 1.7289921581138273e-05, - "loss": 0.7413, + "epoch": 0.1590285110876452, + "grad_norm": 2.1519815921783447, + "learning_rate": 1.3398103188950441e-05, + "loss": 0.7403, "step": 2259 }, { - "epoch": 0.17101131247398887, - "grad_norm": 1.963610053062439, - "learning_rate": 1.7289044487505337e-05, - "loss": 0.6844, + "epoch": 0.15909890883491729, + "grad_norm": 2.3854563236236572, + "learning_rate": 1.3397455512794502e-05, + "loss": 0.735, "step": 2260 }, { - "epoch": 0.1710869811963225, - "grad_norm": 2.8103370666503906, - "learning_rate": 1.728816687478563e-05, - "loss": 0.6223, + "epoch": 0.15916930658218936, + "grad_norm": 2.7952616214752197, + "learning_rate": 1.339680750402906e-05, + "loss": 0.7499, "step": 2261 }, { - "epoch": 0.17116264991865612, - "grad_norm": 2.9981131553649902, - "learning_rate": 1.7287288743034103e-05, - "loss": 0.7519, + "epoch": 0.15923970432946147, + "grad_norm": 2.271498203277588, + "learning_rate": 1.3396159162687799e-05, + "loss": 0.8105, "step": 2262 }, { - "epoch": 0.17123831864098973, - "grad_norm": 2.655627965927124, - "learning_rate": 1.728641009230575e-05, - "loss": 0.7625, + "epoch": 0.15931010207673355, + "grad_norm": 2.264178991317749, + "learning_rate": 1.3395510488804431e-05, + "loss": 0.7843, "step": 2263 }, { - "epoch": 0.17131398736332337, - "grad_norm": 2.775040626525879, - "learning_rate": 1.72855309226556e-05, - "loss": 0.7466, + "epoch": 0.15938049982400562, + "grad_norm": 3.0443201065063477, + "learning_rate": 1.3394861482412683e-05, + "loss": 0.6566, "step": 2264 }, { - "epoch": 0.17138965608565698, - "grad_norm": 3.4195356369018555, - "learning_rate": 1.72846512341387e-05, - "loss": 0.6637, + "epoch": 0.15945089757127773, + "grad_norm": 2.3530914783477783, + "learning_rate": 1.3394212143546293e-05, + "loss": 0.6824, "step": 2265 }, { - "epoch": 0.17146532480799062, - "grad_norm": 3.398912191390991, - "learning_rate": 1.7283771026810144e-05, - "loss": 0.7456, + "epoch": 0.1595212953185498, + "grad_norm": 2.1352293491363525, + "learning_rate": 1.3393562472239024e-05, + "loss": 0.8303, "step": 2266 }, { - "epoch": 0.17154099353032423, - "grad_norm": 2.160043478012085, - "learning_rate": 1.7282890300725054e-05, - "loss": 0.6909, + "epoch": 0.15959169306582188, + "grad_norm": 2.3257193565368652, + "learning_rate": 1.3392912468524651e-05, + "loss": 0.7825, "step": 2267 }, { - "epoch": 0.17161666225265787, - "grad_norm": 1.9356389045715332, - "learning_rate": 1.7282009055938587e-05, - "loss": 0.7153, + "epoch": 0.159662090813094, + "grad_norm": 1.9754266738891602, + "learning_rate": 1.339226213243697e-05, + "loss": 0.6425, "step": 2268 }, { - "epoch": 0.17169233097499148, - "grad_norm": 3.335268259048462, - "learning_rate": 1.728112729250592e-05, - "loss": 0.6971, + "epoch": 0.15973248856036606, + "grad_norm": 1.8860704898834229, + "learning_rate": 1.339161146400979e-05, + "loss": 0.8461, "step": 2269 }, { - "epoch": 0.17176799969732512, - "grad_norm": 2.4439728260040283, - "learning_rate": 1.728024501048228e-05, - "loss": 0.9026, + "epoch": 0.15980288630763814, + "grad_norm": 2.1612155437469482, + "learning_rate": 1.3390960463276942e-05, + "loss": 0.8282, "step": 2270 }, { - "epoch": 0.17184366841965873, - "grad_norm": 3.08240008354187, - "learning_rate": 1.7279362209922922e-05, - "loss": 0.7948, + "epoch": 0.15987328405491025, + "grad_norm": 2.236626625061035, + "learning_rate": 1.339030913027227e-05, + "loss": 0.7745, "step": 2271 }, { - "epoch": 0.17191933714199237, - "grad_norm": 2.764417886734009, - "learning_rate": 1.7278478890883115e-05, - "loss": 0.8714, + "epoch": 0.15994368180218232, + "grad_norm": 2.0775482654571533, + "learning_rate": 1.3389657465029642e-05, + "loss": 0.6815, "step": 2272 }, { - "epoch": 0.17199500586432598, - "grad_norm": 2.609437942504883, - "learning_rate": 1.727759505341819e-05, - "loss": 0.7332, + "epoch": 0.16001407954945443, + "grad_norm": 2.5543296337127686, + "learning_rate": 1.3389005467582935e-05, + "loss": 0.8643, "step": 2273 }, { - "epoch": 0.17207067458665962, - "grad_norm": 2.7951455116271973, - "learning_rate": 1.7276710697583485e-05, - "loss": 0.7675, + "epoch": 0.1600844772967265, + "grad_norm": 2.5123369693756104, + "learning_rate": 1.3388353137966045e-05, + "loss": 0.8307, "step": 2274 }, { - "epoch": 0.17214634330899323, - "grad_norm": 2.2547028064727783, - "learning_rate": 1.7275825823434386e-05, - "loss": 0.7684, + "epoch": 0.16015487504399858, + "grad_norm": 2.1633782386779785, + "learning_rate": 1.3387700476212891e-05, + "loss": 0.7254, "step": 2275 }, { - "epoch": 0.17222201203132684, - "grad_norm": 2.3958706855773926, - "learning_rate": 1.727494043102631e-05, - "loss": 0.7689, + "epoch": 0.1602252727912707, + "grad_norm": 2.167743444442749, + "learning_rate": 1.3387047482357404e-05, + "loss": 0.8127, "step": 2276 }, { - "epoch": 0.17229768075366048, - "grad_norm": 2.6258463859558105, - "learning_rate": 1.7274054520414697e-05, - "loss": 0.6702, + "epoch": 0.16029567053854277, + "grad_norm": 2.1557769775390625, + "learning_rate": 1.3386394156433536e-05, + "loss": 0.7612, "step": 2277 }, { - "epoch": 0.1723733494759941, - "grad_norm": 2.128289222717285, - "learning_rate": 1.7273168091655028e-05, - "loss": 0.8102, + "epoch": 0.16036606828581484, + "grad_norm": 2.0518486499786377, + "learning_rate": 1.338574049847525e-05, + "loss": 0.7088, "step": 2278 }, { - "epoch": 0.17244901819832772, - "grad_norm": 2.5921168327331543, - "learning_rate": 1.727228114480282e-05, - "loss": 0.7875, + "epoch": 0.16043646603308695, + "grad_norm": 2.3461802005767822, + "learning_rate": 1.3385086508516531e-05, + "loss": 0.8245, "step": 2279 }, { - "epoch": 0.17252468692066134, - "grad_norm": 2.994091033935547, - "learning_rate": 1.7271393679913604e-05, - "loss": 0.8095, + "epoch": 0.16050686378035903, + "grad_norm": 2.3803043365478516, + "learning_rate": 1.3384432186591385e-05, + "loss": 0.7421, "step": 2280 }, { - "epoch": 0.17260035564299497, - "grad_norm": 2.6522233486175537, - "learning_rate": 1.7270505697042966e-05, - "loss": 0.8525, + "epoch": 0.1605772615276311, + "grad_norm": 1.9778993129730225, + "learning_rate": 1.3383777532733826e-05, + "loss": 0.8539, "step": 2281 }, { - "epoch": 0.17267602436532858, - "grad_norm": 1.99582040309906, - "learning_rate": 1.7269617196246514e-05, - "loss": 0.7875, + "epoch": 0.1606476592749032, + "grad_norm": 2.046851873397827, + "learning_rate": 1.3383122546977891e-05, + "loss": 0.6805, "step": 2282 }, { - "epoch": 0.17275169308766222, - "grad_norm": 2.60740065574646, - "learning_rate": 1.726872817757988e-05, - "loss": 0.8859, + "epoch": 0.1607180570221753, + "grad_norm": 2.5133917331695557, + "learning_rate": 1.3382467229357634e-05, + "loss": 0.6588, "step": 2283 }, { - "epoch": 0.17282736180999583, - "grad_norm": 1.9426453113555908, - "learning_rate": 1.7267838641098748e-05, - "loss": 0.9347, + "epoch": 0.16078845476944736, + "grad_norm": 2.1003623008728027, + "learning_rate": 1.3381811579907128e-05, + "loss": 0.6426, "step": 2284 }, { - "epoch": 0.17290303053232947, - "grad_norm": 2.676067352294922, - "learning_rate": 1.7266948586858816e-05, - "loss": 0.674, + "epoch": 0.16085885251671947, + "grad_norm": 2.269505500793457, + "learning_rate": 1.3381155598660454e-05, + "loss": 0.7446, "step": 2285 }, { - "epoch": 0.17297869925466308, - "grad_norm": 2.255591869354248, - "learning_rate": 1.7266058014915826e-05, - "loss": 0.7917, + "epoch": 0.16092925026399155, + "grad_norm": 2.2276012897491455, + "learning_rate": 1.3380499285651723e-05, + "loss": 0.6928, "step": 2286 }, { - "epoch": 0.17305436797699672, - "grad_norm": 2.7783968448638916, - "learning_rate": 1.7265166925325547e-05, - "loss": 0.8044, + "epoch": 0.16099964801126365, + "grad_norm": 2.2092926502227783, + "learning_rate": 1.3379842640915057e-05, + "loss": 0.7758, "step": 2287 }, { - "epoch": 0.17313003669933033, - "grad_norm": 2.7550559043884277, - "learning_rate": 1.7264275318143784e-05, - "loss": 0.6446, + "epoch": 0.16107004575853573, + "grad_norm": 2.1171512603759766, + "learning_rate": 1.3379185664484592e-05, + "loss": 0.7819, "step": 2288 }, { - "epoch": 0.17320570542166394, - "grad_norm": 3.500746488571167, - "learning_rate": 1.726338319342637e-05, - "loss": 0.8033, + "epoch": 0.1611404435058078, + "grad_norm": 2.269700288772583, + "learning_rate": 1.3378528356394488e-05, + "loss": 0.7713, "step": 2289 }, { - "epoch": 0.17328137414399758, - "grad_norm": 2.1858432292938232, - "learning_rate": 1.7262490551229173e-05, - "loss": 0.647, + "epoch": 0.1612108412530799, + "grad_norm": 2.1459641456604004, + "learning_rate": 1.3377870716678918e-05, + "loss": 0.7447, "step": 2290 }, { - "epoch": 0.1733570428663312, - "grad_norm": 2.3065178394317627, - "learning_rate": 1.726159739160809e-05, - "loss": 0.9231, + "epoch": 0.161281239000352, + "grad_norm": 1.8733084201812744, + "learning_rate": 1.3377212745372074e-05, + "loss": 0.7936, "step": 2291 }, { - "epoch": 0.17343271158866483, - "grad_norm": 2.7201311588287354, - "learning_rate": 1.7260703714619062e-05, - "loss": 0.8506, + "epoch": 0.16135163674762407, + "grad_norm": 2.2887446880340576, + "learning_rate": 1.3376554442508163e-05, + "loss": 0.8308, "step": 2292 }, { - "epoch": 0.17350838031099844, - "grad_norm": 2.0879547595977783, - "learning_rate": 1.725980952031805e-05, - "loss": 0.8429, + "epoch": 0.16142203449489617, + "grad_norm": 2.080447196960449, + "learning_rate": 1.3375895808121412e-05, + "loss": 0.7351, "step": 2293 }, { - "epoch": 0.17358404903333208, - "grad_norm": 6.11818265914917, - "learning_rate": 1.7258914808761048e-05, - "loss": 0.6266, + "epoch": 0.16149243224216825, + "grad_norm": 2.197416305541992, + "learning_rate": 1.3375236842246063e-05, + "loss": 0.7453, "step": 2294 }, { - "epoch": 0.1736597177556657, - "grad_norm": 2.2385120391845703, - "learning_rate": 1.7258019580004084e-05, - "loss": 0.8526, + "epoch": 0.16156282998944033, + "grad_norm": 2.067242383956909, + "learning_rate": 1.3374577544916378e-05, + "loss": 0.7766, "step": 2295 }, { - "epoch": 0.17373538647799933, - "grad_norm": 3.8654024600982666, - "learning_rate": 1.725712383410323e-05, - "loss": 0.7531, + "epoch": 0.16163322773671243, + "grad_norm": 2.0386738777160645, + "learning_rate": 1.3373917916166632e-05, + "loss": 0.6798, "step": 2296 }, { - "epoch": 0.17381105520033294, - "grad_norm": 2.5260634422302246, - "learning_rate": 1.7256227571114577e-05, - "loss": 0.7502, + "epoch": 0.1617036254839845, + "grad_norm": 1.988314151763916, + "learning_rate": 1.3373257956031122e-05, + "loss": 0.8025, "step": 2297 }, { - "epoch": 0.17388672392266658, - "grad_norm": 2.305957794189453, - "learning_rate": 1.7255330791094244e-05, - "loss": 0.7199, + "epoch": 0.1617740232312566, + "grad_norm": 2.455324649810791, + "learning_rate": 1.3372597664544159e-05, + "loss": 0.8314, "step": 2298 }, { - "epoch": 0.17396239264500019, - "grad_norm": 2.708401918411255, - "learning_rate": 1.7254433494098393e-05, - "loss": 0.762, + "epoch": 0.1618444209785287, + "grad_norm": 2.0391433238983154, + "learning_rate": 1.337193704174007e-05, + "loss": 0.841, "step": 2299 }, { - "epoch": 0.17403806136733382, - "grad_norm": 3.0765719413757324, - "learning_rate": 1.7253535680183228e-05, - "loss": 0.6405, + "epoch": 0.16191481872580077, + "grad_norm": 1.9973831176757812, + "learning_rate": 1.3371276087653205e-05, + "loss": 0.7076, "step": 2300 }, { - "epoch": 0.17411373008966743, - "grad_norm": 2.6883769035339355, - "learning_rate": 1.7252637349404956e-05, - "loss": 0.7477, + "epoch": 0.16198521647307287, + "grad_norm": 2.609011650085449, + "learning_rate": 1.3370614802317926e-05, + "loss": 0.8251, "step": 2301 }, { - "epoch": 0.17418939881200107, - "grad_norm": 5.647037982940674, - "learning_rate": 1.725173850181984e-05, - "loss": 0.833, + "epoch": 0.16205561422034495, + "grad_norm": 2.0627872943878174, + "learning_rate": 1.3369953185768614e-05, + "loss": 0.7732, "step": 2302 }, { - "epoch": 0.17426506753433468, - "grad_norm": 2.7593023777008057, - "learning_rate": 1.725083913748418e-05, - "loss": 0.8364, + "epoch": 0.16212601196761703, + "grad_norm": 2.197601318359375, + "learning_rate": 1.3369291238039666e-05, + "loss": 0.7507, "step": 2303 }, { - "epoch": 0.1743407362566683, - "grad_norm": 2.6544225215911865, - "learning_rate": 1.7249939256454277e-05, - "loss": 0.7959, + "epoch": 0.16219640971488913, + "grad_norm": 2.254216432571411, + "learning_rate": 1.33686289591655e-05, + "loss": 0.7857, "step": 2304 }, { - "epoch": 0.17441640497900193, - "grad_norm": 2.8478567600250244, - "learning_rate": 1.7249038858786496e-05, - "loss": 0.8185, + "epoch": 0.1622668074621612, + "grad_norm": 2.0232017040252686, + "learning_rate": 1.3367966349180545e-05, + "loss": 0.7182, "step": 2305 }, { - "epoch": 0.17449207370133554, - "grad_norm": 2.937596082687378, - "learning_rate": 1.7248137944537224e-05, - "loss": 0.7666, + "epoch": 0.1623372052094333, + "grad_norm": 2.4330008029937744, + "learning_rate": 1.3367303408119252e-05, + "loss": 0.6904, "step": 2306 }, { - "epoch": 0.17456774242366918, - "grad_norm": 2.3636603355407715, - "learning_rate": 1.7247236513762876e-05, - "loss": 0.8019, + "epoch": 0.1624076029567054, + "grad_norm": 2.12690806388855, + "learning_rate": 1.336664013601609e-05, + "loss": 0.8127, "step": 2307 }, { - "epoch": 0.1746434111460028, - "grad_norm": 2.4322621822357178, - "learning_rate": 1.72463345665199e-05, - "loss": 0.7484, + "epoch": 0.16247800070397747, + "grad_norm": 2.2876992225646973, + "learning_rate": 1.336597653290554e-05, + "loss": 0.7146, "step": 2308 }, { - "epoch": 0.17471907986833643, - "grad_norm": 2.590067148208618, - "learning_rate": 1.7245432102864782e-05, - "loss": 0.6762, + "epoch": 0.16254839845124955, + "grad_norm": 2.1194303035736084, + "learning_rate": 1.3365312598822104e-05, + "loss": 0.7191, "step": 2309 }, { - "epoch": 0.17479474859067004, - "grad_norm": 2.2933037281036377, - "learning_rate": 1.7244529122854035e-05, - "loss": 0.7488, + "epoch": 0.16261879619852165, + "grad_norm": 2.3384408950805664, + "learning_rate": 1.3364648333800304e-05, + "loss": 0.6689, "step": 2310 }, { - "epoch": 0.17487041731300368, - "grad_norm": 2.3996517658233643, - "learning_rate": 1.724362562654421e-05, - "loss": 0.7258, + "epoch": 0.16268919394579373, + "grad_norm": 2.0537619590759277, + "learning_rate": 1.3363983737874669e-05, + "loss": 0.7001, "step": 2311 }, { - "epoch": 0.1749460860353373, - "grad_norm": 2.4362945556640625, - "learning_rate": 1.7242721613991887e-05, - "loss": 0.743, + "epoch": 0.1627595916930658, + "grad_norm": 2.058896541595459, + "learning_rate": 1.336331881107976e-05, + "loss": 0.7524, "step": 2312 }, { - "epoch": 0.17502175475767093, - "grad_norm": 2.572498321533203, - "learning_rate": 1.7241817085253678e-05, - "loss": 0.7258, + "epoch": 0.1628299894403379, + "grad_norm": 2.3189120292663574, + "learning_rate": 1.336265355345014e-05, + "loss": 0.7763, "step": 2313 }, { - "epoch": 0.17509742348000454, - "grad_norm": 2.252002477645874, - "learning_rate": 1.724091204038622e-05, - "loss": 0.752, + "epoch": 0.16290038718761, + "grad_norm": 2.5794613361358643, + "learning_rate": 1.3361987965020401e-05, + "loss": 0.8221, "step": 2314 }, { - "epoch": 0.17517309220233818, - "grad_norm": 2.6183085441589355, - "learning_rate": 1.7240006479446202e-05, - "loss": 0.5796, + "epoch": 0.1629707849348821, + "grad_norm": 2.224811315536499, + "learning_rate": 1.3361322045825145e-05, + "loss": 0.6423, "step": 2315 }, { - "epoch": 0.1752487609246718, - "grad_norm": 3.0919864177703857, - "learning_rate": 1.723910040249032e-05, - "loss": 0.841, + "epoch": 0.16304118268215417, + "grad_norm": 2.902578353881836, + "learning_rate": 1.3360655795898997e-05, + "loss": 0.7404, "step": 2316 }, { - "epoch": 0.1753244296470054, - "grad_norm": 2.5486013889312744, - "learning_rate": 1.7238193809575325e-05, - "loss": 0.8376, + "epoch": 0.16311158042942625, + "grad_norm": 2.059739351272583, + "learning_rate": 1.3359989215276592e-05, + "loss": 0.7916, "step": 2317 }, { - "epoch": 0.17540009836933904, - "grad_norm": 2.988577127456665, - "learning_rate": 1.723728670075799e-05, - "loss": 0.8065, + "epoch": 0.16318197817669836, + "grad_norm": 2.0754635334014893, + "learning_rate": 1.3359322303992587e-05, + "loss": 0.8201, "step": 2318 }, { - "epoch": 0.17547576709167265, - "grad_norm": 2.734192371368408, - "learning_rate": 1.7236379076095118e-05, - "loss": 0.7786, + "epoch": 0.16325237592397043, + "grad_norm": 2.681429386138916, + "learning_rate": 1.3358655062081655e-05, + "loss": 0.8236, "step": 2319 }, { - "epoch": 0.17555143581400628, - "grad_norm": 2.8744254112243652, - "learning_rate": 1.723547093564355e-05, - "loss": 0.8307, + "epoch": 0.1633227736712425, + "grad_norm": 2.0360195636749268, + "learning_rate": 1.3357987489578492e-05, + "loss": 0.7576, "step": 2320 }, { - "epoch": 0.1756271045363399, - "grad_norm": 2.407675266265869, - "learning_rate": 1.7234562279460156e-05, - "loss": 0.7314, + "epoch": 0.16339317141851462, + "grad_norm": 1.8847503662109375, + "learning_rate": 1.3357319586517797e-05, + "loss": 0.8359, "step": 2321 }, { - "epoch": 0.17570277325867353, - "grad_norm": 2.588304281234741, - "learning_rate": 1.7233653107601833e-05, - "loss": 0.7172, + "epoch": 0.1634635691657867, + "grad_norm": 1.9458824396133423, + "learning_rate": 1.33566513529343e-05, + "loss": 0.8049, "step": 2322 }, { - "epoch": 0.17577844198100714, - "grad_norm": 2.3886618614196777, - "learning_rate": 1.7232743420125526e-05, - "loss": 0.9478, + "epoch": 0.16353396691305877, + "grad_norm": 2.267979383468628, + "learning_rate": 1.3355982788862744e-05, + "loss": 0.6165, "step": 2323 }, { - "epoch": 0.17585411070334078, - "grad_norm": 3.020280599594116, - "learning_rate": 1.7231833217088195e-05, - "loss": 0.8497, + "epoch": 0.16360436466033088, + "grad_norm": 1.9858193397521973, + "learning_rate": 1.3355313894337885e-05, + "loss": 0.8363, "step": 2324 }, { - "epoch": 0.1759297794256744, - "grad_norm": 2.5860559940338135, - "learning_rate": 1.7230922498546847e-05, - "loss": 0.8636, + "epoch": 0.16367476240760295, + "grad_norm": 1.9259897470474243, + "learning_rate": 1.3354644669394502e-05, + "loss": 0.8054, "step": 2325 }, { - "epoch": 0.17600544814800803, - "grad_norm": 2.4977405071258545, - "learning_rate": 1.7230011264558506e-05, - "loss": 0.8239, + "epoch": 0.16374516015487503, + "grad_norm": 2.0841140747070312, + "learning_rate": 1.3353975114067387e-05, + "loss": 0.7083, "step": 2326 }, { - "epoch": 0.17608111687034164, - "grad_norm": 2.03897762298584, - "learning_rate": 1.7229099515180243e-05, - "loss": 0.6944, + "epoch": 0.16381555790214714, + "grad_norm": 1.9425593614578247, + "learning_rate": 1.335330522839135e-05, + "loss": 0.7354, "step": 2327 }, { - "epoch": 0.17615678559267528, - "grad_norm": 3.2319719791412354, - "learning_rate": 1.7228187250469154e-05, - "loss": 0.9229, + "epoch": 0.1638859556494192, + "grad_norm": 1.9074454307556152, + "learning_rate": 1.3352635012401224e-05, + "loss": 0.7817, "step": 2328 }, { - "epoch": 0.1762324543150089, - "grad_norm": 2.1355583667755127, - "learning_rate": 1.7227274470482363e-05, - "loss": 0.9233, + "epoch": 0.16395635339669132, + "grad_norm": 1.9436166286468506, + "learning_rate": 1.3351964466131845e-05, + "loss": 0.7134, "step": 2329 }, { - "epoch": 0.1763081230373425, - "grad_norm": 2.936140298843384, - "learning_rate": 1.7226361175277034e-05, - "loss": 0.6154, + "epoch": 0.1640267511439634, + "grad_norm": 2.320754289627075, + "learning_rate": 1.3351293589618086e-05, + "loss": 0.7886, "step": 2330 }, { - "epoch": 0.17638379175967614, - "grad_norm": 2.1924805641174316, - "learning_rate": 1.7225447364910364e-05, - "loss": 0.9198, + "epoch": 0.16409714889123547, + "grad_norm": 2.362657308578491, + "learning_rate": 1.3350622382894817e-05, + "loss": 0.7941, "step": 2331 }, { - "epoch": 0.17645946048200975, - "grad_norm": 2.7870981693267822, - "learning_rate": 1.7224533039439573e-05, - "loss": 0.7164, + "epoch": 0.16416754663850758, + "grad_norm": 2.320167064666748, + "learning_rate": 1.3349950845996942e-05, + "loss": 0.7494, "step": 2332 }, { - "epoch": 0.1765351292043434, - "grad_norm": 2.4783294200897217, - "learning_rate": 1.722361819892192e-05, - "loss": 0.6998, + "epoch": 0.16423794438577966, + "grad_norm": 2.1466825008392334, + "learning_rate": 1.334927897895937e-05, + "loss": 0.8336, "step": 2333 }, { - "epoch": 0.176610797926677, - "grad_norm": 2.5335793495178223, - "learning_rate": 1.7222702843414703e-05, - "loss": 0.7745, + "epoch": 0.16430834213305173, + "grad_norm": 2.160548448562622, + "learning_rate": 1.3348606781817034e-05, + "loss": 0.7543, "step": 2334 }, { - "epoch": 0.17668646664901064, - "grad_norm": 2.6729066371917725, - "learning_rate": 1.7221786972975234e-05, - "loss": 0.8394, + "epoch": 0.16437873988032384, + "grad_norm": 2.634936571121216, + "learning_rate": 1.3347934254604881e-05, + "loss": 0.6157, "step": 2335 }, { - "epoch": 0.17676213537134425, - "grad_norm": 2.4603934288024902, - "learning_rate": 1.7220870587660872e-05, - "loss": 0.8235, + "epoch": 0.16444913762759591, + "grad_norm": 2.275090456008911, + "learning_rate": 1.3347261397357877e-05, + "loss": 0.6974, "step": 2336 }, { - "epoch": 0.17683780409367789, - "grad_norm": 2.5660200119018555, - "learning_rate": 1.7219953687529006e-05, - "loss": 0.8368, + "epoch": 0.164519535374868, + "grad_norm": 2.03910756111145, + "learning_rate": 1.3346588210111006e-05, + "loss": 0.7872, "step": 2337 }, { - "epoch": 0.1769134728160115, - "grad_norm": 2.5912487506866455, - "learning_rate": 1.7219036272637054e-05, - "loss": 0.7614, + "epoch": 0.1645899331221401, + "grad_norm": 2.0447120666503906, + "learning_rate": 1.3345914692899267e-05, + "loss": 0.7645, "step": 2338 }, { - "epoch": 0.17698914153834513, - "grad_norm": 2.4109630584716797, - "learning_rate": 1.7218118343042468e-05, - "loss": 0.693, + "epoch": 0.16466033086941217, + "grad_norm": 1.7745771408081055, + "learning_rate": 1.3345240845757673e-05, + "loss": 0.7502, "step": 2339 }, { - "epoch": 0.17706481026067875, - "grad_norm": 2.0487425327301025, - "learning_rate": 1.7217199898802726e-05, - "loss": 0.9291, + "epoch": 0.16473072861668428, + "grad_norm": 1.9476163387298584, + "learning_rate": 1.3344566668721263e-05, + "loss": 0.8383, "step": 2340 }, { - "epoch": 0.17714047898301238, - "grad_norm": 2.833705425262451, - "learning_rate": 1.721628093997535e-05, - "loss": 0.8157, + "epoch": 0.16480112636395636, + "grad_norm": 4.614167213439941, + "learning_rate": 1.3343892161825085e-05, + "loss": 0.8415, "step": 2341 }, { - "epoch": 0.177216147705346, - "grad_norm": 2.298569679260254, - "learning_rate": 1.7215361466617892e-05, - "loss": 0.8041, + "epoch": 0.16487152411122843, + "grad_norm": 2.2071049213409424, + "learning_rate": 1.3343217325104208e-05, + "loss": 0.7134, "step": 2342 }, { - "epoch": 0.1772918164276796, - "grad_norm": 2.4448776245117188, - "learning_rate": 1.7214441478787923e-05, - "loss": 0.8001, + "epoch": 0.16494192185850054, + "grad_norm": 2.145023822784424, + "learning_rate": 1.3342542158593719e-05, + "loss": 0.7967, "step": 2343 }, { - "epoch": 0.17736748515001324, - "grad_norm": 2.5493338108062744, - "learning_rate": 1.7213520976543057e-05, - "loss": 0.9375, + "epoch": 0.16501231960577262, + "grad_norm": 2.7442305088043213, + "learning_rate": 1.3341866662328718e-05, + "loss": 0.7354, "step": 2344 }, { - "epoch": 0.17744315387234685, - "grad_norm": 2.8180043697357178, - "learning_rate": 1.7212599959940947e-05, - "loss": 0.8333, + "epoch": 0.1650827173530447, + "grad_norm": 2.2328155040740967, + "learning_rate": 1.3341190836344324e-05, + "loss": 0.7182, "step": 2345 }, { - "epoch": 0.1775188225946805, - "grad_norm": 2.575085163116455, - "learning_rate": 1.7211678429039264e-05, - "loss": 0.7597, + "epoch": 0.1651531151003168, + "grad_norm": 2.5931296348571777, + "learning_rate": 1.3340514680675678e-05, + "loss": 0.7595, "step": 2346 }, { - "epoch": 0.1775944913170141, - "grad_norm": 2.7505435943603516, - "learning_rate": 1.721075638389572e-05, - "loss": 0.7751, + "epoch": 0.16522351284758888, + "grad_norm": 2.1896555423736572, + "learning_rate": 1.333983819535793e-05, + "loss": 0.7745, "step": 2347 }, { - "epoch": 0.17767016003934774, - "grad_norm": 2.285794973373413, - "learning_rate": 1.7209833824568047e-05, - "loss": 0.7253, + "epoch": 0.16529391059486095, + "grad_norm": 2.5776174068450928, + "learning_rate": 1.3339161380426253e-05, + "loss": 0.7415, "step": 2348 }, { - "epoch": 0.17774582876168135, - "grad_norm": 3.101331949234009, - "learning_rate": 1.7208910751114033e-05, - "loss": 0.7653, + "epoch": 0.16536430834213306, + "grad_norm": 2.020125150680542, + "learning_rate": 1.3338484235915836e-05, + "loss": 0.8, "step": 2349 }, { - "epoch": 0.177821497484015, - "grad_norm": 2.0107734203338623, - "learning_rate": 1.7207987163591474e-05, - "loss": 0.7867, + "epoch": 0.16543470608940514, + "grad_norm": 2.0102787017822266, + "learning_rate": 1.3337806761861881e-05, + "loss": 0.704, "step": 2350 }, { - "epoch": 0.1778971662063486, - "grad_norm": 2.603510618209839, - "learning_rate": 1.720706306205821e-05, - "loss": 0.8109, + "epoch": 0.16550510383667721, + "grad_norm": 2.491095542907715, + "learning_rate": 1.3337128958299613e-05, + "loss": 0.7887, "step": 2351 }, { - "epoch": 0.17797283492868224, - "grad_norm": 4.241885185241699, - "learning_rate": 1.7206138446572113e-05, - "loss": 0.7568, + "epoch": 0.16557550158394932, + "grad_norm": 1.9096992015838623, + "learning_rate": 1.3336450825264272e-05, + "loss": 0.7159, "step": 2352 }, { - "epoch": 0.17804850365101585, - "grad_norm": 2.324835777282715, - "learning_rate": 1.720521331719109e-05, - "loss": 0.6711, + "epoch": 0.1656458993312214, + "grad_norm": 2.121264696121216, + "learning_rate": 1.3335772362791113e-05, + "loss": 0.8381, "step": 2353 }, { - "epoch": 0.1781241723733495, - "grad_norm": 3.291964292526245, - "learning_rate": 1.7204287673973062e-05, - "loss": 0.7461, + "epoch": 0.1657162970784935, + "grad_norm": 2.0532548427581787, + "learning_rate": 1.3335093570915414e-05, + "loss": 0.8544, "step": 2354 }, { - "epoch": 0.1781998410956831, - "grad_norm": 4.23928689956665, - "learning_rate": 1.7203361516976007e-05, - "loss": 0.6751, + "epoch": 0.16578669482576558, + "grad_norm": 2.589737892150879, + "learning_rate": 1.3334414449672459e-05, + "loss": 0.7116, "step": 2355 }, { - "epoch": 0.17827550981801674, - "grad_norm": 2.199976682662964, - "learning_rate": 1.7202434846257922e-05, - "loss": 0.8, + "epoch": 0.16585709257303766, + "grad_norm": 2.145540475845337, + "learning_rate": 1.333373499909756e-05, + "loss": 0.6991, "step": 2356 }, { - "epoch": 0.17835117854035035, - "grad_norm": 1.6747773885726929, - "learning_rate": 1.7201507661876838e-05, - "loss": 1.0083, + "epoch": 0.16592749032030976, + "grad_norm": 2.000086784362793, + "learning_rate": 1.3333055219226045e-05, + "loss": 0.8552, "step": 2357 }, { - "epoch": 0.17842684726268396, - "grad_norm": 2.7017219066619873, - "learning_rate": 1.7200579963890814e-05, - "loss": 0.8006, + "epoch": 0.16599788806758184, + "grad_norm": 2.23633074760437, + "learning_rate": 1.3332375110093253e-05, + "loss": 0.7965, "step": 2358 }, { - "epoch": 0.1785025159850176, - "grad_norm": 2.3280436992645264, - "learning_rate": 1.719965175235795e-05, - "loss": 0.8232, + "epoch": 0.16606828581485392, + "grad_norm": 2.246675491333008, + "learning_rate": 1.3331694671734542e-05, + "loss": 0.7207, "step": 2359 }, { - "epoch": 0.1785781847073512, - "grad_norm": 2.4222941398620605, - "learning_rate": 1.7198723027336374e-05, - "loss": 0.6266, + "epoch": 0.16613868356212602, + "grad_norm": 1.994308590888977, + "learning_rate": 1.3331013904185291e-05, + "loss": 0.7178, "step": 2360 }, { - "epoch": 0.17865385342968484, - "grad_norm": 2.6295063495635986, - "learning_rate": 1.7197793788884245e-05, - "loss": 0.7572, + "epoch": 0.1662090813093981, + "grad_norm": 2.5853710174560547, + "learning_rate": 1.3330332807480896e-05, + "loss": 0.6676, "step": 2361 }, { - "epoch": 0.17872952215201846, - "grad_norm": 2.855011224746704, - "learning_rate": 1.7196864037059748e-05, - "loss": 0.8607, + "epoch": 0.16627947905667018, + "grad_norm": 1.7984718084335327, + "learning_rate": 1.3329651381656762e-05, + "loss": 0.7783, "step": 2362 }, { - "epoch": 0.1788051908743521, - "grad_norm": 2.3858821392059326, - "learning_rate": 1.7195933771921118e-05, - "loss": 0.6728, + "epoch": 0.16634987680394228, + "grad_norm": 2.080811023712158, + "learning_rate": 1.3328969626748323e-05, + "loss": 0.8192, "step": 2363 }, { - "epoch": 0.1788808595966857, - "grad_norm": 3.4347083568573, - "learning_rate": 1.7195002993526604e-05, - "loss": 0.7073, + "epoch": 0.16642027455121436, + "grad_norm": 2.499884605407715, + "learning_rate": 1.332828754279102e-05, + "loss": 0.8739, "step": 2364 }, { - "epoch": 0.17895652831901934, - "grad_norm": 2.68788480758667, - "learning_rate": 1.71940717019345e-05, - "loss": 0.853, + "epoch": 0.16649067229848644, + "grad_norm": 2.087714195251465, + "learning_rate": 1.3327605129820316e-05, + "loss": 0.7243, "step": 2365 }, { - "epoch": 0.17903219704135295, - "grad_norm": 2.82952880859375, - "learning_rate": 1.7193139897203122e-05, - "loss": 0.8541, + "epoch": 0.16656107004575854, + "grad_norm": 1.7984980344772339, + "learning_rate": 1.3326922387871692e-05, + "loss": 0.757, "step": 2366 }, { - "epoch": 0.1791078657636866, - "grad_norm": 2.4485409259796143, - "learning_rate": 1.7192207579390824e-05, - "loss": 0.6887, + "epoch": 0.16663146779303062, + "grad_norm": 1.8298848867416382, + "learning_rate": 1.332623931698064e-05, + "loss": 0.8201, "step": 2367 }, { - "epoch": 0.1791835344860202, - "grad_norm": 2.3793416023254395, - "learning_rate": 1.7191274748555987e-05, - "loss": 0.9804, + "epoch": 0.16670186554030272, + "grad_norm": 2.3044862747192383, + "learning_rate": 1.332555591718268e-05, + "loss": 0.8105, "step": 2368 }, { - "epoch": 0.17925920320835384, - "grad_norm": 2.226893663406372, - "learning_rate": 1.7190341404757034e-05, - "loss": 0.877, + "epoch": 0.1667722632875748, + "grad_norm": 1.8338335752487183, + "learning_rate": 1.3324872188513339e-05, + "loss": 0.7019, "step": 2369 }, { - "epoch": 0.17933487193068745, - "grad_norm": 2.134824752807617, - "learning_rate": 1.7189407548052412e-05, - "loss": 0.8302, + "epoch": 0.16684266103484688, + "grad_norm": 2.242663860321045, + "learning_rate": 1.3324188131008164e-05, + "loss": 0.824, "step": 2370 }, { - "epoch": 0.17941054065302106, - "grad_norm": 2.6807243824005127, - "learning_rate": 1.7188473178500602e-05, - "loss": 0.6898, + "epoch": 0.16691305878211898, + "grad_norm": 2.0214903354644775, + "learning_rate": 1.3323503744702721e-05, + "loss": 0.8109, "step": 2371 }, { - "epoch": 0.1794862093753547, - "grad_norm": 2.6654117107391357, - "learning_rate": 1.7187538296160115e-05, - "loss": 0.6984, + "epoch": 0.16698345652939106, + "grad_norm": 3.2261860370635986, + "learning_rate": 1.3322819029632588e-05, + "loss": 0.8081, "step": 2372 }, { - "epoch": 0.1795618780976883, - "grad_norm": 2.5605947971343994, - "learning_rate": 1.71866029010895e-05, - "loss": 0.8535, + "epoch": 0.16705385427666314, + "grad_norm": 2.1992270946502686, + "learning_rate": 1.3322133985833371e-05, + "loss": 0.8043, "step": 2373 }, { - "epoch": 0.17963754682002195, - "grad_norm": 3.3925888538360596, - "learning_rate": 1.718566699334733e-05, - "loss": 0.8313, + "epoch": 0.16712425202393524, + "grad_norm": 3.584484577178955, + "learning_rate": 1.3321448613340683e-05, + "loss": 0.7316, "step": 2374 }, { - "epoch": 0.17971321554235556, - "grad_norm": 2.7094411849975586, - "learning_rate": 1.7184730572992222e-05, - "loss": 0.6807, + "epoch": 0.16719464977120732, + "grad_norm": 2.423704147338867, + "learning_rate": 1.3320762912190153e-05, + "loss": 0.7587, "step": 2375 }, { - "epoch": 0.1797888842646892, - "grad_norm": 3.0134902000427246, - "learning_rate": 1.718379364008281e-05, - "loss": 0.7635, + "epoch": 0.1672650475184794, + "grad_norm": 1.9866315126419067, + "learning_rate": 1.3320076882417436e-05, + "loss": 0.8232, "step": 2376 }, { - "epoch": 0.1798645529870228, - "grad_norm": 2.536494255065918, - "learning_rate": 1.718285619467777e-05, - "loss": 0.9329, + "epoch": 0.1673354452657515, + "grad_norm": 2.3284404277801514, + "learning_rate": 1.33193905240582e-05, + "loss": 0.7887, "step": 2377 }, { - "epoch": 0.17994022170935645, - "grad_norm": 2.6002416610717773, - "learning_rate": 1.718191823683581e-05, - "loss": 0.9118, + "epoch": 0.16740584301302358, + "grad_norm": 1.9245001077651978, + "learning_rate": 1.3318703837148125e-05, + "loss": 0.725, "step": 2378 }, { - "epoch": 0.18001589043169006, - "grad_norm": 2.81915283203125, - "learning_rate": 1.7180979766615663e-05, - "loss": 0.7235, + "epoch": 0.16747624076029566, + "grad_norm": 2.1582086086273193, + "learning_rate": 1.3318016821722916e-05, + "loss": 0.7719, "step": 2379 }, { - "epoch": 0.1800915591540237, - "grad_norm": 2.888113260269165, - "learning_rate": 1.7180040784076106e-05, - "loss": 0.6503, + "epoch": 0.16754663850756776, + "grad_norm": 6.218706130981445, + "learning_rate": 1.3317329477818292e-05, + "loss": 0.8714, "step": 2380 }, { - "epoch": 0.1801672278763573, - "grad_norm": 2.9773685932159424, - "learning_rate": 1.7179101289275937e-05, - "loss": 0.704, + "epoch": 0.16761703625483984, + "grad_norm": 2.1015031337738037, + "learning_rate": 1.3316641805469985e-05, + "loss": 0.6966, "step": 2381 }, { - "epoch": 0.18024289659869094, - "grad_norm": 2.8631527423858643, - "learning_rate": 1.717816128227399e-05, - "loss": 0.8687, + "epoch": 0.16768743400211195, + "grad_norm": 2.3775453567504883, + "learning_rate": 1.331595380471375e-05, + "loss": 0.7792, "step": 2382 }, { - "epoch": 0.18031856532102455, - "grad_norm": 2.364201068878174, - "learning_rate": 1.7177220763129133e-05, - "loss": 0.6779, + "epoch": 0.16775783174938402, + "grad_norm": 1.8784070014953613, + "learning_rate": 1.3315265475585358e-05, + "loss": 0.7812, "step": 2383 }, { - "epoch": 0.18039423404335816, - "grad_norm": 2.255162239074707, - "learning_rate": 1.7176279731900264e-05, - "loss": 0.6428, + "epoch": 0.1678282294966561, + "grad_norm": 2.476595640182495, + "learning_rate": 1.3314576818120593e-05, + "loss": 0.8238, "step": 2384 }, { - "epoch": 0.1804699027656918, - "grad_norm": 3.1965222358703613, - "learning_rate": 1.717533818864631e-05, - "loss": 0.7524, + "epoch": 0.1678986272439282, + "grad_norm": 1.7798161506652832, + "learning_rate": 1.3313887832355261e-05, + "loss": 0.7973, "step": 2385 }, { - "epoch": 0.1805455714880254, - "grad_norm": 2.435013771057129, - "learning_rate": 1.717439613342624e-05, - "loss": 0.8193, + "epoch": 0.16796902499120028, + "grad_norm": 2.3632750511169434, + "learning_rate": 1.3313198518325183e-05, + "loss": 0.6543, "step": 2386 }, { - "epoch": 0.18062124021035905, - "grad_norm": 3.8150510787963867, - "learning_rate": 1.7173453566299044e-05, - "loss": 0.7033, + "epoch": 0.16803942273847236, + "grad_norm": 1.9204093217849731, + "learning_rate": 1.3312508876066196e-05, + "loss": 0.7193, "step": 2387 }, { - "epoch": 0.18069690893269266, - "grad_norm": 2.313161849975586, - "learning_rate": 1.717251048732375e-05, - "loss": 0.8132, + "epoch": 0.16810982048574447, + "grad_norm": 2.0103089809417725, + "learning_rate": 1.3311818905614157e-05, + "loss": 0.8695, "step": 2388 }, { - "epoch": 0.1807725776550263, - "grad_norm": 3.1583354473114014, - "learning_rate": 1.717156689655941e-05, - "loss": 0.7569, + "epoch": 0.16818021823301654, + "grad_norm": 6.055141925811768, + "learning_rate": 1.3311128607004935e-05, + "loss": 0.6861, "step": 2389 }, { - "epoch": 0.1808482463773599, - "grad_norm": 2.503884792327881, - "learning_rate": 1.717062279406513e-05, - "loss": 0.6835, + "epoch": 0.16825061598028862, + "grad_norm": 1.7880421876907349, + "learning_rate": 1.331043798027442e-05, + "loss": 0.6338, "step": 2390 }, { - "epoch": 0.18092391509969355, - "grad_norm": 2.8149566650390625, - "learning_rate": 1.716967817990002e-05, - "loss": 0.8832, + "epoch": 0.16832101372756073, + "grad_norm": 2.1701903343200684, + "learning_rate": 1.3309747025458523e-05, + "loss": 0.7478, "step": 2391 }, { - "epoch": 0.18099958382202716, - "grad_norm": 2.804032802581787, - "learning_rate": 1.7168733054123238e-05, - "loss": 0.9157, + "epoch": 0.1683914114748328, + "grad_norm": 1.8264164924621582, + "learning_rate": 1.3309055742593161e-05, + "loss": 0.7174, "step": 2392 }, { - "epoch": 0.1810752525443608, - "grad_norm": 2.569125175476074, - "learning_rate": 1.7167787416793973e-05, - "loss": 0.9452, + "epoch": 0.16846180922210488, + "grad_norm": 2.307135581970215, + "learning_rate": 1.3308364131714278e-05, + "loss": 0.8012, "step": 2393 }, { - "epoch": 0.1811509212666944, - "grad_norm": 2.8881638050079346, - "learning_rate": 1.7166841267971438e-05, - "loss": 0.8094, + "epoch": 0.16853220696937699, + "grad_norm": 2.2471299171447754, + "learning_rate": 1.330767219285783e-05, + "loss": 0.7657, "step": 2394 }, { - "epoch": 0.18122658998902805, - "grad_norm": 2.7067344188690186, - "learning_rate": 1.716589460771489e-05, - "loss": 0.8893, + "epoch": 0.16860260471664906, + "grad_norm": 1.968122959136963, + "learning_rate": 1.3306979926059792e-05, + "loss": 0.8152, "step": 2395 }, { - "epoch": 0.18130225871136166, - "grad_norm": 3.1878957748413086, - "learning_rate": 1.716494743608361e-05, - "loss": 0.8778, + "epoch": 0.16867300246392117, + "grad_norm": 1.9301185607910156, + "learning_rate": 1.3306287331356156e-05, + "loss": 0.6932, "step": 2396 }, { - "epoch": 0.18137792743369527, - "grad_norm": 2.745701313018799, - "learning_rate": 1.7163999753136913e-05, - "loss": 0.9082, + "epoch": 0.16874340021119325, + "grad_norm": 2.2628538608551025, + "learning_rate": 1.330559440878293e-05, + "loss": 0.7907, "step": 2397 }, { - "epoch": 0.1814535961560289, - "grad_norm": 2.593395471572876, - "learning_rate": 1.7163051558934146e-05, - "loss": 0.8964, + "epoch": 0.16881379795846532, + "grad_norm": 2.432068347930908, + "learning_rate": 1.3304901158376142e-05, + "loss": 0.651, "step": 2398 }, { - "epoch": 0.18152926487836252, - "grad_norm": 2.715348243713379, - "learning_rate": 1.716210285353469e-05, - "loss": 0.6536, + "epoch": 0.16888419570573743, + "grad_norm": 2.601055383682251, + "learning_rate": 1.3304207580171829e-05, + "loss": 0.791, "step": 2399 }, { - "epoch": 0.18160493360069616, - "grad_norm": 2.5695583820343018, - "learning_rate": 1.716115363699795e-05, - "loss": 0.7906, + "epoch": 0.1689545934530095, + "grad_norm": 1.9932613372802734, + "learning_rate": 1.3303513674206059e-05, + "loss": 0.7739, "step": 2400 }, { - "epoch": 0.18168060232302977, - "grad_norm": 5.8394455909729, - "learning_rate": 1.7160203909383375e-05, - "loss": 0.7837, + "epoch": 0.16902499120028158, + "grad_norm": 2.4703972339630127, + "learning_rate": 1.3302819440514901e-05, + "loss": 0.7188, "step": 2401 }, { - "epoch": 0.1817562710453634, - "grad_norm": 2.660222291946411, - "learning_rate": 1.715925367075044e-05, - "loss": 0.6843, + "epoch": 0.1690953889475537, + "grad_norm": 2.1887924671173096, + "learning_rate": 1.3302124879134452e-05, + "loss": 0.7068, "step": 2402 }, { - "epoch": 0.18183193976769702, - "grad_norm": 3.027378559112549, - "learning_rate": 1.7158302921158647e-05, - "loss": 0.8906, + "epoch": 0.16916578669482576, + "grad_norm": 1.9023466110229492, + "learning_rate": 1.3301429990100824e-05, + "loss": 0.679, "step": 2403 }, { - "epoch": 0.18190760849003065, - "grad_norm": 2.216245174407959, - "learning_rate": 1.715735166066754e-05, - "loss": 0.9702, + "epoch": 0.16923618444209784, + "grad_norm": 1.864626407623291, + "learning_rate": 1.3300734773450144e-05, + "loss": 0.7485, "step": 2404 }, { - "epoch": 0.18198327721236426, - "grad_norm": 2.4401350021362305, - "learning_rate": 1.7156399889336684e-05, - "loss": 0.7855, + "epoch": 0.16930658218936995, + "grad_norm": 1.98395836353302, + "learning_rate": 1.3300039229218557e-05, + "loss": 0.7816, "step": 2405 }, { - "epoch": 0.1820589459346979, - "grad_norm": 2.4370734691619873, - "learning_rate": 1.715544760722569e-05, - "loss": 0.784, + "epoch": 0.16937697993664202, + "grad_norm": 2.1433682441711426, + "learning_rate": 1.3299343357442223e-05, + "loss": 0.7344, "step": 2406 }, { - "epoch": 0.1821346146570315, - "grad_norm": 5.49038553237915, - "learning_rate": 1.7154494814394186e-05, - "loss": 0.7539, + "epoch": 0.1694473776839141, + "grad_norm": 2.1019911766052246, + "learning_rate": 1.3298647158157326e-05, + "loss": 0.7512, "step": 2407 }, { - "epoch": 0.18221028337936515, - "grad_norm": 2.7418787479400635, - "learning_rate": 1.7153541510901844e-05, - "loss": 0.8306, + "epoch": 0.1695177754311862, + "grad_norm": 2.175269842147827, + "learning_rate": 1.3297950631400058e-05, + "loss": 0.8553, "step": 2408 }, { - "epoch": 0.18228595210169876, - "grad_norm": 2.403918981552124, - "learning_rate": 1.7152587696808358e-05, - "loss": 0.6963, + "epoch": 0.16958817317845828, + "grad_norm": 2.8591771125793457, + "learning_rate": 1.3297253777206631e-05, + "loss": 0.8061, "step": 2409 }, { - "epoch": 0.1823616208240324, - "grad_norm": 3.1243717670440674, - "learning_rate": 1.7151633372173467e-05, - "loss": 0.8133, + "epoch": 0.1696585709257304, + "grad_norm": 1.923150897026062, + "learning_rate": 1.3296556595613277e-05, + "loss": 0.6875, "step": 2410 }, { - "epoch": 0.182437289546366, - "grad_norm": 3.145928382873535, - "learning_rate": 1.7150678537056925e-05, - "loss": 0.7925, + "epoch": 0.16972896867300247, + "grad_norm": 1.8999671936035156, + "learning_rate": 1.3295859086656245e-05, + "loss": 0.836, "step": 2411 }, { - "epoch": 0.18251295826869962, - "grad_norm": 3.3592019081115723, - "learning_rate": 1.7149723191518533e-05, - "loss": 0.8252, + "epoch": 0.16979936642027454, + "grad_norm": 2.0105485916137695, + "learning_rate": 1.3295161250371797e-05, + "loss": 0.7317, "step": 2412 }, { - "epoch": 0.18258862699103326, - "grad_norm": 2.1932833194732666, - "learning_rate": 1.714876733561811e-05, - "loss": 0.8144, + "epoch": 0.16986976416754665, + "grad_norm": 2.3521924018859863, + "learning_rate": 1.3294463086796213e-05, + "loss": 0.7686, "step": 2413 }, { - "epoch": 0.18266429571336687, - "grad_norm": 2.8968517780303955, - "learning_rate": 1.7147810969415526e-05, - "loss": 0.7965, + "epoch": 0.16994016191481873, + "grad_norm": 2.1716041564941406, + "learning_rate": 1.3293764595965795e-05, + "loss": 0.7637, "step": 2414 }, { - "epoch": 0.1827399644357005, - "grad_norm": 2.438443660736084, - "learning_rate": 1.7146854092970663e-05, - "loss": 0.7568, + "epoch": 0.1700105596620908, + "grad_norm": 1.8556751012802124, + "learning_rate": 1.3293065777916854e-05, + "loss": 0.7504, "step": 2415 }, { - "epoch": 0.18281563315803412, - "grad_norm": 2.609872341156006, - "learning_rate": 1.7145896706343445e-05, - "loss": 0.6807, + "epoch": 0.1700809574093629, + "grad_norm": 1.9370185136795044, + "learning_rate": 1.3292366632685724e-05, + "loss": 0.7634, "step": 2416 }, { - "epoch": 0.18289130188036776, - "grad_norm": 2.623194932937622, - "learning_rate": 1.714493880959383e-05, - "loss": 0.7449, + "epoch": 0.170151355156635, + "grad_norm": 2.143339157104492, + "learning_rate": 1.3291667160308752e-05, + "loss": 0.7546, "step": 2417 }, { - "epoch": 0.18296697060270137, - "grad_norm": 2.9819412231445312, - "learning_rate": 1.7143980402781804e-05, - "loss": 0.9333, + "epoch": 0.17022175290390706, + "grad_norm": 1.679125428199768, + "learning_rate": 1.3290967360822309e-05, + "loss": 0.7335, "step": 2418 }, { - "epoch": 0.183042639325035, - "grad_norm": 2.4386146068573, - "learning_rate": 1.7143021485967382e-05, - "loss": 0.7959, + "epoch": 0.17029215065117917, + "grad_norm": 2.009000539779663, + "learning_rate": 1.3290267234262776e-05, + "loss": 0.7636, "step": 2419 }, { - "epoch": 0.18311830804736862, - "grad_norm": 2.293463706970215, - "learning_rate": 1.7142062059210618e-05, - "loss": 0.8353, + "epoch": 0.17036254839845125, + "grad_norm": 2.1441385746002197, + "learning_rate": 1.3289566780666551e-05, + "loss": 0.719, "step": 2420 }, { - "epoch": 0.18319397676970225, - "grad_norm": 3.0216493606567383, - "learning_rate": 1.7141102122571593e-05, - "loss": 0.8713, + "epoch": 0.17043294614572332, + "grad_norm": 2.4081528186798096, + "learning_rate": 1.3288866000070052e-05, + "loss": 0.8279, "step": 2421 }, { - "epoch": 0.18326964549203587, - "grad_norm": 2.45841908454895, - "learning_rate": 1.7140141676110424e-05, - "loss": 0.8078, + "epoch": 0.17050334389299543, + "grad_norm": 1.6819441318511963, + "learning_rate": 1.3288164892509717e-05, + "loss": 0.8701, "step": 2422 }, { - "epoch": 0.1833453142143695, - "grad_norm": 2.933749198913574, - "learning_rate": 1.713918071988725e-05, - "loss": 0.8919, + "epoch": 0.1705737416402675, + "grad_norm": 1.7235552072525024, + "learning_rate": 1.3287463458021992e-05, + "loss": 0.7003, "step": 2423 }, { - "epoch": 0.18342098293670311, - "grad_norm": 2.992274761199951, - "learning_rate": 1.713821925396226e-05, - "loss": 0.7021, + "epoch": 0.1706441393875396, + "grad_norm": 1.794406771659851, + "learning_rate": 1.328676169664335e-05, + "loss": 0.748, "step": 2424 }, { - "epoch": 0.18349665165903672, - "grad_norm": 2.653069257736206, - "learning_rate": 1.7137257278395655e-05, - "loss": 0.8012, + "epoch": 0.1707145371348117, + "grad_norm": 2.6313130855560303, + "learning_rate": 1.328605960841027e-05, + "loss": 0.6794, "step": 2425 }, { - "epoch": 0.18357232038137036, - "grad_norm": 2.428311586380005, - "learning_rate": 1.7136294793247677e-05, - "loss": 0.8426, + "epoch": 0.17078493488208377, + "grad_norm": 2.1056129932403564, + "learning_rate": 1.328535719335926e-05, + "loss": 0.7153, "step": 2426 }, { - "epoch": 0.18364798910370397, - "grad_norm": 3.16813063621521, - "learning_rate": 1.7135331798578607e-05, - "loss": 0.8006, + "epoch": 0.17085533262935587, + "grad_norm": 1.8124769926071167, + "learning_rate": 1.3284654451526838e-05, + "loss": 0.8174, "step": 2427 }, { - "epoch": 0.1837236578260376, - "grad_norm": 3.0173721313476562, - "learning_rate": 1.7134368294448746e-05, - "loss": 0.7249, + "epoch": 0.17092573037662795, + "grad_norm": 2.2503185272216797, + "learning_rate": 1.328395138294954e-05, + "loss": 0.8675, "step": 2428 }, { - "epoch": 0.18379932654837122, - "grad_norm": 3.074843406677246, - "learning_rate": 1.7133404280918435e-05, - "loss": 0.8502, + "epoch": 0.17099612812390003, + "grad_norm": 2.0855090618133545, + "learning_rate": 1.3283247987663915e-05, + "loss": 0.7706, "step": 2429 }, { - "epoch": 0.18387499527070486, - "grad_norm": 2.8122646808624268, - "learning_rate": 1.713243975804804e-05, - "loss": 0.8029, + "epoch": 0.17106652587117213, + "grad_norm": 1.8309695720672607, + "learning_rate": 1.3282544265706538e-05, + "loss": 0.7201, "step": 2430 }, { - "epoch": 0.18395066399303847, - "grad_norm": 2.632542610168457, - "learning_rate": 1.7131474725897958e-05, - "loss": 0.9138, + "epoch": 0.1711369236184442, + "grad_norm": 1.942726731300354, + "learning_rate": 1.3281840217113996e-05, + "loss": 0.7116, "step": 2431 }, { - "epoch": 0.1840263327153721, - "grad_norm": 2.9249496459960938, - "learning_rate": 1.7130509184528634e-05, - "loss": 0.8061, + "epoch": 0.1712073213657163, + "grad_norm": 1.9851869344711304, + "learning_rate": 1.3281135841922892e-05, + "loss": 0.8114, "step": 2432 }, { - "epoch": 0.18410200143770572, - "grad_norm": 2.6574416160583496, - "learning_rate": 1.7129543134000528e-05, - "loss": 0.846, + "epoch": 0.1712777191129884, + "grad_norm": 1.8475958108901978, + "learning_rate": 1.3280431140169847e-05, + "loss": 0.7688, "step": 2433 }, { - "epoch": 0.18417767016003936, - "grad_norm": 2.1470108032226562, - "learning_rate": 1.7128576574374138e-05, - "loss": 0.7422, + "epoch": 0.17134811686026047, + "grad_norm": 1.816476821899414, + "learning_rate": 1.3279726111891498e-05, + "loss": 0.6536, "step": 2434 }, { - "epoch": 0.18425333888237297, - "grad_norm": 3.626044750213623, - "learning_rate": 1.712760950570999e-05, - "loss": 0.8358, + "epoch": 0.17141851460753255, + "grad_norm": 2.042128801345825, + "learning_rate": 1.3279020757124502e-05, + "loss": 0.7782, "step": 2435 }, { - "epoch": 0.1843290076047066, - "grad_norm": 2.8826382160186768, - "learning_rate": 1.7126641928068642e-05, - "loss": 0.6992, + "epoch": 0.17148891235480465, + "grad_norm": 1.9167205095291138, + "learning_rate": 1.327831507590553e-05, + "loss": 0.7921, "step": 2436 }, { - "epoch": 0.18440467632704022, - "grad_norm": 2.540454149246216, - "learning_rate": 1.7125673841510696e-05, - "loss": 0.7279, + "epoch": 0.17155931010207673, + "grad_norm": 1.9378328323364258, + "learning_rate": 1.327760906827127e-05, + "loss": 0.6377, "step": 2437 }, { - "epoch": 0.18448034504937383, - "grad_norm": 4.273547172546387, - "learning_rate": 1.7124705246096776e-05, - "loss": 0.9469, + "epoch": 0.17162970784934883, + "grad_norm": 2.2389066219329834, + "learning_rate": 1.3276902734258432e-05, + "loss": 0.737, "step": 2438 }, { - "epoch": 0.18455601377170747, - "grad_norm": 2.69283390045166, - "learning_rate": 1.712373614188753e-05, - "loss": 0.7379, + "epoch": 0.1717001055966209, + "grad_norm": 1.9270156621932983, + "learning_rate": 1.3276196073903734e-05, + "loss": 0.7465, "step": 2439 }, { - "epoch": 0.18463168249404108, - "grad_norm": 2.7413628101348877, - "learning_rate": 1.712276652894365e-05, - "loss": 0.8033, + "epoch": 0.171770503343893, + "grad_norm": 2.0876636505126953, + "learning_rate": 1.3275489087243921e-05, + "loss": 0.7441, "step": 2440 }, { - "epoch": 0.18470735121637472, - "grad_norm": 2.5811452865600586, - "learning_rate": 1.7121796407325864e-05, - "loss": 0.7025, + "epoch": 0.1718409010911651, + "grad_norm": 2.8114848136901855, + "learning_rate": 1.3274781774315743e-05, + "loss": 0.5954, "step": 2441 }, { - "epoch": 0.18478301993870833, - "grad_norm": 2.8517563343048096, - "learning_rate": 1.7120825777094916e-05, - "loss": 0.8967, + "epoch": 0.17191129883843717, + "grad_norm": 1.7931652069091797, + "learning_rate": 1.327407413515598e-05, + "loss": 0.9398, "step": 2442 }, { - "epoch": 0.18485868866104196, - "grad_norm": 2.390089988708496, - "learning_rate": 1.7119854638311587e-05, - "loss": 0.7239, + "epoch": 0.17198169658570925, + "grad_norm": 2.411499261856079, + "learning_rate": 1.327336616980142e-05, + "loss": 0.7173, "step": 2443 }, { - "epoch": 0.18493435738337557, - "grad_norm": 3.3105874061584473, - "learning_rate": 1.71188829910367e-05, - "loss": 0.8914, + "epoch": 0.17205209433298135, + "grad_norm": 1.964766263961792, + "learning_rate": 1.327265787828887e-05, + "loss": 0.8329, "step": 2444 }, { - "epoch": 0.1850100261057092, - "grad_norm": 3.032083511352539, - "learning_rate": 1.7117910835331104e-05, - "loss": 0.85, + "epoch": 0.17212249208025343, + "grad_norm": 3.3048923015594482, + "learning_rate": 1.3271949260655158e-05, + "loss": 0.7313, "step": 2445 }, { - "epoch": 0.18508569482804282, - "grad_norm": 2.4324803352355957, - "learning_rate": 1.7116938171255672e-05, - "loss": 0.5128, + "epoch": 0.1721928898275255, + "grad_norm": 2.0728261470794678, + "learning_rate": 1.3271240316937124e-05, + "loss": 0.7123, "step": 2446 }, { - "epoch": 0.18516136355037646, - "grad_norm": 2.444301128387451, - "learning_rate": 1.711596499887132e-05, - "loss": 0.8662, + "epoch": 0.1722632875747976, + "grad_norm": 2.5477242469787598, + "learning_rate": 1.3270531047171625e-05, + "loss": 0.8588, "step": 2447 }, { - "epoch": 0.18523703227271007, - "grad_norm": 2.0396249294281006, - "learning_rate": 1.711499131823899e-05, - "loss": 0.6182, + "epoch": 0.1723336853220697, + "grad_norm": 2.1862668991088867, + "learning_rate": 1.3269821451395539e-05, + "loss": 0.7753, "step": 2448 }, { - "epoch": 0.1853127009950437, - "grad_norm": 2.8817059993743896, - "learning_rate": 1.7114017129419654e-05, - "loss": 0.8314, + "epoch": 0.17240408306934177, + "grad_norm": 2.038264513015747, + "learning_rate": 1.3269111529645756e-05, + "loss": 0.8689, "step": 2449 }, { - "epoch": 0.18538836971737732, - "grad_norm": 2.181652545928955, - "learning_rate": 1.7113042432474323e-05, - "loss": 0.7013, + "epoch": 0.17247448081661387, + "grad_norm": 2.775592803955078, + "learning_rate": 1.3268401281959187e-05, + "loss": 0.6616, "step": 2450 }, { - "epoch": 0.18546403843971093, - "grad_norm": 2.367856740951538, - "learning_rate": 1.7112067227464035e-05, - "loss": 0.818, + "epoch": 0.17254487856388595, + "grad_norm": 2.0679690837860107, + "learning_rate": 1.326769070837276e-05, + "loss": 0.5828, "step": 2451 }, { - "epoch": 0.18553970716204457, - "grad_norm": 2.3162214756011963, - "learning_rate": 1.7111091514449857e-05, - "loss": 0.8122, + "epoch": 0.17261527631115806, + "grad_norm": 2.33280086517334, + "learning_rate": 1.3266979808923416e-05, + "loss": 0.6991, "step": 2452 }, { - "epoch": 0.18561537588437818, - "grad_norm": 2.549567937850952, - "learning_rate": 1.7110115293492893e-05, - "loss": 0.7482, + "epoch": 0.17268567405843013, + "grad_norm": 1.7909483909606934, + "learning_rate": 1.3266268583648114e-05, + "loss": 0.6901, "step": 2453 }, { - "epoch": 0.18569104460671182, - "grad_norm": 1.9302699565887451, - "learning_rate": 1.7109138564654283e-05, - "loss": 0.8955, + "epoch": 0.1727560718057022, + "grad_norm": 1.9619282484054565, + "learning_rate": 1.3265557032583836e-05, + "loss": 0.8418, "step": 2454 }, { - "epoch": 0.18576671332904543, - "grad_norm": 2.4720423221588135, - "learning_rate": 1.7108161327995182e-05, - "loss": 0.7715, + "epoch": 0.17282646955297432, + "grad_norm": 1.8085087537765503, + "learning_rate": 1.3264845155767572e-05, + "loss": 0.891, "step": 2455 }, { - "epoch": 0.18584238205137907, - "grad_norm": 2.2266173362731934, - "learning_rate": 1.7107183583576798e-05, - "loss": 0.7588, + "epoch": 0.1728968673002464, + "grad_norm": 2.3299672603607178, + "learning_rate": 1.3264132953236335e-05, + "loss": 0.7147, "step": 2456 }, { - "epoch": 0.18591805077371268, - "grad_norm": 2.241393804550171, - "learning_rate": 1.7106205331460356e-05, - "loss": 0.6046, + "epoch": 0.17296726504751847, + "grad_norm": 1.9719486236572266, + "learning_rate": 1.3263420425027154e-05, + "loss": 0.5457, "step": 2457 }, { - "epoch": 0.18599371949604632, - "grad_norm": 2.3776702880859375, - "learning_rate": 1.7105226571707115e-05, - "loss": 0.7866, + "epoch": 0.17303766279479058, + "grad_norm": 2.506456136703491, + "learning_rate": 1.3262707571177071e-05, + "loss": 0.8225, "step": 2458 }, { - "epoch": 0.18606938821837993, - "grad_norm": 3.0117900371551514, - "learning_rate": 1.7104247304378372e-05, - "loss": 0.68, + "epoch": 0.17310806054206265, + "grad_norm": 2.1667258739471436, + "learning_rate": 1.3261994391723151e-05, + "loss": 0.6646, "step": 2459 }, { - "epoch": 0.18614505694071357, - "grad_norm": 2.2148845195770264, - "learning_rate": 1.7103267529535453e-05, - "loss": 0.8208, + "epoch": 0.17317845828933473, + "grad_norm": 2.641432762145996, + "learning_rate": 1.326128088670247e-05, + "loss": 0.8489, "step": 2460 }, { - "epoch": 0.18622072566304718, - "grad_norm": 2.5542850494384766, - "learning_rate": 1.710228724723971e-05, - "loss": 0.7452, + "epoch": 0.17324885603660684, + "grad_norm": 2.1061830520629883, + "learning_rate": 1.3260567056152126e-05, + "loss": 0.9063, "step": 2461 }, { - "epoch": 0.18629639438538081, - "grad_norm": 2.4348342418670654, - "learning_rate": 1.7101306457552532e-05, - "loss": 0.8503, + "epoch": 0.1733192537838789, + "grad_norm": 2.0071802139282227, + "learning_rate": 1.325985290010923e-05, + "loss": 0.8555, "step": 2462 }, { - "epoch": 0.18637206310771443, - "grad_norm": 2.9579315185546875, - "learning_rate": 1.7100325160535344e-05, - "loss": 0.9084, + "epoch": 0.173389651531151, + "grad_norm": 2.090104579925537, + "learning_rate": 1.3259138418610912e-05, + "loss": 0.7322, "step": 2463 }, { - "epoch": 0.18644773183004806, - "grad_norm": 2.543611764907837, - "learning_rate": 1.7099343356249594e-05, - "loss": 0.7714, + "epoch": 0.1734600492784231, + "grad_norm": 1.9993388652801514, + "learning_rate": 1.325842361169432e-05, + "loss": 0.7575, "step": 2464 }, { - "epoch": 0.18652340055238167, - "grad_norm": 2.5883982181549072, - "learning_rate": 1.7098361044756762e-05, - "loss": 0.6883, + "epoch": 0.17353044702569517, + "grad_norm": 2.194506883621216, + "learning_rate": 1.3257708479396616e-05, + "loss": 0.7638, "step": 2465 }, { - "epoch": 0.18659906927471528, - "grad_norm": 2.596781015396118, - "learning_rate": 1.7097378226118372e-05, - "loss": 0.7947, + "epoch": 0.17360084477296728, + "grad_norm": 1.8301949501037598, + "learning_rate": 1.3256993021754982e-05, + "loss": 0.7987, "step": 2466 }, { - "epoch": 0.18667473799704892, - "grad_norm": 1.9685479402542114, - "learning_rate": 1.709639490039597e-05, - "loss": 0.8463, + "epoch": 0.17367124252023936, + "grad_norm": 2.2814159393310547, + "learning_rate": 1.3256277238806614e-05, + "loss": 0.7377, "step": 2467 }, { - "epoch": 0.18675040671938253, - "grad_norm": 2.6280531883239746, - "learning_rate": 1.7095411067651128e-05, - "loss": 0.7112, + "epoch": 0.17374164026751143, + "grad_norm": 2.2343010902404785, + "learning_rate": 1.3255561130588725e-05, + "loss": 0.7746, "step": 2468 }, { - "epoch": 0.18682607544171617, - "grad_norm": 4.373874187469482, - "learning_rate": 1.7094426727945463e-05, - "loss": 0.7965, + "epoch": 0.17381203801478354, + "grad_norm": 2.029341697692871, + "learning_rate": 1.3254844697138545e-05, + "loss": 0.7983, "step": 2469 }, { - "epoch": 0.18690174416404978, - "grad_norm": 2.3263533115386963, - "learning_rate": 1.7093441881340614e-05, - "loss": 0.7271, + "epoch": 0.17388243576205561, + "grad_norm": 1.923988699913025, + "learning_rate": 1.325412793849333e-05, + "loss": 0.6752, "step": 2470 }, { - "epoch": 0.18697741288638342, - "grad_norm": 2.311795711517334, - "learning_rate": 1.709245652789826e-05, - "loss": 0.7724, + "epoch": 0.1739528335093277, + "grad_norm": 2.6908857822418213, + "learning_rate": 1.3253410854690335e-05, + "loss": 0.7238, "step": 2471 }, { - "epoch": 0.18705308160871703, - "grad_norm": 2.1716461181640625, - "learning_rate": 1.7091470667680102e-05, - "loss": 0.7175, + "epoch": 0.1740232312565998, + "grad_norm": 2.0780928134918213, + "learning_rate": 1.3252693445766849e-05, + "loss": 0.8495, "step": 2472 }, { - "epoch": 0.18712875033105067, - "grad_norm": 2.450676202774048, - "learning_rate": 1.7090484300747882e-05, - "loss": 0.7955, + "epoch": 0.17409362900387187, + "grad_norm": 1.9729321002960205, + "learning_rate": 1.3251975711760167e-05, + "loss": 0.6927, "step": 2473 }, { - "epoch": 0.18720441905338428, - "grad_norm": 2.3514206409454346, - "learning_rate": 1.7089497427163362e-05, - "loss": 0.6674, + "epoch": 0.17416402675114395, + "grad_norm": 2.0974180698394775, + "learning_rate": 1.3251257652707608e-05, + "loss": 0.5977, "step": 2474 }, { - "epoch": 0.18728008777571792, - "grad_norm": 2.2294673919677734, - "learning_rate": 1.7088510046988355e-05, - "loss": 0.6138, + "epoch": 0.17423442449841606, + "grad_norm": 2.371176242828369, + "learning_rate": 1.3250539268646499e-05, + "loss": 0.8417, "step": 2475 }, { - "epoch": 0.18735575649805153, - "grad_norm": 2.8601155281066895, - "learning_rate": 1.7087522160284684e-05, - "loss": 0.7145, + "epoch": 0.17430482224568813, + "grad_norm": 2.5941402912139893, + "learning_rate": 1.3249820559614192e-05, + "loss": 0.9982, "step": 2476 }, { - "epoch": 0.18743142522038517, - "grad_norm": 2.8566646575927734, - "learning_rate": 1.7086533767114216e-05, - "loss": 0.8687, + "epoch": 0.1743752199929602, + "grad_norm": 2.5652480125427246, + "learning_rate": 1.3249101525648057e-05, + "loss": 0.7126, "step": 2477 }, { - "epoch": 0.18750709394271878, - "grad_norm": 2.680727005004883, - "learning_rate": 1.7085544867538854e-05, - "loss": 0.6916, + "epoch": 0.17444561774023232, + "grad_norm": 2.2121517658233643, + "learning_rate": 1.3248382166785472e-05, + "loss": 0.7382, "step": 2478 }, { - "epoch": 0.1875827626650524, - "grad_norm": 2.9633843898773193, - "learning_rate": 1.7084555461620514e-05, - "loss": 0.8821, + "epoch": 0.1745160154875044, + "grad_norm": 2.478785276412964, + "learning_rate": 1.324766248306384e-05, + "loss": 0.7456, "step": 2479 }, { - "epoch": 0.18765843138738603, - "grad_norm": 2.1105926036834717, - "learning_rate": 1.7083565549421166e-05, - "loss": 0.713, + "epoch": 0.1745864132347765, + "grad_norm": 3.6467998027801514, + "learning_rate": 1.3246942474520576e-05, + "loss": 0.6537, "step": 2480 }, { - "epoch": 0.18773410010971964, - "grad_norm": 2.6153006553649902, - "learning_rate": 1.7082575131002796e-05, - "loss": 0.6687, + "epoch": 0.17465681098204858, + "grad_norm": 2.4121546745300293, + "learning_rate": 1.3246222141193117e-05, + "loss": 0.7841, "step": 2481 }, { - "epoch": 0.18780976883205328, - "grad_norm": 2.08457088470459, - "learning_rate": 1.708158420642743e-05, - "loss": 0.7836, + "epoch": 0.17472720872932065, + "grad_norm": 1.8438323736190796, + "learning_rate": 1.324550148311891e-05, + "loss": 0.7528, "step": 2482 }, { - "epoch": 0.18788543755438689, - "grad_norm": 2.58174467086792, - "learning_rate": 1.7080592775757122e-05, - "loss": 0.8457, + "epoch": 0.17479760647659276, + "grad_norm": 2.4135732650756836, + "learning_rate": 1.3244780500335426e-05, + "loss": 0.8292, "step": 2483 }, { - "epoch": 0.18796110627672052, - "grad_norm": 2.193037271499634, - "learning_rate": 1.707960083905396e-05, - "loss": 0.7799, + "epoch": 0.17486800422386484, + "grad_norm": 2.034721612930298, + "learning_rate": 1.3244059192880148e-05, + "loss": 0.8306, "step": 2484 }, { - "epoch": 0.18803677499905413, - "grad_norm": 2.2527616024017334, - "learning_rate": 1.707860839638006e-05, - "loss": 0.6341, + "epoch": 0.17493840197113691, + "grad_norm": 2.128718614578247, + "learning_rate": 1.3243337560790579e-05, + "loss": 0.6647, "step": 2485 }, { - "epoch": 0.18811244372138777, - "grad_norm": 2.2028379440307617, - "learning_rate": 1.707761544779757e-05, - "loss": 0.6469, + "epoch": 0.17500879971840902, + "grad_norm": 2.137626886367798, + "learning_rate": 1.3242615604104235e-05, + "loss": 0.728, "step": 2486 }, { - "epoch": 0.18818811244372138, - "grad_norm": 2.2932393550872803, - "learning_rate": 1.7076621993368676e-05, - "loss": 0.6554, + "epoch": 0.1750791974656811, + "grad_norm": 2.1954214572906494, + "learning_rate": 1.3241893322858651e-05, + "loss": 0.6652, "step": 2487 }, { - "epoch": 0.18826378116605502, - "grad_norm": 2.409485101699829, - "learning_rate": 1.7075628033155593e-05, - "loss": 0.905, + "epoch": 0.17514959521295317, + "grad_norm": 2.067700147628784, + "learning_rate": 1.324117071709138e-05, + "loss": 0.7745, "step": 2488 }, { - "epoch": 0.18833944988838863, - "grad_norm": 3.5237672328948975, - "learning_rate": 1.707463356722056e-05, - "loss": 0.7915, + "epoch": 0.17521999296022528, + "grad_norm": 2.0748746395111084, + "learning_rate": 1.3240447786839994e-05, + "loss": 0.8401, "step": 2489 }, { - "epoch": 0.18841511861072227, - "grad_norm": 2.7192323207855225, - "learning_rate": 1.7073638595625856e-05, - "loss": 0.8663, + "epoch": 0.17529039070749736, + "grad_norm": 3.0510470867156982, + "learning_rate": 1.3239724532142073e-05, + "loss": 0.6587, "step": 2490 }, { - "epoch": 0.18849078733305588, - "grad_norm": 2.6464853286743164, - "learning_rate": 1.707264311843379e-05, - "loss": 0.9029, + "epoch": 0.17536078845476943, + "grad_norm": 2.399211883544922, + "learning_rate": 1.3239000953035224e-05, + "loss": 0.7293, "step": 2491 }, { - "epoch": 0.1885664560553895, - "grad_norm": 2.4088399410247803, - "learning_rate": 1.7071647135706702e-05, - "loss": 0.7941, + "epoch": 0.17543118620204154, + "grad_norm": 2.3822696208953857, + "learning_rate": 1.3238277049557064e-05, + "loss": 0.8812, "step": 2492 }, { - "epoch": 0.18864212477772313, - "grad_norm": 2.637516975402832, - "learning_rate": 1.7070650647506966e-05, - "loss": 0.679, + "epoch": 0.17550158394931362, + "grad_norm": 2.2642223834991455, + "learning_rate": 1.323755282174523e-05, + "loss": 0.811, "step": 2493 }, { - "epoch": 0.18871779350005674, - "grad_norm": 2.1144580841064453, - "learning_rate": 1.7069653653896982e-05, - "loss": 0.7291, + "epoch": 0.17557198169658572, + "grad_norm": 2.1361095905303955, + "learning_rate": 1.3236828269637377e-05, + "loss": 0.8391, "step": 2494 }, { - "epoch": 0.18879346222239038, - "grad_norm": 2.473254680633545, - "learning_rate": 1.7068656154939183e-05, - "loss": 0.7055, + "epoch": 0.1756423794438578, + "grad_norm": 2.250175714492798, + "learning_rate": 1.3236103393271175e-05, + "loss": 0.797, "step": 2495 }, { - "epoch": 0.188869130944724, - "grad_norm": 2.5873825550079346, - "learning_rate": 1.7067658150696043e-05, - "loss": 0.8274, + "epoch": 0.17571277719112988, + "grad_norm": 2.177765369415283, + "learning_rate": 1.3235378192684309e-05, + "loss": 0.6579, "step": 2496 }, { - "epoch": 0.18894479966705763, - "grad_norm": 2.4804913997650146, - "learning_rate": 1.706665964123005e-05, - "loss": 0.7887, + "epoch": 0.17578317493840198, + "grad_norm": 2.17628812789917, + "learning_rate": 1.3234652667914482e-05, + "loss": 0.7547, "step": 2497 }, { - "epoch": 0.18902046838939124, - "grad_norm": 3.1803600788116455, - "learning_rate": 1.7065660626603745e-05, - "loss": 0.9983, + "epoch": 0.17585357268567406, + "grad_norm": 2.1396942138671875, + "learning_rate": 1.3233926818999416e-05, + "loss": 0.7059, "step": 2498 }, { - "epoch": 0.18909613711172488, - "grad_norm": 3.4321467876434326, - "learning_rate": 1.706466110687968e-05, - "loss": 0.8888, + "epoch": 0.17592397043294614, + "grad_norm": 1.8798319101333618, + "learning_rate": 1.323320064597685e-05, + "loss": 0.6737, "step": 2499 }, { - "epoch": 0.1891718058340585, - "grad_norm": 2.6586387157440186, - "learning_rate": 1.706366108212045e-05, - "loss": 0.9563, + "epoch": 0.17599436818021824, + "grad_norm": 2.8714842796325684, + "learning_rate": 1.323247414888454e-05, + "loss": 0.6814, "step": 2500 }, { - "epoch": 0.18924747455639213, - "grad_norm": 2.2730400562286377, - "learning_rate": 1.7062660552388687e-05, - "loss": 0.722, + "epoch": 0.17606476592749032, + "grad_norm": 2.4333648681640625, + "learning_rate": 1.3231747327760252e-05, + "loss": 0.8282, "step": 2501 }, { - "epoch": 0.18932314327872574, - "grad_norm": 2.669356107711792, - "learning_rate": 1.706165951774704e-05, - "loss": 0.7355, + "epoch": 0.1761351636747624, + "grad_norm": 2.19931697845459, + "learning_rate": 1.3231020182641776e-05, + "loss": 0.7011, "step": 2502 }, { - "epoch": 0.18939881200105937, - "grad_norm": 3.200263738632202, - "learning_rate": 1.70606579782582e-05, - "loss": 0.6391, + "epoch": 0.1762055614220345, + "grad_norm": 2.0979816913604736, + "learning_rate": 1.3230292713566919e-05, + "loss": 0.7723, "step": 2503 }, { - "epoch": 0.18947448072339299, - "grad_norm": 2.0862410068511963, - "learning_rate": 1.7059655933984886e-05, - "loss": 0.7393, + "epoch": 0.17627595916930658, + "grad_norm": 1.8135102987289429, + "learning_rate": 1.3229564920573499e-05, + "loss": 0.72, "step": 2504 }, { - "epoch": 0.1895501494457266, - "grad_norm": 2.3829562664031982, - "learning_rate": 1.7058653384989852e-05, - "loss": 0.7792, + "epoch": 0.17634635691657866, + "grad_norm": 2.125945568084717, + "learning_rate": 1.322883680369936e-05, + "loss": 0.7322, "step": 2505 }, { - "epoch": 0.18962581816806023, - "grad_norm": 2.7192342281341553, - "learning_rate": 1.7057650331335875e-05, - "loss": 0.8724, + "epoch": 0.17641675466385076, + "grad_norm": 2.1808784008026123, + "learning_rate": 1.3228108362982352e-05, + "loss": 0.8914, "step": 2506 }, { - "epoch": 0.18970148689039384, - "grad_norm": 2.462100028991699, - "learning_rate": 1.7056646773085773e-05, - "loss": 0.8377, + "epoch": 0.17648715241112284, + "grad_norm": 2.2015037536621094, + "learning_rate": 1.322737959846035e-05, + "loss": 0.7146, "step": 2507 }, { - "epoch": 0.18977715561272748, - "grad_norm": 2.152848243713379, - "learning_rate": 1.705564271030239e-05, - "loss": 0.8334, + "epoch": 0.17655755015839494, + "grad_norm": 2.0767343044281006, + "learning_rate": 1.3226650510171244e-05, + "loss": 0.7698, "step": 2508 }, { - "epoch": 0.1898528243350611, - "grad_norm": 2.1347358226776123, - "learning_rate": 1.705463814304861e-05, - "loss": 0.7019, + "epoch": 0.17662794790566702, + "grad_norm": 1.9536254405975342, + "learning_rate": 1.322592109815294e-05, + "loss": 0.6925, "step": 2509 }, { - "epoch": 0.18992849305739473, - "grad_norm": 2.732978343963623, - "learning_rate": 1.7053633071387336e-05, - "loss": 0.8409, + "epoch": 0.1766983456529391, + "grad_norm": 1.7571136951446533, + "learning_rate": 1.3225191362443357e-05, + "loss": 0.6414, "step": 2510 }, { - "epoch": 0.19000416177972834, - "grad_norm": 2.211718797683716, - "learning_rate": 1.7052627495381507e-05, - "loss": 0.7797, + "epoch": 0.1767687434002112, + "grad_norm": 3.1137123107910156, + "learning_rate": 1.322446130308044e-05, + "loss": 0.5837, "step": 2511 }, { - "epoch": 0.19007983050206198, - "grad_norm": 2.507467031478882, - "learning_rate": 1.7051621415094105e-05, - "loss": 0.7145, + "epoch": 0.17683914114748328, + "grad_norm": 2.6278882026672363, + "learning_rate": 1.322373092010214e-05, + "loss": 0.798, "step": 2512 }, { - "epoch": 0.1901554992243956, - "grad_norm": 2.3044538497924805, - "learning_rate": 1.7050614830588122e-05, - "loss": 0.8306, + "epoch": 0.17690953889475536, + "grad_norm": 3.15384578704834, + "learning_rate": 1.3223000213546434e-05, + "loss": 0.919, "step": 2513 }, { - "epoch": 0.19023116794672923, - "grad_norm": 2.1056079864501953, - "learning_rate": 1.7049607741926603e-05, - "loss": 0.9237, + "epoch": 0.17697993664202746, + "grad_norm": 2.062216281890869, + "learning_rate": 1.322226918345131e-05, + "loss": 0.6808, "step": 2514 }, { - "epoch": 0.19030683666906284, - "grad_norm": 2.6704158782958984, - "learning_rate": 1.704860014917261e-05, - "loss": 0.8201, + "epoch": 0.17705033438929954, + "grad_norm": 2.1357858180999756, + "learning_rate": 1.3221537829854777e-05, + "loss": 0.7989, "step": 2515 }, { - "epoch": 0.19038250539139648, - "grad_norm": 3.0640957355499268, - "learning_rate": 1.7047592052389243e-05, - "loss": 0.8734, + "epoch": 0.17712073213657162, + "grad_norm": 2.148599624633789, + "learning_rate": 1.3220806152794856e-05, + "loss": 0.7456, "step": 2516 }, { - "epoch": 0.1904581741137301, - "grad_norm": 2.7852325439453125, - "learning_rate": 1.7046583451639635e-05, - "loss": 0.8072, + "epoch": 0.17719112988384372, + "grad_norm": 2.3659207820892334, + "learning_rate": 1.3220074152309592e-05, + "loss": 0.6702, "step": 2517 }, { - "epoch": 0.1905338428360637, - "grad_norm": 3.1452224254608154, - "learning_rate": 1.7045574346986942e-05, - "loss": 0.6817, + "epoch": 0.1772615276311158, + "grad_norm": 4.222046852111816, + "learning_rate": 1.3219341828437038e-05, + "loss": 0.8006, "step": 2518 }, { - "epoch": 0.19060951155839734, - "grad_norm": 2.4162135124206543, - "learning_rate": 1.7044564738494367e-05, - "loss": 0.8041, + "epoch": 0.17733192537838788, + "grad_norm": 1.9350900650024414, + "learning_rate": 1.321860918121527e-05, + "loss": 0.7844, "step": 2519 }, { - "epoch": 0.19068518028073095, - "grad_norm": 2.7600903511047363, - "learning_rate": 1.704355462622512e-05, - "loss": 0.8216, + "epoch": 0.17740232312565998, + "grad_norm": 1.9199578762054443, + "learning_rate": 1.3217876210682378e-05, + "loss": 0.8074, "step": 2520 }, { - "epoch": 0.1907608490030646, - "grad_norm": 2.507215976715088, - "learning_rate": 1.7042544010242473e-05, - "loss": 0.8253, + "epoch": 0.17747272087293206, + "grad_norm": 2.167600631713867, + "learning_rate": 1.3217142916876471e-05, + "loss": 0.7737, "step": 2521 }, { - "epoch": 0.1908365177253982, - "grad_norm": 2.914543628692627, - "learning_rate": 1.7041532890609703e-05, - "loss": 0.9177, + "epoch": 0.17754311862020417, + "grad_norm": 1.9414867162704468, + "learning_rate": 1.3216409299835674e-05, + "loss": 0.6745, "step": 2522 }, { - "epoch": 0.19091218644773184, - "grad_norm": 3.545713186264038, - "learning_rate": 1.704052126739014e-05, - "loss": 0.6333, + "epoch": 0.17761351636747624, + "grad_norm": 2.4856436252593994, + "learning_rate": 1.3215675359598127e-05, + "loss": 0.7607, "step": 2523 }, { - "epoch": 0.19098785517006545, - "grad_norm": 2.720325469970703, - "learning_rate": 1.7039509140647124e-05, - "loss": 0.7723, + "epoch": 0.17768391411474832, + "grad_norm": 2.0950028896331787, + "learning_rate": 1.3214941096201987e-05, + "loss": 0.7793, "step": 2524 }, { - "epoch": 0.19106352389239908, - "grad_norm": 2.6830990314483643, - "learning_rate": 1.703849651044404e-05, - "loss": 0.8263, + "epoch": 0.17775431186202043, + "grad_norm": 1.9961518049240112, + "learning_rate": 1.3214206509685435e-05, + "loss": 0.7826, "step": 2525 }, { - "epoch": 0.1911391926147327, - "grad_norm": 2.6255667209625244, - "learning_rate": 1.703748337684431e-05, - "loss": 0.7983, + "epoch": 0.1778247096092925, + "grad_norm": 2.208261489868164, + "learning_rate": 1.3213471600086655e-05, + "loss": 0.6925, "step": 2526 }, { - "epoch": 0.19121486133706633, - "grad_norm": 3.0577828884124756, - "learning_rate": 1.7036469739911374e-05, - "loss": 0.5705, + "epoch": 0.17789510735656458, + "grad_norm": 2.2063775062561035, + "learning_rate": 1.321273636744386e-05, + "loss": 0.7345, "step": 2527 }, { - "epoch": 0.19129053005939994, - "grad_norm": 2.4407639503479004, - "learning_rate": 1.703545559970871e-05, - "loss": 0.7921, + "epoch": 0.17796550510383669, + "grad_norm": 2.2562854290008545, + "learning_rate": 1.3212000811795275e-05, + "loss": 0.7439, "step": 2528 }, { - "epoch": 0.19136619878173358, - "grad_norm": 3.169311046600342, - "learning_rate": 1.7034440956299825e-05, - "loss": 0.6525, + "epoch": 0.17803590285110876, + "grad_norm": 2.583740472793579, + "learning_rate": 1.3211264933179144e-05, + "loss": 0.6826, "step": 2529 }, { - "epoch": 0.1914418675040672, - "grad_norm": 2.345228433609009, - "learning_rate": 1.703342580974826e-05, - "loss": 0.6853, + "epoch": 0.17810630059838084, + "grad_norm": 2.054724931716919, + "learning_rate": 1.321052873163372e-05, + "loss": 0.7054, "step": 2530 }, { - "epoch": 0.19151753622640083, - "grad_norm": 2.094965696334839, - "learning_rate": 1.703241016011759e-05, - "loss": 0.6976, + "epoch": 0.17817669834565295, + "grad_norm": 2.1711981296539307, + "learning_rate": 1.3209792207197286e-05, + "loss": 0.8167, "step": 2531 }, { - "epoch": 0.19159320494873444, - "grad_norm": 2.3443291187286377, - "learning_rate": 1.7031394007471415e-05, - "loss": 0.6738, + "epoch": 0.17824709609292502, + "grad_norm": 2.4675133228302, + "learning_rate": 1.320905535990813e-05, + "loss": 0.7572, "step": 2532 }, { - "epoch": 0.19166887367106805, - "grad_norm": 2.716376304626465, - "learning_rate": 1.703037735187337e-05, - "loss": 0.8052, + "epoch": 0.1783174938401971, + "grad_norm": 2.1428332328796387, + "learning_rate": 1.3208318189804562e-05, + "loss": 0.8082, "step": 2533 }, { - "epoch": 0.1917445423934017, - "grad_norm": 2.7396631240844727, - "learning_rate": 1.7029360193387123e-05, - "loss": 0.6116, + "epoch": 0.1783878915874692, + "grad_norm": 2.0781171321868896, + "learning_rate": 1.3207580696924908e-05, + "loss": 0.8688, "step": 2534 }, { - "epoch": 0.1918202111157353, - "grad_norm": 2.562323570251465, - "learning_rate": 1.702834253207637e-05, - "loss": 0.7081, + "epoch": 0.17845828933474128, + "grad_norm": 2.3209595680236816, + "learning_rate": 1.3206842881307512e-05, + "loss": 0.7106, "step": 2535 }, { - "epoch": 0.19189587983806894, - "grad_norm": 2.4629499912261963, - "learning_rate": 1.702732436800484e-05, - "loss": 0.8838, + "epoch": 0.1785286870820134, + "grad_norm": 2.0255346298217773, + "learning_rate": 1.3206104742990736e-05, + "loss": 0.6834, "step": 2536 }, { - "epoch": 0.19197154856040255, - "grad_norm": 2.0351247787475586, - "learning_rate": 1.7026305701236294e-05, - "loss": 0.9576, + "epoch": 0.17859908482928546, + "grad_norm": 1.9358389377593994, + "learning_rate": 1.3205366282012951e-05, + "loss": 0.6892, "step": 2537 }, { - "epoch": 0.1920472172827362, - "grad_norm": 3.575366973876953, - "learning_rate": 1.7025286531834525e-05, - "loss": 0.6759, + "epoch": 0.17866948257655754, + "grad_norm": 2.129214286804199, + "learning_rate": 1.3204627498412554e-05, + "loss": 0.8404, "step": 2538 }, { - "epoch": 0.1921228860050698, - "grad_norm": 2.341073513031006, - "learning_rate": 1.7024266859863358e-05, - "loss": 0.7948, + "epoch": 0.17873988032382965, + "grad_norm": 2.8582489490509033, + "learning_rate": 1.3203888392227955e-05, + "loss": 0.7346, "step": 2539 }, { - "epoch": 0.19219855472740344, - "grad_norm": 2.288145065307617, - "learning_rate": 1.7023246685386646e-05, - "loss": 0.6851, + "epoch": 0.17881027807110172, + "grad_norm": 2.430223226547241, + "learning_rate": 1.3203148963497579e-05, + "loss": 0.6652, "step": 2540 }, { - "epoch": 0.19227422344973705, - "grad_norm": 2.1108577251434326, - "learning_rate": 1.7022226008468275e-05, - "loss": 0.783, + "epoch": 0.1788806758183738, + "grad_norm": 1.9675344228744507, + "learning_rate": 1.320240921225987e-05, + "loss": 0.7822, "step": 2541 }, { - "epoch": 0.19234989217207069, - "grad_norm": 2.4641237258911133, - "learning_rate": 1.7021204829172166e-05, - "loss": 0.833, + "epoch": 0.1789510735656459, + "grad_norm": 2.137423276901245, + "learning_rate": 1.3201669138553292e-05, + "loss": 0.6974, "step": 2542 }, { - "epoch": 0.1924255608944043, - "grad_norm": 3.356717109680176, - "learning_rate": 1.7020183147562267e-05, - "loss": 0.7958, + "epoch": 0.17902147131291798, + "grad_norm": 1.893980860710144, + "learning_rate": 1.3200928742416315e-05, + "loss": 0.7542, "step": 2543 }, { - "epoch": 0.19250122961673793, - "grad_norm": 2.6340181827545166, - "learning_rate": 1.7019160963702556e-05, - "loss": 0.7378, + "epoch": 0.17909186906019006, + "grad_norm": 2.354923963546753, + "learning_rate": 1.3200188023887439e-05, + "loss": 0.7338, "step": 2544 }, { - "epoch": 0.19257689833907154, - "grad_norm": 2.6588118076324463, - "learning_rate": 1.701813827765705e-05, - "loss": 0.9538, + "epoch": 0.17916226680746217, + "grad_norm": 1.849745512008667, + "learning_rate": 1.319944698300517e-05, + "loss": 0.7795, "step": 2545 }, { - "epoch": 0.19265256706140516, - "grad_norm": 2.6437458992004395, - "learning_rate": 1.7017115089489794e-05, - "loss": 0.739, + "epoch": 0.17923266455473424, + "grad_norm": 1.9921903610229492, + "learning_rate": 1.319870561980804e-05, + "loss": 0.7453, "step": 2546 }, { - "epoch": 0.1927282357837388, - "grad_norm": 2.546844005584717, - "learning_rate": 1.7016091399264856e-05, - "loss": 0.8209, + "epoch": 0.17930306230200635, + "grad_norm": 1.9598453044891357, + "learning_rate": 1.319796393433459e-05, + "loss": 0.7604, "step": 2547 }, { - "epoch": 0.1928039045060724, - "grad_norm": 2.78609299659729, - "learning_rate": 1.701506720704635e-05, - "loss": 0.9092, + "epoch": 0.17937346004927843, + "grad_norm": 2.1843385696411133, + "learning_rate": 1.3197221926623382e-05, + "loss": 0.7205, "step": 2548 }, { - "epoch": 0.19287957322840604, - "grad_norm": 2.1498682498931885, - "learning_rate": 1.7014042512898414e-05, - "loss": 0.8103, + "epoch": 0.1794438577965505, + "grad_norm": 2.0145952701568604, + "learning_rate": 1.3196479596712992e-05, + "loss": 0.6624, "step": 2549 }, { - "epoch": 0.19295524195073965, - "grad_norm": 2.6014087200164795, - "learning_rate": 1.701301731688521e-05, - "loss": 1.07, + "epoch": 0.1795142555438226, + "grad_norm": 2.1916537284851074, + "learning_rate": 1.319573694464202e-05, + "loss": 0.6388, "step": 2550 }, { - "epoch": 0.1930309106730733, - "grad_norm": 2.4372475147247314, - "learning_rate": 1.7011991619070948e-05, - "loss": 0.785, + "epoch": 0.1795846532910947, + "grad_norm": 2.240485906600952, + "learning_rate": 1.3194993970449069e-05, + "loss": 0.712, "step": 2551 }, { - "epoch": 0.1931065793954069, - "grad_norm": 2.3356244564056396, - "learning_rate": 1.7010965419519858e-05, - "loss": 0.689, + "epoch": 0.17965505103836676, + "grad_norm": 2.299838066101074, + "learning_rate": 1.3194250674172772e-05, + "loss": 0.8257, "step": 2552 }, { - "epoch": 0.19318224811774054, - "grad_norm": 2.393585443496704, - "learning_rate": 1.70099387182962e-05, - "loss": 0.7593, + "epoch": 0.17972544878563887, + "grad_norm": 2.335177183151245, + "learning_rate": 1.3193507055851774e-05, + "loss": 0.7383, "step": 2553 }, { - "epoch": 0.19325791684007415, - "grad_norm": 2.5867748260498047, - "learning_rate": 1.700891151546427e-05, - "loss": 0.7933, + "epoch": 0.17979584653291095, + "grad_norm": 2.0117366313934326, + "learning_rate": 1.3192763115524735e-05, + "loss": 0.773, "step": 2554 }, { - "epoch": 0.1933335855624078, - "grad_norm": 2.6387293338775635, - "learning_rate": 1.7007883811088403e-05, - "loss": 0.7924, + "epoch": 0.17986624428018302, + "grad_norm": 2.2581064701080322, + "learning_rate": 1.3192018853230331e-05, + "loss": 0.673, "step": 2555 }, { - "epoch": 0.1934092542847414, - "grad_norm": 1.7899304628372192, - "learning_rate": 1.7006855605232947e-05, - "loss": 0.8936, + "epoch": 0.17993664202745513, + "grad_norm": 2.201690435409546, + "learning_rate": 1.319127426900726e-05, + "loss": 0.7576, "step": 2556 }, { - "epoch": 0.19348492300707504, - "grad_norm": 3.2812225818634033, - "learning_rate": 1.7005826897962294e-05, - "loss": 0.7595, + "epoch": 0.1800070397747272, + "grad_norm": 2.33482027053833, + "learning_rate": 1.3190529362894234e-05, + "loss": 0.8834, "step": 2557 }, { - "epoch": 0.19356059172940865, - "grad_norm": 3.634249448776245, - "learning_rate": 1.7004797689340873e-05, - "loss": 0.8057, + "epoch": 0.18007743752199928, + "grad_norm": 2.5799663066864014, + "learning_rate": 1.3189784134929978e-05, + "loss": 0.781, "step": 2558 }, { - "epoch": 0.19363626045174226, - "grad_norm": 2.197214365005493, - "learning_rate": 1.7003767979433126e-05, - "loss": 0.7255, + "epoch": 0.1801478352692714, + "grad_norm": 2.4728870391845703, + "learning_rate": 1.3189038585153241e-05, + "loss": 0.7747, "step": 2559 }, { - "epoch": 0.1937119291740759, - "grad_norm": 2.3928468227386475, - "learning_rate": 1.7002737768303542e-05, - "loss": 1.0792, + "epoch": 0.18021823301654347, + "grad_norm": 2.7329437732696533, + "learning_rate": 1.3188292713602781e-05, + "loss": 0.627, "step": 2560 }, { - "epoch": 0.1937875978964095, - "grad_norm": 2.169796943664551, - "learning_rate": 1.7001707056016633e-05, - "loss": 0.8442, + "epoch": 0.18028863076381557, + "grad_norm": 1.8933359384536743, + "learning_rate": 1.3187546520317379e-05, + "loss": 0.6934, "step": 2561 }, { - "epoch": 0.19386326661874315, - "grad_norm": 2.0690131187438965, - "learning_rate": 1.7000675842636948e-05, - "loss": 0.7823, + "epoch": 0.18035902851108765, + "grad_norm": 2.2462096214294434, + "learning_rate": 1.3186800005335829e-05, + "loss": 0.7039, "step": 2562 }, { - "epoch": 0.19393893534107676, - "grad_norm": 2.256768226623535, - "learning_rate": 1.6999644128229065e-05, - "loss": 0.7462, + "epoch": 0.18042942625835973, + "grad_norm": 2.419424057006836, + "learning_rate": 1.3186053168696946e-05, + "loss": 0.808, "step": 2563 }, { - "epoch": 0.1940146040634104, - "grad_norm": 2.095914602279663, - "learning_rate": 1.6998611912857592e-05, - "loss": 0.8174, + "epoch": 0.18049982400563183, + "grad_norm": 2.152305841445923, + "learning_rate": 1.3185306010439553e-05, + "loss": 0.7456, "step": 2564 }, { - "epoch": 0.194090272785744, - "grad_norm": 3.0056588649749756, - "learning_rate": 1.6997579196587173e-05, - "loss": 0.8705, + "epoch": 0.1805702217529039, + "grad_norm": 1.9930144548416138, + "learning_rate": 1.3184558530602501e-05, + "loss": 0.6791, "step": 2565 }, { - "epoch": 0.19416594150807764, - "grad_norm": 2.5607128143310547, - "learning_rate": 1.6996545979482475e-05, - "loss": 0.7777, + "epoch": 0.180640619500176, + "grad_norm": 2.981900691986084, + "learning_rate": 1.318381072922465e-05, + "loss": 0.7138, "step": 2566 }, { - "epoch": 0.19424161023041125, - "grad_norm": 2.9612107276916504, - "learning_rate": 1.6995512261608202e-05, - "loss": 0.6993, + "epoch": 0.1807110172474481, + "grad_norm": 2.074373960494995, + "learning_rate": 1.3183062606344878e-05, + "loss": 0.79, "step": 2567 }, { - "epoch": 0.1943172789527449, - "grad_norm": 2.610933780670166, - "learning_rate": 1.6994478043029095e-05, - "loss": 0.7044, + "epoch": 0.18078141499472017, + "grad_norm": 1.9160135984420776, + "learning_rate": 1.318231416200208e-05, + "loss": 0.6882, "step": 2568 }, { - "epoch": 0.1943929476750785, - "grad_norm": 2.2328102588653564, - "learning_rate": 1.699344332380991e-05, - "loss": 0.6661, + "epoch": 0.18085181274199225, + "grad_norm": 1.7908340692520142, + "learning_rate": 1.3181565396235172e-05, + "loss": 0.7496, "step": 2569 }, { - "epoch": 0.19446861639741214, - "grad_norm": 2.128195285797119, - "learning_rate": 1.6992408104015458e-05, - "loss": 0.735, + "epoch": 0.18092221048926435, + "grad_norm": 2.3006176948547363, + "learning_rate": 1.318081630908308e-05, + "loss": 0.7614, "step": 2570 }, { - "epoch": 0.19454428511974575, - "grad_norm": 3.4304070472717285, - "learning_rate": 1.6991372383710555e-05, - "loss": 0.9446, + "epoch": 0.18099260823653643, + "grad_norm": 2.047255277633667, + "learning_rate": 1.3180066900584752e-05, + "loss": 0.7341, "step": 2571 }, { - "epoch": 0.19461995384207936, - "grad_norm": 2.2780416011810303, - "learning_rate": 1.6990336162960066e-05, - "loss": 0.8719, + "epoch": 0.1810630059838085, + "grad_norm": 3.022780656814575, + "learning_rate": 1.3179317170779146e-05, + "loss": 0.777, "step": 2572 }, { - "epoch": 0.194695622564413, - "grad_norm": 2.545768976211548, - "learning_rate": 1.6989299441828883e-05, - "loss": 0.7445, + "epoch": 0.1811334037310806, + "grad_norm": 2.029799461364746, + "learning_rate": 1.3178567119705247e-05, + "loss": 0.736, "step": 2573 }, { - "epoch": 0.1947712912867466, - "grad_norm": 2.4428822994232178, - "learning_rate": 1.698826222038193e-05, - "loss": 0.6788, + "epoch": 0.1812038014783527, + "grad_norm": 1.9279718399047852, + "learning_rate": 1.3177816747402043e-05, + "loss": 0.7287, "step": 2574 }, { - "epoch": 0.19484696000908025, - "grad_norm": 2.0839898586273193, - "learning_rate": 1.6987224498684157e-05, - "loss": 0.7712, + "epoch": 0.1812741992256248, + "grad_norm": 2.5501017570495605, + "learning_rate": 1.3177066053908556e-05, + "loss": 0.8234, "step": 2575 }, { - "epoch": 0.19492262873141386, - "grad_norm": 2.200305938720703, - "learning_rate": 1.6986186276800554e-05, - "loss": 0.6872, + "epoch": 0.18134459697289687, + "grad_norm": 2.23816180229187, + "learning_rate": 1.3176315039263808e-05, + "loss": 0.7087, "step": 2576 }, { - "epoch": 0.1949982974537475, - "grad_norm": 2.497018575668335, - "learning_rate": 1.6985147554796134e-05, - "loss": 0.7094, + "epoch": 0.18141499472016895, + "grad_norm": 2.1780097484588623, + "learning_rate": 1.3175563703506848e-05, + "loss": 0.8233, "step": 2577 }, { - "epoch": 0.1950739661760811, - "grad_norm": 2.3949403762817383, - "learning_rate": 1.698410833273595e-05, - "loss": 0.778, + "epoch": 0.18148539246744105, + "grad_norm": 1.982742428779602, + "learning_rate": 1.3174812046676739e-05, + "loss": 0.7138, "step": 2578 }, { - "epoch": 0.19514963489841475, - "grad_norm": 3.1228713989257812, - "learning_rate": 1.698306861068508e-05, - "loss": 0.8602, + "epoch": 0.18155579021471313, + "grad_norm": 2.1223337650299072, + "learning_rate": 1.3174060068812557e-05, + "loss": 0.7816, "step": 2579 }, { - "epoch": 0.19522530362074836, - "grad_norm": 2.2190120220184326, - "learning_rate": 1.6982028388708625e-05, - "loss": 0.8146, + "epoch": 0.1816261879619852, + "grad_norm": 2.0995934009552, + "learning_rate": 1.3173307769953404e-05, + "loss": 0.8094, "step": 2580 }, { - "epoch": 0.195300972343082, - "grad_norm": 1.9382598400115967, - "learning_rate": 1.698098766687174e-05, - "loss": 0.636, + "epoch": 0.1816965857092573, + "grad_norm": 2.844238519668579, + "learning_rate": 1.3172555150138387e-05, + "loss": 0.7193, "step": 2581 }, { - "epoch": 0.1953766410654156, - "grad_norm": 2.97119402885437, - "learning_rate": 1.6979946445239595e-05, - "loss": 0.732, + "epoch": 0.1817669834565294, + "grad_norm": 2.3782155513763428, + "learning_rate": 1.3171802209406638e-05, + "loss": 0.7014, "step": 2582 }, { - "epoch": 0.19545230978774925, - "grad_norm": 2.2818760871887207, - "learning_rate": 1.6978904723877394e-05, - "loss": 0.7439, + "epoch": 0.18183738120380147, + "grad_norm": 2.1788549423217773, + "learning_rate": 1.3171048947797302e-05, + "loss": 0.7811, "step": 2583 }, { - "epoch": 0.19552797851008286, - "grad_norm": 3.0975162982940674, - "learning_rate": 1.697786250285037e-05, - "loss": 0.8555, + "epoch": 0.18190777895107357, + "grad_norm": 2.029998302459717, + "learning_rate": 1.3170295365349545e-05, + "loss": 0.8022, "step": 2584 }, { - "epoch": 0.1956036472324165, - "grad_norm": 2.345454454421997, - "learning_rate": 1.6976819782223792e-05, - "loss": 0.7178, + "epoch": 0.18197817669834565, + "grad_norm": 2.033193349838257, + "learning_rate": 1.3169541462102542e-05, + "loss": 0.8016, "step": 2585 }, { - "epoch": 0.1956793159547501, - "grad_norm": 2.2025437355041504, - "learning_rate": 1.697577656206296e-05, - "loss": 0.8992, + "epoch": 0.18204857444561773, + "grad_norm": 1.821736216545105, + "learning_rate": 1.3168787238095489e-05, + "loss": 0.7834, "step": 2586 }, { - "epoch": 0.19575498467708372, - "grad_norm": 2.2825613021850586, - "learning_rate": 1.6974732842433202e-05, - "loss": 0.667, + "epoch": 0.18211897219288983, + "grad_norm": 2.1797523498535156, + "learning_rate": 1.3168032693367605e-05, + "loss": 0.7251, "step": 2587 }, { - "epoch": 0.19583065339941735, - "grad_norm": 2.3459088802337646, - "learning_rate": 1.697368862339988e-05, - "loss": 0.5786, + "epoch": 0.1821893699401619, + "grad_norm": 2.5700201988220215, + "learning_rate": 1.3167277827958111e-05, + "loss": 0.8391, "step": 2588 }, { - "epoch": 0.19590632212175096, - "grad_norm": 2.5378456115722656, - "learning_rate": 1.697264390502839e-05, - "loss": 0.7354, + "epoch": 0.18225976768743402, + "grad_norm": 2.2453713417053223, + "learning_rate": 1.3166522641906259e-05, + "loss": 0.7971, "step": 2589 }, { - "epoch": 0.1959819908440846, - "grad_norm": 2.5877671241760254, - "learning_rate": 1.697159868738415e-05, - "loss": 0.7067, + "epoch": 0.1823301654347061, + "grad_norm": 2.3246560096740723, + "learning_rate": 1.316576713525131e-05, + "loss": 0.7793, "step": 2590 }, { - "epoch": 0.1960576595664182, - "grad_norm": 2.3794543743133545, - "learning_rate": 1.6970552970532616e-05, - "loss": 0.8205, + "epoch": 0.18240056318197817, + "grad_norm": 2.135981798171997, + "learning_rate": 1.3165011308032544e-05, + "loss": 0.783, "step": 2591 }, { - "epoch": 0.19613332828875185, - "grad_norm": 2.144336462020874, - "learning_rate": 1.6969506754539278e-05, - "loss": 0.882, + "epoch": 0.18247096092925028, + "grad_norm": 2.7227280139923096, + "learning_rate": 1.3164255160289256e-05, + "loss": 0.6865, "step": 2592 }, { - "epoch": 0.19620899701108546, - "grad_norm": 2.0169589519500732, - "learning_rate": 1.6968460039469644e-05, - "loss": 0.8049, + "epoch": 0.18254135867652235, + "grad_norm": 5.095438003540039, + "learning_rate": 1.3163498692060761e-05, + "loss": 0.7009, "step": 2593 }, { - "epoch": 0.1962846657334191, - "grad_norm": 1.9170702695846558, - "learning_rate": 1.6967412825389272e-05, - "loss": 0.6913, + "epoch": 0.18261175642379443, + "grad_norm": 1.8484550714492798, + "learning_rate": 1.3162741903386387e-05, + "loss": 0.6173, "step": 2594 }, { - "epoch": 0.1963603344557527, - "grad_norm": 2.5606637001037598, - "learning_rate": 1.6966365112363743e-05, - "loss": 0.7495, + "epoch": 0.18268215417106654, + "grad_norm": 2.05271053314209, + "learning_rate": 1.3161984794305478e-05, + "loss": 0.7753, "step": 2595 }, { - "epoch": 0.19643600317808635, - "grad_norm": 2.407437801361084, - "learning_rate": 1.696531690045866e-05, - "loss": 0.8785, + "epoch": 0.1827525519183386, + "grad_norm": 2.1424920558929443, + "learning_rate": 1.3161227364857402e-05, + "loss": 0.7265, "step": 2596 }, { - "epoch": 0.19651167190041996, - "grad_norm": 2.435490369796753, - "learning_rate": 1.696426818973967e-05, - "loss": 0.7802, + "epoch": 0.1828229496656107, + "grad_norm": 1.983299732208252, + "learning_rate": 1.3160469615081532e-05, + "loss": 0.7524, "step": 2597 }, { - "epoch": 0.1965873406227536, - "grad_norm": 2.0081589221954346, - "learning_rate": 1.696321898027245e-05, - "loss": 0.6976, + "epoch": 0.1828933474128828, + "grad_norm": 2.0714409351348877, + "learning_rate": 1.3159711545017269e-05, + "loss": 0.904, "step": 2598 }, { - "epoch": 0.1966630093450872, - "grad_norm": 2.957495927810669, - "learning_rate": 1.6962169272122697e-05, - "loss": 0.7115, + "epoch": 0.18296374516015487, + "grad_norm": 2.088263988494873, + "learning_rate": 1.3158953154704024e-05, + "loss": 0.731, "step": 2599 }, { - "epoch": 0.19673867806742082, - "grad_norm": 2.271768808364868, - "learning_rate": 1.6961119065356155e-05, - "loss": 0.8056, + "epoch": 0.18303414290742695, + "grad_norm": 3.235067129135132, + "learning_rate": 1.3158194444181227e-05, + "loss": 0.6709, "step": 2600 }, { - "epoch": 0.19681434678975446, - "grad_norm": 1.9800879955291748, - "learning_rate": 1.6960068360038584e-05, - "loss": 0.6092, + "epoch": 0.18310454065469906, + "grad_norm": 2.321969509124756, + "learning_rate": 1.3157435413488323e-05, + "loss": 0.8552, "step": 2601 }, { - "epoch": 0.19689001551208807, - "grad_norm": 2.3020243644714355, - "learning_rate": 1.695901715623579e-05, - "loss": 0.8472, + "epoch": 0.18317493840197113, + "grad_norm": 2.177772283554077, + "learning_rate": 1.3156676062664776e-05, + "loss": 0.7059, "step": 2602 }, { - "epoch": 0.1969656842344217, - "grad_norm": 2.3017523288726807, - "learning_rate": 1.6957965454013597e-05, - "loss": 0.8187, + "epoch": 0.18324533614924324, + "grad_norm": 2.129748582839966, + "learning_rate": 1.3155916391750064e-05, + "loss": 0.834, "step": 2603 }, { - "epoch": 0.19704135295675532, - "grad_norm": 2.2522785663604736, - "learning_rate": 1.6956913253437868e-05, - "loss": 0.7279, + "epoch": 0.18331573389651531, + "grad_norm": 2.4029152393341064, + "learning_rate": 1.3155156400783683e-05, + "loss": 0.7735, "step": 2604 }, { - "epoch": 0.19711702167908896, - "grad_norm": 2.6222681999206543, - "learning_rate": 1.6955860554574495e-05, - "loss": 0.8215, + "epoch": 0.1833861316437874, + "grad_norm": 2.11425518989563, + "learning_rate": 1.3154396089805147e-05, + "loss": 0.9012, "step": 2605 }, { - "epoch": 0.19719269040142257, - "grad_norm": 2.9026076793670654, - "learning_rate": 1.6954807357489407e-05, - "loss": 0.6979, + "epoch": 0.1834565293910595, + "grad_norm": 1.973497748374939, + "learning_rate": 1.3153635458853986e-05, + "loss": 0.7238, "step": 2606 }, { - "epoch": 0.1972683591237562, - "grad_norm": 3.418788194656372, - "learning_rate": 1.6953753662248547e-05, - "loss": 0.7305, + "epoch": 0.18352692713833157, + "grad_norm": 1.997349739074707, + "learning_rate": 1.3152874507969744e-05, + "loss": 0.7601, "step": 2607 }, { - "epoch": 0.19734402784608981, - "grad_norm": 2.4104363918304443, - "learning_rate": 1.695269946891791e-05, - "loss": 0.7632, + "epoch": 0.18359732488560365, + "grad_norm": 3.2346019744873047, + "learning_rate": 1.3152113237191983e-05, + "loss": 0.7174, "step": 2608 }, { - "epoch": 0.19741969656842345, - "grad_norm": 2.85041880607605, - "learning_rate": 1.695164477756351e-05, - "loss": 0.7216, + "epoch": 0.18366772263287576, + "grad_norm": 2.3054420948028564, + "learning_rate": 1.3151351646560284e-05, + "loss": 0.9005, "step": 2609 }, { - "epoch": 0.19749536529075706, - "grad_norm": 2.079584836959839, - "learning_rate": 1.695058958825139e-05, - "loss": 0.7418, + "epoch": 0.18373812038014783, + "grad_norm": 2.045632839202881, + "learning_rate": 1.3150589736114241e-05, + "loss": 0.7871, "step": 2610 }, { - "epoch": 0.1975710340130907, - "grad_norm": 2.3523175716400146, - "learning_rate": 1.6949533901047643e-05, - "loss": 0.7151, + "epoch": 0.1838085181274199, + "grad_norm": 2.4561915397644043, + "learning_rate": 1.3149827505893466e-05, + "loss": 0.9326, "step": 2611 }, { - "epoch": 0.1976467027354243, - "grad_norm": 2.4954283237457275, - "learning_rate": 1.6948477716018366e-05, - "loss": 0.8416, + "epoch": 0.18387891587469202, + "grad_norm": 1.83491849899292, + "learning_rate": 1.3149064955937592e-05, + "loss": 0.6787, "step": 2612 }, { - "epoch": 0.19772237145775792, - "grad_norm": 2.920403480529785, - "learning_rate": 1.6947421033229706e-05, - "loss": 0.7814, + "epoch": 0.1839493136219641, + "grad_norm": 2.106222152709961, + "learning_rate": 1.3148302086286262e-05, + "loss": 0.7202, "step": 2613 }, { - "epoch": 0.19779804018009156, - "grad_norm": 2.6211936473846436, - "learning_rate": 1.6946363852747838e-05, - "loss": 0.7247, + "epoch": 0.18401971136923617, + "grad_norm": 2.0969901084899902, + "learning_rate": 1.3147538896979137e-05, + "loss": 0.8073, "step": 2614 }, { - "epoch": 0.19787370890242517, - "grad_norm": 3.040184259414673, - "learning_rate": 1.694530617463896e-05, - "loss": 0.8646, + "epoch": 0.18409010911650828, + "grad_norm": 1.9219626188278198, + "learning_rate": 1.3146775388055898e-05, + "loss": 0.7487, "step": 2615 }, { - "epoch": 0.1979493776247588, - "grad_norm": 2.736969232559204, - "learning_rate": 1.6944247998969318e-05, - "loss": 0.6909, + "epoch": 0.18416050686378035, + "grad_norm": 2.249863386154175, + "learning_rate": 1.3146011559556239e-05, + "loss": 0.7276, "step": 2616 }, { - "epoch": 0.19802504634709242, - "grad_norm": 2.3474671840667725, - "learning_rate": 1.694318932580517e-05, - "loss": 0.7216, + "epoch": 0.18423090461105246, + "grad_norm": 2.390856981277466, + "learning_rate": 1.3145247411519872e-05, + "loss": 0.8626, "step": 2617 }, { - "epoch": 0.19810071506942606, - "grad_norm": 2.3557677268981934, - "learning_rate": 1.6942130155212808e-05, - "loss": 0.7854, + "epoch": 0.18430130235832454, + "grad_norm": 2.309621810913086, + "learning_rate": 1.3144482943986527e-05, + "loss": 0.712, "step": 2618 }, { - "epoch": 0.19817638379175967, - "grad_norm": 3.080761432647705, - "learning_rate": 1.6941070487258573e-05, - "loss": 0.7452, + "epoch": 0.18437170010559661, + "grad_norm": 1.9140723943710327, + "learning_rate": 1.3143718156995951e-05, + "loss": 0.8127, "step": 2619 }, { - "epoch": 0.1982520525140933, - "grad_norm": 2.3704681396484375, - "learning_rate": 1.694001032200882e-05, - "loss": 0.6745, + "epoch": 0.18444209785286872, + "grad_norm": 2.037682294845581, + "learning_rate": 1.3142953050587903e-05, + "loss": 0.7239, "step": 2620 }, { - "epoch": 0.19832772123642692, - "grad_norm": 2.427135467529297, - "learning_rate": 1.6938949659529935e-05, - "loss": 0.8975, + "epoch": 0.1845124956001408, + "grad_norm": 2.2149691581726074, + "learning_rate": 1.3142187624802163e-05, + "loss": 0.8076, "step": 2621 }, { - "epoch": 0.19840338995876056, - "grad_norm": 2.0975005626678467, - "learning_rate": 1.693788849988835e-05, - "loss": 0.619, + "epoch": 0.18458289334741287, + "grad_norm": 1.9748437404632568, + "learning_rate": 1.3141421879678524e-05, + "loss": 0.7257, "step": 2622 }, { - "epoch": 0.19847905868109417, - "grad_norm": 2.5328569412231445, - "learning_rate": 1.6936826843150512e-05, - "loss": 0.7884, + "epoch": 0.18465329109468498, + "grad_norm": 2.719993829727173, + "learning_rate": 1.3140655815256799e-05, + "loss": 0.8001, "step": 2623 }, { - "epoch": 0.1985547274034278, - "grad_norm": 5.670637130737305, - "learning_rate": 1.6935764689382904e-05, - "loss": 0.8431, + "epoch": 0.18472368884195706, + "grad_norm": 2.2795767784118652, + "learning_rate": 1.313988943157682e-05, + "loss": 0.7066, "step": 2624 }, { - "epoch": 0.19863039612576142, - "grad_norm": 2.570315361022949, - "learning_rate": 1.6934702038652046e-05, - "loss": 0.8699, + "epoch": 0.18479408658922913, + "grad_norm": 1.9006603956222534, + "learning_rate": 1.3139122728678427e-05, + "loss": 0.7645, "step": 2625 }, { - "epoch": 0.19870606484809503, - "grad_norm": 2.185997247695923, - "learning_rate": 1.693363889102448e-05, - "loss": 0.8449, + "epoch": 0.18486448433650124, + "grad_norm": 2.548281669616699, + "learning_rate": 1.3138355706601484e-05, + "loss": 0.8719, "step": 2626 }, { - "epoch": 0.19878173357042866, - "grad_norm": 2.4839537143707275, - "learning_rate": 1.6932575246566788e-05, - "loss": 0.8201, + "epoch": 0.18493488208377332, + "grad_norm": 2.221097469329834, + "learning_rate": 1.3137588365385866e-05, + "loss": 0.7694, "step": 2627 }, { - "epoch": 0.19885740229276228, - "grad_norm": 2.1971192359924316, - "learning_rate": 1.6931511105345575e-05, - "loss": 0.7496, + "epoch": 0.1850052798310454, + "grad_norm": 2.1171281337738037, + "learning_rate": 1.3136820705071473e-05, + "loss": 0.8129, "step": 2628 }, { - "epoch": 0.1989330710150959, - "grad_norm": 2.9695796966552734, - "learning_rate": 1.6930446467427484e-05, - "loss": 0.7093, + "epoch": 0.1850756775783175, + "grad_norm": 1.8406339883804321, + "learning_rate": 1.3136052725698213e-05, + "loss": 0.7661, "step": 2629 }, { - "epoch": 0.19900873973742952, - "grad_norm": 2.3504600524902344, - "learning_rate": 1.6929381332879187e-05, - "loss": 0.7825, + "epoch": 0.18514607532558958, + "grad_norm": 1.972618818283081, + "learning_rate": 1.3135284427306015e-05, + "loss": 0.827, "step": 2630 }, { - "epoch": 0.19908440845976316, - "grad_norm": 2.4642410278320312, - "learning_rate": 1.6928315701767382e-05, - "loss": 0.8154, + "epoch": 0.18521647307286168, + "grad_norm": 2.441446542739868, + "learning_rate": 1.3134515809934822e-05, + "loss": 0.6671, "step": 2631 }, { - "epoch": 0.19916007718209677, - "grad_norm": 2.417527675628662, - "learning_rate": 1.6927249574158803e-05, - "loss": 0.7947, + "epoch": 0.18528687082013376, + "grad_norm": 2.2793939113616943, + "learning_rate": 1.3133746873624598e-05, + "loss": 0.7872, "step": 2632 }, { - "epoch": 0.1992357459044304, - "grad_norm": 2.4972589015960693, - "learning_rate": 1.692618295012022e-05, - "loss": 0.714, + "epoch": 0.18535726856740584, + "grad_norm": 2.352151393890381, + "learning_rate": 1.3132977618415317e-05, + "loss": 0.8278, "step": 2633 }, { - "epoch": 0.19931141462676402, - "grad_norm": 2.6074717044830322, - "learning_rate": 1.6925115829718424e-05, - "loss": 0.7362, + "epoch": 0.18542766631467794, + "grad_norm": 2.3360612392425537, + "learning_rate": 1.3132208044346977e-05, + "loss": 0.7176, "step": 2634 }, { - "epoch": 0.19938708334909766, - "grad_norm": 2.2612643241882324, - "learning_rate": 1.692404821302024e-05, - "loss": 0.8082, + "epoch": 0.18549806406195002, + "grad_norm": 2.3379364013671875, + "learning_rate": 1.3131438151459588e-05, + "loss": 0.7513, "step": 2635 }, { - "epoch": 0.19946275207143127, - "grad_norm": 3.058591842651367, - "learning_rate": 1.6922980100092524e-05, - "loss": 0.729, + "epoch": 0.1855684618092221, + "grad_norm": 2.258195400238037, + "learning_rate": 1.3130667939793175e-05, + "loss": 0.7619, "step": 2636 }, { - "epoch": 0.1995384207937649, - "grad_norm": 2.8211417198181152, - "learning_rate": 1.6921911491002175e-05, - "loss": 0.6836, + "epoch": 0.1856388595564942, + "grad_norm": 2.1863763332366943, + "learning_rate": 1.3129897409387782e-05, + "loss": 0.7743, "step": 2637 }, { - "epoch": 0.19961408951609852, - "grad_norm": 2.3648903369903564, - "learning_rate": 1.69208423858161e-05, - "loss": 0.7884, + "epoch": 0.18570925730376628, + "grad_norm": 2.183056354522705, + "learning_rate": 1.3129126560283472e-05, + "loss": 0.8822, "step": 2638 }, { - "epoch": 0.19968975823843216, - "grad_norm": 2.64884877204895, - "learning_rate": 1.691977278460126e-05, - "loss": 0.8841, + "epoch": 0.18577965505103836, + "grad_norm": 2.192319631576538, + "learning_rate": 1.3128355392520324e-05, + "loss": 0.8645, "step": 2639 }, { - "epoch": 0.19976542696076577, - "grad_norm": 2.9867165088653564, - "learning_rate": 1.6918702687424628e-05, - "loss": 0.7115, + "epoch": 0.18585005279831046, + "grad_norm": 2.2728517055511475, + "learning_rate": 1.3127583906138427e-05, + "loss": 0.8398, "step": 2640 }, { - "epoch": 0.19984109568309938, - "grad_norm": 2.427945137023926, - "learning_rate": 1.6917632094353226e-05, - "loss": 1.0163, + "epoch": 0.18592045054558254, + "grad_norm": 2.1914913654327393, + "learning_rate": 1.3126812101177893e-05, + "loss": 0.6859, "step": 2641 }, { - "epoch": 0.19991676440543302, - "grad_norm": 2.252856969833374, - "learning_rate": 1.6916561005454093e-05, - "loss": 0.8115, + "epoch": 0.18599084829285462, + "grad_norm": 2.056480884552002, + "learning_rate": 1.312603997767885e-05, + "loss": 0.9418, "step": 2642 }, { - "epoch": 0.19999243312776663, - "grad_norm": 2.524324893951416, - "learning_rate": 1.6915489420794304e-05, - "loss": 0.6516, + "epoch": 0.18606124604012672, + "grad_norm": 2.1114909648895264, + "learning_rate": 1.312526753568144e-05, + "loss": 0.8326, "step": 2643 }, { - "epoch": 0.20006810185010027, - "grad_norm": 2.3426573276519775, - "learning_rate": 1.691441734044096e-05, - "loss": 0.7816, + "epoch": 0.1861316437873988, + "grad_norm": 2.1123087406158447, + "learning_rate": 1.3124494775225822e-05, + "loss": 0.7574, "step": 2644 }, { - "epoch": 0.20014377057243388, - "grad_norm": 2.6278955936431885, - "learning_rate": 1.691334476446121e-05, - "loss": 0.6821, + "epoch": 0.1862020415346709, + "grad_norm": 2.235158920288086, + "learning_rate": 1.3123721696352178e-05, + "loss": 0.7829, "step": 2645 }, { - "epoch": 0.20021943929476752, - "grad_norm": 2.5441291332244873, - "learning_rate": 1.6912271692922216e-05, - "loss": 0.8005, + "epoch": 0.18627243928194298, + "grad_norm": 2.049412727355957, + "learning_rate": 1.3122948299100692e-05, + "loss": 0.8151, "step": 2646 }, { - "epoch": 0.20029510801710113, - "grad_norm": 3.0294198989868164, - "learning_rate": 1.691119812589118e-05, - "loss": 0.7918, + "epoch": 0.18634283702921506, + "grad_norm": 1.8408353328704834, + "learning_rate": 1.3122174583511582e-05, + "loss": 0.7386, "step": 2647 }, { - "epoch": 0.20037077673943476, - "grad_norm": 2.6301164627075195, - "learning_rate": 1.6910124063435322e-05, - "loss": 0.7959, + "epoch": 0.18641323477648716, + "grad_norm": 1.750189185142517, + "learning_rate": 1.312140054962507e-05, + "loss": 0.8086, "step": 2648 }, { - "epoch": 0.20044644546176837, - "grad_norm": 6.36815071105957, - "learning_rate": 1.6909049505621912e-05, - "loss": 0.8501, + "epoch": 0.18648363252375924, + "grad_norm": 1.9384422302246094, + "learning_rate": 1.3120626197481399e-05, + "loss": 0.7143, "step": 2649 }, { - "epoch": 0.200522114184102, - "grad_norm": 11.723066329956055, - "learning_rate": 1.6907974452518245e-05, - "loss": 0.6644, + "epoch": 0.18655403027103132, + "grad_norm": 2.1857073307037354, + "learning_rate": 1.3119851527120828e-05, + "loss": 0.7816, "step": 2650 }, { - "epoch": 0.20059778290643562, - "grad_norm": 30.52318572998047, - "learning_rate": 1.690689890419164e-05, - "loss": 0.8288, + "epoch": 0.18662442801830342, + "grad_norm": 2.556408405303955, + "learning_rate": 1.3119076538583635e-05, + "loss": 0.8768, "step": 2651 }, { - "epoch": 0.20067345162876926, - "grad_norm": 2.54634165763855, - "learning_rate": 1.6905822860709446e-05, - "loss": 0.807, + "epoch": 0.1866948257655755, + "grad_norm": 2.268882989883423, + "learning_rate": 1.3118301231910112e-05, + "loss": 0.7445, "step": 2652 }, { - "epoch": 0.20074912035110287, - "grad_norm": 2.408019542694092, - "learning_rate": 1.690474632213906e-05, - "loss": 0.9354, + "epoch": 0.18676522351284758, + "grad_norm": 2.4905362129211426, + "learning_rate": 1.3117525607140565e-05, + "loss": 0.6893, "step": 2653 }, { - "epoch": 0.20082478907343648, - "grad_norm": 3.3913652896881104, - "learning_rate": 1.690366928854789e-05, - "loss": 0.8307, + "epoch": 0.18683562126011968, + "grad_norm": 2.2609946727752686, + "learning_rate": 1.3116749664315323e-05, + "loss": 0.758, "step": 2654 }, { - "epoch": 0.20090045779577012, - "grad_norm": 3.0079503059387207, - "learning_rate": 1.6902591760003387e-05, - "loss": 0.6633, + "epoch": 0.18690601900739176, + "grad_norm": 2.7874274253845215, + "learning_rate": 1.3115973403474724e-05, + "loss": 0.6835, "step": 2655 }, { - "epoch": 0.20097612651810373, - "grad_norm": 2.45389461517334, - "learning_rate": 1.6901513736573027e-05, - "loss": 0.7959, + "epoch": 0.18697641675466384, + "grad_norm": 2.0392112731933594, + "learning_rate": 1.3115196824659131e-05, + "loss": 0.8259, "step": 2656 }, { - "epoch": 0.20105179524043737, - "grad_norm": 2.300036907196045, - "learning_rate": 1.690043521832432e-05, - "loss": 0.7693, + "epoch": 0.18704681450193594, + "grad_norm": 2.1472535133361816, + "learning_rate": 1.3114419927908916e-05, + "loss": 0.8148, "step": 2657 }, { - "epoch": 0.20112746396277098, - "grad_norm": 2.469834566116333, - "learning_rate": 1.6899356205324807e-05, - "loss": 0.8985, + "epoch": 0.18711721224920802, + "grad_norm": 1.9952208995819092, + "learning_rate": 1.3113642713264472e-05, + "loss": 0.7473, "step": 2658 }, { - "epoch": 0.20120313268510462, - "grad_norm": 2.4078941345214844, - "learning_rate": 1.6898276697642056e-05, - "loss": 0.8893, + "epoch": 0.18718760999648013, + "grad_norm": 1.7890840768814087, + "learning_rate": 1.3112865180766206e-05, + "loss": 0.7515, "step": 2659 }, { - "epoch": 0.20127880140743823, - "grad_norm": 2.462860584259033, - "learning_rate": 1.6897196695343672e-05, - "loss": 0.8923, + "epoch": 0.1872580077437522, + "grad_norm": 2.200063467025757, + "learning_rate": 1.3112087330454543e-05, + "loss": 0.7176, "step": 2660 }, { - "epoch": 0.20135447012977187, - "grad_norm": 2.2678468227386475, - "learning_rate": 1.6896116198497295e-05, - "loss": 0.8364, + "epoch": 0.18732840549102428, + "grad_norm": 1.8022172451019287, + "learning_rate": 1.3111309162369923e-05, + "loss": 0.6825, "step": 2661 }, { - "epoch": 0.20143013885210548, - "grad_norm": 2.6497673988342285, - "learning_rate": 1.6895035207170577e-05, - "loss": 0.7843, + "epoch": 0.18739880323829639, + "grad_norm": 1.8918417692184448, + "learning_rate": 1.3110530676552808e-05, + "loss": 0.7896, "step": 2662 }, { - "epoch": 0.20150580757443912, - "grad_norm": 2.344269037246704, - "learning_rate": 1.6893953721431218e-05, - "loss": 0.7275, + "epoch": 0.18746920098556846, + "grad_norm": 2.0192344188690186, + "learning_rate": 1.3109751873043668e-05, + "loss": 0.7555, "step": 2663 }, { - "epoch": 0.20158147629677273, - "grad_norm": 2.5566043853759766, - "learning_rate": 1.689287174134695e-05, - "loss": 0.856, + "epoch": 0.18753959873284054, + "grad_norm": 2.473616361618042, + "learning_rate": 1.3108972751882991e-05, + "loss": 0.6899, "step": 2664 }, { - "epoch": 0.20165714501910637, - "grad_norm": 2.8160223960876465, - "learning_rate": 1.689178926698552e-05, - "loss": 0.7982, + "epoch": 0.18760999648011265, + "grad_norm": 2.1776304244995117, + "learning_rate": 1.3108193313111292e-05, + "loss": 0.7622, "step": 2665 }, { - "epoch": 0.20173281374143998, - "grad_norm": 5.079640865325928, - "learning_rate": 1.6890706298414722e-05, - "loss": 0.8701, + "epoch": 0.18768039422738472, + "grad_norm": 2.513071060180664, + "learning_rate": 1.310741355676909e-05, + "loss": 0.5605, "step": 2666 }, { - "epoch": 0.2018084824637736, - "grad_norm": 2.1939332485198975, - "learning_rate": 1.6889622835702372e-05, - "loss": 0.6718, + "epoch": 0.1877507919746568, + "grad_norm": 1.8833409547805786, + "learning_rate": 1.3106633482896925e-05, + "loss": 0.7278, "step": 2667 }, { - "epoch": 0.20188415118610722, - "grad_norm": 2.220892906188965, - "learning_rate": 1.6888538878916328e-05, - "loss": 0.6956, + "epoch": 0.1878211897219289, + "grad_norm": 1.8820186853408813, + "learning_rate": 1.3105853091535358e-05, + "loss": 0.7216, "step": 2668 }, { - "epoch": 0.20195981990844084, - "grad_norm": 2.5196173191070557, - "learning_rate": 1.688745442812446e-05, - "loss": 0.864, + "epoch": 0.18789158746920098, + "grad_norm": 1.8471460342407227, + "learning_rate": 1.3105072382724955e-05, + "loss": 0.7329, "step": 2669 }, { - "epoch": 0.20203548863077447, - "grad_norm": 2.502357244491577, - "learning_rate": 1.6886369483394683e-05, - "loss": 0.7481, + "epoch": 0.18796198521647306, + "grad_norm": 1.9577990770339966, + "learning_rate": 1.3104291356506311e-05, + "loss": 0.7121, "step": 2670 }, { - "epoch": 0.20211115735310808, - "grad_norm": 3.1497068405151367, - "learning_rate": 1.6885284044794946e-05, - "loss": 0.7413, + "epoch": 0.18803238296374516, + "grad_norm": 2.065424680709839, + "learning_rate": 1.310351001292003e-05, + "loss": 0.7207, "step": 2671 }, { - "epoch": 0.20218682607544172, - "grad_norm": 2.358307361602783, - "learning_rate": 1.6884198112393216e-05, - "loss": 0.8536, + "epoch": 0.18810278071101724, + "grad_norm": 2.22249436378479, + "learning_rate": 1.310272835200674e-05, + "loss": 0.7717, "step": 2672 }, { - "epoch": 0.20226249479777533, - "grad_norm": 3.1044929027557373, - "learning_rate": 1.68831116862575e-05, - "loss": 0.8081, + "epoch": 0.18817317845828935, + "grad_norm": 2.4484732151031494, + "learning_rate": 1.3101946373807071e-05, + "loss": 0.7987, "step": 2673 }, { - "epoch": 0.20233816352010897, - "grad_norm": 2.201646327972412, - "learning_rate": 1.6882024766455832e-05, - "loss": 0.9349, + "epoch": 0.18824357620556142, + "grad_norm": 1.94876229763031, + "learning_rate": 1.3101164078361687e-05, + "loss": 0.7456, "step": 2674 }, { - "epoch": 0.20241383224244258, - "grad_norm": 2.6423752307891846, - "learning_rate": 1.6880937353056283e-05, - "loss": 0.7464, + "epoch": 0.1883139739528335, + "grad_norm": 2.24408221244812, + "learning_rate": 1.3100381465711256e-05, + "loss": 0.7804, "step": 2675 }, { - "epoch": 0.20248950096477622, - "grad_norm": 2.547576427459717, - "learning_rate": 1.6879849446126942e-05, - "loss": 0.6216, + "epoch": 0.1883843717001056, + "grad_norm": 2.2092385292053223, + "learning_rate": 1.3099598535896467e-05, + "loss": 0.71, "step": 2676 }, { - "epoch": 0.20256516968710983, - "grad_norm": 2.7774250507354736, - "learning_rate": 1.6878761045735946e-05, - "loss": 0.828, + "epoch": 0.18845476944737768, + "grad_norm": 2.3538308143615723, + "learning_rate": 1.3098815288958028e-05, + "loss": 0.8294, "step": 2677 }, { - "epoch": 0.20264083840944347, - "grad_norm": 2.6742935180664062, - "learning_rate": 1.6877672151951446e-05, - "loss": 0.7657, + "epoch": 0.18852516719464976, + "grad_norm": 2.5669524669647217, + "learning_rate": 1.3098031724936657e-05, + "loss": 0.7396, "step": 2678 }, { - "epoch": 0.20271650713177708, - "grad_norm": 2.081855535507202, - "learning_rate": 1.687658276484164e-05, - "loss": 0.7594, + "epoch": 0.18859556494192187, + "grad_norm": 2.335832357406616, + "learning_rate": 1.3097247843873093e-05, + "loss": 0.765, "step": 2679 }, { - "epoch": 0.2027921758541107, - "grad_norm": 2.9770843982696533, - "learning_rate": 1.6875492884474744e-05, - "loss": 0.777, + "epoch": 0.18866596268919394, + "grad_norm": 2.497159481048584, + "learning_rate": 1.3096463645808093e-05, + "loss": 0.7495, "step": 2680 }, { - "epoch": 0.20286784457644433, - "grad_norm": 2.1680080890655518, - "learning_rate": 1.6874402510919013e-05, - "loss": 0.8131, + "epoch": 0.18873636043646602, + "grad_norm": 2.2877869606018066, + "learning_rate": 1.3095679130782427e-05, + "loss": 0.6619, "step": 2681 }, { - "epoch": 0.20294351329877794, - "grad_norm": 2.5214853286743164, - "learning_rate": 1.6873311644242726e-05, - "loss": 0.8729, + "epoch": 0.18880675818373813, + "grad_norm": 1.9491753578186035, + "learning_rate": 1.309489429883688e-05, + "loss": 0.6713, "step": 2682 }, { - "epoch": 0.20301918202111158, - "grad_norm": 2.355656862258911, - "learning_rate": 1.68722202845142e-05, - "loss": 0.7149, + "epoch": 0.1888771559310102, + "grad_norm": 2.1020352840423584, + "learning_rate": 1.3094109150012263e-05, + "loss": 0.7333, "step": 2683 }, { - "epoch": 0.2030948507434452, - "grad_norm": 2.722672939300537, - "learning_rate": 1.6871128431801776e-05, - "loss": 0.7906, + "epoch": 0.18894755367828228, + "grad_norm": 2.605492353439331, + "learning_rate": 1.309332368434939e-05, + "loss": 0.7404, "step": 2684 }, { - "epoch": 0.20317051946577883, - "grad_norm": 2.526291608810425, - "learning_rate": 1.6870036086173833e-05, - "loss": 0.9885, + "epoch": 0.1890179514255544, + "grad_norm": 2.2921059131622314, + "learning_rate": 1.30925379018891e-05, + "loss": 0.7879, "step": 2685 }, { - "epoch": 0.20324618818811244, - "grad_norm": 2.7537612915039062, - "learning_rate": 1.686894324769877e-05, - "loss": 0.7336, + "epoch": 0.18908834917282646, + "grad_norm": 2.1571006774902344, + "learning_rate": 1.3091751802672246e-05, + "loss": 0.7287, "step": 2686 }, { - "epoch": 0.20332185691044607, - "grad_norm": 2.314716100692749, - "learning_rate": 1.686784991644504e-05, - "loss": 0.6739, + "epoch": 0.18915874692009857, + "grad_norm": 2.3039205074310303, + "learning_rate": 1.30909653867397e-05, + "loss": 0.7421, "step": 2687 }, { - "epoch": 0.20339752563277969, - "grad_norm": 2.298309087753296, - "learning_rate": 1.6866756092481092e-05, - "loss": 0.655, + "epoch": 0.18922914466737065, + "grad_norm": 2.1782045364379883, + "learning_rate": 1.3090178654132346e-05, + "loss": 0.7328, "step": 2688 }, { - "epoch": 0.20347319435511332, - "grad_norm": 2.149913787841797, - "learning_rate": 1.6865661775875437e-05, - "loss": 0.688, + "epoch": 0.18929954241464272, + "grad_norm": 1.9497147798538208, + "learning_rate": 1.3089391604891089e-05, + "loss": 0.6523, "step": 2689 }, { - "epoch": 0.20354886307744693, - "grad_norm": 2.236656904220581, - "learning_rate": 1.68645669666966e-05, - "loss": 0.8814, + "epoch": 0.18936994016191483, + "grad_norm": 2.2910635471343994, + "learning_rate": 1.3088604239056848e-05, + "loss": 0.6109, "step": 2690 }, { - "epoch": 0.20362453179978057, - "grad_norm": 2.557054042816162, - "learning_rate": 1.686347166501314e-05, - "loss": 0.8819, + "epoch": 0.1894403379091869, + "grad_norm": 2.323469638824463, + "learning_rate": 1.3087816556670557e-05, + "loss": 0.8345, "step": 2691 }, { - "epoch": 0.20370020052211418, - "grad_norm": 7.46500301361084, - "learning_rate": 1.6862375870893653e-05, - "loss": 0.807, + "epoch": 0.18951073565645898, + "grad_norm": 2.1230971813201904, + "learning_rate": 1.3087028557773171e-05, + "loss": 0.7779, "step": 2692 }, { - "epoch": 0.2037758692444478, - "grad_norm": 2.449782609939575, - "learning_rate": 1.686127958440676e-05, - "loss": 0.6193, + "epoch": 0.1895811334037311, + "grad_norm": 2.0256264209747314, + "learning_rate": 1.3086240242405659e-05, + "loss": 0.6761, "step": 2693 }, { - "epoch": 0.20385153796678143, - "grad_norm": 2.736797571182251, - "learning_rate": 1.6860182805621112e-05, - "loss": 0.9145, + "epoch": 0.18965153115100317, + "grad_norm": 2.2368178367614746, + "learning_rate": 1.3085451610609002e-05, + "loss": 0.8045, "step": 2694 }, { - "epoch": 0.20392720668911504, - "grad_norm": 2.61444354057312, - "learning_rate": 1.6859085534605395e-05, - "loss": 0.8425, + "epoch": 0.18972192889827524, + "grad_norm": 1.9269249439239502, + "learning_rate": 1.3084662662424205e-05, + "loss": 0.707, "step": 2695 }, { - "epoch": 0.20400287541144868, - "grad_norm": 2.4622318744659424, - "learning_rate": 1.6857987771428323e-05, - "loss": 0.9507, + "epoch": 0.18979232664554735, + "grad_norm": 2.1506247520446777, + "learning_rate": 1.3083873397892287e-05, + "loss": 0.7315, "step": 2696 }, { - "epoch": 0.2040785441337823, - "grad_norm": 2.3272974491119385, - "learning_rate": 1.6856889516158637e-05, - "loss": 0.774, + "epoch": 0.18986272439281943, + "grad_norm": 2.223329782485962, + "learning_rate": 1.308308381705428e-05, + "loss": 0.8151, "step": 2697 }, { - "epoch": 0.20415421285611593, - "grad_norm": 2.636510133743286, - "learning_rate": 1.685579076886512e-05, - "loss": 0.7583, + "epoch": 0.1899331221400915, + "grad_norm": 1.9812963008880615, + "learning_rate": 1.3082293919951235e-05, + "loss": 0.6929, "step": 2698 }, { - "epoch": 0.20422988157844954, - "grad_norm": 5.449431896209717, - "learning_rate": 1.6854691529616578e-05, - "loss": 0.7954, + "epoch": 0.1900035198873636, + "grad_norm": 1.9212409257888794, + "learning_rate": 1.3081503706624224e-05, + "loss": 0.8176, "step": 2699 }, { - "epoch": 0.20430555030078318, - "grad_norm": 2.3265273571014404, - "learning_rate": 1.6853591798481845e-05, - "loss": 0.6401, + "epoch": 0.1900739176346357, + "grad_norm": 2.3694651126861572, + "learning_rate": 1.3080713177114324e-05, + "loss": 0.7596, "step": 2700 }, { - "epoch": 0.2043812190231168, - "grad_norm": 2.6790006160736084, - "learning_rate": 1.685249157552979e-05, - "loss": 0.9875, + "epoch": 0.1901443153819078, + "grad_norm": 2.101994037628174, + "learning_rate": 1.307992233146264e-05, + "loss": 0.7429, "step": 2701 }, { - "epoch": 0.20445688774545043, - "grad_norm": 2.484577178955078, - "learning_rate": 1.6851390860829317e-05, - "loss": 0.7331, + "epoch": 0.19021471312917987, + "grad_norm": 1.9864675998687744, + "learning_rate": 1.307913116971029e-05, + "loss": 0.7076, "step": 2702 }, { - "epoch": 0.20453255646778404, - "grad_norm": 2.061288833618164, - "learning_rate": 1.6850289654449355e-05, - "loss": 0.7615, + "epoch": 0.19028511087645195, + "grad_norm": 2.2184348106384277, + "learning_rate": 1.3078339691898402e-05, + "loss": 0.7361, "step": 2703 }, { - "epoch": 0.20460822519011768, - "grad_norm": 1.9732778072357178, - "learning_rate": 1.684918795645886e-05, - "loss": 0.7419, + "epoch": 0.19035550862372405, + "grad_norm": 1.8963897228240967, + "learning_rate": 1.307754789806813e-05, + "loss": 0.6531, "step": 2704 }, { - "epoch": 0.2046838939124513, - "grad_norm": 2.913220167160034, - "learning_rate": 1.684808576692683e-05, - "loss": 0.7499, + "epoch": 0.19042590637099613, + "grad_norm": 1.8219187259674072, + "learning_rate": 1.3076755788260638e-05, + "loss": 0.6639, "step": 2705 }, { - "epoch": 0.20475956263478493, - "grad_norm": 1.9376118183135986, - "learning_rate": 1.6846983085922287e-05, - "loss": 0.8705, + "epoch": 0.1904963041182682, + "grad_norm": 1.9745874404907227, + "learning_rate": 1.3075963362517108e-05, + "loss": 0.7756, "step": 2706 }, { - "epoch": 0.20483523135711854, - "grad_norm": 2.383751153945923, - "learning_rate": 1.684587991351428e-05, - "loss": 0.8206, + "epoch": 0.1905667018655403, + "grad_norm": 2.273331642150879, + "learning_rate": 1.3075170620878743e-05, + "loss": 0.7995, "step": 2707 }, { - "epoch": 0.20491090007945215, - "grad_norm": 2.5557644367218018, - "learning_rate": 1.68447762497719e-05, - "loss": 0.802, + "epoch": 0.1906370996128124, + "grad_norm": 2.562375783920288, + "learning_rate": 1.3074377563386755e-05, + "loss": 0.6957, "step": 2708 }, { - "epoch": 0.20498656880178578, - "grad_norm": 2.486907958984375, - "learning_rate": 1.6843672094764253e-05, - "loss": 0.8066, + "epoch": 0.19070749736008447, + "grad_norm": 2.0580031871795654, + "learning_rate": 1.3073584190082375e-05, + "loss": 0.6794, "step": 2709 }, { - "epoch": 0.2050622375241194, - "grad_norm": 2.465059757232666, - "learning_rate": 1.6842567448560494e-05, - "loss": 0.8275, + "epoch": 0.19077789510735657, + "grad_norm": 2.2233617305755615, + "learning_rate": 1.307279050100685e-05, + "loss": 0.7888, "step": 2710 }, { - "epoch": 0.20513790624645303, - "grad_norm": 2.361616611480713, - "learning_rate": 1.6841462311229796e-05, - "loss": 0.7221, + "epoch": 0.19084829285462865, + "grad_norm": 1.9741060733795166, + "learning_rate": 1.3071996496201452e-05, + "loss": 0.7795, "step": 2711 }, { - "epoch": 0.20521357496878664, - "grad_norm": 3.4184422492980957, - "learning_rate": 1.6840356682841362e-05, - "loss": 0.6928, + "epoch": 0.19091869060190073, + "grad_norm": 2.3727169036865234, + "learning_rate": 1.3071202175707454e-05, + "loss": 0.7464, "step": 2712 }, { - "epoch": 0.20528924369112028, - "grad_norm": 2.329210042953491, - "learning_rate": 1.6839250563464436e-05, - "loss": 0.7858, + "epoch": 0.19098908834917283, + "grad_norm": 1.7336817979812622, + "learning_rate": 1.3070407539566157e-05, + "loss": 0.7341, "step": 2713 }, { - "epoch": 0.2053649124134539, - "grad_norm": 2.920400619506836, - "learning_rate": 1.6838143953168285e-05, - "loss": 0.7444, + "epoch": 0.1910594860964449, + "grad_norm": 1.9446141719818115, + "learning_rate": 1.3069612587818874e-05, + "loss": 0.7829, "step": 2714 }, { - "epoch": 0.20544058113578753, - "grad_norm": 2.222848415374756, - "learning_rate": 1.6837036852022205e-05, - "loss": 0.7223, + "epoch": 0.191129883843717, + "grad_norm": 2.096207618713379, + "learning_rate": 1.3068817320506938e-05, + "loss": 0.6939, "step": 2715 }, { - "epoch": 0.20551624985812114, - "grad_norm": 2.549514055252075, - "learning_rate": 1.683592926009553e-05, - "loss": 0.8735, + "epoch": 0.1912002815909891, + "grad_norm": 2.1018364429473877, + "learning_rate": 1.306802173767169e-05, + "loss": 0.7565, "step": 2716 }, { - "epoch": 0.20559191858045478, - "grad_norm": 3.1271274089813232, - "learning_rate": 1.6834821177457625e-05, - "loss": 0.7578, + "epoch": 0.19127067933826117, + "grad_norm": 1.9718762636184692, + "learning_rate": 1.3067225839354496e-05, + "loss": 0.7871, "step": 2717 }, { - "epoch": 0.2056675873027884, - "grad_norm": 2.493976354598999, - "learning_rate": 1.683371260417787e-05, - "loss": 0.714, + "epoch": 0.19134107708553327, + "grad_norm": 2.172664165496826, + "learning_rate": 1.3066429625596737e-05, + "loss": 0.6918, "step": 2718 }, { - "epoch": 0.20574325602512203, - "grad_norm": 2.656724214553833, - "learning_rate": 1.6832603540325702e-05, - "loss": 0.9036, + "epoch": 0.19141147483280535, + "grad_norm": 1.9310816526412964, + "learning_rate": 1.306563309643981e-05, + "loss": 0.7932, "step": 2719 }, { - "epoch": 0.20581892474745564, - "grad_norm": 1.9792472124099731, - "learning_rate": 1.683149398597056e-05, - "loss": 0.8492, + "epoch": 0.19148187258007743, + "grad_norm": 1.8669805526733398, + "learning_rate": 1.3064836251925121e-05, + "loss": 0.77, "step": 2720 }, { - "epoch": 0.20589459346978925, - "grad_norm": 3.5724356174468994, - "learning_rate": 1.6830383941181938e-05, - "loss": 0.7013, + "epoch": 0.19155227032734953, + "grad_norm": 2.0125415325164795, + "learning_rate": 1.3064039092094105e-05, + "loss": 0.7287, "step": 2721 }, { - "epoch": 0.2059702621921229, - "grad_norm": 2.62611985206604, - "learning_rate": 1.6829273406029347e-05, - "loss": 0.7121, + "epoch": 0.1916226680746216, + "grad_norm": 2.396197557449341, + "learning_rate": 1.3063241616988205e-05, + "loss": 0.802, "step": 2722 }, { - "epoch": 0.2060459309144565, - "grad_norm": 2.143665075302124, - "learning_rate": 1.6828162380582334e-05, - "loss": 0.686, + "epoch": 0.1916930658218937, + "grad_norm": 2.1417319774627686, + "learning_rate": 1.306244382664888e-05, + "loss": 0.6947, "step": 2723 }, { - "epoch": 0.20612159963679014, - "grad_norm": 2.061655044555664, - "learning_rate": 1.682705086491047e-05, - "loss": 0.9111, + "epoch": 0.1917634635691658, + "grad_norm": 2.1334128379821777, + "learning_rate": 1.3061645721117614e-05, + "loss": 0.7823, "step": 2724 }, { - "epoch": 0.20619726835912375, - "grad_norm": 2.1191210746765137, - "learning_rate": 1.6825938859083365e-05, - "loss": 0.7933, + "epoch": 0.19183386131643787, + "grad_norm": 2.569943428039551, + "learning_rate": 1.3060847300435894e-05, + "loss": 0.716, "step": 2725 }, { - "epoch": 0.20627293708145739, - "grad_norm": 2.6450743675231934, - "learning_rate": 1.6824826363170658e-05, - "loss": 0.6982, + "epoch": 0.19190425906370995, + "grad_norm": 3.4548208713531494, + "learning_rate": 1.3060048564645236e-05, + "loss": 0.6985, "step": 2726 }, { - "epoch": 0.206348605803791, - "grad_norm": 1.9432772397994995, - "learning_rate": 1.6823713377242015e-05, - "loss": 0.7923, + "epoch": 0.19197465681098205, + "grad_norm": 1.8567713499069214, + "learning_rate": 1.3059249513787161e-05, + "loss": 0.6867, "step": 2727 }, { - "epoch": 0.20642427452612463, - "grad_norm": 2.247926712036133, - "learning_rate": 1.6822599901367134e-05, - "loss": 0.7163, + "epoch": 0.19204505455825413, + "grad_norm": 2.592452049255371, + "learning_rate": 1.3058450147903217e-05, + "loss": 0.6469, "step": 2728 }, { - "epoch": 0.20649994324845825, - "grad_norm": 2.1911559104919434, - "learning_rate": 1.6821485935615748e-05, - "loss": 0.7813, + "epoch": 0.19211545230552624, + "grad_norm": 2.5163986682891846, + "learning_rate": 1.3057650467034967e-05, + "loss": 0.6935, "step": 2729 }, { - "epoch": 0.20657561197079188, - "grad_norm": 2.491403341293335, - "learning_rate": 1.6820371480057613e-05, - "loss": 0.8105, + "epoch": 0.1921858500527983, + "grad_norm": 2.787259340286255, + "learning_rate": 1.305685047122398e-05, + "loss": 0.7104, "step": 2730 }, { - "epoch": 0.2066512806931255, - "grad_norm": 2.1198718547821045, - "learning_rate": 1.6819256534762525e-05, - "loss": 0.8581, + "epoch": 0.1922562478000704, + "grad_norm": 1.847646951675415, + "learning_rate": 1.3056050160511853e-05, + "loss": 0.6959, "step": 2731 }, { - "epoch": 0.20672694941545913, - "grad_norm": 2.3443424701690674, - "learning_rate": 1.68181410998003e-05, - "loss": 0.791, + "epoch": 0.1923266455473425, + "grad_norm": 3.6204771995544434, + "learning_rate": 1.305524953494019e-05, + "loss": 0.7119, "step": 2732 }, { - "epoch": 0.20680261813779274, - "grad_norm": 1.8470584154129028, - "learning_rate": 1.6817025175240793e-05, - "loss": 0.8894, + "epoch": 0.19239704329461457, + "grad_norm": 2.364389181137085, + "learning_rate": 1.3054448594550623e-05, + "loss": 0.7977, "step": 2733 }, { - "epoch": 0.20687828686012635, - "grad_norm": 2.2806928157806396, - "learning_rate": 1.6815908761153887e-05, - "loss": 0.6281, + "epoch": 0.19246744104188665, + "grad_norm": 1.944573163986206, + "learning_rate": 1.305364733938479e-05, + "loss": 0.7006, "step": 2734 }, { - "epoch": 0.20695395558246, - "grad_norm": 2.3737926483154297, - "learning_rate": 1.681479185760949e-05, - "loss": 0.6816, + "epoch": 0.19253783878915876, + "grad_norm": 2.1404969692230225, + "learning_rate": 1.3052845769484348e-05, + "loss": 0.8093, "step": 2735 }, { - "epoch": 0.2070296243047936, - "grad_norm": 2.267542600631714, - "learning_rate": 1.681367446467756e-05, - "loss": 0.7355, + "epoch": 0.19260823653643083, + "grad_norm": 2.393246650695801, + "learning_rate": 1.3052043884890974e-05, + "loss": 0.8191, "step": 2736 }, { - "epoch": 0.20710529302712724, - "grad_norm": 2.259472608566284, - "learning_rate": 1.6812556582428052e-05, - "loss": 0.7937, + "epoch": 0.1926786342837029, + "grad_norm": 2.018052816390991, + "learning_rate": 1.3051241685646359e-05, + "loss": 0.6981, "step": 2737 }, { - "epoch": 0.20718096174946085, - "grad_norm": 2.7698655128479004, - "learning_rate": 1.6811438210930987e-05, - "loss": 0.8421, + "epoch": 0.19274903203097501, + "grad_norm": 2.018465757369995, + "learning_rate": 1.3050439171792205e-05, + "loss": 0.7866, "step": 2738 }, { - "epoch": 0.2072566304717945, - "grad_norm": 2.1473047733306885, - "learning_rate": 1.6810319350256397e-05, - "loss": 0.6328, + "epoch": 0.1928194297782471, + "grad_norm": 2.519308567047119, + "learning_rate": 1.304963634337024e-05, + "loss": 0.7022, "step": 2739 }, { - "epoch": 0.2073322991941281, - "grad_norm": 2.5372629165649414, - "learning_rate": 1.6809200000474347e-05, - "loss": 0.7475, + "epoch": 0.19288982752551917, + "grad_norm": 2.117354154586792, + "learning_rate": 1.3048833200422203e-05, + "loss": 0.7209, "step": 2740 }, { - "epoch": 0.20740796791646174, - "grad_norm": 2.0664265155792236, - "learning_rate": 1.6808080161654935e-05, - "loss": 0.6881, + "epoch": 0.19296022527279127, + "grad_norm": 1.8676178455352783, + "learning_rate": 1.304802974298985e-05, + "loss": 0.7249, "step": 2741 }, { - "epoch": 0.20748363663879535, - "grad_norm": 4.4088311195373535, - "learning_rate": 1.6806959833868288e-05, - "loss": 0.705, + "epoch": 0.19303062302006335, + "grad_norm": 1.9778831005096436, + "learning_rate": 1.3047225971114952e-05, + "loss": 0.6649, "step": 2742 }, { - "epoch": 0.207559305361129, - "grad_norm": 3.005873918533325, - "learning_rate": 1.6805839017184565e-05, - "loss": 0.8028, + "epoch": 0.19310102076733546, + "grad_norm": 2.2580888271331787, + "learning_rate": 1.30464218848393e-05, + "loss": 0.7246, "step": 2743 }, { - "epoch": 0.2076349740834626, - "grad_norm": 2.4871227741241455, - "learning_rate": 1.6804717711673957e-05, - "loss": 0.8642, + "epoch": 0.19317141851460753, + "grad_norm": 2.3609445095062256, + "learning_rate": 1.30456174842047e-05, + "loss": 0.7443, "step": 2744 }, { - "epoch": 0.20771064280579624, - "grad_norm": 2.4536328315734863, - "learning_rate": 1.680359591740668e-05, - "loss": 0.6728, + "epoch": 0.1932418162618796, + "grad_norm": 2.212705373764038, + "learning_rate": 1.304481276925297e-05, + "loss": 0.7427, "step": 2745 }, { - "epoch": 0.20778631152812985, - "grad_norm": 2.521181583404541, - "learning_rate": 1.680247363445299e-05, - "loss": 0.6283, + "epoch": 0.19331221400915172, + "grad_norm": 2.079806089401245, + "learning_rate": 1.3044007740025949e-05, + "loss": 0.7011, "step": 2746 }, { - "epoch": 0.20786198025046346, - "grad_norm": 2.403087615966797, - "learning_rate": 1.680135086288316e-05, - "loss": 0.7569, + "epoch": 0.1933826117564238, + "grad_norm": 1.9634250402450562, + "learning_rate": 1.3043202396565492e-05, + "loss": 0.788, "step": 2747 }, { - "epoch": 0.2079376489727971, - "grad_norm": 2.606722116470337, - "learning_rate": 1.6800227602767513e-05, - "loss": 0.8054, + "epoch": 0.19345300950369587, + "grad_norm": 2.3731157779693604, + "learning_rate": 1.3042396738913469e-05, + "loss": 0.6756, "step": 2748 }, { - "epoch": 0.2080133176951307, - "grad_norm": 2.361828088760376, - "learning_rate": 1.679910385417638e-05, - "loss": 0.8643, + "epoch": 0.19352340725096798, + "grad_norm": 2.054558038711548, + "learning_rate": 1.304159076711177e-05, + "loss": 0.8084, "step": 2749 }, { - "epoch": 0.20808898641746434, - "grad_norm": 3.0654191970825195, - "learning_rate": 1.679797961718014e-05, - "loss": 0.8303, + "epoch": 0.19359380499824005, + "grad_norm": 2.0574147701263428, + "learning_rate": 1.3040784481202292e-05, + "loss": 0.7848, "step": 2750 }, { - "epoch": 0.20816465513979795, - "grad_norm": 2.4694888591766357, - "learning_rate": 1.6796854891849195e-05, - "loss": 0.9629, + "epoch": 0.19366420274551213, + "grad_norm": 2.184295892715454, + "learning_rate": 1.303997788122696e-05, + "loss": 0.8919, "step": 2751 }, { - "epoch": 0.2082403238621316, - "grad_norm": 2.855731964111328, - "learning_rate": 1.6795729678253977e-05, - "loss": 0.9194, + "epoch": 0.19373460049278424, + "grad_norm": 2.161620855331421, + "learning_rate": 1.3039170967227707e-05, + "loss": 0.7383, "step": 2752 }, { - "epoch": 0.2083159925844652, - "grad_norm": 2.1465506553649902, - "learning_rate": 1.6794603976464953e-05, - "loss": 0.7555, + "epoch": 0.19380499824005631, + "grad_norm": 2.158189535140991, + "learning_rate": 1.3038363739246484e-05, + "loss": 0.8294, "step": 2753 }, { - "epoch": 0.20839166130679884, - "grad_norm": 2.5873541831970215, - "learning_rate": 1.6793477786552618e-05, - "loss": 0.7371, + "epoch": 0.1938753959873284, + "grad_norm": 2.151343822479248, + "learning_rate": 1.3037556197325266e-05, + "loss": 0.7484, "step": 2754 }, { - "epoch": 0.20846733002913245, - "grad_norm": 2.087071180343628, - "learning_rate": 1.679235110858749e-05, - "loss": 0.6777, + "epoch": 0.1939457937346005, + "grad_norm": 2.10341477394104, + "learning_rate": 1.303674834150603e-05, + "loss": 0.8381, "step": 2755 }, { - "epoch": 0.2085429987514661, - "grad_norm": 3.9847989082336426, - "learning_rate": 1.679122394264014e-05, - "loss": 0.8945, + "epoch": 0.19401619148187257, + "grad_norm": 2.0958869457244873, + "learning_rate": 1.3035940171830784e-05, + "loss": 0.756, "step": 2756 }, { - "epoch": 0.2086186674737997, - "grad_norm": 2.593203067779541, - "learning_rate": 1.6790096288781148e-05, - "loss": 0.6988, + "epoch": 0.19408658922914468, + "grad_norm": 1.867287516593933, + "learning_rate": 1.3035131688341538e-05, + "loss": 0.5915, "step": 2757 }, { - "epoch": 0.20869433619613334, - "grad_norm": 2.5116524696350098, - "learning_rate": 1.6788968147081126e-05, - "loss": 0.812, + "epoch": 0.19415698697641676, + "grad_norm": 1.762721061706543, + "learning_rate": 1.3034322891080333e-05, + "loss": 0.7172, "step": 2758 }, { - "epoch": 0.20877000491846695, - "grad_norm": 2.77005672454834, - "learning_rate": 1.6787839517610727e-05, - "loss": 0.877, + "epoch": 0.19422738472368883, + "grad_norm": 2.3382787704467773, + "learning_rate": 1.3033513780089213e-05, + "loss": 0.7807, "step": 2759 }, { - "epoch": 0.2088456736408006, - "grad_norm": 2.331638813018799, - "learning_rate": 1.6786710400440627e-05, - "loss": 0.8041, + "epoch": 0.19429778247096094, + "grad_norm": 2.003969430923462, + "learning_rate": 1.303270435541025e-05, + "loss": 0.7743, "step": 2760 }, { - "epoch": 0.2089213423631342, - "grad_norm": 2.0847373008728027, - "learning_rate": 1.678558079564154e-05, - "loss": 0.8226, + "epoch": 0.19436818021823302, + "grad_norm": 2.2048773765563965, + "learning_rate": 1.3031894617085521e-05, + "loss": 0.7692, "step": 2761 }, { - "epoch": 0.2089970110854678, - "grad_norm": 2.118413209915161, - "learning_rate": 1.6784450703284197e-05, - "loss": 0.8281, + "epoch": 0.1944385779655051, + "grad_norm": 2.39367413520813, + "learning_rate": 1.3031084565157129e-05, + "loss": 0.7393, "step": 2762 }, { - "epoch": 0.20907267980780145, - "grad_norm": 2.4616172313690186, - "learning_rate": 1.6783320123439376e-05, - "loss": 0.7126, + "epoch": 0.1945089757127772, + "grad_norm": 1.9684512615203857, + "learning_rate": 1.3030274199667189e-05, + "loss": 0.6916, "step": 2763 }, { - "epoch": 0.20914834853013506, - "grad_norm": 2.1653876304626465, - "learning_rate": 1.6782189056177875e-05, - "loss": 0.7439, + "epoch": 0.19457937346004928, + "grad_norm": 2.8447518348693848, + "learning_rate": 1.3029463520657832e-05, + "loss": 0.8532, "step": 2764 }, { - "epoch": 0.2092240172524687, - "grad_norm": 2.1744189262390137, - "learning_rate": 1.6781057501570522e-05, - "loss": 0.8306, + "epoch": 0.19464977120732135, + "grad_norm": 2.209026336669922, + "learning_rate": 1.3028652528171207e-05, + "loss": 0.9653, "step": 2765 }, { - "epoch": 0.2092996859748023, - "grad_norm": 2.326197624206543, - "learning_rate": 1.6779925459688186e-05, - "loss": 0.7588, + "epoch": 0.19472016895459346, + "grad_norm": 2.4242308139801025, + "learning_rate": 1.3027841222249476e-05, + "loss": 0.8526, "step": 2766 }, { - "epoch": 0.20937535469713595, - "grad_norm": 2.3091301918029785, - "learning_rate": 1.677879293060175e-05, - "loss": 0.7054, + "epoch": 0.19479056670186554, + "grad_norm": 2.165584087371826, + "learning_rate": 1.3027029602934823e-05, + "loss": 0.7654, "step": 2767 }, { - "epoch": 0.20945102341946956, - "grad_norm": 2.6318981647491455, - "learning_rate": 1.6777659914382144e-05, - "loss": 0.8123, + "epoch": 0.19486096444913764, + "grad_norm": 2.6486003398895264, + "learning_rate": 1.3026217670269438e-05, + "loss": 0.7788, "step": 2768 }, { - "epoch": 0.2095266921418032, - "grad_norm": 2.382268190383911, - "learning_rate": 1.6776526411100315e-05, - "loss": 0.815, + "epoch": 0.19493136219640972, + "grad_norm": 2.2641489505767822, + "learning_rate": 1.3025405424295543e-05, + "loss": 0.7694, "step": 2769 }, { - "epoch": 0.2096023608641368, - "grad_norm": 2.143889904022217, - "learning_rate": 1.6775392420827253e-05, - "loss": 0.7313, + "epoch": 0.1950017599436818, + "grad_norm": 1.7094526290893555, + "learning_rate": 1.3024592865055361e-05, + "loss": 0.7795, "step": 2770 }, { - "epoch": 0.20967802958647044, - "grad_norm": 2.355656385421753, - "learning_rate": 1.6774257943633967e-05, - "loss": 0.7956, + "epoch": 0.1950721576909539, + "grad_norm": 2.1737630367279053, + "learning_rate": 1.302377999259114e-05, + "loss": 0.6648, "step": 2771 }, { - "epoch": 0.20975369830880405, - "grad_norm": 2.269749641418457, - "learning_rate": 1.6773122979591503e-05, - "loss": 0.7962, + "epoch": 0.19514255543822598, + "grad_norm": 2.237395763397217, + "learning_rate": 1.302296680694514e-05, + "loss": 0.7551, "step": 2772 }, { - "epoch": 0.2098293670311377, - "grad_norm": 2.584016799926758, - "learning_rate": 1.6771987528770938e-05, - "loss": 0.8603, + "epoch": 0.19521295318549806, + "grad_norm": 2.3748974800109863, + "learning_rate": 1.3022153308159645e-05, + "loss": 0.6245, "step": 2773 }, { - "epoch": 0.2099050357534713, - "grad_norm": 2.4904229640960693, - "learning_rate": 1.6770851591243378e-05, - "loss": 0.7932, + "epoch": 0.19528335093277016, + "grad_norm": 2.247767448425293, + "learning_rate": 1.3021339496276943e-05, + "loss": 0.8586, "step": 2774 }, { - "epoch": 0.2099807044758049, - "grad_norm": 3.181654453277588, - "learning_rate": 1.6769715167079953e-05, - "loss": 0.9061, + "epoch": 0.19535374868004224, + "grad_norm": 3.1952812671661377, + "learning_rate": 1.3020525371339348e-05, + "loss": 0.8344, "step": 2775 }, { - "epoch": 0.21005637319813855, - "grad_norm": 2.1907131671905518, - "learning_rate": 1.6768578256351835e-05, - "loss": 0.7714, + "epoch": 0.19542414642731432, + "grad_norm": 2.8445017337799072, + "learning_rate": 1.3019710933389185e-05, + "loss": 0.6956, "step": 2776 }, { - "epoch": 0.21013204192047216, - "grad_norm": 2.349133253097534, - "learning_rate": 1.6767440859130222e-05, - "loss": 0.8765, + "epoch": 0.19549454417458642, + "grad_norm": 2.2520487308502197, + "learning_rate": 1.30188961824688e-05, + "loss": 0.7413, "step": 2777 }, { - "epoch": 0.2102077106428058, - "grad_norm": 2.340827226638794, - "learning_rate": 1.6766302975486342e-05, - "loss": 0.8702, + "epoch": 0.1955649419218585, + "grad_norm": 2.014202356338501, + "learning_rate": 1.3018081118620552e-05, + "loss": 0.8411, "step": 2778 }, { - "epoch": 0.2102833793651394, - "grad_norm": 2.352503776550293, - "learning_rate": 1.6765164605491445e-05, - "loss": 0.6379, + "epoch": 0.19563533966913058, + "grad_norm": 2.12870717048645, + "learning_rate": 1.3017265741886815e-05, + "loss": 0.6461, "step": 2779 }, { - "epoch": 0.21035904808747305, - "grad_norm": 2.20408034324646, - "learning_rate": 1.6764025749216826e-05, - "loss": 0.6901, + "epoch": 0.19570573741640268, + "grad_norm": 2.101797342300415, + "learning_rate": 1.3016450052309983e-05, + "loss": 0.8545, "step": 2780 }, { - "epoch": 0.21043471680980666, - "grad_norm": 2.612621784210205, - "learning_rate": 1.6762886406733803e-05, - "loss": 0.6788, + "epoch": 0.19577613516367476, + "grad_norm": 2.379990816116333, + "learning_rate": 1.3015634049932464e-05, + "loss": 0.7968, "step": 2781 }, { - "epoch": 0.2105103855321403, - "grad_norm": 2.921649932861328, - "learning_rate": 1.6761746578113727e-05, - "loss": 0.7544, + "epoch": 0.19584653291094686, + "grad_norm": 2.038531541824341, + "learning_rate": 1.3014817734796683e-05, + "loss": 0.7992, "step": 2782 }, { - "epoch": 0.2105860542544739, - "grad_norm": 3.306110382080078, - "learning_rate": 1.6760606263427975e-05, - "loss": 0.7943, + "epoch": 0.19591693065821894, + "grad_norm": 2.1800918579101562, + "learning_rate": 1.3014001106945081e-05, + "loss": 0.8469, "step": 2783 }, { - "epoch": 0.21066172297680755, - "grad_norm": 2.2433836460113525, - "learning_rate": 1.675946546274796e-05, - "loss": 0.9816, + "epoch": 0.19598732840549102, + "grad_norm": 2.6332337856292725, + "learning_rate": 1.3013184166420115e-05, + "loss": 0.753, "step": 2784 }, { - "epoch": 0.21073739169914116, - "grad_norm": 2.480579137802124, - "learning_rate": 1.6758324176145117e-05, - "loss": 0.7943, + "epoch": 0.19605772615276312, + "grad_norm": 1.955428123474121, + "learning_rate": 1.3012366913264256e-05, + "loss": 0.7138, "step": 2785 }, { - "epoch": 0.2108130604214748, - "grad_norm": 3.2746899127960205, - "learning_rate": 1.675718240369092e-05, - "loss": 0.8868, + "epoch": 0.1961281239000352, + "grad_norm": 4.231899261474609, + "learning_rate": 1.3011549347519996e-05, + "loss": 0.7401, "step": 2786 }, { - "epoch": 0.2108887291438084, - "grad_norm": 2.4543137550354004, - "learning_rate": 1.675604014545687e-05, - "loss": 0.7192, + "epoch": 0.19619852164730728, + "grad_norm": 2.3288609981536865, + "learning_rate": 1.3010731469229844e-05, + "loss": 0.6908, "step": 2787 }, { - "epoch": 0.21096439786614202, - "grad_norm": 2.593430519104004, - "learning_rate": 1.6754897401514504e-05, - "loss": 0.7476, + "epoch": 0.19626891939457938, + "grad_norm": 2.1061110496520996, + "learning_rate": 1.3009913278436318e-05, + "loss": 0.8138, "step": 2788 }, { - "epoch": 0.21104006658847566, - "grad_norm": 2.6094298362731934, - "learning_rate": 1.675375417193538e-05, - "loss": 0.8261, + "epoch": 0.19633931714185146, + "grad_norm": 2.6487224102020264, + "learning_rate": 1.3009094775181959e-05, + "loss": 0.8332, "step": 2789 }, { - "epoch": 0.21111573531080927, - "grad_norm": 2.957797050476074, - "learning_rate": 1.6752610456791093e-05, - "loss": 0.777, + "epoch": 0.19640971488912354, + "grad_norm": 2.483846426010132, + "learning_rate": 1.3008275959509318e-05, + "loss": 0.7152, "step": 2790 }, { - "epoch": 0.2111914040331429, - "grad_norm": 3.8428449630737305, - "learning_rate": 1.6751466256153257e-05, - "loss": 0.8653, + "epoch": 0.19648011263639564, + "grad_norm": 2.150294780731201, + "learning_rate": 1.300745683146097e-05, + "loss": 0.6975, "step": 2791 }, { - "epoch": 0.21126707275547651, - "grad_norm": 2.952915668487549, - "learning_rate": 1.675032157009354e-05, - "loss": 0.7709, + "epoch": 0.19655051038366772, + "grad_norm": 2.5024619102478027, + "learning_rate": 1.3006637391079499e-05, + "loss": 0.7803, "step": 2792 }, { - "epoch": 0.21134274147781015, - "grad_norm": 2.7285871505737305, - "learning_rate": 1.6749176398683616e-05, - "loss": 0.7093, + "epoch": 0.1966209081309398, + "grad_norm": 1.8588032722473145, + "learning_rate": 1.3005817638407513e-05, + "loss": 0.6949, "step": 2793 }, { - "epoch": 0.21141841020014376, - "grad_norm": 16.232563018798828, - "learning_rate": 1.67480307419952e-05, - "loss": 0.8038, + "epoch": 0.1966913058782119, + "grad_norm": 2.7311601638793945, + "learning_rate": 1.3004997573487626e-05, + "loss": 0.8369, "step": 2794 }, { - "epoch": 0.2114940789224774, - "grad_norm": 2.1137609481811523, - "learning_rate": 1.6746884600100038e-05, - "loss": 0.8155, + "epoch": 0.19676170362548398, + "grad_norm": 1.9006553888320923, + "learning_rate": 1.3004177196362478e-05, + "loss": 0.715, "step": 2795 }, { - "epoch": 0.211569747644811, - "grad_norm": 2.6953883171081543, - "learning_rate": 1.674573797306991e-05, - "loss": 0.736, + "epoch": 0.19683210137275609, + "grad_norm": 2.1600940227508545, + "learning_rate": 1.300335650707472e-05, + "loss": 0.7188, "step": 2796 }, { - "epoch": 0.21164541636714465, - "grad_norm": 2.4279799461364746, - "learning_rate": 1.6744590860976615e-05, - "loss": 0.7183, + "epoch": 0.19690249912002816, + "grad_norm": 2.0004117488861084, + "learning_rate": 1.3002535505667021e-05, + "loss": 0.7717, "step": 2797 }, { - "epoch": 0.21172108508947826, - "grad_norm": 2.5715172290802, - "learning_rate": 1.6743443263891994e-05, - "loss": 0.8558, + "epoch": 0.19697289686730024, + "grad_norm": 2.239529609680176, + "learning_rate": 1.3001714192182061e-05, + "loss": 0.7163, "step": 2798 }, { - "epoch": 0.2117967538118119, - "grad_norm": 2.9246373176574707, - "learning_rate": 1.6742295181887908e-05, - "loss": 0.8916, + "epoch": 0.19704329461457235, + "grad_norm": 2.7071211338043213, + "learning_rate": 1.300089256666255e-05, + "loss": 0.7636, "step": 2799 }, { - "epoch": 0.2118724225341455, - "grad_norm": 2.751868486404419, - "learning_rate": 1.6741146615036255e-05, - "loss": 0.9153, + "epoch": 0.19711369236184442, + "grad_norm": 1.9972362518310547, + "learning_rate": 1.3000070629151197e-05, + "loss": 0.8311, "step": 2800 }, { - "epoch": 0.21194809125647912, - "grad_norm": 2.7025206089019775, - "learning_rate": 1.6739997563408967e-05, - "loss": 0.6688, + "epoch": 0.1971840901091165, + "grad_norm": 2.205315113067627, + "learning_rate": 1.2999248379690739e-05, + "loss": 0.7539, "step": 2801 }, { - "epoch": 0.21202375997881276, - "grad_norm": 2.7657957077026367, - "learning_rate": 1.6738848027077994e-05, - "loss": 0.9089, + "epoch": 0.1972544878563886, + "grad_norm": 2.04468035697937, + "learning_rate": 1.2998425818323923e-05, + "loss": 0.7305, "step": 2802 }, { - "epoch": 0.21209942870114637, - "grad_norm": 2.771183967590332, - "learning_rate": 1.6737698006115326e-05, - "loss": 0.8091, + "epoch": 0.19732488560366068, + "grad_norm": 1.7602639198303223, + "learning_rate": 1.2997602945093516e-05, + "loss": 0.72, "step": 2803 }, { - "epoch": 0.21217509742348, - "grad_norm": 2.281928300857544, - "learning_rate": 1.6736547500592985e-05, - "loss": 0.7638, + "epoch": 0.19739528335093276, + "grad_norm": 3.369656801223755, + "learning_rate": 1.2996779760042301e-05, + "loss": 0.6883, "step": 2804 }, { - "epoch": 0.21225076614581362, - "grad_norm": 2.5218539237976074, - "learning_rate": 1.673539651058302e-05, - "loss": 0.9364, + "epoch": 0.19746568109820486, + "grad_norm": 2.1299684047698975, + "learning_rate": 1.2995956263213076e-05, + "loss": 0.8067, "step": 2805 }, { - "epoch": 0.21232643486814726, - "grad_norm": 2.1761555671691895, - "learning_rate": 1.6734245036157498e-05, - "loss": 0.7687, + "epoch": 0.19753607884547694, + "grad_norm": 2.0659384727478027, + "learning_rate": 1.2995132454648654e-05, + "loss": 0.7701, "step": 2806 }, { - "epoch": 0.21240210359048087, - "grad_norm": 2.418473720550537, - "learning_rate": 1.6733093077388543e-05, - "loss": 0.676, + "epoch": 0.19760647659274902, + "grad_norm": 2.0151760578155518, + "learning_rate": 1.2994308334391866e-05, + "loss": 0.6707, "step": 2807 }, { - "epoch": 0.2124777723128145, - "grad_norm": 1.9938814640045166, - "learning_rate": 1.673194063434828e-05, - "loss": 0.745, + "epoch": 0.19767687434002112, + "grad_norm": 1.9165823459625244, + "learning_rate": 1.299348390248556e-05, + "loss": 0.709, "step": 2808 }, { - "epoch": 0.21255344103514812, - "grad_norm": 2.486959934234619, - "learning_rate": 1.6730787707108895e-05, - "loss": 0.8677, + "epoch": 0.1977472720872932, + "grad_norm": 2.2062740325927734, + "learning_rate": 1.2992659158972595e-05, + "loss": 0.6871, "step": 2809 }, { - "epoch": 0.21262910975748175, - "grad_norm": 2.121563673019409, - "learning_rate": 1.6729634295742573e-05, - "loss": 0.7888, + "epoch": 0.1978176698345653, + "grad_norm": 2.1248085498809814, + "learning_rate": 1.2991834103895856e-05, + "loss": 0.8365, "step": 2810 }, { - "epoch": 0.21270477847981537, - "grad_norm": 2.618818759918213, - "learning_rate": 1.6728480400321553e-05, - "loss": 0.6763, + "epoch": 0.19788806758183738, + "grad_norm": 2.281906843185425, + "learning_rate": 1.2991008737298235e-05, + "loss": 0.7485, "step": 2811 }, { - "epoch": 0.212780447202149, - "grad_norm": 2.66679310798645, - "learning_rate": 1.6727326020918095e-05, - "loss": 0.761, + "epoch": 0.19795846532910946, + "grad_norm": 2.1689603328704834, + "learning_rate": 1.2990183059222643e-05, + "loss": 0.6495, "step": 2812 }, { - "epoch": 0.21285611592448261, - "grad_norm": 3.1060845851898193, - "learning_rate": 1.6726171157604486e-05, - "loss": 0.7265, + "epoch": 0.19802886307638157, + "grad_norm": 3.091979742050171, + "learning_rate": 1.298935706971201e-05, + "loss": 0.6978, "step": 2813 }, { - "epoch": 0.21293178464681625, - "grad_norm": 2.5589113235473633, - "learning_rate": 1.672501581045305e-05, - "loss": 0.8843, + "epoch": 0.19809926082365364, + "grad_norm": 1.8291183710098267, + "learning_rate": 1.2988530768809278e-05, + "loss": 0.8347, "step": 2814 }, { - "epoch": 0.21300745336914986, - "grad_norm": 2.34407901763916, - "learning_rate": 1.672385997953614e-05, - "loss": 0.8115, + "epoch": 0.19816965857092572, + "grad_norm": 2.44168758392334, + "learning_rate": 1.2987704156557407e-05, + "loss": 0.8155, "step": 2815 }, { - "epoch": 0.21308312209148347, - "grad_norm": 2.688868522644043, - "learning_rate": 1.6722703664926135e-05, - "loss": 0.6882, + "epoch": 0.19824005631819783, + "grad_norm": 1.8233612775802612, + "learning_rate": 1.2986877232999373e-05, + "loss": 0.7355, "step": 2816 }, { - "epoch": 0.2131587908138171, - "grad_norm": 2.3896734714508057, - "learning_rate": 1.672154686669545e-05, - "loss": 0.7876, + "epoch": 0.1983104540654699, + "grad_norm": 1.9227584600448608, + "learning_rate": 1.2986049998178172e-05, + "loss": 0.6583, "step": 2817 }, { - "epoch": 0.21323445953615072, - "grad_norm": 2.3544201850891113, - "learning_rate": 1.6720389584916525e-05, - "loss": 0.8239, + "epoch": 0.19838085181274198, + "grad_norm": 1.9398671388626099, + "learning_rate": 1.298522245213681e-05, + "loss": 0.6506, "step": 2818 }, { - "epoch": 0.21331012825848436, - "grad_norm": 2.2516539096832275, - "learning_rate": 1.671923181966183e-05, - "loss": 0.7131, + "epoch": 0.1984512495600141, + "grad_norm": 2.112593173980713, + "learning_rate": 1.2984394594918313e-05, + "loss": 0.708, "step": 2819 }, { - "epoch": 0.21338579698081797, - "grad_norm": 3.938749074935913, - "learning_rate": 1.671807357100387e-05, - "loss": 0.7547, + "epoch": 0.19852164730728616, + "grad_norm": 1.9524953365325928, + "learning_rate": 1.2983566426565719e-05, + "loss": 0.8291, "step": 2820 }, { - "epoch": 0.2134614657031516, - "grad_norm": 2.0515267848968506, - "learning_rate": 1.6716914839015185e-05, - "loss": 0.6756, + "epoch": 0.19859204505455824, + "grad_norm": 2.3048479557037354, + "learning_rate": 1.2982737947122089e-05, + "loss": 0.7372, "step": 2821 }, { - "epoch": 0.21353713442548522, - "grad_norm": 2.133115768432617, - "learning_rate": 1.6715755623768334e-05, - "loss": 0.7228, + "epoch": 0.19866244280183035, + "grad_norm": 1.8637635707855225, + "learning_rate": 1.2981909156630493e-05, + "loss": 0.717, "step": 2822 }, { - "epoch": 0.21361280314781886, - "grad_norm": 2.3022468090057373, - "learning_rate": 1.6714595925335906e-05, - "loss": 0.8373, + "epoch": 0.19873284054910242, + "grad_norm": 2.1397547721862793, + "learning_rate": 1.2981080055134025e-05, + "loss": 0.7968, "step": 2823 }, { - "epoch": 0.21368847187015247, - "grad_norm": 2.543943166732788, - "learning_rate": 1.671343574379053e-05, - "loss": 0.9197, + "epoch": 0.19880323829637453, + "grad_norm": 1.937309980392456, + "learning_rate": 1.2980250642675786e-05, + "loss": 0.6804, "step": 2824 }, { - "epoch": 0.2137641405924861, - "grad_norm": 2.1154918670654297, - "learning_rate": 1.6712275079204863e-05, - "loss": 0.9015, + "epoch": 0.1988736360436466, + "grad_norm": 2.0659267902374268, + "learning_rate": 1.2979420919298901e-05, + "loss": 0.6891, "step": 2825 }, { - "epoch": 0.21383980931481972, - "grad_norm": 2.0648181438446045, - "learning_rate": 1.671111393165158e-05, - "loss": 0.8252, + "epoch": 0.19894403379091868, + "grad_norm": 2.181600570678711, + "learning_rate": 1.2978590885046508e-05, + "loss": 0.7028, "step": 2826 }, { - "epoch": 0.21391547803715336, - "grad_norm": 2.3254873752593994, - "learning_rate": 1.6709952301203405e-05, - "loss": 0.7808, + "epoch": 0.1990144315381908, + "grad_norm": 2.405527353286743, + "learning_rate": 1.2977760539961759e-05, + "loss": 0.8053, "step": 2827 }, { - "epoch": 0.21399114675948697, - "grad_norm": 2.4721736907958984, - "learning_rate": 1.670879018793308e-05, - "loss": 0.7166, + "epoch": 0.19908482928546287, + "grad_norm": 2.1469526290893555, + "learning_rate": 1.2976929884087825e-05, + "loss": 0.6974, "step": 2828 }, { - "epoch": 0.21406681548182058, - "grad_norm": 2.7000808715820312, - "learning_rate": 1.6707627591913382e-05, - "loss": 0.8353, + "epoch": 0.19915522703273494, + "grad_norm": 2.1089656352996826, + "learning_rate": 1.2976098917467895e-05, + "loss": 0.7781, "step": 2829 }, { - "epoch": 0.21414248420415422, - "grad_norm": 2.1937551498413086, - "learning_rate": 1.6706464513217115e-05, - "loss": 0.874, + "epoch": 0.19922562478000705, + "grad_norm": 2.259326696395874, + "learning_rate": 1.297526764014517e-05, + "loss": 0.7774, "step": 2830 }, { - "epoch": 0.21421815292648783, - "grad_norm": 4.846593379974365, - "learning_rate": 1.670530095191711e-05, - "loss": 0.9048, + "epoch": 0.19929602252727913, + "grad_norm": 1.9495466947555542, + "learning_rate": 1.2974436052162866e-05, + "loss": 0.838, "step": 2831 }, { - "epoch": 0.21429382164882146, - "grad_norm": 2.571143627166748, - "learning_rate": 1.6704136908086242e-05, - "loss": 0.871, + "epoch": 0.1993664202745512, + "grad_norm": 2.1152100563049316, + "learning_rate": 1.2973604153564225e-05, + "loss": 0.756, "step": 2832 }, { - "epoch": 0.21436949037115507, - "grad_norm": 2.5679643154144287, - "learning_rate": 1.67029723817974e-05, - "loss": 0.7586, + "epoch": 0.1994368180218233, + "grad_norm": 2.0844058990478516, + "learning_rate": 1.2972771944392491e-05, + "loss": 0.8028, "step": 2833 }, { - "epoch": 0.2144451590934887, - "grad_norm": 2.327501058578491, - "learning_rate": 1.670180737312351e-05, - "loss": 0.7948, + "epoch": 0.1995072157690954, + "grad_norm": 2.151262044906616, + "learning_rate": 1.2971939424690936e-05, + "loss": 0.6658, "step": 2834 }, { - "epoch": 0.21452082781582232, - "grad_norm": 2.3081796169281006, - "learning_rate": 1.670064188213754e-05, - "loss": 0.8191, + "epoch": 0.19957761351636746, + "grad_norm": 2.0456371307373047, + "learning_rate": 1.2971106594502842e-05, + "loss": 0.7, "step": 2835 }, { - "epoch": 0.21459649653815596, - "grad_norm": 2.2802083492279053, - "learning_rate": 1.669947590891246e-05, - "loss": 0.7863, + "epoch": 0.19964801126363957, + "grad_norm": 1.9289238452911377, + "learning_rate": 1.2970273453871508e-05, + "loss": 0.8087, "step": 2836 }, { - "epoch": 0.21467216526048957, - "grad_norm": 1.9905712604522705, - "learning_rate": 1.6698309453521298e-05, - "loss": 0.6816, + "epoch": 0.19971840901091165, + "grad_norm": 2.058263063430786, + "learning_rate": 1.296944000284025e-05, + "loss": 0.7162, "step": 2837 }, { - "epoch": 0.2147478339828232, - "grad_norm": 2.6414380073547363, - "learning_rate": 1.66971425160371e-05, - "loss": 0.7644, + "epoch": 0.19978880675818375, + "grad_norm": 2.0099096298217773, + "learning_rate": 1.29686062414524e-05, + "loss": 0.6161, "step": 2838 }, { - "epoch": 0.21482350270515682, - "grad_norm": 2.325744390487671, - "learning_rate": 1.6695975096532946e-05, - "loss": 0.9109, + "epoch": 0.19985920450545583, + "grad_norm": 1.8196367025375366, + "learning_rate": 1.2967772169751306e-05, + "loss": 0.7605, "step": 2839 }, { - "epoch": 0.21489917142749046, - "grad_norm": 2.3487441539764404, - "learning_rate": 1.6694807195081934e-05, - "loss": 0.7954, + "epoch": 0.1999296022527279, + "grad_norm": 2.8683559894561768, + "learning_rate": 1.2966937787780332e-05, + "loss": 0.8063, "step": 2840 }, { - "epoch": 0.21497484014982407, - "grad_norm": 2.7436139583587646, - "learning_rate": 1.6693638811757206e-05, - "loss": 0.9525, + "epoch": 0.2, + "grad_norm": 2.2348105907440186, + "learning_rate": 1.2966103095582858e-05, + "loss": 0.6561, "step": 2841 }, { - "epoch": 0.21505050887215768, - "grad_norm": 2.626749038696289, - "learning_rate": 1.6692469946631935e-05, - "loss": 0.7477, + "epoch": 0.2000703977472721, + "grad_norm": 1.9138249158859253, + "learning_rate": 1.2965268093202281e-05, + "loss": 0.7738, "step": 2842 }, { - "epoch": 0.21512617759449132, - "grad_norm": 2.0960118770599365, - "learning_rate": 1.6691300599779314e-05, - "loss": 0.828, + "epoch": 0.20014079549454417, + "grad_norm": 2.21639084815979, + "learning_rate": 1.2964432780682014e-05, + "loss": 0.6888, "step": 2843 }, { - "epoch": 0.21520184631682493, - "grad_norm": 2.273026704788208, - "learning_rate": 1.6690130771272576e-05, - "loss": 0.6678, + "epoch": 0.20021119324181627, + "grad_norm": 1.9455065727233887, + "learning_rate": 1.2963597158065485e-05, + "loss": 0.6479, "step": 2844 }, { - "epoch": 0.21527751503915857, - "grad_norm": 1.8988648653030396, - "learning_rate": 1.6688960461184974e-05, - "loss": 0.6153, + "epoch": 0.20028159098908835, + "grad_norm": 2.2107162475585938, + "learning_rate": 1.2962761225396139e-05, + "loss": 0.872, "step": 2845 }, { - "epoch": 0.21535318376149218, - "grad_norm": 2.7627248764038086, - "learning_rate": 1.6687789669589797e-05, - "loss": 0.8108, + "epoch": 0.20035198873636043, + "grad_norm": 2.121769666671753, + "learning_rate": 1.2961924982717437e-05, + "loss": 0.7043, "step": 2846 }, { - "epoch": 0.21542885248382582, - "grad_norm": 7.99500846862793, - "learning_rate": 1.6686618396560365e-05, - "loss": 0.7978, + "epoch": 0.20042238648363253, + "grad_norm": 2.1713573932647705, + "learning_rate": 1.2961088430072853e-05, + "loss": 0.7202, "step": 2847 }, { - "epoch": 0.21550452120615943, - "grad_norm": 2.0139832496643066, - "learning_rate": 1.668544664217003e-05, - "loss": 0.8848, + "epoch": 0.2004927842309046, + "grad_norm": 2.034632682800293, + "learning_rate": 1.2960251567505886e-05, + "loss": 0.6721, "step": 2848 }, { - "epoch": 0.21558018992849307, - "grad_norm": 2.0754570960998535, - "learning_rate": 1.668427440649217e-05, - "loss": 0.8815, + "epoch": 0.20056318197817669, + "grad_norm": 1.7628371715545654, + "learning_rate": 1.2959414395060041e-05, + "loss": 0.794, "step": 2849 }, { - "epoch": 0.21565585865082668, - "grad_norm": 2.176095485687256, - "learning_rate": 1.668310168960019e-05, - "loss": 0.7272, + "epoch": 0.2006335797254488, + "grad_norm": 1.5182468891143799, + "learning_rate": 1.2958576912778844e-05, + "loss": 0.8976, "step": 2850 }, { - "epoch": 0.21573152737316031, - "grad_norm": 2.36541748046875, - "learning_rate": 1.668192849156753e-05, - "loss": 0.6364, + "epoch": 0.20070397747272087, + "grad_norm": 2.069378614425659, + "learning_rate": 1.2957739120705837e-05, + "loss": 0.7208, "step": 2851 }, { - "epoch": 0.21580719609549393, - "grad_norm": 2.3747830390930176, - "learning_rate": 1.6680754812467666e-05, - "loss": 0.8652, + "epoch": 0.20077437521999297, + "grad_norm": 2.0398383140563965, + "learning_rate": 1.2956901018884578e-05, + "loss": 0.8075, "step": 2852 }, { - "epoch": 0.21588286481782756, - "grad_norm": 2.5572152137756348, - "learning_rate": 1.667958065237409e-05, - "loss": 0.7099, + "epoch": 0.20084477296726505, + "grad_norm": 2.3953700065612793, + "learning_rate": 1.295606260735864e-05, + "loss": 0.837, "step": 2853 }, { - "epoch": 0.21595853354016117, - "grad_norm": 2.4014647006988525, - "learning_rate": 1.6678406011360337e-05, - "loss": 0.9067, + "epoch": 0.20091517071453713, + "grad_norm": 1.8547263145446777, + "learning_rate": 1.2955223886171611e-05, + "loss": 0.6763, "step": 2854 }, { - "epoch": 0.21603420226249478, - "grad_norm": 2.304295301437378, - "learning_rate": 1.6677230889499966e-05, - "loss": 0.6992, + "epoch": 0.20098556846180923, + "grad_norm": 2.385629415512085, + "learning_rate": 1.2954384855367101e-05, + "loss": 0.8897, "step": 2855 }, { - "epoch": 0.21610987098482842, - "grad_norm": 1.8173857927322388, - "learning_rate": 1.667605528686656e-05, - "loss": 1.0161, + "epoch": 0.2010559662090813, + "grad_norm": 3.1562159061431885, + "learning_rate": 1.295354551498873e-05, + "loss": 0.7942, "step": 2856 }, { - "epoch": 0.21618553970716203, - "grad_norm": 2.0855274200439453, - "learning_rate": 1.6674879203533748e-05, - "loss": 0.7699, + "epoch": 0.2011263639563534, + "grad_norm": 1.9930483102798462, + "learning_rate": 1.2952705865080135e-05, + "loss": 0.671, "step": 2857 }, { - "epoch": 0.21626120842949567, - "grad_norm": 1.9764469861984253, - "learning_rate": 1.6673702639575176e-05, - "loss": 0.8007, + "epoch": 0.2011967617036255, + "grad_norm": 1.9684098958969116, + "learning_rate": 1.2951865905684971e-05, + "loss": 0.8718, "step": 2858 }, { - "epoch": 0.21633687715182928, - "grad_norm": 2.9846057891845703, - "learning_rate": 1.6672525595064527e-05, - "loss": 0.7783, + "epoch": 0.20126715945089757, + "grad_norm": 2.0362963676452637, + "learning_rate": 1.2951025636846909e-05, + "loss": 0.7826, "step": 2859 }, { - "epoch": 0.21641254587416292, - "grad_norm": 2.2411768436431885, - "learning_rate": 1.667134807007551e-05, - "loss": 0.6131, + "epoch": 0.20133755719816965, + "grad_norm": 1.8419691324234009, + "learning_rate": 1.2950185058609634e-05, + "loss": 0.7464, "step": 2860 }, { - "epoch": 0.21648821459649653, - "grad_norm": 2.099818229675293, - "learning_rate": 1.6670170064681858e-05, - "loss": 0.7569, + "epoch": 0.20140795494544175, + "grad_norm": 3.8464763164520264, + "learning_rate": 1.2949344171016848e-05, + "loss": 0.7962, "step": 2861 }, { - "epoch": 0.21656388331883017, - "grad_norm": 2.3399593830108643, - "learning_rate": 1.6668991578957354e-05, - "loss": 0.7872, + "epoch": 0.20147835269271383, + "grad_norm": 2.2273335456848145, + "learning_rate": 1.2948502974112272e-05, + "loss": 0.7655, "step": 2862 }, { - "epoch": 0.21663955204116378, - "grad_norm": 2.527578115463257, - "learning_rate": 1.666781261297579e-05, - "loss": 0.6493, + "epoch": 0.2015487504399859, + "grad_norm": 2.1485912799835205, + "learning_rate": 1.294766146793964e-05, + "loss": 0.7673, "step": 2863 }, { - "epoch": 0.21671522076349742, - "grad_norm": 2.33992862701416, - "learning_rate": 1.6666633166811004e-05, - "loss": 0.7972, + "epoch": 0.201619148187258, + "grad_norm": 2.328144073486328, + "learning_rate": 1.29468196525427e-05, + "loss": 0.769, "step": 2864 }, { - "epoch": 0.21679088948583103, - "grad_norm": 2.1418380737304688, - "learning_rate": 1.666545324053685e-05, - "loss": 0.7548, + "epoch": 0.2016895459345301, + "grad_norm": 1.7949751615524292, + "learning_rate": 1.294597752796522e-05, + "loss": 0.7731, "step": 2865 }, { - "epoch": 0.21686655820816467, - "grad_norm": 2.6442832946777344, - "learning_rate": 1.6664272834227218e-05, - "loss": 0.8697, + "epoch": 0.2017599436818022, + "grad_norm": 2.0370733737945557, + "learning_rate": 1.2945135094250984e-05, + "loss": 0.7306, "step": 2866 }, { - "epoch": 0.21694222693049828, - "grad_norm": 2.5152578353881836, - "learning_rate": 1.666309194795603e-05, - "loss": 0.9075, + "epoch": 0.20183034142907427, + "grad_norm": 3.0403120517730713, + "learning_rate": 1.294429235144379e-05, + "loss": 0.8582, "step": 2867 }, { - "epoch": 0.2170178956528319, - "grad_norm": 2.834080696105957, - "learning_rate": 1.6661910581797246e-05, - "loss": 0.7793, + "epoch": 0.20190073917634635, + "grad_norm": 1.4900261163711548, + "learning_rate": 1.2943449299587454e-05, + "loss": 0.8521, "step": 2868 }, { - "epoch": 0.21709356437516553, - "grad_norm": 3.0419914722442627, - "learning_rate": 1.6660728735824834e-05, - "loss": 0.7984, + "epoch": 0.20197113692361846, + "grad_norm": 2.114074945449829, + "learning_rate": 1.2942605938725806e-05, + "loss": 0.8275, "step": 2869 }, { - "epoch": 0.21716923309749914, - "grad_norm": 2.7962558269500732, - "learning_rate": 1.6659546410112815e-05, - "loss": 0.9006, + "epoch": 0.20204153467089053, + "grad_norm": 2.214066743850708, + "learning_rate": 1.2941762268902694e-05, + "loss": 0.7005, "step": 2870 }, { - "epoch": 0.21724490181983278, - "grad_norm": 2.511221408843994, - "learning_rate": 1.6658363604735224e-05, - "loss": 0.8622, + "epoch": 0.2021119324181626, + "grad_norm": 1.9896901845932007, + "learning_rate": 1.2940918290161977e-05, + "loss": 0.7359, "step": 2871 }, { - "epoch": 0.21732057054216639, - "grad_norm": 2.652181386947632, - "learning_rate": 1.6657180319766134e-05, - "loss": 0.866, + "epoch": 0.20218233016543471, + "grad_norm": 2.5028276443481445, + "learning_rate": 1.2940074002547542e-05, + "loss": 0.722, "step": 2872 }, { - "epoch": 0.21739623926450002, - "grad_norm": 2.3145172595977783, - "learning_rate": 1.6655996555279645e-05, - "loss": 0.8523, + "epoch": 0.2022527279127068, + "grad_norm": 1.8832263946533203, + "learning_rate": 1.2939229406103278e-05, + "loss": 0.7537, "step": 2873 }, { - "epoch": 0.21747190798683363, - "grad_norm": 2.5612776279449463, - "learning_rate": 1.665481231134989e-05, - "loss": 0.8403, + "epoch": 0.20232312565997887, + "grad_norm": 2.0421197414398193, + "learning_rate": 1.2938384500873098e-05, + "loss": 0.7412, "step": 2874 }, { - "epoch": 0.21754757670916727, - "grad_norm": 1.8979072570800781, - "learning_rate": 1.665362758805103e-05, - "loss": 0.679, + "epoch": 0.20239352340725097, + "grad_norm": 1.8767123222351074, + "learning_rate": 1.293753928690093e-05, + "loss": 0.6927, "step": 2875 }, { - "epoch": 0.21762324543150088, - "grad_norm": 2.4776549339294434, - "learning_rate": 1.6652442385457255e-05, - "loss": 0.9507, + "epoch": 0.20246392115452305, + "grad_norm": 5.04095458984375, + "learning_rate": 1.2936693764230717e-05, + "loss": 0.6829, "step": 2876 }, { - "epoch": 0.21769891415383452, - "grad_norm": 3.0830917358398438, - "learning_rate": 1.6651256703642786e-05, - "loss": 0.7769, + "epoch": 0.20253431890179513, + "grad_norm": 2.0795814990997314, + "learning_rate": 1.2935847932906419e-05, + "loss": 0.7961, "step": 2877 }, { - "epoch": 0.21777458287616813, - "grad_norm": 2.253445625305176, - "learning_rate": 1.6650070542681876e-05, - "loss": 0.7806, + "epoch": 0.20260471664906723, + "grad_norm": 2.2549116611480713, + "learning_rate": 1.2935001792972011e-05, + "loss": 0.6345, "step": 2878 }, { - "epoch": 0.21785025159850177, - "grad_norm": 2.626587152481079, - "learning_rate": 1.6648883902648805e-05, - "loss": 0.784, + "epoch": 0.2026751143963393, + "grad_norm": 1.8898028135299683, + "learning_rate": 1.2934155344471485e-05, + "loss": 0.7062, "step": 2879 }, { - "epoch": 0.21792592032083538, - "grad_norm": 2.3169028759002686, - "learning_rate": 1.6647696783617887e-05, - "loss": 0.594, + "epoch": 0.20274551214361142, + "grad_norm": 2.3344531059265137, + "learning_rate": 1.2933308587448847e-05, + "loss": 0.7624, "step": 2880 }, { - "epoch": 0.21800158904316902, - "grad_norm": 2.367800712585449, - "learning_rate": 1.6646509185663458e-05, - "loss": 0.8395, + "epoch": 0.2028159098908835, + "grad_norm": 1.9059360027313232, + "learning_rate": 1.2932461521948125e-05, + "loss": 0.6858, "step": 2881 }, { - "epoch": 0.21807725776550263, - "grad_norm": 2.1728861331939697, - "learning_rate": 1.6645321108859894e-05, - "loss": 0.8088, + "epoch": 0.20288630763815557, + "grad_norm": 2.1258914470672607, + "learning_rate": 1.2931614148013353e-05, + "loss": 0.7384, "step": 2882 }, { - "epoch": 0.21815292648783624, - "grad_norm": 2.3152570724487305, - "learning_rate": 1.6644132553281592e-05, - "loss": 0.6268, + "epoch": 0.20295670538542768, + "grad_norm": 2.2573955059051514, + "learning_rate": 1.2930766465688591e-05, + "loss": 0.7864, "step": 2883 }, { - "epoch": 0.21822859521016988, - "grad_norm": 4.68181037902832, - "learning_rate": 1.6642943519002983e-05, - "loss": 0.8115, + "epoch": 0.20302710313269975, + "grad_norm": 1.8230419158935547, + "learning_rate": 1.2929918475017909e-05, + "loss": 0.7338, "step": 2884 }, { - "epoch": 0.2183042639325035, - "grad_norm": 2.9083902835845947, - "learning_rate": 1.6641754006098537e-05, - "loss": 0.9005, + "epoch": 0.20309750087997183, + "grad_norm": 2.21211838722229, + "learning_rate": 1.2929070176045392e-05, + "loss": 0.8074, "step": 2885 }, { - "epoch": 0.21837993265483713, - "grad_norm": 2.559480667114258, - "learning_rate": 1.6640564014642732e-05, - "loss": 0.7178, + "epoch": 0.20316789862724394, + "grad_norm": 2.009521007537842, + "learning_rate": 1.292822156881515e-05, + "loss": 0.7239, "step": 2886 }, { - "epoch": 0.21845560137717074, - "grad_norm": 2.6691970825195312, - "learning_rate": 1.66393735447101e-05, - "loss": 0.6153, + "epoch": 0.20323829637451601, + "grad_norm": 5.319012641906738, + "learning_rate": 1.29273726533713e-05, + "loss": 0.9075, "step": 2887 }, { - "epoch": 0.21853127009950438, - "grad_norm": 2.705824851989746, - "learning_rate": 1.663818259637519e-05, - "loss": 0.9264, + "epoch": 0.2033086941217881, + "grad_norm": 1.7754051685333252, + "learning_rate": 1.2926523429757975e-05, + "loss": 0.851, "step": 2888 }, { - "epoch": 0.218606938821838, - "grad_norm": 2.3913021087646484, - "learning_rate": 1.6636991169712577e-05, - "loss": 0.6872, + "epoch": 0.2033790918690602, + "grad_norm": 2.138119697570801, + "learning_rate": 1.2925673898019333e-05, + "loss": 0.7645, "step": 2889 }, { - "epoch": 0.21868260754417163, - "grad_norm": 2.4797730445861816, - "learning_rate": 1.6635799264796877e-05, - "loss": 0.8335, + "epoch": 0.20344948961633227, + "grad_norm": 2.1004340648651123, + "learning_rate": 1.2924824058199537e-05, + "loss": 0.7091, "step": 2890 }, { - "epoch": 0.21875827626650524, - "grad_norm": 2.379905939102173, - "learning_rate": 1.663460688170273e-05, - "loss": 0.8438, + "epoch": 0.20351988736360435, + "grad_norm": 2.3534152507781982, + "learning_rate": 1.2923973910342771e-05, + "loss": 0.7933, "step": 2891 }, { - "epoch": 0.21883394498883887, - "grad_norm": 2.6139721870422363, - "learning_rate": 1.6633414020504805e-05, - "loss": 0.7048, + "epoch": 0.20359028511087646, + "grad_norm": 1.8388102054595947, + "learning_rate": 1.2923123454493237e-05, + "loss": 0.7564, "step": 2892 }, { - "epoch": 0.21890961371117248, - "grad_norm": 2.9872958660125732, - "learning_rate": 1.6632220681277806e-05, - "loss": 0.7684, + "epoch": 0.20366068285814853, + "grad_norm": 1.6807938814163208, + "learning_rate": 1.2922272690695151e-05, + "loss": 0.6988, "step": 2893 }, { - "epoch": 0.21898528243350612, - "grad_norm": 1.7575455904006958, - "learning_rate": 1.6631026864096465e-05, - "loss": 0.9336, + "epoch": 0.20373108060542064, + "grad_norm": 2.025575876235962, + "learning_rate": 1.2921421618992744e-05, + "loss": 0.8812, "step": 2894 }, { - "epoch": 0.21906095115583973, - "grad_norm": 2.184025287628174, - "learning_rate": 1.6629832569035537e-05, - "loss": 0.7969, + "epoch": 0.20380147835269272, + "grad_norm": 2.384500503540039, + "learning_rate": 1.2920570239430264e-05, + "loss": 0.8095, "step": 2895 }, { - "epoch": 0.21913661987817334, - "grad_norm": 2.2860093116760254, - "learning_rate": 1.6628637796169815e-05, - "loss": 0.7119, + "epoch": 0.2038718760999648, + "grad_norm": 1.9504368305206299, + "learning_rate": 1.2919718552051975e-05, + "loss": 0.7702, "step": 2896 }, { - "epoch": 0.21921228860050698, - "grad_norm": 2.7359209060668945, - "learning_rate": 1.6627442545574122e-05, - "loss": 0.696, + "epoch": 0.2039422738472369, + "grad_norm": 2.3613922595977783, + "learning_rate": 1.2918866556902158e-05, + "loss": 0.7966, "step": 2897 }, { - "epoch": 0.2192879573228406, - "grad_norm": 2.2823524475097656, - "learning_rate": 1.6626246817323307e-05, - "loss": 0.7375, + "epoch": 0.20401267159450898, + "grad_norm": 2.1860833168029785, + "learning_rate": 1.291801425402511e-05, + "loss": 0.6658, "step": 2898 }, { - "epoch": 0.21936362604517423, - "grad_norm": 2.211632490158081, - "learning_rate": 1.6625050611492246e-05, - "loss": 0.8292, + "epoch": 0.20408306934178105, + "grad_norm": 1.856161117553711, + "learning_rate": 1.2917161643465138e-05, + "loss": 0.8094, "step": 2899 }, { - "epoch": 0.21943929476750784, - "grad_norm": 2.5959115028381348, - "learning_rate": 1.6623853928155857e-05, - "loss": 0.9269, + "epoch": 0.20415346708905316, + "grad_norm": 2.78106427192688, + "learning_rate": 1.2916308725266575e-05, + "loss": 0.7148, "step": 2900 }, { - "epoch": 0.21951496348984148, - "grad_norm": 2.787301778793335, - "learning_rate": 1.6622656767389077e-05, - "loss": 0.8806, + "epoch": 0.20422386483632524, + "grad_norm": 1.8995968103408813, + "learning_rate": 1.2915455499473762e-05, + "loss": 0.8199, "step": 2901 }, { - "epoch": 0.2195906322121751, - "grad_norm": 2.558061361312866, - "learning_rate": 1.6621459129266875e-05, - "loss": 0.8496, + "epoch": 0.2042942625835973, + "grad_norm": 2.266082525253296, + "learning_rate": 1.2914601966131061e-05, + "loss": 0.8534, "step": 2902 }, { - "epoch": 0.21966630093450873, - "grad_norm": 2.832548141479492, - "learning_rate": 1.662026101386425e-05, - "loss": 0.9092, + "epoch": 0.20436466033086942, + "grad_norm": 2.6306707859039307, + "learning_rate": 1.2913748125282849e-05, + "loss": 0.8553, "step": 2903 }, { - "epoch": 0.21974196965684234, - "grad_norm": 2.5366275310516357, - "learning_rate": 1.6619062421256235e-05, - "loss": 0.7832, + "epoch": 0.2044350580781415, + "grad_norm": 1.8121219873428345, + "learning_rate": 1.2912893976973514e-05, + "loss": 0.6494, "step": 2904 }, { - "epoch": 0.21981763837917598, - "grad_norm": 2.213609218597412, - "learning_rate": 1.6617863351517885e-05, - "loss": 0.9086, + "epoch": 0.20450545582541357, + "grad_norm": 3.189236879348755, + "learning_rate": 1.2912039521247465e-05, + "loss": 0.766, "step": 2905 }, { - "epoch": 0.2198933071015096, - "grad_norm": 2.412593126296997, - "learning_rate": 1.6616663804724297e-05, - "loss": 0.8328, + "epoch": 0.20457585357268568, + "grad_norm": 2.2701783180236816, + "learning_rate": 1.2911184758149127e-05, + "loss": 0.6702, "step": 2906 }, { - "epoch": 0.21996897582384323, - "grad_norm": 2.010378122329712, - "learning_rate": 1.6615463780950583e-05, - "loss": 0.8728, + "epoch": 0.20464625131995776, + "grad_norm": 2.1670026779174805, + "learning_rate": 1.2910329687722943e-05, + "loss": 0.7257, "step": 2907 }, { - "epoch": 0.22004464454617684, - "grad_norm": 2.071139335632324, - "learning_rate": 1.66142632802719e-05, - "loss": 0.6447, + "epoch": 0.20471664906722986, + "grad_norm": 1.777356505393982, + "learning_rate": 1.2909474310013364e-05, + "loss": 0.7617, "step": 2908 }, { - "epoch": 0.22012031326851045, - "grad_norm": 2.2854831218719482, - "learning_rate": 1.6613062302763417e-05, - "loss": 0.7125, + "epoch": 0.20478704681450194, + "grad_norm": 1.8031384944915771, + "learning_rate": 1.2908618625064865e-05, + "loss": 0.777, "step": 2909 }, { - "epoch": 0.2201959819908441, - "grad_norm": 2.3209803104400635, - "learning_rate": 1.6611860848500354e-05, - "loss": 0.7449, + "epoch": 0.20485744456177402, + "grad_norm": 2.091069221496582, + "learning_rate": 1.2907762632921932e-05, + "loss": 0.7017, "step": 2910 }, { - "epoch": 0.2202716507131777, - "grad_norm": 2.6688926219940186, - "learning_rate": 1.6610658917557942e-05, - "loss": 0.7961, + "epoch": 0.20492784230904612, + "grad_norm": 1.982006549835205, + "learning_rate": 1.2906906333629068e-05, + "loss": 0.7227, "step": 2911 }, { - "epoch": 0.22034731943551134, - "grad_norm": 2.9156992435455322, - "learning_rate": 1.6609456510011454e-05, - "loss": 0.7054, + "epoch": 0.2049982400563182, + "grad_norm": 2.096505880355835, + "learning_rate": 1.2906049727230795e-05, + "loss": 0.7961, "step": 2912 }, { - "epoch": 0.22042298815784495, - "grad_norm": 2.726867437362671, - "learning_rate": 1.6608253625936185e-05, - "loss": 0.9102, + "epoch": 0.20506863780359028, + "grad_norm": 1.738042950630188, + "learning_rate": 1.2905192813771648e-05, + "loss": 0.7264, "step": 2913 }, { - "epoch": 0.22049865688017858, - "grad_norm": 2.408320903778076, - "learning_rate": 1.6607050265407473e-05, - "loss": 0.6769, + "epoch": 0.20513903555086238, + "grad_norm": 2.8099374771118164, + "learning_rate": 1.2904335593296176e-05, + "loss": 0.8555, "step": 2914 }, { - "epoch": 0.2205743256025122, - "grad_norm": 2.9164679050445557, - "learning_rate": 1.660584642850066e-05, - "loss": 0.8634, + "epoch": 0.20520943329813446, + "grad_norm": 2.2637176513671875, + "learning_rate": 1.2903478065848955e-05, + "loss": 0.8351, "step": 2915 }, { - "epoch": 0.22064999432484583, - "grad_norm": 2.25538969039917, - "learning_rate": 1.660464211529115e-05, - "loss": 0.6191, + "epoch": 0.20527983104540654, + "grad_norm": 2.0306296348571777, + "learning_rate": 1.2902620231474559e-05, + "loss": 0.7509, "step": 2916 }, { - "epoch": 0.22072566304717944, - "grad_norm": 2.494601249694824, - "learning_rate": 1.660343732585435e-05, - "loss": 0.8541, + "epoch": 0.20535022879267864, + "grad_norm": 2.6248860359191895, + "learning_rate": 1.2901762090217591e-05, + "loss": 0.8289, "step": 2917 }, { - "epoch": 0.22080133176951308, - "grad_norm": 2.4575066566467285, - "learning_rate": 1.6602232060265712e-05, - "loss": 0.7013, + "epoch": 0.20542062653995072, + "grad_norm": 2.2198662757873535, + "learning_rate": 1.2900903642122667e-05, + "loss": 0.7655, "step": 2918 }, { - "epoch": 0.2208770004918467, - "grad_norm": 2.584141731262207, - "learning_rate": 1.660102631860072e-05, - "loss": 0.8288, + "epoch": 0.2054910242872228, + "grad_norm": 1.9832063913345337, + "learning_rate": 1.290004488723442e-05, + "loss": 0.6913, "step": 2919 }, { - "epoch": 0.22095266921418033, - "grad_norm": 2.5871658325195312, - "learning_rate": 1.659982010093487e-05, - "loss": 0.8565, + "epoch": 0.2055614220344949, + "grad_norm": 2.3485302925109863, + "learning_rate": 1.2899185825597493e-05, + "loss": 0.8328, "step": 2920 }, { - "epoch": 0.22102833793651394, - "grad_norm": 3.7710883617401123, - "learning_rate": 1.6598613407343707e-05, - "loss": 0.9081, + "epoch": 0.20563181978176698, + "grad_norm": 2.24904203414917, + "learning_rate": 1.2898326457256557e-05, + "loss": 0.7077, "step": 2921 }, { - "epoch": 0.22110400665884755, - "grad_norm": 2.9032387733459473, - "learning_rate": 1.659740623790279e-05, - "loss": 0.8016, + "epoch": 0.20570221752903908, + "grad_norm": 2.032475471496582, + "learning_rate": 1.2897466782256282e-05, + "loss": 0.8003, "step": 2922 }, { - "epoch": 0.2211796753811812, - "grad_norm": 2.4792866706848145, - "learning_rate": 1.6596198592687727e-05, - "loss": 0.6606, + "epoch": 0.20577261527631116, + "grad_norm": 2.842956304550171, + "learning_rate": 1.2896606800641372e-05, + "loss": 0.7039, "step": 2923 }, { - "epoch": 0.2212553441035148, - "grad_norm": 2.214895486831665, - "learning_rate": 1.6594990471774135e-05, - "loss": 0.7439, + "epoch": 0.20584301302358324, + "grad_norm": 2.5498368740081787, + "learning_rate": 1.2895746512456532e-05, + "loss": 0.7233, "step": 2924 }, { - "epoch": 0.22133101282584844, - "grad_norm": 2.056378126144409, - "learning_rate": 1.659378187523768e-05, - "loss": 0.794, + "epoch": 0.20591341077085534, + "grad_norm": 1.9273475408554077, + "learning_rate": 1.2894885917746491e-05, + "loss": 0.7536, "step": 2925 }, { - "epoch": 0.22140668154818205, - "grad_norm": 2.1232733726501465, - "learning_rate": 1.659257280315404e-05, - "loss": 0.7306, + "epoch": 0.20598380851812742, + "grad_norm": 2.493891716003418, + "learning_rate": 1.2894025016555995e-05, + "loss": 0.7637, "step": 2926 }, { - "epoch": 0.2214823502705157, - "grad_norm": 2.4625091552734375, - "learning_rate": 1.659136325559893e-05, - "loss": 0.8396, + "epoch": 0.2060542062653995, + "grad_norm": 2.4803435802459717, + "learning_rate": 1.2893163808929798e-05, + "loss": 0.6384, "step": 2927 }, { - "epoch": 0.2215580189928493, - "grad_norm": 2.7897610664367676, - "learning_rate": 1.6590153232648106e-05, - "loss": 0.8533, + "epoch": 0.2061246040126716, + "grad_norm": 2.988567352294922, + "learning_rate": 1.2892302294912678e-05, + "loss": 0.7345, "step": 2928 }, { - "epoch": 0.22163368771518294, - "grad_norm": 2.2553727626800537, - "learning_rate": 1.6588942734377333e-05, - "loss": 0.6447, + "epoch": 0.20619500175994368, + "grad_norm": 2.2329914569854736, + "learning_rate": 1.2891440474549424e-05, + "loss": 0.7217, "step": 2929 }, { - "epoch": 0.22170935643751655, - "grad_norm": 2.372699737548828, - "learning_rate": 1.658773176086242e-05, - "loss": 0.7301, + "epoch": 0.20626539950721576, + "grad_norm": 3.084268569946289, + "learning_rate": 1.2890578347884846e-05, + "loss": 0.8701, "step": 2930 }, { - "epoch": 0.22178502515985019, - "grad_norm": 2.075169086456299, - "learning_rate": 1.6586520312179203e-05, - "loss": 0.7797, + "epoch": 0.20633579725448786, + "grad_norm": 2.3218142986297607, + "learning_rate": 1.2889715914963762e-05, + "loss": 0.8188, "step": 2931 }, { - "epoch": 0.2218606938821838, - "grad_norm": 1.9998425245285034, - "learning_rate": 1.658530838840355e-05, - "loss": 0.8402, + "epoch": 0.20640619500175994, + "grad_norm": 2.604203224182129, + "learning_rate": 1.2888853175831017e-05, + "loss": 0.8558, "step": 2932 }, { - "epoch": 0.22193636260451743, - "grad_norm": 2.4670629501342773, - "learning_rate": 1.658409598961135e-05, - "loss": 0.7589, + "epoch": 0.20647659274903202, + "grad_norm": 1.9492626190185547, + "learning_rate": 1.288799013053146e-05, + "loss": 0.686, "step": 2933 }, { - "epoch": 0.22201203132685104, - "grad_norm": 1.9075826406478882, - "learning_rate": 1.6582883115878526e-05, - "loss": 0.7386, + "epoch": 0.20654699049630412, + "grad_norm": 2.570934534072876, + "learning_rate": 1.2887126779109965e-05, + "loss": 0.771, "step": 2934 }, { - "epoch": 0.22208770004918468, - "grad_norm": 2.3455803394317627, - "learning_rate": 1.6581669767281037e-05, - "loss": 0.8606, + "epoch": 0.2066173882435762, + "grad_norm": 2.85846209526062, + "learning_rate": 1.2886263121611416e-05, + "loss": 0.8736, "step": 2935 }, { - "epoch": 0.2221633687715183, - "grad_norm": 3.289113998413086, - "learning_rate": 1.6580455943894866e-05, - "loss": 0.5393, + "epoch": 0.2066877859908483, + "grad_norm": 3.319136619567871, + "learning_rate": 1.2885399158080718e-05, + "loss": 0.8952, "step": 2936 }, { - "epoch": 0.2222390374938519, - "grad_norm": 1.9734731912612915, - "learning_rate": 1.6579241645796026e-05, - "loss": 0.8515, + "epoch": 0.20675818373812038, + "grad_norm": 1.875622034072876, + "learning_rate": 1.2884534888562786e-05, + "loss": 0.6846, "step": 2937 }, { - "epoch": 0.22231470621618554, - "grad_norm": 2.288149118423462, - "learning_rate": 1.6578026873060556e-05, - "loss": 0.7706, + "epoch": 0.20682858148539246, + "grad_norm": 2.0086350440979004, + "learning_rate": 1.2883670313102557e-05, + "loss": 0.7173, "step": 2938 }, { - "epoch": 0.22239037493851915, - "grad_norm": 2.2239696979522705, - "learning_rate": 1.6576811625764537e-05, - "loss": 0.882, + "epoch": 0.20689897923266456, + "grad_norm": 1.9741716384887695, + "learning_rate": 1.2882805431744979e-05, + "loss": 0.83, "step": 2939 }, { - "epoch": 0.2224660436608528, - "grad_norm": 2.2956271171569824, - "learning_rate": 1.6575595903984065e-05, - "loss": 0.704, + "epoch": 0.20696937697993664, + "grad_norm": 1.8565759658813477, + "learning_rate": 1.288194024453502e-05, + "loss": 0.728, "step": 2940 }, { - "epoch": 0.2225417123831864, - "grad_norm": 2.0809905529022217, - "learning_rate": 1.6574379707795277e-05, - "loss": 1.0292, + "epoch": 0.20703977472720872, + "grad_norm": 2.306332588195801, + "learning_rate": 1.2881074751517658e-05, + "loss": 0.6522, "step": 2941 }, { - "epoch": 0.22261738110552004, - "grad_norm": 2.3830835819244385, - "learning_rate": 1.6573163037274333e-05, - "loss": 0.6888, + "epoch": 0.20711017247448082, + "grad_norm": 2.625253200531006, + "learning_rate": 1.2880208952737896e-05, + "loss": 0.7052, "step": 2942 }, { - "epoch": 0.22269304982785365, - "grad_norm": 2.550462007522583, - "learning_rate": 1.6571945892497423e-05, - "loss": 0.7566, + "epoch": 0.2071805702217529, + "grad_norm": 2.0825912952423096, + "learning_rate": 1.2879342848240744e-05, + "loss": 0.7516, "step": 2943 }, { - "epoch": 0.2227687185501873, - "grad_norm": 2.1050031185150146, - "learning_rate": 1.6570728273540773e-05, - "loss": 0.7667, + "epoch": 0.20725096796902498, + "grad_norm": 1.792840838432312, + "learning_rate": 1.2878476438071232e-05, + "loss": 0.7393, "step": 2944 }, { - "epoch": 0.2228443872725209, - "grad_norm": 3.8642396926879883, - "learning_rate": 1.6569510180480632e-05, - "loss": 0.7821, + "epoch": 0.20732136571629708, + "grad_norm": 4.916806221008301, + "learning_rate": 1.2877609722274408e-05, + "loss": 0.8064, "step": 2945 }, { - "epoch": 0.22292005599485454, - "grad_norm": 2.2704646587371826, - "learning_rate": 1.656829161339328e-05, - "loss": 0.8253, + "epoch": 0.20739176346356916, + "grad_norm": 2.1589739322662354, + "learning_rate": 1.287674270089533e-05, + "loss": 0.7475, "step": 2946 }, { - "epoch": 0.22299572471718815, - "grad_norm": 4.2045111656188965, - "learning_rate": 1.6567072572355026e-05, - "loss": 0.7635, + "epoch": 0.20746216121084124, + "grad_norm": 3.2354280948638916, + "learning_rate": 1.2875875373979076e-05, + "loss": 0.6974, "step": 2947 }, { - "epoch": 0.2230713934395218, - "grad_norm": 2.0793330669403076, - "learning_rate": 1.656585305744222e-05, - "loss": 0.8678, + "epoch": 0.20753255895811334, + "grad_norm": 2.1476645469665527, + "learning_rate": 1.2875007741570744e-05, + "loss": 0.7717, "step": 2948 }, { - "epoch": 0.2231470621618554, - "grad_norm": 2.1388299465179443, - "learning_rate": 1.6564633068731215e-05, - "loss": 0.7312, + "epoch": 0.20760295670538542, + "grad_norm": 4.201458930969238, + "learning_rate": 1.2874139803715434e-05, + "loss": 0.7375, "step": 2949 }, { - "epoch": 0.223222730884189, - "grad_norm": 2.120715856552124, - "learning_rate": 1.6563412606298426e-05, - "loss": 0.7689, + "epoch": 0.20767335445265753, + "grad_norm": 2.081859588623047, + "learning_rate": 1.2873271560458277e-05, + "loss": 0.8557, "step": 2950 }, { - "epoch": 0.22329839960652265, - "grad_norm": 2.12400484085083, - "learning_rate": 1.6562191670220272e-05, - "loss": 0.6913, + "epoch": 0.2077437521999296, + "grad_norm": 1.77464759349823, + "learning_rate": 1.2872403011844415e-05, + "loss": 0.7297, "step": 2951 }, { - "epoch": 0.22337406832885626, - "grad_norm": 2.987435817718506, - "learning_rate": 1.656097026057322e-05, - "loss": 0.7776, + "epoch": 0.20781414994720168, + "grad_norm": 2.1298911571502686, + "learning_rate": 1.2871534157918998e-05, + "loss": 0.6886, "step": 2952 }, { - "epoch": 0.2234497370511899, - "grad_norm": 2.098076105117798, - "learning_rate": 1.6559748377433756e-05, - "loss": 0.66, + "epoch": 0.2078845476944738, + "grad_norm": 2.9709126949310303, + "learning_rate": 1.2870664998727204e-05, + "loss": 0.694, "step": 2953 }, { - "epoch": 0.2235254057735235, - "grad_norm": 2.531951904296875, - "learning_rate": 1.6558526020878395e-05, - "loss": 0.8598, + "epoch": 0.20795494544174586, + "grad_norm": 2.084395170211792, + "learning_rate": 1.2869795534314222e-05, + "loss": 0.7137, "step": 2954 }, { - "epoch": 0.22360107449585714, - "grad_norm": 2.2897439002990723, - "learning_rate": 1.655730319098369e-05, - "loss": 0.8132, + "epoch": 0.20802534318901794, + "grad_norm": 2.20050048828125, + "learning_rate": 1.2868925764725251e-05, + "loss": 0.7864, "step": 2955 }, { - "epoch": 0.22367674321819075, - "grad_norm": 2.279578924179077, - "learning_rate": 1.6556079887826215e-05, - "loss": 0.7632, + "epoch": 0.20809574093629005, + "grad_norm": 2.1273438930511475, + "learning_rate": 1.2868055690005517e-05, + "loss": 0.7795, "step": 2956 }, { - "epoch": 0.2237524119405244, - "grad_norm": 2.306779623031616, - "learning_rate": 1.6554856111482576e-05, - "loss": 0.8243, + "epoch": 0.20816613868356212, + "grad_norm": 1.9609476327896118, + "learning_rate": 1.286718531020025e-05, + "loss": 0.6847, "step": 2957 }, { - "epoch": 0.223828080662858, - "grad_norm": 2.461686849594116, - "learning_rate": 1.6553631862029413e-05, - "loss": 0.7213, + "epoch": 0.2082365364308342, + "grad_norm": 2.0419363975524902, + "learning_rate": 1.286631462535471e-05, + "loss": 0.651, "step": 2958 }, { - "epoch": 0.22390374938519164, - "grad_norm": 2.0661559104919434, - "learning_rate": 1.6552407139543393e-05, - "loss": 0.8158, + "epoch": 0.2083069341781063, + "grad_norm": 2.424896478652954, + "learning_rate": 1.2865443635514155e-05, + "loss": 0.7778, "step": 2959 }, { - "epoch": 0.22397941810752525, - "grad_norm": 2.2545764446258545, - "learning_rate": 1.655118194410121e-05, - "loss": 0.7361, + "epoch": 0.20837733192537838, + "grad_norm": 2.2687911987304688, + "learning_rate": 1.2864572340723876e-05, + "loss": 0.7618, "step": 2960 }, { - "epoch": 0.2240550868298589, - "grad_norm": 2.0152387619018555, - "learning_rate": 1.6549956275779588e-05, - "loss": 0.7121, + "epoch": 0.20844772967265046, + "grad_norm": 1.8922141790390015, + "learning_rate": 1.286370074102917e-05, + "loss": 0.9435, "step": 2961 }, { - "epoch": 0.2241307555521925, - "grad_norm": 2.965620756149292, - "learning_rate": 1.6548730134655286e-05, - "loss": 0.6965, + "epoch": 0.20851812741992257, + "grad_norm": 2.1771111488342285, + "learning_rate": 1.286282883647535e-05, + "loss": 0.7413, "step": 2962 }, { - "epoch": 0.2242064242745261, - "grad_norm": 2.6358821392059326, - "learning_rate": 1.6547503520805087e-05, - "loss": 0.7382, + "epoch": 0.20858852516719464, + "grad_norm": 6.954104423522949, + "learning_rate": 1.286195662710775e-05, + "loss": 0.776, "step": 2963 }, { - "epoch": 0.22428209299685975, - "grad_norm": 3.588127613067627, - "learning_rate": 1.6546276434305805e-05, - "loss": 0.868, + "epoch": 0.20865892291446675, + "grad_norm": 3.183263063430786, + "learning_rate": 1.2861084112971717e-05, + "loss": 0.8058, "step": 2964 }, { - "epoch": 0.22435776171919336, - "grad_norm": 2.1781110763549805, - "learning_rate": 1.654504887523429e-05, - "loss": 0.829, + "epoch": 0.20872932066173883, + "grad_norm": 3.5537304878234863, + "learning_rate": 1.2860211294112612e-05, + "loss": 0.8838, "step": 2965 }, { - "epoch": 0.224433430441527, - "grad_norm": 2.047546863555908, - "learning_rate": 1.6543820843667405e-05, - "loss": 0.6987, + "epoch": 0.2087997184090109, + "grad_norm": 2.1024482250213623, + "learning_rate": 1.2859338170575815e-05, + "loss": 0.7075, "step": 2966 }, { - "epoch": 0.2245090991638606, - "grad_norm": 2.5507969856262207, - "learning_rate": 1.654259233968206e-05, - "loss": 0.6387, + "epoch": 0.208870116156283, + "grad_norm": 1.9769837856292725, + "learning_rate": 1.285846474240672e-05, + "loss": 0.8607, "step": 2967 }, { - "epoch": 0.22458476788619425, - "grad_norm": 2.1714725494384766, - "learning_rate": 1.654136336335519e-05, - "loss": 0.8598, + "epoch": 0.2089405139035551, + "grad_norm": 1.9503802061080933, + "learning_rate": 1.2857591009650735e-05, + "loss": 0.7236, "step": 2968 }, { - "epoch": 0.22466043660852786, - "grad_norm": 2.6356706619262695, - "learning_rate": 1.654013391476375e-05, - "loss": 0.8211, + "epoch": 0.20901091165082716, + "grad_norm": 2.055708646774292, + "learning_rate": 1.2856716972353292e-05, + "loss": 0.7197, "step": 2969 }, { - "epoch": 0.2247361053308615, - "grad_norm": 2.035926342010498, - "learning_rate": 1.653890399398474e-05, - "loss": 0.6791, + "epoch": 0.20908130939809927, + "grad_norm": 2.288564682006836, + "learning_rate": 1.2855842630559827e-05, + "loss": 0.5981, "step": 2970 }, { - "epoch": 0.2248117740531951, - "grad_norm": 2.380887508392334, - "learning_rate": 1.6537673601095178e-05, - "loss": 0.8578, + "epoch": 0.20915170714537135, + "grad_norm": 2.139580011367798, + "learning_rate": 1.2854967984315801e-05, + "loss": 0.7552, "step": 2971 }, { - "epoch": 0.22488744277552875, - "grad_norm": 2.41847562789917, - "learning_rate": 1.6536442736172114e-05, - "loss": 0.8885, + "epoch": 0.20922210489264342, + "grad_norm": 1.9567166566848755, + "learning_rate": 1.2854093033666689e-05, + "loss": 0.673, "step": 2972 }, { - "epoch": 0.22496311149786236, - "grad_norm": 2.5144875049591064, - "learning_rate": 1.653521139929263e-05, - "loss": 0.8791, + "epoch": 0.20929250263991553, + "grad_norm": 2.0222623348236084, + "learning_rate": 1.2853217778657975e-05, + "loss": 0.7723, "step": 2973 }, { - "epoch": 0.225038780220196, - "grad_norm": 2.021480083465576, - "learning_rate": 1.6533979590533838e-05, - "loss": 0.8099, + "epoch": 0.2093629003871876, + "grad_norm": 2.0530974864959717, + "learning_rate": 1.2852342219335169e-05, + "loss": 0.6352, "step": 2974 }, { - "epoch": 0.2251144489425296, - "grad_norm": 2.1968541145324707, - "learning_rate": 1.6532747309972876e-05, - "loss": 0.762, + "epoch": 0.2094332981344597, + "grad_norm": 2.242865800857544, + "learning_rate": 1.2851466355743791e-05, + "loss": 0.8141, "step": 2975 }, { - "epoch": 0.22519011766486322, - "grad_norm": 1.8280404806137085, - "learning_rate": 1.6531514557686913e-05, - "loss": 0.7402, + "epoch": 0.2095036958817318, + "grad_norm": 2.324664354324341, + "learning_rate": 1.2850590187929377e-05, + "loss": 0.7608, "step": 2976 }, { - "epoch": 0.22526578638719685, - "grad_norm": 2.3831984996795654, - "learning_rate": 1.6530281333753148e-05, - "loss": 0.7373, + "epoch": 0.20957409362900387, + "grad_norm": 2.3167903423309326, + "learning_rate": 1.284971371593748e-05, + "loss": 0.8451, "step": 2977 }, { - "epoch": 0.22534145510953046, - "grad_norm": 2.2614855766296387, - "learning_rate": 1.6529047638248808e-05, - "loss": 0.7542, + "epoch": 0.20964449137627597, + "grad_norm": 1.9584439992904663, + "learning_rate": 1.2848836939813672e-05, + "loss": 0.7223, "step": 2978 }, { - "epoch": 0.2254171238318641, - "grad_norm": 2.100390911102295, - "learning_rate": 1.6527813471251158e-05, - "loss": 0.7678, + "epoch": 0.20971488912354805, + "grad_norm": 2.2730774879455566, + "learning_rate": 1.2847959859603532e-05, + "loss": 0.7603, "step": 2979 }, { - "epoch": 0.2254927925541977, - "grad_norm": 2.33455491065979, - "learning_rate": 1.6526578832837476e-05, - "loss": 0.8819, + "epoch": 0.20978528687082013, + "grad_norm": 2.0540688037872314, + "learning_rate": 1.2847082475352662e-05, + "loss": 0.7106, "step": 2980 }, { - "epoch": 0.22556846127653135, - "grad_norm": 1.782531976699829, - "learning_rate": 1.6525343723085085e-05, - "loss": 0.6764, + "epoch": 0.20985568461809223, + "grad_norm": 2.425849437713623, + "learning_rate": 1.2846204787106681e-05, + "loss": 0.7864, "step": 2981 }, { - "epoch": 0.22564412999886496, - "grad_norm": 2.2353334426879883, - "learning_rate": 1.652410814207133e-05, - "loss": 0.7287, + "epoch": 0.2099260823653643, + "grad_norm": 1.8646453619003296, + "learning_rate": 1.2845326794911217e-05, + "loss": 0.7076, "step": 2982 }, { - "epoch": 0.2257197987211986, - "grad_norm": 3.260190486907959, - "learning_rate": 1.652287208987359e-05, - "loss": 0.8681, + "epoch": 0.20999648011263639, + "grad_norm": 2.402172565460205, + "learning_rate": 1.2844448498811919e-05, + "loss": 0.6891, "step": 2983 }, { - "epoch": 0.2257954674435322, - "grad_norm": 2.1882760524749756, - "learning_rate": 1.6521635566569266e-05, - "loss": 0.8207, + "epoch": 0.2100668778599085, + "grad_norm": 2.184095859527588, + "learning_rate": 1.2843569898854452e-05, + "loss": 0.7588, "step": 2984 }, { - "epoch": 0.22587113616586585, - "grad_norm": 2.4284403324127197, - "learning_rate": 1.6520398572235794e-05, - "loss": 0.8992, + "epoch": 0.21013727560718057, + "grad_norm": 2.4106931686401367, + "learning_rate": 1.2842690995084495e-05, + "loss": 0.8135, "step": 2985 }, { - "epoch": 0.22594680488819946, - "grad_norm": 2.3258399963378906, - "learning_rate": 1.6519161106950638e-05, - "loss": 0.9157, + "epoch": 0.21020767335445265, + "grad_norm": 2.2709105014801025, + "learning_rate": 1.2841811787547737e-05, + "loss": 0.7121, "step": 2986 }, { - "epoch": 0.2260224736105331, - "grad_norm": 2.984490156173706, - "learning_rate": 1.6517923170791298e-05, - "loss": 0.8868, + "epoch": 0.21027807110172475, + "grad_norm": 2.3836894035339355, + "learning_rate": 1.2840932276289898e-05, + "loss": 0.6981, "step": 2987 }, { - "epoch": 0.2260981423328667, - "grad_norm": 2.800849437713623, - "learning_rate": 1.651668476383529e-05, - "loss": 0.7888, + "epoch": 0.21034846884899683, + "grad_norm": 2.4069457054138184, + "learning_rate": 1.2840052461356698e-05, + "loss": 0.7624, "step": 2988 }, { - "epoch": 0.22617381105520035, - "grad_norm": 2.9878838062286377, - "learning_rate": 1.651544588616017e-05, - "loss": 0.7345, + "epoch": 0.21041886659626893, + "grad_norm": 2.672821521759033, + "learning_rate": 1.2839172342793882e-05, + "loss": 0.6154, "step": 2989 }, { - "epoch": 0.22624947977753396, - "grad_norm": 3.69124174118042, - "learning_rate": 1.651420653784352e-05, - "loss": 0.744, + "epoch": 0.210489264343541, + "grad_norm": 2.0721442699432373, + "learning_rate": 1.2838291920647206e-05, + "loss": 0.6433, "step": 2990 }, { - "epoch": 0.22632514849986757, - "grad_norm": 2.3240010738372803, - "learning_rate": 1.6512966718962958e-05, - "loss": 0.7195, + "epoch": 0.2105596620908131, + "grad_norm": 2.4083735942840576, + "learning_rate": 1.2837411194962448e-05, + "loss": 0.7885, "step": 2991 }, { - "epoch": 0.2264008172222012, - "grad_norm": 2.4493560791015625, - "learning_rate": 1.6511726429596115e-05, - "loss": 0.8408, + "epoch": 0.2106300598380852, + "grad_norm": 2.296011209487915, + "learning_rate": 1.2836530165785393e-05, + "loss": 0.7022, "step": 2992 }, { - "epoch": 0.22647648594453482, - "grad_norm": 2.2705140113830566, - "learning_rate": 1.6510485669820668e-05, - "loss": 0.601, + "epoch": 0.21070045758535727, + "grad_norm": 2.1844217777252197, + "learning_rate": 1.283564883316185e-05, + "loss": 0.8319, "step": 2993 }, { - "epoch": 0.22655215466686845, - "grad_norm": 2.269789695739746, - "learning_rate": 1.6509244439714317e-05, - "loss": 0.8425, + "epoch": 0.21077085533262935, + "grad_norm": 1.962600588798523, + "learning_rate": 1.283476719713764e-05, + "loss": 0.7065, "step": 2994 }, { - "epoch": 0.22662782338920207, - "grad_norm": 2.8810999393463135, - "learning_rate": 1.6508002739354793e-05, - "loss": 0.7285, + "epoch": 0.21084125307990145, + "grad_norm": 2.6580471992492676, + "learning_rate": 1.2833885257758597e-05, + "loss": 0.7236, "step": 2995 }, { - "epoch": 0.2267034921115357, - "grad_norm": 2.548349142074585, - "learning_rate": 1.650676056881985e-05, - "loss": 0.7676, + "epoch": 0.21091165082717353, + "grad_norm": 1.9748520851135254, + "learning_rate": 1.2833003015070578e-05, + "loss": 0.8103, "step": 2996 }, { - "epoch": 0.22677916083386931, - "grad_norm": 3.094545841217041, - "learning_rate": 1.6505517928187282e-05, - "loss": 0.9013, + "epoch": 0.2109820485744456, + "grad_norm": 2.0917856693267822, + "learning_rate": 1.283212046911945e-05, + "loss": 0.7377, "step": 2997 }, { - "epoch": 0.22685482955620295, - "grad_norm": 2.7249162197113037, - "learning_rate": 1.6504274817534906e-05, - "loss": 0.6787, + "epoch": 0.2110524463217177, + "grad_norm": 2.4745872020721436, + "learning_rate": 1.2831237619951093e-05, + "loss": 0.699, "step": 2998 }, { - "epoch": 0.22693049827853656, - "grad_norm": 2.3722381591796875, - "learning_rate": 1.650303123694057e-05, - "loss": 0.6625, + "epoch": 0.2111228440689898, + "grad_norm": 2.1423213481903076, + "learning_rate": 1.2830354467611416e-05, + "loss": 0.7102, "step": 2999 }, { - "epoch": 0.2270061670008702, - "grad_norm": 2.4005649089813232, - "learning_rate": 1.650178718648215e-05, - "loss": 0.7726, + "epoch": 0.21119324181626187, + "grad_norm": 2.5388002395629883, + "learning_rate": 1.2829471012146328e-05, + "loss": 0.9133, "step": 3000 }, { - "epoch": 0.2270818357232038, - "grad_norm": 2.658586263656616, - "learning_rate": 1.6500542666237553e-05, - "loss": 0.7898, + "epoch": 0.21126363956353397, + "grad_norm": 2.128840446472168, + "learning_rate": 1.2828587253601762e-05, + "loss": 0.721, "step": 3001 }, { - "epoch": 0.22715750444553745, - "grad_norm": 2.125000238418579, - "learning_rate": 1.649929767628471e-05, - "loss": 0.7455, + "epoch": 0.21133403731080605, + "grad_norm": 2.150670289993286, + "learning_rate": 1.2827703192023666e-05, + "loss": 0.7463, "step": 3002 }, { - "epoch": 0.22723317316787106, - "grad_norm": 2.5988640785217285, - "learning_rate": 1.6498052216701595e-05, - "loss": 0.7898, + "epoch": 0.21140443505807816, + "grad_norm": 2.4538185596466064, + "learning_rate": 1.2826818827458002e-05, + "loss": 0.7868, "step": 3003 }, { - "epoch": 0.22730884189020467, - "grad_norm": 2.0731637477874756, - "learning_rate": 1.64968062875662e-05, - "loss": 0.6588, + "epoch": 0.21147483280535023, + "grad_norm": 2.219212532043457, + "learning_rate": 1.2825934159950752e-05, + "loss": 0.6848, "step": 3004 }, { - "epoch": 0.2273845106125383, - "grad_norm": 2.0830650329589844, - "learning_rate": 1.6495559888956544e-05, - "loss": 0.7186, + "epoch": 0.2115452305526223, + "grad_norm": 2.7086071968078613, + "learning_rate": 1.2825049189547909e-05, + "loss": 0.7869, "step": 3005 }, { - "epoch": 0.22746017933487192, - "grad_norm": 4.79794979095459, - "learning_rate": 1.6494313020950687e-05, - "loss": 0.7021, + "epoch": 0.21161562829989441, + "grad_norm": 2.2416110038757324, + "learning_rate": 1.2824163916295482e-05, + "loss": 0.7444, "step": 3006 }, { - "epoch": 0.22753584805720556, - "grad_norm": 3.0674867630004883, - "learning_rate": 1.6493065683626706e-05, - "loss": 0.639, + "epoch": 0.2116860260471665, + "grad_norm": 2.158076524734497, + "learning_rate": 1.2823278340239497e-05, + "loss": 0.7952, "step": 3007 }, { - "epoch": 0.22761151677953917, - "grad_norm": 2.5474140644073486, - "learning_rate": 1.6491817877062718e-05, - "loss": 0.7456, + "epoch": 0.21175642379443857, + "grad_norm": 2.026205062866211, + "learning_rate": 1.2822392461426002e-05, + "loss": 0.7525, "step": 3008 }, { - "epoch": 0.2276871855018728, - "grad_norm": 3.6565370559692383, - "learning_rate": 1.6490569601336864e-05, - "loss": 0.7771, + "epoch": 0.21182682154171067, + "grad_norm": 2.1606574058532715, + "learning_rate": 1.2821506279901045e-05, + "loss": 0.8015, "step": 3009 }, { - "epoch": 0.22776285422420642, - "grad_norm": 2.16518497467041, - "learning_rate": 1.6489320856527312e-05, - "loss": 0.7738, + "epoch": 0.21189721928898275, + "grad_norm": 2.3410604000091553, + "learning_rate": 1.2820619795710705e-05, + "loss": 0.7997, "step": 3010 }, { - "epoch": 0.22783852294654006, - "grad_norm": 2.3201420307159424, - "learning_rate": 1.648807164271227e-05, - "loss": 0.7261, + "epoch": 0.21196761703625483, + "grad_norm": 2.24605131149292, + "learning_rate": 1.2819733008901072e-05, + "loss": 0.8502, "step": 3011 }, { - "epoch": 0.22791419166887367, - "grad_norm": 2.659714698791504, - "learning_rate": 1.6486821959969954e-05, - "loss": 0.7438, + "epoch": 0.21203801478352693, + "grad_norm": 2.9325575828552246, + "learning_rate": 1.2818845919518249e-05, + "loss": 0.7483, "step": 3012 }, { - "epoch": 0.2279898603912073, - "grad_norm": 2.2226853370666504, - "learning_rate": 1.6485571808378637e-05, - "loss": 0.6663, + "epoch": 0.212108412530799, + "grad_norm": 2.3141210079193115, + "learning_rate": 1.2817958527608358e-05, + "loss": 0.7408, "step": 3013 }, { - "epoch": 0.22806552911354092, - "grad_norm": 2.8635661602020264, - "learning_rate": 1.64843211880166e-05, - "loss": 0.6857, + "epoch": 0.2121788102780711, + "grad_norm": 2.1765236854553223, + "learning_rate": 1.281707083321753e-05, + "loss": 0.8486, "step": 3014 }, { - "epoch": 0.22814119783587455, - "grad_norm": 2.4834866523742676, - "learning_rate": 1.6483070098962165e-05, - "loss": 0.7628, + "epoch": 0.2122492080253432, + "grad_norm": 2.2700653076171875, + "learning_rate": 1.2816182836391925e-05, + "loss": 0.8677, "step": 3015 }, { - "epoch": 0.22821686655820816, - "grad_norm": 2.6139743328094482, - "learning_rate": 1.6481818541293675e-05, - "loss": 0.8111, + "epoch": 0.21231960577261527, + "grad_norm": 2.590378761291504, + "learning_rate": 1.2815294537177707e-05, + "loss": 0.6698, "step": 3016 }, { - "epoch": 0.22829253528054178, - "grad_norm": 2.68338942527771, - "learning_rate": 1.648056651508951e-05, - "loss": 0.6424, + "epoch": 0.21239000351988738, + "grad_norm": 2.1986477375030518, + "learning_rate": 1.2814405935621056e-05, + "loss": 0.6886, "step": 3017 }, { - "epoch": 0.2283682040028754, - "grad_norm": 2.5253429412841797, - "learning_rate": 1.6479314020428078e-05, - "loss": 0.8043, + "epoch": 0.21246040126715945, + "grad_norm": 2.399798631668091, + "learning_rate": 1.2813517031768177e-05, + "loss": 0.7428, "step": 3018 }, { - "epoch": 0.22844387272520902, - "grad_norm": 3.081286907196045, - "learning_rate": 1.6478061057387804e-05, - "loss": 0.785, + "epoch": 0.21253079901443153, + "grad_norm": 2.0437734127044678, + "learning_rate": 1.2812627825665281e-05, + "loss": 0.7163, "step": 3019 }, { - "epoch": 0.22851954144754266, - "grad_norm": 2.6442153453826904, - "learning_rate": 1.6476807626047164e-05, - "loss": 0.6959, + "epoch": 0.21260119676170364, + "grad_norm": 2.386971950531006, + "learning_rate": 1.2811738317358602e-05, + "loss": 0.8669, "step": 3020 }, { - "epoch": 0.22859521016987627, - "grad_norm": 2.3536460399627686, - "learning_rate": 1.6475553726484645e-05, - "loss": 0.8099, + "epoch": 0.21267159450897571, + "grad_norm": 2.5105223655700684, + "learning_rate": 1.2810848506894384e-05, + "loss": 0.6908, "step": 3021 }, { - "epoch": 0.2286708788922099, - "grad_norm": 2.2801482677459717, - "learning_rate": 1.647429935877878e-05, - "loss": 0.8345, + "epoch": 0.2127419922562478, + "grad_norm": 2.2760958671569824, + "learning_rate": 1.280995839431889e-05, + "loss": 0.6872, "step": 3022 }, { - "epoch": 0.22874654761454352, - "grad_norm": 2.2949955463409424, - "learning_rate": 1.6473044523008106e-05, - "loss": 0.9149, + "epoch": 0.2128123900035199, + "grad_norm": 1.9517513513565063, + "learning_rate": 1.2809067979678395e-05, + "loss": 0.6715, "step": 3023 }, { - "epoch": 0.22882221633687716, - "grad_norm": 2.6356618404388428, - "learning_rate": 1.6471789219251216e-05, - "loss": 0.7342, + "epoch": 0.21288278775079197, + "grad_norm": 2.1396031379699707, + "learning_rate": 1.2808177263019198e-05, + "loss": 0.6835, "step": 3024 }, { - "epoch": 0.22889788505921077, - "grad_norm": 2.3132026195526123, - "learning_rate": 1.647053344758672e-05, - "loss": 0.6673, + "epoch": 0.21295318549806405, + "grad_norm": 2.4746510982513428, + "learning_rate": 1.2807286244387601e-05, + "loss": 0.7441, "step": 3025 }, { - "epoch": 0.2289735537815444, - "grad_norm": 2.86631178855896, - "learning_rate": 1.6469277208093256e-05, - "loss": 0.8654, + "epoch": 0.21302358324533616, + "grad_norm": 2.086435556411743, + "learning_rate": 1.2806394923829935e-05, + "loss": 0.8032, "step": 3026 }, { - "epoch": 0.22904922250387802, - "grad_norm": 2.4042892456054688, - "learning_rate": 1.646802050084949e-05, - "loss": 0.7001, + "epoch": 0.21309398099260823, + "grad_norm": 2.039548873901367, + "learning_rate": 1.2805503301392538e-05, + "loss": 0.684, "step": 3027 }, { - "epoch": 0.22912489122621166, - "grad_norm": 2.6637120246887207, - "learning_rate": 1.6466763325934133e-05, - "loss": 0.7731, + "epoch": 0.2131643787398803, + "grad_norm": 2.899156093597412, + "learning_rate": 1.2804611377121768e-05, + "loss": 0.8287, "step": 3028 }, { - "epoch": 0.22920055994854527, - "grad_norm": 3.4634320735931396, - "learning_rate": 1.64655056834259e-05, - "loss": 0.9874, + "epoch": 0.21323477648715242, + "grad_norm": 2.2719767093658447, + "learning_rate": 1.2803719151063992e-05, + "loss": 0.6684, "step": 3029 }, { - "epoch": 0.22927622867087888, - "grad_norm": 2.559786558151245, - "learning_rate": 1.646424757340356e-05, - "loss": 0.788, + "epoch": 0.2133051742344245, + "grad_norm": 2.26334810256958, + "learning_rate": 1.2802826623265606e-05, + "loss": 0.854, "step": 3030 }, { - "epoch": 0.22935189739321252, - "grad_norm": 2.1549887657165527, - "learning_rate": 1.646298899594589e-05, - "loss": 0.7187, + "epoch": 0.2133755719816966, + "grad_norm": 2.1905629634857178, + "learning_rate": 1.2801933793773003e-05, + "loss": 0.8057, "step": 3031 }, { - "epoch": 0.22942756611554613, - "grad_norm": 2.1260504722595215, - "learning_rate": 1.6461729951131712e-05, - "loss": 0.7892, + "epoch": 0.21344596972896868, + "grad_norm": 2.252495765686035, + "learning_rate": 1.2801040662632612e-05, + "loss": 0.7793, "step": 3032 }, { - "epoch": 0.22950323483787977, - "grad_norm": 2.535245656967163, - "learning_rate": 1.6460470439039874e-05, - "loss": 0.7686, + "epoch": 0.21351636747624075, + "grad_norm": 2.382873296737671, + "learning_rate": 1.2800147229890862e-05, + "loss": 0.7388, "step": 3033 }, { - "epoch": 0.22957890356021338, - "grad_norm": 2.4702205657958984, - "learning_rate": 1.6459210459749244e-05, - "loss": 0.9208, + "epoch": 0.21358676522351286, + "grad_norm": 1.9096964597702026, + "learning_rate": 1.2799253495594205e-05, + "loss": 0.8481, "step": 3034 }, { - "epoch": 0.22965457228254701, - "grad_norm": 2.4642333984375, - "learning_rate": 1.645795001333873e-05, - "loss": 0.745, + "epoch": 0.21365716297078494, + "grad_norm": 2.4100160598754883, + "learning_rate": 1.2798359459789105e-05, + "loss": 0.7391, "step": 3035 }, { - "epoch": 0.22973024100488063, - "grad_norm": 2.5127851963043213, - "learning_rate": 1.6456689099887263e-05, - "loss": 0.9197, + "epoch": 0.213727560718057, + "grad_norm": 2.2085776329040527, + "learning_rate": 1.2797465122522044e-05, + "loss": 0.874, "step": 3036 }, { - "epoch": 0.22980590972721426, - "grad_norm": 2.6181676387786865, - "learning_rate": 1.6455427719473806e-05, - "loss": 0.6855, + "epoch": 0.21379795846532912, + "grad_norm": 2.383165121078491, + "learning_rate": 1.2796570483839524e-05, + "loss": 0.7301, "step": 3037 }, { - "epoch": 0.22988157844954787, - "grad_norm": 2.3293540477752686, - "learning_rate": 1.6454165872177354e-05, - "loss": 0.6547, + "epoch": 0.2138683562126012, + "grad_norm": 2.4955389499664307, + "learning_rate": 1.2795675543788054e-05, + "loss": 0.7733, "step": 3038 }, { - "epoch": 0.2299572471718815, - "grad_norm": 2.4066195487976074, - "learning_rate": 1.6452903558076925e-05, - "loss": 0.8718, + "epoch": 0.21393875395987327, + "grad_norm": 2.8889360427856445, + "learning_rate": 1.2794780302414164e-05, + "loss": 0.7172, "step": 3039 }, { - "epoch": 0.23003291589421512, - "grad_norm": 2.3516995906829834, - "learning_rate": 1.6451640777251567e-05, - "loss": 0.7497, + "epoch": 0.21400915170714538, + "grad_norm": 2.7161431312561035, + "learning_rate": 1.2793884759764397e-05, + "loss": 0.7995, "step": 3040 }, { - "epoch": 0.23010858461654876, - "grad_norm": 2.0895943641662598, - "learning_rate": 1.6450377529780363e-05, - "loss": 0.768, + "epoch": 0.21407954945441746, + "grad_norm": 1.8693368434906006, + "learning_rate": 1.2792988915885316e-05, + "loss": 0.743, "step": 3041 }, { - "epoch": 0.23018425333888237, - "grad_norm": 2.606595039367676, - "learning_rate": 1.6449113815742422e-05, - "loss": 0.7996, + "epoch": 0.21414994720168953, + "grad_norm": 2.4464199542999268, + "learning_rate": 1.2792092770823493e-05, + "loss": 0.7401, "step": 3042 }, { - "epoch": 0.23025992206121598, - "grad_norm": 2.8909833431243896, - "learning_rate": 1.644784963521688e-05, - "loss": 0.8167, + "epoch": 0.21422034494896164, + "grad_norm": 2.345980405807495, + "learning_rate": 1.279119632462552e-05, + "loss": 0.7839, "step": 3043 }, { - "epoch": 0.23033559078354962, - "grad_norm": 2.0377085208892822, - "learning_rate": 1.6446584988282907e-05, - "loss": 0.7504, + "epoch": 0.21429074269623372, + "grad_norm": 2.561969518661499, + "learning_rate": 1.2790299577338007e-05, + "loss": 0.8201, "step": 3044 }, { - "epoch": 0.23041125950588323, - "grad_norm": 2.66420841217041, - "learning_rate": 1.6445319875019694e-05, - "loss": 0.6552, + "epoch": 0.21436114044350582, + "grad_norm": 2.6459784507751465, + "learning_rate": 1.2789402529007575e-05, + "loss": 0.715, "step": 3045 }, { - "epoch": 0.23048692822821687, - "grad_norm": 2.240417718887329, - "learning_rate": 1.644405429550647e-05, - "loss": 0.7903, + "epoch": 0.2144315381907779, + "grad_norm": 2.432788372039795, + "learning_rate": 1.2788505179680864e-05, + "loss": 0.8173, "step": 3046 }, { - "epoch": 0.23056259695055048, - "grad_norm": 2.7434213161468506, - "learning_rate": 1.6442788249822486e-05, - "loss": 0.8382, + "epoch": 0.21450193593804998, + "grad_norm": 2.316436767578125, + "learning_rate": 1.2787607529404523e-05, + "loss": 0.7594, "step": 3047 }, { - "epoch": 0.23063826567288412, - "grad_norm": 2.1214497089385986, - "learning_rate": 1.6441521738047033e-05, - "loss": 0.8043, + "epoch": 0.21457233368532208, + "grad_norm": 2.1638920307159424, + "learning_rate": 1.2786709578225227e-05, + "loss": 0.6862, "step": 3048 }, { - "epoch": 0.23071393439521773, - "grad_norm": 2.15208101272583, - "learning_rate": 1.6440254760259416e-05, - "loss": 0.9723, + "epoch": 0.21464273143259416, + "grad_norm": 1.7246792316436768, + "learning_rate": 1.2785811326189658e-05, + "loss": 0.6713, "step": 3049 }, { - "epoch": 0.23078960311755137, - "grad_norm": 3.5610761642456055, - "learning_rate": 1.6438987316538985e-05, - "loss": 0.8386, + "epoch": 0.21471312917986624, + "grad_norm": 1.9893722534179688, + "learning_rate": 1.278491277334452e-05, + "loss": 0.7163, "step": 3050 }, { - "epoch": 0.23086527183988498, - "grad_norm": 2.2690629959106445, - "learning_rate": 1.643771940696511e-05, - "loss": 0.7666, + "epoch": 0.21478352692713834, + "grad_norm": 2.329451322555542, + "learning_rate": 1.2784013919736525e-05, + "loss": 0.5757, "step": 3051 }, { - "epoch": 0.23094094056221862, - "grad_norm": 2.7488620281219482, - "learning_rate": 1.6436451031617182e-05, - "loss": 0.738, + "epoch": 0.21485392467441042, + "grad_norm": 2.8759195804595947, + "learning_rate": 1.278311476541241e-05, + "loss": 0.8404, "step": 3052 }, { - "epoch": 0.23101660928455223, - "grad_norm": 2.229719877243042, - "learning_rate": 1.6435182190574643e-05, - "loss": 0.8551, + "epoch": 0.2149243224216825, + "grad_norm": 2.401198625564575, + "learning_rate": 1.2782215310418922e-05, + "loss": 0.7956, "step": 3053 }, { - "epoch": 0.23109227800688587, - "grad_norm": 2.4451205730438232, - "learning_rate": 1.6433912883916944e-05, - "loss": 0.688, + "epoch": 0.2149947201689546, + "grad_norm": 1.8831783533096313, + "learning_rate": 1.2781315554802823e-05, + "loss": 0.7821, "step": 3054 }, { - "epoch": 0.23116794672921948, - "grad_norm": 2.5764944553375244, - "learning_rate": 1.6432643111723578e-05, - "loss": 0.7756, + "epoch": 0.21506511791622668, + "grad_norm": 1.9924826622009277, + "learning_rate": 1.278041549861089e-05, + "loss": 0.7137, "step": 3055 }, { - "epoch": 0.23124361545155311, - "grad_norm": 2.299663782119751, - "learning_rate": 1.6431372874074057e-05, - "loss": 0.7981, + "epoch": 0.21513551566349876, + "grad_norm": 2.1731925010681152, + "learning_rate": 1.277951514188992e-05, + "loss": 0.7111, "step": 3056 }, { - "epoch": 0.23131928417388672, - "grad_norm": 2.5289340019226074, - "learning_rate": 1.6430102171047935e-05, - "loss": 0.6726, + "epoch": 0.21520591341077086, + "grad_norm": 2.1799957752227783, + "learning_rate": 1.2778614484686726e-05, + "loss": 0.8122, "step": 3057 }, { - "epoch": 0.23139495289622034, - "grad_norm": 3.0668952465057373, - "learning_rate": 1.6428831002724782e-05, - "loss": 0.8785, + "epoch": 0.21527631115804294, + "grad_norm": 2.47405743598938, + "learning_rate": 1.277771352704813e-05, + "loss": 0.6836, "step": 3058 }, { - "epoch": 0.23147062161855397, - "grad_norm": 2.457798480987549, - "learning_rate": 1.6427559369184202e-05, - "loss": 0.7681, + "epoch": 0.21534670890531504, + "grad_norm": 2.4810802936553955, + "learning_rate": 1.2776812269020975e-05, + "loss": 0.7044, "step": 3059 }, { - "epoch": 0.23154629034088758, - "grad_norm": 2.192192316055298, - "learning_rate": 1.6426287270505837e-05, - "loss": 0.7763, + "epoch": 0.21541710665258712, + "grad_norm": 2.580700159072876, + "learning_rate": 1.2775910710652121e-05, + "loss": 0.8121, "step": 3060 }, { - "epoch": 0.23162195906322122, - "grad_norm": 2.4625298976898193, - "learning_rate": 1.6425014706769337e-05, - "loss": 0.6961, + "epoch": 0.2154875043998592, + "grad_norm": 1.9804786443710327, + "learning_rate": 1.2775008851988436e-05, + "loss": 0.6698, "step": 3061 }, { - "epoch": 0.23169762778555483, - "grad_norm": 2.2496135234832764, - "learning_rate": 1.64237416780544e-05, - "loss": 0.8604, + "epoch": 0.2155579021471313, + "grad_norm": 2.9414753913879395, + "learning_rate": 1.2774106693076811e-05, + "loss": 0.7641, "step": 3062 }, { - "epoch": 0.23177329650788847, - "grad_norm": 2.194805860519409, - "learning_rate": 1.642246818444075e-05, - "loss": 0.7537, + "epoch": 0.21562829989440338, + "grad_norm": 2.3075196743011475, + "learning_rate": 1.277320423396415e-05, + "loss": 0.8309, "step": 3063 }, { - "epoch": 0.23184896523022208, - "grad_norm": 2.501255512237549, - "learning_rate": 1.6421194226008138e-05, - "loss": 0.8666, + "epoch": 0.21569869764167546, + "grad_norm": 1.8802727460861206, + "learning_rate": 1.2772301474697371e-05, + "loss": 0.6091, "step": 3064 }, { - "epoch": 0.23192463395255572, - "grad_norm": 2.5918145179748535, - "learning_rate": 1.6419919802836337e-05, - "loss": 0.6322, + "epoch": 0.21576909538894756, + "grad_norm": 2.607598066329956, + "learning_rate": 1.277139841532341e-05, + "loss": 0.748, "step": 3065 }, { - "epoch": 0.23200030267488933, - "grad_norm": 2.162069082260132, - "learning_rate": 1.641864491500516e-05, - "loss": 0.7903, + "epoch": 0.21583949313621964, + "grad_norm": 2.0307865142822266, + "learning_rate": 1.2770495055889221e-05, + "loss": 0.7125, "step": 3066 }, { - "epoch": 0.23207597139722297, - "grad_norm": 2.95308518409729, - "learning_rate": 1.6417369562594444e-05, - "loss": 0.8792, + "epoch": 0.21590989088349172, + "grad_norm": 1.8761448860168457, + "learning_rate": 1.2769591396441768e-05, + "loss": 0.7235, "step": 3067 }, { - "epoch": 0.23215164011955658, - "grad_norm": 2.692232131958008, - "learning_rate": 1.6416093745684054e-05, - "loss": 0.7282, + "epoch": 0.21598028863076382, + "grad_norm": 2.5877604484558105, + "learning_rate": 1.2768687437028031e-05, + "loss": 0.7445, "step": 3068 }, { - "epoch": 0.23222730884189022, - "grad_norm": 2.6581435203552246, - "learning_rate": 1.6414817464353888e-05, - "loss": 0.8014, + "epoch": 0.2160506863780359, + "grad_norm": 2.6464695930480957, + "learning_rate": 1.276778317769501e-05, + "loss": 0.7662, "step": 3069 }, { - "epoch": 0.23230297756422383, - "grad_norm": 2.171360969543457, - "learning_rate": 1.6413540718683872e-05, - "loss": 0.7544, + "epoch": 0.21612108412530798, + "grad_norm": 1.8829771280288696, + "learning_rate": 1.276687861848972e-05, + "loss": 0.8376, "step": 3070 }, { - "epoch": 0.23237864628655744, - "grad_norm": 2.7274041175842285, - "learning_rate": 1.6412263508753952e-05, - "loss": 0.6874, + "epoch": 0.21619148187258008, + "grad_norm": 2.020209312438965, + "learning_rate": 1.2765973759459187e-05, + "loss": 0.608, "step": 3071 }, { - "epoch": 0.23245431500889108, - "grad_norm": 2.1489665508270264, - "learning_rate": 1.6410985834644123e-05, - "loss": 0.7909, + "epoch": 0.21626187961985216, + "grad_norm": 2.0585291385650635, + "learning_rate": 1.2765068600650457e-05, + "loss": 0.7564, "step": 3072 }, { - "epoch": 0.2325299837312247, - "grad_norm": 1.950114369392395, - "learning_rate": 1.6409707696434388e-05, - "loss": 0.7571, + "epoch": 0.21633227736712426, + "grad_norm": 2.3239593505859375, + "learning_rate": 1.2764163142110586e-05, + "loss": 0.7111, "step": 3073 }, { - "epoch": 0.23260565245355833, - "grad_norm": 4.101839542388916, - "learning_rate": 1.640842909420479e-05, - "loss": 0.7565, + "epoch": 0.21640267511439634, + "grad_norm": 1.9590197801589966, + "learning_rate": 1.2763257383886658e-05, + "loss": 0.7829, "step": 3074 }, { - "epoch": 0.23268132117589194, - "grad_norm": 2.2927932739257812, - "learning_rate": 1.6407150028035402e-05, - "loss": 0.6963, + "epoch": 0.21647307286166842, + "grad_norm": 2.2086665630340576, + "learning_rate": 1.2762351326025754e-05, + "loss": 0.7233, "step": 3075 }, { - "epoch": 0.23275698989822557, - "grad_norm": 2.3485589027404785, - "learning_rate": 1.6405870498006326e-05, - "loss": 0.6547, + "epoch": 0.21654347060894052, + "grad_norm": 2.1252975463867188, + "learning_rate": 1.2761444968574989e-05, + "loss": 0.7254, "step": 3076 }, { - "epoch": 0.23283265862055919, - "grad_norm": 2.404630422592163, - "learning_rate": 1.640459050419768e-05, - "loss": 0.8066, + "epoch": 0.2166138683562126, + "grad_norm": 2.4428725242614746, + "learning_rate": 1.2760538311581481e-05, + "loss": 0.8211, "step": 3077 }, { - "epoch": 0.23290832734289282, - "grad_norm": 2.318694591522217, - "learning_rate": 1.640331004668963e-05, - "loss": 0.8715, + "epoch": 0.21668426610348468, + "grad_norm": 1.7926791906356812, + "learning_rate": 1.275963135509237e-05, + "loss": 0.6179, "step": 3078 }, { - "epoch": 0.23298399606522643, - "grad_norm": 2.4930219650268555, - "learning_rate": 1.6402029125562357e-05, - "loss": 0.7058, + "epoch": 0.21675466385075678, + "grad_norm": 1.9777814149856567, + "learning_rate": 1.2758724099154806e-05, + "loss": 0.6541, "step": 3079 }, { - "epoch": 0.23305966478756007, - "grad_norm": 3.169928789138794, - "learning_rate": 1.640074774089608e-05, - "loss": 0.758, + "epoch": 0.21682506159802886, + "grad_norm": 2.2217211723327637, + "learning_rate": 1.2757816543815963e-05, + "loss": 0.7521, "step": 3080 }, { - "epoch": 0.23313533350989368, - "grad_norm": 2.3577566146850586, - "learning_rate": 1.6399465892771045e-05, - "loss": 0.8166, + "epoch": 0.21689545934530094, + "grad_norm": 2.0575878620147705, + "learning_rate": 1.2756908689123024e-05, + "loss": 0.6927, "step": 3081 }, { - "epoch": 0.23321100223222732, - "grad_norm": 2.1574015617370605, - "learning_rate": 1.6398183581267522e-05, - "loss": 0.8251, + "epoch": 0.21696585709257304, + "grad_norm": 2.216799736022949, + "learning_rate": 1.2756000535123188e-05, + "loss": 0.773, "step": 3082 }, { - "epoch": 0.23328667095456093, - "grad_norm": 2.141531229019165, - "learning_rate": 1.639690080646581e-05, - "loss": 0.785, + "epoch": 0.21703625483984512, + "grad_norm": 3.156691312789917, + "learning_rate": 1.275509208186367e-05, + "loss": 0.8228, "step": 3083 }, { - "epoch": 0.23336233967689454, - "grad_norm": 2.964078426361084, - "learning_rate": 1.639561756844625e-05, - "loss": 0.6618, + "epoch": 0.2171066525871172, + "grad_norm": 2.0203890800476074, + "learning_rate": 1.2754183329391703e-05, + "loss": 0.7314, "step": 3084 }, { - "epoch": 0.23343800839922818, - "grad_norm": 2.588050603866577, - "learning_rate": 1.6394333867289198e-05, - "loss": 0.7254, + "epoch": 0.2171770503343893, + "grad_norm": 2.2178518772125244, + "learning_rate": 1.2753274277754534e-05, + "loss": 0.7761, "step": 3085 }, { - "epoch": 0.2335136771215618, - "grad_norm": 3.1595702171325684, - "learning_rate": 1.639304970307504e-05, - "loss": 0.759, + "epoch": 0.21724744808166138, + "grad_norm": 1.884958028793335, + "learning_rate": 1.2752364926999424e-05, + "loss": 0.7577, "step": 3086 }, { - "epoch": 0.23358934584389543, - "grad_norm": 2.7546892166137695, - "learning_rate": 1.63917650758842e-05, - "loss": 0.8624, + "epoch": 0.2173178458289335, + "grad_norm": 2.198028087615967, + "learning_rate": 1.2751455277173653e-05, + "loss": 0.7901, "step": 3087 }, { - "epoch": 0.23366501456622904, - "grad_norm": 2.3731441497802734, - "learning_rate": 1.639047998579712e-05, - "loss": 0.7255, + "epoch": 0.21738824357620556, + "grad_norm": 2.364086151123047, + "learning_rate": 1.2750545328324513e-05, + "loss": 0.8207, "step": 3088 }, { - "epoch": 0.23374068328856268, - "grad_norm": 2.9596669673919678, - "learning_rate": 1.6389194432894283e-05, - "loss": 0.8804, + "epoch": 0.21745864132347764, + "grad_norm": 3.1707804203033447, + "learning_rate": 1.2749635080499312e-05, + "loss": 0.7554, "step": 3089 }, { - "epoch": 0.2338163520108963, - "grad_norm": 2.525820732116699, - "learning_rate": 1.638790841725619e-05, - "loss": 0.8342, + "epoch": 0.21752903907074975, + "grad_norm": 1.94142484664917, + "learning_rate": 1.2748724533745377e-05, + "loss": 0.8703, "step": 3090 }, { - "epoch": 0.23389202073322993, - "grad_norm": 2.2738192081451416, - "learning_rate": 1.6386621938963375e-05, - "loss": 0.7513, + "epoch": 0.21759943681802182, + "grad_norm": 1.9685486555099487, + "learning_rate": 1.2747813688110046e-05, + "loss": 0.7325, "step": 3091 }, { - "epoch": 0.23396768945556354, - "grad_norm": 1.9771041870117188, - "learning_rate": 1.6385334998096405e-05, - "loss": 0.8262, + "epoch": 0.2176698345652939, + "grad_norm": 2.2313263416290283, + "learning_rate": 1.2746902543640677e-05, + "loss": 0.7515, "step": 3092 }, { - "epoch": 0.23404335817789718, - "grad_norm": 2.0869479179382324, - "learning_rate": 1.638404759473587e-05, - "loss": 0.7338, + "epoch": 0.217740232312566, + "grad_norm": 1.861414909362793, + "learning_rate": 1.2745991100384643e-05, + "loss": 0.6922, "step": 3093 }, { - "epoch": 0.2341190269002308, - "grad_norm": 2.7630059719085693, - "learning_rate": 1.6382759728962392e-05, - "loss": 0.9286, + "epoch": 0.21781063005983808, + "grad_norm": 2.1390626430511475, + "learning_rate": 1.2745079358389323e-05, + "loss": 0.7437, "step": 3094 }, { - "epoch": 0.23419469562256442, - "grad_norm": 2.188873291015625, - "learning_rate": 1.638147140085662e-05, - "loss": 0.7531, + "epoch": 0.21788102780711016, + "grad_norm": 2.648023843765259, + "learning_rate": 1.2744167317702125e-05, + "loss": 0.7662, "step": 3095 }, { - "epoch": 0.23427036434489804, - "grad_norm": 1.8856064081192017, - "learning_rate": 1.6380182610499234e-05, - "loss": 0.7139, + "epoch": 0.21795142555438227, + "grad_norm": 2.350801944732666, + "learning_rate": 1.2743254978370467e-05, + "loss": 0.6375, "step": 3096 }, { - "epoch": 0.23434603306723165, - "grad_norm": 2.62967848777771, - "learning_rate": 1.637889335797094e-05, - "loss": 0.8767, + "epoch": 0.21802182330165434, + "grad_norm": 2.1566333770751953, + "learning_rate": 1.2742342340441781e-05, + "loss": 0.7851, "step": 3097 }, { - "epoch": 0.23442170178956528, - "grad_norm": 10.917149543762207, - "learning_rate": 1.6377603643352483e-05, - "loss": 0.8107, + "epoch": 0.21809222104892642, + "grad_norm": 1.7837780714035034, + "learning_rate": 1.2741429403963518e-05, + "loss": 0.802, "step": 3098 }, { - "epoch": 0.2344973705118989, - "grad_norm": 2.7370142936706543, - "learning_rate": 1.6376313466724624e-05, - "loss": 0.7963, + "epoch": 0.21816261879619853, + "grad_norm": 2.4457571506500244, + "learning_rate": 1.2740516168983139e-05, + "loss": 0.7221, "step": 3099 }, { - "epoch": 0.23457303923423253, - "grad_norm": 1.870970606803894, - "learning_rate": 1.6375022828168153e-05, - "loss": 0.6093, + "epoch": 0.2182330165434706, + "grad_norm": 1.9914522171020508, + "learning_rate": 1.2739602635548122e-05, + "loss": 0.7345, "step": 3100 }, { - "epoch": 0.23464870795656614, - "grad_norm": 2.654796600341797, - "learning_rate": 1.6373731727763902e-05, - "loss": 0.766, + "epoch": 0.2183034142907427, + "grad_norm": 1.9334121942520142, + "learning_rate": 1.273868880370597e-05, + "loss": 0.676, "step": 3101 }, { - "epoch": 0.23472437667889978, - "grad_norm": 2.636352062225342, - "learning_rate": 1.6372440165592717e-05, - "loss": 0.7713, + "epoch": 0.2183738120380148, + "grad_norm": 2.285327434539795, + "learning_rate": 1.273777467350419e-05, + "loss": 0.7774, "step": 3102 }, { - "epoch": 0.2348000454012334, - "grad_norm": 2.9713962078094482, - "learning_rate": 1.6371148141735488e-05, - "loss": 0.7387, + "epoch": 0.21844420978528686, + "grad_norm": 1.9945783615112305, + "learning_rate": 1.2736860244990306e-05, + "loss": 0.7884, "step": 3103 }, { - "epoch": 0.23487571412356703, - "grad_norm": 2.823192596435547, - "learning_rate": 1.636985565627312e-05, - "loss": 0.7023, + "epoch": 0.21851460753255897, + "grad_norm": 2.0761477947235107, + "learning_rate": 1.2735945518211865e-05, + "loss": 0.7397, "step": 3104 }, { - "epoch": 0.23495138284590064, - "grad_norm": 2.4954514503479004, - "learning_rate": 1.6368562709286553e-05, - "loss": 0.7699, + "epoch": 0.21858500527983105, + "grad_norm": 2.0466208457946777, + "learning_rate": 1.2735030493216419e-05, + "loss": 0.7511, "step": 3105 }, { - "epoch": 0.23502705156823428, - "grad_norm": 2.6096956729888916, - "learning_rate": 1.6367269300856755e-05, - "loss": 0.6207, + "epoch": 0.21865540302710312, + "grad_norm": 1.900396704673767, + "learning_rate": 1.2734115170051546e-05, + "loss": 0.8102, "step": 3106 }, { - "epoch": 0.2351027202905679, - "grad_norm": 2.9008142948150635, - "learning_rate": 1.636597543106473e-05, - "loss": 0.8394, + "epoch": 0.21872580077437523, + "grad_norm": 2.6602399349212646, + "learning_rate": 1.2733199548764831e-05, + "loss": 0.8003, "step": 3107 }, { - "epoch": 0.23517838901290153, - "grad_norm": 1.9445158243179321, - "learning_rate": 1.636468109999149e-05, - "loss": 0.6827, + "epoch": 0.2187961985216473, + "grad_norm": 1.84638512134552, + "learning_rate": 1.273228362940388e-05, + "loss": 0.79, "step": 3108 }, { - "epoch": 0.23525405773523514, - "grad_norm": 2.0866832733154297, - "learning_rate": 1.6363386307718106e-05, - "loss": 0.8256, + "epoch": 0.21886659626891938, + "grad_norm": 1.7553513050079346, + "learning_rate": 1.273136741201631e-05, + "loss": 0.7535, "step": 3109 }, { - "epoch": 0.23532972645756878, - "grad_norm": 2.2782726287841797, - "learning_rate": 1.6362091054325657e-05, - "loss": 0.6447, + "epoch": 0.2189369940161915, + "grad_norm": 2.4688127040863037, + "learning_rate": 1.273045089664976e-05, + "loss": 0.6834, "step": 3110 }, { - "epoch": 0.2354053951799024, - "grad_norm": 2.6688876152038574, - "learning_rate": 1.636079533989525e-05, - "loss": 0.7119, + "epoch": 0.21900739176346357, + "grad_norm": 1.801727056503296, + "learning_rate": 1.2729534083351876e-05, + "loss": 0.6554, "step": 3111 }, { - "epoch": 0.235481063902236, - "grad_norm": 2.297727346420288, - "learning_rate": 1.6359499164508034e-05, - "loss": 0.9497, + "epoch": 0.21907778951073564, + "grad_norm": 1.8008551597595215, + "learning_rate": 1.2728616972170326e-05, + "loss": 0.7554, "step": 3112 }, { - "epoch": 0.23555673262456964, - "grad_norm": 2.075216054916382, - "learning_rate": 1.6358202528245173e-05, - "loss": 0.8736, + "epoch": 0.21914818725800775, + "grad_norm": 2.028029680252075, + "learning_rate": 1.2727699563152793e-05, + "loss": 0.7258, "step": 3113 }, { - "epoch": 0.23563240134690325, - "grad_norm": 2.638568878173828, - "learning_rate": 1.6356905431187874e-05, - "loss": 0.7503, + "epoch": 0.21921858500527983, + "grad_norm": 2.0209741592407227, + "learning_rate": 1.2726781856346972e-05, + "loss": 0.6826, "step": 3114 }, { - "epoch": 0.23570807006923689, - "grad_norm": 3.3016517162323, - "learning_rate": 1.635560787341736e-05, - "loss": 0.6998, + "epoch": 0.21928898275255193, + "grad_norm": 2.0368385314941406, + "learning_rate": 1.2725863851800576e-05, + "loss": 0.7556, "step": 3115 }, { - "epoch": 0.2357837387915705, - "grad_norm": 2.7137296199798584, - "learning_rate": 1.635430985501489e-05, - "loss": 0.8485, + "epoch": 0.219359380499824, + "grad_norm": 2.089179515838623, + "learning_rate": 1.272494554956133e-05, + "loss": 0.6193, "step": 3116 }, { - "epoch": 0.23585940751390413, - "grad_norm": 2.4911835193634033, - "learning_rate": 1.6353011376061752e-05, - "loss": 0.7518, + "epoch": 0.21942977824709609, + "grad_norm": 2.3366663455963135, + "learning_rate": 1.2724026949676982e-05, + "loss": 0.6894, "step": 3117 }, { - "epoch": 0.23593507623623775, - "grad_norm": 2.5743257999420166, - "learning_rate": 1.6351712436639254e-05, - "loss": 0.8575, + "epoch": 0.2195001759943682, + "grad_norm": 2.7887375354766846, + "learning_rate": 1.272310805219529e-05, + "loss": 0.7979, "step": 3118 }, { - "epoch": 0.23601074495857138, - "grad_norm": 1.9882807731628418, - "learning_rate": 1.635041303682875e-05, - "loss": 0.7866, + "epoch": 0.21957057374164027, + "grad_norm": 2.0177929401397705, + "learning_rate": 1.2722188857164026e-05, + "loss": 0.7661, "step": 3119 }, { - "epoch": 0.236086413680905, - "grad_norm": 2.3012471199035645, - "learning_rate": 1.6349113176711606e-05, - "loss": 0.7392, + "epoch": 0.21964097148891235, + "grad_norm": 1.9205858707427979, + "learning_rate": 1.272126936463098e-05, + "loss": 0.7748, "step": 3120 }, { - "epoch": 0.23616208240323863, - "grad_norm": 2.3356645107269287, - "learning_rate": 1.6347812856369225e-05, - "loss": 0.7093, + "epoch": 0.21971136923618445, + "grad_norm": 2.305665969848633, + "learning_rate": 1.272034957464396e-05, + "loss": 0.8255, "step": 3121 }, { - "epoch": 0.23623775112557224, - "grad_norm": 1.8433892726898193, - "learning_rate": 1.6346512075883035e-05, - "loss": 0.7198, + "epoch": 0.21978176698345653, + "grad_norm": 2.653742790222168, + "learning_rate": 1.2719429487250787e-05, + "loss": 0.7486, "step": 3122 }, { - "epoch": 0.23631341984790588, - "grad_norm": 2.4490842819213867, - "learning_rate": 1.6345210835334502e-05, - "loss": 0.8044, + "epoch": 0.2198521647307286, + "grad_norm": 2.1507763862609863, + "learning_rate": 1.271850910249929e-05, + "loss": 0.6984, "step": 3123 }, { - "epoch": 0.2363890885702395, - "grad_norm": 2.7389450073242188, - "learning_rate": 1.6343909134805106e-05, - "loss": 0.7449, + "epoch": 0.2199225624780007, + "grad_norm": 2.1635024547576904, + "learning_rate": 1.271758842043733e-05, + "loss": 0.6959, "step": 3124 }, { - "epoch": 0.2364647572925731, - "grad_norm": 2.162238836288452, - "learning_rate": 1.6342606974376367e-05, - "loss": 0.6568, + "epoch": 0.2199929602252728, + "grad_norm": 2.470705986022949, + "learning_rate": 1.2716667441112768e-05, + "loss": 0.6935, "step": 3125 }, { - "epoch": 0.23654042601490674, - "grad_norm": 2.0842134952545166, - "learning_rate": 1.634130435412983e-05, - "loss": 0.5613, + "epoch": 0.22006335797254487, + "grad_norm": 2.2289273738861084, + "learning_rate": 1.2715746164573489e-05, + "loss": 0.7259, "step": 3126 }, { - "epoch": 0.23661609473724035, - "grad_norm": 2.217766284942627, - "learning_rate": 1.6340001274147074e-05, - "loss": 0.7913, + "epoch": 0.22013375571981697, + "grad_norm": 1.8323878049850464, + "learning_rate": 1.271482459086739e-05, + "loss": 0.6858, "step": 3127 }, { - "epoch": 0.236691763459574, - "grad_norm": 2.228721857070923, - "learning_rate": 1.6338697734509694e-05, - "loss": 0.8567, + "epoch": 0.22020415346708905, + "grad_norm": 2.127415180206299, + "learning_rate": 1.2713902720042384e-05, + "loss": 0.7815, "step": 3128 }, { - "epoch": 0.2367674321819076, - "grad_norm": 2.4052469730377197, - "learning_rate": 1.6337393735299325e-05, - "loss": 0.7, + "epoch": 0.22027455121436115, + "grad_norm": 1.9415438175201416, + "learning_rate": 1.2712980552146401e-05, + "loss": 0.625, "step": 3129 }, { - "epoch": 0.23684310090424124, - "grad_norm": 2.7400004863739014, - "learning_rate": 1.633608927659763e-05, - "loss": 0.7754, + "epoch": 0.22034494896163323, + "grad_norm": 2.0398402214050293, + "learning_rate": 1.2712058087227387e-05, + "loss": 0.8033, "step": 3130 }, { - "epoch": 0.23691876962657485, - "grad_norm": 2.2973527908325195, - "learning_rate": 1.6334784358486296e-05, - "loss": 0.8086, + "epoch": 0.2204153467089053, + "grad_norm": 2.242644786834717, + "learning_rate": 1.2711135325333297e-05, + "loss": 0.8079, "step": 3131 }, { - "epoch": 0.2369944383489085, - "grad_norm": 2.4160079956054688, - "learning_rate": 1.6333478981047043e-05, - "loss": 0.7429, + "epoch": 0.2204857444561774, + "grad_norm": 1.9508758783340454, + "learning_rate": 1.2710212266512114e-05, + "loss": 0.7634, "step": 3132 }, { - "epoch": 0.2370701070712421, - "grad_norm": 2.779785394668579, - "learning_rate": 1.6332173144361613e-05, - "loss": 0.6959, + "epoch": 0.2205561422034495, + "grad_norm": 2.2474985122680664, + "learning_rate": 1.2709288910811817e-05, + "loss": 0.7733, "step": 3133 }, { - "epoch": 0.23714577579357574, - "grad_norm": 2.1350619792938232, - "learning_rate": 1.633086684851179e-05, - "loss": 0.8856, + "epoch": 0.22062653995072157, + "grad_norm": 3.58011531829834, + "learning_rate": 1.2708365258280423e-05, + "loss": 0.6718, "step": 3134 }, { - "epoch": 0.23722144451590935, - "grad_norm": 3.6366331577301025, - "learning_rate": 1.632956009357937e-05, - "loss": 0.6962, + "epoch": 0.22069693769799367, + "grad_norm": 1.788568377494812, + "learning_rate": 1.270744130896595e-05, + "loss": 0.7037, "step": 3135 }, { - "epoch": 0.23729711323824298, - "grad_norm": 2.5807621479034424, - "learning_rate": 1.6328252879646195e-05, - "loss": 0.7641, + "epoch": 0.22076733544526575, + "grad_norm": 2.20206618309021, + "learning_rate": 1.2706517062916433e-05, + "loss": 0.6943, "step": 3136 }, { - "epoch": 0.2373727819605766, - "grad_norm": 2.8703842163085938, - "learning_rate": 1.632694520679412e-05, - "loss": 0.793, + "epoch": 0.22083773319253783, + "grad_norm": 1.865045189857483, + "learning_rate": 1.2705592520179927e-05, + "loss": 0.6673, "step": 3137 }, { - "epoch": 0.2374484506829102, - "grad_norm": 2.0771567821502686, - "learning_rate": 1.632563707510504e-05, - "loss": 0.7813, + "epoch": 0.22090813093980993, + "grad_norm": 2.0609347820281982, + "learning_rate": 1.2704667680804497e-05, + "loss": 0.7025, "step": 3138 }, { - "epoch": 0.23752411940524384, - "grad_norm": 2.532623529434204, - "learning_rate": 1.6324328484660867e-05, - "loss": 0.722, + "epoch": 0.220978528687082, + "grad_norm": 2.540104866027832, + "learning_rate": 1.2703742544838227e-05, + "loss": 0.6825, "step": 3139 }, { - "epoch": 0.23759978812757745, - "grad_norm": 1.9321879148483276, - "learning_rate": 1.632301943554356e-05, - "loss": 0.7015, + "epoch": 0.2210489264343541, + "grad_norm": 2.545807361602783, + "learning_rate": 1.2702817112329218e-05, + "loss": 0.7387, "step": 3140 }, { - "epoch": 0.2376754568499111, - "grad_norm": 2.117048978805542, - "learning_rate": 1.6321709927835087e-05, - "loss": 0.7204, + "epoch": 0.2211193241816262, + "grad_norm": 4.5565667152404785, + "learning_rate": 1.2701891383325585e-05, + "loss": 0.6923, "step": 3141 }, { - "epoch": 0.2377511255722447, - "grad_norm": 2.478074550628662, - "learning_rate": 1.6320399961617458e-05, - "loss": 0.7196, + "epoch": 0.22118972192889827, + "grad_norm": 2.221820592880249, + "learning_rate": 1.270096535787545e-05, + "loss": 0.6842, "step": 3142 }, { - "epoch": 0.23782679429457834, - "grad_norm": 2.4688057899475098, - "learning_rate": 1.6319089536972706e-05, - "loss": 0.7519, + "epoch": 0.22126011967617037, + "grad_norm": 2.3755509853363037, + "learning_rate": 1.2700039036026968e-05, + "loss": 0.8101, "step": 3143 }, { - "epoch": 0.23790246301691195, - "grad_norm": 2.998124599456787, - "learning_rate": 1.6317778653982898e-05, - "loss": 0.8621, + "epoch": 0.22133051742344245, + "grad_norm": 10.42531681060791, + "learning_rate": 1.269911241782829e-05, + "loss": 0.7035, "step": 3144 }, { - "epoch": 0.2379781317392456, - "grad_norm": 3.5607550144195557, - "learning_rate": 1.631646731273012e-05, - "loss": 0.748, + "epoch": 0.22140091517071453, + "grad_norm": 2.5192835330963135, + "learning_rate": 1.26981855033276e-05, + "loss": 0.851, "step": 3145 }, { - "epoch": 0.2380538004615792, - "grad_norm": 2.3379786014556885, - "learning_rate": 1.631515551329649e-05, - "loss": 0.5764, + "epoch": 0.22147131291798663, + "grad_norm": 2.3696038722991943, + "learning_rate": 1.269725829257308e-05, + "loss": 0.7818, "step": 3146 }, { - "epoch": 0.23812946918391284, - "grad_norm": 2.382059097290039, - "learning_rate": 1.6313843255764167e-05, - "loss": 0.7674, + "epoch": 0.2215417106652587, + "grad_norm": 2.1386964321136475, + "learning_rate": 1.2696330785612946e-05, + "loss": 0.8036, "step": 3147 }, { - "epoch": 0.23820513790624645, - "grad_norm": 2.690565586090088, - "learning_rate": 1.6312530540215322e-05, - "loss": 0.823, + "epoch": 0.2216121084125308, + "grad_norm": 2.114572763442993, + "learning_rate": 1.2695402982495414e-05, + "loss": 0.7017, "step": 3148 }, { - "epoch": 0.2382808066285801, - "grad_norm": 2.1526098251342773, - "learning_rate": 1.631121736673216e-05, - "loss": 0.8074, + "epoch": 0.2216825061598029, + "grad_norm": 2.034846305847168, + "learning_rate": 1.2694474883268723e-05, + "loss": 0.7507, "step": 3149 }, { - "epoch": 0.2383564753509137, - "grad_norm": 2.1651923656463623, - "learning_rate": 1.6309903735396925e-05, - "loss": 0.7172, + "epoch": 0.22175290390707497, + "grad_norm": 1.9469513893127441, + "learning_rate": 1.2693546487981126e-05, + "loss": 0.7847, "step": 3150 }, { - "epoch": 0.2384321440732473, - "grad_norm": 2.0754897594451904, - "learning_rate": 1.6308589646291873e-05, - "loss": 0.7341, + "epoch": 0.22182330165434705, + "grad_norm": 1.8315056562423706, + "learning_rate": 1.269261779668089e-05, + "loss": 0.7024, "step": 3151 }, { - "epoch": 0.23850781279558095, - "grad_norm": 2.7566630840301514, - "learning_rate": 1.6307275099499297e-05, - "loss": 0.813, + "epoch": 0.22189369940161915, + "grad_norm": 1.8663288354873657, + "learning_rate": 1.2691688809416298e-05, + "loss": 0.759, "step": 3152 }, { - "epoch": 0.23858348151791456, - "grad_norm": 2.0758936405181885, - "learning_rate": 1.630596009510152e-05, - "loss": 0.7604, + "epoch": 0.22196409714889123, + "grad_norm": 1.9068424701690674, + "learning_rate": 1.2690759526235653e-05, + "loss": 0.7388, "step": 3153 }, { - "epoch": 0.2386591502402482, - "grad_norm": 2.4266982078552246, - "learning_rate": 1.6304644633180893e-05, - "loss": 0.806, + "epoch": 0.2220344948961633, + "grad_norm": 1.9365549087524414, + "learning_rate": 1.2689829947187264e-05, + "loss": 0.703, "step": 3154 }, { - "epoch": 0.2387348189625818, - "grad_norm": 2.900785446166992, - "learning_rate": 1.630332871381979e-05, - "loss": 0.7293, + "epoch": 0.22210489264343541, + "grad_norm": 3.379399061203003, + "learning_rate": 1.2688900072319464e-05, + "loss": 0.7418, "step": 3155 }, { - "epoch": 0.23881048768491545, - "grad_norm": 2.0471670627593994, - "learning_rate": 1.6302012337100624e-05, - "loss": 0.7067, + "epoch": 0.2221752903907075, + "grad_norm": 2.224734306335449, + "learning_rate": 1.2687969901680597e-05, + "loss": 0.7811, "step": 3156 }, { - "epoch": 0.23888615640724906, - "grad_norm": 2.2892119884490967, - "learning_rate": 1.6300695503105825e-05, - "loss": 0.8938, + "epoch": 0.2222456881379796, + "grad_norm": 1.8050628900527954, + "learning_rate": 1.2687039435319023e-05, + "loss": 0.922, "step": 3157 }, { - "epoch": 0.2389618251295827, - "grad_norm": 1.9991192817687988, - "learning_rate": 1.629937821191786e-05, - "loss": 0.7893, + "epoch": 0.22231608588525167, + "grad_norm": 2.086909770965576, + "learning_rate": 1.268610867328312e-05, + "loss": 0.7225, "step": 3158 }, { - "epoch": 0.2390374938519163, - "grad_norm": 2.1983959674835205, - "learning_rate": 1.6298060463619224e-05, - "loss": 0.6493, + "epoch": 0.22238648363252375, + "grad_norm": 1.6612790822982788, + "learning_rate": 1.2685177615621277e-05, + "loss": 0.7979, "step": 3159 }, { - "epoch": 0.23911316257424994, - "grad_norm": 2.451901912689209, - "learning_rate": 1.629674225829244e-05, - "loss": 0.6745, + "epoch": 0.22245688137979586, + "grad_norm": 2.134364366531372, + "learning_rate": 1.26842462623819e-05, + "loss": 0.7747, "step": 3160 }, { - "epoch": 0.23918883129658355, - "grad_norm": 2.5038340091705322, - "learning_rate": 1.6295423596020052e-05, - "loss": 0.8159, + "epoch": 0.22252727912706793, + "grad_norm": 2.1693716049194336, + "learning_rate": 1.268331461361341e-05, + "loss": 0.7404, "step": 3161 }, { - "epoch": 0.2392645000189172, - "grad_norm": 2.6183676719665527, - "learning_rate": 1.6294104476884643e-05, - "loss": 0.747, + "epoch": 0.22259767687434, + "grad_norm": 2.37223219871521, + "learning_rate": 1.2682382669364247e-05, + "loss": 0.7126, "step": 3162 }, { - "epoch": 0.2393401687412508, - "grad_norm": 3.3365650177001953, - "learning_rate": 1.6292784900968818e-05, - "loss": 0.8315, + "epoch": 0.22266807462161212, + "grad_norm": 2.286916971206665, + "learning_rate": 1.2681450429682867e-05, + "loss": 0.7561, "step": 3163 }, { - "epoch": 0.23941583746358444, - "grad_norm": 2.1766538619995117, - "learning_rate": 1.6291464868355216e-05, - "loss": 0.7855, + "epoch": 0.2227384723688842, + "grad_norm": 1.9936481714248657, + "learning_rate": 1.268051789461773e-05, + "loss": 0.8466, "step": 3164 }, { - "epoch": 0.23949150618591805, - "grad_norm": 2.9773683547973633, - "learning_rate": 1.6290144379126498e-05, - "loss": 0.8054, + "epoch": 0.22280887011615627, + "grad_norm": 2.3903703689575195, + "learning_rate": 1.2679585064217322e-05, + "loss": 0.7624, "step": 3165 }, { - "epoch": 0.23956717490825166, - "grad_norm": 2.252821445465088, - "learning_rate": 1.6288823433365365e-05, - "loss": 0.754, + "epoch": 0.22287926786342838, + "grad_norm": 2.3252978324890137, + "learning_rate": 1.2678651938530146e-05, + "loss": 0.7923, "step": 3166 }, { - "epoch": 0.2396428436305853, - "grad_norm": 2.627490520477295, - "learning_rate": 1.628750203115453e-05, - "loss": 0.8792, + "epoch": 0.22294966561070045, + "grad_norm": 1.7455724477767944, + "learning_rate": 1.267771851760471e-05, + "loss": 0.7536, "step": 3167 }, { - "epoch": 0.2397185123529189, - "grad_norm": 2.516615867614746, - "learning_rate": 1.6286180172576748e-05, - "loss": 0.8076, + "epoch": 0.22302006335797253, + "grad_norm": 1.9879755973815918, + "learning_rate": 1.2676784801489547e-05, + "loss": 0.7258, "step": 3168 }, { - "epoch": 0.23979418107525255, - "grad_norm": 3.4849398136138916, - "learning_rate": 1.6284857857714798e-05, - "loss": 0.7961, + "epoch": 0.22309046110524464, + "grad_norm": 2.0571601390838623, + "learning_rate": 1.2675850790233203e-05, + "loss": 0.8396, "step": 3169 }, { - "epoch": 0.23986984979758616, - "grad_norm": 2.3548803329467773, - "learning_rate": 1.6283535086651487e-05, - "loss": 0.7389, + "epoch": 0.2231608588525167, + "grad_norm": 1.9158333539962769, + "learning_rate": 1.2674916483884234e-05, + "loss": 0.789, "step": 3170 }, { - "epoch": 0.2399455185199198, - "grad_norm": 2.4780502319335938, - "learning_rate": 1.6282211859469652e-05, - "loss": 0.7191, + "epoch": 0.22323125659978882, + "grad_norm": 2.5489816665649414, + "learning_rate": 1.267398188249122e-05, + "loss": 0.8456, "step": 3171 }, { - "epoch": 0.2400211872422534, - "grad_norm": 2.856328248977661, - "learning_rate": 1.6280888176252153e-05, - "loss": 0.8256, + "epoch": 0.2233016543470609, + "grad_norm": 2.0065648555755615, + "learning_rate": 1.2673046986102746e-05, + "loss": 0.7089, "step": 3172 }, { - "epoch": 0.24009685596458705, - "grad_norm": 3.004737615585327, - "learning_rate": 1.627956403708189e-05, - "loss": 0.843, + "epoch": 0.22337205209433297, + "grad_norm": 2.149704694747925, + "learning_rate": 1.2672111794767423e-05, + "loss": 0.7155, "step": 3173 }, { - "epoch": 0.24017252468692066, - "grad_norm": 2.3179590702056885, - "learning_rate": 1.627823944204178e-05, - "loss": 0.7477, + "epoch": 0.22344244984160508, + "grad_norm": 1.9714637994766235, + "learning_rate": 1.2671176308533871e-05, + "loss": 0.8088, "step": 3174 }, { - "epoch": 0.2402481934092543, - "grad_norm": 2.164140462875366, - "learning_rate": 1.627691439121478e-05, - "loss": 0.7412, + "epoch": 0.22351284758887716, + "grad_norm": 2.910207986831665, + "learning_rate": 1.2670240527450725e-05, + "loss": 0.7303, "step": 3175 }, { - "epoch": 0.2403238621315879, - "grad_norm": 2.5224568843841553, - "learning_rate": 1.6275588884683858e-05, - "loss": 0.6791, + "epoch": 0.22358324533614923, + "grad_norm": 1.7337642908096313, + "learning_rate": 1.266930445156664e-05, + "loss": 0.7715, "step": 3176 }, { - "epoch": 0.24039953085392154, - "grad_norm": 2.7676162719726562, - "learning_rate": 1.6274262922532033e-05, - "loss": 0.652, + "epoch": 0.22365364308342134, + "grad_norm": 1.954297661781311, + "learning_rate": 1.2668368080930283e-05, + "loss": 0.7129, "step": 3177 }, { - "epoch": 0.24047519957625516, - "grad_norm": 1.807481288909912, - "learning_rate": 1.6272936504842333e-05, - "loss": 0.93, + "epoch": 0.22372404083069342, + "grad_norm": 1.7127834558486938, + "learning_rate": 1.2667431415590334e-05, + "loss": 0.6038, "step": 3178 }, { - "epoch": 0.24055086829858877, - "grad_norm": 2.958256483078003, - "learning_rate": 1.627160963169783e-05, - "loss": 0.7591, + "epoch": 0.2237944385779655, + "grad_norm": 2.1034939289093018, + "learning_rate": 1.2666494455595493e-05, + "loss": 0.7301, "step": 3179 }, { - "epoch": 0.2406265370209224, - "grad_norm": 3.4339983463287354, - "learning_rate": 1.6270282303181606e-05, - "loss": 0.7686, + "epoch": 0.2238648363252376, + "grad_norm": 2.2620768547058105, + "learning_rate": 1.2665557200994474e-05, + "loss": 0.8423, "step": 3180 }, { - "epoch": 0.24070220574325601, - "grad_norm": 8.310561180114746, - "learning_rate": 1.6268954519376792e-05, - "loss": 0.7554, + "epoch": 0.22393523407250968, + "grad_norm": 2.529972553253174, + "learning_rate": 1.2664619651836005e-05, + "loss": 0.7669, "step": 3181 }, { - "epoch": 0.24077787446558965, - "grad_norm": 1.9216821193695068, - "learning_rate": 1.6267626280366538e-05, - "loss": 0.6726, + "epoch": 0.22400563181978178, + "grad_norm": 1.982102870941162, + "learning_rate": 1.2663681808168833e-05, + "loss": 0.8085, "step": 3182 }, { - "epoch": 0.24085354318792326, - "grad_norm": 2.743393659591675, - "learning_rate": 1.626629758623402e-05, - "loss": 0.7734, + "epoch": 0.22407602956705386, + "grad_norm": 1.9138230085372925, + "learning_rate": 1.266274367004171e-05, + "loss": 0.7864, "step": 3183 }, { - "epoch": 0.2409292119102569, - "grad_norm": 2.5748066902160645, - "learning_rate": 1.6264968437062438e-05, - "loss": 0.8017, + "epoch": 0.22414642731432594, + "grad_norm": 2.049367666244507, + "learning_rate": 1.2661805237503417e-05, + "loss": 0.7198, "step": 3184 }, { - "epoch": 0.2410048806325905, - "grad_norm": 2.8789925575256348, - "learning_rate": 1.626363883293504e-05, - "loss": 0.7882, + "epoch": 0.22421682506159804, + "grad_norm": 2.417738199234009, + "learning_rate": 1.2660866510602743e-05, + "loss": 0.8573, "step": 3185 }, { - "epoch": 0.24108054935492415, - "grad_norm": 2.611142158508301, - "learning_rate": 1.6262308773935085e-05, - "loss": 0.8026, + "epoch": 0.22428722280887012, + "grad_norm": 2.111278772354126, + "learning_rate": 1.265992748938849e-05, + "loss": 0.7382, "step": 3186 }, { - "epoch": 0.24115621807725776, - "grad_norm": 2.30765700340271, - "learning_rate": 1.6260978260145867e-05, - "loss": 0.8759, + "epoch": 0.2243576205561422, + "grad_norm": 2.1961803436279297, + "learning_rate": 1.2658988173909483e-05, + "loss": 0.7495, "step": 3187 }, { - "epoch": 0.2412318867995914, - "grad_norm": 2.0693159103393555, - "learning_rate": 1.62596472916507e-05, - "loss": 0.7008, + "epoch": 0.2244280183034143, + "grad_norm": 1.804175615310669, + "learning_rate": 1.2658048564214556e-05, + "loss": 0.7551, "step": 3188 }, { - "epoch": 0.241307555521925, - "grad_norm": 2.8635051250457764, - "learning_rate": 1.6258315868532945e-05, - "loss": 0.7966, + "epoch": 0.22449841605068638, + "grad_norm": 2.9265296459198, + "learning_rate": 1.2657108660352559e-05, + "loss": 0.66, "step": 3189 }, { - "epoch": 0.24138322424425865, - "grad_norm": 2.766191244125366, - "learning_rate": 1.625698399087597e-05, - "loss": 0.6112, + "epoch": 0.22456881379795846, + "grad_norm": 1.7872343063354492, + "learning_rate": 1.2656168462372358e-05, + "loss": 0.7071, "step": 3190 }, { - "epoch": 0.24145889296659226, - "grad_norm": 2.440380811691284, - "learning_rate": 1.6255651658763185e-05, - "loss": 0.8754, + "epoch": 0.22463921154523056, + "grad_norm": 1.8022059202194214, + "learning_rate": 1.2655227970322839e-05, + "loss": 0.81, "step": 3191 }, { - "epoch": 0.24153456168892587, - "grad_norm": 2.209153175354004, - "learning_rate": 1.625431887227803e-05, - "loss": 0.7281, + "epoch": 0.22470960929250264, + "grad_norm": 1.957194209098816, + "learning_rate": 1.2654287184252895e-05, + "loss": 0.7402, "step": 3192 }, { - "epoch": 0.2416102304112595, - "grad_norm": 1.9143271446228027, - "learning_rate": 1.625298563150396e-05, - "loss": 0.8007, + "epoch": 0.22478000703977472, + "grad_norm": 2.19425106048584, + "learning_rate": 1.2653346104211438e-05, + "loss": 0.7503, "step": 3193 }, { - "epoch": 0.24168589913359312, - "grad_norm": 2.3368210792541504, - "learning_rate": 1.6251651936524473e-05, - "loss": 0.8231, + "epoch": 0.22485040478704682, + "grad_norm": 2.351741075515747, + "learning_rate": 1.2652404730247397e-05, + "loss": 0.7145, "step": 3194 }, { - "epoch": 0.24176156785592676, - "grad_norm": 2.0771644115448, - "learning_rate": 1.6250317787423087e-05, - "loss": 0.7381, + "epoch": 0.2249208025343189, + "grad_norm": 2.1414263248443604, + "learning_rate": 1.2651463062409719e-05, + "loss": 0.7684, "step": 3195 }, { - "epoch": 0.24183723657826037, - "grad_norm": 4.1516289710998535, - "learning_rate": 1.624898318428335e-05, - "loss": 0.6193, + "epoch": 0.224991200281591, + "grad_norm": 2.192631959915161, + "learning_rate": 1.2650521100747353e-05, + "loss": 0.6658, "step": 3196 }, { - "epoch": 0.241912905300594, - "grad_norm": 2.5363802909851074, - "learning_rate": 1.6247648127188842e-05, - "loss": 0.8325, + "epoch": 0.22506159802886308, + "grad_norm": 2.1622676849365234, + "learning_rate": 1.2649578845309279e-05, + "loss": 0.6442, "step": 3197 }, { - "epoch": 0.24198857402292762, - "grad_norm": 2.0077271461486816, - "learning_rate": 1.6246312616223164e-05, - "loss": 0.83, + "epoch": 0.22513199577613516, + "grad_norm": 2.0525078773498535, + "learning_rate": 1.2648636296144487e-05, + "loss": 0.6836, "step": 3198 }, { - "epoch": 0.24206424274526125, - "grad_norm": 2.6140248775482178, - "learning_rate": 1.6244976651469952e-05, - "loss": 0.8015, + "epoch": 0.22520239352340726, + "grad_norm": 2.308490753173828, + "learning_rate": 1.2647693453301976e-05, + "loss": 0.8107, "step": 3199 }, { - "epoch": 0.24213991146759486, - "grad_norm": 2.4978835582733154, - "learning_rate": 1.624364023301287e-05, - "loss": 0.7106, + "epoch": 0.22527279127067934, + "grad_norm": 1.9956773519515991, + "learning_rate": 1.2646750316830767e-05, + "loss": 0.785, "step": 3200 }, { - "epoch": 0.2422155801899285, - "grad_norm": 2.355581521987915, - "learning_rate": 1.624230336093561e-05, - "loss": 0.8455, + "epoch": 0.22534318901795142, + "grad_norm": 2.0635197162628174, + "learning_rate": 1.2645806886779895e-05, + "loss": 0.7987, "step": 3201 }, { - "epoch": 0.2422912489122621, - "grad_norm": 3.009420871734619, - "learning_rate": 1.6240966035321887e-05, - "loss": 0.6477, + "epoch": 0.22541358676522352, + "grad_norm": 2.1178529262542725, + "learning_rate": 1.264486316319841e-05, + "loss": 0.7817, "step": 3202 }, { - "epoch": 0.24236691763459575, - "grad_norm": 3.0516929626464844, - "learning_rate": 1.623962825625545e-05, - "loss": 0.7656, + "epoch": 0.2254839845124956, + "grad_norm": 2.0787389278411865, + "learning_rate": 1.264391914613538e-05, + "loss": 0.7746, "step": 3203 }, { - "epoch": 0.24244258635692936, - "grad_norm": 4.451990604400635, - "learning_rate": 1.6238290023820077e-05, - "loss": 0.8312, + "epoch": 0.22555438225976768, + "grad_norm": 2.2193291187286377, + "learning_rate": 1.264297483563988e-05, + "loss": 0.7616, "step": 3204 }, { - "epoch": 0.24251825507926297, - "grad_norm": 2.4959819316864014, - "learning_rate": 1.6236951338099567e-05, - "loss": 0.815, + "epoch": 0.22562478000703978, + "grad_norm": 2.182554244995117, + "learning_rate": 1.2642030231761007e-05, + "loss": 0.6281, "step": 3205 }, { - "epoch": 0.2425939238015966, - "grad_norm": 2.653895854949951, - "learning_rate": 1.6235612199177765e-05, - "loss": 0.6203, + "epoch": 0.22569517775431186, + "grad_norm": 2.066782236099243, + "learning_rate": 1.2641085334547872e-05, + "loss": 0.8186, "step": 3206 }, { - "epoch": 0.24266959252393022, - "grad_norm": 4.231168746948242, - "learning_rate": 1.6234272607138517e-05, - "loss": 0.716, + "epoch": 0.22576557550158394, + "grad_norm": 2.4216160774230957, + "learning_rate": 1.2640140144049603e-05, + "loss": 0.7059, "step": 3207 }, { - "epoch": 0.24274526124626386, - "grad_norm": 2.9942550659179688, - "learning_rate": 1.6232932562065727e-05, - "loss": 0.783, + "epoch": 0.22583597324885604, + "grad_norm": 2.277552604675293, + "learning_rate": 1.263919466031534e-05, + "loss": 0.7024, "step": 3208 }, { - "epoch": 0.24282092996859747, - "grad_norm": 2.903759241104126, - "learning_rate": 1.6231592064043298e-05, - "loss": 0.7758, + "epoch": 0.22590637099612812, + "grad_norm": 2.1183273792266846, + "learning_rate": 1.2638248883394237e-05, + "loss": 0.7392, "step": 3209 }, { - "epoch": 0.2428965986909311, - "grad_norm": 2.2011756896972656, - "learning_rate": 1.6230251113155188e-05, - "loss": 0.9041, + "epoch": 0.22597676874340022, + "grad_norm": 2.1569697856903076, + "learning_rate": 1.2637302813335472e-05, + "loss": 0.681, "step": 3210 }, { - "epoch": 0.24297226741326472, - "grad_norm": 2.3656907081604004, - "learning_rate": 1.622890970948537e-05, - "loss": 0.8788, + "epoch": 0.2260471664906723, + "grad_norm": 2.119476318359375, + "learning_rate": 1.2636356450188224e-05, + "loss": 0.8602, "step": 3211 }, { - "epoch": 0.24304793613559836, - "grad_norm": 2.8599255084991455, - "learning_rate": 1.6227567853117842e-05, - "loss": 0.8441, + "epoch": 0.22611756423794438, + "grad_norm": 2.224773406982422, + "learning_rate": 1.2635409794001704e-05, + "loss": 0.6906, "step": 3212 }, { - "epoch": 0.24312360485793197, - "grad_norm": 2.9189417362213135, - "learning_rate": 1.6226225544136638e-05, - "loss": 0.7811, + "epoch": 0.22618796198521648, + "grad_norm": 2.2786455154418945, + "learning_rate": 1.2634462844825122e-05, + "loss": 0.6978, "step": 3213 }, { - "epoch": 0.2431992735802656, - "grad_norm": 2.1617062091827393, - "learning_rate": 1.622488278262582e-05, - "loss": 0.6992, + "epoch": 0.22625835973248856, + "grad_norm": 1.9598420858383179, + "learning_rate": 1.2633515602707715e-05, + "loss": 0.736, "step": 3214 }, { - "epoch": 0.24327494230259922, - "grad_norm": 2.509108066558838, - "learning_rate": 1.6223539568669476e-05, - "loss": 0.7945, + "epoch": 0.22632875747976064, + "grad_norm": 2.5283362865448, + "learning_rate": 1.263256806769873e-05, + "loss": 0.8667, "step": 3215 }, { - "epoch": 0.24335061102493286, - "grad_norm": 2.4163920879364014, - "learning_rate": 1.6222195902351715e-05, - "loss": 0.7454, + "epoch": 0.22639915522703274, + "grad_norm": 2.3065173625946045, + "learning_rate": 1.263162023984743e-05, + "loss": 0.6962, "step": 3216 }, { - "epoch": 0.24342627974726647, - "grad_norm": 1.949989914894104, - "learning_rate": 1.622085178375669e-05, - "loss": 0.7069, + "epoch": 0.22646955297430482, + "grad_norm": 2.328864336013794, + "learning_rate": 1.2630672119203093e-05, + "loss": 0.6879, "step": 3217 }, { - "epoch": 0.2435019484696001, - "grad_norm": 2.657010078430176, - "learning_rate": 1.6219507212968568e-05, - "loss": 0.6595, + "epoch": 0.2265399507215769, + "grad_norm": 2.1773393154144287, + "learning_rate": 1.2629723705815013e-05, + "loss": 0.758, "step": 3218 }, { - "epoch": 0.24357761719193372, - "grad_norm": 2.2271595001220703, - "learning_rate": 1.6218162190071557e-05, - "loss": 0.8176, + "epoch": 0.226610348468849, + "grad_norm": 2.485766887664795, + "learning_rate": 1.2628774999732499e-05, + "loss": 0.8474, "step": 3219 }, { - "epoch": 0.24365328591426733, - "grad_norm": 2.0614137649536133, - "learning_rate": 1.6216816715149884e-05, - "loss": 0.8147, + "epoch": 0.22668074621612108, + "grad_norm": 2.210015058517456, + "learning_rate": 1.2627826001004875e-05, + "loss": 0.8623, "step": 3220 }, { - "epoch": 0.24372895463660096, - "grad_norm": 3.104268789291382, - "learning_rate": 1.6215470788287803e-05, - "loss": 0.7427, + "epoch": 0.22675114396339316, + "grad_norm": 1.7394312620162964, + "learning_rate": 1.262687670968148e-05, + "loss": 0.6691, "step": 3221 }, { - "epoch": 0.24380462335893457, - "grad_norm": 2.268542528152466, - "learning_rate": 1.6214124409569605e-05, - "loss": 0.78, + "epoch": 0.22682154171066526, + "grad_norm": 1.9511582851409912, + "learning_rate": 1.2625927125811672e-05, + "loss": 0.6788, "step": 3222 }, { - "epoch": 0.2438802920812682, - "grad_norm": 2.576122283935547, - "learning_rate": 1.6212777579079606e-05, - "loss": 0.7915, + "epoch": 0.22689193945793734, + "grad_norm": 2.749260663986206, + "learning_rate": 1.2624977249444812e-05, + "loss": 0.7989, "step": 3223 }, { - "epoch": 0.24395596080360182, - "grad_norm": 1.9363725185394287, - "learning_rate": 1.6211430296902145e-05, - "loss": 0.7399, + "epoch": 0.22696233720520945, + "grad_norm": 2.1582932472229004, + "learning_rate": 1.2624027080630295e-05, + "loss": 0.7314, "step": 3224 }, { - "epoch": 0.24403162952593546, - "grad_norm": 1.9388312101364136, - "learning_rate": 1.621008256312159e-05, - "loss": 0.7831, + "epoch": 0.22703273495248152, + "grad_norm": 2.693284273147583, + "learning_rate": 1.2623076619417516e-05, + "loss": 0.8089, "step": 3225 }, { - "epoch": 0.24410729824826907, - "grad_norm": 2.938326835632324, - "learning_rate": 1.620873437782235e-05, - "loss": 0.744, + "epoch": 0.2271031326997536, + "grad_norm": 2.0972797870635986, + "learning_rate": 1.2622125865855889e-05, + "loss": 0.6247, "step": 3226 }, { - "epoch": 0.2441829669706027, - "grad_norm": 2.0602774620056152, - "learning_rate": 1.6207385741088843e-05, - "loss": 0.7965, + "epoch": 0.2271735304470257, + "grad_norm": 1.7250609397888184, + "learning_rate": 1.2621174819994847e-05, + "loss": 0.781, "step": 3227 }, { - "epoch": 0.24425863569293632, - "grad_norm": 2.867607593536377, - "learning_rate": 1.620603665300553e-05, - "loss": 0.7248, + "epoch": 0.22724392819429778, + "grad_norm": 2.4797232151031494, + "learning_rate": 1.2620223481883836e-05, + "loss": 0.836, "step": 3228 }, { - "epoch": 0.24433430441526996, - "grad_norm": 2.3602488040924072, - "learning_rate": 1.6204687113656895e-05, - "loss": 0.6803, + "epoch": 0.22731432594156986, + "grad_norm": 1.7373640537261963, + "learning_rate": 1.2619271851572316e-05, + "loss": 0.7591, "step": 3229 }, { - "epoch": 0.24440997313760357, - "grad_norm": 2.2412686347961426, - "learning_rate": 1.6203337123127456e-05, - "loss": 0.7207, + "epoch": 0.22738472368884197, + "grad_norm": 1.588986873626709, + "learning_rate": 1.2618319929109763e-05, + "loss": 0.9471, "step": 3230 }, { - "epoch": 0.2444856418599372, - "grad_norm": 2.8568761348724365, - "learning_rate": 1.620198668150174e-05, - "loss": 0.7997, + "epoch": 0.22745512143611404, + "grad_norm": 2.433804750442505, + "learning_rate": 1.2617367714545671e-05, + "loss": 0.7179, "step": 3231 }, { - "epoch": 0.24456131058227082, - "grad_norm": 2.4794857501983643, - "learning_rate": 1.620063578886433e-05, - "loss": 0.6912, + "epoch": 0.22752551918338612, + "grad_norm": 2.173356533050537, + "learning_rate": 1.2616415207929542e-05, + "loss": 0.8176, "step": 3232 }, { - "epoch": 0.24463697930460443, - "grad_norm": 2.476989507675171, - "learning_rate": 1.6199284445299815e-05, - "loss": 0.9426, + "epoch": 0.22759591693065823, + "grad_norm": 2.1565680503845215, + "learning_rate": 1.26154624093109e-05, + "loss": 0.6832, "step": 3233 }, { - "epoch": 0.24471264802693807, - "grad_norm": 2.4754297733306885, - "learning_rate": 1.619793265089282e-05, - "loss": 0.8036, + "epoch": 0.2276663146779303, + "grad_norm": 1.8066478967666626, + "learning_rate": 1.2614509318739281e-05, + "loss": 0.7255, "step": 3234 }, { - "epoch": 0.24478831674927168, - "grad_norm": 2.367055654525757, - "learning_rate": 1.6196580405728005e-05, - "loss": 0.7739, + "epoch": 0.22773671242520238, + "grad_norm": 2.3634228706359863, + "learning_rate": 1.2613555936264239e-05, + "loss": 0.8258, "step": 3235 }, { - "epoch": 0.24486398547160532, - "grad_norm": 2.6627862453460693, - "learning_rate": 1.6195227709890047e-05, - "loss": 0.7749, + "epoch": 0.2278071101724745, + "grad_norm": 1.8965363502502441, + "learning_rate": 1.261260226193534e-05, + "loss": 0.6775, "step": 3236 }, { - "epoch": 0.24493965419393893, - "grad_norm": 2.5135037899017334, - "learning_rate": 1.6193874563463657e-05, - "loss": 0.7816, + "epoch": 0.22787750791974656, + "grad_norm": 2.0862910747528076, + "learning_rate": 1.2611648295802165e-05, + "loss": 0.6308, "step": 3237 }, { - "epoch": 0.24501532291627257, - "grad_norm": 3.7979977130889893, - "learning_rate": 1.6192520966533574e-05, - "loss": 0.7276, + "epoch": 0.22794790566701867, + "grad_norm": 2.4328742027282715, + "learning_rate": 1.2610694037914312e-05, + "loss": 0.7868, "step": 3238 }, { - "epoch": 0.24509099163860618, - "grad_norm": 2.5826263427734375, - "learning_rate": 1.6191166919184564e-05, - "loss": 0.7003, + "epoch": 0.22801830341429075, + "grad_norm": 2.1927382946014404, + "learning_rate": 1.26097394883214e-05, + "loss": 0.7635, "step": 3239 }, { - "epoch": 0.24516666036093981, - "grad_norm": 2.326385259628296, - "learning_rate": 1.6189812421501424e-05, - "loss": 0.549, + "epoch": 0.22808870116156282, + "grad_norm": 2.309882402420044, + "learning_rate": 1.2608784647073046e-05, + "loss": 0.6652, "step": 3240 }, { - "epoch": 0.24524232908327342, - "grad_norm": 2.296499490737915, - "learning_rate": 1.6188457473568974e-05, - "loss": 0.8263, + "epoch": 0.22815909890883493, + "grad_norm": 2.3755128383636475, + "learning_rate": 1.26078295142189e-05, + "loss": 0.696, "step": 3241 }, { - "epoch": 0.24531799780560706, - "grad_norm": 2.9421603679656982, - "learning_rate": 1.6187102075472067e-05, - "loss": 0.8154, + "epoch": 0.228229496656107, + "grad_norm": 2.011117696762085, + "learning_rate": 1.260687408980862e-05, + "loss": 0.7774, "step": 3242 }, { - "epoch": 0.24539366652794067, - "grad_norm": 2.3418660163879395, - "learning_rate": 1.6185746227295585e-05, - "loss": 0.8657, + "epoch": 0.22829989440337908, + "grad_norm": 1.9716317653656006, + "learning_rate": 1.2605918373891876e-05, + "loss": 0.7241, "step": 3243 }, { - "epoch": 0.2454693352502743, - "grad_norm": 2.390544891357422, - "learning_rate": 1.618438992912443e-05, - "loss": 0.7327, + "epoch": 0.2283702921506512, + "grad_norm": 2.136734962463379, + "learning_rate": 1.2604962366518361e-05, + "loss": 0.7392, "step": 3244 }, { - "epoch": 0.24554500397260792, - "grad_norm": 2.8358356952667236, - "learning_rate": 1.6183033181043542e-05, - "loss": 0.9002, + "epoch": 0.22844068989792327, + "grad_norm": 2.2782981395721436, + "learning_rate": 1.2604006067737775e-05, + "loss": 0.6244, "step": 3245 }, { - "epoch": 0.24562067269494153, - "grad_norm": 2.2084474563598633, - "learning_rate": 1.6181675983137884e-05, - "loss": 0.7483, + "epoch": 0.22851108764519534, + "grad_norm": 1.8229858875274658, + "learning_rate": 1.260304947759984e-05, + "loss": 0.7783, "step": 3246 }, { - "epoch": 0.24569634141727517, - "grad_norm": 2.7843167781829834, - "learning_rate": 1.6180318335492445e-05, - "loss": 0.7849, + "epoch": 0.22858148539246745, + "grad_norm": 2.9556033611297607, + "learning_rate": 1.2602092596154286e-05, + "loss": 0.7609, "step": 3247 }, { - "epoch": 0.24577201013960878, - "grad_norm": 2.2138888835906982, - "learning_rate": 1.617896023819225e-05, - "loss": 0.7737, + "epoch": 0.22865188313973953, + "grad_norm": 2.0882041454315186, + "learning_rate": 1.2601135423450865e-05, + "loss": 0.8806, "step": 3248 }, { - "epoch": 0.24584767886194242, - "grad_norm": 2.3047590255737305, - "learning_rate": 1.6177601691322344e-05, - "loss": 0.6689, + "epoch": 0.2287222808870116, + "grad_norm": 1.9104455709457397, + "learning_rate": 1.2600177959539342e-05, + "loss": 0.7326, "step": 3249 }, { - "epoch": 0.24592334758427603, - "grad_norm": 2.421382188796997, - "learning_rate": 1.6176242694967803e-05, - "loss": 0.697, + "epoch": 0.2287926786342837, + "grad_norm": 1.987589955329895, + "learning_rate": 1.2599220204469494e-05, + "loss": 0.7701, "step": 3250 }, { - "epoch": 0.24599901630660967, - "grad_norm": 2.865973472595215, - "learning_rate": 1.6174883249213736e-05, - "loss": 0.6845, + "epoch": 0.22886307638155579, + "grad_norm": 1.8129445314407349, + "learning_rate": 1.2598262158291117e-05, + "loss": 0.7092, "step": 3251 }, { - "epoch": 0.24607468502894328, - "grad_norm": 2.016404628753662, - "learning_rate": 1.6173523354145275e-05, - "loss": 0.7247, + "epoch": 0.2289334741288279, + "grad_norm": 2.353976011276245, + "learning_rate": 1.2597303821054019e-05, + "loss": 0.6502, "step": 3252 }, { - "epoch": 0.24615035375127692, - "grad_norm": 2.101076602935791, - "learning_rate": 1.617216300984758e-05, - "loss": 0.9097, + "epoch": 0.22900387187609997, + "grad_norm": 1.659989356994629, + "learning_rate": 1.2596345192808028e-05, + "loss": 0.6402, "step": 3253 }, { - "epoch": 0.24622602247361053, - "grad_norm": 2.44075083732605, - "learning_rate": 1.6170802216405835e-05, - "loss": 0.7383, + "epoch": 0.22907426962337205, + "grad_norm": 2.2390048503875732, + "learning_rate": 1.259538627360298e-05, + "loss": 0.7011, "step": 3254 }, { - "epoch": 0.24630169119594417, - "grad_norm": 2.7942304611206055, - "learning_rate": 1.6169440973905266e-05, - "loss": 0.862, + "epoch": 0.22914466737064415, + "grad_norm": 2.111513376235962, + "learning_rate": 1.2594427063488733e-05, + "loss": 0.7345, "step": 3255 }, { - "epoch": 0.24637735991827778, - "grad_norm": 2.607255458831787, - "learning_rate": 1.6168079282431113e-05, - "loss": 0.8421, + "epoch": 0.22921506511791623, + "grad_norm": 1.6954584121704102, + "learning_rate": 1.2593467562515157e-05, + "loss": 0.6646, "step": 3256 }, { - "epoch": 0.24645302864061142, - "grad_norm": 3.629255771636963, - "learning_rate": 1.6166717142068654e-05, - "loss": 0.7941, + "epoch": 0.2292854628651883, + "grad_norm": 10.823022842407227, + "learning_rate": 1.2592507770732136e-05, + "loss": 0.7804, "step": 3257 }, { - "epoch": 0.24652869736294503, - "grad_norm": 3.293483257293701, - "learning_rate": 1.6165354552903182e-05, - "loss": 0.9336, + "epoch": 0.2293558606124604, + "grad_norm": 1.9586788415908813, + "learning_rate": 1.2591547688189572e-05, + "loss": 0.7299, "step": 3258 }, { - "epoch": 0.24660436608527864, - "grad_norm": 2.2481791973114014, - "learning_rate": 1.6163991515020035e-05, - "loss": 0.7163, + "epoch": 0.2294262583597325, + "grad_norm": 2.0736536979675293, + "learning_rate": 1.2590587314937379e-05, + "loss": 0.8146, "step": 3259 }, { - "epoch": 0.24668003480761228, - "grad_norm": 5.270366668701172, - "learning_rate": 1.616262802850457e-05, - "loss": 0.8224, + "epoch": 0.22949665610700457, + "grad_norm": 3.873579740524292, + "learning_rate": 1.258962665102549e-05, + "loss": 0.7456, "step": 3260 }, { - "epoch": 0.24675570352994589, - "grad_norm": 2.6033430099487305, - "learning_rate": 1.616126409344217e-05, - "loss": 0.8148, + "epoch": 0.22956705385427667, + "grad_norm": 2.1225738525390625, + "learning_rate": 1.2588665696503847e-05, + "loss": 0.7465, "step": 3261 }, { - "epoch": 0.24683137225227952, - "grad_norm": 3.4595351219177246, - "learning_rate": 1.6159899709918247e-05, - "loss": 0.653, + "epoch": 0.22963745160154875, + "grad_norm": 2.3662734031677246, + "learning_rate": 1.2587704451422414e-05, + "loss": 0.78, "step": 3262 }, { - "epoch": 0.24690704097461313, - "grad_norm": 2.154088020324707, - "learning_rate": 1.615853487801825e-05, - "loss": 0.8671, + "epoch": 0.22970784934882083, + "grad_norm": 2.075134038925171, + "learning_rate": 1.2586742915831166e-05, + "loss": 0.624, "step": 3263 }, { - "epoch": 0.24698270969694677, - "grad_norm": 2.9141697883605957, - "learning_rate": 1.615716959782764e-05, - "loss": 0.846, + "epoch": 0.22977824709609293, + "grad_norm": 2.116715669631958, + "learning_rate": 1.2585781089780092e-05, + "loss": 0.6531, "step": 3264 }, { - "epoch": 0.24705837841928038, - "grad_norm": 2.8304619789123535, - "learning_rate": 1.6155803869431927e-05, - "loss": 0.584, + "epoch": 0.229848644843365, + "grad_norm": 1.956315279006958, + "learning_rate": 1.2584818973319203e-05, + "loss": 0.7924, "step": 3265 }, { - "epoch": 0.24713404714161402, - "grad_norm": 6.038189888000488, - "learning_rate": 1.615443769291663e-05, - "loss": 0.5921, + "epoch": 0.2299190425906371, + "grad_norm": 2.15643572807312, + "learning_rate": 1.2583856566498518e-05, + "loss": 0.7468, "step": 3266 }, { - "epoch": 0.24720971586394763, - "grad_norm": 2.2429237365722656, - "learning_rate": 1.6153071068367302e-05, - "loss": 0.7524, + "epoch": 0.2299894403379092, + "grad_norm": 3.096966028213501, + "learning_rate": 1.2582893869368071e-05, + "loss": 0.7962, "step": 3267 }, { - "epoch": 0.24728538458628127, - "grad_norm": 2.542436361312866, - "learning_rate": 1.6151703995869533e-05, - "loss": 0.6946, + "epoch": 0.23005983808518127, + "grad_norm": 2.2163333892822266, + "learning_rate": 1.2581930881977918e-05, + "loss": 0.7396, "step": 3268 }, { - "epoch": 0.24736105330861488, - "grad_norm": 2.85803484916687, - "learning_rate": 1.6150336475508923e-05, - "loss": 0.9324, + "epoch": 0.23013023583245337, + "grad_norm": 3.69081449508667, + "learning_rate": 1.258096760437812e-05, + "loss": 0.7171, "step": 3269 }, { - "epoch": 0.24743672203094852, - "grad_norm": 2.8291127681732178, - "learning_rate": 1.614896850737112e-05, - "loss": 0.933, + "epoch": 0.23020063357972545, + "grad_norm": 2.072676658630371, + "learning_rate": 1.2580004036618765e-05, + "loss": 0.7301, "step": 3270 }, { - "epoch": 0.24751239075328213, - "grad_norm": 3.0022454261779785, - "learning_rate": 1.6147600091541782e-05, - "loss": 0.5986, + "epoch": 0.23027103132699753, + "grad_norm": 2.1513185501098633, + "learning_rate": 1.2579040178749944e-05, + "loss": 0.7662, "step": 3271 }, { - "epoch": 0.24758805947561574, - "grad_norm": 2.692591428756714, - "learning_rate": 1.614623122810661e-05, - "loss": 0.7886, + "epoch": 0.23034142907426963, + "grad_norm": 2.266205072402954, + "learning_rate": 1.2578076030821772e-05, + "loss": 0.6636, "step": 3272 }, { - "epoch": 0.24766372819794938, - "grad_norm": 3.29293155670166, - "learning_rate": 1.6144861917151322e-05, - "loss": 0.8193, + "epoch": 0.2304118268215417, + "grad_norm": 3.4740803241729736, + "learning_rate": 1.2577111592884378e-05, + "loss": 0.8187, "step": 3273 }, { - "epoch": 0.247739396920283, - "grad_norm": 2.4391543865203857, - "learning_rate": 1.614349215876168e-05, - "loss": 0.8077, + "epoch": 0.2304822245688138, + "grad_norm": 2.072700023651123, + "learning_rate": 1.2576146864987897e-05, + "loss": 0.7506, "step": 3274 }, { - "epoch": 0.24781506564261663, - "grad_norm": 2.349703788757324, - "learning_rate": 1.6142121953023447e-05, - "loss": 0.7945, + "epoch": 0.2305526223160859, + "grad_norm": 1.8920912742614746, + "learning_rate": 1.2575181847182495e-05, + "loss": 0.8067, "step": 3275 }, { - "epoch": 0.24789073436495024, - "grad_norm": 2.3136823177337646, - "learning_rate": 1.6140751300022437e-05, - "loss": 0.7307, + "epoch": 0.23062302006335797, + "grad_norm": 1.8544228076934814, + "learning_rate": 1.2574216539518337e-05, + "loss": 0.8246, "step": 3276 }, { - "epoch": 0.24796640308728388, - "grad_norm": 2.6560375690460205, - "learning_rate": 1.6139380199844487e-05, - "loss": 0.7428, + "epoch": 0.23069341781063005, + "grad_norm": 2.289433717727661, + "learning_rate": 1.2573250942045612e-05, + "loss": 0.7043, "step": 3277 }, { - "epoch": 0.2480420718096175, - "grad_norm": 2.5470519065856934, - "learning_rate": 1.6138008652575455e-05, - "loss": 0.7495, + "epoch": 0.23076381555790215, + "grad_norm": 1.971211552619934, + "learning_rate": 1.2572285054814523e-05, + "loss": 0.7838, "step": 3278 }, { - "epoch": 0.24811774053195113, - "grad_norm": 3.2811646461486816, - "learning_rate": 1.6136636658301236e-05, - "loss": 0.9781, + "epoch": 0.23083421330517423, + "grad_norm": 2.7419793605804443, + "learning_rate": 1.257131887787529e-05, + "loss": 0.787, "step": 3279 }, { - "epoch": 0.24819340925428474, - "grad_norm": 2.395357608795166, - "learning_rate": 1.6135264217107744e-05, - "loss": 0.6281, + "epoch": 0.23090461105244633, + "grad_norm": 2.144624948501587, + "learning_rate": 1.2570352411278141e-05, + "loss": 0.6714, "step": 3280 }, { - "epoch": 0.24826907797661837, - "grad_norm": 2.6726183891296387, - "learning_rate": 1.6133891329080933e-05, - "loss": 0.7946, + "epoch": 0.2309750087997184, + "grad_norm": 2.1271135807037354, + "learning_rate": 1.2569385655073326e-05, + "loss": 0.6856, "step": 3281 }, { - "epoch": 0.24834474669895198, - "grad_norm": 2.373309373855591, - "learning_rate": 1.6132517994306767e-05, - "loss": 0.7204, + "epoch": 0.2310454065469905, + "grad_norm": 2.156649589538574, + "learning_rate": 1.2568418609311107e-05, + "loss": 0.685, "step": 3282 }, { - "epoch": 0.24842041542128562, - "grad_norm": 2.439506769180298, - "learning_rate": 1.6131144212871264e-05, - "loss": 0.7067, + "epoch": 0.2311158042942626, + "grad_norm": 2.0018012523651123, + "learning_rate": 1.2567451274041761e-05, + "loss": 0.7608, "step": 3283 }, { - "epoch": 0.24849608414361923, - "grad_norm": 2.1670548915863037, - "learning_rate": 1.6129769984860435e-05, - "loss": 0.9527, + "epoch": 0.23118620204153467, + "grad_norm": 2.4628849029541016, + "learning_rate": 1.2566483649315582e-05, + "loss": 0.7235, "step": 3284 }, { - "epoch": 0.24857175286595287, - "grad_norm": 2.238982677459717, - "learning_rate": 1.6128395310360356e-05, - "loss": 0.6461, + "epoch": 0.23125659978880675, + "grad_norm": 2.0791099071502686, + "learning_rate": 1.2565515735182876e-05, + "loss": 0.7281, "step": 3285 }, { - "epoch": 0.24864742158828648, - "grad_norm": 3.4354398250579834, - "learning_rate": 1.6127020189457107e-05, - "loss": 0.7255, + "epoch": 0.23132699753607885, + "grad_norm": 2.190547466278076, + "learning_rate": 1.2564547531693965e-05, + "loss": 0.8616, "step": 3286 }, { - "epoch": 0.2487230903106201, - "grad_norm": 3.187068223953247, - "learning_rate": 1.6125644622236797e-05, - "loss": 0.9041, + "epoch": 0.23139739528335093, + "grad_norm": 2.345242738723755, + "learning_rate": 1.256357903889919e-05, + "loss": 0.7768, "step": 3287 }, { - "epoch": 0.24879875903295373, - "grad_norm": 2.2371320724487305, - "learning_rate": 1.6124268608785578e-05, - "loss": 0.7082, + "epoch": 0.231467793030623, + "grad_norm": 1.7824294567108154, + "learning_rate": 1.2562610256848902e-05, + "loss": 0.6736, "step": 3288 }, { - "epoch": 0.24887442775528734, - "grad_norm": 1.9986299276351929, - "learning_rate": 1.6122892149189616e-05, - "loss": 0.6645, + "epoch": 0.23153819077789511, + "grad_norm": 2.132648229598999, + "learning_rate": 1.256164118559347e-05, + "loss": 0.7699, "step": 3289 }, { - "epoch": 0.24895009647762098, - "grad_norm": 2.3427042961120605, - "learning_rate": 1.6121515243535107e-05, - "loss": 0.7438, + "epoch": 0.2316085885251672, + "grad_norm": 2.1597702503204346, + "learning_rate": 1.2560671825183276e-05, + "loss": 0.7806, "step": 3290 }, { - "epoch": 0.2490257651999546, - "grad_norm": 11.90526294708252, - "learning_rate": 1.612013789190828e-05, - "loss": 0.7747, + "epoch": 0.23167898627243927, + "grad_norm": 2.6501755714416504, + "learning_rate": 1.2559702175668714e-05, + "loss": 0.6946, "step": 3291 }, { - "epoch": 0.24910143392228823, - "grad_norm": 2.641674280166626, - "learning_rate": 1.611876009439539e-05, - "loss": 0.8812, + "epoch": 0.23174938401971137, + "grad_norm": 1.8346039056777954, + "learning_rate": 1.2558732237100204e-05, + "loss": 0.726, "step": 3292 }, { - "epoch": 0.24917710264462184, - "grad_norm": 2.691256046295166, - "learning_rate": 1.6117381851082717e-05, - "loss": 0.6981, + "epoch": 0.23181978176698345, + "grad_norm": 1.824737310409546, + "learning_rate": 1.255776200952817e-05, + "loss": 0.7255, "step": 3293 }, { - "epoch": 0.24925277136695548, - "grad_norm": 2.4926040172576904, - "learning_rate": 1.6116003162056574e-05, - "loss": 0.7059, + "epoch": 0.23189017951425556, + "grad_norm": 1.914135217666626, + "learning_rate": 1.2556791493003055e-05, + "loss": 0.7906, "step": 3294 }, { - "epoch": 0.2493284400892891, - "grad_norm": 1.984239101409912, - "learning_rate": 1.6114624027403297e-05, - "loss": 0.7439, + "epoch": 0.23196057726152763, + "grad_norm": 2.1352429389953613, + "learning_rate": 1.2555820687575318e-05, + "loss": 0.7275, "step": 3295 }, { - "epoch": 0.24940410881162273, - "grad_norm": 2.4202473163604736, - "learning_rate": 1.611324444720925e-05, - "loss": 0.8114, + "epoch": 0.2320309750087997, + "grad_norm": 2.0465381145477295, + "learning_rate": 1.2554849593295432e-05, + "loss": 0.8539, "step": 3296 }, { - "epoch": 0.24947977753395634, - "grad_norm": 2.3358452320098877, - "learning_rate": 1.611186442156083e-05, - "loss": 0.7779, + "epoch": 0.23210137275607182, + "grad_norm": 2.1793861389160156, + "learning_rate": 1.2553878210213886e-05, + "loss": 0.8031, "step": 3297 }, { - "epoch": 0.24955544625628998, - "grad_norm": 2.353821277618408, - "learning_rate": 1.6110483950544454e-05, - "loss": 0.7116, + "epoch": 0.2321717705033439, + "grad_norm": 1.9403294324874878, + "learning_rate": 1.2552906538381181e-05, + "loss": 0.6702, "step": 3298 }, { - "epoch": 0.24963111497862359, - "grad_norm": 2.44838547706604, - "learning_rate": 1.610910303424658e-05, - "loss": 0.7111, + "epoch": 0.23224216825061597, + "grad_norm": 1.9818615913391113, + "learning_rate": 1.2551934577847836e-05, + "loss": 0.7738, "step": 3299 }, { - "epoch": 0.2497067837009572, - "grad_norm": 2.416057586669922, - "learning_rate": 1.6107721672753678e-05, - "loss": 0.7076, + "epoch": 0.23231256599788808, + "grad_norm": 1.8916300535202026, + "learning_rate": 1.2550962328664384e-05, + "loss": 0.7712, "step": 3300 }, { - "epoch": 0.24978245242329083, - "grad_norm": 2.881209373474121, - "learning_rate": 1.6106339866152255e-05, - "loss": 0.9059, + "epoch": 0.23238296374516015, + "grad_norm": 2.0202574729919434, + "learning_rate": 1.2549989790881371e-05, + "loss": 0.6601, "step": 3301 }, { - "epoch": 0.24985812114562445, - "grad_norm": 2.15285325050354, - "learning_rate": 1.6104957614528846e-05, - "loss": 0.6258, + "epoch": 0.23245336149243223, + "grad_norm": 2.407169818878174, + "learning_rate": 1.2549016964549367e-05, + "loss": 0.7829, "step": 3302 }, { - "epoch": 0.24993378986795808, - "grad_norm": 2.7021114826202393, - "learning_rate": 1.610357491797001e-05, - "loss": 0.8291, + "epoch": 0.23252375923970434, + "grad_norm": 1.990968942642212, + "learning_rate": 1.2548043849718943e-05, + "loss": 0.691, "step": 3303 }, { - "epoch": 0.2500094585902917, - "grad_norm": 2.52158260345459, - "learning_rate": 1.6102191776562335e-05, - "loss": 0.7096, + "epoch": 0.2325941569869764, + "grad_norm": 1.8612924814224243, + "learning_rate": 1.2547070446440695e-05, + "loss": 0.725, "step": 3304 }, { - "epoch": 0.25008512731262533, - "grad_norm": 2.6078898906707764, - "learning_rate": 1.6100808190392446e-05, - "loss": 0.7502, + "epoch": 0.2326645547342485, + "grad_norm": 2.2716197967529297, + "learning_rate": 1.2546096754765232e-05, + "loss": 0.8063, "step": 3305 }, { - "epoch": 0.25016079603495894, - "grad_norm": 2.7016923427581787, - "learning_rate": 1.6099424159546976e-05, - "loss": 0.6632, + "epoch": 0.2327349524815206, + "grad_norm": 1.826540231704712, + "learning_rate": 1.2545122774743174e-05, + "loss": 0.7283, "step": 3306 }, { - "epoch": 0.25023646475729255, - "grad_norm": 2.718710422515869, - "learning_rate": 1.6098039684112605e-05, - "loss": 0.6973, + "epoch": 0.23280535022879267, + "grad_norm": 2.2173500061035156, + "learning_rate": 1.2544148506425162e-05, + "loss": 0.7555, "step": 3307 }, { - "epoch": 0.2503121334796262, - "grad_norm": 2.2537267208099365, - "learning_rate": 1.6096654764176027e-05, - "loss": 0.6324, + "epoch": 0.23287574797606478, + "grad_norm": 2.032494306564331, + "learning_rate": 1.2543173949861847e-05, + "loss": 0.6161, "step": 3308 }, { - "epoch": 0.25038780220195983, - "grad_norm": 2.0894060134887695, - "learning_rate": 1.609526939982398e-05, - "loss": 0.711, + "epoch": 0.23294614572333686, + "grad_norm": 2.063899040222168, + "learning_rate": 1.25421991051039e-05, + "loss": 0.6629, "step": 3309 }, { - "epoch": 0.25046347092429344, - "grad_norm": 2.1615793704986572, - "learning_rate": 1.6093883591143212e-05, - "loss": 0.8218, + "epoch": 0.23301654347060893, + "grad_norm": 1.859447717666626, + "learning_rate": 1.2541223972202002e-05, + "loss": 0.6877, "step": 3310 }, { - "epoch": 0.25053913964662705, - "grad_norm": 2.357072114944458, - "learning_rate": 1.609249733822051e-05, - "loss": 0.7687, + "epoch": 0.23308694121788104, + "grad_norm": 2.1296629905700684, + "learning_rate": 1.2540248551206848e-05, + "loss": 0.7755, "step": 3311 }, { - "epoch": 0.2506148083689607, - "grad_norm": 2.5962414741516113, - "learning_rate": 1.6091110641142683e-05, - "loss": 1.0468, + "epoch": 0.23315733896515312, + "grad_norm": 1.701793909072876, + "learning_rate": 1.2539272842169157e-05, + "loss": 0.7593, "step": 3312 }, { - "epoch": 0.25069047709129433, - "grad_norm": 2.7395946979522705, - "learning_rate": 1.608972349999657e-05, - "loss": 0.7615, + "epoch": 0.2332277367124252, + "grad_norm": 2.098773956298828, + "learning_rate": 1.2538296845139655e-05, + "loss": 0.7793, "step": 3313 }, { - "epoch": 0.25076614581362794, - "grad_norm": 3.2760064601898193, - "learning_rate": 1.6088335914869047e-05, - "loss": 0.7543, + "epoch": 0.2332981344596973, + "grad_norm": 1.987886905670166, + "learning_rate": 1.2537320560169082e-05, + "loss": 0.7122, "step": 3314 }, { - "epoch": 0.25084181453596155, - "grad_norm": 2.215672254562378, - "learning_rate": 1.6086947885846997e-05, - "loss": 0.8131, + "epoch": 0.23336853220696938, + "grad_norm": 1.7746961116790771, + "learning_rate": 1.2536343987308202e-05, + "loss": 0.8063, "step": 3315 }, { - "epoch": 0.25091748325829516, - "grad_norm": 2.628455638885498, - "learning_rate": 1.6085559413017353e-05, - "loss": 0.8267, + "epoch": 0.23343892995424145, + "grad_norm": 2.022796630859375, + "learning_rate": 1.253536712660778e-05, + "loss": 0.8551, "step": 3316 }, { - "epoch": 0.2509931519806288, - "grad_norm": 2.1428425312042236, - "learning_rate": 1.608417049646706e-05, - "loss": 0.6553, + "epoch": 0.23350932770151356, + "grad_norm": 2.137247323989868, + "learning_rate": 1.2534389978118609e-05, + "loss": 0.7208, "step": 3317 }, { - "epoch": 0.25106882070296244, - "grad_norm": 2.397225856781006, - "learning_rate": 1.6082781136283094e-05, - "loss": 0.7837, + "epoch": 0.23357972544878564, + "grad_norm": 2.0972867012023926, + "learning_rate": 1.253341254189149e-05, + "loss": 0.7424, "step": 3318 }, { - "epoch": 0.25114448942529605, - "grad_norm": 1.8074523210525513, - "learning_rate": 1.6081391332552464e-05, - "loss": 0.6386, + "epoch": 0.2336501231960577, + "grad_norm": 1.7825546264648438, + "learning_rate": 1.2532434817977242e-05, + "loss": 0.6755, "step": 3319 }, { - "epoch": 0.25122015814762966, - "grad_norm": 2.304368257522583, - "learning_rate": 1.608000108536221e-05, - "loss": 0.7698, + "epoch": 0.23372052094332982, + "grad_norm": 1.8914794921875, + "learning_rate": 1.25314568064267e-05, + "loss": 0.7222, "step": 3320 }, { - "epoch": 0.2512958268699633, - "grad_norm": 2.147972583770752, - "learning_rate": 1.6078610394799386e-05, - "loss": 0.77, + "epoch": 0.2337909186906019, + "grad_norm": 2.1860299110412598, + "learning_rate": 1.2530478507290708e-05, + "loss": 0.762, "step": 3321 }, { - "epoch": 0.25137149559229693, - "grad_norm": 2.355785846710205, - "learning_rate": 1.6077219260951082e-05, - "loss": 0.6399, + "epoch": 0.233861316437874, + "grad_norm": 1.8418399095535278, + "learning_rate": 1.2529499920620127e-05, + "loss": 0.6711, "step": 3322 }, { - "epoch": 0.25144716431463054, - "grad_norm": 2.293780565261841, - "learning_rate": 1.607582768390442e-05, - "loss": 0.7807, + "epoch": 0.23393171418514608, + "grad_norm": 2.1422972679138184, + "learning_rate": 1.252852104646584e-05, + "loss": 0.6227, "step": 3323 }, { - "epoch": 0.25152283303696416, - "grad_norm": 1.981724500656128, - "learning_rate": 1.6074435663746543e-05, - "loss": 0.6969, + "epoch": 0.23400211193241816, + "grad_norm": 2.1758105754852295, + "learning_rate": 1.2527541884878737e-05, + "loss": 0.8405, "step": 3324 }, { - "epoch": 0.2515985017592978, - "grad_norm": 2.2810912132263184, - "learning_rate": 1.6073043200564623e-05, - "loss": 0.711, + "epoch": 0.23407250967969026, + "grad_norm": 2.5638813972473145, + "learning_rate": 1.2526562435909723e-05, + "loss": 0.8173, "step": 3325 }, { - "epoch": 0.25167417048163143, - "grad_norm": 1.999266266822815, - "learning_rate": 1.607165029444586e-05, - "loss": 0.7184, + "epoch": 0.23414290742696234, + "grad_norm": 2.017564535140991, + "learning_rate": 1.2525582699609724e-05, + "loss": 0.757, "step": 3326 }, { - "epoch": 0.25174983920396504, - "grad_norm": 2.708433151245117, - "learning_rate": 1.6070256945477485e-05, - "loss": 0.7204, + "epoch": 0.23421330517423442, + "grad_norm": 1.9185298681259155, + "learning_rate": 1.2524602676029678e-05, + "loss": 0.6624, "step": 3327 }, { - "epoch": 0.25182550792629865, - "grad_norm": 2.3254494667053223, - "learning_rate": 1.606886315374675e-05, - "loss": 0.6824, + "epoch": 0.23428370292150652, + "grad_norm": 2.1272456645965576, + "learning_rate": 1.2523622365220534e-05, + "loss": 0.8177, "step": 3328 }, { - "epoch": 0.25190117664863226, - "grad_norm": 2.2623493671417236, - "learning_rate": 1.606746891934094e-05, - "loss": 0.8442, + "epoch": 0.2343541006687786, + "grad_norm": 2.089553117752075, + "learning_rate": 1.252264176723326e-05, + "loss": 0.6694, "step": 3329 }, { - "epoch": 0.25197684537096593, - "grad_norm": 2.412431240081787, - "learning_rate": 1.606607424234737e-05, - "loss": 0.8786, + "epoch": 0.23442449841605068, + "grad_norm": 1.7995353937149048, + "learning_rate": 1.252166088211884e-05, + "loss": 0.7053, "step": 3330 }, { - "epoch": 0.25205251409329954, - "grad_norm": 2.111781597137451, - "learning_rate": 1.6064679122853372e-05, - "loss": 0.836, + "epoch": 0.23449489616332278, + "grad_norm": 2.19932222366333, + "learning_rate": 1.252067970992827e-05, + "loss": 0.7339, "step": 3331 }, { - "epoch": 0.25212818281563315, - "grad_norm": 2.9553704261779785, - "learning_rate": 1.6063283560946322e-05, - "loss": 0.8473, + "epoch": 0.23456529391059486, + "grad_norm": 1.7885493040084839, + "learning_rate": 1.251969825071256e-05, + "loss": 0.6741, "step": 3332 }, { - "epoch": 0.25220385153796676, - "grad_norm": 2.696552276611328, - "learning_rate": 1.6061887556713608e-05, - "loss": 0.7043, + "epoch": 0.23463569165786693, + "grad_norm": 3.0731098651885986, + "learning_rate": 1.251871650452274e-05, + "loss": 0.7311, "step": 3333 }, { - "epoch": 0.2522795202603004, - "grad_norm": 1.8179265260696411, - "learning_rate": 1.6060491110242655e-05, - "loss": 0.8593, + "epoch": 0.23470608940513904, + "grad_norm": 1.8317965269088745, + "learning_rate": 1.2517734471409853e-05, + "loss": 0.7598, "step": 3334 }, { - "epoch": 0.25235518898263404, - "grad_norm": 2.586488962173462, - "learning_rate": 1.6059094221620913e-05, - "loss": 0.8374, + "epoch": 0.23477648715241112, + "grad_norm": 1.9327473640441895, + "learning_rate": 1.2516752151424953e-05, + "loss": 0.8459, "step": 3335 }, { - "epoch": 0.25243085770496765, - "grad_norm": 2.9181783199310303, - "learning_rate": 1.6057696890935857e-05, - "loss": 0.8011, + "epoch": 0.23484688489968322, + "grad_norm": 2.6123898029327393, + "learning_rate": 1.251576954461911e-05, + "loss": 0.6313, "step": 3336 }, { - "epoch": 0.25250652642730126, - "grad_norm": 2.254702568054199, - "learning_rate": 1.6056299118274993e-05, - "loss": 0.6613, + "epoch": 0.2349172826469553, + "grad_norm": 2.218064069747925, + "learning_rate": 1.2514786651043416e-05, + "loss": 0.7803, "step": 3337 }, { - "epoch": 0.2525821951496349, - "grad_norm": 1.7335007190704346, - "learning_rate": 1.6054900903725856e-05, - "loss": 0.6857, + "epoch": 0.23498768039422738, + "grad_norm": 2.0571038722991943, + "learning_rate": 1.2513803470748967e-05, + "loss": 0.8046, "step": 3338 }, { - "epoch": 0.25265786387196854, - "grad_norm": 2.5397019386291504, - "learning_rate": 1.605350224737601e-05, - "loss": 0.911, + "epoch": 0.23505807814149948, + "grad_norm": 2.0950565338134766, + "learning_rate": 1.2512820003786884e-05, + "loss": 0.7992, "step": 3339 }, { - "epoch": 0.25273353259430215, - "grad_norm": 1.3949565887451172, - "learning_rate": 1.6052103149313037e-05, - "loss": 0.9984, + "epoch": 0.23512847588877156, + "grad_norm": 1.7326592206954956, + "learning_rate": 1.2511836250208296e-05, + "loss": 0.6797, "step": 3340 }, { - "epoch": 0.25280920131663576, - "grad_norm": 1.854061245918274, - "learning_rate": 1.6050703609624554e-05, - "loss": 0.7489, + "epoch": 0.23519887363604364, + "grad_norm": 1.8432408571243286, + "learning_rate": 1.2510852210064348e-05, + "loss": 0.7448, "step": 3341 }, { - "epoch": 0.25288487003896937, - "grad_norm": 2.37091064453125, - "learning_rate": 1.604930362839821e-05, - "loss": 0.7097, + "epoch": 0.23526927138331574, + "grad_norm": 2.3240628242492676, + "learning_rate": 1.2509867883406204e-05, + "loss": 0.8235, "step": 3342 }, { - "epoch": 0.25296053876130303, - "grad_norm": 2.4179093837738037, - "learning_rate": 1.604790320572167e-05, - "loss": 0.7679, + "epoch": 0.23533966913058782, + "grad_norm": 1.771673560142517, + "learning_rate": 1.2508883270285037e-05, + "loss": 0.7017, "step": 3343 }, { - "epoch": 0.25303620748363664, - "grad_norm": 2.1405601501464844, - "learning_rate": 1.6046502341682637e-05, - "loss": 0.6784, + "epoch": 0.2354100668778599, + "grad_norm": 1.9256126880645752, + "learning_rate": 1.250789837075204e-05, + "loss": 0.7592, "step": 3344 }, { - "epoch": 0.25311187620597025, - "grad_norm": 2.4001498222351074, - "learning_rate": 1.6045101036368833e-05, - "loss": 0.7994, + "epoch": 0.235480464625132, + "grad_norm": 1.8597882986068726, + "learning_rate": 1.2506913184858421e-05, + "loss": 0.5901, "step": 3345 }, { - "epoch": 0.25318754492830386, - "grad_norm": 2.001662492752075, - "learning_rate": 1.6043699289868018e-05, - "loss": 0.8095, + "epoch": 0.23555086237240408, + "grad_norm": 2.531994581222534, + "learning_rate": 1.2505927712655397e-05, + "loss": 0.6572, "step": 3346 }, { - "epoch": 0.25326321365063753, - "grad_norm": 2.297889471054077, - "learning_rate": 1.6042297102267972e-05, - "loss": 0.8036, + "epoch": 0.23562126011967616, + "grad_norm": 2.130038022994995, + "learning_rate": 1.2504941954194206e-05, + "loss": 0.8292, "step": 3347 }, { - "epoch": 0.25333888237297114, - "grad_norm": 2.313671588897705, - "learning_rate": 1.6040894473656502e-05, - "loss": 0.6839, + "epoch": 0.23569165786694826, + "grad_norm": 2.226653575897217, + "learning_rate": 1.2503955909526096e-05, + "loss": 0.6948, "step": 3348 }, { - "epoch": 0.25341455109530475, - "grad_norm": 2.0433685779571533, - "learning_rate": 1.603949140412145e-05, - "loss": 0.7668, + "epoch": 0.23576205561422034, + "grad_norm": 1.9703153371810913, + "learning_rate": 1.2502969578702331e-05, + "loss": 0.7354, "step": 3349 }, { - "epoch": 0.25349021981763836, - "grad_norm": 2.314209461212158, - "learning_rate": 1.6038087893750673e-05, - "loss": 0.806, + "epoch": 0.23583245336149244, + "grad_norm": 4.345832824707031, + "learning_rate": 1.2501982961774198e-05, + "loss": 0.7714, "step": 3350 }, { - "epoch": 0.25356588853997203, - "grad_norm": 2.245436906814575, - "learning_rate": 1.6036683942632073e-05, - "loss": 0.7672, + "epoch": 0.23590285110876452, + "grad_norm": 1.6267188787460327, + "learning_rate": 1.2500996058792986e-05, + "loss": 0.7185, "step": 3351 }, { - "epoch": 0.25364155726230564, - "grad_norm": 1.8543047904968262, - "learning_rate": 1.6035279550853564e-05, - "loss": 0.6501, + "epoch": 0.2359732488560366, + "grad_norm": 2.6693079471588135, + "learning_rate": 1.2500008869810006e-05, + "loss": 0.6926, "step": 3352 }, { - "epoch": 0.25371722598463925, - "grad_norm": 2.085162878036499, - "learning_rate": 1.6033874718503092e-05, - "loss": 0.7534, + "epoch": 0.2360436466033087, + "grad_norm": 2.3278844356536865, + "learning_rate": 1.2499021394876585e-05, + "loss": 0.6899, "step": 3353 }, { - "epoch": 0.25379289470697286, - "grad_norm": 2.3843894004821777, - "learning_rate": 1.6032469445668636e-05, - "loss": 0.8074, + "epoch": 0.23611404435058078, + "grad_norm": 2.0176401138305664, + "learning_rate": 1.249803363404406e-05, + "loss": 0.7467, "step": 3354 }, { - "epoch": 0.25386856342930647, - "grad_norm": 2.223787307739258, - "learning_rate": 1.6031063732438197e-05, - "loss": 0.7994, + "epoch": 0.23618444209785286, + "grad_norm": 2.1079938411712646, + "learning_rate": 1.2497045587363788e-05, + "loss": 0.7759, "step": 3355 }, { - "epoch": 0.25394423215164014, - "grad_norm": 2.203183650970459, - "learning_rate": 1.6029657578899808e-05, - "loss": 0.7981, + "epoch": 0.23625483984512496, + "grad_norm": 2.172056198120117, + "learning_rate": 1.2496057254887138e-05, + "loss": 0.7632, "step": 3356 }, { - "epoch": 0.25401990087397375, - "grad_norm": 2.084005355834961, - "learning_rate": 1.6028250985141524e-05, - "loss": 0.8071, + "epoch": 0.23632523759239704, + "grad_norm": 1.7789348363876343, + "learning_rate": 1.2495068636665492e-05, + "loss": 0.7316, "step": 3357 }, { - "epoch": 0.25409556959630736, - "grad_norm": 2.4879648685455322, - "learning_rate": 1.602684395125143e-05, - "loss": 0.7354, + "epoch": 0.23639563533966912, + "grad_norm": 1.8637534379959106, + "learning_rate": 1.2494079732750253e-05, + "loss": 0.6626, "step": 3358 }, { - "epoch": 0.25417123831864097, - "grad_norm": 2.1480824947357178, - "learning_rate": 1.602543647731764e-05, - "loss": 0.775, + "epoch": 0.23646603308694122, + "grad_norm": 1.9918080568313599, + "learning_rate": 1.2493090543192832e-05, + "loss": 0.8268, "step": 3359 }, { - "epoch": 0.25424690704097463, - "grad_norm": 2.283517360687256, - "learning_rate": 1.6024028563428296e-05, - "loss": 0.7326, + "epoch": 0.2365364308342133, + "grad_norm": 2.314735174179077, + "learning_rate": 1.2492101068044662e-05, + "loss": 0.6877, "step": 3360 }, { - "epoch": 0.25432257576330825, - "grad_norm": 2.376298666000366, - "learning_rate": 1.6022620209671567e-05, - "loss": 0.6533, + "epoch": 0.23660682858148538, + "grad_norm": 2.111137628555298, + "learning_rate": 1.2491111307357179e-05, + "loss": 0.64, "step": 3361 }, { - "epoch": 0.25439824448564186, - "grad_norm": 2.1882760524749756, - "learning_rate": 1.6021211416135644e-05, - "loss": 0.8149, + "epoch": 0.23667722632875748, + "grad_norm": 1.7992448806762695, + "learning_rate": 1.2490121261181847e-05, + "loss": 0.7117, "step": 3362 }, { - "epoch": 0.25447391320797547, - "grad_norm": 2.442096471786499, - "learning_rate": 1.601980218290875e-05, - "loss": 0.8602, + "epoch": 0.23674762407602956, + "grad_norm": 2.022582530975342, + "learning_rate": 1.248913092957014e-05, + "loss": 0.7247, "step": 3363 }, { - "epoch": 0.25454958193030913, - "grad_norm": 2.5917186737060547, - "learning_rate": 1.6018392510079145e-05, - "loss": 0.7316, + "epoch": 0.23681802182330167, + "grad_norm": 1.6998398303985596, + "learning_rate": 1.2488140312573544e-05, + "loss": 0.8147, "step": 3364 }, { - "epoch": 0.25462525065264274, - "grad_norm": 2.8332271575927734, - "learning_rate": 1.6016982397735098e-05, - "loss": 0.6501, + "epoch": 0.23688841957057374, + "grad_norm": 2.8269848823547363, + "learning_rate": 1.2487149410243562e-05, + "loss": 0.6946, "step": 3365 }, { - "epoch": 0.25470091937497635, - "grad_norm": 2.322115182876587, - "learning_rate": 1.6015571845964914e-05, - "loss": 0.6404, + "epoch": 0.23695881731784582, + "grad_norm": 2.0765278339385986, + "learning_rate": 1.2486158222631715e-05, + "loss": 0.7777, "step": 3366 }, { - "epoch": 0.25477658809730996, - "grad_norm": 1.6798187494277954, - "learning_rate": 1.6014160854856933e-05, - "loss": 0.6577, + "epoch": 0.23702921506511793, + "grad_norm": 2.3071203231811523, + "learning_rate": 1.248516674978953e-05, + "loss": 0.735, "step": 3367 }, { - "epoch": 0.2548522568196436, - "grad_norm": 2.1623387336730957, - "learning_rate": 1.601274942449951e-05, - "loss": 0.7403, + "epoch": 0.23709961281239, + "grad_norm": 2.1637556552886963, + "learning_rate": 1.248417499176856e-05, + "loss": 0.7739, "step": 3368 }, { - "epoch": 0.25492792554197724, - "grad_norm": 2.2108073234558105, - "learning_rate": 1.6011337554981044e-05, - "loss": 0.626, + "epoch": 0.23717001055966208, + "grad_norm": 1.837778925895691, + "learning_rate": 1.2483182948620367e-05, + "loss": 0.6515, "step": 3369 }, { - "epoch": 0.25500359426431085, - "grad_norm": 2.5659923553466797, - "learning_rate": 1.6009925246389933e-05, - "loss": 0.7742, + "epoch": 0.2372404083069342, + "grad_norm": 1.7996625900268555, + "learning_rate": 1.2482190620396524e-05, + "loss": 0.8223, "step": 3370 }, { - "epoch": 0.25507926298664446, - "grad_norm": 2.251542091369629, - "learning_rate": 1.6008512498814637e-05, - "loss": 0.8051, + "epoch": 0.23731080605420626, + "grad_norm": 1.9711017608642578, + "learning_rate": 1.2481198007148627e-05, + "loss": 0.6749, "step": 3371 }, { - "epoch": 0.25515493170897807, - "grad_norm": 2.227972984313965, - "learning_rate": 1.6007099312343618e-05, - "loss": 0.7986, + "epoch": 0.23738120380147834, + "grad_norm": 2.286323308944702, + "learning_rate": 1.2480205108928281e-05, + "loss": 0.7506, "step": 3372 }, { - "epoch": 0.25523060043131174, - "grad_norm": 2.539790153503418, - "learning_rate": 1.6005685687065375e-05, - "loss": 0.7455, + "epoch": 0.23745160154875045, + "grad_norm": 2.15956974029541, + "learning_rate": 1.2479211925787109e-05, + "loss": 0.7751, "step": 3373 }, { - "epoch": 0.25530626915364535, - "grad_norm": 2.585012435913086, - "learning_rate": 1.6004271623068436e-05, - "loss": 0.8405, + "epoch": 0.23752199929602252, + "grad_norm": 2.0714099407196045, + "learning_rate": 1.2478218457776747e-05, + "loss": 0.6823, "step": 3374 }, { - "epoch": 0.25538193787597896, - "grad_norm": 2.184983015060425, - "learning_rate": 1.6002857120441354e-05, - "loss": 0.6086, + "epoch": 0.2375923970432946, + "grad_norm": 1.9976571798324585, + "learning_rate": 1.2477224704948844e-05, + "loss": 0.8358, "step": 3375 }, { - "epoch": 0.25545760659831257, - "grad_norm": 3.0355441570281982, - "learning_rate": 1.6001442179272708e-05, - "loss": 0.8099, + "epoch": 0.2376627947905667, + "grad_norm": 2.153984546661377, + "learning_rate": 1.247623066735507e-05, + "loss": 0.6814, "step": 3376 }, { - "epoch": 0.25553327532064624, - "grad_norm": 2.2966806888580322, - "learning_rate": 1.600002679965111e-05, - "loss": 0.8367, + "epoch": 0.23773319253783878, + "grad_norm": 1.961560845375061, + "learning_rate": 1.2475236345047103e-05, + "loss": 0.6361, "step": 3377 }, { - "epoch": 0.25560894404297985, - "grad_norm": 2.3266594409942627, - "learning_rate": 1.599861098166519e-05, - "loss": 0.7798, + "epoch": 0.2378035902851109, + "grad_norm": 2.1614062786102295, + "learning_rate": 1.247424173807664e-05, + "loss": 0.801, "step": 3378 }, { - "epoch": 0.25568461276531346, - "grad_norm": 2.5856709480285645, - "learning_rate": 1.5997194725403614e-05, - "loss": 0.7653, + "epoch": 0.23787398803238297, + "grad_norm": 2.3563649654388428, + "learning_rate": 1.247324684649539e-05, + "loss": 0.6984, "step": 3379 }, { - "epoch": 0.25576028148764707, - "grad_norm": 1.9472054243087769, - "learning_rate": 1.5995778030955073e-05, - "loss": 0.8388, + "epoch": 0.23794438577965504, + "grad_norm": 2.256253719329834, + "learning_rate": 1.2472251670355082e-05, + "loss": 0.6991, "step": 3380 }, { - "epoch": 0.2558359502099807, - "grad_norm": 2.6131577491760254, - "learning_rate": 1.599436089840829e-05, - "loss": 0.8128, + "epoch": 0.23801478352692715, + "grad_norm": 2.187032699584961, + "learning_rate": 1.2471256209707453e-05, + "loss": 0.7896, "step": 3381 }, { - "epoch": 0.25591161893231434, - "grad_norm": 2.5530786514282227, - "learning_rate": 1.5992943327851998e-05, - "loss": 0.7969, + "epoch": 0.23808518127419923, + "grad_norm": 2.2996063232421875, + "learning_rate": 1.2470260464604258e-05, + "loss": 0.6079, "step": 3382 }, { - "epoch": 0.25598728765464795, - "grad_norm": 2.0992929935455322, - "learning_rate": 1.599152531937498e-05, - "loss": 0.7408, + "epoch": 0.2381555790214713, + "grad_norm": 2.314194917678833, + "learning_rate": 1.2469264435097268e-05, + "loss": 0.7657, "step": 3383 }, { - "epoch": 0.25606295637698157, - "grad_norm": 2.5544259548187256, - "learning_rate": 1.599010687306603e-05, - "loss": 0.8137, + "epoch": 0.2382259767687434, + "grad_norm": 1.9951741695404053, + "learning_rate": 1.2468268121238265e-05, + "loss": 0.7956, "step": 3384 }, { - "epoch": 0.2561386250993152, - "grad_norm": 2.344470500946045, - "learning_rate": 1.5988687989013985e-05, - "loss": 0.6803, + "epoch": 0.23829637451601549, + "grad_norm": 2.2923176288604736, + "learning_rate": 1.2467271523079053e-05, + "loss": 0.8395, "step": 3385 }, { - "epoch": 0.25621429382164884, - "grad_norm": 2.069218873977661, - "learning_rate": 1.5987268667307688e-05, - "loss": 0.8429, + "epoch": 0.23836677226328756, + "grad_norm": 1.6465331315994263, + "learning_rate": 1.246627464067144e-05, + "loss": 0.7065, "step": 3386 }, { - "epoch": 0.25628996254398245, - "grad_norm": 1.9888510704040527, - "learning_rate": 1.598584890803603e-05, - "loss": 0.8587, + "epoch": 0.23843717001055967, + "grad_norm": 1.7634378671646118, + "learning_rate": 1.246527747406726e-05, + "loss": 0.7486, "step": 3387 }, { - "epoch": 0.25636563126631606, - "grad_norm": 2.630401849746704, - "learning_rate": 1.5984428711287917e-05, - "loss": 0.8905, + "epoch": 0.23850756775783175, + "grad_norm": 2.0591142177581787, + "learning_rate": 1.2464280023318353e-05, + "loss": 0.7198, "step": 3388 }, { - "epoch": 0.2564412999886497, - "grad_norm": 2.1368906497955322, - "learning_rate": 1.5983008077152292e-05, - "loss": 0.6999, + "epoch": 0.23857796550510382, + "grad_norm": 2.067718267440796, + "learning_rate": 1.246328228847658e-05, + "loss": 0.775, "step": 3389 }, { - "epoch": 0.25651696871098334, - "grad_norm": 3.064831495285034, - "learning_rate": 1.598158700571811e-05, - "loss": 0.6628, + "epoch": 0.23864836325237593, + "grad_norm": 1.8368442058563232, + "learning_rate": 1.2462284269593811e-05, + "loss": 0.7609, "step": 3390 }, { - "epoch": 0.25659263743331695, - "grad_norm": 2.474104881286621, - "learning_rate": 1.598016549707437e-05, - "loss": 0.8611, + "epoch": 0.238718760999648, + "grad_norm": 1.6994582414627075, + "learning_rate": 1.2461285966721938e-05, + "loss": 0.7957, "step": 3391 }, { - "epoch": 0.25666830615565056, - "grad_norm": 2.035888671875, - "learning_rate": 1.5978743551310094e-05, - "loss": 0.6389, + "epoch": 0.2387891587469201, + "grad_norm": 1.8519991636276245, + "learning_rate": 1.2460287379912862e-05, + "loss": 0.7701, "step": 3392 }, { - "epoch": 0.25674397487798417, - "grad_norm": 2.2236475944519043, - "learning_rate": 1.597732116851432e-05, - "loss": 0.7512, + "epoch": 0.2388595564941922, + "grad_norm": 2.106289863586426, + "learning_rate": 1.24592885092185e-05, + "loss": 0.7148, "step": 3393 }, { - "epoch": 0.2568196436003178, - "grad_norm": 2.101870536804199, - "learning_rate": 1.5975898348776128e-05, - "loss": 0.8177, + "epoch": 0.23892995424146427, + "grad_norm": 1.9190682172775269, + "learning_rate": 1.2458289354690782e-05, + "loss": 0.9365, "step": 3394 }, { - "epoch": 0.25689531232265145, - "grad_norm": 2.129502058029175, - "learning_rate": 1.5974475092184618e-05, - "loss": 0.6882, + "epoch": 0.23900035198873637, + "grad_norm": 1.5822502374649048, + "learning_rate": 1.2457289916381659e-05, + "loss": 0.6171, "step": 3395 }, { - "epoch": 0.25697098104498506, - "grad_norm": 3.0002481937408447, - "learning_rate": 1.5973051398828923e-05, - "loss": 0.7554, + "epoch": 0.23907074973600845, + "grad_norm": 1.811082124710083, + "learning_rate": 1.2456290194343091e-05, + "loss": 0.7025, "step": 3396 }, { - "epoch": 0.25704664976731867, - "grad_norm": 2.7170114517211914, - "learning_rate": 1.5971627268798193e-05, - "loss": 0.6824, + "epoch": 0.23914114748328053, + "grad_norm": 1.7411088943481445, + "learning_rate": 1.2455290188627053e-05, + "loss": 0.623, "step": 3397 }, { - "epoch": 0.2571223184896523, - "grad_norm": 2.4976844787597656, - "learning_rate": 1.5970202702181613e-05, - "loss": 0.8525, + "epoch": 0.23921154523055263, + "grad_norm": 1.8968454599380493, + "learning_rate": 1.2454289899285536e-05, + "loss": 0.7217, "step": 3398 }, { - "epoch": 0.25719798721198595, - "grad_norm": 2.3807260990142822, - "learning_rate": 1.59687776990684e-05, - "loss": 0.6943, + "epoch": 0.2392819429778247, + "grad_norm": 2.3269407749176025, + "learning_rate": 1.245328932637055e-05, + "loss": 0.7893, "step": 3399 }, { - "epoch": 0.25727365593431956, - "grad_norm": 2.146085023880005, - "learning_rate": 1.5967352259547786e-05, - "loss": 0.7723, + "epoch": 0.23935234072509678, + "grad_norm": 1.9997438192367554, + "learning_rate": 1.2452288469934113e-05, + "loss": 0.7956, "step": 3400 }, { - "epoch": 0.25734932465665317, - "grad_norm": 2.7178564071655273, - "learning_rate": 1.596592638370904e-05, - "loss": 0.6769, + "epoch": 0.2394227384723689, + "grad_norm": 1.8075226545333862, + "learning_rate": 1.2451287330028258e-05, + "loss": 0.5885, "step": 3401 }, { - "epoch": 0.2574249933789868, - "grad_norm": 2.112178325653076, - "learning_rate": 1.5964500071641446e-05, - "loss": 0.8901, + "epoch": 0.23949313621964097, + "grad_norm": 2.250140428543091, + "learning_rate": 1.2450285906705042e-05, + "loss": 0.732, "step": 3402 }, { - "epoch": 0.25750066210132044, - "grad_norm": 2.2620973587036133, - "learning_rate": 1.5963073323434336e-05, - "loss": 0.9132, + "epoch": 0.23956353396691307, + "grad_norm": 1.8022961616516113, + "learning_rate": 1.2449284200016523e-05, + "loss": 0.7618, "step": 3403 }, { - "epoch": 0.25757633082365405, - "grad_norm": 2.1333353519439697, - "learning_rate": 1.5961646139177053e-05, - "loss": 0.8134, + "epoch": 0.23963393171418515, + "grad_norm": 2.02912974357605, + "learning_rate": 1.2448282210014784e-05, + "loss": 0.6922, "step": 3404 }, { - "epoch": 0.25765199954598766, - "grad_norm": 2.5050787925720215, - "learning_rate": 1.5960218518958977e-05, - "loss": 0.863, + "epoch": 0.23970432946145723, + "grad_norm": 1.8509401082992554, + "learning_rate": 1.2447279936751918e-05, + "loss": 0.8191, "step": 3405 }, { - "epoch": 0.2577276682683213, - "grad_norm": 1.9675660133361816, - "learning_rate": 1.59587904628695e-05, - "loss": 0.8025, + "epoch": 0.23977472720872933, + "grad_norm": 2.245364189147949, + "learning_rate": 1.2446277380280034e-05, + "loss": 0.7237, "step": 3406 }, { - "epoch": 0.2578033369906549, - "grad_norm": 2.3362104892730713, - "learning_rate": 1.5957361970998056e-05, - "loss": 0.8922, + "epoch": 0.2398451249560014, + "grad_norm": 2.1921169757843018, + "learning_rate": 1.2445274540651259e-05, + "loss": 0.7648, "step": 3407 }, { - "epoch": 0.25787900571298855, - "grad_norm": 2.6554508209228516, - "learning_rate": 1.5955933043434102e-05, - "loss": 0.6258, + "epoch": 0.2399155227032735, + "grad_norm": 2.237760305404663, + "learning_rate": 1.2444271417917726e-05, + "loss": 0.6902, "step": 3408 }, { - "epoch": 0.25795467443532216, - "grad_norm": 2.41428542137146, - "learning_rate": 1.5954503680267128e-05, - "loss": 0.8198, + "epoch": 0.2399859204505456, + "grad_norm": 1.842387080192566, + "learning_rate": 1.2443268012131595e-05, + "loss": 0.7868, "step": 3409 }, { - "epoch": 0.2580303431576558, - "grad_norm": 2.5038862228393555, - "learning_rate": 1.5953073881586637e-05, - "loss": 0.7589, + "epoch": 0.24005631819781767, + "grad_norm": 2.1130874156951904, + "learning_rate": 1.2442264323345028e-05, + "loss": 0.749, "step": 3410 }, { - "epoch": 0.2581060118799894, - "grad_norm": 2.199652671813965, - "learning_rate": 1.5951643647482172e-05, - "loss": 0.6257, + "epoch": 0.24012671594508975, + "grad_norm": 2.07700514793396, + "learning_rate": 1.2441260351610211e-05, + "loss": 0.7423, "step": 3411 }, { - "epoch": 0.25818168060232305, - "grad_norm": 2.1079485416412354, - "learning_rate": 1.5950212978043294e-05, - "loss": 0.6186, + "epoch": 0.24019711369236185, + "grad_norm": 2.1439208984375, + "learning_rate": 1.244025609697934e-05, + "loss": 0.7474, "step": 3412 }, { - "epoch": 0.25825734932465666, - "grad_norm": 2.202430248260498, - "learning_rate": 1.5948781873359602e-05, - "loss": 0.7587, + "epoch": 0.24026751143963393, + "grad_norm": 1.940063714981079, + "learning_rate": 1.2439251559504626e-05, + "loss": 0.6521, "step": 3413 }, { - "epoch": 0.25833301804699027, - "grad_norm": 2.260615110397339, - "learning_rate": 1.5947350333520713e-05, - "loss": 0.7012, + "epoch": 0.240337909186906, + "grad_norm": 2.2455925941467285, + "learning_rate": 1.2438246739238301e-05, + "loss": 0.7462, "step": 3414 }, { - "epoch": 0.2584086867693239, - "grad_norm": 2.422053337097168, - "learning_rate": 1.5945918358616276e-05, - "loss": 0.9323, + "epoch": 0.2404083069341781, + "grad_norm": 1.9169799089431763, + "learning_rate": 1.2437241636232599e-05, + "loss": 0.7471, "step": 3415 }, { - "epoch": 0.25848435549165755, - "grad_norm": 2.8548550605773926, - "learning_rate": 1.5944485948735965e-05, - "loss": 0.722, + "epoch": 0.2404787046814502, + "grad_norm": 2.078136920928955, + "learning_rate": 1.243623625053978e-05, + "loss": 0.7627, "step": 3416 }, { - "epoch": 0.25856002421399116, - "grad_norm": 2.1494009494781494, - "learning_rate": 1.5943053103969484e-05, - "loss": 0.8007, + "epoch": 0.2405491024287223, + "grad_norm": 2.4264116287231445, + "learning_rate": 1.2435230582212118e-05, + "loss": 0.6697, "step": 3417 }, { - "epoch": 0.25863569293632477, - "grad_norm": 2.599536180496216, - "learning_rate": 1.594161982440656e-05, - "loss": 0.9085, + "epoch": 0.24061950017599437, + "grad_norm": 1.6818963289260864, + "learning_rate": 1.2434224631301895e-05, + "loss": 0.7181, "step": 3418 }, { - "epoch": 0.2587113616586584, - "grad_norm": 2.498399257659912, - "learning_rate": 1.5940186110136952e-05, - "loss": 0.7815, + "epoch": 0.24068989792326645, + "grad_norm": 1.7889207601547241, + "learning_rate": 1.243321839786141e-05, + "loss": 0.6375, "step": 3419 }, { - "epoch": 0.25878703038099204, - "grad_norm": 2.293846845626831, - "learning_rate": 1.593875196125044e-05, - "loss": 0.6341, + "epoch": 0.24076029567053855, + "grad_norm": 1.8570665121078491, + "learning_rate": 1.2432211881942981e-05, + "loss": 0.8331, "step": 3420 }, { - "epoch": 0.25886269910332566, - "grad_norm": 1.788888931274414, - "learning_rate": 1.593731737783684e-05, - "loss": 0.6127, + "epoch": 0.24083069341781063, + "grad_norm": 1.9837764501571655, + "learning_rate": 1.2431205083598937e-05, + "loss": 0.6778, "step": 3421 }, { - "epoch": 0.25893836782565927, - "grad_norm": 2.166012763977051, - "learning_rate": 1.5935882359985986e-05, - "loss": 0.6978, + "epoch": 0.2409010911650827, + "grad_norm": 1.9278299808502197, + "learning_rate": 1.2430198002881623e-05, + "loss": 0.6484, "step": 3422 }, { - "epoch": 0.2590140365479929, - "grad_norm": 2.089470148086548, - "learning_rate": 1.5934446907787748e-05, - "loss": 0.7217, + "epoch": 0.24097148891235481, + "grad_norm": 2.2035725116729736, + "learning_rate": 1.2429190639843395e-05, + "loss": 0.5986, "step": 3423 }, { - "epoch": 0.2590897052703265, - "grad_norm": 1.9885095357894897, - "learning_rate": 1.5933011021332015e-05, - "loss": 0.6653, + "epoch": 0.2410418866596269, + "grad_norm": 2.2721638679504395, + "learning_rate": 1.2428182994536628e-05, + "loss": 0.7489, "step": 3424 }, { - "epoch": 0.25916537399266015, - "grad_norm": 2.147557497024536, - "learning_rate": 1.5931574700708704e-05, - "loss": 0.6181, + "epoch": 0.24111228440689897, + "grad_norm": 1.8911609649658203, + "learning_rate": 1.2427175067013711e-05, + "loss": 0.7461, "step": 3425 }, { - "epoch": 0.25924104271499376, - "grad_norm": 1.9154552221298218, - "learning_rate": 1.5930137946007768e-05, - "loss": 0.7011, + "epoch": 0.24118268215417107, + "grad_norm": 2.3391382694244385, + "learning_rate": 1.2426166857327049e-05, + "loss": 0.7341, "step": 3426 }, { - "epoch": 0.2593167114373274, - "grad_norm": 1.8677332401275635, - "learning_rate": 1.592870075731918e-05, - "loss": 0.8459, + "epoch": 0.24125307990144315, + "grad_norm": 2.112961530685425, + "learning_rate": 1.2425158365529058e-05, + "loss": 0.6729, "step": 3427 }, { - "epoch": 0.259392380159661, - "grad_norm": 2.35475492477417, - "learning_rate": 1.592726313473294e-05, - "loss": 0.8357, + "epoch": 0.24132347764871523, + "grad_norm": 2.2887165546417236, + "learning_rate": 1.2424149591672169e-05, + "loss": 0.7506, "step": 3428 }, { - "epoch": 0.25946804888199465, - "grad_norm": 2.0991413593292236, - "learning_rate": 1.592582507833908e-05, - "loss": 0.715, + "epoch": 0.24139387539598733, + "grad_norm": 4.28475284576416, + "learning_rate": 1.2423140535808832e-05, + "loss": 0.7564, "step": 3429 }, { - "epoch": 0.25954371760432826, - "grad_norm": 2.366481304168701, - "learning_rate": 1.592438658822765e-05, - "loss": 0.751, + "epoch": 0.2414642731432594, + "grad_norm": 2.1913299560546875, + "learning_rate": 1.2422131197991506e-05, + "loss": 0.7901, "step": 3430 }, { - "epoch": 0.25961938632666187, - "grad_norm": 2.203183650970459, - "learning_rate": 1.5922947664488733e-05, - "loss": 0.863, + "epoch": 0.24153467089053152, + "grad_norm": 1.9419962167739868, + "learning_rate": 1.2421121578272669e-05, + "loss": 0.8179, "step": 3431 }, { - "epoch": 0.2596950550489955, - "grad_norm": 2.090794324874878, - "learning_rate": 1.5921508307212445e-05, - "loss": 0.7527, + "epoch": 0.2416050686378036, + "grad_norm": 2.0608956813812256, + "learning_rate": 1.242011167670481e-05, + "loss": 0.7502, "step": 3432 }, { - "epoch": 0.25977072377132915, - "grad_norm": 2.188838005065918, - "learning_rate": 1.592006851648892e-05, - "loss": 0.8538, + "epoch": 0.24167546638507567, + "grad_norm": 2.428006887435913, + "learning_rate": 1.2419101493340436e-05, + "loss": 0.8304, "step": 3433 }, { - "epoch": 0.25984639249366276, - "grad_norm": 2.623730182647705, - "learning_rate": 1.5918628292408323e-05, - "loss": 1.0331, + "epoch": 0.24174586413234778, + "grad_norm": 2.0818750858306885, + "learning_rate": 1.2418091028232068e-05, + "loss": 0.7005, "step": 3434 }, { - "epoch": 0.25992206121599637, - "grad_norm": 1.948943853378296, - "learning_rate": 1.591718763506084e-05, - "loss": 0.705, + "epoch": 0.24181626187961985, + "grad_norm": 2.038390874862671, + "learning_rate": 1.2417080281432242e-05, + "loss": 0.8602, "step": 3435 }, { - "epoch": 0.25999772993833, - "grad_norm": 2.0423173904418945, - "learning_rate": 1.59157465445367e-05, - "loss": 0.7737, + "epoch": 0.24188665962689193, + "grad_norm": 1.9287844896316528, + "learning_rate": 1.2416069252993505e-05, + "loss": 0.6616, "step": 3436 }, { - "epoch": 0.2600733986606636, - "grad_norm": 3.1950523853302, - "learning_rate": 1.591430502092614e-05, - "loss": 0.7861, + "epoch": 0.24195705737416404, + "grad_norm": 2.285423994064331, + "learning_rate": 1.2415057942968422e-05, + "loss": 0.709, "step": 3437 }, { - "epoch": 0.26014906738299726, - "grad_norm": 1.802661657333374, - "learning_rate": 1.5912863064319437e-05, - "loss": 0.6932, + "epoch": 0.2420274551214361, + "grad_norm": 2.359217643737793, + "learning_rate": 1.2414046351409574e-05, + "loss": 0.7657, "step": 3438 }, { - "epoch": 0.26022473610533087, - "grad_norm": 2.07924485206604, - "learning_rate": 1.591142067480689e-05, - "loss": 0.7227, + "epoch": 0.2420978528687082, + "grad_norm": 1.937303066253662, + "learning_rate": 1.241303447836955e-05, + "loss": 0.7507, "step": 3439 }, { - "epoch": 0.2603004048276645, - "grad_norm": 2.3398637771606445, - "learning_rate": 1.5909977852478826e-05, - "loss": 0.8542, + "epoch": 0.2421682506159803, + "grad_norm": 1.98556387424469, + "learning_rate": 1.2412022323900963e-05, + "loss": 0.7647, "step": 3440 }, { - "epoch": 0.2603760735499981, - "grad_norm": 2.2240166664123535, - "learning_rate": 1.5908534597425597e-05, - "loss": 0.6994, + "epoch": 0.24223864836325237, + "grad_norm": 2.7347629070281982, + "learning_rate": 1.2411009888056435e-05, + "loss": 0.6382, "step": 3441 }, { - "epoch": 0.26045174227233175, - "grad_norm": 2.661499261856079, - "learning_rate": 1.5907090909737592e-05, - "loss": 0.7015, + "epoch": 0.24230904611052445, + "grad_norm": 3.3726465702056885, + "learning_rate": 1.2409997170888602e-05, + "loss": 0.6731, "step": 3442 }, { - "epoch": 0.26052741099466536, - "grad_norm": 2.6953768730163574, - "learning_rate": 1.590564678950521e-05, - "loss": 0.8241, + "epoch": 0.24237944385779656, + "grad_norm": 2.0269618034362793, + "learning_rate": 1.2408984172450117e-05, + "loss": 0.7415, "step": 3443 }, { - "epoch": 0.260603079716999, - "grad_norm": 2.4903414249420166, - "learning_rate": 1.590420223681889e-05, - "loss": 0.648, + "epoch": 0.24244984160506863, + "grad_norm": 2.093360662460327, + "learning_rate": 1.2407970892793643e-05, + "loss": 0.6302, "step": 3444 }, { - "epoch": 0.2606787484393326, - "grad_norm": 1.9942926168441772, - "learning_rate": 1.5902757251769097e-05, - "loss": 0.7933, + "epoch": 0.24252023935234074, + "grad_norm": 1.8622691631317139, + "learning_rate": 1.2406957331971867e-05, + "loss": 0.7231, "step": 3445 }, { - "epoch": 0.26075441716166625, - "grad_norm": 2.222245931625366, - "learning_rate": 1.590131183444632e-05, - "loss": 0.8263, + "epoch": 0.24259063709961282, + "grad_norm": 1.496277093887329, + "learning_rate": 1.2405943490037485e-05, + "loss": 0.8204, "step": 3446 }, { - "epoch": 0.26083008588399986, - "grad_norm": 2.2102739810943604, - "learning_rate": 1.589986598494107e-05, - "loss": 0.7452, + "epoch": 0.2426610348468849, + "grad_norm": 2.6820340156555176, + "learning_rate": 1.2404929367043201e-05, + "loss": 0.8047, "step": 3447 }, { - "epoch": 0.2609057546063335, - "grad_norm": 2.0333497524261475, - "learning_rate": 1.5898419703343896e-05, - "loss": 0.7399, + "epoch": 0.242731432594157, + "grad_norm": 2.156318426132202, + "learning_rate": 1.2403914963041744e-05, + "loss": 0.7501, "step": 3448 }, { - "epoch": 0.2609814233286671, - "grad_norm": 3.434465169906616, - "learning_rate": 1.5896972989745372e-05, - "loss": 0.5499, + "epoch": 0.24280183034142908, + "grad_norm": 1.7875045537948608, + "learning_rate": 1.2402900278085856e-05, + "loss": 0.7269, "step": 3449 }, { - "epoch": 0.2610570920510007, - "grad_norm": 1.980613112449646, - "learning_rate": 1.589552584423609e-05, - "loss": 0.7486, + "epoch": 0.24287222808870115, + "grad_norm": 2.0446553230285645, + "learning_rate": 1.240188531222829e-05, + "loss": 0.7957, "step": 3450 }, { - "epoch": 0.26113276077333436, - "grad_norm": 2.2190256118774414, - "learning_rate": 1.5894078266906676e-05, - "loss": 0.7358, + "epoch": 0.24294262583597326, + "grad_norm": 2.034390449523926, + "learning_rate": 1.2400870065521813e-05, + "loss": 0.7847, "step": 3451 }, { - "epoch": 0.26120842949566797, - "grad_norm": 2.138643264770508, - "learning_rate": 1.5892630257847783e-05, - "loss": 0.7376, + "epoch": 0.24301302358324534, + "grad_norm": 1.8579272031784058, + "learning_rate": 1.239985453801921e-05, + "loss": 0.7371, "step": 3452 }, { - "epoch": 0.2612840982180016, - "grad_norm": 2.1247470378875732, - "learning_rate": 1.589118181715009e-05, - "loss": 0.5772, + "epoch": 0.2430834213305174, + "grad_norm": 2.498750925064087, + "learning_rate": 1.239883872977328e-05, + "loss": 0.7, "step": 3453 }, { - "epoch": 0.2613597669403352, - "grad_norm": 2.137392520904541, - "learning_rate": 1.58897329449043e-05, - "loss": 0.8956, + "epoch": 0.24315381907778952, + "grad_norm": 2.5268609523773193, + "learning_rate": 1.2397822640836834e-05, + "loss": 0.7005, "step": 3454 }, { - "epoch": 0.26143543566266886, - "grad_norm": 2.029174327850342, - "learning_rate": 1.588828364120115e-05, - "loss": 0.7217, + "epoch": 0.2432242168250616, + "grad_norm": 2.5435361862182617, + "learning_rate": 1.23968062712627e-05, + "loss": 0.8021, "step": 3455 }, { - "epoch": 0.26151110438500247, - "grad_norm": 1.8149274587631226, - "learning_rate": 1.5886833906131404e-05, - "loss": 0.9841, + "epoch": 0.24329461457233367, + "grad_norm": 1.9063462018966675, + "learning_rate": 1.2395789621103721e-05, + "loss": 0.6967, "step": 3456 }, { - "epoch": 0.2615867731073361, - "grad_norm": 2.2353672981262207, - "learning_rate": 1.588538373978584e-05, - "loss": 0.7479, + "epoch": 0.24336501231960578, + "grad_norm": 1.87315034866333, + "learning_rate": 1.2394772690412753e-05, + "loss": 0.6642, "step": 3457 }, { - "epoch": 0.2616624418296697, - "grad_norm": 2.4940500259399414, - "learning_rate": 1.5883933142255276e-05, - "loss": 0.6687, + "epoch": 0.24343541006687786, + "grad_norm": 1.8124486207962036, + "learning_rate": 1.2393755479242666e-05, + "loss": 0.7005, "step": 3458 }, { - "epoch": 0.26173811055200336, - "grad_norm": 2.020583391189575, - "learning_rate": 1.5882482113630554e-05, - "loss": 0.7496, + "epoch": 0.24350580781414996, + "grad_norm": 2.2874608039855957, + "learning_rate": 1.239273798764635e-05, + "loss": 0.751, "step": 3459 }, { - "epoch": 0.26181377927433697, - "grad_norm": 2.2853190898895264, - "learning_rate": 1.5881030654002542e-05, - "loss": 0.7181, + "epoch": 0.24357620556142204, + "grad_norm": 2.0943145751953125, + "learning_rate": 1.2391720215676698e-05, + "loss": 0.7088, "step": 3460 }, { - "epoch": 0.2618894479966706, - "grad_norm": 2.1658430099487305, - "learning_rate": 1.5879578763462135e-05, - "loss": 0.7435, + "epoch": 0.24364660330869412, + "grad_norm": 1.98458993434906, + "learning_rate": 1.2390702163386632e-05, + "loss": 0.7233, "step": 3461 }, { - "epoch": 0.2619651167190042, - "grad_norm": 2.1128828525543213, - "learning_rate": 1.5878126442100252e-05, - "loss": 0.7692, + "epoch": 0.24371700105596622, + "grad_norm": 2.516822338104248, + "learning_rate": 1.2389683830829076e-05, + "loss": 0.7617, "step": 3462 }, { - "epoch": 0.2620407854413378, - "grad_norm": 2.092738628387451, - "learning_rate": 1.5876673690007848e-05, - "loss": 0.6514, + "epoch": 0.2437873988032383, + "grad_norm": 2.354823589324951, + "learning_rate": 1.2388665218056979e-05, + "loss": 0.847, "step": 3463 }, { - "epoch": 0.26211645416367146, - "grad_norm": 2.4423155784606934, - "learning_rate": 1.587522050727589e-05, - "loss": 0.8233, + "epoch": 0.24385779655051038, + "grad_norm": 2.514420747756958, + "learning_rate": 1.2387646325123294e-05, + "loss": 0.7453, "step": 3464 }, { - "epoch": 0.2621921228860051, - "grad_norm": 2.969520092010498, - "learning_rate": 1.5873766893995392e-05, - "loss": 0.7755, + "epoch": 0.24392819429778248, + "grad_norm": 2.3035061359405518, + "learning_rate": 1.2386627152080998e-05, + "loss": 0.8553, "step": 3465 }, { - "epoch": 0.2622677916083387, - "grad_norm": 2.1669178009033203, - "learning_rate": 1.5872312850257378e-05, - "loss": 0.8578, + "epoch": 0.24399859204505456, + "grad_norm": 2.1610493659973145, + "learning_rate": 1.238560769898308e-05, + "loss": 0.797, "step": 3466 }, { - "epoch": 0.2623434603306723, - "grad_norm": 2.1200549602508545, - "learning_rate": 1.5870858376152904e-05, - "loss": 0.8403, + "epoch": 0.24406898979232663, + "grad_norm": 2.1125359535217285, + "learning_rate": 1.238458796588254e-05, + "loss": 0.7552, "step": 3467 }, { - "epoch": 0.26241912905300596, - "grad_norm": 2.184720039367676, - "learning_rate": 1.5869403471773058e-05, - "loss": 0.6616, + "epoch": 0.24413938753959874, + "grad_norm": 3.0067691802978516, + "learning_rate": 1.2383567952832393e-05, + "loss": 0.6446, "step": 3468 }, { - "epoch": 0.2624947977753396, - "grad_norm": 2.18776798248291, - "learning_rate": 1.5867948137208945e-05, - "loss": 0.8047, + "epoch": 0.24420978528687082, + "grad_norm": 2.0382766723632812, + "learning_rate": 1.2382547659885674e-05, + "loss": 0.8705, "step": 3469 }, { - "epoch": 0.2625704664976732, - "grad_norm": 2.2961819171905518, - "learning_rate": 1.5866492372551707e-05, - "loss": 0.8281, + "epoch": 0.2442801830341429, + "grad_norm": 2.0456671714782715, + "learning_rate": 1.2381527087095426e-05, + "loss": 0.6852, "step": 3470 }, { - "epoch": 0.2626461352200068, - "grad_norm": 2.440213918685913, - "learning_rate": 1.5865036177892508e-05, - "loss": 0.7852, + "epoch": 0.244350580781415, + "grad_norm": 2.405489206314087, + "learning_rate": 1.2380506234514713e-05, + "loss": 0.7191, "step": 3471 }, { - "epoch": 0.26272180394234046, - "grad_norm": 2.684682846069336, - "learning_rate": 1.586357955332254e-05, - "loss": 0.7171, + "epoch": 0.24442097852868708, + "grad_norm": 3.1922590732574463, + "learning_rate": 1.2379485102196607e-05, + "loss": 0.733, "step": 3472 }, { - "epoch": 0.26279747266467407, - "grad_norm": 2.211758613586426, - "learning_rate": 1.5862122498933016e-05, - "loss": 0.8172, + "epoch": 0.24449137627595918, + "grad_norm": 2.1326560974121094, + "learning_rate": 1.2378463690194198e-05, + "loss": 0.7582, "step": 3473 }, { - "epoch": 0.2628731413870077, - "grad_norm": 2.2629692554473877, - "learning_rate": 1.5860665014815192e-05, - "loss": 0.7832, + "epoch": 0.24456177402323126, + "grad_norm": 1.9628627300262451, + "learning_rate": 1.2377441998560592e-05, + "loss": 0.7167, "step": 3474 }, { - "epoch": 0.2629488101093413, - "grad_norm": 2.0024826526641846, - "learning_rate": 1.5859207101060336e-05, - "loss": 0.8227, + "epoch": 0.24463217177050334, + "grad_norm": 1.7776806354522705, + "learning_rate": 1.2376420027348905e-05, + "loss": 0.662, "step": 3475 }, { - "epoch": 0.2630244788316749, - "grad_norm": 2.5234375, - "learning_rate": 1.585774875775974e-05, - "loss": 0.8759, + "epoch": 0.24470256951777544, + "grad_norm": 1.7138710021972656, + "learning_rate": 1.2375397776612273e-05, + "loss": 0.6973, "step": 3476 }, { - "epoch": 0.26310014755400857, - "grad_norm": 2.879760503768921, - "learning_rate": 1.585628998500474e-05, - "loss": 0.7474, + "epoch": 0.24477296726504752, + "grad_norm": 2.143932819366455, + "learning_rate": 1.237437524640384e-05, + "loss": 0.7298, "step": 3477 }, { - "epoch": 0.2631758162763422, - "grad_norm": 2.5393941402435303, - "learning_rate": 1.5854830782886686e-05, - "loss": 0.8035, + "epoch": 0.2448433650123196, + "grad_norm": 1.8266419172286987, + "learning_rate": 1.2373352436776774e-05, + "loss": 0.7959, "step": 3478 }, { - "epoch": 0.2632514849986758, - "grad_norm": 2.9086737632751465, - "learning_rate": 1.5853371151496956e-05, - "loss": 0.7489, + "epoch": 0.2449137627595917, + "grad_norm": 1.95771062374115, + "learning_rate": 1.2372329347784247e-05, + "loss": 0.6148, "step": 3479 }, { - "epoch": 0.2633271537210094, - "grad_norm": 2.603519916534424, - "learning_rate": 1.5851911090926957e-05, - "loss": 0.8353, + "epoch": 0.24498416050686378, + "grad_norm": 2.6809232234954834, + "learning_rate": 1.2371305979479452e-05, + "loss": 0.6549, "step": 3480 }, { - "epoch": 0.26340282244334307, - "grad_norm": 3.2521355152130127, - "learning_rate": 1.5850450601268123e-05, - "loss": 0.8392, + "epoch": 0.24505455825413586, + "grad_norm": 2.029634714126587, + "learning_rate": 1.2370282331915593e-05, + "loss": 0.7339, "step": 3481 }, { - "epoch": 0.2634784911656767, - "grad_norm": 1.8981279134750366, - "learning_rate": 1.5848989682611916e-05, - "loss": 0.7701, + "epoch": 0.24512495600140796, + "grad_norm": 2.267069101333618, + "learning_rate": 1.2369258405145893e-05, + "loss": 0.8396, "step": 3482 }, { - "epoch": 0.2635541598880103, - "grad_norm": 2.0759172439575195, - "learning_rate": 1.5847528335049825e-05, - "loss": 0.8041, + "epoch": 0.24519535374868004, + "grad_norm": 2.128948450088501, + "learning_rate": 1.2368234199223585e-05, + "loss": 0.6439, "step": 3483 }, { - "epoch": 0.2636298286103439, - "grad_norm": 2.702728748321533, - "learning_rate": 1.584606655867336e-05, - "loss": 0.8192, + "epoch": 0.24526575149595212, + "grad_norm": 1.8612091541290283, + "learning_rate": 1.236720971420192e-05, + "loss": 0.7965, "step": 3484 }, { - "epoch": 0.26370549733267756, - "grad_norm": 2.305008888244629, - "learning_rate": 1.5844604353574065e-05, - "loss": 0.788, + "epoch": 0.24533614924322422, + "grad_norm": 1.9058741331100464, + "learning_rate": 1.2366184950134162e-05, + "loss": 0.6309, "step": 3485 }, { - "epoch": 0.2637811660550112, - "grad_norm": 2.113942861557007, - "learning_rate": 1.5843141719843506e-05, - "loss": 0.8344, + "epoch": 0.2454065469904963, + "grad_norm": 1.8205245733261108, + "learning_rate": 1.2365159907073586e-05, + "loss": 0.6949, "step": 3486 }, { - "epoch": 0.2638568347773448, - "grad_norm": 1.9294511079788208, - "learning_rate": 1.584167865757328e-05, - "loss": 0.6665, + "epoch": 0.2454769447377684, + "grad_norm": 1.7608234882354736, + "learning_rate": 1.236413458507349e-05, + "loss": 0.774, "step": 3487 }, { - "epoch": 0.2639325034996784, - "grad_norm": 2.4283103942871094, - "learning_rate": 1.584021516685501e-05, - "loss": 0.7448, + "epoch": 0.24554734248504048, + "grad_norm": 1.8468517065048218, + "learning_rate": 1.2363108984187179e-05, + "loss": 0.773, "step": 3488 }, { - "epoch": 0.264008172222012, - "grad_norm": 2.7982795238494873, - "learning_rate": 1.583875124778034e-05, - "loss": 0.797, + "epoch": 0.24561774023231256, + "grad_norm": 2.0087201595306396, + "learning_rate": 1.2362083104467972e-05, + "loss": 0.8329, "step": 3489 }, { - "epoch": 0.26408384094434567, - "grad_norm": 2.033411741256714, - "learning_rate": 1.5837286900440946e-05, - "loss": 0.6918, + "epoch": 0.24568813797958466, + "grad_norm": 2.244976282119751, + "learning_rate": 1.2361056945969212e-05, + "loss": 0.7534, "step": 3490 }, { - "epoch": 0.2641595096666793, - "grad_norm": 1.9575029611587524, - "learning_rate": 1.5835822124928536e-05, - "loss": 0.7613, + "epoch": 0.24575853572685674, + "grad_norm": 1.7363563776016235, + "learning_rate": 1.2360030508744243e-05, + "loss": 0.7065, "step": 3491 }, { - "epoch": 0.2642351783890129, - "grad_norm": 2.232651472091675, - "learning_rate": 1.583435692133483e-05, - "loss": 0.7767, + "epoch": 0.24582893347412882, + "grad_norm": 2.536287546157837, + "learning_rate": 1.2359003792846437e-05, + "loss": 0.6913, "step": 3492 }, { - "epoch": 0.2643108471113465, - "grad_norm": 2.3022804260253906, - "learning_rate": 1.5832891289751595e-05, - "loss": 0.6333, + "epoch": 0.24589933122140092, + "grad_norm": 3.0269908905029297, + "learning_rate": 1.2357976798329165e-05, + "loss": 0.7713, "step": 3493 }, { - "epoch": 0.26438651583368017, - "grad_norm": 2.3175253868103027, - "learning_rate": 1.58314252302706e-05, - "loss": 0.6724, + "epoch": 0.245969728968673, + "grad_norm": 1.8858305215835571, + "learning_rate": 1.2356949525245831e-05, + "loss": 0.6857, "step": 3494 }, { - "epoch": 0.2644621845560138, - "grad_norm": 2.0862104892730713, - "learning_rate": 1.5829958742983665e-05, - "loss": 0.7843, + "epoch": 0.24604012671594508, + "grad_norm": 1.8352850675582886, + "learning_rate": 1.2355921973649838e-05, + "loss": 0.6894, "step": 3495 }, { - "epoch": 0.2645378532783474, - "grad_norm": 2.384624481201172, - "learning_rate": 1.5828491827982625e-05, - "loss": 0.6976, + "epoch": 0.24611052446321718, + "grad_norm": 2.4612677097320557, + "learning_rate": 1.2354894143594612e-05, + "loss": 0.8492, "step": 3496 }, { - "epoch": 0.264613522000681, - "grad_norm": 2.224984645843506, - "learning_rate": 1.5827024485359337e-05, - "loss": 0.7435, + "epoch": 0.24618092221048926, + "grad_norm": 2.1407828330993652, + "learning_rate": 1.2353866035133589e-05, + "loss": 0.8316, "step": 3497 }, { - "epoch": 0.26468919072301467, - "grad_norm": 2.280029535293579, - "learning_rate": 1.5825556715205696e-05, - "loss": 0.7737, + "epoch": 0.24625131995776134, + "grad_norm": 1.9909467697143555, + "learning_rate": 1.2352837648320222e-05, + "loss": 0.691, "step": 3498 }, { - "epoch": 0.2647648594453483, - "grad_norm": 2.348893642425537, - "learning_rate": 1.5824088517613618e-05, - "loss": 0.7458, + "epoch": 0.24632171770503344, + "grad_norm": 1.9915881156921387, + "learning_rate": 1.2351808983207979e-05, + "loss": 0.7787, "step": 3499 }, { - "epoch": 0.2648405281676819, - "grad_norm": 2.458357334136963, - "learning_rate": 1.5822619892675042e-05, - "loss": 0.7923, + "epoch": 0.24639211545230552, + "grad_norm": 1.8780733346939087, + "learning_rate": 1.235078003985034e-05, + "loss": 0.6803, "step": 3500 }, { - "epoch": 0.2649161968900155, - "grad_norm": 2.259758710861206, - "learning_rate": 1.5821150840481944e-05, - "loss": 0.9079, + "epoch": 0.24646251319957763, + "grad_norm": 2.093095302581787, + "learning_rate": 1.2349750818300798e-05, + "loss": 0.7795, "step": 3501 }, { - "epoch": 0.2649918656123491, - "grad_norm": 2.3717095851898193, - "learning_rate": 1.5819681361126315e-05, - "loss": 0.9236, + "epoch": 0.2465329109468497, + "grad_norm": 1.8787952661514282, + "learning_rate": 1.234872131861287e-05, + "loss": 0.8047, "step": 3502 }, { - "epoch": 0.2650675343346828, - "grad_norm": 1.9060953855514526, - "learning_rate": 1.5818211454700185e-05, - "loss": 0.6778, + "epoch": 0.24660330869412178, + "grad_norm": 2.5988423824310303, + "learning_rate": 1.2347691540840073e-05, + "loss": 0.7873, "step": 3503 }, { - "epoch": 0.2651432030570164, - "grad_norm": 2.2435457706451416, - "learning_rate": 1.5816741121295602e-05, - "loss": 0.7405, + "epoch": 0.2466737064413939, + "grad_norm": 1.933863878250122, + "learning_rate": 1.2346661485035951e-05, + "loss": 0.6685, "step": 3504 }, { - "epoch": 0.26521887177935, - "grad_norm": 1.7485630512237549, - "learning_rate": 1.5815270361004638e-05, - "loss": 0.6545, + "epoch": 0.24674410418866596, + "grad_norm": 1.7685267925262451, + "learning_rate": 1.2345631151254056e-05, + "loss": 0.725, "step": 3505 }, { - "epoch": 0.2652945405016836, - "grad_norm": 2.0698351860046387, - "learning_rate": 1.5813799173919403e-05, - "loss": 0.7109, + "epoch": 0.24681450193593804, + "grad_norm": 2.110330820083618, + "learning_rate": 1.2344600539547957e-05, + "loss": 0.6366, "step": 3506 }, { - "epoch": 0.2653702092240173, - "grad_norm": 2.1378233432769775, - "learning_rate": 1.5812327560132024e-05, - "loss": 0.639, + "epoch": 0.24688489968321015, + "grad_norm": 1.9746490716934204, + "learning_rate": 1.2343569649971235e-05, + "loss": 0.7082, "step": 3507 }, { - "epoch": 0.2654458779463509, - "grad_norm": 2.570868492126465, - "learning_rate": 1.581085551973466e-05, - "loss": 0.8756, + "epoch": 0.24695529743048222, + "grad_norm": 1.9742424488067627, + "learning_rate": 1.2342538482577489e-05, + "loss": 0.8545, "step": 3508 }, { - "epoch": 0.2655215466686845, - "grad_norm": 2.893656015396118, - "learning_rate": 1.5809383052819496e-05, - "loss": 0.812, + "epoch": 0.2470256951777543, + "grad_norm": 1.9399806261062622, + "learning_rate": 1.2341507037420326e-05, + "loss": 0.6771, "step": 3509 }, { - "epoch": 0.2655972153910181, - "grad_norm": 2.2518537044525146, - "learning_rate": 1.580791015947874e-05, - "loss": 0.8163, + "epoch": 0.2470960929250264, + "grad_norm": 2.2715096473693848, + "learning_rate": 1.2340475314553376e-05, + "loss": 0.7259, "step": 3510 }, { - "epoch": 0.26567288411335177, - "grad_norm": 2.2821381092071533, - "learning_rate": 1.580643683980463e-05, - "loss": 0.6801, + "epoch": 0.24716649067229848, + "grad_norm": 1.8838448524475098, + "learning_rate": 1.2339443314030277e-05, + "loss": 0.7285, "step": 3511 }, { - "epoch": 0.2657485528356854, - "grad_norm": 2.068220376968384, - "learning_rate": 1.580496309388943e-05, - "loss": 0.8859, + "epoch": 0.24723688841957056, + "grad_norm": 2.0903069972991943, + "learning_rate": 1.2338411035904685e-05, + "loss": 0.8914, "step": 3512 }, { - "epoch": 0.265824221558019, - "grad_norm": 2.3530995845794678, - "learning_rate": 1.580348892182543e-05, - "loss": 0.8019, + "epoch": 0.24730728616684267, + "grad_norm": 2.1358842849731445, + "learning_rate": 1.2337378480230269e-05, + "loss": 0.8083, "step": 3513 }, { - "epoch": 0.2658998902803526, - "grad_norm": 2.0066163539886475, - "learning_rate": 1.580201432370495e-05, - "loss": 0.7801, + "epoch": 0.24737768391411474, + "grad_norm": 1.976032018661499, + "learning_rate": 1.2336345647060714e-05, + "loss": 0.6279, "step": 3514 }, { - "epoch": 0.2659755590026862, - "grad_norm": 2.1849310398101807, - "learning_rate": 1.5800539299620333e-05, - "loss": 0.8071, + "epoch": 0.24744808166138685, + "grad_norm": 1.8861616849899292, + "learning_rate": 1.2335312536449711e-05, + "loss": 0.7535, "step": 3515 }, { - "epoch": 0.2660512277250199, - "grad_norm": 2.290064573287964, - "learning_rate": 1.5799063849663948e-05, - "loss": 0.6413, + "epoch": 0.24751847940865893, + "grad_norm": 2.2500710487365723, + "learning_rate": 1.233427914845098e-05, + "loss": 0.7514, "step": 3516 }, { - "epoch": 0.2661268964473535, - "grad_norm": 2.0501484870910645, - "learning_rate": 1.5797587973928197e-05, - "loss": 0.6741, + "epoch": 0.247588877155931, + "grad_norm": 1.8484090566635132, + "learning_rate": 1.2333245483118246e-05, + "loss": 0.6512, "step": 3517 }, { - "epoch": 0.2662025651696871, - "grad_norm": 2.559082508087158, - "learning_rate": 1.57961116725055e-05, - "loss": 0.7717, + "epoch": 0.2476592749032031, + "grad_norm": 1.7563858032226562, + "learning_rate": 1.2332211540505248e-05, + "loss": 0.737, "step": 3518 }, { - "epoch": 0.2662782338920207, - "grad_norm": 2.377612829208374, - "learning_rate": 1.579463494548831e-05, - "loss": 0.7944, + "epoch": 0.24772967265047519, + "grad_norm": 3.28251051902771, + "learning_rate": 1.2331177320665742e-05, + "loss": 0.7263, "step": 3519 }, { - "epoch": 0.2663539026143544, - "grad_norm": 2.3611176013946533, - "learning_rate": 1.57931577929691e-05, - "loss": 0.7483, + "epoch": 0.24780007039774726, + "grad_norm": 1.5635442733764648, + "learning_rate": 1.23301428236535e-05, + "loss": 0.6404, "step": 3520 }, { - "epoch": 0.266429571336688, - "grad_norm": 1.9215701818466187, - "learning_rate": 1.5791680215040376e-05, - "loss": 0.6026, + "epoch": 0.24787046814501937, + "grad_norm": 1.873259425163269, + "learning_rate": 1.2329108049522307e-05, + "loss": 0.8464, "step": 3521 }, { - "epoch": 0.2665052400590216, - "grad_norm": 2.0787158012390137, - "learning_rate": 1.5790202211794675e-05, - "loss": 0.7157, + "epoch": 0.24794086589229145, + "grad_norm": 2.1881422996520996, + "learning_rate": 1.2328072998325957e-05, + "loss": 0.7049, "step": 3522 }, { - "epoch": 0.2665809087813552, - "grad_norm": 2.295515298843384, - "learning_rate": 1.5788723783324546e-05, - "loss": 0.7047, + "epoch": 0.24801126363956352, + "grad_norm": 1.8884719610214233, + "learning_rate": 1.2327037670118268e-05, + "loss": 0.7881, "step": 3523 }, { - "epoch": 0.2666565775036889, - "grad_norm": 1.9624741077423096, - "learning_rate": 1.5787244929722578e-05, - "loss": 0.7119, + "epoch": 0.24808166138683563, + "grad_norm": 2.238644599914551, + "learning_rate": 1.2326002064953065e-05, + "loss": 0.7523, "step": 3524 }, { - "epoch": 0.2667322462260225, - "grad_norm": 2.571765661239624, - "learning_rate": 1.5785765651081377e-05, - "loss": 0.7344, + "epoch": 0.2481520591341077, + "grad_norm": 5.130863666534424, + "learning_rate": 1.2324966182884193e-05, + "loss": 0.7299, "step": 3525 }, { - "epoch": 0.2668079149483561, - "grad_norm": 3.0177741050720215, - "learning_rate": 1.5784285947493585e-05, - "loss": 0.9012, + "epoch": 0.24822245688137978, + "grad_norm": 1.9441323280334473, + "learning_rate": 1.2323930023965506e-05, + "loss": 0.674, "step": 3526 }, { - "epoch": 0.2668835836706897, - "grad_norm": 2.370260238647461, - "learning_rate": 1.5782805819051865e-05, - "loss": 0.838, + "epoch": 0.2482928546286519, + "grad_norm": 2.6326253414154053, + "learning_rate": 1.2322893588250874e-05, + "loss": 0.8528, "step": 3527 }, { - "epoch": 0.2669592523930233, - "grad_norm": 2.122828722000122, - "learning_rate": 1.5781325265848906e-05, - "loss": 0.8281, + "epoch": 0.24836325237592397, + "grad_norm": 1.832019329071045, + "learning_rate": 1.2321856875794187e-05, + "loss": 0.6613, "step": 3528 }, { - "epoch": 0.267034921115357, - "grad_norm": 1.9243050813674927, - "learning_rate": 1.5779844287977424e-05, - "loss": 0.7285, + "epoch": 0.24843365012319607, + "grad_norm": 2.0515711307525635, + "learning_rate": 1.2320819886649338e-05, + "loss": 0.6649, "step": 3529 }, { - "epoch": 0.2671105898376906, - "grad_norm": 2.585332155227661, - "learning_rate": 1.577836288553016e-05, - "loss": 0.8387, + "epoch": 0.24850404787046815, + "grad_norm": 1.9349201917648315, + "learning_rate": 1.2319782620870245e-05, + "loss": 0.839, "step": 3530 }, { - "epoch": 0.2671862585600242, - "grad_norm": 2.6601555347442627, - "learning_rate": 1.5776881058599897e-05, - "loss": 0.7493, + "epoch": 0.24857444561774023, + "grad_norm": 1.9017839431762695, + "learning_rate": 1.2318745078510836e-05, + "loss": 0.666, "step": 3531 }, { - "epoch": 0.2672619272823578, - "grad_norm": 2.395071029663086, - "learning_rate": 1.577539880727942e-05, - "loss": 0.8335, + "epoch": 0.24864484336501233, + "grad_norm": 1.8369090557098389, + "learning_rate": 1.2317707259625053e-05, + "loss": 0.7653, "step": 3532 }, { - "epoch": 0.2673375960046915, - "grad_norm": 1.9745339155197144, - "learning_rate": 1.5773916131661553e-05, - "loss": 0.6619, + "epoch": 0.2487152411122844, + "grad_norm": 1.8751025199890137, + "learning_rate": 1.2316669164266855e-05, + "loss": 0.7156, "step": 3533 }, { - "epoch": 0.2674132647270251, - "grad_norm": 2.2702884674072266, - "learning_rate": 1.577243303183915e-05, - "loss": 0.7704, + "epoch": 0.24878563885955648, + "grad_norm": 4.131645202636719, + "learning_rate": 1.231563079249021e-05, + "loss": 0.8541, "step": 3534 }, { - "epoch": 0.2674889334493587, - "grad_norm": 2.0734710693359375, - "learning_rate": 1.5770949507905085e-05, - "loss": 0.7202, + "epoch": 0.2488560366068286, + "grad_norm": 2.667304515838623, + "learning_rate": 1.2314592144349109e-05, + "loss": 0.7617, "step": 3535 }, { - "epoch": 0.2675646021716923, - "grad_norm": 1.9147382974624634, - "learning_rate": 1.576946555995226e-05, - "loss": 0.6244, + "epoch": 0.24892643435410067, + "grad_norm": 1.8570104837417603, + "learning_rate": 1.2313553219897546e-05, + "loss": 0.727, "step": 3536 }, { - "epoch": 0.267640270894026, - "grad_norm": 2.5039188861846924, - "learning_rate": 1.576798118807361e-05, - "loss": 0.6814, + "epoch": 0.24899683210137274, + "grad_norm": 1.9895799160003662, + "learning_rate": 1.231251401918954e-05, + "loss": 0.775, "step": 3537 }, { - "epoch": 0.2677159396163596, - "grad_norm": 2.3731849193573, - "learning_rate": 1.5766496392362088e-05, - "loss": 0.7602, + "epoch": 0.24906722984864485, + "grad_norm": 1.762984275817871, + "learning_rate": 1.2311474542279118e-05, + "loss": 0.6555, "step": 3538 }, { - "epoch": 0.2677916083386932, - "grad_norm": 2.7606589794158936, - "learning_rate": 1.5765011172910676e-05, - "loss": 0.7816, + "epoch": 0.24913762759591693, + "grad_norm": 2.0270869731903076, + "learning_rate": 1.2310434789220326e-05, + "loss": 0.7039, "step": 3539 }, { - "epoch": 0.2678672770610268, - "grad_norm": 2.252899408340454, - "learning_rate": 1.576352552981238e-05, - "loss": 0.8885, + "epoch": 0.249208025343189, + "grad_norm": 2.320786237716675, + "learning_rate": 1.230939476006722e-05, + "loss": 0.7201, "step": 3540 }, { - "epoch": 0.2679429457833605, - "grad_norm": 2.393841505050659, - "learning_rate": 1.5762039463160244e-05, - "loss": 0.7985, + "epoch": 0.2492784230904611, + "grad_norm": 2.0420985221862793, + "learning_rate": 1.2308354454873869e-05, + "loss": 0.7761, "step": 3541 }, { - "epoch": 0.2680186145056941, - "grad_norm": 2.1034390926361084, - "learning_rate": 1.5760552973047324e-05, - "loss": 0.7088, + "epoch": 0.2493488208377332, + "grad_norm": 1.854521632194519, + "learning_rate": 1.2307313873694365e-05, + "loss": 0.6621, "step": 3542 }, { - "epoch": 0.2680942832280277, - "grad_norm": 2.355592727661133, - "learning_rate": 1.5759066059566708e-05, - "loss": 0.7645, + "epoch": 0.2494192185850053, + "grad_norm": 1.9656740427017212, + "learning_rate": 1.2306273016582805e-05, + "loss": 0.7318, "step": 3543 }, { - "epoch": 0.2681699519503613, - "grad_norm": 2.2355756759643555, - "learning_rate": 1.575757872281152e-05, - "loss": 0.745, + "epoch": 0.24948961633227737, + "grad_norm": 2.009167432785034, + "learning_rate": 1.2305231883593307e-05, + "loss": 0.7302, "step": 3544 }, { - "epoch": 0.2682456206726949, - "grad_norm": 2.194815158843994, - "learning_rate": 1.5756090962874887e-05, - "loss": 0.5606, + "epoch": 0.24956001407954945, + "grad_norm": 1.6242207288742065, + "learning_rate": 1.2304190474779996e-05, + "loss": 0.571, "step": 3545 }, { - "epoch": 0.2683212893950286, - "grad_norm": 2.1128334999084473, - "learning_rate": 1.5754602779849992e-05, - "loss": 0.7998, + "epoch": 0.24963041182682155, + "grad_norm": 2.229170799255371, + "learning_rate": 1.230314879019702e-05, + "loss": 0.7516, "step": 3546 }, { - "epoch": 0.2683969581173622, - "grad_norm": 2.3708901405334473, - "learning_rate": 1.5753114173830024e-05, - "loss": 0.8299, + "epoch": 0.24970080957409363, + "grad_norm": 2.073471784591675, + "learning_rate": 1.2302106829898536e-05, + "loss": 0.8663, "step": 3547 }, { - "epoch": 0.2684726268396958, - "grad_norm": 2.555021286010742, - "learning_rate": 1.5751625144908203e-05, - "loss": 0.7638, + "epoch": 0.2497712073213657, + "grad_norm": 2.1961145401000977, + "learning_rate": 1.2301064593938716e-05, + "loss": 0.6766, "step": 3548 }, { - "epoch": 0.2685482955620294, - "grad_norm": 2.081984758377075, - "learning_rate": 1.5750135693177777e-05, - "loss": 0.7852, + "epoch": 0.2498416050686378, + "grad_norm": 2.161740303039551, + "learning_rate": 1.2300022082371746e-05, + "loss": 0.8533, "step": 3549 }, { - "epoch": 0.2686239642843631, - "grad_norm": 2.4795191287994385, - "learning_rate": 1.5748645818732025e-05, - "loss": 0.7854, + "epoch": 0.2499120028159099, + "grad_norm": 2.192936420440674, + "learning_rate": 1.2298979295251827e-05, + "loss": 0.6752, "step": 3550 }, { - "epoch": 0.2686996330066967, - "grad_norm": 2.0074732303619385, - "learning_rate": 1.574715552166424e-05, - "loss": 0.8557, + "epoch": 0.24998240056318197, + "grad_norm": 2.0045647621154785, + "learning_rate": 1.2297936232633177e-05, + "loss": 0.7136, "step": 3551 }, { - "epoch": 0.2687753017290303, - "grad_norm": 2.3227956295013428, - "learning_rate": 1.5745664802067755e-05, - "loss": 0.6981, + "epoch": 0.25005279831045407, + "grad_norm": 1.9876997470855713, + "learning_rate": 1.2296892894570024e-05, + "loss": 0.7, "step": 3552 }, { - "epoch": 0.2688509704513639, - "grad_norm": 3.9149978160858154, - "learning_rate": 1.5744173660035923e-05, - "loss": 0.7036, + "epoch": 0.25012319605772615, + "grad_norm": 1.6963567733764648, + "learning_rate": 1.2295849281116612e-05, + "loss": 0.7297, "step": 3553 }, { - "epoch": 0.2689266391736976, - "grad_norm": 2.3597822189331055, - "learning_rate": 1.574268209566212e-05, - "loss": 0.7453, + "epoch": 0.2501935938049982, + "grad_norm": 2.1989288330078125, + "learning_rate": 1.22948053923272e-05, + "loss": 0.7424, "step": 3554 }, { - "epoch": 0.2690023078960312, - "grad_norm": 3.120544672012329, - "learning_rate": 1.574119010903976e-05, - "loss": 0.7463, + "epoch": 0.2502639915522703, + "grad_norm": 2.2288973331451416, + "learning_rate": 1.2293761228256061e-05, + "loss": 0.816, "step": 3555 }, { - "epoch": 0.2690779766183648, - "grad_norm": 2.2731995582580566, - "learning_rate": 1.573969770026227e-05, - "loss": 0.7326, + "epoch": 0.25033438929954244, + "grad_norm": 2.2370355129241943, + "learning_rate": 1.2292716788957479e-05, + "loss": 0.6995, "step": 3556 }, { - "epoch": 0.2691536453406984, - "grad_norm": 3.586319923400879, - "learning_rate": 1.5738204869423107e-05, - "loss": 0.7289, + "epoch": 0.2504047870468145, + "grad_norm": 1.7778412103652954, + "learning_rate": 1.229167207448576e-05, + "loss": 0.7545, "step": 3557 }, { - "epoch": 0.269229314063032, - "grad_norm": 2.4169609546661377, - "learning_rate": 1.5736711616615765e-05, - "loss": 0.8624, + "epoch": 0.2504751847940866, + "grad_norm": 1.6971001625061035, + "learning_rate": 1.2290627084895218e-05, + "loss": 0.7602, "step": 3558 }, { - "epoch": 0.2693049827853657, - "grad_norm": 2.7807676792144775, - "learning_rate": 1.5735217941933754e-05, - "loss": 0.7344, + "epoch": 0.25054558254135867, + "grad_norm": 1.8206727504730225, + "learning_rate": 1.228958182024018e-05, + "loss": 0.7818, "step": 3559 }, { - "epoch": 0.2693806515076993, - "grad_norm": 2.2362794876098633, - "learning_rate": 1.5733723845470606e-05, - "loss": 0.8274, + "epoch": 0.25061598028863075, + "grad_norm": 1.9237855672836304, + "learning_rate": 1.2288536280574994e-05, + "loss": 0.7468, "step": 3560 }, { - "epoch": 0.2694563202300329, - "grad_norm": 2.450251817703247, - "learning_rate": 1.5732229327319895e-05, - "loss": 0.7416, + "epoch": 0.2506863780359028, + "grad_norm": 1.6379793882369995, + "learning_rate": 1.2287490465954017e-05, + "loss": 0.6263, "step": 3561 }, { - "epoch": 0.2695319889523665, - "grad_norm": 2.79219126701355, - "learning_rate": 1.573073438757521e-05, - "loss": 0.6722, + "epoch": 0.25075677578317496, + "grad_norm": 1.7998038530349731, + "learning_rate": 1.2286444376431622e-05, + "loss": 0.71, "step": 3562 }, { - "epoch": 0.2696076576747002, - "grad_norm": 2.2558610439300537, - "learning_rate": 1.5729239026330167e-05, - "loss": 0.5821, + "epoch": 0.25082717353044703, + "grad_norm": 2.292454242706299, + "learning_rate": 1.2285398012062195e-05, + "loss": 0.716, "step": 3563 }, { - "epoch": 0.2696833263970338, - "grad_norm": 2.438255786895752, - "learning_rate": 1.572774324367841e-05, - "loss": 0.8141, + "epoch": 0.2508975712777191, + "grad_norm": 1.8891268968582153, + "learning_rate": 1.2284351372900141e-05, + "loss": 0.6568, "step": 3564 }, { - "epoch": 0.2697589951193674, - "grad_norm": 2.1649351119995117, - "learning_rate": 1.572624703971361e-05, - "loss": 0.6348, + "epoch": 0.2509679690249912, + "grad_norm": 1.9516745805740356, + "learning_rate": 1.2283304458999874e-05, + "loss": 0.7456, "step": 3565 }, { - "epoch": 0.269834663841701, - "grad_norm": 2.2655069828033447, - "learning_rate": 1.5724750414529466e-05, - "loss": 0.6662, + "epoch": 0.25103836677226327, + "grad_norm": 1.8960424661636353, + "learning_rate": 1.2282257270415819e-05, + "loss": 0.8633, "step": 3566 }, { - "epoch": 0.2699103325640347, - "grad_norm": 2.0114426612854004, - "learning_rate": 1.57232533682197e-05, - "loss": 0.7854, + "epoch": 0.2511087645195354, + "grad_norm": 2.001636505126953, + "learning_rate": 1.2281209807202427e-05, + "loss": 0.7063, "step": 3567 }, { - "epoch": 0.2699860012863683, - "grad_norm": 2.8764772415161133, - "learning_rate": 1.5721755900878062e-05, - "loss": 0.6343, + "epoch": 0.2511791622668075, + "grad_norm": 1.6523736715316772, + "learning_rate": 1.2280162069414154e-05, + "loss": 0.7351, "step": 3568 }, { - "epoch": 0.2700616700087019, - "grad_norm": 2.2476704120635986, - "learning_rate": 1.5720258012598332e-05, - "loss": 0.9252, + "epoch": 0.25124956001407955, + "grad_norm": 1.9434362649917603, + "learning_rate": 1.2279114057105475e-05, + "loss": 0.8072, "step": 3569 }, { - "epoch": 0.2701373387310355, - "grad_norm": 2.010709047317505, - "learning_rate": 1.5718759703474307e-05, - "loss": 0.639, + "epoch": 0.25131995776135163, + "grad_norm": 1.8666815757751465, + "learning_rate": 1.2278065770330873e-05, + "loss": 0.8127, "step": 3570 }, { - "epoch": 0.2702130074533691, - "grad_norm": 2.497084617614746, - "learning_rate": 1.571726097359982e-05, - "loss": 0.7162, + "epoch": 0.2513903555086237, + "grad_norm": 1.8874766826629639, + "learning_rate": 1.2277017209144852e-05, + "loss": 0.7102, "step": 3571 }, { - "epoch": 0.2702886761757028, - "grad_norm": 2.4472103118896484, - "learning_rate": 1.571576182306872e-05, - "loss": 0.796, + "epoch": 0.2514607532558958, + "grad_norm": 1.8481684923171997, + "learning_rate": 1.2275968373601928e-05, + "loss": 0.7358, "step": 3572 }, { - "epoch": 0.2703643448980364, - "grad_norm": 2.1636784076690674, - "learning_rate": 1.5714262251974896e-05, - "loss": 0.7544, + "epoch": 0.2515311510031679, + "grad_norm": 1.8213880062103271, + "learning_rate": 1.2274919263756628e-05, + "loss": 0.9867, "step": 3573 }, { - "epoch": 0.27044001362037, - "grad_norm": 2.9408183097839355, - "learning_rate": 1.5712762260412256e-05, - "loss": 0.9256, + "epoch": 0.25160154875044, + "grad_norm": 2.21601939201355, + "learning_rate": 1.22738698796635e-05, + "loss": 0.6903, "step": 3574 }, { - "epoch": 0.2705156823427036, - "grad_norm": 1.9260292053222656, - "learning_rate": 1.571126184847473e-05, - "loss": 0.893, + "epoch": 0.2516719464977121, + "grad_norm": 1.723459005355835, + "learning_rate": 1.2272820221377097e-05, + "loss": 0.6755, "step": 3575 }, { - "epoch": 0.2705913510650373, - "grad_norm": 3.143998861312866, - "learning_rate": 1.5709761016256277e-05, - "loss": 0.7053, + "epoch": 0.25174234424498415, + "grad_norm": 2.0670039653778076, + "learning_rate": 1.2271770288952e-05, + "loss": 0.7734, "step": 3576 }, { - "epoch": 0.2706670197873709, - "grad_norm": 2.45214581489563, - "learning_rate": 1.570825976385089e-05, - "loss": 0.7357, + "epoch": 0.25181274199225623, + "grad_norm": 2.2728521823883057, + "learning_rate": 1.227072008244279e-05, + "loss": 0.6976, "step": 3577 }, { - "epoch": 0.2707426885097045, - "grad_norm": 2.4344959259033203, - "learning_rate": 1.570675809135258e-05, - "loss": 0.7794, + "epoch": 0.25188313973952836, + "grad_norm": 1.9877723455429077, + "learning_rate": 1.226966960190407e-05, + "loss": 0.6386, "step": 3578 }, { - "epoch": 0.2708183572320381, - "grad_norm": 2.231781005859375, - "learning_rate": 1.5705255998855384e-05, - "loss": 0.8033, + "epoch": 0.25195353748680044, + "grad_norm": 2.120692014694214, + "learning_rate": 1.2268618847390453e-05, + "loss": 0.6996, "step": 3579 }, { - "epoch": 0.2708940259543718, - "grad_norm": 2.1317062377929688, - "learning_rate": 1.570375348645337e-05, - "loss": 0.7642, + "epoch": 0.2520239352340725, + "grad_norm": 2.196427583694458, + "learning_rate": 1.2267567818956572e-05, + "loss": 0.6845, "step": 3580 }, { - "epoch": 0.2709696946767054, - "grad_norm": 2.1097607612609863, - "learning_rate": 1.570225055424063e-05, - "loss": 0.6544, + "epoch": 0.2520943329813446, + "grad_norm": 2.0473389625549316, + "learning_rate": 1.226651651665707e-05, + "loss": 0.6834, "step": 3581 }, { - "epoch": 0.271045363399039, - "grad_norm": 3.234607219696045, - "learning_rate": 1.5700747202311284e-05, - "loss": 0.8878, + "epoch": 0.25216473072861667, + "grad_norm": 1.7952088117599487, + "learning_rate": 1.2265464940546602e-05, + "loss": 0.7703, "step": 3582 }, { - "epoch": 0.2711210321213726, - "grad_norm": 2.072981119155884, - "learning_rate": 1.5699243430759477e-05, - "loss": 0.692, + "epoch": 0.25223512847588875, + "grad_norm": 1.8261966705322266, + "learning_rate": 1.2264413090679845e-05, + "loss": 0.7298, "step": 3583 }, { - "epoch": 0.27119670084370623, - "grad_norm": 2.0358352661132812, - "learning_rate": 1.5697739239679374e-05, - "loss": 0.7518, + "epoch": 0.2523055262231609, + "grad_norm": 2.2234151363372803, + "learning_rate": 1.2263360967111485e-05, + "loss": 0.6328, "step": 3584 }, { - "epoch": 0.2712723695660399, - "grad_norm": 2.821546792984009, - "learning_rate": 1.569623462916518e-05, - "loss": 0.8253, + "epoch": 0.25237592397043296, + "grad_norm": 2.1191554069519043, + "learning_rate": 1.2262308569896218e-05, + "loss": 0.6018, "step": 3585 }, { - "epoch": 0.2713480382883735, - "grad_norm": 2.268690824508667, - "learning_rate": 1.569472959931111e-05, - "loss": 0.7328, + "epoch": 0.25244632171770504, + "grad_norm": 1.8102914094924927, + "learning_rate": 1.2261255899088766e-05, + "loss": 0.707, "step": 3586 }, { - "epoch": 0.2714237070107071, - "grad_norm": 2.1014082431793213, - "learning_rate": 1.5693224150211427e-05, - "loss": 0.7633, + "epoch": 0.2525167194649771, + "grad_norm": 2.1377384662628174, + "learning_rate": 1.2260202954743851e-05, + "loss": 0.7908, "step": 3587 }, { - "epoch": 0.2714993757330407, - "grad_norm": 2.4852254390716553, - "learning_rate": 1.5691718281960395e-05, - "loss": 0.6053, + "epoch": 0.2525871172122492, + "grad_norm": 2.1621434688568115, + "learning_rate": 1.2259149736916225e-05, + "loss": 0.6814, "step": 3588 }, { - "epoch": 0.2715750444553744, - "grad_norm": 2.0812735557556152, - "learning_rate": 1.569021199465232e-05, - "loss": 0.6779, + "epoch": 0.25265751495952127, + "grad_norm": 1.8907899856567383, + "learning_rate": 1.225809624566064e-05, + "loss": 0.8035, "step": 3589 }, { - "epoch": 0.271650713177708, - "grad_norm": 2.4176812171936035, - "learning_rate": 1.5688705288381533e-05, - "loss": 0.6303, + "epoch": 0.2527279127067934, + "grad_norm": 2.3993029594421387, + "learning_rate": 1.2257042481031867e-05, + "loss": 0.8514, "step": 3590 }, { - "epoch": 0.2717263819000416, - "grad_norm": 2.428131103515625, - "learning_rate": 1.5687198163242388e-05, - "loss": 0.6474, + "epoch": 0.2527983104540655, + "grad_norm": 2.0951051712036133, + "learning_rate": 1.2255988443084696e-05, + "loss": 0.7055, "step": 3591 }, { - "epoch": 0.2718020506223752, - "grad_norm": 2.6284077167510986, - "learning_rate": 1.568569061932926e-05, - "loss": 0.6688, + "epoch": 0.25286870820133756, + "grad_norm": 3.3863697052001953, + "learning_rate": 1.2254934131873926e-05, + "loss": 0.7268, "step": 3592 }, { - "epoch": 0.2718777193447089, - "grad_norm": 2.674342155456543, - "learning_rate": 1.5684182656736566e-05, - "loss": 0.7523, + "epoch": 0.25293910594860963, + "grad_norm": 2.0205612182617188, + "learning_rate": 1.225387954745437e-05, + "loss": 0.7004, "step": 3593 }, { - "epoch": 0.2719533880670425, - "grad_norm": 2.8079347610473633, - "learning_rate": 1.5682674275558734e-05, - "loss": 0.7762, + "epoch": 0.2530095036958817, + "grad_norm": 2.128711223602295, + "learning_rate": 1.2252824689880859e-05, + "loss": 0.7595, "step": 3594 }, { - "epoch": 0.2720290567893761, - "grad_norm": 2.347954273223877, - "learning_rate": 1.568116547589022e-05, - "loss": 0.7758, + "epoch": 0.25307990144315384, + "grad_norm": 1.9378221035003662, + "learning_rate": 1.2251769559208237e-05, + "loss": 0.6972, "step": 3595 }, { - "epoch": 0.2721047255117097, - "grad_norm": 2.2305753231048584, - "learning_rate": 1.567965625782552e-05, - "loss": 0.8519, + "epoch": 0.2531502991904259, + "grad_norm": 2.1935276985168457, + "learning_rate": 1.2250714155491357e-05, + "loss": 0.7147, "step": 3596 }, { - "epoch": 0.27218039423404333, - "grad_norm": 2.6532304286956787, - "learning_rate": 1.567814662145914e-05, - "loss": 0.6987, + "epoch": 0.253220696937698, + "grad_norm": 1.9349498748779297, + "learning_rate": 1.2249658478785093e-05, + "loss": 0.716, "step": 3597 }, { - "epoch": 0.272256062956377, - "grad_norm": 3.169668197631836, - "learning_rate": 1.5676636566885616e-05, - "loss": 0.7087, + "epoch": 0.2532910946849701, + "grad_norm": 2.0137217044830322, + "learning_rate": 1.224860252914433e-05, + "loss": 0.6491, "step": 3598 }, { - "epoch": 0.2723317316787106, - "grad_norm": 2.8570854663848877, - "learning_rate": 1.5675126094199516e-05, - "loss": 0.7527, + "epoch": 0.25336149243224215, + "grad_norm": 1.7060962915420532, + "learning_rate": 1.2247546306623969e-05, + "loss": 0.7156, "step": 3599 }, { - "epoch": 0.2724074004010442, - "grad_norm": 2.194880247116089, - "learning_rate": 1.567361520349543e-05, - "loss": 0.7146, + "epoch": 0.25343189017951423, + "grad_norm": 1.9797686338424683, + "learning_rate": 1.2246489811278923e-05, + "loss": 0.7125, "step": 3600 }, { - "epoch": 0.27248306912337783, - "grad_norm": 2.3188719749450684, - "learning_rate": 1.5672103894867978e-05, - "loss": 0.8719, + "epoch": 0.25350228792678636, + "grad_norm": 1.9185479879379272, + "learning_rate": 1.2245433043164118e-05, + "loss": 0.6506, "step": 3601 }, { - "epoch": 0.2725587378457115, - "grad_norm": 2.677793502807617, - "learning_rate": 1.5670592168411797e-05, - "loss": 0.7991, + "epoch": 0.25357268567405844, + "grad_norm": 2.1446027755737305, + "learning_rate": 1.2244376002334501e-05, + "loss": 0.8248, "step": 3602 }, { - "epoch": 0.2726344065680451, - "grad_norm": 2.2928788661956787, - "learning_rate": 1.566908002422156e-05, - "loss": 0.767, + "epoch": 0.2536430834213305, + "grad_norm": 2.1215415000915527, + "learning_rate": 1.2243318688845027e-05, + "loss": 0.7197, "step": 3603 }, { - "epoch": 0.2727100752903787, - "grad_norm": 2.1832549571990967, - "learning_rate": 1.566756746239196e-05, - "loss": 0.7049, + "epoch": 0.2537134811686026, + "grad_norm": 2.0447115898132324, + "learning_rate": 1.2242261102750663e-05, + "loss": 0.9078, "step": 3604 }, { - "epoch": 0.27278574401271233, - "grad_norm": 1.460695743560791, - "learning_rate": 1.5666054483017722e-05, - "loss": 0.8869, + "epoch": 0.25378387891587467, + "grad_norm": 2.075596809387207, + "learning_rate": 1.2241203244106398e-05, + "loss": 0.6857, "step": 3605 }, { - "epoch": 0.272861412735046, - "grad_norm": 2.577636241912842, - "learning_rate": 1.566454108619359e-05, - "loss": 0.7205, + "epoch": 0.2538542766631468, + "grad_norm": 1.99501633644104, + "learning_rate": 1.224014511296723e-05, + "loss": 0.7965, "step": 3606 }, { - "epoch": 0.2729370814573796, - "grad_norm": 1.833762764930725, - "learning_rate": 1.5663027272014337e-05, - "loss": 0.9315, + "epoch": 0.2539246744104189, + "grad_norm": 1.9944673776626587, + "learning_rate": 1.223908670938817e-05, + "loss": 0.757, "step": 3607 }, { - "epoch": 0.2730127501797132, - "grad_norm": 1.9866582155227661, - "learning_rate": 1.566151304057477e-05, - "loss": 0.6925, + "epoch": 0.25399507215769096, + "grad_norm": 1.9942903518676758, + "learning_rate": 1.2238028033424249e-05, + "loss": 0.7548, "step": 3608 }, { - "epoch": 0.2730884189020468, - "grad_norm": 2.173614501953125, - "learning_rate": 1.565999839196971e-05, - "loss": 0.8448, + "epoch": 0.25406546990496304, + "grad_norm": 1.9089324474334717, + "learning_rate": 1.2236969085130506e-05, + "loss": 0.7555, "step": 3609 }, { - "epoch": 0.27316408762438044, - "grad_norm": 1.7537118196487427, - "learning_rate": 1.5658483326294008e-05, - "loss": 0.6487, + "epoch": 0.2541358676522351, + "grad_norm": 2.193957805633545, + "learning_rate": 1.2235909864561998e-05, + "loss": 0.7673, "step": 3610 }, { - "epoch": 0.2732397563467141, - "grad_norm": 1.7981582880020142, - "learning_rate": 1.5656967843642544e-05, - "loss": 0.8801, + "epoch": 0.2542062653995072, + "grad_norm": 1.6902503967285156, + "learning_rate": 1.2234850371773793e-05, + "loss": 0.8814, "step": 3611 }, { - "epoch": 0.2733154250690477, - "grad_norm": 2.380213499069214, - "learning_rate": 1.5655451944110223e-05, - "loss": 0.7393, + "epoch": 0.2542766631467793, + "grad_norm": 1.7312393188476562, + "learning_rate": 1.2233790606820979e-05, + "loss": 0.7094, "step": 3612 }, { - "epoch": 0.2733910937913813, - "grad_norm": 2.3052456378936768, - "learning_rate": 1.5653935627791976e-05, - "loss": 0.8234, + "epoch": 0.2543470608940514, + "grad_norm": 1.8258726596832275, + "learning_rate": 1.2232730569758648e-05, + "loss": 0.758, "step": 3613 }, { - "epoch": 0.27346676251371493, - "grad_norm": 2.1715638637542725, - "learning_rate": 1.5652418894782755e-05, - "loss": 0.6953, + "epoch": 0.2544174586413235, + "grad_norm": 1.5053775310516357, + "learning_rate": 1.2231670260641916e-05, + "loss": 0.7025, "step": 3614 }, { - "epoch": 0.2735424312360486, - "grad_norm": 2.075761079788208, - "learning_rate": 1.565090174517755e-05, - "loss": 0.662, + "epoch": 0.25448785638859556, + "grad_norm": 1.8192514181137085, + "learning_rate": 1.223060967952591e-05, + "loss": 0.8152, "step": 3615 }, { - "epoch": 0.2736180999583822, - "grad_norm": 1.88273024559021, - "learning_rate": 1.5649384179071363e-05, - "loss": 0.6704, + "epoch": 0.25455825413586763, + "grad_norm": 2.1463520526885986, + "learning_rate": 1.2229548826465771e-05, + "loss": 0.8117, "step": 3616 }, { - "epoch": 0.2736937686807158, - "grad_norm": 2.0616722106933594, - "learning_rate": 1.5647866196559234e-05, - "loss": 0.6941, + "epoch": 0.25462865188313977, + "grad_norm": 2.010488748550415, + "learning_rate": 1.222848770151665e-05, + "loss": 0.7844, "step": 3617 }, { - "epoch": 0.27376943740304943, - "grad_norm": 2.087221145629883, - "learning_rate": 1.564634779773622e-05, - "loss": 0.866, + "epoch": 0.25469904963041184, + "grad_norm": 2.025888681411743, + "learning_rate": 1.222742630473372e-05, + "loss": 0.8161, "step": 3618 }, { - "epoch": 0.2738451061253831, - "grad_norm": 3.6184213161468506, - "learning_rate": 1.5644828982697413e-05, - "loss": 0.6817, + "epoch": 0.2547694473776839, + "grad_norm": 1.766768455505371, + "learning_rate": 1.222636463617216e-05, + "loss": 0.7012, "step": 3619 }, { - "epoch": 0.2739207748477167, - "grad_norm": 1.8233073949813843, - "learning_rate": 1.5643309751537922e-05, - "loss": 0.7882, + "epoch": 0.254839845124956, + "grad_norm": 2.0290629863739014, + "learning_rate": 1.2225302695887172e-05, + "loss": 0.6478, "step": 3620 }, { - "epoch": 0.2739964435700503, - "grad_norm": 2.140226364135742, - "learning_rate": 1.564179010435289e-05, - "loss": 0.7102, + "epoch": 0.2549102428722281, + "grad_norm": 1.7602368593215942, + "learning_rate": 1.2224240483933963e-05, + "loss": 0.7158, "step": 3621 }, { - "epoch": 0.27407211229238393, - "grad_norm": 1.8913706541061401, - "learning_rate": 1.5640270041237475e-05, - "loss": 0.7444, + "epoch": 0.25498064061950015, + "grad_norm": 2.2058844566345215, + "learning_rate": 1.2223178000367761e-05, + "loss": 0.7203, "step": 3622 }, { - "epoch": 0.27414778101471754, - "grad_norm": 2.4049935340881348, - "learning_rate": 1.5638749562286875e-05, - "loss": 0.7232, + "epoch": 0.2550510383667723, + "grad_norm": 1.779801368713379, + "learning_rate": 1.2222115245243801e-05, + "loss": 0.7155, "step": 3623 }, { - "epoch": 0.2742234497370512, - "grad_norm": 2.22857666015625, - "learning_rate": 1.5637228667596302e-05, - "loss": 0.7192, + "epoch": 0.25512143611404436, + "grad_norm": 1.961446762084961, + "learning_rate": 1.2221052218617343e-05, + "loss": 0.7905, "step": 3624 }, { - "epoch": 0.2742991184593848, - "grad_norm": 2.1087875366210938, - "learning_rate": 1.5635707357261007e-05, - "loss": 0.6453, + "epoch": 0.25519183386131644, + "grad_norm": 1.9131789207458496, + "learning_rate": 1.2219988920543652e-05, + "loss": 0.6447, "step": 3625 }, { - "epoch": 0.2743747871817184, - "grad_norm": 3.1201884746551514, - "learning_rate": 1.563418563137625e-05, - "loss": 0.7145, + "epoch": 0.2552622316085885, + "grad_norm": 1.7837936878204346, + "learning_rate": 1.2218925351078009e-05, + "loss": 0.8335, "step": 3626 }, { - "epoch": 0.27445045590405204, - "grad_norm": 2.400087356567383, - "learning_rate": 1.5632663490037334e-05, - "loss": 0.747, + "epoch": 0.2553326293558606, + "grad_norm": 1.9121971130371094, + "learning_rate": 1.2217861510275709e-05, + "loss": 0.7024, "step": 3627 }, { - "epoch": 0.2745261246263857, - "grad_norm": 2.568697214126587, - "learning_rate": 1.563114093333958e-05, - "loss": 0.7742, + "epoch": 0.2554030271031327, + "grad_norm": 1.8676936626434326, + "learning_rate": 1.2216797398192065e-05, + "loss": 0.6705, "step": 3628 }, { - "epoch": 0.2746017933487193, - "grad_norm": 2.265756607055664, - "learning_rate": 1.562961796137833e-05, - "loss": 0.8239, + "epoch": 0.2554734248504048, + "grad_norm": 1.9657680988311768, + "learning_rate": 1.2215733014882399e-05, + "loss": 0.7734, "step": 3629 }, { - "epoch": 0.2746774620710529, - "grad_norm": 2.1188042163848877, - "learning_rate": 1.5628094574248962e-05, - "loss": 0.818, + "epoch": 0.2555438225976769, + "grad_norm": 1.8178761005401611, + "learning_rate": 1.221466836040205e-05, + "loss": 0.6793, "step": 3630 }, { - "epoch": 0.27475313079338654, - "grad_norm": 2.4410367012023926, - "learning_rate": 1.562657077204687e-05, - "loss": 0.8349, + "epoch": 0.25561422034494896, + "grad_norm": 2.0654585361480713, + "learning_rate": 1.221360343480637e-05, + "loss": 0.8428, "step": 3631 }, { - "epoch": 0.2748287995157202, - "grad_norm": 2.043889284133911, - "learning_rate": 1.562504655486749e-05, - "loss": 0.7269, + "epoch": 0.25568461809222104, + "grad_norm": 2.031270742416382, + "learning_rate": 1.2212538238150727e-05, + "loss": 0.7153, "step": 3632 }, { - "epoch": 0.2749044682380538, - "grad_norm": 2.041012763977051, - "learning_rate": 1.5623521922806263e-05, - "loss": 0.7347, + "epoch": 0.2557550158394931, + "grad_norm": 2.1937756538391113, + "learning_rate": 1.22114727704905e-05, + "loss": 0.6758, "step": 3633 }, { - "epoch": 0.2749801369603874, - "grad_norm": 2.0784006118774414, - "learning_rate": 1.5621996875958668e-05, - "loss": 0.5876, + "epoch": 0.25582541358676525, + "grad_norm": 2.0769991874694824, + "learning_rate": 1.2210407031881083e-05, + "loss": 0.8937, "step": 3634 }, { - "epoch": 0.27505580568272103, - "grad_norm": 2.4200499057769775, - "learning_rate": 1.5620471414420212e-05, - "loss": 0.6634, + "epoch": 0.2558958113340373, + "grad_norm": 1.9003525972366333, + "learning_rate": 1.2209341022377884e-05, + "loss": 0.7548, "step": 3635 }, { - "epoch": 0.27513147440505464, - "grad_norm": 2.1126153469085693, - "learning_rate": 1.5618945538286423e-05, - "loss": 0.801, + "epoch": 0.2559662090813094, + "grad_norm": 2.0467958450317383, + "learning_rate": 1.220827474203633e-05, + "loss": 0.8808, "step": 3636 }, { - "epoch": 0.2752071431273883, - "grad_norm": 2.4012205600738525, - "learning_rate": 1.561741924765286e-05, - "loss": 0.7216, + "epoch": 0.2560366068285815, + "grad_norm": 1.9473772048950195, + "learning_rate": 1.2207208190911856e-05, + "loss": 0.7716, "step": 3637 }, { - "epoch": 0.2752828118497219, - "grad_norm": 2.6323201656341553, - "learning_rate": 1.5615892542615095e-05, - "loss": 0.7527, + "epoch": 0.25610700457585356, + "grad_norm": 1.8578901290893555, + "learning_rate": 1.220614136905991e-05, + "loss": 0.7106, "step": 3638 }, { - "epoch": 0.27535848057205553, - "grad_norm": 2.035029649734497, - "learning_rate": 1.5614365423268742e-05, - "loss": 0.9722, + "epoch": 0.25617740232312564, + "grad_norm": 2.170498847961426, + "learning_rate": 1.2205074276535959e-05, + "loss": 0.707, "step": 3639 }, { - "epoch": 0.27543414929438914, - "grad_norm": 1.8150397539138794, - "learning_rate": 1.561283788970943e-05, - "loss": 0.6651, + "epoch": 0.25624780007039777, + "grad_norm": 3.118760585784912, + "learning_rate": 1.2204006913395485e-05, + "loss": 0.6999, "step": 3640 }, { - "epoch": 0.2755098180167228, - "grad_norm": 2.298706293106079, - "learning_rate": 1.5611309942032827e-05, - "loss": 0.6894, + "epoch": 0.25631819781766985, + "grad_norm": 2.064303398132324, + "learning_rate": 1.2202939279693975e-05, + "loss": 0.8251, "step": 3641 }, { - "epoch": 0.2755854867390564, - "grad_norm": 2.7103264331817627, - "learning_rate": 1.5609781580334607e-05, - "loss": 0.8144, + "epoch": 0.2563885955649419, + "grad_norm": 1.745065689086914, + "learning_rate": 1.2201871375486944e-05, + "loss": 0.7316, "step": 3642 }, { - "epoch": 0.27566115546139003, - "grad_norm": 1.96636962890625, - "learning_rate": 1.560825280471049e-05, - "loss": 0.7921, + "epoch": 0.256458993312214, + "grad_norm": 1.7765566110610962, + "learning_rate": 1.2200803200829906e-05, + "loss": 0.8455, "step": 3643 }, { - "epoch": 0.27573682418372364, - "grad_norm": 2.5422213077545166, - "learning_rate": 1.5606723615256205e-05, - "loss": 0.8534, + "epoch": 0.2565293910594861, + "grad_norm": 1.8464763164520264, + "learning_rate": 1.2199734755778399e-05, + "loss": 0.7077, "step": 3644 }, { - "epoch": 0.2758124929060573, - "grad_norm": 2.5183253288269043, - "learning_rate": 1.560519401206752e-05, - "loss": 0.6715, + "epoch": 0.2565997888067582, + "grad_norm": 2.094757080078125, + "learning_rate": 1.2198666040387977e-05, + "loss": 0.8471, "step": 3645 }, { - "epoch": 0.2758881616283909, - "grad_norm": 2.7416634559631348, - "learning_rate": 1.5603663995240223e-05, - "loss": 0.7949, + "epoch": 0.2566701865540303, + "grad_norm": 1.9334124326705933, + "learning_rate": 1.2197597054714195e-05, + "loss": 0.7666, "step": 3646 }, { - "epoch": 0.2759638303507245, - "grad_norm": 2.2581331729888916, - "learning_rate": 1.5602133564870126e-05, - "loss": 0.7579, + "epoch": 0.25674058430130237, + "grad_norm": 2.005389451980591, + "learning_rate": 1.2196527798812635e-05, + "loss": 0.7991, "step": 3647 }, { - "epoch": 0.27603949907305814, - "grad_norm": 1.9897353649139404, - "learning_rate": 1.5600602721053073e-05, - "loss": 0.6848, + "epoch": 0.25681098204857444, + "grad_norm": 1.9663219451904297, + "learning_rate": 1.2195458272738892e-05, + "loss": 0.7957, "step": 3648 }, { - "epoch": 0.27611516779539175, - "grad_norm": 1.6935006380081177, - "learning_rate": 1.5599071463884927e-05, - "loss": 0.7117, + "epoch": 0.2568813797958465, + "grad_norm": 1.6510800123214722, + "learning_rate": 1.2194388476548566e-05, + "loss": 0.7383, "step": 3649 }, { - "epoch": 0.2761908365177254, - "grad_norm": 2.8586652278900146, - "learning_rate": 1.5597539793461584e-05, - "loss": 0.9308, + "epoch": 0.2569517775431186, + "grad_norm": 1.659825086593628, + "learning_rate": 1.2193318410297278e-05, + "loss": 0.691, "step": 3650 }, { - "epoch": 0.276266505240059, - "grad_norm": 1.9398143291473389, - "learning_rate": 1.5596007709878957e-05, - "loss": 0.7423, + "epoch": 0.25702217529039073, + "grad_norm": 1.9018412828445435, + "learning_rate": 1.2192248074040662e-05, + "loss": 0.6862, "step": 3651 }, { - "epoch": 0.27634217396239263, - "grad_norm": 3.198117733001709, - "learning_rate": 1.5594475213232995e-05, - "loss": 0.8414, + "epoch": 0.2570925730376628, + "grad_norm": 1.7882845401763916, + "learning_rate": 1.2191177467834369e-05, + "loss": 0.7374, "step": 3652 }, { - "epoch": 0.27641784268472624, - "grad_norm": 2.2561442852020264, - "learning_rate": 1.5592942303619667e-05, - "loss": 0.6747, + "epoch": 0.2571629707849349, + "grad_norm": 2.010503053665161, + "learning_rate": 1.2190106591734056e-05, + "loss": 0.7608, "step": 3653 }, { - "epoch": 0.2764935114070599, - "grad_norm": 1.7593903541564941, - "learning_rate": 1.5591408981134966e-05, - "loss": 0.7299, + "epoch": 0.25723336853220696, + "grad_norm": 2.0859057903289795, + "learning_rate": 1.21890354457954e-05, + "loss": 0.6963, "step": 3654 }, { - "epoch": 0.2765691801293935, - "grad_norm": 2.0782854557037354, - "learning_rate": 1.5589875245874918e-05, - "loss": 0.6619, + "epoch": 0.25730376627947904, + "grad_norm": 1.6234455108642578, + "learning_rate": 1.2187964030074095e-05, + "loss": 0.6757, "step": 3655 }, { - "epoch": 0.27664484885172713, - "grad_norm": 3.2505691051483154, - "learning_rate": 1.5588341097935565e-05, - "loss": 0.7653, + "epoch": 0.2573741640267511, + "grad_norm": 1.8284335136413574, + "learning_rate": 1.218689234462584e-05, + "loss": 0.7744, "step": 3656 }, { - "epoch": 0.27672051757406074, - "grad_norm": 2.4041876792907715, - "learning_rate": 1.5586806537412987e-05, - "loss": 0.7123, + "epoch": 0.25744456177402325, + "grad_norm": 1.8032524585723877, + "learning_rate": 1.2185820389506355e-05, + "loss": 0.7109, "step": 3657 }, { - "epoch": 0.2767961862963944, - "grad_norm": 2.018611431121826, - "learning_rate": 1.5585271564403276e-05, - "loss": 0.6393, + "epoch": 0.25751495952129533, + "grad_norm": 2.4541232585906982, + "learning_rate": 1.218474816477137e-05, + "loss": 0.7604, "step": 3658 }, { - "epoch": 0.276871855018728, - "grad_norm": 2.0278730392456055, - "learning_rate": 1.558373617900256e-05, - "loss": 0.7515, + "epoch": 0.2575853572685674, + "grad_norm": 1.5970820188522339, + "learning_rate": 1.2183675670476632e-05, + "loss": 0.6709, "step": 3659 }, { - "epoch": 0.27694752374106163, - "grad_norm": 2.383406400680542, - "learning_rate": 1.558220038130699e-05, - "loss": 0.7467, + "epoch": 0.2576557550158395, + "grad_norm": 3.390453338623047, + "learning_rate": 1.2182602906677904e-05, + "loss": 0.7079, "step": 3660 }, { - "epoch": 0.27702319246339524, - "grad_norm": 2.429568290710449, - "learning_rate": 1.5580664171412743e-05, - "loss": 0.7286, + "epoch": 0.25772615276311156, + "grad_norm": 2.0504367351531982, + "learning_rate": 1.2181529873430954e-05, + "loss": 0.8135, "step": 3661 }, { - "epoch": 0.2770988611857289, - "grad_norm": 2.291583776473999, - "learning_rate": 1.5579127549416024e-05, - "loss": 0.7208, + "epoch": 0.2577965505103837, + "grad_norm": 1.7983391284942627, + "learning_rate": 1.2180456570791575e-05, + "loss": 0.763, "step": 3662 }, { - "epoch": 0.2771745299080625, - "grad_norm": 2.344414710998535, - "learning_rate": 1.5577590515413054e-05, - "loss": 0.7562, + "epoch": 0.25786694825765577, + "grad_norm": 1.9128599166870117, + "learning_rate": 1.2179382998815566e-05, + "loss": 0.7664, "step": 3663 }, { - "epoch": 0.2772501986303961, - "grad_norm": 2.6295807361602783, - "learning_rate": 1.5576053069500093e-05, - "loss": 0.7099, + "epoch": 0.25793734600492785, + "grad_norm": 1.8708971738815308, + "learning_rate": 1.2178309157558744e-05, + "loss": 0.713, "step": 3664 }, { - "epoch": 0.27732586735272974, - "grad_norm": 2.575855016708374, - "learning_rate": 1.557451521177342e-05, - "loss": 0.8234, + "epoch": 0.2580077437521999, + "grad_norm": 1.947878360748291, + "learning_rate": 1.2177235047076941e-05, + "loss": 0.788, "step": 3665 }, { - "epoch": 0.27740153607506335, - "grad_norm": 2.7682507038116455, - "learning_rate": 1.557297694232934e-05, - "loss": 0.8003, + "epoch": 0.258078141499472, + "grad_norm": 3.9084458351135254, + "learning_rate": 1.2176160667426e-05, + "loss": 0.7831, "step": 3666 }, { - "epoch": 0.277477204797397, - "grad_norm": 2.386277675628662, - "learning_rate": 1.5571438261264184e-05, - "loss": 0.8584, + "epoch": 0.2581485392467441, + "grad_norm": 2.0382204055786133, + "learning_rate": 1.2175086018661777e-05, + "loss": 0.6309, "step": 3667 }, { - "epoch": 0.2775528735197306, - "grad_norm": 2.271766424179077, - "learning_rate": 1.5569899168674308e-05, - "loss": 0.7653, + "epoch": 0.2582189369940162, + "grad_norm": 2.050055980682373, + "learning_rate": 1.2174011100840144e-05, + "loss": 0.8068, "step": 3668 }, { - "epoch": 0.27762854224206424, - "grad_norm": 2.1660287380218506, - "learning_rate": 1.55683596646561e-05, - "loss": 0.845, + "epoch": 0.2582893347412883, + "grad_norm": 1.9449944496154785, + "learning_rate": 1.2172935914016988e-05, + "loss": 0.8862, "step": 3669 }, { - "epoch": 0.27770421096439785, - "grad_norm": 2.2329893112182617, - "learning_rate": 1.5566819749305962e-05, - "loss": 0.7212, + "epoch": 0.25835973248856037, + "grad_norm": 1.9053329229354858, + "learning_rate": 1.217186045824821e-05, + "loss": 0.7344, "step": 3670 }, { - "epoch": 0.2777798796867315, - "grad_norm": 2.5628223419189453, - "learning_rate": 1.5565279422720335e-05, - "loss": 0.919, + "epoch": 0.25843013023583244, + "grad_norm": 2.67734956741333, + "learning_rate": 1.2170784733589721e-05, + "loss": 0.8082, "step": 3671 }, { - "epoch": 0.2778555484090651, - "grad_norm": 2.2546982765197754, - "learning_rate": 1.556373868499567e-05, - "loss": 0.6941, + "epoch": 0.2585005279831045, + "grad_norm": 2.374748945236206, + "learning_rate": 1.2169708740097453e-05, + "loss": 0.7762, "step": 3672 }, { - "epoch": 0.27793121713139873, - "grad_norm": 2.420389413833618, - "learning_rate": 1.556219753622846e-05, - "loss": 0.7521, + "epoch": 0.25857092573037666, + "grad_norm": 1.810116171836853, + "learning_rate": 1.2168632477827343e-05, + "loss": 0.6859, "step": 3673 }, { - "epoch": 0.27800688585373234, - "grad_norm": 1.9865230321884155, - "learning_rate": 1.556065597651522e-05, - "loss": 0.775, + "epoch": 0.25864132347764873, + "grad_norm": 2.3427369594573975, + "learning_rate": 1.2167555946835352e-05, + "loss": 0.7924, "step": 3674 }, { - "epoch": 0.278082554576066, - "grad_norm": 2.0972037315368652, - "learning_rate": 1.5559114005952483e-05, - "loss": 0.72, + "epoch": 0.2587117212249208, + "grad_norm": 2.621683359146118, + "learning_rate": 1.2166479147177447e-05, + "loss": 0.7198, "step": 3675 }, { - "epoch": 0.2781582232983996, - "grad_norm": 3.4112987518310547, - "learning_rate": 1.555757162463681e-05, - "loss": 0.7276, + "epoch": 0.2587821189721929, + "grad_norm": 2.1187007427215576, + "learning_rate": 1.216540207890961e-05, + "loss": 0.8507, "step": 3676 }, { - "epoch": 0.27823389202073323, - "grad_norm": 2.490372896194458, - "learning_rate": 1.5556028832664793e-05, - "loss": 0.7068, + "epoch": 0.25885251671946496, + "grad_norm": 2.59725284576416, + "learning_rate": 1.2164324742087844e-05, + "loss": 0.7981, "step": 3677 }, { - "epoch": 0.27830956074306684, - "grad_norm": 2.6258339881896973, - "learning_rate": 1.5554485630133045e-05, - "loss": 0.801, + "epoch": 0.25892291446673704, + "grad_norm": 1.448885202407837, + "learning_rate": 1.2163247136768154e-05, + "loss": 0.84, "step": 3678 }, { - "epoch": 0.27838522946540045, - "grad_norm": 1.8640928268432617, - "learning_rate": 1.5552942017138204e-05, - "loss": 0.8175, + "epoch": 0.2589933122140092, + "grad_norm": 1.931820034980774, + "learning_rate": 1.2162169263006573e-05, + "loss": 0.7667, "step": 3679 }, { - "epoch": 0.2784608981877341, - "grad_norm": 3.355138063430786, - "learning_rate": 1.5551397993776943e-05, - "loss": 0.7382, + "epoch": 0.25906370996128125, + "grad_norm": 2.0883514881134033, + "learning_rate": 1.2161091120859133e-05, + "loss": 0.7178, "step": 3680 }, { - "epoch": 0.27853656691006773, - "grad_norm": 2.3146259784698486, - "learning_rate": 1.554985356014595e-05, - "loss": 0.6254, + "epoch": 0.25913410770855333, + "grad_norm": 1.8008736371994019, + "learning_rate": 1.2160012710381896e-05, + "loss": 0.8101, "step": 3681 }, { - "epoch": 0.27861223563240134, - "grad_norm": 1.9555891752243042, - "learning_rate": 1.5548308716341944e-05, - "loss": 0.6862, + "epoch": 0.2592045054558254, + "grad_norm": 1.9086214303970337, + "learning_rate": 1.215893403163092e-05, + "loss": 0.6925, "step": 3682 }, { - "epoch": 0.27868790435473495, - "grad_norm": 2.361454725265503, - "learning_rate": 1.554676346246166e-05, - "loss": 0.6829, + "epoch": 0.2592749032030975, + "grad_norm": 1.7321678400039673, + "learning_rate": 1.2157855084662294e-05, + "loss": 0.6544, "step": 3683 }, { - "epoch": 0.2787635730770686, - "grad_norm": 2.4104437828063965, - "learning_rate": 1.5545217798601878e-05, - "loss": 0.6886, + "epoch": 0.25934530095036956, + "grad_norm": 2.0167829990386963, + "learning_rate": 1.2156775869532113e-05, + "loss": 0.7089, "step": 3684 }, { - "epoch": 0.2788392417994022, - "grad_norm": 2.2208261489868164, - "learning_rate": 1.5543671724859387e-05, - "loss": 0.6815, + "epoch": 0.2594156986976417, + "grad_norm": 1.9747283458709717, + "learning_rate": 1.2155696386296482e-05, + "loss": 0.6809, "step": 3685 }, { - "epoch": 0.27891491052173584, - "grad_norm": 2.3924684524536133, - "learning_rate": 1.5542125241331006e-05, - "loss": 0.8088, + "epoch": 0.25948609644491377, + "grad_norm": 1.6969846487045288, + "learning_rate": 1.2154616635011526e-05, + "loss": 0.6319, "step": 3686 }, { - "epoch": 0.27899057924406945, - "grad_norm": 2.2955551147460938, - "learning_rate": 1.5540578348113585e-05, - "loss": 0.8344, + "epoch": 0.25955649419218585, + "grad_norm": 1.6829713582992554, + "learning_rate": 1.2153536615733384e-05, + "loss": 0.8263, "step": 3687 }, { - "epoch": 0.2790662479664031, - "grad_norm": 2.611711025238037, - "learning_rate": 1.553903104530399e-05, - "loss": 0.7999, + "epoch": 0.2596268919394579, + "grad_norm": 1.9917230606079102, + "learning_rate": 1.2152456328518207e-05, + "loss": 0.7629, "step": 3688 }, { - "epoch": 0.2791419166887367, - "grad_norm": 2.359159231185913, - "learning_rate": 1.5537483332999123e-05, - "loss": 0.7241, + "epoch": 0.25969728968673, + "grad_norm": 2.5854506492614746, + "learning_rate": 1.2151375773422156e-05, + "loss": 0.762, "step": 3689 }, { - "epoch": 0.27921758541107033, - "grad_norm": 2.494488000869751, - "learning_rate": 1.5535935211295906e-05, - "loss": 0.7985, + "epoch": 0.25976768743400214, + "grad_norm": 2.1879148483276367, + "learning_rate": 1.2150294950501415e-05, + "loss": 0.732, "step": 3690 }, { - "epoch": 0.27929325413340395, - "grad_norm": 2.4356842041015625, - "learning_rate": 1.5534386680291286e-05, - "loss": 0.8035, + "epoch": 0.2598380851812742, + "grad_norm": 2.0332705974578857, + "learning_rate": 1.2149213859812177e-05, + "loss": 0.6572, "step": 3691 }, { - "epoch": 0.27936892285573756, - "grad_norm": 2.5868098735809326, - "learning_rate": 1.5532837740082237e-05, - "loss": 0.9104, + "epoch": 0.2599084829285463, + "grad_norm": 2.1148712635040283, + "learning_rate": 1.2148132501410644e-05, + "loss": 0.8495, "step": 3692 }, { - "epoch": 0.2794445915780712, - "grad_norm": 2.140434741973877, - "learning_rate": 1.5531288390765757e-05, - "loss": 0.7656, + "epoch": 0.25997888067581837, + "grad_norm": 1.7611753940582275, + "learning_rate": 1.214705087535304e-05, + "loss": 0.7015, "step": 3693 }, { - "epoch": 0.27952026030040483, - "grad_norm": 2.476284980773926, - "learning_rate": 1.5529738632438873e-05, - "loss": 0.7472, + "epoch": 0.26004927842309045, + "grad_norm": 1.8501012325286865, + "learning_rate": 1.2145968981695602e-05, + "loss": 0.6826, "step": 3694 }, { - "epoch": 0.27959592902273844, - "grad_norm": 2.0590710639953613, - "learning_rate": 1.552818846519864e-05, - "loss": 0.7425, + "epoch": 0.2601196761703625, + "grad_norm": 2.083048105239868, + "learning_rate": 1.2144886820494575e-05, + "loss": 0.6461, "step": 3695 }, { - "epoch": 0.27967159774507205, - "grad_norm": 1.7913882732391357, - "learning_rate": 1.552663788914213e-05, - "loss": 0.7771, + "epoch": 0.26019007391763466, + "grad_norm": 1.6914079189300537, + "learning_rate": 1.2143804391806223e-05, + "loss": 0.715, "step": 3696 }, { - "epoch": 0.2797472664674057, - "grad_norm": 2.3980860710144043, - "learning_rate": 1.552508690436644e-05, - "loss": 0.8346, + "epoch": 0.26026047166490673, + "grad_norm": 2.032029390335083, + "learning_rate": 1.2142721695686822e-05, + "loss": 0.6681, "step": 3697 }, { - "epoch": 0.27982293518973933, - "grad_norm": 2.179081678390503, - "learning_rate": 1.552353551096871e-05, - "loss": 0.7082, + "epoch": 0.2603308694121788, + "grad_norm": 1.8269295692443848, + "learning_rate": 1.2141638732192662e-05, + "loss": 0.7979, "step": 3698 }, { - "epoch": 0.27989860391207294, - "grad_norm": 2.324483871459961, - "learning_rate": 1.5521983709046084e-05, - "loss": 0.6597, + "epoch": 0.2604012671594509, + "grad_norm": 2.2394490242004395, + "learning_rate": 1.2140555501380047e-05, + "loss": 0.7328, "step": 3699 }, { - "epoch": 0.27997427263440655, - "grad_norm": 3.7304763793945312, - "learning_rate": 1.5520431498695743e-05, - "loss": 0.6375, + "epoch": 0.26047166490672297, + "grad_norm": 2.1288082599639893, + "learning_rate": 1.2139472003305297e-05, + "loss": 0.7716, "step": 3700 }, { - "epoch": 0.2800499413567402, - "grad_norm": 2.4392082691192627, - "learning_rate": 1.5518878880014894e-05, - "loss": 0.6816, + "epoch": 0.2605420626539951, + "grad_norm": 1.8778785467147827, + "learning_rate": 1.2138388238024743e-05, + "loss": 0.6622, "step": 3701 }, { - "epoch": 0.28012561007907383, - "grad_norm": 2.4349966049194336, - "learning_rate": 1.5517325853100762e-05, - "loss": 0.79, + "epoch": 0.2606124604012672, + "grad_norm": 1.691753625869751, + "learning_rate": 1.2137304205594731e-05, + "loss": 0.7475, "step": 3702 }, { - "epoch": 0.28020127880140744, - "grad_norm": 2.9991888999938965, - "learning_rate": 1.5515772418050605e-05, - "loss": 0.6722, + "epoch": 0.26068285814853925, + "grad_norm": 1.926369309425354, + "learning_rate": 1.2136219906071619e-05, + "loss": 0.7806, "step": 3703 }, { - "epoch": 0.28027694752374105, - "grad_norm": 2.6778488159179688, - "learning_rate": 1.5514218574961706e-05, - "loss": 0.6987, + "epoch": 0.26075325589581133, + "grad_norm": 1.7346218824386597, + "learning_rate": 1.2135135339511785e-05, + "loss": 0.8122, "step": 3704 }, { - "epoch": 0.28035261624607466, - "grad_norm": 2.7935636043548584, - "learning_rate": 1.5512664323931372e-05, - "loss": 0.7712, + "epoch": 0.2608236536430834, + "grad_norm": 1.8292080163955688, + "learning_rate": 1.2134050505971614e-05, + "loss": 0.7029, "step": 3705 }, { - "epoch": 0.2804282849684083, - "grad_norm": 2.4512863159179688, - "learning_rate": 1.5511109665056934e-05, - "loss": 0.7726, + "epoch": 0.2608940513903555, + "grad_norm": 2.1288793087005615, + "learning_rate": 1.2132965405507506e-05, + "loss": 0.7025, "step": 3706 }, { - "epoch": 0.28050395369074194, - "grad_norm": 2.603178024291992, - "learning_rate": 1.5509554598435745e-05, - "loss": 0.6195, + "epoch": 0.2609644491376276, + "grad_norm": 3.117194890975952, + "learning_rate": 1.2131880038175878e-05, + "loss": 0.6909, "step": 3707 }, { - "epoch": 0.28057962241307555, - "grad_norm": 2.1387760639190674, - "learning_rate": 1.5507999124165196e-05, - "loss": 0.6634, + "epoch": 0.2610348468848997, + "grad_norm": 2.2483811378479004, + "learning_rate": 1.213079440403316e-05, + "loss": 0.7074, "step": 3708 }, { - "epoch": 0.28065529113540916, - "grad_norm": 2.0796477794647217, - "learning_rate": 1.550644324234269e-05, - "loss": 0.6855, + "epoch": 0.2611052446321718, + "grad_norm": 2.098398208618164, + "learning_rate": 1.2129708503135794e-05, + "loss": 0.7517, "step": 3709 }, { - "epoch": 0.2807309598577428, - "grad_norm": 2.175611734390259, - "learning_rate": 1.5504886953065666e-05, - "loss": 0.7012, + "epoch": 0.26117564237944385, + "grad_norm": 1.899491310119629, + "learning_rate": 1.2128622335540236e-05, + "loss": 0.7624, "step": 3710 }, { - "epoch": 0.28080662858007643, - "grad_norm": 2.2386960983276367, - "learning_rate": 1.550333025643158e-05, - "loss": 0.7295, + "epoch": 0.26124604012671593, + "grad_norm": 2.050945281982422, + "learning_rate": 1.212753590130296e-05, + "loss": 0.7106, "step": 3711 }, { - "epoch": 0.28088229730241004, - "grad_norm": 4.274651527404785, - "learning_rate": 1.5501773152537922e-05, - "loss": 0.7778, + "epoch": 0.261316437873988, + "grad_norm": 2.3775253295898438, + "learning_rate": 1.2126449200480446e-05, + "loss": 0.6898, "step": 3712 }, { - "epoch": 0.28095796602474365, - "grad_norm": 2.236834764480591, - "learning_rate": 1.5500215641482197e-05, - "loss": 0.8035, + "epoch": 0.26138683562126014, + "grad_norm": 1.7274373769760132, + "learning_rate": 1.2125362233129197e-05, + "loss": 0.7586, "step": 3713 }, { - "epoch": 0.2810336347470773, - "grad_norm": 1.7886241674423218, - "learning_rate": 1.5498657723361946e-05, - "loss": 0.9152, + "epoch": 0.2614572333685322, + "grad_norm": 1.8184096813201904, + "learning_rate": 1.2124274999305724e-05, + "loss": 0.6976, "step": 3714 }, { - "epoch": 0.28110930346941093, - "grad_norm": 2.0251529216766357, - "learning_rate": 1.5497099398274727e-05, - "loss": 0.7473, + "epoch": 0.2615276311158043, + "grad_norm": 1.883055567741394, + "learning_rate": 1.2123187499066555e-05, + "loss": 0.7893, "step": 3715 }, { - "epoch": 0.28118497219174454, - "grad_norm": 2.066375970840454, - "learning_rate": 1.5495540666318133e-05, - "loss": 0.806, + "epoch": 0.26159802886307637, + "grad_norm": 1.592800259590149, + "learning_rate": 1.2122099732468222e-05, + "loss": 0.6859, "step": 3716 }, { - "epoch": 0.28126064091407815, - "grad_norm": 2.595116138458252, - "learning_rate": 1.5493981527589768e-05, - "loss": 0.9727, + "epoch": 0.26166842661034845, + "grad_norm": 2.1218338012695312, + "learning_rate": 1.212101169956729e-05, + "loss": 0.7998, "step": 3717 }, { - "epoch": 0.28133630963641176, - "grad_norm": 2.0963737964630127, - "learning_rate": 1.549242198218728e-05, - "loss": 0.7616, + "epoch": 0.2617388243576206, + "grad_norm": 1.7491270303726196, + "learning_rate": 1.2119923400420321e-05, + "loss": 0.6695, "step": 3718 }, { - "epoch": 0.28141197835874543, - "grad_norm": 2.7268869876861572, - "learning_rate": 1.5490862030208326e-05, - "loss": 0.75, + "epoch": 0.26180922210489266, + "grad_norm": 1.8466252088546753, + "learning_rate": 1.2118834835083897e-05, + "loss": 0.7948, "step": 3719 }, { - "epoch": 0.28148764708107904, - "grad_norm": 2.4957454204559326, - "learning_rate": 1.5489301671750602e-05, - "loss": 0.7749, + "epoch": 0.26187961985216474, + "grad_norm": 2.3268167972564697, + "learning_rate": 1.2117746003614613e-05, + "loss": 0.8033, "step": 3720 }, { - "epoch": 0.28156331580341265, - "grad_norm": 2.260963201522827, - "learning_rate": 1.5487740906911814e-05, - "loss": 0.7527, + "epoch": 0.2619500175994368, + "grad_norm": 2.289970874786377, + "learning_rate": 1.2116656906069082e-05, + "loss": 0.8024, "step": 3721 }, { - "epoch": 0.28163898452574626, - "grad_norm": 2.0017857551574707, - "learning_rate": 1.5486179735789708e-05, - "loss": 0.5984, + "epoch": 0.2620204153467089, + "grad_norm": 1.7626911401748657, + "learning_rate": 1.2115567542503923e-05, + "loss": 0.6478, "step": 3722 }, { - "epoch": 0.2817146532480799, - "grad_norm": 1.8426270484924316, - "learning_rate": 1.548461815848205e-05, - "loss": 0.7738, + "epoch": 0.26209081309398097, + "grad_norm": 1.5600993633270264, + "learning_rate": 1.2114477912975774e-05, + "loss": 0.7786, "step": 3723 }, { - "epoch": 0.28179032197041354, - "grad_norm": 2.3114354610443115, - "learning_rate": 1.5483056175086624e-05, - "loss": 0.6493, + "epoch": 0.2621612108412531, + "grad_norm": 2.1563470363616943, + "learning_rate": 1.2113388017541287e-05, + "loss": 0.7015, "step": 3724 }, { - "epoch": 0.28186599069274715, - "grad_norm": 2.0738372802734375, - "learning_rate": 1.5481493785701255e-05, - "loss": 0.7372, + "epoch": 0.2622316085885252, + "grad_norm": 1.7461328506469727, + "learning_rate": 1.2112297856257127e-05, + "loss": 0.7389, "step": 3725 }, { - "epoch": 0.28194165941508076, - "grad_norm": 2.051266670227051, - "learning_rate": 1.547993099042378e-05, - "loss": 0.6902, + "epoch": 0.26230200633579726, + "grad_norm": 2.337660312652588, + "learning_rate": 1.211120742917997e-05, + "loss": 0.7526, "step": 3726 }, { - "epoch": 0.2820173281374144, - "grad_norm": 2.181405544281006, - "learning_rate": 1.547836778935207e-05, - "loss": 0.7094, + "epoch": 0.26237240408306933, + "grad_norm": 2.4296700954437256, + "learning_rate": 1.211011673636651e-05, + "loss": 0.7214, "step": 3727 }, { - "epoch": 0.28209299685974804, - "grad_norm": 2.6144001483917236, - "learning_rate": 1.5476804182584015e-05, - "loss": 0.7553, + "epoch": 0.2624428018303414, + "grad_norm": 1.8229262828826904, + "learning_rate": 1.2109025777873453e-05, + "loss": 0.8164, "step": 3728 }, { - "epoch": 0.28216866558208165, - "grad_norm": 2.2755348682403564, - "learning_rate": 1.5475240170217532e-05, - "loss": 0.7335, + "epoch": 0.26251319957761354, + "grad_norm": 1.97089684009552, + "learning_rate": 1.210793455375752e-05, + "loss": 0.6732, "step": 3729 }, { - "epoch": 0.28224433430441526, - "grad_norm": 2.2966911792755127, - "learning_rate": 1.547367575235057e-05, - "loss": 0.7893, + "epoch": 0.2625835973248856, + "grad_norm": 1.6465404033660889, + "learning_rate": 1.2106843064075442e-05, + "loss": 0.8022, "step": 3730 }, { - "epoch": 0.28232000302674887, - "grad_norm": 2.246685266494751, - "learning_rate": 1.547211092908109e-05, - "loss": 0.7984, + "epoch": 0.2626539950721577, + "grad_norm": 1.770046353340149, + "learning_rate": 1.2105751308883969e-05, + "loss": 0.7028, "step": 3731 }, { - "epoch": 0.28239567174908253, - "grad_norm": 2.2465322017669678, - "learning_rate": 1.547054570050709e-05, - "loss": 0.8449, + "epoch": 0.2627243928194298, + "grad_norm": 1.7353830337524414, + "learning_rate": 1.2104659288239861e-05, + "loss": 0.7164, "step": 3732 }, { - "epoch": 0.28247134047141614, - "grad_norm": 2.385525703430176, - "learning_rate": 1.546898006672659e-05, - "loss": 0.7544, + "epoch": 0.26279479056670185, + "grad_norm": 1.826967477798462, + "learning_rate": 1.2103567002199892e-05, + "loss": 0.7115, "step": 3733 }, { - "epoch": 0.28254700919374975, - "grad_norm": 2.5891146659851074, - "learning_rate": 1.5467414027837638e-05, - "loss": 0.7069, + "epoch": 0.26286518831397393, + "grad_norm": 2.060905933380127, + "learning_rate": 1.2102474450820854e-05, + "loss": 0.7061, "step": 3734 }, { - "epoch": 0.28262267791608336, - "grad_norm": 3.729670763015747, - "learning_rate": 1.54658475839383e-05, - "loss": 0.8181, + "epoch": 0.26293558606124606, + "grad_norm": 1.6632068157196045, + "learning_rate": 1.2101381634159548e-05, + "loss": 0.7337, "step": 3735 }, { - "epoch": 0.28269834663841703, - "grad_norm": 2.792766571044922, - "learning_rate": 1.546428073512667e-05, - "loss": 0.7421, + "epoch": 0.26300598380851814, + "grad_norm": 1.8711063861846924, + "learning_rate": 1.210028855227279e-05, + "loss": 0.7993, "step": 3736 }, { - "epoch": 0.28277401536075064, - "grad_norm": 1.9378312826156616, - "learning_rate": 1.5462713481500875e-05, - "loss": 0.4965, + "epoch": 0.2630763815557902, + "grad_norm": 2.184187650680542, + "learning_rate": 1.2099195205217413e-05, + "loss": 0.6394, "step": 3737 }, { - "epoch": 0.28284968408308425, - "grad_norm": 2.495208978652954, - "learning_rate": 1.5461145823159063e-05, - "loss": 0.6608, + "epoch": 0.2631467793030623, + "grad_norm": 1.815587043762207, + "learning_rate": 1.2098101593050258e-05, + "loss": 0.8144, "step": 3738 }, { - "epoch": 0.28292535280541786, - "grad_norm": 2.5819809436798096, - "learning_rate": 1.5459577760199396e-05, - "loss": 0.6334, + "epoch": 0.26321717705033437, + "grad_norm": 1.557524561882019, + "learning_rate": 1.2097007715828184e-05, + "loss": 0.6818, "step": 3739 }, { - "epoch": 0.28300102152775153, - "grad_norm": 2.375948667526245, - "learning_rate": 1.5458009292720077e-05, - "loss": 0.7322, + "epoch": 0.26328757479760645, + "grad_norm": 1.8915354013442993, + "learning_rate": 1.2095913573608065e-05, + "loss": 0.7305, "step": 3740 }, { - "epoch": 0.28307669025008514, - "grad_norm": 2.2175607681274414, - "learning_rate": 1.545644042081933e-05, - "loss": 0.9031, + "epoch": 0.2633579725448786, + "grad_norm": 2.67863392829895, + "learning_rate": 1.209481916644678e-05, + "loss": 0.7161, "step": 3741 }, { - "epoch": 0.28315235897241875, - "grad_norm": 2.4103119373321533, - "learning_rate": 1.54548711445954e-05, - "loss": 0.9138, + "epoch": 0.26342837029215066, + "grad_norm": 2.025400400161743, + "learning_rate": 1.2093724494401237e-05, + "loss": 0.8226, "step": 3742 }, { - "epoch": 0.28322802769475236, - "grad_norm": 3.062034845352173, - "learning_rate": 1.5453301464146563e-05, - "loss": 0.8161, + "epoch": 0.26349876803942274, + "grad_norm": 2.081714630126953, + "learning_rate": 1.209262955752834e-05, + "loss": 0.7552, "step": 3743 }, { - "epoch": 0.28330369641708597, - "grad_norm": 2.3038175106048584, - "learning_rate": 1.5451731379571115e-05, - "loss": 0.7118, + "epoch": 0.2635691657866948, + "grad_norm": 1.7477253675460815, + "learning_rate": 1.2091534355885024e-05, + "loss": 0.7559, "step": 3744 }, { - "epoch": 0.28337936513941964, - "grad_norm": 3.113598108291626, - "learning_rate": 1.545016089096738e-05, - "loss": 0.772, + "epoch": 0.2636395635339669, + "grad_norm": 1.7836103439331055, + "learning_rate": 1.2090438889528224e-05, + "loss": 0.6195, "step": 3745 }, { - "epoch": 0.28345503386175325, - "grad_norm": 2.0970757007598877, - "learning_rate": 1.544858999843371e-05, - "loss": 0.8035, + "epoch": 0.263709961281239, + "grad_norm": 1.6701231002807617, + "learning_rate": 1.2089343158514895e-05, + "loss": 0.8485, "step": 3746 }, { - "epoch": 0.28353070258408686, - "grad_norm": 2.837749481201172, - "learning_rate": 1.5447018702068475e-05, - "loss": 0.8556, + "epoch": 0.2637803590285111, + "grad_norm": 5.664880275726318, + "learning_rate": 1.2088247162902006e-05, + "loss": 0.7252, "step": 3747 }, { - "epoch": 0.28360637130642047, - "grad_norm": 2.4374568462371826, - "learning_rate": 1.5445447001970078e-05, - "loss": 0.6574, + "epoch": 0.2638507567757832, + "grad_norm": 4.459561824798584, + "learning_rate": 1.208715090274654e-05, + "loss": 0.7174, "step": 3748 }, { - "epoch": 0.28368204002875413, - "grad_norm": 2.6389224529266357, - "learning_rate": 1.544387489823694e-05, - "loss": 0.6553, + "epoch": 0.26392115452305526, + "grad_norm": 1.7916219234466553, + "learning_rate": 1.208605437810549e-05, + "loss": 0.7025, "step": 3749 }, { - "epoch": 0.28375770875108774, - "grad_norm": 2.4554734230041504, - "learning_rate": 1.5442302390967517e-05, - "loss": 0.7377, + "epoch": 0.26399155227032733, + "grad_norm": 2.096947431564331, + "learning_rate": 1.2084957589035867e-05, + "loss": 0.7419, "step": 3750 }, { - "epoch": 0.28383337747342136, - "grad_norm": 2.6802544593811035, - "learning_rate": 1.544072948026028e-05, - "loss": 0.6677, + "epoch": 0.2640619500175994, + "grad_norm": 1.8616689443588257, + "learning_rate": 1.2083860535594696e-05, + "loss": 0.9309, "step": 3751 }, { - "epoch": 0.28390904619575497, - "grad_norm": 2.261831283569336, - "learning_rate": 1.543915616621373e-05, - "loss": 0.6648, + "epoch": 0.26413234776487154, + "grad_norm": 2.0282797813415527, + "learning_rate": 1.2082763217839008e-05, + "loss": 0.7448, "step": 3752 }, { - "epoch": 0.28398471491808863, - "grad_norm": 2.759446144104004, - "learning_rate": 1.5437582448926395e-05, - "loss": 0.5712, + "epoch": 0.2642027455121436, + "grad_norm": 1.7964266538619995, + "learning_rate": 1.208166563582586e-05, + "loss": 0.7321, "step": 3753 }, { - "epoch": 0.28406038364042224, - "grad_norm": 1.8836723566055298, - "learning_rate": 1.5436008328496827e-05, - "loss": 0.7159, + "epoch": 0.2642731432594157, + "grad_norm": 2.179042100906372, + "learning_rate": 1.2080567789612313e-05, + "loss": 0.743, "step": 3754 }, { - "epoch": 0.28413605236275585, - "grad_norm": 2.804567813873291, - "learning_rate": 1.54344338050236e-05, - "loss": 0.7679, + "epoch": 0.2643435410066878, + "grad_norm": 1.8390053510665894, + "learning_rate": 1.2079469679255445e-05, + "loss": 0.9065, "step": 3755 }, { - "epoch": 0.28421172108508946, - "grad_norm": 2.44331431388855, - "learning_rate": 1.5432858878605315e-05, - "loss": 0.8358, + "epoch": 0.26441393875395985, + "grad_norm": 2.004824161529541, + "learning_rate": 1.2078371304812348e-05, + "loss": 0.7413, "step": 3756 }, { - "epoch": 0.2842873898074231, - "grad_norm": 1.8980436325073242, - "learning_rate": 1.54312835493406e-05, - "loss": 0.6766, + "epoch": 0.264484336501232, + "grad_norm": 1.9170900583267212, + "learning_rate": 1.2077272666340127e-05, + "loss": 0.6503, "step": 3757 }, { - "epoch": 0.28436305852975674, - "grad_norm": 3.5173916816711426, - "learning_rate": 1.542970781732811e-05, - "loss": 0.8005, + "epoch": 0.26455473424850406, + "grad_norm": 1.6344085931777954, + "learning_rate": 1.2076173763895902e-05, + "loss": 0.6845, "step": 3758 }, { - "epoch": 0.28443872725209035, - "grad_norm": 2.506789207458496, - "learning_rate": 1.542813168266652e-05, - "loss": 0.7122, + "epoch": 0.26462513199577614, + "grad_norm": 1.9141815900802612, + "learning_rate": 1.2075074597536807e-05, + "loss": 0.8095, "step": 3759 }, { - "epoch": 0.28451439597442396, - "grad_norm": 2.8228511810302734, - "learning_rate": 1.5426555145454533e-05, - "loss": 0.7872, + "epoch": 0.2646955297430482, + "grad_norm": 1.8761615753173828, + "learning_rate": 1.2073975167319986e-05, + "loss": 0.7446, "step": 3760 }, { - "epoch": 0.28459006469675757, - "grad_norm": 2.5148704051971436, - "learning_rate": 1.5424978205790875e-05, - "loss": 0.8348, + "epoch": 0.2647659274903203, + "grad_norm": 2.0224928855895996, + "learning_rate": 1.20728754733026e-05, + "loss": 0.6873, "step": 3761 }, { - "epoch": 0.28466573341909124, - "grad_norm": 2.349149227142334, - "learning_rate": 1.54234008637743e-05, - "loss": 0.9365, + "epoch": 0.2648363252375924, + "grad_norm": 2.162325859069824, + "learning_rate": 1.2071775515541826e-05, + "loss": 0.7372, "step": 3762 }, { - "epoch": 0.28474140214142485, - "grad_norm": 2.6498892307281494, - "learning_rate": 1.542182311950359e-05, - "loss": 0.8949, + "epoch": 0.2649067229848645, + "grad_norm": 1.736892819404602, + "learning_rate": 1.207067529409485e-05, + "loss": 0.763, "step": 3763 }, { - "epoch": 0.28481707086375846, - "grad_norm": 2.6310479640960693, - "learning_rate": 1.5420244973077547e-05, - "loss": 0.793, + "epoch": 0.2649771207321366, + "grad_norm": 2.084728956222534, + "learning_rate": 1.2069574809018872e-05, + "loss": 0.6693, "step": 3764 }, { - "epoch": 0.28489273958609207, - "grad_norm": 2.2707366943359375, - "learning_rate": 1.5418666424595e-05, - "loss": 0.7804, + "epoch": 0.26504751847940866, + "grad_norm": 1.7909811735153198, + "learning_rate": 1.2068474060371106e-05, + "loss": 0.7178, "step": 3765 }, { - "epoch": 0.28496840830842574, - "grad_norm": 2.279681444168091, - "learning_rate": 1.5417087474154796e-05, - "loss": 0.7311, + "epoch": 0.26511791622668074, + "grad_norm": 1.9173625707626343, + "learning_rate": 1.2067373048208787e-05, + "loss": 0.7094, "step": 3766 }, { - "epoch": 0.28504407703075935, - "grad_norm": 2.6598429679870605, - "learning_rate": 1.541550812185582e-05, - "loss": 0.727, + "epoch": 0.2651883139739528, + "grad_norm": 1.7235119342803955, + "learning_rate": 1.2066271772589152e-05, + "loss": 0.7035, "step": 3767 }, { - "epoch": 0.28511974575309296, - "grad_norm": 2.0504753589630127, - "learning_rate": 1.5413928367796974e-05, - "loss": 0.7835, + "epoch": 0.2652587117212249, + "grad_norm": 1.7720882892608643, + "learning_rate": 1.2065170233569461e-05, + "loss": 0.8381, "step": 3768 }, { - "epoch": 0.28519541447542657, - "grad_norm": 2.460989236831665, - "learning_rate": 1.541234821207719e-05, - "loss": 0.7642, + "epoch": 0.265329109468497, + "grad_norm": 1.6778637170791626, + "learning_rate": 1.2064068431206982e-05, + "loss": 0.7823, "step": 3769 }, { - "epoch": 0.28527108319776023, - "grad_norm": 2.5266435146331787, - "learning_rate": 1.5410767654795423e-05, - "loss": 0.6354, + "epoch": 0.2653995072157691, + "grad_norm": 1.5917959213256836, + "learning_rate": 1.2062966365558996e-05, + "loss": 0.8267, "step": 3770 }, { - "epoch": 0.28534675192009384, - "grad_norm": 2.2240378856658936, - "learning_rate": 1.540918669605065e-05, - "loss": 0.8202, + "epoch": 0.2654699049630412, + "grad_norm": 1.8786720037460327, + "learning_rate": 1.2061864036682807e-05, + "loss": 0.817, "step": 3771 }, { - "epoch": 0.28542242064242745, - "grad_norm": 2.3329272270202637, - "learning_rate": 1.540760533594187e-05, - "loss": 0.6528, + "epoch": 0.26554030271031326, + "grad_norm": 1.8178073167800903, + "learning_rate": 1.206076144463572e-05, + "loss": 0.6555, "step": 3772 }, { - "epoch": 0.28549808936476107, - "grad_norm": 2.2796730995178223, - "learning_rate": 1.5406023574568124e-05, - "loss": 0.6997, + "epoch": 0.26561070045758534, + "grad_norm": 1.9280158281326294, + "learning_rate": 1.2059658589475062e-05, + "loss": 0.8477, "step": 3773 }, { - "epoch": 0.2855737580870947, - "grad_norm": 2.3111414909362793, - "learning_rate": 1.5404441412028464e-05, - "loss": 0.7873, + "epoch": 0.26568109820485747, + "grad_norm": 1.755266785621643, + "learning_rate": 1.2058555471258175e-05, + "loss": 0.7872, "step": 3774 }, { - "epoch": 0.28564942680942834, - "grad_norm": 2.165189266204834, - "learning_rate": 1.5402858848421962e-05, - "loss": 0.7418, + "epoch": 0.26575149595212955, + "grad_norm": 1.8421982526779175, + "learning_rate": 1.2057452090042404e-05, + "loss": 0.6518, "step": 3775 }, { - "epoch": 0.28572509553176195, - "grad_norm": 3.3214333057403564, - "learning_rate": 1.5401275883847736e-05, - "loss": 0.7305, + "epoch": 0.2658218936994016, + "grad_norm": 2.5710620880126953, + "learning_rate": 1.2056348445885121e-05, + "loss": 0.8491, "step": 3776 }, { - "epoch": 0.28580076425409556, - "grad_norm": 2.159792900085449, - "learning_rate": 1.539969251840491e-05, - "loss": 0.6443, + "epoch": 0.2658922914466737, + "grad_norm": 2.037706136703491, + "learning_rate": 1.2055244538843702e-05, + "loss": 0.7278, "step": 3777 }, { - "epoch": 0.2858764329764292, - "grad_norm": 2.1362006664276123, - "learning_rate": 1.5398108752192636e-05, - "loss": 0.9493, + "epoch": 0.2659626891939458, + "grad_norm": 1.8966706991195679, + "learning_rate": 1.205414036897554e-05, + "loss": 0.6866, "step": 3778 }, { - "epoch": 0.28595210169876284, - "grad_norm": 2.815425157546997, - "learning_rate": 1.53965245853101e-05, - "loss": 0.7395, + "epoch": 0.26603308694121786, + "grad_norm": 1.9719735383987427, + "learning_rate": 1.2053035936338045e-05, + "loss": 0.7963, "step": 3779 }, { - "epoch": 0.28602777042109645, - "grad_norm": 2.310188055038452, - "learning_rate": 1.53949400178565e-05, - "loss": 0.8445, + "epoch": 0.26610348468849, + "grad_norm": 2.0502636432647705, + "learning_rate": 1.2051931240988634e-05, + "loss": 0.7256, "step": 3780 }, { - "epoch": 0.28610343914343006, - "grad_norm": 2.610882043838501, - "learning_rate": 1.539335504993108e-05, - "loss": 0.6956, + "epoch": 0.26617388243576207, + "grad_norm": 1.736865520477295, + "learning_rate": 1.2050826282984745e-05, + "loss": 0.6937, "step": 3781 }, { - "epoch": 0.28617910786576367, - "grad_norm": 1.9927890300750732, - "learning_rate": 1.5391769681633084e-05, - "loss": 0.761, + "epoch": 0.26624428018303414, + "grad_norm": 1.6097546815872192, + "learning_rate": 1.2049721062383819e-05, + "loss": 0.7007, "step": 3782 }, { - "epoch": 0.28625477658809734, - "grad_norm": 2.202202558517456, - "learning_rate": 1.53901839130618e-05, - "loss": 0.8485, + "epoch": 0.2663146779303062, + "grad_norm": 1.9799835681915283, + "learning_rate": 1.2048615579243325e-05, + "loss": 0.6475, "step": 3783 }, { - "epoch": 0.28633044531043095, - "grad_norm": 3.0478105545043945, - "learning_rate": 1.5388597744316527e-05, - "loss": 0.7307, + "epoch": 0.2663850756775783, + "grad_norm": 1.600266933441162, + "learning_rate": 1.2047509833620734e-05, + "loss": 0.7057, "step": 3784 }, { - "epoch": 0.28640611403276456, - "grad_norm": 2.1386568546295166, - "learning_rate": 1.5387011175496604e-05, - "loss": 0.7544, + "epoch": 0.26645547342485043, + "grad_norm": 1.9862117767333984, + "learning_rate": 1.2046403825573534e-05, + "loss": 0.7491, "step": 3785 }, { - "epoch": 0.28648178275509817, - "grad_norm": 2.2375717163085938, - "learning_rate": 1.538542420670138e-05, - "loss": 0.9585, + "epoch": 0.2665258711721225, + "grad_norm": 2.0891196727752686, + "learning_rate": 1.204529755515923e-05, + "loss": 0.6689, "step": 3786 }, { - "epoch": 0.2865574514774318, - "grad_norm": 2.476581573486328, - "learning_rate": 1.5383836838030242e-05, - "loss": 0.6515, + "epoch": 0.2665962689193946, + "grad_norm": 2.0367112159729004, + "learning_rate": 1.2044191022435339e-05, + "loss": 0.7042, "step": 3787 }, { - "epoch": 0.28663312019976545, - "grad_norm": 2.101915121078491, - "learning_rate": 1.538224906958259e-05, - "loss": 0.7691, + "epoch": 0.26666666666666666, + "grad_norm": 1.9145265817642212, + "learning_rate": 1.2043084227459383e-05, + "loss": 0.7325, "step": 3788 }, { - "epoch": 0.28670878892209906, - "grad_norm": 2.667830467224121, - "learning_rate": 1.538066090145786e-05, - "loss": 0.7572, + "epoch": 0.26673706441393874, + "grad_norm": 1.747888207435608, + "learning_rate": 1.2041977170288916e-05, + "loss": 0.8184, "step": 3789 }, { - "epoch": 0.28678445764443267, - "grad_norm": 2.5487112998962402, - "learning_rate": 1.5379072333755508e-05, - "loss": 0.5705, + "epoch": 0.2668074621612108, + "grad_norm": 3.06093430519104, + "learning_rate": 1.2040869850981487e-05, + "loss": 0.8524, "step": 3790 }, { - "epoch": 0.2868601263667663, - "grad_norm": 2.0503430366516113, - "learning_rate": 1.5377483366575012e-05, - "loss": 0.8871, + "epoch": 0.26687785990848295, + "grad_norm": 2.0501463413238525, + "learning_rate": 1.203976226959467e-05, + "loss": 0.7387, "step": 3791 }, { - "epoch": 0.28693579508909994, - "grad_norm": 1.8013478517532349, - "learning_rate": 1.5375894000015883e-05, - "loss": 0.7667, + "epoch": 0.26694825765575503, + "grad_norm": 1.7529066801071167, + "learning_rate": 1.2038654426186048e-05, + "loss": 0.8285, "step": 3792 }, { - "epoch": 0.28701146381143355, - "grad_norm": 11.85326862335205, - "learning_rate": 1.5374304234177648e-05, - "loss": 0.9177, + "epoch": 0.2670186554030271, + "grad_norm": 1.7912625074386597, + "learning_rate": 1.203754632081322e-05, + "loss": 0.8401, "step": 3793 }, { - "epoch": 0.28708713253376716, - "grad_norm": 2.1834912300109863, - "learning_rate": 1.5372714069159865e-05, - "loss": 0.7887, + "epoch": 0.2670890531502992, + "grad_norm": 2.4856936931610107, + "learning_rate": 1.2036437953533793e-05, + "loss": 0.8307, "step": 3794 }, { - "epoch": 0.2871628012561008, - "grad_norm": 2.3547914028167725, - "learning_rate": 1.5371123505062116e-05, - "loss": 0.5846, + "epoch": 0.26715945089757126, + "grad_norm": 1.8110512495040894, + "learning_rate": 1.2035329324405398e-05, + "loss": 0.7458, "step": 3795 }, { - "epoch": 0.28723846997843444, - "grad_norm": 2.845733404159546, - "learning_rate": 1.5369532541984003e-05, - "loss": 0.6585, + "epoch": 0.26722984864484334, + "grad_norm": 1.9504203796386719, + "learning_rate": 1.2034220433485674e-05, + "loss": 0.7459, "step": 3796 }, { - "epoch": 0.28731413870076805, - "grad_norm": 2.8735568523406982, - "learning_rate": 1.5367941180025162e-05, - "loss": 0.7482, + "epoch": 0.26730024639211547, + "grad_norm": 2.204909563064575, + "learning_rate": 1.2033111280832266e-05, + "loss": 0.8255, "step": 3797 }, { - "epoch": 0.28738980742310166, - "grad_norm": 2.0185205936431885, - "learning_rate": 1.536634941928525e-05, - "loss": 0.7186, + "epoch": 0.26737064413938755, + "grad_norm": 1.7151297330856323, + "learning_rate": 1.2032001866502847e-05, + "loss": 0.6887, "step": 3798 }, { - "epoch": 0.2874654761454353, - "grad_norm": 2.4860315322875977, - "learning_rate": 1.5364757259863943e-05, - "loss": 0.6788, + "epoch": 0.2674410418866596, + "grad_norm": 1.9220085144042969, + "learning_rate": 1.2030892190555093e-05, + "loss": 0.8765, "step": 3799 }, { - "epoch": 0.2875411448677689, - "grad_norm": 1.9986555576324463, - "learning_rate": 1.5363164701860953e-05, - "loss": 0.7885, + "epoch": 0.2675114396339317, + "grad_norm": 2.105177164077759, + "learning_rate": 1.2029782253046696e-05, + "loss": 0.6458, "step": 3800 }, { - "epoch": 0.28761681359010255, - "grad_norm": 2.455195665359497, - "learning_rate": 1.536157174537601e-05, - "loss": 0.7411, + "epoch": 0.2675818373812038, + "grad_norm": 1.7963594198226929, + "learning_rate": 1.2028672054035368e-05, + "loss": 0.6497, "step": 3801 }, { - "epoch": 0.28769248231243616, - "grad_norm": 2.079657793045044, - "learning_rate": 1.5359978390508865e-05, - "loss": 0.8515, + "epoch": 0.2676522351284759, + "grad_norm": 2.2115871906280518, + "learning_rate": 1.2027561593578824e-05, + "loss": 0.8414, "step": 3802 }, { - "epoch": 0.28776815103476977, - "grad_norm": 2.4668712615966797, - "learning_rate": 1.5358384637359304e-05, - "loss": 0.6981, + "epoch": 0.267722632875748, + "grad_norm": 1.817186713218689, + "learning_rate": 1.2026450871734797e-05, + "loss": 0.6109, "step": 3803 }, { - "epoch": 0.2878438197571034, - "grad_norm": 2.006376028060913, - "learning_rate": 1.535679048602713e-05, - "loss": 0.6889, + "epoch": 0.26779303062302007, + "grad_norm": 1.991259217262268, + "learning_rate": 1.202533988856104e-05, + "loss": 0.6851, "step": 3804 }, { - "epoch": 0.28791948847943705, - "grad_norm": 2.464771032333374, - "learning_rate": 1.5355195936612178e-05, - "loss": 0.7923, + "epoch": 0.26786342837029214, + "grad_norm": 1.8222591876983643, + "learning_rate": 1.202422864411531e-05, + "loss": 0.7213, "step": 3805 }, { - "epoch": 0.28799515720177066, - "grad_norm": 2.3989076614379883, - "learning_rate": 1.53536009892143e-05, - "loss": 0.7724, + "epoch": 0.2679338261175642, + "grad_norm": 1.7424530982971191, + "learning_rate": 1.202311713845538e-05, + "loss": 0.6896, "step": 3806 }, { - "epoch": 0.28807082592410427, - "grad_norm": 2.19671893119812, - "learning_rate": 1.5352005643933378e-05, - "loss": 0.7459, + "epoch": 0.2680042238648363, + "grad_norm": 1.8607991933822632, + "learning_rate": 1.2022005371639044e-05, + "loss": 0.7211, "step": 3807 }, { - "epoch": 0.2881464946464379, - "grad_norm": 2.1972532272338867, - "learning_rate": 1.5350409900869317e-05, - "loss": 0.6167, + "epoch": 0.26807462161210843, + "grad_norm": 1.756579041481018, + "learning_rate": 1.2020893343724103e-05, + "loss": 0.7301, "step": 3808 }, { - "epoch": 0.28822216336877154, - "grad_norm": 4.029229164123535, - "learning_rate": 1.534881376012205e-05, - "loss": 0.7394, + "epoch": 0.2681450193593805, + "grad_norm": 1.8377853631973267, + "learning_rate": 1.2019781054768365e-05, + "loss": 0.6916, "step": 3809 }, { - "epoch": 0.28829783209110516, - "grad_norm": 2.5666446685791016, - "learning_rate": 1.534721722179153e-05, - "loss": 0.7416, + "epoch": 0.2682154171066526, + "grad_norm": 1.791008472442627, + "learning_rate": 1.2018668504829666e-05, + "loss": 0.7449, "step": 3810 }, { - "epoch": 0.28837350081343877, - "grad_norm": 2.153628349304199, - "learning_rate": 1.534562028597774e-05, - "loss": 0.6953, + "epoch": 0.26828581485392466, + "grad_norm": 2.210561990737915, + "learning_rate": 1.2017555693965845e-05, + "loss": 0.7573, "step": 3811 }, { - "epoch": 0.2884491695357724, - "grad_norm": 2.0070858001708984, - "learning_rate": 1.5344022952780678e-05, - "loss": 0.7555, + "epoch": 0.26835621260119674, + "grad_norm": 2.3197529315948486, + "learning_rate": 1.201644262223476e-05, + "loss": 0.7861, "step": 3812 }, { - "epoch": 0.288524838258106, - "grad_norm": 2.8213300704956055, - "learning_rate": 1.5342425222300384e-05, - "loss": 0.7651, + "epoch": 0.2684266103484689, + "grad_norm": 2.1047229766845703, + "learning_rate": 1.2015329289694281e-05, + "loss": 0.7689, "step": 3813 }, { - "epoch": 0.28860050698043965, - "grad_norm": 2.5318684577941895, - "learning_rate": 1.534082709463691e-05, - "loss": 0.8429, + "epoch": 0.26849700809574095, + "grad_norm": 1.9818062782287598, + "learning_rate": 1.2014215696402288e-05, + "loss": 0.7719, "step": 3814 }, { - "epoch": 0.28867617570277326, - "grad_norm": 2.2411584854125977, - "learning_rate": 1.533922856989033e-05, - "loss": 1.041, + "epoch": 0.26856740584301303, + "grad_norm": 1.7046561241149902, + "learning_rate": 1.2013101842416682e-05, + "loss": 0.6785, "step": 3815 }, { - "epoch": 0.2887518444251069, - "grad_norm": 2.1817727088928223, - "learning_rate": 1.5337629648160754e-05, - "loss": 0.6029, + "epoch": 0.2686378035902851, + "grad_norm": 2.1183481216430664, + "learning_rate": 1.201198772779537e-05, + "loss": 0.7991, "step": 3816 }, { - "epoch": 0.2888275131474405, - "grad_norm": 1.9107509851455688, - "learning_rate": 1.5336030329548315e-05, - "loss": 0.7631, + "epoch": 0.2687082013375572, + "grad_norm": 1.879280686378479, + "learning_rate": 1.2010873352596277e-05, + "loss": 0.6323, "step": 3817 }, { - "epoch": 0.28890318186977415, - "grad_norm": 2.7551729679107666, - "learning_rate": 1.533443061415316e-05, - "loss": 0.6992, + "epoch": 0.26877859908482926, + "grad_norm": 1.827492117881775, + "learning_rate": 1.200975871687734e-05, + "loss": 0.8392, "step": 3818 }, { - "epoch": 0.28897885059210776, - "grad_norm": 3.3587355613708496, - "learning_rate": 1.533283050207547e-05, - "loss": 0.5839, + "epoch": 0.2688489968321014, + "grad_norm": 1.5740671157836914, + "learning_rate": 1.200864382069651e-05, + "loss": 0.5986, "step": 3819 }, { - "epoch": 0.28905451931444137, - "grad_norm": 2.6251888275146484, - "learning_rate": 1.533122999341546e-05, - "loss": 0.8546, + "epoch": 0.26891939457937347, + "grad_norm": 2.233166456222534, + "learning_rate": 1.2007528664111752e-05, + "loss": 0.8235, "step": 3820 }, { - "epoch": 0.289130188036775, - "grad_norm": 3.0454955101013184, - "learning_rate": 1.532962908827334e-05, - "loss": 0.8615, + "epoch": 0.26898979232664555, + "grad_norm": 1.7757976055145264, + "learning_rate": 1.2006413247181043e-05, + "loss": 0.708, "step": 3821 }, { - "epoch": 0.28920585675910865, - "grad_norm": 2.355125904083252, - "learning_rate": 1.532802778674938e-05, - "loss": 0.7968, + "epoch": 0.2690601900739176, + "grad_norm": 2.003347873687744, + "learning_rate": 1.2005297569962375e-05, + "loss": 0.778, "step": 3822 }, { - "epoch": 0.28928152548144226, - "grad_norm": 2.498617649078369, - "learning_rate": 1.5326426088943854e-05, - "loss": 0.757, + "epoch": 0.2691305878211897, + "grad_norm": 2.4470174312591553, + "learning_rate": 1.2004181632513756e-05, + "loss": 0.8365, "step": 3823 }, { - "epoch": 0.28935719420377587, - "grad_norm": 2.536444664001465, - "learning_rate": 1.532482399495706e-05, - "loss": 0.9326, + "epoch": 0.26920098556846184, + "grad_norm": 2.1624767780303955, + "learning_rate": 1.20030654348932e-05, + "loss": 0.7179, "step": 3824 }, { - "epoch": 0.2894328629261095, - "grad_norm": 2.7442028522491455, - "learning_rate": 1.532322150488933e-05, - "loss": 0.8168, + "epoch": 0.2692713833157339, + "grad_norm": 2.132476806640625, + "learning_rate": 1.200194897715874e-05, + "loss": 0.763, "step": 3825 }, { - "epoch": 0.2895085316484431, - "grad_norm": 2.647066116333008, - "learning_rate": 1.532161861884102e-05, - "loss": 0.7789, + "epoch": 0.269341781063006, + "grad_norm": 1.8587969541549683, + "learning_rate": 1.2000832259368425e-05, + "loss": 0.5459, "step": 3826 }, { - "epoch": 0.28958420037077676, - "grad_norm": 2.5589473247528076, - "learning_rate": 1.5320015336912505e-05, - "loss": 0.6668, + "epoch": 0.26941217881027807, + "grad_norm": 1.8711103200912476, + "learning_rate": 1.199971528158031e-05, + "loss": 0.7941, "step": 3827 }, { - "epoch": 0.28965986909311037, - "grad_norm": 1.8725637197494507, - "learning_rate": 1.531841165920419e-05, - "loss": 0.8211, + "epoch": 0.26948257655755015, + "grad_norm": 1.7823796272277832, + "learning_rate": 1.1998598043852471e-05, + "loss": 0.6841, "step": 3828 }, { - "epoch": 0.289735537815444, - "grad_norm": 2.117074966430664, - "learning_rate": 1.53168075858165e-05, - "loss": 0.8106, + "epoch": 0.2695529743048222, + "grad_norm": 2.273538827896118, + "learning_rate": 1.1997480546242991e-05, + "loss": 0.6922, "step": 3829 }, { - "epoch": 0.2898112065377776, - "grad_norm": 2.3661158084869385, - "learning_rate": 1.5315203116849883e-05, - "loss": 0.6988, + "epoch": 0.26962337205209436, + "grad_norm": 1.6793993711471558, + "learning_rate": 1.1996362788809971e-05, + "loss": 0.691, "step": 3830 }, { - "epoch": 0.28988687526011125, - "grad_norm": 2.031886100769043, - "learning_rate": 1.5313598252404824e-05, - "loss": 0.7676, + "epoch": 0.26969376979936643, + "grad_norm": 1.7589393854141235, + "learning_rate": 1.1995244771611526e-05, + "loss": 0.7204, "step": 3831 }, { - "epoch": 0.28996254398244486, - "grad_norm": 3.7571802139282227, - "learning_rate": 1.5311992992581824e-05, - "loss": 0.8097, + "epoch": 0.2697641675466385, + "grad_norm": 2.303332567214966, + "learning_rate": 1.1994126494705783e-05, + "loss": 0.8503, "step": 3832 }, { - "epoch": 0.2900382127047785, - "grad_norm": 2.4319801330566406, - "learning_rate": 1.5310387337481405e-05, - "loss": 0.7101, + "epoch": 0.2698345652939106, + "grad_norm": 2.0999155044555664, + "learning_rate": 1.1993007958150877e-05, + "loss": 0.8036, "step": 3833 }, { - "epoch": 0.2901138814271121, - "grad_norm": 2.1088521480560303, - "learning_rate": 1.530878128720412e-05, - "loss": 0.9235, + "epoch": 0.26990496304118267, + "grad_norm": 1.9529392719268799, + "learning_rate": 1.1991889162004968e-05, + "loss": 0.7182, "step": 3834 }, { - "epoch": 0.29018955014944575, - "grad_norm": 2.3383238315582275, - "learning_rate": 1.5307174841850546e-05, - "loss": 0.8772, + "epoch": 0.26997536078845474, + "grad_norm": 1.9266053438186646, + "learning_rate": 1.199077010632622e-05, + "loss": 0.7208, "step": 3835 }, { - "epoch": 0.29026521887177936, - "grad_norm": 2.2183845043182373, - "learning_rate": 1.530556800152129e-05, - "loss": 0.8155, + "epoch": 0.2700457585357269, + "grad_norm": 1.805142879486084, + "learning_rate": 1.1989650791172815e-05, + "loss": 0.7335, "step": 3836 }, { - "epoch": 0.290340887594113, - "grad_norm": 2.790646553039551, - "learning_rate": 1.530396076631696e-05, - "loss": 0.7333, + "epoch": 0.27011615628299895, + "grad_norm": 1.8431488275527954, + "learning_rate": 1.1988531216602944e-05, + "loss": 0.7685, "step": 3837 }, { - "epoch": 0.2904165563164466, - "grad_norm": 1.850390076637268, - "learning_rate": 1.5302353136338226e-05, - "loss": 0.8824, + "epoch": 0.27018655403027103, + "grad_norm": 1.745913028717041, + "learning_rate": 1.1987411382674818e-05, + "loss": 0.799, "step": 3838 }, { - "epoch": 0.2904922250387802, - "grad_norm": 2.336447238922119, - "learning_rate": 1.530074511168575e-05, - "loss": 0.6944, + "epoch": 0.2702569517775431, + "grad_norm": 1.8130571842193604, + "learning_rate": 1.1986291289446657e-05, + "loss": 0.669, "step": 3839 }, { - "epoch": 0.29056789376111386, - "grad_norm": 2.158458709716797, - "learning_rate": 1.5299136692460238e-05, - "loss": 0.8595, + "epoch": 0.2703273495248152, + "grad_norm": 1.9802912473678589, + "learning_rate": 1.1985170936976697e-05, + "loss": 0.6569, "step": 3840 }, { - "epoch": 0.29064356248344747, - "grad_norm": 2.0733799934387207, - "learning_rate": 1.5297527878762413e-05, - "loss": 0.6355, + "epoch": 0.2703977472720873, + "grad_norm": 2.0341944694519043, + "learning_rate": 1.1984050325323186e-05, + "loss": 0.7961, "step": 3841 }, { - "epoch": 0.2907192312057811, - "grad_norm": 2.5459089279174805, - "learning_rate": 1.529591867069302e-05, - "loss": 0.7965, + "epoch": 0.2704681450193594, + "grad_norm": 1.891936182975769, + "learning_rate": 1.1982929454544384e-05, + "loss": 0.6646, "step": 3842 }, { - "epoch": 0.2907948999281147, - "grad_norm": 2.3149020671844482, - "learning_rate": 1.529430906835284e-05, - "loss": 0.7332, + "epoch": 0.2705385427666315, + "grad_norm": 1.5602281093597412, + "learning_rate": 1.1981808324698565e-05, + "loss": 0.7045, "step": 3843 }, { - "epoch": 0.29087056865044836, - "grad_norm": 2.2165815830230713, - "learning_rate": 1.5292699071842665e-05, - "loss": 0.6736, + "epoch": 0.27060894051390355, + "grad_norm": 1.804823875427246, + "learning_rate": 1.1980686935844023e-05, + "loss": 0.7116, "step": 3844 }, { - "epoch": 0.29094623737278197, - "grad_norm": 2.079904794692993, - "learning_rate": 1.5291088681263325e-05, - "loss": 0.6289, + "epoch": 0.27067933826117563, + "grad_norm": 1.9481220245361328, + "learning_rate": 1.1979565288039053e-05, + "loss": 0.7615, "step": 3845 }, { - "epoch": 0.2910219060951156, - "grad_norm": 2.122159957885742, - "learning_rate": 1.5289477896715662e-05, - "loss": 0.8419, + "epoch": 0.2707497360084477, + "grad_norm": 2.0233476161956787, + "learning_rate": 1.1978443381341976e-05, + "loss": 0.8537, "step": 3846 }, { - "epoch": 0.2910975748174492, - "grad_norm": 1.8512182235717773, - "learning_rate": 1.5287866718300548e-05, - "loss": 0.6351, + "epoch": 0.27082013375571984, + "grad_norm": 1.6426658630371094, + "learning_rate": 1.1977321215811119e-05, + "loss": 0.666, "step": 3847 }, { - "epoch": 0.29117324353978286, - "grad_norm": 1.8471953868865967, - "learning_rate": 1.5286255146118886e-05, - "loss": 0.8519, + "epoch": 0.2708905315029919, + "grad_norm": 1.8080860376358032, + "learning_rate": 1.1976198791504823e-05, + "loss": 0.7493, "step": 3848 }, { - "epoch": 0.29124891226211647, - "grad_norm": 1.9632765054702759, - "learning_rate": 1.5284643180271593e-05, - "loss": 0.8273, + "epoch": 0.270960929250264, + "grad_norm": 1.876859426498413, + "learning_rate": 1.1975076108481445e-05, + "loss": 0.6739, "step": 3849 }, { - "epoch": 0.2913245809844501, - "grad_norm": 2.095710515975952, - "learning_rate": 1.5283030820859614e-05, - "loss": 0.7674, + "epoch": 0.27103132699753607, + "grad_norm": 1.896008014678955, + "learning_rate": 1.1973953166799357e-05, + "loss": 0.7514, "step": 3850 }, { - "epoch": 0.2914002497067837, - "grad_norm": 2.047574520111084, - "learning_rate": 1.528141806798393e-05, - "loss": 0.7336, + "epoch": 0.27110172474480815, + "grad_norm": 1.8546111583709717, + "learning_rate": 1.1972829966516937e-05, + "loss": 0.8016, "step": 3851 }, { - "epoch": 0.2914759184291173, - "grad_norm": 3.1790666580200195, - "learning_rate": 1.5279804921745526e-05, - "loss": 0.6697, + "epoch": 0.2711721224920803, + "grad_norm": 1.9002022743225098, + "learning_rate": 1.1971706507692584e-05, + "loss": 0.6873, "step": 3852 }, { - "epoch": 0.29155158715145096, - "grad_norm": 1.7570991516113281, - "learning_rate": 1.5278191382245424e-05, - "loss": 0.7305, + "epoch": 0.27124252023935236, + "grad_norm": 2.0734760761260986, + "learning_rate": 1.1970582790384704e-05, + "loss": 0.7236, "step": 3853 }, { - "epoch": 0.2916272558737846, - "grad_norm": 6.570018291473389, - "learning_rate": 1.5276577449584677e-05, - "loss": 0.8719, + "epoch": 0.27131291798662444, + "grad_norm": 1.868902325630188, + "learning_rate": 1.1969458814651728e-05, + "loss": 0.7906, "step": 3854 }, { - "epoch": 0.2917029245961182, - "grad_norm": 2.8734898567199707, - "learning_rate": 1.5274963123864346e-05, - "loss": 0.7329, + "epoch": 0.2713833157338965, + "grad_norm": 1.7149838209152222, + "learning_rate": 1.1968334580552086e-05, + "loss": 0.7471, "step": 3855 }, { - "epoch": 0.2917785933184518, - "grad_norm": 2.37735915184021, - "learning_rate": 1.527334840518553e-05, - "loss": 0.7074, + "epoch": 0.2714537134811686, + "grad_norm": 2.151376962661743, + "learning_rate": 1.1967210088144227e-05, + "loss": 0.6836, "step": 3856 }, { - "epoch": 0.29185426204078546, - "grad_norm": 2.6533119678497314, - "learning_rate": 1.5271733293649347e-05, - "loss": 0.7864, + "epoch": 0.27152411122844067, + "grad_norm": 1.7553517818450928, + "learning_rate": 1.196608533748662e-05, + "loss": 0.7428, "step": 3857 }, { - "epoch": 0.29192993076311907, - "grad_norm": 2.2681920528411865, - "learning_rate": 1.5270117789356937e-05, - "loss": 0.9383, + "epoch": 0.2715945089757128, + "grad_norm": 1.9474021196365356, + "learning_rate": 1.1964960328637737e-05, + "loss": 0.6725, "step": 3858 }, { - "epoch": 0.2920055994854527, - "grad_norm": 2.4914307594299316, - "learning_rate": 1.5268501892409472e-05, - "loss": 0.7368, + "epoch": 0.2716649067229849, + "grad_norm": 1.8811299800872803, + "learning_rate": 1.196383506165607e-05, + "loss": 0.7005, "step": 3859 }, { - "epoch": 0.2920812682077863, - "grad_norm": 2.2388360500335693, - "learning_rate": 1.5266885602908145e-05, - "loss": 0.8739, + "epoch": 0.27173530447025696, + "grad_norm": 1.9418044090270996, + "learning_rate": 1.1962709536600123e-05, + "loss": 0.8386, "step": 3860 }, { - "epoch": 0.29215693693011996, - "grad_norm": 1.967874526977539, - "learning_rate": 1.526526892095417e-05, - "loss": 0.6916, + "epoch": 0.27180570221752903, + "grad_norm": 2.144973039627075, + "learning_rate": 1.1961583753528412e-05, + "loss": 0.7349, "step": 3861 }, { - "epoch": 0.29223260565245357, - "grad_norm": 2.43058443069458, - "learning_rate": 1.5263651846648794e-05, - "loss": 0.8727, + "epoch": 0.2718760999648011, + "grad_norm": 1.6176515817642212, + "learning_rate": 1.196045771249947e-05, + "loss": 0.7639, "step": 3862 }, { - "epoch": 0.2923082743747872, - "grad_norm": 2.0961925983428955, - "learning_rate": 1.5262034380093276e-05, - "loss": 0.8726, + "epoch": 0.2719464977120732, + "grad_norm": 2.049866199493408, + "learning_rate": 1.1959331413571838e-05, + "loss": 0.7087, "step": 3863 }, { - "epoch": 0.2923839430971208, - "grad_norm": 2.120222330093384, - "learning_rate": 1.5260416521388916e-05, - "loss": 0.6642, + "epoch": 0.2720168954593453, + "grad_norm": 2.0620741844177246, + "learning_rate": 1.1958204856804075e-05, + "loss": 0.7391, "step": 3864 }, { - "epoch": 0.2924596118194544, - "grad_norm": 2.0524697303771973, - "learning_rate": 1.525879827063702e-05, - "loss": 0.6988, + "epoch": 0.2720872932066174, + "grad_norm": 1.6856505870819092, + "learning_rate": 1.1957078042254751e-05, + "loss": 0.7282, "step": 3865 }, { - "epoch": 0.29253528054178807, - "grad_norm": 2.791609525680542, - "learning_rate": 1.5257179627938935e-05, - "loss": 0.8412, + "epoch": 0.2721576909538895, + "grad_norm": 1.722184181213379, + "learning_rate": 1.195595096998245e-05, + "loss": 0.6876, "step": 3866 }, { - "epoch": 0.2926109492641217, - "grad_norm": 2.2559304237365723, - "learning_rate": 1.5255560593396025e-05, - "loss": 1.0062, + "epoch": 0.27222808870116155, + "grad_norm": 1.6155259609222412, + "learning_rate": 1.1954823640045768e-05, + "loss": 0.739, "step": 3867 }, { - "epoch": 0.2926866179864553, - "grad_norm": 2.290409803390503, - "learning_rate": 1.5253941167109677e-05, - "loss": 0.6786, + "epoch": 0.27229848644843363, + "grad_norm": 1.7632029056549072, + "learning_rate": 1.1953696052503321e-05, + "loss": 0.828, "step": 3868 }, { - "epoch": 0.2927622867087889, - "grad_norm": 3.779801368713379, - "learning_rate": 1.5252321349181305e-05, - "loss": 0.8315, + "epoch": 0.27236888419570576, + "grad_norm": 1.7166856527328491, + "learning_rate": 1.1952568207413727e-05, + "loss": 0.7498, "step": 3869 }, { - "epoch": 0.29283795543112257, - "grad_norm": 2.773968458175659, - "learning_rate": 1.5250701139712347e-05, - "loss": 0.8372, + "epoch": 0.27243928194297784, + "grad_norm": 1.7267314195632935, + "learning_rate": 1.195144010483563e-05, + "loss": 0.8155, "step": 3870 }, { - "epoch": 0.2929136241534562, - "grad_norm": 2.577420473098755, - "learning_rate": 1.5249080538804266e-05, - "loss": 0.7793, + "epoch": 0.2725096796902499, + "grad_norm": 1.8814709186553955, + "learning_rate": 1.1950311744827673e-05, + "loss": 0.6446, "step": 3871 }, { - "epoch": 0.2929892928757898, - "grad_norm": 2.174755573272705, - "learning_rate": 1.5247459546558554e-05, - "loss": 0.6802, + "epoch": 0.272580077437522, + "grad_norm": 1.939123272895813, + "learning_rate": 1.1949183127448527e-05, + "loss": 0.8221, "step": 3872 }, { - "epoch": 0.2930649615981234, - "grad_norm": 2.247739315032959, - "learning_rate": 1.5245838163076712e-05, - "loss": 0.8264, + "epoch": 0.27265047518479407, + "grad_norm": 1.4833722114562988, + "learning_rate": 1.1948054252756868e-05, + "loss": 0.7908, "step": 3873 }, { - "epoch": 0.29314063032045706, - "grad_norm": 2.067796468734741, - "learning_rate": 1.5244216388460285e-05, - "loss": 0.8444, + "epoch": 0.27272087293206615, + "grad_norm": 2.2403600215911865, + "learning_rate": 1.1946925120811389e-05, + "loss": 0.7744, "step": 3874 }, { - "epoch": 0.2932162990427907, - "grad_norm": 2.176210880279541, - "learning_rate": 1.5242594222810835e-05, - "loss": 0.6093, + "epoch": 0.2727912706793383, + "grad_norm": 1.887914776802063, + "learning_rate": 1.1945795731670792e-05, + "loss": 0.7324, "step": 3875 }, { - "epoch": 0.2932919677651243, - "grad_norm": 2.150022029876709, - "learning_rate": 1.5240971666229939e-05, - "loss": 0.724, + "epoch": 0.27286166842661036, + "grad_norm": 1.7462515830993652, + "learning_rate": 1.1944666085393794e-05, + "loss": 0.5705, "step": 3876 }, { - "epoch": 0.2933676364874579, - "grad_norm": 2.2023675441741943, - "learning_rate": 1.5239348718819215e-05, - "loss": 0.8256, + "epoch": 0.27293206617388244, + "grad_norm": 2.5847558975219727, + "learning_rate": 1.194353618203913e-05, + "loss": 0.7981, "step": 3877 }, { - "epoch": 0.2934433052097915, - "grad_norm": 2.1717021465301514, - "learning_rate": 1.5237725380680291e-05, - "loss": 0.8227, + "epoch": 0.2730024639211545, + "grad_norm": 1.5702255964279175, + "learning_rate": 1.194240602166554e-05, + "loss": 0.7315, "step": 3878 }, { - "epoch": 0.29351897393212517, - "grad_norm": 2.7426464557647705, - "learning_rate": 1.523610165191483e-05, - "loss": 0.8648, + "epoch": 0.2730728616684266, + "grad_norm": 1.5386414527893066, + "learning_rate": 1.1941275604331787e-05, + "loss": 0.7439, "step": 3879 }, { - "epoch": 0.2935946426544588, - "grad_norm": 1.9793200492858887, - "learning_rate": 1.5234477532624512e-05, - "loss": 0.5645, + "epoch": 0.2731432594156987, + "grad_norm": 1.9823660850524902, + "learning_rate": 1.194014493009664e-05, + "loss": 0.8286, "step": 3880 }, { - "epoch": 0.2936703113767924, - "grad_norm": 2.058037281036377, - "learning_rate": 1.5232853022911048e-05, - "loss": 0.7363, + "epoch": 0.2732136571629708, + "grad_norm": 1.7457764148712158, + "learning_rate": 1.1939013999018885e-05, + "loss": 0.6361, "step": 3881 }, { - "epoch": 0.293745980099126, - "grad_norm": 2.488274097442627, - "learning_rate": 1.5231228122876167e-05, - "loss": 0.8012, + "epoch": 0.2732840549102429, + "grad_norm": 2.1409554481506348, + "learning_rate": 1.1937882811157316e-05, + "loss": 0.747, "step": 3882 }, { - "epoch": 0.29382164882145967, - "grad_norm": 2.5905423164367676, - "learning_rate": 1.5229602832621628e-05, - "loss": 0.7683, + "epoch": 0.27335445265751496, + "grad_norm": 1.7860232591629028, + "learning_rate": 1.1936751366570752e-05, + "loss": 0.7935, "step": 3883 }, { - "epoch": 0.2938973175437933, - "grad_norm": 2.189631938934326, - "learning_rate": 1.5227977152249211e-05, - "loss": 0.6833, + "epoch": 0.27342485040478703, + "grad_norm": 1.9025826454162598, + "learning_rate": 1.193561966531801e-05, + "loss": 0.6753, "step": 3884 }, { - "epoch": 0.2939729862661269, - "grad_norm": 3.1091902256011963, - "learning_rate": 1.522635108186072e-05, - "loss": 0.6846, + "epoch": 0.2734952481520591, + "grad_norm": 1.7915054559707642, + "learning_rate": 1.1934487707457932e-05, + "loss": 0.6366, "step": 3885 }, { - "epoch": 0.2940486549884605, - "grad_norm": 2.442972421646118, - "learning_rate": 1.5224724621557985e-05, - "loss": 0.7811, + "epoch": 0.27356564589933124, + "grad_norm": 2.0761730670928955, + "learning_rate": 1.1933355493049371e-05, + "loss": 0.7288, "step": 3886 }, { - "epoch": 0.29412432371079417, - "grad_norm": 1.9295117855072021, - "learning_rate": 1.5223097771442863e-05, - "loss": 0.7857, + "epoch": 0.2736360436466033, + "grad_norm": 2.248682975769043, + "learning_rate": 1.1932223022151191e-05, + "loss": 0.7893, "step": 3887 }, { - "epoch": 0.2941999924331278, - "grad_norm": 2.101855516433716, - "learning_rate": 1.522147053161723e-05, - "loss": 0.7761, + "epoch": 0.2737064413938754, + "grad_norm": 2.1294896602630615, + "learning_rate": 1.1931090294822265e-05, + "loss": 0.7442, "step": 3888 }, { - "epoch": 0.2942756611554614, - "grad_norm": 2.1722593307495117, - "learning_rate": 1.5219842902182986e-05, - "loss": 0.6753, + "epoch": 0.2737768391411475, + "grad_norm": 1.6048939228057861, + "learning_rate": 1.1929957311121492e-05, + "loss": 0.7252, "step": 3889 }, { - "epoch": 0.294351329877795, - "grad_norm": 1.9771231412887573, - "learning_rate": 1.5218214883242067e-05, - "loss": 0.6856, + "epoch": 0.27384723688841955, + "grad_norm": 2.3513660430908203, + "learning_rate": 1.1928824071107773e-05, + "loss": 0.7003, "step": 3890 }, { - "epoch": 0.29442699860012866, - "grad_norm": 2.3351590633392334, - "learning_rate": 1.5216586474896416e-05, - "loss": 0.6703, + "epoch": 0.27391763463569163, + "grad_norm": 1.6869572401046753, + "learning_rate": 1.1927690574840025e-05, + "loss": 0.722, "step": 3891 }, { - "epoch": 0.2945026673224623, - "grad_norm": 2.4224588871002197, - "learning_rate": 1.5214957677248015e-05, - "loss": 0.7411, + "epoch": 0.27398803238296376, + "grad_norm": 1.9833486080169678, + "learning_rate": 1.1926556822377184e-05, + "loss": 0.6745, "step": 3892 }, { - "epoch": 0.2945783360447959, - "grad_norm": 1.6721079349517822, - "learning_rate": 1.5213328490398863e-05, - "loss": 0.8959, + "epoch": 0.27405843013023584, + "grad_norm": 2.383958339691162, + "learning_rate": 1.192542281377819e-05, + "loss": 0.6148, "step": 3893 }, { - "epoch": 0.2946540047671295, - "grad_norm": 1.949367642402649, - "learning_rate": 1.5211698914450985e-05, - "loss": 0.564, + "epoch": 0.2741288278775079, + "grad_norm": 1.9743777513504028, + "learning_rate": 1.1924288549102002e-05, + "loss": 0.7732, "step": 3894 }, { - "epoch": 0.2947296734894631, - "grad_norm": 2.243993043899536, - "learning_rate": 1.5210068949506428e-05, - "loss": 0.7671, + "epoch": 0.27419922562478, + "grad_norm": 1.5348643064498901, + "learning_rate": 1.1923154028407595e-05, + "loss": 0.8069, "step": 3895 }, { - "epoch": 0.2948053422117968, - "grad_norm": 1.8077340126037598, - "learning_rate": 1.5208438595667269e-05, - "loss": 0.8371, + "epoch": 0.2742696233720521, + "grad_norm": 1.7254599332809448, + "learning_rate": 1.192201925175395e-05, + "loss": 0.8013, "step": 3896 }, { - "epoch": 0.2948810109341304, - "grad_norm": 2.3621714115142822, - "learning_rate": 1.5206807853035604e-05, - "loss": 0.6225, + "epoch": 0.2743400211193242, + "grad_norm": 1.5363547801971436, + "learning_rate": 1.1920884219200068e-05, + "loss": 0.7633, "step": 3897 }, { - "epoch": 0.294956679656464, - "grad_norm": 2.770918846130371, - "learning_rate": 1.5205176721713558e-05, - "loss": 0.6834, + "epoch": 0.2744104188665963, + "grad_norm": 2.465437412261963, + "learning_rate": 1.1919748930804958e-05, + "loss": 0.7416, "step": 3898 }, { - "epoch": 0.2950323483787976, - "grad_norm": 2.04931902885437, - "learning_rate": 1.5203545201803273e-05, - "loss": 0.7648, + "epoch": 0.27448081661386836, + "grad_norm": 1.8391296863555908, + "learning_rate": 1.1918613386627647e-05, + "loss": 0.6799, "step": 3899 }, { - "epoch": 0.29510801710113127, - "grad_norm": 2.5135276317596436, - "learning_rate": 1.5201913293406924e-05, - "loss": 0.6688, + "epoch": 0.27455121436114044, + "grad_norm": 1.820837140083313, + "learning_rate": 1.1917477586727169e-05, + "loss": 0.7506, "step": 3900 }, { - "epoch": 0.2951836858234649, - "grad_norm": 1.7943437099456787, - "learning_rate": 1.5200280996626705e-05, - "loss": 0.5577, + "epoch": 0.2746216121084125, + "grad_norm": 1.8815189599990845, + "learning_rate": 1.191634153116258e-05, + "loss": 0.7487, "step": 3901 }, { - "epoch": 0.2952593545457985, - "grad_norm": 2.1226847171783447, - "learning_rate": 1.5198648311564836e-05, - "loss": 0.6473, + "epoch": 0.2746920098556846, + "grad_norm": 1.8758209943771362, + "learning_rate": 1.191520521999294e-05, + "loss": 0.6303, "step": 3902 }, { - "epoch": 0.2953350232681321, - "grad_norm": 2.3555779457092285, - "learning_rate": 1.5197015238323561e-05, - "loss": 0.9919, + "epoch": 0.2747624076029567, + "grad_norm": 2.1485259532928467, + "learning_rate": 1.1914068653277331e-05, + "loss": 0.7304, "step": 3903 }, { - "epoch": 0.29541069199046577, - "grad_norm": 3.256469964981079, - "learning_rate": 1.5195381777005147e-05, - "loss": 1.0487, + "epoch": 0.2748328053502288, + "grad_norm": 1.8579866886138916, + "learning_rate": 1.191293183107484e-05, + "loss": 0.7951, "step": 3904 }, { - "epoch": 0.2954863607127994, - "grad_norm": 2.362062931060791, - "learning_rate": 1.5193747927711889e-05, - "loss": 0.7727, + "epoch": 0.2749032030975009, + "grad_norm": 1.7917592525482178, + "learning_rate": 1.1911794753444575e-05, + "loss": 0.7142, "step": 3905 }, { - "epoch": 0.295562029435133, - "grad_norm": 1.6341859102249146, - "learning_rate": 1.5192113690546101e-05, - "loss": 0.6764, + "epoch": 0.27497360084477296, + "grad_norm": 1.6829959154129028, + "learning_rate": 1.1910657420445652e-05, + "loss": 0.6523, "step": 3906 }, { - "epoch": 0.2956376981574666, - "grad_norm": 2.2178308963775635, - "learning_rate": 1.5190479065610129e-05, - "loss": 0.8589, + "epoch": 0.27504399859204504, + "grad_norm": 1.6976457834243774, + "learning_rate": 1.1909519832137204e-05, + "loss": 0.6107, "step": 3907 }, { - "epoch": 0.2957133668798002, - "grad_norm": 2.287083148956299, - "learning_rate": 1.5188844053006334e-05, - "loss": 0.8661, + "epoch": 0.27511439633931717, + "grad_norm": 2.0412561893463135, + "learning_rate": 1.190838198857837e-05, + "loss": 0.7182, "step": 3908 }, { - "epoch": 0.2957890356021339, - "grad_norm": 2.168025016784668, - "learning_rate": 1.5187208652837105e-05, - "loss": 0.7258, + "epoch": 0.27518479408658925, + "grad_norm": 2.0106358528137207, + "learning_rate": 1.1907243889828316e-05, + "loss": 0.8167, "step": 3909 }, { - "epoch": 0.2958647043244675, - "grad_norm": 2.3210017681121826, - "learning_rate": 1.5185572865204861e-05, - "loss": 0.9169, + "epoch": 0.2752551918338613, + "grad_norm": 1.8946852684020996, + "learning_rate": 1.1906105535946206e-05, + "loss": 0.8229, "step": 3910 }, { - "epoch": 0.2959403730468011, - "grad_norm": 2.4655609130859375, - "learning_rate": 1.5183936690212038e-05, - "loss": 0.9665, + "epoch": 0.2753255895811334, + "grad_norm": 2.0233354568481445, + "learning_rate": 1.1904966926991225e-05, + "loss": 0.7311, "step": 3911 }, { - "epoch": 0.2960160417691347, - "grad_norm": 2.277387857437134, - "learning_rate": 1.5182300127961097e-05, - "loss": 0.8959, + "epoch": 0.2753959873284055, + "grad_norm": 1.7656471729278564, + "learning_rate": 1.1903828063022571e-05, + "loss": 0.7911, "step": 3912 }, { - "epoch": 0.2960917104914684, - "grad_norm": 2.419633388519287, - "learning_rate": 1.5180663178554527e-05, - "loss": 0.7201, + "epoch": 0.27546638507567756, + "grad_norm": 2.113478899002075, + "learning_rate": 1.1902688944099456e-05, + "loss": 0.8127, "step": 3913 }, { - "epoch": 0.296167379213802, - "grad_norm": 2.0627200603485107, - "learning_rate": 1.5179025842094837e-05, - "loss": 0.8001, + "epoch": 0.2755367828229497, + "grad_norm": 2.0491926670074463, + "learning_rate": 1.1901549570281102e-05, + "loss": 0.7337, "step": 3914 }, { - "epoch": 0.2962430479361356, - "grad_norm": 2.119424343109131, - "learning_rate": 1.5177388118684563e-05, - "loss": 0.7499, + "epoch": 0.27560718057022177, + "grad_norm": 2.033907890319824, + "learning_rate": 1.1900409941626748e-05, + "loss": 0.7848, "step": 3915 }, { - "epoch": 0.2963187166584692, - "grad_norm": 2.0635826587677, - "learning_rate": 1.5175750008426266e-05, - "loss": 0.7671, + "epoch": 0.27567757831749384, + "grad_norm": 1.6825600862503052, + "learning_rate": 1.1899270058195642e-05, + "loss": 0.7357, "step": 3916 }, { - "epoch": 0.29639438538080287, - "grad_norm": 1.9896836280822754, - "learning_rate": 1.5174111511422528e-05, - "loss": 0.694, + "epoch": 0.2757479760647659, + "grad_norm": 1.8668832778930664, + "learning_rate": 1.1898129920047048e-05, + "loss": 0.6905, "step": 3917 }, { - "epoch": 0.2964700541031365, - "grad_norm": 2.2198286056518555, - "learning_rate": 1.517247262777596e-05, - "loss": 0.8069, + "epoch": 0.275818373812038, + "grad_norm": 1.966882348060608, + "learning_rate": 1.1896989527240245e-05, + "loss": 0.7839, "step": 3918 }, { - "epoch": 0.2965457228254701, - "grad_norm": 2.9682750701904297, - "learning_rate": 1.5170833357589188e-05, - "loss": 0.8306, + "epoch": 0.2758887715593101, + "grad_norm": 1.8359715938568115, + "learning_rate": 1.1895848879834519e-05, + "loss": 0.8352, "step": 3919 }, { - "epoch": 0.2966213915478037, - "grad_norm": 5.279101848602295, - "learning_rate": 1.5169193700964875e-05, - "loss": 0.891, + "epoch": 0.2759591693065822, + "grad_norm": 1.5507322549819946, + "learning_rate": 1.1894707977889177e-05, + "loss": 0.7801, "step": 3920 }, { - "epoch": 0.2966970602701373, - "grad_norm": 2.3643431663513184, - "learning_rate": 1.5167553658005695e-05, - "loss": 0.7815, + "epoch": 0.2760295670538543, + "grad_norm": 1.7686100006103516, + "learning_rate": 1.1893566821463536e-05, + "loss": 0.8456, "step": 3921 }, { - "epoch": 0.296772728992471, - "grad_norm": 2.442836046218872, - "learning_rate": 1.516591322881436e-05, - "loss": 0.7634, + "epoch": 0.27609996480112636, + "grad_norm": 1.5609196424484253, + "learning_rate": 1.189242541061692e-05, + "loss": 0.6357, "step": 3922 }, { - "epoch": 0.2968483977148046, - "grad_norm": 2.029599905014038, - "learning_rate": 1.5164272413493597e-05, - "loss": 0.7216, + "epoch": 0.27617036254839844, + "grad_norm": 2.1082611083984375, + "learning_rate": 1.189128374540868e-05, + "loss": 0.8138, "step": 3923 }, { - "epoch": 0.2969240664371382, - "grad_norm": 2.4150075912475586, - "learning_rate": 1.5162631212146155e-05, - "loss": 0.8002, + "epoch": 0.2762407602956705, + "grad_norm": 1.9912757873535156, + "learning_rate": 1.1890141825898166e-05, + "loss": 0.7341, "step": 3924 }, { - "epoch": 0.2969997351594718, - "grad_norm": 2.4529528617858887, - "learning_rate": 1.5160989624874815e-05, - "loss": 0.6906, + "epoch": 0.27631115804294265, + "grad_norm": 1.5862867832183838, + "learning_rate": 1.1888999652144752e-05, + "loss": 0.6835, "step": 3925 }, { - "epoch": 0.2970754038818055, - "grad_norm": 2.5680036544799805, - "learning_rate": 1.5159347651782379e-05, - "loss": 0.7421, + "epoch": 0.27638155579021473, + "grad_norm": 1.928122878074646, + "learning_rate": 1.1887857224207818e-05, + "loss": 0.7613, "step": 3926 }, { - "epoch": 0.2971510726041391, - "grad_norm": 2.2897586822509766, - "learning_rate": 1.515770529297167e-05, - "loss": 0.7853, + "epoch": 0.2764519535374868, + "grad_norm": 1.834381341934204, + "learning_rate": 1.1886714542146759e-05, + "loss": 0.7543, "step": 3927 }, { - "epoch": 0.2972267413264727, - "grad_norm": 2.2733471393585205, - "learning_rate": 1.5156062548545538e-05, - "loss": 0.6197, + "epoch": 0.2765223512847589, + "grad_norm": 1.8013280630111694, + "learning_rate": 1.1885571606020985e-05, + "loss": 0.8341, "step": 3928 }, { - "epoch": 0.2973024100488063, - "grad_norm": 2.1105287075042725, - "learning_rate": 1.515441941860686e-05, - "loss": 0.8367, + "epoch": 0.27659274903203096, + "grad_norm": 1.7989022731781006, + "learning_rate": 1.188442841588992e-05, + "loss": 0.8286, "step": 3929 }, { - "epoch": 0.29737807877114, - "grad_norm": 2.4715983867645264, - "learning_rate": 1.515277590325853e-05, - "loss": 0.6271, + "epoch": 0.27666314677930304, + "grad_norm": 1.6944236755371094, + "learning_rate": 1.1883284971812996e-05, + "loss": 0.6851, "step": 3930 }, { - "epoch": 0.2974537474934736, - "grad_norm": 1.8005375862121582, - "learning_rate": 1.5151132002603475e-05, - "loss": 0.7891, + "epoch": 0.27673354452657517, + "grad_norm": 1.896464467048645, + "learning_rate": 1.1882141273849665e-05, + "loss": 0.5972, "step": 3931 }, { - "epoch": 0.2975294162158072, - "grad_norm": 2.270176410675049, - "learning_rate": 1.5149487716744637e-05, - "loss": 0.6982, + "epoch": 0.27680394227384725, + "grad_norm": 2.08565616607666, + "learning_rate": 1.1880997322059388e-05, + "loss": 0.8634, "step": 3932 }, { - "epoch": 0.2976050849381408, - "grad_norm": 2.324087142944336, - "learning_rate": 1.5147843045784989e-05, - "loss": 0.7359, + "epoch": 0.2768743400211193, + "grad_norm": 1.905483365058899, + "learning_rate": 1.187985311650164e-05, + "loss": 0.7275, "step": 3933 }, { - "epoch": 0.2976807536604744, - "grad_norm": 2.229957103729248, - "learning_rate": 1.5146197989827526e-05, - "loss": 0.6015, + "epoch": 0.2769447377683914, + "grad_norm": 1.4740272760391235, + "learning_rate": 1.1878708657235909e-05, + "loss": 0.7651, "step": 3934 }, { - "epoch": 0.2977564223828081, - "grad_norm": 1.9169422388076782, - "learning_rate": 1.5144552548975264e-05, - "loss": 0.6176, + "epoch": 0.2770151355156635, + "grad_norm": 1.9124128818511963, + "learning_rate": 1.1877563944321695e-05, + "loss": 0.7324, "step": 3935 }, { - "epoch": 0.2978320911051417, - "grad_norm": 2.401707172393799, - "learning_rate": 1.5142906723331248e-05, - "loss": 0.7862, + "epoch": 0.2770855332629356, + "grad_norm": 2.0551040172576904, + "learning_rate": 1.1876418977818515e-05, + "loss": 0.6654, "step": 3936 }, { - "epoch": 0.2979077598274753, - "grad_norm": 2.221273899078369, - "learning_rate": 1.5141260512998544e-05, - "loss": 0.7928, + "epoch": 0.2771559310102077, + "grad_norm": 1.9841262102127075, + "learning_rate": 1.1875273757785896e-05, + "loss": 0.7444, "step": 3937 }, { - "epoch": 0.2979834285498089, - "grad_norm": 2.259052276611328, - "learning_rate": 1.5139613918080243e-05, - "loss": 0.8092, + "epoch": 0.27722632875747977, + "grad_norm": 1.660323143005371, + "learning_rate": 1.1874128284283378e-05, + "loss": 0.6027, "step": 3938 }, { - "epoch": 0.2980590972721426, - "grad_norm": 2.435391426086426, - "learning_rate": 1.5137966938679463e-05, - "loss": 0.7887, + "epoch": 0.27729672650475184, + "grad_norm": 1.8307011127471924, + "learning_rate": 1.1872982557370519e-05, + "loss": 0.6553, "step": 3939 }, { - "epoch": 0.2981347659944762, - "grad_norm": 1.9297043085098267, - "learning_rate": 1.5136319574899338e-05, - "loss": 0.763, + "epoch": 0.2773671242520239, + "grad_norm": 1.6628056764602661, + "learning_rate": 1.1871836577106882e-05, + "loss": 0.6907, "step": 3940 }, { - "epoch": 0.2982104347168098, - "grad_norm": 1.8579686880111694, - "learning_rate": 1.5134671826843034e-05, - "loss": 0.6998, + "epoch": 0.277437521999296, + "grad_norm": 1.8518277406692505, + "learning_rate": 1.187069034355205e-05, + "loss": 0.7199, "step": 3941 }, { - "epoch": 0.2982861034391434, - "grad_norm": 2.0775258541107178, - "learning_rate": 1.5133023694613743e-05, - "loss": 0.7513, + "epoch": 0.27750791974656813, + "grad_norm": 1.96440589427948, + "learning_rate": 1.1869543856765615e-05, + "loss": 0.8244, "step": 3942 }, { - "epoch": 0.2983617721614771, - "grad_norm": 2.1866838932037354, - "learning_rate": 1.5131375178314666e-05, - "loss": 0.8225, + "epoch": 0.2775783174938402, + "grad_norm": 1.772930383682251, + "learning_rate": 1.1868397116807183e-05, + "loss": 0.7313, "step": 3943 }, { - "epoch": 0.2984374408838107, - "grad_norm": 1.9399791955947876, - "learning_rate": 1.5129726278049046e-05, - "loss": 0.7335, + "epoch": 0.2776487152411123, + "grad_norm": 1.854162335395813, + "learning_rate": 1.1867250123736378e-05, + "loss": 0.6834, "step": 3944 }, { - "epoch": 0.2985131096061443, - "grad_norm": 2.0289242267608643, - "learning_rate": 1.5128076993920142e-05, - "loss": 0.7298, + "epoch": 0.27771911298838436, + "grad_norm": 1.9780874252319336, + "learning_rate": 1.186610287761283e-05, + "loss": 0.7239, "step": 3945 }, { - "epoch": 0.2985887783284779, - "grad_norm": 1.8820334672927856, - "learning_rate": 1.5126427326031233e-05, - "loss": 0.7299, + "epoch": 0.27778951073565644, + "grad_norm": 2.0153515338897705, + "learning_rate": 1.1864955378496187e-05, + "loss": 0.6862, "step": 3946 }, { - "epoch": 0.2986644470508115, - "grad_norm": 1.9488524198532104, - "learning_rate": 1.5124777274485631e-05, - "loss": 0.7782, + "epoch": 0.2778599084829285, + "grad_norm": 1.8308848142623901, + "learning_rate": 1.1863807626446109e-05, + "loss": 0.7725, "step": 3947 }, { - "epoch": 0.2987401157731452, - "grad_norm": 2.3379132747650146, - "learning_rate": 1.5123126839386668e-05, - "loss": 0.7691, + "epoch": 0.27793030623020065, + "grad_norm": 1.8398529291152954, + "learning_rate": 1.1862659621522265e-05, + "loss": 0.7442, "step": 3948 }, { - "epoch": 0.2988157844954788, - "grad_norm": 2.2485504150390625, - "learning_rate": 1.5121476020837695e-05, - "loss": 0.8717, + "epoch": 0.27800070397747273, + "grad_norm": 1.777825117111206, + "learning_rate": 1.1861511363784345e-05, + "loss": 0.6268, "step": 3949 }, { - "epoch": 0.2988914532178124, - "grad_norm": 2.426990509033203, - "learning_rate": 1.5119824818942093e-05, - "loss": 0.8312, + "epoch": 0.2780711017247448, + "grad_norm": 1.6637208461761475, + "learning_rate": 1.1860362853292047e-05, + "loss": 0.6235, "step": 3950 }, { - "epoch": 0.298967121940146, - "grad_norm": 2.0571541786193848, - "learning_rate": 1.511817323380327e-05, - "loss": 0.7172, + "epoch": 0.2781414994720169, + "grad_norm": 1.8231853246688843, + "learning_rate": 1.1859214090105083e-05, + "loss": 0.6539, "step": 3951 }, { - "epoch": 0.2990427906624797, - "grad_norm": 2.4347405433654785, - "learning_rate": 1.5116521265524652e-05, - "loss": 0.9351, + "epoch": 0.27821189721928896, + "grad_norm": 1.9549661874771118, + "learning_rate": 1.1858065074283175e-05, + "loss": 0.7338, "step": 3952 }, { - "epoch": 0.2991184593848133, - "grad_norm": 2.8219993114471436, - "learning_rate": 1.5114868914209686e-05, - "loss": 0.6793, + "epoch": 0.2782822949665611, + "grad_norm": 1.7487702369689941, + "learning_rate": 1.1856915805886065e-05, + "loss": 0.7239, "step": 3953 }, { - "epoch": 0.2991941281071469, - "grad_norm": 2.1219239234924316, - "learning_rate": 1.5113216179961852e-05, - "loss": 0.826, + "epoch": 0.27835269271383317, + "grad_norm": 1.7718636989593506, + "learning_rate": 1.1855766284973504e-05, + "loss": 0.6977, "step": 3954 }, { - "epoch": 0.2992697968294805, - "grad_norm": 2.106398344039917, - "learning_rate": 1.511156306288465e-05, - "loss": 0.7407, + "epoch": 0.27842309046110525, + "grad_norm": 2.595485210418701, + "learning_rate": 1.1854616511605256e-05, + "loss": 0.7689, "step": 3955 }, { - "epoch": 0.2993454655518142, - "grad_norm": 2.503176689147949, - "learning_rate": 1.5109909563081598e-05, - "loss": 0.7621, + "epoch": 0.2784934882083773, + "grad_norm": 2.023730993270874, + "learning_rate": 1.1853466485841098e-05, + "loss": 0.8546, "step": 3956 }, { - "epoch": 0.2994211342741478, - "grad_norm": 1.9739935398101807, - "learning_rate": 1.510825568065625e-05, - "loss": 0.6997, + "epoch": 0.2785638859556494, + "grad_norm": 2.6776604652404785, + "learning_rate": 1.1852316207740824e-05, + "loss": 0.7036, "step": 3957 }, { - "epoch": 0.2994968029964814, - "grad_norm": 2.651623010635376, - "learning_rate": 1.5106601415712173e-05, - "loss": 0.7928, + "epoch": 0.2786342837029215, + "grad_norm": 1.9854227304458618, + "learning_rate": 1.1851165677364235e-05, + "loss": 0.7395, "step": 3958 }, { - "epoch": 0.299572471718815, - "grad_norm": 2.1895413398742676, - "learning_rate": 1.5104946768352966e-05, - "loss": 0.6159, + "epoch": 0.2787046814501936, + "grad_norm": 1.7253499031066895, + "learning_rate": 1.1850014894771148e-05, + "loss": 0.6224, "step": 3959 }, { - "epoch": 0.2996481404411486, - "grad_norm": 2.34374737739563, - "learning_rate": 1.5103291738682245e-05, - "loss": 0.7105, + "epoch": 0.2787750791974657, + "grad_norm": 1.868184208869934, + "learning_rate": 1.1848863860021396e-05, + "loss": 0.8431, "step": 3960 }, { - "epoch": 0.2997238091634823, - "grad_norm": 2.078537702560425, - "learning_rate": 1.5101636326803654e-05, - "loss": 0.7007, + "epoch": 0.27884547694473777, + "grad_norm": 1.6449055671691895, + "learning_rate": 1.184771257317482e-05, + "loss": 0.664, "step": 3961 }, { - "epoch": 0.2997994778858159, - "grad_norm": 2.177887439727783, - "learning_rate": 1.5099980532820864e-05, - "loss": 0.6706, + "epoch": 0.27891587469200985, + "grad_norm": 1.851151466369629, + "learning_rate": 1.1846561034291274e-05, + "loss": 0.7107, "step": 3962 }, { - "epoch": 0.2998751466081495, - "grad_norm": 1.6755671501159668, - "learning_rate": 1.5098324356837562e-05, - "loss": 0.6632, + "epoch": 0.2789862724392819, + "grad_norm": 1.9545187950134277, + "learning_rate": 1.1845409243430635e-05, + "loss": 0.697, "step": 3963 }, { - "epoch": 0.2999508153304831, - "grad_norm": 3.093810796737671, - "learning_rate": 1.5096667798957465e-05, - "loss": 0.7289, + "epoch": 0.27905667018655406, + "grad_norm": 1.8516836166381836, + "learning_rate": 1.184425720065278e-05, + "loss": 0.6811, "step": 3964 }, { - "epoch": 0.3000264840528168, - "grad_norm": 2.284776449203491, - "learning_rate": 1.509501085928431e-05, - "loss": 0.8436, + "epoch": 0.27912706793382613, + "grad_norm": 2.136770725250244, + "learning_rate": 1.1843104906017604e-05, + "loss": 0.7586, "step": 3965 }, { - "epoch": 0.3001021527751504, - "grad_norm": 2.3349032402038574, - "learning_rate": 1.5093353537921863e-05, - "loss": 0.7769, + "epoch": 0.2791974656810982, + "grad_norm": 1.725250005722046, + "learning_rate": 1.1841952359585019e-05, + "loss": 0.7379, "step": 3966 }, { - "epoch": 0.300177821497484, - "grad_norm": 2.382188081741333, - "learning_rate": 1.5091695834973908e-05, - "loss": 0.7436, + "epoch": 0.2792678634283703, + "grad_norm": 2.1432723999023438, + "learning_rate": 1.1840799561414946e-05, + "loss": 0.6607, "step": 3967 }, { - "epoch": 0.3002534902198176, - "grad_norm": 2.356771469116211, - "learning_rate": 1.5090037750544255e-05, - "loss": 0.7242, + "epoch": 0.27933826117564237, + "grad_norm": 1.9188305139541626, + "learning_rate": 1.1839646511567319e-05, + "loss": 0.735, "step": 3968 }, { - "epoch": 0.3003291589421513, - "grad_norm": 2.8641955852508545, - "learning_rate": 1.5088379284736744e-05, - "loss": 0.8392, + "epoch": 0.27940865892291444, + "grad_norm": 1.7334930896759033, + "learning_rate": 1.1838493210102087e-05, + "loss": 0.6616, "step": 3969 }, { - "epoch": 0.3004048276644849, - "grad_norm": 2.1027886867523193, - "learning_rate": 1.5086720437655228e-05, - "loss": 0.6559, + "epoch": 0.2794790566701866, + "grad_norm": 1.7690743207931519, + "learning_rate": 1.183733965707921e-05, + "loss": 0.6248, "step": 3970 }, { - "epoch": 0.3004804963868185, - "grad_norm": 2.1024560928344727, - "learning_rate": 1.5085061209403593e-05, - "loss": 0.8123, + "epoch": 0.27954945441745865, + "grad_norm": 1.7078536748886108, + "learning_rate": 1.1836185852558664e-05, + "loss": 0.6536, "step": 3971 }, { - "epoch": 0.3005561651091521, - "grad_norm": 3.435528516769409, - "learning_rate": 1.5083401600085741e-05, - "loss": 0.7778, + "epoch": 0.27961985216473073, + "grad_norm": 1.5742218494415283, + "learning_rate": 1.1835031796600437e-05, + "loss": 0.7006, "step": 3972 }, { - "epoch": 0.30063183383148573, - "grad_norm": 2.4583373069763184, - "learning_rate": 1.5081741609805608e-05, - "loss": 0.6514, + "epoch": 0.2796902499120028, + "grad_norm": 2.0936522483825684, + "learning_rate": 1.1833877489264527e-05, + "loss": 0.7077, "step": 3973 }, { - "epoch": 0.3007075025538194, - "grad_norm": 2.3399574756622314, - "learning_rate": 1.5080081238667143e-05, - "loss": 0.7155, + "epoch": 0.2797606476592749, + "grad_norm": 2.11342191696167, + "learning_rate": 1.1832722930610948e-05, + "loss": 0.6215, "step": 3974 }, { - "epoch": 0.300783171276153, - "grad_norm": 2.8221993446350098, - "learning_rate": 1.5078420486774327e-05, - "loss": 0.9117, + "epoch": 0.27983104540654696, + "grad_norm": 2.3073060512542725, + "learning_rate": 1.1831568120699726e-05, + "loss": 0.7858, "step": 3975 }, { - "epoch": 0.3008588399984866, - "grad_norm": 2.924146890640259, - "learning_rate": 1.5076759354231156e-05, - "loss": 0.6425, + "epoch": 0.2799014431538191, + "grad_norm": 1.7394955158233643, + "learning_rate": 1.1830413059590906e-05, + "loss": 0.78, "step": 3976 }, { - "epoch": 0.3009345087208202, - "grad_norm": 2.2757415771484375, - "learning_rate": 1.5075097841141663e-05, - "loss": 0.9589, + "epoch": 0.2799718409010912, + "grad_norm": 1.9047729969024658, + "learning_rate": 1.1829257747344533e-05, + "loss": 0.7045, "step": 3977 }, { - "epoch": 0.3010101774431539, - "grad_norm": 2.634446620941162, - "learning_rate": 1.5073435947609891e-05, - "loss": 0.7762, + "epoch": 0.28004223864836325, + "grad_norm": 4.224178791046143, + "learning_rate": 1.1828102184020676e-05, + "loss": 0.7154, "step": 3978 }, { - "epoch": 0.3010858461654875, - "grad_norm": 2.1489665508270264, - "learning_rate": 1.5071773673739918e-05, - "loss": 0.9189, + "epoch": 0.28011263639563533, + "grad_norm": 2.0666863918304443, + "learning_rate": 1.1826946369679415e-05, + "loss": 0.7744, "step": 3979 }, { - "epoch": 0.3011615148878211, - "grad_norm": 3.0982818603515625, - "learning_rate": 1.507011101963584e-05, - "loss": 0.7364, + "epoch": 0.2801830341429074, + "grad_norm": 2.211413621902466, + "learning_rate": 1.182579030438084e-05, + "loss": 0.6339, "step": 3980 }, { - "epoch": 0.3012371836101547, - "grad_norm": 3.0719218254089355, - "learning_rate": 1.5068447985401776e-05, - "loss": 0.6404, + "epoch": 0.28025343189017954, + "grad_norm": 1.7837681770324707, + "learning_rate": 1.1824633988185056e-05, + "loss": 0.7475, "step": 3981 }, { - "epoch": 0.3013128523324884, - "grad_norm": 2.7266042232513428, - "learning_rate": 1.5066784571141874e-05, - "loss": 0.7629, + "epoch": 0.2803238296374516, + "grad_norm": 1.8710612058639526, + "learning_rate": 1.1823477421152184e-05, + "loss": 0.7091, "step": 3982 }, { - "epoch": 0.301388521054822, - "grad_norm": 3.0816164016723633, - "learning_rate": 1.5065120776960294e-05, - "loss": 0.682, + "epoch": 0.2803942273847237, + "grad_norm": 2.2514004707336426, + "learning_rate": 1.182232060334235e-05, + "loss": 0.6202, "step": 3983 }, { - "epoch": 0.3014641897771556, - "grad_norm": 2.005387306213379, - "learning_rate": 1.5063456602961237e-05, - "loss": 0.6885, + "epoch": 0.28046462513199577, + "grad_norm": 1.9524353742599487, + "learning_rate": 1.1821163534815702e-05, + "loss": 0.6729, "step": 3984 }, { - "epoch": 0.3015398584994892, - "grad_norm": 2.6402080059051514, - "learning_rate": 1.5061792049248918e-05, - "loss": 0.7662, + "epoch": 0.28053502287926785, + "grad_norm": 2.047382354736328, + "learning_rate": 1.1820006215632394e-05, + "loss": 0.7794, "step": 3985 }, { - "epoch": 0.30161552722182283, - "grad_norm": 2.094618797302246, - "learning_rate": 1.5060127115927572e-05, - "loss": 0.7406, + "epoch": 0.2806054206265399, + "grad_norm": 1.9501782655715942, + "learning_rate": 1.1818848645852598e-05, + "loss": 0.8123, "step": 3986 }, { - "epoch": 0.3016911959441565, - "grad_norm": 2.5571320056915283, - "learning_rate": 1.5058461803101466e-05, - "loss": 0.6746, + "epoch": 0.28067581837381206, + "grad_norm": 1.5328397750854492, + "learning_rate": 1.1817690825536498e-05, + "loss": 0.7816, "step": 3987 }, { - "epoch": 0.3017668646664901, - "grad_norm": 2.0813798904418945, - "learning_rate": 1.5056796110874885e-05, - "loss": 0.6651, + "epoch": 0.28074621612108414, + "grad_norm": 2.5197253227233887, + "learning_rate": 1.1816532754744288e-05, + "loss": 0.7367, "step": 3988 }, { - "epoch": 0.3018425333888237, - "grad_norm": 1.9654511213302612, - "learning_rate": 1.5055130039352146e-05, - "loss": 0.7259, + "epoch": 0.2808166138683562, + "grad_norm": 1.7260819673538208, + "learning_rate": 1.1815374433536178e-05, + "loss": 0.6694, "step": 3989 }, { - "epoch": 0.30191820211115733, - "grad_norm": 2.2641611099243164, - "learning_rate": 1.5053463588637577e-05, - "loss": 0.6931, + "epoch": 0.2808870116156283, + "grad_norm": 2.0017902851104736, + "learning_rate": 1.181421586197239e-05, + "loss": 0.7316, "step": 3990 }, { - "epoch": 0.301993870833491, - "grad_norm": 2.238877058029175, - "learning_rate": 1.5051796758835534e-05, - "loss": 0.7844, + "epoch": 0.28095740936290037, + "grad_norm": 1.867958664894104, + "learning_rate": 1.1813057040113159e-05, + "loss": 0.7448, "step": 3991 }, { - "epoch": 0.3020695395558246, - "grad_norm": 3.144057512283325, - "learning_rate": 1.505012955005041e-05, - "loss": 0.9696, + "epoch": 0.2810278071101725, + "grad_norm": 1.914707899093628, + "learning_rate": 1.1811897968018732e-05, + "loss": 0.7334, "step": 3992 }, { - "epoch": 0.3021452082781582, - "grad_norm": 1.9449888467788696, - "learning_rate": 1.5048461962386602e-05, - "loss": 0.7733, + "epoch": 0.2810982048574446, + "grad_norm": 1.5701572895050049, + "learning_rate": 1.1810738645749375e-05, + "loss": 0.6875, "step": 3993 }, { - "epoch": 0.3022208770004918, - "grad_norm": 2.664398193359375, - "learning_rate": 1.5046793995948543e-05, - "loss": 0.8057, + "epoch": 0.28116860260471666, + "grad_norm": 1.6002938747406006, + "learning_rate": 1.1809579073365358e-05, + "loss": 0.6565, "step": 3994 }, { - "epoch": 0.3022965457228255, - "grad_norm": 3.067143678665161, - "learning_rate": 1.504512565084069e-05, - "loss": 0.6894, + "epoch": 0.28123900035198873, + "grad_norm": 3.0042548179626465, + "learning_rate": 1.1808419250926969e-05, + "loss": 0.7608, "step": 3995 }, { - "epoch": 0.3023722144451591, - "grad_norm": 2.999467372894287, - "learning_rate": 1.5043456927167511e-05, - "loss": 0.7235, + "epoch": 0.2813093980992608, + "grad_norm": 2.0234296321868896, + "learning_rate": 1.1807259178494507e-05, + "loss": 0.7731, "step": 3996 }, { - "epoch": 0.3024478831674927, - "grad_norm": 2.0027337074279785, - "learning_rate": 1.5041787825033516e-05, - "loss": 0.5782, + "epoch": 0.2813797958465329, + "grad_norm": 1.856994390487671, + "learning_rate": 1.1806098856128287e-05, + "loss": 0.6714, "step": 3997 }, { - "epoch": 0.3025235518898263, - "grad_norm": 3.10082745552063, - "learning_rate": 1.5040118344543226e-05, - "loss": 0.6152, + "epoch": 0.281450193593805, + "grad_norm": 1.8975565433502197, + "learning_rate": 1.1804938283888637e-05, + "loss": 0.7376, "step": 3998 }, { - "epoch": 0.30259922061215994, - "grad_norm": 2.1244442462921143, - "learning_rate": 1.5038448485801188e-05, - "loss": 0.6947, + "epoch": 0.2815205913410771, + "grad_norm": 1.9372038841247559, + "learning_rate": 1.1803777461835893e-05, + "loss": 0.7841, "step": 3999 }, { - "epoch": 0.3026748893344936, - "grad_norm": 3.423577070236206, - "learning_rate": 1.5036778248911973e-05, - "loss": 0.7686, + "epoch": 0.2815909890883492, + "grad_norm": 1.8341703414916992, + "learning_rate": 1.1802616390030407e-05, + "loss": 0.597, "step": 4000 }, { - "epoch": 0.3027505580568272, - "grad_norm": 2.019320249557495, - "learning_rate": 1.5035107633980182e-05, - "loss": 0.7844, + "epoch": 0.28166138683562125, + "grad_norm": 1.7756075859069824, + "learning_rate": 1.1801455068532545e-05, + "loss": 0.6332, "step": 4001 }, { - "epoch": 0.3028262267791608, - "grad_norm": 1.8496774435043335, - "learning_rate": 1.503343664111043e-05, - "loss": 0.6277, + "epoch": 0.28173178458289333, + "grad_norm": 1.5782862901687622, + "learning_rate": 1.1800293497402687e-05, + "loss": 0.6925, "step": 4002 }, { - "epoch": 0.30290189550149443, - "grad_norm": 2.151859998703003, - "learning_rate": 1.5031765270407362e-05, - "loss": 0.6403, + "epoch": 0.2818021823301654, + "grad_norm": 1.4802216291427612, + "learning_rate": 1.1799131676701222e-05, + "loss": 0.8862, "step": 4003 }, { - "epoch": 0.3029775642238281, - "grad_norm": 2.207608222961426, - "learning_rate": 1.5030093521975642e-05, - "loss": 0.9016, + "epoch": 0.28187258007743754, + "grad_norm": 1.8656500577926636, + "learning_rate": 1.1797969606488555e-05, + "loss": 0.836, "step": 4004 }, { - "epoch": 0.3030532329461617, - "grad_norm": 2.543726921081543, - "learning_rate": 1.5028421395919961e-05, - "loss": 0.8025, + "epoch": 0.2819429778247096, + "grad_norm": 2.19840669631958, + "learning_rate": 1.1796807286825103e-05, + "loss": 0.7531, "step": 4005 }, { - "epoch": 0.3031289016684953, - "grad_norm": 1.9269533157348633, - "learning_rate": 1.5026748892345037e-05, - "loss": 0.6987, + "epoch": 0.2820133755719817, + "grad_norm": 1.7757377624511719, + "learning_rate": 1.1795644717771296e-05, + "loss": 0.7293, "step": 4006 }, { - "epoch": 0.30320457039082893, - "grad_norm": 2.5526182651519775, - "learning_rate": 1.5025076011355602e-05, - "loss": 0.7898, + "epoch": 0.28208377331925377, + "grad_norm": 1.485701322555542, + "learning_rate": 1.1794481899387578e-05, + "loss": 0.7077, "step": 4007 }, { - "epoch": 0.3032802391131626, - "grad_norm": 2.2586183547973633, - "learning_rate": 1.5023402753056422e-05, - "loss": 0.782, + "epoch": 0.28215417106652585, + "grad_norm": 2.251635789871216, + "learning_rate": 1.1793318831734401e-05, + "loss": 0.7226, "step": 4008 }, { - "epoch": 0.3033559078354962, - "grad_norm": 2.1747992038726807, - "learning_rate": 1.5021729117552276e-05, - "loss": 0.77, + "epoch": 0.282224568813798, + "grad_norm": 1.7505569458007812, + "learning_rate": 1.179215551487224e-05, + "loss": 0.7163, "step": 4009 }, { - "epoch": 0.3034315765578298, - "grad_norm": 2.0999064445495605, - "learning_rate": 1.5020055104947979e-05, - "loss": 0.6958, + "epoch": 0.28229496656107006, + "grad_norm": 2.0857231616973877, + "learning_rate": 1.1790991948861573e-05, + "loss": 0.8856, "step": 4010 }, { - "epoch": 0.30350724528016343, - "grad_norm": 2.135993003845215, - "learning_rate": 1.501838071534836e-05, - "loss": 0.633, + "epoch": 0.28236536430834214, + "grad_norm": 1.8925623893737793, + "learning_rate": 1.1789828133762894e-05, + "loss": 0.749, "step": 4011 }, { - "epoch": 0.3035829140024971, - "grad_norm": 3.0176174640655518, - "learning_rate": 1.5016705948858274e-05, - "loss": 0.6997, + "epoch": 0.2824357620556142, + "grad_norm": 1.9142769575119019, + "learning_rate": 1.1788664069636712e-05, + "loss": 0.7937, "step": 4012 }, { - "epoch": 0.3036585827248307, - "grad_norm": 2.2656147480010986, - "learning_rate": 1.5015030805582602e-05, - "loss": 0.7354, + "epoch": 0.2825061598028863, + "grad_norm": 1.838640570640564, + "learning_rate": 1.1787499756543551e-05, + "loss": 0.7155, "step": 4013 }, { - "epoch": 0.3037342514471643, - "grad_norm": 2.75590443611145, - "learning_rate": 1.5013355285626243e-05, - "loss": 0.5873, + "epoch": 0.28257655755015837, + "grad_norm": 2.449157476425171, + "learning_rate": 1.178633519454394e-05, + "loss": 0.7899, "step": 4014 }, { - "epoch": 0.3038099201694979, - "grad_norm": 2.2523720264434814, - "learning_rate": 1.501167938909413e-05, - "loss": 0.795, + "epoch": 0.2826469552974305, + "grad_norm": 1.8678617477416992, + "learning_rate": 1.1785170383698427e-05, + "loss": 0.774, "step": 4015 }, { - "epoch": 0.30388558889183154, - "grad_norm": 2.134129047393799, - "learning_rate": 1.501000311609121e-05, - "loss": 0.7115, + "epoch": 0.2827173530447026, + "grad_norm": 1.8825284242630005, + "learning_rate": 1.1784005324067573e-05, + "loss": 0.666, "step": 4016 }, { - "epoch": 0.3039612576141652, - "grad_norm": 2.428209066390991, - "learning_rate": 1.5008326466722451e-05, - "loss": 0.7494, + "epoch": 0.28278775079197466, + "grad_norm": 1.6521753072738647, + "learning_rate": 1.178284001571195e-05, + "loss": 0.6359, "step": 4017 }, { - "epoch": 0.3040369263364988, - "grad_norm": 2.9792733192443848, - "learning_rate": 1.500664944109286e-05, - "loss": 0.9121, + "epoch": 0.28285814853924673, + "grad_norm": 1.8723312616348267, + "learning_rate": 1.1781674458692142e-05, + "loss": 0.7366, "step": 4018 }, { - "epoch": 0.3041125950588324, - "grad_norm": 3.070441961288452, - "learning_rate": 1.5004972039307451e-05, - "loss": 0.7321, + "epoch": 0.2829285462865188, + "grad_norm": 1.858221411705017, + "learning_rate": 1.178050865306875e-05, + "loss": 0.7957, "step": 4019 }, { - "epoch": 0.30418826378116604, - "grad_norm": 2.1026179790496826, - "learning_rate": 1.5003294261471272e-05, - "loss": 0.5796, + "epoch": 0.28299894403379094, + "grad_norm": 1.8949215412139893, + "learning_rate": 1.1779342598902381e-05, + "loss": 0.68, "step": 4020 }, { - "epoch": 0.3042639325034997, - "grad_norm": 2.6371750831604004, - "learning_rate": 1.5001616107689388e-05, - "loss": 0.7323, + "epoch": 0.283069341781063, + "grad_norm": 1.9945570230484009, + "learning_rate": 1.1778176296253664e-05, + "loss": 0.7071, "step": 4021 }, { - "epoch": 0.3043396012258333, - "grad_norm": 2.2472572326660156, - "learning_rate": 1.4999937578066893e-05, - "loss": 0.8368, + "epoch": 0.2831397395283351, + "grad_norm": 1.8430193662643433, + "learning_rate": 1.1777009745183233e-05, + "loss": 0.7698, "step": 4022 }, { - "epoch": 0.3044152699481669, - "grad_norm": 4.721674919128418, - "learning_rate": 1.4998258672708901e-05, - "loss": 0.552, + "epoch": 0.2832101372756072, + "grad_norm": 2.134927749633789, + "learning_rate": 1.1775842945751739e-05, + "loss": 0.722, "step": 4023 }, { - "epoch": 0.30449093867050053, - "grad_norm": 4.181674480438232, - "learning_rate": 1.499657939172055e-05, - "loss": 0.7753, + "epoch": 0.28328053502287925, + "grad_norm": 1.9295897483825684, + "learning_rate": 1.1774675898019844e-05, + "loss": 0.6868, "step": 4024 }, { - "epoch": 0.3045666073928342, - "grad_norm": 2.1796109676361084, - "learning_rate": 1.4994899735207e-05, - "loss": 0.6785, + "epoch": 0.28335093277015133, + "grad_norm": 2.17865252494812, + "learning_rate": 1.1773508602048227e-05, + "loss": 0.769, "step": 4025 }, { - "epoch": 0.3046422761151678, - "grad_norm": 2.1673810482025146, - "learning_rate": 1.499321970327344e-05, - "loss": 0.743, + "epoch": 0.28342133051742346, + "grad_norm": 1.9356571435928345, + "learning_rate": 1.1772341057897573e-05, + "loss": 0.8004, "step": 4026 }, { - "epoch": 0.3047179448375014, - "grad_norm": 2.26334285736084, - "learning_rate": 1.4991539296025078e-05, - "loss": 0.7859, + "epoch": 0.28349172826469554, + "grad_norm": 1.8020119667053223, + "learning_rate": 1.1771173265628586e-05, + "loss": 0.5957, "step": 4027 }, { - "epoch": 0.30479361355983503, - "grad_norm": 2.063838005065918, - "learning_rate": 1.4989858513567147e-05, - "loss": 1.0079, + "epoch": 0.2835621260119676, + "grad_norm": 1.735824704170227, + "learning_rate": 1.1770005225301981e-05, + "loss": 0.6663, "step": 4028 }, { - "epoch": 0.30486928228216864, - "grad_norm": 2.168710708618164, - "learning_rate": 1.4988177356004902e-05, - "loss": 0.6894, + "epoch": 0.2836325237592397, + "grad_norm": 2.059884548187256, + "learning_rate": 1.1768836936978481e-05, + "loss": 0.7285, "step": 4029 }, { - "epoch": 0.3049449510045023, - "grad_norm": 1.7202786207199097, - "learning_rate": 1.4986495823443621e-05, - "loss": 0.8765, + "epoch": 0.2837029215065118, + "grad_norm": 2.001176118850708, + "learning_rate": 1.1767668400718834e-05, + "loss": 0.7449, "step": 4030 }, { - "epoch": 0.3050206197268359, - "grad_norm": 2.792025327682495, - "learning_rate": 1.4984813915988614e-05, - "loss": 0.6793, + "epoch": 0.2837733192537839, + "grad_norm": 1.8828545808792114, + "learning_rate": 1.1766499616583787e-05, + "loss": 0.6438, "step": 4031 }, { - "epoch": 0.30509628844916953, - "grad_norm": 2.359570264816284, - "learning_rate": 1.4983131633745196e-05, - "loss": 0.7668, + "epoch": 0.283843717001056, + "grad_norm": 2.0976760387420654, + "learning_rate": 1.1765330584634106e-05, + "loss": 0.711, "step": 4032 }, { - "epoch": 0.30517195717150314, - "grad_norm": 4.47695779800415, - "learning_rate": 1.4981448976818725e-05, - "loss": 0.8328, + "epoch": 0.28391411474832806, + "grad_norm": 1.8128604888916016, + "learning_rate": 1.1764161304930577e-05, + "loss": 0.7218, "step": 4033 }, { - "epoch": 0.3052476258938368, - "grad_norm": 2.457249879837036, - "learning_rate": 1.4979765945314574e-05, - "loss": 0.6713, + "epoch": 0.28398451249560014, + "grad_norm": 1.742648720741272, + "learning_rate": 1.1762991777533985e-05, + "loss": 0.655, "step": 4034 }, { - "epoch": 0.3053232946161704, - "grad_norm": 1.8373754024505615, - "learning_rate": 1.497808253933814e-05, - "loss": 0.6746, + "epoch": 0.2840549102428722, + "grad_norm": 1.7699403762817383, + "learning_rate": 1.1761822002505134e-05, + "loss": 0.5665, "step": 4035 }, { - "epoch": 0.305398963338504, - "grad_norm": 2.597541332244873, - "learning_rate": 1.497639875899484e-05, - "loss": 0.6358, + "epoch": 0.2841253079901443, + "grad_norm": 1.640975832939148, + "learning_rate": 1.1760651979904847e-05, + "loss": 0.7414, "step": 4036 }, { - "epoch": 0.30547463206083764, - "grad_norm": 2.0362725257873535, - "learning_rate": 1.4974714604390118e-05, - "loss": 0.8613, + "epoch": 0.2841957057374164, + "grad_norm": 1.8709238767623901, + "learning_rate": 1.1759481709793953e-05, + "loss": 0.773, "step": 4037 }, { - "epoch": 0.3055503007831713, - "grad_norm": 2.046318292617798, - "learning_rate": 1.4973030075629447e-05, - "loss": 0.7307, + "epoch": 0.2842661034846885, + "grad_norm": 1.7185194492340088, + "learning_rate": 1.1758311192233293e-05, + "loss": 0.6993, "step": 4038 }, { - "epoch": 0.3056259695055049, - "grad_norm": 2.4216933250427246, - "learning_rate": 1.4971345172818313e-05, - "loss": 0.7361, + "epoch": 0.2843365012319606, + "grad_norm": 1.9718369245529175, + "learning_rate": 1.1757140427283726e-05, + "loss": 0.7537, "step": 4039 }, { - "epoch": 0.3057016382278385, - "grad_norm": 2.259838104248047, - "learning_rate": 1.4969659896062226e-05, - "loss": 0.8991, + "epoch": 0.28440689897923266, + "grad_norm": 2.19683575630188, + "learning_rate": 1.1755969415006118e-05, + "loss": 0.7364, "step": 4040 }, { - "epoch": 0.30577730695017213, - "grad_norm": 1.8823871612548828, - "learning_rate": 1.4967974245466731e-05, - "loss": 0.7365, + "epoch": 0.28447729672650474, + "grad_norm": 1.7461477518081665, + "learning_rate": 1.1754798155461353e-05, + "loss": 0.6148, "step": 4041 }, { - "epoch": 0.30585297567250574, - "grad_norm": 2.1523728370666504, - "learning_rate": 1.4966288221137388e-05, - "loss": 0.7233, + "epoch": 0.2845476944737768, + "grad_norm": 1.7108646631240845, + "learning_rate": 1.1753626648710327e-05, + "loss": 0.6388, "step": 4042 }, { - "epoch": 0.3059286443948394, - "grad_norm": 5.254127502441406, - "learning_rate": 1.4964601823179776e-05, - "loss": 0.6328, + "epoch": 0.28461809222104895, + "grad_norm": 1.8955973386764526, + "learning_rate": 1.1752454894813946e-05, + "loss": 0.738, "step": 4043 }, { - "epoch": 0.306004313117173, - "grad_norm": 1.8347951173782349, - "learning_rate": 1.4962915051699506e-05, - "loss": 0.7929, + "epoch": 0.284688489968321, + "grad_norm": 2.4048807621002197, + "learning_rate": 1.1751282893833132e-05, + "loss": 0.6608, "step": 4044 }, { - "epoch": 0.30607998183950663, - "grad_norm": 2.36879563331604, - "learning_rate": 1.4961227906802212e-05, - "loss": 0.6739, + "epoch": 0.2847588877155931, + "grad_norm": 3.091808319091797, + "learning_rate": 1.1750110645828816e-05, + "loss": 0.7293, "step": 4045 }, { - "epoch": 0.30615565056184024, - "grad_norm": 1.9774808883666992, - "learning_rate": 1.4959540388593543e-05, - "loss": 0.8385, + "epoch": 0.2848292854628652, + "grad_norm": 2.143312931060791, + "learning_rate": 1.1748938150861944e-05, + "loss": 0.7031, "step": 4046 }, { - "epoch": 0.3062313192841739, - "grad_norm": 1.8404202461242676, - "learning_rate": 1.4957852497179182e-05, - "loss": 0.7847, + "epoch": 0.28489968321013726, + "grad_norm": 1.890505075454712, + "learning_rate": 1.174776540899348e-05, + "loss": 0.7357, "step": 4047 }, { - "epoch": 0.3063069880065075, - "grad_norm": 2.2674901485443115, - "learning_rate": 1.4956164232664825e-05, - "loss": 0.7625, + "epoch": 0.2849700809574094, + "grad_norm": 1.9965007305145264, + "learning_rate": 1.174659242028439e-05, + "loss": 0.7793, "step": 4048 }, { - "epoch": 0.30638265672884113, - "grad_norm": 1.9228596687316895, - "learning_rate": 1.4954475595156198e-05, - "loss": 0.7071, + "epoch": 0.28504047870468147, + "grad_norm": 2.086289644241333, + "learning_rate": 1.1745419184795662e-05, + "loss": 0.7478, "step": 4049 }, { - "epoch": 0.30645832545117474, - "grad_norm": 2.0531787872314453, - "learning_rate": 1.4952786584759053e-05, - "loss": 0.7727, + "epoch": 0.28511087645195354, + "grad_norm": 2.034682273864746, + "learning_rate": 1.1744245702588293e-05, + "loss": 0.8296, "step": 4050 }, { - "epoch": 0.3065339941735084, - "grad_norm": 2.334456205368042, - "learning_rate": 1.4951097201579159e-05, - "loss": 0.7362, + "epoch": 0.2851812741992256, + "grad_norm": 2.088881492614746, + "learning_rate": 1.1743071973723293e-05, + "loss": 0.7195, "step": 4051 }, { - "epoch": 0.306609662895842, - "grad_norm": 2.5954339504241943, - "learning_rate": 1.4949407445722308e-05, - "loss": 0.6712, + "epoch": 0.2852516719464977, + "grad_norm": 1.8486860990524292, + "learning_rate": 1.1741897998261687e-05, + "loss": 0.9014, "step": 4052 }, { - "epoch": 0.3066853316181756, - "grad_norm": 2.0414674282073975, - "learning_rate": 1.4947717317294321e-05, - "loss": 0.7052, + "epoch": 0.2853220696937698, + "grad_norm": 1.4136974811553955, + "learning_rate": 1.1740723776264507e-05, + "loss": 0.7527, "step": 4053 }, { - "epoch": 0.30676100034050924, - "grad_norm": 2.296151638031006, - "learning_rate": 1.4946026816401037e-05, - "loss": 0.6226, + "epoch": 0.2853924674410419, + "grad_norm": 2.1723341941833496, + "learning_rate": 1.1739549307792807e-05, + "loss": 0.7283, "step": 4054 }, { - "epoch": 0.30683666906284285, - "grad_norm": 1.8123749494552612, - "learning_rate": 1.4944335943148323e-05, - "loss": 0.7195, + "epoch": 0.285462865188314, + "grad_norm": 1.496518611907959, + "learning_rate": 1.1738374592907644e-05, + "loss": 0.6303, "step": 4055 }, { - "epoch": 0.3069123377851765, - "grad_norm": 2.317089319229126, - "learning_rate": 1.4942644697642067e-05, - "loss": 0.728, + "epoch": 0.28553326293558606, + "grad_norm": 2.097956418991089, + "learning_rate": 1.1737199631670096e-05, + "loss": 0.7472, "step": 4056 }, { - "epoch": 0.3069880065075101, - "grad_norm": 2.25272536277771, - "learning_rate": 1.4940953079988179e-05, - "loss": 0.6677, + "epoch": 0.28560366068285814, + "grad_norm": 2.025075674057007, + "learning_rate": 1.173602442414125e-05, + "loss": 0.8614, "step": 4057 }, { - "epoch": 0.30706367522984374, - "grad_norm": 2.574553966522217, - "learning_rate": 1.4939261090292592e-05, - "loss": 0.961, + "epoch": 0.2856740584301302, + "grad_norm": 1.600594162940979, + "learning_rate": 1.1734848970382203e-05, + "loss": 0.6097, "step": 4058 }, { - "epoch": 0.30713934395217735, - "grad_norm": 2.785632610321045, - "learning_rate": 1.4937568728661265e-05, - "loss": 0.7058, + "epoch": 0.28574445617740235, + "grad_norm": 2.1172478199005127, + "learning_rate": 1.173367327045407e-05, + "loss": 0.7707, "step": 4059 }, { - "epoch": 0.307215012674511, - "grad_norm": 2.4276599884033203, - "learning_rate": 1.4935875995200183e-05, - "loss": 0.7033, + "epoch": 0.28581485392467443, + "grad_norm": 1.8950347900390625, + "learning_rate": 1.1732497324417976e-05, + "loss": 0.7105, "step": 4060 }, { - "epoch": 0.3072906813968446, - "grad_norm": 2.2110235691070557, - "learning_rate": 1.4934182890015345e-05, - "loss": 0.6735, + "epoch": 0.2858852516719465, + "grad_norm": 2.4193332195281982, + "learning_rate": 1.1731321132335062e-05, + "loss": 0.7467, "step": 4061 }, { - "epoch": 0.30736635011917823, - "grad_norm": 2.3032000064849854, - "learning_rate": 1.4932489413212782e-05, - "loss": 0.8252, + "epoch": 0.2859556494192186, + "grad_norm": 1.9347636699676514, + "learning_rate": 1.1730144694266476e-05, + "loss": 0.856, "step": 4062 }, { - "epoch": 0.30744201884151184, - "grad_norm": 2.245957136154175, - "learning_rate": 1.4930795564898543e-05, - "loss": 0.9702, + "epoch": 0.28602604716649066, + "grad_norm": 2.563082456588745, + "learning_rate": 1.1728968010273385e-05, + "loss": 0.6564, "step": 4063 }, { - "epoch": 0.3075176875638455, - "grad_norm": 2.7905473709106445, - "learning_rate": 1.4929101345178703e-05, - "loss": 0.6039, + "epoch": 0.28609644491376274, + "grad_norm": 2.0575592517852783, + "learning_rate": 1.1727791080416961e-05, + "loss": 0.7752, "step": 4064 }, { - "epoch": 0.3075933562861791, - "grad_norm": 2.3959288597106934, - "learning_rate": 1.4927406754159361e-05, - "loss": 0.6691, + "epoch": 0.28616684266103487, + "grad_norm": 1.7505332231521606, + "learning_rate": 1.17266139047584e-05, + "loss": 0.6376, "step": 4065 }, { - "epoch": 0.30766902500851273, - "grad_norm": 1.9613829851150513, - "learning_rate": 1.4925711791946636e-05, - "loss": 0.8457, + "epoch": 0.28623724040830695, + "grad_norm": 1.9960609674453735, + "learning_rate": 1.1725436483358902e-05, + "loss": 0.7204, "step": 4066 }, { - "epoch": 0.30774469373084634, - "grad_norm": 2.6765880584716797, - "learning_rate": 1.492401645864667e-05, - "loss": 0.8385, + "epoch": 0.286307638155579, + "grad_norm": 2.1367080211639404, + "learning_rate": 1.172425881627968e-05, + "loss": 0.7179, "step": 4067 }, { - "epoch": 0.30782036245317995, - "grad_norm": 2.0931665897369385, - "learning_rate": 1.4922320754365636e-05, - "loss": 0.6882, + "epoch": 0.2863780359028511, + "grad_norm": 7.751003265380859, + "learning_rate": 1.1723080903581963e-05, + "loss": 0.6828, "step": 4068 }, { - "epoch": 0.3078960311755136, - "grad_norm": 2.6953020095825195, - "learning_rate": 1.4920624679209723e-05, - "loss": 0.7359, + "epoch": 0.2864484336501232, + "grad_norm": 1.9674551486968994, + "learning_rate": 1.1721902745326992e-05, + "loss": 0.6899, "step": 4069 }, { - "epoch": 0.30797169989784723, - "grad_norm": 2.3795089721679688, - "learning_rate": 1.4918928233285139e-05, - "loss": 0.7182, + "epoch": 0.28651883139739526, + "grad_norm": 1.7925376892089844, + "learning_rate": 1.1720724341576023e-05, + "loss": 0.6373, "step": 4070 }, { - "epoch": 0.30804736862018084, - "grad_norm": 1.9124236106872559, - "learning_rate": 1.4917231416698126e-05, - "loss": 0.6546, + "epoch": 0.2865892291446674, + "grad_norm": 1.7596532106399536, + "learning_rate": 1.1719545692390318e-05, + "loss": 0.7399, "step": 4071 }, { - "epoch": 0.30812303734251445, - "grad_norm": 2.0559885501861572, - "learning_rate": 1.4915534229554944e-05, - "loss": 0.6896, + "epoch": 0.28665962689193947, + "grad_norm": 2.814255714416504, + "learning_rate": 1.1718366797831161e-05, + "loss": 0.7209, "step": 4072 }, { - "epoch": 0.3081987060648481, - "grad_norm": 1.9751750230789185, - "learning_rate": 1.4913836671961874e-05, - "loss": 0.7115, + "epoch": 0.28673002463921154, + "grad_norm": 1.795639157295227, + "learning_rate": 1.1717187657959838e-05, + "loss": 0.7598, "step": 4073 }, { - "epoch": 0.3082743747871817, - "grad_norm": 2.7253572940826416, - "learning_rate": 1.4912138744025223e-05, - "loss": 0.6748, + "epoch": 0.2868004223864836, + "grad_norm": 1.6410517692565918, + "learning_rate": 1.1716008272837658e-05, + "loss": 0.6982, "step": 4074 }, { - "epoch": 0.30835004350951534, - "grad_norm": 2.238069534301758, - "learning_rate": 1.4910440445851325e-05, - "loss": 0.7391, + "epoch": 0.2868708201337557, + "grad_norm": 1.7311233282089233, + "learning_rate": 1.1714828642525938e-05, + "loss": 0.7243, "step": 4075 }, { - "epoch": 0.30842571223184895, - "grad_norm": 2.452892780303955, - "learning_rate": 1.4908741777546527e-05, - "loss": 0.8675, + "epoch": 0.28694121788102783, + "grad_norm": 1.5485459566116333, + "learning_rate": 1.1713648767086004e-05, + "loss": 0.6689, "step": 4076 }, { - "epoch": 0.3085013809541826, - "grad_norm": 2.315485715866089, - "learning_rate": 1.4907042739217208e-05, - "loss": 0.7192, + "epoch": 0.2870116156282999, + "grad_norm": 1.8302311897277832, + "learning_rate": 1.1712468646579203e-05, + "loss": 0.8418, "step": 4077 }, { - "epoch": 0.3085770496765162, - "grad_norm": 2.2211544513702393, - "learning_rate": 1.4905343330969766e-05, - "loss": 0.7764, + "epoch": 0.287082013375572, + "grad_norm": 1.9976918697357178, + "learning_rate": 1.171128828106689e-05, + "loss": 0.6669, "step": 4078 }, { - "epoch": 0.30865271839884983, - "grad_norm": 2.0151994228363037, - "learning_rate": 1.4903643552910628e-05, - "loss": 0.7262, + "epoch": 0.28715241112284406, + "grad_norm": 1.8388231992721558, + "learning_rate": 1.1710107670610432e-05, + "loss": 0.7346, "step": 4079 }, { - "epoch": 0.30872838712118345, - "grad_norm": 2.6214284896850586, - "learning_rate": 1.4901943405146233e-05, - "loss": 0.7586, + "epoch": 0.28722280887011614, + "grad_norm": 1.9619759321212769, + "learning_rate": 1.170892681527121e-05, + "loss": 0.7473, "step": 4080 }, { - "epoch": 0.30880405584351706, - "grad_norm": 2.426795244216919, - "learning_rate": 1.4900242887783053e-05, - "loss": 0.7345, + "epoch": 0.2872932066173882, + "grad_norm": 1.7974724769592285, + "learning_rate": 1.170774571511062e-05, + "loss": 0.7634, "step": 4081 }, { - "epoch": 0.3088797245658507, - "grad_norm": 1.778331995010376, - "learning_rate": 1.4898542000927582e-05, - "loss": 0.606, + "epoch": 0.28736360436466035, + "grad_norm": 1.6995339393615723, + "learning_rate": 1.1706564370190065e-05, + "loss": 0.7059, "step": 4082 }, { - "epoch": 0.30895539328818433, - "grad_norm": 2.1181724071502686, - "learning_rate": 1.4896840744686331e-05, - "loss": 0.7484, + "epoch": 0.28743400211193243, + "grad_norm": 2.764345407485962, + "learning_rate": 1.1705382780570965e-05, + "loss": 0.6792, "step": 4083 }, { - "epoch": 0.30903106201051794, - "grad_norm": 2.628530740737915, - "learning_rate": 1.4895139119165837e-05, - "loss": 0.7351, + "epoch": 0.2875043998592045, + "grad_norm": 1.7557584047317505, + "learning_rate": 1.1704200946314754e-05, + "loss": 0.7437, "step": 4084 }, { - "epoch": 0.30910673073285155, - "grad_norm": 1.912762999534607, - "learning_rate": 1.489343712447267e-05, - "loss": 0.7039, + "epoch": 0.2875747976064766, + "grad_norm": 2.3604981899261475, + "learning_rate": 1.1703018867482876e-05, + "loss": 0.7093, "step": 4085 }, { - "epoch": 0.3091823994551852, - "grad_norm": 2.245521306991577, - "learning_rate": 1.4891734760713405e-05, - "loss": 0.985, + "epoch": 0.28764519535374866, + "grad_norm": 1.8898390531539917, + "learning_rate": 1.1701836544136788e-05, + "loss": 0.7688, "step": 4086 }, { - "epoch": 0.30925806817751883, - "grad_norm": 6.195135593414307, - "learning_rate": 1.4890032027994655e-05, - "loss": 0.7202, + "epoch": 0.2877155931010208, + "grad_norm": 2.221972942352295, + "learning_rate": 1.1700653976337959e-05, + "loss": 0.7259, "step": 4087 }, { - "epoch": 0.30933373689985244, - "grad_norm": 2.0465266704559326, - "learning_rate": 1.4888328926423048e-05, - "loss": 0.7164, + "epoch": 0.28778599084829287, + "grad_norm": 2.01739764213562, + "learning_rate": 1.1699471164147873e-05, + "loss": 0.8964, "step": 4088 }, { - "epoch": 0.30940940562218605, - "grad_norm": 2.153587818145752, - "learning_rate": 1.4886625456105235e-05, - "loss": 0.8307, + "epoch": 0.28785638859556495, + "grad_norm": 1.6156858205795288, + "learning_rate": 1.1698288107628023e-05, + "loss": 0.7506, "step": 4089 }, { - "epoch": 0.3094850743445197, - "grad_norm": 2.2596750259399414, - "learning_rate": 1.48849216171479e-05, - "loss": 0.7311, + "epoch": 0.287926786342837, + "grad_norm": 2.967663049697876, + "learning_rate": 1.169710480683992e-05, + "loss": 0.7556, "step": 4090 }, { - "epoch": 0.3095607430668533, - "grad_norm": 1.8946340084075928, - "learning_rate": 1.4883217409657739e-05, - "loss": 0.7878, + "epoch": 0.2879971840901091, + "grad_norm": 1.5559872388839722, + "learning_rate": 1.1695921261845087e-05, + "loss": 0.7526, "step": 4091 }, { - "epoch": 0.30963641178918694, - "grad_norm": 1.9765899181365967, - "learning_rate": 1.4881512833741475e-05, - "loss": 0.746, + "epoch": 0.2880675818373812, + "grad_norm": 1.861844539642334, + "learning_rate": 1.1694737472705054e-05, + "loss": 0.6904, "step": 4092 }, { - "epoch": 0.30971208051152055, - "grad_norm": 2.942574977874756, - "learning_rate": 1.4879807889505856e-05, - "loss": 0.6554, + "epoch": 0.2881379795846533, + "grad_norm": 1.8878048658370972, + "learning_rate": 1.1693553439481365e-05, + "loss": 0.7141, "step": 4093 }, { - "epoch": 0.30978774923385416, - "grad_norm": 2.359882116317749, - "learning_rate": 1.4878102577057643e-05, - "loss": 0.6133, + "epoch": 0.2882083773319254, + "grad_norm": 2.221729040145874, + "learning_rate": 1.1692369162235583e-05, + "loss": 0.7905, "step": 4094 }, { - "epoch": 0.3098634179561878, - "grad_norm": 2.197938919067383, - "learning_rate": 1.487639689650364e-05, - "loss": 0.7502, + "epoch": 0.28827877507919747, + "grad_norm": 1.9637153148651123, + "learning_rate": 1.169118464102928e-05, + "loss": 0.6249, "step": 4095 }, { - "epoch": 0.30993908667852144, - "grad_norm": 2.5012736320495605, - "learning_rate": 1.4874690847950654e-05, - "loss": 0.7593, + "epoch": 0.28834917282646955, + "grad_norm": 2.8173906803131104, + "learning_rate": 1.1689999875924037e-05, + "loss": 0.6612, "step": 4096 }, { - "epoch": 0.31001475540085505, - "grad_norm": 2.0770883560180664, - "learning_rate": 1.4872984431505528e-05, - "loss": 0.8841, + "epoch": 0.2884195705737416, + "grad_norm": 1.6310161352157593, + "learning_rate": 1.1688814866981455e-05, + "loss": 0.6118, "step": 4097 }, { - "epoch": 0.31009042412318866, - "grad_norm": 2.287703275680542, - "learning_rate": 1.4871277647275122e-05, - "loss": 0.7934, + "epoch": 0.2884899683210137, + "grad_norm": 2.0276479721069336, + "learning_rate": 1.1687629614263142e-05, + "loss": 0.6564, "step": 4098 }, { - "epoch": 0.3101660928455223, - "grad_norm": 1.9843212366104126, - "learning_rate": 1.486957049536632e-05, - "loss": 0.7782, + "epoch": 0.28856036606828583, + "grad_norm": 1.793845295906067, + "learning_rate": 1.1686444117830719e-05, + "loss": 0.7482, "step": 4099 }, { - "epoch": 0.31024176156785593, - "grad_norm": 2.0489273071289062, - "learning_rate": 1.4867862975886027e-05, - "loss": 0.7255, + "epoch": 0.2886307638155579, + "grad_norm": 1.9694607257843018, + "learning_rate": 1.1685258377745822e-05, + "loss": 0.9192, "step": 4100 }, { - "epoch": 0.31031743029018954, - "grad_norm": 2.2509491443634033, - "learning_rate": 1.4866155088941175e-05, - "loss": 0.7689, + "epoch": 0.28870116156283, + "grad_norm": 2.019437789916992, + "learning_rate": 1.1684072394070099e-05, + "loss": 0.6934, "step": 4101 }, { - "epoch": 0.31039309901252315, - "grad_norm": 2.3707261085510254, - "learning_rate": 1.4864446834638721e-05, - "loss": 0.8923, + "epoch": 0.28877155931010207, + "grad_norm": 1.987646460533142, + "learning_rate": 1.1682886166865209e-05, + "loss": 0.7105, "step": 4102 }, { - "epoch": 0.3104687677348568, - "grad_norm": 2.4059784412384033, - "learning_rate": 1.4862738213085634e-05, - "loss": 0.7605, + "epoch": 0.28884195705737414, + "grad_norm": 1.940624713897705, + "learning_rate": 1.1681699696192826e-05, + "loss": 0.729, "step": 4103 }, { - "epoch": 0.31054443645719043, - "grad_norm": 2.0712780952453613, - "learning_rate": 1.4861029224388921e-05, - "loss": 0.7258, + "epoch": 0.2889123548046463, + "grad_norm": 1.83932626247406, + "learning_rate": 1.1680512982114637e-05, + "loss": 0.7269, "step": 4104 }, { - "epoch": 0.31062010517952404, - "grad_norm": 2.2427139282226562, - "learning_rate": 1.4859319868655602e-05, - "loss": 0.6634, + "epoch": 0.28898275255191835, + "grad_norm": 2.0080509185791016, + "learning_rate": 1.167932602469234e-05, + "loss": 0.6819, "step": 4105 }, { - "epoch": 0.31069577390185765, - "grad_norm": 2.8183469772338867, - "learning_rate": 1.4857610145992719e-05, - "loss": 0.7484, + "epoch": 0.28905315029919043, + "grad_norm": 1.8404968976974487, + "learning_rate": 1.1678138823987642e-05, + "loss": 0.6803, "step": 4106 }, { - "epoch": 0.31077144262419126, - "grad_norm": 2.3078558444976807, - "learning_rate": 1.4855900056507343e-05, - "loss": 0.8037, + "epoch": 0.2891235480464625, + "grad_norm": 1.9653533697128296, + "learning_rate": 1.1676951380062273e-05, + "loss": 0.8435, "step": 4107 }, { - "epoch": 0.31084711134652493, - "grad_norm": 1.7662575244903564, - "learning_rate": 1.4854189600306565e-05, - "loss": 0.5929, + "epoch": 0.2891939457937346, + "grad_norm": 2.887413501739502, + "learning_rate": 1.1675763692977964e-05, + "loss": 0.7546, "step": 4108 }, { - "epoch": 0.31092278006885854, - "grad_norm": 2.5993802547454834, - "learning_rate": 1.48524787774975e-05, - "loss": 0.7729, + "epoch": 0.28926434354100666, + "grad_norm": 2.2821836471557617, + "learning_rate": 1.1674575762796468e-05, + "loss": 0.6127, "step": 4109 }, { - "epoch": 0.31099844879119215, - "grad_norm": 1.8144737482070923, - "learning_rate": 1.4850767588187285e-05, - "loss": 0.6611, + "epoch": 0.2893347412882788, + "grad_norm": 1.9545385837554932, + "learning_rate": 1.1673387589579541e-05, + "loss": 0.7108, "step": 4110 }, { - "epoch": 0.31107411751352576, - "grad_norm": 1.6812607049942017, - "learning_rate": 1.4849056032483081e-05, - "loss": 0.7923, + "epoch": 0.2894051390355509, + "grad_norm": 1.6905745267868042, + "learning_rate": 1.1672199173388964e-05, + "loss": 0.9445, "step": 4111 }, { - "epoch": 0.3111497862358594, - "grad_norm": 2.2537429332733154, - "learning_rate": 1.484734411049207e-05, - "loss": 0.7171, + "epoch": 0.28947553678282295, + "grad_norm": 2.139296054840088, + "learning_rate": 1.1671010514286518e-05, + "loss": 0.6659, "step": 4112 }, { - "epoch": 0.31122545495819304, - "grad_norm": 2.3459930419921875, - "learning_rate": 1.4845631822321456e-05, - "loss": 0.7479, + "epoch": 0.28954593453009503, + "grad_norm": 2.143155097961426, + "learning_rate": 1.1669821612334008e-05, + "loss": 0.772, "step": 4113 }, { - "epoch": 0.31130112368052665, - "grad_norm": 1.9687094688415527, - "learning_rate": 1.484391916807847e-05, - "loss": 0.7927, + "epoch": 0.2896163322773671, + "grad_norm": 1.9737684726715088, + "learning_rate": 1.1668632467593242e-05, + "loss": 0.7121, "step": 4114 }, { - "epoch": 0.31137679240286026, - "grad_norm": 2.511396646499634, - "learning_rate": 1.4842206147870365e-05, - "loss": 0.8989, + "epoch": 0.28968673002463924, + "grad_norm": 1.7130206823349, + "learning_rate": 1.1667443080126044e-05, + "loss": 0.717, "step": 4115 }, { - "epoch": 0.3114524611251939, - "grad_norm": 2.6915669441223145, - "learning_rate": 1.4840492761804417e-05, - "loss": 0.8171, + "epoch": 0.2897571277719113, + "grad_norm": 1.9802327156066895, + "learning_rate": 1.1666253449994256e-05, + "loss": 0.8401, "step": 4116 }, { - "epoch": 0.31152812984752754, - "grad_norm": 1.9705544710159302, - "learning_rate": 1.483877900998792e-05, - "loss": 0.7195, + "epoch": 0.2898275255191834, + "grad_norm": 2.0832936763763428, + "learning_rate": 1.1665063577259723e-05, + "loss": 0.7295, "step": 4117 }, { - "epoch": 0.31160379856986115, - "grad_norm": 1.8884872198104858, - "learning_rate": 1.4837064892528197e-05, - "loss": 0.7556, + "epoch": 0.28989792326645547, + "grad_norm": 1.9010951519012451, + "learning_rate": 1.1663873461984312e-05, + "loss": 0.7317, "step": 4118 }, { - "epoch": 0.31167946729219476, - "grad_norm": 2.239375591278076, - "learning_rate": 1.4835350409532592e-05, - "loss": 0.696, + "epoch": 0.28996832101372755, + "grad_norm": 1.6994128227233887, + "learning_rate": 1.1662683104229892e-05, + "loss": 0.8083, "step": 4119 }, { - "epoch": 0.3117551360145284, - "grad_norm": 1.9161548614501953, - "learning_rate": 1.4833635561108469e-05, - "loss": 0.6043, + "epoch": 0.2900387187609996, + "grad_norm": 1.8920795917510986, + "learning_rate": 1.1661492504058358e-05, + "loss": 0.8131, "step": 4120 }, { - "epoch": 0.31183080473686203, - "grad_norm": 2.44989275932312, - "learning_rate": 1.483192034736322e-05, - "loss": 0.8091, + "epoch": 0.29010911650827176, + "grad_norm": 2.2870404720306396, + "learning_rate": 1.1660301661531607e-05, + "loss": 0.6408, "step": 4121 }, { - "epoch": 0.31190647345919564, - "grad_norm": 2.2470176219940186, - "learning_rate": 1.4830204768404253e-05, - "loss": 0.7588, + "epoch": 0.29017951425554384, + "grad_norm": 1.8727452754974365, + "learning_rate": 1.1659110576711551e-05, + "loss": 0.6458, "step": 4122 }, { - "epoch": 0.31198214218152925, - "grad_norm": 2.389315128326416, - "learning_rate": 1.482848882433901e-05, - "loss": 0.8738, + "epoch": 0.2902499120028159, + "grad_norm": 2.091137170791626, + "learning_rate": 1.1657919249660116e-05, + "loss": 0.741, "step": 4123 }, { - "epoch": 0.31205781090386286, - "grad_norm": 2.3633134365081787, - "learning_rate": 1.4826772515274943e-05, - "loss": 0.7753, + "epoch": 0.290320309750088, + "grad_norm": 1.6697039604187012, + "learning_rate": 1.1656727680439241e-05, + "loss": 0.9141, "step": 4124 }, { - "epoch": 0.31213347962619653, - "grad_norm": 2.183661699295044, - "learning_rate": 1.4825055841319536e-05, - "loss": 0.8454, + "epoch": 0.29039070749736007, + "grad_norm": 2.031008720397949, + "learning_rate": 1.1655535869110876e-05, + "loss": 0.646, "step": 4125 }, { - "epoch": 0.31220914834853014, - "grad_norm": 2.239809989929199, - "learning_rate": 1.4823338802580294e-05, - "loss": 0.7674, + "epoch": 0.29046110524463215, + "grad_norm": 1.9340420961380005, + "learning_rate": 1.1654343815736988e-05, + "loss": 0.6402, "step": 4126 }, { - "epoch": 0.31228481707086375, - "grad_norm": 2.594881296157837, - "learning_rate": 1.4821621399164737e-05, - "loss": 0.8039, + "epoch": 0.2905315029919043, + "grad_norm": 1.7493561506271362, + "learning_rate": 1.1653151520379546e-05, + "loss": 0.6812, "step": 4127 }, { - "epoch": 0.31236048579319736, - "grad_norm": 2.0808498859405518, - "learning_rate": 1.4819903631180423e-05, - "loss": 0.8132, + "epoch": 0.29060190073917636, + "grad_norm": 2.0646262168884277, + "learning_rate": 1.1651958983100543e-05, + "loss": 0.8557, "step": 4128 }, { - "epoch": 0.31243615451553103, - "grad_norm": 2.28064227104187, - "learning_rate": 1.4818185498734914e-05, - "loss": 0.7763, + "epoch": 0.29067229848644843, + "grad_norm": 2.0039403438568115, + "learning_rate": 1.165076620396198e-05, + "loss": 0.6912, "step": 4129 }, { - "epoch": 0.31251182323786464, - "grad_norm": 2.041588068008423, - "learning_rate": 1.4816467001935815e-05, - "loss": 0.7704, + "epoch": 0.2907426962337205, + "grad_norm": 1.7454853057861328, + "learning_rate": 1.1649573183025872e-05, + "loss": 0.6557, "step": 4130 }, { - "epoch": 0.31258749196019825, - "grad_norm": 2.0679996013641357, - "learning_rate": 1.4814748140890738e-05, - "loss": 0.6893, + "epoch": 0.2908130939809926, + "grad_norm": 1.8740512132644653, + "learning_rate": 1.164837992035424e-05, + "loss": 0.7983, "step": 4131 }, { - "epoch": 0.31266316068253186, - "grad_norm": 1.9504202604293823, - "learning_rate": 1.4813028915707326e-05, - "loss": 0.8287, + "epoch": 0.2908834917282647, + "grad_norm": 2.163442611694336, + "learning_rate": 1.1647186416009128e-05, + "loss": 0.746, "step": 4132 }, { - "epoch": 0.3127388294048655, - "grad_norm": 2.44740891456604, - "learning_rate": 1.4811309326493244e-05, - "loss": 0.7966, + "epoch": 0.2909538894755368, + "grad_norm": 1.8060698509216309, + "learning_rate": 1.1645992670052587e-05, + "loss": 0.6734, "step": 4133 }, { - "epoch": 0.31281449812719914, - "grad_norm": 2.279268264770508, - "learning_rate": 1.4809589373356173e-05, - "loss": 0.9172, + "epoch": 0.2910242872228089, + "grad_norm": 2.0593039989471436, + "learning_rate": 1.1644798682546676e-05, + "loss": 0.6386, "step": 4134 }, { - "epoch": 0.31289016684953275, - "grad_norm": 2.8255929946899414, - "learning_rate": 1.4807869056403823e-05, - "loss": 0.7796, + "epoch": 0.29109468497008095, + "grad_norm": 2.1667680740356445, + "learning_rate": 1.1643604453553479e-05, + "loss": 0.6903, "step": 4135 }, { - "epoch": 0.31296583557186636, - "grad_norm": 1.751339077949524, - "learning_rate": 1.480614837574393e-05, - "loss": 0.7826, + "epoch": 0.29116508271735303, + "grad_norm": 2.002692699432373, + "learning_rate": 1.1642409983135077e-05, + "loss": 0.7464, "step": 4136 }, { - "epoch": 0.31304150429419997, - "grad_norm": 2.050893545150757, - "learning_rate": 1.4804427331484249e-05, - "loss": 0.7079, + "epoch": 0.2912354804646251, + "grad_norm": 1.7677979469299316, + "learning_rate": 1.1641215271353577e-05, + "loss": 0.7597, "step": 4137 }, { - "epoch": 0.31311717301653363, - "grad_norm": 2.0679502487182617, - "learning_rate": 1.480270592373255e-05, - "loss": 0.7366, + "epoch": 0.29130587821189724, + "grad_norm": 2.087491989135742, + "learning_rate": 1.1640020318271093e-05, + "loss": 0.6604, "step": 4138 }, { - "epoch": 0.31319284173886724, - "grad_norm": 2.277479887008667, - "learning_rate": 1.480098415259664e-05, - "loss": 0.701, + "epoch": 0.2913762759591693, + "grad_norm": 1.9816242456436157, + "learning_rate": 1.163882512394975e-05, + "loss": 0.6654, "step": 4139 }, { - "epoch": 0.31326851046120086, - "grad_norm": 3.157134771347046, - "learning_rate": 1.479926201818434e-05, - "loss": 0.748, + "epoch": 0.2914466737064414, + "grad_norm": 1.800615668296814, + "learning_rate": 1.1637629688451686e-05, + "loss": 0.8, "step": 4140 }, { - "epoch": 0.31334417918353447, - "grad_norm": 2.37265682220459, - "learning_rate": 1.4797539520603497e-05, - "loss": 0.7892, + "epoch": 0.29151707145371347, + "grad_norm": 2.0550265312194824, + "learning_rate": 1.1636434011839055e-05, + "loss": 0.8415, "step": 4141 }, { - "epoch": 0.31341984790586813, - "grad_norm": 1.8097896575927734, - "learning_rate": 1.4795816659961974e-05, - "loss": 0.8259, + "epoch": 0.29158746920098555, + "grad_norm": 1.549818754196167, + "learning_rate": 1.1635238094174022e-05, + "loss": 0.6821, "step": 4142 }, { - "epoch": 0.31349551662820174, - "grad_norm": 1.9213300943374634, - "learning_rate": 1.4794093436367668e-05, - "loss": 0.8412, + "epoch": 0.2916578669482577, + "grad_norm": 1.6564946174621582, + "learning_rate": 1.1634041935518759e-05, + "loss": 0.7022, "step": 4143 }, { - "epoch": 0.31357118535053535, - "grad_norm": 1.8709452152252197, - "learning_rate": 1.479236984992849e-05, - "loss": 0.7159, + "epoch": 0.29172826469552976, + "grad_norm": 1.8567675352096558, + "learning_rate": 1.1632845535935462e-05, + "loss": 0.808, "step": 4144 }, { - "epoch": 0.31364685407286896, - "grad_norm": 2.649545192718506, - "learning_rate": 1.4790645900752377e-05, - "loss": 0.7186, + "epoch": 0.29179866244280184, + "grad_norm": 2.301088809967041, + "learning_rate": 1.163164889548633e-05, + "loss": 0.7618, "step": 4145 }, { - "epoch": 0.31372252279520263, - "grad_norm": 2.275909662246704, - "learning_rate": 1.478892158894729e-05, - "loss": 0.6128, + "epoch": 0.2918690601900739, + "grad_norm": 4.633566856384277, + "learning_rate": 1.1630452014233576e-05, + "loss": 0.691, "step": 4146 }, { - "epoch": 0.31379819151753624, - "grad_norm": 2.2135143280029297, - "learning_rate": 1.4787196914621208e-05, - "loss": 0.9426, + "epoch": 0.291939457937346, + "grad_norm": 1.7803915739059448, + "learning_rate": 1.1629254892239429e-05, + "loss": 0.7834, "step": 4147 }, { - "epoch": 0.31387386023986985, - "grad_norm": 2.2389373779296875, - "learning_rate": 1.4785471877882138e-05, - "loss": 0.7118, + "epoch": 0.29200985568461807, + "grad_norm": 2.087001323699951, + "learning_rate": 1.1628057529566126e-05, + "loss": 0.6958, "step": 4148 }, { - "epoch": 0.31394952896220346, - "grad_norm": 1.9949945211410522, - "learning_rate": 1.4783746478838108e-05, - "loss": 0.7185, + "epoch": 0.2920802534318902, + "grad_norm": 1.8248859643936157, + "learning_rate": 1.1626859926275924e-05, + "loss": 0.7604, "step": 4149 }, { - "epoch": 0.31402519768453707, - "grad_norm": 1.941359519958496, - "learning_rate": 1.4782020717597163e-05, - "loss": 0.6794, + "epoch": 0.2921506511791623, + "grad_norm": 1.8746949434280396, + "learning_rate": 1.1625662082431084e-05, + "loss": 0.7072, "step": 4150 }, { - "epoch": 0.31410086640687074, - "grad_norm": 2.1652402877807617, - "learning_rate": 1.478029459426738e-05, - "loss": 0.7128, + "epoch": 0.29222104892643436, + "grad_norm": 1.7796236276626587, + "learning_rate": 1.162446399809388e-05, + "loss": 0.7331, "step": 4151 }, { - "epoch": 0.31417653512920435, - "grad_norm": 2.5514955520629883, - "learning_rate": 1.4778568108956857e-05, - "loss": 0.8677, + "epoch": 0.29229144667370643, + "grad_norm": 1.8626784086227417, + "learning_rate": 1.1623265673326605e-05, + "loss": 0.5887, "step": 4152 }, { - "epoch": 0.31425220385153796, - "grad_norm": 2.4251315593719482, - "learning_rate": 1.4776841261773706e-05, - "loss": 0.8487, + "epoch": 0.2923618444209785, + "grad_norm": 2.178006410598755, + "learning_rate": 1.162206710819156e-05, + "loss": 0.8102, "step": 4153 }, { - "epoch": 0.31432787257387157, - "grad_norm": 2.3087351322174072, - "learning_rate": 1.477511405282607e-05, - "loss": 0.7846, + "epoch": 0.2924322421682506, + "grad_norm": 1.8477708101272583, + "learning_rate": 1.1620868302751063e-05, + "loss": 0.7255, "step": 4154 }, { - "epoch": 0.31440354129620524, - "grad_norm": 1.8735215663909912, - "learning_rate": 1.4773386482222115e-05, - "loss": 0.7894, + "epoch": 0.2925026399155227, + "grad_norm": 1.9449650049209595, + "learning_rate": 1.1619669257067435e-05, + "loss": 0.6901, "step": 4155 }, { - "epoch": 0.31447921001853885, - "grad_norm": 2.0851855278015137, - "learning_rate": 1.4771658550070024e-05, - "loss": 0.7316, + "epoch": 0.2925730376627948, + "grad_norm": 2.1117334365844727, + "learning_rate": 1.1618469971203021e-05, + "loss": 0.6986, "step": 4156 }, { - "epoch": 0.31455487874087246, - "grad_norm": 2.0207672119140625, - "learning_rate": 1.4769930256478008e-05, - "loss": 0.7909, + "epoch": 0.2926434354100669, + "grad_norm": 1.8282201290130615, + "learning_rate": 1.161727044522017e-05, + "loss": 0.7745, "step": 4157 }, { - "epoch": 0.31463054746320607, - "grad_norm": 2.3125038146972656, - "learning_rate": 1.4768201601554295e-05, - "loss": 0.7246, + "epoch": 0.29271383315733895, + "grad_norm": 1.9957741498947144, + "learning_rate": 1.1616070679181246e-05, + "loss": 0.6785, "step": 4158 }, { - "epoch": 0.31470621618553973, - "grad_norm": 2.17578387260437, - "learning_rate": 1.4766472585407142e-05, - "loss": 0.7306, + "epoch": 0.29278423090461103, + "grad_norm": 1.9689723253250122, + "learning_rate": 1.1614870673148627e-05, + "loss": 0.6542, "step": 4159 }, { - "epoch": 0.31478188490787334, - "grad_norm": 1.933734655380249, - "learning_rate": 1.4764743208144827e-05, - "loss": 0.7355, + "epoch": 0.29285462865188316, + "grad_norm": 1.7150307893753052, + "learning_rate": 1.1613670427184704e-05, + "loss": 0.642, "step": 4160 }, { - "epoch": 0.31485755363020695, - "grad_norm": 2.213831901550293, - "learning_rate": 1.4763013469875644e-05, - "loss": 0.8326, + "epoch": 0.29292502639915524, + "grad_norm": 1.8678089380264282, + "learning_rate": 1.1612469941351875e-05, + "loss": 0.8182, "step": 4161 }, { - "epoch": 0.31493322235254056, - "grad_norm": 2.065779209136963, - "learning_rate": 1.4761283370707919e-05, - "loss": 0.8269, + "epoch": 0.2929954241464273, + "grad_norm": 1.7645262479782104, + "learning_rate": 1.1611269215712556e-05, + "loss": 0.7082, "step": 4162 }, { - "epoch": 0.3150088910748742, - "grad_norm": 2.665332794189453, - "learning_rate": 1.4759552910749993e-05, - "loss": 0.7556, + "epoch": 0.2930658218936994, + "grad_norm": 2.56716251373291, + "learning_rate": 1.1610068250329176e-05, + "loss": 0.7951, "step": 4163 }, { - "epoch": 0.31508455979720784, - "grad_norm": 1.8183536529541016, - "learning_rate": 1.4757822090110236e-05, - "loss": 0.7538, + "epoch": 0.2931362196409715, + "grad_norm": 2.0943613052368164, + "learning_rate": 1.160886704526417e-05, + "loss": 0.6985, "step": 4164 }, { - "epoch": 0.31516022851954145, - "grad_norm": 2.538285493850708, - "learning_rate": 1.4756090908897039e-05, - "loss": 0.8137, + "epoch": 0.29320661738824355, + "grad_norm": 1.7305212020874023, + "learning_rate": 1.1607665600579995e-05, + "loss": 0.6468, "step": 4165 }, { - "epoch": 0.31523589724187506, - "grad_norm": 2.3421831130981445, - "learning_rate": 1.4754359367218808e-05, + "epoch": 0.2932770151355157, + "grad_norm": 1.950076937675476, + "learning_rate": 1.1606463916339113e-05, "loss": 0.7443, "step": 4166 }, { - "epoch": 0.3153115659642087, - "grad_norm": 3.463080406188965, - "learning_rate": 1.4752627465183985e-05, - "loss": 0.783, + "epoch": 0.29334741288278776, + "grad_norm": 1.5759586095809937, + "learning_rate": 1.1605261992603998e-05, + "loss": 0.677, "step": 4167 }, { - "epoch": 0.31538723468654234, - "grad_norm": 2.2434184551239014, - "learning_rate": 1.4750895202901021e-05, - "loss": 0.8343, + "epoch": 0.29341781063005984, + "grad_norm": 1.9350392818450928, + "learning_rate": 1.1604059829437141e-05, + "loss": 0.5944, "step": 4168 }, { - "epoch": 0.31546290340887595, - "grad_norm": 2.3080389499664307, - "learning_rate": 1.4749162580478401e-05, - "loss": 0.6915, + "epoch": 0.2934882083773319, + "grad_norm": 1.7253186702728271, + "learning_rate": 1.1602857426901046e-05, + "loss": 0.8057, "step": 4169 }, { - "epoch": 0.31553857213120956, - "grad_norm": 1.77947998046875, - "learning_rate": 1.4747429598024625e-05, - "loss": 0.6928, + "epoch": 0.293558606124604, + "grad_norm": 1.7164831161499023, + "learning_rate": 1.1601654785058223e-05, + "loss": 0.6907, "step": 4170 }, { - "epoch": 0.31561424085354317, - "grad_norm": 1.8366894721984863, - "learning_rate": 1.4745696255648219e-05, - "loss": 0.7257, + "epoch": 0.2936290038718761, + "grad_norm": 1.6563913822174072, + "learning_rate": 1.1600451903971201e-05, + "loss": 0.8005, "step": 4171 }, { - "epoch": 0.31568990957587684, - "grad_norm": 2.3540196418762207, - "learning_rate": 1.4743962553457729e-05, - "loss": 0.7414, + "epoch": 0.2936994016191482, + "grad_norm": 1.7347633838653564, + "learning_rate": 1.1599248783702517e-05, + "loss": 0.7064, "step": 4172 }, { - "epoch": 0.31576557829821045, - "grad_norm": 2.082271099090576, - "learning_rate": 1.4742228491561723e-05, - "loss": 0.8058, + "epoch": 0.2937697993664203, + "grad_norm": 1.6658159494400024, + "learning_rate": 1.1598045424314725e-05, + "loss": 0.6805, "step": 4173 }, { - "epoch": 0.31584124702054406, - "grad_norm": 1.8880605697631836, - "learning_rate": 1.4740494070068799e-05, - "loss": 0.8184, + "epoch": 0.29384019711369236, + "grad_norm": 1.8879221677780151, + "learning_rate": 1.1596841825870386e-05, + "loss": 0.7016, "step": 4174 }, { - "epoch": 0.31591691574287767, - "grad_norm": 2.252822160720825, - "learning_rate": 1.4738759289087569e-05, - "loss": 0.8572, + "epoch": 0.29391059486096444, + "grad_norm": 1.8998405933380127, + "learning_rate": 1.1595637988432076e-05, + "loss": 0.6844, "step": 4175 }, { - "epoch": 0.3159925844652113, - "grad_norm": 1.9916536808013916, - "learning_rate": 1.4737024148726668e-05, - "loss": 0.7948, + "epoch": 0.2939809926082365, + "grad_norm": 1.745285153388977, + "learning_rate": 1.1594433912062388e-05, + "loss": 0.8175, "step": 4176 }, { - "epoch": 0.31606825318754495, - "grad_norm": 2.2267072200775146, - "learning_rate": 1.4735288649094764e-05, - "loss": 0.7164, + "epoch": 0.29405139035550865, + "grad_norm": 1.8201717138290405, + "learning_rate": 1.1593229596823918e-05, + "loss": 0.7172, "step": 4177 }, { - "epoch": 0.31614392190987856, - "grad_norm": 2.865279197692871, - "learning_rate": 1.4733552790300531e-05, - "loss": 0.8885, + "epoch": 0.2941217881027807, + "grad_norm": 1.8032022714614868, + "learning_rate": 1.159202504277928e-05, + "loss": 0.8116, "step": 4178 }, { - "epoch": 0.31621959063221217, - "grad_norm": 2.009626626968384, - "learning_rate": 1.473181657245268e-05, - "loss": 0.6854, + "epoch": 0.2941921858500528, + "grad_norm": 1.7052494287490845, + "learning_rate": 1.1590820249991106e-05, + "loss": 0.7858, "step": 4179 }, { - "epoch": 0.3162952593545458, - "grad_norm": 2.28889536857605, - "learning_rate": 1.4730079995659935e-05, - "loss": 0.6812, + "epoch": 0.2942625835973249, + "grad_norm": 1.9494284391403198, + "learning_rate": 1.1589615218522026e-05, + "loss": 0.8535, "step": 4180 }, { - "epoch": 0.31637092807687944, - "grad_norm": 1.8733904361724854, - "learning_rate": 1.4728343060031046e-05, - "loss": 0.7784, + "epoch": 0.29433298134459696, + "grad_norm": 1.773877739906311, + "learning_rate": 1.1588409948434694e-05, + "loss": 0.7451, "step": 4181 }, { - "epoch": 0.31644659679921305, - "grad_norm": 2.0452613830566406, - "learning_rate": 1.4726605765674788e-05, - "loss": 0.71, + "epoch": 0.29440337909186903, + "grad_norm": 1.7964597940444946, + "learning_rate": 1.1587204439791774e-05, + "loss": 0.6487, "step": 4182 }, { - "epoch": 0.31652226552154666, - "grad_norm": 3.1773879528045654, - "learning_rate": 1.4724868112699957e-05, - "loss": 0.7515, + "epoch": 0.29447377683914117, + "grad_norm": 1.847320318222046, + "learning_rate": 1.1585998692655943e-05, + "loss": 0.7303, "step": 4183 }, { - "epoch": 0.3165979342438803, - "grad_norm": 1.9944331645965576, - "learning_rate": 1.4723130101215364e-05, - "loss": 0.686, + "epoch": 0.29454417458641324, + "grad_norm": 1.9165500402450562, + "learning_rate": 1.1584792707089885e-05, + "loss": 0.8284, "step": 4184 }, { - "epoch": 0.31667360296621394, - "grad_norm": 2.6416072845458984, - "learning_rate": 1.4721391731329856e-05, - "loss": 0.8275, + "epoch": 0.2946145723336853, + "grad_norm": 1.9532299041748047, + "learning_rate": 1.15835864831563e-05, + "loss": 0.7341, "step": 4185 }, { - "epoch": 0.31674927168854755, - "grad_norm": 2.302402973175049, - "learning_rate": 1.4719653003152291e-05, - "loss": 0.6254, + "epoch": 0.2946849700809574, + "grad_norm": 1.7085585594177246, + "learning_rate": 1.1582380020917904e-05, + "loss": 0.7204, "step": 4186 }, { - "epoch": 0.31682494041088116, - "grad_norm": 2.278822422027588, - "learning_rate": 1.4717913916791561e-05, - "loss": 0.9094, + "epoch": 0.2947553678282295, + "grad_norm": 1.71831476688385, + "learning_rate": 1.158117332043742e-05, + "loss": 0.6988, "step": 4187 }, { - "epoch": 0.3169006091332148, - "grad_norm": 2.715550422668457, - "learning_rate": 1.4716174472356563e-05, - "loss": 0.8543, + "epoch": 0.2948257655755016, + "grad_norm": 1.91995108127594, + "learning_rate": 1.1579966381777588e-05, + "loss": 0.6642, "step": 4188 }, { - "epoch": 0.3169762778555484, - "grad_norm": 2.0558013916015625, - "learning_rate": 1.4714434669956228e-05, - "loss": 0.8176, + "epoch": 0.2948961633227737, + "grad_norm": 1.81782865524292, + "learning_rate": 1.1578759205001155e-05, + "loss": 0.7302, "step": 4189 }, { - "epoch": 0.31705194657788205, - "grad_norm": 2.050335645675659, - "learning_rate": 1.4712694509699517e-05, - "loss": 0.7816, + "epoch": 0.29496656107004576, + "grad_norm": 2.3147835731506348, + "learning_rate": 1.1577551790170884e-05, + "loss": 0.783, "step": 4190 }, { - "epoch": 0.31712761530021566, - "grad_norm": 2.250823497772217, - "learning_rate": 1.4710953991695394e-05, - "loss": 0.7854, + "epoch": 0.29503695881731784, + "grad_norm": 2.050804853439331, + "learning_rate": 1.1576344137349548e-05, + "loss": 0.8828, "step": 4191 }, { - "epoch": 0.31720328402254927, - "grad_norm": 2.516578435897827, - "learning_rate": 1.4709213116052864e-05, - "loss": 0.7717, + "epoch": 0.2951073565645899, + "grad_norm": 2.0761051177978516, + "learning_rate": 1.1575136246599938e-05, + "loss": 0.6539, "step": 4192 }, { - "epoch": 0.3172789527448829, - "grad_norm": 2.0898921489715576, - "learning_rate": 1.4707471882880942e-05, - "loss": 0.7474, + "epoch": 0.295177754311862, + "grad_norm": 2.17378830909729, + "learning_rate": 1.1573928117984852e-05, + "loss": 0.7561, "step": 4193 }, { - "epoch": 0.31735462146721655, - "grad_norm": 2.837730646133423, - "learning_rate": 1.470573029228867e-05, - "loss": 0.6944, + "epoch": 0.29524815205913413, + "grad_norm": 1.427598476409912, + "learning_rate": 1.1572719751567097e-05, + "loss": 0.586, "step": 4194 }, { - "epoch": 0.31743029018955016, - "grad_norm": 2.0054285526275635, - "learning_rate": 1.4703988344385113e-05, - "loss": 0.902, + "epoch": 0.2953185498064062, + "grad_norm": 1.9893481731414795, + "learning_rate": 1.1571511147409506e-05, + "loss": 0.7115, "step": 4195 }, { - "epoch": 0.31750595891188377, - "grad_norm": 2.452274799346924, - "learning_rate": 1.4702246039279356e-05, - "loss": 0.7572, + "epoch": 0.2953889475536783, + "grad_norm": 1.9446420669555664, + "learning_rate": 1.1570302305574905e-05, + "loss": 0.7192, "step": 4196 }, { - "epoch": 0.3175816276342174, - "grad_norm": 2.3126518726348877, - "learning_rate": 1.470050337708051e-05, - "loss": 0.8434, + "epoch": 0.29545934530095036, + "grad_norm": 2.028529167175293, + "learning_rate": 1.1569093226126152e-05, + "loss": 0.6063, "step": 4197 }, { - "epoch": 0.31765729635655104, - "grad_norm": 1.9818753004074097, - "learning_rate": 1.4698760357897703e-05, - "loss": 0.7367, + "epoch": 0.29552974304822244, + "grad_norm": 2.4359123706817627, + "learning_rate": 1.1567883909126102e-05, + "loss": 0.7819, "step": 4198 }, { - "epoch": 0.31773296507888465, - "grad_norm": 2.2270803451538086, - "learning_rate": 1.4697016981840091e-05, - "loss": 0.7117, + "epoch": 0.29560014079549457, + "grad_norm": 2.1275901794433594, + "learning_rate": 1.156667435463763e-05, + "loss": 0.7689, "step": 4199 }, { - "epoch": 0.31780863380121827, - "grad_norm": 2.7719006538391113, - "learning_rate": 1.469527324901685e-05, - "loss": 0.902, + "epoch": 0.29567053854276665, + "grad_norm": 1.8817106485366821, + "learning_rate": 1.1565464562723624e-05, + "loss": 0.7682, "step": 4200 }, { - "epoch": 0.3178843025235519, - "grad_norm": 2.4166553020477295, - "learning_rate": 1.4693529159537179e-05, - "loss": 0.7287, + "epoch": 0.2957409362900387, + "grad_norm": 2.0288326740264893, + "learning_rate": 1.156425453344698e-05, + "loss": 0.6422, "step": 4201 }, { - "epoch": 0.3179599712458855, - "grad_norm": 2.3354287147521973, - "learning_rate": 1.4691784713510294e-05, - "loss": 0.7441, + "epoch": 0.2958113340373108, + "grad_norm": 2.18570876121521, + "learning_rate": 1.156304426687061e-05, + "loss": 0.7223, "step": 4202 }, { - "epoch": 0.31803563996821915, - "grad_norm": 2.553823709487915, - "learning_rate": 1.4690039911045443e-05, - "loss": 0.7037, + "epoch": 0.2958817317845829, + "grad_norm": 2.2534477710723877, + "learning_rate": 1.1561833763057435e-05, + "loss": 0.7341, "step": 4203 }, { - "epoch": 0.31811130869055276, - "grad_norm": 3.147313356399536, - "learning_rate": 1.4688294752251888e-05, - "loss": 0.8055, + "epoch": 0.29595212953185496, + "grad_norm": 1.8023364543914795, + "learning_rate": 1.1560623022070392e-05, + "loss": 0.7043, "step": 4204 }, { - "epoch": 0.3181869774128864, - "grad_norm": 2.2056877613067627, - "learning_rate": 1.4686549237238917e-05, - "loss": 0.6627, + "epoch": 0.2960225272791271, + "grad_norm": 2.182415246963501, + "learning_rate": 1.1559412043972426e-05, + "loss": 0.6867, "step": 4205 }, { - "epoch": 0.31826264613522, - "grad_norm": 2.1581954956054688, - "learning_rate": 1.4684803366115841e-05, - "loss": 0.7106, + "epoch": 0.29609292502639917, + "grad_norm": 1.9917776584625244, + "learning_rate": 1.1558200828826501e-05, + "loss": 0.7112, "step": 4206 }, { - "epoch": 0.31833831485755365, - "grad_norm": 2.5941429138183594, - "learning_rate": 1.468305713899199e-05, - "loss": 0.8563, + "epoch": 0.29616332277367124, + "grad_norm": 1.6527243852615356, + "learning_rate": 1.1556989376695587e-05, + "loss": 0.6748, "step": 4207 }, { - "epoch": 0.31841398357988726, - "grad_norm": 2.1362545490264893, - "learning_rate": 1.468131055597672e-05, - "loss": 0.7852, + "epoch": 0.2962337205209433, + "grad_norm": 1.7569165229797363, + "learning_rate": 1.1555777687642667e-05, + "loss": 0.5539, "step": 4208 }, { - "epoch": 0.31848965230222087, - "grad_norm": 3.146714925765991, - "learning_rate": 1.4679563617179408e-05, - "loss": 0.8228, + "epoch": 0.2963041182682154, + "grad_norm": 2.6174957752227783, + "learning_rate": 1.1554565761730738e-05, + "loss": 0.8157, "step": 4209 }, { - "epoch": 0.3185653210245545, - "grad_norm": 2.1198391914367676, - "learning_rate": 1.4677816322709452e-05, - "loss": 0.8172, + "epoch": 0.2963745160154875, + "grad_norm": 2.1233229637145996, + "learning_rate": 1.1553353599022812e-05, + "loss": 0.7694, "step": 4210 }, { - "epoch": 0.31864098974688815, - "grad_norm": 2.367765426635742, - "learning_rate": 1.4676068672676274e-05, - "loss": 0.6736, + "epoch": 0.2964449137627596, + "grad_norm": 2.2643871307373047, + "learning_rate": 1.1552141199581906e-05, + "loss": 0.6073, "step": 4211 }, { - "epoch": 0.31871665846922176, - "grad_norm": 1.9327032566070557, - "learning_rate": 1.4674320667189317e-05, - "loss": 0.5936, + "epoch": 0.2965153115100317, + "grad_norm": 1.6998854875564575, + "learning_rate": 1.155092856347106e-05, + "loss": 0.7159, "step": 4212 }, { - "epoch": 0.31879232719155537, - "grad_norm": 2.0183558464050293, - "learning_rate": 1.4672572306358048e-05, - "loss": 0.707, + "epoch": 0.29658570925730376, + "grad_norm": 2.094691276550293, + "learning_rate": 1.1549715690753315e-05, + "loss": 0.6835, "step": 4213 }, { - "epoch": 0.318867995913889, - "grad_norm": 2.5018794536590576, - "learning_rate": 1.4670823590291953e-05, - "loss": 0.8315, + "epoch": 0.29665610700457584, + "grad_norm": 1.8556499481201172, + "learning_rate": 1.154850258149173e-05, + "loss": 0.8128, "step": 4214 }, { - "epoch": 0.3189436646362226, - "grad_norm": 1.932250738143921, - "learning_rate": 1.466907451910054e-05, - "loss": 0.6677, + "epoch": 0.2967265047518479, + "grad_norm": 1.7647641897201538, + "learning_rate": 1.1547289235749376e-05, + "loss": 0.7159, "step": 4215 }, { - "epoch": 0.31901933335855626, - "grad_norm": 2.460291862487793, - "learning_rate": 1.4667325092893349e-05, - "loss": 0.7883, + "epoch": 0.29679690249912005, + "grad_norm": 2.043081045150757, + "learning_rate": 1.154607565358934e-05, + "loss": 0.6519, "step": 4216 }, { - "epoch": 0.31909500208088987, - "grad_norm": 2.749185800552368, - "learning_rate": 1.466557531177993e-05, - "loss": 0.6801, + "epoch": 0.29686730024639213, + "grad_norm": 1.9042656421661377, + "learning_rate": 1.1544861835074709e-05, + "loss": 0.7325, "step": 4217 }, { - "epoch": 0.3191706708032235, - "grad_norm": 2.338014602661133, - "learning_rate": 1.4663825175869858e-05, - "loss": 0.7228, + "epoch": 0.2969376979936642, + "grad_norm": 1.7995637655258179, + "learning_rate": 1.1543647780268597e-05, + "loss": 0.6594, "step": 4218 }, { - "epoch": 0.3192463395255571, - "grad_norm": 2.1747679710388184, - "learning_rate": 1.4662074685272735e-05, - "loss": 0.6736, + "epoch": 0.2970080957409363, + "grad_norm": 2.0484862327575684, + "learning_rate": 1.1542433489234123e-05, + "loss": 0.7238, "step": 4219 }, { - "epoch": 0.31932200824789075, - "grad_norm": 2.656726360321045, - "learning_rate": 1.4660323840098184e-05, - "loss": 0.6297, + "epoch": 0.29707849348820836, + "grad_norm": 1.6586052179336548, + "learning_rate": 1.1541218962034415e-05, + "loss": 0.6797, "step": 4220 }, { - "epoch": 0.31939767697022436, - "grad_norm": 2.0806820392608643, - "learning_rate": 1.4658572640455842e-05, - "loss": 0.6698, + "epoch": 0.29714889123548044, + "grad_norm": 2.2509992122650146, + "learning_rate": 1.1540004198732622e-05, + "loss": 0.8347, "step": 4221 }, { - "epoch": 0.319473345692558, - "grad_norm": 2.605621099472046, - "learning_rate": 1.4656821086455383e-05, - "loss": 0.8796, + "epoch": 0.29721928898275257, + "grad_norm": 2.2073113918304443, + "learning_rate": 1.15387891993919e-05, + "loss": 0.7754, "step": 4222 }, { - "epoch": 0.3195490144148916, - "grad_norm": 2.9140431880950928, - "learning_rate": 1.465506917820649e-05, - "loss": 0.6627, + "epoch": 0.29728968673002465, + "grad_norm": 1.8321969509124756, + "learning_rate": 1.1537573964075417e-05, + "loss": 0.6962, "step": 4223 }, { - "epoch": 0.31962468313722525, - "grad_norm": 2.8789541721343994, - "learning_rate": 1.4653316915818876e-05, - "loss": 0.683, + "epoch": 0.2973600844772967, + "grad_norm": 2.197364330291748, + "learning_rate": 1.1536358492846351e-05, + "loss": 0.7485, "step": 4224 }, { - "epoch": 0.31970035185955886, - "grad_norm": 2.5387415885925293, - "learning_rate": 1.465156429940227e-05, - "loss": 0.8299, + "epoch": 0.2974304822245688, + "grad_norm": 1.976435899734497, + "learning_rate": 1.1535142785767902e-05, + "loss": 0.7007, "step": 4225 }, { - "epoch": 0.3197760205818925, - "grad_norm": 2.163238048553467, - "learning_rate": 1.4649811329066428e-05, - "loss": 0.7167, + "epoch": 0.2975008799718409, + "grad_norm": 1.7424243688583374, + "learning_rate": 1.1533926842903271e-05, + "loss": 0.7407, "step": 4226 }, { - "epoch": 0.3198516893042261, - "grad_norm": 2.0733134746551514, - "learning_rate": 1.4648058004921126e-05, - "loss": 0.7044, + "epoch": 0.297571277719113, + "grad_norm": 1.8767036199569702, + "learning_rate": 1.1532710664315678e-05, + "loss": 0.6303, "step": 4227 }, { - "epoch": 0.3199273580265597, - "grad_norm": 2.783489227294922, - "learning_rate": 1.4646304327076165e-05, - "loss": 0.9027, + "epoch": 0.2976416754663851, + "grad_norm": 1.6602106094360352, + "learning_rate": 1.1531494250068353e-05, + "loss": 0.6469, "step": 4228 }, { - "epoch": 0.32000302674889336, - "grad_norm": 2.3627331256866455, - "learning_rate": 1.4644550295641367e-05, - "loss": 0.8586, + "epoch": 0.29771207321365717, + "grad_norm": 1.9886139631271362, + "learning_rate": 1.1530277600224537e-05, + "loss": 0.6584, "step": 4229 }, { - "epoch": 0.32007869547122697, - "grad_norm": 4.856930255889893, - "learning_rate": 1.464279591072657e-05, - "loss": 0.6494, + "epoch": 0.29778247096092925, + "grad_norm": 1.772824764251709, + "learning_rate": 1.1529060714847484e-05, + "loss": 0.6713, "step": 4230 }, { - "epoch": 0.3201543641935606, - "grad_norm": 2.0555548667907715, - "learning_rate": 1.4641041172441642e-05, - "loss": 0.7398, + "epoch": 0.2978528687082013, + "grad_norm": 1.763185739517212, + "learning_rate": 1.1527843594000466e-05, + "loss": 0.8575, "step": 4231 }, { - "epoch": 0.3202300329158942, - "grad_norm": 2.1479907035827637, - "learning_rate": 1.4639286080896468e-05, - "loss": 0.769, + "epoch": 0.2979232664554734, + "grad_norm": 2.180553674697876, + "learning_rate": 1.1526626237746758e-05, + "loss": 0.6526, "step": 4232 }, { - "epoch": 0.32030570163822786, - "grad_norm": 2.175365924835205, - "learning_rate": 1.4637530636200965e-05, - "loss": 0.6443, + "epoch": 0.29799366420274553, + "grad_norm": 1.6845239400863647, + "learning_rate": 1.1525408646149651e-05, + "loss": 0.7712, "step": 4233 }, { - "epoch": 0.32038137036056147, - "grad_norm": 3.0681824684143066, - "learning_rate": 1.4635774838465055e-05, - "loss": 0.8433, + "epoch": 0.2980640619500176, + "grad_norm": 1.873063325881958, + "learning_rate": 1.1524190819272452e-05, + "loss": 0.6805, "step": 4234 }, { - "epoch": 0.3204570390828951, - "grad_norm": 2.8694117069244385, - "learning_rate": 1.46340186877987e-05, - "loss": 0.6974, + "epoch": 0.2981344596972897, + "grad_norm": 2.042973756790161, + "learning_rate": 1.1522972757178472e-05, + "loss": 0.7536, "step": 4235 }, { - "epoch": 0.3205327078052287, - "grad_norm": 3.192309856414795, - "learning_rate": 1.4632262184311872e-05, - "loss": 0.7687, + "epoch": 0.29820485744456177, + "grad_norm": 2.2274844646453857, + "learning_rate": 1.1521754459931045e-05, + "loss": 0.67, "step": 4236 }, { - "epoch": 0.32060837652756236, - "grad_norm": 2.460700750350952, - "learning_rate": 1.4630505328114569e-05, - "loss": 0.8654, + "epoch": 0.29827525519183384, + "grad_norm": 1.9070934057235718, + "learning_rate": 1.1520535927593507e-05, + "loss": 0.6857, "step": 4237 }, { - "epoch": 0.32068404524989597, - "grad_norm": 2.13419771194458, - "learning_rate": 1.4628748119316807e-05, - "loss": 0.6749, + "epoch": 0.2983456529391059, + "grad_norm": 2.415416717529297, + "learning_rate": 1.1519317160229213e-05, + "loss": 0.7473, "step": 4238 }, { - "epoch": 0.3207597139722296, - "grad_norm": 2.089223623275757, - "learning_rate": 1.4626990558028636e-05, - "loss": 0.8542, + "epoch": 0.29841605068637805, + "grad_norm": 1.8746765851974487, + "learning_rate": 1.1518098157901524e-05, + "loss": 0.6734, "step": 4239 }, { - "epoch": 0.3208353826945632, - "grad_norm": 2.2618892192840576, - "learning_rate": 1.4625232644360117e-05, - "loss": 0.7101, + "epoch": 0.29848644843365013, + "grad_norm": 1.9516246318817139, + "learning_rate": 1.1516878920673824e-05, + "loss": 0.6166, "step": 4240 }, { - "epoch": 0.32091105141689685, - "grad_norm": 2.1564035415649414, - "learning_rate": 1.4623474378421333e-05, - "loss": 0.7211, + "epoch": 0.2985568461809222, + "grad_norm": 1.9970767498016357, + "learning_rate": 1.1515659448609494e-05, + "loss": 0.8967, "step": 4241 }, { - "epoch": 0.32098672013923046, - "grad_norm": 2.1531484127044678, - "learning_rate": 1.4621715760322398e-05, - "loss": 0.7455, + "epoch": 0.2986272439281943, + "grad_norm": 1.9818624258041382, + "learning_rate": 1.1514439741771942e-05, + "loss": 0.747, "step": 4242 }, { - "epoch": 0.3210623888615641, - "grad_norm": 2.0043506622314453, - "learning_rate": 1.4619956790173435e-05, - "loss": 0.8069, + "epoch": 0.29869764167546636, + "grad_norm": 1.9874707460403442, + "learning_rate": 1.151321980022458e-05, + "loss": 0.6131, "step": 4243 }, { - "epoch": 0.3211380575838977, - "grad_norm": 2.2013986110687256, - "learning_rate": 1.4618197468084605e-05, - "loss": 0.8981, + "epoch": 0.2987680394227385, + "grad_norm": 2.037710666656494, + "learning_rate": 1.1511999624030832e-05, + "loss": 0.7749, "step": 4244 }, { - "epoch": 0.3212137263062313, - "grad_norm": 2.18361496925354, - "learning_rate": 1.4616437794166073e-05, - "loss": 0.9738, + "epoch": 0.2988384371700106, + "grad_norm": 1.9172910451889038, + "learning_rate": 1.1510779213254139e-05, + "loss": 0.7046, "step": 4245 }, { - "epoch": 0.32128939502856496, - "grad_norm": 2.511524200439453, - "learning_rate": 1.4614677768528046e-05, - "loss": 0.6794, + "epoch": 0.29890883491728265, + "grad_norm": 1.749405026435852, + "learning_rate": 1.1509558567957949e-05, + "loss": 0.6931, "step": 4246 }, { - "epoch": 0.32136506375089857, - "grad_norm": 2.317763090133667, - "learning_rate": 1.4612917391280734e-05, - "loss": 0.7569, + "epoch": 0.29897923266455473, + "grad_norm": 1.7065517902374268, + "learning_rate": 1.1508337688205724e-05, + "loss": 0.6921, "step": 4247 }, { - "epoch": 0.3214407324732322, - "grad_norm": 2.617969274520874, - "learning_rate": 1.4611156662534382e-05, - "loss": 0.7191, + "epoch": 0.2990496304118268, + "grad_norm": 1.6949689388275146, + "learning_rate": 1.150711657406094e-05, + "loss": 0.6667, "step": 4248 }, { - "epoch": 0.3215164011955658, - "grad_norm": 2.400247097015381, - "learning_rate": 1.4609395582399249e-05, - "loss": 0.72, + "epoch": 0.2991200281590989, + "grad_norm": 1.9782726764678955, + "learning_rate": 1.1505895225587084e-05, + "loss": 0.729, "step": 4249 }, { - "epoch": 0.32159206991789946, - "grad_norm": 2.0886826515197754, - "learning_rate": 1.4607634150985624e-05, - "loss": 0.7317, + "epoch": 0.299190425906371, + "grad_norm": 1.8376926183700562, + "learning_rate": 1.1504673642847652e-05, + "loss": 0.7957, "step": 4250 }, { - "epoch": 0.32166773864023307, - "grad_norm": 3.8635036945343018, - "learning_rate": 1.460587236840381e-05, - "loss": 0.7681, + "epoch": 0.2992608236536431, + "grad_norm": 1.9028868675231934, + "learning_rate": 1.150345182590616e-05, + "loss": 0.7499, "step": 4251 }, { - "epoch": 0.3217434073625667, - "grad_norm": 2.07661771774292, - "learning_rate": 1.4604110234764138e-05, - "loss": 0.6945, + "epoch": 0.29933122140091517, + "grad_norm": 2.0015385150909424, + "learning_rate": 1.1502229774826129e-05, + "loss": 0.7613, "step": 4252 }, { - "epoch": 0.3218190760849003, - "grad_norm": 2.098093032836914, - "learning_rate": 1.4602347750176957e-05, - "loss": 0.6292, + "epoch": 0.29940161914818725, + "grad_norm": 1.9646450281143188, + "learning_rate": 1.1501007489671093e-05, + "loss": 0.7153, "step": 4253 }, { - "epoch": 0.32189474480723396, - "grad_norm": 2.4790546894073486, - "learning_rate": 1.4600584914752637e-05, - "loss": 0.7044, + "epoch": 0.2994720168954593, + "grad_norm": 1.708627700805664, + "learning_rate": 1.1499784970504602e-05, + "loss": 0.6762, "step": 4254 }, { - "epoch": 0.32197041352956757, - "grad_norm": 2.268695831298828, - "learning_rate": 1.4598821728601579e-05, - "loss": 0.7005, + "epoch": 0.29954241464273146, + "grad_norm": 1.8956190347671509, + "learning_rate": 1.1498562217390216e-05, + "loss": 0.7557, "step": 4255 }, { - "epoch": 0.3220460822519012, - "grad_norm": 2.2798666954040527, - "learning_rate": 1.4597058191834192e-05, - "loss": 0.7742, + "epoch": 0.29961281239000354, + "grad_norm": 2.0169382095336914, + "learning_rate": 1.1497339230391506e-05, + "loss": 0.7313, "step": 4256 }, { - "epoch": 0.3221217509742348, - "grad_norm": 3.1297192573547363, - "learning_rate": 1.4595294304560919e-05, - "loss": 0.7833, + "epoch": 0.2996832101372756, + "grad_norm": 1.7282469272613525, + "learning_rate": 1.1496116009572054e-05, + "loss": 0.7057, "step": 4257 }, { - "epoch": 0.3221974196965684, - "grad_norm": 2.595353841781616, - "learning_rate": 1.4593530066892218e-05, - "loss": 0.7436, + "epoch": 0.2997536078845477, + "grad_norm": 2.005577564239502, + "learning_rate": 1.149489255499546e-05, + "loss": 0.856, "step": 4258 }, { - "epoch": 0.32227308841890207, - "grad_norm": 2.4126555919647217, - "learning_rate": 1.4591765478938577e-05, - "loss": 0.798, + "epoch": 0.29982400563181977, + "grad_norm": 1.8666224479675293, + "learning_rate": 1.1493668866725334e-05, + "loss": 0.7105, "step": 4259 }, { - "epoch": 0.3223487571412357, - "grad_norm": 2.161207675933838, - "learning_rate": 1.4590000540810492e-05, - "loss": 0.6859, + "epoch": 0.29989440337909185, + "grad_norm": 1.6316384077072144, + "learning_rate": 1.149244494482529e-05, + "loss": 0.628, "step": 4260 }, { - "epoch": 0.3224244258635693, - "grad_norm": 2.0117027759552, - "learning_rate": 1.4588235252618494e-05, - "loss": 0.7389, + "epoch": 0.299964801126364, + "grad_norm": 2.8094582557678223, + "learning_rate": 1.1491220789358968e-05, + "loss": 0.7069, "step": 4261 }, { - "epoch": 0.3225000945859029, - "grad_norm": 2.079958915710449, - "learning_rate": 1.458646961447313e-05, - "loss": 0.788, + "epoch": 0.30003519887363606, + "grad_norm": 2.3220767974853516, + "learning_rate": 1.1489996400390007e-05, + "loss": 0.7615, "step": 4262 }, { - "epoch": 0.32257576330823656, - "grad_norm": 1.884169340133667, - "learning_rate": 1.458470362648497e-05, - "loss": 0.8014, + "epoch": 0.30010559662090813, + "grad_norm": 1.971850872039795, + "learning_rate": 1.1488771777982067e-05, + "loss": 0.7097, "step": 4263 }, { - "epoch": 0.3226514320305702, - "grad_norm": 2.48260760307312, - "learning_rate": 1.4582937288764604e-05, - "loss": 0.8323, + "epoch": 0.3001759943681802, + "grad_norm": 1.7892301082611084, + "learning_rate": 1.148754692219882e-05, + "loss": 0.7233, "step": 4264 }, { - "epoch": 0.3227271007529038, - "grad_norm": 2.3636391162872314, - "learning_rate": 1.458117060142265e-05, - "loss": 0.8989, + "epoch": 0.3002463921154523, + "grad_norm": 1.6804109811782837, + "learning_rate": 1.1486321833103942e-05, + "loss": 0.7664, "step": 4265 }, { - "epoch": 0.3228027694752374, - "grad_norm": 1.976210594177246, - "learning_rate": 1.4579403564569741e-05, - "loss": 0.8588, + "epoch": 0.3003167898627244, + "grad_norm": 1.9577311277389526, + "learning_rate": 1.1485096510761128e-05, + "loss": 0.6451, "step": 4266 }, { - "epoch": 0.32287843819757106, - "grad_norm": 8.20315170288086, - "learning_rate": 1.4577636178316533e-05, - "loss": 0.836, + "epoch": 0.3003871876099965, + "grad_norm": 1.7580907344818115, + "learning_rate": 1.1483870955234086e-05, + "loss": 0.6583, "step": 4267 }, { - "epoch": 0.32295410691990467, - "grad_norm": 3.0736913681030273, - "learning_rate": 1.4575868442773708e-05, - "loss": 0.8805, + "epoch": 0.3004575853572686, + "grad_norm": 1.8449066877365112, + "learning_rate": 1.1482645166586531e-05, + "loss": 0.7346, "step": 4268 }, { - "epoch": 0.3230297756422383, - "grad_norm": 2.023252010345459, - "learning_rate": 1.4574100358051967e-05, - "loss": 0.6687, + "epoch": 0.30052798310454065, + "grad_norm": 1.741262435913086, + "learning_rate": 1.1481419144882194e-05, + "loss": 0.6865, "step": 4269 }, { - "epoch": 0.3231054443645719, - "grad_norm": 2.3063673973083496, - "learning_rate": 1.4572331924262033e-05, - "loss": 0.7783, + "epoch": 0.30059838085181273, + "grad_norm": 1.6650372743606567, + "learning_rate": 1.148019289018482e-05, + "loss": 0.6263, "step": 4270 }, { - "epoch": 0.3231811130869055, - "grad_norm": 3.034928321838379, - "learning_rate": 1.4570563141514651e-05, - "loss": 0.9137, + "epoch": 0.3006687785990848, + "grad_norm": 1.4766508340835571, + "learning_rate": 1.1478966402558158e-05, + "loss": 0.7805, "step": 4271 }, { - "epoch": 0.32325678180923917, - "grad_norm": 3.5648000240325928, - "learning_rate": 1.4568794009920588e-05, - "loss": 0.723, + "epoch": 0.30073917634635694, + "grad_norm": 1.7098069190979004, + "learning_rate": 1.1477739682065976e-05, + "loss": 0.7468, "step": 4272 }, { - "epoch": 0.3233324505315728, - "grad_norm": 2.6870532035827637, - "learning_rate": 1.456702452959063e-05, - "loss": 0.8169, + "epoch": 0.300809574093629, + "grad_norm": 2.083409547805786, + "learning_rate": 1.1476512728772052e-05, + "loss": 0.7059, "step": 4273 }, { - "epoch": 0.3234081192539064, - "grad_norm": 2.3009469509124756, - "learning_rate": 1.4565254700635593e-05, - "loss": 0.751, + "epoch": 0.3008799718409011, + "grad_norm": 1.970391035079956, + "learning_rate": 1.1475285542740178e-05, + "loss": 0.7215, "step": 4274 }, { - "epoch": 0.32348378797624, - "grad_norm": 1.9499804973602295, - "learning_rate": 1.4563484523166307e-05, - "loss": 0.6896, + "epoch": 0.30095036958817317, + "grad_norm": 1.8966392278671265, + "learning_rate": 1.1474058124034154e-05, + "loss": 0.6677, "step": 4275 }, { - "epoch": 0.32355945669857367, - "grad_norm": 2.177910804748535, - "learning_rate": 1.4561713997293621e-05, - "loss": 0.6619, + "epoch": 0.30102076733544525, + "grad_norm": 1.7939531803131104, + "learning_rate": 1.1472830472717799e-05, + "loss": 0.6206, "step": 4276 }, { - "epoch": 0.3236351254209073, - "grad_norm": 2.637498378753662, - "learning_rate": 1.4559943123128418e-05, - "loss": 0.8201, + "epoch": 0.3010911650827173, + "grad_norm": 2.028278350830078, + "learning_rate": 1.1471602588854936e-05, + "loss": 0.7488, "step": 4277 }, { - "epoch": 0.3237107941432409, - "grad_norm": 2.287648916244507, - "learning_rate": 1.4558171900781594e-05, - "loss": 0.8366, + "epoch": 0.30116156282998946, + "grad_norm": 2.0844414234161377, + "learning_rate": 1.1470374472509405e-05, + "loss": 0.7145, "step": 4278 }, { - "epoch": 0.3237864628655745, - "grad_norm": 2.5055220127105713, - "learning_rate": 1.455640033036407e-05, - "loss": 0.7382, + "epoch": 0.30123196057726154, + "grad_norm": 2.0381007194519043, + "learning_rate": 1.1469146123745055e-05, + "loss": 0.7553, "step": 4279 }, { - "epoch": 0.32386213158790816, - "grad_norm": 2.373974323272705, - "learning_rate": 1.4554628411986783e-05, - "loss": 0.8601, + "epoch": 0.3013023583245336, + "grad_norm": 1.7207715511322021, + "learning_rate": 1.1467917542625753e-05, + "loss": 0.7665, "step": 4280 }, { - "epoch": 0.3239378003102418, - "grad_norm": 2.571704387664795, - "learning_rate": 1.45528561457607e-05, - "loss": 0.7664, + "epoch": 0.3013727560718057, + "grad_norm": 1.938336730003357, + "learning_rate": 1.1466688729215369e-05, + "loss": 0.6937, "step": 4281 }, { - "epoch": 0.3240134690325754, - "grad_norm": 2.084261655807495, - "learning_rate": 1.4551083531796807e-05, - "loss": 0.8355, + "epoch": 0.30144315381907777, + "grad_norm": 1.9975801706314087, + "learning_rate": 1.1465459683577794e-05, + "loss": 0.661, "step": 4282 }, { - "epoch": 0.324089137754909, - "grad_norm": 13.960339546203613, - "learning_rate": 1.4549310570206106e-05, - "loss": 0.6196, + "epoch": 0.3015135515663499, + "grad_norm": 1.7579379081726074, + "learning_rate": 1.1464230405776925e-05, + "loss": 0.6149, "step": 4283 }, { - "epoch": 0.3241648064772426, - "grad_norm": 2.3920133113861084, - "learning_rate": 1.454753726109963e-05, - "loss": 0.718, + "epoch": 0.301583949313622, + "grad_norm": 1.633048415184021, + "learning_rate": 1.1463000895876675e-05, + "loss": 0.7851, "step": 4284 }, { - "epoch": 0.3242404751995763, - "grad_norm": 4.138522148132324, - "learning_rate": 1.4545763604588427e-05, - "loss": 0.6937, + "epoch": 0.30165434706089406, + "grad_norm": 1.9303456544876099, + "learning_rate": 1.1461771153940967e-05, + "loss": 0.8619, "step": 4285 }, { - "epoch": 0.3243161439219099, - "grad_norm": 2.481703281402588, - "learning_rate": 1.454398960078357e-05, - "loss": 0.7318, + "epoch": 0.30172474480816613, + "grad_norm": 1.7720756530761719, + "learning_rate": 1.1460541180033734e-05, + "loss": 0.7095, "step": 4286 }, { - "epoch": 0.3243918126442435, - "grad_norm": 2.211909532546997, - "learning_rate": 1.4542215249796151e-05, - "loss": 0.6617, + "epoch": 0.3017951425554382, + "grad_norm": 1.850172996520996, + "learning_rate": 1.1459310974218927e-05, + "loss": 0.7403, "step": 4287 }, { - "epoch": 0.3244674813665771, - "grad_norm": 2.126004457473755, - "learning_rate": 1.454044055173729e-05, - "loss": 0.6787, + "epoch": 0.3018655403027103, + "grad_norm": 1.7154278755187988, + "learning_rate": 1.1458080536560505e-05, + "loss": 0.7715, "step": 4288 }, { - "epoch": 0.32454315008891077, - "grad_norm": 2.684769630432129, - "learning_rate": 1.4538665506718119e-05, - "loss": 0.6737, + "epoch": 0.3019359380499824, + "grad_norm": 1.7996597290039062, + "learning_rate": 1.1456849867122435e-05, + "loss": 0.8074, "step": 4289 }, { - "epoch": 0.3246188188112444, - "grad_norm": 1.9614814519882202, - "learning_rate": 1.4536890114849804e-05, - "loss": 0.6616, + "epoch": 0.3020063357972545, + "grad_norm": 1.8695168495178223, + "learning_rate": 1.1455618965968706e-05, + "loss": 0.721, "step": 4290 }, { - "epoch": 0.324694487533578, - "grad_norm": 2.360924482345581, - "learning_rate": 1.4535114376243518e-05, - "loss": 0.7476, + "epoch": 0.3020767335445266, + "grad_norm": 2.522082567214966, + "learning_rate": 1.1454387833163312e-05, + "loss": 0.6756, "step": 4291 }, { - "epoch": 0.3247701562559116, - "grad_norm": 2.6671664714813232, - "learning_rate": 1.4533338291010469e-05, - "loss": 0.745, + "epoch": 0.30214713129179865, + "grad_norm": 1.7567757368087769, + "learning_rate": 1.145315646877026e-05, + "loss": 0.8454, "step": 4292 }, { - "epoch": 0.32484582497824527, - "grad_norm": 3.544961929321289, - "learning_rate": 1.453156185926188e-05, - "loss": 0.7626, + "epoch": 0.30221752903907073, + "grad_norm": 1.7004073858261108, + "learning_rate": 1.1451924872853571e-05, + "loss": 0.8564, "step": 4293 }, { - "epoch": 0.3249214937005789, - "grad_norm": 1.3772363662719727, - "learning_rate": 1.4529785081108993e-05, - "loss": 0.8338, + "epoch": 0.30228792678634286, + "grad_norm": 2.590108633041382, + "learning_rate": 1.1450693045477273e-05, + "loss": 0.7542, "step": 4294 }, { - "epoch": 0.3249971624229125, - "grad_norm": 2.098928213119507, - "learning_rate": 1.4528007956663081e-05, - "loss": 0.8097, + "epoch": 0.30235832453361494, + "grad_norm": 1.9655492305755615, + "learning_rate": 1.1449460986705416e-05, + "loss": 0.8674, "step": 4295 }, { - "epoch": 0.3250728311452461, - "grad_norm": 2.549992084503174, - "learning_rate": 1.452623048603543e-05, - "loss": 0.8661, + "epoch": 0.302428722280887, + "grad_norm": 1.7174245119094849, + "learning_rate": 1.144822869660205e-05, + "loss": 0.6721, "step": 4296 }, { - "epoch": 0.3251484998675797, - "grad_norm": 2.1404833793640137, - "learning_rate": 1.4524452669337353e-05, - "loss": 0.6822, + "epoch": 0.3024991200281591, + "grad_norm": 1.8002259731292725, + "learning_rate": 1.1446996175231249e-05, + "loss": 0.8115, "step": 4297 }, { - "epoch": 0.3252241685899134, - "grad_norm": 2.1892287731170654, - "learning_rate": 1.452267450668018e-05, - "loss": 0.7878, + "epoch": 0.3025695177754312, + "grad_norm": 1.649996280670166, + "learning_rate": 1.1445763422657088e-05, + "loss": 0.6907, "step": 4298 }, { - "epoch": 0.325299837312247, - "grad_norm": 2.762024164199829, - "learning_rate": 1.4520895998175267e-05, - "loss": 0.8672, + "epoch": 0.30263991552270325, + "grad_norm": 1.7571666240692139, + "learning_rate": 1.144453043894366e-05, + "loss": 0.7523, "step": 4299 }, { - "epoch": 0.3253755060345806, - "grad_norm": 2.249630928039551, - "learning_rate": 1.451911714393399e-05, - "loss": 0.7267, + "epoch": 0.3027103132699754, + "grad_norm": 2.158149003982544, + "learning_rate": 1.1443297224155068e-05, + "loss": 0.7329, "step": 4300 }, { - "epoch": 0.3254511747569142, - "grad_norm": 2.6956732273101807, - "learning_rate": 1.451733794406775e-05, - "loss": 0.7433, + "epoch": 0.30278071101724746, + "grad_norm": 1.8641117811203003, + "learning_rate": 1.1442063778355432e-05, + "loss": 0.6249, "step": 4301 }, { - "epoch": 0.3255268434792479, - "grad_norm": 1.908390760421753, - "learning_rate": 1.4515558398687958e-05, - "loss": 0.838, + "epoch": 0.30285110876451954, + "grad_norm": 1.801475167274475, + "learning_rate": 1.1440830101608874e-05, + "loss": 0.8757, "step": 4302 }, { - "epoch": 0.3256025122015815, - "grad_norm": 1.9399058818817139, - "learning_rate": 1.4513778507906063e-05, - "loss": 0.7207, + "epoch": 0.3029215065117916, + "grad_norm": 2.071516990661621, + "learning_rate": 1.143959619397954e-05, + "loss": 0.7191, "step": 4303 }, { - "epoch": 0.3256781809239151, - "grad_norm": 2.5229694843292236, - "learning_rate": 1.4511998271833522e-05, - "loss": 0.8095, + "epoch": 0.3029919042590637, + "grad_norm": 1.8381575345993042, + "learning_rate": 1.1438362055531576e-05, + "loss": 0.6116, "step": 4304 }, { - "epoch": 0.3257538496462487, - "grad_norm": 1.9527181386947632, - "learning_rate": 1.4510217690581824e-05, - "loss": 0.6514, + "epoch": 0.30306230200633577, + "grad_norm": 1.7437951564788818, + "learning_rate": 1.1437127686329152e-05, + "loss": 0.7939, "step": 4305 }, { - "epoch": 0.32582951836858237, - "grad_norm": 2.4306392669677734, - "learning_rate": 1.4508436764262467e-05, - "loss": 0.7142, + "epoch": 0.3031326997536079, + "grad_norm": 1.7240639925003052, + "learning_rate": 1.143589308643644e-05, + "loss": 0.711, "step": 4306 }, { - "epoch": 0.325905187090916, - "grad_norm": 2.332416534423828, - "learning_rate": 1.4506655492986985e-05, - "loss": 0.6613, + "epoch": 0.30320309750088, + "grad_norm": 1.564963936805725, + "learning_rate": 1.1434658255917628e-05, + "loss": 0.8158, "step": 4307 }, { - "epoch": 0.3259808558132496, - "grad_norm": 1.7992033958435059, - "learning_rate": 1.4504873876866928e-05, - "loss": 0.6745, + "epoch": 0.30327349524815206, + "grad_norm": 1.801493525505066, + "learning_rate": 1.1433423194836918e-05, + "loss": 0.7385, "step": 4308 }, { - "epoch": 0.3260565245355832, - "grad_norm": 2.053755283355713, - "learning_rate": 1.4503091916013861e-05, - "loss": 0.8298, + "epoch": 0.30334389299542414, + "grad_norm": 1.7981945276260376, + "learning_rate": 1.1432187903258521e-05, + "loss": 0.6779, "step": 4309 }, { - "epoch": 0.3261321932579168, - "grad_norm": 2.6498591899871826, - "learning_rate": 1.4501309610539382e-05, - "loss": 0.7602, + "epoch": 0.3034142907426962, + "grad_norm": 1.9064526557922363, + "learning_rate": 1.143095238124666e-05, + "loss": 0.6856, "step": 4310 }, { - "epoch": 0.3262078619802505, - "grad_norm": 2.5992958545684814, - "learning_rate": 1.44995269605551e-05, - "loss": 0.7103, + "epoch": 0.30348468848996835, + "grad_norm": 1.7649400234222412, + "learning_rate": 1.1429716628865573e-05, + "loss": 0.6884, "step": 4311 }, { - "epoch": 0.3262835307025841, - "grad_norm": 2.5793347358703613, - "learning_rate": 1.4497743966172652e-05, - "loss": 0.6369, + "epoch": 0.3035550862372404, + "grad_norm": 1.8437376022338867, + "learning_rate": 1.1428480646179507e-05, + "loss": 0.682, "step": 4312 }, { - "epoch": 0.3263591994249177, - "grad_norm": 2.428252696990967, - "learning_rate": 1.4495960627503695e-05, - "loss": 0.7442, + "epoch": 0.3036254839845125, + "grad_norm": 1.8373726606369019, + "learning_rate": 1.1427244433252721e-05, + "loss": 0.7228, "step": 4313 }, { - "epoch": 0.3264348681472513, - "grad_norm": 2.169275999069214, - "learning_rate": 1.449417694465991e-05, - "loss": 0.6564, + "epoch": 0.3036958817317846, + "grad_norm": 1.9337040185928345, + "learning_rate": 1.1426007990149489e-05, + "loss": 0.7465, "step": 4314 }, { - "epoch": 0.326510536869585, - "grad_norm": 2.4342880249023438, - "learning_rate": 1.449239291775299e-05, - "loss": 0.7119, + "epoch": 0.30376627947905666, + "grad_norm": 2.076680898666382, + "learning_rate": 1.142477131693409e-05, + "loss": 0.6002, "step": 4315 }, { - "epoch": 0.3265862055919186, - "grad_norm": 2.3536922931671143, - "learning_rate": 1.4490608546894663e-05, - "loss": 0.7913, + "epoch": 0.30383667722632873, + "grad_norm": 1.6452783346176147, + "learning_rate": 1.1423534413670823e-05, + "loss": 0.6466, "step": 4316 }, { - "epoch": 0.3266618743142522, - "grad_norm": 2.1272964477539062, - "learning_rate": 1.4488823832196671e-05, - "loss": 0.8102, + "epoch": 0.30390707497360087, + "grad_norm": 2.4098012447357178, + "learning_rate": 1.1422297280423997e-05, + "loss": 0.609, "step": 4317 }, { - "epoch": 0.3267375430365858, - "grad_norm": 2.338432550430298, - "learning_rate": 1.4487038773770778e-05, - "loss": 0.7231, + "epoch": 0.30397747272087294, + "grad_norm": 2.0799379348754883, + "learning_rate": 1.1421059917257933e-05, + "loss": 0.7568, "step": 4318 }, { - "epoch": 0.3268132117589195, - "grad_norm": 2.033783197402954, - "learning_rate": 1.4485253371728769e-05, - "loss": 0.7513, + "epoch": 0.304047870468145, + "grad_norm": 1.8928236961364746, + "learning_rate": 1.1419822324236954e-05, + "loss": 0.745, "step": 4319 }, { - "epoch": 0.3268888804812531, - "grad_norm": 2.384861946105957, - "learning_rate": 1.448346762618245e-05, - "loss": 0.6621, + "epoch": 0.3041182682154171, + "grad_norm": 1.962492823600769, + "learning_rate": 1.1418584501425414e-05, + "loss": 0.6508, "step": 4320 }, { - "epoch": 0.3269645492035867, - "grad_norm": 2.846778631210327, - "learning_rate": 1.4481681537243652e-05, - "loss": 0.739, + "epoch": 0.3041886659626892, + "grad_norm": 2.0172646045684814, + "learning_rate": 1.1417346448887664e-05, + "loss": 0.7898, "step": 4321 }, { - "epoch": 0.3270402179259203, - "grad_norm": 2.581815242767334, - "learning_rate": 1.447989510502423e-05, - "loss": 0.6875, + "epoch": 0.3042590637099613, + "grad_norm": 1.7276333570480347, + "learning_rate": 1.141610816668807e-05, + "loss": 0.7952, "step": 4322 }, { - "epoch": 0.3271158866482539, - "grad_norm": 2.7722952365875244, - "learning_rate": 1.4478108329636053e-05, - "loss": 0.7614, + "epoch": 0.3043294614572334, + "grad_norm": 2.5904393196105957, + "learning_rate": 1.1414869654891013e-05, + "loss": 0.756, "step": 4323 }, { - "epoch": 0.3271915553705876, - "grad_norm": 1.9879074096679688, - "learning_rate": 1.4476321211191012e-05, - "loss": 0.8599, + "epoch": 0.30439985920450546, + "grad_norm": 1.8802542686462402, + "learning_rate": 1.1413630913560886e-05, + "loss": 0.7895, "step": 4324 }, { - "epoch": 0.3272672240929212, - "grad_norm": 2.1524455547332764, - "learning_rate": 1.4474533749801024e-05, - "loss": 0.7467, + "epoch": 0.30447025695177754, + "grad_norm": 1.5468106269836426, + "learning_rate": 1.1412391942762087e-05, + "loss": 0.6655, "step": 4325 }, { - "epoch": 0.3273428928152548, - "grad_norm": 1.877977728843689, - "learning_rate": 1.4472745945578023e-05, - "loss": 0.7021, + "epoch": 0.3045406546990496, + "grad_norm": 1.8192750215530396, + "learning_rate": 1.1411152742559038e-05, + "loss": 0.7719, "step": 4326 }, { - "epoch": 0.3274185615375884, - "grad_norm": 2.359576940536499, - "learning_rate": 1.4470957798633974e-05, - "loss": 0.8945, + "epoch": 0.3046110524463217, + "grad_norm": 1.7130701541900635, + "learning_rate": 1.1409913313016162e-05, + "loss": 0.7601, "step": 4327 }, { - "epoch": 0.3274942302599221, - "grad_norm": 2.371588706970215, - "learning_rate": 1.4469169309080853e-05, - "loss": 0.8712, + "epoch": 0.30468145019359383, + "grad_norm": 1.583688497543335, + "learning_rate": 1.1408673654197898e-05, + "loss": 0.7326, "step": 4328 }, { - "epoch": 0.3275698989822557, - "grad_norm": 2.2514874935150146, - "learning_rate": 1.4467380477030658e-05, - "loss": 0.645, + "epoch": 0.3047518479408659, + "grad_norm": 1.796613097190857, + "learning_rate": 1.14074337661687e-05, + "loss": 0.7714, "step": 4329 }, { - "epoch": 0.3276455677045893, - "grad_norm": 1.858174204826355, - "learning_rate": 1.4465591302595415e-05, - "loss": 0.8199, + "epoch": 0.304822245688138, + "grad_norm": 1.7656389474868774, + "learning_rate": 1.1406193648993026e-05, + "loss": 0.6384, "step": 4330 }, { - "epoch": 0.3277212364269229, - "grad_norm": 2.374924421310425, - "learning_rate": 1.4463801785887165e-05, - "loss": 0.8076, + "epoch": 0.30489264343541006, + "grad_norm": 2.1797616481781006, + "learning_rate": 1.1404953302735357e-05, + "loss": 0.6813, "step": 4331 }, { - "epoch": 0.3277969051492566, - "grad_norm": 2.052088737487793, - "learning_rate": 1.4462011927017977e-05, - "loss": 0.7276, + "epoch": 0.30496304118268214, + "grad_norm": 1.7639738321304321, + "learning_rate": 1.1403712727460174e-05, + "loss": 0.6994, "step": 4332 }, { - "epoch": 0.3278725738715902, - "grad_norm": 2.3067727088928223, - "learning_rate": 1.4460221726099936e-05, - "loss": 0.7695, + "epoch": 0.3050334389299542, + "grad_norm": 2.5791015625, + "learning_rate": 1.1402471923231979e-05, + "loss": 0.6859, "step": 4333 }, { - "epoch": 0.3279482425939238, - "grad_norm": 2.285053014755249, - "learning_rate": 1.445843118324515e-05, - "loss": 0.8209, + "epoch": 0.30510383667722635, + "grad_norm": 2.266111373901367, + "learning_rate": 1.1401230890115281e-05, + "loss": 0.7917, "step": 4334 }, { - "epoch": 0.3280239113162574, - "grad_norm": 2.512913703918457, - "learning_rate": 1.4456640298565749e-05, - "loss": 0.7486, + "epoch": 0.3051742344244984, + "grad_norm": 1.8612096309661865, + "learning_rate": 1.1399989628174605e-05, + "loss": 0.6277, "step": 4335 }, { - "epoch": 0.328099580038591, - "grad_norm": 2.5502045154571533, - "learning_rate": 1.4454849072173882e-05, - "loss": 0.8489, + "epoch": 0.3052446321717705, + "grad_norm": 1.6100518703460693, + "learning_rate": 1.139874813747448e-05, + "loss": 0.6793, "step": 4336 }, { - "epoch": 0.3281752487609247, - "grad_norm": 1.8892613649368286, - "learning_rate": 1.4453057504181723e-05, - "loss": 0.838, + "epoch": 0.3053150299190426, + "grad_norm": 1.6753222942352295, + "learning_rate": 1.139750641807946e-05, + "loss": 0.6409, "step": 4337 }, { - "epoch": 0.3282509174832583, - "grad_norm": 2.4267325401306152, - "learning_rate": 1.4451265594701467e-05, - "loss": 0.8521, + "epoch": 0.30538542766631466, + "grad_norm": 1.960266351699829, + "learning_rate": 1.1396264470054096e-05, + "loss": 0.7386, "step": 4338 }, { - "epoch": 0.3283265862055919, - "grad_norm": 2.127098321914673, - "learning_rate": 1.4449473343845326e-05, - "loss": 0.8956, + "epoch": 0.3054558254135868, + "grad_norm": 1.9386405944824219, + "learning_rate": 1.1395022293462962e-05, + "loss": 0.8227, "step": 4339 }, { - "epoch": 0.3284022549279255, - "grad_norm": 1.7218542098999023, - "learning_rate": 1.444768075172554e-05, - "loss": 0.8289, + "epoch": 0.30552622316085887, + "grad_norm": 2.245473861694336, + "learning_rate": 1.1393779888370637e-05, + "loss": 0.6942, "step": 4340 }, { - "epoch": 0.3284779236502592, - "grad_norm": 2.7613525390625, - "learning_rate": 1.4445887818454365e-05, - "loss": 0.8301, + "epoch": 0.30559662090813094, + "grad_norm": 3.9977176189422607, + "learning_rate": 1.1392537254841717e-05, + "loss": 0.7536, "step": 4341 }, { - "epoch": 0.3285535923725928, - "grad_norm": 2.5535645484924316, - "learning_rate": 1.4444094544144084e-05, - "loss": 0.7195, + "epoch": 0.305667018655403, + "grad_norm": 1.6711347103118896, + "learning_rate": 1.1391294392940806e-05, + "loss": 0.7296, "step": 4342 }, { - "epoch": 0.3286292610949264, - "grad_norm": 2.61027193069458, - "learning_rate": 1.4442300928906988e-05, - "loss": 0.8215, + "epoch": 0.3057374164026751, + "grad_norm": 2.239342451095581, + "learning_rate": 1.1390051302732524e-05, + "loss": 0.7502, "step": 4343 }, { - "epoch": 0.32870492981726, - "grad_norm": 2.108738899230957, - "learning_rate": 1.4440506972855407e-05, - "loss": 0.6972, + "epoch": 0.3058078141499472, + "grad_norm": 1.651098608970642, + "learning_rate": 1.1388807984281498e-05, + "loss": 0.628, "step": 4344 }, { - "epoch": 0.3287805985395937, - "grad_norm": 2.198636054992676, - "learning_rate": 1.4438712676101686e-05, - "loss": 0.8218, + "epoch": 0.3058782118972193, + "grad_norm": 1.8950421810150146, + "learning_rate": 1.1387564437652372e-05, + "loss": 0.7798, "step": 4345 }, { - "epoch": 0.3288562672619273, - "grad_norm": 2.278756618499756, - "learning_rate": 1.4436918038758184e-05, - "loss": 0.752, + "epoch": 0.3059486096444914, + "grad_norm": 1.8111618757247925, + "learning_rate": 1.1386320662909795e-05, + "loss": 0.671, "step": 4346 }, { - "epoch": 0.3289319359842609, - "grad_norm": 2.669025421142578, - "learning_rate": 1.4435123060937291e-05, - "loss": 0.8767, + "epoch": 0.30601900739176346, + "grad_norm": 1.6698771715164185, + "learning_rate": 1.1385076660118436e-05, + "loss": 0.649, "step": 4347 }, { - "epoch": 0.3290076047065945, - "grad_norm": 2.104459047317505, - "learning_rate": 1.443332774275141e-05, - "loss": 0.735, + "epoch": 0.30608940513903554, + "grad_norm": 1.5967646837234497, + "learning_rate": 1.1383832429342968e-05, + "loss": 0.8678, "step": 4348 }, { - "epoch": 0.3290832734289282, - "grad_norm": 2.096700429916382, - "learning_rate": 1.4431532084312973e-05, - "loss": 0.7188, + "epoch": 0.3061598028863076, + "grad_norm": 1.6762512922286987, + "learning_rate": 1.1382587970648083e-05, + "loss": 0.7539, "step": 4349 }, { - "epoch": 0.3291589421512618, - "grad_norm": 2.3591086864471436, - "learning_rate": 1.4429736085734429e-05, - "loss": 0.7212, + "epoch": 0.30623020063357975, + "grad_norm": 2.094326972961426, + "learning_rate": 1.1381343284098482e-05, + "loss": 0.7553, "step": 4350 }, { - "epoch": 0.3292346108735954, - "grad_norm": 2.4446537494659424, - "learning_rate": 1.4427939747128252e-05, - "loss": 0.7998, + "epoch": 0.30630059838085183, + "grad_norm": 1.8420369625091553, + "learning_rate": 1.1380098369758873e-05, + "loss": 0.7693, "step": 4351 }, { - "epoch": 0.329310279595929, - "grad_norm": 1.968955397605896, - "learning_rate": 1.442614306860693e-05, - "loss": 0.7152, + "epoch": 0.3063709961281239, + "grad_norm": 2.479412794113159, + "learning_rate": 1.1378853227693985e-05, + "loss": 0.6938, "step": 4352 }, { - "epoch": 0.3293859483182626, - "grad_norm": 2.0163991451263428, - "learning_rate": 1.4424346050282977e-05, - "loss": 0.7869, + "epoch": 0.306441393875396, + "grad_norm": 1.6419183015823364, + "learning_rate": 1.137760785796855e-05, + "loss": 0.6573, "step": 4353 }, { - "epoch": 0.3294616170405963, - "grad_norm": 2.4209890365600586, - "learning_rate": 1.4422548692268934e-05, - "loss": 0.8442, + "epoch": 0.30651179162266806, + "grad_norm": 1.6391011476516724, + "learning_rate": 1.1376362260647318e-05, + "loss": 0.6131, "step": 4354 }, { - "epoch": 0.3295372857629299, - "grad_norm": 2.772582530975342, - "learning_rate": 1.442075099467735e-05, - "loss": 0.8652, + "epoch": 0.30658218936994014, + "grad_norm": 1.8573042154312134, + "learning_rate": 1.137511643579505e-05, + "loss": 0.7261, "step": 4355 }, { - "epoch": 0.3296129544852635, - "grad_norm": 2.462894916534424, - "learning_rate": 1.4418952957620806e-05, - "loss": 0.659, + "epoch": 0.30665258711721227, + "grad_norm": 1.9858543872833252, + "learning_rate": 1.1373870383476514e-05, + "loss": 0.774, "step": 4356 }, { - "epoch": 0.3296886232075971, - "grad_norm": 2.011859893798828, - "learning_rate": 1.4417154581211901e-05, - "loss": 0.7464, + "epoch": 0.30672298486448435, + "grad_norm": 1.6675422191619873, + "learning_rate": 1.1372624103756497e-05, + "loss": 0.6758, "step": 4357 }, { - "epoch": 0.3297642919299308, - "grad_norm": 1.890229344367981, - "learning_rate": 1.4415355865563254e-05, - "loss": 0.8068, + "epoch": 0.3067933826117564, + "grad_norm": 1.7947354316711426, + "learning_rate": 1.1371377596699793e-05, + "loss": 0.7123, "step": 4358 }, { - "epoch": 0.3298399606522644, - "grad_norm": 2.171454906463623, - "learning_rate": 1.441355681078751e-05, - "loss": 0.7567, + "epoch": 0.3068637803590285, + "grad_norm": 2.1095118522644043, + "learning_rate": 1.137013086237121e-05, + "loss": 0.6894, "step": 4359 }, { - "epoch": 0.329915629374598, - "grad_norm": 2.6762516498565674, - "learning_rate": 1.4411757416997329e-05, - "loss": 0.6791, + "epoch": 0.3069341781063006, + "grad_norm": 2.0683975219726562, + "learning_rate": 1.1368883900835563e-05, + "loss": 0.695, "step": 4360 }, { - "epoch": 0.3299912980969316, - "grad_norm": 2.497652530670166, - "learning_rate": 1.4409957684305392e-05, - "loss": 0.8792, + "epoch": 0.30700457585357266, + "grad_norm": 2.146839141845703, + "learning_rate": 1.1367636712157685e-05, + "loss": 0.7407, "step": 4361 }, { - "epoch": 0.3300669668192653, - "grad_norm": 3.6293368339538574, - "learning_rate": 1.440815761282441e-05, - "loss": 0.7134, + "epoch": 0.3070749736008448, + "grad_norm": 2.200083017349243, + "learning_rate": 1.1366389296402422e-05, + "loss": 0.6493, "step": 4362 }, { - "epoch": 0.3301426355415989, - "grad_norm": 2.222104787826538, - "learning_rate": 1.4406357202667102e-05, - "loss": 0.7128, + "epoch": 0.30714537134811687, + "grad_norm": 2.5257480144500732, + "learning_rate": 1.1365141653634625e-05, + "loss": 0.823, "step": 4363 }, { - "epoch": 0.3302183042639325, - "grad_norm": 2.209268093109131, - "learning_rate": 1.4404556453946224e-05, - "loss": 0.8376, + "epoch": 0.30721576909538895, + "grad_norm": 1.761011004447937, + "learning_rate": 1.1363893783919158e-05, + "loss": 0.7082, "step": 4364 }, { - "epoch": 0.3302939729862661, - "grad_norm": 1.9097639322280884, - "learning_rate": 1.440275536677454e-05, - "loss": 0.8323, + "epoch": 0.307286166842661, + "grad_norm": 1.9247416257858276, + "learning_rate": 1.13626456873209e-05, + "loss": 0.664, "step": 4365 }, { - "epoch": 0.3303696417085997, - "grad_norm": 2.237016201019287, - "learning_rate": 1.4400953941264837e-05, - "loss": 0.7609, + "epoch": 0.3073565645899331, + "grad_norm": 2.2318787574768066, + "learning_rate": 1.1361397363904744e-05, + "loss": 0.7351, "step": 4366 }, { - "epoch": 0.3304453104309334, - "grad_norm": 1.9410020112991333, - "learning_rate": 1.4399152177529932e-05, - "loss": 0.6425, + "epoch": 0.30742696233720523, + "grad_norm": 2.5091090202331543, + "learning_rate": 1.1360148813735589e-05, + "loss": 0.731, "step": 4367 }, { - "epoch": 0.330520979153267, - "grad_norm": 2.629134178161621, - "learning_rate": 1.4397350075682652e-05, - "loss": 0.5823, + "epoch": 0.3074973600844773, + "grad_norm": 1.7051278352737427, + "learning_rate": 1.1358900036878346e-05, + "loss": 0.6555, "step": 4368 }, { - "epoch": 0.3305966478756006, - "grad_norm": 2.269423246383667, - "learning_rate": 1.4395547635835856e-05, - "loss": 0.6952, + "epoch": 0.3075677578317494, + "grad_norm": 1.8821067810058594, + "learning_rate": 1.1357651033397946e-05, + "loss": 0.6845, "step": 4369 }, { - "epoch": 0.3306723165979342, - "grad_norm": 3.469186544418335, - "learning_rate": 1.4393744858102417e-05, - "loss": 0.7122, + "epoch": 0.30763815557902147, + "grad_norm": 2.4951367378234863, + "learning_rate": 1.1356401803359318e-05, + "loss": 0.7266, "step": 4370 }, { - "epoch": 0.3307479853202679, - "grad_norm": 2.325718879699707, - "learning_rate": 1.4391941742595224e-05, - "loss": 0.7564, + "epoch": 0.30770855332629354, + "grad_norm": 2.028353691101074, + "learning_rate": 1.1355152346827418e-05, + "loss": 0.7387, "step": 4371 }, { - "epoch": 0.3308236540426015, - "grad_norm": 2.014582872390747, - "learning_rate": 1.4390138289427204e-05, - "loss": 0.7626, + "epoch": 0.3077789510735656, + "grad_norm": 1.8838082551956177, + "learning_rate": 1.1353902663867202e-05, + "loss": 0.7879, "step": 4372 }, { - "epoch": 0.3308993227649351, - "grad_norm": 2.6732168197631836, - "learning_rate": 1.438833449871129e-05, - "loss": 0.7594, + "epoch": 0.30784934882083775, + "grad_norm": 1.768048882484436, + "learning_rate": 1.1352652754543644e-05, + "loss": 0.5802, "step": 4373 }, { - "epoch": 0.3309749914872687, - "grad_norm": 1.793832778930664, - "learning_rate": 1.4386530370560439e-05, - "loss": 0.7993, + "epoch": 0.30791974656810983, + "grad_norm": 1.8610402345657349, + "learning_rate": 1.1351402618921728e-05, + "loss": 0.7722, "step": 4374 }, { - "epoch": 0.3310506602096024, - "grad_norm": 2.3724443912506104, - "learning_rate": 1.4384725905087638e-05, - "loss": 0.8349, + "epoch": 0.3079901443153819, + "grad_norm": 1.6052199602127075, + "learning_rate": 1.1350152257066446e-05, + "loss": 0.6974, "step": 4375 }, { - "epoch": 0.331126328931936, - "grad_norm": 2.2450218200683594, - "learning_rate": 1.4382921102405882e-05, - "loss": 0.7646, + "epoch": 0.308060542062654, + "grad_norm": 2.05703067779541, + "learning_rate": 1.134890166904281e-05, + "loss": 0.7872, "step": 4376 }, { - "epoch": 0.3312019976542696, - "grad_norm": 2.3006644248962402, - "learning_rate": 1.4381115962628197e-05, - "loss": 0.7745, + "epoch": 0.30813093980992606, + "grad_norm": 1.7768021821975708, + "learning_rate": 1.134765085491584e-05, + "loss": 0.7272, "step": 4377 }, { - "epoch": 0.3312776663766032, - "grad_norm": 2.5185351371765137, - "learning_rate": 1.4379310485867626e-05, - "loss": 0.7865, + "epoch": 0.3082013375571982, + "grad_norm": 2.476152181625366, + "learning_rate": 1.134639981475056e-05, + "loss": 0.6679, "step": 4378 }, { - "epoch": 0.33135333509893683, - "grad_norm": 3.231492280960083, - "learning_rate": 1.4377504672237231e-05, - "loss": 0.7233, + "epoch": 0.3082717353044703, + "grad_norm": 2.0078017711639404, + "learning_rate": 1.1345148548612022e-05, + "loss": 0.7534, "step": 4379 }, { - "epoch": 0.3314290038212705, - "grad_norm": 3.705059766769409, - "learning_rate": 1.4375698521850104e-05, - "loss": 0.8611, + "epoch": 0.30834213305174235, + "grad_norm": 1.8455250263214111, + "learning_rate": 1.1343897056565274e-05, + "loss": 0.7022, "step": 4380 }, { - "epoch": 0.3315046725436041, - "grad_norm": 2.2593576908111572, - "learning_rate": 1.4373892034819347e-05, - "loss": 0.7857, + "epoch": 0.30841253079901443, + "grad_norm": 2.179657459259033, + "learning_rate": 1.1342645338675384e-05, + "loss": 0.639, "step": 4381 }, { - "epoch": 0.3315803412659377, - "grad_norm": 2.1838455200195312, - "learning_rate": 1.4372085211258087e-05, - "loss": 0.7414, + "epoch": 0.3084829285462865, + "grad_norm": 1.780776858329773, + "learning_rate": 1.134139339500743e-05, + "loss": 0.8119, "step": 4382 }, { - "epoch": 0.3316560099882713, - "grad_norm": 2.32033371925354, - "learning_rate": 1.4370278051279481e-05, - "loss": 0.9, + "epoch": 0.3085533262935586, + "grad_norm": 3.017428159713745, + "learning_rate": 1.13401412256265e-05, + "loss": 0.6308, "step": 4383 }, { - "epoch": 0.331731678710605, - "grad_norm": 2.2040934562683105, - "learning_rate": 1.4368470554996691e-05, - "loss": 0.7841, + "epoch": 0.3086237240408307, + "grad_norm": 1.898450255393982, + "learning_rate": 1.1338888830597697e-05, + "loss": 0.8203, "step": 4384 }, { - "epoch": 0.3318073474329386, - "grad_norm": 2.480590343475342, - "learning_rate": 1.4366662722522909e-05, - "loss": 0.7788, + "epoch": 0.3086941217881028, + "grad_norm": 1.474715232849121, + "learning_rate": 1.1337636209986137e-05, + "loss": 0.7641, "step": 4385 }, { - "epoch": 0.3318830161552722, - "grad_norm": 2.308894157409668, - "learning_rate": 1.4364854553971351e-05, - "loss": 0.8387, + "epoch": 0.30876451953537487, + "grad_norm": 1.704830527305603, + "learning_rate": 1.133638336385694e-05, + "loss": 0.8386, "step": 4386 }, { - "epoch": 0.3319586848776058, - "grad_norm": 2.260097026824951, - "learning_rate": 1.4363046049455249e-05, - "loss": 0.8178, + "epoch": 0.30883491728264695, + "grad_norm": 1.6816809177398682, + "learning_rate": 1.1335130292275245e-05, + "loss": 0.6231, "step": 4387 }, { - "epoch": 0.3320343535999395, - "grad_norm": 2.4592983722686768, - "learning_rate": 1.4361237209087857e-05, - "loss": 0.7605, + "epoch": 0.308905315029919, + "grad_norm": 1.5998969078063965, + "learning_rate": 1.1333876995306201e-05, + "loss": 0.7495, "step": 4388 }, { - "epoch": 0.3321100223222731, - "grad_norm": 2.417006254196167, - "learning_rate": 1.435942803298245e-05, - "loss": 0.7592, + "epoch": 0.3089757127771911, + "grad_norm": 1.7035197019577026, + "learning_rate": 1.1332623473014967e-05, + "loss": 0.7131, "step": 4389 }, { - "epoch": 0.3321856910446067, - "grad_norm": 2.0404160022735596, - "learning_rate": 1.4357618521252326e-05, - "loss": 0.7705, + "epoch": 0.30904611052446324, + "grad_norm": 1.8200337886810303, + "learning_rate": 1.1331369725466712e-05, + "loss": 0.7429, "step": 4390 }, { - "epoch": 0.3322613597669403, - "grad_norm": 1.8098578453063965, - "learning_rate": 1.4355808674010805e-05, - "loss": 0.7147, + "epoch": 0.3091165082717353, + "grad_norm": 1.644662618637085, + "learning_rate": 1.1330115752726624e-05, + "loss": 0.7655, "step": 4391 }, { - "epoch": 0.33233702848927393, - "grad_norm": 2.279453754425049, - "learning_rate": 1.4353998491371217e-05, - "loss": 0.8241, + "epoch": 0.3091869060190074, + "grad_norm": 1.674791693687439, + "learning_rate": 1.1328861554859897e-05, + "loss": 0.8328, "step": 4392 }, { - "epoch": 0.3324126972116076, - "grad_norm": 1.8218796253204346, - "learning_rate": 1.435218797344693e-05, - "loss": 0.8129, + "epoch": 0.30925730376627947, + "grad_norm": 1.8064918518066406, + "learning_rate": 1.1327607131931737e-05, + "loss": 0.7444, "step": 4393 }, { - "epoch": 0.3324883659339412, - "grad_norm": 2.245039939880371, - "learning_rate": 1.4350377120351316e-05, - "loss": 0.8392, + "epoch": 0.30932770151355155, + "grad_norm": 2.0735220909118652, + "learning_rate": 1.1326352484007363e-05, + "loss": 0.7773, "step": 4394 }, { - "epoch": 0.3325640346562748, - "grad_norm": 4.042459011077881, - "learning_rate": 1.4348565932197786e-05, - "loss": 0.8678, + "epoch": 0.3093980992608237, + "grad_norm": 1.9409379959106445, + "learning_rate": 1.1325097611152004e-05, + "loss": 0.6341, "step": 4395 }, { - "epoch": 0.33263970337860843, - "grad_norm": 2.0353004932403564, - "learning_rate": 1.4346754409099758e-05, - "loss": 0.7939, + "epoch": 0.30946849700809576, + "grad_norm": 1.7155520915985107, + "learning_rate": 1.1323842513430906e-05, + "loss": 0.7714, "step": 4396 }, { - "epoch": 0.3327153721009421, - "grad_norm": 2.1609416007995605, - "learning_rate": 1.4344942551170673e-05, - "loss": 0.7398, + "epoch": 0.30953889475536783, + "grad_norm": 1.7087410688400269, + "learning_rate": 1.1322587190909317e-05, + "loss": 0.6672, "step": 4397 }, { - "epoch": 0.3327910408232757, - "grad_norm": 2.067974805831909, - "learning_rate": 1.4343130358523998e-05, - "loss": 0.79, + "epoch": 0.3096092925026399, + "grad_norm": 1.7948307991027832, + "learning_rate": 1.1321331643652506e-05, + "loss": 0.6713, "step": 4398 }, { - "epoch": 0.3328667095456093, - "grad_norm": 1.9276955127716064, - "learning_rate": 1.4341317831273221e-05, - "loss": 0.7935, + "epoch": 0.309679690249912, + "grad_norm": 1.3273446559906006, + "learning_rate": 1.132007587172575e-05, + "loss": 0.5301, "step": 4399 }, { - "epoch": 0.33294237826794293, - "grad_norm": 2.694018840789795, - "learning_rate": 1.4339504969531843e-05, - "loss": 0.7347, + "epoch": 0.30975008799718406, + "grad_norm": 2.8458659648895264, + "learning_rate": 1.1318819875194337e-05, + "loss": 0.7105, "step": 4400 }, { - "epoch": 0.3330180469902766, - "grad_norm": 2.018950939178467, - "learning_rate": 1.4337691773413394e-05, - "loss": 0.5026, + "epoch": 0.3098204857444562, + "grad_norm": 2.3110504150390625, + "learning_rate": 1.1317563654123566e-05, + "loss": 0.7712, "step": 4401 }, { - "epoch": 0.3330937157126102, - "grad_norm": 1.8888964653015137, - "learning_rate": 1.4335878243031423e-05, - "loss": 0.6937, + "epoch": 0.3098908834917283, + "grad_norm": 1.9065675735473633, + "learning_rate": 1.131630720857875e-05, + "loss": 0.6745, "step": 4402 }, { - "epoch": 0.3331693844349438, - "grad_norm": 2.4670050144195557, - "learning_rate": 1.4334064378499495e-05, - "loss": 0.7629, + "epoch": 0.30996128123900035, + "grad_norm": 2.0457825660705566, + "learning_rate": 1.1315050538625215e-05, + "loss": 0.7622, "step": 4403 }, { - "epoch": 0.3332450531572774, - "grad_norm": 2.6195714473724365, - "learning_rate": 1.4332250179931207e-05, - "loss": 0.7759, + "epoch": 0.31003167898627243, + "grad_norm": 2.2997398376464844, + "learning_rate": 1.1313793644328292e-05, + "loss": 0.6855, "step": 4404 }, { - "epoch": 0.33332072187961104, - "grad_norm": 2.684854507446289, - "learning_rate": 1.4330435647440165e-05, - "loss": 0.6832, + "epoch": 0.3101020767335445, + "grad_norm": 1.907590389251709, + "learning_rate": 1.1312536525753334e-05, + "loss": 0.7633, "step": 4405 }, { - "epoch": 0.3333963906019447, - "grad_norm": 2.092449903488159, - "learning_rate": 1.4328620781140001e-05, - "loss": 0.5791, + "epoch": 0.31017247448081664, + "grad_norm": 1.7173782587051392, + "learning_rate": 1.1311279182965697e-05, + "loss": 0.7747, "step": 4406 }, { - "epoch": 0.3334720593242783, - "grad_norm": 1.9735100269317627, - "learning_rate": 1.432680558114437e-05, - "loss": 0.7944, + "epoch": 0.3102428722280887, + "grad_norm": 1.7234442234039307, + "learning_rate": 1.1310021616030746e-05, + "loss": 0.7369, "step": 4407 }, { - "epoch": 0.3335477280466119, - "grad_norm": 2.075486421585083, - "learning_rate": 1.4324990047566943e-05, - "loss": 0.8772, + "epoch": 0.3103132699753608, + "grad_norm": 2.2668983936309814, + "learning_rate": 1.130876382501387e-05, + "loss": 0.7413, "step": 4408 }, { - "epoch": 0.33362339676894553, - "grad_norm": 1.8212954998016357, - "learning_rate": 1.4323174180521418e-05, - "loss": 0.7393, + "epoch": 0.31038366772263287, + "grad_norm": 1.7635817527770996, + "learning_rate": 1.1307505809980462e-05, + "loss": 0.9007, "step": 4409 }, { - "epoch": 0.3336990654912792, - "grad_norm": 2.295945167541504, - "learning_rate": 1.4321357980121509e-05, - "loss": 0.7714, + "epoch": 0.31045406546990495, + "grad_norm": 1.9425183534622192, + "learning_rate": 1.1306247570995925e-05, + "loss": 0.658, "step": 4410 }, { - "epoch": 0.3337747342136128, - "grad_norm": 2.0244014263153076, - "learning_rate": 1.4319541446480951e-05, - "loss": 0.7639, + "epoch": 0.310524463217177, + "grad_norm": 2.2386856079101562, + "learning_rate": 1.1304989108125676e-05, + "loss": 0.83, "step": 4411 }, { - "epoch": 0.3338504029359464, - "grad_norm": 2.317169189453125, - "learning_rate": 1.43177245797135e-05, - "loss": 0.8307, + "epoch": 0.31059486096444916, + "grad_norm": 2.3488993644714355, + "learning_rate": 1.1303730421435143e-05, + "loss": 0.7089, "step": 4412 }, { - "epoch": 0.33392607165828003, - "grad_norm": 2.066453218460083, - "learning_rate": 1.431590737993294e-05, - "loss": 0.7536, + "epoch": 0.31066525871172124, + "grad_norm": 1.9710348844528198, + "learning_rate": 1.1302471510989772e-05, + "loss": 0.7086, "step": 4413 }, { - "epoch": 0.3340017403806137, - "grad_norm": 2.337251663208008, - "learning_rate": 1.4314089847253063e-05, - "loss": 0.8076, + "epoch": 0.3107356564589933, + "grad_norm": 1.8465609550476074, + "learning_rate": 1.1301212376855006e-05, + "loss": 0.6492, "step": 4414 }, { - "epoch": 0.3340774091029473, - "grad_norm": 2.4293739795684814, - "learning_rate": 1.4312271981787692e-05, - "loss": 0.7072, + "epoch": 0.3108060542062654, + "grad_norm": 2.5460364818573, + "learning_rate": 1.1299953019096315e-05, + "loss": 0.7003, "step": 4415 }, { - "epoch": 0.3341530778252809, - "grad_norm": 2.3487699031829834, - "learning_rate": 1.431045378365067e-05, - "loss": 0.8849, + "epoch": 0.31087645195353747, + "grad_norm": 1.8237310647964478, + "learning_rate": 1.1298693437779175e-05, + "loss": 0.778, "step": 4416 }, { - "epoch": 0.33422874654761453, - "grad_norm": 1.5888330936431885, - "learning_rate": 1.4308635252955854e-05, - "loss": 0.7692, + "epoch": 0.31094684970080955, + "grad_norm": 2.1486828327178955, + "learning_rate": 1.1297433632969066e-05, + "loss": 0.6883, "step": 4417 }, { - "epoch": 0.33430441526994814, - "grad_norm": 2.819643974304199, - "learning_rate": 1.430681638981713e-05, - "loss": 0.7928, + "epoch": 0.3110172474480817, + "grad_norm": 2.267162322998047, + "learning_rate": 1.1296173604731493e-05, + "loss": 0.7708, "step": 4418 }, { - "epoch": 0.3343800839922818, - "grad_norm": 2.25228214263916, - "learning_rate": 1.4304997194348399e-05, - "loss": 0.6886, + "epoch": 0.31108764519535376, + "grad_norm": 1.8029956817626953, + "learning_rate": 1.129491335313196e-05, + "loss": 0.788, "step": 4419 }, { - "epoch": 0.3344557527146154, - "grad_norm": 2.0968821048736572, - "learning_rate": 1.4303177666663582e-05, - "loss": 0.7954, + "epoch": 0.31115804294262583, + "grad_norm": 2.183175802230835, + "learning_rate": 1.1293652878235996e-05, + "loss": 0.6463, "step": 4420 }, { - "epoch": 0.33453142143694903, - "grad_norm": 2.138329267501831, - "learning_rate": 1.4301357806876632e-05, - "loss": 0.5807, + "epoch": 0.3112284406898979, + "grad_norm": 1.9352731704711914, + "learning_rate": 1.1292392180109129e-05, + "loss": 0.765, "step": 4421 }, { - "epoch": 0.33460709015928264, - "grad_norm": 2.1104984283447266, - "learning_rate": 1.4299537615101503e-05, - "loss": 0.7762, + "epoch": 0.31129883843717, + "grad_norm": 1.8515058755874634, + "learning_rate": 1.1291131258816905e-05, + "loss": 0.7415, "step": 4422 }, { - "epoch": 0.3346827588816163, - "grad_norm": 2.5498058795928955, - "learning_rate": 1.4297717091452193e-05, - "loss": 0.8644, + "epoch": 0.3113692361844421, + "grad_norm": 2.0118114948272705, + "learning_rate": 1.1289870114424881e-05, + "loss": 0.8165, "step": 4423 }, { - "epoch": 0.3347584276039499, - "grad_norm": 2.219202995300293, - "learning_rate": 1.4295896236042702e-05, - "loss": 0.7394, + "epoch": 0.3114396339317142, + "grad_norm": 2.0142111778259277, + "learning_rate": 1.1288608746998623e-05, + "loss": 0.7421, "step": 4424 }, { - "epoch": 0.3348340963262835, - "grad_norm": 2.217406988143921, - "learning_rate": 1.429407504898706e-05, - "loss": 0.8712, + "epoch": 0.3115100316789863, + "grad_norm": 1.890453815460205, + "learning_rate": 1.1287347156603713e-05, + "loss": 0.6554, "step": 4425 }, { - "epoch": 0.33490976504861714, - "grad_norm": 2.460085153579712, - "learning_rate": 1.4292253530399316e-05, - "loss": 0.8625, + "epoch": 0.31158042942625835, + "grad_norm": 1.7478880882263184, + "learning_rate": 1.1286085343305743e-05, + "loss": 0.6375, "step": 4426 }, { - "epoch": 0.3349854337709508, - "grad_norm": 2.620077610015869, - "learning_rate": 1.429043168039354e-05, - "loss": 0.4811, + "epoch": 0.31165082717353043, + "grad_norm": 1.9007643461227417, + "learning_rate": 1.1284823307170314e-05, + "loss": 0.8103, "step": 4427 }, { - "epoch": 0.3350611024932844, - "grad_norm": 2.3812079429626465, - "learning_rate": 1.4288609499083819e-05, - "loss": 0.736, + "epoch": 0.3117212249208025, + "grad_norm": 1.9803576469421387, + "learning_rate": 1.1283561048263038e-05, + "loss": 0.7024, "step": 4428 }, { - "epoch": 0.335136771215618, - "grad_norm": 2.081484794616699, - "learning_rate": 1.4286786986584267e-05, - "loss": 0.7107, + "epoch": 0.31179162266807464, + "grad_norm": 1.592486023902893, + "learning_rate": 1.1282298566649546e-05, + "loss": 0.7462, "step": 4429 }, { - "epoch": 0.33521243993795163, - "grad_norm": 1.9492160081863403, - "learning_rate": 1.428496414300901e-05, - "loss": 0.8236, + "epoch": 0.3118620204153467, + "grad_norm": 1.5957720279693604, + "learning_rate": 1.1281035862395472e-05, + "loss": 0.7653, "step": 4430 }, { - "epoch": 0.33528810866028524, - "grad_norm": 2.160243034362793, - "learning_rate": 1.428314096847221e-05, - "loss": 0.8853, + "epoch": 0.3119324181626188, + "grad_norm": 1.9658104181289673, + "learning_rate": 1.1279772935566467e-05, + "loss": 0.7372, "step": 4431 }, { - "epoch": 0.3353637773826189, - "grad_norm": 2.322145462036133, - "learning_rate": 1.428131746308803e-05, - "loss": 0.8845, + "epoch": 0.3120028159098909, + "grad_norm": 1.8854186534881592, + "learning_rate": 1.1278509786228191e-05, + "loss": 0.656, "step": 4432 }, { - "epoch": 0.3354394461049525, - "grad_norm": 2.935598134994507, - "learning_rate": 1.427949362697067e-05, - "loss": 0.7397, + "epoch": 0.31207321365716295, + "grad_norm": 2.2209527492523193, + "learning_rate": 1.1277246414446318e-05, + "loss": 0.7337, "step": 4433 }, { - "epoch": 0.33551511482728613, - "grad_norm": 2.1716995239257812, - "learning_rate": 1.4277669460234346e-05, - "loss": 0.6468, + "epoch": 0.3121436114044351, + "grad_norm": 1.8064396381378174, + "learning_rate": 1.1275982820286528e-05, + "loss": 0.61, "step": 4434 }, { - "epoch": 0.33559078354961974, - "grad_norm": 1.9296468496322632, - "learning_rate": 1.4275844962993288e-05, - "loss": 0.783, + "epoch": 0.31221400915170716, + "grad_norm": 1.6920034885406494, + "learning_rate": 1.127471900381452e-05, + "loss": 0.6788, "step": 4435 }, { - "epoch": 0.3356664522719534, - "grad_norm": 2.0291011333465576, - "learning_rate": 1.4274020135361758e-05, - "loss": 0.7367, + "epoch": 0.31228440689897924, + "grad_norm": 1.7305679321289062, + "learning_rate": 1.1273454965096e-05, + "loss": 0.623, "step": 4436 }, { - "epoch": 0.335742120994287, - "grad_norm": 2.1033778190612793, - "learning_rate": 1.4272194977454024e-05, - "loss": 0.7526, + "epoch": 0.3123548046462513, + "grad_norm": 2.102536678314209, + "learning_rate": 1.1272190704196685e-05, + "loss": 0.6294, "step": 4437 }, { - "epoch": 0.33581778971662063, - "grad_norm": 2.0603630542755127, - "learning_rate": 1.427036948938439e-05, - "loss": 0.722, + "epoch": 0.3124252023935234, + "grad_norm": 2.822849988937378, + "learning_rate": 1.1270926221182305e-05, + "loss": 0.7622, "step": 4438 }, { - "epoch": 0.33589345843895424, - "grad_norm": 2.2816059589385986, - "learning_rate": 1.4268543671267173e-05, - "loss": 0.6255, + "epoch": 0.31249560014079547, + "grad_norm": 1.9169998168945312, + "learning_rate": 1.1269661516118606e-05, + "loss": 0.7299, "step": 4439 }, { - "epoch": 0.3359691271612879, - "grad_norm": 2.037482976913452, - "learning_rate": 1.4266717523216709e-05, - "loss": 0.5998, + "epoch": 0.3125659978880676, + "grad_norm": 2.0618340969085693, + "learning_rate": 1.1268396589071334e-05, + "loss": 0.7098, "step": 4440 }, { - "epoch": 0.3360447958836215, - "grad_norm": 1.928592562675476, - "learning_rate": 1.426489104534736e-05, - "loss": 0.8288, + "epoch": 0.3126363956353397, + "grad_norm": 1.7647374868392944, + "learning_rate": 1.126713144010626e-05, + "loss": 0.6972, "step": 4441 }, { - "epoch": 0.3361204646059551, - "grad_norm": 1.9821760654449463, - "learning_rate": 1.4263064237773506e-05, - "loss": 0.6743, + "epoch": 0.31270679338261176, + "grad_norm": 1.5347208976745605, + "learning_rate": 1.1265866069289159e-05, + "loss": 0.7326, "step": 4442 }, { - "epoch": 0.33619613332828874, - "grad_norm": 2.9042348861694336, - "learning_rate": 1.4261237100609543e-05, - "loss": 0.9823, + "epoch": 0.31277719112988384, + "grad_norm": 1.613402247428894, + "learning_rate": 1.1264600476685815e-05, + "loss": 0.6859, "step": 4443 }, { - "epoch": 0.33627180205062235, - "grad_norm": 2.4665040969848633, - "learning_rate": 1.4259409633969901e-05, - "loss": 0.7231, + "epoch": 0.3128475888771559, + "grad_norm": 2.1900596618652344, + "learning_rate": 1.126333466236203e-05, + "loss": 0.732, "step": 4444 }, { - "epoch": 0.336347470772956, - "grad_norm": 2.4456706047058105, - "learning_rate": 1.4257581837969012e-05, - "loss": 0.8436, + "epoch": 0.312917986624428, + "grad_norm": 1.8385682106018066, + "learning_rate": 1.1262068626383617e-05, + "loss": 0.7533, "step": 4445 }, { - "epoch": 0.3364231394952896, - "grad_norm": 2.440807342529297, - "learning_rate": 1.4255753712721347e-05, - "loss": 0.7894, + "epoch": 0.3129883843717001, + "grad_norm": 1.8919141292572021, + "learning_rate": 1.1260802368816397e-05, + "loss": 0.7224, "step": 4446 }, { - "epoch": 0.33649880821762324, - "grad_norm": 1.8564308881759644, - "learning_rate": 1.4253925258341384e-05, - "loss": 0.7516, + "epoch": 0.3130587821189722, + "grad_norm": 1.9405122995376587, + "learning_rate": 1.1259535889726198e-05, + "loss": 0.7485, "step": 4447 }, { - "epoch": 0.33657447693995685, - "grad_norm": 2.5682311058044434, - "learning_rate": 1.4252096474943626e-05, - "loss": 0.701, + "epoch": 0.3131291798662443, + "grad_norm": 1.9961355924606323, + "learning_rate": 1.1258269189178875e-05, + "loss": 0.7827, "step": 4448 }, { - "epoch": 0.3366501456622905, - "grad_norm": 2.0853443145751953, - "learning_rate": 1.4250267362642604e-05, - "loss": 0.761, + "epoch": 0.31319957761351636, + "grad_norm": 2.0449230670928955, + "learning_rate": 1.1257002267240279e-05, + "loss": 0.8264, "step": 4449 }, { - "epoch": 0.3367258143846241, - "grad_norm": 2.6252856254577637, - "learning_rate": 1.4248437921552855e-05, - "loss": 0.718, + "epoch": 0.31326997536078843, + "grad_norm": 1.664318561553955, + "learning_rate": 1.125573512397628e-05, + "loss": 0.6704, "step": 4450 }, { - "epoch": 0.33680148310695773, - "grad_norm": 2.0729100704193115, - "learning_rate": 1.4246608151788947e-05, - "loss": 0.6689, + "epoch": 0.31334037310806057, + "grad_norm": 1.7518949508666992, + "learning_rate": 1.1254467759452756e-05, + "loss": 0.7241, "step": 4451 }, { - "epoch": 0.33687715182929134, - "grad_norm": 3.019207715988159, - "learning_rate": 1.424477805346547e-05, - "loss": 0.7578, + "epoch": 0.31341077085533264, + "grad_norm": 2.240057945251465, + "learning_rate": 1.1253200173735603e-05, + "loss": 0.7087, "step": 4452 }, { - "epoch": 0.336952820551625, - "grad_norm": 1.869498610496521, - "learning_rate": 1.4242947626697024e-05, - "loss": 0.6262, + "epoch": 0.3134811686026047, + "grad_norm": 1.7657912969589233, + "learning_rate": 1.125193236689072e-05, + "loss": 0.669, "step": 4453 }, { - "epoch": 0.3370284892739586, - "grad_norm": 9.356559753417969, - "learning_rate": 1.4241116871598241e-05, - "loss": 0.6689, + "epoch": 0.3135515663498768, + "grad_norm": 1.9772002696990967, + "learning_rate": 1.1250664338984022e-05, + "loss": 0.6825, "step": 4454 }, { - "epoch": 0.33710415799629223, - "grad_norm": 2.5027689933776855, - "learning_rate": 1.423928578828377e-05, - "loss": 0.7085, + "epoch": 0.3136219640971489, + "grad_norm": 2.0371551513671875, + "learning_rate": 1.1249396090081436e-05, + "loss": 0.7, "step": 4455 }, { - "epoch": 0.33717982671862584, - "grad_norm": 1.68281090259552, - "learning_rate": 1.4237454376868275e-05, - "loss": 0.7766, + "epoch": 0.31369236184442095, + "grad_norm": 1.8751482963562012, + "learning_rate": 1.1248127620248897e-05, + "loss": 0.8017, "step": 4456 }, { - "epoch": 0.33725549544095945, - "grad_norm": 3.305651903152466, - "learning_rate": 1.4235622637466449e-05, - "loss": 0.7257, + "epoch": 0.3137627595916931, + "grad_norm": 1.9645402431488037, + "learning_rate": 1.1246858929552356e-05, + "loss": 0.7269, "step": 4457 }, { - "epoch": 0.3373311641632931, - "grad_norm": 2.6844162940979004, - "learning_rate": 1.4233790570192997e-05, - "loss": 0.7563, + "epoch": 0.31383315733896516, + "grad_norm": 1.744320273399353, + "learning_rate": 1.1245590018057776e-05, + "loss": 0.8015, "step": 4458 }, { - "epoch": 0.33740683288562673, - "grad_norm": 1.9760267734527588, - "learning_rate": 1.423195817516265e-05, - "loss": 0.6608, + "epoch": 0.31390355508623724, + "grad_norm": 1.790252923965454, + "learning_rate": 1.1244320885831122e-05, + "loss": 0.6426, "step": 4459 }, { - "epoch": 0.33748250160796034, - "grad_norm": 2.771921157836914, - "learning_rate": 1.4230125452490165e-05, - "loss": 0.8818, + "epoch": 0.3139739528335093, + "grad_norm": 1.7894060611724854, + "learning_rate": 1.1243051532938384e-05, + "loss": 0.7504, "step": 4460 }, { - "epoch": 0.33755817033029395, - "grad_norm": 1.8411167860031128, - "learning_rate": 1.4228292402290303e-05, + "epoch": 0.3140443505807814, + "grad_norm": 1.9329396486282349, + "learning_rate": 1.1241781959445552e-05, "loss": 0.7705, "step": 4461 }, { - "epoch": 0.3376338390526276, - "grad_norm": 8.400818824768066, - "learning_rate": 1.4226459024677864e-05, - "loss": 0.7038, + "epoch": 0.31411474832805353, + "grad_norm": 1.9899910688400269, + "learning_rate": 1.1240512165418636e-05, + "loss": 0.6508, "step": 4462 }, { - "epoch": 0.3377095077749612, - "grad_norm": 2.2699685096740723, - "learning_rate": 1.4224625319767654e-05, - "loss": 0.7097, + "epoch": 0.3141851460753256, + "grad_norm": 1.7938060760498047, + "learning_rate": 1.1239242150923652e-05, + "loss": 0.6864, "step": 4463 }, { - "epoch": 0.33778517649729484, - "grad_norm": 2.2146406173706055, - "learning_rate": 1.422279128767451e-05, - "loss": 0.7979, + "epoch": 0.3142555438225977, + "grad_norm": 1.7188503742218018, + "learning_rate": 1.1237971916026629e-05, + "loss": 0.7078, "step": 4464 }, { - "epoch": 0.33786084521962845, - "grad_norm": 1.8910346031188965, - "learning_rate": 1.4220956928513283e-05, - "loss": 0.7868, + "epoch": 0.31432594156986976, + "grad_norm": 1.523958683013916, + "learning_rate": 1.1236701460793607e-05, + "loss": 0.5815, "step": 4465 }, { - "epoch": 0.3379365139419621, - "grad_norm": 2.668886423110962, - "learning_rate": 1.4219122242398842e-05, - "loss": 0.7427, + "epoch": 0.31439633931714184, + "grad_norm": 2.1751294136047363, + "learning_rate": 1.1235430785290642e-05, + "loss": 0.6879, "step": 4466 }, { - "epoch": 0.3380121826642957, - "grad_norm": 2.642848014831543, - "learning_rate": 1.4217287229446089e-05, - "loss": 0.6824, + "epoch": 0.3144667370644139, + "grad_norm": 1.8777403831481934, + "learning_rate": 1.1234159889583792e-05, + "loss": 0.7959, "step": 4467 }, { - "epoch": 0.33808785138662933, - "grad_norm": 2.7786381244659424, - "learning_rate": 1.4215451889769936e-05, - "loss": 0.8064, + "epoch": 0.31453713481168605, + "grad_norm": 1.8164162635803223, + "learning_rate": 1.1232888773739135e-05, + "loss": 0.6609, "step": 4468 }, { - "epoch": 0.33816352010896294, - "grad_norm": 2.077474355697632, - "learning_rate": 1.4213616223485314e-05, - "loss": 0.7639, + "epoch": 0.3146075325589581, + "grad_norm": 2.0099027156829834, + "learning_rate": 1.1231617437822758e-05, + "loss": 0.7777, "step": 4469 }, { - "epoch": 0.3382391888312966, - "grad_norm": 2.304389715194702, - "learning_rate": 1.4211780230707184e-05, - "loss": 0.7471, + "epoch": 0.3146779303062302, + "grad_norm": 1.7434080839157104, + "learning_rate": 1.1230345881900757e-05, + "loss": 0.7515, "step": 4470 }, { - "epoch": 0.3383148575536302, - "grad_norm": 8.346323013305664, - "learning_rate": 1.4209943911550519e-05, - "loss": 0.6181, + "epoch": 0.3147483280535023, + "grad_norm": 2.0189883708953857, + "learning_rate": 1.1229074106039243e-05, + "loss": 0.6139, "step": 4471 }, { - "epoch": 0.33839052627596383, - "grad_norm": 1.981086254119873, - "learning_rate": 1.4208107266130313e-05, - "loss": 0.656, + "epoch": 0.31481872580077436, + "grad_norm": 1.981954574584961, + "learning_rate": 1.1227802110304335e-05, + "loss": 0.6499, "step": 4472 }, { - "epoch": 0.33846619499829744, - "grad_norm": 2.607759714126587, - "learning_rate": 1.4206270294561587e-05, - "loss": 0.8316, + "epoch": 0.3148891235480465, + "grad_norm": 1.9777555465698242, + "learning_rate": 1.1226529894762167e-05, + "loss": 0.8065, "step": 4473 }, { - "epoch": 0.33854186372063105, - "grad_norm": 2.3243844509124756, - "learning_rate": 1.4204432996959373e-05, - "loss": 0.7953, + "epoch": 0.31495952129531857, + "grad_norm": 2.1682512760162354, + "learning_rate": 1.1225257459478883e-05, + "loss": 0.7893, "step": 4474 }, { - "epoch": 0.3386175324429647, - "grad_norm": 2.651670217514038, - "learning_rate": 1.4202595373438735e-05, - "loss": 0.7781, + "epoch": 0.31502991904259064, + "grad_norm": 2.316328763961792, + "learning_rate": 1.1223984804520636e-05, + "loss": 0.7506, "step": 4475 }, { - "epoch": 0.33869320116529833, - "grad_norm": 2.400404930114746, - "learning_rate": 1.4200757424114745e-05, - "loss": 0.6033, + "epoch": 0.3151003167898627, + "grad_norm": 1.828417420387268, + "learning_rate": 1.1222711929953594e-05, + "loss": 0.7998, "step": 4476 }, { - "epoch": 0.33876886988763194, - "grad_norm": 2.4451756477355957, - "learning_rate": 1.4198919149102506e-05, - "loss": 0.7409, + "epoch": 0.3151707145371348, + "grad_norm": 1.7657605409622192, + "learning_rate": 1.1221438835843933e-05, + "loss": 0.7497, "step": 4477 }, { - "epoch": 0.33884453860996555, - "grad_norm": 1.9781309366226196, - "learning_rate": 1.4197080548517134e-05, - "loss": 0.8582, + "epoch": 0.3152411122844069, + "grad_norm": 1.7982622385025024, + "learning_rate": 1.1220165522257846e-05, + "loss": 0.6379, "step": 4478 }, { - "epoch": 0.3389202073322992, - "grad_norm": 2.500493288040161, - "learning_rate": 1.4195241622473765e-05, - "loss": 0.6146, + "epoch": 0.315311510031679, + "grad_norm": 1.64960515499115, + "learning_rate": 1.121889198926153e-05, + "loss": 0.7036, "step": 4479 }, { - "epoch": 0.3389958760546328, - "grad_norm": 2.1779065132141113, - "learning_rate": 1.419340237108757e-05, - "loss": 0.7664, + "epoch": 0.3153819077789511, + "grad_norm": 1.7817151546478271, + "learning_rate": 1.12176182369212e-05, + "loss": 0.6942, "step": 4480 }, { - "epoch": 0.33907154477696644, - "grad_norm": 2.633241891860962, - "learning_rate": 1.4191562794473713e-05, - "loss": 0.7935, + "epoch": 0.31545230552622316, + "grad_norm": 2.097374439239502, + "learning_rate": 1.1216344265303077e-05, + "loss": 0.8156, "step": 4481 }, { - "epoch": 0.33914721349930005, - "grad_norm": 1.5423035621643066, - "learning_rate": 1.4189722892747406e-05, - "loss": 0.957, + "epoch": 0.31552270327349524, + "grad_norm": 1.506516933441162, + "learning_rate": 1.1215070074473397e-05, + "loss": 0.7292, "step": 4482 }, { - "epoch": 0.3392228822216337, - "grad_norm": 2.039738178253174, - "learning_rate": 1.4187882666023866e-05, - "loss": 0.6976, + "epoch": 0.3155931010207673, + "grad_norm": 1.7547513246536255, + "learning_rate": 1.1213795664498407e-05, + "loss": 0.7284, "step": 4483 }, { - "epoch": 0.3392985509439673, - "grad_norm": 2.4406769275665283, - "learning_rate": 1.4186042114418331e-05, - "loss": 0.7359, + "epoch": 0.3156634987680394, + "grad_norm": 1.8306903839111328, + "learning_rate": 1.1212521035444364e-05, + "loss": 0.6504, "step": 4484 }, { - "epoch": 0.33937421966630094, - "grad_norm": 2.1993062496185303, - "learning_rate": 1.4184201238046069e-05, - "loss": 0.6014, + "epoch": 0.31573389651531153, + "grad_norm": 1.9615957736968994, + "learning_rate": 1.1211246187377538e-05, + "loss": 0.6272, "step": 4485 }, { - "epoch": 0.33944988838863455, - "grad_norm": 2.3027052879333496, - "learning_rate": 1.4182360037022355e-05, - "loss": 0.8225, + "epoch": 0.3158042942625836, + "grad_norm": 1.7946009635925293, + "learning_rate": 1.1209971120364209e-05, + "loss": 0.7097, "step": 4486 }, { - "epoch": 0.33952555711096816, - "grad_norm": 2.9717302322387695, - "learning_rate": 1.4180518511462497e-05, - "loss": 0.7826, + "epoch": 0.3158746920098557, + "grad_norm": 1.7886618375778198, + "learning_rate": 1.1208695834470667e-05, + "loss": 0.7, "step": 4487 }, { - "epoch": 0.3396012258333018, - "grad_norm": 2.1658003330230713, - "learning_rate": 1.4178676661481813e-05, - "loss": 0.8349, + "epoch": 0.31594508975712776, + "grad_norm": 1.8595759868621826, + "learning_rate": 1.1207420329763216e-05, + "loss": 0.6764, "step": 4488 }, { - "epoch": 0.33967689455563543, - "grad_norm": 2.308877944946289, - "learning_rate": 1.417683448719564e-05, - "loss": 0.6189, + "epoch": 0.31601548750439984, + "grad_norm": 1.763343334197998, + "learning_rate": 1.1206144606308176e-05, + "loss": 0.7526, "step": 4489 }, { - "epoch": 0.33975256327796904, - "grad_norm": 1.7710340023040771, - "learning_rate": 1.4174991988719355e-05, - "loss": 0.7385, + "epoch": 0.31608588525167197, + "grad_norm": 1.8307584524154663, + "learning_rate": 1.1204868664171864e-05, + "loss": 0.7388, "step": 4490 }, { - "epoch": 0.33982823200030265, - "grad_norm": 2.1375606060028076, - "learning_rate": 1.4173149166168332e-05, - "loss": 0.739, + "epoch": 0.31615628299894405, + "grad_norm": 1.9727236032485962, + "learning_rate": 1.1203592503420626e-05, + "loss": 0.7091, "step": 4491 }, { - "epoch": 0.3399039007226363, - "grad_norm": 2.5131471157073975, - "learning_rate": 1.4171306019657974e-05, - "loss": 0.7041, + "epoch": 0.3162266807462161, + "grad_norm": 2.02842378616333, + "learning_rate": 1.1202316124120803e-05, + "loss": 0.8613, "step": 4492 }, { - "epoch": 0.33997956944496993, - "grad_norm": 2.478649139404297, - "learning_rate": 1.416946254930371e-05, - "loss": 0.7395, + "epoch": 0.3162970784934882, + "grad_norm": 1.482226014137268, + "learning_rate": 1.1201039526338762e-05, + "loss": 0.6123, "step": 4493 }, { - "epoch": 0.34005523816730354, - "grad_norm": 2.2513961791992188, - "learning_rate": 1.416761875522098e-05, - "loss": 0.7747, + "epoch": 0.3163674762407603, + "grad_norm": 2.0035338401794434, + "learning_rate": 1.1199762710140868e-05, + "loss": 0.7752, "step": 4494 }, { - "epoch": 0.34013090688963715, - "grad_norm": 2.2607710361480713, - "learning_rate": 1.416577463752525e-05, - "loss": 0.6677, + "epoch": 0.31643787398803236, + "grad_norm": 1.8785594701766968, + "learning_rate": 1.1198485675593511e-05, + "loss": 0.8222, "step": 4495 }, { - "epoch": 0.3402065756119708, - "grad_norm": 1.9935152530670166, - "learning_rate": 1.4163930196332004e-05, - "loss": 0.7986, + "epoch": 0.3165082717353045, + "grad_norm": 1.8970451354980469, + "learning_rate": 1.1197208422763079e-05, + "loss": 0.7993, "step": 4496 }, { - "epoch": 0.34028224433430443, - "grad_norm": 2.159668207168579, - "learning_rate": 1.4162085431756746e-05, - "loss": 1.073, + "epoch": 0.31657866948257657, + "grad_norm": 1.860840082168579, + "learning_rate": 1.119593095171598e-05, + "loss": 0.6707, "step": 4497 }, { - "epoch": 0.34035791305663804, - "grad_norm": 1.8614026308059692, - "learning_rate": 1.4160240343915002e-05, - "loss": 0.5685, + "epoch": 0.31664906722984865, + "grad_norm": 1.8211252689361572, + "learning_rate": 1.1194653262518632e-05, + "loss": 0.7303, "step": 4498 }, { - "epoch": 0.34043358177897165, - "grad_norm": 2.1379287242889404, - "learning_rate": 1.4158394932922315e-05, - "loss": 0.7633, + "epoch": 0.3167194649771207, + "grad_norm": 1.6276040077209473, + "learning_rate": 1.1193375355237461e-05, + "loss": 0.6663, "step": 4499 }, { - "epoch": 0.34050925050130526, - "grad_norm": 2.2675228118896484, - "learning_rate": 1.4156549198894257e-05, - "loss": 0.8834, + "epoch": 0.3167898627243928, + "grad_norm": 1.928048849105835, + "learning_rate": 1.1192097229938907e-05, + "loss": 0.8593, "step": 4500 }, { - "epoch": 0.3405849192236389, - "grad_norm": 2.828331708908081, - "learning_rate": 1.415470314194641e-05, - "loss": 0.8714, + "epoch": 0.31686026047166493, + "grad_norm": 1.8128437995910645, + "learning_rate": 1.1190818886689423e-05, + "loss": 0.7887, "step": 4501 }, { - "epoch": 0.34066058794597254, - "grad_norm": 2.267286777496338, - "learning_rate": 1.4152856762194377e-05, - "loss": 0.7331, + "epoch": 0.316930658218937, + "grad_norm": 1.8515598773956299, + "learning_rate": 1.1189540325555466e-05, + "loss": 0.671, "step": 4502 }, { - "epoch": 0.34073625666830615, - "grad_norm": 2.4142282009124756, - "learning_rate": 1.415101005975379e-05, - "loss": 0.7392, + "epoch": 0.3170010559662091, + "grad_norm": 1.922252893447876, + "learning_rate": 1.1188261546603518e-05, + "loss": 0.7222, "step": 4503 }, { - "epoch": 0.34081192539063976, - "grad_norm": 2.212761878967285, - "learning_rate": 1.4149163034740291e-05, - "loss": 0.7835, + "epoch": 0.31707145371348117, + "grad_norm": 1.933125615119934, + "learning_rate": 1.1186982549900056e-05, + "loss": 0.6727, "step": 4504 }, { - "epoch": 0.3408875941129734, - "grad_norm": 2.463355541229248, - "learning_rate": 1.4147315687269547e-05, - "loss": 0.75, + "epoch": 0.31714185146075324, + "grad_norm": 1.9465702772140503, + "learning_rate": 1.118570333551158e-05, + "loss": 0.7115, "step": 4505 }, { - "epoch": 0.34096326283530703, - "grad_norm": 2.5899860858917236, - "learning_rate": 1.414546801745725e-05, - "loss": 0.6397, + "epoch": 0.3172122492080253, + "grad_norm": 1.9398869276046753, + "learning_rate": 1.1184423903504599e-05, + "loss": 0.8072, "step": 4506 }, { - "epoch": 0.34103893155764065, - "grad_norm": 2.4131081104278564, - "learning_rate": 1.4143620025419099e-05, - "loss": 0.7682, + "epoch": 0.31728264695529745, + "grad_norm": 1.8738842010498047, + "learning_rate": 1.1183144253945626e-05, + "loss": 0.7339, "step": 4507 }, { - "epoch": 0.34111460027997426, - "grad_norm": 2.420891284942627, - "learning_rate": 1.414177171127083e-05, - "loss": 0.7546, + "epoch": 0.31735304470256953, + "grad_norm": 2.0439021587371826, + "learning_rate": 1.1181864386901198e-05, + "loss": 0.7738, "step": 4508 }, { - "epoch": 0.3411902690023079, - "grad_norm": 2.2223784923553467, - "learning_rate": 1.4139923075128185e-05, - "loss": 0.6996, + "epoch": 0.3174234424498416, + "grad_norm": 1.9848687648773193, + "learning_rate": 1.118058430243785e-05, + "loss": 0.7636, "step": 4509 }, { - "epoch": 0.34126593772464153, - "grad_norm": 2.2744736671447754, - "learning_rate": 1.413807411710693e-05, - "loss": 0.846, + "epoch": 0.3174938401971137, + "grad_norm": 2.0234215259552, + "learning_rate": 1.117930400062214e-05, + "loss": 0.7543, "step": 4510 }, { - "epoch": 0.34134160644697514, - "grad_norm": 1.9180521965026855, - "learning_rate": 1.4136224837322857e-05, - "loss": 0.7467, + "epoch": 0.31756423794438576, + "grad_norm": 1.9393932819366455, + "learning_rate": 1.1178023481520629e-05, + "loss": 0.7375, "step": 4511 }, { - "epoch": 0.34141727516930875, - "grad_norm": 2.1215381622314453, - "learning_rate": 1.413437523589177e-05, - "loss": 0.7741, + "epoch": 0.31763463569165784, + "grad_norm": 1.8264366388320923, + "learning_rate": 1.1176742745199894e-05, + "loss": 0.6981, "step": 4512 }, { - "epoch": 0.34149294389164236, - "grad_norm": 2.879868984222412, - "learning_rate": 1.4132525312929501e-05, - "loss": 0.7951, + "epoch": 0.31770503343893, + "grad_norm": 1.6543028354644775, + "learning_rate": 1.1175461791726522e-05, + "loss": 0.5147, "step": 4513 }, { - "epoch": 0.34156861261397603, - "grad_norm": 2.1960246562957764, - "learning_rate": 1.4130675068551898e-05, - "loss": 0.7194, + "epoch": 0.31777543118620205, + "grad_norm": 1.7566689252853394, + "learning_rate": 1.1174180621167108e-05, + "loss": 0.6977, "step": 4514 }, { - "epoch": 0.34164428133630964, - "grad_norm": 2.2980356216430664, - "learning_rate": 1.4128824502874824e-05, - "loss": 0.7676, + "epoch": 0.31784582893347413, + "grad_norm": 1.8014500141143799, + "learning_rate": 1.1172899233588265e-05, + "loss": 0.7445, "step": 4515 }, { - "epoch": 0.34171995005864325, - "grad_norm": 2.5067970752716064, - "learning_rate": 1.412697361601417e-05, - "loss": 0.6625, + "epoch": 0.3179162266807462, + "grad_norm": 1.6694457530975342, + "learning_rate": 1.117161762905661e-05, + "loss": 0.6696, "step": 4516 }, { - "epoch": 0.34179561878097686, - "grad_norm": 2.2618825435638428, - "learning_rate": 1.4125122408085849e-05, - "loss": 0.7369, + "epoch": 0.3179866244280183, + "grad_norm": 1.9723091125488281, + "learning_rate": 1.1170335807638778e-05, + "loss": 0.8354, "step": 4517 }, { - "epoch": 0.34187128750331053, - "grad_norm": 3.191148281097412, - "learning_rate": 1.4123270879205787e-05, - "loss": 0.694, + "epoch": 0.3180570221752904, + "grad_norm": 1.5885682106018066, + "learning_rate": 1.116905376940141e-05, + "loss": 0.7075, "step": 4518 }, { - "epoch": 0.34194695622564414, - "grad_norm": 1.9559108018875122, - "learning_rate": 1.412141902948993e-05, - "loss": 0.6441, + "epoch": 0.3181274199225625, + "grad_norm": 1.662989616394043, + "learning_rate": 1.116777151441116e-05, + "loss": 0.7986, "step": 4519 }, { - "epoch": 0.34202262494797775, - "grad_norm": 2.6855931282043457, - "learning_rate": 1.4119566859054249e-05, - "loss": 0.8047, + "epoch": 0.31819781766983457, + "grad_norm": 1.7282116413116455, + "learning_rate": 1.1166489042734693e-05, + "loss": 0.8845, "step": 4520 }, { - "epoch": 0.34209829367031136, - "grad_norm": 1.9309477806091309, - "learning_rate": 1.4117714368014732e-05, - "loss": 0.7086, + "epoch": 0.31826821541710665, + "grad_norm": 1.8073766231536865, + "learning_rate": 1.116520635443869e-05, + "loss": 0.928, "step": 4521 }, { - "epoch": 0.342173962392645, - "grad_norm": 2.5932657718658447, - "learning_rate": 1.4115861556487388e-05, - "loss": 0.8026, + "epoch": 0.3183386131643787, + "grad_norm": 1.7906173467636108, + "learning_rate": 1.1163923449589835e-05, + "loss": 0.749, "step": 4522 }, { - "epoch": 0.34224963111497864, - "grad_norm": 2.6737778186798096, - "learning_rate": 1.4114008424588249e-05, - "loss": 0.7373, + "epoch": 0.3184090109116508, + "grad_norm": 2.1759002208709717, + "learning_rate": 1.116264032825483e-05, + "loss": 0.8061, "step": 4523 }, { - "epoch": 0.34232529983731225, - "grad_norm": 1.9361664056777954, - "learning_rate": 1.411215497243336e-05, - "loss": 0.6394, + "epoch": 0.31847940865892294, + "grad_norm": 1.9174907207489014, + "learning_rate": 1.1161356990500383e-05, + "loss": 0.8493, "step": 4524 }, { - "epoch": 0.34240096855964586, - "grad_norm": 2.822296619415283, - "learning_rate": 1.4110301200138793e-05, - "loss": 0.6913, + "epoch": 0.318549806406195, + "grad_norm": 2.140247344970703, + "learning_rate": 1.1160073436393219e-05, + "loss": 0.7096, "step": 4525 }, { - "epoch": 0.34247663728197947, - "grad_norm": 1.9152315855026245, - "learning_rate": 1.4108447107820634e-05, - "loss": 0.7587, + "epoch": 0.3186202041534671, + "grad_norm": 1.946632742881775, + "learning_rate": 1.1158789666000067e-05, + "loss": 0.6509, "step": 4526 }, { - "epoch": 0.34255230600431313, - "grad_norm": 2.4261810779571533, - "learning_rate": 1.4106592695594997e-05, - "loss": 0.7208, + "epoch": 0.31869060190073917, + "grad_norm": 1.7702847719192505, + "learning_rate": 1.1157505679387676e-05, + "loss": 0.6857, "step": 4527 }, { - "epoch": 0.34262797472664674, - "grad_norm": 2.7643346786499023, - "learning_rate": 1.4104737963578006e-05, - "loss": 0.7681, + "epoch": 0.31876099964801125, + "grad_norm": 3.1344213485717773, + "learning_rate": 1.1156221476622797e-05, + "loss": 0.7142, "step": 4528 }, { - "epoch": 0.34270364344898036, - "grad_norm": 1.8809469938278198, - "learning_rate": 1.4102882911885817e-05, - "loss": 0.7987, + "epoch": 0.3188313973952834, + "grad_norm": 1.7905479669570923, + "learning_rate": 1.1154937057772202e-05, + "loss": 0.7129, "step": 4529 }, { - "epoch": 0.34277931217131397, - "grad_norm": 2.6248059272766113, - "learning_rate": 1.4101027540634591e-05, - "loss": 0.7945, + "epoch": 0.31890179514255546, + "grad_norm": 1.8716684579849243, + "learning_rate": 1.1153652422902666e-05, + "loss": 0.6955, "step": 4530 }, { - "epoch": 0.34285498089364763, - "grad_norm": 2.5834579467773438, - "learning_rate": 1.4099171849940526e-05, - "loss": 0.7192, + "epoch": 0.31897219288982753, + "grad_norm": 2.758645534515381, + "learning_rate": 1.1152367572080979e-05, + "loss": 0.7816, "step": 4531 }, { - "epoch": 0.34293064961598124, - "grad_norm": 2.4351603984832764, - "learning_rate": 1.4097315839919824e-05, - "loss": 0.79, + "epoch": 0.3190425906370996, + "grad_norm": 1.8886823654174805, + "learning_rate": 1.1151082505373939e-05, + "loss": 0.6938, "step": 4532 }, { - "epoch": 0.34300631833831485, - "grad_norm": 1.948221206665039, - "learning_rate": 1.4095459510688717e-05, - "loss": 0.5718, + "epoch": 0.3191129883843717, + "grad_norm": 1.9985222816467285, + "learning_rate": 1.1149797222848363e-05, + "loss": 0.6005, "step": 4533 }, { - "epoch": 0.34308198706064846, - "grad_norm": 2.2318930625915527, - "learning_rate": 1.4093602862363455e-05, - "loss": 0.673, + "epoch": 0.31918338613164376, + "grad_norm": 2.217329502105713, + "learning_rate": 1.114851172457107e-05, + "loss": 0.6462, "step": 4534 }, { - "epoch": 0.34315765578298213, - "grad_norm": 2.6008236408233643, - "learning_rate": 1.4091745895060307e-05, - "loss": 0.7378, + "epoch": 0.3192537838789159, + "grad_norm": 1.5407891273498535, + "learning_rate": 1.1147226010608895e-05, + "loss": 0.7123, "step": 4535 }, { - "epoch": 0.34323332450531574, - "grad_norm": 2.340876579284668, - "learning_rate": 1.4089888608895564e-05, - "loss": 0.6036, + "epoch": 0.319324181626188, + "grad_norm": 1.9387365579605103, + "learning_rate": 1.1145940081028683e-05, + "loss": 0.7298, "step": 4536 }, { - "epoch": 0.34330899322764935, - "grad_norm": 2.4017269611358643, - "learning_rate": 1.4088031003985535e-05, - "loss": 0.6923, + "epoch": 0.31939457937346005, + "grad_norm": 1.7741397619247437, + "learning_rate": 1.1144653935897293e-05, + "loss": 0.5994, "step": 4537 }, { - "epoch": 0.34338466194998296, - "grad_norm": 1.9680914878845215, - "learning_rate": 1.4086173080446543e-05, - "loss": 0.91, + "epoch": 0.31946497712073213, + "grad_norm": 2.4662833213806152, + "learning_rate": 1.114336757528159e-05, + "loss": 0.8573, "step": 4538 }, { - "epoch": 0.34346033067231657, - "grad_norm": 1.9939541816711426, - "learning_rate": 1.4084314838394944e-05, - "loss": 0.7028, + "epoch": 0.3195353748680042, + "grad_norm": 1.8365468978881836, + "learning_rate": 1.1142080999248454e-05, + "loss": 0.6287, "step": 4539 }, { - "epoch": 0.34353599939465024, - "grad_norm": 2.0983216762542725, - "learning_rate": 1.4082456277947105e-05, - "loss": 0.8404, + "epoch": 0.3196057726152763, + "grad_norm": 1.629912257194519, + "learning_rate": 1.1140794207864777e-05, + "loss": 0.7033, "step": 4540 }, { - "epoch": 0.34361166811698385, - "grad_norm": 2.5122082233428955, - "learning_rate": 1.4080597399219415e-05, - "loss": 0.7113, + "epoch": 0.3196761703625484, + "grad_norm": 1.7100404500961304, + "learning_rate": 1.1139507201197459e-05, + "loss": 0.781, "step": 4541 }, { - "epoch": 0.34368733683931746, - "grad_norm": 2.6036436557769775, - "learning_rate": 1.4078738202328287e-05, - "loss": 0.7351, + "epoch": 0.3197465681098205, + "grad_norm": 1.9585187435150146, + "learning_rate": 1.1138219979313412e-05, + "loss": 0.6616, "step": 4542 }, { - "epoch": 0.34376300556165107, - "grad_norm": 2.5619027614593506, - "learning_rate": 1.4076878687390143e-05, - "loss": 0.8871, + "epoch": 0.31981696585709257, + "grad_norm": 1.6772695779800415, + "learning_rate": 1.1136932542279559e-05, + "loss": 0.6891, "step": 4543 }, { - "epoch": 0.34383867428398474, - "grad_norm": 2.4395925998687744, - "learning_rate": 1.4075018854521434e-05, - "loss": 0.7969, + "epoch": 0.31988736360436465, + "grad_norm": 1.9039900302886963, + "learning_rate": 1.113564489016284e-05, + "loss": 0.7151, "step": 4544 }, { - "epoch": 0.34391434300631835, - "grad_norm": 2.3042728900909424, - "learning_rate": 1.4073158703838632e-05, - "loss": 0.6841, + "epoch": 0.3199577613516367, + "grad_norm": 2.159139633178711, + "learning_rate": 1.1134357023030196e-05, + "loss": 0.7002, "step": 4545 }, { - "epoch": 0.34399001172865196, - "grad_norm": 1.9354417324066162, - "learning_rate": 1.4071298235458222e-05, - "loss": 0.6648, + "epoch": 0.32002815909890886, + "grad_norm": 1.7786035537719727, + "learning_rate": 1.1133068940948585e-05, + "loss": 0.6595, "step": 4546 }, { - "epoch": 0.34406568045098557, - "grad_norm": 2.020469903945923, - "learning_rate": 1.4069437449496715e-05, - "loss": 0.8337, + "epoch": 0.32009855684618094, + "grad_norm": 1.8771088123321533, + "learning_rate": 1.1131780643984977e-05, + "loss": 0.7539, "step": 4547 }, { - "epoch": 0.34414134917331923, - "grad_norm": 2.3265795707702637, - "learning_rate": 1.4067576346070637e-05, - "loss": 0.7247, + "epoch": 0.320168954593453, + "grad_norm": 1.5121489763259888, + "learning_rate": 1.1130492132206352e-05, + "loss": 0.6248, "step": 4548 }, { - "epoch": 0.34421701789565284, - "grad_norm": 1.9919472932815552, - "learning_rate": 1.4065714925296538e-05, - "loss": 0.7174, + "epoch": 0.3202393523407251, + "grad_norm": 2.047481060028076, + "learning_rate": 1.11292034056797e-05, + "loss": 0.728, "step": 4549 }, { - "epoch": 0.34429268661798645, - "grad_norm": 4.500394821166992, - "learning_rate": 1.4063853187290988e-05, - "loss": 0.8554, + "epoch": 0.32030975008799717, + "grad_norm": 1.6514313220977783, + "learning_rate": 1.1127914464472023e-05, + "loss": 0.7265, "step": 4550 }, { - "epoch": 0.34436835534032006, - "grad_norm": 3.009972095489502, - "learning_rate": 1.4061991132170571e-05, - "loss": 0.9941, + "epoch": 0.32038014783526925, + "grad_norm": 1.8785545825958252, + "learning_rate": 1.1126625308650335e-05, + "loss": 0.7066, "step": 4551 }, { - "epoch": 0.3444440240626537, - "grad_norm": 2.4552001953125, - "learning_rate": 1.4060128760051897e-05, - "loss": 0.7654, + "epoch": 0.3204505455825414, + "grad_norm": 1.7513831853866577, + "learning_rate": 1.112533593828166e-05, + "loss": 0.7741, "step": 4552 }, { - "epoch": 0.34451969278498734, - "grad_norm": 1.9885838031768799, - "learning_rate": 1.4058266071051593e-05, - "loss": 0.858, + "epoch": 0.32052094332981346, + "grad_norm": 2.0010428428649902, + "learning_rate": 1.1124046353433035e-05, + "loss": 0.6063, "step": 4553 }, { - "epoch": 0.34459536150732095, - "grad_norm": 2.1382341384887695, - "learning_rate": 1.4056403065286308e-05, - "loss": 0.8359, + "epoch": 0.32059134107708553, + "grad_norm": 1.9702069759368896, + "learning_rate": 1.1122756554171502e-05, + "loss": 0.7993, "step": 4554 }, { - "epoch": 0.34467103022965456, - "grad_norm": 1.8705214262008667, - "learning_rate": 1.4054539742872708e-05, - "loss": 0.6685, + "epoch": 0.3206617388243576, + "grad_norm": 1.8853092193603516, + "learning_rate": 1.1121466540564123e-05, + "loss": 0.6174, "step": 4555 }, { - "epoch": 0.3447466989519882, - "grad_norm": 2.376476287841797, - "learning_rate": 1.405267610392748e-05, - "loss": 0.9648, + "epoch": 0.3207321365716297, + "grad_norm": 1.942036747932434, + "learning_rate": 1.1120176312677967e-05, + "loss": 0.7992, "step": 4556 }, { - "epoch": 0.34482236767432184, - "grad_norm": 2.8174350261688232, - "learning_rate": 1.4050812148567337e-05, - "loss": 0.7507, + "epoch": 0.3208025343189018, + "grad_norm": 1.8211241960525513, + "learning_rate": 1.1118885870580113e-05, + "loss": 0.7139, "step": 4557 }, { - "epoch": 0.34489803639665545, - "grad_norm": 2.0304601192474365, - "learning_rate": 1.4048947876908994e-05, - "loss": 1.0117, + "epoch": 0.3208729320661739, + "grad_norm": 1.650458574295044, + "learning_rate": 1.111759521433765e-05, + "loss": 0.6779, "step": 4558 }, { - "epoch": 0.34497370511898906, - "grad_norm": 2.247481107711792, - "learning_rate": 1.4047083289069209e-05, - "loss": 0.7993, + "epoch": 0.320943329813446, + "grad_norm": 1.901129961013794, + "learning_rate": 1.1116304344017685e-05, + "loss": 0.8199, "step": 4559 }, { - "epoch": 0.34504937384132267, - "grad_norm": 2.2741150856018066, - "learning_rate": 1.4045218385164743e-05, - "loss": 0.8526, + "epoch": 0.32101372756071805, + "grad_norm": 2.030168294906616, + "learning_rate": 1.1115013259687329e-05, + "loss": 0.6557, "step": 4560 }, { - "epoch": 0.34512504256365634, - "grad_norm": 1.9066141843795776, - "learning_rate": 1.4043353165312383e-05, - "loss": 0.676, + "epoch": 0.32108412530799013, + "grad_norm": 1.7547634840011597, + "learning_rate": 1.1113721961413707e-05, + "loss": 0.6209, "step": 4561 }, { - "epoch": 0.34520071128598995, - "grad_norm": 2.4865188598632812, - "learning_rate": 1.4041487629628936e-05, - "loss": 0.7842, + "epoch": 0.3211545230552622, + "grad_norm": 1.9571545124053955, + "learning_rate": 1.1112430449263954e-05, + "loss": 0.7753, "step": 4562 }, { - "epoch": 0.34527638000832356, - "grad_norm": 2.2557551860809326, - "learning_rate": 1.4039621778231228e-05, - "loss": 0.8724, + "epoch": 0.32122492080253434, + "grad_norm": 2.167308807373047, + "learning_rate": 1.1111138723305217e-05, + "loss": 0.8753, "step": 4563 }, { - "epoch": 0.34535204873065717, - "grad_norm": 3.9412360191345215, - "learning_rate": 1.4037755611236103e-05, - "loss": 0.6768, + "epoch": 0.3212953185498064, + "grad_norm": 2.58717679977417, + "learning_rate": 1.1109846783604654e-05, + "loss": 0.7173, "step": 4564 }, { - "epoch": 0.3454277174529908, - "grad_norm": 2.0502843856811523, - "learning_rate": 1.403588912876043e-05, - "loss": 0.614, + "epoch": 0.3213657162970785, + "grad_norm": 3.0318500995635986, + "learning_rate": 1.1108554630229436e-05, + "loss": 0.6175, "step": 4565 }, { - "epoch": 0.34550338617532445, - "grad_norm": 2.2128398418426514, - "learning_rate": 1.403402233092109e-05, - "loss": 0.8441, + "epoch": 0.3214361140443506, + "grad_norm": 1.869942307472229, + "learning_rate": 1.1107262263246741e-05, + "loss": 0.6895, "step": 4566 }, { - "epoch": 0.34557905489765806, - "grad_norm": 1.9671826362609863, - "learning_rate": 1.403215521783499e-05, - "loss": 0.7818, + "epoch": 0.32150651179162265, + "grad_norm": 2.0008323192596436, + "learning_rate": 1.1105969682723763e-05, + "loss": 0.783, "step": 4567 }, { - "epoch": 0.34565472361999167, - "grad_norm": 2.188419818878174, - "learning_rate": 1.4030287789619055e-05, - "loss": 0.7472, + "epoch": 0.32157690953889473, + "grad_norm": 1.5717226266860962, + "learning_rate": 1.11046768887277e-05, + "loss": 0.6616, "step": 4568 }, { - "epoch": 0.3457303923423253, - "grad_norm": 2.099836587905884, - "learning_rate": 1.4028420046390227e-05, - "loss": 0.7362, + "epoch": 0.32164730728616686, + "grad_norm": 1.7672160863876343, + "learning_rate": 1.110338388132577e-05, + "loss": 0.8188, "step": 4569 }, { - "epoch": 0.34580606106465894, - "grad_norm": 2.1307973861694336, - "learning_rate": 1.4026551988265472e-05, - "loss": 0.824, + "epoch": 0.32171770503343894, + "grad_norm": 1.9061336517333984, + "learning_rate": 1.1102090660585196e-05, + "loss": 0.7064, "step": 4570 }, { - "epoch": 0.34588172978699255, - "grad_norm": 2.5330209732055664, - "learning_rate": 1.4024683615361774e-05, - "loss": 0.8464, + "epoch": 0.321788102780711, + "grad_norm": 1.7830960750579834, + "learning_rate": 1.1100797226573213e-05, + "loss": 0.7017, "step": 4571 }, { - "epoch": 0.34595739850932616, - "grad_norm": 2.512056589126587, - "learning_rate": 1.4022814927796137e-05, - "loss": 0.8106, + "epoch": 0.3218585005279831, + "grad_norm": 1.8475191593170166, + "learning_rate": 1.1099503579357067e-05, + "loss": 0.8142, "step": 4572 }, { - "epoch": 0.3460330672316598, - "grad_norm": 2.4882543087005615, - "learning_rate": 1.4020945925685584e-05, - "loss": 0.7155, + "epoch": 0.32192889827525517, + "grad_norm": 1.7821784019470215, + "learning_rate": 1.1098209719004018e-05, + "loss": 0.7295, "step": 4573 }, { - "epoch": 0.34610873595399344, - "grad_norm": 2.098118782043457, - "learning_rate": 1.4019076609147158e-05, - "loss": 0.6978, + "epoch": 0.3219992960225273, + "grad_norm": 1.7113350629806519, + "learning_rate": 1.1096915645581333e-05, + "loss": 0.6207, "step": 4574 }, { - "epoch": 0.34618440467632705, - "grad_norm": 2.1440021991729736, - "learning_rate": 1.401720697829792e-05, - "loss": 0.6937, + "epoch": 0.3220696937697994, + "grad_norm": 1.9233964681625366, + "learning_rate": 1.1095621359156296e-05, + "loss": 0.7547, "step": 4575 }, { - "epoch": 0.34626007339866066, - "grad_norm": 2.386068820953369, - "learning_rate": 1.401533703325495e-05, - "loss": 0.7601, + "epoch": 0.32214009151707146, + "grad_norm": 1.9837182760238647, + "learning_rate": 1.1094326859796193e-05, + "loss": 0.6882, "step": 4576 }, { - "epoch": 0.34633574212099427, - "grad_norm": 2.6994619369506836, - "learning_rate": 1.4013466774135355e-05, - "loss": 0.5682, + "epoch": 0.32221048926434354, + "grad_norm": 1.8245409727096558, + "learning_rate": 1.109303214756833e-05, + "loss": 0.7248, "step": 4577 }, { - "epoch": 0.3464114108433279, - "grad_norm": 2.2204699516296387, - "learning_rate": 1.4011596201056259e-05, - "loss": 0.7343, + "epoch": 0.3222808870116156, + "grad_norm": 2.453852891921997, + "learning_rate": 1.1091737222540018e-05, + "loss": 0.7288, "step": 4578 }, { - "epoch": 0.34648707956566155, - "grad_norm": 2.2778122425079346, - "learning_rate": 1.4009725314134795e-05, - "loss": 0.673, + "epoch": 0.3223512847588877, + "grad_norm": 1.9179730415344238, + "learning_rate": 1.1090442084778582e-05, + "loss": 0.697, "step": 4579 }, { - "epoch": 0.34656274828799516, - "grad_norm": 2.2870404720306396, - "learning_rate": 1.4007854113488132e-05, - "loss": 0.7098, + "epoch": 0.3224216825061598, + "grad_norm": 1.748883605003357, + "learning_rate": 1.1089146734351356e-05, + "loss": 0.5814, "step": 4580 }, { - "epoch": 0.34663841701032877, - "grad_norm": 1.8866722583770752, - "learning_rate": 1.4005982599233442e-05, - "loss": 0.6607, + "epoch": 0.3224920802534319, + "grad_norm": 2.091643810272217, + "learning_rate": 1.1087851171325692e-05, + "loss": 0.5576, "step": 4581 }, { - "epoch": 0.3467140857326624, - "grad_norm": 1.7772208452224731, - "learning_rate": 1.4004110771487935e-05, - "loss": 0.8816, + "epoch": 0.322562478000704, + "grad_norm": 1.7303913831710815, + "learning_rate": 1.1086555395768938e-05, + "loss": 0.6461, "step": 4582 }, { - "epoch": 0.34678975445499605, - "grad_norm": 2.5590288639068604, - "learning_rate": 1.4002238630368825e-05, - "loss": 0.8935, + "epoch": 0.32263287574797606, + "grad_norm": 2.162034511566162, + "learning_rate": 1.1085259407748472e-05, + "loss": 0.8161, "step": 4583 }, { - "epoch": 0.34686542317732966, - "grad_norm": 2.385871171951294, - "learning_rate": 1.4000366175993354e-05, - "loss": 0.8033, + "epoch": 0.32270327349524813, + "grad_norm": 1.9917607307434082, + "learning_rate": 1.108396320733167e-05, + "loss": 0.633, "step": 4584 }, { - "epoch": 0.34694109189966327, - "grad_norm": 2.1621901988983154, - "learning_rate": 1.3998493408478778e-05, - "loss": 0.8084, + "epoch": 0.32277367124252027, + "grad_norm": 1.821867823600769, + "learning_rate": 1.1082666794585921e-05, + "loss": 0.7088, "step": 4585 }, { - "epoch": 0.3470167606219969, - "grad_norm": 2.374621868133545, - "learning_rate": 1.3996620327942377e-05, - "loss": 0.7405, + "epoch": 0.32284406898979234, + "grad_norm": 1.887601613998413, + "learning_rate": 1.1081370169578629e-05, + "loss": 0.6092, "step": 4586 }, { - "epoch": 0.34709242934433054, - "grad_norm": 1.7292189598083496, - "learning_rate": 1.3994746934501451e-05, - "loss": 0.759, + "epoch": 0.3229144667370644, + "grad_norm": 1.7601211071014404, + "learning_rate": 1.1080073332377203e-05, + "loss": 0.7292, "step": 4587 }, { - "epoch": 0.34716809806666415, - "grad_norm": 1.871042013168335, - "learning_rate": 1.3992873228273317e-05, - "loss": 0.6202, + "epoch": 0.3229848644843365, + "grad_norm": 1.98417329788208, + "learning_rate": 1.107877628304907e-05, + "loss": 0.7369, "step": 4588 }, { - "epoch": 0.34724376678899777, - "grad_norm": 2.0830862522125244, - "learning_rate": 1.3990999209375314e-05, - "loss": 0.915, + "epoch": 0.3230552622316086, + "grad_norm": 1.8054105043411255, + "learning_rate": 1.1077479021661664e-05, + "loss": 0.7677, "step": 4589 }, { - "epoch": 0.3473194355113314, - "grad_norm": 2.267789363861084, - "learning_rate": 1.3989124877924795e-05, - "loss": 0.8031, + "epoch": 0.32312565997888065, + "grad_norm": 1.7453300952911377, + "learning_rate": 1.1076181548282432e-05, + "loss": 0.6913, "step": 4590 }, { - "epoch": 0.34739510423366504, - "grad_norm": 2.1552555561065674, - "learning_rate": 1.3987250234039143e-05, - "loss": 0.818, + "epoch": 0.3231960577261528, + "grad_norm": 1.6390944719314575, + "learning_rate": 1.107488386297883e-05, + "loss": 0.7593, "step": 4591 }, { - "epoch": 0.34747077295599865, - "grad_norm": 1.9342949390411377, - "learning_rate": 1.3985375277835748e-05, - "loss": 0.7326, + "epoch": 0.32326645547342486, + "grad_norm": 2.281402111053467, + "learning_rate": 1.1073585965818324e-05, + "loss": 0.7702, "step": 4592 }, { - "epoch": 0.34754644167833226, - "grad_norm": 2.7303926944732666, - "learning_rate": 1.3983500009432028e-05, - "loss": 0.7703, + "epoch": 0.32333685322069694, + "grad_norm": 1.8012315034866333, + "learning_rate": 1.1072287856868396e-05, + "loss": 0.7866, "step": 4593 }, { - "epoch": 0.3476221104006659, - "grad_norm": 2.3340682983398438, - "learning_rate": 1.3981624428945419e-05, - "loss": 0.6769, + "epoch": 0.323407250967969, + "grad_norm": 1.9631046056747437, + "learning_rate": 1.1070989536196533e-05, + "loss": 0.7146, "step": 4594 }, { - "epoch": 0.3476977791229995, - "grad_norm": 2.0905210971832275, - "learning_rate": 1.3979748536493376e-05, - "loss": 0.8423, + "epoch": 0.3234776487152411, + "grad_norm": 1.7890956401824951, + "learning_rate": 1.1069691003870236e-05, + "loss": 0.6994, "step": 4595 }, { - "epoch": 0.34777344784533315, - "grad_norm": 2.2317705154418945, - "learning_rate": 1.3977872332193375e-05, - "loss": 0.6386, + "epoch": 0.3235480464625132, + "grad_norm": 1.6904252767562866, + "learning_rate": 1.106839225995702e-05, + "loss": 0.7117, "step": 4596 }, { - "epoch": 0.34784911656766676, - "grad_norm": 2.2130250930786133, - "learning_rate": 1.3975995816162904e-05, - "loss": 0.8846, + "epoch": 0.3236184442097853, + "grad_norm": 1.975051760673523, + "learning_rate": 1.1067093304524406e-05, + "loss": 0.7867, "step": 4597 }, { - "epoch": 0.34792478529000037, - "grad_norm": 2.068493127822876, - "learning_rate": 1.3974118988519486e-05, - "loss": 0.838, + "epoch": 0.3236888419570574, + "grad_norm": 1.7070729732513428, + "learning_rate": 1.1065794137639929e-05, + "loss": 0.7419, "step": 4598 }, { - "epoch": 0.348000454012334, - "grad_norm": 2.0455193519592285, - "learning_rate": 1.3972241849380645e-05, - "loss": 0.6782, + "epoch": 0.32375923970432946, + "grad_norm": 1.7105756998062134, + "learning_rate": 1.1064494759371128e-05, + "loss": 0.7156, "step": 4599 }, { - "epoch": 0.34807612273466765, - "grad_norm": 1.4974215030670166, - "learning_rate": 1.3970364398863938e-05, - "loss": 0.7652, + "epoch": 0.32382963745160154, + "grad_norm": 1.9721319675445557, + "learning_rate": 1.1063195169785566e-05, + "loss": 0.72, "step": 4600 }, { - "epoch": 0.34815179145700126, - "grad_norm": 1.7549113035202026, - "learning_rate": 1.3968486637086936e-05, - "loss": 0.8342, + "epoch": 0.3239000351988736, + "grad_norm": 2.0302064418792725, + "learning_rate": 1.1061895368950809e-05, + "loss": 0.7413, "step": 4601 }, { - "epoch": 0.34822746017933487, - "grad_norm": 2.3468220233917236, - "learning_rate": 1.3966608564167231e-05, - "loss": 0.6665, + "epoch": 0.32397043294614575, + "grad_norm": 2.002274513244629, + "learning_rate": 1.106059535693443e-05, + "loss": 0.754, "step": 4602 }, { - "epoch": 0.3483031289016685, - "grad_norm": 2.2296173572540283, - "learning_rate": 1.3964730180222437e-05, - "loss": 0.8537, + "epoch": 0.3240408306934178, + "grad_norm": 2.106182098388672, + "learning_rate": 1.1059295133804022e-05, + "loss": 0.7162, "step": 4603 }, { - "epoch": 0.34837879762400215, - "grad_norm": 2.597890853881836, - "learning_rate": 1.3962851485370178e-05, - "loss": 0.7739, + "epoch": 0.3241112284406899, + "grad_norm": 1.8628323078155518, + "learning_rate": 1.1057994699627185e-05, + "loss": 0.7207, "step": 4604 }, { - "epoch": 0.34845446634633576, - "grad_norm": 2.384274482727051, - "learning_rate": 1.3960972479728105e-05, - "loss": 0.7863, + "epoch": 0.324181626187962, + "grad_norm": 1.656383991241455, + "learning_rate": 1.1056694054471527e-05, + "loss": 0.6618, "step": 4605 }, { - "epoch": 0.34853013506866937, - "grad_norm": 2.0478515625, - "learning_rate": 1.3959093163413893e-05, - "loss": 0.8538, + "epoch": 0.32425202393523406, + "grad_norm": 3.647233247756958, + "learning_rate": 1.1055393198404673e-05, + "loss": 0.6319, "step": 4606 }, { - "epoch": 0.348605803791003, - "grad_norm": 2.03800630569458, - "learning_rate": 1.3957213536545227e-05, - "loss": 0.716, + "epoch": 0.32432242168250613, + "grad_norm": 2.2536778450012207, + "learning_rate": 1.1054092131494251e-05, + "loss": 0.6087, "step": 4607 }, { - "epoch": 0.3486814725133366, - "grad_norm": 2.1581063270568848, - "learning_rate": 1.3955333599239813e-05, - "loss": 0.6596, + "epoch": 0.32439281942977827, + "grad_norm": 1.7093312740325928, + "learning_rate": 1.105279085380791e-05, + "loss": 0.7468, "step": 4608 }, { - "epoch": 0.34875714123567025, - "grad_norm": 2.4075028896331787, - "learning_rate": 1.3953453351615387e-05, - "loss": 0.7188, + "epoch": 0.32446321717705034, + "grad_norm": 1.6143155097961426, + "learning_rate": 1.10514893654133e-05, + "loss": 0.7782, "step": 4609 }, { - "epoch": 0.34883280995800386, - "grad_norm": 2.663573980331421, - "learning_rate": 1.3951572793789685e-05, - "loss": 0.6679, + "epoch": 0.3245336149243224, + "grad_norm": 1.9205514192581177, + "learning_rate": 1.105018766637809e-05, + "loss": 0.5773, "step": 4610 }, { - "epoch": 0.3489084786803375, - "grad_norm": 1.9524000883102417, - "learning_rate": 1.3949691925880481e-05, - "loss": 0.8243, + "epoch": 0.3246040126715945, + "grad_norm": 2.089189052581787, + "learning_rate": 1.1048885756769956e-05, + "loss": 0.7101, "step": 4611 }, { - "epoch": 0.3489841474026711, - "grad_norm": 2.518850803375244, - "learning_rate": 1.3947810748005563e-05, - "loss": 0.7894, + "epoch": 0.3246744104188666, + "grad_norm": 1.739653468132019, + "learning_rate": 1.1047583636656583e-05, + "loss": 0.7111, "step": 4612 }, { - "epoch": 0.34905981612500475, - "grad_norm": 3.2514965534210205, - "learning_rate": 1.3945929260282729e-05, - "loss": 0.9017, + "epoch": 0.3247448081661387, + "grad_norm": 1.8078001737594604, + "learning_rate": 1.1046281306105672e-05, + "loss": 0.6738, "step": 4613 }, { - "epoch": 0.34913548484733836, - "grad_norm": 2.429962635040283, - "learning_rate": 1.3944047462829808e-05, - "loss": 0.7169, + "epoch": 0.3248152059134108, + "grad_norm": 1.8395683765411377, + "learning_rate": 1.104497876518493e-05, + "loss": 0.6698, "step": 4614 }, { - "epoch": 0.349211153569672, - "grad_norm": 2.0547587871551514, - "learning_rate": 1.3942165355764644e-05, - "loss": 0.7357, + "epoch": 0.32488560366068286, + "grad_norm": 1.9073609113693237, + "learning_rate": 1.104367601396208e-05, + "loss": 0.6531, "step": 4615 }, { - "epoch": 0.3492868222920056, - "grad_norm": 1.767414927482605, - "learning_rate": 1.3940282939205102e-05, - "loss": 0.8615, + "epoch": 0.32495600140795494, + "grad_norm": 1.5625337362289429, + "learning_rate": 1.104237305250485e-05, + "loss": 0.6061, "step": 4616 }, { - "epoch": 0.34936249101433925, - "grad_norm": 3.1079351902008057, - "learning_rate": 1.3938400213269061e-05, - "loss": 0.7191, + "epoch": 0.325026399155227, + "grad_norm": 2.3765017986297607, + "learning_rate": 1.1041069880880987e-05, + "loss": 0.8023, "step": 4617 }, { - "epoch": 0.34943815973667286, - "grad_norm": 2.7524807453155518, - "learning_rate": 1.3936517178074428e-05, - "loss": 0.8091, + "epoch": 0.3250967969024991, + "grad_norm": 1.7506581544876099, + "learning_rate": 1.1039766499158238e-05, + "loss": 0.6371, "step": 4618 }, { - "epoch": 0.34951382845900647, - "grad_norm": 2.906195878982544, - "learning_rate": 1.3934633833739122e-05, - "loss": 0.7484, + "epoch": 0.32516719464977123, + "grad_norm": 1.9522844552993774, + "learning_rate": 1.1038462907404372e-05, + "loss": 0.7115, "step": 4619 }, { - "epoch": 0.3495894971813401, - "grad_norm": 2.340843677520752, - "learning_rate": 1.3932750180381083e-05, - "loss": 0.6663, + "epoch": 0.3252375923970433, + "grad_norm": 2.135820150375366, + "learning_rate": 1.1037159105687162e-05, + "loss": 0.7412, "step": 4620 }, { - "epoch": 0.3496651659036737, - "grad_norm": 2.3411765098571777, - "learning_rate": 1.3930866218118278e-05, - "loss": 0.7908, + "epoch": 0.3253079901443154, + "grad_norm": 1.8366615772247314, + "learning_rate": 1.1035855094074394e-05, + "loss": 0.8126, "step": 4621 }, { - "epoch": 0.34974083462600736, - "grad_norm": 2.4406328201293945, - "learning_rate": 1.3928981947068676e-05, - "loss": 0.8513, + "epoch": 0.32537838789158746, + "grad_norm": 1.8052557706832886, + "learning_rate": 1.1034550872633863e-05, + "loss": 0.736, "step": 4622 }, { - "epoch": 0.34981650334834097, - "grad_norm": 2.0467100143432617, - "learning_rate": 1.3927097367350286e-05, - "loss": 0.7571, + "epoch": 0.32544878563885954, + "grad_norm": 3.2700324058532715, + "learning_rate": 1.103324644143338e-05, + "loss": 0.6936, "step": 4623 }, { - "epoch": 0.3498921720706746, - "grad_norm": 11.604252815246582, - "learning_rate": 1.3925212479081125e-05, - "loss": 0.7548, + "epoch": 0.3255191833861316, + "grad_norm": 1.7796331644058228, + "learning_rate": 1.103194180054076e-05, + "loss": 0.7045, "step": 4624 }, { - "epoch": 0.3499678407930082, - "grad_norm": 2.3513424396514893, - "learning_rate": 1.3923327282379224e-05, - "loss": 0.7268, + "epoch": 0.32558958113340375, + "grad_norm": 2.363058090209961, + "learning_rate": 1.1030636950023835e-05, + "loss": 0.6893, "step": 4625 }, { - "epoch": 0.35004350951534186, - "grad_norm": 2.104701042175293, - "learning_rate": 1.3921441777362647e-05, - "loss": 0.7942, + "epoch": 0.3256599788806758, + "grad_norm": 2.303678035736084, + "learning_rate": 1.1029331889950444e-05, + "loss": 0.7803, "step": 4626 }, { - "epoch": 0.35011917823767547, - "grad_norm": 2.1579172611236572, - "learning_rate": 1.3919555964149467e-05, - "loss": 0.7785, + "epoch": 0.3257303766279479, + "grad_norm": 2.334019184112549, + "learning_rate": 1.102802662038844e-05, + "loss": 0.7398, "step": 4627 }, { - "epoch": 0.3501948469600091, - "grad_norm": 1.857549786567688, - "learning_rate": 1.391766984285778e-05, - "loss": 0.7444, + "epoch": 0.32580077437522, + "grad_norm": 2.066189765930176, + "learning_rate": 1.1026721141405684e-05, + "loss": 0.7208, "step": 4628 }, { - "epoch": 0.3502705156823427, - "grad_norm": 2.2894248962402344, - "learning_rate": 1.3915783413605705e-05, - "loss": 0.6749, + "epoch": 0.32587117212249206, + "grad_norm": 1.7049401998519897, + "learning_rate": 1.1025415453070047e-05, + "loss": 0.6645, "step": 4629 }, { - "epoch": 0.35034618440467635, - "grad_norm": 2.237196207046509, - "learning_rate": 1.3913896676511369e-05, - "loss": 0.845, + "epoch": 0.3259415698697642, + "grad_norm": 2.991091251373291, + "learning_rate": 1.1024109555449417e-05, + "loss": 0.8071, "step": 4630 }, { - "epoch": 0.35042185312700996, - "grad_norm": 1.8301403522491455, - "learning_rate": 1.3912009631692933e-05, - "loss": 0.8037, + "epoch": 0.32601196761703627, + "grad_norm": 2.0745015144348145, + "learning_rate": 1.1022803448611687e-05, + "loss": 0.7992, "step": 4631 }, { - "epoch": 0.3504975218493436, - "grad_norm": 2.5242412090301514, - "learning_rate": 1.3910122279268563e-05, - "loss": 0.8057, + "epoch": 0.32608236536430835, + "grad_norm": 1.8294928073883057, + "learning_rate": 1.1021497132624763e-05, + "loss": 0.6721, "step": 4632 }, { - "epoch": 0.3505731905716772, - "grad_norm": 2.2044260501861572, - "learning_rate": 1.3908234619356456e-05, - "loss": 0.8019, + "epoch": 0.3261527631115804, + "grad_norm": 2.14886212348938, + "learning_rate": 1.1020190607556558e-05, + "loss": 0.6396, "step": 4633 }, { - "epoch": 0.3506488592940108, - "grad_norm": 2.227987289428711, - "learning_rate": 1.3906346652074823e-05, - "loss": 0.7116, + "epoch": 0.3262231608588525, + "grad_norm": 2.076068162918091, + "learning_rate": 1.1018883873475004e-05, + "loss": 0.728, "step": 4634 }, { - "epoch": 0.35072452801634446, - "grad_norm": 2.293186902999878, - "learning_rate": 1.3904458377541892e-05, - "loss": 0.6701, + "epoch": 0.3262935586061246, + "grad_norm": 2.4488892555236816, + "learning_rate": 1.1017576930448039e-05, + "loss": 0.6635, "step": 4635 }, { - "epoch": 0.35080019673867807, - "grad_norm": 2.3717048168182373, - "learning_rate": 1.3902569795875918e-05, - "loss": 0.8981, + "epoch": 0.3263639563533967, + "grad_norm": 1.9414836168289185, + "learning_rate": 1.1016269778543608e-05, + "loss": 0.7165, "step": 4636 }, { - "epoch": 0.3508758654610117, - "grad_norm": 2.319140672683716, - "learning_rate": 1.3900680907195162e-05, - "loss": 0.718, + "epoch": 0.3264343541006688, + "grad_norm": 2.138953924179077, + "learning_rate": 1.1014962417829677e-05, + "loss": 0.6101, "step": 4637 }, { - "epoch": 0.3509515341833453, - "grad_norm": 2.073474168777466, - "learning_rate": 1.389879171161792e-05, - "loss": 0.5373, + "epoch": 0.32650475184794087, + "grad_norm": 3.3359997272491455, + "learning_rate": 1.1013654848374212e-05, + "loss": 0.635, "step": 4638 }, { - "epoch": 0.35102720290567896, - "grad_norm": 1.9484330415725708, - "learning_rate": 1.3896902209262496e-05, - "loss": 0.7226, + "epoch": 0.32657514959521294, + "grad_norm": 1.8301496505737305, + "learning_rate": 1.1012347070245196e-05, + "loss": 0.7697, "step": 4639 }, { - "epoch": 0.35110287162801257, - "grad_norm": 2.9964051246643066, - "learning_rate": 1.3895012400247216e-05, - "loss": 0.6546, + "epoch": 0.326645547342485, + "grad_norm": 4.553729057312012, + "learning_rate": 1.1011039083510625e-05, + "loss": 0.7247, "step": 4640 }, { - "epoch": 0.3511785403503462, - "grad_norm": 2.1370325088500977, - "learning_rate": 1.3893122284690426e-05, - "loss": 0.9182, + "epoch": 0.32671594508975715, + "grad_norm": 1.8439445495605469, + "learning_rate": 1.1009730888238496e-05, + "loss": 0.7486, "step": 4641 }, { - "epoch": 0.3512542090726798, - "grad_norm": 2.6059162616729736, - "learning_rate": 1.3891231862710495e-05, - "loss": 0.8695, + "epoch": 0.32678634283702923, + "grad_norm": 1.9864567518234253, + "learning_rate": 1.1008422484496828e-05, + "loss": 0.7061, "step": 4642 }, { - "epoch": 0.35132987779501346, - "grad_norm": 2.8685076236724854, - "learning_rate": 1.3889341134425802e-05, - "loss": 0.6888, + "epoch": 0.3268567405843013, + "grad_norm": 1.8083325624465942, + "learning_rate": 1.1007113872353646e-05, + "loss": 0.7466, "step": 4643 }, { - "epoch": 0.35140554651734707, - "grad_norm": 2.9174864292144775, - "learning_rate": 1.3887450099954757e-05, - "loss": 0.7698, + "epoch": 0.3269271383315734, + "grad_norm": 1.6427509784698486, + "learning_rate": 1.1005805051876983e-05, + "loss": 0.7971, "step": 4644 }, { - "epoch": 0.3514812152396807, - "grad_norm": 2.5367772579193115, - "learning_rate": 1.3885558759415778e-05, - "loss": 0.7116, + "epoch": 0.32699753607884546, + "grad_norm": 1.7813974618911743, + "learning_rate": 1.1004496023134887e-05, + "loss": 0.6812, "step": 4645 }, { - "epoch": 0.3515568839620143, - "grad_norm": 2.0750954151153564, - "learning_rate": 1.3883667112927305e-05, - "loss": 0.6243, + "epoch": 0.32706793382611754, + "grad_norm": 1.896641731262207, + "learning_rate": 1.1003186786195419e-05, + "loss": 0.7251, "step": 4646 }, { - "epoch": 0.3516325526843479, - "grad_norm": 1.9517443180084229, - "learning_rate": 1.3881775160607804e-05, - "loss": 0.596, + "epoch": 0.3271383315733897, + "grad_norm": 1.6980366706848145, + "learning_rate": 1.100187734112664e-05, + "loss": 0.7754, "step": 4647 }, { - "epoch": 0.35170822140668156, - "grad_norm": 2.155714988708496, - "learning_rate": 1.387988290257575e-05, - "loss": 0.636, + "epoch": 0.32720872932066175, + "grad_norm": 2.0339064598083496, + "learning_rate": 1.1000567687996639e-05, + "loss": 0.8209, "step": 4648 }, { - "epoch": 0.3517838901290152, - "grad_norm": 2.3737261295318604, - "learning_rate": 1.3877990338949647e-05, - "loss": 0.5942, + "epoch": 0.32727912706793383, + "grad_norm": 1.8670015335083008, + "learning_rate": 1.0999257826873498e-05, + "loss": 0.7814, "step": 4649 }, { - "epoch": 0.3518595588513488, - "grad_norm": 2.2594027519226074, - "learning_rate": 1.3876097469848013e-05, - "loss": 0.7346, + "epoch": 0.3273495248152059, + "grad_norm": 1.797736406326294, + "learning_rate": 1.099794775782532e-05, + "loss": 0.8017, "step": 4650 }, { - "epoch": 0.3519352275736824, - "grad_norm": 2.5711894035339355, - "learning_rate": 1.3874204295389382e-05, - "loss": 0.7492, + "epoch": 0.327419922562478, + "grad_norm": 2.3725647926330566, + "learning_rate": 1.0996637480920219e-05, + "loss": 0.7982, "step": 4651 }, { - "epoch": 0.35201089629601606, - "grad_norm": 1.7782737016677856, - "learning_rate": 1.3872310815692313e-05, - "loss": 0.7328, + "epoch": 0.32749032030975006, + "grad_norm": 2.1352100372314453, + "learning_rate": 1.0995326996226314e-05, + "loss": 0.7845, "step": 4652 }, { - "epoch": 0.3520865650183497, - "grad_norm": 2.088243246078491, - "learning_rate": 1.3870417030875383e-05, - "loss": 0.6446, + "epoch": 0.3275607180570222, + "grad_norm": 1.9671199321746826, + "learning_rate": 1.099401630381174e-05, + "loss": 0.7134, "step": 4653 }, { - "epoch": 0.3521622337406833, - "grad_norm": 1.7141531705856323, - "learning_rate": 1.3868522941057184e-05, - "loss": 0.6885, + "epoch": 0.32763111580429427, + "grad_norm": 2.284864664077759, + "learning_rate": 1.0992705403744644e-05, + "loss": 0.6979, "step": 4654 }, { - "epoch": 0.3522379024630169, - "grad_norm": 1.8343784809112549, - "learning_rate": 1.3866628546356334e-05, - "loss": 0.7294, + "epoch": 0.32770151355156635, + "grad_norm": 1.9225486516952515, + "learning_rate": 1.0991394296093175e-05, + "loss": 0.621, "step": 4655 }, { - "epoch": 0.35231357118535056, - "grad_norm": 2.9176583290100098, - "learning_rate": 1.386473384689146e-05, - "loss": 0.6369, + "epoch": 0.3277719112988384, + "grad_norm": 2.013397693634033, + "learning_rate": 1.0990082980925503e-05, + "loss": 0.7349, "step": 4656 }, { - "epoch": 0.35238923990768417, - "grad_norm": 2.6107664108276367, - "learning_rate": 1.3862838842781222e-05, - "loss": 0.6926, + "epoch": 0.3278423090461105, + "grad_norm": 8.000661849975586, + "learning_rate": 1.0988771458309803e-05, + "loss": 0.6884, "step": 4657 }, { - "epoch": 0.3524649086300178, - "grad_norm": 2.2640960216522217, - "learning_rate": 1.3860943534144288e-05, - "loss": 0.6889, + "epoch": 0.32791270679338264, + "grad_norm": 1.8687578439712524, + "learning_rate": 1.0987459728314262e-05, + "loss": 0.6876, "step": 4658 }, { - "epoch": 0.3525405773523514, - "grad_norm": 2.1459038257598877, - "learning_rate": 1.3859047921099342e-05, - "loss": 0.8295, + "epoch": 0.3279831045406547, + "grad_norm": 1.8512394428253174, + "learning_rate": 1.0986147791007078e-05, + "loss": 0.6985, "step": 4659 }, { - "epoch": 0.352616246074685, - "grad_norm": 2.1899123191833496, - "learning_rate": 1.3857152003765108e-05, - "loss": 0.7613, + "epoch": 0.3280535022879268, + "grad_norm": 1.790837049484253, + "learning_rate": 1.0984835646456463e-05, + "loss": 0.734, "step": 4660 }, { - "epoch": 0.35269191479701867, - "grad_norm": 2.238300085067749, - "learning_rate": 1.3855255782260297e-05, - "loss": 0.7159, + "epoch": 0.32812390003519887, + "grad_norm": 1.9225798845291138, + "learning_rate": 1.0983523294730632e-05, + "loss": 0.772, "step": 4661 }, { - "epoch": 0.3527675835193523, - "grad_norm": 2.169271945953369, - "learning_rate": 1.3853359256703668e-05, - "loss": 0.7097, + "epoch": 0.32819429778247095, + "grad_norm": 2.0373215675354004, + "learning_rate": 1.0982210735897817e-05, + "loss": 0.7677, "step": 4662 }, { - "epoch": 0.3528432522416859, - "grad_norm": 2.406660795211792, - "learning_rate": 1.3851462427213986e-05, - "loss": 0.9081, + "epoch": 0.328264695529743, + "grad_norm": 2.1895511150360107, + "learning_rate": 1.098089797002626e-05, + "loss": 0.8331, "step": 4663 }, { - "epoch": 0.3529189209640195, - "grad_norm": 2.2769057750701904, - "learning_rate": 1.3849565293910034e-05, - "loss": 0.7397, + "epoch": 0.32833509327701516, + "grad_norm": 2.0108344554901123, + "learning_rate": 1.0979584997184211e-05, + "loss": 0.8154, "step": 4664 }, { - "epoch": 0.35299458968635317, - "grad_norm": 2.031386137008667, - "learning_rate": 1.3847667856910621e-05, - "loss": 0.8047, + "epoch": 0.32840549102428723, + "grad_norm": 2.508824586868286, + "learning_rate": 1.0978271817439936e-05, + "loss": 0.7742, "step": 4665 }, { - "epoch": 0.3530702584086868, - "grad_norm": 2.512640953063965, - "learning_rate": 1.3845770116334561e-05, - "loss": 0.8456, + "epoch": 0.3284758887715593, + "grad_norm": 2.8454315662384033, + "learning_rate": 1.0976958430861703e-05, + "loss": 0.6922, "step": 4666 }, { - "epoch": 0.3531459271310204, - "grad_norm": 2.3293848037719727, - "learning_rate": 1.384387207230071e-05, - "loss": 0.6356, + "epoch": 0.3285462865188314, + "grad_norm": 2.1691746711730957, + "learning_rate": 1.0975644837517803e-05, + "loss": 0.7377, "step": 4667 }, { - "epoch": 0.353221595853354, - "grad_norm": 2.598417043685913, - "learning_rate": 1.384197372492792e-05, - "loss": 0.7213, + "epoch": 0.32861668426610346, + "grad_norm": 1.8969171047210693, + "learning_rate": 1.0974331037476525e-05, + "loss": 0.7452, "step": 4668 }, { - "epoch": 0.35329726457568766, - "grad_norm": 2.296818733215332, - "learning_rate": 1.3840075074335074e-05, - "loss": 0.7332, + "epoch": 0.3286870820133756, + "grad_norm": 1.7713415622711182, + "learning_rate": 1.0973017030806178e-05, + "loss": 0.6608, "step": 4669 }, { - "epoch": 0.3533729332980213, - "grad_norm": 1.8637245893478394, - "learning_rate": 1.3838176120641071e-05, - "loss": 0.4631, + "epoch": 0.3287574797606477, + "grad_norm": 1.90436851978302, + "learning_rate": 1.0971702817575077e-05, + "loss": 0.7849, "step": 4670 }, { - "epoch": 0.3534486020203549, - "grad_norm": 2.56315541267395, - "learning_rate": 1.3836276863964834e-05, - "loss": 0.7563, + "epoch": 0.32882787750791975, + "grad_norm": 2.22619891166687, + "learning_rate": 1.097038839785155e-05, + "loss": 0.768, "step": 4671 }, { - "epoch": 0.3535242707426885, - "grad_norm": 2.7444026470184326, - "learning_rate": 1.3834377304425298e-05, - "loss": 0.7095, + "epoch": 0.32889827525519183, + "grad_norm": 1.7953124046325684, + "learning_rate": 1.0969073771703933e-05, + "loss": 0.6407, "step": 4672 }, { - "epoch": 0.3535999394650221, - "grad_norm": 1.994140386581421, - "learning_rate": 1.3832477442141416e-05, - "loss": 0.6721, + "epoch": 0.3289686730024639, + "grad_norm": 1.6793264150619507, + "learning_rate": 1.0967758939200578e-05, + "loss": 0.7896, "step": 4673 }, { - "epoch": 0.35367560818735577, - "grad_norm": 2.4888455867767334, - "learning_rate": 1.3830577277232164e-05, - "loss": 0.7671, + "epoch": 0.329039070749736, + "grad_norm": 2.3033206462860107, + "learning_rate": 1.0966443900409841e-05, + "loss": 0.7183, "step": 4674 }, { - "epoch": 0.3537512769096894, - "grad_norm": 1.9443211555480957, - "learning_rate": 1.3828676809816543e-05, - "loss": 0.795, + "epoch": 0.3291094684970081, + "grad_norm": 1.9964032173156738, + "learning_rate": 1.0965128655400094e-05, + "loss": 0.6547, "step": 4675 }, { - "epoch": 0.353826945632023, - "grad_norm": 2.254079818725586, - "learning_rate": 1.3826776040013563e-05, - "loss": 0.6806, + "epoch": 0.3291798662442802, + "grad_norm": 1.9438152313232422, + "learning_rate": 1.0963813204239717e-05, + "loss": 0.7151, "step": 4676 }, { - "epoch": 0.3539026143543566, - "grad_norm": 2.345444440841675, - "learning_rate": 1.3824874967942251e-05, - "loss": 0.6459, + "epoch": 0.32925026399155227, + "grad_norm": 1.685558557510376, + "learning_rate": 1.0962497546997102e-05, + "loss": 0.7081, "step": 4677 }, { - "epoch": 0.35397828307669027, - "grad_norm": 2.6580867767333984, - "learning_rate": 1.382297359372167e-05, - "loss": 0.9527, + "epoch": 0.32932066173882435, + "grad_norm": 1.915696382522583, + "learning_rate": 1.0961181683740649e-05, + "loss": 0.6772, "step": 4678 }, { - "epoch": 0.3540539517990239, - "grad_norm": 2.4587290287017822, - "learning_rate": 1.3821071917470877e-05, - "loss": 0.8039, + "epoch": 0.3293910594860964, + "grad_norm": 1.9172163009643555, + "learning_rate": 1.0959865614538776e-05, + "loss": 0.7898, "step": 4679 }, { - "epoch": 0.3541296205213575, - "grad_norm": 1.9164375066757202, - "learning_rate": 1.3819169939308969e-05, - "loss": 0.7868, + "epoch": 0.32946145723336856, + "grad_norm": 1.8135061264038086, + "learning_rate": 1.09585493394599e-05, + "loss": 0.8221, "step": 4680 }, { - "epoch": 0.3542052892436911, - "grad_norm": 2.0153324604034424, - "learning_rate": 1.3817267659355055e-05, - "loss": 0.739, + "epoch": 0.32953185498064064, + "grad_norm": 1.686639666557312, + "learning_rate": 1.095723285857246e-05, + "loss": 0.6394, "step": 4681 }, { - "epoch": 0.35428095796602477, - "grad_norm": 2.2942817211151123, - "learning_rate": 1.3815365077728255e-05, - "loss": 0.8144, + "epoch": 0.3296022527279127, + "grad_norm": 1.7097264528274536, + "learning_rate": 1.0955916171944902e-05, + "loss": 0.797, "step": 4682 }, { - "epoch": 0.3543566266883584, - "grad_norm": 2.601985216140747, - "learning_rate": 1.3813462194547724e-05, - "loss": 0.7122, + "epoch": 0.3296726504751848, + "grad_norm": 1.8812217712402344, + "learning_rate": 1.0954599279645679e-05, + "loss": 0.6542, "step": 4683 }, { - "epoch": 0.354432295410692, - "grad_norm": 1.8438481092453003, - "learning_rate": 1.3811559009932615e-05, - "loss": 0.7519, + "epoch": 0.32974304822245687, + "grad_norm": 1.7428539991378784, + "learning_rate": 1.0953282181743255e-05, + "loss": 0.6813, "step": 4684 }, { - "epoch": 0.3545079641330256, - "grad_norm": 2.120218276977539, - "learning_rate": 1.3809655524002124e-05, - "loss": 0.7314, + "epoch": 0.32981344596972895, + "grad_norm": 1.6724555492401123, + "learning_rate": 1.0951964878306113e-05, + "loss": 0.6419, "step": 4685 }, { - "epoch": 0.3545836328553592, - "grad_norm": 2.4902069568634033, - "learning_rate": 1.3807751736875446e-05, - "loss": 0.6525, + "epoch": 0.3298838437170011, + "grad_norm": 1.4921215772628784, + "learning_rate": 1.0950647369402737e-05, + "loss": 0.6503, "step": 4686 }, { - "epoch": 0.3546593015776929, - "grad_norm": 2.555546998977661, - "learning_rate": 1.3805847648671803e-05, - "loss": 0.7323, + "epoch": 0.32995424146427316, + "grad_norm": 1.7895342111587524, + "learning_rate": 1.0949329655101624e-05, + "loss": 0.7232, "step": 4687 }, { - "epoch": 0.3547349703000265, - "grad_norm": 2.1880481243133545, - "learning_rate": 1.3803943259510439e-05, - "loss": 0.792, + "epoch": 0.33002463921154523, + "grad_norm": 1.8620413541793823, + "learning_rate": 1.0948011735471289e-05, + "loss": 0.7581, "step": 4688 }, { - "epoch": 0.3548106390223601, - "grad_norm": 2.3539915084838867, - "learning_rate": 1.3802038569510606e-05, - "loss": 0.7203, + "epoch": 0.3300950369588173, + "grad_norm": 2.2104923725128174, + "learning_rate": 1.0946693610580246e-05, + "loss": 0.6292, "step": 4689 }, { - "epoch": 0.3548863077446937, - "grad_norm": 2.6515328884124756, - "learning_rate": 1.3800133578791591e-05, - "loss": 0.8069, + "epoch": 0.3301654347060894, + "grad_norm": 1.8532063961029053, + "learning_rate": 1.0945375280497031e-05, + "loss": 0.7913, "step": 4690 }, { - "epoch": 0.3549619764670274, - "grad_norm": 1.9614640474319458, - "learning_rate": 1.3798228287472683e-05, - "loss": 0.7215, + "epoch": 0.33023583245336147, + "grad_norm": 2.110642910003662, + "learning_rate": 1.094405674529018e-05, + "loss": 0.657, "step": 4691 }, { - "epoch": 0.355037645189361, - "grad_norm": 2.137275218963623, - "learning_rate": 1.37963226956732e-05, - "loss": 0.8132, + "epoch": 0.3303062302006336, + "grad_norm": 2.1547229290008545, + "learning_rate": 1.0942738005028243e-05, + "loss": 0.7583, "step": 4692 }, { - "epoch": 0.3551133139116946, - "grad_norm": 1.8678892850875854, - "learning_rate": 1.3794416803512477e-05, - "loss": 0.5912, + "epoch": 0.3303766279479057, + "grad_norm": 2.540841817855835, + "learning_rate": 1.0941419059779793e-05, + "loss": 0.7526, "step": 4693 }, { - "epoch": 0.3551889826340282, - "grad_norm": 2.0661749839782715, - "learning_rate": 1.379251061110987e-05, - "loss": 0.7803, + "epoch": 0.33044702569517775, + "grad_norm": 1.8449803590774536, + "learning_rate": 1.0940099909613393e-05, + "loss": 0.685, "step": 4694 }, { - "epoch": 0.35526465135636187, - "grad_norm": 2.396214485168457, - "learning_rate": 1.3790604118584744e-05, - "loss": 0.7889, + "epoch": 0.33051742344244983, + "grad_norm": 3.4339823722839355, + "learning_rate": 1.093878055459763e-05, + "loss": 0.6388, "step": 4695 }, { - "epoch": 0.3553403200786955, - "grad_norm": 2.3043839931488037, - "learning_rate": 1.3788697326056494e-05, - "loss": 0.8395, + "epoch": 0.3305878211897219, + "grad_norm": 1.8747515678405762, + "learning_rate": 1.09374609948011e-05, + "loss": 0.6391, "step": 4696 }, { - "epoch": 0.3554159888010291, - "grad_norm": 2.3201632499694824, - "learning_rate": 1.3786790233644529e-05, - "loss": 0.875, + "epoch": 0.33065821893699404, + "grad_norm": 2.159512758255005, + "learning_rate": 1.0936141230292408e-05, + "loss": 0.642, "step": 4697 }, { - "epoch": 0.3554916575233627, - "grad_norm": 2.381060838699341, - "learning_rate": 1.3784882841468276e-05, - "loss": 0.8286, + "epoch": 0.3307286166842661, + "grad_norm": 1.7504578828811646, + "learning_rate": 1.0934821261140166e-05, + "loss": 0.7659, "step": 4698 }, { - "epoch": 0.35556732624569637, - "grad_norm": 2.4320642948150635, - "learning_rate": 1.3782975149647184e-05, - "loss": 0.6862, + "epoch": 0.3307990144315382, + "grad_norm": 2.270479917526245, + "learning_rate": 1.0933501087413005e-05, + "loss": 0.7105, "step": 4699 }, { - "epoch": 0.35564299496803, - "grad_norm": 2.320983409881592, - "learning_rate": 1.378106715830072e-05, - "loss": 0.6916, + "epoch": 0.3308694121788103, + "grad_norm": 2.078235626220703, + "learning_rate": 1.0932180709179559e-05, + "loss": 0.8259, "step": 4700 }, { - "epoch": 0.3557186636903636, - "grad_norm": 2.415428876876831, - "learning_rate": 1.3779158867548367e-05, - "loss": 0.6812, + "epoch": 0.33093980992608235, + "grad_norm": 1.9788585901260376, + "learning_rate": 1.0930860126508477e-05, + "loss": 0.7448, "step": 4701 }, { - "epoch": 0.3557943324126972, - "grad_norm": 2.3122177124023438, - "learning_rate": 1.3777250277509621e-05, - "loss": 0.6497, + "epoch": 0.33101020767335443, + "grad_norm": 2.2282114028930664, + "learning_rate": 1.0929539339468417e-05, + "loss": 0.7237, "step": 4702 }, { - "epoch": 0.3558700011350308, - "grad_norm": 2.4607081413269043, - "learning_rate": 1.3775341388304019e-05, - "loss": 0.6586, + "epoch": 0.33108060542062656, + "grad_norm": 2.0466268062591553, + "learning_rate": 1.0928218348128048e-05, + "loss": 0.7003, "step": 4703 }, { - "epoch": 0.3559456698573645, - "grad_norm": 2.4338831901550293, - "learning_rate": 1.3773432200051093e-05, - "loss": 0.7655, + "epoch": 0.33115100316789864, + "grad_norm": 1.9221158027648926, + "learning_rate": 1.092689715255605e-05, + "loss": 0.6708, "step": 4704 }, { - "epoch": 0.3560213385796981, - "grad_norm": 3.2349586486816406, - "learning_rate": 1.3771522712870401e-05, - "loss": 0.7787, + "epoch": 0.3312214009151707, + "grad_norm": 1.8663239479064941, + "learning_rate": 1.0925575752821114e-05, + "loss": 0.7204, "step": 4705 }, { - "epoch": 0.3560970073020317, - "grad_norm": 2.1032230854034424, - "learning_rate": 1.3769612926881526e-05, - "loss": 0.7262, + "epoch": 0.3312917986624428, + "grad_norm": 1.7420791387557983, + "learning_rate": 1.0924254148991937e-05, + "loss": 0.7418, "step": 4706 }, { - "epoch": 0.3561726760243653, - "grad_norm": 2.523385524749756, - "learning_rate": 1.3767702842204059e-05, - "loss": 0.7117, + "epoch": 0.33136219640971487, + "grad_norm": 1.9348806142807007, + "learning_rate": 1.0922932341137232e-05, + "loss": 0.7874, "step": 4707 }, { - "epoch": 0.356248344746699, - "grad_norm": 2.341728687286377, - "learning_rate": 1.3765792458957624e-05, - "loss": 0.7133, + "epoch": 0.331432594156987, + "grad_norm": 1.8459362983703613, + "learning_rate": 1.0921610329325723e-05, + "loss": 0.7885, "step": 4708 }, { - "epoch": 0.3563240134690326, - "grad_norm": 2.2252790927886963, - "learning_rate": 1.3763881777261847e-05, - "loss": 0.771, + "epoch": 0.3315029919042591, + "grad_norm": 2.075474500656128, + "learning_rate": 1.0920288113626143e-05, + "loss": 0.6113, "step": 4709 }, { - "epoch": 0.3563996821913662, - "grad_norm": 2.088966131210327, - "learning_rate": 1.3761970797236386e-05, - "loss": 0.7424, + "epoch": 0.33157338965153116, + "grad_norm": 2.03891658782959, + "learning_rate": 1.0918965694107231e-05, + "loss": 0.7423, "step": 4710 }, { - "epoch": 0.3564753509136998, - "grad_norm": 2.176795244216919, - "learning_rate": 1.3760059519000912e-05, - "loss": 0.7256, + "epoch": 0.33164378739880324, + "grad_norm": 2.33455753326416, + "learning_rate": 1.0917643070837744e-05, + "loss": 0.6099, "step": 4711 }, { - "epoch": 0.3565510196360335, - "grad_norm": 2.541574716567993, - "learning_rate": 1.3758147942675115e-05, - "loss": 0.7764, + "epoch": 0.3317141851460753, + "grad_norm": 1.6061279773712158, + "learning_rate": 1.0916320243886446e-05, + "loss": 0.7674, "step": 4712 }, { - "epoch": 0.3566266883583671, - "grad_norm": 2.2796478271484375, - "learning_rate": 1.3756236068378706e-05, - "loss": 0.7737, + "epoch": 0.3317845828933474, + "grad_norm": 1.7878636121749878, + "learning_rate": 1.0914997213322114e-05, + "loss": 0.7254, "step": 4713 }, { - "epoch": 0.3567023570807007, - "grad_norm": 2.664128541946411, - "learning_rate": 1.3754323896231409e-05, - "loss": 0.8546, + "epoch": 0.3318549806406195, + "grad_norm": 1.9233156442642212, + "learning_rate": 1.0913673979213528e-05, + "loss": 0.6698, "step": 4714 }, { - "epoch": 0.3567780258030343, - "grad_norm": 2.4556145668029785, - "learning_rate": 1.3752411426352971e-05, - "loss": 0.7036, + "epoch": 0.3319253783878916, + "grad_norm": 2.1003808975219727, + "learning_rate": 1.0912350541629488e-05, + "loss": 0.6726, "step": 4715 }, { - "epoch": 0.3568536945253679, - "grad_norm": 2.207688093185425, - "learning_rate": 1.375049865886316e-05, - "loss": 0.7345, + "epoch": 0.3319957761351637, + "grad_norm": 1.7975786924362183, + "learning_rate": 1.0911026900638802e-05, + "loss": 0.7086, "step": 4716 }, { - "epoch": 0.3569293632477016, - "grad_norm": 2.008202075958252, - "learning_rate": 1.3748585593881757e-05, - "loss": 0.7968, + "epoch": 0.33206617388243576, + "grad_norm": 1.9947775602340698, + "learning_rate": 1.0909703056310283e-05, + "loss": 0.6254, "step": 4717 }, { - "epoch": 0.3570050319700352, - "grad_norm": 1.9544659852981567, - "learning_rate": 1.3746672231528565e-05, - "loss": 0.7988, + "epoch": 0.33213657162970783, + "grad_norm": 1.5790218114852905, + "learning_rate": 1.0908379008712764e-05, + "loss": 0.7512, "step": 4718 }, { - "epoch": 0.3570807006923688, - "grad_norm": 2.3357014656066895, - "learning_rate": 1.3744758571923408e-05, - "loss": 0.7303, + "epoch": 0.3322069693769799, + "grad_norm": 1.6874760389328003, + "learning_rate": 1.0907054757915076e-05, + "loss": 0.7097, "step": 4719 }, { - "epoch": 0.3571563694147024, - "grad_norm": 2.123999834060669, - "learning_rate": 1.3742844615186122e-05, - "loss": 0.7725, + "epoch": 0.33227736712425204, + "grad_norm": 2.1368863582611084, + "learning_rate": 1.0905730303986078e-05, + "loss": 0.7236, "step": 4720 }, { - "epoch": 0.3572320381370361, - "grad_norm": 2.152657985687256, - "learning_rate": 1.3740930361436565e-05, - "loss": 0.8359, + "epoch": 0.3323477648715241, + "grad_norm": 1.934064507484436, + "learning_rate": 1.0904405646994621e-05, + "loss": 0.8579, "step": 4721 }, { - "epoch": 0.3573077068593697, - "grad_norm": 2.6231911182403564, - "learning_rate": 1.3739015810794616e-05, - "loss": 0.8116, + "epoch": 0.3324181626187962, + "grad_norm": 1.7855464220046997, + "learning_rate": 1.0903080787009578e-05, + "loss": 0.7868, "step": 4722 }, { - "epoch": 0.3573833755817033, - "grad_norm": 2.069714307785034, - "learning_rate": 1.3737100963380164e-05, - "loss": 0.7383, + "epoch": 0.3324885603660683, + "grad_norm": 2.182596445083618, + "learning_rate": 1.0901755724099833e-05, + "loss": 0.7909, "step": 4723 }, { - "epoch": 0.3574590443040369, - "grad_norm": 2.1583962440490723, - "learning_rate": 1.3735185819313134e-05, - "loss": 0.7683, + "epoch": 0.33255895811334035, + "grad_norm": 2.315124988555908, + "learning_rate": 1.0900430458334273e-05, + "loss": 0.6257, "step": 4724 }, { - "epoch": 0.3575347130263706, - "grad_norm": 2.315765619277954, - "learning_rate": 1.3733270378713448e-05, - "loss": 0.708, + "epoch": 0.3326293558606125, + "grad_norm": 1.7875077724456787, + "learning_rate": 1.0899104989781798e-05, + "loss": 0.7563, "step": 4725 }, { - "epoch": 0.3576103817487042, - "grad_norm": 1.5999698638916016, - "learning_rate": 1.3731354641701064e-05, - "loss": 0.7547, + "epoch": 0.33269975360788456, + "grad_norm": 1.9466402530670166, + "learning_rate": 1.0897779318511326e-05, + "loss": 0.6127, "step": 4726 }, { - "epoch": 0.3576860504710378, - "grad_norm": 2.330720901489258, - "learning_rate": 1.3729438608395951e-05, - "loss": 0.7628, + "epoch": 0.33277015135515664, + "grad_norm": 2.0292139053344727, + "learning_rate": 1.0896453444591778e-05, + "loss": 0.7127, "step": 4727 }, { - "epoch": 0.3577617191933714, - "grad_norm": 2.1309075355529785, - "learning_rate": 1.3727522278918094e-05, - "loss": 0.632, + "epoch": 0.3328405491024287, + "grad_norm": 2.4154646396636963, + "learning_rate": 1.0895127368092085e-05, + "loss": 0.7138, "step": 4728 }, { - "epoch": 0.357837387915705, - "grad_norm": 2.6685056686401367, - "learning_rate": 1.3725605653387502e-05, - "loss": 0.8374, + "epoch": 0.3329109468497008, + "grad_norm": 2.108973264694214, + "learning_rate": 1.0893801089081193e-05, + "loss": 0.7377, "step": 4729 }, { - "epoch": 0.3579130566380387, - "grad_norm": 1.8115298748016357, - "learning_rate": 1.3723688731924195e-05, - "loss": 0.6296, + "epoch": 0.3329813445969729, + "grad_norm": 1.9350554943084717, + "learning_rate": 1.0892474607628057e-05, + "loss": 0.7852, "step": 4730 }, { - "epoch": 0.3579887253603723, - "grad_norm": 2.554457664489746, - "learning_rate": 1.3721771514648227e-05, - "loss": 0.6815, + "epoch": 0.333051742344245, + "grad_norm": 2.325301170349121, + "learning_rate": 1.089114792380164e-05, + "loss": 0.7529, "step": 4731 }, { - "epoch": 0.3580643940827059, - "grad_norm": 2.170767068862915, - "learning_rate": 1.3719854001679654e-05, - "loss": 0.7365, + "epoch": 0.3331221400915171, + "grad_norm": 1.9331377744674683, + "learning_rate": 1.0889821037670919e-05, + "loss": 0.6306, "step": 4732 }, { - "epoch": 0.3581400628050395, - "grad_norm": 1.8011586666107178, - "learning_rate": 1.3717936193138555e-05, - "loss": 0.707, + "epoch": 0.33319253783878916, + "grad_norm": 2.040936231613159, + "learning_rate": 1.0888493949304878e-05, + "loss": 0.8321, "step": 4733 }, { - "epoch": 0.3582157315273732, - "grad_norm": 2.423759937286377, - "learning_rate": 1.371601808914503e-05, - "loss": 0.7133, + "epoch": 0.33326293558606124, + "grad_norm": 2.2150509357452393, + "learning_rate": 1.0887166658772517e-05, + "loss": 0.689, "step": 4734 }, { - "epoch": 0.3582914002497068, - "grad_norm": 2.005772113800049, - "learning_rate": 1.3714099689819203e-05, - "loss": 0.711, + "epoch": 0.3333333333333333, + "grad_norm": 1.70967698097229, + "learning_rate": 1.0885839166142838e-05, + "loss": 0.6162, "step": 4735 }, { - "epoch": 0.3583670689720404, - "grad_norm": 2.3678839206695557, - "learning_rate": 1.3712180995281207e-05, - "loss": 0.7133, + "epoch": 0.33340373108060545, + "grad_norm": 2.1619012355804443, + "learning_rate": 1.0884511471484863e-05, + "loss": 0.748, "step": 4736 }, { - "epoch": 0.358442737694374, - "grad_norm": 1.848137617111206, - "learning_rate": 1.3710262005651195e-05, - "loss": 0.5987, + "epoch": 0.3334741288278775, + "grad_norm": 1.8263301849365234, + "learning_rate": 1.088318357486762e-05, + "loss": 0.71, "step": 4737 }, { - "epoch": 0.3585184064167077, - "grad_norm": 1.9859153032302856, - "learning_rate": 1.370834272104934e-05, - "loss": 0.6771, + "epoch": 0.3335445265751496, + "grad_norm": 1.6185098886489868, + "learning_rate": 1.0881855476360145e-05, + "loss": 0.68, "step": 4738 }, { - "epoch": 0.3585940751390413, - "grad_norm": 1.895273208618164, - "learning_rate": 1.3706423141595834e-05, - "loss": 0.7763, + "epoch": 0.3336149243224217, + "grad_norm": 1.7600154876708984, + "learning_rate": 1.0880527176031487e-05, + "loss": 0.7454, "step": 4739 }, { - "epoch": 0.3586697438613749, - "grad_norm": 2.161606788635254, - "learning_rate": 1.370450326741089e-05, - "loss": 0.7725, + "epoch": 0.33368532206969376, + "grad_norm": 3.198735237121582, + "learning_rate": 1.0879198673950711e-05, + "loss": 0.62, "step": 4740 }, { - "epoch": 0.3587454125837085, - "grad_norm": 2.1437206268310547, - "learning_rate": 1.3702583098614734e-05, - "loss": 0.7627, + "epoch": 0.33375571981696583, + "grad_norm": 1.7257229089736938, + "learning_rate": 1.0877869970186879e-05, + "loss": 0.7026, "step": 4741 }, { - "epoch": 0.3588210813060421, - "grad_norm": 2.3979063034057617, - "learning_rate": 1.3700662635327618e-05, - "loss": 0.8281, + "epoch": 0.33382611756423797, + "grad_norm": 2.410123348236084, + "learning_rate": 1.0876541064809076e-05, + "loss": 0.735, "step": 4742 }, { - "epoch": 0.3588967500283758, - "grad_norm": 1.9266215562820435, - "learning_rate": 1.36987418776698e-05, - "loss": 0.7498, + "epoch": 0.33389651531151004, + "grad_norm": 1.8945331573486328, + "learning_rate": 1.0875211957886391e-05, + "loss": 0.758, "step": 4743 }, { - "epoch": 0.3589724187507094, - "grad_norm": 1.760246992111206, - "learning_rate": 1.369682082576157e-05, - "loss": 0.7122, + "epoch": 0.3339669130587821, + "grad_norm": 1.8061851263046265, + "learning_rate": 1.0873882649487928e-05, + "loss": 0.6849, "step": 4744 }, { - "epoch": 0.359048087473043, - "grad_norm": 2.123413562774658, - "learning_rate": 1.369489947972323e-05, - "loss": 0.6687, + "epoch": 0.3340373108060542, + "grad_norm": 2.185396194458008, + "learning_rate": 1.0872553139682797e-05, + "loss": 0.6682, "step": 4745 }, { - "epoch": 0.3591237561953766, - "grad_norm": 1.9650204181671143, - "learning_rate": 1.3692977839675095e-05, - "loss": 0.6699, + "epoch": 0.3341077085533263, + "grad_norm": 2.0824451446533203, + "learning_rate": 1.087122342854012e-05, + "loss": 0.6979, "step": 4746 }, { - "epoch": 0.3591994249177103, - "grad_norm": 2.239622116088867, - "learning_rate": 1.3691055905737511e-05, - "loss": 0.6695, + "epoch": 0.33417810630059835, + "grad_norm": 1.8123105764389038, + "learning_rate": 1.0869893516129035e-05, + "loss": 0.7356, "step": 4747 }, { - "epoch": 0.3592750936400439, - "grad_norm": 2.251483201980591, - "learning_rate": 1.3689133678030834e-05, - "loss": 0.7291, + "epoch": 0.3342485040478705, + "grad_norm": 1.967579960823059, + "learning_rate": 1.086856340251868e-05, + "loss": 0.6489, "step": 4748 }, { - "epoch": 0.3593507623623775, - "grad_norm": 2.2021195888519287, - "learning_rate": 1.368721115667544e-05, - "loss": 0.682, + "epoch": 0.33431890179514256, + "grad_norm": 1.9450899362564087, + "learning_rate": 1.0867233087778207e-05, + "loss": 0.8192, "step": 4749 }, { - "epoch": 0.3594264310847111, - "grad_norm": 2.117192506790161, - "learning_rate": 1.3685288341791724e-05, - "loss": 0.8493, + "epoch": 0.33438929954241464, + "grad_norm": 2.0892515182495117, + "learning_rate": 1.0865902571976786e-05, + "loss": 0.7144, "step": 4750 }, { - "epoch": 0.3595020998070448, - "grad_norm": 1.8998351097106934, - "learning_rate": 1.3683365233500096e-05, - "loss": 0.6437, + "epoch": 0.3344596972896867, + "grad_norm": 1.9026561975479126, + "learning_rate": 1.086457185518359e-05, + "loss": 0.6678, "step": 4751 }, { - "epoch": 0.3595777685293784, - "grad_norm": 2.776381731033325, - "learning_rate": 1.3681441831920991e-05, - "loss": 0.8318, + "epoch": 0.3345300950369588, + "grad_norm": 1.8951181173324585, + "learning_rate": 1.08632409374678e-05, + "loss": 0.6042, "step": 4752 }, { - "epoch": 0.359653437251712, - "grad_norm": 2.061583995819092, - "learning_rate": 1.3679518137174854e-05, - "loss": 0.6635, + "epoch": 0.33460049278423093, + "grad_norm": 1.743672251701355, + "learning_rate": 1.0861909818898617e-05, + "loss": 0.6739, "step": 4753 }, { - "epoch": 0.3597291059740456, - "grad_norm": 2.730015516281128, - "learning_rate": 1.367759414938216e-05, - "loss": 0.8673, + "epoch": 0.334670890531503, + "grad_norm": 2.1553544998168945, + "learning_rate": 1.0860578499545245e-05, + "loss": 0.6878, "step": 4754 }, { - "epoch": 0.3598047746963792, - "grad_norm": 1.971379041671753, - "learning_rate": 1.3675669868663386e-05, - "loss": 0.6937, + "epoch": 0.3347412882787751, + "grad_norm": 2.097257614135742, + "learning_rate": 1.08592469794769e-05, + "loss": 0.7007, "step": 4755 }, { - "epoch": 0.3598804434187129, - "grad_norm": 2.2239530086517334, - "learning_rate": 1.3673745295139044e-05, - "loss": 0.7792, + "epoch": 0.33481168602604716, + "grad_norm": 2.411172389984131, + "learning_rate": 1.085791525876281e-05, + "loss": 0.7965, "step": 4756 }, { - "epoch": 0.3599561121410465, - "grad_norm": 2.4830453395843506, - "learning_rate": 1.3671820428929654e-05, - "loss": 0.8029, + "epoch": 0.33488208377331924, + "grad_norm": 2.012063503265381, + "learning_rate": 1.0856583337472211e-05, + "loss": 0.6854, "step": 4757 }, { - "epoch": 0.3600317808633801, - "grad_norm": 3.1367461681365967, - "learning_rate": 1.3669895270155762e-05, - "loss": 0.7619, + "epoch": 0.3349524815205913, + "grad_norm": 2.7740111351013184, + "learning_rate": 1.0855251215674352e-05, + "loss": 0.7171, "step": 4758 }, { - "epoch": 0.3601074495857137, - "grad_norm": 2.4550986289978027, - "learning_rate": 1.3667969818937922e-05, - "loss": 0.6998, + "epoch": 0.33502287926786345, + "grad_norm": 2.2028841972351074, + "learning_rate": 1.085391889343849e-05, + "loss": 0.7016, "step": 4759 }, { - "epoch": 0.3601831183080474, - "grad_norm": 2.109701633453369, - "learning_rate": 1.3666044075396713e-05, - "loss": 0.7235, + "epoch": 0.3350932770151355, + "grad_norm": 2.190796375274658, + "learning_rate": 1.0852586370833896e-05, + "loss": 0.6874, "step": 4760 }, { - "epoch": 0.360258787030381, - "grad_norm": 2.4037647247314453, - "learning_rate": 1.3664118039652732e-05, - "loss": 0.8367, + "epoch": 0.3351636747624076, + "grad_norm": 1.761626958847046, + "learning_rate": 1.0851253647929846e-05, + "loss": 0.6215, "step": 4761 }, { - "epoch": 0.3603344557527146, - "grad_norm": 2.3242437839508057, - "learning_rate": 1.3662191711826594e-05, - "loss": 0.6881, + "epoch": 0.3352340725096797, + "grad_norm": 1.8184051513671875, + "learning_rate": 1.0849920724795631e-05, + "loss": 0.683, "step": 4762 }, { - "epoch": 0.3604101244750482, - "grad_norm": 2.0473146438598633, - "learning_rate": 1.3660265092038933e-05, - "loss": 0.6282, + "epoch": 0.33530447025695176, + "grad_norm": 2.036888360977173, + "learning_rate": 1.0848587601500552e-05, + "loss": 0.8042, "step": 4763 }, { - "epoch": 0.3604857931973819, - "grad_norm": 2.0871689319610596, - "learning_rate": 1.3658338180410396e-05, - "loss": 0.7049, + "epoch": 0.3353748680042239, + "grad_norm": 2.1575939655303955, + "learning_rate": 1.0847254278113916e-05, + "loss": 0.7046, "step": 4764 }, { - "epoch": 0.3605614619197155, - "grad_norm": 2.224372625350952, - "learning_rate": 1.3656410977061659e-05, - "loss": 0.8585, + "epoch": 0.33544526575149597, + "grad_norm": 1.8281302452087402, + "learning_rate": 1.0845920754705049e-05, + "loss": 0.7098, "step": 4765 }, { - "epoch": 0.3606371306420491, - "grad_norm": 2.71907114982605, - "learning_rate": 1.3654483482113403e-05, - "loss": 0.6829, + "epoch": 0.33551566349876805, + "grad_norm": 2.024590015411377, + "learning_rate": 1.0844587031343277e-05, + "loss": 0.7736, "step": 4766 }, { - "epoch": 0.3607127993643827, - "grad_norm": 2.1918084621429443, - "learning_rate": 1.365255569568634e-05, - "loss": 0.744, + "epoch": 0.3355860612460401, + "grad_norm": 1.764366865158081, + "learning_rate": 1.0843253108097945e-05, + "loss": 0.7286, "step": 4767 }, { - "epoch": 0.36078846808671633, - "grad_norm": 1.7974071502685547, - "learning_rate": 1.3650627617901187e-05, - "loss": 0.8826, + "epoch": 0.3356564589933122, + "grad_norm": 2.2416298389434814, + "learning_rate": 1.08419189850384e-05, + "loss": 0.8414, "step": 4768 }, { - "epoch": 0.36086413680905, - "grad_norm": 2.0619354248046875, - "learning_rate": 1.3648699248878694e-05, - "loss": 0.771, + "epoch": 0.3357268567405843, + "grad_norm": 2.8116981983184814, + "learning_rate": 1.084058466223401e-05, + "loss": 0.7174, "step": 4769 }, { - "epoch": 0.3609398055313836, - "grad_norm": 2.1140151023864746, - "learning_rate": 1.3646770588739617e-05, - "loss": 0.8686, + "epoch": 0.3357972544878564, + "grad_norm": 1.9066141843795776, + "learning_rate": 1.0839250139754144e-05, + "loss": 0.7527, "step": 4770 }, { - "epoch": 0.3610154742537172, - "grad_norm": 1.8046315908432007, - "learning_rate": 1.3644841637604734e-05, - "loss": 0.6257, + "epoch": 0.3358676522351285, + "grad_norm": 2.449547290802002, + "learning_rate": 1.0837915417668187e-05, + "loss": 0.7145, "step": 4771 }, { - "epoch": 0.3610911429760508, - "grad_norm": 2.37791109085083, - "learning_rate": 1.3642912395594848e-05, - "loss": 0.7334, + "epoch": 0.33593804998240057, + "grad_norm": 1.8601627349853516, + "learning_rate": 1.083658049604553e-05, + "loss": 0.727, "step": 4772 }, { - "epoch": 0.3611668116983845, - "grad_norm": 1.7635067701339722, - "learning_rate": 1.3640982862830768e-05, - "loss": 0.6244, + "epoch": 0.33600844772967264, + "grad_norm": 2.84259295463562, + "learning_rate": 1.0835245374955579e-05, + "loss": 0.7895, "step": 4773 }, { - "epoch": 0.3612424804207181, - "grad_norm": 2.401155471801758, - "learning_rate": 1.3639053039433334e-05, - "loss": 0.6292, + "epoch": 0.3360788454769447, + "grad_norm": 2.7232141494750977, + "learning_rate": 1.083391005446775e-05, + "loss": 0.6844, "step": 4774 }, { - "epoch": 0.3613181491430517, - "grad_norm": 1.8144967555999756, - "learning_rate": 1.3637122925523391e-05, - "loss": 0.8272, + "epoch": 0.3361492432242168, + "grad_norm": 1.764163613319397, + "learning_rate": 1.0832574534651463e-05, + "loss": 0.7348, "step": 4775 }, { - "epoch": 0.3613938178653853, - "grad_norm": 2.4782304763793945, - "learning_rate": 1.3635192521221815e-05, - "loss": 0.7252, + "epoch": 0.33621964097148893, + "grad_norm": 1.9001750946044922, + "learning_rate": 1.0831238815576156e-05, + "loss": 0.7329, "step": 4776 }, { - "epoch": 0.361469486587719, - "grad_norm": 1.9335335493087769, - "learning_rate": 1.363326182664949e-05, - "loss": 0.8413, + "epoch": 0.336290038718761, + "grad_norm": 2.014292001724243, + "learning_rate": 1.0829902897311271e-05, + "loss": 0.8083, "step": 4777 }, { - "epoch": 0.3615451553100526, - "grad_norm": 1.8808588981628418, - "learning_rate": 1.363133084192732e-05, - "loss": 0.6569, + "epoch": 0.3363604364660331, + "grad_norm": 2.3185012340545654, + "learning_rate": 1.0828566779926266e-05, + "loss": 0.8117, "step": 4778 }, { - "epoch": 0.3616208240323862, - "grad_norm": 1.8410663604736328, - "learning_rate": 1.3629399567176237e-05, - "loss": 0.7255, + "epoch": 0.33643083421330516, + "grad_norm": 1.439197063446045, + "learning_rate": 1.0827230463490609e-05, + "loss": 0.6798, "step": 4779 }, { - "epoch": 0.3616964927547198, - "grad_norm": 1.8325263261795044, - "learning_rate": 1.3627468002517179e-05, - "loss": 0.7194, + "epoch": 0.33650123196057724, + "grad_norm": 1.7174010276794434, + "learning_rate": 1.0825893948073773e-05, + "loss": 0.7622, "step": 4780 }, { - "epoch": 0.36177216147705343, - "grad_norm": 2.408630847930908, - "learning_rate": 1.3625536148071109e-05, - "loss": 0.7562, + "epoch": 0.3365716297078494, + "grad_norm": 1.7209681272506714, + "learning_rate": 1.0824557233745246e-05, + "loss": 0.7359, "step": 4781 }, { - "epoch": 0.3618478301993871, - "grad_norm": 2.4441497325897217, - "learning_rate": 1.3623604003959004e-05, - "loss": 0.7213, + "epoch": 0.33664202745512145, + "grad_norm": 1.896994948387146, + "learning_rate": 1.0823220320574523e-05, + "loss": 0.8063, "step": 4782 }, { - "epoch": 0.3619234989217207, - "grad_norm": 2.6353988647460938, - "learning_rate": 1.3621671570301858e-05, - "loss": 0.8314, + "epoch": 0.33671242520239353, + "grad_norm": 2.738091230392456, + "learning_rate": 1.0821883208631116e-05, + "loss": 0.7763, "step": 4783 }, { - "epoch": 0.3619991676440543, - "grad_norm": 2.562260866165161, - "learning_rate": 1.3619738847220694e-05, - "loss": 0.6955, + "epoch": 0.3367828229496656, + "grad_norm": 1.7921277284622192, + "learning_rate": 1.0820545897984538e-05, + "loss": 0.6726, "step": 4784 }, { - "epoch": 0.36207483636638793, - "grad_norm": 2.1167261600494385, - "learning_rate": 1.361780583483654e-05, - "loss": 0.6659, + "epoch": 0.3368532206969377, + "grad_norm": 1.919130563735962, + "learning_rate": 1.0819208388704319e-05, + "loss": 0.5687, "step": 4785 }, { - "epoch": 0.3621505050887216, - "grad_norm": 2.0852859020233154, - "learning_rate": 1.3615872533270452e-05, - "loss": 0.8003, + "epoch": 0.33692361844420976, + "grad_norm": 1.580159068107605, + "learning_rate": 1.0817870680859996e-05, + "loss": 0.6453, "step": 4786 }, { - "epoch": 0.3622261738110552, - "grad_norm": 2.634650945663452, - "learning_rate": 1.3613938942643491e-05, - "loss": 0.851, + "epoch": 0.3369940161914819, + "grad_norm": 2.0363929271698, + "learning_rate": 1.081653277452112e-05, + "loss": 0.7427, "step": 4787 }, { - "epoch": 0.3623018425333888, - "grad_norm": 2.0236427783966064, - "learning_rate": 1.3612005063076753e-05, - "loss": 0.6714, + "epoch": 0.33706441393875397, + "grad_norm": 1.9075959920883179, + "learning_rate": 1.0815194669757249e-05, + "loss": 0.6176, "step": 4788 }, { - "epoch": 0.36237751125572243, - "grad_norm": 2.218775510787964, - "learning_rate": 1.361007089469134e-05, - "loss": 0.8034, + "epoch": 0.33713481168602605, + "grad_norm": 2.177152395248413, + "learning_rate": 1.0813856366637948e-05, + "loss": 0.8394, "step": 4789 }, { - "epoch": 0.3624531799780561, - "grad_norm": 1.8466893434524536, - "learning_rate": 1.3608136437608379e-05, - "loss": 0.8057, + "epoch": 0.3372052094332981, + "grad_norm": 2.336824417114258, + "learning_rate": 1.0812517865232804e-05, + "loss": 0.6687, "step": 4790 }, { - "epoch": 0.3625288487003897, - "grad_norm": 1.9202516078948975, - "learning_rate": 1.3606201691949005e-05, - "loss": 0.8247, + "epoch": 0.3372756071805702, + "grad_norm": 1.6358850002288818, + "learning_rate": 1.0811179165611404e-05, + "loss": 0.8267, "step": 4791 }, { - "epoch": 0.3626045174227233, - "grad_norm": 2.387627601623535, - "learning_rate": 1.3604266657834388e-05, - "loss": 0.6645, + "epoch": 0.33734600492784234, + "grad_norm": 2.1270229816436768, + "learning_rate": 1.0809840267843347e-05, + "loss": 0.8094, "step": 4792 }, { - "epoch": 0.3626801861450569, - "grad_norm": 2.0650217533111572, - "learning_rate": 1.36023313353857e-05, - "loss": 0.6833, + "epoch": 0.3374164026751144, + "grad_norm": 2.1134791374206543, + "learning_rate": 1.0808501171998242e-05, + "loss": 0.7694, "step": 4793 }, { - "epoch": 0.36275585486739054, - "grad_norm": 2.685912847518921, - "learning_rate": 1.3600395724724133e-05, - "loss": 0.7034, + "epoch": 0.3374868004223865, + "grad_norm": 1.8739615678787231, + "learning_rate": 1.0807161878145713e-05, + "loss": 0.7824, "step": 4794 }, { - "epoch": 0.3628315235897242, - "grad_norm": 2.143637180328369, - "learning_rate": 1.3598459825970912e-05, - "loss": 0.6371, + "epoch": 0.33755719816965857, + "grad_norm": 1.4399677515029907, + "learning_rate": 1.0805822386355391e-05, + "loss": 0.5381, "step": 4795 }, { - "epoch": 0.3629071923120578, - "grad_norm": 2.5087201595306396, - "learning_rate": 1.3596523639247263e-05, - "loss": 0.6024, + "epoch": 0.33762759591693065, + "grad_norm": 2.081230401992798, + "learning_rate": 1.0804482696696917e-05, + "loss": 0.6851, "step": 4796 }, { - "epoch": 0.3629828610343914, - "grad_norm": 2.4101240634918213, - "learning_rate": 1.3594587164674435e-05, - "loss": 0.7716, + "epoch": 0.3376979936642027, + "grad_norm": 1.8418893814086914, + "learning_rate": 1.080314280923994e-05, + "loss": 0.8281, "step": 4797 }, { - "epoch": 0.36305852975672503, - "grad_norm": 2.613996982574463, - "learning_rate": 1.3592650402373699e-05, - "loss": 0.7144, + "epoch": 0.33776839141147486, + "grad_norm": 2.004171133041382, + "learning_rate": 1.0801802724054123e-05, + "loss": 0.7638, "step": 4798 }, { - "epoch": 0.3631341984790587, - "grad_norm": 3.9812352657318115, - "learning_rate": 1.359071335246634e-05, - "loss": 0.6945, + "epoch": 0.33783878915874693, + "grad_norm": 2.7696192264556885, + "learning_rate": 1.0800462441209142e-05, + "loss": 0.6976, "step": 4799 }, { - "epoch": 0.3632098672013923, - "grad_norm": 2.4522552490234375, - "learning_rate": 1.3588776015073662e-05, - "loss": 0.7366, + "epoch": 0.337909186906019, + "grad_norm": 1.6319090127944946, + "learning_rate": 1.0799121960774675e-05, + "loss": 0.6897, "step": 4800 }, { - "epoch": 0.3632855359237259, - "grad_norm": 2.1591553688049316, - "learning_rate": 1.3586838390316987e-05, - "loss": 0.6077, + "epoch": 0.3379795846532911, + "grad_norm": 2.0951921939849854, + "learning_rate": 1.0797781282820418e-05, + "loss": 0.7923, "step": 4801 }, { - "epoch": 0.36336120464605953, - "grad_norm": 2.159881114959717, - "learning_rate": 1.3584900478317658e-05, - "loss": 0.6745, + "epoch": 0.33804998240056316, + "grad_norm": 1.6687337160110474, + "learning_rate": 1.0796440407416073e-05, + "loss": 0.652, "step": 4802 }, { - "epoch": 0.3634368733683932, - "grad_norm": 2.3063745498657227, - "learning_rate": 1.3582962279197031e-05, - "loss": 0.7862, + "epoch": 0.33812038014783524, + "grad_norm": 2.0823001861572266, + "learning_rate": 1.079509933463135e-05, + "loss": 0.6613, "step": 4803 }, { - "epoch": 0.3635125420907268, - "grad_norm": 2.219144582748413, - "learning_rate": 1.3581023793076485e-05, - "loss": 0.8561, + "epoch": 0.3381907778951074, + "grad_norm": 1.8736237287521362, + "learning_rate": 1.0793758064535979e-05, + "loss": 0.7988, "step": 4804 }, { - "epoch": 0.3635882108130604, - "grad_norm": 6.310362815856934, - "learning_rate": 1.3579085020077409e-05, - "loss": 0.6899, + "epoch": 0.33826117564237945, + "grad_norm": 1.9831979274749756, + "learning_rate": 1.0792416597199689e-05, + "loss": 0.7481, "step": 4805 }, { - "epoch": 0.36366387953539403, - "grad_norm": 2.508697748184204, - "learning_rate": 1.3577145960321223e-05, - "loss": 0.8235, + "epoch": 0.33833157338965153, + "grad_norm": 2.1690144538879395, + "learning_rate": 1.0791074932692225e-05, + "loss": 0.5982, "step": 4806 }, { - "epoch": 0.36373954825772764, - "grad_norm": 2.050865888595581, - "learning_rate": 1.357520661392935e-05, - "loss": 0.6781, + "epoch": 0.3384019711369236, + "grad_norm": 2.390871047973633, + "learning_rate": 1.0789733071083342e-05, + "loss": 0.6691, "step": 4807 }, { - "epoch": 0.3638152169800613, - "grad_norm": 2.487555742263794, - "learning_rate": 1.357326698102324e-05, - "loss": 0.7999, + "epoch": 0.3384723688841957, + "grad_norm": 2.6683948040008545, + "learning_rate": 1.0788391012442804e-05, + "loss": 0.7155, "step": 4808 }, { - "epoch": 0.3638908857023949, - "grad_norm": 2.141352415084839, - "learning_rate": 1.3571327061724362e-05, - "loss": 0.7551, + "epoch": 0.3385427666314678, + "grad_norm": 1.8321161270141602, + "learning_rate": 1.0787048756840388e-05, + "loss": 0.7028, "step": 4809 }, { - "epoch": 0.36396655442472853, - "grad_norm": 2.2181026935577393, - "learning_rate": 1.3569386856154194e-05, - "loss": 0.8361, + "epoch": 0.3386131643787399, + "grad_norm": 2.3082079887390137, + "learning_rate": 1.0785706304345876e-05, + "loss": 0.6757, "step": 4810 }, { - "epoch": 0.36404222314706214, - "grad_norm": 1.389456033706665, - "learning_rate": 1.3567446364434246e-05, - "loss": 0.8809, + "epoch": 0.33868356212601197, + "grad_norm": 2.2892351150512695, + "learning_rate": 1.0784363655029066e-05, + "loss": 0.7213, "step": 4811 }, { - "epoch": 0.3641178918693958, - "grad_norm": 2.0791516304016113, - "learning_rate": 1.356550558668603e-05, - "loss": 0.6581, + "epoch": 0.33875395987328405, + "grad_norm": 1.767261266708374, + "learning_rate": 1.078302080895976e-05, + "loss": 0.6609, "step": 4812 }, { - "epoch": 0.3641935605917294, - "grad_norm": 2.7910525798797607, - "learning_rate": 1.3563564523031091e-05, - "loss": 0.7416, + "epoch": 0.3388243576205561, + "grad_norm": 2.00166392326355, + "learning_rate": 1.0781677766207777e-05, + "loss": 0.7289, "step": 4813 }, { - "epoch": 0.364269229314063, - "grad_norm": 2.5519115924835205, - "learning_rate": 1.3561623173590978e-05, - "loss": 0.7204, + "epoch": 0.3388947553678282, + "grad_norm": 1.9623041152954102, + "learning_rate": 1.0780334526842944e-05, + "loss": 0.7828, "step": 4814 }, { - "epoch": 0.36434489803639664, - "grad_norm": 2.1502325534820557, - "learning_rate": 1.3559681538487269e-05, - "loss": 0.8517, + "epoch": 0.33896515311510034, + "grad_norm": 2.165710687637329, + "learning_rate": 1.0778991090935092e-05, + "loss": 0.5972, "step": 4815 }, { - "epoch": 0.3644205667587303, - "grad_norm": 2.1360151767730713, - "learning_rate": 1.3557739617841558e-05, - "loss": 0.7458, + "epoch": 0.3390355508623724, + "grad_norm": 2.0018668174743652, + "learning_rate": 1.0777647458554073e-05, + "loss": 0.5969, "step": 4816 }, { - "epoch": 0.3644962354810639, - "grad_norm": 2.29506254196167, - "learning_rate": 1.3555797411775447e-05, - "loss": 0.705, + "epoch": 0.3391059486096445, + "grad_norm": 2.0146892070770264, + "learning_rate": 1.077630362976974e-05, + "loss": 0.7242, "step": 4817 }, { - "epoch": 0.3645719042033975, - "grad_norm": 2.2674145698547363, - "learning_rate": 1.3553854920410568e-05, - "loss": 0.7909, + "epoch": 0.33917634635691657, + "grad_norm": 1.777526617050171, + "learning_rate": 1.0774959604651963e-05, + "loss": 0.7127, "step": 4818 }, { - "epoch": 0.36464757292573113, - "grad_norm": 2.701314926147461, - "learning_rate": 1.3551912143868564e-05, - "loss": 0.7936, + "epoch": 0.33924674410418865, + "grad_norm": 1.95378839969635, + "learning_rate": 1.0773615383270615e-05, + "loss": 0.7646, "step": 4819 }, { - "epoch": 0.3647232416480648, - "grad_norm": 2.063055992126465, - "learning_rate": 1.35499690822711e-05, - "loss": 0.6838, + "epoch": 0.3393171418514608, + "grad_norm": 1.7871085405349731, + "learning_rate": 1.0772270965695587e-05, + "loss": 0.7292, "step": 4820 }, { - "epoch": 0.3647989103703984, - "grad_norm": 2.2792537212371826, - "learning_rate": 1.3548025735739852e-05, - "loss": 0.7913, + "epoch": 0.33938753959873286, + "grad_norm": 2.1120598316192627, + "learning_rate": 1.0770926351996777e-05, + "loss": 0.7173, "step": 4821 }, { - "epoch": 0.364874579092732, - "grad_norm": 2.7173550128936768, - "learning_rate": 1.3546082104396528e-05, - "loss": 0.7174, + "epoch": 0.33945793734600493, + "grad_norm": 2.3410491943359375, + "learning_rate": 1.076958154224409e-05, + "loss": 0.7755, "step": 4822 }, { - "epoch": 0.36495024781506563, - "grad_norm": 2.250936508178711, - "learning_rate": 1.3544138188362835e-05, - "loss": 0.8527, + "epoch": 0.339528335093277, + "grad_norm": 2.0269722938537598, + "learning_rate": 1.076823653650744e-05, + "loss": 0.5835, "step": 4823 }, { - "epoch": 0.36502591653739924, - "grad_norm": 2.110093355178833, - "learning_rate": 1.354219398776051e-05, - "loss": 0.9162, + "epoch": 0.3395987328405491, + "grad_norm": 2.276371479034424, + "learning_rate": 1.0766891334856767e-05, + "loss": 0.7165, "step": 4824 }, { - "epoch": 0.3651015852597329, - "grad_norm": 2.133039951324463, - "learning_rate": 1.354024950271131e-05, - "loss": 0.7488, + "epoch": 0.33966913058782117, + "grad_norm": 1.974388599395752, + "learning_rate": 1.0765545937361996e-05, + "loss": 0.6987, "step": 4825 }, { - "epoch": 0.3651772539820665, - "grad_norm": 1.9301999807357788, - "learning_rate": 1.3538304733337e-05, - "loss": 0.7952, + "epoch": 0.3397395283350933, + "grad_norm": 1.8961961269378662, + "learning_rate": 1.0764200344093084e-05, + "loss": 0.6673, "step": 4826 }, { - "epoch": 0.36525292270440013, - "grad_norm": 2.617494821548462, - "learning_rate": 1.3536359679759369e-05, - "loss": 0.6829, + "epoch": 0.3398099260823654, + "grad_norm": 2.1117336750030518, + "learning_rate": 1.0762854555119987e-05, + "loss": 0.7833, "step": 4827 }, { - "epoch": 0.36532859142673374, - "grad_norm": 2.2446556091308594, - "learning_rate": 1.3534414342100221e-05, - "loss": 0.684, + "epoch": 0.33988032382963745, + "grad_norm": 1.9031888246536255, + "learning_rate": 1.0761508570512674e-05, + "loss": 0.8093, "step": 4828 }, { - "epoch": 0.3654042601490674, - "grad_norm": 2.0811996459960938, - "learning_rate": 1.3532468720481382e-05, - "loss": 0.6984, + "epoch": 0.33995072157690953, + "grad_norm": 1.882541537284851, + "learning_rate": 1.076016239034112e-05, + "loss": 0.7219, "step": 4829 }, { - "epoch": 0.365479928871401, - "grad_norm": 2.136030673980713, - "learning_rate": 1.3530522815024692e-05, - "loss": 0.6672, + "epoch": 0.3400211193241816, + "grad_norm": 2.7898292541503906, + "learning_rate": 1.0758816014675325e-05, + "loss": 0.6264, "step": 4830 }, { - "epoch": 0.3655555975937346, - "grad_norm": 2.017378330230713, - "learning_rate": 1.3528576625852012e-05, - "loss": 0.8001, + "epoch": 0.3400915170714537, + "grad_norm": 1.8471040725708008, + "learning_rate": 1.0757469443585276e-05, + "loss": 0.7071, "step": 4831 }, { - "epoch": 0.36563126631606824, - "grad_norm": 2.336763620376587, - "learning_rate": 1.3526630153085214e-05, - "loss": 0.8168, + "epoch": 0.3401619148187258, + "grad_norm": 2.1000967025756836, + "learning_rate": 1.075612267714099e-05, + "loss": 0.7727, "step": 4832 }, { - "epoch": 0.3657069350384019, - "grad_norm": 2.5141499042510986, - "learning_rate": 1.352468339684619e-05, - "loss": 0.7466, + "epoch": 0.3402323125659979, + "grad_norm": 1.8035727739334106, + "learning_rate": 1.0754775715412483e-05, + "loss": 0.6613, "step": 4833 }, { - "epoch": 0.3657826037607355, - "grad_norm": 2.2805662155151367, - "learning_rate": 1.3522736357256866e-05, - "loss": 0.6801, + "epoch": 0.34030271031327, + "grad_norm": 2.4467926025390625, + "learning_rate": 1.0753428558469786e-05, + "loss": 0.6499, "step": 4834 }, { - "epoch": 0.3658582724830691, - "grad_norm": 2.272472381591797, - "learning_rate": 1.3520789034439158e-05, - "loss": 0.74, + "epoch": 0.34037310806054205, + "grad_norm": 1.7585070133209229, + "learning_rate": 1.075208120638294e-05, + "loss": 0.7032, "step": 4835 }, { - "epoch": 0.36593394120540274, - "grad_norm": 2.805711507797241, - "learning_rate": 1.351884142851502e-05, - "loss": 0.6199, + "epoch": 0.34044350580781413, + "grad_norm": 1.776389479637146, + "learning_rate": 1.0750733659221994e-05, + "loss": 0.7284, "step": 4836 }, { - "epoch": 0.36600960992773635, - "grad_norm": 2.3359363079071045, - "learning_rate": 1.3516893539606415e-05, - "loss": 0.6921, + "epoch": 0.34051390355508626, + "grad_norm": 2.801448345184326, + "learning_rate": 1.0749385917057008e-05, + "loss": 0.7993, "step": 4837 }, { - "epoch": 0.36608527865007, - "grad_norm": 2.1814374923706055, - "learning_rate": 1.3514945367835328e-05, - "loss": 0.6558, + "epoch": 0.34058430130235834, + "grad_norm": 1.9662574529647827, + "learning_rate": 1.074803797995805e-05, + "loss": 0.7151, "step": 4838 }, { - "epoch": 0.3661609473724036, - "grad_norm": 1.8459466695785522, - "learning_rate": 1.3512996913323758e-05, - "loss": 0.6672, + "epoch": 0.3406546990496304, + "grad_norm": 1.9715665578842163, + "learning_rate": 1.0746689847995209e-05, + "loss": 0.7449, "step": 4839 }, { - "epoch": 0.36623661609473723, - "grad_norm": 2.3520541191101074, - "learning_rate": 1.3511048176193727e-05, - "loss": 0.842, + "epoch": 0.3407250967969025, + "grad_norm": 2.480294704437256, + "learning_rate": 1.0745341521238564e-05, + "loss": 0.7287, "step": 4840 }, { - "epoch": 0.36631228481707084, - "grad_norm": 1.4185298681259155, - "learning_rate": 1.3509099156567269e-05, - "loss": 0.9015, + "epoch": 0.34079549454417457, + "grad_norm": 1.5608187913894653, + "learning_rate": 1.0743992999758223e-05, + "loss": 0.7455, "step": 4841 }, { - "epoch": 0.3663879535394045, - "grad_norm": 1.6182681322097778, - "learning_rate": 1.3507149854566433e-05, - "loss": 0.7128, + "epoch": 0.34086589229144665, + "grad_norm": 2.1583945751190186, + "learning_rate": 1.0742644283624296e-05, + "loss": 0.7563, "step": 4842 }, { - "epoch": 0.3664636222617381, - "grad_norm": 2.5852813720703125, - "learning_rate": 1.3505200270313298e-05, - "loss": 0.7549, + "epoch": 0.3409362900387188, + "grad_norm": 1.9253653287887573, + "learning_rate": 1.0741295372906902e-05, + "loss": 0.6396, "step": 4843 }, { - "epoch": 0.36653929098407173, - "grad_norm": 2.689218044281006, - "learning_rate": 1.3503250403929951e-05, - "loss": 0.6497, + "epoch": 0.34100668778599086, + "grad_norm": 1.9534454345703125, + "learning_rate": 1.0739946267676173e-05, + "loss": 0.685, "step": 4844 }, { - "epoch": 0.36661495970640534, - "grad_norm": 2.295428514480591, - "learning_rate": 1.3501300255538499e-05, - "loss": 0.8129, + "epoch": 0.34107708553326294, + "grad_norm": 1.8217928409576416, + "learning_rate": 1.0738596968002254e-05, + "loss": 0.6119, "step": 4845 }, { - "epoch": 0.366690628428739, - "grad_norm": 2.2084271907806396, - "learning_rate": 1.3499349825261065e-05, - "loss": 0.6761, + "epoch": 0.341147483280535, + "grad_norm": 2.2171595096588135, + "learning_rate": 1.0737247473955288e-05, + "loss": 0.7464, "step": 4846 }, { - "epoch": 0.3667662971510726, - "grad_norm": 2.2909162044525146, - "learning_rate": 1.3497399113219792e-05, - "loss": 0.8429, + "epoch": 0.3412178810278071, + "grad_norm": 1.6931865215301514, + "learning_rate": 1.0735897785605442e-05, + "loss": 0.8012, "step": 4847 }, { - "epoch": 0.36684196587340623, - "grad_norm": 2.24273419380188, - "learning_rate": 1.349544811953684e-05, - "loss": 0.7073, + "epoch": 0.3412882787750792, + "grad_norm": 1.7015212774276733, + "learning_rate": 1.073454790302289e-05, + "loss": 0.6851, "step": 4848 }, { - "epoch": 0.36691763459573984, - "grad_norm": 1.7732197046279907, - "learning_rate": 1.3493496844334386e-05, - "loss": 0.6471, + "epoch": 0.3413586765223513, + "grad_norm": 1.8041236400604248, + "learning_rate": 1.0733197826277807e-05, + "loss": 0.8114, "step": 4849 }, { - "epoch": 0.36699330331807345, - "grad_norm": 2.0910346508026123, - "learning_rate": 1.3491545287734628e-05, - "loss": 0.7475, + "epoch": 0.3414290742696234, + "grad_norm": 1.9889227151870728, + "learning_rate": 1.073184755544039e-05, + "loss": 0.6677, "step": 4850 }, { - "epoch": 0.3670689720404071, - "grad_norm": 2.107093095779419, - "learning_rate": 1.3489593449859774e-05, - "loss": 0.7203, + "epoch": 0.34149947201689546, + "grad_norm": 1.9371503591537476, + "learning_rate": 1.073049709058084e-05, + "loss": 0.6381, "step": 4851 }, { - "epoch": 0.3671446407627407, - "grad_norm": 2.1504039764404297, - "learning_rate": 1.348764133083206e-05, - "loss": 0.6333, + "epoch": 0.34156986976416753, + "grad_norm": 1.8300786018371582, + "learning_rate": 1.0729146431769364e-05, + "loss": 0.6669, "step": 4852 }, { - "epoch": 0.36722030948507434, - "grad_norm": 1.9339256286621094, - "learning_rate": 1.3485688930773729e-05, - "loss": 0.6488, + "epoch": 0.3416402675114396, + "grad_norm": 1.9158648252487183, + "learning_rate": 1.0727795579076189e-05, + "loss": 0.746, "step": 4853 }, { - "epoch": 0.36729597820740795, - "grad_norm": 2.6421656608581543, - "learning_rate": 1.348373624980705e-05, - "loss": 0.6136, + "epoch": 0.34171066525871174, + "grad_norm": 1.555375576019287, + "learning_rate": 1.0726444532571547e-05, + "loss": 0.7297, "step": 4854 }, { - "epoch": 0.3673716469297416, - "grad_norm": 2.230567693710327, - "learning_rate": 1.3481783288054306e-05, - "loss": 0.7886, + "epoch": 0.3417810630059838, + "grad_norm": 1.76763916015625, + "learning_rate": 1.0725093292325677e-05, + "loss": 0.631, "step": 4855 }, { - "epoch": 0.3674473156520752, - "grad_norm": 2.4279305934906006, - "learning_rate": 1.3479830045637794e-05, - "loss": 0.8438, + "epoch": 0.3418514607532559, + "grad_norm": 2.125959634780884, + "learning_rate": 1.0723741858408834e-05, + "loss": 0.7796, "step": 4856 }, { - "epoch": 0.36752298437440883, - "grad_norm": 2.177305221557617, - "learning_rate": 1.3477876522679835e-05, - "loss": 0.6058, + "epoch": 0.341921858500528, + "grad_norm": 1.917842149734497, + "learning_rate": 1.0722390230891281e-05, + "loss": 0.6894, "step": 4857 }, { - "epoch": 0.36759865309674244, - "grad_norm": 2.3177402019500732, - "learning_rate": 1.3475922719302765e-05, - "loss": 0.8804, + "epoch": 0.34199225624780005, + "grad_norm": 1.7959773540496826, + "learning_rate": 1.0721038409843287e-05, + "loss": 0.6127, "step": 4858 }, { - "epoch": 0.3676743218190761, - "grad_norm": 2.5845775604248047, - "learning_rate": 1.3473968635628939e-05, - "loss": 0.7402, + "epoch": 0.34206265399507213, + "grad_norm": 2.2005560398101807, + "learning_rate": 1.0719686395335134e-05, + "loss": 0.7951, "step": 4859 }, { - "epoch": 0.3677499905414097, - "grad_norm": 2.1138968467712402, - "learning_rate": 1.3472014271780725e-05, - "loss": 0.5874, + "epoch": 0.34213305174234426, + "grad_norm": 1.6291238069534302, + "learning_rate": 1.0718334187437122e-05, + "loss": 0.678, "step": 4860 }, { - "epoch": 0.36782565926374333, - "grad_norm": 2.863762378692627, - "learning_rate": 1.3470059627880516e-05, - "loss": 0.5876, + "epoch": 0.34220344948961634, + "grad_norm": 1.833293080329895, + "learning_rate": 1.0716981786219543e-05, + "loss": 0.6308, "step": 4861 }, { - "epoch": 0.36790132798607694, - "grad_norm": 2.387801170349121, - "learning_rate": 1.3468104704050713e-05, - "loss": 0.7677, + "epoch": 0.3422738472368884, + "grad_norm": 1.7735939025878906, + "learning_rate": 1.0715629191752718e-05, + "loss": 0.6642, "step": 4862 }, { - "epoch": 0.36797699670841055, - "grad_norm": 2.5543148517608643, - "learning_rate": 1.3466149500413742e-05, - "loss": 0.8206, + "epoch": 0.3423442449841605, + "grad_norm": 1.8654438257217407, + "learning_rate": 1.0714276404106964e-05, + "loss": 0.6517, "step": 4863 }, { - "epoch": 0.3680526654307442, - "grad_norm": 2.3808276653289795, - "learning_rate": 1.3464194017092043e-05, - "loss": 0.8768, + "epoch": 0.3424146427314326, + "grad_norm": 1.7841194868087769, + "learning_rate": 1.0712923423352617e-05, + "loss": 0.7957, "step": 4864 }, { - "epoch": 0.36812833415307783, - "grad_norm": 3.0910837650299072, - "learning_rate": 1.3462238254208076e-05, - "loss": 0.7585, + "epoch": 0.3424850404787047, + "grad_norm": 1.9051513671875, + "learning_rate": 1.0711570249560017e-05, + "loss": 0.7348, "step": 4865 }, { - "epoch": 0.36820400287541144, - "grad_norm": 2.5641915798187256, - "learning_rate": 1.3460282211884317e-05, - "loss": 0.911, + "epoch": 0.3425554382259768, + "grad_norm": 2.899679183959961, + "learning_rate": 1.0710216882799522e-05, + "loss": 0.6897, "step": 4866 }, { - "epoch": 0.36827967159774505, - "grad_norm": 2.5306897163391113, - "learning_rate": 1.345832589024326e-05, - "loss": 0.7569, + "epoch": 0.34262583597324886, + "grad_norm": 2.1963939666748047, + "learning_rate": 1.0708863323141488e-05, + "loss": 0.6926, "step": 4867 }, { - "epoch": 0.3683553403200787, - "grad_norm": 2.912998914718628, - "learning_rate": 1.3456369289407418e-05, - "loss": 0.6358, + "epoch": 0.34269623372052094, + "grad_norm": 1.6498035192489624, + "learning_rate": 1.0707509570656291e-05, + "loss": 0.7366, "step": 4868 }, { - "epoch": 0.3684310090424123, - "grad_norm": 2.1079254150390625, - "learning_rate": 1.3454412409499314e-05, - "loss": 0.612, + "epoch": 0.342766631467793, + "grad_norm": 2.0913853645324707, + "learning_rate": 1.0706155625414315e-05, + "loss": 0.6354, "step": 4869 }, { - "epoch": 0.36850667776474594, - "grad_norm": 2.1984755992889404, - "learning_rate": 1.3452455250641498e-05, - "loss": 0.6124, + "epoch": 0.3428370292150651, + "grad_norm": 2.3661251068115234, + "learning_rate": 1.070480148748595e-05, + "loss": 0.831, "step": 4870 }, { - "epoch": 0.36858234648707955, - "grad_norm": 2.0500175952911377, - "learning_rate": 1.3450497812956535e-05, - "loss": 0.6647, + "epoch": 0.3429074269623372, + "grad_norm": 1.6716489791870117, + "learning_rate": 1.0703447156941602e-05, + "loss": 0.7617, "step": 4871 }, { - "epoch": 0.3686580152094132, - "grad_norm": 2.169865846633911, - "learning_rate": 1.3448540096567004e-05, - "loss": 0.6461, + "epoch": 0.3429778247096093, + "grad_norm": 2.1577699184417725, + "learning_rate": 1.0702092633851683e-05, + "loss": 0.8423, "step": 4872 }, { - "epoch": 0.3687336839317468, - "grad_norm": 4.670175075531006, - "learning_rate": 1.3446582101595503e-05, - "loss": 0.6869, + "epoch": 0.3430482224568814, + "grad_norm": 2.0851173400878906, + "learning_rate": 1.0700737918286613e-05, + "loss": 0.7607, "step": 4873 }, { - "epoch": 0.36880935265408044, - "grad_norm": 1.9263705015182495, - "learning_rate": 1.3444623828164646e-05, - "loss": 0.729, + "epoch": 0.34311862020415346, + "grad_norm": 1.9092919826507568, + "learning_rate": 1.0699383010316827e-05, + "loss": 0.6722, "step": 4874 }, { - "epoch": 0.36888502137641405, - "grad_norm": 2.334681510925293, - "learning_rate": 1.3442665276397076e-05, - "loss": 0.8118, + "epoch": 0.34318901795142553, + "grad_norm": 1.9479928016662598, + "learning_rate": 1.0698027910012768e-05, + "loss": 0.8096, "step": 4875 }, { - "epoch": 0.36896069009874766, - "grad_norm": 2.442364454269409, - "learning_rate": 1.3440706446415433e-05, - "loss": 0.7216, + "epoch": 0.34325941569869767, + "grad_norm": 2.2419822216033936, + "learning_rate": 1.0696672617444891e-05, + "loss": 0.6854, "step": 4876 }, { - "epoch": 0.3690363588210813, - "grad_norm": 3.1642048358917236, - "learning_rate": 1.3438747338342389e-05, - "loss": 0.7009, + "epoch": 0.34332981344596974, + "grad_norm": 1.786512851715088, + "learning_rate": 1.0695317132683653e-05, + "loss": 0.7664, "step": 4877 }, { - "epoch": 0.36911202754341493, - "grad_norm": 2.8377344608306885, - "learning_rate": 1.3436787952300629e-05, - "loss": 0.758, + "epoch": 0.3434002111932418, + "grad_norm": 1.9368327856063843, + "learning_rate": 1.0693961455799533e-05, + "loss": 0.6852, "step": 4878 }, { - "epoch": 0.36918769626574854, - "grad_norm": 2.899456262588501, - "learning_rate": 1.3434828288412859e-05, - "loss": 0.6575, + "epoch": 0.3434706089405139, + "grad_norm": 1.7960407733917236, + "learning_rate": 1.0692605586863012e-05, + "loss": 0.6107, "step": 4879 }, { - "epoch": 0.36926336498808215, - "grad_norm": 2.240098476409912, - "learning_rate": 1.34328683468018e-05, - "loss": 0.7384, + "epoch": 0.343541006687786, + "grad_norm": 2.271759510040283, + "learning_rate": 1.0691249525944583e-05, + "loss": 0.578, "step": 4880 }, { - "epoch": 0.3693390337104158, - "grad_norm": 2.007436513900757, - "learning_rate": 1.3430908127590185e-05, - "loss": 0.757, + "epoch": 0.34361140443505805, + "grad_norm": 1.707755446434021, + "learning_rate": 1.0689893273114745e-05, + "loss": 0.7418, "step": 4881 }, { - "epoch": 0.36941470243274943, - "grad_norm": 1.9840151071548462, - "learning_rate": 1.342894763090077e-05, - "loss": 0.6856, + "epoch": 0.3436818021823302, + "grad_norm": 2.61665940284729, + "learning_rate": 1.0688536828444016e-05, + "loss": 0.6586, "step": 4882 }, { - "epoch": 0.36949037115508304, - "grad_norm": 2.434241771697998, - "learning_rate": 1.3426986856856331e-05, - "loss": 0.8133, + "epoch": 0.34375219992960226, + "grad_norm": 1.9910961389541626, + "learning_rate": 1.0687180192002915e-05, + "loss": 0.7349, "step": 4883 }, { - "epoch": 0.36956603987741665, - "grad_norm": 2.2398934364318848, - "learning_rate": 1.3425025805579656e-05, - "loss": 0.7309, + "epoch": 0.34382259767687434, + "grad_norm": 1.9239166975021362, + "learning_rate": 1.068582336386198e-05, + "loss": 0.6737, "step": 4884 }, { - "epoch": 0.3696417085997503, - "grad_norm": 1.9073582887649536, - "learning_rate": 1.3423064477193551e-05, - "loss": 0.8249, + "epoch": 0.3438929954241464, + "grad_norm": 1.738805890083313, + "learning_rate": 1.0684466344091747e-05, + "loss": 0.6565, "step": 4885 }, { - "epoch": 0.36971737732208393, - "grad_norm": 2.0606343746185303, - "learning_rate": 1.3421102871820848e-05, - "loss": 0.7246, + "epoch": 0.3439633931714185, + "grad_norm": 1.8537685871124268, + "learning_rate": 1.0683109132762774e-05, + "loss": 0.6585, "step": 4886 }, { - "epoch": 0.36979304604441754, - "grad_norm": 2.2094714641571045, - "learning_rate": 1.341914098958438e-05, - "loss": 0.7137, + "epoch": 0.34403379091869063, + "grad_norm": 1.688008427619934, + "learning_rate": 1.0681751729945622e-05, + "loss": 0.6946, "step": 4887 }, { - "epoch": 0.36986871476675115, - "grad_norm": 2.191936731338501, - "learning_rate": 1.341717883060701e-05, - "loss": 0.6196, + "epoch": 0.3441041886659627, + "grad_norm": 1.916975498199463, + "learning_rate": 1.068039413571086e-05, + "loss": 0.7278, "step": 4888 }, { - "epoch": 0.36994438348908476, - "grad_norm": 1.9013859033584595, - "learning_rate": 1.3415216395011615e-05, - "loss": 0.7521, + "epoch": 0.3441745864132348, + "grad_norm": 2.190035820007324, + "learning_rate": 1.0679036350129076e-05, + "loss": 0.7539, "step": 4889 }, { - "epoch": 0.3700200522114184, - "grad_norm": 2.026242733001709, - "learning_rate": 1.3413253682921088e-05, - "loss": 0.5991, + "epoch": 0.34424498416050686, + "grad_norm": 2.2998874187469482, + "learning_rate": 1.067767837327086e-05, + "loss": 0.7841, "step": 4890 }, { - "epoch": 0.37009572093375204, - "grad_norm": 1.986952304840088, - "learning_rate": 1.3411290694458343e-05, - "loss": 0.6441, + "epoch": 0.34431538190777894, + "grad_norm": 2.3517253398895264, + "learning_rate": 1.0676320205206814e-05, + "loss": 0.7453, "step": 4891 }, { - "epoch": 0.37017138965608565, - "grad_norm": 1.9378926753997803, - "learning_rate": 1.3409327429746304e-05, - "loss": 0.7499, + "epoch": 0.344385779655051, + "grad_norm": 1.7720485925674438, + "learning_rate": 1.0674961846007551e-05, + "loss": 0.6817, "step": 4892 }, { - "epoch": 0.37024705837841926, - "grad_norm": 2.1102092266082764, - "learning_rate": 1.3407363888907925e-05, - "loss": 0.7828, + "epoch": 0.34445617740232315, + "grad_norm": 2.3588690757751465, + "learning_rate": 1.0673603295743695e-05, + "loss": 0.652, "step": 4893 }, { - "epoch": 0.3703227271007529, - "grad_norm": 1.809720516204834, - "learning_rate": 1.340540007206616e-05, - "loss": 0.7066, + "epoch": 0.3445265751495952, + "grad_norm": 2.088320732116699, + "learning_rate": 1.0672244554485876e-05, + "loss": 0.6081, "step": 4894 }, { - "epoch": 0.37039839582308653, - "grad_norm": 2.9328057765960693, - "learning_rate": 1.3403435979343995e-05, - "loss": 0.8314, + "epoch": 0.3445969728968673, + "grad_norm": 1.9637843370437622, + "learning_rate": 1.0670885622304736e-05, + "loss": 0.8192, "step": 4895 }, { - "epoch": 0.37047406454542015, - "grad_norm": 1.9853700399398804, - "learning_rate": 1.3401471610864426e-05, - "loss": 0.8033, + "epoch": 0.3446673706441394, + "grad_norm": 2.035407781600952, + "learning_rate": 1.066952649927093e-05, + "loss": 0.662, "step": 4896 }, { - "epoch": 0.37054973326775376, - "grad_norm": 2.24923038482666, - "learning_rate": 1.3399506966750466e-05, - "loss": 0.779, + "epoch": 0.34473776839141146, + "grad_norm": 1.8977701663970947, + "learning_rate": 1.0668167185455115e-05, + "loss": 0.7638, "step": 4897 }, { - "epoch": 0.3706254019900874, - "grad_norm": 2.0755980014801025, - "learning_rate": 1.3397542047125156e-05, - "loss": 0.6533, + "epoch": 0.34480816613868354, + "grad_norm": 1.8363027572631836, + "learning_rate": 1.0666807680927967e-05, + "loss": 0.7164, "step": 4898 }, { - "epoch": 0.37070107071242103, - "grad_norm": 1.9873055219650269, - "learning_rate": 1.3395576852111535e-05, - "loss": 0.9006, + "epoch": 0.34487856388595567, + "grad_norm": 1.9792706966400146, + "learning_rate": 1.0665447985760167e-05, + "loss": 0.7379, "step": 4899 }, { - "epoch": 0.37077673943475464, - "grad_norm": 1.7531121969223022, - "learning_rate": 1.3393611381832675e-05, - "loss": 0.7746, + "epoch": 0.34494896163322775, + "grad_norm": 1.6596431732177734, + "learning_rate": 1.0664088100022406e-05, + "loss": 0.6946, "step": 4900 }, { - "epoch": 0.37085240815708825, - "grad_norm": 2.0152571201324463, - "learning_rate": 1.3391645636411661e-05, - "loss": 0.6594, + "epoch": 0.3450193593804998, + "grad_norm": 1.683342456817627, + "learning_rate": 1.0662728023785387e-05, + "loss": 0.7628, "step": 4901 }, { - "epoch": 0.37092807687942186, - "grad_norm": 1.8413496017456055, - "learning_rate": 1.3389679615971593e-05, - "loss": 0.793, + "epoch": 0.3450897571277719, + "grad_norm": 1.6519110202789307, + "learning_rate": 1.0661367757119821e-05, + "loss": 0.7112, "step": 4902 }, { - "epoch": 0.37100374560175553, - "grad_norm": 2.1065332889556885, - "learning_rate": 1.338771332063559e-05, - "loss": 0.7577, + "epoch": 0.345160154875044, + "grad_norm": 2.0822367668151855, + "learning_rate": 1.066000730009643e-05, + "loss": 0.7058, "step": 4903 }, { - "epoch": 0.37107941432408914, - "grad_norm": 2.4198694229125977, - "learning_rate": 1.3385746750526784e-05, - "loss": 0.7809, + "epoch": 0.3452305526223161, + "grad_norm": 1.768386960029602, + "learning_rate": 1.0658646652785943e-05, + "loss": 0.5979, "step": 4904 }, { - "epoch": 0.37115508304642275, - "grad_norm": 2.0909576416015625, - "learning_rate": 1.3383779905768336e-05, - "loss": 0.6867, + "epoch": 0.3453009503695882, + "grad_norm": 2.0480387210845947, + "learning_rate": 1.0657285815259104e-05, + "loss": 0.6742, "step": 4905 }, { - "epoch": 0.37123075176875636, - "grad_norm": 2.1321213245391846, - "learning_rate": 1.3381812786483408e-05, - "loss": 0.7633, + "epoch": 0.34537134811686027, + "grad_norm": 1.8383363485336304, + "learning_rate": 1.0655924787586664e-05, + "loss": 0.715, "step": 4906 }, { - "epoch": 0.37130642049109003, - "grad_norm": 3.150540351867676, - "learning_rate": 1.3379845392795192e-05, - "loss": 0.7857, + "epoch": 0.34544174586413234, + "grad_norm": 1.8986897468566895, + "learning_rate": 1.0654563569839382e-05, + "loss": 0.7237, "step": 4907 }, { - "epoch": 0.37138208921342364, - "grad_norm": 2.434208631515503, - "learning_rate": 1.337787772482689e-05, - "loss": 0.8088, + "epoch": 0.3455121436114044, + "grad_norm": 1.5691616535186768, + "learning_rate": 1.0653202162088032e-05, + "loss": 0.6887, "step": 4908 }, { - "epoch": 0.37145775793575725, - "grad_norm": 3.1003522872924805, - "learning_rate": 1.3375909782701728e-05, - "loss": 0.775, + "epoch": 0.3455825413586765, + "grad_norm": 1.5873316526412964, + "learning_rate": 1.0651840564403392e-05, + "loss": 0.659, "step": 4909 }, { - "epoch": 0.37153342665809086, - "grad_norm": 2.1573758125305176, - "learning_rate": 1.337394156654294e-05, - "loss": 0.6811, + "epoch": 0.34565293910594863, + "grad_norm": 1.8403667211532593, + "learning_rate": 1.0650478776856253e-05, + "loss": 0.6835, "step": 4910 }, { - "epoch": 0.3716090953804245, - "grad_norm": 2.903069019317627, - "learning_rate": 1.3371973076473783e-05, - "loss": 0.6592, + "epoch": 0.3457233368532207, + "grad_norm": 2.116297483444214, + "learning_rate": 1.0649116799517418e-05, + "loss": 0.6925, "step": 4911 }, { - "epoch": 0.37168476410275814, - "grad_norm": 2.0902886390686035, - "learning_rate": 1.3370004312617533e-05, - "loss": 0.772, + "epoch": 0.3457937346004928, + "grad_norm": 1.8498096466064453, + "learning_rate": 1.0647754632457693e-05, + "loss": 0.7973, "step": 4912 }, { - "epoch": 0.37176043282509175, - "grad_norm": 2.2064809799194336, - "learning_rate": 1.3368035275097477e-05, - "loss": 0.6503, + "epoch": 0.34586413234776486, + "grad_norm": 1.6956381797790527, + "learning_rate": 1.0646392275747903e-05, + "loss": 0.6967, "step": 4913 }, { - "epoch": 0.37183610154742536, - "grad_norm": 2.047928810119629, - "learning_rate": 1.3366065964036927e-05, - "loss": 0.7396, + "epoch": 0.34593453009503694, + "grad_norm": 1.7659337520599365, + "learning_rate": 1.0645029729458879e-05, + "loss": 0.7188, "step": 4914 }, { - "epoch": 0.37191177026975897, - "grad_norm": 2.1945924758911133, - "learning_rate": 1.3364096379559203e-05, - "loss": 0.757, + "epoch": 0.3460049278423091, + "grad_norm": 1.6292036771774292, + "learning_rate": 1.0643666993661455e-05, + "loss": 0.6381, "step": 4915 }, { - "epoch": 0.37198743899209263, - "grad_norm": 2.299427032470703, - "learning_rate": 1.3362126521787649e-05, - "loss": 0.8464, + "epoch": 0.34607532558958115, + "grad_norm": 2.2593860626220703, + "learning_rate": 1.0642304068426482e-05, + "loss": 0.6273, "step": 4916 }, { - "epoch": 0.37206310771442624, - "grad_norm": 2.420886278152466, - "learning_rate": 1.3360156390845623e-05, - "loss": 0.7771, + "epoch": 0.34614572333685323, + "grad_norm": 1.627012848854065, + "learning_rate": 1.0640940953824824e-05, + "loss": 0.7116, "step": 4917 }, { - "epoch": 0.37213877643675985, - "grad_norm": 2.385572910308838, - "learning_rate": 1.33581859868565e-05, - "loss": 0.7385, + "epoch": 0.3462161210841253, + "grad_norm": 2.450904130935669, + "learning_rate": 1.0639577649927348e-05, + "loss": 0.8027, "step": 4918 }, { - "epoch": 0.37221444515909347, - "grad_norm": 2.7393147945404053, - "learning_rate": 1.3356215309943676e-05, - "loss": 0.8137, + "epoch": 0.3462865188313974, + "grad_norm": 1.6138125658035278, + "learning_rate": 1.0638214156804936e-05, + "loss": 0.7456, "step": 4919 }, { - "epoch": 0.37229011388142713, - "grad_norm": 1.9456791877746582, - "learning_rate": 1.3354244360230558e-05, - "loss": 0.783, + "epoch": 0.34635691657866946, + "grad_norm": 2.1558499336242676, + "learning_rate": 1.0636850474528473e-05, + "loss": 0.7141, "step": 4920 }, { - "epoch": 0.37236578260376074, - "grad_norm": 1.7846276760101318, - "learning_rate": 1.3352273137840579e-05, - "loss": 0.5752, + "epoch": 0.3464273143259416, + "grad_norm": 1.6468205451965332, + "learning_rate": 1.063548660316886e-05, + "loss": 0.7062, "step": 4921 }, { - "epoch": 0.37244145132609435, - "grad_norm": 2.710305690765381, - "learning_rate": 1.3350301642897174e-05, - "loss": 0.6808, + "epoch": 0.34649771207321367, + "grad_norm": 1.6083965301513672, + "learning_rate": 1.0634122542797006e-05, + "loss": 0.6746, "step": 4922 }, { - "epoch": 0.37251712004842796, - "grad_norm": 2.421003818511963, - "learning_rate": 1.3348329875523812e-05, - "loss": 0.7287, + "epoch": 0.34656810982048575, + "grad_norm": 1.7419465780258179, + "learning_rate": 1.0632758293483832e-05, + "loss": 0.7791, "step": 4923 }, { - "epoch": 0.37259278877076163, - "grad_norm": 2.223174810409546, - "learning_rate": 1.3346357835843968e-05, - "loss": 0.6746, + "epoch": 0.3466385075677578, + "grad_norm": 1.727423906326294, + "learning_rate": 1.0631393855300262e-05, + "loss": 0.742, "step": 4924 }, { - "epoch": 0.37266845749309524, - "grad_norm": 2.102065086364746, - "learning_rate": 1.334438552398114e-05, - "loss": 0.6813, + "epoch": 0.3467089053150299, + "grad_norm": 2.2555882930755615, + "learning_rate": 1.0630029228317237e-05, + "loss": 0.6656, "step": 4925 }, { - "epoch": 0.37274412621542885, - "grad_norm": 2.145731210708618, - "learning_rate": 1.334241294005884e-05, - "loss": 0.7174, + "epoch": 0.346779303062302, + "grad_norm": 1.9159454107284546, + "learning_rate": 1.0628664412605707e-05, + "loss": 0.5121, "step": 4926 }, { - "epoch": 0.37281979493776246, - "grad_norm": 2.4170005321502686, - "learning_rate": 1.3340440084200594e-05, - "loss": 0.7821, + "epoch": 0.3468497008095741, + "grad_norm": 2.5646796226501465, + "learning_rate": 1.0627299408236624e-05, + "loss": 0.8422, "step": 4927 }, { - "epoch": 0.3728954636600961, - "grad_norm": 2.3091304302215576, - "learning_rate": 1.3338466956529953e-05, - "loss": 0.6898, + "epoch": 0.3469200985568462, + "grad_norm": 1.8121049404144287, + "learning_rate": 1.062593421528096e-05, + "loss": 0.7391, "step": 4928 }, { - "epoch": 0.37297113238242974, - "grad_norm": 2.37115216255188, - "learning_rate": 1.3336493557170476e-05, - "loss": 0.6841, + "epoch": 0.34699049630411827, + "grad_norm": 1.7120318412780762, + "learning_rate": 1.0624568833809695e-05, + "loss": 0.6719, "step": 4929 }, { - "epoch": 0.37304680110476335, - "grad_norm": 2.1162335872650146, - "learning_rate": 1.3334519886245749e-05, - "loss": 0.5999, + "epoch": 0.34706089405139035, + "grad_norm": 1.960915446281433, + "learning_rate": 1.0623203263893813e-05, + "loss": 0.7072, "step": 4930 }, { - "epoch": 0.37312246982709696, - "grad_norm": 1.9648668766021729, - "learning_rate": 1.3332545943879367e-05, - "loss": 0.7191, + "epoch": 0.3471312917986624, + "grad_norm": 1.8869869709014893, + "learning_rate": 1.0621837505604312e-05, + "loss": 0.6857, "step": 4931 }, { - "epoch": 0.37319813854943057, - "grad_norm": 2.149312973022461, - "learning_rate": 1.3330571730194945e-05, - "loss": 0.6175, + "epoch": 0.34720168954593456, + "grad_norm": 1.7891632318496704, + "learning_rate": 1.0620471559012197e-05, + "loss": 0.7739, "step": 4932 }, { - "epoch": 0.37327380727176424, - "grad_norm": 2.0248279571533203, - "learning_rate": 1.3328597245316115e-05, - "loss": 0.6749, + "epoch": 0.34727208729320663, + "grad_norm": 1.6827057600021362, + "learning_rate": 1.0619105424188488e-05, + "loss": 0.7303, "step": 4933 }, { - "epoch": 0.37334947599409785, - "grad_norm": 2.0984058380126953, - "learning_rate": 1.3326622489366525e-05, - "loss": 0.6864, + "epoch": 0.3473424850404787, + "grad_norm": 2.2321674823760986, + "learning_rate": 1.0617739101204213e-05, + "loss": 0.7537, "step": 4934 }, { - "epoch": 0.37342514471643146, - "grad_norm": 1.864095687866211, - "learning_rate": 1.3324647462469841e-05, - "loss": 0.8771, + "epoch": 0.3474128827877508, + "grad_norm": 2.361264705657959, + "learning_rate": 1.0616372590130403e-05, + "loss": 0.722, "step": 4935 }, { - "epoch": 0.37350081343876507, - "grad_norm": 2.171860933303833, - "learning_rate": 1.3322672164749742e-05, - "loss": 0.6689, + "epoch": 0.34748328053502286, + "grad_norm": 2.1654272079467773, + "learning_rate": 1.0615005891038106e-05, + "loss": 0.7622, "step": 4936 }, { - "epoch": 0.37357648216109873, - "grad_norm": 2.0032029151916504, - "learning_rate": 1.3320696596329935e-05, - "loss": 0.7789, + "epoch": 0.34755367828229494, + "grad_norm": 1.8957561254501343, + "learning_rate": 1.0613639003998378e-05, + "loss": 0.7003, "step": 4937 }, { - "epoch": 0.37365215088343234, - "grad_norm": 1.7304359674453735, - "learning_rate": 1.3318720757334126e-05, - "loss": 0.5789, + "epoch": 0.3476240760295671, + "grad_norm": 1.6441677808761597, + "learning_rate": 1.0612271929082289e-05, + "loss": 0.6332, "step": 4938 }, { - "epoch": 0.37372781960576595, - "grad_norm": 2.128831624984741, - "learning_rate": 1.3316744647886063e-05, - "loss": 0.7089, + "epoch": 0.34769447377683915, + "grad_norm": 2.2181944847106934, + "learning_rate": 1.061090466636091e-05, + "loss": 0.7774, "step": 4939 }, { - "epoch": 0.37380348832809956, - "grad_norm": 2.6077706813812256, - "learning_rate": 1.3314768268109483e-05, - "loss": 0.6515, + "epoch": 0.34776487152411123, + "grad_norm": 1.6139205694198608, + "learning_rate": 1.0609537215905326e-05, + "loss": 0.6987, "step": 4940 }, { - "epoch": 0.37387915705043323, - "grad_norm": 2.1214993000030518, - "learning_rate": 1.3312791618128161e-05, - "loss": 0.689, + "epoch": 0.3478352692713833, + "grad_norm": 2.062990427017212, + "learning_rate": 1.0608169577786633e-05, + "loss": 0.7359, "step": 4941 }, { - "epoch": 0.37395482577276684, - "grad_norm": 2.303440809249878, - "learning_rate": 1.3310814698065876e-05, - "loss": 0.8364, + "epoch": 0.3479056670186554, + "grad_norm": 2.0108628273010254, + "learning_rate": 1.0606801752075937e-05, + "loss": 0.6667, "step": 4942 }, { - "epoch": 0.37403049449510045, - "grad_norm": 2.154649496078491, - "learning_rate": 1.3308837508046431e-05, - "loss": 0.7713, + "epoch": 0.3479760647659275, + "grad_norm": 1.9303642511367798, + "learning_rate": 1.0605433738844352e-05, + "loss": 0.8116, "step": 4943 }, { - "epoch": 0.37410616321743406, - "grad_norm": 1.6795384883880615, - "learning_rate": 1.3306860048193649e-05, - "loss": 0.5444, + "epoch": 0.3480464625131996, + "grad_norm": 2.0251526832580566, + "learning_rate": 1.0604065538163001e-05, + "loss": 0.6202, "step": 4944 }, { - "epoch": 0.3741818319397677, - "grad_norm": 1.9569112062454224, - "learning_rate": 1.3304882318631358e-05, - "loss": 0.7279, + "epoch": 0.34811686026047167, + "grad_norm": 1.7679443359375, + "learning_rate": 1.0602697150103018e-05, + "loss": 0.7028, "step": 4945 }, { - "epoch": 0.37425750066210134, - "grad_norm": 2.33054518699646, - "learning_rate": 1.3302904319483413e-05, - "loss": 0.8949, + "epoch": 0.34818725800774375, + "grad_norm": 1.6642388105392456, + "learning_rate": 1.0601328574735546e-05, + "loss": 0.7416, "step": 4946 }, { - "epoch": 0.37433316938443495, - "grad_norm": 2.1638824939727783, - "learning_rate": 1.3300926050873681e-05, - "loss": 0.6947, + "epoch": 0.3482576557550158, + "grad_norm": 2.2769105434417725, + "learning_rate": 1.0599959812131744e-05, + "loss": 0.7309, "step": 4947 }, { - "epoch": 0.37440883810676856, - "grad_norm": 2.491903066635132, - "learning_rate": 1.3298947512926052e-05, - "loss": 0.7139, + "epoch": 0.3483280535022879, + "grad_norm": 1.6075729131698608, + "learning_rate": 1.0598590862362766e-05, + "loss": 0.7231, "step": 4948 }, { - "epoch": 0.37448450682910217, - "grad_norm": 3.184447765350342, - "learning_rate": 1.3296968705764422e-05, - "loss": 0.9073, + "epoch": 0.34839845124956004, + "grad_norm": 1.7963075637817383, + "learning_rate": 1.0597221725499793e-05, + "loss": 0.6151, "step": 4949 }, { - "epoch": 0.37456017555143584, - "grad_norm": 2.1764798164367676, - "learning_rate": 1.3294989629512715e-05, - "loss": 0.8647, + "epoch": 0.3484688489968321, + "grad_norm": 1.928511619567871, + "learning_rate": 1.0595852401614006e-05, + "loss": 0.679, "step": 4950 }, { - "epoch": 0.37463584427376945, - "grad_norm": 2.0613832473754883, - "learning_rate": 1.3293010284294867e-05, - "loss": 0.7197, + "epoch": 0.3485392467441042, + "grad_norm": 1.6049004793167114, + "learning_rate": 1.0594482890776594e-05, + "loss": 0.6607, "step": 4951 }, { - "epoch": 0.37471151299610306, - "grad_norm": 2.1119112968444824, - "learning_rate": 1.3291030670234827e-05, - "loss": 0.7002, + "epoch": 0.34860964449137627, + "grad_norm": 2.041398525238037, + "learning_rate": 1.0593113193058762e-05, + "loss": 0.7593, "step": 4952 }, { - "epoch": 0.37478718171843667, - "grad_norm": 2.2408530712127686, - "learning_rate": 1.328905078745657e-05, - "loss": 0.6455, + "epoch": 0.34868004223864835, + "grad_norm": 1.809326171875, + "learning_rate": 1.059174330853172e-05, + "loss": 0.6516, "step": 4953 }, { - "epoch": 0.37486285044077033, - "grad_norm": 2.5578296184539795, - "learning_rate": 1.3287070636084077e-05, - "loss": 0.7374, + "epoch": 0.3487504399859204, + "grad_norm": 1.8985366821289062, + "learning_rate": 1.059037323726669e-05, + "loss": 0.7381, "step": 4954 }, { - "epoch": 0.37493851916310394, - "grad_norm": 2.552988052368164, - "learning_rate": 1.3285090216241359e-05, - "loss": 0.7484, + "epoch": 0.34882083773319256, + "grad_norm": 1.9927890300750732, + "learning_rate": 1.0589002979334905e-05, + "loss": 0.6697, "step": 4955 }, { - "epoch": 0.37501418788543756, - "grad_norm": 2.490983724594116, - "learning_rate": 1.328310952805243e-05, - "loss": 0.8299, + "epoch": 0.34889123548046463, + "grad_norm": 1.905551552772522, + "learning_rate": 1.0587632534807605e-05, + "loss": 0.6098, "step": 4956 }, { - "epoch": 0.37508985660777117, - "grad_norm": 2.3703956604003906, - "learning_rate": 1.3281128571641329e-05, - "loss": 0.6885, + "epoch": 0.3489616332277367, + "grad_norm": 1.8438937664031982, + "learning_rate": 1.0586261903756039e-05, + "loss": 0.7986, "step": 4957 }, { - "epoch": 0.3751655253301048, - "grad_norm": 2.2803077697753906, - "learning_rate": 1.3279147347132111e-05, - "loss": 0.6471, + "epoch": 0.3490320309750088, + "grad_norm": 1.3952146768569946, + "learning_rate": 1.0584891086251469e-05, + "loss": 0.7811, "step": 4958 }, { - "epoch": 0.37524119405243844, - "grad_norm": 2.4633090496063232, - "learning_rate": 1.327716585464884e-05, - "loss": 0.7724, + "epoch": 0.34910242872228087, + "grad_norm": 2.032818555831909, + "learning_rate": 1.0583520082365166e-05, + "loss": 0.5857, "step": 4959 }, { - "epoch": 0.37531686277477205, - "grad_norm": 1.877164363861084, - "learning_rate": 1.3275184094315617e-05, - "loss": 0.8416, + "epoch": 0.349172826469553, + "grad_norm": 1.8293943405151367, + "learning_rate": 1.0582148892168407e-05, + "loss": 0.8511, "step": 4960 }, { - "epoch": 0.37539253149710566, - "grad_norm": 2.59460711479187, - "learning_rate": 1.3273202066256534e-05, - "loss": 0.8155, + "epoch": 0.3492432242168251, + "grad_norm": 1.8918215036392212, + "learning_rate": 1.0580777515732484e-05, + "loss": 0.6511, "step": 4961 }, { - "epoch": 0.3754682002194393, - "grad_norm": 1.790168285369873, - "learning_rate": 1.3271219770595716e-05, - "loss": 0.7016, + "epoch": 0.34931362196409715, + "grad_norm": 2.049757480621338, + "learning_rate": 1.0579405953128694e-05, + "loss": 0.8176, "step": 4962 }, { - "epoch": 0.37554386894177294, - "grad_norm": 2.1875531673431396, - "learning_rate": 1.3269237207457305e-05, - "loss": 0.7702, + "epoch": 0.34938401971136923, + "grad_norm": 1.7688411474227905, + "learning_rate": 1.057803420442835e-05, + "loss": 0.6658, "step": 4963 }, { - "epoch": 0.37561953766410655, - "grad_norm": 1.8856444358825684, - "learning_rate": 1.3267254376965449e-05, - "loss": 0.7305, + "epoch": 0.3494544174586413, + "grad_norm": 1.9594130516052246, + "learning_rate": 1.0576662269702762e-05, + "loss": 0.7598, "step": 4964 }, { - "epoch": 0.37569520638644016, - "grad_norm": 2.1934876441955566, - "learning_rate": 1.3265271279244324e-05, - "loss": 0.7793, + "epoch": 0.3495248152059134, + "grad_norm": 1.6519526243209839, + "learning_rate": 1.0575290149023268e-05, + "loss": 0.7932, "step": 4965 }, { - "epoch": 0.37577087510877377, - "grad_norm": 1.9856321811676025, - "learning_rate": 1.3263287914418111e-05, - "loss": 0.7695, + "epoch": 0.3495952129531855, + "grad_norm": 1.974048376083374, + "learning_rate": 1.0573917842461197e-05, + "loss": 0.8494, "step": 4966 }, { - "epoch": 0.37584654383110744, - "grad_norm": 2.125422239303589, - "learning_rate": 1.3261304282611025e-05, - "loss": 0.9119, + "epoch": 0.3496656107004576, + "grad_norm": 1.7019762992858887, + "learning_rate": 1.0572545350087903e-05, + "loss": 0.69, "step": 4967 }, { - "epoch": 0.37592221255344105, - "grad_norm": 2.8048200607299805, - "learning_rate": 1.3259320383947279e-05, - "loss": 0.7251, + "epoch": 0.3497360084477297, + "grad_norm": 1.7606393098831177, + "learning_rate": 1.0571172671974742e-05, + "loss": 0.6524, "step": 4968 }, { - "epoch": 0.37599788127577466, - "grad_norm": 2.0084269046783447, - "learning_rate": 1.3257336218551115e-05, - "loss": 0.6035, + "epoch": 0.34980640619500175, + "grad_norm": 2.074169635772705, + "learning_rate": 1.0569799808193077e-05, + "loss": 0.7006, "step": 4969 }, { - "epoch": 0.37607354999810827, - "grad_norm": 2.4063563346862793, - "learning_rate": 1.3255351786546786e-05, - "loss": 0.7344, + "epoch": 0.34987680394227383, + "grad_norm": 2.073413372039795, + "learning_rate": 1.0568426758814288e-05, + "loss": 0.6021, "step": 4970 }, { - "epoch": 0.3761492187204419, - "grad_norm": 2.7517759799957275, - "learning_rate": 1.3253367088058567e-05, - "loss": 0.7425, + "epoch": 0.34994720168954596, + "grad_norm": 1.7236545085906982, + "learning_rate": 1.0567053523909762e-05, + "loss": 0.7303, "step": 4971 }, { - "epoch": 0.37622488744277555, - "grad_norm": 2.8988542556762695, - "learning_rate": 1.3251382123210743e-05, - "loss": 0.7319, + "epoch": 0.35001759943681804, + "grad_norm": 1.7342137098312378, + "learning_rate": 1.056568010355089e-05, + "loss": 0.5931, "step": 4972 }, { - "epoch": 0.37630055616510916, - "grad_norm": 2.421642541885376, - "learning_rate": 1.324939689212762e-05, - "loss": 0.888, + "epoch": 0.3500879971840901, + "grad_norm": 1.5078591108322144, + "learning_rate": 1.056430649780908e-05, + "loss": 0.7171, "step": 4973 }, { - "epoch": 0.37637622488744277, - "grad_norm": 2.56626558303833, - "learning_rate": 1.324741139493352e-05, - "loss": 0.654, + "epoch": 0.3501583949313622, + "grad_norm": 1.891093134880066, + "learning_rate": 1.0562932706755749e-05, + "loss": 0.6718, "step": 4974 }, { - "epoch": 0.3764518936097764, - "grad_norm": 1.87079918384552, - "learning_rate": 1.3245425631752777e-05, - "loss": 0.7278, + "epoch": 0.35022879267863427, + "grad_norm": 1.6503496170043945, + "learning_rate": 1.056155873046232e-05, + "loss": 0.7106, "step": 4975 }, { - "epoch": 0.37652756233211004, - "grad_norm": 2.264610767364502, - "learning_rate": 1.3243439602709754e-05, - "loss": 0.5887, + "epoch": 0.35029919042590635, + "grad_norm": 1.7664936780929565, + "learning_rate": 1.0560184569000224e-05, + "loss": 0.6401, "step": 4976 }, { - "epoch": 0.37660323105444365, - "grad_norm": 2.095689058303833, - "learning_rate": 1.3241453307928816e-05, - "loss": 0.7291, + "epoch": 0.3503695881731785, + "grad_norm": 1.81720769405365, + "learning_rate": 1.0558810222440913e-05, + "loss": 0.7601, "step": 4977 }, { - "epoch": 0.37667889977677727, - "grad_norm": 1.5480690002441406, - "learning_rate": 1.3239466747534355e-05, - "loss": 0.6431, + "epoch": 0.35043998592045056, + "grad_norm": 1.6572479009628296, + "learning_rate": 1.055743569085583e-05, + "loss": 0.7439, "step": 4978 }, { - "epoch": 0.3767545684991109, - "grad_norm": 2.456465721130371, - "learning_rate": 1.3237479921650772e-05, - "loss": 0.6346, + "epoch": 0.35051038366772264, + "grad_norm": 1.8270928859710693, + "learning_rate": 1.0556060974316449e-05, + "loss": 0.6478, "step": 4979 }, { - "epoch": 0.37683023722144454, - "grad_norm": 1.8623820543289185, - "learning_rate": 1.323549283040249e-05, - "loss": 0.7686, + "epoch": 0.3505807814149947, + "grad_norm": 1.877339243888855, + "learning_rate": 1.0554686072894233e-05, + "loss": 0.7492, "step": 4980 }, { - "epoch": 0.37690590594377815, - "grad_norm": 3.514700174331665, - "learning_rate": 1.3233505473913951e-05, - "loss": 0.6733, + "epoch": 0.3506511791622668, + "grad_norm": 1.868634581565857, + "learning_rate": 1.0553310986660669e-05, + "loss": 0.657, "step": 4981 }, { - "epoch": 0.37698157466611176, - "grad_norm": 2.421954393386841, - "learning_rate": 1.3231517852309602e-05, - "loss": 0.6811, + "epoch": 0.35072157690953887, + "grad_norm": 1.8079792261123657, + "learning_rate": 1.0551935715687252e-05, + "loss": 0.6569, "step": 4982 }, { - "epoch": 0.3770572433884454, - "grad_norm": 3.265939950942993, - "learning_rate": 1.3229529965713925e-05, - "loss": 0.8858, + "epoch": 0.350791974656811, + "grad_norm": 1.6973915100097656, + "learning_rate": 1.0550560260045477e-05, + "loss": 0.7342, "step": 4983 }, { - "epoch": 0.377132912110779, - "grad_norm": 2.31626558303833, - "learning_rate": 1.3227541814251395e-05, - "loss": 0.8614, + "epoch": 0.3508623724040831, + "grad_norm": 1.9286606311798096, + "learning_rate": 1.054918461980686e-05, + "loss": 0.6673, "step": 4984 }, { - "epoch": 0.37720858083311265, - "grad_norm": 1.9577152729034424, - "learning_rate": 1.3225553398046527e-05, - "loss": 0.7351, + "epoch": 0.35093277015135516, + "grad_norm": 1.8243244886398315, + "learning_rate": 1.0547808795042922e-05, + "loss": 0.6629, "step": 4985 }, { - "epoch": 0.37728424955544626, - "grad_norm": 1.793585181236267, - "learning_rate": 1.3223564717223837e-05, - "loss": 0.7094, + "epoch": 0.35100316789862723, + "grad_norm": 2.1092708110809326, + "learning_rate": 1.0546432785825188e-05, + "loss": 0.7852, "step": 4986 }, { - "epoch": 0.37735991827777987, - "grad_norm": 2.228999137878418, - "learning_rate": 1.3221575771907864e-05, - "loss": 0.6827, + "epoch": 0.3510735656458993, + "grad_norm": 2.079528331756592, + "learning_rate": 1.0545056592225204e-05, + "loss": 0.7063, "step": 4987 }, { - "epoch": 0.3774355870001135, - "grad_norm": 2.4481256008148193, - "learning_rate": 1.321958656222316e-05, - "loss": 0.7283, + "epoch": 0.35114396339317144, + "grad_norm": 1.8954263925552368, + "learning_rate": 1.0543680214314517e-05, + "loss": 0.8622, "step": 4988 }, { - "epoch": 0.37751125572244715, - "grad_norm": 2.0559909343719482, - "learning_rate": 1.32175970882943e-05, - "loss": 0.6605, + "epoch": 0.3512143611404435, + "grad_norm": 2.7599573135375977, + "learning_rate": 1.0542303652164687e-05, + "loss": 0.7335, "step": 4989 }, { - "epoch": 0.37758692444478076, - "grad_norm": 2.7114646434783936, - "learning_rate": 1.3215607350245869e-05, - "loss": 0.7703, + "epoch": 0.3512847588877156, + "grad_norm": 1.9689878225326538, + "learning_rate": 1.054092690584728e-05, + "loss": 0.73, "step": 4990 }, { - "epoch": 0.37766259316711437, - "grad_norm": 2.345587968826294, - "learning_rate": 1.3213617348202471e-05, - "loss": 0.8246, + "epoch": 0.3513551566349877, + "grad_norm": 1.8023734092712402, + "learning_rate": 1.0539549975433876e-05, + "loss": 0.6132, "step": 4991 }, { - "epoch": 0.377738261889448, - "grad_norm": 2.660860538482666, - "learning_rate": 1.3211627082288725e-05, - "loss": 0.7403, + "epoch": 0.35142555438225975, + "grad_norm": 2.0936477184295654, + "learning_rate": 1.0538172860996065e-05, + "loss": 0.6629, "step": 4992 }, { - "epoch": 0.37781393061178165, - "grad_norm": 2.2472615242004395, - "learning_rate": 1.320963655262927e-05, - "loss": 0.7548, + "epoch": 0.35149595212953183, + "grad_norm": 1.7051453590393066, + "learning_rate": 1.0536795562605443e-05, + "loss": 0.6567, "step": 4993 }, { - "epoch": 0.37788959933411526, - "grad_norm": 2.2617874145507812, - "learning_rate": 1.3207645759348759e-05, - "loss": 0.7703, + "epoch": 0.35156634987680396, + "grad_norm": 1.686327576637268, + "learning_rate": 1.0535418080333614e-05, + "loss": 0.6859, "step": 4994 }, { - "epoch": 0.37796526805644887, - "grad_norm": 2.100846290588379, - "learning_rate": 1.3205654702571858e-05, - "loss": 0.7233, + "epoch": 0.35163674762407604, + "grad_norm": 1.7864043712615967, + "learning_rate": 1.0534040414252202e-05, + "loss": 0.6901, "step": 4995 }, { - "epoch": 0.3780409367787825, - "grad_norm": 2.057562828063965, - "learning_rate": 1.320366338242326e-05, - "loss": 0.6308, + "epoch": 0.3517071453713481, + "grad_norm": 2.174367666244507, + "learning_rate": 1.0532662564432823e-05, + "loss": 0.7776, "step": 4996 }, { - "epoch": 0.3781166055011161, - "grad_norm": 2.615999937057495, - "learning_rate": 1.3201671799027663e-05, - "loss": 0.7651, + "epoch": 0.3517775431186202, + "grad_norm": 2.069237470626831, + "learning_rate": 1.053128453094712e-05, + "loss": 0.7473, "step": 4997 }, { - "epoch": 0.37819227422344975, - "grad_norm": 2.463115692138672, - "learning_rate": 1.319967995250979e-05, - "loss": 0.7944, + "epoch": 0.3518479408658923, + "grad_norm": 1.6997774839401245, + "learning_rate": 1.0529906313866738e-05, + "loss": 0.6948, "step": 4998 }, { - "epoch": 0.37826794294578336, - "grad_norm": 2.52860689163208, - "learning_rate": 1.3197687842994374e-05, - "loss": 0.7515, + "epoch": 0.3519183386131644, + "grad_norm": 1.7867473363876343, + "learning_rate": 1.0528527913263327e-05, + "loss": 0.8076, "step": 4999 }, { - "epoch": 0.378343611668117, - "grad_norm": 1.9558433294296265, - "learning_rate": 1.3195695470606167e-05, - "loss": 0.8407, + "epoch": 0.3519887363604365, + "grad_norm": 1.7589291334152222, + "learning_rate": 1.0527149329208554e-05, + "loss": 0.7408, "step": 5000 }, { - "epoch": 0.3784192803904506, - "grad_norm": 2.168161153793335, - "learning_rate": 1.319370283546994e-05, - "loss": 0.7869, + "epoch": 0.35205913410770856, + "grad_norm": 1.6534780263900757, + "learning_rate": 1.0525770561774095e-05, + "loss": 0.6576, "step": 5001 }, { - "epoch": 0.37849494911278425, - "grad_norm": 2.3098533153533936, - "learning_rate": 1.3191709937710478e-05, - "loss": 0.7613, + "epoch": 0.35212953185498064, + "grad_norm": 1.945206880569458, + "learning_rate": 1.052439161103163e-05, + "loss": 0.7183, "step": 5002 }, { - "epoch": 0.37857061783511786, - "grad_norm": 2.507798910140991, - "learning_rate": 1.3189716777452581e-05, - "loss": 0.6891, + "epoch": 0.3521999296022527, + "grad_norm": 1.6801036596298218, + "learning_rate": 1.0523012477052852e-05, + "loss": 0.6556, "step": 5003 }, { - "epoch": 0.3786462865574515, - "grad_norm": 2.061244249343872, - "learning_rate": 1.318772335482107e-05, - "loss": 0.72, + "epoch": 0.3522703273495248, + "grad_norm": 1.5722590684890747, + "learning_rate": 1.052163315990947e-05, + "loss": 0.88, "step": 5004 }, { - "epoch": 0.3787219552797851, - "grad_norm": 2.2283413410186768, - "learning_rate": 1.3185729669940776e-05, - "loss": 0.7914, + "epoch": 0.3523407250967969, + "grad_norm": 2.312286376953125, + "learning_rate": 1.0520253659673186e-05, + "loss": 0.7122, "step": 5005 }, { - "epoch": 0.37879762400211875, - "grad_norm": 2.303812026977539, - "learning_rate": 1.3183735722936554e-05, - "loss": 0.7093, + "epoch": 0.352411122844069, + "grad_norm": 1.9871277809143066, + "learning_rate": 1.0518873976415727e-05, + "loss": 0.6826, "step": 5006 }, { - "epoch": 0.37887329272445236, - "grad_norm": 2.085308313369751, - "learning_rate": 1.3181741513933265e-05, - "loss": 0.8941, + "epoch": 0.3524815205913411, + "grad_norm": 2.3135337829589844, + "learning_rate": 1.0517494110208823e-05, + "loss": 0.7125, "step": 5007 }, { - "epoch": 0.37894896144678597, - "grad_norm": 1.7646946907043457, - "learning_rate": 1.3179747043055802e-05, - "loss": 0.6162, + "epoch": 0.35255191833861316, + "grad_norm": 1.9034613370895386, + "learning_rate": 1.0516114061124214e-05, + "loss": 0.6573, "step": 5008 }, { - "epoch": 0.3790246301691196, - "grad_norm": 2.3402299880981445, - "learning_rate": 1.3177752310429057e-05, - "loss": 0.7871, + "epoch": 0.35262231608588523, + "grad_norm": 2.024916410446167, + "learning_rate": 1.051473382923365e-05, + "loss": 0.6537, "step": 5009 }, { - "epoch": 0.3791002988914532, - "grad_norm": 2.1623239517211914, - "learning_rate": 1.317575731617795e-05, - "loss": 0.832, + "epoch": 0.3526927138331573, + "grad_norm": 2.604886770248413, + "learning_rate": 1.0513353414608895e-05, + "loss": 0.7455, "step": 5010 }, { - "epoch": 0.37917596761378686, - "grad_norm": 2.153862953186035, - "learning_rate": 1.3173762060427414e-05, - "loss": 0.6814, + "epoch": 0.35276311158042944, + "grad_norm": 1.8917475938796997, + "learning_rate": 1.0511972817321712e-05, + "loss": 0.7108, "step": 5011 }, { - "epoch": 0.37925163633612047, - "grad_norm": 2.1609599590301514, - "learning_rate": 1.31717665433024e-05, - "loss": 0.7195, + "epoch": 0.3528335093277015, + "grad_norm": 2.2989869117736816, + "learning_rate": 1.0510592037443881e-05, + "loss": 0.6491, "step": 5012 }, { - "epoch": 0.3793273050584541, - "grad_norm": 1.9634231328964233, - "learning_rate": 1.316977076492787e-05, - "loss": 0.6328, + "epoch": 0.3529039070749736, + "grad_norm": 1.7837013006210327, + "learning_rate": 1.050921107504719e-05, + "loss": 0.7231, "step": 5013 }, { - "epoch": 0.3794029737807877, - "grad_norm": 2.0286974906921387, - "learning_rate": 1.316777472542881e-05, - "loss": 0.8721, + "epoch": 0.3529743048222457, + "grad_norm": 2.028761863708496, + "learning_rate": 1.0507829930203438e-05, + "loss": 0.7175, "step": 5014 }, { - "epoch": 0.37947864250312136, - "grad_norm": 4.155838966369629, - "learning_rate": 1.3165778424930214e-05, - "loss": 0.8739, + "epoch": 0.35304470256951775, + "grad_norm": 2.418998956680298, + "learning_rate": 1.050644860298443e-05, + "loss": 0.827, "step": 5015 }, { - "epoch": 0.37955431122545497, - "grad_norm": 2.0985116958618164, - "learning_rate": 1.31637818635571e-05, - "loss": 0.692, + "epoch": 0.3531151003167899, + "grad_norm": 2.2289206981658936, + "learning_rate": 1.0505067093461987e-05, + "loss": 0.6976, "step": 5016 }, { - "epoch": 0.3796299799477886, - "grad_norm": 2.7467665672302246, - "learning_rate": 1.3161785041434501e-05, - "loss": 0.7331, + "epoch": 0.35318549806406196, + "grad_norm": 2.466489791870117, + "learning_rate": 1.0503685401707926e-05, + "loss": 0.6776, "step": 5017 }, { - "epoch": 0.3797056486701222, - "grad_norm": 6.863169193267822, - "learning_rate": 1.3159787958687457e-05, - "loss": 0.8108, + "epoch": 0.35325589581133404, + "grad_norm": 1.9794361591339111, + "learning_rate": 1.0502303527794093e-05, + "loss": 0.6602, "step": 5018 }, { - "epoch": 0.37978131739245585, - "grad_norm": 2.4260308742523193, - "learning_rate": 1.3157790615441042e-05, - "loss": 0.6481, + "epoch": 0.3533262935586061, + "grad_norm": 2.1910600662231445, + "learning_rate": 1.0500921471792326e-05, + "loss": 0.7164, "step": 5019 }, { - "epoch": 0.37985698611478946, - "grad_norm": 2.156952142715454, - "learning_rate": 1.3155793011820327e-05, - "loss": 0.6652, + "epoch": 0.3533966913058782, + "grad_norm": 1.7454729080200195, + "learning_rate": 1.0499539233774478e-05, + "loss": 0.6772, "step": 5020 }, { - "epoch": 0.3799326548371231, - "grad_norm": 2.060511350631714, - "learning_rate": 1.3153795147950412e-05, - "loss": 0.7653, + "epoch": 0.3534670890531503, + "grad_norm": 2.1876533031463623, + "learning_rate": 1.0498156813812419e-05, + "loss": 0.686, "step": 5021 }, { - "epoch": 0.3800083235594567, - "grad_norm": 1.6299368143081665, - "learning_rate": 1.3151797023956411e-05, - "loss": 0.9353, + "epoch": 0.3535374868004224, + "grad_norm": 1.730958104133606, + "learning_rate": 1.049677421197802e-05, + "loss": 0.7212, "step": 5022 }, { - "epoch": 0.3800839922817903, - "grad_norm": 2.0761969089508057, - "learning_rate": 1.3149798639963451e-05, - "loss": 0.6743, + "epoch": 0.3536078845476945, + "grad_norm": 2.426898241043091, + "learning_rate": 1.049539142834316e-05, + "loss": 0.8384, "step": 5023 }, { - "epoch": 0.38015966100412396, - "grad_norm": 2.3090572357177734, - "learning_rate": 1.3147799996096682e-05, - "loss": 0.8114, + "epoch": 0.35367828229496656, + "grad_norm": 1.7746762037277222, + "learning_rate": 1.0494008462979735e-05, + "loss": 0.7837, "step": 5024 }, { - "epoch": 0.38023532972645757, - "grad_norm": 2.1611216068267822, - "learning_rate": 1.3145801092481256e-05, - "loss": 0.8595, + "epoch": 0.35374868004223864, + "grad_norm": 2.1444036960601807, + "learning_rate": 1.0492625315959647e-05, + "loss": 0.7332, "step": 5025 }, { - "epoch": 0.3803109984487912, - "grad_norm": 2.3790178298950195, - "learning_rate": 1.3143801929242359e-05, - "loss": 0.6275, + "epoch": 0.3538190777895107, + "grad_norm": 1.8774069547653198, + "learning_rate": 1.0491241987354805e-05, + "loss": 0.7557, "step": 5026 }, { - "epoch": 0.3803866671711248, - "grad_norm": 2.3763949871063232, - "learning_rate": 1.3141802506505183e-05, - "loss": 0.8345, + "epoch": 0.35388947553678285, + "grad_norm": 1.6719549894332886, + "learning_rate": 1.0489858477237129e-05, + "loss": 0.817, "step": 5027 }, { - "epoch": 0.38046233589345846, - "grad_norm": 2.2187368869781494, - "learning_rate": 1.3139802824394936e-05, - "loss": 0.7276, + "epoch": 0.3539598732840549, + "grad_norm": 2.0939130783081055, + "learning_rate": 1.0488474785678553e-05, + "loss": 0.6269, "step": 5028 }, { - "epoch": 0.38053800461579207, - "grad_norm": 1.9624474048614502, - "learning_rate": 1.313780288303685e-05, - "loss": 0.6358, + "epoch": 0.354030271031327, + "grad_norm": 1.8406318426132202, + "learning_rate": 1.0487090912751009e-05, + "loss": 0.723, "step": 5029 }, { - "epoch": 0.3806136733381257, - "grad_norm": 1.6875008344650269, - "learning_rate": 1.3135802682556162e-05, - "loss": 0.745, + "epoch": 0.3541006687785991, + "grad_norm": 2.0563204288482666, + "learning_rate": 1.0485706858526454e-05, + "loss": 0.7694, "step": 5030 }, { - "epoch": 0.3806893420604593, - "grad_norm": 3.063140392303467, - "learning_rate": 1.3133802223078132e-05, - "loss": 0.8704, + "epoch": 0.35417106652587116, + "grad_norm": 1.8692225217819214, + "learning_rate": 1.048432262307684e-05, + "loss": 0.6323, "step": 5031 }, { - "epoch": 0.38076501078279296, - "grad_norm": 2.5540196895599365, - "learning_rate": 1.3131801504728037e-05, - "loss": 0.6593, + "epoch": 0.35424146427314324, + "grad_norm": 1.688270926475525, + "learning_rate": 1.0482938206474138e-05, + "loss": 0.6764, "step": 5032 }, { - "epoch": 0.38084067950512657, - "grad_norm": 2.9757909774780273, - "learning_rate": 1.3129800527631167e-05, - "loss": 0.7676, + "epoch": 0.35431186202041537, + "grad_norm": 1.6144022941589355, + "learning_rate": 1.0481553608790327e-05, + "loss": 0.7987, "step": 5033 }, { - "epoch": 0.3809163482274602, - "grad_norm": 2.7554965019226074, - "learning_rate": 1.3127799291912833e-05, - "loss": 0.7257, + "epoch": 0.35438225976768745, + "grad_norm": 2.513026475906372, + "learning_rate": 1.0480168830097388e-05, + "loss": 0.684, "step": 5034 }, { - "epoch": 0.3809920169497938, - "grad_norm": 2.5679843425750732, - "learning_rate": 1.3125797797698358e-05, - "loss": 0.7173, + "epoch": 0.3544526575149595, + "grad_norm": 2.033498764038086, + "learning_rate": 1.0478783870467322e-05, + "loss": 0.705, "step": 5035 }, { - "epoch": 0.3810676856721274, - "grad_norm": 2.0927176475524902, - "learning_rate": 1.3123796045113075e-05, - "loss": 0.707, + "epoch": 0.3545230552622316, + "grad_norm": 2.6701114177703857, + "learning_rate": 1.047739872997213e-05, + "loss": 0.6523, "step": 5036 }, { - "epoch": 0.38114335439446106, - "grad_norm": 3.1577799320220947, - "learning_rate": 1.312179403428235e-05, - "loss": 0.8109, + "epoch": 0.3545934530095037, + "grad_norm": 2.1940674781799316, + "learning_rate": 1.047601340868383e-05, + "loss": 0.7225, "step": 5037 }, { - "epoch": 0.3812190231167947, - "grad_norm": 1.8900063037872314, - "learning_rate": 1.3119791765331549e-05, - "loss": 0.7693, + "epoch": 0.35466385075677576, + "grad_norm": 1.7631759643554688, + "learning_rate": 1.0474627906674443e-05, + "loss": 0.6927, "step": 5038 }, { - "epoch": 0.3812946918391283, - "grad_norm": 2.2258424758911133, - "learning_rate": 1.3117789238386063e-05, - "loss": 0.6866, + "epoch": 0.3547342485040479, + "grad_norm": 1.8962273597717285, + "learning_rate": 1.0473242224016006e-05, + "loss": 0.5739, "step": 5039 }, { - "epoch": 0.3813703605614619, - "grad_norm": 1.6877254247665405, - "learning_rate": 1.3115786453571299e-05, - "loss": 0.6474, + "epoch": 0.35480464625131997, + "grad_norm": 1.8077579736709595, + "learning_rate": 1.047185636078056e-05, + "loss": 0.7024, "step": 5040 }, { - "epoch": 0.38144602928379556, - "grad_norm": 2.2569453716278076, - "learning_rate": 1.311378341101267e-05, - "loss": 0.6449, + "epoch": 0.35487504399859204, + "grad_norm": 2.057858943939209, + "learning_rate": 1.0470470317040157e-05, + "loss": 0.7096, "step": 5041 }, { - "epoch": 0.3815216980061292, - "grad_norm": 2.3201940059661865, - "learning_rate": 1.3111780110835622e-05, - "loss": 0.7697, + "epoch": 0.3549454417458641, + "grad_norm": 2.021698236465454, + "learning_rate": 1.0469084092866858e-05, + "loss": 0.6867, "step": 5042 }, { - "epoch": 0.3815973667284628, - "grad_norm": 2.2311851978302, - "learning_rate": 1.3109776553165604e-05, - "loss": 0.5872, + "epoch": 0.3550158394931362, + "grad_norm": 1.8610731363296509, + "learning_rate": 1.0467697688332737e-05, + "loss": 0.6808, "step": 5043 }, { - "epoch": 0.3816730354507964, - "grad_norm": 1.6546425819396973, - "learning_rate": 1.3107772738128085e-05, - "loss": 0.6899, + "epoch": 0.35508623724040833, + "grad_norm": 2.360499858856201, + "learning_rate": 1.0466311103509873e-05, + "loss": 0.8005, "step": 5044 }, { - "epoch": 0.38174870417313006, - "grad_norm": 2.160982131958008, - "learning_rate": 1.3105768665848551e-05, - "loss": 0.7574, + "epoch": 0.3551566349876804, + "grad_norm": 1.832512617111206, + "learning_rate": 1.0464924338470354e-05, + "loss": 0.6549, "step": 5045 }, { - "epoch": 0.38182437289546367, - "grad_norm": 2.2722971439361572, - "learning_rate": 1.3103764336452501e-05, - "loss": 0.75, + "epoch": 0.3552270327349525, + "grad_norm": 1.9466334581375122, + "learning_rate": 1.0463537393286283e-05, + "loss": 0.6955, "step": 5046 }, { - "epoch": 0.3819000416177973, - "grad_norm": 2.3404366970062256, - "learning_rate": 1.310175975006546e-05, - "loss": 0.6267, + "epoch": 0.35529743048222456, + "grad_norm": 1.568792700767517, + "learning_rate": 1.0462150268029764e-05, + "loss": 0.6531, "step": 5047 }, { - "epoch": 0.3819757103401309, - "grad_norm": 1.7116867303848267, - "learning_rate": 1.3099754906812952e-05, - "loss": 0.6965, + "epoch": 0.35536782822949664, + "grad_norm": 1.9727941751480103, + "learning_rate": 1.0460762962772916e-05, + "loss": 0.7236, "step": 5048 }, { - "epoch": 0.38205137906246456, - "grad_norm": 1.8109760284423828, - "learning_rate": 1.3097749806820535e-05, - "loss": 0.6197, + "epoch": 0.3554382259767687, + "grad_norm": 1.7591361999511719, + "learning_rate": 1.045937547758787e-05, + "loss": 0.8445, "step": 5049 }, { - "epoch": 0.38212704778479817, - "grad_norm": 2.044471025466919, - "learning_rate": 1.309574445021377e-05, - "loss": 0.8108, + "epoch": 0.35550862372404085, + "grad_norm": 2.0757646560668945, + "learning_rate": 1.045798781254676e-05, + "loss": 0.677, "step": 5050 }, { - "epoch": 0.3822027165071318, - "grad_norm": 2.0608906745910645, - "learning_rate": 1.309373883711824e-05, - "loss": 0.7047, + "epoch": 0.35557902147131293, + "grad_norm": 1.5907691717147827, + "learning_rate": 1.045659996772173e-05, + "loss": 0.6407, "step": 5051 }, { - "epoch": 0.3822783852294654, - "grad_norm": 5.626868724822998, - "learning_rate": 1.3091732967659546e-05, - "loss": 0.9076, + "epoch": 0.355649419218585, + "grad_norm": 1.9472310543060303, + "learning_rate": 1.0455211943184937e-05, + "loss": 0.6693, "step": 5052 }, { - "epoch": 0.382354053951799, - "grad_norm": 2.2120423316955566, - "learning_rate": 1.3089726841963296e-05, - "loss": 0.7146, + "epoch": 0.3557198169658571, + "grad_norm": 1.6415135860443115, + "learning_rate": 1.0453823739008548e-05, + "loss": 0.7297, "step": 5053 }, { - "epoch": 0.38242972267413267, - "grad_norm": 2.1795167922973633, - "learning_rate": 1.3087720460155122e-05, - "loss": 0.7101, + "epoch": 0.35579021471312916, + "grad_norm": 1.8781920671463013, + "learning_rate": 1.0452435355264733e-05, + "loss": 0.6319, "step": 5054 }, { - "epoch": 0.3825053913964663, - "grad_norm": 2.11128568649292, - "learning_rate": 1.3085713822360676e-05, - "loss": 0.8643, + "epoch": 0.3558606124604013, + "grad_norm": 1.8058972358703613, + "learning_rate": 1.0451046792025679e-05, + "loss": 0.7409, "step": 5055 }, { - "epoch": 0.3825810601187999, - "grad_norm": 2.028358221054077, - "learning_rate": 1.3083706928705612e-05, - "loss": 0.7917, + "epoch": 0.35593101020767337, + "grad_norm": 1.9828354120254517, + "learning_rate": 1.0449658049363575e-05, + "loss": 0.68, "step": 5056 }, { - "epoch": 0.3826567288411335, - "grad_norm": 2.1836349964141846, - "learning_rate": 1.3081699779315615e-05, - "loss": 0.9105, + "epoch": 0.35600140795494545, + "grad_norm": 7.528056621551514, + "learning_rate": 1.0448269127350624e-05, + "loss": 0.7368, "step": 5057 }, { - "epoch": 0.38273239756346716, - "grad_norm": 1.974503517150879, - "learning_rate": 1.3079692374316374e-05, - "loss": 0.7627, + "epoch": 0.3560718057022175, + "grad_norm": 1.6437970399856567, + "learning_rate": 1.044688002605904e-05, + "loss": 0.6721, "step": 5058 }, { - "epoch": 0.3828080662858008, - "grad_norm": 2.411986827850342, - "learning_rate": 1.3077684713833602e-05, - "loss": 0.8903, + "epoch": 0.3561422034494896, + "grad_norm": 2.325629472732544, + "learning_rate": 1.044549074556104e-05, + "loss": 0.7894, "step": 5059 }, { - "epoch": 0.3828837350081344, - "grad_norm": 2.188807725906372, - "learning_rate": 1.3075676797993023e-05, - "loss": 0.6245, + "epoch": 0.3562126011967617, + "grad_norm": 2.0565502643585205, + "learning_rate": 1.0444101285928858e-05, + "loss": 0.7555, "step": 5060 }, { - "epoch": 0.382959403730468, - "grad_norm": 3.615983247756958, - "learning_rate": 1.3073668626920381e-05, - "loss": 0.7468, + "epoch": 0.3562829989440338, + "grad_norm": 1.746848225593567, + "learning_rate": 1.0442711647234732e-05, + "loss": 0.5482, "step": 5061 }, { - "epoch": 0.38303507245280166, - "grad_norm": 2.4570207595825195, - "learning_rate": 1.3071660200741436e-05, - "loss": 0.8592, + "epoch": 0.3563533966913059, + "grad_norm": 2.01792049407959, + "learning_rate": 1.0441321829550906e-05, + "loss": 0.7306, "step": 5062 }, { - "epoch": 0.38311074117513527, - "grad_norm": 2.342355966567993, - "learning_rate": 1.3069651519581959e-05, - "loss": 0.6396, + "epoch": 0.35642379443857797, + "grad_norm": 1.982556700706482, + "learning_rate": 1.0439931832949642e-05, + "loss": 0.7922, "step": 5063 }, { - "epoch": 0.3831864098974689, - "grad_norm": 2.1598551273345947, - "learning_rate": 1.3067642583567737e-05, - "loss": 0.5799, + "epoch": 0.35649419218585005, + "grad_norm": 2.311896800994873, + "learning_rate": 1.0438541657503209e-05, + "loss": 0.6471, "step": 5064 }, { - "epoch": 0.3832620786198025, - "grad_norm": 1.9163577556610107, - "learning_rate": 1.3065633392824586e-05, - "loss": 0.658, + "epoch": 0.3565645899331221, + "grad_norm": 2.4444401264190674, + "learning_rate": 1.0437151303283879e-05, + "loss": 0.7341, "step": 5065 }, { - "epoch": 0.3833377473421361, - "grad_norm": 2.467026472091675, - "learning_rate": 1.3063623947478318e-05, - "loss": 0.7139, + "epoch": 0.3566349876803942, + "grad_norm": 2.190674066543579, + "learning_rate": 1.0435760770363941e-05, + "loss": 0.7284, "step": 5066 }, { - "epoch": 0.38341341606446977, - "grad_norm": 1.7132724523544312, - "learning_rate": 1.3061614247654775e-05, - "loss": 0.7353, + "epoch": 0.35670538542766633, + "grad_norm": 1.95771062374115, + "learning_rate": 1.043437005881569e-05, + "loss": 0.7187, "step": 5067 }, { - "epoch": 0.3834890847868034, - "grad_norm": 1.9084765911102295, - "learning_rate": 1.3059604293479815e-05, - "loss": 0.6118, + "epoch": 0.3567757831749384, + "grad_norm": 1.950106143951416, + "learning_rate": 1.0432979168711425e-05, + "loss": 0.6703, "step": 5068 }, { - "epoch": 0.383564753509137, - "grad_norm": 1.8953239917755127, - "learning_rate": 1.3057594085079298e-05, - "loss": 0.8124, + "epoch": 0.3568461809222105, + "grad_norm": 1.7830218076705933, + "learning_rate": 1.0431588100123468e-05, + "loss": 0.7029, "step": 5069 }, { - "epoch": 0.3836404222314706, - "grad_norm": 2.3783981800079346, - "learning_rate": 1.305558362257912e-05, - "loss": 0.763, + "epoch": 0.35691657866948256, + "grad_norm": 2.2750158309936523, + "learning_rate": 1.0430196853124137e-05, + "loss": 0.6864, "step": 5070 }, { - "epoch": 0.38371609095380427, - "grad_norm": 2.3013756275177, - "learning_rate": 1.3053572906105177e-05, - "loss": 0.6881, + "epoch": 0.35698697641675464, + "grad_norm": 2.7766807079315186, + "learning_rate": 1.0428805427785765e-05, + "loss": 0.6459, "step": 5071 }, { - "epoch": 0.3837917596761379, - "grad_norm": 2.113539934158325, - "learning_rate": 1.3051561935783388e-05, - "loss": 0.8303, + "epoch": 0.3570573741640268, + "grad_norm": 2.0123088359832764, + "learning_rate": 1.0427413824180691e-05, + "loss": 0.6941, "step": 5072 }, { - "epoch": 0.3838674283984715, - "grad_norm": 2.5680975914001465, - "learning_rate": 1.3049550711739684e-05, - "loss": 0.7595, + "epoch": 0.35712777191129885, + "grad_norm": 1.961926817893982, + "learning_rate": 1.0426022042381271e-05, + "loss": 0.7389, "step": 5073 }, { - "epoch": 0.3839430971208051, - "grad_norm": 1.8435100317001343, - "learning_rate": 1.3047539234100018e-05, - "loss": 0.6678, + "epoch": 0.35719816965857093, + "grad_norm": 1.8447461128234863, + "learning_rate": 1.0424630082459862e-05, + "loss": 0.6869, "step": 5074 }, { - "epoch": 0.38401876584313877, - "grad_norm": 2.025412082672119, - "learning_rate": 1.3045527502990358e-05, - "loss": 0.8392, + "epoch": 0.357268567405843, + "grad_norm": 1.6866005659103394, + "learning_rate": 1.0423237944488833e-05, + "loss": 0.7269, "step": 5075 }, { - "epoch": 0.3840944345654724, - "grad_norm": 2.096165895462036, - "learning_rate": 1.3043515518536674e-05, - "loss": 0.8409, + "epoch": 0.3573389651531151, + "grad_norm": 2.3867897987365723, + "learning_rate": 1.0421845628540565e-05, + "loss": 0.6802, "step": 5076 }, { - "epoch": 0.384170103287806, - "grad_norm": 1.9506720304489136, - "learning_rate": 1.3041503280864974e-05, - "loss": 0.7444, + "epoch": 0.35740936290038716, + "grad_norm": 5.004610061645508, + "learning_rate": 1.042045313468744e-05, + "loss": 0.6485, "step": 5077 }, { - "epoch": 0.3842457720101396, - "grad_norm": 1.969355583190918, - "learning_rate": 1.3039490790101266e-05, - "loss": 0.6558, + "epoch": 0.3574797606476593, + "grad_norm": 1.6532641649246216, + "learning_rate": 1.041906046300186e-05, + "loss": 0.7116, "step": 5078 }, { - "epoch": 0.3843214407324732, - "grad_norm": 1.9142673015594482, - "learning_rate": 1.303747804637158e-05, - "loss": 0.7879, + "epoch": 0.35755015839493137, + "grad_norm": 1.7845133543014526, + "learning_rate": 1.0417667613556232e-05, + "loss": 0.7265, "step": 5079 }, { - "epoch": 0.3843971094548069, - "grad_norm": 1.9106582403182983, - "learning_rate": 1.3035465049801958e-05, - "loss": 0.7209, + "epoch": 0.35762055614220345, + "grad_norm": 1.8873200416564941, + "learning_rate": 1.0416274586422966e-05, + "loss": 0.6477, "step": 5080 }, { - "epoch": 0.3844727781771405, - "grad_norm": 2.3156635761260986, - "learning_rate": 1.3033451800518464e-05, - "loss": 0.8002, + "epoch": 0.3576909538894755, + "grad_norm": 1.813827633857727, + "learning_rate": 1.0414881381674493e-05, + "loss": 0.8561, "step": 5081 }, { - "epoch": 0.3845484468994741, - "grad_norm": 2.1822335720062256, - "learning_rate": 1.3031438298647174e-05, - "loss": 0.7506, + "epoch": 0.3577613516367476, + "grad_norm": 1.6803306341171265, + "learning_rate": 1.0413487999383243e-05, + "loss": 0.6258, "step": 5082 }, { - "epoch": 0.3846241156218077, - "grad_norm": 2.149963617324829, - "learning_rate": 1.3029424544314173e-05, - "loss": 0.6489, + "epoch": 0.35783174938401974, + "grad_norm": 2.1102070808410645, + "learning_rate": 1.0412094439621662e-05, + "loss": 0.5803, "step": 5083 }, { - "epoch": 0.38469978434414137, - "grad_norm": 2.2395076751708984, - "learning_rate": 1.3027410537645578e-05, - "loss": 0.6394, + "epoch": 0.3579021471312918, + "grad_norm": 1.9686511754989624, + "learning_rate": 1.04107007024622e-05, + "loss": 0.8176, "step": 5084 }, { - "epoch": 0.384775453066475, - "grad_norm": 2.702310562133789, - "learning_rate": 1.3025396278767511e-05, - "loss": 0.7583, + "epoch": 0.3579725448785639, + "grad_norm": 1.8303474187850952, + "learning_rate": 1.040930678797732e-05, + "loss": 0.8294, "step": 5085 }, { - "epoch": 0.3848511217888086, - "grad_norm": 6.533085823059082, - "learning_rate": 1.3023381767806106e-05, - "loss": 0.759, + "epoch": 0.35804294262583597, + "grad_norm": 1.9779349565505981, + "learning_rate": 1.0407912696239492e-05, + "loss": 0.7271, "step": 5086 }, { - "epoch": 0.3849267905111422, - "grad_norm": 2.2441484928131104, - "learning_rate": 1.302136700488752e-05, - "loss": 0.6975, + "epoch": 0.35811334037310805, + "grad_norm": 1.5680946111679077, + "learning_rate": 1.0406518427321199e-05, + "loss": 0.9393, "step": 5087 }, { - "epoch": 0.38500245923347587, - "grad_norm": 2.1438467502593994, - "learning_rate": 1.301935199013793e-05, - "loss": 0.6995, + "epoch": 0.3581837381203801, + "grad_norm": 2.2610929012298584, + "learning_rate": 1.0405123981294923e-05, + "loss": 0.7643, "step": 5088 }, { - "epoch": 0.3850781279558095, - "grad_norm": 2.285844564437866, - "learning_rate": 1.3017336723683519e-05, - "loss": 0.7527, + "epoch": 0.35825413586765226, + "grad_norm": 1.921730637550354, + "learning_rate": 1.040372935823317e-05, + "loss": 0.7507, "step": 5089 }, { - "epoch": 0.3851537966781431, - "grad_norm": 2.3817970752716064, - "learning_rate": 1.3015321205650483e-05, - "loss": 0.6128, + "epoch": 0.35832453361492433, + "grad_norm": 2.439023017883301, + "learning_rate": 1.0402334558208446e-05, + "loss": 0.6568, "step": 5090 }, { - "epoch": 0.3852294654004767, - "grad_norm": 2.551360845565796, - "learning_rate": 1.3013305436165049e-05, - "loss": 0.7447, + "epoch": 0.3583949313621964, + "grad_norm": 1.7815322875976562, + "learning_rate": 1.0400939581293264e-05, + "loss": 0.6568, "step": 5091 }, { - "epoch": 0.3853051341228103, - "grad_norm": 2.2289671897888184, - "learning_rate": 1.3011289415353446e-05, - "loss": 0.8124, + "epoch": 0.3584653291094685, + "grad_norm": 1.8015146255493164, + "learning_rate": 1.0399544427560156e-05, + "loss": 0.7945, "step": 5092 }, { - "epoch": 0.385380802845144, - "grad_norm": 2.501476526260376, - "learning_rate": 1.300927314334193e-05, - "loss": 0.6713, + "epoch": 0.35853572685674057, + "grad_norm": 2.2215230464935303, + "learning_rate": 1.0398149097081655e-05, + "loss": 0.6578, "step": 5093 }, { - "epoch": 0.3854564715674776, - "grad_norm": 1.9687072038650513, - "learning_rate": 1.300725662025676e-05, - "loss": 0.6829, + "epoch": 0.3586061246040127, + "grad_norm": 1.681355357170105, + "learning_rate": 1.0396753589930302e-05, + "loss": 0.6993, "step": 5094 }, { - "epoch": 0.3855321402898112, - "grad_norm": 2.438424825668335, - "learning_rate": 1.3005239846224218e-05, - "loss": 0.791, + "epoch": 0.3586765223512848, + "grad_norm": 2.332341432571411, + "learning_rate": 1.0395357906178655e-05, + "loss": 0.5606, "step": 5095 }, { - "epoch": 0.3856078090121448, - "grad_norm": 2.1504287719726562, - "learning_rate": 1.3003222821370605e-05, - "loss": 0.7567, + "epoch": 0.35874692009855685, + "grad_norm": 1.8759791851043701, + "learning_rate": 1.0393962045899274e-05, + "loss": 0.7579, "step": 5096 }, { - "epoch": 0.3856834777344785, - "grad_norm": 2.486421823501587, - "learning_rate": 1.3001205545822228e-05, - "loss": 0.5951, + "epoch": 0.35881731784582893, + "grad_norm": 1.8902686834335327, + "learning_rate": 1.0392566009164735e-05, + "loss": 0.8334, "step": 5097 }, { - "epoch": 0.3857591464568121, - "grad_norm": 1.9564738273620605, - "learning_rate": 1.299918801970542e-05, - "loss": 0.8513, + "epoch": 0.358887715593101, + "grad_norm": 2.6006693840026855, + "learning_rate": 1.0391169796047617e-05, + "loss": 0.7323, "step": 5098 }, { - "epoch": 0.3858348151791457, - "grad_norm": 2.0457041263580322, - "learning_rate": 1.2997170243146524e-05, - "loss": 0.7346, + "epoch": 0.3589581133403731, + "grad_norm": 1.9719083309173584, + "learning_rate": 1.0389773406620507e-05, + "loss": 0.7177, "step": 5099 }, { - "epoch": 0.3859104839014793, - "grad_norm": 1.925238847732544, - "learning_rate": 1.2995152216271898e-05, - "loss": 0.7619, + "epoch": 0.3590285110876452, + "grad_norm": 2.0869803428649902, + "learning_rate": 1.0388376840956009e-05, + "loss": 0.7241, "step": 5100 }, { - "epoch": 0.385986152623813, - "grad_norm": 2.6253859996795654, - "learning_rate": 1.2993133939207918e-05, - "loss": 0.8293, + "epoch": 0.3590989088349173, + "grad_norm": 1.7082873582839966, + "learning_rate": 1.038698009912673e-05, + "loss": 0.664, "step": 5101 }, { - "epoch": 0.3860618213461466, - "grad_norm": 2.5588762760162354, - "learning_rate": 1.2991115412080976e-05, - "loss": 0.7825, + "epoch": 0.3591693065821894, + "grad_norm": 1.764224886894226, + "learning_rate": 1.038558318120529e-05, + "loss": 0.7395, "step": 5102 }, { - "epoch": 0.3861374900684802, - "grad_norm": 1.8942152261734009, - "learning_rate": 1.2989096635017476e-05, - "loss": 0.7372, + "epoch": 0.35923970432946145, + "grad_norm": 1.9368475675582886, + "learning_rate": 1.0384186087264314e-05, + "loss": 0.7883, "step": 5103 }, { - "epoch": 0.3862131587908138, - "grad_norm": 2.209826946258545, - "learning_rate": 1.2987077608143845e-05, - "loss": 0.575, + "epoch": 0.35931010207673353, + "grad_norm": 1.6472523212432861, + "learning_rate": 1.0382788817376435e-05, + "loss": 0.6776, "step": 5104 }, { - "epoch": 0.3862888275131474, - "grad_norm": 2.1787028312683105, - "learning_rate": 1.2985058331586516e-05, - "loss": 0.8091, + "epoch": 0.3593804998240056, + "grad_norm": 1.8840534687042236, + "learning_rate": 1.0381391371614305e-05, + "loss": 0.7309, "step": 5105 }, { - "epoch": 0.3863644962354811, - "grad_norm": 2.4378044605255127, - "learning_rate": 1.2983038805471949e-05, - "loss": 0.5765, + "epoch": 0.35945089757127774, + "grad_norm": 2.207127809524536, + "learning_rate": 1.0379993750050575e-05, + "loss": 0.8487, "step": 5106 }, { - "epoch": 0.3864401649578147, - "grad_norm": 2.141134023666382, - "learning_rate": 1.2981019029926606e-05, - "loss": 0.7571, + "epoch": 0.3595212953185498, + "grad_norm": 2.1657581329345703, + "learning_rate": 1.0378595952757908e-05, + "loss": 0.779, "step": 5107 }, { - "epoch": 0.3865158336801483, - "grad_norm": 4.0216064453125, - "learning_rate": 1.2978999005076976e-05, - "loss": 0.8407, + "epoch": 0.3595916930658219, + "grad_norm": 1.737626075744629, + "learning_rate": 1.0377197979808977e-05, + "loss": 0.7341, "step": 5108 }, { - "epoch": 0.3865915024024819, - "grad_norm": 2.1336159706115723, - "learning_rate": 1.2976978731049559e-05, - "loss": 0.794, + "epoch": 0.35966209081309397, + "grad_norm": 1.7929283380508423, + "learning_rate": 1.0375799831276466e-05, + "loss": 0.7297, "step": 5109 }, { - "epoch": 0.3866671711248156, - "grad_norm": 2.151615858078003, - "learning_rate": 1.2974958207970868e-05, - "loss": 0.6768, + "epoch": 0.35973248856036605, + "grad_norm": 1.8559101819992065, + "learning_rate": 1.0374401507233064e-05, + "loss": 0.6952, "step": 5110 }, { - "epoch": 0.3867428398471492, - "grad_norm": 1.8506669998168945, - "learning_rate": 1.2972937435967443e-05, - "loss": 0.6728, + "epoch": 0.3598028863076382, + "grad_norm": 2.2286159992218018, + "learning_rate": 1.0373003007751472e-05, + "loss": 0.7954, "step": 5111 }, { - "epoch": 0.3868185085694828, - "grad_norm": 2.7511610984802246, - "learning_rate": 1.2970916415165822e-05, - "loss": 0.7382, + "epoch": 0.35987328405491026, + "grad_norm": 2.4177775382995605, + "learning_rate": 1.0371604332904397e-05, + "loss": 0.5478, "step": 5112 }, { - "epoch": 0.3868941772918164, - "grad_norm": 2.5931057929992676, - "learning_rate": 1.296889514569257e-05, - "loss": 0.8731, + "epoch": 0.35994368180218234, + "grad_norm": 1.763763666152954, + "learning_rate": 1.037020548276456e-05, + "loss": 0.6865, "step": 5113 }, { - "epoch": 0.3869698460141501, - "grad_norm": 3.5109941959381104, - "learning_rate": 1.296687362767427e-05, - "loss": 0.8754, + "epoch": 0.3600140795494544, + "grad_norm": 1.6133149862289429, + "learning_rate": 1.036880645740469e-05, + "loss": 0.7598, "step": 5114 }, { - "epoch": 0.3870455147364837, - "grad_norm": 2.4638400077819824, - "learning_rate": 1.2964851861237511e-05, - "loss": 0.7972, + "epoch": 0.3600844772967265, + "grad_norm": 1.6351861953735352, + "learning_rate": 1.036740725689752e-05, + "loss": 0.6882, "step": 5115 }, { - "epoch": 0.3871211834588173, - "grad_norm": 2.775519371032715, - "learning_rate": 1.2962829846508908e-05, - "loss": 0.6752, + "epoch": 0.36015487504399857, + "grad_norm": 3.065706491470337, + "learning_rate": 1.0366007881315794e-05, + "loss": 0.6998, "step": 5116 }, { - "epoch": 0.3871968521811509, - "grad_norm": 2.1849584579467773, - "learning_rate": 1.2960807583615081e-05, - "loss": 0.8533, + "epoch": 0.3602252727912707, + "grad_norm": 1.978210687637329, + "learning_rate": 1.0364608330732274e-05, + "loss": 0.7188, "step": 5117 }, { - "epoch": 0.3872725209034845, - "grad_norm": 2.431049346923828, - "learning_rate": 1.295878507268267e-05, - "loss": 0.7406, + "epoch": 0.3602956705385428, + "grad_norm": 2.0100600719451904, + "learning_rate": 1.0363208605219718e-05, + "loss": 0.6597, "step": 5118 }, { - "epoch": 0.3873481896258182, - "grad_norm": 2.2878475189208984, - "learning_rate": 1.2956762313838335e-05, - "loss": 0.7887, + "epoch": 0.36036606828581486, + "grad_norm": 1.7436565160751343, + "learning_rate": 1.0361808704850901e-05, + "loss": 0.7224, "step": 5119 }, { - "epoch": 0.3874238583481518, - "grad_norm": 2.3642971515655518, - "learning_rate": 1.2954739307208746e-05, - "loss": 0.723, + "epoch": 0.36043646603308693, + "grad_norm": 1.8930715322494507, + "learning_rate": 1.0360408629698603e-05, + "loss": 0.6482, "step": 5120 }, { - "epoch": 0.3874995270704854, - "grad_norm": 1.9779037237167358, - "learning_rate": 1.295271605292059e-05, - "loss": 0.644, + "epoch": 0.360506863780359, + "grad_norm": 1.902958631515503, + "learning_rate": 1.0359008379835619e-05, + "loss": 0.776, "step": 5121 }, { - "epoch": 0.387575195792819, - "grad_norm": 2.5578744411468506, - "learning_rate": 1.2950692551100573e-05, - "loss": 0.7849, + "epoch": 0.36057726152763114, + "grad_norm": 1.7349053621292114, + "learning_rate": 1.0357607955334747e-05, + "loss": 0.6963, "step": 5122 }, { - "epoch": 0.3876508645151527, - "grad_norm": 2.2762012481689453, - "learning_rate": 1.2948668801875408e-05, - "loss": 0.7393, + "epoch": 0.3606476592749032, + "grad_norm": 1.5128800868988037, + "learning_rate": 1.0356207356268797e-05, + "loss": 0.7143, "step": 5123 }, { - "epoch": 0.3877265332374863, - "grad_norm": 2.143754720687866, - "learning_rate": 1.2946644805371833e-05, - "loss": 0.7024, + "epoch": 0.3607180570221753, + "grad_norm": 2.214149236679077, + "learning_rate": 1.0354806582710586e-05, + "loss": 0.6356, "step": 5124 }, { - "epoch": 0.3878022019598199, - "grad_norm": 2.0929954051971436, - "learning_rate": 1.2944620561716592e-05, - "loss": 0.717, + "epoch": 0.3607884547694474, + "grad_norm": 2.167600631713867, + "learning_rate": 1.0353405634732943e-05, + "loss": 0.7569, "step": 5125 }, { - "epoch": 0.3878778706821535, - "grad_norm": 2.0429515838623047, - "learning_rate": 1.2942596071036455e-05, - "loss": 0.7081, + "epoch": 0.36085885251671945, + "grad_norm": 1.548780918121338, + "learning_rate": 1.0352004512408703e-05, + "loss": 0.6386, "step": 5126 }, { - "epoch": 0.3879535394044872, - "grad_norm": 2.1083133220672607, - "learning_rate": 1.2940571333458201e-05, - "loss": 0.6678, + "epoch": 0.36092925026399153, + "grad_norm": 1.8998831510543823, + "learning_rate": 1.0350603215810714e-05, + "loss": 0.6912, "step": 5127 }, { - "epoch": 0.3880292081268208, - "grad_norm": 2.166097640991211, - "learning_rate": 1.2938546349108623e-05, - "loss": 0.6017, + "epoch": 0.36099964801126366, + "grad_norm": 2.008417844772339, + "learning_rate": 1.0349201745011829e-05, + "loss": 0.667, "step": 5128 }, { - "epoch": 0.3881048768491544, - "grad_norm": 1.7243160009384155, - "learning_rate": 1.2936521118114534e-05, - "loss": 0.6601, + "epoch": 0.36107004575853574, + "grad_norm": 1.9682005643844604, + "learning_rate": 1.034780010008491e-05, + "loss": 0.7774, "step": 5129 }, { - "epoch": 0.388180545571488, - "grad_norm": 2.28934383392334, - "learning_rate": 1.2934495640602759e-05, - "loss": 0.7419, + "epoch": 0.3611404435058078, + "grad_norm": 1.5230432748794556, + "learning_rate": 1.0346398281102835e-05, + "loss": 0.5395, "step": 5130 }, { - "epoch": 0.3882562142938216, - "grad_norm": 2.0433170795440674, - "learning_rate": 1.2932469916700144e-05, - "loss": 0.8201, + "epoch": 0.3612108412530799, + "grad_norm": 1.870314359664917, + "learning_rate": 1.034499628813848e-05, + "loss": 0.6736, "step": 5131 }, { - "epoch": 0.3883318830161553, - "grad_norm": 2.5278637409210205, - "learning_rate": 1.2930443946533543e-05, - "loss": 0.7638, + "epoch": 0.361281239000352, + "grad_norm": 2.881248712539673, + "learning_rate": 1.0343594121264742e-05, + "loss": 0.7385, "step": 5132 }, { - "epoch": 0.3884075517384889, - "grad_norm": 1.9174318313598633, - "learning_rate": 1.2928417730229827e-05, - "loss": 0.7162, + "epoch": 0.36135163674762405, + "grad_norm": 1.882060170173645, + "learning_rate": 1.0342191780554513e-05, + "loss": 0.7054, "step": 5133 }, { - "epoch": 0.3884832204608225, - "grad_norm": 2.190006732940674, - "learning_rate": 1.2926391267915892e-05, - "loss": 0.7597, + "epoch": 0.3614220344948962, + "grad_norm": 3.220262289047241, + "learning_rate": 1.0340789266080709e-05, + "loss": 0.7046, "step": 5134 }, { - "epoch": 0.3885588891831561, - "grad_norm": 2.2163407802581787, - "learning_rate": 1.292436455971863e-05, - "loss": 0.5617, + "epoch": 0.36149243224216826, + "grad_norm": 1.9311374425888062, + "learning_rate": 1.0339386577916246e-05, + "loss": 0.6562, "step": 5135 }, { - "epoch": 0.3886345579054898, - "grad_norm": 1.9766048192977905, - "learning_rate": 1.2922337605764971e-05, - "loss": 0.7222, + "epoch": 0.36156282998944034, + "grad_norm": 1.919357180595398, + "learning_rate": 1.033798371613405e-05, + "loss": 0.7437, "step": 5136 }, { - "epoch": 0.3887102266278234, - "grad_norm": 2.149446964263916, - "learning_rate": 1.2920310406181842e-05, - "loss": 0.7806, + "epoch": 0.3616332277367124, + "grad_norm": 2.036996364593506, + "learning_rate": 1.0336580680807057e-05, + "loss": 0.7226, "step": 5137 }, { - "epoch": 0.388785895350157, - "grad_norm": 1.846808910369873, - "learning_rate": 1.2918282961096197e-05, - "loss": 0.7699, + "epoch": 0.3617036254839845, + "grad_norm": 1.9273827075958252, + "learning_rate": 1.0335177472008213e-05, + "loss": 0.7425, "step": 5138 }, { - "epoch": 0.3888615640724906, - "grad_norm": 2.0156519412994385, - "learning_rate": 1.2916255270635001e-05, - "loss": 0.6868, + "epoch": 0.3617740232312566, + "grad_norm": 2.3654074668884277, + "learning_rate": 1.0333774089810473e-05, + "loss": 0.7656, "step": 5139 }, { - "epoch": 0.3889372327948243, - "grad_norm": 1.861183524131775, - "learning_rate": 1.2914227334925231e-05, - "loss": 0.6657, + "epoch": 0.3618444209785287, + "grad_norm": 1.8453316688537598, + "learning_rate": 1.0332370534286795e-05, + "loss": 0.6632, "step": 5140 }, { - "epoch": 0.3890129015171579, - "grad_norm": 2.640993118286133, - "learning_rate": 1.2912199154093886e-05, - "loss": 0.627, + "epoch": 0.3619148187258008, + "grad_norm": 1.9159170389175415, + "learning_rate": 1.0330966805510159e-05, + "loss": 0.769, "step": 5141 }, { - "epoch": 0.3890885702394915, - "grad_norm": 2.4647865295410156, - "learning_rate": 1.2910170728267974e-05, - "loss": 0.7462, + "epoch": 0.36198521647307286, + "grad_norm": 1.5500129461288452, + "learning_rate": 1.032956290355354e-05, + "loss": 0.6213, "step": 5142 }, { - "epoch": 0.3891642389618251, - "grad_norm": 2.260634422302246, - "learning_rate": 1.2908142057574526e-05, - "loss": 0.8352, + "epoch": 0.36205561422034493, + "grad_norm": 1.5804864168167114, + "learning_rate": 1.032815882848993e-05, + "loss": 0.7313, "step": 5143 }, { - "epoch": 0.3892399076841587, - "grad_norm": 2.117558002471924, - "learning_rate": 1.2906113142140582e-05, - "loss": 0.8288, + "epoch": 0.362126011967617, + "grad_norm": 1.9040833711624146, + "learning_rate": 1.032675458039233e-05, + "loss": 0.8493, "step": 5144 }, { - "epoch": 0.3893155764064924, - "grad_norm": 2.3098366260528564, - "learning_rate": 1.29040839820932e-05, - "loss": 0.7885, + "epoch": 0.36219640971488914, + "grad_norm": 1.911996841430664, + "learning_rate": 1.0325350159333748e-05, + "loss": 0.8525, "step": 5145 }, { - "epoch": 0.389391245128826, - "grad_norm": 1.80618155002594, - "learning_rate": 1.2902054577559451e-05, - "loss": 0.7721, + "epoch": 0.3622668074621612, + "grad_norm": 1.8081555366516113, + "learning_rate": 1.0323945565387194e-05, + "loss": 0.7221, "step": 5146 }, { - "epoch": 0.3894669138511596, - "grad_norm": 1.6692981719970703, - "learning_rate": 1.2900024928666424e-05, - "loss": 0.6533, + "epoch": 0.3623372052094333, + "grad_norm": 2.235577344894409, + "learning_rate": 1.0322540798625703e-05, + "loss": 0.6886, "step": 5147 }, { - "epoch": 0.3895425825734932, - "grad_norm": 2.307391405105591, - "learning_rate": 1.2897995035541223e-05, - "loss": 0.6559, + "epoch": 0.3624076029567054, + "grad_norm": 1.6317269802093506, + "learning_rate": 1.0321135859122309e-05, + "loss": 0.7168, "step": 5148 }, { - "epoch": 0.3896182512958269, - "grad_norm": 2.1332476139068604, - "learning_rate": 1.2895964898310961e-05, - "loss": 0.7055, + "epoch": 0.36247800070397745, + "grad_norm": 1.6889580488204956, + "learning_rate": 1.031973074695005e-05, + "loss": 0.7255, "step": 5149 }, { - "epoch": 0.3896939200181605, - "grad_norm": 2.272970199584961, - "learning_rate": 1.289393451710278e-05, - "loss": 0.7819, + "epoch": 0.3625483984512496, + "grad_norm": 1.999009609222412, + "learning_rate": 1.0318325462181984e-05, + "loss": 0.7288, "step": 5150 }, { - "epoch": 0.3897695887404941, - "grad_norm": 2.7969746589660645, - "learning_rate": 1.289190389204382e-05, - "loss": 0.7392, + "epoch": 0.36261879619852166, + "grad_norm": 1.9690163135528564, + "learning_rate": 1.0316920004891172e-05, + "loss": 0.8263, "step": 5151 }, { - "epoch": 0.3898452574628277, - "grad_norm": 2.1883418560028076, - "learning_rate": 1.2889873023261257e-05, - "loss": 0.7967, + "epoch": 0.36268919394579374, + "grad_norm": 1.8956488370895386, + "learning_rate": 1.0315514375150685e-05, + "loss": 0.7811, "step": 5152 }, { - "epoch": 0.3899209261851614, - "grad_norm": 1.9223883152008057, - "learning_rate": 1.288784191088226e-05, - "loss": 0.7576, + "epoch": 0.3627595916930658, + "grad_norm": 2.3836379051208496, + "learning_rate": 1.0314108573033605e-05, + "loss": 0.7319, "step": 5153 }, { - "epoch": 0.389996594907495, - "grad_norm": 2.0606937408447266, - "learning_rate": 1.2885810555034028e-05, - "loss": 0.7704, + "epoch": 0.3628299894403379, + "grad_norm": 1.6391150951385498, + "learning_rate": 1.0312702598613018e-05, + "loss": 0.5589, "step": 5154 }, { - "epoch": 0.3900722636298286, - "grad_norm": 1.9041752815246582, - "learning_rate": 1.2883778955843772e-05, - "loss": 0.8243, + "epoch": 0.36290038718761, + "grad_norm": 1.8116405010223389, + "learning_rate": 1.0311296451962022e-05, + "loss": 0.7496, "step": 5155 }, { - "epoch": 0.3901479323521622, - "grad_norm": 2.1987617015838623, - "learning_rate": 1.2881747113438716e-05, - "loss": 0.8491, + "epoch": 0.3629707849348821, + "grad_norm": 1.7769863605499268, + "learning_rate": 1.0309890133153724e-05, + "loss": 0.7536, "step": 5156 }, { - "epoch": 0.39022360107449583, - "grad_norm": 2.152064323425293, - "learning_rate": 1.2879715027946101e-05, - "loss": 0.7676, + "epoch": 0.3630411826821542, + "grad_norm": 2.109684705734253, + "learning_rate": 1.0308483642261241e-05, + "loss": 0.7226, "step": 5157 }, { - "epoch": 0.3902992697968295, - "grad_norm": 2.4647457599639893, - "learning_rate": 1.2877682699493179e-05, - "loss": 0.7452, + "epoch": 0.36311158042942626, + "grad_norm": 1.6316914558410645, + "learning_rate": 1.0307076979357696e-05, + "loss": 0.6734, "step": 5158 }, { - "epoch": 0.3903749385191631, - "grad_norm": 1.774983286857605, - "learning_rate": 1.2875650128207228e-05, - "loss": 0.8262, + "epoch": 0.36318197817669834, + "grad_norm": 1.9353424310684204, + "learning_rate": 1.0305670144516225e-05, + "loss": 0.7028, "step": 5159 }, { - "epoch": 0.3904506072414967, - "grad_norm": 1.9229451417922974, - "learning_rate": 1.2873617314215528e-05, - "loss": 0.7605, + "epoch": 0.3632523759239704, + "grad_norm": 1.9748148918151855, + "learning_rate": 1.030426313780997e-05, + "loss": 0.6927, "step": 5160 }, { - "epoch": 0.3905262759638303, - "grad_norm": 2.0117905139923096, - "learning_rate": 1.2871584257645385e-05, - "loss": 0.6922, + "epoch": 0.3633227736712425, + "grad_norm": 1.975672960281372, + "learning_rate": 1.030285595931208e-05, + "loss": 0.7357, "step": 5161 }, { - "epoch": 0.390601944686164, - "grad_norm": 2.2805237770080566, - "learning_rate": 1.2869550958624115e-05, - "loss": 0.9432, + "epoch": 0.3633931714185146, + "grad_norm": 2.2631421089172363, + "learning_rate": 1.030144860909572e-05, + "loss": 0.5991, "step": 5162 }, { - "epoch": 0.3906776134084976, - "grad_norm": 2.6723804473876953, - "learning_rate": 1.2867517417279045e-05, - "loss": 0.741, + "epoch": 0.3634635691657867, + "grad_norm": 2.3809468746185303, + "learning_rate": 1.0300041087234057e-05, + "loss": 0.6569, "step": 5163 }, { - "epoch": 0.3907532821308312, - "grad_norm": 1.9901678562164307, - "learning_rate": 1.2865483633737528e-05, - "loss": 0.7327, + "epoch": 0.3635339669130588, + "grad_norm": 1.8027180433273315, + "learning_rate": 1.0298633393800266e-05, + "loss": 0.72, "step": 5164 }, { - "epoch": 0.3908289508531648, - "grad_norm": 2.1943933963775635, - "learning_rate": 1.286344960812692e-05, - "loss": 0.8089, + "epoch": 0.36360436466033086, + "grad_norm": 1.7178940773010254, + "learning_rate": 1.0297225528867538e-05, + "loss": 0.754, "step": 5165 }, { - "epoch": 0.3909046195754985, - "grad_norm": 2.47472882270813, - "learning_rate": 1.2861415340574604e-05, - "loss": 0.7517, + "epoch": 0.36367476240760294, + "grad_norm": 2.095451593399048, + "learning_rate": 1.029581749250907e-05, + "loss": 0.8501, "step": 5166 }, { - "epoch": 0.3909802882978321, - "grad_norm": 2.1459341049194336, - "learning_rate": 1.2859380831207969e-05, - "loss": 0.7393, + "epoch": 0.36374516015487507, + "grad_norm": 1.754336953163147, + "learning_rate": 1.0294409284798066e-05, + "loss": 0.6801, "step": 5167 }, { - "epoch": 0.3910559570201657, - "grad_norm": 2.2298531532287598, - "learning_rate": 1.2857346080154425e-05, - "loss": 0.7545, + "epoch": 0.36381555790214715, + "grad_norm": 2.1442601680755615, + "learning_rate": 1.029300090580774e-05, + "loss": 0.6978, "step": 5168 }, { - "epoch": 0.3911316257424993, - "grad_norm": 2.5509769916534424, - "learning_rate": 1.2855311087541393e-05, - "loss": 0.9029, + "epoch": 0.3638859556494192, + "grad_norm": 1.7362371683120728, + "learning_rate": 1.0291592355611314e-05, + "loss": 0.6443, "step": 5169 }, { - "epoch": 0.391207294464833, - "grad_norm": 2.2877771854400635, - "learning_rate": 1.285327585349631e-05, - "loss": 0.6986, + "epoch": 0.3639563533966913, + "grad_norm": 1.7638697624206543, + "learning_rate": 1.0290183634282019e-05, + "loss": 0.6343, "step": 5170 }, { - "epoch": 0.3912829631871666, - "grad_norm": 1.683161735534668, - "learning_rate": 1.2851240378146632e-05, - "loss": 0.8001, + "epoch": 0.3640267511439634, + "grad_norm": 2.297386884689331, + "learning_rate": 1.0288774741893097e-05, + "loss": 0.7595, "step": 5171 }, { - "epoch": 0.3913586319095002, - "grad_norm": 1.9525566101074219, - "learning_rate": 1.2849204661619822e-05, - "loss": 0.7955, + "epoch": 0.36409714889123546, + "grad_norm": 2.083707094192505, + "learning_rate": 1.0287365678517799e-05, + "loss": 0.6549, "step": 5172 }, { - "epoch": 0.3914343006318338, - "grad_norm": 2.2705700397491455, - "learning_rate": 1.284716870404337e-05, - "loss": 0.734, + "epoch": 0.3641675466385076, + "grad_norm": 1.8162933588027954, + "learning_rate": 1.0285956444229383e-05, + "loss": 0.7719, "step": 5173 }, { - "epoch": 0.39150996935416743, - "grad_norm": 1.9373785257339478, - "learning_rate": 1.2845132505544766e-05, - "loss": 0.7796, + "epoch": 0.36423794438577967, + "grad_norm": 4.074939250946045, + "learning_rate": 1.0284547039101114e-05, + "loss": 0.6464, "step": 5174 }, { - "epoch": 0.3915856380765011, - "grad_norm": 2.5509033203125, - "learning_rate": 1.284309606625153e-05, - "loss": 0.7603, + "epoch": 0.36430834213305174, + "grad_norm": 2.0279970169067383, + "learning_rate": 1.0283137463206272e-05, + "loss": 0.7168, "step": 5175 }, { - "epoch": 0.3916613067988347, - "grad_norm": 3.8573920726776123, - "learning_rate": 1.2841059386291191e-05, - "loss": 0.681, + "epoch": 0.3643787398803238, + "grad_norm": 2.039433240890503, + "learning_rate": 1.0281727716618136e-05, + "loss": 0.7254, "step": 5176 }, { - "epoch": 0.3917369755211683, - "grad_norm": 2.2207155227661133, - "learning_rate": 1.2839022465791285e-05, - "loss": 0.7496, + "epoch": 0.3644491376275959, + "grad_norm": 1.6674975156784058, + "learning_rate": 1.0280317799410006e-05, + "loss": 0.648, "step": 5177 }, { - "epoch": 0.39181264424350193, - "grad_norm": 3.7512471675872803, - "learning_rate": 1.283698530487938e-05, - "loss": 0.6177, + "epoch": 0.36451953537486803, + "grad_norm": 1.9281716346740723, + "learning_rate": 1.0278907711655183e-05, + "loss": 0.7628, "step": 5178 }, { - "epoch": 0.3918883129658356, - "grad_norm": 2.095038890838623, - "learning_rate": 1.283494790368304e-05, - "loss": 0.7261, + "epoch": 0.3645899331221401, + "grad_norm": 1.9448643922805786, + "learning_rate": 1.0277497453426977e-05, + "loss": 0.786, "step": 5179 }, { - "epoch": 0.3919639816881692, - "grad_norm": 2.247019052505493, - "learning_rate": 1.2832910262329862e-05, - "loss": 0.7003, + "epoch": 0.3646603308694122, + "grad_norm": 1.7835553884506226, + "learning_rate": 1.0276087024798713e-05, + "loss": 0.7185, "step": 5180 }, { - "epoch": 0.3920396504105028, - "grad_norm": 2.304305076599121, - "learning_rate": 1.2830872380947447e-05, - "loss": 0.7956, + "epoch": 0.36473072861668426, + "grad_norm": 1.6675379276275635, + "learning_rate": 1.0274676425843716e-05, + "loss": 0.6796, "step": 5181 }, { - "epoch": 0.3921153191328364, - "grad_norm": 1.8406106233596802, - "learning_rate": 1.282883425966341e-05, - "loss": 0.882, + "epoch": 0.36480112636395634, + "grad_norm": 2.119790554046631, + "learning_rate": 1.0273265656635326e-05, + "loss": 0.7562, "step": 5182 }, { - "epoch": 0.3921909878551701, - "grad_norm": 3.2401795387268066, - "learning_rate": 1.2826795898605389e-05, - "loss": 0.7532, + "epoch": 0.3648715241112284, + "grad_norm": 1.7027209997177124, + "learning_rate": 1.027185471724689e-05, + "loss": 0.6364, "step": 5183 }, { - "epoch": 0.3922666565775037, - "grad_norm": 2.3485889434814453, - "learning_rate": 1.282475729790103e-05, - "loss": 0.595, + "epoch": 0.36494192185850055, + "grad_norm": 1.6613198518753052, + "learning_rate": 1.0270443607751764e-05, + "loss": 0.6113, "step": 5184 }, { - "epoch": 0.3923423252998373, - "grad_norm": 1.8012150526046753, - "learning_rate": 1.2822718457678001e-05, - "loss": 0.6598, + "epoch": 0.36501231960577263, + "grad_norm": 1.8618322610855103, + "learning_rate": 1.0269032328223312e-05, + "loss": 0.6861, "step": 5185 }, { - "epoch": 0.3924179940221709, - "grad_norm": 2.22017502784729, - "learning_rate": 1.2820679378063978e-05, - "loss": 0.6302, + "epoch": 0.3650827173530447, + "grad_norm": 1.7093168497085571, + "learning_rate": 1.0267620878734912e-05, + "loss": 0.832, "step": 5186 }, { - "epoch": 0.39249366274450453, - "grad_norm": 2.1888411045074463, - "learning_rate": 1.2818640059186653e-05, - "loss": 0.7243, + "epoch": 0.3651531151003168, + "grad_norm": 1.9173895120620728, + "learning_rate": 1.026620925935994e-05, + "loss": 0.7089, "step": 5187 }, { - "epoch": 0.3925693314668382, - "grad_norm": 2.191774845123291, - "learning_rate": 1.2816600501173737e-05, - "loss": 0.7592, + "epoch": 0.36522351284758886, + "grad_norm": 1.7718486785888672, + "learning_rate": 1.0264797470171791e-05, + "loss": 0.7599, "step": 5188 }, { - "epoch": 0.3926450001891718, - "grad_norm": 2.351590871810913, - "learning_rate": 1.2814560704152955e-05, - "loss": 0.6887, + "epoch": 0.36529391059486094, + "grad_norm": 1.7508693933486938, + "learning_rate": 1.0263385511243865e-05, + "loss": 0.6441, "step": 5189 }, { - "epoch": 0.3927206689115054, - "grad_norm": 2.8945960998535156, - "learning_rate": 1.2812520668252039e-05, - "loss": 0.6931, + "epoch": 0.36536430834213307, + "grad_norm": 2.0049166679382324, + "learning_rate": 1.026197338264957e-05, + "loss": 0.6969, "step": 5190 }, { - "epoch": 0.39279633763383903, - "grad_norm": 2.134856700897217, - "learning_rate": 1.281048039359875e-05, - "loss": 0.8368, + "epoch": 0.36543470608940515, + "grad_norm": 1.8473037481307983, + "learning_rate": 1.0260561084462327e-05, + "loss": 0.7871, "step": 5191 }, { - "epoch": 0.3928720063561727, - "grad_norm": 2.4972636699676514, - "learning_rate": 1.2808439880320855e-05, - "loss": 0.837, + "epoch": 0.3655051038366772, + "grad_norm": 2.0289714336395264, + "learning_rate": 1.025914861675556e-05, + "loss": 0.7698, "step": 5192 }, { - "epoch": 0.3929476750785063, - "grad_norm": 2.14408016204834, - "learning_rate": 1.2806399128546137e-05, - "loss": 0.7233, + "epoch": 0.3655755015839493, + "grad_norm": 1.7718275785446167, + "learning_rate": 1.0257735979602703e-05, + "loss": 0.7483, "step": 5193 }, { - "epoch": 0.3930233438008399, - "grad_norm": 1.9677777290344238, - "learning_rate": 1.2804358138402394e-05, - "loss": 0.6967, + "epoch": 0.3656458993312214, + "grad_norm": 1.8395198583602905, + "learning_rate": 1.0256323173077202e-05, + "loss": 0.7448, "step": 5194 }, { - "epoch": 0.39309901252317353, - "grad_norm": 1.9467759132385254, - "learning_rate": 1.280231691001744e-05, - "loss": 0.7552, + "epoch": 0.3657162970784935, + "grad_norm": 1.6992026567459106, + "learning_rate": 1.0254910197252511e-05, + "loss": 0.598, "step": 5195 }, { - "epoch": 0.3931746812455072, - "grad_norm": 2.6038689613342285, - "learning_rate": 1.2800275443519102e-05, - "loss": 0.7232, + "epoch": 0.3657866948257656, + "grad_norm": 1.8085403442382812, + "learning_rate": 1.0253497052202091e-05, + "loss": 0.6794, "step": 5196 }, { - "epoch": 0.3932503499678408, - "grad_norm": 2.1820638179779053, - "learning_rate": 1.2798233739035222e-05, - "loss": 0.8716, + "epoch": 0.36585709257303767, + "grad_norm": 1.6625057458877563, + "learning_rate": 1.025208373799941e-05, + "loss": 0.6646, "step": 5197 }, { - "epoch": 0.3933260186901744, - "grad_norm": 2.153744697570801, - "learning_rate": 1.2796191796693666e-05, - "loss": 0.7718, + "epoch": 0.36592749032030975, + "grad_norm": 1.7434053421020508, + "learning_rate": 1.0250670254717955e-05, + "loss": 0.6834, "step": 5198 }, { - "epoch": 0.393401687412508, - "grad_norm": 2.0024526119232178, - "learning_rate": 1.2794149616622297e-05, - "loss": 0.7673, + "epoch": 0.3659978880675818, + "grad_norm": 1.778206467628479, + "learning_rate": 1.0249256602431208e-05, + "loss": 0.6847, "step": 5199 }, { - "epoch": 0.39347735613484164, - "grad_norm": 1.9214941263198853, - "learning_rate": 1.2792107198949008e-05, - "loss": 0.6543, + "epoch": 0.3660682858148539, + "grad_norm": 1.882431983947754, + "learning_rate": 1.0247842781212665e-05, + "loss": 0.6779, "step": 5200 }, { - "epoch": 0.3935530248571753, - "grad_norm": 2.2117514610290527, - "learning_rate": 1.2790064543801701e-05, - "loss": 0.7172, + "epoch": 0.36613868356212603, + "grad_norm": 1.9357483386993408, + "learning_rate": 1.0246428791135838e-05, + "loss": 0.6316, "step": 5201 }, { - "epoch": 0.3936286935795089, - "grad_norm": 2.943007707595825, - "learning_rate": 1.2788021651308295e-05, - "loss": 0.7764, + "epoch": 0.3662090813093981, + "grad_norm": 1.6566604375839233, + "learning_rate": 1.0245014632274236e-05, + "loss": 0.6878, "step": 5202 }, { - "epoch": 0.3937043623018425, - "grad_norm": 2.1258654594421387, - "learning_rate": 1.278597852159672e-05, - "loss": 0.7708, + "epoch": 0.3662794790566702, + "grad_norm": 1.8348414897918701, + "learning_rate": 1.0243600304701383e-05, + "loss": 0.7995, "step": 5203 }, { - "epoch": 0.39378003102417614, - "grad_norm": 2.93727445602417, - "learning_rate": 1.2783935154794924e-05, - "loss": 0.779, + "epoch": 0.36634987680394226, + "grad_norm": 1.9631662368774414, + "learning_rate": 1.0242185808490814e-05, + "loss": 0.6366, "step": 5204 }, { - "epoch": 0.3938556997465098, - "grad_norm": 1.9181923866271973, - "learning_rate": 1.2781891551030873e-05, - "loss": 0.7721, + "epoch": 0.36642027455121434, + "grad_norm": 1.589158296585083, + "learning_rate": 1.0240771143716071e-05, + "loss": 0.7802, "step": 5205 }, { - "epoch": 0.3939313684688434, - "grad_norm": 2.5555663108825684, - "learning_rate": 1.2779847710432538e-05, - "loss": 0.7231, + "epoch": 0.3664906722984865, + "grad_norm": 1.8015682697296143, + "learning_rate": 1.0239356310450698e-05, + "loss": 0.7792, "step": 5206 }, { - "epoch": 0.394007037191177, - "grad_norm": 2.6176486015319824, - "learning_rate": 1.2777803633127914e-05, - "loss": 0.7323, + "epoch": 0.36656107004575855, + "grad_norm": 1.8317927122116089, + "learning_rate": 1.0237941308768256e-05, + "loss": 0.7143, "step": 5207 }, { - "epoch": 0.39408270591351063, - "grad_norm": 2.785477876663208, - "learning_rate": 1.2775759319245007e-05, - "loss": 0.6702, + "epoch": 0.36663146779303063, + "grad_norm": 1.5566601753234863, + "learning_rate": 1.0236526138742315e-05, + "loss": 0.65, "step": 5208 }, { - "epoch": 0.3941583746358443, - "grad_norm": 2.178852081298828, - "learning_rate": 1.2773714768911842e-05, - "loss": 0.6602, + "epoch": 0.3667018655403027, + "grad_norm": 1.8618600368499756, + "learning_rate": 1.0235110800446449e-05, + "loss": 0.6194, "step": 5209 }, { - "epoch": 0.3942340433581779, - "grad_norm": 2.0223734378814697, - "learning_rate": 1.277166998225645e-05, - "loss": 0.7339, + "epoch": 0.3667722632875748, + "grad_norm": 1.72518789768219, + "learning_rate": 1.0233695293954242e-05, + "loss": 0.7625, "step": 5210 }, { - "epoch": 0.3943097120805115, - "grad_norm": 3.054377555847168, - "learning_rate": 1.2769624959406885e-05, - "loss": 0.8046, + "epoch": 0.36684266103484686, + "grad_norm": 1.7208424806594849, + "learning_rate": 1.0232279619339288e-05, + "loss": 0.7323, "step": 5211 }, { - "epoch": 0.39438538080284513, - "grad_norm": 2.5661230087280273, - "learning_rate": 1.2767579700491215e-05, - "loss": 0.9283, + "epoch": 0.366913058782119, + "grad_norm": 1.710679531097412, + "learning_rate": 1.0230863776675188e-05, + "loss": 0.7231, "step": 5212 }, { - "epoch": 0.39446104952517874, - "grad_norm": 2.2824318408966064, - "learning_rate": 1.2765534205637514e-05, - "loss": 0.7967, + "epoch": 0.36698345652939107, + "grad_norm": 1.8550952672958374, + "learning_rate": 1.0229447766035556e-05, + "loss": 0.7839, "step": 5213 }, { - "epoch": 0.3945367182475124, - "grad_norm": 2.100961923599243, - "learning_rate": 1.2763488474973886e-05, - "loss": 0.589, + "epoch": 0.36705385427666315, + "grad_norm": 1.832140564918518, + "learning_rate": 1.022803158749401e-05, + "loss": 0.7719, "step": 5214 }, { - "epoch": 0.394612386969846, - "grad_norm": 2.079869508743286, - "learning_rate": 1.2761442508628432e-05, - "loss": 0.8719, + "epoch": 0.3671242520239352, + "grad_norm": 1.8216450214385986, + "learning_rate": 1.0226615241124177e-05, + "loss": 0.5738, "step": 5215 }, { - "epoch": 0.39468805569217963, - "grad_norm": 2.3895928859710693, - "learning_rate": 1.2759396306729288e-05, - "loss": 0.8063, + "epoch": 0.3671946497712073, + "grad_norm": 1.9276846647262573, + "learning_rate": 1.0225198726999696e-05, + "loss": 0.8599, "step": 5216 }, { - "epoch": 0.39476372441451324, - "grad_norm": 2.4894683361053467, - "learning_rate": 1.2757349869404585e-05, - "loss": 0.7269, + "epoch": 0.3672650475184794, + "grad_norm": 2.6223535537719727, + "learning_rate": 1.0223782045194213e-05, + "loss": 0.8738, "step": 5217 }, { - "epoch": 0.3948393931368469, - "grad_norm": 2.383577346801758, - "learning_rate": 1.275530319678248e-05, - "loss": 0.7615, + "epoch": 0.3673354452657515, + "grad_norm": 2.207710027694702, + "learning_rate": 1.0222365195781381e-05, + "loss": 0.6108, "step": 5218 }, { - "epoch": 0.3949150618591805, - "grad_norm": 2.04361629486084, - "learning_rate": 1.2753256288991145e-05, - "loss": 0.5673, + "epoch": 0.3674058430130236, + "grad_norm": 1.9490889310836792, + "learning_rate": 1.0220948178834864e-05, + "loss": 0.6931, "step": 5219 }, { - "epoch": 0.3949907305815141, - "grad_norm": 2.225693941116333, - "learning_rate": 1.2751209146158758e-05, - "loss": 0.8495, + "epoch": 0.36747624076029567, + "grad_norm": 1.6793593168258667, + "learning_rate": 1.0219530994428335e-05, + "loss": 0.7077, "step": 5220 }, { - "epoch": 0.39506639930384774, - "grad_norm": 2.191380739212036, - "learning_rate": 1.2749161768413526e-05, - "loss": 0.6206, + "epoch": 0.36754663850756775, + "grad_norm": 1.7949142456054688, + "learning_rate": 1.0218113642635473e-05, + "loss": 0.7351, "step": 5221 }, { - "epoch": 0.3951420680261814, - "grad_norm": 1.8933615684509277, - "learning_rate": 1.2747114155883653e-05, - "loss": 0.6419, + "epoch": 0.3676170362548398, + "grad_norm": 1.76246178150177, + "learning_rate": 1.0216696123529968e-05, + "loss": 0.6655, "step": 5222 }, { - "epoch": 0.395217736748515, - "grad_norm": 2.0595176219940186, - "learning_rate": 1.2745066308697374e-05, - "loss": 0.8184, + "epoch": 0.36768743400211196, + "grad_norm": 2.0549397468566895, + "learning_rate": 1.0215278437185519e-05, + "loss": 0.7076, "step": 5223 }, { - "epoch": 0.3952934054708486, - "grad_norm": 2.14420485496521, - "learning_rate": 1.274301822698293e-05, - "loss": 0.7094, + "epoch": 0.36775783174938403, + "grad_norm": 1.5700234174728394, + "learning_rate": 1.0213860583675833e-05, + "loss": 0.654, "step": 5224 }, { - "epoch": 0.39536907419318223, - "grad_norm": 2.4212982654571533, - "learning_rate": 1.274096991086858e-05, - "loss": 0.6066, + "epoch": 0.3678282294966561, + "grad_norm": 1.653539776802063, + "learning_rate": 1.0212442563074622e-05, + "loss": 0.6349, "step": 5225 }, { - "epoch": 0.39544474291551585, - "grad_norm": 2.151181221008301, - "learning_rate": 1.2738921360482592e-05, - "loss": 0.7381, + "epoch": 0.3678986272439282, + "grad_norm": 1.7631062269210815, + "learning_rate": 1.0211024375455616e-05, + "loss": 0.7, "step": 5226 }, { - "epoch": 0.3955204116378495, - "grad_norm": 2.084139823913574, - "learning_rate": 1.2736872575953256e-05, - "loss": 0.7257, + "epoch": 0.36796902499120027, + "grad_norm": 1.9478917121887207, + "learning_rate": 1.0209606020892541e-05, + "loss": 0.7321, "step": 5227 }, { - "epoch": 0.3955960803601831, - "grad_norm": 2.635713577270508, - "learning_rate": 1.2734823557408872e-05, - "loss": 0.6161, + "epoch": 0.36803942273847234, + "grad_norm": 1.4790852069854736, + "learning_rate": 1.0208187499459145e-05, + "loss": 0.6842, "step": 5228 }, { - "epoch": 0.39567174908251673, - "grad_norm": 2.126365900039673, - "learning_rate": 1.2732774304977758e-05, - "loss": 0.7688, + "epoch": 0.3681098204857445, + "grad_norm": 1.5369781255722046, + "learning_rate": 1.0206768811229176e-05, + "loss": 0.6484, "step": 5229 }, { - "epoch": 0.39574741780485034, - "grad_norm": 1.905297040939331, - "learning_rate": 1.2730724818788245e-05, - "loss": 0.6682, + "epoch": 0.36818021823301655, + "grad_norm": 2.3798553943634033, + "learning_rate": 1.0205349956276388e-05, + "loss": 0.7477, "step": 5230 }, { - "epoch": 0.395823086527184, - "grad_norm": 2.004648447036743, - "learning_rate": 1.2728675098968672e-05, - "loss": 0.5425, + "epoch": 0.36825061598028863, + "grad_norm": 1.756794810295105, + "learning_rate": 1.0203930934674558e-05, + "loss": 0.6446, "step": 5231 }, { - "epoch": 0.3958987552495176, - "grad_norm": 2.102177381515503, - "learning_rate": 1.272662514564741e-05, - "loss": 0.7984, + "epoch": 0.3683210137275607, + "grad_norm": 1.7456036806106567, + "learning_rate": 1.0202511746497454e-05, + "loss": 0.6442, "step": 5232 }, { - "epoch": 0.39597442397185123, - "grad_norm": 2.1269235610961914, - "learning_rate": 1.2724574958952827e-05, - "loss": 0.8195, + "epoch": 0.3683914114748328, + "grad_norm": 1.7075870037078857, + "learning_rate": 1.0201092391818864e-05, + "loss": 0.6546, "step": 5233 }, { - "epoch": 0.39605009269418484, - "grad_norm": 2.0913987159729004, - "learning_rate": 1.2722524539013312e-05, - "loss": 0.8296, + "epoch": 0.3684618092221049, + "grad_norm": 1.7626656293869019, + "learning_rate": 1.0199672870712582e-05, + "loss": 0.7262, "step": 5234 }, { - "epoch": 0.3961257614165185, - "grad_norm": 2.1840765476226807, - "learning_rate": 1.2720473885957271e-05, - "loss": 0.6681, + "epoch": 0.368532206969377, + "grad_norm": 1.7301390171051025, + "learning_rate": 1.019825318325241e-05, + "loss": 0.7734, "step": 5235 }, { - "epoch": 0.3962014301388521, - "grad_norm": 2.1150200366973877, - "learning_rate": 1.271842299991312e-05, - "loss": 0.6953, + "epoch": 0.3686026047166491, + "grad_norm": 1.6568595170974731, + "learning_rate": 1.0196833329512158e-05, + "loss": 0.718, "step": 5236 }, { - "epoch": 0.39627709886118573, - "grad_norm": 2.1616060733795166, - "learning_rate": 1.2716371881009295e-05, - "loss": 0.7757, + "epoch": 0.36867300246392115, + "grad_norm": 2.011725902557373, + "learning_rate": 1.0195413309565647e-05, + "loss": 0.6779, "step": 5237 }, { - "epoch": 0.39635276758351934, - "grad_norm": 2.005535840988159, - "learning_rate": 1.2714320529374241e-05, - "loss": 0.7313, + "epoch": 0.36874340021119323, + "grad_norm": 1.9658771753311157, + "learning_rate": 1.0193993123486702e-05, + "loss": 0.6609, "step": 5238 }, { - "epoch": 0.39642843630585295, - "grad_norm": 1.6546169519424438, - "learning_rate": 1.2712268945136425e-05, - "loss": 0.8232, + "epoch": 0.3688137979584653, + "grad_norm": 2.26755952835083, + "learning_rate": 1.0192572771349164e-05, + "loss": 0.6931, "step": 5239 }, { - "epoch": 0.3965041050281866, - "grad_norm": 1.8763610124588013, - "learning_rate": 1.271021712842432e-05, - "loss": 0.7103, + "epoch": 0.36888419570573744, + "grad_norm": 1.761446237564087, + "learning_rate": 1.0191152253226875e-05, + "loss": 0.7408, "step": 5240 }, { - "epoch": 0.3965797737505202, - "grad_norm": 2.4322524070739746, - "learning_rate": 1.2708165079366417e-05, - "loss": 0.7621, + "epoch": 0.3689545934530095, + "grad_norm": 1.9308634996414185, + "learning_rate": 1.018973156919369e-05, + "loss": 0.691, "step": 5241 }, { - "epoch": 0.39665544247285384, - "grad_norm": 2.386225700378418, - "learning_rate": 1.2706112798091226e-05, - "loss": 0.8655, + "epoch": 0.3690249912002816, + "grad_norm": 1.7107572555541992, + "learning_rate": 1.0188310719323473e-05, + "loss": 0.7825, "step": 5242 }, { - "epoch": 0.39673111119518745, - "grad_norm": 1.8513740301132202, - "learning_rate": 1.2704060284727262e-05, - "loss": 0.7369, + "epoch": 0.36909538894755367, + "grad_norm": 1.6717792749404907, + "learning_rate": 1.0186889703690096e-05, + "loss": 0.8142, "step": 5243 }, { - "epoch": 0.3968067799175211, - "grad_norm": 2.741036891937256, - "learning_rate": 1.2702007539403062e-05, - "loss": 0.7186, + "epoch": 0.36916578669482575, + "grad_norm": 1.9592125415802002, + "learning_rate": 1.0185468522367433e-05, + "loss": 0.6331, "step": 5244 }, { - "epoch": 0.3968824486398547, - "grad_norm": 2.038377046585083, - "learning_rate": 1.2699954562247177e-05, - "loss": 0.8323, + "epoch": 0.3692361844420978, + "grad_norm": 1.9529261589050293, + "learning_rate": 1.0184047175429378e-05, + "loss": 0.7019, "step": 5245 }, { - "epoch": 0.39695811736218833, - "grad_norm": 1.7875847816467285, - "learning_rate": 1.2697901353388168e-05, - "loss": 0.8214, + "epoch": 0.36930658218936996, + "grad_norm": 1.7182368040084839, + "learning_rate": 1.018262566294983e-05, + "loss": 0.6368, "step": 5246 }, { - "epoch": 0.39703378608452194, - "grad_norm": 2.403543472290039, - "learning_rate": 1.269584791295462e-05, - "loss": 0.8744, + "epoch": 0.36937697993664204, + "grad_norm": 2.1959478855133057, + "learning_rate": 1.0181203985002686e-05, + "loss": 0.6903, "step": 5247 }, { - "epoch": 0.3971094548068556, - "grad_norm": 1.7908776998519897, - "learning_rate": 1.269379424107512e-05, - "loss": 0.6912, + "epoch": 0.3694473776839141, + "grad_norm": 1.734174370765686, + "learning_rate": 1.0179782141661869e-05, + "loss": 0.7032, "step": 5248 }, { - "epoch": 0.3971851235291892, - "grad_norm": 2.3532586097717285, - "learning_rate": 1.2691740337878277e-05, - "loss": 0.6537, + "epoch": 0.3695177754311862, + "grad_norm": 1.7132996320724487, + "learning_rate": 1.01783601330013e-05, + "loss": 0.7647, "step": 5249 }, { - "epoch": 0.39726079225152283, - "grad_norm": 2.0470757484436035, - "learning_rate": 1.2689686203492713e-05, - "loss": 0.7524, + "epoch": 0.36958817317845827, + "grad_norm": 1.6879916191101074, + "learning_rate": 1.0176937959094907e-05, + "loss": 0.7457, "step": 5250 }, { - "epoch": 0.39733646097385644, - "grad_norm": 1.9975119829177856, - "learning_rate": 1.2687631838047064e-05, - "loss": 0.7166, + "epoch": 0.3696585709257304, + "grad_norm": 2.059248924255371, + "learning_rate": 1.017551562001663e-05, + "loss": 0.7988, "step": 5251 }, { - "epoch": 0.39741212969619005, - "grad_norm": 1.9511202573776245, - "learning_rate": 1.2685577241669984e-05, - "loss": 0.7518, + "epoch": 0.3697289686730025, + "grad_norm": 1.9319462776184082, + "learning_rate": 1.0174093115840425e-05, + "loss": 0.681, "step": 5252 }, { - "epoch": 0.3974877984185237, - "grad_norm": 2.32716703414917, - "learning_rate": 1.2683522414490138e-05, - "loss": 0.7032, + "epoch": 0.36979936642027456, + "grad_norm": 1.6000701189041138, + "learning_rate": 1.017267044664024e-05, + "loss": 0.6896, "step": 5253 }, { - "epoch": 0.39756346714085733, - "grad_norm": 2.9506001472473145, - "learning_rate": 1.2681467356636202e-05, - "loss": 0.8021, + "epoch": 0.36986976416754663, + "grad_norm": 2.0502054691314697, + "learning_rate": 1.0171247612490048e-05, + "loss": 0.739, "step": 5254 }, { - "epoch": 0.39763913586319094, - "grad_norm": 1.7488332986831665, - "learning_rate": 1.2679412068236875e-05, - "loss": 0.5693, + "epoch": 0.3699401619148187, + "grad_norm": 1.637516975402832, + "learning_rate": 1.0169824613463818e-05, + "loss": 0.6366, "step": 5255 }, { - "epoch": 0.39771480458552455, - "grad_norm": 2.3912007808685303, - "learning_rate": 1.2677356549420862e-05, - "loss": 0.8525, + "epoch": 0.3700105596620908, + "grad_norm": 1.712862491607666, + "learning_rate": 1.0168401449635538e-05, + "loss": 0.6676, "step": 5256 }, { - "epoch": 0.3977904733078582, - "grad_norm": 2.2952723503112793, - "learning_rate": 1.2675300800316889e-05, - "loss": 0.7448, + "epoch": 0.3700809574093629, + "grad_norm": 1.8947783708572388, + "learning_rate": 1.0166978121079197e-05, + "loss": 0.7429, "step": 5257 }, { - "epoch": 0.3978661420301918, - "grad_norm": 3.195134401321411, - "learning_rate": 1.2673244821053692e-05, - "loss": 0.7458, + "epoch": 0.370151355156635, + "grad_norm": 1.708625316619873, + "learning_rate": 1.0165554627868794e-05, + "loss": 0.7112, "step": 5258 }, { - "epoch": 0.39794181075252544, - "grad_norm": 2.2581069469451904, - "learning_rate": 1.267118861176002e-05, - "loss": 0.6457, + "epoch": 0.3702217529039071, + "grad_norm": 1.9722927808761597, + "learning_rate": 1.016413097007834e-05, + "loss": 0.7378, "step": 5259 }, { - "epoch": 0.39801747947485905, - "grad_norm": 2.550224542617798, - "learning_rate": 1.266913217256465e-05, - "loss": 0.8612, + "epoch": 0.37029215065117915, + "grad_norm": 1.6570886373519897, + "learning_rate": 1.0162707147781851e-05, + "loss": 0.7048, "step": 5260 }, { - "epoch": 0.3980931481971927, - "grad_norm": 2.841343879699707, - "learning_rate": 1.2667075503596348e-05, - "loss": 0.6993, + "epoch": 0.37036254839845123, + "grad_norm": 1.7932097911834717, + "learning_rate": 1.0161283161053353e-05, + "loss": 0.7906, "step": 5261 }, { - "epoch": 0.3981688169195263, - "grad_norm": 2.263087034225464, - "learning_rate": 1.2665018604983924e-05, - "loss": 0.7111, + "epoch": 0.37043294614572336, + "grad_norm": 2.1763179302215576, + "learning_rate": 1.015985900996688e-05, + "loss": 0.6945, "step": 5262 }, { - "epoch": 0.39824448564185994, - "grad_norm": 2.2085769176483154, - "learning_rate": 1.2662961476856177e-05, - "loss": 0.7621, + "epoch": 0.37050334389299544, + "grad_norm": 2.1959125995635986, + "learning_rate": 1.0158434694596475e-05, + "loss": 0.5949, "step": 5263 }, { - "epoch": 0.39832015436419355, - "grad_norm": 1.9521921873092651, - "learning_rate": 1.2660904119341937e-05, - "loss": 0.7074, + "epoch": 0.3705737416402675, + "grad_norm": 1.6461431980133057, + "learning_rate": 1.0157010215016192e-05, + "loss": 0.559, "step": 5264 }, { - "epoch": 0.39839582308652716, - "grad_norm": 1.6472288370132446, - "learning_rate": 1.265884653257004e-05, - "loss": 0.7033, + "epoch": 0.3706441393875396, + "grad_norm": 1.8532779216766357, + "learning_rate": 1.0155585571300086e-05, + "loss": 0.7305, "step": 5265 }, { - "epoch": 0.3984714918088608, - "grad_norm": 2.273076057434082, - "learning_rate": 1.2656788716669338e-05, - "loss": 0.6346, + "epoch": 0.3707145371348117, + "grad_norm": 1.6319704055786133, + "learning_rate": 1.015416076352223e-05, + "loss": 0.7101, "step": 5266 }, { - "epoch": 0.39854716053119443, - "grad_norm": 2.0756750106811523, - "learning_rate": 1.26547306717687e-05, - "loss": 0.7563, + "epoch": 0.37078493488208375, + "grad_norm": 1.7893900871276855, + "learning_rate": 1.0152735791756698e-05, + "loss": 0.7462, "step": 5267 }, { - "epoch": 0.39862282925352804, - "grad_norm": 1.9330674409866333, - "learning_rate": 1.2652672397997006e-05, - "loss": 0.7495, + "epoch": 0.3708553326293559, + "grad_norm": 1.6483327150344849, + "learning_rate": 1.0151310656077578e-05, + "loss": 0.8017, "step": 5268 }, { - "epoch": 0.39869849797586165, - "grad_norm": 1.7233151197433472, - "learning_rate": 1.2650613895483152e-05, - "loss": 0.6678, + "epoch": 0.37092573037662796, + "grad_norm": 1.6239169836044312, + "learning_rate": 1.0149885356558963e-05, + "loss": 0.6487, "step": 5269 }, { - "epoch": 0.3987741666981953, - "grad_norm": 1.934144377708435, - "learning_rate": 1.2648555164356047e-05, - "loss": 0.7826, + "epoch": 0.37099612812390004, + "grad_norm": 2.3386011123657227, + "learning_rate": 1.0148459893274955e-05, + "loss": 0.7671, "step": 5270 }, { - "epoch": 0.39884983542052893, - "grad_norm": 2.4113855361938477, - "learning_rate": 1.2646496204744618e-05, - "loss": 0.6297, + "epoch": 0.3710665258711721, + "grad_norm": 1.7649767398834229, + "learning_rate": 1.0147034266299667e-05, + "loss": 0.7562, "step": 5271 }, { - "epoch": 0.39892550414286254, - "grad_norm": 1.8144162893295288, - "learning_rate": 1.2644437016777803e-05, - "loss": 0.5998, + "epoch": 0.3711369236184442, + "grad_norm": 1.7903693914413452, + "learning_rate": 1.0145608475707217e-05, + "loss": 0.7502, "step": 5272 }, { - "epoch": 0.39900117286519615, - "grad_norm": 1.9845155477523804, - "learning_rate": 1.2642377600584556e-05, - "loss": 0.7491, + "epoch": 0.37120732136571627, + "grad_norm": 1.5871249437332153, + "learning_rate": 1.0144182521571733e-05, + "loss": 0.6172, "step": 5273 }, { - "epoch": 0.3990768415875298, - "grad_norm": 2.8481497764587402, - "learning_rate": 1.264031795629384e-05, - "loss": 0.5937, + "epoch": 0.3712777191129884, + "grad_norm": 1.7630242109298706, + "learning_rate": 1.0142756403967351e-05, + "loss": 0.7473, "step": 5274 }, { - "epoch": 0.39915251030986343, - "grad_norm": 2.570568323135376, - "learning_rate": 1.263825808403464e-05, - "loss": 0.8319, + "epoch": 0.3713481168602605, + "grad_norm": 1.535504937171936, + "learning_rate": 1.014133012296822e-05, + "loss": 0.8685, "step": 5275 }, { - "epoch": 0.39922817903219704, - "grad_norm": 2.246908187866211, - "learning_rate": 1.2636197983935953e-05, - "loss": 0.6984, + "epoch": 0.37141851460753256, + "grad_norm": 1.5895111560821533, + "learning_rate": 1.0139903678648489e-05, + "loss": 0.6186, "step": 5276 }, { - "epoch": 0.39930384775453065, - "grad_norm": 2.2146944999694824, - "learning_rate": 1.2634137656126784e-05, - "loss": 0.7518, + "epoch": 0.37148891235480463, + "grad_norm": 2.7040889263153076, + "learning_rate": 1.0138477071082322e-05, + "loss": 0.643, "step": 5277 }, { - "epoch": 0.3993795164768643, - "grad_norm": 2.186021327972412, - "learning_rate": 1.2632077100736164e-05, - "loss": 0.7488, + "epoch": 0.3715593101020767, + "grad_norm": 24.40300750732422, + "learning_rate": 1.0137050300343892e-05, + "loss": 0.7029, "step": 5278 }, { - "epoch": 0.3994551851991979, - "grad_norm": 1.7041964530944824, - "learning_rate": 1.2630016317893127e-05, - "loss": 0.6456, + "epoch": 0.37162970784934884, + "grad_norm": 1.6729038953781128, + "learning_rate": 1.0135623366507373e-05, + "loss": 0.6739, "step": 5279 }, { - "epoch": 0.39953085392153154, - "grad_norm": 1.9374867677688599, - "learning_rate": 1.2627955307726726e-05, - "loss": 0.6803, + "epoch": 0.3717001055966209, + "grad_norm": 1.7552975416183472, + "learning_rate": 1.0134196269646955e-05, + "loss": 0.6781, "step": 5280 }, { - "epoch": 0.39960652264386515, - "grad_norm": 2.4757678508758545, - "learning_rate": 1.2625894070366033e-05, - "loss": 0.6904, + "epoch": 0.371770503343893, + "grad_norm": 1.9739738702774048, + "learning_rate": 1.0132769009836835e-05, + "loss": 0.7175, "step": 5281 }, { - "epoch": 0.39968219136619876, - "grad_norm": 2.042297840118408, - "learning_rate": 1.2623832605940122e-05, - "loss": 0.7731, + "epoch": 0.3718409010911651, + "grad_norm": 2.1036131381988525, + "learning_rate": 1.013134158715122e-05, + "loss": 0.6868, "step": 5282 }, { - "epoch": 0.3997578600885324, - "grad_norm": 1.9081017971038818, - "learning_rate": 1.2621770914578095e-05, - "loss": 0.7498, + "epoch": 0.37191129883843715, + "grad_norm": 1.7601113319396973, + "learning_rate": 1.0129914001664317e-05, + "loss": 0.6785, "step": 5283 }, { - "epoch": 0.39983352881086603, - "grad_norm": 1.9273154735565186, - "learning_rate": 1.2619708996409056e-05, - "loss": 0.7335, + "epoch": 0.37198169658570923, + "grad_norm": 1.7540339231491089, + "learning_rate": 1.012848625345035e-05, + "loss": 0.6684, "step": 5284 }, { - "epoch": 0.39990919753319965, - "grad_norm": 2.073868989944458, - "learning_rate": 1.2617646851562134e-05, - "loss": 0.7556, + "epoch": 0.37205209433298136, + "grad_norm": 1.7120615243911743, + "learning_rate": 1.0127058342583551e-05, + "loss": 0.8378, "step": 5285 }, { - "epoch": 0.39998486625553326, - "grad_norm": 1.6271218061447144, - "learning_rate": 1.2615584480166465e-05, - "loss": 0.7457, + "epoch": 0.37212249208025344, + "grad_norm": 2.675217866897583, + "learning_rate": 1.0125630269138155e-05, + "loss": 0.6893, "step": 5286 }, { - "epoch": 0.4000605349778669, - "grad_norm": 3.472792387008667, - "learning_rate": 1.2613521882351204e-05, - "loss": 0.7719, + "epoch": 0.3721928898275255, + "grad_norm": 2.2433011531829834, + "learning_rate": 1.0124202033188414e-05, + "loss": 0.7734, "step": 5287 }, { - "epoch": 0.40013620370020053, - "grad_norm": 2.107931613922119, - "learning_rate": 1.2611459058245511e-05, - "loss": 0.6264, + "epoch": 0.3722632875747976, + "grad_norm": 1.8474833965301514, + "learning_rate": 1.0122773634808578e-05, + "loss": 0.7053, "step": 5288 }, { - "epoch": 0.40021187242253414, - "grad_norm": 2.132664203643799, - "learning_rate": 1.2609396007978573e-05, - "loss": 0.6974, + "epoch": 0.3723336853220697, + "grad_norm": 1.9858371019363403, + "learning_rate": 1.0121345074072912e-05, + "loss": 0.7986, "step": 5289 }, { - "epoch": 0.40028754114486775, - "grad_norm": 1.6444696187973022, - "learning_rate": 1.2607332731679584e-05, - "loss": 0.6695, + "epoch": 0.3724040830693418, + "grad_norm": 1.6768808364868164, + "learning_rate": 1.0119916351055688e-05, + "loss": 0.6783, "step": 5290 }, { - "epoch": 0.4003632098672014, - "grad_norm": 2.9400112628936768, - "learning_rate": 1.260526922947775e-05, - "loss": 0.7455, + "epoch": 0.3724744808166139, + "grad_norm": 1.674741268157959, + "learning_rate": 1.011848746583119e-05, + "loss": 0.7301, "step": 5291 }, { - "epoch": 0.40043887858953503, - "grad_norm": 3.270721197128296, - "learning_rate": 1.2603205501502296e-05, - "loss": 0.7693, + "epoch": 0.37254487856388596, + "grad_norm": 1.7185328006744385, + "learning_rate": 1.0117058418473702e-05, + "loss": 0.6655, "step": 5292 }, { - "epoch": 0.40051454731186864, - "grad_norm": 1.8366196155548096, - "learning_rate": 1.260114154788246e-05, - "loss": 0.6392, + "epoch": 0.37261527631115804, + "grad_norm": 1.9729162454605103, + "learning_rate": 1.0115629209057524e-05, + "loss": 0.7784, "step": 5293 }, { - "epoch": 0.40059021603420225, - "grad_norm": 2.108292818069458, - "learning_rate": 1.2599077368747494e-05, - "loss": 0.7311, + "epoch": 0.3726856740584301, + "grad_norm": 1.8761993646621704, + "learning_rate": 1.0114199837656963e-05, + "loss": 0.7297, "step": 5294 }, { - "epoch": 0.40066588475653586, - "grad_norm": 2.0967061519622803, - "learning_rate": 1.259701296422666e-05, - "loss": 0.7679, + "epoch": 0.3727560718057022, + "grad_norm": 2.0601983070373535, + "learning_rate": 1.0112770304346331e-05, + "loss": 0.8252, "step": 5295 }, { - "epoch": 0.4007415534788695, - "grad_norm": 1.9956434965133667, - "learning_rate": 1.2594948334449241e-05, - "loss": 0.7826, + "epoch": 0.3728264695529743, + "grad_norm": 1.5327900648117065, + "learning_rate": 1.0111340609199952e-05, + "loss": 0.6716, "step": 5296 }, { - "epoch": 0.40081722220120314, - "grad_norm": 2.254016160964966, - "learning_rate": 1.259288347954453e-05, - "loss": 0.5853, + "epoch": 0.3728968673002464, + "grad_norm": 1.675683617591858, + "learning_rate": 1.0109910752292158e-05, + "loss": 0.6941, "step": 5297 }, { - "epoch": 0.40089289092353675, - "grad_norm": 1.7637948989868164, - "learning_rate": 1.2590818399641833e-05, - "loss": 0.8752, + "epoch": 0.3729672650475185, + "grad_norm": 1.7194623947143555, + "learning_rate": 1.0108480733697286e-05, + "loss": 0.7686, "step": 5298 }, { - "epoch": 0.40096855964587036, - "grad_norm": 2.54941987991333, - "learning_rate": 1.2588753094870477e-05, - "loss": 0.7963, + "epoch": 0.37303766279479056, + "grad_norm": 3.103590965270996, + "learning_rate": 1.0107050553489687e-05, + "loss": 0.8156, "step": 5299 }, { - "epoch": 0.401044228368204, - "grad_norm": 1.4599640369415283, - "learning_rate": 1.2586687565359791e-05, - "loss": 0.7636, + "epoch": 0.37310806054206264, + "grad_norm": 1.8955953121185303, + "learning_rate": 1.0105620211743714e-05, + "loss": 0.782, "step": 5300 }, { - "epoch": 0.40111989709053764, - "grad_norm": 2.2230331897735596, - "learning_rate": 1.2584621811239133e-05, - "loss": 0.7579, + "epoch": 0.37317845828933477, + "grad_norm": 2.878431558609009, + "learning_rate": 1.0104189708533735e-05, + "loss": 0.6781, "step": 5301 }, { - "epoch": 0.40119556581287125, - "grad_norm": 1.8923826217651367, - "learning_rate": 1.2582555832637862e-05, - "loss": 0.6866, + "epoch": 0.37324885603660685, + "grad_norm": 2.2636687755584717, + "learning_rate": 1.010275904393412e-05, + "loss": 0.7647, "step": 5302 }, { - "epoch": 0.40127123453520486, - "grad_norm": 2.1083552837371826, - "learning_rate": 1.2580489629685354e-05, - "loss": 0.6996, + "epoch": 0.3733192537838789, + "grad_norm": 1.7787522077560425, + "learning_rate": 1.0101328218019255e-05, + "loss": 0.6116, "step": 5303 }, { - "epoch": 0.4013469032575385, - "grad_norm": 2.3579092025756836, - "learning_rate": 1.2578423202511008e-05, - "loss": 0.7097, + "epoch": 0.373389651531151, + "grad_norm": 1.9748866558074951, + "learning_rate": 1.0099897230863525e-05, + "loss": 0.7418, "step": 5304 }, { - "epoch": 0.40142257197987213, - "grad_norm": 2.3279149532318115, - "learning_rate": 1.2576356551244226e-05, - "loss": 0.7892, + "epoch": 0.3734600492784231, + "grad_norm": 1.9209239482879639, + "learning_rate": 1.009846608254133e-05, + "loss": 0.7749, "step": 5305 }, { - "epoch": 0.40149824070220574, - "grad_norm": 1.9012402296066284, - "learning_rate": 1.2574289676014431e-05, - "loss": 0.6017, + "epoch": 0.37353044702569516, + "grad_norm": 1.8696320056915283, + "learning_rate": 1.0097034773127081e-05, + "loss": 0.7943, "step": 5306 }, { - "epoch": 0.40157390942453935, - "grad_norm": 1.9098631143569946, - "learning_rate": 1.2572222576951054e-05, - "loss": 0.7714, + "epoch": 0.3736008447729673, + "grad_norm": 1.5559605360031128, + "learning_rate": 1.0095603302695186e-05, + "loss": 0.6976, "step": 5307 }, { - "epoch": 0.40164957814687297, - "grad_norm": 2.4168155193328857, - "learning_rate": 1.2570155254183544e-05, - "loss": 0.8486, + "epoch": 0.37367124252023937, + "grad_norm": 1.7159825563430786, + "learning_rate": 1.0094171671320072e-05, + "loss": 0.812, "step": 5308 }, { - "epoch": 0.40172524686920663, - "grad_norm": 2.088871479034424, - "learning_rate": 1.2568087707841367e-05, - "loss": 0.7525, + "epoch": 0.37374164026751144, + "grad_norm": 2.1060099601745605, + "learning_rate": 1.009273987907617e-05, + "loss": 0.6767, "step": 5309 }, { - "epoch": 0.40180091559154024, - "grad_norm": 2.0699868202209473, - "learning_rate": 1.2566019938053996e-05, - "loss": 0.6048, + "epoch": 0.3738120380147835, + "grad_norm": 1.713591456413269, + "learning_rate": 1.0091307926037923e-05, + "loss": 0.7183, "step": 5310 }, { - "epoch": 0.40187658431387385, - "grad_norm": 1.6784697771072388, - "learning_rate": 1.2563951944950923e-05, - "loss": 0.8357, + "epoch": 0.3738824357620556, + "grad_norm": 1.6451973915100098, + "learning_rate": 1.0089875812279776e-05, + "loss": 0.8686, "step": 5311 }, { - "epoch": 0.40195225303620746, - "grad_norm": 2.897984504699707, - "learning_rate": 1.2561883728661652e-05, - "loss": 0.6136, + "epoch": 0.3739528335093277, + "grad_norm": 2.074495553970337, + "learning_rate": 1.0088443537876187e-05, + "loss": 0.6862, "step": 5312 }, { - "epoch": 0.40202792175854113, - "grad_norm": 2.1926939487457275, - "learning_rate": 1.2559815289315701e-05, - "loss": 0.6236, + "epoch": 0.3740232312565998, + "grad_norm": 1.9609813690185547, + "learning_rate": 1.0087011102901623e-05, + "loss": 0.7647, "step": 5313 }, { - "epoch": 0.40210359048087474, - "grad_norm": 2.2458455562591553, - "learning_rate": 1.2557746627042605e-05, - "loss": 0.8109, + "epoch": 0.3740936290038719, + "grad_norm": 2.0789029598236084, + "learning_rate": 1.0085578507430552e-05, + "loss": 0.8042, "step": 5314 }, { - "epoch": 0.40217925920320835, - "grad_norm": 2.5638132095336914, - "learning_rate": 1.2555677741971905e-05, - "loss": 0.7555, + "epoch": 0.37416402675114396, + "grad_norm": 1.8454471826553345, + "learning_rate": 1.0084145751537462e-05, + "loss": 0.7691, "step": 5315 }, { - "epoch": 0.40225492792554196, - "grad_norm": 1.7509515285491943, - "learning_rate": 1.2553608634233166e-05, - "loss": 0.8178, + "epoch": 0.37423442449841604, + "grad_norm": 1.7699419260025024, + "learning_rate": 1.0082712835296844e-05, + "loss": 0.7042, "step": 5316 }, { - "epoch": 0.4023305966478756, - "grad_norm": 2.2792065143585205, - "learning_rate": 1.2551539303955962e-05, - "loss": 0.8282, + "epoch": 0.3743048222456881, + "grad_norm": 1.9046812057495117, + "learning_rate": 1.0081279758783192e-05, + "loss": 0.7076, "step": 5317 }, { - "epoch": 0.40240626537020924, - "grad_norm": 1.9566348791122437, - "learning_rate": 1.2549469751269876e-05, - "loss": 0.7285, + "epoch": 0.37437521999296025, + "grad_norm": 1.7194609642028809, + "learning_rate": 1.0079846522071014e-05, + "loss": 0.5975, "step": 5318 }, { - "epoch": 0.40248193409254285, - "grad_norm": 2.4450414180755615, - "learning_rate": 1.2547399976304517e-05, - "loss": 0.7842, + "epoch": 0.37444561774023233, + "grad_norm": 1.7808161973953247, + "learning_rate": 1.0078413125234828e-05, + "loss": 0.7619, "step": 5319 }, { - "epoch": 0.40255760281487646, - "grad_norm": 2.2559216022491455, - "learning_rate": 1.2545329979189495e-05, - "loss": 0.7553, + "epoch": 0.3745160154875044, + "grad_norm": 2.939826488494873, + "learning_rate": 1.0076979568349153e-05, + "loss": 0.6928, "step": 5320 }, { - "epoch": 0.40263327153721007, - "grad_norm": 2.124101400375366, - "learning_rate": 1.2543259760054444e-05, - "loss": 0.6608, + "epoch": 0.3745864132347765, + "grad_norm": 1.5947803258895874, + "learning_rate": 1.0075545851488524e-05, + "loss": 0.6526, "step": 5321 }, { - "epoch": 0.40270894025954374, - "grad_norm": 16.932872772216797, - "learning_rate": 1.2541189319029008e-05, - "loss": 0.7524, + "epoch": 0.37465681098204856, + "grad_norm": 1.8662793636322021, + "learning_rate": 1.0074111974727483e-05, + "loss": 0.7929, "step": 5322 }, { - "epoch": 0.40278460898187735, - "grad_norm": 1.7447752952575684, - "learning_rate": 1.2539118656242839e-05, - "loss": 0.7034, + "epoch": 0.37472720872932064, + "grad_norm": 2.086256742477417, + "learning_rate": 1.0072677938140573e-05, + "loss": 0.662, "step": 5323 }, { - "epoch": 0.40286027770421096, - "grad_norm": 2.0638234615325928, - "learning_rate": 1.2537047771825618e-05, - "loss": 0.6932, + "epoch": 0.37479760647659277, + "grad_norm": 2.270972967147827, + "learning_rate": 1.0071243741802357e-05, + "loss": 0.6333, "step": 5324 }, { - "epoch": 0.40293594642654457, - "grad_norm": 2.157304048538208, - "learning_rate": 1.2534976665907024e-05, - "loss": 0.7061, + "epoch": 0.37486800422386485, + "grad_norm": 1.7108832597732544, + "learning_rate": 1.0069809385787396e-05, + "loss": 0.7102, "step": 5325 }, { - "epoch": 0.40301161514887823, - "grad_norm": 1.8160690069198608, - "learning_rate": 1.2532905338616756e-05, - "loss": 0.8272, + "epoch": 0.3749384019711369, + "grad_norm": 1.854067087173462, + "learning_rate": 1.0068374870170266e-05, + "loss": 0.6783, "step": 5326 }, { - "epoch": 0.40308728387121184, - "grad_norm": 2.0002903938293457, - "learning_rate": 1.2530833790084527e-05, - "loss": 0.6858, + "epoch": 0.375008799718409, + "grad_norm": 1.9847677946090698, + "learning_rate": 1.0066940195025546e-05, + "loss": 0.6757, "step": 5327 }, { - "epoch": 0.40316295259354545, - "grad_norm": 2.2748143672943115, - "learning_rate": 1.252876202044007e-05, - "loss": 0.8348, + "epoch": 0.3750791974656811, + "grad_norm": 1.6450245380401611, + "learning_rate": 1.0065505360427829e-05, + "loss": 0.7073, "step": 5328 }, { - "epoch": 0.40323862131587906, - "grad_norm": 2.3512051105499268, - "learning_rate": 1.2526690029813123e-05, - "loss": 0.9381, + "epoch": 0.3751495952129532, + "grad_norm": 1.7886626720428467, + "learning_rate": 1.006407036645171e-05, + "loss": 0.7768, "step": 5329 }, { - "epoch": 0.40331429003821273, - "grad_norm": 2.0239651203155518, - "learning_rate": 1.2524617818333437e-05, - "loss": 0.6707, + "epoch": 0.3752199929602253, + "grad_norm": 2.33428692817688, + "learning_rate": 1.0062635213171801e-05, + "loss": 0.6533, "step": 5330 }, { - "epoch": 0.40338995876054634, - "grad_norm": 2.503915786743164, - "learning_rate": 1.2522545386130781e-05, - "loss": 0.6709, + "epoch": 0.37529039070749737, + "grad_norm": 1.6418706178665161, + "learning_rate": 1.006119990066271e-05, + "loss": 0.7096, "step": 5331 }, { - "epoch": 0.40346562748287995, - "grad_norm": 2.1918065547943115, - "learning_rate": 1.2520472733334942e-05, - "loss": 0.901, + "epoch": 0.37536078845476945, + "grad_norm": 1.7161122560501099, + "learning_rate": 1.0059764428999065e-05, + "loss": 0.7008, "step": 5332 }, { - "epoch": 0.40354129620521356, - "grad_norm": 2.0927445888519287, - "learning_rate": 1.2518399860075714e-05, - "loss": 0.8102, + "epoch": 0.3754311862020415, + "grad_norm": 1.880574345588684, + "learning_rate": 1.0058328798255498e-05, + "loss": 0.6446, "step": 5333 }, { - "epoch": 0.4036169649275472, - "grad_norm": 1.9232884645462036, - "learning_rate": 1.2516326766482908e-05, - "loss": 0.7762, + "epoch": 0.3755015839493136, + "grad_norm": 1.982589602470398, + "learning_rate": 1.0056893008506645e-05, + "loss": 0.6781, "step": 5334 }, { - "epoch": 0.40369263364988084, - "grad_norm": 2.297513961791992, - "learning_rate": 1.2514253452686346e-05, - "loss": 0.8568, + "epoch": 0.37557198169658573, + "grad_norm": 1.8677778244018555, + "learning_rate": 1.0055457059827156e-05, + "loss": 0.6995, "step": 5335 }, { - "epoch": 0.40376830237221445, - "grad_norm": 2.033656597137451, - "learning_rate": 1.2512179918815865e-05, - "loss": 0.8079, + "epoch": 0.3756423794438578, + "grad_norm": 2.078749656677246, + "learning_rate": 1.005402095229169e-05, + "loss": 0.7679, "step": 5336 }, { - "epoch": 0.40384397109454806, - "grad_norm": 1.996671438217163, - "learning_rate": 1.2510106165001317e-05, - "loss": 0.6862, + "epoch": 0.3757127771911299, + "grad_norm": 2.18495774269104, + "learning_rate": 1.0052584685974906e-05, + "loss": 0.6782, "step": 5337 }, { - "epoch": 0.40391963981688167, - "grad_norm": 2.0469861030578613, - "learning_rate": 1.250803219137257e-05, - "loss": 0.7625, + "epoch": 0.37578317493840196, + "grad_norm": 2.020418643951416, + "learning_rate": 1.005114826095148e-05, + "loss": 0.7499, "step": 5338 }, { - "epoch": 0.40399530853921534, - "grad_norm": 1.9562182426452637, - "learning_rate": 1.25059579980595e-05, - "loss": 0.8611, + "epoch": 0.37585357268567404, + "grad_norm": 1.8028045892715454, + "learning_rate": 1.0049711677296094e-05, + "loss": 0.606, "step": 5339 }, { - "epoch": 0.40407097726154895, - "grad_norm": 2.2968802452087402, - "learning_rate": 1.2503883585192003e-05, - "loss": 0.6937, + "epoch": 0.3759239704329461, + "grad_norm": 2.082176446914673, + "learning_rate": 1.0048274935083436e-05, + "loss": 0.6847, "step": 5340 }, { - "epoch": 0.40414664598388256, - "grad_norm": 2.1029908657073975, - "learning_rate": 1.2501808952899976e-05, - "loss": 0.6784, + "epoch": 0.37599436818021825, + "grad_norm": 1.987864375114441, + "learning_rate": 1.0046838034388202e-05, + "loss": 0.7121, "step": 5341 }, { - "epoch": 0.40422231470621617, - "grad_norm": 1.7134768962860107, - "learning_rate": 1.2499734101313355e-05, - "loss": 0.9551, + "epoch": 0.37606476592749033, + "grad_norm": 2.2933692932128906, + "learning_rate": 1.0045400975285101e-05, + "loss": 0.7778, "step": 5342 }, { - "epoch": 0.40429798342854983, - "grad_norm": 2.0898208618164062, - "learning_rate": 1.2497659030562058e-05, - "loss": 0.6126, + "epoch": 0.3761351636747624, + "grad_norm": 1.778998613357544, + "learning_rate": 1.0043963757848843e-05, + "loss": 0.6238, "step": 5343 }, { - "epoch": 0.40437365215088344, - "grad_norm": 1.7897844314575195, - "learning_rate": 1.2495583740776043e-05, - "loss": 0.9229, + "epoch": 0.3762055614220345, + "grad_norm": 1.8387465476989746, + "learning_rate": 1.0042526382154153e-05, + "loss": 0.7837, "step": 5344 }, { - "epoch": 0.40444932087321706, - "grad_norm": 1.9922789335250854, - "learning_rate": 1.2493508232085271e-05, - "loss": 0.7787, + "epoch": 0.37627595916930656, + "grad_norm": 1.5317102670669556, + "learning_rate": 1.0041088848275761e-05, + "loss": 0.6937, "step": 5345 }, { - "epoch": 0.40452498959555067, - "grad_norm": 2.0527071952819824, - "learning_rate": 1.2491432504619707e-05, - "loss": 0.764, + "epoch": 0.3763463569165787, + "grad_norm": 1.8663336038589478, + "learning_rate": 1.0039651156288406e-05, + "loss": 0.7749, "step": 5346 }, { - "epoch": 0.4046006583178843, - "grad_norm": 1.69284188747406, - "learning_rate": 1.2489356558509353e-05, - "loss": 0.5887, + "epoch": 0.37641675466385077, + "grad_norm": 1.7656573057174683, + "learning_rate": 1.0038213306266835e-05, + "loss": 0.6131, "step": 5347 }, { - "epoch": 0.40467632704021794, - "grad_norm": 2.769381284713745, - "learning_rate": 1.2487280393884202e-05, - "loss": 0.855, + "epoch": 0.37648715241112285, + "grad_norm": 1.5636613368988037, + "learning_rate": 1.0036775298285802e-05, + "loss": 0.6285, "step": 5348 }, { - "epoch": 0.40475199576255155, - "grad_norm": 1.9943363666534424, - "learning_rate": 1.2485204010874276e-05, - "loss": 0.6139, + "epoch": 0.3765575501583949, + "grad_norm": 1.8393199443817139, + "learning_rate": 1.003533713242007e-05, + "loss": 0.6768, "step": 5349 }, { - "epoch": 0.40482766448488516, - "grad_norm": 2.07372784614563, - "learning_rate": 1.2483127409609598e-05, - "loss": 0.8462, + "epoch": 0.376627947905667, + "grad_norm": 3.3449630737304688, + "learning_rate": 1.0033898808744412e-05, + "loss": 0.7329, "step": 5350 }, { - "epoch": 0.4049033332072188, - "grad_norm": 2.265497922897339, - "learning_rate": 1.248105059022022e-05, - "loss": 0.6575, + "epoch": 0.3766983456529391, + "grad_norm": 1.92350172996521, + "learning_rate": 1.003246032733361e-05, + "loss": 0.6818, "step": 5351 }, { - "epoch": 0.40497900192955244, - "grad_norm": 2.506788969039917, - "learning_rate": 1.2478973552836195e-05, - "loss": 0.7756, + "epoch": 0.3767687434002112, + "grad_norm": 1.853430151939392, + "learning_rate": 1.0031021688262447e-05, + "loss": 0.7002, "step": 5352 }, { - "epoch": 0.40505467065188605, - "grad_norm": 1.914207935333252, - "learning_rate": 1.2476896297587592e-05, - "loss": 0.7362, + "epoch": 0.3768391411474833, + "grad_norm": 1.7913384437561035, + "learning_rate": 1.002958289160572e-05, + "loss": 0.6596, "step": 5353 }, { - "epoch": 0.40513033937421966, - "grad_norm": 2.050699234008789, - "learning_rate": 1.2474818824604498e-05, - "loss": 0.6945, + "epoch": 0.37690953889475537, + "grad_norm": 1.778450608253479, + "learning_rate": 1.0028143937438236e-05, + "loss": 0.7821, "step": 5354 }, { - "epoch": 0.40520600809655327, - "grad_norm": 2.0397143363952637, - "learning_rate": 1.2472741134017008e-05, - "loss": 0.7752, + "epoch": 0.37697993664202745, + "grad_norm": 1.6053636074066162, + "learning_rate": 1.0026704825834807e-05, + "loss": 0.7137, "step": 5355 }, { - "epoch": 0.40528167681888694, - "grad_norm": 2.459721803665161, - "learning_rate": 1.2470663225955239e-05, - "loss": 0.8221, + "epoch": 0.3770503343892995, + "grad_norm": 1.9291224479675293, + "learning_rate": 1.0025265556870252e-05, + "loss": 0.7205, "step": 5356 }, { - "epoch": 0.40535734554122055, - "grad_norm": 1.8895254135131836, - "learning_rate": 1.2468585100549311e-05, - "loss": 0.6238, + "epoch": 0.37712073213657166, + "grad_norm": 1.9443745613098145, + "learning_rate": 1.0023826130619402e-05, + "loss": 0.7322, "step": 5357 }, { - "epoch": 0.40543301426355416, - "grad_norm": 2.120483875274658, - "learning_rate": 1.2466506757929369e-05, - "loss": 0.6015, + "epoch": 0.37719112988384373, + "grad_norm": 1.6640493869781494, + "learning_rate": 1.0022386547157091e-05, + "loss": 0.7164, "step": 5358 }, { - "epoch": 0.40550868298588777, - "grad_norm": 1.9569705724716187, - "learning_rate": 1.2464428198225558e-05, - "loss": 0.704, + "epoch": 0.3772615276311158, + "grad_norm": 1.9136216640472412, + "learning_rate": 1.0020946806558167e-05, + "loss": 0.6796, "step": 5359 }, { - "epoch": 0.4055843517082214, - "grad_norm": 1.8289756774902344, - "learning_rate": 1.2462349421568047e-05, - "loss": 0.701, + "epoch": 0.3773319253783879, + "grad_norm": 1.715802550315857, + "learning_rate": 1.0019506908897484e-05, + "loss": 0.7418, "step": 5360 }, { - "epoch": 0.40566002043055505, - "grad_norm": 1.8984501361846924, - "learning_rate": 1.246027042808702e-05, - "loss": 0.6924, + "epoch": 0.37740232312565997, + "grad_norm": 1.9123260974884033, + "learning_rate": 1.00180668542499e-05, + "loss": 0.8662, "step": 5361 }, { - "epoch": 0.40573568915288866, - "grad_norm": 2.5578982830047607, - "learning_rate": 1.2458191217912664e-05, - "loss": 0.8879, + "epoch": 0.37747272087293204, + "grad_norm": 1.7322694063186646, + "learning_rate": 1.0016626642690288e-05, + "loss": 0.5995, "step": 5362 }, { - "epoch": 0.40581135787522227, - "grad_norm": 2.1358728408813477, - "learning_rate": 1.2456111791175193e-05, - "loss": 0.7446, + "epoch": 0.3775431186202042, + "grad_norm": 1.8171782493591309, + "learning_rate": 1.0015186274293524e-05, + "loss": 0.7169, "step": 5363 }, { - "epoch": 0.4058870265975559, - "grad_norm": 2.0823116302490234, - "learning_rate": 1.2454032148004819e-05, - "loss": 0.8014, + "epoch": 0.37761351636747625, + "grad_norm": 1.7371406555175781, + "learning_rate": 1.0013745749134499e-05, + "loss": 0.8328, "step": 5364 }, { - "epoch": 0.40596269531988954, - "grad_norm": 1.6264188289642334, - "learning_rate": 1.2451952288531781e-05, - "loss": 0.7815, + "epoch": 0.37768391411474833, + "grad_norm": 2.034255027770996, + "learning_rate": 1.0012305067288098e-05, + "loss": 0.676, "step": 5365 }, { - "epoch": 0.40603836404222315, - "grad_norm": 2.0880937576293945, - "learning_rate": 1.2449872212886328e-05, - "loss": 0.6668, + "epoch": 0.3777543118620204, + "grad_norm": 1.8474699258804321, + "learning_rate": 1.001086422882923e-05, + "loss": 0.7084, "step": 5366 }, { - "epoch": 0.40611403276455676, - "grad_norm": 2.2300379276275635, - "learning_rate": 1.2447791921198715e-05, - "loss": 0.7545, + "epoch": 0.3778247096092925, + "grad_norm": 1.5765087604522705, + "learning_rate": 1.0009423233832804e-05, + "loss": 0.6624, "step": 5367 }, { - "epoch": 0.4061897014868904, - "grad_norm": 1.5105425119400024, - "learning_rate": 1.2445711413599226e-05, - "loss": 0.8274, + "epoch": 0.37789510735656456, + "grad_norm": 1.6165974140167236, + "learning_rate": 1.000798208237374e-05, + "loss": 0.5427, "step": 5368 }, { - "epoch": 0.40626537020922404, - "grad_norm": 2.3562936782836914, - "learning_rate": 1.2443630690218137e-05, - "loss": 0.8011, + "epoch": 0.3779655051038367, + "grad_norm": 2.0014312267303467, + "learning_rate": 1.0006540774526962e-05, + "loss": 0.7155, "step": 5369 }, { - "epoch": 0.40634103893155765, - "grad_norm": 2.463721990585327, - "learning_rate": 1.2441549751185762e-05, - "loss": 0.706, + "epoch": 0.3780359028511088, + "grad_norm": 1.6871329545974731, + "learning_rate": 1.0005099310367406e-05, + "loss": 0.6796, "step": 5370 }, { - "epoch": 0.40641670765389126, - "grad_norm": 2.0283122062683105, - "learning_rate": 1.2439468596632408e-05, - "loss": 0.7052, + "epoch": 0.37810630059838085, + "grad_norm": 1.672567367553711, + "learning_rate": 1.0003657689970016e-05, + "loss": 0.7596, "step": 5371 }, { - "epoch": 0.4064923763762249, - "grad_norm": 1.6645065546035767, - "learning_rate": 1.2437387226688404e-05, - "loss": 0.5734, + "epoch": 0.37817669834565293, + "grad_norm": 1.5686231851577759, + "learning_rate": 1.0002215913409742e-05, + "loss": 0.6148, "step": 5372 }, { - "epoch": 0.4065680450985585, - "grad_norm": 2.11430025100708, - "learning_rate": 1.2435305641484095e-05, - "loss": 0.7679, + "epoch": 0.378247096092925, + "grad_norm": 1.7239083051681519, + "learning_rate": 1.0000773980761544e-05, + "loss": 0.784, "step": 5373 }, { - "epoch": 0.40664371382089215, - "grad_norm": 2.3016443252563477, - "learning_rate": 1.2433223841149837e-05, - "loss": 0.7524, + "epoch": 0.37831749384019714, + "grad_norm": 2.0698776245117188, + "learning_rate": 9.999331892100388e-06, + "loss": 0.7348, "step": 5374 }, { - "epoch": 0.40671938254322576, - "grad_norm": 2.4958677291870117, - "learning_rate": 1.2431141825815998e-05, - "loss": 0.7631, + "epoch": 0.3783878915874692, + "grad_norm": 2.0022168159484863, + "learning_rate": 9.997889647501251e-06, + "loss": 0.7668, "step": 5375 }, { - "epoch": 0.40679505126555937, - "grad_norm": 1.683720350265503, - "learning_rate": 1.2429059595612957e-05, - "loss": 0.7059, + "epoch": 0.3784582893347413, + "grad_norm": 1.8236323595046997, + "learning_rate": 9.996447247039119e-06, + "loss": 0.7121, "step": 5376 }, { - "epoch": 0.406870719987893, - "grad_norm": 1.871661901473999, - "learning_rate": 1.2426977150671117e-05, - "loss": 0.6518, + "epoch": 0.37852868708201337, + "grad_norm": 2.380829334259033, + "learning_rate": 9.995004690788976e-06, + "loss": 0.7142, "step": 5377 }, { - "epoch": 0.40694638871022665, - "grad_norm": 1.8744332790374756, - "learning_rate": 1.2424894491120879e-05, - "loss": 0.7192, + "epoch": 0.37859908482928545, + "grad_norm": 1.804495096206665, + "learning_rate": 9.99356197882583e-06, + "loss": 0.7939, "step": 5378 }, { - "epoch": 0.40702205743256026, - "grad_norm": 2.238365888595581, - "learning_rate": 1.2422811617092675e-05, - "loss": 0.7441, + "epoch": 0.3786694825765575, + "grad_norm": 1.7329933643341064, + "learning_rate": 9.992119111224685e-06, + "loss": 0.7004, "step": 5379 }, { - "epoch": 0.40709772615489387, - "grad_norm": 2.139251232147217, - "learning_rate": 1.2420728528716933e-05, - "loss": 0.7847, + "epoch": 0.37873988032382966, + "grad_norm": 1.650241732597351, + "learning_rate": 9.990676088060557e-06, + "loss": 0.7129, "step": 5380 }, { - "epoch": 0.4071733948772275, - "grad_norm": 2.121941328048706, - "learning_rate": 1.241864522612411e-05, - "loss": 0.781, + "epoch": 0.37881027807110174, + "grad_norm": 1.6394497156143188, + "learning_rate": 9.98923290940847e-06, + "loss": 0.609, "step": 5381 }, { - "epoch": 0.40724906359956115, - "grad_norm": 2.219752788543701, - "learning_rate": 1.2416561709444665e-05, - "loss": 0.815, + "epoch": 0.3788806758183738, + "grad_norm": 1.8784000873565674, + "learning_rate": 9.987789575343458e-06, + "loss": 0.7496, "step": 5382 }, { - "epoch": 0.40732473232189476, - "grad_norm": 2.355746030807495, - "learning_rate": 1.2414477978809075e-05, - "loss": 0.8222, + "epoch": 0.3789510735656459, + "grad_norm": 1.9606164693832397, + "learning_rate": 9.98634608594056e-06, + "loss": 0.7107, "step": 5383 }, { - "epoch": 0.40740040104422837, - "grad_norm": 2.5740647315979004, - "learning_rate": 1.241239403434783e-05, - "loss": 0.8416, + "epoch": 0.37902147131291797, + "grad_norm": 1.9121488332748413, + "learning_rate": 9.984902441274825e-06, + "loss": 0.727, "step": 5384 }, { - "epoch": 0.407476069766562, - "grad_norm": 2.1180238723754883, - "learning_rate": 1.2410309876191433e-05, - "loss": 0.8138, + "epoch": 0.3790918690601901, + "grad_norm": 1.9320366382598877, + "learning_rate": 9.983458641421307e-06, + "loss": 0.6247, "step": 5385 }, { - "epoch": 0.4075517384888956, - "grad_norm": 2.1506288051605225, - "learning_rate": 1.2408225504470402e-05, - "loss": 0.7021, + "epoch": 0.3791622668074622, + "grad_norm": 1.532025694847107, + "learning_rate": 9.982014686455074e-06, + "loss": 0.6936, "step": 5386 }, { - "epoch": 0.40762740721122925, - "grad_norm": 3.2336843013763428, - "learning_rate": 1.2406140919315265e-05, - "loss": 0.8422, + "epoch": 0.37923266455473426, + "grad_norm": 1.542554497718811, + "learning_rate": 9.980570576451196e-06, + "loss": 0.6672, "step": 5387 }, { - "epoch": 0.40770307593356286, - "grad_norm": 2.023808240890503, - "learning_rate": 1.2404056120856568e-05, - "loss": 0.7343, + "epoch": 0.37930306230200633, + "grad_norm": 2.0299131870269775, + "learning_rate": 9.979126311484754e-06, + "loss": 0.7724, "step": 5388 }, { - "epoch": 0.4077787446558965, - "grad_norm": 1.8890466690063477, - "learning_rate": 1.2401971109224865e-05, - "loss": 0.6938, + "epoch": 0.3793734600492784, + "grad_norm": 1.676446795463562, + "learning_rate": 9.977681891630837e-06, + "loss": 0.6177, "step": 5389 }, { - "epoch": 0.4078544133782301, - "grad_norm": 2.454148530960083, - "learning_rate": 1.239988588455073e-05, - "loss": 0.8247, + "epoch": 0.3794438577965505, + "grad_norm": 2.5348141193389893, + "learning_rate": 9.976237316964544e-06, + "loss": 0.7803, "step": 5390 }, { - "epoch": 0.40793008210056375, - "grad_norm": 2.348931312561035, - "learning_rate": 1.2397800446964743e-05, - "loss": 0.6928, + "epoch": 0.3795142555438226, + "grad_norm": 3.3527660369873047, + "learning_rate": 9.974792587560975e-06, + "loss": 0.717, "step": 5391 }, { - "epoch": 0.40800575082289736, - "grad_norm": 1.7471752166748047, - "learning_rate": 1.2395714796597503e-05, - "loss": 0.7767, + "epoch": 0.3795846532910947, + "grad_norm": 1.6299231052398682, + "learning_rate": 9.973347703495246e-06, + "loss": 0.6553, "step": 5392 }, { - "epoch": 0.408081419545231, - "grad_norm": 2.375242233276367, - "learning_rate": 1.239362893357962e-05, - "loss": 0.6983, + "epoch": 0.3796550510383668, + "grad_norm": 1.875382661819458, + "learning_rate": 9.971902664842478e-06, + "loss": 0.5663, "step": 5393 }, { - "epoch": 0.4081570882675646, - "grad_norm": 2.8043153285980225, - "learning_rate": 1.2391542858041716e-05, - "loss": 0.6071, + "epoch": 0.37972544878563885, + "grad_norm": 1.5284674167633057, + "learning_rate": 9.970457471677796e-06, + "loss": 0.6985, "step": 5394 }, { - "epoch": 0.40823275698989825, - "grad_norm": 2.196038246154785, - "learning_rate": 1.238945657011443e-05, - "loss": 0.7422, + "epoch": 0.37979584653291093, + "grad_norm": 1.6339986324310303, + "learning_rate": 9.969012124076342e-06, + "loss": 0.7223, "step": 5395 }, { - "epoch": 0.40830842571223186, - "grad_norm": 2.552044153213501, - "learning_rate": 1.2387370069928408e-05, - "loss": 0.8483, + "epoch": 0.379866244280183, + "grad_norm": 1.856506109237671, + "learning_rate": 9.96756662211326e-06, + "loss": 0.6104, "step": 5396 }, { - "epoch": 0.40838409443456547, - "grad_norm": 2.0079550743103027, - "learning_rate": 1.2385283357614319e-05, - "loss": 0.7005, + "epoch": 0.37993664202745514, + "grad_norm": 2.1086597442626953, + "learning_rate": 9.966120965863698e-06, + "loss": 0.63, "step": 5397 }, { - "epoch": 0.4084597631568991, - "grad_norm": 3.333538055419922, - "learning_rate": 1.2383196433302832e-05, - "loss": 0.6432, + "epoch": 0.3800070397747272, + "grad_norm": 2.0636184215545654, + "learning_rate": 9.964675155402824e-06, + "loss": 0.6404, "step": 5398 }, { - "epoch": 0.40853543187923275, - "grad_norm": 2.925452947616577, - "learning_rate": 1.2381109297124649e-05, - "loss": 0.7974, + "epoch": 0.3800774375219993, + "grad_norm": 2.107306718826294, + "learning_rate": 9.963229190805804e-06, + "loss": 0.6999, "step": 5399 }, { - "epoch": 0.40861110060156636, - "grad_norm": 2.683720350265503, - "learning_rate": 1.2379021949210461e-05, - "loss": 0.7273, + "epoch": 0.3801478352692714, + "grad_norm": 1.6406822204589844, + "learning_rate": 9.961783072147814e-06, + "loss": 0.7408, "step": 5400 }, { - "epoch": 0.40868676932389997, - "grad_norm": 2.577501058578491, - "learning_rate": 1.2376934389690992e-05, - "loss": 0.8398, + "epoch": 0.38021823301654345, + "grad_norm": 1.8856507539749146, + "learning_rate": 9.960336799504037e-06, + "loss": 0.8238, "step": 5401 }, { - "epoch": 0.4087624380462336, - "grad_norm": 1.819061040878296, - "learning_rate": 1.2374846618696968e-05, - "loss": 0.6676, + "epoch": 0.3802886307638156, + "grad_norm": 1.8934967517852783, + "learning_rate": 9.958890372949672e-06, + "loss": 0.6838, "step": 5402 }, { - "epoch": 0.4088381067685672, - "grad_norm": 1.9940983057022095, - "learning_rate": 1.2372758636359129e-05, - "loss": 0.7512, + "epoch": 0.38035902851108766, + "grad_norm": 2.0962276458740234, + "learning_rate": 9.957443792559914e-06, + "loss": 0.7578, "step": 5403 }, { - "epoch": 0.40891377549090085, - "grad_norm": 2.4478976726531982, - "learning_rate": 1.2370670442808242e-05, - "loss": 0.7858, + "epoch": 0.38042942625835974, + "grad_norm": 2.079969644546509, + "learning_rate": 9.955997058409977e-06, + "loss": 0.7743, "step": 5404 }, { - "epoch": 0.40898944421323447, - "grad_norm": 1.8682414293289185, - "learning_rate": 1.2368582038175066e-05, - "loss": 0.6828, + "epoch": 0.3804998240056318, + "grad_norm": 1.7707676887512207, + "learning_rate": 9.954550170575076e-06, + "loss": 0.7214, "step": 5405 }, { - "epoch": 0.4090651129355681, - "grad_norm": 1.9492807388305664, - "learning_rate": 1.2366493422590389e-05, - "loss": 0.5985, + "epoch": 0.3805702217529039, + "grad_norm": 1.832004189491272, + "learning_rate": 9.953103129130435e-06, + "loss": 0.7296, "step": 5406 }, { - "epoch": 0.4091407816579017, - "grad_norm": 2.0559017658233643, - "learning_rate": 1.2364404596185005e-05, - "loss": 0.7456, + "epoch": 0.38064061950017597, + "grad_norm": 1.7930132150650024, + "learning_rate": 9.951655934151287e-06, + "loss": 0.6398, "step": 5407 }, { - "epoch": 0.40921645038023535, - "grad_norm": 1.9211013317108154, - "learning_rate": 1.2362315559089724e-05, - "loss": 0.8048, + "epoch": 0.3807110172474481, + "grad_norm": 1.7455673217773438, + "learning_rate": 9.950208585712877e-06, + "loss": 0.6706, "step": 5408 }, { - "epoch": 0.40929211910256896, - "grad_norm": 2.1894242763519287, - "learning_rate": 1.2360226311435368e-05, - "loss": 0.6719, + "epoch": 0.3807814149947202, + "grad_norm": 1.6153088808059692, + "learning_rate": 9.948761083890448e-06, + "loss": 0.7203, "step": 5409 }, { - "epoch": 0.4093677878249026, - "grad_norm": 2.1584041118621826, - "learning_rate": 1.235813685335277e-05, - "loss": 0.7722, + "epoch": 0.38085181274199226, + "grad_norm": 1.8846722841262817, + "learning_rate": 9.94731342875926e-06, + "loss": 0.705, "step": 5410 }, { - "epoch": 0.4094434565472362, - "grad_norm": 2.0298099517822266, - "learning_rate": 1.235604718497278e-05, - "loss": 0.7631, + "epoch": 0.38092221048926433, + "grad_norm": 1.7720093727111816, + "learning_rate": 9.945865620394578e-06, + "loss": 0.6559, "step": 5411 }, { - "epoch": 0.40951912526956985, - "grad_norm": 2.1612000465393066, - "learning_rate": 1.2353957306426264e-05, - "loss": 0.7931, + "epoch": 0.3809926082365364, + "grad_norm": 2.1405930519104004, + "learning_rate": 9.944417658871673e-06, + "loss": 0.7616, "step": 5412 }, { - "epoch": 0.40959479399190346, - "grad_norm": 2.1110432147979736, - "learning_rate": 1.2351867217844091e-05, - "loss": 0.7961, + "epoch": 0.38106300598380854, + "grad_norm": 1.9164962768554688, + "learning_rate": 9.94296954426583e-06, + "loss": 0.7202, "step": 5413 }, { - "epoch": 0.40967046271423707, - "grad_norm": 2.3404643535614014, - "learning_rate": 1.2349776919357153e-05, - "loss": 0.7194, + "epoch": 0.3811334037310806, + "grad_norm": 2.0604851245880127, + "learning_rate": 9.941521276652333e-06, + "loss": 0.689, "step": 5414 }, { - "epoch": 0.4097461314365707, - "grad_norm": 2.3472416400909424, - "learning_rate": 1.2347686411096347e-05, - "loss": 0.7346, + "epoch": 0.3812038014783527, + "grad_norm": 1.652294397354126, + "learning_rate": 9.940072856106483e-06, + "loss": 0.6988, "step": 5415 }, { - "epoch": 0.4098218001589043, - "grad_norm": 2.5707545280456543, - "learning_rate": 1.2345595693192594e-05, - "loss": 0.7142, + "epoch": 0.3812741992256248, + "grad_norm": 1.905492901802063, + "learning_rate": 9.93862428270358e-06, + "loss": 0.7544, "step": 5416 }, { - "epoch": 0.40989746888123796, - "grad_norm": 1.96560800075531, - "learning_rate": 1.2343504765776816e-05, - "loss": 0.8404, + "epoch": 0.38134459697289685, + "grad_norm": 1.8604322671890259, + "learning_rate": 9.937175556518939e-06, + "loss": 0.7726, "step": 5417 }, { - "epoch": 0.40997313760357157, - "grad_norm": 1.9135947227478027, - "learning_rate": 1.2341413628979957e-05, - "loss": 0.697, + "epoch": 0.38141499472016893, + "grad_norm": 2.2112197875976562, + "learning_rate": 9.935726677627882e-06, + "loss": 0.717, "step": 5418 }, { - "epoch": 0.4100488063259052, - "grad_norm": 2.3961076736450195, - "learning_rate": 1.2339322282932964e-05, - "loss": 0.7307, + "epoch": 0.38148539246744106, + "grad_norm": 1.941248893737793, + "learning_rate": 9.934277646105737e-06, + "loss": 0.7066, "step": 5419 }, { - "epoch": 0.4101244750482388, - "grad_norm": 2.5133774280548096, - "learning_rate": 1.2337230727766815e-05, - "loss": 0.6817, + "epoch": 0.38155579021471314, + "grad_norm": 2.2982091903686523, + "learning_rate": 9.932828462027837e-06, + "loss": 0.7631, "step": 5420 }, { - "epoch": 0.41020014377057246, - "grad_norm": 2.319206714630127, - "learning_rate": 1.233513896361248e-05, - "loss": 0.7036, + "epoch": 0.3816261879619852, + "grad_norm": 1.7509204149246216, + "learning_rate": 9.931379125469532e-06, + "loss": 0.6311, "step": 5421 }, { - "epoch": 0.41027581249290607, - "grad_norm": 2.3848443031311035, - "learning_rate": 1.2333046990600959e-05, - "loss": 0.7175, + "epoch": 0.3816965857092573, + "grad_norm": 1.9317138195037842, + "learning_rate": 9.929929636506172e-06, + "loss": 0.7436, "step": 5422 }, { - "epoch": 0.4103514812152397, - "grad_norm": 3.0399367809295654, - "learning_rate": 1.2330954808863253e-05, - "loss": 0.649, + "epoch": 0.3817669834565294, + "grad_norm": 1.920300006866455, + "learning_rate": 9.928479995213114e-06, + "loss": 0.7388, "step": 5423 }, { - "epoch": 0.4104271499375733, - "grad_norm": 2.1274526119232178, - "learning_rate": 1.2328862418530381e-05, - "loss": 0.7032, + "epoch": 0.38183738120380145, + "grad_norm": 1.7973048686981201, + "learning_rate": 9.927030201665731e-06, + "loss": 0.733, "step": 5424 }, { - "epoch": 0.41050281865990695, - "grad_norm": 2.8769493103027344, - "learning_rate": 1.2326769819733382e-05, - "loss": 0.7368, + "epoch": 0.3819077789510736, + "grad_norm": 2.1074042320251465, + "learning_rate": 9.925580255939395e-06, + "loss": 0.6679, "step": 5425 }, { - "epoch": 0.41057848738224056, - "grad_norm": 2.2980008125305176, - "learning_rate": 1.2324677012603294e-05, - "loss": 0.7648, + "epoch": 0.38197817669834566, + "grad_norm": 1.690592885017395, + "learning_rate": 9.924130158109493e-06, + "loss": 0.5862, "step": 5426 }, { - "epoch": 0.4106541561045742, - "grad_norm": 2.084308624267578, - "learning_rate": 1.232258399727118e-05, - "loss": 0.7854, + "epoch": 0.38204857444561774, + "grad_norm": 1.746494174003601, + "learning_rate": 9.922679908251417e-06, + "loss": 0.6653, "step": 5427 }, { - "epoch": 0.4107298248269078, - "grad_norm": 1.9208357334136963, - "learning_rate": 1.232049077386811e-05, - "loss": 0.6656, + "epoch": 0.3821189721928898, + "grad_norm": 2.034618377685547, + "learning_rate": 9.921229506440564e-06, + "loss": 0.7269, "step": 5428 }, { - "epoch": 0.4108054935492414, - "grad_norm": 1.9888135194778442, - "learning_rate": 1.2318397342525164e-05, - "loss": 0.693, + "epoch": 0.3821893699401619, + "grad_norm": 2.545729637145996, + "learning_rate": 9.919778952752343e-06, + "loss": 0.6695, "step": 5429 }, { - "epoch": 0.41088116227157506, - "grad_norm": 2.4940552711486816, - "learning_rate": 1.2316303703373448e-05, - "loss": 0.8753, + "epoch": 0.382259767687434, + "grad_norm": 1.4270471334457397, + "learning_rate": 9.918328247262171e-06, + "loss": 0.5903, "step": 5430 }, { - "epoch": 0.4109568309939087, - "grad_norm": 1.7741725444793701, - "learning_rate": 1.2314209856544064e-05, - "loss": 0.5688, + "epoch": 0.3823301654347061, + "grad_norm": 1.7483727931976318, + "learning_rate": 9.916877390045469e-06, + "loss": 0.7069, "step": 5431 }, { - "epoch": 0.4110324997162423, - "grad_norm": 2.1441521644592285, - "learning_rate": 1.2312115802168144e-05, - "loss": 0.7552, + "epoch": 0.3824005631819782, + "grad_norm": 1.7942904233932495, + "learning_rate": 9.915426381177672e-06, + "loss": 0.6487, "step": 5432 }, { - "epoch": 0.4111081684385759, - "grad_norm": 2.1466495990753174, - "learning_rate": 1.2310021540376815e-05, - "loss": 0.7369, + "epoch": 0.38247096092925026, + "grad_norm": 1.7310653924942017, + "learning_rate": 9.913975220734215e-06, + "loss": 0.7562, "step": 5433 }, { - "epoch": 0.41118383716090956, - "grad_norm": 2.301936149597168, - "learning_rate": 1.2307927071301235e-05, - "loss": 0.7292, + "epoch": 0.38254135867652234, + "grad_norm": 1.6719640493392944, + "learning_rate": 9.91252390879055e-06, + "loss": 0.7727, "step": 5434 }, { - "epoch": 0.41125950588324317, - "grad_norm": 1.9242736101150513, - "learning_rate": 1.230583239507256e-05, - "loss": 0.7407, + "epoch": 0.3826117564237944, + "grad_norm": 2.5377418994903564, + "learning_rate": 9.911072445422125e-06, + "loss": 0.7976, "step": 5435 }, { - "epoch": 0.4113351746055768, - "grad_norm": 2.5096495151519775, - "learning_rate": 1.2303737511821969e-05, - "loss": 0.6824, + "epoch": 0.38268215417106655, + "grad_norm": 1.7806369066238403, + "learning_rate": 9.90962083070441e-06, + "loss": 0.6842, "step": 5436 }, { - "epoch": 0.4114108433279104, - "grad_norm": 2.5667378902435303, - "learning_rate": 1.2301642421680649e-05, - "loss": 0.7517, + "epoch": 0.3827525519183386, + "grad_norm": 1.8802837133407593, + "learning_rate": 9.908169064712873e-06, + "loss": 0.7328, "step": 5437 }, { - "epoch": 0.41148651205024406, - "grad_norm": 2.038986921310425, - "learning_rate": 1.2299547124779803e-05, - "loss": 0.6629, + "epoch": 0.3828229496656107, + "grad_norm": 1.6242834329605103, + "learning_rate": 9.906717147522993e-06, + "loss": 0.6548, "step": 5438 }, { - "epoch": 0.41156218077257767, - "grad_norm": 2.1099348068237305, - "learning_rate": 1.2297451621250643e-05, - "loss": 0.7357, + "epoch": 0.3828933474128828, + "grad_norm": 1.5010706186294556, + "learning_rate": 9.905265079210255e-06, + "loss": 0.6541, "step": 5439 }, { - "epoch": 0.4116378494949113, - "grad_norm": 1.7685575485229492, - "learning_rate": 1.2295355911224398e-05, - "loss": 0.8865, + "epoch": 0.38296374516015486, + "grad_norm": 1.7151145935058594, + "learning_rate": 9.903812859850154e-06, + "loss": 0.6173, "step": 5440 }, { - "epoch": 0.4117135182172449, - "grad_norm": 2.0220160484313965, - "learning_rate": 1.2293259994832306e-05, - "loss": 0.7703, + "epoch": 0.383034142907427, + "grad_norm": 1.733075499534607, + "learning_rate": 9.902360489518195e-06, + "loss": 0.7025, "step": 5441 }, { - "epoch": 0.4117891869395785, - "grad_norm": 1.9990547895431519, - "learning_rate": 1.2291163872205624e-05, - "loss": 0.8773, + "epoch": 0.38310454065469907, + "grad_norm": 2.04484224319458, + "learning_rate": 9.900907968289882e-06, + "loss": 0.631, "step": 5442 }, { - "epoch": 0.41186485566191217, - "grad_norm": 2.4579837322235107, - "learning_rate": 1.2289067543475613e-05, - "loss": 0.7528, + "epoch": 0.38317493840197114, + "grad_norm": 1.658518671989441, + "learning_rate": 9.899455296240738e-06, + "loss": 0.7258, "step": 5443 }, { - "epoch": 0.4119405243842458, - "grad_norm": 2.3881900310516357, - "learning_rate": 1.2286971008773552e-05, - "loss": 0.8378, + "epoch": 0.3832453361492432, + "grad_norm": 1.773139476776123, + "learning_rate": 9.898002473446289e-06, + "loss": 0.7011, "step": 5444 }, { - "epoch": 0.4120161931065794, - "grad_norm": 2.1781957149505615, - "learning_rate": 1.228487426823074e-05, - "loss": 0.824, + "epoch": 0.3833157338965153, + "grad_norm": 2.8763339519500732, + "learning_rate": 9.896549499982063e-06, + "loss": 0.6225, "step": 5445 }, { - "epoch": 0.412091861828913, - "grad_norm": 2.6942150592803955, - "learning_rate": 1.2282777321978474e-05, - "loss": 0.8154, + "epoch": 0.3833861316437874, + "grad_norm": 1.49462890625, + "learning_rate": 9.895096375923607e-06, + "loss": 0.7976, "step": 5446 }, { - "epoch": 0.41216753055124666, - "grad_norm": 1.4597055912017822, - "learning_rate": 1.2280680170148075e-05, - "loss": 0.7794, + "epoch": 0.3834565293910595, + "grad_norm": 1.7840460538864136, + "learning_rate": 9.893643101346471e-06, + "loss": 0.847, "step": 5447 }, { - "epoch": 0.4122431992735803, - "grad_norm": 2.1186511516571045, - "learning_rate": 1.2278582812870874e-05, - "loss": 0.6442, + "epoch": 0.3835269271383316, + "grad_norm": 2.023862361907959, + "learning_rate": 9.892189676326206e-06, + "loss": 0.8902, "step": 5448 }, { - "epoch": 0.4123188679959139, - "grad_norm": 1.8283412456512451, - "learning_rate": 1.2276485250278211e-05, - "loss": 0.8088, + "epoch": 0.38359732488560366, + "grad_norm": 1.815556526184082, + "learning_rate": 9.890736100938382e-06, + "loss": 0.7474, "step": 5449 }, { - "epoch": 0.4123945367182475, - "grad_norm": 2.1991498470306396, - "learning_rate": 1.2274387482501444e-05, - "loss": 0.7892, + "epoch": 0.38366772263287574, + "grad_norm": 1.677909016609192, + "learning_rate": 9.889282375258572e-06, + "loss": 0.7014, "step": 5450 }, { - "epoch": 0.41247020544058116, - "grad_norm": 1.786555290222168, - "learning_rate": 1.2272289509671943e-05, - "loss": 0.6558, + "epoch": 0.3837381203801478, + "grad_norm": 1.6628671884536743, + "learning_rate": 9.88782849936235e-06, + "loss": 0.697, "step": 5451 }, { - "epoch": 0.41254587416291477, - "grad_norm": 1.8185572624206543, - "learning_rate": 1.227019133192109e-05, - "loss": 0.8318, + "epoch": 0.3838085181274199, + "grad_norm": 1.5931270122528076, + "learning_rate": 9.886374473325312e-06, + "loss": 0.64, "step": 5452 }, { - "epoch": 0.4126215428852484, - "grad_norm": 2.238388776779175, - "learning_rate": 1.2268092949380277e-05, - "loss": 0.6967, + "epoch": 0.38387891587469203, + "grad_norm": 1.5688481330871582, + "learning_rate": 9.88492029722305e-06, + "loss": 0.6776, "step": 5453 }, { - "epoch": 0.412697211607582, - "grad_norm": 1.840320348739624, - "learning_rate": 1.2265994362180915e-05, - "loss": 0.7751, + "epoch": 0.3839493136219641, + "grad_norm": 1.6977131366729736, + "learning_rate": 9.88346597113117e-06, + "loss": 0.7816, "step": 5454 }, { - "epoch": 0.4127728803299156, - "grad_norm": 2.3982057571411133, - "learning_rate": 1.2263895570454424e-05, - "loss": 0.7067, + "epoch": 0.3840197113692362, + "grad_norm": 1.942418098449707, + "learning_rate": 9.882011495125282e-06, + "loss": 0.682, "step": 5455 }, { - "epoch": 0.41284854905224927, - "grad_norm": 3.4811136722564697, - "learning_rate": 1.2261796574332232e-05, - "loss": 0.6473, + "epoch": 0.38409010911650826, + "grad_norm": 2.0211074352264404, + "learning_rate": 9.880556869281007e-06, + "loss": 0.7102, "step": 5456 }, { - "epoch": 0.4129242177745829, - "grad_norm": 1.9703245162963867, - "learning_rate": 1.225969737394579e-05, - "loss": 0.7064, + "epoch": 0.38416050686378034, + "grad_norm": 2.2531683444976807, + "learning_rate": 9.879102093673972e-06, + "loss": 0.689, "step": 5457 }, { - "epoch": 0.4129998864969165, - "grad_norm": 2.2226948738098145, - "learning_rate": 1.2257597969426555e-05, - "loss": 0.6056, + "epoch": 0.38423090461105247, + "grad_norm": 1.9771634340286255, + "learning_rate": 9.87764716837981e-06, + "loss": 0.6562, "step": 5458 }, { - "epoch": 0.4130755552192501, - "grad_norm": 2.0323541164398193, - "learning_rate": 1.2255498360905998e-05, - "loss": 0.7867, + "epoch": 0.38430130235832455, + "grad_norm": 2.429993152618408, + "learning_rate": 9.876192093474167e-06, + "loss": 0.688, "step": 5459 }, { - "epoch": 0.41315122394158377, - "grad_norm": 1.938133716583252, - "learning_rate": 1.2253398548515604e-05, - "loss": 0.6533, + "epoch": 0.3843717001055966, + "grad_norm": 2.0451362133026123, + "learning_rate": 9.87473686903269e-06, + "loss": 0.7217, "step": 5460 }, { - "epoch": 0.4132268926639174, - "grad_norm": 2.4267141819000244, - "learning_rate": 1.2251298532386874e-05, - "loss": 0.6883, + "epoch": 0.3844420978528687, + "grad_norm": 1.6083104610443115, + "learning_rate": 9.873281495131041e-06, + "loss": 0.6862, "step": 5461 }, { - "epoch": 0.413302561386251, - "grad_norm": 2.085056781768799, - "learning_rate": 1.224919831265131e-05, - "loss": 0.6782, + "epoch": 0.3845124956001408, + "grad_norm": 2.018548011779785, + "learning_rate": 9.871825971844886e-06, + "loss": 0.7826, "step": 5462 }, { - "epoch": 0.4133782301085846, - "grad_norm": 1.7166036367416382, - "learning_rate": 1.2247097889440441e-05, - "loss": 0.8407, + "epoch": 0.38458289334741286, + "grad_norm": 1.798581600189209, + "learning_rate": 9.870370299249897e-06, + "loss": 0.6343, "step": 5463 }, { - "epoch": 0.41345389883091826, - "grad_norm": 1.9741954803466797, - "learning_rate": 1.2244997262885797e-05, - "loss": 0.6178, + "epoch": 0.384653291094685, + "grad_norm": 1.7351197004318237, + "learning_rate": 9.868914477421757e-06, + "loss": 0.6723, "step": 5464 }, { - "epoch": 0.4135295675532519, - "grad_norm": 3.0332493782043457, - "learning_rate": 1.224289643311893e-05, - "loss": 0.5895, + "epoch": 0.38472368884195707, + "grad_norm": 1.4689069986343384, + "learning_rate": 9.867458506436156e-06, + "loss": 0.6523, "step": 5465 }, { - "epoch": 0.4136052362755855, - "grad_norm": 2.2371206283569336, - "learning_rate": 1.2240795400271402e-05, - "loss": 0.6845, + "epoch": 0.38479408658922915, + "grad_norm": 1.7129102945327759, + "learning_rate": 9.866002386368787e-06, + "loss": 0.8188, "step": 5466 }, { - "epoch": 0.4136809049979191, - "grad_norm": 2.590519666671753, - "learning_rate": 1.223869416447478e-05, - "loss": 0.7363, + "epoch": 0.3848644843365012, + "grad_norm": 2.063417911529541, + "learning_rate": 9.86454611729536e-06, + "loss": 0.8082, "step": 5467 }, { - "epoch": 0.4137565737202527, - "grad_norm": 2.10429310798645, - "learning_rate": 1.2236592725860656e-05, - "loss": 0.7608, + "epoch": 0.3849348820837733, + "grad_norm": 1.6520217657089233, + "learning_rate": 9.863089699291587e-06, + "loss": 0.7352, "step": 5468 }, { - "epoch": 0.4138322424425864, - "grad_norm": 2.429518461227417, - "learning_rate": 1.2234491084560629e-05, - "loss": 0.6788, + "epoch": 0.38500527983104543, + "grad_norm": 2.491246223449707, + "learning_rate": 9.861633132433189e-06, + "loss": 0.682, "step": 5469 }, { - "epoch": 0.41390791116492, - "grad_norm": 2.2328531742095947, - "learning_rate": 1.2232389240706306e-05, - "loss": 0.7914, + "epoch": 0.3850756775783175, + "grad_norm": 1.4545092582702637, + "learning_rate": 9.860176416795891e-06, + "loss": 0.6335, "step": 5470 }, { - "epoch": 0.4139835798872536, - "grad_norm": 1.950385570526123, - "learning_rate": 1.2230287194429316e-05, - "loss": 0.777, + "epoch": 0.3851460753255896, + "grad_norm": 1.724002480506897, + "learning_rate": 9.858719552455428e-06, + "loss": 0.617, "step": 5471 }, { - "epoch": 0.4140592486095872, - "grad_norm": 2.1990959644317627, - "learning_rate": 1.2228184945861291e-05, - "loss": 0.8321, + "epoch": 0.38521647307286166, + "grad_norm": 1.8568593263626099, + "learning_rate": 9.85726253948755e-06, + "loss": 0.6157, "step": 5472 }, { - "epoch": 0.41413491733192087, - "grad_norm": 2.1966259479522705, - "learning_rate": 1.2226082495133886e-05, - "loss": 0.644, + "epoch": 0.38528687082013374, + "grad_norm": 1.8755921125411987, + "learning_rate": 9.855805377968004e-06, + "loss": 0.681, "step": 5473 }, { - "epoch": 0.4142105860542545, - "grad_norm": 2.390727996826172, - "learning_rate": 1.2223979842378756e-05, - "loss": 0.6661, + "epoch": 0.3853572685674058, + "grad_norm": 1.6114517450332642, + "learning_rate": 9.85434806797255e-06, + "loss": 0.6811, "step": 5474 }, { - "epoch": 0.4142862547765881, - "grad_norm": 2.415733575820923, - "learning_rate": 1.2221876987727586e-05, - "loss": 0.7288, + "epoch": 0.38542766631467795, + "grad_norm": 2.003673553466797, + "learning_rate": 9.852890609576957e-06, + "loss": 0.817, "step": 5475 }, { - "epoch": 0.4143619234989217, - "grad_norm": 2.3557534217834473, - "learning_rate": 1.2219773931312057e-05, - "loss": 0.6913, + "epoch": 0.38549806406195003, + "grad_norm": 1.8812379837036133, + "learning_rate": 9.851433002856992e-06, + "loss": 0.7006, "step": 5476 }, { - "epoch": 0.41443759222125537, - "grad_norm": 2.2471041679382324, - "learning_rate": 1.221767067326387e-05, - "loss": 0.7311, + "epoch": 0.3855684618092221, + "grad_norm": 2.1441457271575928, + "learning_rate": 9.849975247888446e-06, + "loss": 0.8069, "step": 5477 }, { - "epoch": 0.414513260943589, - "grad_norm": 1.8936131000518799, - "learning_rate": 1.221556721371474e-05, - "loss": 0.6176, + "epoch": 0.3856388595564942, + "grad_norm": 1.782468318939209, + "learning_rate": 9.848517344747108e-06, + "loss": 0.7295, "step": 5478 }, { - "epoch": 0.4145889296659226, - "grad_norm": 1.792964220046997, - "learning_rate": 1.2213463552796388e-05, - "loss": 0.6454, + "epoch": 0.38570925730376626, + "grad_norm": 1.771806240081787, + "learning_rate": 9.84705929350877e-06, + "loss": 0.6455, "step": 5479 }, { - "epoch": 0.4146645983882562, - "grad_norm": 2.177844762802124, - "learning_rate": 1.2211359690640556e-05, - "loss": 0.8097, + "epoch": 0.38577965505103834, + "grad_norm": 2.2588231563568115, + "learning_rate": 9.845601094249241e-06, + "loss": 0.7738, "step": 5480 }, { - "epoch": 0.4147402671105898, - "grad_norm": 1.9635275602340698, - "learning_rate": 1.2209255627378992e-05, - "loss": 0.7149, + "epoch": 0.38585005279831047, + "grad_norm": 1.7788875102996826, + "learning_rate": 9.844142747044336e-06, + "loss": 0.6965, "step": 5481 }, { - "epoch": 0.4148159358329235, - "grad_norm": 1.988793969154358, - "learning_rate": 1.2207151363143462e-05, - "loss": 0.7471, + "epoch": 0.38592045054558255, + "grad_norm": 1.8683902025222778, + "learning_rate": 9.842684251969872e-06, + "loss": 0.7494, "step": 5482 }, { - "epoch": 0.4148916045552571, - "grad_norm": 2.298090696334839, - "learning_rate": 1.220504689806574e-05, - "loss": 0.8539, + "epoch": 0.3859908482928546, + "grad_norm": 1.7153949737548828, + "learning_rate": 9.841225609101682e-06, + "loss": 0.608, "step": 5483 }, { - "epoch": 0.4149672732775907, - "grad_norm": 1.7271684408187866, - "learning_rate": 1.2202942232277616e-05, - "loss": 0.8253, + "epoch": 0.3860612460401267, + "grad_norm": 1.6623295545578003, + "learning_rate": 9.839766818515596e-06, + "loss": 0.7013, "step": 5484 }, { - "epoch": 0.4150429419999243, - "grad_norm": 1.700923204421997, - "learning_rate": 1.2200837365910887e-05, - "loss": 0.7333, + "epoch": 0.3861316437873988, + "grad_norm": 1.794837474822998, + "learning_rate": 9.838307880287464e-06, + "loss": 0.7167, "step": 5485 }, { - "epoch": 0.415118610722258, - "grad_norm": 2.043684482574463, - "learning_rate": 1.2198732299097373e-05, - "loss": 0.7908, + "epoch": 0.3862020415346709, + "grad_norm": 1.9074307680130005, + "learning_rate": 9.836848794493133e-06, + "loss": 0.8325, "step": 5486 }, { - "epoch": 0.4151942794445916, - "grad_norm": 1.729766845703125, - "learning_rate": 1.2196627031968894e-05, - "loss": 0.691, + "epoch": 0.386272439281943, + "grad_norm": 1.783678650856018, + "learning_rate": 9.835389561208466e-06, + "loss": 0.8131, "step": 5487 }, { - "epoch": 0.4152699481669252, - "grad_norm": 2.202939033508301, - "learning_rate": 1.2194521564657293e-05, - "loss": 0.8334, + "epoch": 0.38634283702921507, + "grad_norm": 1.8474059104919434, + "learning_rate": 9.833930180509324e-06, + "loss": 0.6207, "step": 5488 }, { - "epoch": 0.4153456168892588, - "grad_norm": 1.9363715648651123, - "learning_rate": 1.2192415897294418e-05, - "loss": 0.7155, + "epoch": 0.38641323477648715, + "grad_norm": 1.6404483318328857, + "learning_rate": 9.832470652471589e-06, + "loss": 0.7315, "step": 5489 }, { - "epoch": 0.4154212856115925, - "grad_norm": 2.62788724899292, - "learning_rate": 1.2190310030012132e-05, - "loss": 0.6351, + "epoch": 0.3864836325237592, + "grad_norm": 1.7419829368591309, + "learning_rate": 9.831010977171139e-06, + "loss": 0.7266, "step": 5490 }, { - "epoch": 0.4154969543339261, - "grad_norm": 1.856323480606079, - "learning_rate": 1.2188203962942318e-05, - "loss": 0.7471, + "epoch": 0.3865540302710313, + "grad_norm": 1.8262670040130615, + "learning_rate": 9.829551154683862e-06, + "loss": 0.7078, "step": 5491 }, { - "epoch": 0.4155726230562597, - "grad_norm": 2.280324935913086, - "learning_rate": 1.2186097696216856e-05, - "loss": 0.6655, + "epoch": 0.38662442801830343, + "grad_norm": 1.7427153587341309, + "learning_rate": 9.828091185085661e-06, + "loss": 0.7214, "step": 5492 }, { - "epoch": 0.4156482917785933, - "grad_norm": 21.438453674316406, - "learning_rate": 1.2183991229967652e-05, - "loss": 0.615, + "epoch": 0.3866948257655755, + "grad_norm": 2.2244904041290283, + "learning_rate": 9.826631068452436e-06, + "loss": 0.8052, "step": 5493 }, { - "epoch": 0.4157239605009269, - "grad_norm": 2.4099819660186768, - "learning_rate": 1.2181884564326618e-05, - "loss": 0.7488, + "epoch": 0.3867652235128476, + "grad_norm": 2.169015884399414, + "learning_rate": 9.825170804860102e-06, + "loss": 0.6946, "step": 5494 }, { - "epoch": 0.4157996292232606, - "grad_norm": 2.0276858806610107, - "learning_rate": 1.2179777699425683e-05, - "loss": 0.7315, + "epoch": 0.38683562126011967, + "grad_norm": 1.687139868736267, + "learning_rate": 9.82371039438458e-06, + "loss": 0.7275, "step": 5495 }, { - "epoch": 0.4158752979455942, - "grad_norm": 2.4924838542938232, - "learning_rate": 1.2177670635396786e-05, - "loss": 0.6686, + "epoch": 0.38690601900739174, + "grad_norm": 1.883980631828308, + "learning_rate": 9.822249837101797e-06, + "loss": 0.6597, "step": 5496 }, { - "epoch": 0.4159509666679278, - "grad_norm": 2.914191961288452, - "learning_rate": 1.2175563372371872e-05, - "loss": 0.9143, + "epoch": 0.3869764167546639, + "grad_norm": 1.8745591640472412, + "learning_rate": 9.82078913308769e-06, + "loss": 0.8758, "step": 5497 }, { - "epoch": 0.4160266353902614, - "grad_norm": 3.372140884399414, - "learning_rate": 1.217345591048291e-05, - "loss": 0.6866, + "epoch": 0.38704681450193595, + "grad_norm": 1.921337604522705, + "learning_rate": 9.8193282824182e-06, + "loss": 0.8123, "step": 5498 }, { - "epoch": 0.4161023041125951, - "grad_norm": 3.612203598022461, - "learning_rate": 1.2171348249861874e-05, - "loss": 0.7108, + "epoch": 0.38711721224920803, + "grad_norm": 1.8899238109588623, + "learning_rate": 9.81786728516928e-06, + "loss": 0.7704, "step": 5499 }, { - "epoch": 0.4161779728349287, - "grad_norm": 1.8624509572982788, - "learning_rate": 1.2169240390640753e-05, - "loss": 0.8142, + "epoch": 0.3871876099964801, + "grad_norm": 1.7792056798934937, + "learning_rate": 9.816406141416887e-06, + "loss": 0.6385, "step": 5500 }, { - "epoch": 0.4162536415572623, - "grad_norm": 2.179865837097168, - "learning_rate": 1.216713233295155e-05, - "loss": 0.7727, + "epoch": 0.3872580077437522, + "grad_norm": 1.7850033044815063, + "learning_rate": 9.81494485123699e-06, + "loss": 0.7413, "step": 5501 }, { - "epoch": 0.4163293102795959, - "grad_norm": 2.4808688163757324, - "learning_rate": 1.2165024076926276e-05, - "loss": 0.7229, + "epoch": 0.38732840549102426, + "grad_norm": 2.668912887573242, + "learning_rate": 9.813483414705558e-06, + "loss": 0.7166, "step": 5502 }, { - "epoch": 0.4164049790019296, - "grad_norm": 1.9209163188934326, - "learning_rate": 1.2162915622696955e-05, - "loss": 0.6675, + "epoch": 0.3873988032382964, + "grad_norm": 2.1849663257598877, + "learning_rate": 9.812021831898575e-06, + "loss": 0.7033, "step": 5503 }, { - "epoch": 0.4164806477242632, - "grad_norm": 2.1031787395477295, - "learning_rate": 1.216080697039563e-05, - "loss": 0.7635, + "epoch": 0.3874692009855685, + "grad_norm": 1.7751823663711548, + "learning_rate": 9.810560102892034e-06, + "loss": 0.7403, "step": 5504 }, { - "epoch": 0.4165563164465968, - "grad_norm": 2.6245055198669434, - "learning_rate": 1.215869812015435e-05, - "loss": 0.7576, + "epoch": 0.38753959873284055, + "grad_norm": 1.7217925786972046, + "learning_rate": 9.809098227761925e-06, + "loss": 0.776, "step": 5505 }, { - "epoch": 0.4166319851689304, - "grad_norm": 1.9226709604263306, - "learning_rate": 1.2156589072105175e-05, - "loss": 0.7822, + "epoch": 0.38760999648011263, + "grad_norm": 1.7495076656341553, + "learning_rate": 9.807636206584254e-06, + "loss": 0.7212, "step": 5506 }, { - "epoch": 0.4167076538912641, - "grad_norm": 2.297623872756958, - "learning_rate": 1.2154479826380185e-05, - "loss": 0.8283, + "epoch": 0.3876803942273847, + "grad_norm": 2.0366933345794678, + "learning_rate": 9.806174039435037e-06, + "loss": 0.7803, "step": 5507 }, { - "epoch": 0.4167833226135977, - "grad_norm": 2.166672706604004, - "learning_rate": 1.215237038311146e-05, - "loss": 0.8331, + "epoch": 0.3877507919746568, + "grad_norm": 1.7918260097503662, + "learning_rate": 9.804711726390288e-06, + "loss": 0.7786, "step": 5508 }, { - "epoch": 0.4168589913359313, - "grad_norm": 1.8679463863372803, - "learning_rate": 1.215026074243111e-05, - "loss": 0.749, + "epoch": 0.3878211897219289, + "grad_norm": 1.8992435932159424, + "learning_rate": 9.803249267526038e-06, + "loss": 0.7378, "step": 5509 }, { - "epoch": 0.4169346600582649, - "grad_norm": 2.0886306762695312, - "learning_rate": 1.2148150904471246e-05, - "loss": 0.6835, + "epoch": 0.387891587469201, + "grad_norm": 2.0526819229125977, + "learning_rate": 9.801786662918318e-06, + "loss": 0.7413, "step": 5510 }, { - "epoch": 0.4170103287805985, - "grad_norm": 1.7681407928466797, - "learning_rate": 1.2146040869363986e-05, - "loss": 0.768, + "epoch": 0.38796198521647307, + "grad_norm": 1.7563878297805786, + "learning_rate": 9.800323912643172e-06, + "loss": 0.7197, "step": 5511 }, { - "epoch": 0.4170859975029322, - "grad_norm": 2.234034538269043, - "learning_rate": 1.2143930637241473e-05, - "loss": 0.7622, + "epoch": 0.38803238296374515, + "grad_norm": 1.7936469316482544, + "learning_rate": 9.79886101677665e-06, + "loss": 0.7415, "step": 5512 }, { - "epoch": 0.4171616662252658, - "grad_norm": 2.314732789993286, - "learning_rate": 1.2141820208235851e-05, - "loss": 0.778, + "epoch": 0.3881027807110172, + "grad_norm": 2.1601929664611816, + "learning_rate": 9.797397975394811e-06, + "loss": 0.6949, "step": 5513 }, { - "epoch": 0.4172373349475994, - "grad_norm": 2.147493362426758, - "learning_rate": 1.213970958247929e-05, - "loss": 0.644, + "epoch": 0.38817317845828936, + "grad_norm": 1.9066649675369263, + "learning_rate": 9.795934788573717e-06, + "loss": 0.6963, "step": 5514 }, { - "epoch": 0.417313003669933, - "grad_norm": 2.0052413940429688, - "learning_rate": 1.2137598760103958e-05, - "loss": 0.8898, + "epoch": 0.38824357620556144, + "grad_norm": 1.7406415939331055, + "learning_rate": 9.79447145638944e-06, + "loss": 0.611, "step": 5515 }, { - "epoch": 0.4173886723922667, - "grad_norm": 2.1926968097686768, - "learning_rate": 1.2135487741242043e-05, - "loss": 0.8162, + "epoch": 0.3883139739528335, + "grad_norm": 1.5542773008346558, + "learning_rate": 9.79300797891806e-06, + "loss": 0.6599, "step": 5516 }, { - "epoch": 0.4174643411146003, - "grad_norm": 2.2886886596679688, - "learning_rate": 1.2133376526025745e-05, - "loss": 0.6808, + "epoch": 0.3883843717001056, + "grad_norm": 1.883857250213623, + "learning_rate": 9.791544356235667e-06, + "loss": 0.8528, "step": 5517 }, { - "epoch": 0.4175400098369339, - "grad_norm": 2.391803503036499, - "learning_rate": 1.2131265114587274e-05, - "loss": 0.7002, + "epoch": 0.38845476944737767, + "grad_norm": 1.9469116926193237, + "learning_rate": 9.790080588418355e-06, + "loss": 0.7981, "step": 5518 }, { - "epoch": 0.4176156785592675, - "grad_norm": 2.0181946754455566, - "learning_rate": 1.2129153507058856e-05, - "loss": 0.7994, + "epoch": 0.38852516719464975, + "grad_norm": 1.4750159978866577, + "learning_rate": 9.788616675542227e-06, + "loss": 0.8376, "step": 5519 }, { - "epoch": 0.4176913472816012, - "grad_norm": 2.0882043838500977, - "learning_rate": 1.2127041703572722e-05, - "loss": 0.6383, + "epoch": 0.3885955649419219, + "grad_norm": 2.733816385269165, + "learning_rate": 9.787152617683392e-06, + "loss": 0.7147, "step": 5520 }, { - "epoch": 0.4177670160039348, - "grad_norm": 1.928208351135254, - "learning_rate": 1.2124929704261123e-05, - "loss": 0.745, + "epoch": 0.38866596268919396, + "grad_norm": 2.1390366554260254, + "learning_rate": 9.785688414917967e-06, + "loss": 0.7528, "step": 5521 }, { - "epoch": 0.4178426847262684, - "grad_norm": 2.641408681869507, - "learning_rate": 1.212281750925632e-05, - "loss": 1.0537, + "epoch": 0.38873636043646603, + "grad_norm": 1.5453308820724487, + "learning_rate": 9.78422406732208e-06, + "loss": 0.8661, "step": 5522 }, { - "epoch": 0.417918353448602, - "grad_norm": 2.605942726135254, - "learning_rate": 1.2120705118690581e-05, - "loss": 0.6757, + "epoch": 0.3888067581837381, + "grad_norm": 1.8129229545593262, + "learning_rate": 9.782759574971862e-06, + "loss": 0.7151, "step": 5523 }, { - "epoch": 0.4179940221709356, - "grad_norm": 2.2262070178985596, - "learning_rate": 1.2118592532696196e-05, - "loss": 0.8022, + "epoch": 0.3888771559310102, + "grad_norm": 1.7061574459075928, + "learning_rate": 9.781294937943453e-06, + "loss": 0.7256, "step": 5524 }, { - "epoch": 0.4180696908932693, - "grad_norm": 2.1038734912872314, - "learning_rate": 1.2116479751405461e-05, - "loss": 0.6194, + "epoch": 0.3889475536782823, + "grad_norm": 1.8579649925231934, + "learning_rate": 9.779830156313e-06, + "loss": 0.6689, "step": 5525 }, { - "epoch": 0.4181453596156029, - "grad_norm": 1.9260424375534058, - "learning_rate": 1.2114366774950681e-05, - "loss": 0.6886, + "epoch": 0.3890179514255544, + "grad_norm": 2.1256520748138428, + "learning_rate": 9.778365230156662e-06, + "loss": 0.6553, "step": 5526 }, { - "epoch": 0.4182210283379365, - "grad_norm": 1.8009731769561768, - "learning_rate": 1.2112253603464182e-05, - "loss": 0.5855, + "epoch": 0.3890883491728265, + "grad_norm": 1.482027530670166, + "learning_rate": 9.776900159550598e-06, + "loss": 0.7098, "step": 5527 }, { - "epoch": 0.4182966970602701, - "grad_norm": 4.206608772277832, - "learning_rate": 1.2110140237078297e-05, - "loss": 0.8015, + "epoch": 0.38915874692009855, + "grad_norm": 1.990539312362671, + "learning_rate": 9.77543494457098e-06, + "loss": 0.7583, "step": 5528 }, { - "epoch": 0.4183723657826038, - "grad_norm": 2.875774621963501, - "learning_rate": 1.2108026675925371e-05, - "loss": 0.7709, + "epoch": 0.38922914466737063, + "grad_norm": 1.6218023300170898, + "learning_rate": 9.773969585293988e-06, + "loss": 0.5983, "step": 5529 }, { - "epoch": 0.4184480345049374, - "grad_norm": 2.3251543045043945, - "learning_rate": 1.2105912920137762e-05, - "loss": 0.7194, + "epoch": 0.3892995424146427, + "grad_norm": 1.649764060974121, + "learning_rate": 9.772504081795801e-06, + "loss": 0.6374, "step": 5530 }, { - "epoch": 0.418523703227271, - "grad_norm": 1.7996389865875244, - "learning_rate": 1.2103798969847836e-05, - "loss": 0.805, + "epoch": 0.38936994016191484, + "grad_norm": 1.7175627946853638, + "learning_rate": 9.771038434152618e-06, + "loss": 0.7487, "step": 5531 }, { - "epoch": 0.4185993719496046, - "grad_norm": 2.323073148727417, - "learning_rate": 1.2101684825187985e-05, - "loss": 0.7145, + "epoch": 0.3894403379091869, + "grad_norm": 1.8458620309829712, + "learning_rate": 9.769572642440637e-06, + "loss": 0.6347, "step": 5532 }, { - "epoch": 0.4186750406719383, - "grad_norm": 3.068136692047119, - "learning_rate": 1.2099570486290597e-05, - "loss": 0.8114, + "epoch": 0.389510735656459, + "grad_norm": 1.7708427906036377, + "learning_rate": 9.768106706736065e-06, + "loss": 0.8158, "step": 5533 }, { - "epoch": 0.4187507093942719, - "grad_norm": 2.661367416381836, - "learning_rate": 1.209745595328808e-05, - "loss": 0.7873, + "epoch": 0.3895811334037311, + "grad_norm": 1.5983657836914062, + "learning_rate": 9.76664062711512e-06, + "loss": 0.5886, "step": 5534 }, { - "epoch": 0.4188263781166055, - "grad_norm": 1.9999775886535645, - "learning_rate": 1.2095341226312853e-05, - "loss": 0.7032, + "epoch": 0.38965153115100315, + "grad_norm": 1.7792373895645142, + "learning_rate": 9.765174403654022e-06, + "loss": 0.6259, "step": 5535 }, { - "epoch": 0.4189020468389391, - "grad_norm": 2.4388561248779297, - "learning_rate": 1.2093226305497341e-05, - "loss": 0.9638, + "epoch": 0.3897219288982753, + "grad_norm": 1.7615834474563599, + "learning_rate": 9.763708036429003e-06, + "loss": 0.6613, "step": 5536 }, { - "epoch": 0.4189777155612727, - "grad_norm": 2.280811309814453, - "learning_rate": 1.2091111190974e-05, - "loss": 0.7426, + "epoch": 0.38979232664554736, + "grad_norm": 1.7922098636627197, + "learning_rate": 9.762241525516301e-06, + "loss": 0.7671, "step": 5537 }, { - "epoch": 0.4190533842836064, - "grad_norm": 2.1886045932769775, - "learning_rate": 1.2088995882875275e-05, - "loss": 0.7784, + "epoch": 0.38986272439281944, + "grad_norm": 1.7660354375839233, + "learning_rate": 9.760774870992159e-06, + "loss": 0.6645, "step": 5538 }, { - "epoch": 0.41912905300594, - "grad_norm": 2.459237813949585, - "learning_rate": 1.208688038133364e-05, - "loss": 0.8087, + "epoch": 0.3899331221400915, + "grad_norm": 1.6601006984710693, + "learning_rate": 9.759308072932832e-06, + "loss": 0.7733, "step": 5539 }, { - "epoch": 0.4192047217282736, - "grad_norm": 2.0118658542633057, - "learning_rate": 1.2084764686481569e-05, - "loss": 0.7171, + "epoch": 0.3900035198873636, + "grad_norm": 1.7215995788574219, + "learning_rate": 9.75784113141458e-06, + "loss": 0.7263, "step": 5540 }, { - "epoch": 0.4192803904506072, - "grad_norm": 2.3412704467773438, - "learning_rate": 1.2082648798451555e-05, - "loss": 0.7725, + "epoch": 0.39007391763463567, + "grad_norm": 2.0790717601776123, + "learning_rate": 9.756374046513668e-06, + "loss": 0.7428, "step": 5541 }, { - "epoch": 0.4193560591729409, - "grad_norm": 1.796249270439148, - "learning_rate": 1.2080532717376106e-05, - "loss": 0.7044, + "epoch": 0.3901443153819078, + "grad_norm": 1.9518482685089111, + "learning_rate": 9.754906818306374e-06, + "loss": 0.6418, "step": 5542 }, { - "epoch": 0.4194317278952745, - "grad_norm": 2.0164694786071777, - "learning_rate": 1.2078416443387731e-05, - "loss": 0.7137, + "epoch": 0.3902147131291799, + "grad_norm": 1.867910623550415, + "learning_rate": 9.753439446868981e-06, + "loss": 0.6915, "step": 5543 }, { - "epoch": 0.4195073966176081, - "grad_norm": 1.7637386322021484, - "learning_rate": 1.2076299976618965e-05, - "loss": 0.7083, + "epoch": 0.39028511087645196, + "grad_norm": 1.420827865600586, + "learning_rate": 9.751971932277777e-06, + "loss": 0.62, "step": 5544 }, { - "epoch": 0.4195830653399417, - "grad_norm": 1.95462167263031, - "learning_rate": 1.207418331720234e-05, - "loss": 0.7147, + "epoch": 0.39035550862372403, + "grad_norm": 1.7914464473724365, + "learning_rate": 9.750504274609057e-06, + "loss": 0.7767, "step": 5545 }, { - "epoch": 0.4196587340622754, - "grad_norm": 1.7692989110946655, - "learning_rate": 1.2072066465270415e-05, - "loss": 0.7749, + "epoch": 0.3904259063709961, + "grad_norm": 1.8050329685211182, + "learning_rate": 9.749036473939131e-06, + "loss": 0.748, "step": 5546 }, { - "epoch": 0.419734402784609, - "grad_norm": 1.8411818742752075, - "learning_rate": 1.2069949420955753e-05, - "loss": 0.6869, + "epoch": 0.3904963041182682, + "grad_norm": 1.7995966672897339, + "learning_rate": 9.747568530344308e-06, + "loss": 0.6775, "step": 5547 }, { - "epoch": 0.4198100715069426, - "grad_norm": 1.8122678995132446, - "learning_rate": 1.2067832184390928e-05, - "loss": 0.7162, + "epoch": 0.3905667018655403, + "grad_norm": 1.7150462865829468, + "learning_rate": 9.74610044390091e-06, + "loss": 0.5979, "step": 5548 }, { - "epoch": 0.4198857402292762, - "grad_norm": 1.7828391790390015, - "learning_rate": 1.206571475570853e-05, - "loss": 0.6865, + "epoch": 0.3906370996128124, + "grad_norm": 1.7092957496643066, + "learning_rate": 9.744632214685263e-06, + "loss": 0.6112, "step": 5549 }, { - "epoch": 0.4199614089516098, - "grad_norm": 2.397252082824707, - "learning_rate": 1.2063597135041156e-05, - "loss": 0.658, + "epoch": 0.3907074973600845, + "grad_norm": 1.7161226272583008, + "learning_rate": 9.743163842773702e-06, + "loss": 0.7685, "step": 5550 }, { - "epoch": 0.4200370776739435, - "grad_norm": 2.136765956878662, - "learning_rate": 1.2061479322521422e-05, - "loss": 0.935, + "epoch": 0.39077789510735655, + "grad_norm": 1.8535044193267822, + "learning_rate": 9.741695328242565e-06, + "loss": 0.6719, "step": 5551 }, { - "epoch": 0.4201127463962771, - "grad_norm": 1.9939488172531128, - "learning_rate": 1.2059361318281949e-05, - "loss": 0.7466, + "epoch": 0.39084829285462863, + "grad_norm": 1.989148497581482, + "learning_rate": 9.74022667116821e-06, + "loss": 0.6362, "step": 5552 }, { - "epoch": 0.4201884151186107, - "grad_norm": 2.698948860168457, - "learning_rate": 1.2057243122455378e-05, - "loss": 0.6457, + "epoch": 0.39091869060190076, + "grad_norm": 1.5626888275146484, + "learning_rate": 9.738757871626984e-06, + "loss": 0.6967, "step": 5553 }, { - "epoch": 0.4202640838409443, - "grad_norm": 18.516450881958008, - "learning_rate": 1.2055124735174352e-05, - "loss": 0.5688, + "epoch": 0.39098908834917284, + "grad_norm": 1.6351114511489868, + "learning_rate": 9.73728892969526e-06, + "loss": 0.7358, "step": 5554 }, { - "epoch": 0.420339752563278, - "grad_norm": 2.335066556930542, - "learning_rate": 1.2053006156571534e-05, - "loss": 0.7693, + "epoch": 0.3910594860964449, + "grad_norm": 1.9982280731201172, + "learning_rate": 9.735819845449403e-06, + "loss": 0.6404, "step": 5555 }, { - "epoch": 0.4204154212856116, - "grad_norm": 2.284088373184204, - "learning_rate": 1.2050887386779595e-05, - "loss": 0.6985, + "epoch": 0.391129883843717, + "grad_norm": 1.8469774723052979, + "learning_rate": 9.734350618965793e-06, + "loss": 0.7676, "step": 5556 }, { - "epoch": 0.4204910900079452, - "grad_norm": 1.7979247570037842, - "learning_rate": 1.2048768425931222e-05, - "loss": 0.8248, + "epoch": 0.3912002815909891, + "grad_norm": 2.013108253479004, + "learning_rate": 9.73288125032082e-06, + "loss": 0.7444, "step": 5557 }, { - "epoch": 0.4205667587302788, - "grad_norm": 1.9598959684371948, - "learning_rate": 1.204664927415911e-05, - "loss": 0.8005, + "epoch": 0.39127067933826115, + "grad_norm": 2.588761568069458, + "learning_rate": 9.731411739590877e-06, + "loss": 0.7033, "step": 5558 }, { - "epoch": 0.4206424274526125, - "grad_norm": 1.7757333517074585, - "learning_rate": 1.2044529931595964e-05, - "loss": 0.6955, + "epoch": 0.3913410770855333, + "grad_norm": 1.910742998123169, + "learning_rate": 9.729942086852363e-06, + "loss": 0.8642, "step": 5559 }, { - "epoch": 0.4207180961749461, - "grad_norm": 2.177375078201294, - "learning_rate": 1.2042410398374509e-05, - "loss": 0.6263, + "epoch": 0.39141147483280536, + "grad_norm": 1.99662446975708, + "learning_rate": 9.728472292181686e-06, + "loss": 0.7718, "step": 5560 }, { - "epoch": 0.4207937648972797, - "grad_norm": 2.13222336769104, - "learning_rate": 1.2040290674627471e-05, - "loss": 0.7584, + "epoch": 0.39148187258007744, + "grad_norm": 1.6677974462509155, + "learning_rate": 9.727002355655266e-06, + "loss": 0.6839, "step": 5561 }, { - "epoch": 0.4208694336196133, - "grad_norm": 1.8539749383926392, - "learning_rate": 1.20381707604876e-05, - "loss": 0.7735, + "epoch": 0.3915522703273495, + "grad_norm": 2.004951000213623, + "learning_rate": 9.725532277349522e-06, + "loss": 0.7197, "step": 5562 }, { - "epoch": 0.42094510234194693, - "grad_norm": 2.649493455886841, - "learning_rate": 1.2036050656087648e-05, - "loss": 0.9243, + "epoch": 0.3916226680746216, + "grad_norm": 2.211705207824707, + "learning_rate": 9.724062057340885e-06, + "loss": 0.8211, "step": 5563 }, { - "epoch": 0.4210207710642806, - "grad_norm": 2.361145257949829, - "learning_rate": 1.2033930361560386e-05, - "loss": 0.6677, + "epoch": 0.3916930658218937, + "grad_norm": 1.7051342725753784, + "learning_rate": 9.722591695705798e-06, + "loss": 0.6778, "step": 5564 }, { - "epoch": 0.4210964397866142, - "grad_norm": 1.8541384935379028, - "learning_rate": 1.2031809877038592e-05, - "loss": 0.9055, + "epoch": 0.3917634635691658, + "grad_norm": 1.768874168395996, + "learning_rate": 9.7211211925207e-06, + "loss": 0.757, "step": 5565 }, { - "epoch": 0.4211721085089478, - "grad_norm": 2.4618043899536133, - "learning_rate": 1.2029689202655054e-05, - "loss": 0.8678, + "epoch": 0.3918338613164379, + "grad_norm": 1.9665753841400146, + "learning_rate": 9.71965054786205e-06, + "loss": 0.7929, "step": 5566 }, { - "epoch": 0.42124777723128143, - "grad_norm": 2.1291258335113525, - "learning_rate": 1.2027568338542583e-05, - "loss": 0.7327, + "epoch": 0.39190425906370996, + "grad_norm": 1.7178375720977783, + "learning_rate": 9.718179761806304e-06, + "loss": 0.7465, "step": 5567 }, { - "epoch": 0.4213234459536151, - "grad_norm": 2.079526424407959, - "learning_rate": 1.2025447284833987e-05, - "loss": 0.7069, + "epoch": 0.39197465681098204, + "grad_norm": 1.9417741298675537, + "learning_rate": 9.716708834429929e-06, + "loss": 0.8173, "step": 5568 }, { - "epoch": 0.4213991146759487, - "grad_norm": 1.8269448280334473, - "learning_rate": 1.2023326041662096e-05, - "loss": 0.7895, + "epoch": 0.3920450545582541, + "grad_norm": 1.5716177225112915, + "learning_rate": 9.715237765809402e-06, + "loss": 0.5334, "step": 5569 }, { - "epoch": 0.4214747833982823, - "grad_norm": 1.9055373668670654, - "learning_rate": 1.2021204609159753e-05, - "loss": 0.5952, + "epoch": 0.39211545230552625, + "grad_norm": 1.8033146858215332, + "learning_rate": 9.713766556021204e-06, + "loss": 0.7228, "step": 5570 }, { - "epoch": 0.4215504521206159, - "grad_norm": 2.0945003032684326, - "learning_rate": 1.2019082987459806e-05, - "loss": 0.7579, + "epoch": 0.3921858500527983, + "grad_norm": 1.8960916996002197, + "learning_rate": 9.712295205141827e-06, + "loss": 0.7081, "step": 5571 }, { - "epoch": 0.4216261208429496, - "grad_norm": 1.7482681274414062, - "learning_rate": 1.2016961176695113e-05, - "loss": 0.6244, + "epoch": 0.3922562478000704, + "grad_norm": 1.9240418672561646, + "learning_rate": 9.710823713247762e-06, + "loss": 0.623, "step": 5572 }, { - "epoch": 0.4217017895652832, - "grad_norm": 2.3275108337402344, - "learning_rate": 1.2014839176998557e-05, - "loss": 0.6763, + "epoch": 0.3923266455473425, + "grad_norm": 1.6313966512680054, + "learning_rate": 9.709352080415522e-06, + "loss": 0.7339, "step": 5573 }, { - "epoch": 0.4217774582876168, - "grad_norm": 1.9964745044708252, - "learning_rate": 1.2012716988503021e-05, - "loss": 0.8104, + "epoch": 0.39239704329461456, + "grad_norm": 1.417414903640747, + "learning_rate": 9.70788030672161e-06, + "loss": 0.762, "step": 5574 }, { - "epoch": 0.4218531270099504, - "grad_norm": 1.7609212398529053, - "learning_rate": 1.20105946113414e-05, - "loss": 0.7107, + "epoch": 0.39246744104188663, + "grad_norm": 1.618254542350769, + "learning_rate": 9.706408392242548e-06, + "loss": 0.6462, "step": 5575 }, { - "epoch": 0.42192879573228403, - "grad_norm": 2.2987399101257324, - "learning_rate": 1.200847204564661e-05, - "loss": 0.7246, + "epoch": 0.39253783878915877, + "grad_norm": 1.755558967590332, + "learning_rate": 9.704936337054865e-06, + "loss": 0.7319, "step": 5576 }, { - "epoch": 0.4220044644546177, - "grad_norm": 1.8618190288543701, - "learning_rate": 1.2006349291551564e-05, - "loss": 0.6966, + "epoch": 0.39260823653643084, + "grad_norm": 1.869261622428894, + "learning_rate": 9.703464141235091e-06, + "loss": 0.7422, "step": 5577 }, { - "epoch": 0.4220801331769513, - "grad_norm": 1.7116061449050903, - "learning_rate": 1.2004226349189208e-05, - "loss": 0.8733, + "epoch": 0.3926786342837029, + "grad_norm": 1.6230076551437378, + "learning_rate": 9.70199180485977e-06, + "loss": 0.6059, "step": 5578 }, { - "epoch": 0.4221558018992849, - "grad_norm": 2.474656105041504, - "learning_rate": 1.2002103218692479e-05, - "loss": 0.8025, + "epoch": 0.392749032030975, + "grad_norm": 1.996168613433838, + "learning_rate": 9.700519328005445e-06, + "loss": 0.8045, "step": 5579 }, { - "epoch": 0.42223147062161853, - "grad_norm": 1.4539145231246948, - "learning_rate": 1.1999979900194335e-05, - "loss": 0.8741, + "epoch": 0.3928194297782471, + "grad_norm": 1.3776401281356812, + "learning_rate": 9.699046710748676e-06, + "loss": 0.6437, "step": 5580 }, { - "epoch": 0.4223071393439522, - "grad_norm": 2.465669631958008, - "learning_rate": 1.1997856393827749e-05, - "loss": 0.7923, + "epoch": 0.3928898275255192, + "grad_norm": 2.1745691299438477, + "learning_rate": 9.697573953166023e-06, + "loss": 0.7866, "step": 5581 }, { - "epoch": 0.4223828080662858, - "grad_norm": 2.1911604404449463, - "learning_rate": 1.1995732699725697e-05, - "loss": 0.7288, + "epoch": 0.3929602252727913, + "grad_norm": 1.9311431646347046, + "learning_rate": 9.696101055334057e-06, + "loss": 0.663, "step": 5582 }, { - "epoch": 0.4224584767886194, - "grad_norm": 1.682003378868103, - "learning_rate": 1.1993608818021176e-05, - "loss": 0.7595, + "epoch": 0.39303062302006336, + "grad_norm": 1.8500245809555054, + "learning_rate": 9.694628017329354e-06, + "loss": 0.6654, "step": 5583 }, { - "epoch": 0.42253414551095303, - "grad_norm": 2.285404920578003, - "learning_rate": 1.1991484748847187e-05, - "loss": 0.6823, + "epoch": 0.39310102076733544, + "grad_norm": 1.7813059091567993, + "learning_rate": 9.693154839228502e-06, + "loss": 0.6812, "step": 5584 }, { - "epoch": 0.4226098142332867, - "grad_norm": 3.933152675628662, - "learning_rate": 1.1989360492336747e-05, - "loss": 0.8576, + "epoch": 0.3931714185146075, + "grad_norm": 2.9597959518432617, + "learning_rate": 9.691681521108093e-06, + "loss": 0.7343, "step": 5585 }, { - "epoch": 0.4226854829556203, - "grad_norm": 2.1262471675872803, - "learning_rate": 1.1987236048622886e-05, - "loss": 0.7692, + "epoch": 0.3932418162618796, + "grad_norm": 1.5973812341690063, + "learning_rate": 9.690208063044723e-06, + "loss": 0.6931, "step": 5586 }, { - "epoch": 0.4227611516779539, - "grad_norm": 2.068648099899292, - "learning_rate": 1.198511141783864e-05, - "loss": 0.885, + "epoch": 0.39331221400915173, + "grad_norm": 1.89991295337677, + "learning_rate": 9.688734465114996e-06, + "loss": 0.6674, "step": 5587 }, { - "epoch": 0.4228368204002875, - "grad_norm": 2.879906177520752, - "learning_rate": 1.1982986600117065e-05, - "loss": 0.744, + "epoch": 0.3933826117564238, + "grad_norm": 1.658571481704712, + "learning_rate": 9.687260727395534e-06, + "loss": 0.7988, "step": 5588 }, { - "epoch": 0.42291248912262114, - "grad_norm": 2.1112852096557617, - "learning_rate": 1.198086159559122e-05, - "loss": 0.6636, + "epoch": 0.3934530095036959, + "grad_norm": 1.7606230974197388, + "learning_rate": 9.685786849962949e-06, + "loss": 0.6838, "step": 5589 }, { - "epoch": 0.4229881578449548, - "grad_norm": 2.5208778381347656, - "learning_rate": 1.1978736404394177e-05, - "loss": 0.7342, + "epoch": 0.39352340725096796, + "grad_norm": 1.803332805633545, + "learning_rate": 9.684312832893874e-06, + "loss": 0.6911, "step": 5590 }, { - "epoch": 0.4230638265672884, - "grad_norm": 2.7024381160736084, - "learning_rate": 1.1976611026659029e-05, - "loss": 0.6204, + "epoch": 0.39359380499824004, + "grad_norm": 1.9671108722686768, + "learning_rate": 9.682838676264946e-06, + "loss": 0.777, "step": 5591 }, { - "epoch": 0.423139495289622, - "grad_norm": 1.8119574785232544, - "learning_rate": 1.1974485462518872e-05, - "loss": 0.7252, + "epoch": 0.39366420274551217, + "grad_norm": 1.8974249362945557, + "learning_rate": 9.681364380152805e-06, + "loss": 0.5824, "step": 5592 }, { - "epoch": 0.42321516401195564, - "grad_norm": 2.16031813621521, - "learning_rate": 1.1972359712106811e-05, - "loss": 0.7198, + "epoch": 0.39373460049278425, + "grad_norm": 1.9445838928222656, + "learning_rate": 9.6798899446341e-06, + "loss": 0.6929, "step": 5593 }, { - "epoch": 0.4232908327342893, - "grad_norm": 1.8880118131637573, - "learning_rate": 1.1970233775555975e-05, - "loss": 0.7329, + "epoch": 0.3938049982400563, + "grad_norm": 2.6364235877990723, + "learning_rate": 9.67841536978549e-06, + "loss": 0.8097, "step": 5594 }, { - "epoch": 0.4233665014566229, - "grad_norm": 1.758371114730835, - "learning_rate": 1.196810765299949e-05, - "loss": 0.7565, + "epoch": 0.3938753959873284, + "grad_norm": 1.664697289466858, + "learning_rate": 9.676940655683639e-06, + "loss": 0.7037, "step": 5595 }, { - "epoch": 0.4234421701789565, - "grad_norm": 2.201699733734131, - "learning_rate": 1.1965981344570504e-05, - "loss": 0.6688, + "epoch": 0.3939457937346005, + "grad_norm": 1.778639554977417, + "learning_rate": 9.675465802405219e-06, + "loss": 0.7337, "step": 5596 }, { - "epoch": 0.42351783890129013, - "grad_norm": 1.9135266542434692, - "learning_rate": 1.1963854850402173e-05, - "loss": 0.7328, + "epoch": 0.39401619148187256, + "grad_norm": 1.7590762376785278, + "learning_rate": 9.673990810026911e-06, + "loss": 0.7568, "step": 5597 }, { - "epoch": 0.4235935076236238, - "grad_norm": 2.1251890659332275, - "learning_rate": 1.1961728170627666e-05, - "loss": 0.7701, + "epoch": 0.3940865892291447, + "grad_norm": 1.8086475133895874, + "learning_rate": 9.672515678625396e-06, + "loss": 0.7885, "step": 5598 }, { - "epoch": 0.4236691763459574, - "grad_norm": 2.2974348068237305, - "learning_rate": 1.1959601305380163e-05, - "loss": 0.6692, + "epoch": 0.39415698697641677, + "grad_norm": 1.7939380407333374, + "learning_rate": 9.671040408277372e-06, + "loss": 0.7134, "step": 5599 }, { - "epoch": 0.423744845068291, - "grad_norm": 2.4548261165618896, - "learning_rate": 1.1957474254792851e-05, - "loss": 0.7951, + "epoch": 0.39422738472368885, + "grad_norm": 1.676728367805481, + "learning_rate": 9.669564999059538e-06, + "loss": 0.7754, "step": 5600 }, { - "epoch": 0.42382051379062463, - "grad_norm": 2.009052276611328, - "learning_rate": 1.195534701899894e-05, - "loss": 0.7192, + "epoch": 0.3942977824709609, + "grad_norm": 2.1590068340301514, + "learning_rate": 9.668089451048602e-06, + "loss": 0.755, "step": 5601 }, { - "epoch": 0.42389618251295824, - "grad_norm": 2.0583083629608154, - "learning_rate": 1.1953219598131634e-05, - "loss": 0.6207, + "epoch": 0.394368180218233, + "grad_norm": 2.7915642261505127, + "learning_rate": 9.66661376432128e-06, + "loss": 0.7517, "step": 5602 }, { - "epoch": 0.4239718512352919, - "grad_norm": 1.8193392753601074, - "learning_rate": 1.1951091992324167e-05, - "loss": 0.7451, + "epoch": 0.3944385779655051, + "grad_norm": 1.783827304840088, + "learning_rate": 9.665137938954294e-06, + "loss": 0.8171, "step": 5603 }, { - "epoch": 0.4240475199576255, - "grad_norm": 2.209012269973755, - "learning_rate": 1.1948964201709775e-05, - "loss": 0.6402, + "epoch": 0.3945089757127772, + "grad_norm": 1.699106216430664, + "learning_rate": 9.66366197502437e-06, + "loss": 0.6758, "step": 5604 }, { - "epoch": 0.42412318867995913, - "grad_norm": 2.069322347640991, - "learning_rate": 1.1946836226421708e-05, - "loss": 0.7, + "epoch": 0.3945793734600493, + "grad_norm": 2.1104142665863037, + "learning_rate": 9.662185872608248e-06, + "loss": 0.7068, "step": 5605 }, { - "epoch": 0.42419885740229274, - "grad_norm": 1.7103984355926514, - "learning_rate": 1.1944708066593225e-05, - "loss": 0.674, + "epoch": 0.39464977120732136, + "grad_norm": 1.766754388809204, + "learning_rate": 9.660709631782673e-06, + "loss": 0.6646, "step": 5606 }, { - "epoch": 0.4242745261246264, - "grad_norm": 2.117616891860962, - "learning_rate": 1.1942579722357596e-05, - "loss": 0.6814, + "epoch": 0.39472016895459344, + "grad_norm": 1.743774175643921, + "learning_rate": 9.659233252624393e-06, + "loss": 0.7426, "step": 5607 }, { - "epoch": 0.42435019484696, - "grad_norm": 4.542725086212158, - "learning_rate": 1.1940451193848108e-05, - "loss": 0.7538, + "epoch": 0.3947905667018655, + "grad_norm": 1.7642467021942139, + "learning_rate": 9.657756735210169e-06, + "loss": 0.7035, "step": 5608 }, { - "epoch": 0.4244258635692936, - "grad_norm": 2.359140157699585, - "learning_rate": 1.1938322481198056e-05, - "loss": 0.6849, + "epoch": 0.39486096444913765, + "grad_norm": 1.8940998315811157, + "learning_rate": 9.656280079616767e-06, + "loss": 0.6844, "step": 5609 }, { - "epoch": 0.42450153229162724, - "grad_norm": 3.171555995941162, - "learning_rate": 1.1936193584540747e-05, - "loss": 0.7442, + "epoch": 0.39493136219640973, + "grad_norm": 1.757473349571228, + "learning_rate": 9.654803285920956e-06, + "loss": 0.637, "step": 5610 }, { - "epoch": 0.4245772010139609, - "grad_norm": 2.4823436737060547, - "learning_rate": 1.19340645040095e-05, - "loss": 0.9193, + "epoch": 0.3950017599436818, + "grad_norm": 2.1561524868011475, + "learning_rate": 9.653326354199516e-06, + "loss": 0.7795, "step": 5611 }, { - "epoch": 0.4246528697362945, - "grad_norm": 1.8430533409118652, - "learning_rate": 1.1931935239737643e-05, - "loss": 0.6416, + "epoch": 0.3950721576909539, + "grad_norm": 1.9163944721221924, + "learning_rate": 9.651849284529238e-06, + "loss": 0.7347, "step": 5612 }, { - "epoch": 0.4247285384586281, - "grad_norm": 2.7708842754364014, - "learning_rate": 1.1929805791858518e-05, - "loss": 0.7639, + "epoch": 0.39514255543822596, + "grad_norm": 1.661069393157959, + "learning_rate": 9.650372076986914e-06, + "loss": 0.7712, "step": 5613 }, { - "epoch": 0.42480420718096173, - "grad_norm": 2.5303053855895996, - "learning_rate": 1.1927676160505476e-05, - "loss": 0.7355, + "epoch": 0.39521295318549804, + "grad_norm": 1.9511587619781494, + "learning_rate": 9.648894731649345e-06, + "loss": 0.7881, "step": 5614 }, { - "epoch": 0.42487987590329535, - "grad_norm": 2.337359666824341, - "learning_rate": 1.1925546345811889e-05, - "loss": 0.8643, + "epoch": 0.39528335093277017, + "grad_norm": 1.7308624982833862, + "learning_rate": 9.647417248593342e-06, + "loss": 0.6616, "step": 5615 }, { - "epoch": 0.424955544625629, - "grad_norm": 2.4965403079986572, - "learning_rate": 1.1923416347911123e-05, - "loss": 0.7514, + "epoch": 0.39535374868004225, + "grad_norm": 1.873694658279419, + "learning_rate": 9.645939627895715e-06, + "loss": 0.7186, "step": 5616 }, { - "epoch": 0.4250312133479626, - "grad_norm": 1.8858367204666138, - "learning_rate": 1.192128616693657e-05, - "loss": 0.6455, + "epoch": 0.3954241464273143, + "grad_norm": 1.836734414100647, + "learning_rate": 9.644461869633291e-06, + "loss": 0.7112, "step": 5617 }, { - "epoch": 0.42510688207029623, - "grad_norm": 1.8443236351013184, - "learning_rate": 1.1919155803021628e-05, - "loss": 0.8063, + "epoch": 0.3954945441745864, + "grad_norm": 1.8547680377960205, + "learning_rate": 9.642983973882901e-06, + "loss": 0.7324, "step": 5618 }, { - "epoch": 0.42518255079262984, - "grad_norm": 1.9932689666748047, - "learning_rate": 1.1917025256299713e-05, - "loss": 0.6858, + "epoch": 0.3955649419218585, + "grad_norm": 1.637615442276001, + "learning_rate": 9.64150594072138e-06, + "loss": 0.671, "step": 5619 }, { - "epoch": 0.4252582195149635, - "grad_norm": 2.4727776050567627, - "learning_rate": 1.1914894526904236e-05, - "loss": 0.8452, + "epoch": 0.3956353396691306, + "grad_norm": 1.9111108779907227, + "learning_rate": 9.640027770225573e-06, + "loss": 0.7004, "step": 5620 }, { - "epoch": 0.4253338882372971, - "grad_norm": 1.8388804197311401, - "learning_rate": 1.1912763614968638e-05, - "loss": 0.8343, + "epoch": 0.3957057374164027, + "grad_norm": 1.8978748321533203, + "learning_rate": 9.638549462472332e-06, + "loss": 0.7475, "step": 5621 }, { - "epoch": 0.42540955695963073, - "grad_norm": 1.9806253910064697, - "learning_rate": 1.1910632520626363e-05, - "loss": 0.7089, + "epoch": 0.39577613516367477, + "grad_norm": 1.939159870147705, + "learning_rate": 9.637071017538514e-06, + "loss": 0.7212, "step": 5622 }, { - "epoch": 0.42548522568196434, - "grad_norm": 2.018436908721924, - "learning_rate": 1.1908501244010862e-05, - "loss": 0.7397, + "epoch": 0.39584653291094685, + "grad_norm": 1.8471709489822388, + "learning_rate": 9.635592435500985e-06, + "loss": 0.6181, "step": 5623 }, { - "epoch": 0.425560894404298, - "grad_norm": 2.8145549297332764, - "learning_rate": 1.190636978525561e-05, - "loss": 0.6912, + "epoch": 0.3959169306582189, + "grad_norm": 1.8075332641601562, + "learning_rate": 9.63411371643662e-06, + "loss": 0.7879, "step": 5624 }, { - "epoch": 0.4256365631266316, - "grad_norm": 1.973929762840271, - "learning_rate": 1.190423814449408e-05, - "loss": 0.6269, + "epoch": 0.395987328405491, + "grad_norm": 1.7333685159683228, + "learning_rate": 9.632634860422296e-06, + "loss": 0.8012, "step": 5625 }, { - "epoch": 0.42571223184896523, - "grad_norm": 1.5904262065887451, - "learning_rate": 1.1902106321859764e-05, - "loss": 0.8579, + "epoch": 0.39605772615276313, + "grad_norm": 1.83855140209198, + "learning_rate": 9.6311558675349e-06, + "loss": 0.6778, "step": 5626 }, { - "epoch": 0.42578790057129884, - "grad_norm": 1.901921033859253, - "learning_rate": 1.189997431748616e-05, - "loss": 0.595, + "epoch": 0.3961281239000352, + "grad_norm": 1.9079039096832275, + "learning_rate": 9.629676737851332e-06, + "loss": 0.6928, "step": 5627 }, { - "epoch": 0.4258635692936325, - "grad_norm": 1.7810097932815552, - "learning_rate": 1.189784213150679e-05, - "loss": 0.6377, + "epoch": 0.3961985216473073, + "grad_norm": 1.97743558883667, + "learning_rate": 9.628197471448485e-06, + "loss": 0.643, "step": 5628 }, { - "epoch": 0.4259392380159661, - "grad_norm": 1.7638015747070312, - "learning_rate": 1.189570976405517e-05, - "loss": 0.6832, + "epoch": 0.39626891939457937, + "grad_norm": 1.723567008972168, + "learning_rate": 9.626718068403272e-06, + "loss": 0.7343, "step": 5629 }, { - "epoch": 0.4260149067382997, - "grad_norm": 1.9742308855056763, - "learning_rate": 1.189357721526484e-05, - "loss": 0.7322, + "epoch": 0.39633931714185144, + "grad_norm": 1.6108994483947754, + "learning_rate": 9.625238528792607e-06, + "loss": 0.8485, "step": 5630 }, { - "epoch": 0.42609057546063334, - "grad_norm": 2.480738639831543, - "learning_rate": 1.1891444485269344e-05, - "loss": 0.735, + "epoch": 0.3964097148891235, + "grad_norm": 1.8023483753204346, + "learning_rate": 9.623758852693417e-06, + "loss": 0.679, "step": 5631 }, { - "epoch": 0.42616624418296695, - "grad_norm": 1.8731553554534912, - "learning_rate": 1.1889311574202242e-05, - "loss": 0.7125, + "epoch": 0.39648011263639565, + "grad_norm": 1.8468098640441895, + "learning_rate": 9.622279040182623e-06, + "loss": 0.7465, "step": 5632 }, { - "epoch": 0.4262419129053006, - "grad_norm": 2.045454263687134, - "learning_rate": 1.1887178482197109e-05, - "loss": 0.7475, + "epoch": 0.39655051038366773, + "grad_norm": 1.7993305921554565, + "learning_rate": 9.62079909133717e-06, + "loss": 0.7531, "step": 5633 }, { - "epoch": 0.4263175816276342, - "grad_norm": 2.2472641468048096, - "learning_rate": 1.1885045209387514e-05, - "loss": 0.6585, + "epoch": 0.3966209081309398, + "grad_norm": 2.2064268589019775, + "learning_rate": 9.619319006233996e-06, + "loss": 0.9047, "step": 5634 }, { - "epoch": 0.42639325034996783, - "grad_norm": 1.9361463785171509, - "learning_rate": 1.1882911755907062e-05, - "loss": 0.6429, + "epoch": 0.3966913058782119, + "grad_norm": 1.8723995685577393, + "learning_rate": 9.617838784950056e-06, + "loss": 0.7639, "step": 5635 }, { - "epoch": 0.42646891907230144, - "grad_norm": 1.8335850238800049, - "learning_rate": 1.1880778121889349e-05, - "loss": 0.554, + "epoch": 0.39676170362548396, + "grad_norm": 2.1562721729278564, + "learning_rate": 9.616358427562308e-06, + "loss": 0.5998, "step": 5636 }, { - "epoch": 0.4265445877946351, - "grad_norm": 2.1291892528533936, - "learning_rate": 1.1878644307467992e-05, - "loss": 0.8045, + "epoch": 0.3968321013727561, + "grad_norm": 1.8937634229660034, + "learning_rate": 9.614877934147716e-06, + "loss": 0.7617, "step": 5637 }, { - "epoch": 0.4266202565169687, - "grad_norm": 2.041837692260742, - "learning_rate": 1.187651031277662e-05, - "loss": 0.7902, + "epoch": 0.3969024991200282, + "grad_norm": 1.7877836227416992, + "learning_rate": 9.613397304783251e-06, + "loss": 0.7237, "step": 5638 }, { - "epoch": 0.42669592523930233, - "grad_norm": 1.9485516548156738, - "learning_rate": 1.1874376137948867e-05, - "loss": 0.6962, + "epoch": 0.39697289686730025, + "grad_norm": 1.84523606300354, + "learning_rate": 9.611916539545893e-06, + "loss": 0.6094, "step": 5639 }, { - "epoch": 0.42677159396163594, - "grad_norm": 2.171895742416382, - "learning_rate": 1.1872241783118386e-05, - "loss": 0.6273, + "epoch": 0.39704329461457233, + "grad_norm": 2.0550453662872314, + "learning_rate": 9.61043563851263e-06, + "loss": 0.7467, "step": 5640 }, { - "epoch": 0.4268472626839696, - "grad_norm": 1.8618667125701904, - "learning_rate": 1.187010724841883e-05, - "loss": 0.6796, + "epoch": 0.3971136923618444, + "grad_norm": 1.8072302341461182, + "learning_rate": 9.608954601760452e-06, + "loss": 0.7831, "step": 5641 }, { - "epoch": 0.4269229314063032, - "grad_norm": 1.9820287227630615, - "learning_rate": 1.1867972533983879e-05, - "loss": 0.7338, + "epoch": 0.3971840901091165, + "grad_norm": 2.0989537239074707, + "learning_rate": 9.607473429366365e-06, + "loss": 0.6131, "step": 5642 }, { - "epoch": 0.42699860012863683, - "grad_norm": 1.5241734981536865, - "learning_rate": 1.1865837639947209e-05, - "loss": 0.6571, + "epoch": 0.3972544878563886, + "grad_norm": 1.7158708572387695, + "learning_rate": 9.60599212140737e-06, + "loss": 0.6073, "step": 5643 }, { - "epoch": 0.42707426885097044, - "grad_norm": 1.9339543581008911, - "learning_rate": 1.1863702566442516e-05, - "loss": 0.7887, + "epoch": 0.3973248856036607, + "grad_norm": 1.803992748260498, + "learning_rate": 9.604510677960483e-06, + "loss": 0.704, "step": 5644 }, { - "epoch": 0.42714993757330405, - "grad_norm": 4.0906572341918945, - "learning_rate": 1.1861567313603511e-05, - "loss": 0.6873, + "epoch": 0.39739528335093277, + "grad_norm": 1.9705408811569214, + "learning_rate": 9.60302909910273e-06, + "loss": 0.6783, "step": 5645 }, { - "epoch": 0.4272256062956377, - "grad_norm": 2.432317018508911, - "learning_rate": 1.1859431881563899e-05, - "loss": 0.6544, + "epoch": 0.39746568109820485, + "grad_norm": 1.8181060552597046, + "learning_rate": 9.601547384911134e-06, + "loss": 0.6656, "step": 5646 }, { - "epoch": 0.4273012750179713, - "grad_norm": 2.0724246501922607, - "learning_rate": 1.185729627045742e-05, - "loss": 0.6835, + "epoch": 0.3975360788454769, + "grad_norm": 1.753326177597046, + "learning_rate": 9.600065535462736e-06, + "loss": 0.655, "step": 5647 }, { - "epoch": 0.42737694374030494, - "grad_norm": 2.3478708267211914, - "learning_rate": 1.1855160480417801e-05, - "loss": 0.8091, + "epoch": 0.39760647659274906, + "grad_norm": 2.033822774887085, + "learning_rate": 9.598583550834575e-06, + "loss": 0.7701, "step": 5648 }, { - "epoch": 0.42745261246263855, - "grad_norm": 1.9543095827102661, - "learning_rate": 1.1853024511578802e-05, - "loss": 0.7343, + "epoch": 0.39767687434002114, + "grad_norm": 2.44938325881958, + "learning_rate": 9.597101431103703e-06, + "loss": 0.7676, "step": 5649 }, { - "epoch": 0.4275282811849722, - "grad_norm": 1.7966238260269165, - "learning_rate": 1.1850888364074179e-05, - "loss": 0.6051, + "epoch": 0.3977472720872932, + "grad_norm": 1.893572449684143, + "learning_rate": 9.595619176347173e-06, + "loss": 0.7325, "step": 5650 }, { - "epoch": 0.4276039499073058, - "grad_norm": 1.587853193283081, - "learning_rate": 1.1848752038037708e-05, - "loss": 0.6063, + "epoch": 0.3978176698345653, + "grad_norm": 1.7821191549301147, + "learning_rate": 9.594136786642053e-06, + "loss": 0.7072, "step": 5651 }, { - "epoch": 0.42767961862963944, - "grad_norm": 1.883578896522522, - "learning_rate": 1.1846615533603168e-05, - "loss": 0.8186, + "epoch": 0.39788806758183737, + "grad_norm": 1.7332006692886353, + "learning_rate": 9.592654262065412e-06, + "loss": 0.6879, "step": 5652 }, { - "epoch": 0.42775528735197305, - "grad_norm": 2.1655025482177734, - "learning_rate": 1.1844478850904357e-05, - "loss": 0.7779, + "epoch": 0.39795846532910945, + "grad_norm": 3.2600131034851074, + "learning_rate": 9.591171602694326e-06, + "loss": 0.6917, "step": 5653 }, { - "epoch": 0.4278309560743067, - "grad_norm": 2.625882387161255, - "learning_rate": 1.1842341990075081e-05, - "loss": 0.7361, + "epoch": 0.3980288630763816, + "grad_norm": 2.051196336746216, + "learning_rate": 9.589688808605884e-06, + "loss": 0.7379, "step": 5654 }, { - "epoch": 0.4279066247966403, - "grad_norm": 1.935105323791504, - "learning_rate": 1.1840204951249152e-05, - "loss": 0.7115, + "epoch": 0.39809926082365366, + "grad_norm": 2.25900936126709, + "learning_rate": 9.588205879877174e-06, + "loss": 0.6384, "step": 5655 }, { - "epoch": 0.42798229351897393, - "grad_norm": 2.1714651584625244, - "learning_rate": 1.1838067734560408e-05, - "loss": 0.8187, + "epoch": 0.39816965857092573, + "grad_norm": 1.8466739654541016, + "learning_rate": 9.586722816585299e-06, + "loss": 0.7932, "step": 5656 }, { - "epoch": 0.42805796224130754, - "grad_norm": 2.1692118644714355, - "learning_rate": 1.183593034014268e-05, - "loss": 0.6285, + "epoch": 0.3982400563181978, + "grad_norm": 1.9396183490753174, + "learning_rate": 9.585239618807361e-06, + "loss": 0.7016, "step": 5657 }, { - "epoch": 0.42813363096364115, - "grad_norm": 1.921869158744812, - "learning_rate": 1.1833792768129824e-05, - "loss": 0.6029, + "epoch": 0.3983104540654699, + "grad_norm": 1.9233624935150146, + "learning_rate": 9.583756286620472e-06, + "loss": 0.6394, "step": 5658 }, { - "epoch": 0.4282092996859748, - "grad_norm": 2.104144811630249, - "learning_rate": 1.1831655018655696e-05, - "loss": 0.7716, + "epoch": 0.39838085181274197, + "grad_norm": 1.799169659614563, + "learning_rate": 9.582272820101755e-06, + "loss": 0.6576, "step": 5659 }, { - "epoch": 0.42828496840830843, - "grad_norm": 2.422243595123291, - "learning_rate": 1.1829517091854176e-05, - "loss": 0.7995, + "epoch": 0.3984512495600141, + "grad_norm": 2.190847396850586, + "learning_rate": 9.580789219328334e-06, + "loss": 0.8254, "step": 5660 }, { - "epoch": 0.42836063713064204, - "grad_norm": 1.9468094110488892, - "learning_rate": 1.1827378987859144e-05, - "loss": 0.7132, + "epoch": 0.3985216473072862, + "grad_norm": 1.9190279245376587, + "learning_rate": 9.579305484377346e-06, + "loss": 0.7519, "step": 5661 }, { - "epoch": 0.42843630585297565, - "grad_norm": 1.8024096488952637, - "learning_rate": 1.1825240706804489e-05, - "loss": 0.5364, + "epoch": 0.39859204505455825, + "grad_norm": 1.8280019760131836, + "learning_rate": 9.577821615325928e-06, + "loss": 0.6898, "step": 5662 }, { - "epoch": 0.4285119745753093, - "grad_norm": 2.2247347831726074, - "learning_rate": 1.1823102248824128e-05, - "loss": 0.7529, + "epoch": 0.39866244280183033, + "grad_norm": 2.796816349029541, + "learning_rate": 9.576337612251231e-06, + "loss": 0.7013, "step": 5663 }, { - "epoch": 0.42858764329764293, - "grad_norm": 1.928809404373169, - "learning_rate": 1.182096361405197e-05, - "loss": 0.7429, + "epoch": 0.3987328405491024, + "grad_norm": 1.6733360290527344, + "learning_rate": 9.574853475230409e-06, + "loss": 0.6456, "step": 5664 }, { - "epoch": 0.42866331201997654, - "grad_norm": 1.948986530303955, - "learning_rate": 1.181882480262195e-05, - "loss": 0.7047, + "epoch": 0.39880323829637454, + "grad_norm": 2.0422823429107666, + "learning_rate": 9.573369204340623e-06, + "loss": 0.7284, "step": 5665 }, { - "epoch": 0.42873898074231015, - "grad_norm": 2.77934193611145, - "learning_rate": 1.1816685814668e-05, - "loss": 0.8807, + "epoch": 0.3988736360436466, + "grad_norm": 1.8659354448318481, + "learning_rate": 9.57188479965904e-06, + "loss": 0.7933, "step": 5666 }, { - "epoch": 0.4288146494646438, - "grad_norm": 2.046111583709717, - "learning_rate": 1.1814546650324078e-05, - "loss": 0.8026, + "epoch": 0.3989440337909187, + "grad_norm": 1.6957063674926758, + "learning_rate": 9.570400261262838e-06, + "loss": 0.7279, "step": 5667 }, { - "epoch": 0.4288903181869774, - "grad_norm": 1.4681226015090942, - "learning_rate": 1.181240730972414e-05, - "loss": 0.8824, + "epoch": 0.3990144315381908, + "grad_norm": 1.804283618927002, + "learning_rate": 9.568915589229197e-06, + "loss": 0.7859, "step": 5668 }, { - "epoch": 0.42896598690931104, - "grad_norm": 1.7636147737503052, - "learning_rate": 1.1810267793002158e-05, - "loss": 0.7005, + "epoch": 0.39908482928546285, + "grad_norm": 1.589419960975647, + "learning_rate": 9.56743078363531e-06, + "loss": 0.6942, "step": 5669 }, { - "epoch": 0.42904165563164465, - "grad_norm": 2.328195571899414, - "learning_rate": 1.180812810029212e-05, - "loss": 0.7529, + "epoch": 0.3991552270327349, + "grad_norm": 1.7047122716903687, + "learning_rate": 9.565945844558368e-06, + "loss": 0.7569, "step": 5670 }, { - "epoch": 0.42911732435397826, - "grad_norm": 2.7056803703308105, - "learning_rate": 1.1805988231728015e-05, - "loss": 0.7472, + "epoch": 0.39922562478000706, + "grad_norm": 1.6502279043197632, + "learning_rate": 9.564460772075582e-06, + "loss": 0.7128, "step": 5671 }, { - "epoch": 0.4291929930763119, - "grad_norm": 2.115111827850342, - "learning_rate": 1.1803848187443853e-05, - "loss": 0.8469, + "epoch": 0.39929602252727914, + "grad_norm": 1.975404143333435, + "learning_rate": 9.562975566264156e-06, + "loss": 0.7106, "step": 5672 }, { - "epoch": 0.42926866179864553, - "grad_norm": 1.9240214824676514, - "learning_rate": 1.1801707967573647e-05, - "loss": 0.7624, + "epoch": 0.3993664202745512, + "grad_norm": 1.9239375591278076, + "learning_rate": 9.56149022720131e-06, + "loss": 0.5742, "step": 5673 }, { - "epoch": 0.42934433052097914, - "grad_norm": 1.9669137001037598, - "learning_rate": 1.179956757225143e-05, - "loss": 0.69, + "epoch": 0.3994368180218233, + "grad_norm": 1.9068118333816528, + "learning_rate": 9.560004754964265e-06, + "loss": 0.7419, "step": 5674 }, { - "epoch": 0.42941999924331276, - "grad_norm": 1.713476300239563, - "learning_rate": 1.1797427001611232e-05, - "loss": 0.7508, + "epoch": 0.39950721576909537, + "grad_norm": 1.796889305114746, + "learning_rate": 9.558519149630258e-06, + "loss": 0.5982, "step": 5675 }, { - "epoch": 0.4294956679656464, - "grad_norm": 2.3911690711975098, - "learning_rate": 1.179528625578711e-05, - "loss": 0.6873, + "epoch": 0.3995776135163675, + "grad_norm": 2.1433496475219727, + "learning_rate": 9.55703341127652e-06, + "loss": 0.5911, "step": 5676 }, { - "epoch": 0.42957133668798003, - "grad_norm": 2.203371047973633, - "learning_rate": 1.1793145334913121e-05, - "loss": 0.7431, + "epoch": 0.3996480112636396, + "grad_norm": 1.7491233348846436, + "learning_rate": 9.5555475399803e-06, + "loss": 0.6094, "step": 5677 }, { - "epoch": 0.42964700541031364, - "grad_norm": 1.975543737411499, - "learning_rate": 1.1791004239123336e-05, - "loss": 0.7112, + "epoch": 0.39971840901091166, + "grad_norm": 2.0059337615966797, + "learning_rate": 9.554061535818848e-06, + "loss": 0.8125, "step": 5678 }, { - "epoch": 0.42972267413264725, - "grad_norm": 1.8592123985290527, - "learning_rate": 1.1788862968551842e-05, - "loss": 0.6954, + "epoch": 0.39978880675818373, + "grad_norm": 1.8465811014175415, + "learning_rate": 9.552575398869427e-06, + "loss": 0.6843, "step": 5679 }, { - "epoch": 0.4297983428549809, - "grad_norm": 2.5964951515197754, - "learning_rate": 1.1786721523332723e-05, - "loss": 0.6297, + "epoch": 0.3998592045054558, + "grad_norm": 2.027442216873169, + "learning_rate": 9.551089129209295e-06, + "loss": 0.794, "step": 5680 }, { - "epoch": 0.42987401157731453, - "grad_norm": 2.0938498973846436, - "learning_rate": 1.1784579903600093e-05, - "loss": 0.6312, + "epoch": 0.3999296022527279, + "grad_norm": 1.8214315176010132, + "learning_rate": 9.549602726915732e-06, + "loss": 0.7609, "step": 5681 }, { - "epoch": 0.42994968029964814, - "grad_norm": 1.6955291032791138, - "learning_rate": 1.1782438109488063e-05, - "loss": 0.7806, + "epoch": 0.4, + "grad_norm": 1.9064480066299438, + "learning_rate": 9.54811619206601e-06, + "loss": 0.6892, "step": 5682 }, { - "epoch": 0.43002534902198175, - "grad_norm": 2.2497923374176025, - "learning_rate": 1.1780296141130756e-05, - "loss": 0.7267, + "epoch": 0.4000703977472721, + "grad_norm": 1.8409302234649658, + "learning_rate": 9.546629524737421e-06, + "loss": 0.7295, "step": 5683 }, { - "epoch": 0.43010101774431536, - "grad_norm": 1.6915230751037598, - "learning_rate": 1.1778153998662316e-05, - "loss": 0.708, + "epoch": 0.4001407954945442, + "grad_norm": 2.028588056564331, + "learning_rate": 9.545142725007261e-06, + "loss": 0.7057, "step": 5684 }, { - "epoch": 0.430176686466649, - "grad_norm": 1.8355711698532104, - "learning_rate": 1.1776011682216882e-05, - "loss": 0.6188, + "epoch": 0.40021119324181625, + "grad_norm": 1.7423746585845947, + "learning_rate": 9.543655792952823e-06, + "loss": 0.7489, "step": 5685 }, { - "epoch": 0.43025235518898264, - "grad_norm": 1.9086350202560425, - "learning_rate": 1.1773869191928624e-05, - "loss": 0.6782, + "epoch": 0.40028159098908833, + "grad_norm": 3.653327703475952, + "learning_rate": 9.542168728651413e-06, + "loss": 0.7592, "step": 5686 }, { - "epoch": 0.43032802391131625, - "grad_norm": 2.346781015396118, - "learning_rate": 1.17717265279317e-05, - "loss": 0.694, + "epoch": 0.4003519887363604, + "grad_norm": 1.5378414392471313, + "learning_rate": 9.540681532180355e-06, + "loss": 0.8099, "step": 5687 }, { - "epoch": 0.43040369263364986, - "grad_norm": 2.6471588611602783, - "learning_rate": 1.17695836903603e-05, - "loss": 0.6686, + "epoch": 0.40042238648363254, + "grad_norm": 1.7829344272613525, + "learning_rate": 9.53919420361696e-06, + "loss": 0.7664, "step": 5688 }, { - "epoch": 0.4304793613559835, - "grad_norm": 2.062077045440674, - "learning_rate": 1.1767440679348607e-05, - "loss": 0.7097, + "epoch": 0.4004927842309046, + "grad_norm": 1.5502066612243652, + "learning_rate": 9.537706743038558e-06, + "loss": 0.7225, "step": 5689 }, { - "epoch": 0.43055503007831714, - "grad_norm": 2.0753626823425293, - "learning_rate": 1.1765297495030831e-05, - "loss": 0.6988, + "epoch": 0.4005631819781767, + "grad_norm": 1.6796952486038208, + "learning_rate": 9.536219150522487e-06, + "loss": 0.7088, "step": 5690 }, { - "epoch": 0.43063069880065075, - "grad_norm": 2.3270702362060547, - "learning_rate": 1.1763154137541183e-05, - "loss": 0.7583, + "epoch": 0.4006335797254488, + "grad_norm": 1.7462257146835327, + "learning_rate": 9.534731426146083e-06, + "loss": 0.7628, "step": 5691 }, { - "epoch": 0.43070636752298436, - "grad_norm": 1.7685538530349731, - "learning_rate": 1.1761010607013883e-05, - "loss": 0.6572, + "epoch": 0.40070397747272085, + "grad_norm": 1.6705410480499268, + "learning_rate": 9.533243569986695e-06, + "loss": 0.6729, "step": 5692 }, { - "epoch": 0.430782036245318, - "grad_norm": 1.7218018770217896, - "learning_rate": 1.175886690358317e-05, - "loss": 0.5876, + "epoch": 0.400774375219993, + "grad_norm": 1.6411266326904297, + "learning_rate": 9.531755582121683e-06, + "loss": 0.7266, "step": 5693 }, { - "epoch": 0.43085770496765163, - "grad_norm": 1.9361426830291748, - "learning_rate": 1.1756723027383286e-05, - "loss": 0.6186, + "epoch": 0.40084477296726506, + "grad_norm": 1.6925642490386963, + "learning_rate": 9.530267462628402e-06, + "loss": 0.671, "step": 5694 }, { - "epoch": 0.43093337368998524, - "grad_norm": 2.054652214050293, - "learning_rate": 1.1754578978548493e-05, - "loss": 0.7866, + "epoch": 0.40091517071453714, + "grad_norm": 1.7912423610687256, + "learning_rate": 9.528779211584226e-06, + "loss": 0.7795, "step": 5695 }, { - "epoch": 0.43100904241231885, - "grad_norm": 3.165121555328369, - "learning_rate": 1.1752434757213053e-05, - "loss": 0.6932, + "epoch": 0.4009855684618092, + "grad_norm": 2.158135414123535, + "learning_rate": 9.52729082906653e-06, + "loss": 0.6136, "step": 5696 }, { - "epoch": 0.43108471113465247, - "grad_norm": 1.8090955018997192, - "learning_rate": 1.1750290363511248e-05, - "loss": 0.7486, + "epoch": 0.4010559662090813, + "grad_norm": 1.7546265125274658, + "learning_rate": 9.525802315152692e-06, + "loss": 0.685, "step": 5697 }, { - "epoch": 0.43116037985698613, - "grad_norm": 2.016489028930664, - "learning_rate": 1.1748145797577363e-05, - "loss": 0.6689, + "epoch": 0.40112636395635337, + "grad_norm": 1.826875925064087, + "learning_rate": 9.524313669920106e-06, + "loss": 0.6716, "step": 5698 }, { - "epoch": 0.43123604857931974, - "grad_norm": 2.5373289585113525, - "learning_rate": 1.17460010595457e-05, - "loss": 0.8227, + "epoch": 0.4011967617036255, + "grad_norm": 1.7154937982559204, + "learning_rate": 9.522824893446167e-06, + "loss": 0.6949, "step": 5699 }, { - "epoch": 0.43131171730165335, - "grad_norm": 1.8351738452911377, - "learning_rate": 1.1743856149550568e-05, - "loss": 0.6326, + "epoch": 0.4012671594508976, + "grad_norm": 1.8083882331848145, + "learning_rate": 9.521335985808276e-06, + "loss": 0.7119, "step": 5700 }, { - "epoch": 0.43138738602398696, - "grad_norm": 2.076626777648926, - "learning_rate": 1.174171106772629e-05, - "loss": 0.8595, + "epoch": 0.40133755719816966, + "grad_norm": 1.6063218116760254, + "learning_rate": 9.519846947083841e-06, + "loss": 0.6595, "step": 5701 }, { - "epoch": 0.43146305474632063, - "grad_norm": 2.6194770336151123, - "learning_rate": 1.1739565814207198e-05, - "loss": 0.8026, + "epoch": 0.40140795494544174, + "grad_norm": 1.790459394454956, + "learning_rate": 9.518357777350287e-06, + "loss": 0.6524, "step": 5702 }, { - "epoch": 0.43153872346865424, - "grad_norm": 2.0482687950134277, - "learning_rate": 1.173742038912763e-05, - "loss": 0.6438, + "epoch": 0.4014783526927138, + "grad_norm": 1.8051888942718506, + "learning_rate": 9.516868476685026e-06, + "loss": 0.7346, "step": 5703 }, { - "epoch": 0.43161439219098785, - "grad_norm": 3.11622953414917, - "learning_rate": 1.173527479262195e-05, - "loss": 0.7928, + "epoch": 0.40154875043998595, + "grad_norm": 1.4663714170455933, + "learning_rate": 9.515379045165496e-06, + "loss": 0.6583, "step": 5704 }, { - "epoch": 0.43169006091332146, - "grad_norm": 1.6607571840286255, - "learning_rate": 1.1733129024824512e-05, - "loss": 0.6947, + "epoch": 0.401619148187258, + "grad_norm": 1.753555178642273, + "learning_rate": 9.513889482869131e-06, + "loss": 0.7919, "step": 5705 }, { - "epoch": 0.4317657296356551, - "grad_norm": 2.05531907081604, - "learning_rate": 1.1730983085869693e-05, - "loss": 0.7901, + "epoch": 0.4016895459345301, + "grad_norm": 1.479905366897583, + "learning_rate": 9.512399789873375e-06, + "loss": 0.6259, "step": 5706 }, { - "epoch": 0.43184139835798874, - "grad_norm": 2.3457841873168945, - "learning_rate": 1.172883697589188e-05, - "loss": 0.7528, + "epoch": 0.4017599436818022, + "grad_norm": 1.8101551532745361, + "learning_rate": 9.510909966255678e-06, + "loss": 0.8163, "step": 5707 }, { - "epoch": 0.43191706708032235, - "grad_norm": 2.2732810974121094, - "learning_rate": 1.1726690695025472e-05, - "loss": 0.6971, + "epoch": 0.40183034142907426, + "grad_norm": 1.8088176250457764, + "learning_rate": 9.509420012093498e-06, + "loss": 0.6427, "step": 5708 }, { - "epoch": 0.43199273580265596, - "grad_norm": 1.925724744796753, - "learning_rate": 1.1724544243404873e-05, - "loss": 0.853, + "epoch": 0.40190073917634633, + "grad_norm": 1.7744650840759277, + "learning_rate": 9.5079299274643e-06, + "loss": 0.7849, "step": 5709 }, { - "epoch": 0.43206840452498957, - "grad_norm": 1.8667192459106445, - "learning_rate": 1.1722397621164502e-05, - "loss": 0.5859, + "epoch": 0.40197113692361847, + "grad_norm": 1.8404700756072998, + "learning_rate": 9.506439712445552e-06, + "loss": 0.7982, "step": 5710 }, { - "epoch": 0.43214407324732323, - "grad_norm": 2.398282289505005, - "learning_rate": 1.1720250828438785e-05, - "loss": 0.7186, + "epoch": 0.40204153467089054, + "grad_norm": 1.8321253061294556, + "learning_rate": 9.504949367114733e-06, + "loss": 0.7551, "step": 5711 }, { - "epoch": 0.43221974196965685, - "grad_norm": 2.3136038780212402, - "learning_rate": 1.1718103865362161e-05, - "loss": 0.849, + "epoch": 0.4021119324181626, + "grad_norm": 1.777778148651123, + "learning_rate": 9.503458891549331e-06, + "loss": 0.7794, "step": 5712 }, { - "epoch": 0.43229541069199046, - "grad_norm": 1.7996965646743774, - "learning_rate": 1.1715956732069083e-05, - "loss": 0.7615, + "epoch": 0.4021823301654347, + "grad_norm": 2.0994369983673096, + "learning_rate": 9.501968285826832e-06, + "loss": 0.7883, "step": 5713 }, { - "epoch": 0.43237107941432407, - "grad_norm": 1.8926506042480469, - "learning_rate": 1.171380942869401e-05, - "loss": 0.6777, + "epoch": 0.4022527279127068, + "grad_norm": 1.8754111528396606, + "learning_rate": 9.500477550024736e-06, + "loss": 0.7647, "step": 5714 }, { - "epoch": 0.43244674813665773, - "grad_norm": 1.8787177801132202, - "learning_rate": 1.1711661955371416e-05, - "loss": 0.7415, + "epoch": 0.40232312565997885, + "grad_norm": 2.1719653606414795, + "learning_rate": 9.498986684220547e-06, + "loss": 0.7154, "step": 5715 }, { - "epoch": 0.43252241685899134, - "grad_norm": 1.7595826387405396, - "learning_rate": 1.1709514312235777e-05, - "loss": 0.756, + "epoch": 0.402393523407251, + "grad_norm": 1.8492810726165771, + "learning_rate": 9.497495688491778e-06, + "loss": 0.8543, "step": 5716 }, { - "epoch": 0.43259808558132495, - "grad_norm": 1.8866539001464844, - "learning_rate": 1.1707366499421589e-05, - "loss": 0.7147, + "epoch": 0.40246392115452306, + "grad_norm": 1.6183831691741943, + "learning_rate": 9.496004562915946e-06, + "loss": 0.6946, "step": 5717 }, { - "epoch": 0.43267375430365856, - "grad_norm": 1.6653908491134644, - "learning_rate": 1.1705218517063353e-05, - "loss": 0.676, + "epoch": 0.40253431890179514, + "grad_norm": 1.7026729583740234, + "learning_rate": 9.494513307570576e-06, + "loss": 0.7409, "step": 5718 }, { - "epoch": 0.43274942302599223, - "grad_norm": 2.470182180404663, - "learning_rate": 1.1703070365295584e-05, - "loss": 0.6446, + "epoch": 0.4026047166490672, + "grad_norm": 1.785105586051941, + "learning_rate": 9.4930219225332e-06, + "loss": 0.622, "step": 5719 }, { - "epoch": 0.43282509174832584, - "grad_norm": 2.4232730865478516, - "learning_rate": 1.1700922044252808e-05, - "loss": 0.7754, + "epoch": 0.4026751143963393, + "grad_norm": 1.9942015409469604, + "learning_rate": 9.491530407881356e-06, + "loss": 0.6938, "step": 5720 }, { - "epoch": 0.43290076047065945, - "grad_norm": 1.9250999689102173, - "learning_rate": 1.1698773554069555e-05, - "loss": 0.5243, + "epoch": 0.40274551214361143, + "grad_norm": 2.1085386276245117, + "learning_rate": 9.49003876369259e-06, + "loss": 0.663, "step": 5721 }, { - "epoch": 0.43297642919299306, - "grad_norm": 2.12267804145813, - "learning_rate": 1.1696624894880376e-05, - "loss": 0.6571, + "epoch": 0.4028159098908835, + "grad_norm": 1.71738862991333, + "learning_rate": 9.488546990044453e-06, + "loss": 0.6842, "step": 5722 }, { - "epoch": 0.4330520979153267, - "grad_norm": 1.7465804815292358, - "learning_rate": 1.1694476066819821e-05, - "loss": 0.6991, + "epoch": 0.4028863076381556, + "grad_norm": 1.723063588142395, + "learning_rate": 9.487055087014507e-06, + "loss": 0.6465, "step": 5723 }, { - "epoch": 0.43312776663766034, - "grad_norm": 3.37947154045105, - "learning_rate": 1.1692327070022462e-05, - "loss": 0.7811, + "epoch": 0.40295670538542766, + "grad_norm": 2.5643258094787598, + "learning_rate": 9.485563054680313e-06, + "loss": 0.7492, "step": 5724 }, { - "epoch": 0.43320343535999395, - "grad_norm": 2.1732442378997803, - "learning_rate": 1.1690177904622874e-05, - "loss": 0.7455, + "epoch": 0.40302710313269974, + "grad_norm": 1.256446361541748, + "learning_rate": 9.484070893119443e-06, + "loss": 0.8049, "step": 5725 }, { - "epoch": 0.43327910408232756, - "grad_norm": 1.863910436630249, - "learning_rate": 1.1688028570755642e-05, - "loss": 0.8409, + "epoch": 0.4030975008799718, + "grad_norm": 1.893876314163208, + "learning_rate": 9.482578602409481e-06, + "loss": 0.7268, "step": 5726 }, { - "epoch": 0.43335477280466117, - "grad_norm": 1.823136806488037, - "learning_rate": 1.1685879068555369e-05, - "loss": 0.6706, + "epoch": 0.40316789862724395, + "grad_norm": 1.857404112815857, + "learning_rate": 9.481086182628006e-06, + "loss": 0.6741, "step": 5727 }, { - "epoch": 0.43343044152699484, - "grad_norm": 1.8565260171890259, - "learning_rate": 1.168372939815666e-05, - "loss": 0.6175, + "epoch": 0.403238296374516, + "grad_norm": 1.5152446031570435, + "learning_rate": 9.479593633852614e-06, + "loss": 0.6962, "step": 5728 }, { - "epoch": 0.43350611024932845, - "grad_norm": 2.070004463195801, - "learning_rate": 1.1681579559694136e-05, - "loss": 0.7205, + "epoch": 0.4033086941217881, + "grad_norm": 1.9274277687072754, + "learning_rate": 9.478100956160905e-06, + "loss": 0.6946, "step": 5729 }, { - "epoch": 0.43358177897166206, - "grad_norm": 2.2450389862060547, - "learning_rate": 1.167942955330243e-05, - "loss": 0.7572, + "epoch": 0.4033790918690602, + "grad_norm": 1.7151548862457275, + "learning_rate": 9.47660814963048e-06, + "loss": 0.5361, "step": 5730 }, { - "epoch": 0.43365744769399567, - "grad_norm": 1.7799854278564453, - "learning_rate": 1.1677279379116174e-05, - "loss": 0.7553, + "epoch": 0.40344948961633226, + "grad_norm": 1.6509225368499756, + "learning_rate": 9.475115214338956e-06, + "loss": 0.6887, "step": 5731 }, { - "epoch": 0.43373311641632933, - "grad_norm": 2.37202787399292, - "learning_rate": 1.1675129037270028e-05, - "loss": 0.637, + "epoch": 0.4035198873636044, + "grad_norm": 1.5534749031066895, + "learning_rate": 9.473622150363953e-06, + "loss": 0.6806, "step": 5732 }, { - "epoch": 0.43380878513866294, - "grad_norm": 2.0018792152404785, - "learning_rate": 1.1672978527898647e-05, - "loss": 0.7219, + "epoch": 0.40359028511087647, + "grad_norm": 1.972931146621704, + "learning_rate": 9.472128957783088e-06, + "loss": 0.7188, "step": 5733 }, { - "epoch": 0.43388445386099656, - "grad_norm": 2.104686975479126, - "learning_rate": 1.1670827851136704e-05, - "loss": 0.7433, + "epoch": 0.40366068285814855, + "grad_norm": 1.9601820707321167, + "learning_rate": 9.470635636674003e-06, + "loss": 0.7169, "step": 5734 }, { - "epoch": 0.43396012258333017, - "grad_norm": 2.32853102684021, - "learning_rate": 1.1668677007118884e-05, - "loss": 0.7045, + "epoch": 0.4037310806054206, + "grad_norm": 1.702879548072815, + "learning_rate": 9.469142187114335e-06, + "loss": 0.657, "step": 5735 }, { - "epoch": 0.4340357913056638, - "grad_norm": 1.7697525024414062, - "learning_rate": 1.166652599597988e-05, - "loss": 0.7692, + "epoch": 0.4038014783526927, + "grad_norm": 1.8446251153945923, + "learning_rate": 9.467648609181727e-06, + "loss": 0.6323, "step": 5736 }, { - "epoch": 0.43411146002799744, - "grad_norm": 2.1800074577331543, - "learning_rate": 1.166437481785439e-05, - "loss": 0.7022, + "epoch": 0.4038718760999648, + "grad_norm": 2.65925931930542, + "learning_rate": 9.466154902953834e-06, + "loss": 0.7084, "step": 5737 }, { - "epoch": 0.43418712875033105, - "grad_norm": 2.2426414489746094, - "learning_rate": 1.1662223472877135e-05, - "loss": 0.7946, + "epoch": 0.4039422738472369, + "grad_norm": 1.7357723712921143, + "learning_rate": 9.464661068508314e-06, + "loss": 0.6357, "step": 5738 }, { - "epoch": 0.43426279747266466, - "grad_norm": 2.2983815670013428, - "learning_rate": 1.1660071961182834e-05, - "loss": 0.7674, + "epoch": 0.404012671594509, + "grad_norm": 2.0799858570098877, + "learning_rate": 9.463167105922831e-06, + "loss": 0.71, "step": 5739 }, { - "epoch": 0.4343384661949983, - "grad_norm": 2.3444814682006836, - "learning_rate": 1.1657920282906221e-05, - "loss": 0.6454, + "epoch": 0.40408306934178106, + "grad_norm": 1.755706548690796, + "learning_rate": 9.461673015275059e-06, + "loss": 0.5586, "step": 5740 }, { - "epoch": 0.43441413491733194, - "grad_norm": 2.3554506301879883, - "learning_rate": 1.1655768438182046e-05, - "loss": 0.795, + "epoch": 0.40415346708905314, + "grad_norm": 1.5220930576324463, + "learning_rate": 9.460178796642682e-06, + "loss": 0.7264, "step": 5741 }, { - "epoch": 0.43448980363966555, - "grad_norm": 2.302736520767212, - "learning_rate": 1.1653616427145061e-05, - "loss": 0.7287, + "epoch": 0.4042238648363252, + "grad_norm": 2.1348989009857178, + "learning_rate": 9.458684450103379e-06, + "loss": 0.7076, "step": 5742 }, { - "epoch": 0.43456547236199916, - "grad_norm": 1.9690250158309937, - "learning_rate": 1.1651464249930032e-05, - "loss": 0.662, + "epoch": 0.40429426258359735, + "grad_norm": 2.168361186981201, + "learning_rate": 9.457189975734843e-06, + "loss": 0.7047, "step": 5743 }, { - "epoch": 0.43464114108433277, - "grad_norm": 1.9559441804885864, - "learning_rate": 1.1649311906671735e-05, - "loss": 0.8327, + "epoch": 0.40436466033086943, + "grad_norm": 1.817024827003479, + "learning_rate": 9.455695373614777e-06, + "loss": 0.6854, "step": 5744 }, { - "epoch": 0.43471680980666644, - "grad_norm": 1.8690423965454102, - "learning_rate": 1.1647159397504958e-05, - "loss": 0.5716, + "epoch": 0.4044350580781415, + "grad_norm": 1.7404192686080933, + "learning_rate": 9.454200643820883e-06, + "loss": 0.8694, "step": 5745 }, { - "epoch": 0.43479247852900005, - "grad_norm": 1.6188991069793701, - "learning_rate": 1.1645006722564499e-05, - "loss": 0.6871, + "epoch": 0.4045054558254136, + "grad_norm": 1.6466504335403442, + "learning_rate": 9.452705786430876e-06, + "loss": 0.7081, "step": 5746 }, { - "epoch": 0.43486814725133366, - "grad_norm": 2.0028879642486572, - "learning_rate": 1.1642853881985162e-05, - "loss": 0.7658, + "epoch": 0.40457585357268566, + "grad_norm": 1.7738351821899414, + "learning_rate": 9.451210801522474e-06, + "loss": 0.6835, "step": 5747 }, { - "epoch": 0.43494381597366727, - "grad_norm": 1.7991002798080444, - "learning_rate": 1.1640700875901768e-05, - "loss": 0.6294, + "epoch": 0.40464625131995774, + "grad_norm": 1.7137365341186523, + "learning_rate": 9.4497156891734e-06, + "loss": 0.7032, "step": 5748 }, { - "epoch": 0.43501948469600094, - "grad_norm": 1.7511948347091675, - "learning_rate": 1.1638547704449142e-05, - "loss": 0.6803, + "epoch": 0.40471664906722987, + "grad_norm": 1.798773169517517, + "learning_rate": 9.448220449461391e-06, + "loss": 0.7315, "step": 5749 }, { - "epoch": 0.43509515341833455, - "grad_norm": 2.2755792140960693, - "learning_rate": 1.163639436776213e-05, - "loss": 0.634, + "epoch": 0.40478704681450195, + "grad_norm": 1.7511727809906006, + "learning_rate": 9.446725082464186e-06, + "loss": 0.737, "step": 5750 }, { - "epoch": 0.43517082214066816, - "grad_norm": 1.9088243246078491, - "learning_rate": 1.1634240865975571e-05, - "loss": 0.6084, + "epoch": 0.404857444561774, + "grad_norm": 1.8785525560379028, + "learning_rate": 9.445229588259523e-06, + "loss": 0.7509, "step": 5751 }, { - "epoch": 0.43524649086300177, - "grad_norm": 1.6567586660385132, - "learning_rate": 1.163208719922433e-05, - "loss": 0.6281, + "epoch": 0.4049278423090461, + "grad_norm": 1.9458194971084595, + "learning_rate": 9.443733966925163e-06, + "loss": 0.8083, "step": 5752 }, { - "epoch": 0.4353221595853354, - "grad_norm": 1.8782658576965332, - "learning_rate": 1.1629933367643274e-05, - "loss": 0.7178, + "epoch": 0.4049982400563182, + "grad_norm": 1.6961705684661865, + "learning_rate": 9.442238218538859e-06, + "loss": 0.836, "step": 5753 }, { - "epoch": 0.43539782830766904, - "grad_norm": 2.370513677597046, - "learning_rate": 1.1627779371367286e-05, - "loss": 0.7157, + "epoch": 0.40506863780359026, + "grad_norm": 1.4049104452133179, + "learning_rate": 9.44074234317838e-06, + "loss": 0.6562, "step": 5754 }, { - "epoch": 0.43547349703000265, - "grad_norm": 2.0037007331848145, - "learning_rate": 1.1625625210531255e-05, - "loss": 0.7701, + "epoch": 0.4051390355508624, + "grad_norm": 1.5522716045379639, + "learning_rate": 9.439246340921492e-06, + "loss": 0.6151, "step": 5755 }, { - "epoch": 0.43554916575233626, - "grad_norm": 1.9564191102981567, - "learning_rate": 1.162347088527008e-05, - "loss": 0.8741, + "epoch": 0.40520943329813447, + "grad_norm": 1.7786287069320679, + "learning_rate": 9.437750211845982e-06, + "loss": 0.6896, "step": 5756 }, { - "epoch": 0.4356248344746699, - "grad_norm": 2.218358039855957, - "learning_rate": 1.1621316395718674e-05, - "loss": 0.7502, + "epoch": 0.40527983104540655, + "grad_norm": 1.825160264968872, + "learning_rate": 9.436253956029628e-06, + "loss": 0.721, "step": 5757 }, { - "epoch": 0.43570050319700354, - "grad_norm": 2.1146786212921143, - "learning_rate": 1.1619161742011953e-05, - "loss": 0.7886, + "epoch": 0.4053502287926786, + "grad_norm": 1.6551214456558228, + "learning_rate": 9.434757573550226e-06, + "loss": 0.5738, "step": 5758 }, { - "epoch": 0.43577617191933715, - "grad_norm": 1.9309403896331787, - "learning_rate": 1.1617006924284856e-05, - "loss": 0.7796, + "epoch": 0.4054206265399507, + "grad_norm": 1.7784318923950195, + "learning_rate": 9.433261064485572e-06, + "loss": 0.6464, "step": 5759 }, { - "epoch": 0.43585184064167076, - "grad_norm": 2.1848180294036865, - "learning_rate": 1.1614851942672319e-05, - "loss": 0.8031, + "epoch": 0.40549102428722283, + "grad_norm": 1.817336916923523, + "learning_rate": 9.431764428913471e-06, + "loss": 0.7163, "step": 5760 }, { - "epoch": 0.4359275093640044, - "grad_norm": 2.1754567623138428, - "learning_rate": 1.1612696797309298e-05, - "loss": 0.7759, + "epoch": 0.4055614220344949, + "grad_norm": 1.9848960638046265, + "learning_rate": 9.430267666911736e-06, + "loss": 0.7039, "step": 5761 }, { - "epoch": 0.43600317808633804, - "grad_norm": 1.7848371267318726, - "learning_rate": 1.1610541488330753e-05, - "loss": 0.7262, + "epoch": 0.405631819781767, + "grad_norm": 1.8978503942489624, + "learning_rate": 9.428770778558184e-06, + "loss": 0.7331, "step": 5762 }, { - "epoch": 0.43607884680867165, - "grad_norm": 2.1866798400878906, - "learning_rate": 1.1608386015871655e-05, - "loss": 0.7978, + "epoch": 0.40570221752903907, + "grad_norm": 1.6153795719146729, + "learning_rate": 9.427273763930638e-06, + "loss": 0.5785, "step": 5763 }, { - "epoch": 0.43615451553100526, - "grad_norm": 1.765702486038208, - "learning_rate": 1.1606230380066988e-05, - "loss": 0.7182, + "epoch": 0.40577261527631114, + "grad_norm": 1.7465856075286865, + "learning_rate": 9.425776623106933e-06, + "loss": 0.7456, "step": 5764 }, { - "epoch": 0.43623018425333887, - "grad_norm": 2.1728196144104004, - "learning_rate": 1.1604074581051746e-05, - "loss": 0.748, + "epoch": 0.4058430130235832, + "grad_norm": 1.5278129577636719, + "learning_rate": 9.424279356164904e-06, + "loss": 0.7507, "step": 5765 }, { - "epoch": 0.4363058529756725, - "grad_norm": 2.886596441268921, - "learning_rate": 1.1601918618960933e-05, - "loss": 0.8474, + "epoch": 0.40591341077085535, + "grad_norm": 1.9049773216247559, + "learning_rate": 9.422781963182395e-06, + "loss": 0.6399, "step": 5766 }, { - "epoch": 0.43638152169800615, - "grad_norm": 2.492180824279785, - "learning_rate": 1.1599762493929555e-05, - "loss": 0.7185, + "epoch": 0.40598380851812743, + "grad_norm": 1.6051594018936157, + "learning_rate": 9.421284444237256e-06, + "loss": 0.6991, "step": 5767 }, { - "epoch": 0.43645719042033976, - "grad_norm": 2.229836940765381, - "learning_rate": 1.1597606206092645e-05, - "loss": 0.7957, + "epoch": 0.4060542062653995, + "grad_norm": 1.8460326194763184, + "learning_rate": 9.41978679940735e-06, + "loss": 0.7001, "step": 5768 }, { - "epoch": 0.43653285914267337, - "grad_norm": 1.8042664527893066, - "learning_rate": 1.1595449755585232e-05, - "loss": 0.7395, + "epoch": 0.4061246040126716, + "grad_norm": 1.7562357187271118, + "learning_rate": 9.418289028770534e-06, + "loss": 0.6051, "step": 5769 }, { - "epoch": 0.436608527865007, - "grad_norm": 1.6901711225509644, - "learning_rate": 1.159329314254236e-05, - "loss": 0.6354, + "epoch": 0.40619500175994366, + "grad_norm": 1.5830988883972168, + "learning_rate": 9.41679113240468e-06, + "loss": 0.6715, "step": 5770 }, { - "epoch": 0.43668419658734065, - "grad_norm": 1.7246809005737305, - "learning_rate": 1.1591136367099087e-05, - "loss": 0.7282, + "epoch": 0.4062653995072158, + "grad_norm": 1.5191670656204224, + "learning_rate": 9.415293110387668e-06, + "loss": 0.7869, "step": 5771 }, { - "epoch": 0.43675986530967426, - "grad_norm": 1.6390856504440308, - "learning_rate": 1.1588979429390467e-05, - "loss": 0.7898, + "epoch": 0.4063357972544879, + "grad_norm": 1.765479564666748, + "learning_rate": 9.41379496279738e-06, + "loss": 0.6773, "step": 5772 }, { - "epoch": 0.43683553403200787, - "grad_norm": 2.199267625808716, - "learning_rate": 1.1586822329551588e-05, - "loss": 0.8082, + "epoch": 0.40640619500175995, + "grad_norm": 1.8467720746994019, + "learning_rate": 9.412296689711707e-06, + "loss": 0.7208, "step": 5773 }, { - "epoch": 0.4369112027543415, - "grad_norm": 2.898261070251465, - "learning_rate": 1.1584665067717527e-05, - "loss": 0.6785, + "epoch": 0.40647659274903203, + "grad_norm": 1.7103976011276245, + "learning_rate": 9.410798291208542e-06, + "loss": 0.724, "step": 5774 }, { - "epoch": 0.43698687147667514, - "grad_norm": 2.123633623123169, - "learning_rate": 1.1582507644023377e-05, - "loss": 0.7712, + "epoch": 0.4065469904963041, + "grad_norm": 1.7214332818984985, + "learning_rate": 9.409299767365792e-06, + "loss": 0.6948, "step": 5775 }, { - "epoch": 0.43706254019900875, - "grad_norm": 2.2638285160064697, - "learning_rate": 1.1580350058604246e-05, - "loss": 0.7443, + "epoch": 0.4066173882435762, + "grad_norm": 1.6131806373596191, + "learning_rate": 9.407801118261364e-06, + "loss": 0.7084, "step": 5776 }, { - "epoch": 0.43713820892134236, - "grad_norm": 1.94474458694458, - "learning_rate": 1.1578192311595247e-05, - "loss": 0.7249, + "epoch": 0.4066877859908483, + "grad_norm": 1.8579357862472534, + "learning_rate": 9.406302343973177e-06, + "loss": 0.7497, "step": 5777 }, { - "epoch": 0.437213877643676, - "grad_norm": 2.0058271884918213, - "learning_rate": 1.1576034403131511e-05, - "loss": 0.5709, + "epoch": 0.4067581837381204, + "grad_norm": 1.3415099382400513, + "learning_rate": 9.404803444579149e-06, + "loss": 0.6943, "step": 5778 }, { - "epoch": 0.4372895463660096, - "grad_norm": 2.0627119541168213, - "learning_rate": 1.1573876333348165e-05, - "loss": 0.6974, + "epoch": 0.40682858148539247, + "grad_norm": 1.6667970418930054, + "learning_rate": 9.403304420157215e-06, + "loss": 0.8075, "step": 5779 }, { - "epoch": 0.43736521508834325, - "grad_norm": 2.220038652420044, - "learning_rate": 1.157171810238036e-05, - "loss": 0.8202, + "epoch": 0.40689897923266455, + "grad_norm": 1.8639007806777954, + "learning_rate": 9.401805270785307e-06, + "loss": 0.7166, "step": 5780 }, { - "epoch": 0.43744088381067686, - "grad_norm": 1.8286783695220947, - "learning_rate": 1.1569559710363249e-05, - "loss": 0.6777, + "epoch": 0.4069693769799366, + "grad_norm": 1.8055942058563232, + "learning_rate": 9.400305996541368e-06, + "loss": 0.7206, "step": 5781 }, { - "epoch": 0.43751655253301047, - "grad_norm": 2.0850884914398193, - "learning_rate": 1.1567401157431998e-05, - "loss": 0.6338, + "epoch": 0.4070397747272087, + "grad_norm": 1.6711360216140747, + "learning_rate": 9.398806597503343e-06, + "loss": 0.8106, "step": 5782 }, { - "epoch": 0.4375922212553441, - "grad_norm": 2.0958290100097656, - "learning_rate": 1.1565242443721783e-05, - "loss": 0.7985, + "epoch": 0.40711017247448084, + "grad_norm": 1.5883315801620483, + "learning_rate": 9.397307073749192e-06, + "loss": 0.7777, "step": 5783 }, { - "epoch": 0.43766788997767775, - "grad_norm": 2.5265495777130127, - "learning_rate": 1.156308356936779e-05, - "loss": 0.7152, + "epoch": 0.4071805702217529, + "grad_norm": 2.00970458984375, + "learning_rate": 9.395807425356878e-06, + "loss": 0.7815, "step": 5784 }, { - "epoch": 0.43774355870001136, - "grad_norm": 1.9527240991592407, - "learning_rate": 1.1560924534505212e-05, - "loss": 0.7323, + "epoch": 0.407250967969025, + "grad_norm": 1.8645482063293457, + "learning_rate": 9.394307652404363e-06, + "loss": 0.7711, "step": 5785 }, { - "epoch": 0.43781922742234497, - "grad_norm": 2.079576253890991, - "learning_rate": 1.1558765339269255e-05, - "loss": 0.7322, + "epoch": 0.40732136571629707, + "grad_norm": 1.5793050527572632, + "learning_rate": 9.392807754969627e-06, + "loss": 0.6503, "step": 5786 }, { - "epoch": 0.4378948961446786, - "grad_norm": 2.6876163482666016, - "learning_rate": 1.1556605983795142e-05, - "loss": 0.7538, + "epoch": 0.40739176346356915, + "grad_norm": 1.684002161026001, + "learning_rate": 9.391307733130647e-06, + "loss": 0.6764, "step": 5787 }, { - "epoch": 0.43797056486701225, - "grad_norm": 2.0275208950042725, - "learning_rate": 1.1554446468218087e-05, - "loss": 0.7103, + "epoch": 0.4074621612108413, + "grad_norm": 2.0054900646209717, + "learning_rate": 9.389807586965413e-06, + "loss": 0.7274, "step": 5788 }, { - "epoch": 0.43804623358934586, - "grad_norm": 2.2174196243286133, - "learning_rate": 1.1552286792673335e-05, - "loss": 0.7053, + "epoch": 0.40753255895811336, + "grad_norm": 1.7945923805236816, + "learning_rate": 9.388307316551918e-06, + "loss": 0.6604, "step": 5789 }, { - "epoch": 0.43812190231167947, - "grad_norm": 2.1054906845092773, - "learning_rate": 1.1550126957296128e-05, - "loss": 0.7026, + "epoch": 0.40760295670538543, + "grad_norm": 2.067002296447754, + "learning_rate": 9.386806921968163e-06, + "loss": 0.7564, "step": 5790 }, { - "epoch": 0.4381975710340131, - "grad_norm": 2.7534801959991455, - "learning_rate": 1.1547966962221726e-05, - "loss": 0.7907, + "epoch": 0.4076733544526575, + "grad_norm": 1.6093369722366333, + "learning_rate": 9.385306403292154e-06, + "loss": 0.7889, "step": 5791 }, { - "epoch": 0.4382732397563467, - "grad_norm": 2.1715431213378906, - "learning_rate": 1.154580680758539e-05, - "loss": 0.7555, + "epoch": 0.4077437521999296, + "grad_norm": 2.1637468338012695, + "learning_rate": 9.383805760601907e-06, + "loss": 0.7796, "step": 5792 }, { - "epoch": 0.43834890847868035, - "grad_norm": 2.3663253784179688, - "learning_rate": 1.1543646493522395e-05, - "loss": 0.7672, + "epoch": 0.40781414994720167, + "grad_norm": 1.799615502357483, + "learning_rate": 9.382304993975436e-06, + "loss": 0.7907, "step": 5793 }, { - "epoch": 0.43842457720101397, - "grad_norm": 2.317469358444214, - "learning_rate": 1.1541486020168034e-05, - "loss": 0.8387, + "epoch": 0.4078845476944738, + "grad_norm": 1.8235608339309692, + "learning_rate": 9.380804103490773e-06, + "loss": 0.7245, "step": 5794 }, { - "epoch": 0.4385002459233476, - "grad_norm": 2.1923418045043945, - "learning_rate": 1.1539325387657593e-05, - "loss": 0.774, + "epoch": 0.4079549454417459, + "grad_norm": 1.6648019552230835, + "learning_rate": 9.37930308922595e-06, + "loss": 0.7449, "step": 5795 }, { - "epoch": 0.4385759146456812, - "grad_norm": 2.3220553398132324, - "learning_rate": 1.1537164596126386e-05, - "loss": 0.7811, + "epoch": 0.40802534318901795, + "grad_norm": 2.0128161907196045, + "learning_rate": 9.377801951259001e-06, + "loss": 0.7422, "step": 5796 }, { - "epoch": 0.43865158336801485, - "grad_norm": 2.2181203365325928, - "learning_rate": 1.1535003645709725e-05, - "loss": 0.616, + "epoch": 0.40809574093629003, + "grad_norm": 1.9637551307678223, + "learning_rate": 9.376300689667977e-06, + "loss": 0.6614, "step": 5797 }, { - "epoch": 0.43872725209034846, - "grad_norm": 2.3010993003845215, - "learning_rate": 1.1532842536542936e-05, - "loss": 0.615, + "epoch": 0.4081661386835621, + "grad_norm": 1.8333698511123657, + "learning_rate": 9.374799304530927e-06, + "loss": 0.678, "step": 5798 }, { - "epoch": 0.4388029208126821, - "grad_norm": 2.1476845741271973, - "learning_rate": 1.1530681268761356e-05, - "loss": 0.8901, + "epoch": 0.40823653643083424, + "grad_norm": 1.5768358707427979, + "learning_rate": 9.37329779592591e-06, + "loss": 0.7203, "step": 5799 }, { - "epoch": 0.4388785895350157, - "grad_norm": 1.7837895154953003, - "learning_rate": 1.1528519842500328e-05, - "loss": 0.8305, + "epoch": 0.4083069341781063, + "grad_norm": 2.104137897491455, + "learning_rate": 9.371796163930994e-06, + "loss": 0.7481, "step": 5800 }, { - "epoch": 0.43895425825734935, - "grad_norm": 2.1396026611328125, - "learning_rate": 1.1526358257895216e-05, - "loss": 0.8109, + "epoch": 0.4083773319253784, + "grad_norm": 1.641037106513977, + "learning_rate": 9.370294408624246e-06, + "loss": 0.6776, "step": 5801 }, { - "epoch": 0.43902992697968296, - "grad_norm": 2.0845377445220947, - "learning_rate": 1.1524196515081372e-05, - "loss": 0.8193, + "epoch": 0.4084477296726505, + "grad_norm": 1.7680681943893433, + "learning_rate": 9.368792530083744e-06, + "loss": 0.7862, "step": 5802 }, { - "epoch": 0.43910559570201657, - "grad_norm": 1.9465970993041992, - "learning_rate": 1.1522034614194178e-05, - "loss": 0.7111, + "epoch": 0.40851812741992255, + "grad_norm": 1.776811957359314, + "learning_rate": 9.367290528387574e-06, + "loss": 0.8379, "step": 5803 }, { - "epoch": 0.4391812644243502, - "grad_norm": 1.8505274057388306, - "learning_rate": 1.1519872555369022e-05, - "loss": 0.7731, + "epoch": 0.4085885251671946, + "grad_norm": 1.9183528423309326, + "learning_rate": 9.365788403613827e-06, + "loss": 0.6661, "step": 5804 }, { - "epoch": 0.4392569331466838, - "grad_norm": 2.0143327713012695, - "learning_rate": 1.1517710338741297e-05, - "loss": 0.5693, + "epoch": 0.40865892291446676, + "grad_norm": 1.7065305709838867, + "learning_rate": 9.364286155840596e-06, + "loss": 0.5923, "step": 5805 }, { - "epoch": 0.43933260186901746, - "grad_norm": 2.1907799243927, - "learning_rate": 1.1515547964446403e-05, - "loss": 0.7013, + "epoch": 0.40872932066173884, + "grad_norm": 1.817001461982727, + "learning_rate": 9.362783785145987e-06, + "loss": 0.696, "step": 5806 }, { - "epoch": 0.43940827059135107, - "grad_norm": 2.1983025074005127, - "learning_rate": 1.1513385432619763e-05, - "loss": 0.8154, + "epoch": 0.4087997184090109, + "grad_norm": 1.7162680625915527, + "learning_rate": 9.36128129160811e-06, + "loss": 0.7254, "step": 5807 }, { - "epoch": 0.4394839393136847, - "grad_norm": 1.9378697872161865, - "learning_rate": 1.1511222743396797e-05, - "loss": 0.6557, + "epoch": 0.408870116156283, + "grad_norm": 1.9084290266036987, + "learning_rate": 9.359778675305079e-06, + "loss": 0.7925, "step": 5808 }, { - "epoch": 0.4395596080360183, - "grad_norm": 1.7255088090896606, - "learning_rate": 1.150905989691294e-05, - "loss": 0.6812, + "epoch": 0.40894051390355507, + "grad_norm": 1.9344899654388428, + "learning_rate": 9.358275936315019e-06, + "loss": 0.7371, "step": 5809 }, { - "epoch": 0.43963527675835196, - "grad_norm": 1.7912418842315674, - "learning_rate": 1.1506896893303637e-05, - "loss": 0.642, + "epoch": 0.40901091165082715, + "grad_norm": 2.0015761852264404, + "learning_rate": 9.356773074716056e-06, + "loss": 0.84, "step": 5810 }, { - "epoch": 0.43971094548068557, - "grad_norm": 2.0049731731414795, - "learning_rate": 1.1504733732704342e-05, - "loss": 0.7161, + "epoch": 0.4090813093980993, + "grad_norm": 1.8801084756851196, + "learning_rate": 9.355270090586328e-06, + "loss": 0.6419, "step": 5811 }, { - "epoch": 0.4397866142030192, - "grad_norm": 1.776609182357788, - "learning_rate": 1.1502570415250522e-05, - "loss": 0.6908, + "epoch": 0.40915170714537136, + "grad_norm": 1.6894186735153198, + "learning_rate": 9.353766984003972e-06, + "loss": 0.6606, "step": 5812 }, { - "epoch": 0.4398622829253528, - "grad_norm": 1.9277169704437256, - "learning_rate": 1.1500406941077642e-05, - "loss": 0.7799, + "epoch": 0.40922210489264343, + "grad_norm": 2.00762677192688, + "learning_rate": 9.35226375504714e-06, + "loss": 0.78, "step": 5813 }, { - "epoch": 0.43993795164768645, - "grad_norm": 1.8909525871276855, - "learning_rate": 1.1498243310321198e-05, - "loss": 0.7749, + "epoch": 0.4092925026399155, + "grad_norm": 1.692471981048584, + "learning_rate": 9.350760403793987e-06, + "loss": 0.5739, "step": 5814 }, { - "epoch": 0.44001362037002006, - "grad_norm": 2.2037034034729004, - "learning_rate": 1.1496079523116677e-05, - "loss": 0.9047, + "epoch": 0.4093629003871876, + "grad_norm": 1.7218680381774902, + "learning_rate": 9.349256930322668e-06, + "loss": 0.7374, "step": 5815 }, { - "epoch": 0.4400892890923537, - "grad_norm": 1.9161611795425415, - "learning_rate": 1.1493915579599582e-05, - "loss": 0.6534, + "epoch": 0.4094332981344597, + "grad_norm": 1.7502589225769043, + "learning_rate": 9.347753334711358e-06, + "loss": 0.7216, "step": 5816 }, { - "epoch": 0.4401649578146873, - "grad_norm": 2.0024547576904297, - "learning_rate": 1.1491751479905425e-05, - "loss": 0.7846, + "epoch": 0.4095036958817318, + "grad_norm": 1.8518223762512207, + "learning_rate": 9.346249617038223e-06, + "loss": 0.748, "step": 5817 }, { - "epoch": 0.4402406265370209, - "grad_norm": 1.8573756217956543, - "learning_rate": 1.1489587224169733e-05, - "loss": 0.7107, + "epoch": 0.4095740936290039, + "grad_norm": 1.749830961227417, + "learning_rate": 9.344745777381445e-06, + "loss": 0.5684, "step": 5818 }, { - "epoch": 0.44031629525935456, - "grad_norm": 2.4334030151367188, - "learning_rate": 1.1487422812528037e-05, - "loss": 0.5948, + "epoch": 0.40964449137627595, + "grad_norm": 1.6912190914154053, + "learning_rate": 9.343241815819213e-06, + "loss": 0.6579, "step": 5819 }, { - "epoch": 0.4403919639816882, - "grad_norm": 1.6953381299972534, - "learning_rate": 1.1485258245115878e-05, - "loss": 0.761, + "epoch": 0.40971488912354803, + "grad_norm": 2.3810219764709473, + "learning_rate": 9.341737732429717e-06, + "loss": 0.697, "step": 5820 }, { - "epoch": 0.4404676327040218, - "grad_norm": 1.992057204246521, - "learning_rate": 1.148309352206881e-05, - "loss": 0.6255, + "epoch": 0.4097852868708201, + "grad_norm": 2.124039649963379, + "learning_rate": 9.340233527291152e-06, + "loss": 0.6091, "step": 5821 }, { - "epoch": 0.4405433014263554, - "grad_norm": 1.9691619873046875, - "learning_rate": 1.1480928643522396e-05, - "loss": 0.6193, + "epoch": 0.40985568461809224, + "grad_norm": 1.5721747875213623, + "learning_rate": 9.33872920048173e-06, + "loss": 0.7068, "step": 5822 }, { - "epoch": 0.44061897014868906, - "grad_norm": 2.430366277694702, - "learning_rate": 1.1478763609612204e-05, - "loss": 0.7825, + "epoch": 0.4099260823653643, + "grad_norm": 1.7351032495498657, + "learning_rate": 9.337224752079656e-06, + "loss": 0.8329, "step": 5823 }, { - "epoch": 0.44069463887102267, - "grad_norm": 1.9845337867736816, - "learning_rate": 1.1476598420473817e-05, - "loss": 0.5983, + "epoch": 0.4099964801126364, + "grad_norm": 1.5845797061920166, + "learning_rate": 9.335720182163152e-06, + "loss": 0.7711, "step": 5824 }, { - "epoch": 0.4407703075933563, - "grad_norm": 1.902275800704956, - "learning_rate": 1.147443307624283e-05, - "loss": 0.7193, + "epoch": 0.4100668778599085, + "grad_norm": 1.6640275716781616, + "learning_rate": 9.33421549081044e-06, + "loss": 0.6541, "step": 5825 }, { - "epoch": 0.4408459763156899, - "grad_norm": 2.0569605827331543, - "learning_rate": 1.1472267577054838e-05, - "loss": 0.8241, + "epoch": 0.41013727560718055, + "grad_norm": 1.7895370721817017, + "learning_rate": 9.332710678099751e-06, + "loss": 0.7219, "step": 5826 }, { - "epoch": 0.44092164503802356, - "grad_norm": 1.7399667501449585, - "learning_rate": 1.1470101923045453e-05, - "loss": 0.62, + "epoch": 0.4102076733544527, + "grad_norm": 1.7705739736557007, + "learning_rate": 9.33120574410932e-06, + "loss": 0.6369, "step": 5827 }, { - "epoch": 0.44099731376035717, - "grad_norm": 2.0543341636657715, - "learning_rate": 1.14679361143503e-05, - "loss": 0.6139, + "epoch": 0.41027807110172476, + "grad_norm": 1.7590675354003906, + "learning_rate": 9.329700688917393e-06, + "loss": 0.6526, "step": 5828 }, { - "epoch": 0.4410729824826908, - "grad_norm": 2.1195523738861084, - "learning_rate": 1.1465770151105e-05, - "loss": 0.7834, + "epoch": 0.41034846884899684, + "grad_norm": 2.0010604858398438, + "learning_rate": 9.328195512602216e-06, + "loss": 0.7459, "step": 5829 }, { - "epoch": 0.4411486512050244, - "grad_norm": 2.0110557079315186, - "learning_rate": 1.1463604033445203e-05, - "loss": 0.5876, + "epoch": 0.4104188665962689, + "grad_norm": 1.8349838256835938, + "learning_rate": 9.326690215242045e-06, + "loss": 0.7644, "step": 5830 }, { - "epoch": 0.441224319927358, - "grad_norm": 1.9073213338851929, - "learning_rate": 1.1461437761506548e-05, - "loss": 0.7183, + "epoch": 0.410489264343541, + "grad_norm": 2.194354295730591, + "learning_rate": 9.325184796915146e-06, + "loss": 0.7285, "step": 5831 }, { - "epoch": 0.44129998864969167, - "grad_norm": 1.7596300840377808, - "learning_rate": 1.1459271335424703e-05, - "loss": 0.7434, + "epoch": 0.41055966209081307, + "grad_norm": 1.7261016368865967, + "learning_rate": 9.32367925769978e-06, + "loss": 0.781, "step": 5832 }, { - "epoch": 0.4413756573720253, - "grad_norm": 2.102022647857666, - "learning_rate": 1.1457104755335332e-05, - "loss": 0.7105, + "epoch": 0.4106300598380852, + "grad_norm": 1.6133323907852173, + "learning_rate": 9.322173597674225e-06, + "loss": 0.6355, "step": 5833 }, { - "epoch": 0.4414513260943589, - "grad_norm": 1.682158350944519, - "learning_rate": 1.1454938021374112e-05, - "loss": 0.6027, + "epoch": 0.4107004575853573, + "grad_norm": 1.7576709985733032, + "learning_rate": 9.320667816916762e-06, + "loss": 0.7573, "step": 5834 }, { - "epoch": 0.4415269948166925, - "grad_norm": 2.463780641555786, - "learning_rate": 1.1452771133676736e-05, - "loss": 0.7158, + "epoch": 0.41077085533262936, + "grad_norm": 1.8653875589370728, + "learning_rate": 9.319161915505678e-06, + "loss": 0.89, "step": 5835 }, { - "epoch": 0.44160266353902616, - "grad_norm": 1.8532681465148926, - "learning_rate": 1.1450604092378895e-05, - "loss": 0.7358, + "epoch": 0.41084125307990144, + "grad_norm": 1.8187925815582275, + "learning_rate": 9.317655893519267e-06, + "loss": 0.8293, "step": 5836 }, { - "epoch": 0.4416783322613598, - "grad_norm": 2.06534743309021, - "learning_rate": 1.1448436897616304e-05, - "loss": 0.6122, + "epoch": 0.4109116508271735, + "grad_norm": 1.909471869468689, + "learning_rate": 9.316149751035825e-06, + "loss": 0.6844, "step": 5837 }, { - "epoch": 0.4417540009836934, - "grad_norm": 1.474158763885498, - "learning_rate": 1.144626954952467e-05, - "loss": 0.869, + "epoch": 0.4109820485744456, + "grad_norm": 1.5758250951766968, + "learning_rate": 9.314643488133661e-06, + "loss": 0.7509, "step": 5838 }, { - "epoch": 0.441829669706027, - "grad_norm": 2.5886144638061523, - "learning_rate": 1.1444102048239729e-05, - "loss": 0.8578, + "epoch": 0.4110524463217177, + "grad_norm": 1.7679216861724854, + "learning_rate": 9.313137104891085e-06, + "loss": 0.7468, "step": 5839 }, { - "epoch": 0.44190533842836066, - "grad_norm": 2.0663976669311523, - "learning_rate": 1.1441934393897208e-05, - "loss": 0.6776, + "epoch": 0.4111228440689898, + "grad_norm": 1.701890468597412, + "learning_rate": 9.311630601386418e-06, + "loss": 0.6896, "step": 5840 }, { - "epoch": 0.44198100715069427, - "grad_norm": 3.3373160362243652, - "learning_rate": 1.1439766586632861e-05, - "loss": 0.6511, + "epoch": 0.4111932418162619, + "grad_norm": 1.9473927021026611, + "learning_rate": 9.310123977697981e-06, + "loss": 0.8378, "step": 5841 }, { - "epoch": 0.4420566758730279, - "grad_norm": 1.776739239692688, - "learning_rate": 1.1437598626582438e-05, - "loss": 0.732, + "epoch": 0.41126363956353396, + "grad_norm": 1.8602027893066406, + "learning_rate": 9.308617233904105e-06, + "loss": 0.6679, "step": 5842 }, { - "epoch": 0.4421323445953615, - "grad_norm": 1.9327268600463867, - "learning_rate": 1.1435430513881705e-05, - "loss": 0.7917, + "epoch": 0.41133403731080603, + "grad_norm": 1.9628676176071167, + "learning_rate": 9.307110370083132e-06, + "loss": 0.6429, "step": 5843 }, { - "epoch": 0.4422080133176951, - "grad_norm": 2.4494521617889404, - "learning_rate": 1.1433262248666438e-05, - "loss": 0.7542, + "epoch": 0.41140443505807817, + "grad_norm": 1.5905227661132812, + "learning_rate": 9.305603386313398e-06, + "loss": 0.6552, "step": 5844 }, { - "epoch": 0.44228368204002877, - "grad_norm": 2.140435218811035, - "learning_rate": 1.1431093831072414e-05, - "loss": 0.5638, + "epoch": 0.41147483280535024, + "grad_norm": 2.368514060974121, + "learning_rate": 9.304096282673257e-06, + "loss": 0.7594, "step": 5845 }, { - "epoch": 0.4423593507623624, - "grad_norm": 2.0090110301971436, - "learning_rate": 1.1428925261235437e-05, - "loss": 0.6552, + "epoch": 0.4115452305526223, + "grad_norm": 1.785260558128357, + "learning_rate": 9.302589059241062e-06, + "loss": 0.7107, "step": 5846 }, { - "epoch": 0.442435019484696, - "grad_norm": 2.2307565212249756, - "learning_rate": 1.14267565392913e-05, - "loss": 0.9066, + "epoch": 0.4116156282998944, + "grad_norm": 1.6443688869476318, + "learning_rate": 9.301081716095178e-06, + "loss": 0.803, "step": 5847 }, { - "epoch": 0.4425106882070296, - "grad_norm": 1.7167367935180664, - "learning_rate": 1.142458766537582e-05, - "loss": 0.7067, + "epoch": 0.4116860260471665, + "grad_norm": 1.7285079956054688, + "learning_rate": 9.299574253313972e-06, + "loss": 0.6149, "step": 5848 }, { - "epoch": 0.44258635692936327, - "grad_norm": 1.934412956237793, - "learning_rate": 1.1422418639624818e-05, - "loss": 0.641, + "epoch": 0.41175642379443855, + "grad_norm": 1.6724612712860107, + "learning_rate": 9.298066670975818e-06, + "loss": 0.721, "step": 5849 }, { - "epoch": 0.4426620256516969, - "grad_norm": 1.84349524974823, - "learning_rate": 1.142024946217413e-05, - "loss": 0.8343, + "epoch": 0.4118268215417107, + "grad_norm": 1.829293131828308, + "learning_rate": 9.296558969159096e-06, + "loss": 0.6668, "step": 5850 }, { - "epoch": 0.4427376943740305, - "grad_norm": 1.82757568359375, - "learning_rate": 1.1418080133159588e-05, - "loss": 0.7165, + "epoch": 0.41189721928898276, + "grad_norm": 1.6967962980270386, + "learning_rate": 9.295051147942191e-06, + "loss": 0.7854, "step": 5851 }, { - "epoch": 0.4428133630963641, - "grad_norm": 1.902925968170166, - "learning_rate": 1.1415910652717046e-05, - "loss": 0.7806, + "epoch": 0.41196761703625484, + "grad_norm": 2.2284326553344727, + "learning_rate": 9.293543207403503e-06, + "loss": 0.7792, "step": 5852 }, { - "epoch": 0.44288903181869776, - "grad_norm": 2.100587844848633, - "learning_rate": 1.1413741020982369e-05, - "loss": 0.7068, + "epoch": 0.4120380147835269, + "grad_norm": 1.6074142456054688, + "learning_rate": 9.292035147621423e-06, + "loss": 0.7999, "step": 5853 }, { - "epoch": 0.4429647005410314, - "grad_norm": 2.090022325515747, - "learning_rate": 1.1411571238091419e-05, - "loss": 0.6824, + "epoch": 0.412108412530799, + "grad_norm": 1.6968129873275757, + "learning_rate": 9.29052696867436e-06, + "loss": 0.7158, "step": 5854 }, { - "epoch": 0.443040369263365, - "grad_norm": 1.6007441282272339, - "learning_rate": 1.1409401304180081e-05, - "loss": 0.7933, + "epoch": 0.41217881027807113, + "grad_norm": 1.7896620035171509, + "learning_rate": 9.289018670640728e-06, + "loss": 0.7796, "step": 5855 }, { - "epoch": 0.4431160379856986, - "grad_norm": 2.1241252422332764, - "learning_rate": 1.1407231219384238e-05, - "loss": 0.7396, + "epoch": 0.4122492080253432, + "grad_norm": 1.837199091911316, + "learning_rate": 9.28751025359894e-06, + "loss": 0.7851, "step": 5856 }, { - "epoch": 0.44319170670803226, - "grad_norm": 2.4907209873199463, - "learning_rate": 1.140506098383979e-05, - "loss": 0.8325, + "epoch": 0.4123196057726153, + "grad_norm": 1.5770806074142456, + "learning_rate": 9.286001717627421e-06, + "loss": 0.6915, "step": 5857 }, { - "epoch": 0.4432673754303659, - "grad_norm": 1.7543824911117554, - "learning_rate": 1.1402890597682648e-05, - "loss": 0.6119, + "epoch": 0.41239000351988736, + "grad_norm": 1.8756579160690308, + "learning_rate": 9.284493062804606e-06, + "loss": 0.7347, "step": 5858 }, { - "epoch": 0.4433430441526995, - "grad_norm": 2.4518723487854004, - "learning_rate": 1.1400720061048718e-05, - "loss": 0.8612, + "epoch": 0.41246040126715944, + "grad_norm": 1.7037806510925293, + "learning_rate": 9.282984289208926e-06, + "loss": 0.6999, "step": 5859 }, { - "epoch": 0.4434187128750331, - "grad_norm": 2.1455647945404053, - "learning_rate": 1.139854937407394e-05, - "loss": 0.7573, + "epoch": 0.4125307990144315, + "grad_norm": 1.6347579956054688, + "learning_rate": 9.281475396918823e-06, + "loss": 0.7065, "step": 5860 }, { - "epoch": 0.4434943815973667, - "grad_norm": 2.118077039718628, - "learning_rate": 1.1396378536894239e-05, - "loss": 0.6258, + "epoch": 0.41260119676170365, + "grad_norm": 1.9985543489456177, + "learning_rate": 9.279966386012751e-06, + "loss": 0.6959, "step": 5861 }, { - "epoch": 0.44357005031970037, - "grad_norm": 1.8771562576293945, - "learning_rate": 1.1394207549645564e-05, - "loss": 0.5765, + "epoch": 0.4126715945089757, + "grad_norm": 2.01326847076416, + "learning_rate": 9.278457256569161e-06, + "loss": 0.6603, "step": 5862 }, { - "epoch": 0.443645719042034, - "grad_norm": 2.192807912826538, - "learning_rate": 1.1392036412463868e-05, - "loss": 0.6963, + "epoch": 0.4127419922562478, + "grad_norm": 1.4518811702728271, + "learning_rate": 9.276948008666515e-06, + "loss": 0.6112, "step": 5863 }, { - "epoch": 0.4437213877643676, - "grad_norm": 3.553529739379883, - "learning_rate": 1.1389865125485116e-05, - "loss": 0.6827, + "epoch": 0.4128123900035199, + "grad_norm": 1.518276333808899, + "learning_rate": 9.27543864238328e-06, + "loss": 0.6956, "step": 5864 }, { - "epoch": 0.4437970564867012, - "grad_norm": 3.7018728256225586, - "learning_rate": 1.138769368884528e-05, - "loss": 0.6063, + "epoch": 0.41288278775079196, + "grad_norm": 1.9940561056137085, + "learning_rate": 9.273929157797927e-06, + "loss": 0.6012, "step": 5865 }, { - "epoch": 0.44387272520903487, - "grad_norm": 2.078188896179199, - "learning_rate": 1.138552210268034e-05, - "loss": 0.7483, + "epoch": 0.41295318549806403, + "grad_norm": 2.0651774406433105, + "learning_rate": 9.27241955498894e-06, + "loss": 0.6584, "step": 5866 }, { - "epoch": 0.4439483939313685, - "grad_norm": 2.0784387588500977, - "learning_rate": 1.1383350367126292e-05, - "loss": 0.7824, + "epoch": 0.41302358324533617, + "grad_norm": 1.6378350257873535, + "learning_rate": 9.2709098340348e-06, + "loss": 0.6374, "step": 5867 }, { - "epoch": 0.4440240626537021, - "grad_norm": 3.4652624130249023, - "learning_rate": 1.1381178482319136e-05, - "loss": 0.7509, + "epoch": 0.41309398099260825, + "grad_norm": 1.7147190570831299, + "learning_rate": 9.269399995014005e-06, + "loss": 0.7188, "step": 5868 }, { - "epoch": 0.4440997313760357, - "grad_norm": 1.8125836849212646, - "learning_rate": 1.1379006448394882e-05, - "loss": 0.6492, + "epoch": 0.4131643787398803, + "grad_norm": 2.0653791427612305, + "learning_rate": 9.267890038005046e-06, + "loss": 0.7582, "step": 5869 }, { - "epoch": 0.44417540009836937, - "grad_norm": 2.023577928543091, - "learning_rate": 1.1376834265489545e-05, - "loss": 0.6456, + "epoch": 0.4132347764871524, + "grad_norm": 1.9030970335006714, + "learning_rate": 9.26637996308643e-06, + "loss": 0.6886, "step": 5870 }, { - "epoch": 0.444251068820703, - "grad_norm": 2.251408100128174, - "learning_rate": 1.1374661933739165e-05, - "loss": 0.7316, + "epoch": 0.4133051742344245, + "grad_norm": 2.076634168624878, + "learning_rate": 9.264869770336667e-06, + "loss": 0.7335, "step": 5871 }, { - "epoch": 0.4443267375430366, - "grad_norm": 1.5530261993408203, - "learning_rate": 1.137248945327977e-05, - "loss": 0.9224, + "epoch": 0.4133755719816966, + "grad_norm": 2.021449089050293, + "learning_rate": 9.263359459834275e-06, + "loss": 0.741, "step": 5872 }, { - "epoch": 0.4444024062653702, - "grad_norm": 1.9940237998962402, - "learning_rate": 1.1370316824247414e-05, - "loss": 0.6529, + "epoch": 0.4134459697289687, + "grad_norm": 1.9878677129745483, + "learning_rate": 9.261849031657774e-06, + "loss": 0.8162, "step": 5873 }, { - "epoch": 0.4444780749877038, - "grad_norm": 2.0414655208587646, - "learning_rate": 1.1368144046778151e-05, - "loss": 0.6643, + "epoch": 0.41351636747624076, + "grad_norm": 1.7260687351226807, + "learning_rate": 9.260338485885696e-06, + "loss": 0.6415, "step": 5874 }, { - "epoch": 0.4445537437100375, - "grad_norm": 2.2049062252044678, - "learning_rate": 1.1365971121008047e-05, - "loss": 0.586, + "epoch": 0.41358676522351284, + "grad_norm": 2.24116849899292, + "learning_rate": 9.25882782259657e-06, + "loss": 0.7929, "step": 5875 }, { - "epoch": 0.4446294124323711, - "grad_norm": 3.8549551963806152, - "learning_rate": 1.1363798047073183e-05, - "loss": 0.9112, + "epoch": 0.4136571629707849, + "grad_norm": 1.5809831619262695, + "learning_rate": 9.257317041868939e-06, + "loss": 0.6217, "step": 5876 }, { - "epoch": 0.4447050811547047, - "grad_norm": 2.402311325073242, - "learning_rate": 1.1361624825109634e-05, - "loss": 0.7245, + "epoch": 0.413727560718057, + "grad_norm": 1.8305349349975586, + "learning_rate": 9.255806143781353e-06, + "loss": 0.689, "step": 5877 }, { - "epoch": 0.4447807498770383, - "grad_norm": 2.2628328800201416, - "learning_rate": 1.1359451455253505e-05, - "loss": 0.786, + "epoch": 0.41379795846532913, + "grad_norm": 1.717634916305542, + "learning_rate": 9.254295128412362e-06, + "loss": 0.7234, "step": 5878 }, { - "epoch": 0.44485641859937197, - "grad_norm": 2.2788891792297363, - "learning_rate": 1.1357277937640893e-05, - "loss": 0.751, + "epoch": 0.4138683562126012, + "grad_norm": 2.18241548538208, + "learning_rate": 9.252783995840523e-06, + "loss": 0.6636, "step": 5879 }, { - "epoch": 0.4449320873217056, - "grad_norm": 2.0168333053588867, - "learning_rate": 1.135510427240791e-05, - "loss": 0.7533, + "epoch": 0.4139387539598733, + "grad_norm": 1.6264292001724243, + "learning_rate": 9.251272746144407e-06, + "loss": 0.7907, "step": 5880 }, { - "epoch": 0.4450077560440392, - "grad_norm": 1.9024062156677246, - "learning_rate": 1.1352930459690684e-05, - "loss": 0.677, + "epoch": 0.41400915170714536, + "grad_norm": 1.794415831565857, + "learning_rate": 9.249761379402582e-06, + "loss": 0.7438, "step": 5881 }, { - "epoch": 0.4450834247663728, - "grad_norm": 1.9839564561843872, - "learning_rate": 1.135075649962534e-05, - "loss": 0.7093, + "epoch": 0.41407954945441744, + "grad_norm": 1.747355580329895, + "learning_rate": 9.248249895693624e-06, + "loss": 0.7072, "step": 5882 }, { - "epoch": 0.44515909348870647, - "grad_norm": 2.0501761436462402, - "learning_rate": 1.1348582392348022e-05, - "loss": 0.733, + "epoch": 0.41414994720168957, + "grad_norm": 1.6854356527328491, + "learning_rate": 9.246738295096116e-06, + "loss": 0.6312, "step": 5883 }, { - "epoch": 0.4452347622110401, - "grad_norm": 2.069188356399536, - "learning_rate": 1.1346408137994876e-05, - "loss": 0.7962, + "epoch": 0.41422034494896165, + "grad_norm": 1.9729335308074951, + "learning_rate": 9.245226577688652e-06, + "loss": 0.7487, "step": 5884 }, { - "epoch": 0.4453104309333737, - "grad_norm": 2.593379497528076, - "learning_rate": 1.1344233736702065e-05, - "loss": 0.6942, + "epoch": 0.4142907426962337, + "grad_norm": 2.06073260307312, + "learning_rate": 9.243714743549825e-06, + "loss": 0.7835, "step": 5885 }, { - "epoch": 0.4453860996557073, - "grad_norm": 2.6586804389953613, - "learning_rate": 1.1342059188605756e-05, - "loss": 0.7377, + "epoch": 0.4143611404435058, + "grad_norm": 1.9175682067871094, + "learning_rate": 9.242202792758235e-06, + "loss": 0.7067, "step": 5886 }, { - "epoch": 0.4454617683780409, - "grad_norm": 2.206529140472412, - "learning_rate": 1.1339884493842124e-05, - "loss": 0.6509, + "epoch": 0.4144315381907779, + "grad_norm": 2.0126290321350098, + "learning_rate": 9.240690725392493e-06, + "loss": 0.7501, "step": 5887 }, { - "epoch": 0.4455374371003746, - "grad_norm": 2.4398305416107178, - "learning_rate": 1.1337709652547357e-05, - "loss": 0.825, + "epoch": 0.41450193593804996, + "grad_norm": 1.7136175632476807, + "learning_rate": 9.23917854153121e-06, + "loss": 0.7689, "step": 5888 }, { - "epoch": 0.4456131058227082, - "grad_norm": 2.344985008239746, - "learning_rate": 1.1335534664857651e-05, - "loss": 0.675, + "epoch": 0.4145723336853221, + "grad_norm": 1.761779546737671, + "learning_rate": 9.237666241253005e-06, + "loss": 0.6554, "step": 5889 }, { - "epoch": 0.4456887745450418, - "grad_norm": 2.7695236206054688, - "learning_rate": 1.1333359530909208e-05, - "loss": 0.6979, + "epoch": 0.41464273143259417, + "grad_norm": 2.0988869667053223, + "learning_rate": 9.236153824636508e-06, + "loss": 0.6645, "step": 5890 }, { - "epoch": 0.4457644432673754, - "grad_norm": 3.5418498516082764, - "learning_rate": 1.1331184250838249e-05, - "loss": 0.6195, + "epoch": 0.41471312917986625, + "grad_norm": 1.7515437602996826, + "learning_rate": 9.23464129176035e-06, + "loss": 0.6615, "step": 5891 }, { - "epoch": 0.4458401119897091, - "grad_norm": 2.1728100776672363, - "learning_rate": 1.132900882478099e-05, - "loss": 0.7099, + "epoch": 0.4147835269271383, + "grad_norm": 1.7992063760757446, + "learning_rate": 9.233128642703164e-06, + "loss": 0.6882, "step": 5892 }, { - "epoch": 0.4459157807120427, - "grad_norm": 2.0681023597717285, - "learning_rate": 1.1326833252873663e-05, - "loss": 0.7016, + "epoch": 0.4148539246744104, + "grad_norm": 1.6499199867248535, + "learning_rate": 9.231615877543597e-06, + "loss": 0.7349, "step": 5893 }, { - "epoch": 0.4459914494343763, - "grad_norm": 2.0414974689483643, - "learning_rate": 1.1324657535252514e-05, - "loss": 0.6981, + "epoch": 0.4149243224216825, + "grad_norm": 1.6978577375411987, + "learning_rate": 9.230102996360301e-06, + "loss": 0.7738, "step": 5894 }, { - "epoch": 0.4460671181567099, - "grad_norm": 2.562387228012085, - "learning_rate": 1.1322481672053791e-05, - "loss": 0.6492, + "epoch": 0.4149947201689546, + "grad_norm": 1.6807516813278198, + "learning_rate": 9.22858999923193e-06, + "loss": 0.7187, "step": 5895 }, { - "epoch": 0.4461427868790436, - "grad_norm": 1.7492594718933105, - "learning_rate": 1.1320305663413752e-05, - "loss": 0.5471, + "epoch": 0.4150651179162267, + "grad_norm": 1.8575568199157715, + "learning_rate": 9.227076886237147e-06, + "loss": 0.7546, "step": 5896 }, { - "epoch": 0.4462184556013772, - "grad_norm": 2.4081857204437256, - "learning_rate": 1.1318129509468671e-05, - "loss": 0.7666, + "epoch": 0.41513551566349877, + "grad_norm": 1.6852152347564697, + "learning_rate": 9.225563657454621e-06, + "loss": 0.5608, "step": 5897 }, { - "epoch": 0.4462941243237108, - "grad_norm": 2.3385374546051025, - "learning_rate": 1.1315953210354821e-05, - "loss": 0.6716, + "epoch": 0.41520591341077084, + "grad_norm": 1.6787410974502563, + "learning_rate": 9.224050312963023e-06, + "loss": 0.6424, "step": 5898 }, { - "epoch": 0.4463697930460444, - "grad_norm": 2.440551280975342, - "learning_rate": 1.1313776766208492e-05, - "loss": 0.9059, + "epoch": 0.4152763111580429, + "grad_norm": 2.0938761234283447, + "learning_rate": 9.222536852841036e-06, + "loss": 0.7235, "step": 5899 }, { - "epoch": 0.446445461768378, - "grad_norm": 1.830227017402649, - "learning_rate": 1.1311600177165972e-05, - "loss": 0.6836, + "epoch": 0.41534670890531505, + "grad_norm": 1.657214879989624, + "learning_rate": 9.221023277167346e-06, + "loss": 0.7116, "step": 5900 }, { - "epoch": 0.4465211304907117, - "grad_norm": 1.9618531465530396, - "learning_rate": 1.130942344336358e-05, - "loss": 0.7531, + "epoch": 0.41541710665258713, + "grad_norm": 2.1542892456054688, + "learning_rate": 9.219509586020642e-06, + "loss": 0.8124, "step": 5901 }, { - "epoch": 0.4465967992130453, - "grad_norm": 1.9825726747512817, - "learning_rate": 1.1307246564937618e-05, - "loss": 0.8805, + "epoch": 0.4154875043998592, + "grad_norm": 1.6519473791122437, + "learning_rate": 9.217995779479624e-06, + "loss": 0.6792, "step": 5902 }, { - "epoch": 0.4466724679353789, - "grad_norm": 2.091987133026123, - "learning_rate": 1.1305069542024414e-05, - "loss": 0.7716, + "epoch": 0.4155579021471313, + "grad_norm": 1.7338699102401733, + "learning_rate": 9.216481857623e-06, + "loss": 0.8351, "step": 5903 }, { - "epoch": 0.4467481366577125, - "grad_norm": 1.93959641456604, - "learning_rate": 1.1302892374760301e-05, - "loss": 0.6985, + "epoch": 0.41562829989440336, + "grad_norm": 1.5467700958251953, + "learning_rate": 9.214967820529477e-06, + "loss": 0.5371, "step": 5904 }, { - "epoch": 0.4468238053800462, - "grad_norm": 2.1887693405151367, - "learning_rate": 1.130071506328162e-05, - "loss": 0.7668, + "epoch": 0.41569869764167544, + "grad_norm": 1.8631147146224976, + "learning_rate": 9.213453668277768e-06, + "loss": 0.7494, "step": 5905 }, { - "epoch": 0.4468994741023798, - "grad_norm": 1.8061445951461792, - "learning_rate": 1.1298537607724716e-05, - "loss": 0.6938, + "epoch": 0.4157690953889476, + "grad_norm": 1.7144228219985962, + "learning_rate": 9.211939400946599e-06, + "loss": 0.7349, "step": 5906 }, { - "epoch": 0.4469751428247134, - "grad_norm": 1.748567819595337, - "learning_rate": 1.1296360008225957e-05, - "loss": 0.6903, + "epoch": 0.41583949313621965, + "grad_norm": 1.6954641342163086, + "learning_rate": 9.210425018614697e-06, + "loss": 0.6924, "step": 5907 }, { - "epoch": 0.447050811547047, - "grad_norm": 1.7834432125091553, - "learning_rate": 1.1294182264921704e-05, - "loss": 0.6602, + "epoch": 0.41590989088349173, + "grad_norm": 1.9504185914993286, + "learning_rate": 9.208910521360798e-06, + "loss": 0.7737, "step": 5908 }, { - "epoch": 0.4471264802693807, - "grad_norm": 1.9683499336242676, - "learning_rate": 1.1292004377948338e-05, - "loss": 0.7615, + "epoch": 0.4159802886307638, + "grad_norm": 1.6038826704025269, + "learning_rate": 9.20739590926364e-06, + "loss": 0.8121, "step": 5909 }, { - "epoch": 0.4472021489917143, - "grad_norm": 2.5268006324768066, - "learning_rate": 1.1289826347442247e-05, - "loss": 0.6007, + "epoch": 0.4160506863780359, + "grad_norm": 3.168085813522339, + "learning_rate": 9.205881182401968e-06, + "loss": 0.7033, "step": 5910 }, { - "epoch": 0.4472778177140479, - "grad_norm": 2.608851671218872, - "learning_rate": 1.1287648173539822e-05, - "loss": 0.7841, + "epoch": 0.416121084125308, + "grad_norm": 1.8201075792312622, + "learning_rate": 9.204366340854537e-06, + "loss": 0.6996, "step": 5911 }, { - "epoch": 0.4473534864363815, - "grad_norm": 2.4634876251220703, - "learning_rate": 1.128546985637747e-05, - "loss": 0.6308, + "epoch": 0.4161914818725801, + "grad_norm": 1.6835488080978394, + "learning_rate": 9.202851384700105e-06, + "loss": 0.5643, "step": 5912 }, { - "epoch": 0.4474291551587151, - "grad_norm": 1.7287302017211914, - "learning_rate": 1.1283291396091601e-05, - "loss": 0.6105, + "epoch": 0.41626187961985217, + "grad_norm": 1.5713342428207397, + "learning_rate": 9.20133631401743e-06, + "loss": 0.6248, "step": 5913 }, { - "epoch": 0.4475048238810488, - "grad_norm": 1.982318639755249, - "learning_rate": 1.1281112792818641e-05, - "loss": 0.7053, + "epoch": 0.41633227736712425, + "grad_norm": 1.5994939804077148, + "learning_rate": 9.199821128885291e-06, + "loss": 0.7044, "step": 5914 }, { - "epoch": 0.4475804926033824, - "grad_norm": 1.8996347188949585, - "learning_rate": 1.1278934046695023e-05, - "loss": 0.7603, + "epoch": 0.4164026751143963, + "grad_norm": 1.5554718971252441, + "learning_rate": 9.198305829382456e-06, + "loss": 0.6726, "step": 5915 }, { - "epoch": 0.447656161325716, - "grad_norm": 2.161860942840576, - "learning_rate": 1.1276755157857179e-05, - "loss": 0.7217, + "epoch": 0.4164730728616684, + "grad_norm": 2.0141658782958984, + "learning_rate": 9.196790415587712e-06, + "loss": 0.7387, "step": 5916 }, { - "epoch": 0.4477318300480496, - "grad_norm": 2.7637171745300293, - "learning_rate": 1.1274576126441568e-05, - "loss": 0.7831, + "epoch": 0.41654347060894054, + "grad_norm": 1.692787766456604, + "learning_rate": 9.195274887579844e-06, + "loss": 0.7238, "step": 5917 }, { - "epoch": 0.4478074987703833, - "grad_norm": 1.9695764780044556, - "learning_rate": 1.127239695258464e-05, - "loss": 0.6614, + "epoch": 0.4166138683562126, + "grad_norm": 2.2205770015716553, + "learning_rate": 9.193759245437649e-06, + "loss": 0.6621, "step": 5918 }, { - "epoch": 0.4478831674927169, - "grad_norm": 2.0457887649536133, - "learning_rate": 1.1270217636422864e-05, - "loss": 0.7391, + "epoch": 0.4166842661034847, + "grad_norm": 1.5265004634857178, + "learning_rate": 9.19224348923992e-06, + "loss": 0.5953, "step": 5919 }, { - "epoch": 0.4479588362150505, - "grad_norm": 1.87351393699646, - "learning_rate": 1.1268038178092718e-05, - "loss": 0.8303, + "epoch": 0.41675466385075677, + "grad_norm": 1.6776925325393677, + "learning_rate": 9.190727619065467e-06, + "loss": 0.695, "step": 5920 }, { - "epoch": 0.4480345049373841, - "grad_norm": 2.1492748260498047, - "learning_rate": 1.1265858577730685e-05, - "loss": 0.6984, + "epoch": 0.41682506159802885, + "grad_norm": 1.5745277404785156, + "learning_rate": 9.189211634993104e-06, + "loss": 0.7163, "step": 5921 }, { - "epoch": 0.4481101736597178, - "grad_norm": 2.0137827396392822, - "learning_rate": 1.1263678835473263e-05, - "loss": 0.7522, + "epoch": 0.4168954593453009, + "grad_norm": 2.0779683589935303, + "learning_rate": 9.187695537101643e-06, + "loss": 0.7455, "step": 5922 }, { - "epoch": 0.4481858423820514, - "grad_norm": 2.2012124061584473, - "learning_rate": 1.1261498951456948e-05, - "loss": 0.6075, + "epoch": 0.41696585709257306, + "grad_norm": 1.7018486261367798, + "learning_rate": 9.18617932546991e-06, + "loss": 0.686, "step": 5923 }, { - "epoch": 0.448261511104385, - "grad_norm": 2.0582940578460693, - "learning_rate": 1.1259318925818253e-05, - "loss": 0.6671, + "epoch": 0.41703625483984513, + "grad_norm": 1.8314191102981567, + "learning_rate": 9.184663000176736e-06, + "loss": 0.6973, "step": 5924 }, { - "epoch": 0.4483371798267186, - "grad_norm": 2.405733823776245, - "learning_rate": 1.1257138758693701e-05, - "loss": 0.8391, + "epoch": 0.4171066525871172, + "grad_norm": 1.7758718729019165, + "learning_rate": 9.183146561300953e-06, + "loss": 0.6588, "step": 5925 }, { - "epoch": 0.4484128485490522, - "grad_norm": 3.624671697616577, - "learning_rate": 1.1254958450219817e-05, - "loss": 0.6537, + "epoch": 0.4171770503343893, + "grad_norm": 1.9741671085357666, + "learning_rate": 9.1816300089214e-06, + "loss": 0.6741, "step": 5926 }, { - "epoch": 0.4484885172713859, - "grad_norm": 2.217015504837036, - "learning_rate": 1.1252778000533143e-05, - "loss": 0.6828, + "epoch": 0.41724744808166137, + "grad_norm": 1.8304142951965332, + "learning_rate": 9.180113343116932e-06, + "loss": 0.706, "step": 5927 }, { - "epoch": 0.4485641859937195, - "grad_norm": 2.174923896789551, - "learning_rate": 1.1250597409770225e-05, - "loss": 0.7816, + "epoch": 0.4173178458289335, + "grad_norm": 2.0238025188446045, + "learning_rate": 9.178596563966393e-06, + "loss": 0.7455, "step": 5928 }, { - "epoch": 0.4486398547160531, - "grad_norm": 1.7993848323822021, - "learning_rate": 1.1248416678067619e-05, - "loss": 0.6842, + "epoch": 0.4173882435762056, + "grad_norm": 1.7662196159362793, + "learning_rate": 9.177079671548646e-06, + "loss": 0.699, "step": 5929 }, { - "epoch": 0.4487155234383867, - "grad_norm": 1.861826777458191, - "learning_rate": 1.1246235805561887e-05, - "loss": 0.8071, + "epoch": 0.41745864132347765, + "grad_norm": 1.6563290357589722, + "learning_rate": 9.175562665942558e-06, + "loss": 0.7385, "step": 5930 }, { - "epoch": 0.4487911921607204, - "grad_norm": 2.257115125656128, - "learning_rate": 1.1244054792389602e-05, - "loss": 0.7332, + "epoch": 0.41752903907074973, + "grad_norm": 1.796033263206482, + "learning_rate": 9.174045547226995e-06, + "loss": 0.6337, "step": 5931 }, { - "epoch": 0.448866860883054, - "grad_norm": 2.5872914791107178, - "learning_rate": 1.1241873638687348e-05, - "loss": 0.6017, + "epoch": 0.4175994368180218, + "grad_norm": 1.83510160446167, + "learning_rate": 9.172528315480834e-06, + "loss": 0.7263, "step": 5932 }, { - "epoch": 0.4489425296053876, - "grad_norm": 1.890411138534546, - "learning_rate": 1.1239692344591719e-05, - "loss": 0.6682, + "epoch": 0.4176698345652939, + "grad_norm": 1.7271041870117188, + "learning_rate": 9.17101097078296e-06, + "loss": 0.6868, "step": 5933 }, { - "epoch": 0.4490181983277212, - "grad_norm": 3.7354846000671387, - "learning_rate": 1.1237510910239306e-05, - "loss": 0.7684, + "epoch": 0.417740232312566, + "grad_norm": 1.6123604774475098, + "learning_rate": 9.169493513212257e-06, + "loss": 0.737, "step": 5934 }, { - "epoch": 0.4490938670500549, - "grad_norm": 1.9048963785171509, - "learning_rate": 1.1235329335766728e-05, - "loss": 0.524, + "epoch": 0.4178106300598381, + "grad_norm": 1.5335601568222046, + "learning_rate": 9.167975942847623e-06, + "loss": 0.6025, "step": 5935 }, { - "epoch": 0.4491695357723885, - "grad_norm": 1.8189629316329956, - "learning_rate": 1.1233147621310594e-05, - "loss": 0.6492, + "epoch": 0.4178810278071102, + "grad_norm": 1.5864094495773315, + "learning_rate": 9.166458259767957e-06, + "loss": 0.7115, "step": 5936 }, { - "epoch": 0.4492452044947221, - "grad_norm": 1.712294101715088, - "learning_rate": 1.1230965767007535e-05, - "loss": 0.7487, + "epoch": 0.41795142555438225, + "grad_norm": 1.354303240776062, + "learning_rate": 9.164940464052164e-06, + "loss": 0.59, "step": 5937 }, { - "epoch": 0.4493208732170557, - "grad_norm": 2.2259769439697266, - "learning_rate": 1.1228783772994184e-05, - "loss": 0.7662, + "epoch": 0.4180218233016543, + "grad_norm": 1.6473647356033325, + "learning_rate": 9.163422555779153e-06, + "loss": 0.7264, "step": 5938 }, { - "epoch": 0.4493965419393893, - "grad_norm": 3.1922950744628906, - "learning_rate": 1.122660163940718e-05, - "loss": 0.8065, + "epoch": 0.41809222104892646, + "grad_norm": 1.788986086845398, + "learning_rate": 9.161904535027848e-06, + "loss": 0.8376, "step": 5939 }, { - "epoch": 0.449472210661723, - "grad_norm": 2.1241049766540527, - "learning_rate": 1.1224419366383186e-05, - "loss": 0.6927, + "epoch": 0.41816261879619854, + "grad_norm": 8.183385848999023, + "learning_rate": 9.160386401877165e-06, + "loss": 0.5682, "step": 5940 }, { - "epoch": 0.4495478793840566, - "grad_norm": 2.3622326850891113, - "learning_rate": 1.1222236954058853e-05, - "loss": 0.8493, + "epoch": 0.4182330165434706, + "grad_norm": 2.1290781497955322, + "learning_rate": 9.158868156406039e-06, + "loss": 0.7029, "step": 5941 }, { - "epoch": 0.4496235481063902, - "grad_norm": 3.5606555938720703, - "learning_rate": 1.1220054402570854e-05, - "loss": 0.773, + "epoch": 0.4183034142907427, + "grad_norm": 2.003115177154541, + "learning_rate": 9.157349798693402e-06, + "loss": 0.6828, "step": 5942 }, { - "epoch": 0.4496992168287238, - "grad_norm": 2.492074966430664, - "learning_rate": 1.1217871712055869e-05, - "loss": 0.6058, + "epoch": 0.41837381203801477, + "grad_norm": 1.631263017654419, + "learning_rate": 9.155831328818193e-06, + "loss": 0.6404, "step": 5943 }, { - "epoch": 0.4497748855510575, - "grad_norm": 2.104963779449463, - "learning_rate": 1.1215688882650582e-05, - "loss": 0.7597, + "epoch": 0.41844420978528685, + "grad_norm": 1.727770447731018, + "learning_rate": 9.154312746859362e-06, + "loss": 0.687, "step": 5944 }, { - "epoch": 0.4498505542733911, - "grad_norm": 1.9802522659301758, - "learning_rate": 1.1213505914491695e-05, - "loss": 0.7904, + "epoch": 0.418514607532559, + "grad_norm": 1.6331084966659546, + "learning_rate": 9.152794052895861e-06, + "loss": 0.6774, "step": 5945 }, { - "epoch": 0.4499262229957247, - "grad_norm": 1.8964923620224, - "learning_rate": 1.1211322807715906e-05, - "loss": 0.7552, + "epoch": 0.41858500527983106, + "grad_norm": 1.8184255361557007, + "learning_rate": 9.151275247006647e-06, + "loss": 0.6851, "step": 5946 }, { - "epoch": 0.4500018917180583, - "grad_norm": 3.3813583850860596, - "learning_rate": 1.1209139562459929e-05, - "loss": 0.6773, + "epoch": 0.41865540302710313, + "grad_norm": 1.7146469354629517, + "learning_rate": 9.149756329270683e-06, + "loss": 0.682, "step": 5947 }, { - "epoch": 0.450077560440392, - "grad_norm": 2.5931599140167236, - "learning_rate": 1.120695617886049e-05, - "loss": 0.6421, + "epoch": 0.4187258007743752, + "grad_norm": 1.820462703704834, + "learning_rate": 9.148237299766943e-06, + "loss": 0.7062, "step": 5948 }, { - "epoch": 0.4501532291627256, - "grad_norm": 2.148244857788086, - "learning_rate": 1.1204772657054314e-05, - "loss": 0.8242, + "epoch": 0.4187961985216473, + "grad_norm": 1.920345425605774, + "learning_rate": 9.1467181585744e-06, + "loss": 0.6719, "step": 5949 }, { - "epoch": 0.4502288978850592, - "grad_norm": 1.9248651266098022, - "learning_rate": 1.1202588997178144e-05, - "loss": 0.737, + "epoch": 0.4188665962689194, + "grad_norm": 1.9192575216293335, + "learning_rate": 9.145198905772034e-06, + "loss": 0.7031, "step": 5950 }, { - "epoch": 0.4503045666073928, - "grad_norm": 2.1882691383361816, - "learning_rate": 1.1200405199368729e-05, - "loss": 0.641, + "epoch": 0.4189369940161915, + "grad_norm": 1.585629940032959, + "learning_rate": 9.143679541438836e-06, + "loss": 0.7008, "step": 5951 }, { - "epoch": 0.45038023532972643, - "grad_norm": 2.8311820030212402, - "learning_rate": 1.119822126376282e-05, - "loss": 0.6549, + "epoch": 0.4190073917634636, + "grad_norm": 1.893792748451233, + "learning_rate": 9.142160065653796e-06, + "loss": 0.6447, "step": 5952 }, { - "epoch": 0.4504559040520601, - "grad_norm": 2.2649013996124268, - "learning_rate": 1.1196037190497188e-05, - "loss": 0.7611, + "epoch": 0.41907778951073565, + "grad_norm": 1.7490506172180176, + "learning_rate": 9.140640478495913e-06, + "loss": 0.6969, "step": 5953 }, { - "epoch": 0.4505315727743937, - "grad_norm": 1.7124543190002441, - "learning_rate": 1.1193852979708604e-05, - "loss": 0.7877, + "epoch": 0.41914818725800773, + "grad_norm": 1.970916509628296, + "learning_rate": 9.139120780044196e-06, + "loss": 0.8116, "step": 5954 }, { - "epoch": 0.4506072414967273, - "grad_norm": 2.419224739074707, - "learning_rate": 1.119166863153385e-05, - "loss": 0.7871, + "epoch": 0.4192185850052798, + "grad_norm": 1.7748239040374756, + "learning_rate": 9.13760097037765e-06, + "loss": 0.5927, "step": 5955 }, { - "epoch": 0.45068291021906093, - "grad_norm": 2.265690565109253, - "learning_rate": 1.1189484146109719e-05, - "loss": 0.5847, + "epoch": 0.41928898275255194, + "grad_norm": 1.6638107299804688, + "learning_rate": 9.136081049575294e-06, + "loss": 0.7526, "step": 5956 }, { - "epoch": 0.4507585789413946, - "grad_norm": 2.1658334732055664, - "learning_rate": 1.1187299523573007e-05, - "loss": 0.6962, + "epoch": 0.419359380499824, + "grad_norm": 2.207810878753662, + "learning_rate": 9.13456101771615e-06, + "loss": 0.8264, "step": 5957 }, { - "epoch": 0.4508342476637282, - "grad_norm": 2.0252439975738525, - "learning_rate": 1.1185114764060528e-05, - "loss": 0.7378, + "epoch": 0.4194297782470961, + "grad_norm": 1.6681687831878662, + "learning_rate": 9.133040874879245e-06, + "loss": 0.7401, "step": 5958 }, { - "epoch": 0.4509099163860618, - "grad_norm": 1.994943380355835, - "learning_rate": 1.118292986770909e-05, - "loss": 0.6885, + "epoch": 0.4195001759943682, + "grad_norm": 1.7101582288742065, + "learning_rate": 9.131520621143614e-06, + "loss": 0.7035, "step": 5959 }, { - "epoch": 0.4509855851083954, - "grad_norm": 2.032151699066162, - "learning_rate": 1.1180744834655526e-05, - "loss": 0.7695, + "epoch": 0.41957057374164025, + "grad_norm": 2.0611424446105957, + "learning_rate": 9.130000256588295e-06, + "loss": 0.6641, "step": 5960 }, { - "epoch": 0.4510612538307291, - "grad_norm": 1.8477638959884644, - "learning_rate": 1.1178559665036666e-05, - "loss": 0.8245, + "epoch": 0.41964097148891233, + "grad_norm": 1.9964933395385742, + "learning_rate": 9.128479781292333e-06, + "loss": 0.7333, "step": 5961 }, { - "epoch": 0.4511369225530627, - "grad_norm": 1.867470145225525, - "learning_rate": 1.1176374358989354e-05, - "loss": 0.6492, + "epoch": 0.41971136923618446, + "grad_norm": 2.1196279525756836, + "learning_rate": 9.12695919533478e-06, + "loss": 0.7799, "step": 5962 }, { - "epoch": 0.4512125912753963, - "grad_norm": 2.083955764770508, - "learning_rate": 1.117418891665044e-05, - "loss": 0.6438, + "epoch": 0.41978176698345654, + "grad_norm": 1.9974027872085571, + "learning_rate": 9.125438498794694e-06, + "loss": 0.7039, "step": 5963 }, { - "epoch": 0.4512882599977299, - "grad_norm": 2.1489977836608887, - "learning_rate": 1.1172003338156787e-05, - "loss": 0.6843, + "epoch": 0.4198521647307286, + "grad_norm": 2.0838675498962402, + "learning_rate": 9.123917691751131e-06, + "loss": 0.7389, "step": 5964 }, { - "epoch": 0.45136392872006353, - "grad_norm": 1.905900478363037, - "learning_rate": 1.1169817623645256e-05, - "loss": 0.6142, + "epoch": 0.4199225624780007, + "grad_norm": 1.790669560432434, + "learning_rate": 9.122396774283168e-06, + "loss": 0.7777, "step": 5965 }, { - "epoch": 0.4514395974423972, - "grad_norm": 2.060368537902832, - "learning_rate": 1.116763177325273e-05, - "loss": 0.76, + "epoch": 0.41999296022527277, + "grad_norm": 2.3371360301971436, + "learning_rate": 9.120875746469873e-06, + "loss": 0.6864, "step": 5966 }, { - "epoch": 0.4515152661647308, - "grad_norm": 2.1221015453338623, - "learning_rate": 1.1165445787116088e-05, - "loss": 0.7409, + "epoch": 0.4200633579725449, + "grad_norm": 1.6986582279205322, + "learning_rate": 9.119354608390327e-06, + "loss": 0.6618, "step": 5967 }, { - "epoch": 0.4515909348870644, - "grad_norm": 1.9896661043167114, - "learning_rate": 1.116325966537223e-05, - "loss": 0.865, + "epoch": 0.420133755719817, + "grad_norm": 1.8604509830474854, + "learning_rate": 9.117833360123614e-06, + "loss": 0.6859, "step": 5968 }, { - "epoch": 0.45166660360939803, - "grad_norm": 1.9330137968063354, - "learning_rate": 1.1161073408158054e-05, - "loss": 0.8041, + "epoch": 0.42020415346708906, + "grad_norm": 1.643764853477478, + "learning_rate": 9.11631200174883e-06, + "loss": 0.774, "step": 5969 }, { - "epoch": 0.4517422723317317, - "grad_norm": 1.8360910415649414, - "learning_rate": 1.115888701561047e-05, - "loss": 0.7317, + "epoch": 0.42027455121436114, + "grad_norm": 1.7667086124420166, + "learning_rate": 9.114790533345067e-06, + "loss": 0.7111, "step": 5970 }, { - "epoch": 0.4518179410540653, - "grad_norm": 2.239154815673828, - "learning_rate": 1.11567004878664e-05, - "loss": 0.765, + "epoch": 0.4203449489616332, + "grad_norm": 1.5120468139648438, + "learning_rate": 9.113268954991427e-06, + "loss": 0.6617, "step": 5971 }, { - "epoch": 0.4518936097763989, - "grad_norm": 2.8562796115875244, - "learning_rate": 1.115451382506277e-05, - "loss": 0.9, + "epoch": 0.4204153467089053, + "grad_norm": 1.6778124570846558, + "learning_rate": 9.111747266767024e-06, + "loss": 0.6336, "step": 5972 }, { - "epoch": 0.45196927849873253, - "grad_norm": 1.8659065961837769, - "learning_rate": 1.1152327027336513e-05, - "loss": 0.6336, + "epoch": 0.4204857444561774, + "grad_norm": 1.7377848625183105, + "learning_rate": 9.110225468750963e-06, + "loss": 0.6751, "step": 5973 }, { - "epoch": 0.4520449472210662, - "grad_norm": 2.5955421924591064, - "learning_rate": 1.1150140094824579e-05, - "loss": 0.6623, + "epoch": 0.4205561422034495, + "grad_norm": 1.8581351041793823, + "learning_rate": 9.108703561022372e-06, + "loss": 0.6995, "step": 5974 }, { - "epoch": 0.4521206159433998, - "grad_norm": 1.7861441373825073, - "learning_rate": 1.1147953027663919e-05, - "loss": 0.6716, + "epoch": 0.4206265399507216, + "grad_norm": 1.9778764247894287, + "learning_rate": 9.107181543660373e-06, + "loss": 0.7295, "step": 5975 }, { - "epoch": 0.4521962846657334, - "grad_norm": 1.989698052406311, - "learning_rate": 1.114576582599149e-05, - "loss": 0.5853, + "epoch": 0.42069693769799366, + "grad_norm": 1.6007062196731567, + "learning_rate": 9.105659416744099e-06, + "loss": 0.7611, "step": 5976 }, { - "epoch": 0.452271953388067, - "grad_norm": 1.9923795461654663, - "learning_rate": 1.1143578489944266e-05, - "loss": 0.7264, + "epoch": 0.42076733544526573, + "grad_norm": 1.6757190227508545, + "learning_rate": 9.10413718035268e-06, + "loss": 0.7581, "step": 5977 }, { - "epoch": 0.4523476221104007, - "grad_norm": 2.052943229675293, - "learning_rate": 1.1141391019659223e-05, - "loss": 0.6532, + "epoch": 0.42083773319253787, + "grad_norm": 1.6099828481674194, + "learning_rate": 9.102614834565268e-06, + "loss": 0.6772, "step": 5978 }, { - "epoch": 0.4524232908327343, - "grad_norm": 1.8937102556228638, - "learning_rate": 1.113920341527335e-05, - "loss": 0.6145, + "epoch": 0.42090813093980994, + "grad_norm": 2.1886744499206543, + "learning_rate": 9.101092379461004e-06, + "loss": 0.7257, "step": 5979 }, { - "epoch": 0.4524989595550679, - "grad_norm": 1.7291990518569946, - "learning_rate": 1.1137015676923637e-05, - "loss": 0.7514, + "epoch": 0.420978528687082, + "grad_norm": 1.910719871520996, + "learning_rate": 9.099569815119045e-06, + "loss": 0.6957, "step": 5980 }, { - "epoch": 0.4525746282774015, - "grad_norm": 1.8814363479614258, - "learning_rate": 1.1134827804747093e-05, - "loss": 0.7184, + "epoch": 0.4210489264343541, + "grad_norm": 1.7987372875213623, + "learning_rate": 9.09804714161855e-06, + "loss": 0.6628, "step": 5981 }, { - "epoch": 0.45265029699973514, - "grad_norm": 2.006896495819092, - "learning_rate": 1.1132639798880728e-05, - "loss": 0.6344, + "epoch": 0.4211193241816262, + "grad_norm": 1.5422509908676147, + "learning_rate": 9.096524359038685e-06, + "loss": 0.7436, "step": 5982 }, { - "epoch": 0.4527259657220688, - "grad_norm": 2.146019458770752, - "learning_rate": 1.1130451659461559e-05, - "loss": 0.6921, + "epoch": 0.42118972192889825, + "grad_norm": 2.2252793312072754, + "learning_rate": 9.095001467458616e-06, + "loss": 0.7099, "step": 5983 }, { - "epoch": 0.4528016344444024, - "grad_norm": 10.404562950134277, - "learning_rate": 1.1128263386626617e-05, - "loss": 0.5599, + "epoch": 0.4212601196761704, + "grad_norm": 1.8518147468566895, + "learning_rate": 9.093478466957526e-06, + "loss": 0.7263, "step": 5984 }, { - "epoch": 0.452877303166736, - "grad_norm": 1.8004459142684937, - "learning_rate": 1.1126074980512936e-05, - "loss": 0.6701, + "epoch": 0.42133051742344246, + "grad_norm": 1.963258981704712, + "learning_rate": 9.091955357614594e-06, + "loss": 0.645, "step": 5985 }, { - "epoch": 0.45295297188906963, - "grad_norm": 2.264495611190796, - "learning_rate": 1.1123886441257567e-05, - "loss": 0.7605, + "epoch": 0.42140091517071454, + "grad_norm": 1.5187724828720093, + "learning_rate": 9.09043213950901e-06, + "loss": 0.7178, "step": 5986 }, { - "epoch": 0.4530286406114033, - "grad_norm": 1.9421061277389526, - "learning_rate": 1.1121697768997556e-05, - "loss": 0.7667, + "epoch": 0.4214713129179866, + "grad_norm": 1.5758106708526611, + "learning_rate": 9.088908812719965e-06, + "loss": 0.5517, "step": 5987 }, { - "epoch": 0.4531043093337369, - "grad_norm": 3.031816244125366, - "learning_rate": 1.1119508963869971e-05, - "loss": 0.5885, + "epoch": 0.4215417106652587, + "grad_norm": 1.803167700767517, + "learning_rate": 9.087385377326658e-06, + "loss": 0.6678, "step": 5988 }, { - "epoch": 0.4531799780560705, - "grad_norm": 3.6335830688476562, - "learning_rate": 1.1117320026011878e-05, - "loss": 0.6176, + "epoch": 0.4216121084125308, + "grad_norm": 1.631888747215271, + "learning_rate": 9.085861833408296e-06, + "loss": 0.7368, "step": 5989 }, { - "epoch": 0.45325564677840413, - "grad_norm": 2.454843282699585, - "learning_rate": 1.1115130955560357e-05, - "loss": 0.7809, + "epoch": 0.4216825061598029, + "grad_norm": 1.5951452255249023, + "learning_rate": 9.084338181044088e-06, + "loss": 0.6377, "step": 5990 }, { - "epoch": 0.4533313155007378, - "grad_norm": 1.9949727058410645, - "learning_rate": 1.1112941752652495e-05, - "loss": 0.7147, + "epoch": 0.421752903907075, + "grad_norm": 1.9890165328979492, + "learning_rate": 9.08281442031325e-06, + "loss": 0.8212, "step": 5991 }, { - "epoch": 0.4534069842230714, - "grad_norm": 1.9766342639923096, - "learning_rate": 1.1110752417425386e-05, - "loss": 0.8628, + "epoch": 0.42182330165434706, + "grad_norm": 1.9781975746154785, + "learning_rate": 9.081290551295002e-06, + "loss": 0.7134, "step": 5992 }, { - "epoch": 0.453482652945405, - "grad_norm": 2.25211763381958, - "learning_rate": 1.1108562950016133e-05, - "loss": 0.7544, + "epoch": 0.42189369940161914, + "grad_norm": 1.4421428442001343, + "learning_rate": 9.079766574068577e-06, + "loss": 0.5738, "step": 5993 }, { - "epoch": 0.45355832166773863, - "grad_norm": 2.635415554046631, - "learning_rate": 1.1106373350561848e-05, - "loss": 0.8409, + "epoch": 0.4219640971488912, + "grad_norm": 2.0059127807617188, + "learning_rate": 9.078242488713203e-06, + "loss": 0.7876, "step": 5994 }, { - "epoch": 0.45363399039007224, - "grad_norm": 2.761585235595703, - "learning_rate": 1.110418361919965e-05, - "loss": 0.8435, + "epoch": 0.42203449489616335, + "grad_norm": 1.593414068222046, + "learning_rate": 9.076718295308117e-06, + "loss": 0.7179, "step": 5995 }, { - "epoch": 0.4537096591124059, - "grad_norm": 1.7957862615585327, - "learning_rate": 1.110199375606667e-05, - "loss": 0.6987, + "epoch": 0.4221048926434354, + "grad_norm": 1.8936508893966675, + "learning_rate": 9.07519399393257e-06, + "loss": 0.7661, "step": 5996 }, { - "epoch": 0.4537853278347395, - "grad_norm": 1.743152141571045, - "learning_rate": 1.1099803761300043e-05, - "loss": 0.5961, + "epoch": 0.4221752903907075, + "grad_norm": 1.9734700918197632, + "learning_rate": 9.073669584665806e-06, + "loss": 0.6657, "step": 5997 }, { - "epoch": 0.4538609965570731, - "grad_norm": 1.9768725633621216, - "learning_rate": 1.1097613635036912e-05, - "loss": 0.731, + "epoch": 0.4222456881379796, + "grad_norm": 2.2361061573028564, + "learning_rate": 9.072145067587082e-06, + "loss": 0.6472, "step": 5998 }, { - "epoch": 0.45393666527940674, - "grad_norm": 2.327970504760742, - "learning_rate": 1.109542337741443e-05, - "loss": 0.8248, + "epoch": 0.42231608588525166, + "grad_norm": 1.6794118881225586, + "learning_rate": 9.070620442775662e-06, + "loss": 0.6223, "step": 5999 }, { - "epoch": 0.4540123340017404, - "grad_norm": 2.5516083240509033, - "learning_rate": 1.1093232988569757e-05, - "loss": 0.7137, + "epoch": 0.42238648363252373, + "grad_norm": 1.6167969703674316, + "learning_rate": 9.069095710310807e-06, + "loss": 0.6483, "step": 6000 }, { - "epoch": 0.454088002724074, - "grad_norm": 2.588467836380005, - "learning_rate": 1.1091042468640066e-05, - "loss": 0.7983, + "epoch": 0.42245688137979587, + "grad_norm": 1.6593986749649048, + "learning_rate": 9.067570870271791e-06, + "loss": 0.6857, "step": 6001 }, { - "epoch": 0.4541636714464076, - "grad_norm": 2.372370481491089, - "learning_rate": 1.1088851817762537e-05, - "loss": 0.7425, + "epoch": 0.42252727912706795, + "grad_norm": 1.853012204170227, + "learning_rate": 9.066045922737897e-06, + "loss": 0.6013, "step": 6002 }, { - "epoch": 0.45423934016874123, - "grad_norm": 2.482089042663574, - "learning_rate": 1.1086661036074342e-05, - "loss": 0.6915, + "epoch": 0.42259767687434, + "grad_norm": 1.7216752767562866, + "learning_rate": 9.064520867788401e-06, + "loss": 0.5474, "step": 6003 }, { - "epoch": 0.4543150088910749, - "grad_norm": 2.0456271171569824, - "learning_rate": 1.108447012371269e-05, - "loss": 0.6623, + "epoch": 0.4226680746216121, + "grad_norm": 1.8159053325653076, + "learning_rate": 9.062995705502596e-06, + "loss": 0.7246, "step": 6004 }, { - "epoch": 0.4543906776134085, - "grad_norm": 2.8427894115448, - "learning_rate": 1.1082279080814775e-05, - "loss": 0.7134, + "epoch": 0.4227384723688842, + "grad_norm": 1.7246172428131104, + "learning_rate": 9.061470435959778e-06, + "loss": 0.6814, "step": 6005 }, { - "epoch": 0.4544663463357421, - "grad_norm": 2.3383491039276123, - "learning_rate": 1.1080087907517808e-05, - "loss": 0.8108, + "epoch": 0.4228088701161563, + "grad_norm": 1.6900124549865723, + "learning_rate": 9.059945059239244e-06, + "loss": 0.6438, "step": 6006 }, { - "epoch": 0.45454201505807573, - "grad_norm": 2.1955339908599854, - "learning_rate": 1.107789660395901e-05, - "loss": 0.6805, + "epoch": 0.4228792678634284, + "grad_norm": 1.9455031156539917, + "learning_rate": 9.0584195754203e-06, + "loss": 0.688, "step": 6007 }, { - "epoch": 0.45461768378040934, - "grad_norm": 2.1509621143341064, - "learning_rate": 1.1075705170275605e-05, - "loss": 0.7771, + "epoch": 0.42294966561070046, + "grad_norm": 1.7316709756851196, + "learning_rate": 9.056893984582259e-06, + "loss": 0.6912, "step": 6008 }, { - "epoch": 0.454693352502743, - "grad_norm": 2.429506778717041, - "learning_rate": 1.107351360660483e-05, - "loss": 0.6084, + "epoch": 0.42302006335797254, + "grad_norm": 2.0152199268341064, + "learning_rate": 9.055368286804435e-06, + "loss": 0.6848, "step": 6009 }, { - "epoch": 0.4547690212250766, - "grad_norm": 2.3158512115478516, - "learning_rate": 1.1071321913083925e-05, - "loss": 0.7337, + "epoch": 0.4230904611052446, + "grad_norm": 1.828118085861206, + "learning_rate": 9.053842482166152e-06, + "loss": 0.7648, "step": 6010 }, { - "epoch": 0.45484468994741023, - "grad_norm": 1.9755150079727173, - "learning_rate": 1.1069130089850142e-05, - "loss": 0.9059, + "epoch": 0.4231608588525167, + "grad_norm": 1.843651533126831, + "learning_rate": 9.052316570746743e-06, + "loss": 0.6277, "step": 6011 }, { - "epoch": 0.45492035866974384, - "grad_norm": 1.9800193309783936, - "learning_rate": 1.1066938137040742e-05, - "loss": 0.9518, + "epoch": 0.42323125659978883, + "grad_norm": 2.0660784244537354, + "learning_rate": 9.050790552625532e-06, + "loss": 0.7138, "step": 6012 }, { - "epoch": 0.4549960273920775, - "grad_norm": 2.4362120628356934, - "learning_rate": 1.106474605479299e-05, - "loss": 0.8131, + "epoch": 0.4233016543470609, + "grad_norm": 2.002849578857422, + "learning_rate": 9.049264427881864e-06, + "loss": 0.6952, "step": 6013 }, { - "epoch": 0.4550716961144111, - "grad_norm": 3.1265878677368164, - "learning_rate": 1.106255384324416e-05, - "loss": 0.8113, + "epoch": 0.423372052094333, + "grad_norm": 1.8742600679397583, + "learning_rate": 9.047738196595085e-06, + "loss": 0.6525, "step": 6014 }, { - "epoch": 0.45514736483674473, - "grad_norm": 2.1288368701934814, - "learning_rate": 1.106036150253154e-05, - "loss": 0.6329, + "epoch": 0.42344244984160506, + "grad_norm": 2.289966583251953, + "learning_rate": 9.04621185884454e-06, + "loss": 0.7379, "step": 6015 }, { - "epoch": 0.45522303355907834, - "grad_norm": 2.2900583744049072, - "learning_rate": 1.1058169032792419e-05, - "loss": 0.6617, + "epoch": 0.42351284758887714, + "grad_norm": 1.8117685317993164, + "learning_rate": 9.044685414709586e-06, + "loss": 0.7749, "step": 6016 }, { - "epoch": 0.455298702281412, - "grad_norm": 2.1186749935150146, - "learning_rate": 1.1055976434164094e-05, - "loss": 0.76, + "epoch": 0.4235832453361492, + "grad_norm": 1.528077483177185, + "learning_rate": 9.043158864269589e-06, + "loss": 0.648, "step": 6017 }, { - "epoch": 0.4553743710037456, - "grad_norm": 1.967383623123169, - "learning_rate": 1.1053783706783876e-05, - "loss": 0.7049, + "epoch": 0.42365364308342135, + "grad_norm": 1.755890965461731, + "learning_rate": 9.041632207603912e-06, + "loss": 0.6647, "step": 6018 }, { - "epoch": 0.4554500397260792, - "grad_norm": 2.262080430984497, - "learning_rate": 1.1051590850789076e-05, - "loss": 0.739, + "epoch": 0.4237240408306934, + "grad_norm": 1.6660250425338745, + "learning_rate": 9.040105444791924e-06, + "loss": 0.5442, "step": 6019 }, { - "epoch": 0.45552570844841284, - "grad_norm": 2.2179148197174072, - "learning_rate": 1.1049397866317026e-05, - "loss": 0.8633, + "epoch": 0.4237944385779655, + "grad_norm": 1.637634038925171, + "learning_rate": 9.03857857591301e-06, + "loss": 0.7207, "step": 6020 }, { - "epoch": 0.45560137717074645, - "grad_norm": 2.158219575881958, - "learning_rate": 1.1047204753505052e-05, - "loss": 0.7621, + "epoch": 0.4238648363252376, + "grad_norm": 1.8926656246185303, + "learning_rate": 9.037051601046547e-06, + "loss": 0.7386, "step": 6021 }, { - "epoch": 0.4556770458930801, - "grad_norm": 2.1269586086273193, - "learning_rate": 1.1045011512490493e-05, - "loss": 0.7006, + "epoch": 0.42393523407250966, + "grad_norm": 1.8775016069412231, + "learning_rate": 9.035524520271927e-06, + "loss": 0.8455, "step": 6022 }, { - "epoch": 0.4557527146154137, - "grad_norm": 2.0919365882873535, - "learning_rate": 1.1042818143410702e-05, - "loss": 0.6958, + "epoch": 0.4240056318197818, + "grad_norm": 1.7788927555084229, + "learning_rate": 9.033997333668547e-06, + "loss": 0.6964, "step": 6023 }, { - "epoch": 0.45582838333774733, - "grad_norm": 2.181525230407715, - "learning_rate": 1.1040624646403027e-05, - "loss": 0.6319, + "epoch": 0.42407602956705387, + "grad_norm": 1.7364180088043213, + "learning_rate": 9.032470041315799e-06, + "loss": 0.7063, "step": 6024 }, { - "epoch": 0.45590405206008094, - "grad_norm": 2.3236260414123535, - "learning_rate": 1.1038431021604841e-05, - "loss": 0.8105, + "epoch": 0.42414642731432595, + "grad_norm": 1.78489089012146, + "learning_rate": 9.030942643293095e-06, + "loss": 0.754, "step": 6025 }, { - "epoch": 0.4559797207824146, - "grad_norm": 2.2050108909606934, - "learning_rate": 1.1036237269153509e-05, - "loss": 0.7843, + "epoch": 0.424216825061598, + "grad_norm": 2.33210825920105, + "learning_rate": 9.029415139679844e-06, + "loss": 0.7703, "step": 6026 }, { - "epoch": 0.4560553895047482, - "grad_norm": 2.168041467666626, - "learning_rate": 1.1034043389186414e-05, - "loss": 0.8211, + "epoch": 0.4242872228088701, + "grad_norm": 1.5387938022613525, + "learning_rate": 9.027887530555461e-06, + "loss": 0.6126, "step": 6027 }, { - "epoch": 0.45613105822708183, - "grad_norm": 2.095221996307373, - "learning_rate": 1.1031849381840942e-05, - "loss": 0.7797, + "epoch": 0.4243576205561422, + "grad_norm": 2.0938901901245117, + "learning_rate": 9.02635981599937e-06, + "loss": 0.5297, "step": 6028 }, { - "epoch": 0.45620672694941544, - "grad_norm": 2.5357155799865723, - "learning_rate": 1.102965524725449e-05, - "loss": 0.748, + "epoch": 0.4244280183034143, + "grad_norm": 2.002307653427124, + "learning_rate": 9.024831996090995e-06, + "loss": 0.6816, "step": 6029 }, { - "epoch": 0.4562823956717491, - "grad_norm": 2.3060734272003174, - "learning_rate": 1.1027460985564464e-05, - "loss": 0.6879, + "epoch": 0.4244984160506864, + "grad_norm": 1.6631735563278198, + "learning_rate": 9.023304070909771e-06, + "loss": 0.6354, "step": 6030 }, { - "epoch": 0.4563580643940827, - "grad_norm": 1.986255407333374, - "learning_rate": 1.102526659690827e-05, - "loss": 0.5767, + "epoch": 0.42456881379795847, + "grad_norm": 2.0020647048950195, + "learning_rate": 9.021776040535136e-06, + "loss": 0.5907, "step": 6031 }, { - "epoch": 0.45643373311641633, - "grad_norm": 1.7908231019973755, - "learning_rate": 1.1023072081423334e-05, - "loss": 0.5617, + "epoch": 0.42463921154523054, + "grad_norm": 1.9584906101226807, + "learning_rate": 9.020247905046534e-06, + "loss": 0.8727, "step": 6032 }, { - "epoch": 0.45650940183874994, - "grad_norm": 2.0068113803863525, - "learning_rate": 1.102087743924708e-05, - "loss": 0.8403, + "epoch": 0.4247096092925026, + "grad_norm": 1.7690964937210083, + "learning_rate": 9.018719664523415e-06, + "loss": 0.7022, "step": 6033 }, { - "epoch": 0.45658507056108355, - "grad_norm": 2.3249096870422363, - "learning_rate": 1.1018682670516945e-05, - "loss": 0.6172, + "epoch": 0.42478000703977475, + "grad_norm": 2.1385812759399414, + "learning_rate": 9.017191319045228e-06, + "loss": 0.7544, "step": 6034 }, { - "epoch": 0.4566607392834172, - "grad_norm": 2.054591178894043, - "learning_rate": 1.101648777537037e-05, - "loss": 0.7662, + "epoch": 0.42485040478704683, + "grad_norm": 2.0051212310791016, + "learning_rate": 9.015662868691441e-06, + "loss": 0.7255, "step": 6035 }, { - "epoch": 0.4567364080057508, - "grad_norm": 2.0367980003356934, - "learning_rate": 1.101429275394481e-05, - "loss": 0.6568, + "epoch": 0.4249208025343189, + "grad_norm": 1.9760196208953857, + "learning_rate": 9.014134313541518e-06, + "loss": 0.8218, "step": 6036 }, { - "epoch": 0.45681207672808444, - "grad_norm": 1.8275066614151, - "learning_rate": 1.1012097606377722e-05, - "loss": 0.7773, + "epoch": 0.424991200281591, + "grad_norm": 1.684480905532837, + "learning_rate": 9.012605653674923e-06, + "loss": 0.6711, "step": 6037 }, { - "epoch": 0.45688774545041805, - "grad_norm": 1.9169228076934814, - "learning_rate": 1.1009902332806577e-05, - "loss": 0.6533, + "epoch": 0.42506159802886306, + "grad_norm": 1.5824445486068726, + "learning_rate": 9.011076889171143e-06, + "loss": 0.7765, "step": 6038 }, { - "epoch": 0.4569634141727517, - "grad_norm": 2.531177520751953, - "learning_rate": 1.1007706933368843e-05, - "loss": 0.8055, + "epoch": 0.42513199577613514, + "grad_norm": 1.9525823593139648, + "learning_rate": 9.00954802010965e-06, + "loss": 0.7298, "step": 6039 }, { - "epoch": 0.4570390828950853, - "grad_norm": 2.8043148517608643, - "learning_rate": 1.1005511408202008e-05, - "loss": 0.8399, + "epoch": 0.4252023935234073, + "grad_norm": 1.7466341257095337, + "learning_rate": 9.008019046569935e-06, + "loss": 0.7049, "step": 6040 }, { - "epoch": 0.45711475161741894, - "grad_norm": 1.9905619621276855, - "learning_rate": 1.1003315757443565e-05, - "loss": 0.5476, + "epoch": 0.42527279127067935, + "grad_norm": 1.9534591436386108, + "learning_rate": 9.006489968631495e-06, + "loss": 0.7324, "step": 6041 }, { - "epoch": 0.45719042033975255, - "grad_norm": 2.0711894035339355, - "learning_rate": 1.1001119981231004e-05, - "loss": 0.6972, + "epoch": 0.42534318901795143, + "grad_norm": 1.677843451499939, + "learning_rate": 9.004960786373823e-06, + "loss": 0.6224, "step": 6042 }, { - "epoch": 0.4572660890620862, - "grad_norm": 2.3095309734344482, - "learning_rate": 1.0998924079701843e-05, - "loss": 0.6728, + "epoch": 0.4254135867652235, + "grad_norm": 1.5617378950119019, + "learning_rate": 9.003431499876422e-06, + "loss": 0.8049, "step": 6043 }, { - "epoch": 0.4573417577844198, - "grad_norm": 3.2638471126556396, - "learning_rate": 1.0996728052993586e-05, - "loss": 0.838, + "epoch": 0.4254839845124956, + "grad_norm": 1.9448145627975464, + "learning_rate": 9.001902109218805e-06, + "loss": 0.7274, "step": 6044 }, { - "epoch": 0.45741742650675343, - "grad_norm": 2.1741018295288086, - "learning_rate": 1.0994531901243763e-05, - "loss": 0.7435, + "epoch": 0.42555438225976766, + "grad_norm": 1.7132102251052856, + "learning_rate": 9.000372614480483e-06, + "loss": 0.7631, "step": 6045 }, { - "epoch": 0.45749309522908704, - "grad_norm": 2.157972812652588, - "learning_rate": 1.0992335624589902e-05, - "loss": 0.7142, + "epoch": 0.4256247800070398, + "grad_norm": 1.660667896270752, + "learning_rate": 8.998843015740976e-06, + "loss": 0.6277, "step": 6046 }, { - "epoch": 0.45756876395142065, - "grad_norm": 3.9512946605682373, - "learning_rate": 1.099013922316954e-05, - "loss": 0.5924, + "epoch": 0.42569517775431187, + "grad_norm": 2.126674175262451, + "learning_rate": 8.997313313079813e-06, + "loss": 0.7845, "step": 6047 }, { - "epoch": 0.4576444326737543, - "grad_norm": 1.8326383829116821, - "learning_rate": 1.0987942697120223e-05, - "loss": 0.7153, + "epoch": 0.42576557550158395, + "grad_norm": 1.7029719352722168, + "learning_rate": 8.99578350657652e-06, + "loss": 0.7349, "step": 6048 }, { - "epoch": 0.45772010139608793, - "grad_norm": 2.491291046142578, - "learning_rate": 1.09857460465795e-05, - "loss": 0.7102, + "epoch": 0.425835973248856, + "grad_norm": 1.6776679754257202, + "learning_rate": 8.994253596310635e-06, + "loss": 0.7303, "step": 6049 }, { - "epoch": 0.45779577011842154, - "grad_norm": 2.1547534465789795, - "learning_rate": 1.0983549271684944e-05, - "loss": 0.6816, + "epoch": 0.4259063709961281, + "grad_norm": 1.9067351818084717, + "learning_rate": 8.992723582361701e-06, + "loss": 0.7061, "step": 6050 }, { - "epoch": 0.45787143884075515, - "grad_norm": 2.5720443725585938, - "learning_rate": 1.0981352372574111e-05, - "loss": 0.7271, + "epoch": 0.42597676874340024, + "grad_norm": 1.9534130096435547, + "learning_rate": 8.991193464809262e-06, + "loss": 0.7062, "step": 6051 }, { - "epoch": 0.4579471075630888, - "grad_norm": 2.2329049110412598, - "learning_rate": 1.0979155349384587e-05, - "loss": 0.6481, + "epoch": 0.4260471664906723, + "grad_norm": 1.7583552598953247, + "learning_rate": 8.989663243732872e-06, + "loss": 0.6504, "step": 6052 }, { - "epoch": 0.45802277628542243, - "grad_norm": 2.2942488193511963, - "learning_rate": 1.0976958202253951e-05, - "loss": 0.7957, + "epoch": 0.4261175642379444, + "grad_norm": 1.7378369569778442, + "learning_rate": 8.98813291921209e-06, + "loss": 0.6146, "step": 6053 }, { - "epoch": 0.45809844500775604, - "grad_norm": 1.748425006866455, - "learning_rate": 1.0974760931319801e-05, - "loss": 0.9268, + "epoch": 0.42618796198521647, + "grad_norm": 1.667134165763855, + "learning_rate": 8.986602491326474e-06, + "loss": 0.6623, "step": 6054 }, { - "epoch": 0.45817411373008965, - "grad_norm": 5.090272903442383, - "learning_rate": 1.0972563536719736e-05, - "loss": 0.7559, + "epoch": 0.42625835973248855, + "grad_norm": 1.8637518882751465, + "learning_rate": 8.985071960155595e-06, + "loss": 0.744, "step": 6055 }, { - "epoch": 0.4582497824524233, - "grad_norm": 2.0367588996887207, - "learning_rate": 1.097036601859136e-05, - "loss": 0.7083, + "epoch": 0.4263287574797606, + "grad_norm": 2.2613425254821777, + "learning_rate": 8.98354132577903e-06, + "loss": 0.7062, "step": 6056 }, { - "epoch": 0.4583254511747569, - "grad_norm": 2.288196325302124, - "learning_rate": 1.096816837707229e-05, - "loss": 0.7582, + "epoch": 0.42639915522703276, + "grad_norm": 1.6338573694229126, + "learning_rate": 8.982010588276353e-06, + "loss": 0.7003, "step": 6057 }, { - "epoch": 0.45840111989709054, - "grad_norm": 1.7706087827682495, - "learning_rate": 1.096597061230015e-05, - "loss": 0.6756, + "epoch": 0.42646955297430483, + "grad_norm": 1.5336662530899048, + "learning_rate": 8.98047974772715e-06, + "loss": 0.8405, "step": 6058 }, { - "epoch": 0.45847678861942415, - "grad_norm": 2.1202590465545654, - "learning_rate": 1.0963772724412575e-05, - "loss": 0.7529, + "epoch": 0.4265399507215769, + "grad_norm": 2.803811550140381, + "learning_rate": 8.978948804211013e-06, + "loss": 0.6873, "step": 6059 }, { - "epoch": 0.45855245734175776, - "grad_norm": 4.25075626373291, - "learning_rate": 1.0961574713547196e-05, - "loss": 0.8093, + "epoch": 0.426610348468849, + "grad_norm": 1.6783676147460938, + "learning_rate": 8.977417757807535e-06, + "loss": 0.7437, "step": 6060 }, { - "epoch": 0.4586281260640914, - "grad_norm": 2.7584314346313477, - "learning_rate": 1.0959376579841669e-05, - "loss": 0.6798, + "epoch": 0.42668074621612107, + "grad_norm": 1.7064518928527832, + "learning_rate": 8.975886608596315e-06, + "loss": 0.786, "step": 6061 }, { - "epoch": 0.45870379478642503, - "grad_norm": 2.2744717597961426, - "learning_rate": 1.095717832343364e-05, - "loss": 0.573, + "epoch": 0.4267511439633932, + "grad_norm": 2.017835855484009, + "learning_rate": 8.974355356656963e-06, + "loss": 0.7841, "step": 6062 }, { - "epoch": 0.45877946350875864, - "grad_norm": 1.9462858438491821, - "learning_rate": 1.0954979944460773e-05, - "loss": 0.7179, + "epoch": 0.4268215417106653, + "grad_norm": 2.0246951580047607, + "learning_rate": 8.972824002069087e-06, + "loss": 0.6172, "step": 6063 }, { - "epoch": 0.45885513223109226, - "grad_norm": 2.249580144882202, - "learning_rate": 1.0952781443060742e-05, - "loss": 0.6776, + "epoch": 0.42689193945793735, + "grad_norm": 1.7762298583984375, + "learning_rate": 8.971292544912304e-06, + "loss": 0.6771, "step": 6064 }, { - "epoch": 0.4589308009534259, - "grad_norm": 2.2577133178710938, - "learning_rate": 1.0950582819371215e-05, - "loss": 0.7104, + "epoch": 0.42696233720520943, + "grad_norm": 1.7895864248275757, + "learning_rate": 8.969760985266236e-06, + "loss": 0.6944, "step": 6065 }, { - "epoch": 0.45900646967575953, - "grad_norm": 1.8561004400253296, - "learning_rate": 1.094838407352989e-05, - "loss": 0.6328, + "epoch": 0.4270327349524815, + "grad_norm": 1.807686686515808, + "learning_rate": 8.968229323210511e-06, + "loss": 0.7149, "step": 6066 }, { - "epoch": 0.45908213839809314, - "grad_norm": 2.133049249649048, - "learning_rate": 1.0946185205674447e-05, - "loss": 0.803, + "epoch": 0.4271031326997536, + "grad_norm": 1.6709282398223877, + "learning_rate": 8.966697558824763e-06, + "loss": 0.7322, "step": 6067 }, { - "epoch": 0.45915780712042675, - "grad_norm": 2.2266693115234375, - "learning_rate": 1.0943986215942597e-05, - "loss": 0.7626, + "epoch": 0.4271735304470257, + "grad_norm": 1.7924108505249023, + "learning_rate": 8.965165692188626e-06, + "loss": 0.7308, "step": 6068 }, { - "epoch": 0.4592334758427604, - "grad_norm": 2.2593750953674316, - "learning_rate": 1.0941787104472038e-05, - "loss": 0.5993, + "epoch": 0.4272439281942978, + "grad_norm": 2.2598326206207275, + "learning_rate": 8.963633723381745e-06, + "loss": 0.6767, "step": 6069 }, { - "epoch": 0.45930914456509403, - "grad_norm": 1.5973234176635742, - "learning_rate": 1.0939587871400493e-05, - "loss": 0.9396, + "epoch": 0.4273143259415699, + "grad_norm": 2.0827717781066895, + "learning_rate": 8.962101652483767e-06, + "loss": 0.6026, "step": 6070 }, { - "epoch": 0.45938481328742764, - "grad_norm": 1.9865094423294067, - "learning_rate": 1.0937388516865681e-05, - "loss": 0.843, + "epoch": 0.42738472368884195, + "grad_norm": 1.7441837787628174, + "learning_rate": 8.96056947957435e-06, + "loss": 0.7273, "step": 6071 }, { - "epoch": 0.45946048200976125, - "grad_norm": 2.004659414291382, - "learning_rate": 1.093518904100533e-05, - "loss": 0.5817, + "epoch": 0.427455121436114, + "grad_norm": 1.9211640357971191, + "learning_rate": 8.959037204733147e-06, + "loss": 0.6615, "step": 6072 }, { - "epoch": 0.45953615073209486, - "grad_norm": 1.994409441947937, - "learning_rate": 1.0932989443957188e-05, - "loss": 0.6163, + "epoch": 0.4275255191833861, + "grad_norm": 2.1747751235961914, + "learning_rate": 8.957504828039827e-06, + "loss": 0.609, "step": 6073 }, { - "epoch": 0.4596118194544285, - "grad_norm": 2.4279356002807617, - "learning_rate": 1.0930789725858994e-05, - "loss": 0.6187, + "epoch": 0.42759591693065824, + "grad_norm": 2.652926445007324, + "learning_rate": 8.955972349574059e-06, + "loss": 0.6951, "step": 6074 }, { - "epoch": 0.45968748817676214, - "grad_norm": 1.8175309896469116, - "learning_rate": 1.0928589886848499e-05, - "loss": 0.7091, + "epoch": 0.4276663146779303, + "grad_norm": 1.934799313545227, + "learning_rate": 8.954439769415517e-06, + "loss": 0.6838, "step": 6075 }, { - "epoch": 0.45976315689909575, - "grad_norm": 2.018789768218994, - "learning_rate": 1.092638992706347e-05, - "loss": 0.5512, + "epoch": 0.4277367124252024, + "grad_norm": 1.7399855852127075, + "learning_rate": 8.952907087643879e-06, + "loss": 0.7663, "step": 6076 }, { - "epoch": 0.45983882562142936, - "grad_norm": 1.8385061025619507, - "learning_rate": 1.0924189846641673e-05, - "loss": 0.665, + "epoch": 0.42780711017247447, + "grad_norm": 1.8127607107162476, + "learning_rate": 8.951374304338836e-06, + "loss": 0.7071, "step": 6077 }, { - "epoch": 0.459914494343763, - "grad_norm": 2.198543071746826, - "learning_rate": 1.0921989645720883e-05, - "loss": 0.7314, + "epoch": 0.42787750791974655, + "grad_norm": 1.7860937118530273, + "learning_rate": 8.949841419580073e-06, + "loss": 0.658, "step": 6078 }, { - "epoch": 0.45999016306609664, - "grad_norm": 3.3511157035827637, - "learning_rate": 1.0919789324438886e-05, - "loss": 0.7007, + "epoch": 0.4279479056670187, + "grad_norm": 1.3782943487167358, + "learning_rate": 8.948308433447288e-06, + "loss": 0.7731, "step": 6079 }, { - "epoch": 0.46006583178843025, - "grad_norm": 2.0471951961517334, - "learning_rate": 1.0917588882933472e-05, - "loss": 0.8203, + "epoch": 0.42801830341429076, + "grad_norm": 1.731669306755066, + "learning_rate": 8.946775346020188e-06, + "loss": 0.6907, "step": 6080 }, { - "epoch": 0.46014150051076386, - "grad_norm": 2.699324131011963, - "learning_rate": 1.091538832134244e-05, - "loss": 0.5743, + "epoch": 0.42808870116156283, + "grad_norm": 1.8618292808532715, + "learning_rate": 8.94524215737847e-06, + "loss": 0.7501, "step": 6081 }, { - "epoch": 0.4602171692330975, - "grad_norm": 2.1900722980499268, - "learning_rate": 1.0913187639803598e-05, - "loss": 0.7094, + "epoch": 0.4281590989088349, + "grad_norm": 1.6177434921264648, + "learning_rate": 8.943708867601852e-06, + "loss": 0.6578, "step": 6082 }, { - "epoch": 0.46029283795543113, - "grad_norm": 2.500459671020508, - "learning_rate": 1.0910986838454754e-05, - "loss": 0.6425, + "epoch": 0.428229496656107, + "grad_norm": 1.8015034198760986, + "learning_rate": 8.94217547677005e-06, + "loss": 0.6268, "step": 6083 }, { - "epoch": 0.46036850667776474, - "grad_norm": 2.3873353004455566, - "learning_rate": 1.0908785917433737e-05, - "loss": 0.6988, + "epoch": 0.42829989440337907, + "grad_norm": 1.6952227354049683, + "learning_rate": 8.940641984962787e-06, + "loss": 0.6325, "step": 6084 }, { - "epoch": 0.46044417540009835, - "grad_norm": 2.539494037628174, - "learning_rate": 1.090658487687837e-05, - "loss": 0.6685, + "epoch": 0.4283702921506512, + "grad_norm": 1.875742793083191, + "learning_rate": 8.939108392259786e-06, + "loss": 0.78, "step": 6085 }, { - "epoch": 0.46051984412243196, - "grad_norm": 2.0410473346710205, - "learning_rate": 1.0904383716926491e-05, - "loss": 0.8181, + "epoch": 0.4284406898979233, + "grad_norm": 1.8028175830841064, + "learning_rate": 8.937574698740788e-06, + "loss": 0.7735, "step": 6086 }, { - "epoch": 0.46059551284476563, - "grad_norm": 2.1296074390411377, - "learning_rate": 1.0902182437715947e-05, - "loss": 0.6946, + "epoch": 0.42851108764519535, + "grad_norm": 1.8013789653778076, + "learning_rate": 8.936040904485523e-06, + "loss": 0.666, "step": 6087 }, { - "epoch": 0.46067118156709924, - "grad_norm": 2.1949639320373535, - "learning_rate": 1.0899981039384581e-05, - "loss": 0.7152, + "epoch": 0.42858148539246743, + "grad_norm": 1.7918307781219482, + "learning_rate": 8.93450700957374e-06, + "loss": 0.7198, "step": 6088 }, { - "epoch": 0.46074685028943285, - "grad_norm": 2.5032870769500732, - "learning_rate": 1.0897779522070262e-05, - "loss": 0.7305, + "epoch": 0.4286518831397395, + "grad_norm": 1.8849347829818726, + "learning_rate": 8.932973014085184e-06, + "loss": 0.6578, "step": 6089 }, { - "epoch": 0.46082251901176646, - "grad_norm": 1.8629069328308105, - "learning_rate": 1.0895577885910846e-05, - "loss": 0.7541, + "epoch": 0.42872228088701164, + "grad_norm": 2.1687722206115723, + "learning_rate": 8.931438918099612e-06, + "loss": 0.7192, "step": 6090 }, { - "epoch": 0.46089818773410013, - "grad_norm": 1.527066707611084, - "learning_rate": 1.0893376131044219e-05, - "loss": 0.6971, + "epoch": 0.4287926786342837, + "grad_norm": 1.6919245719909668, + "learning_rate": 8.92990472169678e-06, + "loss": 0.6916, "step": 6091 }, { - "epoch": 0.46097385645643374, - "grad_norm": 1.9106502532958984, - "learning_rate": 1.089117425760825e-05, - "loss": 0.7487, + "epoch": 0.4288630763815558, + "grad_norm": 1.9113175868988037, + "learning_rate": 8.928370424956457e-06, + "loss": 0.7497, "step": 6092 }, { - "epoch": 0.46104952517876735, - "grad_norm": 2.104304075241089, - "learning_rate": 1.0888972265740833e-05, - "loss": 0.7364, + "epoch": 0.4289334741288279, + "grad_norm": 1.6480077505111694, + "learning_rate": 8.926836027958405e-06, + "loss": 0.6811, "step": 6093 }, { - "epoch": 0.46112519390110096, - "grad_norm": 2.0999743938446045, - "learning_rate": 1.0886770155579864e-05, - "loss": 0.7293, + "epoch": 0.42900387187609995, + "grad_norm": 3.886080503463745, + "learning_rate": 8.925301530782405e-06, + "loss": 0.6795, "step": 6094 }, { - "epoch": 0.4612008626234346, - "grad_norm": 2.2601325511932373, - "learning_rate": 1.0884567927263243e-05, - "loss": 0.7555, + "epoch": 0.42907426962337203, + "grad_norm": 1.8421233892440796, + "learning_rate": 8.923766933508236e-06, + "loss": 0.6618, "step": 6095 }, { - "epoch": 0.46127653134576824, - "grad_norm": 1.8899502754211426, - "learning_rate": 1.0882365580928885e-05, - "loss": 0.6945, + "epoch": 0.42914466737064416, + "grad_norm": 1.9346898794174194, + "learning_rate": 8.92223223621568e-06, + "loss": 0.6136, "step": 6096 }, { - "epoch": 0.46135220006810185, - "grad_norm": 2.614907741546631, - "learning_rate": 1.0880163116714706e-05, - "loss": 0.8823, + "epoch": 0.42921506511791624, + "grad_norm": 1.6411617994308472, + "learning_rate": 8.920697438984528e-06, + "loss": 0.7398, "step": 6097 }, { - "epoch": 0.46142786879043546, - "grad_norm": 1.670644760131836, - "learning_rate": 1.087796053475863e-05, - "loss": 0.8269, + "epoch": 0.4292854628651883, + "grad_norm": 3.048496961593628, + "learning_rate": 8.919162541894578e-06, + "loss": 0.6536, "step": 6098 }, { - "epoch": 0.4615035375127691, - "grad_norm": 2.111875295639038, - "learning_rate": 1.0875757835198592e-05, - "loss": 0.6693, + "epoch": 0.4293558606124604, + "grad_norm": 2.1733858585357666, + "learning_rate": 8.91762754502563e-06, + "loss": 0.7066, "step": 6099 }, { - "epoch": 0.46157920623510273, - "grad_norm": 1.9727673530578613, - "learning_rate": 1.0873555018172533e-05, - "loss": 0.661, + "epoch": 0.42942625835973247, + "grad_norm": 1.6760640144348145, + "learning_rate": 8.91609244845749e-06, + "loss": 0.6581, "step": 6100 }, { - "epoch": 0.46165487495743635, - "grad_norm": 2.154547691345215, - "learning_rate": 1.0871352083818397e-05, - "loss": 0.6215, + "epoch": 0.42949665610700455, + "grad_norm": 1.5646225214004517, + "learning_rate": 8.914557252269968e-06, + "loss": 0.6716, "step": 6101 }, { - "epoch": 0.46173054367976996, - "grad_norm": 3.549818992614746, - "learning_rate": 1.0869149032274142e-05, - "loss": 0.8293, + "epoch": 0.4295670538542767, + "grad_norm": 1.6082866191864014, + "learning_rate": 8.913021956542877e-06, + "loss": 0.7023, "step": 6102 }, { - "epoch": 0.46180621240210357, - "grad_norm": 2.007596731185913, - "learning_rate": 1.0866945863677728e-05, - "loss": 0.8542, + "epoch": 0.42963745160154876, + "grad_norm": 1.844744324684143, + "learning_rate": 8.911486561356045e-06, + "loss": 0.7483, "step": 6103 }, { - "epoch": 0.46188188112443723, - "grad_norm": 1.8901540040969849, - "learning_rate": 1.0864742578167123e-05, - "loss": 0.7118, + "epoch": 0.42970784934882084, + "grad_norm": 2.1872754096984863, + "learning_rate": 8.909951066789297e-06, + "loss": 0.6975, "step": 6104 }, { - "epoch": 0.46195754984677084, - "grad_norm": 2.25508975982666, - "learning_rate": 1.0862539175880313e-05, - "loss": 0.7328, + "epoch": 0.4297782470960929, + "grad_norm": 1.6072494983673096, + "learning_rate": 8.90841547292246e-06, + "loss": 0.6945, "step": 6105 }, { - "epoch": 0.46203321856910445, - "grad_norm": 1.9641987085342407, - "learning_rate": 1.086033565695527e-05, - "loss": 0.7337, + "epoch": 0.429848644843365, + "grad_norm": 1.8777034282684326, + "learning_rate": 8.906879779835376e-06, + "loss": 0.6827, "step": 6106 }, { - "epoch": 0.46210888729143806, - "grad_norm": 1.9229612350463867, - "learning_rate": 1.0858132021529995e-05, - "loss": 0.7118, + "epoch": 0.4299190425906371, + "grad_norm": 1.6227378845214844, + "learning_rate": 8.905343987607886e-06, + "loss": 0.6682, "step": 6107 }, { - "epoch": 0.46218455601377173, - "grad_norm": 2.1345152854919434, - "learning_rate": 1.0855928269742479e-05, - "loss": 0.7995, + "epoch": 0.4299894403379092, + "grad_norm": 1.720977783203125, + "learning_rate": 8.903808096319834e-06, + "loss": 0.6584, "step": 6108 }, { - "epoch": 0.46226022473610534, - "grad_norm": 1.999732255935669, - "learning_rate": 1.0853724401730733e-05, - "loss": 0.5753, + "epoch": 0.4300598380851813, + "grad_norm": 1.7590538263320923, + "learning_rate": 8.902272106051076e-06, + "loss": 0.5555, "step": 6109 }, { - "epoch": 0.46233589345843895, - "grad_norm": 1.7890396118164062, - "learning_rate": 1.0851520417632772e-05, - "loss": 0.7044, + "epoch": 0.43013023583245336, + "grad_norm": 2.12790846824646, + "learning_rate": 8.90073601688147e-06, + "loss": 0.7482, "step": 6110 }, { - "epoch": 0.46241156218077256, - "grad_norm": 1.8957141637802124, - "learning_rate": 1.0849316317586611e-05, - "loss": 0.8104, + "epoch": 0.43020063357972543, + "grad_norm": 1.776079773902893, + "learning_rate": 8.899199828890874e-06, + "loss": 0.7735, "step": 6111 }, { - "epoch": 0.46248723090310623, - "grad_norm": 1.9614812135696411, - "learning_rate": 1.0847112101730284e-05, - "loss": 0.7579, + "epoch": 0.4302710313269975, + "grad_norm": 2.7456791400909424, + "learning_rate": 8.897663542159159e-06, + "loss": 0.7765, "step": 6112 }, { - "epoch": 0.46256289962543984, - "grad_norm": 4.7304205894470215, - "learning_rate": 1.0844907770201818e-05, - "loss": 0.666, + "epoch": 0.43034142907426964, + "grad_norm": 1.519700527191162, + "learning_rate": 8.8961271567662e-06, + "loss": 0.6656, "step": 6113 }, { - "epoch": 0.46263856834777345, - "grad_norm": 2.0775561332702637, - "learning_rate": 1.0842703323139265e-05, - "loss": 0.6668, + "epoch": 0.4304118268215417, + "grad_norm": 1.6653283834457397, + "learning_rate": 8.894590672791874e-06, + "loss": 0.6442, "step": 6114 }, { - "epoch": 0.46271423707010706, - "grad_norm": 1.9007248878479004, - "learning_rate": 1.0840498760680668e-05, - "loss": 0.6426, + "epoch": 0.4304822245688138, + "grad_norm": 3.162923812866211, + "learning_rate": 8.89305409031606e-06, + "loss": 0.7192, "step": 6115 }, { - "epoch": 0.46278990579244067, - "grad_norm": 1.8677881956100464, - "learning_rate": 1.0838294082964087e-05, - "loss": 0.7154, + "epoch": 0.4305526223160859, + "grad_norm": 1.9119908809661865, + "learning_rate": 8.891517409418651e-06, + "loss": 0.6934, "step": 6116 }, { - "epoch": 0.46286557451477434, - "grad_norm": 2.1474485397338867, - "learning_rate": 1.0836089290127581e-05, - "loss": 0.8626, + "epoch": 0.43062302006335795, + "grad_norm": 1.6352273225784302, + "learning_rate": 8.88998063017954e-06, + "loss": 0.6326, "step": 6117 }, { - "epoch": 0.46294124323710795, - "grad_norm": 1.910867691040039, - "learning_rate": 1.083388438230923e-05, - "loss": 0.8349, + "epoch": 0.4306934178106301, + "grad_norm": 1.728672742843628, + "learning_rate": 8.888443752678622e-06, + "loss": 0.7021, "step": 6118 }, { - "epoch": 0.46301691195944156, - "grad_norm": 1.9479092359542847, - "learning_rate": 1.0831679359647104e-05, - "loss": 0.7569, + "epoch": 0.43076381555790216, + "grad_norm": 1.7484896183013916, + "learning_rate": 8.886906776995805e-06, + "loss": 0.652, "step": 6119 }, { - "epoch": 0.46309258068177517, - "grad_norm": 1.7901252508163452, - "learning_rate": 1.0829474222279293e-05, - "loss": 0.8097, + "epoch": 0.43083421330517424, + "grad_norm": 1.8095473051071167, + "learning_rate": 8.885369703210995e-06, + "loss": 0.7416, "step": 6120 }, { - "epoch": 0.46316824940410883, - "grad_norm": 2.567704677581787, - "learning_rate": 1.0827268970343888e-05, - "loss": 0.601, + "epoch": 0.4309046110524463, + "grad_norm": 1.9170221090316772, + "learning_rate": 8.883832531404104e-06, + "loss": 0.762, "step": 6121 }, { - "epoch": 0.46324391812644244, - "grad_norm": 2.589970350265503, - "learning_rate": 1.082506360397899e-05, - "loss": 0.698, + "epoch": 0.4309750087997184, + "grad_norm": 1.516690731048584, + "learning_rate": 8.882295261655058e-06, + "loss": 0.6347, "step": 6122 }, { - "epoch": 0.46331958684877605, - "grad_norm": 2.233529567718506, - "learning_rate": 1.082285812332271e-05, - "loss": 0.8512, + "epoch": 0.4310454065469905, + "grad_norm": 1.9045478105545044, + "learning_rate": 8.880757894043773e-06, + "loss": 0.6524, "step": 6123 }, { - "epoch": 0.46339525557110967, - "grad_norm": 2.331575870513916, - "learning_rate": 1.0820652528513151e-05, - "loss": 0.7811, + "epoch": 0.4311158042942626, + "grad_norm": 1.684240698814392, + "learning_rate": 8.879220428650184e-06, + "loss": 0.8309, "step": 6124 }, { - "epoch": 0.46347092429344333, - "grad_norm": 2.2309775352478027, - "learning_rate": 1.081844681968845e-05, - "loss": 0.6453, + "epoch": 0.4311862020415347, + "grad_norm": 1.707969069480896, + "learning_rate": 8.877682865554223e-06, + "loss": 0.7309, "step": 6125 }, { - "epoch": 0.46354659301577694, - "grad_norm": 1.7300264835357666, - "learning_rate": 1.0816240996986723e-05, - "loss": 0.6944, + "epoch": 0.43125659978880676, + "grad_norm": 1.7326712608337402, + "learning_rate": 8.876145204835829e-06, + "loss": 0.7532, "step": 6126 }, { - "epoch": 0.46362226173811055, - "grad_norm": 2.187654972076416, - "learning_rate": 1.0814035060546112e-05, - "loss": 0.7591, + "epoch": 0.43132699753607884, + "grad_norm": 1.7595962285995483, + "learning_rate": 8.874607446574945e-06, + "loss": 0.6478, "step": 6127 }, { - "epoch": 0.46369793046044416, - "grad_norm": 2.0955562591552734, - "learning_rate": 1.081182901050476e-05, - "loss": 0.7257, + "epoch": 0.4313973952833509, + "grad_norm": 1.5578160285949707, + "learning_rate": 8.873069590851524e-06, + "loss": 0.6492, "step": 6128 }, { - "epoch": 0.4637735991827778, - "grad_norm": 2.2414302825927734, - "learning_rate": 1.080962284700081e-05, - "loss": 0.6971, + "epoch": 0.431467793030623, + "grad_norm": 1.7854139804840088, + "learning_rate": 8.871531637745519e-06, + "loss": 0.7523, "step": 6129 }, { - "epoch": 0.46384926790511144, - "grad_norm": 4.362318992614746, - "learning_rate": 1.0807416570172429e-05, - "loss": 0.7791, + "epoch": 0.4315381907778951, + "grad_norm": 1.899827003479004, + "learning_rate": 8.869993587336887e-06, + "loss": 0.6671, "step": 6130 }, { - "epoch": 0.46392493662744505, - "grad_norm": 1.8338907957077026, - "learning_rate": 1.0805210180157772e-05, - "loss": 0.6372, + "epoch": 0.4316085885251672, + "grad_norm": 1.6503262519836426, + "learning_rate": 8.868455439705596e-06, + "loss": 0.6296, "step": 6131 }, { - "epoch": 0.46400060534977866, - "grad_norm": 2.5617542266845703, - "learning_rate": 1.080300367709502e-05, - "loss": 0.7845, + "epoch": 0.4316789862724393, + "grad_norm": 1.9880658388137817, + "learning_rate": 8.866917194931615e-06, + "loss": 0.6729, "step": 6132 }, { - "epoch": 0.46407627407211227, - "grad_norm": 1.9908982515335083, - "learning_rate": 1.0800797061122341e-05, - "loss": 0.5978, + "epoch": 0.43174938401971136, + "grad_norm": 2.0445117950439453, + "learning_rate": 8.865378853094918e-06, + "loss": 0.8272, "step": 6133 }, { - "epoch": 0.46415194279444594, - "grad_norm": 2.4405834674835205, - "learning_rate": 1.079859033237793e-05, - "loss": 0.7342, + "epoch": 0.43181978176698343, + "grad_norm": 1.7034364938735962, + "learning_rate": 8.863840414275486e-06, + "loss": 0.6984, "step": 6134 }, { - "epoch": 0.46422761151677955, - "grad_norm": 1.8507134914398193, - "learning_rate": 1.0796383490999975e-05, - "loss": 0.6845, + "epoch": 0.43189017951425557, + "grad_norm": 2.110081434249878, + "learning_rate": 8.862301878553299e-06, + "loss": 0.7848, "step": 6135 }, { - "epoch": 0.46430328023911316, - "grad_norm": 2.517188787460327, - "learning_rate": 1.0794176537126674e-05, - "loss": 0.7715, + "epoch": 0.43196057726152765, + "grad_norm": 1.6407935619354248, + "learning_rate": 8.860763246008352e-06, + "loss": 0.8054, "step": 6136 }, { - "epoch": 0.46437894896144677, - "grad_norm": 1.7793668508529663, - "learning_rate": 1.0791969470896235e-05, - "loss": 0.8915, + "epoch": 0.4320309750087997, + "grad_norm": 1.817347764968872, + "learning_rate": 8.859224516720642e-06, + "loss": 0.6305, "step": 6137 }, { - "epoch": 0.46445461768378044, - "grad_norm": 1.7618346214294434, - "learning_rate": 1.0789762292446869e-05, - "loss": 0.6284, + "epoch": 0.4321013727560718, + "grad_norm": 1.6242033243179321, + "learning_rate": 8.857685690770163e-06, + "loss": 0.6708, "step": 6138 }, { - "epoch": 0.46453028640611405, - "grad_norm": 2.0393192768096924, - "learning_rate": 1.0787555001916803e-05, - "loss": 0.6054, + "epoch": 0.4321717705033439, + "grad_norm": 1.7402594089508057, + "learning_rate": 8.856146768236922e-06, + "loss": 0.772, "step": 6139 }, { - "epoch": 0.46460595512844766, - "grad_norm": 2.2760608196258545, - "learning_rate": 1.078534759944426e-05, - "loss": 0.8148, + "epoch": 0.43224216825061595, + "grad_norm": 1.5291111469268799, + "learning_rate": 8.854607749200932e-06, + "loss": 0.6241, "step": 6140 }, { - "epoch": 0.46468162385078127, - "grad_norm": 1.8736259937286377, - "learning_rate": 1.0783140085167477e-05, - "loss": 0.6872, + "epoch": 0.4323125659978881, + "grad_norm": 1.7952951192855835, + "learning_rate": 8.853068633742203e-06, + "loss": 0.6416, "step": 6141 }, { - "epoch": 0.4647572925731149, - "grad_norm": 2.1906280517578125, - "learning_rate": 1.0780932459224692e-05, - "loss": 0.7743, + "epoch": 0.43238296374516016, + "grad_norm": 1.6076984405517578, + "learning_rate": 8.85152942194076e-06, + "loss": 0.5731, "step": 6142 }, { - "epoch": 0.46483296129544854, - "grad_norm": 2.7648184299468994, - "learning_rate": 1.077872472175416e-05, - "loss": 0.6033, + "epoch": 0.43245336149243224, + "grad_norm": 1.8722624778747559, + "learning_rate": 8.849990113876623e-06, + "loss": 0.6986, "step": 6143 }, { - "epoch": 0.46490863001778215, - "grad_norm": 1.824427843093872, - "learning_rate": 1.077651687289413e-05, - "loss": 0.5442, + "epoch": 0.4325237592397043, + "grad_norm": 2.0265331268310547, + "learning_rate": 8.848450709629827e-06, + "loss": 0.6838, "step": 6144 }, { - "epoch": 0.46498429874011576, - "grad_norm": 3.1149511337280273, - "learning_rate": 1.0774308912782866e-05, - "loss": 0.9675, + "epoch": 0.4325941569869764, + "grad_norm": 1.6699728965759277, + "learning_rate": 8.846911209280403e-06, + "loss": 0.6457, "step": 6145 }, { - "epoch": 0.4650599674624494, - "grad_norm": 1.6089766025543213, - "learning_rate": 1.0772100841558644e-05, - "loss": 0.5172, + "epoch": 0.43266455473424853, + "grad_norm": 1.6367628574371338, + "learning_rate": 8.845371612908395e-06, + "loss": 0.7854, "step": 6146 }, { - "epoch": 0.46513563618478304, - "grad_norm": 9.786401748657227, - "learning_rate": 1.0769892659359731e-05, - "loss": 0.7237, + "epoch": 0.4327349524815206, + "grad_norm": 1.7140562534332275, + "learning_rate": 8.843831920593842e-06, + "loss": 0.6752, "step": 6147 }, { - "epoch": 0.46521130490711665, - "grad_norm": 1.9488837718963623, - "learning_rate": 1.0767684366324418e-05, - "loss": 0.8311, + "epoch": 0.4328053502287927, + "grad_norm": 1.7248181104660034, + "learning_rate": 8.842292132416798e-06, + "loss": 0.5691, "step": 6148 }, { - "epoch": 0.46528697362945026, - "grad_norm": 2.2119038105010986, - "learning_rate": 1.076547596259099e-05, - "loss": 0.7903, + "epoch": 0.43287574797606476, + "grad_norm": 2.124762535095215, + "learning_rate": 8.84075224845732e-06, + "loss": 0.6681, "step": 6149 }, { - "epoch": 0.4653626423517839, - "grad_norm": 2.353832483291626, - "learning_rate": 1.076326744829775e-05, - "loss": 0.7647, + "epoch": 0.43294614572333684, + "grad_norm": 1.7293627262115479, + "learning_rate": 8.839212268795463e-06, + "loss": 0.7443, "step": 6150 }, { - "epoch": 0.46543831107411754, - "grad_norm": 2.166916847229004, - "learning_rate": 1.0761058823582999e-05, - "loss": 0.8551, + "epoch": 0.4330165434706089, + "grad_norm": 1.7863357067108154, + "learning_rate": 8.837672193511294e-06, + "loss": 0.6799, "step": 6151 }, { - "epoch": 0.46551397979645115, - "grad_norm": 2.20060658454895, - "learning_rate": 1.0758850088585045e-05, - "loss": 0.7732, + "epoch": 0.43308694121788105, + "grad_norm": 1.714564323425293, + "learning_rate": 8.836132022684885e-06, + "loss": 0.6033, "step": 6152 }, { - "epoch": 0.46558964851878476, - "grad_norm": 1.9233969449996948, - "learning_rate": 1.0756641243442212e-05, - "loss": 0.7602, + "epoch": 0.4331573389651531, + "grad_norm": 1.675366759300232, + "learning_rate": 8.834591756396311e-06, + "loss": 0.6785, "step": 6153 }, { - "epoch": 0.46566531724111837, - "grad_norm": 2.2503979206085205, - "learning_rate": 1.0754432288292825e-05, - "loss": 0.7605, + "epoch": 0.4332277367124252, + "grad_norm": 2.0534327030181885, + "learning_rate": 8.833051394725647e-06, + "loss": 0.7445, "step": 6154 }, { - "epoch": 0.465740985963452, - "grad_norm": 2.2500691413879395, - "learning_rate": 1.075222322327521e-05, - "loss": 0.7364, + "epoch": 0.4332981344596973, + "grad_norm": 1.8206974267959595, + "learning_rate": 8.831510937752982e-06, + "loss": 0.6548, "step": 6155 }, { - "epoch": 0.46581665468578565, - "grad_norm": 2.3264994621276855, - "learning_rate": 1.0750014048527709e-05, - "loss": 0.807, + "epoch": 0.43336853220696936, + "grad_norm": 2.2733724117279053, + "learning_rate": 8.829970385558405e-06, + "loss": 0.7022, "step": 6156 }, { - "epoch": 0.46589232340811926, - "grad_norm": 2.227979898452759, - "learning_rate": 1.074780476418867e-05, - "loss": 0.6017, + "epoch": 0.4334389299542415, + "grad_norm": 1.9045133590698242, + "learning_rate": 8.82842973822201e-06, + "loss": 0.743, "step": 6157 }, { - "epoch": 0.46596799213045287, - "grad_norm": 2.266706705093384, - "learning_rate": 1.0745595370396444e-05, - "loss": 0.7044, + "epoch": 0.43350932770151357, + "grad_norm": 1.6824142932891846, + "learning_rate": 8.826888995823897e-06, + "loss": 0.7145, "step": 6158 }, { - "epoch": 0.4660436608527865, - "grad_norm": 2.1310250759124756, - "learning_rate": 1.074338586728939e-05, - "loss": 0.7331, + "epoch": 0.43357972544878565, + "grad_norm": 1.7415436506271362, + "learning_rate": 8.82534815844417e-06, + "loss": 0.6636, "step": 6159 }, { - "epoch": 0.46611932957512014, - "grad_norm": 2.122551202774048, - "learning_rate": 1.0741176255005873e-05, - "loss": 0.7152, + "epoch": 0.4336501231960577, + "grad_norm": 1.649789571762085, + "learning_rate": 8.82380722616294e-06, + "loss": 0.7411, "step": 6160 }, { - "epoch": 0.46619499829745376, - "grad_norm": 2.330875873565674, - "learning_rate": 1.0738966533684268e-05, - "loss": 0.7346, + "epoch": 0.4337205209433298, + "grad_norm": 1.790529727935791, + "learning_rate": 8.82226619906032e-06, + "loss": 0.6967, "step": 6161 }, { - "epoch": 0.46627066701978737, - "grad_norm": 1.9120339155197144, - "learning_rate": 1.0736756703462951e-05, - "loss": 0.7467, + "epoch": 0.4337909186906019, + "grad_norm": 1.8433711528778076, + "learning_rate": 8.820725077216432e-06, + "loss": 0.8625, "step": 6162 }, { - "epoch": 0.466346335742121, - "grad_norm": 2.05835223197937, - "learning_rate": 1.0734546764480316e-05, - "loss": 0.7593, + "epoch": 0.433861316437874, + "grad_norm": 2.1198556423187256, + "learning_rate": 8.819183860711395e-06, + "loss": 0.7321, "step": 6163 }, { - "epoch": 0.46642200446445464, - "grad_norm": 2.023322820663452, - "learning_rate": 1.0732336716874753e-05, - "loss": 0.9205, + "epoch": 0.4339317141851461, + "grad_norm": 1.6895127296447754, + "learning_rate": 8.817642549625344e-06, + "loss": 0.6978, "step": 6164 }, { - "epoch": 0.46649767318678825, - "grad_norm": 1.8999871015548706, - "learning_rate": 1.073012656078466e-05, - "loss": 0.6393, + "epoch": 0.43400211193241817, + "grad_norm": 1.7161409854888916, + "learning_rate": 8.81610114403841e-06, + "loss": 0.7664, "step": 6165 }, { - "epoch": 0.46657334190912186, - "grad_norm": 2.1797525882720947, - "learning_rate": 1.0727916296348444e-05, - "loss": 0.8688, + "epoch": 0.43407250967969024, + "grad_norm": 2.1388628482818604, + "learning_rate": 8.814559644030732e-06, + "loss": 0.6839, "step": 6166 }, { - "epoch": 0.4666490106314555, - "grad_norm": 2.276228904724121, - "learning_rate": 1.0725705923704521e-05, - "loss": 0.7856, + "epoch": 0.4341429074269623, + "grad_norm": 1.4826297760009766, + "learning_rate": 8.813018049682454e-06, + "loss": 0.6395, "step": 6167 }, { - "epoch": 0.4667246793537891, - "grad_norm": 2.254700183868408, - "learning_rate": 1.0723495442991314e-05, - "loss": 0.5862, + "epoch": 0.4342133051742344, + "grad_norm": 2.092965841293335, + "learning_rate": 8.81147636107373e-06, + "loss": 0.7017, "step": 6168 }, { - "epoch": 0.46680034807612275, - "grad_norm": 1.760817527770996, - "learning_rate": 1.0721284854347248e-05, - "loss": 0.6833, + "epoch": 0.43428370292150653, + "grad_norm": 1.6289697885513306, + "learning_rate": 8.809934578284704e-06, + "loss": 0.7844, "step": 6169 }, { - "epoch": 0.46687601679845636, - "grad_norm": 3.151423692703247, - "learning_rate": 1.0719074157910752e-05, - "loss": 0.8858, + "epoch": 0.4343541006687786, + "grad_norm": 1.835240125656128, + "learning_rate": 8.808392701395544e-06, + "loss": 0.6542, "step": 6170 }, { - "epoch": 0.46695168552078997, - "grad_norm": 1.7372065782546997, - "learning_rate": 1.0716863353820278e-05, - "loss": 0.6091, + "epoch": 0.4344244984160507, + "grad_norm": 1.7020857334136963, + "learning_rate": 8.806850730486406e-06, + "loss": 0.5814, "step": 6171 }, { - "epoch": 0.4670273542431236, - "grad_norm": 1.8967386484146118, - "learning_rate": 1.0714652442214266e-05, - "loss": 0.8024, + "epoch": 0.43449489616332276, + "grad_norm": 1.4651219844818115, + "learning_rate": 8.805308665637466e-06, + "loss": 0.5753, "step": 6172 }, { - "epoch": 0.46710302296545725, - "grad_norm": 1.915652871131897, - "learning_rate": 1.0712441423231172e-05, - "loss": 0.6228, + "epoch": 0.43456529391059484, + "grad_norm": 1.9983229637145996, + "learning_rate": 8.803766506928892e-06, + "loss": 0.6336, "step": 6173 }, { - "epoch": 0.46717869168779086, - "grad_norm": 2.134735345840454, - "learning_rate": 1.0710230297009458e-05, - "loss": 0.8067, + "epoch": 0.434635691657867, + "grad_norm": 1.797161340713501, + "learning_rate": 8.802224254440863e-06, + "loss": 0.6261, "step": 6174 }, { - "epoch": 0.46725436041012447, - "grad_norm": 2.2443509101867676, - "learning_rate": 1.070801906368759e-05, - "loss": 0.7097, + "epoch": 0.43470608940513905, + "grad_norm": 1.464009404182434, + "learning_rate": 8.800681908253561e-06, + "loss": 0.7279, "step": 6175 }, { - "epoch": 0.4673300291324581, - "grad_norm": 2.271360158920288, - "learning_rate": 1.0705807723404044e-05, - "loss": 0.6685, + "epoch": 0.43477648715241113, + "grad_norm": 1.6144942045211792, + "learning_rate": 8.79913946844718e-06, + "loss": 0.8033, "step": 6176 }, { - "epoch": 0.46740569785479175, - "grad_norm": 2.679190158843994, - "learning_rate": 1.0703596276297303e-05, - "loss": 0.7566, + "epoch": 0.4348468848996832, + "grad_norm": 2.436687707901001, + "learning_rate": 8.797596935101906e-06, + "loss": 0.7107, "step": 6177 }, { - "epoch": 0.46748136657712536, - "grad_norm": 2.672213315963745, - "learning_rate": 1.0701384722505851e-05, - "loss": 0.6706, + "epoch": 0.4349172826469553, + "grad_norm": 1.7799311876296997, + "learning_rate": 8.79605430829794e-06, + "loss": 0.7217, "step": 6178 }, { - "epoch": 0.46755703529945897, - "grad_norm": 2.041059732437134, - "learning_rate": 1.0699173062168183e-05, - "loss": 0.6699, + "epoch": 0.43498768039422736, + "grad_norm": 1.8447139263153076, + "learning_rate": 8.794511588115486e-06, + "loss": 0.7565, "step": 6179 }, { - "epoch": 0.4676327040217926, - "grad_norm": 2.066025972366333, - "learning_rate": 1.0696961295422806e-05, - "loss": 0.6701, + "epoch": 0.4350580781414995, + "grad_norm": 2.0695087909698486, + "learning_rate": 8.792968774634747e-06, + "loss": 0.8144, "step": 6180 }, { - "epoch": 0.4677083727441262, - "grad_norm": 2.1812968254089355, - "learning_rate": 1.0694749422408223e-05, - "loss": 0.6914, + "epoch": 0.43512847588877157, + "grad_norm": 1.5516719818115234, + "learning_rate": 8.791425867935935e-06, + "loss": 0.7746, "step": 6181 }, { - "epoch": 0.46778404146645985, - "grad_norm": 2.1719706058502197, - "learning_rate": 1.0692537443262949e-05, - "loss": 0.8392, + "epoch": 0.43519887363604365, + "grad_norm": 1.815410852432251, + "learning_rate": 8.789882868099274e-06, + "loss": 0.6386, "step": 6182 }, { - "epoch": 0.46785971018879347, - "grad_norm": 2.272714614868164, - "learning_rate": 1.0690325358125506e-05, - "loss": 0.8093, + "epoch": 0.4352692713833157, + "grad_norm": 1.8287253379821777, + "learning_rate": 8.78833977520498e-06, + "loss": 0.7354, "step": 6183 }, { - "epoch": 0.4679353789111271, - "grad_norm": 2.0720415115356445, - "learning_rate": 1.0688113167134421e-05, - "loss": 0.5, + "epoch": 0.4353396691305878, + "grad_norm": 1.6869179010391235, + "learning_rate": 8.786796589333283e-06, + "loss": 0.8252, "step": 6184 }, { - "epoch": 0.4680110476334607, - "grad_norm": 2.3994011878967285, - "learning_rate": 1.0685900870428232e-05, - "loss": 0.8163, + "epoch": 0.43541006687785994, + "grad_norm": 2.0918076038360596, + "learning_rate": 8.78525331056441e-06, + "loss": 0.6715, "step": 6185 }, { - "epoch": 0.46808671635579435, - "grad_norm": 1.8097761869430542, - "learning_rate": 1.0683688468145474e-05, - "loss": 0.761, + "epoch": 0.435480464625132, + "grad_norm": 1.736013412475586, + "learning_rate": 8.783709938978602e-06, + "loss": 0.9359, "step": 6186 }, { - "epoch": 0.46816238507812796, - "grad_norm": 1.8033745288848877, - "learning_rate": 1.0681475960424703e-05, - "loss": 0.6908, + "epoch": 0.4355508623724041, + "grad_norm": 2.161222457885742, + "learning_rate": 8.782166474656099e-06, + "loss": 0.7305, "step": 6187 }, { - "epoch": 0.4682380538004616, - "grad_norm": 1.8102970123291016, - "learning_rate": 1.0679263347404466e-05, - "loss": 0.6382, + "epoch": 0.43562126011967617, + "grad_norm": 1.958014965057373, + "learning_rate": 8.780622917677149e-06, + "loss": 0.751, "step": 6188 }, { - "epoch": 0.4683137225227952, - "grad_norm": 2.0066001415252686, - "learning_rate": 1.0677050629223325e-05, - "loss": 0.6318, + "epoch": 0.43569165786694825, + "grad_norm": 1.7071806192398071, + "learning_rate": 8.779079268121997e-06, + "loss": 0.6508, "step": 6189 }, { - "epoch": 0.46838939124512885, - "grad_norm": 1.7005255222320557, - "learning_rate": 1.0674837806019852e-05, - "loss": 0.7265, + "epoch": 0.4357620556142203, + "grad_norm": 1.9760563373565674, + "learning_rate": 8.777535526070904e-06, + "loss": 0.6111, "step": 6190 }, { - "epoch": 0.46846505996746246, - "grad_norm": 2.1413681507110596, - "learning_rate": 1.0672624877932618e-05, - "loss": 0.6977, + "epoch": 0.43583245336149246, + "grad_norm": 1.7810077667236328, + "learning_rate": 8.775991691604132e-06, + "loss": 0.7323, "step": 6191 }, { - "epoch": 0.46854072868979607, - "grad_norm": 2.8440780639648438, - "learning_rate": 1.0670411845100205e-05, - "loss": 0.8837, + "epoch": 0.43590285110876453, + "grad_norm": 1.7099894285202026, + "learning_rate": 8.77444776480194e-06, + "loss": 0.6888, "step": 6192 }, { - "epoch": 0.4686163974121297, - "grad_norm": 1.863978385925293, - "learning_rate": 1.0668198707661198e-05, - "loss": 0.5702, + "epoch": 0.4359732488560366, + "grad_norm": 1.7208329439163208, + "learning_rate": 8.772903745744604e-06, + "loss": 0.6635, "step": 6193 }, { - "epoch": 0.4686920661344633, - "grad_norm": 2.1832001209259033, - "learning_rate": 1.0665985465754193e-05, - "loss": 0.6077, + "epoch": 0.4360436466033087, + "grad_norm": 1.6958746910095215, + "learning_rate": 8.771359634512396e-06, + "loss": 0.6564, "step": 6194 }, { - "epoch": 0.46876773485679696, - "grad_norm": 2.120635747909546, - "learning_rate": 1.066377211951779e-05, - "loss": 0.732, + "epoch": 0.43611404435058077, + "grad_norm": 1.8377577066421509, + "learning_rate": 8.769815431185596e-06, + "loss": 0.8092, "step": 6195 }, { - "epoch": 0.46884340357913057, - "grad_norm": 2.332000255584717, - "learning_rate": 1.0661558669090595e-05, - "loss": 0.6214, + "epoch": 0.43618444209785284, + "grad_norm": 2.0569870471954346, + "learning_rate": 8.768271135844489e-06, + "loss": 0.8128, "step": 6196 }, { - "epoch": 0.4689190723014642, - "grad_norm": 2.0023016929626465, - "learning_rate": 1.0659345114611225e-05, - "loss": 0.7018, + "epoch": 0.436254839845125, + "grad_norm": 3.2467615604400635, + "learning_rate": 8.766726748569365e-06, + "loss": 0.5876, "step": 6197 }, { - "epoch": 0.4689947410237978, - "grad_norm": 2.0073201656341553, - "learning_rate": 1.0657131456218291e-05, - "loss": 0.7069, + "epoch": 0.43632523759239705, + "grad_norm": 1.8247281312942505, + "learning_rate": 8.765182269440514e-06, + "loss": 0.6893, "step": 6198 }, { - "epoch": 0.46907040974613146, - "grad_norm": 2.4780099391937256, - "learning_rate": 1.065491769405043e-05, - "loss": 0.7045, + "epoch": 0.43639563533966913, + "grad_norm": 1.9285614490509033, + "learning_rate": 8.76363769853824e-06, + "loss": 0.7022, "step": 6199 }, { - "epoch": 0.46914607846846507, - "grad_norm": 3.5818912982940674, - "learning_rate": 1.0652703828246268e-05, - "loss": 0.6786, + "epoch": 0.4364660330869412, + "grad_norm": 1.653703212738037, + "learning_rate": 8.762093035942848e-06, + "loss": 0.7721, "step": 6200 }, { - "epoch": 0.4692217471907987, - "grad_norm": 2.6084370613098145, - "learning_rate": 1.0650489858944447e-05, - "loss": 0.7227, + "epoch": 0.4365364308342133, + "grad_norm": 2.4136390686035156, + "learning_rate": 8.76054828173464e-06, + "loss": 0.7967, "step": 6201 }, { - "epoch": 0.4692974159131323, - "grad_norm": 1.8275518417358398, - "learning_rate": 1.0648275786283613e-05, - "loss": 0.7012, + "epoch": 0.4366068285814854, + "grad_norm": 1.7674696445465088, + "learning_rate": 8.759003435993933e-06, + "loss": 0.6209, "step": 6202 }, { - "epoch": 0.46937308463546595, - "grad_norm": 2.5070960521698, - "learning_rate": 1.0646061610402418e-05, - "loss": 0.9001, + "epoch": 0.4366772263287575, + "grad_norm": 1.9998570680618286, + "learning_rate": 8.757458498801045e-06, + "loss": 0.6601, "step": 6203 }, { - "epoch": 0.46944875335779956, - "grad_norm": 2.149526596069336, - "learning_rate": 1.0643847331439523e-05, - "loss": 0.8065, + "epoch": 0.4367476240760296, + "grad_norm": 1.915947437286377, + "learning_rate": 8.755913470236296e-06, + "loss": 0.7241, "step": 6204 }, { - "epoch": 0.4695244220801332, - "grad_norm": 2.6355538368225098, - "learning_rate": 1.0641632949533589e-05, - "loss": 0.8699, + "epoch": 0.43681802182330165, + "grad_norm": 2.2826759815216064, + "learning_rate": 8.754368350380018e-06, + "loss": 0.7285, "step": 6205 }, { - "epoch": 0.4696000908024668, - "grad_norm": 1.9515386819839478, - "learning_rate": 1.0639418464823292e-05, - "loss": 0.7724, + "epoch": 0.4368884195705737, + "grad_norm": 1.9468077421188354, + "learning_rate": 8.752823139312539e-06, + "loss": 0.6244, "step": 6206 }, { - "epoch": 0.46967575952480045, - "grad_norm": 1.7659491300582886, - "learning_rate": 1.0637203877447305e-05, - "loss": 0.724, + "epoch": 0.4369588173178458, + "grad_norm": 2.063021183013916, + "learning_rate": 8.751277837114197e-06, + "loss": 0.745, "step": 6207 }, { - "epoch": 0.46975142824713406, - "grad_norm": 2.187346935272217, - "learning_rate": 1.0634989187544317e-05, - "loss": 0.7288, + "epoch": 0.43702921506511794, + "grad_norm": 1.9307734966278076, + "learning_rate": 8.749732443865337e-06, + "loss": 0.8397, "step": 6208 }, { - "epoch": 0.4698270969694677, - "grad_norm": 2.0950958728790283, - "learning_rate": 1.0632774395253019e-05, - "loss": 0.7164, + "epoch": 0.43709961281239, + "grad_norm": 2.016983985900879, + "learning_rate": 8.7481869596463e-06, + "loss": 0.6439, "step": 6209 }, { - "epoch": 0.4699027656918013, - "grad_norm": 1.7800686359405518, - "learning_rate": 1.063055950071211e-05, - "loss": 0.9003, + "epoch": 0.4371700105596621, + "grad_norm": 1.8454474210739136, + "learning_rate": 8.746641384537441e-06, + "loss": 0.66, "step": 6210 }, { - "epoch": 0.4699784344141349, - "grad_norm": 2.117051124572754, - "learning_rate": 1.0628344504060288e-05, - "loss": 0.5784, + "epoch": 0.43724040830693417, + "grad_norm": 1.5935415029525757, + "learning_rate": 8.745095718619114e-06, + "loss": 0.687, "step": 6211 }, { - "epoch": 0.47005410313646856, - "grad_norm": 1.9472649097442627, - "learning_rate": 1.0626129405436266e-05, - "loss": 0.7032, + "epoch": 0.43731080605420625, + "grad_norm": 2.3259243965148926, + "learning_rate": 8.74354996197168e-06, + "loss": 0.742, "step": 6212 }, { - "epoch": 0.47012977185880217, - "grad_norm": 2.002767562866211, - "learning_rate": 1.0623914204978761e-05, - "loss": 0.7656, + "epoch": 0.4373812038014784, + "grad_norm": 2.0234336853027344, + "learning_rate": 8.742004114675504e-06, + "loss": 0.7989, "step": 6213 }, { - "epoch": 0.4702054405811358, - "grad_norm": 2.2014105319976807, - "learning_rate": 1.0621698902826497e-05, - "loss": 0.888, + "epoch": 0.43745160154875046, + "grad_norm": 1.6545205116271973, + "learning_rate": 8.740458176810957e-06, + "loss": 0.6575, "step": 6214 }, { - "epoch": 0.4702811093034694, - "grad_norm": 2.010322332382202, - "learning_rate": 1.0619483499118204e-05, - "loss": 0.7655, + "epoch": 0.43752199929602253, + "grad_norm": 2.2341396808624268, + "learning_rate": 8.738912148458414e-06, + "loss": 0.7591, "step": 6215 }, { - "epoch": 0.47035677802580306, - "grad_norm": 2.8622562885284424, - "learning_rate": 1.0617267993992612e-05, - "loss": 0.8364, + "epoch": 0.4375923970432946, + "grad_norm": 1.9738550186157227, + "learning_rate": 8.737366029698251e-06, + "loss": 0.6566, "step": 6216 }, { - "epoch": 0.47043244674813667, - "grad_norm": 1.905005931854248, - "learning_rate": 1.061505238758847e-05, - "loss": 0.6593, + "epoch": 0.4376627947905667, + "grad_norm": 1.658828854560852, + "learning_rate": 8.735819820610856e-06, + "loss": 0.5638, "step": 6217 }, { - "epoch": 0.4705081154704703, - "grad_norm": 1.9572088718414307, - "learning_rate": 1.0612836680044525e-05, - "loss": 0.6098, + "epoch": 0.43773319253783877, + "grad_norm": 1.6319608688354492, + "learning_rate": 8.734273521276618e-06, + "loss": 0.5819, "step": 6218 }, { - "epoch": 0.4705837841928039, - "grad_norm": 2.0442428588867188, - "learning_rate": 1.0610620871499529e-05, - "loss": 0.7608, + "epoch": 0.4378035902851109, + "grad_norm": 2.2767651081085205, + "learning_rate": 8.732727131775928e-06, + "loss": 0.7466, "step": 6219 }, { - "epoch": 0.47065945291513755, - "grad_norm": 2.0061099529266357, - "learning_rate": 1.0608404962092244e-05, - "loss": 0.7947, + "epoch": 0.437873988032383, + "grad_norm": 1.8303794860839844, + "learning_rate": 8.731180652189185e-06, + "loss": 0.6441, "step": 6220 }, { - "epoch": 0.47073512163747117, - "grad_norm": 2.0030205249786377, - "learning_rate": 1.0606188951961438e-05, - "loss": 0.706, + "epoch": 0.43794438577965505, + "grad_norm": 1.9100191593170166, + "learning_rate": 8.729634082596791e-06, + "loss": 0.7135, "step": 6221 }, { - "epoch": 0.4708107903598048, - "grad_norm": 2.660520315170288, - "learning_rate": 1.0603972841245887e-05, - "loss": 0.665, + "epoch": 0.43801478352692713, + "grad_norm": 1.7303392887115479, + "learning_rate": 8.728087423079155e-06, + "loss": 0.6768, "step": 6222 }, { - "epoch": 0.4708864590821384, - "grad_norm": 1.7287667989730835, - "learning_rate": 1.0601756630084367e-05, - "loss": 0.6559, + "epoch": 0.4380851812741992, + "grad_norm": 1.9334133863449097, + "learning_rate": 8.726540673716692e-06, + "loss": 0.77, "step": 6223 }, { - "epoch": 0.470962127804472, - "grad_norm": 1.7627499103546143, - "learning_rate": 1.0599540318615667e-05, - "loss": 0.7481, + "epoch": 0.4381555790214713, + "grad_norm": 1.8955438137054443, + "learning_rate": 8.724993834589813e-06, + "loss": 0.6964, "step": 6224 }, { - "epoch": 0.47103779652680566, - "grad_norm": 2.1454079151153564, - "learning_rate": 1.0597323906978577e-05, - "loss": 0.6736, + "epoch": 0.4382259767687434, + "grad_norm": 1.7639063596725464, + "learning_rate": 8.723446905778942e-06, + "loss": 0.6766, "step": 6225 }, { - "epoch": 0.4711134652491393, - "grad_norm": 1.9202808141708374, - "learning_rate": 1.05951073953119e-05, - "loss": 0.7624, + "epoch": 0.4382963745160155, + "grad_norm": 1.8527770042419434, + "learning_rate": 8.721899887364505e-06, + "loss": 0.6673, "step": 6226 }, { - "epoch": 0.4711891339714729, - "grad_norm": 2.2088475227355957, - "learning_rate": 1.0592890783754437e-05, - "loss": 0.7322, + "epoch": 0.4383667722632876, + "grad_norm": 1.8190735578536987, + "learning_rate": 8.720352779426937e-06, + "loss": 0.7574, "step": 6227 }, { - "epoch": 0.4712648026938065, - "grad_norm": 2.0040664672851562, - "learning_rate": 1.0590674072445002e-05, - "loss": 0.7205, + "epoch": 0.43843717001055965, + "grad_norm": 1.8899558782577515, + "learning_rate": 8.718805582046666e-06, + "loss": 0.7401, "step": 6228 }, { - "epoch": 0.47134047141614016, - "grad_norm": 2.295071601867676, - "learning_rate": 1.0588457261522413e-05, - "loss": 0.6734, + "epoch": 0.43850756775783173, + "grad_norm": 1.6029268503189087, + "learning_rate": 8.717258295304138e-06, + "loss": 0.6922, "step": 6229 }, { - "epoch": 0.47141614013847377, - "grad_norm": 2.2637734413146973, - "learning_rate": 1.0586240351125489e-05, - "loss": 0.8409, + "epoch": 0.43857796550510386, + "grad_norm": 2.028824806213379, + "learning_rate": 8.715710919279795e-06, + "loss": 0.5881, "step": 6230 }, { - "epoch": 0.4714918088608074, - "grad_norm": 2.1231727600097656, - "learning_rate": 1.0584023341393069e-05, - "loss": 0.7858, + "epoch": 0.43864836325237594, + "grad_norm": 1.931077003479004, + "learning_rate": 8.714163454054087e-06, + "loss": 0.6717, "step": 6231 }, { - "epoch": 0.471567477583141, - "grad_norm": 2.0771729946136475, - "learning_rate": 1.0581806232463978e-05, - "loss": 0.7433, + "epoch": 0.438718760999648, + "grad_norm": 3.9757111072540283, + "learning_rate": 8.71261589970747e-06, + "loss": 0.7236, "step": 6232 }, { - "epoch": 0.47164314630547466, - "grad_norm": 2.2591214179992676, - "learning_rate": 1.0579589024477068e-05, - "loss": 0.7656, + "epoch": 0.4387891587469201, + "grad_norm": 2.1985251903533936, + "learning_rate": 8.7110682563204e-06, + "loss": 0.5547, "step": 6233 }, { - "epoch": 0.47171881502780827, - "grad_norm": 2.270939826965332, - "learning_rate": 1.0577371717571182e-05, - "loss": 0.8632, + "epoch": 0.43885955649419217, + "grad_norm": 1.7094374895095825, + "learning_rate": 8.709520523973344e-06, + "loss": 0.5994, "step": 6234 }, { - "epoch": 0.4717944837501419, - "grad_norm": 2.1328177452087402, - "learning_rate": 1.057515431188518e-05, - "loss": 0.7845, + "epoch": 0.43892995424146425, + "grad_norm": 1.8150886297225952, + "learning_rate": 8.707972702746767e-06, + "loss": 0.6586, "step": 6235 }, { - "epoch": 0.4718701524724755, - "grad_norm": 2.4155235290527344, - "learning_rate": 1.0572936807557919e-05, - "loss": 0.6961, + "epoch": 0.4390003519887364, + "grad_norm": 2.054304838180542, + "learning_rate": 8.70642479272114e-06, + "loss": 0.7881, "step": 6236 }, { - "epoch": 0.4719458211948091, - "grad_norm": 2.18650221824646, - "learning_rate": 1.0570719204728265e-05, - "loss": 0.69, + "epoch": 0.43907074973600846, + "grad_norm": 2.230776786804199, + "learning_rate": 8.704876793976943e-06, + "loss": 0.6256, "step": 6237 }, { - "epoch": 0.47202148991714277, - "grad_norm": 2.087130308151245, - "learning_rate": 1.05685015035351e-05, - "loss": 0.7847, + "epoch": 0.43914114748328054, + "grad_norm": 1.8628860712051392, + "learning_rate": 8.703328706594658e-06, + "loss": 0.6026, "step": 6238 }, { - "epoch": 0.4720971586394764, - "grad_norm": 1.8233695030212402, - "learning_rate": 1.0566283704117292e-05, - "loss": 0.7336, + "epoch": 0.4392115452305526, + "grad_norm": 2.396927833557129, + "learning_rate": 8.701780530654773e-06, + "loss": 0.753, "step": 6239 }, { - "epoch": 0.47217282736181, - "grad_norm": 2.3203165531158447, - "learning_rate": 1.0564065806613736e-05, - "loss": 0.6075, + "epoch": 0.4392819429778247, + "grad_norm": 1.949360728263855, + "learning_rate": 8.700232266237776e-06, + "loss": 0.7076, "step": 6240 }, { - "epoch": 0.4722484960841436, - "grad_norm": 1.858660101890564, - "learning_rate": 1.056184781116332e-05, - "loss": 0.6882, + "epoch": 0.4393523407250968, + "grad_norm": 4.056278705596924, + "learning_rate": 8.698683913424161e-06, + "loss": 0.6346, "step": 6241 }, { - "epoch": 0.47232416480647726, - "grad_norm": 2.2066173553466797, - "learning_rate": 1.055962971790494e-05, - "loss": 0.6147, + "epoch": 0.4394227384723689, + "grad_norm": 1.9052281379699707, + "learning_rate": 8.697135472294435e-06, + "loss": 0.7372, "step": 6242 }, { - "epoch": 0.4723998335288109, - "grad_norm": 2.0067367553710938, - "learning_rate": 1.0557411526977506e-05, - "loss": 0.7309, + "epoch": 0.439493136219641, + "grad_norm": 1.9019471406936646, + "learning_rate": 8.695586942929097e-06, + "loss": 0.6892, "step": 6243 }, { - "epoch": 0.4724755022511445, - "grad_norm": 2.2683217525482178, - "learning_rate": 1.055519323851992e-05, - "loss": 0.6129, + "epoch": 0.43956353396691306, + "grad_norm": 3.5141758918762207, + "learning_rate": 8.694038325408658e-06, + "loss": 0.6995, "step": 6244 }, { - "epoch": 0.4725511709734781, - "grad_norm": 2.247870683670044, - "learning_rate": 1.0552974852671111e-05, - "loss": 0.7197, + "epoch": 0.43963393171418513, + "grad_norm": 1.7422585487365723, + "learning_rate": 8.692489619813634e-06, + "loss": 0.7901, "step": 6245 }, { - "epoch": 0.47262683969581176, - "grad_norm": 2.4525437355041504, - "learning_rate": 1.0550756369569987e-05, - "loss": 0.5801, + "epoch": 0.4397043294614572, + "grad_norm": 1.967261791229248, + "learning_rate": 8.690940826224539e-06, + "loss": 0.5889, "step": 6246 }, { - "epoch": 0.4727025084181454, - "grad_norm": 2.0607056617736816, - "learning_rate": 1.0548537789355486e-05, - "loss": 0.6399, + "epoch": 0.43977472720872934, + "grad_norm": 1.736568570137024, + "learning_rate": 8.6893919447219e-06, + "loss": 0.6944, "step": 6247 }, { - "epoch": 0.472778177140479, - "grad_norm": 1.7411423921585083, - "learning_rate": 1.054631911216654e-05, - "loss": 0.6645, + "epoch": 0.4398451249560014, + "grad_norm": 1.8167866468429565, + "learning_rate": 8.687842975386245e-06, + "loss": 0.6288, "step": 6248 }, { - "epoch": 0.4728538458628126, - "grad_norm": 2.4376776218414307, - "learning_rate": 1.0544100338142088e-05, - "loss": 0.7827, + "epoch": 0.4399155227032735, + "grad_norm": 1.6752080917358398, + "learning_rate": 8.686293918298104e-06, + "loss": 0.6355, "step": 6249 }, { - "epoch": 0.4729295145851462, - "grad_norm": 1.9150795936584473, - "learning_rate": 1.0541881467421081e-05, - "loss": 0.8126, + "epoch": 0.4399859204505456, + "grad_norm": 1.7385259866714478, + "learning_rate": 8.684744773538015e-06, + "loss": 0.704, "step": 6250 }, { - "epoch": 0.47300518330747987, - "grad_norm": 2.8938183784484863, - "learning_rate": 1.053966250014247e-05, - "loss": 0.8907, + "epoch": 0.44005631819781765, + "grad_norm": 1.775976538658142, + "learning_rate": 8.68319554118652e-06, + "loss": 0.7186, "step": 6251 }, { - "epoch": 0.4730808520298135, - "grad_norm": 2.5442142486572266, - "learning_rate": 1.0537443436445213e-05, - "loss": 0.7493, + "epoch": 0.44012671594508973, + "grad_norm": 1.6073647737503052, + "learning_rate": 8.681646221324164e-06, + "loss": 0.6258, "step": 6252 }, { - "epoch": 0.4731565207521471, - "grad_norm": 5.999145984649658, - "learning_rate": 1.0535224276468274e-05, - "loss": 0.7522, + "epoch": 0.44019711369236186, + "grad_norm": 2.1105713844299316, + "learning_rate": 8.680096814031498e-06, + "loss": 0.8243, "step": 6253 }, { - "epoch": 0.4732321894744807, - "grad_norm": 3.0991311073303223, - "learning_rate": 1.0533005020350627e-05, - "loss": 0.8252, + "epoch": 0.44026751143963394, + "grad_norm": 1.931691288948059, + "learning_rate": 8.678547319389077e-06, + "loss": 0.8118, "step": 6254 }, { - "epoch": 0.47330785819681437, - "grad_norm": 3.191121816635132, - "learning_rate": 1.0530785668231243e-05, - "loss": 0.7184, + "epoch": 0.440337909186906, + "grad_norm": 1.8456681966781616, + "learning_rate": 8.676997737477462e-06, + "loss": 0.6716, "step": 6255 }, { - "epoch": 0.473383526919148, - "grad_norm": 2.009726047515869, - "learning_rate": 1.0528566220249113e-05, - "loss": 0.7366, + "epoch": 0.4404083069341781, + "grad_norm": 1.7427514791488647, + "learning_rate": 8.675448068377212e-06, + "loss": 0.5033, "step": 6256 }, { - "epoch": 0.4734591956414816, - "grad_norm": 2.614388942718506, - "learning_rate": 1.052634667654322e-05, - "loss": 0.8348, + "epoch": 0.4404787046814502, + "grad_norm": 3.2757933139801025, + "learning_rate": 8.673898312168905e-06, + "loss": 0.7578, "step": 6257 }, { - "epoch": 0.4735348643638152, - "grad_norm": 2.2507081031799316, - "learning_rate": 1.0524127037252564e-05, - "loss": 0.7253, + "epoch": 0.4405491024287223, + "grad_norm": 1.8631789684295654, + "learning_rate": 8.672348468933104e-06, + "loss": 0.6666, "step": 6258 }, { - "epoch": 0.47361053308614887, - "grad_norm": 2.0302281379699707, - "learning_rate": 1.0521907302516143e-05, - "loss": 0.7483, + "epoch": 0.4406195001759944, + "grad_norm": 2.0338592529296875, + "learning_rate": 8.670798538750395e-06, + "loss": 0.7577, "step": 6259 }, { - "epoch": 0.4736862018084825, - "grad_norm": 2.1905441284179688, - "learning_rate": 1.0519687472472962e-05, - "loss": 0.7226, + "epoch": 0.44068989792326646, + "grad_norm": 2.0199193954467773, + "learning_rate": 8.669248521701354e-06, + "loss": 0.7247, "step": 6260 }, { - "epoch": 0.4737618705308161, - "grad_norm": 1.9951703548431396, - "learning_rate": 1.0517467547262038e-05, - "loss": 0.8689, + "epoch": 0.44076029567053854, + "grad_norm": 2.318453073501587, + "learning_rate": 8.667698417866574e-06, + "loss": 0.8233, "step": 6261 }, { - "epoch": 0.4738375392531497, - "grad_norm": 2.1481285095214844, - "learning_rate": 1.0515247527022386e-05, - "loss": 0.8369, + "epoch": 0.4408306934178106, + "grad_norm": 1.9138137102127075, + "learning_rate": 8.666148227326638e-06, + "loss": 0.6847, "step": 6262 }, { - "epoch": 0.4739132079754833, - "grad_norm": 1.8148024082183838, - "learning_rate": 1.0513027411893035e-05, - "loss": 0.7932, + "epoch": 0.4409010911650827, + "grad_norm": 2.1254448890686035, + "learning_rate": 8.664597950162151e-06, + "loss": 0.7145, "step": 6263 }, { - "epoch": 0.473988876697817, - "grad_norm": 2.1687381267547607, - "learning_rate": 1.0510807202013016e-05, - "loss": 0.7735, + "epoch": 0.4409714889123548, + "grad_norm": 2.4283809661865234, + "learning_rate": 8.663047586453707e-06, + "loss": 0.6722, "step": 6264 }, { - "epoch": 0.4740645454201506, - "grad_norm": 1.8470525741577148, - "learning_rate": 1.0508586897521359e-05, - "loss": 0.8987, + "epoch": 0.4410418866596269, + "grad_norm": 2.234814167022705, + "learning_rate": 8.661497136281913e-06, + "loss": 0.6115, "step": 6265 }, { - "epoch": 0.4741402141424842, - "grad_norm": 2.30938458442688, - "learning_rate": 1.0506366498557113e-05, - "loss": 0.6753, + "epoch": 0.441112284406899, + "grad_norm": 1.84239661693573, + "learning_rate": 8.65994659972738e-06, + "loss": 0.6728, "step": 6266 }, { - "epoch": 0.4742158828648178, - "grad_norm": 2.1527512073516846, - "learning_rate": 1.0504146005259323e-05, - "loss": 0.8064, + "epoch": 0.44118268215417106, + "grad_norm": 1.8301811218261719, + "learning_rate": 8.658395976870717e-06, + "loss": 0.6392, "step": 6267 }, { - "epoch": 0.47429155158715147, - "grad_norm": 2.357869863510132, - "learning_rate": 1.050192541776705e-05, - "loss": 0.8538, + "epoch": 0.44125307990144313, + "grad_norm": 1.8437247276306152, + "learning_rate": 8.656845267792547e-06, + "loss": 0.6205, "step": 6268 }, { - "epoch": 0.4743672203094851, - "grad_norm": 2.084754705429077, - "learning_rate": 1.0499704736219345e-05, - "loss": 0.7007, + "epoch": 0.44132347764871527, + "grad_norm": 1.5435819625854492, + "learning_rate": 8.655294472573492e-06, + "loss": 0.7961, "step": 6269 }, { - "epoch": 0.4744428890318187, - "grad_norm": 1.8246986865997314, - "learning_rate": 1.049748396075528e-05, - "loss": 0.6057, + "epoch": 0.44139387539598735, + "grad_norm": 2.167980670928955, + "learning_rate": 8.653743591294177e-06, + "loss": 0.8406, "step": 6270 }, { - "epoch": 0.4745185577541523, - "grad_norm": 3.0122311115264893, - "learning_rate": 1.0495263091513926e-05, - "loss": 0.6435, + "epoch": 0.4414642731432594, + "grad_norm": 2.0197601318359375, + "learning_rate": 8.652192624035233e-06, + "loss": 0.722, "step": 6271 }, { - "epoch": 0.47459422647648597, - "grad_norm": 2.0100696086883545, - "learning_rate": 1.0493042128634361e-05, - "loss": 0.6919, + "epoch": 0.4415346708905315, + "grad_norm": 1.7140681743621826, + "learning_rate": 8.650641570877303e-06, + "loss": 0.6731, "step": 6272 }, { - "epoch": 0.4746698951988196, - "grad_norm": 2.3327362537384033, - "learning_rate": 1.0490821072255667e-05, - "loss": 0.6692, + "epoch": 0.4416050686378036, + "grad_norm": 2.046846628189087, + "learning_rate": 8.649090431901018e-06, + "loss": 0.7416, "step": 6273 }, { - "epoch": 0.4747455639211532, - "grad_norm": 2.43101167678833, - "learning_rate": 1.0488599922516941e-05, - "loss": 0.7824, + "epoch": 0.44167546638507565, + "grad_norm": 2.326343059539795, + "learning_rate": 8.64753920718703e-06, + "loss": 0.6598, "step": 6274 }, { - "epoch": 0.4748212326434868, - "grad_norm": 1.8376708030700684, - "learning_rate": 1.048637867955727e-05, - "loss": 0.7273, + "epoch": 0.4417458641323478, + "grad_norm": 1.9007511138916016, + "learning_rate": 8.645987896815987e-06, + "loss": 0.6536, "step": 6275 }, { - "epoch": 0.4748969013658204, - "grad_norm": 1.9170385599136353, - "learning_rate": 1.0484157343515756e-05, - "loss": 0.6116, + "epoch": 0.44181626187961986, + "grad_norm": 1.9517228603363037, + "learning_rate": 8.644436500868542e-06, + "loss": 0.7142, "step": 6276 }, { - "epoch": 0.4749725700881541, - "grad_norm": 1.9089981317520142, - "learning_rate": 1.0481935914531513e-05, - "loss": 0.7632, + "epoch": 0.44188665962689194, + "grad_norm": 2.384875774383545, + "learning_rate": 8.642885019425353e-06, + "loss": 0.6945, "step": 6277 }, { - "epoch": 0.4750482388104877, - "grad_norm": 2.5500640869140625, - "learning_rate": 1.0479714392743645e-05, - "loss": 0.643, + "epoch": 0.441957057374164, + "grad_norm": 1.9869259595870972, + "learning_rate": 8.641333452567085e-06, + "loss": 0.6748, "step": 6278 }, { - "epoch": 0.4751239075328213, - "grad_norm": 2.08392596244812, - "learning_rate": 1.0477492778291281e-05, - "loss": 0.7338, + "epoch": 0.4420274551214361, + "grad_norm": 2.2182424068450928, + "learning_rate": 8.639781800374405e-06, + "loss": 0.7776, "step": 6279 }, { - "epoch": 0.4751995762551549, - "grad_norm": 2.564549446105957, - "learning_rate": 1.0475271071313535e-05, - "loss": 0.7122, + "epoch": 0.4420978528687082, + "grad_norm": 1.994261622428894, + "learning_rate": 8.638230062927982e-06, + "loss": 0.6463, "step": 6280 }, { - "epoch": 0.4752752449774886, - "grad_norm": 1.9711978435516357, - "learning_rate": 1.0473049271949547e-05, - "loss": 0.7447, + "epoch": 0.4421682506159803, + "grad_norm": 2.00596284866333, + "learning_rate": 8.636678240308497e-06, + "loss": 0.676, "step": 6281 }, { - "epoch": 0.4753509136998222, - "grad_norm": 1.9568284749984741, - "learning_rate": 1.0470827380338448e-05, - "loss": 0.66, + "epoch": 0.4422386483632524, + "grad_norm": 2.538010358810425, + "learning_rate": 8.635126332596627e-06, + "loss": 0.5784, "step": 6282 }, { - "epoch": 0.4754265824221558, - "grad_norm": 2.3648922443389893, - "learning_rate": 1.046860539661938e-05, - "loss": 0.7885, + "epoch": 0.44230904611052446, + "grad_norm": 1.6122359037399292, + "learning_rate": 8.633574339873056e-06, + "loss": 0.6722, "step": 6283 }, { - "epoch": 0.4755022511444894, - "grad_norm": 2.0861058235168457, - "learning_rate": 1.0466383320931494e-05, - "loss": 0.8754, + "epoch": 0.44237944385779654, + "grad_norm": 2.0384156703948975, + "learning_rate": 8.632022262218478e-06, + "loss": 0.6748, "step": 6284 }, { - "epoch": 0.4755779198668231, - "grad_norm": 2.200965642929077, - "learning_rate": 1.046416115341394e-05, - "loss": 0.8098, + "epoch": 0.4424498416050686, + "grad_norm": 1.9468833208084106, + "learning_rate": 8.630470099713584e-06, + "loss": 0.7242, "step": 6285 }, { - "epoch": 0.4756535885891567, - "grad_norm": 2.617201328277588, - "learning_rate": 1.0461938894205882e-05, - "loss": 0.6633, + "epoch": 0.44252023935234075, + "grad_norm": 1.8797940015792847, + "learning_rate": 8.628917852439074e-06, + "loss": 0.7954, "step": 6286 }, { - "epoch": 0.4757292573114903, - "grad_norm": 2.2858943939208984, - "learning_rate": 1.0459716543446477e-05, - "loss": 0.729, + "epoch": 0.4425906370996128, + "grad_norm": 2.3255364894866943, + "learning_rate": 8.627365520475646e-06, + "loss": 0.6649, "step": 6287 }, { - "epoch": 0.4758049260338239, - "grad_norm": 2.4813878536224365, - "learning_rate": 1.0457494101274904e-05, - "loss": 0.6442, + "epoch": 0.4426610348468849, + "grad_norm": 1.652653694152832, + "learning_rate": 8.625813103904014e-06, + "loss": 0.6415, "step": 6288 }, { - "epoch": 0.4758805947561575, - "grad_norm": 2.3235511779785156, - "learning_rate": 1.0455271567830336e-05, - "loss": 0.7065, + "epoch": 0.442731432594157, + "grad_norm": 1.8098775148391724, + "learning_rate": 8.624260602804887e-06, + "loss": 0.6064, "step": 6289 }, { - "epoch": 0.4759562634784912, - "grad_norm": 2.1012747287750244, - "learning_rate": 1.0453048943251956e-05, - "loss": 0.8561, + "epoch": 0.44280183034142906, + "grad_norm": 1.9257032871246338, + "learning_rate": 8.622708017258977e-06, + "loss": 0.6662, "step": 6290 }, { - "epoch": 0.4760319322008248, - "grad_norm": 2.3040177822113037, - "learning_rate": 1.045082622767895e-05, - "loss": 0.6949, + "epoch": 0.44287222808870114, + "grad_norm": 1.704490303993225, + "learning_rate": 8.62115534734701e-06, + "loss": 0.6943, "step": 6291 }, { - "epoch": 0.4761076009231584, - "grad_norm": 2.2570884227752686, - "learning_rate": 1.0448603421250513e-05, - "loss": 0.77, + "epoch": 0.44294262583597327, + "grad_norm": 1.9758707284927368, + "learning_rate": 8.619602593149705e-06, + "loss": 0.7102, "step": 6292 }, { - "epoch": 0.476183269645492, - "grad_norm": 2.1080000400543213, - "learning_rate": 1.0446380524105847e-05, - "loss": 0.8376, + "epoch": 0.44301302358324535, + "grad_norm": 2.10601544380188, + "learning_rate": 8.618049754747798e-06, + "loss": 0.7238, "step": 6293 }, { - "epoch": 0.4762589383678257, - "grad_norm": 2.6243064403533936, - "learning_rate": 1.0444157536384152e-05, - "loss": 0.7462, + "epoch": 0.4430834213305174, + "grad_norm": 1.9898911714553833, + "learning_rate": 8.616496832222019e-06, + "loss": 0.7894, "step": 6294 }, { - "epoch": 0.4763346070901593, - "grad_norm": 2.161816358566284, - "learning_rate": 1.0441934458224642e-05, - "loss": 0.8241, + "epoch": 0.4431538190777895, + "grad_norm": 2.048314094543457, + "learning_rate": 8.614943825653104e-06, + "loss": 0.5628, "step": 6295 }, { - "epoch": 0.4764102758124929, - "grad_norm": 2.4741382598876953, - "learning_rate": 1.043971128976653e-05, - "loss": 0.733, + "epoch": 0.4432242168250616, + "grad_norm": 1.7632935047149658, + "learning_rate": 8.613390735121798e-06, + "loss": 0.6755, "step": 6296 }, { - "epoch": 0.4764859445348265, - "grad_norm": 2.5977022647857666, - "learning_rate": 1.0437488031149042e-05, - "loss": 0.6428, + "epoch": 0.4432946145723337, + "grad_norm": 2.011080741882324, + "learning_rate": 8.611837560708846e-06, + "loss": 0.7195, "step": 6297 }, { - "epoch": 0.4765616132571602, - "grad_norm": 2.8118369579315186, - "learning_rate": 1.0435264682511405e-05, - "loss": 0.8184, + "epoch": 0.4433650123196058, + "grad_norm": 1.9133890867233276, + "learning_rate": 8.610284302494999e-06, + "loss": 0.6547, "step": 6298 }, { - "epoch": 0.4766372819794938, - "grad_norm": 2.1726109981536865, - "learning_rate": 1.0433041243992852e-05, - "loss": 0.6495, + "epoch": 0.44343541006687787, + "grad_norm": 1.8197356462478638, + "learning_rate": 8.608730960561014e-06, + "loss": 0.5901, "step": 6299 }, { - "epoch": 0.4767129507018274, - "grad_norm": 1.90822172164917, - "learning_rate": 1.0430817715732622e-05, - "loss": 0.7302, + "epoch": 0.44350580781414994, + "grad_norm": 1.7483916282653809, + "learning_rate": 8.607177534987648e-06, + "loss": 0.6542, "step": 6300 }, { - "epoch": 0.476788619424161, - "grad_norm": 2.576930522918701, - "learning_rate": 1.0428594097869953e-05, - "loss": 0.8132, + "epoch": 0.443576205561422, + "grad_norm": 1.5346592664718628, + "learning_rate": 8.605624025855667e-06, + "loss": 0.619, "step": 6301 }, { - "epoch": 0.4768642881464946, - "grad_norm": 1.8655272722244263, - "learning_rate": 1.0426370390544107e-05, - "loss": 0.8921, + "epoch": 0.4436466033086941, + "grad_norm": 1.5495575666427612, + "learning_rate": 8.60407043324584e-06, + "loss": 0.6812, "step": 6302 }, { - "epoch": 0.4769399568688283, - "grad_norm": 2.354846715927124, - "learning_rate": 1.042414659389433e-05, - "loss": 0.803, + "epoch": 0.44371700105596623, + "grad_norm": 1.821560025215149, + "learning_rate": 8.602516757238937e-06, + "loss": 0.6557, "step": 6303 }, { - "epoch": 0.4770156255911619, - "grad_norm": 2.417755603790283, - "learning_rate": 1.0421922708059892e-05, - "loss": 0.8491, + "epoch": 0.4437873988032383, + "grad_norm": 1.5768433809280396, + "learning_rate": 8.600962997915736e-06, + "loss": 0.6702, "step": 6304 }, { - "epoch": 0.4770912943134955, - "grad_norm": 1.6831603050231934, - "learning_rate": 1.041969873318005e-05, - "loss": 0.6529, + "epoch": 0.4438577965505104, + "grad_norm": 2.2786896228790283, + "learning_rate": 8.599409155357019e-06, + "loss": 0.607, "step": 6305 }, { - "epoch": 0.4771669630358291, - "grad_norm": 2.6375699043273926, - "learning_rate": 1.0417474669394084e-05, - "loss": 0.8822, + "epoch": 0.44392819429778246, + "grad_norm": 1.8299150466918945, + "learning_rate": 8.597855229643573e-06, + "loss": 0.622, "step": 6306 }, { - "epoch": 0.4772426317581628, - "grad_norm": 2.0903217792510986, - "learning_rate": 1.041525051684127e-05, - "loss": 0.6572, + "epoch": 0.44399859204505454, + "grad_norm": 2.1357901096343994, + "learning_rate": 8.596301220856184e-06, + "loss": 0.8005, "step": 6307 }, { - "epoch": 0.4773183004804964, - "grad_norm": 1.7691537141799927, - "learning_rate": 1.0413026275660887e-05, - "loss": 0.7839, + "epoch": 0.4440689897923266, + "grad_norm": 1.7597311735153198, + "learning_rate": 8.594747129075652e-06, + "loss": 0.6725, "step": 6308 }, { - "epoch": 0.47739396920283, - "grad_norm": 2.0674221515655518, - "learning_rate": 1.0410801945992233e-05, - "loss": 0.6806, + "epoch": 0.44413938753959875, + "grad_norm": 1.4086155891418457, + "learning_rate": 8.593192954382771e-06, + "loss": 0.5835, "step": 6309 }, { - "epoch": 0.4774696379251636, - "grad_norm": 2.6139473915100098, - "learning_rate": 1.0408577527974595e-05, - "loss": 0.7133, + "epoch": 0.44420978528687083, + "grad_norm": 1.9376952648162842, + "learning_rate": 8.591638696858344e-06, + "loss": 0.6384, "step": 6310 }, { - "epoch": 0.4775453066474973, - "grad_norm": 2.3850035667419434, - "learning_rate": 1.0406353021747277e-05, - "loss": 0.6617, + "epoch": 0.4442801830341429, + "grad_norm": 1.7554107904434204, + "learning_rate": 8.590084356583182e-06, + "loss": 0.6891, "step": 6311 }, { - "epoch": 0.4776209753698309, - "grad_norm": 2.5777697563171387, - "learning_rate": 1.0404128427449584e-05, - "loss": 0.8671, + "epoch": 0.444350580781415, + "grad_norm": 1.856407642364502, + "learning_rate": 8.588529933638092e-06, + "loss": 0.6871, "step": 6312 }, { - "epoch": 0.4776966440921645, - "grad_norm": 2.2594873905181885, - "learning_rate": 1.0401903745220831e-05, - "loss": 0.7522, + "epoch": 0.44442097852868706, + "grad_norm": 1.7802973985671997, + "learning_rate": 8.586975428103893e-06, + "loss": 0.648, "step": 6313 }, { - "epoch": 0.4777723128144981, - "grad_norm": 2.265115737915039, - "learning_rate": 1.0399678975200328e-05, - "loss": 0.7659, + "epoch": 0.4444913762759592, + "grad_norm": 1.806768774986267, + "learning_rate": 8.585420840061408e-06, + "loss": 0.73, "step": 6314 }, { - "epoch": 0.4778479815368317, - "grad_norm": 2.038884162902832, - "learning_rate": 1.03974541175274e-05, - "loss": 0.6763, + "epoch": 0.44456177402323127, + "grad_norm": 1.7104696035385132, + "learning_rate": 8.583866169591452e-06, + "loss": 0.591, "step": 6315 }, { - "epoch": 0.4779236502591654, - "grad_norm": 2.0255823135375977, - "learning_rate": 1.0395229172341377e-05, - "loss": 0.7657, + "epoch": 0.44463217177050335, + "grad_norm": 1.682559609413147, + "learning_rate": 8.582311416774861e-06, + "loss": 0.6968, "step": 6316 }, { - "epoch": 0.477999318981499, - "grad_norm": 1.9382271766662598, - "learning_rate": 1.0393004139781586e-05, - "loss": 0.5579, + "epoch": 0.4447025695177754, + "grad_norm": 1.585602045059204, + "learning_rate": 8.580756581692467e-06, + "loss": 0.5866, "step": 6317 }, { - "epoch": 0.4780749877038326, - "grad_norm": 2.1081786155700684, - "learning_rate": 1.0390779019987379e-05, - "loss": 0.7453, + "epoch": 0.4447729672650475, + "grad_norm": 1.6496354341506958, + "learning_rate": 8.579201664425107e-06, + "loss": 0.7452, "step": 6318 }, { - "epoch": 0.4781506564261662, - "grad_norm": 1.9590938091278076, - "learning_rate": 1.0388553813098082e-05, - "loss": 0.7372, + "epoch": 0.4448433650123196, + "grad_norm": 1.7225035429000854, + "learning_rate": 8.577646665053622e-06, + "loss": 0.7174, "step": 6319 }, { - "epoch": 0.4782263251484999, - "grad_norm": 1.6101315021514893, - "learning_rate": 1.0386328519253061e-05, - "loss": 0.8169, + "epoch": 0.4449137627595917, + "grad_norm": 1.8713361024856567, + "learning_rate": 8.576091583658858e-06, + "loss": 0.681, "step": 6320 }, { - "epoch": 0.4783019938708335, - "grad_norm": 3.1328699588775635, - "learning_rate": 1.0384103138591659e-05, - "loss": 0.7454, + "epoch": 0.4449841605068638, + "grad_norm": 1.70640230178833, + "learning_rate": 8.574536420321666e-06, + "loss": 0.6069, "step": 6321 }, { - "epoch": 0.4783776625931671, - "grad_norm": 2.7711310386657715, - "learning_rate": 1.0381877671253245e-05, - "loss": 0.5887, + "epoch": 0.44505455825413587, + "grad_norm": 1.5169999599456787, + "learning_rate": 8.572981175122898e-06, + "loss": 0.6365, "step": 6322 }, { - "epoch": 0.4784533313155007, - "grad_norm": 2.25168514251709, - "learning_rate": 1.037965211737718e-05, - "loss": 0.6437, + "epoch": 0.44512495600140795, + "grad_norm": 1.6917929649353027, + "learning_rate": 8.571425848143417e-06, + "loss": 0.6742, "step": 6323 }, { - "epoch": 0.4785290000378344, - "grad_norm": 3.660417318344116, - "learning_rate": 1.0377426477102837e-05, - "loss": 0.7606, + "epoch": 0.44519535374868, + "grad_norm": 1.9512450695037842, + "learning_rate": 8.56987043946408e-06, + "loss": 0.708, "step": 6324 }, { - "epoch": 0.478604668760168, - "grad_norm": 2.1805953979492188, - "learning_rate": 1.0375200750569595e-05, - "loss": 0.651, + "epoch": 0.44526575149595216, + "grad_norm": 1.7476052045822144, + "learning_rate": 8.568314949165756e-06, + "loss": 0.629, "step": 6325 }, { - "epoch": 0.4786803374825016, - "grad_norm": 2.296247720718384, - "learning_rate": 1.037297493791683e-05, - "loss": 0.7059, + "epoch": 0.44533614924322423, + "grad_norm": 2.006992816925049, + "learning_rate": 8.56675937732932e-06, + "loss": 0.6352, "step": 6326 }, { - "epoch": 0.4787560062048352, - "grad_norm": 2.1568257808685303, - "learning_rate": 1.037074903928394e-05, - "loss": 0.7095, + "epoch": 0.4454065469904963, + "grad_norm": 1.708809733390808, + "learning_rate": 8.565203724035646e-06, + "loss": 0.6938, "step": 6327 }, { - "epoch": 0.4788316749271689, - "grad_norm": 2.124258041381836, - "learning_rate": 1.0368523054810308e-05, - "loss": 0.775, + "epoch": 0.4454769447377684, + "grad_norm": 1.7910109758377075, + "learning_rate": 8.56364798936561e-06, + "loss": 0.6868, "step": 6328 }, { - "epoch": 0.4789073436495025, - "grad_norm": 2.2516794204711914, - "learning_rate": 1.0366296984635335e-05, - "loss": 0.8205, + "epoch": 0.44554734248504047, + "grad_norm": 2.2826085090637207, + "learning_rate": 8.562092173400101e-06, + "loss": 0.7186, "step": 6329 }, { - "epoch": 0.4789830123718361, - "grad_norm": 2.5653510093688965, - "learning_rate": 1.0364070828898425e-05, - "loss": 0.6034, + "epoch": 0.44561774023231254, + "grad_norm": 1.7749546766281128, + "learning_rate": 8.560536276220003e-06, + "loss": 0.5383, "step": 6330 }, { - "epoch": 0.4790586810941697, - "grad_norm": 2.1148502826690674, - "learning_rate": 1.0361844587738991e-05, - "loss": 0.6733, + "epoch": 0.4456881379795847, + "grad_norm": 3.45001482963562, + "learning_rate": 8.558980297906211e-06, + "loss": 0.8355, "step": 6331 }, { - "epoch": 0.4791343498165033, - "grad_norm": 2.0348715782165527, - "learning_rate": 1.0359618261296443e-05, - "loss": 0.6521, + "epoch": 0.44575853572685675, + "grad_norm": 1.9786021709442139, + "learning_rate": 8.557424238539624e-06, + "loss": 0.764, "step": 6332 }, { - "epoch": 0.479210018538837, - "grad_norm": 1.895645022392273, - "learning_rate": 1.0357391849710202e-05, - "loss": 0.7377, + "epoch": 0.44582893347412883, + "grad_norm": 1.7486824989318848, + "learning_rate": 8.555868098201139e-06, + "loss": 0.7892, "step": 6333 }, { - "epoch": 0.4792856872611706, - "grad_norm": 2.0529367923736572, - "learning_rate": 1.0355165353119692e-05, - "loss": 0.7686, + "epoch": 0.4458993312214009, + "grad_norm": 2.0367226600646973, + "learning_rate": 8.554311876971663e-06, + "loss": 0.7702, "step": 6334 }, { - "epoch": 0.4793613559835042, - "grad_norm": 2.688235282897949, - "learning_rate": 1.0352938771664346e-05, - "loss": 0.7611, + "epoch": 0.445969728968673, + "grad_norm": 1.8272565603256226, + "learning_rate": 8.552755574932104e-06, + "loss": 0.6705, "step": 6335 }, { - "epoch": 0.4794370247058378, - "grad_norm": 2.2325501441955566, - "learning_rate": 1.0350712105483598e-05, - "loss": 0.7466, + "epoch": 0.44604012671594506, + "grad_norm": 1.8792119026184082, + "learning_rate": 8.551199192163376e-06, + "loss": 0.7101, "step": 6336 }, { - "epoch": 0.4795126934281715, - "grad_norm": 2.4246463775634766, - "learning_rate": 1.0348485354716888e-05, - "loss": 0.738, + "epoch": 0.4461105244632172, + "grad_norm": 1.697383999824524, + "learning_rate": 8.549642728746399e-06, + "loss": 0.6111, "step": 6337 }, { - "epoch": 0.4795883621505051, - "grad_norm": 2.519249439239502, - "learning_rate": 1.0346258519503663e-05, - "loss": 0.6102, + "epoch": 0.4461809222104893, + "grad_norm": 1.8358601331710815, + "learning_rate": 8.548086184762091e-06, + "loss": 0.6222, "step": 6338 }, { - "epoch": 0.4796640308728387, - "grad_norm": 2.213670253753662, - "learning_rate": 1.0344031599983377e-05, - "loss": 0.72, + "epoch": 0.44625131995776135, + "grad_norm": 1.8389803171157837, + "learning_rate": 8.546529560291381e-06, + "loss": 0.7146, "step": 6339 }, { - "epoch": 0.4797396995951723, - "grad_norm": 2.312347173690796, - "learning_rate": 1.0341804596295483e-05, - "loss": 0.7855, + "epoch": 0.4463217177050334, + "grad_norm": 1.7974299192428589, + "learning_rate": 8.5449728554152e-06, + "loss": 0.7153, "step": 6340 }, { - "epoch": 0.479815368317506, - "grad_norm": 2.551732063293457, - "learning_rate": 1.033957750857945e-05, - "loss": 0.6129, + "epoch": 0.4463921154523055, + "grad_norm": 2.120499610900879, + "learning_rate": 8.543416070214478e-06, + "loss": 0.7047, "step": 6341 }, { - "epoch": 0.4798910370398396, - "grad_norm": 2.086526870727539, - "learning_rate": 1.0337350336974735e-05, - "loss": 0.8321, + "epoch": 0.44646251319957764, + "grad_norm": 1.9050575494766235, + "learning_rate": 8.541859204770159e-06, + "loss": 0.7472, "step": 6342 }, { - "epoch": 0.4799667057621732, - "grad_norm": 1.661421298980713, - "learning_rate": 1.033512308162082e-05, - "loss": 0.6088, + "epoch": 0.4465329109468497, + "grad_norm": 1.9294236898422241, + "learning_rate": 8.540302259163183e-06, + "loss": 0.765, "step": 6343 }, { - "epoch": 0.4800423744845068, - "grad_norm": 1.759372591972351, - "learning_rate": 1.0332895742657175e-05, - "loss": 0.7405, + "epoch": 0.4466033086941218, + "grad_norm": 1.7673823833465576, + "learning_rate": 8.538745233474496e-06, + "loss": 0.7277, "step": 6344 }, { - "epoch": 0.48011804320684043, - "grad_norm": 1.9246104955673218, - "learning_rate": 1.0330668320223293e-05, - "loss": 0.6529, + "epoch": 0.44667370644139387, + "grad_norm": 1.711005449295044, + "learning_rate": 8.537188127785053e-06, + "loss": 0.6169, "step": 6345 }, { - "epoch": 0.4801937119291741, - "grad_norm": 2.265521764755249, - "learning_rate": 1.0328440814458652e-05, - "loss": 0.6742, + "epoch": 0.44674410418866595, + "grad_norm": 1.6789143085479736, + "learning_rate": 8.535630942175805e-06, + "loss": 0.6977, "step": 6346 }, { - "epoch": 0.4802693806515077, - "grad_norm": 2.2895123958587646, - "learning_rate": 1.0326213225502754e-05, - "loss": 0.7062, + "epoch": 0.446814501935938, + "grad_norm": 2.1826727390289307, + "learning_rate": 8.534073676727715e-06, + "loss": 0.7346, "step": 6347 }, { - "epoch": 0.4803450493738413, - "grad_norm": 2.0983073711395264, - "learning_rate": 1.0323985553495094e-05, - "loss": 0.8804, + "epoch": 0.44688489968321016, + "grad_norm": 1.6849604845046997, + "learning_rate": 8.532516331521745e-06, + "loss": 0.6874, "step": 6348 }, { - "epoch": 0.4804207180961749, - "grad_norm": 2.0943613052368164, - "learning_rate": 1.0321757798575176e-05, - "loss": 0.7967, + "epoch": 0.44695529743048223, + "grad_norm": 1.6917200088500977, + "learning_rate": 8.530958906638864e-06, + "loss": 0.6969, "step": 6349 }, { - "epoch": 0.4804963868185086, - "grad_norm": 1.9753507375717163, - "learning_rate": 1.0319529960882508e-05, - "loss": 0.7633, + "epoch": 0.4470256951777543, + "grad_norm": 1.5257110595703125, + "learning_rate": 8.529401402160042e-06, + "loss": 0.6846, "step": 6350 }, { - "epoch": 0.4805720555408422, - "grad_norm": 1.9423227310180664, - "learning_rate": 1.0317302040556607e-05, - "loss": 0.9147, + "epoch": 0.4470960929250264, + "grad_norm": 1.8488295078277588, + "learning_rate": 8.527843818166255e-06, + "loss": 0.8158, "step": 6351 }, { - "epoch": 0.4806477242631758, - "grad_norm": 2.10290265083313, - "learning_rate": 1.0315074037736991e-05, - "loss": 0.6627, + "epoch": 0.44716649067229847, + "grad_norm": 2.1414730548858643, + "learning_rate": 8.526286154738484e-06, + "loss": 0.6798, "step": 6352 }, { - "epoch": 0.4807233929855094, - "grad_norm": 2.183432102203369, - "learning_rate": 1.0312845952563187e-05, - "loss": 0.7181, + "epoch": 0.4472368884195706, + "grad_norm": 1.8995546102523804, + "learning_rate": 8.524728411957716e-06, + "loss": 0.6346, "step": 6353 }, { - "epoch": 0.4807990617078431, - "grad_norm": 1.9517886638641357, - "learning_rate": 1.0310617785174721e-05, - "loss": 0.6473, + "epoch": 0.4473072861668427, + "grad_norm": 3.473501205444336, + "learning_rate": 8.523170589904937e-06, + "loss": 0.6337, "step": 6354 }, { - "epoch": 0.4808747304301767, - "grad_norm": 1.996443510055542, - "learning_rate": 1.0308389535711133e-05, - "loss": 0.5921, + "epoch": 0.44737768391411475, + "grad_norm": 1.961373209953308, + "learning_rate": 8.521612688661136e-06, + "loss": 0.7278, "step": 6355 }, { - "epoch": 0.4809503991525103, - "grad_norm": 1.8111634254455566, - "learning_rate": 1.0306161204311958e-05, - "loss": 0.5869, + "epoch": 0.44744808166138683, + "grad_norm": 1.9850616455078125, + "learning_rate": 8.520054708307318e-06, + "loss": 0.7681, "step": 6356 }, { - "epoch": 0.4810260678748439, - "grad_norm": 2.648256301879883, - "learning_rate": 1.0303932791116744e-05, - "loss": 0.768, + "epoch": 0.4475184794086589, + "grad_norm": 2.060985565185547, + "learning_rate": 8.518496648924476e-06, + "loss": 0.6727, "step": 6357 }, { - "epoch": 0.48110173659717753, - "grad_norm": 2.27350115776062, - "learning_rate": 1.0301704296265043e-05, - "loss": 0.6906, + "epoch": 0.447588877155931, + "grad_norm": 1.8798669576644897, + "learning_rate": 8.51693851059362e-06, + "loss": 0.7267, "step": 6358 }, { - "epoch": 0.4811774053195112, - "grad_norm": 1.7875982522964478, - "learning_rate": 1.0299475719896409e-05, - "loss": 0.6928, + "epoch": 0.4476592749032031, + "grad_norm": 2.332091808319092, + "learning_rate": 8.515380293395755e-06, + "loss": 0.6715, "step": 6359 }, { - "epoch": 0.4812530740418448, - "grad_norm": 2.582258939743042, - "learning_rate": 1.0297247062150398e-05, - "loss": 0.588, + "epoch": 0.4477296726504752, + "grad_norm": 1.7512682676315308, + "learning_rate": 8.513821997411896e-06, + "loss": 0.7188, "step": 6360 }, { - "epoch": 0.4813287427641784, - "grad_norm": 2.343061685562134, - "learning_rate": 1.0295018323166583e-05, - "loss": 0.8526, + "epoch": 0.4478000703977473, + "grad_norm": 1.8159457445144653, + "learning_rate": 8.51226362272306e-06, + "loss": 0.7674, "step": 6361 }, { - "epoch": 0.48140441148651203, - "grad_norm": 2.1170144081115723, - "learning_rate": 1.0292789503084532e-05, - "loss": 0.7459, + "epoch": 0.44787046814501935, + "grad_norm": 2.173820734024048, + "learning_rate": 8.51070516941027e-06, + "loss": 0.6559, "step": 6362 }, { - "epoch": 0.4814800802088457, - "grad_norm": 2.0151526927948, - "learning_rate": 1.029056060204382e-05, - "loss": 0.7721, + "epoch": 0.44794086589229143, + "grad_norm": 2.2062060832977295, + "learning_rate": 8.50914663755455e-06, + "loss": 0.6844, "step": 6363 }, { - "epoch": 0.4815557489311793, - "grad_norm": 1.9160104990005493, - "learning_rate": 1.0288331620184032e-05, - "loss": 0.6751, + "epoch": 0.44801126363956356, + "grad_norm": 1.7651655673980713, + "learning_rate": 8.507588027236929e-06, + "loss": 0.6905, "step": 6364 }, { - "epoch": 0.4816314176535129, - "grad_norm": 2.3645200729370117, - "learning_rate": 1.0286102557644746e-05, - "loss": 0.6832, + "epoch": 0.44808166138683564, + "grad_norm": 1.7876533269882202, + "learning_rate": 8.506029338538443e-06, + "loss": 0.6732, "step": 6365 }, { - "epoch": 0.4817070863758465, - "grad_norm": 1.6131598949432373, - "learning_rate": 1.0283873414565564e-05, - "loss": 0.6133, + "epoch": 0.4481520591341077, + "grad_norm": 1.9728195667266846, + "learning_rate": 8.504470571540126e-06, + "loss": 0.603, "step": 6366 }, { - "epoch": 0.4817827550981802, - "grad_norm": 2.991684913635254, - "learning_rate": 1.0281644191086073e-05, - "loss": 0.6093, + "epoch": 0.4482224568813798, + "grad_norm": 2.5583014488220215, + "learning_rate": 8.50291172632302e-06, + "loss": 0.7035, "step": 6367 }, { - "epoch": 0.4818584238205138, - "grad_norm": 2.0066182613372803, - "learning_rate": 1.0279414887345876e-05, - "loss": 0.7508, + "epoch": 0.44829285462865187, + "grad_norm": 2.3197743892669678, + "learning_rate": 8.501352802968176e-06, + "loss": 0.6701, "step": 6368 }, { - "epoch": 0.4819340925428474, - "grad_norm": 2.1001648902893066, - "learning_rate": 1.0277185503484583e-05, - "loss": 0.8123, + "epoch": 0.44836325237592395, + "grad_norm": 1.5735934972763062, + "learning_rate": 8.499793801556638e-06, + "loss": 0.7646, "step": 6369 }, { - "epoch": 0.482009761265181, - "grad_norm": 2.06756591796875, - "learning_rate": 1.0274956039641801e-05, - "loss": 0.6822, + "epoch": 0.4484336501231961, + "grad_norm": 2.1869311332702637, + "learning_rate": 8.498234722169461e-06, + "loss": 0.6513, "step": 6370 }, { - "epoch": 0.48208542998751464, - "grad_norm": 2.315427541732788, - "learning_rate": 1.027272649595715e-05, - "loss": 0.7408, + "epoch": 0.44850404787046816, + "grad_norm": 1.8417121171951294, + "learning_rate": 8.496675564887707e-06, + "loss": 0.6763, "step": 6371 }, { - "epoch": 0.4821610987098483, - "grad_norm": 3.052438735961914, - "learning_rate": 1.0270496872570249e-05, - "loss": 0.6234, + "epoch": 0.44857444561774024, + "grad_norm": 1.8099850416183472, + "learning_rate": 8.495116329792432e-06, + "loss": 0.5912, "step": 6372 }, { - "epoch": 0.4822367674321819, - "grad_norm": 1.8254213333129883, - "learning_rate": 1.0268267169620725e-05, - "loss": 0.7506, + "epoch": 0.4486448433650123, + "grad_norm": 1.9449288845062256, + "learning_rate": 8.493557016964705e-06, + "loss": 0.82, "step": 6373 }, { - "epoch": 0.4823124361545155, - "grad_norm": 2.1949892044067383, - "learning_rate": 1.0266037387248206e-05, - "loss": 0.7951, + "epoch": 0.4487152411122844, + "grad_norm": 1.7831361293792725, + "learning_rate": 8.491997626485598e-06, + "loss": 0.8055, "step": 6374 }, { - "epoch": 0.48238810487684913, - "grad_norm": 2.8453803062438965, - "learning_rate": 1.0263807525592332e-05, - "loss": 0.8468, + "epoch": 0.44878563885955647, + "grad_norm": 1.5528172254562378, + "learning_rate": 8.490438158436181e-06, + "loss": 0.6536, "step": 6375 }, { - "epoch": 0.4824637735991828, - "grad_norm": 2.467980146408081, - "learning_rate": 1.0261577584792743e-05, - "loss": 0.7182, + "epoch": 0.4488560366068286, + "grad_norm": 1.5252323150634766, + "learning_rate": 8.488878612897534e-06, + "loss": 0.7649, "step": 6376 }, { - "epoch": 0.4825394423215164, - "grad_norm": 2.0588455200195312, - "learning_rate": 1.0259347564989087e-05, - "loss": 0.7689, + "epoch": 0.4489264343541007, + "grad_norm": 1.7926220893859863, + "learning_rate": 8.487318989950743e-06, + "loss": 0.7721, "step": 6377 }, { - "epoch": 0.48261511104385, - "grad_norm": 2.141855001449585, - "learning_rate": 1.0257117466321015e-05, - "loss": 0.8404, + "epoch": 0.44899683210137276, + "grad_norm": 1.6518949270248413, + "learning_rate": 8.485759289676886e-06, + "loss": 0.6807, "step": 6378 }, { - "epoch": 0.48269077976618363, - "grad_norm": 1.7582626342773438, - "learning_rate": 1.0254887288928176e-05, - "loss": 0.6482, + "epoch": 0.44906722984864483, + "grad_norm": 1.587640404701233, + "learning_rate": 8.48419951215706e-06, + "loss": 0.6509, "step": 6379 }, { - "epoch": 0.4827664484885173, - "grad_norm": 2.70973539352417, - "learning_rate": 1.0252657032950239e-05, - "loss": 0.7863, + "epoch": 0.4491376275959169, + "grad_norm": 1.8153634071350098, + "learning_rate": 8.48263965747236e-06, + "loss": 0.7493, "step": 6380 }, { - "epoch": 0.4828421172108509, - "grad_norm": 2.2643096446990967, - "learning_rate": 1.0250426698526867e-05, - "loss": 0.6601, + "epoch": 0.44920802534318904, + "grad_norm": 1.5671206712722778, + "learning_rate": 8.48107972570388e-06, + "loss": 0.5898, "step": 6381 }, { - "epoch": 0.4829177859331845, - "grad_norm": 2.705983877182007, - "learning_rate": 1.0248196285797733e-05, - "loss": 0.6439, + "epoch": 0.4492784230904611, + "grad_norm": 2.0299694538116455, + "learning_rate": 8.47951971693272e-06, + "loss": 0.5974, "step": 6382 }, { - "epoch": 0.48299345465551813, - "grad_norm": 1.8606898784637451, - "learning_rate": 1.0245965794902505e-05, - "loss": 0.7542, + "epoch": 0.4493488208377332, + "grad_norm": 1.8423376083374023, + "learning_rate": 8.477959631239995e-06, + "loss": 0.7736, "step": 6383 }, { - "epoch": 0.48306912337785174, - "grad_norm": 2.3897595405578613, - "learning_rate": 1.0243735225980873e-05, - "loss": 0.6444, + "epoch": 0.4494192185850053, + "grad_norm": 1.8973731994628906, + "learning_rate": 8.476399468706806e-06, + "loss": 0.7167, "step": 6384 }, { - "epoch": 0.4831447921001854, - "grad_norm": 3.5943784713745117, - "learning_rate": 1.0241504579172518e-05, - "loss": 0.6732, + "epoch": 0.44948961633227735, + "grad_norm": 1.476536512374878, + "learning_rate": 8.474839229414275e-06, + "loss": 0.7324, "step": 6385 }, { - "epoch": 0.483220460822519, - "grad_norm": 2.2588038444519043, - "learning_rate": 1.023927385461713e-05, - "loss": 0.635, + "epoch": 0.44956001407954943, + "grad_norm": 1.7342517375946045, + "learning_rate": 8.473278913443514e-06, + "loss": 0.7485, "step": 6386 }, { - "epoch": 0.4832961295448526, - "grad_norm": 1.9176634550094604, - "learning_rate": 1.0237043052454404e-05, - "loss": 0.7008, + "epoch": 0.44963041182682156, + "grad_norm": 1.789510726928711, + "learning_rate": 8.47171852087565e-06, + "loss": 0.702, "step": 6387 }, { - "epoch": 0.48337179826718624, - "grad_norm": 2.0969033241271973, - "learning_rate": 1.023481217282404e-05, - "loss": 0.8144, + "epoch": 0.44970080957409364, + "grad_norm": 1.8374449014663696, + "learning_rate": 8.470158051791807e-06, + "loss": 0.641, "step": 6388 }, { - "epoch": 0.4834474669895199, - "grad_norm": 2.217078924179077, - "learning_rate": 1.0232581215865748e-05, - "loss": 0.6075, + "epoch": 0.4497712073213657, + "grad_norm": 1.8920116424560547, + "learning_rate": 8.468597506273115e-06, + "loss": 0.6126, "step": 6389 }, { - "epoch": 0.4835231357118535, - "grad_norm": 2.35626220703125, - "learning_rate": 1.0230350181719231e-05, - "loss": 0.8421, + "epoch": 0.4498416050686378, + "grad_norm": 1.8065204620361328, + "learning_rate": 8.467036884400708e-06, + "loss": 0.6612, "step": 6390 }, { - "epoch": 0.4835988044341871, - "grad_norm": 2.1566507816314697, - "learning_rate": 1.0228119070524205e-05, - "loss": 0.7397, + "epoch": 0.4499120028159099, + "grad_norm": 1.9629923105239868, + "learning_rate": 8.465476186255726e-06, + "loss": 0.7503, "step": 6391 }, { - "epoch": 0.48367447315652073, - "grad_norm": 2.613382577896118, - "learning_rate": 1.0225887882420394e-05, - "loss": 0.7971, + "epoch": 0.449982400563182, + "grad_norm": 1.4823241233825684, + "learning_rate": 8.463915411919308e-06, + "loss": 0.5335, "step": 6392 }, { - "epoch": 0.4837501418788544, - "grad_norm": 6.792184829711914, - "learning_rate": 1.0223656617547517e-05, - "loss": 0.7269, + "epoch": 0.4500527983104541, + "grad_norm": 1.980201244354248, + "learning_rate": 8.462354561472604e-06, + "loss": 0.6848, "step": 6393 }, { - "epoch": 0.483825810601188, - "grad_norm": 2.764080047607422, - "learning_rate": 1.0221425276045305e-05, - "loss": 0.7354, + "epoch": 0.45012319605772616, + "grad_norm": 1.8652297258377075, + "learning_rate": 8.460793634996761e-06, + "loss": 0.7042, "step": 6394 }, { - "epoch": 0.4839014793235216, - "grad_norm": 2.654021978378296, - "learning_rate": 1.0219193858053493e-05, - "loss": 0.6826, + "epoch": 0.45019359380499824, + "grad_norm": 2.0237724781036377, + "learning_rate": 8.459232632572934e-06, + "loss": 0.7343, "step": 6395 }, { - "epoch": 0.48397714804585523, - "grad_norm": 2.122959613800049, - "learning_rate": 1.0216962363711816e-05, - "loss": 0.6705, + "epoch": 0.4502639915522703, + "grad_norm": 1.9426015615463257, + "learning_rate": 8.45767155428228e-06, + "loss": 0.7206, "step": 6396 }, { - "epoch": 0.48405281676818884, - "grad_norm": 2.246718645095825, - "learning_rate": 1.0214730793160018e-05, - "loss": 0.6594, + "epoch": 0.4503343892995424, + "grad_norm": 1.7671769857406616, + "learning_rate": 8.456110400205964e-06, + "loss": 0.7301, "step": 6397 }, { - "epoch": 0.4841284854905225, - "grad_norm": 2.107835054397583, - "learning_rate": 1.0212499146537853e-05, - "loss": 0.6163, + "epoch": 0.4504047870468145, + "grad_norm": 1.987050175666809, + "learning_rate": 8.454549170425149e-06, + "loss": 0.6313, "step": 6398 }, { - "epoch": 0.4842041542128561, - "grad_norm": 2.2131471633911133, - "learning_rate": 1.0210267423985067e-05, - "loss": 0.6586, + "epoch": 0.4504751847940866, + "grad_norm": 1.7860984802246094, + "learning_rate": 8.452987865021003e-06, + "loss": 0.629, "step": 6399 }, { - "epoch": 0.48427982293518973, - "grad_norm": 2.5556445121765137, - "learning_rate": 1.0208035625641424e-05, - "loss": 0.6997, + "epoch": 0.4505455825413587, + "grad_norm": 1.704730749130249, + "learning_rate": 8.451426484074704e-06, + "loss": 0.7402, "step": 6400 }, { - "epoch": 0.48435549165752334, - "grad_norm": 1.8932169675827026, - "learning_rate": 1.020580375164668e-05, - "loss": 0.8308, + "epoch": 0.45061598028863076, + "grad_norm": 1.8385753631591797, + "learning_rate": 8.449865027667426e-06, + "loss": 0.824, "step": 6401 }, { - "epoch": 0.484431160379857, - "grad_norm": 1.881028413772583, - "learning_rate": 1.0203571802140605e-05, - "loss": 0.6717, + "epoch": 0.45068637803590283, + "grad_norm": 1.815189003944397, + "learning_rate": 8.448303495880353e-06, + "loss": 0.6319, "step": 6402 }, { - "epoch": 0.4845068291021906, - "grad_norm": 2.3895459175109863, - "learning_rate": 1.020133977726297e-05, - "loss": 0.6854, + "epoch": 0.4507567757831749, + "grad_norm": 1.5959951877593994, + "learning_rate": 8.44674188879467e-06, + "loss": 0.72, "step": 6403 }, { - "epoch": 0.4845824978245242, - "grad_norm": 1.8925831317901611, - "learning_rate": 1.0199107677153554e-05, - "loss": 0.6973, + "epoch": 0.45082717353044705, + "grad_norm": 1.5302352905273438, + "learning_rate": 8.445180206491568e-06, + "loss": 0.6327, "step": 6404 }, { - "epoch": 0.48465816654685784, - "grad_norm": 2.5870819091796875, - "learning_rate": 1.0196875501952137e-05, - "loss": 0.6679, + "epoch": 0.4508975712777191, + "grad_norm": 1.7185523509979248, + "learning_rate": 8.443618449052234e-06, + "loss": 0.7544, "step": 6405 }, { - "epoch": 0.4847338352691915, - "grad_norm": 2.1854963302612305, - "learning_rate": 1.01946432517985e-05, - "loss": 0.7334, + "epoch": 0.4509679690249912, + "grad_norm": 1.740355372428894, + "learning_rate": 8.442056616557872e-06, + "loss": 0.5326, "step": 6406 }, { - "epoch": 0.4848095039915251, - "grad_norm": 3.5086848735809326, - "learning_rate": 1.0192410926832446e-05, - "loss": 0.6914, + "epoch": 0.4510383667722633, + "grad_norm": 1.643223524093628, + "learning_rate": 8.440494709089681e-06, + "loss": 0.7458, "step": 6407 }, { - "epoch": 0.4848851727138587, - "grad_norm": 3.082146167755127, - "learning_rate": 1.0190178527193761e-05, - "loss": 0.7652, + "epoch": 0.45110876451953535, + "grad_norm": 1.7322330474853516, + "learning_rate": 8.438932726728864e-06, + "loss": 0.6826, "step": 6408 }, { - "epoch": 0.48496084143619234, - "grad_norm": 2.8593039512634277, - "learning_rate": 1.0187946053022247e-05, - "loss": 0.6731, + "epoch": 0.4511791622668075, + "grad_norm": 1.7802765369415283, + "learning_rate": 8.43737066955663e-06, + "loss": 0.7513, "step": 6409 }, { - "epoch": 0.48503651015852595, - "grad_norm": 2.5862269401550293, - "learning_rate": 1.0185713504457709e-05, - "loss": 0.6637, + "epoch": 0.45124956001407956, + "grad_norm": 1.9850162267684937, + "learning_rate": 8.435808537654197e-06, + "loss": 0.7435, "step": 6410 }, { - "epoch": 0.4851121788808596, - "grad_norm": 1.955764889717102, - "learning_rate": 1.0183480881639952e-05, - "loss": 0.6526, + "epoch": 0.45131995776135164, + "grad_norm": 1.7720022201538086, + "learning_rate": 8.434246331102774e-06, + "loss": 0.6572, "step": 6411 }, { - "epoch": 0.4851878476031932, - "grad_norm": 2.400613307952881, - "learning_rate": 1.01812481847088e-05, - "loss": 0.7565, + "epoch": 0.4513903555086237, + "grad_norm": 1.5988835096359253, + "learning_rate": 8.432684049983588e-06, + "loss": 0.7137, "step": 6412 }, { - "epoch": 0.48526351632552683, - "grad_norm": 1.8675727844238281, - "learning_rate": 1.0179015413804063e-05, - "loss": 0.6738, + "epoch": 0.4514607532558958, + "grad_norm": 1.958312749862671, + "learning_rate": 8.43112169437786e-06, + "loss": 0.7156, "step": 6413 }, { - "epoch": 0.48533918504786044, - "grad_norm": 2.350315809249878, - "learning_rate": 1.0176782569065568e-05, - "loss": 0.7441, + "epoch": 0.4515311510031679, + "grad_norm": 1.6222649812698364, + "learning_rate": 8.429559264366819e-06, + "loss": 0.6859, "step": 6414 }, { - "epoch": 0.4854148537701941, - "grad_norm": 2.3835151195526123, - "learning_rate": 1.0174549650633142e-05, - "loss": 0.6982, + "epoch": 0.45160154875044, + "grad_norm": 2.391329050064087, + "learning_rate": 8.427996760031697e-06, + "loss": 0.5955, "step": 6415 }, { - "epoch": 0.4854905224925277, - "grad_norm": 2.2682459354400635, - "learning_rate": 1.0172316658646619e-05, - "loss": 0.6537, + "epoch": 0.4516719464977121, + "grad_norm": 1.5073379278182983, + "learning_rate": 8.42643418145373e-06, + "loss": 0.6563, "step": 6416 }, { - "epoch": 0.48556619121486133, - "grad_norm": 1.8722403049468994, - "learning_rate": 1.0170083593245836e-05, - "loss": 0.8612, + "epoch": 0.45174234424498416, + "grad_norm": 1.6070226430892944, + "learning_rate": 8.42487152871416e-06, + "loss": 0.5699, "step": 6417 }, { - "epoch": 0.48564185993719494, - "grad_norm": 1.8039960861206055, - "learning_rate": 1.0167850454570632e-05, - "loss": 0.7195, + "epoch": 0.45181274199225624, + "grad_norm": 1.7635143995285034, + "learning_rate": 8.423308801894226e-06, + "loss": 0.6764, "step": 6418 }, { - "epoch": 0.4857175286595286, - "grad_norm": 2.1905548572540283, - "learning_rate": 1.0165617242760855e-05, - "loss": 0.8805, + "epoch": 0.4518831397395283, + "grad_norm": 1.908990502357483, + "learning_rate": 8.42174600107518e-06, + "loss": 0.795, "step": 6419 }, { - "epoch": 0.4857931973818622, - "grad_norm": 3.8410537242889404, - "learning_rate": 1.0163383957956357e-05, - "loss": 0.7325, + "epoch": 0.45195353748680045, + "grad_norm": 1.9450206756591797, + "learning_rate": 8.42018312633827e-06, + "loss": 0.7642, "step": 6420 }, { - "epoch": 0.48586886610419583, - "grad_norm": 1.9922008514404297, - "learning_rate": 1.0161150600296993e-05, - "loss": 0.762, + "epoch": 0.4520239352340725, + "grad_norm": 2.3734629154205322, + "learning_rate": 8.418620177764754e-06, + "loss": 0.736, "step": 6421 }, { - "epoch": 0.48594453482652944, - "grad_norm": 2.191408157348633, - "learning_rate": 1.0158917169922622e-05, - "loss": 0.7489, + "epoch": 0.4520943329813446, + "grad_norm": 1.9147963523864746, + "learning_rate": 8.417057155435892e-06, + "loss": 0.5745, "step": 6422 }, { - "epoch": 0.48602020354886305, - "grad_norm": 1.968904733657837, - "learning_rate": 1.0156683666973112e-05, - "loss": 0.6926, + "epoch": 0.4521647307286167, + "grad_norm": 1.5973215103149414, + "learning_rate": 8.415494059432942e-06, + "loss": 0.6884, "step": 6423 }, { - "epoch": 0.4860958722711967, - "grad_norm": 2.3216192722320557, - "learning_rate": 1.0154450091588326e-05, - "loss": 0.7792, + "epoch": 0.45223512847588876, + "grad_norm": 2.3663952350616455, + "learning_rate": 8.413930889837176e-06, + "loss": 0.565, "step": 6424 }, { - "epoch": 0.4861715409935303, - "grad_norm": 1.8544453382492065, - "learning_rate": 1.0152216443908144e-05, - "loss": 0.6568, + "epoch": 0.45230552622316084, + "grad_norm": 1.901071310043335, + "learning_rate": 8.412367646729863e-06, + "loss": 0.7204, "step": 6425 }, { - "epoch": 0.48624720971586394, - "grad_norm": 2.2261478900909424, - "learning_rate": 1.0149982724072439e-05, - "loss": 0.7715, + "epoch": 0.45237592397043297, + "grad_norm": 1.717690348625183, + "learning_rate": 8.410804330192274e-06, + "loss": 0.6646, "step": 6426 }, { - "epoch": 0.48632287843819755, - "grad_norm": 2.093865394592285, - "learning_rate": 1.0147748932221098e-05, - "loss": 0.738, + "epoch": 0.45244632171770505, + "grad_norm": 1.8056989908218384, + "learning_rate": 8.409240940305693e-06, + "loss": 0.6593, "step": 6427 }, { - "epoch": 0.4863985471605312, - "grad_norm": 2.6762888431549072, - "learning_rate": 1.0145515068494007e-05, - "loss": 0.7401, + "epoch": 0.4525167194649771, + "grad_norm": 1.6623942852020264, + "learning_rate": 8.407677477151397e-06, + "loss": 0.6696, "step": 6428 }, { - "epoch": 0.4864742158828648, - "grad_norm": 1.9878803491592407, - "learning_rate": 1.0143281133031056e-05, - "loss": 0.6209, + "epoch": 0.4525871172122492, + "grad_norm": 2.371659755706787, + "learning_rate": 8.406113940810677e-06, + "loss": 0.7437, "step": 6429 }, { - "epoch": 0.48654988460519843, - "grad_norm": 2.1690900325775146, - "learning_rate": 1.0141047125972145e-05, - "loss": 0.7118, + "epoch": 0.4526575149595213, + "grad_norm": 1.6212918758392334, + "learning_rate": 8.404550331364817e-06, + "loss": 0.6973, "step": 6430 }, { - "epoch": 0.48662555332753205, - "grad_norm": 2.016566038131714, - "learning_rate": 1.013881304745717e-05, - "loss": 0.6214, + "epoch": 0.45272791270679336, + "grad_norm": 1.8169225454330444, + "learning_rate": 8.402986648895114e-06, + "loss": 0.7012, "step": 6431 }, { - "epoch": 0.4867012220498657, - "grad_norm": 1.7480603456497192, - "learning_rate": 1.0136578897626037e-05, - "loss": 0.7928, + "epoch": 0.4527983104540655, + "grad_norm": 1.6850754022598267, + "learning_rate": 8.401422893482866e-06, + "loss": 0.7097, "step": 6432 }, { - "epoch": 0.4867768907721993, - "grad_norm": 2.419851064682007, - "learning_rate": 1.013434467661866e-05, - "loss": 0.7508, + "epoch": 0.45286870820133757, + "grad_norm": 2.287299394607544, + "learning_rate": 8.39985906520937e-06, + "loss": 0.6857, "step": 6433 }, { - "epoch": 0.48685255949453293, - "grad_norm": 3.0105323791503906, - "learning_rate": 1.0132110384574949e-05, - "loss": 0.7791, + "epoch": 0.45293910594860964, + "grad_norm": 1.63023042678833, + "learning_rate": 8.398295164155936e-06, + "loss": 0.6375, "step": 6434 }, { - "epoch": 0.48692822821686654, - "grad_norm": 1.9230643510818481, - "learning_rate": 1.0129876021634826e-05, - "loss": 0.7613, + "epoch": 0.4530095036958817, + "grad_norm": 1.7578952312469482, + "learning_rate": 8.396731190403869e-06, + "loss": 0.7582, "step": 6435 }, { - "epoch": 0.4870038969392002, - "grad_norm": 2.396361827850342, - "learning_rate": 1.0127641587938213e-05, - "loss": 0.6539, + "epoch": 0.4530799014431538, + "grad_norm": 1.8032450675964355, + "learning_rate": 8.395167144034482e-06, + "loss": 0.8093, "step": 6436 }, { - "epoch": 0.4870795656615338, - "grad_norm": 2.384631395339966, - "learning_rate": 1.0125407083625034e-05, - "loss": 0.661, + "epoch": 0.45315029919042593, + "grad_norm": 1.8843311071395874, + "learning_rate": 8.393603025129091e-06, + "loss": 0.6403, "step": 6437 }, { - "epoch": 0.48715523438386743, - "grad_norm": 1.9804085493087769, - "learning_rate": 1.0123172508835224e-05, - "loss": 0.6685, + "epoch": 0.453220696937698, + "grad_norm": 1.5403364896774292, + "learning_rate": 8.392038833769017e-06, + "loss": 0.697, "step": 6438 }, { - "epoch": 0.48723090310620104, - "grad_norm": 2.5004003047943115, - "learning_rate": 1.0120937863708718e-05, - "loss": 0.937, + "epoch": 0.4532910946849701, + "grad_norm": 1.7118875980377197, + "learning_rate": 8.390474570035585e-06, + "loss": 0.7233, "step": 6439 }, { - "epoch": 0.48730657182853465, - "grad_norm": 1.8940950632095337, - "learning_rate": 1.0118703148385458e-05, - "loss": 0.7562, + "epoch": 0.45336149243224216, + "grad_norm": 1.9960507154464722, + "learning_rate": 8.388910234010119e-06, + "loss": 0.6329, "step": 6440 }, { - "epoch": 0.4873822405508683, - "grad_norm": 1.8344001770019531, - "learning_rate": 1.0116468363005388e-05, - "loss": 0.6854, + "epoch": 0.45343189017951424, + "grad_norm": 1.6616812944412231, + "learning_rate": 8.387345825773952e-06, + "loss": 0.7002, "step": 6441 }, { - "epoch": 0.48745790927320193, - "grad_norm": 2.383427858352661, - "learning_rate": 1.011423350770846e-05, - "loss": 0.7046, + "epoch": 0.4535022879267863, + "grad_norm": 1.7728782892227173, + "learning_rate": 8.385781345408421e-06, + "loss": 0.7576, "step": 6442 }, { - "epoch": 0.48753357799553554, - "grad_norm": 2.256309747695923, - "learning_rate": 1.0111998582634623e-05, - "loss": 0.7245, + "epoch": 0.45357268567405845, + "grad_norm": 1.7087082862854004, + "learning_rate": 8.384216792994861e-06, + "loss": 0.685, "step": 6443 }, { - "epoch": 0.48760924671786915, - "grad_norm": 7.691187381744385, - "learning_rate": 1.0109763587923842e-05, - "loss": 0.7306, + "epoch": 0.45364308342133053, + "grad_norm": 2.1502537727355957, + "learning_rate": 8.382652168614618e-06, + "loss": 0.6103, "step": 6444 }, { - "epoch": 0.4876849154402028, - "grad_norm": 2.102891206741333, - "learning_rate": 1.0107528523716071e-05, - "loss": 0.6399, + "epoch": 0.4537134811686026, + "grad_norm": 1.834885597229004, + "learning_rate": 8.381087472349036e-06, + "loss": 0.8068, "step": 6445 }, { - "epoch": 0.4877605841625364, - "grad_norm": 2.5367937088012695, - "learning_rate": 1.0105293390151287e-05, - "loss": 0.6951, + "epoch": 0.4537838789158747, + "grad_norm": 1.999954104423523, + "learning_rate": 8.379522704279464e-06, + "loss": 0.7845, "step": 6446 }, { - "epoch": 0.48783625288487004, - "grad_norm": 1.8842484951019287, - "learning_rate": 1.0103058187369451e-05, - "loss": 0.7347, + "epoch": 0.45385427666314676, + "grad_norm": 1.889430046081543, + "learning_rate": 8.377957864487258e-06, + "loss": 0.8001, "step": 6447 }, { - "epoch": 0.48791192160720365, - "grad_norm": 1.78765869140625, - "learning_rate": 1.0100822915510547e-05, - "loss": 0.6858, + "epoch": 0.4539246744104189, + "grad_norm": 2.2373530864715576, + "learning_rate": 8.376392953053776e-06, + "loss": 0.7459, "step": 6448 }, { - "epoch": 0.4879875903295373, - "grad_norm": 1.82582688331604, - "learning_rate": 1.0098587574714548e-05, - "loss": 0.7601, + "epoch": 0.45399507215769097, + "grad_norm": 2.1885812282562256, + "learning_rate": 8.374827970060377e-06, + "loss": 0.7705, "step": 6449 }, { - "epoch": 0.4880632590518709, - "grad_norm": 2.041457176208496, - "learning_rate": 1.0096352165121444e-05, - "loss": 0.6765, + "epoch": 0.45406546990496305, + "grad_norm": 1.7674075365066528, + "learning_rate": 8.373262915588426e-06, + "loss": 0.6809, "step": 6450 }, { - "epoch": 0.48813892777420453, - "grad_norm": 2.2032086849212646, - "learning_rate": 1.0094116686871222e-05, - "loss": 0.6296, + "epoch": 0.4541358676522351, + "grad_norm": 1.7615021467208862, + "learning_rate": 8.371697789719295e-06, + "loss": 0.6719, "step": 6451 }, { - "epoch": 0.48821459649653814, - "grad_norm": 2.4517290592193604, - "learning_rate": 1.0091881140103873e-05, - "loss": 0.8565, + "epoch": 0.4542062653995072, + "grad_norm": 1.8480976819992065, + "learning_rate": 8.37013259253435e-06, + "loss": 0.7251, "step": 6452 }, { - "epoch": 0.48829026521887176, - "grad_norm": 2.434957504272461, - "learning_rate": 1.0089645524959398e-05, - "loss": 0.8896, + "epoch": 0.4542766631467793, + "grad_norm": 1.5720670223236084, + "learning_rate": 8.36856732411497e-06, + "loss": 0.755, "step": 6453 }, { - "epoch": 0.4883659339412054, - "grad_norm": 2.1253631114959717, - "learning_rate": 1.0087409841577793e-05, - "loss": 0.5887, + "epoch": 0.4543470608940514, + "grad_norm": 1.4958299398422241, + "learning_rate": 8.367001984542538e-06, + "loss": 0.6995, "step": 6454 }, { - "epoch": 0.48844160266353903, - "grad_norm": 2.2699928283691406, - "learning_rate": 1.0085174090099066e-05, - "loss": 0.7519, + "epoch": 0.4544174586413235, + "grad_norm": 1.6497315168380737, + "learning_rate": 8.365436573898435e-06, + "loss": 0.6795, "step": 6455 }, { - "epoch": 0.48851727138587264, - "grad_norm": 2.0873475074768066, - "learning_rate": 1.008293827066323e-05, - "loss": 0.6143, + "epoch": 0.45448785638859557, + "grad_norm": 1.772700309753418, + "learning_rate": 8.363871092264047e-06, + "loss": 0.7194, "step": 6456 }, { - "epoch": 0.48859294010820625, - "grad_norm": 2.778334140777588, - "learning_rate": 1.0080702383410296e-05, - "loss": 0.7002, + "epoch": 0.45455825413586765, + "grad_norm": 1.74611234664917, + "learning_rate": 8.362305539720764e-06, + "loss": 0.6121, "step": 6457 }, { - "epoch": 0.4886686088305399, - "grad_norm": 2.2571263313293457, - "learning_rate": 1.0078466428480285e-05, - "loss": 0.6735, + "epoch": 0.4546286518831397, + "grad_norm": 1.7158839702606201, + "learning_rate": 8.360739916349986e-06, + "loss": 0.6236, "step": 6458 }, { - "epoch": 0.48874427755287353, - "grad_norm": 2.0266454219818115, - "learning_rate": 1.0076230406013216e-05, - "loss": 0.782, + "epoch": 0.4546990496304118, + "grad_norm": 1.7607861757278442, + "learning_rate": 8.359174222233105e-06, + "loss": 0.6326, "step": 6459 }, { - "epoch": 0.48881994627520714, - "grad_norm": 1.8740894794464111, - "learning_rate": 1.0073994316149117e-05, - "loss": 0.6624, + "epoch": 0.45476944737768393, + "grad_norm": 2.091641902923584, + "learning_rate": 8.357608457451525e-06, + "loss": 0.6804, "step": 6460 }, { - "epoch": 0.48889561499754075, - "grad_norm": 2.3470191955566406, - "learning_rate": 1.0071758159028023e-05, - "loss": 0.6661, + "epoch": 0.454839845124956, + "grad_norm": 1.99857497215271, + "learning_rate": 8.356042622086656e-06, + "loss": 0.6771, "step": 6461 }, { - "epoch": 0.4889712837198744, - "grad_norm": 2.5701887607574463, - "learning_rate": 1.0069521934789965e-05, - "loss": 0.6928, + "epoch": 0.4549102428722281, + "grad_norm": 2.0967626571655273, + "learning_rate": 8.354476716219897e-06, + "loss": 0.6723, "step": 6462 }, { - "epoch": 0.489046952442208, - "grad_norm": 1.8447167873382568, - "learning_rate": 1.0067285643574983e-05, - "loss": 0.5812, + "epoch": 0.45498064061950017, + "grad_norm": 1.8475315570831299, + "learning_rate": 8.35291073993267e-06, + "loss": 0.6287, "step": 6463 }, { - "epoch": 0.48912262116454164, - "grad_norm": 2.286895751953125, - "learning_rate": 1.0065049285523126e-05, - "loss": 0.751, + "epoch": 0.45505103836677224, + "grad_norm": 1.671162724494934, + "learning_rate": 8.351344693306387e-06, + "loss": 0.6557, "step": 6464 }, { - "epoch": 0.48919828988687525, - "grad_norm": 1.9406249523162842, - "learning_rate": 1.0062812860774435e-05, - "loss": 0.7846, + "epoch": 0.4551214361140444, + "grad_norm": 1.7373467683792114, + "learning_rate": 8.34977857642247e-06, + "loss": 0.7385, "step": 6465 }, { - "epoch": 0.48927395860920886, - "grad_norm": 2.343668222427368, - "learning_rate": 1.0060576369468964e-05, - "loss": 0.6193, + "epoch": 0.45519183386131645, + "grad_norm": 1.8897998332977295, + "learning_rate": 8.348212389362344e-06, + "loss": 0.6001, "step": 6466 }, { - "epoch": 0.4893496273315425, - "grad_norm": 2.254185199737549, - "learning_rate": 1.0058339811746774e-05, - "loss": 0.7514, + "epoch": 0.45526223160858853, + "grad_norm": 1.7477630376815796, + "learning_rate": 8.346646132207433e-06, + "loss": 0.7074, "step": 6467 }, { - "epoch": 0.48942529605387614, - "grad_norm": 1.851659893989563, - "learning_rate": 1.0056103187747916e-05, - "loss": 0.6014, + "epoch": 0.4553326293558606, + "grad_norm": 1.9097415208816528, + "learning_rate": 8.34507980503917e-06, + "loss": 0.5947, "step": 6468 }, { - "epoch": 0.48950096477620975, - "grad_norm": 1.9072391986846924, - "learning_rate": 1.0053866497612465e-05, - "loss": 0.6593, + "epoch": 0.4554030271031327, + "grad_norm": 1.7260587215423584, + "learning_rate": 8.34351340793899e-06, + "loss": 0.6873, "step": 6469 }, { - "epoch": 0.48957663349854336, - "grad_norm": 2.4362573623657227, - "learning_rate": 1.0051629741480483e-05, - "loss": 0.6269, + "epoch": 0.45547342485040476, + "grad_norm": 1.7816095352172852, + "learning_rate": 8.341946940988332e-06, + "loss": 0.7284, "step": 6470 }, { - "epoch": 0.489652302220877, - "grad_norm": 2.2436280250549316, - "learning_rate": 1.004939291949205e-05, - "loss": 0.6636, + "epoch": 0.4555438225976769, + "grad_norm": 1.9860618114471436, + "learning_rate": 8.340380404268637e-06, + "loss": 0.7141, "step": 6471 }, { - "epoch": 0.48972797094321063, - "grad_norm": 1.9060099124908447, - "learning_rate": 1.0047156031787233e-05, - "loss": 0.7561, + "epoch": 0.455614220344949, + "grad_norm": 1.7601209878921509, + "learning_rate": 8.338813797861349e-06, + "loss": 0.7179, "step": 6472 }, { - "epoch": 0.48980363966554424, - "grad_norm": 2.352036714553833, - "learning_rate": 1.0044919078506122e-05, - "loss": 0.6255, + "epoch": 0.45568461809222105, + "grad_norm": 1.7660984992980957, + "learning_rate": 8.337247121847923e-06, + "loss": 0.7, "step": 6473 }, { - "epoch": 0.48987930838787785, - "grad_norm": 1.977865219116211, - "learning_rate": 1.0042682059788798e-05, - "loss": 0.6376, + "epoch": 0.4557550158394931, + "grad_norm": 1.6910138130187988, + "learning_rate": 8.335680376309804e-06, + "loss": 0.6627, "step": 6474 }, { - "epoch": 0.4899549771102115, - "grad_norm": 3.1520273685455322, - "learning_rate": 1.0040444975775348e-05, - "loss": 0.8148, + "epoch": 0.4558254135867652, + "grad_norm": 1.7947163581848145, + "learning_rate": 8.334113561328454e-06, + "loss": 0.6799, "step": 6475 }, { - "epoch": 0.49003064583254513, - "grad_norm": 1.8340657949447632, - "learning_rate": 1.0038207826605871e-05, - "loss": 0.5468, + "epoch": 0.45589581133403734, + "grad_norm": 1.8495246171951294, + "learning_rate": 8.332546676985333e-06, + "loss": 0.775, "step": 6476 }, { - "epoch": 0.49010631455487874, - "grad_norm": 2.7415192127227783, - "learning_rate": 1.003597061242046e-05, - "loss": 0.7775, + "epoch": 0.4559662090813094, + "grad_norm": 1.547760248184204, + "learning_rate": 8.330979723361901e-06, + "loss": 0.7226, "step": 6477 }, { - "epoch": 0.49018198327721235, - "grad_norm": 2.266130208969116, - "learning_rate": 1.003373333335922e-05, - "loss": 0.7789, + "epoch": 0.4560366068285815, + "grad_norm": 1.907615065574646, + "learning_rate": 8.329412700539629e-06, + "loss": 0.7405, "step": 6478 }, { - "epoch": 0.49025765199954596, - "grad_norm": 2.030848503112793, - "learning_rate": 1.0031495989562255e-05, - "loss": 0.709, + "epoch": 0.45610700457585357, + "grad_norm": 1.9402505159378052, + "learning_rate": 8.327845608599988e-06, + "loss": 0.7197, "step": 6479 }, { - "epoch": 0.49033332072187963, - "grad_norm": 1.9253787994384766, - "learning_rate": 1.0029258581169675e-05, - "loss": 0.8155, + "epoch": 0.45617740232312565, + "grad_norm": 1.864632248878479, + "learning_rate": 8.32627844762445e-06, + "loss": 0.7657, "step": 6480 }, { - "epoch": 0.49040898944421324, - "grad_norm": 2.235677480697632, - "learning_rate": 1.0027021108321597e-05, - "loss": 0.6628, + "epoch": 0.4562478000703977, + "grad_norm": 2.1043126583099365, + "learning_rate": 8.324711217694493e-06, + "loss": 0.6144, "step": 6481 }, { - "epoch": 0.49048465816654685, - "grad_norm": 2.522493600845337, - "learning_rate": 1.002478357115813e-05, - "loss": 0.6663, + "epoch": 0.45631819781766986, + "grad_norm": 1.8717507123947144, + "learning_rate": 8.323143918891603e-06, + "loss": 0.7642, "step": 6482 }, { - "epoch": 0.49056032688888046, - "grad_norm": 2.5765788555145264, - "learning_rate": 1.0022545969819403e-05, - "loss": 0.6466, + "epoch": 0.45638859556494193, + "grad_norm": 1.733496069908142, + "learning_rate": 8.32157655129726e-06, + "loss": 0.7064, "step": 6483 }, { - "epoch": 0.4906359956112141, - "grad_norm": 2.641915798187256, - "learning_rate": 1.0020308304445539e-05, - "loss": 0.777, + "epoch": 0.456458993312214, + "grad_norm": 1.6473946571350098, + "learning_rate": 8.320009114992955e-06, + "loss": 0.6854, "step": 6484 }, { - "epoch": 0.49071166433354774, - "grad_norm": 4.392044544219971, - "learning_rate": 1.0018070575176672e-05, - "loss": 0.6881, + "epoch": 0.4565293910594861, + "grad_norm": 1.8370890617370605, + "learning_rate": 8.318441610060185e-06, + "loss": 0.7384, "step": 6485 }, { - "epoch": 0.49078733305588135, - "grad_norm": 1.90240478515625, - "learning_rate": 1.0015832782152928e-05, - "loss": 0.7021, + "epoch": 0.45659978880675817, + "grad_norm": 1.649048089981079, + "learning_rate": 8.316874036580436e-06, + "loss": 0.7737, "step": 6486 }, { - "epoch": 0.49086300177821496, - "grad_norm": 1.9071075916290283, - "learning_rate": 1.0013594925514453e-05, - "loss": 0.6333, + "epoch": 0.45667018655403024, + "grad_norm": 5.046268463134766, + "learning_rate": 8.315306394635216e-06, + "loss": 0.6967, "step": 6487 }, { - "epoch": 0.4909386705005486, - "grad_norm": 1.997644066810608, - "learning_rate": 1.0011357005401386e-05, - "loss": 0.6004, + "epoch": 0.4567405843013024, + "grad_norm": 1.8136941194534302, + "learning_rate": 8.313738684306026e-06, + "loss": 0.6926, "step": 6488 }, { - "epoch": 0.49101433922288223, - "grad_norm": 2.4626305103302, - "learning_rate": 1.000911902195387e-05, - "loss": 0.7033, + "epoch": 0.45681098204857445, + "grad_norm": 1.6416704654693604, + "learning_rate": 8.312170905674369e-06, + "loss": 0.633, "step": 6489 }, { - "epoch": 0.49109000794521585, - "grad_norm": 1.9787263870239258, - "learning_rate": 1.0006880975312061e-05, - "loss": 0.6778, + "epoch": 0.45688137979584653, + "grad_norm": 1.781707525253296, + "learning_rate": 8.31060305882176e-06, + "loss": 0.6994, "step": 6490 }, { - "epoch": 0.49116567666754946, - "grad_norm": 2.7848620414733887, - "learning_rate": 1.0004642865616104e-05, - "loss": 0.5979, + "epoch": 0.4569517775431186, + "grad_norm": 1.6545485258102417, + "learning_rate": 8.309035143829712e-06, + "loss": 0.6809, "step": 6491 }, { - "epoch": 0.49124134538988307, - "grad_norm": 2.4936132431030273, - "learning_rate": 1.0002404693006164e-05, - "loss": 0.7636, + "epoch": 0.4570221752903907, + "grad_norm": 2.1246144771575928, + "learning_rate": 8.30746716077974e-06, + "loss": 0.6907, "step": 6492 }, { - "epoch": 0.49131701411221673, - "grad_norm": 1.9078294038772583, - "learning_rate": 1.0000166457622396e-05, - "loss": 0.6217, + "epoch": 0.4570925730376628, + "grad_norm": 1.822157859802246, + "learning_rate": 8.305899109753368e-06, + "loss": 0.7472, "step": 6493 }, { - "epoch": 0.49139268283455034, - "grad_norm": 2.200657844543457, - "learning_rate": 9.997928159604974e-06, - "loss": 0.698, + "epoch": 0.4571629707849349, + "grad_norm": 1.6629095077514648, + "learning_rate": 8.304330990832116e-06, + "loss": 0.6717, "step": 6494 }, { - "epoch": 0.49146835155688395, - "grad_norm": 2.0636913776397705, - "learning_rate": 9.99568979909406e-06, - "loss": 0.7468, + "epoch": 0.457233368532207, + "grad_norm": 2.361196279525757, + "learning_rate": 8.302762804097515e-06, + "loss": 0.6202, "step": 6495 }, { - "epoch": 0.49154402027921756, - "grad_norm": 2.1066038608551025, - "learning_rate": 9.993451376229832e-06, - "loss": 0.5998, + "epoch": 0.45730376627947905, + "grad_norm": 1.7035309076309204, + "learning_rate": 8.301194549631099e-06, + "loss": 0.6559, "step": 6496 }, { - "epoch": 0.49161968900155123, - "grad_norm": 2.909907579421997, - "learning_rate": 9.991212891152469e-06, - "loss": 0.7663, + "epoch": 0.45737416402675113, + "grad_norm": 1.996356725692749, + "learning_rate": 8.299626227514397e-06, + "loss": 0.6567, "step": 6497 }, { - "epoch": 0.49169535772388484, - "grad_norm": 2.411532402038574, - "learning_rate": 9.988974344002143e-06, - "loss": 0.727, + "epoch": 0.4574445617740232, + "grad_norm": 1.8156561851501465, + "learning_rate": 8.29805783782895e-06, + "loss": 0.677, "step": 6498 }, { - "epoch": 0.49177102644621845, - "grad_norm": 2.1404001712799072, - "learning_rate": 9.986735734919048e-06, - "loss": 0.7662, + "epoch": 0.45751495952129534, + "grad_norm": 1.7694058418273926, + "learning_rate": 8.296489380656302e-06, + "loss": 0.6172, "step": 6499 }, { - "epoch": 0.49184669516855206, - "grad_norm": 1.9575718641281128, - "learning_rate": 9.984497064043367e-06, - "loss": 0.7276, + "epoch": 0.4575853572685674, + "grad_norm": 1.8041995763778687, + "learning_rate": 8.294920856077998e-06, + "loss": 0.8461, "step": 6500 }, { - "epoch": 0.4919223638908857, - "grad_norm": 2.085799217224121, - "learning_rate": 9.982258331515298e-06, - "loss": 0.8749, + "epoch": 0.4576557550158395, + "grad_norm": 1.9123862981796265, + "learning_rate": 8.293352264175583e-06, + "loss": 0.5966, "step": 6501 }, { - "epoch": 0.49199803261321934, - "grad_norm": 2.514505624771118, - "learning_rate": 9.980019537475034e-06, - "loss": 0.7701, + "epoch": 0.45772615276311157, + "grad_norm": 1.6095386743545532, + "learning_rate": 8.291783605030614e-06, + "loss": 0.7486, "step": 6502 }, { - "epoch": 0.49207370133555295, - "grad_norm": 2.4015519618988037, - "learning_rate": 9.977780682062779e-06, - "loss": 0.7562, + "epoch": 0.45779655051038365, + "grad_norm": 2.0836477279663086, + "learning_rate": 8.290214878724645e-06, + "loss": 0.718, "step": 6503 }, { - "epoch": 0.49214937005788656, - "grad_norm": 2.25130033493042, - "learning_rate": 9.975541765418734e-06, - "loss": 0.9941, + "epoch": 0.4578669482576558, + "grad_norm": 1.7481831312179565, + "learning_rate": 8.288646085339237e-06, + "loss": 0.715, "step": 6504 }, { - "epoch": 0.49222503878022017, - "grad_norm": 2.4228641986846924, - "learning_rate": 9.973302787683106e-06, - "loss": 0.7318, + "epoch": 0.45793734600492786, + "grad_norm": 2.1030187606811523, + "learning_rate": 8.28707722495595e-06, + "loss": 0.6823, "step": 6505 }, { - "epoch": 0.49230070750255384, - "grad_norm": 2.58864688873291, - "learning_rate": 9.971063748996113e-06, - "loss": 0.6254, + "epoch": 0.45800774375219994, + "grad_norm": 1.7742618322372437, + "learning_rate": 8.285508297656355e-06, + "loss": 0.6833, "step": 6506 }, { - "epoch": 0.49237637622488745, - "grad_norm": 2.389697790145874, - "learning_rate": 9.968824649497963e-06, - "loss": 0.6684, + "epoch": 0.458078141499472, + "grad_norm": 1.6606049537658691, + "learning_rate": 8.283939303522017e-06, + "loss": 0.6809, "step": 6507 }, { - "epoch": 0.49245204494722106, - "grad_norm": 2.1879706382751465, - "learning_rate": 9.966585489328885e-06, - "loss": 0.7188, + "epoch": 0.4581485392467441, + "grad_norm": 1.6250191926956177, + "learning_rate": 8.282370242634512e-06, + "loss": 0.7054, "step": 6508 }, { - "epoch": 0.49252771366955467, - "grad_norm": 2.2431092262268066, - "learning_rate": 9.964346268629092e-06, - "loss": 0.727, + "epoch": 0.45821893699401617, + "grad_norm": 1.826456904411316, + "learning_rate": 8.280801115075421e-06, + "loss": 0.8157, "step": 6509 }, { - "epoch": 0.49260338239188833, - "grad_norm": 2.4414150714874268, - "learning_rate": 9.962106987538822e-06, - "loss": 0.9228, + "epoch": 0.4582893347412883, + "grad_norm": 2.0603747367858887, + "learning_rate": 8.279231920926316e-06, + "loss": 0.7196, "step": 6510 }, { - "epoch": 0.49267905111422194, - "grad_norm": 2.145207166671753, - "learning_rate": 9.959867646198299e-06, - "loss": 0.611, + "epoch": 0.4583597324885604, + "grad_norm": 1.7439708709716797, + "learning_rate": 8.277662660268784e-06, + "loss": 0.7661, "step": 6511 }, { - "epoch": 0.49275471983655555, - "grad_norm": 2.1104111671447754, - "learning_rate": 9.957628244747755e-06, - "loss": 0.8066, + "epoch": 0.45843013023583246, + "grad_norm": 2.1767895221710205, + "learning_rate": 8.276093333184416e-06, + "loss": 0.6346, "step": 6512 }, { - "epoch": 0.49283038855888917, - "grad_norm": 2.5595433712005615, - "learning_rate": 9.95538878332744e-06, - "loss": 0.6367, + "epoch": 0.45850052798310453, + "grad_norm": 2.667929172515869, + "learning_rate": 8.274523939754798e-06, + "loss": 0.7907, "step": 6513 }, { - "epoch": 0.49290605728122283, - "grad_norm": 2.492157459259033, - "learning_rate": 9.953149262077583e-06, - "loss": 0.811, + "epoch": 0.4585709257303766, + "grad_norm": 2.6660072803497314, + "learning_rate": 8.272954480061526e-06, + "loss": 0.6984, "step": 6514 }, { - "epoch": 0.49298172600355644, - "grad_norm": 2.075108528137207, - "learning_rate": 9.95090968113844e-06, - "loss": 0.6916, + "epoch": 0.4586413234776487, + "grad_norm": 1.6459259986877441, + "learning_rate": 8.2713849541862e-06, + "loss": 0.6881, "step": 6515 }, { - "epoch": 0.49305739472589005, - "grad_norm": 3.124265432357788, - "learning_rate": 9.948670040650253e-06, - "loss": 0.6464, + "epoch": 0.4587117212249208, + "grad_norm": 1.809149146080017, + "learning_rate": 8.269815362210417e-06, + "loss": 0.6379, "step": 6516 }, { - "epoch": 0.49313306344822366, - "grad_norm": 3.6622323989868164, - "learning_rate": 9.946430340753285e-06, - "loss": 0.5503, + "epoch": 0.4587821189721929, + "grad_norm": 1.9763376712799072, + "learning_rate": 8.268245704215782e-06, + "loss": 0.7481, "step": 6517 }, { - "epoch": 0.4932087321705573, - "grad_norm": 2.151686191558838, - "learning_rate": 9.944190581587787e-06, - "loss": 0.697, + "epoch": 0.458852516719465, + "grad_norm": 1.8157546520233154, + "learning_rate": 8.266675980283906e-06, + "loss": 0.7958, "step": 6518 }, { - "epoch": 0.49328440089289094, - "grad_norm": 1.9287328720092773, - "learning_rate": 9.941950763294019e-06, - "loss": 0.9503, + "epoch": 0.45892291446673705, + "grad_norm": 2.0433781147003174, + "learning_rate": 8.2651061904964e-06, + "loss": 0.6341, "step": 6519 }, { - "epoch": 0.49336006961522455, - "grad_norm": 2.108152389526367, - "learning_rate": 9.93971088601225e-06, - "loss": 0.6141, + "epoch": 0.45899331221400913, + "grad_norm": 1.9241418838500977, + "learning_rate": 8.263536334934876e-06, + "loss": 0.6372, "step": 6520 }, { - "epoch": 0.49343573833755816, - "grad_norm": 1.9373310804367065, - "learning_rate": 9.937470949882741e-06, - "loss": 0.8837, + "epoch": 0.45906370996128126, + "grad_norm": 1.660119652748108, + "learning_rate": 8.261966413680953e-06, + "loss": 0.6578, "step": 6521 }, { - "epoch": 0.49351140705989177, - "grad_norm": 2.2187082767486572, - "learning_rate": 9.935230955045775e-06, - "loss": 0.7105, + "epoch": 0.45913410770855334, + "grad_norm": 1.580331802368164, + "learning_rate": 8.260396426816254e-06, + "loss": 0.6844, "step": 6522 }, { - "epoch": 0.49358707578222544, - "grad_norm": 2.0548200607299805, - "learning_rate": 9.932990901641616e-06, - "loss": 0.7974, + "epoch": 0.4592045054558254, + "grad_norm": 1.7798608541488647, + "learning_rate": 8.2588263744224e-06, + "loss": 0.6466, "step": 6523 }, { - "epoch": 0.49366274450455905, - "grad_norm": 2.771439790725708, - "learning_rate": 9.930750789810554e-06, - "loss": 0.7277, + "epoch": 0.4592749032030975, + "grad_norm": 2.0660674571990967, + "learning_rate": 8.257256256581028e-06, + "loss": 0.6268, "step": 6524 }, { - "epoch": 0.49373841322689266, - "grad_norm": 2.260519504547119, - "learning_rate": 9.928510619692862e-06, - "loss": 0.7294, + "epoch": 0.4593453009503696, + "grad_norm": 1.9902422428131104, + "learning_rate": 8.255686073373763e-06, + "loss": 0.6995, "step": 6525 }, { - "epoch": 0.49381408194922627, - "grad_norm": 2.2592484951019287, - "learning_rate": 9.92627039142884e-06, - "loss": 0.6168, + "epoch": 0.45941569869764165, + "grad_norm": 1.6064355373382568, + "learning_rate": 8.25411582488224e-06, + "loss": 0.6101, "step": 6526 }, { - "epoch": 0.49388975067155994, - "grad_norm": 2.256322145462036, - "learning_rate": 9.924030105158762e-06, - "loss": 0.7252, + "epoch": 0.4594860964449138, + "grad_norm": 2.172921657562256, + "learning_rate": 8.252545511188101e-06, + "loss": 0.7372, "step": 6527 }, { - "epoch": 0.49396541939389355, - "grad_norm": 2.2179460525512695, - "learning_rate": 9.921789761022933e-06, - "loss": 0.715, + "epoch": 0.45955649419218586, + "grad_norm": 1.6394591331481934, + "learning_rate": 8.250975132372985e-06, + "loss": 0.6041, "step": 6528 }, { - "epoch": 0.49404108811622716, - "grad_norm": 2.221290111541748, - "learning_rate": 9.919549359161649e-06, - "loss": 0.7855, + "epoch": 0.45962689193945794, + "grad_norm": 1.8856136798858643, + "learning_rate": 8.249404688518541e-06, + "loss": 0.6772, "step": 6529 }, { - "epoch": 0.49411675683856077, - "grad_norm": 2.497633218765259, - "learning_rate": 9.917308899715208e-06, - "loss": 0.6075, + "epoch": 0.45969728968673, + "grad_norm": 1.6956474781036377, + "learning_rate": 8.247834179706416e-06, + "loss": 0.7632, "step": 6530 }, { - "epoch": 0.4941924255608944, - "grad_norm": 2.1124684810638428, - "learning_rate": 9.915068382823918e-06, - "loss": 0.7536, + "epoch": 0.4597676874340021, + "grad_norm": 1.7499394416809082, + "learning_rate": 8.24626360601826e-06, + "loss": 0.6841, "step": 6531 }, { - "epoch": 0.49426809428322804, - "grad_norm": 1.9984757900238037, - "learning_rate": 9.912827808628085e-06, - "loss": 0.5954, + "epoch": 0.4598380851812742, + "grad_norm": 1.6191076040267944, + "learning_rate": 8.244692967535731e-06, + "loss": 0.7055, "step": 6532 }, { - "epoch": 0.49434376300556165, - "grad_norm": 2.670492172241211, - "learning_rate": 9.910587177268025e-06, - "loss": 0.7755, + "epoch": 0.4599084829285463, + "grad_norm": 1.703787088394165, + "learning_rate": 8.243122264340488e-06, + "loss": 0.6539, "step": 6533 }, { - "epoch": 0.49441943172789526, - "grad_norm": 1.9507744312286377, - "learning_rate": 9.908346488884048e-06, - "loss": 0.6622, + "epoch": 0.4599788806758184, + "grad_norm": 1.7421952486038208, + "learning_rate": 8.241551496514192e-06, + "loss": 0.7188, "step": 6534 }, { - "epoch": 0.4944951004502289, - "grad_norm": 2.2064528465270996, - "learning_rate": 9.906105743616476e-06, - "loss": 0.7481, + "epoch": 0.46004927842309046, + "grad_norm": 1.825573444366455, + "learning_rate": 8.239980664138508e-06, + "loss": 0.5784, "step": 6535 }, { - "epoch": 0.49457076917256254, - "grad_norm": 2.032932758331299, - "learning_rate": 9.903864941605631e-06, - "loss": 0.6665, + "epoch": 0.46011967617036253, + "grad_norm": 2.005048990249634, + "learning_rate": 8.238409767295109e-06, + "loss": 0.7304, "step": 6536 }, { - "epoch": 0.49464643789489615, - "grad_norm": 1.6875627040863037, - "learning_rate": 9.901624082991842e-06, - "loss": 0.6829, + "epoch": 0.4601900739176346, + "grad_norm": 1.6310120820999146, + "learning_rate": 8.236838806065661e-06, + "loss": 0.7717, "step": 6537 }, { - "epoch": 0.49472210661722976, - "grad_norm": 2.333106517791748, - "learning_rate": 9.899383167915438e-06, - "loss": 0.7117, + "epoch": 0.46026047166490675, + "grad_norm": 1.8763476610183716, + "learning_rate": 8.235267780531845e-06, + "loss": 0.6683, "step": 6538 }, { - "epoch": 0.4947977753395634, - "grad_norm": 2.2066781520843506, - "learning_rate": 9.897142196516745e-06, - "loss": 0.628, + "epoch": 0.4603308694121788, + "grad_norm": 1.8268344402313232, + "learning_rate": 8.23369669077534e-06, + "loss": 0.8291, "step": 6539 }, { - "epoch": 0.49487344406189704, - "grad_norm": 1.9764480590820312, - "learning_rate": 9.894901168936112e-06, - "loss": 0.7689, + "epoch": 0.4604012671594509, + "grad_norm": 1.7157946825027466, + "learning_rate": 8.232125536877826e-06, + "loss": 0.5997, "step": 6540 }, { - "epoch": 0.49494911278423065, - "grad_norm": 1.911521077156067, - "learning_rate": 9.892660085313872e-06, - "loss": 0.6511, + "epoch": 0.460471664906723, + "grad_norm": 1.7959750890731812, + "learning_rate": 8.23055431892099e-06, + "loss": 0.7604, "step": 6541 }, { - "epoch": 0.49502478150656426, - "grad_norm": 2.4168167114257812, - "learning_rate": 9.890418945790369e-06, - "loss": 0.6539, + "epoch": 0.46054206265399505, + "grad_norm": 1.571955919265747, + "learning_rate": 8.228983036986519e-06, + "loss": 0.7025, "step": 6542 }, { - "epoch": 0.49510045022889787, - "grad_norm": 2.118840217590332, - "learning_rate": 9.88817775050595e-06, - "loss": 0.7646, + "epoch": 0.46061246040126713, + "grad_norm": 2.3646934032440186, + "learning_rate": 8.227411691156107e-06, + "loss": 0.7397, "step": 6543 }, { - "epoch": 0.4951761189512315, - "grad_norm": 2.163649082183838, - "learning_rate": 9.885936499600972e-06, - "loss": 0.8885, + "epoch": 0.46068285814853926, + "grad_norm": 2.533334970474243, + "learning_rate": 8.22584028151145e-06, + "loss": 0.6445, "step": 6544 }, { - "epoch": 0.49525178767356515, - "grad_norm": 1.633584976196289, - "learning_rate": 9.883695193215784e-06, - "loss": 0.7617, + "epoch": 0.46075325589581134, + "grad_norm": 1.8560974597930908, + "learning_rate": 8.224268808134248e-06, + "loss": 0.753, "step": 6545 }, { - "epoch": 0.49532745639589876, - "grad_norm": 2.28901743888855, - "learning_rate": 9.881453831490741e-06, - "loss": 0.6508, + "epoch": 0.4608236536430834, + "grad_norm": 1.609652042388916, + "learning_rate": 8.222697271106198e-06, + "loss": 0.7598, "step": 6546 }, { - "epoch": 0.49540312511823237, - "grad_norm": 1.933939814567566, - "learning_rate": 9.879212414566212e-06, - "loss": 0.6829, + "epoch": 0.4608940513903555, + "grad_norm": 2.5884835720062256, + "learning_rate": 8.221125670509014e-06, + "loss": 0.7305, "step": 6547 }, { - "epoch": 0.495478793840566, - "grad_norm": 2.058504104614258, - "learning_rate": 9.876970942582555e-06, - "loss": 0.7911, + "epoch": 0.4609644491376276, + "grad_norm": 1.9874389171600342, + "learning_rate": 8.219554006424397e-06, + "loss": 0.7265, "step": 6548 }, { - "epoch": 0.49555446256289964, - "grad_norm": 4.5961127281188965, - "learning_rate": 9.874729415680145e-06, - "loss": 0.6346, + "epoch": 0.4610348468848997, + "grad_norm": 1.7227306365966797, + "learning_rate": 8.217982278934064e-06, + "loss": 0.5141, "step": 6549 }, { - "epoch": 0.49563013128523326, - "grad_norm": 2.232285499572754, - "learning_rate": 9.872487833999343e-06, - "loss": 0.8858, + "epoch": 0.4611052446321718, + "grad_norm": 1.3557060956954956, + "learning_rate": 8.216410488119731e-06, + "loss": 0.7381, "step": 6550 }, { - "epoch": 0.49570580000756687, - "grad_norm": 2.6463253498077393, - "learning_rate": 9.870246197680539e-06, - "loss": 0.7364, + "epoch": 0.46117564237944386, + "grad_norm": 2.72552752494812, + "learning_rate": 8.214838634063113e-06, + "loss": 0.7391, "step": 6551 }, { - "epoch": 0.4957814687299005, - "grad_norm": 2.0343997478485107, - "learning_rate": 9.868004506864098e-06, - "loss": 0.6202, + "epoch": 0.46124604012671594, + "grad_norm": 1.6267317533493042, + "learning_rate": 8.213266716845935e-06, + "loss": 0.5588, "step": 6552 }, { - "epoch": 0.49585713745223414, - "grad_norm": 2.176643133163452, - "learning_rate": 9.86576276169041e-06, - "loss": 0.6113, + "epoch": 0.461316437873988, + "grad_norm": 1.951943278312683, + "learning_rate": 8.211694736549923e-06, + "loss": 0.707, "step": 6553 }, { - "epoch": 0.49593280617456775, - "grad_norm": 2.058581829071045, - "learning_rate": 9.863520962299858e-06, - "loss": 0.6814, + "epoch": 0.4613868356212601, + "grad_norm": 2.00592303276062, + "learning_rate": 8.2101226932568e-06, + "loss": 0.7147, "step": 6554 }, { - "epoch": 0.49600847489690136, - "grad_norm": 1.982015609741211, - "learning_rate": 9.861279108832825e-06, - "loss": 0.7254, + "epoch": 0.4614572333685322, + "grad_norm": 1.9252140522003174, + "learning_rate": 8.208550587048306e-06, + "loss": 0.5928, "step": 6555 }, { - "epoch": 0.496084143619235, - "grad_norm": 2.0388500690460205, - "learning_rate": 9.859037201429715e-06, - "loss": 0.7784, + "epoch": 0.4615276311158043, + "grad_norm": 2.098562717437744, + "learning_rate": 8.206978418006173e-06, + "loss": 0.6783, "step": 6556 }, { - "epoch": 0.49615981234156864, - "grad_norm": 2.746868133544922, - "learning_rate": 9.85679524023091e-06, - "loss": 0.7083, + "epoch": 0.4615980288630764, + "grad_norm": 1.7309396266937256, + "learning_rate": 8.205406186212137e-06, + "loss": 0.6597, "step": 6557 }, { - "epoch": 0.49623548106390225, - "grad_norm": 1.9284957647323608, - "learning_rate": 9.854553225376823e-06, - "loss": 0.6415, + "epoch": 0.46166842661034846, + "grad_norm": 1.729174017906189, + "learning_rate": 8.203833891747941e-06, + "loss": 0.6829, "step": 6558 }, { - "epoch": 0.49631114978623586, - "grad_norm": 2.158855438232422, - "learning_rate": 9.852311157007845e-06, - "loss": 0.4699, + "epoch": 0.46173882435762054, + "grad_norm": 2.060007095336914, + "learning_rate": 8.202261534695331e-06, + "loss": 0.7486, "step": 6559 }, { - "epoch": 0.49638681850856947, - "grad_norm": 1.9091185331344604, - "learning_rate": 9.850069035264388e-06, - "loss": 0.7226, + "epoch": 0.46180922210489267, + "grad_norm": 1.8859670162200928, + "learning_rate": 8.200689115136056e-06, + "loss": 0.7116, "step": 6560 }, { - "epoch": 0.4964624872309031, - "grad_norm": 2.265294313430786, - "learning_rate": 9.84782686028686e-06, - "loss": 0.7521, + "epoch": 0.46187961985216475, + "grad_norm": 1.5308682918548584, + "learning_rate": 8.199116633151866e-06, + "loss": 0.5684, "step": 6561 }, { - "epoch": 0.49653815595323675, - "grad_norm": 2.8000192642211914, - "learning_rate": 9.845584632215667e-06, - "loss": 0.8844, + "epoch": 0.4619500175994368, + "grad_norm": 1.9938805103302002, + "learning_rate": 8.197544088824516e-06, + "loss": 0.8211, "step": 6562 }, { - "epoch": 0.49661382467557036, - "grad_norm": 2.02162504196167, - "learning_rate": 9.843342351191232e-06, - "loss": 0.7957, + "epoch": 0.4620204153467089, + "grad_norm": 2.314110279083252, + "learning_rate": 8.195971482235765e-06, + "loss": 0.7142, "step": 6563 }, { - "epoch": 0.49668949339790397, - "grad_norm": 2.197127103805542, - "learning_rate": 9.841100017353972e-06, - "loss": 0.791, + "epoch": 0.462090813093981, + "grad_norm": 1.6516205072402954, + "learning_rate": 8.194398813467375e-06, + "loss": 0.6756, "step": 6564 }, { - "epoch": 0.4967651621202376, - "grad_norm": 1.8492189645767212, - "learning_rate": 9.838857630844305e-06, - "loss": 0.7472, + "epoch": 0.46216121084125306, + "grad_norm": 1.8000315427780151, + "learning_rate": 8.192826082601105e-06, + "loss": 0.6751, "step": 6565 }, { - "epoch": 0.49684083084257125, - "grad_norm": 2.151035785675049, - "learning_rate": 9.836615191802663e-06, - "loss": 0.5994, + "epoch": 0.4622316085885252, + "grad_norm": 1.874444603919983, + "learning_rate": 8.191253289718731e-06, + "loss": 0.786, "step": 6566 }, { - "epoch": 0.49691649956490486, - "grad_norm": 2.232987642288208, - "learning_rate": 9.834372700369472e-06, - "loss": 0.8005, + "epoch": 0.46230200633579727, + "grad_norm": 1.71653413772583, + "learning_rate": 8.189680434902015e-06, + "loss": 0.7403, "step": 6567 }, { - "epoch": 0.49699216828723847, - "grad_norm": 1.6914528608322144, - "learning_rate": 9.832130156685163e-06, - "loss": 0.795, + "epoch": 0.46237240408306934, + "grad_norm": 1.6900938749313354, + "learning_rate": 8.18810751823274e-06, + "loss": 0.7115, "step": 6568 }, { - "epoch": 0.4970678370095721, - "grad_norm": 2.227271318435669, - "learning_rate": 9.829887560890171e-06, - "loss": 0.7877, + "epoch": 0.4624428018303414, + "grad_norm": 1.6905124187469482, + "learning_rate": 8.186534539792677e-06, + "loss": 0.6126, "step": 6569 }, { - "epoch": 0.49714350573190574, - "grad_norm": 2.173454523086548, - "learning_rate": 9.827644913124937e-06, - "loss": 0.7479, + "epoch": 0.4625131995776135, + "grad_norm": 1.8259541988372803, + "learning_rate": 8.184961499663608e-06, + "loss": 0.6083, "step": 6570 }, { - "epoch": 0.49721917445423935, - "grad_norm": 2.1114470958709717, - "learning_rate": 9.8254022135299e-06, - "loss": 0.5879, + "epoch": 0.4625835973248856, + "grad_norm": 2.0162506103515625, + "learning_rate": 8.18338839792732e-06, + "loss": 0.676, "step": 6571 }, { - "epoch": 0.49729484317657296, - "grad_norm": 1.8386335372924805, - "learning_rate": 9.82315946224551e-06, - "loss": 0.572, + "epoch": 0.4626539950721577, + "grad_norm": 1.8687485456466675, + "learning_rate": 8.181815234665597e-06, + "loss": 0.6406, "step": 6572 }, { - "epoch": 0.4973705118989066, - "grad_norm": 2.5144834518432617, - "learning_rate": 9.820916659412208e-06, - "loss": 0.6295, + "epoch": 0.4627243928194298, + "grad_norm": 1.7021446228027344, + "learning_rate": 8.180242009960226e-06, + "loss": 0.7442, "step": 6573 }, { - "epoch": 0.4974461806212402, - "grad_norm": 2.596182107925415, - "learning_rate": 9.818673805170454e-06, - "loss": 0.7412, + "epoch": 0.46279479056670186, + "grad_norm": 1.7091535329818726, + "learning_rate": 8.178668723893006e-06, + "loss": 0.6899, "step": 6574 }, { - "epoch": 0.49752184934357385, - "grad_norm": 1.809083342552185, - "learning_rate": 9.816430899660695e-06, - "loss": 0.7819, + "epoch": 0.46286518831397394, + "grad_norm": 1.8356451988220215, + "learning_rate": 8.177095376545733e-06, + "loss": 0.5627, "step": 6575 }, { - "epoch": 0.49759751806590746, - "grad_norm": 1.6025587320327759, - "learning_rate": 9.814187943023394e-06, - "loss": 0.6106, + "epoch": 0.462935586061246, + "grad_norm": 1.6926594972610474, + "learning_rate": 8.175521968000207e-06, + "loss": 0.5969, "step": 6576 }, { - "epoch": 0.4976731867882411, - "grad_norm": 2.2268176078796387, - "learning_rate": 9.811944935399011e-06, - "loss": 0.7396, + "epoch": 0.46300598380851815, + "grad_norm": 2.181825876235962, + "learning_rate": 8.173948498338224e-06, + "loss": 0.7504, "step": 6577 }, { - "epoch": 0.4977488555105747, - "grad_norm": 2.883540391921997, - "learning_rate": 9.809701876928007e-06, - "loss": 0.7096, + "epoch": 0.46307638155579023, + "grad_norm": 1.7970621585845947, + "learning_rate": 8.1723749676416e-06, + "loss": 0.6694, "step": 6578 }, { - "epoch": 0.49782452423290835, - "grad_norm": 2.127419948577881, - "learning_rate": 9.807458767750857e-06, - "loss": 0.5795, + "epoch": 0.4631467793030623, + "grad_norm": 1.6287323236465454, + "learning_rate": 8.170801375992138e-06, + "loss": 0.6469, "step": 6579 }, { - "epoch": 0.49790019295524196, - "grad_norm": 2.2806951999664307, - "learning_rate": 9.805215608008025e-06, - "loss": 0.7534, + "epoch": 0.4632171770503344, + "grad_norm": 2.1474478244781494, + "learning_rate": 8.16922772347165e-06, + "loss": 0.7079, "step": 6580 }, { - "epoch": 0.49797586167757557, - "grad_norm": 2.44822359085083, - "learning_rate": 9.802972397839987e-06, - "loss": 0.6425, + "epoch": 0.46328757479760646, + "grad_norm": 1.7716054916381836, + "learning_rate": 8.167654010161957e-06, + "loss": 0.7425, "step": 6581 }, { - "epoch": 0.4980515303999092, - "grad_norm": 2.3223483562469482, - "learning_rate": 9.800729137387221e-06, - "loss": 0.6621, + "epoch": 0.46335797254487854, + "grad_norm": 2.1084840297698975, + "learning_rate": 8.166080236144873e-06, + "loss": 0.7847, "step": 6582 }, { - "epoch": 0.49812719912224285, - "grad_norm": 2.271935224533081, - "learning_rate": 9.798485826790205e-06, - "loss": 0.6618, + "epoch": 0.46342837029215067, + "grad_norm": 1.7810825109481812, + "learning_rate": 8.16450640150222e-06, + "loss": 0.8875, "step": 6583 }, { - "epoch": 0.49820286784457646, - "grad_norm": 1.8532313108444214, - "learning_rate": 9.796242466189427e-06, - "loss": 0.615, + "epoch": 0.46349876803942275, + "grad_norm": 2.373051404953003, + "learning_rate": 8.162932506315828e-06, + "loss": 0.7419, "step": 6584 }, { - "epoch": 0.49827853656691007, - "grad_norm": 1.9369428157806396, - "learning_rate": 9.793999055725368e-06, - "loss": 0.7089, + "epoch": 0.4635691657866948, + "grad_norm": 1.538630723953247, + "learning_rate": 8.16135855066752e-06, + "loss": 0.7528, "step": 6585 }, { - "epoch": 0.4983542052892437, - "grad_norm": 2.717416286468506, - "learning_rate": 9.79175559553852e-06, - "loss": 0.6442, + "epoch": 0.4636395635339669, + "grad_norm": 1.7912665605545044, + "learning_rate": 8.159784534639128e-06, + "loss": 0.6331, "step": 6586 }, { - "epoch": 0.4984298740115773, - "grad_norm": 2.2628488540649414, - "learning_rate": 9.789512085769375e-06, - "loss": 0.6872, + "epoch": 0.463709961281239, + "grad_norm": 1.8173837661743164, + "learning_rate": 8.158210458312489e-06, + "loss": 0.6998, "step": 6587 }, { - "epoch": 0.49850554273391096, - "grad_norm": 2.6931004524230957, - "learning_rate": 9.787268526558431e-06, - "loss": 0.6916, + "epoch": 0.4637803590285111, + "grad_norm": 2.189375400543213, + "learning_rate": 8.156636321769439e-06, + "loss": 0.7337, "step": 6588 }, { - "epoch": 0.49858121145624457, - "grad_norm": 2.3270750045776367, - "learning_rate": 9.785024918046185e-06, - "loss": 0.7876, + "epoch": 0.4638507567757832, + "grad_norm": 1.5301917791366577, + "learning_rate": 8.15506212509182e-06, + "loss": 0.5984, "step": 6589 }, { - "epoch": 0.4986568801785782, - "grad_norm": 2.268930673599243, - "learning_rate": 9.782781260373143e-06, - "loss": 0.6831, + "epoch": 0.46392115452305527, + "grad_norm": 1.8833513259887695, + "learning_rate": 8.153487868361474e-06, + "loss": 0.7232, "step": 6590 }, { - "epoch": 0.4987325489009118, - "grad_norm": 3.6653621196746826, - "learning_rate": 9.780537553679803e-06, - "loss": 0.6437, + "epoch": 0.46399155227032735, + "grad_norm": 1.9034265279769897, + "learning_rate": 8.151913551660247e-06, + "loss": 0.7246, "step": 6591 }, { - "epoch": 0.49880821762324545, - "grad_norm": 3.2328827381134033, - "learning_rate": 9.778293798106676e-06, - "loss": 0.668, + "epoch": 0.4640619500175994, + "grad_norm": 1.7011418342590332, + "learning_rate": 8.150339175069995e-06, + "loss": 0.6213, "step": 6592 }, { - "epoch": 0.49888388634557906, - "grad_norm": 2.2856032848358154, - "learning_rate": 9.776049993794277e-06, - "loss": 0.8449, + "epoch": 0.4641323477648715, + "grad_norm": 2.1070287227630615, + "learning_rate": 8.148764738672567e-06, + "loss": 0.712, "step": 6593 }, { - "epoch": 0.4989595550679127, - "grad_norm": 1.9682530164718628, - "learning_rate": 9.773806140883115e-06, - "loss": 0.8408, + "epoch": 0.46420274551214363, + "grad_norm": 1.6628282070159912, + "learning_rate": 8.147190242549817e-06, + "loss": 0.6483, "step": 6594 }, { - "epoch": 0.4990352237902463, - "grad_norm": 2.026069402694702, - "learning_rate": 9.771562239513712e-06, - "loss": 1.0183, + "epoch": 0.4642731432594157, + "grad_norm": 2.0533814430236816, + "learning_rate": 8.14561568678361e-06, + "loss": 0.6931, "step": 6595 }, { - "epoch": 0.49911089251257995, - "grad_norm": 2.4245307445526123, - "learning_rate": 9.769318289826581e-06, - "loss": 0.666, + "epoch": 0.4643435410066878, + "grad_norm": 1.799839735031128, + "learning_rate": 8.144041071455807e-06, + "loss": 0.5362, "step": 6596 }, { - "epoch": 0.49918656123491356, - "grad_norm": 1.815333604812622, - "learning_rate": 9.767074291962257e-06, - "loss": 0.7764, + "epoch": 0.46441393875395987, + "grad_norm": 1.8561105728149414, + "learning_rate": 8.142466396648269e-06, + "loss": 0.7522, "step": 6597 }, { - "epoch": 0.49926222995724717, - "grad_norm": 1.9312478303909302, - "learning_rate": 9.764830246061256e-06, - "loss": 0.6955, + "epoch": 0.46448433650123194, + "grad_norm": 1.7272330522537231, + "learning_rate": 8.140891662442871e-06, + "loss": 0.6589, "step": 6598 }, { - "epoch": 0.4993378986795808, - "grad_norm": 2.2235007286071777, - "learning_rate": 9.762586152264112e-06, - "loss": 0.799, + "epoch": 0.4645547342485041, + "grad_norm": 1.8416380882263184, + "learning_rate": 8.13931686892148e-06, + "loss": 0.6599, "step": 6599 }, { - "epoch": 0.4994135674019144, - "grad_norm": 2.057189464569092, - "learning_rate": 9.760342010711359e-06, - "loss": 0.8393, + "epoch": 0.46462513199577615, + "grad_norm": 1.8790628910064697, + "learning_rate": 8.137742016165974e-06, + "loss": 0.719, "step": 6600 }, { - "epoch": 0.49948923612424806, - "grad_norm": 2.18095326423645, - "learning_rate": 9.758097821543523e-06, - "loss": 0.5707, + "epoch": 0.46469552974304823, + "grad_norm": 1.7608462572097778, + "learning_rate": 8.136167104258233e-06, + "loss": 0.6545, "step": 6601 }, { - "epoch": 0.49956490484658167, - "grad_norm": 2.4955899715423584, - "learning_rate": 9.755853584901159e-06, - "loss": 0.6642, + "epoch": 0.4647659274903203, + "grad_norm": 1.9088890552520752, + "learning_rate": 8.134592133280133e-06, + "loss": 0.6914, "step": 6602 }, { - "epoch": 0.4996405735689153, - "grad_norm": 2.098681688308716, - "learning_rate": 9.753609300924791e-06, - "loss": 0.7294, + "epoch": 0.4648363252375924, + "grad_norm": 1.8374568223953247, + "learning_rate": 8.133017103313559e-06, + "loss": 0.6451, "step": 6603 }, { - "epoch": 0.4997162422912489, - "grad_norm": 1.8592923879623413, - "learning_rate": 9.751364969754975e-06, - "loss": 0.79, + "epoch": 0.46490672298486446, + "grad_norm": 1.595694661140442, + "learning_rate": 8.131442014440402e-06, + "loss": 0.5943, "step": 6604 }, { - "epoch": 0.49979191101358256, - "grad_norm": 2.4341955184936523, - "learning_rate": 9.749120591532253e-06, - "loss": 0.7224, + "epoch": 0.4649771207321366, + "grad_norm": 1.508575439453125, + "learning_rate": 8.129866866742549e-06, + "loss": 0.6643, "step": 6605 }, { - "epoch": 0.49986757973591617, - "grad_norm": 2.533416748046875, - "learning_rate": 9.746876166397175e-06, - "loss": 0.6489, + "epoch": 0.4650475184794087, + "grad_norm": 1.9670284986495972, + "learning_rate": 8.128291660301895e-06, + "loss": 0.7253, "step": 6606 }, { - "epoch": 0.4999432484582498, - "grad_norm": 1.9363288879394531, - "learning_rate": 9.7446316944903e-06, - "loss": 0.6737, + "epoch": 0.46511791622668075, + "grad_norm": 5.888091564178467, + "learning_rate": 8.126716395200335e-06, + "loss": 0.6948, "step": 6607 }, { - "epoch": 0.5000189171805834, - "grad_norm": 2.0590407848358154, - "learning_rate": 9.742387175952178e-06, - "loss": 0.6893, + "epoch": 0.4651883139739528, + "grad_norm": 1.9224740266799927, + "learning_rate": 8.125141071519767e-06, + "loss": 0.6226, "step": 6608 }, { - "epoch": 0.500094585902917, - "grad_norm": 1.818794846534729, - "learning_rate": 9.740142610923369e-06, - "loss": 0.7371, + "epoch": 0.4652587117212249, + "grad_norm": 1.6967793703079224, + "learning_rate": 8.1235656893421e-06, + "loss": 0.7245, "step": 6609 }, { - "epoch": 0.5001702546252507, - "grad_norm": 2.1174185276031494, - "learning_rate": 9.737897999544437e-06, - "loss": 0.7349, + "epoch": 0.465329109468497, + "grad_norm": 1.786620020866394, + "learning_rate": 8.121990248749233e-06, + "loss": 0.7128, "step": 6610 }, { - "epoch": 0.5002459233475843, - "grad_norm": 2.133242607116699, - "learning_rate": 9.735653341955944e-06, - "loss": 0.7632, + "epoch": 0.4653995072157691, + "grad_norm": 1.7419410943984985, + "learning_rate": 8.120414749823077e-06, + "loss": 0.6401, "step": 6611 }, { - "epoch": 0.5003215920699179, - "grad_norm": 2.435183525085449, - "learning_rate": 9.73340863829846e-06, - "loss": 0.612, + "epoch": 0.4654699049630412, + "grad_norm": 1.5859366655349731, + "learning_rate": 8.118839192645542e-06, + "loss": 0.6937, "step": 6612 }, { - "epoch": 0.5003972607922516, - "grad_norm": 2.1215646266937256, - "learning_rate": 9.731163888712557e-06, - "loss": 0.6771, + "epoch": 0.46554030271031327, + "grad_norm": 2.274855375289917, + "learning_rate": 8.117263577298546e-06, + "loss": 0.6581, "step": 6613 }, { - "epoch": 0.5004729295145851, - "grad_norm": 1.9832267761230469, - "learning_rate": 9.728919093338804e-06, - "loss": 0.8126, + "epoch": 0.46561070045758535, + "grad_norm": 2.168102502822876, + "learning_rate": 8.115687903864005e-06, + "loss": 0.7059, "step": 6614 }, { - "epoch": 0.5005485982369188, - "grad_norm": 2.078450918197632, - "learning_rate": 9.726674252317781e-06, - "loss": 0.5838, + "epoch": 0.4656810982048574, + "grad_norm": 1.9641964435577393, + "learning_rate": 8.11411217242384e-06, + "loss": 0.7073, "step": 6615 }, { - "epoch": 0.5006242669592524, - "grad_norm": 2.8075807094573975, - "learning_rate": 9.724429365790064e-06, - "loss": 0.5838, + "epoch": 0.46575149595212956, + "grad_norm": 1.7801446914672852, + "learning_rate": 8.112536383059972e-06, + "loss": 0.7763, "step": 6616 }, { - "epoch": 0.500699935681586, - "grad_norm": 2.4533121585845947, - "learning_rate": 9.722184433896237e-06, - "loss": 0.712, + "epoch": 0.46582189369940163, + "grad_norm": 2.3308708667755127, + "learning_rate": 8.110960535854331e-06, + "loss": 0.7274, "step": 6617 }, { - "epoch": 0.5007756044039197, - "grad_norm": 3.052152633666992, - "learning_rate": 9.71993945677689e-06, - "loss": 0.742, + "epoch": 0.4658922914466737, + "grad_norm": 1.8240830898284912, + "learning_rate": 8.109384630888847e-06, + "loss": 0.613, "step": 6618 }, { - "epoch": 0.5008512731262532, - "grad_norm": 2.4858107566833496, - "learning_rate": 9.717694434572599e-06, - "loss": 0.7089, + "epoch": 0.4659626891939458, + "grad_norm": 1.6605192422866821, + "learning_rate": 8.107808668245448e-06, + "loss": 0.6586, "step": 6619 }, { - "epoch": 0.5009269418485869, - "grad_norm": 1.992408037185669, - "learning_rate": 9.715449367423966e-06, - "loss": 0.6742, + "epoch": 0.46603308694121787, + "grad_norm": 1.6053216457366943, + "learning_rate": 8.106232648006076e-06, + "loss": 0.6062, "step": 6620 }, { - "epoch": 0.5010026105709205, - "grad_norm": 2.0677504539489746, - "learning_rate": 9.713204255471577e-06, - "loss": 0.5632, + "epoch": 0.46610348468848994, + "grad_norm": 1.4666861295700073, + "learning_rate": 8.104656570252665e-06, + "loss": 0.6005, "step": 6621 }, { - "epoch": 0.5010782792932541, - "grad_norm": 2.122046947479248, - "learning_rate": 9.71095909885603e-06, - "loss": 0.7803, + "epoch": 0.4661738824357621, + "grad_norm": 1.8091411590576172, + "learning_rate": 8.10308043506716e-06, + "loss": 0.6569, "step": 6622 }, { - "epoch": 0.5011539480155878, - "grad_norm": 1.7793095111846924, - "learning_rate": 9.708713897717928e-06, - "loss": 0.7774, + "epoch": 0.46624428018303415, + "grad_norm": 2.287379264831543, + "learning_rate": 8.101504242531502e-06, + "loss": 0.7069, "step": 6623 }, { - "epoch": 0.5012296167379214, - "grad_norm": 2.6335649490356445, - "learning_rate": 9.706468652197866e-06, - "loss": 0.7956, + "epoch": 0.46631467793030623, + "grad_norm": 1.5770342350006104, + "learning_rate": 8.099927992727643e-06, + "loss": 0.7911, "step": 6624 }, { - "epoch": 0.501305285460255, - "grad_norm": 2.394765853881836, - "learning_rate": 9.704223362436454e-06, - "loss": 0.7354, + "epoch": 0.4663850756775783, + "grad_norm": 1.5946414470672607, + "learning_rate": 8.09835168573753e-06, + "loss": 0.6498, "step": 6625 }, { - "epoch": 0.5013809541825887, - "grad_norm": 2.1570701599121094, - "learning_rate": 9.701978028574298e-06, - "loss": 0.6291, + "epoch": 0.4664554734248504, + "grad_norm": 1.9717284440994263, + "learning_rate": 8.09677532164312e-06, + "loss": 0.8084, "step": 6626 }, { - "epoch": 0.5014566229049222, - "grad_norm": 2.289980411529541, - "learning_rate": 9.699732650752005e-06, - "loss": 0.7032, + "epoch": 0.4665258711721225, + "grad_norm": 1.952942132949829, + "learning_rate": 8.095198900526366e-06, + "loss": 0.7824, "step": 6627 }, { - "epoch": 0.5015322916272559, - "grad_norm": 2.2219419479370117, - "learning_rate": 9.697487229110192e-06, - "loss": 0.7864, + "epoch": 0.4665962689193946, + "grad_norm": 1.6234028339385986, + "learning_rate": 8.093622422469228e-06, + "loss": 0.7546, "step": 6628 }, { - "epoch": 0.5016079603495895, - "grad_norm": 2.5143167972564697, - "learning_rate": 9.695241763789474e-06, - "loss": 0.6695, + "epoch": 0.4666666666666667, + "grad_norm": 1.7735446691513062, + "learning_rate": 8.092045887553673e-06, + "loss": 0.6229, "step": 6629 }, { - "epoch": 0.5016836290719231, - "grad_norm": 2.949692726135254, - "learning_rate": 9.692996254930464e-06, - "loss": 0.733, + "epoch": 0.46673706441393875, + "grad_norm": 1.4549614191055298, + "learning_rate": 8.090469295861661e-06, + "loss": 0.6779, "step": 6630 }, { - "epoch": 0.5017592977942568, - "grad_norm": 1.9106590747833252, - "learning_rate": 9.690750702673792e-06, - "loss": 0.7533, + "epoch": 0.46680746216121083, + "grad_norm": 1.821742057800293, + "learning_rate": 8.088892647475164e-06, + "loss": 0.7511, "step": 6631 }, { - "epoch": 0.5018349665165903, - "grad_norm": 2.054891586303711, - "learning_rate": 9.688505107160074e-06, - "loss": 0.7022, + "epoch": 0.4668778599084829, + "grad_norm": 2.043276786804199, + "learning_rate": 8.087315942476151e-06, + "loss": 0.6153, "step": 6632 }, { - "epoch": 0.501910635238924, - "grad_norm": 1.7698699235916138, - "learning_rate": 9.686259468529938e-06, - "loss": 0.6956, + "epoch": 0.46694825765575504, + "grad_norm": 1.8999780416488647, + "learning_rate": 8.085739180946598e-06, + "loss": 0.8009, "step": 6633 }, { - "epoch": 0.5019863039612577, - "grad_norm": 2.303417205810547, - "learning_rate": 9.684013786924014e-06, - "loss": 0.6195, + "epoch": 0.4670186554030271, + "grad_norm": 1.800286889076233, + "learning_rate": 8.084162362968482e-06, + "loss": 0.6204, "step": 6634 }, { - "epoch": 0.5020619726835912, - "grad_norm": 1.9892022609710693, - "learning_rate": 9.681768062482937e-06, - "loss": 0.6891, + "epoch": 0.4670890531502992, + "grad_norm": 1.626577615737915, + "learning_rate": 8.082585488623783e-06, + "loss": 0.6875, "step": 6635 }, { - "epoch": 0.5021376414059249, - "grad_norm": 2.3008720874786377, - "learning_rate": 9.679522295347342e-06, - "loss": 0.6299, + "epoch": 0.46715945089757127, + "grad_norm": 1.7355901002883911, + "learning_rate": 8.081008557994485e-06, + "loss": 0.638, "step": 6636 }, { - "epoch": 0.5022133101282585, - "grad_norm": 2.5181405544281006, - "learning_rate": 9.677276485657857e-06, - "loss": 0.6741, + "epoch": 0.46722984864484335, + "grad_norm": 1.762460708618164, + "learning_rate": 8.079431571162569e-06, + "loss": 0.6176, "step": 6637 }, { - "epoch": 0.5022889788505921, - "grad_norm": 2.3965871334075928, - "learning_rate": 9.67503063355513e-06, - "loss": 0.758, + "epoch": 0.4673002463921154, + "grad_norm": 1.9596545696258545, + "learning_rate": 8.077854528210032e-06, + "loss": 0.6853, "step": 6638 }, { - "epoch": 0.5023646475729258, - "grad_norm": 2.2800729274749756, - "learning_rate": 9.672784739179801e-06, - "loss": 0.7284, + "epoch": 0.46737064413938756, + "grad_norm": 1.9971206188201904, + "learning_rate": 8.07627742921886e-06, + "loss": 0.6898, "step": 6639 }, { - "epoch": 0.5024403162952593, - "grad_norm": 2.018068790435791, - "learning_rate": 9.670538802672516e-06, - "loss": 0.5924, + "epoch": 0.46744104188665964, + "grad_norm": 1.8275867700576782, + "learning_rate": 8.074700274271051e-06, + "loss": 0.7444, "step": 6640 }, { - "epoch": 0.502515985017593, - "grad_norm": 2.180234670639038, - "learning_rate": 9.668292824173925e-06, - "loss": 0.8005, + "epoch": 0.4675114396339317, + "grad_norm": 1.7868245840072632, + "learning_rate": 8.0731230634486e-06, + "loss": 0.8214, "step": 6641 }, { - "epoch": 0.5025916537399266, - "grad_norm": 2.2746224403381348, - "learning_rate": 9.666046803824671e-06, - "loss": 0.7719, + "epoch": 0.4675818373812038, + "grad_norm": 1.5321637392044067, + "learning_rate": 8.071545796833511e-06, + "loss": 0.7531, "step": 6642 }, { - "epoch": 0.5026673224622602, - "grad_norm": 3.1850080490112305, - "learning_rate": 9.663800741765416e-06, - "loss": 0.8144, + "epoch": 0.46765223512847587, + "grad_norm": 1.5875775814056396, + "learning_rate": 8.069968474507784e-06, + "loss": 0.6505, "step": 6643 }, { - "epoch": 0.5027429911845939, - "grad_norm": 2.118246078491211, - "learning_rate": 9.661554638136808e-06, - "loss": 0.7937, + "epoch": 0.467722632875748, + "grad_norm": 1.5285100936889648, + "learning_rate": 8.068391096553427e-06, + "loss": 0.6346, "step": 6644 }, { - "epoch": 0.5028186599069274, - "grad_norm": 1.9210091829299927, - "learning_rate": 9.659308493079511e-06, - "loss": 0.5085, + "epoch": 0.4677930306230201, + "grad_norm": 1.6502366065979004, + "learning_rate": 8.06681366305245e-06, + "loss": 0.6002, "step": 6645 }, { - "epoch": 0.5028943286292611, - "grad_norm": 2.1599907875061035, - "learning_rate": 9.657062306734182e-06, - "loss": 0.6544, + "epoch": 0.46786342837029216, + "grad_norm": 1.8060230016708374, + "learning_rate": 8.065236174086865e-06, + "loss": 0.6169, "step": 6646 }, { - "epoch": 0.5029699973515948, - "grad_norm": 2.152679443359375, - "learning_rate": 9.654816079241487e-06, - "loss": 0.8208, + "epoch": 0.46793382611756423, + "grad_norm": 1.7972235679626465, + "learning_rate": 8.063658629738687e-06, + "loss": 0.7187, "step": 6647 }, { - "epoch": 0.5030456660739283, - "grad_norm": 2.152647018432617, - "learning_rate": 9.652569810742093e-06, - "loss": 0.6365, + "epoch": 0.4680042238648363, + "grad_norm": 1.5203578472137451, + "learning_rate": 8.062081030089929e-06, + "loss": 0.7106, "step": 6648 }, { - "epoch": 0.503121334796262, - "grad_norm": 2.0750904083251953, - "learning_rate": 9.650323501376666e-06, - "loss": 0.6757, + "epoch": 0.4680746216121084, + "grad_norm": 1.7217711210250854, + "learning_rate": 8.06050337522262e-06, + "loss": 0.695, "step": 6649 }, { - "epoch": 0.5031970035185956, - "grad_norm": 2.493428945541382, - "learning_rate": 9.648077151285877e-06, - "loss": 0.7954, + "epoch": 0.4681450193593805, + "grad_norm": 1.7866359949111938, + "learning_rate": 8.05892566521878e-06, + "loss": 0.7852, "step": 6650 }, { - "epoch": 0.5032726722409292, - "grad_norm": 1.762840986251831, - "learning_rate": 9.645830760610401e-06, - "loss": 0.8119, + "epoch": 0.4682154171066526, + "grad_norm": 1.9376652240753174, + "learning_rate": 8.057347900160436e-06, + "loss": 0.676, "step": 6651 }, { - "epoch": 0.5033483409632629, - "grad_norm": 2.1899425983428955, - "learning_rate": 9.643584329490914e-06, - "loss": 0.6388, + "epoch": 0.4682858148539247, + "grad_norm": 1.4855091571807861, + "learning_rate": 8.055770080129618e-06, + "loss": 0.6453, "step": 6652 }, { - "epoch": 0.5034240096855964, - "grad_norm": 2.0570414066314697, - "learning_rate": 9.641337858068094e-06, - "loss": 0.7231, + "epoch": 0.46835621260119675, + "grad_norm": 1.8638074398040771, + "learning_rate": 8.054192205208356e-06, + "loss": 0.7574, "step": 6653 }, { - "epoch": 0.5034996784079301, - "grad_norm": 2.0795912742614746, - "learning_rate": 9.639091346482624e-06, - "loss": 0.7492, + "epoch": 0.46842661034846883, + "grad_norm": 1.7278921604156494, + "learning_rate": 8.052614275478685e-06, + "loss": 0.6543, "step": 6654 }, { - "epoch": 0.5035753471302638, - "grad_norm": 2.199233293533325, - "learning_rate": 9.636844794875187e-06, - "loss": 0.6797, + "epoch": 0.46849700809574096, + "grad_norm": 1.9031232595443726, + "learning_rate": 8.051036291022646e-06, + "loss": 0.6818, "step": 6655 }, { - "epoch": 0.5036510158525973, - "grad_norm": 2.3205556869506836, - "learning_rate": 9.634598203386467e-06, - "loss": 0.6839, + "epoch": 0.46856740584301304, + "grad_norm": 1.8398131132125854, + "learning_rate": 8.04945825192228e-06, + "loss": 0.6614, "step": 6656 }, { - "epoch": 0.503726684574931, - "grad_norm": 2.707512855529785, - "learning_rate": 9.632351572157156e-06, - "loss": 0.8496, + "epoch": 0.4686378035902851, + "grad_norm": 1.7867637872695923, + "learning_rate": 8.047880158259624e-06, + "loss": 0.6935, "step": 6657 }, { - "epoch": 0.5038023532972645, - "grad_norm": 1.746833086013794, - "learning_rate": 9.630104901327944e-06, - "loss": 0.8147, + "epoch": 0.4687082013375572, + "grad_norm": 1.6679308414459229, + "learning_rate": 8.046302010116735e-06, + "loss": 0.6961, "step": 6658 }, { - "epoch": 0.5038780220195982, - "grad_norm": 1.9517803192138672, - "learning_rate": 9.62785819103953e-06, - "loss": 0.6498, + "epoch": 0.4687785990848293, + "grad_norm": 2.2838516235351562, + "learning_rate": 8.044723807575654e-06, + "loss": 0.6113, "step": 6659 }, { - "epoch": 0.5039536907419319, - "grad_norm": 1.9872726202011108, - "learning_rate": 9.625611441432598e-06, - "loss": 0.6999, + "epoch": 0.46884899683210135, + "grad_norm": 3.0329344272613525, + "learning_rate": 8.043145550718436e-06, + "loss": 0.6943, "step": 6660 }, { - "epoch": 0.5040293594642654, - "grad_norm": 2.2390949726104736, - "learning_rate": 9.623364652647858e-06, - "loss": 0.6743, + "epoch": 0.4689193945793735, + "grad_norm": 1.9068228006362915, + "learning_rate": 8.04156723962714e-06, + "loss": 0.7667, "step": 6661 }, { - "epoch": 0.5041050281865991, - "grad_norm": 2.458838939666748, - "learning_rate": 9.621117824826008e-06, - "loss": 0.7239, + "epoch": 0.46898979232664556, + "grad_norm": 1.9434071779251099, + "learning_rate": 8.039988874383815e-06, + "loss": 0.7173, "step": 6662 }, { - "epoch": 0.5041806969089327, - "grad_norm": 2.7684452533721924, - "learning_rate": 9.618870958107747e-06, - "loss": 0.67, + "epoch": 0.46906019007391764, + "grad_norm": 1.66643226146698, + "learning_rate": 8.038410455070528e-06, + "loss": 0.7289, "step": 6663 }, { - "epoch": 0.5042563656312663, - "grad_norm": 2.003108501434326, - "learning_rate": 9.61662405263379e-06, - "loss": 0.6692, + "epoch": 0.4691305878211897, + "grad_norm": 2.0360026359558105, + "learning_rate": 8.036831981769342e-06, + "loss": 0.7049, "step": 6664 }, { - "epoch": 0.5043320343536, - "grad_norm": 1.8543363809585571, - "learning_rate": 9.614377108544836e-06, - "loss": 0.7839, + "epoch": 0.4692009855684618, + "grad_norm": 1.9816287755966187, + "learning_rate": 8.035253454562322e-06, + "loss": 0.6698, "step": 6665 }, { - "epoch": 0.5044077030759335, - "grad_norm": 1.8645862340927124, - "learning_rate": 9.612130125981603e-06, - "loss": 0.6441, + "epoch": 0.46927138331573387, + "grad_norm": 1.8286796808242798, + "learning_rate": 8.033674873531537e-06, + "loss": 0.622, "step": 6666 }, { - "epoch": 0.5044833717982672, - "grad_norm": 2.3227145671844482, - "learning_rate": 9.6098831050848e-06, - "loss": 0.7593, + "epoch": 0.469341781063006, + "grad_norm": 1.9376773834228516, + "learning_rate": 8.032096238759058e-06, + "loss": 0.6757, "step": 6667 }, { - "epoch": 0.5045590405206009, - "grad_norm": 2.1270523071289062, - "learning_rate": 9.607636045995145e-06, - "loss": 0.7418, + "epoch": 0.4694121788102781, + "grad_norm": 1.7383314371109009, + "learning_rate": 8.030517550326964e-06, + "loss": 0.6646, "step": 6668 }, { - "epoch": 0.5046347092429344, - "grad_norm": 2.077716827392578, - "learning_rate": 9.605388948853355e-06, - "loss": 0.7234, + "epoch": 0.46948257655755016, + "grad_norm": 1.5484790802001953, + "learning_rate": 8.028938808317325e-06, + "loss": 0.6853, "step": 6669 }, { - "epoch": 0.5047103779652681, - "grad_norm": 1.9619203805923462, - "learning_rate": 9.60314181380015e-06, - "loss": 0.6765, + "epoch": 0.46955297430482223, + "grad_norm": 1.6185481548309326, + "learning_rate": 8.027360012812228e-06, + "loss": 0.7082, "step": 6670 }, { - "epoch": 0.5047860466876016, - "grad_norm": 1.965520977973938, - "learning_rate": 9.600894640976257e-06, - "loss": 0.7541, + "epoch": 0.4696233720520943, + "grad_norm": 1.741227388381958, + "learning_rate": 8.025781163893753e-06, + "loss": 0.7014, "step": 6671 }, { - "epoch": 0.5048617154099353, - "grad_norm": 1.9246147871017456, - "learning_rate": 9.598647430522397e-06, - "loss": 0.7266, + "epoch": 0.46969376979936645, + "grad_norm": 1.3335667848587036, + "learning_rate": 8.024202261643987e-06, + "loss": 0.7158, "step": 6672 }, { - "epoch": 0.504937384132269, - "grad_norm": 2.243699550628662, - "learning_rate": 9.596400182579299e-06, - "loss": 0.6776, + "epoch": 0.4697641675466385, + "grad_norm": 1.6549283266067505, + "learning_rate": 8.022623306145017e-06, + "loss": 0.6617, "step": 6673 }, { - "epoch": 0.5050130528546025, - "grad_norm": 2.064436912536621, - "learning_rate": 9.594152897287689e-06, - "loss": 0.661, + "epoch": 0.4698345652939106, + "grad_norm": 1.5525285005569458, + "learning_rate": 8.021044297478935e-06, + "loss": 0.6158, "step": 6674 }, { - "epoch": 0.5050887215769362, - "grad_norm": 2.3579468727111816, - "learning_rate": 9.591905574788305e-06, - "loss": 0.7555, + "epoch": 0.4699049630411827, + "grad_norm": 1.9705122709274292, + "learning_rate": 8.019465235727837e-06, + "loss": 0.7056, "step": 6675 }, { - "epoch": 0.5051643902992698, - "grad_norm": 2.24985408782959, - "learning_rate": 9.58965821522188e-06, - "loss": 0.6199, + "epoch": 0.46997536078845475, + "grad_norm": 2.035418748855591, + "learning_rate": 8.017886120973816e-06, + "loss": 0.6909, "step": 6676 }, { - "epoch": 0.5052400590216034, - "grad_norm": 2.8891539573669434, - "learning_rate": 9.587410818729151e-06, - "loss": 0.7357, + "epoch": 0.47004575853572683, + "grad_norm": 1.605615258216858, + "learning_rate": 8.016306953298976e-06, + "loss": 0.672, "step": 6677 }, { - "epoch": 0.5053157277439371, - "grad_norm": 1.8925280570983887, - "learning_rate": 9.585163385450857e-06, - "loss": 0.6271, + "epoch": 0.47011615628299896, + "grad_norm": 1.9544897079467773, + "learning_rate": 8.014727732785415e-06, + "loss": 0.8187, "step": 6678 }, { - "epoch": 0.5053913964662706, - "grad_norm": 2.103595495223999, - "learning_rate": 9.582915915527738e-06, - "loss": 0.7584, + "epoch": 0.47018655403027104, + "grad_norm": 1.8247848749160767, + "learning_rate": 8.013148459515241e-06, + "loss": 0.7206, "step": 6679 }, { - "epoch": 0.5054670651886043, - "grad_norm": 1.9762611389160156, - "learning_rate": 9.580668409100539e-06, - "loss": 0.6955, + "epoch": 0.4702569517775431, + "grad_norm": 1.7818752527236938, + "learning_rate": 8.011569133570562e-06, + "loss": 0.7857, "step": 6680 }, { - "epoch": 0.505542733910938, - "grad_norm": 1.8071695566177368, - "learning_rate": 9.578420866310004e-06, - "loss": 0.7404, + "epoch": 0.4703273495248152, + "grad_norm": 1.9644584655761719, + "learning_rate": 8.009989755033485e-06, + "loss": 0.6586, "step": 6681 }, { - "epoch": 0.5056184026332715, - "grad_norm": 1.5950270891189575, - "learning_rate": 9.576173287296889e-06, - "loss": 0.8587, + "epoch": 0.4703977472720873, + "grad_norm": 2.0757572650909424, + "learning_rate": 8.008410323986128e-06, + "loss": 0.7885, "step": 6682 }, { - "epoch": 0.5056940713556052, - "grad_norm": 1.9475131034851074, - "learning_rate": 9.573925672201932e-06, - "loss": 0.7747, + "epoch": 0.4704681450193594, + "grad_norm": 1.6330646276474, + "learning_rate": 8.006830840510605e-06, + "loss": 0.6482, "step": 6683 }, { - "epoch": 0.5057697400779387, - "grad_norm": 1.9169731140136719, - "learning_rate": 9.5716780211659e-06, - "loss": 0.5432, + "epoch": 0.4705385427666315, + "grad_norm": 1.9295967817306519, + "learning_rate": 8.005251304689034e-06, + "loss": 0.6695, "step": 6684 }, { - "epoch": 0.5058454088002724, - "grad_norm": 2.5060229301452637, - "learning_rate": 9.569430334329538e-06, - "loss": 0.6395, + "epoch": 0.47060894051390356, + "grad_norm": 2.068580150604248, + "learning_rate": 8.003671716603538e-06, + "loss": 0.743, "step": 6685 }, { - "epoch": 0.5059210775226061, - "grad_norm": 1.836045503616333, - "learning_rate": 9.567182611833605e-06, - "loss": 0.7145, + "epoch": 0.47067933826117564, + "grad_norm": 2.1411521434783936, + "learning_rate": 8.00209207633624e-06, + "loss": 0.7561, "step": 6686 }, { - "epoch": 0.5059967462449396, - "grad_norm": 1.950330138206482, - "learning_rate": 9.564934853818867e-06, - "loss": 0.6531, + "epoch": 0.4707497360084477, + "grad_norm": 1.6866395473480225, + "learning_rate": 8.000512383969266e-06, + "loss": 0.7009, "step": 6687 }, { - "epoch": 0.5060724149672733, - "grad_norm": 2.326371192932129, - "learning_rate": 9.562687060426075e-06, - "loss": 0.7988, + "epoch": 0.4708201337557198, + "grad_norm": 2.305881977081299, + "learning_rate": 7.99893263958475e-06, + "loss": 0.7401, "step": 6688 }, { - "epoch": 0.506148083689607, - "grad_norm": 1.9932469129562378, - "learning_rate": 9.560439231796005e-06, - "loss": 0.6736, + "epoch": 0.4708905315029919, + "grad_norm": 1.8394367694854736, + "learning_rate": 7.997352843264817e-06, + "loss": 0.7403, "step": 6689 }, { - "epoch": 0.5062237524119405, - "grad_norm": 1.9955806732177734, - "learning_rate": 9.558191368069414e-06, - "loss": 0.6104, + "epoch": 0.470960929250264, + "grad_norm": 1.721963882446289, + "learning_rate": 7.99577299509161e-06, + "loss": 0.6351, "step": 6690 }, { - "epoch": 0.5062994211342742, - "grad_norm": 2.626847505569458, - "learning_rate": 9.555943469387074e-06, - "loss": 0.7226, + "epoch": 0.4710313269975361, + "grad_norm": 1.8732179403305054, + "learning_rate": 7.994193095147263e-06, + "loss": 0.7327, "step": 6691 }, { - "epoch": 0.5063750898566077, - "grad_norm": 2.498826026916504, - "learning_rate": 9.553695535889759e-06, - "loss": 0.7776, + "epoch": 0.47110172474480816, + "grad_norm": 1.6406373977661133, + "learning_rate": 7.992613143513915e-06, + "loss": 0.6805, "step": 6692 }, { - "epoch": 0.5064507585789414, - "grad_norm": 2.252244710922241, - "learning_rate": 9.551447567718236e-06, - "loss": 0.9246, + "epoch": 0.47117212249208024, + "grad_norm": 1.5529100894927979, + "learning_rate": 7.99103314027371e-06, + "loss": 0.7675, "step": 6693 }, { - "epoch": 0.5065264273012751, - "grad_norm": 2.162478446960449, - "learning_rate": 9.549199565013286e-06, - "loss": 0.6745, + "epoch": 0.4712425202393523, + "grad_norm": 2.229367971420288, + "learning_rate": 7.989453085508798e-06, + "loss": 0.6014, "step": 6694 }, { - "epoch": 0.5066020960236086, - "grad_norm": 2.2146854400634766, - "learning_rate": 9.546951527915681e-06, - "loss": 0.834, + "epoch": 0.47131291798662445, + "grad_norm": 2.185915231704712, + "learning_rate": 7.987872979301323e-06, + "loss": 0.647, "step": 6695 }, { - "epoch": 0.5066777647459423, - "grad_norm": 2.3544445037841797, - "learning_rate": 9.5447034565662e-06, - "loss": 0.8678, + "epoch": 0.4713833157338965, + "grad_norm": 1.5875635147094727, + "learning_rate": 7.986292821733435e-06, + "loss": 0.6269, "step": 6696 }, { - "epoch": 0.5067534334682758, - "grad_norm": 1.949468970298767, - "learning_rate": 9.54245535110563e-06, - "loss": 0.6463, + "epoch": 0.4714537134811686, + "grad_norm": 1.5620105266571045, + "learning_rate": 7.984712612887292e-06, + "loss": 0.6604, "step": 6697 }, { - "epoch": 0.5068291021906095, - "grad_norm": 2.0964138507843018, - "learning_rate": 9.540207211674751e-06, - "loss": 0.8263, + "epoch": 0.4715241112284407, + "grad_norm": 1.664412021636963, + "learning_rate": 7.983132352845048e-06, + "loss": 0.7471, "step": 6698 }, { - "epoch": 0.5069047709129432, - "grad_norm": 1.5557283163070679, - "learning_rate": 9.537959038414345e-06, - "loss": 0.6854, + "epoch": 0.47159450897571276, + "grad_norm": 1.703757643699646, + "learning_rate": 7.981552041688861e-06, + "loss": 0.7221, "step": 6699 }, { - "epoch": 0.5069804396352767, - "grad_norm": 1.7753989696502686, - "learning_rate": 9.53571083146521e-06, - "loss": 0.5893, + "epoch": 0.4716649067229849, + "grad_norm": 1.6820560693740845, + "learning_rate": 7.979971679500896e-06, + "loss": 0.6342, "step": 6700 }, { - "epoch": 0.5070561083576104, - "grad_norm": 1.9944871664047241, - "learning_rate": 9.533462590968125e-06, - "loss": 0.7938, + "epoch": 0.47173530447025697, + "grad_norm": 1.5384210348129272, + "learning_rate": 7.978391266363317e-06, + "loss": 0.6757, "step": 6701 }, { - "epoch": 0.5071317770799441, - "grad_norm": 2.079556465148926, - "learning_rate": 9.531214317063891e-06, - "loss": 0.7135, + "epoch": 0.47180570221752904, + "grad_norm": 1.7945889234542847, + "learning_rate": 7.976810802358287e-06, + "loss": 0.7233, "step": 6702 }, { - "epoch": 0.5072074458022776, - "grad_norm": 2.094221591949463, - "learning_rate": 9.528966009893297e-06, - "loss": 0.8092, + "epoch": 0.4718760999648011, + "grad_norm": 2.419027328491211, + "learning_rate": 7.975230287567982e-06, + "loss": 0.6333, "step": 6703 }, { - "epoch": 0.5072831145246113, - "grad_norm": 2.0519769191741943, - "learning_rate": 9.526717669597139e-06, - "loss": 0.7803, + "epoch": 0.4719464977120732, + "grad_norm": 2.267399549484253, + "learning_rate": 7.973649722074568e-06, + "loss": 0.7891, "step": 6704 }, { - "epoch": 0.5073587832469448, - "grad_norm": 1.9500595331192017, - "learning_rate": 9.52446929631622e-06, - "loss": 0.6599, + "epoch": 0.4720168954593453, + "grad_norm": 2.125166654586792, + "learning_rate": 7.972069105960225e-06, + "loss": 0.7451, "step": 6705 }, { - "epoch": 0.5074344519692785, - "grad_norm": 2.6499404907226562, - "learning_rate": 9.522220890191332e-06, - "loss": 0.6721, + "epoch": 0.4720872932066174, + "grad_norm": 1.4931856393814087, + "learning_rate": 7.97048843930713e-06, + "loss": 0.7722, "step": 6706 }, { - "epoch": 0.5075101206916122, - "grad_norm": 1.9413082599639893, - "learning_rate": 9.51997245136329e-06, - "loss": 0.6661, + "epoch": 0.4721576909538895, + "grad_norm": 1.4914391040802002, + "learning_rate": 7.968907722197459e-06, + "loss": 0.7173, "step": 6707 }, { - "epoch": 0.5075857894139457, - "grad_norm": 1.8632111549377441, - "learning_rate": 9.517723979972886e-06, - "loss": 0.6466, + "epoch": 0.47222808870116156, + "grad_norm": 1.75771963596344, + "learning_rate": 7.967326954713398e-06, + "loss": 0.6952, "step": 6708 }, { - "epoch": 0.5076614581362794, - "grad_norm": 2.5629031658172607, - "learning_rate": 9.515475476160934e-06, - "loss": 0.7257, + "epoch": 0.47229848644843364, + "grad_norm": 1.6117488145828247, + "learning_rate": 7.965746136937136e-06, + "loss": 0.7264, "step": 6709 }, { - "epoch": 0.5077371268586129, - "grad_norm": 2.516348361968994, - "learning_rate": 9.513226940068241e-06, - "loss": 0.8455, + "epoch": 0.4723688841957057, + "grad_norm": 1.7845412492752075, + "learning_rate": 7.964165268950855e-06, + "loss": 0.7334, "step": 6710 }, { - "epoch": 0.5078127955809466, - "grad_norm": 2.427999973297119, - "learning_rate": 9.510978371835613e-06, - "loss": 0.7796, + "epoch": 0.47243928194297785, + "grad_norm": 1.6614781618118286, + "learning_rate": 7.962584350836749e-06, + "loss": 0.6764, "step": 6711 }, { - "epoch": 0.5078884643032803, - "grad_norm": 2.137712001800537, - "learning_rate": 9.508729771603872e-06, - "loss": 0.7222, + "epoch": 0.47250967969024993, + "grad_norm": 2.0798163414001465, + "learning_rate": 7.961003382677013e-06, + "loss": 0.654, "step": 6712 }, { - "epoch": 0.5079641330256138, - "grad_norm": 2.6136555671691895, - "learning_rate": 9.506481139513824e-06, - "loss": 0.8969, + "epoch": 0.472580077437522, + "grad_norm": 2.6189446449279785, + "learning_rate": 7.959422364553838e-06, + "loss": 0.6912, "step": 6713 }, { - "epoch": 0.5080398017479475, - "grad_norm": 2.343489646911621, - "learning_rate": 9.50423247570629e-06, - "loss": 0.7039, + "epoch": 0.4726504751847941, + "grad_norm": 1.920992136001587, + "learning_rate": 7.957841296549426e-06, + "loss": 0.6182, "step": 6714 }, { - "epoch": 0.5081154704702812, - "grad_norm": 2.309749126434326, - "learning_rate": 9.501983780322088e-06, - "loss": 0.7666, + "epoch": 0.47272087293206616, + "grad_norm": 1.5950154066085815, + "learning_rate": 7.956260178745977e-06, + "loss": 0.6952, "step": 6715 }, { - "epoch": 0.5081911391926147, - "grad_norm": 2.360842704772949, - "learning_rate": 9.499735053502037e-06, - "loss": 0.772, + "epoch": 0.47279127067933824, + "grad_norm": 1.7773045301437378, + "learning_rate": 7.954679011225697e-06, + "loss": 0.7067, "step": 6716 }, { - "epoch": 0.5082668079149484, - "grad_norm": 1.918006181716919, - "learning_rate": 9.497486295386962e-06, - "loss": 0.7022, + "epoch": 0.47286166842661037, + "grad_norm": 2.189542770385742, + "learning_rate": 7.953097794070788e-06, + "loss": 0.745, "step": 6717 }, { - "epoch": 0.5083424766372819, - "grad_norm": 2.2111973762512207, - "learning_rate": 9.495237506117685e-06, - "loss": 0.5871, + "epoch": 0.47293206617388245, + "grad_norm": 1.784259557723999, + "learning_rate": 7.951516527363462e-06, + "loss": 0.7161, "step": 6718 }, { - "epoch": 0.5084181453596156, - "grad_norm": 2.214198350906372, - "learning_rate": 9.492988685835031e-06, - "loss": 0.6968, + "epoch": 0.4730024639211545, + "grad_norm": 2.1091091632843018, + "learning_rate": 7.949935211185935e-06, + "loss": 0.6174, "step": 6719 }, { - "epoch": 0.5084938140819493, - "grad_norm": 1.9260320663452148, - "learning_rate": 9.490739834679834e-06, - "loss": 0.8053, + "epoch": 0.4730728616684266, + "grad_norm": 1.9375001192092896, + "learning_rate": 7.948353845620411e-06, + "loss": 0.7722, "step": 6720 }, { - "epoch": 0.5085694828042828, - "grad_norm": 2.5409858226776123, - "learning_rate": 9.488490952792924e-06, - "loss": 0.6891, + "epoch": 0.4731432594156987, + "grad_norm": 1.7820405960083008, + "learning_rate": 7.94677243074911e-06, + "loss": 0.6173, "step": 6721 }, { - "epoch": 0.5086451515266165, - "grad_norm": 2.6639885902404785, - "learning_rate": 9.486242040315125e-06, - "loss": 0.6264, + "epoch": 0.47321365716297076, + "grad_norm": 1.8287259340286255, + "learning_rate": 7.945190966654258e-06, + "loss": 0.6496, "step": 6722 }, { - "epoch": 0.50872082024895, - "grad_norm": 2.6699607372283936, - "learning_rate": 9.48399309738728e-06, - "loss": 0.7295, + "epoch": 0.4732840549102429, + "grad_norm": 1.7746152877807617, + "learning_rate": 7.943609453418069e-06, + "loss": 0.73, "step": 6723 }, { - "epoch": 0.5087964889712837, - "grad_norm": 1.702734351158142, - "learning_rate": 9.481744124150222e-06, - "loss": 0.705, + "epoch": 0.47335445265751497, + "grad_norm": 1.6024863719940186, + "learning_rate": 7.942027891122769e-06, + "loss": 0.6477, "step": 6724 }, { - "epoch": 0.5088721576936174, - "grad_norm": 2.3843774795532227, - "learning_rate": 9.479495120744786e-06, - "loss": 0.7714, + "epoch": 0.47342485040478705, + "grad_norm": 1.6074565649032593, + "learning_rate": 7.940446279850589e-06, + "loss": 0.7123, "step": 6725 }, { - "epoch": 0.5089478264159509, - "grad_norm": 2.0906484127044678, - "learning_rate": 9.477246087311816e-06, - "loss": 0.7696, + "epoch": 0.4734952481520591, + "grad_norm": 1.7225322723388672, + "learning_rate": 7.938864619683754e-06, + "loss": 0.669, "step": 6726 }, { - "epoch": 0.5090234951382846, - "grad_norm": 2.4674184322357178, - "learning_rate": 9.474997023992152e-06, - "loss": 0.7912, + "epoch": 0.4735656458993312, + "grad_norm": 1.755051851272583, + "learning_rate": 7.937282910704495e-06, + "loss": 0.6147, "step": 6727 }, { - "epoch": 0.5090991638606183, - "grad_norm": 1.9919297695159912, - "learning_rate": 9.472747930926641e-06, - "loss": 0.6566, + "epoch": 0.47363604364660333, + "grad_norm": 1.7224875688552856, + "learning_rate": 7.935701152995052e-06, + "loss": 0.7224, "step": 6728 }, { - "epoch": 0.5091748325829518, - "grad_norm": 1.8796985149383545, - "learning_rate": 9.470498808256121e-06, - "loss": 0.6215, + "epoch": 0.4737064413938754, + "grad_norm": 1.8651607036590576, + "learning_rate": 7.934119346637655e-06, + "loss": 0.6724, "step": 6729 }, { - "epoch": 0.5092505013052855, - "grad_norm": 2.5070700645446777, - "learning_rate": 9.46824965612145e-06, - "loss": 0.7519, + "epoch": 0.4737768391411475, + "grad_norm": 2.0180118083953857, + "learning_rate": 7.932537491714549e-06, + "loss": 0.7529, "step": 6730 }, { - "epoch": 0.509326170027619, - "grad_norm": 3.2498910427093506, - "learning_rate": 9.466000474663466e-06, - "loss": 0.6886, + "epoch": 0.47384723688841957, + "grad_norm": 1.818206787109375, + "learning_rate": 7.930955588307975e-06, + "loss": 0.726, "step": 6731 }, { - "epoch": 0.5094018387499527, - "grad_norm": 2.552865743637085, - "learning_rate": 9.463751264023028e-06, - "loss": 0.6306, + "epoch": 0.47391763463569164, + "grad_norm": 1.8935813903808594, + "learning_rate": 7.929373636500174e-06, + "loss": 0.6805, "step": 6732 }, { - "epoch": 0.5094775074722864, - "grad_norm": 2.403729200363159, - "learning_rate": 9.461502024340982e-06, - "loss": 0.6939, + "epoch": 0.4739880323829637, + "grad_norm": 1.8435977697372437, + "learning_rate": 7.927791636373398e-06, + "loss": 0.7083, "step": 6733 }, { - "epoch": 0.5095531761946199, - "grad_norm": 1.9017412662506104, - "learning_rate": 9.45925275575819e-06, - "loss": 0.8801, + "epoch": 0.47405843013023585, + "grad_norm": 1.6589529514312744, + "learning_rate": 7.926209588009896e-06, + "loss": 0.64, "step": 6734 }, { - "epoch": 0.5096288449169536, - "grad_norm": 2.5211994647979736, - "learning_rate": 9.457003458415504e-06, - "loss": 0.6766, + "epoch": 0.47412882787750793, + "grad_norm": 2.126020908355713, + "learning_rate": 7.924627491491917e-06, + "loss": 0.6489, "step": 6735 }, { - "epoch": 0.5097045136392871, - "grad_norm": 1.963262915611267, - "learning_rate": 9.454754132453783e-06, - "loss": 0.6982, + "epoch": 0.47419922562478, + "grad_norm": 2.018960952758789, + "learning_rate": 7.923045346901717e-06, + "loss": 0.6663, "step": 6736 }, { - "epoch": 0.5097801823616208, - "grad_norm": 2.2367377281188965, - "learning_rate": 9.452504778013888e-06, - "loss": 0.807, + "epoch": 0.4742696233720521, + "grad_norm": 1.7846347093582153, + "learning_rate": 7.921463154321553e-06, + "loss": 0.6368, "step": 6737 }, { - "epoch": 0.5098558510839545, - "grad_norm": 2.5869593620300293, - "learning_rate": 9.450255395236678e-06, - "loss": 0.7474, + "epoch": 0.47434002111932416, + "grad_norm": 1.669547438621521, + "learning_rate": 7.919880913833686e-06, + "loss": 0.7901, "step": 6738 }, { - "epoch": 0.509931519806288, - "grad_norm": 3.3216679096221924, - "learning_rate": 9.448005984263022e-06, - "loss": 0.581, + "epoch": 0.4744104188665963, + "grad_norm": 1.613043189048767, + "learning_rate": 7.918298625520379e-06, + "loss": 0.6952, "step": 6739 }, { - "epoch": 0.5100071885286217, - "grad_norm": 3.6230344772338867, - "learning_rate": 9.44575654523378e-06, - "loss": 0.7932, + "epoch": 0.4744808166138684, + "grad_norm": 2.6151413917541504, + "learning_rate": 7.916716289463891e-06, + "loss": 0.8331, "step": 6740 }, { - "epoch": 0.5100828572509554, - "grad_norm": 2.0393731594085693, - "learning_rate": 9.443507078289822e-06, - "loss": 0.7265, + "epoch": 0.47455121436114045, + "grad_norm": 1.2126036882400513, + "learning_rate": 7.915133905746495e-06, + "loss": 0.7747, "step": 6741 }, { - "epoch": 0.5101585259732889, - "grad_norm": 2.0096278190612793, - "learning_rate": 9.441257583572017e-06, - "loss": 0.7329, + "epoch": 0.4746216121084125, + "grad_norm": 1.7755547761917114, + "learning_rate": 7.91355147445046e-06, + "loss": 0.736, "step": 6742 }, { - "epoch": 0.5102341946956226, - "grad_norm": 2.389496088027954, - "learning_rate": 9.439008061221235e-06, - "loss": 0.752, + "epoch": 0.4746920098556846, + "grad_norm": 1.616078495979309, + "learning_rate": 7.911968995658053e-06, + "loss": 0.747, "step": 6743 }, { - "epoch": 0.5103098634179561, - "grad_norm": 2.3090922832489014, - "learning_rate": 9.436758511378348e-06, - "loss": 0.7655, + "epoch": 0.4747624076029567, + "grad_norm": 1.99058997631073, + "learning_rate": 7.910386469451553e-06, + "loss": 0.6266, "step": 6744 }, { - "epoch": 0.5103855321402898, - "grad_norm": 1.957068920135498, - "learning_rate": 9.434508934184228e-06, - "loss": 0.6015, + "epoch": 0.4748328053502288, + "grad_norm": 1.845469355583191, + "learning_rate": 7.90880389591324e-06, + "loss": 0.623, "step": 6745 }, { - "epoch": 0.5104612008626235, - "grad_norm": 2.256654739379883, - "learning_rate": 9.432259329779758e-06, - "loss": 0.7901, + "epoch": 0.4749032030975009, + "grad_norm": 1.6575201749801636, + "learning_rate": 7.907221275125385e-06, + "loss": 0.688, "step": 6746 }, { - "epoch": 0.510536869584957, - "grad_norm": 2.0302345752716064, - "learning_rate": 9.430009698305804e-06, - "loss": 0.8028, + "epoch": 0.47497360084477297, + "grad_norm": 1.8003562688827515, + "learning_rate": 7.905638607170274e-06, + "loss": 0.6812, "step": 6747 }, { - "epoch": 0.5106125383072907, - "grad_norm": 2.3182132244110107, - "learning_rate": 9.427760039903258e-06, - "loss": 0.6554, + "epoch": 0.47504399859204505, + "grad_norm": 1.8818820714950562, + "learning_rate": 7.904055892130196e-06, + "loss": 0.7288, "step": 6748 }, { - "epoch": 0.5106882070296243, - "grad_norm": 2.3157958984375, - "learning_rate": 9.425510354712992e-06, - "loss": 0.6733, + "epoch": 0.4751143963393171, + "grad_norm": 1.7230887413024902, + "learning_rate": 7.902473130087429e-06, + "loss": 0.7646, "step": 6749 }, { - "epoch": 0.5107638757519579, - "grad_norm": 1.7436319589614868, - "learning_rate": 9.423260642875892e-06, - "loss": 0.7008, + "epoch": 0.4751847940865892, + "grad_norm": 1.739432692527771, + "learning_rate": 7.900890321124266e-06, + "loss": 0.7681, "step": 6750 }, { - "epoch": 0.5108395444742916, - "grad_norm": 2.456576108932495, - "learning_rate": 9.421010904532843e-06, - "loss": 0.5907, + "epoch": 0.47525519183386133, + "grad_norm": 1.7356570959091187, + "learning_rate": 7.899307465323004e-06, + "loss": 0.6426, "step": 6751 }, { - "epoch": 0.5109152131966251, - "grad_norm": 2.08577561378479, - "learning_rate": 9.418761139824726e-06, - "loss": 0.6606, + "epoch": 0.4753255895811334, + "grad_norm": 1.8385905027389526, + "learning_rate": 7.897724562765928e-06, + "loss": 0.7409, "step": 6752 }, { - "epoch": 0.5109908819189588, - "grad_norm": 2.5024051666259766, - "learning_rate": 9.416511348892434e-06, - "loss": 0.7737, + "epoch": 0.4753959873284055, + "grad_norm": 2.226341724395752, + "learning_rate": 7.896141613535342e-06, + "loss": 0.6909, "step": 6753 }, { - "epoch": 0.5110665506412925, - "grad_norm": 2.1575140953063965, - "learning_rate": 9.414261531876855e-06, - "loss": 0.7219, + "epoch": 0.47546638507567757, + "grad_norm": 1.5906906127929688, + "learning_rate": 7.894558617713541e-06, + "loss": 0.6834, "step": 6754 }, { - "epoch": 0.511142219363626, - "grad_norm": 1.9586470127105713, - "learning_rate": 9.412011688918878e-06, - "loss": 0.8176, + "epoch": 0.47553678282294964, + "grad_norm": 1.6431958675384521, + "learning_rate": 7.892975575382827e-06, + "loss": 0.608, "step": 6755 }, { - "epoch": 0.5112178880859597, - "grad_norm": 2.153289556503296, - "learning_rate": 9.409761820159396e-06, - "loss": 0.8414, + "epoch": 0.4756071805702218, + "grad_norm": 1.5227729082107544, + "learning_rate": 7.891392486625504e-06, + "loss": 0.625, "step": 6756 }, { - "epoch": 0.5112935568082932, - "grad_norm": 2.560614585876465, - "learning_rate": 9.407511925739306e-06, - "loss": 0.6309, + "epoch": 0.47567757831749385, + "grad_norm": 1.677726149559021, + "learning_rate": 7.88980935152388e-06, + "loss": 0.7254, "step": 6757 }, { - "epoch": 0.5113692255306269, - "grad_norm": 2.1728456020355225, - "learning_rate": 9.405262005799498e-06, - "loss": 0.8317, + "epoch": 0.47574797606476593, + "grad_norm": 1.5039305686950684, + "learning_rate": 7.888226170160259e-06, + "loss": 0.6764, "step": 6758 }, { - "epoch": 0.5114448942529606, - "grad_norm": 2.0352866649627686, - "learning_rate": 9.403012060480872e-06, - "loss": 0.6428, + "epoch": 0.475818373812038, + "grad_norm": 1.617849588394165, + "learning_rate": 7.886642942616956e-06, + "loss": 0.774, "step": 6759 }, { - "epoch": 0.5115205629752941, - "grad_norm": 2.638589859008789, - "learning_rate": 9.400762089924329e-06, - "loss": 0.7381, + "epoch": 0.4758887715593101, + "grad_norm": 1.7383544445037842, + "learning_rate": 7.885059668976284e-06, + "loss": 0.6313, "step": 6760 }, { - "epoch": 0.5115962316976278, - "grad_norm": 2.571859836578369, - "learning_rate": 9.398512094270768e-06, - "loss": 0.7103, + "epoch": 0.47595916930658216, + "grad_norm": 1.7849175930023193, + "learning_rate": 7.883476349320558e-06, + "loss": 0.7509, "step": 6761 }, { - "epoch": 0.5116719004199614, - "grad_norm": 1.9200884103775024, - "learning_rate": 9.396262073661092e-06, - "loss": 0.7302, + "epoch": 0.4760295670538543, + "grad_norm": 1.8035820722579956, + "learning_rate": 7.881892983732096e-06, + "loss": 0.7083, "step": 6762 }, { - "epoch": 0.511747569142295, - "grad_norm": 2.4631552696228027, - "learning_rate": 9.394012028236199e-06, - "loss": 0.6403, + "epoch": 0.4760999648011264, + "grad_norm": 2.0053060054779053, + "learning_rate": 7.880309572293221e-06, + "loss": 0.6441, "step": 6763 }, { - "epoch": 0.5118232378646287, - "grad_norm": 2.1766726970672607, - "learning_rate": 9.391761958137e-06, - "loss": 0.8078, + "epoch": 0.47617036254839845, + "grad_norm": 1.58551025390625, + "learning_rate": 7.878726115086252e-06, + "loss": 0.7434, "step": 6764 }, { - "epoch": 0.5118989065869622, - "grad_norm": 1.9873024225234985, - "learning_rate": 9.389511863504403e-06, - "loss": 0.6108, + "epoch": 0.47624076029567053, + "grad_norm": 1.7277882099151611, + "learning_rate": 7.877142612193516e-06, + "loss": 0.7577, "step": 6765 }, { - "epoch": 0.5119745753092959, - "grad_norm": 2.2387278079986572, - "learning_rate": 9.38726174447931e-06, - "loss": 0.806, + "epoch": 0.4763111580429426, + "grad_norm": 1.681213617324829, + "learning_rate": 7.875559063697344e-06, + "loss": 0.7618, "step": 6766 }, { - "epoch": 0.5120502440316296, - "grad_norm": 2.1650137901306152, - "learning_rate": 9.385011601202637e-06, - "loss": 0.6439, + "epoch": 0.47638155579021474, + "grad_norm": 2.30100417137146, + "learning_rate": 7.873975469680062e-06, + "loss": 0.6727, "step": 6767 }, { - "epoch": 0.5121259127539631, - "grad_norm": 2.864428758621216, - "learning_rate": 9.382761433815289e-06, - "loss": 0.7591, + "epoch": 0.4764519535374868, + "grad_norm": 1.6710983514785767, + "learning_rate": 7.872391830224003e-06, + "loss": 0.7465, "step": 6768 }, { - "epoch": 0.5122015814762968, - "grad_norm": 2.0299293994903564, - "learning_rate": 9.380511242458185e-06, - "loss": 0.7405, + "epoch": 0.4765223512847589, + "grad_norm": 1.8669520616531372, + "learning_rate": 7.870808145411504e-06, + "loss": 0.7627, "step": 6769 }, { - "epoch": 0.5122772501986304, - "grad_norm": 2.2550573348999023, - "learning_rate": 9.378261027272231e-06, - "loss": 0.7065, + "epoch": 0.47659274903203097, + "grad_norm": 1.7266384363174438, + "learning_rate": 7.869224415324899e-06, + "loss": 0.6672, "step": 6770 }, { - "epoch": 0.512352918920964, - "grad_norm": 1.7595993280410767, - "learning_rate": 9.376010788398354e-06, - "loss": 0.7129, + "epoch": 0.47666314677930305, + "grad_norm": 1.5524400472640991, + "learning_rate": 7.86764064004653e-06, + "loss": 0.6605, "step": 6771 }, { - "epoch": 0.5124285876432977, - "grad_norm": 2.2322864532470703, - "learning_rate": 9.373760525977464e-06, - "loss": 0.7328, + "epoch": 0.4767335445265751, + "grad_norm": 1.888788104057312, + "learning_rate": 7.866056819658738e-06, + "loss": 0.6984, "step": 6772 }, { - "epoch": 0.5125042563656312, - "grad_norm": 1.9751027822494507, - "learning_rate": 9.37151024015048e-06, - "loss": 0.6518, + "epoch": 0.47680394227384726, + "grad_norm": 2.3503150939941406, + "learning_rate": 7.86447295424387e-06, + "loss": 0.7736, "step": 6773 }, { - "epoch": 0.5125799250879649, - "grad_norm": 2.4681308269500732, - "learning_rate": 9.369259931058326e-06, - "loss": 0.6701, + "epoch": 0.47687434002111934, + "grad_norm": 1.8432834148406982, + "learning_rate": 7.86288904388427e-06, + "loss": 0.7291, "step": 6774 }, { - "epoch": 0.5126555938102985, - "grad_norm": 2.20025372505188, - "learning_rate": 9.367009598841916e-06, - "loss": 0.7454, + "epoch": 0.4769447377683914, + "grad_norm": 1.5287115573883057, + "learning_rate": 7.861305088662283e-06, + "loss": 0.8011, "step": 6775 }, { - "epoch": 0.5127312625326321, - "grad_norm": 2.411095380783081, - "learning_rate": 9.36475924364218e-06, - "loss": 0.8019, + "epoch": 0.4770151355156635, + "grad_norm": 1.5108696222305298, + "learning_rate": 7.85972108866027e-06, + "loss": 0.662, "step": 6776 }, { - "epoch": 0.5128069312549658, - "grad_norm": 2.401850938796997, - "learning_rate": 9.362508865600039e-06, - "loss": 0.6141, + "epoch": 0.47708553326293557, + "grad_norm": 1.983817219734192, + "learning_rate": 7.858137043960574e-06, + "loss": 0.7181, "step": 6777 }, { - "epoch": 0.5128825999772993, - "grad_norm": 2.024711847305298, - "learning_rate": 9.360258464856422e-06, - "loss": 0.6127, + "epoch": 0.47715593101020765, + "grad_norm": 2.0147266387939453, + "learning_rate": 7.856552954645558e-06, + "loss": 0.6873, "step": 6778 }, { - "epoch": 0.512958268699633, - "grad_norm": 2.656041145324707, - "learning_rate": 9.358008041552254e-06, - "loss": 0.7047, + "epoch": 0.4772263287574798, + "grad_norm": 1.8152447938919067, + "learning_rate": 7.85496882079758e-06, + "loss": 0.6704, "step": 6779 }, { - "epoch": 0.5130339374219667, - "grad_norm": 2.6526777744293213, - "learning_rate": 9.355757595828464e-06, - "loss": 0.831, + "epoch": 0.47729672650475186, + "grad_norm": 1.962342619895935, + "learning_rate": 7.853384642498996e-06, + "loss": 0.7447, "step": 6780 }, { - "epoch": 0.5131096061443002, - "grad_norm": 2.158024787902832, - "learning_rate": 9.353507127825985e-06, - "loss": 0.6814, + "epoch": 0.47736712425202393, + "grad_norm": 1.7407258749008179, + "learning_rate": 7.851800419832172e-06, + "loss": 0.7036, "step": 6781 }, { - "epoch": 0.5131852748666339, - "grad_norm": 2.018216848373413, - "learning_rate": 9.351256637685745e-06, - "loss": 0.6801, + "epoch": 0.477437521999296, + "grad_norm": 1.9676868915557861, + "learning_rate": 7.850216152879474e-06, + "loss": 0.7059, "step": 6782 }, { - "epoch": 0.5132609435889675, - "grad_norm": 2.381420135498047, - "learning_rate": 9.349006125548676e-06, - "loss": 0.708, + "epoch": 0.4775079197465681, + "grad_norm": 1.8255327939987183, + "learning_rate": 7.848631841723267e-06, + "loss": 0.6588, "step": 6783 }, { - "epoch": 0.5133366123113011, - "grad_norm": 2.565141201019287, - "learning_rate": 9.346755591555718e-06, - "loss": 0.7645, + "epoch": 0.4775783174938402, + "grad_norm": 1.7491040229797363, + "learning_rate": 7.847047486445922e-06, + "loss": 0.6114, "step": 6784 }, { - "epoch": 0.5134122810336348, - "grad_norm": 2.3605058193206787, - "learning_rate": 9.344505035847804e-06, - "loss": 0.6184, + "epoch": 0.4776487152411123, + "grad_norm": 2.599726915359497, + "learning_rate": 7.845463087129812e-06, + "loss": 0.7537, "step": 6785 }, { - "epoch": 0.5134879497559683, - "grad_norm": 2.587228536605835, - "learning_rate": 9.342254458565865e-06, - "loss": 0.7598, + "epoch": 0.4777191129883844, + "grad_norm": 1.7656313180923462, + "learning_rate": 7.84387864385731e-06, + "loss": 0.6491, "step": 6786 }, { - "epoch": 0.513563618478302, - "grad_norm": 1.9587780237197876, - "learning_rate": 9.34000385985085e-06, - "loss": 0.7085, + "epoch": 0.47778951073565645, + "grad_norm": 1.9474107027053833, + "learning_rate": 7.842294156710792e-06, + "loss": 0.6144, "step": 6787 }, { - "epoch": 0.5136392872006356, - "grad_norm": 1.9432623386383057, - "learning_rate": 9.337753239843691e-06, - "loss": 0.6803, + "epoch": 0.47785990848292853, + "grad_norm": 1.653998851776123, + "learning_rate": 7.840709625772641e-06, + "loss": 0.7486, "step": 6788 }, { - "epoch": 0.5137149559229692, - "grad_norm": 2.544887065887451, - "learning_rate": 9.335502598685333e-06, - "loss": 0.8014, + "epoch": 0.4779303062302006, + "grad_norm": 1.4568196535110474, + "learning_rate": 7.83912505112523e-06, + "loss": 0.8302, "step": 6789 }, { - "epoch": 0.5137906246453029, - "grad_norm": 2.4957051277160645, - "learning_rate": 9.333251936516718e-06, - "loss": 0.7523, + "epoch": 0.47800070397747274, + "grad_norm": 2.057088613510132, + "learning_rate": 7.837540432850953e-06, + "loss": 0.751, "step": 6790 }, { - "epoch": 0.5138662933676365, - "grad_norm": 2.6802759170532227, - "learning_rate": 9.331001253478786e-06, - "loss": 0.6507, + "epoch": 0.4780711017247448, + "grad_norm": 1.8542979955673218, + "learning_rate": 7.835955771032187e-06, + "loss": 0.7578, "step": 6791 }, { - "epoch": 0.5139419620899701, - "grad_norm": 2.0066659450531006, - "learning_rate": 9.328750549712486e-06, - "loss": 0.675, + "epoch": 0.4781414994720169, + "grad_norm": 1.98606276512146, + "learning_rate": 7.834371065751324e-06, + "loss": 0.7466, "step": 6792 }, { - "epoch": 0.5140176308123038, - "grad_norm": 2.4365880489349365, - "learning_rate": 9.326499825358763e-06, - "loss": 0.6524, + "epoch": 0.478211897219289, + "grad_norm": 1.7949435710906982, + "learning_rate": 7.832786317090754e-06, + "loss": 0.8025, "step": 6793 }, { - "epoch": 0.5140932995346373, - "grad_norm": 1.9508459568023682, - "learning_rate": 9.324249080558565e-06, - "loss": 0.7767, + "epoch": 0.47828229496656105, + "grad_norm": 1.7889776229858398, + "learning_rate": 7.831201525132868e-06, + "loss": 0.6185, "step": 6794 }, { - "epoch": 0.514168968256971, - "grad_norm": 2.4417145252227783, - "learning_rate": 9.321998315452841e-06, - "loss": 0.7902, + "epoch": 0.4783526927138332, + "grad_norm": 2.0125715732574463, + "learning_rate": 7.829616689960063e-06, + "loss": 0.6421, "step": 6795 }, { - "epoch": 0.5142446369793046, - "grad_norm": 2.5067169666290283, - "learning_rate": 9.319747530182542e-06, - "loss": 0.7099, + "epoch": 0.47842309046110526, + "grad_norm": 1.8744618892669678, + "learning_rate": 7.828031811654738e-06, + "loss": 0.8033, "step": 6796 }, { - "epoch": 0.5143203057016382, - "grad_norm": 2.2060787677764893, - "learning_rate": 9.31749672488862e-06, - "loss": 0.6742, + "epoch": 0.47849348820837734, + "grad_norm": 1.6122593879699707, + "learning_rate": 7.826446890299284e-06, + "loss": 0.6939, "step": 6797 }, { - "epoch": 0.5143959744239719, - "grad_norm": 2.209672689437866, - "learning_rate": 9.315245899712022e-06, - "loss": 0.594, + "epoch": 0.4785638859556494, + "grad_norm": 1.66451895236969, + "learning_rate": 7.82486192597611e-06, + "loss": 0.7637, "step": 6798 }, { - "epoch": 0.5144716431463054, - "grad_norm": 2.720317840576172, - "learning_rate": 9.312995054793708e-06, - "loss": 0.8389, + "epoch": 0.4786342837029215, + "grad_norm": 1.9264154434204102, + "learning_rate": 7.823276918767618e-06, + "loss": 0.6622, "step": 6799 }, { - "epoch": 0.5145473118686391, - "grad_norm": 2.3275961875915527, - "learning_rate": 9.310744190274631e-06, - "loss": 0.7015, + "epoch": 0.47870468145019357, + "grad_norm": 1.8041987419128418, + "learning_rate": 7.821691868756214e-06, + "loss": 0.6672, "step": 6800 }, { - "epoch": 0.5146229805909727, - "grad_norm": 2.169250726699829, - "learning_rate": 9.308493306295748e-06, - "loss": 0.6436, + "epoch": 0.4787750791974657, + "grad_norm": 1.7057160139083862, + "learning_rate": 7.820106776024303e-06, + "loss": 0.6944, "step": 6801 }, { - "epoch": 0.5146986493133063, - "grad_norm": 2.38690185546875, - "learning_rate": 9.306242402998016e-06, - "loss": 0.8256, + "epoch": 0.4788454769447378, + "grad_norm": 2.0275118350982666, + "learning_rate": 7.8185216406543e-06, + "loss": 0.7639, "step": 6802 }, { - "epoch": 0.51477431803564, - "grad_norm": 2.143653631210327, - "learning_rate": 9.303991480522397e-06, - "loss": 0.727, + "epoch": 0.47891587469200986, + "grad_norm": 1.6468479633331299, + "learning_rate": 7.816936462728613e-06, + "loss": 0.643, "step": 6803 }, { - "epoch": 0.5148499867579736, - "grad_norm": 2.64727783203125, - "learning_rate": 9.301740539009845e-06, - "loss": 0.7609, + "epoch": 0.47898627243928193, + "grad_norm": 1.7893774509429932, + "learning_rate": 7.815351242329662e-06, + "loss": 0.7106, "step": 6804 }, { - "epoch": 0.5149256554803072, - "grad_norm": 2.108668565750122, - "learning_rate": 9.299489578601326e-06, - "loss": 0.7427, + "epoch": 0.479056670186554, + "grad_norm": 1.7952438592910767, + "learning_rate": 7.813765979539863e-06, + "loss": 0.6891, "step": 6805 }, { - "epoch": 0.5150013242026409, - "grad_norm": 2.263934850692749, - "learning_rate": 9.2972385994378e-06, - "loss": 0.7691, + "epoch": 0.47912706793382615, + "grad_norm": 2.086951494216919, + "learning_rate": 7.81218067444163e-06, + "loss": 0.688, "step": 6806 }, { - "epoch": 0.5150769929249744, - "grad_norm": 2.89751935005188, - "learning_rate": 9.294987601660231e-06, - "loss": 0.7268, + "epoch": 0.4791974656810982, + "grad_norm": 1.694872498512268, + "learning_rate": 7.81059532711739e-06, + "loss": 0.6482, "step": 6807 }, { - "epoch": 0.5151526616473081, - "grad_norm": 2.1842520236968994, - "learning_rate": 9.292736585409588e-06, - "loss": 0.7398, + "epoch": 0.4792678634283703, + "grad_norm": 1.5171092748641968, + "learning_rate": 7.809009937649566e-06, + "loss": 0.7119, "step": 6808 }, { - "epoch": 0.5152283303696417, - "grad_norm": 2.5846338272094727, - "learning_rate": 9.290485550826828e-06, - "loss": 0.7019, + "epoch": 0.4793382611756424, + "grad_norm": 1.6777969598770142, + "learning_rate": 7.80742450612058e-06, + "loss": 0.7743, "step": 6809 }, { - "epoch": 0.5153039990919753, - "grad_norm": 2.2591192722320557, - "learning_rate": 9.288234498052927e-06, - "loss": 0.7099, + "epoch": 0.47940865892291445, + "grad_norm": 1.7188845872879028, + "learning_rate": 7.805839032612863e-06, + "loss": 0.6551, "step": 6810 }, { - "epoch": 0.515379667814309, - "grad_norm": 2.2069311141967773, - "learning_rate": 9.285983427228849e-06, - "loss": 0.658, + "epoch": 0.47947905667018653, + "grad_norm": 2.0222718715667725, + "learning_rate": 7.804253517208848e-06, + "loss": 0.6913, "step": 6811 }, { - "epoch": 0.5154553365366425, - "grad_norm": 2.1312320232391357, - "learning_rate": 9.283732338495562e-06, - "loss": 0.7175, + "epoch": 0.47954945441745866, + "grad_norm": 1.732547402381897, + "learning_rate": 7.802667959990961e-06, + "loss": 0.7371, "step": 6812 }, { - "epoch": 0.5155310052589762, - "grad_norm": 3.7413275241851807, - "learning_rate": 9.28148123199404e-06, - "loss": 0.6437, + "epoch": 0.47961985216473074, + "grad_norm": 1.7483415603637695, + "learning_rate": 7.80108236104164e-06, + "loss": 0.7217, "step": 6813 }, { - "epoch": 0.5156066739813098, - "grad_norm": 2.0996272563934326, - "learning_rate": 9.27923010786525e-06, - "loss": 0.6743, + "epoch": 0.4796902499120028, + "grad_norm": 1.6961084604263306, + "learning_rate": 7.799496720443326e-06, + "loss": 0.6989, "step": 6814 }, { - "epoch": 0.5156823427036434, - "grad_norm": 2.2065136432647705, - "learning_rate": 9.27697896625017e-06, - "loss": 0.7956, + "epoch": 0.4797606476592749, + "grad_norm": 1.798318862915039, + "learning_rate": 7.797911038278448e-06, + "loss": 0.5606, "step": 6815 }, { - "epoch": 0.5157580114259771, - "grad_norm": 2.435471534729004, - "learning_rate": 9.274727807289772e-06, - "loss": 0.5638, + "epoch": 0.479831045406547, + "grad_norm": 1.5289281606674194, + "learning_rate": 7.796325314629453e-06, + "loss": 0.7196, "step": 6816 }, { - "epoch": 0.5158336801483107, - "grad_norm": 2.6238391399383545, - "learning_rate": 9.272476631125027e-06, - "loss": 0.8299, + "epoch": 0.47990144315381905, + "grad_norm": 3.0809807777404785, + "learning_rate": 7.794739549578786e-06, + "loss": 0.7038, "step": 6817 }, { - "epoch": 0.5159093488706443, - "grad_norm": 2.0177040100097656, - "learning_rate": 9.270225437896916e-06, - "loss": 0.7226, + "epoch": 0.4799718409010912, + "grad_norm": 1.6639717817306519, + "learning_rate": 7.79315374320889e-06, + "loss": 0.7085, "step": 6818 }, { - "epoch": 0.515985017592978, - "grad_norm": 2.575056552886963, - "learning_rate": 9.267974227746415e-06, - "loss": 0.6647, + "epoch": 0.48004223864836326, + "grad_norm": 1.9146907329559326, + "learning_rate": 7.791567895602211e-06, + "loss": 0.5771, "step": 6819 }, { - "epoch": 0.5160606863153115, - "grad_norm": 2.2004969120025635, - "learning_rate": 9.265723000814501e-06, - "loss": 0.739, + "epoch": 0.48011263639563534, + "grad_norm": 1.652601957321167, + "learning_rate": 7.789982006841203e-06, + "loss": 0.7023, "step": 6820 }, { - "epoch": 0.5161363550376452, - "grad_norm": 1.8651002645492554, - "learning_rate": 9.263471757242153e-06, - "loss": 0.6448, + "epoch": 0.4801830341429074, + "grad_norm": 1.6744959354400635, + "learning_rate": 7.788396077008311e-06, + "loss": 0.6755, "step": 6821 }, { - "epoch": 0.5162120237599788, - "grad_norm": 2.0674023628234863, - "learning_rate": 9.261220497170349e-06, - "loss": 0.6993, + "epoch": 0.4802534318901795, + "grad_norm": 1.8997304439544678, + "learning_rate": 7.786810106185995e-06, + "loss": 0.6364, "step": 6822 }, { - "epoch": 0.5162876924823124, - "grad_norm": 2.2824978828430176, - "learning_rate": 9.258969220740075e-06, - "loss": 0.7217, + "epoch": 0.4803238296374516, + "grad_norm": 1.894555926322937, + "learning_rate": 7.78522409445671e-06, + "loss": 0.7505, "step": 6823 }, { - "epoch": 0.5163633612046461, - "grad_norm": 2.708824634552002, - "learning_rate": 9.25671792809231e-06, - "loss": 0.6483, + "epoch": 0.4803942273847237, + "grad_norm": 1.6558303833007812, + "learning_rate": 7.783638041902911e-06, + "loss": 0.8226, "step": 6824 }, { - "epoch": 0.5164390299269797, - "grad_norm": 4.072855472564697, - "learning_rate": 9.254466619368038e-06, - "loss": 0.5936, + "epoch": 0.4804646251319958, + "grad_norm": 1.7750391960144043, + "learning_rate": 7.78205194860706e-06, + "loss": 0.6479, "step": 6825 }, { - "epoch": 0.5165146986493133, - "grad_norm": 8.13134765625, - "learning_rate": 9.252215294708247e-06, - "loss": 0.7496, + "epoch": 0.48053502287926786, + "grad_norm": 1.760072946548462, + "learning_rate": 7.780465814651622e-06, + "loss": 0.7342, "step": 6826 }, { - "epoch": 0.5165903673716469, - "grad_norm": 2.2548294067382812, - "learning_rate": 9.249963954253913e-06, - "loss": 0.5736, + "epoch": 0.48060542062653994, + "grad_norm": 1.8609955310821533, + "learning_rate": 7.778879640119062e-06, + "loss": 0.7848, "step": 6827 }, { - "epoch": 0.5166660360939805, - "grad_norm": 2.1357603073120117, - "learning_rate": 9.247712598146028e-06, - "loss": 0.7561, + "epoch": 0.480675818373812, + "grad_norm": 1.8693819046020508, + "learning_rate": 7.777293425091838e-06, + "loss": 0.8269, "step": 6828 }, { - "epoch": 0.5167417048163142, - "grad_norm": 1.8459572792053223, - "learning_rate": 9.245461226525584e-06, - "loss": 0.6388, + "epoch": 0.48074621612108415, + "grad_norm": 1.8851139545440674, + "learning_rate": 7.775707169652429e-06, + "loss": 0.7324, "step": 6829 }, { - "epoch": 0.5168173735386478, - "grad_norm": 2.7423040866851807, - "learning_rate": 9.24320983953356e-06, - "loss": 0.8378, + "epoch": 0.4808166138683562, + "grad_norm": 1.7135026454925537, + "learning_rate": 7.7741208738833e-06, + "loss": 0.6112, "step": 6830 }, { - "epoch": 0.5168930422609814, - "grad_norm": 2.403897523880005, - "learning_rate": 9.24095843731095e-06, - "loss": 0.603, + "epoch": 0.4808870116156283, + "grad_norm": 1.630720615386963, + "learning_rate": 7.772534537866926e-06, + "loss": 0.6855, "step": 6831 }, { - "epoch": 0.5169687109833151, - "grad_norm": 2.0380334854125977, - "learning_rate": 9.23870701999874e-06, - "loss": 0.6979, + "epoch": 0.4809574093629004, + "grad_norm": 1.844660997390747, + "learning_rate": 7.770948161685783e-06, + "loss": 0.6802, "step": 6832 }, { - "epoch": 0.5170443797056486, - "grad_norm": 2.8560407161712646, - "learning_rate": 9.23645558773793e-06, - "loss": 0.7479, + "epoch": 0.48102780711017246, + "grad_norm": 1.9692963361740112, + "learning_rate": 7.769361745422347e-06, + "loss": 0.7687, "step": 6833 }, { - "epoch": 0.5171200484279823, - "grad_norm": 2.2404606342315674, - "learning_rate": 9.234204140669502e-06, - "loss": 0.5442, + "epoch": 0.4810982048574446, + "grad_norm": 1.6308640241622925, + "learning_rate": 7.767775289159095e-06, + "loss": 0.6785, "step": 6834 }, { - "epoch": 0.5171957171503159, - "grad_norm": 1.846641182899475, - "learning_rate": 9.231952678934456e-06, - "loss": 0.8732, + "epoch": 0.48116860260471667, + "grad_norm": 1.6492557525634766, + "learning_rate": 7.76618879297851e-06, + "loss": 0.6873, "step": 6835 }, { - "epoch": 0.5172713858726495, - "grad_norm": 2.014122486114502, - "learning_rate": 9.229701202673781e-06, - "loss": 0.6284, + "epoch": 0.48123900035198874, + "grad_norm": 1.8350876569747925, + "learning_rate": 7.76460225696308e-06, + "loss": 0.7817, "step": 6836 }, { - "epoch": 0.5173470545949832, - "grad_norm": 1.9352043867111206, - "learning_rate": 9.227449712028475e-06, - "loss": 0.7152, + "epoch": 0.4813093980992608, + "grad_norm": 1.6560730934143066, + "learning_rate": 7.763015681195283e-06, + "loss": 0.7497, "step": 6837 }, { - "epoch": 0.5174227233173168, - "grad_norm": 2.4555435180664062, - "learning_rate": 9.225198207139533e-06, - "loss": 0.6324, + "epoch": 0.4813797958465329, + "grad_norm": 1.9248439073562622, + "learning_rate": 7.761429065757608e-06, + "loss": 0.677, "step": 6838 }, { - "epoch": 0.5174983920396504, - "grad_norm": 2.332766056060791, - "learning_rate": 9.222946688147949e-06, - "loss": 0.668, + "epoch": 0.481450193593805, + "grad_norm": 2.6765871047973633, + "learning_rate": 7.759842410732549e-06, + "loss": 0.9269, "step": 6839 }, { - "epoch": 0.5175740607619841, - "grad_norm": 2.4688680171966553, - "learning_rate": 9.220695155194724e-06, - "loss": 0.7283, + "epoch": 0.4815205913410771, + "grad_norm": 2.0128135681152344, + "learning_rate": 7.758255716202593e-06, + "loss": 0.7108, "step": 6840 }, { - "epoch": 0.5176497294843176, - "grad_norm": 2.0865702629089355, - "learning_rate": 9.218443608420855e-06, - "loss": 0.6561, + "epoch": 0.4815909890883492, + "grad_norm": 2.12467098236084, + "learning_rate": 7.756668982250238e-06, + "loss": 0.7843, "step": 6841 }, { - "epoch": 0.5177253982066513, - "grad_norm": 1.7171027660369873, - "learning_rate": 9.216192047967337e-06, - "loss": 0.537, + "epoch": 0.48166138683562126, + "grad_norm": 1.8050010204315186, + "learning_rate": 7.755082208957975e-06, + "loss": 0.686, "step": 6842 }, { - "epoch": 0.5178010669289849, - "grad_norm": 2.2573938369750977, - "learning_rate": 9.213940473975178e-06, - "loss": 0.7413, + "epoch": 0.48173178458289334, + "grad_norm": 1.8978787660598755, + "learning_rate": 7.753495396408307e-06, + "loss": 0.7196, "step": 6843 }, { - "epoch": 0.5178767356513185, - "grad_norm": 1.9172108173370361, - "learning_rate": 9.211688886585373e-06, - "loss": 0.5654, + "epoch": 0.4818021823301654, + "grad_norm": 2.349475383758545, + "learning_rate": 7.75190854468373e-06, + "loss": 0.7066, "step": 6844 }, { - "epoch": 0.5179524043736522, - "grad_norm": 2.324502468109131, - "learning_rate": 9.209437285938926e-06, - "loss": 0.6488, + "epoch": 0.4818725800774375, + "grad_norm": 1.5959168672561646, + "learning_rate": 7.750321653866745e-06, + "loss": 0.7257, "step": 6845 }, { - "epoch": 0.5180280730959858, - "grad_norm": 1.878939151763916, - "learning_rate": 9.207185672176837e-06, - "loss": 0.7264, + "epoch": 0.48194297782470963, + "grad_norm": 2.206106662750244, + "learning_rate": 7.748734724039857e-06, + "loss": 0.6676, "step": 6846 }, { - "epoch": 0.5181037418183194, - "grad_norm": 2.321331262588501, - "learning_rate": 9.204934045440111e-06, - "loss": 0.8104, + "epoch": 0.4820133755719817, + "grad_norm": 1.589002251625061, + "learning_rate": 7.747147755285577e-06, + "loss": 0.7494, "step": 6847 }, { - "epoch": 0.518179410540653, - "grad_norm": 2.2414441108703613, - "learning_rate": 9.202682405869753e-06, - "loss": 0.8987, + "epoch": 0.4820837733192538, + "grad_norm": 1.5332880020141602, + "learning_rate": 7.745560747686406e-06, + "loss": 0.6531, "step": 6848 }, { - "epoch": 0.5182550792629866, - "grad_norm": 2.0336124897003174, - "learning_rate": 9.20043075360677e-06, - "loss": 0.7724, + "epoch": 0.48215417106652586, + "grad_norm": 1.626341462135315, + "learning_rate": 7.743973701324854e-06, + "loss": 0.6427, "step": 6849 }, { - "epoch": 0.5183307479853203, - "grad_norm": 2.182743787765503, - "learning_rate": 9.198179088792159e-06, - "loss": 0.8737, + "epoch": 0.48222456881379794, + "grad_norm": 1.674383282661438, + "learning_rate": 7.74238661628344e-06, + "loss": 0.6109, "step": 6850 }, { - "epoch": 0.5184064167076539, - "grad_norm": 1.6060758829116821, - "learning_rate": 9.195927411566938e-06, - "loss": 0.6383, + "epoch": 0.48229496656107007, + "grad_norm": 1.8084361553192139, + "learning_rate": 7.74079949264467e-06, + "loss": 0.6185, "step": 6851 }, { - "epoch": 0.5184820854299875, - "grad_norm": 1.7125988006591797, - "learning_rate": 9.193675722072106e-06, - "loss": 0.6993, + "epoch": 0.48236536430834215, + "grad_norm": 1.7718815803527832, + "learning_rate": 7.739212330491066e-06, + "loss": 0.6956, "step": 6852 }, { - "epoch": 0.5185577541523212, - "grad_norm": 2.322448968887329, - "learning_rate": 9.191424020448673e-06, - "loss": 0.7036, + "epoch": 0.4824357620556142, + "grad_norm": 2.0361151695251465, + "learning_rate": 7.737625129905142e-06, + "loss": 0.6327, "step": 6853 }, { - "epoch": 0.5186334228746547, - "grad_norm": 2.939868688583374, - "learning_rate": 9.189172306837653e-06, - "loss": 0.6713, + "epoch": 0.4825061598028863, + "grad_norm": 1.7237802743911743, + "learning_rate": 7.73603789096942e-06, + "loss": 0.6821, "step": 6854 }, { - "epoch": 0.5187090915969884, - "grad_norm": 4.6412811279296875, - "learning_rate": 9.186920581380045e-06, - "loss": 0.6601, + "epoch": 0.4825765575501584, + "grad_norm": 1.6389198303222656, + "learning_rate": 7.73445061376642e-06, + "loss": 0.745, "step": 6855 }, { - "epoch": 0.518784760319322, - "grad_norm": 7.659454822540283, - "learning_rate": 9.184668844216872e-06, - "loss": 0.8464, + "epoch": 0.48264695529743046, + "grad_norm": 3.436924934387207, + "learning_rate": 7.732863298378671e-06, + "loss": 0.6952, "step": 6856 }, { - "epoch": 0.5188604290416556, - "grad_norm": 6.378112316131592, - "learning_rate": 9.182417095489135e-06, - "loss": 0.7449, + "epoch": 0.4827173530447026, + "grad_norm": 2.052011013031006, + "learning_rate": 7.731275944888692e-06, + "loss": 0.6784, "step": 6857 }, { - "epoch": 0.5189360977639893, - "grad_norm": 1.7814310789108276, - "learning_rate": 9.180165335337848e-06, - "loss": 0.7629, + "epoch": 0.48278775079197467, + "grad_norm": 2.1205978393554688, + "learning_rate": 7.729688553379015e-06, + "loss": 0.6754, "step": 6858 }, { - "epoch": 0.5190117664863229, - "grad_norm": 2.039429187774658, - "learning_rate": 9.177913563904029e-06, - "loss": 0.7624, + "epoch": 0.48285814853924675, + "grad_norm": 1.6376497745513916, + "learning_rate": 7.728101123932171e-06, + "loss": 0.656, "step": 6859 }, { - "epoch": 0.5190874352086565, - "grad_norm": 2.5181028842926025, - "learning_rate": 9.175661781328684e-06, - "loss": 0.709, + "epoch": 0.4829285462865188, + "grad_norm": 1.5531662702560425, + "learning_rate": 7.726513656630688e-06, + "loss": 0.641, "step": 6860 }, { - "epoch": 0.5191631039309901, - "grad_norm": 2.226398229598999, - "learning_rate": 9.173409987752834e-06, - "loss": 0.6182, + "epoch": 0.4829989440337909, + "grad_norm": 1.5313804149627686, + "learning_rate": 7.724926151557102e-06, + "loss": 0.7632, "step": 6861 }, { - "epoch": 0.5192387726533237, - "grad_norm": 1.9586197137832642, - "learning_rate": 9.171158183317486e-06, - "loss": 0.6997, + "epoch": 0.48306934178106303, + "grad_norm": 1.958071231842041, + "learning_rate": 7.72333860879395e-06, + "loss": 0.6493, "step": 6862 }, { - "epoch": 0.5193144413756574, - "grad_norm": 2.897284984588623, - "learning_rate": 9.16890636816366e-06, - "loss": 0.8039, + "epoch": 0.4831397395283351, + "grad_norm": 1.802428960800171, + "learning_rate": 7.72175102842377e-06, + "loss": 0.635, "step": 6863 }, { - "epoch": 0.519390110097991, - "grad_norm": 2.2634551525115967, - "learning_rate": 9.166654542432372e-06, - "loss": 0.7075, + "epoch": 0.4832101372756072, + "grad_norm": 1.6875947713851929, + "learning_rate": 7.720163410529097e-06, + "loss": 0.6771, "step": 6864 }, { - "epoch": 0.5194657788203246, - "grad_norm": 3.2109482288360596, - "learning_rate": 9.164402706264635e-06, - "loss": 0.6153, + "epoch": 0.48328053502287927, + "grad_norm": 2.0611698627471924, + "learning_rate": 7.718575755192478e-06, + "loss": 0.6702, "step": 6865 }, { - "epoch": 0.5195414475426583, - "grad_norm": 2.3147075176239014, - "learning_rate": 9.16215085980147e-06, - "loss": 0.7481, + "epoch": 0.48335093277015134, + "grad_norm": 1.6572887897491455, + "learning_rate": 7.716988062496454e-06, + "loss": 0.6124, "step": 6866 }, { - "epoch": 0.5196171162649919, - "grad_norm": 5.927982330322266, - "learning_rate": 9.159899003183894e-06, - "loss": 0.6095, + "epoch": 0.4834213305174234, + "grad_norm": 1.5559699535369873, + "learning_rate": 7.71540033252357e-06, + "loss": 0.7831, "step": 6867 }, { - "epoch": 0.5196927849873255, - "grad_norm": 1.7811951637268066, - "learning_rate": 9.157647136552926e-06, - "loss": 0.8572, + "epoch": 0.48349172826469555, + "grad_norm": 1.7959307432174683, + "learning_rate": 7.71381256535638e-06, + "loss": 0.6801, "step": 6868 }, { - "epoch": 0.5197684537096591, - "grad_norm": 1.9003556966781616, - "learning_rate": 9.155395260049584e-06, - "loss": 0.7132, + "epoch": 0.48356212601196763, + "grad_norm": 1.8208023309707642, + "learning_rate": 7.712224761077424e-06, + "loss": 0.7323, "step": 6869 }, { - "epoch": 0.5198441224319927, - "grad_norm": 2.136584520339966, - "learning_rate": 9.153143373814887e-06, - "loss": 0.685, + "epoch": 0.4836325237592397, + "grad_norm": 1.8582745790481567, + "learning_rate": 7.710636919769258e-06, + "loss": 0.6894, "step": 6870 }, { - "epoch": 0.5199197911543264, - "grad_norm": 3.125502109527588, - "learning_rate": 9.150891477989858e-06, - "loss": 0.8047, + "epoch": 0.4837029215065118, + "grad_norm": 1.5149494409561157, + "learning_rate": 7.709049041514436e-06, + "loss": 0.7089, "step": 6871 }, { - "epoch": 0.51999545987666, - "grad_norm": 3.0765082836151123, - "learning_rate": 9.148639572715517e-06, - "loss": 0.5735, + "epoch": 0.48377331925378386, + "grad_norm": 1.8558614253997803, + "learning_rate": 7.70746112639551e-06, + "loss": 0.6119, "step": 6872 }, { - "epoch": 0.5200711285989936, - "grad_norm": 2.166370153427124, - "learning_rate": 9.146387658132881e-06, - "loss": 0.8376, + "epoch": 0.48384371700105594, + "grad_norm": 1.5925757884979248, + "learning_rate": 7.705873174495041e-06, + "loss": 0.5966, "step": 6873 }, { - "epoch": 0.5201467973213272, - "grad_norm": 2.2477190494537354, - "learning_rate": 9.144135734382983e-06, - "loss": 0.7244, + "epoch": 0.4839141147483281, + "grad_norm": 2.1561038494110107, + "learning_rate": 7.704285185895587e-06, + "loss": 0.7329, "step": 6874 }, { - "epoch": 0.5202224660436608, - "grad_norm": 2.6588950157165527, - "learning_rate": 9.141883801606836e-06, - "loss": 0.6419, + "epoch": 0.48398451249560015, + "grad_norm": 1.8409711122512817, + "learning_rate": 7.702697160679709e-06, + "loss": 0.6189, "step": 6875 }, { - "epoch": 0.5202981347659945, - "grad_norm": 2.5737180709838867, - "learning_rate": 9.139631859945466e-06, - "loss": 0.8748, + "epoch": 0.4840549102428722, + "grad_norm": 2.340301036834717, + "learning_rate": 7.701109098929966e-06, + "loss": 0.7196, "step": 6876 }, { - "epoch": 0.5203738034883281, - "grad_norm": 1.9617338180541992, - "learning_rate": 9.137379909539897e-06, - "loss": 0.7089, + "epoch": 0.4841253079901443, + "grad_norm": 1.6583633422851562, + "learning_rate": 7.69952100072893e-06, + "loss": 0.6701, "step": 6877 }, { - "epoch": 0.5204494722106617, - "grad_norm": 2.4707319736480713, - "learning_rate": 9.135127950531153e-06, - "loss": 0.6812, + "epoch": 0.4841957057374164, + "grad_norm": 1.9281015396118164, + "learning_rate": 7.697932866159162e-06, + "loss": 0.7482, "step": 6878 }, { - "epoch": 0.5205251409329954, - "grad_norm": 1.5463811159133911, - "learning_rate": 9.132875983060262e-06, - "loss": 0.8144, + "epoch": 0.4842661034846885, + "grad_norm": 1.9697331190109253, + "learning_rate": 7.69634469530323e-06, + "loss": 0.5796, "step": 6879 }, { - "epoch": 0.520600809655329, - "grad_norm": 2.1439154148101807, - "learning_rate": 9.130624007268247e-06, - "loss": 0.7185, + "epoch": 0.4843365012319606, + "grad_norm": 2.0149383544921875, + "learning_rate": 7.694756488243707e-06, + "loss": 0.7254, "step": 6880 }, { - "epoch": 0.5206764783776626, - "grad_norm": 2.178358554840088, - "learning_rate": 9.128372023296132e-06, - "loss": 0.7119, + "epoch": 0.48440689897923267, + "grad_norm": 1.7193833589553833, + "learning_rate": 7.693168245063168e-06, + "loss": 0.6366, "step": 6881 }, { - "epoch": 0.5207521470999962, - "grad_norm": 2.2699880599975586, - "learning_rate": 9.126120031284947e-06, - "loss": 0.7, + "epoch": 0.48447729672650475, + "grad_norm": 2.179192304611206, + "learning_rate": 7.69157996584418e-06, + "loss": 0.7634, "step": 6882 }, { - "epoch": 0.5208278158223298, - "grad_norm": 2.0759565830230713, - "learning_rate": 9.123868031375716e-06, - "loss": 0.6736, + "epoch": 0.4845476944737768, + "grad_norm": 1.7407256364822388, + "learning_rate": 7.689991650669327e-06, + "loss": 0.6231, "step": 6883 }, { - "epoch": 0.5209034845446635, - "grad_norm": 2.5509610176086426, - "learning_rate": 9.121616023709473e-06, - "loss": 0.8851, + "epoch": 0.4846180922210489, + "grad_norm": 1.9604324102401733, + "learning_rate": 7.688403299621184e-06, + "loss": 0.6798, "step": 6884 }, { - "epoch": 0.5209791532669971, - "grad_norm": 2.314539909362793, - "learning_rate": 9.119364008427239e-06, - "loss": 0.8735, + "epoch": 0.48468848996832103, + "grad_norm": 1.6788311004638672, + "learning_rate": 7.686814912782327e-06, + "loss": 0.6692, "step": 6885 }, { - "epoch": 0.5210548219893307, - "grad_norm": 1.9383503198623657, - "learning_rate": 9.117111985670043e-06, - "loss": 0.7673, + "epoch": 0.4847588877155931, + "grad_norm": 1.578857660293579, + "learning_rate": 7.685226490235341e-06, + "loss": 0.6573, "step": 6886 }, { - "epoch": 0.5211304907116643, - "grad_norm": 2.7739522457122803, - "learning_rate": 9.114859955578916e-06, - "loss": 0.736, + "epoch": 0.4848292854628652, + "grad_norm": 1.9187318086624146, + "learning_rate": 7.68363803206281e-06, + "loss": 0.7205, "step": 6887 }, { - "epoch": 0.521206159433998, - "grad_norm": 1.9783974885940552, - "learning_rate": 9.112607918294887e-06, - "loss": 0.868, + "epoch": 0.48489968321013727, + "grad_norm": 1.5444285869598389, + "learning_rate": 7.682049538347318e-06, + "loss": 0.645, "step": 6888 }, { - "epoch": 0.5212818281563316, - "grad_norm": 2.2085728645324707, - "learning_rate": 9.110355873958987e-06, - "loss": 0.7107, + "epoch": 0.48497008095740934, + "grad_norm": 1.7999136447906494, + "learning_rate": 7.680461009171453e-06, + "loss": 0.7056, "step": 6889 }, { - "epoch": 0.5213574968786652, - "grad_norm": 2.447173833847046, - "learning_rate": 9.108103822712246e-06, - "loss": 0.7465, + "epoch": 0.4850404787046815, + "grad_norm": 1.5475364923477173, + "learning_rate": 7.678872444617803e-06, + "loss": 0.6521, "step": 6890 }, { - "epoch": 0.5214331656009988, - "grad_norm": 2.1307895183563232, - "learning_rate": 9.105851764695691e-06, - "loss": 0.6948, + "epoch": 0.48511087645195355, + "grad_norm": 1.6403950452804565, + "learning_rate": 7.67728384476896e-06, + "loss": 0.7125, "step": 6891 }, { - "epoch": 0.5215088343233325, - "grad_norm": 2.2277281284332275, - "learning_rate": 9.103599700050358e-06, - "loss": 0.6859, + "epoch": 0.48518127419922563, + "grad_norm": 1.7675493955612183, + "learning_rate": 7.675695209707519e-06, + "loss": 0.6563, "step": 6892 }, { - "epoch": 0.5215845030456661, - "grad_norm": 1.6344878673553467, - "learning_rate": 9.101347628917278e-06, - "loss": 0.6701, + "epoch": 0.4852516719464977, + "grad_norm": 2.0272488594055176, + "learning_rate": 7.67410653951607e-06, + "loss": 0.6811, "step": 6893 }, { - "epoch": 0.5216601717679997, - "grad_norm": 2.285489797592163, - "learning_rate": 9.09909555143748e-06, - "loss": 0.5954, + "epoch": 0.4853220696937698, + "grad_norm": 2.0071732997894287, + "learning_rate": 7.672517834277212e-06, + "loss": 0.7213, "step": 6894 }, { - "epoch": 0.5217358404903333, - "grad_norm": 2.104238986968994, - "learning_rate": 9.096843467751999e-06, - "loss": 0.6608, + "epoch": 0.48539246744104186, + "grad_norm": 1.946776270866394, + "learning_rate": 7.670929094073543e-06, + "loss": 0.6716, "step": 6895 }, { - "epoch": 0.521811509212667, - "grad_norm": 2.4383769035339355, - "learning_rate": 9.094591378001864e-06, - "loss": 0.7292, + "epoch": 0.485462865188314, + "grad_norm": 2.1968894004821777, + "learning_rate": 7.669340318987662e-06, + "loss": 0.6047, "step": 6896 }, { - "epoch": 0.5218871779350006, - "grad_norm": 2.041077136993408, - "learning_rate": 9.092339282328115e-06, - "loss": 0.7528, + "epoch": 0.4855332629355861, + "grad_norm": 1.7178544998168945, + "learning_rate": 7.66775150910217e-06, + "loss": 0.6137, "step": 6897 }, { - "epoch": 0.5219628466573342, - "grad_norm": 2.0433545112609863, - "learning_rate": 9.09008718087178e-06, - "loss": 0.5971, + "epoch": 0.48560366068285815, + "grad_norm": 1.840998888015747, + "learning_rate": 7.666162664499677e-06, + "loss": 0.6572, "step": 6898 }, { - "epoch": 0.5220385153796678, - "grad_norm": 2.3326566219329834, - "learning_rate": 9.087835073773893e-06, - "loss": 0.6639, + "epoch": 0.48567405843013023, + "grad_norm": 1.6250286102294922, + "learning_rate": 7.66457378526278e-06, + "loss": 0.6604, "step": 6899 }, { - "epoch": 0.5221141841020014, - "grad_norm": 1.7605972290039062, - "learning_rate": 9.085582961175493e-06, - "loss": 0.5656, + "epoch": 0.4857444561774023, + "grad_norm": 1.6378756761550903, + "learning_rate": 7.66298487147409e-06, + "loss": 0.7363, "step": 6900 }, { - "epoch": 0.522189852824335, - "grad_norm": 2.2532219886779785, - "learning_rate": 9.083330843217606e-06, - "loss": 0.6726, + "epoch": 0.4858148539246744, + "grad_norm": 1.3954441547393799, + "learning_rate": 7.66139592321622e-06, + "loss": 0.7974, "step": 6901 }, { - "epoch": 0.5222655215466687, - "grad_norm": 2.456960678100586, - "learning_rate": 9.081078720041277e-06, - "loss": 0.8113, + "epoch": 0.4858852516719465, + "grad_norm": 2.1520020961761475, + "learning_rate": 7.659806940571774e-06, + "loss": 0.6705, "step": 6902 }, { - "epoch": 0.5223411902690023, - "grad_norm": 2.177849292755127, - "learning_rate": 9.078826591787532e-06, - "loss": 0.6898, + "epoch": 0.4859556494192186, + "grad_norm": 1.862487554550171, + "learning_rate": 7.658217923623368e-06, + "loss": 0.6746, "step": 6903 }, { - "epoch": 0.5224168589913359, - "grad_norm": 2.2699694633483887, - "learning_rate": 9.076574458597413e-06, - "loss": 0.7005, + "epoch": 0.48602604716649067, + "grad_norm": 1.9999544620513916, + "learning_rate": 7.656628872453614e-06, + "loss": 0.5797, "step": 6904 }, { - "epoch": 0.5224925277136696, - "grad_norm": 2.6185245513916016, - "learning_rate": 9.074322320611954e-06, - "loss": 0.6497, + "epoch": 0.48609644491376275, + "grad_norm": 2.0365641117095947, + "learning_rate": 7.655039787145133e-06, + "loss": 0.8008, "step": 6905 }, { - "epoch": 0.5225681964360032, - "grad_norm": 2.110748767852783, - "learning_rate": 9.07207017797219e-06, - "loss": 0.8304, + "epoch": 0.4861668426610348, + "grad_norm": 1.676377296447754, + "learning_rate": 7.653450667780539e-06, + "loss": 0.8005, "step": 6906 }, { - "epoch": 0.5226438651583368, - "grad_norm": 2.2064247131347656, - "learning_rate": 9.069818030819162e-06, - "loss": 0.714, + "epoch": 0.48623724040830696, + "grad_norm": 2.526132106781006, + "learning_rate": 7.651861514442454e-06, + "loss": 0.6714, "step": 6907 }, { - "epoch": 0.5227195338806704, - "grad_norm": 1.88336181640625, - "learning_rate": 9.067565879293898e-06, - "loss": 0.8837, + "epoch": 0.48630763815557904, + "grad_norm": 1.9236596822738647, + "learning_rate": 7.650272327213497e-06, + "loss": 0.6981, "step": 6908 }, { - "epoch": 0.522795202603004, - "grad_norm": 2.301023244857788, - "learning_rate": 9.065313723537443e-06, - "loss": 0.8356, + "epoch": 0.4863780359028511, + "grad_norm": 1.7638916969299316, + "learning_rate": 7.648683106176293e-06, + "loss": 0.7815, "step": 6909 }, { - "epoch": 0.5228708713253377, - "grad_norm": 2.310317039489746, - "learning_rate": 9.06306156369083e-06, - "loss": 0.8133, + "epoch": 0.4864484336501232, + "grad_norm": 1.848482370376587, + "learning_rate": 7.647093851413469e-06, + "loss": 0.8307, "step": 6910 }, { - "epoch": 0.5229465400476713, - "grad_norm": 2.0655336380004883, - "learning_rate": 9.060809399895099e-06, - "loss": 0.6615, + "epoch": 0.48651883139739527, + "grad_norm": 1.8696870803833008, + "learning_rate": 7.645504563007647e-06, + "loss": 0.6834, "step": 6911 }, { - "epoch": 0.5230222087700049, - "grad_norm": 2.006929874420166, - "learning_rate": 9.058557232291283e-06, - "loss": 0.6776, + "epoch": 0.48658922914466735, + "grad_norm": 1.6231231689453125, + "learning_rate": 7.64391524104146e-06, + "loss": 0.6434, "step": 6912 }, { - "epoch": 0.5230978774923385, - "grad_norm": 2.7849390506744385, - "learning_rate": 9.05630506102043e-06, - "loss": 0.6462, + "epoch": 0.4866596268919395, + "grad_norm": 1.7697995901107788, + "learning_rate": 7.642325885597535e-06, + "loss": 0.5593, "step": 6913 }, { - "epoch": 0.5231735462146722, - "grad_norm": 1.9213398694992065, - "learning_rate": 9.054052886223568e-06, - "loss": 0.6398, + "epoch": 0.48673002463921156, + "grad_norm": 1.7313284873962402, + "learning_rate": 7.640736496758506e-06, + "loss": 0.677, "step": 6914 }, { - "epoch": 0.5232492149370058, - "grad_norm": 2.0991053581237793, - "learning_rate": 9.051800708041741e-06, - "loss": 0.6703, + "epoch": 0.48680042238648363, + "grad_norm": 1.7075622081756592, + "learning_rate": 7.639147074607006e-06, + "loss": 0.7872, "step": 6915 }, { - "epoch": 0.5233248836593394, - "grad_norm": 1.9303232431411743, - "learning_rate": 9.049548526615986e-06, - "loss": 0.7027, + "epoch": 0.4868708201337557, + "grad_norm": 2.933905601501465, + "learning_rate": 7.637557619225672e-06, + "loss": 0.8435, "step": 6916 }, { - "epoch": 0.523400552381673, - "grad_norm": 1.928173303604126, - "learning_rate": 9.04729634208734e-06, - "loss": 0.7511, + "epoch": 0.4869412178810278, + "grad_norm": 1.4933298826217651, + "learning_rate": 7.63596813069714e-06, + "loss": 0.6623, "step": 6917 }, { - "epoch": 0.5234762211040067, - "grad_norm": 2.1134893894195557, - "learning_rate": 9.045044154596846e-06, - "loss": 0.6348, + "epoch": 0.4870116156282999, + "grad_norm": 1.659057378768921, + "learning_rate": 7.634378609104046e-06, + "loss": 0.6736, "step": 6918 }, { - "epoch": 0.5235518898263403, - "grad_norm": 2.236544609069824, - "learning_rate": 9.04279196428554e-06, - "loss": 0.7813, + "epoch": 0.487082013375572, + "grad_norm": 1.8535329103469849, + "learning_rate": 7.632789054529038e-06, + "loss": 0.678, "step": 6919 }, { - "epoch": 0.5236275585486739, - "grad_norm": 2.3019282817840576, - "learning_rate": 9.040539771294464e-06, - "loss": 0.8056, + "epoch": 0.4871524111228441, + "grad_norm": 1.8676859140396118, + "learning_rate": 7.631199467054751e-06, + "loss": 0.6323, "step": 6920 }, { - "epoch": 0.5237032272710075, - "grad_norm": 2.164768934249878, - "learning_rate": 9.038287575764656e-06, - "loss": 0.7114, + "epoch": 0.48722280887011615, + "grad_norm": 1.7391880750656128, + "learning_rate": 7.629609846763832e-06, + "loss": 0.7512, "step": 6921 }, { - "epoch": 0.5237788959933412, - "grad_norm": 2.2749412059783936, - "learning_rate": 9.036035377837156e-06, - "loss": 0.8045, + "epoch": 0.48729320661738823, + "grad_norm": 1.8021104335784912, + "learning_rate": 7.628020193738928e-06, + "loss": 0.6357, "step": 6922 }, { - "epoch": 0.5238545647156748, - "grad_norm": 1.8921995162963867, - "learning_rate": 9.033783177653006e-06, - "loss": 0.9092, + "epoch": 0.4873636043646603, + "grad_norm": 1.9736992120742798, + "learning_rate": 7.626430508062684e-06, + "loss": 0.6906, "step": 6923 }, { - "epoch": 0.5239302334380084, - "grad_norm": 1.9506926536560059, - "learning_rate": 9.03153097535324e-06, - "loss": 0.6368, + "epoch": 0.48743400211193244, + "grad_norm": 1.762525200843811, + "learning_rate": 7.624840789817751e-06, + "loss": 0.6331, "step": 6924 }, { - "epoch": 0.524005902160342, - "grad_norm": 2.1694705486297607, - "learning_rate": 9.029278771078905e-06, - "loss": 0.809, + "epoch": 0.4875043998592045, + "grad_norm": 1.7422032356262207, + "learning_rate": 7.6232510390867805e-06, + "loss": 0.5511, "step": 6925 }, { - "epoch": 0.5240815708826756, - "grad_norm": 4.481318950653076, - "learning_rate": 9.02702656497104e-06, - "loss": 0.7019, + "epoch": 0.4875747976064766, + "grad_norm": 1.761390209197998, + "learning_rate": 7.6216612559524225e-06, + "loss": 0.6981, "step": 6926 }, { - "epoch": 0.5241572396050093, - "grad_norm": 1.8642576932907104, - "learning_rate": 9.024774357170681e-06, - "loss": 0.7031, + "epoch": 0.4876451953537487, + "grad_norm": 1.712830662727356, + "learning_rate": 7.620071440497334e-06, + "loss": 0.7867, "step": 6927 }, { - "epoch": 0.5242329083273429, - "grad_norm": 1.5886856317520142, - "learning_rate": 9.022522147818873e-06, - "loss": 0.6229, + "epoch": 0.48771559310102075, + "grad_norm": 1.6549065113067627, + "learning_rate": 7.61848159280417e-06, + "loss": 0.7743, "step": 6928 }, { - "epoch": 0.5243085770496765, - "grad_norm": 2.0907092094421387, - "learning_rate": 9.020269937056657e-06, - "loss": 0.6835, + "epoch": 0.4877859908482928, + "grad_norm": 1.8456015586853027, + "learning_rate": 7.616891712955587e-06, + "loss": 0.7167, "step": 6929 }, { - "epoch": 0.5243842457720101, - "grad_norm": 2.4200832843780518, - "learning_rate": 9.01801772502507e-06, - "loss": 0.8053, + "epoch": 0.48785638859556496, + "grad_norm": 1.6480827331542969, + "learning_rate": 7.615301801034245e-06, + "loss": 0.684, "step": 6930 }, { - "epoch": 0.5244599144943438, - "grad_norm": 1.7767586708068848, - "learning_rate": 9.015765511865156e-06, - "loss": 0.8483, + "epoch": 0.48792678634283704, + "grad_norm": 2.430166244506836, + "learning_rate": 7.613711857122806e-06, + "loss": 0.6773, "step": 6931 }, { - "epoch": 0.5245355832166774, - "grad_norm": 1.9560530185699463, - "learning_rate": 9.013513297717954e-06, - "loss": 0.7639, + "epoch": 0.4879971840901091, + "grad_norm": 1.9230659008026123, + "learning_rate": 7.612121881303932e-06, + "loss": 0.6003, "step": 6932 }, { - "epoch": 0.524611251939011, - "grad_norm": 2.2985103130340576, - "learning_rate": 9.011261082724503e-06, - "loss": 0.7953, + "epoch": 0.4880675818373812, + "grad_norm": 1.657749891281128, + "learning_rate": 7.610531873660285e-06, + "loss": 0.6788, "step": 6933 }, { - "epoch": 0.5246869206613446, - "grad_norm": 2.1248481273651123, - "learning_rate": 9.00900886702585e-06, - "loss": 0.7215, + "epoch": 0.48813797958465327, + "grad_norm": 1.602983832359314, + "learning_rate": 7.608941834274533e-06, + "loss": 0.646, "step": 6934 }, { - "epoch": 0.5247625893836783, - "grad_norm": 1.9643497467041016, - "learning_rate": 9.006756650763031e-06, - "loss": 0.6989, + "epoch": 0.4882083773319254, + "grad_norm": 1.9395595788955688, + "learning_rate": 7.607351763229346e-06, + "loss": 0.7313, "step": 6935 }, { - "epoch": 0.5248382581060119, - "grad_norm": 2.1842288970947266, - "learning_rate": 9.004504434077092e-06, - "loss": 0.7278, + "epoch": 0.4882787750791975, + "grad_norm": 1.9432252645492554, + "learning_rate": 7.605761660607389e-06, + "loss": 0.6917, "step": 6936 }, { - "epoch": 0.5249139268283455, - "grad_norm": 2.345261812210083, - "learning_rate": 9.002252217109065e-06, - "loss": 0.7213, + "epoch": 0.48834917282646956, + "grad_norm": 1.5621521472930908, + "learning_rate": 7.604171526491336e-06, + "loss": 0.6543, "step": 6937 }, { - "epoch": 0.5249895955506791, - "grad_norm": 2.2348873615264893, - "learning_rate": 9e-06, - "loss": 0.6579, + "epoch": 0.48841957057374163, + "grad_norm": 1.8826910257339478, + "learning_rate": 7.602581360963857e-06, + "loss": 0.6058, "step": 6938 }, { - "epoch": 0.5250652642730127, - "grad_norm": 2.263463258743286, - "learning_rate": 8.997747782890936e-06, - "loss": 0.7455, + "epoch": 0.4884899683210137, + "grad_norm": 1.727573037147522, + "learning_rate": 7.600991164107628e-06, + "loss": 0.6908, "step": 6939 }, { - "epoch": 0.5251409329953464, - "grad_norm": 2.1686911582946777, - "learning_rate": 8.995495565922914e-06, - "loss": 0.7449, + "epoch": 0.4885603660682858, + "grad_norm": 1.6985597610473633, + "learning_rate": 7.599400936005324e-06, + "loss": 0.7127, "step": 6940 }, { - "epoch": 0.52521660171768, - "grad_norm": 2.4088521003723145, - "learning_rate": 8.99324334923697e-06, - "loss": 0.7396, + "epoch": 0.4886307638155579, + "grad_norm": 1.7893575429916382, + "learning_rate": 7.597810676739622e-06, + "loss": 0.7239, "step": 6941 }, { - "epoch": 0.5252922704400136, - "grad_norm": 2.31754207611084, - "learning_rate": 8.99099113297415e-06, - "loss": 0.6586, + "epoch": 0.48870116156283, + "grad_norm": 1.8316552639007568, + "learning_rate": 7.596220386393201e-06, + "loss": 0.7303, "step": 6942 }, { - "epoch": 0.5253679391623473, - "grad_norm": 1.726416826248169, - "learning_rate": 8.988738917275497e-06, - "loss": 0.8426, + "epoch": 0.4887715593101021, + "grad_norm": 1.83077871799469, + "learning_rate": 7.594630065048743e-06, + "loss": 0.776, "step": 6943 }, { - "epoch": 0.5254436078846809, - "grad_norm": 2.5192909240722656, - "learning_rate": 8.986486702282048e-06, - "loss": 0.7568, + "epoch": 0.48884195705737415, + "grad_norm": 2.4817512035369873, + "learning_rate": 7.593039712788929e-06, + "loss": 0.7137, "step": 6944 }, { - "epoch": 0.5255192766070145, - "grad_norm": 2.0664658546447754, - "learning_rate": 8.98423448813485e-06, - "loss": 0.5892, + "epoch": 0.48891235480464623, + "grad_norm": 1.9120087623596191, + "learning_rate": 7.591449329696444e-06, + "loss": 0.6934, "step": 6945 }, { - "epoch": 0.5255949453293481, - "grad_norm": 3.337775230407715, - "learning_rate": 8.981982274974932e-06, - "loss": 0.7521, + "epoch": 0.48898275255191836, + "grad_norm": 1.8362674713134766, + "learning_rate": 7.589858915853973e-06, + "loss": 0.5655, "step": 6946 }, { - "epoch": 0.5256706140516817, - "grad_norm": 2.2166309356689453, - "learning_rate": 8.979730062943344e-06, - "loss": 0.8307, + "epoch": 0.48905315029919044, + "grad_norm": 1.8294564485549927, + "learning_rate": 7.588268471344202e-06, + "loss": 0.6199, "step": 6947 }, { - "epoch": 0.5257462827740154, - "grad_norm": 1.8791322708129883, - "learning_rate": 8.977477852181128e-06, - "loss": 0.6808, + "epoch": 0.4891235480464625, + "grad_norm": 1.6041282415390015, + "learning_rate": 7.586677996249819e-06, + "loss": 0.6743, "step": 6948 }, { - "epoch": 0.525821951496349, - "grad_norm": 2.134373664855957, - "learning_rate": 8.97522564282932e-06, - "loss": 0.7119, + "epoch": 0.4891939457937346, + "grad_norm": 1.961397647857666, + "learning_rate": 7.585087490653518e-06, + "loss": 0.6706, "step": 6949 }, { - "epoch": 0.5258976202186826, - "grad_norm": 2.081508159637451, - "learning_rate": 8.972973435028964e-06, - "loss": 0.6941, + "epoch": 0.4892643435410067, + "grad_norm": 1.68181574344635, + "learning_rate": 7.583496954637987e-06, + "loss": 0.6074, "step": 6950 }, { - "epoch": 0.5259732889410162, - "grad_norm": 2.069390296936035, - "learning_rate": 8.970721228921096e-06, - "loss": 0.7172, + "epoch": 0.48933474128827875, + "grad_norm": 1.444459319114685, + "learning_rate": 7.581906388285921e-06, + "loss": 0.7288, "step": 6951 }, { - "epoch": 0.5260489576633498, - "grad_norm": 2.029240369796753, - "learning_rate": 8.968469024646759e-06, - "loss": 0.7142, + "epoch": 0.4894051390355509, + "grad_norm": 1.653060793876648, + "learning_rate": 7.580315791680016e-06, + "loss": 0.6949, "step": 6952 }, { - "epoch": 0.5261246263856835, - "grad_norm": 2.3885130882263184, - "learning_rate": 8.966216822346996e-06, - "loss": 0.6241, + "epoch": 0.48947553678282296, + "grad_norm": 1.556343674659729, + "learning_rate": 7.578725164902966e-06, + "loss": 0.7237, "step": 6953 }, { - "epoch": 0.5262002951080171, - "grad_norm": 1.988101840019226, - "learning_rate": 8.963964622162846e-06, - "loss": 0.6023, + "epoch": 0.48954593453009504, + "grad_norm": 2.093841075897217, + "learning_rate": 7.577134508037472e-06, + "loss": 0.7744, "step": 6954 }, { - "epoch": 0.5262759638303507, - "grad_norm": 1.8047088384628296, - "learning_rate": 8.961712424235346e-06, - "loss": 0.6205, + "epoch": 0.4896163322773671, + "grad_norm": 1.8060798645019531, + "learning_rate": 7.575543821166231e-06, + "loss": 0.7314, "step": 6955 }, { - "epoch": 0.5263516325526844, - "grad_norm": 2.6779801845550537, - "learning_rate": 8.959460228705535e-06, - "loss": 0.9106, + "epoch": 0.4896867300246392, + "grad_norm": 2.0685713291168213, + "learning_rate": 7.573953104371947e-06, + "loss": 0.6582, "step": 6956 }, { - "epoch": 0.526427301275018, - "grad_norm": 2.134953022003174, - "learning_rate": 8.957208035714461e-06, - "loss": 0.7214, + "epoch": 0.48975712777191127, + "grad_norm": 1.8766186237335205, + "learning_rate": 7.572362357737319e-06, + "loss": 0.6442, "step": 6957 }, { - "epoch": 0.5265029699973516, - "grad_norm": 1.9478284120559692, - "learning_rate": 8.954955845403156e-06, - "loss": 0.8335, + "epoch": 0.4898275255191834, + "grad_norm": 1.9173319339752197, + "learning_rate": 7.570771581345056e-06, + "loss": 0.6777, "step": 6958 }, { - "epoch": 0.5265786387196852, - "grad_norm": 1.8003441095352173, - "learning_rate": 8.952703657912662e-06, - "loss": 0.7422, + "epoch": 0.4898979232664555, + "grad_norm": 1.9082928895950317, + "learning_rate": 7.56918077527786e-06, + "loss": 0.6737, "step": 6959 }, { - "epoch": 0.5266543074420188, - "grad_norm": 1.7551347017288208, - "learning_rate": 8.950451473384017e-06, - "loss": 0.6058, + "epoch": 0.48996832101372756, + "grad_norm": 2.4666640758514404, + "learning_rate": 7.56758993961844e-06, + "loss": 0.5912, "step": 6960 }, { - "epoch": 0.5267299761643525, - "grad_norm": 2.0923233032226562, - "learning_rate": 8.948199291958263e-06, - "loss": 0.8269, + "epoch": 0.49003871876099964, + "grad_norm": 2.418625831604004, + "learning_rate": 7.565999074449507e-06, + "loss": 0.7114, "step": 6961 }, { - "epoch": 0.5268056448866861, - "grad_norm": 1.9860175848007202, - "learning_rate": 8.945947113776432e-06, - "loss": 0.6191, + "epoch": 0.4901091165082717, + "grad_norm": 2.5650789737701416, + "learning_rate": 7.564408179853767e-06, + "loss": 0.7534, "step": 6962 }, { - "epoch": 0.5268813136090197, - "grad_norm": 2.2504024505615234, - "learning_rate": 8.943694938979572e-06, - "loss": 0.6805, + "epoch": 0.49017951425554385, + "grad_norm": 1.9662985801696777, + "learning_rate": 7.562817255913936e-06, + "loss": 0.6746, "step": 6963 }, { - "epoch": 0.5269569823313534, - "grad_norm": 2.547034978866577, - "learning_rate": 8.941442767708717e-06, - "loss": 0.6573, + "epoch": 0.4902499120028159, + "grad_norm": 2.0140910148620605, + "learning_rate": 7.561226302712726e-06, + "loss": 0.7799, "step": 6964 }, { - "epoch": 0.5270326510536869, - "grad_norm": 1.7897727489471436, - "learning_rate": 8.939190600104905e-06, - "loss": 0.7814, + "epoch": 0.490320309750088, + "grad_norm": 1.69057035446167, + "learning_rate": 7.559635320332852e-06, + "loss": 0.8064, "step": 6965 }, { - "epoch": 0.5271083197760206, - "grad_norm": 2.5294055938720703, - "learning_rate": 8.936938436309175e-06, - "loss": 0.7107, + "epoch": 0.4903907074973601, + "grad_norm": 1.8184431791305542, + "learning_rate": 7.558044308857031e-06, + "loss": 0.6659, "step": 6966 }, { - "epoch": 0.5271839884983542, - "grad_norm": 2.606584310531616, - "learning_rate": 8.934686276462558e-06, - "loss": 0.6996, + "epoch": 0.49046110524463216, + "grad_norm": 1.8598313331604004, + "learning_rate": 7.5564532683679815e-06, + "loss": 0.6189, "step": 6967 }, { - "epoch": 0.5272596572206878, - "grad_norm": 1.6937854290008545, - "learning_rate": 8.932434120706104e-06, - "loss": 0.836, + "epoch": 0.49053150299190423, + "grad_norm": 1.597071886062622, + "learning_rate": 7.5548621989484225e-06, + "loss": 0.7868, "step": 6968 }, { - "epoch": 0.5273353259430215, - "grad_norm": 2.6257095336914062, - "learning_rate": 8.93018196918084e-06, - "loss": 0.6491, + "epoch": 0.49060190073917637, + "grad_norm": 1.860912799835205, + "learning_rate": 7.553271100681074e-06, + "loss": 0.7072, "step": 6969 }, { - "epoch": 0.5274109946653551, - "grad_norm": 3.0788724422454834, - "learning_rate": 8.927929822027812e-06, - "loss": 0.6871, + "epoch": 0.49067229848644844, + "grad_norm": 1.5628958940505981, + "learning_rate": 7.551679973648661e-06, + "loss": 0.6853, "step": 6970 }, { - "epoch": 0.5274866633876887, - "grad_norm": 2.0726146697998047, - "learning_rate": 8.925677679388048e-06, - "loss": 0.7104, + "epoch": 0.4907426962337205, + "grad_norm": 2.131815195083618, + "learning_rate": 7.550088817933906e-06, + "loss": 0.7127, "step": 6971 }, { - "epoch": 0.5275623321100223, - "grad_norm": 2.652439832687378, - "learning_rate": 8.923425541402586e-06, - "loss": 0.6209, + "epoch": 0.4908130939809926, + "grad_norm": 2.1424057483673096, + "learning_rate": 7.548497633619535e-06, + "loss": 0.6896, "step": 6972 }, { - "epoch": 0.5276380008323559, - "grad_norm": 2.571249485015869, - "learning_rate": 8.921173408212468e-06, - "loss": 0.8608, + "epoch": 0.4908834917282647, + "grad_norm": 2.0388107299804688, + "learning_rate": 7.5469064207882765e-06, + "loss": 0.7331, "step": 6973 }, { - "epoch": 0.5277136695546896, - "grad_norm": 2.138179302215576, - "learning_rate": 8.918921279958725e-06, - "loss": 0.7663, + "epoch": 0.4909538894755368, + "grad_norm": 1.7795956134796143, + "learning_rate": 7.545315179522857e-06, + "loss": 0.6353, "step": 6974 }, { - "epoch": 0.5277893382770232, - "grad_norm": 2.010223388671875, - "learning_rate": 8.916669156782396e-06, - "loss": 0.7324, + "epoch": 0.4910242872228089, + "grad_norm": 2.7227673530578613, + "learning_rate": 7.543723909906007e-06, + "loss": 0.6363, "step": 6975 }, { - "epoch": 0.5278650069993568, - "grad_norm": 1.760847806930542, - "learning_rate": 8.914417038824511e-06, - "loss": 0.736, + "epoch": 0.49109468497008096, + "grad_norm": 2.2361767292022705, + "learning_rate": 7.54213261202046e-06, + "loss": 0.7322, "step": 6976 }, { - "epoch": 0.5279406757216905, - "grad_norm": 2.2697741985321045, - "learning_rate": 8.912164926226107e-06, - "loss": 0.868, + "epoch": 0.49116508271735304, + "grad_norm": 2.2187559604644775, + "learning_rate": 7.540541285948946e-06, + "loss": 0.7864, "step": 6977 }, { - "epoch": 0.528016344444024, - "grad_norm": 1.8922936916351318, - "learning_rate": 8.909912819128223e-06, - "loss": 0.5409, + "epoch": 0.4912354804646251, + "grad_norm": 1.861690878868103, + "learning_rate": 7.538949931774201e-06, + "loss": 0.626, "step": 6978 }, { - "epoch": 0.5280920131663577, - "grad_norm": 1.831129789352417, - "learning_rate": 8.907660717671887e-06, - "loss": 0.667, + "epoch": 0.4913058782118972, + "grad_norm": 1.9184237718582153, + "learning_rate": 7.537358549578963e-06, + "loss": 0.6707, "step": 6979 }, { - "epoch": 0.5281676818886913, - "grad_norm": 2.529381275177002, - "learning_rate": 8.905408621998138e-06, - "loss": 0.8239, + "epoch": 0.49137627595916933, + "grad_norm": 1.7319180965423584, + "learning_rate": 7.535767139445964e-06, + "loss": 0.7638, "step": 6980 }, { - "epoch": 0.5282433506110249, - "grad_norm": 2.6161415576934814, - "learning_rate": 8.903156532248005e-06, - "loss": 0.7192, + "epoch": 0.4914466737064414, + "grad_norm": 1.558050274848938, + "learning_rate": 7.534175701457948e-06, + "loss": 0.7383, "step": 6981 }, { - "epoch": 0.5283190193333586, - "grad_norm": 2.2429845333099365, - "learning_rate": 8.90090444856252e-06, - "loss": 0.6279, + "epoch": 0.4915170714537135, + "grad_norm": 1.718546986579895, + "learning_rate": 7.532584235697655e-06, + "loss": 0.6426, "step": 6982 }, { - "epoch": 0.5283946880556922, - "grad_norm": 2.3764171600341797, - "learning_rate": 8.898652371082722e-06, - "loss": 0.6857, + "epoch": 0.49158746920098556, + "grad_norm": 1.888731598854065, + "learning_rate": 7.530992742247823e-06, + "loss": 0.7611, "step": 6983 }, { - "epoch": 0.5284703567780258, - "grad_norm": 2.131578207015991, - "learning_rate": 8.896400299949642e-06, - "loss": 0.6394, + "epoch": 0.49165786694825764, + "grad_norm": 1.664847493171692, + "learning_rate": 7.529401221191198e-06, + "loss": 0.7214, "step": 6984 }, { - "epoch": 0.5285460255003595, - "grad_norm": 2.332397699356079, - "learning_rate": 8.894148235304309e-06, - "loss": 0.6967, + "epoch": 0.4917282646955297, + "grad_norm": 1.8096699714660645, + "learning_rate": 7.527809672610523e-06, + "loss": 0.7514, "step": 6985 }, { - "epoch": 0.528621694222693, - "grad_norm": 2.148642063140869, - "learning_rate": 8.891896177287758e-06, - "loss": 0.6876, + "epoch": 0.49179866244280185, + "grad_norm": 1.9542627334594727, + "learning_rate": 7.526218096588547e-06, + "loss": 0.7228, "step": 6986 }, { - "epoch": 0.5286973629450267, - "grad_norm": 2.2178142070770264, - "learning_rate": 8.889644126041014e-06, - "loss": 0.5579, + "epoch": 0.4918690601900739, + "grad_norm": 2.019768476486206, + "learning_rate": 7.524626493208015e-06, + "loss": 0.6225, "step": 6987 }, { - "epoch": 0.5287730316673603, - "grad_norm": 2.4631600379943848, - "learning_rate": 8.887392081705112e-06, - "loss": 0.596, + "epoch": 0.491939457937346, + "grad_norm": 2.2046096324920654, + "learning_rate": 7.523034862551677e-06, + "loss": 0.7635, "step": 6988 }, { - "epoch": 0.5288487003896939, - "grad_norm": 2.4894394874572754, - "learning_rate": 8.885140044421086e-06, - "loss": 0.7106, + "epoch": 0.4920098556846181, + "grad_norm": 2.4870824813842773, + "learning_rate": 7.521443204702283e-06, + "loss": 0.7522, "step": 6989 }, { - "epoch": 0.5289243691120276, - "grad_norm": 2.318631410598755, - "learning_rate": 8.88288801432996e-06, - "loss": 0.678, + "epoch": 0.49208025343189016, + "grad_norm": 2.8132431507110596, + "learning_rate": 7.519851519742582e-06, + "loss": 0.7137, "step": 6990 }, { - "epoch": 0.5290000378343611, - "grad_norm": 2.5202953815460205, - "learning_rate": 8.880635991572765e-06, - "loss": 0.8136, + "epoch": 0.4921506511791623, + "grad_norm": 2.0460615158081055, + "learning_rate": 7.5182598077553315e-06, + "loss": 0.6913, "step": 6991 }, { - "epoch": 0.5290757065566948, - "grad_norm": 2.017930746078491, - "learning_rate": 8.878383976290529e-06, - "loss": 0.7493, + "epoch": 0.49222104892643437, + "grad_norm": 2.3621294498443604, + "learning_rate": 7.516668068823283e-06, + "loss": 0.5993, "step": 6992 }, { - "epoch": 0.5291513752790284, - "grad_norm": 2.3018083572387695, - "learning_rate": 8.876131968624282e-06, - "loss": 0.6035, + "epoch": 0.49229144667370645, + "grad_norm": 1.657876968383789, + "learning_rate": 7.515076303029196e-06, + "loss": 0.6493, "step": 6993 }, { - "epoch": 0.529227044001362, - "grad_norm": 2.4072072505950928, - "learning_rate": 8.873879968715055e-06, - "loss": 0.772, + "epoch": 0.4923618444209785, + "grad_norm": 1.7639286518096924, + "learning_rate": 7.513484510455824e-06, + "loss": 0.6446, "step": 6994 }, { - "epoch": 0.5293027127236957, - "grad_norm": 3.478423595428467, - "learning_rate": 8.87162797670387e-06, - "loss": 0.6318, + "epoch": 0.4924322421682506, + "grad_norm": 1.784130334854126, + "learning_rate": 7.511892691185927e-06, + "loss": 0.6923, "step": 6995 }, { - "epoch": 0.5293783814460293, - "grad_norm": 1.5796644687652588, - "learning_rate": 8.869375992731757e-06, - "loss": 0.5953, + "epoch": 0.4925026399155227, + "grad_norm": 1.6949496269226074, + "learning_rate": 7.5103008453022666e-06, + "loss": 0.6911, "step": 6996 }, { - "epoch": 0.5294540501683629, - "grad_norm": 2.066748857498169, - "learning_rate": 8.867124016939742e-06, - "loss": 0.6553, + "epoch": 0.4925730376627948, + "grad_norm": 2.0152041912078857, + "learning_rate": 7.508708972887602e-06, + "loss": 0.6932, "step": 6997 }, { - "epoch": 0.5295297188906966, - "grad_norm": 2.06510591506958, - "learning_rate": 8.864872049468846e-06, - "loss": 0.6526, + "epoch": 0.4926434354100669, + "grad_norm": 2.474036931991577, + "learning_rate": 7.507117074024698e-06, + "loss": 0.7901, "step": 6998 }, { - "epoch": 0.5296053876130301, - "grad_norm": 2.0738534927368164, - "learning_rate": 8.862620090460104e-06, - "loss": 0.8406, + "epoch": 0.49271383315733897, + "grad_norm": 1.7222814559936523, + "learning_rate": 7.505525148796318e-06, + "loss": 0.678, "step": 6999 }, { - "epoch": 0.5296810563353638, - "grad_norm": 2.1421914100646973, - "learning_rate": 8.860368140054536e-06, - "loss": 0.714, + "epoch": 0.49278423090461104, + "grad_norm": 1.8365870714187622, + "learning_rate": 7.5039331972852285e-06, + "loss": 0.71, "step": 7000 }, { - "epoch": 0.5297567250576974, - "grad_norm": 1.9644984006881714, - "learning_rate": 8.858116198393166e-06, - "loss": 0.628, + "epoch": 0.4928546286518831, + "grad_norm": 2.084662675857544, + "learning_rate": 7.502341219574194e-06, + "loss": 0.7748, "step": 7001 }, { - "epoch": 0.529832393780031, - "grad_norm": 2.3363261222839355, - "learning_rate": 8.85586426561702e-06, - "loss": 0.8578, + "epoch": 0.49292502639915525, + "grad_norm": 1.9396532773971558, + "learning_rate": 7.500749215745985e-06, + "loss": 0.8144, "step": 7002 }, { - "epoch": 0.5299080625023647, - "grad_norm": 2.117711305618286, - "learning_rate": 8.853612341867116e-06, - "loss": 0.6332, + "epoch": 0.49299542414642733, + "grad_norm": 1.605641484260559, + "learning_rate": 7.499157185883372e-06, + "loss": 0.8003, "step": 7003 }, { - "epoch": 0.5299837312246982, - "grad_norm": 2.1709940433502197, - "learning_rate": 8.851360427284485e-06, - "loss": 0.7398, + "epoch": 0.4930658218936994, + "grad_norm": 1.9236613512039185, + "learning_rate": 7.4975651300691225e-06, + "loss": 0.6736, "step": 7004 }, { - "epoch": 0.5300593999470319, - "grad_norm": 1.8637499809265137, - "learning_rate": 8.849108522010144e-06, - "loss": 0.6726, + "epoch": 0.4931362196409715, + "grad_norm": 2.010066032409668, + "learning_rate": 7.495973048386013e-06, + "loss": 0.633, "step": 7005 }, { - "epoch": 0.5301350686693656, - "grad_norm": 2.101854085922241, - "learning_rate": 8.846856626185113e-06, - "loss": 0.6772, + "epoch": 0.49320661738824356, + "grad_norm": 2.140815258026123, + "learning_rate": 7.494380940916816e-06, + "loss": 0.7284, "step": 7006 }, { - "epoch": 0.5302107373916991, - "grad_norm": 1.8691213130950928, - "learning_rate": 8.84460473995042e-06, - "loss": 0.7488, + "epoch": 0.49327701513551564, + "grad_norm": 1.5142797231674194, + "learning_rate": 7.492788807744304e-06, + "loss": 0.6917, "step": 7007 }, { - "epoch": 0.5302864061140328, - "grad_norm": 2.332582473754883, - "learning_rate": 8.842352863447076e-06, - "loss": 0.6498, + "epoch": 0.4933474128827878, + "grad_norm": 1.7790122032165527, + "learning_rate": 7.491196648951256e-06, + "loss": 0.7207, "step": 7008 }, { - "epoch": 0.5303620748363664, - "grad_norm": 1.9999667406082153, - "learning_rate": 8.840100996816106e-06, - "loss": 0.6713, + "epoch": 0.49341781063005985, + "grad_norm": 1.7448272705078125, + "learning_rate": 7.489604464620452e-06, + "loss": 0.6599, "step": 7009 }, { - "epoch": 0.5304377435587, - "grad_norm": 2.319936752319336, - "learning_rate": 8.837849140198531e-06, - "loss": 0.6997, + "epoch": 0.4934882083773319, + "grad_norm": 1.745219111442566, + "learning_rate": 7.488012254834666e-06, + "loss": 0.715, "step": 7010 }, { - "epoch": 0.5305134122810337, - "grad_norm": 2.1495556831359863, - "learning_rate": 8.835597293735367e-06, - "loss": 0.7488, + "epoch": 0.493558606124604, + "grad_norm": 2.1835439205169678, + "learning_rate": 7.486420019676681e-06, + "loss": 0.6204, "step": 7011 }, { - "epoch": 0.5305890810033672, - "grad_norm": 1.9620660543441772, - "learning_rate": 8.833345457567632e-06, - "loss": 0.6522, + "epoch": 0.4936290038718761, + "grad_norm": 1.7460306882858276, + "learning_rate": 7.48482775922928e-06, + "loss": 0.7477, "step": 7012 }, { - "epoch": 0.5306647497257009, - "grad_norm": 1.9892656803131104, - "learning_rate": 8.83109363183634e-06, - "loss": 0.6923, + "epoch": 0.4936994016191482, + "grad_norm": 1.6942793130874634, + "learning_rate": 7.483235473575244e-06, + "loss": 0.7429, "step": 7013 }, { - "epoch": 0.5307404184480345, - "grad_norm": 1.7837767601013184, - "learning_rate": 8.828841816682515e-06, - "loss": 0.6985, + "epoch": 0.4937697993664203, + "grad_norm": 1.651778221130371, + "learning_rate": 7.481643162797356e-06, + "loss": 0.6965, "step": 7014 }, { - "epoch": 0.5308160871703681, - "grad_norm": 2.1873769760131836, - "learning_rate": 8.826590012247167e-06, - "loss": 0.7771, + "epoch": 0.49384019711369237, + "grad_norm": 1.656455397605896, + "learning_rate": 7.480050826978408e-06, + "loss": 0.6788, "step": 7015 }, { - "epoch": 0.5308917558927018, - "grad_norm": 2.4407780170440674, - "learning_rate": 8.824338218671317e-06, - "loss": 0.8917, + "epoch": 0.49391059486096445, + "grad_norm": 1.9954520463943481, + "learning_rate": 7.478458466201181e-06, + "loss": 0.7843, "step": 7016 }, { - "epoch": 0.5309674246150353, - "grad_norm": 2.155855178833008, - "learning_rate": 8.822086436095973e-06, - "loss": 0.7085, + "epoch": 0.4939809926082365, + "grad_norm": 1.6015024185180664, + "learning_rate": 7.476866080548464e-06, + "loss": 0.7366, "step": 7017 }, { - "epoch": 0.531043093337369, - "grad_norm": 2.3129708766937256, - "learning_rate": 8.819834664662149e-06, - "loss": 0.8051, + "epoch": 0.4940513903555086, + "grad_norm": 2.033125877380371, + "learning_rate": 7.475273670103047e-06, + "loss": 0.6911, "step": 7018 }, { - "epoch": 0.5311187620597027, - "grad_norm": 1.8992316722869873, - "learning_rate": 8.817582904510867e-06, - "loss": 0.624, + "epoch": 0.49412178810278073, + "grad_norm": 1.6838514804840088, + "learning_rate": 7.4736812349477244e-06, + "loss": 0.6515, "step": 7019 }, { - "epoch": 0.5311944307820362, - "grad_norm": 1.9501327276229858, - "learning_rate": 8.81533115578313e-06, - "loss": 0.7945, + "epoch": 0.4941921858500528, + "grad_norm": 1.943515658378601, + "learning_rate": 7.472088775165283e-06, + "loss": 0.6577, "step": 7020 }, { - "epoch": 0.5312700995043699, - "grad_norm": 2.9170970916748047, - "learning_rate": 8.813079418619955e-06, - "loss": 0.5384, + "epoch": 0.4942625835973249, + "grad_norm": 1.4152088165283203, + "learning_rate": 7.470496290838519e-06, + "loss": 0.6022, "step": 7021 }, { - "epoch": 0.5313457682267035, - "grad_norm": 1.9943617582321167, - "learning_rate": 8.81082769316235e-06, - "loss": 0.8106, + "epoch": 0.49433298134459697, + "grad_norm": 1.5996613502502441, + "learning_rate": 7.4689037820502275e-06, + "loss": 0.6404, "step": 7022 }, { - "epoch": 0.5314214369490371, - "grad_norm": 1.9144606590270996, - "learning_rate": 8.808575979551325e-06, - "loss": 0.8237, + "epoch": 0.49440337909186904, + "grad_norm": 1.835863709449768, + "learning_rate": 7.467311248883202e-06, + "loss": 0.7183, "step": 7023 }, { - "epoch": 0.5314971056713708, - "grad_norm": 2.214576482772827, - "learning_rate": 8.806324277927895e-06, - "loss": 0.6764, + "epoch": 0.4944737768391411, + "grad_norm": 1.6736773252487183, + "learning_rate": 7.465718691420244e-06, + "loss": 0.711, "step": 7024 }, { - "epoch": 0.5315727743937043, - "grad_norm": 2.135948657989502, - "learning_rate": 8.804072588433063e-06, - "loss": 0.5962, + "epoch": 0.49454417458641325, + "grad_norm": 1.7038923501968384, + "learning_rate": 7.464126109744148e-06, + "loss": 0.6618, "step": 7025 }, { - "epoch": 0.531648443116038, - "grad_norm": 2.7250607013702393, - "learning_rate": 8.801820911207842e-06, - "loss": 0.6888, + "epoch": 0.49461457233368533, + "grad_norm": 1.5762677192687988, + "learning_rate": 7.462533503937715e-06, + "loss": 0.6587, "step": 7026 }, { - "epoch": 0.5317241118383716, - "grad_norm": 3.235295534133911, - "learning_rate": 8.799569246393234e-06, - "loss": 0.6464, + "epoch": 0.4946849700809574, + "grad_norm": 1.9460824728012085, + "learning_rate": 7.460940874083747e-06, + "loss": 0.6836, "step": 7027 }, { - "epoch": 0.5317997805607052, - "grad_norm": 2.087533473968506, - "learning_rate": 8.797317594130245e-06, - "loss": 0.6293, + "epoch": 0.4947553678282295, + "grad_norm": 1.8446485996246338, + "learning_rate": 7.459348220265044e-06, + "loss": 0.7682, "step": 7028 }, { - "epoch": 0.5318754492830389, - "grad_norm": 5.911130905151367, - "learning_rate": 8.795065954559888e-06, - "loss": 0.7318, + "epoch": 0.49482576557550156, + "grad_norm": 1.7170295715332031, + "learning_rate": 7.457755542564412e-06, + "loss": 0.682, "step": 7029 }, { - "epoch": 0.5319511180053724, - "grad_norm": 1.9690461158752441, - "learning_rate": 8.792814327823165e-06, - "loss": 0.5787, + "epoch": 0.4948961633227737, + "grad_norm": 2.0113134384155273, + "learning_rate": 7.456162841064656e-06, + "loss": 0.7363, "step": 7030 }, { - "epoch": 0.5320267867277061, - "grad_norm": 2.165632486343384, - "learning_rate": 8.790562714061076e-06, - "loss": 0.7131, + "epoch": 0.4949665610700458, + "grad_norm": 1.9247658252716064, + "learning_rate": 7.454570115848579e-06, + "loss": 0.6938, "step": 7031 }, { - "epoch": 0.5321024554500398, - "grad_norm": 2.008331060409546, - "learning_rate": 8.78831111341463e-06, - "loss": 0.5816, + "epoch": 0.49503695881731785, + "grad_norm": 1.763378381729126, + "learning_rate": 7.45297736699899e-06, + "loss": 0.6336, "step": 7032 }, { - "epoch": 0.5321781241723733, - "grad_norm": 2.393400192260742, - "learning_rate": 8.786059526024823e-06, - "loss": 0.6825, + "epoch": 0.49510735656458993, + "grad_norm": 1.8367911577224731, + "learning_rate": 7.4513845945987e-06, + "loss": 0.5711, "step": 7033 }, { - "epoch": 0.532253792894707, - "grad_norm": 2.293968915939331, - "learning_rate": 8.78380795203266e-06, - "loss": 0.6761, + "epoch": 0.495177754311862, + "grad_norm": 1.6368985176086426, + "learning_rate": 7.449791798730513e-06, + "loss": 0.6775, "step": 7034 }, { - "epoch": 0.5323294616170406, - "grad_norm": 2.2142651081085205, - "learning_rate": 8.781556391579148e-06, - "loss": 0.7001, + "epoch": 0.4952481520591341, + "grad_norm": 1.787818193435669, + "learning_rate": 7.448198979477244e-06, + "loss": 0.6278, "step": 7035 }, { - "epoch": 0.5324051303393742, - "grad_norm": 1.9812933206558228, - "learning_rate": 8.779304844805278e-06, - "loss": 0.6993, + "epoch": 0.4953185498064062, + "grad_norm": 1.6443394422531128, + "learning_rate": 7.446606136921704e-06, + "loss": 0.7414, "step": 7036 }, { - "epoch": 0.5324807990617079, - "grad_norm": 1.889985203742981, - "learning_rate": 8.777053311852053e-06, - "loss": 0.7246, + "epoch": 0.4953889475536783, + "grad_norm": 1.5530809164047241, + "learning_rate": 7.445013271146707e-06, + "loss": 0.5143, "step": 7037 }, { - "epoch": 0.5325564677840414, - "grad_norm": 2.5295422077178955, - "learning_rate": 8.77480179286047e-06, - "loss": 0.5645, + "epoch": 0.49545934530095037, + "grad_norm": 1.9387260675430298, + "learning_rate": 7.443420382235066e-06, + "loss": 0.6558, "step": 7038 }, { - "epoch": 0.5326321365063751, - "grad_norm": 1.9612340927124023, - "learning_rate": 8.772550287971525e-06, - "loss": 0.6118, + "epoch": 0.49552974304822245, + "grad_norm": 2.3580756187438965, + "learning_rate": 7.441827470269599e-06, + "loss": 0.7611, "step": 7039 }, { - "epoch": 0.5327078052287088, - "grad_norm": 2.831002712249756, - "learning_rate": 8.77029879732622e-06, - "loss": 0.7933, + "epoch": 0.4956001407954945, + "grad_norm": 1.61228609085083, + "learning_rate": 7.44023453533312e-06, + "loss": 0.671, "step": 7040 }, { - "epoch": 0.5327834739510423, - "grad_norm": 2.5195281505584717, - "learning_rate": 8.768047321065547e-06, - "loss": 0.66, + "epoch": 0.49567053854276666, + "grad_norm": 1.9537783861160278, + "learning_rate": 7.43864157750845e-06, + "loss": 0.6387, "step": 7041 }, { - "epoch": 0.532859142673376, - "grad_norm": 2.25597882270813, - "learning_rate": 8.765795859330498e-06, - "loss": 0.5955, + "epoch": 0.49574093629003874, + "grad_norm": 1.6002590656280518, + "learning_rate": 7.437048596878408e-06, + "loss": 0.6516, "step": 7042 }, { - "epoch": 0.5329348113957095, - "grad_norm": 2.0232596397399902, - "learning_rate": 8.763544412262074e-06, - "loss": 0.7174, + "epoch": 0.4958113340373108, + "grad_norm": 1.6247228384017944, + "learning_rate": 7.435455593525813e-06, + "loss": 0.7215, "step": 7043 }, { - "epoch": 0.5330104801180432, - "grad_norm": 2.8326871395111084, - "learning_rate": 8.761292980001259e-06, - "loss": 0.6191, + "epoch": 0.4958817317845829, + "grad_norm": 1.460218071937561, + "learning_rate": 7.433862567533487e-06, + "loss": 0.7531, "step": 7044 }, { - "epoch": 0.5330861488403769, - "grad_norm": 2.0261335372924805, - "learning_rate": 8.759041562689053e-06, - "loss": 0.7789, + "epoch": 0.49595212953185497, + "grad_norm": 1.7335447072982788, + "learning_rate": 7.4322695189842546e-06, + "loss": 0.644, "step": 7045 }, { - "epoch": 0.5331618175627104, - "grad_norm": 2.5931825637817383, - "learning_rate": 8.756790160466445e-06, - "loss": 0.7508, + "epoch": 0.49602252727912705, + "grad_norm": 2.318429946899414, + "learning_rate": 7.430676447960935e-06, + "loss": 0.6135, "step": 7046 }, { - "epoch": 0.5332374862850441, - "grad_norm": 2.142396926879883, - "learning_rate": 8.75453877347442e-06, - "loss": 0.6213, + "epoch": 0.4960929250263992, + "grad_norm": 1.9972351789474487, + "learning_rate": 7.429083354546358e-06, + "loss": 0.6568, "step": 7047 }, { - "epoch": 0.5333131550073777, - "grad_norm": 2.075147867202759, - "learning_rate": 8.752287401853974e-06, - "loss": 0.5699, + "epoch": 0.49616332277367126, + "grad_norm": 1.7915931940078735, + "learning_rate": 7.4274902388233524e-06, + "loss": 0.7076, "step": 7048 }, { - "epoch": 0.5333888237297113, - "grad_norm": 2.620086908340454, - "learning_rate": 8.750036045746087e-06, - "loss": 0.6605, + "epoch": 0.49623372052094333, + "grad_norm": 1.8487696647644043, + "learning_rate": 7.425897100874738e-06, + "loss": 0.6255, "step": 7049 }, { - "epoch": 0.533464492452045, - "grad_norm": 2.613399028778076, - "learning_rate": 8.747784705291756e-06, - "loss": 0.7942, + "epoch": 0.4963041182682154, + "grad_norm": 2.192624092102051, + "learning_rate": 7.4243039407833485e-06, + "loss": 0.6028, "step": 7050 }, { - "epoch": 0.5335401611743785, - "grad_norm": 2.060634136199951, - "learning_rate": 8.745533380631963e-06, - "loss": 0.5541, + "epoch": 0.4963745160154875, + "grad_norm": 1.9773788452148438, + "learning_rate": 7.422710758632014e-06, + "loss": 0.6316, "step": 7051 }, { - "epoch": 0.5336158298967122, - "grad_norm": 1.947361946105957, - "learning_rate": 8.743282071907692e-06, - "loss": 0.8374, + "epoch": 0.49644491376275957, + "grad_norm": 1.8058468103408813, + "learning_rate": 7.421117554503564e-06, + "loss": 0.6866, "step": 7052 }, { - "epoch": 0.5336914986190459, - "grad_norm": 2.726940393447876, - "learning_rate": 8.741030779259927e-06, - "loss": 0.815, + "epoch": 0.4965153115100317, + "grad_norm": 1.97904634475708, + "learning_rate": 7.41952432848083e-06, + "loss": 0.6849, "step": 7053 }, { - "epoch": 0.5337671673413794, - "grad_norm": 2.2680883407592773, - "learning_rate": 8.738779502829651e-06, - "loss": 0.7913, + "epoch": 0.4965857092573038, + "grad_norm": 2.0759966373443604, + "learning_rate": 7.417931080646648e-06, + "loss": 0.6282, "step": 7054 }, { - "epoch": 0.5338428360637131, - "grad_norm": 1.992996096611023, - "learning_rate": 8.736528242757849e-06, - "loss": 0.6763, + "epoch": 0.49665610700457585, + "grad_norm": 2.0426182746887207, + "learning_rate": 7.416337811083849e-06, + "loss": 0.6633, "step": 7055 }, { - "epoch": 0.5339185047860466, - "grad_norm": 2.075242519378662, - "learning_rate": 8.7342769991855e-06, - "loss": 0.6267, + "epoch": 0.49672650475184793, + "grad_norm": 1.7802667617797852, + "learning_rate": 7.414744519875271e-06, + "loss": 0.621, "step": 7056 }, { - "epoch": 0.5339941735083803, - "grad_norm": 2.2018284797668457, - "learning_rate": 8.732025772253586e-06, - "loss": 0.7294, + "epoch": 0.49679690249912, + "grad_norm": 1.903968095779419, + "learning_rate": 7.41315120710375e-06, + "loss": 0.7276, "step": 7057 }, { - "epoch": 0.534069842230714, - "grad_norm": 1.962738275527954, - "learning_rate": 8.729774562103084e-06, - "loss": 0.6908, + "epoch": 0.49686730024639214, + "grad_norm": 1.7801588773727417, + "learning_rate": 7.411557872852122e-06, + "loss": 0.7051, "step": 7058 }, { - "epoch": 0.5341455109530475, - "grad_norm": 1.9997669458389282, - "learning_rate": 8.727523368874971e-06, - "loss": 0.9049, + "epoch": 0.4969376979936642, + "grad_norm": 2.7071752548217773, + "learning_rate": 7.409964517203229e-06, + "loss": 0.7603, "step": 7059 }, { - "epoch": 0.5342211796753812, - "grad_norm": 2.593151092529297, - "learning_rate": 8.725272192710229e-06, - "loss": 0.7374, + "epoch": 0.4970080957409363, + "grad_norm": 1.6559593677520752, + "learning_rate": 7.408371140239909e-06, + "loss": 0.6092, "step": 7060 }, { - "epoch": 0.5342968483977149, - "grad_norm": 1.8062269687652588, - "learning_rate": 8.72302103374983e-06, - "loss": 0.7373, + "epoch": 0.4970784934882084, + "grad_norm": 1.9004149436950684, + "learning_rate": 7.4067777420450045e-06, + "loss": 0.6834, "step": 7061 }, { - "epoch": 0.5343725171200484, - "grad_norm": 1.9314745664596558, - "learning_rate": 8.720769892134751e-06, - "loss": 0.7628, + "epoch": 0.49714889123548045, + "grad_norm": 2.0391743183135986, + "learning_rate": 7.405184322701355e-06, + "loss": 0.7472, "step": 7062 }, { - "epoch": 0.5344481858423821, - "grad_norm": 2.1951372623443604, - "learning_rate": 8.718518768005963e-06, - "loss": 0.8498, + "epoch": 0.4972192889827525, + "grad_norm": 1.5938483476638794, + "learning_rate": 7.4035908822918066e-06, + "loss": 0.6972, "step": 7063 }, { - "epoch": 0.5345238545647156, - "grad_norm": 3.0464980602264404, - "learning_rate": 8.716267661504437e-06, - "loss": 0.591, + "epoch": 0.49728968673002466, + "grad_norm": 1.5913429260253906, + "learning_rate": 7.4019974208992025e-06, + "loss": 0.6414, "step": 7064 }, { - "epoch": 0.5345995232870493, - "grad_norm": 2.5231964588165283, - "learning_rate": 8.714016572771154e-06, - "loss": 0.6656, + "epoch": 0.49736008447729674, + "grad_norm": 1.9855729341506958, + "learning_rate": 7.400403938606389e-06, + "loss": 0.6589, "step": 7065 }, { - "epoch": 0.534675192009383, - "grad_norm": 1.8425929546356201, - "learning_rate": 8.711765501947074e-06, - "loss": 0.608, + "epoch": 0.4974304822245688, + "grad_norm": 1.7035506963729858, + "learning_rate": 7.398810435496212e-06, + "loss": 0.8007, "step": 7066 }, { - "epoch": 0.5347508607317165, - "grad_norm": 2.2831332683563232, - "learning_rate": 8.709514449173173e-06, - "loss": 0.691, + "epoch": 0.4975008799718409, + "grad_norm": 2.010617971420288, + "learning_rate": 7.397216911651519e-06, + "loss": 0.6365, "step": 7067 }, { - "epoch": 0.5348265294540502, - "grad_norm": 1.8244147300720215, - "learning_rate": 8.707263414590416e-06, - "loss": 0.6913, + "epoch": 0.49757127771911297, + "grad_norm": 1.7975099086761475, + "learning_rate": 7.395623367155158e-06, + "loss": 0.7183, "step": 7068 }, { - "epoch": 0.5349021981763837, - "grad_norm": 1.8097063302993774, - "learning_rate": 8.705012398339768e-06, - "loss": 0.6704, + "epoch": 0.4976416754663851, + "grad_norm": 1.8351850509643555, + "learning_rate": 7.394029802089982e-06, + "loss": 0.7388, "step": 7069 }, { - "epoch": 0.5349778668987174, - "grad_norm": 1.9643720388412476, - "learning_rate": 8.7027614005622e-06, - "loss": 0.5699, + "epoch": 0.4977120732136572, + "grad_norm": 1.765515685081482, + "learning_rate": 7.392436216538837e-06, + "loss": 0.7655, "step": 7070 }, { - "epoch": 0.5350535356210511, - "grad_norm": 2.7243692874908447, - "learning_rate": 8.700510421398676e-06, - "loss": 0.7782, + "epoch": 0.49778247096092926, + "grad_norm": 1.8757396936416626, + "learning_rate": 7.390842610584579e-06, + "loss": 0.7279, "step": 7071 }, { - "epoch": 0.5351292043433846, - "grad_norm": 1.7081341743469238, - "learning_rate": 8.698259460990155e-06, - "loss": 0.7373, + "epoch": 0.49785286870820133, + "grad_norm": 2.2329275608062744, + "learning_rate": 7.389248984310062e-06, + "loss": 0.7842, "step": 7072 }, { - "epoch": 0.5352048730657183, - "grad_norm": 1.9497652053833008, - "learning_rate": 8.696008519477607e-06, - "loss": 0.5009, + "epoch": 0.4979232664554734, + "grad_norm": 1.792484164237976, + "learning_rate": 7.387655337798135e-06, + "loss": 0.7458, "step": 7073 }, { - "epoch": 0.535280541788052, - "grad_norm": 2.6480894088745117, - "learning_rate": 8.693757597001985e-06, - "loss": 0.7564, + "epoch": 0.4979936642027455, + "grad_norm": 1.526589274406433, + "learning_rate": 7.386061671131656e-06, + "loss": 0.6638, "step": 7074 }, { - "epoch": 0.5353562105103855, - "grad_norm": 1.7461856603622437, - "learning_rate": 8.691506693704252e-06, - "loss": 0.6427, + "epoch": 0.4980640619500176, + "grad_norm": 1.6607707738876343, + "learning_rate": 7.384467984393482e-06, + "loss": 0.6695, "step": 7075 }, { - "epoch": 0.5354318792327192, - "grad_norm": 2.0621261596679688, - "learning_rate": 8.68925580972537e-06, - "loss": 0.7876, + "epoch": 0.4981344596972897, + "grad_norm": 1.6075760126113892, + "learning_rate": 7.382874277666468e-06, + "loss": 0.773, "step": 7076 }, { - "epoch": 0.5355075479550527, - "grad_norm": 2.9666640758514404, - "learning_rate": 8.687004945206293e-06, - "loss": 0.6868, + "epoch": 0.4982048574445618, + "grad_norm": 1.5864406824111938, + "learning_rate": 7.381280551033475e-06, + "loss": 0.5967, "step": 7077 }, { - "epoch": 0.5355832166773864, - "grad_norm": 1.9791302680969238, - "learning_rate": 8.68475410028798e-06, - "loss": 0.7977, + "epoch": 0.49827525519183385, + "grad_norm": 1.5365228652954102, + "learning_rate": 7.379686804577361e-06, + "loss": 0.5035, "step": 7078 }, { - "epoch": 0.5356588853997201, - "grad_norm": 2.7180376052856445, - "learning_rate": 8.682503275111385e-06, - "loss": 0.8495, + "epoch": 0.49834565293910593, + "grad_norm": 1.7556229829788208, + "learning_rate": 7.378093038380986e-06, + "loss": 0.7142, "step": 7079 }, { - "epoch": 0.5357345541220536, - "grad_norm": 2.0174508094787598, - "learning_rate": 8.680252469817459e-06, - "loss": 0.7696, + "epoch": 0.498416050686378, + "grad_norm": 1.8772276639938354, + "learning_rate": 7.37649925252721e-06, + "loss": 0.6101, "step": 7080 }, { - "epoch": 0.5358102228443873, - "grad_norm": 2.2519609928131104, - "learning_rate": 8.678001684547159e-06, - "loss": 0.6735, + "epoch": 0.49848644843365014, + "grad_norm": 1.6779102087020874, + "learning_rate": 7.374905447098898e-06, + "loss": 0.6835, "step": 7081 }, { - "epoch": 0.535885891566721, - "grad_norm": 2.086402654647827, - "learning_rate": 8.675750919441436e-06, - "loss": 0.7296, + "epoch": 0.4985568461809222, + "grad_norm": 1.6635017395019531, + "learning_rate": 7.373311622178911e-06, + "loss": 0.7937, "step": 7082 }, { - "epoch": 0.5359615602890545, - "grad_norm": 2.2186553478240967, - "learning_rate": 8.67350017464124e-06, - "loss": 0.7418, + "epoch": 0.4986272439281943, + "grad_norm": 1.9517468214035034, + "learning_rate": 7.3717177778501175e-06, + "loss": 0.6826, "step": 7083 }, { - "epoch": 0.5360372290113882, - "grad_norm": 2.314690589904785, - "learning_rate": 8.671249450287517e-06, - "loss": 0.8358, + "epoch": 0.4986976416754664, + "grad_norm": 1.8456121683120728, + "learning_rate": 7.3701239141953776e-06, + "loss": 0.7476, "step": 7084 }, { - "epoch": 0.5361128977337217, - "grad_norm": 2.289297580718994, - "learning_rate": 8.668998746521215e-06, - "loss": 0.8058, + "epoch": 0.49876803942273845, + "grad_norm": 1.8652701377868652, + "learning_rate": 7.368530031297561e-06, + "loss": 0.7342, "step": 7085 }, { - "epoch": 0.5361885664560554, - "grad_norm": 2.941833972930908, - "learning_rate": 8.666748063483284e-06, - "loss": 0.7801, + "epoch": 0.4988384371700106, + "grad_norm": 1.729322910308838, + "learning_rate": 7.366936129239534e-06, + "loss": 0.6523, "step": 7086 }, { - "epoch": 0.5362642351783891, - "grad_norm": 2.7016420364379883, - "learning_rate": 8.66449740131467e-06, - "loss": 0.6243, + "epoch": 0.49890883491728266, + "grad_norm": 1.5562351942062378, + "learning_rate": 7.3653422081041675e-06, + "loss": 0.6687, "step": 7087 }, { - "epoch": 0.5363399039007226, - "grad_norm": 2.9800827503204346, - "learning_rate": 8.66224676015631e-06, - "loss": 0.8864, + "epoch": 0.49897923266455474, + "grad_norm": 2.2158565521240234, + "learning_rate": 7.363748267974326e-06, + "loss": 0.6906, "step": 7088 }, { - "epoch": 0.5364155726230563, - "grad_norm": 2.015385389328003, - "learning_rate": 8.659996140149154e-06, - "loss": 0.7499, + "epoch": 0.4990496304118268, + "grad_norm": 1.9811062812805176, + "learning_rate": 7.362154308932883e-06, + "loss": 0.7286, "step": 7089 }, { - "epoch": 0.5364912413453898, - "grad_norm": 2.1981661319732666, - "learning_rate": 8.657745541434134e-06, - "loss": 0.6763, + "epoch": 0.4991200281590989, + "grad_norm": 1.9973886013031006, + "learning_rate": 7.36056033106271e-06, + "loss": 0.7246, "step": 7090 }, { - "epoch": 0.5365669100677235, - "grad_norm": 1.9867689609527588, - "learning_rate": 8.655494964152199e-06, - "loss": 0.6845, + "epoch": 0.49919042590637097, + "grad_norm": 1.643250584602356, + "learning_rate": 7.358966334446677e-06, + "loss": 0.7212, "step": 7091 }, { - "epoch": 0.5366425787900572, - "grad_norm": 2.070387125015259, - "learning_rate": 8.653244408444284e-06, - "loss": 0.7047, + "epoch": 0.4992608236536431, + "grad_norm": 1.6952171325683594, + "learning_rate": 7.357372319167662e-06, + "loss": 0.6646, "step": 7092 }, { - "epoch": 0.5367182475123907, - "grad_norm": 1.7507538795471191, - "learning_rate": 8.650993874451324e-06, - "loss": 0.9036, + "epoch": 0.4993312214009152, + "grad_norm": 1.6178512573242188, + "learning_rate": 7.355778285308537e-06, + "loss": 0.6648, "step": 7093 }, { - "epoch": 0.5367939162347244, - "grad_norm": 2.0866281986236572, - "learning_rate": 8.648743362314259e-06, - "loss": 0.7097, + "epoch": 0.49940161914818726, + "grad_norm": 1.6944774389266968, + "learning_rate": 7.354184232952174e-06, + "loss": 0.7332, "step": 7094 }, { - "epoch": 0.536869584957058, - "grad_norm": 3.4941627979278564, - "learning_rate": 8.646492872174018e-06, - "loss": 0.6882, + "epoch": 0.49947201689545934, + "grad_norm": 1.8400647640228271, + "learning_rate": 7.3525901621814525e-06, + "loss": 0.8006, "step": 7095 }, { - "epoch": 0.5369452536793916, - "grad_norm": 1.8780581951141357, - "learning_rate": 8.644242404171536e-06, - "loss": 0.7404, + "epoch": 0.4995424146427314, + "grad_norm": 1.9879798889160156, + "learning_rate": 7.35099607307925e-06, + "loss": 0.65, "step": 7096 }, { - "epoch": 0.5370209224017253, - "grad_norm": 1.995606541633606, - "learning_rate": 8.641991958447748e-06, - "loss": 0.7844, + "epoch": 0.49961281239000355, + "grad_norm": 1.9663934707641602, + "learning_rate": 7.349401965728444e-06, + "loss": 0.7327, "step": 7097 }, { - "epoch": 0.5370965911240588, - "grad_norm": 2.5743753910064697, - "learning_rate": 8.63974153514358e-06, - "loss": 0.7448, + "epoch": 0.4996832101372756, + "grad_norm": 1.7615514993667603, + "learning_rate": 7.347807840211912e-06, + "loss": 0.6439, "step": 7098 }, { - "epoch": 0.5371722598463925, - "grad_norm": 2.6984145641326904, - "learning_rate": 8.637491134399965e-06, - "loss": 0.7314, + "epoch": 0.4997536078845477, + "grad_norm": 1.9616082906723022, + "learning_rate": 7.346213696612538e-06, + "loss": 0.729, "step": 7099 }, { - "epoch": 0.5372479285687262, - "grad_norm": 2.029510021209717, - "learning_rate": 8.63524075635782e-06, - "loss": 0.6158, + "epoch": 0.4998240056318198, + "grad_norm": 1.9506783485412598, + "learning_rate": 7.344619535013199e-06, + "loss": 0.632, "step": 7100 }, { - "epoch": 0.5373235972910597, - "grad_norm": 2.0353198051452637, - "learning_rate": 8.632990401158086e-06, - "loss": 0.6718, + "epoch": 0.49989440337909186, + "grad_norm": 1.507854700088501, + "learning_rate": 7.343025355496777e-06, + "loss": 0.7618, "step": 7101 }, { - "epoch": 0.5373992660133934, - "grad_norm": 1.9957705736160278, - "learning_rate": 8.630740068941678e-06, - "loss": 0.7376, + "epoch": 0.49996480112636393, + "grad_norm": 1.787569284439087, + "learning_rate": 7.3414311581461575e-06, + "loss": 0.7279, "step": 7102 }, { - "epoch": 0.5374749347357269, - "grad_norm": 2.259077787399292, - "learning_rate": 8.628489759849522e-06, - "loss": 0.5768, + "epoch": 0.500035198873636, + "grad_norm": 1.6318904161453247, + "learning_rate": 7.339836943044222e-06, + "loss": 0.6585, "step": 7103 }, { - "epoch": 0.5375506034580606, - "grad_norm": 2.07145619392395, - "learning_rate": 8.626239474022538e-06, - "loss": 0.591, + "epoch": 0.5001055966209081, + "grad_norm": 1.770500659942627, + "learning_rate": 7.338242710273856e-06, + "loss": 0.7299, "step": 7104 }, { - "epoch": 0.5376262721803943, - "grad_norm": 1.6986279487609863, - "learning_rate": 8.623989211601645e-06, - "loss": 0.9477, + "epoch": 0.5001759943681802, + "grad_norm": 1.5977431535720825, + "learning_rate": 7.3366484599179464e-06, + "loss": 0.6059, "step": 7105 }, { - "epoch": 0.5377019409027278, - "grad_norm": 2.296743392944336, - "learning_rate": 8.621738972727768e-06, - "loss": 0.6912, + "epoch": 0.5002463921154523, + "grad_norm": 1.6358646154403687, + "learning_rate": 7.335054192059379e-06, + "loss": 0.7664, "step": 7106 }, { - "epoch": 0.5377776096250615, - "grad_norm": 9.655526161193848, - "learning_rate": 8.619488757541817e-06, - "loss": 0.678, + "epoch": 0.5003167898627244, + "grad_norm": 1.546375036239624, + "learning_rate": 7.333459906781038e-06, + "loss": 0.6073, "step": 7107 }, { - "epoch": 0.5378532783473952, - "grad_norm": 1.8935918807983398, - "learning_rate": 8.617238566184713e-06, - "loss": 0.7756, + "epoch": 0.5003871876099965, + "grad_norm": 1.6986106634140015, + "learning_rate": 7.331865604165817e-06, + "loss": 0.6012, "step": 7108 }, { - "epoch": 0.5379289470697287, - "grad_norm": 2.278226137161255, - "learning_rate": 8.614988398797366e-06, - "loss": 0.7101, + "epoch": 0.5004575853572686, + "grad_norm": 1.9632481336593628, + "learning_rate": 7.330271284296601e-06, + "loss": 0.7696, "step": 7109 }, { - "epoch": 0.5380046157920624, - "grad_norm": 2.2196240425109863, - "learning_rate": 8.612738255520689e-06, - "loss": 0.7687, + "epoch": 0.5005279831045406, + "grad_norm": 1.8008967638015747, + "learning_rate": 7.328676947256283e-06, + "loss": 0.8308, "step": 7110 }, { - "epoch": 0.5380802845143959, - "grad_norm": 2.080031156539917, - "learning_rate": 8.610488136495599e-06, - "loss": 0.7437, + "epoch": 0.5005983808518127, + "grad_norm": 1.7403053045272827, + "learning_rate": 7.327082593127753e-06, + "loss": 0.6851, "step": 7111 }, { - "epoch": 0.5381559532367296, - "grad_norm": 1.6355682611465454, - "learning_rate": 8.608238041863e-06, - "loss": 0.6928, + "epoch": 0.5006687785990849, + "grad_norm": 1.5456092357635498, + "learning_rate": 7.3254882219939e-06, + "loss": 0.6895, "step": 7112 }, { - "epoch": 0.5382316219590633, - "grad_norm": 2.384467124938965, - "learning_rate": 8.605987971763803e-06, - "loss": 0.788, + "epoch": 0.5007391763463569, + "grad_norm": 1.605785608291626, + "learning_rate": 7.3238938339376225e-06, + "loss": 0.619, "step": 7113 }, { - "epoch": 0.5383072906813968, - "grad_norm": 2.125551223754883, - "learning_rate": 8.603737926338912e-06, - "loss": 0.6659, + "epoch": 0.500809574093629, + "grad_norm": 1.5502943992614746, + "learning_rate": 7.322299429041811e-06, + "loss": 0.6959, "step": 7114 }, { - "epoch": 0.5383829594037305, - "grad_norm": 2.0060691833496094, - "learning_rate": 8.601487905729235e-06, - "loss": 0.6208, + "epoch": 0.500879971840901, + "grad_norm": 1.7430022954940796, + "learning_rate": 7.320705007389358e-06, + "loss": 0.6782, "step": 7115 }, { - "epoch": 0.538458628126064, - "grad_norm": 2.356447458267212, - "learning_rate": 8.59923791007567e-06, - "loss": 0.7137, + "epoch": 0.5009503695881732, + "grad_norm": 1.8654053211212158, + "learning_rate": 7.319110569063163e-06, + "loss": 0.75, "step": 7116 }, { - "epoch": 0.5385342968483977, - "grad_norm": 2.69724178314209, - "learning_rate": 8.596987939519128e-06, - "loss": 0.7895, + "epoch": 0.5010207673354453, + "grad_norm": 2.0523219108581543, + "learning_rate": 7.317516114146122e-06, + "loss": 0.8049, "step": 7117 }, { - "epoch": 0.5386099655707314, - "grad_norm": 2.0127129554748535, - "learning_rate": 8.594737994200504e-06, - "loss": 0.5838, + "epoch": 0.5010911650827173, + "grad_norm": 1.8232492208480835, + "learning_rate": 7.315921642721129e-06, + "loss": 0.7184, "step": 7118 }, { - "epoch": 0.5386856342930649, - "grad_norm": 2.2311911582946777, - "learning_rate": 8.592488074260698e-06, - "loss": 0.7257, + "epoch": 0.5011615628299895, + "grad_norm": 1.39983069896698, + "learning_rate": 7.314327154871082e-06, + "loss": 0.6538, "step": 7119 }, { - "epoch": 0.5387613030153986, - "grad_norm": 2.5712006092071533, - "learning_rate": 8.590238179840606e-06, - "loss": 0.7806, + "epoch": 0.5012319605772615, + "grad_norm": 1.7261128425598145, + "learning_rate": 7.312732650678884e-06, + "loss": 0.755, "step": 7120 }, { - "epoch": 0.5388369717377323, - "grad_norm": 2.380955219268799, - "learning_rate": 8.587988311081122e-06, - "loss": 0.7459, + "epoch": 0.5013023583245336, + "grad_norm": 1.7712175846099854, + "learning_rate": 7.311138130227431e-06, + "loss": 0.7795, "step": 7121 }, { - "epoch": 0.5389126404600658, - "grad_norm": 2.549931764602661, - "learning_rate": 8.585738468123147e-06, - "loss": 0.6662, + "epoch": 0.5013727560718056, + "grad_norm": 1.8834385871887207, + "learning_rate": 7.309543593599626e-06, + "loss": 0.7372, "step": 7122 }, { - "epoch": 0.5389883091823995, - "grad_norm": 2.1435601711273193, - "learning_rate": 8.583488651107566e-06, - "loss": 0.7061, + "epoch": 0.5014431538190778, + "grad_norm": 1.5355371236801147, + "learning_rate": 7.307949040878369e-06, + "loss": 0.6514, "step": 7123 }, { - "epoch": 0.539063977904733, - "grad_norm": 5.311253070831299, - "learning_rate": 8.581238860175276e-06, - "loss": 0.8853, + "epoch": 0.5015135515663499, + "grad_norm": 1.7859551906585693, + "learning_rate": 7.306354472146561e-06, + "loss": 0.6314, "step": 7124 }, { - "epoch": 0.5391396466270667, - "grad_norm": 2.5951385498046875, - "learning_rate": 8.578989095467161e-06, - "loss": 0.6598, + "epoch": 0.5015839493136219, + "grad_norm": 1.8737505674362183, + "learning_rate": 7.304759887487109e-06, + "loss": 0.6674, "step": 7125 }, { - "epoch": 0.5392153153494004, - "grad_norm": 2.184601068496704, - "learning_rate": 8.576739357124107e-06, - "loss": 0.6537, + "epoch": 0.5016543470608941, + "grad_norm": 1.6584560871124268, + "learning_rate": 7.3031652869829134e-06, + "loss": 0.6545, "step": 7126 }, { - "epoch": 0.5392909840717339, - "grad_norm": 2.4066100120544434, - "learning_rate": 8.57448964528701e-06, - "loss": 0.7578, + "epoch": 0.5017247448081661, + "grad_norm": 1.7574843168258667, + "learning_rate": 7.301570670716878e-06, + "loss": 0.6592, "step": 7127 }, { - "epoch": 0.5393666527940676, - "grad_norm": 3.0579781532287598, - "learning_rate": 8.572239960096742e-06, - "loss": 0.7298, + "epoch": 0.5017951425554382, + "grad_norm": 2.105433225631714, + "learning_rate": 7.299976038771912e-06, + "loss": 0.683, "step": 7128 }, { - "epoch": 0.5394423215164011, - "grad_norm": 1.7824645042419434, - "learning_rate": 8.569990301694196e-06, - "loss": 0.5753, + "epoch": 0.5018655403027104, + "grad_norm": 1.6905643939971924, + "learning_rate": 7.298381391230921e-06, + "loss": 0.6744, "step": 7129 }, { - "epoch": 0.5395179902387348, - "grad_norm": 2.685590982437134, - "learning_rate": 8.567740670220246e-06, - "loss": 0.7501, + "epoch": 0.5019359380499824, + "grad_norm": 1.8170329332351685, + "learning_rate": 7.29678672817681e-06, + "loss": 0.5902, "step": 7130 }, { - "epoch": 0.5395936589610685, - "grad_norm": 2.148221254348755, - "learning_rate": 8.565491065815771e-06, - "loss": 0.5976, + "epoch": 0.5020063357972545, + "grad_norm": 1.7687439918518066, + "learning_rate": 7.295192049692488e-06, + "loss": 0.8027, "step": 7131 }, { - "epoch": 0.539669327683402, - "grad_norm": 2.3486313819885254, - "learning_rate": 8.563241488621652e-06, - "loss": 0.8495, + "epoch": 0.5020767335445265, + "grad_norm": 1.761724591255188, + "learning_rate": 7.293597355860866e-06, + "loss": 0.7348, "step": 7132 }, { - "epoch": 0.5397449964057357, - "grad_norm": 2.0380804538726807, - "learning_rate": 8.560991938778767e-06, - "loss": 0.6935, + "epoch": 0.5021471312917987, + "grad_norm": 1.6922687292099, + "learning_rate": 7.292002646764849e-06, + "loss": 0.5966, "step": 7133 }, { - "epoch": 0.5398206651280694, - "grad_norm": 2.2446913719177246, - "learning_rate": 8.558742416427985e-06, - "loss": 0.6925, + "epoch": 0.5022175290390708, + "grad_norm": 1.8522456884384155, + "learning_rate": 7.29040792248735e-06, + "loss": 0.69, "step": 7134 }, { - "epoch": 0.5398963338504029, - "grad_norm": 2.0997154712677, - "learning_rate": 8.55649292171018e-06, - "loss": 0.5679, + "epoch": 0.5022879267863428, + "grad_norm": 1.6940529346466064, + "learning_rate": 7.288813183111283e-06, + "loss": 0.6609, "step": 7135 }, { - "epoch": 0.5399720025727366, - "grad_norm": 2.4768741130828857, - "learning_rate": 8.55424345476622e-06, - "loss": 0.7029, + "epoch": 0.502358324533615, + "grad_norm": 1.7849763631820679, + "learning_rate": 7.2872184287195534e-06, + "loss": 0.7766, "step": 7136 }, { - "epoch": 0.5400476712950701, - "grad_norm": 3.763378858566284, - "learning_rate": 8.551994015736978e-06, - "loss": 0.6053, + "epoch": 0.502428722280887, + "grad_norm": 1.767564058303833, + "learning_rate": 7.285623659395079e-06, + "loss": 0.7104, "step": 7137 }, { - "epoch": 0.5401233400174038, - "grad_norm": 1.940919280052185, - "learning_rate": 8.549744604763322e-06, - "loss": 0.6981, + "epoch": 0.5024991200281591, + "grad_norm": 1.7830849885940552, + "learning_rate": 7.284028875220774e-06, + "loss": 0.6966, "step": 7138 }, { - "epoch": 0.5401990087397375, - "grad_norm": 2.1327764987945557, - "learning_rate": 8.547495221986114e-06, - "loss": 0.8293, + "epoch": 0.5025695177754312, + "grad_norm": 1.9503488540649414, + "learning_rate": 7.282434076279548e-06, + "loss": 0.6704, "step": 7139 }, { - "epoch": 0.540274677462071, - "grad_norm": 2.6629951000213623, - "learning_rate": 8.54524586754622e-06, - "loss": 0.8285, + "epoch": 0.5026399155227033, + "grad_norm": 1.7715520858764648, + "learning_rate": 7.280839262654318e-06, + "loss": 0.8012, "step": 7140 }, { - "epoch": 0.5403503461844047, - "grad_norm": 2.080131769180298, - "learning_rate": 8.542996541584498e-06, - "loss": 0.7531, + "epoch": 0.5027103132699754, + "grad_norm": 1.6110767126083374, + "learning_rate": 7.279244434428002e-06, + "loss": 0.6652, "step": 7141 }, { - "epoch": 0.5404260149067383, - "grad_norm": 1.9729998111724854, - "learning_rate": 8.540747244241811e-06, - "loss": 0.9104, + "epoch": 0.5027807110172474, + "grad_norm": 2.196089506149292, + "learning_rate": 7.2776495916835145e-06, + "loss": 0.5907, "step": 7142 }, { - "epoch": 0.5405016836290719, - "grad_norm": 1.957764744758606, - "learning_rate": 8.53849797565902e-06, - "loss": 0.836, + "epoch": 0.5028511087645195, + "grad_norm": 2.0235021114349365, + "learning_rate": 7.276054734503773e-06, + "loss": 0.7934, "step": 7143 }, { - "epoch": 0.5405773523514056, - "grad_norm": 2.3214311599731445, - "learning_rate": 8.536248735976976e-06, - "loss": 0.6771, + "epoch": 0.5029215065117916, + "grad_norm": 2.0558760166168213, + "learning_rate": 7.2744598629716966e-06, + "loss": 0.6466, "step": 7144 }, { - "epoch": 0.5406530210737391, - "grad_norm": 1.9388372898101807, - "learning_rate": 8.533999525336536e-06, - "loss": 0.6914, + "epoch": 0.5029919042590637, + "grad_norm": 2.148022413253784, + "learning_rate": 7.272864977170202e-06, + "loss": 0.7938, "step": 7145 }, { - "epoch": 0.5407286897960728, - "grad_norm": 2.0969862937927246, - "learning_rate": 8.531750343878551e-06, - "loss": 0.7405, + "epoch": 0.5030623020063358, + "grad_norm": 1.7599444389343262, + "learning_rate": 7.271270077182211e-06, + "loss": 0.7471, "step": 7146 }, { - "epoch": 0.5408043585184065, - "grad_norm": 2.0790417194366455, - "learning_rate": 8.529501191743876e-06, - "loss": 0.7561, + "epoch": 0.5031326997536079, + "grad_norm": 1.636151671409607, + "learning_rate": 7.269675163090643e-06, + "loss": 0.7012, "step": 7147 }, { - "epoch": 0.54088002724074, - "grad_norm": 1.9571138620376587, - "learning_rate": 8.527252069073359e-06, - "loss": 0.8044, + "epoch": 0.50320309750088, + "grad_norm": 1.57837975025177, + "learning_rate": 7.268080234978419e-06, + "loss": 0.6114, "step": 7148 }, { - "epoch": 0.5409556959630737, - "grad_norm": 2.024289131164551, - "learning_rate": 8.525002976007848e-06, - "loss": 0.5776, + "epoch": 0.503273495248152, + "grad_norm": 1.9540324211120605, + "learning_rate": 7.266485292928461e-06, + "loss": 0.7915, "step": 7149 }, { - "epoch": 0.5410313646854072, - "grad_norm": 2.4422504901885986, - "learning_rate": 8.522753912688184e-06, - "loss": 0.7017, + "epoch": 0.5033438929954241, + "grad_norm": 1.7327295541763306, + "learning_rate": 7.264890337023693e-06, + "loss": 0.7472, "step": 7150 }, { - "epoch": 0.5411070334077409, - "grad_norm": 2.0276260375976562, - "learning_rate": 8.520504879255214e-06, - "loss": 0.5853, + "epoch": 0.5034142907426963, + "grad_norm": 1.790114402770996, + "learning_rate": 7.263295367347033e-06, + "loss": 0.7328, "step": 7151 }, { - "epoch": 0.5411827021300746, - "grad_norm": 2.331113576889038, - "learning_rate": 8.51825587584978e-06, - "loss": 0.6115, + "epoch": 0.5034846884899683, + "grad_norm": 2.052381992340088, + "learning_rate": 7.261700383981411e-06, + "loss": 0.7211, "step": 7152 }, { - "epoch": 0.5412583708524081, - "grad_norm": 1.7973954677581787, - "learning_rate": 8.516006902612721e-06, - "loss": 0.5986, + "epoch": 0.5035550862372404, + "grad_norm": 2.0784382820129395, + "learning_rate": 7.260105387009749e-06, + "loss": 0.7022, "step": 7153 }, { - "epoch": 0.5413340395747418, - "grad_norm": 2.5584282875061035, - "learning_rate": 8.513757959684877e-06, - "loss": 0.7094, + "epoch": 0.5036254839845125, + "grad_norm": 2.3974592685699463, + "learning_rate": 7.258510376514971e-06, + "loss": 0.5957, "step": 7154 }, { - "epoch": 0.5414097082970754, - "grad_norm": 2.177795886993408, - "learning_rate": 8.51150904720708e-06, - "loss": 0.6867, + "epoch": 0.5036958817317846, + "grad_norm": 1.9683984518051147, + "learning_rate": 7.256915352580003e-06, + "loss": 0.606, "step": 7155 }, { - "epoch": 0.541485377019409, - "grad_norm": 2.400580644607544, - "learning_rate": 8.509260165320168e-06, - "loss": 0.6919, + "epoch": 0.5037662794790567, + "grad_norm": 1.7287384271621704, + "learning_rate": 7.255320315287777e-06, + "loss": 0.7053, "step": 7156 }, { - "epoch": 0.5415610457417427, - "grad_norm": 1.9860256910324097, - "learning_rate": 8.50701131416497e-06, - "loss": 0.6899, + "epoch": 0.5038366772263287, + "grad_norm": 1.772331953048706, + "learning_rate": 7.2537252647212145e-06, + "loss": 0.6359, "step": 7157 }, { - "epoch": 0.5416367144640762, - "grad_norm": 1.862237572669983, - "learning_rate": 8.504762493882317e-06, - "loss": 0.6249, + "epoch": 0.5039070749736009, + "grad_norm": 1.541143536567688, + "learning_rate": 7.252130200963245e-06, + "loss": 0.7639, "step": 7158 }, { - "epoch": 0.5417123831864099, - "grad_norm": 2.687009811401367, - "learning_rate": 8.50251370461304e-06, - "loss": 0.6726, + "epoch": 0.5039774727208729, + "grad_norm": 2.170760154724121, + "learning_rate": 7.250535124096801e-06, + "loss": 0.7871, "step": 7159 }, { - "epoch": 0.5417880519087436, - "grad_norm": 1.91978120803833, - "learning_rate": 8.500264946497967e-06, - "loss": 0.5824, + "epoch": 0.504047870468145, + "grad_norm": 1.9154008626937866, + "learning_rate": 7.2489400342048044e-06, + "loss": 0.6043, "step": 7160 }, { - "epoch": 0.5418637206310771, - "grad_norm": 2.6464669704437256, - "learning_rate": 8.498016219677915e-06, - "loss": 0.6439, + "epoch": 0.504118268215417, + "grad_norm": 1.6673364639282227, + "learning_rate": 7.247344931370193e-06, + "loss": 0.7163, "step": 7161 }, { - "epoch": 0.5419393893534108, - "grad_norm": 2.026582717895508, - "learning_rate": 8.49576752429371e-06, - "loss": 0.76, + "epoch": 0.5041886659626892, + "grad_norm": 1.8303697109222412, + "learning_rate": 7.245749815675894e-06, + "loss": 0.6836, "step": 7162 }, { - "epoch": 0.5420150580757443, - "grad_norm": 2.627919912338257, - "learning_rate": 8.493518860486177e-06, - "loss": 0.7907, + "epoch": 0.5042590637099613, + "grad_norm": 1.7102457284927368, + "learning_rate": 7.24415468720484e-06, + "loss": 0.7046, "step": 7163 }, { - "epoch": 0.542090726798078, - "grad_norm": 2.2149717807769775, - "learning_rate": 8.49127022839613e-06, - "loss": 0.6007, + "epoch": 0.5043294614572333, + "grad_norm": 1.73294198513031, + "learning_rate": 7.242559546039962e-06, + "loss": 0.7331, "step": 7164 }, { - "epoch": 0.5421663955204117, - "grad_norm": 1.838017225265503, - "learning_rate": 8.489021628164388e-06, - "loss": 0.6642, + "epoch": 0.5043998592045055, + "grad_norm": 1.9080764055252075, + "learning_rate": 7.240964392264195e-06, + "loss": 0.6396, "step": 7165 }, { - "epoch": 0.5422420642427452, - "grad_norm": 2.120692014694214, - "learning_rate": 8.486773059931763e-06, - "loss": 0.5709, + "epoch": 0.5044702569517775, + "grad_norm": 1.8342314958572388, + "learning_rate": 7.239369225960469e-06, + "loss": 0.7046, "step": 7166 }, { - "epoch": 0.5423177329650789, - "grad_norm": 2.953522205352783, - "learning_rate": 8.484524523839067e-06, - "loss": 0.6147, + "epoch": 0.5045406546990496, + "grad_norm": 1.6073322296142578, + "learning_rate": 7.237774047211721e-06, + "loss": 0.6448, "step": 7167 }, { - "epoch": 0.5423934016874125, - "grad_norm": 1.9977935552597046, - "learning_rate": 8.482276020027114e-06, - "loss": 0.7301, + "epoch": 0.5046110524463218, + "grad_norm": 1.8853185176849365, + "learning_rate": 7.236178856100886e-06, + "loss": 0.6386, "step": 7168 }, { - "epoch": 0.5424690704097461, - "grad_norm": 2.271860361099243, - "learning_rate": 8.480027548636714e-06, - "loss": 0.6553, + "epoch": 0.5046814501935938, + "grad_norm": 1.8373249769210815, + "learning_rate": 7.234583652710895e-06, + "loss": 0.6923, "step": 7169 }, { - "epoch": 0.5425447391320798, - "grad_norm": 1.9562822580337524, - "learning_rate": 8.477779109808668e-06, - "loss": 0.638, + "epoch": 0.5047518479408659, + "grad_norm": 3.1050891876220703, + "learning_rate": 7.232988437124689e-06, + "loss": 0.8002, "step": 7170 }, { - "epoch": 0.5426204078544133, - "grad_norm": 2.308135509490967, - "learning_rate": 8.475530703683784e-06, - "loss": 0.781, + "epoch": 0.5048222456881379, + "grad_norm": 1.8506361246109009, + "learning_rate": 7.231393209425206e-06, + "loss": 0.5963, "step": 7171 }, { - "epoch": 0.542696076576747, - "grad_norm": 1.8787046670913696, - "learning_rate": 8.47328233040286e-06, - "loss": 0.6161, + "epoch": 0.5048926434354101, + "grad_norm": 1.693061113357544, + "learning_rate": 7.229797969695378e-06, + "loss": 0.7014, "step": 7172 }, { - "epoch": 0.5427717452990807, - "grad_norm": 2.3262102603912354, - "learning_rate": 8.471033990106703e-06, - "loss": 0.6771, + "epoch": 0.5049630411826822, + "grad_norm": 1.800511360168457, + "learning_rate": 7.228202718018145e-06, + "loss": 0.6652, "step": 7173 }, { - "epoch": 0.5428474140214142, - "grad_norm": 2.1242523193359375, - "learning_rate": 8.46878568293611e-06, - "loss": 0.8264, + "epoch": 0.5050334389299542, + "grad_norm": 1.9508299827575684, + "learning_rate": 7.226607454476448e-06, + "loss": 0.6739, "step": 7174 }, { - "epoch": 0.5429230827437479, - "grad_norm": 3.241060256958008, - "learning_rate": 8.466537409031875e-06, - "loss": 0.5887, + "epoch": 0.5051038366772264, + "grad_norm": 1.8487663269042969, + "learning_rate": 7.225012179153221e-06, + "loss": 0.717, "step": 7175 }, { - "epoch": 0.5429987514660815, - "grad_norm": 1.77642023563385, - "learning_rate": 8.464289168534794e-06, - "loss": 0.6155, + "epoch": 0.5051742344244984, + "grad_norm": 1.779911756515503, + "learning_rate": 7.2234168921314096e-06, + "loss": 0.6634, "step": 7176 }, { - "epoch": 0.5430744201884151, - "grad_norm": 2.4458696842193604, - "learning_rate": 8.462040961585655e-06, - "loss": 0.6388, + "epoch": 0.5052446321717705, + "grad_norm": 1.687700867652893, + "learning_rate": 7.221821593493951e-06, + "loss": 0.664, "step": 7177 }, { - "epoch": 0.5431500889107488, - "grad_norm": 1.5858855247497559, - "learning_rate": 8.459792788325251e-06, - "loss": 0.6843, + "epoch": 0.5053150299190425, + "grad_norm": 1.9689269065856934, + "learning_rate": 7.220226283323788e-06, + "loss": 0.7029, "step": 7178 }, { - "epoch": 0.5432257576330823, - "grad_norm": 2.241321325302124, - "learning_rate": 8.457544648894372e-06, - "loss": 0.6885, + "epoch": 0.5053854276663147, + "grad_norm": 1.9033743143081665, + "learning_rate": 7.21863096170386e-06, + "loss": 0.8109, "step": 7179 }, { - "epoch": 0.543301426355416, - "grad_norm": 1.9236705303192139, - "learning_rate": 8.4552965434338e-06, - "loss": 0.6897, + "epoch": 0.5054558254135868, + "grad_norm": 1.9979774951934814, + "learning_rate": 7.21703562871711e-06, + "loss": 0.7432, "step": 7180 }, { - "epoch": 0.5433770950777496, - "grad_norm": 2.625894069671631, - "learning_rate": 8.453048472084323e-06, - "loss": 0.7572, + "epoch": 0.5055262231608588, + "grad_norm": 2.107221841812134, + "learning_rate": 7.215440284446481e-06, + "loss": 0.6958, "step": 7181 }, { - "epoch": 0.5434527638000832, - "grad_norm": 1.9380083084106445, - "learning_rate": 8.450800434986716e-06, - "loss": 0.8159, + "epoch": 0.505596620908131, + "grad_norm": 1.423128366470337, + "learning_rate": 7.213844928974916e-06, + "loss": 0.6254, "step": 7182 }, { - "epoch": 0.5435284325224169, - "grad_norm": 2.195018768310547, - "learning_rate": 8.448552432281763e-06, - "loss": 0.7674, + "epoch": 0.505667018655403, + "grad_norm": 1.9390244483947754, + "learning_rate": 7.212249562385361e-06, + "loss": 0.6759, "step": 7183 }, { - "epoch": 0.5436041012447504, - "grad_norm": 3.2216532230377197, - "learning_rate": 8.446304464110243e-06, - "loss": 0.7002, + "epoch": 0.5057374164026751, + "grad_norm": 2.2053797245025635, + "learning_rate": 7.210654184760759e-06, + "loss": 0.6848, "step": 7184 }, { - "epoch": 0.5436797699670841, - "grad_norm": 2.094017505645752, - "learning_rate": 8.444056530612926e-06, - "loss": 0.6522, + "epoch": 0.5058078141499472, + "grad_norm": 1.925341248512268, + "learning_rate": 7.2090587961840545e-06, + "loss": 0.6335, "step": 7185 }, { - "epoch": 0.5437554386894178, - "grad_norm": 2.447477340698242, - "learning_rate": 8.441808631930588e-06, - "loss": 0.8029, + "epoch": 0.5058782118972193, + "grad_norm": 1.6882789134979248, + "learning_rate": 7.207463396738193e-06, + "loss": 0.718, "step": 7186 }, { - "epoch": 0.5438311074117513, - "grad_norm": 1.7024062871932983, - "learning_rate": 8.439560768203996e-06, - "loss": 0.6894, + "epoch": 0.5059486096444914, + "grad_norm": 1.6522361040115356, + "learning_rate": 7.205867986506124e-06, + "loss": 0.729, "step": 7187 }, { - "epoch": 0.543906776134085, - "grad_norm": 1.634425163269043, - "learning_rate": 8.437312939573925e-06, - "loss": 0.6593, + "epoch": 0.5060190073917634, + "grad_norm": 1.8419979810714722, + "learning_rate": 7.204272565570789e-06, + "loss": 0.6755, "step": 7188 }, { - "epoch": 0.5439824448564186, - "grad_norm": 2.105696678161621, - "learning_rate": 8.435065146181135e-06, - "loss": 0.7249, + "epoch": 0.5060894051390356, + "grad_norm": 2.04264497756958, + "learning_rate": 7.2026771340151415e-06, + "loss": 0.7164, "step": 7189 }, { - "epoch": 0.5440581135787522, - "grad_norm": 2.9935972690582275, - "learning_rate": 8.432817388166395e-06, - "loss": 0.632, + "epoch": 0.5061598028863077, + "grad_norm": 1.740890383720398, + "learning_rate": 7.201081691922123e-06, + "loss": 0.7199, "step": 7190 }, { - "epoch": 0.5441337823010859, - "grad_norm": 2.017632007598877, - "learning_rate": 8.430569665670464e-06, - "loss": 0.8673, + "epoch": 0.5062302006335797, + "grad_norm": 2.401780128479004, + "learning_rate": 7.199486239374685e-06, + "loss": 0.6574, "step": 7191 }, { - "epoch": 0.5442094510234194, - "grad_norm": 1.9083147048950195, - "learning_rate": 8.428321978834104e-06, - "loss": 0.6792, + "epoch": 0.5063005983808518, + "grad_norm": 1.7312732934951782, + "learning_rate": 7.19789077645578e-06, + "loss": 0.7695, "step": 7192 }, { - "epoch": 0.5442851197457531, - "grad_norm": 2.1017110347747803, - "learning_rate": 8.426074327798067e-06, - "loss": 0.6695, + "epoch": 0.5063709961281239, + "grad_norm": 2.0891263484954834, + "learning_rate": 7.196295303248351e-06, + "loss": 0.6916, "step": 7193 }, { - "epoch": 0.5443607884680867, - "grad_norm": 1.9872255325317383, - "learning_rate": 8.423826712703114e-06, - "loss": 0.6914, + "epoch": 0.506441393875396, + "grad_norm": 2.103720188140869, + "learning_rate": 7.194699819835353e-06, + "loss": 0.8183, "step": 7194 }, { - "epoch": 0.5444364571904203, - "grad_norm": 2.61326003074646, - "learning_rate": 8.421579133689997e-06, - "loss": 0.6831, + "epoch": 0.5065117916226681, + "grad_norm": 1.9978855848312378, + "learning_rate": 7.1931043262997375e-06, + "loss": 0.7222, "step": 7195 }, { - "epoch": 0.544512125912754, - "grad_norm": 2.0831680297851562, - "learning_rate": 8.419331590899463e-06, - "loss": 0.662, + "epoch": 0.5065821893699402, + "grad_norm": 1.9714429378509521, + "learning_rate": 7.191508822724451e-06, + "loss": 0.6835, "step": 7196 }, { - "epoch": 0.5445877946350876, - "grad_norm": 1.7519179582595825, - "learning_rate": 8.417084084472267e-06, - "loss": 0.7071, + "epoch": 0.5066525871172123, + "grad_norm": 3.737884759902954, + "learning_rate": 7.189913309192446e-06, + "loss": 0.7012, "step": 7197 }, { - "epoch": 0.5446634633574212, - "grad_norm": 1.9903972148895264, - "learning_rate": 8.414836614549145e-06, - "loss": 0.7907, + "epoch": 0.5067229848644843, + "grad_norm": 2.326138496398926, + "learning_rate": 7.188317785786678e-06, + "loss": 0.6443, "step": 7198 }, { - "epoch": 0.5447391320797549, - "grad_norm": 3.3325815200805664, - "learning_rate": 8.412589181270849e-06, - "loss": 0.6182, + "epoch": 0.5067933826117564, + "grad_norm": 2.2467241287231445, + "learning_rate": 7.1867222525900965e-06, + "loss": 0.6672, "step": 7199 }, { - "epoch": 0.5448148008020884, - "grad_norm": 2.1972944736480713, - "learning_rate": 8.410341784778121e-06, - "loss": 0.8044, + "epoch": 0.5068637803590285, + "grad_norm": 1.7143731117248535, + "learning_rate": 7.185126709685656e-06, + "loss": 0.6272, "step": 7200 }, { - "epoch": 0.5448904695244221, - "grad_norm": 2.348930835723877, - "learning_rate": 8.408094425211695e-06, - "loss": 0.7072, + "epoch": 0.5069341781063006, + "grad_norm": 2.0546677112579346, + "learning_rate": 7.183531157156312e-06, + "loss": 0.715, "step": 7201 }, { - "epoch": 0.5449661382467557, - "grad_norm": 2.407869338989258, - "learning_rate": 8.405847102712313e-06, - "loss": 0.7567, + "epoch": 0.5070045758535727, + "grad_norm": 1.734505534172058, + "learning_rate": 7.181935595085016e-06, + "loss": 0.7118, "step": 7202 }, { - "epoch": 0.5450418069690893, - "grad_norm": 2.114015579223633, - "learning_rate": 8.403599817420702e-06, - "loss": 0.8147, + "epoch": 0.5070749736008447, + "grad_norm": 1.9087351560592651, + "learning_rate": 7.1803400235547224e-06, + "loss": 0.682, "step": 7203 }, { - "epoch": 0.545117475691423, - "grad_norm": 2.1249501705169678, - "learning_rate": 8.401352569477605e-06, - "loss": 0.6118, + "epoch": 0.5071453713481169, + "grad_norm": 1.8142451047897339, + "learning_rate": 7.178744442648389e-06, + "loss": 0.5935, "step": 7204 }, { - "epoch": 0.5451931444137565, - "grad_norm": 2.1897776126861572, - "learning_rate": 8.399105359023743e-06, - "loss": 0.6311, + "epoch": 0.5072157690953889, + "grad_norm": 2.5142006874084473, + "learning_rate": 7.17714885244897e-06, + "loss": 0.6111, "step": 7205 }, { - "epoch": 0.5452688131360902, - "grad_norm": 2.278143882751465, - "learning_rate": 8.39685818619985e-06, - "loss": 0.8238, + "epoch": 0.507286166842661, + "grad_norm": 1.7994918823242188, + "learning_rate": 7.175553253039421e-06, + "loss": 0.6354, "step": 7206 }, { - "epoch": 0.5453444818584238, - "grad_norm": 1.9254010915756226, - "learning_rate": 8.394611051146647e-06, - "loss": 0.6331, + "epoch": 0.5073565645899332, + "grad_norm": 1.820534586906433, + "learning_rate": 7.1739576445027e-06, + "loss": 0.6504, "step": 7207 }, { - "epoch": 0.5454201505807574, - "grad_norm": 1.6860229969024658, - "learning_rate": 8.392363954004855e-06, - "loss": 0.7736, + "epoch": 0.5074269623372052, + "grad_norm": 1.9252947568893433, + "learning_rate": 7.1723620269217635e-06, + "loss": 0.6892, "step": 7208 }, { - "epoch": 0.5454958193030911, - "grad_norm": 2.540018081665039, - "learning_rate": 8.390116894915201e-06, - "loss": 0.6795, + "epoch": 0.5074973600844773, + "grad_norm": 1.690758466720581, + "learning_rate": 7.170766400379568e-06, + "loss": 0.6298, "step": 7209 }, { - "epoch": 0.5455714880254247, - "grad_norm": 2.2471187114715576, - "learning_rate": 8.387869874018399e-06, - "loss": 0.7135, + "epoch": 0.5075677578317493, + "grad_norm": 1.7276288270950317, + "learning_rate": 7.169170764959074e-06, + "loss": 0.7075, "step": 7210 }, { - "epoch": 0.5456471567477583, - "grad_norm": 2.69797682762146, - "learning_rate": 8.385622891455167e-06, - "loss": 0.6507, + "epoch": 0.5076381555790215, + "grad_norm": 2.1175191402435303, + "learning_rate": 7.167575120743239e-06, + "loss": 0.7977, "step": 7211 }, { - "epoch": 0.545722825470092, - "grad_norm": 3.0235390663146973, - "learning_rate": 8.383375947366214e-06, - "loss": 0.6393, + "epoch": 0.5077085533262936, + "grad_norm": 2.422945022583008, + "learning_rate": 7.165979467815021e-06, + "loss": 0.6021, "step": 7212 }, { - "epoch": 0.5457984941924255, - "grad_norm": 2.0341522693634033, - "learning_rate": 8.381129041892252e-06, - "loss": 0.8328, + "epoch": 0.5077789510735656, + "grad_norm": 1.8297063112258911, + "learning_rate": 7.164383806257381e-06, + "loss": 0.6913, "step": 7213 }, { - "epoch": 0.5458741629147592, - "grad_norm": 1.5147240161895752, - "learning_rate": 8.378882175173996e-06, - "loss": 0.5524, + "epoch": 0.5078493488208378, + "grad_norm": 1.57808518409729, + "learning_rate": 7.162788136153277e-06, + "loss": 0.7092, "step": 7214 }, { - "epoch": 0.5459498316370928, - "grad_norm": 2.072648286819458, - "learning_rate": 8.376635347352143e-06, - "loss": 0.6394, + "epoch": 0.5079197465681098, + "grad_norm": 1.6981334686279297, + "learning_rate": 7.161192457585669e-06, + "loss": 0.6594, "step": 7215 }, { - "epoch": 0.5460255003594264, - "grad_norm": 2.308685064315796, - "learning_rate": 8.374388558567405e-06, - "loss": 0.7882, + "epoch": 0.5079901443153819, + "grad_norm": 1.7312153577804565, + "learning_rate": 7.159596770637523e-06, + "loss": 0.6934, "step": 7216 }, { - "epoch": 0.5461011690817601, - "grad_norm": 2.836092472076416, - "learning_rate": 8.372141808960474e-06, - "loss": 0.6588, + "epoch": 0.5080605420626539, + "grad_norm": 1.7815219163894653, + "learning_rate": 7.158001075391794e-06, + "loss": 0.6789, "step": 7217 }, { - "epoch": 0.5461768378040937, - "grad_norm": 2.7137436866760254, - "learning_rate": 8.369895098672053e-06, - "loss": 0.6675, + "epoch": 0.5081309398099261, + "grad_norm": 1.9189003705978394, + "learning_rate": 7.156405371931446e-06, + "loss": 0.6263, "step": 7218 }, { - "epoch": 0.5462525065264273, - "grad_norm": 1.9144799709320068, - "learning_rate": 8.367648427842842e-06, - "loss": 0.8562, + "epoch": 0.5082013375571982, + "grad_norm": 2.509727716445923, + "learning_rate": 7.154809660339442e-06, + "loss": 0.7017, "step": 7219 }, { - "epoch": 0.5463281752487609, - "grad_norm": 2.2741446495056152, - "learning_rate": 8.365401796613534e-06, - "loss": 0.6291, + "epoch": 0.5082717353044702, + "grad_norm": 2.1062777042388916, + "learning_rate": 7.153213940698742e-06, + "loss": 0.5922, "step": 7220 }, { - "epoch": 0.5464038439710945, - "grad_norm": 2.282801866531372, - "learning_rate": 8.363155205124815e-06, - "loss": 0.6835, + "epoch": 0.5083421330517424, + "grad_norm": 1.9614536762237549, + "learning_rate": 7.15161821309231e-06, + "loss": 0.7441, "step": 7221 }, { - "epoch": 0.5464795126934282, - "grad_norm": 2.0239760875701904, - "learning_rate": 8.36090865351738e-06, - "loss": 0.606, + "epoch": 0.5084125307990144, + "grad_norm": 2.0660202503204346, + "learning_rate": 7.150022477603112e-06, + "loss": 0.8596, "step": 7222 }, { - "epoch": 0.5465551814157618, - "grad_norm": 2.0465428829193115, - "learning_rate": 8.358662141931906e-06, - "loss": 0.8214, + "epoch": 0.5084829285462865, + "grad_norm": 2.2284786701202393, + "learning_rate": 7.1484267343141084e-06, + "loss": 0.6416, "step": 7223 }, { - "epoch": 0.5466308501380954, - "grad_norm": 1.8509844541549683, - "learning_rate": 8.356415670509085e-06, - "loss": 0.6769, + "epoch": 0.5085533262935586, + "grad_norm": 1.5946940183639526, + "learning_rate": 7.146830983308263e-06, + "loss": 0.5728, "step": 7224 }, { - "epoch": 0.5467065188604291, - "grad_norm": 2.0568501949310303, - "learning_rate": 8.3541692393896e-06, - "loss": 0.6617, + "epoch": 0.5086237240408307, + "grad_norm": 2.299398422241211, + "learning_rate": 7.145235224668546e-06, + "loss": 0.6643, "step": 7225 }, { - "epoch": 0.5467821875827626, - "grad_norm": 2.1013340950012207, - "learning_rate": 8.351922848714125e-06, - "loss": 0.7638, + "epoch": 0.5086941217881028, + "grad_norm": 1.8377124071121216, + "learning_rate": 7.143639458477915e-06, + "loss": 0.6262, "step": 7226 }, { - "epoch": 0.5468578563050963, - "grad_norm": 2.2861831188201904, - "learning_rate": 8.349676498623337e-06, - "loss": 0.743, + "epoch": 0.5087645195353748, + "grad_norm": 1.8305447101593018, + "learning_rate": 7.142043684819339e-06, + "loss": 0.7026, "step": 7227 }, { - "epoch": 0.5469335250274299, - "grad_norm": 2.278883457183838, - "learning_rate": 8.347430189257907e-06, - "loss": 0.6331, + "epoch": 0.508834917282647, + "grad_norm": 1.5226151943206787, + "learning_rate": 7.140447903775783e-06, + "loss": 0.7186, "step": 7228 }, { - "epoch": 0.5470091937497635, - "grad_norm": 1.7488274574279785, - "learning_rate": 8.345183920758512e-06, - "loss": 0.7192, + "epoch": 0.5089053150299191, + "grad_norm": 1.7863333225250244, + "learning_rate": 7.138852115430214e-06, + "loss": 0.7077, "step": 7229 }, { - "epoch": 0.5470848624720972, - "grad_norm": 1.8391131162643433, - "learning_rate": 8.342937693265819e-06, - "loss": 0.8068, + "epoch": 0.5089757127771911, + "grad_norm": 1.9445303678512573, + "learning_rate": 7.137256319865597e-06, + "loss": 0.7002, "step": 7230 }, { - "epoch": 0.5471605311944308, - "grad_norm": 2.0929038524627686, - "learning_rate": 8.340691506920491e-06, - "loss": 0.6009, + "epoch": 0.5090461105244632, + "grad_norm": 1.8642805814743042, + "learning_rate": 7.135660517164901e-06, + "loss": 0.6012, "step": 7231 }, { - "epoch": 0.5472361999167644, - "grad_norm": 2.610123872756958, - "learning_rate": 8.338445361863193e-06, - "loss": 0.841, + "epoch": 0.5091165082717353, + "grad_norm": 2.1673085689544678, + "learning_rate": 7.1340647074110905e-06, + "loss": 0.679, "step": 7232 }, { - "epoch": 0.547311868639098, - "grad_norm": 2.2190845012664795, - "learning_rate": 8.336199258234588e-06, - "loss": 0.7216, + "epoch": 0.5091869060190074, + "grad_norm": 1.8513344526290894, + "learning_rate": 7.132468890687133e-06, + "loss": 0.6885, "step": 7233 }, { - "epoch": 0.5473875373614316, - "grad_norm": 1.9688743352890015, - "learning_rate": 8.33395319617533e-06, - "loss": 0.63, + "epoch": 0.5092573037662795, + "grad_norm": 3.6087520122528076, + "learning_rate": 7.1308730670760005e-06, + "loss": 0.7511, "step": 7234 }, { - "epoch": 0.5474632060837653, - "grad_norm": 2.508888006210327, - "learning_rate": 8.331707175826077e-06, - "loss": 0.6036, + "epoch": 0.5093277015135516, + "grad_norm": 1.783063530921936, + "learning_rate": 7.129277236660658e-06, + "loss": 0.6081, "step": 7235 }, { - "epoch": 0.5475388748060989, - "grad_norm": 2.6636786460876465, - "learning_rate": 8.329461197327484e-06, - "loss": 0.7058, + "epoch": 0.5093980992608237, + "grad_norm": 1.844086766242981, + "learning_rate": 7.127681399524074e-06, + "loss": 0.5937, "step": 7236 }, { - "epoch": 0.5476145435284325, - "grad_norm": 2.2972769737243652, - "learning_rate": 8.3272152608202e-06, - "loss": 0.6575, + "epoch": 0.5094684970080957, + "grad_norm": 1.8665753602981567, + "learning_rate": 7.126085555749221e-06, + "loss": 0.6858, "step": 7237 }, { - "epoch": 0.5476902122507662, - "grad_norm": 2.279362440109253, - "learning_rate": 8.324969366444874e-06, - "loss": 0.6517, + "epoch": 0.5095388947553678, + "grad_norm": 1.6276103258132935, + "learning_rate": 7.124489705419063e-06, + "loss": 0.7157, "step": 7238 }, { - "epoch": 0.5477658809730998, - "grad_norm": 2.3690998554229736, - "learning_rate": 8.322723514342143e-06, - "loss": 0.6411, + "epoch": 0.5096092925026399, + "grad_norm": 1.967496633529663, + "learning_rate": 7.122893848616572e-06, + "loss": 0.8549, "step": 7239 }, { - "epoch": 0.5478415496954334, - "grad_norm": 1.700605034828186, - "learning_rate": 8.320477704652662e-06, - "loss": 0.7621, + "epoch": 0.509679690249912, + "grad_norm": 1.9417833089828491, + "learning_rate": 7.121297985424722e-06, + "loss": 0.6482, "step": 7240 }, { - "epoch": 0.547917218417767, - "grad_norm": 2.426114320755005, - "learning_rate": 8.318231937517063e-06, - "loss": 0.6546, + "epoch": 0.5097500879971841, + "grad_norm": 1.8633122444152832, + "learning_rate": 7.119702115926475e-06, + "loss": 0.773, "step": 7241 }, { - "epoch": 0.5479928871401006, - "grad_norm": 2.4113972187042236, - "learning_rate": 8.315986213075986e-06, - "loss": 0.6745, + "epoch": 0.5098204857444562, + "grad_norm": 1.5895823240280151, + "learning_rate": 7.118106240204809e-06, + "loss": 0.6198, "step": 7242 }, { - "epoch": 0.5480685558624343, - "grad_norm": 2.2647318840026855, - "learning_rate": 8.313740531470065e-06, - "loss": 0.8873, + "epoch": 0.5098908834917283, + "grad_norm": 1.6082289218902588, + "learning_rate": 7.1165103583426935e-06, + "loss": 0.7093, "step": 7243 }, { - "epoch": 0.5481442245847679, - "grad_norm": 2.242077112197876, - "learning_rate": 8.311494892839929e-06, - "loss": 0.737, + "epoch": 0.5099612812390003, + "grad_norm": 1.7809982299804688, + "learning_rate": 7.114914470423098e-06, + "loss": 0.6465, "step": 7244 }, { - "epoch": 0.5482198933071015, - "grad_norm": 3.1310784816741943, - "learning_rate": 8.30924929732621e-06, - "loss": 0.8376, + "epoch": 0.5100316789862724, + "grad_norm": 1.7603678703308105, + "learning_rate": 7.1133185765289956e-06, + "loss": 0.7207, "step": 7245 }, { - "epoch": 0.5482955620294351, - "grad_norm": 2.1693949699401855, - "learning_rate": 8.307003745069537e-06, - "loss": 0.7175, + "epoch": 0.5101020767335446, + "grad_norm": 1.9739665985107422, + "learning_rate": 7.111722676743358e-06, + "loss": 0.6414, "step": 7246 }, { - "epoch": 0.5483712307517687, - "grad_norm": 2.6031949520111084, - "learning_rate": 8.30475823621053e-06, - "loss": 0.5512, + "epoch": 0.5101724744808166, + "grad_norm": 1.5993940830230713, + "learning_rate": 7.110126771149157e-06, + "loss": 0.5985, "step": 7247 }, { - "epoch": 0.5484468994741024, - "grad_norm": 2.4191761016845703, - "learning_rate": 8.30251277088981e-06, - "loss": 0.6359, + "epoch": 0.5102428722280887, + "grad_norm": 2.09397292137146, + "learning_rate": 7.108530859829365e-06, + "loss": 0.7616, "step": 7248 }, { - "epoch": 0.548522568196436, - "grad_norm": 1.9725650548934937, - "learning_rate": 8.300267349247993e-06, - "loss": 0.7108, + "epoch": 0.5103132699753608, + "grad_norm": 1.5309827327728271, + "learning_rate": 7.1069349428669564e-06, + "loss": 0.675, "step": 7249 }, { - "epoch": 0.5485982369187696, - "grad_norm": 2.7272510528564453, - "learning_rate": 8.298021971425704e-06, - "loss": 0.6975, + "epoch": 0.5103836677226329, + "grad_norm": 1.6824051141738892, + "learning_rate": 7.1053390203449026e-06, + "loss": 0.6884, "step": 7250 }, { - "epoch": 0.5486739056411033, - "grad_norm": 2.1538898944854736, - "learning_rate": 8.295776637563546e-06, - "loss": 0.8058, + "epoch": 0.510454065469905, + "grad_norm": 1.809202790260315, + "learning_rate": 7.103743092346176e-06, + "loss": 0.6487, "step": 7251 }, { - "epoch": 0.5487495743634369, - "grad_norm": 2.162531852722168, - "learning_rate": 8.293531347802136e-06, - "loss": 0.7727, + "epoch": 0.510524463217177, + "grad_norm": 1.935417890548706, + "learning_rate": 7.102147158953754e-06, + "loss": 0.7213, "step": 7252 }, { - "epoch": 0.5488252430857705, - "grad_norm": 2.4503231048583984, - "learning_rate": 8.291286102282076e-06, - "loss": 0.7097, + "epoch": 0.5105948609644492, + "grad_norm": 1.6642048358917236, + "learning_rate": 7.100551220250609e-06, + "loss": 0.8042, "step": 7253 }, { - "epoch": 0.5489009118081041, - "grad_norm": 2.264075994491577, - "learning_rate": 8.289040901143969e-06, - "loss": 0.5951, + "epoch": 0.5106652587117212, + "grad_norm": 1.8413852453231812, + "learning_rate": 7.098955276319714e-06, + "loss": 0.6426, "step": 7254 }, { - "epoch": 0.5489765805304377, - "grad_norm": 2.111943244934082, - "learning_rate": 8.286795744528425e-06, - "loss": 0.6764, + "epoch": 0.5107356564589933, + "grad_norm": 1.8260184526443481, + "learning_rate": 7.097359327244046e-06, + "loss": 0.7916, "step": 7255 }, { - "epoch": 0.5490522492527714, - "grad_norm": 1.6367228031158447, - "learning_rate": 8.284550632576037e-06, - "loss": 0.599, + "epoch": 0.5108060542062653, + "grad_norm": 1.6348304748535156, + "learning_rate": 7.095763373106577e-06, + "loss": 0.7005, "step": 7256 }, { - "epoch": 0.549127917975105, - "grad_norm": 2.353544235229492, - "learning_rate": 8.282305565427402e-06, - "loss": 0.6703, + "epoch": 0.5108764519535375, + "grad_norm": 1.8281441926956177, + "learning_rate": 7.094167413990283e-06, + "loss": 0.6835, "step": 7257 }, { - "epoch": 0.5492035866974386, - "grad_norm": 2.2156119346618652, - "learning_rate": 8.280060543223115e-06, - "loss": 0.6398, + "epoch": 0.5109468497008096, + "grad_norm": 2.9072799682617188, + "learning_rate": 7.092571449978142e-06, + "loss": 0.713, "step": 7258 }, { - "epoch": 0.5492792554197722, - "grad_norm": 2.4467849731445312, - "learning_rate": 8.27781556610376e-06, - "loss": 0.705, + "epoch": 0.5110172474480816, + "grad_norm": 1.8686411380767822, + "learning_rate": 7.090975481153126e-06, + "loss": 0.6331, "step": 7259 }, { - "epoch": 0.5493549241421058, - "grad_norm": 1.8530616760253906, - "learning_rate": 8.275570634209936e-06, - "loss": 0.7576, + "epoch": 0.5110876451953538, + "grad_norm": 1.8354483842849731, + "learning_rate": 7.089379507598212e-06, + "loss": 0.7569, "step": 7260 }, { - "epoch": 0.5494305928644395, - "grad_norm": 2.1063315868377686, - "learning_rate": 8.273325747682223e-06, - "loss": 0.6817, + "epoch": 0.5111580429426258, + "grad_norm": 1.5587434768676758, + "learning_rate": 7.087783529396378e-06, + "loss": 0.6427, "step": 7261 }, { - "epoch": 0.5495062615867731, - "grad_norm": 3.19496488571167, - "learning_rate": 8.271080906661197e-06, - "loss": 0.7781, + "epoch": 0.5112284406898979, + "grad_norm": 1.6978719234466553, + "learning_rate": 7.086187546630598e-06, + "loss": 0.768, "step": 7262 }, { - "epoch": 0.5495819303091067, - "grad_norm": 1.902632713317871, - "learning_rate": 8.268836111287447e-06, - "loss": 0.6575, + "epoch": 0.5112988384371701, + "grad_norm": 1.8822331428527832, + "learning_rate": 7.0845915593838485e-06, + "loss": 0.6839, "step": 7263 }, { - "epoch": 0.5496575990314404, - "grad_norm": 1.7759108543395996, - "learning_rate": 8.26659136170154e-06, - "loss": 0.6641, + "epoch": 0.5113692361844421, + "grad_norm": 1.617148518562317, + "learning_rate": 7.0829955677391086e-06, + "loss": 0.6707, "step": 7264 }, { - "epoch": 0.549733267753774, - "grad_norm": 1.6828055381774902, - "learning_rate": 8.264346658044056e-06, - "loss": 0.6752, + "epoch": 0.5114396339317142, + "grad_norm": 1.7654285430908203, + "learning_rate": 7.081399571779353e-06, + "loss": 0.6298, "step": 7265 }, { - "epoch": 0.5498089364761076, - "grad_norm": 2.365657329559326, - "learning_rate": 8.262102000455565e-06, - "loss": 0.7019, + "epoch": 0.5115100316789862, + "grad_norm": 1.526501178741455, + "learning_rate": 7.0798035715875595e-06, + "loss": 0.6018, "step": 7266 }, { - "epoch": 0.5498846051984412, - "grad_norm": 1.8660743236541748, - "learning_rate": 8.259857389076632e-06, - "loss": 0.6905, + "epoch": 0.5115804294262584, + "grad_norm": 1.8355048894882202, + "learning_rate": 7.078207567246708e-06, + "loss": 0.6465, "step": 7267 }, { - "epoch": 0.5499602739207748, - "grad_norm": 2.5874767303466797, - "learning_rate": 8.257612824047825e-06, - "loss": 0.7434, + "epoch": 0.5116508271735305, + "grad_norm": 1.9714993238449097, + "learning_rate": 7.076611558839773e-06, + "loss": 0.7634, "step": 7268 }, { - "epoch": 0.5500359426431085, - "grad_norm": 1.9300464391708374, - "learning_rate": 8.255368305509703e-06, - "loss": 0.7444, + "epoch": 0.5117212249208025, + "grad_norm": 1.691249966621399, + "learning_rate": 7.0750155464497324e-06, + "loss": 0.6965, "step": 7269 }, { - "epoch": 0.5501116113654421, - "grad_norm": 2.1249427795410156, - "learning_rate": 8.253123833602823e-06, - "loss": 0.675, + "epoch": 0.5117916226680747, + "grad_norm": 1.8791855573654175, + "learning_rate": 7.073419530159568e-06, + "loss": 0.6786, "step": 7270 }, { - "epoch": 0.5501872800877757, - "grad_norm": 2.6504623889923096, - "learning_rate": 8.25087940846775e-06, - "loss": 0.6052, + "epoch": 0.5118620204153467, + "grad_norm": 1.818517804145813, + "learning_rate": 7.071823510052255e-06, + "loss": 0.7568, "step": 7271 }, { - "epoch": 0.5502629488101093, - "grad_norm": 1.6666854619979858, - "learning_rate": 8.248635030245026e-06, - "loss": 0.9488, + "epoch": 0.5119324181626188, + "grad_norm": 1.9779372215270996, + "learning_rate": 7.070227486210772e-06, + "loss": 0.661, "step": 7272 }, { - "epoch": 0.550338617532443, - "grad_norm": 1.920323133468628, - "learning_rate": 8.246390699075211e-06, - "loss": 0.7232, + "epoch": 0.5120028159098908, + "grad_norm": 1.652539849281311, + "learning_rate": 7.068631458718099e-06, + "loss": 0.6455, "step": 7273 }, { - "epoch": 0.5504142862547766, - "grad_norm": 1.9666804075241089, - "learning_rate": 8.244146415098847e-06, - "loss": 0.6699, + "epoch": 0.512073213657163, + "grad_norm": 2.004509210586548, + "learning_rate": 7.067035427657214e-06, + "loss": 0.8217, "step": 7274 }, { - "epoch": 0.5504899549771102, - "grad_norm": 2.455787181854248, - "learning_rate": 8.241902178456474e-06, - "loss": 0.7122, + "epoch": 0.5121436114044351, + "grad_norm": 2.0231258869171143, + "learning_rate": 7.065439393111096e-06, + "loss": 0.6498, "step": 7275 }, { - "epoch": 0.5505656236994438, - "grad_norm": 2.3213722705841064, - "learning_rate": 8.239657989288643e-06, - "loss": 0.8173, + "epoch": 0.5122140091517071, + "grad_norm": 1.6902931928634644, + "learning_rate": 7.063843355162725e-06, + "loss": 0.6542, "step": 7276 }, { - "epoch": 0.5506412924217775, - "grad_norm": 1.930747628211975, - "learning_rate": 8.23741384773589e-06, - "loss": 0.646, + "epoch": 0.5122844068989792, + "grad_norm": 1.927322268486023, + "learning_rate": 7.06224731389508e-06, + "loss": 0.5698, "step": 7277 }, { - "epoch": 0.5507169611441111, - "grad_norm": 2.0072009563446045, - "learning_rate": 8.235169753938745e-06, - "loss": 0.7275, + "epoch": 0.5123548046462513, + "grad_norm": 1.8542006015777588, + "learning_rate": 7.060651269391141e-06, + "loss": 0.7442, "step": 7278 }, { - "epoch": 0.5507926298664447, - "grad_norm": 2.398528814315796, - "learning_rate": 8.232925708037748e-06, - "loss": 0.6895, + "epoch": 0.5124252023935234, + "grad_norm": 2.0353829860687256, + "learning_rate": 7.05905522173389e-06, + "loss": 0.6151, "step": 7279 }, { - "epoch": 0.5508682985887783, - "grad_norm": 2.015350103378296, - "learning_rate": 8.230681710173418e-06, - "loss": 0.6356, + "epoch": 0.5124956001407955, + "grad_norm": 2.0359506607055664, + "learning_rate": 7.057459171006301e-06, + "loss": 0.7748, "step": 7280 }, { - "epoch": 0.550943967311112, - "grad_norm": 2.454782485961914, - "learning_rate": 8.22843776048629e-06, - "loss": 0.7882, + "epoch": 0.5125659978880676, + "grad_norm": 1.906287431716919, + "learning_rate": 7.055863117291358e-06, + "loss": 0.6373, "step": 7281 }, { - "epoch": 0.5510196360334456, - "grad_norm": 2.29709529876709, - "learning_rate": 8.226193859116887e-06, - "loss": 0.6594, + "epoch": 0.5126363956353397, + "grad_norm": 1.8468117713928223, + "learning_rate": 7.0542670606720435e-06, + "loss": 0.6838, "step": 7282 }, { - "epoch": 0.5510953047557792, - "grad_norm": 2.140209197998047, - "learning_rate": 8.223950006205725e-06, - "loss": 0.6665, + "epoch": 0.5127067933826117, + "grad_norm": 1.752212405204773, + "learning_rate": 7.052671001231331e-06, + "loss": 0.7092, "step": 7283 }, { - "epoch": 0.5511709734781128, - "grad_norm": 1.968939185142517, - "learning_rate": 8.221706201893326e-06, - "loss": 0.7324, + "epoch": 0.5127771911298838, + "grad_norm": 1.7946126461029053, + "learning_rate": 7.0510749390522085e-06, + "loss": 0.6601, "step": 7284 }, { - "epoch": 0.5512466422004464, - "grad_norm": 1.8887324333190918, - "learning_rate": 8.219462446320199e-06, - "loss": 0.5153, + "epoch": 0.512847588877156, + "grad_norm": 1.8366667032241821, + "learning_rate": 7.049478874217655e-06, + "loss": 0.7965, "step": 7285 }, { - "epoch": 0.5513223109227801, - "grad_norm": 2.2595646381378174, - "learning_rate": 8.21721873962686e-06, - "loss": 0.8726, + "epoch": 0.512917986624428, + "grad_norm": 1.6423064470291138, + "learning_rate": 7.047882806810647e-06, + "loss": 0.7965, "step": 7286 }, { - "epoch": 0.5513979796451137, - "grad_norm": 5.0413899421691895, - "learning_rate": 8.214975081953816e-06, - "loss": 0.7368, + "epoch": 0.5129883843717001, + "grad_norm": 1.730243444442749, + "learning_rate": 7.0462867369141696e-06, + "loss": 0.7173, "step": 7287 }, { - "epoch": 0.5514736483674473, - "grad_norm": 2.2042415142059326, - "learning_rate": 8.21273147344157e-06, - "loss": 0.6438, + "epoch": 0.5130587821189722, + "grad_norm": 1.6485155820846558, + "learning_rate": 7.044690664611203e-06, + "loss": 0.6642, "step": 7288 }, { - "epoch": 0.5515493170897809, - "grad_norm": 2.034449815750122, - "learning_rate": 8.210487914230627e-06, - "loss": 0.655, + "epoch": 0.5131291798662443, + "grad_norm": 1.6917983293533325, + "learning_rate": 7.043094589984729e-06, + "loss": 0.7616, "step": 7289 }, { - "epoch": 0.5516249858121146, - "grad_norm": 2.396224021911621, - "learning_rate": 8.208244404461479e-06, - "loss": 0.6204, + "epoch": 0.5131995776135164, + "grad_norm": 1.6673225164413452, + "learning_rate": 7.041498513117726e-06, + "loss": 0.6413, "step": 7290 }, { - "epoch": 0.5517006545344482, - "grad_norm": 2.4055793285369873, - "learning_rate": 8.206000944274634e-06, - "loss": 0.7578, + "epoch": 0.5132699753607884, + "grad_norm": 1.6064687967300415, + "learning_rate": 7.039902434093179e-06, + "loss": 0.6721, "step": 7291 }, { - "epoch": 0.5517763232567818, - "grad_norm": 2.2324295043945312, - "learning_rate": 8.203757533810575e-06, - "loss": 0.6712, + "epoch": 0.5133403731080606, + "grad_norm": 1.7484149932861328, + "learning_rate": 7.038306352994068e-06, + "loss": 0.7397, "step": 7292 }, { - "epoch": 0.5518519919791154, - "grad_norm": 2.729771614074707, - "learning_rate": 8.201514173209797e-06, - "loss": 0.7397, + "epoch": 0.5134107708553326, + "grad_norm": 1.402335524559021, + "learning_rate": 7.036710269903375e-06, + "loss": 0.5711, "step": 7293 }, { - "epoch": 0.551927660701449, - "grad_norm": 1.7570246458053589, - "learning_rate": 8.199270862612781e-06, - "loss": 0.7682, + "epoch": 0.5134811686026047, + "grad_norm": 1.6476999521255493, + "learning_rate": 7.035114184904084e-06, + "loss": 0.7219, "step": 7294 }, { - "epoch": 0.5520033294237827, - "grad_norm": 2.058879852294922, - "learning_rate": 8.197027602160013e-06, - "loss": 0.5722, + "epoch": 0.5135515663498768, + "grad_norm": 1.8207392692565918, + "learning_rate": 7.033518098079173e-06, + "loss": 0.6599, "step": 7295 }, { - "epoch": 0.5520789981461163, - "grad_norm": 2.095630645751953, - "learning_rate": 8.194784391991977e-06, - "loss": 0.7722, + "epoch": 0.5136219640971489, + "grad_norm": 1.704302430152893, + "learning_rate": 7.031922009511624e-06, + "loss": 0.7425, "step": 7296 }, { - "epoch": 0.5521546668684499, - "grad_norm": 2.408841371536255, - "learning_rate": 8.192541232249145e-06, - "loss": 0.7326, + "epoch": 0.513692361844421, + "grad_norm": 1.7492698431015015, + "learning_rate": 7.030325919284424e-06, + "loss": 0.6558, "step": 7297 }, { - "epoch": 0.5522303355907835, - "grad_norm": 2.27298641204834, - "learning_rate": 8.190298123071993e-06, - "loss": 0.7144, + "epoch": 0.513762759591693, + "grad_norm": 1.8424549102783203, + "learning_rate": 7.028729827480551e-06, + "loss": 0.6834, "step": 7298 }, { - "epoch": 0.5523060043131172, - "grad_norm": 2.675877809524536, - "learning_rate": 8.188055064600991e-06, - "loss": 0.874, + "epoch": 0.5138331573389652, + "grad_norm": 1.8522762060165405, + "learning_rate": 7.027133734182986e-06, + "loss": 0.6569, "step": 7299 }, { - "epoch": 0.5523816730354508, - "grad_norm": 2.062973976135254, - "learning_rate": 8.185812056976605e-06, - "loss": 0.6534, + "epoch": 0.5139035550862372, + "grad_norm": 2.1853513717651367, + "learning_rate": 7.025537639474717e-06, + "loss": 0.7542, "step": 7300 }, { - "epoch": 0.5524573417577844, - "grad_norm": 2.2476651668548584, - "learning_rate": 8.183569100339305e-06, - "loss": 0.8483, + "epoch": 0.5139739528335093, + "grad_norm": 1.7360774278640747, + "learning_rate": 7.023941543438721e-06, + "loss": 0.6347, "step": 7301 }, { - "epoch": 0.552533010480118, - "grad_norm": 2.406301259994507, - "learning_rate": 8.181326194829548e-06, - "loss": 0.8172, + "epoch": 0.5140443505807815, + "grad_norm": 1.5875208377838135, + "learning_rate": 7.022345446157983e-06, + "loss": 0.7857, "step": 7302 }, { - "epoch": 0.5526086792024517, - "grad_norm": 2.0138354301452637, - "learning_rate": 8.179083340587794e-06, - "loss": 0.6932, + "epoch": 0.5141147483280535, + "grad_norm": 1.9831496477127075, + "learning_rate": 7.020749347715486e-06, + "loss": 0.5826, "step": 7303 }, { - "epoch": 0.5526843479247853, - "grad_norm": 1.9023960828781128, - "learning_rate": 8.176840537754493e-06, - "loss": 0.643, + "epoch": 0.5141851460753256, + "grad_norm": 1.7789360284805298, + "learning_rate": 7.01915324819421e-06, + "loss": 0.6163, "step": 7304 }, { - "epoch": 0.5527600166471189, - "grad_norm": 2.067840576171875, - "learning_rate": 8.1745977864701e-06, - "loss": 0.8828, + "epoch": 0.5142555438225976, + "grad_norm": 1.6004186868667603, + "learning_rate": 7.017557147677139e-06, + "loss": 0.6778, "step": 7305 }, { - "epoch": 0.5528356853694525, - "grad_norm": 2.148322105407715, - "learning_rate": 8.172355086875064e-06, - "loss": 0.7668, + "epoch": 0.5143259415698698, + "grad_norm": 1.6345031261444092, + "learning_rate": 7.015961046247258e-06, + "loss": 0.7549, "step": 7306 }, { - "epoch": 0.5529113540917862, - "grad_norm": 2.0931055545806885, - "learning_rate": 8.17011243910983e-06, - "loss": 0.6585, + "epoch": 0.5143963393171419, + "grad_norm": 2.5310299396514893, + "learning_rate": 7.014364943987546e-06, + "loss": 0.6744, "step": 7307 }, { - "epoch": 0.5529870228141198, - "grad_norm": 1.9692755937576294, - "learning_rate": 8.167869843314839e-06, - "loss": 0.7746, + "epoch": 0.5144667370644139, + "grad_norm": 1.7102280855178833, + "learning_rate": 7.012768840980988e-06, + "loss": 0.6961, "step": 7308 }, { - "epoch": 0.5530626915364534, - "grad_norm": 2.0854835510253906, - "learning_rate": 8.165627299630532e-06, - "loss": 0.7212, + "epoch": 0.5145371348116861, + "grad_norm": 1.6429734230041504, + "learning_rate": 7.011172737310567e-06, + "loss": 0.6519, "step": 7309 }, { - "epoch": 0.553138360258787, - "grad_norm": 2.4114372730255127, - "learning_rate": 8.163384808197339e-06, - "loss": 0.7573, + "epoch": 0.5146075325589581, + "grad_norm": 1.5758413076400757, + "learning_rate": 7.009576633059265e-06, + "loss": 0.746, "step": 7310 }, { - "epoch": 0.5532140289811207, - "grad_norm": 2.3823318481445312, - "learning_rate": 8.161142369155693e-06, - "loss": 0.6895, + "epoch": 0.5146779303062302, + "grad_norm": 1.8911726474761963, + "learning_rate": 7.007980528310064e-06, + "loss": 0.6699, "step": 7311 }, { - "epoch": 0.5532896977034543, - "grad_norm": 2.2626543045043945, - "learning_rate": 8.158899982646032e-06, - "loss": 0.7496, + "epoch": 0.5147483280535022, + "grad_norm": 1.9318925142288208, + "learning_rate": 7.00638442314595e-06, + "loss": 0.6744, "step": 7312 }, { - "epoch": 0.5533653664257879, - "grad_norm": 2.219508647918701, - "learning_rate": 8.156657648808769e-06, - "loss": 0.6999, + "epoch": 0.5148187258007744, + "grad_norm": 1.5567445755004883, + "learning_rate": 7.004788317649902e-06, + "loss": 0.6954, "step": 7313 }, { - "epoch": 0.5534410351481215, - "grad_norm": 2.2591121196746826, - "learning_rate": 8.154415367784335e-06, - "loss": 0.7347, + "epoch": 0.5148891235480465, + "grad_norm": 1.8238369226455688, + "learning_rate": 7.003192211904906e-06, + "loss": 0.7105, "step": 7314 }, { - "epoch": 0.5535167038704552, - "grad_norm": 2.205310344696045, - "learning_rate": 8.152173139713146e-06, - "loss": 0.7123, + "epoch": 0.5149595212953185, + "grad_norm": 1.992449402809143, + "learning_rate": 7.0015961059939455e-06, + "loss": 0.6203, "step": 7315 }, { - "epoch": 0.5535923725927888, - "grad_norm": 2.087782382965088, - "learning_rate": 8.149930964735612e-06, - "loss": 0.6367, + "epoch": 0.5150299190425907, + "grad_norm": 1.6632627248764038, + "learning_rate": 7e-06, + "loss": 0.6056, "step": 7316 }, { - "epoch": 0.5536680413151224, - "grad_norm": 2.4498722553253174, - "learning_rate": 8.147688842992155e-06, - "loss": 0.614, + "epoch": 0.5151003167898627, + "grad_norm": 1.8979486227035522, + "learning_rate": 6.998403894006055e-06, + "loss": 0.7659, "step": 7317 }, { - "epoch": 0.553743710037456, - "grad_norm": 3.0710363388061523, - "learning_rate": 8.14544677462318e-06, - "loss": 0.7032, + "epoch": 0.5151707145371348, + "grad_norm": 2.084355115890503, + "learning_rate": 6.996807788095094e-06, + "loss": 0.7256, "step": 7318 }, { - "epoch": 0.5538193787597896, - "grad_norm": 2.3299739360809326, - "learning_rate": 8.14320475976909e-06, - "loss": 0.854, + "epoch": 0.5152411122844069, + "grad_norm": 1.966103434562683, + "learning_rate": 6.995211682350097e-06, + "loss": 0.8025, "step": 7319 }, { - "epoch": 0.5538950474821233, - "grad_norm": 1.8151921033859253, - "learning_rate": 8.140962798570289e-06, - "loss": 0.5846, + "epoch": 0.515311510031679, + "grad_norm": 1.6654789447784424, + "learning_rate": 6.993615576854051e-06, + "loss": 0.7056, "step": 7320 }, { - "epoch": 0.5539707162044569, - "grad_norm": 2.084280014038086, - "learning_rate": 8.138720891167174e-06, - "loss": 0.713, + "epoch": 0.5153819077789511, + "grad_norm": 2.279421329498291, + "learning_rate": 6.992019471689935e-06, + "loss": 0.6904, "step": 7321 }, { - "epoch": 0.5540463849267905, - "grad_norm": 2.037383794784546, - "learning_rate": 8.136479037700146e-06, - "loss": 0.6499, + "epoch": 0.5154523055262231, + "grad_norm": 1.6446737051010132, + "learning_rate": 6.990423366940736e-06, + "loss": 0.6165, "step": 7322 }, { - "epoch": 0.5541220536491241, - "grad_norm": 2.4484384059906006, - "learning_rate": 8.134237238309593e-06, - "loss": 0.7387, + "epoch": 0.5155227032734953, + "grad_norm": 1.7655209302902222, + "learning_rate": 6.988827262689433e-06, + "loss": 0.7172, "step": 7323 }, { - "epoch": 0.5541977223714578, - "grad_norm": 2.2470922470092773, - "learning_rate": 8.131995493135903e-06, - "loss": 0.7522, + "epoch": 0.5155931010207674, + "grad_norm": 1.8152681589126587, + "learning_rate": 6.987231159019013e-06, + "loss": 0.7333, "step": 7324 }, { - "epoch": 0.5542733910937914, - "grad_norm": 2.310148239135742, - "learning_rate": 8.129753802319467e-06, - "loss": 0.7125, + "epoch": 0.5156634987680394, + "grad_norm": 1.7429927587509155, + "learning_rate": 6.985635056012453e-06, + "loss": 0.6813, "step": 7325 }, { - "epoch": 0.554349059816125, - "grad_norm": 1.8426295518875122, - "learning_rate": 8.127512166000656e-06, - "loss": 0.8074, + "epoch": 0.5157338965153115, + "grad_norm": 1.5805948972702026, + "learning_rate": 6.984038953752743e-06, + "loss": 0.6533, "step": 7326 }, { - "epoch": 0.5544247285384586, - "grad_norm": 1.8981022834777832, - "learning_rate": 8.125270584319857e-06, - "loss": 0.5748, + "epoch": 0.5158042942625836, + "grad_norm": 2.1669249534606934, + "learning_rate": 6.9824428523228604e-06, + "loss": 0.7525, "step": 7327 }, { - "epoch": 0.5545003972607923, - "grad_norm": 2.154419183731079, - "learning_rate": 8.123029057417446e-06, - "loss": 0.7373, + "epoch": 0.5158746920098557, + "grad_norm": 1.6395437717437744, + "learning_rate": 6.980846751805791e-06, + "loss": 0.7248, "step": 7328 }, { - "epoch": 0.5545760659831259, - "grad_norm": 1.6180402040481567, - "learning_rate": 8.12078758543379e-06, - "loss": 0.6536, + "epoch": 0.5159450897571277, + "grad_norm": 1.4447835683822632, + "learning_rate": 6.979250652284514e-06, + "loss": 0.7433, "step": 7329 }, { - "epoch": 0.5546517347054595, - "grad_norm": 2.104201555252075, - "learning_rate": 8.11854616850926e-06, - "loss": 0.7068, + "epoch": 0.5160154875043999, + "grad_norm": 1.703834056854248, + "learning_rate": 6.9776545538420176e-06, + "loss": 0.782, "step": 7330 }, { - "epoch": 0.5547274034277931, - "grad_norm": 1.8646724224090576, - "learning_rate": 8.116304806784218e-06, - "loss": 0.5606, + "epoch": 0.516085885251672, + "grad_norm": 1.8685457706451416, + "learning_rate": 6.976058456561278e-06, + "loss": 0.73, "step": 7331 }, { - "epoch": 0.5548030721501267, - "grad_norm": 2.118800163269043, - "learning_rate": 8.11406350039903e-06, - "loss": 0.7093, + "epoch": 0.516156282998944, + "grad_norm": 1.6005162000656128, + "learning_rate": 6.974462360525283e-06, + "loss": 0.8243, "step": 7332 }, { - "epoch": 0.5548787408724604, - "grad_norm": 2.5664494037628174, - "learning_rate": 8.11182224949405e-06, - "loss": 0.7353, + "epoch": 0.5162266807462161, + "grad_norm": 1.957362413406372, + "learning_rate": 6.972866265817012e-06, + "loss": 0.7151, "step": 7333 }, { - "epoch": 0.554954409594794, - "grad_norm": 2.0426931381225586, - "learning_rate": 8.109581054209633e-06, - "loss": 0.6415, + "epoch": 0.5162970784934882, + "grad_norm": 2.100578546524048, + "learning_rate": 6.971270172519451e-06, + "loss": 0.6194, "step": 7334 }, { - "epoch": 0.5550300783171276, - "grad_norm": 1.8999990224838257, - "learning_rate": 8.10733991468613e-06, - "loss": 0.6621, + "epoch": 0.5163674762407603, + "grad_norm": 1.6723664999008179, + "learning_rate": 6.9696740807155755e-06, + "loss": 0.7407, "step": 7335 }, { - "epoch": 0.5551057470394613, - "grad_norm": 2.4535303115844727, - "learning_rate": 8.105098831063887e-06, - "loss": 0.6392, + "epoch": 0.5164378739880324, + "grad_norm": 1.6643610000610352, + "learning_rate": 6.9680779904883766e-06, + "loss": 0.542, "step": 7336 }, { - "epoch": 0.5551814157617949, - "grad_norm": 1.9093875885009766, - "learning_rate": 8.102857803483254e-06, - "loss": 0.6715, + "epoch": 0.5165082717353044, + "grad_norm": 2.090097427368164, + "learning_rate": 6.966481901920827e-06, + "loss": 0.6134, "step": 7337 }, { - "epoch": 0.5552570844841285, - "grad_norm": 1.9944705963134766, - "learning_rate": 8.100616832084564e-06, - "loss": 0.6541, + "epoch": 0.5165786694825766, + "grad_norm": 1.747299313545227, + "learning_rate": 6.964885815095916e-06, + "loss": 0.6479, "step": 7338 }, { - "epoch": 0.5553327532064621, - "grad_norm": 1.7408812046051025, - "learning_rate": 8.09837591700816e-06, - "loss": 0.7232, + "epoch": 0.5166490672298486, + "grad_norm": 2.007554292678833, + "learning_rate": 6.963289730096623e-06, + "loss": 0.8183, "step": 7339 }, { - "epoch": 0.5554084219287957, - "grad_norm": 2.030344247817993, - "learning_rate": 8.09613505839437e-06, - "loss": 0.7087, + "epoch": 0.5167194649771207, + "grad_norm": 1.6758862733840942, + "learning_rate": 6.961693647005933e-06, + "loss": 0.8854, "step": 7340 }, { - "epoch": 0.5554840906511294, - "grad_norm": 1.8891116380691528, - "learning_rate": 8.093894256383525e-06, - "loss": 0.737, + "epoch": 0.5167898627243929, + "grad_norm": 1.662482738494873, + "learning_rate": 6.960097565906819e-06, + "loss": 0.6356, "step": 7341 }, { - "epoch": 0.555559759373463, - "grad_norm": 1.9823884963989258, - "learning_rate": 8.091653511115954e-06, - "loss": 0.7039, + "epoch": 0.5168602604716649, + "grad_norm": 2.334535837173462, + "learning_rate": 6.9585014868822755e-06, + "loss": 0.7918, "step": 7342 }, { - "epoch": 0.5556354280957966, - "grad_norm": 2.3397977352142334, - "learning_rate": 8.089412822731979e-06, - "loss": 0.7968, + "epoch": 0.516930658218937, + "grad_norm": 2.5033926963806152, + "learning_rate": 6.956905410015272e-06, + "loss": 0.711, "step": 7343 }, { - "epoch": 0.5557110968181302, - "grad_norm": 1.816161870956421, - "learning_rate": 8.087172191371917e-06, - "loss": 0.6302, + "epoch": 0.517001055966209, + "grad_norm": 1.9171693325042725, + "learning_rate": 6.955309335388799e-06, + "loss": 0.7706, "step": 7344 }, { - "epoch": 0.5557867655404638, - "grad_norm": 2.0150673389434814, - "learning_rate": 8.084931617176084e-06, - "loss": 0.5506, + "epoch": 0.5170714537134812, + "grad_norm": 1.5852034091949463, + "learning_rate": 6.953713263085831e-06, + "loss": 0.6673, "step": 7345 }, { - "epoch": 0.5558624342627975, - "grad_norm": 2.0042967796325684, - "learning_rate": 8.082691100284796e-06, - "loss": 0.6103, + "epoch": 0.5171418514607533, + "grad_norm": 1.5376867055892944, + "learning_rate": 6.952117193189355e-06, + "loss": 0.5406, "step": 7346 }, { - "epoch": 0.5559381029851311, - "grad_norm": 1.9429584741592407, - "learning_rate": 8.080450640838353e-06, - "loss": 0.8005, + "epoch": 0.5172122492080253, + "grad_norm": 1.836351990699768, + "learning_rate": 6.950521125782346e-06, + "loss": 0.7411, "step": 7347 }, { - "epoch": 0.5560137717074647, - "grad_norm": 1.7641698122024536, - "learning_rate": 8.078210238977067e-06, - "loss": 0.6253, + "epoch": 0.5172826469552975, + "grad_norm": 1.7956129312515259, + "learning_rate": 6.948925060947793e-06, + "loss": 0.758, "step": 7348 }, { - "epoch": 0.5560894404297984, - "grad_norm": 3.233842372894287, - "learning_rate": 8.075969894841239e-06, - "loss": 0.6349, + "epoch": 0.5173530447025695, + "grad_norm": 1.520755648612976, + "learning_rate": 6.947328998768669e-06, + "loss": 0.7097, "step": 7349 }, { - "epoch": 0.556165109152132, - "grad_norm": 1.9746023416519165, - "learning_rate": 8.073729608571166e-06, - "loss": 0.6736, + "epoch": 0.5174234424498416, + "grad_norm": 2.1565463542938232, + "learning_rate": 6.94573293932796e-06, + "loss": 0.6968, "step": 7350 }, { - "epoch": 0.5562407778744656, - "grad_norm": 2.0112850666046143, - "learning_rate": 8.071489380307138e-06, - "loss": 0.7653, + "epoch": 0.5174938401971136, + "grad_norm": 1.6890201568603516, + "learning_rate": 6.944136882708643e-06, + "loss": 0.713, "step": 7351 }, { - "epoch": 0.5563164465967992, - "grad_norm": 2.242575168609619, - "learning_rate": 8.069249210189447e-06, - "loss": 0.741, + "epoch": 0.5175642379443858, + "grad_norm": 1.9673614501953125, + "learning_rate": 6.942540828993702e-06, + "loss": 0.7057, "step": 7352 }, { - "epoch": 0.5563921153191328, - "grad_norm": 1.929771065711975, - "learning_rate": 8.067009098358384e-06, - "loss": 0.6086, + "epoch": 0.5176346356916579, + "grad_norm": 1.8981658220291138, + "learning_rate": 6.940944778266112e-06, + "loss": 0.7321, "step": 7353 }, { - "epoch": 0.5564677840414665, - "grad_norm": 2.020859718322754, - "learning_rate": 8.064769044954229e-06, - "loss": 0.7699, + "epoch": 0.5177050334389299, + "grad_norm": 1.7329944372177124, + "learning_rate": 6.939348730608861e-06, + "loss": 0.6226, "step": 7354 }, { - "epoch": 0.5565434527638001, - "grad_norm": 2.042544364929199, - "learning_rate": 8.06252905011726e-06, - "loss": 0.6731, + "epoch": 0.5177754311862021, + "grad_norm": 1.8583217859268188, + "learning_rate": 6.937752686104921e-06, + "loss": 0.5958, "step": 7355 }, { - "epoch": 0.5566191214861337, - "grad_norm": 2.6606943607330322, - "learning_rate": 8.060289113987754e-06, - "loss": 0.6402, + "epoch": 0.5178458289334741, + "grad_norm": 1.7113839387893677, + "learning_rate": 6.9361566448372764e-06, + "loss": 0.9241, "step": 7356 }, { - "epoch": 0.5566947902084673, - "grad_norm": 1.789626955986023, - "learning_rate": 8.058049236705982e-06, - "loss": 0.8114, + "epoch": 0.5179162266807462, + "grad_norm": 2.87446928024292, + "learning_rate": 6.934560606888905e-06, + "loss": 0.6057, "step": 7357 }, { - "epoch": 0.5567704589308009, - "grad_norm": 1.9699000120162964, - "learning_rate": 8.055809418412215e-06, - "loss": 0.7457, + "epoch": 0.5179866244280183, + "grad_norm": 1.8411356210708618, + "learning_rate": 6.9329645723427885e-06, + "loss": 0.699, "step": 7358 }, { - "epoch": 0.5568461276531346, - "grad_norm": 2.0746593475341797, - "learning_rate": 8.053569659246716e-06, - "loss": 0.855, + "epoch": 0.5180570221752904, + "grad_norm": 1.8761615753173828, + "learning_rate": 6.931368541281902e-06, + "loss": 0.7268, "step": 7359 }, { - "epoch": 0.5569217963754682, - "grad_norm": 2.2033884525299072, - "learning_rate": 8.051329959349748e-06, - "loss": 0.6439, + "epoch": 0.5181274199225625, + "grad_norm": 1.9560787677764893, + "learning_rate": 6.9297725137892296e-06, + "loss": 0.5878, "step": 7360 }, { - "epoch": 0.5569974650978018, - "grad_norm": 1.9378942251205444, - "learning_rate": 8.049090318861563e-06, - "loss": 0.7654, + "epoch": 0.5181978176698345, + "grad_norm": 1.633263349533081, + "learning_rate": 6.928176489947746e-06, + "loss": 0.701, "step": 7361 }, { - "epoch": 0.5570731338201355, - "grad_norm": 2.35151743888855, - "learning_rate": 8.046850737922418e-06, - "loss": 0.7722, + "epoch": 0.5182682154171067, + "grad_norm": 1.976098895072937, + "learning_rate": 6.926580469840434e-06, + "loss": 0.7725, "step": 7362 }, { - "epoch": 0.5571488025424691, - "grad_norm": 2.5247981548309326, - "learning_rate": 8.044611216672562e-06, - "loss": 0.6724, + "epoch": 0.5183386131643788, + "grad_norm": 1.7193260192871094, + "learning_rate": 6.924984453550267e-06, + "loss": 0.6686, "step": 7363 }, { - "epoch": 0.5572244712648027, - "grad_norm": 2.331882953643799, - "learning_rate": 8.042371755252245e-06, - "loss": 0.6965, + "epoch": 0.5184090109116508, + "grad_norm": 1.8226069211959839, + "learning_rate": 6.923388441160229e-06, + "loss": 0.5811, "step": 7364 }, { - "epoch": 0.5573001399871363, - "grad_norm": 2.167255401611328, - "learning_rate": 8.040132353801705e-06, - "loss": 0.7462, + "epoch": 0.518479408658923, + "grad_norm": 2.4959938526153564, + "learning_rate": 6.921792432753293e-06, + "loss": 0.8177, "step": 7365 }, { - "epoch": 0.5573758087094699, - "grad_norm": 2.5408060550689697, - "learning_rate": 8.037893012461182e-06, - "loss": 0.6558, + "epoch": 0.518549806406195, + "grad_norm": 1.5298210382461548, + "learning_rate": 6.920196428412441e-06, + "loss": 0.6795, "step": 7366 }, { - "epoch": 0.5574514774318036, - "grad_norm": 1.810550332069397, - "learning_rate": 8.035653731370906e-06, - "loss": 0.6378, + "epoch": 0.5186202041534671, + "grad_norm": 1.754594326019287, + "learning_rate": 6.918600428220647e-06, + "loss": 0.6038, "step": 7367 }, { - "epoch": 0.5575271461541372, - "grad_norm": 2.1500096321105957, - "learning_rate": 8.033414510671117e-06, - "loss": 0.6658, + "epoch": 0.5186906019007391, + "grad_norm": 1.7879782915115356, + "learning_rate": 6.917004432260892e-06, + "loss": 0.6986, "step": 7368 }, { - "epoch": 0.5576028148764708, - "grad_norm": 2.4346792697906494, - "learning_rate": 8.031175350502037e-06, - "loss": 0.6172, + "epoch": 0.5187609996480113, + "grad_norm": 1.9729777574539185, + "learning_rate": 6.915408440616152e-06, + "loss": 0.7928, "step": 7369 }, { - "epoch": 0.5576784835988045, - "grad_norm": 2.380704641342163, - "learning_rate": 8.02893625100389e-06, - "loss": 0.6924, + "epoch": 0.5188313973952834, + "grad_norm": 2.0853335857391357, + "learning_rate": 6.913812453369401e-06, + "loss": 0.7744, "step": 7370 }, { - "epoch": 0.557754152321138, - "grad_norm": 1.7149864435195923, - "learning_rate": 8.026697212316896e-06, - "loss": 0.7118, + "epoch": 0.5189017951425554, + "grad_norm": 2.0244839191436768, + "learning_rate": 6.912216470603622e-06, + "loss": 0.6515, "step": 7371 }, { - "epoch": 0.5578298210434717, - "grad_norm": 1.924574851989746, - "learning_rate": 8.02445823458127e-06, - "loss": 0.6955, + "epoch": 0.5189721928898275, + "grad_norm": 1.6482385396957397, + "learning_rate": 6.9106204924017864e-06, + "loss": 0.6923, "step": 7372 }, { - "epoch": 0.5579054897658053, - "grad_norm": 2.193007707595825, - "learning_rate": 8.022219317937223e-06, - "loss": 0.6082, + "epoch": 0.5190425906370996, + "grad_norm": 1.9071153402328491, + "learning_rate": 6.909024518846874e-06, + "loss": 0.6517, "step": 7373 }, { - "epoch": 0.5579811584881389, - "grad_norm": 1.9797512292861938, - "learning_rate": 8.019980462524968e-06, - "loss": 0.84, + "epoch": 0.5191129883843717, + "grad_norm": 1.7285280227661133, + "learning_rate": 6.907428550021859e-06, + "loss": 0.584, "step": 7374 }, { - "epoch": 0.5580568272104726, - "grad_norm": 2.113743543624878, - "learning_rate": 8.017741668484704e-06, - "loss": 0.8963, + "epoch": 0.5191833861316438, + "grad_norm": 1.664016842842102, + "learning_rate": 6.9058325860097174e-06, + "loss": 0.6509, "step": 7375 }, { - "epoch": 0.5581324959328062, - "grad_norm": 2.2391629219055176, - "learning_rate": 8.015502935956636e-06, - "loss": 0.7656, + "epoch": 0.5192537838789159, + "grad_norm": 1.863820195198059, + "learning_rate": 6.904236626893422e-06, + "loss": 0.5907, "step": 7376 }, { - "epoch": 0.5582081646551398, - "grad_norm": 2.6514220237731934, - "learning_rate": 8.013264265080955e-06, - "loss": 0.673, + "epoch": 0.519324181626188, + "grad_norm": 1.9558964967727661, + "learning_rate": 6.902640672755955e-06, + "loss": 0.641, "step": 7377 }, { - "epoch": 0.5582838333774734, - "grad_norm": 2.256282091140747, - "learning_rate": 8.011025655997858e-06, - "loss": 0.7703, + "epoch": 0.51939457937346, + "grad_norm": 2.214848279953003, + "learning_rate": 6.9010447236802846e-06, + "loss": 0.6435, "step": 7378 }, { - "epoch": 0.558359502099807, - "grad_norm": 2.2570066452026367, - "learning_rate": 8.008787108847533e-06, - "loss": 0.7589, + "epoch": 0.5194649771207321, + "grad_norm": 2.115663528442383, + "learning_rate": 6.899448779749392e-06, + "loss": 0.6746, "step": 7379 }, { - "epoch": 0.5584351708221407, - "grad_norm": 2.5450544357299805, - "learning_rate": 8.006548623770168e-06, - "loss": 0.6944, + "epoch": 0.5195353748680043, + "grad_norm": 1.6854127645492554, + "learning_rate": 6.8978528410462454e-06, + "loss": 0.8011, "step": 7380 }, { - "epoch": 0.5585108395444743, - "grad_norm": 2.3593268394470215, - "learning_rate": 8.00431020090594e-06, - "loss": 0.6387, + "epoch": 0.5196057726152763, + "grad_norm": 2.227003335952759, + "learning_rate": 6.896256907653824e-06, + "loss": 0.722, "step": 7381 }, { - "epoch": 0.5585865082668079, - "grad_norm": 1.9944931268692017, - "learning_rate": 8.002071840395026e-06, - "loss": 0.6667, + "epoch": 0.5196761703625484, + "grad_norm": 1.6735475063323975, + "learning_rate": 6.894660979655097e-06, + "loss": 0.6599, "step": 7382 }, { - "epoch": 0.5586621769891416, - "grad_norm": 1.908489465713501, - "learning_rate": 7.999833542377605e-06, - "loss": 0.635, + "epoch": 0.5197465681098205, + "grad_norm": 1.999244213104248, + "learning_rate": 6.893065057133044e-06, + "loss": 0.6718, "step": 7383 }, { - "epoch": 0.5587378457114751, - "grad_norm": 2.8577051162719727, - "learning_rate": 7.997595306993838e-06, - "loss": 0.6809, + "epoch": 0.5198169658570926, + "grad_norm": 1.7448939085006714, + "learning_rate": 6.891469140170635e-06, + "loss": 0.7927, "step": 7384 }, { - "epoch": 0.5588135144338088, - "grad_norm": 2.8183300495147705, - "learning_rate": 7.995357134383898e-06, - "loss": 0.7129, + "epoch": 0.5198873636043646, + "grad_norm": 2.0554862022399902, + "learning_rate": 6.8898732288508435e-06, + "loss": 0.7326, "step": 7385 }, { - "epoch": 0.5588891831561424, - "grad_norm": 2.3807883262634277, - "learning_rate": 7.993119024687943e-06, - "loss": 0.6761, + "epoch": 0.5199577613516367, + "grad_norm": 2.0807201862335205, + "learning_rate": 6.888277323256641e-06, + "loss": 0.6573, "step": 7386 }, { - "epoch": 0.558964851878476, - "grad_norm": 1.9848699569702148, - "learning_rate": 7.990880978046132e-06, - "loss": 0.5648, + "epoch": 0.5200281590989089, + "grad_norm": 1.9615312814712524, + "learning_rate": 6.886681423471005e-06, + "loss": 0.7129, "step": 7387 }, { - "epoch": 0.5590405206008097, - "grad_norm": 1.9537297487258911, - "learning_rate": 7.988642994598616e-06, - "loss": 0.6346, + "epoch": 0.5200985568461809, + "grad_norm": 2.084454298019409, + "learning_rate": 6.8850855295769004e-06, + "loss": 0.7203, "step": 7388 }, { - "epoch": 0.5591161893231433, - "grad_norm": 2.2345707416534424, - "learning_rate": 7.986405074485547e-06, - "loss": 0.6992, + "epoch": 0.520168954593453, + "grad_norm": 1.6810779571533203, + "learning_rate": 6.883489641657307e-06, + "loss": 0.6785, "step": 7389 }, { - "epoch": 0.5591918580454769, - "grad_norm": 1.7011851072311401, - "learning_rate": 7.984167217847072e-06, - "loss": 0.6245, + "epoch": 0.520239352340725, + "grad_norm": 1.5935664176940918, + "learning_rate": 6.88189375979519e-06, + "loss": 0.686, "step": 7390 }, { - "epoch": 0.5592675267678106, - "grad_norm": 1.5342859029769897, - "learning_rate": 7.98192942482333e-06, - "loss": 0.71, + "epoch": 0.5203097500879972, + "grad_norm": 1.8562833070755005, + "learning_rate": 6.880297884073525e-06, + "loss": 0.6922, "step": 7391 }, { - "epoch": 0.5593431954901441, - "grad_norm": 1.9042041301727295, - "learning_rate": 7.979691695554464e-06, - "loss": 0.8046, + "epoch": 0.5203801478352693, + "grad_norm": 1.786625623703003, + "learning_rate": 6.878702014575279e-06, + "loss": 0.7997, "step": 7392 }, { - "epoch": 0.5594188642124778, - "grad_norm": 4.508663654327393, - "learning_rate": 7.977454030180597e-06, - "loss": 0.661, + "epoch": 0.5204505455825413, + "grad_norm": 2.3165080547332764, + "learning_rate": 6.87710615138343e-06, + "loss": 0.5694, "step": 7393 }, { - "epoch": 0.5594945329348114, - "grad_norm": 2.309535503387451, - "learning_rate": 7.975216428841871e-06, - "loss": 0.7014, + "epoch": 0.5205209433298135, + "grad_norm": 1.744941234588623, + "learning_rate": 6.875510294580938e-06, + "loss": 0.6803, "step": 7394 }, { - "epoch": 0.559570201657145, - "grad_norm": 2.226212739944458, - "learning_rate": 7.972978891678407e-06, - "loss": 0.6736, + "epoch": 0.5205913410770855, + "grad_norm": 1.7387410402297974, + "learning_rate": 6.873914444250782e-06, + "loss": 0.7212, "step": 7395 }, { - "epoch": 0.5596458703794787, - "grad_norm": 2.99662446975708, - "learning_rate": 7.970741418830327e-06, - "loss": 0.7326, + "epoch": 0.5206617388243576, + "grad_norm": 2.2411463260650635, + "learning_rate": 6.872318600475927e-06, + "loss": 0.6934, "step": 7396 }, { - "epoch": 0.5597215391018122, - "grad_norm": 2.0158193111419678, - "learning_rate": 7.968504010437746e-06, - "loss": 0.7136, + "epoch": 0.5207321365716298, + "grad_norm": 2.1251368522644043, + "learning_rate": 6.870722763339345e-06, + "loss": 0.7357, "step": 7397 }, { - "epoch": 0.5597972078241459, - "grad_norm": 2.631486654281616, - "learning_rate": 7.966266666640778e-06, - "loss": 0.7801, + "epoch": 0.5208025343189018, + "grad_norm": 2.3309500217437744, + "learning_rate": 6.869126932924e-06, + "loss": 0.7433, "step": 7398 }, { - "epoch": 0.5598728765464795, - "grad_norm": 2.236232280731201, - "learning_rate": 7.96402938757954e-06, - "loss": 0.7771, + "epoch": 0.5208729320661739, + "grad_norm": 1.9608570337295532, + "learning_rate": 6.8675311093128685e-06, + "loss": 0.6985, "step": 7399 }, { - "epoch": 0.5599485452688131, - "grad_norm": 1.8148475885391235, - "learning_rate": 7.96179217339413e-06, - "loss": 0.6843, + "epoch": 0.5209433298134459, + "grad_norm": 1.7868221998214722, + "learning_rate": 6.865935292588911e-06, + "loss": 0.7534, "step": 7400 }, { - "epoch": 0.5600242139911468, - "grad_norm": 2.4410221576690674, - "learning_rate": 7.959555024224654e-06, - "loss": 0.606, + "epoch": 0.5210137275607181, + "grad_norm": 1.71648108959198, + "learning_rate": 6.864339482835102e-06, + "loss": 0.6437, "step": 7401 }, { - "epoch": 0.5600998827134804, - "grad_norm": 1.8520560264587402, - "learning_rate": 7.957317940211205e-06, - "loss": 0.6385, + "epoch": 0.5210841253079902, + "grad_norm": 1.7830232381820679, + "learning_rate": 6.862743680134404e-06, + "loss": 0.6954, "step": 7402 }, { - "epoch": 0.560175551435814, - "grad_norm": 3.167067050933838, - "learning_rate": 7.955080921493879e-06, - "loss": 0.7738, + "epoch": 0.5211545230552622, + "grad_norm": 2.0513956546783447, + "learning_rate": 6.861147884569789e-06, + "loss": 0.7202, "step": 7403 }, { - "epoch": 0.5602512201581477, - "grad_norm": 1.9839564561843872, - "learning_rate": 7.952843968212768e-06, - "loss": 0.8493, + "epoch": 0.5212249208025344, + "grad_norm": 1.784148931503296, + "learning_rate": 6.859552096224217e-06, + "loss": 0.6815, "step": 7404 }, { - "epoch": 0.5603268888804812, - "grad_norm": 3.0600149631500244, - "learning_rate": 7.950607080507951e-06, - "loss": 0.6369, + "epoch": 0.5212953185498064, + "grad_norm": 1.6463571786880493, + "learning_rate": 6.857956315180664e-06, + "loss": 0.664, "step": 7405 }, { - "epoch": 0.5604025576028149, - "grad_norm": 2.1107337474823, - "learning_rate": 7.948370258519519e-06, - "loss": 0.6013, + "epoch": 0.5213657162970785, + "grad_norm": 1.7348599433898926, + "learning_rate": 6.856360541522086e-06, + "loss": 0.621, "step": 7406 }, { - "epoch": 0.5604782263251485, - "grad_norm": 2.616274118423462, - "learning_rate": 7.946133502387537e-06, - "loss": 0.731, + "epoch": 0.5214361140443505, + "grad_norm": 1.861259937286377, + "learning_rate": 6.8547647753314565e-06, + "loss": 0.6013, "step": 7407 }, { - "epoch": 0.5605538950474821, - "grad_norm": 2.0208301544189453, - "learning_rate": 7.943896812252083e-06, - "loss": 0.6894, + "epoch": 0.5215065117916227, + "grad_norm": 2.289461851119995, + "learning_rate": 6.853169016691736e-06, + "loss": 0.7143, "step": 7408 }, { - "epoch": 0.5606295637698158, - "grad_norm": 4.073803424835205, - "learning_rate": 7.941660188253228e-06, - "loss": 0.5672, + "epoch": 0.5215769095388948, + "grad_norm": 1.6642680168151855, + "learning_rate": 6.851573265685893e-06, + "loss": 0.6935, "step": 7409 }, { - "epoch": 0.5607052324921493, - "grad_norm": 2.3524041175842285, - "learning_rate": 7.939423630531038e-06, - "loss": 0.7285, + "epoch": 0.5216473072861668, + "grad_norm": 1.8282966613769531, + "learning_rate": 6.8499775223968875e-06, + "loss": 0.5588, "step": 7410 }, { - "epoch": 0.560780901214483, - "grad_norm": 2.553096294403076, - "learning_rate": 7.937187139225567e-06, - "loss": 0.708, + "epoch": 0.521717705033439, + "grad_norm": 1.9467952251434326, + "learning_rate": 6.848381786907691e-06, + "loss": 0.5894, "step": 7411 }, { - "epoch": 0.5608565699368167, - "grad_norm": 2.174004554748535, - "learning_rate": 7.93495071447688e-06, - "loss": 0.6833, + "epoch": 0.521788102780711, + "grad_norm": 1.7577133178710938, + "learning_rate": 6.846786059301259e-06, + "loss": 0.7109, "step": 7412 }, { - "epoch": 0.5609322386591502, - "grad_norm": 2.332759380340576, - "learning_rate": 7.932714356425018e-06, - "loss": 0.6054, + "epoch": 0.5218585005279831, + "grad_norm": 1.8285603523254395, + "learning_rate": 6.845190339660561e-06, + "loss": 0.5249, "step": 7413 }, { - "epoch": 0.5610079073814839, - "grad_norm": 2.4970386028289795, - "learning_rate": 7.930478065210035e-06, - "loss": 0.7808, + "epoch": 0.5219288982752552, + "grad_norm": 1.8494151830673218, + "learning_rate": 6.843594628068555e-06, + "loss": 0.6257, "step": 7414 }, { - "epoch": 0.5610835761038175, - "grad_norm": 1.955387830734253, - "learning_rate": 7.92824184097198e-06, - "loss": 0.7495, + "epoch": 0.5219992960225273, + "grad_norm": 1.7404510974884033, + "learning_rate": 6.841998924608209e-06, + "loss": 0.5015, "step": 7415 }, { - "epoch": 0.5611592448261511, - "grad_norm": 1.921034574508667, - "learning_rate": 7.926005683850883e-06, - "loss": 0.7805, + "epoch": 0.5220696937697994, + "grad_norm": 1.8912338018417358, + "learning_rate": 6.840403229362478e-06, + "loss": 0.7292, "step": 7416 }, { - "epoch": 0.5612349135484848, - "grad_norm": 2.5919716358184814, - "learning_rate": 7.923769593986788e-06, - "loss": 0.6509, + "epoch": 0.5221400915170714, + "grad_norm": 1.9745309352874756, + "learning_rate": 6.838807542414329e-06, + "loss": 0.7764, "step": 7417 }, { - "epoch": 0.5613105822708183, - "grad_norm": 2.1961042881011963, - "learning_rate": 7.921533571519717e-06, - "loss": 0.6006, + "epoch": 0.5222104892643435, + "grad_norm": 1.675135612487793, + "learning_rate": 6.837211863846724e-06, + "loss": 0.7521, "step": 7418 }, { - "epoch": 0.561386250993152, - "grad_norm": 2.0641181468963623, - "learning_rate": 7.919297616589703e-06, - "loss": 0.6438, + "epoch": 0.5222808870116157, + "grad_norm": 1.6488927602767944, + "learning_rate": 6.83561619374262e-06, + "loss": 0.7203, "step": 7419 }, { - "epoch": 0.5614619197154856, - "grad_norm": 2.4737486839294434, - "learning_rate": 7.917061729336771e-06, - "loss": 0.7045, + "epoch": 0.5223512847588877, + "grad_norm": 1.6344131231307983, + "learning_rate": 6.83402053218498e-06, + "loss": 0.7466, "step": 7420 }, { - "epoch": 0.5615375884378192, - "grad_norm": 2.176301956176758, - "learning_rate": 7.914825909900935e-06, - "loss": 0.7689, + "epoch": 0.5224216825061598, + "grad_norm": 2.302687644958496, + "learning_rate": 6.83242487925676e-06, + "loss": 0.7717, "step": 7421 }, { - "epoch": 0.5616132571601529, - "grad_norm": 2.713766574859619, - "learning_rate": 7.91259015842221e-06, - "loss": 0.7377, + "epoch": 0.5224920802534319, + "grad_norm": 1.94539475440979, + "learning_rate": 6.830829235040925e-06, + "loss": 0.6877, "step": 7422 }, { - "epoch": 0.5616889258824864, - "grad_norm": 1.8123505115509033, - "learning_rate": 7.910354475040606e-06, - "loss": 0.7732, + "epoch": 0.522562478000704, + "grad_norm": 1.80856192111969, + "learning_rate": 6.829233599620431e-06, + "loss": 0.705, "step": 7423 }, { - "epoch": 0.5617645946048201, - "grad_norm": 2.313922643661499, - "learning_rate": 7.908118859896127e-06, - "loss": 0.658, + "epoch": 0.522632875747976, + "grad_norm": 2.1718575954437256, + "learning_rate": 6.827637973078237e-06, + "loss": 0.7395, "step": 7424 }, { - "epoch": 0.5618402633271538, - "grad_norm": 2.6944148540496826, - "learning_rate": 7.905883313128779e-06, - "loss": 0.6323, + "epoch": 0.5227032734952481, + "grad_norm": 1.9884939193725586, + "learning_rate": 6.826042355497301e-06, + "loss": 0.5717, "step": 7425 }, { - "epoch": 0.5619159320494873, - "grad_norm": 2.4101080894470215, - "learning_rate": 7.903647834878557e-06, - "loss": 0.7163, + "epoch": 0.5227736712425203, + "grad_norm": 2.6892025470733643, + "learning_rate": 6.8244467469605805e-06, + "loss": 0.7638, "step": 7426 }, { - "epoch": 0.561991600771821, - "grad_norm": 2.65920352935791, - "learning_rate": 7.901412425285453e-06, - "loss": 0.776, + "epoch": 0.5228440689897923, + "grad_norm": 1.5287035703659058, + "learning_rate": 6.822851147551029e-06, + "loss": 0.708, "step": 7427 }, { - "epoch": 0.5620672694941546, - "grad_norm": 2.0206611156463623, - "learning_rate": 7.899177084489457e-06, - "loss": 0.6004, + "epoch": 0.5229144667370644, + "grad_norm": 1.864464282989502, + "learning_rate": 6.821255557351611e-06, + "loss": 0.6917, "step": 7428 }, { - "epoch": 0.5621429382164882, - "grad_norm": 2.1238224506378174, - "learning_rate": 7.89694181263055e-06, - "loss": 0.7055, + "epoch": 0.5229848644843365, + "grad_norm": 1.7315733432769775, + "learning_rate": 6.819659976445277e-06, + "loss": 0.6081, "step": 7429 }, { - "epoch": 0.5622186069388219, - "grad_norm": 2.1638028621673584, - "learning_rate": 7.894706609848717e-06, - "loss": 0.7816, + "epoch": 0.5230552622316086, + "grad_norm": 1.6570391654968262, + "learning_rate": 6.818064404914985e-06, + "loss": 0.5942, "step": 7430 }, { - "epoch": 0.5622942756611554, - "grad_norm": 2.223090171813965, - "learning_rate": 7.89247147628393e-06, - "loss": 0.6012, + "epoch": 0.5231256599788807, + "grad_norm": 1.7434214353561401, + "learning_rate": 6.816468842843689e-06, + "loss": 0.6653, "step": 7431 }, { - "epoch": 0.5623699443834891, - "grad_norm": 3.290097951889038, - "learning_rate": 7.890236412076162e-06, - "loss": 0.7829, + "epoch": 0.5231960577261527, + "grad_norm": 1.8913851976394653, + "learning_rate": 6.814873290314345e-06, + "loss": 0.6386, "step": 7432 }, { - "epoch": 0.5624456131058228, - "grad_norm": 1.7183538675308228, - "learning_rate": 7.888001417365379e-06, - "loss": 0.6503, + "epoch": 0.5232664554734249, + "grad_norm": 1.843229055404663, + "learning_rate": 6.8132777474099024e-06, + "loss": 0.6786, "step": 7433 }, { - "epoch": 0.5625212818281563, - "grad_norm": 2.0650956630706787, - "learning_rate": 7.885766492291543e-06, - "loss": 0.7117, + "epoch": 0.5233368532206969, + "grad_norm": 1.5635628700256348, + "learning_rate": 6.811682214213322e-06, + "loss": 0.7021, "step": 7434 }, { - "epoch": 0.56259695055049, - "grad_norm": 2.289998769760132, - "learning_rate": 7.883531636994612e-06, - "loss": 0.8198, + "epoch": 0.523407250967969, + "grad_norm": 2.2642741203308105, + "learning_rate": 6.810086690807554e-06, + "loss": 0.626, "step": 7435 }, { - "epoch": 0.5626726192728235, - "grad_norm": 2.176126718521118, - "learning_rate": 7.881296851614544e-06, - "loss": 0.7623, + "epoch": 0.5234776487152412, + "grad_norm": 1.854142665863037, + "learning_rate": 6.808491177275551e-06, + "loss": 0.6568, "step": 7436 }, { - "epoch": 0.5627482879951572, - "grad_norm": 2.0864546298980713, - "learning_rate": 7.879062136291284e-06, - "loss": 0.7213, + "epoch": 0.5235480464625132, + "grad_norm": 1.449426531791687, + "learning_rate": 6.806895673700262e-06, + "loss": 0.6265, "step": 7437 }, { - "epoch": 0.5628239567174909, - "grad_norm": 2.1789047718048096, - "learning_rate": 7.876827491164778e-06, - "loss": 0.638, + "epoch": 0.5236184442097853, + "grad_norm": 2.031947135925293, + "learning_rate": 6.805300180164646e-06, + "loss": 0.7769, "step": 7438 }, { - "epoch": 0.5628996254398244, - "grad_norm": 2.4413700103759766, - "learning_rate": 7.874592916374966e-06, - "loss": 0.5869, + "epoch": 0.5236888419570573, + "grad_norm": 2.3577120304107666, + "learning_rate": 6.8037046967516476e-06, + "loss": 0.7657, "step": 7439 }, { - "epoch": 0.5629752941621581, - "grad_norm": 2.1864519119262695, - "learning_rate": 7.87235841206179e-06, - "loss": 0.7605, + "epoch": 0.5237592397043295, + "grad_norm": 1.8724920749664307, + "learning_rate": 6.802109223544222e-06, + "loss": 0.6964, "step": 7440 }, { - "epoch": 0.5630509628844917, - "grad_norm": 2.2284204959869385, - "learning_rate": 7.870123978365174e-06, - "loss": 0.7422, + "epoch": 0.5238296374516016, + "grad_norm": 1.7167026996612549, + "learning_rate": 6.800513760625315e-06, + "loss": 0.6688, "step": 7441 }, { - "epoch": 0.5631266316068253, - "grad_norm": 1.9402129650115967, - "learning_rate": 7.867889615425052e-06, - "loss": 0.6012, + "epoch": 0.5239000351988736, + "grad_norm": 1.7597228288650513, + "learning_rate": 6.798918308077879e-06, + "loss": 0.7442, "step": 7442 }, { - "epoch": 0.563202300329159, - "grad_norm": 1.827885627746582, - "learning_rate": 7.865655323381342e-06, - "loss": 0.6708, + "epoch": 0.5239704329461458, + "grad_norm": 1.779679298400879, + "learning_rate": 6.79732286598486e-06, + "loss": 0.706, "step": 7443 }, { - "epoch": 0.5632779690514925, - "grad_norm": 2.1613986492156982, - "learning_rate": 7.863421102373963e-06, - "loss": 0.5724, + "epoch": 0.5240408306934178, + "grad_norm": 1.7420895099639893, + "learning_rate": 6.795727434429213e-06, + "loss": 0.6073, "step": 7444 }, { - "epoch": 0.5633536377738262, - "grad_norm": 2.0706324577331543, - "learning_rate": 7.861186952542832e-06, - "loss": 0.8642, + "epoch": 0.5241112284406899, + "grad_norm": 1.743107795715332, + "learning_rate": 6.794132013493878e-06, + "loss": 0.5963, "step": 7445 }, { - "epoch": 0.5634293064961599, - "grad_norm": 2.373830795288086, - "learning_rate": 7.85895287402786e-06, - "loss": 0.6601, + "epoch": 0.5241816261879619, + "grad_norm": 1.5976300239562988, + "learning_rate": 6.792536603261808e-06, + "loss": 0.5475, "step": 7446 }, { - "epoch": 0.5635049752184934, - "grad_norm": 1.8708213567733765, - "learning_rate": 7.856718866968947e-06, - "loss": 0.6013, + "epoch": 0.5242520239352341, + "grad_norm": 1.9796322584152222, + "learning_rate": 6.790941203815947e-06, + "loss": 0.7492, "step": 7447 }, { - "epoch": 0.5635806439408271, - "grad_norm": 2.5757904052734375, - "learning_rate": 7.854484931505997e-06, - "loss": 0.7932, + "epoch": 0.5243224216825062, + "grad_norm": 1.6430132389068604, + "learning_rate": 6.789345815239244e-06, + "loss": 0.7434, "step": 7448 }, { - "epoch": 0.5636563126631606, - "grad_norm": 1.908565640449524, - "learning_rate": 7.852251067778903e-06, - "loss": 0.6691, + "epoch": 0.5243928194297782, + "grad_norm": 1.6057250499725342, + "learning_rate": 6.7877504376146394e-06, + "loss": 0.6753, "step": 7449 }, { - "epoch": 0.5637319813854943, - "grad_norm": 4.402620792388916, - "learning_rate": 7.850017275927563e-06, - "loss": 0.7278, + "epoch": 0.5244632171770504, + "grad_norm": 1.7355108261108398, + "learning_rate": 6.786155071025086e-06, + "loss": 0.7909, "step": 7450 }, { - "epoch": 0.563807650107828, - "grad_norm": 2.60870623588562, - "learning_rate": 7.847783556091858e-06, - "loss": 0.7348, + "epoch": 0.5245336149243224, + "grad_norm": 1.713639259338379, + "learning_rate": 6.784559715553519e-06, + "loss": 0.828, "step": 7451 }, { - "epoch": 0.5638833188301615, - "grad_norm": 2.4514048099517822, - "learning_rate": 7.845549908411676e-06, - "loss": 0.8417, + "epoch": 0.5246040126715945, + "grad_norm": 1.7967215776443481, + "learning_rate": 6.782964371282893e-06, + "loss": 0.588, "step": 7452 }, { - "epoch": 0.5639589875524952, - "grad_norm": 2.5910489559173584, - "learning_rate": 7.843316333026892e-06, - "loss": 0.6838, + "epoch": 0.5246744104188666, + "grad_norm": 1.6326206922531128, + "learning_rate": 6.781369038296142e-06, + "loss": 0.6789, "step": 7453 }, { - "epoch": 0.5640346562748288, - "grad_norm": 2.305745840072632, - "learning_rate": 7.841082830077378e-06, - "loss": 0.4627, + "epoch": 0.5247448081661387, + "grad_norm": 1.8411390781402588, + "learning_rate": 6.779773716676215e-06, + "loss": 0.6032, "step": 7454 }, { - "epoch": 0.5641103249971624, - "grad_norm": 3.0249183177948, - "learning_rate": 7.838849399703007e-06, - "loss": 0.6928, + "epoch": 0.5248152059134108, + "grad_norm": 2.129791021347046, + "learning_rate": 6.778178406506049e-06, + "loss": 0.692, "step": 7455 }, { - "epoch": 0.5641859937194961, - "grad_norm": 2.0935657024383545, - "learning_rate": 7.836616042043643e-06, - "loss": 0.6973, + "epoch": 0.5248856036606828, + "grad_norm": 1.6395708322525024, + "learning_rate": 6.776583107868593e-06, + "loss": 0.6148, "step": 7456 }, { - "epoch": 0.5642616624418296, - "grad_norm": 2.505596399307251, - "learning_rate": 7.834382757239145e-06, - "loss": 0.6782, + "epoch": 0.524956001407955, + "grad_norm": 1.6447468996047974, + "learning_rate": 6.774987820846779e-06, + "loss": 0.7037, "step": 7457 }, { - "epoch": 0.5643373311641633, - "grad_norm": 2.2332210540771484, - "learning_rate": 7.832149545429372e-06, - "loss": 0.7438, + "epoch": 0.5250263991552271, + "grad_norm": 1.7879457473754883, + "learning_rate": 6.773392545523555e-06, + "loss": 0.7421, "step": 7458 }, { - "epoch": 0.564412999886497, - "grad_norm": 2.181175470352173, - "learning_rate": 7.829916406754166e-06, - "loss": 0.6196, + "epoch": 0.5250967969024991, + "grad_norm": 1.7525808811187744, + "learning_rate": 6.771797281981856e-06, + "loss": 0.5544, "step": 7459 }, { - "epoch": 0.5644886686088305, - "grad_norm": 1.7004423141479492, - "learning_rate": 7.827683341353381e-06, - "loss": 0.6966, + "epoch": 0.5251671946497712, + "grad_norm": 1.9077346324920654, + "learning_rate": 6.770202030304625e-06, + "loss": 0.6087, "step": 7460 }, { - "epoch": 0.5645643373311642, - "grad_norm": 2.624783992767334, - "learning_rate": 7.825450349366859e-06, - "loss": 0.8012, + "epoch": 0.5252375923970433, + "grad_norm": 1.7964978218078613, + "learning_rate": 6.768606790574795e-06, + "loss": 0.8133, "step": 7461 }, { - "epoch": 0.5646400060534977, - "grad_norm": 1.9809978008270264, - "learning_rate": 7.823217430934434e-06, - "loss": 0.7437, + "epoch": 0.5253079901443154, + "grad_norm": 1.7891348600387573, + "learning_rate": 6.767011562875312e-06, + "loss": 0.6611, "step": 7462 }, { - "epoch": 0.5647156747758314, - "grad_norm": 3.563035011291504, - "learning_rate": 7.82098458619594e-06, - "loss": 0.6683, + "epoch": 0.5253783878915874, + "grad_norm": 1.7438241243362427, + "learning_rate": 6.765416347289105e-06, + "loss": 0.6291, "step": 7463 }, { - "epoch": 0.5647913434981651, - "grad_norm": 2.226811408996582, - "learning_rate": 7.818751815291204e-06, - "loss": 0.7564, + "epoch": 0.5254487856388596, + "grad_norm": 1.6934930086135864, + "learning_rate": 6.7638211438991175e-06, + "loss": 0.6447, "step": 7464 }, { - "epoch": 0.5648670122204986, - "grad_norm": 1.755900502204895, - "learning_rate": 7.816519118360046e-06, - "loss": 0.62, + "epoch": 0.5255191833861317, + "grad_norm": 1.8121527433395386, + "learning_rate": 6.7622259527882806e-06, + "loss": 0.6393, "step": 7465 }, { - "epoch": 0.5649426809428323, - "grad_norm": 8.155805587768555, - "learning_rate": 7.814286495542293e-06, - "loss": 0.7387, + "epoch": 0.5255895811334037, + "grad_norm": 1.5217853784561157, + "learning_rate": 6.760630774039534e-06, + "loss": 0.6454, "step": 7466 }, { - "epoch": 0.565018349665166, - "grad_norm": 2.6799027919769287, - "learning_rate": 7.812053946977755e-06, - "loss": 0.583, + "epoch": 0.5256599788806758, + "grad_norm": 1.8326139450073242, + "learning_rate": 6.759035607735807e-06, + "loss": 0.6629, "step": 7467 }, { - "epoch": 0.5650940183874995, - "grad_norm": 2.1574854850769043, - "learning_rate": 7.80982147280624e-06, - "loss": 0.564, + "epoch": 0.5257303766279479, + "grad_norm": 1.6447778940200806, + "learning_rate": 6.757440453960038e-06, + "loss": 0.6287, "step": 7468 }, { - "epoch": 0.5651696871098332, - "grad_norm": 2.8113937377929688, - "learning_rate": 7.807589073167556e-06, - "loss": 0.7773, + "epoch": 0.52580077437522, + "grad_norm": 1.9868388175964355, + "learning_rate": 6.755845312795161e-06, + "loss": 0.6542, "step": 7469 }, { - "epoch": 0.5652453558321667, - "grad_norm": 2.041391611099243, - "learning_rate": 7.805356748201497e-06, - "loss": 0.7095, + "epoch": 0.5258711721224921, + "grad_norm": 1.624085783958435, + "learning_rate": 6.7542501843241066e-06, + "loss": 0.7322, "step": 7470 }, { - "epoch": 0.5653210245545004, - "grad_norm": 2.0056307315826416, - "learning_rate": 7.803124498047865e-06, - "loss": 0.6397, + "epoch": 0.5259415698697641, + "grad_norm": 1.6465896368026733, + "learning_rate": 6.752655068629808e-06, + "loss": 0.7011, "step": 7471 }, { - "epoch": 0.5653966932768341, - "grad_norm": 2.5884063243865967, - "learning_rate": 7.80089232284645e-06, - "loss": 0.7664, + "epoch": 0.5260119676170363, + "grad_norm": 1.7026617527008057, + "learning_rate": 6.7510599657951945e-06, + "loss": 0.6757, "step": 7472 }, { - "epoch": 0.5654723619991676, - "grad_norm": 2.3261334896087646, - "learning_rate": 7.798660222737033e-06, - "loss": 0.7108, + "epoch": 0.5260823653643083, + "grad_norm": 2.9634289741516113, + "learning_rate": 6.749464875903201e-06, + "loss": 0.7107, "step": 7473 }, { - "epoch": 0.5655480307215013, - "grad_norm": 1.9324620962142944, - "learning_rate": 7.7964281978594e-06, - "loss": 0.7209, + "epoch": 0.5261527631115804, + "grad_norm": 2.051602363586426, + "learning_rate": 6.747869799036754e-06, + "loss": 0.6903, "step": 7474 }, { - "epoch": 0.5656236994438348, - "grad_norm": 2.28661847114563, - "learning_rate": 7.794196248353323e-06, - "loss": 0.6863, + "epoch": 0.5262231608588526, + "grad_norm": 1.6552486419677734, + "learning_rate": 6.746274735278787e-06, + "loss": 0.6456, "step": 7475 }, { - "epoch": 0.5656993681661685, - "grad_norm": 3.6437911987304688, - "learning_rate": 7.791964374358579e-06, - "loss": 0.7256, + "epoch": 0.5262935586061246, + "grad_norm": 1.5990136861801147, + "learning_rate": 6.744679684712224e-06, + "loss": 0.6042, "step": 7476 }, { - "epoch": 0.5657750368885022, - "grad_norm": 2.1123297214508057, - "learning_rate": 7.789732576014934e-06, - "loss": 0.7905, + "epoch": 0.5263639563533967, + "grad_norm": 1.6663833856582642, + "learning_rate": 6.743084647419997e-06, + "loss": 0.6298, "step": 7477 }, { - "epoch": 0.5658507056108357, - "grad_norm": 2.2938523292541504, - "learning_rate": 7.787500853462149e-06, - "loss": 0.579, + "epoch": 0.5264343541006687, + "grad_norm": 1.869809627532959, + "learning_rate": 6.741489623485029e-06, + "loss": 0.6025, "step": 7478 }, { - "epoch": 0.5659263743331694, - "grad_norm": 2.4166362285614014, - "learning_rate": 7.785269206839984e-06, - "loss": 0.6481, + "epoch": 0.5265047518479409, + "grad_norm": 1.6614131927490234, + "learning_rate": 6.739894612990253e-06, + "loss": 0.6095, "step": 7479 }, { - "epoch": 0.5660020430555031, - "grad_norm": 1.8204790353775024, - "learning_rate": 7.783037636288185e-06, - "loss": 0.7452, + "epoch": 0.5265751495952129, + "grad_norm": 1.5472526550292969, + "learning_rate": 6.738299616018589e-06, + "loss": 0.594, "step": 7480 }, { - "epoch": 0.5660777117778366, - "grad_norm": 2.0831942558288574, - "learning_rate": 7.78080614194651e-06, - "loss": 0.5121, + "epoch": 0.526645547342485, + "grad_norm": 2.301776170730591, + "learning_rate": 6.736704632652967e-06, + "loss": 0.6852, "step": 7481 }, { - "epoch": 0.5661533805001703, - "grad_norm": 2.7705237865448, - "learning_rate": 7.778574723954695e-06, - "loss": 0.7819, + "epoch": 0.5267159450897572, + "grad_norm": 2.2722675800323486, + "learning_rate": 6.735109662976309e-06, + "loss": 0.632, "step": 7482 }, { - "epoch": 0.5662290492225038, - "grad_norm": 2.23626708984375, - "learning_rate": 7.776343382452485e-06, - "loss": 0.609, + "epoch": 0.5267863428370292, + "grad_norm": 2.0143613815307617, + "learning_rate": 6.73351470707154e-06, + "loss": 0.7023, "step": 7483 }, { - "epoch": 0.5663047179448375, - "grad_norm": 2.0028553009033203, - "learning_rate": 7.774112117579608e-06, - "loss": 0.701, + "epoch": 0.5268567405843013, + "grad_norm": 1.8775767087936401, + "learning_rate": 6.731919765021579e-06, + "loss": 0.6213, "step": 7484 }, { - "epoch": 0.5663803866671712, - "grad_norm": 4.554917812347412, - "learning_rate": 7.771880929475792e-06, - "loss": 0.6932, + "epoch": 0.5269271383315733, + "grad_norm": 1.9760935306549072, + "learning_rate": 6.730324836909356e-06, + "loss": 0.6647, "step": 7485 }, { - "epoch": 0.5664560553895047, - "grad_norm": 1.90889310836792, - "learning_rate": 7.76964981828077e-06, - "loss": 0.7258, + "epoch": 0.5269975360788455, + "grad_norm": 2.172408103942871, + "learning_rate": 6.728729922817789e-06, + "loss": 0.6821, "step": 7486 }, { - "epoch": 0.5665317241118384, - "grad_norm": 2.3061931133270264, - "learning_rate": 7.767418784134253e-06, - "loss": 0.7325, + "epoch": 0.5270679338261176, + "grad_norm": 1.9519052505493164, + "learning_rate": 6.727135022829798e-06, + "loss": 0.6156, "step": 7487 }, { - "epoch": 0.5666073928341719, - "grad_norm": 2.0186426639556885, - "learning_rate": 7.76518782717596e-06, - "loss": 0.7392, + "epoch": 0.5271383315733896, + "grad_norm": 1.5592550039291382, + "learning_rate": 6.725540137028303e-06, + "loss": 0.7087, "step": 7488 }, { - "epoch": 0.5666830615565056, - "grad_norm": 2.2334370613098145, - "learning_rate": 7.762956947545598e-06, - "loss": 0.7617, + "epoch": 0.5272087293206618, + "grad_norm": 1.8131780624389648, + "learning_rate": 6.723945265496229e-06, + "loss": 0.6541, "step": 7489 }, { - "epoch": 0.5667587302788393, - "grad_norm": 2.3827006816864014, - "learning_rate": 7.760726145382871e-06, - "loss": 0.648, + "epoch": 0.5272791270679338, + "grad_norm": 2.005467176437378, + "learning_rate": 6.722350408316485e-06, + "loss": 0.6509, "step": 7490 }, { - "epoch": 0.5668343990011728, - "grad_norm": 4.122293472290039, - "learning_rate": 7.758495420827485e-06, - "loss": 0.7135, + "epoch": 0.5273495248152059, + "grad_norm": 1.8701461553573608, + "learning_rate": 6.720755565571999e-06, + "loss": 0.7365, "step": 7491 }, { - "epoch": 0.5669100677235065, - "grad_norm": 2.1730690002441406, - "learning_rate": 7.75626477401913e-06, - "loss": 0.6489, + "epoch": 0.527419922562478, + "grad_norm": 1.6121982336044312, + "learning_rate": 6.719160737345682e-06, + "loss": 0.6683, "step": 7492 }, { - "epoch": 0.5669857364458402, - "grad_norm": 2.0568599700927734, - "learning_rate": 7.754034205097497e-06, - "loss": 0.7312, + "epoch": 0.5274903203097501, + "grad_norm": 1.6670591831207275, + "learning_rate": 6.717565923720455e-06, + "loss": 0.6859, "step": 7493 }, { - "epoch": 0.5670614051681737, - "grad_norm": 2.0819613933563232, - "learning_rate": 7.751803714202273e-06, - "loss": 0.7455, + "epoch": 0.5275607180570222, + "grad_norm": 1.845933198928833, + "learning_rate": 6.7159711247792276e-06, + "loss": 0.7321, "step": 7494 }, { - "epoch": 0.5671370738905074, - "grad_norm": 2.208782434463501, - "learning_rate": 7.749573301473133e-06, - "loss": 0.6664, + "epoch": 0.5276311158042942, + "grad_norm": 1.6344823837280273, + "learning_rate": 6.714376340604923e-06, + "loss": 0.769, "step": 7495 }, { - "epoch": 0.5672127426128409, - "grad_norm": 2.119398593902588, - "learning_rate": 7.74734296704976e-06, - "loss": 0.5872, + "epoch": 0.5277015135515664, + "grad_norm": 1.7420029640197754, + "learning_rate": 6.712781571280446e-06, + "loss": 0.5877, "step": 7496 }, { - "epoch": 0.5672884113351746, - "grad_norm": 1.802098274230957, - "learning_rate": 7.745112711071824e-06, - "loss": 0.8714, + "epoch": 0.5277719112988385, + "grad_norm": 1.7725459337234497, + "learning_rate": 6.7111868168887195e-06, + "loss": 0.7439, "step": 7497 }, { - "epoch": 0.5673640800575083, - "grad_norm": 2.2059485912323, - "learning_rate": 7.742882533678988e-06, - "loss": 0.7653, + "epoch": 0.5278423090461105, + "grad_norm": 1.8906582593917847, + "learning_rate": 6.70959207751265e-06, + "loss": 0.6795, "step": 7498 }, { - "epoch": 0.5674397487798418, - "grad_norm": 3.0473945140838623, - "learning_rate": 7.740652435010915e-06, - "loss": 0.7615, + "epoch": 0.5279127067933826, + "grad_norm": 1.8604646921157837, + "learning_rate": 6.707997353235153e-06, + "loss": 0.6254, "step": 7499 }, { - "epoch": 0.5675154175021755, - "grad_norm": 2.383582830429077, - "learning_rate": 7.738422415207257e-06, - "loss": 0.8784, + "epoch": 0.5279831045406547, + "grad_norm": 1.7486445903778076, + "learning_rate": 6.706402644139136e-06, + "loss": 0.6175, "step": 7500 }, { - "epoch": 0.567591086224509, - "grad_norm": 2.5827648639678955, - "learning_rate": 7.736192474407667e-06, - "loss": 0.7992, + "epoch": 0.5280535022879268, + "grad_norm": 2.08443284034729, + "learning_rate": 6.7048079503075145e-06, + "loss": 0.753, "step": 7501 }, { - "epoch": 0.5676667549468427, - "grad_norm": 2.1168980598449707, - "learning_rate": 7.733962612751795e-06, - "loss": 0.7262, + "epoch": 0.5281239000351988, + "grad_norm": 1.5325770378112793, + "learning_rate": 6.703213271823191e-06, + "loss": 0.73, "step": 7502 }, { - "epoch": 0.5677424236691764, - "grad_norm": 1.6200450658798218, - "learning_rate": 7.731732830379278e-06, - "loss": 0.6776, + "epoch": 0.528194297782471, + "grad_norm": 1.7194849252700806, + "learning_rate": 6.701618608769081e-06, + "loss": 0.749, "step": 7503 }, { - "epoch": 0.5678180923915099, - "grad_norm": 2.622553825378418, - "learning_rate": 7.729503127429755e-06, - "loss": 0.7279, + "epoch": 0.5282646955297431, + "grad_norm": 1.7757371664047241, + "learning_rate": 6.700023961228088e-06, + "loss": 0.6504, "step": 7504 }, { - "epoch": 0.5678937611138436, - "grad_norm": 1.9916034936904907, - "learning_rate": 7.727273504042853e-06, - "loss": 0.815, + "epoch": 0.5283350932770151, + "grad_norm": 2.1713650226593018, + "learning_rate": 6.698429329283123e-06, + "loss": 0.8018, "step": 7505 }, { - "epoch": 0.5679694298361773, - "grad_norm": 2.347259521484375, - "learning_rate": 7.7250439603582e-06, - "loss": 0.4899, + "epoch": 0.5284054910242872, + "grad_norm": 2.1625423431396484, + "learning_rate": 6.696834713017087e-06, + "loss": 0.7458, "step": 7506 }, { - "epoch": 0.5680450985585108, - "grad_norm": 2.532034397125244, - "learning_rate": 7.722814496515418e-06, - "loss": 0.7588, + "epoch": 0.5284758887715593, + "grad_norm": 1.8513375520706177, + "learning_rate": 6.695240112512894e-06, + "loss": 0.7984, "step": 7507 }, { - "epoch": 0.5681207672808445, - "grad_norm": 2.015144109725952, - "learning_rate": 7.720585112654124e-06, - "loss": 0.7181, + "epoch": 0.5285462865188314, + "grad_norm": 1.632020354270935, + "learning_rate": 6.6936455278534385e-06, + "loss": 0.6246, "step": 7508 }, { - "epoch": 0.568196436003178, - "grad_norm": 2.0816779136657715, - "learning_rate": 7.718355808913931e-06, - "loss": 0.7219, + "epoch": 0.5286166842661035, + "grad_norm": 1.7794729471206665, + "learning_rate": 6.692050959121633e-06, + "loss": 0.6721, "step": 7509 }, { - "epoch": 0.5682721047255117, - "grad_norm": 2.1527559757232666, - "learning_rate": 7.71612658543444e-06, - "loss": 0.6645, + "epoch": 0.5286870820133756, + "grad_norm": 1.7825604677200317, + "learning_rate": 6.690456406400375e-06, + "loss": 0.7461, "step": 7510 }, { - "epoch": 0.5683477734478454, - "grad_norm": 2.143007278442383, - "learning_rate": 7.713897442355251e-06, - "loss": 0.6128, + "epoch": 0.5287574797606477, + "grad_norm": 1.8926806449890137, + "learning_rate": 6.68886186977257e-06, + "loss": 0.7776, "step": 7511 }, { - "epoch": 0.5684234421701789, - "grad_norm": 2.356175184249878, - "learning_rate": 7.711668379815969e-06, - "loss": 0.7695, + "epoch": 0.5288278775079197, + "grad_norm": 1.8956408500671387, + "learning_rate": 6.687267349321116e-06, + "loss": 0.6919, "step": 7512 }, { - "epoch": 0.5684991108925126, - "grad_norm": 2.8489623069763184, - "learning_rate": 7.70943939795618e-06, - "loss": 0.8197, + "epoch": 0.5288982752551918, + "grad_norm": 2.341432809829712, + "learning_rate": 6.685672845128919e-06, + "loss": 0.7111, "step": 7513 }, { - "epoch": 0.5685747796148461, - "grad_norm": 1.8885236978530884, - "learning_rate": 7.707210496915469e-06, - "loss": 0.6958, + "epoch": 0.528968673002464, + "grad_norm": 1.8126460313796997, + "learning_rate": 6.684078357278872e-06, + "loss": 0.7536, "step": 7514 }, { - "epoch": 0.5686504483371798, - "grad_norm": 1.7150541543960571, - "learning_rate": 7.70498167683342e-06, - "loss": 0.6678, + "epoch": 0.529039070749736, + "grad_norm": 2.3326852321624756, + "learning_rate": 6.682483885853881e-06, + "loss": 0.8129, "step": 7515 }, { - "epoch": 0.5687261170595135, - "grad_norm": 2.1104001998901367, - "learning_rate": 7.702752937849603e-06, - "loss": 0.6665, + "epoch": 0.5291094684970081, + "grad_norm": 2.5285210609436035, + "learning_rate": 6.680889430936838e-06, + "loss": 0.7359, "step": 7516 }, { - "epoch": 0.568801785781847, - "grad_norm": 2.0098612308502197, - "learning_rate": 7.700524280103593e-06, - "loss": 0.7077, + "epoch": 0.5291798662442802, + "grad_norm": 1.6461563110351562, + "learning_rate": 6.679294992610643e-06, + "loss": 0.6089, "step": 7517 }, { - "epoch": 0.5688774545041807, - "grad_norm": 2.4959285259246826, - "learning_rate": 7.69829570373496e-06, - "loss": 0.7196, + "epoch": 0.5292502639915523, + "grad_norm": 2.044753074645996, + "learning_rate": 6.67770057095819e-06, + "loss": 0.6962, "step": 7518 }, { - "epoch": 0.5689531232265144, - "grad_norm": 1.7228821516036987, - "learning_rate": 7.696067208883257e-06, - "loss": 0.7246, + "epoch": 0.5293206617388243, + "grad_norm": 1.7113864421844482, + "learning_rate": 6.676106166062377e-06, + "loss": 0.6615, "step": 7519 }, { - "epoch": 0.5690287919488479, - "grad_norm": 1.951913833618164, - "learning_rate": 7.693838795688046e-06, - "loss": 0.5224, + "epoch": 0.5293910594860964, + "grad_norm": 1.8552175760269165, + "learning_rate": 6.6745117780061e-06, + "loss": 0.7467, "step": 7520 }, { - "epoch": 0.5691044606711816, - "grad_norm": 1.957993745803833, - "learning_rate": 7.691610464288869e-06, - "loss": 0.6015, + "epoch": 0.5294614572333686, + "grad_norm": 1.8117557764053345, + "learning_rate": 6.672917406872248e-06, + "loss": 0.8, "step": 7521 }, { - "epoch": 0.5691801293935151, - "grad_norm": 2.1833786964416504, - "learning_rate": 7.689382214825279e-06, - "loss": 0.7048, + "epoch": 0.5295318549806406, + "grad_norm": 1.7208598852157593, + "learning_rate": 6.671323052743719e-06, + "loss": 0.6985, "step": 7522 }, { - "epoch": 0.5692557981158488, - "grad_norm": 2.1259801387786865, - "learning_rate": 7.687154047436815e-06, - "loss": 0.6339, + "epoch": 0.5296022527279127, + "grad_norm": 1.65394926071167, + "learning_rate": 6.669728715703398e-06, + "loss": 0.6833, "step": 7523 }, { - "epoch": 0.5693314668381825, - "grad_norm": 3.2358062267303467, - "learning_rate": 7.68492596226301e-06, - "loss": 0.9118, + "epoch": 0.5296726504751847, + "grad_norm": 1.974219799041748, + "learning_rate": 6.6681343958341836e-06, + "loss": 0.6186, "step": 7524 }, { - "epoch": 0.569407135560516, - "grad_norm": 3.1258087158203125, - "learning_rate": 7.682697959443396e-06, - "loss": 0.733, + "epoch": 0.5297430482224569, + "grad_norm": 2.463388204574585, + "learning_rate": 6.666540093218961e-06, + "loss": 0.7838, "step": 7525 }, { - "epoch": 0.5694828042828497, - "grad_norm": 1.9816136360168457, - "learning_rate": 7.680470039117491e-06, - "loss": 0.5748, + "epoch": 0.529813445969729, + "grad_norm": 1.912752389907837, + "learning_rate": 6.664945807940623e-06, + "loss": 0.6655, "step": 7526 }, { - "epoch": 0.5695584730051833, - "grad_norm": 1.8756901025772095, - "learning_rate": 7.678242201424825e-06, - "loss": 0.7879, + "epoch": 0.529883843717001, + "grad_norm": 1.8175055980682373, + "learning_rate": 6.663351540082053e-06, + "loss": 0.668, "step": 7527 }, { - "epoch": 0.5696341417275169, - "grad_norm": 2.385270595550537, - "learning_rate": 7.676014446504906e-06, - "loss": 0.6642, + "epoch": 0.5299542414642732, + "grad_norm": 1.6138761043548584, + "learning_rate": 6.661757289726144e-06, + "loss": 0.7255, "step": 7528 }, { - "epoch": 0.5697098104498506, - "grad_norm": 1.8264906406402588, - "learning_rate": 7.673786774497248e-06, - "loss": 0.5951, + "epoch": 0.5300246392115452, + "grad_norm": 1.6489248275756836, + "learning_rate": 6.660163056955777e-06, + "loss": 0.6884, "step": 7529 }, { - "epoch": 0.5697854791721841, - "grad_norm": 3.006786823272705, - "learning_rate": 7.671559185541348e-06, - "loss": 0.6021, + "epoch": 0.5300950369588173, + "grad_norm": 2.275033950805664, + "learning_rate": 6.658568841853843e-06, + "loss": 0.6585, "step": 7530 }, { - "epoch": 0.5698611478945178, - "grad_norm": 2.019592523574829, - "learning_rate": 7.669331679776708e-06, - "loss": 0.6055, + "epoch": 0.5301654347060895, + "grad_norm": 1.7176398038864136, + "learning_rate": 6.656974644503222e-06, + "loss": 0.6444, "step": 7531 }, { - "epoch": 0.5699368166168515, - "grad_norm": 2.036536693572998, - "learning_rate": 7.667104257342825e-06, - "loss": 0.7984, + "epoch": 0.5302358324533615, + "grad_norm": 1.9630380868911743, + "learning_rate": 6.655380464986803e-06, + "loss": 0.6126, "step": 7532 }, { - "epoch": 0.570012485339185, - "grad_norm": 2.4801836013793945, - "learning_rate": 7.664876918379182e-06, - "loss": 0.747, + "epoch": 0.5303062302006336, + "grad_norm": 1.621010184288025, + "learning_rate": 6.653786303387463e-06, + "loss": 0.689, "step": 7533 }, { - "epoch": 0.5700881540615187, - "grad_norm": 2.761587381362915, - "learning_rate": 7.662649663025267e-06, - "loss": 0.7959, + "epoch": 0.5303766279479056, + "grad_norm": 1.748073697090149, + "learning_rate": 6.652192159788088e-06, + "loss": 0.6095, "step": 7534 }, { - "epoch": 0.5701638227838522, - "grad_norm": 1.7771096229553223, - "learning_rate": 7.660422491420554e-06, - "loss": 0.6943, + "epoch": 0.5304470256951778, + "grad_norm": 1.9960527420043945, + "learning_rate": 6.650598034271555e-06, + "loss": 0.6615, "step": 7535 }, { - "epoch": 0.5702394915061859, - "grad_norm": 2.084876537322998, - "learning_rate": 7.658195403704516e-06, - "loss": 0.6666, + "epoch": 0.5305174234424498, + "grad_norm": 1.8324952125549316, + "learning_rate": 6.6490039269207505e-06, + "loss": 0.7185, "step": 7536 }, { - "epoch": 0.5703151602285196, - "grad_norm": 1.841198444366455, - "learning_rate": 7.655968400016624e-06, - "loss": 0.791, + "epoch": 0.5305878211897219, + "grad_norm": 1.7429721355438232, + "learning_rate": 6.6474098378185465e-06, + "loss": 0.7199, "step": 7537 }, { - "epoch": 0.5703908289508531, - "grad_norm": 1.9432196617126465, - "learning_rate": 7.653741480496337e-06, - "loss": 0.8101, + "epoch": 0.530658218936994, + "grad_norm": 1.896440029144287, + "learning_rate": 6.645815767047827e-06, + "loss": 0.5962, "step": 7538 }, { - "epoch": 0.5704664976731868, - "grad_norm": 1.6360357999801636, - "learning_rate": 7.651514645283116e-06, - "loss": 0.6755, + "epoch": 0.5307286166842661, + "grad_norm": 1.9084726572036743, + "learning_rate": 6.644221714691465e-06, + "loss": 0.7683, "step": 7539 }, { - "epoch": 0.5705421663955205, - "grad_norm": 2.8515806198120117, - "learning_rate": 7.649287894516406e-06, - "loss": 0.7672, + "epoch": 0.5307990144315382, + "grad_norm": 1.7117743492126465, + "learning_rate": 6.64262768083234e-06, + "loss": 0.7243, "step": 7540 }, { - "epoch": 0.570617835117854, - "grad_norm": 1.9835529327392578, - "learning_rate": 7.647061228335656e-06, - "loss": 0.763, + "epoch": 0.5308694121788102, + "grad_norm": 1.6634089946746826, + "learning_rate": 6.641033665553321e-06, + "loss": 0.6265, "step": 7541 }, { - "epoch": 0.5706935038401877, - "grad_norm": 1.8836452960968018, - "learning_rate": 7.644834646880308e-06, - "loss": 0.6668, + "epoch": 0.5309398099260824, + "grad_norm": 1.8038777112960815, + "learning_rate": 6.639439668937292e-06, + "loss": 0.6173, "step": 7542 }, { - "epoch": 0.5707691725625212, - "grad_norm": 2.047100067138672, - "learning_rate": 7.6426081502898e-06, - "loss": 0.6978, + "epoch": 0.5310102076733545, + "grad_norm": 1.5525712966918945, + "learning_rate": 6.637845691067117e-06, + "loss": 0.6789, "step": 7543 }, { - "epoch": 0.5708448412848549, - "grad_norm": 1.7520688772201538, - "learning_rate": 7.640381738703558e-06, - "loss": 0.5824, + "epoch": 0.5310806054206265, + "grad_norm": 1.5650488138198853, + "learning_rate": 6.636251732025676e-06, + "loss": 0.6603, "step": 7544 }, { - "epoch": 0.5709205100071886, - "grad_norm": 2.1768970489501953, - "learning_rate": 7.638155412261011e-06, - "loss": 0.8075, + "epoch": 0.5311510031678986, + "grad_norm": 1.987973928451538, + "learning_rate": 6.634657791895833e-06, + "loss": 0.7499, "step": 7545 }, { - "epoch": 0.5709961787295221, - "grad_norm": 2.6738734245300293, - "learning_rate": 7.635929171101575e-06, - "loss": 0.7113, + "epoch": 0.5312214009151707, + "grad_norm": 1.885811686515808, + "learning_rate": 6.6330638707604675e-06, + "loss": 0.6866, "step": 7546 }, { - "epoch": 0.5710718474518558, - "grad_norm": 1.7277650833129883, - "learning_rate": 7.633703015364664e-06, - "loss": 0.6632, + "epoch": 0.5312917986624428, + "grad_norm": 1.702982783317566, + "learning_rate": 6.63146996870244e-06, + "loss": 0.7379, "step": 7547 }, { - "epoch": 0.5711475161741894, - "grad_norm": 1.8607732057571411, - "learning_rate": 7.631476945189694e-06, - "loss": 0.5316, + "epoch": 0.5313621964097149, + "grad_norm": 1.787111520767212, + "learning_rate": 6.629876085804623e-06, + "loss": 0.7197, "step": 7548 }, { - "epoch": 0.571223184896523, - "grad_norm": 2.290065050125122, - "learning_rate": 7.629250960716061e-06, - "loss": 0.6583, + "epoch": 0.531432594156987, + "grad_norm": 1.7335540056228638, + "learning_rate": 6.628282222149884e-06, + "loss": 0.7013, "step": 7549 }, { - "epoch": 0.5712988536188567, - "grad_norm": 2.1077466011047363, - "learning_rate": 7.62702506208317e-06, - "loss": 0.7311, + "epoch": 0.5315029919042591, + "grad_norm": 1.5618013143539429, + "learning_rate": 6.62668837782109e-06, + "loss": 0.6209, "step": 7550 }, { - "epoch": 0.5713745223411902, - "grad_norm": 2.3623740673065186, - "learning_rate": 7.6247992494304075e-06, - "loss": 0.6345, + "epoch": 0.5315733896515311, + "grad_norm": 1.9631468057632446, + "learning_rate": 6.6250945529011025e-06, + "loss": 0.6712, "step": 7551 }, { - "epoch": 0.5714501910635239, - "grad_norm": 2.172419548034668, - "learning_rate": 7.622573522897162e-06, - "loss": 0.6144, + "epoch": 0.5316437873988032, + "grad_norm": 2.008808135986328, + "learning_rate": 6.623500747472793e-06, + "loss": 0.6899, "step": 7552 }, { - "epoch": 0.5715258597858576, - "grad_norm": 1.9453599452972412, - "learning_rate": 7.620347882622821e-06, - "loss": 0.8414, + "epoch": 0.5317141851460754, + "grad_norm": 1.6450614929199219, + "learning_rate": 6.621906961619015e-06, + "loss": 0.7631, "step": 7553 }, { - "epoch": 0.5716015285081911, - "grad_norm": 2.7051191329956055, - "learning_rate": 7.6181223287467574e-06, - "loss": 0.6487, + "epoch": 0.5317845828933474, + "grad_norm": 2.401175022125244, + "learning_rate": 6.620313195422641e-06, + "loss": 0.6669, "step": 7554 }, { - "epoch": 0.5716771972305248, - "grad_norm": 2.7036776542663574, - "learning_rate": 7.615896861408342e-06, - "loss": 0.6815, + "epoch": 0.5318549806406195, + "grad_norm": 1.9532666206359863, + "learning_rate": 6.618719448966526e-06, + "loss": 0.7256, "step": 7555 }, { - "epoch": 0.5717528659528583, - "grad_norm": 2.5882720947265625, - "learning_rate": 7.613671480746944e-06, - "loss": 0.735, + "epoch": 0.5319253783878916, + "grad_norm": 1.6480385065078735, + "learning_rate": 6.617125722333534e-06, + "loss": 0.7787, "step": 7556 }, { - "epoch": 0.571828534675192, - "grad_norm": 2.0548622608184814, - "learning_rate": 7.611446186901918e-06, - "loss": 0.7788, + "epoch": 0.5319957761351637, + "grad_norm": 2.0433669090270996, + "learning_rate": 6.615532015606518e-06, + "loss": 0.6641, "step": 7557 }, { - "epoch": 0.5719042033975257, - "grad_norm": 2.1789886951446533, - "learning_rate": 7.609220980012624e-06, - "loss": 0.672, + "epoch": 0.5320661738824357, + "grad_norm": 1.7731455564498901, + "learning_rate": 6.6139383288683464e-06, + "loss": 0.6834, "step": 7558 }, { - "epoch": 0.5719798721198592, - "grad_norm": 1.767871618270874, - "learning_rate": 7.606995860218413e-06, - "loss": 0.6939, + "epoch": 0.5321365716297078, + "grad_norm": 1.781884789466858, + "learning_rate": 6.6123446622018655e-06, + "loss": 0.6478, "step": 7559 }, { - "epoch": 0.5720555408421929, - "grad_norm": 2.4886820316314697, - "learning_rate": 7.604770827658626e-06, - "loss": 0.7978, + "epoch": 0.53220696937698, + "grad_norm": 1.7247004508972168, + "learning_rate": 6.610751015689941e-06, + "loss": 0.6018, "step": 7560 }, { - "epoch": 0.5721312095645265, - "grad_norm": 2.7979044914245605, - "learning_rate": 7.602545882472603e-06, - "loss": 0.7555, + "epoch": 0.532277367124252, + "grad_norm": 1.8551161289215088, + "learning_rate": 6.609157389415421e-06, + "loss": 0.7282, "step": 7561 }, { - "epoch": 0.5722068782868601, - "grad_norm": 2.692528009414673, - "learning_rate": 7.6003210247996736e-06, - "loss": 0.7346, + "epoch": 0.5323477648715241, + "grad_norm": 1.9824702739715576, + "learning_rate": 6.607563783461164e-06, + "loss": 0.6314, "step": 7562 }, { - "epoch": 0.5722825470091938, - "grad_norm": 1.8994667530059814, - "learning_rate": 7.59809625477917e-06, - "loss": 0.7628, + "epoch": 0.5324181626187962, + "grad_norm": 1.6524405479431152, + "learning_rate": 6.605970197910019e-06, + "loss": 0.6948, "step": 7563 }, { - "epoch": 0.5723582157315273, - "grad_norm": 2.2061333656311035, - "learning_rate": 7.595871572550416e-06, - "loss": 0.7109, + "epoch": 0.5324885603660683, + "grad_norm": 1.8479406833648682, + "learning_rate": 6.604376632844845e-06, + "loss": 0.8421, "step": 7564 }, { - "epoch": 0.572433884453861, - "grad_norm": 2.585219621658325, - "learning_rate": 7.593646978252723e-06, - "loss": 0.663, + "epoch": 0.5325589581133404, + "grad_norm": 1.6810022592544556, + "learning_rate": 6.602783088348482e-06, + "loss": 0.7294, "step": 7565 }, { - "epoch": 0.5725095531761947, - "grad_norm": 2.168567180633545, - "learning_rate": 7.591422472025408e-06, - "loss": 0.6996, + "epoch": 0.5326293558606124, + "grad_norm": 1.7090864181518555, + "learning_rate": 6.60118956450379e-06, + "loss": 0.6951, "step": 7566 }, { - "epoch": 0.5725852218985282, - "grad_norm": 2.5026702880859375, - "learning_rate": 7.589198054007769e-06, - "loss": 0.5515, + "epoch": 0.5326997536078846, + "grad_norm": 1.7481186389923096, + "learning_rate": 6.599596061393612e-06, + "loss": 0.6516, "step": 7567 }, { - "epoch": 0.5726608906208619, - "grad_norm": 2.055335760116577, - "learning_rate": 7.5869737243391125e-06, - "loss": 0.6029, + "epoch": 0.5327701513551566, + "grad_norm": 2.0140206813812256, + "learning_rate": 6.5980025791007956e-06, + "loss": 0.7058, "step": 7568 }, { - "epoch": 0.5727365593431955, - "grad_norm": 1.7085371017456055, - "learning_rate": 7.584749483158733e-06, - "loss": 0.6204, + "epoch": 0.5328405491024287, + "grad_norm": 1.8149967193603516, + "learning_rate": 6.596409117708193e-06, + "loss": 0.6632, "step": 7569 }, { - "epoch": 0.5728122280655291, - "grad_norm": 2.084444522857666, - "learning_rate": 7.582525330605918e-06, - "loss": 0.7365, + "epoch": 0.5329109468497009, + "grad_norm": 1.7204270362854004, + "learning_rate": 6.594815677298644e-06, + "loss": 0.7462, "step": 7570 }, { - "epoch": 0.5728878967878628, - "grad_norm": 2.4518747329711914, - "learning_rate": 7.580301266819951e-06, - "loss": 0.7121, + "epoch": 0.5329813445969729, + "grad_norm": 2.195477247238159, + "learning_rate": 6.593222257954997e-06, + "loss": 0.7379, "step": 7571 }, { - "epoch": 0.5729635655101963, - "grad_norm": 2.6568686962127686, - "learning_rate": 7.578077291940109e-06, - "loss": 0.79, + "epoch": 0.533051742344245, + "grad_norm": 1.9683197736740112, + "learning_rate": 6.59162885976009e-06, + "loss": 0.5701, "step": 7572 }, { - "epoch": 0.57303923423253, - "grad_norm": 1.885642409324646, - "learning_rate": 7.575853406105669e-06, - "loss": 0.67, + "epoch": 0.533122140091517, + "grad_norm": 2.1704883575439453, + "learning_rate": 6.590035482796772e-06, + "loss": 0.7362, "step": 7573 }, { - "epoch": 0.5731149029548636, - "grad_norm": 2.200899362564087, - "learning_rate": 7.573629609455893e-06, - "loss": 0.7487, + "epoch": 0.5331925378387892, + "grad_norm": 2.072948694229126, + "learning_rate": 6.588442127147876e-06, + "loss": 0.6935, "step": 7574 }, { - "epoch": 0.5731905716771972, - "grad_norm": 1.83932626247406, - "learning_rate": 7.571405902130047e-06, - "loss": 0.8097, + "epoch": 0.5332629355860612, + "grad_norm": 1.7833729982376099, + "learning_rate": 6.586848792896251e-06, + "loss": 0.654, "step": 7575 }, { - "epoch": 0.5732662403995309, - "grad_norm": 1.8516623973846436, - "learning_rate": 7.569182284267382e-06, - "loss": 0.7444, + "epoch": 0.5333333333333333, + "grad_norm": 1.9999759197235107, + "learning_rate": 6.585255480124728e-06, + "loss": 0.732, "step": 7576 }, { - "epoch": 0.5733419091218644, - "grad_norm": 2.9464001655578613, - "learning_rate": 7.566958756007148e-06, - "loss": 0.7044, + "epoch": 0.5334037310806055, + "grad_norm": 1.4602270126342773, + "learning_rate": 6.5836621889161515e-06, + "loss": 0.5872, "step": 7577 }, { - "epoch": 0.5734175778441981, - "grad_norm": 2.2606701850891113, - "learning_rate": 7.5647353174885956e-06, - "loss": 0.7526, + "epoch": 0.5334741288278775, + "grad_norm": 2.1560707092285156, + "learning_rate": 6.582068919353352e-06, + "loss": 0.6005, "step": 7578 }, { - "epoch": 0.5734932465665318, - "grad_norm": 1.8875621557235718, - "learning_rate": 7.5625119688509575e-06, - "loss": 0.5802, + "epoch": 0.5335445265751496, + "grad_norm": 1.6263302564620972, + "learning_rate": 6.58047567151917e-06, + "loss": 0.6699, "step": 7579 }, { - "epoch": 0.5735689152888653, - "grad_norm": 2.271878957748413, - "learning_rate": 7.560288710233472e-06, - "loss": 0.7184, + "epoch": 0.5336149243224216, + "grad_norm": 1.7706831693649292, + "learning_rate": 6.5788824454964346e-06, + "loss": 0.6237, "step": 7580 }, { - "epoch": 0.573644584011199, - "grad_norm": 2.303473711013794, - "learning_rate": 7.558065541775362e-06, - "loss": 0.8383, + "epoch": 0.5336853220696938, + "grad_norm": 1.645206093788147, + "learning_rate": 6.577289241367986e-06, + "loss": 0.6065, "step": 7581 }, { - "epoch": 0.5737202527335326, - "grad_norm": 1.9427469968795776, - "learning_rate": 7.555842463615853e-06, - "loss": 0.7299, + "epoch": 0.5337557198169659, + "grad_norm": 1.8330366611480713, + "learning_rate": 6.5756960592166505e-06, + "loss": 0.6084, "step": 7582 }, { - "epoch": 0.5737959214558662, - "grad_norm": 1.7646313905715942, - "learning_rate": 7.553619475894155e-06, - "loss": 0.6953, + "epoch": 0.5338261175642379, + "grad_norm": 1.4619405269622803, + "learning_rate": 6.5741028991252625e-06, + "loss": 0.6582, "step": 7583 }, { - "epoch": 0.5738715901781999, - "grad_norm": 2.1616227626800537, - "learning_rate": 7.551396578749487e-06, - "loss": 0.8009, + "epoch": 0.5338965153115101, + "grad_norm": 1.7559928894042969, + "learning_rate": 6.572509761176649e-06, + "loss": 0.7826, "step": 7584 }, { - "epoch": 0.5739472589005334, - "grad_norm": 1.9275579452514648, - "learning_rate": 7.5491737723210515e-06, - "loss": 0.7245, + "epoch": 0.5339669130587821, + "grad_norm": 1.8723212480545044, + "learning_rate": 6.570916645453641e-06, + "loss": 0.6883, "step": 7585 }, { - "epoch": 0.5740229276228671, - "grad_norm": 1.933892011642456, - "learning_rate": 7.546951056748047e-06, - "loss": 0.7036, + "epoch": 0.5340373108060542, + "grad_norm": 1.9045860767364502, + "learning_rate": 6.569323552039063e-06, + "loss": 0.774, "step": 7586 }, { - "epoch": 0.5740985963452007, - "grad_norm": 1.7795796394348145, - "learning_rate": 7.544728432169666e-06, - "loss": 0.6322, + "epoch": 0.5341077085533263, + "grad_norm": 1.8993767499923706, + "learning_rate": 6.567730481015747e-06, + "loss": 0.7194, "step": 7587 }, { - "epoch": 0.5741742650675343, - "grad_norm": 1.7775262594223022, - "learning_rate": 7.542505898725095e-06, - "loss": 0.6999, + "epoch": 0.5341781063005984, + "grad_norm": 1.8078961372375488, + "learning_rate": 6.566137432466513e-06, + "loss": 0.5774, "step": 7588 }, { - "epoch": 0.574249933789868, - "grad_norm": 1.9181798696517944, - "learning_rate": 7.540283456553523e-06, - "loss": 0.5708, + "epoch": 0.5342485040478705, + "grad_norm": 1.9497579336166382, + "learning_rate": 6.5645444064741885e-06, + "loss": 0.8288, "step": 7589 }, { - "epoch": 0.5743256025122015, - "grad_norm": 4.439153671264648, - "learning_rate": 7.538061105794121e-06, - "loss": 0.8357, + "epoch": 0.5343189017951425, + "grad_norm": 2.015528678894043, + "learning_rate": 6.562951403121592e-06, + "loss": 0.7018, "step": 7590 }, { - "epoch": 0.5744012712345352, - "grad_norm": 1.863204836845398, - "learning_rate": 7.5358388465860625e-06, - "loss": 0.5036, + "epoch": 0.5343892995424147, + "grad_norm": 1.7684500217437744, + "learning_rate": 6.561358422491551e-06, + "loss": 0.6903, "step": 7591 }, { - "epoch": 0.5744769399568689, - "grad_norm": 2.4404690265655518, - "learning_rate": 7.533616679068508e-06, - "loss": 0.8833, + "epoch": 0.5344596972896867, + "grad_norm": 1.9624093770980835, + "learning_rate": 6.5597654646668796e-06, + "loss": 0.6145, "step": 7592 }, { - "epoch": 0.5745526086792024, - "grad_norm": 2.3636746406555176, - "learning_rate": 7.53139460338062e-06, - "loss": 0.6487, + "epoch": 0.5345300950369588, + "grad_norm": 1.8126200437545776, + "learning_rate": 6.558172529730402e-06, + "loss": 0.8254, "step": 7593 }, { - "epoch": 0.5746282774015361, - "grad_norm": 2.188185691833496, - "learning_rate": 7.5291726196615545e-06, - "loss": 0.6516, + "epoch": 0.5346004927842309, + "grad_norm": 1.8028085231781006, + "learning_rate": 6.5565796177649335e-06, + "loss": 0.6722, "step": 7594 }, { - "epoch": 0.5747039461238697, - "grad_norm": 2.0827059745788574, - "learning_rate": 7.526950728050455e-06, - "loss": 0.6179, + "epoch": 0.534670890531503, + "grad_norm": 1.5922553539276123, + "learning_rate": 6.554986728853295e-06, + "loss": 0.5974, "step": 7595 }, { - "epoch": 0.5747796148462033, - "grad_norm": 2.1536214351654053, - "learning_rate": 7.524728928686468e-06, - "loss": 0.5059, + "epoch": 0.5347412882787751, + "grad_norm": 1.889975905418396, + "learning_rate": 6.553393863078296e-06, + "loss": 0.6103, "step": 7596 }, { - "epoch": 0.574855283568537, - "grad_norm": 1.9691141843795776, - "learning_rate": 7.522507221708724e-06, - "loss": 0.6105, + "epoch": 0.5348116860260471, + "grad_norm": 2.4559378623962402, + "learning_rate": 6.551801020522759e-06, + "loss": 0.7008, "step": 7597 }, { - "epoch": 0.5749309522908705, - "grad_norm": 2.0234973430633545, - "learning_rate": 7.520285607256354e-06, - "loss": 0.7116, + "epoch": 0.5348820837733193, + "grad_norm": 1.8064526319503784, + "learning_rate": 6.550208201269488e-06, + "loss": 0.6323, "step": 7598 }, { - "epoch": 0.5750066210132042, - "grad_norm": 2.0433642864227295, - "learning_rate": 7.51806408546849e-06, - "loss": 0.6751, + "epoch": 0.5349524815205914, + "grad_norm": 1.7212413549423218, + "learning_rate": 6.548615405401303e-06, + "loss": 0.6201, "step": 7599 }, { - "epoch": 0.5750822897355378, - "grad_norm": 2.2238693237304688, - "learning_rate": 7.515842656484246e-06, - "loss": 0.5849, + "epoch": 0.5350228792678634, + "grad_norm": 1.7334377765655518, + "learning_rate": 6.547022633001011e-06, + "loss": 0.6379, "step": 7600 }, { - "epoch": 0.5751579584578714, - "grad_norm": 2.1468698978424072, - "learning_rate": 7.513621320442734e-06, - "loss": 0.6573, + "epoch": 0.5350932770151355, + "grad_norm": 1.81745445728302, + "learning_rate": 6.545429884151424e-06, + "loss": 0.5975, "step": 7601 }, { - "epoch": 0.5752336271802051, - "grad_norm": 1.8017033338546753, - "learning_rate": 7.5114000774830645e-06, - "loss": 0.7018, + "epoch": 0.5351636747624076, + "grad_norm": 1.9506843090057373, + "learning_rate": 6.543837158935345e-06, + "loss": 0.7484, "step": 7602 }, { - "epoch": 0.5753092959025387, - "grad_norm": 3.9429688453674316, - "learning_rate": 7.509178927744331e-06, - "loss": 0.676, + "epoch": 0.5352340725096797, + "grad_norm": 1.9207278490066528, + "learning_rate": 6.5422444574355905e-06, + "loss": 0.7411, "step": 7603 }, { - "epoch": 0.5753849646248723, - "grad_norm": 2.290649175643921, - "learning_rate": 7.506957871365639e-06, - "loss": 0.6806, + "epoch": 0.5353044702569518, + "grad_norm": 1.642674446105957, + "learning_rate": 6.540651779734956e-06, + "loss": 0.6263, "step": 7604 }, { - "epoch": 0.575460633347206, - "grad_norm": 2.842364549636841, - "learning_rate": 7.504736908486076e-06, - "loss": 0.8284, + "epoch": 0.5353748680042238, + "grad_norm": 1.889747977256775, + "learning_rate": 6.5390591259162555e-06, + "loss": 0.5822, "step": 7605 }, { - "epoch": 0.5755363020695395, - "grad_norm": 1.501198172569275, - "learning_rate": 7.502516039244721e-06, - "loss": 0.7676, + "epoch": 0.535445265751496, + "grad_norm": 1.8003062009811401, + "learning_rate": 6.537466496062287e-06, + "loss": 0.7749, "step": 7606 }, { - "epoch": 0.5756119707918732, - "grad_norm": 1.9701095819473267, - "learning_rate": 7.500295263780658e-06, - "loss": 0.7604, + "epoch": 0.535515663498768, + "grad_norm": 1.9270669221878052, + "learning_rate": 6.535873890255854e-06, + "loss": 0.7477, "step": 7607 }, { - "epoch": 0.5756876395142068, - "grad_norm": 2.247490644454956, - "learning_rate": 7.498074582232952e-06, - "loss": 0.895, + "epoch": 0.5355860612460401, + "grad_norm": 1.6326743364334106, + "learning_rate": 6.534281308579757e-06, + "loss": 0.6714, "step": 7608 }, { - "epoch": 0.5757633082365404, - "grad_norm": 1.9799500703811646, - "learning_rate": 7.4958539947406755e-06, - "loss": 0.7798, + "epoch": 0.5356564589933123, + "grad_norm": 1.8754991292953491, + "learning_rate": 6.5326887511168e-06, + "loss": 0.7455, "step": 7609 }, { - "epoch": 0.5758389769588741, - "grad_norm": 2.8931593894958496, - "learning_rate": 7.493633501442889e-06, - "loss": 0.6262, + "epoch": 0.5357268567405843, + "grad_norm": 1.8203610181808472, + "learning_rate": 6.531096217949774e-06, + "loss": 0.6497, "step": 7610 }, { - "epoch": 0.5759146456812076, - "grad_norm": 2.1218535900115967, - "learning_rate": 7.4914131024786425e-06, - "loss": 0.7037, + "epoch": 0.5357972544878564, + "grad_norm": 1.651504397392273, + "learning_rate": 6.529503709161483e-06, + "loss": 0.7736, "step": 7611 }, { - "epoch": 0.5759903144035413, - "grad_norm": 2.6001503467559814, - "learning_rate": 7.4891927979869885e-06, - "loss": 0.6523, + "epoch": 0.5358676522351284, + "grad_norm": 1.7416820526123047, + "learning_rate": 6.527911224834719e-06, + "loss": 0.7641, "step": 7612 }, { - "epoch": 0.5760659831258749, - "grad_norm": 2.1940317153930664, - "learning_rate": 7.486972588106963e-06, - "loss": 0.6337, + "epoch": 0.5359380499824006, + "grad_norm": 1.760651707649231, + "learning_rate": 6.526318765052279e-06, + "loss": 0.6639, "step": 7613 }, { - "epoch": 0.5761416518482085, - "grad_norm": 1.9558513164520264, - "learning_rate": 7.4847524729776135e-06, - "loss": 0.7347, + "epoch": 0.5360084477296726, + "grad_norm": 1.7100434303283691, + "learning_rate": 6.5247263298969525e-06, + "loss": 0.6942, "step": 7614 }, { - "epoch": 0.5762173205705422, - "grad_norm": 1.9365533590316772, - "learning_rate": 7.4825324527379625e-06, - "loss": 0.7676, + "epoch": 0.5360788454769447, + "grad_norm": 2.0602376461029053, + "learning_rate": 6.523133919451539e-06, + "loss": 0.7618, "step": 7615 }, { - "epoch": 0.5762929892928758, - "grad_norm": 2.3588614463806152, - "learning_rate": 7.48031252752704e-06, - "loss": 0.5735, + "epoch": 0.5361492432242169, + "grad_norm": 1.5820198059082031, + "learning_rate": 6.521541533798821e-06, + "loss": 0.7584, "step": 7616 }, { - "epoch": 0.5763686580152094, - "grad_norm": 1.7206127643585205, - "learning_rate": 7.4780926974838605e-06, - "loss": 0.6839, + "epoch": 0.5362196409714889, + "grad_norm": 1.6755225658416748, + "learning_rate": 6.519949173021594e-06, + "loss": 0.6295, "step": 7617 }, { - "epoch": 0.5764443267375431, - "grad_norm": 2.759472370147705, - "learning_rate": 7.4758729627474395e-06, - "loss": 0.6453, + "epoch": 0.536290038718761, + "grad_norm": 1.7883131504058838, + "learning_rate": 6.5183568372026435e-06, + "loss": 0.7669, "step": 7618 }, { - "epoch": 0.5765199954598766, - "grad_norm": 2.1076037883758545, - "learning_rate": 7.473653323456781e-06, - "loss": 0.6109, + "epoch": 0.536360436466033, + "grad_norm": 1.7524387836456299, + "learning_rate": 6.516764526424756e-06, + "loss": 0.753, "step": 7619 }, { - "epoch": 0.5765956641822103, - "grad_norm": 1.9081132411956787, - "learning_rate": 7.471433779750889e-06, - "loss": 0.7941, + "epoch": 0.5364308342133052, + "grad_norm": 1.6729588508605957, + "learning_rate": 6.5151722407707195e-06, + "loss": 0.7181, "step": 7620 }, { - "epoch": 0.5766713329045439, - "grad_norm": 1.8242427110671997, - "learning_rate": 7.4692143317687595e-06, - "loss": 0.6116, + "epoch": 0.5365012319605773, + "grad_norm": 2.098452091217041, + "learning_rate": 6.513579980323319e-06, + "loss": 0.6554, "step": 7621 }, { - "epoch": 0.5767470016268775, - "grad_norm": 2.058173179626465, - "learning_rate": 7.466994979649378e-06, - "loss": 0.6837, + "epoch": 0.5365716297078493, + "grad_norm": 1.6959097385406494, + "learning_rate": 6.5119877451653345e-06, + "loss": 0.6121, "step": 7622 }, { - "epoch": 0.5768226703492112, - "grad_norm": 2.6424033641815186, - "learning_rate": 7.464775723531731e-06, - "loss": 0.7541, + "epoch": 0.5366420274551215, + "grad_norm": 1.6789363622665405, + "learning_rate": 6.5103955353795495e-06, + "loss": 0.6554, "step": 7623 }, { - "epoch": 0.5768983390715448, - "grad_norm": 1.771959900856018, - "learning_rate": 7.46255656355479e-06, - "loss": 0.6146, + "epoch": 0.5367124252023935, + "grad_norm": 1.6949821710586548, + "learning_rate": 6.508803351048744e-06, + "loss": 0.6116, "step": 7624 }, { - "epoch": 0.5769740077938784, - "grad_norm": 2.1034512519836426, - "learning_rate": 7.460337499857531e-06, - "loss": 0.7532, + "epoch": 0.5367828229496656, + "grad_norm": 1.7603811025619507, + "learning_rate": 6.507211192255695e-06, + "loss": 0.7552, "step": 7625 }, { - "epoch": 0.577049676516212, - "grad_norm": 2.0289289951324463, - "learning_rate": 7.4581185325789204e-06, - "loss": 0.7371, + "epoch": 0.5368532206969377, + "grad_norm": 1.5074735879898071, + "learning_rate": 6.505619059083185e-06, + "loss": 0.7647, "step": 7626 }, { - "epoch": 0.5771253452385456, - "grad_norm": 2.0849831104278564, - "learning_rate": 7.455899661857912e-06, - "loss": 0.6457, + "epoch": 0.5369236184442098, + "grad_norm": 1.7914700508117676, + "learning_rate": 6.504026951613987e-06, + "loss": 0.7322, "step": 7627 }, { - "epoch": 0.5772010139608793, - "grad_norm": 1.9334015846252441, - "learning_rate": 7.453680887833464e-06, - "loss": 0.6986, + "epoch": 0.5369940161914819, + "grad_norm": 1.8822458982467651, + "learning_rate": 6.502434869930878e-06, + "loss": 0.7405, "step": 7628 }, { - "epoch": 0.5772766826832129, - "grad_norm": 2.172220468521118, - "learning_rate": 7.451462210644513e-06, - "loss": 0.6897, + "epoch": 0.5370644139387539, + "grad_norm": 1.9195444583892822, + "learning_rate": 6.500842814116629e-06, + "loss": 0.7344, "step": 7629 }, { - "epoch": 0.5773523514055465, - "grad_norm": 2.764195203781128, - "learning_rate": 7.449243630430013e-06, - "loss": 0.7162, + "epoch": 0.5371348116860261, + "grad_norm": 1.7205439805984497, + "learning_rate": 6.499250784254017e-06, + "loss": 0.7425, "step": 7630 }, { - "epoch": 0.5774280201278802, - "grad_norm": 1.864995002746582, - "learning_rate": 7.447025147328891e-06, - "loss": 0.7502, + "epoch": 0.5372052094332981, + "grad_norm": 1.5717813968658447, + "learning_rate": 6.497658780425806e-06, + "loss": 0.6014, "step": 7631 }, { - "epoch": 0.5775036888502137, - "grad_norm": 1.9428197145462036, - "learning_rate": 7.444806761480079e-06, - "loss": 0.7187, + "epoch": 0.5372756071805702, + "grad_norm": 1.8617757558822632, + "learning_rate": 6.496066802714773e-06, + "loss": 0.6897, "step": 7632 }, { - "epoch": 0.5775793575725474, - "grad_norm": 1.9613124132156372, - "learning_rate": 7.442588473022497e-06, - "loss": 0.8163, + "epoch": 0.5373460049278423, + "grad_norm": 1.9209654331207275, + "learning_rate": 6.4944748512036815e-06, + "loss": 0.631, "step": 7633 }, { - "epoch": 0.577655026294881, - "grad_norm": 1.6671011447906494, - "learning_rate": 7.440370282095059e-06, - "loss": 0.7783, + "epoch": 0.5374164026751144, + "grad_norm": 1.6570935249328613, + "learning_rate": 6.492882925975302e-06, + "loss": 0.6882, "step": 7634 }, { - "epoch": 0.5777306950172146, - "grad_norm": 1.9171720743179321, - "learning_rate": 7.438152188836682e-06, - "loss": 0.8034, + "epoch": 0.5374868004223865, + "grad_norm": 1.8075212240219116, + "learning_rate": 6.491291027112398e-06, + "loss": 0.8008, "step": 7635 }, { - "epoch": 0.5778063637395483, - "grad_norm": 2.3247179985046387, - "learning_rate": 7.435934193386265e-06, - "loss": 0.6332, + "epoch": 0.5375571981696585, + "grad_norm": 1.7746261358261108, + "learning_rate": 6.489699154697735e-06, + "loss": 0.6159, "step": 7636 }, { - "epoch": 0.5778820324618819, - "grad_norm": 2.2075064182281494, - "learning_rate": 7.433716295882709e-06, - "loss": 0.725, + "epoch": 0.5376275959169307, + "grad_norm": 1.8936866521835327, + "learning_rate": 6.488107308814072e-06, + "loss": 0.6072, "step": 7637 }, { - "epoch": 0.5779577011842155, - "grad_norm": 2.2133593559265137, - "learning_rate": 7.431498496464904e-06, - "loss": 0.8622, + "epoch": 0.5376979936642028, + "grad_norm": 1.9252159595489502, + "learning_rate": 6.486515489544176e-06, + "loss": 0.7596, "step": 7638 }, { - "epoch": 0.5780333699065491, - "grad_norm": 1.9004621505737305, - "learning_rate": 7.4292807952717325e-06, - "loss": 0.6223, + "epoch": 0.5377683914114748, + "grad_norm": 1.836943507194519, + "learning_rate": 6.484923696970804e-06, + "loss": 0.6519, "step": 7639 }, { - "epoch": 0.5781090386288827, - "grad_norm": 2.1120524406433105, - "learning_rate": 7.427063192442083e-06, - "loss": 0.6237, + "epoch": 0.5378387891587469, + "grad_norm": 1.577463984489441, + "learning_rate": 6.483331931176717e-06, + "loss": 0.5807, "step": 7640 }, { - "epoch": 0.5781847073512164, - "grad_norm": 1.8914631605148315, - "learning_rate": 7.424845688114822e-06, - "loss": 0.6204, + "epoch": 0.537909186906019, + "grad_norm": 2.2375128269195557, + "learning_rate": 6.481740192244669e-06, + "loss": 0.6841, "step": 7641 }, { - "epoch": 0.57826037607355, - "grad_norm": 1.872819423675537, - "learning_rate": 7.42262828242882e-06, - "loss": 0.7482, + "epoch": 0.5379795846532911, + "grad_norm": 1.8305737972259521, + "learning_rate": 6.48014848025742e-06, + "loss": 0.6673, "step": 7642 }, { - "epoch": 0.5783360447958836, - "grad_norm": 1.7717469930648804, - "learning_rate": 7.420410975522935e-06, - "loss": 0.7326, + "epoch": 0.5380499824005632, + "grad_norm": 1.5856820344924927, + "learning_rate": 6.478556795297719e-06, + "loss": 0.7657, "step": 7643 }, { - "epoch": 0.5784117135182173, - "grad_norm": 2.0166468620300293, - "learning_rate": 7.418193767536022e-06, - "loss": 0.6824, + "epoch": 0.5381203801478353, + "grad_norm": 1.5195484161376953, + "learning_rate": 6.4769651374483244e-06, + "loss": 0.6363, "step": 7644 }, { - "epoch": 0.5784873822405509, - "grad_norm": 2.0506820678710938, - "learning_rate": 7.4159766586069335e-06, - "loss": 0.602, + "epoch": 0.5381907778951074, + "grad_norm": 2.2556374073028564, + "learning_rate": 6.475373506791985e-06, + "loss": 0.6214, "step": 7645 }, { - "epoch": 0.5785630509628845, - "grad_norm": 1.8485995531082153, - "learning_rate": 7.413759648874512e-06, - "loss": 0.7065, + "epoch": 0.5382611756423794, + "grad_norm": 1.662761926651001, + "learning_rate": 6.473781903411455e-06, + "loss": 0.549, "step": 7646 }, { - "epoch": 0.5786387196852181, - "grad_norm": 3.743983030319214, - "learning_rate": 7.411542738477589e-06, - "loss": 0.777, + "epoch": 0.5383315733896515, + "grad_norm": 1.4402122497558594, + "learning_rate": 6.472190327389477e-06, + "loss": 0.6282, "step": 7647 }, { - "epoch": 0.5787143884075517, - "grad_norm": 2.7498581409454346, - "learning_rate": 7.409325927555001e-06, - "loss": 0.7437, + "epoch": 0.5384019711369237, + "grad_norm": 1.8957995176315308, + "learning_rate": 6.470598778808805e-06, + "loss": 0.6763, "step": 7648 }, { - "epoch": 0.5787900571298854, - "grad_norm": 1.9643309116363525, - "learning_rate": 7.4071092162455635e-06, - "loss": 0.7352, + "epoch": 0.5384723688841957, + "grad_norm": 1.7002350091934204, + "learning_rate": 6.469007257752178e-06, + "loss": 0.7286, "step": 7649 }, { - "epoch": 0.578865725852219, - "grad_norm": 1.9491885900497437, - "learning_rate": 7.4048926046881e-06, - "loss": 0.7209, + "epoch": 0.5385427666314678, + "grad_norm": 1.6537771224975586, + "learning_rate": 6.467415764302348e-06, + "loss": 0.6924, "step": 7650 }, { - "epoch": 0.5789413945745526, - "grad_norm": 1.8065403699874878, - "learning_rate": 7.402676093021424e-06, - "loss": 0.728, + "epoch": 0.5386131643787399, + "grad_norm": 1.6342873573303223, + "learning_rate": 6.465824298542053e-06, + "loss": 0.758, "step": 7651 }, { - "epoch": 0.5790170632968862, - "grad_norm": 1.9861661195755005, - "learning_rate": 7.400459681384335e-06, - "loss": 0.5903, + "epoch": 0.538683562126012, + "grad_norm": 1.4749640226364136, + "learning_rate": 6.4642328605540375e-06, + "loss": 0.6528, "step": 7652 }, { - "epoch": 0.5790927320192198, - "grad_norm": 1.8976181745529175, - "learning_rate": 7.398243369915636e-06, - "loss": 0.7683, + "epoch": 0.538753959873284, + "grad_norm": 1.7774696350097656, + "learning_rate": 6.462641450421038e-06, + "loss": 0.6277, "step": 7653 }, { - "epoch": 0.5791684007415535, - "grad_norm": 1.8493512868881226, - "learning_rate": 7.396027158754114e-06, - "loss": 0.5691, + "epoch": 0.5388243576205561, + "grad_norm": 1.6630568504333496, + "learning_rate": 6.461050068225802e-06, + "loss": 0.5561, "step": 7654 }, { - "epoch": 0.5792440694638871, - "grad_norm": 1.9040534496307373, - "learning_rate": 7.393811048038561e-06, - "loss": 0.7016, + "epoch": 0.5388947553678283, + "grad_norm": 1.707047700881958, + "learning_rate": 6.459458714051055e-06, + "loss": 0.5947, "step": 7655 }, { - "epoch": 0.5793197381862207, - "grad_norm": 1.8322153091430664, - "learning_rate": 7.391595037907758e-06, - "loss": 0.725, + "epoch": 0.5389651531151003, + "grad_norm": 1.6108838319778442, + "learning_rate": 6.457867387979543e-06, + "loss": 0.7107, "step": 7656 }, { - "epoch": 0.5793954069085544, - "grad_norm": 2.335909128189087, - "learning_rate": 7.389379128500474e-06, - "loss": 0.7147, + "epoch": 0.5390355508623724, + "grad_norm": 1.7779872417449951, + "learning_rate": 6.456276090093994e-06, + "loss": 0.7612, "step": 7657 }, { - "epoch": 0.579471075630888, - "grad_norm": 2.1806111335754395, - "learning_rate": 7.3871633199554775e-06, - "loss": 0.5799, + "epoch": 0.5391059486096444, + "grad_norm": 1.5084747076034546, + "learning_rate": 6.454684820477145e-06, + "loss": 0.6586, "step": 7658 }, { - "epoch": 0.5795467443532216, - "grad_norm": 2.703801393508911, - "learning_rate": 7.384947612411532e-06, - "loss": 0.7216, + "epoch": 0.5391763463569166, + "grad_norm": 1.7420681715011597, + "learning_rate": 6.453093579211724e-06, + "loss": 0.6269, "step": 7659 }, { - "epoch": 0.5796224130755552, - "grad_norm": 2.5444979667663574, - "learning_rate": 7.3827320060073886e-06, - "loss": 0.692, + "epoch": 0.5392467441041887, + "grad_norm": 1.9786864519119263, + "learning_rate": 6.451502366380467e-06, + "loss": 0.7259, "step": 7660 }, { - "epoch": 0.5796980817978888, - "grad_norm": 2.3260116577148438, - "learning_rate": 7.380516500881799e-06, - "loss": 0.7942, + "epoch": 0.5393171418514607, + "grad_norm": 1.6913539171218872, + "learning_rate": 6.449911182066095e-06, + "loss": 0.6144, "step": 7661 }, { - "epoch": 0.5797737505202225, - "grad_norm": 2.3497567176818848, - "learning_rate": 7.378301097173506e-06, - "loss": 0.6695, + "epoch": 0.5393875395987329, + "grad_norm": 1.9882802963256836, + "learning_rate": 6.448320026351341e-06, + "loss": 0.6721, "step": 7662 }, { - "epoch": 0.5798494192425561, - "grad_norm": 2.0174307823181152, - "learning_rate": 7.376085795021241e-06, - "loss": 0.6379, + "epoch": 0.5394579373460049, + "grad_norm": 1.7868109941482544, + "learning_rate": 6.446728899318928e-06, + "loss": 0.6476, "step": 7663 }, { - "epoch": 0.5799250879648897, - "grad_norm": 3.6419637203216553, - "learning_rate": 7.373870594563739e-06, - "loss": 0.6858, + "epoch": 0.539528335093277, + "grad_norm": 1.9142075777053833, + "learning_rate": 6.445137801051581e-06, + "loss": 0.7497, "step": 7664 }, { - "epoch": 0.5800007566872233, - "grad_norm": 2.2158446311950684, - "learning_rate": 7.3716554959397145e-06, - "loss": 0.7634, + "epoch": 0.5395987328405492, + "grad_norm": 1.827823281288147, + "learning_rate": 6.443546731632019e-06, + "loss": 0.6297, "step": 7665 }, { - "epoch": 0.580076425409557, - "grad_norm": 1.666245937347412, - "learning_rate": 7.369440499287893e-06, - "loss": 0.5982, + "epoch": 0.5396691305878212, + "grad_norm": 1.705425500869751, + "learning_rate": 6.441955691142971e-06, + "loss": 0.6178, "step": 7666 }, { - "epoch": 0.5801520941318906, - "grad_norm": 2.2408547401428223, - "learning_rate": 7.367225604746981e-06, - "loss": 0.6844, + "epoch": 0.5397395283350933, + "grad_norm": 1.8430206775665283, + "learning_rate": 6.440364679667148e-06, + "loss": 0.7068, "step": 7667 }, { - "epoch": 0.5802277628542242, - "grad_norm": 2.1953916549682617, - "learning_rate": 7.365010812455683e-06, - "loss": 0.803, + "epoch": 0.5398099260823653, + "grad_norm": 2.0737600326538086, + "learning_rate": 6.438773697287274e-06, + "loss": 0.6741, "step": 7668 }, { - "epoch": 0.5803034315765578, - "grad_norm": 2.5317487716674805, - "learning_rate": 7.362796122552698e-06, - "loss": 0.7437, + "epoch": 0.5398803238296375, + "grad_norm": 1.5770585536956787, + "learning_rate": 6.4371827440860645e-06, + "loss": 0.6761, "step": 7669 }, { - "epoch": 0.5803791002988915, - "grad_norm": 2.728444814682007, - "learning_rate": 7.3605815351767105e-06, - "loss": 0.725, + "epoch": 0.5399507215769095, + "grad_norm": 2.215792655944824, + "learning_rate": 6.435591820146231e-06, + "loss": 0.7524, "step": 7670 }, { - "epoch": 0.5804547690212251, - "grad_norm": 2.354583740234375, - "learning_rate": 7.358367050466411e-06, - "loss": 0.7462, + "epoch": 0.5400211193241816, + "grad_norm": 1.6495838165283203, + "learning_rate": 6.434000925550494e-06, + "loss": 0.7211, "step": 7671 }, { - "epoch": 0.5805304377435587, - "grad_norm": 1.8923773765563965, - "learning_rate": 7.356152668560478e-06, - "loss": 0.6499, + "epoch": 0.5400915170714538, + "grad_norm": 1.7708029747009277, + "learning_rate": 6.432410060381559e-06, + "loss": 0.7513, "step": 7672 }, { - "epoch": 0.5806061064658923, - "grad_norm": 2.219557523727417, - "learning_rate": 7.353938389597583e-06, - "loss": 0.6165, + "epoch": 0.5401619148187258, + "grad_norm": 1.5538126230239868, + "learning_rate": 6.43081922472214e-06, + "loss": 0.6454, "step": 7673 }, { - "epoch": 0.580681775188226, - "grad_norm": 2.9942805767059326, - "learning_rate": 7.351724213716388e-06, - "loss": 0.6961, + "epoch": 0.5402323125659979, + "grad_norm": 1.7136077880859375, + "learning_rate": 6.429228418654943e-06, + "loss": 0.6372, "step": 7674 }, { - "epoch": 0.5807574439105596, - "grad_norm": 1.9421963691711426, - "learning_rate": 7.349510141055552e-06, - "loss": 0.7431, + "epoch": 0.5403027103132699, + "grad_norm": 1.8492094278335571, + "learning_rate": 6.427637642262681e-06, + "loss": 0.8329, "step": 7675 }, { - "epoch": 0.5808331126328932, - "grad_norm": 2.3083291053771973, - "learning_rate": 7.347296171753734e-06, - "loss": 0.739, + "epoch": 0.5403731080605421, + "grad_norm": 1.995292067527771, + "learning_rate": 6.426046895628052e-06, + "loss": 0.7199, "step": 7676 }, { - "epoch": 0.5809087813552268, - "grad_norm": 2.1545681953430176, - "learning_rate": 7.345082305949572e-06, - "loss": 0.6407, + "epoch": 0.5404435058078142, + "grad_norm": 2.002349615097046, + "learning_rate": 6.4244561788337685e-06, + "loss": 0.6631, "step": 7677 }, { - "epoch": 0.5809844500775604, - "grad_norm": 2.1147782802581787, - "learning_rate": 7.342868543781711e-06, - "loss": 0.721, + "epoch": 0.5405139035550862, + "grad_norm": 2.9734368324279785, + "learning_rate": 6.422865491962527e-06, + "loss": 0.671, "step": 7678 }, { - "epoch": 0.581060118799894, - "grad_norm": 2.1288297176361084, - "learning_rate": 7.34065488538878e-06, - "loss": 0.6971, + "epoch": 0.5405843013023583, + "grad_norm": 1.7497472763061523, + "learning_rate": 6.421274835097034e-06, + "loss": 0.652, "step": 7679 }, { - "epoch": 0.5811357875222277, - "grad_norm": 2.313871145248413, - "learning_rate": 7.338441330909405e-06, - "loss": 0.7317, + "epoch": 0.5406546990496304, + "grad_norm": 1.7477983236312866, + "learning_rate": 6.419684208319984e-06, + "loss": 0.6937, "step": 7680 }, { - "epoch": 0.5812114562445613, - "grad_norm": 2.2980892658233643, - "learning_rate": 7.336227880482211e-06, - "loss": 0.6622, + "epoch": 0.5407250967969025, + "grad_norm": 1.5725513696670532, + "learning_rate": 6.4180936117140795e-06, + "loss": 0.6688, "step": 7681 }, { - "epoch": 0.5812871249668949, - "grad_norm": 1.893399715423584, - "learning_rate": 7.334014534245808e-06, - "loss": 0.6524, + "epoch": 0.5407954945441746, + "grad_norm": 1.7177162170410156, + "learning_rate": 6.416503045362011e-06, + "loss": 0.7293, "step": 7682 }, { - "epoch": 0.5813627936892286, - "grad_norm": 2.0858371257781982, - "learning_rate": 7.3318012923388046e-06, - "loss": 0.7039, + "epoch": 0.5408658922914467, + "grad_norm": 1.699470043182373, + "learning_rate": 6.414912509346483e-06, + "loss": 0.6472, "step": 7683 }, { - "epoch": 0.5814384624115622, - "grad_norm": 1.9755072593688965, - "learning_rate": 7.329588154899797e-06, - "loss": 0.6282, + "epoch": 0.5409362900387188, + "grad_norm": 2.2943477630615234, + "learning_rate": 6.41332200375018e-06, + "loss": 0.6213, "step": 7684 }, { - "epoch": 0.5815141311338958, - "grad_norm": 1.9997296333312988, - "learning_rate": 7.327375122067382e-06, - "loss": 0.5055, + "epoch": 0.5410066877859908, + "grad_norm": 1.5972942113876343, + "learning_rate": 6.411731528655799e-06, + "loss": 0.5708, "step": 7685 }, { - "epoch": 0.5815897998562294, - "grad_norm": 2.181192636489868, - "learning_rate": 7.325162193980147e-06, - "loss": 0.7401, + "epoch": 0.541077085533263, + "grad_norm": 2.069218873977661, + "learning_rate": 6.410141084146028e-06, + "loss": 0.617, "step": 7686 }, { - "epoch": 0.581665468578563, - "grad_norm": 1.5383789539337158, - "learning_rate": 7.322949370776675e-06, - "loss": 0.7502, + "epoch": 0.541147483280535, + "grad_norm": 1.800736904144287, + "learning_rate": 6.408550670303557e-06, + "loss": 0.6477, "step": 7687 }, { - "epoch": 0.5817411373008967, - "grad_norm": 2.279628276824951, - "learning_rate": 7.320736652595537e-06, - "loss": 0.6221, + "epoch": 0.5412178810278071, + "grad_norm": 1.792518138885498, + "learning_rate": 6.40696028721107e-06, + "loss": 0.7397, "step": 7688 }, { - "epoch": 0.5818168060232303, - "grad_norm": 2.0535850524902344, - "learning_rate": 7.3185240395753005e-06, - "loss": 0.853, + "epoch": 0.5412882787750792, + "grad_norm": 1.5638318061828613, + "learning_rate": 6.405369934951257e-06, + "loss": 0.6252, "step": 7689 }, { - "epoch": 0.5818924747455639, - "grad_norm": 1.900155782699585, - "learning_rate": 7.316311531854524e-06, - "loss": 0.8605, + "epoch": 0.5413586765223513, + "grad_norm": 1.8888559341430664, + "learning_rate": 6.403779613606799e-06, + "loss": 0.6607, "step": 7690 }, { - "epoch": 0.5819681434678975, - "grad_norm": 1.8378933668136597, - "learning_rate": 7.314099129571769e-06, - "loss": 0.4497, + "epoch": 0.5414290742696234, + "grad_norm": 1.807568073272705, + "learning_rate": 6.402189323260379e-06, + "loss": 0.6187, "step": 7691 }, { - "epoch": 0.5820438121902312, - "grad_norm": 2.2572903633117676, - "learning_rate": 7.3118868328655795e-06, - "loss": 0.7395, + "epoch": 0.5414994720168954, + "grad_norm": 2.291107416152954, + "learning_rate": 6.400599063994677e-06, + "loss": 0.815, "step": 7692 }, { - "epoch": 0.5821194809125648, - "grad_norm": 1.9963607788085938, - "learning_rate": 7.309674641874496e-06, - "loss": 0.8227, + "epoch": 0.5415698697641675, + "grad_norm": 1.7056608200073242, + "learning_rate": 6.399008835892374e-06, + "loss": 0.7074, "step": 7693 }, { - "epoch": 0.5821951496348984, - "grad_norm": 2.2713754177093506, - "learning_rate": 7.307462556737054e-06, - "loss": 0.667, + "epoch": 0.5416402675114397, + "grad_norm": 1.6791787147521973, + "learning_rate": 6.397418639036143e-06, + "loss": 0.7018, "step": 7694 }, { - "epoch": 0.582270818357232, - "grad_norm": 1.7703185081481934, - "learning_rate": 7.30525057759178e-06, - "loss": 0.8367, + "epoch": 0.5417106652587117, + "grad_norm": 1.6861300468444824, + "learning_rate": 6.395828473508665e-06, + "loss": 0.7287, "step": 7695 }, { - "epoch": 0.5823464870795657, - "grad_norm": 2.0329811573028564, - "learning_rate": 7.3030387045771945e-06, - "loss": 0.7944, + "epoch": 0.5417810630059838, + "grad_norm": 2.121337413787842, + "learning_rate": 6.394238339392611e-06, + "loss": 0.6066, "step": 7696 }, { - "epoch": 0.5824221558018993, - "grad_norm": 2.182164192199707, - "learning_rate": 7.300826937831816e-06, - "loss": 0.6249, + "epoch": 0.5418514607532559, + "grad_norm": 1.8389884233474731, + "learning_rate": 6.392648236770655e-06, + "loss": 0.5979, "step": 7697 }, { - "epoch": 0.5824978245242329, - "grad_norm": 1.8738006353378296, - "learning_rate": 7.298615277494151e-06, - "loss": 0.6997, + "epoch": 0.541921858500528, + "grad_norm": 2.3450424671173096, + "learning_rate": 6.391058165725467e-06, + "loss": 0.7965, "step": 7698 }, { - "epoch": 0.5825734932465665, - "grad_norm": 2.295851469039917, - "learning_rate": 7.2964037237027004e-06, - "loss": 0.6997, + "epoch": 0.5419922562478001, + "grad_norm": 1.827224612236023, + "learning_rate": 6.389468126339717e-06, + "loss": 0.7261, "step": 7699 }, { - "epoch": 0.5826491619689002, - "grad_norm": 2.639524221420288, - "learning_rate": 7.294192276595958e-06, - "loss": 0.8636, + "epoch": 0.5420626539950721, + "grad_norm": 2.1186211109161377, + "learning_rate": 6.3878781186960695e-06, + "loss": 0.6262, "step": 7700 }, { - "epoch": 0.5827248306912338, - "grad_norm": 2.49822998046875, - "learning_rate": 7.2919809363124104e-06, - "loss": 0.5851, + "epoch": 0.5421330517423443, + "grad_norm": 1.4815746545791626, + "learning_rate": 6.3862881428771955e-06, + "loss": 0.7919, "step": 7701 }, { - "epoch": 0.5828004994135674, - "grad_norm": 1.8222301006317139, - "learning_rate": 7.289769702990542e-06, - "loss": 0.7613, + "epoch": 0.5422034494896163, + "grad_norm": 1.8806374073028564, + "learning_rate": 6.384698198965755e-06, + "loss": 0.6331, "step": 7702 }, { - "epoch": 0.582876168135901, - "grad_norm": 1.6940174102783203, - "learning_rate": 7.28755857676883e-06, - "loss": 0.5668, + "epoch": 0.5422738472368884, + "grad_norm": 1.735249638557434, + "learning_rate": 6.383108287044415e-06, + "loss": 0.7233, "step": 7703 }, { - "epoch": 0.5829518368582346, - "grad_norm": 2.8774144649505615, - "learning_rate": 7.285347557785736e-06, - "loss": 0.7722, + "epoch": 0.5423442449841606, + "grad_norm": 2.352452516555786, + "learning_rate": 6.381518407195831e-06, + "loss": 0.6534, "step": 7704 }, { - "epoch": 0.5830275055805683, - "grad_norm": 2.2571170330047607, - "learning_rate": 7.283136646179724e-06, - "loss": 0.7295, + "epoch": 0.5424146427314326, + "grad_norm": 1.8975284099578857, + "learning_rate": 6.379928559502668e-06, + "loss": 0.6257, "step": 7705 }, { - "epoch": 0.5831031743029019, - "grad_norm": 2.2378146648406982, - "learning_rate": 7.2809258420892455e-06, - "loss": 0.7539, + "epoch": 0.5424850404787047, + "grad_norm": 1.6682980060577393, + "learning_rate": 6.378338744047577e-06, + "loss": 0.6982, "step": 7706 }, { - "epoch": 0.5831788430252355, - "grad_norm": 2.1192033290863037, - "learning_rate": 7.278715145652754e-06, - "loss": 0.7074, + "epoch": 0.5425554382259767, + "grad_norm": 1.9517183303833008, + "learning_rate": 6.376748960913222e-06, + "loss": 0.6555, "step": 7707 }, { - "epoch": 0.5832545117475691, - "grad_norm": 2.149138927459717, - "learning_rate": 7.276504557008687e-06, - "loss": 0.6908, + "epoch": 0.5426258359732489, + "grad_norm": 2.0254437923431396, + "learning_rate": 6.37515921018225e-06, + "loss": 0.6809, "step": 7708 }, { - "epoch": 0.5833301804699028, - "grad_norm": 2.1191375255584717, - "learning_rate": 7.274294076295479e-06, - "loss": 0.6748, + "epoch": 0.5426962337205209, + "grad_norm": 1.860432744026184, + "learning_rate": 6.373569491937318e-06, + "loss": 0.6938, "step": 7709 }, { - "epoch": 0.5834058491922364, - "grad_norm": 1.9595415592193604, - "learning_rate": 7.27208370365156e-06, - "loss": 0.8031, + "epoch": 0.542766631467793, + "grad_norm": 2.0693907737731934, + "learning_rate": 6.371979806261073e-06, + "loss": 0.7371, "step": 7710 }, { - "epoch": 0.58348151791457, - "grad_norm": 2.0136635303497314, - "learning_rate": 7.269873439215343e-06, - "loss": 0.535, + "epoch": 0.5428370292150652, + "grad_norm": 1.7892760038375854, + "learning_rate": 6.370390153236171e-06, + "loss": 0.6424, "step": 7711 }, { - "epoch": 0.5835571866369036, - "grad_norm": 6.647026062011719, - "learning_rate": 7.267663283125249e-06, - "loss": 0.5824, + "epoch": 0.5429074269623372, + "grad_norm": 2.0827126502990723, + "learning_rate": 6.36880053294525e-06, + "loss": 0.6356, "step": 7712 }, { - "epoch": 0.5836328553592373, - "grad_norm": 2.1714112758636475, - "learning_rate": 7.265453235519686e-06, - "loss": 0.6723, + "epoch": 0.5429778247096093, + "grad_norm": 1.7739441394805908, + "learning_rate": 6.3672109454709656e-06, + "loss": 0.635, "step": 7713 }, { - "epoch": 0.5837085240815709, - "grad_norm": 1.932857632637024, - "learning_rate": 7.26324329653705e-06, - "loss": 0.768, + "epoch": 0.5430482224568813, + "grad_norm": 1.8319505453109741, + "learning_rate": 6.365621390895954e-06, + "loss": 0.596, "step": 7714 }, { - "epoch": 0.5837841928039045, - "grad_norm": 3.383786916732788, - "learning_rate": 7.261033466315737e-06, - "loss": 0.5688, + "epoch": 0.5431186202041535, + "grad_norm": 1.9109606742858887, + "learning_rate": 6.364031869302863e-06, + "loss": 0.6564, "step": 7715 }, { - "epoch": 0.5838598615262381, - "grad_norm": 2.061591863632202, - "learning_rate": 7.2588237449941274e-06, - "loss": 0.4932, + "epoch": 0.5431890179514256, + "grad_norm": 2.364642381668091, + "learning_rate": 6.362442380774329e-06, + "loss": 0.5471, "step": 7716 }, { - "epoch": 0.5839355302485717, - "grad_norm": 1.7182115316390991, - "learning_rate": 7.256614132710612e-06, - "loss": 0.5914, + "epoch": 0.5432594156986976, + "grad_norm": 1.5500270128250122, + "learning_rate": 6.360852925392996e-06, + "loss": 0.646, "step": 7717 }, { - "epoch": 0.5840111989709054, - "grad_norm": 2.315765142440796, - "learning_rate": 7.254404629603557e-06, - "loss": 0.7175, + "epoch": 0.5433298134459698, + "grad_norm": 1.952462077140808, + "learning_rate": 6.359263503241495e-06, + "loss": 0.6316, "step": 7718 }, { - "epoch": 0.584086867693239, - "grad_norm": 2.1149778366088867, - "learning_rate": 7.252195235811331e-06, - "loss": 0.6716, + "epoch": 0.5434002111932418, + "grad_norm": 1.7297394275665283, + "learning_rate": 6.357674114402465e-06, + "loss": 0.6861, "step": 7719 }, { - "epoch": 0.5841625364155726, - "grad_norm": 2.2034709453582764, - "learning_rate": 7.2499859514722925e-06, - "loss": 0.718, + "epoch": 0.5434706089405139, + "grad_norm": 1.754349708557129, + "learning_rate": 6.3560847589585414e-06, + "loss": 0.7703, "step": 7720 }, { - "epoch": 0.5842382051379063, - "grad_norm": 1.7320899963378906, - "learning_rate": 7.24777677672479e-06, - "loss": 0.8193, + "epoch": 0.543541006687786, + "grad_norm": 1.634427547454834, + "learning_rate": 6.3544954369923515e-06, + "loss": 0.5949, "step": 7721 }, { - "epoch": 0.5843138738602399, - "grad_norm": 2.224959373474121, - "learning_rate": 7.2455677117071785e-06, - "loss": 0.6078, + "epoch": 0.5436114044350581, + "grad_norm": 1.9098705053329468, + "learning_rate": 6.352906148586532e-06, + "loss": 0.6878, "step": 7722 }, { - "epoch": 0.5843895425825735, - "grad_norm": 1.8637721538543701, - "learning_rate": 7.243358756557788e-06, - "loss": 0.7856, + "epoch": 0.5436818021823302, + "grad_norm": 2.0321969985961914, + "learning_rate": 6.3513168938237055e-06, + "loss": 0.7008, "step": 7723 }, { - "epoch": 0.5844652113049071, - "grad_norm": 1.866568922996521, - "learning_rate": 7.241149911414957e-06, - "loss": 0.6537, + "epoch": 0.5437521999296022, + "grad_norm": 1.682110071182251, + "learning_rate": 6.349727672786503e-06, + "loss": 0.5884, "step": 7724 }, { - "epoch": 0.5845408800272407, - "grad_norm": 2.14298415184021, - "learning_rate": 7.238941176417005e-06, - "loss": 0.7189, + "epoch": 0.5438225976768744, + "grad_norm": 1.8006662130355835, + "learning_rate": 6.348138485557547e-06, + "loss": 0.7076, "step": 7725 }, { - "epoch": 0.5846165487495744, - "grad_norm": 2.2058675289154053, - "learning_rate": 7.236732551702251e-06, - "loss": 0.6751, + "epoch": 0.5438929954241464, + "grad_norm": 1.7511736154556274, + "learning_rate": 6.346549332219462e-06, + "loss": 0.6913, "step": 7726 }, { - "epoch": 0.584692217471908, - "grad_norm": 2.182589054107666, - "learning_rate": 7.23452403740901e-06, - "loss": 0.757, + "epoch": 0.5439633931714185, + "grad_norm": 1.701819896697998, + "learning_rate": 6.344960212854867e-06, + "loss": 0.6998, "step": 7727 }, { - "epoch": 0.5847678861942416, - "grad_norm": 2.33272123336792, - "learning_rate": 7.232315633675584e-06, - "loss": 0.6802, + "epoch": 0.5440337909186906, + "grad_norm": 1.7978006601333618, + "learning_rate": 6.3433711275463855e-06, + "loss": 0.6307, "step": 7728 }, { - "epoch": 0.5848435549165752, - "grad_norm": 1.9340473413467407, - "learning_rate": 7.230107340640272e-06, - "loss": 0.6658, + "epoch": 0.5441041886659627, + "grad_norm": 1.6168092489242554, + "learning_rate": 6.341782076376633e-06, + "loss": 0.7069, "step": 7729 }, { - "epoch": 0.5849192236389088, - "grad_norm": 6.095152854919434, - "learning_rate": 7.22789915844136e-06, - "loss": 0.6181, + "epoch": 0.5441745864132348, + "grad_norm": 1.6781070232391357, + "learning_rate": 6.340193059428228e-06, + "loss": 0.6785, "step": 7730 }, { - "epoch": 0.5849948923612425, - "grad_norm": 1.9372726678848267, - "learning_rate": 7.225691087217132e-06, - "loss": 0.6955, + "epoch": 0.5442449841605068, + "grad_norm": 1.689842700958252, + "learning_rate": 6.338604076783781e-06, + "loss": 0.6174, "step": 7731 }, { - "epoch": 0.5850705610835761, - "grad_norm": 2.411663055419922, - "learning_rate": 7.22348312710587e-06, - "loss": 0.7125, + "epoch": 0.544315381907779, + "grad_norm": 1.880469560623169, + "learning_rate": 6.3370151285259095e-06, + "loss": 0.7214, "step": 7732 }, { - "epoch": 0.5851462298059097, - "grad_norm": 2.0117082595825195, - "learning_rate": 7.221275278245842e-06, - "loss": 0.6744, + "epoch": 0.5443857796550511, + "grad_norm": 1.6030569076538086, + "learning_rate": 6.3354262147372185e-06, + "loss": 0.7172, "step": 7733 }, { - "epoch": 0.5852218985282434, - "grad_norm": 2.462001085281372, - "learning_rate": 7.2190675407753075e-06, - "loss": 0.7146, + "epoch": 0.5444561774023231, + "grad_norm": 1.8071589469909668, + "learning_rate": 6.333837335500324e-06, + "loss": 0.6472, "step": 7734 }, { - "epoch": 0.585297567250577, - "grad_norm": 2.144810438156128, - "learning_rate": 7.216859914832526e-06, - "loss": 0.6407, + "epoch": 0.5445265751495952, + "grad_norm": 1.875027060508728, + "learning_rate": 6.3322484908978274e-06, + "loss": 0.5985, "step": 7735 }, { - "epoch": 0.5853732359729106, - "grad_norm": 2.357346773147583, - "learning_rate": 7.2146524005557416e-06, - "loss": 0.6624, + "epoch": 0.5445969728968673, + "grad_norm": 1.9853671789169312, + "learning_rate": 6.330659681012339e-06, + "loss": 0.7119, "step": 7736 }, { - "epoch": 0.5854489046952442, - "grad_norm": 3.149186134338379, - "learning_rate": 7.212444998083196e-06, - "loss": 0.6403, + "epoch": 0.5446673706441394, + "grad_norm": 1.5938758850097656, + "learning_rate": 6.329070905926458e-06, + "loss": 0.579, "step": 7737 }, { - "epoch": 0.5855245734175778, - "grad_norm": 1.9493343830108643, - "learning_rate": 7.210237707553132e-06, - "loss": 0.6004, + "epoch": 0.5447377683914115, + "grad_norm": 1.9238793849945068, + "learning_rate": 6.32748216572279e-06, + "loss": 0.7336, "step": 7738 }, { - "epoch": 0.5856002421399115, - "grad_norm": 2.3722169399261475, - "learning_rate": 7.208030529103768e-06, - "loss": 0.7029, + "epoch": 0.5448081661386835, + "grad_norm": 1.4869035482406616, + "learning_rate": 6.32589346048393e-06, + "loss": 0.8402, "step": 7739 }, { - "epoch": 0.5856759108622451, - "grad_norm": 1.8572362661361694, - "learning_rate": 7.205823462873331e-06, - "loss": 0.5583, + "epoch": 0.5448785638859557, + "grad_norm": 1.7831963300704956, + "learning_rate": 6.3243047902924826e-06, + "loss": 0.561, "step": 7740 }, { - "epoch": 0.5857515795845787, - "grad_norm": 19.258787155151367, - "learning_rate": 7.203616509000029e-06, - "loss": 0.7381, + "epoch": 0.5449489616332277, + "grad_norm": 1.708897352218628, + "learning_rate": 6.322716155231039e-06, + "loss": 0.696, "step": 7741 }, { - "epoch": 0.5858272483069124, - "grad_norm": 2.024709939956665, - "learning_rate": 7.201409667622069e-06, - "loss": 0.6997, + "epoch": 0.5450193593804998, + "grad_norm": 1.911719799041748, + "learning_rate": 6.321127555382197e-06, + "loss": 0.678, "step": 7742 }, { - "epoch": 0.5859029170292459, - "grad_norm": 1.7652946710586548, - "learning_rate": 7.199202938877658e-06, - "loss": 0.6909, + "epoch": 0.5450897571277719, + "grad_norm": 1.6607388257980347, + "learning_rate": 6.319538990828548e-06, + "loss": 0.7441, "step": 7743 }, { - "epoch": 0.5859785857515796, - "grad_norm": 1.8822402954101562, - "learning_rate": 7.196996322904982e-06, - "loss": 0.635, + "epoch": 0.545160154875044, + "grad_norm": 1.7611827850341797, + "learning_rate": 6.317950461652684e-06, + "loss": 0.7386, "step": 7744 }, { - "epoch": 0.5860542544739132, - "grad_norm": 1.622046947479248, - "learning_rate": 7.194789819842228e-06, - "loss": 0.7197, + "epoch": 0.5452305526223161, + "grad_norm": 1.5957032442092896, + "learning_rate": 6.31636196793719e-06, + "loss": 0.7512, "step": 7745 }, { - "epoch": 0.5861299231962468, - "grad_norm": 3.179093837738037, - "learning_rate": 7.1925834298275735e-06, - "loss": 0.6946, + "epoch": 0.5453009503695881, + "grad_norm": 1.479157567024231, + "learning_rate": 6.31477350976466e-06, + "loss": 0.5717, "step": 7746 }, { - "epoch": 0.5862055919185805, - "grad_norm": 2.236264944076538, - "learning_rate": 7.19037715299919e-06, - "loss": 0.7203, + "epoch": 0.5453713481168603, + "grad_norm": 1.632075548171997, + "learning_rate": 6.3131850872176745e-06, + "loss": 0.7369, "step": 7747 }, { - "epoch": 0.5862812606409141, - "grad_norm": 2.4791111946105957, - "learning_rate": 7.188170989495242e-06, - "loss": 0.5344, + "epoch": 0.5454417458641323, + "grad_norm": 1.8773629665374756, + "learning_rate": 6.3115967003788195e-06, + "loss": 0.7224, "step": 7748 }, { - "epoch": 0.5863569293632477, - "grad_norm": 2.7363762855529785, - "learning_rate": 7.18596493945389e-06, - "loss": 0.6981, + "epoch": 0.5455121436114044, + "grad_norm": 1.9852845668792725, + "learning_rate": 6.3100083493306735e-06, + "loss": 0.8419, "step": 7749 }, { - "epoch": 0.5864325980855813, - "grad_norm": 1.8051518201828003, - "learning_rate": 7.183759003013277e-06, - "loss": 0.6166, + "epoch": 0.5455825413586766, + "grad_norm": 1.5376378297805786, + "learning_rate": 6.30842003415582e-06, + "loss": 0.6409, "step": 7750 }, { - "epoch": 0.5865082668079149, - "grad_norm": 2.029879331588745, - "learning_rate": 7.181553180311554e-06, - "loss": 0.7514, + "epoch": 0.5456529391059486, + "grad_norm": 1.5901210308074951, + "learning_rate": 6.306831754936833e-06, + "loss": 0.6792, "step": 7751 }, { - "epoch": 0.5865839355302486, - "grad_norm": 2.204545736312866, - "learning_rate": 7.1793474714868465e-06, - "loss": 0.7361, + "epoch": 0.5457233368532207, + "grad_norm": 1.3552205562591553, + "learning_rate": 6.305243511756293e-06, + "loss": 0.6229, "step": 7752 }, { - "epoch": 0.5866596042525822, - "grad_norm": 2.728564739227295, - "learning_rate": 7.177141876677292e-06, - "loss": 0.637, + "epoch": 0.5457937346004927, + "grad_norm": 1.80763840675354, + "learning_rate": 6.303655304696771e-06, + "loss": 0.6877, "step": 7753 }, { - "epoch": 0.5867352729749158, - "grad_norm": 2.3274734020233154, - "learning_rate": 7.174936396021011e-06, - "loss": 0.6613, + "epoch": 0.5458641323477649, + "grad_norm": 1.7056670188903809, + "learning_rate": 6.302067133840842e-06, + "loss": 0.6865, "step": 7754 }, { - "epoch": 0.5868109416972495, - "grad_norm": 2.8488948345184326, - "learning_rate": 7.172731029656113e-06, - "loss": 0.8433, + "epoch": 0.545934530095037, + "grad_norm": 1.5706933736801147, + "learning_rate": 6.3004789992710715e-06, + "loss": 0.7855, "step": 7755 }, { - "epoch": 0.586886610419583, - "grad_norm": 2.334925651550293, - "learning_rate": 7.1705257777207115e-06, - "loss": 0.7575, + "epoch": 0.546004927842309, + "grad_norm": 1.794345498085022, + "learning_rate": 6.298890901070036e-06, + "loss": 0.6808, "step": 7756 }, { - "epoch": 0.5869622791419167, - "grad_norm": 4.486547470092773, - "learning_rate": 7.168320640352898e-06, - "loss": 0.8143, + "epoch": 0.5460753255895812, + "grad_norm": 1.8097459077835083, + "learning_rate": 6.297302839320293e-06, + "loss": 0.679, "step": 7757 }, { - "epoch": 0.5870379478642503, - "grad_norm": 1.958241581916809, - "learning_rate": 7.1661156176907716e-06, - "loss": 0.742, + "epoch": 0.5461457233368532, + "grad_norm": 1.6874432563781738, + "learning_rate": 6.295714814104415e-06, + "loss": 0.7084, "step": 7758 }, { - "epoch": 0.5871136165865839, - "grad_norm": 2.2698402404785156, - "learning_rate": 7.163910709872421e-06, - "loss": 0.817, + "epoch": 0.5462161210841253, + "grad_norm": 2.2061455249786377, + "learning_rate": 6.2941268255049585e-06, + "loss": 0.7477, "step": 7759 }, { - "epoch": 0.5871892853089176, - "grad_norm": 2.016049861907959, - "learning_rate": 7.1617059170359165e-06, - "loss": 0.6541, + "epoch": 0.5462865188313974, + "grad_norm": 1.7836071252822876, + "learning_rate": 6.292538873604491e-06, + "loss": 0.689, "step": 7760 }, { - "epoch": 0.5872649540312512, - "grad_norm": 1.8849208354949951, - "learning_rate": 7.1595012393193346e-06, - "loss": 0.6868, + "epoch": 0.5463569165786695, + "grad_norm": 1.533530354499817, + "learning_rate": 6.290950958485564e-06, + "loss": 0.6504, "step": 7761 }, { - "epoch": 0.5873406227535848, - "grad_norm": 2.1924757957458496, - "learning_rate": 7.157296676860735e-06, - "loss": 0.6863, + "epoch": 0.5464273143259416, + "grad_norm": 2.126983880996704, + "learning_rate": 6.289363080230745e-06, + "loss": 0.7033, "step": 7762 }, { - "epoch": 0.5874162914759185, - "grad_norm": 2.1753952503204346, - "learning_rate": 7.155092229798181e-06, - "loss": 0.6678, + "epoch": 0.5464977120732136, + "grad_norm": 1.6140657663345337, + "learning_rate": 6.287775238922577e-06, + "loss": 0.6282, "step": 7763 }, { - "epoch": 0.587491960198252, - "grad_norm": 1.9696418046951294, - "learning_rate": 7.152887898269718e-06, - "loss": 0.706, + "epoch": 0.5465681098204858, + "grad_norm": 2.0173208713531494, + "learning_rate": 6.286187434643622e-06, + "loss": 0.5791, "step": 7764 }, { - "epoch": 0.5875676289205857, - "grad_norm": 2.0107688903808594, - "learning_rate": 7.15068368241339e-06, - "loss": 0.7368, + "epoch": 0.5466385075677578, + "grad_norm": 2.033362865447998, + "learning_rate": 6.284599667476429e-06, + "loss": 0.6204, "step": 7765 }, { - "epoch": 0.5876432976429193, - "grad_norm": 1.9954001903533936, - "learning_rate": 7.14847958236723e-06, - "loss": 0.6301, + "epoch": 0.5467089053150299, + "grad_norm": 2.0439112186431885, + "learning_rate": 6.283011937503548e-06, + "loss": 0.6598, "step": 7766 }, { - "epoch": 0.5877189663652529, - "grad_norm": 1.949197769165039, - "learning_rate": 7.146275598269265e-06, - "loss": 0.5946, + "epoch": 0.546779303062302, + "grad_norm": 2.366356611251831, + "learning_rate": 6.281424244807522e-06, + "loss": 0.6643, "step": 7767 }, { - "epoch": 0.5877946350875866, - "grad_norm": 2.207111358642578, - "learning_rate": 7.144071730257521e-06, - "loss": 0.6304, + "epoch": 0.5468497008095741, + "grad_norm": 1.7889009714126587, + "learning_rate": 6.2798365894709055e-06, + "loss": 0.8412, "step": 7768 }, { - "epoch": 0.5878703038099202, - "grad_norm": 1.9679584503173828, - "learning_rate": 7.141867978470007e-06, - "loss": 0.652, + "epoch": 0.5469200985568462, + "grad_norm": 2.3389923572540283, + "learning_rate": 6.2782489715762325e-06, + "loss": 0.6406, "step": 7769 }, { - "epoch": 0.5879459725322538, - "grad_norm": 2.0223073959350586, - "learning_rate": 7.139664343044732e-06, - "loss": 0.6923, + "epoch": 0.5469904963041182, + "grad_norm": 1.9505542516708374, + "learning_rate": 6.27666139120605e-06, + "loss": 0.6782, "step": 7770 }, { - "epoch": 0.5880216412545874, - "grad_norm": 2.1896934509277344, - "learning_rate": 7.137460824119691e-06, - "loss": 0.7841, + "epoch": 0.5470608940513904, + "grad_norm": 1.8522621393203735, + "learning_rate": 6.275073848442899e-06, + "loss": 0.7187, "step": 7771 }, { - "epoch": 0.588097309976921, - "grad_norm": 2.157703161239624, - "learning_rate": 7.135257421832879e-06, - "loss": 0.6671, + "epoch": 0.5471312917986625, + "grad_norm": 2.5237157344818115, + "learning_rate": 6.273486343369312e-06, + "loss": 0.69, "step": 7772 }, { - "epoch": 0.5881729786992547, - "grad_norm": 2.098355293273926, - "learning_rate": 7.133054136322274e-06, - "loss": 0.6282, + "epoch": 0.5472016895459345, + "grad_norm": 1.6449689865112305, + "learning_rate": 6.271898876067831e-06, + "loss": 0.5369, "step": 7773 }, { - "epoch": 0.5882486474215883, - "grad_norm": 1.8522675037384033, - "learning_rate": 7.130850967725861e-06, - "loss": 0.6418, + "epoch": 0.5472720872932066, + "grad_norm": 1.5917623043060303, + "learning_rate": 6.2703114466209846e-06, + "loss": 0.6745, "step": 7774 }, { - "epoch": 0.5883243161439219, - "grad_norm": 1.7040518522262573, - "learning_rate": 7.128647916181605e-06, - "loss": 0.5497, + "epoch": 0.5473424850404787, + "grad_norm": 1.810890555381775, + "learning_rate": 6.26872405511131e-06, + "loss": 0.6137, "step": 7775 }, { - "epoch": 0.5883999848662556, - "grad_norm": 2.2610528469085693, - "learning_rate": 7.126444981827471e-06, - "loss": 0.6669, + "epoch": 0.5474128827877508, + "grad_norm": 1.991504430770874, + "learning_rate": 6.2671367016213306e-06, + "loss": 0.633, "step": 7776 }, { - "epoch": 0.5884756535885891, - "grad_norm": 2.119845390319824, - "learning_rate": 7.12424216480141e-06, - "loss": 0.7393, + "epoch": 0.5474832805350229, + "grad_norm": 1.7536638975143433, + "learning_rate": 6.26554938623358e-06, + "loss": 0.6377, "step": 7777 }, { - "epoch": 0.5885513223109228, - "grad_norm": 2.3598594665527344, - "learning_rate": 7.12203946524137e-06, - "loss": 0.6026, + "epoch": 0.547553678282295, + "grad_norm": 1.6204469203948975, + "learning_rate": 6.263962109030579e-06, + "loss": 0.6295, "step": 7778 }, { - "epoch": 0.5886269910332564, - "grad_norm": 2.374350070953369, - "learning_rate": 7.119836883285297e-06, - "loss": 0.6199, + "epoch": 0.5476240760295671, + "grad_norm": 6.28739070892334, + "learning_rate": 6.2623748700948584e-06, + "loss": 0.6484, "step": 7779 }, { - "epoch": 0.58870265975559, - "grad_norm": 2.1946024894714355, - "learning_rate": 7.117634419071117e-06, - "loss": 0.6357, + "epoch": 0.5476944737768391, + "grad_norm": 1.763731837272644, + "learning_rate": 6.260787669508934e-06, + "loss": 0.6419, "step": 7780 }, { - "epoch": 0.5887783284779237, - "grad_norm": 2.086524724960327, - "learning_rate": 7.115432072736759e-06, - "loss": 0.8439, + "epoch": 0.5477648715241112, + "grad_norm": 3.647878408432007, + "learning_rate": 6.25920050735533e-06, + "loss": 0.6871, "step": 7781 }, { - "epoch": 0.5888539972002573, - "grad_norm": 2.3127012252807617, - "learning_rate": 7.1132298444201395e-06, - "loss": 0.7654, + "epoch": 0.5478352692713833, + "grad_norm": 1.8419873714447021, + "learning_rate": 6.257613383716561e-06, + "loss": 0.7531, "step": 7782 }, { - "epoch": 0.5889296659225909, - "grad_norm": 2.065904378890991, - "learning_rate": 7.111027734259167e-06, - "loss": 0.7323, + "epoch": 0.5479056670186554, + "grad_norm": 2.0884478092193604, + "learning_rate": 6.256026298675146e-06, + "loss": 0.743, "step": 7783 }, { - "epoch": 0.5890053346449245, - "grad_norm": 1.9671047925949097, - "learning_rate": 7.108825742391752e-06, - "loss": 0.7849, + "epoch": 0.5479760647659275, + "grad_norm": 1.6600134372711182, + "learning_rate": 6.254439252313594e-06, + "loss": 0.7121, "step": 7784 }, { - "epoch": 0.5890810033672581, - "grad_norm": 2.391326665878296, - "learning_rate": 7.106623868955784e-06, - "loss": 0.6453, + "epoch": 0.5480464625131996, + "grad_norm": 1.8200267553329468, + "learning_rate": 6.252852244714424e-06, + "loss": 0.6272, "step": 7785 }, { - "epoch": 0.5891566720895918, - "grad_norm": 2.2341959476470947, - "learning_rate": 7.104422114089155e-06, - "loss": 0.7335, + "epoch": 0.5481168602604717, + "grad_norm": 1.8633325099945068, + "learning_rate": 6.251265275960141e-06, + "loss": 0.6545, "step": 7786 }, { - "epoch": 0.5892323408119254, - "grad_norm": 2.1001060009002686, - "learning_rate": 7.1022204779297415e-06, - "loss": 0.752, + "epoch": 0.5481872580077437, + "grad_norm": 2.0975241661071777, + "learning_rate": 6.249678346133256e-06, + "loss": 0.7797, "step": 7787 }, { - "epoch": 0.589308009534259, - "grad_norm": 2.2669565677642822, - "learning_rate": 7.1000189606154185e-06, - "loss": 0.6614, + "epoch": 0.5482576557550158, + "grad_norm": 1.8973156213760376, + "learning_rate": 6.2480914553162715e-06, + "loss": 0.6801, "step": 7788 }, { - "epoch": 0.5893836782565927, - "grad_norm": 2.1226632595062256, - "learning_rate": 7.097817562284056e-06, - "loss": 0.7003, + "epoch": 0.548328053502288, + "grad_norm": 1.8868952989578247, + "learning_rate": 6.246504603591694e-06, + "loss": 0.745, "step": 7789 }, { - "epoch": 0.5894593469789262, - "grad_norm": 1.9791269302368164, - "learning_rate": 7.095616283073511e-06, - "loss": 0.7109, + "epoch": 0.54839845124956, + "grad_norm": 1.9569071531295776, + "learning_rate": 6.244917791042022e-06, + "loss": 0.7399, "step": 7790 }, { - "epoch": 0.5895350157012599, - "grad_norm": 2.323878765106201, - "learning_rate": 7.093415123121633e-06, - "loss": 0.7033, + "epoch": 0.5484688489968321, + "grad_norm": 2.0065667629241943, + "learning_rate": 6.243331017749762e-06, + "loss": 0.773, "step": 7791 }, { - "epoch": 0.5896106844235935, - "grad_norm": 2.5213472843170166, - "learning_rate": 7.091214082566267e-06, - "loss": 0.7201, + "epoch": 0.5485392467441041, + "grad_norm": 1.9541308879852295, + "learning_rate": 6.241744283797405e-06, + "loss": 0.6835, "step": 7792 }, { - "epoch": 0.5896863531459271, - "grad_norm": 2.2447805404663086, - "learning_rate": 7.089013161545246e-06, - "loss": 0.7435, + "epoch": 0.5486096444913763, + "grad_norm": 2.238441228866577, + "learning_rate": 6.240157589267452e-06, + "loss": 0.6994, "step": 7793 }, { - "epoch": 0.5897620218682608, - "grad_norm": 1.9220545291900635, - "learning_rate": 7.086812360196404e-06, - "loss": 0.7438, + "epoch": 0.5486800422386484, + "grad_norm": 1.6678476333618164, + "learning_rate": 6.238570934242392e-06, + "loss": 0.6664, "step": 7794 }, { - "epoch": 0.5898376905905944, - "grad_norm": 2.168332815170288, - "learning_rate": 7.084611678657562e-06, - "loss": 0.6878, + "epoch": 0.5487504399859204, + "grad_norm": 1.9458122253417969, + "learning_rate": 6.236984318804719e-06, + "loss": 0.7266, "step": 7795 }, { - "epoch": 0.589913359312928, - "grad_norm": 2.1411046981811523, - "learning_rate": 7.082411117066529e-06, - "loss": 0.7192, + "epoch": 0.5488208377331926, + "grad_norm": 1.595186471939087, + "learning_rate": 6.235397743036921e-06, + "loss": 0.7331, "step": 7796 }, { - "epoch": 0.5899890280352617, - "grad_norm": 2.133474111557007, - "learning_rate": 7.080210675561116e-06, - "loss": 0.6435, + "epoch": 0.5488912354804646, + "grad_norm": 1.5885337591171265, + "learning_rate": 6.233811207021489e-06, + "loss": 0.6458, "step": 7797 }, { - "epoch": 0.5900646967575952, - "grad_norm": 2.075592041015625, - "learning_rate": 7.078010354279117e-06, - "loss": 0.7506, + "epoch": 0.5489616332277367, + "grad_norm": 1.6301014423370361, + "learning_rate": 6.232224710840905e-06, + "loss": 0.702, "step": 7798 }, { - "epoch": 0.5901403654799289, - "grad_norm": 1.674235224723816, - "learning_rate": 7.075810153358327e-06, - "loss": 0.6847, + "epoch": 0.5490320309750087, + "grad_norm": 1.7431066036224365, + "learning_rate": 6.2306382545776555e-06, + "loss": 0.562, "step": 7799 }, { - "epoch": 0.5902160342022625, - "grad_norm": 2.099836587905884, - "learning_rate": 7.073610072936532e-06, - "loss": 0.778, + "epoch": 0.5491024287222809, + "grad_norm": 1.7357741594314575, + "learning_rate": 6.229051838314218e-06, + "loss": 0.6332, "step": 7800 }, { - "epoch": 0.5902917029245961, - "grad_norm": 1.963880181312561, - "learning_rate": 7.0714101131515015e-06, - "loss": 0.6492, + "epoch": 0.549172826469553, + "grad_norm": 1.5846047401428223, + "learning_rate": 6.227465462133075e-06, + "loss": 0.578, "step": 7801 }, { - "epoch": 0.5903673716469298, - "grad_norm": 2.0048913955688477, - "learning_rate": 7.069210274141011e-06, - "loss": 0.7348, + "epoch": 0.549243224216825, + "grad_norm": 1.6287704706192017, + "learning_rate": 6.225879126116699e-06, + "loss": 0.6602, "step": 7802 }, { - "epoch": 0.5904430403692633, - "grad_norm": 1.986953616142273, - "learning_rate": 7.067010556042812e-06, - "loss": 0.7612, + "epoch": 0.5493136219640972, + "grad_norm": 2.342620849609375, + "learning_rate": 6.224292830347572e-06, + "loss": 0.6662, "step": 7803 }, { - "epoch": 0.590518709091597, - "grad_norm": 2.7819113731384277, - "learning_rate": 7.064810958994668e-06, - "loss": 0.7183, + "epoch": 0.5493840197113692, + "grad_norm": 1.847562313079834, + "learning_rate": 6.222706574908162e-06, + "loss": 0.8395, "step": 7804 }, { - "epoch": 0.5905943778139306, - "grad_norm": 2.236011266708374, - "learning_rate": 7.062611483134321e-06, - "loss": 0.8888, + "epoch": 0.5494544174586413, + "grad_norm": 1.5138769149780273, + "learning_rate": 6.221120359880942e-06, + "loss": 0.7586, "step": 7805 }, { - "epoch": 0.5906700465362642, - "grad_norm": 1.9459607601165771, - "learning_rate": 7.06041212859951e-06, - "loss": 0.7414, + "epoch": 0.5495248152059135, + "grad_norm": 1.6749473810195923, + "learning_rate": 6.219534185348379e-06, + "loss": 0.5534, "step": 7806 }, { - "epoch": 0.5907457152585979, - "grad_norm": 1.7577186822891235, - "learning_rate": 7.058212895527964e-06, - "loss": 0.7483, + "epoch": 0.5495952129531855, + "grad_norm": 1.8313485383987427, + "learning_rate": 6.21794805139294e-06, + "loss": 0.6427, "step": 7807 }, { - "epoch": 0.5908213839809315, - "grad_norm": 1.7476128339767456, - "learning_rate": 7.056013784057404e-06, - "loss": 0.6792, + "epoch": 0.5496656107004576, + "grad_norm": 1.603509545326233, + "learning_rate": 6.216361958097089e-06, + "loss": 0.5611, "step": 7808 }, { - "epoch": 0.5908970527032651, - "grad_norm": 2.0670974254608154, - "learning_rate": 7.053814794325552e-06, - "loss": 0.743, + "epoch": 0.5497360084477296, + "grad_norm": 1.8518643379211426, + "learning_rate": 6.214775905543292e-06, + "loss": 0.717, "step": 7809 }, { - "epoch": 0.5909727214255988, - "grad_norm": 1.867936372756958, - "learning_rate": 7.051615926470112e-06, - "loss": 0.7591, + "epoch": 0.5498064061950018, + "grad_norm": 1.5756397247314453, + "learning_rate": 6.213189893814006e-06, + "loss": 0.5757, "step": 7810 }, { - "epoch": 0.5910483901479323, - "grad_norm": 2.2760121822357178, - "learning_rate": 7.049417180628785e-06, - "loss": 0.6799, + "epoch": 0.5498768039422739, + "grad_norm": 1.812091588973999, + "learning_rate": 6.2116039229916905e-06, + "loss": 0.6676, "step": 7811 }, { - "epoch": 0.591124058870266, - "grad_norm": 2.5277442932128906, - "learning_rate": 7.047218556939262e-06, - "loss": 0.7732, + "epoch": 0.5499472016895459, + "grad_norm": 1.6875733137130737, + "learning_rate": 6.210017993158799e-06, + "loss": 0.7729, "step": 7812 }, { - "epoch": 0.5911997275925996, - "grad_norm": 2.176264524459839, - "learning_rate": 7.04502005553923e-06, - "loss": 0.6876, + "epoch": 0.550017599436818, + "grad_norm": 1.8549760580062866, + "learning_rate": 6.208432104397791e-06, + "loss": 0.6248, "step": 7813 }, { - "epoch": 0.5912753963149332, - "grad_norm": 2.316378355026245, - "learning_rate": 7.042821676566363e-06, - "loss": 0.6702, + "epoch": 0.5500879971840901, + "grad_norm": 1.8881546258926392, + "learning_rate": 6.2068462567911115e-06, + "loss": 0.7164, "step": 7814 }, { - "epoch": 0.5913510650372669, - "grad_norm": 2.1952383518218994, - "learning_rate": 7.040623420158334e-06, - "loss": 0.6593, + "epoch": 0.5501583949313622, + "grad_norm": 1.8984192609786987, + "learning_rate": 6.205260450421216e-06, + "loss": 0.6638, "step": 7815 }, { - "epoch": 0.5914267337596004, - "grad_norm": 2.2782609462738037, - "learning_rate": 7.038425286452806e-06, - "loss": 0.7548, + "epoch": 0.5502287926786343, + "grad_norm": 1.602730631828308, + "learning_rate": 6.203674685370547e-06, + "loss": 0.5983, "step": 7816 }, { - "epoch": 0.5915024024819341, - "grad_norm": 1.863445520401001, - "learning_rate": 7.036227275587428e-06, - "loss": 0.5509, + "epoch": 0.5502991904259064, + "grad_norm": 1.7371748685836792, + "learning_rate": 6.202088961721555e-06, + "loss": 0.6315, "step": 7817 }, { - "epoch": 0.5915780712042678, - "grad_norm": 5.7330851554870605, - "learning_rate": 7.034029387699853e-06, - "loss": 0.6268, + "epoch": 0.5503695881731785, + "grad_norm": 1.7810018062591553, + "learning_rate": 6.200503279556677e-06, + "loss": 0.6932, "step": 7818 }, { - "epoch": 0.5916537399266013, - "grad_norm": 1.4934370517730713, - "learning_rate": 7.031831622927709e-06, - "loss": 0.6594, + "epoch": 0.5504399859204505, + "grad_norm": 1.712323784828186, + "learning_rate": 6.1989176389583575e-06, + "loss": 0.6673, "step": 7819 }, { - "epoch": 0.591729408648935, - "grad_norm": 1.9840810298919678, - "learning_rate": 7.0296339814086425e-06, - "loss": 0.7832, + "epoch": 0.5505103836677226, + "grad_norm": 1.644753098487854, + "learning_rate": 6.19733204000904e-06, + "loss": 0.7971, "step": 7820 }, { - "epoch": 0.5918050773712686, - "grad_norm": 1.9863804578781128, - "learning_rate": 7.027436463280266e-06, - "loss": 0.6377, + "epoch": 0.5505807814149947, + "grad_norm": 1.863103985786438, + "learning_rate": 6.195746482791153e-06, + "loss": 0.7579, "step": 7821 }, { - "epoch": 0.5918807460936022, - "grad_norm": 2.300100803375244, - "learning_rate": 7.0252390686802e-06, - "loss": 0.6932, + "epoch": 0.5506511791622668, + "grad_norm": 1.4730033874511719, + "learning_rate": 6.194160967387137e-06, + "loss": 0.6757, "step": 7822 }, { - "epoch": 0.5919564148159359, - "grad_norm": 1.7306914329528809, - "learning_rate": 7.023041797746048e-06, - "loss": 0.7395, + "epoch": 0.5507215769095389, + "grad_norm": 1.634902000427246, + "learning_rate": 6.192575493879418e-06, + "loss": 0.7041, "step": 7823 }, { - "epoch": 0.5920320835382694, - "grad_norm": 2.2924342155456543, - "learning_rate": 7.020844650615412e-06, - "loss": 0.6836, + "epoch": 0.550791974656811, + "grad_norm": 1.6455365419387817, + "learning_rate": 6.190990062350435e-06, + "loss": 0.6545, "step": 7824 }, { - "epoch": 0.5921077522606031, - "grad_norm": 2.0509254932403564, - "learning_rate": 7.018647627425889e-06, - "loss": 0.7414, + "epoch": 0.5508623724040831, + "grad_norm": 1.6908098459243774, + "learning_rate": 6.189404672882609e-06, + "loss": 0.686, "step": 7825 }, { - "epoch": 0.5921834209829367, - "grad_norm": 1.9954980611801147, - "learning_rate": 7.016450728315059e-06, - "loss": 0.7232, + "epoch": 0.5509327701513551, + "grad_norm": 1.853636384010315, + "learning_rate": 6.18781932555837e-06, + "loss": 0.5737, "step": 7826 }, { - "epoch": 0.5922590897052703, - "grad_norm": 2.126925468444824, - "learning_rate": 7.014253953420501e-06, - "loss": 0.8562, + "epoch": 0.5510031678986272, + "grad_norm": 1.9686968326568604, + "learning_rate": 6.186234020460138e-06, + "loss": 0.7403, "step": 7827 }, { - "epoch": 0.592334758427604, - "grad_norm": 2.0931129455566406, - "learning_rate": 7.0120573028797814e-06, - "loss": 0.5782, + "epoch": 0.5510735656458994, + "grad_norm": 1.9566203355789185, + "learning_rate": 6.184648757670338e-06, + "loss": 0.6118, "step": 7828 }, { - "epoch": 0.5924104271499375, - "grad_norm": 1.9808735847473145, - "learning_rate": 7.009860776830461e-06, - "loss": 0.6848, + "epoch": 0.5511439633931714, + "grad_norm": 1.5814067125320435, + "learning_rate": 6.183063537271384e-06, + "loss": 0.6354, "step": 7829 }, { - "epoch": 0.5924860958722712, - "grad_norm": 1.9941705465316772, - "learning_rate": 7.007664375410099e-06, - "loss": 0.6859, + "epoch": 0.5512143611404435, + "grad_norm": 1.9488803148269653, + "learning_rate": 6.1814783593457e-06, + "loss": 0.8212, "step": 7830 }, { - "epoch": 0.5925617645946049, - "grad_norm": 1.9075549840927124, - "learning_rate": 7.005468098756237e-06, - "loss": 0.6063, + "epoch": 0.5512847588877156, + "grad_norm": 1.7936046123504639, + "learning_rate": 6.179893223975697e-06, + "loss": 0.6823, "step": 7831 }, { - "epoch": 0.5926374333169384, - "grad_norm": 2.4272186756134033, - "learning_rate": 7.003271947006415e-06, - "loss": 0.7188, + "epoch": 0.5513551566349877, + "grad_norm": 1.7793817520141602, + "learning_rate": 6.178308131243788e-06, + "loss": 0.7867, "step": 7832 }, { - "epoch": 0.5927131020392721, - "grad_norm": 2.166393518447876, - "learning_rate": 7.00107592029816e-06, - "loss": 0.7613, + "epoch": 0.5514255543822598, + "grad_norm": 1.6854156255722046, + "learning_rate": 6.176723081232382e-06, + "loss": 0.6536, "step": 7833 }, { - "epoch": 0.5927887707616057, - "grad_norm": 2.0019025802612305, - "learning_rate": 6.998880018768995e-06, - "loss": 0.5959, + "epoch": 0.5514959521295318, + "grad_norm": 1.6729825735092163, + "learning_rate": 6.17513807402389e-06, + "loss": 0.7352, "step": 7834 }, { - "epoch": 0.5928644394839393, - "grad_norm": 2.18587327003479, - "learning_rate": 6.996684242556438e-06, - "loss": 0.6934, + "epoch": 0.551566349876804, + "grad_norm": 1.5397371053695679, + "learning_rate": 6.173553109700715e-06, + "loss": 0.7051, "step": 7835 }, { - "epoch": 0.592940108206273, - "grad_norm": 1.79190993309021, - "learning_rate": 6.9944885917979935e-06, - "loss": 0.5657, + "epoch": 0.551636747624076, + "grad_norm": 2.3712105751037598, + "learning_rate": 6.171968188345264e-06, + "loss": 0.8233, "step": 7836 }, { - "epoch": 0.5930157769286065, - "grad_norm": 1.922041893005371, - "learning_rate": 6.992293066631159e-06, - "loss": 0.7778, + "epoch": 0.5517071453713481, + "grad_norm": 1.6892001628875732, + "learning_rate": 6.170383310039936e-06, + "loss": 0.6877, "step": 7837 }, { - "epoch": 0.5930914456509402, - "grad_norm": 2.1699047088623047, - "learning_rate": 6.990097667193427e-06, - "loss": 0.5798, + "epoch": 0.5517775431186202, + "grad_norm": 1.913009524345398, + "learning_rate": 6.168798474867132e-06, + "loss": 0.763, "step": 7838 }, { - "epoch": 0.5931671143732739, - "grad_norm": 1.9367480278015137, - "learning_rate": 6.987902393622278e-06, - "loss": 0.8402, + "epoch": 0.5518479408658923, + "grad_norm": 1.5469714403152466, + "learning_rate": 6.167213682909246e-06, + "loss": 0.7208, "step": 7839 }, { - "epoch": 0.5932427830956074, - "grad_norm": 2.1018497943878174, - "learning_rate": 6.985707246055189e-06, - "loss": 0.6521, + "epoch": 0.5519183386131644, + "grad_norm": 1.9929800033569336, + "learning_rate": 6.165628934248678e-06, + "loss": 0.6664, "step": 7840 }, { - "epoch": 0.5933184518179411, - "grad_norm": 1.8842015266418457, - "learning_rate": 6.983512224629631e-06, - "loss": 0.6046, + "epoch": 0.5519887363604364, + "grad_norm": 1.8267264366149902, + "learning_rate": 6.164044228967813e-06, + "loss": 0.715, "step": 7841 }, { - "epoch": 0.5933941205402746, - "grad_norm": 2.317081928253174, - "learning_rate": 6.981317329483057e-06, - "loss": 0.6315, + "epoch": 0.5520591341077086, + "grad_norm": 1.7379592657089233, + "learning_rate": 6.162459567149049e-06, + "loss": 0.8288, "step": 7842 }, { - "epoch": 0.5934697892626083, - "grad_norm": 2.405687093734741, - "learning_rate": 6.979122560752923e-06, - "loss": 0.5215, + "epoch": 0.5521295318549806, + "grad_norm": 1.8503873348236084, + "learning_rate": 6.160874948874769e-06, + "loss": 0.6294, "step": 7843 }, { - "epoch": 0.593545457984942, - "grad_norm": 7.7125935554504395, - "learning_rate": 6.976927918576667e-06, - "loss": 0.6223, + "epoch": 0.5521999296022527, + "grad_norm": 1.7426378726959229, + "learning_rate": 6.159290374227363e-06, + "loss": 0.6546, "step": 7844 }, { - "epoch": 0.5936211267072755, - "grad_norm": 2.015296697616577, - "learning_rate": 6.974733403091729e-06, - "loss": 0.6515, + "epoch": 0.5522703273495249, + "grad_norm": 1.596944808959961, + "learning_rate": 6.157705843289208e-06, + "loss": 0.6646, "step": 7845 }, { - "epoch": 0.5936967954296092, - "grad_norm": 1.864576816558838, - "learning_rate": 6.972539014435539e-06, - "loss": 0.74, + "epoch": 0.5523407250967969, + "grad_norm": 1.6603950262069702, + "learning_rate": 6.156121356142693e-06, + "loss": 0.6061, "step": 7846 }, { - "epoch": 0.5937724641519428, - "grad_norm": 2.105365514755249, - "learning_rate": 6.970344752745511e-06, - "loss": 0.6876, + "epoch": 0.552411122844069, + "grad_norm": 1.8152450323104858, + "learning_rate": 6.154536912870188e-06, + "loss": 0.6886, "step": 7847 }, { - "epoch": 0.5938481328742764, - "grad_norm": 1.9904052019119263, - "learning_rate": 6.968150618159058e-06, - "loss": 0.7621, + "epoch": 0.552481520591341, + "grad_norm": 1.810212254524231, + "learning_rate": 6.152952513554079e-06, + "loss": 0.7911, "step": 7848 }, { - "epoch": 0.5939238015966101, - "grad_norm": 2.3770105838775635, - "learning_rate": 6.965956610813589e-06, - "loss": 0.5695, + "epoch": 0.5525519183386132, + "grad_norm": 1.7930800914764404, + "learning_rate": 6.151368158276733e-06, + "loss": 0.6087, "step": 7849 }, { - "epoch": 0.5939994703189436, - "grad_norm": 2.017836570739746, - "learning_rate": 6.963762730846492e-06, - "loss": 0.6737, + "epoch": 0.5526223160858853, + "grad_norm": 1.7702215909957886, + "learning_rate": 6.149783847120528e-06, + "loss": 0.745, "step": 7850 }, { - "epoch": 0.5940751390412773, - "grad_norm": 1.9348866939544678, - "learning_rate": 6.96156897839516e-06, - "loss": 0.8016, + "epoch": 0.5526927138331573, + "grad_norm": 1.8680458068847656, + "learning_rate": 6.148199580167828e-06, + "loss": 0.6226, "step": 7851 }, { - "epoch": 0.594150807763611, - "grad_norm": 1.867281198501587, - "learning_rate": 6.959375353596973e-06, - "loss": 0.5993, + "epoch": 0.5527631115804295, + "grad_norm": 1.884903073310852, + "learning_rate": 6.146615357501007e-06, + "loss": 0.722, "step": 7852 }, { - "epoch": 0.5942264764859445, - "grad_norm": 2.012544870376587, - "learning_rate": 6.957181856589301e-06, - "loss": 0.5875, + "epoch": 0.5528335093277015, + "grad_norm": 1.7139201164245605, + "learning_rate": 6.145031179202421e-06, + "loss": 0.7422, "step": 7853 }, { - "epoch": 0.5943021452082782, - "grad_norm": 2.0992937088012695, - "learning_rate": 6.9549884875095095e-06, - "loss": 0.5162, + "epoch": 0.5529039070749736, + "grad_norm": 1.613330364227295, + "learning_rate": 6.143447045354442e-06, + "loss": 0.6888, "step": 7854 }, { - "epoch": 0.5943778139306117, - "grad_norm": 1.8907424211502075, - "learning_rate": 6.952795246494949e-06, - "loss": 0.7365, + "epoch": 0.5529743048222457, + "grad_norm": 1.9915255308151245, + "learning_rate": 6.1418629560394255e-06, + "loss": 0.7366, "step": 7855 }, { - "epoch": 0.5944534826529454, - "grad_norm": 1.9334065914154053, - "learning_rate": 6.9506021336829745e-06, - "loss": 0.6326, + "epoch": 0.5530447025695178, + "grad_norm": 2.0132555961608887, + "learning_rate": 6.140278911339734e-06, + "loss": 0.7329, "step": 7856 }, { - "epoch": 0.5945291513752791, - "grad_norm": 2.2515668869018555, - "learning_rate": 6.948409149210924e-06, - "loss": 0.7034, + "epoch": 0.5531151003167899, + "grad_norm": 1.7994076013565063, + "learning_rate": 6.138694911337716e-06, + "loss": 0.7698, "step": 7857 }, { - "epoch": 0.5946048200976126, - "grad_norm": 2.1506471633911133, - "learning_rate": 6.946216293216127e-06, - "loss": 0.8012, + "epoch": 0.5531854980640619, + "grad_norm": 1.7677419185638428, + "learning_rate": 6.137110956115734e-06, + "loss": 0.6387, "step": 7858 }, { - "epoch": 0.5946804888199463, - "grad_norm": 1.9353808164596558, - "learning_rate": 6.944023565835911e-06, - "loss": 0.6319, + "epoch": 0.553255895811334, + "grad_norm": 2.693716049194336, + "learning_rate": 6.135527045756131e-06, + "loss": 0.6487, "step": 7859 }, { - "epoch": 0.59475615754228, - "grad_norm": 3.7930614948272705, - "learning_rate": 6.941830967207584e-06, - "loss": 0.6095, + "epoch": 0.5533262935586061, + "grad_norm": 1.477164626121521, + "learning_rate": 6.133943180341261e-06, + "loss": 0.7204, "step": 7860 }, { - "epoch": 0.5948318262646135, - "grad_norm": 1.9903547763824463, - "learning_rate": 6.939638497468461e-06, - "loss": 0.7019, + "epoch": 0.5533966913058782, + "grad_norm": 1.6951420307159424, + "learning_rate": 6.13235935995347e-06, + "loss": 0.6494, "step": 7861 }, { - "epoch": 0.5949074949869472, - "grad_norm": 2.3188912868499756, - "learning_rate": 6.937446156755841e-06, - "loss": 0.8333, + "epoch": 0.5534670890531503, + "grad_norm": 1.5593469142913818, + "learning_rate": 6.130775584675102e-06, + "loss": 0.7315, "step": 7862 }, { - "epoch": 0.5949831637092807, - "grad_norm": 2.208460569381714, - "learning_rate": 6.935253945207013e-06, - "loss": 0.5642, + "epoch": 0.5535374868004224, + "grad_norm": 1.5054998397827148, + "learning_rate": 6.1291918545884965e-06, + "loss": 0.7115, "step": 7863 }, { - "epoch": 0.5950588324316144, - "grad_norm": 2.100937604904175, - "learning_rate": 6.93306186295926e-06, - "loss": 0.6346, + "epoch": 0.5536078845476945, + "grad_norm": 1.5793615579605103, + "learning_rate": 6.127608169776e-06, + "loss": 0.652, "step": 7864 }, { - "epoch": 0.5951345011539481, - "grad_norm": 2.0526506900787354, - "learning_rate": 6.9308699101498565e-06, - "loss": 0.6951, + "epoch": 0.5536782822949665, + "grad_norm": 1.7108153104782104, + "learning_rate": 6.126024530319938e-06, + "loss": 0.8334, "step": 7865 }, { - "epoch": 0.5952101698762816, - "grad_norm": 2.3143303394317627, - "learning_rate": 6.928678086916076e-06, - "loss": 0.6788, + "epoch": 0.5537486800422387, + "grad_norm": 1.6228687763214111, + "learning_rate": 6.124440936302658e-06, + "loss": 0.6226, "step": 7866 }, { - "epoch": 0.5952858385986153, - "grad_norm": 2.076948404312134, - "learning_rate": 6.926486393395171e-06, - "loss": 0.7481, + "epoch": 0.5538190777895108, + "grad_norm": 2.0125041007995605, + "learning_rate": 6.122857387806484e-06, + "loss": 0.6926, "step": 7867 }, { - "epoch": 0.5953615073209488, - "grad_norm": 2.473203659057617, - "learning_rate": 6.9242948297243975e-06, - "loss": 0.5965, + "epoch": 0.5538894755367828, + "grad_norm": 2.494385242462158, + "learning_rate": 6.121273884913749e-06, + "loss": 0.6777, "step": 7868 }, { - "epoch": 0.5954371760432825, - "grad_norm": 2.1721701622009277, - "learning_rate": 6.922103396040992e-06, - "loss": 0.768, + "epoch": 0.5539598732840549, + "grad_norm": 1.736871361732483, + "learning_rate": 6.11969042770678e-06, + "loss": 0.6219, "step": 7869 }, { - "epoch": 0.5955128447656162, - "grad_norm": 1.6090091466903687, - "learning_rate": 6.919912092482192e-06, - "loss": 0.6591, + "epoch": 0.554030271031327, + "grad_norm": 1.700073480606079, + "learning_rate": 6.118107016267902e-06, + "loss": 0.7423, "step": 7870 }, { - "epoch": 0.5955885134879497, - "grad_norm": 1.9884438514709473, - "learning_rate": 6.917720919185227e-06, - "loss": 0.7708, + "epoch": 0.5541006687785991, + "grad_norm": 1.6689077615737915, + "learning_rate": 6.116523650679442e-06, + "loss": 0.7274, "step": 7871 }, { - "epoch": 0.5956641822102834, - "grad_norm": 2.0646588802337646, - "learning_rate": 6.9155298762873115e-06, - "loss": 0.6364, + "epoch": 0.5541710665258712, + "grad_norm": 2.0224971771240234, + "learning_rate": 6.114940331023716e-06, + "loss": 0.7108, "step": 7872 }, { - "epoch": 0.595739850932617, - "grad_norm": 2.5629851818084717, - "learning_rate": 6.913338963925659e-06, - "loss": 0.8015, + "epoch": 0.5542414642731432, + "grad_norm": 1.6471261978149414, + "learning_rate": 6.1133570573830446e-06, + "loss": 0.6621, "step": 7873 }, { - "epoch": 0.5958155196549506, - "grad_norm": 2.499199151992798, - "learning_rate": 6.9111481822374685e-06, - "loss": 0.5926, + "epoch": 0.5543118620204154, + "grad_norm": 1.9003328084945679, + "learning_rate": 6.11177382983974e-06, + "loss": 0.6419, "step": 7874 }, { - "epoch": 0.5958911883772843, - "grad_norm": 2.0954859256744385, - "learning_rate": 6.908957531359932e-06, - "loss": 0.7402, + "epoch": 0.5543822597676874, + "grad_norm": 1.3469488620758057, + "learning_rate": 6.110190648476122e-06, + "loss": 0.5715, "step": 7875 }, { - "epoch": 0.5959668570996178, - "grad_norm": 1.77829110622406, - "learning_rate": 6.906767011430242e-06, - "loss": 0.6274, + "epoch": 0.5544526575149595, + "grad_norm": 1.8763664960861206, + "learning_rate": 6.108607513374496e-06, + "loss": 0.6316, "step": 7876 }, { - "epoch": 0.5960425258219515, - "grad_norm": 2.359452962875366, - "learning_rate": 6.904576622585572e-06, - "loss": 0.6625, + "epoch": 0.5545230552622316, + "grad_norm": 1.8494656085968018, + "learning_rate": 6.107024424617173e-06, + "loss": 0.7565, "step": 7877 }, { - "epoch": 0.5961181945442852, - "grad_norm": 2.3234505653381348, - "learning_rate": 6.9023863649630894e-06, - "loss": 0.8029, + "epoch": 0.5545934530095037, + "grad_norm": 1.6249184608459473, + "learning_rate": 6.105441382286459e-06, + "loss": 0.7672, "step": 7878 }, { - "epoch": 0.5961938632666187, - "grad_norm": 2.330986261367798, - "learning_rate": 6.90019623869996e-06, - "loss": 0.5878, + "epoch": 0.5546638507567758, + "grad_norm": 1.6247644424438477, + "learning_rate": 6.103858386464659e-06, + "loss": 0.6103, "step": 7879 }, { - "epoch": 0.5962695319889524, - "grad_norm": 2.1926472187042236, - "learning_rate": 6.898006243933329e-06, - "loss": 0.6217, + "epoch": 0.5547342485040478, + "grad_norm": 2.0301427841186523, + "learning_rate": 6.10227543723407e-06, + "loss": 0.7038, "step": 7880 }, { - "epoch": 0.5963452007112859, - "grad_norm": 1.9008252620697021, - "learning_rate": 6.8958163808003485e-06, - "loss": 0.6465, + "epoch": 0.55480464625132, + "grad_norm": 2.321446418762207, + "learning_rate": 6.100692534676996e-06, + "loss": 0.7637, "step": 7881 }, { - "epoch": 0.5964208694336196, - "grad_norm": 2.272428512573242, - "learning_rate": 6.893626649438154e-06, - "loss": 0.692, + "epoch": 0.554875043998592, + "grad_norm": 1.702883005142212, + "learning_rate": 6.099109678875732e-06, + "loss": 0.6676, "step": 7882 }, { - "epoch": 0.5964965381559533, - "grad_norm": 2.106896162033081, - "learning_rate": 6.891437049983869e-06, - "loss": 0.5936, + "epoch": 0.5549454417458641, + "grad_norm": 1.9499701261520386, + "learning_rate": 6.097526869912572e-06, + "loss": 0.6695, "step": 7883 }, { - "epoch": 0.5965722068782868, - "grad_norm": 2.124065399169922, - "learning_rate": 6.889247582574617e-06, - "loss": 0.6684, + "epoch": 0.5550158394931363, + "grad_norm": 1.8997308015823364, + "learning_rate": 6.095944107869806e-06, + "loss": 0.6846, "step": 7884 }, { - "epoch": 0.5966478756006205, - "grad_norm": 1.6563143730163574, - "learning_rate": 6.887058247347506e-06, - "loss": 0.6952, + "epoch": 0.5550862372404083, + "grad_norm": 1.508699893951416, + "learning_rate": 6.094361392829726e-06, + "loss": 0.6901, "step": 7885 }, { - "epoch": 0.5967235443229542, - "grad_norm": 1.7926180362701416, - "learning_rate": 6.884869044439644e-06, - "loss": 0.7559, + "epoch": 0.5551566349876804, + "grad_norm": 1.8020700216293335, + "learning_rate": 6.092778724874615e-06, + "loss": 0.7194, "step": 7886 }, { - "epoch": 0.5967992130452877, - "grad_norm": 2.0105221271514893, - "learning_rate": 6.8826799739881235e-06, - "loss": 0.6041, + "epoch": 0.5552270327349524, + "grad_norm": 1.5317384004592896, + "learning_rate": 6.091196104086761e-06, + "loss": 0.698, "step": 7887 }, { - "epoch": 0.5968748817676214, - "grad_norm": 2.2636537551879883, - "learning_rate": 6.88049103613003e-06, - "loss": 0.6275, + "epoch": 0.5552974304822246, + "grad_norm": 1.609717607498169, + "learning_rate": 6.089613530548444e-06, + "loss": 0.6938, "step": 7888 }, { - "epoch": 0.5969505504899549, - "grad_norm": 2.092416524887085, - "learning_rate": 6.878302231002446e-06, - "loss": 0.7661, + "epoch": 0.5553678282294967, + "grad_norm": 1.6532469987869263, + "learning_rate": 6.0880310043419466e-06, + "loss": 0.632, "step": 7889 }, { - "epoch": 0.5970262192122886, - "grad_norm": 1.8432059288024902, - "learning_rate": 6.876113558742437e-06, - "loss": 0.6898, + "epoch": 0.5554382259767687, + "grad_norm": 1.6995035409927368, + "learning_rate": 6.086448525549541e-06, + "loss": 0.6097, "step": 7890 }, { - "epoch": 0.5971018879346223, - "grad_norm": 1.8818315267562866, - "learning_rate": 6.873925019487064e-06, - "loss": 0.5599, + "epoch": 0.5555086237240409, + "grad_norm": 1.60550057888031, + "learning_rate": 6.084866094253506e-06, + "loss": 0.5566, "step": 7891 }, { - "epoch": 0.5971775566569558, - "grad_norm": 1.8227505683898926, - "learning_rate": 6.871736613373384e-06, - "loss": 0.6097, + "epoch": 0.5555790214713129, + "grad_norm": 1.702028512954712, + "learning_rate": 6.083283710536107e-06, + "loss": 0.6571, "step": 7892 }, { - "epoch": 0.5972532253792895, - "grad_norm": 5.854292869567871, - "learning_rate": 6.869548340538444e-06, - "loss": 0.7175, + "epoch": 0.555649419218585, + "grad_norm": 1.7814414501190186, + "learning_rate": 6.081701374479623e-06, + "loss": 0.723, "step": 7893 }, { - "epoch": 0.597328894101623, - "grad_norm": 1.9756673574447632, - "learning_rate": 6.8673602011192746e-06, - "loss": 0.6218, + "epoch": 0.555719816965857, + "grad_norm": 2.0688118934631348, + "learning_rate": 6.080119086166314e-06, + "loss": 0.8083, "step": 7894 }, { - "epoch": 0.5974045628239567, - "grad_norm": 2.129859209060669, - "learning_rate": 6.86517219525291e-06, - "loss": 0.7691, + "epoch": 0.5557902147131292, + "grad_norm": 1.6852928400039673, + "learning_rate": 6.078536845678447e-06, + "loss": 0.7056, "step": 7895 }, { - "epoch": 0.5974802315462904, - "grad_norm": 2.6184825897216797, - "learning_rate": 6.862984323076363e-06, - "loss": 0.784, + "epoch": 0.5558606124604013, + "grad_norm": 1.9891263246536255, + "learning_rate": 6.076954653098283e-06, + "loss": 0.6895, "step": 7896 }, { - "epoch": 0.5975559002686239, - "grad_norm": 2.6178689002990723, - "learning_rate": 6.860796584726652e-06, - "loss": 0.6983, + "epoch": 0.5559310102076733, + "grad_norm": 1.9008475542068481, + "learning_rate": 6.075372508508085e-06, + "loss": 0.6274, "step": 7897 }, { - "epoch": 0.5976315689909576, - "grad_norm": 1.5637413263320923, - "learning_rate": 6.858608980340779e-06, - "loss": 0.6212, + "epoch": 0.5560014079549455, + "grad_norm": 1.7858803272247314, + "learning_rate": 6.073790411990104e-06, + "loss": 0.744, "step": 7898 }, { - "epoch": 0.5977072377132913, - "grad_norm": 1.7278257608413696, - "learning_rate": 6.856421510055736e-06, - "loss": 0.6918, + "epoch": 0.5560718057022175, + "grad_norm": 1.5384825468063354, + "learning_rate": 6.072208363626602e-06, + "loss": 0.5997, "step": 7899 }, { - "epoch": 0.5977829064356248, - "grad_norm": 2.5124382972717285, - "learning_rate": 6.8542341740085136e-06, - "loss": 0.772, + "epoch": 0.5561422034494896, + "grad_norm": 1.9604229927062988, + "learning_rate": 6.070626363499825e-06, + "loss": 0.6303, "step": 7900 }, { - "epoch": 0.5978585751579585, - "grad_norm": 2.5317604541778564, - "learning_rate": 6.8520469723360835e-06, - "loss": 0.5569, + "epoch": 0.5562126011967617, + "grad_norm": 1.8595439195632935, + "learning_rate": 6.069044411692028e-06, + "loss": 0.7508, "step": 7901 }, { - "epoch": 0.597934243880292, - "grad_norm": 2.7019991874694824, - "learning_rate": 6.849859905175421e-06, - "loss": 0.7898, + "epoch": 0.5562829989440338, + "grad_norm": 1.8011680841445923, + "learning_rate": 6.067462508285452e-06, + "loss": 0.7479, "step": 7902 }, { - "epoch": 0.5980099126026257, - "grad_norm": 1.7270742654800415, - "learning_rate": 6.847672972663488e-06, - "loss": 0.8241, + "epoch": 0.5563533966913059, + "grad_norm": 1.6678258180618286, + "learning_rate": 6.065880653362346e-06, + "loss": 0.6622, "step": 7903 }, { - "epoch": 0.5980855813249594, - "grad_norm": 2.8753139972686768, - "learning_rate": 6.845486174937233e-06, - "loss": 0.6372, + "epoch": 0.5564237944385779, + "grad_norm": 1.7199361324310303, + "learning_rate": 6.0642988470049495e-06, + "loss": 0.7401, "step": 7904 }, { - "epoch": 0.5981612500472929, - "grad_norm": 2.0319454669952393, - "learning_rate": 6.843299512133604e-06, - "loss": 0.578, + "epoch": 0.5564941921858501, + "grad_norm": 1.56338369846344, + "learning_rate": 6.062717089295505e-06, + "loss": 0.6775, "step": 7905 }, { - "epoch": 0.5982369187696266, - "grad_norm": 2.0023133754730225, - "learning_rate": 6.841112984389529e-06, - "loss": 0.6541, + "epoch": 0.5565645899331222, + "grad_norm": 1.675711750984192, + "learning_rate": 6.061135380316248e-06, + "loss": 0.6727, "step": 7906 }, { - "epoch": 0.5983125874919601, - "grad_norm": 2.8424692153930664, - "learning_rate": 6.8389265918419485e-06, - "loss": 0.6049, + "epoch": 0.5566349876803942, + "grad_norm": 1.6156067848205566, + "learning_rate": 6.059553720149413e-06, + "loss": 0.5882, "step": 7907 }, { - "epoch": 0.5983882562142938, - "grad_norm": 2.44048810005188, - "learning_rate": 6.836740334627771e-06, - "loss": 0.6917, + "epoch": 0.5567053854276663, + "grad_norm": 1.584352970123291, + "learning_rate": 6.05797210887723e-06, + "loss": 0.775, "step": 7908 }, { - "epoch": 0.5984639249366275, - "grad_norm": 1.9694398641586304, - "learning_rate": 6.8345542128839146e-06, - "loss": 0.5634, + "epoch": 0.5567757831749384, + "grad_norm": 1.6168479919433594, + "learning_rate": 6.056390546581933e-06, + "loss": 0.7491, "step": 7909 }, { - "epoch": 0.598539593658961, - "grad_norm": 3.217658519744873, - "learning_rate": 6.832368226747273e-06, - "loss": 0.824, + "epoch": 0.5568461809222105, + "grad_norm": 1.8831716775894165, + "learning_rate": 6.054809033345742e-06, + "loss": 0.7852, "step": 7910 }, { - "epoch": 0.5986152623812947, - "grad_norm": 2.072033643722534, - "learning_rate": 6.830182376354744e-06, - "loss": 0.6921, + "epoch": 0.5569165786694826, + "grad_norm": 2.036759853363037, + "learning_rate": 6.053227569250889e-06, + "loss": 0.6338, "step": 7911 }, { - "epoch": 0.5986909311036284, - "grad_norm": 2.394564628601074, - "learning_rate": 6.8279966618432155e-06, - "loss": 0.6915, + "epoch": 0.5569869764167547, + "grad_norm": 2.245087146759033, + "learning_rate": 6.051646154379591e-06, + "loss": 0.6314, "step": 7912 }, { - "epoch": 0.5987665998259619, - "grad_norm": 1.8874036073684692, - "learning_rate": 6.825811083349559e-06, - "loss": 0.6492, + "epoch": 0.5570573741640268, + "grad_norm": 1.5066320896148682, + "learning_rate": 6.050064788814069e-06, + "loss": 0.7276, "step": 7913 }, { - "epoch": 0.5988422685482956, - "grad_norm": 1.7274051904678345, - "learning_rate": 6.8236256410106476e-06, - "loss": 0.6783, + "epoch": 0.5571277719112988, + "grad_norm": 1.5262733697891235, + "learning_rate": 6.048483472636537e-06, + "loss": 0.6712, "step": 7914 }, { - "epoch": 0.5989179372706291, - "grad_norm": 2.2389156818389893, - "learning_rate": 6.821440334963335e-06, - "loss": 0.5332, + "epoch": 0.5571981696585709, + "grad_norm": 1.693677306175232, + "learning_rate": 6.046902205929214e-06, + "loss": 0.5929, "step": 7915 }, { - "epoch": 0.5989936059929628, - "grad_norm": 3.8984248638153076, - "learning_rate": 6.819255165344475e-06, - "loss": 0.6413, + "epoch": 0.557268567405843, + "grad_norm": 1.5693113803863525, + "learning_rate": 6.045320988774304e-06, + "loss": 0.651, "step": 7916 }, { - "epoch": 0.5990692747152965, - "grad_norm": 2.091085433959961, - "learning_rate": 6.817070132290911e-06, - "loss": 0.6986, + "epoch": 0.5573389651531151, + "grad_norm": 2.1736855506896973, + "learning_rate": 6.043739821254025e-06, + "loss": 0.6768, "step": 7917 }, { - "epoch": 0.59914494343763, - "grad_norm": 2.023153781890869, - "learning_rate": 6.814885235939475e-06, - "loss": 0.734, + "epoch": 0.5574093629003872, + "grad_norm": 1.8210651874542236, + "learning_rate": 6.042158703450575e-06, + "loss": 0.6753, "step": 7918 }, { - "epoch": 0.5992206121599637, - "grad_norm": 2.2519912719726562, - "learning_rate": 6.812700476426995e-06, - "loss": 0.6936, + "epoch": 0.5574797606476593, + "grad_norm": 2.080321788787842, + "learning_rate": 6.040577635446165e-06, + "loss": 0.633, "step": 7919 }, { - "epoch": 0.5992962808822972, - "grad_norm": 1.7816762924194336, - "learning_rate": 6.810515853890283e-06, - "loss": 0.7838, + "epoch": 0.5575501583949314, + "grad_norm": 1.5413209199905396, + "learning_rate": 6.038996617322989e-06, + "loss": 0.8054, "step": 7920 }, { - "epoch": 0.5993719496046309, - "grad_norm": 2.385741710662842, - "learning_rate": 6.808331368466149e-06, - "loss": 0.7145, + "epoch": 0.5576205561422034, + "grad_norm": 2.0382888317108154, + "learning_rate": 6.03741564916325e-06, + "loss": 0.6002, "step": 7921 }, { - "epoch": 0.5994476183269646, - "grad_norm": 2.6496787071228027, - "learning_rate": 6.806147020291395e-06, - "loss": 0.7415, + "epoch": 0.5576909538894755, + "grad_norm": 1.9797132015228271, + "learning_rate": 6.0358347310491455e-06, + "loss": 0.8636, "step": 7922 }, { - "epoch": 0.5995232870492981, - "grad_norm": 2.55653977394104, - "learning_rate": 6.803962809502812e-06, - "loss": 0.7278, + "epoch": 0.5577613516367477, + "grad_norm": 1.9003655910491943, + "learning_rate": 6.034253863062864e-06, + "loss": 0.7187, "step": 7923 }, { - "epoch": 0.5995989557716318, - "grad_norm": 2.4421041011810303, - "learning_rate": 6.80177873623718e-06, - "loss": 0.7035, + "epoch": 0.5578317493840197, + "grad_norm": 1.9913098812103271, + "learning_rate": 6.0326730452866014e-06, + "loss": 0.5796, "step": 7924 }, { - "epoch": 0.5996746244939655, - "grad_norm": 2.765859603881836, - "learning_rate": 6.799594800631275e-06, - "loss": 0.5941, + "epoch": 0.5579021471312918, + "grad_norm": 1.6400822401046753, + "learning_rate": 6.03109227780254e-06, + "loss": 0.6708, "step": 7925 }, { - "epoch": 0.599750293216299, - "grad_norm": 2.608785390853882, - "learning_rate": 6.797411002821856e-06, - "loss": 0.6181, + "epoch": 0.5579725448785638, + "grad_norm": 1.8751401901245117, + "learning_rate": 6.029511560692872e-06, + "loss": 0.6449, "step": 7926 }, { - "epoch": 0.5998259619386327, - "grad_norm": 2.0030951499938965, - "learning_rate": 6.795227342945686e-06, - "loss": 0.6672, + "epoch": 0.558042942625836, + "grad_norm": 1.8307408094406128, + "learning_rate": 6.027930894039775e-06, + "loss": 0.6453, "step": 7927 }, { - "epoch": 0.5999016306609662, - "grad_norm": 1.9999091625213623, - "learning_rate": 6.7930438211395136e-06, - "loss": 0.7108, + "epoch": 0.5581133403731081, + "grad_norm": 1.7636337280273438, + "learning_rate": 6.026350277925433e-06, + "loss": 0.7186, "step": 7928 }, { - "epoch": 0.5999772993832999, - "grad_norm": 2.112736463546753, - "learning_rate": 6.7908604375400725e-06, - "loss": 0.5911, + "epoch": 0.5581837381203801, + "grad_norm": 1.6545605659484863, + "learning_rate": 6.0247697124320196e-06, + "loss": 0.6821, "step": 7929 }, { - "epoch": 0.6000529681056336, - "grad_norm": 2.1307666301727295, - "learning_rate": 6.788677192284098e-06, - "loss": 0.5783, + "epoch": 0.5582541358676523, + "grad_norm": 1.826183557510376, + "learning_rate": 6.023189197641714e-06, + "loss": 0.7317, "step": 7930 }, { - "epoch": 0.6001286368279671, - "grad_norm": 2.141432046890259, - "learning_rate": 6.7864940855083085e-06, - "loss": 0.6246, + "epoch": 0.5583245336149243, + "grad_norm": 2.13765549659729, + "learning_rate": 6.021608733636683e-06, + "loss": 0.6969, "step": 7931 }, { - "epoch": 0.6002043055503008, - "grad_norm": 1.9539786577224731, - "learning_rate": 6.784311117349416e-06, - "loss": 0.79, + "epoch": 0.5583949313621964, + "grad_norm": 2.062459707260132, + "learning_rate": 6.020028320499103e-06, + "loss": 0.7376, "step": 7932 }, { - "epoch": 0.6002799742726344, - "grad_norm": 2.6877975463867188, - "learning_rate": 6.782128287944133e-06, - "loss": 0.665, + "epoch": 0.5584653291094684, + "grad_norm": 1.8655294179916382, + "learning_rate": 6.018447958311138e-06, + "loss": 0.7229, "step": 7933 }, { - "epoch": 0.600355642994968, - "grad_norm": 2.274559259414673, - "learning_rate": 6.779945597429147e-06, - "loss": 0.7325, + "epoch": 0.5585357268567406, + "grad_norm": 1.6662014722824097, + "learning_rate": 6.016867647154954e-06, + "loss": 0.6873, "step": 7934 }, { - "epoch": 0.6004313117173017, - "grad_norm": 2.1375906467437744, - "learning_rate": 6.77776304594115e-06, - "loss": 0.8202, + "epoch": 0.5586061246040127, + "grad_norm": 1.3693822622299194, + "learning_rate": 6.015287387112708e-06, + "loss": 0.7679, "step": 7935 }, { - "epoch": 0.6005069804396352, - "grad_norm": 1.993807315826416, - "learning_rate": 6.775580633616818e-06, - "loss": 0.7036, + "epoch": 0.5586765223512847, + "grad_norm": 2.117248773574829, + "learning_rate": 6.013707178266566e-06, + "loss": 0.6799, "step": 7936 }, { - "epoch": 0.6005826491619689, - "grad_norm": 2.2041423320770264, - "learning_rate": 6.773398360592818e-06, - "loss": 0.8387, + "epoch": 0.5587469200985569, + "grad_norm": 1.6862576007843018, + "learning_rate": 6.0121270206986774e-06, + "loss": 0.6582, "step": 7937 }, { - "epoch": 0.6006583178843026, - "grad_norm": 2.223114490509033, - "learning_rate": 6.771216227005818e-06, - "loss": 0.6734, + "epoch": 0.5588173178458289, + "grad_norm": 1.497038722038269, + "learning_rate": 6.010546914491202e-06, + "loss": 0.7092, "step": 7938 }, { - "epoch": 0.6007339866066361, - "grad_norm": 1.8410695791244507, - "learning_rate": 6.769034232992466e-06, - "loss": 0.5136, + "epoch": 0.558887715593101, + "grad_norm": 2.0590436458587646, + "learning_rate": 6.008966859726287e-06, + "loss": 0.6776, "step": 7939 }, { - "epoch": 0.6008096553289698, - "grad_norm": 2.056565046310425, - "learning_rate": 6.766852378689406e-06, - "loss": 0.6536, + "epoch": 0.5589581133403732, + "grad_norm": 1.5705978870391846, + "learning_rate": 6.007386856486086e-06, + "loss": 0.7331, "step": 7940 }, { - "epoch": 0.6008853240513033, - "grad_norm": 2.2308459281921387, - "learning_rate": 6.764670664233275e-06, - "loss": 0.6653, + "epoch": 0.5590285110876452, + "grad_norm": 1.7697029113769531, + "learning_rate": 6.005806904852738e-06, + "loss": 0.6968, "step": 7941 }, { - "epoch": 0.600960992773637, - "grad_norm": 2.1193389892578125, - "learning_rate": 6.762489089760692e-06, - "loss": 0.6981, + "epoch": 0.5590989088349173, + "grad_norm": 1.9237359762191772, + "learning_rate": 6.004227004908391e-06, + "loss": 0.6228, "step": 7942 }, { - "epoch": 0.6010366614959707, - "grad_norm": 2.764047145843506, - "learning_rate": 6.760307655408282e-06, - "loss": 0.7005, + "epoch": 0.5591693065821893, + "grad_norm": 1.983215093612671, + "learning_rate": 6.002647156735182e-06, + "loss": 0.7387, "step": 7943 }, { - "epoch": 0.6011123302183042, - "grad_norm": 2.1812756061553955, - "learning_rate": 6.758126361312653e-06, - "loss": 0.6901, + "epoch": 0.5592397043294615, + "grad_norm": 1.8781371116638184, + "learning_rate": 6.001067360415252e-06, + "loss": 0.6652, "step": 7944 }, { - "epoch": 0.6011879989406379, - "grad_norm": 2.3195836544036865, - "learning_rate": 6.7559452076104e-06, - "loss": 0.8043, + "epoch": 0.5593101020767336, + "grad_norm": 1.8556292057037354, + "learning_rate": 5.9994876160307345e-06, + "loss": 0.69, "step": 7945 }, { - "epoch": 0.6012636676629715, - "grad_norm": 2.256772994995117, - "learning_rate": 6.753764194438118e-06, - "loss": 0.7321, + "epoch": 0.5593804998240056, + "grad_norm": 1.7247766256332397, + "learning_rate": 5.997907923663762e-06, + "loss": 0.7291, "step": 7946 }, { - "epoch": 0.6013393363853051, - "grad_norm": 1.9437758922576904, - "learning_rate": 6.751583321932382e-06, - "loss": 0.7759, + "epoch": 0.5594508975712777, + "grad_norm": 1.7627551555633545, + "learning_rate": 5.996328283396464e-06, + "loss": 0.5833, "step": 7947 }, { - "epoch": 0.6014150051076388, - "grad_norm": 1.943305253982544, - "learning_rate": 6.749402590229775e-06, - "loss": 0.6389, + "epoch": 0.5595212953185498, + "grad_norm": 1.7377054691314697, + "learning_rate": 5.994748695310969e-06, + "loss": 0.6348, "step": 7948 }, { - "epoch": 0.6014906738299723, - "grad_norm": 2.6814630031585693, - "learning_rate": 6.747221999466858e-06, - "loss": 0.7856, + "epoch": 0.5595916930658219, + "grad_norm": 2.0201780796051025, + "learning_rate": 5.993169159489396e-06, + "loss": 0.7364, "step": 7949 }, { - "epoch": 0.601566342552306, - "grad_norm": 2.330061197280884, - "learning_rate": 6.745041549780184e-06, - "loss": 0.6706, + "epoch": 0.5596620908130939, + "grad_norm": 1.8187053203582764, + "learning_rate": 5.991589676013873e-06, + "loss": 0.661, "step": 7950 }, { - "epoch": 0.6016420112746397, - "grad_norm": 2.5710747241973877, - "learning_rate": 6.742861241306301e-06, - "loss": 0.7252, + "epoch": 0.5597324885603661, + "grad_norm": 1.8144654035568237, + "learning_rate": 5.990010244966515e-06, + "loss": 0.5182, "step": 7951 }, { - "epoch": 0.6017176799969732, - "grad_norm": 2.0507359504699707, - "learning_rate": 6.7406810741817464e-06, - "loss": 0.7641, + "epoch": 0.5598028863076382, + "grad_norm": 2.0133955478668213, + "learning_rate": 5.988430866429441e-06, + "loss": 0.6452, "step": 7952 }, { - "epoch": 0.6017933487193069, - "grad_norm": 2.1858019828796387, - "learning_rate": 6.738501048543054e-06, - "loss": 0.6429, + "epoch": 0.5598732840549102, + "grad_norm": 1.8166987895965576, + "learning_rate": 5.9868515404847605e-06, + "loss": 0.6887, "step": 7953 }, { - "epoch": 0.6018690174416405, - "grad_norm": 1.9703868627548218, - "learning_rate": 6.736321164526739e-06, - "loss": 0.6259, + "epoch": 0.5599436818021823, + "grad_norm": 2.2275285720825195, + "learning_rate": 5.985272267214587e-06, + "loss": 0.7394, "step": 7954 }, { - "epoch": 0.6019446861639741, - "grad_norm": 3.4809346199035645, - "learning_rate": 6.734141422269315e-06, - "loss": 0.6968, + "epoch": 0.5600140795494544, + "grad_norm": 1.905859112739563, + "learning_rate": 5.983693046701024e-06, + "loss": 0.5661, "step": 7955 }, { - "epoch": 0.6020203548863078, - "grad_norm": 1.7865307331085205, - "learning_rate": 6.731961821907283e-06, - "loss": 0.6615, + "epoch": 0.5600844772967265, + "grad_norm": 1.9039393663406372, + "learning_rate": 5.982113879026185e-06, + "loss": 0.802, "step": 7956 }, { - "epoch": 0.6020960236086413, - "grad_norm": 2.6982762813568115, - "learning_rate": 6.729782363577135e-06, - "loss": 0.7117, + "epoch": 0.5601548750439986, + "grad_norm": 2.0951530933380127, + "learning_rate": 5.980534764272164e-06, + "loss": 0.7596, "step": 7957 }, { - "epoch": 0.602171692330975, - "grad_norm": 2.4082143306732178, - "learning_rate": 6.727603047415362e-06, - "loss": 0.884, + "epoch": 0.5602252727912707, + "grad_norm": 1.8769793510437012, + "learning_rate": 5.978955702521065e-06, + "loss": 0.6885, "step": 7958 }, { - "epoch": 0.6022473610533086, - "grad_norm": 1.7152711153030396, - "learning_rate": 6.725423873558435e-06, - "loss": 0.5956, + "epoch": 0.5602956705385428, + "grad_norm": 1.6829839944839478, + "learning_rate": 5.977376693854984e-06, + "loss": 0.6187, "step": 7959 }, { - "epoch": 0.6023230297756422, - "grad_norm": 2.04510760307312, - "learning_rate": 6.723244842142823e-06, - "loss": 0.762, + "epoch": 0.5603660682858148, + "grad_norm": 1.745316743850708, + "learning_rate": 5.975797738356015e-06, + "loss": 0.573, "step": 7960 }, { - "epoch": 0.6023986984979759, - "grad_norm": 2.1493642330169678, - "learning_rate": 6.721065953304981e-06, - "loss": 0.6102, + "epoch": 0.5604364660330869, + "grad_norm": 1.6168361902236938, + "learning_rate": 5.974218836106247e-06, + "loss": 0.6525, "step": 7961 }, { - "epoch": 0.6024743672203094, - "grad_norm": 1.814997911453247, - "learning_rate": 6.718887207181358e-06, - "loss": 0.6394, + "epoch": 0.5605068637803591, + "grad_norm": 1.9214814901351929, + "learning_rate": 5.972639987187773e-06, + "loss": 0.7237, "step": 7962 }, { - "epoch": 0.6025500359426431, - "grad_norm": 2.8531596660614014, - "learning_rate": 6.716708603908399e-06, - "loss": 0.635, + "epoch": 0.5605772615276311, + "grad_norm": 1.498218297958374, + "learning_rate": 5.971061191682675e-06, + "loss": 0.5769, "step": 7963 }, { - "epoch": 0.6026257046649768, - "grad_norm": 2.303659677505493, - "learning_rate": 6.714530143622533e-06, - "loss": 0.8212, + "epoch": 0.5606476592749032, + "grad_norm": 1.6165777444839478, + "learning_rate": 5.969482449673039e-06, + "loss": 0.6724, "step": 7964 }, { - "epoch": 0.6027013733873103, - "grad_norm": 2.530139684677124, - "learning_rate": 6.71235182646018e-06, - "loss": 0.6074, + "epoch": 0.5607180570221753, + "grad_norm": 1.5349630117416382, + "learning_rate": 5.967903761240943e-06, + "loss": 0.6003, "step": 7965 }, { - "epoch": 0.602777042109644, - "grad_norm": 2.0177268981933594, - "learning_rate": 6.710173652557756e-06, - "loss": 0.6609, + "epoch": 0.5607884547694474, + "grad_norm": 1.69106125831604, + "learning_rate": 5.966325126468466e-06, + "loss": 0.6117, "step": 7966 }, { - "epoch": 0.6028527108319776, - "grad_norm": 2.0074338912963867, - "learning_rate": 6.707995622051663e-06, - "loss": 0.764, + "epoch": 0.5608588525167195, + "grad_norm": 1.8375771045684814, + "learning_rate": 5.964746545437679e-06, + "loss": 0.7374, "step": 7967 }, { - "epoch": 0.6029283795543112, - "grad_norm": 1.9502952098846436, - "learning_rate": 6.705817735078295e-06, - "loss": 0.7149, + "epoch": 0.5609292502639915, + "grad_norm": 1.7580147981643677, + "learning_rate": 5.9631680182306595e-06, + "loss": 0.6896, "step": 7968 }, { - "epoch": 0.6030040482766449, - "grad_norm": 2.4225590229034424, - "learning_rate": 6.703639991774045e-06, - "loss": 0.8186, + "epoch": 0.5609996480112637, + "grad_norm": 1.6259013414382935, + "learning_rate": 5.961589544929473e-06, + "loss": 0.6659, "step": 7969 }, { - "epoch": 0.6030797169989784, - "grad_norm": 2.3697550296783447, - "learning_rate": 6.701462392275284e-06, - "loss": 0.7241, + "epoch": 0.5610700457585357, + "grad_norm": 1.7657780647277832, + "learning_rate": 5.960011125616184e-06, + "loss": 0.609, "step": 7970 }, { - "epoch": 0.6031553857213121, - "grad_norm": 1.843167781829834, - "learning_rate": 6.699284936718385e-06, - "loss": 0.5219, + "epoch": 0.5611404435058078, + "grad_norm": 1.7017349004745483, + "learning_rate": 5.958432760372862e-06, + "loss": 0.5578, "step": 7971 }, { - "epoch": 0.6032310544436457, - "grad_norm": 2.5825045108795166, - "learning_rate": 6.697107625239701e-06, - "loss": 0.7933, + "epoch": 0.5612108412530799, + "grad_norm": 1.7058886289596558, + "learning_rate": 5.956854449281562e-06, + "loss": 0.6931, "step": 7972 }, { - "epoch": 0.6033067231659793, - "grad_norm": 1.4268264770507812, - "learning_rate": 6.694930457975585e-06, - "loss": 0.8802, + "epoch": 0.561281239000352, + "grad_norm": 2.355654001235962, + "learning_rate": 5.955276192424346e-06, + "loss": 0.732, "step": 7973 }, { - "epoch": 0.603382391888313, - "grad_norm": 2.36474347114563, - "learning_rate": 6.692753435062383e-06, - "loss": 0.8809, + "epoch": 0.5613516367476241, + "grad_norm": 1.742568016052246, + "learning_rate": 5.953697989883265e-06, + "loss": 0.7647, "step": 7974 }, { - "epoch": 0.6034580606106466, - "grad_norm": 1.9146884679794312, - "learning_rate": 6.6905765566364225e-06, - "loss": 0.6139, + "epoch": 0.5614220344948961, + "grad_norm": 1.7335954904556274, + "learning_rate": 5.952119841740375e-06, + "loss": 0.7305, "step": 7975 }, { - "epoch": 0.6035337293329802, - "grad_norm": 2.1779873371124268, - "learning_rate": 6.688399822834028e-06, - "loss": 0.6991, + "epoch": 0.5614924322421683, + "grad_norm": 2.27711820602417, + "learning_rate": 5.950541748077721e-06, + "loss": 0.6409, "step": 7976 }, { - "epoch": 0.6036093980553139, - "grad_norm": 2.397181272506714, - "learning_rate": 6.686223233791513e-06, - "loss": 0.8585, + "epoch": 0.5615628299894403, + "grad_norm": 1.5322604179382324, + "learning_rate": 5.948963708977354e-06, + "loss": 0.7256, "step": 7977 }, { - "epoch": 0.6036850667776474, - "grad_norm": 2.285151481628418, - "learning_rate": 6.68404678964518e-06, - "loss": 0.7212, + "epoch": 0.5616332277367124, + "grad_norm": 2.0684492588043213, + "learning_rate": 5.947385724521314e-06, + "loss": 0.6971, "step": 7978 }, { - "epoch": 0.6037607354999811, - "grad_norm": 2.0508203506469727, - "learning_rate": 6.681870490531329e-06, - "loss": 0.7258, + "epoch": 0.5617036254839846, + "grad_norm": 1.8770661354064941, + "learning_rate": 5.9458077947916455e-06, + "loss": 0.6825, "step": 7979 }, { - "epoch": 0.6038364042223147, - "grad_norm": 1.981296181678772, - "learning_rate": 6.679694336586247e-06, - "loss": 0.6721, + "epoch": 0.5617740232312566, + "grad_norm": 1.6417081356048584, + "learning_rate": 5.9442299198703835e-06, + "loss": 0.6335, "step": 7980 }, { - "epoch": 0.6039120729446483, - "grad_norm": 2.3068652153015137, - "learning_rate": 6.6775183279462105e-06, - "loss": 0.5249, + "epoch": 0.5618444209785287, + "grad_norm": 1.6648975610733032, + "learning_rate": 5.9426520998395645e-06, + "loss": 0.8031, "step": 7981 }, { - "epoch": 0.603987741666982, - "grad_norm": 2.042125940322876, - "learning_rate": 6.675342464747489e-06, - "loss": 0.7777, + "epoch": 0.5619148187258007, + "grad_norm": 1.7941856384277344, + "learning_rate": 5.941074334781218e-06, + "loss": 0.6504, "step": 7982 }, { - "epoch": 0.6040634103893155, - "grad_norm": 2.7529611587524414, - "learning_rate": 6.673166747126338e-06, - "loss": 0.7063, + "epoch": 0.5619852164730729, + "grad_norm": 1.6762281656265259, + "learning_rate": 5.939496624777379e-06, + "loss": 0.7527, "step": 7983 }, { - "epoch": 0.6041390791116492, - "grad_norm": 1.930591344833374, - "learning_rate": 6.670991175219012e-06, - "loss": 0.7152, + "epoch": 0.562055614220345, + "grad_norm": 1.7374897003173828, + "learning_rate": 5.937918969910068e-06, + "loss": 0.8159, "step": 7984 }, { - "epoch": 0.6042147478339828, - "grad_norm": 1.793343186378479, - "learning_rate": 6.668815749161754e-06, - "loss": 0.5882, + "epoch": 0.562126011967617, + "grad_norm": 1.5118826627731323, + "learning_rate": 5.936341370261316e-06, + "loss": 0.604, "step": 7985 }, { - "epoch": 0.6042904165563164, - "grad_norm": 2.146716356277466, - "learning_rate": 6.666640469090792e-06, - "loss": 0.7066, + "epoch": 0.5621964097148892, + "grad_norm": 1.980568289756775, + "learning_rate": 5.934763825913136e-06, + "loss": 0.6699, "step": 7986 }, { - "epoch": 0.6043660852786501, - "grad_norm": 2.544588327407837, - "learning_rate": 6.664465335142352e-06, - "loss": 0.7241, + "epoch": 0.5622668074621612, + "grad_norm": 1.874399185180664, + "learning_rate": 5.933186336947551e-06, + "loss": 0.5865, "step": 7987 }, { - "epoch": 0.6044417540009837, - "grad_norm": 2.0014796257019043, - "learning_rate": 6.662290347452644e-06, - "loss": 0.6437, + "epoch": 0.5623372052094333, + "grad_norm": 1.669342279434204, + "learning_rate": 5.931608903446572e-06, + "loss": 0.6376, "step": 7988 }, { - "epoch": 0.6045174227233173, - "grad_norm": 2.055544376373291, - "learning_rate": 6.660115506157876e-06, - "loss": 0.5982, + "epoch": 0.5624076029567053, + "grad_norm": 2.170628786087036, + "learning_rate": 5.930031525492216e-06, + "loss": 0.6692, "step": 7989 }, { - "epoch": 0.604593091445651, - "grad_norm": 2.17427134513855, - "learning_rate": 6.6579408113942466e-06, - "loss": 0.6711, + "epoch": 0.5624780007039775, + "grad_norm": 1.756642460823059, + "learning_rate": 5.9284542031664895e-06, + "loss": 0.5137, "step": 7990 }, { - "epoch": 0.6046687601679845, - "grad_norm": 2.4635872840881348, - "learning_rate": 6.655766263297936e-06, - "loss": 0.7088, + "epoch": 0.5625483984512496, + "grad_norm": 1.748650312423706, + "learning_rate": 5.9268769365514e-06, + "loss": 0.668, "step": 7991 }, { - "epoch": 0.6047444288903182, - "grad_norm": 2.311051607131958, - "learning_rate": 6.653591862005126e-06, - "loss": 0.7915, + "epoch": 0.5626187961985216, + "grad_norm": 1.8741737604141235, + "learning_rate": 5.925299725728949e-06, + "loss": 0.5808, "step": 7992 }, { - "epoch": 0.6048200976126518, - "grad_norm": 2.1790106296539307, - "learning_rate": 6.65141760765198e-06, - "loss": 0.5936, + "epoch": 0.5626891939457938, + "grad_norm": 2.2097153663635254, + "learning_rate": 5.923722570781141e-06, + "loss": 0.68, "step": 7993 }, { - "epoch": 0.6048957663349854, - "grad_norm": 2.363032341003418, - "learning_rate": 6.64924350037466e-06, - "loss": 0.7366, + "epoch": 0.5627595916930658, + "grad_norm": 1.6831471920013428, + "learning_rate": 5.922145471789967e-06, + "loss": 0.7248, "step": 7994 }, { - "epoch": 0.6049714350573191, - "grad_norm": 4.0425214767456055, - "learning_rate": 6.6470695403093156e-06, - "loss": 0.6819, + "epoch": 0.5628299894403379, + "grad_norm": 1.8153493404388428, + "learning_rate": 5.92056842883743e-06, + "loss": 0.6609, "step": 7995 }, { - "epoch": 0.6050471037796527, - "grad_norm": 1.7406569719314575, - "learning_rate": 6.6448957275920895e-06, - "loss": 0.5655, + "epoch": 0.56290038718761, + "grad_norm": 1.974644660949707, + "learning_rate": 5.918991442005517e-06, + "loss": 0.5879, "step": 7996 }, { - "epoch": 0.6051227725019863, - "grad_norm": 2.071523666381836, - "learning_rate": 6.642722062359109e-06, - "loss": 0.6732, + "epoch": 0.5629707849348821, + "grad_norm": 2.070871114730835, + "learning_rate": 5.917414511376218e-06, + "loss": 0.6645, "step": 7997 }, { - "epoch": 0.6051984412243199, - "grad_norm": 2.9325242042541504, - "learning_rate": 6.640548544746494e-06, - "loss": 0.7125, + "epoch": 0.5630411826821542, + "grad_norm": 1.852425456047058, + "learning_rate": 5.915837637031518e-06, + "loss": 0.712, "step": 7998 }, { - "epoch": 0.6052741099466535, - "grad_norm": 1.7183152437210083, - "learning_rate": 6.638375174890364e-06, - "loss": 0.6568, + "epoch": 0.5631115804294262, + "grad_norm": 2.1686923503875732, + "learning_rate": 5.9142608190534034e-06, + "loss": 0.6717, "step": 7999 }, { - "epoch": 0.6053497786689872, - "grad_norm": 2.2659685611724854, - "learning_rate": 6.636201952926818e-06, - "loss": 0.6901, + "epoch": 0.5631819781766984, + "grad_norm": 1.632304072380066, + "learning_rate": 5.912684057523848e-06, + "loss": 0.722, "step": 8000 }, { - "epoch": 0.6054254473913208, - "grad_norm": 1.8365509510040283, - "learning_rate": 6.634028878991954e-06, - "loss": 0.6363, + "epoch": 0.5632523759239705, + "grad_norm": 1.6945463418960571, + "learning_rate": 5.911107352524837e-06, + "loss": 0.6435, "step": 8001 }, { - "epoch": 0.6055011161136544, - "grad_norm": 2.3198299407958984, - "learning_rate": 6.631855953221851e-06, - "loss": 0.748, + "epoch": 0.5633227736712425, + "grad_norm": 1.7457093000411987, + "learning_rate": 5.9095307041383394e-06, + "loss": 0.5957, "step": 8002 }, { - "epoch": 0.6055767848359881, - "grad_norm": 2.4509475231170654, - "learning_rate": 6.629683175752586e-06, - "loss": 0.667, + "epoch": 0.5633931714185146, + "grad_norm": 1.9915016889572144, + "learning_rate": 5.907954112446329e-06, + "loss": 0.6193, "step": 8003 }, { - "epoch": 0.6056524535583216, - "grad_norm": 2.3595855236053467, - "learning_rate": 6.62751054672023e-06, - "loss": 0.693, + "epoch": 0.5634635691657867, + "grad_norm": 1.5440304279327393, + "learning_rate": 5.906377577530771e-06, + "loss": 0.6912, "step": 8004 }, { - "epoch": 0.6057281222806553, - "grad_norm": 2.1252949237823486, - "learning_rate": 6.625338066260836e-06, - "loss": 0.766, + "epoch": 0.5635339669130588, + "grad_norm": 1.7858909368515015, + "learning_rate": 5.904801099473637e-06, + "loss": 0.6369, "step": 8005 }, { - "epoch": 0.6058037910029889, - "grad_norm": 1.8091083765029907, - "learning_rate": 6.623165734510455e-06, - "loss": 0.7842, + "epoch": 0.5636043646603308, + "grad_norm": 1.8517088890075684, + "learning_rate": 5.903224678356882e-06, + "loss": 0.743, "step": 8006 }, { - "epoch": 0.6058794597253225, - "grad_norm": 2.139522075653076, - "learning_rate": 6.620993551605123e-06, - "loss": 0.8644, + "epoch": 0.563674762407603, + "grad_norm": 1.6636650562286377, + "learning_rate": 5.901648314262471e-06, + "loss": 0.7854, "step": 8007 }, { - "epoch": 0.6059551284476562, - "grad_norm": 2.6988987922668457, - "learning_rate": 6.618821517680869e-06, - "loss": 0.7944, + "epoch": 0.5637451601548751, + "grad_norm": 1.635767936706543, + "learning_rate": 5.900072007272358e-06, + "loss": 0.8092, "step": 8008 }, { - "epoch": 0.6060307971699898, - "grad_norm": 1.9569789171218872, - "learning_rate": 6.616649632873708e-06, - "loss": 0.7234, + "epoch": 0.5638155579021471, + "grad_norm": 1.8911882638931274, + "learning_rate": 5.898495757468499e-06, + "loss": 0.7298, "step": 8009 }, { - "epoch": 0.6061064658923234, - "grad_norm": 2.4347798824310303, - "learning_rate": 6.614477897319661e-06, - "loss": 0.7029, + "epoch": 0.5638859556494192, + "grad_norm": 1.6515145301818848, + "learning_rate": 5.896919564932842e-06, + "loss": 0.6593, "step": 8010 }, { - "epoch": 0.6061821346146571, - "grad_norm": 2.1535308361053467, - "learning_rate": 6.612306311154722e-06, - "loss": 0.5297, + "epoch": 0.5639563533966913, + "grad_norm": 1.8184762001037598, + "learning_rate": 5.895343429747337e-06, + "loss": 0.6766, "step": 8011 }, { - "epoch": 0.6062578033369906, - "grad_norm": 2.128608465194702, - "learning_rate": 6.6101348745148865e-06, - "loss": 0.5245, + "epoch": 0.5640267511439634, + "grad_norm": 1.4885756969451904, + "learning_rate": 5.8937673519939255e-06, + "loss": 0.6093, "step": 8012 }, { - "epoch": 0.6063334720593243, - "grad_norm": 2.0060644149780273, - "learning_rate": 6.607963587536134e-06, - "loss": 0.6084, + "epoch": 0.5640971488912355, + "grad_norm": 2.0468223094940186, + "learning_rate": 5.892191331754553e-06, + "loss": 0.6486, "step": 8013 }, { - "epoch": 0.6064091407816579, - "grad_norm": 2.5640623569488525, - "learning_rate": 6.605792450354436e-06, - "loss": 0.6693, + "epoch": 0.5641675466385075, + "grad_norm": 1.823989748954773, + "learning_rate": 5.8906153691111554e-06, + "loss": 0.6621, "step": 8014 }, { - "epoch": 0.6064848095039915, - "grad_norm": 2.0857560634613037, - "learning_rate": 6.603621463105762e-06, - "loss": 0.7536, + "epoch": 0.5642379443857797, + "grad_norm": 1.7834594249725342, + "learning_rate": 5.88903946414567e-06, + "loss": 0.7715, "step": 8015 }, { - "epoch": 0.6065604782263252, - "grad_norm": 2.7970449924468994, - "learning_rate": 6.601450625926061e-06, - "loss": 0.8408, + "epoch": 0.5643083421330517, + "grad_norm": 1.958844542503357, + "learning_rate": 5.887463616940029e-06, + "loss": 0.679, "step": 8016 }, { - "epoch": 0.6066361469486587, - "grad_norm": 2.1201984882354736, - "learning_rate": 6.599279938951282e-06, - "loss": 0.6348, + "epoch": 0.5643787398803238, + "grad_norm": 1.7363585233688354, + "learning_rate": 5.885887827576163e-06, + "loss": 0.6168, "step": 8017 }, { - "epoch": 0.6067118156709924, - "grad_norm": 2.2606847286224365, - "learning_rate": 6.597109402317356e-06, - "loss": 0.8402, + "epoch": 0.564449137627596, + "grad_norm": 1.6450824737548828, + "learning_rate": 5.884312096135995e-06, + "loss": 0.5557, "step": 8018 }, { - "epoch": 0.606787484393326, - "grad_norm": 2.860076904296875, - "learning_rate": 6.594939016160209e-06, - "loss": 0.6749, + "epoch": 0.564519535374868, + "grad_norm": 1.9061498641967773, + "learning_rate": 5.882736422701454e-06, + "loss": 0.7686, "step": 8019 }, { - "epoch": 0.6068631531156596, - "grad_norm": 2.2081804275512695, - "learning_rate": 6.592768780615764e-06, - "loss": 0.704, + "epoch": 0.5645899331221401, + "grad_norm": 3.9072365760803223, + "learning_rate": 5.881160807354457e-06, + "loss": 0.7204, "step": 8020 }, { - "epoch": 0.6069388218379933, - "grad_norm": 2.2822539806365967, - "learning_rate": 6.590598695819921e-06, - "loss": 0.6199, + "epoch": 0.5646603308694121, + "grad_norm": 1.7166398763656616, + "learning_rate": 5.8795852501769215e-06, + "loss": 0.742, "step": 8021 }, { - "epoch": 0.6070144905603269, - "grad_norm": 2.0878994464874268, - "learning_rate": 6.588428761908583e-06, - "loss": 0.6372, + "epoch": 0.5647307286166843, + "grad_norm": 1.8614035844802856, + "learning_rate": 5.878009751250767e-06, + "loss": 0.7228, "step": 8022 }, { - "epoch": 0.6070901592826605, - "grad_norm": 2.373242139816284, - "learning_rate": 6.586258979017634e-06, - "loss": 0.7862, + "epoch": 0.5648011263639564, + "grad_norm": 2.0024378299713135, + "learning_rate": 5.8764343106579e-06, + "loss": 0.5854, "step": 8023 }, { - "epoch": 0.6071658280049942, - "grad_norm": 1.9555684328079224, - "learning_rate": 6.584089347282954e-06, - "loss": 0.676, + "epoch": 0.5648715241112284, + "grad_norm": 2.3426942825317383, + "learning_rate": 5.874858928480232e-06, + "loss": 0.7305, "step": 8024 }, { - "epoch": 0.6072414967273277, - "grad_norm": 2.07110595703125, - "learning_rate": 6.581919866840413e-06, - "loss": 0.5871, + "epoch": 0.5649419218585006, + "grad_norm": 2.3908426761627197, + "learning_rate": 5.873283604799666e-06, + "loss": 0.7874, "step": 8025 }, { - "epoch": 0.6073171654496614, - "grad_norm": 3.0451760292053223, - "learning_rate": 6.579750537825874e-06, - "loss": 0.6727, + "epoch": 0.5650123196057726, + "grad_norm": 1.6863834857940674, + "learning_rate": 5.871708339698107e-06, + "loss": 0.6951, "step": 8026 }, { - "epoch": 0.607392834171995, - "grad_norm": 2.067446708679199, - "learning_rate": 6.577581360375182e-06, - "loss": 0.726, + "epoch": 0.5650827173530447, + "grad_norm": 2.3582000732421875, + "learning_rate": 5.87013313325745e-06, + "loss": 0.7462, "step": 8027 }, { - "epoch": 0.6074685028943286, - "grad_norm": 1.995710849761963, - "learning_rate": 6.575412334624183e-06, - "loss": 0.79, + "epoch": 0.5651531151003167, + "grad_norm": 1.8845301866531372, + "learning_rate": 5.868557985559599e-06, + "loss": 0.6994, "step": 8028 }, { - "epoch": 0.6075441716166623, - "grad_norm": 2.084068536758423, - "learning_rate": 6.573243460708701e-06, - "loss": 0.7256, + "epoch": 0.5652235128475889, + "grad_norm": 1.6765400171279907, + "learning_rate": 5.86698289668644e-06, + "loss": 0.8201, "step": 8029 }, { - "epoch": 0.6076198403389959, - "grad_norm": 2.150193929672241, - "learning_rate": 6.571074738764565e-06, - "loss": 0.7144, + "epoch": 0.565293910594861, + "grad_norm": 1.8844454288482666, + "learning_rate": 5.865407866719868e-06, + "loss": 0.7003, "step": 8030 }, { - "epoch": 0.6076955090613295, - "grad_norm": 1.506453514099121, - "learning_rate": 6.568906168927585e-06, - "loss": 0.8205, + "epoch": 0.565364308342133, + "grad_norm": 1.9080604314804077, + "learning_rate": 5.863832895741768e-06, + "loss": 0.7826, "step": 8031 }, { - "epoch": 0.6077711777836631, - "grad_norm": 2.1317901611328125, - "learning_rate": 6.5667377513335645e-06, - "loss": 0.7269, + "epoch": 0.5654347060894052, + "grad_norm": 1.9972426891326904, + "learning_rate": 5.862257983834025e-06, + "loss": 0.7453, "step": 8032 }, { - "epoch": 0.6078468465059967, - "grad_norm": 2.2331316471099854, - "learning_rate": 6.564569486118297e-06, - "loss": 0.8241, + "epoch": 0.5655051038366772, + "grad_norm": 1.8196810483932495, + "learning_rate": 5.860683131078518e-06, + "loss": 0.7026, "step": 8033 }, { - "epoch": 0.6079225152283304, - "grad_norm": 2.03304386138916, - "learning_rate": 6.562401373417562e-06, - "loss": 0.6857, + "epoch": 0.5655755015839493, + "grad_norm": 1.8084654808044434, + "learning_rate": 5.85910833755713e-06, + "loss": 0.6053, "step": 8034 }, { - "epoch": 0.607998183950664, - "grad_norm": 1.8887367248535156, - "learning_rate": 6.560233413367139e-06, - "loss": 0.6265, + "epoch": 0.5656458993312214, + "grad_norm": 2.063110589981079, + "learning_rate": 5.857533603351731e-06, + "loss": 0.5979, "step": 8035 }, { - "epoch": 0.6080738526729976, - "grad_norm": 1.7368558645248413, - "learning_rate": 6.558065606102792e-06, - "loss": 0.6891, + "epoch": 0.5657162970784935, + "grad_norm": 1.9057331085205078, + "learning_rate": 5.855958928544195e-06, + "loss": 0.75, "step": 8036 }, { - "epoch": 0.6081495213953313, - "grad_norm": 2.355769395828247, - "learning_rate": 6.555897951760274e-06, - "loss": 0.7196, + "epoch": 0.5657866948257656, + "grad_norm": 1.8351106643676758, + "learning_rate": 5.854384313216389e-06, + "loss": 0.6529, "step": 8037 }, { - "epoch": 0.6082251901176648, - "grad_norm": 2.221735715866089, - "learning_rate": 6.553730450475333e-06, - "loss": 0.6272, + "epoch": 0.5658570925730376, + "grad_norm": 1.6137727499008179, + "learning_rate": 5.852809757450183e-06, + "loss": 0.7517, "step": 8038 }, { - "epoch": 0.6083008588399985, - "grad_norm": 2.569798231124878, - "learning_rate": 6.551563102383697e-06, - "loss": 0.7951, + "epoch": 0.5659274903203098, + "grad_norm": 1.8229238986968994, + "learning_rate": 5.851235261327433e-06, + "loss": 0.7109, "step": 8039 }, { - "epoch": 0.6083765275623321, - "grad_norm": 1.8355985879898071, - "learning_rate": 6.5493959076211055e-06, - "loss": 0.61, + "epoch": 0.5659978880675819, + "grad_norm": 2.202348232269287, + "learning_rate": 5.849660824930004e-06, + "loss": 0.6136, "step": 8040 }, { - "epoch": 0.6084521962846657, - "grad_norm": 2.0829899311065674, - "learning_rate": 6.547228866323265e-06, - "loss": 0.7505, + "epoch": 0.5660682858148539, + "grad_norm": 1.9608862400054932, + "learning_rate": 5.848086448339751e-06, + "loss": 0.696, "step": 8041 }, { - "epoch": 0.6085278650069994, - "grad_norm": 1.4479275941848755, - "learning_rate": 6.54506197862589e-06, - "loss": 0.827, + "epoch": 0.566138683562126, + "grad_norm": 1.9533987045288086, + "learning_rate": 5.846512131638527e-06, + "loss": 0.7399, "step": 8042 }, { - "epoch": 0.608603533729333, - "grad_norm": 1.3985527753829956, - "learning_rate": 6.542895244664671e-06, - "loss": 0.9008, + "epoch": 0.5662090813093981, + "grad_norm": 2.0814077854156494, + "learning_rate": 5.844937874908181e-06, + "loss": 0.7363, "step": 8043 }, { - "epoch": 0.6086792024516666, - "grad_norm": 1.865014672279358, - "learning_rate": 6.540728664575301e-06, - "loss": 0.7863, + "epoch": 0.5662794790566702, + "grad_norm": 2.2985100746154785, + "learning_rate": 5.843363678230562e-06, + "loss": 0.6788, "step": 8044 }, { - "epoch": 0.6087548711740002, - "grad_norm": 1.861416220664978, - "learning_rate": 6.538562238493453e-06, - "loss": 0.6838, + "epoch": 0.5663498768039422, + "grad_norm": 1.7289729118347168, + "learning_rate": 5.84178954168751e-06, + "loss": 0.65, "step": 8045 }, { - "epoch": 0.6088305398963338, - "grad_norm": 2.162048578262329, - "learning_rate": 6.5363959665547996e-06, - "loss": 0.8778, + "epoch": 0.5664202745512144, + "grad_norm": 1.7576593160629272, + "learning_rate": 5.840215465360872e-06, + "loss": 0.6361, "step": 8046 }, { - "epoch": 0.6089062086186675, - "grad_norm": 2.3792169094085693, - "learning_rate": 6.534229848895002e-06, - "loss": 0.6775, + "epoch": 0.5664906722984865, + "grad_norm": 1.4652199745178223, + "learning_rate": 5.838641449332481e-06, + "loss": 0.679, "step": 8047 }, { - "epoch": 0.6089818773410011, - "grad_norm": 1.898842692375183, - "learning_rate": 6.532063885649705e-06, - "loss": 0.5081, + "epoch": 0.5665610700457585, + "grad_norm": 1.2343019247055054, + "learning_rate": 5.837067493684174e-06, + "loss": 0.6878, "step": 8048 }, { - "epoch": 0.6090575460633347, - "grad_norm": 1.847794532775879, - "learning_rate": 6.52989807695455e-06, - "loss": 0.7093, + "epoch": 0.5666314677930306, + "grad_norm": 1.8032689094543457, + "learning_rate": 5.83549359849778e-06, + "loss": 0.7615, "step": 8049 }, { - "epoch": 0.6091332147856684, - "grad_norm": 1.8337700366973877, - "learning_rate": 6.527732422945164e-06, - "loss": 0.7425, + "epoch": 0.5667018655403027, + "grad_norm": 1.8691171407699585, + "learning_rate": 5.83391976385513e-06, + "loss": 0.7338, "step": 8050 }, { - "epoch": 0.609208883508002, - "grad_norm": 2.419373035430908, - "learning_rate": 6.525566923757172e-06, - "loss": 0.7282, + "epoch": 0.5667722632875748, + "grad_norm": 2.2192437648773193, + "learning_rate": 5.832345989838043e-06, + "loss": 0.7129, "step": 8051 }, { - "epoch": 0.6092845522303356, - "grad_norm": 2.604099750518799, - "learning_rate": 6.5234015795261845e-06, - "loss": 0.7182, + "epoch": 0.5668426610348469, + "grad_norm": 1.7518559694290161, + "learning_rate": 5.83077227652835e-06, + "loss": 0.6778, "step": 8052 }, { - "epoch": 0.6093602209526692, - "grad_norm": 1.8286306858062744, - "learning_rate": 6.5212363903877975e-06, - "loss": 0.6825, + "epoch": 0.566913058782119, + "grad_norm": 1.8813222646713257, + "learning_rate": 5.829198624007864e-06, + "loss": 0.7217, "step": 8053 }, { - "epoch": 0.6094358896750028, - "grad_norm": 2.197711229324341, - "learning_rate": 6.519071356477606e-06, - "loss": 0.6857, + "epoch": 0.5669834565293911, + "grad_norm": 2.211686849594116, + "learning_rate": 5.827625032358402e-06, + "loss": 0.738, "step": 8054 }, { - "epoch": 0.6095115583973365, - "grad_norm": 2.546816110610962, - "learning_rate": 6.516906477931188e-06, - "loss": 0.6036, + "epoch": 0.5670538542766631, + "grad_norm": 2.329256534576416, + "learning_rate": 5.826051501661776e-06, + "loss": 0.7365, "step": 8055 }, { - "epoch": 0.6095872271196701, - "grad_norm": 1.9592362642288208, - "learning_rate": 6.514741754884122e-06, - "loss": 0.8768, + "epoch": 0.5671242520239352, + "grad_norm": 1.531620740890503, + "learning_rate": 5.824478031999797e-06, + "loss": 0.6343, "step": 8056 }, { - "epoch": 0.6096628958420037, - "grad_norm": 2.2540531158447266, - "learning_rate": 6.512577187471963e-06, - "loss": 0.8306, + "epoch": 0.5671946497712074, + "grad_norm": 1.7742172479629517, + "learning_rate": 5.8229046234542664e-06, + "loss": 0.6319, "step": 8057 }, { - "epoch": 0.6097385645643373, - "grad_norm": 2.1701200008392334, - "learning_rate": 6.510412775830269e-06, - "loss": 0.7949, + "epoch": 0.5672650475184794, + "grad_norm": 2.907712459564209, + "learning_rate": 5.821331276106994e-06, + "loss": 0.7352, "step": 8058 }, { - "epoch": 0.609814233286671, - "grad_norm": 2.3326847553253174, - "learning_rate": 6.508248520094577e-06, - "loss": 0.8873, + "epoch": 0.5673354452657515, + "grad_norm": 1.7791705131530762, + "learning_rate": 5.819757990039774e-06, + "loss": 0.6297, "step": 8059 }, { - "epoch": 0.6098899020090046, - "grad_norm": 2.0343968868255615, - "learning_rate": 6.506084420400419e-06, - "loss": 0.6667, + "epoch": 0.5674058430130235, + "grad_norm": 1.8914343118667603, + "learning_rate": 5.818184765334407e-06, + "loss": 0.7111, "step": 8060 }, { - "epoch": 0.6099655707313382, - "grad_norm": 2.17753529548645, - "learning_rate": 6.503920476883326e-06, - "loss": 0.7618, + "epoch": 0.5674762407602957, + "grad_norm": 1.8173452615737915, + "learning_rate": 5.816611602072682e-06, + "loss": 0.5908, "step": 8061 }, { - "epoch": 0.6100412394536718, - "grad_norm": 2.2259414196014404, - "learning_rate": 6.501756689678802e-06, - "loss": 0.629, + "epoch": 0.5675466385075678, + "grad_norm": 1.5978857278823853, + "learning_rate": 5.815038500336394e-06, + "loss": 0.6885, "step": 8062 }, { - "epoch": 0.6101169081760055, - "grad_norm": 2.3345701694488525, - "learning_rate": 6.4995930589223575e-06, - "loss": 0.74, + "epoch": 0.5676170362548398, + "grad_norm": 1.670300006866455, + "learning_rate": 5.813465460207323e-06, + "loss": 0.7123, "step": 8063 }, { - "epoch": 0.6101925768983391, - "grad_norm": 2.023045301437378, - "learning_rate": 6.497429584749482e-06, - "loss": 0.8297, + "epoch": 0.567687434002112, + "grad_norm": 12.119657516479492, + "learning_rate": 5.8118924817672614e-06, + "loss": 0.7516, "step": 8064 }, { - "epoch": 0.6102682456206727, - "grad_norm": 2.1104860305786133, - "learning_rate": 6.495266267295658e-06, - "loss": 0.8041, + "epoch": 0.567757831749384, + "grad_norm": 1.9405988454818726, + "learning_rate": 5.810319565097984e-06, + "loss": 0.6908, "step": 8065 }, { - "epoch": 0.6103439143430063, - "grad_norm": 2.1051814556121826, - "learning_rate": 6.493103106696364e-06, - "loss": 0.7396, + "epoch": 0.5678282294966561, + "grad_norm": 1.9134366512298584, + "learning_rate": 5.808746710281272e-06, + "loss": 0.5622, "step": 8066 }, { - "epoch": 0.6104195830653399, - "grad_norm": 1.9070154428482056, - "learning_rate": 6.490940103087062e-06, - "loss": 0.7472, + "epoch": 0.5678986272439281, + "grad_norm": 1.795207142829895, + "learning_rate": 5.807173917398895e-06, + "loss": 0.6403, "step": 8067 }, { - "epoch": 0.6104952517876736, - "grad_norm": 2.5003867149353027, - "learning_rate": 6.488777256603204e-06, - "loss": 0.8995, + "epoch": 0.5679690249912003, + "grad_norm": 1.5551453828811646, + "learning_rate": 5.8056011865326285e-06, + "loss": 0.6548, "step": 8068 }, { - "epoch": 0.6105709205100072, - "grad_norm": 2.510374069213867, - "learning_rate": 6.486614567380239e-06, - "loss": 0.7385, + "epoch": 0.5680394227384724, + "grad_norm": 1.9095842838287354, + "learning_rate": 5.8040285177642345e-06, + "loss": 0.6782, "step": 8069 }, { - "epoch": 0.6106465892323408, - "grad_norm": 1.955937385559082, - "learning_rate": 6.484452035553597e-06, - "loss": 0.728, + "epoch": 0.5681098204857444, + "grad_norm": 1.7711478471755981, + "learning_rate": 5.802455911175484e-06, + "loss": 0.6824, "step": 8070 }, { - "epoch": 0.6107222579546744, - "grad_norm": 1.8284022808074951, - "learning_rate": 6.482289661258704e-06, - "loss": 0.6269, + "epoch": 0.5681802182330166, + "grad_norm": 1.954138159751892, + "learning_rate": 5.800883366848134e-06, + "loss": 0.6621, "step": 8071 }, { - "epoch": 0.610797926677008, - "grad_norm": 1.7981479167938232, - "learning_rate": 6.4801274446309794e-06, - "loss": 0.7175, + "epoch": 0.5682506159802886, + "grad_norm": 1.641762375831604, + "learning_rate": 5.799310884863943e-06, + "loss": 0.5183, "step": 8072 }, { - "epoch": 0.6108735953993417, - "grad_norm": 1.8497958183288574, - "learning_rate": 6.477965385805822e-06, - "loss": 0.7925, + "epoch": 0.5683210137275607, + "grad_norm": 2.1440935134887695, + "learning_rate": 5.79773846530467e-06, + "loss": 0.7574, "step": 8073 }, { - "epoch": 0.6109492641216753, - "grad_norm": 1.7783727645874023, - "learning_rate": 6.475803484918631e-06, - "loss": 0.6708, + "epoch": 0.5683914114748329, + "grad_norm": 1.8141509294509888, + "learning_rate": 5.796166108252058e-06, + "loss": 0.6859, "step": 8074 }, { - "epoch": 0.6110249328440089, - "grad_norm": 1.979129433631897, - "learning_rate": 6.473641742104787e-06, - "loss": 0.5945, + "epoch": 0.5684618092221049, + "grad_norm": 1.6928112506866455, + "learning_rate": 5.794593813787865e-06, + "loss": 0.5669, "step": 8075 }, { - "epoch": 0.6111006015663426, - "grad_norm": 1.9033944606781006, - "learning_rate": 6.4714801574996695e-06, - "loss": 0.6975, + "epoch": 0.568532206969377, + "grad_norm": 1.8217519521713257, + "learning_rate": 5.793021581993828e-06, + "loss": 0.6889, "step": 8076 }, { - "epoch": 0.6111762702886762, - "grad_norm": 1.668627381324768, - "learning_rate": 6.469318731238645e-06, - "loss": 0.6458, + "epoch": 0.568602604716649, + "grad_norm": 2.1448256969451904, + "learning_rate": 5.791449412951694e-06, + "loss": 0.5965, "step": 8077 }, { - "epoch": 0.6112519390110098, - "grad_norm": 2.2541165351867676, - "learning_rate": 6.467157463457064e-06, - "loss": 0.6398, + "epoch": 0.5686730024639212, + "grad_norm": 1.5726782083511353, + "learning_rate": 5.789877306743198e-06, + "loss": 0.7748, "step": 8078 }, { - "epoch": 0.6113276077333434, - "grad_norm": 2.112131118774414, - "learning_rate": 6.464996354290277e-06, - "loss": 0.7406, + "epoch": 0.5687434002111933, + "grad_norm": 1.774440050125122, + "learning_rate": 5.788305263450078e-06, + "loss": 0.5501, "step": 8079 }, { - "epoch": 0.611403276455677, - "grad_norm": 1.7680984735488892, - "learning_rate": 6.462835403873615e-06, - "loss": 0.7407, + "epoch": 0.5688137979584653, + "grad_norm": 2.2894399166107178, + "learning_rate": 5.786733283154064e-06, + "loss": 0.6726, "step": 8080 }, { - "epoch": 0.6114789451780107, - "grad_norm": 1.924974799156189, - "learning_rate": 6.460674612342407e-06, - "loss": 0.7928, + "epoch": 0.5688841957057374, + "grad_norm": 1.7112470865249634, + "learning_rate": 5.785161365936887e-06, + "loss": 0.6936, "step": 8081 }, { - "epoch": 0.6115546139003443, - "grad_norm": 1.6907529830932617, - "learning_rate": 6.458513979831969e-06, - "loss": 0.7178, + "epoch": 0.5689545934530095, + "grad_norm": 1.8500607013702393, + "learning_rate": 5.7835895118802686e-06, + "loss": 0.6149, "step": 8082 }, { - "epoch": 0.6116302826226779, - "grad_norm": 2.3238351345062256, - "learning_rate": 6.456353506477607e-06, - "loss": 0.7008, + "epoch": 0.5690249912002816, + "grad_norm": 2.1945598125457764, + "learning_rate": 5.782017721065936e-06, + "loss": 0.7967, "step": 8083 }, { - "epoch": 0.6117059513450115, - "grad_norm": 1.6362533569335938, - "learning_rate": 6.454193192414613e-06, - "loss": 0.6097, + "epoch": 0.5690953889475536, + "grad_norm": 1.7712244987487793, + "learning_rate": 5.780445993575601e-06, + "loss": 0.6958, "step": 8084 }, { - "epoch": 0.6117816200673452, - "grad_norm": 2.3989768028259277, - "learning_rate": 6.452033037778277e-06, - "loss": 0.7459, + "epoch": 0.5691657866948258, + "grad_norm": 1.9915916919708252, + "learning_rate": 5.778874329490987e-06, + "loss": 0.6951, "step": 8085 }, { - "epoch": 0.6118572887896788, - "grad_norm": 2.1705870628356934, - "learning_rate": 6.449873042703871e-06, - "loss": 0.6697, + "epoch": 0.5692361844420979, + "grad_norm": 2.1172213554382324, + "learning_rate": 5.7773027288938005e-06, + "loss": 0.6194, "step": 8086 }, { - "epoch": 0.6119329575120124, - "grad_norm": 2.115978240966797, - "learning_rate": 6.4477132073266645e-06, - "loss": 0.6262, + "epoch": 0.5693065821893699, + "grad_norm": 1.949658989906311, + "learning_rate": 5.775731191865754e-06, + "loss": 0.6513, "step": 8087 }, { - "epoch": 0.612008626234346, - "grad_norm": 1.8548389673233032, - "learning_rate": 6.445553531781915e-06, - "loss": 0.6613, + "epoch": 0.569376979936642, + "grad_norm": 1.748931646347046, + "learning_rate": 5.77415971848855e-06, + "loss": 0.65, "step": 8088 }, { - "epoch": 0.6120842949566797, - "grad_norm": 1.7452164888381958, - "learning_rate": 6.443394016204861e-06, - "loss": 0.6711, + "epoch": 0.5694473776839141, + "grad_norm": 1.9200104475021362, + "learning_rate": 5.772588308843895e-06, + "loss": 0.6614, "step": 8089 }, { - "epoch": 0.6121599636790133, - "grad_norm": 1.9946900606155396, - "learning_rate": 6.441234660730747e-06, - "loss": 0.6328, + "epoch": 0.5695177754311862, + "grad_norm": 2.379971504211426, + "learning_rate": 5.771016963013481e-06, + "loss": 0.7204, "step": 8090 }, { - "epoch": 0.6122356324013469, - "grad_norm": 2.100299596786499, - "learning_rate": 6.43907546549479e-06, - "loss": 0.7854, + "epoch": 0.5695881731784583, + "grad_norm": 1.9537075757980347, + "learning_rate": 5.769445681079011e-06, + "loss": 0.745, "step": 8091 }, { - "epoch": 0.6123113011236805, - "grad_norm": 2.174600124359131, - "learning_rate": 6.4369164306322125e-06, - "loss": 0.7751, + "epoch": 0.5696585709257304, + "grad_norm": 1.9772095680236816, + "learning_rate": 5.767874463122174e-06, + "loss": 0.6636, "step": 8092 }, { - "epoch": 0.6123869698460142, - "grad_norm": 1.753521203994751, - "learning_rate": 6.434757556278219e-06, - "loss": 0.5145, + "epoch": 0.5697289686730025, + "grad_norm": 1.6935769319534302, + "learning_rate": 5.7663033092246605e-06, + "loss": 0.6161, "step": 8093 }, { - "epoch": 0.6124626385683478, - "grad_norm": 1.6871925592422485, - "learning_rate": 6.432598842568003e-06, - "loss": 0.6158, + "epoch": 0.5697993664202745, + "grad_norm": 1.5233746767044067, + "learning_rate": 5.7647322194681536e-06, + "loss": 0.6547, "step": 8094 }, { - "epoch": 0.6125383072906814, - "grad_norm": 1.852492332458496, - "learning_rate": 6.430440289636754e-06, - "loss": 0.613, + "epoch": 0.5698697641675466, + "grad_norm": 1.8158254623413086, + "learning_rate": 5.7631611939343395e-06, + "loss": 0.6366, "step": 8095 }, { - "epoch": 0.612613976013015, - "grad_norm": 1.7599942684173584, - "learning_rate": 6.428281897619638e-06, - "loss": 0.8107, + "epoch": 0.5699401619148188, + "grad_norm": 1.897362232208252, + "learning_rate": 5.761590232704892e-06, + "loss": 0.7039, "step": 8096 }, { - "epoch": 0.6126896447353486, - "grad_norm": 2.176158905029297, - "learning_rate": 6.4261236666518345e-06, - "loss": 0.6481, + "epoch": 0.5700105596620908, + "grad_norm": 1.790728211402893, + "learning_rate": 5.760019335861493e-06, + "loss": 0.5871, "step": 8097 }, { - "epoch": 0.6127653134576823, - "grad_norm": 2.7235372066497803, - "learning_rate": 6.423965596868489e-06, - "loss": 0.7695, + "epoch": 0.5700809574093629, + "grad_norm": 2.2662363052368164, + "learning_rate": 5.75844850348581e-06, + "loss": 0.6885, "step": 8098 }, { - "epoch": 0.6128409821800159, - "grad_norm": 1.6822925806045532, - "learning_rate": 6.421807688404753e-06, - "loss": 0.533, + "epoch": 0.570151355156635, + "grad_norm": 1.8791894912719727, + "learning_rate": 5.756877735659514e-06, + "loss": 0.7595, "step": 8099 }, { - "epoch": 0.6129166509023495, - "grad_norm": 1.8413187265396118, - "learning_rate": 6.419649941395756e-06, - "loss": 0.6069, + "epoch": 0.5702217529039071, + "grad_norm": 1.7183377742767334, + "learning_rate": 5.755307032464271e-06, + "loss": 0.5573, "step": 8100 }, { - "epoch": 0.6129923196246831, - "grad_norm": 1.9572733640670776, - "learning_rate": 6.417492355976624e-06, - "loss": 0.7027, + "epoch": 0.5702921506511791, + "grad_norm": 1.7903143167495728, + "learning_rate": 5.753736393981742e-06, + "loss": 0.6341, "step": 8101 }, { - "epoch": 0.6130679883470168, - "grad_norm": 2.1810455322265625, - "learning_rate": 6.4153349322824765e-06, - "loss": 0.6632, + "epoch": 0.5703625483984512, + "grad_norm": 2.0109353065490723, + "learning_rate": 5.752165820293585e-06, + "loss": 0.6693, "step": 8102 }, { - "epoch": 0.6131436570693504, - "grad_norm": 2.011125326156616, - "learning_rate": 6.413177670448413e-06, - "loss": 0.7041, + "epoch": 0.5704329461457234, + "grad_norm": 1.944618582725525, + "learning_rate": 5.75059531148146e-06, + "loss": 0.6606, "step": 8103 }, { - "epoch": 0.613219325791684, - "grad_norm": 2.0759332180023193, - "learning_rate": 6.411020570609533e-06, - "loss": 0.7887, + "epoch": 0.5705033438929954, + "grad_norm": 2.2054436206817627, + "learning_rate": 5.7490248676270145e-06, + "loss": 0.6955, "step": 8104 }, { - "epoch": 0.6132949945140176, - "grad_norm": 1.7652744054794312, - "learning_rate": 6.408863632900918e-06, - "loss": 0.6336, + "epoch": 0.5705737416402675, + "grad_norm": 1.7767367362976074, + "learning_rate": 5.747454488811901e-06, + "loss": 0.6406, "step": 8105 }, { - "epoch": 0.6133706632363513, - "grad_norm": 1.9956303834915161, - "learning_rate": 6.406706857457639e-06, - "loss": 0.7777, + "epoch": 0.5706441393875396, + "grad_norm": 1.6355843544006348, + "learning_rate": 5.745884175117761e-06, + "loss": 0.5783, "step": 8106 }, { - "epoch": 0.6134463319586849, - "grad_norm": 2.095097780227661, - "learning_rate": 6.40455024441477e-06, - "loss": 0.6843, + "epoch": 0.5707145371348117, + "grad_norm": 1.7044286727905273, + "learning_rate": 5.74431392662624e-06, + "loss": 0.7362, "step": 8107 }, { - "epoch": 0.6135220006810185, - "grad_norm": 2.287135601043701, - "learning_rate": 6.402393793907355e-06, - "loss": 0.7009, + "epoch": 0.5707849348820838, + "grad_norm": 1.6245155334472656, + "learning_rate": 5.742743743418972e-06, + "loss": 0.7232, "step": 8108 }, { - "epoch": 0.6135976694033521, - "grad_norm": 1.8834456205368042, - "learning_rate": 6.4002375060704465e-06, - "loss": 0.6393, + "epoch": 0.5708553326293558, + "grad_norm": 1.5250318050384521, + "learning_rate": 5.741173625577598e-06, + "loss": 0.6306, "step": 8109 }, { - "epoch": 0.6136733381256857, - "grad_norm": 4.113138198852539, - "learning_rate": 6.398081381039072e-06, - "loss": 0.7464, + "epoch": 0.570925730376628, + "grad_norm": 1.8652479648590088, + "learning_rate": 5.739603573183747e-06, + "loss": 0.6656, "step": 8110 }, { - "epoch": 0.6137490068480194, - "grad_norm": 1.7231311798095703, - "learning_rate": 6.395925418948255e-06, - "loss": 0.6792, + "epoch": 0.5709961281239, + "grad_norm": 1.729027271270752, + "learning_rate": 5.73803358631905e-06, + "loss": 0.6319, "step": 8111 }, { - "epoch": 0.613824675570353, - "grad_norm": 3.072347640991211, - "learning_rate": 6.3937696199330116e-06, - "loss": 0.5752, + "epoch": 0.5710665258711721, + "grad_norm": 1.7715139389038086, + "learning_rate": 5.7364636650651265e-06, + "loss": 0.6893, "step": 8112 }, { - "epoch": 0.6139003442926866, - "grad_norm": 1.6998788118362427, - "learning_rate": 6.3916139841283465e-06, - "loss": 0.6967, + "epoch": 0.5711369236184443, + "grad_norm": 1.8506479263305664, + "learning_rate": 5.734893809503603e-06, + "loss": 0.7174, "step": 8113 }, { - "epoch": 0.6139760130150202, - "grad_norm": 2.079223394393921, - "learning_rate": 6.3894585116692496e-06, - "loss": 0.6728, + "epoch": 0.5712073213657163, + "grad_norm": 1.8871264457702637, + "learning_rate": 5.733324019716093e-06, + "loss": 0.6385, "step": 8114 }, { - "epoch": 0.6140516817373539, - "grad_norm": 2.595216751098633, - "learning_rate": 6.387303202690705e-06, - "loss": 0.6351, + "epoch": 0.5712777191129884, + "grad_norm": 1.7246164083480835, + "learning_rate": 5.731754295784218e-06, + "loss": 0.7702, "step": 8115 }, { - "epoch": 0.6141273504596875, - "grad_norm": 2.1260061264038086, - "learning_rate": 6.385148057327681e-06, - "loss": 0.7492, + "epoch": 0.5713481168602604, + "grad_norm": 1.5287353992462158, + "learning_rate": 5.730184637789584e-06, + "loss": 0.669, "step": 8116 }, { - "epoch": 0.6142030191820211, - "grad_norm": 1.9894077777862549, - "learning_rate": 6.382993075715144e-06, - "loss": 0.8202, + "epoch": 0.5714185146075326, + "grad_norm": 4.823519706726074, + "learning_rate": 5.728615045813803e-06, + "loss": 0.7244, "step": 8117 }, { - "epoch": 0.6142786879043547, - "grad_norm": 3.4752848148345947, - "learning_rate": 6.380838257988048e-06, - "loss": 0.6512, + "epoch": 0.5714889123548047, + "grad_norm": 1.9422423839569092, + "learning_rate": 5.727045519938474e-06, + "loss": 0.7403, "step": 8118 }, { - "epoch": 0.6143543566266884, - "grad_norm": 2.0392558574676514, - "learning_rate": 6.378683604281329e-06, - "loss": 0.6064, + "epoch": 0.5715593101020767, + "grad_norm": 1.8559401035308838, + "learning_rate": 5.725476060245204e-06, + "loss": 0.7102, "step": 8119 }, { - "epoch": 0.614430025349022, - "grad_norm": 1.8147621154785156, - "learning_rate": 6.376529114729924e-06, - "loss": 0.6444, + "epoch": 0.5716297078493489, + "grad_norm": 1.7097687721252441, + "learning_rate": 5.723906666815585e-06, + "loss": 0.6573, "step": 8120 }, { - "epoch": 0.6145056940713556, - "grad_norm": 2.527744770050049, - "learning_rate": 6.374374789468749e-06, - "loss": 0.6505, + "epoch": 0.5717001055966209, + "grad_norm": 1.731164574623108, + "learning_rate": 5.722337339731215e-06, + "loss": 0.6558, "step": 8121 }, { - "epoch": 0.6145813627936892, - "grad_norm": 2.176684856414795, - "learning_rate": 6.372220628632714e-06, - "loss": 0.5707, + "epoch": 0.571770503343893, + "grad_norm": 1.9202227592468262, + "learning_rate": 5.720768079073686e-06, + "loss": 0.634, "step": 8122 }, { - "epoch": 0.6146570315160228, - "grad_norm": 2.0857019424438477, - "learning_rate": 6.3700666323567265e-06, - "loss": 0.7341, + "epoch": 0.571840901091165, + "grad_norm": 1.6523429155349731, + "learning_rate": 5.71919888492458e-06, + "loss": 0.7238, "step": 8123 }, { - "epoch": 0.6147327002383565, - "grad_norm": 2.4121081829071045, - "learning_rate": 6.3679128007756724e-06, - "loss": 0.7551, + "epoch": 0.5719112988384372, + "grad_norm": 1.6720142364501953, + "learning_rate": 5.7176297573654875e-06, + "loss": 0.6996, "step": 8124 }, { - "epoch": 0.6148083689606901, - "grad_norm": 2.668250560760498, - "learning_rate": 6.365759134024433e-06, - "loss": 0.7629, + "epoch": 0.5719816965857093, + "grad_norm": 1.8736917972564697, + "learning_rate": 5.7160606964779815e-06, + "loss": 0.6947, "step": 8125 }, { - "epoch": 0.6148840376830237, - "grad_norm": 1.942336916923523, - "learning_rate": 6.363605632237874e-06, - "loss": 0.6856, + "epoch": 0.5720520943329813, + "grad_norm": 1.526715636253357, + "learning_rate": 5.714491702343645e-06, + "loss": 0.588, "step": 8126 }, { - "epoch": 0.6149597064053574, - "grad_norm": 2.963472843170166, - "learning_rate": 6.361452295550856e-06, - "loss": 0.684, + "epoch": 0.5721224920802535, + "grad_norm": 2.2151427268981934, + "learning_rate": 5.712922775044048e-06, + "loss": 0.8473, "step": 8127 }, { - "epoch": 0.615035375127691, - "grad_norm": 2.0592167377471924, - "learning_rate": 6.359299124098231e-06, - "loss": 0.7157, + "epoch": 0.5721928898275255, + "grad_norm": 2.178990364074707, + "learning_rate": 5.711353914660764e-06, + "loss": 0.6591, "step": 8128 }, { - "epoch": 0.6151110438500246, - "grad_norm": 2.172013282775879, - "learning_rate": 6.3571461180148395e-06, - "loss": 0.6001, + "epoch": 0.5722632875747976, + "grad_norm": 1.8896424770355225, + "learning_rate": 5.709785121275353e-06, + "loss": 0.7649, "step": 8129 }, { - "epoch": 0.6151867125723582, - "grad_norm": 2.4280693531036377, - "learning_rate": 6.354993277435503e-06, - "loss": 0.5956, + "epoch": 0.5723336853220697, + "grad_norm": 1.9149203300476074, + "learning_rate": 5.708216394969385e-06, + "loss": 0.7582, "step": 8130 }, { - "epoch": 0.6152623812946918, - "grad_norm": 1.9213684797286987, - "learning_rate": 6.352840602495044e-06, - "loss": 0.59, + "epoch": 0.5724040830693418, + "grad_norm": 1.8181264400482178, + "learning_rate": 5.706647735824416e-06, + "loss": 0.6763, "step": 8131 }, { - "epoch": 0.6153380500170255, - "grad_norm": 2.0935068130493164, - "learning_rate": 6.350688093328266e-06, - "loss": 0.6688, + "epoch": 0.5724744808166139, + "grad_norm": 1.684678554534912, + "learning_rate": 5.705079143922004e-06, + "loss": 0.648, "step": 8132 }, { - "epoch": 0.6154137187393591, - "grad_norm": 3.5406651496887207, - "learning_rate": 6.348535750069969e-06, - "loss": 0.8106, + "epoch": 0.5725448785638859, + "grad_norm": 1.6635394096374512, + "learning_rate": 5.703510619343697e-06, + "loss": 0.6746, "step": 8133 }, { - "epoch": 0.6154893874616927, - "grad_norm": 2.5586190223693848, - "learning_rate": 6.346383572854942e-06, - "loss": 0.6554, + "epoch": 0.572615276311158, + "grad_norm": 2.103398084640503, + "learning_rate": 5.70194216217105e-06, + "loss": 0.6563, "step": 8134 }, { - "epoch": 0.6155650561840263, - "grad_norm": 5.290948390960693, - "learning_rate": 6.344231561817956e-06, - "loss": 0.5677, + "epoch": 0.5726856740584302, + "grad_norm": 1.9093661308288574, + "learning_rate": 5.700373772485603e-06, + "loss": 0.7089, "step": 8135 }, { - "epoch": 0.6156407249063599, - "grad_norm": 9.015077590942383, - "learning_rate": 6.342079717093782e-06, - "loss": 0.7897, + "epoch": 0.5727560718057022, + "grad_norm": 1.6111443042755127, + "learning_rate": 5.698805450368902e-06, + "loss": 0.7023, "step": 8136 }, { - "epoch": 0.6157163936286936, - "grad_norm": 8.069103240966797, - "learning_rate": 6.339928038817168e-06, - "loss": 0.741, + "epoch": 0.5728264695529743, + "grad_norm": 2.111361503601074, + "learning_rate": 5.697237195902483e-06, + "loss": 0.7017, "step": 8137 }, { - "epoch": 0.6157920623510272, - "grad_norm": 58.060359954833984, - "learning_rate": 6.337776527122865e-06, - "loss": 0.7947, + "epoch": 0.5728968673002464, + "grad_norm": 1.9252454042434692, + "learning_rate": 5.695669009167884e-06, + "loss": 0.6279, "step": 8138 }, { - "epoch": 0.6158677310733608, - "grad_norm": 12.597784996032715, - "learning_rate": 6.335625182145611e-06, - "loss": 0.6853, + "epoch": 0.5729672650475185, + "grad_norm": 1.6173492670059204, + "learning_rate": 5.694100890246633e-06, + "loss": 0.5496, "step": 8139 }, { - "epoch": 0.6159433997956945, - "grad_norm": 2.0326411724090576, - "learning_rate": 6.333474004020123e-06, - "loss": 0.672, + "epoch": 0.5730376627947905, + "grad_norm": 1.772667646408081, + "learning_rate": 5.692532839220261e-06, + "loss": 0.8992, "step": 8140 }, { - "epoch": 0.6160190685180281, - "grad_norm": 2.0402638912200928, - "learning_rate": 6.331322992881118e-06, - "loss": 0.7124, + "epoch": 0.5731080605420626, + "grad_norm": 1.6319092512130737, + "learning_rate": 5.690964856170287e-06, + "loss": 0.6369, "step": 8141 }, { - "epoch": 0.6160947372403617, - "grad_norm": 4.363745212554932, - "learning_rate": 6.329172148863294e-06, - "loss": 0.8492, + "epoch": 0.5731784582893348, + "grad_norm": 1.7490017414093018, + "learning_rate": 5.689396941178239e-06, + "loss": 0.7507, "step": 8142 }, { - "epoch": 0.6161704059626953, - "grad_norm": 4.418940544128418, - "learning_rate": 6.327021472101355e-06, - "loss": 0.6559, + "epoch": 0.5732488560366068, + "grad_norm": 2.2018632888793945, + "learning_rate": 5.687829094325631e-06, + "loss": 0.6556, "step": 8143 }, { - "epoch": 0.6162460746850289, - "grad_norm": 2.802222728729248, - "learning_rate": 6.3248709627299735e-06, - "loss": 0.7396, + "epoch": 0.5733192537838789, + "grad_norm": 1.6492971181869507, + "learning_rate": 5.686261315693976e-06, + "loss": 0.8279, "step": 8144 }, { - "epoch": 0.6163217434073626, - "grad_norm": 2.031181573867798, - "learning_rate": 6.322720620883827e-06, - "loss": 0.6788, + "epoch": 0.573389651531151, + "grad_norm": 1.5351334810256958, + "learning_rate": 5.6846936053647844e-06, + "loss": 0.7324, "step": 8145 }, { - "epoch": 0.6163974121296962, - "grad_norm": 2.4163577556610107, - "learning_rate": 6.320570446697574e-06, - "loss": 0.8298, + "epoch": 0.5734600492784231, + "grad_norm": 1.7504860162734985, + "learning_rate": 5.683125963419565e-06, + "loss": 0.7498, "step": 8146 }, { - "epoch": 0.6164730808520298, - "grad_norm": 2.520803928375244, - "learning_rate": 6.318420440305863e-06, - "loss": 0.8713, + "epoch": 0.5735304470256952, + "grad_norm": 1.8726359605789185, + "learning_rate": 5.681558389939817e-06, + "loss": 0.6442, "step": 8147 }, { - "epoch": 0.6165487495743635, - "grad_norm": 2.569690227508545, - "learning_rate": 6.316270601843342e-06, - "loss": 0.5737, + "epoch": 0.5736008447729672, + "grad_norm": 1.8259636163711548, + "learning_rate": 5.6799908850070445e-06, + "loss": 0.5939, "step": 8148 }, { - "epoch": 0.616624418296697, - "grad_norm": 2.0397837162017822, - "learning_rate": 6.314120931444631e-06, - "loss": 0.6953, + "epoch": 0.5736712425202394, + "grad_norm": 1.8069688081741333, + "learning_rate": 5.67842344870274e-06, + "loss": 0.771, "step": 8149 }, { - "epoch": 0.6167000870190307, - "grad_norm": 2.84653639793396, - "learning_rate": 6.31197142924436e-06, - "loss": 0.7573, + "epoch": 0.5737416402675114, + "grad_norm": 3.8866238594055176, + "learning_rate": 5.676856081108398e-06, + "loss": 0.5725, "step": 8150 }, { - "epoch": 0.6167757557413643, - "grad_norm": 2.532266616821289, - "learning_rate": 6.30982209537713e-06, - "loss": 0.5795, + "epoch": 0.5738120380147835, + "grad_norm": 1.8992180824279785, + "learning_rate": 5.675288782305507e-06, + "loss": 0.7524, "step": 8151 }, { - "epoch": 0.6168514244636979, - "grad_norm": 2.0461106300354004, - "learning_rate": 6.307672929977539e-06, - "loss": 0.7261, + "epoch": 0.5738824357620557, + "grad_norm": 1.753021001815796, + "learning_rate": 5.673721552375552e-06, + "loss": 0.8048, "step": 8152 }, { - "epoch": 0.6169270931860316, - "grad_norm": 2.5015604496002197, - "learning_rate": 6.3055239331801795e-06, - "loss": 0.6285, + "epoch": 0.5739528335093277, + "grad_norm": 2.081477403640747, + "learning_rate": 5.672154391400012e-06, + "loss": 0.668, "step": 8153 }, { - "epoch": 0.6170027619083652, - "grad_norm": 3.2419116497039795, - "learning_rate": 6.303375105119626e-06, - "loss": 0.618, + "epoch": 0.5740232312565998, + "grad_norm": 1.9308956861495972, + "learning_rate": 5.670587299460371e-06, + "loss": 0.7006, "step": 8154 }, { - "epoch": 0.6170784306306988, - "grad_norm": 2.273522138595581, - "learning_rate": 6.301226445930447e-06, - "loss": 0.6384, + "epoch": 0.5740936290038718, + "grad_norm": 1.707717776298523, + "learning_rate": 5.6690202766380984e-06, + "loss": 0.6628, "step": 8155 }, { - "epoch": 0.6171540993530324, - "grad_norm": 2.353877544403076, - "learning_rate": 6.299077955747195e-06, - "loss": 0.6086, + "epoch": 0.574164026751144, + "grad_norm": 1.4307575225830078, + "learning_rate": 5.6674533230146695e-06, + "loss": 0.714, "step": 8156 }, { - "epoch": 0.617229768075366, - "grad_norm": 2.2574803829193115, - "learning_rate": 6.296929634704415e-06, - "loss": 0.8628, + "epoch": 0.574234424498416, + "grad_norm": 1.7731037139892578, + "learning_rate": 5.665886438671547e-06, + "loss": 0.6488, "step": 8157 }, { - "epoch": 0.6173054367976997, - "grad_norm": 2.2511677742004395, - "learning_rate": 6.294781482936646e-06, - "loss": 0.8214, + "epoch": 0.5743048222456881, + "grad_norm": 1.7234880924224854, + "learning_rate": 5.664319623690198e-06, + "loss": 0.6188, "step": 8158 }, { - "epoch": 0.6173811055200333, - "grad_norm": 2.1180849075317383, - "learning_rate": 6.292633500578412e-06, - "loss": 0.6905, + "epoch": 0.5743752199929603, + "grad_norm": 1.6666074991226196, + "learning_rate": 5.662752878152079e-06, + "loss": 0.7264, "step": 8159 }, { - "epoch": 0.6174567742423669, - "grad_norm": 2.302046775817871, - "learning_rate": 6.290485687764223e-06, - "loss": 0.807, + "epoch": 0.5744456177402323, + "grad_norm": 1.653846025466919, + "learning_rate": 5.6611862021386515e-06, + "loss": 0.6061, "step": 8160 }, { - "epoch": 0.6175324429647006, - "grad_norm": 2.0591933727264404, - "learning_rate": 6.2883380446285865e-06, - "loss": 0.7709, + "epoch": 0.5745160154875044, + "grad_norm": 1.9214038848876953, + "learning_rate": 5.659619595731364e-06, + "loss": 0.7283, "step": 8161 }, { - "epoch": 0.6176081116870341, - "grad_norm": 2.0985355377197266, - "learning_rate": 6.28619057130599e-06, - "loss": 0.8037, + "epoch": 0.5745864132347764, + "grad_norm": 1.7268505096435547, + "learning_rate": 5.65805305901167e-06, + "loss": 0.5976, "step": 8162 }, { - "epoch": 0.6176837804093678, - "grad_norm": 1.847230315208435, - "learning_rate": 6.284043267930915e-06, - "loss": 0.6209, + "epoch": 0.5746568109820486, + "grad_norm": 1.5392508506774902, + "learning_rate": 5.6564865920610105e-06, + "loss": 0.6915, "step": 8163 }, { - "epoch": 0.6177594491317014, - "grad_norm": 2.1725268363952637, - "learning_rate": 6.28189613463784e-06, - "loss": 0.7832, + "epoch": 0.5747272087293207, + "grad_norm": 1.8991539478302002, + "learning_rate": 5.654920194960833e-06, + "loss": 0.5965, "step": 8164 }, { - "epoch": 0.617835117854035, - "grad_norm": 1.3901453018188477, - "learning_rate": 6.279749171561218e-06, - "loss": 0.772, + "epoch": 0.5747976064765927, + "grad_norm": 1.6193293333053589, + "learning_rate": 5.6533538677925675e-06, + "loss": 0.6019, "step": 8165 }, { - "epoch": 0.6179107865763687, - "grad_norm": 2.406770944595337, - "learning_rate": 6.277602378835502e-06, - "loss": 0.6068, + "epoch": 0.5748680042238649, + "grad_norm": 1.727514386177063, + "learning_rate": 5.651787610637658e-06, + "loss": 0.6988, "step": 8166 }, { - "epoch": 0.6179864552987023, - "grad_norm": 2.2921957969665527, - "learning_rate": 6.275455756595129e-06, - "loss": 0.7577, + "epoch": 0.5749384019711369, + "grad_norm": 1.8360650539398193, + "learning_rate": 5.65022142357753e-06, + "loss": 0.6597, "step": 8167 }, { - "epoch": 0.6180621240210359, - "grad_norm": 2.531522274017334, - "learning_rate": 6.273309304974528e-06, - "loss": 0.5886, + "epoch": 0.575008799718409, + "grad_norm": 2.250810384750366, + "learning_rate": 5.648655306693614e-06, + "loss": 0.7075, "step": 8168 }, { - "epoch": 0.6181377927433696, - "grad_norm": 2.0290911197662354, - "learning_rate": 6.27116302410812e-06, - "loss": 0.7469, + "epoch": 0.5750791974656811, + "grad_norm": 2.007718324661255, + "learning_rate": 5.647089260067332e-06, + "loss": 0.7567, "step": 8169 }, { - "epoch": 0.6182134614657031, - "grad_norm": 2.061554431915283, - "learning_rate": 6.269016914130309e-06, - "loss": 0.5977, + "epoch": 0.5751495952129532, + "grad_norm": 1.7931090593338013, + "learning_rate": 5.645523283780105e-06, + "loss": 0.728, "step": 8170 }, { - "epoch": 0.6182891301880368, - "grad_norm": 4.355343818664551, - "learning_rate": 6.266870975175491e-06, - "loss": 0.6287, + "epoch": 0.5752199929602253, + "grad_norm": 1.8193519115447998, + "learning_rate": 5.643957377913347e-06, + "loss": 0.6853, "step": 8171 }, { - "epoch": 0.6183647989103704, - "grad_norm": 2.044095754623413, - "learning_rate": 6.264725207378055e-06, - "loss": 0.8558, + "epoch": 0.5752903907074973, + "grad_norm": 1.536608338356018, + "learning_rate": 5.642391542548474e-06, + "loss": 0.6896, "step": 8172 }, { - "epoch": 0.618440467632704, - "grad_norm": 1.840848684310913, - "learning_rate": 6.262579610872368e-06, - "loss": 0.7437, + "epoch": 0.5753607884547695, + "grad_norm": 1.6288024187088013, + "learning_rate": 5.640825777766895e-06, + "loss": 0.6358, "step": 8173 }, { - "epoch": 0.6185161363550377, - "grad_norm": 5.799072742462158, - "learning_rate": 6.260434185792803e-06, - "loss": 0.7079, + "epoch": 0.5754311862020416, + "grad_norm": 1.6683870553970337, + "learning_rate": 5.639260083650014e-06, + "loss": 0.5808, "step": 8174 }, { - "epoch": 0.6185918050773712, - "grad_norm": 1.9204273223876953, - "learning_rate": 6.258288932273713e-06, - "loss": 0.6698, + "epoch": 0.5755015839493136, + "grad_norm": 1.8539376258850098, + "learning_rate": 5.6376944602792355e-06, + "loss": 0.6702, "step": 8175 }, { - "epoch": 0.6186674737997049, - "grad_norm": 2.231785774230957, - "learning_rate": 6.2561438504494346e-06, - "loss": 0.7382, + "epoch": 0.5755719816965857, + "grad_norm": 1.8664599657058716, + "learning_rate": 5.636128907735952e-06, + "loss": 0.6916, "step": 8176 }, { - "epoch": 0.6187431425220385, - "grad_norm": 1.8387155532836914, - "learning_rate": 6.253998940454305e-06, - "loss": 0.5977, + "epoch": 0.5756423794438578, + "grad_norm": 1.9104851484298706, + "learning_rate": 5.6345634261015655e-06, + "loss": 0.7102, "step": 8177 }, { - "epoch": 0.6188188112443721, - "grad_norm": 2.453481435775757, - "learning_rate": 6.25185420242264e-06, - "loss": 0.7128, + "epoch": 0.5757127771911299, + "grad_norm": 1.6734907627105713, + "learning_rate": 5.6329980154574615e-06, + "loss": 0.7556, "step": 8178 }, { - "epoch": 0.6188944799667058, - "grad_norm": 2.066225528717041, - "learning_rate": 6.249709636488755e-06, - "loss": 0.5713, + "epoch": 0.5757831749384019, + "grad_norm": 2.050377130508423, + "learning_rate": 5.63143267588503e-06, + "loss": 0.7605, "step": 8179 }, { - "epoch": 0.6189701486890394, - "grad_norm": 1.8709297180175781, - "learning_rate": 6.2475652427869495e-06, - "loss": 0.7591, + "epoch": 0.575853572685674, + "grad_norm": 1.6815866231918335, + "learning_rate": 5.6298674074656506e-06, + "loss": 0.6877, "step": 8180 }, { - "epoch": 0.619045817411373, - "grad_norm": 2.087465763092041, - "learning_rate": 6.2454210214515095e-06, - "loss": 0.7528, + "epoch": 0.5759239704329462, + "grad_norm": 2.0409457683563232, + "learning_rate": 5.628302210280708e-06, + "loss": 0.7122, "step": 8181 }, { - "epoch": 0.6191214861337067, - "grad_norm": 2.2975540161132812, - "learning_rate": 6.243276972616716e-06, - "loss": 0.7861, + "epoch": 0.5759943681802182, + "grad_norm": 2.7393882274627686, + "learning_rate": 5.626737084411574e-06, + "loss": 0.7278, "step": 8182 }, { - "epoch": 0.6191971548560402, - "grad_norm": 1.8741811513900757, - "learning_rate": 6.241133096416832e-06, - "loss": 0.629, + "epoch": 0.5760647659274903, + "grad_norm": 1.8753958940505981, + "learning_rate": 5.625172029939625e-06, + "loss": 0.6958, "step": 8183 }, { - "epoch": 0.6192728235783739, - "grad_norm": 1.919198751449585, - "learning_rate": 6.238989392986118e-06, - "loss": 0.7016, + "epoch": 0.5761351636747624, + "grad_norm": 1.871300458908081, + "learning_rate": 5.623607046946225e-06, + "loss": 0.615, "step": 8184 }, { - "epoch": 0.6193484923007075, - "grad_norm": 2.3332748413085938, - "learning_rate": 6.236845862458818e-06, - "loss": 0.59, + "epoch": 0.5762055614220345, + "grad_norm": 1.6851606369018555, + "learning_rate": 5.622042135512743e-06, + "loss": 0.6302, "step": 8185 }, { - "epoch": 0.6194241610230411, - "grad_norm": 2.41435170173645, - "learning_rate": 6.2347025049691696e-06, - "loss": 0.7076, + "epoch": 0.5762759591693066, + "grad_norm": 1.463715672492981, + "learning_rate": 5.620477295720535e-06, + "loss": 0.8234, "step": 8186 }, { - "epoch": 0.6194998297453748, - "grad_norm": 2.0539135932922363, - "learning_rate": 6.232559320651392e-06, - "loss": 0.6218, + "epoch": 0.5763463569165787, + "grad_norm": 1.8295669555664062, + "learning_rate": 5.618912527650965e-06, + "loss": 0.7365, "step": 8187 }, { - "epoch": 0.6195754984677083, - "grad_norm": 2.1097443103790283, - "learning_rate": 6.2304163096397e-06, - "loss": 0.7403, + "epoch": 0.5764167546638508, + "grad_norm": 2.0169363021850586, + "learning_rate": 5.617347831385382e-06, + "loss": 0.7153, "step": 8188 }, { - "epoch": 0.619651167190042, - "grad_norm": 2.184004306793213, - "learning_rate": 6.2282734720683e-06, - "loss": 0.5566, + "epoch": 0.5764871524111228, + "grad_norm": 2.0920462608337402, + "learning_rate": 5.615783207005139e-06, + "loss": 0.6497, "step": 8189 }, { - "epoch": 0.6197268359123757, - "grad_norm": 2.4492106437683105, - "learning_rate": 6.226130808071377e-06, - "loss": 0.6588, + "epoch": 0.5765575501583949, + "grad_norm": 1.847169280052185, + "learning_rate": 5.6142186545915785e-06, + "loss": 0.777, "step": 8190 }, { - "epoch": 0.6198025046347092, - "grad_norm": 2.1081714630126953, - "learning_rate": 6.2239883177831174e-06, - "loss": 0.6285, + "epoch": 0.5766279479056671, + "grad_norm": 1.6676913499832153, + "learning_rate": 5.612654174226048e-06, + "loss": 0.5745, "step": 8191 }, { - "epoch": 0.6198781733570429, - "grad_norm": 2.0451788902282715, - "learning_rate": 6.221846001337686e-06, - "loss": 0.7617, + "epoch": 0.5766983456529391, + "grad_norm": 1.6745476722717285, + "learning_rate": 5.6110897659898795e-06, + "loss": 0.6943, "step": 8192 }, { - "epoch": 0.6199538420793765, - "grad_norm": 2.1223862171173096, - "learning_rate": 6.219703858869242e-06, - "loss": 0.734, + "epoch": 0.5767687434002112, + "grad_norm": 1.8144006729125977, + "learning_rate": 5.609525429964416e-06, + "loss": 0.7265, "step": 8193 }, { - "epoch": 0.6200295108017101, - "grad_norm": 1.9312350749969482, - "learning_rate": 6.217561890511939e-06, - "loss": 0.5471, + "epoch": 0.5768391411474832, + "grad_norm": 2.115290880203247, + "learning_rate": 5.607961166230982e-06, + "loss": 0.6097, "step": 8194 }, { - "epoch": 0.6201051795240438, - "grad_norm": 2.3495125770568848, - "learning_rate": 6.215420096399907e-06, - "loss": 0.7066, + "epoch": 0.5769095388947554, + "grad_norm": 1.7117674350738525, + "learning_rate": 5.6063969748709104e-06, + "loss": 0.7019, "step": 8195 }, { - "epoch": 0.6201808482463773, - "grad_norm": 2.163055181503296, - "learning_rate": 6.213278476667278e-06, - "loss": 0.7731, + "epoch": 0.5769799366420274, + "grad_norm": 2.22959303855896, + "learning_rate": 5.604832855965519e-06, + "loss": 0.6614, "step": 8196 }, { - "epoch": 0.620256516968711, - "grad_norm": 2.2514150142669678, - "learning_rate": 6.211137031448162e-06, - "loss": 0.6057, + "epoch": 0.5770503343892995, + "grad_norm": 2.237795114517212, + "learning_rate": 5.603268809596133e-06, + "loss": 0.6829, "step": 8197 }, { - "epoch": 0.6203321856910446, - "grad_norm": 1.9421865940093994, - "learning_rate": 6.2089957608766664e-06, - "loss": 0.6273, + "epoch": 0.5771207321365717, + "grad_norm": 1.8944430351257324, + "learning_rate": 5.601704835844064e-06, + "loss": 0.6586, "step": 8198 }, { - "epoch": 0.6204078544133782, - "grad_norm": 1.938656210899353, - "learning_rate": 6.2068546650868785e-06, - "loss": 0.8241, + "epoch": 0.5771911298838437, + "grad_norm": 1.9542096853256226, + "learning_rate": 5.60014093479063e-06, + "loss": 0.7462, "step": 8199 }, { - "epoch": 0.6204835231357119, - "grad_norm": 2.263339042663574, - "learning_rate": 6.204713744212891e-06, - "loss": 0.7253, + "epoch": 0.5772615276311158, + "grad_norm": 1.633751392364502, + "learning_rate": 5.5985771065171355e-06, + "loss": 0.6232, "step": 8200 }, { - "epoch": 0.6205591918580454, - "grad_norm": 1.7081513404846191, - "learning_rate": 6.202572998388768e-06, - "loss": 0.5888, + "epoch": 0.5773319253783878, + "grad_norm": 1.6146372556686401, + "learning_rate": 5.597013351104887e-06, + "loss": 0.7112, "step": 8201 }, { - "epoch": 0.6206348605803791, - "grad_norm": 2.124990940093994, - "learning_rate": 6.200432427748574e-06, - "loss": 0.7011, + "epoch": 0.57740232312566, + "grad_norm": 1.949133276939392, + "learning_rate": 5.595449668635184e-06, + "loss": 0.6251, "step": 8202 }, { - "epoch": 0.6207105293027128, - "grad_norm": 2.4887731075286865, - "learning_rate": 6.198292032426354e-06, - "loss": 0.7463, + "epoch": 0.5774727208729321, + "grad_norm": 2.099766492843628, + "learning_rate": 5.593886059189326e-06, + "loss": 0.6734, "step": 8203 }, { - "epoch": 0.6207861980250463, - "grad_norm": 2.28210186958313, - "learning_rate": 6.1961518125561485e-06, - "loss": 0.7691, + "epoch": 0.5775431186202041, + "grad_norm": 2.0622096061706543, + "learning_rate": 5.5923225228486025e-06, + "loss": 0.5444, "step": 8204 }, { - "epoch": 0.62086186674738, - "grad_norm": 1.8958467245101929, - "learning_rate": 6.194011768271986e-06, - "loss": 0.7223, + "epoch": 0.5776135163674763, + "grad_norm": 1.790793776512146, + "learning_rate": 5.590759059694308e-06, + "loss": 0.6782, "step": 8205 }, { - "epoch": 0.6209375354697136, - "grad_norm": 1.9587349891662598, - "learning_rate": 6.191871899707883e-06, - "loss": 0.769, + "epoch": 0.5776839141147483, + "grad_norm": 2.0392489433288574, + "learning_rate": 5.5891956698077264e-06, + "loss": 0.723, "step": 8206 }, { - "epoch": 0.6210132041920472, - "grad_norm": 2.0116994380950928, - "learning_rate": 6.189732206997845e-06, - "loss": 0.5752, + "epoch": 0.5777543118620204, + "grad_norm": 1.761608600616455, + "learning_rate": 5.5876323532701404e-06, + "loss": 0.7583, "step": 8207 }, { - "epoch": 0.6210888729143809, - "grad_norm": 2.030748128890991, - "learning_rate": 6.187592690275864e-06, - "loss": 0.6877, + "epoch": 0.5778247096092926, + "grad_norm": 1.8317463397979736, + "learning_rate": 5.586069110162826e-06, + "loss": 0.7487, "step": 8208 }, { - "epoch": 0.6211645416367144, - "grad_norm": 1.9845973253250122, - "learning_rate": 6.185453349675923e-06, - "loss": 0.6563, + "epoch": 0.5778951073565646, + "grad_norm": 2.83974289894104, + "learning_rate": 5.58450594056706e-06, + "loss": 0.6731, "step": 8209 }, { - "epoch": 0.6212402103590481, - "grad_norm": 2.015986680984497, - "learning_rate": 6.1833141853319995e-06, - "loss": 0.6908, + "epoch": 0.5779655051038367, + "grad_norm": 1.705824613571167, + "learning_rate": 5.58294284456411e-06, + "loss": 0.7107, "step": 8210 }, { - "epoch": 0.6213158790813818, - "grad_norm": 2.061414957046509, - "learning_rate": 6.181175197378053e-06, - "loss": 0.7801, + "epoch": 0.5780359028511087, + "grad_norm": 1.5219186544418335, + "learning_rate": 5.581379822235247e-06, + "loss": 0.5744, "step": 8211 }, { - "epoch": 0.6213915478037153, - "grad_norm": 1.997130274772644, - "learning_rate": 6.179036385948032e-06, - "loss": 0.6684, + "epoch": 0.5781063005983809, + "grad_norm": 1.8401975631713867, + "learning_rate": 5.579816873661731e-06, + "loss": 0.7831, "step": 8212 }, { - "epoch": 0.621467216526049, - "grad_norm": 2.0803966522216797, - "learning_rate": 6.1768977511758755e-06, - "loss": 0.6245, + "epoch": 0.5781766983456529, + "grad_norm": 1.881591558456421, + "learning_rate": 5.5782539989248226e-06, + "loss": 0.6099, "step": 8213 }, { - "epoch": 0.6215428852483825, - "grad_norm": 2.1265552043914795, - "learning_rate": 6.174759293195511e-06, - "loss": 0.6819, + "epoch": 0.578247096092925, + "grad_norm": 1.5496925115585327, + "learning_rate": 5.576691198105776e-06, + "loss": 0.6913, "step": 8214 }, { - "epoch": 0.6216185539707162, - "grad_norm": 1.9517875909805298, - "learning_rate": 6.1726210121408594e-06, - "loss": 0.7735, + "epoch": 0.5783174938401971, + "grad_norm": 1.7259063720703125, + "learning_rate": 5.575128471285844e-06, + "loss": 0.7081, "step": 8215 }, { - "epoch": 0.6216942226930499, - "grad_norm": 2.6221845149993896, - "learning_rate": 6.170482908145827e-06, - "loss": 0.7706, + "epoch": 0.5783878915874692, + "grad_norm": 1.6139036417007446, + "learning_rate": 5.573565818546269e-06, + "loss": 0.6537, "step": 8216 }, { - "epoch": 0.6217698914153834, - "grad_norm": 2.260093927383423, - "learning_rate": 6.168344981344304e-06, - "loss": 0.6144, + "epoch": 0.5784582893347413, + "grad_norm": 1.53080153465271, + "learning_rate": 5.572003239968304e-06, + "loss": 0.6702, "step": 8217 }, { - "epoch": 0.6218455601377171, - "grad_norm": 2.432312488555908, - "learning_rate": 6.166207231870179e-06, - "loss": 0.8123, + "epoch": 0.5785286870820133, + "grad_norm": 1.4133095741271973, + "learning_rate": 5.570440735633181e-06, + "loss": 0.8067, "step": 8218 }, { - "epoch": 0.6219212288600507, - "grad_norm": 1.845873236656189, - "learning_rate": 6.16406965985732e-06, - "loss": 0.7548, + "epoch": 0.5785990848292855, + "grad_norm": 1.6692250967025757, + "learning_rate": 5.568878305622141e-06, + "loss": 0.6681, "step": 8219 }, { - "epoch": 0.6219968975823843, - "grad_norm": 2.7891929149627686, - "learning_rate": 6.161932265439592e-06, - "loss": 0.6276, + "epoch": 0.5786694825765576, + "grad_norm": 1.6520192623138428, + "learning_rate": 5.567315950016412e-06, + "loss": 0.632, "step": 8220 }, { - "epoch": 0.622072566304718, - "grad_norm": 1.9426125288009644, - "learning_rate": 6.159795048750848e-06, - "loss": 0.778, + "epoch": 0.5787398803238296, + "grad_norm": 1.6879892349243164, + "learning_rate": 5.565753668897227e-06, + "loss": 0.6514, "step": 8221 }, { - "epoch": 0.6221482350270515, - "grad_norm": 2.255960702896118, - "learning_rate": 6.157658009924922e-06, - "loss": 0.7768, + "epoch": 0.5788102780711017, + "grad_norm": 1.7876850366592407, + "learning_rate": 5.5641914623458035e-06, + "loss": 0.8392, "step": 8222 }, { - "epoch": 0.6222239037493852, - "grad_norm": 2.089102029800415, - "learning_rate": 6.155521149095647e-06, - "loss": 0.7533, + "epoch": 0.5788806758183738, + "grad_norm": 2.0689826011657715, + "learning_rate": 5.562629330443368e-06, + "loss": 0.799, "step": 8223 }, { - "epoch": 0.6222995724717189, - "grad_norm": 2.0780301094055176, - "learning_rate": 6.153384466396833e-06, - "loss": 0.8131, + "epoch": 0.5789510735656459, + "grad_norm": 2.636178970336914, + "learning_rate": 5.561067273271136e-06, + "loss": 0.7372, "step": 8224 }, { - "epoch": 0.6223752411940524, - "grad_norm": 2.264507532119751, - "learning_rate": 6.151247961962294e-06, - "loss": 0.5875, + "epoch": 0.579021471312918, + "grad_norm": 1.6689369678497314, + "learning_rate": 5.559505290910318e-06, + "loss": 0.5981, "step": 8225 }, { - "epoch": 0.6224509099163861, - "grad_norm": 1.9472380876541138, - "learning_rate": 6.1491116359258215e-06, - "loss": 0.6929, + "epoch": 0.5790918690601901, + "grad_norm": 1.7435472011566162, + "learning_rate": 5.557943383442129e-06, + "loss": 0.743, "step": 8226 }, { - "epoch": 0.6225265786387196, - "grad_norm": 1.84212327003479, - "learning_rate": 6.146975488421199e-06, - "loss": 0.5678, + "epoch": 0.5791622668074622, + "grad_norm": 1.9502640962600708, + "learning_rate": 5.556381550947765e-06, + "loss": 0.751, "step": 8227 }, { - "epoch": 0.6226022473610533, - "grad_norm": 1.9093300104141235, - "learning_rate": 6.144839519582201e-06, - "loss": 0.739, + "epoch": 0.5792326645547342, + "grad_norm": 1.8088504076004028, + "learning_rate": 5.554819793508434e-06, + "loss": 0.6631, "step": 8228 }, { - "epoch": 0.622677916083387, - "grad_norm": 1.707137942314148, - "learning_rate": 6.142703729542581e-06, - "loss": 0.6922, + "epoch": 0.5793030623020063, + "grad_norm": 1.5860257148742676, + "learning_rate": 5.553258111205329e-06, + "loss": 0.6646, "step": 8229 }, { - "epoch": 0.6227535848057205, - "grad_norm": 1.5876680612564087, - "learning_rate": 6.1405681184361e-06, - "loss": 0.7791, + "epoch": 0.5793734600492785, + "grad_norm": 1.5610913038253784, + "learning_rate": 5.5516965041196465e-06, + "loss": 0.5686, "step": 8230 }, { - "epoch": 0.6228292535280542, - "grad_norm": 2.1227242946624756, - "learning_rate": 6.138432686396492e-06, - "loss": 0.8995, + "epoch": 0.5794438577965505, + "grad_norm": 1.8429620265960693, + "learning_rate": 5.550134972332571e-06, + "loss": 0.618, "step": 8231 }, { - "epoch": 0.6229049222503878, - "grad_norm": 1.9821844100952148, - "learning_rate": 6.1362974335574835e-06, - "loss": 0.8453, + "epoch": 0.5795142555438226, + "grad_norm": 1.9208455085754395, + "learning_rate": 5.548573515925298e-06, + "loss": 0.696, "step": 8232 }, { - "epoch": 0.6229805909727214, - "grad_norm": 2.247864007949829, - "learning_rate": 6.134162360052793e-06, - "loss": 0.7166, + "epoch": 0.5795846532910947, + "grad_norm": 1.6332908868789673, + "learning_rate": 5.547012134978996e-06, + "loss": 0.5376, "step": 8233 }, { - "epoch": 0.6230562596950551, - "grad_norm": 2.540407657623291, - "learning_rate": 6.132027466016122e-06, - "loss": 0.7784, + "epoch": 0.5796550510383668, + "grad_norm": 1.9052255153656006, + "learning_rate": 5.545450829574853e-06, + "loss": 0.5968, "step": 8234 }, { - "epoch": 0.6231319284173886, - "grad_norm": 2.323075294494629, - "learning_rate": 6.129892751581171e-06, - "loss": 0.6891, + "epoch": 0.5797254487856388, + "grad_norm": 1.512439489364624, + "learning_rate": 5.543889599794036e-06, + "loss": 0.6695, "step": 8235 }, { - "epoch": 0.6232075971397223, - "grad_norm": 1.875849723815918, - "learning_rate": 6.1277582168816165e-06, - "loss": 0.6888, + "epoch": 0.5797958465329109, + "grad_norm": 2.153076648712158, + "learning_rate": 5.542328445717721e-06, + "loss": 0.7188, "step": 8236 }, { - "epoch": 0.623283265862056, - "grad_norm": 2.02666974067688, - "learning_rate": 6.125623862051135e-06, - "loss": 0.7028, + "epoch": 0.5798662442801831, + "grad_norm": 1.6548734903335571, + "learning_rate": 5.540767367427066e-06, + "loss": 0.624, "step": 8237 }, { - "epoch": 0.6233589345843895, - "grad_norm": 1.8737157583236694, - "learning_rate": 6.1234896872233815e-06, - "loss": 0.9008, + "epoch": 0.5799366420274551, + "grad_norm": 1.8989782333374023, + "learning_rate": 5.5392063650032394e-06, + "loss": 0.6002, "step": 8238 }, { - "epoch": 0.6234346033067232, - "grad_norm": 1.9101803302764893, - "learning_rate": 6.1213556925320105e-06, - "loss": 0.6532, + "epoch": 0.5800070397747272, + "grad_norm": 1.8820514678955078, + "learning_rate": 5.537645438527397e-06, + "loss": 0.6627, "step": 8239 }, { - "epoch": 0.6235102720290568, - "grad_norm": 2.0796594619750977, - "learning_rate": 6.119221878110652e-06, - "loss": 0.5757, + "epoch": 0.5800774375219993, + "grad_norm": 1.7206306457519531, + "learning_rate": 5.536084588080692e-06, + "loss": 0.7226, "step": 8240 }, { - "epoch": 0.6235859407513904, - "grad_norm": 2.185795307159424, - "learning_rate": 6.1170882440929385e-06, - "loss": 0.6812, + "epoch": 0.5801478352692714, + "grad_norm": 2.7029783725738525, + "learning_rate": 5.534523813744275e-06, + "loss": 0.6289, "step": 8241 }, { - "epoch": 0.6236616094737241, - "grad_norm": 2.2780838012695312, - "learning_rate": 6.114954790612487e-06, - "loss": 0.7021, + "epoch": 0.5802182330165435, + "grad_norm": 1.6712077856063843, + "learning_rate": 5.532963115599293e-06, + "loss": 0.6741, "step": 8242 }, { - "epoch": 0.6237372781960576, - "grad_norm": 2.1631765365600586, - "learning_rate": 6.112821517802896e-06, - "loss": 0.6584, + "epoch": 0.5802886307638155, + "grad_norm": 1.7420172691345215, + "learning_rate": 5.5314024937268844e-06, + "loss": 0.613, "step": 8243 }, { - "epoch": 0.6238129469183913, - "grad_norm": 2.143260955810547, - "learning_rate": 6.11068842579776e-06, - "loss": 0.7908, + "epoch": 0.5803590285110877, + "grad_norm": 2.12302827835083, + "learning_rate": 5.529841948208194e-06, + "loss": 0.5883, "step": 8244 }, { - "epoch": 0.623888615640725, - "grad_norm": 2.4261891841888428, - "learning_rate": 6.108555514730655e-06, - "loss": 0.704, + "epoch": 0.5804294262583597, + "grad_norm": 1.9827967882156372, + "learning_rate": 5.528281479124351e-06, + "loss": 0.7268, "step": 8245 }, { - "epoch": 0.6239642843630585, - "grad_norm": 2.189960241317749, - "learning_rate": 6.106422784735162e-06, - "loss": 0.7673, + "epoch": 0.5804998240056318, + "grad_norm": 1.5134717226028442, + "learning_rate": 5.526721086556486e-06, + "loss": 0.7075, "step": 8246 }, { - "epoch": 0.6240399530853922, - "grad_norm": 2.8708183765411377, - "learning_rate": 6.104290235944831e-06, - "loss": 0.6039, + "epoch": 0.580570221752904, + "grad_norm": 2.0789740085601807, + "learning_rate": 5.5251607705857265e-06, + "loss": 0.7042, "step": 8247 }, { - "epoch": 0.6241156218077257, - "grad_norm": 1.9821953773498535, - "learning_rate": 6.1021578684932136e-06, - "loss": 0.6746, + "epoch": 0.580640619500176, + "grad_norm": 2.0504043102264404, + "learning_rate": 5.523600531293195e-06, + "loss": 0.6545, "step": 8248 }, { - "epoch": 0.6241912905300594, - "grad_norm": 2.20764422416687, - "learning_rate": 6.1000256825138405e-06, - "loss": 0.6322, + "epoch": 0.5807110172474481, + "grad_norm": 1.880976915359497, + "learning_rate": 5.522040368760006e-06, + "loss": 0.7902, "step": 8249 }, { - "epoch": 0.6242669592523931, - "grad_norm": 2.8695743083953857, - "learning_rate": 6.097893678140237e-06, - "loss": 0.7263, + "epoch": 0.5807814149947201, + "grad_norm": 2.832029342651367, + "learning_rate": 5.5204802830672795e-06, + "loss": 0.7164, "step": 8250 }, { - "epoch": 0.6243426279747266, - "grad_norm": 2.6145105361938477, - "learning_rate": 6.095761855505921e-06, - "loss": 0.7057, + "epoch": 0.5808518127419923, + "grad_norm": 1.7389121055603027, + "learning_rate": 5.518920274296122e-06, + "loss": 0.7294, "step": 8251 }, { - "epoch": 0.6244182966970603, - "grad_norm": 2.075453758239746, - "learning_rate": 6.093630214744391e-06, - "loss": 0.8061, + "epoch": 0.5809222104892643, + "grad_norm": 1.8672235012054443, + "learning_rate": 5.5173603425276425e-06, + "loss": 0.6876, "step": 8252 }, { - "epoch": 0.624493965419394, - "grad_norm": 2.094466209411621, - "learning_rate": 6.091498755989139e-06, - "loss": 0.613, + "epoch": 0.5809926082365364, + "grad_norm": 1.8448493480682373, + "learning_rate": 5.51580048784294e-06, + "loss": 0.5973, "step": 8253 }, { - "epoch": 0.6245696341417275, - "grad_norm": 1.5534762144088745, - "learning_rate": 6.089367479373639e-06, - "loss": 0.7078, + "epoch": 0.5810630059838086, + "grad_norm": 3.606563091278076, + "learning_rate": 5.5142407103231144e-06, + "loss": 0.5375, "step": 8254 }, { - "epoch": 0.6246453028640612, - "grad_norm": 2.0539896488189697, - "learning_rate": 6.087236385031361e-06, - "loss": 0.7436, + "epoch": 0.5811334037310806, + "grad_norm": 1.9489216804504395, + "learning_rate": 5.5126810100492585e-06, + "loss": 0.6647, "step": 8255 }, { - "epoch": 0.6247209715863947, - "grad_norm": 1.873143196105957, - "learning_rate": 6.085105473095764e-06, - "loss": 0.7195, + "epoch": 0.5812038014783527, + "grad_norm": 2.0137863159179688, + "learning_rate": 5.511121387102467e-06, + "loss": 0.7097, "step": 8256 }, { - "epoch": 0.6247966403087284, - "grad_norm": 2.717400312423706, - "learning_rate": 6.082974743700289e-06, - "loss": 0.726, + "epoch": 0.5812741992256247, + "grad_norm": 1.7851628065109253, + "learning_rate": 5.5095618415638196e-06, + "loss": 0.794, "step": 8257 }, { - "epoch": 0.6248723090310621, - "grad_norm": 2.0352890491485596, - "learning_rate": 6.0808441969783714e-06, - "loss": 0.727, + "epoch": 0.5813445969728969, + "grad_norm": 1.8300530910491943, + "learning_rate": 5.508002373514405e-06, + "loss": 0.6512, "step": 8258 }, { - "epoch": 0.6249479777533956, - "grad_norm": 2.217931032180786, - "learning_rate": 6.078713833063431e-06, - "loss": 0.7579, + "epoch": 0.581414994720169, + "grad_norm": 1.8893623352050781, + "learning_rate": 5.506442983035297e-06, + "loss": 0.651, "step": 8259 }, { - "epoch": 0.6250236464757293, - "grad_norm": 1.9803812503814697, - "learning_rate": 6.0765836520888774e-06, - "loss": 0.7234, + "epoch": 0.581485392467441, + "grad_norm": 1.6678426265716553, + "learning_rate": 5.504883670207571e-06, + "loss": 0.6836, "step": 8260 }, { - "epoch": 0.6250993151980628, - "grad_norm": 2.017169237136841, - "learning_rate": 6.074453654188113e-06, - "loss": 0.7283, + "epoch": 0.5815557902147132, + "grad_norm": 2.6464734077453613, + "learning_rate": 5.5033244351122955e-06, + "loss": 0.6425, "step": 8261 }, { - "epoch": 0.6251749839203965, - "grad_norm": 2.0302610397338867, - "learning_rate": 6.072323839494523e-06, - "loss": 0.7154, + "epoch": 0.5816261879619852, + "grad_norm": 1.6643173694610596, + "learning_rate": 5.50176527783054e-06, + "loss": 0.6202, "step": 8262 }, { - "epoch": 0.6252506526427302, - "grad_norm": 1.8315473794937134, - "learning_rate": 6.070194208141484e-06, - "loss": 0.642, + "epoch": 0.5816965857092573, + "grad_norm": 1.9622917175292969, + "learning_rate": 5.500206198443363e-06, + "loss": 0.5931, "step": 8263 }, { - "epoch": 0.6253263213650637, - "grad_norm": 2.1627883911132812, - "learning_rate": 6.0680647602623605e-06, - "loss": 0.7464, + "epoch": 0.5817669834565294, + "grad_norm": 1.7560551166534424, + "learning_rate": 5.498647197031827e-06, + "loss": 0.6735, "step": 8264 }, { - "epoch": 0.6254019900873974, - "grad_norm": 1.989489197731018, - "learning_rate": 6.065935495990501e-06, - "loss": 0.5703, + "epoch": 0.5818373812038015, + "grad_norm": 1.7820117473602295, + "learning_rate": 5.497088273676979e-06, + "loss": 0.7165, "step": 8265 }, { - "epoch": 0.625477658809731, - "grad_norm": 1.6930984258651733, - "learning_rate": 6.063806415459253e-06, - "loss": 0.5945, + "epoch": 0.5819077789510736, + "grad_norm": 2.834916591644287, + "learning_rate": 5.495529428459876e-06, + "loss": 0.6063, "step": 8266 }, { - "epoch": 0.6255533275320646, - "grad_norm": 1.9315162897109985, - "learning_rate": 6.0616775188019444e-06, - "loss": 0.6163, + "epoch": 0.5819781766983456, + "grad_norm": 1.683653473854065, + "learning_rate": 5.493970661461557e-06, + "loss": 0.7386, "step": 8267 }, { - "epoch": 0.6256289962543983, - "grad_norm": 1.9883739948272705, - "learning_rate": 6.059548806151893e-06, - "loss": 0.6101, + "epoch": 0.5820485744456178, + "grad_norm": 1.6850310564041138, + "learning_rate": 5.492411972763071e-06, + "loss": 0.7243, "step": 8268 }, { - "epoch": 0.6257046649767318, - "grad_norm": 2.2060041427612305, - "learning_rate": 6.057420277642407e-06, - "loss": 0.6781, + "epoch": 0.5821189721928899, + "grad_norm": 2.95345139503479, + "learning_rate": 5.490853362445451e-06, + "loss": 0.6714, "step": 8269 }, { - "epoch": 0.6257803336990655, - "grad_norm": 2.501366138458252, - "learning_rate": 6.055291933406778e-06, - "loss": 0.6316, + "epoch": 0.5821893699401619, + "grad_norm": 1.90186607837677, + "learning_rate": 5.489294830589731e-06, + "loss": 0.7088, "step": 8270 }, { - "epoch": 0.6258560024213992, - "grad_norm": 1.9674981832504272, - "learning_rate": 6.053163773578293e-06, - "loss": 0.8069, + "epoch": 0.582259767687434, + "grad_norm": 1.8064552545547485, + "learning_rate": 5.48773637727694e-06, + "loss": 0.7998, "step": 8271 }, { - "epoch": 0.6259316711437327, - "grad_norm": 2.0307114124298096, - "learning_rate": 6.051035798290226e-06, - "loss": 0.5404, + "epoch": 0.5823301654347061, + "grad_norm": 1.6982916593551636, + "learning_rate": 5.486178002588103e-06, + "loss": 0.7776, "step": 8272 }, { - "epoch": 0.6260073398660664, - "grad_norm": 1.978594183921814, - "learning_rate": 6.048908007675834e-06, - "loss": 0.6138, + "epoch": 0.5824005631819782, + "grad_norm": 1.8335176706314087, + "learning_rate": 5.484619706604246e-06, + "loss": 0.6774, "step": 8273 }, { - "epoch": 0.6260830085883999, - "grad_norm": 3.162856340408325, - "learning_rate": 6.046780401868367e-06, - "loss": 0.705, + "epoch": 0.5824709609292502, + "grad_norm": 2.3546249866485596, + "learning_rate": 5.4830614894063814e-06, + "loss": 0.7384, "step": 8274 }, { - "epoch": 0.6261586773107336, - "grad_norm": 2.049156427383423, - "learning_rate": 6.044652981001066e-06, - "loss": 0.7661, + "epoch": 0.5825413586765223, + "grad_norm": 1.7954144477844238, + "learning_rate": 5.481503351075525e-06, + "loss": 0.5878, "step": 8275 }, { - "epoch": 0.6262343460330673, - "grad_norm": 2.057219982147217, - "learning_rate": 6.042525745207149e-06, - "loss": 0.6902, + "epoch": 0.5826117564237945, + "grad_norm": 1.8957939147949219, + "learning_rate": 5.4799452916926814e-06, + "loss": 0.6293, "step": 8276 }, { - "epoch": 0.6263100147554008, - "grad_norm": 2.105268716812134, - "learning_rate": 6.040398694619838e-06, - "loss": 0.6733, + "epoch": 0.5826821541710665, + "grad_norm": 1.9578779935836792, + "learning_rate": 5.478387311338863e-06, + "loss": 0.7058, "step": 8277 }, { - "epoch": 0.6263856834777345, - "grad_norm": 2.046825647354126, - "learning_rate": 6.038271829372335e-06, - "loss": 0.6682, + "epoch": 0.5827525519183386, + "grad_norm": 1.6520581245422363, + "learning_rate": 5.476829410095063e-06, + "loss": 0.6446, "step": 8278 }, { - "epoch": 0.6264613522000682, - "grad_norm": 2.0353481769561768, - "learning_rate": 6.036145149597828e-06, - "loss": 0.7527, + "epoch": 0.5828229496656107, + "grad_norm": 1.7950501441955566, + "learning_rate": 5.475271588042284e-06, + "loss": 0.7018, "step": 8279 }, { - "epoch": 0.6265370209224017, - "grad_norm": 1.9645310640335083, - "learning_rate": 6.034018655429499e-06, - "loss": 0.6571, + "epoch": 0.5828933474128828, + "grad_norm": 1.6770246028900146, + "learning_rate": 5.473713845261514e-06, + "loss": 0.7813, "step": 8280 }, { - "epoch": 0.6266126896447354, - "grad_norm": 1.8533154726028442, - "learning_rate": 6.031892347000512e-06, - "loss": 0.6641, + "epoch": 0.5829637451601549, + "grad_norm": 1.7638682126998901, + "learning_rate": 5.472156181833745e-06, + "loss": 0.6777, "step": 8281 }, { - "epoch": 0.6266883583670689, - "grad_norm": 2.540891170501709, - "learning_rate": 6.029766224444028e-06, - "loss": 0.7443, + "epoch": 0.5830341429074269, + "grad_norm": 2.426767349243164, + "learning_rate": 5.4705985978399565e-06, + "loss": 0.7514, "step": 8282 }, { - "epoch": 0.6267640270894026, - "grad_norm": 2.1608104705810547, - "learning_rate": 6.027640287893191e-06, - "loss": 0.6818, + "epoch": 0.5831045406546991, + "grad_norm": 1.9168076515197754, + "learning_rate": 5.469041093361137e-06, + "loss": 0.6855, "step": 8283 }, { - "epoch": 0.6268396958117363, - "grad_norm": 2.1452369689941406, - "learning_rate": 6.0255145374811315e-06, - "loss": 0.7741, + "epoch": 0.5831749384019711, + "grad_norm": 1.7565991878509521, + "learning_rate": 5.467483668478254e-06, + "loss": 0.6517, "step": 8284 }, { - "epoch": 0.6269153645340698, - "grad_norm": 1.848710060119629, - "learning_rate": 6.023388973340974e-06, - "loss": 0.6587, + "epoch": 0.5832453361492432, + "grad_norm": 1.559098720550537, + "learning_rate": 5.465926323272284e-06, + "loss": 0.6492, "step": 8285 }, { - "epoch": 0.6269910332564035, - "grad_norm": 2.499025821685791, - "learning_rate": 6.021263595605825e-06, - "loss": 0.7964, + "epoch": 0.5833157338965154, + "grad_norm": 2.20475172996521, + "learning_rate": 5.464369057824194e-06, + "loss": 0.7991, "step": 8286 }, { - "epoch": 0.627066701978737, - "grad_norm": 2.207012414932251, - "learning_rate": 6.019138404408783e-06, - "loss": 0.6924, + "epoch": 0.5833861316437874, + "grad_norm": 2.130322217941284, + "learning_rate": 5.4628118722149485e-06, + "loss": 0.7193, "step": 8287 }, { - "epoch": 0.6271423707010707, - "grad_norm": 2.040011405944824, - "learning_rate": 6.017013399882936e-06, - "loss": 0.6481, + "epoch": 0.5834565293910595, + "grad_norm": 1.7306833267211914, + "learning_rate": 5.461254766525503e-06, + "loss": 0.5363, "step": 8288 }, { - "epoch": 0.6272180394234044, - "grad_norm": 1.5633232593536377, - "learning_rate": 6.014888582161361e-06, - "loss": 0.6835, + "epoch": 0.5835269271383315, + "grad_norm": 1.6801269054412842, + "learning_rate": 5.459697740836818e-06, + "loss": 0.5975, "step": 8289 }, { - "epoch": 0.6272937081457379, - "grad_norm": 2.0602505207061768, - "learning_rate": 6.012763951377116e-06, - "loss": 0.8177, + "epoch": 0.5835973248856037, + "grad_norm": 1.7997920513153076, + "learning_rate": 5.458140795229842e-06, + "loss": 0.7007, "step": 8290 }, { - "epoch": 0.6273693768680716, - "grad_norm": 1.8293778896331787, - "learning_rate": 6.010639507663251e-06, - "loss": 0.6229, + "epoch": 0.5836677226328757, + "grad_norm": 1.9618583917617798, + "learning_rate": 5.456583929785523e-06, + "loss": 0.7078, "step": 8291 }, { - "epoch": 0.6274450455904053, - "grad_norm": 2.240879535675049, - "learning_rate": 6.008515251152815e-06, - "loss": 0.6448, + "epoch": 0.5837381203801478, + "grad_norm": 1.5579930543899536, + "learning_rate": 5.455027144584802e-06, + "loss": 0.635, "step": 8292 }, { - "epoch": 0.6275207143127388, - "grad_norm": 1.7623291015625, - "learning_rate": 6.006391181978825e-06, - "loss": 0.6318, + "epoch": 0.58380851812742, + "grad_norm": 1.6427663564682007, + "learning_rate": 5.4534704397086206e-06, + "loss": 0.6648, "step": 8293 }, { - "epoch": 0.6275963830350725, - "grad_norm": 2.637432098388672, - "learning_rate": 6.004267300274305e-06, - "loss": 0.632, + "epoch": 0.583878915874692, + "grad_norm": 1.920552372932434, + "learning_rate": 5.451913815237909e-06, + "loss": 0.7257, "step": 8294 }, { - "epoch": 0.627672051757406, - "grad_norm": 2.096395492553711, - "learning_rate": 6.002143606172254e-06, - "loss": 0.798, + "epoch": 0.5839493136219641, + "grad_norm": 2.1827950477600098, + "learning_rate": 5.4503572712536026e-06, + "loss": 0.7802, "step": 8295 }, { - "epoch": 0.6277477204797397, - "grad_norm": 2.2483010292053223, - "learning_rate": 6.000020099805665e-06, - "loss": 0.6926, + "epoch": 0.5840197113692361, + "grad_norm": 2.3113808631896973, + "learning_rate": 5.448800807836624e-06, + "loss": 0.75, "step": 8296 }, { - "epoch": 0.6278233892020734, - "grad_norm": 2.0288619995117188, - "learning_rate": 5.997896781307524e-06, - "loss": 0.7208, + "epoch": 0.5840901091165083, + "grad_norm": 1.7168505191802979, + "learning_rate": 5.447244425067897e-06, + "loss": 0.5355, "step": 8297 }, { - "epoch": 0.6278990579244069, - "grad_norm": 1.9230502843856812, - "learning_rate": 5.995773650810794e-06, - "loss": 0.6955, + "epoch": 0.5841605068637804, + "grad_norm": 1.7230767011642456, + "learning_rate": 5.445688123028338e-06, + "loss": 0.7232, "step": 8298 }, { - "epoch": 0.6279747266467406, - "grad_norm": 1.8316569328308105, - "learning_rate": 5.993650708448437e-06, - "loss": 0.5128, + "epoch": 0.5842309046110524, + "grad_norm": 1.8321757316589355, + "learning_rate": 5.444131901798863e-06, + "loss": 0.6173, "step": 8299 }, { - "epoch": 0.6280503953690741, - "grad_norm": 2.2068562507629395, - "learning_rate": 5.991527954353395e-06, - "loss": 0.765, + "epoch": 0.5843013023583246, + "grad_norm": 2.546560287475586, + "learning_rate": 5.442575761460375e-06, + "loss": 0.6552, "step": 8300 }, { - "epoch": 0.6281260640914078, - "grad_norm": 2.418468952178955, - "learning_rate": 5.9894053886586006e-06, - "loss": 0.7241, + "epoch": 0.5843717001055966, + "grad_norm": 1.8300697803497314, + "learning_rate": 5.4410197020937875e-06, + "loss": 0.6915, "step": 8301 }, { - "epoch": 0.6282017328137415, - "grad_norm": 1.7980351448059082, - "learning_rate": 5.987283011496981e-06, - "loss": 0.6045, + "epoch": 0.5844420978528687, + "grad_norm": 1.7439712285995483, + "learning_rate": 5.439463723779996e-06, + "loss": 0.6637, "step": 8302 }, { - "epoch": 0.628277401536075, - "grad_norm": 2.48748779296875, - "learning_rate": 5.985160823001445e-06, - "loss": 0.7215, + "epoch": 0.5845124956001408, + "grad_norm": 1.6424226760864258, + "learning_rate": 5.437907826599901e-06, + "loss": 0.7049, "step": 8303 }, { - "epoch": 0.6283530702584087, - "grad_norm": 1.8230119943618774, - "learning_rate": 5.983038823304886e-06, - "loss": 0.705, + "epoch": 0.5845828933474129, + "grad_norm": 1.688897728919983, + "learning_rate": 5.436352010634391e-06, + "loss": 0.7345, "step": 8304 }, { - "epoch": 0.6284287389807424, - "grad_norm": 2.0660839080810547, - "learning_rate": 5.980917012540198e-06, - "loss": 0.7044, + "epoch": 0.584653291094685, + "grad_norm": 1.583640694618225, + "learning_rate": 5.434796275964357e-06, + "loss": 0.6326, "step": 8305 }, { - "epoch": 0.6285044077030759, - "grad_norm": 1.6837860345840454, - "learning_rate": 5.978795390840247e-06, - "loss": 0.6187, + "epoch": 0.584723688841957, + "grad_norm": 1.703668475151062, + "learning_rate": 5.43324062267068e-06, + "loss": 0.8158, "step": 8306 }, { - "epoch": 0.6285800764254096, - "grad_norm": 1.9054850339889526, - "learning_rate": 5.976673958337902e-06, - "loss": 0.6454, + "epoch": 0.5847940865892292, + "grad_norm": 1.9501432180404663, + "learning_rate": 5.431685050834244e-06, + "loss": 0.6853, "step": 8307 }, { - "epoch": 0.6286557451477431, - "grad_norm": 2.1775870323181152, - "learning_rate": 5.974552715166014e-06, - "loss": 0.6592, + "epoch": 0.5848644843365012, + "grad_norm": 1.7676399946212769, + "learning_rate": 5.4301295605359215e-06, + "loss": 0.7202, "step": 8308 }, { - "epoch": 0.6287314138700768, - "grad_norm": 1.7539699077606201, - "learning_rate": 5.97243166145742e-06, - "loss": 0.6329, + "epoch": 0.5849348820837733, + "grad_norm": 1.8917163610458374, + "learning_rate": 5.4285741518565866e-06, + "loss": 0.7113, "step": 8309 }, { - "epoch": 0.6288070825924105, - "grad_norm": 7.603641033172607, - "learning_rate": 5.970310797344949e-06, - "loss": 0.6322, + "epoch": 0.5850052798310454, + "grad_norm": 1.583044409751892, + "learning_rate": 5.427018824877103e-06, + "loss": 0.5963, "step": 8310 }, { - "epoch": 0.628882751314744, - "grad_norm": 2.0297069549560547, - "learning_rate": 5.968190122961411e-06, - "loss": 0.7299, + "epoch": 0.5850756775783175, + "grad_norm": 1.8377320766448975, + "learning_rate": 5.425463579678337e-06, + "loss": 0.6595, "step": 8311 }, { - "epoch": 0.6289584200370777, - "grad_norm": 2.0094974040985107, - "learning_rate": 5.966069638439615e-06, - "loss": 0.5245, + "epoch": 0.5851460753255896, + "grad_norm": 3.047484874725342, + "learning_rate": 5.4239084163411416e-06, + "loss": 0.6324, "step": 8312 }, { - "epoch": 0.6290340887594112, - "grad_norm": 1.9231394529342651, - "learning_rate": 5.963949343912353e-06, - "loss": 0.6593, + "epoch": 0.5852164730728616, + "grad_norm": 1.597550868988037, + "learning_rate": 5.422353334946379e-06, + "loss": 0.6371, "step": 8313 }, { - "epoch": 0.6291097574817449, - "grad_norm": 25.850337982177734, - "learning_rate": 5.961829239512402e-06, - "loss": 0.5678, + "epoch": 0.5852868708201338, + "grad_norm": 1.7899389266967773, + "learning_rate": 5.420798335574893e-06, + "loss": 0.6428, "step": 8314 }, { - "epoch": 0.6291854262040786, - "grad_norm": 3.02929425239563, - "learning_rate": 5.959709325372531e-06, - "loss": 0.5795, + "epoch": 0.5853572685674059, + "grad_norm": 1.8575233221054077, + "learning_rate": 5.419243418307534e-06, + "loss": 0.6771, "step": 8315 }, { - "epoch": 0.6292610949264121, - "grad_norm": 1.7381452322006226, - "learning_rate": 5.957589601625495e-06, - "loss": 0.6775, + "epoch": 0.5854276663146779, + "grad_norm": 2.0329737663269043, + "learning_rate": 5.417688583225139e-06, + "loss": 0.7, "step": 8316 }, { - "epoch": 0.6293367636487458, - "grad_norm": 2.252568483352661, - "learning_rate": 5.955470068404037e-06, - "loss": 0.6707, + "epoch": 0.58549806406195, + "grad_norm": 2.4444756507873535, + "learning_rate": 5.416133830408551e-06, + "loss": 0.6492, "step": 8317 }, { - "epoch": 0.6294124323710795, - "grad_norm": 1.7029986381530762, - "learning_rate": 5.953350725840891e-06, - "loss": 0.6799, + "epoch": 0.5855684618092221, + "grad_norm": 3.518667221069336, + "learning_rate": 5.414579159938595e-06, + "loss": 0.7175, "step": 8318 }, { - "epoch": 0.629488101093413, - "grad_norm": 1.7706902027130127, - "learning_rate": 5.9512315740687785e-06, - "loss": 0.6089, + "epoch": 0.5856388595564942, + "grad_norm": 1.802787184715271, + "learning_rate": 5.4130245718961075e-06, + "loss": 0.6816, "step": 8319 }, { - "epoch": 0.6295637698157467, - "grad_norm": 2.4172465801239014, - "learning_rate": 5.949112613220405e-06, - "loss": 0.8089, + "epoch": 0.5857092573037663, + "grad_norm": 2.456324338912964, + "learning_rate": 5.411470066361908e-06, + "loss": 0.663, "step": 8320 }, { - "epoch": 0.6296394385380802, - "grad_norm": 2.203758955001831, - "learning_rate": 5.946993843428469e-06, - "loss": 0.6302, + "epoch": 0.5857796550510384, + "grad_norm": 1.606592059135437, + "learning_rate": 5.4099156434168204e-06, + "loss": 0.6908, "step": 8321 }, { - "epoch": 0.6297151072604139, - "grad_norm": 2.0722339153289795, - "learning_rate": 5.944875264825648e-06, - "loss": 0.613, + "epoch": 0.5858500527983105, + "grad_norm": 2.7831053733825684, + "learning_rate": 5.408361303141656e-06, + "loss": 0.6508, "step": 8322 }, { - "epoch": 0.6297907759827476, - "grad_norm": 2.695570707321167, - "learning_rate": 5.942756877544623e-06, - "loss": 0.7249, + "epoch": 0.5859204505455825, + "grad_norm": 1.7652250528335571, + "learning_rate": 5.406807045617229e-06, + "loss": 0.6995, "step": 8323 }, { - "epoch": 0.6298664447050811, - "grad_norm": 2.075514554977417, - "learning_rate": 5.940638681718052e-06, - "loss": 0.5446, + "epoch": 0.5859908482928546, + "grad_norm": 1.7334200143814087, + "learning_rate": 5.4052528709243485e-06, + "loss": 0.6014, "step": 8324 }, { - "epoch": 0.6299421134274148, - "grad_norm": 1.8209716081619263, - "learning_rate": 5.938520677478581e-06, - "loss": 0.719, + "epoch": 0.5860612460401268, + "grad_norm": 1.6266498565673828, + "learning_rate": 5.403698779143814e-06, + "loss": 0.6474, "step": 8325 }, { - "epoch": 0.6300177821497484, - "grad_norm": 2.407820224761963, - "learning_rate": 5.936402864958848e-06, - "loss": 0.7208, + "epoch": 0.5861316437873988, + "grad_norm": 2.0051681995391846, + "learning_rate": 5.402144770356428e-06, + "loss": 0.5822, "step": 8326 }, { - "epoch": 0.630093450872082, - "grad_norm": 2.0046629905700684, - "learning_rate": 5.934285244291473e-06, - "loss": 0.7994, + "epoch": 0.5862020415346709, + "grad_norm": 1.6933257579803467, + "learning_rate": 5.400590844642978e-06, + "loss": 0.7271, "step": 8327 }, { - "epoch": 0.6301691195944157, - "grad_norm": 2.1427841186523438, - "learning_rate": 5.932167815609073e-06, - "loss": 0.7415, + "epoch": 0.586272439281943, + "grad_norm": 1.6895219087600708, + "learning_rate": 5.399037002084265e-06, + "loss": 0.6039, "step": 8328 }, { - "epoch": 0.6302447883167492, - "grad_norm": 2.2254834175109863, - "learning_rate": 5.930050579044249e-06, - "loss": 0.6476, + "epoch": 0.5863428370292151, + "grad_norm": 1.942656397819519, + "learning_rate": 5.397483242761063e-06, + "loss": 0.6492, "step": 8329 }, { - "epoch": 0.6303204570390829, - "grad_norm": 2.195834159851074, - "learning_rate": 5.927933534729585e-06, - "loss": 0.5771, + "epoch": 0.5864132347764871, + "grad_norm": 1.53084135055542, + "learning_rate": 5.395929566754161e-06, + "loss": 0.737, "step": 8330 }, { - "epoch": 0.6303961257614166, - "grad_norm": 1.9512183666229248, - "learning_rate": 5.925816682797663e-06, - "loss": 0.7415, + "epoch": 0.5864836325237592, + "grad_norm": 2.0106735229492188, + "learning_rate": 5.3943759741443326e-06, + "loss": 0.5946, "step": 8331 }, { - "epoch": 0.6304717944837501, - "grad_norm": 2.1546523571014404, - "learning_rate": 5.9237000233810356e-06, - "loss": 0.6585, + "epoch": 0.5865540302710314, + "grad_norm": 1.9458675384521484, + "learning_rate": 5.392822465012353e-06, + "loss": 0.7821, "step": 8332 }, { - "epoch": 0.6305474632060838, - "grad_norm": 1.8281909227371216, - "learning_rate": 5.9215835566122696e-06, - "loss": 0.7334, + "epoch": 0.5866244280183034, + "grad_norm": 1.7307686805725098, + "learning_rate": 5.391269039438985e-06, + "loss": 0.6662, "step": 8333 }, { - "epoch": 0.6306231319284173, - "grad_norm": 2.1117517948150635, - "learning_rate": 5.919467282623896e-06, - "loss": 0.5892, + "epoch": 0.5866948257655755, + "grad_norm": 1.881355881690979, + "learning_rate": 5.389715697505002e-06, + "loss": 0.7322, "step": 8334 }, { - "epoch": 0.630698800650751, - "grad_norm": 2.3042280673980713, - "learning_rate": 5.917351201548447e-06, - "loss": 0.6998, + "epoch": 0.5867652235128475, + "grad_norm": 1.8665058612823486, + "learning_rate": 5.388162439291153e-06, + "loss": 0.7478, "step": 8335 }, { - "epoch": 0.6307744693730847, - "grad_norm": 2.325014114379883, - "learning_rate": 5.9152353135184335e-06, - "loss": 0.6966, + "epoch": 0.5868356212601197, + "grad_norm": 1.7925149202346802, + "learning_rate": 5.386609264878202e-06, + "loss": 0.7535, "step": 8336 }, { - "epoch": 0.6308501380954182, - "grad_norm": 1.9471384286880493, - "learning_rate": 5.913119618666361e-06, - "loss": 0.6993, + "epoch": 0.5869060190073918, + "grad_norm": 1.7513177394866943, + "learning_rate": 5.3850561743468955e-06, + "loss": 0.7366, "step": 8337 }, { - "epoch": 0.6309258068177519, - "grad_norm": 2.687548875808716, - "learning_rate": 5.911004117124724e-06, - "loss": 0.7726, + "epoch": 0.5869764167546638, + "grad_norm": 2.0716114044189453, + "learning_rate": 5.383503167777982e-06, + "loss": 0.7417, "step": 8338 }, { - "epoch": 0.6310014755400855, - "grad_norm": 1.9551631212234497, - "learning_rate": 5.908888809026001e-06, - "loss": 0.7378, + "epoch": 0.587046814501936, + "grad_norm": 2.0462141036987305, + "learning_rate": 5.3819502452522e-06, + "loss": 0.6394, "step": 8339 }, { - "epoch": 0.6310771442624191, - "grad_norm": 2.2991223335266113, - "learning_rate": 5.9067736945026594e-06, - "loss": 0.7404, + "epoch": 0.587117212249208, + "grad_norm": 1.6991044282913208, + "learning_rate": 5.380397406850294e-06, + "loss": 0.7161, "step": 8340 }, { - "epoch": 0.6311528129847528, - "grad_norm": 2.1046478748321533, - "learning_rate": 5.904658773687153e-06, - "loss": 0.7065, + "epoch": 0.5871876099964801, + "grad_norm": 1.7154700756072998, + "learning_rate": 5.37884465265299e-06, + "loss": 0.7809, "step": 8341 }, { - "epoch": 0.6312284817070863, - "grad_norm": 2.240189790725708, - "learning_rate": 5.902544046711922e-06, - "loss": 0.6193, + "epoch": 0.5872580077437523, + "grad_norm": 1.9495103359222412, + "learning_rate": 5.3772919827410235e-06, + "loss": 0.6476, "step": 8342 }, { - "epoch": 0.63130415042942, - "grad_norm": 1.9708818197250366, - "learning_rate": 5.9004295137094054e-06, - "loss": 0.6508, + "epoch": 0.5873284054910243, + "grad_norm": 1.598648190498352, + "learning_rate": 5.375739397195115e-06, + "loss": 0.6669, "step": 8343 }, { - "epoch": 0.6313798191517537, - "grad_norm": 2.299236297607422, - "learning_rate": 5.898315174812016e-06, - "loss": 0.842, + "epoch": 0.5873988032382964, + "grad_norm": 2.0440189838409424, + "learning_rate": 5.3741868960959875e-06, + "loss": 0.5774, "step": 8344 }, { - "epoch": 0.6314554878740872, - "grad_norm": 2.2343852519989014, - "learning_rate": 5.896201030152164e-06, - "loss": 0.7665, + "epoch": 0.5874692009855684, + "grad_norm": 1.9413020610809326, + "learning_rate": 5.372634479524353e-06, + "loss": 0.7354, "step": 8345 }, { - "epoch": 0.6315311565964209, - "grad_norm": 2.4583773612976074, - "learning_rate": 5.894087079862241e-06, - "loss": 0.6585, + "epoch": 0.5875395987328406, + "grad_norm": 1.7521178722381592, + "learning_rate": 5.371082147560929e-06, + "loss": 0.7862, "step": 8346 }, { - "epoch": 0.6316068253187545, - "grad_norm": 2.2705533504486084, - "learning_rate": 5.89197332407463e-06, - "loss": 0.5592, + "epoch": 0.5876099964801126, + "grad_norm": 2.103842258453369, + "learning_rate": 5.369529900286416e-06, + "loss": 0.6207, "step": 8347 }, { - "epoch": 0.6316824940410881, - "grad_norm": 1.823162317276001, - "learning_rate": 5.889859762921702e-06, - "loss": 0.5589, + "epoch": 0.5876803942273847, + "grad_norm": 1.886500358581543, + "learning_rate": 5.3679777377815236e-06, + "loss": 0.5796, "step": 8348 }, { - "epoch": 0.6317581627634218, - "grad_norm": 2.318509340286255, - "learning_rate": 5.8877463965358175e-06, - "loss": 0.6892, + "epoch": 0.5877507919746568, + "grad_norm": 1.7851455211639404, + "learning_rate": 5.366425660126944e-06, + "loss": 0.6277, "step": 8349 }, { - "epoch": 0.6318338314857553, - "grad_norm": 2.149641275405884, - "learning_rate": 5.885633225049318e-06, - "loss": 0.789, + "epoch": 0.5878211897219289, + "grad_norm": 2.5376858711242676, + "learning_rate": 5.364873667403376e-06, + "loss": 0.6055, "step": 8350 }, { - "epoch": 0.631909500208089, - "grad_norm": 2.2734084129333496, - "learning_rate": 5.883520248594542e-06, - "loss": 0.7964, + "epoch": 0.587891587469201, + "grad_norm": 1.8392527103424072, + "learning_rate": 5.363321759691504e-06, + "loss": 0.6695, "step": 8351 }, { - "epoch": 0.6319851689304226, - "grad_norm": 1.703250527381897, - "learning_rate": 5.881407467303804e-06, - "loss": 0.6932, + "epoch": 0.587961985216473, + "grad_norm": 2.248911142349243, + "learning_rate": 5.361769937072019e-06, + "loss": 0.6314, "step": 8352 }, { - "epoch": 0.6320608376527562, - "grad_norm": 1.407254934310913, - "learning_rate": 5.879294881309418e-06, - "loss": 0.9285, + "epoch": 0.5880323829637452, + "grad_norm": 1.6114343404769897, + "learning_rate": 5.360218199625596e-06, + "loss": 0.7296, "step": 8353 }, { - "epoch": 0.6321365063750899, - "grad_norm": 1.6375536918640137, - "learning_rate": 5.877182490743683e-06, - "loss": 0.5346, + "epoch": 0.5881027807110173, + "grad_norm": 1.7467126846313477, + "learning_rate": 5.358666547432917e-06, + "loss": 0.7031, "step": 8354 }, { - "epoch": 0.6322121750974234, - "grad_norm": 2.1857407093048096, - "learning_rate": 5.875070295738878e-06, - "loss": 0.675, + "epoch": 0.5881731784582893, + "grad_norm": 1.914749264717102, + "learning_rate": 5.357114980574648e-06, + "loss": 0.717, "step": 8355 }, { - "epoch": 0.6322878438197571, - "grad_norm": 2.0227956771850586, - "learning_rate": 5.872958296427281e-06, - "loss": 0.6196, + "epoch": 0.5882435762055614, + "grad_norm": 1.6608506441116333, + "learning_rate": 5.355563499131462e-06, + "loss": 0.7272, "step": 8356 }, { - "epoch": 0.6323635125420908, - "grad_norm": 1.9548943042755127, - "learning_rate": 5.870846492941147e-06, - "loss": 0.8473, + "epoch": 0.5883139739528335, + "grad_norm": 1.901599407196045, + "learning_rate": 5.354012103184014e-06, + "loss": 0.6925, "step": 8357 }, { - "epoch": 0.6324391812644243, - "grad_norm": 1.8974775075912476, - "learning_rate": 5.868734885412725e-06, - "loss": 0.7865, + "epoch": 0.5883843717001056, + "grad_norm": 1.4379565715789795, + "learning_rate": 5.352460792812971e-06, + "loss": 0.7301, "step": 8358 }, { - "epoch": 0.632514849986758, - "grad_norm": 1.963448405265808, - "learning_rate": 5.866623473974256e-06, - "loss": 0.6768, + "epoch": 0.5884547694473777, + "grad_norm": 1.6602476835250854, + "learning_rate": 5.3509095680989824e-06, + "loss": 0.6346, "step": 8359 }, { - "epoch": 0.6325905187090916, - "grad_norm": 2.001469135284424, - "learning_rate": 5.864512258757957e-06, - "loss": 0.7864, + "epoch": 0.5885251671946498, + "grad_norm": 1.9433786869049072, + "learning_rate": 5.349358429122701e-06, + "loss": 0.717, "step": 8360 }, { - "epoch": 0.6326661874314252, - "grad_norm": 1.9863218069076538, - "learning_rate": 5.862401239896045e-06, - "loss": 0.6782, + "epoch": 0.5885955649419219, + "grad_norm": 1.727460503578186, + "learning_rate": 5.347807375964767e-06, + "loss": 0.5063, "step": 8361 }, { - "epoch": 0.6327418561537589, - "grad_norm": 2.1429598331451416, - "learning_rate": 5.8602904175207126e-06, - "loss": 0.8312, + "epoch": 0.5886659626891939, + "grad_norm": 1.9968005418777466, + "learning_rate": 5.346256408705826e-06, + "loss": 0.6714, "step": 8362 }, { - "epoch": 0.6328175248760924, - "grad_norm": 1.959979772567749, - "learning_rate": 5.858179791764148e-06, - "loss": 0.7098, + "epoch": 0.588736360436466, + "grad_norm": 1.8546010255813599, + "learning_rate": 5.344705527426509e-06, + "loss": 0.5155, "step": 8363 }, { - "epoch": 0.6328931935984261, - "grad_norm": 2.1421241760253906, - "learning_rate": 5.856069362758528e-06, - "loss": 0.7651, + "epoch": 0.5888067581837381, + "grad_norm": 1.7824926376342773, + "learning_rate": 5.343154732207454e-06, + "loss": 0.6354, "step": 8364 }, { - "epoch": 0.6329688623207597, - "grad_norm": 2.1992077827453613, - "learning_rate": 5.853959130636017e-06, - "loss": 0.6911, + "epoch": 0.5888771559310102, + "grad_norm": 1.6552598476409912, + "learning_rate": 5.341604023129283e-06, + "loss": 0.7075, "step": 8365 }, { - "epoch": 0.6330445310430933, - "grad_norm": 1.951259970664978, - "learning_rate": 5.8518490955287564e-06, - "loss": 0.7088, + "epoch": 0.5889475536782823, + "grad_norm": 1.9904884099960327, + "learning_rate": 5.340053400272623e-06, + "loss": 0.6023, "step": 8366 }, { - "epoch": 0.633120199765427, - "grad_norm": 2.2392029762268066, - "learning_rate": 5.849739257568891e-06, - "loss": 0.6065, + "epoch": 0.5890179514255544, + "grad_norm": 1.6443607807159424, + "learning_rate": 5.338502863718088e-06, + "loss": 0.6593, "step": 8367 }, { - "epoch": 0.6331958684877605, - "grad_norm": 1.755878210067749, - "learning_rate": 5.847629616888538e-06, - "loss": 0.5707, + "epoch": 0.5890883491728265, + "grad_norm": 2.186485528945923, + "learning_rate": 5.336952413546295e-06, + "loss": 0.7277, "step": 8368 }, { - "epoch": 0.6332715372100942, - "grad_norm": 1.9123753309249878, - "learning_rate": 5.845520173619817e-06, - "loss": 0.6594, + "epoch": 0.5891587469200985, + "grad_norm": 1.7671834230422974, + "learning_rate": 5.335402049837849e-06, + "loss": 0.6964, "step": 8369 }, { - "epoch": 0.6333472059324279, - "grad_norm": 2.4851553440093994, - "learning_rate": 5.843410927894827e-06, - "loss": 0.7454, + "epoch": 0.5892291446673706, + "grad_norm": 1.758019208908081, + "learning_rate": 5.333851772673361e-06, + "loss": 0.6913, "step": 8370 }, { - "epoch": 0.6334228746547614, - "grad_norm": 2.0496127605438232, - "learning_rate": 5.841301879845653e-06, - "loss": 0.6757, + "epoch": 0.5892995424146428, + "grad_norm": 1.7682162523269653, + "learning_rate": 5.332301582133429e-06, + "loss": 0.643, "step": 8371 }, { - "epoch": 0.6334985433770951, - "grad_norm": 2.043626070022583, - "learning_rate": 5.839193029604373e-06, - "loss": 0.7011, + "epoch": 0.5893699401619148, + "grad_norm": 1.755088210105896, + "learning_rate": 5.330751478298644e-06, + "loss": 0.5409, "step": 8372 }, { - "epoch": 0.6335742120994287, - "grad_norm": 2.5057694911956787, - "learning_rate": 5.837084377303045e-06, - "loss": 0.6438, + "epoch": 0.5894403379091869, + "grad_norm": 1.946739673614502, + "learning_rate": 5.329201461249607e-06, + "loss": 0.6457, "step": 8373 }, { - "epoch": 0.6336498808217623, - "grad_norm": 1.9614980220794678, - "learning_rate": 5.834975923073727e-06, - "loss": 0.4584, + "epoch": 0.589510735656459, + "grad_norm": 1.7998157739639282, + "learning_rate": 5.327651531066894e-06, + "loss": 0.7217, "step": 8374 }, { - "epoch": 0.633725549544096, - "grad_norm": 1.787739872932434, - "learning_rate": 5.832867667048453e-06, - "loss": 0.6868, + "epoch": 0.5895811334037311, + "grad_norm": 1.7877662181854248, + "learning_rate": 5.3261016878310965e-06, + "loss": 0.7874, "step": 8375 }, { - "epoch": 0.6338012182664295, - "grad_norm": 2.6709229946136475, - "learning_rate": 5.830759609359248e-06, - "loss": 0.7381, + "epoch": 0.5896515311510032, + "grad_norm": 2.0926873683929443, + "learning_rate": 5.324551931622786e-06, + "loss": 0.6781, "step": 8376 }, { - "epoch": 0.6338768869887632, - "grad_norm": 2.4086039066314697, - "learning_rate": 5.828651750138128e-06, - "loss": 0.6239, + "epoch": 0.5897219288982752, + "grad_norm": 1.5359621047973633, + "learning_rate": 5.323002262522539e-06, + "loss": 0.6583, "step": 8377 }, { - "epoch": 0.6339525557110968, - "grad_norm": 2.210710287094116, - "learning_rate": 5.82654408951709e-06, - "loss": 0.7521, + "epoch": 0.5897923266455474, + "grad_norm": 2.2465991973876953, + "learning_rate": 5.321452680610922e-06, + "loss": 0.7219, "step": 8378 }, { - "epoch": 0.6340282244334304, - "grad_norm": 2.0984058380126953, - "learning_rate": 5.82443662762813e-06, - "loss": 0.6862, + "epoch": 0.5898627243928194, + "grad_norm": 1.6470400094985962, + "learning_rate": 5.319903185968503e-06, + "loss": 0.5737, "step": 8379 }, { - "epoch": 0.6341038931557641, - "grad_norm": 2.363299608230591, - "learning_rate": 5.8223293646032166e-06, - "loss": 0.6896, + "epoch": 0.5899331221400915, + "grad_norm": 1.7321155071258545, + "learning_rate": 5.318353778675835e-06, + "loss": 0.6591, "step": 8380 }, { - "epoch": 0.6341795618780977, - "grad_norm": 1.9636515378952026, - "learning_rate": 5.820222300574318e-06, - "loss": 0.7168, + "epoch": 0.5900035198873637, + "grad_norm": 2.0315535068511963, + "learning_rate": 5.316804458813481e-06, + "loss": 0.6546, "step": 8381 }, { - "epoch": 0.6342552306004313, - "grad_norm": 2.6832809448242188, - "learning_rate": 5.8181154356733815e-06, - "loss": 0.7862, + "epoch": 0.5900739176346357, + "grad_norm": 2.011564016342163, + "learning_rate": 5.315255226461984e-06, + "loss": 0.761, "step": 8382 }, { - "epoch": 0.634330899322765, - "grad_norm": 1.9470926523208618, - "learning_rate": 5.816008770032347e-06, - "loss": 0.7394, + "epoch": 0.5901443153819078, + "grad_norm": 1.582651138305664, + "learning_rate": 5.313706081701897e-06, + "loss": 0.6099, "step": 8383 }, { - "epoch": 0.6344065680450985, - "grad_norm": 1.952431321144104, - "learning_rate": 5.8139023037831446e-06, - "loss": 0.7354, + "epoch": 0.5902147131291798, + "grad_norm": 2.2776167392730713, + "learning_rate": 5.312157024613755e-06, + "loss": 0.6998, "step": 8384 }, { - "epoch": 0.6344822367674322, - "grad_norm": 2.5503687858581543, - "learning_rate": 5.8117960370576845e-06, - "loss": 0.7003, + "epoch": 0.590285110876452, + "grad_norm": 1.8175209760665894, + "learning_rate": 5.3106080552781e-06, + "loss": 0.6237, "step": 8385 }, { - "epoch": 0.6345579054897658, - "grad_norm": 2.3349881172180176, - "learning_rate": 5.809689969987869e-06, - "loss": 0.6624, + "epoch": 0.590355508623724, + "grad_norm": 1.7746756076812744, + "learning_rate": 5.30905917377546e-06, + "loss": 0.6373, "step": 8386 }, { - "epoch": 0.6346335742120994, - "grad_norm": 2.486293315887451, - "learning_rate": 5.807584102705585e-06, - "loss": 0.7769, + "epoch": 0.5904259063709961, + "grad_norm": 1.644284963607788, + "learning_rate": 5.307510380186367e-06, + "loss": 0.6058, "step": 8387 }, { - "epoch": 0.6347092429344331, - "grad_norm": 2.2371928691864014, - "learning_rate": 5.805478435342707e-06, - "loss": 0.7067, + "epoch": 0.5904963041182683, + "grad_norm": 1.7496849298477173, + "learning_rate": 5.305961674591342e-06, + "loss": 0.6745, "step": 8388 }, { - "epoch": 0.6347849116567666, - "grad_norm": 2.085529088973999, - "learning_rate": 5.803372968031108e-06, - "loss": 0.7602, + "epoch": 0.5905667018655403, + "grad_norm": 2.0633082389831543, + "learning_rate": 5.304413057070904e-06, + "loss": 0.6459, "step": 8389 }, { - "epoch": 0.6348605803791003, - "grad_norm": 2.1521055698394775, - "learning_rate": 5.80126770090263e-06, - "loss": 0.9044, + "epoch": 0.5906370996128124, + "grad_norm": 2.186246156692505, + "learning_rate": 5.3028645277055635e-06, + "loss": 0.681, "step": 8390 }, { - "epoch": 0.6349362491014339, - "grad_norm": 2.458247661590576, - "learning_rate": 5.799162634089113e-06, - "loss": 0.6094, + "epoch": 0.5907074973600844, + "grad_norm": 1.8135716915130615, + "learning_rate": 5.301316086575838e-06, + "loss": 0.6653, "step": 8391 }, { - "epoch": 0.6350119178237675, - "grad_norm": 1.8370447158813477, - "learning_rate": 5.7970577677223876e-06, - "loss": 0.6742, + "epoch": 0.5907778951073566, + "grad_norm": 1.8409937620162964, + "learning_rate": 5.299767733762223e-06, + "loss": 0.6883, "step": 8392 }, { - "epoch": 0.6350875865461012, - "grad_norm": 2.2332441806793213, - "learning_rate": 5.794953101934262e-06, - "loss": 0.6287, + "epoch": 0.5908482928546287, + "grad_norm": 1.7654578685760498, + "learning_rate": 5.2982194693452275e-06, + "loss": 0.637, "step": 8393 }, { - "epoch": 0.6351632552684348, - "grad_norm": 2.1860263347625732, - "learning_rate": 5.792848636856537e-06, - "loss": 0.674, + "epoch": 0.5909186906019007, + "grad_norm": 1.6612542867660522, + "learning_rate": 5.29667129340534e-06, + "loss": 0.6721, "step": 8394 }, { - "epoch": 0.6352389239907684, - "grad_norm": 2.5469307899475098, - "learning_rate": 5.790744372621009e-06, - "loss": 0.7648, + "epoch": 0.5909890883491729, + "grad_norm": 1.691361427307129, + "learning_rate": 5.295123206023057e-06, + "loss": 0.6998, "step": 8395 }, { - "epoch": 0.6353145927131021, - "grad_norm": 2.202962636947632, - "learning_rate": 5.788640309359445e-06, - "loss": 0.8781, + "epoch": 0.5910594860964449, + "grad_norm": 3.454789876937866, + "learning_rate": 5.293575207278859e-06, + "loss": 0.5624, "step": 8396 }, { - "epoch": 0.6353902614354356, - "grad_norm": 1.9034675359725952, - "learning_rate": 5.786536447203615e-06, - "loss": 0.4877, + "epoch": 0.591129883843717, + "grad_norm": 2.2487659454345703, + "learning_rate": 5.2920272972532355e-06, + "loss": 0.7168, "step": 8397 }, { - "epoch": 0.6354659301577693, - "grad_norm": 1.8396954536437988, - "learning_rate": 5.784432786285264e-06, - "loss": 0.6477, + "epoch": 0.5912002815909891, + "grad_norm": 1.8641632795333862, + "learning_rate": 5.290479476026657e-06, + "loss": 0.7155, "step": 8398 }, { - "epoch": 0.6355415988801029, - "grad_norm": 2.2514522075653076, - "learning_rate": 5.78232932673613e-06, - "loss": 0.6755, + "epoch": 0.5912706793382612, + "grad_norm": 1.8380218744277954, + "learning_rate": 5.288931743679601e-06, + "loss": 0.6472, "step": 8399 }, { - "epoch": 0.6356172676024365, - "grad_norm": 2.3135526180267334, - "learning_rate": 5.780226068687944e-06, - "loss": 0.6906, + "epoch": 0.5913410770855333, + "grad_norm": 1.7818262577056885, + "learning_rate": 5.287384100292531e-06, + "loss": 0.8391, "step": 8400 }, { - "epoch": 0.6356929363247702, - "grad_norm": 1.9569233655929565, - "learning_rate": 5.778123012272415e-06, - "loss": 0.7055, + "epoch": 0.5914114748328053, + "grad_norm": 1.9005120992660522, + "learning_rate": 5.285836545945915e-06, + "loss": 0.6727, "step": 8401 }, { - "epoch": 0.6357686050471038, - "grad_norm": 2.095384359359741, - "learning_rate": 5.776020157621244e-06, - "loss": 0.8051, + "epoch": 0.5914818725800775, + "grad_norm": 2.0661709308624268, + "learning_rate": 5.284289080720206e-06, + "loss": 0.6657, "step": 8402 }, { - "epoch": 0.6358442737694374, - "grad_norm": 2.363507032394409, - "learning_rate": 5.773917504866118e-06, - "loss": 0.6488, + "epoch": 0.5915522703273495, + "grad_norm": 1.7281635999679565, + "learning_rate": 5.282741704695865e-06, + "loss": 0.6936, "step": 8403 }, { - "epoch": 0.635919942491771, - "grad_norm": 1.7671669721603394, - "learning_rate": 5.77181505413871e-06, - "loss": 0.6069, + "epoch": 0.5916226680746216, + "grad_norm": 1.7442364692687988, + "learning_rate": 5.281194417953335e-06, + "loss": 0.6952, "step": 8404 }, { - "epoch": 0.6359956112141046, - "grad_norm": 2.2608208656311035, - "learning_rate": 5.7697128055706865e-06, - "loss": 0.7638, + "epoch": 0.5916930658218937, + "grad_norm": 2.2027995586395264, + "learning_rate": 5.279647220573067e-06, + "loss": 0.7173, "step": 8405 }, { - "epoch": 0.6360712799364383, - "grad_norm": 2.832077741622925, - "learning_rate": 5.767610759293697e-06, - "loss": 0.5604, + "epoch": 0.5917634635691658, + "grad_norm": 1.418886423110962, + "learning_rate": 5.278100112635496e-06, + "loss": 0.6662, "step": 8406 }, { - "epoch": 0.6361469486587719, - "grad_norm": 1.9445099830627441, - "learning_rate": 5.765508915439374e-06, - "loss": 0.5337, + "epoch": 0.5918338613164379, + "grad_norm": 2.9493117332458496, + "learning_rate": 5.27655309422106e-06, + "loss": 0.758, "step": 8407 }, { - "epoch": 0.6362226173811055, - "grad_norm": 2.1563804149627686, - "learning_rate": 5.763407274139347e-06, - "loss": 0.7573, + "epoch": 0.5919042590637099, + "grad_norm": 1.6099900007247925, + "learning_rate": 5.275006165410187e-06, + "loss": 0.7704, "step": 8408 }, { - "epoch": 0.6362982861034392, - "grad_norm": 2.05100417137146, - "learning_rate": 5.761305835525221e-06, - "loss": 0.5423, + "epoch": 0.591974656810982, + "grad_norm": 2.3151681423187256, + "learning_rate": 5.27345932628331e-06, + "loss": 0.6545, "step": 8409 }, { - "epoch": 0.6363739548257727, - "grad_norm": 2.729825258255005, - "learning_rate": 5.7592045997286e-06, - "loss": 0.6868, + "epoch": 0.5920450545582542, + "grad_norm": 2.0567710399627686, + "learning_rate": 5.2719125769208436e-06, + "loss": 0.69, "step": 8410 }, { - "epoch": 0.6364496235481064, - "grad_norm": 2.297889232635498, - "learning_rate": 5.757103566881071e-06, - "loss": 0.8106, + "epoch": 0.5921154523055262, + "grad_norm": 2.069877862930298, + "learning_rate": 5.270365917403209e-06, + "loss": 0.7384, "step": 8411 }, { - "epoch": 0.63652529227044, - "grad_norm": 2.0106875896453857, - "learning_rate": 5.755002737114204e-06, - "loss": 0.6299, + "epoch": 0.5921858500527983, + "grad_norm": 1.6979007720947266, + "learning_rate": 5.268819347810816e-06, + "loss": 0.6329, "step": 8412 }, { - "epoch": 0.6366009609927736, - "grad_norm": 2.3025240898132324, - "learning_rate": 5.752902110559564e-06, - "loss": 0.6633, + "epoch": 0.5922562478000704, + "grad_norm": 1.5977733135223389, + "learning_rate": 5.267272868224074e-06, + "loss": 0.5486, "step": 8413 }, { - "epoch": 0.6366766297151073, - "grad_norm": 2.3724968433380127, - "learning_rate": 5.75080168734869e-06, - "loss": 0.6491, + "epoch": 0.5923266455473425, + "grad_norm": 1.4649254083633423, + "learning_rate": 5.265726478723381e-06, + "loss": 0.6736, "step": 8414 }, { - "epoch": 0.6367522984374409, - "grad_norm": 2.080514430999756, - "learning_rate": 5.748701467613128e-06, - "loss": 0.5447, + "epoch": 0.5923970432946146, + "grad_norm": 1.739910364151001, + "learning_rate": 5.264180179389144e-06, + "loss": 0.6113, "step": 8415 }, { - "epoch": 0.6368279671597745, - "grad_norm": 1.9041091203689575, - "learning_rate": 5.746601451484396e-06, - "loss": 0.7418, + "epoch": 0.5924674410418866, + "grad_norm": 1.9360419511795044, + "learning_rate": 5.262633970301748e-06, + "loss": 0.6817, "step": 8416 }, { - "epoch": 0.6369036358821081, - "grad_norm": 2.2937114238739014, - "learning_rate": 5.744501639094003e-06, - "loss": 0.9319, + "epoch": 0.5925378387891588, + "grad_norm": 1.6822668313980103, + "learning_rate": 5.261087851541588e-06, + "loss": 0.6684, "step": 8417 }, { - "epoch": 0.6369793046044417, - "grad_norm": 1.9907230138778687, - "learning_rate": 5.742402030573449e-06, - "loss": 0.7111, + "epoch": 0.5926082365364308, + "grad_norm": 1.6639292240142822, + "learning_rate": 5.259541823189044e-06, + "loss": 0.6621, "step": 8418 }, { - "epoch": 0.6370549733267754, - "grad_norm": 1.8847614526748657, - "learning_rate": 5.74030262605421e-06, - "loss": 0.6703, + "epoch": 0.5926786342837029, + "grad_norm": 1.8495620489120483, + "learning_rate": 5.257995885324497e-06, + "loss": 0.8016, "step": 8419 }, { - "epoch": 0.637130642049109, - "grad_norm": 1.972623586654663, - "learning_rate": 5.73820342566777e-06, - "loss": 0.763, + "epoch": 0.592749032030975, + "grad_norm": 2.0214755535125732, + "learning_rate": 5.25645003802832e-06, + "loss": 0.6894, "step": 8420 }, { - "epoch": 0.6372063107714426, - "grad_norm": 2.680828332901001, - "learning_rate": 5.736104429545579e-06, - "loss": 0.7193, + "epoch": 0.5928194297782471, + "grad_norm": 2.000746488571167, + "learning_rate": 5.254904281380888e-06, + "loss": 0.7174, "step": 8421 }, { - "epoch": 0.6372819794937763, - "grad_norm": 2.13775372505188, - "learning_rate": 5.7340056378190865e-06, - "loss": 0.6998, + "epoch": 0.5928898275255192, + "grad_norm": 1.8237947225570679, + "learning_rate": 5.25335861546256e-06, + "loss": 0.6055, "step": 8422 }, { - "epoch": 0.6373576482161099, - "grad_norm": 2.1243772506713867, - "learning_rate": 5.731907050619723e-06, - "loss": 0.6792, + "epoch": 0.5929602252727912, + "grad_norm": 1.8719887733459473, + "learning_rate": 5.251813040353698e-06, + "loss": 0.6519, "step": 8423 }, { - "epoch": 0.6374333169384435, - "grad_norm": 1.8973451852798462, - "learning_rate": 5.72980866807891e-06, - "loss": 0.7644, + "epoch": 0.5930306230200634, + "grad_norm": 1.995680332183838, + "learning_rate": 5.250267556134665e-06, + "loss": 0.7133, "step": 8424 }, { - "epoch": 0.6375089856607771, - "grad_norm": 1.803795576095581, - "learning_rate": 5.7277104903280575e-06, - "loss": 0.6412, + "epoch": 0.5931010207673354, + "grad_norm": 1.987311840057373, + "learning_rate": 5.248722162885801e-06, + "loss": 0.7367, "step": 8425 }, { - "epoch": 0.6375846543831107, - "grad_norm": 1.9631472826004028, - "learning_rate": 5.725612517498555e-06, - "loss": 0.6546, + "epoch": 0.5931714185146075, + "grad_norm": 2.1546270847320557, + "learning_rate": 5.24717686068746e-06, + "loss": 0.6577, "step": 8426 }, { - "epoch": 0.6376603231054444, - "grad_norm": 2.7881197929382324, - "learning_rate": 5.723514749721792e-06, - "loss": 0.7727, + "epoch": 0.5932418162618797, + "grad_norm": 1.9644157886505127, + "learning_rate": 5.2456316496199825e-06, + "loss": 0.5863, "step": 8427 }, { - "epoch": 0.637735991827778, - "grad_norm": 2.023376941680908, - "learning_rate": 5.721417187129128e-06, - "loss": 0.7832, + "epoch": 0.5933122140091517, + "grad_norm": 1.6279038190841675, + "learning_rate": 5.244086529763704e-06, + "loss": 0.7078, "step": 8428 }, { - "epoch": 0.6378116605501116, - "grad_norm": 2.40487003326416, - "learning_rate": 5.719319829851925e-06, - "loss": 0.8232, + "epoch": 0.5933826117564238, + "grad_norm": 1.6573389768600464, + "learning_rate": 5.242541501198954e-06, + "loss": 0.7749, "step": 8429 }, { - "epoch": 0.6378873292724452, - "grad_norm": 2.0782392024993896, - "learning_rate": 5.717222678021528e-06, - "loss": 0.5967, + "epoch": 0.5934530095036958, + "grad_norm": 1.94803786277771, + "learning_rate": 5.240996564006068e-06, + "loss": 0.6791, "step": 8430 }, { - "epoch": 0.6379629979947788, - "grad_norm": 2.3502237796783447, - "learning_rate": 5.715125731769261e-06, - "loss": 0.8443, + "epoch": 0.593523407250968, + "grad_norm": 1.7930705547332764, + "learning_rate": 5.2394517182653584e-06, + "loss": 0.7573, "step": 8431 }, { - "epoch": 0.6380386667171125, - "grad_norm": 4.884004592895508, - "learning_rate": 5.713028991226448e-06, - "loss": 0.7055, + "epoch": 0.5935938049982401, + "grad_norm": 2.0571398735046387, + "learning_rate": 5.237906964057153e-06, + "loss": 0.7046, "step": 8432 }, { - "epoch": 0.6381143354394461, - "grad_norm": 2.4166226387023926, - "learning_rate": 5.71093245652439e-06, - "loss": 0.6572, + "epoch": 0.5936642027455121, + "grad_norm": 1.5812034606933594, + "learning_rate": 5.236362301461757e-06, + "loss": 0.5776, "step": 8433 }, { - "epoch": 0.6381900041617797, - "grad_norm": 1.7382185459136963, - "learning_rate": 5.708836127794382e-06, - "loss": 0.4822, + "epoch": 0.5937346004927843, + "grad_norm": 1.9026424884796143, + "learning_rate": 5.234817730559485e-06, + "loss": 0.7376, "step": 8434 }, { - "epoch": 0.6382656728841134, - "grad_norm": 1.87117338180542, - "learning_rate": 5.706740005167694e-06, - "loss": 0.7563, + "epoch": 0.5938049982400563, + "grad_norm": 2.0452849864959717, + "learning_rate": 5.233273251430635e-06, + "loss": 0.5923, "step": 8435 }, { - "epoch": 0.638341341606447, - "grad_norm": 2.3526854515075684, - "learning_rate": 5.704644088775605e-06, - "loss": 0.6714, + "epoch": 0.5938753959873284, + "grad_norm": 1.7293288707733154, + "learning_rate": 5.231728864155513e-06, + "loss": 0.7222, "step": 8436 }, { - "epoch": 0.6384170103287806, - "grad_norm": 1.952702283859253, - "learning_rate": 5.702548378749359e-06, - "loss": 0.6943, + "epoch": 0.5939457937346005, + "grad_norm": 2.533982753753662, + "learning_rate": 5.230184568814403e-06, + "loss": 0.6477, "step": 8437 }, { - "epoch": 0.6384926790511142, - "grad_norm": 4.3784356117248535, - "learning_rate": 5.7004528752202e-06, - "loss": 0.5664, + "epoch": 0.5940161914818726, + "grad_norm": 1.5526905059814453, + "learning_rate": 5.228640365487605e-06, + "loss": 0.6949, "step": 8438 }, { - "epoch": 0.6385683477734478, - "grad_norm": 2.005580425262451, - "learning_rate": 5.698357578319353e-06, - "loss": 0.4389, + "epoch": 0.5940865892291447, + "grad_norm": 1.5807985067367554, + "learning_rate": 5.227096254255396e-06, + "loss": 0.6698, "step": 8439 }, { - "epoch": 0.6386440164957815, - "grad_norm": 2.7630388736724854, - "learning_rate": 5.696262488178031e-06, - "loss": 0.5916, + "epoch": 0.5941569869764167, + "grad_norm": 1.56657874584198, + "learning_rate": 5.225552235198059e-06, + "loss": 0.6564, "step": 8440 }, { - "epoch": 0.6387196852181151, - "grad_norm": 2.2635788917541504, - "learning_rate": 5.694167604927441e-06, - "loss": 0.8042, + "epoch": 0.5942273847236889, + "grad_norm": 2.1229159832000732, + "learning_rate": 5.224008308395867e-06, + "loss": 0.6773, "step": 8441 }, { - "epoch": 0.6387953539404487, - "grad_norm": 2.6450791358947754, - "learning_rate": 5.692072928698768e-06, - "loss": 0.5544, + "epoch": 0.5942977824709609, + "grad_norm": 1.6430245637893677, + "learning_rate": 5.222464473929095e-06, + "loss": 0.6343, "step": 8442 }, { - "epoch": 0.6388710226627823, - "grad_norm": 2.0859289169311523, - "learning_rate": 5.689978459623186e-06, - "loss": 0.798, + "epoch": 0.594368180218233, + "grad_norm": 1.627137303352356, + "learning_rate": 5.2209207318780015e-06, + "loss": 0.6773, "step": 8443 }, { - "epoch": 0.638946691385116, - "grad_norm": 2.267434597015381, - "learning_rate": 5.6878841978318596e-06, - "loss": 0.789, + "epoch": 0.5944385779655051, + "grad_norm": 1.7977741956710815, + "learning_rate": 5.2193770823228526e-06, + "loss": 0.6276, "step": 8444 }, { - "epoch": 0.6390223601074496, - "grad_norm": 2.525017738342285, - "learning_rate": 5.6857901434559335e-06, - "loss": 0.6433, + "epoch": 0.5945089757127772, + "grad_norm": 1.5045078992843628, + "learning_rate": 5.2178335253439e-06, + "loss": 0.6274, "step": 8445 }, { - "epoch": 0.6390980288297832, - "grad_norm": 3.037821054458618, - "learning_rate": 5.683696296626554e-06, - "loss": 0.7781, + "epoch": 0.5945793734600493, + "grad_norm": 2.0456531047821045, + "learning_rate": 5.216290061021398e-06, + "loss": 0.721, "step": 8446 }, { - "epoch": 0.6391736975521168, - "grad_norm": 2.607813835144043, - "learning_rate": 5.681602657474835e-06, - "loss": 0.7136, + "epoch": 0.5946497712073213, + "grad_norm": 1.6100322008132935, + "learning_rate": 5.214746689435589e-06, + "loss": 0.7668, "step": 8447 }, { - "epoch": 0.6392493662744505, - "grad_norm": 2.1024436950683594, - "learning_rate": 5.679509226131894e-06, - "loss": 0.6246, + "epoch": 0.5947201689545935, + "grad_norm": 1.759446620941162, + "learning_rate": 5.213203410666719e-06, + "loss": 0.722, "step": 8448 }, { - "epoch": 0.6393250349967841, - "grad_norm": 2.2261359691619873, - "learning_rate": 5.677416002728822e-06, - "loss": 0.847, + "epoch": 0.5947905667018656, + "grad_norm": 1.530649185180664, + "learning_rate": 5.21166022479502e-06, + "loss": 0.5748, "step": 8449 }, { - "epoch": 0.6394007037191177, - "grad_norm": 2.5437755584716797, - "learning_rate": 5.675322987396705e-06, - "loss": 0.5989, + "epoch": 0.5948609644491376, + "grad_norm": 1.52797532081604, + "learning_rate": 5.210117131900727e-06, + "loss": 0.6511, "step": 8450 }, { - "epoch": 0.6394763724414513, - "grad_norm": 2.4765031337738037, - "learning_rate": 5.673230180266618e-06, - "loss": 0.6378, + "epoch": 0.5949313621964097, + "grad_norm": 1.9313249588012695, + "learning_rate": 5.208574132064064e-06, + "loss": 0.6941, "step": 8451 }, { - "epoch": 0.639552041163785, - "grad_norm": 2.359419584274292, - "learning_rate": 5.6711375814696184e-06, - "loss": 0.7394, + "epoch": 0.5950017599436818, + "grad_norm": 1.6650738716125488, + "learning_rate": 5.207031225365256e-06, + "loss": 0.6766, "step": 8452 }, { - "epoch": 0.6396277098861186, - "grad_norm": 1.8588393926620483, - "learning_rate": 5.66904519113675e-06, - "loss": 0.6876, + "epoch": 0.5950721576909539, + "grad_norm": 1.8308238983154297, + "learning_rate": 5.205488411884516e-06, + "loss": 0.7326, "step": 8453 }, { - "epoch": 0.6397033786084522, - "grad_norm": 2.5035762786865234, - "learning_rate": 5.666953009399045e-06, - "loss": 0.8412, + "epoch": 0.595142555438226, + "grad_norm": 1.9926059246063232, + "learning_rate": 5.203945691702061e-06, + "loss": 0.7732, "step": 8454 }, { - "epoch": 0.6397790473307858, - "grad_norm": 2.0973055362701416, - "learning_rate": 5.6648610363875196e-06, - "loss": 0.6031, + "epoch": 0.595212953185498, + "grad_norm": 1.7136894464492798, + "learning_rate": 5.202403064898094e-06, + "loss": 0.6819, "step": 8455 }, { - "epoch": 0.6398547160531194, - "grad_norm": 1.9568322896957397, - "learning_rate": 5.662769272233186e-06, - "loss": 0.6001, + "epoch": 0.5952833509327702, + "grad_norm": 1.59889554977417, + "learning_rate": 5.200860531552822e-06, + "loss": 0.7941, "step": 8456 }, { - "epoch": 0.639930384775453, - "grad_norm": 1.8160796165466309, - "learning_rate": 5.660677717067035e-06, - "loss": 0.6849, + "epoch": 0.5953537486800422, + "grad_norm": 1.6473182439804077, + "learning_rate": 5.199318091746438e-06, + "loss": 0.7225, "step": 8457 }, { - "epoch": 0.6400060534977867, - "grad_norm": 2.6608216762542725, - "learning_rate": 5.658586371020046e-06, - "loss": 0.8533, + "epoch": 0.5954241464273143, + "grad_norm": 1.749969482421875, + "learning_rate": 5.19777574555914e-06, + "loss": 0.6209, "step": 8458 }, { - "epoch": 0.6400817222201203, - "grad_norm": 1.9408966302871704, - "learning_rate": 5.6564952342231875e-06, - "loss": 0.584, + "epoch": 0.5954945441745864, + "grad_norm": 3.95424222946167, + "learning_rate": 5.196233493071109e-06, + "loss": 0.6266, "step": 8459 }, { - "epoch": 0.6401573909424539, - "grad_norm": 2.22806978225708, - "learning_rate": 5.654404306807407e-06, - "loss": 0.6623, + "epoch": 0.5955649419218585, + "grad_norm": 1.7472293376922607, + "learning_rate": 5.1946913343625366e-06, + "loss": 0.7063, "step": 8460 }, { - "epoch": 0.6402330596647876, - "grad_norm": 1.8229750394821167, - "learning_rate": 5.652313588903652e-06, - "loss": 0.6162, + "epoch": 0.5956353396691306, + "grad_norm": 1.813157081604004, + "learning_rate": 5.1931492695135935e-06, + "loss": 0.6347, "step": 8461 }, { - "epoch": 0.6403087283871212, - "grad_norm": 2.114300489425659, - "learning_rate": 5.650223080642849e-06, - "loss": 0.6585, + "epoch": 0.5957057374164026, + "grad_norm": 1.6858958005905151, + "learning_rate": 5.191607298604458e-06, + "loss": 0.6391, "step": 8462 }, { - "epoch": 0.6403843971094548, - "grad_norm": 2.9402008056640625, - "learning_rate": 5.648132782155911e-06, - "loss": 0.7531, + "epoch": 0.5957761351636748, + "grad_norm": 2.0438168048858643, + "learning_rate": 5.190065421715297e-06, + "loss": 0.7187, "step": 8463 }, { - "epoch": 0.6404600658317884, - "grad_norm": 2.2352404594421387, - "learning_rate": 5.646042693573738e-06, - "loss": 0.7114, + "epoch": 0.5958465329109468, + "grad_norm": 1.9380947351455688, + "learning_rate": 5.188523638926274e-06, + "loss": 0.7089, "step": 8464 }, { - "epoch": 0.640535734554122, - "grad_norm": 1.94172203540802, - "learning_rate": 5.643952815027218e-06, - "loss": 0.6461, + "epoch": 0.5959169306582189, + "grad_norm": 1.6544520854949951, + "learning_rate": 5.186981950317545e-06, + "loss": 0.6336, "step": 8465 }, { - "epoch": 0.6406114032764557, - "grad_norm": 2.5948123931884766, - "learning_rate": 5.6418631466472315e-06, - "loss": 0.714, + "epoch": 0.5959873284054911, + "grad_norm": 1.6327112913131714, + "learning_rate": 5.1854403559692694e-06, + "loss": 0.6226, "step": 8466 }, { - "epoch": 0.6406870719987893, - "grad_norm": 1.9451491832733154, - "learning_rate": 5.639773688564634e-06, - "loss": 0.664, + "epoch": 0.5960577261527631, + "grad_norm": 1.7212512493133545, + "learning_rate": 5.183898855961591e-06, + "loss": 0.5733, "step": 8467 }, { - "epoch": 0.6407627407211229, - "grad_norm": 2.0619945526123047, - "learning_rate": 5.637684440910279e-06, - "loss": 0.7656, + "epoch": 0.5961281239000352, + "grad_norm": 1.7880879640579224, + "learning_rate": 5.182357450374658e-06, + "loss": 0.7616, "step": 8468 }, { - "epoch": 0.6408384094434566, - "grad_norm": 1.8974416255950928, - "learning_rate": 5.635595403814996e-06, - "loss": 0.6097, + "epoch": 0.5961985216473072, + "grad_norm": 2.211440086364746, + "learning_rate": 5.180816139288605e-06, + "loss": 0.6081, "step": 8469 }, { - "epoch": 0.6409140781657902, - "grad_norm": 3.0312063694000244, - "learning_rate": 5.633506577409614e-06, - "loss": 0.7523, + "epoch": 0.5962689193945794, + "grad_norm": 1.8194903135299683, + "learning_rate": 5.179274922783571e-06, + "loss": 0.656, "step": 8470 }, { - "epoch": 0.6409897468881238, - "grad_norm": 2.127302885055542, - "learning_rate": 5.631417961824933e-06, - "loss": 0.7443, + "epoch": 0.5963393171418515, + "grad_norm": 1.9253053665161133, + "learning_rate": 5.177733800939679e-06, + "loss": 0.6949, "step": 8471 }, { - "epoch": 0.6410654156104574, - "grad_norm": 2.226922035217285, - "learning_rate": 5.629329557191758e-06, - "loss": 0.6491, + "epoch": 0.5964097148891235, + "grad_norm": 1.5858569145202637, + "learning_rate": 5.1761927738370604e-06, + "loss": 0.5748, "step": 8472 }, { - "epoch": 0.641141084332791, - "grad_norm": 1.9703574180603027, - "learning_rate": 5.627241363640871e-06, - "loss": 0.5922, + "epoch": 0.5964801126363957, + "grad_norm": 1.9902812242507935, + "learning_rate": 5.17465184155583e-06, + "loss": 0.5605, "step": 8473 }, { - "epoch": 0.6412167530551247, - "grad_norm": 2.090078353881836, - "learning_rate": 5.6251533813030355e-06, - "loss": 0.6001, + "epoch": 0.5965505103836677, + "grad_norm": 2.0020978450775146, + "learning_rate": 5.173111004176102e-06, + "loss": 0.7032, "step": 8474 }, { - "epoch": 0.6412924217774583, - "grad_norm": 2.1429638862609863, - "learning_rate": 5.623065610309013e-06, - "loss": 0.6735, + "epoch": 0.5966209081309398, + "grad_norm": 1.9419745206832886, + "learning_rate": 5.1715702617779915e-06, + "loss": 0.7956, "step": 8475 }, { - "epoch": 0.6413680904997919, - "grad_norm": 1.9501971006393433, - "learning_rate": 5.6209780507895404e-06, - "loss": 0.6364, + "epoch": 0.5966913058782118, + "grad_norm": 1.9510911703109741, + "learning_rate": 5.170029614441594e-06, + "loss": 0.6076, "step": 8476 }, { - "epoch": 0.6414437592221255, - "grad_norm": 1.956324577331543, - "learning_rate": 5.618890702875353e-06, - "loss": 0.7588, + "epoch": 0.596761703625484, + "grad_norm": 1.9945069551467896, + "learning_rate": 5.168489062247018e-06, + "loss": 0.6921, "step": 8477 }, { - "epoch": 0.6415194279444592, - "grad_norm": 2.1515815258026123, - "learning_rate": 5.616803566697168e-06, - "loss": 0.7206, + "epoch": 0.5968321013727561, + "grad_norm": 1.8520053625106812, + "learning_rate": 5.166948605274353e-06, + "loss": 0.6942, "step": 8478 }, { - "epoch": 0.6415950966667928, - "grad_norm": 2.6640117168426514, - "learning_rate": 5.614716642385684e-06, - "loss": 0.6887, + "epoch": 0.5969024991200281, + "grad_norm": 2.0994372367858887, + "learning_rate": 5.165408243603691e-06, + "loss": 0.6709, "step": 8479 }, { - "epoch": 0.6416707653891264, - "grad_norm": 2.3514342308044434, - "learning_rate": 5.612629930071594e-06, - "loss": 0.5355, + "epoch": 0.5969728968673003, + "grad_norm": 1.645316481590271, + "learning_rate": 5.163867977315112e-06, + "loss": 0.6427, "step": 8480 }, { - "epoch": 0.64174643411146, - "grad_norm": 1.976243019104004, - "learning_rate": 5.610543429885571e-06, - "loss": 0.7735, + "epoch": 0.5970432946145723, + "grad_norm": 2.221527099609375, + "learning_rate": 5.162327806488706e-06, + "loss": 0.6555, "step": 8481 }, { - "epoch": 0.6418221028337937, - "grad_norm": 2.3452563285827637, - "learning_rate": 5.608457141958285e-06, - "loss": 0.6755, + "epoch": 0.5971136923618444, + "grad_norm": 2.211069107055664, + "learning_rate": 5.160787731204537e-06, + "loss": 0.7698, "step": 8482 }, { - "epoch": 0.6418977715561273, - "grad_norm": 2.0472850799560547, - "learning_rate": 5.60637106642038e-06, - "loss": 0.6521, + "epoch": 0.5971840901091165, + "grad_norm": 1.7305387258529663, + "learning_rate": 5.159247751542682e-06, + "loss": 0.6889, "step": 8483 }, { - "epoch": 0.6419734402784609, - "grad_norm": 2.2305705547332764, - "learning_rate": 5.6042852034024995e-06, - "loss": 0.8029, + "epoch": 0.5972544878563886, + "grad_norm": 1.7123372554779053, + "learning_rate": 5.157707867583202e-06, + "loss": 0.7088, "step": 8484 }, { - "epoch": 0.6420491090007945, - "grad_norm": 2.1921286582946777, - "learning_rate": 5.602199553035258e-06, - "loss": 0.6344, + "epoch": 0.5973248856036607, + "grad_norm": 1.55746328830719, + "learning_rate": 5.15616807940616e-06, + "loss": 0.5799, "step": 8485 }, { - "epoch": 0.6421247777231281, - "grad_norm": 2.1539828777313232, - "learning_rate": 5.600114115449269e-06, - "loss": 0.7381, + "epoch": 0.5973952833509327, + "grad_norm": 1.6479235887527466, + "learning_rate": 5.154628387091606e-06, + "loss": 0.5741, "step": 8486 }, { - "epoch": 0.6422004464454618, - "grad_norm": 1.9343609809875488, - "learning_rate": 5.598028890775135e-06, - "loss": 0.5944, + "epoch": 0.5974656810982049, + "grad_norm": 1.8764299154281616, + "learning_rate": 5.153088790719599e-06, + "loss": 0.6456, "step": 8487 }, { - "epoch": 0.6422761151677954, - "grad_norm": 1.9569308757781982, - "learning_rate": 5.595943879143434e-06, - "loss": 0.6096, + "epoch": 0.597536078845477, + "grad_norm": 2.1327567100524902, + "learning_rate": 5.151549290370173e-06, + "loss": 0.8826, "step": 8488 }, { - "epoch": 0.642351783890129, - "grad_norm": 2.5590083599090576, - "learning_rate": 5.593859080684738e-06, - "loss": 0.6341, + "epoch": 0.597606476592749, + "grad_norm": 1.8685656785964966, + "learning_rate": 5.150009886123376e-06, + "loss": 0.7579, "step": 8489 }, { - "epoch": 0.6424274526124626, - "grad_norm": 2.0084571838378906, - "learning_rate": 5.591774495529602e-06, - "loss": 0.5779, + "epoch": 0.5976768743400211, + "grad_norm": 1.8479071855545044, + "learning_rate": 5.14847057805924e-06, + "loss": 0.642, "step": 8490 }, { - "epoch": 0.6425031213347963, - "grad_norm": 2.7293450832366943, - "learning_rate": 5.589690123808568e-06, - "loss": 0.7232, + "epoch": 0.5977472720872932, + "grad_norm": 2.115830659866333, + "learning_rate": 5.1469313662577965e-06, + "loss": 0.6204, "step": 8491 }, { - "epoch": 0.6425787900571299, - "grad_norm": 2.072709321975708, - "learning_rate": 5.587605965652173e-06, - "loss": 0.7979, + "epoch": 0.5978176698345653, + "grad_norm": 1.9722508192062378, + "learning_rate": 5.145392250799067e-06, + "loss": 0.6289, "step": 8492 }, { - "epoch": 0.6426544587794635, - "grad_norm": 1.9151296615600586, - "learning_rate": 5.585522021190928e-06, - "loss": 0.7374, + "epoch": 0.5978880675818374, + "grad_norm": 1.6348600387573242, + "learning_rate": 5.143853231763078e-06, + "loss": 0.6839, "step": 8493 }, { - "epoch": 0.6427301275017971, - "grad_norm": 3.357416868209839, - "learning_rate": 5.583438290555337e-06, - "loss": 0.6413, + "epoch": 0.5979584653291095, + "grad_norm": 1.7035253047943115, + "learning_rate": 5.1423143092298365e-06, + "loss": 0.6619, "step": 8494 }, { - "epoch": 0.6428057962241308, - "grad_norm": 2.125547409057617, - "learning_rate": 5.581354773875893e-06, - "loss": 0.6862, + "epoch": 0.5980288630763816, + "grad_norm": 2.2626399993896484, + "learning_rate": 5.140775483279359e-06, + "loss": 0.6119, "step": 8495 }, { - "epoch": 0.6428814649464644, - "grad_norm": 2.053462266921997, - "learning_rate": 5.579271471283065e-06, - "loss": 0.7123, + "epoch": 0.5980992608236536, + "grad_norm": 1.7520761489868164, + "learning_rate": 5.139236753991646e-06, + "loss": 0.7304, "step": 8496 }, { - "epoch": 0.642957133668798, - "grad_norm": 2.2528815269470215, - "learning_rate": 5.577188382907326e-06, - "loss": 0.6743, + "epoch": 0.5981696585709257, + "grad_norm": 1.6778563261032104, + "learning_rate": 5.137698121446702e-06, + "loss": 0.6409, "step": 8497 }, { - "epoch": 0.6430328023911316, - "grad_norm": 2.1880528926849365, - "learning_rate": 5.575105508879122e-06, - "loss": 0.6552, + "epoch": 0.5982400563181978, + "grad_norm": 1.952854037284851, + "learning_rate": 5.136159585724516e-06, + "loss": 0.5217, "step": 8498 }, { - "epoch": 0.6431084711134653, - "grad_norm": 2.2088634967803955, - "learning_rate": 5.573022849328886e-06, - "loss": 0.8085, + "epoch": 0.5983104540654699, + "grad_norm": 1.5444109439849854, + "learning_rate": 5.134621146905085e-06, + "loss": 0.6839, "step": 8499 }, { - "epoch": 0.6431841398357989, - "grad_norm": 2.240103244781494, - "learning_rate": 5.570940404387046e-06, - "loss": 0.6571, + "epoch": 0.598380851812742, + "grad_norm": 1.5762684345245361, + "learning_rate": 5.133082805068386e-06, + "loss": 0.6792, "step": 8500 }, { - "epoch": 0.6432598085581325, - "grad_norm": 2.2424118518829346, - "learning_rate": 5.568858174184005e-06, - "loss": 0.5845, + "epoch": 0.598451249560014, + "grad_norm": 1.628889799118042, + "learning_rate": 5.1315445602944056e-06, + "loss": 0.6528, "step": 8501 }, { - "epoch": 0.6433354772804661, - "grad_norm": 2.1710944175720215, - "learning_rate": 5.566776158850164e-06, - "loss": 0.7788, + "epoch": 0.5985216473072862, + "grad_norm": 2.0879697799682617, + "learning_rate": 5.130006412663113e-06, + "loss": 0.7015, "step": 8502 }, { - "epoch": 0.6434111460027997, - "grad_norm": 2.4002442359924316, - "learning_rate": 5.564694358515907e-06, - "loss": 0.5717, + "epoch": 0.5985920450545582, + "grad_norm": 1.8271204233169556, + "learning_rate": 5.128468362254485e-06, + "loss": 0.601, "step": 8503 }, { - "epoch": 0.6434868147251334, - "grad_norm": 1.750193476676941, - "learning_rate": 5.5626127733115976e-06, - "loss": 0.8584, + "epoch": 0.5986624428018303, + "grad_norm": 2.1266415119171143, + "learning_rate": 5.126930409148476e-06, + "loss": 0.6009, "step": 8504 }, { - "epoch": 0.643562483447467, - "grad_norm": 2.198309898376465, - "learning_rate": 5.560531403367596e-06, - "loss": 0.8146, + "epoch": 0.5987328405491025, + "grad_norm": 1.7206013202667236, + "learning_rate": 5.125392553425056e-06, + "loss": 0.6591, "step": 8505 }, { - "epoch": 0.6436381521698006, - "grad_norm": 1.8954391479492188, - "learning_rate": 5.55845024881424e-06, - "loss": 0.6983, + "epoch": 0.5988032382963745, + "grad_norm": 1.7175180912017822, + "learning_rate": 5.1238547951641726e-06, + "loss": 0.6439, "step": 8506 }, { - "epoch": 0.6437138208921342, - "grad_norm": 2.1946661472320557, - "learning_rate": 5.556369309781862e-06, - "loss": 0.729, + "epoch": 0.5988736360436466, + "grad_norm": 2.0200042724609375, + "learning_rate": 5.122317134445779e-06, + "loss": 0.6865, "step": 8507 }, { - "epoch": 0.6437894896144679, - "grad_norm": 2.274904727935791, - "learning_rate": 5.5542885864007756e-06, - "loss": 0.6662, + "epoch": 0.5989440337909187, + "grad_norm": 1.5090858936309814, + "learning_rate": 5.120779571349817e-06, + "loss": 0.7067, "step": 8508 }, { - "epoch": 0.6438651583368015, - "grad_norm": 2.831035852432251, - "learning_rate": 5.5522080788012845e-06, - "loss": 0.8062, + "epoch": 0.5990144315381908, + "grad_norm": 1.749621868133545, + "learning_rate": 5.119242105956228e-06, + "loss": 0.6408, "step": 8509 }, { - "epoch": 0.6439408270591351, - "grad_norm": 2.2157704830169678, - "learning_rate": 5.550127787113674e-06, - "loss": 0.4898, + "epoch": 0.5990848292854629, + "grad_norm": 2.2275054454803467, + "learning_rate": 5.117704738344943e-06, + "loss": 0.6467, "step": 8510 }, { - "epoch": 0.6440164957814687, - "grad_norm": 2.1222331523895264, - "learning_rate": 5.548047711468221e-06, - "loss": 0.7425, + "epoch": 0.5991552270327349, + "grad_norm": 1.6324996948242188, + "learning_rate": 5.1161674685958965e-06, + "loss": 0.5713, "step": 8511 }, { - "epoch": 0.6440921645038024, - "grad_norm": 2.3518216609954834, - "learning_rate": 5.545967851995182e-06, - "loss": 0.9132, + "epoch": 0.5992256247800071, + "grad_norm": 2.0564937591552734, + "learning_rate": 5.114630296789008e-06, + "loss": 0.7003, "step": 8512 }, { - "epoch": 0.644167833226136, - "grad_norm": 2.1871204376220703, - "learning_rate": 5.543888208824809e-06, - "loss": 0.697, + "epoch": 0.5992960225272791, + "grad_norm": 3.7268025875091553, + "learning_rate": 5.113093223004198e-06, + "loss": 0.5804, "step": 8513 }, { - "epoch": 0.6442435019484696, - "grad_norm": 1.9656989574432373, - "learning_rate": 5.541808782087337e-06, - "loss": 0.7371, + "epoch": 0.5993664202745512, + "grad_norm": 1.8240187168121338, + "learning_rate": 5.111556247321379e-06, + "loss": 0.6736, "step": 8514 }, { - "epoch": 0.6443191706708032, - "grad_norm": 2.232603073120117, - "learning_rate": 5.539729571912982e-06, - "loss": 0.689, + "epoch": 0.5994368180218232, + "grad_norm": 1.7403531074523926, + "learning_rate": 5.110019369820463e-06, + "loss": 0.6275, "step": 8515 }, { - "epoch": 0.6443948393931368, - "grad_norm": 3.9067957401275635, - "learning_rate": 5.537650578431956e-06, - "loss": 0.7104, + "epoch": 0.5995072157690954, + "grad_norm": 1.7659893035888672, + "learning_rate": 5.108482590581348e-06, + "loss": 0.5921, "step": 8516 }, { - "epoch": 0.6444705081154705, - "grad_norm": 2.082522392272949, - "learning_rate": 5.5355718017744444e-06, - "loss": 0.7162, + "epoch": 0.5995776135163675, + "grad_norm": 1.7897708415985107, + "learning_rate": 5.106945909683941e-06, + "loss": 0.6959, "step": 8517 }, { - "epoch": 0.6445461768378041, - "grad_norm": 2.175347328186035, - "learning_rate": 5.533493242070634e-06, - "loss": 0.7402, + "epoch": 0.5996480112636395, + "grad_norm": 1.8650621175765991, + "learning_rate": 5.105409327208127e-06, + "loss": 0.7648, "step": 8518 }, { - "epoch": 0.6446218455601377, - "grad_norm": 1.8790175914764404, - "learning_rate": 5.531414899450692e-06, - "loss": 0.6263, + "epoch": 0.5997184090109117, + "grad_norm": 1.6196131706237793, + "learning_rate": 5.103872843233801e-06, + "loss": 0.5292, "step": 8519 }, { - "epoch": 0.6446975142824714, - "grad_norm": 2.5705785751342773, - "learning_rate": 5.529336774044764e-06, - "loss": 0.694, + "epoch": 0.5997888067581837, + "grad_norm": 1.8076646327972412, + "learning_rate": 5.10233645784084e-06, + "loss": 0.6484, "step": 8520 }, { - "epoch": 0.644773183004805, - "grad_norm": 1.955169916152954, - "learning_rate": 5.527258865982995e-06, - "loss": 0.8155, + "epoch": 0.5998592045054558, + "grad_norm": 1.6266454458236694, + "learning_rate": 5.100800171109128e-06, + "loss": 0.7238, "step": 8521 }, { - "epoch": 0.6448488517271386, - "grad_norm": 1.9175649881362915, - "learning_rate": 5.525181175395503e-06, - "loss": 0.8298, + "epoch": 0.599929602252728, + "grad_norm": 1.970662236213684, + "learning_rate": 5.099263983118532e-06, + "loss": 0.6819, "step": 8522 }, { - "epoch": 0.6449245204494722, - "grad_norm": 2.2717673778533936, - "learning_rate": 5.523103702412411e-06, - "loss": 0.7048, + "epoch": 0.6, + "grad_norm": 1.5631868839263916, + "learning_rate": 5.097727893948924e-06, + "loss": 0.5997, "step": 8523 }, { - "epoch": 0.6450001891718058, - "grad_norm": 2.508749485015869, - "learning_rate": 5.521026447163807e-06, - "loss": 0.7999, + "epoch": 0.6000703977472721, + "grad_norm": 1.7029139995574951, + "learning_rate": 5.096191903680167e-06, + "loss": 0.7245, "step": 8524 }, { - "epoch": 0.6450758578941395, - "grad_norm": 1.875464677810669, - "learning_rate": 5.5189494097797825e-06, - "loss": 0.6481, + "epoch": 0.6001407954945441, + "grad_norm": 1.736367106437683, + "learning_rate": 5.094656012392114e-06, + "loss": 0.6938, "step": 8525 }, { - "epoch": 0.6451515266164731, - "grad_norm": 2.2824044227600098, - "learning_rate": 5.516872590390402e-06, - "loss": 0.6753, + "epoch": 0.6002111932418163, + "grad_norm": 2.0227532386779785, + "learning_rate": 5.093120220164626e-06, + "loss": 0.8128, "step": 8526 }, { - "epoch": 0.6452271953388067, - "grad_norm": 2.4387025833129883, - "learning_rate": 5.514795989125725e-06, - "loss": 0.7616, + "epoch": 0.6002815909890884, + "grad_norm": 1.8936967849731445, + "learning_rate": 5.091584527077539e-06, + "loss": 0.6401, "step": 8527 }, { - "epoch": 0.6453028640611403, - "grad_norm": 1.9747780561447144, - "learning_rate": 5.512719606115799e-06, - "loss": 0.7541, + "epoch": 0.6003519887363604, + "grad_norm": 1.7083042860031128, + "learning_rate": 5.0900489332107044e-06, + "loss": 0.7659, "step": 8528 }, { - "epoch": 0.6453785327834739, - "grad_norm": 2.339794158935547, - "learning_rate": 5.510643441490649e-06, - "loss": 0.7607, + "epoch": 0.6004223864836326, + "grad_norm": 2.085153579711914, + "learning_rate": 5.088513438643955e-06, + "loss": 0.6942, "step": 8529 }, { - "epoch": 0.6454542015058076, - "grad_norm": 2.1262335777282715, - "learning_rate": 5.5085674953802945e-06, - "loss": 0.5666, + "epoch": 0.6004927842309046, + "grad_norm": 1.618595838546753, + "learning_rate": 5.086978043457122e-06, + "loss": 0.711, "step": 8530 }, { - "epoch": 0.6455298702281412, - "grad_norm": 1.9069366455078125, - "learning_rate": 5.506491767914734e-06, - "loss": 0.6467, + "epoch": 0.6005631819781767, + "grad_norm": 1.9468778371810913, + "learning_rate": 5.085442747730032e-06, + "loss": 0.7405, "step": 8531 }, { - "epoch": 0.6456055389504748, - "grad_norm": 2.164801597595215, - "learning_rate": 5.504416259223956e-06, - "loss": 0.6073, + "epoch": 0.6006335797254488, + "grad_norm": 1.9404460191726685, + "learning_rate": 5.083907551542512e-06, + "loss": 0.7658, "step": 8532 }, { - "epoch": 0.6456812076728085, - "grad_norm": 3.146503210067749, - "learning_rate": 5.502340969437942e-06, - "loss": 0.6192, + "epoch": 0.6007039774727209, + "grad_norm": 1.663267731666565, + "learning_rate": 5.082372454974368e-06, + "loss": 0.707, "step": 8533 }, { - "epoch": 0.6457568763951421, - "grad_norm": 2.1276612281799316, - "learning_rate": 5.5002658986866475e-06, - "loss": 0.6759, + "epoch": 0.600774375219993, + "grad_norm": 1.893189549446106, + "learning_rate": 5.080837458105421e-06, + "loss": 0.6555, "step": 8534 }, { - "epoch": 0.6458325451174757, - "grad_norm": 1.8618454933166504, - "learning_rate": 5.498191047100023e-06, - "loss": 0.5535, + "epoch": 0.600844772967265, + "grad_norm": 1.995410680770874, + "learning_rate": 5.07930256101547e-06, + "loss": 0.7091, "step": 8535 }, { - "epoch": 0.6459082138398093, - "grad_norm": 1.86492121219635, - "learning_rate": 5.496116414808002e-06, - "loss": 0.6922, + "epoch": 0.6009151707145372, + "grad_norm": 1.783005714416504, + "learning_rate": 5.077767763784321e-06, + "loss": 0.6776, "step": 8536 }, { - "epoch": 0.6459838825621429, - "grad_norm": 1.8783886432647705, - "learning_rate": 5.4940420019405e-06, - "loss": 0.6147, + "epoch": 0.6009855684618092, + "grad_norm": 1.8897106647491455, + "learning_rate": 5.076233066491764e-06, + "loss": 0.7505, "step": 8537 }, { - "epoch": 0.6460595512844766, - "grad_norm": 2.076680898666382, - "learning_rate": 5.49196780862743e-06, - "loss": 0.7457, + "epoch": 0.6010559662090813, + "grad_norm": 1.9082528352737427, + "learning_rate": 5.074698469217595e-06, + "loss": 0.7357, "step": 8538 }, { - "epoch": 0.6461352200068102, - "grad_norm": 1.9781914949417114, - "learning_rate": 5.489893834998683e-06, - "loss": 0.7596, + "epoch": 0.6011263639563534, + "grad_norm": 1.789666771888733, + "learning_rate": 5.073163972041593e-06, + "loss": 0.691, "step": 8539 }, { - "epoch": 0.6462108887291438, - "grad_norm": 3.7510299682617188, - "learning_rate": 5.487820081184136e-06, - "loss": 0.6868, + "epoch": 0.6011967617036255, + "grad_norm": 2.198082685470581, + "learning_rate": 5.071629575043544e-06, + "loss": 0.6756, "step": 8540 }, { - "epoch": 0.6462865574514775, - "grad_norm": 2.0768771171569824, - "learning_rate": 5.485746547313658e-06, - "loss": 0.606, + "epoch": 0.6012671594508976, + "grad_norm": 1.8661085367202759, + "learning_rate": 5.070095278303218e-06, + "loss": 0.6064, "step": 8541 }, { - "epoch": 0.646362226173811, - "grad_norm": 1.8995541334152222, - "learning_rate": 5.483673233517094e-06, - "loss": 0.7812, + "epoch": 0.6013375571981696, + "grad_norm": 2.3743979930877686, + "learning_rate": 5.068561081900388e-06, + "loss": 0.6505, "step": 8542 }, { - "epoch": 0.6464378948961447, - "grad_norm": 2.8474512100219727, - "learning_rate": 5.4816001399242865e-06, - "loss": 0.6534, + "epoch": 0.6014079549454417, + "grad_norm": 1.5505473613739014, + "learning_rate": 5.067026985914814e-06, + "loss": 0.7101, "step": 8543 }, { - "epoch": 0.6465135636184783, - "grad_norm": 2.1161701679229736, - "learning_rate": 5.479527266665059e-06, - "loss": 0.6513, + "epoch": 0.6014783526927139, + "grad_norm": 2.0057239532470703, + "learning_rate": 5.065492990426262e-06, + "loss": 0.6564, "step": 8544 }, { - "epoch": 0.6465892323408119, - "grad_norm": 1.9404470920562744, - "learning_rate": 5.477454613869219e-06, - "loss": 0.5536, + "epoch": 0.6015487504399859, + "grad_norm": 1.8085163831710815, + "learning_rate": 5.063959095514476e-06, + "loss": 0.7303, "step": 8545 }, { - "epoch": 0.6466649010631456, - "grad_norm": 2.743656873703003, - "learning_rate": 5.475382181666567e-06, - "loss": 0.6976, + "epoch": 0.601619148187258, + "grad_norm": 1.828218936920166, + "learning_rate": 5.062425301259214e-06, + "loss": 0.582, "step": 8546 }, { - "epoch": 0.6467405697854792, - "grad_norm": 1.7919031381607056, - "learning_rate": 5.473309970186882e-06, - "loss": 0.6573, + "epoch": 0.6016895459345301, + "grad_norm": 2.087665319442749, + "learning_rate": 5.0608916077402135e-06, + "loss": 0.6019, "step": 8547 }, { - "epoch": 0.6468162385078128, - "grad_norm": 2.278919219970703, - "learning_rate": 5.4712379795599295e-06, - "loss": 0.7562, + "epoch": 0.6017599436818022, + "grad_norm": 1.8910046815872192, + "learning_rate": 5.059358015037216e-06, + "loss": 0.684, "step": 8548 }, { - "epoch": 0.6468919072301464, - "grad_norm": 2.6098785400390625, - "learning_rate": 5.469166209915472e-06, - "loss": 0.7391, + "epoch": 0.6018303414290743, + "grad_norm": 1.5470184087753296, + "learning_rate": 5.057824523229949e-06, + "loss": 0.6448, "step": 8549 }, { - "epoch": 0.64696757595248, - "grad_norm": 2.1722793579101562, - "learning_rate": 5.4670946613832466e-06, - "loss": 0.839, + "epoch": 0.6019007391763463, + "grad_norm": 1.802042841911316, + "learning_rate": 5.056291132398149e-06, + "loss": 0.7019, "step": 8550 }, { - "epoch": 0.6470432446748137, - "grad_norm": 2.256061553955078, - "learning_rate": 5.465023334092981e-06, - "loss": 0.6023, + "epoch": 0.6019711369236185, + "grad_norm": 1.485946774482727, + "learning_rate": 5.0547578426215295e-06, + "loss": 0.6342, "step": 8551 }, { - "epoch": 0.6471189133971473, - "grad_norm": 2.3544907569885254, - "learning_rate": 5.4629522281743846e-06, - "loss": 0.6151, + "epoch": 0.6020415346708905, + "grad_norm": 2.2183377742767334, + "learning_rate": 5.053224653979814e-06, + "loss": 0.5428, "step": 8552 }, { - "epoch": 0.6471945821194809, - "grad_norm": 1.9995858669281006, - "learning_rate": 5.46088134375716e-06, - "loss": 0.6645, + "epoch": 0.6021119324181626, + "grad_norm": 2.3267033100128174, + "learning_rate": 5.0516915665527106e-06, + "loss": 0.7075, "step": 8553 }, { - "epoch": 0.6472702508418146, - "grad_norm": 2.3130130767822266, - "learning_rate": 5.4588106809709945e-06, - "loss": 0.6585, + "epoch": 0.6021823301654347, + "grad_norm": 2.0063416957855225, + "learning_rate": 5.050158580419929e-06, + "loss": 0.6759, "step": 8554 }, { - "epoch": 0.6473459195641481, - "grad_norm": 2.5768182277679443, - "learning_rate": 5.456740239945559e-06, - "loss": 0.8411, + "epoch": 0.6022527279127068, + "grad_norm": 1.9246195554733276, + "learning_rate": 5.048625695661164e-06, + "loss": 0.8149, "step": 8555 }, { - "epoch": 0.6474215882864818, - "grad_norm": 2.34093976020813, - "learning_rate": 5.454670020810507e-06, - "loss": 0.7124, + "epoch": 0.6023231256599789, + "grad_norm": 1.7804750204086304, + "learning_rate": 5.0470929123561215e-06, + "loss": 0.7818, "step": 8556 }, { - "epoch": 0.6474972570088154, - "grad_norm": 3.165555477142334, - "learning_rate": 5.452600023695488e-06, - "loss": 0.6142, + "epoch": 0.6023935234072509, + "grad_norm": 2.0596866607666016, + "learning_rate": 5.0455602305844845e-06, + "loss": 0.663, "step": 8557 }, { - "epoch": 0.647572925731149, - "grad_norm": 2.1847751140594482, - "learning_rate": 5.450530248730125e-06, - "loss": 0.6331, + "epoch": 0.6024639211545231, + "grad_norm": 1.7001620531082153, + "learning_rate": 5.044027650425942e-06, + "loss": 0.6498, "step": 8558 }, { - "epoch": 0.6476485944534827, - "grad_norm": 2.339569568634033, - "learning_rate": 5.448460696044041e-06, - "loss": 0.6593, + "epoch": 0.6025343189017951, + "grad_norm": 1.6457452774047852, + "learning_rate": 5.042495171960174e-06, + "loss": 0.6394, "step": 8559 }, { - "epoch": 0.6477242631758163, - "grad_norm": 2.989414691925049, - "learning_rate": 5.446391365766837e-06, - "loss": 0.6377, + "epoch": 0.6026047166490672, + "grad_norm": 1.707058310508728, + "learning_rate": 5.040962795266855e-06, + "loss": 0.6914, "step": 8560 }, { - "epoch": 0.6477999318981499, - "grad_norm": 2.329726457595825, - "learning_rate": 5.444322258028096e-06, - "loss": 0.5623, + "epoch": 0.6026751143963394, + "grad_norm": 1.6837317943572998, + "learning_rate": 5.039430520425651e-06, + "loss": 0.589, "step": 8561 }, { - "epoch": 0.6478756006204835, - "grad_norm": 2.963811159133911, - "learning_rate": 5.442253372957399e-06, - "loss": 0.816, + "epoch": 0.6027455121436114, + "grad_norm": 1.6144492626190186, + "learning_rate": 5.037898347516233e-06, + "loss": 0.6763, "step": 8562 }, { - "epoch": 0.6479512693428171, - "grad_norm": 2.018897533416748, - "learning_rate": 5.440184710684299e-06, - "loss": 0.742, + "epoch": 0.6028159098908835, + "grad_norm": 1.8078725337982178, + "learning_rate": 5.036366276618256e-06, + "loss": 0.7543, "step": 8563 }, { - "epoch": 0.6480269380651508, - "grad_norm": 2.518212080001831, - "learning_rate": 5.438116271338347e-06, - "loss": 0.8152, + "epoch": 0.6028863076381555, + "grad_norm": 1.763501763343811, + "learning_rate": 5.034834307811376e-06, + "loss": 0.7227, "step": 8564 }, { - "epoch": 0.6481026067874844, - "grad_norm": 1.4820868968963623, - "learning_rate": 5.436048055049079e-06, - "loss": 0.5836, + "epoch": 0.6029567053854277, + "grad_norm": 1.807627558708191, + "learning_rate": 5.033302441175239e-06, + "loss": 0.6722, "step": 8565 }, { - "epoch": 0.648178275509818, - "grad_norm": 2.1838934421539307, - "learning_rate": 5.433980061946006e-06, - "loss": 0.6756, + "epoch": 0.6030271031326998, + "grad_norm": 1.5825691223144531, + "learning_rate": 5.0317706767894895e-06, + "loss": 0.6777, "step": 8566 }, { - "epoch": 0.6482539442321517, - "grad_norm": 2.867804527282715, - "learning_rate": 5.4319122921586354e-06, - "loss": 0.6669, + "epoch": 0.6030975008799718, + "grad_norm": 1.8179593086242676, + "learning_rate": 5.030239014733763e-06, + "loss": 0.717, "step": 8567 }, { - "epoch": 0.6483296129544852, - "grad_norm": 2.323784112930298, - "learning_rate": 5.429844745816454e-06, - "loss": 0.7107, + "epoch": 0.603167898627244, + "grad_norm": 1.7985919713974, + "learning_rate": 5.028707455087697e-06, + "loss": 0.58, "step": 8568 }, { - "epoch": 0.6484052816768189, - "grad_norm": 2.1240692138671875, - "learning_rate": 5.427777423048948e-06, - "loss": 0.7573, + "epoch": 0.603238296374516, + "grad_norm": 2.086974620819092, + "learning_rate": 5.027175997930915e-06, + "loss": 0.704, "step": 8569 }, { - "epoch": 0.6484809503991525, - "grad_norm": 2.3311257362365723, - "learning_rate": 5.425710323985571e-06, - "loss": 0.6085, + "epoch": 0.6033086941217881, + "grad_norm": 1.8145968914031982, + "learning_rate": 5.025644643343039e-06, + "loss": 0.7088, "step": 8570 }, { - "epoch": 0.6485566191214861, - "grad_norm": 2.233501672744751, - "learning_rate": 5.423643448755776e-06, - "loss": 0.7538, + "epoch": 0.6033790918690601, + "grad_norm": 1.7276487350463867, + "learning_rate": 5.0241133914036865e-06, + "loss": 0.6269, "step": 8571 }, { - "epoch": 0.6486322878438198, - "grad_norm": 2.3207247257232666, - "learning_rate": 5.421576797488994e-06, - "loss": 0.678, + "epoch": 0.6034494896163323, + "grad_norm": 1.9103460311889648, + "learning_rate": 5.0225822421924685e-06, + "loss": 0.6339, "step": 8572 }, { - "epoch": 0.6487079565661534, - "grad_norm": 2.2097394466400146, - "learning_rate": 5.4195103703146445e-06, - "loss": 0.7674, + "epoch": 0.6035198873636044, + "grad_norm": 2.020648956298828, + "learning_rate": 5.021051195788988e-06, + "loss": 0.7199, "step": 8573 }, { - "epoch": 0.648783625288487, - "grad_norm": 3.0667641162872314, - "learning_rate": 5.41744416736214e-06, - "loss": 0.6953, + "epoch": 0.6035902851108764, + "grad_norm": 2.1175076961517334, + "learning_rate": 5.01952025227285e-06, + "loss": 0.5114, "step": 8574 }, { - "epoch": 0.6488592940108207, - "grad_norm": 2.093312978744507, - "learning_rate": 5.4153781887608684e-06, - "loss": 0.7736, + "epoch": 0.6036606828581486, + "grad_norm": 1.6328963041305542, + "learning_rate": 5.017989411723648e-06, + "loss": 0.5377, "step": 8575 }, { - "epoch": 0.6489349627331542, - "grad_norm": 1.9957607984542847, - "learning_rate": 5.41331243464021e-06, - "loss": 0.8296, + "epoch": 0.6037310806054206, + "grad_norm": 1.746058702468872, + "learning_rate": 5.0164586742209705e-06, + "loss": 0.5768, "step": 8576 }, { - "epoch": 0.6490106314554879, - "grad_norm": 1.9288572072982788, - "learning_rate": 5.411246905129525e-06, - "loss": 0.6244, + "epoch": 0.6038014783526927, + "grad_norm": 1.9382121562957764, + "learning_rate": 5.014928039844405e-06, + "loss": 0.7536, "step": 8577 }, { - "epoch": 0.6490863001778215, - "grad_norm": 2.4149534702301025, - "learning_rate": 5.409181600358165e-06, - "loss": 0.7823, + "epoch": 0.6038718760999648, + "grad_norm": 1.689061164855957, + "learning_rate": 5.013397508673526e-06, + "loss": 0.6299, "step": 8578 }, { - "epoch": 0.6491619689001551, - "grad_norm": 2.701127052307129, - "learning_rate": 5.407116520455471e-06, - "loss": 0.661, + "epoch": 0.6039422738472369, + "grad_norm": 2.1944515705108643, + "learning_rate": 5.011867080787912e-06, + "loss": 0.5826, "step": 8579 }, { - "epoch": 0.6492376376224888, - "grad_norm": 2.4020912647247314, - "learning_rate": 5.405051665550759e-06, - "loss": 0.7354, + "epoch": 0.604012671594509, + "grad_norm": 1.8450775146484375, + "learning_rate": 5.010336756267126e-06, + "loss": 0.7151, "step": 8580 }, { - "epoch": 0.6493133063448223, - "grad_norm": 2.4615938663482666, - "learning_rate": 5.4029870357733405e-06, - "loss": 0.6902, + "epoch": 0.604083069341781, + "grad_norm": 1.6049631834030151, + "learning_rate": 5.008806535190737e-06, + "loss": 0.626, "step": 8581 }, { - "epoch": 0.649388975067156, - "grad_norm": 2.4611809253692627, - "learning_rate": 5.400922631252509e-06, - "loss": 0.5852, + "epoch": 0.6041534670890532, + "grad_norm": 1.8794314861297607, + "learning_rate": 5.007276417638297e-06, + "loss": 0.6628, "step": 8582 }, { - "epoch": 0.6494646437894896, - "grad_norm": 2.5221340656280518, - "learning_rate": 5.398858452117539e-06, - "loss": 0.6872, + "epoch": 0.6042238648363253, + "grad_norm": 1.6767003536224365, + "learning_rate": 5.005746403689365e-06, + "loss": 0.6897, "step": 8583 }, { - "epoch": 0.6495403125118232, - "grad_norm": 1.908057451248169, - "learning_rate": 5.396794498497703e-06, - "loss": 0.6117, + "epoch": 0.6042942625835973, + "grad_norm": 1.8321048021316528, + "learning_rate": 5.004216493423479e-06, + "loss": 0.6293, "step": 8584 }, { - "epoch": 0.6496159812341569, - "grad_norm": 1.944278597831726, - "learning_rate": 5.3947307705222515e-06, - "loss": 0.6342, + "epoch": 0.6043646603308694, + "grad_norm": 1.9451040029525757, + "learning_rate": 5.0026866869201875e-06, + "loss": 0.6728, "step": 8585 }, { - "epoch": 0.6496916499564905, - "grad_norm": 1.9990040063858032, - "learning_rate": 5.392667268320418e-06, - "loss": 0.6878, + "epoch": 0.6044350580781415, + "grad_norm": 1.7296987771987915, + "learning_rate": 5.001156984259023e-06, + "loss": 0.6348, "step": 8586 }, { - "epoch": 0.6497673186788241, - "grad_norm": 2.623224973678589, - "learning_rate": 5.390603992021429e-06, - "loss": 0.6674, + "epoch": 0.6045054558254136, + "grad_norm": 1.8339595794677734, + "learning_rate": 4.9996273855195184e-06, + "loss": 0.6729, "step": 8587 }, { - "epoch": 0.6498429874011578, - "grad_norm": 2.0191500186920166, - "learning_rate": 5.38854094175449e-06, - "loss": 0.6673, + "epoch": 0.6045758535726857, + "grad_norm": 1.8075182437896729, + "learning_rate": 4.998097890781195e-06, + "loss": 0.7091, "step": 8588 }, { - "epoch": 0.6499186561234913, - "grad_norm": 2.417546510696411, - "learning_rate": 5.386478117648798e-06, - "loss": 0.5823, + "epoch": 0.6046462513199578, + "grad_norm": 1.6169601678848267, + "learning_rate": 4.99656850012358e-06, + "loss": 0.8479, "step": 8589 }, { - "epoch": 0.649994324845825, - "grad_norm": 1.9548628330230713, - "learning_rate": 5.384415519833536e-06, - "loss": 0.7199, + "epoch": 0.6047166490672299, + "grad_norm": 1.6145334243774414, + "learning_rate": 4.995039213626177e-06, + "loss": 0.6904, "step": 8590 }, { - "epoch": 0.6500699935681586, - "grad_norm": 2.064302444458008, - "learning_rate": 5.382353148437866e-06, - "loss": 0.6522, + "epoch": 0.6047870468145019, + "grad_norm": 1.8232134580612183, + "learning_rate": 4.993510031368506e-06, + "loss": 0.6731, "step": 8591 }, { - "epoch": 0.6501456622904922, - "grad_norm": 2.792672872543335, - "learning_rate": 5.380291003590946e-06, - "loss": 0.5625, + "epoch": 0.604857444561774, + "grad_norm": 1.6872954368591309, + "learning_rate": 4.991980953430064e-06, + "loss": 0.6136, "step": 8592 }, { - "epoch": 0.6502213310128259, - "grad_norm": 2.0606002807617188, - "learning_rate": 5.378229085421909e-06, - "loss": 0.7184, + "epoch": 0.6049278423090461, + "grad_norm": 2.0798399448394775, + "learning_rate": 4.9904519798903505e-06, + "loss": 0.6573, "step": 8593 }, { - "epoch": 0.6502969997351594, - "grad_norm": 2.4181129932403564, - "learning_rate": 5.376167394059879e-06, - "loss": 0.7157, + "epoch": 0.6049982400563182, + "grad_norm": 1.7522470951080322, + "learning_rate": 4.988923110828857e-06, + "loss": 0.6002, "step": 8594 }, { - "epoch": 0.6503726684574931, - "grad_norm": 2.101733922958374, - "learning_rate": 5.374105929633969e-06, - "loss": 0.6396, + "epoch": 0.6050686378035903, + "grad_norm": 1.7747365236282349, + "learning_rate": 4.987394346325076e-06, + "loss": 0.7461, "step": 8595 }, { - "epoch": 0.6504483371798268, - "grad_norm": 2.896683931350708, - "learning_rate": 5.372044692273275e-06, - "loss": 0.749, + "epoch": 0.6051390355508623, + "grad_norm": 2.0236268043518066, + "learning_rate": 4.985865686458483e-06, + "loss": 0.8032, "step": 8596 }, { - "epoch": 0.6505240059021603, - "grad_norm": 2.5679473876953125, - "learning_rate": 5.369983682106875e-06, - "loss": 0.6503, + "epoch": 0.6052094332981345, + "grad_norm": 1.721683144569397, + "learning_rate": 4.984337131308558e-06, + "loss": 0.6456, "step": 8597 }, { - "epoch": 0.650599674624494, - "grad_norm": 2.505889892578125, - "learning_rate": 5.36792289926384e-06, - "loss": 0.7238, + "epoch": 0.6052798310454065, + "grad_norm": 1.8686591386795044, + "learning_rate": 4.9828086809547706e-06, + "loss": 0.7465, "step": 8598 }, { - "epoch": 0.6506753433468276, - "grad_norm": 2.405747413635254, - "learning_rate": 5.3658623438732165e-06, - "loss": 0.7156, + "epoch": 0.6053502287926786, + "grad_norm": 1.7068930864334106, + "learning_rate": 4.981280335476588e-06, + "loss": 0.6524, "step": 8599 }, { - "epoch": 0.6507510120691612, - "grad_norm": 2.227442741394043, - "learning_rate": 5.363802016064049e-06, - "loss": 0.5993, + "epoch": 0.6054206265399508, + "grad_norm": 1.6364012956619263, + "learning_rate": 4.979752094953465e-06, + "loss": 0.6642, "step": 8600 }, { - "epoch": 0.6508266807914949, - "grad_norm": 2.0744035243988037, - "learning_rate": 5.361741915965362e-06, - "loss": 0.7058, + "epoch": 0.6054910242872228, + "grad_norm": 1.750077486038208, + "learning_rate": 4.978223959464866e-06, + "loss": 0.6503, "step": 8601 }, { - "epoch": 0.6509023495138284, - "grad_norm": 2.3363168239593506, - "learning_rate": 5.359682043706162e-06, - "loss": 0.5921, + "epoch": 0.6055614220344949, + "grad_norm": 1.8953856229782104, + "learning_rate": 4.97669592909023e-06, + "loss": 0.6158, "step": 8602 }, { - "epoch": 0.6509780182361621, - "grad_norm": 2.573744058609009, - "learning_rate": 5.357622399415448e-06, - "loss": 0.7344, + "epoch": 0.605631819781767, + "grad_norm": 1.84130859375, + "learning_rate": 4.975168003909008e-06, + "loss": 0.6265, "step": 8603 }, { - "epoch": 0.6510536869584957, - "grad_norm": 2.494749069213867, - "learning_rate": 5.355562983222197e-06, - "loss": 0.7317, + "epoch": 0.6057022175290391, + "grad_norm": 2.011159896850586, + "learning_rate": 4.973640184000633e-06, + "loss": 0.6502, "step": 8604 }, { - "epoch": 0.6511293556808293, - "grad_norm": 2.3259880542755127, - "learning_rate": 5.353503795255382e-06, - "loss": 0.695, + "epoch": 0.6057726152763112, + "grad_norm": 1.7962387800216675, + "learning_rate": 4.972112469444542e-06, + "loss": 0.6226, "step": 8605 }, { - "epoch": 0.651205024403163, - "grad_norm": 2.2108333110809326, - "learning_rate": 5.3514448356439545e-06, - "loss": 0.5293, + "epoch": 0.6058430130235832, + "grad_norm": 1.620439887046814, + "learning_rate": 4.970584860320157e-06, + "loss": 0.5859, "step": 8606 }, { - "epoch": 0.6512806931254965, - "grad_norm": 2.3737170696258545, - "learning_rate": 5.3493861045168514e-06, - "loss": 0.5965, + "epoch": 0.6059134107708554, + "grad_norm": 2.069469451904297, + "learning_rate": 4.969057356706909e-06, + "loss": 0.6181, "step": 8607 }, { - "epoch": 0.6513563618478302, - "grad_norm": 2.9041547775268555, - "learning_rate": 5.347327602002999e-06, - "loss": 0.7724, + "epoch": 0.6059838085181274, + "grad_norm": 2.1689705848693848, + "learning_rate": 4.967529958684201e-06, + "loss": 0.7674, "step": 8608 }, { - "epoch": 0.6514320305701639, - "grad_norm": 2.1703732013702393, - "learning_rate": 5.345269328231303e-06, - "loss": 0.5928, + "epoch": 0.6060542062653995, + "grad_norm": 1.623953938484192, + "learning_rate": 4.966002666331456e-06, + "loss": 0.6677, "step": 8609 }, { - "epoch": 0.6515076992924974, - "grad_norm": 2.191357374191284, - "learning_rate": 5.343211283330663e-06, - "loss": 0.8274, + "epoch": 0.6061246040126715, + "grad_norm": 1.667476773262024, + "learning_rate": 4.964475479728072e-06, + "loss": 0.6912, "step": 8610 }, { - "epoch": 0.6515833680148311, - "grad_norm": 2.2903363704681396, - "learning_rate": 5.341153467429962e-06, - "loss": 0.6822, + "epoch": 0.6061950017599437, + "grad_norm": 2.1617777347564697, + "learning_rate": 4.962948398953455e-06, + "loss": 0.7104, "step": 8611 }, { - "epoch": 0.6516590367371647, - "grad_norm": 2.7158641815185547, - "learning_rate": 5.339095880658066e-06, - "loss": 0.7942, + "epoch": 0.6062653995072158, + "grad_norm": 2.39357590675354, + "learning_rate": 4.96142142408699e-06, + "loss": 0.639, "step": 8612 }, { - "epoch": 0.6517347054594983, - "grad_norm": 1.8598185777664185, - "learning_rate": 5.3370385231438245e-06, - "loss": 0.7329, + "epoch": 0.6063357972544878, + "grad_norm": 1.6362249851226807, + "learning_rate": 4.959894555208076e-06, + "loss": 0.6794, "step": 8613 }, { - "epoch": 0.651810374181832, - "grad_norm": 2.0901763439178467, - "learning_rate": 5.334981395016076e-06, - "loss": 0.7677, + "epoch": 0.60640619500176, + "grad_norm": 1.9354060888290405, + "learning_rate": 4.95836779239609e-06, + "loss": 0.6587, "step": 8614 }, { - "epoch": 0.6518860429041655, - "grad_norm": 2.2997934818267822, - "learning_rate": 5.33292449640365e-06, - "loss": 0.6171, + "epoch": 0.606476592749032, + "grad_norm": 1.7982354164123535, + "learning_rate": 4.956841135730412e-06, + "loss": 0.72, "step": 8615 }, { - "epoch": 0.6519617116264992, - "grad_norm": 2.0025293827056885, - "learning_rate": 5.330867827435353e-06, - "loss": 0.713, + "epoch": 0.6065469904963041, + "grad_norm": 2.1061108112335205, + "learning_rate": 4.955314585290413e-06, + "loss": 0.7104, "step": 8616 }, { - "epoch": 0.6520373803488329, - "grad_norm": 2.187777042388916, - "learning_rate": 5.328811388239981e-06, - "loss": 0.5571, + "epoch": 0.6066173882435762, + "grad_norm": 1.7167054414749146, + "learning_rate": 4.953788141155462e-06, + "loss": 0.6446, "step": 8617 }, { - "epoch": 0.6521130490711664, - "grad_norm": 2.3849644660949707, - "learning_rate": 5.326755178946312e-06, - "loss": 0.6938, + "epoch": 0.6066877859908483, + "grad_norm": 1.6067792177200317, + "learning_rate": 4.952261803404916e-06, + "loss": 0.6787, "step": 8618 }, { - "epoch": 0.6521887177935001, - "grad_norm": 2.3532207012176514, - "learning_rate": 5.324699199683113e-06, - "loss": 0.7331, + "epoch": 0.6067581837381204, + "grad_norm": 1.7047390937805176, + "learning_rate": 4.9507355721181354e-06, + "loss": 0.7202, "step": 8619 }, { - "epoch": 0.6522643865158336, - "grad_norm": 2.0683131217956543, - "learning_rate": 5.3226434505791405e-06, - "loss": 0.6739, + "epoch": 0.6068285814853924, + "grad_norm": 1.7207175493240356, + "learning_rate": 4.949209447374467e-06, + "loss": 0.6641, "step": 8620 }, { - "epoch": 0.6523400552381673, - "grad_norm": 1.9506670236587524, - "learning_rate": 5.320587931763127e-06, - "loss": 0.6907, + "epoch": 0.6068989792326646, + "grad_norm": 2.1717405319213867, + "learning_rate": 4.94768342925326e-06, + "loss": 0.7327, "step": 8621 }, { - "epoch": 0.652415723960501, - "grad_norm": 1.8213778734207153, - "learning_rate": 5.3185326433638e-06, - "loss": 0.7844, + "epoch": 0.6069693769799367, + "grad_norm": 2.1958510875701904, + "learning_rate": 4.946157517833847e-06, + "loss": 0.6688, "step": 8622 }, { - "epoch": 0.6524913926828345, - "grad_norm": 2.387977361679077, - "learning_rate": 5.316477585509865e-06, - "loss": 0.6507, + "epoch": 0.6070397747272087, + "grad_norm": 1.7416307926177979, + "learning_rate": 4.9446317131955666e-06, + "loss": 0.6456, "step": 8623 }, { - "epoch": 0.6525670614051682, - "grad_norm": 2.132040500640869, - "learning_rate": 5.3144227583300185e-06, - "loss": 0.6664, + "epoch": 0.6071101724744808, + "grad_norm": 1.721818208694458, + "learning_rate": 4.943106015417743e-06, + "loss": 0.581, "step": 8624 }, { - "epoch": 0.6526427301275018, - "grad_norm": 2.443291187286377, - "learning_rate": 5.312368161952933e-06, - "loss": 0.5767, + "epoch": 0.6071805702217529, + "grad_norm": 1.853506326675415, + "learning_rate": 4.9415804245797e-06, + "loss": 0.7294, "step": 8625 }, { - "epoch": 0.6527183988498354, - "grad_norm": 2.27197265625, - "learning_rate": 5.310313796507288e-06, - "loss": 0.6735, + "epoch": 0.607250967969025, + "grad_norm": 1.691701054573059, + "learning_rate": 4.9400549407607575e-06, + "loss": 0.6592, "step": 8626 }, { - "epoch": 0.6527940675721691, - "grad_norm": 2.6916403770446777, - "learning_rate": 5.308259662121724e-06, - "loss": 0.6195, + "epoch": 0.607321365716297, + "grad_norm": 1.6401640176773071, + "learning_rate": 4.9385295640402225e-06, + "loss": 0.6847, "step": 8627 }, { - "epoch": 0.6528697362945026, - "grad_norm": 2.7389891147613525, - "learning_rate": 5.306205758924883e-06, - "loss": 0.7494, + "epoch": 0.6073917634635692, + "grad_norm": 1.9085845947265625, + "learning_rate": 4.937004294497404e-06, + "loss": 0.6931, "step": 8628 }, { - "epoch": 0.6529454050168363, - "grad_norm": 2.406222105026245, - "learning_rate": 5.304152087045383e-06, - "loss": 0.6614, + "epoch": 0.6074621612108413, + "grad_norm": 3.475663185119629, + "learning_rate": 4.935479132211597e-06, + "loss": 0.741, "step": 8629 }, { - "epoch": 0.65302107373917, - "grad_norm": 2.205073595046997, - "learning_rate": 5.3020986466118305e-06, - "loss": 0.7074, + "epoch": 0.6075325589581133, + "grad_norm": 2.1198320388793945, + "learning_rate": 4.933954077262104e-06, + "loss": 0.6784, "step": 8630 }, { - "epoch": 0.6530967424615035, - "grad_norm": 2.350299119949341, - "learning_rate": 5.3000454377528256e-06, - "loss": 0.8051, + "epoch": 0.6076029567053854, + "grad_norm": 1.8417434692382812, + "learning_rate": 4.932429129728207e-06, + "loss": 0.5745, "step": 8631 }, { - "epoch": 0.6531724111838372, - "grad_norm": 2.4323620796203613, - "learning_rate": 5.297992460596941e-06, - "loss": 0.6793, + "epoch": 0.6076733544526575, + "grad_norm": 1.7354316711425781, + "learning_rate": 4.930904289689195e-06, + "loss": 0.672, "step": 8632 }, { - "epoch": 0.6532480799061707, - "grad_norm": 2.1624834537506104, - "learning_rate": 5.295939715272742e-06, - "loss": 0.7988, + "epoch": 0.6077437521999296, + "grad_norm": 1.819901943206787, + "learning_rate": 4.929379557224338e-06, + "loss": 0.675, "step": 8633 }, { - "epoch": 0.6533237486285044, - "grad_norm": 2.935800075531006, - "learning_rate": 5.293887201908778e-06, - "loss": 0.7486, + "epoch": 0.6078141499472017, + "grad_norm": 2.0929665565490723, + "learning_rate": 4.927854932412919e-06, + "loss": 0.7386, "step": 8634 }, { - "epoch": 0.6533994173508381, - "grad_norm": 2.3798375129699707, - "learning_rate": 5.291834920633583e-06, - "loss": 0.5782, + "epoch": 0.6078845476944738, + "grad_norm": 1.7057753801345825, + "learning_rate": 4.926330415334193e-06, + "loss": 0.648, "step": 8635 }, { - "epoch": 0.6534750860731716, - "grad_norm": 2.4022819995880127, - "learning_rate": 5.289782871575682e-06, - "loss": 0.666, + "epoch": 0.6079549454417459, + "grad_norm": 1.669165849685669, + "learning_rate": 4.92480600606743e-06, + "loss": 0.6149, "step": 8636 }, { - "epoch": 0.6535507547955053, - "grad_norm": 1.8358285427093506, - "learning_rate": 5.287731054863575e-06, - "loss": 0.7288, + "epoch": 0.6080253431890179, + "grad_norm": 1.5323164463043213, + "learning_rate": 4.9232817046918816e-06, + "loss": 0.6277, "step": 8637 }, { - "epoch": 0.653626423517839, - "grad_norm": 2.1910176277160645, - "learning_rate": 5.28567947062576e-06, - "loss": 0.8125, + "epoch": 0.60809574093629, + "grad_norm": 2.0705578327178955, + "learning_rate": 4.921757511286799e-06, + "loss": 0.7206, "step": 8638 }, { - "epoch": 0.6537020922401725, - "grad_norm": 2.170234203338623, - "learning_rate": 5.283628118990708e-06, - "loss": 0.6749, + "epoch": 0.6081661386835622, + "grad_norm": 1.495934009552002, + "learning_rate": 4.920233425931422e-06, + "loss": 0.5382, "step": 8639 }, { - "epoch": 0.6537777609625062, - "grad_norm": 2.4804930686950684, - "learning_rate": 5.281577000086881e-06, - "loss": 0.6696, + "epoch": 0.6082365364308342, + "grad_norm": 1.622482419013977, + "learning_rate": 4.918709448704998e-06, + "loss": 0.7209, "step": 8640 }, { - "epoch": 0.6538534296848397, - "grad_norm": 2.3019261360168457, - "learning_rate": 5.279526114042731e-06, - "loss": 0.5192, + "epoch": 0.6083069341781063, + "grad_norm": 1.783653974533081, + "learning_rate": 4.91718557968675e-06, + "loss": 0.6364, "step": 8641 }, { - "epoch": 0.6539290984071734, - "grad_norm": 2.1788456439971924, - "learning_rate": 5.27747546098669e-06, - "loss": 0.7353, + "epoch": 0.6083773319253784, + "grad_norm": 1.5827827453613281, + "learning_rate": 4.9156618189559135e-06, + "loss": 0.6842, "step": 8642 }, { - "epoch": 0.6540047671295071, - "grad_norm": 2.3936922550201416, - "learning_rate": 5.2754250410471755e-06, - "loss": 0.5753, + "epoch": 0.6084477296726505, + "grad_norm": 1.6192960739135742, + "learning_rate": 4.914138166591705e-06, + "loss": 0.654, "step": 8643 }, { - "epoch": 0.6540804358518406, - "grad_norm": 2.109896421432495, - "learning_rate": 5.2733748543525925e-06, - "loss": 0.572, + "epoch": 0.6085181274199226, + "grad_norm": 1.6428182125091553, + "learning_rate": 4.912614622673343e-06, + "loss": 0.6932, "step": 8644 }, { - "epoch": 0.6541561045741743, - "grad_norm": 2.3399693965911865, - "learning_rate": 5.271324901031326e-06, - "loss": 0.6521, + "epoch": 0.6085885251671946, + "grad_norm": 1.7241443395614624, + "learning_rate": 4.911091187280035e-06, + "loss": 0.5787, "step": 8645 }, { - "epoch": 0.6542317732965078, - "grad_norm": 2.2131054401397705, - "learning_rate": 5.2692751812117576e-06, - "loss": 0.6941, + "epoch": 0.6086589229144668, + "grad_norm": 1.3127484321594238, + "learning_rate": 4.909567860490992e-06, + "loss": 0.7886, "step": 8646 }, { - "epoch": 0.6543074420188415, - "grad_norm": 2.646402597427368, - "learning_rate": 5.267225695022244e-06, - "loss": 0.802, + "epoch": 0.6087293206617388, + "grad_norm": 2.382514715194702, + "learning_rate": 4.908044642385405e-06, + "loss": 0.7425, "step": 8647 }, { - "epoch": 0.6543831107411752, - "grad_norm": 1.9599591493606567, - "learning_rate": 5.26517644259113e-06, - "loss": 0.8231, + "epoch": 0.6087997184090109, + "grad_norm": 1.8432049751281738, + "learning_rate": 4.906521533042474e-06, + "loss": 0.6922, "step": 8648 }, { - "epoch": 0.6544587794635087, - "grad_norm": 2.2101340293884277, - "learning_rate": 5.263127424046747e-06, - "loss": 0.7479, + "epoch": 0.608870116156283, + "grad_norm": 1.8328766822814941, + "learning_rate": 4.9049985325413835e-06, + "loss": 0.6204, "step": 8649 }, { - "epoch": 0.6545344481858424, - "grad_norm": 2.379575252532959, - "learning_rate": 5.26107863951741e-06, - "loss": 0.6335, + "epoch": 0.6089405139035551, + "grad_norm": 1.701912522315979, + "learning_rate": 4.903475640961318e-06, + "loss": 0.7689, "step": 8650 }, { - "epoch": 0.654610116908176, - "grad_norm": 2.6849305629730225, - "learning_rate": 5.259030089131421e-06, - "loss": 0.7674, + "epoch": 0.6090109116508272, + "grad_norm": 1.9696065187454224, + "learning_rate": 4.90195285838145e-06, + "loss": 0.6389, "step": 8651 }, { - "epoch": 0.6546857856305096, - "grad_norm": 2.3194165229797363, - "learning_rate": 5.256981773017071e-06, - "loss": 0.7081, + "epoch": 0.6090813093980992, + "grad_norm": 1.622334599494934, + "learning_rate": 4.900430184880958e-06, + "loss": 0.6418, "step": 8652 }, { - "epoch": 0.6547614543528433, - "grad_norm": 5.000668048858643, - "learning_rate": 5.254933691302628e-06, - "loss": 0.737, + "epoch": 0.6091517071453714, + "grad_norm": 1.9507313966751099, + "learning_rate": 4.898907620538997e-06, + "loss": 0.6345, "step": 8653 }, { - "epoch": 0.6548371230751768, - "grad_norm": 2.265462875366211, - "learning_rate": 5.252885844116347e-06, - "loss": 0.5589, + "epoch": 0.6092221048926434, + "grad_norm": 1.728570580482483, + "learning_rate": 4.897385165434734e-06, + "loss": 0.6532, "step": 8654 }, { - "epoch": 0.6549127917975105, - "grad_norm": 2.609497308731079, - "learning_rate": 5.250838231586477e-06, - "loss": 0.7, + "epoch": 0.6092925026399155, + "grad_norm": 1.7806295156478882, + "learning_rate": 4.8958628196473194e-06, + "loss": 0.6795, "step": 8655 }, { - "epoch": 0.6549884605198442, - "grad_norm": 2.6671085357666016, - "learning_rate": 5.248790853841241e-06, - "loss": 0.6093, + "epoch": 0.6093629003871877, + "grad_norm": 1.9620670080184937, + "learning_rate": 4.894340583255904e-06, + "loss": 0.6742, "step": 8656 }, { - "epoch": 0.6550641292421777, - "grad_norm": 2.3832755088806152, - "learning_rate": 5.2467437110088565e-06, - "loss": 0.7668, + "epoch": 0.6094332981344597, + "grad_norm": 1.563077449798584, + "learning_rate": 4.892818456339626e-06, + "loss": 0.7534, "step": 8657 }, { - "epoch": 0.6551397979645114, - "grad_norm": 2.164400815963745, - "learning_rate": 5.24469680321752e-06, - "loss": 0.6777, + "epoch": 0.6095036958817318, + "grad_norm": 2.08535099029541, + "learning_rate": 4.891296438977629e-06, + "loss": 0.6165, "step": 8658 }, { - "epoch": 0.6552154666868449, - "grad_norm": 1.7403801679611206, - "learning_rate": 5.242650130595418e-06, - "loss": 0.6456, + "epoch": 0.6095740936290038, + "grad_norm": 1.7356245517730713, + "learning_rate": 4.8897745312490355e-06, + "loss": 0.6216, "step": 8659 }, { - "epoch": 0.6552911354091786, - "grad_norm": 2.5465121269226074, - "learning_rate": 5.240603693270712e-06, - "loss": 0.7281, + "epoch": 0.609644491376276, + "grad_norm": 1.697792410850525, + "learning_rate": 4.888252733232979e-06, + "loss": 0.5639, "step": 8660 }, { - "epoch": 0.6553668041315123, - "grad_norm": 2.5230278968811035, - "learning_rate": 5.238557491371566e-06, - "loss": 0.7588, + "epoch": 0.6097148891235481, + "grad_norm": 1.6127930879592896, + "learning_rate": 4.886731045008573e-06, + "loss": 0.6282, "step": 8661 }, { - "epoch": 0.6554424728538458, - "grad_norm": 1.767684817314148, - "learning_rate": 5.236511525026118e-06, - "loss": 0.824, + "epoch": 0.6097852868708201, + "grad_norm": 1.7413667440414429, + "learning_rate": 4.885209466654936e-06, + "loss": 0.8092, "step": 8662 }, { - "epoch": 0.6555181415761795, - "grad_norm": 2.1746935844421387, - "learning_rate": 5.2344657943624876e-06, - "loss": 0.712, + "epoch": 0.6098556846180923, + "grad_norm": 1.8518530130386353, + "learning_rate": 4.88368799825117e-06, + "loss": 0.7137, "step": 8663 }, { - "epoch": 0.6555938102985132, - "grad_norm": 2.5205862522125244, - "learning_rate": 5.232420299508789e-06, - "loss": 0.7244, + "epoch": 0.6099260823653643, + "grad_norm": 2.069244146347046, + "learning_rate": 4.8821666398763854e-06, + "loss": 0.7668, "step": 8664 }, { - "epoch": 0.6556694790208467, - "grad_norm": 2.556107759475708, - "learning_rate": 5.230375040593117e-06, - "loss": 0.7721, + "epoch": 0.6099964801126364, + "grad_norm": 2.053692102432251, + "learning_rate": 4.8806453916096735e-06, + "loss": 0.6498, "step": 8665 }, { - "epoch": 0.6557451477431804, - "grad_norm": 2.134599208831787, - "learning_rate": 5.228330017743552e-06, - "loss": 0.7349, + "epoch": 0.6100668778599084, + "grad_norm": 1.6638824939727783, + "learning_rate": 4.8791242535301285e-06, + "loss": 0.5847, "step": 8666 }, { - "epoch": 0.6558208164655139, - "grad_norm": 2.397552251815796, - "learning_rate": 5.226285231088161e-06, - "loss": 0.6472, + "epoch": 0.6101372756071806, + "grad_norm": 2.1653056144714355, + "learning_rate": 4.877603225716833e-06, + "loss": 0.6058, "step": 8667 }, { - "epoch": 0.6558964851878476, - "grad_norm": 2.927499532699585, - "learning_rate": 5.224240680754993e-06, - "loss": 0.5487, + "epoch": 0.6102076733544527, + "grad_norm": 1.6418386697769165, + "learning_rate": 4.87608230824887e-06, + "loss": 0.7365, "step": 8668 }, { - "epoch": 0.6559721539101813, - "grad_norm": 1.7893126010894775, - "learning_rate": 5.222196366872091e-06, - "loss": 0.6124, + "epoch": 0.6102780711017247, + "grad_norm": 1.8699485063552856, + "learning_rate": 4.874561501205307e-06, + "loss": 0.7478, "step": 8669 }, { - "epoch": 0.6560478226325148, - "grad_norm": 1.9689534902572632, - "learning_rate": 5.220152289567468e-06, - "loss": 0.7077, + "epoch": 0.6103484688489969, + "grad_norm": 1.6281390190124512, + "learning_rate": 4.873040804665221e-06, + "loss": 0.5714, "step": 8670 }, { - "epoch": 0.6561234913548485, - "grad_norm": 2.465264081954956, - "learning_rate": 5.218108448969129e-06, - "loss": 0.6206, + "epoch": 0.6104188665962689, + "grad_norm": 1.4529949426651, + "learning_rate": 4.871520218707667e-06, + "loss": 0.6216, "step": 8671 }, { - "epoch": 0.656199160077182, - "grad_norm": 2.3276827335357666, - "learning_rate": 5.216064845205075e-06, - "loss": 0.8604, + "epoch": 0.610489264343541, + "grad_norm": 1.5686602592468262, + "learning_rate": 4.869999743411707e-06, + "loss": 0.6653, "step": 8672 }, { - "epoch": 0.6562748287995157, - "grad_norm": 3.2658963203430176, - "learning_rate": 5.214021478403283e-06, - "loss": 0.6325, + "epoch": 0.6105596620908131, + "grad_norm": 2.0689964294433594, + "learning_rate": 4.868479378856387e-06, + "loss": 0.5769, "step": 8673 }, { - "epoch": 0.6563504975218494, - "grad_norm": 2.2174007892608643, - "learning_rate": 5.211978348691708e-06, - "loss": 0.7559, + "epoch": 0.6106300598380852, + "grad_norm": 2.269317626953125, + "learning_rate": 4.866959125120755e-06, + "loss": 0.7247, "step": 8674 }, { - "epoch": 0.6564261662441829, - "grad_norm": 2.120908260345459, - "learning_rate": 5.209935456198301e-06, - "loss": 0.7938, + "epoch": 0.6107004575853573, + "grad_norm": 1.6893086433410645, + "learning_rate": 4.86543898228385e-06, + "loss": 0.6441, "step": 8675 }, { - "epoch": 0.6565018349665166, - "grad_norm": 2.1590099334716797, - "learning_rate": 5.207892801050993e-06, - "loss": 0.6541, + "epoch": 0.6107708553326293, + "grad_norm": 1.7410163879394531, + "learning_rate": 4.863918950424706e-06, + "loss": 0.6169, "step": 8676 }, { - "epoch": 0.6565775036888503, - "grad_norm": 2.1857635974884033, - "learning_rate": 5.205850383377704e-06, - "loss": 0.6656, + "epoch": 0.6108412530799014, + "grad_norm": 1.7204935550689697, + "learning_rate": 4.862399029622351e-06, + "loss": 0.6326, "step": 8677 }, { - "epoch": 0.6566531724111838, - "grad_norm": 2.734827995300293, - "learning_rate": 5.2038082033063365e-06, - "loss": 0.7115, + "epoch": 0.6109116508271736, + "grad_norm": 2.174363374710083, + "learning_rate": 4.860879219955805e-06, + "loss": 0.6507, "step": 8678 }, { - "epoch": 0.6567288411335175, - "grad_norm": 1.769230604171753, - "learning_rate": 5.201766260964777e-06, - "loss": 0.585, + "epoch": 0.6109820485744456, + "grad_norm": 2.1079442501068115, + "learning_rate": 4.859359521504087e-06, + "loss": 0.6801, "step": 8679 }, { - "epoch": 0.656804509855851, - "grad_norm": 1.630325198173523, - "learning_rate": 5.199724556480902e-06, - "loss": 0.7843, + "epoch": 0.6110524463217177, + "grad_norm": 1.7487503290176392, + "learning_rate": 4.857839934346204e-06, + "loss": 0.6493, "step": 8680 }, { - "epoch": 0.6568801785781847, - "grad_norm": 2.335381269454956, - "learning_rate": 5.19768308998256e-06, - "loss": 0.5799, + "epoch": 0.6111228440689898, + "grad_norm": 1.476908802986145, + "learning_rate": 4.856320458561164e-06, + "loss": 0.6192, "step": 8681 }, { - "epoch": 0.6569558473005184, - "grad_norm": 2.884042739868164, - "learning_rate": 5.1956418615976054e-06, - "loss": 0.6448, + "epoch": 0.6111932418162619, + "grad_norm": 1.7957619428634644, + "learning_rate": 4.854801094227965e-06, + "loss": 0.5708, "step": 8682 }, { - "epoch": 0.6570315160228519, - "grad_norm": 2.7924041748046875, - "learning_rate": 5.193600871453866e-06, - "loss": 0.8123, + "epoch": 0.6112636395635339, + "grad_norm": 1.8126438856124878, + "learning_rate": 4.853281841425601e-06, + "loss": 0.6581, "step": 8683 }, { - "epoch": 0.6571071847451856, - "grad_norm": 2.314347267150879, - "learning_rate": 5.191560119679147e-06, - "loss": 0.7676, + "epoch": 0.611334037310806, + "grad_norm": 1.8898149728775024, + "learning_rate": 4.851762700233057e-06, + "loss": 0.6331, "step": 8684 }, { - "epoch": 0.6571828534675191, - "grad_norm": 2.933027744293213, - "learning_rate": 5.189519606401252e-06, - "loss": 0.7057, + "epoch": 0.6114044350580782, + "grad_norm": 1.6255416870117188, + "learning_rate": 4.8502436707293165e-06, + "loss": 0.6618, "step": 8685 }, { - "epoch": 0.6572585221898528, - "grad_norm": 2.0092270374298096, - "learning_rate": 5.18747933174796e-06, - "loss": 0.7644, + "epoch": 0.6114748328053502, + "grad_norm": 1.7979999780654907, + "learning_rate": 4.848724752993353e-06, + "loss": 0.6206, "step": 8686 }, { - "epoch": 0.6573341909121865, - "grad_norm": 3.9684855937957764, - "learning_rate": 5.18543929584705e-06, - "loss": 0.6298, + "epoch": 0.6115452305526223, + "grad_norm": 1.6913442611694336, + "learning_rate": 4.847205947104139e-06, + "loss": 0.5264, "step": 8687 }, { - "epoch": 0.65740985963452, - "grad_norm": 3.053493022918701, - "learning_rate": 5.183399498826266e-06, - "loss": 0.7078, + "epoch": 0.6116156282998944, + "grad_norm": 1.826324224472046, + "learning_rate": 4.8456872531406365e-06, + "loss": 0.7159, "step": 8688 }, { - "epoch": 0.6574855283568537, - "grad_norm": 2.5908734798431396, - "learning_rate": 5.18135994081335e-06, - "loss": 0.6395, + "epoch": 0.6116860260471665, + "grad_norm": 1.973144769668579, + "learning_rate": 4.844168671181807e-06, + "loss": 0.7312, "step": 8689 }, { - "epoch": 0.6575611970791874, - "grad_norm": 2.060353994369507, - "learning_rate": 5.179320621936025e-06, - "loss": 0.5517, + "epoch": 0.6117564237944386, + "grad_norm": 1.4526677131652832, + "learning_rate": 4.8426502013065975e-06, + "loss": 0.7199, "step": 8690 }, { - "epoch": 0.6576368658015209, - "grad_norm": 2.310406446456909, - "learning_rate": 5.177281542322e-06, - "loss": 0.7194, + "epoch": 0.6118268215417106, + "grad_norm": 1.944908857345581, + "learning_rate": 4.841131843593961e-06, + "loss": 0.7364, "step": 8691 }, { - "epoch": 0.6577125345238546, - "grad_norm": 2.261384963989258, - "learning_rate": 5.175242702098969e-06, - "loss": 0.5707, + "epoch": 0.6118972192889828, + "grad_norm": 1.7158997058868408, + "learning_rate": 4.839613598122832e-06, + "loss": 0.6059, "step": 8692 }, { - "epoch": 0.6577882032461881, - "grad_norm": 2.6075448989868164, - "learning_rate": 5.173204101394612e-06, - "loss": 0.6743, + "epoch": 0.6119676170362548, + "grad_norm": 1.6234071254730225, + "learning_rate": 4.838095464972152e-06, + "loss": 0.8222, "step": 8693 }, { - "epoch": 0.6578638719685218, - "grad_norm": 2.4900269508361816, - "learning_rate": 5.1711657403365935e-06, - "loss": 0.7795, + "epoch": 0.6120380147835269, + "grad_norm": 1.7620798349380493, + "learning_rate": 4.836577444220845e-06, + "loss": 0.7459, "step": 8694 }, { - "epoch": 0.6579395406908555, - "grad_norm": 3.0863840579986572, - "learning_rate": 5.169127619052558e-06, - "loss": 0.6771, + "epoch": 0.6121084125307991, + "grad_norm": 1.5444415807724, + "learning_rate": 4.8350595359478374e-06, + "loss": 0.5691, "step": 8695 }, { - "epoch": 0.658015209413189, - "grad_norm": 2.687215566635132, - "learning_rate": 5.167089737670137e-06, - "loss": 0.7066, + "epoch": 0.6121788102780711, + "grad_norm": 1.7618365287780762, + "learning_rate": 4.833541740232041e-06, + "loss": 0.7172, "step": 8696 }, { - "epoch": 0.6580908781355227, - "grad_norm": 2.019657850265503, - "learning_rate": 5.16505209631696e-06, - "loss": 0.7545, + "epoch": 0.6122492080253432, + "grad_norm": 1.566286325454712, + "learning_rate": 4.832024057152378e-06, + "loss": 0.6387, "step": 8697 }, { - "epoch": 0.6581665468578564, - "grad_norm": 2.119903564453125, - "learning_rate": 5.163014695120623e-06, - "loss": 0.8052, + "epoch": 0.6123196057726152, + "grad_norm": 1.959796667098999, + "learning_rate": 4.830506486787742e-06, + "loss": 0.6381, "step": 8698 }, { - "epoch": 0.6582422155801899, - "grad_norm": 8.88284683227539, - "learning_rate": 5.160977534208716e-06, - "loss": 0.8043, + "epoch": 0.6123900035198874, + "grad_norm": 1.8452783823013306, + "learning_rate": 4.828989029217042e-06, + "loss": 0.7665, "step": 8699 }, { - "epoch": 0.6583178843025236, - "grad_norm": 1.7967544794082642, - "learning_rate": 5.158940613708812e-06, - "loss": 0.7364, + "epoch": 0.6124604012671595, + "grad_norm": 1.5699717998504639, + "learning_rate": 4.827471684519167e-06, + "loss": 0.7198, "step": 8700 }, { - "epoch": 0.6583935530248571, - "grad_norm": 2.3100337982177734, - "learning_rate": 5.15690393374847e-06, - "loss": 0.6598, + "epoch": 0.6125307990144315, + "grad_norm": 1.4632755517959595, + "learning_rate": 4.825954452773008e-06, + "loss": 0.6875, "step": 8701 }, { - "epoch": 0.6584692217471908, - "grad_norm": 2.662794589996338, - "learning_rate": 5.154867494455234e-06, - "loss": 0.5787, + "epoch": 0.6126011967617037, + "grad_norm": 1.863256573677063, + "learning_rate": 4.824437334057443e-06, + "loss": 0.6288, "step": 8702 }, { - "epoch": 0.6585448904695245, - "grad_norm": 2.560645341873169, - "learning_rate": 5.152831295956632e-06, - "loss": 0.6013, + "epoch": 0.6126715945089757, + "grad_norm": 1.6889747381210327, + "learning_rate": 4.822920328451355e-06, + "loss": 0.5735, "step": 8703 }, { - "epoch": 0.658620559191858, - "grad_norm": 2.0240793228149414, - "learning_rate": 5.150795338380178e-06, - "loss": 0.7068, + "epoch": 0.6127419922562478, + "grad_norm": 1.6585556268692017, + "learning_rate": 4.821403436033608e-06, + "loss": 0.7663, "step": 8704 }, { - "epoch": 0.6586962279141917, - "grad_norm": 2.2594637870788574, - "learning_rate": 5.1487596218533735e-06, - "loss": 0.656, + "epoch": 0.6128123900035198, + "grad_norm": 1.7450321912765503, + "learning_rate": 4.819886656883071e-06, + "loss": 0.6694, "step": 8705 }, { - "epoch": 0.6587718966365252, - "grad_norm": 3.0724432468414307, - "learning_rate": 5.146724146503693e-06, - "loss": 0.8438, + "epoch": 0.612882787750792, + "grad_norm": 1.5075526237487793, + "learning_rate": 4.8183699910786e-06, + "loss": 0.5755, "step": 8706 }, { - "epoch": 0.6588475653588589, - "grad_norm": 2.630356788635254, - "learning_rate": 5.144688912458607e-06, - "loss": 0.5843, + "epoch": 0.6129531854980641, + "grad_norm": 1.762325644493103, + "learning_rate": 4.816853438699051e-06, + "loss": 0.7159, "step": 8707 }, { - "epoch": 0.6589232340811926, - "grad_norm": 2.645566701889038, - "learning_rate": 5.142653919845578e-06, - "loss": 0.6826, + "epoch": 0.6130235832453361, + "grad_norm": 1.9194058179855347, + "learning_rate": 4.815336999823265e-06, + "loss": 0.6351, "step": 8708 }, { - "epoch": 0.6589989028035261, - "grad_norm": 2.8515145778656006, - "learning_rate": 5.140619168792033e-06, - "loss": 0.6833, + "epoch": 0.6130939809926083, + "grad_norm": 1.6477009057998657, + "learning_rate": 4.813820674530092e-06, + "loss": 0.6652, "step": 8709 }, { - "epoch": 0.6590745715258598, - "grad_norm": 3.5823323726654053, - "learning_rate": 5.138584659425398e-06, - "loss": 0.679, + "epoch": 0.6131643787398803, + "grad_norm": 1.8129243850708008, + "learning_rate": 4.812304462898357e-06, + "loss": 0.6451, "step": 8710 }, { - "epoch": 0.6591502402481935, - "grad_norm": 2.3689839839935303, - "learning_rate": 5.136550391873082e-06, - "loss": 0.6783, + "epoch": 0.6132347764871524, + "grad_norm": 1.919729471206665, + "learning_rate": 4.810788365006897e-06, + "loss": 0.6682, "step": 8711 }, { - "epoch": 0.659225908970527, - "grad_norm": 2.3048954010009766, - "learning_rate": 5.134516366262475e-06, - "loss": 0.8017, + "epoch": 0.6133051742344245, + "grad_norm": 1.828072428703308, + "learning_rate": 4.809272380934532e-06, + "loss": 0.712, "step": 8712 }, { - "epoch": 0.6593015776928607, - "grad_norm": 3.267667531967163, - "learning_rate": 5.1324825827209564e-06, - "loss": 0.7462, + "epoch": 0.6133755719816966, + "grad_norm": 1.899174451828003, + "learning_rate": 4.807756510760082e-06, + "loss": 0.6919, "step": 8713 }, { - "epoch": 0.6593772464151942, - "grad_norm": 2.637197732925415, - "learning_rate": 5.130449041375887e-06, - "loss": 0.487, + "epoch": 0.6134459697289687, + "grad_norm": 1.8846534490585327, + "learning_rate": 4.806240754562353e-06, + "loss": 0.7326, "step": 8714 }, { - "epoch": 0.6594529151375279, - "grad_norm": 2.3977911472320557, - "learning_rate": 5.128415742354615e-06, - "loss": 0.6639, + "epoch": 0.6135163674762407, + "grad_norm": 2.165464401245117, + "learning_rate": 4.804725112420157e-06, + "loss": 0.6908, "step": 8715 }, { - "epoch": 0.6595285838598616, - "grad_norm": 2.283331871032715, - "learning_rate": 5.126382685784475e-06, - "loss": 0.7063, + "epoch": 0.6135867652235129, + "grad_norm": 1.6720211505889893, + "learning_rate": 4.803209584412287e-06, + "loss": 0.6773, "step": 8716 }, { - "epoch": 0.6596042525821951, - "grad_norm": 2.826462507247925, - "learning_rate": 5.124349871792772e-06, - "loss": 0.6383, + "epoch": 0.613657162970785, + "grad_norm": 1.797785758972168, + "learning_rate": 4.8016941706175446e-06, + "loss": 0.6336, "step": 8717 }, { - "epoch": 0.6596799213045288, - "grad_norm": 3.0163345336914062, - "learning_rate": 5.122317300506819e-06, - "loss": 0.6715, + "epoch": 0.613727560718057, + "grad_norm": 1.8831453323364258, + "learning_rate": 4.800178871114711e-06, + "loss": 0.7164, "step": 8718 }, { - "epoch": 0.6597555900268623, - "grad_norm": 2.01212215423584, - "learning_rate": 5.1202849720539035e-06, - "loss": 0.698, + "epoch": 0.6137979584653291, + "grad_norm": 1.866011619567871, + "learning_rate": 4.798663685982571e-06, + "loss": 0.816, "step": 8719 }, { - "epoch": 0.659831258749196, - "grad_norm": 2.5961616039276123, - "learning_rate": 5.118252886561287e-06, - "loss": 0.6204, + "epoch": 0.6138683562126012, + "grad_norm": 1.5246503353118896, + "learning_rate": 4.797148615299896e-06, + "loss": 0.6465, "step": 8720 }, { - "epoch": 0.6599069274715297, - "grad_norm": 2.244570255279541, - "learning_rate": 5.11622104415623e-06, - "loss": 0.6391, + "epoch": 0.6139387539598733, + "grad_norm": 2.540753126144409, + "learning_rate": 4.795633659145463e-06, + "loss": 0.7262, "step": 8721 }, { - "epoch": 0.6599825961938632, - "grad_norm": 2.480987787246704, - "learning_rate": 5.114189444965974e-06, - "loss": 0.869, + "epoch": 0.6140091517071453, + "grad_norm": 1.8100754022598267, + "learning_rate": 4.794118817598031e-06, + "loss": 0.5873, "step": 8722 }, { - "epoch": 0.6600582649161969, - "grad_norm": 2.770308256149292, - "learning_rate": 5.112158089117742e-06, - "loss": 0.7924, + "epoch": 0.6140795494544175, + "grad_norm": 1.6505606174468994, + "learning_rate": 4.792604090736362e-06, + "loss": 0.6367, "step": 8723 }, { - "epoch": 0.6601339336385306, - "grad_norm": 2.041288375854492, - "learning_rate": 5.110126976738745e-06, - "loss": 0.715, + "epoch": 0.6141499472016896, + "grad_norm": 2.0991785526275635, + "learning_rate": 4.791089478639203e-06, + "loss": 0.7336, "step": 8724 }, { - "epoch": 0.6602096023608641, - "grad_norm": 2.9713993072509766, - "learning_rate": 5.108096107956178e-06, - "loss": 0.6904, + "epoch": 0.6142203449489616, + "grad_norm": 1.707631230354309, + "learning_rate": 4.7895749813853005e-06, + "loss": 0.5436, "step": 8725 }, { - "epoch": 0.6602852710831978, - "grad_norm": 2.7926697731018066, - "learning_rate": 5.106065482897225e-06, - "loss": 0.6128, + "epoch": 0.6142907426962337, + "grad_norm": 1.7602643966674805, + "learning_rate": 4.788060599053401e-06, + "loss": 0.8078, "step": 8726 }, { - "epoch": 0.6603609398055313, - "grad_norm": 2.216324806213379, - "learning_rate": 5.104035101689038e-06, - "loss": 0.7352, + "epoch": 0.6143611404435058, + "grad_norm": 1.7059648036956787, + "learning_rate": 4.786546331722232e-06, + "loss": 0.6731, "step": 8727 }, { - "epoch": 0.660436608527865, - "grad_norm": 2.27839732170105, - "learning_rate": 5.1020049644587795e-06, - "loss": 0.6477, + "epoch": 0.6144315381907779, + "grad_norm": 1.8101017475128174, + "learning_rate": 4.785032179470526e-06, + "loss": 0.6395, "step": 8728 }, { - "epoch": 0.6605122772501987, - "grad_norm": 1.9538133144378662, - "learning_rate": 5.0999750713335745e-06, - "loss": 0.6509, + "epoch": 0.61450193593805, + "grad_norm": 1.5502848625183105, + "learning_rate": 4.783518142377e-06, + "loss": 0.6776, "step": 8729 }, { - "epoch": 0.6605879459725322, - "grad_norm": 2.1575965881347656, - "learning_rate": 5.097945422440551e-06, - "loss": 0.7347, + "epoch": 0.614572333685322, + "grad_norm": 2.073719024658203, + "learning_rate": 4.782004220520375e-06, + "loss": 0.5961, "step": 8730 }, { - "epoch": 0.6606636146948659, - "grad_norm": 2.204331398010254, - "learning_rate": 5.095916017906802e-06, - "loss": 0.7395, + "epoch": 0.6146427314325942, + "grad_norm": 2.126835823059082, + "learning_rate": 4.780490413979357e-06, + "loss": 0.7126, "step": 8731 }, { - "epoch": 0.6607392834171995, - "grad_norm": 3.217972993850708, - "learning_rate": 5.093886857859415e-06, - "loss": 0.5874, + "epoch": 0.6147131291798662, + "grad_norm": 2.001443386077881, + "learning_rate": 4.778976722832655e-06, + "loss": 0.7421, "step": 8732 }, { - "epoch": 0.6608149521395331, - "grad_norm": 2.131350040435791, - "learning_rate": 5.0918579424254736e-06, - "loss": 0.7008, + "epoch": 0.6147835269271383, + "grad_norm": 2.221947431564331, + "learning_rate": 4.777463147158964e-06, + "loss": 0.7111, "step": 8733 }, { - "epoch": 0.6608906208618668, - "grad_norm": 2.436474323272705, - "learning_rate": 5.089829271732025e-06, - "loss": 0.6264, + "epoch": 0.6148539246744105, + "grad_norm": 1.8115845918655396, + "learning_rate": 4.775949687036977e-06, + "loss": 0.6194, "step": 8734 }, { - "epoch": 0.6609662895842003, - "grad_norm": 1.924094796180725, - "learning_rate": 5.087800845906116e-06, - "loss": 0.5002, + "epoch": 0.6149243224216825, + "grad_norm": 1.597571849822998, + "learning_rate": 4.774436342545379e-06, + "loss": 0.7143, "step": 8735 }, { - "epoch": 0.661041958306534, - "grad_norm": 2.152076005935669, - "learning_rate": 5.085772665074771e-06, - "loss": 0.7399, + "epoch": 0.6149947201689546, + "grad_norm": 1.7431799173355103, + "learning_rate": 4.772923113762853e-06, + "loss": 0.6162, "step": 8736 }, { - "epoch": 0.6611176270288677, - "grad_norm": 2.1113076210021973, - "learning_rate": 5.083744729365001e-06, - "loss": 0.7238, + "epoch": 0.6150651179162266, + "grad_norm": 1.9278995990753174, + "learning_rate": 4.771410000768069e-06, + "loss": 0.7308, "step": 8737 }, { - "epoch": 0.6611932957512012, - "grad_norm": 3.3278396129608154, - "learning_rate": 5.081717038903803e-06, - "loss": 0.664, + "epoch": 0.6151355156634988, + "grad_norm": 2.2686023712158203, + "learning_rate": 4.769897003639699e-06, + "loss": 0.7439, "step": 8738 }, { - "epoch": 0.6612689644735349, - "grad_norm": 2.1845004558563232, - "learning_rate": 5.079689593818156e-06, - "loss": 0.6055, + "epoch": 0.6152059134107709, + "grad_norm": 1.7720314264297485, + "learning_rate": 4.768384122456402e-06, + "loss": 0.6859, "step": 8739 }, { - "epoch": 0.6613446331958684, - "grad_norm": 2.576305627822876, - "learning_rate": 5.0776623942350324e-06, - "loss": 0.6543, + "epoch": 0.6152763111580429, + "grad_norm": 1.8496757745742798, + "learning_rate": 4.766871357296838e-06, + "loss": 0.7017, "step": 8740 }, { - "epoch": 0.6614203019182021, - "grad_norm": 2.260627269744873, - "learning_rate": 5.075635440281372e-06, - "loss": 0.7901, + "epoch": 0.6153467089053151, + "grad_norm": 2.4582011699676514, + "learning_rate": 4.765358708239651e-06, + "loss": 0.6862, "step": 8741 }, { - "epoch": 0.6614959706405358, - "grad_norm": 2.078648805618286, - "learning_rate": 5.073608732084113e-06, - "loss": 0.6235, + "epoch": 0.6154171066525871, + "grad_norm": 1.6956257820129395, + "learning_rate": 4.7638461753634935e-06, + "loss": 0.7186, "step": 8742 }, { - "epoch": 0.6615716393628693, - "grad_norm": 4.44390869140625, - "learning_rate": 5.0715822697701704e-06, - "loss": 0.7602, + "epoch": 0.6154875043998592, + "grad_norm": 2.125033140182495, + "learning_rate": 4.762333758746993e-06, + "loss": 0.6779, "step": 8743 }, { - "epoch": 0.661647308085203, - "grad_norm": 2.401336193084717, - "learning_rate": 5.06955605346646e-06, - "loss": 0.5624, + "epoch": 0.6155579021471312, + "grad_norm": 1.6986947059631348, + "learning_rate": 4.760821458468791e-06, + "loss": 0.7144, "step": 8744 }, { - "epoch": 0.6617229768075366, - "grad_norm": 2.3435049057006836, - "learning_rate": 5.067530083299858e-06, - "loss": 0.6278, + "epoch": 0.6156282998944034, + "grad_norm": 1.873706340789795, + "learning_rate": 4.759309274607507e-06, + "loss": 0.631, "step": 8745 }, { - "epoch": 0.6617986455298702, - "grad_norm": 3.2491648197174072, - "learning_rate": 5.065504359397241e-06, - "loss": 0.6314, + "epoch": 0.6156986976416755, + "grad_norm": 1.8497536182403564, + "learning_rate": 4.757797207241765e-06, + "loss": 0.6852, "step": 8746 }, { - "epoch": 0.6618743142522039, - "grad_norm": 1.942625641822815, - "learning_rate": 5.063478881885468e-06, - "loss": 0.8968, + "epoch": 0.6157690953889475, + "grad_norm": 1.744938850402832, + "learning_rate": 4.756285256450174e-06, + "loss": 0.6537, "step": 8747 }, { - "epoch": 0.6619499829745374, - "grad_norm": 3.2278025150299072, - "learning_rate": 5.0614536508913785e-06, - "loss": 0.6857, + "epoch": 0.6158394931362197, + "grad_norm": 1.8188581466674805, + "learning_rate": 4.754773422311349e-06, + "loss": 0.7365, "step": 8748 }, { - "epoch": 0.6620256516968711, - "grad_norm": 2.8973255157470703, - "learning_rate": 5.059428666541801e-06, - "loss": 0.7619, + "epoch": 0.6159098908834917, + "grad_norm": 1.7040146589279175, + "learning_rate": 4.753261704903883e-06, + "loss": 0.5759, "step": 8749 }, { - "epoch": 0.6621013204192048, - "grad_norm": 1.850770354270935, - "learning_rate": 5.057403928963545e-06, - "loss": 0.591, + "epoch": 0.6159802886307638, + "grad_norm": 1.901617169380188, + "learning_rate": 4.751750104306377e-06, + "loss": 0.7688, "step": 8750 }, { - "epoch": 0.6621769891415383, - "grad_norm": 1.6158236265182495, - "learning_rate": 5.055379438283411e-06, - "loss": 0.5541, + "epoch": 0.616050686378036, + "grad_norm": 1.8048738241195679, + "learning_rate": 4.75023862059742e-06, + "loss": 0.5645, "step": 8751 }, { - "epoch": 0.662252657863872, - "grad_norm": 2.043518304824829, - "learning_rate": 5.053355194628172e-06, - "loss": 0.7462, + "epoch": 0.616121084125308, + "grad_norm": 1.6941165924072266, + "learning_rate": 4.748727253855595e-06, + "loss": 0.7277, "step": 8752 }, { - "epoch": 0.6623283265862056, - "grad_norm": 1.9455914497375488, - "learning_rate": 5.05133119812459e-06, - "loss": 0.7079, + "epoch": 0.6161914818725801, + "grad_norm": 1.8987079858779907, + "learning_rate": 4.747216004159476e-06, + "loss": 0.639, "step": 8753 }, { - "epoch": 0.6624039953085392, - "grad_norm": 2.781599283218384, - "learning_rate": 5.0493074488994296e-06, - "loss": 0.628, + "epoch": 0.6162618796198521, + "grad_norm": 2.3584744930267334, + "learning_rate": 4.7457048715876415e-06, + "loss": 0.7146, "step": 8754 }, { - "epoch": 0.6624796640308729, - "grad_norm": 1.801193356513977, - "learning_rate": 5.04728394707941e-06, - "loss": 0.6794, + "epoch": 0.6163322773671243, + "grad_norm": 1.954515814781189, + "learning_rate": 4.744193856218649e-06, + "loss": 0.7349, "step": 8755 }, { - "epoch": 0.6625553327532064, - "grad_norm": 2.097200632095337, - "learning_rate": 5.045260692791256e-06, - "loss": 0.6512, + "epoch": 0.6164026751143964, + "grad_norm": 1.896977424621582, + "learning_rate": 4.742682958131062e-06, + "loss": 0.61, "step": 8756 }, { - "epoch": 0.6626310014755401, - "grad_norm": 2.374746322631836, - "learning_rate": 5.0432376861616655e-06, - "loss": 0.7123, + "epoch": 0.6164730728616684, + "grad_norm": 1.7407375574111938, + "learning_rate": 4.741172177403432e-06, + "loss": 0.7351, "step": 8757 }, { - "epoch": 0.6627066701978737, - "grad_norm": 2.2179994583129883, - "learning_rate": 5.0412149273173305e-06, - "loss": 0.5607, + "epoch": 0.6165434706089405, + "grad_norm": 1.9213669300079346, + "learning_rate": 4.739661514114308e-06, + "loss": 0.6701, "step": 8758 }, { - "epoch": 0.6627823389202073, - "grad_norm": 1.721039056777954, - "learning_rate": 5.039192416384922e-06, - "loss": 0.7004, + "epoch": 0.6166138683562126, + "grad_norm": 1.7061903476715088, + "learning_rate": 4.738150968342225e-06, + "loss": 0.7302, "step": 8759 }, { - "epoch": 0.662858007642541, - "grad_norm": 2.0622527599334717, - "learning_rate": 5.037170153491093e-06, - "loss": 0.5236, + "epoch": 0.6166842661034847, + "grad_norm": 1.780179500579834, + "learning_rate": 4.736640540165727e-06, + "loss": 0.695, "step": 8760 }, { - "epoch": 0.6629336763648745, - "grad_norm": 2.1661341190338135, - "learning_rate": 5.035148138762487e-06, - "loss": 0.6125, + "epoch": 0.6167546638507567, + "grad_norm": 1.2619142532348633, + "learning_rate": 4.735130229663332e-06, + "loss": 0.7406, "step": 8761 }, { - "epoch": 0.6630093450872082, - "grad_norm": 2.070807695388794, - "learning_rate": 5.033126372325733e-06, - "loss": 0.7534, + "epoch": 0.6168250615980289, + "grad_norm": 2.0412163734436035, + "learning_rate": 4.733620036913571e-06, + "loss": 0.7751, "step": 8762 }, { - "epoch": 0.6630850138095419, - "grad_norm": 1.9332561492919922, - "learning_rate": 5.031104854307428e-06, - "loss": 0.6172, + "epoch": 0.616895459345301, + "grad_norm": 1.5057686567306519, + "learning_rate": 4.732109961994955e-06, + "loss": 0.6874, "step": 8763 }, { - "epoch": 0.6631606825318754, - "grad_norm": 2.211865186691284, - "learning_rate": 5.029083584834179e-06, - "loss": 0.8465, + "epoch": 0.616965857092573, + "grad_norm": 2.337124824523926, + "learning_rate": 4.730600004985998e-06, + "loss": 0.5928, "step": 8764 }, { - "epoch": 0.6632363512542091, - "grad_norm": 6.03785514831543, - "learning_rate": 5.027062564032561e-06, - "loss": 0.6893, + "epoch": 0.6170362548398451, + "grad_norm": 1.699820637702942, + "learning_rate": 4.729090165965198e-06, + "loss": 0.4585, "step": 8765 }, { - "epoch": 0.6633120199765427, - "grad_norm": 1.8481940031051636, - "learning_rate": 5.025041792029133e-06, - "loss": 0.722, + "epoch": 0.6171066525871172, + "grad_norm": 1.4827662706375122, + "learning_rate": 4.727580445011063e-06, + "loss": 0.6378, "step": 8766 }, { - "epoch": 0.6633876886988763, - "grad_norm": 1.6104676723480225, - "learning_rate": 5.023021268950444e-06, - "loss": 0.6848, + "epoch": 0.6171770503343893, + "grad_norm": 1.9038209915161133, + "learning_rate": 4.726070842202073e-06, + "loss": 0.7766, "step": 8767 }, { - "epoch": 0.66346335742121, - "grad_norm": 2.4193434715270996, - "learning_rate": 5.021000994923026e-06, - "loss": 0.7977, + "epoch": 0.6172474480816614, + "grad_norm": 1.7221735715866089, + "learning_rate": 4.724561357616723e-06, + "loss": 0.6903, "step": 8768 }, { - "epoch": 0.6635390261435435, - "grad_norm": 2.3450634479522705, - "learning_rate": 5.018980970073395e-06, - "loss": 0.6101, + "epoch": 0.6173178458289335, + "grad_norm": 1.623178243637085, + "learning_rate": 4.723051991333487e-06, + "loss": 0.6685, "step": 8769 }, { - "epoch": 0.6636146948658772, - "grad_norm": 1.899941325187683, - "learning_rate": 5.016961194528053e-06, - "loss": 0.5525, + "epoch": 0.6173882435762056, + "grad_norm": 2.0614614486694336, + "learning_rate": 4.721542743430841e-06, + "loss": 0.7102, "step": 8770 }, { - "epoch": 0.6636903635882108, - "grad_norm": 1.9693715572357178, - "learning_rate": 5.014941668413483e-06, - "loss": 0.8298, + "epoch": 0.6174586413234776, + "grad_norm": 1.898468255996704, + "learning_rate": 4.720033613987249e-06, + "loss": 0.7112, "step": 8771 }, { - "epoch": 0.6637660323105444, - "grad_norm": 2.609485626220703, - "learning_rate": 5.012922391856156e-06, - "loss": 0.6256, + "epoch": 0.6175290390707497, + "grad_norm": 2.0207719802856445, + "learning_rate": 4.7185246030811765e-06, + "loss": 0.5445, "step": 8772 }, { - "epoch": 0.6638417010328781, - "grad_norm": 2.0286991596221924, - "learning_rate": 5.010903364982523e-06, - "loss": 0.8595, + "epoch": 0.6175994368180219, + "grad_norm": 1.5107011795043945, + "learning_rate": 4.717015710791075e-06, + "loss": 0.6138, "step": 8773 }, { - "epoch": 0.6639173697552117, - "grad_norm": 1.6877880096435547, - "learning_rate": 5.008884587919025e-06, - "loss": 0.6734, + "epoch": 0.6176698345652939, + "grad_norm": 1.7142055034637451, + "learning_rate": 4.715506937195396e-06, + "loss": 0.5899, "step": 8774 }, { - "epoch": 0.6639930384775453, - "grad_norm": 2.174236297607422, - "learning_rate": 5.006866060792081e-06, - "loss": 0.6317, + "epoch": 0.617740232312566, + "grad_norm": 1.8262277841567993, + "learning_rate": 4.713998282372579e-06, + "loss": 0.5676, "step": 8775 }, { - "epoch": 0.664068707199879, - "grad_norm": 1.8174835443496704, - "learning_rate": 5.004847783728106e-06, - "loss": 0.6284, + "epoch": 0.617810630059838, + "grad_norm": 1.761788249015808, + "learning_rate": 4.71248974640106e-06, + "loss": 0.6826, "step": 8776 }, { - "epoch": 0.6641443759222125, - "grad_norm": 2.0437092781066895, - "learning_rate": 5.002829756853479e-06, - "loss": 0.7169, + "epoch": 0.6178810278071102, + "grad_norm": 2.01226806640625, + "learning_rate": 4.710981329359273e-06, + "loss": 0.8385, "step": 8777 }, { - "epoch": 0.6642200446445462, - "grad_norm": 2.2291433811187744, - "learning_rate": 5.000811980294578e-06, - "loss": 0.6002, + "epoch": 0.6179514255543822, + "grad_norm": 1.9364218711853027, + "learning_rate": 4.709473031325639e-06, + "loss": 0.7134, "step": 8778 }, { - "epoch": 0.6642957133668798, - "grad_norm": 2.1589958667755127, - "learning_rate": 4.998794454177773e-06, - "loss": 0.7183, + "epoch": 0.6180218233016543, + "grad_norm": 1.7863322496414185, + "learning_rate": 4.7079648523785785e-06, + "loss": 0.7405, "step": 8779 }, { - "epoch": 0.6643713820892134, - "grad_norm": 1.941250205039978, - "learning_rate": 4.996777178629397e-06, - "loss": 0.6259, + "epoch": 0.6180922210489265, + "grad_norm": 1.7984915971755981, + "learning_rate": 4.706456792596499e-06, + "loss": 0.8367, "step": 8780 }, { - "epoch": 0.6644470508115471, - "grad_norm": 1.7236140966415405, - "learning_rate": 4.994760153775782e-06, - "loss": 0.6895, + "epoch": 0.6181626187961985, + "grad_norm": 1.724253535270691, + "learning_rate": 4.704948852057809e-06, + "loss": 0.6273, "step": 8781 }, { - "epoch": 0.6645227195338806, - "grad_norm": 2.2558975219726562, - "learning_rate": 4.992743379743242e-06, - "loss": 0.6669, + "epoch": 0.6182330165434706, + "grad_norm": 1.5767030715942383, + "learning_rate": 4.703441030840904e-06, + "loss": 0.6819, "step": 8782 }, { - "epoch": 0.6645983882562143, - "grad_norm": 1.9559904336929321, - "learning_rate": 4.990726856658075e-06, - "loss": 0.6926, + "epoch": 0.6183034142907426, + "grad_norm": 2.218404769897461, + "learning_rate": 4.701933329024183e-06, + "loss": 0.6162, "step": 8783 }, { - "epoch": 0.6646740569785479, - "grad_norm": 2.2078473567962646, - "learning_rate": 4.988710584646552e-06, - "loss": 0.7465, + "epoch": 0.6183738120380148, + "grad_norm": 1.6901092529296875, + "learning_rate": 4.700425746686028e-06, + "loss": 0.6148, "step": 8784 }, { - "epoch": 0.6647497257008815, - "grad_norm": 2.152083396911621, - "learning_rate": 4.986694563834951e-06, - "loss": 0.7085, + "epoch": 0.6184442097852869, + "grad_norm": 1.706143856048584, + "learning_rate": 4.698918283904822e-06, + "loss": 0.518, "step": 8785 }, { - "epoch": 0.6648253944232152, - "grad_norm": 2.4791300296783447, - "learning_rate": 4.98467879434952e-06, - "loss": 0.6156, + "epoch": 0.6185146075325589, + "grad_norm": 1.8324304819107056, + "learning_rate": 4.697410940758937e-06, + "loss": 0.6122, "step": 8786 }, { - "epoch": 0.6649010631455488, - "grad_norm": 2.4543418884277344, - "learning_rate": 4.982663276316487e-06, - "loss": 0.6784, + "epoch": 0.6185850052798311, + "grad_norm": 1.75962233543396, + "learning_rate": 4.6959037173267455e-06, + "loss": 0.8506, "step": 8787 }, { - "epoch": 0.6649767318678824, - "grad_norm": 9.999984741210938, - "learning_rate": 4.980648009862073e-06, - "loss": 0.6158, + "epoch": 0.6186554030271031, + "grad_norm": 1.9706813097000122, + "learning_rate": 4.694396613686601e-06, + "loss": 0.6771, "step": 8788 }, { - "epoch": 0.6650524005902161, - "grad_norm": 2.0445075035095215, - "learning_rate": 4.978632995112476e-06, - "loss": 0.765, + "epoch": 0.6187258007743752, + "grad_norm": 1.9806902408599854, + "learning_rate": 4.69288962991687e-06, + "loss": 0.7546, "step": 8789 }, { - "epoch": 0.6651280693125496, - "grad_norm": 2.165191411972046, - "learning_rate": 4.976618232193895e-06, - "loss": 0.6143, + "epoch": 0.6187961985216474, + "grad_norm": 1.6347546577453613, + "learning_rate": 4.6913827660958935e-06, + "loss": 0.7338, "step": 8790 }, { - "epoch": 0.6652037380348833, - "grad_norm": 1.9150837659835815, - "learning_rate": 4.974603721232492e-06, - "loss": 0.5768, + "epoch": 0.6188665962689194, + "grad_norm": 1.9093701839447021, + "learning_rate": 4.689876022302021e-06, + "loss": 0.6416, "step": 8791 }, { - "epoch": 0.6652794067572169, - "grad_norm": 2.5317835807800293, - "learning_rate": 4.972589462354423e-06, - "loss": 0.7633, + "epoch": 0.6189369940161915, + "grad_norm": 2.1019139289855957, + "learning_rate": 4.688369398613581e-06, + "loss": 0.7337, "step": 8792 }, { - "epoch": 0.6653550754795505, - "grad_norm": 2.1504557132720947, - "learning_rate": 4.970575455685826e-06, - "loss": 0.6546, + "epoch": 0.6190073917634635, + "grad_norm": 1.6005642414093018, + "learning_rate": 4.6868628951089155e-06, + "loss": 0.5845, "step": 8793 }, { - "epoch": 0.6654307442018842, - "grad_norm": 1.821834683418274, - "learning_rate": 4.968561701352829e-06, - "loss": 0.7508, + "epoch": 0.6190777895107357, + "grad_norm": 1.731537938117981, + "learning_rate": 4.685356511866338e-06, + "loss": 0.6718, "step": 8794 }, { - "epoch": 0.6655064129242177, - "grad_norm": 2.307339668273926, - "learning_rate": 4.966548199481536e-06, - "loss": 0.5994, + "epoch": 0.6191481872580078, + "grad_norm": 2.004443645477295, + "learning_rate": 4.683850248964174e-06, + "loss": 0.6077, "step": 8795 }, { - "epoch": 0.6655820816465514, - "grad_norm": 2.067732334136963, - "learning_rate": 4.964534950198041e-06, - "loss": 0.6737, + "epoch": 0.6192185850052798, + "grad_norm": 2.191026449203491, + "learning_rate": 4.682344106480733e-06, + "loss": 0.698, "step": 8796 }, { - "epoch": 0.665657750368885, - "grad_norm": 2.2294864654541016, - "learning_rate": 4.962521953628425e-06, - "loss": 0.6829, + "epoch": 0.619288982752552, + "grad_norm": 1.743820071220398, + "learning_rate": 4.680838084494322e-06, + "loss": 0.6474, "step": 8797 }, { - "epoch": 0.6657334190912186, - "grad_norm": 2.271959066390991, - "learning_rate": 4.960509209898737e-06, - "loss": 0.6612, + "epoch": 0.619359380499824, + "grad_norm": 1.7968326807022095, + "learning_rate": 4.679332183083236e-06, + "loss": 0.6957, "step": 8798 }, { - "epoch": 0.6658090878135523, - "grad_norm": 2.435523271560669, - "learning_rate": 4.958496719135024e-06, - "loss": 0.7108, + "epoch": 0.6194297782470961, + "grad_norm": 1.7209839820861816, + "learning_rate": 4.677826402325777e-06, + "loss": 0.6451, "step": 8799 }, { - "epoch": 0.6658847565358859, - "grad_norm": 2.1555373668670654, - "learning_rate": 4.956484481463328e-06, - "loss": 0.741, + "epoch": 0.6195001759943681, + "grad_norm": 1.6545354127883911, + "learning_rate": 4.67632074230022e-06, + "loss": 0.5857, "step": 8800 }, { - "epoch": 0.6659604252582195, - "grad_norm": 2.427854061126709, - "learning_rate": 4.954472497009647e-06, - "loss": 0.9206, + "epoch": 0.6195705737416403, + "grad_norm": 1.7954758405685425, + "learning_rate": 4.674815203084857e-06, + "loss": 0.7472, "step": 8801 }, { - "epoch": 0.6660360939805532, - "grad_norm": 2.376939296722412, - "learning_rate": 4.952460765899982e-06, - "loss": 0.677, + "epoch": 0.6196409714889124, + "grad_norm": 2.4807910919189453, + "learning_rate": 4.673309784757955e-06, + "loss": 0.8339, "step": 8802 }, { - "epoch": 0.6661117627028867, - "grad_norm": 2.0379579067230225, - "learning_rate": 4.950449288260316e-06, - "loss": 0.6682, + "epoch": 0.6197113692361844, + "grad_norm": 2.9660067558288574, + "learning_rate": 4.671804487397785e-06, + "loss": 0.6188, "step": 8803 }, { - "epoch": 0.6661874314252204, - "grad_norm": 1.950374722480774, - "learning_rate": 4.948438064216615e-06, - "loss": 0.5976, + "epoch": 0.6197817669834566, + "grad_norm": 1.869012475013733, + "learning_rate": 4.6702993110826075e-06, + "loss": 0.6832, "step": 8804 }, { - "epoch": 0.666263100147554, - "grad_norm": 1.9691082239151, - "learning_rate": 4.946427093894825e-06, - "loss": 0.6487, + "epoch": 0.6198521647307286, + "grad_norm": 1.857616662979126, + "learning_rate": 4.6687942558906815e-06, + "loss": 0.7328, "step": 8805 }, { - "epoch": 0.6663387688698876, - "grad_norm": 2.2391226291656494, - "learning_rate": 4.944416377420881e-06, - "loss": 0.7114, + "epoch": 0.6199225624780007, + "grad_norm": 1.7213220596313477, + "learning_rate": 4.66728932190025e-06, + "loss": 0.6039, "step": 8806 }, { - "epoch": 0.6664144375922213, - "grad_norm": 1.9541027545928955, - "learning_rate": 4.942405914920701e-06, - "loss": 0.5922, + "epoch": 0.6199929602252728, + "grad_norm": 1.6209149360656738, + "learning_rate": 4.665784509189561e-06, + "loss": 0.6038, "step": 8807 }, { - "epoch": 0.6664901063145549, - "grad_norm": 2.247073173522949, - "learning_rate": 4.94039570652019e-06, - "loss": 0.7678, + "epoch": 0.6200633579725449, + "grad_norm": 1.7148208618164062, + "learning_rate": 4.664279817836849e-06, + "loss": 0.5814, "step": 8808 }, { - "epoch": 0.6665657750368885, - "grad_norm": 2.149578094482422, - "learning_rate": 4.938385752345224e-06, - "loss": 0.7262, + "epoch": 0.620133755719817, + "grad_norm": 1.8417840003967285, + "learning_rate": 4.662775247920346e-06, + "loss": 0.6002, "step": 8809 }, { - "epoch": 0.6666414437592221, - "grad_norm": 2.3252670764923096, - "learning_rate": 4.936376052521682e-06, - "loss": 0.6556, + "epoch": 0.620204153467089, + "grad_norm": 1.8675941228866577, + "learning_rate": 4.661270799518272e-06, + "loss": 0.7369, "step": 8810 }, { - "epoch": 0.6667171124815557, - "grad_norm": 1.9192390441894531, - "learning_rate": 4.934366607175419e-06, - "loss": 0.6206, + "epoch": 0.6202745512143611, + "grad_norm": 2.162395715713501, + "learning_rate": 4.65976647270885e-06, + "loss": 0.6888, "step": 8811 }, { - "epoch": 0.6667927812038894, - "grad_norm": 2.2456088066101074, - "learning_rate": 4.932357416432264e-06, - "loss": 0.6447, + "epoch": 0.6203449489616333, + "grad_norm": 1.7724957466125488, + "learning_rate": 4.658262267570286e-06, + "loss": 0.6437, "step": 8812 }, { - "epoch": 0.666868449926223, - "grad_norm": 2.371447801589966, - "learning_rate": 4.930348480418045e-06, - "loss": 0.6276, + "epoch": 0.6204153467089053, + "grad_norm": 1.7854640483856201, + "learning_rate": 4.656758184180789e-06, + "loss": 0.7124, "step": 8813 }, { - "epoch": 0.6669441186485566, - "grad_norm": 2.4890329837799072, - "learning_rate": 4.928339799258567e-06, - "loss": 0.5793, + "epoch": 0.6204857444561774, + "grad_norm": 1.8302967548370361, + "learning_rate": 4.6552542226185554e-06, + "loss": 0.6763, "step": 8814 }, { - "epoch": 0.6670197873708903, - "grad_norm": 2.112485885620117, - "learning_rate": 4.926331373079619e-06, - "loss": 0.6851, + "epoch": 0.6205561422034495, + "grad_norm": 1.9310803413391113, + "learning_rate": 4.653750382961779e-06, + "loss": 0.5727, "step": 8815 }, { - "epoch": 0.6670954560932238, - "grad_norm": 2.055906295776367, - "learning_rate": 4.9243232020069775e-06, - "loss": 0.4433, + "epoch": 0.6206265399507216, + "grad_norm": 3.0925021171569824, + "learning_rate": 4.652246665288642e-06, + "loss": 0.7443, "step": 8816 }, { - "epoch": 0.6671711248155575, - "grad_norm": 2.641813278198242, - "learning_rate": 4.9223152861664e-06, - "loss": 0.7469, + "epoch": 0.6206969376979936, + "grad_norm": 1.7223565578460693, + "learning_rate": 4.650743069677333e-06, + "loss": 0.6793, "step": 8817 }, { - "epoch": 0.6672467935378911, - "grad_norm": 2.035452127456665, - "learning_rate": 4.920307625683626e-06, - "loss": 0.5807, + "epoch": 0.6207673354452657, + "grad_norm": 1.7297208309173584, + "learning_rate": 4.649239596206014e-06, + "loss": 0.6774, "step": 8818 }, { - "epoch": 0.6673224622602247, - "grad_norm": 2.3828279972076416, - "learning_rate": 4.9183002206843894e-06, - "loss": 0.8857, + "epoch": 0.6208377331925379, + "grad_norm": 3.186077356338501, + "learning_rate": 4.6477362449528606e-06, + "loss": 0.6815, "step": 8819 }, { - "epoch": 0.6673981309825584, - "grad_norm": 2.7702672481536865, - "learning_rate": 4.916293071294386e-06, - "loss": 0.5946, + "epoch": 0.6209081309398099, + "grad_norm": 1.9918491840362549, + "learning_rate": 4.646233015996029e-06, + "loss": 0.6736, "step": 8820 }, { - "epoch": 0.667473799704892, - "grad_norm": 2.4469079971313477, - "learning_rate": 4.914286177639324e-06, - "loss": 0.6725, + "epoch": 0.620978528687082, + "grad_norm": 1.8663791418075562, + "learning_rate": 4.644729909413675e-06, + "loss": 0.5899, "step": 8821 }, { - "epoch": 0.6675494684272256, - "grad_norm": 2.4644572734832764, - "learning_rate": 4.912279539844879e-06, - "loss": 0.7755, + "epoch": 0.621048926434354, + "grad_norm": 2.3952815532684326, + "learning_rate": 4.643226925283945e-06, + "loss": 0.575, "step": 8822 }, { - "epoch": 0.6676251371495592, - "grad_norm": 2.4523849487304688, - "learning_rate": 4.9102731580367075e-06, - "loss": 0.7279, + "epoch": 0.6211193241816262, + "grad_norm": 2.21958065032959, + "learning_rate": 4.6417240636849825e-06, + "loss": 0.6627, "step": 8823 }, { - "epoch": 0.6677008058718928, - "grad_norm": 2.323460340499878, - "learning_rate": 4.908267032340458e-06, - "loss": 0.594, + "epoch": 0.6211897219288983, + "grad_norm": 2.1999599933624268, + "learning_rate": 4.640221324694921e-06, + "loss": 0.6885, "step": 8824 }, { - "epoch": 0.6677764745942265, - "grad_norm": 2.83245849609375, - "learning_rate": 4.906261162881761e-06, - "loss": 0.7527, + "epoch": 0.6212601196761703, + "grad_norm": 1.8459200859069824, + "learning_rate": 4.638718708391891e-06, + "loss": 0.6962, "step": 8825 }, { - "epoch": 0.6678521433165601, - "grad_norm": 3.085604429244995, - "learning_rate": 4.9042555497862314e-06, - "loss": 0.659, + "epoch": 0.6213305174234425, + "grad_norm": 2.4423933029174805, + "learning_rate": 4.637216214854014e-06, + "loss": 0.7715, "step": 8826 }, { - "epoch": 0.6679278120388937, - "grad_norm": 2.575090169906616, - "learning_rate": 4.902250193179466e-06, - "loss": 0.6034, + "epoch": 0.6214009151707145, + "grad_norm": 1.9009371995925903, + "learning_rate": 4.635713844159403e-06, + "loss": 0.6111, "step": 8827 }, { - "epoch": 0.6680034807612274, - "grad_norm": 2.862489700317383, - "learning_rate": 4.900245093187049e-06, - "loss": 0.7112, + "epoch": 0.6214713129179866, + "grad_norm": 1.6686160564422607, + "learning_rate": 4.634211596386174e-06, + "loss": 0.7214, "step": 8828 }, { - "epoch": 0.668079149483561, - "grad_norm": 2.3818016052246094, - "learning_rate": 4.898240249934546e-06, - "loss": 0.7612, + "epoch": 0.6215417106652588, + "grad_norm": 1.8055163621902466, + "learning_rate": 4.632709471612425e-06, + "loss": 0.6183, "step": 8829 }, { - "epoch": 0.6681548182058946, - "grad_norm": 2.6290574073791504, - "learning_rate": 4.896235663547498e-06, - "loss": 0.6276, + "epoch": 0.6216121084125308, + "grad_norm": 1.7723884582519531, + "learning_rate": 4.6312074699162556e-06, + "loss": 0.5887, "step": 8830 }, { - "epoch": 0.6682304869282282, - "grad_norm": 1.9389046430587769, - "learning_rate": 4.89423133415145e-06, - "loss": 0.5628, + "epoch": 0.6216825061598029, + "grad_norm": 1.9250534772872925, + "learning_rate": 4.6297055913757535e-06, + "loss": 0.7889, "step": 8831 }, { - "epoch": 0.6683061556505618, - "grad_norm": 2.1501002311706543, - "learning_rate": 4.8922272618719154e-06, - "loss": 0.776, + "epoch": 0.6217529039070749, + "grad_norm": 1.7785691022872925, + "learning_rate": 4.628203836069007e-06, + "loss": 0.6983, "step": 8832 }, { - "epoch": 0.6683818243728955, - "grad_norm": 1.9380501508712769, - "learning_rate": 4.8902234468344e-06, - "loss": 0.5632, + "epoch": 0.6218233016543471, + "grad_norm": 2.0277578830718994, + "learning_rate": 4.626702204074087e-06, + "loss": 0.6828, "step": 8833 }, { - "epoch": 0.6684574930952291, - "grad_norm": 1.943785309791565, - "learning_rate": 4.888219889164381e-06, - "loss": 0.6471, + "epoch": 0.6218936994016191, + "grad_norm": 1.9140089750289917, + "learning_rate": 4.625200695469072e-06, + "loss": 0.6877, "step": 8834 }, { - "epoch": 0.6685331618175627, - "grad_norm": 2.238030433654785, - "learning_rate": 4.886216588987328e-06, - "loss": 0.5461, + "epoch": 0.6219640971488912, + "grad_norm": 2.4136857986450195, + "learning_rate": 4.623699310332023e-06, + "loss": 0.6652, "step": 8835 }, { - "epoch": 0.6686088305398963, - "grad_norm": 2.7209441661834717, - "learning_rate": 4.884213546428706e-06, - "loss": 0.6858, + "epoch": 0.6220344948961634, + "grad_norm": 1.936915397644043, + "learning_rate": 4.622198048741e-06, + "loss": 0.6755, "step": 8836 }, { - "epoch": 0.66868449926223, - "grad_norm": 1.9361830949783325, - "learning_rate": 4.882210761613938e-06, - "loss": 0.627, + "epoch": 0.6221048926434354, + "grad_norm": 1.710368037223816, + "learning_rate": 4.620696910774052e-06, + "loss": 0.6381, "step": 8837 }, { - "epoch": 0.6687601679845636, - "grad_norm": 2.493215799331665, - "learning_rate": 4.880208234668452e-06, - "loss": 0.6585, + "epoch": 0.6221752903907075, + "grad_norm": 1.828612208366394, + "learning_rate": 4.619195896509229e-06, + "loss": 0.6742, "step": 8838 }, { - "epoch": 0.6688358367068972, - "grad_norm": 2.2090611457824707, - "learning_rate": 4.878205965717652e-06, - "loss": 0.7604, + "epoch": 0.6222456881379795, + "grad_norm": 1.6009294986724854, + "learning_rate": 4.617695006024563e-06, + "loss": 0.6826, "step": 8839 }, { - "epoch": 0.6689115054292308, - "grad_norm": 1.8827425241470337, - "learning_rate": 4.8762039548869245e-06, - "loss": 0.6121, + "epoch": 0.6223160858852517, + "grad_norm": 1.8125178813934326, + "learning_rate": 4.616194239398094e-06, + "loss": 0.7334, "step": 8840 }, { - "epoch": 0.6689871741515645, - "grad_norm": 1.9385136365890503, - "learning_rate": 4.8742022023016445e-06, - "loss": 0.7205, + "epoch": 0.6223864836325238, + "grad_norm": 2.1499056816101074, + "learning_rate": 4.614693596707844e-06, + "loss": 0.7535, "step": 8841 }, { - "epoch": 0.6690628428738981, - "grad_norm": 2.260593891143799, - "learning_rate": 4.8722007080871675e-06, - "loss": 0.626, + "epoch": 0.6224568813797958, + "grad_norm": 1.9501922130584717, + "learning_rate": 4.613193078031838e-06, + "loss": 0.7755, "step": 8842 }, { - "epoch": 0.6691385115962317, - "grad_norm": 1.950851321220398, - "learning_rate": 4.870199472368835e-06, - "loss": 0.665, + "epoch": 0.622527279127068, + "grad_norm": 1.7487584352493286, + "learning_rate": 4.611692683448082e-06, + "loss": 0.6057, "step": 8843 }, { - "epoch": 0.6692141803185653, - "grad_norm": 3.029724597930908, - "learning_rate": 4.868198495271966e-06, - "loss": 0.7195, + "epoch": 0.62259767687434, + "grad_norm": 1.7857738733291626, + "learning_rate": 4.610192413034588e-06, + "loss": 0.6407, "step": 8844 }, { - "epoch": 0.6692898490408989, - "grad_norm": 2.3940155506134033, - "learning_rate": 4.866197776921867e-06, - "loss": 0.5533, + "epoch": 0.6226680746216121, + "grad_norm": 1.91888427734375, + "learning_rate": 4.6086922668693515e-06, + "loss": 0.627, "step": 8845 }, { - "epoch": 0.6693655177632326, - "grad_norm": 1.612696647644043, - "learning_rate": 4.864197317443839e-06, - "loss": 0.7753, + "epoch": 0.6227384723688842, + "grad_norm": 1.6778323650360107, + "learning_rate": 4.607192245030374e-06, + "loss": 0.7498, "step": 8846 }, { - "epoch": 0.6694411864855662, - "grad_norm": 1.8332983255386353, - "learning_rate": 4.8621971169631535e-06, - "loss": 0.6191, + "epoch": 0.6228088701161563, + "grad_norm": 1.9219059944152832, + "learning_rate": 4.605692347595637e-06, + "loss": 0.746, "step": 8847 }, { - "epoch": 0.6695168552078998, - "grad_norm": 1.9380171298980713, - "learning_rate": 4.8601971756050645e-06, - "loss": 0.6095, + "epoch": 0.6228792678634284, + "grad_norm": 2.783186674118042, + "learning_rate": 4.604192574643124e-06, + "loss": 0.6492, "step": 8848 }, { - "epoch": 0.6695925239302334, - "grad_norm": 2.1490824222564697, - "learning_rate": 4.858197493494819e-06, - "loss": 0.7483, + "epoch": 0.6229496656107004, + "grad_norm": 1.661230206489563, + "learning_rate": 4.6026929262508056e-06, + "loss": 0.7021, "step": 8849 }, { - "epoch": 0.669668192652567, - "grad_norm": 2.265101909637451, - "learning_rate": 4.8561980707576415e-06, - "loss": 0.6927, + "epoch": 0.6230200633579726, + "grad_norm": 1.8668944835662842, + "learning_rate": 4.601193402496658e-06, + "loss": 0.8035, "step": 8850 }, { - "epoch": 0.6697438613749007, - "grad_norm": 1.6495442390441895, - "learning_rate": 4.8541989075187446e-06, - "loss": 0.5957, + "epoch": 0.6230904611052447, + "grad_norm": 1.5594483613967896, + "learning_rate": 4.599694003458634e-06, + "loss": 0.5987, "step": 8851 }, { - "epoch": 0.6698195300972343, - "grad_norm": 2.1201844215393066, - "learning_rate": 4.852200003903321e-06, - "loss": 0.662, + "epoch": 0.6231608588525167, + "grad_norm": 1.7366489171981812, + "learning_rate": 4.598194729214695e-06, + "loss": 0.6074, "step": 8852 }, { - "epoch": 0.6698951988195679, - "grad_norm": 2.0578794479370117, - "learning_rate": 4.850201360036548e-06, - "loss": 0.7122, + "epoch": 0.6232312565997888, + "grad_norm": 2.0236361026763916, + "learning_rate": 4.596695579842786e-06, + "loss": 0.6175, "step": 8853 }, { - "epoch": 0.6699708675419016, - "grad_norm": 2.0692341327667236, - "learning_rate": 4.848202976043593e-06, - "loss": 0.7108, + "epoch": 0.6233016543470609, + "grad_norm": 1.911348819732666, + "learning_rate": 4.595196555420852e-06, + "loss": 0.6882, "step": 8854 }, { - "epoch": 0.6700465362642352, - "grad_norm": 2.5348756313323975, - "learning_rate": 4.846204852049588e-06, - "loss": 0.6537, + "epoch": 0.623372052094333, + "grad_norm": 1.687758207321167, + "learning_rate": 4.5936976560268235e-06, + "loss": 0.7294, "step": 8855 }, { - "epoch": 0.6701222049865688, - "grad_norm": 2.185525417327881, - "learning_rate": 4.844206988179674e-06, - "loss": 0.6937, + "epoch": 0.623442449841605, + "grad_norm": 1.4993699789047241, + "learning_rate": 4.592198881738638e-06, + "loss": 0.6791, "step": 8856 }, { - "epoch": 0.6701978737089024, - "grad_norm": 2.0344419479370117, - "learning_rate": 4.842209384558962e-06, - "loss": 0.535, + "epoch": 0.6235128475888772, + "grad_norm": 1.8500275611877441, + "learning_rate": 4.590700232634209e-06, + "loss": 0.6053, "step": 8857 }, { - "epoch": 0.670273542431236, - "grad_norm": 2.430760145187378, - "learning_rate": 4.840212041312545e-06, - "loss": 0.8704, + "epoch": 0.6235832453361493, + "grad_norm": 1.473334789276123, + "learning_rate": 4.589201708791458e-06, + "loss": 0.6543, "step": 8858 }, { - "epoch": 0.6703492111535697, - "grad_norm": 2.276468276977539, - "learning_rate": 4.838214958565503e-06, - "loss": 0.8045, + "epoch": 0.6236536430834213, + "grad_norm": 1.7584030628204346, + "learning_rate": 4.587703310288294e-06, + "loss": 0.6251, "step": 8859 }, { - "epoch": 0.6704248798759033, - "grad_norm": 2.0148627758026123, - "learning_rate": 4.836218136442902e-06, - "loss": 0.6484, + "epoch": 0.6237240408306934, + "grad_norm": 1.8502508401870728, + "learning_rate": 4.58620503720262e-06, + "loss": 0.6141, "step": 8860 }, { - "epoch": 0.6705005485982369, - "grad_norm": 1.9523584842681885, - "learning_rate": 4.834221575069788e-06, - "loss": 0.7177, + "epoch": 0.6237944385779655, + "grad_norm": 1.4718788862228394, + "learning_rate": 4.5847068896123305e-06, + "loss": 0.5937, "step": 8861 }, { - "epoch": 0.6705762173205705, - "grad_norm": 1.8609076738357544, - "learning_rate": 4.8322252745711925e-06, - "loss": 0.8153, + "epoch": 0.6238648363252376, + "grad_norm": 1.9724866151809692, + "learning_rate": 4.583208867595321e-06, + "loss": 0.6649, "step": 8862 }, { - "epoch": 0.6706518860429042, - "grad_norm": 2.0499250888824463, - "learning_rate": 4.83022923507213e-06, - "loss": 0.7258, + "epoch": 0.6239352340725097, + "grad_norm": 1.8651230335235596, + "learning_rate": 4.581710971229467e-06, + "loss": 0.8245, "step": 8863 }, { - "epoch": 0.6707275547652378, - "grad_norm": 2.329328775405884, - "learning_rate": 4.8282334566976e-06, - "loss": 0.7621, + "epoch": 0.6240056318197817, + "grad_norm": 1.6077384948730469, + "learning_rate": 4.580213200592652e-06, + "loss": 0.6449, "step": 8864 }, { - "epoch": 0.6708032234875714, - "grad_norm": 2.1345584392547607, - "learning_rate": 4.8262379395725885e-06, - "loss": 0.564, + "epoch": 0.6240760295670539, + "grad_norm": 1.6055594682693481, + "learning_rate": 4.5787155557627435e-06, + "loss": 0.6774, "step": 8865 }, { - "epoch": 0.670878892209905, - "grad_norm": 1.9283918142318726, - "learning_rate": 4.82424268382205e-06, - "loss": 0.6522, + "epoch": 0.6241464273143259, + "grad_norm": 1.8255889415740967, + "learning_rate": 4.577218036817607e-06, + "loss": 0.6678, "step": 8866 }, { - "epoch": 0.6709545609322387, - "grad_norm": 2.036198377609253, - "learning_rate": 4.822247689570943e-06, - "loss": 0.7945, + "epoch": 0.624216825061598, + "grad_norm": 1.8783512115478516, + "learning_rate": 4.575720643835097e-06, + "loss": 0.6812, "step": 8867 }, { - "epoch": 0.6710302296545723, - "grad_norm": 2.039332389831543, - "learning_rate": 4.8202529569442015e-06, - "loss": 0.6903, + "epoch": 0.6242872228088702, + "grad_norm": 1.6803581714630127, + "learning_rate": 4.5742233768930695e-06, + "loss": 0.6624, "step": 8868 }, { - "epoch": 0.6711058983769059, - "grad_norm": 2.211557149887085, - "learning_rate": 4.818258486066736e-06, - "loss": 0.6866, + "epoch": 0.6243576205561422, + "grad_norm": 1.6195125579833984, + "learning_rate": 4.572726236069361e-06, + "loss": 0.6215, "step": 8869 }, { - "epoch": 0.6711815670992395, - "grad_norm": 3.0144009590148926, - "learning_rate": 4.816264277063449e-06, - "loss": 0.6603, + "epoch": 0.6244280183034143, + "grad_norm": 1.97798752784729, + "learning_rate": 4.571229221441818e-06, + "loss": 0.6342, "step": 8870 }, { - "epoch": 0.6712572358215732, - "grad_norm": 2.047494649887085, - "learning_rate": 4.814270330059226e-06, - "loss": 0.6691, + "epoch": 0.6244984160506863, + "grad_norm": 1.5793315172195435, + "learning_rate": 4.569732333088266e-06, + "loss": 0.5222, "step": 8871 }, { - "epoch": 0.6713329045439068, - "grad_norm": 2.4948368072509766, - "learning_rate": 4.812276645178932e-06, - "loss": 0.6757, + "epoch": 0.6245688137979585, + "grad_norm": 1.7505803108215332, + "learning_rate": 4.5682355710865305e-06, + "loss": 0.6465, "step": 8872 }, { - "epoch": 0.6714085732662404, - "grad_norm": 2.7353858947753906, - "learning_rate": 4.8102832225474194e-06, - "loss": 0.6022, + "epoch": 0.6246392115452305, + "grad_norm": 2.1643571853637695, + "learning_rate": 4.566738935514429e-06, + "loss": 0.6588, "step": 8873 }, { - "epoch": 0.671484241988574, - "grad_norm": 2.2280282974243164, - "learning_rate": 4.8082900622895226e-06, - "loss": 0.585, + "epoch": 0.6247096092925026, + "grad_norm": 1.9295819997787476, + "learning_rate": 4.5652424264497765e-06, + "loss": 0.7442, "step": 8874 }, { - "epoch": 0.6715599107109076, - "grad_norm": 1.8477840423583984, - "learning_rate": 4.806297164530059e-06, - "loss": 0.8268, + "epoch": 0.6247800070397748, + "grad_norm": 1.8487359285354614, + "learning_rate": 4.563746043970372e-06, + "loss": 0.7059, "step": 8875 }, { - "epoch": 0.6716355794332413, - "grad_norm": 2.323336601257324, - "learning_rate": 4.804304529393834e-06, - "loss": 0.6874, + "epoch": 0.6248504047870468, + "grad_norm": 1.921670913696289, + "learning_rate": 4.562249788154019e-06, + "loss": 0.6999, "step": 8876 }, { - "epoch": 0.6717112481555749, - "grad_norm": 2.1553761959075928, - "learning_rate": 4.8023121570056265e-06, - "loss": 0.6259, + "epoch": 0.6249208025343189, + "grad_norm": 1.9929125308990479, + "learning_rate": 4.560753659078508e-06, + "loss": 0.6738, "step": 8877 }, { - "epoch": 0.6717869168779085, - "grad_norm": 2.515099287033081, - "learning_rate": 4.800320047490211e-06, - "loss": 0.8223, + "epoch": 0.6249912002815909, + "grad_norm": 1.8091833591461182, + "learning_rate": 4.559257656821621e-06, + "loss": 0.6671, "step": 8878 }, { - "epoch": 0.6718625856002421, - "grad_norm": 2.2152724266052246, - "learning_rate": 4.798328200972339e-06, - "loss": 0.5351, + "epoch": 0.6250615980288631, + "grad_norm": 1.6576651334762573, + "learning_rate": 4.557761781461142e-06, + "loss": 0.7232, "step": 8879 }, { - "epoch": 0.6719382543225758, - "grad_norm": 2.307845115661621, - "learning_rate": 4.7963366175767425e-06, - "loss": 0.6745, + "epoch": 0.6251319957761352, + "grad_norm": 1.9018535614013672, + "learning_rate": 4.556266033074838e-06, + "loss": 0.6798, "step": 8880 }, { - "epoch": 0.6720139230449094, - "grad_norm": 2.3008527755737305, - "learning_rate": 4.79434529742814e-06, - "loss": 0.7065, + "epoch": 0.6252023935234072, + "grad_norm": 1.8656281232833862, + "learning_rate": 4.554770411740477e-06, + "loss": 0.7031, "step": 8881 }, { - "epoch": 0.672089591767243, - "grad_norm": 2.3067424297332764, - "learning_rate": 4.792354240651245e-06, - "loss": 0.6639, + "epoch": 0.6252727912706794, + "grad_norm": 2.029564380645752, + "learning_rate": 4.553274917535816e-06, + "loss": 0.6956, "step": 8882 }, { - "epoch": 0.6721652604895766, - "grad_norm": 2.0410807132720947, - "learning_rate": 4.790363447370733e-06, - "loss": 0.6769, + "epoch": 0.6253431890179514, + "grad_norm": 1.7999827861785889, + "learning_rate": 4.551779550538609e-06, + "loss": 0.7467, "step": 8883 }, { - "epoch": 0.6722409292119103, - "grad_norm": 1.8702601194381714, - "learning_rate": 4.788372917711276e-06, - "loss": 0.7758, + "epoch": 0.6254135867652235, + "grad_norm": 1.668978214263916, + "learning_rate": 4.550284310826597e-06, + "loss": 0.6233, "step": 8884 }, { - "epoch": 0.6723165979342439, - "grad_norm": 1.8784395456314087, - "learning_rate": 4.78638265179753e-06, - "loss": 0.7677, + "epoch": 0.6254839845124956, + "grad_norm": 1.926210880279541, + "learning_rate": 4.548789198477526e-06, + "loss": 0.6119, "step": 8885 }, { - "epoch": 0.6723922666565775, - "grad_norm": 2.5597028732299805, - "learning_rate": 4.784392649754131e-06, - "loss": 0.7775, + "epoch": 0.6255543822597677, + "grad_norm": 1.9455151557922363, + "learning_rate": 4.547294213569124e-06, + "loss": 0.6351, "step": 8886 }, { - "epoch": 0.6724679353789111, - "grad_norm": 2.01203989982605, - "learning_rate": 4.782402911705699e-06, - "loss": 0.8557, + "epoch": 0.6256247800070398, + "grad_norm": 1.8759804964065552, + "learning_rate": 4.5457993561791165e-06, + "loss": 0.5917, "step": 8887 }, { - "epoch": 0.6725436041012447, - "grad_norm": 2.301677942276001, - "learning_rate": 4.780413437776838e-06, - "loss": 0.7891, + "epoch": 0.6256951777543118, + "grad_norm": 1.9093433618545532, + "learning_rate": 4.544304626385223e-06, + "loss": 0.7414, "step": 8888 }, { - "epoch": 0.6726192728235784, - "grad_norm": 1.7823597192764282, - "learning_rate": 4.778424228092136e-06, - "loss": 0.5104, + "epoch": 0.625765575501584, + "grad_norm": 1.7155686616897583, + "learning_rate": 4.542810024265157e-06, + "loss": 0.719, "step": 8889 }, { - "epoch": 0.672694941545912, - "grad_norm": 1.991434097290039, - "learning_rate": 4.776435282776166e-06, - "loss": 0.7134, + "epoch": 0.625835973248856, + "grad_norm": 1.9360209703445435, + "learning_rate": 4.54131554989662e-06, + "loss": 0.6376, "step": 8890 }, { - "epoch": 0.6727706102682456, - "grad_norm": 2.2954578399658203, - "learning_rate": 4.774446601953472e-06, - "loss": 0.7555, + "epoch": 0.6259063709961281, + "grad_norm": 2.2942371368408203, + "learning_rate": 4.539821203357318e-06, + "loss": 0.7347, "step": 8891 }, { - "epoch": 0.6728462789905792, - "grad_norm": 2.434096097946167, - "learning_rate": 4.772458185748603e-06, - "loss": 0.5947, + "epoch": 0.6259767687434002, + "grad_norm": 1.882985234260559, + "learning_rate": 4.538326984724938e-06, + "loss": 0.7137, "step": 8892 }, { - "epoch": 0.6729219477129129, - "grad_norm": 2.205364942550659, - "learning_rate": 4.770470034286079e-06, - "loss": 0.601, + "epoch": 0.6260471664906723, + "grad_norm": 1.812325119972229, + "learning_rate": 4.5368328940771685e-06, + "loss": 0.743, "step": 8893 }, { - "epoch": 0.6729976164352465, - "grad_norm": 2.9410433769226074, - "learning_rate": 4.768482147690398e-06, - "loss": 0.8037, + "epoch": 0.6261175642379444, + "grad_norm": 1.7374353408813477, + "learning_rate": 4.535338931491687e-06, + "loss": 0.6531, "step": 8894 }, { - "epoch": 0.6730732851575801, - "grad_norm": 2.166212320327759, - "learning_rate": 4.766494526086052e-06, - "loss": 0.7444, + "epoch": 0.6261879619852164, + "grad_norm": 1.9901626110076904, + "learning_rate": 4.533845097046168e-06, + "loss": 0.6075, "step": 8895 }, { - "epoch": 0.6731489538799137, - "grad_norm": 1.7446627616882324, - "learning_rate": 4.76450716959751e-06, - "loss": 0.7428, + "epoch": 0.6262583597324886, + "grad_norm": 1.9554730653762817, + "learning_rate": 4.532351390818272e-06, + "loss": 0.7893, "step": 8896 }, { - "epoch": 0.6732246226022474, - "grad_norm": 2.243112802505493, - "learning_rate": 4.762520078349229e-06, - "loss": 0.5938, + "epoch": 0.6263287574797607, + "grad_norm": 1.9355320930480957, + "learning_rate": 4.530857812885665e-06, + "loss": 0.6227, "step": 8897 }, { - "epoch": 0.673300291324581, - "grad_norm": 2.0831973552703857, - "learning_rate": 4.760533252465647e-06, - "loss": 0.683, + "epoch": 0.6263991552270327, + "grad_norm": 1.94437837600708, + "learning_rate": 4.529364363325996e-06, + "loss": 0.7253, "step": 8898 }, { - "epoch": 0.6733759600469146, - "grad_norm": 2.1453609466552734, - "learning_rate": 4.7585466920711845e-06, - "loss": 0.6909, + "epoch": 0.6264695529743048, + "grad_norm": 1.732561707496643, + "learning_rate": 4.5278710422169125e-06, + "loss": 0.6789, "step": 8899 }, { - "epoch": 0.6734516287692482, - "grad_norm": 2.366060733795166, - "learning_rate": 4.756560397290251e-06, - "loss": 0.7826, + "epoch": 0.6265399507215769, + "grad_norm": 1.4828764200210571, + "learning_rate": 4.526377849636049e-06, + "loss": 0.6339, "step": 8900 }, { - "epoch": 0.6735272974915818, - "grad_norm": 2.3025095462799072, - "learning_rate": 4.754574368247225e-06, - "loss": 0.6098, + "epoch": 0.626610348468849, + "grad_norm": 1.6962684392929077, + "learning_rate": 4.524884785661046e-06, + "loss": 0.6684, "step": 8901 }, { - "epoch": 0.6736029662139155, - "grad_norm": 1.991722822189331, - "learning_rate": 4.752588605066481e-06, - "loss": 0.7002, + "epoch": 0.6266807462161211, + "grad_norm": 2.6839118003845215, + "learning_rate": 4.5233918503695194e-06, + "loss": 0.8658, "step": 8902 }, { - "epoch": 0.6736786349362491, - "grad_norm": 2.3091063499450684, - "learning_rate": 4.75060310787238e-06, - "loss": 0.8689, + "epoch": 0.6267511439633932, + "grad_norm": 1.760019063949585, + "learning_rate": 4.521899043839098e-06, + "loss": 0.6916, "step": 8903 }, { - "epoch": 0.6737543036585827, - "grad_norm": 2.582026243209839, - "learning_rate": 4.748617876789259e-06, - "loss": 0.759, + "epoch": 0.6268215417106653, + "grad_norm": 1.9874176979064941, + "learning_rate": 4.520406366147387e-06, + "loss": 0.6286, "step": 8904 }, { - "epoch": 0.6738299723809164, - "grad_norm": 2.0004804134368896, - "learning_rate": 4.746632911941435e-06, - "loss": 0.7951, + "epoch": 0.6268919394579373, + "grad_norm": 1.7570241689682007, + "learning_rate": 4.518913817371996e-06, + "loss": 0.6611, "step": 8905 }, { - "epoch": 0.67390564110325, - "grad_norm": 2.1170551776885986, - "learning_rate": 4.744648213453215e-06, - "loss": 0.6839, + "epoch": 0.6269623372052094, + "grad_norm": 2.332871675491333, + "learning_rate": 4.517421397590521e-06, + "loss": 0.6747, "step": 8906 }, { - "epoch": 0.6739813098255836, - "grad_norm": 2.757847785949707, - "learning_rate": 4.742663781448887e-06, - "loss": 0.6751, + "epoch": 0.6270327349524816, + "grad_norm": 1.9562926292419434, + "learning_rate": 4.515929106880558e-06, + "loss": 0.6989, "step": 8907 }, { - "epoch": 0.6740569785479172, - "grad_norm": 1.556501865386963, - "learning_rate": 4.740679616052722e-06, - "loss": 0.7912, + "epoch": 0.6271031326997536, + "grad_norm": 3.8486785888671875, + "learning_rate": 4.514436945319688e-06, + "loss": 0.7272, "step": 8908 }, { - "epoch": 0.6741326472702508, - "grad_norm": 2.0596907138824463, - "learning_rate": 4.7386957173889775e-06, - "loss": 0.7228, + "epoch": 0.6271735304470257, + "grad_norm": 1.870395302772522, + "learning_rate": 4.512944912985495e-06, + "loss": 0.6484, "step": 8909 }, { - "epoch": 0.6742083159925845, - "grad_norm": 2.1540966033935547, - "learning_rate": 4.736712085581889e-06, - "loss": 0.7179, + "epoch": 0.6272439281942978, + "grad_norm": 1.8952008485794067, + "learning_rate": 4.5114530099555455e-06, + "loss": 0.771, "step": 8910 }, { - "epoch": 0.6742839847149181, - "grad_norm": 2.1931824684143066, - "learning_rate": 4.734728720755683e-06, - "loss": 0.717, + "epoch": 0.6273143259415699, + "grad_norm": 2.2265756130218506, + "learning_rate": 4.509961236307411e-06, + "loss": 0.6242, "step": 8911 }, { - "epoch": 0.6743596534372517, - "grad_norm": 2.3688266277313232, - "learning_rate": 4.732745623034552e-06, - "loss": 0.6503, + "epoch": 0.6273847236888419, + "grad_norm": 1.8128106594085693, + "learning_rate": 4.508469592118643e-06, + "loss": 0.5825, "step": 8912 }, { - "epoch": 0.6744353221595853, - "grad_norm": 2.4349288940429688, - "learning_rate": 4.730762792542696e-06, - "loss": 0.5946, + "epoch": 0.627455121436114, + "grad_norm": 1.7894340753555298, + "learning_rate": 4.506978077466803e-06, + "loss": 0.6756, "step": 8913 }, { - "epoch": 0.6745109908819189, - "grad_norm": 2.7900352478027344, - "learning_rate": 4.728780229404286e-06, - "loss": 0.6437, + "epoch": 0.6275255191833862, + "grad_norm": 1.820526361465454, + "learning_rate": 4.5054866924294244e-06, + "loss": 0.7182, "step": 8914 }, { - "epoch": 0.6745866596042526, - "grad_norm": 3.1007180213928223, - "learning_rate": 4.726797933743469e-06, - "loss": 0.7885, + "epoch": 0.6275959169306582, + "grad_norm": 1.7979587316513062, + "learning_rate": 4.503995437084055e-06, + "loss": 0.6916, "step": 8915 }, { - "epoch": 0.6746623283265862, - "grad_norm": 1.8395260572433472, - "learning_rate": 4.724815905684387e-06, - "loss": 0.5847, + "epoch": 0.6276663146779303, + "grad_norm": 2.0242249965667725, + "learning_rate": 4.502504311508223e-06, + "loss": 0.6881, "step": 8916 }, { - "epoch": 0.6747379970489198, - "grad_norm": 2.2980258464813232, - "learning_rate": 4.722834145351159e-06, - "loss": 0.6564, + "epoch": 0.6277367124252023, + "grad_norm": 2.0753743648529053, + "learning_rate": 4.501013315779455e-06, + "loss": 0.7936, "step": 8917 }, { - "epoch": 0.6748136657712535, - "grad_norm": 2.2813050746917725, - "learning_rate": 4.7208526528678934e-06, - "loss": 0.6424, + "epoch": 0.6278071101724745, + "grad_norm": 1.6494354009628296, + "learning_rate": 4.499522449975264e-06, + "loss": 0.7209, "step": 8918 }, { - "epoch": 0.6748893344935871, - "grad_norm": 3.0813608169555664, - "learning_rate": 4.7188714283586735e-06, - "loss": 0.6765, + "epoch": 0.6278775079197466, + "grad_norm": 1.8652344942092896, + "learning_rate": 4.498031714173171e-06, + "loss": 0.6712, "step": 8919 }, { - "epoch": 0.6749650032159207, - "grad_norm": 3.065865993499756, - "learning_rate": 4.716890471947572e-06, - "loss": 0.6069, + "epoch": 0.6279479056670186, + "grad_norm": 1.961323857307434, + "learning_rate": 4.49654110845067e-06, + "loss": 0.6701, "step": 8920 }, { - "epoch": 0.6750406719382543, - "grad_norm": 2.9707062244415283, - "learning_rate": 4.7149097837586425e-06, - "loss": 0.7634, + "epoch": 0.6280183034142908, + "grad_norm": 2.0352137088775635, + "learning_rate": 4.495050632885267e-06, + "loss": 0.8538, "step": 8921 }, { - "epoch": 0.6751163406605879, - "grad_norm": 2.9453794956207275, - "learning_rate": 4.712929363915923e-06, - "loss": 0.7505, + "epoch": 0.6280887011615628, + "grad_norm": 1.7597757577896118, + "learning_rate": 4.493560287554449e-06, + "loss": 0.6143, "step": 8922 }, { - "epoch": 0.6751920093829216, - "grad_norm": 1.7816321849822998, - "learning_rate": 4.710949212543431e-06, - "loss": 0.6617, + "epoch": 0.6281590989088349, + "grad_norm": 1.666231632232666, + "learning_rate": 4.492070072535703e-06, + "loss": 0.6356, "step": 8923 }, { - "epoch": 0.6752676781052552, - "grad_norm": 1.8292231559753418, - "learning_rate": 4.7089693297651725e-06, - "loss": 0.7435, + "epoch": 0.6282294966561071, + "grad_norm": 1.6170204877853394, + "learning_rate": 4.4905799879065025e-06, + "loss": 0.6592, "step": 8924 }, { - "epoch": 0.6753433468275888, - "grad_norm": 2.4246504306793213, - "learning_rate": 4.706989715705137e-06, - "loss": 0.5491, + "epoch": 0.6282998944033791, + "grad_norm": 2.912653684616089, + "learning_rate": 4.489090033744325e-06, + "loss": 0.7391, "step": 8925 }, { - "epoch": 0.6754190155499225, - "grad_norm": 2.1744165420532227, - "learning_rate": 4.705010370487287e-06, - "loss": 0.7401, + "epoch": 0.6283702921506512, + "grad_norm": 1.74045729637146, + "learning_rate": 4.487600210126627e-06, + "loss": 0.7093, "step": 8926 }, { - "epoch": 0.6754946842722561, - "grad_norm": 1.9293251037597656, - "learning_rate": 4.703031294235576e-06, - "loss": 0.6611, + "epoch": 0.6284406898979232, + "grad_norm": 2.0052804946899414, + "learning_rate": 4.48611051713087e-06, + "loss": 0.6913, "step": 8927 }, { - "epoch": 0.6755703529945897, - "grad_norm": 2.0167737007141113, - "learning_rate": 4.701052487073951e-06, - "loss": 0.6623, + "epoch": 0.6285110876451954, + "grad_norm": 1.8702497482299805, + "learning_rate": 4.484620954834505e-06, + "loss": 0.6438, "step": 8928 }, { - "epoch": 0.6756460217169233, - "grad_norm": 2.362187147140503, - "learning_rate": 4.69907394912632e-06, - "loss": 0.8366, + "epoch": 0.6285814853924674, + "grad_norm": 2.2539100646972656, + "learning_rate": 4.483131523314974e-06, + "loss": 0.7963, "step": 8929 }, { - "epoch": 0.6757216904392569, - "grad_norm": 2.09653377532959, - "learning_rate": 4.697095680516588e-06, - "loss": 0.757, + "epoch": 0.6286518831397395, + "grad_norm": 1.5418449640274048, + "learning_rate": 4.481642222649715e-06, + "loss": 0.5593, "step": 8930 }, { - "epoch": 0.6757973591615906, - "grad_norm": 2.5158259868621826, - "learning_rate": 4.695117681368643e-06, - "loss": 0.6652, + "epoch": 0.6287222808870117, + "grad_norm": 2.1155731678009033, + "learning_rate": 4.480153052916157e-06, + "loss": 0.7637, "step": 8931 }, { - "epoch": 0.6758730278839242, - "grad_norm": 2.2666311264038086, - "learning_rate": 4.693139951806352e-06, - "loss": 0.728, + "epoch": 0.6287926786342837, + "grad_norm": 1.8812837600708008, + "learning_rate": 4.478664014191725e-06, + "loss": 0.7016, "step": 8932 }, { - "epoch": 0.6759486966062578, - "grad_norm": 2.03359055519104, - "learning_rate": 4.691162491953568e-06, - "loss": 0.772, + "epoch": 0.6288630763815558, + "grad_norm": 1.7704731225967407, + "learning_rate": 4.4771751065538336e-06, + "loss": 0.661, "step": 8933 }, { - "epoch": 0.6760243653285914, - "grad_norm": 2.2918758392333984, - "learning_rate": 4.689185301934124e-06, - "loss": 0.7318, + "epoch": 0.6289334741288278, + "grad_norm": 2.163632392883301, + "learning_rate": 4.475686330079894e-06, + "loss": 0.6843, "step": 8934 }, { - "epoch": 0.676100034050925, - "grad_norm": 2.3821206092834473, - "learning_rate": 4.6872083818718404e-06, - "loss": 0.678, + "epoch": 0.6290038718761, + "grad_norm": 2.2507331371307373, + "learning_rate": 4.4741976848473065e-06, + "loss": 0.6589, "step": 8935 }, { - "epoch": 0.6761757027732587, - "grad_norm": 1.8658883571624756, - "learning_rate": 4.685231731890521e-06, - "loss": 0.7425, + "epoch": 0.6290742696233721, + "grad_norm": 2.0214552879333496, + "learning_rate": 4.4727091709334705e-06, + "loss": 0.7158, "step": 8936 }, { - "epoch": 0.6762513714955923, - "grad_norm": 1.821655511856079, - "learning_rate": 4.6832553521139415e-06, - "loss": 0.6313, + "epoch": 0.6291446673706441, + "grad_norm": 2.1184444427490234, + "learning_rate": 4.471220788415772e-06, + "loss": 0.6321, "step": 8937 }, { - "epoch": 0.6763270402179259, - "grad_norm": 2.3317677974700928, - "learning_rate": 4.6812792426658715e-06, - "loss": 0.8466, + "epoch": 0.6292150651179163, + "grad_norm": 1.9412455558776855, + "learning_rate": 4.469732537371597e-06, + "loss": 0.7002, "step": 8938 }, { - "epoch": 0.6764027089402596, - "grad_norm": 2.308093547821045, - "learning_rate": 4.679303403670069e-06, - "loss": 0.7643, + "epoch": 0.6292854628651883, + "grad_norm": 1.798052430152893, + "learning_rate": 4.4682444178783185e-06, + "loss": 0.6418, "step": 8939 }, { - "epoch": 0.6764783776625932, - "grad_norm": 2.0288562774658203, - "learning_rate": 4.67732783525026e-06, - "loss": 0.6358, + "epoch": 0.6293558606124604, + "grad_norm": 3.0892040729522705, + "learning_rate": 4.466756430013306e-06, + "loss": 0.6337, "step": 8940 }, { - "epoch": 0.6765540463849268, - "grad_norm": 2.001481771469116, - "learning_rate": 4.675352537530162e-06, - "loss": 0.5445, + "epoch": 0.6294262583597325, + "grad_norm": 1.5022046566009521, + "learning_rate": 4.465268573853918e-06, + "loss": 0.6904, "step": 8941 }, { - "epoch": 0.6766297151072604, - "grad_norm": 2.197216749191284, - "learning_rate": 4.673377510633478e-06, - "loss": 0.7168, + "epoch": 0.6294966561070046, + "grad_norm": 1.737906575202942, + "learning_rate": 4.4637808494775155e-06, + "loss": 0.4896, "step": 8942 }, { - "epoch": 0.676705383829594, - "grad_norm": 3.374070882797241, - "learning_rate": 4.671402754683887e-06, - "loss": 0.7088, + "epoch": 0.6295670538542767, + "grad_norm": 1.8464139699935913, + "learning_rate": 4.462293256961441e-06, + "loss": 0.6529, "step": 8943 }, { - "epoch": 0.6767810525519277, - "grad_norm": 2.1551625728607178, - "learning_rate": 4.669428269805055e-06, - "loss": 0.7868, + "epoch": 0.6296374516015487, + "grad_norm": 1.980373501777649, + "learning_rate": 4.460805796383042e-06, + "loss": 0.8006, "step": 8944 }, { - "epoch": 0.6768567212742613, - "grad_norm": 2.4078245162963867, - "learning_rate": 4.6674540561206336e-06, - "loss": 0.8625, + "epoch": 0.6297078493488208, + "grad_norm": 1.601212501525879, + "learning_rate": 4.459318467819647e-06, + "loss": 0.741, "step": 8945 }, { - "epoch": 0.6769323899965949, - "grad_norm": 2.43843674659729, - "learning_rate": 4.665480113754253e-06, - "loss": 0.6372, + "epoch": 0.629778247096093, + "grad_norm": 1.6373995542526245, + "learning_rate": 4.457831271348586e-06, + "loss": 0.6357, "step": 8946 }, { - "epoch": 0.6770080587189286, - "grad_norm": 6.045653343200684, - "learning_rate": 4.663506442829526e-06, - "loss": 0.7967, + "epoch": 0.629848644843365, + "grad_norm": 1.6908868551254272, + "learning_rate": 4.456344207047178e-06, + "loss": 0.6065, "step": 8947 }, { - "epoch": 0.6770837274412621, - "grad_norm": 1.9134116172790527, - "learning_rate": 4.661533043470047e-06, - "loss": 0.675, + "epoch": 0.6299190425906371, + "grad_norm": 1.9284040927886963, + "learning_rate": 4.4548572749927405e-06, + "loss": 0.6727, "step": 8948 }, { - "epoch": 0.6771593961635958, - "grad_norm": 2.1514625549316406, - "learning_rate": 4.659559915799406e-06, - "loss": 0.7456, + "epoch": 0.6299894403379092, + "grad_norm": 1.7732658386230469, + "learning_rate": 4.453370475262577e-06, + "loss": 0.6713, "step": 8949 }, { - "epoch": 0.6772350648859294, - "grad_norm": 2.0664405822753906, - "learning_rate": 4.657587059941163e-06, - "loss": 0.6689, + "epoch": 0.6300598380851813, + "grad_norm": 2.1013684272766113, + "learning_rate": 4.451883807933989e-06, + "loss": 0.711, "step": 8950 }, { - "epoch": 0.677310733608263, - "grad_norm": 2.511876344680786, - "learning_rate": 4.655614476018862e-06, - "loss": 0.7499, + "epoch": 0.6301302358324533, + "grad_norm": 1.6525382995605469, + "learning_rate": 4.450397273084269e-06, + "loss": 0.5905, "step": 8951 }, { - "epoch": 0.6773864023305967, - "grad_norm": 2.2233481407165527, - "learning_rate": 4.653642164156032e-06, - "loss": 0.695, + "epoch": 0.6302006335797254, + "grad_norm": 1.8177704811096191, + "learning_rate": 4.4489108707907075e-06, + "loss": 0.619, "step": 8952 }, { - "epoch": 0.6774620710529303, - "grad_norm": 4.162423610687256, - "learning_rate": 4.651670124476189e-06, - "loss": 0.5902, + "epoch": 0.6302710313269976, + "grad_norm": 1.7273306846618652, + "learning_rate": 4.447424601130576e-06, + "loss": 0.7432, "step": 8953 }, { - "epoch": 0.6775377397752639, - "grad_norm": 2.2912869453430176, - "learning_rate": 4.649698357102826e-06, - "loss": 0.9048, + "epoch": 0.6303414290742696, + "grad_norm": 1.7753610610961914, + "learning_rate": 4.445938464181154e-06, + "loss": 0.6562, "step": 8954 }, { - "epoch": 0.6776134084975975, - "grad_norm": 2.0150766372680664, - "learning_rate": 4.647726862159423e-06, - "loss": 0.6542, + "epoch": 0.6304118268215417, + "grad_norm": 1.9042919874191284, + "learning_rate": 4.444452460019702e-06, + "loss": 0.6993, "step": 8955 }, { - "epoch": 0.6776890772199311, - "grad_norm": 2.4221763610839844, - "learning_rate": 4.6457556397694415e-06, - "loss": 0.7111, + "epoch": 0.6304822245688138, + "grad_norm": 1.5312023162841797, + "learning_rate": 4.442966588723483e-06, + "loss": 0.611, "step": 8956 }, { - "epoch": 0.6777647459422648, - "grad_norm": 2.3624653816223145, - "learning_rate": 4.643784690056328e-06, - "loss": 0.6624, + "epoch": 0.6305526223160859, + "grad_norm": 2.3948497772216797, + "learning_rate": 4.441480850369745e-06, + "loss": 0.6585, "step": 8957 }, { - "epoch": 0.6778404146645984, - "grad_norm": 2.5852222442626953, - "learning_rate": 4.641814013143499e-06, - "loss": 0.7025, + "epoch": 0.630623020063358, + "grad_norm": 2.1842124462127686, + "learning_rate": 4.4399952450357374e-06, + "loss": 0.7573, "step": 8958 }, { - "epoch": 0.677916083386932, - "grad_norm": 2.3841378688812256, - "learning_rate": 4.639843609154379e-06, - "loss": 0.6945, + "epoch": 0.63069341781063, + "grad_norm": 1.6437325477600098, + "learning_rate": 4.438509772798691e-06, + "loss": 0.641, "step": 8959 }, { - "epoch": 0.6779917521092657, - "grad_norm": 2.0488786697387695, - "learning_rate": 4.637873478212354e-06, - "loss": 0.7785, + "epoch": 0.6307638155579022, + "grad_norm": 1.9346373081207275, + "learning_rate": 4.437024433735846e-06, + "loss": 0.7149, "step": 8960 }, { - "epoch": 0.6780674208315992, - "grad_norm": 2.140420436859131, - "learning_rate": 4.6359036204408e-06, - "loss": 0.5558, + "epoch": 0.6308342133051742, + "grad_norm": 1.7528678178787231, + "learning_rate": 4.435539227924419e-06, + "loss": 0.5203, "step": 8961 }, { - "epoch": 0.6781430895539329, - "grad_norm": 2.20794939994812, - "learning_rate": 4.633934035963076e-06, - "loss": 0.7389, + "epoch": 0.6309046110524463, + "grad_norm": 2.01584792137146, + "learning_rate": 4.434054155441632e-06, + "loss": 0.6299, "step": 8962 }, { - "epoch": 0.6782187582762665, - "grad_norm": 2.0823874473571777, - "learning_rate": 4.631964724902521e-06, - "loss": 0.5781, + "epoch": 0.6309750087997185, + "grad_norm": 1.8728141784667969, + "learning_rate": 4.432569216364691e-06, + "loss": 0.7016, "step": 8963 }, { - "epoch": 0.6782944269986001, - "grad_norm": 2.717106580734253, - "learning_rate": 4.629995687382469e-06, - "loss": 0.7518, + "epoch": 0.6310454065469905, + "grad_norm": 1.567643165588379, + "learning_rate": 4.431084410770806e-06, + "loss": 0.7043, "step": 8964 }, { - "epoch": 0.6783700957209338, - "grad_norm": 1.9206687211990356, - "learning_rate": 4.6280269235262175e-06, - "loss": 0.6779, + "epoch": 0.6311158042942626, + "grad_norm": 1.826465129852295, + "learning_rate": 4.429599738737164e-06, + "loss": 0.734, "step": 8965 }, { - "epoch": 0.6784457644432674, - "grad_norm": 2.1041131019592285, - "learning_rate": 4.626058433457062e-06, - "loss": 0.6477, + "epoch": 0.6311862020415346, + "grad_norm": 1.803178310394287, + "learning_rate": 4.428115200340961e-06, + "loss": 0.7355, "step": 8966 }, { - "epoch": 0.678521433165601, - "grad_norm": 1.8171806335449219, - "learning_rate": 4.624090217298274e-06, - "loss": 0.7458, + "epoch": 0.6312565997888068, + "grad_norm": 1.636277675628662, + "learning_rate": 4.426630795659379e-06, + "loss": 0.8202, "step": 8967 }, { - "epoch": 0.6785971018879347, - "grad_norm": 2.141724109649658, - "learning_rate": 4.62212227517311e-06, - "loss": 0.599, + "epoch": 0.6313269975360788, + "grad_norm": 1.6653649806976318, + "learning_rate": 4.425146524769592e-06, + "loss": 0.6007, "step": 8968 }, { - "epoch": 0.6786727706102682, - "grad_norm": 2.140650510787964, - "learning_rate": 4.620154607204809e-06, - "loss": 0.6146, + "epoch": 0.6313973952833509, + "grad_norm": 1.779578685760498, + "learning_rate": 4.4236623877487675e-06, + "loss": 0.727, "step": 8969 }, { - "epoch": 0.6787484393326019, - "grad_norm": 1.9559601545333862, - "learning_rate": 4.618187213516592e-06, - "loss": 0.7644, + "epoch": 0.6314677930306231, + "grad_norm": 1.9371278285980225, + "learning_rate": 4.422178384674073e-06, + "loss": 0.6408, "step": 8970 }, { - "epoch": 0.6788241080549355, - "grad_norm": 2.512819528579712, - "learning_rate": 4.616220094231669e-06, - "loss": 0.6922, + "epoch": 0.6315381907778951, + "grad_norm": 1.7691351175308228, + "learning_rate": 4.420694515622654e-06, + "loss": 0.7197, "step": 8971 }, { - "epoch": 0.6788997767772691, - "grad_norm": 1.995936393737793, - "learning_rate": 4.614253249473218e-06, - "loss": 0.4985, + "epoch": 0.6316085885251672, + "grad_norm": 1.7146881818771362, + "learning_rate": 4.419210780671667e-06, + "loss": 0.7101, "step": 8972 }, { - "epoch": 0.6789754454996028, - "grad_norm": 1.9954118728637695, - "learning_rate": 4.612286679364414e-06, - "loss": 0.6756, + "epoch": 0.6316789862724392, + "grad_norm": 2.0774588584899902, + "learning_rate": 4.417727179898247e-06, + "loss": 0.6653, "step": 8973 }, { - "epoch": 0.6790511142219363, - "grad_norm": 2.6867809295654297, - "learning_rate": 4.610320384028409e-06, - "loss": 0.6245, + "epoch": 0.6317493840197114, + "grad_norm": 1.8786152601242065, + "learning_rate": 4.41624371337953e-06, + "loss": 0.625, "step": 8974 }, { - "epoch": 0.67912678294427, - "grad_norm": 2.2337684631347656, - "learning_rate": 4.60835436358834e-06, - "loss": 0.7234, + "epoch": 0.6318197817669835, + "grad_norm": 2.0342841148376465, + "learning_rate": 4.4147603811926405e-06, + "loss": 0.6363, "step": 8975 }, { - "epoch": 0.6792024516666036, - "grad_norm": 2.338660478591919, - "learning_rate": 4.606388618167325e-06, - "loss": 0.6593, + "epoch": 0.6318901795142555, + "grad_norm": 1.5877940654754639, + "learning_rate": 4.413277183414701e-06, + "loss": 0.6009, "step": 8976 }, { - "epoch": 0.6792781203889372, - "grad_norm": 2.2352778911590576, - "learning_rate": 4.604423147888467e-06, - "loss": 0.6932, + "epoch": 0.6319605772615277, + "grad_norm": 1.3998557329177856, + "learning_rate": 4.4117941201228245e-06, + "loss": 0.5598, "step": 8977 }, { - "epoch": 0.6793537891112709, - "grad_norm": 1.7522343397140503, - "learning_rate": 4.6024579528748465e-06, - "loss": 0.706, + "epoch": 0.6320309750087997, + "grad_norm": 1.6115599870681763, + "learning_rate": 4.4103111913941155e-06, + "loss": 0.7105, "step": 8978 }, { - "epoch": 0.6794294578336045, - "grad_norm": 2.317509412765503, - "learning_rate": 4.600493033249532e-06, - "loss": 0.6231, + "epoch": 0.6321013727560718, + "grad_norm": 1.8306970596313477, + "learning_rate": 4.4088283973056735e-06, + "loss": 0.7344, "step": 8979 }, { - "epoch": 0.6795051265559381, - "grad_norm": 1.9539676904678345, - "learning_rate": 4.598528389135574e-06, - "loss": 0.7355, + "epoch": 0.6321717705033439, + "grad_norm": 1.734236240386963, + "learning_rate": 4.407345737934588e-06, + "loss": 0.5916, "step": 8980 }, { - "epoch": 0.6795807952782718, - "grad_norm": 1.9069515466690063, - "learning_rate": 4.5965640206560055e-06, - "loss": 0.6524, + "epoch": 0.632242168250616, + "grad_norm": 1.5135917663574219, + "learning_rate": 4.405863213357947e-06, + "loss": 0.7799, "step": 8981 }, { - "epoch": 0.6796564640006053, - "grad_norm": 2.012300729751587, - "learning_rate": 4.594599927933843e-06, - "loss": 0.6192, + "epoch": 0.6323125659978881, + "grad_norm": 1.5535550117492676, + "learning_rate": 4.404380823652826e-06, + "loss": 0.6611, "step": 8982 }, { - "epoch": 0.679732132722939, - "grad_norm": 2.2685201168060303, - "learning_rate": 4.59263611109208e-06, - "loss": 0.7487, + "epoch": 0.6323829637451601, + "grad_norm": 1.7427586317062378, + "learning_rate": 4.402898568896299e-06, + "loss": 0.487, "step": 8983 }, { - "epoch": 0.6798078014452726, - "grad_norm": 2.114442825317383, - "learning_rate": 4.5906725702536925e-06, - "loss": 0.6905, + "epoch": 0.6324533614924323, + "grad_norm": 1.7351548671722412, + "learning_rate": 4.401416449165425e-06, + "loss": 0.7259, "step": 8984 }, { - "epoch": 0.6798834701676062, - "grad_norm": 1.9639710187911987, - "learning_rate": 4.588709305541659e-06, - "loss": 0.6025, + "epoch": 0.6325237592397043, + "grad_norm": 2.4127249717712402, + "learning_rate": 4.399934464537265e-06, + "loss": 0.7635, "step": 8985 }, { - "epoch": 0.6799591388899399, - "grad_norm": 1.5509191751480103, - "learning_rate": 4.586746317078913e-06, - "loss": 0.8064, + "epoch": 0.6325941569869764, + "grad_norm": 2.120973587036133, + "learning_rate": 4.398452615088864e-06, + "loss": 0.7, "step": 8986 }, { - "epoch": 0.6800348076122734, - "grad_norm": 2.1551706790924072, - "learning_rate": 4.584783604988387e-06, - "loss": 0.6756, + "epoch": 0.6326645547342485, + "grad_norm": 2.205505847930908, + "learning_rate": 4.396970900897271e-06, + "loss": 0.6961, "step": 8987 }, { - "epoch": 0.6801104763346071, - "grad_norm": 2.119821548461914, - "learning_rate": 4.5828211693929915e-06, - "loss": 0.8007, + "epoch": 0.6327349524815206, + "grad_norm": 1.8597545623779297, + "learning_rate": 4.395489322039516e-06, + "loss": 0.7071, "step": 8988 }, { - "epoch": 0.6801861450569407, - "grad_norm": 2.0947601795196533, - "learning_rate": 4.580859010415622e-06, - "loss": 0.6009, + "epoch": 0.6328053502287927, + "grad_norm": 1.565760850906372, + "learning_rate": 4.394007878592632e-06, + "loss": 0.6249, "step": 8989 }, { - "epoch": 0.6802618137792743, - "grad_norm": 1.9060765504837036, - "learning_rate": 4.5788971281791535e-06, - "loss": 0.6957, + "epoch": 0.6328757479760647, + "grad_norm": 1.8781038522720337, + "learning_rate": 4.392526570633637e-06, + "loss": 0.7174, "step": 8990 }, { - "epoch": 0.680337482501608, - "grad_norm": 2.3293838500976562, - "learning_rate": 4.576935522806447e-06, - "loss": 0.552, + "epoch": 0.6329461457233369, + "grad_norm": 1.95187509059906, + "learning_rate": 4.391045398239548e-06, + "loss": 0.7089, "step": 8991 }, { - "epoch": 0.6804131512239416, - "grad_norm": 2.8745076656341553, - "learning_rate": 4.574974194420344e-06, - "loss": 0.6307, + "epoch": 0.633016543470609, + "grad_norm": 1.5092695951461792, + "learning_rate": 4.389564361487369e-06, + "loss": 0.6384, "step": 8992 }, { - "epoch": 0.6804888199462752, - "grad_norm": 2.3362932205200195, - "learning_rate": 4.573013143143672e-06, - "loss": 0.6628, + "epoch": 0.633086941217881, + "grad_norm": 1.75419020652771, + "learning_rate": 4.388083460454106e-06, + "loss": 0.5345, "step": 8993 }, { - "epoch": 0.6805644886686089, - "grad_norm": 2.5250635147094727, - "learning_rate": 4.5710523690992296e-06, - "loss": 0.6863, + "epoch": 0.6331573389651531, + "grad_norm": 1.936296820640564, + "learning_rate": 4.386602695216749e-06, + "loss": 0.7201, "step": 8994 }, { - "epoch": 0.6806401573909424, - "grad_norm": 2.553046226501465, - "learning_rate": 4.569091872409816e-06, - "loss": 0.7694, + "epoch": 0.6332277367124252, + "grad_norm": 1.8954877853393555, + "learning_rate": 4.385122065852285e-06, + "loss": 0.6719, "step": 8995 }, { - "epoch": 0.6807158261132761, - "grad_norm": 2.729386806488037, - "learning_rate": 4.567131653198204e-06, - "loss": 0.7139, + "epoch": 0.6332981344596973, + "grad_norm": 1.8082013130187988, + "learning_rate": 4.383641572437691e-06, + "loss": 0.7394, "step": 8996 }, { - "epoch": 0.6807914948356097, - "grad_norm": 1.8493585586547852, - "learning_rate": 4.5651717115871415e-06, - "loss": 0.6534, + "epoch": 0.6333685322069694, + "grad_norm": 2.103165626525879, + "learning_rate": 4.382161215049943e-06, + "loss": 0.6806, "step": 8997 }, { - "epoch": 0.6808671635579433, - "grad_norm": 2.3107059001922607, - "learning_rate": 4.563212047699371e-06, - "loss": 0.6176, + "epoch": 0.6334389299542414, + "grad_norm": 1.5947649478912354, + "learning_rate": 4.380680993766002e-06, + "loss": 0.5284, "step": 8998 }, { - "epoch": 0.680942832280277, - "grad_norm": 1.6899579763412476, - "learning_rate": 4.561252661657613e-06, - "loss": 0.6259, + "epoch": 0.6335093277015136, + "grad_norm": 1.7913581132888794, + "learning_rate": 4.379200908662831e-06, + "loss": 0.5852, "step": 8999 }, { - "epoch": 0.6810185010026105, - "grad_norm": 2.0711593627929688, - "learning_rate": 4.559293553584569e-06, - "loss": 0.6786, + "epoch": 0.6335797254487856, + "grad_norm": 2.014082193374634, + "learning_rate": 4.3777209598173766e-06, + "loss": 0.727, "step": 9000 }, { - "epoch": 0.6810941697249442, - "grad_norm": 1.8845760822296143, - "learning_rate": 4.557334723602927e-06, - "loss": 0.6397, + "epoch": 0.6336501231960577, + "grad_norm": 1.5731520652770996, + "learning_rate": 4.376241147306586e-06, + "loss": 0.7344, "step": 9001 }, { - "epoch": 0.6811698384472779, - "grad_norm": 2.040178060531616, - "learning_rate": 4.555376171835352e-06, - "loss": 0.5297, + "epoch": 0.6337205209433299, + "grad_norm": 2.0209779739379883, + "learning_rate": 4.374761471207392e-06, + "loss": 0.7788, "step": 9002 }, { - "epoch": 0.6812455071696114, - "grad_norm": 2.0751795768737793, - "learning_rate": 4.5534178984045e-06, - "loss": 0.6987, + "epoch": 0.6337909186906019, + "grad_norm": 1.631135106086731, + "learning_rate": 4.3732819315967295e-06, + "loss": 0.6128, "step": 9003 }, { - "epoch": 0.6813211758919451, - "grad_norm": 2.1096200942993164, - "learning_rate": 4.551459903432997e-06, - "loss": 0.7418, + "epoch": 0.633861316437874, + "grad_norm": 1.6738003492355347, + "learning_rate": 4.371802528551515e-06, + "loss": 0.6691, "step": 9004 }, { - "epoch": 0.6813968446142787, - "grad_norm": 2.9717164039611816, - "learning_rate": 4.549502187043465e-06, - "loss": 0.6486, + "epoch": 0.633931714185146, + "grad_norm": 1.9799883365631104, + "learning_rate": 4.37032326214867e-06, + "loss": 0.6484, "step": 9005 }, { - "epoch": 0.6814725133366123, - "grad_norm": 1.7609609365463257, - "learning_rate": 4.5475447493585004e-06, - "loss": 0.7631, + "epoch": 0.6340021119324182, + "grad_norm": 1.7932630777359009, + "learning_rate": 4.368844132465098e-06, + "loss": 0.6074, "step": 9006 }, { - "epoch": 0.681548182058946, - "grad_norm": 2.13960599899292, - "learning_rate": 4.545587590500689e-06, - "loss": 0.7899, + "epoch": 0.6340725096796902, + "grad_norm": 1.7643979787826538, + "learning_rate": 4.367365139577706e-06, + "loss": 0.6284, "step": 9007 }, { - "epoch": 0.6816238507812795, - "grad_norm": 1.9353188276290894, - "learning_rate": 4.543630710592585e-06, - "loss": 0.6638, + "epoch": 0.6341429074269623, + "grad_norm": 1.8642879724502563, + "learning_rate": 4.36588628356338e-06, + "loss": 0.7349, "step": 9008 }, { - "epoch": 0.6816995195036132, - "grad_norm": 1.8156839609146118, - "learning_rate": 4.5416741097567385e-06, - "loss": 0.7883, + "epoch": 0.6342133051742345, + "grad_norm": 1.5627418756484985, + "learning_rate": 4.364407564499016e-06, + "loss": 0.6418, "step": 9009 }, { - "epoch": 0.6817751882259468, - "grad_norm": 1.8130958080291748, - "learning_rate": 4.539717788115684e-06, - "loss": 0.618, + "epoch": 0.6342837029215065, + "grad_norm": 2.265058755874634, + "learning_rate": 4.362928982461485e-06, + "loss": 0.5617, "step": 9010 }, { - "epoch": 0.6818508569482804, - "grad_norm": 1.71598219871521, - "learning_rate": 4.537761745791925e-06, - "loss": 0.627, + "epoch": 0.6343541006687786, + "grad_norm": 1.9182465076446533, + "learning_rate": 4.361450537527669e-06, + "loss": 0.6809, "step": 9011 }, { - "epoch": 0.6819265256706141, - "grad_norm": 2.342985153198242, - "learning_rate": 4.535805982907958e-06, - "loss": 0.7325, + "epoch": 0.6344244984160506, + "grad_norm": 1.69278085231781, + "learning_rate": 4.359972229774427e-06, + "loss": 0.6218, "step": 9012 }, { - "epoch": 0.6820021943929476, - "grad_norm": 1.825118064880371, - "learning_rate": 4.53385049958626e-06, - "loss": 0.6055, + "epoch": 0.6344948961633228, + "grad_norm": 2.5391154289245605, + "learning_rate": 4.35849405927862e-06, + "loss": 0.6638, "step": 9013 }, { - "epoch": 0.6820778631152813, - "grad_norm": 1.9702906608581543, - "learning_rate": 4.531895295949292e-06, - "loss": 0.7337, + "epoch": 0.6345652939105949, + "grad_norm": 1.84584641456604, + "learning_rate": 4.3570160261170975e-06, + "loss": 0.6773, "step": 9014 }, { - "epoch": 0.682153531837615, - "grad_norm": 2.108592987060547, - "learning_rate": 4.529940372119486e-06, - "loss": 0.635, + "epoch": 0.6346356916578669, + "grad_norm": 1.794948697090149, + "learning_rate": 4.35553813036671e-06, + "loss": 0.6689, "step": 9015 }, { - "epoch": 0.6822292005599485, - "grad_norm": 1.7487767934799194, - "learning_rate": 4.5279857282192735e-06, - "loss": 0.6604, + "epoch": 0.6347060894051391, + "grad_norm": 2.4154655933380127, + "learning_rate": 4.354060372104286e-06, + "loss": 0.6186, "step": 9016 }, { - "epoch": 0.6823048692822822, - "grad_norm": 2.037961721420288, - "learning_rate": 4.5260313643710625e-06, - "loss": 0.6136, + "epoch": 0.6347764871524111, + "grad_norm": 1.862809419631958, + "learning_rate": 4.352582751406661e-06, + "loss": 0.7342, "step": 9017 }, { - "epoch": 0.6823805380046158, - "grad_norm": 2.4088294506073, - "learning_rate": 4.524077280697237e-06, - "loss": 0.6478, + "epoch": 0.6348468848996832, + "grad_norm": 1.8163776397705078, + "learning_rate": 4.351105268350656e-06, + "loss": 0.6727, "step": 9018 }, { - "epoch": 0.6824562067269494, - "grad_norm": 2.065066337585449, - "learning_rate": 4.522123477320167e-06, - "loss": 0.6117, + "epoch": 0.6349172826469553, + "grad_norm": 1.8684730529785156, + "learning_rate": 4.349627923013088e-06, + "loss": 0.785, "step": 9019 }, { - "epoch": 0.6825318754492831, - "grad_norm": 3.416294813156128, - "learning_rate": 4.520169954362204e-06, - "loss": 0.76, + "epoch": 0.6349876803942274, + "grad_norm": 1.6635981798171997, + "learning_rate": 4.348150715470762e-06, + "loss": 0.6325, "step": 9020 }, { - "epoch": 0.6826075441716166, - "grad_norm": 2.648378610610962, - "learning_rate": 4.518216711945697e-06, - "loss": 0.7329, + "epoch": 0.6350580781414995, + "grad_norm": 1.97501540184021, + "learning_rate": 4.346673645800485e-06, + "loss": 0.733, "step": 9021 }, { - "epoch": 0.6826832128939503, - "grad_norm": 2.742499589920044, - "learning_rate": 4.516263750192951e-06, - "loss": 0.8434, + "epoch": 0.6351284758887715, + "grad_norm": 1.9633455276489258, + "learning_rate": 4.345196714079046e-06, + "loss": 0.7586, "step": 9022 }, { - "epoch": 0.682758881616284, - "grad_norm": 1.814097285270691, - "learning_rate": 4.514311069226272e-06, - "loss": 0.7289, + "epoch": 0.6351988736360437, + "grad_norm": 1.9288759231567383, + "learning_rate": 4.3437199203832365e-06, + "loss": 0.6283, "step": 9023 }, { - "epoch": 0.6828345503386175, - "grad_norm": 1.8923470973968506, - "learning_rate": 4.5123586691679405e-06, - "loss": 0.7187, + "epoch": 0.6352692713833157, + "grad_norm": 1.7680613994598389, + "learning_rate": 4.342243264789831e-06, + "loss": 0.6474, "step": 9024 }, { - "epoch": 0.6829102190609512, - "grad_norm": 2.0584118366241455, - "learning_rate": 4.510406550140226e-06, - "loss": 0.7942, + "epoch": 0.6353396691305878, + "grad_norm": 1.6070221662521362, + "learning_rate": 4.340766747375608e-06, + "loss": 0.707, "step": 9025 }, { - "epoch": 0.6829858877832847, - "grad_norm": 2.10097074508667, - "learning_rate": 4.508454712265373e-06, - "loss": 0.6773, + "epoch": 0.63541006687786, + "grad_norm": 1.8282153606414795, + "learning_rate": 4.339290368217327e-06, + "loss": 0.6594, "step": 9026 }, { - "epoch": 0.6830615565056184, - "grad_norm": 2.169400453567505, - "learning_rate": 4.506503155665613e-06, - "loss": 0.492, + "epoch": 0.635480464625132, + "grad_norm": 1.8746263980865479, + "learning_rate": 4.337814127391751e-06, + "loss": 0.6546, "step": 9027 }, { - "epoch": 0.6831372252279521, - "grad_norm": 2.034270763397217, - "learning_rate": 4.5045518804631635e-06, - "loss": 0.6998, + "epoch": 0.6355508623724041, + "grad_norm": 1.964434266090393, + "learning_rate": 4.336338024975631e-06, + "loss": 0.7449, "step": 9028 }, { - "epoch": 0.6832128939502856, - "grad_norm": 3.989406108856201, - "learning_rate": 4.502600886780212e-06, - "loss": 0.6633, + "epoch": 0.6356212601196761, + "grad_norm": 1.835306167602539, + "learning_rate": 4.3348620610457086e-06, + "loss": 0.6509, "step": 9029 }, { - "epoch": 0.6832885626726193, - "grad_norm": 2.1138885021209717, - "learning_rate": 4.500650174738935e-06, - "loss": 0.674, + "epoch": 0.6356916578669483, + "grad_norm": 1.9925087690353394, + "learning_rate": 4.333386235678722e-06, + "loss": 0.6753, "step": 9030 }, { - "epoch": 0.683364231394953, - "grad_norm": 1.778512954711914, - "learning_rate": 4.498699744461504e-06, - "loss": 0.6803, + "epoch": 0.6357620556142204, + "grad_norm": 1.790116310119629, + "learning_rate": 4.3319105489513964e-06, + "loss": 0.6791, "step": 9031 }, { - "epoch": 0.6834399001172865, - "grad_norm": 2.0274288654327393, - "learning_rate": 4.496749596070052e-06, - "loss": 0.7654, + "epoch": 0.6358324533614924, + "grad_norm": 1.5501362085342407, + "learning_rate": 4.3304350009404615e-06, + "loss": 0.6163, "step": 9032 }, { - "epoch": 0.6835155688396202, - "grad_norm": 1.8345074653625488, - "learning_rate": 4.494799729686703e-06, - "loss": 0.7808, + "epoch": 0.6359028511087645, + "grad_norm": 1.752990484237671, + "learning_rate": 4.328959591722627e-06, + "loss": 0.636, "step": 9033 }, { - "epoch": 0.6835912375619537, - "grad_norm": 1.9967671632766724, - "learning_rate": 4.492850145433567e-06, - "loss": 0.5957, + "epoch": 0.6359732488560366, + "grad_norm": 1.6951661109924316, + "learning_rate": 4.3274843213746036e-06, + "loss": 0.6688, "step": 9034 }, { - "epoch": 0.6836669062842874, - "grad_norm": 2.4879066944122314, - "learning_rate": 4.490900843432734e-06, - "loss": 0.6603, + "epoch": 0.6360436466033087, + "grad_norm": 2.177689552307129, + "learning_rate": 4.32600918997309e-06, + "loss": 0.7165, "step": 9035 }, { - "epoch": 0.6837425750066211, - "grad_norm": 1.9220361709594727, - "learning_rate": 4.488951823806274e-06, - "loss": 0.6626, + "epoch": 0.6361140443505808, + "grad_norm": 1.7550839185714722, + "learning_rate": 4.324534197594781e-06, + "loss": 0.7021, "step": 9036 }, { - "epoch": 0.6838182437289546, - "grad_norm": 2.3953804969787598, - "learning_rate": 4.487003086676241e-06, - "loss": 0.7222, + "epoch": 0.6361844420978529, + "grad_norm": 1.8532358407974243, + "learning_rate": 4.32305934431636e-06, + "loss": 0.729, "step": 9037 }, { - "epoch": 0.6838939124512883, - "grad_norm": 4.208496570587158, - "learning_rate": 4.485054632164672e-06, - "loss": 0.6668, + "epoch": 0.636254839845125, + "grad_norm": 1.5242801904678345, + "learning_rate": 4.321584630214511e-06, + "loss": 0.6701, "step": 9038 }, { - "epoch": 0.6839695811736218, - "grad_norm": 1.9624886512756348, - "learning_rate": 4.483106460393587e-06, - "loss": 0.6975, + "epoch": 0.636325237592397, + "grad_norm": 2.0381124019622803, + "learning_rate": 4.3201100553659e-06, + "loss": 0.6717, "step": 9039 }, { - "epoch": 0.6840452498959555, - "grad_norm": 1.8978896141052246, - "learning_rate": 4.481158571484981e-06, - "loss": 0.5826, + "epoch": 0.6363956353396691, + "grad_norm": 1.5003957748413086, + "learning_rate": 4.318635619847197e-06, + "loss": 0.6449, "step": 9040 }, { - "epoch": 0.6841209186182892, - "grad_norm": 2.2070109844207764, - "learning_rate": 4.479210965560841e-06, - "loss": 0.743, + "epoch": 0.6364660330869412, + "grad_norm": 1.5939546823501587, + "learning_rate": 4.317161323735055e-06, + "loss": 0.7402, "step": 9041 }, { - "epoch": 0.6841965873406227, - "grad_norm": 2.3264880180358887, - "learning_rate": 4.477263642743137e-06, - "loss": 0.7172, + "epoch": 0.6365364308342133, + "grad_norm": 1.588822603225708, + "learning_rate": 4.315687167106125e-06, + "loss": 0.7506, "step": 9042 }, { - "epoch": 0.6842722560629564, - "grad_norm": 2.088231086730957, - "learning_rate": 4.475316603153809e-06, - "loss": 0.6427, + "epoch": 0.6366068285814854, + "grad_norm": 2.5343167781829834, + "learning_rate": 4.31421315003705e-06, + "loss": 0.7562, "step": 9043 }, { - "epoch": 0.68434792478529, - "grad_norm": 2.185490131378174, - "learning_rate": 4.47336984691479e-06, - "loss": 0.7606, + "epoch": 0.6366772263287575, + "grad_norm": 1.8913538455963135, + "learning_rate": 4.312739272604468e-06, + "loss": 0.6563, "step": 9044 }, { - "epoch": 0.6844235935076236, - "grad_norm": 2.1886022090911865, - "learning_rate": 4.4714233741479914e-06, - "loss": 0.5061, + "epoch": 0.6367476240760296, + "grad_norm": 1.7115387916564941, + "learning_rate": 4.311265534885002e-06, + "loss": 0.7566, "step": 9045 }, { - "epoch": 0.6844992622299573, - "grad_norm": 2.4206368923187256, - "learning_rate": 4.46947718497531e-06, - "loss": 0.7002, + "epoch": 0.6368180218233016, + "grad_norm": 2.016685724258423, + "learning_rate": 4.309791936955279e-06, + "loss": 0.7746, "step": 9046 }, { - "epoch": 0.6845749309522908, - "grad_norm": 2.282135009765625, - "learning_rate": 4.467531279518619e-06, - "loss": 0.7932, + "epoch": 0.6368884195705737, + "grad_norm": 1.7232788801193237, + "learning_rate": 4.308318478891908e-06, + "loss": 0.7338, "step": 9047 }, { - "epoch": 0.6846505996746245, - "grad_norm": 2.5050301551818848, - "learning_rate": 4.465585657899779e-06, - "loss": 0.5978, + "epoch": 0.6369588173178459, + "grad_norm": 2.094759702682495, + "learning_rate": 4.306845160771497e-06, + "loss": 0.7257, "step": 9048 }, { - "epoch": 0.6847262683969582, - "grad_norm": 2.1457459926605225, - "learning_rate": 4.463640320240636e-06, - "loss": 0.6758, + "epoch": 0.6370292150651179, + "grad_norm": 1.5716664791107178, + "learning_rate": 4.305371982670643e-06, + "loss": 0.6351, "step": 9049 }, { - "epoch": 0.6848019371192917, - "grad_norm": 1.7627110481262207, - "learning_rate": 4.4616952666630036e-06, - "loss": 0.7674, + "epoch": 0.63709961281239, + "grad_norm": 1.822417140007019, + "learning_rate": 4.303898944665943e-06, + "loss": 0.6425, "step": 9050 }, { - "epoch": 0.6848776058416254, - "grad_norm": 2.0747087001800537, - "learning_rate": 4.4597504972886895e-06, - "loss": 0.6957, + "epoch": 0.637170010559662, + "grad_norm": 1.8643172979354858, + "learning_rate": 4.302426046833978e-06, + "loss": 0.671, "step": 9051 }, { - "epoch": 0.6849532745639589, - "grad_norm": 1.8389160633087158, - "learning_rate": 4.457806012239488e-06, - "loss": 0.6881, + "epoch": 0.6372404083069342, + "grad_norm": 1.7419620752334595, + "learning_rate": 4.300953289251326e-06, + "loss": 0.6863, "step": 9052 }, { - "epoch": 0.6850289432862926, - "grad_norm": 3.18410325050354, - "learning_rate": 4.455861811637168e-06, - "loss": 0.7342, + "epoch": 0.6373108060542063, + "grad_norm": 1.695651650428772, + "learning_rate": 4.299480671994556e-06, + "loss": 0.6242, "step": 9053 }, { - "epoch": 0.6851046120086263, - "grad_norm": 1.8247333765029907, - "learning_rate": 4.453917895603476e-06, - "loss": 0.5705, + "epoch": 0.6373812038014783, + "grad_norm": 1.6151113510131836, + "learning_rate": 4.298008195140234e-06, + "loss": 0.685, "step": 9054 }, { - "epoch": 0.6851802807309598, - "grad_norm": 1.9525268077850342, - "learning_rate": 4.451974264260148e-06, - "loss": 0.7596, + "epoch": 0.6374516015487505, + "grad_norm": 1.8108160495758057, + "learning_rate": 4.2965358587649086e-06, + "loss": 0.6591, "step": 9055 }, { - "epoch": 0.6852559494532935, - "grad_norm": 1.9142605066299438, - "learning_rate": 4.450030917728903e-06, - "loss": 0.6349, + "epoch": 0.6375219992960225, + "grad_norm": 1.8456366062164307, + "learning_rate": 4.295063662945136e-06, + "loss": 0.5577, "step": 9056 }, { - "epoch": 0.6853316181756272, - "grad_norm": 1.9247804880142212, - "learning_rate": 4.448087856131438e-06, - "loss": 0.5992, + "epoch": 0.6375923970432946, + "grad_norm": 2.0199553966522217, + "learning_rate": 4.293591607757451e-06, + "loss": 0.7059, "step": 9057 }, { - "epoch": 0.6854072868979607, - "grad_norm": 2.2341079711914062, - "learning_rate": 4.446145079589434e-06, - "loss": 0.8441, + "epoch": 0.6376627947905668, + "grad_norm": 1.810887336730957, + "learning_rate": 4.292119693278393e-06, + "loss": 0.7082, "step": 9058 }, { - "epoch": 0.6854829556202944, - "grad_norm": 2.2383766174316406, - "learning_rate": 4.444202588224554e-06, - "loss": 0.5673, + "epoch": 0.6377331925378388, + "grad_norm": 1.7042158842086792, + "learning_rate": 4.29064791958448e-06, + "loss": 0.633, "step": 9059 }, { - "epoch": 0.6855586243426279, - "grad_norm": 1.9901219606399536, - "learning_rate": 4.442260382158447e-06, - "loss": 0.7313, + "epoch": 0.6378035902851109, + "grad_norm": 1.919373869895935, + "learning_rate": 4.289176286752239e-06, + "loss": 0.6796, "step": 9060 }, { - "epoch": 0.6856342930649616, - "grad_norm": 2.1193864345550537, - "learning_rate": 4.440318461512729e-06, - "loss": 0.7233, + "epoch": 0.6378739880323829, + "grad_norm": 2.006330728530884, + "learning_rate": 4.287704794858175e-06, + "loss": 0.6563, "step": 9061 }, { - "epoch": 0.6857099617872953, - "grad_norm": 2.3127729892730713, - "learning_rate": 4.438376826409021e-06, - "loss": 0.656, + "epoch": 0.6379443857796551, + "grad_norm": 1.450805425643921, + "learning_rate": 4.2862334439787966e-06, + "loss": 0.59, "step": 9062 }, { - "epoch": 0.6857856305096288, - "grad_norm": 1.5982202291488647, - "learning_rate": 4.4364354769689125e-06, - "loss": 0.7635, + "epoch": 0.6380147835269271, + "grad_norm": 1.844028353691101, + "learning_rate": 4.2847622341906e-06, + "loss": 0.7113, "step": 9063 }, { - "epoch": 0.6858612992319625, - "grad_norm": 1.8591372966766357, - "learning_rate": 4.434494413313972e-06, - "loss": 0.6837, + "epoch": 0.6380851812741992, + "grad_norm": 1.9812822341918945, + "learning_rate": 4.283291165570073e-06, + "loss": 0.7193, "step": 9064 }, { - "epoch": 0.685936967954296, - "grad_norm": 2.3256266117095947, - "learning_rate": 4.432553635565758e-06, - "loss": 0.8136, + "epoch": 0.6381555790214714, + "grad_norm": 1.8797199726104736, + "learning_rate": 4.281820238193697e-06, + "loss": 0.742, "step": 9065 }, { - "epoch": 0.6860126366766297, - "grad_norm": 1.8938707113265991, - "learning_rate": 4.430613143845805e-06, - "loss": 0.7994, + "epoch": 0.6382259767687434, + "grad_norm": 2.3496038913726807, + "learning_rate": 4.2803494521379534e-06, + "loss": 0.6367, "step": 9066 }, { - "epoch": 0.6860883053989634, - "grad_norm": 1.8198975324630737, - "learning_rate": 4.428672938275642e-06, - "loss": 0.7606, + "epoch": 0.6382963745160155, + "grad_norm": 2.107584238052368, + "learning_rate": 4.278878807479298e-06, + "loss": 0.6751, "step": 9067 }, { - "epoch": 0.6861639741212969, - "grad_norm": 2.892153739929199, - "learning_rate": 4.4267330189767624e-06, - "loss": 0.714, + "epoch": 0.6383667722632875, + "grad_norm": 1.7119874954223633, + "learning_rate": 4.277408304294204e-06, + "loss": 0.592, "step": 9068 }, { - "epoch": 0.6862396428436306, - "grad_norm": 1.9913108348846436, - "learning_rate": 4.424793386070653e-06, - "loss": 0.5629, + "epoch": 0.6384371700105597, + "grad_norm": 1.8018893003463745, + "learning_rate": 4.275937942659114e-06, + "loss": 0.6192, "step": 9069 }, { - "epoch": 0.6863153115659643, - "grad_norm": 2.234666347503662, - "learning_rate": 4.4228540396787795e-06, - "loss": 0.786, + "epoch": 0.6385075677578318, + "grad_norm": 1.8757846355438232, + "learning_rate": 4.2744677226504806e-06, + "loss": 0.7205, "step": 9070 }, { - "epoch": 0.6863909802882978, - "grad_norm": 2.110463857650757, - "learning_rate": 4.4209149799225905e-06, - "loss": 0.657, + "epoch": 0.6385779655051038, + "grad_norm": 1.9046283960342407, + "learning_rate": 4.2729976443447356e-06, + "loss": 0.7431, "step": 9071 }, { - "epoch": 0.6864666490106315, - "grad_norm": 2.373729705810547, - "learning_rate": 4.418976206923516e-06, - "loss": 0.672, + "epoch": 0.638648363252376, + "grad_norm": 1.9265161752700806, + "learning_rate": 4.2715277078183155e-06, + "loss": 0.7417, "step": 9072 }, { - "epoch": 0.686542317732965, - "grad_norm": 1.475502371788025, - "learning_rate": 4.4170377208029684e-06, - "loss": 0.8645, + "epoch": 0.638718760999648, + "grad_norm": 1.9071297645568848, + "learning_rate": 4.270057913147639e-06, + "loss": 0.693, "step": 9073 }, { - "epoch": 0.6866179864552987, - "grad_norm": 2.1476123332977295, - "learning_rate": 4.415099521682345e-06, - "loss": 0.7975, + "epoch": 0.6387891587469201, + "grad_norm": 1.855251669883728, + "learning_rate": 4.268588260409125e-06, + "loss": 0.7503, "step": 9074 }, { - "epoch": 0.6866936551776324, - "grad_norm": 2.0400941371917725, - "learning_rate": 4.4131616096830155e-06, - "loss": 0.7912, + "epoch": 0.6388595564941922, + "grad_norm": 1.939955711364746, + "learning_rate": 4.267118749679179e-06, + "loss": 0.679, "step": 9075 }, { - "epoch": 0.6867693238999659, - "grad_norm": 1.941752314567566, - "learning_rate": 4.411223984926338e-06, - "loss": 0.6795, + "epoch": 0.6389299542414643, + "grad_norm": 1.8332232236862183, + "learning_rate": 4.265649381034205e-06, + "loss": 0.5557, "step": 9076 }, { - "epoch": 0.6868449926222996, - "grad_norm": 2.0431621074676514, - "learning_rate": 4.409286647533664e-06, - "loss": 0.7254, + "epoch": 0.6390003519887364, + "grad_norm": 1.683099389076233, + "learning_rate": 4.264180154550597e-06, + "loss": 0.8239, "step": 9077 }, { - "epoch": 0.6869206613446331, - "grad_norm": 1.969773530960083, - "learning_rate": 4.407349597626304e-06, - "loss": 0.7392, + "epoch": 0.6390707497360084, + "grad_norm": 1.7822729349136353, + "learning_rate": 4.262711070304741e-06, + "loss": 0.7635, "step": 9078 }, { - "epoch": 0.6869963300669668, - "grad_norm": 2.117180109024048, - "learning_rate": 4.4054128353255676e-06, - "loss": 0.7551, + "epoch": 0.6391411474832805, + "grad_norm": 1.7841684818267822, + "learning_rate": 4.261242128373015e-06, + "loss": 0.7259, "step": 9079 }, { - "epoch": 0.6870719987893005, - "grad_norm": 2.118661403656006, - "learning_rate": 4.403476360752739e-06, - "loss": 0.7525, + "epoch": 0.6392115452305526, + "grad_norm": 1.7090479135513306, + "learning_rate": 4.259773328831791e-06, + "loss": 0.6748, "step": 9080 }, { - "epoch": 0.687147667511634, - "grad_norm": 1.820186972618103, - "learning_rate": 4.401540174029088e-06, - "loss": 0.5936, + "epoch": 0.6392819429778247, + "grad_norm": 1.8508425951004028, + "learning_rate": 4.258304671757434e-06, + "loss": 0.7068, "step": 9081 }, { - "epoch": 0.6872233362339677, - "grad_norm": 2.435248851776123, - "learning_rate": 4.399604275275865e-06, - "loss": 0.6549, + "epoch": 0.6393523407250968, + "grad_norm": 1.8740971088409424, + "learning_rate": 4.256836157226298e-06, + "loss": 0.6273, "step": 9082 }, { - "epoch": 0.6872990049563014, - "grad_norm": 2.5884740352630615, - "learning_rate": 4.397668664614301e-06, - "loss": 0.6127, + "epoch": 0.6394227384723689, + "grad_norm": 1.8451915979385376, + "learning_rate": 4.2553677853147366e-06, + "loss": 0.6393, "step": 9083 }, { - "epoch": 0.6873746736786349, - "grad_norm": 2.3123669624328613, - "learning_rate": 4.395733342165612e-06, - "loss": 0.5723, + "epoch": 0.639493136219641, + "grad_norm": 1.6348166465759277, + "learning_rate": 4.253899556099089e-06, + "loss": 0.6983, "step": 9084 }, { - "epoch": 0.6874503424009686, - "grad_norm": 2.0741138458251953, - "learning_rate": 4.393798308050996e-06, - "loss": 0.6145, + "epoch": 0.639563533966913, + "grad_norm": 1.8125122785568237, + "learning_rate": 4.2524314696556914e-06, + "loss": 0.7732, "step": 9085 }, { - "epoch": 0.6875260111233021, - "grad_norm": 2.0065953731536865, - "learning_rate": 4.3918635623916214e-06, - "loss": 0.7152, + "epoch": 0.6396339317141851, + "grad_norm": 1.6707355976104736, + "learning_rate": 4.250963526060869e-06, + "loss": 0.5984, "step": 9086 }, { - "epoch": 0.6876016798456358, - "grad_norm": 2.173222064971924, - "learning_rate": 4.389929105308658e-06, - "loss": 0.7504, + "epoch": 0.6397043294614573, + "grad_norm": 1.770776629447937, + "learning_rate": 4.249495725390944e-06, + "loss": 0.7111, "step": 9087 }, { - "epoch": 0.6876773485679695, - "grad_norm": 2.4735090732574463, - "learning_rate": 4.3879949369232486e-06, - "loss": 0.68, + "epoch": 0.6397747272087293, + "grad_norm": 1.5083149671554565, + "learning_rate": 4.248028067722224e-06, + "loss": 0.6419, "step": 9088 }, { - "epoch": 0.687753017290303, - "grad_norm": 2.0955440998077393, - "learning_rate": 4.38606105735651e-06, - "loss": 0.6141, + "epoch": 0.6398451249560014, + "grad_norm": 1.5843185186386108, + "learning_rate": 4.24656055313102e-06, + "loss": 0.6426, "step": 9089 }, { - "epoch": 0.6878286860126367, - "grad_norm": 1.9914036989212036, - "learning_rate": 4.3841274667295524e-06, - "loss": 0.6674, + "epoch": 0.6399155227032735, + "grad_norm": 1.7143665552139282, + "learning_rate": 4.245093181693624e-06, + "loss": 0.6626, "step": 9090 }, { - "epoch": 0.6879043547349702, - "grad_norm": 1.948864221572876, - "learning_rate": 4.3821941651634605e-06, - "loss": 0.5495, + "epoch": 0.6399859204505456, + "grad_norm": 1.5836093425750732, + "learning_rate": 4.243625953486332e-06, + "loss": 0.6373, "step": 9091 }, { - "epoch": 0.6879800234573039, - "grad_norm": 2.403665542602539, - "learning_rate": 4.380261152779307e-06, - "loss": 0.6433, + "epoch": 0.6400563181978177, + "grad_norm": 1.6392686367034912, + "learning_rate": 4.242158868585421e-06, + "loss": 0.6202, "step": 9092 }, { - "epoch": 0.6880556921796376, - "grad_norm": 1.9592474699020386, - "learning_rate": 4.378328429698142e-06, - "loss": 0.6297, + "epoch": 0.6401267159450897, + "grad_norm": 2.064199209213257, + "learning_rate": 4.240691927067169e-06, + "loss": 0.738, "step": 9093 }, { - "epoch": 0.6881313609019711, - "grad_norm": 1.8239825963974, - "learning_rate": 4.3763959960409985e-06, - "loss": 0.6765, + "epoch": 0.6401971136923619, + "grad_norm": 1.5762439966201782, + "learning_rate": 4.239225129007839e-06, + "loss": 0.5804, "step": 9094 }, { - "epoch": 0.6882070296243048, - "grad_norm": 1.8471215963363647, - "learning_rate": 4.3744638519288915e-06, - "loss": 0.7255, + "epoch": 0.6402675114396339, + "grad_norm": 1.9568367004394531, + "learning_rate": 4.237758474483699e-06, + "loss": 0.6413, "step": 9095 }, { - "epoch": 0.6882826983466385, - "grad_norm": 1.9686150550842285, - "learning_rate": 4.372531997482823e-06, - "loss": 0.6088, + "epoch": 0.640337909186906, + "grad_norm": 1.751034140586853, + "learning_rate": 4.236291963570996e-06, + "loss": 0.6144, "step": 9096 }, { - "epoch": 0.688358367068972, - "grad_norm": 2.405791997909546, - "learning_rate": 4.370600432823762e-06, - "loss": 0.8179, + "epoch": 0.640408306934178, + "grad_norm": 2.6431710720062256, + "learning_rate": 4.234825596345978e-06, + "loss": 0.6392, "step": 9097 }, { - "epoch": 0.6884340357913057, - "grad_norm": 2.5450029373168945, - "learning_rate": 4.368669158072678e-06, - "loss": 0.5827, + "epoch": 0.6404787046814502, + "grad_norm": 2.0099048614501953, + "learning_rate": 4.2333593728848805e-06, + "loss": 0.8248, "step": 9098 }, { - "epoch": 0.6885097045136392, - "grad_norm": 2.776323080062866, - "learning_rate": 4.366738173350514e-06, - "loss": 0.7119, + "epoch": 0.6405491024287223, + "grad_norm": 1.659902572631836, + "learning_rate": 4.2318932932639365e-06, + "loss": 0.6198, "step": 9099 }, { - "epoch": 0.6885853732359729, - "grad_norm": 2.0069100856781006, - "learning_rate": 4.364807478778188e-06, - "loss": 0.7055, + "epoch": 0.6406195001759943, + "grad_norm": 1.6578071117401123, + "learning_rate": 4.230427357559363e-06, + "loss": 0.7174, "step": 9100 }, { - "epoch": 0.6886610419583066, - "grad_norm": 1.881259799003601, - "learning_rate": 4.362877074476611e-06, - "loss": 0.6521, + "epoch": 0.6406898979232665, + "grad_norm": 1.7286264896392822, + "learning_rate": 4.228961565847383e-06, + "loss": 0.7039, "step": 9101 }, { - "epoch": 0.6887367106806401, - "grad_norm": 2.377471923828125, - "learning_rate": 4.3609469605666686e-06, - "loss": 0.6983, + "epoch": 0.6407602956705385, + "grad_norm": 1.6753085851669312, + "learning_rate": 4.227495918204199e-06, + "loss": 0.7452, "step": 9102 }, { - "epoch": 0.6888123794029738, - "grad_norm": 3.351958751678467, - "learning_rate": 4.359017137169231e-06, - "loss": 0.7147, + "epoch": 0.6408306934178106, + "grad_norm": 1.5391632318496704, + "learning_rate": 4.226030414706016e-06, + "loss": 0.7265, "step": 9103 }, { - "epoch": 0.6888880481253074, - "grad_norm": 2.346928119659424, - "learning_rate": 4.3570876044051525e-06, - "loss": 0.6571, + "epoch": 0.6409010911650828, + "grad_norm": 1.897689700126648, + "learning_rate": 4.224565055429021e-06, + "loss": 0.6423, "step": 9104 }, { - "epoch": 0.688963716847641, - "grad_norm": 1.908215880393982, - "learning_rate": 4.355158362395264e-06, - "loss": 0.7263, + "epoch": 0.6409714889123548, + "grad_norm": 1.9320497512817383, + "learning_rate": 4.223099840449404e-06, + "loss": 0.7265, "step": 9105 }, { - "epoch": 0.6890393855699747, - "grad_norm": 2.136303186416626, - "learning_rate": 4.353229411260387e-06, - "loss": 0.732, + "epoch": 0.6410418866596269, + "grad_norm": 2.0784788131713867, + "learning_rate": 4.221634769843338e-06, + "loss": 0.7472, "step": 9106 }, { - "epoch": 0.6891150542923082, - "grad_norm": 2.8035433292388916, - "learning_rate": 4.351300751121307e-06, - "loss": 0.6906, + "epoch": 0.6411122844068989, + "grad_norm": 1.5893454551696777, + "learning_rate": 4.220169843687e-06, + "loss": 0.6979, "step": 9107 }, { - "epoch": 0.6891907230146419, - "grad_norm": 1.8425335884094238, - "learning_rate": 4.3493723820988125e-06, - "loss": 0.6579, + "epoch": 0.6411826821541711, + "grad_norm": 2.388697862625122, + "learning_rate": 4.218705062056549e-06, + "loss": 0.7298, "step": 9108 }, { - "epoch": 0.6892663917369756, - "grad_norm": 1.5369822978973389, - "learning_rate": 4.347444304313661e-06, - "loss": 0.5209, + "epoch": 0.6412530799014432, + "grad_norm": 1.9460208415985107, + "learning_rate": 4.217240425028141e-06, + "loss": 0.6436, "step": 9109 }, { - "epoch": 0.6893420604593091, - "grad_norm": 1.7258330583572388, - "learning_rate": 4.345516517886599e-06, - "loss": 0.7984, + "epoch": 0.6413234776487152, + "grad_norm": 1.4626660346984863, + "learning_rate": 4.215775932677922e-06, + "loss": 0.6, "step": 9110 }, { - "epoch": 0.6894177291816428, - "grad_norm": 2.101191759109497, - "learning_rate": 4.343589022938344e-06, - "loss": 0.6611, + "epoch": 0.6413938753959874, + "grad_norm": 1.7926851511001587, + "learning_rate": 4.214311585082035e-06, + "loss": 0.6811, "step": 9111 }, { - "epoch": 0.6894933979039763, - "grad_norm": 1.9967455863952637, - "learning_rate": 4.341661819589601e-06, - "loss": 0.7261, + "epoch": 0.6414642731432594, + "grad_norm": 1.688137412071228, + "learning_rate": 4.21284738231661e-06, + "loss": 0.6708, "step": 9112 }, { - "epoch": 0.68956906662631, - "grad_norm": 1.7132256031036377, - "learning_rate": 4.339734907961069e-06, - "loss": 0.6795, + "epoch": 0.6415346708905315, + "grad_norm": 1.9744746685028076, + "learning_rate": 4.211383324457775e-06, + "loss": 0.6845, "step": 9113 }, { - "epoch": 0.6896447353486437, - "grad_norm": 1.7403509616851807, - "learning_rate": 4.337808288173407e-06, - "loss": 0.6901, + "epoch": 0.6416050686378036, + "grad_norm": 1.8298836946487427, + "learning_rate": 4.209919411581645e-06, + "loss": 0.5412, "step": 9114 }, { - "epoch": 0.6897204040709772, - "grad_norm": 2.2544641494750977, - "learning_rate": 4.335881960347269e-06, - "loss": 0.5939, + "epoch": 0.6416754663850757, + "grad_norm": 1.738178014755249, + "learning_rate": 4.208455643764334e-06, + "loss": 0.6889, "step": 9115 }, { - "epoch": 0.6897960727933109, - "grad_norm": 2.149181842803955, - "learning_rate": 4.333955924603288e-06, - "loss": 0.8534, + "epoch": 0.6417458641323478, + "grad_norm": 2.198148488998413, + "learning_rate": 4.20699202108194e-06, + "loss": 0.7161, "step": 9116 }, { - "epoch": 0.6898717415156445, - "grad_norm": 1.9814257621765137, - "learning_rate": 4.332030181062079e-06, - "loss": 0.7261, + "epoch": 0.6418162618796198, + "grad_norm": 3.5753347873687744, + "learning_rate": 4.205528543610563e-06, + "loss": 0.6408, "step": 9117 }, { - "epoch": 0.6899474102379781, - "grad_norm": 2.3098654747009277, - "learning_rate": 4.3301047298442385e-06, - "loss": 0.7357, + "epoch": 0.641886659626892, + "grad_norm": 1.8651832342147827, + "learning_rate": 4.204065211426285e-06, + "loss": 0.7958, "step": 9118 }, { - "epoch": 0.6900230789603118, - "grad_norm": 1.9268317222595215, - "learning_rate": 4.3281795710703436e-06, - "loss": 0.6553, + "epoch": 0.641957057374164, + "grad_norm": 1.9919747114181519, + "learning_rate": 4.2026020246051914e-06, + "loss": 0.6114, "step": 9119 }, { - "epoch": 0.6900987476826453, - "grad_norm": 1.951817274093628, - "learning_rate": 4.326254704860959e-06, - "loss": 0.7475, + "epoch": 0.6420274551214361, + "grad_norm": 1.89947509765625, + "learning_rate": 4.20113898322335e-06, + "loss": 0.6794, "step": 9120 }, { - "epoch": 0.690174416404979, - "grad_norm": 2.312189817428589, - "learning_rate": 4.324330131336617e-06, - "loss": 0.5805, + "epoch": 0.6420978528687082, + "grad_norm": 2.2212162017822266, + "learning_rate": 4.199676087356829e-06, + "loss": 0.6821, "step": 9121 }, { - "epoch": 0.6902500851273127, - "grad_norm": 1.6239818334579468, - "learning_rate": 4.322405850617842e-06, - "loss": 0.8134, + "epoch": 0.6421682506159803, + "grad_norm": 1.7974796295166016, + "learning_rate": 4.198213337081682e-06, + "loss": 0.6229, "step": 9122 }, { - "epoch": 0.6903257538496462, - "grad_norm": 2.053333044052124, - "learning_rate": 4.320481862825146e-06, - "loss": 0.6272, + "epoch": 0.6422386483632524, + "grad_norm": 1.9683970212936401, + "learning_rate": 4.1967507324739655e-06, + "loss": 0.7107, "step": 9123 }, { - "epoch": 0.6904014225719799, - "grad_norm": 2.048210859298706, - "learning_rate": 4.318558168079012e-06, - "loss": 0.6803, + "epoch": 0.6423090461105244, + "grad_norm": 1.7824574708938599, + "learning_rate": 4.1952882736097125e-06, + "loss": 0.6347, "step": 9124 }, { - "epoch": 0.6904770912943134, - "grad_norm": 2.4473769664764404, - "learning_rate": 4.316634766499906e-06, - "loss": 0.5872, + "epoch": 0.6423794438577966, + "grad_norm": 1.7771830558776855, + "learning_rate": 4.193825960564965e-06, + "loss": 0.7355, "step": 9125 }, { - "epoch": 0.6905527600166471, - "grad_norm": 2.2394003868103027, - "learning_rate": 4.314711658208278e-06, - "loss": 0.7076, + "epoch": 0.6424498416050687, + "grad_norm": 2.035944700241089, + "learning_rate": 4.192363793415746e-06, + "loss": 0.7087, "step": 9126 }, { - "epoch": 0.6906284287389808, - "grad_norm": 1.7375953197479248, - "learning_rate": 4.31278884332456e-06, - "loss": 0.672, + "epoch": 0.6425202393523407, + "grad_norm": 1.7529469728469849, + "learning_rate": 4.190901772238076e-06, + "loss": 0.6342, "step": 9127 }, { - "epoch": 0.6907040974613143, - "grad_norm": 1.8307822942733765, - "learning_rate": 4.3108663219691656e-06, - "loss": 0.8224, + "epoch": 0.6425906370996128, + "grad_norm": 1.695633888244629, + "learning_rate": 4.189439897107967e-06, + "loss": 0.603, "step": 9128 }, { - "epoch": 0.690779766183648, - "grad_norm": 1.7547332048416138, - "learning_rate": 4.308944094262488e-06, - "loss": 0.6712, + "epoch": 0.6426610348468849, + "grad_norm": 2.252720594406128, + "learning_rate": 4.187978168101424e-06, + "loss": 0.5411, "step": 9129 }, { - "epoch": 0.6908554349059816, - "grad_norm": 2.497670888900757, - "learning_rate": 4.3070221603249036e-06, - "loss": 0.753, + "epoch": 0.642731432594157, + "grad_norm": 2.5463125705718994, + "learning_rate": 4.186516585294443e-06, + "loss": 0.7147, "step": 9130 }, { - "epoch": 0.6909311036283152, - "grad_norm": 1.989039421081543, - "learning_rate": 4.305100520276775e-06, - "loss": 0.7124, + "epoch": 0.6428018303414291, + "grad_norm": 1.7267404794692993, + "learning_rate": 4.185055148763012e-06, + "loss": 0.7914, "step": 9131 }, { - "epoch": 0.6910067723506489, - "grad_norm": 1.7251970767974854, - "learning_rate": 4.303179174238433e-06, - "loss": 0.6056, + "epoch": 0.6428722280887011, + "grad_norm": 1.559985876083374, + "learning_rate": 4.183593858583114e-06, + "loss": 0.6591, "step": 9132 }, { - "epoch": 0.6910824410729824, - "grad_norm": 2.441929578781128, - "learning_rate": 4.301258122330198e-06, - "loss": 0.6691, + "epoch": 0.6429426258359733, + "grad_norm": 2.1675045490264893, + "learning_rate": 4.18213271483072e-06, + "loss": 0.6918, "step": 9133 }, { - "epoch": 0.6911581097953161, - "grad_norm": 2.3875224590301514, - "learning_rate": 4.299337364672385e-06, - "loss": 0.7165, + "epoch": 0.6430130235832453, + "grad_norm": 1.757025122642517, + "learning_rate": 4.1806717175818e-06, + "loss": 0.5634, "step": 9134 }, { - "epoch": 0.6912337785176498, - "grad_norm": 1.9688384532928467, - "learning_rate": 4.297416901385267e-06, - "loss": 0.7152, + "epoch": 0.6430834213305174, + "grad_norm": 1.9944978952407837, + "learning_rate": 4.1792108669123095e-06, + "loss": 0.6624, "step": 9135 }, { - "epoch": 0.6913094472399833, - "grad_norm": 2.073638677597046, - "learning_rate": 4.295496732589112e-06, - "loss": 0.6398, + "epoch": 0.6431538190777895, + "grad_norm": 1.7281886339187622, + "learning_rate": 4.1777501628982025e-06, + "loss": 0.7108, "step": 9136 }, { - "epoch": 0.691385115962317, - "grad_norm": 2.102508068084717, - "learning_rate": 4.293576858404167e-06, - "loss": 0.6271, + "epoch": 0.6432242168250616, + "grad_norm": 1.9555169343948364, + "learning_rate": 4.176289605615419e-06, + "loss": 0.6065, "step": 9137 }, { - "epoch": 0.6914607846846506, - "grad_norm": 2.1116816997528076, - "learning_rate": 4.2916572789506625e-06, - "loss": 0.5456, + "epoch": 0.6432946145723337, + "grad_norm": 1.7418522834777832, + "learning_rate": 4.174829195139898e-06, + "loss": 0.724, "step": 9138 }, { - "epoch": 0.6915364534069842, - "grad_norm": 2.1573615074157715, - "learning_rate": 4.2897379943488075e-06, - "loss": 0.8615, + "epoch": 0.6433650123196057, + "grad_norm": 2.6539337635040283, + "learning_rate": 4.173368931547562e-06, + "loss": 0.6258, "step": 9139 }, { - "epoch": 0.6916121221293179, - "grad_norm": 1.9969463348388672, - "learning_rate": 4.2878190047187944e-06, - "loss": 0.6516, + "epoch": 0.6434354100668779, + "grad_norm": 1.75454580783844, + "learning_rate": 4.171908814914339e-06, + "loss": 0.6705, "step": 9140 }, { - "epoch": 0.6916877908516514, - "grad_norm": 1.9518414735794067, - "learning_rate": 4.285900310180796e-06, - "loss": 0.7213, + "epoch": 0.6435058078141499, + "grad_norm": 1.5221160650253296, + "learning_rate": 4.1704488453161355e-06, + "loss": 0.5645, "step": 9141 }, { - "epoch": 0.6917634595739851, - "grad_norm": 1.909786581993103, - "learning_rate": 4.283981910854971e-06, - "loss": 0.6096, + "epoch": 0.643576205561422, + "grad_norm": 1.475270390510559, + "learning_rate": 4.168989022828861e-06, + "loss": 0.6237, "step": 9142 }, { - "epoch": 0.6918391282963187, - "grad_norm": 2.240398406982422, - "learning_rate": 4.2820638068614455e-06, - "loss": 0.6252, + "epoch": 0.6436466033086942, + "grad_norm": 1.5776747465133667, + "learning_rate": 4.167529347528411e-06, + "loss": 0.5912, "step": 9143 }, { - "epoch": 0.6919147970186523, - "grad_norm": 2.16809344291687, - "learning_rate": 4.280145998320347e-06, - "loss": 0.5408, + "epoch": 0.6437170010559662, + "grad_norm": 1.5148683786392212, + "learning_rate": 4.166069819490675e-06, + "loss": 0.7868, "step": 9144 }, { - "epoch": 0.691990465740986, - "grad_norm": 2.0366976261138916, - "learning_rate": 4.278228485351776e-06, - "loss": 0.757, + "epoch": 0.6437873988032383, + "grad_norm": 2.7540969848632812, + "learning_rate": 4.164610438791534e-06, + "loss": 0.6529, "step": 9145 }, { - "epoch": 0.6920661344633195, - "grad_norm": 1.8961420059204102, - "learning_rate": 4.276311268075806e-06, - "loss": 0.7155, + "epoch": 0.6438577965505103, + "grad_norm": 1.6126291751861572, + "learning_rate": 4.163151205506866e-06, + "loss": 0.6802, "step": 9146 }, { - "epoch": 0.6921418031856532, - "grad_norm": 1.6012108325958252, - "learning_rate": 4.274394346612502e-06, - "loss": 0.6259, + "epoch": 0.6439281942977825, + "grad_norm": 1.7265514135360718, + "learning_rate": 4.161692119712536e-06, + "loss": 0.7411, "step": 9147 }, { - "epoch": 0.6922174719079869, - "grad_norm": 2.297229290008545, - "learning_rate": 4.272477721081908e-06, - "loss": 0.6098, + "epoch": 0.6439985920450546, + "grad_norm": 1.862210988998413, + "learning_rate": 4.160233181484403e-06, + "loss": 0.6742, "step": 9148 }, { - "epoch": 0.6922931406303204, - "grad_norm": 1.7989269495010376, - "learning_rate": 4.270561391604051e-06, - "loss": 0.6358, + "epoch": 0.6440689897923266, + "grad_norm": 1.6713520288467407, + "learning_rate": 4.1587743908983195e-06, + "loss": 0.6545, "step": 9149 }, { - "epoch": 0.6923688093526541, - "grad_norm": 2.2176966667175293, - "learning_rate": 4.268645358298935e-06, - "loss": 0.668, + "epoch": 0.6441393875395988, + "grad_norm": 1.5899771451950073, + "learning_rate": 4.157315748030128e-06, + "loss": 0.6703, "step": 9150 }, { - "epoch": 0.6924444780749877, - "grad_norm": 2.412024974822998, - "learning_rate": 4.266729621286552e-06, - "loss": 0.7053, + "epoch": 0.6442097852868708, + "grad_norm": 1.4651386737823486, + "learning_rate": 4.155857252955663e-06, + "loss": 0.5971, "step": 9151 }, { - "epoch": 0.6925201467973213, - "grad_norm": 2.2924671173095703, - "learning_rate": 4.2648141806868705e-06, - "loss": 0.6765, + "epoch": 0.6442801830341429, + "grad_norm": 2.329368829727173, + "learning_rate": 4.1543989057507585e-06, + "loss": 0.6262, "step": 9152 }, { - "epoch": 0.692595815519655, - "grad_norm": 3.1433582305908203, - "learning_rate": 4.262899036619835e-06, - "loss": 0.7652, + "epoch": 0.644350580781415, + "grad_norm": 3.0783164501190186, + "learning_rate": 4.15294070649123e-06, + "loss": 0.6187, "step": 9153 }, { - "epoch": 0.6926714842419885, - "grad_norm": 2.3667728900909424, - "learning_rate": 4.2609841892053865e-06, - "loss": 0.7888, + "epoch": 0.6444209785286871, + "grad_norm": 1.8251911401748657, + "learning_rate": 4.151482655252894e-06, + "loss": 0.6209, "step": 9154 }, { - "epoch": 0.6927471529643222, - "grad_norm": 2.2989847660064697, - "learning_rate": 4.259069638563436e-06, - "loss": 0.8005, + "epoch": 0.6444913762759592, + "grad_norm": 1.7466423511505127, + "learning_rate": 4.1500247521115535e-06, + "loss": 0.6142, "step": 9155 }, { - "epoch": 0.6928228216866558, - "grad_norm": 2.9312691688537598, - "learning_rate": 4.257155384813883e-06, - "loss": 0.6623, + "epoch": 0.6445617740232312, + "grad_norm": 2.0108773708343506, + "learning_rate": 4.148566997143009e-06, + "loss": 0.6751, "step": 9156 }, { - "epoch": 0.6928984904089894, - "grad_norm": 1.8132741451263428, - "learning_rate": 4.255241428076595e-06, - "loss": 0.7629, + "epoch": 0.6446321717705034, + "grad_norm": 1.8131394386291504, + "learning_rate": 4.147109390423045e-06, + "loss": 0.6033, "step": 9157 }, { - "epoch": 0.6929741591313231, - "grad_norm": 2.117753505706787, - "learning_rate": 4.253327768471433e-06, - "loss": 0.827, + "epoch": 0.6447025695177754, + "grad_norm": 2.285221576690674, + "learning_rate": 4.145651932027451e-06, + "loss": 0.6766, "step": 9158 }, { - "epoch": 0.6930498278536567, - "grad_norm": 2.0779871940612793, - "learning_rate": 4.2514144061182446e-06, - "loss": 0.6522, + "epoch": 0.6447729672650475, + "grad_norm": 2.095156192779541, + "learning_rate": 4.144194622031996e-06, + "loss": 0.76, "step": 9159 }, { - "epoch": 0.6931254965759903, - "grad_norm": 2.123748302459717, - "learning_rate": 4.249501341136843e-06, - "loss": 0.7253, + "epoch": 0.6448433650123196, + "grad_norm": 1.7318828105926514, + "learning_rate": 4.142737460512451e-06, + "loss": 0.6493, "step": 9160 }, { - "epoch": 0.693201165298324, - "grad_norm": 2.4232475757598877, - "learning_rate": 4.24758857364703e-06, - "loss": 0.6461, + "epoch": 0.6449137627595917, + "grad_norm": 1.683665156364441, + "learning_rate": 4.141280447544571e-06, + "loss": 0.6322, "step": 9161 }, { - "epoch": 0.6932768340206575, - "grad_norm": 2.0096006393432617, - "learning_rate": 4.2456761037685936e-06, - "loss": 0.6071, + "epoch": 0.6449841605068638, + "grad_norm": 1.8053125143051147, + "learning_rate": 4.139823583204112e-06, + "loss": 0.7009, "step": 9162 }, { - "epoch": 0.6933525027429912, - "grad_norm": 3.09159255027771, - "learning_rate": 4.243763931621296e-06, - "loss": 0.5963, + "epoch": 0.6450545582541358, + "grad_norm": 1.9910920858383179, + "learning_rate": 4.138366867566813e-06, + "loss": 0.5715, "step": 9163 }, { - "epoch": 0.6934281714653248, - "grad_norm": 2.0967772006988525, - "learning_rate": 4.241852057324885e-06, - "loss": 0.6053, + "epoch": 0.645124956001408, + "grad_norm": 1.7541007995605469, + "learning_rate": 4.136910300708413e-06, + "loss": 0.5983, "step": 9164 }, { - "epoch": 0.6935038401876584, - "grad_norm": 2.970052719116211, - "learning_rate": 4.239940480999087e-06, - "loss": 0.6578, + "epoch": 0.6451953537486801, + "grad_norm": 1.6353970766067505, + "learning_rate": 4.135453882704639e-06, + "loss": 0.565, "step": 9165 }, { - "epoch": 0.6935795089099921, - "grad_norm": 2.1014604568481445, - "learning_rate": 4.238029202763617e-06, - "loss": 0.5055, + "epoch": 0.6452657514959521, + "grad_norm": 1.7474687099456787, + "learning_rate": 4.1339976136312135e-06, + "loss": 0.6484, "step": 9166 }, { - "epoch": 0.6936551776323256, - "grad_norm": 2.0818746089935303, - "learning_rate": 4.2361182227381556e-06, - "loss": 0.7098, + "epoch": 0.6453361492432242, + "grad_norm": 1.6808247566223145, + "learning_rate": 4.132541493563845e-06, + "loss": 0.6641, "step": 9167 }, { - "epoch": 0.6937308463546593, - "grad_norm": 2.38274884223938, - "learning_rate": 4.23420754104238e-06, - "loss": 0.6155, + "epoch": 0.6454065469904963, + "grad_norm": 1.475577712059021, + "learning_rate": 4.131085522578245e-06, + "loss": 0.6926, "step": 9168 }, { - "epoch": 0.693806515076993, - "grad_norm": 1.9527738094329834, - "learning_rate": 4.232297157795939e-06, - "loss": 0.6962, + "epoch": 0.6454769447377684, + "grad_norm": 1.7524925470352173, + "learning_rate": 4.129629700750102e-06, + "loss": 0.5511, "step": 9169 }, { - "epoch": 0.6938821837993265, - "grad_norm": 2.1649208068847656, - "learning_rate": 4.230387073118477e-06, - "loss": 0.7543, + "epoch": 0.6455473424850405, + "grad_norm": 2.022364377975464, + "learning_rate": 4.1281740281551145e-06, + "loss": 0.712, "step": 9170 }, { - "epoch": 0.6939578525216602, - "grad_norm": 3.0182876586914062, - "learning_rate": 4.228477287129601e-06, - "loss": 0.9058, + "epoch": 0.6456177402323126, + "grad_norm": 1.6499886512756348, + "learning_rate": 4.1267185048689576e-06, + "loss": 0.5934, "step": 9171 }, { - "epoch": 0.6940335212439938, - "grad_norm": 2.083481788635254, - "learning_rate": 4.226567799948909e-06, - "loss": 0.6472, + "epoch": 0.6456881379795847, + "grad_norm": 1.826562762260437, + "learning_rate": 4.12526313096731e-06, + "loss": 0.648, "step": 9172 }, { - "epoch": 0.6941091899663274, - "grad_norm": 2.3242437839508057, - "learning_rate": 4.224658611695981e-06, - "loss": 0.632, + "epoch": 0.6457585357268567, + "grad_norm": 1.4519411325454712, + "learning_rate": 4.123807906525833e-06, + "loss": 0.5799, "step": 9173 }, { - "epoch": 0.6941848586886611, - "grad_norm": 2.4421794414520264, - "learning_rate": 4.222749722490377e-06, - "loss": 0.6659, + "epoch": 0.6458289334741288, + "grad_norm": 2.186215877532959, + "learning_rate": 4.122352831620191e-06, + "loss": 0.7625, "step": 9174 }, { - "epoch": 0.6942605274109946, - "grad_norm": 2.1332194805145264, - "learning_rate": 4.220841132451636e-06, - "loss": 0.6021, + "epoch": 0.6458993312214009, + "grad_norm": 1.9628605842590332, + "learning_rate": 4.120897906326029e-06, + "loss": 0.633, "step": 9175 }, { - "epoch": 0.6943361961333283, - "grad_norm": 2.1453001499176025, - "learning_rate": 4.218932841699281e-06, - "loss": 0.7119, + "epoch": 0.645969728968673, + "grad_norm": 2.0311801433563232, + "learning_rate": 4.119443130718994e-06, + "loss": 0.6415, "step": 9176 }, { - "epoch": 0.6944118648556619, - "grad_norm": 2.6229517459869385, - "learning_rate": 4.2170248503528195e-06, - "loss": 0.6428, + "epoch": 0.6460401267159451, + "grad_norm": 2.251056671142578, + "learning_rate": 4.117988504874718e-06, + "loss": 0.7347, "step": 9177 }, { - "epoch": 0.6944875335779955, - "grad_norm": 1.7247917652130127, - "learning_rate": 4.215117158531727e-06, - "loss": 0.7384, + "epoch": 0.6461105244632172, + "grad_norm": 1.6213743686676025, + "learning_rate": 4.1165340288688285e-06, + "loss": 0.6578, "step": 9178 }, { - "epoch": 0.6945632023003292, - "grad_norm": 1.925907850265503, - "learning_rate": 4.213209766355471e-06, - "loss": 0.6112, + "epoch": 0.6461809222104893, + "grad_norm": 1.863011121749878, + "learning_rate": 4.115079702776949e-06, + "loss": 0.6486, "step": 9179 }, { - "epoch": 0.6946388710226628, - "grad_norm": 2.0717689990997314, - "learning_rate": 4.21130267394351e-06, - "loss": 0.7033, + "epoch": 0.6462513199577613, + "grad_norm": 1.9735623598098755, + "learning_rate": 4.113625526674687e-06, + "loss": 0.6392, "step": 9180 }, { - "epoch": 0.6947145397449964, - "grad_norm": 1.7757562398910522, - "learning_rate": 4.209395881415259e-06, - "loss": 0.5708, + "epoch": 0.6463217177050334, + "grad_norm": 1.7519330978393555, + "learning_rate": 4.1121715006376495e-06, + "loss": 0.7062, "step": 9181 }, { - "epoch": 0.6947902084673301, - "grad_norm": 2.3035130500793457, - "learning_rate": 4.207489388890133e-06, - "loss": 0.6394, + "epoch": 0.6463921154523056, + "grad_norm": 1.7023142576217651, + "learning_rate": 4.11071762474143e-06, + "loss": 0.7949, "step": 9182 }, { - "epoch": 0.6948658771896636, - "grad_norm": 2.059298515319824, - "learning_rate": 4.205583196487524e-06, - "loss": 0.7353, + "epoch": 0.6464625131995776, + "grad_norm": 1.6941893100738525, + "learning_rate": 4.1092638990616185e-06, + "loss": 0.6474, "step": 9183 }, { - "epoch": 0.6949415459119973, - "grad_norm": 1.9274907112121582, - "learning_rate": 4.2036773043268005e-06, - "loss": 0.7174, + "epoch": 0.6465329109468497, + "grad_norm": 2.0015220642089844, + "learning_rate": 4.107810323673793e-06, + "loss": 0.7546, "step": 9184 }, { - "epoch": 0.6950172146343309, - "grad_norm": 2.086277961730957, - "learning_rate": 4.201771712527318e-06, - "loss": 0.6836, + "epoch": 0.6466033086941217, + "grad_norm": 2.2114908695220947, + "learning_rate": 4.10635689865353e-06, + "loss": 0.7334, "step": 9185 }, { - "epoch": 0.6950928833566645, - "grad_norm": 2.223573684692383, - "learning_rate": 4.19986642120841e-06, - "loss": 0.6575, + "epoch": 0.6466737064413939, + "grad_norm": 1.6931229829788208, + "learning_rate": 4.104903624076391e-06, + "loss": 0.616, "step": 9186 }, { - "epoch": 0.6951685520789982, - "grad_norm": 3.5324819087982178, - "learning_rate": 4.197961430489393e-06, - "loss": 0.7639, + "epoch": 0.646744104188666, + "grad_norm": 1.809005856513977, + "learning_rate": 4.103450500017937e-06, + "loss": 0.7084, "step": 9187 }, { - "epoch": 0.6952442208013317, - "grad_norm": 3.183359146118164, - "learning_rate": 4.1960567404895644e-06, - "loss": 0.6029, + "epoch": 0.646814501935938, + "grad_norm": 1.4537408351898193, + "learning_rate": 4.101997526553713e-06, + "loss": 0.5779, "step": 9188 }, { - "epoch": 0.6953198895236654, - "grad_norm": 1.7968335151672363, - "learning_rate": 4.194152351328196e-06, - "loss": 0.8075, + "epoch": 0.6468848996832102, + "grad_norm": 1.670598030090332, + "learning_rate": 4.100544703759263e-06, + "loss": 0.6329, "step": 9189 }, { - "epoch": 0.695395558245999, - "grad_norm": 1.813084602355957, - "learning_rate": 4.192248263124554e-06, - "loss": 0.7122, + "epoch": 0.6469552974304822, + "grad_norm": 1.5561916828155518, + "learning_rate": 4.0990920317101175e-06, + "loss": 0.5359, "step": 9190 }, { - "epoch": 0.6954712269683326, - "grad_norm": 2.4125254154205322, - "learning_rate": 4.1903444759978785e-06, - "loss": 0.7781, + "epoch": 0.6470256951777543, + "grad_norm": 1.9127743244171143, + "learning_rate": 4.097639510481807e-06, + "loss": 0.6564, "step": 9191 }, { - "epoch": 0.6955468956906663, - "grad_norm": 2.2479751110076904, - "learning_rate": 4.188440990067385e-06, - "loss": 0.671, + "epoch": 0.6470960929250263, + "grad_norm": 1.5878373384475708, + "learning_rate": 4.096187140149845e-06, + "loss": 0.6945, "step": 9192 }, { - "epoch": 0.6956225644129999, - "grad_norm": 2.125385284423828, - "learning_rate": 4.18653780545228e-06, - "loss": 0.6195, + "epoch": 0.6471664906722985, + "grad_norm": 1.8311591148376465, + "learning_rate": 4.094734920789745e-06, + "loss": 0.6255, "step": 9193 }, { - "epoch": 0.6956982331353335, - "grad_norm": 2.0010180473327637, - "learning_rate": 4.184634922271746e-06, - "loss": 0.712, + "epoch": 0.6472368884195706, + "grad_norm": 1.7654848098754883, + "learning_rate": 4.093282852477007e-06, + "loss": 0.5705, "step": 9194 }, { - "epoch": 0.6957739018576672, - "grad_norm": 2.053400754928589, - "learning_rate": 4.182732340644948e-06, - "loss": 0.538, + "epoch": 0.6473072861668426, + "grad_norm": 2.02492618560791, + "learning_rate": 4.091830935287127e-06, + "loss": 0.7143, "step": 9195 }, { - "epoch": 0.6958495705800007, - "grad_norm": 2.342834949493408, - "learning_rate": 4.180830060691031e-06, - "loss": 0.6268, + "epoch": 0.6473776839141148, + "grad_norm": 2.170499801635742, + "learning_rate": 4.090379169295588e-06, + "loss": 0.6385, "step": 9196 }, { - "epoch": 0.6959252393023344, - "grad_norm": 2.2274081707000732, - "learning_rate": 4.178928082529123e-06, - "loss": 0.7135, + "epoch": 0.6474480816613868, + "grad_norm": 1.7445669174194336, + "learning_rate": 4.0889275545778736e-06, + "loss": 0.5525, "step": 9197 }, { - "epoch": 0.696000908024668, - "grad_norm": 2.246617317199707, - "learning_rate": 4.177026406278332e-06, - "loss": 0.684, + "epoch": 0.6475184794086589, + "grad_norm": 1.6344386339187622, + "learning_rate": 4.087476091209451e-06, + "loss": 0.7382, "step": 9198 }, { - "epoch": 0.6960765767470016, - "grad_norm": 2.258347988128662, - "learning_rate": 4.1751250320577475e-06, - "loss": 0.7247, + "epoch": 0.647588877155931, + "grad_norm": 1.803437352180481, + "learning_rate": 4.086024779265785e-06, + "loss": 0.6828, "step": 9199 }, { - "epoch": 0.6961522454693353, - "grad_norm": 1.881675362586975, - "learning_rate": 4.173223959986437e-06, - "loss": 0.733, + "epoch": 0.6476592749032031, + "grad_norm": 1.8265740871429443, + "learning_rate": 4.084573618822327e-06, + "loss": 0.724, "step": 9200 }, { - "epoch": 0.6962279141916689, - "grad_norm": 3.7385098934173584, - "learning_rate": 4.171323190183455e-06, - "loss": 0.585, + "epoch": 0.6477296726504752, + "grad_norm": 2.0018696784973145, + "learning_rate": 4.083122609954531e-06, + "loss": 0.6309, "step": 9201 }, { - "epoch": 0.6963035829140025, - "grad_norm": 2.185427665710449, - "learning_rate": 4.1694227227678365e-06, - "loss": 0.6836, + "epoch": 0.6478000703977472, + "grad_norm": 1.6273831129074097, + "learning_rate": 4.08167175273783e-06, + "loss": 0.6032, "step": 9202 }, { - "epoch": 0.6963792516363361, - "grad_norm": 2.1207618713378906, - "learning_rate": 4.167522557858588e-06, - "loss": 0.6311, + "epoch": 0.6478704681450194, + "grad_norm": 1.7669438123703003, + "learning_rate": 4.080221047247659e-06, + "loss": 0.7656, "step": 9203 }, { - "epoch": 0.6964549203586697, - "grad_norm": 1.9281233549118042, - "learning_rate": 4.165622695574704e-06, - "loss": 0.8161, + "epoch": 0.6479408658922915, + "grad_norm": 1.8705053329467773, + "learning_rate": 4.0787704935594376e-06, + "loss": 0.7096, "step": 9204 }, { - "epoch": 0.6965305890810034, - "grad_norm": 2.0313522815704346, - "learning_rate": 4.163723136035168e-06, - "loss": 0.5862, + "epoch": 0.6480112636395635, + "grad_norm": 1.9329562187194824, + "learning_rate": 4.077320091748585e-06, + "loss": 0.6925, "step": 9205 }, { - "epoch": 0.696606257803337, - "grad_norm": 2.408750534057617, - "learning_rate": 4.161823879358929e-06, - "loss": 0.6848, + "epoch": 0.6480816613868357, + "grad_norm": 1.8714104890823364, + "learning_rate": 4.0758698418905065e-06, + "loss": 0.5881, "step": 9206 }, { - "epoch": 0.6966819265256706, - "grad_norm": 1.5770151615142822, - "learning_rate": 4.159924925664927e-06, - "loss": 0.5911, + "epoch": 0.6481520591341077, + "grad_norm": 1.8278003931045532, + "learning_rate": 4.074419744060604e-06, + "loss": 0.7287, "step": 9207 }, { - "epoch": 0.6967575952480043, - "grad_norm": 2.710477113723755, - "learning_rate": 4.158026275072082e-06, - "loss": 0.7804, + "epoch": 0.6482224568813798, + "grad_norm": 2.1428380012512207, + "learning_rate": 4.072969798334271e-06, + "loss": 0.6466, "step": 9208 }, { - "epoch": 0.6968332639703378, - "grad_norm": 1.9035274982452393, - "learning_rate": 4.156127927699294e-06, - "loss": 0.6249, + "epoch": 0.6482928546286519, + "grad_norm": 1.7402604818344116, + "learning_rate": 4.071520004786888e-06, + "loss": 0.7588, "step": 9209 }, { - "epoch": 0.6969089326926715, - "grad_norm": 1.7576590776443481, - "learning_rate": 4.154229883665437e-06, - "loss": 0.5942, + "epoch": 0.648363252375924, + "grad_norm": 1.705783724784851, + "learning_rate": 4.07007036349383e-06, + "loss": 0.7302, "step": 9210 }, { - "epoch": 0.6969846014150051, - "grad_norm": 1.8701988458633423, - "learning_rate": 4.152332143089381e-06, - "loss": 0.601, + "epoch": 0.6484336501231961, + "grad_norm": 1.7048941850662231, + "learning_rate": 4.068620874530468e-06, + "loss": 0.607, "step": 9211 }, { - "epoch": 0.6970602701373387, - "grad_norm": 2.1016006469726562, - "learning_rate": 4.150434706089965e-06, - "loss": 0.643, + "epoch": 0.6485040478704681, + "grad_norm": 2.0970375537872314, + "learning_rate": 4.06717153797216e-06, + "loss": 0.6625, "step": 9212 }, { - "epoch": 0.6971359388596724, - "grad_norm": 2.1549484729766846, - "learning_rate": 4.148537572786016e-06, - "loss": 0.734, + "epoch": 0.6485744456177402, + "grad_norm": 1.9738019704818726, + "learning_rate": 4.065722353894264e-06, + "loss": 0.5544, "step": 9213 }, { - "epoch": 0.697211607582006, - "grad_norm": 2.0905442237854004, - "learning_rate": 4.146640743296333e-06, - "loss": 0.6454, + "epoch": 0.6486448433650123, + "grad_norm": 2.048354387283325, + "learning_rate": 4.064273322372119e-06, + "loss": 0.6928, "step": 9214 }, { - "epoch": 0.6972872763043396, - "grad_norm": 1.8356784582138062, - "learning_rate": 4.144744217739701e-06, - "loss": 0.6778, + "epoch": 0.6487152411122844, + "grad_norm": 1.5039058923721313, + "learning_rate": 4.062824443481063e-06, + "loss": 0.6418, "step": 9215 }, { - "epoch": 0.6973629450266732, - "grad_norm": 2.0387625694274902, - "learning_rate": 4.142847996234896e-06, - "loss": 0.6613, + "epoch": 0.6487856388595565, + "grad_norm": 1.8252884149551392, + "learning_rate": 4.061375717296421e-06, + "loss": 0.716, "step": 9216 }, { - "epoch": 0.6974386137490068, - "grad_norm": 1.5910100936889648, - "learning_rate": 4.140952078900658e-06, - "loss": 0.5748, + "epoch": 0.6488560366068286, + "grad_norm": 1.739681363105774, + "learning_rate": 4.05992714389352e-06, + "loss": 0.6148, "step": 9217 }, { - "epoch": 0.6975142824713405, - "grad_norm": 1.9364255666732788, - "learning_rate": 4.139056465855714e-06, - "loss": 0.6595, + "epoch": 0.6489264343541007, + "grad_norm": 1.884558081626892, + "learning_rate": 4.0584787233476666e-06, + "loss": 0.561, "step": 9218 }, { - "epoch": 0.6975899511936741, - "grad_norm": 1.8187669515609741, - "learning_rate": 4.137161157218779e-06, - "loss": 0.5342, + "epoch": 0.6489968321013727, + "grad_norm": 3.210170030593872, + "learning_rate": 4.057030455734172e-06, + "loss": 0.7332, "step": 9219 }, { - "epoch": 0.6976656199160077, - "grad_norm": 1.753604769706726, - "learning_rate": 4.135266153108539e-06, - "loss": 0.6342, + "epoch": 0.6490672298486448, + "grad_norm": 1.7378777265548706, + "learning_rate": 4.055582341128328e-06, + "loss": 0.6976, "step": 9220 }, { - "epoch": 0.6977412886383414, - "grad_norm": 2.2299137115478516, - "learning_rate": 4.133371453643668e-06, - "loss": 0.6365, + "epoch": 0.649137627595917, + "grad_norm": 1.7024846076965332, + "learning_rate": 4.0541343796054254e-06, + "loss": 0.7878, "step": 9221 }, { - "epoch": 0.697816957360675, - "grad_norm": 2.1055357456207275, - "learning_rate": 4.131477058942816e-06, - "loss": 0.8319, + "epoch": 0.649208025343189, + "grad_norm": 1.9325062036514282, + "learning_rate": 4.052686571240741e-06, + "loss": 0.7457, "step": 9222 }, { - "epoch": 0.6978926260830086, - "grad_norm": 3.087947368621826, - "learning_rate": 4.12958296912462e-06, - "loss": 0.6226, + "epoch": 0.6492784230904611, + "grad_norm": 1.5473859310150146, + "learning_rate": 4.051238916109554e-06, + "loss": 0.6305, "step": 9223 }, { - "epoch": 0.6979682948053422, - "grad_norm": 1.8325108289718628, - "learning_rate": 4.127689184307691e-06, - "loss": 0.6056, + "epoch": 0.6493488208377332, + "grad_norm": 1.710837483406067, + "learning_rate": 4.049791414287124e-06, + "loss": 0.7099, "step": 9224 }, { - "epoch": 0.6980439635276758, - "grad_norm": 2.2946414947509766, - "learning_rate": 4.1257957046106185e-06, - "loss": 0.6827, + "epoch": 0.6494192185850053, + "grad_norm": 1.9322608709335327, + "learning_rate": 4.0483440658487125e-06, + "loss": 0.5749, "step": 9225 }, { - "epoch": 0.6981196322500095, - "grad_norm": 2.5224714279174805, - "learning_rate": 4.1239025301519875e-06, - "loss": 0.7167, + "epoch": 0.6494896163322774, + "grad_norm": 1.7355010509490967, + "learning_rate": 4.046896870869567e-06, + "loss": 0.6302, "step": 9226 }, { - "epoch": 0.6981953009723431, - "grad_norm": 1.8570104837417603, - "learning_rate": 4.122009661050355e-06, - "loss": 0.6781, + "epoch": 0.6495600140795494, + "grad_norm": 1.909568428993225, + "learning_rate": 4.045449829424924e-06, + "loss": 0.6986, "step": 9227 }, { - "epoch": 0.6982709696946767, - "grad_norm": 2.250521183013916, - "learning_rate": 4.120117097424252e-06, - "loss": 0.6597, + "epoch": 0.6496304118268216, + "grad_norm": 2.2857961654663086, + "learning_rate": 4.044002941590023e-06, + "loss": 0.561, "step": 9228 }, { - "epoch": 0.6983466384170103, - "grad_norm": 2.871605157852173, - "learning_rate": 4.118224839392199e-06, - "loss": 0.6728, + "epoch": 0.6497008095740936, + "grad_norm": 1.5829561948776245, + "learning_rate": 4.042556207440084e-06, + "loss": 0.6993, "step": 9229 }, { - "epoch": 0.698422307139344, - "grad_norm": 2.111581325531006, - "learning_rate": 4.116332887072697e-06, - "loss": 0.7846, + "epoch": 0.6497712073213657, + "grad_norm": 1.6681197881698608, + "learning_rate": 4.041109627050329e-06, + "loss": 0.5433, "step": 9230 }, { - "epoch": 0.6984979758616776, - "grad_norm": 2.0724074840545654, - "learning_rate": 4.1144412405842245e-06, - "loss": 0.6152, + "epoch": 0.6498416050686378, + "grad_norm": 1.7921912670135498, + "learning_rate": 4.039663200495961e-06, + "loss": 0.6214, "step": 9231 }, { - "epoch": 0.6985736445840112, - "grad_norm": 1.961753487586975, - "learning_rate": 4.112549900045244e-06, - "loss": 0.7844, + "epoch": 0.6499120028159099, + "grad_norm": 1.8705651760101318, + "learning_rate": 4.0382169278521895e-06, + "loss": 0.5266, "step": 9232 }, { - "epoch": 0.6986493133063448, - "grad_norm": 1.6883488893508911, - "learning_rate": 4.1106588655741965e-06, - "loss": 0.7343, + "epoch": 0.649982400563182, + "grad_norm": 1.5860049724578857, + "learning_rate": 4.036770809194197e-06, + "loss": 0.7168, "step": 9233 }, { - "epoch": 0.6987249820286785, - "grad_norm": 1.871146559715271, - "learning_rate": 4.108768137289507e-06, - "loss": 0.5357, + "epoch": 0.650052798310454, + "grad_norm": 1.86215341091156, + "learning_rate": 4.035324844597177e-06, + "loss": 0.5586, "step": 9234 }, { - "epoch": 0.698800650751012, - "grad_norm": 2.21620774269104, - "learning_rate": 4.106877715309572e-06, - "loss": 0.6735, + "epoch": 0.6501231960577262, + "grad_norm": 1.8198368549346924, + "learning_rate": 4.0338790341363e-06, + "loss": 0.6595, "step": 9235 }, { - "epoch": 0.6988763194733457, - "grad_norm": 1.7150919437408447, - "learning_rate": 4.104987599752783e-06, - "loss": 0.5634, + "epoch": 0.6501935938049982, + "grad_norm": 1.8672734498977661, + "learning_rate": 4.032433377886741e-06, + "loss": 0.6471, "step": 9236 }, { - "epoch": 0.6989519881956793, - "grad_norm": 2.08954119682312, - "learning_rate": 4.103097790737507e-06, - "loss": 0.6117, + "epoch": 0.6502639915522703, + "grad_norm": 1.7846415042877197, + "learning_rate": 4.030987875923655e-06, + "loss": 0.6181, "step": 9237 }, { - "epoch": 0.6990276569180129, - "grad_norm": 2.288553237915039, - "learning_rate": 4.101208288382082e-06, - "loss": 0.7672, + "epoch": 0.6503343892995425, + "grad_norm": 1.664820671081543, + "learning_rate": 4.029542528322205e-06, + "loss": 0.594, "step": 9238 }, { - "epoch": 0.6991033256403466, - "grad_norm": 1.9526705741882324, - "learning_rate": 4.099319092804839e-06, - "loss": 0.7364, + "epoch": 0.6504047870468145, + "grad_norm": 1.943035364151001, + "learning_rate": 4.028097335157523e-06, + "loss": 0.7272, "step": 9239 }, { - "epoch": 0.6991789943626802, - "grad_norm": 2.6329140663146973, - "learning_rate": 4.097430204124082e-06, - "loss": 0.7055, + "epoch": 0.6504751847940866, + "grad_norm": 2.471278190612793, + "learning_rate": 4.026652296504755e-06, + "loss": 0.5899, "step": 9240 }, { - "epoch": 0.6992546630850138, - "grad_norm": 2.0371086597442627, - "learning_rate": 4.095541622458108e-06, - "loss": 0.6239, + "epoch": 0.6505455825413586, + "grad_norm": 1.8564873933792114, + "learning_rate": 4.025207412439024e-06, + "loss": 0.6896, "step": 9241 }, { - "epoch": 0.6993303318073474, - "grad_norm": 1.7768152952194214, - "learning_rate": 4.093653347925178e-06, - "loss": 0.651, + "epoch": 0.6506159802886308, + "grad_norm": 1.808910608291626, + "learning_rate": 4.0237626830354566e-06, + "loss": 0.6178, "step": 9242 }, { - "epoch": 0.699406000529681, - "grad_norm": 1.7975434064865112, - "learning_rate": 4.091765380643544e-06, - "loss": 0.6697, + "epoch": 0.6506863780359029, + "grad_norm": 1.6726816892623901, + "learning_rate": 4.02231810836916e-06, + "loss": 0.674, "step": 9243 }, { - "epoch": 0.6994816692520147, - "grad_norm": 1.8695652484893799, - "learning_rate": 4.089877720731438e-06, - "loss": 0.768, + "epoch": 0.6507567757831749, + "grad_norm": 1.9593249559402466, + "learning_rate": 4.020873688515247e-06, + "loss": 0.6339, "step": 9244 }, { - "epoch": 0.6995573379743483, - "grad_norm": 2.318563222885132, - "learning_rate": 4.087990368307073e-06, - "loss": 0.6121, + "epoch": 0.6508271735304471, + "grad_norm": 1.8224382400512695, + "learning_rate": 4.019429423548803e-06, + "loss": 0.7181, "step": 9245 }, { - "epoch": 0.6996330066966819, - "grad_norm": 1.9575533866882324, - "learning_rate": 4.0861033234886305e-06, - "loss": 0.7827, + "epoch": 0.6508975712777191, + "grad_norm": 1.8617184162139893, + "learning_rate": 4.0179853135449275e-06, + "loss": 0.6607, "step": 9246 }, { - "epoch": 0.6997086754190156, - "grad_norm": 2.9256958961486816, - "learning_rate": 4.084216586394297e-06, - "loss": 0.5644, + "epoch": 0.6509679690249912, + "grad_norm": 1.2727385759353638, + "learning_rate": 4.016541358578692e-06, + "loss": 0.7472, "step": 9247 }, { - "epoch": 0.6997843441413492, - "grad_norm": 2.3860838413238525, - "learning_rate": 4.082330157142222e-06, - "loss": 0.8408, + "epoch": 0.6510383667722632, + "grad_norm": 2.0536115169525146, + "learning_rate": 4.015097558725176e-06, + "loss": 0.6132, "step": 9248 }, { - "epoch": 0.6998600128636828, - "grad_norm": 1.9278440475463867, - "learning_rate": 4.080444035850536e-06, - "loss": 0.6105, + "epoch": 0.6511087645195354, + "grad_norm": 1.9759191274642944, + "learning_rate": 4.013653914059438e-06, + "loss": 0.6799, "step": 9249 }, { - "epoch": 0.6999356815860164, - "grad_norm": 2.849597215652466, - "learning_rate": 4.0785582226373555e-06, - "loss": 0.6557, + "epoch": 0.6511791622668075, + "grad_norm": 1.9957935810089111, + "learning_rate": 4.0122104246565446e-06, + "loss": 0.6422, "step": 9250 }, { - "epoch": 0.70001135030835, - "grad_norm": 2.0541129112243652, - "learning_rate": 4.076672717620778e-06, - "loss": 0.6789, + "epoch": 0.6512495600140795, + "grad_norm": 1.6152325868606567, + "learning_rate": 4.01076709059153e-06, + "loss": 0.6617, "step": 9251 }, { - "epoch": 0.7000870190306837, - "grad_norm": 1.8564491271972656, - "learning_rate": 4.074787520918878e-06, - "loss": 0.6847, + "epoch": 0.6513199577613517, + "grad_norm": 1.6450284719467163, + "learning_rate": 4.009323911939444e-06, + "loss": 0.6254, "step": 9252 }, { - "epoch": 0.7001626877530173, - "grad_norm": 1.949328899383545, - "learning_rate": 4.072902632649714e-06, - "loss": 0.7276, + "epoch": 0.6513903555086237, + "grad_norm": 1.97744882106781, + "learning_rate": 4.007880888775316e-06, + "loss": 0.5949, "step": 9253 }, { - "epoch": 0.7002383564753509, - "grad_norm": 1.7822113037109375, - "learning_rate": 4.071018052931323e-06, - "loss": 0.698, + "epoch": 0.6514607532558958, + "grad_norm": 2.0414931774139404, + "learning_rate": 4.006438021174171e-06, + "loss": 0.7387, "step": 9254 }, { - "epoch": 0.7003140251976845, - "grad_norm": 2.231820821762085, - "learning_rate": 4.069133781881727e-06, - "loss": 0.601, + "epoch": 0.6515311510031679, + "grad_norm": 1.8643038272857666, + "learning_rate": 4.004995309211023e-06, + "loss": 0.7092, "step": 9255 }, { - "epoch": 0.7003896939200182, - "grad_norm": 2.3078548908233643, - "learning_rate": 4.067249819618916e-06, - "loss": 0.646, + "epoch": 0.65160154875044, + "grad_norm": 1.800934910774231, + "learning_rate": 4.003552752960886e-06, + "loss": 0.6488, "step": 9256 }, { - "epoch": 0.7004653626423518, - "grad_norm": 2.0806334018707275, - "learning_rate": 4.065366166260878e-06, - "loss": 0.6797, + "epoch": 0.6516719464977121, + "grad_norm": 2.065474271774292, + "learning_rate": 4.0021103524987496e-06, + "loss": 0.6943, "step": 9257 }, { - "epoch": 0.7005410313646854, - "grad_norm": 1.993569254875183, - "learning_rate": 4.063482821925572e-06, - "loss": 0.6611, + "epoch": 0.6517423442449841, + "grad_norm": 1.7736327648162842, + "learning_rate": 4.0006681078996135e-06, + "loss": 0.7201, "step": 9258 }, { - "epoch": 0.700616700087019, - "grad_norm": 2.1819117069244385, - "learning_rate": 4.061599786730941e-06, - "loss": 0.6336, + "epoch": 0.6518127419922563, + "grad_norm": 2.072310209274292, + "learning_rate": 3.999226019238457e-06, + "loss": 0.7205, "step": 9259 }, { - "epoch": 0.7006923688093527, - "grad_norm": 2.2110774517059326, - "learning_rate": 4.059717060794902e-06, - "loss": 0.5346, + "epoch": 0.6518831397395284, + "grad_norm": 2.1861965656280518, + "learning_rate": 3.997784086590259e-06, + "loss": 0.6334, "step": 9260 }, { - "epoch": 0.7007680375316863, - "grad_norm": 2.1333065032958984, - "learning_rate": 4.057834644235355e-06, - "loss": 0.7319, + "epoch": 0.6519535374868004, + "grad_norm": 1.6824575662612915, + "learning_rate": 3.996342310029984e-06, + "loss": 0.713, "step": 9261 }, { - "epoch": 0.7008437062540199, - "grad_norm": 1.8538813591003418, - "learning_rate": 4.055952537170195e-06, - "loss": 0.6036, + "epoch": 0.6520239352340725, + "grad_norm": 1.9644349813461304, + "learning_rate": 3.994900689632595e-06, + "loss": 0.6616, "step": 9262 }, { - "epoch": 0.7009193749763535, - "grad_norm": 2.097684144973755, - "learning_rate": 4.054070739717274e-06, - "loss": 0.7522, + "epoch": 0.6520943329813446, + "grad_norm": 1.7756037712097168, + "learning_rate": 3.99345922547304e-06, + "loss": 0.705, "step": 9263 }, { - "epoch": 0.7009950436986871, - "grad_norm": 1.6460198163986206, - "learning_rate": 4.05218925199444e-06, - "loss": 0.533, + "epoch": 0.6521647307286167, + "grad_norm": 1.5650938749313354, + "learning_rate": 3.992017917626263e-06, + "loss": 0.5902, "step": 9264 }, { - "epoch": 0.7010707124210208, - "grad_norm": 4.016617774963379, - "learning_rate": 4.05030807411952e-06, - "loss": 0.5638, + "epoch": 0.6522351284758888, + "grad_norm": 2.184943914413452, + "learning_rate": 3.990576766167195e-06, + "loss": 0.73, "step": 9265 }, { - "epoch": 0.7011463811433544, - "grad_norm": 1.8979525566101074, - "learning_rate": 4.048427206210316e-06, - "loss": 0.6731, + "epoch": 0.6523055262231608, + "grad_norm": 1.9270200729370117, + "learning_rate": 3.98913577117077e-06, + "loss": 0.6795, "step": 9266 }, { - "epoch": 0.701222049865688, - "grad_norm": 2.001255989074707, - "learning_rate": 4.046546648384616e-06, - "loss": 0.5913, + "epoch": 0.652375923970433, + "grad_norm": 2.0938117504119873, + "learning_rate": 3.987694932711901e-06, + "loss": 0.7106, "step": 9267 }, { - "epoch": 0.7012977185880216, - "grad_norm": 2.3916757106781006, - "learning_rate": 4.044666400760186e-06, - "loss": 0.5653, + "epoch": 0.652446321717705, + "grad_norm": 1.5736783742904663, + "learning_rate": 3.9862542508655035e-06, + "loss": 0.6036, "step": 9268 }, { - "epoch": 0.7013733873103553, - "grad_norm": 1.7634689807891846, - "learning_rate": 4.042786463454778e-06, - "loss": 0.6478, + "epoch": 0.6525167194649771, + "grad_norm": 1.5542707443237305, + "learning_rate": 3.9848137257064755e-06, + "loss": 0.593, "step": 9269 }, { - "epoch": 0.7014490560326889, - "grad_norm": 2.492938280105591, - "learning_rate": 4.04090683658611e-06, - "loss": 0.6283, + "epoch": 0.6525871172122492, + "grad_norm": 2.0307464599609375, + "learning_rate": 3.983373357309713e-06, + "loss": 0.6262, "step": 9270 }, { - "epoch": 0.7015247247550225, - "grad_norm": 1.7969627380371094, - "learning_rate": 4.039027520271894e-06, - "loss": 0.6721, + "epoch": 0.6526575149595213, + "grad_norm": 1.7807729244232178, + "learning_rate": 3.9819331457500996e-06, + "loss": 0.6044, "step": 9271 }, { - "epoch": 0.7016003934773561, - "grad_norm": 1.9584459066390991, - "learning_rate": 4.037148514629823e-06, - "loss": 0.5983, + "epoch": 0.6527279127067934, + "grad_norm": 1.7125300168991089, + "learning_rate": 3.980493091102517e-06, + "loss": 0.6674, "step": 9272 }, { - "epoch": 0.7016760621996898, - "grad_norm": 2.0558922290802, - "learning_rate": 4.035269819777567e-06, - "loss": 0.7428, + "epoch": 0.6527983104540654, + "grad_norm": 1.880088210105896, + "learning_rate": 3.9790531934418315e-06, + "loss": 0.6954, "step": 9273 }, { - "epoch": 0.7017517309220234, - "grad_norm": 1.8677301406860352, - "learning_rate": 4.03339143583277e-06, - "loss": 0.8003, + "epoch": 0.6528687082013376, + "grad_norm": 2.079099416732788, + "learning_rate": 3.977613452842909e-06, + "loss": 0.626, "step": 9274 }, { - "epoch": 0.701827399644357, - "grad_norm": 2.25616192817688, - "learning_rate": 4.0315133629130645e-06, - "loss": 0.6491, + "epoch": 0.6529391059486096, + "grad_norm": 1.6816585063934326, + "learning_rate": 3.9761738693806e-06, + "loss": 0.5569, "step": 9275 }, { - "epoch": 0.7019030683666906, - "grad_norm": 2.1559536457061768, - "learning_rate": 4.029635601136063e-06, - "loss": 0.6757, + "epoch": 0.6530095036958817, + "grad_norm": 1.8992546796798706, + "learning_rate": 3.97473444312975e-06, + "loss": 0.6487, "step": 9276 }, { - "epoch": 0.7019787370890243, - "grad_norm": 1.8619732856750488, - "learning_rate": 4.027758150619356e-06, - "loss": 0.5132, + "epoch": 0.6530799014431539, + "grad_norm": 2.0140066146850586, + "learning_rate": 3.973295174165194e-06, + "loss": 0.5378, "step": 9277 }, { - "epoch": 0.7020544058113579, - "grad_norm": 1.7390613555908203, - "learning_rate": 4.0258810114805156e-06, - "loss": 0.6006, + "epoch": 0.6531502991904259, + "grad_norm": 2.5940306186676025, + "learning_rate": 3.971856062561762e-06, + "loss": 0.7788, "step": 9278 }, { - "epoch": 0.7021300745336915, - "grad_norm": 2.2929675579071045, - "learning_rate": 4.024004183837095e-06, - "loss": 0.605, + "epoch": 0.653220696937698, + "grad_norm": 1.9250954389572144, + "learning_rate": 3.970417108394279e-06, + "loss": 0.6997, "step": 9279 }, { - "epoch": 0.7022057432560251, - "grad_norm": 2.116135835647583, - "learning_rate": 4.022127667806629e-06, - "loss": 0.8126, + "epoch": 0.65329109468497, + "grad_norm": 1.898069143295288, + "learning_rate": 3.968978311737554e-06, + "loss": 0.64, "step": 9280 }, { - "epoch": 0.7022814119783587, - "grad_norm": 2.891150951385498, - "learning_rate": 4.020251463506623e-06, - "loss": 0.7529, + "epoch": 0.6533614924322422, + "grad_norm": 1.7730474472045898, + "learning_rate": 3.967539672666393e-06, + "loss": 0.7441, "step": 9281 }, { - "epoch": 0.7023570807006924, - "grad_norm": 2.0369067192077637, - "learning_rate": 4.01837557105458e-06, - "loss": 0.7471, + "epoch": 0.6534318901795143, + "grad_norm": 1.6446844339370728, + "learning_rate": 3.966101191255586e-06, + "loss": 0.5498, "step": 9282 }, { - "epoch": 0.702432749423026, - "grad_norm": 2.0991392135620117, - "learning_rate": 4.016499990567975e-06, - "loss": 0.8235, + "epoch": 0.6535022879267863, + "grad_norm": 1.8587230443954468, + "learning_rate": 3.96466286757993e-06, + "loss": 0.6087, "step": 9283 }, { - "epoch": 0.7025084181453596, - "grad_norm": 2.3228790760040283, - "learning_rate": 4.014624722164255e-06, - "loss": 0.5768, + "epoch": 0.6535726856740585, + "grad_norm": 1.8281733989715576, + "learning_rate": 3.963224701714197e-06, + "loss": 0.58, "step": 9284 }, { - "epoch": 0.7025840868676932, - "grad_norm": 2.1848552227020264, - "learning_rate": 4.01274976596086e-06, - "loss": 0.6158, + "epoch": 0.6536430834213305, + "grad_norm": 2.5593385696411133, + "learning_rate": 3.961786693733165e-06, + "loss": 0.6069, "step": 9285 }, { - "epoch": 0.7026597555900269, - "grad_norm": 2.320488452911377, - "learning_rate": 4.0108751220752065e-06, - "loss": 0.7085, + "epoch": 0.6537134811686026, + "grad_norm": 1.7586474418640137, + "learning_rate": 3.960348843711594e-06, + "loss": 0.6699, "step": 9286 }, { - "epoch": 0.7027354243123605, - "grad_norm": 3.8415627479553223, - "learning_rate": 4.009000790624687e-06, - "loss": 0.7112, + "epoch": 0.6537838789158746, + "grad_norm": 1.6561896800994873, + "learning_rate": 3.958911151724241e-06, + "loss": 0.6285, "step": 9287 }, { - "epoch": 0.7028110930346941, - "grad_norm": 2.0608065128326416, - "learning_rate": 4.007126771726684e-06, - "loss": 0.6086, + "epoch": 0.6538542766631468, + "grad_norm": 1.8961546421051025, + "learning_rate": 3.957473617845847e-06, + "loss": 0.6882, "step": 9288 }, { - "epoch": 0.7028867617570277, - "grad_norm": 1.4849302768707275, - "learning_rate": 4.005253065498549e-06, - "loss": 0.6349, + "epoch": 0.6539246744104189, + "grad_norm": 1.3032306432724, + "learning_rate": 3.956036242151159e-06, + "loss": 0.7141, "step": 9289 }, { - "epoch": 0.7029624304793614, - "grad_norm": 3.0800068378448486, - "learning_rate": 4.003379672057622e-06, - "loss": 0.7289, + "epoch": 0.6539950721576909, + "grad_norm": 1.6677374839782715, + "learning_rate": 3.954599024714899e-06, + "loss": 0.6517, "step": 9290 }, { - "epoch": 0.703038099201695, - "grad_norm": 2.145936965942383, - "learning_rate": 4.001506591521226e-06, - "loss": 0.7398, + "epoch": 0.6540654699049631, + "grad_norm": 1.8500113487243652, + "learning_rate": 3.953161965611798e-06, + "loss": 0.8063, "step": 9291 }, { - "epoch": 0.7031137679240286, - "grad_norm": 1.8864727020263672, - "learning_rate": 3.999633824006647e-06, - "loss": 0.6183, + "epoch": 0.6541358676522351, + "grad_norm": 1.961794137954712, + "learning_rate": 3.951725064916565e-06, + "loss": 0.6605, "step": 9292 }, { - "epoch": 0.7031894366463622, - "grad_norm": 1.6764299869537354, - "learning_rate": 3.997761369631174e-06, - "loss": 0.6655, + "epoch": 0.6542062653995072, + "grad_norm": 1.6367532014846802, + "learning_rate": 3.950288322703907e-06, + "loss": 0.7377, "step": 9293 }, { - "epoch": 0.7032651053686958, - "grad_norm": 1.7775150537490845, - "learning_rate": 3.9958892285120674e-06, - "loss": 0.5845, + "epoch": 0.6542766631467793, + "grad_norm": 2.065352439880371, + "learning_rate": 3.948851739048519e-06, + "loss": 0.6489, "step": 9294 }, { - "epoch": 0.7033407740910295, - "grad_norm": 1.978812575340271, - "learning_rate": 3.994017400766558e-06, - "loss": 0.7968, + "epoch": 0.6543470608940514, + "grad_norm": 1.6857280731201172, + "learning_rate": 3.947415314025093e-06, + "loss": 0.6695, "step": 9295 }, { - "epoch": 0.7034164428133631, - "grad_norm": 2.058699369430542, - "learning_rate": 3.992145886511871e-06, - "loss": 0.6392, + "epoch": 0.6544174586413235, + "grad_norm": 1.8349976539611816, + "learning_rate": 3.945979047708309e-06, + "loss": 0.6338, "step": 9296 }, { - "epoch": 0.7034921115356967, - "grad_norm": 1.882474422454834, - "learning_rate": 3.990274685865206e-06, - "loss": 0.5851, + "epoch": 0.6544878563885955, + "grad_norm": 1.5804296731948853, + "learning_rate": 3.944542940172842e-06, + "loss": 0.6281, "step": 9297 }, { - "epoch": 0.7035677802580304, - "grad_norm": 1.9979125261306763, - "learning_rate": 3.988403798943743e-06, - "loss": 0.8891, + "epoch": 0.6545582541358677, + "grad_norm": 1.5988562107086182, + "learning_rate": 3.943106991493355e-06, + "loss": 0.6845, "step": 9298 }, { - "epoch": 0.703643448980364, - "grad_norm": 1.9912859201431274, - "learning_rate": 3.986533225864645e-06, - "loss": 0.6231, + "epoch": 0.6546286518831398, + "grad_norm": 1.7060246467590332, + "learning_rate": 3.941671201744503e-06, + "loss": 0.6488, "step": 9299 }, { - "epoch": 0.7037191177026976, - "grad_norm": 2.228140115737915, - "learning_rate": 3.984662966745051e-06, - "loss": 0.8467, + "epoch": 0.6546990496304118, + "grad_norm": 1.7776978015899658, + "learning_rate": 3.940235571000933e-06, + "loss": 0.6622, "step": 9300 }, { - "epoch": 0.7037947864250312, - "grad_norm": 1.6897190809249878, - "learning_rate": 3.982793021702084e-06, - "loss": 0.7131, + "epoch": 0.6547694473776839, + "grad_norm": 2.334264039993286, + "learning_rate": 3.93880009933729e-06, + "loss": 0.6468, "step": 9301 }, { - "epoch": 0.7038704551473648, - "grad_norm": 2.1993837356567383, - "learning_rate": 3.980923390852844e-06, - "loss": 0.7983, + "epoch": 0.654839845124956, + "grad_norm": 1.9047025442123413, + "learning_rate": 3.9373647868282e-06, + "loss": 0.7207, "step": 9302 }, { - "epoch": 0.7039461238696985, - "grad_norm": 2.0132064819335938, - "learning_rate": 3.979054074314417e-06, - "loss": 0.7792, + "epoch": 0.6549102428722281, + "grad_norm": 1.7843750715255737, + "learning_rate": 3.935929633548289e-06, + "loss": 0.6959, "step": 9303 }, { - "epoch": 0.7040217925920321, - "grad_norm": 2.138044595718384, - "learning_rate": 3.977185072203862e-06, - "loss": 0.692, + "epoch": 0.6549806406195001, + "grad_norm": 2.3160400390625, + "learning_rate": 3.934494639572172e-06, + "loss": 0.7093, "step": 9304 }, { - "epoch": 0.7040974613143657, - "grad_norm": 1.7573822736740112, - "learning_rate": 3.975316384638228e-06, - "loss": 0.5628, + "epoch": 0.6550510383667723, + "grad_norm": 2.0542423725128174, + "learning_rate": 3.933059804974456e-06, + "loss": 0.7259, "step": 9305 }, { - "epoch": 0.7041731300366993, - "grad_norm": 1.9482179880142212, - "learning_rate": 3.97344801173453e-06, - "loss": 0.4971, + "epoch": 0.6551214361140444, + "grad_norm": 1.8615144491195679, + "learning_rate": 3.9316251298297354e-06, + "loss": 0.6739, "step": 9306 }, { - "epoch": 0.7042487987590329, - "grad_norm": 2.954737424850464, - "learning_rate": 3.971579953609772e-06, - "loss": 0.8212, + "epoch": 0.6551918338613164, + "grad_norm": 1.9861087799072266, + "learning_rate": 3.930190614212605e-06, + "loss": 0.5984, "step": 9307 }, { - "epoch": 0.7043244674813666, - "grad_norm": 1.92641019821167, - "learning_rate": 3.9697122103809475e-06, - "loss": 0.8241, + "epoch": 0.6552622316085885, + "grad_norm": 1.8594142198562622, + "learning_rate": 3.928756258197643e-06, + "loss": 0.5971, "step": 9308 }, { - "epoch": 0.7044001362037002, - "grad_norm": 2.1319527626037598, - "learning_rate": 3.967844782165012e-06, - "loss": 0.6169, + "epoch": 0.6553326293558606, + "grad_norm": 1.7186833620071411, + "learning_rate": 3.927322061859427e-06, + "loss": 0.6712, "step": 9309 }, { - "epoch": 0.7044758049260338, - "grad_norm": 1.711698293685913, - "learning_rate": 3.9659776690789104e-06, - "loss": 0.6958, + "epoch": 0.6554030271031327, + "grad_norm": 1.8496427536010742, + "learning_rate": 3.925888025272519e-06, + "loss": 0.6182, "step": 9310 }, { - "epoch": 0.7045514736483675, - "grad_norm": 2.079453468322754, - "learning_rate": 3.9641108712395714e-06, - "loss": 0.6348, + "epoch": 0.6554734248504048, + "grad_norm": 1.968284010887146, + "learning_rate": 3.924454148511478e-06, + "loss": 0.6157, "step": 9311 }, { - "epoch": 0.7046271423707011, - "grad_norm": 3.2162301540374756, - "learning_rate": 3.962244388763896e-06, - "loss": 0.6935, + "epoch": 0.6555438225976769, + "grad_norm": 1.9680858850479126, + "learning_rate": 3.923020431650848e-06, + "loss": 0.7107, "step": 9312 }, { - "epoch": 0.7047028110930347, - "grad_norm": 1.953348159790039, - "learning_rate": 3.960378221768772e-06, - "loss": 0.586, + "epoch": 0.655614220344949, + "grad_norm": 1.6702308654785156, + "learning_rate": 3.921586874765176e-06, + "loss": 0.676, "step": 9313 }, { - "epoch": 0.7047784798153683, - "grad_norm": 2.384439468383789, - "learning_rate": 3.958512370371063e-06, - "loss": 0.6983, + "epoch": 0.655684618092221, + "grad_norm": 1.6679167747497559, + "learning_rate": 3.920153477928986e-06, + "loss": 0.5896, "step": 9314 }, { - "epoch": 0.7048541485377019, - "grad_norm": 1.7614166736602783, - "learning_rate": 3.956646834687616e-06, - "loss": 0.7052, + "epoch": 0.6557550158394931, + "grad_norm": 2.0232794284820557, + "learning_rate": 3.918720241216809e-06, + "loss": 0.7058, "step": 9315 }, { - "epoch": 0.7049298172600356, - "grad_norm": 5.346411228179932, - "learning_rate": 3.95478161483526e-06, - "loss": 0.7654, + "epoch": 0.6558254135867653, + "grad_norm": 1.7558764219284058, + "learning_rate": 3.917287164703158e-06, + "loss": 0.7455, "step": 9316 }, { - "epoch": 0.7050054859823692, - "grad_norm": 2.2105648517608643, - "learning_rate": 3.9529167109307915e-06, - "loss": 0.7921, + "epoch": 0.6558958113340373, + "grad_norm": 1.791579246520996, + "learning_rate": 3.9158542484625386e-06, + "loss": 0.7082, "step": 9317 }, { - "epoch": 0.7050811547047028, - "grad_norm": 2.0886669158935547, - "learning_rate": 3.951052123091005e-06, - "loss": 0.6371, + "epoch": 0.6559662090813094, + "grad_norm": 1.8052475452423096, + "learning_rate": 3.9144214925694465e-06, + "loss": 0.658, "step": 9318 }, { - "epoch": 0.7051568234270364, - "grad_norm": 1.895363450050354, - "learning_rate": 3.949187851432667e-06, - "loss": 0.6704, + "epoch": 0.6560366068285814, + "grad_norm": 1.7970119714736938, + "learning_rate": 3.912988897098381e-06, + "loss": 0.5835, "step": 9319 }, { - "epoch": 0.70523249214937, - "grad_norm": 3.5013060569763184, - "learning_rate": 3.947323896072521e-06, - "loss": 0.6872, + "epoch": 0.6561070045758536, + "grad_norm": 1.8704392910003662, + "learning_rate": 3.911556462123812e-06, + "loss": 0.754, "step": 9320 }, { - "epoch": 0.7053081608717037, - "grad_norm": 1.9184173345565796, - "learning_rate": 3.945460257127294e-06, - "loss": 0.665, + "epoch": 0.6561774023231257, + "grad_norm": 1.7874860763549805, + "learning_rate": 3.910124187720224e-06, + "loss": 0.6831, "step": 9321 }, { - "epoch": 0.7053838295940373, - "grad_norm": 1.5872775316238403, - "learning_rate": 3.943596934713695e-06, - "loss": 0.8925, + "epoch": 0.6562478000703977, + "grad_norm": 1.7530848979949951, + "learning_rate": 3.908692073962079e-06, + "loss": 0.5845, "step": 9322 }, { - "epoch": 0.7054594983163709, - "grad_norm": 2.0756430625915527, - "learning_rate": 3.9417339289484085e-06, - "loss": 0.7668, + "epoch": 0.6563181978176699, + "grad_norm": 1.7472612857818604, + "learning_rate": 3.907260120923831e-06, + "loss": 0.5381, "step": 9323 }, { - "epoch": 0.7055351670387046, - "grad_norm": 2.210831642150879, - "learning_rate": 3.939871239948105e-06, - "loss": 0.697, + "epoch": 0.6563885955649419, + "grad_norm": 1.931133508682251, + "learning_rate": 3.905828328679929e-06, + "loss": 0.7108, "step": 9324 }, { - "epoch": 0.7056108357610382, - "grad_norm": 1.8079180717468262, - "learning_rate": 3.93800886782943e-06, - "loss": 0.749, + "epoch": 0.656458993312214, + "grad_norm": 1.842126727104187, + "learning_rate": 3.9043966973048154e-06, + "loss": 0.6377, "step": 9325 }, { - "epoch": 0.7056865044833718, - "grad_norm": 2.1431519985198975, - "learning_rate": 3.936146812709017e-06, - "loss": 0.7066, + "epoch": 0.656529391059486, + "grad_norm": 1.8766865730285645, + "learning_rate": 3.90296522687292e-06, + "loss": 0.624, "step": 9326 }, { - "epoch": 0.7057621732057054, - "grad_norm": 1.8611176013946533, - "learning_rate": 3.934285074703465e-06, - "loss": 0.6619, + "epoch": 0.6565997888067582, + "grad_norm": 1.5321043729782104, + "learning_rate": 3.901533917458669e-06, + "loss": 0.6861, "step": 9327 }, { - "epoch": 0.705837841928039, - "grad_norm": 6.968406677246094, - "learning_rate": 3.932423653929362e-06, - "loss": 0.5841, + "epoch": 0.6566701865540303, + "grad_norm": 1.8581103086471558, + "learning_rate": 3.900102769136477e-06, + "loss": 0.7645, "step": 9328 }, { - "epoch": 0.7059135106503727, - "grad_norm": 2.0079822540283203, - "learning_rate": 3.930562550503284e-06, - "loss": 0.568, + "epoch": 0.6567405843013023, + "grad_norm": 1.873976707458496, + "learning_rate": 3.898671781980746e-06, + "loss": 0.7283, "step": 9329 }, { - "epoch": 0.7059891793727063, - "grad_norm": 2.0030272006988525, - "learning_rate": 3.92870176454178e-06, - "loss": 0.6616, + "epoch": 0.6568109820485745, + "grad_norm": 1.926349401473999, + "learning_rate": 3.89724095606588e-06, + "loss": 0.5764, "step": 9330 }, { - "epoch": 0.7060648480950399, - "grad_norm": 2.209120273590088, - "learning_rate": 3.926841296161369e-06, - "loss": 0.6471, + "epoch": 0.6568813797958465, + "grad_norm": 1.674834966659546, + "learning_rate": 3.895810291466265e-06, + "loss": 0.7369, "step": 9331 }, { - "epoch": 0.7061405168173736, - "grad_norm": 2.2087392807006836, - "learning_rate": 3.924981145478567e-06, - "loss": 0.6976, + "epoch": 0.6569517775431186, + "grad_norm": 2.2116198539733887, + "learning_rate": 3.894379788256285e-06, + "loss": 0.7339, "step": 9332 }, { - "epoch": 0.7062161855397071, - "grad_norm": 2.025752067565918, - "learning_rate": 3.923121312609859e-06, - "loss": 0.6718, + "epoch": 0.6570221752903908, + "grad_norm": 1.9121167659759521, + "learning_rate": 3.892949446510315e-06, + "loss": 0.6056, "step": 9333 }, { - "epoch": 0.7062918542620408, - "grad_norm": 2.695591926574707, - "learning_rate": 3.921261797671714e-06, - "loss": 0.7283, + "epoch": 0.6570925730376628, + "grad_norm": 1.728078842163086, + "learning_rate": 3.891519266302716e-06, + "loss": 0.6296, "step": 9334 }, { - "epoch": 0.7063675229843744, - "grad_norm": 2.5060596466064453, - "learning_rate": 3.9194026007805834e-06, - "loss": 0.7901, + "epoch": 0.6571629707849349, + "grad_norm": 1.9227606058120728, + "learning_rate": 3.8900892477078424e-06, + "loss": 0.7269, "step": 9335 }, { - "epoch": 0.706443191706708, - "grad_norm": 1.7619905471801758, - "learning_rate": 3.917543722052894e-06, - "loss": 0.5261, + "epoch": 0.6572333685322069, + "grad_norm": 1.5952627658843994, + "learning_rate": 3.888659390800048e-06, + "loss": 0.6807, "step": 9336 }, { - "epoch": 0.7065188604290417, - "grad_norm": 2.090834379196167, - "learning_rate": 3.915685161605058e-06, - "loss": 0.6204, + "epoch": 0.6573037662794791, + "grad_norm": 1.7665531635284424, + "learning_rate": 3.887229695653668e-06, + "loss": 0.7226, "step": 9337 }, { - "epoch": 0.7065945291513753, - "grad_norm": 1.739188313484192, - "learning_rate": 3.913826919553457e-06, - "loss": 0.6318, + "epoch": 0.6573741640267512, + "grad_norm": 1.693566083908081, + "learning_rate": 3.885800162343038e-06, + "loss": 0.5786, "step": 9338 }, { - "epoch": 0.7066701978737089, - "grad_norm": 2.2699122428894043, - "learning_rate": 3.911968996014467e-06, - "loss": 0.727, + "epoch": 0.6574445617740232, + "grad_norm": 1.9455152750015259, + "learning_rate": 3.884370790942474e-06, + "loss": 0.6893, "step": 9339 }, { - "epoch": 0.7067458665960425, - "grad_norm": 1.8585529327392578, - "learning_rate": 3.910111391104438e-06, - "loss": 0.6699, + "epoch": 0.6575149595212954, + "grad_norm": 1.8459147214889526, + "learning_rate": 3.882941581526301e-06, + "loss": 0.738, "step": 9340 }, { - "epoch": 0.7068215353183761, - "grad_norm": 2.3249197006225586, - "learning_rate": 3.908254104939695e-06, - "loss": 0.5889, + "epoch": 0.6575853572685674, + "grad_norm": 1.68338942527771, + "learning_rate": 3.881512534168811e-06, + "loss": 0.6283, "step": 9341 }, { - "epoch": 0.7068972040407098, - "grad_norm": 1.8129733800888062, - "learning_rate": 3.906397137636547e-06, - "loss": 0.6422, + "epoch": 0.6576557550158395, + "grad_norm": 1.9208943843841553, + "learning_rate": 3.8800836489443125e-06, + "loss": 0.6059, "step": 9342 }, { - "epoch": 0.7069728727630434, - "grad_norm": 1.9523649215698242, - "learning_rate": 3.9045404893112815e-06, - "loss": 0.6836, + "epoch": 0.6577261527631115, + "grad_norm": 2.097825050354004, + "learning_rate": 3.878654925927087e-06, + "loss": 0.6932, "step": 9343 }, { - "epoch": 0.707048541485377, - "grad_norm": 1.462280035018921, - "learning_rate": 3.902684160080179e-06, - "loss": 0.5477, + "epoch": 0.6577965505103837, + "grad_norm": 2.5287721157073975, + "learning_rate": 3.877226365191423e-06, + "loss": 0.7499, "step": 9344 }, { - "epoch": 0.7071242102077107, - "grad_norm": 2.921168327331543, - "learning_rate": 3.900828150059477e-06, - "loss": 0.5474, + "epoch": 0.6578669482576558, + "grad_norm": 1.8156718015670776, + "learning_rate": 3.875797966811585e-06, + "loss": 0.6296, "step": 9345 }, { - "epoch": 0.7071998789300442, - "grad_norm": 1.826545238494873, - "learning_rate": 3.898972459365409e-06, - "loss": 0.7442, + "epoch": 0.6579373460049278, + "grad_norm": 2.035356044769287, + "learning_rate": 3.874369730861846e-06, + "loss": 0.618, "step": 9346 }, { - "epoch": 0.7072755476523779, - "grad_norm": 1.8657852411270142, - "learning_rate": 3.897117088114185e-06, - "loss": 0.6929, + "epoch": 0.6580077437522, + "grad_norm": 1.8672561645507812, + "learning_rate": 3.872941657416449e-06, + "loss": 0.6737, "step": 9347 }, { - "epoch": 0.7073512163747115, - "grad_norm": 2.4558889865875244, - "learning_rate": 3.895262036421993e-06, - "loss": 0.842, + "epoch": 0.658078141499472, + "grad_norm": 1.7850664854049683, + "learning_rate": 3.8715137465496505e-06, + "loss": 0.6397, "step": 9348 }, { - "epoch": 0.7074268850970451, - "grad_norm": 1.9185665845870972, - "learning_rate": 3.893407304405003e-06, - "loss": 0.6258, + "epoch": 0.6581485392467441, + "grad_norm": 2.1780202388763428, + "learning_rate": 3.870085998335683e-06, + "loss": 0.8194, "step": 9349 }, { - "epoch": 0.7075025538193788, - "grad_norm": 2.26891827583313, - "learning_rate": 3.891552892179365e-06, - "loss": 0.7757, + "epoch": 0.6582189369940162, + "grad_norm": 1.7338881492614746, + "learning_rate": 3.868658412848782e-06, + "loss": 0.7316, "step": 9350 }, { - "epoch": 0.7075782225417124, - "grad_norm": 2.160792589187622, - "learning_rate": 3.88969879986121e-06, - "loss": 0.6637, + "epoch": 0.6582893347412883, + "grad_norm": 1.994356632232666, + "learning_rate": 3.867230990163163e-06, + "loss": 0.6706, "step": 9351 }, { - "epoch": 0.707653891264046, - "grad_norm": 2.357847213745117, - "learning_rate": 3.887845027566642e-06, - "loss": 0.6997, + "epoch": 0.6583597324885604, + "grad_norm": 1.8832377195358276, + "learning_rate": 3.865803730353048e-06, + "loss": 0.7154, "step": 9352 }, { - "epoch": 0.7077295599863797, - "grad_norm": 1.9302562475204468, - "learning_rate": 3.8859915754117505e-06, - "loss": 0.8658, + "epoch": 0.6584301302358324, + "grad_norm": 2.072601079940796, + "learning_rate": 3.8643766334926285e-06, + "loss": 0.6205, "step": 9353 }, { - "epoch": 0.7078052287087132, - "grad_norm": 2.308100700378418, - "learning_rate": 3.884138443512612e-06, - "loss": 0.6944, + "epoch": 0.6585005279831045, + "grad_norm": 1.9571412801742554, + "learning_rate": 3.8629496996561124e-06, + "loss": 0.6285, "step": 9354 }, { - "epoch": 0.7078808974310469, - "grad_norm": 2.170538902282715, - "learning_rate": 3.882285631985269e-06, - "loss": 0.6771, + "epoch": 0.6585709257303767, + "grad_norm": 2.2114007472991943, + "learning_rate": 3.861522928917678e-06, + "loss": 0.676, "step": 9355 }, { - "epoch": 0.7079565661533805, - "grad_norm": 1.9224172830581665, - "learning_rate": 3.880433140945753e-06, - "loss": 0.592, + "epoch": 0.6586413234776487, + "grad_norm": 1.747896671295166, + "learning_rate": 3.860096321351512e-06, + "loss": 0.6466, "step": 9356 }, { - "epoch": 0.7080322348757141, - "grad_norm": 1.849142074584961, - "learning_rate": 3.878580970510071e-06, - "loss": 0.5242, + "epoch": 0.6587117212249208, + "grad_norm": 1.8667545318603516, + "learning_rate": 3.85866987703178e-06, + "loss": 0.6468, "step": 9357 }, { - "epoch": 0.7081079035980478, - "grad_norm": 1.9761168956756592, - "learning_rate": 3.876729120794215e-06, - "loss": 0.706, + "epoch": 0.6587821189721929, + "grad_norm": 1.873183012008667, + "learning_rate": 3.857243596032651e-06, + "loss": 0.6143, "step": 9358 }, { - "epoch": 0.7081835723203813, - "grad_norm": 2.217850685119629, - "learning_rate": 3.87487759191415e-06, - "loss": 0.6843, + "epoch": 0.658852516719465, + "grad_norm": 1.8371046781539917, + "learning_rate": 3.855817478428269e-06, + "loss": 0.7148, "step": 9359 }, { - "epoch": 0.708259241042715, - "grad_norm": 2.1054461002349854, - "learning_rate": 3.873026383985828e-06, - "loss": 0.7485, + "epoch": 0.6589229144667371, + "grad_norm": 2.0181965827941895, + "learning_rate": 3.854391524292785e-06, + "loss": 0.7362, "step": 9360 }, { - "epoch": 0.7083349097650486, - "grad_norm": 1.964118480682373, - "learning_rate": 3.871175497125176e-06, - "loss": 0.5514, + "epoch": 0.6589933122140091, + "grad_norm": 1.776138424873352, + "learning_rate": 3.852965733700332e-06, + "loss": 0.6717, "step": 9361 }, { - "epoch": 0.7084105784873822, - "grad_norm": 1.676416039466858, - "learning_rate": 3.869324931448107e-06, - "loss": 0.587, + "epoch": 0.6590637099612813, + "grad_norm": 1.8375993967056274, + "learning_rate": 3.851540106725045e-06, + "loss": 0.6491, "step": 9362 }, { - "epoch": 0.7084862472097159, - "grad_norm": 1.9723479747772217, - "learning_rate": 3.867474687070502e-06, - "loss": 0.697, + "epoch": 0.6591341077085533, + "grad_norm": 2.354966163635254, + "learning_rate": 3.850114643441035e-06, + "loss": 0.6855, "step": 9363 }, { - "epoch": 0.7085619159320495, - "grad_norm": 2.1898975372314453, - "learning_rate": 3.865624764108229e-06, - "loss": 0.6789, + "epoch": 0.6592045054558254, + "grad_norm": 2.0186827182769775, + "learning_rate": 3.848689343922424e-06, + "loss": 0.563, "step": 9364 }, { - "epoch": 0.7086375846543831, - "grad_norm": 2.4142651557922363, - "learning_rate": 3.863775162677147e-06, - "loss": 0.7163, + "epoch": 0.6592749032030975, + "grad_norm": 1.699950933456421, + "learning_rate": 3.847264208243302e-06, + "loss": 0.5164, "step": 9365 }, { - "epoch": 0.7087132533767168, - "grad_norm": 2.3115365505218506, - "learning_rate": 3.8619258828930725e-06, - "loss": 0.5373, + "epoch": 0.6593453009503696, + "grad_norm": 1.9686071872711182, + "learning_rate": 3.845839236477772e-06, + "loss": 0.7041, "step": 9366 }, { - "epoch": 0.7087889220990503, - "grad_norm": 2.0673863887786865, - "learning_rate": 3.860076924871818e-06, - "loss": 0.5509, + "epoch": 0.6594156986976417, + "grad_norm": 1.9450112581253052, + "learning_rate": 3.844414428699913e-06, + "loss": 0.6658, "step": 9367 }, { - "epoch": 0.708864590821384, - "grad_norm": 2.3030877113342285, - "learning_rate": 3.8582282887291724e-06, - "loss": 0.754, + "epoch": 0.6594860964449137, + "grad_norm": 1.7091954946517944, + "learning_rate": 3.84298978498381e-06, + "loss": 0.6915, "step": 9368 }, { - "epoch": 0.7089402595437176, - "grad_norm": 2.19999361038208, - "learning_rate": 3.856379974580901e-06, - "loss": 0.6528, + "epoch": 0.6595564941921859, + "grad_norm": 1.781018614768982, + "learning_rate": 3.841565305403523e-06, + "loss": 0.7121, "step": 9369 }, { - "epoch": 0.7090159282660512, - "grad_norm": 2.0521130561828613, - "learning_rate": 3.854531982542751e-06, - "loss": 0.7486, + "epoch": 0.6596268919394579, + "grad_norm": 2.0257205963134766, + "learning_rate": 3.84014099003312e-06, + "loss": 0.5844, "step": 9370 }, { - "epoch": 0.7090915969883849, - "grad_norm": 1.9347343444824219, - "learning_rate": 3.852684312730452e-06, - "loss": 0.7396, + "epoch": 0.65969728968673, + "grad_norm": 1.6777924299240112, + "learning_rate": 3.838716838946649e-06, + "loss": 0.6722, "step": 9371 }, { - "epoch": 0.7091672657107184, - "grad_norm": 5.190321922302246, - "learning_rate": 3.850836965259713e-06, - "loss": 0.6111, + "epoch": 0.6597676874340022, + "grad_norm": 1.9068189859390259, + "learning_rate": 3.837292852218151e-06, + "loss": 0.7787, "step": 9372 }, { - "epoch": 0.7092429344330521, - "grad_norm": 1.797953724861145, - "learning_rate": 3.848989940246214e-06, - "loss": 0.6532, + "epoch": 0.6598380851812742, + "grad_norm": 1.8569881916046143, + "learning_rate": 3.83586902992166e-06, + "loss": 0.6965, "step": 9373 }, { - "epoch": 0.7093186031553858, - "grad_norm": 2.0552330017089844, - "learning_rate": 3.847143237805622e-06, - "loss": 0.6478, + "epoch": 0.6599084829285463, + "grad_norm": 1.8083025217056274, + "learning_rate": 3.834445372131208e-06, + "loss": 0.6455, "step": 9374 }, { - "epoch": 0.7093942718777193, - "grad_norm": 2.1826653480529785, - "learning_rate": 3.845296858053591e-06, - "loss": 0.674, + "epoch": 0.6599788806758183, + "grad_norm": 2.1253786087036133, + "learning_rate": 3.833021878920803e-06, + "loss": 0.6295, "step": 9375 }, { - "epoch": 0.709469940600053, - "grad_norm": 2.6305980682373047, - "learning_rate": 3.8434508011057456e-06, - "loss": 0.7174, + "epoch": 0.6600492784230905, + "grad_norm": 2.0754146575927734, + "learning_rate": 3.831598550364462e-06, + "loss": 0.6116, "step": 9376 }, { - "epoch": 0.7095456093223866, - "grad_norm": 2.358619451522827, - "learning_rate": 3.841605067077686e-06, - "loss": 0.7223, + "epoch": 0.6601196761703626, + "grad_norm": 1.8463486433029175, + "learning_rate": 3.8301753865361825e-06, + "loss": 0.6796, "step": 9377 }, { - "epoch": 0.7096212780447202, - "grad_norm": 1.9301419258117676, - "learning_rate": 3.839759656085001e-06, - "loss": 0.6305, + "epoch": 0.6601900739176346, + "grad_norm": 2.059767007827759, + "learning_rate": 3.828752387509952e-06, + "loss": 0.6802, "step": 9378 }, { - "epoch": 0.7096969467670539, - "grad_norm": 1.9100176095962524, - "learning_rate": 3.8379145682432565e-06, - "loss": 0.6708, + "epoch": 0.6602604716649068, + "grad_norm": 1.8597520589828491, + "learning_rate": 3.827329553359759e-06, + "loss": 0.6336, "step": 9379 }, { - "epoch": 0.7097726154893874, - "grad_norm": 2.4053359031677246, - "learning_rate": 3.836069803667998e-06, - "loss": 0.6106, + "epoch": 0.6603308694121788, + "grad_norm": 2.247623920440674, + "learning_rate": 3.825906884159574e-06, + "loss": 0.8309, "step": 9380 }, { - "epoch": 0.7098482842117211, - "grad_norm": 4.036832332611084, - "learning_rate": 3.834225362474753e-06, - "loss": 0.6419, + "epoch": 0.6604012671594509, + "grad_norm": 1.5921657085418701, + "learning_rate": 3.824484379983368e-06, + "loss": 0.5828, "step": 9381 }, { - "epoch": 0.7099239529340547, - "grad_norm": 2.3508119583129883, - "learning_rate": 3.8323812447790205e-06, - "loss": 0.8037, + "epoch": 0.6604716649067229, + "grad_norm": 1.774383544921875, + "learning_rate": 3.823062040905096e-06, + "loss": 0.6137, "step": 9382 }, { - "epoch": 0.7099996216563883, - "grad_norm": 2.4832112789154053, - "learning_rate": 3.830537450696293e-06, - "loss": 0.6607, + "epoch": 0.6605420626539951, + "grad_norm": 2.2428903579711914, + "learning_rate": 3.821639866998704e-06, + "loss": 0.6702, "step": 9383 }, { - "epoch": 0.710075290378722, - "grad_norm": 1.9314616918563843, - "learning_rate": 3.828693980342024e-06, - "loss": 0.7162, + "epoch": 0.6606124604012672, + "grad_norm": 2.34417986869812, + "learning_rate": 3.82021785833813e-06, + "loss": 0.7328, "step": 9384 }, { - "epoch": 0.7101509591010555, - "grad_norm": 2.2306244373321533, - "learning_rate": 3.826850833831668e-06, - "loss": 0.7208, + "epoch": 0.6606828581485392, + "grad_norm": 2.37935209274292, + "learning_rate": 3.8187960149973134e-06, + "loss": 0.6755, "step": 9385 }, { - "epoch": 0.7102266278233892, - "grad_norm": 2.328071355819702, - "learning_rate": 3.825008011280648e-06, - "loss": 0.6431, + "epoch": 0.6607532558958114, + "grad_norm": 1.5201268196105957, + "learning_rate": 3.81737433705017e-06, + "loss": 0.8158, "step": 9386 }, { - "epoch": 0.7103022965457229, - "grad_norm": 1.6222195625305176, - "learning_rate": 3.823165512804361e-06, - "loss": 0.6632, + "epoch": 0.6608236536430834, + "grad_norm": 1.9414503574371338, + "learning_rate": 3.81595282457062e-06, + "loss": 0.6332, "step": 9387 }, { - "epoch": 0.7103779652680564, - "grad_norm": 2.752122640609741, - "learning_rate": 3.821323338518193e-06, - "loss": 0.7188, + "epoch": 0.6608940513903555, + "grad_norm": 2.269308567047119, + "learning_rate": 3.814531477632567e-06, + "loss": 0.7784, "step": 9388 }, { - "epoch": 0.7104536339903901, - "grad_norm": 1.881763219833374, - "learning_rate": 3.819481488537504e-06, - "loss": 0.6389, + "epoch": 0.6609644491376276, + "grad_norm": 2.037278175354004, + "learning_rate": 3.8131102963099074e-06, + "loss": 0.676, "step": 9389 }, { - "epoch": 0.7105293027127237, - "grad_norm": 1.7420378923416138, - "learning_rate": 3.817639962977646e-06, - "loss": 0.7323, + "epoch": 0.6610348468848997, + "grad_norm": 1.9515653848648071, + "learning_rate": 3.8116892806765264e-06, + "loss": 0.6856, "step": 9390 }, { - "epoch": 0.7106049714350573, - "grad_norm": 2.1217947006225586, - "learning_rate": 3.815798761953933e-06, - "loss": 0.7075, + "epoch": 0.6611052446321718, + "grad_norm": 1.9382226467132568, + "learning_rate": 3.81026843080631e-06, + "loss": 0.619, "step": 9391 }, { - "epoch": 0.710680640157391, - "grad_norm": 2.142434597015381, - "learning_rate": 3.813957885581669e-06, - "loss": 0.7825, + "epoch": 0.6611756423794438, + "grad_norm": 1.9385818243026733, + "learning_rate": 3.808847746773123e-06, + "loss": 0.6896, "step": 9392 }, { - "epoch": 0.7107563088797245, - "grad_norm": 2.093893051147461, - "learning_rate": 3.8121173339761356e-06, - "loss": 0.6228, + "epoch": 0.661246040126716, + "grad_norm": 1.860613465309143, + "learning_rate": 3.807427228650836e-06, + "loss": 0.6672, "step": 9393 }, { - "epoch": 0.7108319776020582, - "grad_norm": 2.101154088973999, - "learning_rate": 3.8102771072525944e-06, - "loss": 0.8039, + "epoch": 0.6613164378739881, + "grad_norm": 1.765803337097168, + "learning_rate": 3.8060068765132986e-06, + "loss": 0.688, "step": 9394 }, { - "epoch": 0.7109076463243919, - "grad_norm": 1.9551880359649658, - "learning_rate": 3.8084372055262866e-06, - "loss": 0.7045, + "epoch": 0.6613868356212601, + "grad_norm": 2.1296679973602295, + "learning_rate": 3.8045866904343553e-06, + "loss": 0.5571, "step": 9395 }, { - "epoch": 0.7109833150467254, - "grad_norm": 1.807440996170044, - "learning_rate": 3.8065976289124328e-06, - "loss": 0.6904, + "epoch": 0.6614572333685322, + "grad_norm": 1.6398696899414062, + "learning_rate": 3.803166670487842e-06, + "loss": 0.6189, "step": 9396 }, { - "epoch": 0.7110589837690591, - "grad_norm": 1.9995956420898438, - "learning_rate": 3.8047583775262367e-06, - "loss": 0.7355, + "epoch": 0.6615276311158043, + "grad_norm": 1.6248990297317505, + "learning_rate": 3.8017468167475912e-06, + "loss": 0.6881, "step": 9397 }, { - "epoch": 0.7111346524913927, - "grad_norm": 2.0244057178497314, - "learning_rate": 3.80291945148287e-06, - "loss": 0.6372, + "epoch": 0.6615980288630764, + "grad_norm": 1.6861724853515625, + "learning_rate": 3.8003271292874172e-06, + "loss": 0.4796, "step": 9398 }, { - "epoch": 0.7112103212137263, - "grad_norm": 2.710584878921509, - "learning_rate": 3.801080850897497e-06, - "loss": 0.8311, + "epoch": 0.6616684266103484, + "grad_norm": 1.970700979232788, + "learning_rate": 3.798907608181136e-06, + "loss": 0.6016, "step": 9399 }, { - "epoch": 0.71128598993606, - "grad_norm": 1.9999345541000366, - "learning_rate": 3.7992425758852565e-06, - "loss": 0.6724, + "epoch": 0.6617388243576205, + "grad_norm": 1.733167290687561, + "learning_rate": 3.797488253502548e-06, + "loss": 0.5123, "step": 9400 }, { - "epoch": 0.7113616586583935, - "grad_norm": 2.0285205841064453, - "learning_rate": 3.7974046265612676e-06, - "loss": 0.6539, + "epoch": 0.6618092221048927, + "grad_norm": 1.840609073638916, + "learning_rate": 3.796069065325445e-06, + "loss": 0.7049, "step": 9401 }, { - "epoch": 0.7114373273807272, - "grad_norm": 1.9455727338790894, - "learning_rate": 3.795567003040628e-06, - "loss": 0.472, + "epoch": 0.6618796198521647, + "grad_norm": 1.7726327180862427, + "learning_rate": 3.7946500437236114e-06, + "loss": 0.7589, "step": 9402 }, { - "epoch": 0.7115129961030608, - "grad_norm": 2.234898090362549, - "learning_rate": 3.7937297054384152e-06, - "loss": 0.679, + "epoch": 0.6619500175994368, + "grad_norm": 1.8357254266738892, + "learning_rate": 3.793231188770827e-06, + "loss": 0.6453, "step": 9403 }, { - "epoch": 0.7115886648253944, - "grad_norm": 2.2903592586517334, - "learning_rate": 3.791892733869688e-06, - "loss": 0.5767, + "epoch": 0.6620204153467089, + "grad_norm": 2.0251801013946533, + "learning_rate": 3.7918125005408546e-06, + "loss": 0.7275, "step": 9404 }, { - "epoch": 0.7116643335477281, - "grad_norm": 2.1868555545806885, - "learning_rate": 3.790056088449483e-06, - "loss": 0.6769, + "epoch": 0.662090813093981, + "grad_norm": 1.869101881980896, + "learning_rate": 3.7903939791074584e-06, + "loss": 0.5974, "step": 9405 }, { - "epoch": 0.7117400022700616, - "grad_norm": 2.426342725753784, - "learning_rate": 3.7882197692928168e-06, - "loss": 0.6697, + "epoch": 0.6621612108412531, + "grad_norm": 1.8836418390274048, + "learning_rate": 3.7889756245443865e-06, + "loss": 0.6715, "step": 9406 }, { - "epoch": 0.7118156709923953, - "grad_norm": 3.310873031616211, - "learning_rate": 3.786383776514685e-06, - "loss": 0.6237, + "epoch": 0.6622316085885251, + "grad_norm": 2.020716905593872, + "learning_rate": 3.78755743692538e-06, + "loss": 0.7204, "step": 9407 }, { - "epoch": 0.711891339714729, - "grad_norm": 2.320218324661255, - "learning_rate": 3.784548110230068e-06, - "loss": 0.647, + "epoch": 0.6623020063357973, + "grad_norm": 1.8265376091003418, + "learning_rate": 3.7861394163241683e-06, + "loss": 0.7036, "step": 9408 }, { - "epoch": 0.7119670084370625, - "grad_norm": 1.9522889852523804, - "learning_rate": 3.7827127705539136e-06, - "loss": 0.7945, + "epoch": 0.6623724040830693, + "grad_norm": 1.682831883430481, + "learning_rate": 3.784721562814482e-06, + "loss": 0.688, "step": 9409 }, { - "epoch": 0.7120426771593962, - "grad_norm": 1.851012110710144, - "learning_rate": 3.7808777576011564e-06, - "loss": 0.5536, + "epoch": 0.6624428018303414, + "grad_norm": 1.6723463535308838, + "learning_rate": 3.7833038764700316e-06, + "loss": 0.5509, "step": 9410 }, { - "epoch": 0.7121183458817298, - "grad_norm": 1.8848350048065186, - "learning_rate": 3.7790430714867223e-06, - "loss": 0.6775, + "epoch": 0.6625131995776136, + "grad_norm": 1.6346769332885742, + "learning_rate": 3.7818863573645275e-06, + "loss": 0.74, "step": 9411 }, { - "epoch": 0.7121940146040634, - "grad_norm": 1.8779957294464111, - "learning_rate": 3.777208712325493e-06, - "loss": 0.7499, + "epoch": 0.6625835973248856, + "grad_norm": 1.6294368505477905, + "learning_rate": 3.7804690055716665e-06, + "loss": 0.6559, "step": 9412 }, { - "epoch": 0.7122696833263971, - "grad_norm": 1.9439111948013306, - "learning_rate": 3.775374680232348e-06, - "loss": 0.6631, + "epoch": 0.6626539950721577, + "grad_norm": 2.148592472076416, + "learning_rate": 3.7790518211651384e-06, + "loss": 0.6743, "step": 9413 }, { - "epoch": 0.7123453520487306, - "grad_norm": 2.2685723304748535, - "learning_rate": 3.773540975322138e-06, - "loss": 0.6203, + "epoch": 0.6627243928194297, + "grad_norm": 1.7098876237869263, + "learning_rate": 3.7776348042186197e-06, + "loss": 0.6661, "step": 9414 }, { - "epoch": 0.7124210207710643, - "grad_norm": 2.156620979309082, - "learning_rate": 3.7717075977096973e-06, - "loss": 0.6294, + "epoch": 0.6627947905667019, + "grad_norm": 1.8687469959259033, + "learning_rate": 3.7762179548057884e-06, + "loss": 0.6908, "step": 9415 }, { - "epoch": 0.712496689493398, - "grad_norm": 2.185917854309082, - "learning_rate": 3.7698745475098365e-06, - "loss": 0.7808, + "epoch": 0.662865188313974, + "grad_norm": 1.7020469903945923, + "learning_rate": 3.7748012730003034e-06, + "loss": 0.6844, "step": 9416 }, { - "epoch": 0.7125723582157315, - "grad_norm": 1.884772777557373, - "learning_rate": 3.768041824837349e-06, - "loss": 0.6747, + "epoch": 0.662935586061246, + "grad_norm": 1.7845388650894165, + "learning_rate": 3.7733847588758233e-06, + "loss": 0.731, "step": 9417 }, { - "epoch": 0.7126480269380652, - "grad_norm": 1.9386128187179565, - "learning_rate": 3.766209429807004e-06, - "loss": 0.656, + "epoch": 0.6630059838085182, + "grad_norm": 1.5086592435836792, + "learning_rate": 3.7719684125059915e-06, + "loss": 0.6395, "step": 9418 }, { - "epoch": 0.7127236956603987, - "grad_norm": 1.856753945350647, - "learning_rate": 3.764377362533556e-06, - "loss": 0.4882, + "epoch": 0.6630763815557902, + "grad_norm": 1.7365214824676514, + "learning_rate": 3.7705522339644463e-06, + "loss": 0.6199, "step": 9419 }, { - "epoch": 0.7127993643827324, - "grad_norm": 2.4401142597198486, - "learning_rate": 3.762545623131724e-06, - "loss": 0.654, + "epoch": 0.6631467793030623, + "grad_norm": 1.5664703845977783, + "learning_rate": 3.7691362233248116e-06, + "loss": 0.6601, "step": 9420 }, { - "epoch": 0.7128750331050661, - "grad_norm": 1.9322893619537354, - "learning_rate": 3.7607142117162297e-06, - "loss": 0.7618, + "epoch": 0.6632171770503343, + "grad_norm": 1.9312852621078491, + "learning_rate": 3.7677203806607134e-06, + "loss": 0.592, "step": 9421 }, { - "epoch": 0.7129507018273996, - "grad_norm": 2.7075653076171875, - "learning_rate": 3.7588831284017608e-06, - "loss": 0.6788, + "epoch": 0.6632875747976065, + "grad_norm": 1.7388761043548584, + "learning_rate": 3.7663047060457577e-06, + "loss": 0.7163, "step": 9422 }, { - "epoch": 0.7130263705497333, - "grad_norm": 1.963377833366394, - "learning_rate": 3.757052373302978e-06, - "loss": 0.6509, + "epoch": 0.6633579725448786, + "grad_norm": 2.020277500152588, + "learning_rate": 3.764889199553552e-06, + "loss": 0.6825, "step": 9423 }, { - "epoch": 0.713102039272067, - "grad_norm": 1.9781228303909302, - "learning_rate": 3.7552219465345335e-06, - "loss": 0.5682, + "epoch": 0.6634283702921506, + "grad_norm": 1.5706300735473633, + "learning_rate": 3.763473861257686e-06, + "loss": 0.4898, "step": 9424 }, { - "epoch": 0.7131777079944005, - "grad_norm": 1.9401733875274658, - "learning_rate": 3.7533918482110544e-06, - "loss": 0.5823, + "epoch": 0.6634987680394228, + "grad_norm": 1.9300932884216309, + "learning_rate": 3.762058691231746e-06, + "loss": 0.6349, "step": 9425 }, { - "epoch": 0.7132533767167342, - "grad_norm": 1.7262191772460938, - "learning_rate": 3.7515620784471475e-06, - "loss": 0.6593, + "epoch": 0.6635691657866948, + "grad_norm": 2.148848533630371, + "learning_rate": 3.7606436895493034e-06, + "loss": 0.7609, "step": 9426 }, { - "epoch": 0.7133290454390677, - "grad_norm": 2.252978801727295, - "learning_rate": 3.7497326373573983e-06, - "loss": 0.7555, + "epoch": 0.6636395635339669, + "grad_norm": 2.1542716026306152, + "learning_rate": 3.7592288562839326e-06, + "loss": 0.6681, "step": 9427 }, { - "epoch": 0.7134047141614014, - "grad_norm": 2.0870866775512695, - "learning_rate": 3.747903525056374e-06, - "loss": 0.6717, + "epoch": 0.663709961281239, + "grad_norm": 1.895219326019287, + "learning_rate": 3.757814191509185e-06, + "loss": 0.6336, "step": 9428 }, { - "epoch": 0.713480382883735, - "grad_norm": 2.1834726333618164, - "learning_rate": 3.746074741658621e-06, - "loss": 0.6464, + "epoch": 0.6637803590285111, + "grad_norm": 1.7701058387756348, + "learning_rate": 3.756399695298617e-06, + "loss": 0.5946, "step": 9429 }, { - "epoch": 0.7135560516060686, - "grad_norm": 2.443652629852295, - "learning_rate": 3.744246287278654e-06, - "loss": 0.5819, + "epoch": 0.6638507567757832, + "grad_norm": 1.8151096105575562, + "learning_rate": 3.7549853677257666e-06, + "loss": 0.7242, "step": 9430 }, { - "epoch": 0.7136317203284023, - "grad_norm": 1.8211729526519775, - "learning_rate": 3.742418162030987e-06, - "loss": 0.6914, + "epoch": 0.6639211545230552, + "grad_norm": 1.7594743967056274, + "learning_rate": 3.7535712088641626e-06, + "loss": 0.6536, "step": 9431 }, { - "epoch": 0.7137073890507358, - "grad_norm": 3.0865557193756104, - "learning_rate": 3.740590366030099e-06, - "loss": 0.7489, + "epoch": 0.6639915522703274, + "grad_norm": 1.5877035856246948, + "learning_rate": 3.7521572187873356e-06, + "loss": 0.6477, "step": 9432 }, { - "epoch": 0.7137830577730695, - "grad_norm": 1.6434742212295532, - "learning_rate": 3.738762899390458e-06, - "loss": 0.6222, + "epoch": 0.6640619500175995, + "grad_norm": 1.8904672861099243, + "learning_rate": 3.7507433975687916e-06, + "loss": 0.7276, "step": 9433 }, { - "epoch": 0.7138587264954032, - "grad_norm": 1.8651151657104492, - "learning_rate": 3.736935762226497e-06, - "loss": 0.5819, + "epoch": 0.6641323477648715, + "grad_norm": 2.0647292137145996, + "learning_rate": 3.7493297452820455e-06, + "loss": 0.6654, "step": 9434 }, { - "epoch": 0.7139343952177367, - "grad_norm": 2.248553514480591, - "learning_rate": 3.7351089546526386e-06, - "loss": 0.8123, + "epoch": 0.6642027455121436, + "grad_norm": 1.8412766456604004, + "learning_rate": 3.7479162620005887e-06, + "loss": 0.6895, "step": 9435 }, { - "epoch": 0.7140100639400704, - "grad_norm": 2.241312265396118, - "learning_rate": 3.7332824767832927e-06, - "loss": 0.8631, + "epoch": 0.6642731432594157, + "grad_norm": 1.7119956016540527, + "learning_rate": 3.7465029477979116e-06, + "loss": 0.5475, "step": 9436 }, { - "epoch": 0.714085732662404, - "grad_norm": 2.0975382328033447, - "learning_rate": 3.731456328732829e-06, - "loss": 0.6104, + "epoch": 0.6643435410066878, + "grad_norm": 2.0989201068878174, + "learning_rate": 3.7450898027474896e-06, + "loss": 0.7361, "step": 9437 }, { - "epoch": 0.7141614013847376, - "grad_norm": 1.8900827169418335, - "learning_rate": 3.729630510615611e-06, - "loss": 0.8335, + "epoch": 0.6644139387539598, + "grad_norm": 1.9646848440170288, + "learning_rate": 3.743676826922799e-06, + "loss": 0.7195, "step": 9438 }, { - "epoch": 0.7142370701070713, - "grad_norm": 2.051609992980957, - "learning_rate": 3.7278050225459774e-06, - "loss": 0.6891, + "epoch": 0.664484336501232, + "grad_norm": 2.0146331787109375, + "learning_rate": 3.742264020397297e-06, + "loss": 0.6212, "step": 9439 }, { - "epoch": 0.7143127388294048, - "grad_norm": 2.2935402393341064, - "learning_rate": 3.7259798646382476e-06, - "loss": 0.5638, + "epoch": 0.6645547342485041, + "grad_norm": 1.6136441230773926, + "learning_rate": 3.740851383244441e-06, + "loss": 0.6513, "step": 9440 }, { - "epoch": 0.7143884075517385, - "grad_norm": 2.171649217605591, - "learning_rate": 3.724155037006711e-06, - "loss": 0.7834, + "epoch": 0.6646251319957761, + "grad_norm": 1.706161379814148, + "learning_rate": 3.739438915537674e-06, + "loss": 0.5638, "step": 9441 }, { - "epoch": 0.7144640762740722, - "grad_norm": 2.2171740531921387, - "learning_rate": 3.7223305397656537e-06, - "loss": 0.5864, + "epoch": 0.6646955297430482, + "grad_norm": 1.7694684267044067, + "learning_rate": 3.73802661735043e-06, + "loss": 0.7085, "step": 9442 }, { - "epoch": 0.7145397449964057, - "grad_norm": 2.0101137161254883, - "learning_rate": 3.7205063730293306e-06, - "loss": 0.5468, + "epoch": 0.6647659274903203, + "grad_norm": 1.7655645608901978, + "learning_rate": 3.7366144887561344e-06, + "loss": 0.7057, "step": 9443 }, { - "epoch": 0.7146154137187394, - "grad_norm": 1.868871808052063, - "learning_rate": 3.718682536911972e-06, - "loss": 0.6371, + "epoch": 0.6648363252375924, + "grad_norm": 2.1455209255218506, + "learning_rate": 3.7352025298282098e-06, + "loss": 0.6754, "step": 9444 }, { - "epoch": 0.7146910824410729, - "grad_norm": 1.9993494749069214, - "learning_rate": 3.716859031527794e-06, - "loss": 0.8044, + "epoch": 0.6649067229848645, + "grad_norm": 1.7925257682800293, + "learning_rate": 3.7337907406400596e-06, + "loss": 0.7276, "step": 9445 }, { - "epoch": 0.7147667511634066, - "grad_norm": 2.727073907852173, - "learning_rate": 3.715035856990989e-06, - "loss": 0.6426, + "epoch": 0.6649771207321366, + "grad_norm": 1.767127513885498, + "learning_rate": 3.73237912126509e-06, + "loss": 0.6557, "step": 9446 }, { - "epoch": 0.7148424198857403, - "grad_norm": 1.9772788286209106, - "learning_rate": 3.7132130134157373e-06, - "loss": 0.6816, + "epoch": 0.6650475184794087, + "grad_norm": 1.6553949117660522, + "learning_rate": 3.730967671776685e-06, + "loss": 0.672, "step": 9447 }, { - "epoch": 0.7149180886080738, - "grad_norm": 1.7466607093811035, - "learning_rate": 3.7113905009161843e-06, - "loss": 0.5874, + "epoch": 0.6651179162266807, + "grad_norm": 1.9977774620056152, + "learning_rate": 3.7295563922482383e-06, + "loss": 0.6976, "step": 9448 }, { - "epoch": 0.7149937573304075, - "grad_norm": 2.1143674850463867, - "learning_rate": 3.7095683196064624e-06, - "loss": 0.7799, + "epoch": 0.6651883139739528, + "grad_norm": 2.2203333377838135, + "learning_rate": 3.72814528275311e-06, + "loss": 0.8528, "step": 9449 }, { - "epoch": 0.7150694260527412, - "grad_norm": 2.3886454105377197, - "learning_rate": 3.707746469600685e-06, - "loss": 0.5778, + "epoch": 0.665258711721225, + "grad_norm": 1.574150800704956, + "learning_rate": 3.726734343364675e-06, + "loss": 0.7574, "step": 9450 }, { - "epoch": 0.7151450947750747, - "grad_norm": 1.7100965976715088, - "learning_rate": 3.7059249510129392e-06, - "loss": 0.6333, + "epoch": 0.665329109468497, + "grad_norm": 1.5870815515518188, + "learning_rate": 3.725323574156283e-06, + "loss": 0.73, "step": 9451 }, { - "epoch": 0.7152207634974084, - "grad_norm": 2.1997292041778564, - "learning_rate": 3.7041037639572976e-06, - "loss": 0.698, + "epoch": 0.6653995072157691, + "grad_norm": 1.824533462524414, + "learning_rate": 3.7239129752012874e-06, + "loss": 0.6344, "step": 9452 }, { - "epoch": 0.7152964322197419, - "grad_norm": 2.3083553314208984, - "learning_rate": 3.7022829085478066e-06, - "loss": 0.6241, + "epoch": 0.6654699049630411, + "grad_norm": 1.805981993675232, + "learning_rate": 3.7225025465730195e-06, + "loss": 0.6628, "step": 9453 }, { - "epoch": 0.7153721009420756, - "grad_norm": 2.3041303157806396, - "learning_rate": 3.7004623848984977e-06, - "loss": 0.6932, + "epoch": 0.6655403027103133, + "grad_norm": 1.9066455364227295, + "learning_rate": 3.7210922883448193e-06, + "loss": 0.6215, "step": 9454 }, { - "epoch": 0.7154477696644093, - "grad_norm": 1.8793962001800537, - "learning_rate": 3.698642193123373e-06, - "loss": 0.5792, + "epoch": 0.6656107004575853, + "grad_norm": 2.1212494373321533, + "learning_rate": 3.719682200589994e-06, + "loss": 0.7001, "step": 9455 }, { - "epoch": 0.7155234383867428, - "grad_norm": 2.057422161102295, - "learning_rate": 3.696822333336416e-06, - "loss": 0.6926, + "epoch": 0.6656810982048574, + "grad_norm": 1.9196668863296509, + "learning_rate": 3.7182722833818653e-06, + "loss": 0.6521, "step": 9456 }, { - "epoch": 0.7155991071090765, - "grad_norm": 1.9454712867736816, - "learning_rate": 3.695002805651605e-06, - "loss": 0.6216, + "epoch": 0.6657514959521296, + "grad_norm": 2.0645368099212646, + "learning_rate": 3.71686253679373e-06, + "loss": 0.6809, "step": 9457 }, { - "epoch": 0.71567477583141, - "grad_norm": 2.082719326019287, - "learning_rate": 3.693183610182873e-06, - "loss": 0.6695, + "epoch": 0.6658218936994016, + "grad_norm": 1.5834754705429077, + "learning_rate": 3.715452960898887e-06, + "loss": 0.5932, "step": 9458 }, { - "epoch": 0.7157504445537437, - "grad_norm": 3.1850838661193848, - "learning_rate": 3.691364747044147e-06, - "loss": 0.7003, + "epoch": 0.6658922914466737, + "grad_norm": 1.7434378862380981, + "learning_rate": 3.7140435557706167e-06, + "loss": 0.7071, "step": 9459 }, { - "epoch": 0.7158261132760774, - "grad_norm": 2.776329517364502, - "learning_rate": 3.6895462163493316e-06, - "loss": 0.6318, + "epoch": 0.6659626891939457, + "grad_norm": 1.925760269165039, + "learning_rate": 3.712634321482203e-06, + "loss": 0.7043, "step": 9460 }, { - "epoch": 0.7159017819984109, - "grad_norm": 1.9540523290634155, - "learning_rate": 3.6877280182123084e-06, - "loss": 0.7524, + "epoch": 0.6660330869412179, + "grad_norm": 1.6326918601989746, + "learning_rate": 3.7112252581069033e-06, + "loss": 0.7072, "step": 9461 }, { - "epoch": 0.7159774507207446, - "grad_norm": 2.246755599975586, - "learning_rate": 3.6859101527469375e-06, - "loss": 0.6448, + "epoch": 0.66610348468849, + "grad_norm": 1.5848122835159302, + "learning_rate": 3.7098163657179824e-06, + "loss": 0.7213, "step": 9462 }, { - "epoch": 0.7160531194430783, - "grad_norm": 1.626090168952942, - "learning_rate": 3.684092620067062e-06, - "loss": 0.6897, + "epoch": 0.666173882435762, + "grad_norm": 2.198622465133667, + "learning_rate": 3.7084076443886875e-06, + "loss": 0.6907, "step": 9463 }, { - "epoch": 0.7161287881654118, - "grad_norm": 2.5425055027008057, - "learning_rate": 3.6822754202864992e-06, - "loss": 0.7054, + "epoch": 0.6662442801830342, + "grad_norm": 1.8447812795639038, + "learning_rate": 3.7069990941922622e-06, + "loss": 0.6861, "step": 9464 }, { - "epoch": 0.7162044568877455, - "grad_norm": 2.3118436336517334, - "learning_rate": 3.680458553519053e-06, - "loss": 0.6208, + "epoch": 0.6663146779303062, + "grad_norm": 1.9498350620269775, + "learning_rate": 3.7055907152019335e-06, + "loss": 0.6605, "step": 9465 }, { - "epoch": 0.716280125610079, - "grad_norm": 2.1892473697662354, - "learning_rate": 3.678642019878491e-06, - "loss": 0.6901, + "epoch": 0.6663850756775783, + "grad_norm": 2.246990203857422, + "learning_rate": 3.7041825074909325e-06, + "loss": 0.7552, "step": 9466 }, { - "epoch": 0.7163557943324127, - "grad_norm": 1.977307915687561, - "learning_rate": 3.676825819478581e-06, - "loss": 0.7049, + "epoch": 0.6664554734248505, + "grad_norm": 2.06561541557312, + "learning_rate": 3.702774471132462e-06, + "loss": 0.6062, "step": 9467 }, { - "epoch": 0.7164314630547464, - "grad_norm": 2.380528450012207, - "learning_rate": 3.6750099524330575e-06, - "loss": 0.6314, + "epoch": 0.6665258711721225, + "grad_norm": 1.8024877309799194, + "learning_rate": 3.701366606199736e-06, + "loss": 0.62, "step": 9468 }, { - "epoch": 0.7165071317770799, - "grad_norm": 2.11545729637146, - "learning_rate": 3.6731944188556317e-06, - "loss": 0.7005, + "epoch": 0.6665962689193946, + "grad_norm": 1.9804096221923828, + "learning_rate": 3.6999589127659445e-06, + "loss": 0.6381, "step": 9469 }, { - "epoch": 0.7165828004994136, - "grad_norm": 2.1556060314178467, - "learning_rate": 3.6713792188599997e-06, - "loss": 0.6029, + "epoch": 0.6666666666666666, + "grad_norm": 1.9676563739776611, + "learning_rate": 3.6985513909042815e-06, + "loss": 0.5617, "step": 9470 }, { - "epoch": 0.7166584692217471, - "grad_norm": 1.7340325117111206, - "learning_rate": 3.669564352559837e-06, - "loss": 0.6378, + "epoch": 0.6667370644139388, + "grad_norm": 2.255150556564331, + "learning_rate": 3.697144040687918e-06, + "loss": 0.6844, "step": 9471 }, { - "epoch": 0.7167341379440808, - "grad_norm": 1.669938325881958, - "learning_rate": 3.6677498200687934e-06, - "loss": 0.709, + "epoch": 0.6668074621612109, + "grad_norm": 2.004815101623535, + "learning_rate": 3.6957368621900332e-06, + "loss": 0.6601, "step": 9472 }, { - "epoch": 0.7168098066664145, - "grad_norm": 2.10733699798584, - "learning_rate": 3.6659356215005036e-06, - "loss": 0.728, + "epoch": 0.6668778599084829, + "grad_norm": 1.783914566040039, + "learning_rate": 3.694329855483775e-06, + "loss": 0.754, "step": 9473 }, { - "epoch": 0.716885475388748, - "grad_norm": 1.7881940603256226, - "learning_rate": 3.6641217569685783e-06, - "loss": 0.5225, + "epoch": 0.666948257655755, + "grad_norm": 1.7357683181762695, + "learning_rate": 3.692923020642305e-06, + "loss": 0.6352, "step": 9474 }, { - "epoch": 0.7169611441110817, - "grad_norm": 1.5777084827423096, - "learning_rate": 3.6623082265866098e-06, - "loss": 0.5302, + "epoch": 0.6670186554030271, + "grad_norm": 2.2291619777679443, + "learning_rate": 3.6915163577387594e-06, + "loss": 0.6928, "step": 9475 }, { - "epoch": 0.7170368128334154, - "grad_norm": 2.004207134246826, - "learning_rate": 3.66049503046816e-06, - "loss": 0.8483, + "epoch": 0.6670890531502992, + "grad_norm": 1.7138569355010986, + "learning_rate": 3.690109866846277e-06, + "loss": 0.6203, "step": 9476 }, { - "epoch": 0.7171124815557489, - "grad_norm": 1.8468573093414307, - "learning_rate": 3.658682168726779e-06, - "loss": 0.7662, + "epoch": 0.6671594508975712, + "grad_norm": 1.8721575736999512, + "learning_rate": 3.6887035480379772e-06, + "loss": 0.6842, "step": 9477 }, { - "epoch": 0.7171881502780826, - "grad_norm": 1.8332854509353638, - "learning_rate": 3.6568696414760007e-06, - "loss": 0.5098, + "epoch": 0.6672298486448434, + "grad_norm": 1.749845027923584, + "learning_rate": 3.687297401386983e-06, + "loss": 0.6819, "step": 9478 }, { - "epoch": 0.7172638190004161, - "grad_norm": 1.9783474206924438, - "learning_rate": 3.6550574488293284e-06, - "loss": 0.5637, + "epoch": 0.6673002463921155, + "grad_norm": 2.062042713165283, + "learning_rate": 3.685891426966396e-06, + "loss": 0.5885, "step": 9479 }, { - "epoch": 0.7173394877227498, - "grad_norm": 1.8308309316635132, - "learning_rate": 3.6532455909002453e-06, - "loss": 0.7741, + "epoch": 0.6673706441393875, + "grad_norm": 1.974887728691101, + "learning_rate": 3.6844856248493123e-06, + "loss": 0.7419, "step": 9480 }, { - "epoch": 0.7174151564450835, - "grad_norm": 2.1236815452575684, - "learning_rate": 3.6514340678022155e-06, - "loss": 0.6946, + "epoch": 0.6674410418866596, + "grad_norm": 1.9293524026870728, + "learning_rate": 3.683079995108827e-06, + "loss": 0.6923, "step": 9481 }, { - "epoch": 0.717490825167417, - "grad_norm": 2.0192248821258545, - "learning_rate": 3.649622879648684e-06, - "loss": 0.6707, + "epoch": 0.6675114396339317, + "grad_norm": 2.1494898796081543, + "learning_rate": 3.681674537818014e-06, + "loss": 0.762, "step": 9482 }, { - "epoch": 0.7175664938897507, - "grad_norm": 2.7706687450408936, - "learning_rate": 3.647812026553073e-06, - "loss": 0.6171, + "epoch": 0.6675818373812038, + "grad_norm": 1.9716849327087402, + "learning_rate": 3.680269253049949e-06, + "loss": 0.6586, "step": 9483 }, { - "epoch": 0.7176421626120842, - "grad_norm": 2.084230661392212, - "learning_rate": 3.6460015086287838e-06, - "loss": 0.6501, + "epoch": 0.6676522351284759, + "grad_norm": 1.6840028762817383, + "learning_rate": 3.678864140877693e-06, + "loss": 0.8098, "step": 9484 }, { - "epoch": 0.7177178313344179, - "grad_norm": 2.3443830013275146, - "learning_rate": 3.6441913259891964e-06, - "loss": 0.6697, + "epoch": 0.667722632875748, + "grad_norm": 1.8613474369049072, + "learning_rate": 3.6774592013742976e-06, + "loss": 0.6336, "step": 9485 }, { - "epoch": 0.7177935000567516, - "grad_norm": 2.1774091720581055, - "learning_rate": 3.6423814787476756e-06, - "loss": 0.6251, + "epoch": 0.6677930306230201, + "grad_norm": 2.0760087966918945, + "learning_rate": 3.676054434612804e-06, + "loss": 0.5454, "step": 9486 }, { - "epoch": 0.7178691687790851, - "grad_norm": 1.484096646308899, - "learning_rate": 3.640571967017548e-06, - "loss": 0.6349, + "epoch": 0.6678634283702921, + "grad_norm": 1.9296091794967651, + "learning_rate": 3.6746498406662544e-06, + "loss": 0.6302, "step": 9487 }, { - "epoch": 0.7179448375014188, - "grad_norm": 1.9635212421417236, - "learning_rate": 3.638762790912142e-06, - "loss": 0.5859, + "epoch": 0.6679338261175642, + "grad_norm": 1.5954692363739014, + "learning_rate": 3.6732454196076686e-06, + "loss": 0.5939, "step": 9488 }, { - "epoch": 0.7180205062237525, - "grad_norm": 1.664218544960022, - "learning_rate": 3.636953950544753e-06, - "loss": 0.7502, + "epoch": 0.6680042238648364, + "grad_norm": 1.4578001499176025, + "learning_rate": 3.671841171510068e-06, + "loss": 0.834, "step": 9489 }, { - "epoch": 0.718096174946086, - "grad_norm": 1.7915072441101074, - "learning_rate": 3.635145446028651e-06, - "loss": 0.6983, + "epoch": 0.6680746216121084, + "grad_norm": 1.9700286388397217, + "learning_rate": 3.6704370964464604e-06, + "loss": 0.7237, "step": 9490 }, { - "epoch": 0.7181718436684197, - "grad_norm": 2.0278759002685547, - "learning_rate": 3.6333372774770926e-06, - "loss": 0.6947, + "epoch": 0.6681450193593805, + "grad_norm": 1.9430780410766602, + "learning_rate": 3.669033194489842e-06, + "loss": 0.6651, "step": 9491 }, { - "epoch": 0.7182475123907532, - "grad_norm": 1.7070521116256714, - "learning_rate": 3.631529445003309e-06, - "loss": 0.6624, + "epoch": 0.6682154171066526, + "grad_norm": 1.8464229106903076, + "learning_rate": 3.6676294657132033e-06, + "loss": 0.5712, "step": 9492 }, { - "epoch": 0.7183231811130869, - "grad_norm": 1.7862852811813354, - "learning_rate": 3.629721948720522e-06, - "loss": 0.6233, + "epoch": 0.6682858148539247, + "grad_norm": 1.7033932209014893, + "learning_rate": 3.666225910189529e-06, + "loss": 0.6762, "step": 9493 }, { - "epoch": 0.7183988498354206, - "grad_norm": 1.671974778175354, - "learning_rate": 3.6279147887419135e-06, - "loss": 0.7435, + "epoch": 0.6683562126011967, + "grad_norm": 1.6306568384170532, + "learning_rate": 3.6648225279917855e-06, + "loss": 0.7623, "step": 9494 }, { - "epoch": 0.7184745185577541, - "grad_norm": 2.4894473552703857, - "learning_rate": 3.6261079651806546e-06, - "loss": 0.6102, + "epoch": 0.6684266103484688, + "grad_norm": 1.611977458000183, + "learning_rate": 3.6634193191929425e-06, + "loss": 0.6986, "step": 9495 }, { - "epoch": 0.7185501872800878, - "grad_norm": 2.10819673538208, - "learning_rate": 3.624301478149897e-06, - "loss": 0.6282, + "epoch": 0.668497008095741, + "grad_norm": 1.513511300086975, + "learning_rate": 3.662016283865951e-06, + "loss": 0.734, "step": 9496 }, { - "epoch": 0.7186258560024213, - "grad_norm": 2.059037923812866, - "learning_rate": 3.6224953277627686e-06, - "loss": 0.6832, + "epoch": 0.668567405843013, + "grad_norm": 1.7697981595993042, + "learning_rate": 3.660613422083756e-06, + "loss": 0.6212, "step": 9497 }, { - "epoch": 0.718701524724755, - "grad_norm": 2.2662110328674316, - "learning_rate": 3.620689514132375e-06, - "loss": 0.6392, + "epoch": 0.6686378035902851, + "grad_norm": 1.6116514205932617, + "learning_rate": 3.6592107339192894e-06, + "loss": 0.7491, "step": 9498 }, { - "epoch": 0.7187771934470887, - "grad_norm": 1.8977314233779907, - "learning_rate": 3.6188840373718028e-06, - "loss": 0.6685, + "epoch": 0.6687082013375572, + "grad_norm": 1.8408095836639404, + "learning_rate": 3.6578082194454866e-06, + "loss": 0.7408, "step": 9499 }, { - "epoch": 0.7188528621694222, - "grad_norm": 1.8836039304733276, - "learning_rate": 3.617078897594121e-06, - "loss": 0.7153, + "epoch": 0.6687785990848293, + "grad_norm": 2.066254138946533, + "learning_rate": 3.6564058787352583e-06, + "loss": 0.8014, "step": 9500 }, { - "epoch": 0.7189285308917559, - "grad_norm": 1.4642912149429321, - "learning_rate": 3.6152740949123648e-06, - "loss": 0.5909, + "epoch": 0.6688489968321014, + "grad_norm": 2.787003993988037, + "learning_rate": 3.6550037118615195e-06, + "loss": 0.6655, "step": 9501 }, { - "epoch": 0.7190041996140896, - "grad_norm": 1.8887202739715576, - "learning_rate": 3.6134696294395585e-06, - "loss": 0.726, + "epoch": 0.6689193945793734, + "grad_norm": 1.6057971715927124, + "learning_rate": 3.6536017188971673e-06, + "loss": 0.7232, "step": 9502 }, { - "epoch": 0.7190798683364231, - "grad_norm": 2.040818214416504, - "learning_rate": 3.6116655012887122e-06, - "loss": 0.6889, + "epoch": 0.6689897923266456, + "grad_norm": 1.6754474639892578, + "learning_rate": 3.6521998999150913e-06, + "loss": 0.5428, "step": 9503 }, { - "epoch": 0.7191555370587568, - "grad_norm": 1.7351603507995605, - "learning_rate": 3.6098617105727973e-06, - "loss": 0.7366, + "epoch": 0.6690601900739176, + "grad_norm": 1.8186367750167847, + "learning_rate": 3.650798254988173e-06, + "loss": 0.5656, "step": 9504 }, { - "epoch": 0.7192312057810903, - "grad_norm": 1.4561235904693604, - "learning_rate": 3.608058257404776e-06, - "loss": 0.6087, + "epoch": 0.6691305878211897, + "grad_norm": 1.7936228513717651, + "learning_rate": 3.649396784189288e-06, + "loss": 0.6159, "step": 9505 }, { - "epoch": 0.719306874503424, - "grad_norm": 3.0228309631347656, - "learning_rate": 3.606255141897586e-06, - "loss": 0.8051, + "epoch": 0.6692009855684619, + "grad_norm": 2.2544877529144287, + "learning_rate": 3.6479954875912966e-06, + "loss": 0.6732, "step": 9506 }, { - "epoch": 0.7193825432257577, - "grad_norm": 1.730713129043579, - "learning_rate": 3.6044523641641448e-06, - "loss": 0.685, + "epoch": 0.6692713833157339, + "grad_norm": 2.093235731124878, + "learning_rate": 3.646594365267058e-06, + "loss": 0.6031, "step": 9507 }, { - "epoch": 0.7194582119480912, - "grad_norm": 2.0511322021484375, - "learning_rate": 3.6026499243173475e-06, - "loss": 0.7083, + "epoch": 0.669341781063006, + "grad_norm": 1.6977181434631348, + "learning_rate": 3.645193417289416e-06, + "loss": 0.7282, "step": 9508 }, { - "epoch": 0.7195338806704249, - "grad_norm": 2.072368860244751, - "learning_rate": 3.6008478224700685e-06, - "loss": 0.6813, + "epoch": 0.669412178810278, + "grad_norm": 1.627936601638794, + "learning_rate": 3.643792643731206e-06, + "loss": 0.6186, "step": 9509 }, { - "epoch": 0.7196095493927585, - "grad_norm": 1.9627420902252197, - "learning_rate": 3.5990460587351625e-06, - "loss": 0.7139, + "epoch": 0.6694825765575502, + "grad_norm": 1.8520921468734741, + "learning_rate": 3.642392044665254e-06, + "loss": 0.6344, "step": 9510 }, { - "epoch": 0.7196852181150921, - "grad_norm": 2.194145441055298, - "learning_rate": 3.5972446332254646e-06, - "loss": 0.5347, + "epoch": 0.6695529743048222, + "grad_norm": 1.7636120319366455, + "learning_rate": 3.640991620164382e-06, + "loss": 0.6929, "step": 9511 }, { - "epoch": 0.7197608868374258, - "grad_norm": 1.782472848892212, - "learning_rate": 3.595443546053776e-06, - "loss": 0.6956, + "epoch": 0.6696233720520943, + "grad_norm": 1.7518593072891235, + "learning_rate": 3.639591370301396e-06, + "loss": 0.5579, "step": 9512 }, { - "epoch": 0.7198365555597593, - "grad_norm": 1.7157336473464966, - "learning_rate": 3.5936427973328957e-06, - "loss": 0.7086, + "epoch": 0.6696937697993665, + "grad_norm": 1.8623628616333008, + "learning_rate": 3.6381912951491003e-06, + "loss": 0.6431, "step": 9513 }, { - "epoch": 0.719912224282093, - "grad_norm": 2.2964377403259277, - "learning_rate": 3.591842387175593e-06, - "loss": 0.7328, + "epoch": 0.6697641675466385, + "grad_norm": 1.692836046218872, + "learning_rate": 3.6367913947802838e-06, + "loss": 0.6742, "step": 9514 }, { - "epoch": 0.7199878930044267, - "grad_norm": 1.782142996788025, - "learning_rate": 3.590042315694609e-06, - "loss": 0.7607, + "epoch": 0.6698345652939106, + "grad_norm": 1.660165548324585, + "learning_rate": 3.635391669267729e-06, + "loss": 0.6749, "step": 9515 }, { - "epoch": 0.7200635617267602, - "grad_norm": 2.2905352115631104, - "learning_rate": 3.588242583002674e-06, - "loss": 0.6966, + "epoch": 0.6699049630411826, + "grad_norm": 1.7880569696426392, + "learning_rate": 3.6339921186842055e-06, + "loss": 0.6573, "step": 9516 }, { - "epoch": 0.7201392304490939, - "grad_norm": 1.8695130348205566, - "learning_rate": 3.5864431892124913e-06, - "loss": 0.6522, + "epoch": 0.6699753607884548, + "grad_norm": 1.8317365646362305, + "learning_rate": 3.632592743102483e-06, + "loss": 0.6833, "step": 9517 }, { - "epoch": 0.7202148991714274, - "grad_norm": 1.8097631931304932, - "learning_rate": 3.5846441344367456e-06, - "loss": 0.6304, + "epoch": 0.6700457585357269, + "grad_norm": 1.6970726251602173, + "learning_rate": 3.631193542595311e-06, + "loss": 0.7091, "step": 9518 }, { - "epoch": 0.7202905678937611, - "grad_norm": 4.14026403427124, - "learning_rate": 3.5828454187881e-06, - "loss": 0.5817, + "epoch": 0.6701161562829989, + "grad_norm": 1.8068472146987915, + "learning_rate": 3.62979451723544e-06, + "loss": 0.5906, "step": 9519 }, { - "epoch": 0.7203662366160948, - "grad_norm": 1.8867014646530151, - "learning_rate": 3.581047042379195e-06, - "loss": 0.5896, + "epoch": 0.670186554030271, + "grad_norm": 1.910352349281311, + "learning_rate": 3.6283956670956037e-06, + "loss": 0.6304, "step": 9520 }, { - "epoch": 0.7204419053384283, - "grad_norm": 2.1675286293029785, - "learning_rate": 3.579249005322652e-06, - "loss": 0.7204, + "epoch": 0.6702569517775431, + "grad_norm": 1.8509044647216797, + "learning_rate": 3.6269969922485303e-06, + "loss": 0.6171, "step": 9521 }, { - "epoch": 0.720517574060762, - "grad_norm": 2.6543667316436768, - "learning_rate": 3.577451307731071e-06, - "loss": 0.7004, + "epoch": 0.6703273495248152, + "grad_norm": 1.9555388689041138, + "learning_rate": 3.6255984927669363e-06, + "loss": 0.5205, "step": 9522 }, { - "epoch": 0.7205932427830956, - "grad_norm": 2.085458517074585, - "learning_rate": 3.575653949717022e-06, - "loss": 0.645, + "epoch": 0.6703977472720873, + "grad_norm": 1.9409325122833252, + "learning_rate": 3.624200168723535e-06, + "loss": 0.6705, "step": 9523 }, { - "epoch": 0.7206689115054292, - "grad_norm": 2.0255820751190186, - "learning_rate": 3.5738569313930702e-06, - "loss": 0.6807, + "epoch": 0.6704681450193594, + "grad_norm": 1.757805585861206, + "learning_rate": 3.622802020191022e-06, + "loss": 0.8119, "step": 9524 }, { - "epoch": 0.7207445802277629, - "grad_norm": 2.1324872970581055, - "learning_rate": 3.572060252871752e-06, - "loss": 0.6868, + "epoch": 0.6705385427666315, + "grad_norm": 4.313814640045166, + "learning_rate": 3.6214040472420923e-06, + "loss": 0.6612, "step": 9525 }, { - "epoch": 0.7208202489500964, - "grad_norm": 1.980837345123291, - "learning_rate": 3.570263914265572e-06, - "loss": 0.5308, + "epoch": 0.6706089405139035, + "grad_norm": 1.7196035385131836, + "learning_rate": 3.6200062499494267e-06, + "loss": 0.6445, "step": 9526 }, { - "epoch": 0.7208959176724301, - "grad_norm": 2.0542397499084473, - "learning_rate": 3.5684679156870284e-06, - "loss": 0.6264, + "epoch": 0.6706793382611757, + "grad_norm": 1.7752717733383179, + "learning_rate": 3.6186086283856972e-06, + "loss": 0.7024, "step": 9527 }, { - "epoch": 0.7209715863947638, - "grad_norm": 2.3621749877929688, - "learning_rate": 3.5666722572485916e-06, - "loss": 0.7101, + "epoch": 0.6707497360084478, + "grad_norm": 1.9368467330932617, + "learning_rate": 3.6172111826235645e-06, + "loss": 0.7656, "step": 9528 }, { - "epoch": 0.7210472551170973, - "grad_norm": 2.299753189086914, - "learning_rate": 3.564876939062711e-06, - "loss": 0.6421, + "epoch": 0.6708201337557198, + "grad_norm": 1.908431887626648, + "learning_rate": 3.615813912735686e-06, + "loss": 0.6182, "step": 9529 }, { - "epoch": 0.721122923839431, - "grad_norm": 1.9980701208114624, - "learning_rate": 3.5630819612418172e-06, - "loss": 0.7892, + "epoch": 0.6708905315029919, + "grad_norm": 1.8805185556411743, + "learning_rate": 3.6144168187947103e-06, + "loss": 0.6937, "step": 9530 }, { - "epoch": 0.7211985925617646, - "grad_norm": 1.7415344715118408, - "learning_rate": 3.5612873238983153e-06, - "loss": 0.6191, + "epoch": 0.670960929250264, + "grad_norm": 1.5054733753204346, + "learning_rate": 3.6130199008732695e-06, + "loss": 0.6798, "step": 9531 }, { - "epoch": 0.7212742612840982, - "grad_norm": 2.2024734020233154, - "learning_rate": 3.5594930271445946e-06, - "loss": 0.6404, + "epoch": 0.6710313269975361, + "grad_norm": 1.8607593774795532, + "learning_rate": 3.6116231590439916e-06, + "loss": 0.7082, "step": 9532 }, { - "epoch": 0.7213499300064319, - "grad_norm": 2.5525379180908203, - "learning_rate": 3.557699071093012e-06, - "loss": 0.7167, + "epoch": 0.6711017247448081, + "grad_norm": 1.6252061128616333, + "learning_rate": 3.6102265933794928e-06, + "loss": 0.5119, "step": 9533 }, { - "epoch": 0.7214255987287654, - "grad_norm": 2.1622018814086914, - "learning_rate": 3.5559054558559193e-06, - "loss": 0.6134, + "epoch": 0.6711721224920802, + "grad_norm": 1.8360724449157715, + "learning_rate": 3.6088302039523853e-06, + "loss": 0.641, "step": 9534 }, { - "epoch": 0.7215012674510991, - "grad_norm": 2.7653920650482178, - "learning_rate": 3.5541121815456345e-06, - "loss": 0.6996, + "epoch": 0.6712425202393524, + "grad_norm": 1.9635742902755737, + "learning_rate": 3.607433990835264e-06, + "loss": 0.6628, "step": 9535 }, { - "epoch": 0.7215769361734327, - "grad_norm": 2.7878835201263428, - "learning_rate": 3.5523192482744618e-06, - "loss": 0.6121, + "epoch": 0.6713129179866244, + "grad_norm": 2.2618408203125, + "learning_rate": 3.6060379541007246e-06, + "loss": 0.6885, "step": 9536 }, { - "epoch": 0.7216526048957663, - "grad_norm": 1.8439620733261108, - "learning_rate": 3.5505266561546753e-06, - "loss": 0.6619, + "epoch": 0.6713833157338965, + "grad_norm": 2.410571336746216, + "learning_rate": 3.6046420938213457e-06, + "loss": 0.688, "step": 9537 }, { - "epoch": 0.7217282736181, - "grad_norm": 2.6026670932769775, - "learning_rate": 3.5487344052985323e-06, - "loss": 0.7525, + "epoch": 0.6714537134811686, + "grad_norm": 1.7200067043304443, + "learning_rate": 3.6032464100696995e-06, + "loss": 0.6093, "step": 9538 }, { - "epoch": 0.7218039423404335, - "grad_norm": 2.118645191192627, - "learning_rate": 3.5469424958182783e-06, - "loss": 0.7478, + "epoch": 0.6715241112284407, + "grad_norm": 2.1232666969299316, + "learning_rate": 3.601850902918346e-06, + "loss": 0.6745, "step": 9539 }, { - "epoch": 0.7218796110627672, - "grad_norm": 1.8370145559310913, - "learning_rate": 3.5451509278261196e-06, - "loss": 0.7044, + "epoch": 0.6715945089757128, + "grad_norm": 1.7412270307540894, + "learning_rate": 3.6004555724398446e-06, + "loss": 0.7165, "step": 9540 }, { - "epoch": 0.7219552797851009, - "grad_norm": 2.0463836193084717, - "learning_rate": 3.543359701434254e-06, - "loss": 0.6612, + "epoch": 0.6716649067229848, + "grad_norm": 1.8341511487960815, + "learning_rate": 3.5990604187067336e-06, + "loss": 0.6809, "step": 9541 }, { - "epoch": 0.7220309485074344, - "grad_norm": 1.8631353378295898, - "learning_rate": 3.5415688167548513e-06, - "loss": 0.7544, + "epoch": 0.671735304470257, + "grad_norm": 1.9009373188018799, + "learning_rate": 3.5976654417915546e-06, + "loss": 0.6056, "step": 9542 }, { - "epoch": 0.7221066172297681, - "grad_norm": 2.5322537422180176, - "learning_rate": 3.5397782739000647e-06, - "loss": 0.7171, + "epoch": 0.671805702217529, + "grad_norm": 1.619065761566162, + "learning_rate": 3.5962706417668307e-06, + "loss": 0.6084, "step": 9543 }, { - "epoch": 0.7221822859521017, - "grad_norm": 1.8810664415359497, - "learning_rate": 3.5379880729820227e-06, - "loss": 0.5414, + "epoch": 0.6718760999648011, + "grad_norm": 1.690170168876648, + "learning_rate": 3.5948760187050784e-06, + "loss": 0.6679, "step": 9544 }, { - "epoch": 0.7222579546744353, - "grad_norm": 2.0911777019500732, - "learning_rate": 3.536198214112834e-06, - "loss": 0.6315, + "epoch": 0.6719464977120733, + "grad_norm": 2.2986021041870117, + "learning_rate": 3.5934815726788034e-06, + "loss": 0.6715, "step": 9545 }, { - "epoch": 0.722333623396769, - "grad_norm": 2.1385138034820557, - "learning_rate": 3.534408697404588e-06, - "loss": 0.7681, + "epoch": 0.6720168954593453, + "grad_norm": 1.7641257047653198, + "learning_rate": 3.592087303760509e-06, + "loss": 0.6538, "step": 9546 }, { - "epoch": 0.7224092921191025, - "grad_norm": 2.2795066833496094, - "learning_rate": 3.5326195229693447e-06, - "loss": 0.7531, + "epoch": 0.6720872932066174, + "grad_norm": 1.7836750745773315, + "learning_rate": 3.590693212022679e-06, + "loss": 0.614, "step": 9547 }, { - "epoch": 0.7224849608414362, - "grad_norm": 2.068978786468506, - "learning_rate": 3.5308306909191467e-06, - "loss": 0.6324, + "epoch": 0.6721576909538894, + "grad_norm": 1.4622468948364258, + "learning_rate": 3.5892992975378e-06, + "loss": 0.6495, "step": 9548 }, { - "epoch": 0.7225606295637698, - "grad_norm": 2.281078577041626, - "learning_rate": 3.5290422013660234e-06, - "loss": 0.751, + "epoch": 0.6722280887011616, + "grad_norm": 1.8348771333694458, + "learning_rate": 3.587905560378339e-06, + "loss": 0.674, "step": 9549 }, { - "epoch": 0.7226362982861034, - "grad_norm": 2.842747449874878, - "learning_rate": 3.5272540544219766e-06, - "loss": 0.5912, + "epoch": 0.6722984864484336, + "grad_norm": 1.719138741493225, + "learning_rate": 3.586512000616758e-06, + "loss": 0.59, "step": 9550 }, { - "epoch": 0.7227119670084371, - "grad_norm": 1.6664164066314697, - "learning_rate": 3.5254662501989788e-06, - "loss": 0.5474, + "epoch": 0.6723688841957057, + "grad_norm": 2.1608736515045166, + "learning_rate": 3.5851186183255065e-06, + "loss": 0.6208, "step": 9551 }, { - "epoch": 0.7227876357307707, - "grad_norm": 2.199381113052368, - "learning_rate": 3.5236787888089905e-06, - "loss": 0.6749, + "epoch": 0.6724392819429779, + "grad_norm": 1.814595103263855, + "learning_rate": 3.583725413577034e-06, + "loss": 0.5974, "step": 9552 }, { - "epoch": 0.7228633044531043, - "grad_norm": 1.8498841524124146, - "learning_rate": 3.5218916703639495e-06, - "loss": 0.6779, + "epoch": 0.6725096796902499, + "grad_norm": 1.994049072265625, + "learning_rate": 3.5823323864437686e-06, + "loss": 0.6678, "step": 9553 }, { - "epoch": 0.722938973175438, - "grad_norm": 2.030409336090088, - "learning_rate": 3.5201048949757702e-06, - "loss": 0.7235, + "epoch": 0.672580077437522, + "grad_norm": 1.7073501348495483, + "learning_rate": 3.5809395369981395e-06, + "loss": 0.5204, "step": 9554 }, { - "epoch": 0.7230146418977715, - "grad_norm": 2.3380861282348633, - "learning_rate": 3.5183184627563463e-06, - "loss": 0.6836, + "epoch": 0.672650475184794, + "grad_norm": 2.0833985805511475, + "learning_rate": 3.5795468653125592e-06, + "loss": 0.8357, "step": 9555 }, { - "epoch": 0.7230903106201052, - "grad_norm": 2.5878119468688965, - "learning_rate": 3.5165323738175504e-06, - "loss": 0.6228, + "epoch": 0.6727208729320662, + "grad_norm": 1.846512794494629, + "learning_rate": 3.57815437145944e-06, + "loss": 0.6764, "step": 9556 }, { - "epoch": 0.7231659793424388, - "grad_norm": 2.390239953994751, - "learning_rate": 3.514746628271236e-06, - "loss": 0.4656, + "epoch": 0.6727912706793383, + "grad_norm": 2.240967035293579, + "learning_rate": 3.5767620555111673e-06, + "loss": 0.6787, "step": 9557 }, { - "epoch": 0.7232416480647724, - "grad_norm": 2.0692977905273438, - "learning_rate": 3.512961226229227e-06, - "loss": 0.6703, + "epoch": 0.6728616684266103, + "grad_norm": 1.6403827667236328, + "learning_rate": 3.57536991754014e-06, + "loss": 0.7156, "step": 9558 }, { - "epoch": 0.7233173167871061, - "grad_norm": 2.3725948333740234, - "learning_rate": 3.511176167803329e-06, - "loss": 0.665, + "epoch": 0.6729320661738825, + "grad_norm": 1.5792381763458252, + "learning_rate": 3.5739779576187283e-06, + "loss": 0.6447, "step": 9559 }, { - "epoch": 0.7233929855094396, - "grad_norm": 2.082200288772583, - "learning_rate": 3.509391453105339e-06, - "loss": 0.6218, + "epoch": 0.6730024639211545, + "grad_norm": 2.1986870765686035, + "learning_rate": 3.5725861758193085e-06, + "loss": 0.6905, "step": 9560 }, { - "epoch": 0.7234686542317733, - "grad_norm": 1.8738415241241455, - "learning_rate": 3.5076070822470115e-06, - "loss": 0.7547, + "epoch": 0.6730728616684266, + "grad_norm": 1.9966109991073608, + "learning_rate": 3.5711945722142354e-06, + "loss": 0.572, "step": 9561 }, { - "epoch": 0.7235443229541069, - "grad_norm": 2.0773375034332275, - "learning_rate": 3.5058230553400937e-06, - "loss": 0.6218, + "epoch": 0.6731432594156987, + "grad_norm": 1.5639630556106567, + "learning_rate": 3.569803146875866e-06, + "loss": 0.6173, "step": 9562 }, { - "epoch": 0.7236199916764405, - "grad_norm": 2.684323310852051, - "learning_rate": 3.504039372496306e-06, - "loss": 0.6819, + "epoch": 0.6732136571629708, + "grad_norm": 1.6846318244934082, + "learning_rate": 3.568411899876533e-06, + "loss": 0.5936, "step": 9563 }, { - "epoch": 0.7236956603987742, - "grad_norm": 2.242973804473877, - "learning_rate": 3.502256033827349e-06, - "loss": 0.7311, + "epoch": 0.6732840549102429, + "grad_norm": 1.6655956506729126, + "learning_rate": 3.5670208312885754e-06, + "loss": 0.6481, "step": 9564 }, { - "epoch": 0.7237713291211078, - "grad_norm": 1.914873480796814, - "learning_rate": 3.5004730394449014e-06, - "loss": 0.6217, + "epoch": 0.6733544526575149, + "grad_norm": 1.6137974262237549, + "learning_rate": 3.565629941184312e-06, + "loss": 0.6987, "step": 9565 }, { - "epoch": 0.7238469978434414, - "grad_norm": 3.037616729736328, - "learning_rate": 3.498690389460619e-06, - "loss": 0.7967, + "epoch": 0.6734248504047871, + "grad_norm": 2.266724109649658, + "learning_rate": 3.5642392296360603e-06, + "loss": 0.6172, "step": 9566 }, { - "epoch": 0.7239226665657751, - "grad_norm": 1.9221965074539185, - "learning_rate": 3.4969080839861388e-06, - "loss": 0.6185, + "epoch": 0.6734952481520592, + "grad_norm": 1.5917441844940186, + "learning_rate": 3.5628486967161194e-06, + "loss": 0.6102, "step": 9567 }, { - "epoch": 0.7239983352881086, - "grad_norm": 1.7986969947814941, - "learning_rate": 3.495126123133075e-06, - "loss": 0.5751, + "epoch": 0.6735656458993312, + "grad_norm": 1.7653508186340332, + "learning_rate": 3.5614583424967946e-06, + "loss": 0.6588, "step": 9568 }, { - "epoch": 0.7240740040104423, - "grad_norm": 2.0456697940826416, - "learning_rate": 3.4933445070130137e-06, - "loss": 0.7111, + "epoch": 0.6736360436466033, + "grad_norm": 2.4729971885681152, + "learning_rate": 3.5600681670503576e-06, + "loss": 0.6465, "step": 9569 }, { - "epoch": 0.7241496727327759, - "grad_norm": 2.568084716796875, - "learning_rate": 3.4915632357375322e-06, - "loss": 0.7023, + "epoch": 0.6737064413938754, + "grad_norm": 1.6277023553848267, + "learning_rate": 3.5586781704490957e-06, + "loss": 0.643, "step": 9570 }, { - "epoch": 0.7242253414551095, - "grad_norm": 1.8491854667663574, - "learning_rate": 3.489782309418181e-06, - "loss": 0.5003, + "epoch": 0.6737768391411475, + "grad_norm": 1.7675042152404785, + "learning_rate": 3.55728835276527e-06, + "loss": 0.6329, "step": 9571 }, { - "epoch": 0.7243010101774432, - "grad_norm": 2.522088050842285, - "learning_rate": 3.4880017281664807e-06, - "loss": 0.6625, + "epoch": 0.6738472368884195, + "grad_norm": 1.6642038822174072, + "learning_rate": 3.5558987140711426e-06, + "loss": 0.6726, "step": 9572 }, { - "epoch": 0.7243766788997767, - "grad_norm": 2.015510082244873, - "learning_rate": 3.4862214920939396e-06, - "loss": 0.6182, + "epoch": 0.6739176346356917, + "grad_norm": 1.9192132949829102, + "learning_rate": 3.5545092544389574e-06, + "loss": 0.7024, "step": 9573 }, { - "epoch": 0.7244523476221104, - "grad_norm": 1.839280366897583, - "learning_rate": 3.4844416013120436e-06, - "loss": 0.6601, + "epoch": 0.6739880323829638, + "grad_norm": 1.8174347877502441, + "learning_rate": 3.553119973940963e-06, + "loss": 0.5683, "step": 9574 }, { - "epoch": 0.724528016344444, - "grad_norm": 1.9909266233444214, - "learning_rate": 3.4826620559322523e-06, - "loss": 0.7079, + "epoch": 0.6740584301302358, + "grad_norm": 1.9017715454101562, + "learning_rate": 3.5517308726493765e-06, + "loss": 0.676, "step": 9575 }, { - "epoch": 0.7246036850667776, - "grad_norm": 2.2563157081604004, - "learning_rate": 3.480882856066009e-06, - "loss": 0.5589, + "epoch": 0.6741288278775079, + "grad_norm": 1.959851861000061, + "learning_rate": 3.5503419506364276e-06, + "loss": 0.6248, "step": 9576 }, { - "epoch": 0.7246793537891113, - "grad_norm": 2.3766355514526367, - "learning_rate": 3.4791040018247334e-06, - "loss": 0.6712, + "epoch": 0.67419922562478, + "grad_norm": 1.7759286165237427, + "learning_rate": 3.5489532079743225e-06, + "loss": 0.6489, "step": 9577 }, { - "epoch": 0.7247550225114449, - "grad_norm": 2.32324481010437, - "learning_rate": 3.477325493319824e-06, - "loss": 0.7717, + "epoch": 0.6742696233720521, + "grad_norm": 1.7180191278457642, + "learning_rate": 3.5475646447352684e-06, + "loss": 0.6015, "step": 9578 }, { - "epoch": 0.7248306912337785, - "grad_norm": 1.881474256515503, - "learning_rate": 3.4755473306626482e-06, - "loss": 0.6536, + "epoch": 0.6743400211193242, + "grad_norm": 2.043351411819458, + "learning_rate": 3.546176260991452e-06, + "loss": 0.6129, "step": 9579 }, { - "epoch": 0.7249063599561122, - "grad_norm": 2.3308231830596924, - "learning_rate": 3.4737695139645697e-06, - "loss": 0.6384, + "epoch": 0.6744104188665963, + "grad_norm": 2.4942402839660645, + "learning_rate": 3.5447880568150623e-06, + "loss": 0.7096, "step": 9580 }, { - "epoch": 0.7249820286784457, - "grad_norm": 2.173731803894043, - "learning_rate": 3.471992043336919e-06, - "loss": 0.6587, + "epoch": 0.6744808166138684, + "grad_norm": 1.7417913675308228, + "learning_rate": 3.5434000322782714e-06, + "loss": 0.6306, "step": 9581 }, { - "epoch": 0.7250576974007794, - "grad_norm": 2.35199236869812, - "learning_rate": 3.4702149188910087e-06, - "loss": 0.6212, + "epoch": 0.6745512143611404, + "grad_norm": 1.631111741065979, + "learning_rate": 3.54201218745324e-06, + "loss": 0.6692, "step": 9582 }, { - "epoch": 0.725133366123113, - "grad_norm": 2.924612522125244, - "learning_rate": 3.468438140738123e-06, - "loss": 0.7118, + "epoch": 0.6746216121084125, + "grad_norm": 1.8886805772781372, + "learning_rate": 3.5406245224121296e-06, + "loss": 0.6756, "step": 9583 }, { - "epoch": 0.7252090348454466, - "grad_norm": 2.093873977661133, - "learning_rate": 3.46666170898953e-06, - "loss": 0.6152, + "epoch": 0.6746920098556847, + "grad_norm": 2.2180871963500977, + "learning_rate": 3.5392370372270814e-06, + "loss": 0.6954, "step": 9584 }, { - "epoch": 0.7252847035677803, - "grad_norm": 2.7379560470581055, - "learning_rate": 3.4648856237564827e-06, - "loss": 0.7422, + "epoch": 0.6747624076029567, + "grad_norm": 1.5210446119308472, + "learning_rate": 3.5378497319702355e-06, + "loss": 0.6014, "step": 9585 }, { - "epoch": 0.7253603722901139, - "grad_norm": 2.0272998809814453, - "learning_rate": 3.463109885150198e-06, - "loss": 0.6245, + "epoch": 0.6748328053502288, + "grad_norm": 1.8090317249298096, + "learning_rate": 3.5364626067137185e-06, + "loss": 0.6126, "step": 9586 }, { - "epoch": 0.7254360410124475, - "grad_norm": 2.0299673080444336, - "learning_rate": 3.4613344932818797e-06, - "loss": 0.7292, + "epoch": 0.6749032030975008, + "grad_norm": 1.8884564638137817, + "learning_rate": 3.535075661529646e-06, + "loss": 0.5968, "step": 9587 }, { - "epoch": 0.7255117097347811, - "grad_norm": 2.6118695735931396, - "learning_rate": 3.459559448262711e-06, - "loss": 0.6669, + "epoch": 0.674973600844773, + "grad_norm": 1.5763615369796753, + "learning_rate": 3.533688896490126e-06, + "loss": 0.5913, "step": 9588 }, { - "epoch": 0.7255873784571147, - "grad_norm": 1.9721378087997437, - "learning_rate": 3.457784750203849e-06, - "loss": 0.6908, + "epoch": 0.675043998592045, + "grad_norm": 1.7451555728912354, + "learning_rate": 3.532302311667262e-06, + "loss": 0.7158, "step": 9589 }, { - "epoch": 0.7256630471794484, - "grad_norm": 2.455974817276001, - "learning_rate": 3.456010399216431e-06, - "loss": 0.8746, + "epoch": 0.6751143963393171, + "grad_norm": 1.7790619134902954, + "learning_rate": 3.5309159071331393e-06, + "loss": 0.6961, "step": 9590 }, { - "epoch": 0.725738715901782, - "grad_norm": 1.8864761590957642, - "learning_rate": 3.454236395411574e-06, - "loss": 0.6697, + "epoch": 0.6751847940865893, + "grad_norm": 1.7640100717544556, + "learning_rate": 3.5295296829598426e-06, + "loss": 0.681, "step": 9591 }, { - "epoch": 0.7258143846241156, - "grad_norm": 1.9574358463287354, - "learning_rate": 3.4524627389003745e-06, - "loss": 0.7325, + "epoch": 0.6752551918338613, + "grad_norm": 1.6927580833435059, + "learning_rate": 3.5281436392194406e-06, + "loss": 0.6427, "step": 9592 }, { - "epoch": 0.7258900533464493, - "grad_norm": 1.9608203172683716, - "learning_rate": 3.450689429793897e-06, - "loss": 0.7059, + "epoch": 0.6753255895811334, + "grad_norm": 2.0235867500305176, + "learning_rate": 3.526757775983995e-06, + "loss": 0.623, "step": 9593 }, { - "epoch": 0.7259657220687828, - "grad_norm": 2.011075496673584, - "learning_rate": 3.4489164682031966e-06, - "loss": 0.6755, + "epoch": 0.6753959873284054, + "grad_norm": 1.9084289073944092, + "learning_rate": 3.5253720933255555e-06, + "loss": 0.6644, "step": 9594 }, { - "epoch": 0.7260413907911165, - "grad_norm": 1.925155520439148, - "learning_rate": 3.4471438542392987e-06, - "loss": 0.8509, + "epoch": 0.6754663850756776, + "grad_norm": 1.7594949007034302, + "learning_rate": 3.523986591316171e-06, + "loss": 0.7612, "step": 9595 }, { - "epoch": 0.7261170595134501, - "grad_norm": 1.9459024667739868, - "learning_rate": 3.4453715880132183e-06, - "loss": 0.6895, + "epoch": 0.6755367828229497, + "grad_norm": 2.141514539718628, + "learning_rate": 3.5226012700278688e-06, + "loss": 0.7439, "step": 9596 }, { - "epoch": 0.7261927282357837, - "grad_norm": 1.7830241918563843, - "learning_rate": 3.4435996696359328e-06, - "loss": 0.6713, + "epoch": 0.6756071805702217, + "grad_norm": 1.9407038688659668, + "learning_rate": 3.521216129532678e-06, + "loss": 0.6718, "step": 9597 }, { - "epoch": 0.7262683969581174, - "grad_norm": 2.3200533390045166, - "learning_rate": 3.441828099218406e-06, - "loss": 0.7796, + "epoch": 0.6756775783174939, + "grad_norm": 2.1379191875457764, + "learning_rate": 3.5198311699026115e-06, + "loss": 0.7543, "step": 9598 }, { - "epoch": 0.726344065680451, - "grad_norm": 2.053757429122925, - "learning_rate": 3.4400568768715827e-06, - "loss": 0.6481, + "epoch": 0.6757479760647659, + "grad_norm": 1.8247119188308716, + "learning_rate": 3.5184463912096744e-06, + "loss": 0.6819, "step": 9599 }, { - "epoch": 0.7264197344027846, - "grad_norm": 2.283618688583374, - "learning_rate": 3.4382860027063798e-06, - "loss": 0.7214, + "epoch": 0.675818373812038, + "grad_norm": 1.7505089044570923, + "learning_rate": 3.51706179352586e-06, + "loss": 0.6486, "step": 9600 }, { - "epoch": 0.7264954031251182, - "grad_norm": 2.036465644836426, - "learning_rate": 3.436515476833696e-06, - "loss": 0.6602, + "epoch": 0.6758887715593102, + "grad_norm": 1.8196829557418823, + "learning_rate": 3.5156773769231598e-06, + "loss": 0.8067, "step": 9601 }, { - "epoch": 0.7265710718474518, - "grad_norm": 1.9989351034164429, - "learning_rate": 3.434745299364408e-06, - "loss": 0.6376, + "epoch": 0.6759591693065822, + "grad_norm": 2.3212599754333496, + "learning_rate": 3.514293141473546e-06, + "loss": 0.7312, "step": 9602 }, { - "epoch": 0.7266467405697855, - "grad_norm": 1.9617687463760376, - "learning_rate": 3.4329754704093725e-06, - "loss": 0.5082, + "epoch": 0.6760295670538543, + "grad_norm": 1.6652593612670898, + "learning_rate": 3.5129090872489903e-06, + "loss": 0.6489, "step": 9603 }, { - "epoch": 0.7267224092921191, - "grad_norm": 2.9853837490081787, - "learning_rate": 3.431205990079416e-06, - "loss": 0.7962, + "epoch": 0.6760999648011263, + "grad_norm": 1.572129726409912, + "learning_rate": 3.5115252143214502e-06, + "loss": 0.7139, "step": 9604 }, { - "epoch": 0.7267980780144527, - "grad_norm": 2.0819427967071533, - "learning_rate": 3.4294368584853484e-06, - "loss": 0.6982, + "epoch": 0.6761703625483985, + "grad_norm": 2.000436305999756, + "learning_rate": 3.510141522762873e-06, + "loss": 0.7092, "step": 9605 }, { - "epoch": 0.7268737467367864, - "grad_norm": 2.134868621826172, - "learning_rate": 3.4276680757379687e-06, - "loss": 0.6123, + "epoch": 0.6762407602956705, + "grad_norm": 1.6774673461914062, + "learning_rate": 3.508758012645196e-06, + "loss": 0.6132, "step": 9606 }, { - "epoch": 0.72694941545912, - "grad_norm": 2.1825947761535645, - "learning_rate": 3.425899641948035e-06, - "loss": 0.7086, + "epoch": 0.6763111580429426, + "grad_norm": 1.796303391456604, + "learning_rate": 3.507374684040355e-06, + "loss": 0.635, "step": 9607 }, { - "epoch": 0.7270250841814536, - "grad_norm": 2.3707220554351807, - "learning_rate": 3.4241315572262933e-06, - "loss": 0.8065, + "epoch": 0.6763815557902148, + "grad_norm": 1.9706754684448242, + "learning_rate": 3.5059915370202645e-06, + "loss": 0.5958, "step": 9608 }, { - "epoch": 0.7271007529037872, - "grad_norm": 1.2709568738937378, - "learning_rate": 3.4223638216834683e-06, - "loss": 0.8368, + "epoch": 0.6764519535374868, + "grad_norm": 1.7144855260849, + "learning_rate": 3.5046085716568403e-06, + "loss": 0.6136, "step": 9609 }, { - "epoch": 0.7271764216261208, - "grad_norm": 2.1164627075195312, - "learning_rate": 3.4205964354302608e-06, - "loss": 0.7194, + "epoch": 0.6765223512847589, + "grad_norm": 1.8843894004821777, + "learning_rate": 3.5032257880219828e-06, + "loss": 0.7056, "step": 9610 }, { - "epoch": 0.7272520903484545, - "grad_norm": 1.7704885005950928, - "learning_rate": 3.4188293985773507e-06, - "loss": 0.6807, + "epoch": 0.6765927490320309, + "grad_norm": 1.7151457071304321, + "learning_rate": 3.501843186187583e-06, + "loss": 0.6977, "step": 9611 }, { - "epoch": 0.7273277590707881, - "grad_norm": 1.7768155336380005, - "learning_rate": 3.417062711235396e-06, - "loss": 0.6752, + "epoch": 0.6766631467793031, + "grad_norm": 1.6163272857666016, + "learning_rate": 3.500460766225521e-06, + "loss": 0.7208, "step": 9612 }, { - "epoch": 0.7274034277931217, - "grad_norm": 1.9417698383331299, - "learning_rate": 3.415296373515031e-06, - "loss": 0.7535, + "epoch": 0.6767335445265752, + "grad_norm": 2.081005573272705, + "learning_rate": 3.499078528207677e-06, + "loss": 0.6829, "step": 9613 }, { - "epoch": 0.7274790965154553, - "grad_norm": 2.049741506576538, - "learning_rate": 3.413530385526874e-06, - "loss": 0.7368, + "epoch": 0.6768039422738472, + "grad_norm": 1.6378560066223145, + "learning_rate": 3.4976964722059077e-06, + "loss": 0.6065, "step": 9614 }, { - "epoch": 0.727554765237789, - "grad_norm": 1.6550544500350952, - "learning_rate": 3.411764747381506e-06, - "loss": 0.6998, + "epoch": 0.6768743400211193, + "grad_norm": 2.2524969577789307, + "learning_rate": 3.4963145982920727e-06, + "loss": 0.7096, "step": 9615 }, { - "epoch": 0.7276304339601226, - "grad_norm": 1.9627418518066406, - "learning_rate": 3.409999459189508e-06, - "loss": 0.6864, + "epoch": 0.6769447377683914, + "grad_norm": 1.9056650400161743, + "learning_rate": 3.494932906538016e-06, + "loss": 0.6171, "step": 9616 }, { - "epoch": 0.7277061026824562, - "grad_norm": 2.080371379852295, - "learning_rate": 3.4082345210614273e-06, - "loss": 0.6129, + "epoch": 0.6770151355156635, + "grad_norm": 1.7184414863586426, + "learning_rate": 3.493551397015571e-06, + "loss": 0.7057, "step": 9617 }, { - "epoch": 0.7277817714047898, - "grad_norm": 1.9414567947387695, - "learning_rate": 3.406469933107783e-06, - "loss": 0.6578, + "epoch": 0.6770855332629356, + "grad_norm": 2.5760021209716797, + "learning_rate": 3.492170069796563e-06, + "loss": 0.5984, "step": 9618 }, { - "epoch": 0.7278574401271235, - "grad_norm": 2.097715139389038, - "learning_rate": 3.404705695439083e-06, - "loss": 0.6798, + "epoch": 0.6771559310102077, + "grad_norm": 1.7121531963348389, + "learning_rate": 3.490788924952811e-06, + "loss": 0.6561, "step": 9619 }, { - "epoch": 0.7279331088494571, - "grad_norm": 2.0292246341705322, - "learning_rate": 3.40294180816581e-06, - "loss": 0.6346, + "epoch": 0.6772263287574798, + "grad_norm": 1.803931713104248, + "learning_rate": 3.489407962556119e-06, + "loss": 0.6112, "step": 9620 }, { - "epoch": 0.7280087775717907, - "grad_norm": 2.0286881923675537, - "learning_rate": 3.401178271398425e-06, - "loss": 0.7645, + "epoch": 0.6772967265047518, + "grad_norm": 1.6917119026184082, + "learning_rate": 3.488027182678289e-06, + "loss": 0.6877, "step": 9621 }, { - "epoch": 0.7280844462941243, - "grad_norm": 2.190192461013794, - "learning_rate": 3.3994150852473645e-06, - "loss": 0.6803, + "epoch": 0.6773671242520239, + "grad_norm": 2.1594979763031006, + "learning_rate": 3.486646585391107e-06, + "loss": 0.6299, "step": 9622 }, { - "epoch": 0.7281601150164579, - "grad_norm": 2.6516058444976807, - "learning_rate": 3.3976522498230454e-06, - "loss": 0.9133, + "epoch": 0.6774375219992961, + "grad_norm": 3.835920572280884, + "learning_rate": 3.48526617076635e-06, + "loss": 0.6214, "step": 9623 }, { - "epoch": 0.7282357837387916, - "grad_norm": 1.7994333505630493, - "learning_rate": 3.395889765235864e-06, - "loss": 0.7207, + "epoch": 0.6775079197465681, + "grad_norm": 1.6313978433609009, + "learning_rate": 3.483885938875786e-06, + "loss": 0.5816, "step": 9624 }, { - "epoch": 0.7283114524611252, - "grad_norm": 2.710233211517334, - "learning_rate": 3.3941276315961903e-06, - "loss": 0.6214, + "epoch": 0.6775783174938402, + "grad_norm": 1.709115982055664, + "learning_rate": 3.482505889791179e-06, + "loss": 0.6439, "step": 9625 }, { - "epoch": 0.7283871211834588, - "grad_norm": 2.217609167098999, - "learning_rate": 3.3923658490143767e-06, - "loss": 0.6707, + "epoch": 0.6776487152411123, + "grad_norm": 1.9716651439666748, + "learning_rate": 3.4811260235842737e-06, + "loss": 0.5988, "step": 9626 }, { - "epoch": 0.7284627899057925, - "grad_norm": 2.534865379333496, - "learning_rate": 3.3906044176007505e-06, - "loss": 0.8433, + "epoch": 0.6777191129883844, + "grad_norm": 1.5748286247253418, + "learning_rate": 3.4797463403268157e-06, + "loss": 0.6174, "step": 9627 }, { - "epoch": 0.728538458628126, - "grad_norm": 2.2182860374450684, - "learning_rate": 3.3888433374656217e-06, - "loss": 0.7009, + "epoch": 0.6777895107356564, + "grad_norm": 1.9641507863998413, + "learning_rate": 3.478366840090533e-06, + "loss": 0.6879, "step": 9628 }, { - "epoch": 0.7286141273504597, - "grad_norm": 2.057269811630249, - "learning_rate": 3.387082608719268e-06, - "loss": 0.7962, + "epoch": 0.6778599084829285, + "grad_norm": 1.9443857669830322, + "learning_rate": 3.4769875229471486e-06, + "loss": 0.6406, "step": 9629 }, { - "epoch": 0.7286897960727933, - "grad_norm": 2.081799268722534, - "learning_rate": 3.385322231471954e-06, - "loss": 0.6249, + "epoch": 0.6779303062302007, + "grad_norm": 2.7079031467437744, + "learning_rate": 3.4756083889683706e-06, + "loss": 0.7148, "step": 9630 }, { - "epoch": 0.7287654647951269, - "grad_norm": 2.1988329887390137, - "learning_rate": 3.383562205833927e-06, - "loss": 0.8234, + "epoch": 0.6780007039774727, + "grad_norm": 1.655976414680481, + "learning_rate": 3.474229438225904e-06, + "loss": 0.5471, "step": 9631 }, { - "epoch": 0.7288411335174606, - "grad_norm": 2.533674716949463, - "learning_rate": 3.381802531915398e-06, - "loss": 0.6977, + "epoch": 0.6780711017247448, + "grad_norm": 1.9969831705093384, + "learning_rate": 3.472850670791445e-06, + "loss": 0.7403, "step": 9632 }, { - "epoch": 0.7289168022397942, - "grad_norm": 1.9693000316619873, - "learning_rate": 3.380043209826566e-06, - "loss": 0.5226, + "epoch": 0.6781414994720169, + "grad_norm": 1.840749740600586, + "learning_rate": 3.471472086736674e-06, + "loss": 0.6711, "step": 9633 }, { - "epoch": 0.7289924709621278, - "grad_norm": 2.4341700077056885, - "learning_rate": 3.3782842396776048e-06, - "loss": 0.6874, + "epoch": 0.678211897219289, + "grad_norm": 1.9854642152786255, + "learning_rate": 3.4700936861332656e-06, + "loss": 0.6954, "step": 9634 }, { - "epoch": 0.7290681396844614, - "grad_norm": 2.3296284675598145, - "learning_rate": 3.3765256215786707e-06, - "loss": 0.4436, + "epoch": 0.6782822949665611, + "grad_norm": 1.5022225379943848, + "learning_rate": 3.46871546905288e-06, + "loss": 0.6318, "step": 9635 }, { - "epoch": 0.729143808406795, - "grad_norm": 1.8959673643112183, - "learning_rate": 3.374767355639885e-06, - "loss": 0.6406, + "epoch": 0.6783526927138331, + "grad_norm": 1.7800302505493164, + "learning_rate": 3.4673374355671777e-06, + "loss": 0.5815, "step": 9636 }, { - "epoch": 0.7292194771291287, - "grad_norm": 2.5320215225219727, - "learning_rate": 3.373009441971364e-06, - "loss": 0.7049, + "epoch": 0.6784230904611053, + "grad_norm": 1.7448887825012207, + "learning_rate": 3.4659595857477986e-06, + "loss": 0.6312, "step": 9637 }, { - "epoch": 0.7292951458514623, - "grad_norm": 2.963879346847534, - "learning_rate": 3.3712518806831915e-06, - "loss": 0.6362, + "epoch": 0.6784934882083773, + "grad_norm": 1.8354780673980713, + "learning_rate": 3.464581919666385e-06, + "loss": 0.6808, "step": 9638 }, { - "epoch": 0.7293708145737959, - "grad_norm": 1.864016056060791, - "learning_rate": 3.3694946718854357e-06, - "loss": 0.5834, + "epoch": 0.6785638859556494, + "grad_norm": 1.9327635765075684, + "learning_rate": 3.4632044373945577e-06, + "loss": 0.6992, "step": 9639 }, { - "epoch": 0.7294464832961296, - "grad_norm": 2.2070538997650146, - "learning_rate": 3.3677378156881313e-06, - "loss": 0.64, + "epoch": 0.6786342837029216, + "grad_norm": 1.868553638458252, + "learning_rate": 3.461827139003936e-06, + "loss": 0.6465, "step": 9640 }, { - "epoch": 0.7295221520184632, - "grad_norm": 2.1202077865600586, - "learning_rate": 3.3659813122012987e-06, - "loss": 0.619, + "epoch": 0.6787046814501936, + "grad_norm": 1.6464048624038696, + "learning_rate": 3.460450024566123e-06, + "loss": 0.6273, "step": 9641 }, { - "epoch": 0.7295978207407968, - "grad_norm": 1.996546983718872, - "learning_rate": 3.364225161534945e-06, - "loss": 0.5529, + "epoch": 0.6787750791974657, + "grad_norm": 1.9481877088546753, + "learning_rate": 3.459073094152721e-06, + "loss": 0.7709, "step": 9642 }, { - "epoch": 0.7296734894631304, - "grad_norm": 1.7262141704559326, - "learning_rate": 3.362469363799037e-06, - "loss": 0.6483, + "epoch": 0.6788454769447377, + "grad_norm": 1.9702844619750977, + "learning_rate": 3.4576963478353133e-06, + "loss": 0.673, "step": 9643 }, { - "epoch": 0.729749158185464, - "grad_norm": 2.2432174682617188, - "learning_rate": 3.360713919103532e-06, - "loss": 0.6979, + "epoch": 0.6789158746920099, + "grad_norm": 1.7631657123565674, + "learning_rate": 3.456319785685483e-06, + "loss": 0.7266, "step": 9644 }, { - "epoch": 0.7298248269077977, - "grad_norm": 2.1169657707214355, - "learning_rate": 3.35895882755836e-06, - "loss": 0.7512, + "epoch": 0.6789862724392819, + "grad_norm": 1.9265705347061157, + "learning_rate": 3.4549434077747965e-06, + "loss": 0.5818, "step": 9645 }, { - "epoch": 0.7299004956301313, - "grad_norm": 2.215263843536377, - "learning_rate": 3.357204089273432e-06, - "loss": 0.6911, + "epoch": 0.679056670186554, + "grad_norm": 1.854010820388794, + "learning_rate": 3.4535672141748133e-06, + "loss": 0.6411, "step": 9646 }, { - "epoch": 0.7299761643524649, - "grad_norm": 2.0325393676757812, - "learning_rate": 3.3554497043586354e-06, - "loss": 0.6089, + "epoch": 0.6791270679338262, + "grad_norm": 2.146291494369507, + "learning_rate": 3.4521912049570795e-06, + "loss": 0.681, "step": 9647 }, { - "epoch": 0.7300518330747985, - "grad_norm": 2.492884874343872, - "learning_rate": 3.353695672923835e-06, - "loss": 0.6432, + "epoch": 0.6791974656810982, + "grad_norm": 1.9511109590530396, + "learning_rate": 3.4508153801931404e-06, + "loss": 0.7641, "step": 9648 }, { - "epoch": 0.7301275017971322, - "grad_norm": 1.838275671005249, - "learning_rate": 3.351941995078877e-06, - "loss": 0.6128, + "epoch": 0.6792678634283703, + "grad_norm": 1.939736008644104, + "learning_rate": 3.4494397399545212e-06, + "loss": 0.7771, "step": 9649 }, { - "epoch": 0.7302031705194658, - "grad_norm": 2.5768980979919434, - "learning_rate": 3.3501886709335755e-06, - "loss": 0.542, + "epoch": 0.6793382611756423, + "grad_norm": 1.7828140258789062, + "learning_rate": 3.448064284312749e-06, + "loss": 0.6912, "step": 9650 }, { - "epoch": 0.7302788392417994, - "grad_norm": 2.3133151531219482, - "learning_rate": 3.3484357005977307e-06, - "loss": 0.6435, + "epoch": 0.6794086589229145, + "grad_norm": 1.7981231212615967, + "learning_rate": 3.4466890133393305e-06, + "loss": 0.7267, "step": 9651 }, { - "epoch": 0.730354507964133, - "grad_norm": 2.1591763496398926, - "learning_rate": 3.346683084181125e-06, - "loss": 0.8351, + "epoch": 0.6794790566701866, + "grad_norm": 1.8764609098434448, + "learning_rate": 3.445313927105769e-06, + "loss": 0.6176, "step": 9652 }, { - "epoch": 0.7304301766864667, - "grad_norm": 2.5849671363830566, - "learning_rate": 3.344930821793512e-06, - "loss": 0.5672, + "epoch": 0.6795494544174586, + "grad_norm": 1.9188377857208252, + "learning_rate": 3.443939025683553e-06, + "loss": 0.7099, "step": 9653 }, { - "epoch": 0.7305058454088003, - "grad_norm": 2.141481876373291, - "learning_rate": 3.343178913544619e-06, - "loss": 0.6263, + "epoch": 0.6796198521647308, + "grad_norm": 1.6807130575180054, + "learning_rate": 3.44256430914417e-06, + "loss": 0.6797, "step": 9654 }, { - "epoch": 0.7305815141311339, - "grad_norm": 2.7578744888305664, - "learning_rate": 3.341427359544158e-06, - "loss": 0.7468, + "epoch": 0.6796902499120028, + "grad_norm": 2.0973258018493652, + "learning_rate": 3.441189777559088e-06, + "loss": 0.7043, "step": 9655 }, { - "epoch": 0.7306571828534675, - "grad_norm": 1.959076166152954, - "learning_rate": 3.339676159901819e-06, - "loss": 0.582, + "epoch": 0.6797606476592749, + "grad_norm": 1.950028657913208, + "learning_rate": 3.4398154309997745e-06, + "loss": 0.6808, "step": 9656 }, { - "epoch": 0.7307328515758011, - "grad_norm": 2.0008225440979004, - "learning_rate": 3.3379253147272654e-06, - "loss": 0.6107, + "epoch": 0.679831045406547, + "grad_norm": 1.8879201412200928, + "learning_rate": 3.4384412695376792e-06, + "loss": 0.656, "step": 9657 }, { - "epoch": 0.7308085202981348, - "grad_norm": 2.1886539459228516, - "learning_rate": 3.336174824130143e-06, - "loss": 0.7106, + "epoch": 0.6799014431538191, + "grad_norm": 1.9734127521514893, + "learning_rate": 3.437067293244253e-06, + "loss": 0.598, "step": 9658 }, { - "epoch": 0.7308841890204684, - "grad_norm": 2.4869959354400635, - "learning_rate": 3.334424688220071e-06, - "loss": 0.7828, + "epoch": 0.6799718409010912, + "grad_norm": 2.02650785446167, + "learning_rate": 3.4356935021909195e-06, + "loss": 0.6488, "step": 9659 }, { - "epoch": 0.730959857742802, - "grad_norm": 3.1968321800231934, - "learning_rate": 3.3326749071066546e-06, - "loss": 0.6548, + "epoch": 0.6800422386483632, + "grad_norm": 2.0382115840911865, + "learning_rate": 3.4343198964491117e-06, + "loss": 0.6051, "step": 9660 }, { - "epoch": 0.7310355264651356, - "grad_norm": 2.0156288146972656, - "learning_rate": 3.330925480899458e-06, - "loss": 0.7084, + "epoch": 0.6801126363956354, + "grad_norm": 1.742477297782898, + "learning_rate": 3.432946476090239e-06, + "loss": 0.7091, "step": 9661 }, { - "epoch": 0.7311111951874693, - "grad_norm": 2.093147039413452, - "learning_rate": 3.329176409708048e-06, - "loss": 0.716, + "epoch": 0.6801830341429074, + "grad_norm": 2.718296766281128, + "learning_rate": 3.431573241185712e-06, + "loss": 0.6718, "step": 9662 }, { - "epoch": 0.7311868639098029, - "grad_norm": 1.8537280559539795, - "learning_rate": 3.3274276936419558e-06, - "loss": 0.7604, + "epoch": 0.6802534318901795, + "grad_norm": 2.184391975402832, + "learning_rate": 3.430200191806921e-06, + "loss": 0.6232, "step": 9663 }, { - "epoch": 0.7312625326321365, - "grad_norm": 1.8829224109649658, - "learning_rate": 3.325679332810685e-06, - "loss": 0.5923, + "epoch": 0.6803238296374516, + "grad_norm": 2.223517894744873, + "learning_rate": 3.4288273280252608e-06, + "loss": 0.6614, "step": 9664 }, { - "epoch": 0.7313382013544701, - "grad_norm": 2.2655227184295654, - "learning_rate": 3.323931327323727e-06, - "loss": 0.6448, + "epoch": 0.6803942273847237, + "grad_norm": 1.9078189134597778, + "learning_rate": 3.427454649912097e-06, + "loss": 0.6213, "step": 9665 }, { - "epoch": 0.7314138700768038, - "grad_norm": 2.4388043880462646, - "learning_rate": 3.322183677290546e-06, - "loss": 0.6538, + "epoch": 0.6804646251319958, + "grad_norm": 1.8347957134246826, + "learning_rate": 3.4260821575388037e-06, + "loss": 0.7035, "step": 9666 }, { - "epoch": 0.7314895387991374, - "grad_norm": 2.1966893672943115, - "learning_rate": 3.3204363828205933e-06, - "loss": 0.609, + "epoch": 0.6805350228792678, + "grad_norm": 1.7563501596450806, + "learning_rate": 3.424709850976733e-06, + "loss": 0.713, "step": 9667 }, { - "epoch": 0.731565207521471, - "grad_norm": 1.9812705516815186, - "learning_rate": 3.318689444023281e-06, - "loss": 0.6558, + "epoch": 0.68060542062654, + "grad_norm": 1.747071623802185, + "learning_rate": 3.423337730297238e-06, + "loss": 0.6592, "step": 9668 }, { - "epoch": 0.7316408762438046, - "grad_norm": 2.1352076530456543, - "learning_rate": 3.3169428610080107e-06, - "loss": 0.6868, + "epoch": 0.6806758183738121, + "grad_norm": 1.887338399887085, + "learning_rate": 3.4219657955716512e-06, + "loss": 0.6833, "step": 9669 }, { - "epoch": 0.7317165449661382, - "grad_norm": 1.9275273084640503, - "learning_rate": 3.315196633884161e-06, - "loss": 0.663, + "epoch": 0.6807462161210841, + "grad_norm": 3.8235697746276855, + "learning_rate": 3.4205940468713078e-06, + "loss": 0.7199, "step": 9670 }, { - "epoch": 0.7317922136884719, - "grad_norm": 2.548799991607666, - "learning_rate": 3.3134507627610867e-06, - "loss": 0.743, + "epoch": 0.6808166138683562, + "grad_norm": 1.665531873703003, + "learning_rate": 3.4192224842675168e-06, + "loss": 0.6162, "step": 9671 }, { - "epoch": 0.7318678824108055, - "grad_norm": 1.8957780599594116, - "learning_rate": 3.311705247748113e-06, - "loss": 0.5394, + "epoch": 0.6808870116156283, + "grad_norm": 1.852760910987854, + "learning_rate": 3.417851107831594e-06, + "loss": 0.6304, "step": 9672 }, { - "epoch": 0.7319435511331391, - "grad_norm": 1.761271595954895, - "learning_rate": 3.3099600889545576e-06, - "loss": 0.7391, + "epoch": 0.6809574093629004, + "grad_norm": 1.7917510271072388, + "learning_rate": 3.416479917634834e-06, + "loss": 0.6335, "step": 9673 }, { - "epoch": 0.7320192198554727, - "grad_norm": 1.8656989336013794, - "learning_rate": 3.308215286489708e-06, - "loss": 0.5925, + "epoch": 0.6810278071101725, + "grad_norm": 1.6733452081680298, + "learning_rate": 3.4151089137485317e-06, + "loss": 0.6225, "step": 9674 }, { - "epoch": 0.7320948885778064, - "grad_norm": 2.2291691303253174, - "learning_rate": 3.306470840462824e-06, - "loss": 0.6399, + "epoch": 0.6810982048574445, + "grad_norm": 2.00618314743042, + "learning_rate": 3.41373809624396e-06, + "loss": 0.6379, "step": 9675 }, { - "epoch": 0.73217055730014, - "grad_norm": 2.45021390914917, - "learning_rate": 3.304726750983151e-06, - "loss": 0.7225, + "epoch": 0.6811686026047167, + "grad_norm": 2.0313611030578613, + "learning_rate": 3.4123674651923986e-06, + "loss": 0.6692, "step": 9676 }, { - "epoch": 0.7322462260224736, - "grad_norm": 1.7993860244750977, - "learning_rate": 3.30298301815991e-06, - "loss": 0.6022, + "epoch": 0.6812390003519887, + "grad_norm": 1.804292917251587, + "learning_rate": 3.410997020665096e-06, + "loss": 0.6686, "step": 9677 }, { - "epoch": 0.7323218947448072, - "grad_norm": 2.358670234680176, - "learning_rate": 3.301239642102298e-06, - "loss": 0.691, + "epoch": 0.6813093980992608, + "grad_norm": 2.040475845336914, + "learning_rate": 3.409626762733312e-06, + "loss": 0.7103, "step": 9678 }, { - "epoch": 0.7323975634671409, - "grad_norm": 4.461367130279541, - "learning_rate": 3.2994966229194917e-06, - "loss": 0.6848, + "epoch": 0.681379795846533, + "grad_norm": 2.067575216293335, + "learning_rate": 3.408256691468281e-06, + "loss": 0.552, "step": 9679 }, { - "epoch": 0.7324732321894745, - "grad_norm": 2.1369030475616455, - "learning_rate": 3.297753960720645e-06, - "loss": 0.7066, + "epoch": 0.681450193593805, + "grad_norm": 1.7989346981048584, + "learning_rate": 3.4068868069412376e-06, + "loss": 0.6633, "step": 9680 }, { - "epoch": 0.7325489009118081, - "grad_norm": 2.010079860687256, - "learning_rate": 3.296011655614891e-06, - "loss": 0.7084, + "epoch": 0.6815205913410771, + "grad_norm": 1.5478788614273071, + "learning_rate": 3.4055171092234057e-06, + "loss": 0.6202, "step": 9681 }, { - "epoch": 0.7326245696341417, - "grad_norm": 2.3091893196105957, - "learning_rate": 3.2942697077113305e-06, - "loss": 0.7503, + "epoch": 0.6815909890883491, + "grad_norm": 2.0661299228668213, + "learning_rate": 3.4041475983859944e-06, + "loss": 0.6726, "step": 9682 }, { - "epoch": 0.7327002383564754, - "grad_norm": 2.3691303730010986, - "learning_rate": 3.292528117119058e-06, - "loss": 0.6997, + "epoch": 0.6816613868356213, + "grad_norm": 1.97454035282135, + "learning_rate": 3.4027782745002073e-06, + "loss": 0.6259, "step": 9683 }, { - "epoch": 0.732775907078809, - "grad_norm": 3.361497402191162, - "learning_rate": 3.2907868839471364e-06, - "loss": 0.7454, + "epoch": 0.6817317845828933, + "grad_norm": 1.8558063507080078, + "learning_rate": 3.4014091376372315e-06, + "loss": 0.7048, "step": 9684 }, { - "epoch": 0.7328515758011426, - "grad_norm": 2.057619571685791, - "learning_rate": 3.2890460083046072e-06, - "loss": 0.7054, + "epoch": 0.6818021823301654, + "grad_norm": 2.4426748752593994, + "learning_rate": 3.4000401878682573e-06, + "loss": 0.7328, "step": 9685 }, { - "epoch": 0.7329272445234762, - "grad_norm": 2.1395699977874756, - "learning_rate": 3.2873054903004863e-06, - "loss": 0.5957, + "epoch": 0.6818725800774376, + "grad_norm": 1.9777158498764038, + "learning_rate": 3.398671425264451e-06, + "loss": 0.6249, "step": 9686 }, { - "epoch": 0.7330029132458098, - "grad_norm": 1.947824478149414, - "learning_rate": 3.28556533004377e-06, - "loss": 0.5955, + "epoch": 0.6819429778247096, + "grad_norm": 1.7155790328979492, + "learning_rate": 3.3973028498969813e-06, + "loss": 0.5337, "step": 9687 }, { - "epoch": 0.7330785819681435, - "grad_norm": 2.4234938621520996, - "learning_rate": 3.283825527643441e-06, - "loss": 0.7185, + "epoch": 0.6820133755719817, + "grad_norm": 1.9081308841705322, + "learning_rate": 3.3959344618369998e-06, + "loss": 0.6658, "step": 9688 }, { - "epoch": 0.7331542506904771, - "grad_norm": 4.633688926696777, - "learning_rate": 3.282086083208443e-06, - "loss": 0.7757, + "epoch": 0.6820837733192537, + "grad_norm": 1.5579705238342285, + "learning_rate": 3.3945662611556495e-06, + "loss": 0.6374, "step": 9689 }, { - "epoch": 0.7332299194128107, - "grad_norm": 2.446262836456299, - "learning_rate": 3.280346996847709e-06, - "loss": 0.655, + "epoch": 0.6821541710665259, + "grad_norm": 1.6553308963775635, + "learning_rate": 3.3931982479240618e-06, + "loss": 0.6524, "step": 9690 }, { - "epoch": 0.7333055881351443, - "grad_norm": 1.894422173500061, - "learning_rate": 3.2786082686701447e-06, - "loss": 0.7366, + "epoch": 0.682224568813798, + "grad_norm": 1.8052480220794678, + "learning_rate": 3.3918304222133666e-06, + "loss": 0.7326, "step": 9691 }, { - "epoch": 0.733381256857478, - "grad_norm": 2.1454946994781494, - "learning_rate": 3.2768698987846356e-06, - "loss": 0.5931, + "epoch": 0.68229496656107, + "grad_norm": 2.130686044692993, + "learning_rate": 3.390462784094672e-06, + "loss": 0.8323, "step": 9692 }, { - "epoch": 0.7334569255798116, - "grad_norm": 2.107937812805176, - "learning_rate": 3.2751318873000444e-06, - "loss": 0.5901, + "epoch": 0.6823653643083422, + "grad_norm": 1.680199384689331, + "learning_rate": 3.38909533363909e-06, + "loss": 0.6153, "step": 9693 }, { - "epoch": 0.7335325943021452, - "grad_norm": 1.8799843788146973, - "learning_rate": 3.2733942343252114e-06, - "loss": 0.7529, + "epoch": 0.6824357620556142, + "grad_norm": 1.6699665784835815, + "learning_rate": 3.3877280709177112e-06, + "loss": 0.5972, "step": 9694 }, { - "epoch": 0.7336082630244788, - "grad_norm": 2.41536808013916, - "learning_rate": 3.271656939968957e-06, - "loss": 0.7178, + "epoch": 0.6825061598028863, + "grad_norm": 1.7271040678024292, + "learning_rate": 3.386360996001622e-06, + "loss": 0.6507, "step": 9695 }, { - "epoch": 0.7336839317468125, - "grad_norm": 2.3177335262298584, - "learning_rate": 3.2699200043400684e-06, - "loss": 0.6441, + "epoch": 0.6825765575501584, + "grad_norm": 1.6077675819396973, + "learning_rate": 3.3849941089618936e-06, + "loss": 0.6812, "step": 9696 }, { - "epoch": 0.7337596004691461, - "grad_norm": 1.8029228448867798, - "learning_rate": 3.2681834275473205e-06, - "loss": 0.6193, + "epoch": 0.6826469552974305, + "grad_norm": 1.8001341819763184, + "learning_rate": 3.3836274098695986e-06, + "loss": 0.6142, "step": 9697 }, { - "epoch": 0.7338352691914797, - "grad_norm": 1.7246633768081665, - "learning_rate": 3.2664472096994678e-06, - "loss": 0.5477, + "epoch": 0.6827173530447026, + "grad_norm": 1.6883219480514526, + "learning_rate": 3.382260898795787e-06, + "loss": 0.6374, "step": 9698 }, { - "epoch": 0.7339109379138133, - "grad_norm": 2.265120029449463, - "learning_rate": 3.2647113509052387e-06, - "loss": 0.7033, + "epoch": 0.6827877507919746, + "grad_norm": 1.923975944519043, + "learning_rate": 3.380894575811511e-06, + "loss": 0.6915, "step": 9699 }, { - "epoch": 0.7339866066361469, - "grad_norm": 2.030282974243164, - "learning_rate": 3.2629758512733326e-06, - "loss": 0.6291, + "epoch": 0.6828581485392468, + "grad_norm": 1.8200277090072632, + "learning_rate": 3.3795284409878033e-06, + "loss": 0.7372, "step": 9700 }, { - "epoch": 0.7340622753584806, - "grad_norm": 2.392416477203369, - "learning_rate": 3.261240710912433e-06, - "loss": 0.7904, + "epoch": 0.6829285462865188, + "grad_norm": 1.8282440900802612, + "learning_rate": 3.378162494395691e-06, + "loss": 0.6605, "step": 9701 }, { - "epoch": 0.7341379440808142, - "grad_norm": 2.1410059928894043, - "learning_rate": 3.2595059299312027e-06, - "loss": 0.5866, + "epoch": 0.6829989440337909, + "grad_norm": 1.8709651231765747, + "learning_rate": 3.376796736106187e-06, + "loss": 0.6722, "step": 9702 }, { - "epoch": 0.7342136128031478, - "grad_norm": 2.2164275646209717, - "learning_rate": 3.2577715084382777e-06, - "loss": 0.7813, + "epoch": 0.683069341781063, + "grad_norm": 1.872685432434082, + "learning_rate": 3.375431166190306e-06, + "loss": 0.6957, "step": 9703 }, { - "epoch": 0.7342892815254815, - "grad_norm": 2.500359535217285, - "learning_rate": 3.256037446542273e-06, - "loss": 0.7013, + "epoch": 0.6831397395283351, + "grad_norm": 1.8798104524612427, + "learning_rate": 3.3740657847190382e-06, + "loss": 0.7581, "step": 9704 }, { - "epoch": 0.7343649502478151, - "grad_norm": 2.0464277267456055, - "learning_rate": 3.2543037443517825e-06, - "loss": 0.6824, + "epoch": 0.6832101372756072, + "grad_norm": 1.7929614782333374, + "learning_rate": 3.3727005917633766e-06, + "loss": 0.6343, "step": 9705 }, { - "epoch": 0.7344406189701487, - "grad_norm": 1.9826641082763672, - "learning_rate": 3.252570401975377e-06, - "loss": 0.6748, + "epoch": 0.6832805350228792, + "grad_norm": 1.720344066619873, + "learning_rate": 3.3713355873942966e-06, + "loss": 0.6756, "step": 9706 }, { - "epoch": 0.7345162876924823, - "grad_norm": 2.0672097206115723, - "learning_rate": 3.250837419521598e-06, - "loss": 0.5698, + "epoch": 0.6833509327701514, + "grad_norm": 1.7152900695800781, + "learning_rate": 3.3699707716827656e-06, + "loss": 0.5958, "step": 9707 }, { - "epoch": 0.7345919564148159, - "grad_norm": 1.9913432598114014, - "learning_rate": 3.2491047970989765e-06, - "loss": 0.7454, + "epoch": 0.6834213305174235, + "grad_norm": 1.8341413736343384, + "learning_rate": 3.368606144699739e-06, + "loss": 0.6938, "step": 9708 }, { - "epoch": 0.7346676251371496, - "grad_norm": 1.714163064956665, - "learning_rate": 3.2473725348160173e-06, - "loss": 0.5349, + "epoch": 0.6834917282646955, + "grad_norm": 1.6961334943771362, + "learning_rate": 3.3672417065161705e-06, + "loss": 0.7338, "step": 9709 }, { - "epoch": 0.7347432938594832, - "grad_norm": 1.9784096479415894, - "learning_rate": 3.2456406327811926e-06, - "loss": 0.6531, + "epoch": 0.6835621260119676, + "grad_norm": 1.6929547786712646, + "learning_rate": 3.365877457202993e-06, + "loss": 0.5902, "step": 9710 }, { - "epoch": 0.7348189625818168, - "grad_norm": 2.2186923027038574, - "learning_rate": 3.243909091102964e-06, - "loss": 0.695, + "epoch": 0.6836325237592397, + "grad_norm": 1.181296944618225, + "learning_rate": 3.3645133968311405e-06, + "loss": 0.752, "step": 9711 }, { - "epoch": 0.7348946313041504, - "grad_norm": 2.7318224906921387, - "learning_rate": 3.2421779098897644e-06, - "loss": 0.7293, + "epoch": 0.6837029215065118, + "grad_norm": 2.04714298248291, + "learning_rate": 3.3631495254715284e-06, + "loss": 0.76, "step": 9712 }, { - "epoch": 0.734970300026484, - "grad_norm": 2.105350971221924, - "learning_rate": 3.240447089250008e-06, - "loss": 0.6585, + "epoch": 0.6837733192537839, + "grad_norm": 1.7080050706863403, + "learning_rate": 3.3617858431950668e-06, + "loss": 0.6038, "step": 9713 }, { - "epoch": 0.7350459687488177, - "grad_norm": 2.2211616039276123, - "learning_rate": 3.2387166292920837e-06, - "loss": 0.7232, + "epoch": 0.683843717001056, + "grad_norm": 1.5223158597946167, + "learning_rate": 3.360422350072651e-06, + "loss": 0.6787, "step": 9714 }, { - "epoch": 0.7351216374711513, - "grad_norm": 2.1771297454833984, - "learning_rate": 3.2369865301243573e-06, - "loss": 0.5941, + "epoch": 0.6839141147483281, + "grad_norm": 2.244065999984741, + "learning_rate": 3.359059046175177e-06, + "loss": 0.753, "step": 9715 }, { - "epoch": 0.7351973061934849, - "grad_norm": 30.021831512451172, - "learning_rate": 3.2352567918551753e-06, - "loss": 0.7043, + "epoch": 0.6839845124956001, + "grad_norm": 1.5431822538375854, + "learning_rate": 3.3576959315735173e-06, + "loss": 0.7103, "step": 9716 }, { - "epoch": 0.7352729749158186, - "grad_norm": 1.4489507675170898, - "learning_rate": 3.233527414592861e-06, - "loss": 0.7254, + "epoch": 0.6840549102428722, + "grad_norm": 1.6715564727783203, + "learning_rate": 3.356333006338547e-06, + "loss": 0.6944, "step": 9717 }, { - "epoch": 0.7353486436381522, - "grad_norm": 2.0714879035949707, - "learning_rate": 3.231798398445705e-06, - "loss": 0.7017, + "epoch": 0.6841253079901443, + "grad_norm": 2.172600746154785, + "learning_rate": 3.3549702705411245e-06, + "loss": 0.7739, "step": 9718 }, { - "epoch": 0.7354243123604858, - "grad_norm": 2.2503437995910645, - "learning_rate": 3.230069743521993e-06, - "loss": 0.7195, + "epoch": 0.6841957057374164, + "grad_norm": 1.5589364767074585, + "learning_rate": 3.3536077242520986e-06, + "loss": 0.6548, "step": 9719 }, { - "epoch": 0.7354999810828194, - "grad_norm": 1.817765235900879, - "learning_rate": 3.2283414499299786e-06, - "loss": 0.6089, + "epoch": 0.6842661034846885, + "grad_norm": 1.4670262336730957, + "learning_rate": 3.3522453675423063e-06, + "loss": 0.634, "step": 9720 }, { - "epoch": 0.735575649805153, - "grad_norm": 1.9589232206344604, - "learning_rate": 3.2266135177778883e-06, - "loss": 0.7062, + "epoch": 0.6843365012319605, + "grad_norm": 2.058462381362915, + "learning_rate": 3.350883200482584e-06, + "loss": 0.667, "step": 9721 }, { - "epoch": 0.7356513185274867, - "grad_norm": 2.7293508052825928, - "learning_rate": 3.224885947173932e-06, - "loss": 0.7512, + "epoch": 0.6844068989792327, + "grad_norm": 2.2909421920776367, + "learning_rate": 3.3495212231437464e-06, + "loss": 0.7615, "step": 9722 }, { - "epoch": 0.7357269872498203, - "grad_norm": 2.42242169380188, - "learning_rate": 3.223158738226297e-06, - "loss": 0.8047, + "epoch": 0.6844772967265047, + "grad_norm": 2.250261068344116, + "learning_rate": 3.348159435596609e-06, + "loss": 0.6461, "step": 9723 }, { - "epoch": 0.7358026559721539, - "grad_norm": 2.5627288818359375, - "learning_rate": 3.221431891043146e-06, - "loss": 0.7915, + "epoch": 0.6845476944737768, + "grad_norm": 1.6619738340377808, + "learning_rate": 3.34679783791197e-06, + "loss": 0.6921, "step": 9724 }, { - "epoch": 0.7358783246944876, - "grad_norm": 1.7673484086990356, - "learning_rate": 3.2197054057326203e-06, - "loss": 0.7325, + "epoch": 0.684618092221049, + "grad_norm": 3.1846871376037598, + "learning_rate": 3.3454364301606195e-06, + "loss": 0.6925, "step": 9725 }, { - "epoch": 0.7359539934168211, - "grad_norm": 2.0500118732452393, - "learning_rate": 3.217979282402839e-06, - "loss": 0.6227, + "epoch": 0.684688489968321, + "grad_norm": 1.9113070964813232, + "learning_rate": 3.344075212413336e-06, + "loss": 0.6188, "step": 9726 }, { - "epoch": 0.7360296621391548, - "grad_norm": 2.1804354190826416, - "learning_rate": 3.216253521161894e-06, - "loss": 0.6206, + "epoch": 0.6847588877155931, + "grad_norm": 1.9448421001434326, + "learning_rate": 3.3427141847408963e-06, + "loss": 0.6807, "step": 9727 }, { - "epoch": 0.7361053308614884, - "grad_norm": 1.8224960565567017, - "learning_rate": 3.214528122117862e-06, - "loss": 0.7576, + "epoch": 0.6848292854628651, + "grad_norm": 1.1566511392593384, + "learning_rate": 3.3413533472140556e-06, + "loss": 0.8027, "step": 9728 }, { - "epoch": 0.736180999583822, - "grad_norm": 2.191704750061035, - "learning_rate": 3.212803085378792e-06, - "loss": 0.6808, + "epoch": 0.6848996832101373, + "grad_norm": 2.151968002319336, + "learning_rate": 3.339992699903571e-06, + "loss": 0.7468, "step": 9729 }, { - "epoch": 0.7362566683061557, - "grad_norm": 1.6620792150497437, - "learning_rate": 3.2110784110527098e-06, - "loss": 0.771, + "epoch": 0.6849700809574094, + "grad_norm": 1.6231049299240112, + "learning_rate": 3.3386322428801803e-06, + "loss": 0.5402, "step": 9730 }, { - "epoch": 0.7363323370284893, - "grad_norm": 2.0463523864746094, - "learning_rate": 3.2093540992476243e-06, - "loss": 0.5801, + "epoch": 0.6850404787046814, + "grad_norm": 1.7295827865600586, + "learning_rate": 3.3372719762146117e-06, + "loss": 0.6609, "step": 9731 }, { - "epoch": 0.7364080057508229, - "grad_norm": 1.8782941102981567, - "learning_rate": 3.207630150071512e-06, - "loss": 0.778, + "epoch": 0.6851108764519536, + "grad_norm": 1.8366844654083252, + "learning_rate": 3.335911899977594e-06, + "loss": 0.5769, "step": 9732 }, { - "epoch": 0.7364836744731565, - "grad_norm": 2.127807378768921, - "learning_rate": 3.205906563632331e-06, - "loss": 0.7317, + "epoch": 0.6851812741992256, + "grad_norm": 1.97739839553833, + "learning_rate": 3.334552014239832e-06, + "loss": 0.6351, "step": 9733 }, { - "epoch": 0.7365593431954901, - "grad_norm": 2.123108386993408, - "learning_rate": 3.2041833400380274e-06, - "loss": 0.5925, + "epoch": 0.6852516719464977, + "grad_norm": 1.6563621759414673, + "learning_rate": 3.333192319072033e-06, + "loss": 0.7049, "step": 9734 }, { - "epoch": 0.7366350119178238, - "grad_norm": 1.8136372566223145, - "learning_rate": 3.202460479396505e-06, - "loss": 0.7108, + "epoch": 0.6853220696937699, + "grad_norm": 1.561495304107666, + "learning_rate": 3.3318328145448864e-06, + "loss": 0.5648, "step": 9735 }, { - "epoch": 0.7367106806401574, - "grad_norm": 2.5573692321777344, - "learning_rate": 3.200737981815661e-06, - "loss": 0.8463, + "epoch": 0.6853924674410419, + "grad_norm": 1.7893108129501343, + "learning_rate": 3.330473500729073e-06, + "loss": 0.5896, "step": 9736 }, { - "epoch": 0.736786349362491, - "grad_norm": 2.7275161743164062, - "learning_rate": 3.19901584740336e-06, - "loss": 0.6705, + "epoch": 0.685462865188314, + "grad_norm": 1.8764113187789917, + "learning_rate": 3.3291143776952637e-06, + "loss": 0.675, "step": 9737 }, { - "epoch": 0.7368620180848247, - "grad_norm": 1.9936103820800781, - "learning_rate": 3.1972940762674494e-06, - "loss": 0.6206, + "epoch": 0.685533262935586, + "grad_norm": 1.665452241897583, + "learning_rate": 3.327755445514125e-06, + "loss": 0.6265, "step": 9738 }, { - "epoch": 0.7369376868071582, - "grad_norm": 1.7031792402267456, - "learning_rate": 3.195572668515753e-06, - "loss": 0.6619, + "epoch": 0.6856036606828582, + "grad_norm": 1.8162109851837158, + "learning_rate": 3.3263967042563045e-06, + "loss": 0.6354, "step": 9739 }, { - "epoch": 0.7370133555294919, - "grad_norm": 1.8480082750320435, - "learning_rate": 3.193851624256069e-06, - "loss": 0.6239, + "epoch": 0.6856740584301302, + "grad_norm": 4.292539596557617, + "learning_rate": 3.325038153992448e-06, + "loss": 0.5504, "step": 9740 }, { - "epoch": 0.7370890242518255, - "grad_norm": 1.9207595586776733, - "learning_rate": 3.192130943596176e-06, - "loss": 0.8244, + "epoch": 0.6857444561774023, + "grad_norm": 1.5601040124893188, + "learning_rate": 3.323679794793186e-06, + "loss": 0.5349, "step": 9741 }, { - "epoch": 0.7371646929741591, - "grad_norm": 1.9544023275375366, - "learning_rate": 3.190410626643831e-06, - "loss": 0.6302, + "epoch": 0.6858148539246745, + "grad_norm": 1.967842936515808, + "learning_rate": 3.322321626729142e-06, + "loss": 0.6792, "step": 9742 }, { - "epoch": 0.7372403616964928, - "grad_norm": 2.287046194076538, - "learning_rate": 3.188690673506757e-06, - "loss": 0.5985, + "epoch": 0.6858852516719465, + "grad_norm": 1.7688243389129639, + "learning_rate": 3.3209636498709237e-06, + "loss": 0.7132, "step": 9743 }, { - "epoch": 0.7373160304188264, - "grad_norm": 2.2530996799468994, - "learning_rate": 3.186971084292673e-06, - "loss": 0.7136, + "epoch": 0.6859556494192186, + "grad_norm": 2.313899517059326, + "learning_rate": 3.3196058642891402e-06, + "loss": 0.7338, "step": 9744 }, { - "epoch": 0.73739169914116, - "grad_norm": 2.2167491912841797, - "learning_rate": 3.1852518591092636e-06, - "loss": 0.6572, + "epoch": 0.6860260471664906, + "grad_norm": 1.8735427856445312, + "learning_rate": 3.3182482700543785e-06, + "loss": 0.6521, "step": 9745 }, { - "epoch": 0.7374673678634937, - "grad_norm": 1.5545406341552734, - "learning_rate": 3.1835329980641866e-06, - "loss": 0.7841, + "epoch": 0.6860964449137628, + "grad_norm": 2.304257869720459, + "learning_rate": 3.316890867237226e-06, + "loss": 0.694, "step": 9746 }, { - "epoch": 0.7375430365858272, - "grad_norm": 2.0609633922576904, - "learning_rate": 3.181814501265086e-06, - "loss": 0.6042, + "epoch": 0.6861668426610349, + "grad_norm": 1.8925052881240845, + "learning_rate": 3.3155336559082527e-06, + "loss": 0.7834, "step": 9747 }, { - "epoch": 0.7376187053081609, - "grad_norm": 2.269827365875244, - "learning_rate": 3.18009636881958e-06, - "loss": 0.8208, + "epoch": 0.6862372404083069, + "grad_norm": 2.2385096549987793, + "learning_rate": 3.3141766361380222e-06, + "loss": 0.6104, "step": 9748 }, { - "epoch": 0.7376943740304945, - "grad_norm": 2.116123914718628, - "learning_rate": 3.178378600835264e-06, - "loss": 0.693, + "epoch": 0.686307638155579, + "grad_norm": 2.0116429328918457, + "learning_rate": 3.3128198079970834e-06, + "loss": 0.6901, "step": 9749 }, { - "epoch": 0.7377700427528281, - "grad_norm": 2.1836884021759033, - "learning_rate": 3.176661197419708e-06, - "loss": 0.707, + "epoch": 0.6863780359028511, + "grad_norm": 1.9165788888931274, + "learning_rate": 3.311463171555985e-06, + "loss": 0.6453, "step": 9750 }, { - "epoch": 0.7378457114751618, - "grad_norm": 1.9921830892562866, - "learning_rate": 3.1749441586804633e-06, - "loss": 0.58, + "epoch": 0.6864484336501232, + "grad_norm": 1.9135514497756958, + "learning_rate": 3.310106726885254e-06, + "loss": 0.7627, "step": 9751 }, { - "epoch": 0.7379213801974953, - "grad_norm": 2.1456127166748047, - "learning_rate": 3.173227484725059e-06, - "loss": 0.6073, + "epoch": 0.6865188313973953, + "grad_norm": 1.8727658987045288, + "learning_rate": 3.308750474055419e-06, + "loss": 0.6638, "step": 9752 }, { - "epoch": 0.737997048919829, - "grad_norm": 2.2834341526031494, - "learning_rate": 3.1715111756609924e-06, - "loss": 0.6229, + "epoch": 0.6865892291446674, + "grad_norm": 1.8933016061782837, + "learning_rate": 3.307394413136989e-06, + "loss": 0.668, "step": 9753 }, { - "epoch": 0.7380727176421626, - "grad_norm": 2.3917596340179443, - "learning_rate": 3.1697952315957453e-06, - "loss": 0.7978, + "epoch": 0.6866596268919395, + "grad_norm": 2.046257734298706, + "learning_rate": 3.3060385442004684e-06, + "loss": 0.6432, "step": 9754 }, { - "epoch": 0.7381483863644962, - "grad_norm": 2.0401668548583984, - "learning_rate": 3.1680796526367804e-06, - "loss": 0.7177, + "epoch": 0.6867300246392115, + "grad_norm": 1.7414416074752808, + "learning_rate": 3.304682867316347e-06, + "loss": 0.6471, "step": 9755 }, { - "epoch": 0.7382240550868299, - "grad_norm": 2.360987424850464, - "learning_rate": 3.1663644388915333e-06, - "loss": 0.7348, + "epoch": 0.6868004223864836, + "grad_norm": 1.7298564910888672, + "learning_rate": 3.3033273825551117e-06, + "loss": 0.6549, "step": 9756 }, { - "epoch": 0.7382997238091635, - "grad_norm": 2.4300918579101562, - "learning_rate": 3.1646495904674113e-06, - "loss": 0.667, + "epoch": 0.6868708201337557, + "grad_norm": 1.6054898500442505, + "learning_rate": 3.301972089987231e-06, + "loss": 0.5841, "step": 9757 }, { - "epoch": 0.7383753925314971, - "grad_norm": 2.076064109802246, - "learning_rate": 3.162935107471805e-06, - "loss": 0.6606, + "epoch": 0.6869412178810278, + "grad_norm": 1.9043294191360474, + "learning_rate": 3.300616989683174e-06, + "loss": 0.6751, "step": 9758 }, { - "epoch": 0.7384510612538308, - "grad_norm": 2.9375874996185303, - "learning_rate": 3.1612209900120817e-06, - "loss": 0.6929, + "epoch": 0.6870116156282999, + "grad_norm": 1.7142916917800903, + "learning_rate": 3.2992620817133897e-06, + "loss": 0.5984, "step": 9759 }, { - "epoch": 0.7385267299761643, - "grad_norm": 2.2996110916137695, - "learning_rate": 3.159507238195584e-06, - "loss": 0.6716, + "epoch": 0.687082013375572, + "grad_norm": 1.7733039855957031, + "learning_rate": 3.2979073661483214e-06, + "loss": 0.6521, "step": 9760 }, { - "epoch": 0.738602398698498, - "grad_norm": 1.980484127998352, - "learning_rate": 3.1577938521296352e-06, - "loss": 0.6685, + "epoch": 0.6871524111228441, + "grad_norm": 1.5951529741287231, + "learning_rate": 3.2965528430583987e-06, + "loss": 0.7375, "step": 9761 }, { - "epoch": 0.7386780674208316, - "grad_norm": 3.1064846515655518, - "learning_rate": 3.1560808319215305e-06, - "loss": 0.7042, + "epoch": 0.6872228088701161, + "grad_norm": 1.6974502801895142, + "learning_rate": 3.2951985125140515e-06, + "loss": 0.7624, "step": 9762 }, { - "epoch": 0.7387537361431652, - "grad_norm": 2.007359743118286, - "learning_rate": 3.154368177678548e-06, - "loss": 0.6777, + "epoch": 0.6872932066173882, + "grad_norm": 1.585476279258728, + "learning_rate": 3.2938443745856855e-06, + "loss": 0.6223, "step": 9763 }, { - "epoch": 0.7388294048654989, - "grad_norm": 2.1503567695617676, - "learning_rate": 3.1526558895079316e-06, - "loss": 0.6476, + "epoch": 0.6873636043646604, + "grad_norm": 2.4664852619171143, + "learning_rate": 3.29249042934371e-06, + "loss": 0.6241, "step": 9764 }, { - "epoch": 0.7389050735878324, - "grad_norm": 1.7849518060684204, - "learning_rate": 3.15094396751692e-06, - "loss": 0.5983, + "epoch": 0.6874340021119324, + "grad_norm": 1.9692572355270386, + "learning_rate": 3.2911366768585117e-06, + "loss": 0.6327, "step": 9765 }, { - "epoch": 0.7389807423101661, - "grad_norm": 1.9997239112854004, - "learning_rate": 3.1492324118127173e-06, - "loss": 0.69, + "epoch": 0.6875043998592045, + "grad_norm": 1.7356185913085938, + "learning_rate": 3.289783117200482e-06, + "loss": 0.6416, "step": 9766 }, { - "epoch": 0.7390564110324997, - "grad_norm": 2.347898244857788, - "learning_rate": 3.147521222502502e-06, - "loss": 0.7001, + "epoch": 0.6875747976064766, + "grad_norm": 1.6607965230941772, + "learning_rate": 3.2884297504399826e-06, + "loss": 0.6596, "step": 9767 }, { - "epoch": 0.7391320797548333, - "grad_norm": 2.870927095413208, - "learning_rate": 3.145810399693437e-06, - "loss": 0.692, + "epoch": 0.6876451953537487, + "grad_norm": 1.9120712280273438, + "learning_rate": 3.2870765766473847e-06, + "loss": 0.7271, "step": 9768 }, { - "epoch": 0.739207748477167, - "grad_norm": 2.2380945682525635, - "learning_rate": 3.1440999434926564e-06, - "loss": 0.7641, + "epoch": 0.6877155931010208, + "grad_norm": 1.7009334564208984, + "learning_rate": 3.285723595893036e-06, + "loss": 0.687, "step": 9769 }, { - "epoch": 0.7392834171995006, - "grad_norm": 1.807690143585205, - "learning_rate": 3.1423898540072832e-06, - "loss": 0.6217, + "epoch": 0.6877859908482928, + "grad_norm": 1.5202540159225464, + "learning_rate": 3.2843708082472835e-06, + "loss": 0.7555, "step": 9770 }, { - "epoch": 0.7393590859218342, - "grad_norm": 2.3498446941375732, - "learning_rate": 3.140680131344401e-06, - "loss": 0.6596, + "epoch": 0.687856388595565, + "grad_norm": 1.8177286386489868, + "learning_rate": 3.283018213780456e-06, + "loss": 0.6414, "step": 9771 }, { - "epoch": 0.7394347546441679, - "grad_norm": 2.41398024559021, - "learning_rate": 3.13897077561108e-06, - "loss": 0.7398, + "epoch": 0.687926786342837, + "grad_norm": 1.8193960189819336, + "learning_rate": 3.281665812562882e-06, + "loss": 0.6718, "step": 9772 }, { - "epoch": 0.7395104233665014, - "grad_norm": 2.0578386783599854, - "learning_rate": 3.137261786914366e-06, - "loss": 0.7848, + "epoch": 0.6879971840901091, + "grad_norm": 1.6935453414916992, + "learning_rate": 3.2803136046648658e-06, + "loss": 0.6956, "step": 9773 }, { - "epoch": 0.7395860920888351, - "grad_norm": 2.1671581268310547, - "learning_rate": 3.1355531653612802e-06, - "loss": 0.562, + "epoch": 0.6880675818373813, + "grad_norm": 1.5766830444335938, + "learning_rate": 3.278961590156715e-06, + "loss": 0.645, "step": 9774 }, { - "epoch": 0.7396617608111687, - "grad_norm": 2.2448394298553467, - "learning_rate": 3.1338449110588247e-06, - "loss": 0.7788, + "epoch": 0.6881379795846533, + "grad_norm": 1.8148837089538574, + "learning_rate": 3.27760976910872e-06, + "loss": 0.7029, "step": 9775 }, { - "epoch": 0.7397374295335023, - "grad_norm": 1.992663025856018, - "learning_rate": 3.132137024113973e-06, - "loss": 0.7574, + "epoch": 0.6882083773319254, + "grad_norm": 2.1576030254364014, + "learning_rate": 3.2762581415911663e-06, + "loss": 0.6368, "step": 9776 }, { - "epoch": 0.739813098255836, - "grad_norm": 3.92378830909729, - "learning_rate": 3.1304295046336836e-06, - "loss": 0.5947, + "epoch": 0.6882787750791974, + "grad_norm": 2.0971839427948, + "learning_rate": 3.274906707674322e-06, + "loss": 0.6791, "step": 9777 }, { - "epoch": 0.7398887669781695, - "grad_norm": 1.903420090675354, - "learning_rate": 3.12872235272488e-06, - "loss": 0.6038, + "epoch": 0.6883491728264696, + "grad_norm": 1.884069561958313, + "learning_rate": 3.273555467428456e-06, + "loss": 0.7073, "step": 9778 }, { - "epoch": 0.7399644357005032, - "grad_norm": 2.0623772144317627, - "learning_rate": 3.1270155684944695e-06, - "loss": 0.7105, + "epoch": 0.6884195705737416, + "grad_norm": 2.1183488368988037, + "learning_rate": 3.272204420923812e-06, + "loss": 0.7184, "step": 9779 }, { - "epoch": 0.7400401044228369, - "grad_norm": 2.4272897243499756, - "learning_rate": 3.125309152049346e-06, - "loss": 0.6364, + "epoch": 0.6884899683210137, + "grad_norm": 1.8559417724609375, + "learning_rate": 3.2708535682306352e-06, + "loss": 0.5825, "step": 9780 }, { - "epoch": 0.7401157731451704, - "grad_norm": 2.0178956985473633, - "learning_rate": 3.1236031034963617e-06, - "loss": 0.7385, + "epoch": 0.6885603660682859, + "grad_norm": 1.8890330791473389, + "learning_rate": 3.2695029094191624e-06, + "loss": 0.6556, "step": 9781 }, { - "epoch": 0.7401914418675041, - "grad_norm": 1.974817156791687, - "learning_rate": 3.1218974229423575e-06, - "loss": 0.5617, + "epoch": 0.6886307638155579, + "grad_norm": 2.424203872680664, + "learning_rate": 3.268152444559609e-06, + "loss": 0.683, "step": 9782 }, { - "epoch": 0.7402671105898377, - "grad_norm": 2.286247968673706, - "learning_rate": 3.1201921104941478e-06, - "loss": 0.6671, + "epoch": 0.68870116156283, + "grad_norm": 1.6708738803863525, + "learning_rate": 3.2668021737221912e-06, + "loss": 0.738, "step": 9783 }, { - "epoch": 0.7403427793121713, - "grad_norm": 2.1284759044647217, - "learning_rate": 3.118487166258527e-06, - "loss": 0.6746, + "epoch": 0.688771559310102, + "grad_norm": 2.5689682960510254, + "learning_rate": 3.265452096977111e-06, + "loss": 0.6378, "step": 9784 }, { - "epoch": 0.740418448034505, - "grad_norm": 2.2996256351470947, - "learning_rate": 3.1167825903422616e-06, - "loss": 0.6687, + "epoch": 0.6888419570573742, + "grad_norm": 1.7556096315383911, + "learning_rate": 3.2641022143945577e-06, + "loss": 0.7864, "step": 9785 }, { - "epoch": 0.7404941167568385, - "grad_norm": 2.304643154144287, - "learning_rate": 3.1150783828521005e-06, - "loss": 0.6445, + "epoch": 0.6889123548046463, + "grad_norm": 1.8933371305465698, + "learning_rate": 3.262752526044711e-06, + "loss": 0.6483, "step": 9786 }, { - "epoch": 0.7405697854791722, - "grad_norm": 2.089303731918335, - "learning_rate": 3.1133745438947643e-06, - "loss": 0.5833, + "epoch": 0.6889827525519183, + "grad_norm": 1.9419466257095337, + "learning_rate": 3.261403031997748e-06, + "loss": 0.7188, "step": 9787 }, { - "epoch": 0.7406454542015058, - "grad_norm": 2.256558895111084, - "learning_rate": 3.1116710735769567e-06, - "loss": 0.7369, + "epoch": 0.6890531502991905, + "grad_norm": 2.064037799835205, + "learning_rate": 3.2600537323238243e-06, + "loss": 0.7826, "step": 9788 }, { - "epoch": 0.7407211229238394, - "grad_norm": 2.6049532890319824, - "learning_rate": 3.109967972005349e-06, - "loss": 0.5936, + "epoch": 0.6891235480464625, + "grad_norm": 1.825218677520752, + "learning_rate": 3.2587046270930967e-06, + "loss": 0.5555, "step": 9789 }, { - "epoch": 0.7407967916461731, - "grad_norm": 2.2916321754455566, - "learning_rate": 3.1082652392865946e-06, - "loss": 0.6695, + "epoch": 0.6891939457937346, + "grad_norm": 1.656275749206543, + "learning_rate": 3.2573557163757047e-06, + "loss": 0.6013, "step": 9790 }, { - "epoch": 0.7408724603685066, - "grad_norm": 2.5603201389312744, - "learning_rate": 3.1065628755273324e-06, - "loss": 0.5951, + "epoch": 0.6892643435410067, + "grad_norm": 1.6736310720443726, + "learning_rate": 3.2560070002417777e-06, + "loss": 0.5685, "step": 9791 }, { - "epoch": 0.7409481290908403, - "grad_norm": 2.134892225265503, - "learning_rate": 3.1048608808341624e-06, - "loss": 0.7521, + "epoch": 0.6893347412882788, + "grad_norm": 1.653631567955017, + "learning_rate": 3.2546584787614346e-06, + "loss": 0.5948, "step": 9792 }, { - "epoch": 0.741023797813174, - "grad_norm": 10.978889465332031, - "learning_rate": 3.103159255313671e-06, - "loss": 0.6364, + "epoch": 0.6894051390355509, + "grad_norm": 1.9809132814407349, + "learning_rate": 3.2533101520047928e-06, + "loss": 0.6493, "step": 9793 }, { - "epoch": 0.7410994665355075, - "grad_norm": 1.8416504859924316, - "learning_rate": 3.10145799907242e-06, - "loss": 0.5399, + "epoch": 0.6894755367828229, + "grad_norm": 1.6101429462432861, + "learning_rate": 3.2519620200419466e-06, + "loss": 0.6792, "step": 9794 }, { - "epoch": 0.7411751352578412, - "grad_norm": 2.196185350418091, - "learning_rate": 3.099757112216947e-06, - "loss": 0.6477, + "epoch": 0.689545934530095, + "grad_norm": 1.874140739440918, + "learning_rate": 3.2506140829429915e-06, + "loss": 0.7416, "step": 9795 }, { - "epoch": 0.7412508039801748, - "grad_norm": 2.4861955642700195, - "learning_rate": 3.098056594853767e-06, - "loss": 0.5316, + "epoch": 0.6896163322773671, + "grad_norm": 2.0044023990631104, + "learning_rate": 3.2492663407780064e-06, + "loss": 0.6419, "step": 9796 }, { - "epoch": 0.7413264727025084, - "grad_norm": 2.2695884704589844, - "learning_rate": 3.0963564470893736e-06, - "loss": 0.7883, + "epoch": 0.6896867300246392, + "grad_norm": 1.9635941982269287, + "learning_rate": 3.2479187936170603e-06, + "loss": 0.7253, "step": 9797 }, { - "epoch": 0.7414021414248421, - "grad_norm": 2.2510933876037598, - "learning_rate": 3.094656669030236e-06, - "loss": 0.7622, + "epoch": 0.6897571277719113, + "grad_norm": 1.8705487251281738, + "learning_rate": 3.246571441530212e-06, + "loss": 0.6559, "step": 9798 }, { - "epoch": 0.7414778101471756, - "grad_norm": 2.05856990814209, - "learning_rate": 3.0929572607827946e-06, - "loss": 0.5341, + "epoch": 0.6898275255191834, + "grad_norm": 2.071101665496826, + "learning_rate": 3.2452242845875163e-06, + "loss": 0.7379, "step": 9799 }, { - "epoch": 0.7415534788695093, - "grad_norm": 2.074747323989868, - "learning_rate": 3.0912582224534737e-06, - "loss": 0.6792, + "epoch": 0.6898979232664555, + "grad_norm": 1.8363176584243774, + "learning_rate": 3.243877322859009e-06, + "loss": 0.6223, "step": 9800 }, { - "epoch": 0.741629147591843, - "grad_norm": 2.0920283794403076, - "learning_rate": 3.089559554148676e-06, - "loss": 0.7247, + "epoch": 0.6899683210137275, + "grad_norm": 2.0794527530670166, + "learning_rate": 3.2425305564147223e-06, + "loss": 0.6776, "step": 9801 }, { - "epoch": 0.7417048163141765, - "grad_norm": 2.252413034439087, - "learning_rate": 3.0878612559747785e-06, - "loss": 0.6384, + "epoch": 0.6900387187609996, + "grad_norm": 1.7251089811325073, + "learning_rate": 3.241183985324676e-06, + "loss": 0.7132, "step": 9802 }, { - "epoch": 0.7417804850365102, - "grad_norm": 1.887231707572937, - "learning_rate": 3.0861633280381293e-06, - "loss": 0.5092, + "epoch": 0.6901091165082718, + "grad_norm": 1.5502578020095825, + "learning_rate": 3.2398376096588784e-06, + "loss": 0.7546, "step": 9803 }, { - "epoch": 0.7418561537588437, - "grad_norm": 2.2161378860473633, - "learning_rate": 3.08446577044506e-06, - "loss": 0.8336, + "epoch": 0.6901795142555438, + "grad_norm": 1.6666392087936401, + "learning_rate": 3.2384914294873266e-06, + "loss": 0.6857, "step": 9804 }, { - "epoch": 0.7419318224811774, - "grad_norm": 2.0824790000915527, - "learning_rate": 3.082768583301876e-06, - "loss": 0.7406, + "epoch": 0.6902499120028159, + "grad_norm": 1.8543604612350464, + "learning_rate": 3.237145444880014e-06, + "loss": 0.6124, "step": 9805 }, { - "epoch": 0.7420074912035111, - "grad_norm": 2.0326271057128906, - "learning_rate": 3.0810717667148635e-06, - "loss": 0.6042, + "epoch": 0.690320309750088, + "grad_norm": 1.7732105255126953, + "learning_rate": 3.2357996559069153e-06, + "loss": 0.729, "step": 9806 }, { - "epoch": 0.7420831599258446, - "grad_norm": 1.864272117614746, - "learning_rate": 3.07937532079028e-06, - "loss": 0.6879, + "epoch": 0.6903907074973601, + "grad_norm": 1.7156692743301392, + "learning_rate": 3.2344540626380036e-06, + "loss": 0.705, "step": 9807 }, { - "epoch": 0.7421588286481783, - "grad_norm": 2.2721335887908936, - "learning_rate": 3.0776792456343648e-06, - "loss": 0.7037, + "epoch": 0.6904611052446322, + "grad_norm": 1.922350287437439, + "learning_rate": 3.233108665143236e-06, + "loss": 0.7081, "step": 9808 }, { - "epoch": 0.742234497370512, - "grad_norm": 1.7374581098556519, - "learning_rate": 3.0759835413533324e-06, - "loss": 0.6843, + "epoch": 0.6905315029919042, + "grad_norm": 1.7373038530349731, + "learning_rate": 3.23176346349256e-06, + "loss": 0.6317, "step": 9809 }, { - "epoch": 0.7423101660928455, - "grad_norm": 2.2523179054260254, - "learning_rate": 3.0742882080533656e-06, - "loss": 0.705, + "epoch": 0.6906019007391764, + "grad_norm": 2.240330219268799, + "learning_rate": 3.230418457755912e-06, + "loss": 0.7687, "step": 9810 }, { - "epoch": 0.7423858348151792, - "grad_norm": 1.5839877128601074, - "learning_rate": 3.0725932458406395e-06, - "loss": 0.7204, + "epoch": 0.6906722984864484, + "grad_norm": 1.7646106481552124, + "learning_rate": 3.2290736480032253e-06, + "loss": 0.7504, "step": 9811 }, { - "epoch": 0.7424615035375127, - "grad_norm": 2.310640335083008, - "learning_rate": 3.0708986548212998e-06, - "loss": 0.635, + "epoch": 0.6907426962337205, + "grad_norm": 1.9697329998016357, + "learning_rate": 3.2277290343044115e-06, + "loss": 0.705, "step": 9812 }, { - "epoch": 0.7425371722598464, - "grad_norm": 2.1407198905944824, - "learning_rate": 3.06920443510146e-06, - "loss": 0.6782, + "epoch": 0.6908130939809926, + "grad_norm": 1.7590794563293457, + "learning_rate": 3.2263846167293845e-06, + "loss": 0.6152, "step": 9813 }, { - "epoch": 0.7426128409821801, - "grad_norm": 2.077183246612549, - "learning_rate": 3.067510586787221e-06, - "loss": 0.7058, + "epoch": 0.6908834917282647, + "grad_norm": 1.683505892753601, + "learning_rate": 3.2250403953480384e-06, + "loss": 0.6377, "step": 9814 }, { - "epoch": 0.7426885097045136, - "grad_norm": 1.9576934576034546, - "learning_rate": 3.065817109984654e-06, - "loss": 0.5691, + "epoch": 0.6909538894755368, + "grad_norm": 1.7183583974838257, + "learning_rate": 3.2236963702302616e-06, + "loss": 0.561, "step": 9815 }, { - "epoch": 0.7427641784268473, - "grad_norm": 2.2817611694335938, - "learning_rate": 3.0641240047998196e-06, - "loss": 0.7766, + "epoch": 0.6910242872228088, + "grad_norm": 2.2967069149017334, + "learning_rate": 3.2223525414459272e-06, + "loss": 0.7747, "step": 9816 }, { - "epoch": 0.7428398471491808, - "grad_norm": 2.1753251552581787, - "learning_rate": 3.062431271338736e-06, - "loss": 0.6152, + "epoch": 0.691094684970081, + "grad_norm": 1.6193082332611084, + "learning_rate": 3.2210089090649083e-06, + "loss": 0.8489, "step": 9817 }, { - "epoch": 0.7429155158715145, - "grad_norm": 2.1816024780273438, - "learning_rate": 3.0607389097074095e-06, - "loss": 0.6559, + "epoch": 0.691165082717353, + "grad_norm": 1.962459921836853, + "learning_rate": 3.2196654731570567e-06, + "loss": 0.7523, "step": 9818 }, { - "epoch": 0.7429911845938482, - "grad_norm": 2.068418264389038, - "learning_rate": 3.059046920011823e-06, - "loss": 0.7292, + "epoch": 0.6912354804646251, + "grad_norm": 1.6523611545562744, + "learning_rate": 3.218322233792223e-06, + "loss": 0.7792, "step": 9819 }, { - "epoch": 0.7430668533161817, - "grad_norm": 2.037598133087158, - "learning_rate": 3.057355302357934e-06, - "loss": 0.5673, + "epoch": 0.6913058782118973, + "grad_norm": 1.7787739038467407, + "learning_rate": 3.2169791910402416e-06, + "loss": 0.5944, "step": 9820 }, { - "epoch": 0.7431425220385154, - "grad_norm": 2.198431968688965, - "learning_rate": 3.055664056851677e-06, - "loss": 0.6868, + "epoch": 0.6913762759591693, + "grad_norm": 1.9278583526611328, + "learning_rate": 3.215636344970937e-06, + "loss": 0.5728, "step": 9821 }, { - "epoch": 0.743218190760849, - "grad_norm": 2.101435899734497, - "learning_rate": 3.0539731835989625e-06, - "loss": 0.6842, + "epoch": 0.6914466737064414, + "grad_norm": 1.86435866355896, + "learning_rate": 3.2142936956541246e-06, + "loss": 0.6688, "step": 9822 }, { - "epoch": 0.7432938594831826, - "grad_norm": 2.1221351623535156, - "learning_rate": 3.052282682705682e-06, - "loss": 0.6233, + "epoch": 0.6915170714537134, + "grad_norm": 1.6299535036087036, + "learning_rate": 3.2129512431596143e-06, + "loss": 0.5897, "step": 9823 }, { - "epoch": 0.7433695282055163, - "grad_norm": 1.9273860454559326, - "learning_rate": 3.0505925542776946e-06, - "loss": 0.6363, + "epoch": 0.6915874692009856, + "grad_norm": 1.7736730575561523, + "learning_rate": 3.2116089875571948e-06, + "loss": 0.7876, "step": 9824 }, { - "epoch": 0.7434451969278498, - "grad_norm": 2.380946159362793, - "learning_rate": 3.048902798420844e-06, - "loss": 0.665, + "epoch": 0.6916578669482577, + "grad_norm": 1.7368407249450684, + "learning_rate": 3.2102669289166585e-06, + "loss": 0.7047, "step": 9825 }, { - "epoch": 0.7435208656501835, - "grad_norm": 2.2486279010772705, - "learning_rate": 3.047213415240948e-06, - "loss": 0.6321, + "epoch": 0.6917282646955297, + "grad_norm": 1.8418902158737183, + "learning_rate": 3.2089250673077765e-06, + "loss": 0.6936, "step": 9826 }, { - "epoch": 0.7435965343725172, - "grad_norm": 1.9640283584594727, - "learning_rate": 3.0455244048438014e-06, - "loss": 0.5942, + "epoch": 0.6917986624428019, + "grad_norm": 1.717640995979309, + "learning_rate": 3.2075834028003137e-06, + "loss": 0.5509, "step": 9827 }, { - "epoch": 0.7436722030948507, - "grad_norm": 2.507197141647339, - "learning_rate": 3.043835767335177e-06, - "loss": 0.7769, + "epoch": 0.6918690601900739, + "grad_norm": 2.045048713684082, + "learning_rate": 3.2062419354640225e-06, + "loss": 0.6287, "step": 9828 }, { - "epoch": 0.7437478718171844, - "grad_norm": 2.5422580242156982, - "learning_rate": 3.0421475028208205e-06, - "loss": 0.7886, + "epoch": 0.691939457937346, + "grad_norm": 1.9776087999343872, + "learning_rate": 3.2049006653686505e-06, + "loss": 0.7094, "step": 9829 }, { - "epoch": 0.7438235405395179, - "grad_norm": 2.2974729537963867, - "learning_rate": 3.0404596114064573e-06, - "loss": 0.4964, + "epoch": 0.6920098556846181, + "grad_norm": 1.599056363105774, + "learning_rate": 3.2035595925839277e-06, + "loss": 0.6744, "step": 9830 }, { - "epoch": 0.7438992092618516, - "grad_norm": 2.1887059211730957, - "learning_rate": 3.038772093197789e-06, - "loss": 0.7204, + "epoch": 0.6920802534318902, + "grad_norm": 1.7318631410598755, + "learning_rate": 3.202218717179583e-06, + "loss": 0.6283, "step": 9831 }, { - "epoch": 0.7439748779841853, - "grad_norm": 2.602665901184082, - "learning_rate": 3.0370849483004927e-06, - "loss": 0.5673, + "epoch": 0.6921506511791623, + "grad_norm": 1.8172944784164429, + "learning_rate": 3.2008780392253258e-06, + "loss": 0.6566, "step": 9832 }, { - "epoch": 0.7440505467065188, - "grad_norm": 2.0209755897521973, - "learning_rate": 3.0353981768202243e-06, - "loss": 0.6575, + "epoch": 0.6922210489264343, + "grad_norm": 2.206665277481079, + "learning_rate": 3.1995375587908574e-06, + "loss": 0.706, "step": 9833 }, { - "epoch": 0.7441262154288525, - "grad_norm": 2.425705671310425, - "learning_rate": 3.033711778862616e-06, - "loss": 0.6502, + "epoch": 0.6922914466737065, + "grad_norm": 1.8388936519622803, + "learning_rate": 3.1981972759458767e-06, + "loss": 0.6891, "step": 9834 }, { - "epoch": 0.7442018841511862, - "grad_norm": 3.1738603115081787, - "learning_rate": 3.032025754533271e-06, - "loss": 0.5545, + "epoch": 0.6923618444209785, + "grad_norm": 1.5803570747375488, + "learning_rate": 3.1968571907600598e-06, + "loss": 0.5307, "step": 9835 }, { - "epoch": 0.7442775528735197, - "grad_norm": 1.8896595239639282, - "learning_rate": 3.0303401039377725e-06, - "loss": 0.5624, + "epoch": 0.6924322421682506, + "grad_norm": 1.6953201293945312, + "learning_rate": 3.195517303303084e-06, + "loss": 0.7655, "step": 9836 }, { - "epoch": 0.7443532215958534, - "grad_norm": 2.21313738822937, - "learning_rate": 3.0286548271816916e-06, - "loss": 0.6534, + "epoch": 0.6925026399155227, + "grad_norm": 1.8079746961593628, + "learning_rate": 3.1941776136446097e-06, + "loss": 0.6195, "step": 9837 }, { - "epoch": 0.7444288903181869, - "grad_norm": 2.004441499710083, - "learning_rate": 3.0269699243705555e-06, - "loss": 0.7336, + "epoch": 0.6925730376627948, + "grad_norm": 1.8813693523406982, + "learning_rate": 3.192838121854288e-06, + "loss": 0.6651, "step": 9838 }, { - "epoch": 0.7445045590405206, - "grad_norm": 2.018430233001709, - "learning_rate": 3.025285395609882e-06, - "loss": 0.6419, + "epoch": 0.6926434354100669, + "grad_norm": 2.1017818450927734, + "learning_rate": 3.191498828001757e-06, + "loss": 0.536, "step": 9839 }, { - "epoch": 0.7445802277628543, - "grad_norm": 2.3819639682769775, - "learning_rate": 3.0236012410051617e-06, - "loss": 0.6499, + "epoch": 0.6927138331573389, + "grad_norm": 1.6424918174743652, + "learning_rate": 3.1901597321566545e-06, + "loss": 0.6644, "step": 9840 }, { - "epoch": 0.7446558964851878, - "grad_norm": 2.396756172180176, - "learning_rate": 3.0219174606618614e-06, - "loss": 0.7293, + "epoch": 0.692784230904611, + "grad_norm": 2.1937224864959717, + "learning_rate": 3.1888208343885953e-06, + "loss": 0.7817, "step": 9841 }, { - "epoch": 0.7447315652075215, - "grad_norm": 1.8934662342071533, - "learning_rate": 3.0202340546854254e-06, - "loss": 0.6671, + "epoch": 0.6928546286518832, + "grad_norm": 1.7382913827896118, + "learning_rate": 3.187482134767195e-06, + "loss": 0.612, "step": 9842 }, { - "epoch": 0.744807233929855, - "grad_norm": 2.2285642623901367, - "learning_rate": 3.0185510231812736e-06, - "loss": 0.5863, + "epoch": 0.6929250263991552, + "grad_norm": 2.104919910430908, + "learning_rate": 3.1861436333620508e-06, + "loss": 0.6955, "step": 9843 }, { - "epoch": 0.7448829026521887, - "grad_norm": 1.9590516090393066, - "learning_rate": 3.0168683662548037e-06, - "loss": 0.57, + "epoch": 0.6929954241464273, + "grad_norm": 1.9142935276031494, + "learning_rate": 3.184805330242754e-06, + "loss": 0.6314, "step": 9844 }, { - "epoch": 0.7449585713745224, - "grad_norm": 2.253278970718384, - "learning_rate": 3.0151860840113916e-06, - "loss": 0.6678, + "epoch": 0.6930658218936994, + "grad_norm": 1.8659745454788208, + "learning_rate": 3.1834672254788804e-06, + "loss": 0.6622, "step": 9845 }, { - "epoch": 0.7450342400968559, - "grad_norm": 1.9592149257659912, - "learning_rate": 3.0135041765563778e-06, - "loss": 0.7633, + "epoch": 0.6931362196409715, + "grad_norm": 1.925085425376892, + "learning_rate": 3.1821293191400045e-06, + "loss": 0.6898, "step": 9846 }, { - "epoch": 0.7451099088191896, - "grad_norm": 2.0090091228485107, - "learning_rate": 3.011822643995098e-06, - "loss": 0.7217, + "epoch": 0.6932066173882436, + "grad_norm": 2.0238122940063477, + "learning_rate": 3.1807916112956804e-06, + "loss": 0.6385, "step": 9847 }, { - "epoch": 0.7451855775415233, - "grad_norm": 2.6432924270629883, - "learning_rate": 3.0101414864328547e-06, - "loss": 0.6495, + "epoch": 0.6932770151355157, + "grad_norm": 1.8964465856552124, + "learning_rate": 3.1794541020154625e-06, + "loss": 0.5925, "step": 9848 }, { - "epoch": 0.7452612462638568, - "grad_norm": 2.1458330154418945, - "learning_rate": 3.0084607039749234e-06, - "loss": 0.556, + "epoch": 0.6933474128827878, + "grad_norm": 1.7700719833374023, + "learning_rate": 3.178116791368885e-06, + "loss": 0.6858, "step": 9849 }, { - "epoch": 0.7453369149861905, - "grad_norm": 1.9538377523422241, - "learning_rate": 3.006780296726561e-06, - "loss": 0.582, + "epoch": 0.6934178106300598, + "grad_norm": 1.5642682313919067, + "learning_rate": 3.176779679425478e-06, + "loss": 0.5829, "step": 9850 }, { - "epoch": 0.745412583708524, - "grad_norm": 1.9687731266021729, - "learning_rate": 3.0051002647930002e-06, - "loss": 0.6966, + "epoch": 0.6934882083773319, + "grad_norm": 2.009078025817871, + "learning_rate": 3.175442766254754e-06, + "loss": 0.6342, "step": 9851 }, { - "epoch": 0.7454882524308577, - "grad_norm": 2.3369882106781006, - "learning_rate": 3.0034206082794515e-06, - "loss": 0.5864, + "epoch": 0.693558606124604, + "grad_norm": 4.457292556762695, + "learning_rate": 3.1741060519262283e-06, + "loss": 0.6855, "step": 9852 }, { - "epoch": 0.7455639211531914, - "grad_norm": 2.699866533279419, - "learning_rate": 3.0017413272911e-06, - "loss": 0.7418, + "epoch": 0.6936290038718761, + "grad_norm": 1.7879716157913208, + "learning_rate": 3.1727695365093903e-06, + "loss": 0.6995, "step": 9853 }, { - "epoch": 0.7456395898755249, - "grad_norm": 1.819517731666565, - "learning_rate": 3.000062421933107e-06, - "loss": 0.5972, + "epoch": 0.6936994016191482, + "grad_norm": 1.7567517757415771, + "learning_rate": 3.1714332200737334e-06, + "loss": 0.6989, "step": 9854 }, { - "epoch": 0.7457152585978586, - "grad_norm": 2.184372663497925, - "learning_rate": 2.9983838923106146e-06, - "loss": 0.6785, + "epoch": 0.6937697993664202, + "grad_norm": 1.6670564413070679, + "learning_rate": 3.1700971026887303e-06, + "loss": 0.6836, "step": 9855 }, { - "epoch": 0.7457909273201923, - "grad_norm": 1.910994291305542, - "learning_rate": 2.996705738528728e-06, - "loss": 0.6254, + "epoch": 0.6938401971136924, + "grad_norm": 1.6746838092803955, + "learning_rate": 3.1687611844238475e-06, + "loss": 0.7551, "step": 9856 }, { - "epoch": 0.7458665960425258, - "grad_norm": 5.832062244415283, - "learning_rate": 2.995027960692548e-06, - "loss": 0.5108, + "epoch": 0.6939105948609644, + "grad_norm": 1.623213768005371, + "learning_rate": 3.167425465348538e-06, + "loss": 0.6041, "step": 9857 }, { - "epoch": 0.7459422647648595, - "grad_norm": 4.642062664031982, - "learning_rate": 2.9933505589071393e-06, - "loss": 0.7348, + "epoch": 0.6939809926082365, + "grad_norm": 1.6682995557785034, + "learning_rate": 3.1660899455322525e-06, + "loss": 0.6757, "step": 9858 }, { - "epoch": 0.746017933487193, - "grad_norm": 2.5428013801574707, - "learning_rate": 2.9916735332775504e-06, - "loss": 0.6369, + "epoch": 0.6940513903555087, + "grad_norm": 2.054215669631958, + "learning_rate": 3.1647546250444195e-06, + "loss": 0.7159, "step": 9859 }, { - "epoch": 0.7460936022095267, - "grad_norm": 1.667399525642395, - "learning_rate": 2.989996883908794e-06, - "loss": 0.594, + "epoch": 0.6941217881027807, + "grad_norm": 1.6279382705688477, + "learning_rate": 3.16341950395447e-06, + "loss": 0.6111, "step": 9860 }, { - "epoch": 0.7461692709318604, - "grad_norm": 3.054075002670288, - "learning_rate": 2.9883206109058685e-06, - "loss": 0.7789, + "epoch": 0.6941921858500528, + "grad_norm": 1.4929808378219604, + "learning_rate": 3.1620845823318158e-06, + "loss": 0.5964, "step": 9861 }, { - "epoch": 0.7462449396541939, - "grad_norm": 2.8136708736419678, - "learning_rate": 2.9866447143737572e-06, - "loss": 0.5894, + "epoch": 0.6942625835973248, + "grad_norm": 1.7469303607940674, + "learning_rate": 3.1607498602458586e-06, + "loss": 0.6287, "step": 9862 }, { - "epoch": 0.7463206083765276, - "grad_norm": 2.113799571990967, - "learning_rate": 2.9849691944174e-06, - "loss": 0.5714, + "epoch": 0.694332981344597, + "grad_norm": 2.0467331409454346, + "learning_rate": 3.1594153377659916e-06, + "loss": 0.6918, "step": 9863 }, { - "epoch": 0.7463962770988611, - "grad_norm": 1.9772562980651855, - "learning_rate": 2.983294051141727e-06, - "loss": 0.6968, + "epoch": 0.6944033790918691, + "grad_norm": 1.7731214761734009, + "learning_rate": 3.1580810149616016e-06, + "loss": 0.7643, "step": 9864 }, { - "epoch": 0.7464719458211948, - "grad_norm": 2.416429042816162, - "learning_rate": 2.9816192846516415e-06, - "loss": 0.6939, + "epoch": 0.6944737768391411, + "grad_norm": 1.7016927003860474, + "learning_rate": 3.1567468919020564e-06, + "loss": 0.6056, "step": 9865 }, { - "epoch": 0.7465476145435285, - "grad_norm": 1.8433407545089722, - "learning_rate": 2.9799448950520247e-06, - "loss": 0.5994, + "epoch": 0.6945441745864133, + "grad_norm": 1.5842317342758179, + "learning_rate": 3.1554129686567245e-06, + "loss": 0.6301, "step": 9866 }, { - "epoch": 0.746623283265862, - "grad_norm": 2.278648853302002, - "learning_rate": 2.978270882447723e-06, - "loss": 0.7247, + "epoch": 0.6946145723336853, + "grad_norm": 1.8232433795928955, + "learning_rate": 3.1540792452949534e-06, + "loss": 0.7421, "step": 9867 }, { - "epoch": 0.7466989519881957, - "grad_norm": 2.0349230766296387, - "learning_rate": 2.976597246943579e-06, - "loss": 0.7355, + "epoch": 0.6946849700809574, + "grad_norm": 1.8021427392959595, + "learning_rate": 3.1527457218860855e-06, + "loss": 0.6131, "step": 9868 }, { - "epoch": 0.7467746207105294, - "grad_norm": 1.8349040746688843, - "learning_rate": 2.974923988644401e-06, - "loss": 0.7532, + "epoch": 0.6947553678282294, + "grad_norm": 1.9509978294372559, + "learning_rate": 3.151412398499449e-06, + "loss": 0.6919, "step": 9869 }, { - "epoch": 0.7468502894328629, - "grad_norm": 2.4737563133239746, - "learning_rate": 2.973251107654966e-06, - "loss": 0.6121, + "epoch": 0.6948257655755016, + "grad_norm": 2.3056869506835938, + "learning_rate": 3.15007927520437e-06, + "loss": 0.592, "step": 9870 }, { - "epoch": 0.7469259581551966, - "grad_norm": 2.696403741836548, - "learning_rate": 2.9715786040800403e-06, - "loss": 0.7659, + "epoch": 0.6948961633227737, + "grad_norm": 3.8052291870117188, + "learning_rate": 3.1487463520701534e-06, + "loss": 0.7171, "step": 9871 }, { - "epoch": 0.7470016268775301, - "grad_norm": 2.2334213256835938, - "learning_rate": 2.969906478024358e-06, - "loss": 0.7986, + "epoch": 0.6949665610700457, + "grad_norm": 2.664735794067383, + "learning_rate": 3.147413629166105e-06, + "loss": 0.587, "step": 9872 }, { - "epoch": 0.7470772955998638, - "grad_norm": 1.8761075735092163, - "learning_rate": 2.9682347295926405e-06, - "loss": 0.6269, + "epoch": 0.6950369588173179, + "grad_norm": 2.32882022857666, + "learning_rate": 3.146081106561509e-06, + "loss": 0.7728, "step": 9873 }, { - "epoch": 0.7471529643221975, - "grad_norm": 2.75467586517334, - "learning_rate": 2.9665633588895718e-06, - "loss": 0.6236, + "epoch": 0.6951073565645899, + "grad_norm": 2.0157485008239746, + "learning_rate": 3.1447487843256504e-06, + "loss": 0.6754, "step": 9874 }, { - "epoch": 0.747228633044531, - "grad_norm": 2.219914197921753, - "learning_rate": 2.964892366019819e-06, - "loss": 0.6861, + "epoch": 0.695177754311862, + "grad_norm": 6.115815162658691, + "learning_rate": 3.14341666252779e-06, + "loss": 0.5622, "step": 9875 }, { - "epoch": 0.7473043017668647, - "grad_norm": 1.8167731761932373, - "learning_rate": 2.9632217510880267e-06, - "loss": 0.6217, + "epoch": 0.6952481520591342, + "grad_norm": 1.8709293603897095, + "learning_rate": 3.1420847412371916e-06, + "loss": 0.639, "step": 9876 }, { - "epoch": 0.7473799704891982, - "grad_norm": 1.8947856426239014, - "learning_rate": 2.9615515141988137e-06, - "loss": 0.65, + "epoch": 0.6953185498064062, + "grad_norm": 3.946664571762085, + "learning_rate": 3.1407530205230995e-06, + "loss": 0.6322, "step": 9877 }, { - "epoch": 0.7474556392115319, - "grad_norm": 2.316633701324463, - "learning_rate": 2.959881655456775e-06, - "loss": 0.7939, + "epoch": 0.6953889475536783, + "grad_norm": 2.7696781158447266, + "learning_rate": 3.1394215004547555e-06, + "loss": 0.7086, "step": 9878 }, { - "epoch": 0.7475313079338656, - "grad_norm": 2.0706794261932373, - "learning_rate": 2.9582121749664843e-06, - "loss": 0.7122, + "epoch": 0.6954593453009503, + "grad_norm": 8.076419830322266, + "learning_rate": 3.1380901811013817e-06, + "loss": 0.726, "step": 9879 }, { - "epoch": 0.7476069766561991, - "grad_norm": 2.491586446762085, - "learning_rate": 2.956543072832491e-06, - "loss": 0.6208, + "epoch": 0.6955297430482225, + "grad_norm": 37.39459991455078, + "learning_rate": 3.136759062532202e-06, + "loss": 0.6528, "step": 9880 }, { - "epoch": 0.7476826453785328, - "grad_norm": 1.9248894453048706, - "learning_rate": 2.954874349159314e-06, - "loss": 0.5814, + "epoch": 0.6956001407954946, + "grad_norm": 1.7688554525375366, + "learning_rate": 3.1354281448164115e-06, + "loss": 0.6524, "step": 9881 }, { - "epoch": 0.7477583141008665, - "grad_norm": 2.1573891639709473, - "learning_rate": 2.9532060040514544e-06, - "loss": 0.7211, + "epoch": 0.6956705385427666, + "grad_norm": 1.692724585533142, + "learning_rate": 3.134097428023213e-06, + "loss": 0.5844, "step": 9882 }, { - "epoch": 0.7478339828232, - "grad_norm": 2.213338613510132, - "learning_rate": 2.9515380376133995e-06, - "loss": 0.5858, + "epoch": 0.6957409362900387, + "grad_norm": 1.729780673980713, + "learning_rate": 3.132766912221792e-06, + "loss": 0.6179, "step": 9883 }, { - "epoch": 0.7479096515455337, - "grad_norm": 2.1346771717071533, - "learning_rate": 2.9498704499495923e-06, - "loss": 0.6022, + "epoch": 0.6958113340373108, + "grad_norm": 2.1611130237579346, + "learning_rate": 3.1314365974813196e-06, + "loss": 0.6504, "step": 9884 }, { - "epoch": 0.7479853202678672, - "grad_norm": 2.2232656478881836, - "learning_rate": 2.9482032411644665e-06, - "loss": 0.5621, + "epoch": 0.6958817317845829, + "grad_norm": 1.872864842414856, + "learning_rate": 3.1301064838709644e-06, + "loss": 0.5596, "step": 9885 }, { - "epoch": 0.7480609889902009, - "grad_norm": 2.403750419616699, - "learning_rate": 2.946536411362427e-06, - "loss": 0.6615, + "epoch": 0.695952129531855, + "grad_norm": 1.8586758375167847, + "learning_rate": 3.1287765714598777e-06, + "loss": 0.6792, "step": 9886 }, { - "epoch": 0.7481366577125346, - "grad_norm": 2.009737491607666, - "learning_rate": 2.9448699606478564e-06, - "loss": 0.7192, + "epoch": 0.6960225272791271, + "grad_norm": 1.8439899682998657, + "learning_rate": 3.127446860317203e-06, + "loss": 0.7344, "step": 9887 }, { - "epoch": 0.7482123264348681, - "grad_norm": 1.6389305591583252, - "learning_rate": 2.943203889125114e-06, - "loss": 0.6288, + "epoch": 0.6960929250263992, + "grad_norm": 1.8635179996490479, + "learning_rate": 3.126117350512071e-06, + "loss": 0.6504, "step": 9888 }, { - "epoch": 0.7482879951572018, - "grad_norm": 2.2867612838745117, - "learning_rate": 2.941538196898534e-06, - "loss": 0.8133, + "epoch": 0.6961633227736712, + "grad_norm": 2.055717706680298, + "learning_rate": 3.1247880421136085e-06, + "loss": 0.7057, "step": 9889 }, { - "epoch": 0.7483636638795353, - "grad_norm": 3.108665704727173, - "learning_rate": 2.939872884072428e-06, - "loss": 0.5923, + "epoch": 0.6962337205209433, + "grad_norm": 1.7904536724090576, + "learning_rate": 3.1234589351909234e-06, + "loss": 0.6681, "step": 9890 }, { - "epoch": 0.748439332601869, - "grad_norm": 1.8590794801712036, - "learning_rate": 2.9382079507510856e-06, - "loss": 0.5962, + "epoch": 0.6963041182682154, + "grad_norm": 1.7483241558074951, + "learning_rate": 3.1221300298131213e-06, + "loss": 0.6602, "step": 9891 }, { - "epoch": 0.7485150013242027, - "grad_norm": 2.1245317459106445, - "learning_rate": 2.9365433970387614e-06, - "loss": 0.6682, + "epoch": 0.6963745160154875, + "grad_norm": 1.506252408027649, + "learning_rate": 3.1208013260492903e-06, + "loss": 0.7405, "step": 9892 }, { - "epoch": 0.7485906700465362, - "grad_norm": 2.0722525119781494, - "learning_rate": 2.9348792230397044e-06, - "loss": 0.616, + "epoch": 0.6964449137627596, + "grad_norm": 1.8096550703048706, + "learning_rate": 3.119472823968512e-06, + "loss": 0.6964, "step": 9893 }, { - "epoch": 0.7486663387688699, - "grad_norm": 2.0355629920959473, - "learning_rate": 2.9332154288581305e-06, - "loss": 0.7896, + "epoch": 0.6965153115100317, + "grad_norm": 1.7798280715942383, + "learning_rate": 3.118144523639854e-06, + "loss": 0.626, "step": 9894 }, { - "epoch": 0.7487420074912036, - "grad_norm": 2.689260244369507, - "learning_rate": 2.9315520145982257e-06, - "loss": 0.6665, + "epoch": 0.6965857092573038, + "grad_norm": 1.7485551834106445, + "learning_rate": 3.1168164251323795e-06, + "loss": 0.7572, "step": 9895 }, { - "epoch": 0.7488176762135371, - "grad_norm": 1.7102781534194946, - "learning_rate": 2.929888980364161e-06, - "loss": 0.6444, + "epoch": 0.6966561070045758, + "grad_norm": 1.6644424200057983, + "learning_rate": 3.1154885285151336e-06, + "loss": 0.6945, "step": 9896 }, { - "epoch": 0.7488933449358708, - "grad_norm": 2.3687448501586914, - "learning_rate": 2.9282263262600825e-06, - "loss": 0.8416, + "epoch": 0.6967265047518479, + "grad_norm": 1.778469443321228, + "learning_rate": 3.1141608338571604e-06, + "loss": 0.6262, "step": 9897 }, { - "epoch": 0.7489690136582043, - "grad_norm": 2.313998222351074, - "learning_rate": 2.926564052390109e-06, - "loss": 0.6892, + "epoch": 0.6967969024991201, + "grad_norm": 1.647702932357788, + "learning_rate": 3.112833341227484e-06, + "loss": 0.7133, "step": 9898 }, { - "epoch": 0.749044682380538, - "grad_norm": 1.732693076133728, - "learning_rate": 2.9249021588583393e-06, - "loss": 0.6822, + "epoch": 0.6968673002463921, + "grad_norm": 1.669845700263977, + "learning_rate": 3.111506050695123e-06, + "loss": 0.6301, "step": 9899 }, { - "epoch": 0.7491203511028717, - "grad_norm": 2.4626896381378174, - "learning_rate": 2.9232406457688444e-06, - "loss": 0.5485, + "epoch": 0.6969376979936642, + "grad_norm": 1.6794837713241577, + "learning_rate": 3.1101789623290808e-06, + "loss": 0.6021, "step": 9900 }, { - "epoch": 0.7491960198252052, - "grad_norm": 2.252591133117676, - "learning_rate": 2.9215795132256786e-06, - "loss": 0.7695, + "epoch": 0.6970080957409363, + "grad_norm": 1.6988531351089478, + "learning_rate": 3.10885207619836e-06, + "loss": 0.5916, "step": 9901 }, { - "epoch": 0.7492716885475389, - "grad_norm": 1.9376341104507446, - "learning_rate": 2.9199187613328577e-06, - "loss": 0.6194, + "epoch": 0.6970784934882084, + "grad_norm": 2.176445484161377, + "learning_rate": 3.1075253923719416e-06, + "loss": 0.6666, "step": 9902 }, { - "epoch": 0.7493473572698724, - "grad_norm": 2.1779584884643555, - "learning_rate": 2.9182583901943925e-06, - "loss": 0.7618, + "epoch": 0.6971488912354805, + "grad_norm": 1.7917377948760986, + "learning_rate": 3.106198910918806e-06, + "loss": 0.6541, "step": 9903 }, { - "epoch": 0.7494230259922061, - "grad_norm": 2.131627321243286, - "learning_rate": 2.9165983999142577e-06, - "loss": 0.7612, + "epoch": 0.6972192889827525, + "grad_norm": 1.8982311487197876, + "learning_rate": 3.104872631907915e-06, + "loss": 0.6418, "step": 9904 }, { - "epoch": 0.7494986947145398, - "grad_norm": 2.43209171295166, - "learning_rate": 2.9149387905964096e-06, - "loss": 0.5466, + "epoch": 0.6972896867300247, + "grad_norm": 1.776572823524475, + "learning_rate": 3.1035465554082233e-06, + "loss": 0.5696, "step": 9905 }, { - "epoch": 0.7495743634368733, - "grad_norm": 2.0824105739593506, - "learning_rate": 2.9132795623447736e-06, - "loss": 0.8629, + "epoch": 0.6973600844772967, + "grad_norm": 1.4719231128692627, + "learning_rate": 3.1022206814886724e-06, + "loss": 0.6739, "step": 9906 }, { - "epoch": 0.749650032159207, - "grad_norm": 2.1721785068511963, - "learning_rate": 2.9116207152632575e-06, - "loss": 0.6502, + "epoch": 0.6974304822245688, + "grad_norm": 1.6634774208068848, + "learning_rate": 3.100895010218202e-06, + "loss": 0.6156, "step": 9907 }, { - "epoch": 0.7497257008815407, - "grad_norm": 1.9518764019012451, - "learning_rate": 2.909962249455746e-06, - "loss": 0.7207, + "epoch": 0.6975008799718408, + "grad_norm": 1.8805830478668213, + "learning_rate": 3.099569541665728e-06, + "loss": 0.6759, "step": 9908 }, { - "epoch": 0.7498013696038742, - "grad_norm": 3.3193180561065674, - "learning_rate": 2.908304165026094e-06, - "loss": 0.7037, + "epoch": 0.697571277719113, + "grad_norm": 1.9529987573623657, + "learning_rate": 3.098244275900168e-06, + "loss": 0.6522, "step": 9909 }, { - "epoch": 0.7498770383262079, - "grad_norm": 2.249847412109375, - "learning_rate": 2.906646462078139e-06, - "loss": 0.8591, + "epoch": 0.6976416754663851, + "grad_norm": 1.4345203638076782, + "learning_rate": 3.096919212990422e-06, + "loss": 0.6092, "step": 9910 }, { - "epoch": 0.7499527070485414, - "grad_norm": 1.7684543132781982, - "learning_rate": 2.904989140715691e-06, - "loss": 0.6102, + "epoch": 0.6977120732136571, + "grad_norm": 2.105231761932373, + "learning_rate": 3.095594353005382e-06, + "loss": 0.599, "step": 9911 }, { - "epoch": 0.7500283757708751, - "grad_norm": 2.1191229820251465, - "learning_rate": 2.9033322010425397e-06, - "loss": 0.6022, + "epoch": 0.6977824709609293, + "grad_norm": 1.7613534927368164, + "learning_rate": 3.0942696960139235e-06, + "loss": 0.8087, "step": 9912 }, { - "epoch": 0.7501040444932088, - "grad_norm": 2.044253349304199, - "learning_rate": 2.901675643162439e-06, - "loss": 0.6631, + "epoch": 0.6978528687082013, + "grad_norm": 1.977926254272461, + "learning_rate": 3.092945242084924e-06, + "loss": 0.6145, "step": 9913 }, { - "epoch": 0.7501797132155423, - "grad_norm": 2.230672597885132, - "learning_rate": 2.9000194671791366e-06, - "loss": 0.7228, + "epoch": 0.6979232664554734, + "grad_norm": 1.7748931646347046, + "learning_rate": 3.091620991287237e-06, + "loss": 0.6962, "step": 9914 }, { - "epoch": 0.750255381937876, - "grad_norm": 4.245325088500977, - "learning_rate": 2.898363673196348e-06, - "loss": 0.7393, + "epoch": 0.6979936642027456, + "grad_norm": 1.6308856010437012, + "learning_rate": 3.0902969436897177e-06, + "loss": 0.637, "step": 9915 }, { - "epoch": 0.7503310506602096, - "grad_norm": 2.3195321559906006, - "learning_rate": 2.896708261317758e-06, - "loss": 0.5678, + "epoch": 0.6980640619500176, + "grad_norm": 1.8035162687301636, + "learning_rate": 3.0889730993612e-06, + "loss": 0.6599, "step": 9916 }, { - "epoch": 0.7504067193825432, - "grad_norm": 2.411245346069336, - "learning_rate": 2.8950532316470373e-06, - "loss": 0.7304, + "epoch": 0.6981344596972897, + "grad_norm": 1.5533947944641113, + "learning_rate": 3.0876494583705137e-06, + "loss": 0.6893, "step": 9917 }, { - "epoch": 0.7504823881048769, - "grad_norm": 3.026913642883301, - "learning_rate": 2.893398584287826e-06, - "loss": 0.6865, + "epoch": 0.6982048574445617, + "grad_norm": 1.694244384765625, + "learning_rate": 3.0863260207864726e-06, + "loss": 0.6558, "step": 9918 }, { - "epoch": 0.7505580568272104, - "grad_norm": 1.9332554340362549, - "learning_rate": 2.8917443193437524e-06, - "loss": 0.6483, + "epoch": 0.6982752551918339, + "grad_norm": 2.0870532989501953, + "learning_rate": 3.0850027866778888e-06, + "loss": 0.6597, "step": 9919 }, { - "epoch": 0.7506337255495441, - "grad_norm": 1.6384657621383667, - "learning_rate": 2.890090436918403e-06, - "loss": 0.659, + "epoch": 0.698345652939106, + "grad_norm": 2.5110483169555664, + "learning_rate": 3.083679756113553e-06, + "loss": 0.6625, "step": 9920 }, { - "epoch": 0.7507093942718778, - "grad_norm": 2.61690354347229, - "learning_rate": 2.888436937115353e-06, - "loss": 0.6423, + "epoch": 0.698416050686378, + "grad_norm": 1.9592125415802002, + "learning_rate": 3.0823569291622558e-06, + "loss": 0.5697, "step": 9921 }, { - "epoch": 0.7507850629942113, - "grad_norm": 1.9042266607284546, - "learning_rate": 2.886783820038149e-06, - "loss": 0.5829, + "epoch": 0.6984864484336502, + "grad_norm": 1.6685484647750854, + "learning_rate": 3.08103430589277e-06, + "loss": 0.6306, "step": 9922 }, { - "epoch": 0.750860731716545, - "grad_norm": 2.1988930702209473, - "learning_rate": 2.885131085790314e-06, - "loss": 0.6954, + "epoch": 0.6985568461809222, + "grad_norm": 1.6407891511917114, + "learning_rate": 3.0797118863738605e-06, + "loss": 0.6233, "step": 9923 }, { - "epoch": 0.7509364004388785, - "grad_norm": 2.5003511905670166, - "learning_rate": 2.8834787344753483e-06, - "loss": 0.5322, + "epoch": 0.6986272439281943, + "grad_norm": 1.6002750396728516, + "learning_rate": 3.078389670674277e-06, + "loss": 0.6019, "step": 9924 }, { - "epoch": 0.7510120691612122, - "grad_norm": 2.1539666652679443, - "learning_rate": 2.8818267661967285e-06, - "loss": 0.7318, + "epoch": 0.6986976416754663, + "grad_norm": 1.6346689462661743, + "learning_rate": 3.077067658862769e-06, + "loss": 0.6031, "step": 9925 }, { - "epoch": 0.7510877378835459, - "grad_norm": 1.925028681755066, - "learning_rate": 2.8801751810579074e-06, - "loss": 0.6704, + "epoch": 0.6987680394227385, + "grad_norm": 1.77586829662323, + "learning_rate": 3.0757458510080634e-06, + "loss": 0.6305, "step": 9926 }, { - "epoch": 0.7511634066058794, - "grad_norm": 1.9884802103042603, - "learning_rate": 2.8785239791623075e-06, - "loss": 0.6807, + "epoch": 0.6988384371700106, + "grad_norm": 1.5849015712738037, + "learning_rate": 3.074424247178888e-06, + "loss": 0.6636, "step": 9927 }, { - "epoch": 0.7512390753282131, - "grad_norm": 2.154848575592041, - "learning_rate": 2.8768731606133323e-06, - "loss": 0.6473, + "epoch": 0.6989088349172826, + "grad_norm": 1.782927393913269, + "learning_rate": 3.073102847443951e-06, + "loss": 0.5862, "step": 9928 }, { - "epoch": 0.7513147440505467, - "grad_norm": 2.2960104942321777, - "learning_rate": 2.8752227255143707e-06, - "loss": 0.6503, + "epoch": 0.6989792326645548, + "grad_norm": 1.6202399730682373, + "learning_rate": 3.071781651871954e-06, + "loss": 0.5503, "step": 9929 }, { - "epoch": 0.7513904127728803, - "grad_norm": 2.1243772506713867, - "learning_rate": 2.873572673968768e-06, - "loss": 0.9263, + "epoch": 0.6990496304118268, + "grad_norm": 1.6907997131347656, + "learning_rate": 3.0704606605315827e-06, + "loss": 0.7258, "step": 9930 }, { - "epoch": 0.751466081495214, - "grad_norm": 4.902968406677246, - "learning_rate": 2.8719230060798606e-06, - "loss": 0.7779, + "epoch": 0.6991200281590989, + "grad_norm": 1.6329880952835083, + "learning_rate": 3.069139873491522e-06, + "loss": 0.6715, "step": 9931 }, { - "epoch": 0.7515417502175475, - "grad_norm": 2.171704053878784, - "learning_rate": 2.870273721950955e-06, - "loss": 0.6809, + "epoch": 0.699190425906371, + "grad_norm": 2.2886157035827637, + "learning_rate": 3.0678192908204403e-06, + "loss": 0.6835, "step": 9932 }, { - "epoch": 0.7516174189398812, - "grad_norm": 2.409769296646118, - "learning_rate": 2.868624821685335e-06, - "loss": 0.6696, + "epoch": 0.6992608236536431, + "grad_norm": 1.7134819030761719, + "learning_rate": 3.0664989125869956e-06, + "loss": 0.6838, "step": 9933 }, { - "epoch": 0.7516930876622149, - "grad_norm": 2.15291690826416, - "learning_rate": 2.8669763053862595e-06, - "loss": 0.6879, + "epoch": 0.6993312214009152, + "grad_norm": 2.6969239711761475, + "learning_rate": 3.0651787388598346e-06, + "loss": 0.5845, "step": 9934 }, { - "epoch": 0.7517687563845484, - "grad_norm": 2.0499536991119385, - "learning_rate": 2.8653281731569645e-06, - "loss": 0.5733, + "epoch": 0.6994016191481872, + "grad_norm": 1.7654513120651245, + "learning_rate": 3.063858769707593e-06, + "loss": 0.6514, "step": 9935 }, { - "epoch": 0.7518444251068821, - "grad_norm": 1.5915296077728271, - "learning_rate": 2.8636804251006612e-06, - "loss": 0.593, + "epoch": 0.6994720168954593, + "grad_norm": 1.5988155603408813, + "learning_rate": 3.0625390051989005e-06, + "loss": 0.7556, "step": 9936 }, { - "epoch": 0.7519200938292157, - "grad_norm": 2.1210434436798096, - "learning_rate": 2.862033061320541e-06, - "loss": 0.5076, + "epoch": 0.6995424146427315, + "grad_norm": 1.7948905229568481, + "learning_rate": 3.0612194454023683e-06, + "loss": 0.7142, "step": 9937 }, { - "epoch": 0.7519957625515493, - "grad_norm": 2.2604899406433105, - "learning_rate": 2.8603860819197558e-06, - "loss": 0.6894, + "epoch": 0.6996128123900035, + "grad_norm": 1.8342785835266113, + "learning_rate": 3.059900090386607e-06, + "loss": 0.5125, "step": 9938 }, { - "epoch": 0.752071431273883, - "grad_norm": 2.7228496074676514, - "learning_rate": 2.8587394870014557e-06, - "loss": 0.7777, + "epoch": 0.6996832101372756, + "grad_norm": 1.5799202919006348, + "learning_rate": 3.0585809402202084e-06, + "loss": 0.6409, "step": 9939 }, { - "epoch": 0.7521470999962165, - "grad_norm": 1.9101241827011108, - "learning_rate": 2.857093276668755e-06, - "loss": 0.6761, + "epoch": 0.6997536078845477, + "grad_norm": 2.061084747314453, + "learning_rate": 3.057261994971756e-06, + "loss": 0.6223, "step": 9940 }, { - "epoch": 0.7522227687185502, - "grad_norm": 1.973021388053894, - "learning_rate": 2.8554474510247377e-06, - "loss": 0.4929, + "epoch": 0.6998240056318198, + "grad_norm": 1.915801763534546, + "learning_rate": 3.055943254709821e-06, + "loss": 0.7688, "step": 9941 }, { - "epoch": 0.7522984374408838, - "grad_norm": 1.9737138748168945, - "learning_rate": 2.8538020101724762e-06, - "loss": 0.6213, + "epoch": 0.6998944033790919, + "grad_norm": 1.6499098539352417, + "learning_rate": 3.054624719502971e-06, + "loss": 0.6124, "step": 9942 }, { - "epoch": 0.7523741061632174, - "grad_norm": 2.246549606323242, - "learning_rate": 2.852156954215012e-06, - "loss": 0.7567, + "epoch": 0.699964801126364, + "grad_norm": 2.113302707672119, + "learning_rate": 3.053306389419752e-06, + "loss": 0.7435, "step": 9943 }, { - "epoch": 0.7524497748855511, - "grad_norm": 1.7363240718841553, - "learning_rate": 2.850512283255364e-06, - "loss": 0.7059, + "epoch": 0.7000351988736361, + "grad_norm": 1.918148159980774, + "learning_rate": 3.051988264528711e-06, + "loss": 0.6815, "step": 9944 }, { - "epoch": 0.7525254436078846, - "grad_norm": 2.246518135070801, - "learning_rate": 2.8488679973965264e-06, - "loss": 0.8108, + "epoch": 0.7001055966209081, + "grad_norm": 1.687516689300537, + "learning_rate": 3.0506703448983753e-06, + "loss": 0.6553, "step": 9945 }, { - "epoch": 0.7526011123302183, - "grad_norm": 1.421762466430664, - "learning_rate": 2.84722409674147e-06, - "loss": 0.7699, + "epoch": 0.7001759943681802, + "grad_norm": 1.7586661577224731, + "learning_rate": 3.0493526305972653e-06, + "loss": 0.6385, "step": 9946 }, { - "epoch": 0.752676781052552, - "grad_norm": 2.417525291442871, - "learning_rate": 2.8455805813931415e-06, - "loss": 0.6468, + "epoch": 0.7002463921154523, + "grad_norm": 1.7426607608795166, + "learning_rate": 3.048035121693888e-06, + "loss": 0.7736, "step": 9947 }, { - "epoch": 0.7527524497748855, - "grad_norm": 2.013603687286377, - "learning_rate": 2.8439374514544645e-06, - "loss": 0.7207, + "epoch": 0.7003167898627244, + "grad_norm": 1.9727818965911865, + "learning_rate": 3.0467178182567455e-06, + "loss": 0.563, "step": 9948 }, { - "epoch": 0.7528281184972192, - "grad_norm": 2.3147010803222656, - "learning_rate": 2.8422947070283305e-06, - "loss": 0.6962, + "epoch": 0.7003871876099965, + "grad_norm": 1.7521144151687622, + "learning_rate": 3.0454007203543217e-06, + "loss": 0.7281, "step": 9949 }, { - "epoch": 0.7529037872195528, - "grad_norm": 2.1023120880126953, - "learning_rate": 2.840652348217622e-06, - "loss": 0.7563, + "epoch": 0.7004575853572685, + "grad_norm": 1.9237799644470215, + "learning_rate": 3.044083828055098e-06, + "loss": 0.7325, "step": 9950 }, { - "epoch": 0.7529794559418864, - "grad_norm": 2.0633652210235596, - "learning_rate": 2.8390103751251867e-06, - "loss": 0.5911, + "epoch": 0.7005279831045407, + "grad_norm": 2.0014851093292236, + "learning_rate": 3.042767141427539e-06, + "loss": 0.7079, "step": 9951 }, { - "epoch": 0.7530551246642201, - "grad_norm": 2.3509156703948975, - "learning_rate": 2.8373687878538466e-06, - "loss": 0.7062, + "epoch": 0.7005983808518127, + "grad_norm": 1.8069252967834473, + "learning_rate": 3.0414506605401014e-06, + "loss": 0.6676, "step": 9952 }, { - "epoch": 0.7531307933865536, - "grad_norm": 2.152987480163574, - "learning_rate": 2.8357275865064056e-06, - "loss": 0.6786, + "epoch": 0.7006687785990848, + "grad_norm": 1.7079813480377197, + "learning_rate": 3.040134385461225e-06, + "loss": 0.7015, "step": 9953 }, { - "epoch": 0.7532064621088873, - "grad_norm": 2.2282655239105225, - "learning_rate": 2.834086771185641e-06, - "loss": 0.648, + "epoch": 0.700739176346357, + "grad_norm": 1.5666134357452393, + "learning_rate": 3.0388183162593513e-06, + "loss": 0.5908, "step": 9954 }, { - "epoch": 0.7532821308312209, - "grad_norm": 3.348574638366699, - "learning_rate": 2.8324463419943045e-06, - "loss": 0.6576, + "epoch": 0.700809574093629, + "grad_norm": 1.878873348236084, + "learning_rate": 3.0375024530028986e-06, + "loss": 0.7736, "step": 9955 }, { - "epoch": 0.7533577995535545, - "grad_norm": 1.9638891220092773, - "learning_rate": 2.8308062990351275e-06, - "loss": 0.6466, + "epoch": 0.7008799718409011, + "grad_norm": 2.369109869003296, + "learning_rate": 3.0361867957602835e-06, + "loss": 0.6442, "step": 9956 }, { - "epoch": 0.7534334682758882, - "grad_norm": 2.2350552082061768, - "learning_rate": 2.8291666424108125e-06, - "loss": 0.6518, + "epoch": 0.7009503695881731, + "grad_norm": 1.7840155363082886, + "learning_rate": 3.0348713445999075e-06, + "loss": 0.7165, "step": 9957 }, { - "epoch": 0.7535091369982218, - "grad_norm": 1.747753620147705, - "learning_rate": 2.827527372224046e-06, - "loss": 0.756, + "epoch": 0.7010207673354453, + "grad_norm": 1.6194804906845093, + "learning_rate": 3.0335560995901615e-06, + "loss": 0.769, "step": 9958 }, { - "epoch": 0.7535848057205554, - "grad_norm": 2.364982843399048, - "learning_rate": 2.8258884885774716e-06, - "loss": 0.8052, + "epoch": 0.7010911650827174, + "grad_norm": 2.1263160705566406, + "learning_rate": 3.032241060799423e-06, + "loss": 0.7061, "step": 9959 }, { - "epoch": 0.7536604744428891, - "grad_norm": 1.823944330215454, - "learning_rate": 2.8242499915737346e-06, - "loss": 0.5839, + "epoch": 0.7011615628299894, + "grad_norm": 1.7201207876205444, + "learning_rate": 3.030926228296068e-06, + "loss": 0.7545, "step": 9960 }, { - "epoch": 0.7537361431652226, - "grad_norm": 1.797965168952942, - "learning_rate": 2.822611881315437e-06, - "loss": 0.6997, + "epoch": 0.7012319605772616, + "grad_norm": 1.9961334466934204, + "learning_rate": 3.0296116021484503e-06, + "loss": 0.7484, "step": 9961 }, { - "epoch": 0.7538118118875563, - "grad_norm": 3.2685093879699707, - "learning_rate": 2.8209741579051656e-06, - "loss": 0.8114, + "epoch": 0.7013023583245336, + "grad_norm": 1.7290037870407104, + "learning_rate": 3.028297182424924e-06, + "loss": 0.6662, "step": 9962 }, { - "epoch": 0.7538874806098899, - "grad_norm": 2.2074337005615234, - "learning_rate": 2.8193368214454753e-06, - "loss": 0.6622, + "epoch": 0.7013727560718057, + "grad_norm": 1.839990258216858, + "learning_rate": 3.026982969193824e-06, + "loss": 0.7829, "step": 9963 }, { - "epoch": 0.7539631493322235, - "grad_norm": 3.020928382873535, - "learning_rate": 2.8176998720389014e-06, - "loss": 0.7776, + "epoch": 0.7014431538190777, + "grad_norm": 1.6594538688659668, + "learning_rate": 3.0256689625234773e-06, + "loss": 0.5201, "step": 9964 }, { - "epoch": 0.7540388180545572, - "grad_norm": 2.763895034790039, - "learning_rate": 2.816063309787964e-06, - "loss": 0.7328, + "epoch": 0.7015135515663499, + "grad_norm": 1.9447969198226929, + "learning_rate": 3.0243551624821985e-06, + "loss": 0.555, "step": 9965 }, { - "epoch": 0.7541144867768907, - "grad_norm": 2.075176477432251, - "learning_rate": 2.8144271347951395e-06, - "loss": 0.6773, + "epoch": 0.701583949313622, + "grad_norm": 1.9169667959213257, + "learning_rate": 3.023041569138298e-06, + "loss": 0.7933, "step": 9966 }, { - "epoch": 0.7541901554992244, - "grad_norm": 2.109731912612915, - "learning_rate": 2.8127913471628942e-06, - "loss": 0.5774, + "epoch": 0.701654347060894, + "grad_norm": 1.8591681718826294, + "learning_rate": 3.021728182560065e-06, + "loss": 0.7258, "step": 9967 }, { - "epoch": 0.754265824221558, - "grad_norm": 1.6521979570388794, - "learning_rate": 2.811155946993668e-06, - "loss": 0.7379, + "epoch": 0.7017247448081662, + "grad_norm": 1.6500874757766724, + "learning_rate": 3.02041500281579e-06, + "loss": 0.7696, "step": 9968 }, { - "epoch": 0.7543414929438916, - "grad_norm": 2.2940516471862793, - "learning_rate": 2.809520934389872e-06, - "loss": 0.611, + "epoch": 0.7017951425554382, + "grad_norm": 1.8397413492202759, + "learning_rate": 3.0191020299737424e-06, + "loss": 0.6896, "step": 9969 }, { - "epoch": 0.7544171616662253, - "grad_norm": 3.290804862976074, - "learning_rate": 2.8078863094538983e-06, - "loss": 0.678, + "epoch": 0.7018655403027103, + "grad_norm": 1.7320343255996704, + "learning_rate": 3.017789264102186e-06, + "loss": 0.5827, "step": 9970 }, { - "epoch": 0.7544928303885589, - "grad_norm": 1.9227198362350464, - "learning_rate": 2.8062520722881114e-06, - "loss": 0.659, + "epoch": 0.7019359380499824, + "grad_norm": 1.9542497396469116, + "learning_rate": 3.0164767052693695e-06, + "loss": 0.8304, "step": 9971 }, { - "epoch": 0.7545684991108925, - "grad_norm": 2.2773373126983643, - "learning_rate": 2.8046182229948555e-06, - "loss": 0.5751, + "epoch": 0.7020063357972545, + "grad_norm": 1.961185336112976, + "learning_rate": 3.01516435354354e-06, + "loss": 0.6433, "step": 9972 }, { - "epoch": 0.7546441678332262, - "grad_norm": 2.041769504547119, - "learning_rate": 2.802984761676443e-06, - "loss": 0.6616, + "epoch": 0.7020767335445266, + "grad_norm": 1.7294867038726807, + "learning_rate": 3.013852208992921e-06, + "loss": 0.7757, "step": 9973 }, { - "epoch": 0.7547198365555597, - "grad_norm": 2.143829107284546, - "learning_rate": 2.8013516884351637e-06, - "loss": 0.7292, + "epoch": 0.7021471312917986, + "grad_norm": 1.6946605443954468, + "learning_rate": 3.012540271685739e-06, + "loss": 0.6939, "step": 9974 }, { - "epoch": 0.7547955052778934, - "grad_norm": 1.9663937091827393, - "learning_rate": 2.7997190033732943e-06, - "loss": 0.5109, + "epoch": 0.7022175290390708, + "grad_norm": 1.840102195739746, + "learning_rate": 3.011228541690199e-06, + "loss": 0.7231, "step": 9975 }, { - "epoch": 0.754871174000227, - "grad_norm": 2.2332675457000732, - "learning_rate": 2.7980867065930774e-06, - "loss": 0.6587, + "epoch": 0.7022879267863429, + "grad_norm": 1.9415065050125122, + "learning_rate": 3.0099170190745e-06, + "loss": 0.6195, "step": 9976 }, { - "epoch": 0.7549468427225606, - "grad_norm": 2.470423460006714, - "learning_rate": 2.796454798196729e-06, - "loss": 0.5856, + "epoch": 0.7023583245336149, + "grad_norm": 1.5610259771347046, + "learning_rate": 3.0086057039068266e-06, + "loss": 0.5266, "step": 9977 }, { - "epoch": 0.7550225114448943, - "grad_norm": 2.361738681793213, - "learning_rate": 2.7948232782864444e-06, - "loss": 0.6109, + "epoch": 0.702428722280887, + "grad_norm": 1.7736183404922485, + "learning_rate": 3.007294596255359e-06, + "loss": 0.5888, "step": 9978 }, { - "epoch": 0.7550981801672279, - "grad_norm": 2.0614898204803467, - "learning_rate": 2.793192146964397e-06, - "loss": 0.5809, + "epoch": 0.7024991200281591, + "grad_norm": 1.7453997135162354, + "learning_rate": 3.0059836961882597e-06, + "loss": 0.6833, "step": 9979 }, { - "epoch": 0.7551738488895615, - "grad_norm": 2.270296096801758, - "learning_rate": 2.791561404332731e-06, - "loss": 0.6174, + "epoch": 0.7025695177754312, + "grad_norm": 1.823598027229309, + "learning_rate": 3.0046730037736877e-06, + "loss": 0.627, "step": 9980 }, { - "epoch": 0.7552495176118951, - "grad_norm": 7.146130561828613, - "learning_rate": 2.7899310504935724e-06, - "loss": 0.5779, + "epoch": 0.7026399155227033, + "grad_norm": 1.7351704835891724, + "learning_rate": 3.0033625190797816e-06, + "loss": 0.6981, "step": 9981 }, { - "epoch": 0.7553251863342287, - "grad_norm": 2.3047327995300293, - "learning_rate": 2.788301085549016e-06, - "loss": 0.7432, + "epoch": 0.7027103132699754, + "grad_norm": 2.019728899002075, + "learning_rate": 3.0020522421746804e-06, + "loss": 0.603, "step": 9982 }, { - "epoch": 0.7554008550565624, - "grad_norm": 2.4408271312713623, - "learning_rate": 2.78667150960114e-06, - "loss": 0.6569, + "epoch": 0.7027807110172475, + "grad_norm": 1.6089885234832764, + "learning_rate": 3.000742173126504e-06, + "loss": 0.6049, "step": 9983 }, { - "epoch": 0.755476523778896, - "grad_norm": 2.420197010040283, - "learning_rate": 2.785042322751987e-06, - "loss": 0.8204, + "epoch": 0.7028511087645195, + "grad_norm": 1.894290566444397, + "learning_rate": 2.999432312003361e-06, + "loss": 0.6769, "step": 9984 }, { - "epoch": 0.7555521925012296, - "grad_norm": 5.947091579437256, - "learning_rate": 2.7834135251035825e-06, - "loss": 0.7756, + "epoch": 0.7029215065117916, + "grad_norm": 1.6145617961883545, + "learning_rate": 2.9981226588733587e-06, + "loss": 0.6359, "step": 9985 }, { - "epoch": 0.7556278612235633, - "grad_norm": 2.492147445678711, - "learning_rate": 2.781785116757936e-06, - "loss": 0.6647, + "epoch": 0.7029919042590637, + "grad_norm": 1.755478024482727, + "learning_rate": 2.9968132138045803e-06, + "loss": 0.5813, "step": 9986 }, { - "epoch": 0.7557035299458968, - "grad_norm": 2.13313889503479, - "learning_rate": 2.780157097817015e-06, - "loss": 0.7473, + "epoch": 0.7030623020063358, + "grad_norm": 1.7813531160354614, + "learning_rate": 2.995503976865111e-06, + "loss": 0.7243, "step": 9987 }, { - "epoch": 0.7557791986682305, - "grad_norm": 7.684272289276123, - "learning_rate": 2.778529468382774e-06, - "loss": 0.6594, + "epoch": 0.7031326997536079, + "grad_norm": 1.5463329553604126, + "learning_rate": 2.9941949481230175e-06, + "loss": 0.5378, "step": 9988 }, { - "epoch": 0.7558548673905641, - "grad_norm": 1.934888482093811, - "learning_rate": 2.7769022285571394e-06, - "loss": 0.5884, + "epoch": 0.70320309750088, + "grad_norm": 1.689437985420227, + "learning_rate": 2.9928861276463555e-06, + "loss": 0.6743, "step": 9989 }, { - "epoch": 0.7559305361128977, - "grad_norm": 2.01039457321167, - "learning_rate": 2.7752753784420167e-06, - "loss": 0.6648, + "epoch": 0.7032734952481521, + "grad_norm": 1.5676058530807495, + "learning_rate": 2.9915775155031708e-06, + "loss": 0.6412, "step": 9990 }, { - "epoch": 0.7560062048352314, - "grad_norm": 2.073888063430786, - "learning_rate": 2.7736489181392825e-06, - "loss": 0.6572, + "epoch": 0.7033438929954241, + "grad_norm": 2.1062145233154297, + "learning_rate": 2.9902691117615036e-06, + "loss": 0.6198, "step": 9991 }, { - "epoch": 0.756081873557565, - "grad_norm": 1.679289698600769, - "learning_rate": 2.772022847750791e-06, - "loss": 0.8544, + "epoch": 0.7034142907426962, + "grad_norm": 2.0691866874694824, + "learning_rate": 2.9889609164893743e-06, + "loss": 0.7145, "step": 9992 }, { - "epoch": 0.7561575422798986, - "grad_norm": 1.9221335649490356, - "learning_rate": 2.7703971673783728e-06, - "loss": 0.7504, + "epoch": 0.7034846884899684, + "grad_norm": 1.8989055156707764, + "learning_rate": 2.987652929754802e-06, + "loss": 0.5851, "step": 9993 }, { - "epoch": 0.7562332110022322, - "grad_norm": 2.1600656509399414, - "learning_rate": 2.768771877123836e-06, - "loss": 0.7859, + "epoch": 0.7035550862372404, + "grad_norm": 1.9078233242034912, + "learning_rate": 2.986345151625788e-06, + "loss": 0.6593, "step": 9994 }, { - "epoch": 0.7563088797245658, - "grad_norm": 2.2807884216308594, - "learning_rate": 2.7671469770889522e-06, - "loss": 0.6178, + "epoch": 0.7036254839845125, + "grad_norm": 1.7003302574157715, + "learning_rate": 2.9850375821703243e-06, + "loss": 0.7031, "step": 9995 }, { - "epoch": 0.7563845484468995, - "grad_norm": 2.230642557144165, - "learning_rate": 2.765522467375487e-06, - "loss": 0.5125, + "epoch": 0.7036958817317845, + "grad_norm": 2.9607975482940674, + "learning_rate": 2.98373022145639e-06, + "loss": 0.6085, "step": 9996 }, { - "epoch": 0.7564602171692331, - "grad_norm": 1.8637601137161255, - "learning_rate": 2.7638983480851724e-06, - "loss": 0.6613, + "epoch": 0.7037662794790567, + "grad_norm": 1.9153721332550049, + "learning_rate": 2.982423069551962e-06, + "loss": 0.6626, "step": 9997 }, { - "epoch": 0.7565358858915667, - "grad_norm": 2.0826594829559326, - "learning_rate": 2.7622746193197115e-06, - "loss": 0.8624, + "epoch": 0.7038366772263288, + "grad_norm": 1.7958123683929443, + "learning_rate": 2.9811161265249944e-06, + "loss": 0.6967, "step": 9998 }, { - "epoch": 0.7566115546139004, - "grad_norm": 2.7239291667938232, - "learning_rate": 2.7606512811807885e-06, - "loss": 0.7223, + "epoch": 0.7039070749736008, + "grad_norm": 1.985589623451233, + "learning_rate": 2.9798093924434412e-06, + "loss": 0.6212, "step": 9999 }, { - "epoch": 0.756687223336234, - "grad_norm": 1.7906194925308228, - "learning_rate": 2.7590283337700626e-06, - "loss": 0.8105, + "epoch": 0.703977472720873, + "grad_norm": 1.648389220237732, + "learning_rate": 2.978502867375239e-06, + "loss": 0.6354, "step": 10000 }, { - "epoch": 0.7567628920585676, - "grad_norm": 2.736328125, - "learning_rate": 2.757405777189168e-06, - "loss": 0.8085, + "epoch": 0.704047870468145, + "grad_norm": 2.2563416957855225, + "learning_rate": 2.977196551388314e-06, + "loss": 0.7408, "step": 10001 }, { - "epoch": 0.7568385607809012, - "grad_norm": 3.0120749473571777, - "learning_rate": 2.7557836115397153e-06, - "loss": 0.5936, + "epoch": 0.7041182682154171, + "grad_norm": 2.125845193862915, + "learning_rate": 2.975890444550582e-06, + "loss": 0.5976, "step": 10002 }, { - "epoch": 0.7569142295032348, - "grad_norm": 2.22469425201416, - "learning_rate": 2.754161836923289e-06, - "loss": 0.4523, + "epoch": 0.7041886659626891, + "grad_norm": 1.8960342407226562, + "learning_rate": 2.974584546929953e-06, + "loss": 0.6833, "step": 10003 }, { - "epoch": 0.7569898982255685, - "grad_norm": 2.1429076194763184, - "learning_rate": 2.7525404534414494e-06, - "loss": 0.5562, + "epoch": 0.7042590637099613, + "grad_norm": 1.8986183404922485, + "learning_rate": 2.9732788585943157e-06, + "loss": 0.6496, "step": 10004 }, { - "epoch": 0.7570655669479021, - "grad_norm": 2.4758174419403076, - "learning_rate": 2.750919461195734e-06, - "loss": 0.6039, + "epoch": 0.7043294614572334, + "grad_norm": 1.5489532947540283, + "learning_rate": 2.9719733796115605e-06, + "loss": 0.725, "step": 10005 }, { - "epoch": 0.7571412356702357, - "grad_norm": 1.938726782798767, - "learning_rate": 2.749298860287653e-06, - "loss": 0.6375, + "epoch": 0.7043998592045054, + "grad_norm": 1.7399208545684814, + "learning_rate": 2.970668110049557e-06, + "loss": 0.6495, "step": 10006 }, { - "epoch": 0.7572169043925693, - "grad_norm": 2.25929594039917, - "learning_rate": 2.7476786508186953e-06, - "loss": 0.7259, + "epoch": 0.7044702569517776, + "grad_norm": 1.7441058158874512, + "learning_rate": 2.9693630499761674e-06, + "loss": 0.6594, "step": 10007 }, { - "epoch": 0.757292573114903, - "grad_norm": 1.9672093391418457, - "learning_rate": 2.7460588328903265e-06, - "loss": 0.6031, + "epoch": 0.7045406546990496, + "grad_norm": 1.9479979276657104, + "learning_rate": 2.9680581994592406e-06, + "loss": 0.6068, "step": 10008 }, { - "epoch": 0.7573682418372366, - "grad_norm": 2.4729254245758057, - "learning_rate": 2.7444394066039776e-06, - "loss": 0.6826, + "epoch": 0.7046110524463217, + "grad_norm": 1.6649194955825806, + "learning_rate": 2.9667535585666223e-06, + "loss": 0.7722, "step": 10009 }, { - "epoch": 0.7574439105595702, - "grad_norm": 2.6728367805480957, - "learning_rate": 2.742820372061063e-06, - "loss": 0.6732, + "epoch": 0.7046814501935939, + "grad_norm": 2.0316741466522217, + "learning_rate": 2.965449127366137e-06, + "loss": 0.7279, "step": 10010 }, { - "epoch": 0.7575195792819038, - "grad_norm": 2.688096523284912, - "learning_rate": 2.7412017293629802e-06, - "loss": 0.7156, + "epoch": 0.7047518479408659, + "grad_norm": 1.4823980331420898, + "learning_rate": 2.9641449059256074e-06, + "loss": 0.6653, "step": 10011 }, { - "epoch": 0.7575952480042375, - "grad_norm": 3.0258054733276367, - "learning_rate": 2.7395834786110872e-06, - "loss": 0.5589, + "epoch": 0.704822245688138, + "grad_norm": 1.6513859033584595, + "learning_rate": 2.9628408943128395e-06, + "loss": 0.5652, "step": 10012 }, { - "epoch": 0.757670916726571, - "grad_norm": 1.7880308628082275, - "learning_rate": 2.7379656199067244e-06, - "loss": 0.6092, + "epoch": 0.70489264343541, + "grad_norm": 1.825790286064148, + "learning_rate": 2.96153709259563e-06, + "loss": 0.5563, "step": 10013 }, { - "epoch": 0.7577465854489047, - "grad_norm": 2.20332407951355, - "learning_rate": 2.736348153351208e-06, - "loss": 0.9061, + "epoch": 0.7049630411826822, + "grad_norm": 2.1634082794189453, + "learning_rate": 2.9602335008417623e-06, + "loss": 0.6662, "step": 10014 }, { - "epoch": 0.7578222541712383, - "grad_norm": 1.8708069324493408, - "learning_rate": 2.73473107904583e-06, - "loss": 0.6117, + "epoch": 0.7050334389299543, + "grad_norm": 2.0479843616485596, + "learning_rate": 2.958930119119015e-06, + "loss": 0.6805, "step": 10015 }, { - "epoch": 0.7578979228935719, - "grad_norm": 2.2508816719055176, - "learning_rate": 2.7331143970918554e-06, - "loss": 0.8897, + "epoch": 0.7051038366772263, + "grad_norm": 1.7242577075958252, + "learning_rate": 2.957626947495149e-06, + "loss": 0.6657, "step": 10016 }, { - "epoch": 0.7579735916159056, - "grad_norm": 2.168473243713379, - "learning_rate": 2.7314981075905277e-06, - "loss": 0.6392, + "epoch": 0.7051742344244984, + "grad_norm": 1.8510100841522217, + "learning_rate": 2.956323986037921e-06, + "loss": 0.6794, "step": 10017 }, { - "epoch": 0.7580492603382392, - "grad_norm": 1.8958176374435425, - "learning_rate": 2.729882210643066e-06, - "loss": 0.631, + "epoch": 0.7052446321717705, + "grad_norm": 1.995133399963379, + "learning_rate": 2.9550212348150712e-06, + "loss": 0.6038, "step": 10018 }, { - "epoch": 0.7581249290605728, - "grad_norm": 2.8258678913116455, - "learning_rate": 2.7282667063506567e-06, - "loss": 0.6969, + "epoch": 0.7053150299190426, + "grad_norm": 1.90489661693573, + "learning_rate": 2.953718693894331e-06, + "loss": 0.6511, "step": 10019 }, { - "epoch": 0.7582005977829064, - "grad_norm": 1.941758155822754, - "learning_rate": 2.7266515948144726e-06, - "loss": 0.6123, + "epoch": 0.7053854276663146, + "grad_norm": 1.4566690921783447, + "learning_rate": 2.952416363343418e-06, + "loss": 0.6184, "step": 10020 }, { - "epoch": 0.75827626650524, - "grad_norm": 2.0148000717163086, - "learning_rate": 2.7250368761356524e-06, - "loss": 0.7288, + "epoch": 0.7054558254135868, + "grad_norm": 1.8268593549728394, + "learning_rate": 2.951114243230046e-06, + "loss": 0.5912, "step": 10021 }, { - "epoch": 0.7583519352275737, - "grad_norm": 2.3907501697540283, - "learning_rate": 2.723422550415325e-06, - "loss": 0.7616, + "epoch": 0.7055262231608589, + "grad_norm": 1.544240117073059, + "learning_rate": 2.9498123336219097e-06, + "loss": 0.6227, "step": 10022 }, { - "epoch": 0.7584276039499073, - "grad_norm": 2.1037983894348145, - "learning_rate": 2.7218086177545744e-06, - "loss": 0.6279, + "epoch": 0.7055966209081309, + "grad_norm": 1.6924477815628052, + "learning_rate": 2.9485106345867e-06, + "loss": 0.6735, "step": 10023 }, { - "epoch": 0.7585032726722409, - "grad_norm": 1.8898531198501587, - "learning_rate": 2.7201950782544758e-06, - "loss": 0.6425, + "epoch": 0.705667018655403, + "grad_norm": 1.7307243347167969, + "learning_rate": 2.9472091461920913e-06, + "loss": 0.5756, "step": 10024 }, { - "epoch": 0.7585789413945746, - "grad_norm": 2.113624334335327, - "learning_rate": 2.7185819320160714e-06, - "loss": 0.6748, + "epoch": 0.7057374164026751, + "grad_norm": 1.7634615898132324, + "learning_rate": 2.945907868505751e-06, + "loss": 0.7331, "step": 10025 }, { - "epoch": 0.7586546101169082, - "grad_norm": 2.033940553665161, - "learning_rate": 2.7169691791403844e-06, - "loss": 0.5771, + "epoch": 0.7058078141499472, + "grad_norm": 1.8963921070098877, + "learning_rate": 2.9446068015953284e-06, + "loss": 0.5627, "step": 10026 }, { - "epoch": 0.7587302788392418, - "grad_norm": 2.1298940181732178, - "learning_rate": 2.715356819728408e-06, - "loss": 0.7522, + "epoch": 0.7058782118972193, + "grad_norm": 1.8217390775680542, + "learning_rate": 2.9433059455284737e-06, + "loss": 0.7204, "step": 10027 }, { - "epoch": 0.7588059475615754, - "grad_norm": 2.124619960784912, - "learning_rate": 2.7137448538811158e-06, - "loss": 0.6404, + "epoch": 0.7059486096444914, + "grad_norm": 1.8642678260803223, + "learning_rate": 2.9420053003728145e-06, + "loss": 0.726, "step": 10028 }, { - "epoch": 0.758881616283909, - "grad_norm": 1.9840469360351562, - "learning_rate": 2.712133281699454e-06, - "loss": 0.5566, + "epoch": 0.7060190073917635, + "grad_norm": 2.07973051071167, + "learning_rate": 2.9407048661959773e-06, + "loss": 0.6786, "step": 10029 }, { - "epoch": 0.7589572850062427, - "grad_norm": 2.5573880672454834, - "learning_rate": 2.710522103284342e-06, - "loss": 0.7079, + "epoch": 0.7060894051390355, + "grad_norm": 2.2167539596557617, + "learning_rate": 2.9394046430655703e-06, + "loss": 0.6252, "step": 10030 }, { - "epoch": 0.7590329537285763, - "grad_norm": 2.176572561264038, - "learning_rate": 2.7089113187366758e-06, - "loss": 0.5414, + "epoch": 0.7061598028863076, + "grad_norm": 1.703397512435913, + "learning_rate": 2.938104631049194e-06, + "loss": 0.5628, "step": 10031 }, { - "epoch": 0.7591086224509099, - "grad_norm": 2.228116273880005, - "learning_rate": 2.7073009281573362e-06, - "loss": 0.7058, + "epoch": 0.7062302006335798, + "grad_norm": 2.9558167457580566, + "learning_rate": 2.9368048302144335e-06, + "loss": 0.7147, "step": 10032 }, { - "epoch": 0.7591842911732435, - "grad_norm": 2.373213052749634, - "learning_rate": 2.705690931647162e-06, - "loss": 0.7606, + "epoch": 0.7063005983808518, + "grad_norm": 1.7365111112594604, + "learning_rate": 2.9355052406288695e-06, + "loss": 0.6775, "step": 10033 }, { - "epoch": 0.7592599598955772, - "grad_norm": 1.8033760786056519, - "learning_rate": 2.704081329306981e-06, - "loss": 0.716, + "epoch": 0.7063709961281239, + "grad_norm": 1.7735717296600342, + "learning_rate": 2.934205862360072e-06, + "loss": 0.7053, "step": 10034 }, { - "epoch": 0.7593356286179108, - "grad_norm": 2.362074375152588, - "learning_rate": 2.70247212123759e-06, - "loss": 0.7064, + "epoch": 0.706441393875396, + "grad_norm": 1.928139090538025, + "learning_rate": 2.932906695475594e-06, + "loss": 0.7563, "step": 10035 }, { - "epoch": 0.7594112973402444, - "grad_norm": 2.1984481811523438, - "learning_rate": 2.700863307539763e-06, - "loss": 0.7345, + "epoch": 0.7065117916226681, + "grad_norm": 1.7576345205307007, + "learning_rate": 2.9316077400429807e-06, + "loss": 0.7335, "step": 10036 }, { - "epoch": 0.759486966062578, - "grad_norm": 3.2609875202178955, - "learning_rate": 2.699254888314251e-06, - "loss": 0.7909, + "epoch": 0.7065821893699402, + "grad_norm": 1.7541346549987793, + "learning_rate": 2.9303089961297626e-06, + "loss": 0.6386, "step": 10037 }, { - "epoch": 0.7595626347849117, - "grad_norm": 1.9195688962936401, - "learning_rate": 2.697646863661776e-06, - "loss": 0.6074, + "epoch": 0.7066525871172122, + "grad_norm": 1.8031818866729736, + "learning_rate": 2.9290104638034677e-06, + "loss": 0.6517, "step": 10038 }, { - "epoch": 0.7596383035072453, - "grad_norm": 2.3153951168060303, - "learning_rate": 2.6960392336830385e-06, - "loss": 0.7519, + "epoch": 0.7067229848644844, + "grad_norm": 2.138974189758301, + "learning_rate": 2.927712143131603e-06, + "loss": 0.6623, "step": 10039 }, { - "epoch": 0.7597139722295789, - "grad_norm": 2.753959894180298, - "learning_rate": 2.6944319984787166e-06, - "loss": 0.6649, + "epoch": 0.7067933826117564, + "grad_norm": 1.6375983953475952, + "learning_rate": 2.9264140341816754e-06, + "loss": 0.5633, "step": 10040 }, { - "epoch": 0.7597896409519125, - "grad_norm": 2.4450736045837402, - "learning_rate": 2.692825158149452e-06, - "loss": 0.7087, + "epoch": 0.7068637803590285, + "grad_norm": 2.053149461746216, + "learning_rate": 2.9251161370211705e-06, + "loss": 0.7817, "step": 10041 }, { - "epoch": 0.7598653096742461, - "grad_norm": 1.9317896366119385, - "learning_rate": 2.691218712795879e-06, - "loss": 0.5861, + "epoch": 0.7069341781063005, + "grad_norm": 1.8916683197021484, + "learning_rate": 2.923818451717569e-06, + "loss": 0.6106, "step": 10042 }, { - "epoch": 0.7599409783965798, - "grad_norm": 2.266354560852051, - "learning_rate": 2.689612662518598e-06, - "loss": 0.6434, + "epoch": 0.7070045758535727, + "grad_norm": 1.6641654968261719, + "learning_rate": 2.922520978338335e-06, + "loss": 0.7052, "step": 10043 }, { - "epoch": 0.7600166471189134, - "grad_norm": 1.8731240034103394, - "learning_rate": 2.6880070074181794e-06, - "loss": 0.5763, + "epoch": 0.7070749736008448, + "grad_norm": 2.1250782012939453, + "learning_rate": 2.9212237169509306e-06, + "loss": 0.6432, "step": 10044 }, { - "epoch": 0.760092315841247, - "grad_norm": 2.1654038429260254, - "learning_rate": 2.6864017475951778e-06, - "loss": 0.5699, + "epoch": 0.7071453713481168, + "grad_norm": 1.7730687856674194, + "learning_rate": 2.9199266676227963e-06, + "loss": 0.6636, "step": 10045 }, { - "epoch": 0.7601679845635806, - "grad_norm": 2.76196026802063, - "learning_rate": 2.6847968831501187e-06, - "loss": 0.6435, + "epoch": 0.707215769095389, + "grad_norm": 1.9411559104919434, + "learning_rate": 2.918629830421372e-06, + "loss": 0.6501, "step": 10046 }, { - "epoch": 0.7602436532859143, - "grad_norm": 2.265397548675537, - "learning_rate": 2.6831924141835052e-06, - "loss": 0.6767, + "epoch": 0.707286166842661, + "grad_norm": 1.634627103805542, + "learning_rate": 2.91733320541408e-06, + "loss": 0.6396, "step": 10047 }, { - "epoch": 0.7603193220082479, - "grad_norm": 1.6206222772598267, - "learning_rate": 2.6815883407958136e-06, - "loss": 0.6914, + "epoch": 0.7073565645899331, + "grad_norm": 2.273071050643921, + "learning_rate": 2.916036792668332e-06, + "loss": 0.6539, "step": 10048 }, { - "epoch": 0.7603949907305815, - "grad_norm": 2.7728967666625977, - "learning_rate": 2.6799846630874965e-06, - "loss": 0.7587, + "epoch": 0.7074269623372053, + "grad_norm": 1.532799482345581, + "learning_rate": 2.9147405922515265e-06, + "loss": 0.6132, "step": 10049 }, { - "epoch": 0.7604706594529151, - "grad_norm": 2.363548517227173, - "learning_rate": 2.678381381158981e-06, - "loss": 0.7465, + "epoch": 0.7074973600844773, + "grad_norm": 1.8561948537826538, + "learning_rate": 2.91344460423106e-06, + "loss": 0.5857, "step": 10050 }, { - "epoch": 0.7605463281752488, - "grad_norm": 1.4986516237258911, - "learning_rate": 2.67677849511067e-06, - "loss": 0.5781, + "epoch": 0.7075677578317494, + "grad_norm": 1.6562203168869019, + "learning_rate": 2.912148828674308e-06, + "loss": 0.5901, "step": 10051 }, { - "epoch": 0.7606219968975824, - "grad_norm": 2.2090630531311035, - "learning_rate": 2.6751760050429415e-06, - "loss": 0.7364, + "epoch": 0.7076381555790214, + "grad_norm": 1.8164520263671875, + "learning_rate": 2.9108532656486427e-06, + "loss": 0.6051, "step": 10052 }, { - "epoch": 0.760697665619916, - "grad_norm": 2.0763497352600098, - "learning_rate": 2.673573911056148e-06, - "loss": 0.9323, + "epoch": 0.7077085533262936, + "grad_norm": 2.0443215370178223, + "learning_rate": 2.9095579152214186e-06, + "loss": 0.6902, "step": 10053 }, { - "epoch": 0.7607733343422496, - "grad_norm": 1.6616204977035522, - "learning_rate": 2.6719722132506225e-06, - "loss": 0.6069, + "epoch": 0.7077789510735657, + "grad_norm": 1.7646472454071045, + "learning_rate": 2.9082627774599835e-06, + "loss": 0.5907, "step": 10054 }, { - "epoch": 0.7608490030645833, - "grad_norm": 2.090778112411499, - "learning_rate": 2.67037091172666e-06, - "loss": 0.676, + "epoch": 0.7078493488208377, + "grad_norm": 1.7659674882888794, + "learning_rate": 2.90696785243167e-06, + "loss": 0.7406, "step": 10055 }, { - "epoch": 0.7609246717869169, - "grad_norm": 1.8353502750396729, - "learning_rate": 2.6687700065845417e-06, - "loss": 0.4939, + "epoch": 0.7079197465681099, + "grad_norm": 1.6204605102539062, + "learning_rate": 2.905673140203808e-06, + "loss": 0.6348, "step": 10056 }, { - "epoch": 0.7610003405092505, - "grad_norm": 1.8233598470687866, - "learning_rate": 2.667169497924528e-06, - "loss": 0.5752, + "epoch": 0.7079901443153819, + "grad_norm": 1.8817546367645264, + "learning_rate": 2.9043786408437042e-06, + "loss": 0.6891, "step": 10057 }, { - "epoch": 0.7610760092315841, - "grad_norm": 1.976509928703308, - "learning_rate": 2.6655693858468413e-06, - "loss": 0.6282, + "epoch": 0.708060542062654, + "grad_norm": 1.962913990020752, + "learning_rate": 2.903084354418666e-06, + "loss": 0.7941, "step": 10058 }, { - "epoch": 0.7611516779539177, - "grad_norm": 3.537311553955078, - "learning_rate": 2.6639696704516876e-06, - "loss": 0.7171, + "epoch": 0.708130939809926, + "grad_norm": 1.8554470539093018, + "learning_rate": 2.901790280995983e-06, + "loss": 0.706, "step": 10059 }, { - "epoch": 0.7612273466762514, - "grad_norm": 2.408458709716797, - "learning_rate": 2.6623703518392456e-06, - "loss": 0.733, + "epoch": 0.7082013375571982, + "grad_norm": 1.8062853813171387, + "learning_rate": 2.9004964206429354e-06, + "loss": 0.6888, "step": 10060 }, { - "epoch": 0.761303015398585, - "grad_norm": 1.9443494081497192, - "learning_rate": 2.6607714301096737e-06, - "loss": 0.6078, + "epoch": 0.7082717353044703, + "grad_norm": 1.639888048171997, + "learning_rate": 2.8992027734267885e-06, + "loss": 0.5485, "step": 10061 }, { - "epoch": 0.7613786841209186, - "grad_norm": 2.5981321334838867, - "learning_rate": 2.659172905363094e-06, - "loss": 0.7041, + "epoch": 0.7083421330517423, + "grad_norm": 1.9975818395614624, + "learning_rate": 2.897909339414806e-06, + "loss": 0.6366, "step": 10062 }, { - "epoch": 0.7614543528432522, - "grad_norm": 2.7510721683502197, - "learning_rate": 2.657574777699617e-06, - "loss": 0.7332, + "epoch": 0.7084125307990145, + "grad_norm": 2.1892645359039307, + "learning_rate": 2.8966161186742295e-06, + "loss": 0.6088, "step": 10063 }, { - "epoch": 0.7615300215655859, - "grad_norm": 2.1146605014801025, - "learning_rate": 2.6559770472193217e-06, - "loss": 0.6687, + "epoch": 0.7084829285462865, + "grad_norm": 2.4466724395751953, + "learning_rate": 2.8953231112723e-06, + "loss": 0.7141, "step": 10064 }, { - "epoch": 0.7616056902879195, - "grad_norm": 2.1599109172821045, - "learning_rate": 2.654379714022266e-06, - "loss": 0.6745, + "epoch": 0.7085533262935586, + "grad_norm": 2.4621660709381104, + "learning_rate": 2.8940303172762394e-06, + "loss": 0.6163, "step": 10065 }, { - "epoch": 0.7616813590102531, - "grad_norm": 2.144578695297241, - "learning_rate": 2.6527827782084733e-06, - "loss": 0.6877, + "epoch": 0.7086237240408307, + "grad_norm": 1.685118317604065, + "learning_rate": 2.8927377367532604e-06, + "loss": 0.6521, "step": 10066 }, { - "epoch": 0.7617570277325867, - "grad_norm": 3.8134841918945312, - "learning_rate": 2.6511862398779495e-06, - "loss": 0.6743, + "epoch": 0.7086941217881028, + "grad_norm": 3.233722686767578, + "learning_rate": 2.891445369770564e-06, + "loss": 0.6756, "step": 10067 }, { - "epoch": 0.7618326964549204, - "grad_norm": 1.7415173053741455, - "learning_rate": 2.6495900991306847e-06, - "loss": 0.6796, + "epoch": 0.7087645195353749, + "grad_norm": 3.4760682582855225, + "learning_rate": 2.8901532163953467e-06, + "loss": 0.8055, "step": 10068 }, { - "epoch": 0.761908365177254, - "grad_norm": 2.2260866165161133, - "learning_rate": 2.647994356066624e-06, - "loss": 0.7098, + "epoch": 0.7088349172826469, + "grad_norm": 2.1165568828582764, + "learning_rate": 2.8888612766947836e-06, + "loss": 0.6775, "step": 10069 }, { - "epoch": 0.7619840338995876, - "grad_norm": 2.0269641876220703, - "learning_rate": 2.6463990107857016e-06, - "loss": 0.7222, + "epoch": 0.708905315029919, + "grad_norm": 2.0811569690704346, + "learning_rate": 2.887569550736048e-06, + "loss": 0.6411, "step": 10070 }, { - "epoch": 0.7620597026219212, - "grad_norm": 2.0193490982055664, - "learning_rate": 2.6448040633878226e-06, - "loss": 0.6655, + "epoch": 0.7089757127771912, + "grad_norm": 1.802912950515747, + "learning_rate": 2.886278038586296e-06, + "loss": 0.6361, "step": 10071 }, { - "epoch": 0.7621353713442548, - "grad_norm": 2.8648266792297363, - "learning_rate": 2.6432095139728695e-06, - "loss": 0.6558, + "epoch": 0.7090461105244632, + "grad_norm": 1.9324332475662231, + "learning_rate": 2.884986740312674e-06, + "loss": 0.7376, "step": 10072 }, { - "epoch": 0.7622110400665885, - "grad_norm": 2.253610610961914, - "learning_rate": 2.641615362640696e-06, - "loss": 0.5851, + "epoch": 0.7091165082717353, + "grad_norm": 1.5957167148590088, + "learning_rate": 2.883695655982316e-06, + "loss": 0.7293, "step": 10073 }, { - "epoch": 0.7622867087889221, - "grad_norm": 2.2244420051574707, - "learning_rate": 2.6400216094911348e-06, - "loss": 0.6313, + "epoch": 0.7091869060190074, + "grad_norm": 1.6647099256515503, + "learning_rate": 2.8824047856623506e-06, + "loss": 0.6568, "step": 10074 }, { - "epoch": 0.7623623775112557, - "grad_norm": 2.7857227325439453, - "learning_rate": 2.638428254623993e-06, - "loss": 0.7529, + "epoch": 0.7092573037662795, + "grad_norm": 1.778939127922058, + "learning_rate": 2.8811141294198884e-06, + "loss": 0.6771, "step": 10075 }, { - "epoch": 0.7624380462335894, - "grad_norm": 1.7802857160568237, - "learning_rate": 2.636835298139048e-06, - "loss": 0.616, + "epoch": 0.7093277015135515, + "grad_norm": 1.8343428373336792, + "learning_rate": 2.8798236873220343e-06, + "loss": 0.7314, "step": 10076 }, { - "epoch": 0.762513714955923, - "grad_norm": 2.1480860710144043, - "learning_rate": 2.635242740136054e-06, - "loss": 0.6676, + "epoch": 0.7093980992608236, + "grad_norm": 1.8079493045806885, + "learning_rate": 2.8785334594358785e-06, + "loss": 0.639, "step": 10077 }, { - "epoch": 0.7625893836782566, - "grad_norm": 1.7942516803741455, - "learning_rate": 2.6336505807147486e-06, - "loss": 0.6174, + "epoch": 0.7094684970080958, + "grad_norm": 2.220045328140259, + "learning_rate": 2.8772434458285e-06, + "loss": 0.7172, "step": 10078 }, { - "epoch": 0.7626650524005902, - "grad_norm": 1.9763191938400269, - "learning_rate": 2.6320588199748383e-06, - "loss": 0.7353, + "epoch": 0.7095388947553678, + "grad_norm": 2.0146853923797607, + "learning_rate": 2.8759536465669677e-06, + "loss": 0.706, "step": 10079 }, { - "epoch": 0.7627407211229238, - "grad_norm": 1.8652597665786743, - "learning_rate": 2.6304674580159983e-06, - "loss": 0.7269, + "epoch": 0.7096092925026399, + "grad_norm": 1.6228104829788208, + "learning_rate": 2.8746640617183413e-06, + "loss": 0.6793, "step": 10080 }, { - "epoch": 0.7628163898452575, - "grad_norm": 1.8752639293670654, - "learning_rate": 2.628876494937888e-06, - "loss": 0.6001, + "epoch": 0.709679690249912, + "grad_norm": 1.6667505502700806, + "learning_rate": 2.8733746913496646e-06, + "loss": 0.5433, "step": 10081 }, { - "epoch": 0.7628920585675911, - "grad_norm": 1.8885382413864136, - "learning_rate": 2.6272859308401375e-06, - "loss": 0.6663, + "epoch": 0.7097500879971841, + "grad_norm": 1.8715029954910278, + "learning_rate": 2.8720855355279744e-06, + "loss": 0.5804, "step": 10082 }, { - "epoch": 0.7629677272899247, - "grad_norm": 2.4886248111724854, - "learning_rate": 2.6256957658223537e-06, - "loss": 0.7086, + "epoch": 0.7098204857444562, + "grad_norm": 1.6813031435012817, + "learning_rate": 2.870796594320299e-06, + "loss": 0.5781, "step": 10083 }, { - "epoch": 0.7630433960122583, - "grad_norm": 2.2221665382385254, - "learning_rate": 2.6241059999841183e-06, - "loss": 0.6899, + "epoch": 0.7098908834917282, + "grad_norm": 1.8256782293319702, + "learning_rate": 2.8695078677936475e-06, + "loss": 0.6511, "step": 10084 }, { - "epoch": 0.7631190647345919, - "grad_norm": 1.9292700290679932, - "learning_rate": 2.6225166334249877e-06, - "loss": 0.8171, + "epoch": 0.7099612812390004, + "grad_norm": 2.199021577835083, + "learning_rate": 2.8682193560150232e-06, + "loss": 0.8136, "step": 10085 }, { - "epoch": 0.7631947334569256, - "grad_norm": 2.5671818256378174, - "learning_rate": 2.620927666244496e-06, - "loss": 0.6218, + "epoch": 0.7100316789862724, + "grad_norm": 1.7925736904144287, + "learning_rate": 2.8669310590514146e-06, + "loss": 0.7102, "step": 10086 }, { - "epoch": 0.7632704021792592, - "grad_norm": 2.0714309215545654, - "learning_rate": 2.6193390985421403e-06, - "loss": 0.6615, + "epoch": 0.7101020767335445, + "grad_norm": 1.9456300735473633, + "learning_rate": 2.8656429769698055e-06, + "loss": 0.6162, "step": 10087 }, { - "epoch": 0.7633460709015928, - "grad_norm": 1.8017868995666504, - "learning_rate": 2.6177509304174105e-06, - "loss": 0.6723, + "epoch": 0.7101724744808167, + "grad_norm": 1.9773117303848267, + "learning_rate": 2.8643551098371592e-06, + "loss": 0.7027, "step": 10088 }, { - "epoch": 0.7634217396239265, - "grad_norm": 1.793261170387268, - "learning_rate": 2.616163161969762e-06, - "loss": 0.5502, + "epoch": 0.7102428722280887, + "grad_norm": 1.629971981048584, + "learning_rate": 2.863067457720439e-06, + "loss": 0.5062, "step": 10089 }, { - "epoch": 0.7634974083462601, - "grad_norm": 2.2344236373901367, - "learning_rate": 2.614575793298622e-06, - "loss": 0.6795, + "epoch": 0.7103132699753608, + "grad_norm": 1.8264412879943848, + "learning_rate": 2.861780020686588e-06, + "loss": 0.598, "step": 10090 }, { - "epoch": 0.7635730770685937, - "grad_norm": 1.8927500247955322, - "learning_rate": 2.612988824503399e-06, - "loss": 0.8756, + "epoch": 0.7103836677226328, + "grad_norm": 2.549961805343628, + "learning_rate": 2.8604927988025424e-06, + "loss": 0.6864, "step": 10091 }, { - "epoch": 0.7636487457909273, - "grad_norm": 3.597015142440796, - "learning_rate": 2.6114022556834717e-06, - "loss": 0.6986, + "epoch": 0.710454065469905, + "grad_norm": 2.1344809532165527, + "learning_rate": 2.859205792135222e-06, + "loss": 0.6593, "step": 10092 }, { - "epoch": 0.7637244145132609, - "grad_norm": 2.1370482444763184, - "learning_rate": 2.6098160869382026e-06, - "loss": 0.6671, + "epoch": 0.7105244632171771, + "grad_norm": 1.7912046909332275, + "learning_rate": 2.8579190007515453e-06, + "loss": 0.8025, "step": 10093 }, { - "epoch": 0.7638000832355946, - "grad_norm": 2.2916955947875977, - "learning_rate": 2.6082303183669164e-06, - "loss": 0.5868, + "epoch": 0.7105948609644491, + "grad_norm": 1.9107897281646729, + "learning_rate": 2.8566324247184086e-06, + "loss": 0.6739, "step": 10094 }, { - "epoch": 0.7638757519579282, - "grad_norm": 1.7020882368087769, - "learning_rate": 2.606644950068921e-06, - "loss": 0.5157, + "epoch": 0.7106652587117213, + "grad_norm": 1.7673077583312988, + "learning_rate": 2.855346064102707e-06, + "loss": 0.6631, "step": 10095 }, { - "epoch": 0.7639514206802618, - "grad_norm": 1.9924485683441162, - "learning_rate": 2.6050599821434985e-06, - "loss": 0.6293, + "epoch": 0.7107356564589933, + "grad_norm": 1.9438629150390625, + "learning_rate": 2.8540599189713177e-06, + "loss": 0.8047, "step": 10096 }, { - "epoch": 0.7640270894025954, - "grad_norm": 2.115015983581543, - "learning_rate": 2.603475414689905e-06, - "loss": 0.7781, + "epoch": 0.7108060542062654, + "grad_norm": 1.7098110914230347, + "learning_rate": 2.8527739893911073e-06, + "loss": 0.6286, "step": 10097 }, { - "epoch": 0.7641027581249291, - "grad_norm": 2.8572585582733154, - "learning_rate": 2.6018912478073657e-06, - "loss": 0.5423, + "epoch": 0.7108764519535374, + "grad_norm": 1.919644832611084, + "learning_rate": 2.85148827542893e-06, + "loss": 0.7395, "step": 10098 }, { - "epoch": 0.7641784268472627, - "grad_norm": 1.8709588050842285, - "learning_rate": 2.600307481595092e-06, - "loss": 0.7225, + "epoch": 0.7109468497008096, + "grad_norm": 1.949416160583496, + "learning_rate": 2.8502027771516376e-06, + "loss": 0.6271, "step": 10099 }, { - "epoch": 0.7642540955695963, - "grad_norm": 2.634243965148926, - "learning_rate": 2.5987241161522665e-06, - "loss": 0.837, + "epoch": 0.7110172474480817, + "grad_norm": 1.812337875366211, + "learning_rate": 2.8489174946260587e-06, + "loss": 0.6271, "step": 10100 }, { - "epoch": 0.7643297642919299, - "grad_norm": 1.8500239849090576, - "learning_rate": 2.597141151578038e-06, - "loss": 0.6693, + "epoch": 0.7110876451953537, + "grad_norm": 1.6099995374679565, + "learning_rate": 2.8476324279190215e-06, + "loss": 0.6552, "step": 10101 }, { - "epoch": 0.7644054330142636, - "grad_norm": 1.8766469955444336, - "learning_rate": 2.5955585879715396e-06, - "loss": 0.6458, + "epoch": 0.7111580429426259, + "grad_norm": 1.9353443384170532, + "learning_rate": 2.846347577097335e-06, + "loss": 0.6189, "step": 10102 }, { - "epoch": 0.7644811017365972, - "grad_norm": 2.2552802562713623, - "learning_rate": 2.5939764254318767e-06, - "loss": 0.7068, + "epoch": 0.7112284406898979, + "grad_norm": 2.3352084159851074, + "learning_rate": 2.8450629422277986e-06, + "loss": 0.7473, "step": 10103 }, { - "epoch": 0.7645567704589308, - "grad_norm": 2.3717575073242188, - "learning_rate": 2.5923946640581307e-06, - "loss": 0.685, + "epoch": 0.71129883843717, + "grad_norm": 2.060573101043701, + "learning_rate": 2.8437785233772015e-06, + "loss": 0.7014, "step": 10104 }, { - "epoch": 0.7646324391812644, - "grad_norm": 2.0326504707336426, - "learning_rate": 2.590813303949355e-06, - "loss": 0.8291, + "epoch": 0.7113692361844421, + "grad_norm": 1.7785701751708984, + "learning_rate": 2.842494320612325e-06, + "loss": 0.6469, "step": 10105 }, { - "epoch": 0.764708107903598, - "grad_norm": 2.752199172973633, - "learning_rate": 2.5892323452045797e-06, - "loss": 0.7143, + "epoch": 0.7114396339317142, + "grad_norm": 2.296353578567505, + "learning_rate": 2.8412103339999314e-06, + "loss": 0.686, "step": 10106 }, { - "epoch": 0.7647837766259317, - "grad_norm": 2.53975510597229, - "learning_rate": 2.5876517879228106e-06, - "loss": 0.8028, + "epoch": 0.7115100316789863, + "grad_norm": 2.0027904510498047, + "learning_rate": 2.839926563606782e-06, + "loss": 0.6124, "step": 10107 }, { - "epoch": 0.7648594453482653, - "grad_norm": 1.4467507600784302, - "learning_rate": 2.5860716322030263e-06, - "loss": 0.7469, + "epoch": 0.7115804294262583, + "grad_norm": 1.580527663230896, + "learning_rate": 2.8386430094996176e-06, + "loss": 0.5401, "step": 10108 }, { - "epoch": 0.7649351140705989, - "grad_norm": 2.353743553161621, - "learning_rate": 2.5844918781441815e-06, - "loss": 0.5224, + "epoch": 0.7116508271735305, + "grad_norm": 1.8760838508605957, + "learning_rate": 2.8373596717451722e-06, + "loss": 0.6664, "step": 10109 }, { - "epoch": 0.7650107827929326, - "grad_norm": 2.355583906173706, - "learning_rate": 2.582912525845205e-06, - "loss": 0.7163, + "epoch": 0.7117212249208026, + "grad_norm": 1.6811845302581787, + "learning_rate": 2.836076550410165e-06, + "loss": 0.6138, "step": 10110 }, { - "epoch": 0.7650864515152662, - "grad_norm": 1.8890095949172974, - "learning_rate": 2.5813335754050047e-06, - "loss": 0.5741, + "epoch": 0.7117916226680746, + "grad_norm": 1.8571984767913818, + "learning_rate": 2.8347936455613115e-06, + "loss": 0.641, "step": 10111 }, { - "epoch": 0.7651621202375998, - "grad_norm": 2.1098287105560303, - "learning_rate": 2.5797550269224544e-06, - "loss": 0.6586, + "epoch": 0.7118620204153467, + "grad_norm": 1.8106952905654907, + "learning_rate": 2.8335109572653054e-06, + "loss": 0.6504, "step": 10112 }, { - "epoch": 0.7652377889599334, - "grad_norm": 2.207130193710327, - "learning_rate": 2.5781768804964063e-06, - "loss": 0.7282, + "epoch": 0.7119324181626188, + "grad_norm": 1.9275500774383545, + "learning_rate": 2.832228485588841e-06, + "loss": 0.6633, "step": 10113 }, { - "epoch": 0.765313457682267, - "grad_norm": 2.3205008506774902, - "learning_rate": 2.576599136225698e-06, - "loss": 0.6441, + "epoch": 0.7120028159098909, + "grad_norm": 2.163856029510498, + "learning_rate": 2.8309462305985923e-06, + "loss": 0.7343, "step": 10114 }, { - "epoch": 0.7653891264046007, - "grad_norm": 2.5144574642181396, - "learning_rate": 2.5750217942091252e-06, - "loss": 0.6996, + "epoch": 0.7120732136571629, + "grad_norm": 2.0078117847442627, + "learning_rate": 2.8296641923612247e-06, + "loss": 0.6078, "step": 10115 }, { - "epoch": 0.7654647951269343, - "grad_norm": 2.885598659515381, - "learning_rate": 2.573444854545468e-06, - "loss": 0.5839, + "epoch": 0.712143611404435, + "grad_norm": 1.735643982887268, + "learning_rate": 2.8283823709433902e-06, + "loss": 0.5463, "step": 10116 }, { - "epoch": 0.7655404638492679, - "grad_norm": 1.7929126024246216, - "learning_rate": 2.571868317333481e-06, - "loss": 0.4543, + "epoch": 0.7122140091517072, + "grad_norm": 2.318547010421753, + "learning_rate": 2.8271007664117364e-06, + "loss": 0.5694, "step": 10117 }, { - "epoch": 0.7656161325716015, - "grad_norm": 2.23551082611084, - "learning_rate": 2.5702921826718902e-06, - "loss": 0.7682, + "epoch": 0.7122844068989792, + "grad_norm": 2.0515449047088623, + "learning_rate": 2.825819378832891e-06, + "loss": 0.6846, "step": 10118 }, { - "epoch": 0.7656918012939351, - "grad_norm": 2.1573288440704346, - "learning_rate": 2.5687164506593993e-06, - "loss": 0.6006, + "epoch": 0.7123548046462513, + "grad_norm": 1.9481446743011475, + "learning_rate": 2.824538208273479e-06, + "loss": 0.724, "step": 10119 }, { - "epoch": 0.7657674700162688, - "grad_norm": 3.274181842803955, - "learning_rate": 2.5671411213946864e-06, - "loss": 0.5829, + "epoch": 0.7124252023935234, + "grad_norm": 2.0624639987945557, + "learning_rate": 2.8232572548001064e-06, + "loss": 0.6229, "step": 10120 }, { - "epoch": 0.7658431387386024, - "grad_norm": 2.04793381690979, - "learning_rate": 2.565566194976402e-06, - "loss": 0.5535, + "epoch": 0.7124956001407955, + "grad_norm": 1.8170660734176636, + "learning_rate": 2.821976518479372e-06, + "loss": 0.5622, "step": 10121 }, { - "epoch": 0.765918807460936, - "grad_norm": 2.4527783393859863, - "learning_rate": 2.5639916715031764e-06, - "loss": 0.6092, + "epoch": 0.7125659978880676, + "grad_norm": 1.6875512599945068, + "learning_rate": 2.82069599937786e-06, + "loss": 0.6366, "step": 10122 }, { - "epoch": 0.7659944761832697, - "grad_norm": 2.0146374702453613, - "learning_rate": 2.5624175510736047e-06, - "loss": 0.7187, + "epoch": 0.7126363956353396, + "grad_norm": 2.0657260417938232, + "learning_rate": 2.819415697562151e-06, + "loss": 0.8069, "step": 10123 }, { - "epoch": 0.7660701449056033, - "grad_norm": 2.2973668575286865, - "learning_rate": 2.5608438337862695e-06, - "loss": 0.7676, + "epoch": 0.7127067933826118, + "grad_norm": 2.7567646503448486, + "learning_rate": 2.8181356130988027e-06, + "loss": 0.6792, "step": 10124 }, { - "epoch": 0.7661458136279369, - "grad_norm": 2.033846139907837, - "learning_rate": 2.559270519739723e-06, - "loss": 0.6969, + "epoch": 0.7127771911298838, + "grad_norm": 1.7986947298049927, + "learning_rate": 2.8168557460543743e-06, + "loss": 0.601, "step": 10125 }, { - "epoch": 0.7662214823502705, - "grad_norm": 2.0946013927459717, - "learning_rate": 2.5576976090324856e-06, - "loss": 0.7496, + "epoch": 0.7128475888771559, + "grad_norm": 1.7955129146575928, + "learning_rate": 2.815576096495403e-06, + "loss": 0.5991, "step": 10126 }, { - "epoch": 0.7662971510726041, - "grad_norm": 1.880980372428894, - "learning_rate": 2.556125101763061e-06, - "loss": 0.784, + "epoch": 0.7129179866244281, + "grad_norm": 1.7077510356903076, + "learning_rate": 2.814296664488421e-06, + "loss": 0.6954, "step": 10127 }, { - "epoch": 0.7663728197949378, - "grad_norm": 1.8983718156814575, - "learning_rate": 2.554552998029924e-06, - "loss": 0.6489, + "epoch": 0.7129883843717001, + "grad_norm": 1.6956712007522583, + "learning_rate": 2.813017450099944e-06, + "loss": 0.6931, "step": 10128 }, { - "epoch": 0.7664484885172714, - "grad_norm": 2.347182512283325, - "learning_rate": 2.552981297931526e-06, - "loss": 0.6194, + "epoch": 0.7130587821189722, + "grad_norm": 2.1122684478759766, + "learning_rate": 2.8117384533964836e-06, + "loss": 0.6541, "step": 10129 }, { - "epoch": 0.766524157239605, - "grad_norm": 3.0601274967193604, - "learning_rate": 2.5514100015662915e-06, - "loss": 0.5625, + "epoch": 0.7131291798662442, + "grad_norm": 1.8957139253616333, + "learning_rate": 2.8104596744445323e-06, + "loss": 0.5688, "step": 10130 }, { - "epoch": 0.7665998259619387, - "grad_norm": 2.182065725326538, - "learning_rate": 2.5498391090326193e-06, - "loss": 0.7296, + "epoch": 0.7131995776135164, + "grad_norm": 1.5819088220596313, + "learning_rate": 2.8091811133105783e-06, + "loss": 0.6993, "step": 10131 }, { - "epoch": 0.7666754946842722, - "grad_norm": 1.8279647827148438, - "learning_rate": 2.5482686204288874e-06, - "loss": 0.6273, + "epoch": 0.7132699753607884, + "grad_norm": 1.7509477138519287, + "learning_rate": 2.807902770061094e-06, + "loss": 0.7124, "step": 10132 }, { - "epoch": 0.7667511634066059, - "grad_norm": 2.4019768238067627, - "learning_rate": 2.5466985358534365e-06, - "loss": 0.6006, + "epoch": 0.7133403731080605, + "grad_norm": 1.759712815284729, + "learning_rate": 2.806624644762539e-06, + "loss": 0.5163, "step": 10133 }, { - "epoch": 0.7668268321289395, - "grad_norm": 1.9945967197418213, - "learning_rate": 2.5451288554045986e-06, - "loss": 0.8876, + "epoch": 0.7134107708553327, + "grad_norm": 1.9700337648391724, + "learning_rate": 2.805346737481369e-06, + "loss": 0.633, "step": 10134 }, { - "epoch": 0.7669025008512731, - "grad_norm": 2.012547254562378, - "learning_rate": 2.5435595791806693e-06, - "loss": 0.6557, + "epoch": 0.7134811686026047, + "grad_norm": 2.1291818618774414, + "learning_rate": 2.804069048284019e-06, + "loss": 0.5608, "step": 10135 }, { - "epoch": 0.7669781695736068, - "grad_norm": 2.253406047821045, - "learning_rate": 2.541990707279925e-06, - "loss": 0.599, + "epoch": 0.7135515663498768, + "grad_norm": 2.283766984939575, + "learning_rate": 2.8027915772369205e-06, + "loss": 0.6756, "step": 10136 }, { - "epoch": 0.7670538382959404, - "grad_norm": 1.9846205711364746, - "learning_rate": 2.5404222398006072e-06, - "loss": 0.6009, + "epoch": 0.7136219640971488, + "grad_norm": 1.8617198467254639, + "learning_rate": 2.8015143244064903e-06, + "loss": 0.6289, "step": 10137 }, { - "epoch": 0.767129507018274, - "grad_norm": 2.01389741897583, - "learning_rate": 2.538854176840941e-06, - "loss": 0.6161, + "epoch": 0.713692361844421, + "grad_norm": 1.898706316947937, + "learning_rate": 2.800237289859132e-06, + "loss": 0.7164, "step": 10138 }, { - "epoch": 0.7672051757406076, - "grad_norm": 1.8462287187576294, - "learning_rate": 2.537286518499125e-06, - "loss": 0.7132, + "epoch": 0.7137627595916931, + "grad_norm": 1.9101887941360474, + "learning_rate": 2.7989604736612383e-06, + "loss": 0.6544, "step": 10139 }, { - "epoch": 0.7672808444629412, - "grad_norm": 2.2837891578674316, - "learning_rate": 2.5357192648733296e-06, - "loss": 0.7349, + "epoch": 0.7138331573389651, + "grad_norm": 1.7816282510757446, + "learning_rate": 2.7976838758791965e-06, + "loss": 0.6388, "step": 10140 }, { - "epoch": 0.7673565131852749, - "grad_norm": 2.81986403465271, - "learning_rate": 2.534152416061703e-06, - "loss": 0.6632, + "epoch": 0.7139035550862373, + "grad_norm": 1.6882436275482178, + "learning_rate": 2.796407496579374e-06, + "loss": 0.5628, "step": 10141 }, { - "epoch": 0.7674321819076085, - "grad_norm": 2.114987850189209, - "learning_rate": 2.5325859721623636e-06, - "loss": 0.6577, + "epoch": 0.7139739528335093, + "grad_norm": 1.6930179595947266, + "learning_rate": 2.7951313358281348e-06, + "loss": 0.6171, "step": 10142 }, { - "epoch": 0.7675078506299421, - "grad_norm": 2.2492518424987793, - "learning_rate": 2.5310199332734123e-06, - "loss": 0.7331, + "epoch": 0.7140443505807814, + "grad_norm": 2.290767192840576, + "learning_rate": 2.793855393691825e-06, + "loss": 0.643, "step": 10143 }, { - "epoch": 0.7675835193522758, - "grad_norm": 2.2491164207458496, - "learning_rate": 2.52945429949291e-06, - "loss": 0.6299, + "epoch": 0.7141147483280536, + "grad_norm": 1.7549229860305786, + "learning_rate": 2.7925796702367833e-06, + "loss": 0.575, "step": 10144 }, { - "epoch": 0.7676591880746093, - "grad_norm": 4.50223445892334, - "learning_rate": 2.527889070918911e-06, - "loss": 0.6815, + "epoch": 0.7141851460753256, + "grad_norm": 1.7177040576934814, + "learning_rate": 2.7913041655293325e-06, + "loss": 0.6066, "step": 10145 }, { - "epoch": 0.767734856796943, - "grad_norm": 1.7851481437683105, - "learning_rate": 2.526324247649435e-06, - "loss": 0.5633, + "epoch": 0.7142555438225977, + "grad_norm": 1.8345248699188232, + "learning_rate": 2.7900288796357925e-06, + "loss": 0.6592, "step": 10146 }, { - "epoch": 0.7678105255192766, - "grad_norm": 2.163814067840576, - "learning_rate": 2.5247598297824694e-06, - "loss": 0.6235, + "epoch": 0.7143259415698697, + "grad_norm": 1.7991154193878174, + "learning_rate": 2.788753812622461e-06, + "loss": 0.6248, "step": 10147 }, { - "epoch": 0.7678861942416102, - "grad_norm": 2.0847737789154053, - "learning_rate": 2.523195817415987e-06, - "loss": 0.6388, + "epoch": 0.7143963393171419, + "grad_norm": 1.8708922863006592, + "learning_rate": 2.7874789645556353e-06, + "loss": 0.5848, "step": 10148 }, { - "epoch": 0.7679618629639439, - "grad_norm": 1.7596197128295898, - "learning_rate": 2.5216322106479305e-06, - "loss": 0.6007, + "epoch": 0.714466737064414, + "grad_norm": 1.7442247867584229, + "learning_rate": 2.7862043355015933e-06, + "loss": 0.6606, "step": 10149 }, { - "epoch": 0.7680375316862775, - "grad_norm": 3.082669973373413, - "learning_rate": 2.52006900957622e-06, - "loss": 0.6749, + "epoch": 0.714537134811686, + "grad_norm": 1.9585180282592773, + "learning_rate": 2.784929925526604e-06, + "loss": 0.6494, "step": 10150 }, { - "epoch": 0.7681132004086111, - "grad_norm": 2.0572025775909424, - "learning_rate": 2.518506214298745e-06, - "loss": 0.737, + "epoch": 0.7146075325589581, + "grad_norm": 1.7395250797271729, + "learning_rate": 2.783655734696923e-06, + "loss": 0.728, "step": 10151 }, { - "epoch": 0.7681888691309448, - "grad_norm": 2.777792453765869, - "learning_rate": 2.5169438249133753e-06, - "loss": 0.7714, + "epoch": 0.7146779303062302, + "grad_norm": 1.8813142776489258, + "learning_rate": 2.782381763078801e-06, + "loss": 0.6491, "step": 10152 }, { - "epoch": 0.7682645378532783, - "grad_norm": 2.290621280670166, - "learning_rate": 2.515381841517952e-06, - "loss": 0.5603, + "epoch": 0.7147483280535023, + "grad_norm": 1.9267650842666626, + "learning_rate": 2.7811080107384683e-06, + "loss": 0.6272, "step": 10153 }, { - "epoch": 0.768340206575612, - "grad_norm": 2.1354238986968994, - "learning_rate": 2.5138202642102922e-06, - "loss": 0.762, + "epoch": 0.7148187258007743, + "grad_norm": 1.8490040302276611, + "learning_rate": 2.779834477742154e-06, + "loss": 0.6379, "step": 10154 }, { - "epoch": 0.7684158752979456, - "grad_norm": 1.9276994466781616, - "learning_rate": 2.512259093088186e-06, - "loss": 0.7416, + "epoch": 0.7148891235480465, + "grad_norm": 2.068742513656616, + "learning_rate": 2.778561164156067e-06, + "loss": 0.7681, "step": 10155 }, { - "epoch": 0.7684915440202792, - "grad_norm": 1.9895371198654175, - "learning_rate": 2.5106983282493985e-06, - "loss": 0.564, + "epoch": 0.7149595212953186, + "grad_norm": 2.136749267578125, + "learning_rate": 2.777288070046408e-06, + "loss": 0.715, "step": 10156 }, { - "epoch": 0.7685672127426129, - "grad_norm": 3.0279078483581543, - "learning_rate": 2.5091379697916745e-06, - "loss": 0.6865, + "epoch": 0.7150299190425906, + "grad_norm": 2.166428327560425, + "learning_rate": 2.7760151954793643e-06, + "loss": 0.6737, "step": 10157 }, { - "epoch": 0.7686428814649464, - "grad_norm": 1.848624348640442, - "learning_rate": 2.5075780178127215e-06, - "loss": 0.6344, + "epoch": 0.7151003167898627, + "grad_norm": 1.722038745880127, + "learning_rate": 2.774742540521119e-06, + "loss": 0.6162, "step": 10158 }, { - "epoch": 0.7687185501872801, - "grad_norm": 1.8406096696853638, - "learning_rate": 2.506018472410229e-06, - "loss": 0.6756, + "epoch": 0.7151707145371348, + "grad_norm": 1.8224986791610718, + "learning_rate": 2.773470105237832e-06, + "loss": 0.7141, "step": 10159 }, { - "epoch": 0.7687942189096137, - "grad_norm": 9.052090644836426, - "learning_rate": 2.5044593336818697e-06, - "loss": 0.5753, + "epoch": 0.7152411122844069, + "grad_norm": 1.4595375061035156, + "learning_rate": 2.7721978896956654e-06, + "loss": 0.7829, "step": 10160 }, { - "epoch": 0.7688698876319473, - "grad_norm": 2.3040497303009033, - "learning_rate": 2.502900601725274e-06, - "loss": 0.5959, + "epoch": 0.715311510031679, + "grad_norm": 1.9099692106246948, + "learning_rate": 2.7709258939607592e-06, + "loss": 0.6553, "step": 10161 }, { - "epoch": 0.768945556354281, - "grad_norm": 2.6110458374023438, - "learning_rate": 2.501342276638056e-06, - "loss": 0.8063, + "epoch": 0.715381907778951, + "grad_norm": 2.506944179534912, + "learning_rate": 2.7696541180992457e-06, + "loss": 0.6073, "step": 10162 }, { - "epoch": 0.7690212250766146, - "grad_norm": 2.4658761024475098, - "learning_rate": 2.4997843585178035e-06, - "loss": 0.6493, + "epoch": 0.7154523055262232, + "grad_norm": 2.762363910675049, + "learning_rate": 2.7683825621772434e-06, + "loss": 0.5821, "step": 10163 }, { - "epoch": 0.7690968937989482, - "grad_norm": 2.334641456604004, - "learning_rate": 2.4982268474620786e-06, - "loss": 0.7169, + "epoch": 0.7155227032734952, + "grad_norm": 1.8135781288146973, + "learning_rate": 2.767111226260867e-06, + "loss": 0.6855, "step": 10164 }, { - "epoch": 0.7691725625212819, - "grad_norm": 1.93704354763031, - "learning_rate": 2.4966697435684195e-06, - "loss": 0.6254, + "epoch": 0.7155931010207673, + "grad_norm": 1.8333359956741333, + "learning_rate": 2.765840110416208e-06, + "loss": 0.6861, "step": 10165 }, { - "epoch": 0.7692482312436154, - "grad_norm": 2.5095372200012207, - "learning_rate": 2.495113046934334e-06, - "loss": 0.7474, + "epoch": 0.7156634987680395, + "grad_norm": 1.8007326126098633, + "learning_rate": 2.7645692147093597e-06, + "loss": 0.6021, "step": 10166 }, { - "epoch": 0.7693238999659491, - "grad_norm": 2.160459041595459, - "learning_rate": 2.4935567576573085e-06, - "loss": 0.7978, + "epoch": 0.7157338965153115, + "grad_norm": 2.036118268966675, + "learning_rate": 2.7632985392063933e-06, + "loss": 0.7119, "step": 10167 }, { - "epoch": 0.7693995686882827, - "grad_norm": 2.0386502742767334, - "learning_rate": 2.4920008758348072e-06, - "loss": 0.696, + "epoch": 0.7158042942625836, + "grad_norm": 1.8891340494155884, + "learning_rate": 2.762028083973373e-06, + "loss": 0.6976, "step": 10168 }, { - "epoch": 0.7694752374106163, - "grad_norm": 2.827305555343628, - "learning_rate": 2.4904454015642546e-06, - "loss": 0.6549, + "epoch": 0.7158746920098557, + "grad_norm": 1.8038214445114136, + "learning_rate": 2.7607578490763484e-06, + "loss": 0.5993, "step": 10169 }, { - "epoch": 0.76955090613295, - "grad_norm": 1.8038434982299805, - "learning_rate": 2.4888903349430677e-06, - "loss": 0.658, + "epoch": 0.7159450897571278, + "grad_norm": 1.9499624967575073, + "learning_rate": 2.7594878345813646e-06, + "loss": 0.7899, "step": 10170 }, { - "epoch": 0.7696265748552835, - "grad_norm": 3.055170774459839, - "learning_rate": 2.48733567606863e-06, - "loss": 0.657, + "epoch": 0.7160154875043998, + "grad_norm": 1.7294812202453613, + "learning_rate": 2.758218040554447e-06, + "loss": 0.7171, "step": 10171 }, { - "epoch": 0.7697022435776172, - "grad_norm": 3.3407459259033203, - "learning_rate": 2.485781425038294e-06, - "loss": 0.6095, + "epoch": 0.7160858852516719, + "grad_norm": 1.9732459783554077, + "learning_rate": 2.756948467061617e-06, + "loss": 0.6277, "step": 10172 }, { - "epoch": 0.7697779122999509, - "grad_norm": 2.0380611419677734, - "learning_rate": 2.484227581949396e-06, - "loss": 0.5941, + "epoch": 0.7161562829989441, + "grad_norm": 1.9487568140029907, + "learning_rate": 2.755679114168879e-06, + "loss": 0.6338, "step": 10173 }, { - "epoch": 0.7698535810222844, - "grad_norm": 2.155287504196167, - "learning_rate": 2.4826741468992407e-06, - "loss": 0.7884, + "epoch": 0.7162266807462161, + "grad_norm": 1.967463731765747, + "learning_rate": 2.754409981942228e-06, + "loss": 0.6492, "step": 10174 }, { - "epoch": 0.7699292497446181, - "grad_norm": 2.009636163711548, - "learning_rate": 2.4811211199851102e-06, - "loss": 0.6486, + "epoch": 0.7162970784934882, + "grad_norm": 1.7988029718399048, + "learning_rate": 2.753141070447644e-06, + "loss": 0.6251, "step": 10175 }, { - "epoch": 0.7700049184669517, - "grad_norm": 2.339906930923462, - "learning_rate": 2.479568501304259e-06, - "loss": 0.4735, + "epoch": 0.7163674762407602, + "grad_norm": 2.8949670791625977, + "learning_rate": 2.7518723797511045e-06, + "loss": 0.7378, "step": 10176 }, { - "epoch": 0.7700805871892853, - "grad_norm": 2.612977981567383, - "learning_rate": 2.4780162909539178e-06, - "loss": 0.7915, + "epoch": 0.7164378739880324, + "grad_norm": 1.7894285917282104, + "learning_rate": 2.750603909918565e-06, + "loss": 0.6524, "step": 10177 }, { - "epoch": 0.770156255911619, - "grad_norm": 2.249410629272461, - "learning_rate": 2.4764644890312947e-06, - "loss": 0.6992, + "epoch": 0.7165082717353045, + "grad_norm": 1.886279821395874, + "learning_rate": 2.7493356610159794e-06, + "loss": 0.5449, "step": 10178 }, { - "epoch": 0.7702319246339525, - "grad_norm": 1.8891581296920776, - "learning_rate": 2.474913095633562e-06, - "loss": 0.6526, + "epoch": 0.7165786694825765, + "grad_norm": 1.9029539823532104, + "learning_rate": 2.7480676331092823e-06, + "loss": 0.6781, "step": 10179 }, { - "epoch": 0.7703075933562862, - "grad_norm": 3.0311803817749023, - "learning_rate": 2.473362110857873e-06, - "loss": 0.7193, + "epoch": 0.7166490672298487, + "grad_norm": 1.6867389678955078, + "learning_rate": 2.7467998262643998e-06, + "loss": 0.5941, "step": 10180 }, { - "epoch": 0.7703832620786198, - "grad_norm": 2.6356663703918457, - "learning_rate": 2.4718115348013604e-06, - "loss": 0.6858, + "epoch": 0.7167194649771207, + "grad_norm": 1.7880326509475708, + "learning_rate": 2.7455322405472433e-06, + "loss": 0.6251, "step": 10181 }, { - "epoch": 0.7704589308009534, - "grad_norm": 2.00958514213562, - "learning_rate": 2.4702613675611284e-06, - "loss": 0.7983, + "epoch": 0.7167898627243928, + "grad_norm": 1.9059619903564453, + "learning_rate": 2.7442648760237218e-06, + "loss": 0.6506, "step": 10182 }, { - "epoch": 0.7705345995232871, - "grad_norm": 2.9431421756744385, - "learning_rate": 2.468711609234246e-06, - "loss": 0.6924, + "epoch": 0.716860260471665, + "grad_norm": 2.130054473876953, + "learning_rate": 2.7429977327597204e-06, + "loss": 0.6117, "step": 10183 }, { - "epoch": 0.7706102682456206, - "grad_norm": 1.6894335746765137, - "learning_rate": 2.467162259917767e-06, - "loss": 0.5111, + "epoch": 0.716930658218937, + "grad_norm": 1.89435875415802, + "learning_rate": 2.741730810821123e-06, + "loss": 0.6853, "step": 10184 }, { - "epoch": 0.7706859369679543, - "grad_norm": 2.004911184310913, - "learning_rate": 2.4656133197087166e-06, - "loss": 0.6717, + "epoch": 0.7170010559662091, + "grad_norm": 1.520882487297058, + "learning_rate": 2.7404641102738016e-06, + "loss": 0.6285, "step": 10185 }, { - "epoch": 0.770761605690288, - "grad_norm": 2.340867280960083, - "learning_rate": 2.4640647887040957e-06, - "loss": 0.8315, + "epoch": 0.7170714537134811, + "grad_norm": 1.888420581817627, + "learning_rate": 2.739197631183604e-06, + "loss": 0.6376, "step": 10186 }, { - "epoch": 0.7708372744126215, - "grad_norm": 2.400015115737915, - "learning_rate": 2.4625166670008777e-06, - "loss": 0.691, + "epoch": 0.7171418514607533, + "grad_norm": 2.105788469314575, + "learning_rate": 2.737931373616383e-06, + "loss": 0.7697, "step": 10187 }, { - "epoch": 0.7709129431349552, - "grad_norm": 2.0905392169952393, - "learning_rate": 2.46096895469601e-06, - "loss": 0.6846, + "epoch": 0.7172122492080254, + "grad_norm": 1.9381864070892334, + "learning_rate": 2.7366653376379674e-06, + "loss": 0.7914, "step": 10188 }, { - "epoch": 0.7709886118572888, - "grad_norm": 2.421705961227417, - "learning_rate": 2.45942165188642e-06, - "loss": 0.622, + "epoch": 0.7172826469552974, + "grad_norm": 1.735547661781311, + "learning_rate": 2.7353995233141846e-06, + "loss": 0.707, "step": 10189 }, { - "epoch": 0.7710642805796224, - "grad_norm": 2.1648590564727783, - "learning_rate": 2.457874758668995e-06, - "loss": 0.7249, + "epoch": 0.7173530447025696, + "grad_norm": 1.9828996658325195, + "learning_rate": 2.7341339307108405e-06, + "loss": 0.6185, "step": 10190 }, { - "epoch": 0.7711399493019561, - "grad_norm": 2.4879276752471924, - "learning_rate": 2.4563282751406145e-06, - "loss": 0.7729, + "epoch": 0.7174234424498416, + "grad_norm": 1.6690255403518677, + "learning_rate": 2.732868559893739e-06, + "loss": 0.5694, "step": 10191 }, { - "epoch": 0.7712156180242896, - "grad_norm": 1.9964536428451538, - "learning_rate": 2.4547822013981253e-06, - "loss": 0.735, + "epoch": 0.7174938401971137, + "grad_norm": 1.88585364818573, + "learning_rate": 2.7316034109286654e-06, + "loss": 0.6337, "step": 10192 }, { - "epoch": 0.7712912867466233, - "grad_norm": 2.6276025772094727, - "learning_rate": 2.4532365375383423e-06, - "loss": 0.6983, + "epoch": 0.7175642379443857, + "grad_norm": 6.304566860198975, + "learning_rate": 2.7303384838813965e-06, + "loss": 0.7107, "step": 10193 }, { - "epoch": 0.771366955468957, - "grad_norm": 2.3542721271514893, - "learning_rate": 2.451691283658061e-06, - "loss": 0.751, + "epoch": 0.7176346356916579, + "grad_norm": 2.7297840118408203, + "learning_rate": 2.7290737788176932e-06, + "loss": 0.7114, "step": 10194 }, { - "epoch": 0.7714426241912905, - "grad_norm": 1.8065311908721924, - "learning_rate": 2.4501464398540494e-06, - "loss": 0.67, + "epoch": 0.71770503343893, + "grad_norm": 1.7563896179199219, + "learning_rate": 2.7278092958033158e-06, + "loss": 0.6269, "step": 10195 }, { - "epoch": 0.7715182929136242, - "grad_norm": 1.8417807817459106, - "learning_rate": 2.4486020062230577e-06, - "loss": 0.6552, + "epoch": 0.717775431186202, + "grad_norm": 1.6814230680465698, + "learning_rate": 2.7265450349039995e-06, + "loss": 0.6818, "step": 10196 }, { - "epoch": 0.7715939616359577, - "grad_norm": 2.1254916191101074, - "learning_rate": 2.4470579828617955e-06, - "loss": 0.6009, + "epoch": 0.7178458289334742, + "grad_norm": 1.8521995544433594, + "learning_rate": 2.725280996185479e-06, + "loss": 0.678, "step": 10197 }, { - "epoch": 0.7716696303582914, - "grad_norm": 2.3638763427734375, - "learning_rate": 2.4455143698669573e-06, - "loss": 0.6551, + "epoch": 0.7179162266807462, + "grad_norm": 1.793517827987671, + "learning_rate": 2.7240171797134714e-06, + "loss": 0.7183, "step": 10198 }, { - "epoch": 0.7717452990806251, - "grad_norm": 2.0712759494781494, - "learning_rate": 2.4439711673352094e-06, - "loss": 0.6991, + "epoch": 0.7179866244280183, + "grad_norm": 2.1142678260803223, + "learning_rate": 2.7227535855536827e-06, + "loss": 0.6564, "step": 10199 }, { - "epoch": 0.7718209678029586, - "grad_norm": 1.8671433925628662, - "learning_rate": 2.4424283753631906e-06, - "loss": 0.7074, + "epoch": 0.7180570221752904, + "grad_norm": 1.6673475503921509, + "learning_rate": 2.7214902137718068e-06, + "loss": 0.5634, "step": 10200 }, { - "epoch": 0.7718966365252923, - "grad_norm": 2.3705897331237793, - "learning_rate": 2.4408859940475177e-06, - "loss": 0.6934, + "epoch": 0.7181274199225625, + "grad_norm": 1.6095200777053833, + "learning_rate": 2.720227064433532e-06, + "loss": 0.7166, "step": 10201 }, { - "epoch": 0.771972305247626, - "grad_norm": 2.1461682319641113, - "learning_rate": 2.4393440234847788e-06, - "loss": 0.6653, + "epoch": 0.7181978176698346, + "grad_norm": 3.0073227882385254, + "learning_rate": 2.7189641376045253e-06, + "loss": 0.6747, "step": 10202 }, { - "epoch": 0.7720479739699595, - "grad_norm": 3.3979363441467285, - "learning_rate": 2.4378024637715394e-06, - "loss": 0.509, + "epoch": 0.7182682154171066, + "grad_norm": 1.8546141386032104, + "learning_rate": 2.717701433350453e-06, + "loss": 0.5819, "step": 10203 }, { - "epoch": 0.7721236426922932, - "grad_norm": 1.9009852409362793, - "learning_rate": 2.4362613150043307e-06, - "loss": 0.657, + "epoch": 0.7183386131643787, + "grad_norm": 1.7409852743148804, + "learning_rate": 2.716438951736962e-06, + "loss": 0.6604, "step": 10204 }, { - "epoch": 0.7721993114146267, - "grad_norm": 2.3193490505218506, - "learning_rate": 2.4347205772796663e-06, - "loss": 0.8041, + "epoch": 0.7184090109116509, + "grad_norm": 1.7670824527740479, + "learning_rate": 2.7151766928296887e-06, + "loss": 0.6701, "step": 10205 }, { - "epoch": 0.7722749801369604, - "grad_norm": 2.223050355911255, - "learning_rate": 2.4331802506940397e-06, - "loss": 0.6525, + "epoch": 0.7184794086589229, + "grad_norm": 2.0035204887390137, + "learning_rate": 2.713914656694257e-06, + "loss": 0.6817, "step": 10206 }, { - "epoch": 0.772350648859294, - "grad_norm": 2.456544876098633, - "learning_rate": 2.4316403353439026e-06, - "loss": 0.6296, + "epoch": 0.718549806406195, + "grad_norm": 2.0534746646881104, + "learning_rate": 2.7126528433962865e-06, + "loss": 0.7145, "step": 10207 }, { - "epoch": 0.7724263175816276, - "grad_norm": 2.209892511367798, - "learning_rate": 2.430100831325692e-06, - "loss": 0.6408, + "epoch": 0.7186202041534671, + "grad_norm": 1.7503806352615356, + "learning_rate": 2.7113912530013755e-06, + "loss": 0.5233, "step": 10208 }, { - "epoch": 0.7725019863039613, - "grad_norm": 2.1688029766082764, - "learning_rate": 2.428561738735817e-06, - "loss": 0.8956, + "epoch": 0.7186906019007392, + "grad_norm": 1.7722392082214355, + "learning_rate": 2.7101298855751197e-06, + "loss": 0.6268, "step": 10209 }, { - "epoch": 0.7725776550262948, - "grad_norm": 2.3047657012939453, - "learning_rate": 2.4270230576706603e-06, - "loss": 0.6141, + "epoch": 0.7187609996480112, + "grad_norm": 1.7966272830963135, + "learning_rate": 2.708868741183096e-06, + "loss": 0.6307, "step": 10210 }, { - "epoch": 0.7726533237486285, - "grad_norm": 2.0687978267669678, - "learning_rate": 2.42548478822658e-06, - "loss": 0.5368, + "epoch": 0.7188313973952833, + "grad_norm": 1.5686246156692505, + "learning_rate": 2.7076078198908726e-06, + "loss": 0.6336, "step": 10211 }, { - "epoch": 0.7727289924709622, - "grad_norm": 3.177206039428711, - "learning_rate": 2.4239469304999065e-06, - "loss": 0.7155, + "epoch": 0.7189017951425555, + "grad_norm": 1.8944320678710938, + "learning_rate": 2.7063471217640034e-06, + "loss": 0.6402, "step": 10212 }, { - "epoch": 0.7728046611932957, - "grad_norm": 1.9328745603561401, - "learning_rate": 2.4224094845869464e-06, - "loss": 0.7287, + "epoch": 0.7189721928898275, + "grad_norm": 2.3201475143432617, + "learning_rate": 2.705086646868039e-06, + "loss": 0.719, "step": 10213 }, { - "epoch": 0.7728803299156294, - "grad_norm": 2.771430492401123, - "learning_rate": 2.420872450583981e-06, - "loss": 0.6553, + "epoch": 0.7190425906370996, + "grad_norm": 1.87679123878479, + "learning_rate": 2.7038263952685073e-06, + "loss": 0.6491, "step": 10214 }, { - "epoch": 0.772955998637963, - "grad_norm": 2.3271167278289795, - "learning_rate": 2.419335828587259e-06, - "loss": 0.5808, + "epoch": 0.7191129883843717, + "grad_norm": 3.090827465057373, + "learning_rate": 2.702566367030934e-06, + "loss": 0.7278, "step": 10215 }, { - "epoch": 0.7730316673602966, - "grad_norm": 2.1030850410461426, - "learning_rate": 2.4177996186930102e-06, - "loss": 0.6993, + "epoch": 0.7191833861316438, + "grad_norm": 1.7493771314620972, + "learning_rate": 2.7013065622208276e-06, + "loss": 0.6229, "step": 10216 }, { - "epoch": 0.7731073360826303, - "grad_norm": 2.1259703636169434, - "learning_rate": 2.4162638209974437e-06, - "loss": 0.8063, + "epoch": 0.7192537838789159, + "grad_norm": 2.1330678462982178, + "learning_rate": 2.7000469809036856e-06, + "loss": 0.6557, "step": 10217 }, { - "epoch": 0.7731830048049638, - "grad_norm": 1.9268971681594849, - "learning_rate": 2.414728435596728e-06, - "loss": 0.7061, + "epoch": 0.7193241816261879, + "grad_norm": 1.9239699840545654, + "learning_rate": 2.6987876231449934e-06, + "loss": 0.7583, "step": 10218 }, { - "epoch": 0.7732586735272975, - "grad_norm": 2.1795198917388916, - "learning_rate": 2.413193462587017e-06, - "loss": 0.6964, + "epoch": 0.7193945793734601, + "grad_norm": 1.5765659809112549, + "learning_rate": 2.6975284890102304e-06, + "loss": 0.6048, "step": 10219 }, { - "epoch": 0.7733343422496312, - "grad_norm": 1.8316737413406372, - "learning_rate": 2.4116589020644367e-06, - "loss": 0.6009, + "epoch": 0.7194649771207321, + "grad_norm": 1.7230829000473022, + "learning_rate": 2.6962695785648552e-06, + "loss": 0.6635, "step": 10220 }, { - "epoch": 0.7734100109719647, - "grad_norm": 2.2390308380126953, - "learning_rate": 2.4101247541250833e-06, - "loss": 0.5806, + "epoch": 0.7195353748680042, + "grad_norm": 1.79970383644104, + "learning_rate": 2.695010891874325e-06, + "loss": 0.6228, "step": 10221 }, { - "epoch": 0.7734856796942984, - "grad_norm": 2.265646457672119, - "learning_rate": 2.408591018865034e-06, - "loss": 0.701, + "epoch": 0.7196057726152764, + "grad_norm": 2.2706217765808105, + "learning_rate": 2.6937524290040773e-06, + "loss": 0.609, "step": 10222 }, { - "epoch": 0.7735613484166319, - "grad_norm": 1.9274543523788452, - "learning_rate": 2.407057696380334e-06, - "loss": 0.624, + "epoch": 0.7196761703625484, + "grad_norm": 1.6709871292114258, + "learning_rate": 2.6924941900195406e-06, + "loss": 0.6365, "step": 10223 }, { - "epoch": 0.7736370171389656, - "grad_norm": 1.9996583461761475, - "learning_rate": 2.4055247867670044e-06, - "loss": 0.663, + "epoch": 0.7197465681098205, + "grad_norm": 2.157172679901123, + "learning_rate": 2.69123617498613e-06, + "loss": 0.7235, "step": 10224 }, { - "epoch": 0.7737126858612993, - "grad_norm": 2.1675891876220703, - "learning_rate": 2.4039922901210444e-06, - "loss": 0.6858, + "epoch": 0.7198169658570925, + "grad_norm": 2.3580710887908936, + "learning_rate": 2.689978383969255e-06, + "loss": 0.803, "step": 10225 }, { - "epoch": 0.7737883545836328, - "grad_norm": 2.129465103149414, - "learning_rate": 2.4024602065384162e-06, - "loss": 0.598, + "epoch": 0.7198873636043647, + "grad_norm": 2.494523763656616, + "learning_rate": 2.6887208170343046e-06, + "loss": 0.5329, "step": 10226 }, { - "epoch": 0.7738640233059665, - "grad_norm": 2.162490129470825, - "learning_rate": 2.4009285361150723e-06, - "loss": 0.7343, + "epoch": 0.7199577613516367, + "grad_norm": 1.7783260345458984, + "learning_rate": 2.687463474246666e-06, + "loss": 0.6707, "step": 10227 }, { - "epoch": 0.7739396920283002, - "grad_norm": 2.1317222118377686, - "learning_rate": 2.39939727894693e-06, - "loss": 0.7082, + "epoch": 0.7200281590989088, + "grad_norm": 1.5565259456634521, + "learning_rate": 2.6862063556717074e-06, + "loss": 0.4413, "step": 10228 }, { - "epoch": 0.7740153607506337, - "grad_norm": 2.5465569496154785, - "learning_rate": 2.3978664351298754e-06, - "loss": 0.616, + "epoch": 0.720098556846181, + "grad_norm": 1.8819677829742432, + "learning_rate": 2.6849494613747867e-06, + "loss": 0.62, "step": 10229 }, { - "epoch": 0.7740910294729674, - "grad_norm": 2.2197041511535645, - "learning_rate": 2.396336004759779e-06, - "loss": 0.7131, + "epoch": 0.720168954593453, + "grad_norm": 1.762249231338501, + "learning_rate": 2.68369279142125e-06, + "loss": 0.6135, "step": 10230 }, { - "epoch": 0.7741666981953009, - "grad_norm": 1.9501080513000488, - "learning_rate": 2.39480598793248e-06, - "loss": 0.6171, + "epoch": 0.7202393523407251, + "grad_norm": 1.7999740839004517, + "learning_rate": 2.682436345876436e-06, + "loss": 0.7153, "step": 10231 }, { - "epoch": 0.7742423669176346, - "grad_norm": 1.9470248222351074, - "learning_rate": 2.393276384743795e-06, - "loss": 0.6346, + "epoch": 0.7203097500879971, + "grad_norm": 1.810145378112793, + "learning_rate": 2.6811801248056636e-06, + "loss": 0.688, "step": 10232 }, { - "epoch": 0.7743180356399683, - "grad_norm": 2.145937442779541, - "learning_rate": 2.3917471952895117e-06, - "loss": 0.6556, + "epoch": 0.7203801478352693, + "grad_norm": 1.8567789793014526, + "learning_rate": 2.6799241282742504e-06, + "loss": 0.6273, "step": 10233 }, { - "epoch": 0.7743937043623018, - "grad_norm": 1.949892520904541, - "learning_rate": 2.3902184196653922e-06, - "loss": 0.6536, + "epoch": 0.7204505455825414, + "grad_norm": 2.286994695663452, + "learning_rate": 2.6786683563474947e-06, + "loss": 0.8266, "step": 10234 }, { - "epoch": 0.7744693730846355, - "grad_norm": 2.531683921813965, - "learning_rate": 2.3886900579671765e-06, - "loss": 0.6373, + "epoch": 0.7205209433298134, + "grad_norm": 1.5341379642486572, + "learning_rate": 2.6774128090906823e-06, + "loss": 0.6507, "step": 10235 }, { - "epoch": 0.774545041806969, - "grad_norm": 1.8003000020980835, - "learning_rate": 2.3871621102905676e-06, - "loss": 0.6487, + "epoch": 0.7205913410770856, + "grad_norm": 1.7324180603027344, + "learning_rate": 2.6761574865690952e-06, + "loss": 0.7482, "step": 10236 }, { - "epoch": 0.7746207105293027, - "grad_norm": 2.1588857173919678, - "learning_rate": 2.385634576731258e-06, - "loss": 0.7224, + "epoch": 0.7206617388243576, + "grad_norm": 2.7136831283569336, + "learning_rate": 2.6749023888479937e-06, + "loss": 0.8124, "step": 10237 }, { - "epoch": 0.7746963792516364, - "grad_norm": 1.7768288850784302, - "learning_rate": 2.3841074573849058e-06, - "loss": 0.6877, + "epoch": 0.7207321365716297, + "grad_norm": 1.7971011400222778, + "learning_rate": 2.6736475159926364e-06, + "loss": 0.7422, "step": 10238 }, { - "epoch": 0.7747720479739699, - "grad_norm": 2.2642805576324463, - "learning_rate": 2.382580752347145e-06, - "loss": 0.621, + "epoch": 0.7208025343189018, + "grad_norm": 1.7183927297592163, + "learning_rate": 2.672392868068263e-06, + "loss": 0.5826, "step": 10239 }, { - "epoch": 0.7748477166963036, - "grad_norm": 1.7709468603134155, - "learning_rate": 2.381054461713579e-06, - "loss": 0.7062, + "epoch": 0.7208729320661739, + "grad_norm": 1.7980307340621948, + "learning_rate": 2.6711384451401036e-06, + "loss": 0.686, "step": 10240 }, { - "epoch": 0.7749233854186373, - "grad_norm": 2.4417378902435303, - "learning_rate": 2.3795285855797874e-06, - "loss": 0.6432, + "epoch": 0.720943329813446, + "grad_norm": 1.9288212060928345, + "learning_rate": 2.669884247273375e-06, + "loss": 0.589, "step": 10241 }, { - "epoch": 0.7749990541409708, - "grad_norm": 2.286708354949951, - "learning_rate": 2.3780031240413338e-06, - "loss": 0.6124, + "epoch": 0.721013727560718, + "grad_norm": 1.7248685359954834, + "learning_rate": 2.668630274533288e-06, + "loss": 0.5625, "step": 10242 }, { - "epoch": 0.7750747228633045, - "grad_norm": 2.1248910427093506, - "learning_rate": 2.376478077193741e-06, - "loss": 0.6614, + "epoch": 0.7210841253079902, + "grad_norm": 1.786866545677185, + "learning_rate": 2.6673765269850335e-06, + "loss": 0.5995, "step": 10243 }, { - "epoch": 0.775150391585638, - "grad_norm": 3.0439376831054688, - "learning_rate": 2.3749534451325134e-06, - "loss": 0.8119, + "epoch": 0.7211545230552623, + "grad_norm": 1.827768325805664, + "learning_rate": 2.666123004693799e-06, + "loss": 0.7099, "step": 10244 }, { - "epoch": 0.7752260603079717, - "grad_norm": 2.391871213912964, - "learning_rate": 2.37342922795313e-06, - "loss": 0.6768, + "epoch": 0.7212249208025343, + "grad_norm": 2.071207046508789, + "learning_rate": 2.6648697077247553e-06, + "loss": 0.6702, "step": 10245 }, { - "epoch": 0.7753017290303054, - "grad_norm": 2.5745506286621094, - "learning_rate": 2.3719054257510398e-06, - "loss": 0.7174, + "epoch": 0.7212953185498064, + "grad_norm": 1.7006618976593018, + "learning_rate": 2.663616636143061e-06, + "loss": 0.6742, "step": 10246 }, { - "epoch": 0.7753773977526389, - "grad_norm": 2.4691545963287354, - "learning_rate": 2.370382038621671e-06, - "loss": 0.7401, + "epoch": 0.7213657162970785, + "grad_norm": 2.5495522022247314, + "learning_rate": 2.6623637900138624e-06, + "loss": 0.6941, "step": 10247 }, { - "epoch": 0.7754530664749726, - "grad_norm": 2.357771396636963, - "learning_rate": 2.368859066660421e-06, - "loss": 0.7197, + "epoch": 0.7214361140443506, + "grad_norm": 1.9685434103012085, + "learning_rate": 2.6611111694023017e-06, + "loss": 0.7168, "step": 10248 }, { - "epoch": 0.7755287351973061, - "grad_norm": 2.1640098094940186, - "learning_rate": 2.3673365099626673e-06, - "loss": 0.5828, + "epoch": 0.7215065117916226, + "grad_norm": 1.7188739776611328, + "learning_rate": 2.6598587743734982e-06, + "loss": 0.6672, "step": 10249 }, { - "epoch": 0.7756044039196398, - "grad_norm": 1.9653394222259521, - "learning_rate": 2.365814368623751e-06, - "loss": 0.6857, + "epoch": 0.7215769095388948, + "grad_norm": 1.7173349857330322, + "learning_rate": 2.6586066049925702e-06, + "loss": 0.7975, "step": 10250 }, { - "epoch": 0.7756800726419735, - "grad_norm": 2.2896316051483154, - "learning_rate": 2.364292642738996e-06, - "loss": 0.6718, + "epoch": 0.7216473072861669, + "grad_norm": 2.1525938510894775, + "learning_rate": 2.6573546613246173e-06, + "loss": 0.7062, "step": 10251 }, { - "epoch": 0.775755741364307, - "grad_norm": 2.249856948852539, - "learning_rate": 2.3627713324036957e-06, - "loss": 0.588, + "epoch": 0.7217177050334389, + "grad_norm": 1.7926366329193115, + "learning_rate": 2.6561029434347274e-06, + "loss": 0.5119, "step": 10252 }, { - "epoch": 0.7758314100866407, - "grad_norm": 1.9826165437698364, - "learning_rate": 2.3612504377131283e-06, - "loss": 0.6545, + "epoch": 0.721788102780711, + "grad_norm": 3.288958787918091, + "learning_rate": 2.654851451387978e-06, + "loss": 0.607, "step": 10253 }, { - "epoch": 0.7759070788089744, - "grad_norm": 3.986067295074463, - "learning_rate": 2.359729958762527e-06, - "loss": 0.7375, + "epoch": 0.7218585005279831, + "grad_norm": 1.6256624460220337, + "learning_rate": 2.653600185249439e-06, + "loss": 0.6499, "step": 10254 }, { - "epoch": 0.7759827475313079, - "grad_norm": 2.1496474742889404, - "learning_rate": 2.3582098956471134e-06, - "loss": 0.7683, + "epoch": 0.7219288982752552, + "grad_norm": 1.9199604988098145, + "learning_rate": 2.65234914508416e-06, + "loss": 0.632, "step": 10255 }, { - "epoch": 0.7760584162536416, - "grad_norm": 2.206681966781616, - "learning_rate": 2.3566902484620785e-06, - "loss": 0.5398, + "epoch": 0.7219992960225273, + "grad_norm": 2.004490613937378, + "learning_rate": 2.6510983309571887e-06, + "loss": 0.6607, "step": 10256 }, { - "epoch": 0.7761340849759751, - "grad_norm": 2.004941940307617, - "learning_rate": 2.355171017302587e-06, - "loss": 0.7467, + "epoch": 0.7220696937697993, + "grad_norm": 1.8888812065124512, + "learning_rate": 2.6498477429335538e-06, + "loss": 0.6721, "step": 10257 }, { - "epoch": 0.7762097536983088, - "grad_norm": 2.200032949447632, - "learning_rate": 2.353652202263778e-06, - "loss": 0.6155, + "epoch": 0.7221400915170715, + "grad_norm": 1.713533639907837, + "learning_rate": 2.6485973810782744e-06, + "loss": 0.5738, "step": 10258 }, { - "epoch": 0.7762854224206425, - "grad_norm": 1.9743727445602417, - "learning_rate": 2.352133803440765e-06, - "loss": 0.5706, + "epoch": 0.7222104892643435, + "grad_norm": 1.935546636581421, + "learning_rate": 2.647347245456356e-06, + "loss": 0.6309, "step": 10259 }, { - "epoch": 0.776361091142976, - "grad_norm": 1.8890496492385864, - "learning_rate": 2.350615820928639e-06, - "loss": 0.578, + "epoch": 0.7222808870116156, + "grad_norm": 2.2524161338806152, + "learning_rate": 2.646097336132799e-06, + "loss": 0.584, "step": 10260 }, { - "epoch": 0.7764367598653097, - "grad_norm": 2.166748523712158, - "learning_rate": 2.3490982548224532e-06, - "loss": 0.7314, + "epoch": 0.7223512847588878, + "grad_norm": 2.253981351852417, + "learning_rate": 2.644847653172582e-06, + "loss": 0.6364, "step": 10261 }, { - "epoch": 0.7765124285876432, - "grad_norm": 2.333298683166504, - "learning_rate": 2.3475811052172434e-06, - "loss": 0.7265, + "epoch": 0.7224216825061598, + "grad_norm": 1.843891978263855, + "learning_rate": 2.643598196640682e-06, + "loss": 0.6053, "step": 10262 }, { - "epoch": 0.7765880973099769, - "grad_norm": 2.2891969680786133, - "learning_rate": 2.3460643722080277e-06, - "loss": 0.6929, + "epoch": 0.7224920802534319, + "grad_norm": 1.9015884399414062, + "learning_rate": 2.6423489666020567e-06, + "loss": 0.7176, "step": 10263 }, { - "epoch": 0.7766637660323106, - "grad_norm": 2.030637741088867, - "learning_rate": 2.344548055889779e-06, - "loss": 0.7014, + "epoch": 0.722562478000704, + "grad_norm": 1.8214033842086792, + "learning_rate": 2.6410999631216555e-06, + "loss": 0.6129, "step": 10264 }, { - "epoch": 0.7767394347546441, - "grad_norm": 2.0292556285858154, - "learning_rate": 2.3430321563574577e-06, - "loss": 0.6334, + "epoch": 0.7226328757479761, + "grad_norm": 1.685738444328308, + "learning_rate": 2.6398511862644124e-06, + "loss": 0.6345, "step": 10265 }, { - "epoch": 0.7768151034769778, - "grad_norm": 2.329683542251587, - "learning_rate": 2.3415166737059937e-06, - "loss": 0.7243, + "epoch": 0.7227032734952481, + "grad_norm": 2.2754504680633545, + "learning_rate": 2.6386026360952576e-06, + "loss": 0.6131, "step": 10266 }, { - "epoch": 0.7768907721993115, - "grad_norm": 2.4206478595733643, - "learning_rate": 2.340001608030292e-06, - "loss": 0.604, + "epoch": 0.7227736712425202, + "grad_norm": 1.6086658239364624, + "learning_rate": 2.6373543126790996e-06, + "loss": 0.6573, "step": 10267 }, { - "epoch": 0.776966440921645, - "grad_norm": 2.233008861541748, - "learning_rate": 2.3384869594252304e-06, - "loss": 0.7065, + "epoch": 0.7228440689897924, + "grad_norm": 1.9143977165222168, + "learning_rate": 2.6361062160808435e-06, + "loss": 0.5508, "step": 10268 }, { - "epoch": 0.7770421096439787, - "grad_norm": 1.844909906387329, - "learning_rate": 2.336972727985662e-06, - "loss": 0.7302, + "epoch": 0.7229144667370644, + "grad_norm": 1.8227888345718384, + "learning_rate": 2.634858346365378e-06, + "loss": 0.6279, "step": 10269 }, { - "epoch": 0.7771177783663122, - "grad_norm": 1.7668637037277222, - "learning_rate": 2.335458913806411e-06, - "loss": 0.6437, + "epoch": 0.7229848644843365, + "grad_norm": 1.7899259328842163, + "learning_rate": 2.63361070359758e-06, + "loss": 0.6503, "step": 10270 }, { - "epoch": 0.7771934470886459, - "grad_norm": 2.1801650524139404, - "learning_rate": 2.3339455169822822e-06, - "loss": 0.8086, + "epoch": 0.7230552622316085, + "grad_norm": 2.3337302207946777, + "learning_rate": 2.6323632878423136e-06, + "loss": 0.6374, "step": 10271 }, { - "epoch": 0.7772691158109796, - "grad_norm": 1.8977692127227783, - "learning_rate": 2.33243253760804e-06, - "loss": 0.6991, + "epoch": 0.7231256599788807, + "grad_norm": 1.6244181394577026, + "learning_rate": 2.631116099164438e-06, + "loss": 0.7065, "step": 10272 }, { - "epoch": 0.7773447845333131, - "grad_norm": 2.6029887199401855, - "learning_rate": 2.3309199757784408e-06, - "loss": 0.6931, + "epoch": 0.7231960577261528, + "grad_norm": 1.6365666389465332, + "learning_rate": 2.6298691376287904e-06, + "loss": 0.7407, "step": 10273 }, { - "epoch": 0.7774204532556468, - "grad_norm": 2.1565604209899902, - "learning_rate": 2.3294078315882057e-06, - "loss": 0.66, + "epoch": 0.7232664554734248, + "grad_norm": 1.8988217115402222, + "learning_rate": 2.6286224033002073e-06, + "loss": 0.6427, "step": 10274 }, { - "epoch": 0.7774961219779803, - "grad_norm": 2.308840751647949, - "learning_rate": 2.3278961051320257e-06, - "loss": 0.6124, + "epoch": 0.723336853220697, + "grad_norm": 2.2116246223449707, + "learning_rate": 2.627375896243504e-06, + "loss": 0.6394, "step": 10275 }, { - "epoch": 0.777571790700314, - "grad_norm": 2.039461851119995, - "learning_rate": 2.3263847965045705e-06, - "loss": 0.5688, + "epoch": 0.723407250967969, + "grad_norm": 1.5864765644073486, + "learning_rate": 2.6261296165234875e-06, + "loss": 0.5568, "step": 10276 }, { - "epoch": 0.7776474594226477, - "grad_norm": 2.1434340476989746, - "learning_rate": 2.324873905800485e-06, - "loss": 0.656, + "epoch": 0.7234776487152411, + "grad_norm": 1.6424018144607544, + "learning_rate": 2.6248835642049516e-06, + "loss": 0.6612, "step": 10277 }, { - "epoch": 0.7777231281449812, - "grad_norm": 1.743505597114563, - "learning_rate": 2.323363433114385e-06, - "loss": 0.6187, + "epoch": 0.7235480464625133, + "grad_norm": 1.9140745401382446, + "learning_rate": 2.623637739352683e-06, + "loss": 0.7272, "step": 10278 }, { - "epoch": 0.7777987968673149, - "grad_norm": 2.4365437030792236, - "learning_rate": 2.321853378540862e-06, - "loss": 0.7503, + "epoch": 0.7236184442097853, + "grad_norm": 1.8125613927841187, + "learning_rate": 2.6223921420314505e-06, + "loss": 0.6461, "step": 10279 }, { - "epoch": 0.7778744655896486, - "grad_norm": 2.0435638427734375, - "learning_rate": 2.3203437421744804e-06, - "loss": 0.7011, + "epoch": 0.7236888419570574, + "grad_norm": 1.6389987468719482, + "learning_rate": 2.6211467723060174e-06, + "loss": 0.7059, "step": 10280 }, { - "epoch": 0.7779501343119821, - "grad_norm": 2.0592703819274902, - "learning_rate": 2.318834524109781e-06, - "loss": 0.6205, + "epoch": 0.7237592397043294, + "grad_norm": 2.093770980834961, + "learning_rate": 2.6199016302411286e-06, + "loss": 0.7472, "step": 10281 }, { - "epoch": 0.7780258030343158, - "grad_norm": 2.7824881076812744, - "learning_rate": 2.3173257244412673e-06, - "loss": 0.5982, + "epoch": 0.7238296374516016, + "grad_norm": 1.736376166343689, + "learning_rate": 2.618656715901521e-06, + "loss": 0.5754, "step": 10282 }, { - "epoch": 0.7781014717566493, - "grad_norm": 2.0062973499298096, - "learning_rate": 2.3158173432634347e-06, - "loss": 0.6368, + "epoch": 0.7239000351988736, + "grad_norm": 1.7805185317993164, + "learning_rate": 2.6174120293519177e-06, + "loss": 0.6193, "step": 10283 }, { - "epoch": 0.778177140478983, - "grad_norm": 2.447920322418213, - "learning_rate": 2.314309380670739e-06, - "loss": 0.7854, + "epoch": 0.7239704329461457, + "grad_norm": 1.9871174097061157, + "learning_rate": 2.6161675706570307e-06, + "loss": 0.6938, "step": 10284 }, { - "epoch": 0.7782528092013167, - "grad_norm": 2.387455463409424, - "learning_rate": 2.312801836757616e-06, - "loss": 0.6015, + "epoch": 0.7240408306934178, + "grad_norm": 1.8444913625717163, + "learning_rate": 2.614923339881564e-06, + "loss": 0.7146, "step": 10285 }, { - "epoch": 0.7783284779236502, - "grad_norm": 2.1444883346557617, - "learning_rate": 2.3112947116184693e-06, - "loss": 0.5855, + "epoch": 0.7241112284406899, + "grad_norm": 1.6647013425827026, + "learning_rate": 2.6136793370902035e-06, + "loss": 0.5519, "step": 10286 }, { - "epoch": 0.7784041466459839, - "grad_norm": 2.148451328277588, - "learning_rate": 2.3097880053476777e-06, - "loss": 0.6432, + "epoch": 0.724181626187962, + "grad_norm": 1.727646827697754, + "learning_rate": 2.6124355623476306e-06, + "loss": 0.6795, "step": 10287 }, { - "epoch": 0.7784798153683175, - "grad_norm": 2.473336696624756, - "learning_rate": 2.308281718039607e-06, - "loss": 0.6729, + "epoch": 0.724252023935234, + "grad_norm": 1.8543248176574707, + "learning_rate": 2.6111920157185017e-06, + "loss": 0.693, "step": 10288 }, { - "epoch": 0.7785554840906511, - "grad_norm": 2.9266908168792725, - "learning_rate": 2.306775849788575e-06, - "loss": 0.5335, + "epoch": 0.7243224216825062, + "grad_norm": 1.6542253494262695, + "learning_rate": 2.609948697267476e-06, + "loss": 0.5669, "step": 10289 }, { - "epoch": 0.7786311528129848, - "grad_norm": 2.390139102935791, - "learning_rate": 2.3052704006888876e-06, - "loss": 0.6986, + "epoch": 0.7243928194297783, + "grad_norm": 1.7674121856689453, + "learning_rate": 2.6087056070591926e-06, + "loss": 0.6704, "step": 10290 }, { - "epoch": 0.7787068215353183, - "grad_norm": 2.233603000640869, - "learning_rate": 2.3037653708348215e-06, - "loss": 0.6058, + "epoch": 0.7244632171770503, + "grad_norm": 1.8462090492248535, + "learning_rate": 2.6074627451582832e-06, + "loss": 0.6614, "step": 10291 }, { - "epoch": 0.778782490257652, - "grad_norm": 2.33750057220459, - "learning_rate": 2.302260760320629e-06, - "loss": 0.769, + "epoch": 0.7245336149243224, + "grad_norm": 2.2300493717193604, + "learning_rate": 2.606220111629362e-06, + "loss": 0.7759, "step": 10292 }, { - "epoch": 0.7788581589799857, - "grad_norm": 2.0690042972564697, - "learning_rate": 2.3007565692405256e-06, - "loss": 0.6749, + "epoch": 0.7246040126715945, + "grad_norm": 2.212153673171997, + "learning_rate": 2.6049777065370408e-06, + "loss": 0.6083, "step": 10293 }, { - "epoch": 0.7789338277023192, - "grad_norm": 2.1784255504608154, - "learning_rate": 2.2992527976887156e-06, - "loss": 0.5672, + "epoch": 0.7246744104188666, + "grad_norm": 1.8609983921051025, + "learning_rate": 2.6037355299459043e-06, + "loss": 0.6974, "step": 10294 }, { - "epoch": 0.7790094964246529, - "grad_norm": 1.8492722511291504, - "learning_rate": 2.2977494457593715e-06, - "loss": 0.7427, + "epoch": 0.7247448081661387, + "grad_norm": 2.1889898777008057, + "learning_rate": 2.602493581920541e-06, + "loss": 0.7044, "step": 10295 }, { - "epoch": 0.7790851651469864, - "grad_norm": 2.1456425189971924, - "learning_rate": 2.2962465135466325e-06, - "loss": 0.6621, + "epoch": 0.7248152059134108, + "grad_norm": 1.657961130142212, + "learning_rate": 2.6012518625255175e-06, + "loss": 0.7387, "step": 10296 }, { - "epoch": 0.7791608338693201, - "grad_norm": 2.1436009407043457, - "learning_rate": 2.294744001144619e-06, - "loss": 0.7521, + "epoch": 0.7248856036606829, + "grad_norm": 1.877724051475525, + "learning_rate": 2.6000103718253948e-06, + "loss": 0.7066, "step": 10297 }, { - "epoch": 0.7792365025916538, - "grad_norm": 2.698065996170044, - "learning_rate": 2.2932419086474206e-06, - "loss": 0.7116, + "epoch": 0.7249560014079549, + "grad_norm": 1.6603754758834839, + "learning_rate": 2.5987691098847162e-06, + "loss": 0.5464, "step": 10298 }, { - "epoch": 0.7793121713139873, - "grad_norm": 2.1838340759277344, - "learning_rate": 2.291740236149112e-06, - "loss": 0.6111, + "epoch": 0.725026399155227, + "grad_norm": 2.0902798175811768, + "learning_rate": 2.59752807676802e-06, + "loss": 0.7234, "step": 10299 }, { - "epoch": 0.779387840036321, - "grad_norm": 2.1678380966186523, - "learning_rate": 2.290238983743724e-06, - "loss": 0.5987, + "epoch": 0.7250967969024992, + "grad_norm": 2.2791807651519775, + "learning_rate": 2.5962872725398256e-06, + "loss": 0.6683, "step": 10300 }, { - "epoch": 0.7794635087586546, - "grad_norm": 2.3915209770202637, - "learning_rate": 2.288738151525273e-06, - "loss": 0.5449, + "epoch": 0.7251671946497712, + "grad_norm": 1.746997356414795, + "learning_rate": 2.5950466972646445e-06, + "loss": 0.55, "step": 10301 }, { - "epoch": 0.7795391774809882, - "grad_norm": 2.2159979343414307, - "learning_rate": 2.2872377395877457e-06, - "loss": 0.6592, + "epoch": 0.7252375923970433, + "grad_norm": 2.0980730056762695, + "learning_rate": 2.5938063510069723e-06, + "loss": 0.6214, "step": 10302 }, { - "epoch": 0.7796148462033219, - "grad_norm": 2.2043135166168213, - "learning_rate": 2.285737748025103e-06, - "loss": 0.671, + "epoch": 0.7253079901443154, + "grad_norm": 1.7883740663528442, + "learning_rate": 2.5925662338313008e-06, + "loss": 0.6064, "step": 10303 }, { - "epoch": 0.7796905149256554, - "grad_norm": 2.210493564605713, - "learning_rate": 2.2842381769312798e-06, - "loss": 0.589, + "epoch": 0.7253783878915875, + "grad_norm": 2.0667238235473633, + "learning_rate": 2.5913263458021002e-06, + "loss": 0.6808, "step": 10304 }, { - "epoch": 0.7797661836479891, - "grad_norm": 2.3865721225738525, - "learning_rate": 2.282739026400182e-06, - "loss": 0.6478, + "epoch": 0.7254487856388595, + "grad_norm": 1.8157246112823486, + "learning_rate": 2.5900866869838383e-06, + "loss": 0.6501, "step": 10305 }, { - "epoch": 0.7798418523703228, - "grad_norm": 1.9948359727859497, - "learning_rate": 2.2812402965256957e-06, - "loss": 0.7697, + "epoch": 0.7255191833861316, + "grad_norm": 1.9227396249771118, + "learning_rate": 2.588847257440963e-06, + "loss": 0.5667, "step": 10306 }, { - "epoch": 0.7799175210926563, - "grad_norm": 3.1462361812591553, - "learning_rate": 2.27974198740167e-06, - "loss": 0.6498, + "epoch": 0.7255895811334038, + "grad_norm": 2.1207094192504883, + "learning_rate": 2.587608057237914e-06, + "loss": 0.5921, "step": 10307 }, { - "epoch": 0.77999318981499, - "grad_norm": 1.8765848875045776, - "learning_rate": 2.278244099121936e-06, - "loss": 0.6286, + "epoch": 0.7256599788806758, + "grad_norm": 1.7970744371414185, + "learning_rate": 2.5863690864391152e-06, + "loss": 0.6811, "step": 10308 }, { - "epoch": 0.7800688585373236, - "grad_norm": 4.639185905456543, - "learning_rate": 2.276746631780301e-06, - "loss": 0.5139, + "epoch": 0.7257303766279479, + "grad_norm": 2.1011929512023926, + "learning_rate": 2.585130345108988e-06, + "loss": 0.6683, "step": 10309 }, { - "epoch": 0.7801445272596572, - "grad_norm": 2.111081838607788, - "learning_rate": 2.2752495854705357e-06, - "loss": 0.6906, + "epoch": 0.72580077437522, + "grad_norm": 1.8601335287094116, + "learning_rate": 2.5838918333119295e-06, + "loss": 0.749, "step": 10310 }, { - "epoch": 0.7802201959819909, - "grad_norm": 6.2167744636535645, - "learning_rate": 2.2737529602863918e-06, - "loss": 0.8498, + "epoch": 0.7258711721224921, + "grad_norm": 1.9433733224868774, + "learning_rate": 2.5826535511123365e-06, + "loss": 0.6811, "step": 10311 }, { - "epoch": 0.7802958647043244, - "grad_norm": 2.314579486846924, - "learning_rate": 2.2722567563215922e-06, - "loss": 0.745, + "epoch": 0.7259415698697642, + "grad_norm": 2.216212034225464, + "learning_rate": 2.581415498574587e-06, + "loss": 0.6486, "step": 10312 }, { - "epoch": 0.7803715334266581, - "grad_norm": 1.908983826637268, - "learning_rate": 2.270760973669836e-06, - "loss": 0.6662, + "epoch": 0.7260119676170362, + "grad_norm": 1.6425012350082397, + "learning_rate": 2.580177675763046e-06, + "loss": 0.6094, "step": 10313 }, { - "epoch": 0.7804472021489917, - "grad_norm": 1.937185287475586, - "learning_rate": 2.269265612424791e-06, - "loss": 0.5662, + "epoch": 0.7260823653643084, + "grad_norm": 3.317004442214966, + "learning_rate": 2.578940082742069e-06, + "loss": 0.6949, "step": 10314 }, { - "epoch": 0.7805228708713253, - "grad_norm": 2.0278525352478027, - "learning_rate": 2.2677706726801044e-06, - "loss": 0.8562, + "epoch": 0.7261527631115804, + "grad_norm": 1.6537209749221802, + "learning_rate": 2.577702719576003e-06, + "loss": 0.7909, "step": 10315 }, { - "epoch": 0.780598539593659, - "grad_norm": 2.70788311958313, - "learning_rate": 2.266276154529393e-06, - "loss": 0.8197, + "epoch": 0.7262231608588525, + "grad_norm": 2.0623319149017334, + "learning_rate": 2.5764655863291753e-06, + "loss": 0.6966, "step": 10316 }, { - "epoch": 0.7806742083159925, - "grad_norm": 1.852952480316162, - "learning_rate": 2.2647820580662505e-06, - "loss": 0.7382, + "epoch": 0.7262935586061247, + "grad_norm": 1.8167251348495483, + "learning_rate": 2.5752286830659105e-06, + "loss": 0.6469, "step": 10317 }, { - "epoch": 0.7807498770383262, - "grad_norm": 2.082524299621582, - "learning_rate": 2.263288383384234e-06, - "loss": 0.6123, + "epoch": 0.7263639563533967, + "grad_norm": 1.9063055515289307, + "learning_rate": 2.5739920098505134e-06, + "loss": 0.6378, "step": 10318 }, { - "epoch": 0.7808255457606599, - "grad_norm": 2.4691317081451416, - "learning_rate": 2.2617951305768917e-06, - "loss": 0.7913, + "epoch": 0.7264343541006688, + "grad_norm": 2.0043206214904785, + "learning_rate": 2.5727555667472806e-06, + "loss": 0.6809, "step": 10319 }, { - "epoch": 0.7809012144829934, - "grad_norm": 2.056469678878784, - "learning_rate": 2.2603022997377337e-06, - "loss": 0.7534, + "epoch": 0.7265047518479408, + "grad_norm": 1.867244839668274, + "learning_rate": 2.5715193538204934e-06, + "loss": 0.803, "step": 10320 }, { - "epoch": 0.7809768832053271, - "grad_norm": 2.364788293838501, - "learning_rate": 2.2588098909602435e-06, - "loss": 0.6309, + "epoch": 0.726575149595213, + "grad_norm": 1.8845382928848267, + "learning_rate": 2.570283371134427e-06, + "loss": 0.6293, "step": 10321 }, { - "epoch": 0.7810525519276607, - "grad_norm": 2.2668612003326416, - "learning_rate": 2.2573179043378803e-06, - "loss": 0.7426, + "epoch": 0.726645547342485, + "grad_norm": 1.6482939720153809, + "learning_rate": 2.569047618753339e-06, + "loss": 0.8176, "step": 10322 }, { - "epoch": 0.7811282206499943, - "grad_norm": 2.3786261081695557, - "learning_rate": 2.255826339964079e-06, - "loss": 0.6801, + "epoch": 0.7267159450897571, + "grad_norm": 1.7835850715637207, + "learning_rate": 2.5678120967414796e-06, + "loss": 0.706, "step": 10323 }, { - "epoch": 0.781203889372328, - "grad_norm": 2.557690382003784, - "learning_rate": 2.254335197932246e-06, - "loss": 0.5807, + "epoch": 0.7267863428370293, + "grad_norm": 1.917510747909546, + "learning_rate": 2.5665768051630832e-06, + "loss": 0.6406, "step": 10324 }, { - "epoch": 0.7812795580946615, - "grad_norm": 2.1861777305603027, - "learning_rate": 2.25284447833576e-06, - "loss": 0.6966, + "epoch": 0.7268567405843013, + "grad_norm": 1.6719331741333008, + "learning_rate": 2.5653417440823736e-06, + "loss": 0.642, "step": 10325 }, { - "epoch": 0.7813552268169952, - "grad_norm": 2.943876028060913, - "learning_rate": 2.251354181267977e-06, - "loss": 0.6657, + "epoch": 0.7269271383315734, + "grad_norm": 1.8751591444015503, + "learning_rate": 2.5641069135635612e-06, + "loss": 0.6648, "step": 10326 }, { - "epoch": 0.7814308955393289, - "grad_norm": 1.9715664386749268, - "learning_rate": 2.249864306822222e-06, - "loss": 0.7396, + "epoch": 0.7269975360788454, + "grad_norm": 1.7475430965423584, + "learning_rate": 2.56287231367085e-06, + "loss": 0.6681, "step": 10327 }, { - "epoch": 0.7815065642616624, - "grad_norm": 1.953696608543396, - "learning_rate": 2.248374855091797e-06, - "loss": 0.52, + "epoch": 0.7270679338261176, + "grad_norm": 1.7640563249588013, + "learning_rate": 2.561637944468423e-06, + "loss": 0.6682, "step": 10328 }, { - "epoch": 0.7815822329839961, - "grad_norm": 2.13394832611084, - "learning_rate": 2.246885826169975e-06, - "loss": 0.6673, + "epoch": 0.7271383315733897, + "grad_norm": 2.068821668624878, + "learning_rate": 2.560403806020462e-06, + "loss": 0.6275, "step": 10329 }, { - "epoch": 0.7816579017063296, - "grad_norm": 1.9963359832763672, - "learning_rate": 2.2453972201500055e-06, - "loss": 0.6992, + "epoch": 0.7272087293206617, + "grad_norm": 1.6196540594100952, + "learning_rate": 2.559169898391127e-06, + "loss": 0.5859, "step": 10330 }, { - "epoch": 0.7817335704286633, - "grad_norm": 2.229747772216797, - "learning_rate": 2.243909037125112e-06, - "loss": 0.8009, + "epoch": 0.7272791270679339, + "grad_norm": 1.673447847366333, + "learning_rate": 2.5579362216445723e-06, + "loss": 0.6413, "step": 10331 }, { - "epoch": 0.781809239150997, - "grad_norm": 1.8210551738739014, - "learning_rate": 2.2424212771884842e-06, - "loss": 0.7221, + "epoch": 0.7273495248152059, + "grad_norm": 1.9744035005569458, + "learning_rate": 2.5567027758449323e-06, + "loss": 0.6467, "step": 10332 }, { - "epoch": 0.7818849078733305, - "grad_norm": 2.274820566177368, - "learning_rate": 2.2409339404332924e-06, - "loss": 0.8382, + "epoch": 0.727419922562478, + "grad_norm": 2.404219627380371, + "learning_rate": 2.5554695610563425e-06, + "loss": 0.7162, "step": 10333 }, { - "epoch": 0.7819605765956642, - "grad_norm": 2.1577939987182617, - "learning_rate": 2.2394470269526785e-06, - "loss": 0.7821, + "epoch": 0.7274903203097501, + "grad_norm": 1.932910680770874, + "learning_rate": 2.5542365773429125e-06, + "loss": 0.6499, "step": 10334 }, { - "epoch": 0.7820362453179978, - "grad_norm": 1.9666999578475952, - "learning_rate": 2.2379605368397578e-06, - "loss": 0.7119, + "epoch": 0.7275607180570222, + "grad_norm": 1.9428976774215698, + "learning_rate": 2.5530038247687517e-06, + "loss": 0.6158, "step": 10335 }, { - "epoch": 0.7821119140403314, - "grad_norm": 2.317742109298706, - "learning_rate": 2.2364744701876195e-06, - "loss": 0.5406, + "epoch": 0.7276311158042943, + "grad_norm": 1.9125943183898926, + "learning_rate": 2.5517713033979493e-06, + "loss": 0.5824, "step": 10336 }, { - "epoch": 0.7821875827626651, - "grad_norm": 2.2449235916137695, - "learning_rate": 2.234988827089326e-06, - "loss": 0.6204, + "epoch": 0.7277015135515663, + "grad_norm": 1.8740086555480957, + "learning_rate": 2.5505390132945836e-06, + "loss": 0.7222, "step": 10337 }, { - "epoch": 0.7822632514849986, - "grad_norm": 2.313387632369995, - "learning_rate": 2.2335036076379153e-06, - "loss": 0.7333, + "epoch": 0.7277719112988384, + "grad_norm": 1.9459059238433838, + "learning_rate": 2.5493069545227264e-06, + "loss": 0.7816, "step": 10338 }, { - "epoch": 0.7823389202073323, - "grad_norm": 2.1615755558013916, - "learning_rate": 2.2320188119263895e-06, - "loss": 0.7058, + "epoch": 0.7278423090461105, + "grad_norm": 1.9712246656417847, + "learning_rate": 2.5480751271464286e-06, + "loss": 0.6105, "step": 10339 }, { - "epoch": 0.782414588929666, - "grad_norm": 2.0965301990509033, - "learning_rate": 2.230534440047738e-06, - "loss": 0.5865, + "epoch": 0.7279127067933826, + "grad_norm": 1.8439620733261108, + "learning_rate": 2.54684353122974e-06, + "loss": 0.6835, "step": 10340 }, { - "epoch": 0.7824902576519995, - "grad_norm": 2.1409988403320312, - "learning_rate": 2.2290504920949155e-06, - "loss": 0.643, + "epoch": 0.7279831045406547, + "grad_norm": 1.9995825290679932, + "learning_rate": 2.5456121668366883e-06, + "loss": 0.6597, "step": 10341 }, { - "epoch": 0.7825659263743332, - "grad_norm": 2.249452590942383, - "learning_rate": 2.2275669681608534e-06, - "loss": 0.6476, + "epoch": 0.7280535022879268, + "grad_norm": 1.5900872945785522, + "learning_rate": 2.5443810340312947e-06, + "loss": 0.6195, "step": 10342 }, { - "epoch": 0.7826415950966668, - "grad_norm": 2.122205972671509, - "learning_rate": 2.22608386833845e-06, - "loss": 0.7286, + "epoch": 0.7281239000351989, + "grad_norm": 2.124976873397827, + "learning_rate": 2.5431501328775643e-06, + "loss": 0.7431, "step": 10343 }, { - "epoch": 0.7827172638190004, - "grad_norm": 2.537824869155884, - "learning_rate": 2.224601192720581e-06, - "loss": 0.7216, + "epoch": 0.7281942977824709, + "grad_norm": 1.9709233045578003, + "learning_rate": 2.5419194634394963e-06, + "loss": 0.6714, "step": 10344 }, { - "epoch": 0.7827929325413341, - "grad_norm": 2.4190261363983154, - "learning_rate": 2.2231189414001053e-06, - "loss": 0.544, + "epoch": 0.728264695529743, + "grad_norm": 1.6129564046859741, + "learning_rate": 2.540689025781072e-06, + "loss": 0.7199, "step": 10345 }, { - "epoch": 0.7828686012636676, - "grad_norm": 2.434654712677002, - "learning_rate": 2.221637114469837e-06, - "loss": 0.7063, + "epoch": 0.7283350932770152, + "grad_norm": 2.001476526260376, + "learning_rate": 2.539458819966265e-06, + "loss": 0.7043, "step": 10346 }, { - "epoch": 0.7829442699860013, - "grad_norm": 2.0389528274536133, - "learning_rate": 2.2201557120225783e-06, - "loss": 0.5486, + "epoch": 0.7284054910242872, + "grad_norm": 1.6625242233276367, + "learning_rate": 2.538228846059034e-06, + "loss": 0.6117, "step": 10347 }, { - "epoch": 0.7830199387083349, - "grad_norm": 2.8061161041259766, - "learning_rate": 2.2186747341510968e-06, - "loss": 0.6029, + "epoch": 0.7284758887715593, + "grad_norm": 1.96446692943573, + "learning_rate": 2.5369991041233256e-06, + "loss": 0.7194, "step": 10348 }, { - "epoch": 0.7830956074306685, - "grad_norm": 1.9045759439468384, - "learning_rate": 2.2171941809481367e-06, - "loss": 0.7342, + "epoch": 0.7285462865188314, + "grad_norm": 1.7614984512329102, + "learning_rate": 2.535769594223074e-06, + "loss": 0.5842, "step": 10349 }, { - "epoch": 0.7831712761530022, - "grad_norm": 2.0165064334869385, - "learning_rate": 2.2157140525064155e-06, - "loss": 0.7228, + "epoch": 0.7286166842661035, + "grad_norm": 1.2903255224227905, + "learning_rate": 2.5345403164222063e-06, + "loss": 0.7633, "step": 10350 }, { - "epoch": 0.7832469448753357, - "grad_norm": 2.1648142337799072, - "learning_rate": 2.214234348918623e-06, - "loss": 0.7189, + "epoch": 0.7286870820133756, + "grad_norm": 1.7091795206069946, + "learning_rate": 2.53331127078463e-06, + "loss": 0.5876, "step": 10351 }, { - "epoch": 0.7833226135976694, - "grad_norm": 1.7740191221237183, - "learning_rate": 2.2127550702774267e-06, - "loss": 0.6009, + "epoch": 0.7287574797606476, + "grad_norm": 1.5589216947555542, + "learning_rate": 2.532082457374248e-06, + "loss": 0.7494, "step": 10352 }, { - "epoch": 0.7833982823200031, - "grad_norm": 2.149911642074585, - "learning_rate": 2.2112762166754567e-06, - "loss": 0.7393, + "epoch": 0.7288278775079198, + "grad_norm": 1.876129150390625, + "learning_rate": 2.5308538762549452e-06, + "loss": 0.6515, "step": 10353 }, { - "epoch": 0.7834739510423366, - "grad_norm": 2.2649195194244385, - "learning_rate": 2.209797788205326e-06, - "loss": 0.657, + "epoch": 0.7288982752551918, + "grad_norm": 2.050868034362793, + "learning_rate": 2.5296255274905962e-06, + "loss": 0.6091, "step": 10354 }, { - "epoch": 0.7835496197646703, - "grad_norm": 2.4505796432495117, - "learning_rate": 2.208319784959622e-06, - "loss": 0.569, + "epoch": 0.7289686730024639, + "grad_norm": 1.6494568586349487, + "learning_rate": 2.528397411145063e-06, + "loss": 0.6018, "step": 10355 }, { - "epoch": 0.7836252884870039, - "grad_norm": 2.0467987060546875, - "learning_rate": 2.2068422070309032e-06, - "loss": 0.8597, + "epoch": 0.7290390707497361, + "grad_norm": 2.221632242202759, + "learning_rate": 2.5271695272822007e-06, + "loss": 0.6874, "step": 10356 }, { - "epoch": 0.7837009572093375, - "grad_norm": 2.103842258453369, - "learning_rate": 2.2053650545116936e-06, - "loss": 0.6934, + "epoch": 0.7291094684970081, + "grad_norm": 1.9048995971679688, + "learning_rate": 2.5259418759658427e-06, + "loss": 0.7693, "step": 10357 }, { - "epoch": 0.7837766259316712, - "grad_norm": 1.9594662189483643, - "learning_rate": 2.2038883274945015e-06, - "loss": 0.6439, + "epoch": 0.7291798662442802, + "grad_norm": 1.6914536952972412, + "learning_rate": 2.524714457259822e-06, + "loss": 0.5889, "step": 10358 }, { - "epoch": 0.7838522946540047, - "grad_norm": 2.5224552154541016, - "learning_rate": 2.2024120260718035e-06, - "loss": 0.6937, + "epoch": 0.7292502639915522, + "grad_norm": 2.0195040702819824, + "learning_rate": 2.523487271227948e-06, + "loss": 0.7628, "step": 10359 }, { - "epoch": 0.7839279633763384, - "grad_norm": 1.9823088645935059, - "learning_rate": 2.2009361503360506e-06, - "loss": 0.5863, + "epoch": 0.7293206617388244, + "grad_norm": 1.656240701675415, + "learning_rate": 2.522260317934026e-06, + "loss": 0.583, "step": 10360 }, { - "epoch": 0.784003632098672, - "grad_norm": 2.8926243782043457, - "learning_rate": 2.199460700379666e-06, - "loss": 0.6975, + "epoch": 0.7293910594860964, + "grad_norm": 1.9858083724975586, + "learning_rate": 2.5210335974418428e-06, + "loss": 0.6043, "step": 10361 }, { - "epoch": 0.7840793008210056, - "grad_norm": 2.1695239543914795, - "learning_rate": 2.1979856762950488e-06, - "loss": 0.5354, + "epoch": 0.7294614572333685, + "grad_norm": 1.6349362134933472, + "learning_rate": 2.5198071098151822e-06, + "loss": 0.6762, "step": 10362 }, { - "epoch": 0.7841549695433393, - "grad_norm": 4.557194709777832, - "learning_rate": 2.196511078174571e-06, - "loss": 0.7417, + "epoch": 0.7295318549806407, + "grad_norm": 2.0963664054870605, + "learning_rate": 2.518580855117804e-06, + "loss": 0.6568, "step": 10363 }, { - "epoch": 0.7842306382656729, - "grad_norm": 2.158311605453491, - "learning_rate": 2.19503690611057e-06, - "loss": 0.6356, + "epoch": 0.7296022527279127, + "grad_norm": 1.9063777923583984, + "learning_rate": 2.5173548334134683e-06, + "loss": 0.7014, "step": 10364 }, { - "epoch": 0.7843063069880065, - "grad_norm": 2.076812505722046, - "learning_rate": 2.1935631601953705e-06, - "loss": 0.6391, + "epoch": 0.7296726504751848, + "grad_norm": 1.8628312349319458, + "learning_rate": 2.516129044765915e-06, + "loss": 0.7922, "step": 10365 }, { - "epoch": 0.7843819757103402, - "grad_norm": 2.2256968021392822, - "learning_rate": 2.192089840521263e-06, - "loss": 0.5693, + "epoch": 0.7297430482224568, + "grad_norm": 1.8367992639541626, + "learning_rate": 2.5149034892388736e-06, + "loss": 0.5862, "step": 10366 }, { - "epoch": 0.7844576444326737, - "grad_norm": 3.5892693996429443, - "learning_rate": 2.1906169471805065e-06, - "loss": 0.6821, + "epoch": 0.729813445969729, + "grad_norm": 1.8020175695419312, + "learning_rate": 2.5136781668960596e-06, + "loss": 0.5537, "step": 10367 }, { - "epoch": 0.7845333131550074, - "grad_norm": 2.188708543777466, - "learning_rate": 2.1891444802653406e-06, - "loss": 0.6475, + "epoch": 0.7298838437170011, + "grad_norm": 1.927307367324829, + "learning_rate": 2.5124530778011822e-06, + "loss": 0.6933, "step": 10368 }, { - "epoch": 0.784608981877341, - "grad_norm": 2.307600975036621, - "learning_rate": 2.187672439867977e-06, - "loss": 0.7261, + "epoch": 0.7299542414642731, + "grad_norm": 1.8053714036941528, + "learning_rate": 2.5112282220179316e-06, + "loss": 0.7009, "step": 10369 }, { - "epoch": 0.7846846505996746, - "grad_norm": 1.9995859861373901, - "learning_rate": 2.1862008260805987e-06, - "loss": 0.6297, + "epoch": 0.7300246392115453, + "grad_norm": 1.829987645149231, + "learning_rate": 2.510003599609993e-06, + "loss": 0.6729, "step": 10370 }, { - "epoch": 0.7847603193220083, - "grad_norm": 2.0694448947906494, - "learning_rate": 2.184729638995363e-06, - "loss": 0.6715, + "epoch": 0.7300950369588173, + "grad_norm": 1.6121257543563843, + "learning_rate": 2.5087792106410343e-06, + "loss": 0.6515, "step": 10371 }, { - "epoch": 0.7848359880443418, - "grad_norm": 2.0987765789031982, - "learning_rate": 2.1832588787044003e-06, - "loss": 0.6215, + "epoch": 0.7301654347060894, + "grad_norm": 1.6842812299728394, + "learning_rate": 2.507555055174711e-06, + "loss": 0.6181, "step": 10372 }, { - "epoch": 0.7849116567666755, - "grad_norm": 2.225297689437866, - "learning_rate": 2.1817885452998156e-06, - "loss": 0.5915, + "epoch": 0.7302358324533615, + "grad_norm": 2.9443044662475586, + "learning_rate": 2.5063311332746676e-06, + "loss": 0.6233, "step": 10373 }, { - "epoch": 0.7849873254890091, - "grad_norm": 3.953972101211548, - "learning_rate": 2.1803186388736867e-06, - "loss": 0.789, + "epoch": 0.7303062302006336, + "grad_norm": 1.8835875988006592, + "learning_rate": 2.50510744500454e-06, + "loss": 0.6831, "step": 10374 }, { - "epoch": 0.7850629942113427, - "grad_norm": 2.2498698234558105, - "learning_rate": 2.1788491595180567e-06, - "loss": 0.6853, + "epoch": 0.7303766279479057, + "grad_norm": 1.9468082189559937, + "learning_rate": 2.503883990427945e-06, + "loss": 0.6425, "step": 10375 }, { - "epoch": 0.7851386629336764, - "grad_norm": 2.0867936611175537, - "learning_rate": 2.177380107324958e-06, - "loss": 0.7266, + "epoch": 0.7304470256951777, + "grad_norm": 1.7566752433776855, + "learning_rate": 2.502660769608496e-06, + "loss": 0.5654, "step": 10376 }, { - "epoch": 0.78521433165601, - "grad_norm": 2.7368338108062744, - "learning_rate": 2.175911482386386e-06, - "loss": 0.6268, + "epoch": 0.7305174234424499, + "grad_norm": 2.1460485458374023, + "learning_rate": 2.5014377826097866e-06, + "loss": 0.6749, "step": 10377 }, { - "epoch": 0.7852900003783436, - "grad_norm": 2.203387975692749, - "learning_rate": 2.174443284794307e-06, - "loss": 0.7792, + "epoch": 0.7305878211897219, + "grad_norm": 2.020026683807373, + "learning_rate": 2.5002150294954e-06, + "loss": 0.7093, "step": 10378 }, { - "epoch": 0.7853656691006773, - "grad_norm": 1.9313303232192993, - "learning_rate": 2.1729755146406653e-06, - "loss": 0.7007, + "epoch": 0.730658218936994, + "grad_norm": 1.6902161836624146, + "learning_rate": 2.4989925103289074e-06, + "loss": 0.6391, "step": 10379 }, { - "epoch": 0.7854413378230108, - "grad_norm": 2.8301777839660645, - "learning_rate": 2.171508172017378e-06, - "loss": 0.7817, + "epoch": 0.7307286166842661, + "grad_norm": 2.0284833908081055, + "learning_rate": 2.497770225173873e-06, + "loss": 0.6863, "step": 10380 }, { - "epoch": 0.7855170065453445, - "grad_norm": 2.140004873275757, - "learning_rate": 2.170041257016336e-06, - "loss": 0.7564, + "epoch": 0.7307990144315382, + "grad_norm": 2.023139476776123, + "learning_rate": 2.49654817409384e-06, + "loss": 0.6019, "step": 10381 }, { - "epoch": 0.7855926752676781, - "grad_norm": 9.439233779907227, - "learning_rate": 2.1685747697294005e-06, - "loss": 0.6332, + "epoch": 0.7308694121788103, + "grad_norm": 1.5427888631820679, + "learning_rate": 2.4953263571523485e-06, + "loss": 0.5578, "step": 10382 }, { - "epoch": 0.7856683439900117, - "grad_norm": 3.0512073040008545, - "learning_rate": 2.167108710248408e-06, - "loss": 0.6822, + "epoch": 0.7309398099260823, + "grad_norm": 1.526821494102478, + "learning_rate": 2.4941047744129185e-06, + "loss": 0.6563, "step": 10383 }, { - "epoch": 0.7857440127123454, - "grad_norm": 2.452768564224243, - "learning_rate": 2.165643078665172e-06, - "loss": 0.6007, + "epoch": 0.7310102076733545, + "grad_norm": 1.89927339553833, + "learning_rate": 2.49288342593906e-06, + "loss": 0.678, "step": 10384 }, { - "epoch": 0.785819681434679, - "grad_norm": 2.016571044921875, - "learning_rate": 2.1641778750714653e-06, - "loss": 0.6412, + "epoch": 0.7310806054206266, + "grad_norm": 3.1016879081726074, + "learning_rate": 2.491662311794277e-06, + "loss": 0.6631, "step": 10385 }, { - "epoch": 0.7858953501570126, - "grad_norm": 2.0821337699890137, - "learning_rate": 2.162713099559053e-06, - "loss": 0.6281, + "epoch": 0.7311510031678986, + "grad_norm": 1.7901020050048828, + "learning_rate": 2.490441432042051e-06, + "loss": 0.643, "step": 10386 }, { - "epoch": 0.7859710188793462, - "grad_norm": 1.9462223052978516, - "learning_rate": 2.16124875221966e-06, - "loss": 0.6654, + "epoch": 0.7312214009151707, + "grad_norm": 1.6844515800476074, + "learning_rate": 2.4892207867458604e-06, + "loss": 0.5677, "step": 10387 }, { - "epoch": 0.7860466876016798, - "grad_norm": 1.9950424432754517, - "learning_rate": 2.1597848331449925e-06, - "loss": 0.6193, + "epoch": 0.7312917986624428, + "grad_norm": 1.8172078132629395, + "learning_rate": 2.4880003759691644e-06, + "loss": 0.6054, "step": 10388 }, { - "epoch": 0.7861223563240135, - "grad_norm": 2.3755111694335938, - "learning_rate": 2.1583213424267207e-06, - "loss": 0.6631, + "epoch": 0.7313621964097149, + "grad_norm": 1.8839858770370483, + "learning_rate": 2.4867801997754205e-06, + "loss": 0.7593, "step": 10389 }, { - "epoch": 0.7861980250463471, - "grad_norm": 2.3503427505493164, - "learning_rate": 2.1568582801564918e-06, - "loss": 0.7469, + "epoch": 0.731432594156987, + "grad_norm": 1.858086347579956, + "learning_rate": 2.4855602582280564e-06, + "loss": 0.5796, "step": 10390 }, { - "epoch": 0.7862736937686807, - "grad_norm": 2.1532740592956543, - "learning_rate": 2.1553956464259367e-06, - "loss": 0.675, + "epoch": 0.731502991904259, + "grad_norm": 1.7097035646438599, + "learning_rate": 2.4843405513905054e-06, + "loss": 0.6145, "step": 10391 }, { - "epoch": 0.7863493624910144, - "grad_norm": 3.5984179973602295, - "learning_rate": 2.153933441326641e-06, - "loss": 0.5442, + "epoch": 0.7315733896515312, + "grad_norm": 1.6394836902618408, + "learning_rate": 2.483121079326176e-06, + "loss": 0.6661, "step": 10392 }, { - "epoch": 0.786425031213348, - "grad_norm": 1.983961820602417, - "learning_rate": 2.1524716649501764e-06, - "loss": 0.6268, + "epoch": 0.7316437873988032, + "grad_norm": 2.0659093856811523, + "learning_rate": 2.481901842098475e-06, + "loss": 0.6551, "step": 10393 }, { - "epoch": 0.7865006999356816, - "grad_norm": 2.3665051460266113, - "learning_rate": 2.151010317388083e-06, - "loss": 0.6089, + "epoch": 0.7317141851460753, + "grad_norm": 2.311492681503296, + "learning_rate": 2.4806828397707868e-06, + "loss": 0.6077, "step": 10394 }, { - "epoch": 0.7865763686580152, - "grad_norm": 3.3090527057647705, - "learning_rate": 2.1495493987318773e-06, - "loss": 0.7426, + "epoch": 0.7317845828933475, + "grad_norm": 1.9859529733657837, + "learning_rate": 2.4794640724064956e-06, + "loss": 0.6624, "step": 10395 }, { - "epoch": 0.7866520373803488, - "grad_norm": 2.405433416366577, - "learning_rate": 2.148088909073044e-06, - "loss": 0.7821, + "epoch": 0.7318549806406195, + "grad_norm": 2.0605568885803223, + "learning_rate": 2.478245540068956e-06, + "loss": 0.7471, "step": 10396 }, { - "epoch": 0.7867277061026825, - "grad_norm": 2.0059757232666016, - "learning_rate": 2.1466288485030456e-06, - "loss": 0.6236, + "epoch": 0.7319253783878916, + "grad_norm": 1.6775743961334229, + "learning_rate": 2.477027242821529e-06, + "loss": 0.6633, "step": 10397 }, { - "epoch": 0.786803374825016, - "grad_norm": 2.419422149658203, - "learning_rate": 2.145169217113317e-06, - "loss": 0.7572, + "epoch": 0.7319957761351636, + "grad_norm": 1.9092340469360352, + "learning_rate": 2.475809180727549e-06, + "loss": 0.6218, "step": 10398 }, { - "epoch": 0.7868790435473497, - "grad_norm": 3.1536026000976562, - "learning_rate": 2.143710014995261e-06, - "loss": 0.6652, + "epoch": 0.7320661738824358, + "grad_norm": 1.6495814323425293, + "learning_rate": 2.4745913538503494e-06, + "loss": 0.596, "step": 10399 }, { - "epoch": 0.7869547122696833, - "grad_norm": 2.279188394546509, - "learning_rate": 2.142251242240258e-06, - "loss": 0.6332, + "epoch": 0.7321365716297078, + "grad_norm": 2.274129867553711, + "learning_rate": 2.4733737622532415e-06, + "loss": 0.6144, "step": 10400 }, { - "epoch": 0.7870303809920169, - "grad_norm": 2.087526321411133, - "learning_rate": 2.1407928989396655e-06, - "loss": 0.5919, + "epoch": 0.7322069693769799, + "grad_norm": 1.8947898149490356, + "learning_rate": 2.472156405999536e-06, + "loss": 0.7046, "step": 10401 }, { - "epoch": 0.7871060497143506, - "grad_norm": 3.789907693862915, - "learning_rate": 2.1393349851848084e-06, - "loss": 0.6605, + "epoch": 0.7322773671242521, + "grad_norm": 1.6649097204208374, + "learning_rate": 2.4709392851525147e-06, + "loss": 0.7474, "step": 10402 }, { - "epoch": 0.7871817184366842, - "grad_norm": 2.147244930267334, - "learning_rate": 2.1378775010669824e-06, - "loss": 0.7815, + "epoch": 0.7323477648715241, + "grad_norm": 1.6019611358642578, + "learning_rate": 2.469722399775464e-06, + "loss": 0.6189, "step": 10403 }, { - "epoch": 0.7872573871590178, - "grad_norm": 3.6964704990386963, - "learning_rate": 2.1364204466774623e-06, - "loss": 0.4579, + "epoch": 0.7324181626187962, + "grad_norm": 1.6378843784332275, + "learning_rate": 2.468505749931647e-06, + "loss": 0.615, "step": 10404 }, { - "epoch": 0.7873330558813515, - "grad_norm": 2.3750951290130615, - "learning_rate": 2.134963822107494e-06, - "loss": 0.6549, + "epoch": 0.7324885603660682, + "grad_norm": 1.772958517074585, + "learning_rate": 2.467289335684322e-06, + "loss": 0.6923, "step": 10405 }, { - "epoch": 0.787408724603685, - "grad_norm": 2.594712972640991, - "learning_rate": 2.1335076274482954e-06, - "loss": 0.6412, + "epoch": 0.7325589581133404, + "grad_norm": 1.7548274993896484, + "learning_rate": 2.4660731570967273e-06, + "loss": 0.7506, "step": 10406 }, { - "epoch": 0.7874843933260187, - "grad_norm": 2.0209412574768066, - "learning_rate": 2.132051862791057e-06, - "loss": 0.7044, + "epoch": 0.7326293558606125, + "grad_norm": 1.8461021184921265, + "learning_rate": 2.4648572142320975e-06, + "loss": 0.6914, "step": 10407 }, { - "epoch": 0.7875600620483523, - "grad_norm": 2.0428459644317627, - "learning_rate": 2.130596528226945e-06, - "loss": 0.6416, + "epoch": 0.7326997536078845, + "grad_norm": 1.9672750234603882, + "learning_rate": 2.463641507153648e-06, + "loss": 0.6193, "step": 10408 }, { - "epoch": 0.7876357307706859, - "grad_norm": 2.461974859237671, - "learning_rate": 2.1291416238470994e-06, - "loss": 0.7138, + "epoch": 0.7327701513551567, + "grad_norm": 1.6489132642745972, + "learning_rate": 2.462426035924585e-06, + "loss": 0.6284, "step": 10409 }, { - "epoch": 0.7877113994930196, - "grad_norm": 1.9830410480499268, - "learning_rate": 2.127687149742626e-06, - "loss": 0.6454, + "epoch": 0.7328405491024287, + "grad_norm": 1.8229918479919434, + "learning_rate": 2.4612108006081e-06, + "loss": 0.6176, "step": 10410 }, { - "epoch": 0.7877870682153532, - "grad_norm": 2.3147575855255127, - "learning_rate": 2.126233106004608e-06, - "loss": 0.7328, + "epoch": 0.7329109468497008, + "grad_norm": 2.0706944465637207, + "learning_rate": 2.4599958012673783e-06, + "loss": 0.5642, "step": 10411 }, { - "epoch": 0.7878627369376868, - "grad_norm": 2.058706283569336, - "learning_rate": 2.124779492724111e-06, - "loss": 0.6221, + "epoch": 0.732981344596973, + "grad_norm": 1.4796069860458374, + "learning_rate": 2.4587810379655835e-06, + "loss": 0.4968, "step": 10412 }, { - "epoch": 0.7879384056600204, - "grad_norm": 2.767449140548706, - "learning_rate": 2.1233263099921565e-06, - "loss": 0.6106, + "epoch": 0.733051742344245, + "grad_norm": 1.9044735431671143, + "learning_rate": 2.4575665107658786e-06, + "loss": 0.7521, "step": 10413 }, { - "epoch": 0.788014074382354, - "grad_norm": 2.3518853187561035, - "learning_rate": 2.12187355789975e-06, - "loss": 0.7009, + "epoch": 0.7331221400915171, + "grad_norm": 1.8849941492080688, + "learning_rate": 2.4563522197314044e-06, + "loss": 0.7458, "step": 10414 }, { - "epoch": 0.7880897431046877, - "grad_norm": 1.9216002225875854, - "learning_rate": 2.1204212365378685e-06, - "loss": 0.7082, + "epoch": 0.7331925378387891, + "grad_norm": 1.8751345872879028, + "learning_rate": 2.455138164925293e-06, + "loss": 0.7251, "step": 10415 }, { - "epoch": 0.7881654118270213, - "grad_norm": 2.5170297622680664, - "learning_rate": 2.1189693459974597e-06, - "loss": 0.6601, + "epoch": 0.7332629355860613, + "grad_norm": 1.7583576440811157, + "learning_rate": 2.453924346410662e-06, + "loss": 0.6083, "step": 10416 }, { - "epoch": 0.7882410805493549, - "grad_norm": 3.180408000946045, - "learning_rate": 2.117517886369447e-06, - "loss": 0.8731, + "epoch": 0.7333333333333333, + "grad_norm": 1.8714901208877563, + "learning_rate": 2.4527107642506243e-06, + "loss": 0.6034, "step": 10417 }, { - "epoch": 0.7883167492716886, - "grad_norm": 1.9902843236923218, - "learning_rate": 2.116066857744725e-06, - "loss": 0.6012, + "epoch": 0.7334037310806054, + "grad_norm": 1.7321245670318604, + "learning_rate": 2.4514974185082696e-06, + "loss": 0.6375, "step": 10418 }, { - "epoch": 0.7883924179940222, - "grad_norm": 2.8387811183929443, - "learning_rate": 2.1146162602141614e-06, - "loss": 0.6855, + "epoch": 0.7334741288278775, + "grad_norm": 1.9577374458312988, + "learning_rate": 2.450284309246686e-06, + "loss": 0.691, "step": 10419 }, { - "epoch": 0.7884680867163558, - "grad_norm": 2.100433349609375, - "learning_rate": 2.1131660938685998e-06, - "loss": 0.6094, + "epoch": 0.7335445265751496, + "grad_norm": 1.9070218801498413, + "learning_rate": 2.4490714365289417e-06, + "loss": 0.5542, "step": 10420 }, { - "epoch": 0.7885437554386894, - "grad_norm": 2.3717706203460693, - "learning_rate": 2.1117163587988477e-06, - "loss": 0.6863, + "epoch": 0.7336149243224217, + "grad_norm": 2.037572145462036, + "learning_rate": 2.4478588004180946e-06, + "loss": 0.5704, "step": 10421 }, { - "epoch": 0.788619424161023, - "grad_norm": 1.9035117626190186, - "learning_rate": 2.1102670550956986e-06, - "loss": 0.645, + "epoch": 0.7336853220696937, + "grad_norm": 1.761855125427246, + "learning_rate": 2.446646400977189e-06, + "loss": 0.5766, "step": 10422 }, { - "epoch": 0.7886950928833567, - "grad_norm": 2.151751756668091, - "learning_rate": 2.108818182849914e-06, - "loss": 0.7161, + "epoch": 0.7337557198169659, + "grad_norm": 2.293776512145996, + "learning_rate": 2.445434238269263e-06, + "loss": 0.5806, "step": 10423 }, { - "epoch": 0.7887707616056903, - "grad_norm": 2.054111957550049, - "learning_rate": 2.10736974215222e-06, - "loss": 0.6702, + "epoch": 0.733826117564238, + "grad_norm": 1.8481738567352295, + "learning_rate": 2.444222312357333e-06, + "loss": 0.6725, "step": 10424 }, { - "epoch": 0.7888464303280239, - "grad_norm": 2.213700532913208, - "learning_rate": 2.1059217330933273e-06, - "loss": 0.5848, + "epoch": 0.73389651531151, + "grad_norm": 1.9842244386672974, + "learning_rate": 2.4430106233044146e-06, + "loss": 0.6621, "step": 10425 }, { - "epoch": 0.7889220990503575, - "grad_norm": 1.8522429466247559, - "learning_rate": 2.104474155763913e-06, - "loss": 0.6449, + "epoch": 0.7339669130587821, + "grad_norm": 1.6422535181045532, + "learning_rate": 2.4417991711735e-06, + "loss": 0.7635, "step": 10426 }, { - "epoch": 0.7889977677726911, - "grad_norm": 2.314577341079712, - "learning_rate": 2.1030270102546303e-06, - "loss": 0.7704, + "epoch": 0.7340373108060542, + "grad_norm": 1.657405972480774, + "learning_rate": 2.4405879560275746e-06, + "loss": 0.517, "step": 10427 }, { - "epoch": 0.7890734364950248, - "grad_norm": 2.8026366233825684, - "learning_rate": 2.1015802966561037e-06, - "loss": 0.5235, + "epoch": 0.7341077085533263, + "grad_norm": 2.2505364418029785, + "learning_rate": 2.439376977929608e-06, + "loss": 0.6319, "step": 10428 }, { - "epoch": 0.7891491052173584, - "grad_norm": 2.0576069355010986, - "learning_rate": 2.100134015058931e-06, - "loss": 0.6654, + "epoch": 0.7341781063005984, + "grad_norm": 1.943570613861084, + "learning_rate": 2.4381662369425657e-06, + "loss": 0.5691, "step": 10429 }, { - "epoch": 0.789224773939692, - "grad_norm": 2.624588966369629, - "learning_rate": 2.098688165553683e-06, - "loss": 0.8377, + "epoch": 0.7342485040478705, + "grad_norm": 1.802017331123352, + "learning_rate": 2.43695573312939e-06, + "loss": 0.6559, "step": 10430 }, { - "epoch": 0.7893004426620257, - "grad_norm": 1.8975677490234375, - "learning_rate": 2.0972427482309034e-06, - "loss": 0.8135, + "epoch": 0.7343189017951426, + "grad_norm": 2.0881924629211426, + "learning_rate": 2.43574546655302e-06, + "loss": 0.6334, "step": 10431 }, { - "epoch": 0.7893761113843593, - "grad_norm": 2.2255985736846924, - "learning_rate": 2.09579776318111e-06, - "loss": 0.7366, + "epoch": 0.7343892995424146, + "grad_norm": 1.638030767440796, + "learning_rate": 2.434535437276377e-06, + "loss": 0.6549, "step": 10432 }, { - "epoch": 0.7894517801066929, - "grad_norm": 2.165253162384033, - "learning_rate": 2.0943532104947906e-06, - "loss": 0.6496, + "epoch": 0.7344596972896867, + "grad_norm": 1.8457937240600586, + "learning_rate": 2.433325645362372e-06, + "loss": 0.6206, "step": 10433 }, { - "epoch": 0.7895274488290265, - "grad_norm": 2.220125436782837, - "learning_rate": 2.0929090902624117e-06, - "loss": 0.697, + "epoch": 0.7345300950369588, + "grad_norm": 1.982024908065796, + "learning_rate": 2.4321160908738992e-06, + "loss": 0.6987, "step": 10434 }, { - "epoch": 0.7896031175513601, - "grad_norm": 1.6284946203231812, - "learning_rate": 2.0914654025744034e-06, - "loss": 0.589, + "epoch": 0.7346004927842309, + "grad_norm": 1.9216201305389404, + "learning_rate": 2.430906773873848e-06, + "loss": 0.7064, "step": 10435 }, { - "epoch": 0.7896787862736938, - "grad_norm": 1.925709843635559, - "learning_rate": 2.090022147521174e-06, - "loss": 0.6547, + "epoch": 0.734670890531503, + "grad_norm": 1.777143120765686, + "learning_rate": 2.429697694425094e-06, + "loss": 0.6104, "step": 10436 }, { - "epoch": 0.7897544549960274, - "grad_norm": 2.4849398136138916, - "learning_rate": 2.088579325193112e-06, - "loss": 0.5531, + "epoch": 0.734741288278775, + "grad_norm": 2.3372867107391357, + "learning_rate": 2.4284888525904962e-06, + "loss": 0.6182, "step": 10437 }, { - "epoch": 0.789830123718361, - "grad_norm": 1.8222843408584595, - "learning_rate": 2.0871369356805653e-06, - "loss": 0.6774, + "epoch": 0.7348116860260472, + "grad_norm": 2.173945665359497, + "learning_rate": 2.427280248432903e-06, + "loss": 0.7391, "step": 10438 }, { - "epoch": 0.7899057924406946, - "grad_norm": 2.412240505218506, - "learning_rate": 2.085694979073861e-06, - "loss": 0.7183, + "epoch": 0.7348820837733192, + "grad_norm": 1.961911916732788, + "learning_rate": 2.4260718820151486e-06, + "loss": 0.6536, "step": 10439 }, { - "epoch": 0.7899814611630283, - "grad_norm": 2.244049549102783, - "learning_rate": 2.084253455463302e-06, - "loss": 0.6786, + "epoch": 0.7349524815205913, + "grad_norm": 1.8809400796890259, + "learning_rate": 2.4248637534000625e-06, + "loss": 0.6689, "step": 10440 }, { - "epoch": 0.7900571298853619, - "grad_norm": 3.5137126445770264, - "learning_rate": 2.0828123649391594e-06, - "loss": 0.6426, + "epoch": 0.7350228792678635, + "grad_norm": 2.57781982421875, + "learning_rate": 2.42365586265045e-06, + "loss": 0.6337, "step": 10441 }, { - "epoch": 0.7901327986076955, - "grad_norm": 2.3122079372406006, - "learning_rate": 2.0813717075916797e-06, - "loss": 0.6122, + "epoch": 0.7350932770151355, + "grad_norm": 1.8725321292877197, + "learning_rate": 2.4224482098291162e-06, + "loss": 0.7654, "step": 10442 }, { - "epoch": 0.7902084673300291, - "grad_norm": 2.5689215660095215, - "learning_rate": 2.0799314835110808e-06, - "loss": 0.7128, + "epoch": 0.7351636747624076, + "grad_norm": 1.9231656789779663, + "learning_rate": 2.4212407949988464e-06, + "loss": 0.6708, "step": 10443 }, { - "epoch": 0.7902841360523628, - "grad_norm": 1.6637498140335083, - "learning_rate": 2.0784916927875547e-06, - "loss": 0.6082, + "epoch": 0.7352340725096796, + "grad_norm": 2.1322684288024902, + "learning_rate": 2.4200336182224134e-06, + "loss": 0.6746, "step": 10444 }, { - "epoch": 0.7903598047746964, - "grad_norm": 2.4732978343963623, - "learning_rate": 2.0770523355112686e-06, - "loss": 0.7996, + "epoch": 0.7353044702569518, + "grad_norm": 1.753417730331421, + "learning_rate": 2.4188266795625793e-06, + "loss": 0.6991, "step": 10445 }, { - "epoch": 0.79043547349703, - "grad_norm": 1.848886251449585, - "learning_rate": 2.075613411772353e-06, - "loss": 0.6386, + "epoch": 0.7353748680042239, + "grad_norm": 1.6706979274749756, + "learning_rate": 2.4176199790820962e-06, + "loss": 0.5726, "step": 10446 }, { - "epoch": 0.7905111422193636, - "grad_norm": 2.2179338932037354, - "learning_rate": 2.074174921660921e-06, - "loss": 0.7844, + "epoch": 0.7354452657514959, + "grad_norm": 2.7278974056243896, + "learning_rate": 2.4164135168436988e-06, + "loss": 0.5844, "step": 10447 }, { - "epoch": 0.7905868109416972, - "grad_norm": 2.1151363849639893, - "learning_rate": 2.0727368652670605e-06, - "loss": 0.6207, + "epoch": 0.7355156634987681, + "grad_norm": 1.7401866912841797, + "learning_rate": 2.415207292910116e-06, + "loss": 0.6045, "step": 10448 }, { - "epoch": 0.7906624796640309, - "grad_norm": 1.6570191383361816, - "learning_rate": 2.07129924268082e-06, - "loss": 0.5132, + "epoch": 0.7355860612460401, + "grad_norm": 4.297787666320801, + "learning_rate": 2.4140013073440587e-06, + "loss": 0.7002, "step": 10449 }, { - "epoch": 0.7907381483863645, - "grad_norm": 2.8186240196228027, - "learning_rate": 2.069862053992231e-06, - "loss": 0.7471, + "epoch": 0.7356564589933122, + "grad_norm": 1.51296067237854, + "learning_rate": 2.4127955602082265e-06, + "loss": 0.7967, "step": 10450 }, { - "epoch": 0.7908138171086981, - "grad_norm": 2.399502992630005, - "learning_rate": 2.0684252992912963e-06, - "loss": 0.7079, + "epoch": 0.7357268567405844, + "grad_norm": 1.900515079498291, + "learning_rate": 2.411590051565305e-06, + "loss": 0.608, "step": 10451 }, { - "epoch": 0.7908894858310317, - "grad_norm": 2.391279935836792, - "learning_rate": 2.0669889786679883e-06, - "loss": 0.7076, + "epoch": 0.7357972544878564, + "grad_norm": 1.6434049606323242, + "learning_rate": 2.4103847814779748e-06, + "loss": 0.597, "step": 10452 }, { - "epoch": 0.7909651545533654, - "grad_norm": 2.1151928901672363, - "learning_rate": 2.065553092212254e-06, - "loss": 0.6934, + "epoch": 0.7358676522351285, + "grad_norm": 1.8188968896865845, + "learning_rate": 2.409179750008894e-06, + "loss": 0.6282, "step": 10453 }, { - "epoch": 0.791040823275699, - "grad_norm": 1.9356576204299927, - "learning_rate": 2.0641176400140136e-06, - "loss": 0.5707, + "epoch": 0.7359380499824005, + "grad_norm": 1.8928173780441284, + "learning_rate": 2.407974957220718e-06, + "loss": 0.5754, "step": 10454 }, { - "epoch": 0.7911164919980326, - "grad_norm": 2.005326986312866, - "learning_rate": 2.0626826221631627e-06, - "loss": 0.7772, + "epoch": 0.7360084477296727, + "grad_norm": 1.8666331768035889, + "learning_rate": 2.4067704031760828e-06, + "loss": 0.645, "step": 10455 }, { - "epoch": 0.7911921607203662, - "grad_norm": 2.1144111156463623, - "learning_rate": 2.0612480387495613e-06, - "loss": 0.6407, + "epoch": 0.7360788454769447, + "grad_norm": 1.9609216451644897, + "learning_rate": 2.405566087937614e-06, + "loss": 0.6516, "step": 10456 }, { - "epoch": 0.7912678294426999, - "grad_norm": 1.8899532556533813, - "learning_rate": 2.0598138898630487e-06, - "loss": 0.7053, + "epoch": 0.7361492432242168, + "grad_norm": 1.9499157667160034, + "learning_rate": 2.4043620115679226e-06, + "loss": 0.7342, "step": 10457 }, { - "epoch": 0.7913434981650335, - "grad_norm": 2.0663957595825195, - "learning_rate": 2.0583801755934396e-06, - "loss": 0.5835, + "epoch": 0.736219640971489, + "grad_norm": 2.0044138431549072, + "learning_rate": 2.4031581741296153e-06, + "loss": 0.7563, "step": 10458 }, { - "epoch": 0.7914191668873671, - "grad_norm": 2.444054126739502, - "learning_rate": 2.0569468960305178e-06, - "loss": 0.6798, + "epoch": 0.736290038718761, + "grad_norm": 2.0916850566864014, + "learning_rate": 2.401954575685275e-06, + "loss": 0.645, "step": 10459 }, { - "epoch": 0.7914948356097007, - "grad_norm": 2.1964471340179443, - "learning_rate": 2.055514051264036e-06, - "loss": 0.7087, + "epoch": 0.7363604364660331, + "grad_norm": 1.6393896341323853, + "learning_rate": 2.400751216297483e-06, + "loss": 0.6212, "step": 10460 }, { - "epoch": 0.7915705043320344, - "grad_norm": 2.126030683517456, - "learning_rate": 2.0540816413837256e-06, - "loss": 0.6266, + "epoch": 0.7364308342133051, + "grad_norm": 2.1003224849700928, + "learning_rate": 2.399548096028801e-06, + "loss": 0.7691, "step": 10461 }, { - "epoch": 0.791646173054368, - "grad_norm": 2.224245071411133, - "learning_rate": 2.052649666479289e-06, - "loss": 0.6503, + "epoch": 0.7365012319605773, + "grad_norm": 1.6804018020629883, + "learning_rate": 2.398345214941779e-06, + "loss": 0.6692, "step": 10462 }, { - "epoch": 0.7917218417767016, - "grad_norm": 2.307697057723999, - "learning_rate": 2.0512181266404004e-06, - "loss": 0.6489, + "epoch": 0.7365716297078494, + "grad_norm": 1.8765761852264404, + "learning_rate": 2.397142573098955e-06, + "loss": 0.7727, "step": 10463 }, { - "epoch": 0.7917975104990352, - "grad_norm": 2.7697362899780273, - "learning_rate": 2.0497870219567073e-06, - "loss": 0.6928, + "epoch": 0.7366420274551214, + "grad_norm": 1.759555459022522, + "learning_rate": 2.39594017056286e-06, + "loss": 0.5348, "step": 10464 }, { - "epoch": 0.7918731792213688, - "grad_norm": 2.193354368209839, - "learning_rate": 2.048356352517831e-06, - "loss": 0.7207, + "epoch": 0.7367124252023936, + "grad_norm": 2.049685478210449, + "learning_rate": 2.394738007396002e-06, + "loss": 0.6694, "step": 10465 }, { - "epoch": 0.7919488479437025, - "grad_norm": 2.0516974925994873, - "learning_rate": 2.0469261184133664e-06, - "loss": 0.6624, + "epoch": 0.7367828229496656, + "grad_norm": 1.8037489652633667, + "learning_rate": 2.3935360836608887e-06, + "loss": 0.7069, "step": 10466 }, { - "epoch": 0.7920245166660361, - "grad_norm": 2.0919811725616455, - "learning_rate": 2.0454963197328724e-06, - "loss": 0.6431, + "epoch": 0.7368532206969377, + "grad_norm": 1.8997001647949219, + "learning_rate": 2.392334399420006e-06, + "loss": 0.6244, "step": 10467 }, { - "epoch": 0.7921001853883697, - "grad_norm": 2.1068317890167236, - "learning_rate": 2.044066956565895e-06, - "loss": 0.6878, + "epoch": 0.7369236184442098, + "grad_norm": 1.7653170824050903, + "learning_rate": 2.391132954735831e-06, + "loss": 0.6293, "step": 10468 }, { - "epoch": 0.7921758541107033, - "grad_norm": 2.4329023361206055, - "learning_rate": 2.0426380290019456e-06, - "loss": 0.778, + "epoch": 0.7369940161914819, + "grad_norm": 1.9489895105361938, + "learning_rate": 2.389931749670825e-06, + "loss": 0.742, "step": 10469 }, { - "epoch": 0.792251522833037, - "grad_norm": 2.205319881439209, - "learning_rate": 2.0412095371305034e-06, - "loss": 0.7374, + "epoch": 0.737064413938754, + "grad_norm": 1.6220675706863403, + "learning_rate": 2.388730784287445e-06, + "loss": 0.6482, "step": 10470 }, { - "epoch": 0.7923271915553706, - "grad_norm": 1.9677940607070923, - "learning_rate": 2.0397814810410265e-06, - "loss": 0.6473, + "epoch": 0.737134811686026, + "grad_norm": 1.9206750392913818, + "learning_rate": 2.387530058648126e-06, + "loss": 0.631, "step": 10471 }, { - "epoch": 0.7924028602777042, - "grad_norm": 2.6336700916290283, - "learning_rate": 2.038353860822944e-06, - "loss": 0.6716, + "epoch": 0.7372052094332981, + "grad_norm": 2.1477701663970947, + "learning_rate": 2.386329572815298e-06, + "loss": 0.7374, "step": 10472 }, { - "epoch": 0.7924785290000378, - "grad_norm": 1.8802638053894043, - "learning_rate": 2.0369266765656644e-06, - "loss": 0.7585, + "epoch": 0.7372756071805702, + "grad_norm": 1.7043741941452026, + "learning_rate": 2.3851293268513737e-06, + "loss": 0.604, "step": 10473 }, { - "epoch": 0.7925541977223715, - "grad_norm": 4.260136127471924, - "learning_rate": 2.035499928358554e-06, - "loss": 0.6322, + "epoch": 0.7373460049278423, + "grad_norm": 2.0087103843688965, + "learning_rate": 2.3839293208187562e-06, + "loss": 0.595, "step": 10474 }, { - "epoch": 0.7926298664447051, - "grad_norm": 1.8927791118621826, - "learning_rate": 2.034073616290965e-06, - "loss": 0.5409, + "epoch": 0.7374164026751144, + "grad_norm": 1.8091349601745605, + "learning_rate": 2.3827295547798305e-06, + "loss": 0.5985, "step": 10475 }, { - "epoch": 0.7927055351670387, - "grad_norm": 2.0177175998687744, - "learning_rate": 2.0326477404522163e-06, - "loss": 0.6721, + "epoch": 0.7374868004223865, + "grad_norm": 1.6196082830429077, + "learning_rate": 2.38153002879698e-06, + "loss": 0.7008, "step": 10476 }, { - "epoch": 0.7927812038893723, - "grad_norm": 2.187217950820923, - "learning_rate": 2.031222300931601e-06, - "loss": 0.7119, + "epoch": 0.7375571981696586, + "grad_norm": 1.8437230587005615, + "learning_rate": 2.380330742932563e-06, + "loss": 0.725, "step": 10477 }, { - "epoch": 0.7928568726117059, - "grad_norm": 1.9448692798614502, - "learning_rate": 2.029797297818385e-06, - "loss": 0.5877, + "epoch": 0.7376275959169306, + "grad_norm": 2.1074459552764893, + "learning_rate": 2.379131697248938e-06, + "loss": 0.6177, "step": 10478 }, { - "epoch": 0.7929325413340396, - "grad_norm": 2.5591869354248047, - "learning_rate": 2.0283727312018075e-06, - "loss": 0.5605, + "epoch": 0.7376979936642027, + "grad_norm": 1.6842234134674072, + "learning_rate": 2.37793289180844e-06, + "loss": 0.7677, "step": 10479 }, { - "epoch": 0.7930082100563732, - "grad_norm": 2.923570394515991, - "learning_rate": 2.02694860117108e-06, - "loss": 0.6858, + "epoch": 0.7377683914114749, + "grad_norm": 1.9509350061416626, + "learning_rate": 2.3767343266733974e-06, + "loss": 0.7474, "step": 10480 }, { - "epoch": 0.7930838787787068, - "grad_norm": 1.8390942811965942, - "learning_rate": 2.0255249078153825e-06, - "loss": 0.719, + "epoch": 0.7378387891587469, + "grad_norm": 1.9238464832305908, + "learning_rate": 2.3755360019061215e-06, + "loss": 0.6049, "step": 10481 }, { - "epoch": 0.7931595475010405, - "grad_norm": 2.0713329315185547, - "learning_rate": 2.0241016512238716e-06, - "loss": 0.7737, + "epoch": 0.737909186906019, + "grad_norm": 1.7382951974868774, + "learning_rate": 2.374337917568919e-06, + "loss": 0.6251, "step": 10482 }, { - "epoch": 0.7932352162233741, - "grad_norm": 1.8896489143371582, - "learning_rate": 2.0226788314856824e-06, - "loss": 0.5676, + "epoch": 0.737979584653291, + "grad_norm": 1.696651577949524, + "learning_rate": 2.3731400737240766e-06, + "loss": 0.6367, "step": 10483 }, { - "epoch": 0.7933108849457077, - "grad_norm": 2.4428815841674805, - "learning_rate": 2.021256448689909e-06, - "loss": 0.6226, + "epoch": 0.7380499824005632, + "grad_norm": 1.550777554512024, + "learning_rate": 2.3719424704338704e-06, + "loss": 0.5493, "step": 10484 }, { - "epoch": 0.7933865536680413, - "grad_norm": 2.0781445503234863, - "learning_rate": 2.01983450292563e-06, - "loss": 0.63, + "epoch": 0.7381203801478353, + "grad_norm": 1.930772304534912, + "learning_rate": 2.370745107760572e-06, + "loss": 0.7781, "step": 10485 }, { - "epoch": 0.7934622223903749, - "grad_norm": 2.051621198654175, - "learning_rate": 2.0184129942818912e-06, - "loss": 0.6363, + "epoch": 0.7381907778951073, + "grad_norm": 1.7199431657791138, + "learning_rate": 2.3695479857664236e-06, + "loss": 0.5549, "step": 10486 }, { - "epoch": 0.7935378911127086, - "grad_norm": 2.4698948860168457, - "learning_rate": 2.0169919228477136e-06, - "loss": 0.6365, + "epoch": 0.7382611756423795, + "grad_norm": 2.1171786785125732, + "learning_rate": 2.368351104513671e-06, + "loss": 0.625, "step": 10487 }, { - "epoch": 0.7936135598350422, - "grad_norm": 2.3092644214630127, - "learning_rate": 2.0155712887120822e-06, - "loss": 0.6834, + "epoch": 0.7383315733896515, + "grad_norm": 1.7607247829437256, + "learning_rate": 2.3671544640645362e-06, + "loss": 0.6375, "step": 10488 }, { - "epoch": 0.7936892285573758, - "grad_norm": 1.9739770889282227, - "learning_rate": 2.01415109196397e-06, - "loss": 0.5711, + "epoch": 0.7384019711369236, + "grad_norm": 1.806304693222046, + "learning_rate": 2.36595806448124e-06, + "loss": 0.6446, "step": 10489 }, { - "epoch": 0.7937648972797094, - "grad_norm": 2.368990659713745, - "learning_rate": 2.0127313326923118e-06, - "loss": 0.6306, + "epoch": 0.7384723688841957, + "grad_norm": 2.137054204940796, + "learning_rate": 2.3647619058259782e-06, + "loss": 0.6451, "step": 10490 }, { - "epoch": 0.793840566002043, - "grad_norm": 1.8285222053527832, - "learning_rate": 2.01131201098602e-06, - "loss": 0.7374, + "epoch": 0.7385427666314678, + "grad_norm": 1.986095905303955, + "learning_rate": 2.363565988160947e-06, + "loss": 0.7649, "step": 10491 }, { - "epoch": 0.7939162347243767, - "grad_norm": 2.3828883171081543, - "learning_rate": 2.0098931269339706e-06, - "loss": 0.694, + "epoch": 0.7386131643787399, + "grad_norm": 1.7595442533493042, + "learning_rate": 2.3623703115483145e-06, + "loss": 0.5946, "step": 10492 }, { - "epoch": 0.7939919034467103, - "grad_norm": 2.278015375137329, - "learning_rate": 2.00847468062502e-06, - "loss": 0.6483, + "epoch": 0.7386835621260119, + "grad_norm": 1.5982320308685303, + "learning_rate": 2.361174876050252e-06, + "loss": 0.5926, "step": 10493 }, { - "epoch": 0.7940675721690439, - "grad_norm": 2.0421197414398193, - "learning_rate": 2.0070566721480044e-06, - "loss": 0.6897, + "epoch": 0.7387539598732841, + "grad_norm": 1.5866200923919678, + "learning_rate": 2.3599796817289076e-06, + "loss": 0.6657, "step": 10494 }, { - "epoch": 0.7941432408913776, - "grad_norm": 1.976413369178772, - "learning_rate": 2.005639101591714e-06, - "loss": 0.6678, + "epoch": 0.7388243576205561, + "grad_norm": 2.4669578075408936, + "learning_rate": 2.3587847286464224e-06, + "loss": 0.6072, "step": 10495 }, { - "epoch": 0.7942189096137112, - "grad_norm": 1.767219066619873, - "learning_rate": 2.0042219690449255e-06, - "loss": 0.6366, + "epoch": 0.7388947553678282, + "grad_norm": 1.8730976581573486, + "learning_rate": 2.3575900168649217e-06, + "loss": 0.6237, "step": 10496 }, { - "epoch": 0.7942945783360448, - "grad_norm": 3.3812007904052734, - "learning_rate": 2.002805274596386e-06, - "loss": 0.5554, + "epoch": 0.7389651531151004, + "grad_norm": 2.230947494506836, + "learning_rate": 2.3563955464465246e-06, + "loss": 0.6387, "step": 10497 }, { - "epoch": 0.7943702470583784, - "grad_norm": 2.365610361099243, - "learning_rate": 2.0013890183348107e-06, - "loss": 0.778, + "epoch": 0.7390355508623724, + "grad_norm": 1.91227388381958, + "learning_rate": 2.3552013174533235e-06, + "loss": 0.607, "step": 10498 }, { - "epoch": 0.794445915780712, - "grad_norm": 1.9793906211853027, - "learning_rate": 1.9999732003488917e-06, - "loss": 0.7294, + "epoch": 0.7391059486096445, + "grad_norm": 1.7398139238357544, + "learning_rate": 2.354007329947415e-06, + "loss": 0.7235, "step": 10499 }, { - "epoch": 0.7945215845030457, - "grad_norm": 1.898956060409546, - "learning_rate": 1.9985578207272914e-06, - "loss": 0.649, + "epoch": 0.7391763463569165, + "grad_norm": 2.238408088684082, + "learning_rate": 2.3528135839908706e-06, + "loss": 0.6318, "step": 10500 }, { - "epoch": 0.7945972532253793, - "grad_norm": 1.8723238706588745, - "learning_rate": 1.997142879558649e-06, - "loss": 0.584, + "epoch": 0.7392467441041887, + "grad_norm": 1.6764370203018188, + "learning_rate": 2.351620079645759e-06, + "loss": 0.642, "step": 10501 }, { - "epoch": 0.7946729219477129, - "grad_norm": 2.4054598808288574, - "learning_rate": 1.9957283769315654e-06, - "loss": 0.7476, + "epoch": 0.7393171418514608, + "grad_norm": 2.0645358562469482, + "learning_rate": 2.350426816974127e-06, + "loss": 0.6464, "step": 10502 }, { - "epoch": 0.7947485906700466, - "grad_norm": 2.132002830505371, - "learning_rate": 1.994314312934624e-06, - "loss": 0.8334, + "epoch": 0.7393875395987328, + "grad_norm": 1.9840205907821655, + "learning_rate": 2.34923379603802e-06, + "loss": 0.6137, "step": 10503 }, { - "epoch": 0.7948242593923801, - "grad_norm": 2.4048826694488525, - "learning_rate": 1.9929006876563824e-06, - "loss": 0.6341, + "epoch": 0.739457937346005, + "grad_norm": 1.9744752645492554, + "learning_rate": 2.3480410168994566e-06, + "loss": 0.61, "step": 10504 }, { - "epoch": 0.7948999281147138, - "grad_norm": 6.029483318328857, - "learning_rate": 1.991487501185365e-06, - "loss": 0.695, + "epoch": 0.739528335093277, + "grad_norm": 2.0628254413604736, + "learning_rate": 2.3468484796204545e-06, + "loss": 0.6675, "step": 10505 }, { - "epoch": 0.7949755968370474, - "grad_norm": 2.2719666957855225, - "learning_rate": 1.9900747536100666e-06, - "loss": 0.6664, + "epoch": 0.7395987328405491, + "grad_norm": 1.928671956062317, + "learning_rate": 2.345656184263013e-06, + "loss": 0.7155, "step": 10506 }, { - "epoch": 0.795051265559381, - "grad_norm": 2.24306058883667, - "learning_rate": 1.9886624450189597e-06, - "loss": 0.6445, + "epoch": 0.7396691305878212, + "grad_norm": 1.8981844186782837, + "learning_rate": 2.3444641308891236e-06, + "loss": 0.6502, "step": 10507 }, { - "epoch": 0.7951269342817147, - "grad_norm": 2.079287052154541, - "learning_rate": 1.9872505755004876e-06, - "loss": 0.6169, + "epoch": 0.7397395283350933, + "grad_norm": 1.9040600061416626, + "learning_rate": 2.3432723195607576e-06, + "loss": 0.6461, "step": 10508 }, { - "epoch": 0.7952026030040483, - "grad_norm": 1.9773471355438232, - "learning_rate": 1.985839145143068e-06, - "loss": 0.5757, + "epoch": 0.7398099260823654, + "grad_norm": 1.9066990613937378, + "learning_rate": 2.342080750339884e-06, + "loss": 0.6429, "step": 10509 }, { - "epoch": 0.7952782717263819, - "grad_norm": 1.859931468963623, - "learning_rate": 1.984428154035086e-06, - "loss": 0.655, + "epoch": 0.7398803238296374, + "grad_norm": 1.5089294910430908, + "learning_rate": 2.3408894232884505e-06, + "loss": 0.5803, "step": 10510 }, { - "epoch": 0.7953539404487155, - "grad_norm": 1.6292186975479126, - "learning_rate": 1.983017602264904e-06, - "loss": 0.7832, + "epoch": 0.7399507215769096, + "grad_norm": 2.0545363426208496, + "learning_rate": 2.339698338468395e-06, + "loss": 0.6374, "step": 10511 }, { - "epoch": 0.7954296091710491, - "grad_norm": 1.8309117555618286, - "learning_rate": 1.981607489920859e-06, - "loss": 0.6333, + "epoch": 0.7400211193241816, + "grad_norm": 1.8673263788223267, + "learning_rate": 2.3385074959416418e-06, + "loss": 0.7099, "step": 10512 }, { - "epoch": 0.7955052778933828, - "grad_norm": 1.76371169090271, - "learning_rate": 1.9801978170912485e-06, - "loss": 0.5041, + "epoch": 0.7400915170714537, + "grad_norm": 2.248331308364868, + "learning_rate": 2.337316895770107e-06, + "loss": 0.6058, "step": 10513 }, { - "epoch": 0.7955809466157164, - "grad_norm": 1.981345534324646, - "learning_rate": 1.978788583864357e-06, - "loss": 0.6277, + "epoch": 0.7401619148187258, + "grad_norm": 1.9380592107772827, + "learning_rate": 2.3361265380156886e-06, + "loss": 0.645, "step": 10514 }, { - "epoch": 0.79565661533805, - "grad_norm": 2.2104129791259766, - "learning_rate": 1.9773797903284367e-06, - "loss": 0.6721, + "epoch": 0.7402323125659979, + "grad_norm": 3.016052484512329, + "learning_rate": 2.3349364227402764e-06, + "loss": 0.6806, "step": 10515 }, { - "epoch": 0.7957322840603837, - "grad_norm": 2.0217669010162354, - "learning_rate": 1.975971436571705e-06, - "loss": 0.585, + "epoch": 0.74030271031327, + "grad_norm": 2.0433542728424072, + "learning_rate": 2.3337465500057453e-06, + "loss": 0.7767, "step": 10516 }, { - "epoch": 0.7958079527827172, - "grad_norm": 2.435241222381592, - "learning_rate": 1.97456352268236e-06, - "loss": 0.6876, + "epoch": 0.740373108060542, + "grad_norm": 1.9028315544128418, + "learning_rate": 2.332556919873957e-06, + "loss": 0.5916, "step": 10517 }, { - "epoch": 0.7958836215050509, - "grad_norm": 2.3520796298980713, - "learning_rate": 1.973156048748569e-06, - "loss": 0.4607, + "epoch": 0.7404435058078142, + "grad_norm": 1.9162169694900513, + "learning_rate": 2.3313675324067586e-06, + "loss": 0.6761, "step": 10518 }, { - "epoch": 0.7959592902273845, - "grad_norm": 2.4293181896209717, - "learning_rate": 1.9717490148584775e-06, - "loss": 0.6131, + "epoch": 0.7405139035550863, + "grad_norm": 2.0634725093841553, + "learning_rate": 2.330178387665993e-06, + "loss": 0.5784, "step": 10519 }, { - "epoch": 0.7960349589497181, - "grad_norm": 2.50989031791687, - "learning_rate": 1.9703424211001926e-06, - "loss": 0.7233, + "epoch": 0.7405843013023583, + "grad_norm": 1.7845202684402466, + "learning_rate": 2.3289894857134803e-06, + "loss": 0.6806, "step": 10520 }, { - "epoch": 0.7961106276720518, - "grad_norm": 2.3708267211914062, - "learning_rate": 1.968936267561803e-06, - "loss": 0.7077, + "epoch": 0.7406546990496304, + "grad_norm": 1.9373652935028076, + "learning_rate": 2.3278008266110367e-06, + "loss": 0.635, "step": 10521 }, { - "epoch": 0.7961862963943854, - "grad_norm": 1.8803597688674927, - "learning_rate": 1.9675305543313647e-06, - "loss": 0.632, + "epoch": 0.7407250967969025, + "grad_norm": 1.8783118724822998, + "learning_rate": 2.3266124104204593e-06, + "loss": 0.6907, "step": 10522 }, { - "epoch": 0.796261965116719, - "grad_norm": 1.908509373664856, - "learning_rate": 1.9661252814969117e-06, - "loss": 0.6277, + "epoch": 0.7407954945441746, + "grad_norm": 1.87022864818573, + "learning_rate": 2.325424237203535e-06, + "loss": 0.6094, "step": 10523 }, { - "epoch": 0.7963376338390526, - "grad_norm": 2.45629620552063, - "learning_rate": 1.964720449146439e-06, - "loss": 0.6072, + "epoch": 0.7408658922914467, + "grad_norm": 2.2441649436950684, + "learning_rate": 2.3242363070220366e-06, + "loss": 0.6534, "step": 10524 }, { - "epoch": 0.7964133025613862, - "grad_norm": 3.3856143951416016, - "learning_rate": 1.9633160573679287e-06, - "loss": 0.6671, + "epoch": 0.7409362900387187, + "grad_norm": 1.7782258987426758, + "learning_rate": 2.323048619937729e-06, + "loss": 0.5735, "step": 10525 }, { - "epoch": 0.7964889712837199, - "grad_norm": 2.470855474472046, - "learning_rate": 1.9619121062493283e-06, - "loss": 0.6902, + "epoch": 0.7410066877859909, + "grad_norm": 1.98042893409729, + "learning_rate": 2.321861176012357e-06, + "loss": 0.6348, "step": 10526 }, { - "epoch": 0.7965646400060535, - "grad_norm": 2.5444891452789307, - "learning_rate": 1.960508595878554e-06, - "loss": 0.5386, + "epoch": 0.7410770855332629, + "grad_norm": 2.097153425216675, + "learning_rate": 2.3206739753076612e-06, + "loss": 0.6947, "step": 10527 }, { - "epoch": 0.7966403087283871, - "grad_norm": 2.180934429168701, - "learning_rate": 1.9591055263434998e-06, - "loss": 0.605, + "epoch": 0.741147483280535, + "grad_norm": 1.954114556312561, + "learning_rate": 2.319487017885364e-06, + "loss": 0.6938, "step": 10528 }, { - "epoch": 0.7967159774507208, - "grad_norm": 2.0110931396484375, - "learning_rate": 1.9577028977320297e-06, - "loss": 0.6318, + "epoch": 0.7412178810278071, + "grad_norm": 1.9059178829193115, + "learning_rate": 2.3183003038071752e-06, + "loss": 0.5762, "step": 10529 }, { - "epoch": 0.7967916461730543, - "grad_norm": 2.564516544342041, - "learning_rate": 1.9563007101319826e-06, - "loss": 0.7039, + "epoch": 0.7412882787750792, + "grad_norm": 2.0093929767608643, + "learning_rate": 2.3171138331347912e-06, + "loss": 0.6769, "step": 10530 }, { - "epoch": 0.796867314895388, - "grad_norm": 2.044375419616699, - "learning_rate": 1.9548989636311673e-06, - "loss": 0.7169, + "epoch": 0.7413586765223513, + "grad_norm": 2.3364927768707275, + "learning_rate": 2.315927605929903e-06, + "loss": 0.5081, "step": 10531 }, { - "epoch": 0.7969429836177216, - "grad_norm": 2.060332775115967, - "learning_rate": 1.9534976583173652e-06, - "loss": 0.5094, + "epoch": 0.7414290742696233, + "grad_norm": 1.8921394348144531, + "learning_rate": 2.314741622254178e-06, + "loss": 0.6415, "step": 10532 }, { - "epoch": 0.7970186523400552, - "grad_norm": 2.0547759532928467, - "learning_rate": 1.9520967942783307e-06, - "loss": 0.6053, + "epoch": 0.7414994720168955, + "grad_norm": 1.791669487953186, + "learning_rate": 2.3135558821692817e-06, + "loss": 0.636, "step": 10533 }, { - "epoch": 0.7970943210623889, - "grad_norm": 2.1505582332611084, - "learning_rate": 1.950696371601791e-06, - "loss": 0.6574, + "epoch": 0.7415698697641675, + "grad_norm": 1.988541603088379, + "learning_rate": 2.3123703857368595e-06, + "loss": 0.6799, "step": 10534 }, { - "epoch": 0.7971699897847225, - "grad_norm": 2.3827664852142334, - "learning_rate": 1.949296390375445e-06, - "loss": 0.5319, + "epoch": 0.7416402675114396, + "grad_norm": 1.7247769832611084, + "learning_rate": 2.311185133018544e-06, + "loss": 0.6291, "step": 10535 }, { - "epoch": 0.7972456585070561, - "grad_norm": 2.6274614334106445, - "learning_rate": 1.947896850686963e-06, - "loss": 0.8266, + "epoch": 0.7417106652587118, + "grad_norm": 1.8353080749511719, + "learning_rate": 2.310000124075961e-06, + "loss": 0.7331, "step": 10536 }, { - "epoch": 0.7973213272293898, - "grad_norm": 2.0706329345703125, - "learning_rate": 1.946497752623993e-06, - "loss": 0.7172, + "epoch": 0.7417810630059838, + "grad_norm": 1.6279698610305786, + "learning_rate": 2.3088153589707182e-06, + "loss": 0.6396, "step": 10537 }, { - "epoch": 0.7973969959517233, - "grad_norm": 2.9049019813537598, - "learning_rate": 1.945099096274144e-06, - "loss": 0.7605, + "epoch": 0.7418514607532559, + "grad_norm": 1.8527170419692993, + "learning_rate": 2.3076308377644156e-06, + "loss": 0.6358, "step": 10538 }, { - "epoch": 0.797472664674057, - "grad_norm": 2.054645299911499, - "learning_rate": 1.943700881725006e-06, - "loss": 0.6115, + "epoch": 0.7419218585005279, + "grad_norm": 1.7349870204925537, + "learning_rate": 2.3064465605186353e-06, + "loss": 0.6981, "step": 10539 }, { - "epoch": 0.7975483333963906, - "grad_norm": 2.4279866218566895, - "learning_rate": 1.9423031090641456e-06, - "loss": 0.6093, + "epoch": 0.7419922562478001, + "grad_norm": 2.0764689445495605, + "learning_rate": 2.3052625272949486e-06, + "loss": 0.6935, "step": 10540 }, { - "epoch": 0.7976240021187242, - "grad_norm": 2.322181224822998, - "learning_rate": 1.9409057783790908e-06, - "loss": 0.7224, + "epoch": 0.7420626539950722, + "grad_norm": 1.9295141696929932, + "learning_rate": 2.3040787381549133e-06, + "loss": 0.7102, "step": 10541 }, { - "epoch": 0.7976996708410579, - "grad_norm": 2.0702531337738037, - "learning_rate": 1.9395088897573463e-06, - "loss": 0.8073, + "epoch": 0.7421330517423442, + "grad_norm": 1.8588615655899048, + "learning_rate": 2.3028951931600785e-06, + "loss": 0.7562, "step": 10542 }, { - "epoch": 0.7977753395633914, - "grad_norm": 3.023908853530884, - "learning_rate": 1.9381124432863933e-06, - "loss": 0.6391, + "epoch": 0.7422034494896164, + "grad_norm": 2.0266120433807373, + "learning_rate": 2.301711892371975e-06, + "loss": 0.6798, "step": 10543 }, { - "epoch": 0.7978510082857251, - "grad_norm": 2.054307699203491, - "learning_rate": 1.936716439053679e-06, - "loss": 0.7798, + "epoch": 0.7422738472368884, + "grad_norm": 1.7685116529464722, + "learning_rate": 2.3005288358521278e-06, + "loss": 0.6263, "step": 10544 }, { - "epoch": 0.7979266770080587, - "grad_norm": 2.4131476879119873, - "learning_rate": 1.935320877146627e-06, - "loss": 0.7464, + "epoch": 0.7423442449841605, + "grad_norm": 1.847768783569336, + "learning_rate": 2.299346023662042e-06, + "loss": 0.6433, "step": 10545 }, { - "epoch": 0.7980023457303923, - "grad_norm": 2.163196086883545, - "learning_rate": 1.9339257576526325e-06, - "loss": 0.7076, + "epoch": 0.7424146427314325, + "grad_norm": 1.7625268697738647, + "learning_rate": 2.2981634558632138e-06, + "loss": 0.7161, "step": 10546 }, { - "epoch": 0.798078014452726, - "grad_norm": 2.453834056854248, - "learning_rate": 1.9325310806590596e-06, - "loss": 0.7057, + "epoch": 0.7424850404787047, + "grad_norm": 1.9597043991088867, + "learning_rate": 2.2969811325171237e-06, + "loss": 0.663, "step": 10547 }, { - "epoch": 0.7981536831750596, - "grad_norm": 1.4434751272201538, - "learning_rate": 1.9311368462532536e-06, - "loss": 0.8129, + "epoch": 0.7425554382259768, + "grad_norm": 1.7411106824874878, + "learning_rate": 2.295799053685246e-06, + "loss": 0.7304, "step": 10548 }, { - "epoch": 0.7982293518973932, - "grad_norm": 2.3839449882507324, - "learning_rate": 1.929743054522516e-06, - "loss": 0.7117, + "epoch": 0.7426258359732488, + "grad_norm": 1.8142436742782593, + "learning_rate": 2.2946172194290327e-06, + "loss": 0.6375, "step": 10549 }, { - "epoch": 0.7983050206197269, - "grad_norm": 2.3013927936553955, - "learning_rate": 1.9283497055541383e-06, - "loss": 0.6521, + "epoch": 0.742696233720521, + "grad_norm": 2.066965341567993, + "learning_rate": 2.2934356298099353e-06, + "loss": 0.5805, "step": 10550 }, { - "epoch": 0.7983806893420604, - "grad_norm": 2.079742670059204, - "learning_rate": 1.926956799435378e-06, - "loss": 0.5988, + "epoch": 0.742766631467793, + "grad_norm": 3.818808078765869, + "learning_rate": 2.292254284889381e-06, + "loss": 0.692, "step": 10551 }, { - "epoch": 0.7984563580643941, - "grad_norm": 2.3923838138580322, - "learning_rate": 1.9255643362534573e-06, - "loss": 0.6227, + "epoch": 0.7428370292150651, + "grad_norm": 1.6014583110809326, + "learning_rate": 2.291073184728791e-06, + "loss": 0.5261, "step": 10552 }, { - "epoch": 0.7985320267867277, - "grad_norm": 2.4555153846740723, - "learning_rate": 1.9241723160955793e-06, - "loss": 0.6377, + "epoch": 0.7429074269623372, + "grad_norm": 1.7680667638778687, + "learning_rate": 2.2898923293895677e-06, + "loss": 0.5833, "step": 10553 }, { - "epoch": 0.7986076955090613, - "grad_norm": 1.8996168375015259, - "learning_rate": 1.9227807390489167e-06, - "loss": 0.6329, + "epoch": 0.7429778247096093, + "grad_norm": 1.8301472663879395, + "learning_rate": 2.28871171893311e-06, + "loss": 0.7125, "step": 10554 }, { - "epoch": 0.798683364231395, - "grad_norm": 2.6470742225646973, - "learning_rate": 1.9213896052006145e-06, - "loss": 0.7298, + "epoch": 0.7430482224568814, + "grad_norm": 2.466336250305176, + "learning_rate": 2.2875313534207956e-06, + "loss": 0.6305, "step": 10555 }, { - "epoch": 0.7987590329537286, - "grad_norm": 1.7246336936950684, - "learning_rate": 1.9199989146377903e-06, - "loss": 0.6901, + "epoch": 0.7431186202041534, + "grad_norm": 2.06211256980896, + "learning_rate": 2.286351232913995e-06, + "loss": 0.6594, "step": 10556 }, { - "epoch": 0.7988347016760622, - "grad_norm": 3.4669127464294434, - "learning_rate": 1.918608667447534e-06, - "loss": 0.5589, + "epoch": 0.7431890179514256, + "grad_norm": 1.9201442003250122, + "learning_rate": 2.2851713574740635e-06, + "loss": 0.666, "step": 10557 }, { - "epoch": 0.7989103703983959, - "grad_norm": 2.552844762802124, - "learning_rate": 1.9172188637169087e-06, - "loss": 0.5577, + "epoch": 0.7432594156986977, + "grad_norm": 1.6814101934432983, + "learning_rate": 2.2839917271623426e-06, + "loss": 0.6159, "step": 10558 }, { - "epoch": 0.7989860391207294, - "grad_norm": 3.0051169395446777, - "learning_rate": 1.9158295035329425e-06, - "loss": 0.5776, + "epoch": 0.7433298134459697, + "grad_norm": 1.7780567407608032, + "learning_rate": 2.282812342040161e-06, + "loss": 0.5419, "step": 10559 }, { - "epoch": 0.7990617078430631, - "grad_norm": 2.2740957736968994, - "learning_rate": 1.9144405869826475e-06, - "loss": 0.6686, + "epoch": 0.7434002111932418, + "grad_norm": 1.772312045097351, + "learning_rate": 2.2816332021688406e-06, + "loss": 0.5968, "step": 10560 }, { - "epoch": 0.7991373765653967, - "grad_norm": 2.724806308746338, - "learning_rate": 1.9130521141530013e-06, - "loss": 0.6296, + "epoch": 0.7434706089405139, + "grad_norm": 1.8614624738693237, + "learning_rate": 2.2804543076096807e-06, + "loss": 0.6431, "step": 10561 }, { - "epoch": 0.7992130452877303, - "grad_norm": 2.1365087032318115, - "learning_rate": 1.9116640851309554e-06, - "loss": 0.7783, + "epoch": 0.743541006687786, + "grad_norm": 1.885438323020935, + "learning_rate": 2.2792756584239776e-06, + "loss": 0.6501, "step": 10562 }, { - "epoch": 0.799288714010064, - "grad_norm": 2.0151805877685547, - "learning_rate": 1.9102765000034293e-06, - "loss": 0.6014, + "epoch": 0.7436114044350581, + "grad_norm": 1.694577693939209, + "learning_rate": 2.278097254673008e-06, + "loss": 0.6575, "step": 10563 }, { - "epoch": 0.7993643827323975, - "grad_norm": 1.9399892091751099, - "learning_rate": 1.9088893588573187e-06, - "loss": 0.6421, + "epoch": 0.7436818021823302, + "grad_norm": 1.7613595724105835, + "learning_rate": 2.276919096418039e-06, + "loss": 0.7041, "step": 10564 }, { - "epoch": 0.7994400514547312, - "grad_norm": 2.5492935180664062, - "learning_rate": 1.9075026617794924e-06, - "loss": 0.8427, + "epoch": 0.7437521999296023, + "grad_norm": 1.8277180194854736, + "learning_rate": 2.2757411837203216e-06, + "loss": 0.5762, "step": 10565 }, { - "epoch": 0.7995157201770648, - "grad_norm": 2.4646923542022705, - "learning_rate": 1.9061164088567896e-06, - "loss": 0.5545, + "epoch": 0.7438225976768743, + "grad_norm": 2.2678117752075195, + "learning_rate": 2.2745635166411e-06, + "loss": 0.745, "step": 10566 }, { - "epoch": 0.7995913888993984, - "grad_norm": 1.8194031715393066, - "learning_rate": 1.9047306001760213e-06, - "loss": 0.6374, + "epoch": 0.7438929954241464, + "grad_norm": 1.979601263999939, + "learning_rate": 2.2733860952415994e-06, + "loss": 0.6183, "step": 10567 }, { - "epoch": 0.7996670576217321, - "grad_norm": 1.9019348621368408, - "learning_rate": 1.9033452358239716e-06, - "loss": 0.6827, + "epoch": 0.7439633931714185, + "grad_norm": 2.06463885307312, + "learning_rate": 2.2722089195830385e-06, + "loss": 0.6305, "step": 10568 }, { - "epoch": 0.7997427263440657, - "grad_norm": 2.4975194931030273, - "learning_rate": 1.9019603158873995e-06, - "loss": 0.7033, + "epoch": 0.7440337909186906, + "grad_norm": 1.4106758832931519, + "learning_rate": 2.2710319897266177e-06, + "loss": 0.6743, "step": 10569 }, { - "epoch": 0.7998183950663993, - "grad_norm": 3.534217596054077, - "learning_rate": 1.9005758404530242e-06, - "loss": 0.6896, + "epoch": 0.7441041886659627, + "grad_norm": 1.9700182676315308, + "learning_rate": 2.269855305733526e-06, + "loss": 0.5969, "step": 10570 }, { - "epoch": 0.799894063788733, - "grad_norm": 1.9258010387420654, - "learning_rate": 1.8991918096075558e-06, - "loss": 0.6669, + "epoch": 0.7441745864132348, + "grad_norm": 2.176574945449829, + "learning_rate": 2.268678867664939e-06, + "loss": 0.7174, "step": 10571 }, { - "epoch": 0.7999697325110665, - "grad_norm": 1.901734709739685, - "learning_rate": 1.8978082234376657e-06, - "loss": 0.6585, + "epoch": 0.7442449841605069, + "grad_norm": 2.255629301071167, + "learning_rate": 2.2675026755820247e-06, + "loss": 0.7125, "step": 10572 }, { - "epoch": 0.8000454012334002, - "grad_norm": 2.1467180252075195, - "learning_rate": 1.8964250820299927e-06, - "loss": 0.5655, + "epoch": 0.7443153819077789, + "grad_norm": 1.8950474262237549, + "learning_rate": 2.2663267295459306e-06, + "loss": 0.6181, "step": 10573 }, { - "epoch": 0.8001210699557338, - "grad_norm": 2.114226818084717, - "learning_rate": 1.8950423854711563e-06, - "loss": 0.6701, + "epoch": 0.744385779655051, + "grad_norm": 1.5122852325439453, + "learning_rate": 2.2651510296177984e-06, + "loss": 0.6265, "step": 10574 }, { - "epoch": 0.8001967386780674, - "grad_norm": 2.7017455101013184, - "learning_rate": 1.8936601338477445e-06, - "loss": 0.5554, + "epoch": 0.7444561774023232, + "grad_norm": 1.7592151165008545, + "learning_rate": 2.2639755758587528e-06, + "loss": 0.7046, "step": 10575 }, { - "epoch": 0.8002724074004011, - "grad_norm": 1.8943901062011719, - "learning_rate": 1.8922783272463251e-06, - "loss": 0.5889, + "epoch": 0.7445265751495952, + "grad_norm": 1.8019400835037231, + "learning_rate": 2.262800368329906e-06, + "loss": 0.6763, "step": 10576 }, { - "epoch": 0.8003480761227346, - "grad_norm": 2.2416491508483887, - "learning_rate": 1.8908969657534225e-06, - "loss": 0.7364, + "epoch": 0.7445969728968673, + "grad_norm": 1.781788945198059, + "learning_rate": 2.2616254070923554e-06, + "loss": 0.6215, "step": 10577 }, { - "epoch": 0.8004237448450683, - "grad_norm": 2.565739154815674, - "learning_rate": 1.889516049455546e-06, - "loss": 0.772, + "epoch": 0.7446673706441393, + "grad_norm": 1.973240613937378, + "learning_rate": 2.260450692207194e-06, + "loss": 0.6346, "step": 10578 }, { - "epoch": 0.800499413567402, - "grad_norm": 2.755770683288574, - "learning_rate": 1.888135578439172e-06, - "loss": 0.6644, + "epoch": 0.7447377683914115, + "grad_norm": 2.031369924545288, + "learning_rate": 2.2592762237354918e-06, + "loss": 0.6733, "step": 10579 }, { - "epoch": 0.8005750822897355, - "grad_norm": 1.84762704372406, - "learning_rate": 1.8867555527907516e-06, - "loss": 0.6432, + "epoch": 0.7448081661386836, + "grad_norm": 1.998897671699524, + "learning_rate": 2.258102001738314e-06, + "loss": 0.5662, "step": 10580 }, { - "epoch": 0.8006507510120692, - "grad_norm": 2.085479497909546, - "learning_rate": 1.8853759725967045e-06, - "loss": 0.7186, + "epoch": 0.7448785638859556, + "grad_norm": 2.1013376712799072, + "learning_rate": 2.256928026276708e-06, + "loss": 0.6428, "step": 10581 }, { - "epoch": 0.8007264197344028, - "grad_norm": 1.9291927814483643, - "learning_rate": 1.8839968379434267e-06, - "loss": 0.5889, + "epoch": 0.7449489616332278, + "grad_norm": 2.278740882873535, + "learning_rate": 2.255754297411709e-06, + "loss": 0.6931, "step": 10582 }, { - "epoch": 0.8008020884567364, - "grad_norm": 2.066859483718872, - "learning_rate": 1.8826181489172843e-06, - "loss": 0.6291, + "epoch": 0.7450193593804998, + "grad_norm": 1.7296150922775269, + "learning_rate": 2.254580815204338e-06, + "loss": 0.6949, "step": 10583 }, { - "epoch": 0.8008777571790701, - "grad_norm": 2.1499745845794678, - "learning_rate": 1.8812399056046118e-06, - "loss": 0.5774, + "epoch": 0.7450897571277719, + "grad_norm": 1.907201886177063, + "learning_rate": 2.253407579715611e-06, + "loss": 0.6387, "step": 10584 }, { - "epoch": 0.8009534259014036, - "grad_norm": 2.3581552505493164, - "learning_rate": 1.8798621080917184e-06, - "loss": 0.632, + "epoch": 0.745160154875044, + "grad_norm": 2.2260994911193848, + "learning_rate": 2.252234591006521e-06, + "loss": 0.7017, "step": 10585 }, { - "epoch": 0.8010290946237373, - "grad_norm": 2.0463876724243164, - "learning_rate": 1.8784847564648952e-06, - "loss": 0.7999, + "epoch": 0.7452305526223161, + "grad_norm": 1.9866023063659668, + "learning_rate": 2.251061849138054e-06, + "loss": 0.607, "step": 10586 }, { - "epoch": 0.801104763346071, - "grad_norm": 1.8345085382461548, - "learning_rate": 1.877107850810387e-06, - "loss": 0.6854, + "epoch": 0.7453009503695882, + "grad_norm": 2.24946928024292, + "learning_rate": 2.2498893541711866e-06, + "loss": 0.7478, "step": 10587 }, { - "epoch": 0.8011804320684045, - "grad_norm": 2.0658600330352783, - "learning_rate": 1.8757313912144227e-06, - "loss": 0.6487, + "epoch": 0.7453713481168602, + "grad_norm": 1.8091965913772583, + "learning_rate": 2.2487171061668686e-06, + "loss": 0.6335, "step": 10588 }, { - "epoch": 0.8012561007907382, - "grad_norm": 2.1300668716430664, - "learning_rate": 1.874355377763203e-06, - "loss": 0.8562, + "epoch": 0.7454417458641324, + "grad_norm": 2.271479606628418, + "learning_rate": 2.2475451051860547e-06, + "loss": 0.6173, "step": 10589 }, { - "epoch": 0.8013317695130717, - "grad_norm": 2.3721659183502197, - "learning_rate": 1.8729798105428951e-06, - "loss": 0.6192, + "epoch": 0.7455121436114044, + "grad_norm": 1.828674077987671, + "learning_rate": 2.2463733512896723e-06, + "loss": 0.7544, "step": 10590 }, { - "epoch": 0.8014074382354054, - "grad_norm": 1.95151686668396, - "learning_rate": 1.8716046896396437e-06, - "loss": 0.6669, + "epoch": 0.7455825413586765, + "grad_norm": 1.7478679418563843, + "learning_rate": 2.2452018445386465e-06, + "loss": 0.692, "step": 10591 }, { - "epoch": 0.801483106957739, - "grad_norm": 2.4539577960968018, - "learning_rate": 1.8702300151395627e-06, - "loss": 0.8109, + "epoch": 0.7456529391059487, + "grad_norm": 1.9546513557434082, + "learning_rate": 2.2440305849938805e-06, + "loss": 0.6129, "step": 10592 }, { - "epoch": 0.8015587756800726, - "grad_norm": 2.4067583084106445, - "learning_rate": 1.8688557871287382e-06, - "loss": 0.6994, + "epoch": 0.7457233368532207, + "grad_norm": 2.332404136657715, + "learning_rate": 2.2428595727162765e-06, + "loss": 0.8092, "step": 10593 }, { - "epoch": 0.8016344444024063, - "grad_norm": 2.889045476913452, - "learning_rate": 1.8674820056932325e-06, - "loss": 0.6797, + "epoch": 0.7457937346004928, + "grad_norm": 1.8025412559509277, + "learning_rate": 2.241688807766707e-06, + "loss": 0.6384, "step": 10594 }, { - "epoch": 0.8017101131247399, - "grad_norm": 2.1616549491882324, - "learning_rate": 1.8661086709190677e-06, - "loss": 0.7378, + "epoch": 0.7458641323477648, + "grad_norm": 1.826151728630066, + "learning_rate": 2.240518290206048e-06, + "loss": 0.6578, "step": 10595 }, { - "epoch": 0.8017857818470735, - "grad_norm": 1.9277255535125732, - "learning_rate": 1.864735782892254e-06, - "loss": 0.6307, + "epoch": 0.745934530095037, + "grad_norm": 1.936958909034729, + "learning_rate": 2.239348020095151e-06, + "loss": 0.6509, "step": 10596 }, { - "epoch": 0.8018614505694072, - "grad_norm": 2.1379098892211914, - "learning_rate": 1.8633633416987667e-06, - "loss": 0.5359, + "epoch": 0.7460049278423091, + "grad_norm": 1.381302833557129, + "learning_rate": 2.2381779974948647e-06, + "loss": 0.6808, "step": 10597 }, { - "epoch": 0.8019371192917407, - "grad_norm": 2.582929849624634, - "learning_rate": 1.861991347424547e-06, - "loss": 0.6166, + "epoch": 0.7460753255895811, + "grad_norm": 1.9236586093902588, + "learning_rate": 2.237008222466015e-06, + "loss": 0.6144, "step": 10598 }, { - "epoch": 0.8020127880140744, - "grad_norm": 2.745823383331299, - "learning_rate": 1.8606198001555162e-06, - "loss": 0.7859, + "epoch": 0.7461457233368533, + "grad_norm": 2.0570061206817627, + "learning_rate": 2.2358386950694255e-06, + "loss": 0.7297, "step": 10599 }, { - "epoch": 0.802088456736408, - "grad_norm": 1.990330696105957, - "learning_rate": 1.8592486999775644e-06, - "loss": 0.7426, + "epoch": 0.7462161210841253, + "grad_norm": 2.3556807041168213, + "learning_rate": 2.2346694153658916e-06, + "loss": 0.7333, "step": 10600 }, { - "epoch": 0.8021641254587416, - "grad_norm": 2.0357511043548584, - "learning_rate": 1.8578780469765562e-06, - "loss": 0.8174, + "epoch": 0.7462865188313974, + "grad_norm": 1.9679410457611084, + "learning_rate": 2.233500383416214e-06, + "loss": 0.6945, "step": 10601 }, { - "epoch": 0.8022397941810753, - "grad_norm": 1.937957763671875, - "learning_rate": 1.8565078412383238e-06, - "loss": 0.8851, + "epoch": 0.7463569165786695, + "grad_norm": 2.162771224975586, + "learning_rate": 2.232331599281165e-06, + "loss": 0.6105, "step": 10602 }, { - "epoch": 0.8023154629034088, - "grad_norm": 2.319841146469116, - "learning_rate": 1.8551380828486765e-06, - "loss": 0.6238, + "epoch": 0.7464273143259416, + "grad_norm": 2.0052895545959473, + "learning_rate": 2.2311630630215175e-06, + "loss": 0.5834, "step": 10603 }, { - "epoch": 0.8023911316257425, - "grad_norm": 2.1866133213043213, - "learning_rate": 1.8537687718933928e-06, - "loss": 0.7628, + "epoch": 0.7464977120732137, + "grad_norm": 1.9079086780548096, + "learning_rate": 2.229994774698018e-06, + "loss": 0.629, "step": 10604 }, { - "epoch": 0.8024668003480762, - "grad_norm": 2.112520933151245, - "learning_rate": 1.852399908458221e-06, - "loss": 0.565, + "epoch": 0.7465681098204857, + "grad_norm": 2.191671371459961, + "learning_rate": 2.228826734371415e-06, + "loss": 0.709, "step": 10605 }, { - "epoch": 0.8025424690704097, - "grad_norm": 2.1112277507781982, - "learning_rate": 1.8510314926288826e-06, - "loss": 0.7111, + "epoch": 0.7466385075677578, + "grad_norm": 1.7414783239364624, + "learning_rate": 2.2276589421024264e-06, + "loss": 0.7053, "step": 10606 }, { - "epoch": 0.8026181377927434, - "grad_norm": 2.6001243591308594, - "learning_rate": 1.8496635244910772e-06, - "loss": 0.6784, + "epoch": 0.7467089053150299, + "grad_norm": 2.101712226867676, + "learning_rate": 2.226491397951774e-06, + "loss": 0.6421, "step": 10607 }, { - "epoch": 0.802693806515077, - "grad_norm": 2.205632448196411, - "learning_rate": 1.84829600413047e-06, - "loss": 0.6225, + "epoch": 0.746779303062302, + "grad_norm": 1.9467554092407227, + "learning_rate": 2.2253241019801545e-06, + "loss": 0.6301, "step": 10608 }, { - "epoch": 0.8027694752374106, - "grad_norm": 2.1408543586730957, - "learning_rate": 1.8469289316326977e-06, - "loss": 0.6406, + "epoch": 0.7468497008095741, + "grad_norm": 1.9562954902648926, + "learning_rate": 2.2241570542482612e-06, + "loss": 0.6376, "step": 10609 }, { - "epoch": 0.8028451439597443, - "grad_norm": 2.142906904220581, - "learning_rate": 1.8455623070833706e-06, - "loss": 0.7465, + "epoch": 0.7469200985568462, + "grad_norm": 1.6970778703689575, + "learning_rate": 2.2229902548167657e-06, + "loss": 0.5484, "step": 10610 }, { - "epoch": 0.8029208126820778, - "grad_norm": 2.3650617599487305, - "learning_rate": 1.8441961305680726e-06, - "loss": 0.6459, + "epoch": 0.7469904963041183, + "grad_norm": 2.2862560749053955, + "learning_rate": 2.2218237037463387e-06, + "loss": 0.6886, "step": 10611 }, { - "epoch": 0.8029964814044115, - "grad_norm": 2.328948497772217, - "learning_rate": 1.842830402172357e-06, - "loss": 0.7611, + "epoch": 0.7470608940513903, + "grad_norm": 1.7352159023284912, + "learning_rate": 2.2206574010976186e-06, + "loss": 0.6228, "step": 10612 }, { - "epoch": 0.8030721501267452, - "grad_norm": 1.9152311086654663, - "learning_rate": 1.8414651219817513e-06, - "loss": 0.5491, + "epoch": 0.7471312917986624, + "grad_norm": 1.8268035650253296, + "learning_rate": 2.2194913469312524e-06, + "loss": 0.63, "step": 10613 }, { - "epoch": 0.8031478188490787, - "grad_norm": 3.1804325580596924, - "learning_rate": 1.8401002900817533e-06, - "loss": 0.6511, + "epoch": 0.7472016895459346, + "grad_norm": 2.1068100929260254, + "learning_rate": 2.2183255413078577e-06, + "loss": 0.6312, "step": 10614 }, { - "epoch": 0.8032234875714124, - "grad_norm": 2.4679126739501953, - "learning_rate": 1.8387359065578344e-06, - "loss": 0.6399, + "epoch": 0.7472720872932066, + "grad_norm": 3.098752498626709, + "learning_rate": 2.2171599842880503e-06, + "loss": 0.649, "step": 10615 }, { - "epoch": 0.8032991562937459, - "grad_norm": 2.6551365852355957, - "learning_rate": 1.8373719714954315e-06, - "loss": 0.7862, + "epoch": 0.7473424850404787, + "grad_norm": 2.9756033420562744, + "learning_rate": 2.2159946759324256e-06, + "loss": 0.6778, "step": 10616 }, { - "epoch": 0.8033748250160796, - "grad_norm": 1.9284342527389526, - "learning_rate": 1.8360084849799643e-06, - "loss": 0.6186, + "epoch": 0.7474128827877508, + "grad_norm": 2.46358323097229, + "learning_rate": 2.2148296163015723e-06, + "loss": 0.6402, "step": 10617 }, { - "epoch": 0.8034504937384133, - "grad_norm": 2.3558826446533203, - "learning_rate": 1.8346454470968194e-06, - "loss": 0.7019, + "epoch": 0.7474832805350229, + "grad_norm": 1.854234218597412, + "learning_rate": 2.21366480545606e-06, + "loss": 0.6382, "step": 10618 }, { - "epoch": 0.8035261624607468, - "grad_norm": 2.2871432304382324, - "learning_rate": 1.8332828579313505e-06, - "loss": 0.7563, + "epoch": 0.747553678282295, + "grad_norm": 2.1957812309265137, + "learning_rate": 2.2125002434564505e-06, + "loss": 0.6809, "step": 10619 }, { - "epoch": 0.8036018311830805, - "grad_norm": 2.3927693367004395, - "learning_rate": 1.8319207175688881e-06, - "loss": 0.7131, + "epoch": 0.747624076029567, + "grad_norm": 1.6664396524429321, + "learning_rate": 2.211335930363287e-06, + "loss": 0.539, "step": 10620 }, { - "epoch": 0.8036774999054141, - "grad_norm": 2.0817835330963135, - "learning_rate": 1.8305590260947336e-06, - "loss": 0.6026, + "epoch": 0.7476944737768392, + "grad_norm": 2.0633935928344727, + "learning_rate": 2.2101718662371064e-06, + "loss": 0.6605, "step": 10621 }, { - "epoch": 0.8037531686277477, - "grad_norm": 2.117398738861084, - "learning_rate": 1.8291977835941651e-06, - "loss": 0.6481, + "epoch": 0.7477648715241112, + "grad_norm": 2.3708279132843018, + "learning_rate": 2.209008051138427e-06, + "loss": 0.7537, "step": 10622 }, { - "epoch": 0.8038288373500814, - "grad_norm": 2.352769136428833, - "learning_rate": 1.827836990152423e-06, - "loss": 0.6245, + "epoch": 0.7478352692713833, + "grad_norm": 1.6324924230575562, + "learning_rate": 2.20784448512776e-06, + "loss": 0.6318, "step": 10623 }, { - "epoch": 0.8039045060724149, - "grad_norm": 1.9620620012283325, - "learning_rate": 1.8264766458547258e-06, - "loss": 0.8225, + "epoch": 0.7479056670186554, + "grad_norm": 2.493410348892212, + "learning_rate": 2.2066811682655985e-06, + "loss": 0.7097, "step": 10624 }, { - "epoch": 0.8039801747947486, - "grad_norm": 1.7931766510009766, - "learning_rate": 1.8251167507862633e-06, - "loss": 0.5747, + "epoch": 0.7479760647659275, + "grad_norm": 2.473609209060669, + "learning_rate": 2.2055181006124234e-06, + "loss": 0.7241, "step": 10625 }, { - "epoch": 0.8040558435170823, - "grad_norm": 2.456925630569458, - "learning_rate": 1.8237573050321955e-06, - "loss": 0.6384, + "epoch": 0.7480464625131996, + "grad_norm": 2.0159494876861572, + "learning_rate": 2.2043552822287034e-06, + "loss": 0.5871, "step": 10626 }, { - "epoch": 0.8041315122394158, - "grad_norm": 2.1616482734680176, - "learning_rate": 1.8223983086776574e-06, - "loss": 0.6522, + "epoch": 0.7481168602604716, + "grad_norm": 1.7585728168487549, + "learning_rate": 2.203192713174897e-06, + "loss": 0.6251, "step": 10627 }, { - "epoch": 0.8042071809617495, - "grad_norm": 1.8963215351104736, - "learning_rate": 1.8210397618077507e-06, - "loss": 0.679, + "epoch": 0.7481872580077438, + "grad_norm": 1.8980423212051392, + "learning_rate": 2.2020303935114434e-06, + "loss": 0.6392, "step": 10628 }, { - "epoch": 0.804282849684083, - "grad_norm": 2.105846643447876, - "learning_rate": 1.8196816645075575e-06, - "loss": 0.7072, + "epoch": 0.7482576557550158, + "grad_norm": 2.0884852409362793, + "learning_rate": 2.2008683232987773e-06, + "loss": 0.6744, "step": 10629 }, { - "epoch": 0.8043585184064167, - "grad_norm": 2.052081823348999, - "learning_rate": 1.8183240168621198e-06, - "loss": 0.5888, + "epoch": 0.7483280535022879, + "grad_norm": 2.129077196121216, + "learning_rate": 2.199706502597313e-06, + "loss": 0.6864, "step": 10630 }, { - "epoch": 0.8044341871287504, - "grad_norm": 2.6387953758239746, - "learning_rate": 1.8169668189564574e-06, - "loss": 0.5963, + "epoch": 0.7483984512495601, + "grad_norm": 2.0466761589050293, + "learning_rate": 2.1985449314674557e-06, + "loss": 0.6641, "step": 10631 }, { - "epoch": 0.8045098558510839, - "grad_norm": 3.556173801422119, - "learning_rate": 1.8156100708755705e-06, - "loss": 0.6972, + "epoch": 0.7484688489968321, + "grad_norm": 1.8732798099517822, + "learning_rate": 2.197383609969593e-06, + "loss": 0.7612, "step": 10632 }, { - "epoch": 0.8045855245734176, - "grad_norm": 2.080299139022827, - "learning_rate": 1.8142537727044158e-06, - "loss": 0.5975, + "epoch": 0.7485392467441042, + "grad_norm": 1.8200587034225464, + "learning_rate": 2.196222538164109e-06, + "loss": 0.6479, "step": 10633 }, { - "epoch": 0.8046611932957513, - "grad_norm": 2.5786876678466797, - "learning_rate": 1.812897924527932e-06, - "loss": 0.64, + "epoch": 0.7486096444913762, + "grad_norm": 2.056102991104126, + "learning_rate": 2.195061716111363e-06, + "loss": 0.6624, "step": 10634 }, { - "epoch": 0.8047368620180848, - "grad_norm": 3.127788543701172, - "learning_rate": 1.8115425264310257e-06, - "loss": 0.7167, + "epoch": 0.7486800422386484, + "grad_norm": 2.0249950885772705, + "learning_rate": 2.193901143871712e-06, + "loss": 0.6503, "step": 10635 }, { - "epoch": 0.8048125307404185, - "grad_norm": 1.6849464178085327, - "learning_rate": 1.810187578498577e-06, - "loss": 0.6677, + "epoch": 0.7487504399859205, + "grad_norm": 1.961533784866333, + "learning_rate": 2.192740821505494e-06, + "loss": 0.6496, "step": 10636 }, { - "epoch": 0.804888199462752, - "grad_norm": 2.1735470294952393, - "learning_rate": 1.8088330808154364e-06, - "loss": 0.6236, + "epoch": 0.7488208377331925, + "grad_norm": 1.7750868797302246, + "learning_rate": 2.191580749073031e-06, + "loss": 0.7344, "step": 10637 }, { - "epoch": 0.8049638681850857, - "grad_norm": 2.128405809402466, - "learning_rate": 1.8074790334664275e-06, - "loss": 0.6748, + "epoch": 0.7488912354804647, + "grad_norm": 1.64966881275177, + "learning_rate": 2.190420926634643e-06, + "loss": 0.6325, "step": 10638 }, { - "epoch": 0.8050395369074194, - "grad_norm": 2.5104458332061768, - "learning_rate": 1.806125436536345e-06, - "loss": 0.6915, + "epoch": 0.7489616332277367, + "grad_norm": 1.908503532409668, + "learning_rate": 2.189261354250624e-06, + "loss": 0.5313, "step": 10639 }, { - "epoch": 0.8051152056297529, - "grad_norm": 2.0362462997436523, - "learning_rate": 1.8047722901099575e-06, - "loss": 0.5961, + "epoch": 0.7490320309750088, + "grad_norm": 1.7530661821365356, + "learning_rate": 2.188102031981266e-06, + "loss": 0.5169, "step": 10640 }, { - "epoch": 0.8051908743520866, - "grad_norm": 3.8204987049102783, - "learning_rate": 1.803419594271999e-06, - "loss": 0.6853, + "epoch": 0.7491024287222808, + "grad_norm": 1.8074010610580444, + "learning_rate": 2.1869429598868417e-06, + "loss": 0.5471, "step": 10641 }, { - "epoch": 0.8052665430744201, - "grad_norm": 2.5584213733673096, - "learning_rate": 1.80206734910718e-06, - "loss": 0.8556, + "epoch": 0.749172826469553, + "grad_norm": 1.8718974590301514, + "learning_rate": 2.185784138027612e-06, + "loss": 0.6443, "step": 10642 }, { - "epoch": 0.8053422117967538, - "grad_norm": 2.950486898422241, - "learning_rate": 1.800715554700189e-06, - "loss": 0.7538, + "epoch": 0.7492432242168251, + "grad_norm": 1.9758082628250122, + "learning_rate": 2.1846255664638225e-06, + "loss": 0.6159, "step": 10643 }, { - "epoch": 0.8054178805190875, - "grad_norm": 1.961512565612793, - "learning_rate": 1.7993642111356726e-06, - "loss": 0.6005, + "epoch": 0.7493136219640971, + "grad_norm": 1.5645431280136108, + "learning_rate": 2.1834672452557133e-06, + "loss": 0.6117, "step": 10644 }, { - "epoch": 0.805493549241421, - "grad_norm": 2.0798726081848145, - "learning_rate": 1.7980133184982597e-06, - "loss": 0.7188, + "epoch": 0.7493840197113693, + "grad_norm": 1.683648943901062, + "learning_rate": 2.1823091744635014e-06, + "loss": 0.6039, "step": 10645 }, { - "epoch": 0.8055692179637547, - "grad_norm": 2.015023946762085, - "learning_rate": 1.796662876872547e-06, - "loss": 0.6464, + "epoch": 0.7494544174586413, + "grad_norm": 2.002256155014038, + "learning_rate": 2.181151354147401e-06, + "loss": 0.5373, "step": 10646 }, { - "epoch": 0.8056448866860884, - "grad_norm": 2.150325059890747, - "learning_rate": 1.7953128863431025e-06, - "loss": 0.6864, + "epoch": 0.7495248152059134, + "grad_norm": 1.6662653684616089, + "learning_rate": 2.179993784367606e-06, + "loss": 0.6556, "step": 10647 }, { - "epoch": 0.8057205554084219, - "grad_norm": 2.242133617401123, - "learning_rate": 1.7939633469944687e-06, - "loss": 0.5954, + "epoch": 0.7495952129531855, + "grad_norm": 1.9436935186386108, + "learning_rate": 2.1788364651843002e-06, + "loss": 0.6544, "step": 10648 }, { - "epoch": 0.8057962241307556, - "grad_norm": 2.3537936210632324, - "learning_rate": 1.792614258911157e-06, - "loss": 0.734, + "epoch": 0.7496656107004576, + "grad_norm": 2.2735185623168945, + "learning_rate": 2.17767939665765e-06, + "loss": 0.6164, "step": 10649 }, { - "epoch": 0.8058718928530891, - "grad_norm": 4.886502742767334, - "learning_rate": 1.7912656221776517e-06, - "loss": 0.5386, + "epoch": 0.7497360084477297, + "grad_norm": 1.7908589839935303, + "learning_rate": 2.1765225788478175e-06, + "loss": 0.6308, "step": 10650 }, { - "epoch": 0.8059475615754228, - "grad_norm": 2.2850396633148193, - "learning_rate": 1.7899174368784116e-06, - "loss": 0.7146, + "epoch": 0.7498064061950017, + "grad_norm": 1.828737735748291, + "learning_rate": 2.1753660118149423e-06, + "loss": 0.6804, "step": 10651 }, { - "epoch": 0.8060232302977565, - "grad_norm": 1.9249435663223267, - "learning_rate": 1.7885697030978569e-06, - "loss": 0.7584, + "epoch": 0.7498768039422739, + "grad_norm": 1.8351680040359497, + "learning_rate": 2.1742096956191603e-06, + "loss": 0.6518, "step": 10652 }, { - "epoch": 0.80609889902009, - "grad_norm": 1.7427829504013062, - "learning_rate": 1.787222420920394e-06, - "loss": 0.694, + "epoch": 0.749947201689546, + "grad_norm": 3.11674165725708, + "learning_rate": 2.173053630320586e-06, + "loss": 0.6343, "step": 10653 }, { - "epoch": 0.8061745677424237, - "grad_norm": 2.255074977874756, - "learning_rate": 1.7858755904303947e-06, - "loss": 0.7371, + "epoch": 0.750017599436818, + "grad_norm": 2.0284154415130615, + "learning_rate": 2.1718978159793252e-06, + "loss": 0.6652, "step": 10654 }, { - "epoch": 0.8062502364647572, - "grad_norm": 2.0775415897369385, - "learning_rate": 1.7845292117121972e-06, - "loss": 0.601, + "epoch": 0.7500879971840901, + "grad_norm": 1.9835914373397827, + "learning_rate": 2.1707422526554673e-06, + "loss": 0.7466, "step": 10655 }, { - "epoch": 0.8063259051870909, - "grad_norm": 3.3672308921813965, - "learning_rate": 1.7831832848501183e-06, - "loss": 0.6938, + "epoch": 0.7501583949313622, + "grad_norm": 2.1343541145324707, + "learning_rate": 2.1695869404090957e-06, + "loss": 0.5813, "step": 10656 }, { - "epoch": 0.8064015739094246, - "grad_norm": 2.231694459915161, - "learning_rate": 1.7818378099284435e-06, - "loss": 0.6689, + "epoch": 0.7502287926786343, + "grad_norm": 1.7800339460372925, + "learning_rate": 2.1684318793002715e-06, + "loss": 0.7373, "step": 10657 }, { - "epoch": 0.8064772426317581, - "grad_norm": 2.523862838745117, - "learning_rate": 1.7804927870314314e-06, - "loss": 0.6083, + "epoch": 0.7502991904259064, + "grad_norm": 1.8742040395736694, + "learning_rate": 2.167277069389052e-06, + "loss": 0.7699, "step": 10658 }, { - "epoch": 0.8065529113540918, - "grad_norm": 2.5286412239074707, - "learning_rate": 1.7791482162433126e-06, - "loss": 0.7218, + "epoch": 0.7503695881731784, + "grad_norm": 1.7654849290847778, + "learning_rate": 2.166122510735474e-06, + "loss": 0.7129, "step": 10659 }, { - "epoch": 0.8066285800764255, - "grad_norm": 1.992052674293518, - "learning_rate": 1.7778040976482867e-06, - "loss": 0.7306, + "epoch": 0.7504399859204506, + "grad_norm": 2.039085626602173, + "learning_rate": 2.164968203399565e-06, + "loss": 0.7505, "step": 10660 }, { - "epoch": 0.806704248798759, - "grad_norm": 2.2717790603637695, - "learning_rate": 1.7764604313305307e-06, - "loss": 0.7695, + "epoch": 0.7505103836677226, + "grad_norm": 2.0826282501220703, + "learning_rate": 2.163814147441336e-06, + "loss": 0.6616, "step": 10661 }, { - "epoch": 0.8067799175210927, - "grad_norm": 2.3762831687927246, - "learning_rate": 1.7751172173741807e-06, - "loss": 0.6682, + "epoch": 0.7505807814149947, + "grad_norm": 1.8307174444198608, + "learning_rate": 2.162660342920791e-06, + "loss": 0.5668, "step": 10662 }, { - "epoch": 0.8068555862434262, - "grad_norm": 1.9005999565124512, - "learning_rate": 1.773774455863361e-06, - "loss": 0.7671, + "epoch": 0.7506511791622668, + "grad_norm": 2.2269411087036133, + "learning_rate": 2.161506789897913e-06, + "loss": 0.5919, "step": 10663 }, { - "epoch": 0.8069312549657599, - "grad_norm": 1.9350870847702026, - "learning_rate": 1.772432146882158e-06, - "loss": 0.5733, + "epoch": 0.7507215769095389, + "grad_norm": 1.8220231533050537, + "learning_rate": 2.160353488432682e-06, + "loss": 0.652, "step": 10664 }, { - "epoch": 0.8070069236880936, - "grad_norm": 1.9355764389038086, - "learning_rate": 1.7710902905146324e-06, - "loss": 0.5401, + "epoch": 0.750791974656811, + "grad_norm": 1.7989200353622437, + "learning_rate": 2.159200438585056e-06, + "loss": 0.685, "step": 10665 }, { - "epoch": 0.8070825924104271, - "grad_norm": 8.177988052368164, - "learning_rate": 1.7697488868448123e-06, - "loss": 0.6673, + "epoch": 0.750862372404083, + "grad_norm": 1.7695039510726929, + "learning_rate": 2.158047640414983e-06, + "loss": 0.6239, "step": 10666 }, { - "epoch": 0.8071582611327608, - "grad_norm": 2.334674835205078, - "learning_rate": 1.7684079359567002e-06, - "loss": 0.6466, + "epoch": 0.7509327701513552, + "grad_norm": 2.227508068084717, + "learning_rate": 2.1568950939823964e-06, + "loss": 0.7655, "step": 10667 }, { - "epoch": 0.8072339298550943, - "grad_norm": 1.965072751045227, - "learning_rate": 1.7670674379342773e-06, - "loss": 0.6563, + "epoch": 0.7510031678986272, + "grad_norm": 1.803942084312439, + "learning_rate": 2.155742799347223e-06, + "loss": 0.6639, "step": 10668 }, { - "epoch": 0.807309598577428, - "grad_norm": 2.178422451019287, - "learning_rate": 1.7657273928614828e-06, - "loss": 0.7631, + "epoch": 0.7510735656458993, + "grad_norm": 2.3394241333007812, + "learning_rate": 2.1545907565693654e-06, + "loss": 0.5958, "step": 10669 }, { - "epoch": 0.8073852672997617, - "grad_norm": 2.614917755126953, - "learning_rate": 1.7643878008222373e-06, - "loss": 0.63, + "epoch": 0.7511439633931715, + "grad_norm": 1.8669021129608154, + "learning_rate": 2.1534389657087245e-06, + "loss": 0.661, "step": 10670 }, { - "epoch": 0.8074609360220952, - "grad_norm": 2.7337183952331543, - "learning_rate": 1.7630486619004313e-06, - "loss": 0.7156, + "epoch": 0.7512143611404435, + "grad_norm": 2.707916259765625, + "learning_rate": 2.1522874268251825e-06, + "loss": 0.6391, "step": 10671 }, { - "epoch": 0.8075366047444289, - "grad_norm": 2.084549903869629, - "learning_rate": 1.7617099761799246e-06, - "loss": 0.49, + "epoch": 0.7512847588877156, + "grad_norm": 2.7855446338653564, + "learning_rate": 2.1511361399786063e-06, + "loss": 0.6529, "step": 10672 }, { - "epoch": 0.8076122734667626, - "grad_norm": 2.35610294342041, - "learning_rate": 1.7603717437445506e-06, - "loss": 0.6401, + "epoch": 0.7513551566349876, + "grad_norm": 1.8055672645568848, + "learning_rate": 2.149985105228852e-06, + "loss": 0.6411, "step": 10673 }, { - "epoch": 0.8076879421890961, - "grad_norm": 2.1338629722595215, - "learning_rate": 1.7590339646781149e-06, - "loss": 0.5452, + "epoch": 0.7514255543822598, + "grad_norm": 2.174635410308838, + "learning_rate": 2.1488343226357667e-06, + "loss": 0.6074, "step": 10674 }, { - "epoch": 0.8077636109114298, - "grad_norm": 2.5002894401550293, - "learning_rate": 1.7576966390643935e-06, - "loss": 0.6739, + "epoch": 0.7514959521295319, + "grad_norm": 1.7411386966705322, + "learning_rate": 2.147683792259176e-06, + "loss": 0.6557, "step": 10675 }, { - "epoch": 0.8078392796337633, - "grad_norm": 2.1118812561035156, - "learning_rate": 1.7563597669871315e-06, - "loss": 0.5796, + "epoch": 0.7515663498768039, + "grad_norm": 1.8564995527267456, + "learning_rate": 2.1465335141589016e-06, + "loss": 0.6396, "step": 10676 }, { - "epoch": 0.807914948356097, - "grad_norm": 2.3569159507751465, - "learning_rate": 1.7550233485300469e-06, - "loss": 0.7535, + "epoch": 0.7516367476240761, + "grad_norm": 2.583056926727295, + "learning_rate": 2.1453834883947454e-06, + "loss": 0.6752, "step": 10677 }, { - "epoch": 0.8079906170784307, - "grad_norm": 1.8390127420425415, - "learning_rate": 1.7536873837768358e-06, - "loss": 0.6276, + "epoch": 0.7517071453713481, + "grad_norm": 1.6526085138320923, + "learning_rate": 2.1442337150264983e-06, + "loss": 0.7642, "step": 10678 }, { - "epoch": 0.8080662858007642, - "grad_norm": 2.113814115524292, - "learning_rate": 1.7523518728111603e-06, - "loss": 0.5618, + "epoch": 0.7517775431186202, + "grad_norm": 1.755053162574768, + "learning_rate": 2.1430841941139356e-06, + "loss": 0.6392, "step": 10679 }, { - "epoch": 0.8081419545230979, - "grad_norm": 2.1201260089874268, - "learning_rate": 1.7510168157166506e-06, - "loss": 0.6731, + "epoch": 0.7518479408658922, + "grad_norm": 2.0691592693328857, + "learning_rate": 2.1419349257168268e-06, + "loss": 0.6751, "step": 10680 }, { - "epoch": 0.8082176232454314, - "grad_norm": 2.6762893199920654, - "learning_rate": 1.7496822125769133e-06, - "loss": 0.5319, + "epoch": 0.7519183386131644, + "grad_norm": 1.9485375881195068, + "learning_rate": 2.1407859098949186e-06, + "loss": 0.6569, "step": 10681 }, { - "epoch": 0.8082932919677651, - "grad_norm": 2.3151755332946777, - "learning_rate": 1.7483480634755262e-06, - "loss": 0.7085, + "epoch": 0.7519887363604365, + "grad_norm": 1.9625542163848877, + "learning_rate": 2.1396371467079545e-06, + "loss": 0.6449, "step": 10682 }, { - "epoch": 0.8083689606900988, - "grad_norm": 2.3955535888671875, - "learning_rate": 1.7470143684960382e-06, - "loss": 0.6417, + "epoch": 0.7520591341077085, + "grad_norm": 1.7881028652191162, + "learning_rate": 2.1384886362156557e-06, + "loss": 0.5658, "step": 10683 }, { - "epoch": 0.8084446294124323, - "grad_norm": 2.628687858581543, - "learning_rate": 1.7456811277219693e-06, - "loss": 0.5682, + "epoch": 0.7521295318549807, + "grad_norm": 2.0859737396240234, + "learning_rate": 2.1373403784777363e-06, + "loss": 0.6781, "step": 10684 }, { - "epoch": 0.808520298134766, - "grad_norm": 2.5121073722839355, - "learning_rate": 1.7443483412368119e-06, - "loss": 0.675, + "epoch": 0.7521999296022527, + "grad_norm": 1.7906486988067627, + "learning_rate": 2.1361923735538924e-06, + "loss": 0.7511, "step": 10685 }, { - "epoch": 0.8085959668570997, - "grad_norm": 2.235436201095581, - "learning_rate": 1.7430160091240313e-06, - "loss": 0.599, + "epoch": 0.7522703273495248, + "grad_norm": 1.7091587781906128, + "learning_rate": 2.135044621503812e-06, + "loss": 0.6551, "step": 10686 }, { - "epoch": 0.8086716355794332, - "grad_norm": 2.1594743728637695, - "learning_rate": 1.7416841314670577e-06, - "loss": 0.6628, + "epoch": 0.752340725096797, + "grad_norm": 3.010484218597412, + "learning_rate": 2.13389712238717e-06, + "loss": 0.5863, "step": 10687 }, { - "epoch": 0.8087473043017669, - "grad_norm": 2.264970064163208, - "learning_rate": 1.7403527083492974e-06, - "loss": 0.5556, + "epoch": 0.752411122844069, + "grad_norm": 1.9513206481933594, + "learning_rate": 2.1327498762636233e-06, + "loss": 0.6466, "step": 10688 }, { - "epoch": 0.8088229730241004, - "grad_norm": 2.473870038986206, - "learning_rate": 1.7390217398541355e-06, - "loss": 0.6411, + "epoch": 0.7524815205913411, + "grad_norm": 2.1491734981536865, + "learning_rate": 2.1316028831928186e-06, + "loss": 0.7588, "step": 10689 }, { - "epoch": 0.8088986417464341, - "grad_norm": 2.277695894241333, - "learning_rate": 1.7376912260649158e-06, - "loss": 0.6542, + "epoch": 0.7525519183386131, + "grad_norm": 1.9867998361587524, + "learning_rate": 2.1304561432343863e-06, + "loss": 0.6308, "step": 10690 }, { - "epoch": 0.8089743104687678, - "grad_norm": 2.1290292739868164, - "learning_rate": 1.73636116706496e-06, - "loss": 0.6924, + "epoch": 0.7526223160858853, + "grad_norm": 1.7818580865859985, + "learning_rate": 2.1293096564479525e-06, + "loss": 0.6205, "step": 10691 }, { - "epoch": 0.8090499791911013, - "grad_norm": 2.1614058017730713, - "learning_rate": 1.7350315629375611e-06, - "loss": 0.588, + "epoch": 0.7526927138331574, + "grad_norm": 1.902833342552185, + "learning_rate": 2.128163422893118e-06, + "loss": 0.6652, "step": 10692 }, { - "epoch": 0.809125647913435, - "grad_norm": 1.7077982425689697, - "learning_rate": 1.733702413765984e-06, - "loss": 0.5756, + "epoch": 0.7527631115804294, + "grad_norm": 2.2447350025177, + "learning_rate": 2.127017442629481e-06, + "loss": 0.588, "step": 10693 }, { - "epoch": 0.8092013166357686, - "grad_norm": 2.483567953109741, - "learning_rate": 1.7323737196334635e-06, - "loss": 0.6524, + "epoch": 0.7528335093277015, + "grad_norm": 2.0366477966308594, + "learning_rate": 2.1258717157166194e-06, + "loss": 0.6488, "step": 10694 }, { - "epoch": 0.8092769853581022, - "grad_norm": 2.2687125205993652, - "learning_rate": 1.7310454806232077e-06, - "loss": 0.6016, + "epoch": 0.7529039070749736, + "grad_norm": 1.9507791996002197, + "learning_rate": 2.1247262422141052e-06, + "loss": 0.6585, "step": 10695 }, { - "epoch": 0.8093526540804359, - "grad_norm": 2.1247775554656982, - "learning_rate": 1.7297176968183935e-06, - "loss": 0.6076, + "epoch": 0.7529743048222457, + "grad_norm": 2.2671337127685547, + "learning_rate": 2.123581022181485e-06, + "loss": 0.6024, "step": 10696 }, { - "epoch": 0.8094283228027694, - "grad_norm": 2.6836366653442383, - "learning_rate": 1.7283903683021748e-06, - "loss": 0.6584, + "epoch": 0.7530447025695177, + "grad_norm": 1.9419043064117432, + "learning_rate": 2.1224360556783055e-06, + "loss": 0.7349, "step": 10697 }, { - "epoch": 0.8095039915251031, - "grad_norm": 2.096525192260742, - "learning_rate": 1.7270634951576667e-06, - "loss": 0.5641, + "epoch": 0.7531151003167899, + "grad_norm": 2.00348162651062, + "learning_rate": 2.121291342764091e-06, + "loss": 0.5178, "step": 10698 }, { - "epoch": 0.8095796602474368, - "grad_norm": 2.056962251663208, - "learning_rate": 1.7257370774679675e-06, - "loss": 0.6351, + "epoch": 0.753185498064062, + "grad_norm": 2.0332655906677246, + "learning_rate": 2.1201468834983605e-06, + "loss": 0.6041, "step": 10699 }, { - "epoch": 0.8096553289697703, - "grad_norm": 2.368328809738159, - "learning_rate": 1.7244111153161425e-06, - "loss": 0.696, + "epoch": 0.753255895811334, + "grad_norm": 1.5949476957321167, + "learning_rate": 2.1190026779406104e-06, + "loss": 0.6013, "step": 10700 }, { - "epoch": 0.809730997692104, - "grad_norm": 2.048150062561035, - "learning_rate": 1.7230856087852236e-06, - "loss": 0.6948, + "epoch": 0.7533262935586061, + "grad_norm": 2.18363356590271, + "learning_rate": 2.1178587261503357e-06, + "loss": 0.7228, "step": 10701 }, { - "epoch": 0.8098066664144375, - "grad_norm": 3.076840400695801, - "learning_rate": 1.7217605579582204e-06, - "loss": 0.6797, + "epoch": 0.7533966913058782, + "grad_norm": 8.992382049560547, + "learning_rate": 2.1167150281870036e-06, + "loss": 0.5774, "step": 10702 }, { - "epoch": 0.8098823351367712, - "grad_norm": 1.980602502822876, - "learning_rate": 1.7204359629181112e-06, - "loss": 0.6077, + "epoch": 0.7534670890531503, + "grad_norm": 1.749864935874939, + "learning_rate": 2.115571584110081e-06, + "loss": 0.6571, "step": 10703 }, { - "epoch": 0.8099580038591049, - "grad_norm": 2.4633147716522217, - "learning_rate": 1.719111823747847e-06, - "loss": 0.6029, + "epoch": 0.7535374868004224, + "grad_norm": 1.65178644657135, + "learning_rate": 2.1144283939790142e-06, + "loss": 0.6661, "step": 10704 }, { - "epoch": 0.8100336725814384, - "grad_norm": 6.110641956329346, - "learning_rate": 1.7177881405303505e-06, - "loss": 0.7389, + "epoch": 0.7536078845476945, + "grad_norm": 1.9399515390396118, + "learning_rate": 2.1132854578532415e-06, + "loss": 0.662, "step": 10705 }, { - "epoch": 0.8101093413037721, - "grad_norm": 2.669058322906494, - "learning_rate": 1.716464913348514e-06, - "loss": 0.6563, + "epoch": 0.7536782822949666, + "grad_norm": 2.146806240081787, + "learning_rate": 2.1121427757921815e-06, + "loss": 0.6152, "step": 10706 }, { - "epoch": 0.8101850100261057, - "grad_norm": 2.2113122940063477, - "learning_rate": 1.7151421422852045e-06, - "loss": 0.6166, + "epoch": 0.7537486800422386, + "grad_norm": 1.960129976272583, + "learning_rate": 2.111000347855249e-06, + "loss": 0.7028, "step": 10707 }, { - "epoch": 0.8102606787484393, - "grad_norm": 2.3939402103424072, - "learning_rate": 1.7138198274232508e-06, - "loss": 0.618, + "epoch": 0.7538190777895107, + "grad_norm": 2.1999917030334473, + "learning_rate": 2.1098581741018333e-06, + "loss": 0.7481, "step": 10708 }, { - "epoch": 0.810336347470773, - "grad_norm": 2.17268967628479, - "learning_rate": 1.7124979688454684e-06, - "loss": 0.8182, + "epoch": 0.7538894755367829, + "grad_norm": 1.5683504343032837, + "learning_rate": 2.1087162545913206e-06, + "loss": 0.5517, "step": 10709 }, { - "epoch": 0.8104120161931065, - "grad_norm": 9.700509071350098, - "learning_rate": 1.7111765666346343e-06, - "loss": 0.6712, + "epoch": 0.7539598732840549, + "grad_norm": 3.1013848781585693, + "learning_rate": 2.107574589383078e-06, + "loss": 0.6121, "step": 10710 }, { - "epoch": 0.8104876849154402, - "grad_norm": 2.0205531120300293, - "learning_rate": 1.7098556208735011e-06, - "loss": 0.7657, + "epoch": 0.754030271031327, + "grad_norm": 2.263964891433716, + "learning_rate": 2.106433178536465e-06, + "loss": 0.6787, "step": 10711 }, { - "epoch": 0.8105633536377739, - "grad_norm": 1.8378758430480957, - "learning_rate": 1.708535131644785e-06, - "loss": 0.6912, + "epoch": 0.754100668778599, + "grad_norm": 1.6593859195709229, + "learning_rate": 2.1052920221108216e-06, + "loss": 0.6195, "step": 10712 }, { - "epoch": 0.8106390223601074, - "grad_norm": 1.6077888011932373, - "learning_rate": 1.7072150990311805e-06, - "loss": 0.6132, + "epoch": 0.7541710665258712, + "grad_norm": 1.785494327545166, + "learning_rate": 2.104151120165483e-06, + "loss": 0.6188, "step": 10713 }, { - "epoch": 0.8107146910824411, - "grad_norm": 1.9418666362762451, - "learning_rate": 1.7058955231153598e-06, - "loss": 0.6918, + "epoch": 0.7542414642731433, + "grad_norm": 1.9968197345733643, + "learning_rate": 2.1030104727597575e-06, + "loss": 0.6705, "step": 10714 }, { - "epoch": 0.8107903598047747, - "grad_norm": 2.010127544403076, - "learning_rate": 1.7045764039799502e-06, - "loss": 0.6395, + "epoch": 0.7543118620204153, + "grad_norm": 2.1422975063323975, + "learning_rate": 2.1018700799529537e-06, + "loss": 0.7425, "step": 10715 }, { - "epoch": 0.8108660285271083, - "grad_norm": 1.9228168725967407, - "learning_rate": 1.7032577417075624e-06, - "loss": 0.7397, + "epoch": 0.7543822597676875, + "grad_norm": 1.8996614217758179, + "learning_rate": 2.100729941804359e-06, + "loss": 0.6663, "step": 10716 }, { - "epoch": 0.810941697249442, - "grad_norm": 2.564603328704834, - "learning_rate": 1.7019395363807748e-06, - "loss": 0.7028, + "epoch": 0.7544526575149595, + "grad_norm": 1.6709067821502686, + "learning_rate": 2.099590058373254e-06, + "loss": 0.5304, "step": 10717 }, { - "epoch": 0.8110173659717755, - "grad_norm": 2.034059762954712, - "learning_rate": 1.7006217880821414e-06, - "loss": 0.6505, + "epoch": 0.7545230552622316, + "grad_norm": 1.715179443359375, + "learning_rate": 2.0984504297188976e-06, + "loss": 0.6443, "step": 10718 }, { - "epoch": 0.8110930346941092, - "grad_norm": 2.007988691329956, - "learning_rate": 1.6993044968941754e-06, - "loss": 0.7674, + "epoch": 0.7545934530095036, + "grad_norm": 1.884588360786438, + "learning_rate": 2.097311055900546e-06, + "loss": 0.7353, "step": 10719 }, { - "epoch": 0.8111687034164428, - "grad_norm": 2.383240222930908, - "learning_rate": 1.6979876628993777e-06, - "loss": 0.7798, + "epoch": 0.7546638507567758, + "grad_norm": 1.81199312210083, + "learning_rate": 2.0961719369774292e-06, + "loss": 0.748, "step": 10720 }, { - "epoch": 0.8112443721387764, - "grad_norm": 2.532583236694336, - "learning_rate": 1.6966712861802135e-06, - "loss": 0.7291, + "epoch": 0.7547342485040479, + "grad_norm": 1.8531882762908936, + "learning_rate": 2.095033073008777e-06, + "loss": 0.6225, "step": 10721 }, { - "epoch": 0.8113200408611101, - "grad_norm": 2.3727943897247314, - "learning_rate": 1.6953553668191115e-06, - "loss": 0.6543, + "epoch": 0.7548046462513199, + "grad_norm": 2.0590388774871826, + "learning_rate": 2.0938944640537953e-06, + "loss": 0.7258, "step": 10722 }, { - "epoch": 0.8113957095834436, - "grad_norm": 2.5050201416015625, - "learning_rate": 1.6940399048984833e-06, - "loss": 0.5767, + "epoch": 0.7548750439985921, + "grad_norm": 1.99904465675354, + "learning_rate": 2.0927561101716855e-06, + "loss": 0.6311, "step": 10723 }, { - "epoch": 0.8114713783057773, - "grad_norm": 2.1862447261810303, - "learning_rate": 1.6927249005007034e-06, - "loss": 0.7536, + "epoch": 0.7549454417458641, + "grad_norm": 2.2521321773529053, + "learning_rate": 2.0916180114216274e-06, + "loss": 0.6354, "step": 10724 }, { - "epoch": 0.811547047028111, - "grad_norm": 2.4321300983428955, - "learning_rate": 1.6914103537081305e-06, - "loss": 0.7764, + "epoch": 0.7550158394931362, + "grad_norm": 2.3889448642730713, + "learning_rate": 2.090480167862797e-06, + "loss": 0.7483, "step": 10725 }, { - "epoch": 0.8116227157504445, - "grad_norm": 2.240424871444702, - "learning_rate": 1.6900962646030772e-06, - "loss": 0.6716, + "epoch": 0.7550862372404084, + "grad_norm": 1.8436771631240845, + "learning_rate": 2.089342579554348e-06, + "loss": 0.5941, "step": 10726 }, { - "epoch": 0.8116983844727782, - "grad_norm": 2.253845453262329, - "learning_rate": 1.6887826332678393e-06, - "loss": 0.6926, + "epoch": 0.7551566349876804, + "grad_norm": 2.3821160793304443, + "learning_rate": 2.0882052465554266e-06, + "loss": 0.591, "step": 10727 }, { - "epoch": 0.8117740531951118, - "grad_norm": 1.9651557207107544, - "learning_rate": 1.6874694597846795e-06, - "loss": 0.7082, + "epoch": 0.7552270327349525, + "grad_norm": 1.8380457162857056, + "learning_rate": 2.08706816892516e-06, + "loss": 0.6708, "step": 10728 }, { - "epoch": 0.8118497219174454, - "grad_norm": 2.718416929244995, - "learning_rate": 1.686156744235834e-06, - "loss": 0.7596, + "epoch": 0.7552974304822245, + "grad_norm": 1.6096019744873047, + "learning_rate": 2.0859313467226714e-06, + "loss": 0.6092, "step": 10729 }, { - "epoch": 0.8119253906397791, - "grad_norm": 2.3345022201538086, - "learning_rate": 1.6848444867035093e-06, - "loss": 0.6771, + "epoch": 0.7553678282294967, + "grad_norm": 2.3598668575286865, + "learning_rate": 2.08479478000706e-06, + "loss": 0.7093, "step": 10730 }, { - "epoch": 0.8120010593621126, - "grad_norm": 2.0901668071746826, - "learning_rate": 1.6835326872698826e-06, - "loss": 0.6624, + "epoch": 0.7554382259767688, + "grad_norm": 1.8610048294067383, + "learning_rate": 2.083658468837422e-06, + "loss": 0.6717, "step": 10731 }, { - "epoch": 0.8120767280844463, - "grad_norm": 2.0496819019317627, - "learning_rate": 1.6822213460171061e-06, - "loss": 0.6007, + "epoch": 0.7555086237240408, + "grad_norm": 1.9135003089904785, + "learning_rate": 2.082522413272832e-06, + "loss": 0.6158, "step": 10732 }, { - "epoch": 0.8121523968067799, - "grad_norm": 1.9577654600143433, - "learning_rate": 1.6809104630272944e-06, - "loss": 0.8049, + "epoch": 0.755579021471313, + "grad_norm": 2.163968801498413, + "learning_rate": 2.081386613372356e-06, + "loss": 0.5817, "step": 10733 }, { - "epoch": 0.8122280655291135, - "grad_norm": 2.862490177154541, - "learning_rate": 1.6796000383825414e-06, - "loss": 0.6048, + "epoch": 0.755649419218585, + "grad_norm": 1.7785471677780151, + "learning_rate": 2.080251069195042e-06, + "loss": 0.6109, "step": 10734 }, { - "epoch": 0.8123037342514472, - "grad_norm": 2.0185632705688477, - "learning_rate": 1.6782900721649146e-06, - "loss": 0.67, + "epoch": 0.7557198169658571, + "grad_norm": 1.9112842082977295, + "learning_rate": 2.079115780799933e-06, + "loss": 0.6437, "step": 10735 }, { - "epoch": 0.8123794029737808, - "grad_norm": 2.023618221282959, - "learning_rate": 1.6769805644564426e-06, - "loss": 0.599, + "epoch": 0.7557902147131291, + "grad_norm": 1.7798744440078735, + "learning_rate": 2.077980748246048e-06, + "loss": 0.5323, "step": 10736 }, { - "epoch": 0.8124550716961144, - "grad_norm": 2.3943023681640625, - "learning_rate": 1.6756715153391327e-06, - "loss": 0.5855, + "epoch": 0.7558606124604013, + "grad_norm": 1.9663434028625488, + "learning_rate": 2.0768459715924053e-06, + "loss": 0.7212, "step": 10737 }, { - "epoch": 0.8125307404184481, - "grad_norm": 2.002091407775879, - "learning_rate": 1.6743629248949631e-06, - "loss": 0.7371, + "epoch": 0.7559310102076734, + "grad_norm": 1.9542757272720337, + "learning_rate": 2.0757114508979978e-06, + "loss": 0.5766, "step": 10738 }, { - "epoch": 0.8126064091407816, - "grad_norm": 2.1737380027770996, - "learning_rate": 1.6730547932058806e-06, - "loss": 0.5976, + "epoch": 0.7560014079549454, + "grad_norm": 1.935405969619751, + "learning_rate": 2.07457718622181e-06, + "loss": 0.7411, "step": 10739 }, { - "epoch": 0.8126820778631153, - "grad_norm": 2.151082992553711, - "learning_rate": 1.6717471203538053e-06, - "loss": 0.7263, + "epoch": 0.7560718057022175, + "grad_norm": 1.9950186014175415, + "learning_rate": 2.0734431776228176e-06, + "loss": 0.6979, "step": 10740 }, { - "epoch": 0.8127577465854489, - "grad_norm": 2.34664249420166, - "learning_rate": 1.670439906420628e-06, - "loss": 0.6413, + "epoch": 0.7561422034494896, + "grad_norm": 1.958431363105774, + "learning_rate": 2.072309425159974e-06, + "loss": 0.6714, "step": 10741 }, { - "epoch": 0.8128334153077825, - "grad_norm": 2.2073299884796143, - "learning_rate": 1.66913315148821e-06, - "loss": 0.7334, + "epoch": 0.7562126011967617, + "grad_norm": 2.247626304626465, + "learning_rate": 2.0711759288922278e-06, + "loss": 0.6496, "step": 10742 }, { - "epoch": 0.8129090840301162, - "grad_norm": 1.9917361736297607, - "learning_rate": 1.667826855638388e-06, - "loss": 0.6073, + "epoch": 0.7562829989440338, + "grad_norm": 1.5054131746292114, + "learning_rate": 2.070042688878509e-06, + "loss": 0.6596, "step": 10743 }, { - "epoch": 0.8129847527524497, - "grad_norm": 2.251788854598999, - "learning_rate": 1.6665210189529585e-06, - "loss": 0.6817, + "epoch": 0.7563533966913059, + "grad_norm": 1.738282561302185, + "learning_rate": 2.068909705177735e-06, + "loss": 0.5316, "step": 10744 }, { - "epoch": 0.8130604214747834, - "grad_norm": 2.446361541748047, - "learning_rate": 1.6652156415137041e-06, - "loss": 0.7737, + "epoch": 0.756423794438578, + "grad_norm": 2.141700029373169, + "learning_rate": 2.0677769778488098e-06, + "loss": 0.6669, "step": 10745 }, { - "epoch": 0.813136090197117, - "grad_norm": 2.6486692428588867, - "learning_rate": 1.6639107234023723e-06, - "loss": 0.5922, + "epoch": 0.75649419218585, + "grad_norm": 1.6373023986816406, + "learning_rate": 2.066644506950629e-06, + "loss": 0.5552, "step": 10746 }, { - "epoch": 0.8132117589194506, - "grad_norm": 2.2547566890716553, - "learning_rate": 1.662606264700676e-06, - "loss": 0.6869, + "epoch": 0.7565645899331221, + "grad_norm": 2.4922585487365723, + "learning_rate": 2.065512292542065e-06, + "loss": 0.6121, "step": 10747 }, { - "epoch": 0.8132874276417843, - "grad_norm": 2.7549359798431396, - "learning_rate": 1.6613022654903086e-06, - "loss": 0.5634, + "epoch": 0.7566349876803943, + "grad_norm": 1.9269415140151978, + "learning_rate": 2.064380334681989e-06, + "loss": 0.6149, "step": 10748 }, { - "epoch": 0.8133630963641179, - "grad_norm": 2.0122973918914795, - "learning_rate": 1.6599987258529288e-06, - "loss": 0.5678, + "epoch": 0.7567053854276663, + "grad_norm": 2.179696559906006, + "learning_rate": 2.063248633429249e-06, + "loss": 0.685, "step": 10749 }, { - "epoch": 0.8134387650864515, - "grad_norm": 2.138713836669922, - "learning_rate": 1.6586956458701685e-06, - "loss": 0.7879, + "epoch": 0.7567757831749384, + "grad_norm": 2.8891983032226562, + "learning_rate": 2.0621171888426832e-06, + "loss": 0.6363, "step": 10750 }, { - "epoch": 0.8135144338087852, - "grad_norm": 2.357614040374756, - "learning_rate": 1.6573930256236323e-06, - "loss": 0.6622, + "epoch": 0.7568461809222105, + "grad_norm": 1.67112398147583, + "learning_rate": 2.060986000981115e-06, + "loss": 0.6777, "step": 10751 }, { - "epoch": 0.8135901025311187, - "grad_norm": 2.2221148014068604, - "learning_rate": 1.656090865194894e-06, - "loss": 0.79, + "epoch": 0.7569165786694826, + "grad_norm": 1.7114392518997192, + "learning_rate": 2.05985506990336e-06, + "loss": 0.6807, "step": 10752 }, { - "epoch": 0.8136657712534524, - "grad_norm": 2.7023918628692627, - "learning_rate": 1.654789164665499e-06, - "loss": 0.716, + "epoch": 0.7569869764167546, + "grad_norm": 1.9333758354187012, + "learning_rate": 2.0587243956682115e-06, + "loss": 0.8292, "step": 10753 }, { - "epoch": 0.813741439975786, - "grad_norm": 2.392548084259033, - "learning_rate": 1.6534879241169625e-06, - "loss": 0.6097, + "epoch": 0.7570573741640267, + "grad_norm": 1.8891880512237549, + "learning_rate": 2.057593978334459e-06, + "loss": 0.6404, "step": 10754 }, { - "epoch": 0.8138171086981196, - "grad_norm": 2.4918103218078613, - "learning_rate": 1.6521871436307754e-06, - "loss": 0.5744, + "epoch": 0.7571277719112989, + "grad_norm": 1.820505976676941, + "learning_rate": 2.0564638179608713e-06, + "loss": 0.6438, "step": 10755 }, { - "epoch": 0.8138927774204533, - "grad_norm": 1.715964913368225, - "learning_rate": 1.6508868232883932e-06, - "loss": 0.614, + "epoch": 0.7571981696585709, + "grad_norm": 1.8354361057281494, + "learning_rate": 2.055333914606207e-06, + "loss": 0.5546, "step": 10756 }, { - "epoch": 0.8139684461427869, - "grad_norm": 1.9560282230377197, - "learning_rate": 1.649586963171252e-06, - "loss": 0.6038, + "epoch": 0.757268567405843, + "grad_norm": 2.4429869651794434, + "learning_rate": 2.0542042683292087e-06, + "loss": 0.6761, "step": 10757 }, { - "epoch": 0.8140441148651205, - "grad_norm": 1.8922325372695923, - "learning_rate": 1.6482875633607465e-06, - "loss": 0.6643, + "epoch": 0.757338965153115, + "grad_norm": 1.913644552230835, + "learning_rate": 2.0530748791886117e-06, + "loss": 0.7116, "step": 10758 }, { - "epoch": 0.8141197835874541, - "grad_norm": 2.1275534629821777, - "learning_rate": 1.6469886239382518e-06, - "loss": 0.7323, + "epoch": 0.7574093629003872, + "grad_norm": 1.7810856103897095, + "learning_rate": 2.0519457472431296e-06, + "loss": 0.5339, "step": 10759 }, { - "epoch": 0.8141954523097877, - "grad_norm": 2.745668649673462, - "learning_rate": 1.6456901449851118e-06, - "loss": 0.655, + "epoch": 0.7574797606476593, + "grad_norm": 1.9395297765731812, + "learning_rate": 2.050816872551472e-06, + "loss": 0.5888, "step": 10760 }, { - "epoch": 0.8142711210321214, - "grad_norm": 2.2803080081939697, - "learning_rate": 1.6443921265826423e-06, - "loss": 0.5338, + "epoch": 0.7575501583949313, + "grad_norm": 1.8000385761260986, + "learning_rate": 2.049688255172327e-06, + "loss": 0.6642, "step": 10761 }, { - "epoch": 0.814346789754455, - "grad_norm": 2.5156054496765137, - "learning_rate": 1.6430945688121284e-06, - "loss": 0.6988, + "epoch": 0.7576205561422035, + "grad_norm": 2.2178032398223877, + "learning_rate": 2.048559895164373e-06, + "loss": 0.6894, "step": 10762 }, { - "epoch": 0.8144224584767886, - "grad_norm": 1.8427619934082031, - "learning_rate": 1.6417974717548272e-06, - "loss": 0.5556, + "epoch": 0.7576909538894755, + "grad_norm": 2.024202585220337, + "learning_rate": 2.047431792586273e-06, + "loss": 0.6284, "step": 10763 }, { - "epoch": 0.8144981271991223, - "grad_norm": 2.746143341064453, - "learning_rate": 1.6405008354919705e-06, - "loss": 0.7378, + "epoch": 0.7577613516367476, + "grad_norm": 2.4069342613220215, + "learning_rate": 2.04630394749668e-06, + "loss": 0.6647, "step": 10764 }, { - "epoch": 0.8145737959214558, - "grad_norm": 2.229966163635254, - "learning_rate": 1.6392046601047505e-06, - "loss": 0.7431, + "epoch": 0.7578317493840198, + "grad_norm": 2.0945446491241455, + "learning_rate": 2.04517635995423e-06, + "loss": 0.5639, "step": 10765 }, { - "epoch": 0.8146494646437895, - "grad_norm": 3.1086843013763428, - "learning_rate": 1.637908945674344e-06, - "loss": 0.7127, + "epoch": 0.7579021471312918, + "grad_norm": 1.7555890083312988, + "learning_rate": 2.04404903001755e-06, + "loss": 0.6143, "step": 10766 }, { - "epoch": 0.8147251333661231, - "grad_norm": 2.0336904525756836, - "learning_rate": 1.6366136922818926e-06, - "loss": 0.6288, + "epoch": 0.7579725448785639, + "grad_norm": 1.7979004383087158, + "learning_rate": 2.0429219577452495e-06, + "loss": 0.7021, "step": 10767 }, { - "epoch": 0.8148008020884567, - "grad_norm": 2.8491573333740234, - "learning_rate": 1.635318900008509e-06, - "loss": 0.7268, + "epoch": 0.7580429426258359, + "grad_norm": 2.3970093727111816, + "learning_rate": 2.041795143195926e-06, + "loss": 0.5943, "step": 10768 }, { - "epoch": 0.8148764708107904, - "grad_norm": 2.5054476261138916, - "learning_rate": 1.6340245689352744e-06, - "loss": 0.7421, + "epoch": 0.7581133403731081, + "grad_norm": 2.48932147026062, + "learning_rate": 2.040668586428161e-06, + "loss": 0.6879, "step": 10769 }, { - "epoch": 0.814952139533124, - "grad_norm": 2.3975508213043213, - "learning_rate": 1.6327306991432431e-06, - "loss": 0.6014, + "epoch": 0.7581837381203802, + "grad_norm": 1.9941478967666626, + "learning_rate": 2.03954228750053e-06, + "loss": 0.6368, "step": 10770 }, { - "epoch": 0.8150278082554576, - "grad_norm": 1.5673209428787231, - "learning_rate": 1.6314372907134484e-06, - "loss": 0.8127, + "epoch": 0.7582541358676522, + "grad_norm": 1.8611884117126465, + "learning_rate": 2.0384162464715857e-06, + "loss": 0.6192, "step": 10771 }, { - "epoch": 0.8151034769777912, - "grad_norm": 1.5236127376556396, - "learning_rate": 1.630144343726882e-06, - "loss": 0.6618, + "epoch": 0.7583245336149244, + "grad_norm": 1.9283246994018555, + "learning_rate": 2.0372904633998764e-06, + "loss": 0.7139, "step": 10772 }, { - "epoch": 0.8151791457001248, - "grad_norm": 2.371945858001709, - "learning_rate": 1.6288518582645128e-06, - "loss": 0.7702, + "epoch": 0.7583949313621964, + "grad_norm": 1.8900132179260254, + "learning_rate": 2.0361649383439307e-06, + "loss": 0.6248, "step": 10773 }, { - "epoch": 0.8152548144224585, - "grad_norm": 3.2651684284210205, - "learning_rate": 1.6275598344072825e-06, - "loss": 0.53, + "epoch": 0.7584653291094685, + "grad_norm": 1.9863580465316772, + "learning_rate": 2.035039671362265e-06, + "loss": 0.6984, "step": 10774 }, { - "epoch": 0.8153304831447921, - "grad_norm": 2.621706008911133, - "learning_rate": 1.6262682722360997e-06, - "loss": 0.6125, + "epoch": 0.7585357268567405, + "grad_norm": 1.8568038940429688, + "learning_rate": 2.0339146625133813e-06, + "loss": 0.6972, "step": 10775 }, { - "epoch": 0.8154061518671257, - "grad_norm": 2.606985092163086, - "learning_rate": 1.6249771718318475e-06, - "loss": 0.7299, + "epoch": 0.7586061246040127, + "grad_norm": 1.9941904544830322, + "learning_rate": 2.0327899118557737e-06, + "loss": 0.6792, "step": 10776 }, { - "epoch": 0.8154818205894594, - "grad_norm": 2.2531168460845947, - "learning_rate": 1.6236865332753782e-06, - "loss": 0.6822, + "epoch": 0.7586765223512848, + "grad_norm": 1.707916259765625, + "learning_rate": 2.031665419447915e-06, + "loss": 0.6567, "step": 10777 }, { - "epoch": 0.815557489311793, - "grad_norm": 1.9697020053863525, - "learning_rate": 1.6223963566475195e-06, - "loss": 0.5334, + "epoch": 0.7587469200985568, + "grad_norm": 2.229140043258667, + "learning_rate": 2.0305411853482734e-06, + "loss": 0.64, "step": 10778 }, { - "epoch": 0.8156331580341266, - "grad_norm": 2.3771908283233643, - "learning_rate": 1.6211066420290594e-06, - "loss": 0.6614, + "epoch": 0.758817317845829, + "grad_norm": 1.7428779602050781, + "learning_rate": 2.029417209615296e-06, + "loss": 0.634, "step": 10779 }, { - "epoch": 0.8157088267564602, - "grad_norm": 2.4156370162963867, - "learning_rate": 1.6198173895007665e-06, - "loss": 0.6298, + "epoch": 0.758887715593101, + "grad_norm": 2.1376311779022217, + "learning_rate": 2.028293492307419e-06, + "loss": 0.7055, "step": 10780 }, { - "epoch": 0.8157844954787938, - "grad_norm": 4.180350303649902, - "learning_rate": 1.6185285991433812e-06, - "loss": 0.7348, + "epoch": 0.7589581133403731, + "grad_norm": 2.1472020149230957, + "learning_rate": 2.027170033483064e-06, + "loss": 0.7346, "step": 10781 }, { - "epoch": 0.8158601642011275, - "grad_norm": 2.094904899597168, - "learning_rate": 1.6172402710376108e-06, - "loss": 0.6079, + "epoch": 0.7590285110876452, + "grad_norm": 1.7491997480392456, + "learning_rate": 2.0260468332006453e-06, + "loss": 0.6617, "step": 10782 }, { - "epoch": 0.8159358329234611, - "grad_norm": 2.2320287227630615, - "learning_rate": 1.6159524052641319e-06, - "loss": 0.7023, + "epoch": 0.7590989088349173, + "grad_norm": 1.89515221118927, + "learning_rate": 2.0249238915185534e-06, + "loss": 0.6412, "step": 10783 }, { - "epoch": 0.8160115016457947, - "grad_norm": 2.9694509506225586, - "learning_rate": 1.6146650019035967e-06, - "loss": 0.7397, + "epoch": 0.7591693065821894, + "grad_norm": 1.5369503498077393, + "learning_rate": 2.0238012084951773e-06, + "loss": 0.5948, "step": 10784 }, { - "epoch": 0.8160871703681284, - "grad_norm": 5.22353982925415, - "learning_rate": 1.6133780610366253e-06, - "loss": 0.6958, + "epoch": 0.7592397043294614, + "grad_norm": 3.8681869506835938, + "learning_rate": 2.0226787841888822e-06, + "loss": 0.7494, "step": 10785 }, { - "epoch": 0.816162839090462, - "grad_norm": 2.167146682739258, - "learning_rate": 1.6120915827438116e-06, - "loss": 0.8393, + "epoch": 0.7593101020767336, + "grad_norm": 1.9966708421707153, + "learning_rate": 2.021556618658023e-06, + "loss": 0.725, "step": 10786 }, { - "epoch": 0.8162385078127956, - "grad_norm": 2.2224442958831787, - "learning_rate": 1.6108055671057176e-06, - "loss": 0.5897, + "epoch": 0.7593804998240057, + "grad_norm": 1.804062843322754, + "learning_rate": 2.020434711960946e-06, + "loss": 0.552, "step": 10787 }, { - "epoch": 0.8163141765351292, - "grad_norm": 2.2738425731658936, - "learning_rate": 1.6095200142028796e-06, - "loss": 0.6285, + "epoch": 0.7594508975712777, + "grad_norm": 1.6824818849563599, + "learning_rate": 2.0193130641559763e-06, + "loss": 0.6128, "step": 10788 }, { - "epoch": 0.8163898452574628, - "grad_norm": 2.0885982513427734, - "learning_rate": 1.6082349241158033e-06, - "loss": 0.6971, + "epoch": 0.7595212953185498, + "grad_norm": 1.8883532285690308, + "learning_rate": 2.0181916753014328e-06, + "loss": 0.6567, "step": 10789 }, { - "epoch": 0.8164655139797965, - "grad_norm": 1.7838362455368042, - "learning_rate": 1.6069502969249595e-06, - "loss": 0.6409, + "epoch": 0.7595916930658219, + "grad_norm": 1.9700058698654175, + "learning_rate": 2.017070545455617e-06, + "loss": 0.6958, "step": 10790 }, { - "epoch": 0.81654118270213, - "grad_norm": 2.1843435764312744, - "learning_rate": 1.6056661327108026e-06, - "loss": 0.7144, + "epoch": 0.759662090813094, + "grad_norm": 2.3070149421691895, + "learning_rate": 2.015949674676815e-06, + "loss": 0.5917, "step": 10791 }, { - "epoch": 0.8166168514244637, - "grad_norm": 2.814628839492798, - "learning_rate": 1.6043824315537513e-06, - "loss": 0.5826, + "epoch": 0.759732488560366, + "grad_norm": 2.2768197059631348, + "learning_rate": 2.0148290630233017e-06, + "loss": 0.7377, "step": 10792 }, { - "epoch": 0.8166925201467973, - "grad_norm": 1.8284931182861328, - "learning_rate": 1.6030991935341905e-06, - "loss": 0.6479, + "epoch": 0.7598028863076381, + "grad_norm": 2.1575515270233154, + "learning_rate": 2.013708710553343e-06, + "loss": 0.7417, "step": 10793 }, { - "epoch": 0.8167681888691309, - "grad_norm": 2.4346306324005127, - "learning_rate": 1.6018164187324818e-06, - "loss": 0.7205, + "epoch": 0.7598732840549103, + "grad_norm": 1.894219994544983, + "learning_rate": 2.0125886173251807e-06, + "loss": 0.7097, "step": 10794 }, { - "epoch": 0.8168438575914646, - "grad_norm": 2.5444185733795166, - "learning_rate": 1.6005341072289578e-06, - "loss": 0.8004, + "epoch": 0.7599436818021823, + "grad_norm": 1.919672966003418, + "learning_rate": 2.011468783397056e-06, + "loss": 0.5777, "step": 10795 }, { - "epoch": 0.8169195263137982, - "grad_norm": 3.5385169982910156, - "learning_rate": 1.5992522591039204e-06, - "loss": 0.7806, + "epoch": 0.7600140795494544, + "grad_norm": 2.139437437057495, + "learning_rate": 2.0103492088271876e-06, + "loss": 0.6536, "step": 10796 }, { - "epoch": 0.8169951950361318, - "grad_norm": 2.1103341579437256, - "learning_rate": 1.5979708744376443e-06, - "loss": 0.6149, + "epoch": 0.7600844772967265, + "grad_norm": 1.7971724271774292, + "learning_rate": 2.0092298936737818e-06, + "loss": 0.7449, "step": 10797 }, { - "epoch": 0.8170708637584655, - "grad_norm": 2.047963857650757, - "learning_rate": 1.5966899533103725e-06, - "loss": 0.6581, + "epoch": 0.7601548750439986, + "grad_norm": 1.9266345500946045, + "learning_rate": 2.0081108379950318e-06, + "loss": 0.6472, "step": 10798 }, { - "epoch": 0.817146532480799, - "grad_norm": 2.359651565551758, - "learning_rate": 1.5954094958023217e-06, - "loss": 0.6508, + "epoch": 0.7602252727912707, + "grad_norm": 1.9265652894973755, + "learning_rate": 2.006992041849123e-06, + "loss": 0.612, "step": 10799 }, { - "epoch": 0.8172222012031327, - "grad_norm": 2.0635018348693848, - "learning_rate": 1.5941295019936786e-06, - "loss": 0.7374, + "epoch": 0.7602956705385427, + "grad_norm": 2.06421160697937, + "learning_rate": 2.0058735052942174e-06, + "loss": 0.6047, "step": 10800 }, { - "epoch": 0.8172978699254663, - "grad_norm": 1.9403828382492065, - "learning_rate": 1.5928499719645964e-06, - "loss": 0.6434, + "epoch": 0.7603660682858149, + "grad_norm": 1.8102912902832031, + "learning_rate": 2.004755228388473e-06, + "loss": 0.5532, "step": 10801 }, { - "epoch": 0.8173735386477999, - "grad_norm": 2.187650442123413, - "learning_rate": 1.5915709057952078e-06, - "loss": 0.7436, + "epoch": 0.7604364660330869, + "grad_norm": 1.7224584817886353, + "learning_rate": 2.0036372111900265e-06, + "loss": 0.589, "step": 10802 }, { - "epoch": 0.8174492073701336, - "grad_norm": 2.316969871520996, - "learning_rate": 1.5902923035656138e-06, - "loss": 0.6399, + "epoch": 0.760506863780359, + "grad_norm": 2.140355348587036, + "learning_rate": 2.0025194537570103e-06, + "loss": 0.6236, "step": 10803 }, { - "epoch": 0.8175248760924672, - "grad_norm": 2.0138914585113525, - "learning_rate": 1.5890141653558796e-06, - "loss": 0.7807, + "epoch": 0.7605772615276312, + "grad_norm": 1.8907281160354614, + "learning_rate": 2.0014019561475293e-06, + "loss": 0.5382, "step": 10804 }, { - "epoch": 0.8176005448148008, - "grad_norm": 2.207831382751465, - "learning_rate": 1.5877364912460476e-06, - "loss": 0.5119, + "epoch": 0.7606476592749032, + "grad_norm": 1.9855375289916992, + "learning_rate": 2.0002847184196904e-06, + "loss": 0.6788, "step": 10805 }, { - "epoch": 0.8176762135371344, - "grad_norm": 2.2698066234588623, - "learning_rate": 1.586459281316131e-06, - "loss": 0.7117, + "epoch": 0.7607180570221753, + "grad_norm": 1.9366083145141602, + "learning_rate": 1.999167740631574e-06, + "loss": 0.531, "step": 10806 }, { - "epoch": 0.817751882259468, - "grad_norm": 2.2364702224731445, - "learning_rate": 1.5851825356461133e-06, - "loss": 0.6974, + "epoch": 0.7607884547694473, + "grad_norm": 1.9710350036621094, + "learning_rate": 1.998051022841259e-06, + "loss": 0.638, "step": 10807 }, { - "epoch": 0.8178275509818017, - "grad_norm": 1.6945065259933472, - "learning_rate": 1.583906254315947e-06, - "loss": 0.6608, + "epoch": 0.7608588525167195, + "grad_norm": 2.061950922012329, + "learning_rate": 1.9969345651067984e-06, + "loss": 0.6998, "step": 10808 }, { - "epoch": 0.8179032197041353, - "grad_norm": 1.855660319328308, - "learning_rate": 1.5826304374055573e-06, - "loss": 0.5783, + "epoch": 0.7609292502639915, + "grad_norm": 2.579044818878174, + "learning_rate": 1.9958183674862455e-06, + "loss": 0.6448, "step": 10809 }, { - "epoch": 0.8179788884264689, - "grad_norm": 2.5465874671936035, - "learning_rate": 1.5813550849948433e-06, - "loss": 0.6822, + "epoch": 0.7609996480112636, + "grad_norm": 1.9797077178955078, + "learning_rate": 1.994702430037623e-06, + "loss": 0.7286, "step": 10810 }, { - "epoch": 0.8180545571488026, - "grad_norm": 2.3344638347625732, - "learning_rate": 1.580080197163663e-06, - "loss": 0.6158, + "epoch": 0.7610700457585358, + "grad_norm": 1.6842553615570068, + "learning_rate": 1.9935867528189573e-06, + "loss": 0.5958, "step": 10811 }, { - "epoch": 0.8181302258711362, - "grad_norm": 4.9126877784729, - "learning_rate": 1.578805773991863e-06, - "loss": 0.6967, + "epoch": 0.7611404435058078, + "grad_norm": 2.0496912002563477, + "learning_rate": 1.9924713358882473e-06, + "loss": 0.6583, "step": 10812 }, { - "epoch": 0.8182058945934698, - "grad_norm": 2.2319014072418213, - "learning_rate": 1.577531815559248e-06, - "loss": 0.6559, + "epoch": 0.7612108412530799, + "grad_norm": 2.1744179725646973, + "learning_rate": 1.9913561793034903e-06, + "loss": 0.6353, "step": 10813 }, { - "epoch": 0.8182815633158034, - "grad_norm": 5.378940582275391, - "learning_rate": 1.5762583219456002e-06, - "loss": 0.762, + "epoch": 0.7612812390003519, + "grad_norm": 2.3477537631988525, + "learning_rate": 1.9902412831226592e-06, + "loss": 0.6701, "step": 10814 }, { - "epoch": 0.818357232038137, - "grad_norm": 3.3257508277893066, - "learning_rate": 1.574985293230666e-06, - "loss": 0.7037, + "epoch": 0.7613516367476241, + "grad_norm": 1.8622901439666748, + "learning_rate": 1.9891266474037255e-06, + "loss": 0.6119, "step": 10815 }, { - "epoch": 0.8184329007604707, - "grad_norm": 2.033773899078369, - "learning_rate": 1.5737127294941647e-06, - "loss": 0.6447, + "epoch": 0.7614220344948962, + "grad_norm": 1.9983611106872559, + "learning_rate": 1.9880122722046308e-06, + "loss": 0.7463, "step": 10816 }, { - "epoch": 0.8185085694828043, - "grad_norm": 2.516923666000366, - "learning_rate": 1.5724406308157973e-06, - "loss": 0.546, + "epoch": 0.7614924322421682, + "grad_norm": 1.7648141384124756, + "learning_rate": 1.9868981575833194e-06, + "loss": 0.5745, "step": 10817 }, { - "epoch": 0.8185842382051379, - "grad_norm": 1.9066401720046997, - "learning_rate": 1.5711689972752181e-06, - "loss": 0.5628, + "epoch": 0.7615628299894404, + "grad_norm": 2.958034038543701, + "learning_rate": 1.9857843035977114e-06, + "loss": 0.5753, "step": 10818 }, { - "epoch": 0.8186599069274715, - "grad_norm": 2.1507344245910645, - "learning_rate": 1.5698978289520646e-06, - "loss": 0.6484, + "epoch": 0.7616332277367124, + "grad_norm": 1.6243034601211548, + "learning_rate": 1.9846707103057197e-06, + "loss": 0.6263, "step": 10819 }, { - "epoch": 0.8187355756498051, - "grad_norm": 2.1909475326538086, - "learning_rate": 1.568627125925941e-06, - "loss": 0.5734, + "epoch": 0.7617036254839845, + "grad_norm": 1.6666784286499023, + "learning_rate": 1.9835573777652386e-06, + "loss": 0.5989, "step": 10820 }, { - "epoch": 0.8188112443721388, - "grad_norm": 2.0895121097564697, - "learning_rate": 1.5673568882764225e-06, - "loss": 0.5795, + "epoch": 0.7617740232312566, + "grad_norm": 1.8677774667739868, + "learning_rate": 1.9824443060341565e-06, + "loss": 0.6542, "step": 10821 }, { - "epoch": 0.8188869130944724, - "grad_norm": 1.9548548460006714, - "learning_rate": 1.5660871160830558e-06, - "loss": 0.6567, + "epoch": 0.7618444209785287, + "grad_norm": 1.8209587335586548, + "learning_rate": 1.981331495170335e-06, + "loss": 0.5415, "step": 10822 }, { - "epoch": 0.818962581816806, - "grad_norm": 2.28955340385437, - "learning_rate": 1.564817809425358e-06, - "loss": 0.7006, + "epoch": 0.7619148187258008, + "grad_norm": 2.085097551345825, + "learning_rate": 1.9802189452316365e-06, + "loss": 0.6121, "step": 10823 }, { - "epoch": 0.8190382505391397, - "grad_norm": 2.538539409637451, - "learning_rate": 1.5635489683828196e-06, - "loss": 0.5051, + "epoch": 0.7619852164730728, + "grad_norm": 1.8581043481826782, + "learning_rate": 1.9791066562758988e-06, + "loss": 0.6977, "step": 10824 }, { - "epoch": 0.8191139192614733, - "grad_norm": 2.679903984069824, - "learning_rate": 1.5622805930348953e-06, - "loss": 0.667, + "epoch": 0.762055614220345, + "grad_norm": 2.061673164367676, + "learning_rate": 1.9779946283609554e-06, + "loss": 0.6176, "step": 10825 }, { - "epoch": 0.8191895879838069, - "grad_norm": 2.0182337760925293, - "learning_rate": 1.5610126834610141e-06, - "loss": 0.6008, + "epoch": 0.7621260119676171, + "grad_norm": 2.625190496444702, + "learning_rate": 1.976882861544617e-06, + "loss": 0.7646, "step": 10826 }, { - "epoch": 0.8192652567061405, - "grad_norm": 1.927375316619873, - "learning_rate": 1.5597452397405818e-06, - "loss": 0.612, + "epoch": 0.7621964097148891, + "grad_norm": 1.7892934083938599, + "learning_rate": 1.975771355884693e-06, + "loss": 0.5759, "step": 10827 }, { - "epoch": 0.8193409254284741, - "grad_norm": 2.192244291305542, - "learning_rate": 1.5584782619529688e-06, - "loss": 0.6674, + "epoch": 0.7622668074621612, + "grad_norm": 1.8167368173599243, + "learning_rate": 1.974660111438961e-06, + "loss": 0.657, "step": 10828 }, { - "epoch": 0.8194165941508078, - "grad_norm": 2.570380449295044, - "learning_rate": 1.5572117501775148e-06, - "loss": 0.4887, + "epoch": 0.7623372052094333, + "grad_norm": 1.8639053106307983, + "learning_rate": 1.973549128265204e-06, + "loss": 0.6923, "step": 10829 }, { - "epoch": 0.8194922628731414, - "grad_norm": 2.119783401489258, - "learning_rate": 1.555945704493533e-06, - "loss": 0.7094, + "epoch": 0.7624076029567054, + "grad_norm": 2.0939230918884277, + "learning_rate": 1.9724384064211775e-06, + "loss": 0.639, "step": 10830 }, { - "epoch": 0.819567931595475, - "grad_norm": 2.8816754817962646, - "learning_rate": 1.5546801249803083e-06, - "loss": 0.7619, + "epoch": 0.7624780007039774, + "grad_norm": 1.8771915435791016, + "learning_rate": 1.9713279459646343e-06, + "loss": 0.6156, "step": 10831 }, { - "epoch": 0.8196436003178086, - "grad_norm": 1.987670660018921, - "learning_rate": 1.5534150117170953e-06, - "loss": 0.6066, + "epoch": 0.7625483984512496, + "grad_norm": 2.05932354927063, + "learning_rate": 1.9702177469533025e-06, + "loss": 0.6389, "step": 10832 }, { - "epoch": 0.8197192690401423, - "grad_norm": 2.288383722305298, - "learning_rate": 1.5521503647831193e-06, - "loss": 0.65, + "epoch": 0.7626187961985217, + "grad_norm": 2.2691259384155273, + "learning_rate": 1.9691078094449076e-06, + "loss": 0.5835, "step": 10833 }, { - "epoch": 0.8197949377624759, - "grad_norm": 1.7874622344970703, - "learning_rate": 1.5508861842575773e-06, - "loss": 0.5712, + "epoch": 0.7626891939457937, + "grad_norm": 1.7528700828552246, + "learning_rate": 1.967998133497154e-06, + "loss": 0.6149, "step": 10834 }, { - "epoch": 0.8198706064848095, - "grad_norm": 2.939530372619629, - "learning_rate": 1.549622470219638e-06, - "loss": 0.6617, + "epoch": 0.7627595916930658, + "grad_norm": 1.714844822883606, + "learning_rate": 1.9668887191677346e-06, + "loss": 0.6434, "step": 10835 }, { - "epoch": 0.8199462752071431, - "grad_norm": 2.5253098011016846, - "learning_rate": 1.5483592227484347e-06, - "loss": 0.8274, + "epoch": 0.7628299894403379, + "grad_norm": 1.8516194820404053, + "learning_rate": 1.9657795665143274e-06, + "loss": 0.6767, "step": 10836 }, { - "epoch": 0.8200219439294768, - "grad_norm": 2.590799570083618, - "learning_rate": 1.5470964419230754e-06, - "loss": 0.7798, + "epoch": 0.76290038718761, + "grad_norm": 1.8618766069412231, + "learning_rate": 1.9646706755945994e-06, + "loss": 0.5779, "step": 10837 }, { - "epoch": 0.8200976126518104, - "grad_norm": 2.3619234561920166, - "learning_rate": 1.5458341278226478e-06, - "loss": 0.6679, + "epoch": 0.7629707849348821, + "grad_norm": 2.072801351547241, + "learning_rate": 1.963562046466205e-06, + "loss": 0.6891, "step": 10838 }, { - "epoch": 0.820173281374144, - "grad_norm": 2.0205016136169434, - "learning_rate": 1.544572280526195e-06, - "loss": 0.6351, + "epoch": 0.7630411826821542, + "grad_norm": 2.0085902214050293, + "learning_rate": 1.962453679186782e-06, + "loss": 0.6035, "step": 10839 }, { - "epoch": 0.8202489500964776, - "grad_norm": 2.110157012939453, - "learning_rate": 1.543310900112738e-06, - "loss": 0.6308, + "epoch": 0.7631115804294263, + "grad_norm": 1.8698680400848389, + "learning_rate": 1.9613455738139536e-06, + "loss": 0.7211, "step": 10840 }, { - "epoch": 0.8203246188188112, - "grad_norm": 2.0330491065979004, - "learning_rate": 1.5420499866612723e-06, - "loss": 0.7454, + "epoch": 0.7631819781766983, + "grad_norm": 1.9360501766204834, + "learning_rate": 1.96023773040533e-06, + "loss": 0.6132, "step": 10841 }, { - "epoch": 0.8204002875411449, - "grad_norm": 2.687309980392456, - "learning_rate": 1.5407895402507574e-06, - "loss": 0.78, + "epoch": 0.7632523759239704, + "grad_norm": 1.9527859687805176, + "learning_rate": 1.959130149018513e-06, + "loss": 0.6447, "step": 10842 }, { - "epoch": 0.8204759562634785, - "grad_norm": 3.1039085388183594, - "learning_rate": 1.5395295609601274e-06, - "loss": 0.6781, + "epoch": 0.7633227736712426, + "grad_norm": 2.0749311447143555, + "learning_rate": 1.9580228297110836e-06, + "loss": 0.6568, "step": 10843 }, { - "epoch": 0.8205516249858121, - "grad_norm": 2.10479474067688, - "learning_rate": 1.538270048868286e-06, - "loss": 0.6809, + "epoch": 0.7633931714185146, + "grad_norm": 2.4530117511749268, + "learning_rate": 1.9569157725406157e-06, + "loss": 0.7114, "step": 10844 }, { - "epoch": 0.8206272937081457, - "grad_norm": 5.9827752113342285, - "learning_rate": 1.5370110040541093e-06, - "loss": 0.6575, + "epoch": 0.7634635691657867, + "grad_norm": 1.6684939861297607, + "learning_rate": 1.955808977564663e-06, + "loss": 0.6477, "step": 10845 }, { - "epoch": 0.8207029624304794, - "grad_norm": 2.722191572189331, - "learning_rate": 1.535752426596444e-06, - "loss": 0.6425, + "epoch": 0.7635339669130587, + "grad_norm": 1.917960286140442, + "learning_rate": 1.9547024448407705e-06, + "loss": 0.64, "step": 10846 }, { - "epoch": 0.820778631152813, - "grad_norm": 2.3565070629119873, - "learning_rate": 1.534494316574099e-06, - "loss": 0.6546, + "epoch": 0.7636043646603309, + "grad_norm": 1.7582372426986694, + "learning_rate": 1.953596174426465e-06, + "loss": 0.6514, "step": 10847 }, { - "epoch": 0.8208542998751466, - "grad_norm": 2.351691961288452, - "learning_rate": 1.5332366740658685e-06, - "loss": 0.6076, + "epoch": 0.7636747624076029, + "grad_norm": 2.3220207691192627, + "learning_rate": 1.952490166379267e-06, + "loss": 0.7243, "step": 10848 }, { - "epoch": 0.8209299685974802, - "grad_norm": 3.5153005123138428, - "learning_rate": 1.5319794991505105e-06, - "loss": 0.6766, + "epoch": 0.763745160154875, + "grad_norm": 1.99081289768219, + "learning_rate": 1.9513844207566743e-06, + "loss": 0.6853, "step": 10849 }, { - "epoch": 0.8210056373198139, - "grad_norm": 2.6627254486083984, - "learning_rate": 1.530722791906748e-06, - "loss": 0.6439, + "epoch": 0.7638155579021472, + "grad_norm": 2.2082183361053467, + "learning_rate": 1.9502789376161804e-06, + "loss": 0.5839, "step": 10850 }, { - "epoch": 0.8210813060421475, - "grad_norm": 3.6057939529418945, - "learning_rate": 1.5294665524132828e-06, - "loss": 0.6652, + "epoch": 0.7638859556494192, + "grad_norm": 1.570926547050476, + "learning_rate": 1.949173717015257e-06, + "loss": 0.6659, "step": 10851 }, { - "epoch": 0.8211569747644811, - "grad_norm": 1.9653656482696533, - "learning_rate": 1.5282107807487854e-06, - "loss": 0.7099, + "epoch": 0.7639563533966913, + "grad_norm": 1.9713387489318848, + "learning_rate": 1.9480687590113667e-06, + "loss": 0.5807, "step": 10852 }, { - "epoch": 0.8212326434868147, - "grad_norm": 2.3477768898010254, - "learning_rate": 1.5269554769918955e-06, - "loss": 0.6548, + "epoch": 0.7640267511439633, + "grad_norm": 1.6309221982955933, + "learning_rate": 1.9469640636619545e-06, + "loss": 0.6857, "step": 10853 }, { - "epoch": 0.8213083122091484, - "grad_norm": 2.1630735397338867, - "learning_rate": 1.5257006412212244e-06, - "loss": 0.6972, + "epoch": 0.7640971488912355, + "grad_norm": 1.512961983680725, + "learning_rate": 1.9458596310244594e-06, + "loss": 0.5362, "step": 10854 }, { - "epoch": 0.821383980931482, - "grad_norm": 1.8949837684631348, - "learning_rate": 1.524446273515353e-06, - "loss": 0.5324, + "epoch": 0.7641675466385076, + "grad_norm": 1.9155040979385376, + "learning_rate": 1.944755461156297e-06, + "loss": 0.7054, "step": 10855 }, { - "epoch": 0.8214596496538156, - "grad_norm": 2.782655954360962, - "learning_rate": 1.523192373952836e-06, - "loss": 0.6504, + "epoch": 0.7642379443857796, + "grad_norm": 1.8283376693725586, + "learning_rate": 1.943651554114879e-06, + "loss": 0.5709, "step": 10856 }, { - "epoch": 0.8215353183761492, - "grad_norm": 2.5171873569488525, - "learning_rate": 1.5219389426121952e-06, - "loss": 0.6931, + "epoch": 0.7643083421330518, + "grad_norm": 1.951034665107727, + "learning_rate": 1.942547909957596e-06, + "loss": 0.4867, "step": 10857 }, { - "epoch": 0.8216109870984828, - "grad_norm": 2.055389404296875, - "learning_rate": 1.5206859795719249e-06, - "loss": 0.6662, + "epoch": 0.7643787398803238, + "grad_norm": 1.506604790687561, + "learning_rate": 1.941444528741827e-06, + "loss": 0.6714, "step": 10858 }, { - "epoch": 0.8216866558208165, - "grad_norm": 2.1020753383636475, - "learning_rate": 1.5194334849104892e-06, - "loss": 0.7256, + "epoch": 0.7644491376275959, + "grad_norm": 1.6766712665557861, + "learning_rate": 1.9403414105249363e-06, + "loss": 0.6631, "step": 10859 }, { - "epoch": 0.8217623245431501, - "grad_norm": 2.210233211517334, - "learning_rate": 1.5181814587063255e-06, - "loss": 0.7089, + "epoch": 0.764519535374868, + "grad_norm": 2.0153839588165283, + "learning_rate": 1.9392385553642807e-06, + "loss": 0.5752, "step": 10860 }, { - "epoch": 0.8218379932654837, - "grad_norm": 1.9540194272994995, - "learning_rate": 1.5169299010378372e-06, - "loss": 0.6508, + "epoch": 0.7645899331221401, + "grad_norm": 1.9460481405258179, + "learning_rate": 1.9381359633171933e-06, + "loss": 0.6622, "step": 10861 }, { - "epoch": 0.8219136619878173, - "grad_norm": 2.1212716102600098, - "learning_rate": 1.5156788119833983e-06, - "loss": 0.6668, + "epoch": 0.7646603308694122, + "grad_norm": 2.0354034900665283, + "learning_rate": 1.937033634441003e-06, + "loss": 0.6527, "step": 10862 }, { - "epoch": 0.821989330710151, - "grad_norm": 2.4180808067321777, - "learning_rate": 1.5144281916213645e-06, - "loss": 0.6415, + "epoch": 0.7647307286166842, + "grad_norm": 2.0402092933654785, + "learning_rate": 1.9359315687930203e-06, + "loss": 0.6557, "step": 10863 }, { - "epoch": 0.8220649994324846, - "grad_norm": 2.248098373413086, - "learning_rate": 1.5131780400300459e-06, - "loss": 0.6769, + "epoch": 0.7648011263639564, + "grad_norm": 1.772151231765747, + "learning_rate": 1.9348297664305408e-06, + "loss": 0.6895, "step": 10864 }, { - "epoch": 0.8221406681548182, - "grad_norm": 2.054067611694336, - "learning_rate": 1.5119283572877336e-06, - "loss": 0.8053, + "epoch": 0.7648715241112285, + "grad_norm": 2.132749080657959, + "learning_rate": 1.9337282274108476e-06, + "loss": 0.7401, "step": 10865 }, { - "epoch": 0.8222163368771518, - "grad_norm": 2.5258889198303223, - "learning_rate": 1.5106791434726876e-06, - "loss": 0.691, + "epoch": 0.7649419218585005, + "grad_norm": 1.8612194061279297, + "learning_rate": 1.9326269517912143e-06, + "loss": 0.6294, "step": 10866 }, { - "epoch": 0.8222920055994855, - "grad_norm": 2.0589208602905273, - "learning_rate": 1.509430398663137e-06, - "loss": 0.6954, + "epoch": 0.7650123196057727, + "grad_norm": 1.9259722232818604, + "learning_rate": 1.9315259396288926e-06, + "loss": 0.579, "step": 10867 }, { - "epoch": 0.8223676743218191, - "grad_norm": 2.6181116104125977, - "learning_rate": 1.5081821229372813e-06, - "loss": 0.7106, + "epoch": 0.7650827173530447, + "grad_norm": 2.053269863128662, + "learning_rate": 1.9304251909811296e-06, + "loss": 0.6214, "step": 10868 }, { - "epoch": 0.8224433430441527, - "grad_norm": 2.312509059906006, - "learning_rate": 1.5069343163732939e-06, - "loss": 0.6206, + "epoch": 0.7651531151003168, + "grad_norm": 1.7813489437103271, + "learning_rate": 1.9293247059051526e-06, + "loss": 0.6879, "step": 10869 }, { - "epoch": 0.8225190117664863, - "grad_norm": 2.3199472427368164, - "learning_rate": 1.5056869790493144e-06, - "loss": 0.7401, + "epoch": 0.7652235128475888, + "grad_norm": 2.142317295074463, + "learning_rate": 1.9282244844581757e-06, + "loss": 0.6546, "step": 10870 }, { - "epoch": 0.8225946804888199, - "grad_norm": 2.136983871459961, - "learning_rate": 1.5044401110434582e-06, - "loss": 0.6275, + "epoch": 0.765293910594861, + "grad_norm": 1.6138652563095093, + "learning_rate": 1.9271245266973995e-06, + "loss": 0.5844, "step": 10871 }, { - "epoch": 0.8226703492111536, - "grad_norm": 1.8843696117401123, - "learning_rate": 1.503193712433803e-06, - "loss": 0.6358, + "epoch": 0.7653643083421331, + "grad_norm": 1.815242886543274, + "learning_rate": 1.9260248326800156e-06, + "loss": 0.7901, "step": 10872 }, { - "epoch": 0.8227460179334872, - "grad_norm": 2.7073588371276855, - "learning_rate": 1.5019477832984042e-06, - "loss": 0.8258, + "epoch": 0.7654347060894051, + "grad_norm": 1.7610828876495361, + "learning_rate": 1.9249254024631936e-06, + "loss": 0.6416, "step": 10873 }, { - "epoch": 0.8228216866558208, - "grad_norm": 2.6563751697540283, - "learning_rate": 1.5007023237152905e-06, - "loss": 0.5735, + "epoch": 0.7655051038366772, + "grad_norm": 1.7874455451965332, + "learning_rate": 1.9238262361040986e-06, + "loss": 0.733, "step": 10874 }, { - "epoch": 0.8228973553781544, - "grad_norm": 2.3090715408325195, - "learning_rate": 1.4994573337624505e-06, - "loss": 0.661, + "epoch": 0.7655755015839493, + "grad_norm": 1.9892115592956543, + "learning_rate": 1.9227273336598744e-06, + "loss": 0.7512, "step": 10875 }, { - "epoch": 0.8229730241004881, - "grad_norm": 2.4490222930908203, - "learning_rate": 1.4982128135178528e-06, - "loss": 0.6891, + "epoch": 0.7656458993312214, + "grad_norm": 1.6593953371047974, + "learning_rate": 1.9216286951876546e-06, + "loss": 0.6017, "step": 10876 }, { - "epoch": 0.8230486928228217, - "grad_norm": 2.2669460773468018, - "learning_rate": 1.496968763059431e-06, - "loss": 0.6218, + "epoch": 0.7657162970784935, + "grad_norm": 2.198329448699951, + "learning_rate": 1.9205303207445563e-06, + "loss": 0.6149, "step": 10877 }, { - "epoch": 0.8231243615451553, - "grad_norm": 2.8864951133728027, - "learning_rate": 1.4957251824650948e-06, - "loss": 0.474, + "epoch": 0.7657866948257656, + "grad_norm": 1.9956153631210327, + "learning_rate": 1.9194322103876886e-06, + "loss": 0.6604, "step": 10878 }, { - "epoch": 0.8232000302674889, - "grad_norm": 2.1421849727630615, - "learning_rate": 1.4944820718127179e-06, - "loss": 0.6624, + "epoch": 0.7658570925730377, + "grad_norm": 2.555032730102539, + "learning_rate": 1.9183343641741394e-06, + "loss": 0.5831, "step": 10879 }, { - "epoch": 0.8232756989898226, - "grad_norm": 2.4659669399261475, - "learning_rate": 1.49323943118015e-06, - "loss": 0.6561, + "epoch": 0.7659274903203097, + "grad_norm": 1.8207942247390747, + "learning_rate": 1.9172367821609916e-06, + "loss": 0.7169, "step": 10880 }, { - "epoch": 0.8233513677121562, - "grad_norm": 1.961225986480713, - "learning_rate": 1.4919972606452113e-06, - "loss": 0.6731, + "epoch": 0.7659978880675818, + "grad_norm": 1.7070279121398926, + "learning_rate": 1.916139464405306e-06, + "loss": 0.6024, "step": 10881 }, { - "epoch": 0.8234270364344898, - "grad_norm": 2.310224771499634, - "learning_rate": 1.4907555602856849e-06, - "loss": 0.7196, + "epoch": 0.766068285814854, + "grad_norm": 2.300766944885254, + "learning_rate": 1.9150424109641333e-06, + "loss": 0.6684, "step": 10882 }, { - "epoch": 0.8235027051568234, - "grad_norm": 2.2171401977539062, - "learning_rate": 1.4895143301793321e-06, - "loss": 0.8323, + "epoch": 0.766138683562126, + "grad_norm": 1.876111388206482, + "learning_rate": 1.91394562189451e-06, + "loss": 0.6562, "step": 10883 }, { - "epoch": 0.823578373879157, - "grad_norm": 3.4674880504608154, - "learning_rate": 1.4882735704038853e-06, - "loss": 0.6263, + "epoch": 0.7662090813093981, + "grad_norm": 1.703229546546936, + "learning_rate": 1.9128490972534617e-06, + "loss": 0.6523, "step": 10884 }, { - "epoch": 0.8236540426014907, - "grad_norm": 2.4391887187957764, - "learning_rate": 1.4870332810370457e-06, - "loss": 0.7007, + "epoch": 0.7662794790566702, + "grad_norm": 2.0282440185546875, + "learning_rate": 1.9117528370979936e-06, + "loss": 0.7149, "step": 10885 }, { - "epoch": 0.8237297113238243, - "grad_norm": 2.19388747215271, - "learning_rate": 1.48579346215648e-06, - "loss": 0.744, + "epoch": 0.7663498768039423, + "grad_norm": 1.8566932678222656, + "learning_rate": 1.910656841485106e-06, + "loss": 0.6327, "step": 10886 }, { - "epoch": 0.8238053800461579, - "grad_norm": 2.4731009006500244, - "learning_rate": 1.4845541138398313e-06, - "loss": 0.6056, + "epoch": 0.7664202745512143, + "grad_norm": 2.165825843811035, + "learning_rate": 1.9095611104717788e-06, + "loss": 0.7182, "step": 10887 }, { - "epoch": 0.8238810487684916, - "grad_norm": 1.8592519760131836, - "learning_rate": 1.4833152361647122e-06, - "loss": 0.642, + "epoch": 0.7664906722984864, + "grad_norm": 1.8705730438232422, + "learning_rate": 1.908465644114977e-06, + "loss": 0.5856, "step": 10888 }, { - "epoch": 0.8239567174908252, - "grad_norm": 1.9034373760223389, - "learning_rate": 1.4820768292087048e-06, - "loss": 0.7955, + "epoch": 0.7665610700457586, + "grad_norm": 2.1930713653564453, + "learning_rate": 1.90737044247166e-06, + "loss": 0.7689, "step": 10889 }, { - "epoch": 0.8240323862131588, - "grad_norm": 2.9963486194610596, - "learning_rate": 1.480838893049362e-06, - "loss": 0.7364, + "epoch": 0.7666314677930306, + "grad_norm": 1.9775259494781494, + "learning_rate": 1.9062755055987632e-06, + "loss": 0.7034, "step": 10890 }, { - "epoch": 0.8241080549354924, - "grad_norm": 2.4194231033325195, - "learning_rate": 1.4796014277642077e-06, - "loss": 0.638, + "epoch": 0.7667018655403027, + "grad_norm": 1.88764488697052, + "learning_rate": 1.905180833553219e-06, + "loss": 0.6299, "step": 10891 }, { - "epoch": 0.824183723657826, - "grad_norm": 2.1171884536743164, - "learning_rate": 1.4783644334307374e-06, - "loss": 0.7346, + "epoch": 0.7667722632875748, + "grad_norm": 1.8017656803131104, + "learning_rate": 1.904086426391937e-06, + "loss": 0.6329, "step": 10892 }, { - "epoch": 0.8242593923801597, - "grad_norm": 2.5294923782348633, - "learning_rate": 1.4771279101264106e-06, - "loss": 0.5958, + "epoch": 0.7668426610348469, + "grad_norm": 1.963010311126709, + "learning_rate": 1.9029922841718166e-06, + "loss": 0.7608, "step": 10893 }, { - "epoch": 0.8243350611024933, - "grad_norm": 2.560204029083252, - "learning_rate": 1.4758918579286686e-06, - "loss": 0.7642, + "epoch": 0.766913058782119, + "grad_norm": 1.722939133644104, + "learning_rate": 1.9018984069497415e-06, + "loss": 0.585, "step": 10894 }, { - "epoch": 0.8244107298248269, - "grad_norm": 2.2272257804870605, - "learning_rate": 1.4746562769149163e-06, - "loss": 0.6933, + "epoch": 0.766983456529391, + "grad_norm": 1.884153127670288, + "learning_rate": 1.9008047947825874e-06, + "loss": 0.5385, "step": 10895 }, { - "epoch": 0.8244863985471605, - "grad_norm": 2.733365774154663, - "learning_rate": 1.473421167162525e-06, - "loss": 0.6733, + "epoch": 0.7670538542766632, + "grad_norm": 2.174384117126465, + "learning_rate": 1.899711447727208e-06, + "loss": 0.7878, "step": 10896 }, { - "epoch": 0.8245620672694941, - "grad_norm": 2.285641670227051, - "learning_rate": 1.4721865287488448e-06, - "loss": 0.6461, + "epoch": 0.7671242520239352, + "grad_norm": 1.7687455415725708, + "learning_rate": 1.8986183658404519e-06, + "loss": 0.6768, "step": 10897 }, { - "epoch": 0.8246377359918278, - "grad_norm": 2.4050447940826416, - "learning_rate": 1.4709523617511898e-06, - "loss": 0.6534, + "epoch": 0.7671946497712073, + "grad_norm": 1.6976051330566406, + "learning_rate": 1.8975255491791465e-06, + "loss": 0.7771, "step": 10898 }, { - "epoch": 0.8247134047141614, - "grad_norm": 2.5170106887817383, - "learning_rate": 1.4697186662468542e-06, - "loss": 0.5798, + "epoch": 0.7672650475184795, + "grad_norm": 2.164806365966797, + "learning_rate": 1.896432997800109e-06, + "loss": 0.6698, "step": 10899 }, { - "epoch": 0.824789073436495, - "grad_norm": 2.0948894023895264, - "learning_rate": 1.4684854423130891e-06, - "loss": 0.7217, + "epoch": 0.7673354452657515, + "grad_norm": 1.717596411705017, + "learning_rate": 1.8953407117601401e-06, + "loss": 0.6494, "step": 10900 }, { - "epoch": 0.8248647421588287, - "grad_norm": 2.6201298236846924, - "learning_rate": 1.467252690027126e-06, - "loss": 0.6472, + "epoch": 0.7674058430130236, + "grad_norm": 2.1846981048583984, + "learning_rate": 1.8942486911160328e-06, + "loss": 0.6715, "step": 10901 }, { - "epoch": 0.8249404108811623, - "grad_norm": 2.4255621433258057, - "learning_rate": 1.466020409466163e-06, - "loss": 0.7014, + "epoch": 0.7674762407602956, + "grad_norm": 1.9375718832015991, + "learning_rate": 1.8931569359245584e-06, + "loss": 0.6595, "step": 10902 }, { - "epoch": 0.8250160796034959, - "grad_norm": 2.1704020500183105, - "learning_rate": 1.4647886007073692e-06, - "loss": 0.6657, + "epoch": 0.7675466385075678, + "grad_norm": 2.1230781078338623, + "learning_rate": 1.892065446242481e-06, + "loss": 0.6517, "step": 10903 }, { - "epoch": 0.8250917483258295, - "grad_norm": 2.4163525104522705, - "learning_rate": 1.463557263827886e-06, - "loss": 0.7631, + "epoch": 0.7676170362548398, + "grad_norm": 1.8957995176315308, + "learning_rate": 1.890974222126548e-06, + "loss": 0.655, "step": 10904 }, { - "epoch": 0.8251674170481631, - "grad_norm": 2.173043966293335, - "learning_rate": 1.4623263989048226e-06, - "loss": 0.7403, + "epoch": 0.7676874340021119, + "grad_norm": 1.9107563495635986, + "learning_rate": 1.8898832636334918e-06, + "loss": 0.6284, "step": 10905 }, { - "epoch": 0.8252430857704968, - "grad_norm": 2.1846542358398438, - "learning_rate": 1.4610960060152616e-06, - "loss": 0.7112, + "epoch": 0.7677578317493841, + "grad_norm": 1.715765118598938, + "learning_rate": 1.8887925708200306e-06, + "loss": 0.5767, "step": 10906 }, { - "epoch": 0.8253187544928304, - "grad_norm": 2.4031577110290527, - "learning_rate": 1.4598660852362505e-06, - "loss": 0.6655, + "epoch": 0.7678282294966561, + "grad_norm": 2.4516358375549316, + "learning_rate": 1.8877021437428752e-06, + "loss": 0.6143, "step": 10907 }, { - "epoch": 0.825394423215164, - "grad_norm": 3.223851442337036, - "learning_rate": 1.4586366366448113e-06, - "loss": 0.6503, + "epoch": 0.7678986272439282, + "grad_norm": 2.193472146987915, + "learning_rate": 1.8866119824587127e-06, + "loss": 0.6454, "step": 10908 }, { - "epoch": 0.8254700919374977, - "grad_norm": 2.421539545059204, - "learning_rate": 1.4574076603179413e-06, - "loss": 0.6976, + "epoch": 0.7679690249912002, + "grad_norm": 1.895568609237671, + "learning_rate": 1.885522087024226e-06, + "loss": 0.6085, "step": 10909 }, { - "epoch": 0.8255457606598312, - "grad_norm": 3.1366801261901855, - "learning_rate": 1.4561791563325965e-06, - "loss": 0.62, + "epoch": 0.7680394227384724, + "grad_norm": 1.7502933740615845, + "learning_rate": 1.8844324574960767e-06, + "loss": 0.6933, "step": 10910 }, { - "epoch": 0.8256214293821649, - "grad_norm": 2.170020818710327, - "learning_rate": 1.454951124765714e-06, - "loss": 0.66, + "epoch": 0.7681098204857445, + "grad_norm": 1.8721530437469482, + "learning_rate": 1.8833430939309203e-06, + "loss": 0.6475, "step": 10911 }, { - "epoch": 0.8256970981044985, - "grad_norm": 2.331679344177246, - "learning_rate": 1.4537235656941952e-06, - "loss": 0.6694, + "epoch": 0.7681802182330165, + "grad_norm": 1.7968568801879883, + "learning_rate": 1.8822539963853866e-06, + "loss": 0.6553, "step": 10912 }, { - "epoch": 0.8257727668268321, - "grad_norm": 1.9958034753799438, - "learning_rate": 1.4524964791949157e-06, - "loss": 0.6669, + "epoch": 0.7682506159802887, + "grad_norm": 2.4510445594787598, + "learning_rate": 1.8811651649161046e-06, + "loss": 0.7212, "step": 10913 }, { - "epoch": 0.8258484355491658, - "grad_norm": 1.878063440322876, - "learning_rate": 1.4512698653447153e-06, - "loss": 0.6129, + "epoch": 0.7683210137275607, + "grad_norm": 1.7534704208374023, + "learning_rate": 1.880076599579679e-06, + "loss": 0.5445, "step": 10914 }, { - "epoch": 0.8259241042714994, - "grad_norm": 2.260777711868286, - "learning_rate": 1.450043724220413e-06, - "loss": 0.6623, + "epoch": 0.7683914114748328, + "grad_norm": 1.8581663370132446, + "learning_rate": 1.8789883004327106e-06, + "loss": 0.6029, "step": 10915 }, { - "epoch": 0.825999772993833, - "grad_norm": 2.2355360984802246, - "learning_rate": 1.4488180558987921e-06, - "loss": 0.5674, + "epoch": 0.7684618092221049, + "grad_norm": 2.13411808013916, + "learning_rate": 1.877900267531776e-06, + "loss": 0.7641, "step": 10916 }, { - "epoch": 0.8260754417161666, - "grad_norm": 2.1847424507141113, - "learning_rate": 1.4475928604566107e-06, - "loss": 0.7828, + "epoch": 0.768532206969377, + "grad_norm": 2.026169776916504, + "learning_rate": 1.8768125009334492e-06, + "loss": 0.6657, "step": 10917 }, { - "epoch": 0.8261511104385002, - "grad_norm": 2.2542574405670166, - "learning_rate": 1.4463681379705883e-06, - "loss": 0.804, + "epoch": 0.7686026047166491, + "grad_norm": 2.0005686283111572, + "learning_rate": 1.8757250006942766e-06, + "loss": 0.5663, "step": 10918 }, { - "epoch": 0.8262267791608339, - "grad_norm": 1.649489402770996, - "learning_rate": 1.4451438885174242e-06, - "loss": 0.7146, + "epoch": 0.7686730024639211, + "grad_norm": 2.4174747467041016, + "learning_rate": 1.8746377668708037e-06, + "loss": 0.7315, "step": 10919 }, { - "epoch": 0.8263024478831675, - "grad_norm": 2.6824514865875244, - "learning_rate": 1.4439201121737882e-06, - "loss": 0.6701, + "epoch": 0.7687434002111933, + "grad_norm": 1.8281002044677734, + "learning_rate": 1.8735507995195536e-06, + "loss": 0.6467, "step": 10920 }, { - "epoch": 0.8263781166055011, - "grad_norm": 3.05550217628479, - "learning_rate": 1.4426968090163127e-06, - "loss": 0.4885, + "epoch": 0.7688137979584654, + "grad_norm": 1.9262804985046387, + "learning_rate": 1.8724640986970417e-06, + "loss": 0.6382, "step": 10921 }, { - "epoch": 0.8264537853278348, - "grad_norm": 2.190661907196045, - "learning_rate": 1.4414739791216062e-06, - "loss": 0.5637, + "epoch": 0.7688841957057374, + "grad_norm": 1.853500247001648, + "learning_rate": 1.871377664459763e-06, + "loss": 0.6924, "step": 10922 }, { - "epoch": 0.8265294540501683, - "grad_norm": 2.1329610347747803, - "learning_rate": 1.4402516225662454e-06, - "loss": 0.6271, + "epoch": 0.7689545934530095, + "grad_norm": 1.6917481422424316, + "learning_rate": 1.8702914968642088e-06, + "loss": 0.7336, "step": 10923 }, { - "epoch": 0.826605122772502, - "grad_norm": 2.2187185287475586, - "learning_rate": 1.43902973942678e-06, - "loss": 0.7154, + "epoch": 0.7690249912002816, + "grad_norm": 1.9509743452072144, + "learning_rate": 1.869205595966841e-06, + "loss": 0.6231, "step": 10924 }, { - "epoch": 0.8266807914948356, - "grad_norm": 2.099266529083252, - "learning_rate": 1.4378083297797278e-06, - "loss": 0.5802, + "epoch": 0.7690953889475537, + "grad_norm": 1.7021114826202393, + "learning_rate": 1.868119961824123e-06, + "loss": 0.5654, "step": 10925 }, { - "epoch": 0.8267564602171692, - "grad_norm": 2.433722734451294, - "learning_rate": 1.4365873937015758e-06, - "loss": 0.7381, + "epoch": 0.7691657866948257, + "grad_norm": 2.0035266876220703, + "learning_rate": 1.8670345944924946e-06, + "loss": 0.6344, "step": 10926 }, { - "epoch": 0.8268321289395029, - "grad_norm": 2.2790136337280273, - "learning_rate": 1.4353669312687878e-06, - "loss": 0.6474, + "epoch": 0.7692361844420978, + "grad_norm": 1.7008312940597534, + "learning_rate": 1.8659494940283876e-06, + "loss": 0.741, "step": 10927 }, { - "epoch": 0.8269077976618365, - "grad_norm": 2.4600353240966797, - "learning_rate": 1.4341469425577866e-06, - "loss": 0.7024, + "epoch": 0.76930658218937, + "grad_norm": 1.8191039562225342, + "learning_rate": 1.864864660488214e-06, + "loss": 0.6582, "step": 10928 }, { - "epoch": 0.8269834663841701, - "grad_norm": 2.4430034160614014, - "learning_rate": 1.432927427644973e-06, - "loss": 0.6797, + "epoch": 0.769376979936642, + "grad_norm": 1.8303632736206055, + "learning_rate": 1.863780093928382e-06, + "loss": 0.5956, "step": 10929 }, { - "epoch": 0.8270591351065038, - "grad_norm": 6.050069332122803, - "learning_rate": 1.431708386606721e-06, - "loss": 0.6611, + "epoch": 0.7694473776839141, + "grad_norm": 1.8405649662017822, + "learning_rate": 1.86269579440527e-06, + "loss": 0.7059, "step": 10930 }, { - "epoch": 0.8271348038288373, - "grad_norm": 2.662050485610962, - "learning_rate": 1.4304898195193705e-06, - "loss": 0.6946, + "epoch": 0.7695177754311862, + "grad_norm": 1.9309446811676025, + "learning_rate": 1.8616117619752588e-06, + "loss": 0.6094, "step": 10931 }, { - "epoch": 0.827210472551171, - "grad_norm": 2.608130693435669, - "learning_rate": 1.4292717264592286e-06, - "loss": 0.7405, + "epoch": 0.7695881731784583, + "grad_norm": 1.6489958763122559, + "learning_rate": 1.860527996694703e-06, + "loss": 0.618, "step": 10932 }, { - "epoch": 0.8272861412735046, - "grad_norm": 2.265187978744507, - "learning_rate": 1.428054107502577e-06, - "loss": 0.7296, + "epoch": 0.7696585709257304, + "grad_norm": 2.1765079498291016, + "learning_rate": 1.8594444986199534e-06, + "loss": 0.6673, "step": 10933 }, { - "epoch": 0.8273618099958382, - "grad_norm": 1.9278501272201538, - "learning_rate": 1.426836962725669e-06, - "loss": 0.594, + "epoch": 0.7697289686730024, + "grad_norm": 2.3700578212738037, + "learning_rate": 1.858361267807338e-06, + "loss": 0.6396, "step": 10934 }, { - "epoch": 0.8274374787181719, - "grad_norm": 2.370166540145874, - "learning_rate": 1.4256202922047243e-06, - "loss": 0.5605, + "epoch": 0.7697993664202746, + "grad_norm": 1.6630127429962158, + "learning_rate": 1.8572783043131816e-06, + "loss": 0.4975, "step": 10935 }, { - "epoch": 0.8275131474405054, - "grad_norm": 2.0922703742980957, - "learning_rate": 1.4244040960159356e-06, - "loss": 0.5532, + "epoch": 0.7698697641675466, + "grad_norm": 2.0298945903778076, + "learning_rate": 1.8561956081937788e-06, + "loss": 0.6333, "step": 10936 }, { - "epoch": 0.8275888161628391, - "grad_norm": 2.2597086429595947, - "learning_rate": 1.423188374235464e-06, - "loss": 0.609, + "epoch": 0.7699401619148187, + "grad_norm": 1.7896430492401123, + "learning_rate": 1.8551131795054244e-06, + "loss": 0.5332, "step": 10937 }, { - "epoch": 0.8276644848851727, - "grad_norm": 2.991779088973999, - "learning_rate": 1.4219731269394455e-06, - "loss": 0.6567, + "epoch": 0.7700105596620909, + "grad_norm": 1.6912990808486938, + "learning_rate": 1.854031018304398e-06, + "loss": 0.6503, "step": 10938 }, { - "epoch": 0.8277401536075063, - "grad_norm": 1.8932214975357056, - "learning_rate": 1.4207583542039767e-06, - "loss": 0.5854, + "epoch": 0.7700809574093629, + "grad_norm": 1.779786467552185, + "learning_rate": 1.852949124646957e-06, + "loss": 0.6829, "step": 10939 }, { - "epoch": 0.82781582232984, - "grad_norm": 2.025179147720337, - "learning_rate": 1.4195440561051349e-06, - "loss": 0.7445, + "epoch": 0.770151355156635, + "grad_norm": 1.8129569292068481, + "learning_rate": 1.851867498589355e-06, + "loss": 0.7119, "step": 10940 }, { - "epoch": 0.8278914910521736, - "grad_norm": 2.18306827545166, - "learning_rate": 1.4183302327189654e-06, - "loss": 0.7864, + "epoch": 0.770221752903907, + "grad_norm": 2.1733458042144775, + "learning_rate": 1.8507861401878239e-06, + "loss": 0.677, "step": 10941 }, { - "epoch": 0.8279671597745072, - "grad_norm": 2.069181203842163, - "learning_rate": 1.4171168841214762e-06, - "loss": 0.6587, + "epoch": 0.7702921506511792, + "grad_norm": 2.070523977279663, + "learning_rate": 1.8497050494985848e-06, + "loss": 0.6494, "step": 10942 }, { - "epoch": 0.8280428284968409, - "grad_norm": 1.9649893045425415, - "learning_rate": 1.4159040103886545e-06, - "loss": 0.6386, + "epoch": 0.7703625483984512, + "grad_norm": 1.9014041423797607, + "learning_rate": 1.8486242265778427e-06, + "loss": 0.6024, "step": 10943 }, { - "epoch": 0.8281184972191744, - "grad_norm": 2.0835180282592773, - "learning_rate": 1.4146916115964507e-06, - "loss": 0.5745, + "epoch": 0.7704329461457233, + "grad_norm": 1.9929664134979248, + "learning_rate": 1.847543671481794e-06, + "loss": 0.7296, "step": 10944 }, { - "epoch": 0.8281941659415081, - "grad_norm": 1.8389742374420166, - "learning_rate": 1.413479687820796e-06, - "loss": 0.6632, + "epoch": 0.7705033438929955, + "grad_norm": 1.8248388767242432, + "learning_rate": 1.8464633842666148e-06, + "loss": 0.6451, "step": 10945 }, { - "epoch": 0.8282698346638417, - "grad_norm": 2.533998489379883, - "learning_rate": 1.4122682391375796e-06, - "loss": 0.6618, + "epoch": 0.7705737416402675, + "grad_norm": 1.9449684619903564, + "learning_rate": 1.8453833649884728e-06, + "loss": 0.6758, "step": 10946 }, { - "epoch": 0.8283455033861753, - "grad_norm": 6.8104963302612305, - "learning_rate": 1.411057265622668e-06, - "loss": 0.7536, + "epoch": 0.7706441393875396, + "grad_norm": 2.6143882274627686, + "learning_rate": 1.8443036137035188e-06, + "loss": 0.6623, "step": 10947 }, { - "epoch": 0.828421172108509, - "grad_norm": 2.065166473388672, - "learning_rate": 1.4098467673518954e-06, - "loss": 0.5601, + "epoch": 0.7707145371348116, + "grad_norm": 2.3402364253997803, + "learning_rate": 1.8432241304678886e-06, + "loss": 0.688, "step": 10948 }, { - "epoch": 0.8284968408308425, - "grad_norm": 2.1348156929016113, - "learning_rate": 1.4086367444010704e-06, - "loss": 0.7066, + "epoch": 0.7707849348820838, + "grad_norm": 1.9217042922973633, + "learning_rate": 1.842144915337704e-06, + "loss": 0.7133, "step": 10949 }, { - "epoch": 0.8285725095531762, - "grad_norm": 2.0122158527374268, - "learning_rate": 1.4074271968459609e-06, - "loss": 0.6169, + "epoch": 0.7708553326293559, + "grad_norm": 1.85403311252594, + "learning_rate": 1.8410659683690788e-06, + "loss": 0.622, "step": 10950 }, { - "epoch": 0.8286481782755099, - "grad_norm": 2.3869214057922363, - "learning_rate": 1.4062181247623206e-06, - "loss": 0.651, + "epoch": 0.7709257303766279, + "grad_norm": 2.3979179859161377, + "learning_rate": 1.8399872896181042e-06, + "loss": 0.6194, "step": 10951 }, { - "epoch": 0.8287238469978434, - "grad_norm": 2.8264942169189453, - "learning_rate": 1.4050095282258642e-06, - "loss": 0.6669, + "epoch": 0.7709961281239001, + "grad_norm": 1.7140896320343018, + "learning_rate": 1.838908879140865e-06, + "loss": 0.7445, "step": 10952 }, { - "epoch": 0.8287995157201771, - "grad_norm": 2.2256650924682617, - "learning_rate": 1.4038014073122747e-06, - "loss": 0.6945, + "epoch": 0.7710665258711721, + "grad_norm": 2.0895164012908936, + "learning_rate": 1.8378307369934283e-06, + "loss": 0.6349, "step": 10953 }, { - "epoch": 0.8288751844425107, - "grad_norm": 2.1724610328674316, - "learning_rate": 1.40259376209721e-06, - "loss": 0.618, + "epoch": 0.7711369236184442, + "grad_norm": 2.5773916244506836, + "learning_rate": 1.8367528632318462e-06, + "loss": 0.6175, "step": 10954 }, { - "epoch": 0.8289508531648443, - "grad_norm": 2.1436386108398438, - "learning_rate": 1.401386592656297e-06, - "loss": 0.6178, + "epoch": 0.7712073213657163, + "grad_norm": 1.74588143825531, + "learning_rate": 1.8356752579121564e-06, + "loss": 0.5878, "step": 10955 }, { - "epoch": 0.829026521887178, - "grad_norm": 1.8904942274093628, - "learning_rate": 1.4001798990651317e-06, - "loss": 0.565, + "epoch": 0.7712777191129884, + "grad_norm": 1.9921084642410278, + "learning_rate": 1.8345979210903898e-06, + "loss": 0.6253, "step": 10956 }, { - "epoch": 0.8291021906095115, - "grad_norm": 1.9519450664520264, - "learning_rate": 1.3989736813992826e-06, - "loss": 0.6762, + "epoch": 0.7713481168602605, + "grad_norm": 1.9225609302520752, + "learning_rate": 1.8335208528225534e-06, + "loss": 0.6626, "step": 10957 }, { - "epoch": 0.8291778593318452, - "grad_norm": 2.116001605987549, - "learning_rate": 1.3977679397342863e-06, - "loss": 0.7516, + "epoch": 0.7714185146075325, + "grad_norm": 3.5117552280426025, + "learning_rate": 1.8324440531646484e-06, + "loss": 0.6677, "step": 10958 }, { - "epoch": 0.8292535280541788, - "grad_norm": 2.4981284141540527, - "learning_rate": 1.3965626741456495e-06, - "loss": 0.6231, + "epoch": 0.7714889123548047, + "grad_norm": 1.7947590351104736, + "learning_rate": 1.8313675221726574e-06, + "loss": 0.6492, "step": 10959 }, { - "epoch": 0.8293291967765124, - "grad_norm": 2.15065860748291, - "learning_rate": 1.3953578847088513e-06, - "loss": 0.5323, + "epoch": 0.7715593101020767, + "grad_norm": 1.706742525100708, + "learning_rate": 1.8302912599025492e-06, + "loss": 0.6197, "step": 10960 }, { - "epoch": 0.8294048654988461, - "grad_norm": 2.2730371952056885, - "learning_rate": 1.394153571499339e-06, - "loss": 0.7206, + "epoch": 0.7716297078493488, + "grad_norm": 1.918839931488037, + "learning_rate": 1.8292152664102787e-06, + "loss": 0.6348, "step": 10961 }, { - "epoch": 0.8294805342211796, - "grad_norm": 2.4419403076171875, - "learning_rate": 1.3929497345925299e-06, - "loss": 0.667, + "epoch": 0.7717001055966209, + "grad_norm": 1.9730713367462158, + "learning_rate": 1.8281395417517916e-06, + "loss": 0.6292, "step": 10962 }, { - "epoch": 0.8295562029435133, - "grad_norm": 2.3964638710021973, - "learning_rate": 1.3917463740638146e-06, - "loss": 0.7303, + "epoch": 0.771770503343893, + "grad_norm": 1.7873406410217285, + "learning_rate": 1.8270640859830123e-06, + "loss": 0.6391, "step": 10963 }, { - "epoch": 0.829631871665847, - "grad_norm": 2.3041999340057373, - "learning_rate": 1.3905434899885471e-06, - "loss": 0.657, + "epoch": 0.7718409010911651, + "grad_norm": 1.8078564405441284, + "learning_rate": 1.8259888991598568e-06, + "loss": 0.5736, "step": 10964 }, { - "epoch": 0.8297075403881805, - "grad_norm": 2.5561516284942627, - "learning_rate": 1.389341082442057e-06, - "loss": 0.6983, + "epoch": 0.7719112988384371, + "grad_norm": 2.1115171909332275, + "learning_rate": 1.8249139813382254e-06, + "loss": 0.6998, "step": 10965 }, { - "epoch": 0.8297832091105142, - "grad_norm": 1.8878254890441895, - "learning_rate": 1.3881391514996473e-06, - "loss": 0.6237, + "epoch": 0.7719816965857093, + "grad_norm": 2.4990203380584717, + "learning_rate": 1.8238393325740026e-06, + "loss": 0.6319, "step": 10966 }, { - "epoch": 0.8298588778328478, - "grad_norm": 3.115190029144287, - "learning_rate": 1.3869376972365825e-06, - "loss": 0.798, + "epoch": 0.7720520943329814, + "grad_norm": 1.9439697265625, + "learning_rate": 1.8227649529230582e-06, + "loss": 0.5961, "step": 10967 }, { - "epoch": 0.8299345465551814, - "grad_norm": 2.7332189083099365, - "learning_rate": 1.3857367197281024e-06, - "loss": 0.6883, + "epoch": 0.7721224920802534, + "grad_norm": 2.3151116371154785, + "learning_rate": 1.8216908424412556e-06, + "loss": 0.6392, "step": 10968 }, { - "epoch": 0.8300102152775151, - "grad_norm": 2.638230562210083, - "learning_rate": 1.3845362190494161e-06, - "loss": 0.5863, + "epoch": 0.7721928898275255, + "grad_norm": 1.686690330505371, + "learning_rate": 1.8206170011844327e-06, + "loss": 0.5782, "step": 10969 }, { - "epoch": 0.8300858839998486, - "grad_norm": 2.4270436763763428, - "learning_rate": 1.3833361952757031e-06, - "loss": 0.6456, + "epoch": 0.7722632875747976, + "grad_norm": 1.5983541011810303, + "learning_rate": 1.8195434292084252e-06, + "loss": 0.6726, "step": 10970 }, { - "epoch": 0.8301615527221823, - "grad_norm": 1.9929462671279907, - "learning_rate": 1.3821366484821138e-06, - "loss": 0.6827, + "epoch": 0.7723336853220697, + "grad_norm": 2.0497686862945557, + "learning_rate": 1.8184701265690468e-06, + "loss": 0.7441, "step": 10971 }, { - "epoch": 0.830237221444516, - "grad_norm": 4.07589864730835, - "learning_rate": 1.3809375787437656e-06, - "loss": 0.6311, + "epoch": 0.7724040830693418, + "grad_norm": 1.9763208627700806, + "learning_rate": 1.8173970933220988e-06, + "loss": 0.5735, "step": 10972 }, { - "epoch": 0.8303128901668495, - "grad_norm": 2.5167202949523926, - "learning_rate": 1.3797389861357507e-06, - "loss": 0.7506, + "epoch": 0.7724744808166139, + "grad_norm": 1.6335068941116333, + "learning_rate": 1.8163243295233678e-06, + "loss": 0.711, "step": 10973 }, { - "epoch": 0.8303885588891832, - "grad_norm": 1.9688563346862793, - "learning_rate": 1.378540870733128e-06, - "loss": 0.5853, + "epoch": 0.772544878563886, + "grad_norm": 2.3328099250793457, + "learning_rate": 1.8152518352286312e-06, + "loss": 0.6749, "step": 10974 }, { - "epoch": 0.8304642276115167, - "grad_norm": 2.244810104370117, - "learning_rate": 1.3773432326109234e-06, - "loss": 0.6253, + "epoch": 0.772615276311158, + "grad_norm": 1.8194034099578857, + "learning_rate": 1.8141796104936454e-06, + "loss": 0.5977, "step": 10975 }, { - "epoch": 0.8305398963338504, - "grad_norm": 1.8359615802764893, - "learning_rate": 1.376146071844142e-06, - "loss": 0.7278, + "epoch": 0.7726856740584301, + "grad_norm": 1.787309169769287, + "learning_rate": 1.813107655374161e-06, + "loss": 0.623, "step": 10976 }, { - "epoch": 0.8306155650561841, - "grad_norm": 2.00067138671875, - "learning_rate": 1.374949388507754e-06, - "loss": 0.6246, + "epoch": 0.7727560718057023, + "grad_norm": 1.773770809173584, + "learning_rate": 1.8120359699259062e-06, + "loss": 0.6557, "step": 10977 }, { - "epoch": 0.8306912337785176, - "grad_norm": 2.020059108734131, - "learning_rate": 1.3737531826766962e-06, - "loss": 0.6065, + "epoch": 0.7728264695529743, + "grad_norm": 1.7153410911560059, + "learning_rate": 1.8109645542045996e-06, + "loss": 0.679, "step": 10978 }, { - "epoch": 0.8307669025008513, - "grad_norm": 2.200312614440918, - "learning_rate": 1.3725574544258797e-06, - "loss": 0.7528, + "epoch": 0.7728968673002464, + "grad_norm": 1.954724907875061, + "learning_rate": 1.8098934082659445e-06, + "loss": 0.6976, "step": 10979 }, { - "epoch": 0.830842571223185, - "grad_norm": 2.1670212745666504, - "learning_rate": 1.3713622038301856e-06, - "loss": 0.6273, + "epoch": 0.7729672650475184, + "grad_norm": 1.7913336753845215, + "learning_rate": 1.8088225321656324e-06, + "loss": 0.6446, "step": 10980 }, { - "epoch": 0.8309182399455185, - "grad_norm": 2.5770716667175293, - "learning_rate": 1.3701674309644652e-06, - "loss": 0.6216, + "epoch": 0.7730376627947906, + "grad_norm": 1.9435333013534546, + "learning_rate": 1.8077519259593369e-06, + "loss": 0.6504, "step": 10981 }, { - "epoch": 0.8309939086678522, - "grad_norm": 3.1510024070739746, - "learning_rate": 1.3689731359035375e-06, - "loss": 0.6588, + "epoch": 0.7731080605420626, + "grad_norm": 1.9646354913711548, + "learning_rate": 1.806681589702723e-06, + "loss": 0.5554, "step": 10982 }, { - "epoch": 0.8310695773901857, - "grad_norm": 3.46155047416687, - "learning_rate": 1.3677793187221936e-06, - "loss": 0.5913, + "epoch": 0.7731784582893347, + "grad_norm": 1.7055813074111938, + "learning_rate": 1.8056115234514358e-06, + "loss": 0.6614, "step": 10983 }, { - "epoch": 0.8311452461125194, - "grad_norm": 1.877045750617981, - "learning_rate": 1.3665859794951969e-06, - "loss": 0.5908, + "epoch": 0.7732488560366069, + "grad_norm": 2.268397331237793, + "learning_rate": 1.8045417272611107e-06, + "loss": 0.6145, "step": 10984 }, { - "epoch": 0.831220914834853, - "grad_norm": 2.1516778469085693, - "learning_rate": 1.3653931182972716e-06, - "loss": 0.675, + "epoch": 0.7733192537838789, + "grad_norm": 2.0758512020111084, + "learning_rate": 1.8034722011873641e-06, + "loss": 0.6764, "step": 10985 }, { - "epoch": 0.8312965835571866, - "grad_norm": 2.5128893852233887, - "learning_rate": 1.3642007352031238e-06, - "loss": 0.7084, + "epoch": 0.773389651531151, + "grad_norm": 1.9915462732315063, + "learning_rate": 1.8024029452858066e-06, + "loss": 0.7042, "step": 10986 }, { - "epoch": 0.8313722522795203, - "grad_norm": 2.5535311698913574, - "learning_rate": 1.3630088302874237e-06, - "loss": 0.7207, + "epoch": 0.773460049278423, + "grad_norm": 1.839612364768982, + "learning_rate": 1.8013339596120244e-06, + "loss": 0.5654, "step": 10987 }, { - "epoch": 0.8314479210018538, - "grad_norm": 2.579092502593994, - "learning_rate": 1.3618174036248138e-06, - "loss": 0.6827, + "epoch": 0.7735304470256952, + "grad_norm": 3.047257661819458, + "learning_rate": 1.8002652442216006e-06, + "loss": 0.6339, "step": 10988 }, { - "epoch": 0.8315235897241875, - "grad_norm": 2.279123306274414, - "learning_rate": 1.3606264552899005e-06, - "loss": 0.7558, + "epoch": 0.7736008447729673, + "grad_norm": 2.1048054695129395, + "learning_rate": 1.7991967991700959e-06, + "loss": 0.6484, "step": 10989 }, { - "epoch": 0.8315992584465212, - "grad_norm": 2.163329839706421, - "learning_rate": 1.359435985357268e-06, - "loss": 0.6043, + "epoch": 0.7736712425202393, + "grad_norm": 1.809146523475647, + "learning_rate": 1.7981286245130568e-06, + "loss": 0.5585, "step": 10990 }, { - "epoch": 0.8316749271688547, - "grad_norm": 2.061577320098877, - "learning_rate": 1.3582459939014655e-06, - "loss": 0.7602, + "epoch": 0.7737416402675115, + "grad_norm": 1.606680154800415, + "learning_rate": 1.7970607203060243e-06, + "loss": 0.564, "step": 10991 }, { - "epoch": 0.8317505958911884, - "grad_norm": 2.2457826137542725, - "learning_rate": 1.3570564809970164e-06, - "loss": 0.6957, + "epoch": 0.7738120380147835, + "grad_norm": 2.022623300552368, + "learning_rate": 1.7959930866045149e-06, + "loss": 0.5577, "step": 10992 }, { - "epoch": 0.831826264613522, - "grad_norm": 2.5345616340637207, - "learning_rate": 1.3558674467184096e-06, - "loss": 0.6992, + "epoch": 0.7738824357620556, + "grad_norm": 1.6203343868255615, + "learning_rate": 1.79492572346404e-06, + "loss": 0.6333, "step": 10993 }, { - "epoch": 0.8319019333358556, - "grad_norm": 1.9230178594589233, - "learning_rate": 1.354678891140108e-06, - "loss": 0.7109, + "epoch": 0.7739528335093278, + "grad_norm": 2.0182344913482666, + "learning_rate": 1.7938586309400905e-06, + "loss": 0.6851, "step": 10994 }, { - "epoch": 0.8319776020581893, - "grad_norm": 1.9315879344940186, - "learning_rate": 1.3534908143365452e-06, - "loss": 0.6629, + "epoch": 0.7740232312565998, + "grad_norm": 1.7931199073791504, + "learning_rate": 1.792791809088146e-06, + "loss": 0.5804, "step": 10995 }, { - "epoch": 0.8320532707805228, - "grad_norm": 2.1779463291168213, - "learning_rate": 1.352303216382114e-06, - "loss": 0.7043, + "epoch": 0.7740936290038719, + "grad_norm": 2.1684651374816895, + "learning_rate": 1.7917252579636691e-06, + "loss": 0.7803, "step": 10996 }, { - "epoch": 0.8321289395028565, - "grad_norm": 1.9600178003311157, - "learning_rate": 1.3511160973511935e-06, - "loss": 0.7524, + "epoch": 0.7741640267511439, + "grad_norm": 1.7898391485214233, + "learning_rate": 1.7906589776221154e-06, + "loss": 0.678, "step": 10997 }, { - "epoch": 0.8322046082251902, - "grad_norm": 4.411078929901123, - "learning_rate": 1.3499294573181253e-06, - "loss": 0.749, + "epoch": 0.7742344244984161, + "grad_norm": 2.2383217811584473, + "learning_rate": 1.7895929681189167e-06, + "loss": 0.5998, "step": 10998 }, { - "epoch": 0.8322802769475237, - "grad_norm": 3.7787492275238037, - "learning_rate": 1.3487432963572152e-06, - "loss": 0.5859, + "epoch": 0.7743048222456881, + "grad_norm": 1.791656494140625, + "learning_rate": 1.7885272295095006e-06, + "loss": 0.4995, "step": 10999 }, { - "epoch": 0.8323559456698574, - "grad_norm": 2.2628886699676514, - "learning_rate": 1.3475576145427465e-06, - "loss": 0.7292, + "epoch": 0.7743752199929602, + "grad_norm": 1.7421952486038208, + "learning_rate": 1.787461761849274e-06, + "loss": 0.6728, "step": 11000 }, { - "epoch": 0.8324316143921909, - "grad_norm": 2.6907804012298584, - "learning_rate": 1.346372411948969e-06, - "loss": 0.6431, + "epoch": 0.7744456177402324, + "grad_norm": 2.089238405227661, + "learning_rate": 1.7863965651936303e-06, + "loss": 0.6741, "step": 11001 }, { - "epoch": 0.8325072831145246, - "grad_norm": 3.0805437564849854, - "learning_rate": 1.3451876886501101e-06, - "loss": 0.5905, + "epoch": 0.7745160154875044, + "grad_norm": 1.776188850402832, + "learning_rate": 1.7853316395979495e-06, + "loss": 0.7807, "step": 11002 }, { - "epoch": 0.8325829518368583, - "grad_norm": 2.1811044216156006, - "learning_rate": 1.344003444720356e-06, - "loss": 0.593, + "epoch": 0.7745864132347765, + "grad_norm": 1.5738986730575562, + "learning_rate": 1.7842669851176014e-06, + "loss": 0.6167, "step": 11003 }, { - "epoch": 0.8326586205591918, - "grad_norm": 2.125622272491455, - "learning_rate": 1.3428196802338676e-06, - "loss": 0.5257, + "epoch": 0.7746568109820485, + "grad_norm": 1.8955340385437012, + "learning_rate": 1.7832026018079343e-06, + "loss": 0.6394, "step": 11004 }, { - "epoch": 0.8327342892815255, - "grad_norm": 3.8492448329925537, - "learning_rate": 1.3416363952647772e-06, - "loss": 0.6362, + "epoch": 0.7747272087293207, + "grad_norm": 2.0665059089660645, + "learning_rate": 1.7821384897242903e-06, + "loss": 0.5508, "step": 11005 }, { - "epoch": 0.8328099580038592, - "grad_norm": 2.9568638801574707, - "learning_rate": 1.340453589887185e-06, - "loss": 0.655, + "epoch": 0.7747976064765928, + "grad_norm": 2.4398193359375, + "learning_rate": 1.7810746489219927e-06, + "loss": 0.749, "step": 11006 }, { - "epoch": 0.8328856267261927, - "grad_norm": 2.092532157897949, - "learning_rate": 1.3392712641751645e-06, - "loss": 0.6265, + "epoch": 0.7748680042238648, + "grad_norm": 1.9993984699249268, + "learning_rate": 1.7800110794563498e-06, + "loss": 0.5938, "step": 11007 }, { - "epoch": 0.8329612954485264, - "grad_norm": 2.0288496017456055, - "learning_rate": 1.3380894182027548e-06, - "loss": 0.5884, + "epoch": 0.774938401971137, + "grad_norm": 1.561629056930542, + "learning_rate": 1.7789477813826565e-06, + "loss": 0.622, "step": 11008 }, { - "epoch": 0.8330369641708599, - "grad_norm": 2.5260086059570312, - "learning_rate": 1.336908052043969e-06, - "loss": 0.6618, + "epoch": 0.775008799718409, + "grad_norm": 2.344005823135376, + "learning_rate": 1.777884754756199e-06, + "loss": 0.7067, "step": 11009 }, { - "epoch": 0.8331126328931936, - "grad_norm": 2.303161382675171, - "learning_rate": 1.3357271657727847e-06, - "loss": 0.6813, + "epoch": 0.7750791974656811, + "grad_norm": 2.2331273555755615, + "learning_rate": 1.7768219996322395e-06, + "loss": 0.7552, "step": 11010 }, { - "epoch": 0.8331883016155273, - "grad_norm": 2.152733087539673, - "learning_rate": 1.334546759463152e-06, - "loss": 0.5689, + "epoch": 0.7751495952129532, + "grad_norm": 1.8949662446975708, + "learning_rate": 1.7757595160660376e-06, + "loss": 0.5932, "step": 11011 }, { - "epoch": 0.8332639703378608, - "grad_norm": 2.2623064517974854, - "learning_rate": 1.3333668331889998e-06, - "loss": 0.6485, + "epoch": 0.7752199929602253, + "grad_norm": 1.903679370880127, + "learning_rate": 1.7746973041128298e-06, + "loss": 0.6233, "step": 11012 }, { - "epoch": 0.8333396390601945, - "grad_norm": 2.2851810455322266, - "learning_rate": 1.3321873870242097e-06, - "loss": 0.6046, + "epoch": 0.7752903907074974, + "grad_norm": 1.7019728422164917, + "learning_rate": 1.7736353638278406e-06, + "loss": 0.5626, "step": 11013 }, { - "epoch": 0.8334153077825281, - "grad_norm": 1.8821451663970947, - "learning_rate": 1.3310084210426468e-06, - "loss": 0.8392, + "epoch": 0.7753607884547694, + "grad_norm": 1.798667073249817, + "learning_rate": 1.7725736952662809e-06, + "loss": 0.6641, "step": 11014 }, { - "epoch": 0.8334909765048617, - "grad_norm": 2.2863385677337646, - "learning_rate": 1.3298299353181411e-06, - "loss": 0.6131, + "epoch": 0.7754311862020415, + "grad_norm": 2.9549643993377686, + "learning_rate": 1.7715122984833511e-06, + "loss": 0.734, "step": 11015 }, { - "epoch": 0.8335666452271954, - "grad_norm": 2.0595953464508057, - "learning_rate": 1.3286519299244936e-06, - "loss": 0.7144, + "epoch": 0.7755015839493136, + "grad_norm": 1.5921626091003418, + "learning_rate": 1.7704511735342294e-06, + "loss": 0.7424, "step": 11016 }, { - "epoch": 0.8336423139495289, - "grad_norm": 1.9019546508789062, - "learning_rate": 1.3274744049354739e-06, - "loss": 0.574, + "epoch": 0.7755719816965857, + "grad_norm": 2.0752453804016113, + "learning_rate": 1.7693903204740894e-06, + "loss": 0.5622, "step": 11017 }, { - "epoch": 0.8337179826718626, - "grad_norm": 2.477344036102295, - "learning_rate": 1.3262973604248235e-06, - "loss": 0.6929, + "epoch": 0.7756423794438578, + "grad_norm": 1.5865192413330078, + "learning_rate": 1.7683297393580826e-06, + "loss": 0.6085, "step": 11018 }, { - "epoch": 0.8337936513941963, - "grad_norm": 2.8345818519592285, - "learning_rate": 1.325120796466251e-06, - "loss": 0.6346, + "epoch": 0.7757127771911299, + "grad_norm": 1.8185125589370728, + "learning_rate": 1.7672694302413536e-06, + "loss": 0.6853, "step": 11019 }, { - "epoch": 0.8338693201165298, - "grad_norm": 2.6509807109832764, - "learning_rate": 1.323944713133441e-06, - "loss": 0.6845, + "epoch": 0.775783174938402, + "grad_norm": 1.6539126634597778, + "learning_rate": 1.7662093931790232e-06, + "loss": 0.5613, "step": 11020 }, { - "epoch": 0.8339449888388635, - "grad_norm": 2.0513241291046143, - "learning_rate": 1.322769110500036e-06, - "loss": 0.5085, + "epoch": 0.775853572685674, + "grad_norm": 1.8688111305236816, + "learning_rate": 1.7651496282262079e-06, + "loss": 0.5588, "step": 11021 }, { - "epoch": 0.834020657561197, - "grad_norm": 2.247509002685547, - "learning_rate": 1.3215939886396625e-06, - "loss": 0.7244, + "epoch": 0.7759239704329461, + "grad_norm": 2.8568503856658936, + "learning_rate": 1.7640901354380019e-06, + "loss": 0.5551, "step": 11022 }, { - "epoch": 0.8340963262835307, - "grad_norm": 2.2295756340026855, - "learning_rate": 1.3204193476259096e-06, - "loss": 0.6197, + "epoch": 0.7759943681802183, + "grad_norm": 1.5932812690734863, + "learning_rate": 1.7630309148694946e-06, + "loss": 0.5841, "step": 11023 }, { - "epoch": 0.8341719950058644, - "grad_norm": 2.411051034927368, - "learning_rate": 1.3192451875323353e-06, - "loss": 0.6679, + "epoch": 0.7760647659274903, + "grad_norm": 2.0643835067749023, + "learning_rate": 1.7619719665757499e-06, + "loss": 0.7426, "step": 11024 }, { - "epoch": 0.8342476637281979, - "grad_norm": 2.1455461978912354, - "learning_rate": 1.3180715084324689e-06, - "loss": 0.6545, + "epoch": 0.7761351636747624, + "grad_norm": 2.6580164432525635, + "learning_rate": 1.760913290611831e-06, + "loss": 0.6856, "step": 11025 }, { - "epoch": 0.8343233324505316, - "grad_norm": 2.34451961517334, - "learning_rate": 1.3168983103998115e-06, - "loss": 0.6976, + "epoch": 0.7762055614220345, + "grad_norm": 2.4521689414978027, + "learning_rate": 1.7598548870327713e-06, + "loss": 0.6783, "step": 11026 }, { - "epoch": 0.8343990011728653, - "grad_norm": 2.02919602394104, - "learning_rate": 1.3157255935078313e-06, - "loss": 0.5394, + "epoch": 0.7762759591693066, + "grad_norm": 1.7272828817367554, + "learning_rate": 1.7587967558936034e-06, + "loss": 0.6545, "step": 11027 }, { - "epoch": 0.8344746698951988, - "grad_norm": 2.013793468475342, - "learning_rate": 1.3145533578299699e-06, - "loss": 0.8555, + "epoch": 0.7763463569165787, + "grad_norm": 1.7343230247497559, + "learning_rate": 1.7577388972493363e-06, + "loss": 0.6395, "step": 11028 }, { - "epoch": 0.8345503386175325, - "grad_norm": 2.240281581878662, - "learning_rate": 1.3133816034396343e-06, - "loss": 0.6576, + "epoch": 0.7764167546638507, + "grad_norm": 1.8594332933425903, + "learning_rate": 1.7566813111549748e-06, + "loss": 0.7705, "step": 11029 }, { - "epoch": 0.834626007339866, - "grad_norm": 1.8225212097167969, - "learning_rate": 1.3122103304102057e-06, - "loss": 0.7601, + "epoch": 0.7764871524111229, + "grad_norm": 2.1575520038604736, + "learning_rate": 1.7556239976654976e-06, + "loss": 0.6065, "step": 11030 }, { - "epoch": 0.8347016760621997, - "grad_norm": 2.2404656410217285, - "learning_rate": 1.3110395388150296e-06, - "loss": 0.6523, + "epoch": 0.7765575501583949, + "grad_norm": 1.8409764766693115, + "learning_rate": 1.7545669568358825e-06, + "loss": 0.6343, "step": 11031 }, { - "epoch": 0.8347773447845334, - "grad_norm": 1.9908554553985596, - "learning_rate": 1.3098692287274252e-06, - "loss": 0.5598, + "epoch": 0.776627947905667, + "grad_norm": 2.4617669582366943, + "learning_rate": 1.7535101887210784e-06, + "loss": 0.6205, "step": 11032 }, { - "epoch": 0.8348530135068669, - "grad_norm": 2.0587451457977295, - "learning_rate": 1.3086994002206843e-06, - "loss": 0.5893, + "epoch": 0.7766983456529392, + "grad_norm": 1.5973542928695679, + "learning_rate": 1.7524536933760327e-06, + "loss": 0.6165, "step": 11033 }, { - "epoch": 0.8349286822292006, - "grad_norm": 2.665961503982544, - "learning_rate": 1.3075300533680657e-06, - "loss": 0.7025, + "epoch": 0.7767687434002112, + "grad_norm": 1.836305856704712, + "learning_rate": 1.75139747085567e-06, + "loss": 0.6205, "step": 11034 }, { - "epoch": 0.8350043509515341, - "grad_norm": 2.2750911712646484, - "learning_rate": 1.3063611882427943e-06, - "loss": 0.8163, + "epoch": 0.7768391411474833, + "grad_norm": 1.6932168006896973, + "learning_rate": 1.7503415212149077e-06, + "loss": 0.6653, "step": 11035 }, { - "epoch": 0.8350800196738678, - "grad_norm": 2.3275375366210938, - "learning_rate": 1.3051928049180683e-06, - "loss": 0.5284, + "epoch": 0.7769095388947553, + "grad_norm": 1.6556180715560913, + "learning_rate": 1.7492858445086429e-06, + "loss": 0.6805, "step": 11036 }, { - "epoch": 0.8351556883962015, - "grad_norm": 1.4327739477157593, - "learning_rate": 1.304024903467057e-06, - "loss": 0.7461, + "epoch": 0.7769799366420275, + "grad_norm": 2.1157031059265137, + "learning_rate": 1.7482304407917663e-06, + "loss": 0.6376, "step": 11037 }, { - "epoch": 0.835231357118535, - "grad_norm": 3.3276166915893555, - "learning_rate": 1.3028574839628995e-06, - "loss": 0.8109, + "epoch": 0.7770503343892995, + "grad_norm": 2.147099733352661, + "learning_rate": 1.7471753101191409e-06, + "loss": 0.648, "step": 11038 }, { - "epoch": 0.8353070258408687, - "grad_norm": 1.9185841083526611, - "learning_rate": 1.3016905464787009e-06, - "loss": 0.6655, + "epoch": 0.7771207321365716, + "grad_norm": 1.5262255668640137, + "learning_rate": 1.7461204525456285e-06, + "loss": 0.7772, "step": 11039 }, { - "epoch": 0.8353826945632024, - "grad_norm": 1.899623990058899, - "learning_rate": 1.3005240910875395e-06, - "loss": 0.6141, + "epoch": 0.7771911298838438, + "grad_norm": 1.8037056922912598, + "learning_rate": 1.745065868126074e-06, + "loss": 0.4989, "step": 11040 }, { - "epoch": 0.8354583632855359, - "grad_norm": 2.365811586380005, - "learning_rate": 1.2993581178624644e-06, - "loss": 0.6686, + "epoch": 0.7772615276311158, + "grad_norm": 1.5460970401763916, + "learning_rate": 1.7440115569153025e-06, + "loss": 0.5623, "step": 11041 }, { - "epoch": 0.8355340320078696, - "grad_norm": 2.494126558303833, - "learning_rate": 1.298192626876488e-06, - "loss": 0.6618, + "epoch": 0.7773319253783879, + "grad_norm": 1.7980389595031738, + "learning_rate": 1.7429575189681315e-06, + "loss": 0.6013, "step": 11042 }, { - "epoch": 0.8356097007302031, - "grad_norm": 2.977726459503174, - "learning_rate": 1.2970276182026006e-06, - "loss": 0.6845, + "epoch": 0.7774023231256599, + "grad_norm": 1.618548035621643, + "learning_rate": 1.7419037543393606e-06, + "loss": 0.6837, "step": 11043 }, { - "epoch": 0.8356853694525368, - "grad_norm": 2.094541549682617, - "learning_rate": 1.2958630919137614e-06, - "loss": 0.5355, + "epoch": 0.7774727208729321, + "grad_norm": 1.6543536186218262, + "learning_rate": 1.7408502630837756e-06, + "loss": 0.6242, "step": 11044 }, { - "epoch": 0.8357610381748705, - "grad_norm": 1.4946520328521729, - "learning_rate": 1.2946990480828904e-06, - "loss": 0.9194, + "epoch": 0.7775431186202042, + "grad_norm": 2.241278648376465, + "learning_rate": 1.7397970452561464e-06, + "loss": 0.681, "step": 11045 }, { - "epoch": 0.835836706897204, - "grad_norm": 2.1046907901763916, - "learning_rate": 1.293535486782888e-06, - "loss": 0.7676, + "epoch": 0.7776135163674762, + "grad_norm": 1.9618085622787476, + "learning_rate": 1.7387441009112345e-06, + "loss": 0.765, "step": 11046 }, { - "epoch": 0.8359123756195377, - "grad_norm": 2.2517387866973877, - "learning_rate": 1.2923724080866165e-06, - "loss": 0.5016, + "epoch": 0.7776839141147484, + "grad_norm": 1.665527582168579, + "learning_rate": 1.7376914301037797e-06, + "loss": 0.5803, "step": 11047 }, { - "epoch": 0.8359880443418712, - "grad_norm": 3.1959762573242188, - "learning_rate": 1.2912098120669186e-06, - "loss": 0.633, + "epoch": 0.7777543118620204, + "grad_norm": 1.928641438484192, + "learning_rate": 1.7366390328885157e-06, + "loss": 0.7723, "step": 11048 }, { - "epoch": 0.8360637130642049, - "grad_norm": 2.66766095161438, - "learning_rate": 1.2900476987965934e-06, - "loss": 0.6543, + "epoch": 0.7778247096092925, + "grad_norm": 1.7774659395217896, + "learning_rate": 1.735586909320155e-06, + "loss": 0.6022, "step": 11049 }, { - "epoch": 0.8361393817865386, - "grad_norm": 2.5261588096618652, - "learning_rate": 1.2888860683484182e-06, - "loss": 0.6519, + "epoch": 0.7778951073565646, + "grad_norm": 1.8710529804229736, + "learning_rate": 1.7345350594533982e-06, + "loss": 0.6166, "step": 11050 }, { - "epoch": 0.8362150505088721, - "grad_norm": 2.3242642879486084, - "learning_rate": 1.2877249207951384e-06, - "loss": 0.6523, + "epoch": 0.7779655051038367, + "grad_norm": 2.0416781902313232, + "learning_rate": 1.7334834833429312e-06, + "loss": 0.5424, "step": 11051 }, { - "epoch": 0.8362907192312058, - "grad_norm": 2.110452890396118, - "learning_rate": 1.2865642562094692e-06, - "loss": 0.658, + "epoch": 0.7780359028511088, + "grad_norm": 2.0039896965026855, + "learning_rate": 1.7324321810434287e-06, + "loss": 0.6707, "step": 11052 }, { - "epoch": 0.8363663879535395, - "grad_norm": 2.2709860801696777, - "learning_rate": 1.285404074664094e-06, - "loss": 0.796, + "epoch": 0.7781063005983808, + "grad_norm": 1.9812105894088745, + "learning_rate": 1.7313811526095455e-06, + "loss": 0.7653, "step": 11053 }, { - "epoch": 0.836442056675873, - "grad_norm": 1.7346996068954468, - "learning_rate": 1.284244376231667e-06, - "loss": 0.5757, + "epoch": 0.778176698345653, + "grad_norm": 2.2778193950653076, + "learning_rate": 1.7303303980959308e-06, + "loss": 0.6465, "step": 11054 }, { - "epoch": 0.8365177253982067, - "grad_norm": 2.3549299240112305, - "learning_rate": 1.283085160984816e-06, - "loss": 0.8578, + "epoch": 0.778247096092925, + "grad_norm": 1.5538355112075806, + "learning_rate": 1.7292799175572105e-06, + "loss": 0.5868, "step": 11055 }, { - "epoch": 0.8365933941205402, - "grad_norm": 2.3999102115631104, - "learning_rate": 1.2819264289961293e-06, - "loss": 0.5272, + "epoch": 0.7783174938401971, + "grad_norm": 1.6901787519454956, + "learning_rate": 1.7282297110480006e-06, + "loss": 0.7755, "step": 11056 }, { - "epoch": 0.8366690628428739, - "grad_norm": 2.2719616889953613, - "learning_rate": 1.2807681803381701e-06, - "loss": 0.7264, + "epoch": 0.7783878915874692, + "grad_norm": 2.0541203022003174, + "learning_rate": 1.727179778622901e-06, + "loss": 0.6781, "step": 11057 }, { - "epoch": 0.8367447315652076, - "grad_norm": 2.6402430534362793, - "learning_rate": 1.2796104150834793e-06, - "loss": 0.6027, + "epoch": 0.7784582893347413, + "grad_norm": 2.0761497020721436, + "learning_rate": 1.7261301203365013e-06, + "loss": 0.6503, "step": 11058 }, { - "epoch": 0.8368204002875411, - "grad_norm": 2.883604049682617, - "learning_rate": 1.2784531333045529e-06, - "loss": 0.8013, + "epoch": 0.7785286870820134, + "grad_norm": 1.754136085510254, + "learning_rate": 1.7250807362433714e-06, + "loss": 0.7743, "step": 11059 }, { - "epoch": 0.8368960690098748, - "grad_norm": 1.8437893390655518, - "learning_rate": 1.277296335073866e-06, - "loss": 0.7198, + "epoch": 0.7785990848292854, + "grad_norm": 1.7441813945770264, + "learning_rate": 1.724031626398073e-06, + "loss": 0.5532, "step": 11060 }, { - "epoch": 0.8369717377322083, - "grad_norm": 2.287963628768921, - "learning_rate": 1.2761400204638605e-06, - "loss": 0.7142, + "epoch": 0.7786694825765575, + "grad_norm": 2.2677829265594482, + "learning_rate": 1.7229827908551491e-06, + "loss": 0.7432, "step": 11061 }, { - "epoch": 0.837047406454542, - "grad_norm": 2.6593968868255615, - "learning_rate": 1.2749841895469497e-06, - "loss": 0.5975, + "epoch": 0.7787398803238297, + "grad_norm": 2.3153162002563477, + "learning_rate": 1.7219342296691289e-06, + "loss": 0.6672, "step": 11062 }, { - "epoch": 0.8371230751768757, - "grad_norm": 2.181272268295288, - "learning_rate": 1.2738288423955146e-06, - "loss": 0.6539, + "epoch": 0.7788102780711017, + "grad_norm": 2.1410880088806152, + "learning_rate": 1.7208859428945254e-06, + "loss": 0.7027, "step": 11063 }, { - "epoch": 0.8371987438992092, - "grad_norm": 2.1479294300079346, - "learning_rate": 1.2726739790819062e-06, - "loss": 0.6179, + "epoch": 0.7788806758183738, + "grad_norm": 2.072766065597534, + "learning_rate": 1.7198379305858457e-06, + "loss": 0.6491, "step": 11064 }, { - "epoch": 0.8372744126215429, - "grad_norm": 2.6610589027404785, - "learning_rate": 1.2715195996784468e-06, - "loss": 0.696, + "epoch": 0.7789510735656459, + "grad_norm": 1.9863256216049194, + "learning_rate": 1.718790192797572e-06, + "loss": 0.6977, "step": 11065 }, { - "epoch": 0.8373500813438766, - "grad_norm": 2.6823954582214355, - "learning_rate": 1.2703657042574284e-06, - "loss": 0.6255, + "epoch": 0.779021471312918, + "grad_norm": 2.2018282413482666, + "learning_rate": 1.7177427295841802e-06, + "loss": 0.6823, "step": 11066 }, { - "epoch": 0.8374257500662101, - "grad_norm": 2.4057936668395996, - "learning_rate": 1.2692122928911085e-06, - "loss": 0.5914, + "epoch": 0.7790918690601901, + "grad_norm": 1.9191722869873047, + "learning_rate": 1.7166955410001288e-06, + "loss": 0.6698, "step": 11067 }, { - "epoch": 0.8375014187885438, - "grad_norm": 1.9947429895401, - "learning_rate": 1.268059365651718e-06, - "loss": 0.6166, + "epoch": 0.7791622668074621, + "grad_norm": 2.0711610317230225, + "learning_rate": 1.7156486270998601e-06, + "loss": 0.6887, "step": 11068 }, { - "epoch": 0.8375770875108773, - "grad_norm": 2.460395574569702, - "learning_rate": 1.2669069226114614e-06, - "loss": 0.6943, + "epoch": 0.7792326645547343, + "grad_norm": 1.8579130172729492, + "learning_rate": 1.7146019879378037e-06, + "loss": 0.5965, "step": 11069 }, { - "epoch": 0.837652756233211, - "grad_norm": 2.2644882202148438, - "learning_rate": 1.2657549638425028e-06, - "loss": 0.6663, + "epoch": 0.7793030623020063, + "grad_norm": 2.711268186569214, + "learning_rate": 1.7135556235683784e-06, + "loss": 0.6488, "step": 11070 }, { - "epoch": 0.8377284249555447, - "grad_norm": 2.3289754390716553, - "learning_rate": 1.2646034894169848e-06, - "loss": 0.6335, + "epoch": 0.7793734600492784, + "grad_norm": 2.123586893081665, + "learning_rate": 1.7125095340459822e-06, + "loss": 0.6378, "step": 11071 }, { - "epoch": 0.8378040936778782, - "grad_norm": 2.1823723316192627, - "learning_rate": 1.2634524994070152e-06, - "loss": 0.7363, + "epoch": 0.7794438577965506, + "grad_norm": 2.3076212406158447, + "learning_rate": 1.7114637194250062e-06, + "loss": 0.6702, "step": 11072 }, { - "epoch": 0.8378797624002119, - "grad_norm": 2.479443311691284, - "learning_rate": 1.2623019938846735e-06, - "loss": 0.705, + "epoch": 0.7795142555438226, + "grad_norm": 2.1527416706085205, + "learning_rate": 1.7104181797598207e-06, + "loss": 0.6417, "step": 11073 }, { - "epoch": 0.8379554311225454, - "grad_norm": 2.303396701812744, - "learning_rate": 1.2611519729220074e-06, - "loss": 0.8203, + "epoch": 0.7795846532910947, + "grad_norm": 1.844515323638916, + "learning_rate": 1.7093729151047845e-06, + "loss": 0.7044, "step": 11074 }, { - "epoch": 0.8380310998448791, - "grad_norm": 2.0928168296813965, - "learning_rate": 1.2600024365910352e-06, - "loss": 0.6169, + "epoch": 0.7796550510383667, + "grad_norm": 2.63543438911438, + "learning_rate": 1.7083279255142396e-06, + "loss": 0.7058, "step": 11075 }, { - "epoch": 0.8381067685672128, - "grad_norm": 2.577695608139038, - "learning_rate": 1.258853384963745e-06, - "loss": 0.6813, + "epoch": 0.7797254487856389, + "grad_norm": 2.2445428371429443, + "learning_rate": 1.7072832110425206e-06, + "loss": 0.686, "step": 11076 }, { - "epoch": 0.8381824372895463, - "grad_norm": 1.9073697328567505, - "learning_rate": 1.2577048181120954e-06, - "loss": 0.7995, + "epoch": 0.7797958465329109, + "grad_norm": 1.852050542831421, + "learning_rate": 1.7062387717439393e-06, + "loss": 0.5033, "step": 11077 }, { - "epoch": 0.83825810601188, - "grad_norm": 2.2130842208862305, - "learning_rate": 1.256556736108007e-06, - "loss": 0.6344, + "epoch": 0.779866244280183, + "grad_norm": 2.14237904548645, + "learning_rate": 1.7051946076728002e-06, + "loss": 0.5864, "step": 11078 }, { - "epoch": 0.8383337747342137, - "grad_norm": 2.398723840713501, - "learning_rate": 1.2554091390233841e-06, - "loss": 0.7178, + "epoch": 0.7799366420274552, + "grad_norm": 1.9742411375045776, + "learning_rate": 1.7041507188833883e-06, + "loss": 0.5752, "step": 11079 }, { - "epoch": 0.8384094434565472, - "grad_norm": 2.5538170337677, - "learning_rate": 1.2542620269300912e-06, - "loss": 0.7334, + "epoch": 0.7800070397747272, + "grad_norm": 1.788723349571228, + "learning_rate": 1.703107105429977e-06, + "loss": 0.7361, "step": 11080 }, { - "epoch": 0.8384851121788809, - "grad_norm": 2.482813596725464, - "learning_rate": 1.253115399899962e-06, - "loss": 0.6849, + "epoch": 0.7800774375219993, + "grad_norm": 1.880996823310852, + "learning_rate": 1.7020637673668226e-06, + "loss": 0.659, "step": 11081 }, { - "epoch": 0.8385607809012144, - "grad_norm": 2.486558198928833, - "learning_rate": 1.2519692580048022e-06, - "loss": 0.6309, + "epoch": 0.7801478352692713, + "grad_norm": 1.9866633415222168, + "learning_rate": 1.7010207047481728e-06, + "loss": 0.7008, "step": 11082 }, { - "epoch": 0.8386364496235481, - "grad_norm": 2.245243549346924, - "learning_rate": 1.250823601316388e-06, - "loss": 0.7787, + "epoch": 0.7802182330165435, + "grad_norm": 1.8979167938232422, + "learning_rate": 1.6999779176282542e-06, + "loss": 0.5993, "step": 11083 }, { - "epoch": 0.8387121183458818, - "grad_norm": 2.0628883838653564, - "learning_rate": 1.2496784299064634e-06, - "loss": 0.6715, + "epoch": 0.7802886307638156, + "grad_norm": 2.4394445419311523, + "learning_rate": 1.6989354060612848e-06, + "loss": 0.6431, "step": 11084 }, { - "epoch": 0.8387877870682153, - "grad_norm": 1.9107873439788818, - "learning_rate": 1.2485337438467425e-06, - "loss": 0.5899, + "epoch": 0.7803590285110876, + "grad_norm": 2.275749921798706, + "learning_rate": 1.6978931701014653e-06, + "loss": 0.6232, "step": 11085 }, { - "epoch": 0.838863455790549, - "grad_norm": 2.893704891204834, - "learning_rate": 1.2473895432089116e-06, - "loss": 0.7503, + "epoch": 0.7804294262583598, + "grad_norm": 1.6965659856796265, + "learning_rate": 1.696851209802981e-06, + "loss": 0.6827, "step": 11086 }, { - "epoch": 0.8389391245128826, - "grad_norm": 2.1818957328796387, - "learning_rate": 1.246245828064623e-06, - "loss": 0.6855, + "epoch": 0.7804998240056318, + "grad_norm": 2.420571804046631, + "learning_rate": 1.695809525220004e-06, + "loss": 0.605, "step": 11087 }, { - "epoch": 0.8390147932352162, - "grad_norm": 2.1986918449401855, - "learning_rate": 1.2451025984854952e-06, - "loss": 0.7521, + "epoch": 0.7805702217529039, + "grad_norm": 2.053755521774292, + "learning_rate": 1.6947681164066925e-06, + "loss": 0.5517, "step": 11088 }, { - "epoch": 0.8390904619575499, - "grad_norm": 2.2782137393951416, - "learning_rate": 1.2439598545431285e-06, - "loss": 0.6727, + "epoch": 0.780640619500176, + "grad_norm": 2.175386428833008, + "learning_rate": 1.6937269834171941e-06, + "loss": 0.8115, "step": 11089 }, { - "epoch": 0.8391661306798834, - "grad_norm": 2.482480764389038, - "learning_rate": 1.2428175963090803e-06, - "loss": 0.7597, + "epoch": 0.7807110172474481, + "grad_norm": 1.9240583181381226, + "learning_rate": 1.6926861263056355e-06, + "loss": 0.6026, "step": 11090 }, { - "epoch": 0.8392417994022171, - "grad_norm": 2.7252655029296875, - "learning_rate": 1.2416758238548872e-06, - "loss": 0.5514, + "epoch": 0.7807814149947202, + "grad_norm": 1.8561307191848755, + "learning_rate": 1.6916455451261314e-06, + "loss": 0.6714, "step": 11091 }, { - "epoch": 0.8393174681245508, - "grad_norm": 2.0488505363464355, - "learning_rate": 1.2405345372520447e-06, - "loss": 0.589, + "epoch": 0.7808518127419922, + "grad_norm": 1.793487548828125, + "learning_rate": 1.6906052399327809e-06, + "loss": 0.7356, "step": 11092 }, { - "epoch": 0.8393931368468843, - "grad_norm": 2.3147764205932617, - "learning_rate": 1.2393937365720247e-06, - "loss": 0.6551, + "epoch": 0.7809222104892644, + "grad_norm": 1.8967546224594116, + "learning_rate": 1.6895652107796749e-06, + "loss": 0.6165, "step": 11093 }, { - "epoch": 0.839468805569218, - "grad_norm": 1.9987397193908691, - "learning_rate": 1.2382534218862738e-06, - "loss": 0.8516, + "epoch": 0.7809926082365364, + "grad_norm": 2.002232789993286, + "learning_rate": 1.6885254577208807e-06, + "loss": 0.6295, "step": 11094 }, { - "epoch": 0.8395444742915515, - "grad_norm": 1.991015911102295, - "learning_rate": 1.2371135932661967e-06, - "loss": 0.5578, + "epoch": 0.7810630059838085, + "grad_norm": 2.0695247650146484, + "learning_rate": 1.68748598081046e-06, + "loss": 0.6036, "step": 11095 }, { - "epoch": 0.8396201430138852, - "grad_norm": 2.4074630737304688, - "learning_rate": 1.235974250783174e-06, - "loss": 0.8004, + "epoch": 0.7811334037310806, + "grad_norm": 2.079068183898926, + "learning_rate": 1.6864467801024553e-06, + "loss": 0.6376, "step": 11096 }, { - "epoch": 0.8396958117362189, - "grad_norm": 1.9555567502975464, - "learning_rate": 1.234835394508556e-06, - "loss": 0.7125, + "epoch": 0.7812038014783527, + "grad_norm": 1.9811755418777466, + "learning_rate": 1.6854078556508938e-06, + "loss": 0.717, "step": 11097 }, { - "epoch": 0.8397714804585524, - "grad_norm": 2.148538827896118, - "learning_rate": 1.2336970245136604e-06, - "loss": 0.6414, + "epoch": 0.7812741992256248, + "grad_norm": 2.3885338306427, + "learning_rate": 1.6843692075097899e-06, + "loss": 0.7443, "step": 11098 }, { - "epoch": 0.8398471491808861, - "grad_norm": 2.0188326835632324, - "learning_rate": 1.2325591408697773e-06, - "loss": 0.6591, + "epoch": 0.7813445969728968, + "grad_norm": 1.8399784564971924, + "learning_rate": 1.6833308357331464e-06, + "loss": 0.5969, "step": 11099 }, { - "epoch": 0.8399228179032197, - "grad_norm": 2.890838146209717, - "learning_rate": 1.2314217436481636e-06, - "loss": 0.6491, + "epoch": 0.781414994720169, + "grad_norm": 2.281334161758423, + "learning_rate": 1.682292740374946e-06, + "loss": 0.5836, "step": 11100 }, { - "epoch": 0.8399984866255533, - "grad_norm": 1.9198577404022217, - "learning_rate": 1.2302848329200484e-06, - "loss": 0.5948, + "epoch": 0.7814853924674411, + "grad_norm": 2.1016016006469727, + "learning_rate": 1.681254921489164e-06, + "loss": 0.6082, "step": 11101 }, { - "epoch": 0.840074155347887, - "grad_norm": 12.436066627502441, - "learning_rate": 1.2291484087566258e-06, - "loss": 0.7092, + "epoch": 0.7815557902147131, + "grad_norm": 1.9106851816177368, + "learning_rate": 1.6802173791297558e-06, + "loss": 0.5808, "step": 11102 }, { - "epoch": 0.8401498240702205, - "grad_norm": 2.7914984226226807, - "learning_rate": 1.2280124712290618e-06, - "loss": 0.6803, + "epoch": 0.7816261879619852, + "grad_norm": 1.7404488325119019, + "learning_rate": 1.6791801133506633e-06, + "loss": 0.6207, "step": 11103 }, { - "epoch": 0.8402254927925542, - "grad_norm": 2.818707227706909, - "learning_rate": 1.2268770204084955e-06, - "loss": 0.7888, + "epoch": 0.7816965857092573, + "grad_norm": 2.066589117050171, + "learning_rate": 1.6781431242058146e-06, + "loss": 0.6795, "step": 11104 }, { - "epoch": 0.8403011615148879, - "grad_norm": 2.656458854675293, - "learning_rate": 1.225742056366035e-06, - "loss": 0.6282, + "epoch": 0.7817669834565294, + "grad_norm": 2.135134220123291, + "learning_rate": 1.677106411749126e-06, + "loss": 0.7128, "step": 11105 }, { - "epoch": 0.8403768302372214, - "grad_norm": 2.1726672649383545, - "learning_rate": 1.2246075791727494e-06, - "loss": 0.6697, + "epoch": 0.7818373812038015, + "grad_norm": 1.6378732919692993, + "learning_rate": 1.6760699760344938e-06, + "loss": 0.6492, "step": 11106 }, { - "epoch": 0.8404524989595551, - "grad_norm": 2.160932779312134, - "learning_rate": 1.223473588899685e-06, - "loss": 0.7079, + "epoch": 0.7819077789510736, + "grad_norm": 1.8816508054733276, + "learning_rate": 1.6750338171158068e-06, + "loss": 0.7285, "step": 11107 }, { - "epoch": 0.8405281676818886, - "grad_norm": 1.9137705564498901, - "learning_rate": 1.222340085617858e-06, - "loss": 0.6773, + "epoch": 0.7819781766983457, + "grad_norm": 1.9181289672851562, + "learning_rate": 1.6739979350469345e-06, + "loss": 0.7023, "step": 11108 }, { - "epoch": 0.8406038364042223, - "grad_norm": 2.2129149436950684, - "learning_rate": 1.2212070693982505e-06, - "loss": 0.7601, + "epoch": 0.7820485744456177, + "grad_norm": 1.6321521997451782, + "learning_rate": 1.672962329881733e-06, + "loss": 0.6461, "step": 11109 }, { - "epoch": 0.840679505126556, - "grad_norm": 1.9030208587646484, - "learning_rate": 1.2200745403118159e-06, - "loss": 0.7088, + "epoch": 0.7821189721928898, + "grad_norm": 2.2805120944976807, + "learning_rate": 1.671927001674043e-06, + "loss": 0.8006, "step": 11110 }, { - "epoch": 0.8407551738488895, - "grad_norm": 4.24271821975708, - "learning_rate": 1.2189424984294774e-06, - "loss": 0.6975, + "epoch": 0.7821893699401619, + "grad_norm": 1.7693395614624023, + "learning_rate": 1.6708919504776946e-06, + "loss": 0.6379, "step": 11111 }, { - "epoch": 0.8408308425712232, - "grad_norm": 2.1732661724090576, - "learning_rate": 1.217810943822128e-06, - "loss": 0.7062, + "epoch": 0.782259767687434, + "grad_norm": 2.0257151126861572, + "learning_rate": 1.6698571763464985e-06, + "loss": 0.656, "step": 11112 }, { - "epoch": 0.8409065112935568, - "grad_norm": 2.135040521621704, - "learning_rate": 1.2166798765606255e-06, - "loss": 0.5736, + "epoch": 0.7823301654347061, + "grad_norm": 2.177809000015259, + "learning_rate": 1.6688226793342578e-06, + "loss": 0.6922, "step": 11113 }, { - "epoch": 0.8409821800158904, - "grad_norm": 1.6705262660980225, - "learning_rate": 1.2155492967158019e-06, - "loss": 0.5572, + "epoch": 0.7824005631819781, + "grad_norm": 1.7162494659423828, + "learning_rate": 1.6677884594947533e-06, + "loss": 0.7239, "step": 11114 }, { - "epoch": 0.8410578487382241, - "grad_norm": 2.2693963050842285, - "learning_rate": 1.2144192043584637e-06, - "loss": 0.6938, + "epoch": 0.7824709609292503, + "grad_norm": 1.77838134765625, + "learning_rate": 1.6667545168817558e-06, + "loss": 0.6409, "step": 11115 }, { - "epoch": 0.8411335174605576, - "grad_norm": 2.54014253616333, - "learning_rate": 1.2132895995593742e-06, - "loss": 0.6641, + "epoch": 0.7825413586765223, + "grad_norm": 1.9595921039581299, + "learning_rate": 1.6657208515490194e-06, + "loss": 0.595, "step": 11116 }, { - "epoch": 0.8412091861828913, - "grad_norm": 1.9177050590515137, - "learning_rate": 1.212160482389275e-06, - "loss": 0.6905, + "epoch": 0.7826117564237944, + "grad_norm": 1.9215511083602905, + "learning_rate": 1.6646874635502892e-06, + "loss": 0.5606, "step": 11117 }, { - "epoch": 0.841284854905225, - "grad_norm": 2.4678313732147217, - "learning_rate": 1.2110318529188764e-06, - "loss": 0.8327, + "epoch": 0.7826821541710666, + "grad_norm": 1.6792012453079224, + "learning_rate": 1.6636543529392876e-06, + "loss": 0.64, "step": 11118 }, { - "epoch": 0.8413605236275585, - "grad_norm": 1.8306165933609009, - "learning_rate": 1.209903711218855e-06, - "loss": 0.6428, + "epoch": 0.7827525519183386, + "grad_norm": 1.9724340438842773, + "learning_rate": 1.662621519769731e-06, + "loss": 0.6531, "step": 11119 }, { - "epoch": 0.8414361923498922, - "grad_norm": 10.050545692443848, - "learning_rate": 1.208776057359859e-06, - "loss": 0.5172, + "epoch": 0.7828229496656107, + "grad_norm": 2.0066471099853516, + "learning_rate": 1.6615889640953131e-06, + "loss": 0.6226, "step": 11120 }, { - "epoch": 0.8415118610722258, - "grad_norm": 2.323607921600342, - "learning_rate": 1.207648891412507e-06, - "loss": 0.7041, + "epoch": 0.7828933474128827, + "grad_norm": 1.5381684303283691, + "learning_rate": 1.6605566859697237e-06, + "loss": 0.595, "step": 11121 }, { - "epoch": 0.8415875297945594, - "grad_norm": 2.1820437908172607, - "learning_rate": 1.206522213447384e-06, - "loss": 0.727, + "epoch": 0.7829637451601549, + "grad_norm": 1.6907211542129517, + "learning_rate": 1.6595246854466244e-06, + "loss": 0.7387, "step": 11122 }, { - "epoch": 0.8416631985168931, - "grad_norm": 2.826960563659668, - "learning_rate": 1.2053960235350498e-06, - "loss": 0.6435, + "epoch": 0.783034142907427, + "grad_norm": 1.8660727739334106, + "learning_rate": 1.6584929625796752e-06, + "loss": 0.6801, "step": 11123 }, { - "epoch": 0.8417388672392266, - "grad_norm": 2.4186246395111084, - "learning_rate": 1.2042703217460235e-06, - "loss": 0.6031, + "epoch": 0.783104540654699, + "grad_norm": 1.6023956537246704, + "learning_rate": 1.657461517422512e-06, + "loss": 0.5748, "step": 11124 }, { - "epoch": 0.8418145359615603, - "grad_norm": 1.740941047668457, - "learning_rate": 1.2031451081508057e-06, - "loss": 0.619, + "epoch": 0.7831749384019712, + "grad_norm": 1.8010411262512207, + "learning_rate": 1.6564303500287654e-06, + "loss": 0.6613, "step": 11125 }, { - "epoch": 0.8418902046838939, - "grad_norm": 2.0095555782318115, - "learning_rate": 1.2020203828198617e-06, - "loss": 0.7084, + "epoch": 0.7832453361492432, + "grad_norm": 2.1091854572296143, + "learning_rate": 1.6553994604520418e-06, + "loss": 0.778, "step": 11126 }, { - "epoch": 0.8419658734062275, - "grad_norm": 2.2118263244628906, - "learning_rate": 1.2008961458236206e-06, - "loss": 0.6634, + "epoch": 0.7833157338965153, + "grad_norm": 1.934720516204834, + "learning_rate": 1.6543688487459451e-06, + "loss": 0.6429, "step": 11127 }, { - "epoch": 0.8420415421285612, - "grad_norm": 2.157186269760132, - "learning_rate": 1.1997723972324888e-06, - "loss": 0.62, + "epoch": 0.7833861316437875, + "grad_norm": 1.7607797384262085, + "learning_rate": 1.6533385149640487e-06, + "loss": 0.6062, "step": 11128 }, { - "epoch": 0.8421172108508947, - "grad_norm": 2.188436985015869, - "learning_rate": 1.198649137116838e-06, - "loss": 0.6341, + "epoch": 0.7834565293910595, + "grad_norm": 1.7739607095718384, + "learning_rate": 1.6523084591599284e-06, + "loss": 0.6833, "step": 11129 }, { - "epoch": 0.8421928795732284, - "grad_norm": 2.3916919231414795, - "learning_rate": 1.197526365547011e-06, - "loss": 0.6438, + "epoch": 0.7835269271383316, + "grad_norm": 1.849339485168457, + "learning_rate": 1.6512786813871316e-06, + "loss": 0.5876, "step": 11130 }, { - "epoch": 0.8422685482955621, - "grad_norm": 2.1905405521392822, - "learning_rate": 1.1964040825933196e-06, - "loss": 0.7768, + "epoch": 0.7835973248856036, + "grad_norm": 1.942318081855774, + "learning_rate": 1.6502491816992019e-06, + "loss": 0.7047, "step": 11131 }, { - "epoch": 0.8423442170178956, - "grad_norm": 2.423164129257202, - "learning_rate": 1.1952822883260445e-06, - "loss": 0.6209, + "epoch": 0.7836677226328758, + "grad_norm": 2.3940305709838867, + "learning_rate": 1.6492199601496606e-06, + "loss": 0.7107, "step": 11132 }, { - "epoch": 0.8424198857402293, - "grad_norm": 2.417698621749878, - "learning_rate": 1.1941609828154374e-06, - "loss": 0.7285, + "epoch": 0.7837381203801478, + "grad_norm": 2.1189002990722656, + "learning_rate": 1.6481910167920236e-06, + "loss": 0.7349, "step": 11133 }, { - "epoch": 0.8424955544625629, - "grad_norm": 2.6083669662475586, - "learning_rate": 1.1930401661317124e-06, - "loss": 0.6182, + "epoch": 0.7838085181274199, + "grad_norm": 1.8335667848587036, + "learning_rate": 1.6471623516797784e-06, + "loss": 0.6582, "step": 11134 }, { - "epoch": 0.8425712231848965, - "grad_norm": 2.146847724914551, - "learning_rate": 1.1919198383450663e-06, - "loss": 0.6415, + "epoch": 0.783878915874692, + "grad_norm": 1.8057727813720703, + "learning_rate": 1.6461339648664124e-06, + "loss": 0.6967, "step": 11135 }, { - "epoch": 0.8426468919072302, - "grad_norm": 2.49105167388916, - "learning_rate": 1.190799999525653e-06, - "loss": 0.6844, + "epoch": 0.7839493136219641, + "grad_norm": 1.8569633960723877, + "learning_rate": 1.645105856405388e-06, + "loss": 0.6796, "step": 11136 }, { - "epoch": 0.8427225606295637, - "grad_norm": 2.345392942428589, - "learning_rate": 1.189680649743604e-06, - "loss": 0.56, + "epoch": 0.7840197113692362, + "grad_norm": 2.462355136871338, + "learning_rate": 1.6440780263501623e-06, + "loss": 0.5868, "step": 11137 }, { - "epoch": 0.8427982293518974, - "grad_norm": 2.192192316055298, - "learning_rate": 1.1885617890690128e-06, - "loss": 0.6248, + "epoch": 0.7840901091165082, + "grad_norm": 1.9301851987838745, + "learning_rate": 1.6430504747541684e-06, + "loss": 0.721, "step": 11138 }, { - "epoch": 0.842873898074231, - "grad_norm": 3.144986867904663, - "learning_rate": 1.1874434175719458e-06, - "loss": 0.6772, + "epoch": 0.7841605068637804, + "grad_norm": 2.873478412628174, + "learning_rate": 1.6420232016708343e-06, + "loss": 0.6047, "step": 11139 }, { - "epoch": 0.8429495667965646, - "grad_norm": 1.7006821632385254, - "learning_rate": 1.1863255353224444e-06, - "loss": 0.5736, + "epoch": 0.7842309046110525, + "grad_norm": 1.969352126121521, + "learning_rate": 1.6409962071535664e-06, + "loss": 0.7516, "step": 11140 }, { - "epoch": 0.8430252355188983, - "grad_norm": 3.1362383365631104, - "learning_rate": 1.1852081423905087e-06, - "loss": 0.6766, + "epoch": 0.7843013023583245, + "grad_norm": 1.8931164741516113, + "learning_rate": 1.6399694912557565e-06, + "loss": 0.6039, "step": 11141 }, { - "epoch": 0.8431009042412319, - "grad_norm": 2.2844927310943604, - "learning_rate": 1.1840912388461152e-06, - "loss": 0.6861, + "epoch": 0.7843717001055966, + "grad_norm": 1.8865864276885986, + "learning_rate": 1.638943054030789e-06, + "loss": 0.614, "step": 11142 }, { - "epoch": 0.8431765729635655, - "grad_norm": 2.2476208209991455, - "learning_rate": 1.1829748247592082e-06, - "loss": 0.7521, + "epoch": 0.7844420978528687, + "grad_norm": 2.31870174407959, + "learning_rate": 1.6379168955320254e-06, + "loss": 0.7487, "step": 11143 }, { - "epoch": 0.8432522416858992, - "grad_norm": 2.138150215148926, - "learning_rate": 1.181858900199702e-06, - "loss": 0.6996, + "epoch": 0.7845124956001408, + "grad_norm": 2.31754207611084, + "learning_rate": 1.6368910158128211e-06, + "loss": 0.694, "step": 11144 }, { - "epoch": 0.8433279104082327, - "grad_norm": 2.889930009841919, - "learning_rate": 1.1807434652374754e-06, - "loss": 0.6901, + "epoch": 0.7845828933474129, + "grad_norm": 1.7842503786087036, + "learning_rate": 1.63586541492651e-06, + "loss": 0.7731, "step": 11145 }, { - "epoch": 0.8434035791305664, - "grad_norm": 1.7074170112609863, - "learning_rate": 1.1796285199423857e-06, - "loss": 0.6359, + "epoch": 0.784653291094685, + "grad_norm": 1.6091986894607544, + "learning_rate": 1.6348400929264137e-06, + "loss": 0.784, "step": 11146 }, { - "epoch": 0.8434792478529, - "grad_norm": 2.217623710632324, - "learning_rate": 1.178514064384254e-06, - "loss": 0.6327, + "epoch": 0.7847236888419571, + "grad_norm": 1.9335802793502808, + "learning_rate": 1.6338150498658377e-06, + "loss": 0.6536, "step": 11147 }, { - "epoch": 0.8435549165752336, - "grad_norm": 1.8675048351287842, - "learning_rate": 1.1774000986328665e-06, - "loss": 0.5452, + "epoch": 0.7847940865892291, + "grad_norm": 1.9815512895584106, + "learning_rate": 1.6327902857980798e-06, + "loss": 0.5696, "step": 11148 }, { - "epoch": 0.8436305852975673, - "grad_norm": 1.739334225654602, - "learning_rate": 1.1762866227579872e-06, - "loss": 0.6498, + "epoch": 0.7848644843365012, + "grad_norm": 1.680778980255127, + "learning_rate": 1.6317658007764133e-06, + "loss": 0.777, "step": 11149 }, { - "epoch": 0.8437062540199008, - "grad_norm": 1.5697089433670044, - "learning_rate": 1.1751736368293417e-06, - "loss": 0.5786, + "epoch": 0.7849348820837733, + "grad_norm": 1.7695398330688477, + "learning_rate": 1.6307415948541074e-06, + "loss": 0.6466, "step": 11150 }, { - "epoch": 0.8437819227422345, - "grad_norm": 2.253666639328003, - "learning_rate": 1.1740611409166368e-06, - "loss": 0.7211, + "epoch": 0.7850052798310454, + "grad_norm": 2.084780693054199, + "learning_rate": 1.6297176680844079e-06, + "loss": 0.6528, "step": 11151 }, { - "epoch": 0.8438575914645681, - "grad_norm": 3.369987964630127, - "learning_rate": 1.172949135089532e-06, - "loss": 0.595, + "epoch": 0.7850756775783175, + "grad_norm": 1.8378831148147583, + "learning_rate": 1.6286940205205505e-06, + "loss": 0.6889, "step": 11152 }, { - "epoch": 0.8439332601869017, - "grad_norm": 2.647265672683716, - "learning_rate": 1.171837619417669e-06, - "loss": 0.6779, + "epoch": 0.7851460753255896, + "grad_norm": 2.729276657104492, + "learning_rate": 1.6276706522157532e-06, + "loss": 0.654, "step": 11153 }, { - "epoch": 0.8440089289092354, - "grad_norm": 2.462435483932495, - "learning_rate": 1.1707265939706543e-06, - "loss": 0.6241, + "epoch": 0.7852164730728617, + "grad_norm": 1.905706763267517, + "learning_rate": 1.6266475632232267e-06, + "loss": 0.7795, "step": 11154 }, { - "epoch": 0.844084597631569, - "grad_norm": 2.3992083072662354, - "learning_rate": 1.1696160588180617e-06, - "loss": 0.7099, + "epoch": 0.7852868708201337, + "grad_norm": 1.9225305318832397, + "learning_rate": 1.6256247535961577e-06, + "loss": 0.7133, "step": 11155 }, { - "epoch": 0.8441602663539026, - "grad_norm": 2.0883431434631348, - "learning_rate": 1.1685060140294388e-06, - "loss": 0.7162, + "epoch": 0.7853572685674058, + "grad_norm": 1.7164913415908813, + "learning_rate": 1.6246022233877268e-06, + "loss": 0.7091, "step": 11156 }, { - "epoch": 0.8442359350762363, - "grad_norm": 2.320479393005371, - "learning_rate": 1.1673964596742994e-06, - "loss": 0.6615, + "epoch": 0.785427666314678, + "grad_norm": 2.1878409385681152, + "learning_rate": 1.6235799726510947e-06, + "loss": 0.7435, "step": 11157 }, { - "epoch": 0.8443116037985698, - "grad_norm": 2.3560616970062256, - "learning_rate": 1.1662873958221294e-06, - "loss": 0.6776, + "epoch": 0.78549806406195, + "grad_norm": 1.8221943378448486, + "learning_rate": 1.6225580014394093e-06, + "loss": 0.5633, "step": 11158 }, { - "epoch": 0.8443872725209035, - "grad_norm": 2.070371389389038, - "learning_rate": 1.165178822542378e-06, - "loss": 0.5541, + "epoch": 0.7855684618092221, + "grad_norm": 1.9369239807128906, + "learning_rate": 1.621536309805801e-06, + "loss": 0.6474, "step": 11159 }, { - "epoch": 0.8444629412432371, - "grad_norm": 7.825291633605957, - "learning_rate": 1.164070739904468e-06, - "loss": 0.8369, + "epoch": 0.7856388595564942, + "grad_norm": 1.7878022193908691, + "learning_rate": 1.6205148978033933e-06, + "loss": 0.5737, "step": 11160 }, { - "epoch": 0.8445386099655707, - "grad_norm": 2.257880687713623, - "learning_rate": 1.1629631479777953e-06, - "loss": 0.7629, + "epoch": 0.7857092573037663, + "grad_norm": 1.951951265335083, + "learning_rate": 1.6194937654852858e-06, + "loss": 0.5893, "step": 11161 }, { - "epoch": 0.8446142786879044, - "grad_norm": 2.6338634490966797, - "learning_rate": 1.161856046831718e-06, - "loss": 0.7076, + "epoch": 0.7857796550510384, + "grad_norm": 1.878322958946228, + "learning_rate": 1.6184729129045726e-06, + "loss": 0.5572, "step": 11162 }, { - "epoch": 0.844689947410238, - "grad_norm": 2.4323246479034424, - "learning_rate": 1.1607494365355664e-06, - "loss": 0.7328, + "epoch": 0.7858500527983104, + "grad_norm": 1.925378441810608, + "learning_rate": 1.6174523401143262e-06, + "loss": 0.78, "step": 11163 }, { - "epoch": 0.8447656161325716, - "grad_norm": 2.372086524963379, - "learning_rate": 1.1596433171586389e-06, - "loss": 0.7085, + "epoch": 0.7859204505455826, + "grad_norm": 4.010754585266113, + "learning_rate": 1.616432047167608e-06, + "loss": 0.6226, "step": 11164 }, { - "epoch": 0.8448412848549052, - "grad_norm": 2.4352121353149414, - "learning_rate": 1.1585376887702074e-06, - "loss": 0.7244, + "epoch": 0.7859908482928546, + "grad_norm": 1.9657721519470215, + "learning_rate": 1.6154120341174609e-06, + "loss": 0.6455, "step": 11165 }, { - "epoch": 0.8449169535772388, - "grad_norm": 1.8406596183776855, - "learning_rate": 1.1574325514395073e-06, - "loss": 0.6614, + "epoch": 0.7860612460401267, + "grad_norm": 1.8669992685317993, + "learning_rate": 1.6143923010169204e-06, + "loss": 0.5702, "step": 11166 }, { - "epoch": 0.8449926222995725, - "grad_norm": 2.4071455001831055, - "learning_rate": 1.1563279052357464e-06, - "loss": 0.8433, + "epoch": 0.7861316437873987, + "grad_norm": 2.1184823513031006, + "learning_rate": 1.6133728479189998e-06, + "loss": 0.5493, "step": 11167 }, { - "epoch": 0.8450682910219061, - "grad_norm": 1.97054123878479, - "learning_rate": 1.1552237502281023e-06, - "loss": 0.624, + "epoch": 0.7862020415346709, + "grad_norm": 1.8830924034118652, + "learning_rate": 1.6123536748767052e-06, + "loss": 0.5485, "step": 11168 }, { - "epoch": 0.8451439597442397, - "grad_norm": 2.932701826095581, - "learning_rate": 1.1541200864857225e-06, - "loss": 0.6386, + "epoch": 0.786272439281943, + "grad_norm": 2.405334711074829, + "learning_rate": 1.611334781943023e-06, + "loss": 0.7894, "step": 11169 }, { - "epoch": 0.8452196284665734, - "grad_norm": 2.2148234844207764, - "learning_rate": 1.153016914077714e-06, - "loss": 0.6634, + "epoch": 0.786342837029215, + "grad_norm": 1.8574564456939697, + "learning_rate": 1.6103161691709253e-06, + "loss": 0.6765, "step": 11170 }, { - "epoch": 0.845295297188907, - "grad_norm": 2.325047254562378, - "learning_rate": 1.1519142330731705e-06, - "loss": 0.6842, + "epoch": 0.7864132347764872, + "grad_norm": 2.137089252471924, + "learning_rate": 1.6092978366133691e-06, + "loss": 0.6642, "step": 11171 }, { - "epoch": 0.8453709659112406, - "grad_norm": 2.387718677520752, - "learning_rate": 1.1508120435411416e-06, - "loss": 0.6088, + "epoch": 0.7864836325237592, + "grad_norm": 1.9004735946655273, + "learning_rate": 1.6082797843233024e-06, + "loss": 0.6689, "step": 11172 }, { - "epoch": 0.8454466346335742, - "grad_norm": 1.7872700691223145, - "learning_rate": 1.149710345550649e-06, - "loss": 0.6071, + "epoch": 0.7865540302710313, + "grad_norm": 1.9096980094909668, + "learning_rate": 1.6072620123536505e-06, + "loss": 0.7186, "step": 11173 }, { - "epoch": 0.8455223033559078, - "grad_norm": 1.974530577659607, - "learning_rate": 1.148609139170685e-06, - "loss": 0.7307, + "epoch": 0.7866244280183035, + "grad_norm": 2.1087706089019775, + "learning_rate": 1.6062445207573327e-06, + "loss": 0.6067, "step": 11174 }, { - "epoch": 0.8455979720782415, - "grad_norm": 2.9320738315582275, - "learning_rate": 1.147508424470212e-06, - "loss": 0.6461, + "epoch": 0.7866948257655755, + "grad_norm": 1.8040441274642944, + "learning_rate": 1.6052273095872475e-06, + "loss": 0.7849, "step": 11175 }, { - "epoch": 0.845673640800575, - "grad_norm": 2.1946792602539062, - "learning_rate": 1.146408201518159e-06, - "loss": 0.8129, + "epoch": 0.7867652235128476, + "grad_norm": 2.219050645828247, + "learning_rate": 1.6042103788962805e-06, + "loss": 0.6467, "step": 11176 }, { - "epoch": 0.8457493095229087, - "grad_norm": 2.6159780025482178, - "learning_rate": 1.1453084703834259e-06, - "loss": 0.7393, + "epoch": 0.7868356212601196, + "grad_norm": 1.853709101676941, + "learning_rate": 1.6031937287373001e-06, + "loss": 0.62, "step": 11177 }, { - "epoch": 0.8458249782452423, - "grad_norm": 2.3954858779907227, - "learning_rate": 1.1442092311348814e-06, - "loss": 0.6058, + "epoch": 0.7869060190073918, + "grad_norm": 1.7779865264892578, + "learning_rate": 1.6021773591631676e-06, + "loss": 0.5459, "step": 11178 }, { - "epoch": 0.8459006469675759, - "grad_norm": 2.413275957107544, - "learning_rate": 1.1431104838413637e-06, - "loss": 0.6777, + "epoch": 0.7869764167546639, + "grad_norm": 2.341951370239258, + "learning_rate": 1.6011612702267204e-06, + "loss": 0.7022, "step": 11179 }, { - "epoch": 0.8459763156899096, - "grad_norm": 1.921155333518982, - "learning_rate": 1.1420122285716798e-06, - "loss": 0.5244, + "epoch": 0.7870468145019359, + "grad_norm": 2.1454789638519287, + "learning_rate": 1.6001454619807907e-06, + "loss": 0.6606, "step": 11180 }, { - "epoch": 0.8460519844122432, - "grad_norm": 2.647214651107788, - "learning_rate": 1.1409144653946064e-06, - "loss": 0.6092, + "epoch": 0.787117212249208, + "grad_norm": 1.775092363357544, + "learning_rate": 1.5991299344781885e-06, + "loss": 0.6168, "step": 11181 }, { - "epoch": 0.8461276531345768, - "grad_norm": 3.575390338897705, - "learning_rate": 1.1398171943788878e-06, - "loss": 0.6721, + "epoch": 0.7871876099964801, + "grad_norm": 1.7805384397506714, + "learning_rate": 1.5981146877717125e-06, + "loss": 0.6042, "step": 11182 }, { - "epoch": 0.8462033218569105, - "grad_norm": 2.585902452468872, - "learning_rate": 1.1387204155932418e-06, - "loss": 0.6889, + "epoch": 0.7872580077437522, + "grad_norm": 1.7752861976623535, + "learning_rate": 1.597099721914144e-06, + "loss": 0.6167, "step": 11183 }, { - "epoch": 0.846278990579244, - "grad_norm": 2.851043462753296, - "learning_rate": 1.1376241291063476e-06, - "loss": 0.6995, + "epoch": 0.7873284054910243, + "grad_norm": 1.7175688743591309, + "learning_rate": 1.5960850369582562e-06, + "loss": 0.7495, "step": 11184 }, { - "epoch": 0.8463546593015777, - "grad_norm": 1.9983781576156616, - "learning_rate": 1.1365283349868602e-06, - "loss": 0.6446, + "epoch": 0.7873988032382964, + "grad_norm": 2.266176462173462, + "learning_rate": 1.595070632956799e-06, + "loss": 0.5449, "step": 11185 }, { - "epoch": 0.8464303280239113, - "grad_norm": 2.99881911277771, - "learning_rate": 1.1354330333034028e-06, - "loss": 0.7288, + "epoch": 0.7874692009855685, + "grad_norm": 2.1978259086608887, + "learning_rate": 1.5940565099625174e-06, + "loss": 0.6137, "step": 11186 }, { - "epoch": 0.8465059967462449, - "grad_norm": 2.3670239448547363, - "learning_rate": 1.1343382241245656e-06, - "loss": 0.7242, + "epoch": 0.7875395987328405, + "grad_norm": 2.310164451599121, + "learning_rate": 1.593042668028133e-06, + "loss": 0.6715, "step": 11187 }, { - "epoch": 0.8465816654685786, - "grad_norm": 2.0711822509765625, - "learning_rate": 1.1332439075189095e-06, - "loss": 0.6653, + "epoch": 0.7876099964801127, + "grad_norm": 1.6877752542495728, + "learning_rate": 1.5920291072063552e-06, + "loss": 0.6306, "step": 11188 }, { - "epoch": 0.8466573341909122, - "grad_norm": 1.839362382888794, - "learning_rate": 1.132150083554964e-06, - "loss": 0.6028, + "epoch": 0.7876803942273847, + "grad_norm": 1.7617920637130737, + "learning_rate": 1.591015827549884e-06, + "loss": 0.5065, "step": 11189 }, { - "epoch": 0.8467330029132458, - "grad_norm": 2.130958080291748, - "learning_rate": 1.1310567523012298e-06, - "loss": 0.7051, + "epoch": 0.7877507919746568, + "grad_norm": 1.7321642637252808, + "learning_rate": 1.5900028291113967e-06, + "loss": 0.7049, "step": 11190 }, { - "epoch": 0.8468086716355794, - "grad_norm": 2.0340983867645264, - "learning_rate": 1.1299639138261687e-06, - "loss": 0.6712, + "epoch": 0.7878211897219289, + "grad_norm": 1.9638044834136963, + "learning_rate": 1.5889901119435642e-06, + "loss": 0.6112, "step": 11191 }, { - "epoch": 0.846884340357913, - "grad_norm": 2.854250192642212, - "learning_rate": 1.1288715681982247e-06, - "loss": 0.6333, + "epoch": 0.787891587469201, + "grad_norm": 1.9644232988357544, + "learning_rate": 1.5879776760990355e-06, + "loss": 0.7448, "step": 11192 }, { - "epoch": 0.8469600090802467, - "grad_norm": 2.552464485168457, - "learning_rate": 1.127779715485802e-06, - "loss": 0.6445, + "epoch": 0.7879619852164731, + "grad_norm": 2.303680658340454, + "learning_rate": 1.58696552163045e-06, + "loss": 0.6362, "step": 11193 }, { - "epoch": 0.8470356778025803, - "grad_norm": 2.156697988510132, - "learning_rate": 1.1266883557572762e-06, - "loss": 0.6876, + "epoch": 0.7880323829637451, + "grad_norm": 1.8292583227157593, + "learning_rate": 1.5859536485904266e-06, + "loss": 0.6857, "step": 11194 }, { - "epoch": 0.8471113465249139, - "grad_norm": 1.9202338457107544, - "learning_rate": 1.1255974890809892e-06, - "loss": 0.8281, + "epoch": 0.7881027807110172, + "grad_norm": 2.1460120677948, + "learning_rate": 1.5849420570315777e-06, + "loss": 0.694, "step": 11195 }, { - "epoch": 0.8471870152472476, - "grad_norm": 2.571288585662842, - "learning_rate": 1.1245071155252547e-06, - "loss": 0.7569, + "epoch": 0.7881731784582894, + "grad_norm": 2.4236605167388916, + "learning_rate": 1.5839307470064947e-06, + "loss": 0.6758, "step": 11196 }, { - "epoch": 0.8472626839695812, - "grad_norm": 2.219005823135376, - "learning_rate": 1.1234172351583611e-06, - "loss": 0.5319, + "epoch": 0.7882435762055614, + "grad_norm": 2.088330030441284, + "learning_rate": 1.582919718567758e-06, + "loss": 0.6134, "step": 11197 }, { - "epoch": 0.8473383526919148, - "grad_norm": 1.8485450744628906, - "learning_rate": 1.1223278480485535e-06, - "loss": 0.5917, + "epoch": 0.7883139739528335, + "grad_norm": 2.225099802017212, + "learning_rate": 1.5819089717679322e-06, + "loss": 0.5889, "step": 11198 }, { - "epoch": 0.8474140214142484, - "grad_norm": 2.1197338104248047, - "learning_rate": 1.1212389542640566e-06, - "loss": 0.6706, + "epoch": 0.7883843717001056, + "grad_norm": 1.4640287160873413, + "learning_rate": 1.580898506659565e-06, + "loss": 0.5249, "step": 11199 }, { - "epoch": 0.847489690136582, - "grad_norm": 2.1624670028686523, - "learning_rate": 1.1201505538730586e-06, - "loss": 0.6419, + "epoch": 0.7884547694473777, + "grad_norm": 1.806365966796875, + "learning_rate": 1.579888323295191e-06, + "loss": 0.5889, "step": 11200 }, { - "epoch": 0.8475653588589157, - "grad_norm": 6.582333087921143, - "learning_rate": 1.1190626469437192e-06, - "loss": 0.6285, + "epoch": 0.7885251671946498, + "grad_norm": 1.8128294944763184, + "learning_rate": 1.5788784217273336e-06, + "loss": 0.6766, "step": 11201 }, { - "epoch": 0.8476410275812493, - "grad_norm": 2.2934112548828125, - "learning_rate": 1.117975233544168e-06, - "loss": 0.6322, + "epoch": 0.7885955649419218, + "grad_norm": 3.2374463081359863, + "learning_rate": 1.5778688020084946e-06, + "loss": 0.5881, "step": 11202 }, { - "epoch": 0.8477166963035829, - "grad_norm": 2.3044066429138184, - "learning_rate": 1.1168883137425003e-06, - "loss": 0.6555, + "epoch": 0.788665962689194, + "grad_norm": 1.7775721549987793, + "learning_rate": 1.576859464191169e-06, + "loss": 0.6303, "step": 11203 }, { - "epoch": 0.8477923650259165, - "grad_norm": 2.2896509170532227, - "learning_rate": 1.1158018876067855e-06, - "loss": 0.6963, + "epoch": 0.788736360436466, + "grad_norm": 2.231595516204834, + "learning_rate": 1.5758504083278315e-06, + "loss": 0.6408, "step": 11204 }, { - "epoch": 0.8478680337482501, - "grad_norm": 3.5141971111297607, - "learning_rate": 1.1147159552050557e-06, - "loss": 0.6393, + "epoch": 0.7888067581837381, + "grad_norm": 1.8019248247146606, + "learning_rate": 1.574841634470943e-06, + "loss": 0.5297, "step": 11205 }, { - "epoch": 0.8479437024705838, - "grad_norm": 2.2190842628479004, - "learning_rate": 1.113630516605315e-06, - "loss": 0.666, + "epoch": 0.7888771559310102, + "grad_norm": 1.980423927307129, + "learning_rate": 1.5738331426729501e-06, + "loss": 0.5498, "step": 11206 }, { - "epoch": 0.8480193711929174, - "grad_norm": 1.9215750694274902, - "learning_rate": 1.1125455718755402e-06, - "loss": 0.4681, + "epoch": 0.7889475536782823, + "grad_norm": 2.1128151416778564, + "learning_rate": 1.572824932986288e-06, + "loss": 0.7201, "step": 11207 }, { - "epoch": 0.848095039915251, - "grad_norm": 1.961254596710205, - "learning_rate": 1.1114611210836752e-06, - "loss": 0.6418, + "epoch": 0.7890179514255544, + "grad_norm": 1.5770304203033447, + "learning_rate": 1.5718170054633714e-06, + "loss": 0.6093, "step": 11208 }, { - "epoch": 0.8481707086375847, - "grad_norm": 1.8327749967575073, - "learning_rate": 1.1103771642976272e-06, - "loss": 0.7484, + "epoch": 0.7890883491728264, + "grad_norm": 1.906086802482605, + "learning_rate": 1.5708093601566064e-06, + "loss": 0.6047, "step": 11209 }, { - "epoch": 0.8482463773599183, - "grad_norm": 2.4051928520202637, - "learning_rate": 1.1092937015852793e-06, - "loss": 0.6549, + "epoch": 0.7891587469200986, + "grad_norm": 1.949823021888733, + "learning_rate": 1.5698019971183791e-06, + "loss": 0.7331, "step": 11210 }, { - "epoch": 0.8483220460822519, - "grad_norm": 2.0945074558258057, - "learning_rate": 1.108210733014482e-06, - "loss": 0.6683, + "epoch": 0.7892291446673706, + "grad_norm": 1.903473973274231, + "learning_rate": 1.568794916401064e-06, + "loss": 0.5802, "step": 11211 }, { - "epoch": 0.8483977148045855, - "grad_norm": 2.168553113937378, - "learning_rate": 1.1071282586530533e-06, - "loss": 0.7284, + "epoch": 0.7892995424146427, + "grad_norm": 2.362618923187256, + "learning_rate": 1.5677881180570182e-06, + "loss": 0.7005, "step": 11212 }, { - "epoch": 0.8484733835269191, - "grad_norm": 2.0133745670318604, - "learning_rate": 1.1060462785687816e-06, - "loss": 0.6258, + "epoch": 0.7893699401619149, + "grad_norm": 1.962419867515564, + "learning_rate": 1.5667816021385906e-06, + "loss": 0.6699, "step": 11213 }, { - "epoch": 0.8485490522492528, - "grad_norm": 2.4811995029449463, - "learning_rate": 1.104964792829424e-06, - "loss": 0.6499, + "epoch": 0.7894403379091869, + "grad_norm": 1.8860288858413696, + "learning_rate": 1.565775368698105e-06, + "loss": 0.6602, "step": 11214 }, { - "epoch": 0.8486247209715864, - "grad_norm": 2.246004343032837, - "learning_rate": 1.1038838015027091e-06, - "loss": 0.7389, + "epoch": 0.789510735656459, + "grad_norm": 1.9362924098968506, + "learning_rate": 1.5647694177878825e-06, + "loss": 0.6854, "step": 11215 }, { - "epoch": 0.84870038969392, - "grad_norm": 3.04372501373291, - "learning_rate": 1.1028033046563251e-06, - "loss": 0.7067, + "epoch": 0.789581133403731, + "grad_norm": 1.766741394996643, + "learning_rate": 1.5637637494602195e-06, + "loss": 0.6157, "step": 11216 }, { - "epoch": 0.8487760584162536, - "grad_norm": 4.410548210144043, - "learning_rate": 1.1017233023579434e-06, - "loss": 0.7638, + "epoch": 0.7896515311510032, + "grad_norm": 1.8660367727279663, + "learning_rate": 1.5627583637674025e-06, + "loss": 0.6443, "step": 11217 }, { - "epoch": 0.8488517271385873, - "grad_norm": 2.3239493370056152, - "learning_rate": 1.1006437946751964e-06, - "loss": 0.6129, + "epoch": 0.7897219288982753, + "grad_norm": 2.163687229156494, + "learning_rate": 1.5617532607617012e-06, + "loss": 0.6925, "step": 11218 }, { - "epoch": 0.8489273958609209, - "grad_norm": 1.7490601539611816, - "learning_rate": 1.0995647816756827e-06, - "loss": 0.6007, + "epoch": 0.7897923266455473, + "grad_norm": 1.6547349691390991, + "learning_rate": 1.560748440495375e-06, + "loss": 0.6852, "step": 11219 }, { - "epoch": 0.8490030645832545, - "grad_norm": 4.648648738861084, - "learning_rate": 1.0984862634269753e-06, - "loss": 0.5775, + "epoch": 0.7898627243928195, + "grad_norm": 1.909751534461975, + "learning_rate": 1.5597439030206608e-06, + "loss": 0.6817, "step": 11220 }, { - "epoch": 0.8490787333055881, - "grad_norm": 1.797368049621582, - "learning_rate": 1.0974082399966151e-06, - "loss": 0.6743, + "epoch": 0.7899331221400915, + "grad_norm": 1.9884084463119507, + "learning_rate": 1.5587396483897902e-06, + "loss": 0.6798, "step": 11221 }, { - "epoch": 0.8491544020279218, - "grad_norm": 2.3413918018341064, - "learning_rate": 1.0963307114521103e-06, - "loss": 0.9284, + "epoch": 0.7900035198873636, + "grad_norm": 2.1216928958892822, + "learning_rate": 1.5577356766549737e-06, + "loss": 0.631, "step": 11222 }, { - "epoch": 0.8492300707502554, - "grad_norm": 2.0559816360473633, - "learning_rate": 1.0952536778609407e-06, - "loss": 0.6177, + "epoch": 0.7900739176346356, + "grad_norm": 1.9194802045822144, + "learning_rate": 1.5567319878684076e-06, + "loss": 0.6399, "step": 11223 }, { - "epoch": 0.849305739472589, - "grad_norm": 1.990634560585022, - "learning_rate": 1.0941771392905526e-06, - "loss": 0.5489, + "epoch": 0.7901443153819078, + "grad_norm": 1.579756498336792, + "learning_rate": 1.5557285820822733e-06, + "loss": 0.6521, "step": 11224 }, { - "epoch": 0.8493814081949226, - "grad_norm": 1.8581657409667969, - "learning_rate": 1.0931010958083619e-06, - "loss": 0.6661, + "epoch": 0.7902147131291799, + "grad_norm": 1.89894437789917, + "learning_rate": 1.5547254593487428e-06, + "loss": 0.6697, "step": 11225 }, { - "epoch": 0.8494570769172562, - "grad_norm": 3.590940475463867, - "learning_rate": 1.0920255474817577e-06, - "loss": 0.5803, + "epoch": 0.7902851108764519, + "grad_norm": 1.7662122249603271, + "learning_rate": 1.5537226197199647e-06, + "loss": 0.6534, "step": 11226 }, { - "epoch": 0.8495327456395899, - "grad_norm": 1.9839054346084595, - "learning_rate": 1.0909504943780863e-06, - "loss": 0.7265, + "epoch": 0.7903555086237241, + "grad_norm": 2.0041792392730713, + "learning_rate": 1.5527200632480824e-06, + "loss": 0.7241, "step": 11227 }, { - "epoch": 0.8496084143619235, - "grad_norm": 2.0688846111297607, - "learning_rate": 1.0898759365646786e-06, - "loss": 0.6612, + "epoch": 0.7904259063709961, + "grad_norm": 1.8210434913635254, + "learning_rate": 1.5517177899852162e-06, + "loss": 0.6284, "step": 11228 }, { - "epoch": 0.8496840830842571, - "grad_norm": 2.5211896896362305, - "learning_rate": 1.0888018741088258e-06, - "loss": 0.5971, + "epoch": 0.7904963041182682, + "grad_norm": 2.1379621028900146, + "learning_rate": 1.5507157999834792e-06, + "loss": 0.627, "step": 11229 }, { - "epoch": 0.8497597518065907, - "grad_norm": 1.9265793561935425, - "learning_rate": 1.0877283070777852e-06, - "loss": 0.7328, + "epoch": 0.7905667018655403, + "grad_norm": 2.2805116176605225, + "learning_rate": 1.5497140932949593e-06, + "loss": 0.6891, "step": 11230 }, { - "epoch": 0.8498354205289244, - "grad_norm": 2.134287118911743, - "learning_rate": 1.08665523553879e-06, - "loss": 0.6763, + "epoch": 0.7906370996128124, + "grad_norm": 2.0883426666259766, + "learning_rate": 1.548712669971742e-06, + "loss": 0.6181, "step": 11231 }, { - "epoch": 0.849911089251258, - "grad_norm": 2.190967082977295, - "learning_rate": 1.0855826595590385e-06, - "loss": 0.7203, + "epoch": 0.7907074973600845, + "grad_norm": 2.0271170139312744, + "learning_rate": 1.5477115300658876e-06, + "loss": 0.543, "step": 11232 }, { - "epoch": 0.8499867579735916, - "grad_norm": 2.094014883041382, - "learning_rate": 1.0845105792056989e-06, - "loss": 0.7426, + "epoch": 0.7907778951073565, + "grad_norm": 5.069008827209473, + "learning_rate": 1.5467106736294505e-06, + "loss": 0.5683, "step": 11233 }, { - "epoch": 0.8500624266959252, - "grad_norm": 2.0137734413146973, - "learning_rate": 1.0834389945459096e-06, - "loss": 0.6875, + "epoch": 0.7908482928546287, + "grad_norm": 2.011101484298706, + "learning_rate": 1.5457101007144624e-06, + "loss": 0.6261, "step": 11234 }, { - "epoch": 0.8501380954182589, - "grad_norm": 2.131049156188965, - "learning_rate": 1.0823679056467746e-06, - "loss": 0.6814, + "epoch": 0.7909186906019008, + "grad_norm": 1.8853598833084106, + "learning_rate": 1.54470981137295e-06, + "loss": 0.5945, "step": 11235 }, { - "epoch": 0.8502137641405925, - "grad_norm": 2.530545949935913, - "learning_rate": 1.0812973125753708e-06, - "loss": 0.5937, + "epoch": 0.7909890883491728, + "grad_norm": 1.9928348064422607, + "learning_rate": 1.5437098056569118e-06, + "loss": 0.6555, "step": 11236 }, { - "epoch": 0.8502894328629261, - "grad_norm": 2.1850709915161133, - "learning_rate": 1.080227215398741e-06, - "loss": 0.7354, + "epoch": 0.7910594860964449, + "grad_norm": 1.9339262247085571, + "learning_rate": 1.5427100836183435e-06, + "loss": 0.5948, "step": 11237 }, { - "epoch": 0.8503651015852597, - "grad_norm": 3.4205210208892822, - "learning_rate": 1.0791576141838997e-06, - "loss": 0.6784, + "epoch": 0.791129883843717, + "grad_norm": 1.764718770980835, + "learning_rate": 1.5417106453092182e-06, + "loss": 0.5502, "step": 11238 }, { - "epoch": 0.8504407703075934, - "grad_norm": 1.7098037004470825, - "learning_rate": 1.0780885089978268e-06, - "loss": 0.6278, + "epoch": 0.7912002815909891, + "grad_norm": 2.087714195251465, + "learning_rate": 1.5407114907815e-06, + "loss": 0.6503, "step": 11239 }, { - "epoch": 0.850516439029927, - "grad_norm": 2.0882728099823, - "learning_rate": 1.0770198999074763e-06, - "loss": 0.5592, + "epoch": 0.7912706793382612, + "grad_norm": 1.670902132987976, + "learning_rate": 1.5397126200871373e-06, + "loss": 0.636, "step": 11240 }, { - "epoch": 0.8505921077522606, - "grad_norm": 1.8970396518707275, - "learning_rate": 1.0759517869797636e-06, - "loss": 0.577, + "epoch": 0.7913410770855333, + "grad_norm": 2.019625425338745, + "learning_rate": 1.538714033278061e-06, + "loss": 0.5729, "step": 11241 }, { - "epoch": 0.8506677764745942, - "grad_norm": 2.3336918354034424, - "learning_rate": 1.0748841702815775e-06, - "loss": 0.6625, + "epoch": 0.7914114748328054, + "grad_norm": 1.8858132362365723, + "learning_rate": 1.5377157304061887e-06, + "loss": 0.6308, "step": 11242 }, { - "epoch": 0.8507434451969278, - "grad_norm": 2.1653034687042236, - "learning_rate": 1.0738170498797813e-06, - "loss": 0.7601, + "epoch": 0.7914818725800774, + "grad_norm": 2.177860975265503, + "learning_rate": 1.5367177115234198e-06, + "loss": 0.5813, "step": 11243 }, { - "epoch": 0.8508191139192615, - "grad_norm": 2.735419988632202, - "learning_rate": 1.0727504258411958e-06, - "loss": 0.7221, + "epoch": 0.7915522703273495, + "grad_norm": 2.2972328662872314, + "learning_rate": 1.5357199766816472e-06, + "loss": 0.6665, "step": 11244 }, { - "epoch": 0.8508947826415951, - "grad_norm": 2.724544048309326, - "learning_rate": 1.0716842982326182e-06, - "loss": 0.6285, + "epoch": 0.7916226680746216, + "grad_norm": 2.021249532699585, + "learning_rate": 1.534722525932739e-06, + "loss": 0.6103, "step": 11245 }, { - "epoch": 0.8509704513639287, - "grad_norm": 1.9268399477005005, - "learning_rate": 1.0706186671208144e-06, - "loss": 0.6552, + "epoch": 0.7916930658218937, + "grad_norm": 1.8931219577789307, + "learning_rate": 1.5337253593285592e-06, + "loss": 0.5773, "step": 11246 }, { - "epoch": 0.8510461200862623, - "grad_norm": 3.0010483264923096, - "learning_rate": 1.069553532572515e-06, - "loss": 0.6105, + "epoch": 0.7917634635691658, + "grad_norm": 1.7485551834106445, + "learning_rate": 1.5327284769209483e-06, + "loss": 0.6855, "step": 11247 }, { - "epoch": 0.851121788808596, - "grad_norm": 2.1853091716766357, - "learning_rate": 1.0684888946544244e-06, - "loss": 0.6504, + "epoch": 0.7918338613164378, + "grad_norm": 1.9976626634597778, + "learning_rate": 1.5317318787617351e-06, + "loss": 0.6118, "step": 11248 }, { - "epoch": 0.8511974575309296, - "grad_norm": 2.737804412841797, - "learning_rate": 1.0674247534332125e-06, - "loss": 0.6128, + "epoch": 0.79190425906371, + "grad_norm": 1.8768340349197388, + "learning_rate": 1.5307355649027324e-06, + "loss": 0.6205, "step": 11249 }, { - "epoch": 0.8512731262532632, - "grad_norm": 2.3587794303894043, - "learning_rate": 1.066361108975522e-06, - "loss": 0.6185, + "epoch": 0.791974656810982, + "grad_norm": 1.7207293510437012, + "learning_rate": 1.5297395353957424e-06, + "loss": 0.6772, "step": 11250 }, { - "epoch": 0.8513487949755968, - "grad_norm": 1.3188940286636353, - "learning_rate": 1.0652979613479574e-06, - "loss": 0.7213, + "epoch": 0.7920450545582541, + "grad_norm": 1.7383793592453003, + "learning_rate": 1.5287437902925464e-06, + "loss": 0.5527, "step": 11251 }, { - "epoch": 0.8514244636979305, - "grad_norm": 2.1479740142822266, - "learning_rate": 1.0642353106170956e-06, - "loss": 0.4939, + "epoch": 0.7921154523055263, + "grad_norm": 1.839133858680725, + "learning_rate": 1.5277483296449174e-06, + "loss": 0.6094, "step": 11252 }, { - "epoch": 0.8515001324202641, - "grad_norm": 2.823068141937256, - "learning_rate": 1.0631731568494884e-06, - "loss": 0.7325, + "epoch": 0.7921858500527983, + "grad_norm": 3.4630885124206543, + "learning_rate": 1.526753153504609e-06, + "loss": 0.683, "step": 11253 }, { - "epoch": 0.8515758011425977, - "grad_norm": 1.8597098588943481, - "learning_rate": 1.0621115001116516e-06, - "loss": 0.5473, + "epoch": 0.7922562478000704, + "grad_norm": 1.806921124458313, + "learning_rate": 1.5257582619233614e-06, + "loss": 0.658, "step": 11254 }, { - "epoch": 0.8516514698649313, - "grad_norm": 2.1567814350128174, - "learning_rate": 1.0610503404700639e-06, - "loss": 0.7219, + "epoch": 0.7923266455473424, + "grad_norm": 1.8149384260177612, + "learning_rate": 1.5247636549528971e-06, + "loss": 0.5469, "step": 11255 }, { - "epoch": 0.851727138587265, - "grad_norm": 2.5054931640625, - "learning_rate": 1.0599896779911822e-06, - "loss": 0.7023, + "epoch": 0.7923970432946146, + "grad_norm": 1.783969521522522, + "learning_rate": 1.5237693326449312e-06, + "loss": 0.6515, "step": 11256 }, { - "epoch": 0.8518028073095986, - "grad_norm": 2.3276219367980957, - "learning_rate": 1.0589295127414283e-06, - "loss": 0.661, + "epoch": 0.7924674410418867, + "grad_norm": 1.6275043487548828, + "learning_rate": 1.5227752950511552e-06, + "loss": 0.5798, "step": 11257 }, { - "epoch": 0.8518784760319322, - "grad_norm": 2.0778353214263916, - "learning_rate": 1.0578698447871923e-06, - "loss": 0.7084, + "epoch": 0.7925378387891587, + "grad_norm": 1.8110941648483276, + "learning_rate": 1.521781542223254e-06, + "loss": 0.5755, "step": 11258 }, { - "epoch": 0.8519541447542658, - "grad_norm": 2.253126621246338, - "learning_rate": 1.056810674194835e-06, - "loss": 0.6965, + "epoch": 0.7926082365364309, + "grad_norm": 2.0637526512145996, + "learning_rate": 1.520788074212892e-06, + "loss": 0.5482, "step": 11259 }, { - "epoch": 0.8520298134765995, - "grad_norm": 2.866034984588623, - "learning_rate": 1.0557520010306842e-06, - "loss": 0.7603, + "epoch": 0.7926786342837029, + "grad_norm": 1.5671215057373047, + "learning_rate": 1.51979489107172e-06, + "loss": 0.6745, "step": 11260 }, { - "epoch": 0.8521054821989331, - "grad_norm": 2.482037305831909, - "learning_rate": 1.0546938253610389e-06, - "loss": 0.5681, + "epoch": 0.792749032030975, + "grad_norm": 2.1400701999664307, + "learning_rate": 1.518801992851373e-06, + "loss": 0.6605, "step": 11261 }, { - "epoch": 0.8521811509212667, - "grad_norm": 1.911925196647644, - "learning_rate": 1.0536361472521644e-06, - "loss": 0.7476, + "epoch": 0.792819429778247, + "grad_norm": 1.8211106061935425, + "learning_rate": 1.5178093796034764e-06, + "loss": 0.5649, "step": 11262 }, { - "epoch": 0.8522568196436003, - "grad_norm": 2.388798236846924, - "learning_rate": 1.0525789667702935e-06, - "loss": 0.6213, + "epoch": 0.7928898275255192, + "grad_norm": 1.869382619857788, + "learning_rate": 1.516817051379633e-06, + "loss": 0.741, "step": 11263 }, { - "epoch": 0.8523324883659339, - "grad_norm": 2.2827963829040527, - "learning_rate": 1.051522283981636e-06, - "loss": 0.7701, + "epoch": 0.7929602252727913, + "grad_norm": 1.8937757015228271, + "learning_rate": 1.5158250082314387e-06, + "loss": 0.7003, "step": 11264 }, { - "epoch": 0.8524081570882676, - "grad_norm": 2.937349796295166, - "learning_rate": 1.0504660989523604e-06, - "loss": 0.7427, + "epoch": 0.7930306230200633, + "grad_norm": 1.9148468971252441, + "learning_rate": 1.5148332502104697e-06, + "loss": 0.703, "step": 11265 }, { - "epoch": 0.8524838258106012, - "grad_norm": 1.9935485124588013, - "learning_rate": 1.0494104117486086e-06, - "loss": 0.6395, + "epoch": 0.7931010207673355, + "grad_norm": 2.082277536392212, + "learning_rate": 1.513841777368287e-06, + "loss": 0.6188, "step": 11266 }, { - "epoch": 0.8525594945329348, - "grad_norm": 1.938444972038269, - "learning_rate": 1.0483552224364936e-06, - "loss": 0.639, + "epoch": 0.7931714185146075, + "grad_norm": 1.8408116102218628, + "learning_rate": 1.512850589756437e-06, + "loss": 0.574, "step": 11267 }, { - "epoch": 0.8526351632552684, - "grad_norm": 2.6357712745666504, - "learning_rate": 1.047300531082092e-06, - "loss": 0.7034, + "epoch": 0.7932418162618796, + "grad_norm": 2.1696865558624268, + "learning_rate": 1.511859687426457e-06, + "loss": 0.6132, "step": 11268 }, { - "epoch": 0.8527108319776021, - "grad_norm": 1.8678675889968872, - "learning_rate": 1.0462463377514543e-06, - "loss": 0.7342, + "epoch": 0.7933122140091518, + "grad_norm": 1.9473010301589966, + "learning_rate": 1.510869070429859e-06, + "loss": 0.6288, "step": 11269 }, { - "epoch": 0.8527865006999357, - "grad_norm": 2.1463828086853027, - "learning_rate": 1.045192642510596e-06, - "loss": 0.6495, + "epoch": 0.7933826117564238, + "grad_norm": 1.9700613021850586, + "learning_rate": 1.5098787388181526e-06, + "loss": 0.622, "step": 11270 }, { - "epoch": 0.8528621694222693, - "grad_norm": 1.9937474727630615, - "learning_rate": 1.0441394454255035e-06, - "loss": 0.6389, + "epoch": 0.7934530095036959, + "grad_norm": 1.846717357635498, + "learning_rate": 1.5088886926428215e-06, + "loss": 0.5384, "step": 11271 }, { - "epoch": 0.8529378381446029, - "grad_norm": 2.2576024532318115, - "learning_rate": 1.0430867465621328e-06, - "loss": 0.676, + "epoch": 0.7935234072509679, + "grad_norm": 2.233133316040039, + "learning_rate": 1.507898931955341e-06, + "loss": 0.6516, "step": 11272 }, { - "epoch": 0.8530135068669366, - "grad_norm": 2.1842191219329834, - "learning_rate": 1.0420345459864023e-06, - "loss": 0.5946, + "epoch": 0.7935938049982401, + "grad_norm": 2.0047366619110107, + "learning_rate": 1.506909456807167e-06, + "loss": 0.6135, "step": 11273 }, { - "epoch": 0.8530891755892702, - "grad_norm": 1.927615761756897, - "learning_rate": 1.0409828437642092e-06, - "loss": 0.6436, + "epoch": 0.7936642027455122, + "grad_norm": 2.2033889293670654, + "learning_rate": 1.505920267249747e-06, + "loss": 0.6758, "step": 11274 }, { - "epoch": 0.8531648443116038, - "grad_norm": 3.0947635173797607, - "learning_rate": 1.039931639961416e-06, - "loss": 0.5957, + "epoch": 0.7937346004927842, + "grad_norm": 1.5893237590789795, + "learning_rate": 1.5049313633345066e-06, + "loss": 0.5681, "step": 11275 }, { - "epoch": 0.8532405130339374, - "grad_norm": 2.468949556350708, - "learning_rate": 1.0388809346438467e-06, - "loss": 0.618, + "epoch": 0.7938049982400563, + "grad_norm": 1.7532336711883545, + "learning_rate": 1.5039427451128625e-06, + "loss": 0.6724, "step": 11276 }, { - "epoch": 0.853316181756271, - "grad_norm": 3.7624311447143555, - "learning_rate": 1.037830727877303e-06, - "loss": 0.6508, + "epoch": 0.7938753959873284, + "grad_norm": 1.8150497674942017, + "learning_rate": 1.5029544126362127e-06, + "loss": 0.6727, "step": 11277 }, { - "epoch": 0.8533918504786047, - "grad_norm": 1.588759183883667, - "learning_rate": 1.036781019727552e-06, - "loss": 0.6209, + "epoch": 0.7939457937346005, + "grad_norm": 2.1115150451660156, + "learning_rate": 1.5019663659559413e-06, + "loss": 0.6579, "step": 11278 }, { - "epoch": 0.8534675192009383, - "grad_norm": 2.215994119644165, - "learning_rate": 1.0357318102603293e-06, - "loss": 0.6864, + "epoch": 0.7940161914818726, + "grad_norm": 2.001028060913086, + "learning_rate": 1.500978605123416e-06, + "loss": 0.6479, "step": 11279 }, { - "epoch": 0.8535431879232719, - "grad_norm": 2.4442219734191895, - "learning_rate": 1.0346830995413405e-06, - "loss": 0.5362, + "epoch": 0.7940865892291447, + "grad_norm": 1.964374303817749, + "learning_rate": 1.4999911301899957e-06, + "loss": 0.5868, "step": 11280 }, { - "epoch": 0.8536188566456056, - "grad_norm": 1.9089670181274414, - "learning_rate": 1.0336348876362587e-06, - "loss": 0.6868, + "epoch": 0.7941569869764168, + "grad_norm": 1.8014540672302246, + "learning_rate": 1.499003941207015e-06, + "loss": 0.7072, "step": 11281 }, { - "epoch": 0.8536945253679392, - "grad_norm": 2.9302616119384766, - "learning_rate": 1.0325871746107266e-06, - "loss": 0.8656, + "epoch": 0.7942273847236888, + "grad_norm": 2.18180251121521, + "learning_rate": 1.498017038225804e-06, + "loss": 0.7253, "step": 11282 }, { - "epoch": 0.8537701940902728, - "grad_norm": 2.2180144786834717, - "learning_rate": 1.0315399605303558e-06, - "loss": 0.5997, + "epoch": 0.794297782470961, + "grad_norm": 2.2905514240264893, + "learning_rate": 1.4970304212976691e-06, + "loss": 0.7653, "step": 11283 }, { - "epoch": 0.8538458628126064, - "grad_norm": 2.2152187824249268, - "learning_rate": 1.0304932454607254e-06, - "loss": 0.675, + "epoch": 0.794368180218233, + "grad_norm": 2.056652545928955, + "learning_rate": 1.4960440904739073e-06, + "loss": 0.67, "step": 11284 }, { - "epoch": 0.85392153153494, - "grad_norm": 1.842807412147522, - "learning_rate": 1.0294470294673846e-06, - "loss": 0.6631, + "epoch": 0.7944385779655051, + "grad_norm": 2.067701578140259, + "learning_rate": 1.4950580458057954e-06, + "loss": 0.5904, "step": 11285 }, { - "epoch": 0.8539972002572737, - "grad_norm": 2.476421356201172, - "learning_rate": 1.0284013126158527e-06, - "loss": 0.5843, + "epoch": 0.7945089757127772, + "grad_norm": 2.0860769748687744, + "learning_rate": 1.4940722873446039e-06, + "loss": 0.6808, "step": 11286 }, { - "epoch": 0.8540728689796073, - "grad_norm": 2.07633113861084, - "learning_rate": 1.0273560949716123e-06, - "loss": 0.6895, + "epoch": 0.7945793734600493, + "grad_norm": 1.8879669904708862, + "learning_rate": 1.4930868151415776e-06, + "loss": 0.6585, "step": 11287 }, { - "epoch": 0.8541485377019409, - "grad_norm": 2.113013744354248, - "learning_rate": 1.026311376600117e-06, - "loss": 0.7096, + "epoch": 0.7946497712073214, + "grad_norm": 1.8322242498397827, + "learning_rate": 1.4921016292479576e-06, + "loss": 0.6423, "step": 11288 }, { - "epoch": 0.8542242064242745, - "grad_norm": 1.9778804779052734, - "learning_rate": 1.0252671575667984e-06, - "loss": 0.5172, + "epoch": 0.7947201689545934, + "grad_norm": 2.2098991870880127, + "learning_rate": 1.4911167297149625e-06, + "loss": 0.6443, "step": 11289 }, { - "epoch": 0.8542998751466081, - "grad_norm": 2.0087730884552, - "learning_rate": 1.0242234379370402e-06, - "loss": 0.5877, + "epoch": 0.7947905667018655, + "grad_norm": 1.9363467693328857, + "learning_rate": 1.4901321165937959e-06, + "loss": 0.6227, "step": 11290 }, { - "epoch": 0.8543755438689418, - "grad_norm": 2.063204288482666, - "learning_rate": 1.0231802177762084e-06, - "loss": 0.6326, + "epoch": 0.7948609644491377, + "grad_norm": 2.6937050819396973, + "learning_rate": 1.489147789935652e-06, + "loss": 0.7298, "step": 11291 }, { - "epoch": 0.8544512125912754, - "grad_norm": 2.7657787799835205, - "learning_rate": 1.0221374971496316e-06, - "loss": 0.7622, + "epoch": 0.7949313621964097, + "grad_norm": 3.3898191452026367, + "learning_rate": 1.4881637497917036e-06, + "loss": 0.6581, "step": 11292 }, { - "epoch": 0.854526881313609, - "grad_norm": 1.7157849073410034, - "learning_rate": 1.0210952761226075e-06, - "loss": 0.7045, + "epoch": 0.7950017599436818, + "grad_norm": 1.8404685258865356, + "learning_rate": 1.4871799962131156e-06, + "loss": 0.6658, "step": 11293 }, { - "epoch": 0.8546025500359427, - "grad_norm": 2.532585620880127, - "learning_rate": 1.020053554760405e-06, - "loss": 0.5766, + "epoch": 0.7950721576909539, + "grad_norm": 1.7160937786102295, + "learning_rate": 1.4861965292510325e-06, + "loss": 0.7059, "step": 11294 }, { - "epoch": 0.8546782187582763, - "grad_norm": 2.225200891494751, - "learning_rate": 1.0190123331282586e-06, - "loss": 0.6826, + "epoch": 0.795142555438226, + "grad_norm": 1.6603354215621948, + "learning_rate": 1.4852133489565858e-06, + "loss": 0.6896, "step": 11295 }, { - "epoch": 0.8547538874806099, - "grad_norm": 2.5582962036132812, - "learning_rate": 1.0179716112913737e-06, - "loss": 0.6516, + "epoch": 0.7952129531854981, + "grad_norm": 1.7605094909667969, + "learning_rate": 1.484230455380889e-06, + "loss": 0.7083, "step": 11296 }, { - "epoch": 0.8548295562029435, - "grad_norm": 1.9359956979751587, - "learning_rate": 1.0169313893149256e-06, - "loss": 0.6565, + "epoch": 0.7952833509327701, + "grad_norm": 1.9338182210922241, + "learning_rate": 1.483247848575048e-06, + "loss": 0.7174, "step": 11297 }, { - "epoch": 0.8549052249252771, - "grad_norm": 1.8306528329849243, - "learning_rate": 1.0158916672640524e-06, - "loss": 0.7271, + "epoch": 0.7953537486800423, + "grad_norm": 1.693452000617981, + "learning_rate": 1.4822655285901461e-06, + "loss": 0.6671, "step": 11298 }, { - "epoch": 0.8549808936476108, - "grad_norm": 2.077057123184204, - "learning_rate": 1.0148524452038643e-06, - "loss": 0.6396, + "epoch": 0.7954241464273143, + "grad_norm": 2.4138917922973633, + "learning_rate": 1.4812834954772589e-06, + "loss": 0.7405, "step": 11299 }, { - "epoch": 0.8550565623699444, - "grad_norm": 2.082470655441284, - "learning_rate": 1.0138137231994477e-06, - "loss": 0.7199, + "epoch": 0.7954945441745864, + "grad_norm": 2.0573744773864746, + "learning_rate": 1.4803017492874395e-06, + "loss": 0.6611, "step": 11300 }, { - "epoch": 0.855132231092278, - "grad_norm": 1.7077547311782837, - "learning_rate": 1.0127755013158432e-06, - "loss": 0.679, + "epoch": 0.7955649419218584, + "grad_norm": 1.7144583463668823, + "learning_rate": 1.479320290071732e-06, + "loss": 0.5746, "step": 11301 }, { - "epoch": 0.8552078998146116, - "grad_norm": 2.193718671798706, - "learning_rate": 1.0117377796180712e-06, - "loss": 0.7218, + "epoch": 0.7956353396691306, + "grad_norm": 1.9715425968170166, + "learning_rate": 1.4783391178811606e-06, + "loss": 0.6052, "step": 11302 }, { - "epoch": 0.8552835685369452, - "grad_norm": 2.146713972091675, - "learning_rate": 1.010700558171117e-06, - "loss": 0.8642, + "epoch": 0.7957057374164027, + "grad_norm": 2.55731463432312, + "learning_rate": 1.4773582327667407e-06, + "loss": 0.6859, "step": 11303 }, { - "epoch": 0.8553592372592789, - "grad_norm": 2.3448047637939453, - "learning_rate": 1.0096638370399347e-06, - "loss": 0.6397, + "epoch": 0.7957761351636747, + "grad_norm": 2.941145658493042, + "learning_rate": 1.4763776347794663e-06, + "loss": 0.7211, "step": 11304 }, { - "epoch": 0.8554349059816125, - "grad_norm": 2.3762691020965576, - "learning_rate": 1.0086276162894462e-06, - "loss": 0.5452, + "epoch": 0.7958465329109469, + "grad_norm": 2.024477005004883, + "learning_rate": 1.4753973239703228e-06, + "loss": 0.6032, "step": 11305 }, { - "epoch": 0.8555105747039461, - "grad_norm": 2.4727251529693604, - "learning_rate": 1.0075918959845437e-06, - "loss": 0.7184, + "epoch": 0.7959169306582189, + "grad_norm": 1.8886548280715942, + "learning_rate": 1.474417300390276e-06, + "loss": 0.6182, "step": 11306 }, { - "epoch": 0.8555862434262798, - "grad_norm": 2.2382912635803223, - "learning_rate": 1.0065566761900882e-06, - "loss": 0.5858, + "epoch": 0.795987328405491, + "grad_norm": 2.3313779830932617, + "learning_rate": 1.4734375640902779e-06, + "loss": 0.6112, "step": 11307 }, { - "epoch": 0.8556619121486134, - "grad_norm": 2.0968942642211914, - "learning_rate": 1.005521956970907e-06, - "loss": 0.6225, + "epoch": 0.7960577261527632, + "grad_norm": 1.6120902299880981, + "learning_rate": 1.472458115121264e-06, + "loss": 0.7178, "step": 11308 }, { - "epoch": 0.855737580870947, - "grad_norm": 2.303739309310913, - "learning_rate": 1.0044877383917962e-06, - "loss": 0.7995, + "epoch": 0.7961281239000352, + "grad_norm": 1.7141472101211548, + "learning_rate": 1.4714789535341606e-06, + "loss": 0.5853, "step": 11309 }, { - "epoch": 0.8558132495932806, - "grad_norm": 2.176666498184204, - "learning_rate": 1.003454020517525e-06, - "loss": 0.6295, + "epoch": 0.7961985216473073, + "grad_norm": 2.1929807662963867, + "learning_rate": 1.4705000793798713e-06, + "loss": 0.7112, "step": 11310 }, { - "epoch": 0.8558889183156142, - "grad_norm": 2.0503792762756348, - "learning_rate": 1.0024208034128285e-06, - "loss": 0.599, + "epoch": 0.7962689193945793, + "grad_norm": 2.6587483882904053, + "learning_rate": 1.469521492709293e-06, + "loss": 0.6662, "step": 11311 }, { - "epoch": 0.8559645870379479, - "grad_norm": 2.041576385498047, - "learning_rate": 1.0013880871424082e-06, - "loss": 0.6261, + "epoch": 0.7963393171418515, + "grad_norm": 1.86634361743927, + "learning_rate": 1.4685431935733008e-06, + "loss": 0.6219, "step": 11312 }, { - "epoch": 0.8560402557602815, - "grad_norm": 2.608793258666992, - "learning_rate": 1.0003558717709352e-06, - "loss": 0.5635, + "epoch": 0.7964097148891236, + "grad_norm": 1.9872443675994873, + "learning_rate": 1.4675651820227577e-06, + "loss": 0.6355, "step": 11313 }, { - "epoch": 0.8561159244826151, - "grad_norm": 2.738176107406616, - "learning_rate": 9.99324157363053e-07, - "loss": 0.7301, + "epoch": 0.7964801126363956, + "grad_norm": 2.2503440380096436, + "learning_rate": 1.4665874581085093e-06, + "loss": 0.67, "step": 11314 }, { - "epoch": 0.8561915932049488, - "grad_norm": 2.0771257877349854, - "learning_rate": 9.982929439833684e-07, - "loss": 0.6853, + "epoch": 0.7965505103836678, + "grad_norm": 2.2943084239959717, + "learning_rate": 1.4656100218813922e-06, + "loss": 0.6219, "step": 11315 }, { - "epoch": 0.8562672619272823, - "grad_norm": 2.2196590900421143, - "learning_rate": 9.972622316964602e-07, - "loss": 0.5945, + "epoch": 0.7966209081309398, + "grad_norm": 1.858384609222412, + "learning_rate": 1.4646328733922206e-06, + "loss": 0.7125, "step": 11316 }, { - "epoch": 0.856342930649616, - "grad_norm": 2.066857099533081, - "learning_rate": 9.962320205668747e-07, - "loss": 0.6929, + "epoch": 0.7966913058782119, + "grad_norm": 1.856398582458496, + "learning_rate": 1.4636560126918006e-06, + "loss": 0.688, "step": 11317 }, { - "epoch": 0.8564185993719496, - "grad_norm": 3.5094408988952637, - "learning_rate": 9.9520231065913e-07, - "loss": 0.7241, + "epoch": 0.7967617036254839, + "grad_norm": 1.9442437887191772, + "learning_rate": 1.4626794398309186e-06, + "loss": 0.7171, "step": 11318 }, { - "epoch": 0.8564942680942832, - "grad_norm": 2.093404531478882, - "learning_rate": 9.94173102037703e-07, - "loss": 0.6483, + "epoch": 0.7968321013727561, + "grad_norm": 2.2677114009857178, + "learning_rate": 1.4617031548603472e-06, + "loss": 0.7344, "step": 11319 }, { - "epoch": 0.8565699368166169, - "grad_norm": 2.078911304473877, - "learning_rate": 9.931443947670527e-07, - "loss": 0.6671, + "epoch": 0.7969024991200282, + "grad_norm": 1.9007998704910278, + "learning_rate": 1.460727157830843e-06, + "loss": 0.6621, "step": 11320 }, { - "epoch": 0.8566456055389505, - "grad_norm": 1.9345077276229858, - "learning_rate": 9.92116188911599e-07, - "loss": 0.665, + "epoch": 0.7969728968673002, + "grad_norm": 1.7070995569229126, + "learning_rate": 1.4597514487931522e-06, + "loss": 0.6978, "step": 11321 }, { - "epoch": 0.8567212742612841, - "grad_norm": 1.7748900651931763, - "learning_rate": 9.91088484535729e-07, - "loss": 0.6294, + "epoch": 0.7970432946145724, + "grad_norm": 1.9699820280075073, + "learning_rate": 1.4587760277979996e-06, + "loss": 0.6516, "step": 11322 }, { - "epoch": 0.8567969429836177, - "grad_norm": 2.2130126953125, - "learning_rate": 9.900612817038015e-07, - "loss": 0.637, + "epoch": 0.7971136923618444, + "grad_norm": 2.1600401401519775, + "learning_rate": 1.457800894896101e-06, + "loss": 0.711, "step": 11323 }, { - "epoch": 0.8568726117059513, - "grad_norm": 2.3196053504943848, - "learning_rate": 9.890345804801428e-07, - "loss": 0.5467, + "epoch": 0.7971840901091165, + "grad_norm": 2.270263195037842, + "learning_rate": 1.456826050138154e-06, + "loss": 0.6221, "step": 11324 }, { - "epoch": 0.856948280428285, - "grad_norm": 3.2533650398254395, - "learning_rate": 9.880083809290526e-07, - "loss": 0.6086, + "epoch": 0.7972544878563886, + "grad_norm": 1.6153262853622437, + "learning_rate": 1.4558514935748402e-06, + "loss": 0.6801, "step": 11325 }, { - "epoch": 0.8570239491506186, - "grad_norm": 2.1937711238861084, - "learning_rate": 9.869826831147895e-07, - "loss": 0.5794, + "epoch": 0.7973248856036607, + "grad_norm": 2.5926828384399414, + "learning_rate": 1.4548772252568262e-06, + "loss": 0.7235, "step": 11326 }, { - "epoch": 0.8570996178729522, - "grad_norm": 2.005005121231079, - "learning_rate": 9.85957487101588e-07, - "loss": 0.5671, + "epoch": 0.7973952833509328, + "grad_norm": 1.8965530395507812, + "learning_rate": 1.4539032452347702e-06, + "loss": 0.5666, "step": 11327 }, { - "epoch": 0.8571752865952859, - "grad_norm": 2.504263401031494, - "learning_rate": 9.84932792953649e-07, - "loss": 0.6214, + "epoch": 0.7974656810982048, + "grad_norm": 2.019597291946411, + "learning_rate": 1.4529295535593048e-06, + "loss": 0.7345, "step": 11328 }, { - "epoch": 0.8572509553176194, - "grad_norm": 2.071230888366699, - "learning_rate": 9.83908600735142e-07, - "loss": 0.6633, + "epoch": 0.797536078845477, + "grad_norm": 2.17825984954834, + "learning_rate": 1.451956150281057e-06, + "loss": 0.6205, "step": 11329 }, { - "epoch": 0.8573266240399531, - "grad_norm": 2.571350574493408, - "learning_rate": 9.828849105102067e-07, - "loss": 0.6164, + "epoch": 0.7976064765927491, + "grad_norm": 1.9837154150009155, + "learning_rate": 1.4509830354506342e-06, + "loss": 0.6207, "step": 11330 }, { - "epoch": 0.8574022927622867, - "grad_norm": 2.430696725845337, - "learning_rate": 9.81861722342948e-07, - "loss": 0.6413, + "epoch": 0.7976768743400211, + "grad_norm": 1.8884116411209106, + "learning_rate": 1.4500102091186288e-06, + "loss": 0.6254, "step": 11331 }, { - "epoch": 0.8574779614846203, - "grad_norm": 3.106407403945923, - "learning_rate": 9.80839036297444e-07, - "loss": 0.5968, + "epoch": 0.7977472720872932, + "grad_norm": 1.7636656761169434, + "learning_rate": 1.449037671335617e-06, + "loss": 0.6887, "step": 11332 }, { - "epoch": 0.857553630206954, - "grad_norm": 2.656306505203247, - "learning_rate": 9.798168524377353e-07, - "loss": 0.621, + "epoch": 0.7978176698345653, + "grad_norm": 1.9112602472305298, + "learning_rate": 1.4480654221521657e-06, + "loss": 0.7742, "step": 11333 }, { - "epoch": 0.8576292989292876, - "grad_norm": 2.564502000808716, - "learning_rate": 9.787951708278334e-07, - "loss": 0.8049, + "epoch": 0.7978880675818374, + "grad_norm": 1.7022463083267212, + "learning_rate": 1.4470934616188192e-06, + "loss": 0.6427, "step": 11334 }, { - "epoch": 0.8577049676516212, - "grad_norm": 2.267310619354248, - "learning_rate": 9.77773991531726e-07, - "loss": 0.724, + "epoch": 0.7979584653291095, + "grad_norm": 1.7339926958084106, + "learning_rate": 1.4461217897861154e-06, + "loss": 0.6755, "step": 11335 }, { - "epoch": 0.8577806363739549, - "grad_norm": 2.382852554321289, - "learning_rate": 9.767533146133558e-07, - "loss": 0.6997, + "epoch": 0.7980288630763815, + "grad_norm": 1.850535273551941, + "learning_rate": 1.445150406704567e-06, + "loss": 0.6828, "step": 11336 }, { - "epoch": 0.8578563050962884, - "grad_norm": 3.9764015674591064, - "learning_rate": 9.757331401366431e-07, - "loss": 0.5756, + "epoch": 0.7980992608236537, + "grad_norm": 1.9330639839172363, + "learning_rate": 1.4441793124246837e-06, + "loss": 0.6695, "step": 11337 }, { - "epoch": 0.8579319738186221, - "grad_norm": 2.423640727996826, - "learning_rate": 9.747134681654754e-07, - "loss": 0.6611, + "epoch": 0.7981696585709257, + "grad_norm": 1.6631863117218018, + "learning_rate": 1.4432085069969457e-06, + "loss": 0.5305, "step": 11338 }, { - "epoch": 0.8580076425409557, - "grad_norm": 1.80607008934021, - "learning_rate": 9.736942987637069e-07, - "loss": 0.7413, + "epoch": 0.7982400563181978, + "grad_norm": 1.7349246740341187, + "learning_rate": 1.44223799047183e-06, + "loss": 0.599, "step": 11339 }, { - "epoch": 0.8580833112632893, - "grad_norm": 2.182513952255249, - "learning_rate": 9.726756319951625e-07, - "loss": 0.81, + "epoch": 0.7983104540654699, + "grad_norm": 1.9773101806640625, + "learning_rate": 1.4412677628997968e-06, + "loss": 0.6924, "step": 11340 }, { - "epoch": 0.858158979985623, - "grad_norm": 2.507810592651367, - "learning_rate": 9.716574679236322e-07, - "loss": 0.6897, + "epoch": 0.798380851812742, + "grad_norm": 2.0680389404296875, + "learning_rate": 1.440297824331284e-06, + "loss": 0.7041, "step": 11341 }, { - "epoch": 0.8582346487079565, - "grad_norm": 2.9260153770446777, - "learning_rate": 9.706398066128786e-07, - "loss": 0.6551, + "epoch": 0.7984512495600141, + "grad_norm": 2.40694260597229, + "learning_rate": 1.4393281748167255e-06, + "loss": 0.6501, "step": 11342 }, { - "epoch": 0.8583103174302902, - "grad_norm": 1.9764418601989746, - "learning_rate": 9.696226481266323e-07, - "loss": 0.7505, + "epoch": 0.7985216473072861, + "grad_norm": 1.8158074617385864, + "learning_rate": 1.4383588144065305e-06, + "loss": 0.6469, "step": 11343 }, { - "epoch": 0.8583859861526238, - "grad_norm": 2.561077356338501, - "learning_rate": 9.68605992528588e-07, - "loss": 0.6391, + "epoch": 0.7985920450545583, + "grad_norm": 1.8681696653366089, + "learning_rate": 1.4373897431510983e-06, + "loss": 0.6285, "step": 11344 }, { - "epoch": 0.8584616548749574, - "grad_norm": 2.1961679458618164, - "learning_rate": 9.675898398824107e-07, - "loss": 0.7866, + "epoch": 0.7986624428018303, + "grad_norm": 2.0735113620758057, + "learning_rate": 1.4364209611008084e-06, + "loss": 0.7115, "step": 11345 }, { - "epoch": 0.8585373235972911, - "grad_norm": 4.30953311920166, - "learning_rate": 9.66574190251741e-07, - "loss": 0.7338, + "epoch": 0.7987328405491024, + "grad_norm": 1.7642958164215088, + "learning_rate": 1.435452468306034e-06, + "loss": 0.5954, "step": 11346 }, { - "epoch": 0.8586129923196247, - "grad_norm": 5.359286308288574, - "learning_rate": 9.655590437001774e-07, - "loss": 0.6028, + "epoch": 0.7988032382963746, + "grad_norm": 2.7819740772247314, + "learning_rate": 1.4344842648171237e-06, + "loss": 0.6064, "step": 11347 }, { - "epoch": 0.8586886610419583, - "grad_norm": 2.5870842933654785, - "learning_rate": 9.645444002912923e-07, - "loss": 0.6086, + "epoch": 0.7988736360436466, + "grad_norm": 2.157402515411377, + "learning_rate": 1.433516350684418e-06, + "loss": 0.6896, "step": 11348 }, { - "epoch": 0.858764329764292, - "grad_norm": 2.1643459796905518, - "learning_rate": 9.63530260088627e-07, - "loss": 0.6189, + "epoch": 0.7989440337909187, + "grad_norm": 1.5065827369689941, + "learning_rate": 1.432548725958239e-06, + "loss": 0.5946, "step": 11349 }, { - "epoch": 0.8588399984866255, - "grad_norm": 2.0352864265441895, - "learning_rate": 9.625166231556905e-07, - "loss": 0.6887, + "epoch": 0.7990144315381907, + "grad_norm": 1.9106087684631348, + "learning_rate": 1.4315813906888934e-06, + "loss": 0.6005, "step": 11350 }, { - "epoch": 0.8589156672089592, - "grad_norm": 2.4447758197784424, - "learning_rate": 9.615034895559582e-07, - "loss": 0.821, + "epoch": 0.7990848292854629, + "grad_norm": 1.9756278991699219, + "learning_rate": 1.4306143449266732e-06, + "loss": 0.6078, "step": 11351 }, { - "epoch": 0.8589913359312928, - "grad_norm": 1.9808082580566406, - "learning_rate": 9.604908593528783e-07, - "loss": 0.8302, + "epoch": 0.799155227032735, + "grad_norm": 1.8290400505065918, + "learning_rate": 1.429647588721859e-06, + "loss": 0.5706, "step": 11352 }, { - "epoch": 0.8590670046536264, - "grad_norm": 2.222776174545288, - "learning_rate": 9.594787326098629e-07, - "loss": 0.6017, + "epoch": 0.799225624780007, + "grad_norm": 1.7628275156021118, + "learning_rate": 1.4286811221247086e-06, + "loss": 0.6824, "step": 11353 }, { - "epoch": 0.8591426733759601, - "grad_norm": 2.0380992889404297, - "learning_rate": 9.584671093902976e-07, - "loss": 0.6517, + "epoch": 0.7992960225272792, + "grad_norm": 1.9750852584838867, + "learning_rate": 1.4277149451854752e-06, + "loss": 0.5995, "step": 11354 }, { - "epoch": 0.8592183420982936, - "grad_norm": 2.0588114261627197, - "learning_rate": 9.574559897575285e-07, - "loss": 0.768, + "epoch": 0.7993664202745512, + "grad_norm": 2.7035372257232666, + "learning_rate": 1.4267490579543882e-06, + "loss": 0.6676, "step": 11355 }, { - "epoch": 0.8592940108206273, - "grad_norm": 2.0195462703704834, - "learning_rate": 9.564453737748789e-07, - "loss": 0.5548, + "epoch": 0.7994368180218233, + "grad_norm": 2.0212388038635254, + "learning_rate": 1.425783460481665e-06, + "loss": 0.6436, "step": 11356 }, { - "epoch": 0.859369679542961, - "grad_norm": 2.724370002746582, - "learning_rate": 9.554352615056375e-07, - "loss": 0.7525, + "epoch": 0.7995072157690953, + "grad_norm": 2.0253102779388428, + "learning_rate": 1.4248181528175057e-06, + "loss": 0.6634, "step": 11357 }, { - "epoch": 0.8594453482652945, - "grad_norm": 4.9696855545043945, - "learning_rate": 9.544256530130582e-07, - "loss": 0.5823, + "epoch": 0.7995776135163675, + "grad_norm": 1.765637993812561, + "learning_rate": 1.4238531350121017e-06, + "loss": 0.6474, "step": 11358 }, { - "epoch": 0.8595210169876282, - "grad_norm": 7.338983058929443, - "learning_rate": 9.534165483603669e-07, - "loss": 0.6556, + "epoch": 0.7996480112636396, + "grad_norm": 2.072031021118164, + "learning_rate": 1.4228884071156225e-06, + "loss": 0.677, "step": 11359 }, { - "epoch": 0.8595966857099618, - "grad_norm": 2.392385244369507, - "learning_rate": 9.524079476107569e-07, - "loss": 0.6524, + "epoch": 0.7997184090109116, + "grad_norm": 1.739799976348877, + "learning_rate": 1.4219239691782269e-06, + "loss": 0.6015, "step": 11360 }, { - "epoch": 0.8596723544322954, - "grad_norm": 2.5599746704101562, - "learning_rate": 9.513998508273906e-07, - "loss": 0.6916, + "epoch": 0.7997888067581838, + "grad_norm": 1.9842106103897095, + "learning_rate": 1.4209598212500563e-06, + "loss": 0.6658, "step": 11361 }, { - "epoch": 0.8597480231546291, - "grad_norm": 2.130995750427246, - "learning_rate": 9.503922580733985e-07, - "loss": 0.6197, + "epoch": 0.7998592045054558, + "grad_norm": 1.7701550722122192, + "learning_rate": 1.4199959633812366e-06, + "loss": 0.5757, "step": 11362 }, { - "epoch": 0.8598236918769626, - "grad_norm": 1.7371002435684204, - "learning_rate": 9.493851694118781e-07, - "loss": 0.8717, + "epoch": 0.7999296022527279, + "grad_norm": 1.9236843585968018, + "learning_rate": 1.4190323956218795e-06, + "loss": 0.5912, "step": 11363 }, { - "epoch": 0.8598993605992963, - "grad_norm": 2.7299606800079346, - "learning_rate": 9.483785849058991e-07, - "loss": 0.7653, + "epoch": 0.8, + "grad_norm": 2.118994951248169, + "learning_rate": 1.4180691180220839e-06, + "loss": 0.5779, "step": 11364 }, { - "epoch": 0.85997502932163, - "grad_norm": 2.7302086353302, - "learning_rate": 9.47372504618491e-07, - "loss": 0.6537, + "epoch": 0.8000703977472721, + "grad_norm": 2.1276960372924805, + "learning_rate": 1.4171061306319276e-06, + "loss": 0.5404, "step": 11365 }, { - "epoch": 0.8600506980439635, - "grad_norm": 4.3424153327941895, - "learning_rate": 9.463669286126653e-07, - "loss": 0.6663, + "epoch": 0.8001407954945442, + "grad_norm": 2.2069308757781982, + "learning_rate": 1.4161434335014827e-06, + "loss": 0.6361, "step": 11366 }, { - "epoch": 0.8601263667662972, - "grad_norm": 2.4264373779296875, - "learning_rate": 9.453618569513898e-07, - "loss": 0.7241, + "epoch": 0.8002111932418162, + "grad_norm": 2.143888235092163, + "learning_rate": 1.4151810266807975e-06, + "loss": 0.6441, "step": 11367 }, { - "epoch": 0.8602020354886307, - "grad_norm": 2.623892307281494, - "learning_rate": 9.443572896976091e-07, - "loss": 0.6928, + "epoch": 0.8002815909890884, + "grad_norm": 2.2175118923187256, + "learning_rate": 1.4142189102199085e-06, + "loss": 0.6236, "step": 11368 }, { - "epoch": 0.8602777042109644, - "grad_norm": 2.2946858406066895, - "learning_rate": 9.433532269142278e-07, - "loss": 0.6384, + "epoch": 0.8003519887363605, + "grad_norm": 1.8238818645477295, + "learning_rate": 1.4132570841688347e-06, + "loss": 0.6647, "step": 11369 }, { - "epoch": 0.860353372933298, - "grad_norm": 2.276153326034546, - "learning_rate": 9.423496686641248e-07, - "loss": 0.7294, + "epoch": 0.8004223864836325, + "grad_norm": 1.8080753087997437, + "learning_rate": 1.412295548577587e-06, + "loss": 0.7154, "step": 11370 }, { - "epoch": 0.8604290416556316, - "grad_norm": 1.7452298402786255, - "learning_rate": 9.413466150101505e-07, - "loss": 0.7299, + "epoch": 0.8004927842309046, + "grad_norm": 1.720168113708496, + "learning_rate": 1.4113343034961527e-06, + "loss": 0.4807, "step": 11371 }, { - "epoch": 0.8605047103779653, - "grad_norm": 3.4095396995544434, - "learning_rate": 9.403440660151139e-07, - "loss": 0.641, + "epoch": 0.8005631819781767, + "grad_norm": 2.1643388271331787, + "learning_rate": 1.410373348974511e-06, + "loss": 0.668, "step": 11372 }, { - "epoch": 0.8605803791002989, - "grad_norm": 3.2750163078308105, - "learning_rate": 9.393420217417997e-07, - "loss": 0.7713, + "epoch": 0.8006335797254488, + "grad_norm": 2.1322219371795654, + "learning_rate": 1.4094126850626215e-06, + "loss": 0.6701, "step": 11373 }, { - "epoch": 0.8606560478226325, - "grad_norm": 2.141160726547241, - "learning_rate": 9.383404822529598e-07, - "loss": 0.6203, + "epoch": 0.8007039774727208, + "grad_norm": 1.7646738290786743, + "learning_rate": 1.4084523118104291e-06, + "loss": 0.6101, "step": 11374 }, { - "epoch": 0.8607317165449662, - "grad_norm": 2.0613651275634766, - "learning_rate": 9.373394476113149e-07, - "loss": 0.5827, + "epoch": 0.800774375219993, + "grad_norm": 1.930837631225586, + "learning_rate": 1.4074922292678635e-06, + "loss": 0.61, "step": 11375 }, { - "epoch": 0.8608073852672997, - "grad_norm": 2.971005916595459, - "learning_rate": 9.363389178795488e-07, - "loss": 0.8215, + "epoch": 0.8008447729672651, + "grad_norm": 1.583247423171997, + "learning_rate": 1.4065324374848432e-06, + "loss": 0.6192, "step": 11376 }, { - "epoch": 0.8608830539896334, - "grad_norm": 2.2407732009887695, - "learning_rate": 9.353388931203216e-07, - "loss": 0.6456, + "epoch": 0.8009151707145371, + "grad_norm": 1.9839202165603638, + "learning_rate": 1.4055729365112661e-06, + "loss": 0.5203, "step": 11377 }, { - "epoch": 0.860958722711967, - "grad_norm": 2.820349931716919, - "learning_rate": 9.343393733962601e-07, - "loss": 0.7552, + "epoch": 0.8009855684618092, + "grad_norm": 1.9675312042236328, + "learning_rate": 1.4046137263970197e-06, + "loss": 0.6194, "step": 11378 }, { - "epoch": 0.8610343914343006, - "grad_norm": 2.415344476699829, - "learning_rate": 9.333403587699511e-07, - "loss": 0.712, + "epoch": 0.8010559662090813, + "grad_norm": 1.701028823852539, + "learning_rate": 1.403654807191974e-06, + "loss": 0.8156, "step": 11379 }, { - "epoch": 0.8611100601566343, - "grad_norm": 2.1280593872070312, - "learning_rate": 9.323418493039609e-07, - "loss": 0.518, + "epoch": 0.8011263639563534, + "grad_norm": 2.733729839324951, + "learning_rate": 1.4026961789459823e-06, + "loss": 0.5672, "step": 11380 }, { - "epoch": 0.8611857288789678, - "grad_norm": 2.12552547454834, - "learning_rate": 9.31343845060818e-07, - "loss": 0.6181, + "epoch": 0.8011967617036255, + "grad_norm": 1.739900827407837, + "learning_rate": 1.4017378417088844e-06, + "loss": 0.7126, "step": 11381 }, { - "epoch": 0.8612613976013015, - "grad_norm": 2.4220640659332275, - "learning_rate": 9.303463461030208e-07, - "loss": 0.632, + "epoch": 0.8012671594508975, + "grad_norm": 1.9974050521850586, + "learning_rate": 1.4007797955305078e-06, + "loss": 0.6567, "step": 11382 }, { - "epoch": 0.8613370663236352, - "grad_norm": 1.9439997673034668, - "learning_rate": 9.293493524930352e-07, - "loss": 0.511, + "epoch": 0.8013375571981697, + "grad_norm": 1.9759145975112915, + "learning_rate": 1.3998220404606587e-06, + "loss": 0.6065, "step": 11383 }, { - "epoch": 0.8614127350459687, - "grad_norm": 2.1692566871643066, - "learning_rate": 9.283528642932972e-07, - "loss": 0.7734, + "epoch": 0.8014079549454417, + "grad_norm": 2.022333860397339, + "learning_rate": 1.3988645765491352e-06, + "loss": 0.6012, "step": 11384 }, { - "epoch": 0.8614884037683024, - "grad_norm": 2.1930227279663086, - "learning_rate": 9.27356881566209e-07, - "loss": 0.512, + "epoch": 0.8014783526927138, + "grad_norm": 1.8751697540283203, + "learning_rate": 1.397907403845715e-06, + "loss": 0.5995, "step": 11385 }, { - "epoch": 0.861564072490636, - "grad_norm": 2.2688231468200684, - "learning_rate": 9.263614043741437e-07, - "loss": 0.7379, + "epoch": 0.801548750439986, + "grad_norm": 1.9361951351165771, + "learning_rate": 1.3969505224001627e-06, + "loss": 0.6946, "step": 11386 }, { - "epoch": 0.8616397412129696, - "grad_norm": 1.992141842842102, - "learning_rate": 9.253664327794402e-07, - "loss": 0.7816, + "epoch": 0.801619148187258, + "grad_norm": 1.8390111923217773, + "learning_rate": 1.3959939322622247e-06, + "loss": 0.7618, "step": 11387 }, { - "epoch": 0.8617154099353033, - "grad_norm": 1.8397136926651, - "learning_rate": 9.243719668444079e-07, - "loss": 0.6154, + "epoch": 0.8016895459345301, + "grad_norm": 1.883154034614563, + "learning_rate": 1.39503763348164e-06, + "loss": 0.5858, "step": 11388 }, { - "epoch": 0.8617910786576368, - "grad_norm": 2.1748602390289307, - "learning_rate": 9.23378006631324e-07, - "loss": 0.7455, + "epoch": 0.8017599436818021, + "grad_norm": 1.5163835287094116, + "learning_rate": 1.394081626108123e-06, + "loss": 0.6163, "step": 11389 }, { - "epoch": 0.8618667473799705, - "grad_norm": 2.877929925918579, - "learning_rate": 9.223845522024305e-07, - "loss": 0.7016, + "epoch": 0.8018303414290743, + "grad_norm": 1.955304741859436, + "learning_rate": 1.3931259101913808e-06, + "loss": 0.6804, "step": 11390 }, { - "epoch": 0.8619424161023042, - "grad_norm": 2.6606006622314453, - "learning_rate": 9.213916036199409e-07, - "loss": 0.7219, + "epoch": 0.8019007391763464, + "grad_norm": 1.8714109659194946, + "learning_rate": 1.3921704857811007e-06, + "loss": 0.6727, "step": 11391 }, { - "epoch": 0.8620180848246377, - "grad_norm": 2.1790380477905273, - "learning_rate": 9.203991609460422e-07, - "loss": 0.6841, + "epoch": 0.8019711369236184, + "grad_norm": 2.1342484951019287, + "learning_rate": 1.391215352926954e-06, + "loss": 0.6799, "step": 11392 }, { - "epoch": 0.8620937535469714, - "grad_norm": 2.446531295776367, - "learning_rate": 9.19407224242879e-07, - "loss": 0.63, + "epoch": 0.8020415346708906, + "grad_norm": 1.720576524734497, + "learning_rate": 1.3902605116786024e-06, + "loss": 0.6918, "step": 11393 }, { - "epoch": 0.8621694222693049, - "grad_norm": 2.1365866661071777, - "learning_rate": 9.184157935725702e-07, - "loss": 0.7192, + "epoch": 0.8021119324181626, + "grad_norm": 1.9192559719085693, + "learning_rate": 1.3893059620856854e-06, + "loss": 0.7152, "step": 11394 }, { - "epoch": 0.8622450909916386, - "grad_norm": 3.078911304473877, - "learning_rate": 9.174248689972045e-07, - "loss": 0.7079, + "epoch": 0.8021823301654347, + "grad_norm": 2.001103401184082, + "learning_rate": 1.388351704197835e-06, + "loss": 0.6401, "step": 11395 }, { - "epoch": 0.8623207597139723, - "grad_norm": 1.73885977268219, - "learning_rate": 9.164344505788351e-07, - "loss": 0.7938, + "epoch": 0.8022527279127067, + "grad_norm": 2.000474452972412, + "learning_rate": 1.3873977380646613e-06, + "loss": 0.6468, "step": 11396 }, { - "epoch": 0.8623964284363058, - "grad_norm": 2.480379581451416, - "learning_rate": 9.154445383794863e-07, - "loss": 0.5902, + "epoch": 0.8023231256599789, + "grad_norm": 1.9348132610321045, + "learning_rate": 1.386444063735762e-06, + "loss": 0.6129, "step": 11397 }, { - "epoch": 0.8624720971586395, - "grad_norm": 2.1541407108306885, - "learning_rate": 9.144551324611486e-07, - "loss": 0.6969, + "epoch": 0.802393523407251, + "grad_norm": 1.903549313545227, + "learning_rate": 1.3854906812607185e-06, + "loss": 0.646, "step": 11398 }, { - "epoch": 0.8625477658809731, - "grad_norm": 2.4464011192321777, - "learning_rate": 9.134662328857826e-07, - "loss": 0.6242, + "epoch": 0.802463921154523, + "grad_norm": 1.839813232421875, + "learning_rate": 1.384537590689101e-06, + "loss": 0.5642, "step": 11399 }, { - "epoch": 0.8626234346033067, - "grad_norm": 2.167466640472412, - "learning_rate": 9.124778397153175e-07, - "loss": 0.6878, + "epoch": 0.8025343189017952, + "grad_norm": 1.812215805053711, + "learning_rate": 1.383584792070458e-06, + "loss": 0.5978, "step": 11400 }, { - "epoch": 0.8626991033256404, - "grad_norm": 2.1103248596191406, - "learning_rate": 9.114899530116459e-07, - "loss": 0.6499, + "epoch": 0.8026047166490672, + "grad_norm": 1.801242709159851, + "learning_rate": 1.3826322854543299e-06, + "loss": 0.5847, "step": 11401 }, { - "epoch": 0.8627747720479739, - "grad_norm": 1.9201300144195557, - "learning_rate": 9.105025728366354e-07, - "loss": 0.6265, + "epoch": 0.8026751143963393, + "grad_norm": 1.7318964004516602, + "learning_rate": 1.381680070890236e-06, + "loss": 0.7605, "step": 11402 }, { - "epoch": 0.8628504407703076, - "grad_norm": 2.0732924938201904, - "learning_rate": 9.095156992521204e-07, - "loss": 0.6943, + "epoch": 0.8027455121436115, + "grad_norm": 2.2511987686157227, + "learning_rate": 1.3807281484276847e-06, + "loss": 0.6879, "step": 11403 }, { - "epoch": 0.8629261094926413, - "grad_norm": 2.583040952682495, - "learning_rate": 9.085293323198995e-07, - "loss": 0.6831, + "epoch": 0.8028159098908835, + "grad_norm": 1.6148810386657715, + "learning_rate": 1.3797765181161632e-06, + "loss": 0.7123, "step": 11404 }, { - "epoch": 0.8630017782149748, - "grad_norm": 2.722351312637329, - "learning_rate": 9.075434721017414e-07, - "loss": 0.6832, + "epoch": 0.8028863076381556, + "grad_norm": 1.875288963317871, + "learning_rate": 1.3788251800051532e-06, + "loss": 0.6539, "step": 11405 }, { - "epoch": 0.8630774469373085, - "grad_norm": 2.765360116958618, - "learning_rate": 9.065581186593855e-07, - "loss": 0.6247, + "epoch": 0.8029567053854276, + "grad_norm": 1.9759695529937744, + "learning_rate": 1.3778741341441107e-06, + "loss": 0.7427, "step": 11406 }, { - "epoch": 0.863153115659642, - "grad_norm": 2.3917152881622314, - "learning_rate": 9.055732720545377e-07, - "loss": 0.647, + "epoch": 0.8030271031326998, + "grad_norm": 2.0365777015686035, + "learning_rate": 1.3769233805824855e-06, + "loss": 0.6286, "step": 11407 }, { - "epoch": 0.8632287843819757, - "grad_norm": 2.091081380844116, - "learning_rate": 9.045889323488724e-07, - "loss": 0.5273, + "epoch": 0.8030975008799719, + "grad_norm": 2.034219264984131, + "learning_rate": 1.375972919369706e-06, + "loss": 0.7596, "step": 11408 }, { - "epoch": 0.8633044531043094, - "grad_norm": 2.154362201690674, - "learning_rate": 9.036050996040325e-07, - "loss": 0.5733, + "epoch": 0.8031678986272439, + "grad_norm": 1.7592711448669434, + "learning_rate": 1.3750227505551885e-06, + "loss": 0.576, "step": 11409 }, { - "epoch": 0.8633801218266429, - "grad_norm": 1.648404836654663, - "learning_rate": 9.026217738816286e-07, - "loss": 0.4534, + "epoch": 0.803238296374516, + "grad_norm": 1.8435781002044678, + "learning_rate": 1.37407287418833e-06, + "loss": 0.543, "step": 11410 }, { - "epoch": 0.8634557905489766, - "grad_norm": 2.2857422828674316, - "learning_rate": 9.016389552432365e-07, - "loss": 0.6723, + "epoch": 0.8033086941217881, + "grad_norm": 1.8639764785766602, + "learning_rate": 1.37312329031852e-06, + "loss": 0.7287, "step": 11411 }, { - "epoch": 0.8635314592713103, - "grad_norm": 2.5683135986328125, - "learning_rate": 9.006566437504079e-07, - "loss": 0.6025, + "epoch": 0.8033790918690602, + "grad_norm": 1.983073115348816, + "learning_rate": 1.3721739989951245e-06, + "loss": 0.7526, "step": 11412 }, { - "epoch": 0.8636071279936438, - "grad_norm": 5.81996488571167, - "learning_rate": 8.996748394646567e-07, - "loss": 0.7606, + "epoch": 0.8034494896163322, + "grad_norm": 2.3264198303222656, + "learning_rate": 1.3712250002675014e-06, + "loss": 0.6072, "step": 11413 }, { - "epoch": 0.8636827967159775, - "grad_norm": 2.0632705688476562, - "learning_rate": 8.986935424474686e-07, - "loss": 0.4855, + "epoch": 0.8035198873636044, + "grad_norm": 1.6777223348617554, + "learning_rate": 1.3702762941849876e-06, + "loss": 0.6871, "step": 11414 }, { - "epoch": 0.863758465438311, - "grad_norm": 2.0599944591522217, - "learning_rate": 8.977127527602925e-07, - "loss": 0.6972, + "epoch": 0.8035902851108765, + "grad_norm": 2.6157119274139404, + "learning_rate": 1.3693278807969086e-06, + "loss": 0.7756, "step": 11415 }, { - "epoch": 0.8638341341606447, - "grad_norm": 2.6218225955963135, - "learning_rate": 8.967324704645483e-07, - "loss": 0.7039, + "epoch": 0.8036606828581485, + "grad_norm": 2.08610200881958, + "learning_rate": 1.368379760152571e-06, + "loss": 0.565, "step": 11416 }, { - "epoch": 0.8639098028829784, - "grad_norm": 2.5727875232696533, - "learning_rate": 8.957526956216269e-07, - "loss": 0.7001, + "epoch": 0.8037310806054206, + "grad_norm": 2.056652784347534, + "learning_rate": 1.367431932301271e-06, + "loss": 0.5624, "step": 11417 }, { - "epoch": 0.8639854716053119, - "grad_norm": 2.259443998336792, - "learning_rate": 8.947734282928841e-07, - "loss": 0.6443, + "epoch": 0.8038014783526927, + "grad_norm": 1.7646756172180176, + "learning_rate": 1.3664843972922842e-06, + "loss": 0.5946, "step": 11418 }, { - "epoch": 0.8640611403276456, - "grad_norm": 2.0039772987365723, - "learning_rate": 8.93794668539645e-07, - "loss": 0.6373, + "epoch": 0.8038718760999648, + "grad_norm": 2.1260688304901123, + "learning_rate": 1.3655371551748777e-06, + "loss": 0.616, "step": 11419 }, { - "epoch": 0.8641368090499791, - "grad_norm": 3.764800548553467, - "learning_rate": 8.928164164232015e-07, - "loss": 0.5713, + "epoch": 0.8039422738472369, + "grad_norm": 1.9007340669631958, + "learning_rate": 1.3645902059982978e-06, + "loss": 0.6272, "step": 11420 }, { - "epoch": 0.8642124777723128, - "grad_norm": 2.341893196105957, - "learning_rate": 8.918386720048185e-07, - "loss": 0.6677, + "epoch": 0.804012671594509, + "grad_norm": 1.8246557712554932, + "learning_rate": 1.3636435498117757e-06, + "loss": 0.651, "step": 11421 }, { - "epoch": 0.8642881464946465, - "grad_norm": 2.431959867477417, - "learning_rate": 8.908614353457182e-07, - "loss": 0.7105, + "epoch": 0.8040830693417811, + "grad_norm": 1.8492770195007324, + "learning_rate": 1.362697186664529e-06, + "loss": 0.5674, "step": 11422 }, { - "epoch": 0.86436381521698, - "grad_norm": 2.6695075035095215, - "learning_rate": 8.898847065071055e-07, - "loss": 0.7028, + "epoch": 0.8041534670890531, + "grad_norm": 1.6899116039276123, + "learning_rate": 1.3617511166057628e-06, + "loss": 0.5827, "step": 11423 }, { - "epoch": 0.8644394839393137, - "grad_norm": 2.1305665969848633, - "learning_rate": 8.889084855501456e-07, - "loss": 0.6387, + "epoch": 0.8042238648363252, + "grad_norm": 2.3094027042388916, + "learning_rate": 1.3608053396846607e-06, + "loss": 0.6563, "step": 11424 }, { - "epoch": 0.8645151526616474, - "grad_norm": 2.250269889831543, - "learning_rate": 8.879327725359684e-07, - "loss": 0.6831, + "epoch": 0.8042942625835974, + "grad_norm": 1.8328922986984253, + "learning_rate": 1.359859855950397e-06, + "loss": 0.5963, "step": 11425 }, { - "epoch": 0.8645908213839809, - "grad_norm": 2.085334300994873, - "learning_rate": 8.869575675256794e-07, - "loss": 0.8023, + "epoch": 0.8043646603308694, + "grad_norm": 1.6510944366455078, + "learning_rate": 1.3589146654521286e-06, + "loss": 0.6017, "step": 11426 }, { - "epoch": 0.8646664901063146, - "grad_norm": 2.079393148422241, - "learning_rate": 8.859828705803463e-07, - "loss": 0.5462, + "epoch": 0.8044350580781415, + "grad_norm": 2.0927212238311768, + "learning_rate": 1.357969768238995e-06, + "loss": 0.6628, "step": 11427 }, { - "epoch": 0.8647421588286481, - "grad_norm": 2.8435254096984863, - "learning_rate": 8.850086817610126e-07, - "loss": 0.6934, + "epoch": 0.8045054558254136, + "grad_norm": 1.7044569253921509, + "learning_rate": 1.3570251643601215e-06, + "loss": 0.6275, "step": 11428 }, { - "epoch": 0.8648178275509818, - "grad_norm": 2.8293187618255615, - "learning_rate": 8.840350011286813e-07, - "loss": 0.6062, + "epoch": 0.8045758535726857, + "grad_norm": 2.4702868461608887, + "learning_rate": 1.3560808538646215e-06, + "loss": 0.6939, "step": 11429 }, { - "epoch": 0.8648934962733155, - "grad_norm": 1.95986008644104, - "learning_rate": 8.830618287443277e-07, - "loss": 0.6402, + "epoch": 0.8046462513199577, + "grad_norm": 2.383164405822754, + "learning_rate": 1.355136836801588e-06, + "loss": 0.6439, "step": 11430 }, { - "epoch": 0.864969164995649, - "grad_norm": 1.9640858173370361, - "learning_rate": 8.820891646688961e-07, - "loss": 0.7183, + "epoch": 0.8047166490672298, + "grad_norm": 1.8779642581939697, + "learning_rate": 1.3541931132201038e-06, + "loss": 0.656, "step": 11431 }, { - "epoch": 0.8650448337179827, - "grad_norm": 2.166637659072876, - "learning_rate": 8.811170089632977e-07, - "loss": 0.7106, + "epoch": 0.804787046814502, + "grad_norm": 1.9936634302139282, + "learning_rate": 1.3532496831692333e-06, + "loss": 0.623, "step": 11432 }, { - "epoch": 0.8651205024403162, - "grad_norm": 2.3195998668670654, - "learning_rate": 8.801453616884119e-07, - "loss": 0.605, + "epoch": 0.804857444561774, + "grad_norm": 2.147031307220459, + "learning_rate": 1.352306546698026e-06, + "loss": 0.5383, "step": 11433 }, { - "epoch": 0.8651961711626499, - "grad_norm": 2.1643240451812744, - "learning_rate": 8.791742229050869e-07, - "loss": 0.5808, + "epoch": 0.8049278423090461, + "grad_norm": 1.7955427169799805, + "learning_rate": 1.3513637038555143e-06, + "loss": 0.5768, "step": 11434 }, { - "epoch": 0.8652718398849836, - "grad_norm": 2.442021131515503, - "learning_rate": 8.782035926741381e-07, - "loss": 0.6294, + "epoch": 0.8049982400563181, + "grad_norm": 2.253312110900879, + "learning_rate": 1.350421154690721e-06, + "loss": 0.6595, "step": 11435 }, { - "epoch": 0.8653475086073171, - "grad_norm": 2.4725868701934814, - "learning_rate": 8.772334710563489e-07, - "loss": 0.5829, + "epoch": 0.8050686378035903, + "grad_norm": 2.3037917613983154, + "learning_rate": 1.349478899252646e-06, + "loss": 0.6808, "step": 11436 }, { - "epoch": 0.8654231773296508, - "grad_norm": 2.438480854034424, - "learning_rate": 8.762638581124707e-07, - "loss": 0.6427, + "epoch": 0.8051390355508624, + "grad_norm": 2.014744758605957, + "learning_rate": 1.3485369375902834e-06, + "loss": 0.5962, "step": 11437 }, { - "epoch": 0.8654988460519845, - "grad_norm": 2.3041131496429443, - "learning_rate": 8.752947539032268e-07, - "loss": 0.564, + "epoch": 0.8052094332981344, + "grad_norm": 2.178278923034668, + "learning_rate": 1.3475952697526024e-06, + "loss": 0.7544, "step": 11438 }, { - "epoch": 0.865574514774318, - "grad_norm": 1.981211543083191, - "learning_rate": 8.74326158489304e-07, - "loss": 0.7893, + "epoch": 0.8052798310454066, + "grad_norm": 1.9399526119232178, + "learning_rate": 1.3466538957885639e-06, + "loss": 0.6271, "step": 11439 }, { - "epoch": 0.8656501834966517, - "grad_norm": 4.369067668914795, - "learning_rate": 8.733580719313574e-07, - "loss": 0.6974, + "epoch": 0.8053502287926786, + "grad_norm": 2.0367653369903564, + "learning_rate": 1.3457128157471067e-06, + "loss": 0.6563, "step": 11440 }, { - "epoch": 0.8657258522189852, - "grad_norm": 2.5769243240356445, - "learning_rate": 8.723904942900137e-07, - "loss": 0.5311, + "epoch": 0.8054206265399507, + "grad_norm": 1.9427365064620972, + "learning_rate": 1.3447720296771608e-06, + "loss": 0.5219, "step": 11441 }, { - "epoch": 0.8658015209413189, - "grad_norm": 2.2570548057556152, - "learning_rate": 8.714234256258654e-07, - "loss": 0.6526, + "epoch": 0.8054910242872229, + "grad_norm": 2.148848295211792, + "learning_rate": 1.3438315376276405e-06, + "loss": 0.5478, "step": 11442 }, { - "epoch": 0.8658771896636526, - "grad_norm": 1.7594521045684814, - "learning_rate": 8.704568659994721e-07, - "loss": 0.6366, + "epoch": 0.8055614220344949, + "grad_norm": 2.268916368484497, + "learning_rate": 1.3428913396474403e-06, + "loss": 0.6939, "step": 11443 }, { - "epoch": 0.8659528583859861, - "grad_norm": 1.9956740140914917, - "learning_rate": 8.694908154713652e-07, - "loss": 0.6423, + "epoch": 0.805631819781767, + "grad_norm": 1.8224856853485107, + "learning_rate": 1.3419514357854434e-06, + "loss": 0.6956, "step": 11444 }, { - "epoch": 0.8660285271083198, - "grad_norm": 2.2370998859405518, - "learning_rate": 8.685252741020405e-07, - "loss": 0.6358, + "epoch": 0.805702217529039, + "grad_norm": 2.1273529529571533, + "learning_rate": 1.3410118260905161e-06, + "loss": 0.6538, "step": 11445 }, { - "epoch": 0.8661041958306533, - "grad_norm": 2.284075975418091, - "learning_rate": 8.675602419519646e-07, - "loss": 0.6383, + "epoch": 0.8057726152763112, + "grad_norm": 1.4929358959197998, + "learning_rate": 1.3400725106115092e-06, + "loss": 0.7019, "step": 11446 }, { - "epoch": 0.866179864552987, - "grad_norm": 2.2030980587005615, - "learning_rate": 8.665957190815671e-07, - "loss": 0.5712, + "epoch": 0.8058430130235833, + "grad_norm": 1.9759856462478638, + "learning_rate": 1.3391334893972564e-06, + "loss": 0.5999, "step": 11447 }, { - "epoch": 0.8662555332753207, - "grad_norm": 2.6359593868255615, - "learning_rate": 8.656317055512537e-07, - "loss": 0.6499, + "epoch": 0.8059134107708553, + "grad_norm": 1.8308696746826172, + "learning_rate": 1.3381947624965824e-06, + "loss": 0.5907, "step": 11448 }, { - "epoch": 0.8663312019976542, - "grad_norm": 14.77769660949707, - "learning_rate": 8.646682014213941e-07, - "loss": 0.8428, + "epoch": 0.8059838085181275, + "grad_norm": 2.0598270893096924, + "learning_rate": 1.337256329958288e-06, + "loss": 0.6294, "step": 11449 }, { - "epoch": 0.8664068707199879, - "grad_norm": 2.7199504375457764, - "learning_rate": 8.637052067523231e-07, - "loss": 0.6286, + "epoch": 0.8060542062653995, + "grad_norm": 2.1272287368774414, + "learning_rate": 1.3363181918311676e-06, + "loss": 0.6817, "step": 11450 }, { - "epoch": 0.8664825394423216, - "grad_norm": 1.821079134941101, - "learning_rate": 8.627427216043474e-07, - "loss": 0.578, + "epoch": 0.8061246040126716, + "grad_norm": 2.1811113357543945, + "learning_rate": 1.3353803481639934e-06, + "loss": 0.7422, "step": 11451 }, { - "epoch": 0.8665582081646551, - "grad_norm": 2.893192768096924, - "learning_rate": 8.617807460377428e-07, - "loss": 0.7833, + "epoch": 0.8061950017599436, + "grad_norm": 1.839769959449768, + "learning_rate": 1.3344427990055256e-06, + "loss": 0.6125, "step": 11452 }, { - "epoch": 0.8666338768869888, - "grad_norm": 2.2512125968933105, - "learning_rate": 8.608192801127491e-07, - "loss": 0.7754, + "epoch": 0.8062653995072158, + "grad_norm": 1.9620177745819092, + "learning_rate": 1.3335055444045053e-06, + "loss": 0.6828, "step": 11453 }, { - "epoch": 0.8667095456093223, - "grad_norm": 1.8546165227890015, - "learning_rate": 8.598583238895782e-07, - "loss": 0.457, + "epoch": 0.8063357972544879, + "grad_norm": 2.1109323501586914, + "learning_rate": 1.3325685844096661e-06, + "loss": 0.5473, "step": 11454 }, { - "epoch": 0.866785214331656, - "grad_norm": 2.225909471511841, - "learning_rate": 8.588978774284069e-07, - "loss": 0.6653, + "epoch": 0.8064061950017599, + "grad_norm": 2.0842626094818115, + "learning_rate": 1.3316319190697163e-06, + "loss": 0.7059, "step": 11455 }, { - "epoch": 0.8668608830539897, - "grad_norm": 2.2338666915893555, - "learning_rate": 8.57937940789382e-07, - "loss": 0.6326, + "epoch": 0.806476592749032, + "grad_norm": 1.6074466705322266, + "learning_rate": 1.330695548433359e-06, + "loss": 0.6453, "step": 11456 }, { - "epoch": 0.8669365517763232, - "grad_norm": 2.0527563095092773, - "learning_rate": 8.569785140326197e-07, - "loss": 0.8369, + "epoch": 0.8065469904963041, + "grad_norm": 1.6488196849822998, + "learning_rate": 1.3297594725492747e-06, + "loss": 0.6104, "step": 11457 }, { - "epoch": 0.8670122204986569, - "grad_norm": 3.0348801612854004, - "learning_rate": 8.560195972181965e-07, - "loss": 0.705, + "epoch": 0.8066173882435762, + "grad_norm": 2.2520194053649902, + "learning_rate": 1.3288236914661304e-06, + "loss": 0.5992, "step": 11458 }, { - "epoch": 0.8670878892209904, - "grad_norm": 2.616718292236328, - "learning_rate": 8.550611904061698e-07, - "loss": 0.7044, + "epoch": 0.8066877859908483, + "grad_norm": 1.990192174911499, + "learning_rate": 1.3278882052325765e-06, + "loss": 0.7122, "step": 11459 }, { - "epoch": 0.8671635579433241, - "grad_norm": 2.5653347969055176, - "learning_rate": 8.541032936565564e-07, - "loss": 0.7478, + "epoch": 0.8067581837381204, + "grad_norm": 1.795461893081665, + "learning_rate": 1.3269530138972543e-06, + "loss": 0.7402, "step": 11460 }, { - "epoch": 0.8672392266656578, - "grad_norm": 2.5321779251098633, - "learning_rate": 8.531459070293403e-07, - "loss": 0.5518, + "epoch": 0.8068285814853925, + "grad_norm": 1.9962430000305176, + "learning_rate": 1.3260181175087806e-06, + "loss": 0.6179, "step": 11461 }, { - "epoch": 0.8673148953879913, - "grad_norm": 2.7409920692443848, - "learning_rate": 8.521890305844775e-07, - "loss": 0.6368, + "epoch": 0.8068989792326645, + "grad_norm": 1.5068672895431519, + "learning_rate": 1.3250835161157646e-06, + "loss": 0.5987, "step": 11462 }, { - "epoch": 0.867390564110325, - "grad_norm": 2.6878130435943604, - "learning_rate": 8.512326643818912e-07, - "loss": 0.6114, + "epoch": 0.8069693769799366, + "grad_norm": 1.779559850692749, + "learning_rate": 1.324149209766797e-06, + "loss": 0.6367, "step": 11463 }, { - "epoch": 0.8674662328326587, - "grad_norm": 2.02453351020813, - "learning_rate": 8.502768084814714e-07, - "loss": 0.6421, + "epoch": 0.8070397747272088, + "grad_norm": 2.0289785861968994, + "learning_rate": 1.3232151985104519e-06, + "loss": 0.5705, "step": 11464 }, { - "epoch": 0.8675419015549922, - "grad_norm": 2.029902696609497, - "learning_rate": 8.493214629430773e-07, - "loss": 0.6868, + "epoch": 0.8071101724744808, + "grad_norm": 1.816357970237732, + "learning_rate": 1.3222814823952884e-06, + "loss": 0.6196, "step": 11465 }, { - "epoch": 0.8676175702773259, - "grad_norm": 2.0235416889190674, - "learning_rate": 8.483666278265348e-07, - "loss": 0.6933, + "epoch": 0.8071805702217529, + "grad_norm": 1.790949821472168, + "learning_rate": 1.3213480614698542e-06, + "loss": 0.6506, "step": 11466 }, { - "epoch": 0.8676932389996594, - "grad_norm": 1.4413191080093384, - "learning_rate": 8.474123031916425e-07, - "loss": 0.7743, + "epoch": 0.807250967969025, + "grad_norm": 4.385681629180908, + "learning_rate": 1.3204149357826756e-06, + "loss": 0.7263, "step": 11467 }, { - "epoch": 0.8677689077219931, - "grad_norm": 2.967958688735962, - "learning_rate": 8.464584890981572e-07, - "loss": 0.7331, + "epoch": 0.8073213657162971, + "grad_norm": 1.7642340660095215, + "learning_rate": 1.3194821053822702e-06, + "loss": 0.6713, "step": 11468 }, { - "epoch": 0.8678445764443268, - "grad_norm": 1.6408123970031738, - "learning_rate": 8.455051856058142e-07, - "loss": 0.6527, + "epoch": 0.8073917634635691, + "grad_norm": 1.6287602186203003, + "learning_rate": 1.3185495703171342e-06, + "loss": 0.7071, "step": 11469 }, { - "epoch": 0.8679202451666603, - "grad_norm": 2.9439444541931152, - "learning_rate": 8.44552392774311e-07, - "loss": 0.6358, + "epoch": 0.8074621612108412, + "grad_norm": 2.1964635848999023, + "learning_rate": 1.3176173306357514e-06, + "loss": 0.6343, "step": 11470 }, { - "epoch": 0.867995913888994, - "grad_norm": 2.3100738525390625, - "learning_rate": 8.436001106633165e-07, - "loss": 0.7387, + "epoch": 0.8075325589581134, + "grad_norm": 1.7177718877792358, + "learning_rate": 1.3166853863865892e-06, + "loss": 0.6168, "step": 11471 }, { - "epoch": 0.8680715826113276, - "grad_norm": 2.5663743019104004, - "learning_rate": 8.426483393324633e-07, - "loss": 0.658, + "epoch": 0.8076029567053854, + "grad_norm": 1.8409374952316284, + "learning_rate": 1.3157537376181015e-06, + "loss": 0.6411, "step": 11472 }, { - "epoch": 0.8681472513336612, - "grad_norm": 3.1118669509887695, - "learning_rate": 8.416970788413527e-07, - "loss": 0.7758, + "epoch": 0.8076733544526575, + "grad_norm": 1.9458372592926025, + "learning_rate": 1.3148223843787237e-06, + "loss": 0.5529, "step": 11473 }, { - "epoch": 0.8682229200559949, - "grad_norm": 2.1538186073303223, - "learning_rate": 8.407463292495617e-07, - "loss": 0.6391, + "epoch": 0.8077437521999296, + "grad_norm": 2.0199294090270996, + "learning_rate": 1.31389132671688e-06, + "loss": 0.6097, "step": 11474 }, { - "epoch": 0.8682985887783284, - "grad_norm": 2.083836078643799, - "learning_rate": 8.39796090616625e-07, - "loss": 0.6682, + "epoch": 0.8078141499472017, + "grad_norm": 1.6371279954910278, + "learning_rate": 1.3129605646809769e-06, + "loss": 0.5084, "step": 11475 }, { - "epoch": 0.8683742575006621, - "grad_norm": 2.3815724849700928, - "learning_rate": 8.38846363002049e-07, - "loss": 0.4637, + "epoch": 0.8078845476944738, + "grad_norm": 1.8041237592697144, + "learning_rate": 1.3120300983194039e-06, + "loss": 0.6503, "step": 11476 }, { - "epoch": 0.8684499262229958, - "grad_norm": 1.9574769735336304, - "learning_rate": 8.378971464653112e-07, - "loss": 0.7101, + "epoch": 0.8079549454417458, + "grad_norm": 1.6346466541290283, + "learning_rate": 1.3110999276805354e-06, + "loss": 0.6256, "step": 11477 }, { - "epoch": 0.8685255949453293, - "grad_norm": 2.0645639896392822, - "learning_rate": 8.369484410658537e-07, - "loss": 0.5948, + "epoch": 0.808025343189018, + "grad_norm": 1.9633437395095825, + "learning_rate": 1.310170052812736e-06, + "loss": 0.656, "step": 11478 }, { - "epoch": 0.868601263667663, - "grad_norm": 2.0333096981048584, - "learning_rate": 8.360002468630862e-07, - "loss": 0.6257, + "epoch": 0.80809574093629, + "grad_norm": 2.046372652053833, + "learning_rate": 1.309240473764347e-06, + "loss": 0.6121, "step": 11479 }, { - "epoch": 0.8686769323899965, - "grad_norm": 2.2880470752716064, - "learning_rate": 8.350525639163892e-07, - "loss": 0.7798, + "epoch": 0.8081661386835621, + "grad_norm": 1.907213807106018, + "learning_rate": 1.3083111905837014e-06, + "loss": 0.6584, "step": 11480 }, { - "epoch": 0.8687526011123302, - "grad_norm": 2.19171404838562, - "learning_rate": 8.341053922851111e-07, - "loss": 0.7149, + "epoch": 0.8082365364308343, + "grad_norm": 1.8404332399368286, + "learning_rate": 1.307382203319111e-06, + "loss": 0.7598, "step": 11481 }, { - "epoch": 0.8688282698346639, - "grad_norm": 2.084751605987549, - "learning_rate": 8.331587320285638e-07, - "loss": 0.5851, + "epoch": 0.8083069341781063, + "grad_norm": 1.9596598148345947, + "learning_rate": 1.3064535120188757e-06, + "loss": 0.7075, "step": 11482 }, { - "epoch": 0.8689039385569974, - "grad_norm": 2.067612886428833, - "learning_rate": 8.322125832060294e-07, - "loss": 0.6274, + "epoch": 0.8083773319253784, + "grad_norm": 2.156439781188965, + "learning_rate": 1.3055251167312775e-06, + "loss": 0.6135, "step": 11483 }, { - "epoch": 0.8689796072793311, - "grad_norm": 2.2991397380828857, - "learning_rate": 8.312669458767629e-07, - "loss": 0.5268, + "epoch": 0.8084477296726504, + "grad_norm": 2.032388210296631, + "learning_rate": 1.3045970175045871e-06, + "loss": 0.5828, "step": 11484 }, { - "epoch": 0.8690552760016648, - "grad_norm": 2.2263104915618896, - "learning_rate": 8.303218200999817e-07, - "loss": 0.642, + "epoch": 0.8085181274199226, + "grad_norm": 2.067199945449829, + "learning_rate": 1.3036692143870536e-06, + "loss": 0.679, "step": 11485 }, { - "epoch": 0.8691309447239983, - "grad_norm": 2.2175941467285156, - "learning_rate": 8.293772059348716e-07, - "loss": 0.5625, + "epoch": 0.8085885251671947, + "grad_norm": 2.123072624206543, + "learning_rate": 1.302741707426919e-06, + "loss": 0.5967, "step": 11486 }, { - "epoch": 0.869206613446332, - "grad_norm": 2.1039774417877197, - "learning_rate": 8.28433103440587e-07, - "loss": 0.6794, + "epoch": 0.8086589229144667, + "grad_norm": 1.8231598138809204, + "learning_rate": 1.3018144966724025e-06, + "loss": 0.7078, "step": 11487 }, { - "epoch": 0.8692822821686655, - "grad_norm": 2.4359757900238037, - "learning_rate": 8.27489512676252e-07, - "loss": 0.5836, + "epoch": 0.8087293206617389, + "grad_norm": 2.068263292312622, + "learning_rate": 1.3008875821717107e-06, + "loss": 0.6143, "step": 11488 }, { - "epoch": 0.8693579508909992, - "grad_norm": 2.079524278640747, - "learning_rate": 8.265464337009572e-07, - "loss": 0.6959, + "epoch": 0.8087997184090109, + "grad_norm": 1.8325581550598145, + "learning_rate": 1.2999609639730331e-06, + "loss": 0.5251, "step": 11489 }, { - "epoch": 0.8694336196133329, - "grad_norm": 2.333555221557617, - "learning_rate": 8.256038665737602e-07, - "loss": 0.766, + "epoch": 0.808870116156283, + "grad_norm": 1.9576624631881714, + "learning_rate": 1.2990346421245474e-06, + "loss": 0.6397, "step": 11490 }, { - "epoch": 0.8695092883356664, - "grad_norm": 2.577230215072632, - "learning_rate": 8.246618113536889e-07, - "loss": 0.6448, + "epoch": 0.808940513903555, + "grad_norm": 1.6430970430374146, + "learning_rate": 1.2981086166744153e-06, + "loss": 0.6618, "step": 11491 }, { - "epoch": 0.8695849570580001, - "grad_norm": 2.1624553203582764, - "learning_rate": 8.237202680997381e-07, - "loss": 0.6679, + "epoch": 0.8090109116508272, + "grad_norm": 1.6646099090576172, + "learning_rate": 1.2971828876707812e-06, + "loss": 0.6274, "step": 11492 }, { - "epoch": 0.8696606257803337, - "grad_norm": 2.065601110458374, - "learning_rate": 8.227792368708686e-07, - "loss": 0.5157, + "epoch": 0.8090813093980993, + "grad_norm": 2.3444411754608154, + "learning_rate": 1.2962574551617724e-06, + "loss": 0.7122, "step": 11493 }, { - "epoch": 0.8697362945026673, - "grad_norm": 2.3269245624542236, - "learning_rate": 8.218387177260094e-07, - "loss": 0.7201, + "epoch": 0.8091517071453713, + "grad_norm": 2.1876556873321533, + "learning_rate": 1.295332319195503e-06, + "loss": 0.6196, "step": 11494 }, { - "epoch": 0.869811963225001, - "grad_norm": 2.024136543273926, - "learning_rate": 8.208987107240642e-07, - "loss": 0.5877, + "epoch": 0.8092221048926435, + "grad_norm": 2.060347080230713, + "learning_rate": 1.2944074798200742e-06, + "loss": 0.576, "step": 11495 }, { - "epoch": 0.8698876319473345, - "grad_norm": 2.4356894493103027, - "learning_rate": 8.19959215923895e-07, - "loss": 0.6413, + "epoch": 0.8092925026399155, + "grad_norm": 2.1362125873565674, + "learning_rate": 1.2934829370835662e-06, + "loss": 0.6234, "step": 11496 }, { - "epoch": 0.8699633006696682, - "grad_norm": 2.5709826946258545, - "learning_rate": 8.190202333843368e-07, - "loss": 0.7845, + "epoch": 0.8093629003871876, + "grad_norm": 1.6987309455871582, + "learning_rate": 1.29255869103405e-06, + "loss": 0.595, "step": 11497 }, { - "epoch": 0.8700389693920019, - "grad_norm": 2.712878704071045, - "learning_rate": 8.180817631641923e-07, - "loss": 0.623, + "epoch": 0.8094332981344597, + "grad_norm": 1.5392735004425049, + "learning_rate": 1.2916347417195765e-06, + "loss": 0.5785, "step": 11498 }, { - "epoch": 0.8701146381143354, - "grad_norm": 3.3466601371765137, - "learning_rate": 8.171438053222318e-07, - "loss": 0.6294, + "epoch": 0.8095036958817318, + "grad_norm": 2.345473289489746, + "learning_rate": 1.2907110891881826e-06, + "loss": 0.6215, "step": 11499 }, { - "epoch": 0.8701903068366691, - "grad_norm": 2.0400924682617188, - "learning_rate": 8.162063599171923e-07, - "loss": 0.6766, + "epoch": 0.8095740936290039, + "grad_norm": 1.8447591066360474, + "learning_rate": 1.2897877334878876e-06, + "loss": 0.663, "step": 11500 }, { - "epoch": 0.8702659755590026, - "grad_norm": 2.169499397277832, - "learning_rate": 8.152694270077796e-07, - "loss": 0.6908, + "epoch": 0.8096444913762759, + "grad_norm": 1.8799978494644165, + "learning_rate": 1.2888646746667022e-06, + "loss": 0.6877, "step": 11501 }, { - "epoch": 0.8703416442813363, - "grad_norm": 2.0179340839385986, - "learning_rate": 8.143330066526689e-07, - "loss": 0.7238, + "epoch": 0.809714889123548, + "grad_norm": 1.9247381687164307, + "learning_rate": 1.2879419127726126e-06, + "loss": 0.6403, "step": 11502 }, { - "epoch": 0.87041731300367, - "grad_norm": 2.6831698417663574, - "learning_rate": 8.133970989105024e-07, - "loss": 0.6951, + "epoch": 0.8097852868708202, + "grad_norm": 3.595715284347534, + "learning_rate": 1.2870194478535981e-06, + "loss": 0.6744, "step": 11503 }, { - "epoch": 0.8704929817260035, - "grad_norm": 2.6158101558685303, - "learning_rate": 8.12461703839884e-07, - "loss": 0.561, + "epoch": 0.8098556846180922, + "grad_norm": 1.8586254119873047, + "learning_rate": 1.2860972799576172e-06, + "loss": 0.5837, "step": 11504 }, { - "epoch": 0.8705686504483372, - "grad_norm": 2.133500337600708, - "learning_rate": 8.115268214993981e-07, - "loss": 0.7176, + "epoch": 0.8099260823653643, + "grad_norm": 1.894490122795105, + "learning_rate": 1.2851754091326122e-06, + "loss": 0.7185, "step": 11505 }, { - "epoch": 0.8706443191706708, - "grad_norm": 2.4899628162384033, - "learning_rate": 8.105924519475886e-07, - "loss": 0.7863, + "epoch": 0.8099964801126364, + "grad_norm": 1.869681477546692, + "learning_rate": 1.284253835426512e-06, + "loss": 0.6021, "step": 11506 }, { - "epoch": 0.8707199878930044, - "grad_norm": 2.085965394973755, - "learning_rate": 8.096585952429668e-07, - "loss": 0.6698, + "epoch": 0.8100668778599085, + "grad_norm": 2.0047333240509033, + "learning_rate": 1.2833325588872333e-06, + "loss": 0.755, "step": 11507 }, { - "epoch": 0.8707956566153381, - "grad_norm": 2.165194272994995, - "learning_rate": 8.08725251444013e-07, - "loss": 0.7282, + "epoch": 0.8101372756071805, + "grad_norm": 2.0629642009735107, + "learning_rate": 1.2824115795626704e-06, + "loss": 0.5478, "step": 11508 }, { - "epoch": 0.8708713253376716, - "grad_norm": 2.1750943660736084, - "learning_rate": 8.077924206091794e-07, - "loss": 0.7459, + "epoch": 0.8102076733544527, + "grad_norm": 1.7991596460342407, + "learning_rate": 1.2814908975007094e-06, + "loss": 0.5634, "step": 11509 }, { - "epoch": 0.8709469940600053, - "grad_norm": 2.4543588161468506, - "learning_rate": 8.068601027968802e-07, - "loss": 0.7384, + "epoch": 0.8102780711017248, + "grad_norm": 1.75753915309906, + "learning_rate": 1.2805705127492153e-06, + "loss": 0.6498, "step": 11510 }, { - "epoch": 0.871022662782339, - "grad_norm": 2.087395668029785, - "learning_rate": 8.059282980655007e-07, - "loss": 0.5855, + "epoch": 0.8103484688489968, + "grad_norm": 1.738338828086853, + "learning_rate": 1.2796504253560406e-06, + "loss": 0.6032, "step": 11511 }, { - "epoch": 0.8710983315046725, - "grad_norm": 2.3441057205200195, - "learning_rate": 8.049970064733953e-07, - "loss": 0.7226, + "epoch": 0.8104188665962689, + "grad_norm": 1.752328872680664, + "learning_rate": 1.2787306353690188e-06, + "loss": 0.6024, "step": 11512 }, { - "epoch": 0.8711740002270062, - "grad_norm": 1.9657081365585327, - "learning_rate": 8.040662280788844e-07, - "loss": 0.631, + "epoch": 0.810489264343541, + "grad_norm": 1.9830378293991089, + "learning_rate": 1.277811142835975e-06, + "loss": 0.6042, "step": 11513 }, { - "epoch": 0.8712496689493398, - "grad_norm": 3.1376612186431885, - "learning_rate": 8.031359629402512e-07, - "loss": 0.7256, + "epoch": 0.8105596620908131, + "grad_norm": 2.1890339851379395, + "learning_rate": 1.2768919478047098e-06, + "loss": 0.603, "step": 11514 }, { - "epoch": 0.8713253376716734, - "grad_norm": 2.253770112991333, - "learning_rate": 8.022062111157583e-07, - "loss": 0.5113, + "epoch": 0.8106300598380852, + "grad_norm": 1.7706308364868164, + "learning_rate": 1.2759730503230177e-06, + "loss": 0.567, "step": 11515 }, { - "epoch": 0.8714010063940071, - "grad_norm": 2.1332924365997314, - "learning_rate": 8.01276972663627e-07, - "loss": 0.6469, + "epoch": 0.8107004575853572, + "grad_norm": 1.9652307033538818, + "learning_rate": 1.2750544504386703e-06, + "loss": 0.6365, "step": 11516 }, { - "epoch": 0.8714766751163406, - "grad_norm": 2.4022717475891113, - "learning_rate": 8.003482476420517e-07, - "loss": 0.6562, + "epoch": 0.8107708553326294, + "grad_norm": 1.6745883226394653, + "learning_rate": 1.274136148199427e-06, + "loss": 0.7114, "step": 11517 }, { - "epoch": 0.8715523438386743, - "grad_norm": 2.587195634841919, - "learning_rate": 7.99420036109188e-07, - "loss": 0.7556, + "epoch": 0.8108412530799014, + "grad_norm": 1.653090000152588, + "learning_rate": 1.2732181436530294e-06, + "loss": 0.7448, "step": 11518 }, { - "epoch": 0.8716280125610079, - "grad_norm": 2.437520742416382, - "learning_rate": 7.984923381231634e-07, - "loss": 0.8001, + "epoch": 0.8109116508271735, + "grad_norm": 1.911849856376648, + "learning_rate": 1.2723004368472087e-06, + "loss": 0.6709, "step": 11519 }, { - "epoch": 0.8717036812833415, - "grad_norm": 2.373514413833618, - "learning_rate": 7.975651537420793e-07, - "loss": 0.6267, + "epoch": 0.8109820485744457, + "grad_norm": 1.6923558712005615, + "learning_rate": 1.271383027829673e-06, + "loss": 0.6709, "step": 11520 }, { - "epoch": 0.8717793500056752, - "grad_norm": 1.9270378351211548, - "learning_rate": 7.966384830239933e-07, - "loss": 0.5706, + "epoch": 0.8110524463217177, + "grad_norm": 2.108186721801758, + "learning_rate": 1.2704659166481248e-06, + "loss": 0.6089, "step": 11521 }, { - "epoch": 0.8718550187280087, - "grad_norm": 2.1025631427764893, - "learning_rate": 7.957123260269391e-07, - "loss": 0.6488, + "epoch": 0.8111228440689898, + "grad_norm": 2.3524858951568604, + "learning_rate": 1.2695491033502416e-06, + "loss": 0.701, "step": 11522 }, { - "epoch": 0.8719306874503424, - "grad_norm": 2.036912441253662, - "learning_rate": 7.947866828089142e-07, - "loss": 0.6159, + "epoch": 0.8111932418162618, + "grad_norm": 2.10211181640625, + "learning_rate": 1.268632587983691e-06, + "loss": 0.6011, "step": 11523 }, { - "epoch": 0.8720063561726761, - "grad_norm": 2.3144853115081787, - "learning_rate": 7.938615534278862e-07, - "loss": 0.7151, + "epoch": 0.811263639563534, + "grad_norm": 2.020643949508667, + "learning_rate": 1.2677163705961204e-06, + "loss": 0.7034, "step": 11524 }, { - "epoch": 0.8720820248950096, - "grad_norm": 2.1579651832580566, - "learning_rate": 7.929369379417899e-07, - "loss": 0.6715, + "epoch": 0.811334037310806, + "grad_norm": 2.0158276557922363, + "learning_rate": 1.26680045123517e-06, + "loss": 0.7104, "step": 11525 }, { - "epoch": 0.8721576936173433, - "grad_norm": 2.2118868827819824, - "learning_rate": 7.920128364085268e-07, - "loss": 0.6239, + "epoch": 0.8114044350580781, + "grad_norm": 1.8085435628890991, + "learning_rate": 1.2658848299484537e-06, + "loss": 0.5591, "step": 11526 }, { - "epoch": 0.8722333623396769, - "grad_norm": 2.359009265899658, - "learning_rate": 7.910892488859698e-07, - "loss": 0.6618, + "epoch": 0.8114748328053503, + "grad_norm": 3.403820276260376, + "learning_rate": 1.2649695067835801e-06, + "loss": 0.6539, "step": 11527 }, { - "epoch": 0.8723090310620105, - "grad_norm": 2.0738399028778076, - "learning_rate": 7.901661754319534e-07, - "loss": 0.5986, + "epoch": 0.8115452305526223, + "grad_norm": 1.7470988035202026, + "learning_rate": 1.2640544817881363e-06, + "loss": 0.5611, "step": 11528 }, { - "epoch": 0.8723846997843442, - "grad_norm": 3.8261353969573975, - "learning_rate": 7.892436161042826e-07, - "loss": 0.7576, + "epoch": 0.8116156282998944, + "grad_norm": 1.7625483274459839, + "learning_rate": 1.263139755009694e-06, + "loss": 0.6464, "step": 11529 }, { - "epoch": 0.8724603685066777, - "grad_norm": 1.8722835779190063, - "learning_rate": 7.883215709607351e-07, - "loss": 0.7365, + "epoch": 0.8116860260471664, + "grad_norm": 2.4934773445129395, + "learning_rate": 1.2622253264958107e-06, + "loss": 0.7301, "step": 11530 }, { - "epoch": 0.8725360372290114, - "grad_norm": 1.9653728008270264, - "learning_rate": 7.874000400590526e-07, - "loss": 0.6152, + "epoch": 0.8117564237944386, + "grad_norm": 2.111682176589966, + "learning_rate": 1.2613111962940295e-06, + "loss": 0.6534, "step": 11531 }, { - "epoch": 0.872611705951345, - "grad_norm": 3.910702705383301, - "learning_rate": 7.864790234569411e-07, - "loss": 0.5375, + "epoch": 0.8118268215417107, + "grad_norm": 2.081493616104126, + "learning_rate": 1.2603973644518762e-06, + "loss": 0.6562, "step": 11532 }, { - "epoch": 0.8726873746736786, - "grad_norm": 1.5178172588348389, - "learning_rate": 7.855585212120783e-07, - "loss": 0.7787, + "epoch": 0.8118972192889827, + "grad_norm": 2.2230446338653564, + "learning_rate": 1.2594838310168626e-06, + "loss": 0.65, "step": 11533 }, { - "epoch": 0.8727630433960123, - "grad_norm": 2.157939910888672, - "learning_rate": 7.846385333821103e-07, - "loss": 0.5934, + "epoch": 0.8119676170362549, + "grad_norm": 9.53791332244873, + "learning_rate": 1.258570596036484e-06, + "loss": 0.6742, "step": 11534 }, { - "epoch": 0.8728387121183458, - "grad_norm": 2.1008949279785156, - "learning_rate": 7.837190600246489e-07, - "loss": 0.7366, + "epoch": 0.8120380147835269, + "grad_norm": 2.2004709243774414, + "learning_rate": 1.25765765955822e-06, + "loss": 0.531, "step": 11535 }, { - "epoch": 0.8729143808406795, - "grad_norm": 2.356288433074951, - "learning_rate": 7.82800101197274e-07, - "loss": 0.5139, + "epoch": 0.812108412530799, + "grad_norm": 2.046013832092285, + "learning_rate": 1.2567450216295332e-06, + "loss": 0.7765, "step": 11536 }, { - "epoch": 0.8729900495630132, - "grad_norm": 1.809434413909912, - "learning_rate": 7.818816569575346e-07, - "loss": 0.5801, + "epoch": 0.8121788102780712, + "grad_norm": 1.738359808921814, + "learning_rate": 1.2558326822978757e-06, + "loss": 0.6579, "step": 11537 }, { - "epoch": 0.8730657182853467, - "grad_norm": 4.456824779510498, - "learning_rate": 7.809637273629486e-07, - "loss": 0.732, + "epoch": 0.8122492080253432, + "grad_norm": 1.7597792148590088, + "learning_rate": 1.2549206416106767e-06, + "loss": 0.6666, "step": 11538 }, { - "epoch": 0.8731413870076804, - "grad_norm": 2.023094415664673, - "learning_rate": 7.800463124709952e-07, - "loss": 0.4706, + "epoch": 0.8123196057726153, + "grad_norm": 1.6818815469741821, + "learning_rate": 1.2540088996153599e-06, + "loss": 0.6766, "step": 11539 }, { - "epoch": 0.873217055730014, - "grad_norm": 2.1191153526306152, - "learning_rate": 7.791294123391274e-07, - "loss": 0.6217, + "epoch": 0.8123900035198873, + "grad_norm": 1.7099568843841553, + "learning_rate": 1.253097456359323e-06, + "loss": 0.5297, "step": 11540 }, { - "epoch": 0.8732927244523476, - "grad_norm": 3.088059186935425, - "learning_rate": 7.782130270247681e-07, - "loss": 0.727, + "epoch": 0.8124604012671595, + "grad_norm": 2.2062594890594482, + "learning_rate": 1.2521863118899528e-06, + "loss": 0.6868, "step": 11541 }, { - "epoch": 0.8733683931746813, - "grad_norm": 2.3737430572509766, - "learning_rate": 7.772971565852997e-07, - "loss": 0.6377, + "epoch": 0.8125307990144316, + "grad_norm": 1.7982800006866455, + "learning_rate": 1.2512754662546235e-06, + "loss": 0.5958, "step": 11542 }, { - "epoch": 0.8734440618970148, - "grad_norm": 3.2077624797821045, - "learning_rate": 7.76381801078079e-07, - "loss": 0.7562, + "epoch": 0.8126011967617036, + "grad_norm": 2.1656923294067383, + "learning_rate": 1.2503649195006874e-06, + "loss": 0.6736, "step": 11543 }, { - "epoch": 0.8735197306193485, - "grad_norm": 2.7162673473358154, - "learning_rate": 7.754669605604284e-07, - "loss": 0.5585, + "epoch": 0.8126715945089757, + "grad_norm": 1.7081496715545654, + "learning_rate": 1.249454671675488e-06, + "loss": 0.6378, "step": 11544 }, { - "epoch": 0.8735953993416821, - "grad_norm": 1.9874165058135986, - "learning_rate": 7.745526350896388e-07, - "loss": 0.695, + "epoch": 0.8127419922562478, + "grad_norm": 1.7475199699401855, + "learning_rate": 1.2485447228263465e-06, + "loss": 0.6264, "step": 11545 }, { - "epoch": 0.8736710680640157, - "grad_norm": 2.2441554069519043, - "learning_rate": 7.736388247229667e-07, - "loss": 0.716, + "epoch": 0.8128123900035199, + "grad_norm": 1.8914319276809692, + "learning_rate": 1.2476350730005753e-06, + "loss": 0.6703, "step": 11546 }, { - "epoch": 0.8737467367863494, - "grad_norm": 2.2191619873046875, - "learning_rate": 7.727255295176391e-07, - "loss": 0.725, + "epoch": 0.8128827877507919, + "grad_norm": 1.6785622835159302, + "learning_rate": 1.2467257222454665e-06, + "loss": 0.6658, "step": 11547 }, { - "epoch": 0.873822405508683, - "grad_norm": 2.3463375568389893, - "learning_rate": 7.718127495308483e-07, - "loss": 0.7155, + "epoch": 0.8129531854980641, + "grad_norm": 2.1364166736602783, + "learning_rate": 1.2458166706082973e-06, + "loss": 0.7402, "step": 11548 }, { - "epoch": 0.8738980742310166, - "grad_norm": 2.282282590866089, - "learning_rate": 7.709004848197588e-07, - "loss": 0.8187, + "epoch": 0.8130235832453362, + "grad_norm": 2.2987780570983887, + "learning_rate": 1.24490791813633e-06, + "loss": 0.6955, "step": 11549 }, { - "epoch": 0.8739737429533503, - "grad_norm": 2.2139830589294434, - "learning_rate": 7.699887354414935e-07, - "loss": 0.7518, + "epoch": 0.8130939809926082, + "grad_norm": 1.838543176651001, + "learning_rate": 1.2439994648768128e-06, + "loss": 0.5564, "step": 11550 }, { - "epoch": 0.8740494116756838, - "grad_norm": 2.0290632247924805, - "learning_rate": 7.69077501453154e-07, - "loss": 0.6057, + "epoch": 0.8131643787398803, + "grad_norm": 2.079221248626709, + "learning_rate": 1.2430913108769756e-06, + "loss": 0.7025, "step": 11551 }, { - "epoch": 0.8741250803980175, - "grad_norm": 1.9807419776916504, - "learning_rate": 7.681667829118057e-07, - "loss": 0.6753, + "epoch": 0.8132347764871524, + "grad_norm": 2.5970427989959717, + "learning_rate": 1.2421834561840355e-06, + "loss": 0.5461, "step": 11552 }, { - "epoch": 0.8742007491203511, - "grad_norm": 2.6558451652526855, - "learning_rate": 7.672565798744757e-07, - "loss": 0.6585, + "epoch": 0.8133051742344245, + "grad_norm": 2.220283031463623, + "learning_rate": 1.241275900845193e-06, + "loss": 0.6992, "step": 11553 }, { - "epoch": 0.8742764178426847, - "grad_norm": 2.189072847366333, - "learning_rate": 7.663468923981677e-07, - "loss": 0.6509, + "epoch": 0.8133755719816966, + "grad_norm": 1.7730604410171509, + "learning_rate": 1.2403686449076316e-06, + "loss": 0.6347, "step": 11554 }, { - "epoch": 0.8743520865650184, - "grad_norm": 4.953122138977051, - "learning_rate": 7.654377205398479e-07, - "loss": 0.7143, + "epoch": 0.8134459697289687, + "grad_norm": 1.8959667682647705, + "learning_rate": 1.2394616884185184e-06, + "loss": 0.6419, "step": 11555 }, { - "epoch": 0.874427755287352, - "grad_norm": 1.9904096126556396, - "learning_rate": 7.64529064356451e-07, - "loss": 0.7092, + "epoch": 0.8135163674762408, + "grad_norm": 2.122929096221924, + "learning_rate": 1.2385550314250116e-06, + "loss": 0.6388, "step": 11556 }, { - "epoch": 0.8745034240096856, - "grad_norm": 2.697661876678467, - "learning_rate": 7.636209239048823e-07, - "loss": 0.6954, + "epoch": 0.8135867652235128, + "grad_norm": 1.9649137258529663, + "learning_rate": 1.2376486739742445e-06, + "loss": 0.6162, "step": 11557 }, { - "epoch": 0.8745790927320192, - "grad_norm": 2.0726523399353027, - "learning_rate": 7.627132992420103e-07, - "loss": 0.7002, + "epoch": 0.8136571629707849, + "grad_norm": 2.0050625801086426, + "learning_rate": 1.2367426161133438e-06, + "loss": 0.6739, "step": 11558 }, { - "epoch": 0.8746547614543528, - "grad_norm": 3.2430927753448486, - "learning_rate": 7.618061904246736e-07, - "loss": 0.6645, + "epoch": 0.8137275607180571, + "grad_norm": 1.7007052898406982, + "learning_rate": 1.2358368578894132e-06, + "loss": 0.6788, "step": 11559 }, { - "epoch": 0.8747304301766865, - "grad_norm": 1.8486747741699219, - "learning_rate": 7.608995975096797e-07, - "loss": 0.6222, + "epoch": 0.8137979584653291, + "grad_norm": 1.9958778619766235, + "learning_rate": 1.2349313993495452e-06, + "loss": 0.6938, "step": 11560 }, { - "epoch": 0.8748060988990201, - "grad_norm": 1.8390332460403442, - "learning_rate": 7.599935205538003e-07, - "loss": 0.6021, + "epoch": 0.8138683562126012, + "grad_norm": 1.8454171419143677, + "learning_rate": 1.2340262405408128e-06, + "loss": 0.6621, "step": 11561 }, { - "epoch": 0.8748817676213537, - "grad_norm": 2.2097909450531006, - "learning_rate": 7.590879596137789e-07, - "loss": 0.7386, + "epoch": 0.8139387539598733, + "grad_norm": 1.7732059955596924, + "learning_rate": 1.23312138151028e-06, + "loss": 0.6974, "step": 11562 }, { - "epoch": 0.8749574363436874, - "grad_norm": 2.2231502532958984, - "learning_rate": 7.581829147463252e-07, - "loss": 0.7924, + "epoch": 0.8140091517071454, + "grad_norm": 1.8272533416748047, + "learning_rate": 1.2322168223049872e-06, + "loss": 0.7026, "step": 11563 }, { - "epoch": 0.8750331050660209, - "grad_norm": 3.517443895339966, - "learning_rate": 7.572783860081139e-07, - "loss": 0.7055, + "epoch": 0.8140795494544174, + "grad_norm": 1.7943578958511353, + "learning_rate": 1.2313125629719678e-06, + "loss": 0.696, "step": 11564 }, { - "epoch": 0.8751087737883546, - "grad_norm": 2.743074417114258, - "learning_rate": 7.563743734557877e-07, - "loss": 0.6501, + "epoch": 0.8141499472016895, + "grad_norm": 2.223125457763672, + "learning_rate": 1.2304086035582327e-06, + "loss": 0.609, "step": 11565 }, { - "epoch": 0.8751844425106882, - "grad_norm": 2.1922786235809326, - "learning_rate": 7.554708771459651e-07, - "loss": 0.7249, + "epoch": 0.8142203449489617, + "grad_norm": 1.8067569732666016, + "learning_rate": 1.2295049441107792e-06, + "loss": 0.5971, "step": 11566 }, { - "epoch": 0.8752601112330218, - "grad_norm": 2.300901174545288, - "learning_rate": 7.5456789713522e-07, - "loss": 0.6253, + "epoch": 0.8142907426962337, + "grad_norm": 2.025108575820923, + "learning_rate": 1.2286015846765883e-06, + "loss": 0.6255, "step": 11567 }, { - "epoch": 0.8753357799553555, - "grad_norm": 2.120061159133911, - "learning_rate": 7.536654334801022e-07, - "loss": 0.6272, + "epoch": 0.8143611404435058, + "grad_norm": 1.8220270872116089, + "learning_rate": 1.2276985253026292e-06, + "loss": 0.6164, "step": 11568 }, { - "epoch": 0.875411448677689, - "grad_norm": 2.244152307510376, - "learning_rate": 7.527634862371274e-07, - "loss": 0.7696, + "epoch": 0.8144315381907778, + "grad_norm": 2.289609670639038, + "learning_rate": 1.22679576603585e-06, + "loss": 0.65, "step": 11569 }, { - "epoch": 0.8754871174000227, - "grad_norm": 2.225641965866089, - "learning_rate": 7.518620554627785e-07, - "loss": 0.598, + "epoch": 0.81450193593805, + "grad_norm": 2.29123592376709, + "learning_rate": 1.225893306923189e-06, + "loss": 0.7178, "step": 11570 }, { - "epoch": 0.8755627861223563, - "grad_norm": 2.236870288848877, - "learning_rate": 7.509611412135034e-07, - "loss": 0.7316, + "epoch": 0.8145723336853221, + "grad_norm": 2.010111093521118, + "learning_rate": 1.2249911480115643e-06, + "loss": 0.6918, "step": 11571 }, { - "epoch": 0.8756384548446899, - "grad_norm": 2.152217149734497, - "learning_rate": 7.500607435457238e-07, - "loss": 0.7231, + "epoch": 0.8146427314325941, + "grad_norm": 2.303853988647461, + "learning_rate": 1.2240892893478803e-06, + "loss": 0.7328, "step": 11572 }, { - "epoch": 0.8757141235670236, - "grad_norm": 2.509950637817383, - "learning_rate": 7.491608625158226e-07, - "loss": 0.6495, + "epoch": 0.8147131291798663, + "grad_norm": 1.9864588975906372, + "learning_rate": 1.2231877309790235e-06, + "loss": 0.7104, "step": 11573 }, { - "epoch": 0.8757897922893572, - "grad_norm": 2.5385403633117676, - "learning_rate": 7.482614981801579e-07, - "loss": 0.652, + "epoch": 0.8147835269271383, + "grad_norm": 1.9669435024261475, + "learning_rate": 1.2222864729518696e-06, + "loss": 0.5577, "step": 11574 }, { - "epoch": 0.8758654610116908, - "grad_norm": 2.698321580886841, - "learning_rate": 7.473626505950445e-07, - "loss": 0.7599, + "epoch": 0.8148539246744104, + "grad_norm": 2.230881452560425, + "learning_rate": 1.2213855153132734e-06, + "loss": 0.6005, "step": 11575 }, { - "epoch": 0.8759411297340245, - "grad_norm": 2.6774983406066895, - "learning_rate": 7.464643198167735e-07, - "loss": 0.6847, + "epoch": 0.8149243224216826, + "grad_norm": 2.199880838394165, + "learning_rate": 1.2204848581100786e-06, + "loss": 0.5744, "step": 11576 }, { - "epoch": 0.876016798456358, - "grad_norm": 2.8981354236602783, - "learning_rate": 7.455665059016056e-07, - "loss": 0.7735, + "epoch": 0.8149947201689546, + "grad_norm": 2.576234817504883, + "learning_rate": 1.2195845013891108e-06, + "loss": 0.7194, "step": 11577 }, { - "epoch": 0.8760924671786917, - "grad_norm": 1.9417154788970947, - "learning_rate": 7.446692089057583e-07, - "loss": 0.5627, + "epoch": 0.8150651179162267, + "grad_norm": 2.048187732696533, + "learning_rate": 1.21868444519718e-06, + "loss": 0.6869, "step": 11578 }, { - "epoch": 0.8761681359010253, - "grad_norm": 2.02998948097229, - "learning_rate": 7.437724288854273e-07, - "loss": 0.6576, + "epoch": 0.8151355156634987, + "grad_norm": 1.7232043743133545, + "learning_rate": 1.2177846895810784e-06, + "loss": 0.6313, "step": 11579 }, { - "epoch": 0.8762438046233589, - "grad_norm": 2.600839614868164, - "learning_rate": 7.428761658967697e-07, - "loss": 0.7377, + "epoch": 0.8152059134107709, + "grad_norm": 2.1057658195495605, + "learning_rate": 1.2168852345875897e-06, + "loss": 0.6445, "step": 11580 }, { - "epoch": 0.8763194733456926, - "grad_norm": 2.005692720413208, - "learning_rate": 7.419804199959138e-07, - "loss": 0.5753, + "epoch": 0.8152763111580429, + "grad_norm": 1.8547859191894531, + "learning_rate": 1.2159860802634734e-06, + "loss": 0.6581, "step": 11581 }, { - "epoch": 0.8763951420680262, - "grad_norm": 2.1509876251220703, - "learning_rate": 7.410851912389536e-07, - "loss": 0.7596, + "epoch": 0.815346708905315, + "grad_norm": 2.4711365699768066, + "learning_rate": 1.2150872266554812e-06, + "loss": 0.6945, "step": 11582 }, { - "epoch": 0.8764708107903598, - "grad_norm": 2.080975294113159, - "learning_rate": 7.401904796819512e-07, - "loss": 0.5581, + "epoch": 0.8154171066525872, + "grad_norm": 2.2204113006591797, + "learning_rate": 1.2141886738103423e-06, + "loss": 0.6832, "step": 11583 }, { - "epoch": 0.8765464795126934, - "grad_norm": 2.399707317352295, - "learning_rate": 7.392962853809388e-07, - "loss": 0.4842, + "epoch": 0.8154875043998592, + "grad_norm": 1.8834255933761597, + "learning_rate": 1.2132904217747744e-06, + "loss": 0.5746, "step": 11584 }, { - "epoch": 0.876622148235027, - "grad_norm": 2.309359550476074, - "learning_rate": 7.384026083919087e-07, - "loss": 0.6132, + "epoch": 0.8155579021471313, + "grad_norm": 1.839613676071167, + "learning_rate": 1.2123924705954771e-06, + "loss": 0.6089, "step": 11585 }, { - "epoch": 0.8766978169573607, - "grad_norm": 2.053427219390869, - "learning_rate": 7.375094487708281e-07, - "loss": 0.6819, + "epoch": 0.8156282998944033, + "grad_norm": 1.8236157894134521, + "learning_rate": 1.2114948203191385e-06, + "loss": 0.6159, "step": 11586 }, { - "epoch": 0.8767734856796943, - "grad_norm": 1.9425896406173706, - "learning_rate": 7.366168065736302e-07, - "loss": 0.6406, + "epoch": 0.8156986976416755, + "grad_norm": 2.053830623626709, + "learning_rate": 1.210597470992424e-06, + "loss": 0.5938, "step": 11587 }, { - "epoch": 0.8768491544020279, - "grad_norm": 2.2914395332336426, - "learning_rate": 7.357246818562174e-07, - "loss": 0.6731, + "epoch": 0.8157690953889476, + "grad_norm": 1.770499348640442, + "learning_rate": 1.2097004226619925e-06, + "loss": 0.6286, "step": 11588 }, { - "epoch": 0.8769248231243616, - "grad_norm": 1.905128836631775, - "learning_rate": 7.348330746744529e-07, - "loss": 0.6197, + "epoch": 0.8158394931362196, + "grad_norm": 1.8678058385849, + "learning_rate": 1.2088036753744801e-06, + "loss": 0.6796, "step": 11589 }, { - "epoch": 0.8770004918466952, - "grad_norm": 2.3496506214141846, - "learning_rate": 7.339419850841741e-07, - "loss": 0.6112, + "epoch": 0.8159098908834918, + "grad_norm": 2.9591104984283447, + "learning_rate": 1.2079072291765096e-06, + "loss": 0.6972, "step": 11590 }, { - "epoch": 0.8770761605690288, - "grad_norm": 2.184185743331909, - "learning_rate": 7.330514131411843e-07, - "loss": 0.4918, + "epoch": 0.8159802886307638, + "grad_norm": 2.063809633255005, + "learning_rate": 1.2070110841146857e-06, + "loss": 0.6629, "step": 11591 }, { - "epoch": 0.8771518292913624, - "grad_norm": 2.560593843460083, - "learning_rate": 7.321613589012529e-07, - "loss": 0.6898, + "epoch": 0.8160506863780359, + "grad_norm": 2.6836938858032227, + "learning_rate": 1.2061152402356024e-06, + "loss": 0.7265, "step": 11592 }, { - "epoch": 0.877227498013696, - "grad_norm": 1.9485561847686768, - "learning_rate": 7.312718224201194e-07, - "loss": 0.6156, + "epoch": 0.816121084125308, + "grad_norm": 2.451695203781128, + "learning_rate": 1.2052196975858363e-06, + "loss": 0.6906, "step": 11593 }, { - "epoch": 0.8773031667360297, - "grad_norm": 2.1661360263824463, - "learning_rate": 7.303828037534881e-07, - "loss": 0.5933, + "epoch": 0.8161914818725801, + "grad_norm": 2.039898157119751, + "learning_rate": 1.2043244562119465e-06, + "loss": 0.6292, "step": 11594 }, { - "epoch": 0.8773788354583633, - "grad_norm": 2.1465418338775635, - "learning_rate": 7.294943029570345e-07, - "loss": 0.5613, + "epoch": 0.8162618796198522, + "grad_norm": 1.7124994993209839, + "learning_rate": 1.203429516160477e-06, + "loss": 0.6843, "step": 11595 }, { - "epoch": 0.8774545041806969, - "grad_norm": 2.0607333183288574, - "learning_rate": 7.286063200863953e-07, - "loss": 0.4852, + "epoch": 0.8163322773671242, + "grad_norm": 1.8479670286178589, + "learning_rate": 1.2025348774779548e-06, + "loss": 0.601, "step": 11596 }, { - "epoch": 0.8775301729030305, - "grad_norm": 2.41105580329895, - "learning_rate": 7.277188551971817e-07, - "loss": 0.6185, + "epoch": 0.8164026751143963, + "grad_norm": 2.4289841651916504, + "learning_rate": 1.2016405402108963e-06, + "loss": 0.6458, "step": 11597 }, { - "epoch": 0.8776058416253641, - "grad_norm": 1.9282902479171753, - "learning_rate": 7.268319083449715e-07, - "loss": 0.506, + "epoch": 0.8164730728616685, + "grad_norm": 2.0979530811309814, + "learning_rate": 1.2007465044057958e-06, + "loss": 0.6587, "step": 11598 }, { - "epoch": 0.8776815103476978, - "grad_norm": 2.118765354156494, - "learning_rate": 7.259454795853041e-07, - "loss": 0.856, + "epoch": 0.8165434706089405, + "grad_norm": 1.8725982904434204, + "learning_rate": 1.1998527701091388e-06, + "loss": 0.572, "step": 11599 }, { - "epoch": 0.8777571790700314, - "grad_norm": 2.2869462966918945, - "learning_rate": 7.250595689736921e-07, - "loss": 0.6693, + "epoch": 0.8166138683562126, + "grad_norm": 2.0733070373535156, + "learning_rate": 1.1989593373673894e-06, + "loss": 0.5965, "step": 11600 }, { - "epoch": 0.877832847792365, - "grad_norm": 2.2871768474578857, - "learning_rate": 7.241741765656124e-07, - "loss": 0.6692, + "epoch": 0.8166842661034847, + "grad_norm": 1.7516764402389526, + "learning_rate": 1.198066206226997e-06, + "loss": 0.5996, "step": 11601 }, { - "epoch": 0.8779085165146987, - "grad_norm": 2.093322992324829, - "learning_rate": 7.232893024165172e-07, + "epoch": 0.8167546638507568, + "grad_norm": 2.3231043815612793, + "learning_rate": 1.1971733767343961e-06, "loss": 0.6427, "step": 11602 }, { - "epoch": 0.8779841852370323, - "grad_norm": 1.7536505460739136, - "learning_rate": 7.224049465818136e-07, - "loss": 0.646, + "epoch": 0.8168250615980288, + "grad_norm": 1.9291763305664062, + "learning_rate": 1.196280848936008e-06, + "loss": 0.6075, "step": 11603 }, { - "epoch": 0.8780598539593659, - "grad_norm": 2.1361818313598633, - "learning_rate": 7.215211091168859e-07, - "loss": 0.7456, + "epoch": 0.816895459345301, + "grad_norm": 1.9735145568847656, + "learning_rate": 1.1953886228782324e-06, + "loss": 0.5452, "step": 11604 }, { - "epoch": 0.8781355226816995, - "grad_norm": 2.706083297729492, - "learning_rate": 7.206377900770812e-07, - "loss": 0.5506, + "epoch": 0.8169658570925731, + "grad_norm": 1.5139546394348145, + "learning_rate": 1.194496698607462e-06, + "loss": 0.5184, "step": 11605 }, { - "epoch": 0.8782111914040331, - "grad_norm": 2.02065372467041, - "learning_rate": 7.19754989517718e-07, - "loss": 0.6444, + "epoch": 0.8170362548398451, + "grad_norm": 1.8908846378326416, + "learning_rate": 1.1936050761700657e-06, + "loss": 0.5399, "step": 11606 }, { - "epoch": 0.8782868601263668, - "grad_norm": 2.013063669204712, - "learning_rate": 7.188727074940781e-07, - "loss": 0.7695, + "epoch": 0.8171066525871172, + "grad_norm": 2.7195045948028564, + "learning_rate": 1.1927137556123998e-06, + "loss": 0.703, "step": 11607 }, { - "epoch": 0.8783625288487004, - "grad_norm": 1.6234900951385498, - "learning_rate": 7.179909440614135e-07, - "loss": 0.8568, + "epoch": 0.8171770503343893, + "grad_norm": 1.942753553390503, + "learning_rate": 1.1918227369808034e-06, + "loss": 0.6607, "step": 11608 }, { - "epoch": 0.878438197571034, - "grad_norm": 2.600998878479004, - "learning_rate": 7.171096992749458e-07, - "loss": 0.5912, + "epoch": 0.8172474480816614, + "grad_norm": 1.911805272102356, + "learning_rate": 1.190932020321605e-06, + "loss": 0.5877, "step": 11609 }, { - "epoch": 0.8785138662933676, - "grad_norm": 2.169203281402588, - "learning_rate": 7.162289731898561e-07, - "loss": 0.6923, + "epoch": 0.8173178458289335, + "grad_norm": 1.8059594631195068, + "learning_rate": 1.1900416056811107e-06, + "loss": 0.6613, "step": 11610 }, { - "epoch": 0.8785895350157013, - "grad_norm": 2.445180892944336, - "learning_rate": 7.153487658613019e-07, - "loss": 0.5911, + "epoch": 0.8173882435762055, + "grad_norm": 1.9994440078735352, + "learning_rate": 1.1891514931056164e-06, + "loss": 0.6848, "step": 11611 }, { - "epoch": 0.8786652037380349, - "grad_norm": 2.2180263996124268, - "learning_rate": 7.144690773444034e-07, - "loss": 0.6728, + "epoch": 0.8174586413234777, + "grad_norm": 2.3009605407714844, + "learning_rate": 1.1882616826413988e-06, + "loss": 0.6667, "step": 11612 }, { - "epoch": 0.8787408724603685, - "grad_norm": 2.4425292015075684, - "learning_rate": 7.135899076942506e-07, - "loss": 0.6479, + "epoch": 0.8175290390707497, + "grad_norm": 1.8259732723236084, + "learning_rate": 1.1873721743347194e-06, + "loss": 0.614, "step": 11613 }, { - "epoch": 0.8788165411827021, - "grad_norm": 2.1483867168426514, - "learning_rate": 7.127112569658982e-07, - "loss": 0.6243, + "epoch": 0.8175994368180218, + "grad_norm": 2.161604881286621, + "learning_rate": 1.1864829682318228e-06, + "loss": 0.6824, "step": 11614 }, { - "epoch": 0.8788922099050358, - "grad_norm": 1.742142677307129, - "learning_rate": 7.118331252143724e-07, - "loss": 0.6261, + "epoch": 0.817669834565294, + "grad_norm": 1.6888447999954224, + "learning_rate": 1.185594064378944e-06, + "loss": 0.651, "step": 11615 }, { - "epoch": 0.8789678786273694, - "grad_norm": 2.120514392852783, - "learning_rate": 7.109555124946641e-07, - "loss": 0.7177, + "epoch": 0.817740232312566, + "grad_norm": 1.9729787111282349, + "learning_rate": 1.1847054628222938e-06, + "loss": 0.6977, "step": 11616 }, { - "epoch": 0.879043547349703, - "grad_norm": 2.504844903945923, - "learning_rate": 7.100784188617293e-07, - "loss": 0.5731, + "epoch": 0.8178106300598381, + "grad_norm": 1.8046672344207764, + "learning_rate": 1.1838171636080747e-06, + "loss": 0.6296, "step": 11617 }, { - "epoch": 0.8791192160720366, - "grad_norm": 2.5175814628601074, - "learning_rate": 7.092018443704971e-07, - "loss": 0.6151, + "epoch": 0.8178810278071101, + "grad_norm": 1.612300157546997, + "learning_rate": 1.1829291667824694e-06, + "loss": 0.6065, "step": 11618 }, { - "epoch": 0.8791948847943702, - "grad_norm": 2.128124713897705, - "learning_rate": 7.083257890758618e-07, - "loss": 0.6207, + "epoch": 0.8179514255543823, + "grad_norm": 2.0749127864837646, + "learning_rate": 1.1820414723916443e-06, + "loss": 0.6673, "step": 11619 }, { - "epoch": 0.8792705535167039, - "grad_norm": 1.736514687538147, - "learning_rate": 7.074502530326862e-07, - "loss": 0.6287, + "epoch": 0.8180218233016543, + "grad_norm": 2.0135419368743896, + "learning_rate": 1.18115408048175e-06, + "loss": 0.6853, "step": 11620 }, { - "epoch": 0.8793462222390375, - "grad_norm": 6.643167018890381, - "learning_rate": 7.065752362957955e-07, - "loss": 0.6171, + "epoch": 0.8180922210489264, + "grad_norm": 1.7360681295394897, + "learning_rate": 1.1802669910989281e-06, + "loss": 0.6558, "step": 11621 }, { - "epoch": 0.8794218909613711, - "grad_norm": 1.8215636014938354, - "learning_rate": 7.057007389199851e-07, - "loss": 0.6954, + "epoch": 0.8181626187961986, + "grad_norm": 1.9508566856384277, + "learning_rate": 1.1793802042892932e-06, + "loss": 0.5821, "step": 11622 }, { - "epoch": 0.8794975596837047, - "grad_norm": 2.094165325164795, - "learning_rate": 7.048267609600249e-07, - "loss": 0.56, + "epoch": 0.8182330165434706, + "grad_norm": 1.995926022529602, + "learning_rate": 1.1784937200989547e-06, + "loss": 0.6742, "step": 11623 }, { - "epoch": 0.8795732284060384, - "grad_norm": 1.876628041267395, - "learning_rate": 7.039533024706424e-07, - "loss": 0.7134, + "epoch": 0.8183034142907427, + "grad_norm": 2.918591260910034, + "learning_rate": 1.1776075385739997e-06, + "loss": 0.673, "step": 11624 }, { - "epoch": 0.879648897128372, - "grad_norm": 2.3218555450439453, - "learning_rate": 7.030803635065356e-07, - "loss": 0.7137, + "epoch": 0.8183738120380147, + "grad_norm": 1.7965044975280762, + "learning_rate": 1.1767216597605026e-06, + "loss": 0.5724, "step": 11625 }, { - "epoch": 0.8797245658507056, - "grad_norm": 3.562688112258911, - "learning_rate": 7.022079441223718e-07, - "loss": 0.6009, + "epoch": 0.8184442097852869, + "grad_norm": 2.1743597984313965, + "learning_rate": 1.1758360837045185e-06, + "loss": 0.5886, "step": 11626 }, { - "epoch": 0.8798002345730392, - "grad_norm": 3.2988367080688477, - "learning_rate": 7.013360443727855e-07, - "loss": 0.5274, + "epoch": 0.818514607532559, + "grad_norm": 1.8178746700286865, + "learning_rate": 1.1749508104520922e-06, + "loss": 0.609, "step": 11627 }, { - "epoch": 0.8798759032953729, - "grad_norm": 2.6240482330322266, - "learning_rate": 7.004646643123769e-07, - "loss": 0.7412, + "epoch": 0.818585005279831, + "grad_norm": 1.6995210647583008, + "learning_rate": 1.1740658400492478e-06, + "loss": 0.7302, "step": 11628 }, { - "epoch": 0.8799515720177065, - "grad_norm": 3.6613142490386963, - "learning_rate": 6.995938039957153e-07, - "loss": 0.6822, + "epoch": 0.8186554030271032, + "grad_norm": 2.1178205013275146, + "learning_rate": 1.1731811725419977e-06, + "loss": 0.613, "step": 11629 }, { - "epoch": 0.8800272407400401, - "grad_norm": 1.9795455932617188, - "learning_rate": 6.987234634773381e-07, - "loss": 0.6066, + "epoch": 0.8187258007743752, + "grad_norm": 1.94413423538208, + "learning_rate": 1.1722968079763359e-06, + "loss": 0.6067, "step": 11630 }, { - "epoch": 0.8801029094623737, - "grad_norm": 2.0627286434173584, - "learning_rate": 6.978536428117447e-07, - "loss": 0.6163, + "epoch": 0.8187961985216473, + "grad_norm": 1.8624835014343262, + "learning_rate": 1.1714127463982402e-06, + "loss": 0.6417, "step": 11631 }, { - "epoch": 0.8801785781847073, - "grad_norm": 2.3510892391204834, - "learning_rate": 6.969843420534085e-07, - "loss": 0.7384, + "epoch": 0.8188665962689194, + "grad_norm": 2.076371908187866, + "learning_rate": 1.1705289878536738e-06, + "loss": 0.6794, "step": 11632 }, { - "epoch": 0.880254246907041, - "grad_norm": 2.18217396736145, - "learning_rate": 6.961155612567681e-07, - "loss": 0.5873, + "epoch": 0.8189369940161915, + "grad_norm": 1.947147250175476, + "learning_rate": 1.1696455323885859e-06, + "loss": 0.6276, "step": 11633 }, { - "epoch": 0.8803299156293746, - "grad_norm": 2.205120086669922, - "learning_rate": 6.952473004762319e-07, - "loss": 0.6898, + "epoch": 0.8190073917634636, + "grad_norm": 1.870434045791626, + "learning_rate": 1.1687623800489058e-06, + "loss": 0.5995, "step": 11634 }, { - "epoch": 0.8804055843517082, - "grad_norm": 2.0564920902252197, - "learning_rate": 6.943795597661683e-07, - "loss": 0.5414, + "epoch": 0.8190777895107356, + "grad_norm": 1.5979231595993042, + "learning_rate": 1.1678795308805519e-06, + "loss": 0.689, "step": 11635 }, { - "epoch": 0.8804812530740418, - "grad_norm": 2.376497983932495, - "learning_rate": 6.935123391809209e-07, - "loss": 0.8052, + "epoch": 0.8191481872580078, + "grad_norm": 1.909206748008728, + "learning_rate": 1.1669969849294235e-06, + "loss": 0.597, "step": 11636 }, { - "epoch": 0.8805569217963755, - "grad_norm": 2.4886834621429443, - "learning_rate": 6.926456387747964e-07, - "loss": 0.6784, + "epoch": 0.8192185850052798, + "grad_norm": 1.7351129055023193, + "learning_rate": 1.166114742241404e-06, + "loss": 0.633, "step": 11637 }, { - "epoch": 0.8806325905187091, - "grad_norm": 2.1387746334075928, - "learning_rate": 6.917794586020722e-07, - "loss": 0.6157, + "epoch": 0.8192889827525519, + "grad_norm": 1.86285400390625, + "learning_rate": 1.1652328028623607e-06, + "loss": 0.5571, "step": 11638 }, { - "epoch": 0.8807082592410427, - "grad_norm": 3.309083938598633, - "learning_rate": 6.909137987169899e-07, - "loss": 0.761, + "epoch": 0.819359380499824, + "grad_norm": 1.7373600006103516, + "learning_rate": 1.1643511668381507e-06, + "loss": 0.5931, "step": 11639 }, { - "epoch": 0.8807839279633763, - "grad_norm": 2.392918109893799, - "learning_rate": 6.900486591737603e-07, - "loss": 0.7521, + "epoch": 0.8194297782470961, + "grad_norm": 1.9270728826522827, + "learning_rate": 1.1634698342146064e-06, + "loss": 0.62, "step": 11640 }, { - "epoch": 0.88085959668571, - "grad_norm": 2.2521510124206543, - "learning_rate": 6.891840400265629e-07, - "loss": 0.6259, + "epoch": 0.8195001759943682, + "grad_norm": 1.7736799716949463, + "learning_rate": 1.1625888050375511e-06, + "loss": 0.6406, "step": 11641 }, { - "epoch": 0.8809352654080436, - "grad_norm": 2.2190327644348145, - "learning_rate": 6.883199413295384e-07, - "loss": 0.7168, + "epoch": 0.8195705737416402, + "grad_norm": 1.9107613563537598, + "learning_rate": 1.1617080793527945e-06, + "loss": 0.6347, "step": 11642 }, { - "epoch": 0.8810109341303772, - "grad_norm": 1.9417771100997925, - "learning_rate": 6.874563631368037e-07, - "loss": 0.7122, + "epoch": 0.8196409714889124, + "grad_norm": 1.8354227542877197, + "learning_rate": 1.1608276572061185e-06, + "loss": 0.6086, "step": 11643 }, { - "epoch": 0.8810866028527108, - "grad_norm": 2.9581193923950195, - "learning_rate": 6.865933055024394e-07, - "loss": 0.6767, + "epoch": 0.8197113692361845, + "grad_norm": 1.9200222492218018, + "learning_rate": 1.1599475386433033e-06, + "loss": 0.5529, "step": 11644 }, { - "epoch": 0.8811622715750445, - "grad_norm": 3.3201589584350586, - "learning_rate": 6.857307684804902e-07, - "loss": 0.7296, + "epoch": 0.8197817669834565, + "grad_norm": 1.9411100149154663, + "learning_rate": 1.1590677237101024e-06, + "loss": 0.7736, "step": 11645 }, { - "epoch": 0.8812379402973781, - "grad_norm": 2.456382989883423, - "learning_rate": 6.848687521249711e-07, - "loss": 0.6919, + "epoch": 0.8198521647307286, + "grad_norm": 1.8973581790924072, + "learning_rate": 1.158188212452262e-06, + "loss": 0.7894, "step": 11646 }, { - "epoch": 0.8813136090197117, - "grad_norm": 2.552457332611084, - "learning_rate": 6.840072564898647e-07, - "loss": 0.7836, + "epoch": 0.8199225624780007, + "grad_norm": 2.3564324378967285, + "learning_rate": 1.157309004915505e-06, + "loss": 0.6243, "step": 11647 }, { - "epoch": 0.8813892777420453, - "grad_norm": 2.111968755722046, - "learning_rate": 6.831462816291219e-07, - "loss": 0.5549, + "epoch": 0.8199929602252728, + "grad_norm": 2.3475093841552734, + "learning_rate": 1.156430101145548e-06, + "loss": 0.7066, "step": 11648 }, { - "epoch": 0.8814649464643789, - "grad_norm": 3.1068809032440186, - "learning_rate": 6.822858275966585e-07, - "loss": 0.6755, + "epoch": 0.8200633579725449, + "grad_norm": 2.0157594680786133, + "learning_rate": 1.1555515011880796e-06, + "loss": 0.6768, "step": 11649 }, { - "epoch": 0.8815406151867126, - "grad_norm": 2.4133050441741943, - "learning_rate": 6.814258944463598e-07, - "loss": 0.6294, + "epoch": 0.820133755719817, + "grad_norm": 1.9498379230499268, + "learning_rate": 1.154673205088783e-06, + "loss": 0.6627, "step": 11650 }, { - "epoch": 0.8816162839090462, - "grad_norm": 2.4189083576202393, - "learning_rate": 6.805664822320762e-07, - "loss": 0.6289, + "epoch": 0.8202041534670891, + "grad_norm": 1.7277573347091675, + "learning_rate": 1.1537952128933179e-06, + "loss": 0.6338, "step": 11651 }, { - "epoch": 0.8816919526313798, - "grad_norm": 1.988824486732483, - "learning_rate": 6.797075910076299e-07, - "loss": 0.6044, + "epoch": 0.8202745512143611, + "grad_norm": 1.9233089685440063, + "learning_rate": 1.1529175246473367e-06, + "loss": 0.7906, "step": 11652 }, { - "epoch": 0.8817676213537134, - "grad_norm": 2.337367534637451, - "learning_rate": 6.788492208268029e-07, - "loss": 0.6373, + "epoch": 0.8203449489616332, + "grad_norm": 1.9683926105499268, + "learning_rate": 1.152040140396467e-06, + "loss": 0.5872, "step": 11653 }, { - "epoch": 0.8818432900760471, - "grad_norm": 4.022536277770996, - "learning_rate": 6.779913717433521e-07, - "loss": 0.6707, + "epoch": 0.8204153467089054, + "grad_norm": 2.1079487800598145, + "learning_rate": 1.1511630601863285e-06, + "loss": 0.5552, "step": 11654 }, { - "epoch": 0.8819189587983807, - "grad_norm": 2.011725902557373, - "learning_rate": 6.771340438109996e-07, - "loss": 0.7432, + "epoch": 0.8204857444561774, + "grad_norm": 2.3553500175476074, + "learning_rate": 1.150286284062519e-06, + "loss": 0.5477, "step": 11655 }, { - "epoch": 0.8819946275207143, - "grad_norm": 1.9914313554763794, - "learning_rate": 6.762772370834324e-07, - "loss": 0.5172, + "epoch": 0.8205561422034495, + "grad_norm": 2.0819571018218994, + "learning_rate": 1.1494098120706237e-06, + "loss": 0.6941, "step": 11656 }, { - "epoch": 0.8820702962430479, - "grad_norm": 1.9742159843444824, - "learning_rate": 6.754209516143058e-07, - "loss": 0.6344, + "epoch": 0.8206265399507215, + "grad_norm": 2.000378131866455, + "learning_rate": 1.1485336442562096e-06, + "loss": 0.7107, "step": 11657 }, { - "epoch": 0.8821459649653816, - "grad_norm": 2.7394680976867676, - "learning_rate": 6.745651874572445e-07, - "loss": 0.6957, + "epoch": 0.8206969376979937, + "grad_norm": 2.426657199859619, + "learning_rate": 1.1476577806648323e-06, + "loss": 0.6392, "step": 11658 }, { - "epoch": 0.8822216336877152, - "grad_norm": 2.470006227493286, - "learning_rate": 6.737099446658389e-07, - "loss": 0.7392, + "epoch": 0.8207673354452657, + "grad_norm": 2.0434834957122803, + "learning_rate": 1.146782221342025e-06, + "loss": 0.7524, "step": 11659 }, { - "epoch": 0.8822973024100488, - "grad_norm": 2.4756081104278564, - "learning_rate": 6.728552232936471e-07, - "loss": 0.7639, + "epoch": 0.8208377331925378, + "grad_norm": 2.2999584674835205, + "learning_rate": 1.1459069663333125e-06, + "loss": 0.7193, "step": 11660 }, { - "epoch": 0.8823729711323824, - "grad_norm": 2.942502021789551, - "learning_rate": 6.720010233941943e-07, - "loss": 0.6419, + "epoch": 0.82090813093981, + "grad_norm": 1.9935555458068848, + "learning_rate": 1.1450320156841992e-06, + "loss": 0.7098, "step": 11661 }, { - "epoch": 0.882448639854716, - "grad_norm": 1.9842886924743652, - "learning_rate": 6.711473450209737e-07, - "loss": 0.7802, + "epoch": 0.820978528687082, + "grad_norm": 2.2103447914123535, + "learning_rate": 1.1441573694401743e-06, + "loss": 0.6342, "step": 11662 }, { - "epoch": 0.8825243085770497, - "grad_norm": 1.7675484418869019, - "learning_rate": 6.702941882274446e-07, - "loss": 0.6591, + "epoch": 0.8210489264343541, + "grad_norm": 1.6640064716339111, + "learning_rate": 1.1432830276467087e-06, + "loss": 0.644, "step": 11663 }, { - "epoch": 0.8825999772993833, - "grad_norm": 2.0675406455993652, - "learning_rate": 6.694415530670351e-07, - "loss": 0.6647, + "epoch": 0.8211193241816261, + "grad_norm": 2.4066553115844727, + "learning_rate": 1.1424089903492644e-06, + "loss": 0.7027, "step": 11664 }, { - "epoch": 0.8826756460217169, - "grad_norm": 2.01813006401062, - "learning_rate": 6.685894395931396e-07, - "loss": 0.631, + "epoch": 0.8211897219288983, + "grad_norm": 1.819202184677124, + "learning_rate": 1.1415352575932802e-06, + "loss": 0.5361, "step": 11665 }, { - "epoch": 0.8827513147440506, - "grad_norm": 1.776025414466858, - "learning_rate": 6.677378478591225e-07, - "loss": 0.819, + "epoch": 0.8212601196761704, + "grad_norm": 2.066023111343384, + "learning_rate": 1.1406618294241855e-06, + "loss": 0.5837, "step": 11666 }, { - "epoch": 0.8828269834663842, - "grad_norm": 2.657186269760132, - "learning_rate": 6.668867779183099e-07, - "loss": 0.6946, + "epoch": 0.8213305174234424, + "grad_norm": 1.765192985534668, + "learning_rate": 1.1397887058873886e-06, + "loss": 0.7189, "step": 11667 }, { - "epoch": 0.8829026521887178, - "grad_norm": 2.1002979278564453, - "learning_rate": 6.660362298239985e-07, - "loss": 0.7027, + "epoch": 0.8214009151707146, + "grad_norm": 1.9115898609161377, + "learning_rate": 1.1389158870282843e-06, + "loss": 0.6384, "step": 11668 }, { - "epoch": 0.8829783209110514, - "grad_norm": 1.9301815032958984, - "learning_rate": 6.651862036294554e-07, - "loss": 0.7285, + "epoch": 0.8214713129179866, + "grad_norm": 1.8058311939239502, + "learning_rate": 1.1380433728922488e-06, + "loss": 0.5812, "step": 11669 }, { - "epoch": 0.883053989633385, - "grad_norm": 2.0952084064483643, - "learning_rate": 6.6433669938791e-07, - "loss": 0.652, + "epoch": 0.8215417106652587, + "grad_norm": 1.7405433654785156, + "learning_rate": 1.1371711635246497e-06, + "loss": 0.5849, "step": 11670 }, { - "epoch": 0.8831296583557187, - "grad_norm": 2.0233519077301025, - "learning_rate": 6.634877171525611e-07, - "loss": 0.6332, + "epoch": 0.8216121084125309, + "grad_norm": 2.5721728801727295, + "learning_rate": 1.13629925897083e-06, + "loss": 0.6882, "step": 11671 }, { - "epoch": 0.8832053270780523, - "grad_norm": 2.134343385696411, - "learning_rate": 6.626392569765738e-07, - "loss": 0.7207, + "epoch": 0.8216825061598029, + "grad_norm": 2.3350727558135986, + "learning_rate": 1.1354276592761237e-06, + "loss": 0.6713, "step": 11672 }, { - "epoch": 0.8832809958003859, - "grad_norm": 1.9051159620285034, - "learning_rate": 6.617913189130837e-07, - "loss": 0.6636, + "epoch": 0.821752903907075, + "grad_norm": 1.6434913873672485, + "learning_rate": 1.1345563644858441e-06, + "loss": 0.5789, "step": 11673 }, { - "epoch": 0.8833566645227195, - "grad_norm": 3.0139591693878174, - "learning_rate": 6.609439030151905e-07, - "loss": 0.5951, + "epoch": 0.821823301654347, + "grad_norm": 2.236314535140991, + "learning_rate": 1.133685374645292e-06, + "loss": 0.6318, "step": 11674 }, { - "epoch": 0.8834323332450531, - "grad_norm": 2.278470277786255, - "learning_rate": 6.600970093359605e-07, - "loss": 0.519, + "epoch": 0.8218936994016192, + "grad_norm": 1.7385880947113037, + "learning_rate": 1.1328146897997482e-06, + "loss": 0.6133, "step": 11675 }, { - "epoch": 0.8835080019673868, - "grad_norm": 2.205688953399658, - "learning_rate": 6.592506379284314e-07, - "loss": 0.7323, + "epoch": 0.8219640971488912, + "grad_norm": 1.7876893281936646, + "learning_rate": 1.1319443099944838e-06, + "loss": 0.7381, "step": 11676 }, { - "epoch": 0.8835836706897204, - "grad_norm": 2.893535852432251, - "learning_rate": 6.584047888456058e-07, - "loss": 0.6726, + "epoch": 0.8220344948961633, + "grad_norm": 2.2851662635803223, + "learning_rate": 1.1310742352747477e-06, + "loss": 0.611, "step": 11677 }, { - "epoch": 0.883659339412054, - "grad_norm": 3.5614638328552246, - "learning_rate": 6.575594621404494e-07, - "loss": 0.7368, + "epoch": 0.8221048926434354, + "grad_norm": 1.958611249923706, + "learning_rate": 1.1302044656857782e-06, + "loss": 0.6998, "step": 11678 }, { - "epoch": 0.8837350081343877, - "grad_norm": 2.1940793991088867, - "learning_rate": 6.567146578659037e-07, - "loss": 0.694, + "epoch": 0.8221752903907075, + "grad_norm": 2.0010173320770264, + "learning_rate": 1.1293350012727958e-06, + "loss": 0.6683, "step": 11679 }, { - "epoch": 0.8838106768567213, - "grad_norm": 2.0482017993927, - "learning_rate": 6.558703760748725e-07, - "loss": 0.6786, + "epoch": 0.8222456881379796, + "grad_norm": 1.941108226776123, + "learning_rate": 1.1284658420810029e-06, + "loss": 0.6569, "step": 11680 }, { - "epoch": 0.8838863455790549, - "grad_norm": 1.887109637260437, - "learning_rate": 6.550266168202263e-07, - "loss": 0.7625, + "epoch": 0.8223160858852516, + "grad_norm": 1.8926050662994385, + "learning_rate": 1.1275969881555867e-06, + "loss": 0.6688, "step": 11681 }, { - "epoch": 0.8839620143013885, - "grad_norm": 1.6695749759674072, - "learning_rate": 6.541833801548032e-07, - "loss": 0.6128, + "epoch": 0.8223864836325238, + "grad_norm": 1.9495124816894531, + "learning_rate": 1.1267284395417228e-06, + "loss": 0.6526, "step": 11682 }, { - "epoch": 0.8840376830237221, - "grad_norm": 2.545053720474243, - "learning_rate": 6.533406661314107e-07, - "loss": 0.6178, + "epoch": 0.8224568813797959, + "grad_norm": 1.7502614259719849, + "learning_rate": 1.1258601962845653e-06, + "loss": 0.6236, "step": 11683 }, { - "epoch": 0.8841133517460558, - "grad_norm": 2.606736183166504, - "learning_rate": 6.524984748028226e-07, - "loss": 0.7046, + "epoch": 0.8225272791270679, + "grad_norm": 2.134036064147949, + "learning_rate": 1.124992258429258e-06, + "loss": 0.6456, "step": 11684 }, { - "epoch": 0.8841890204683894, - "grad_norm": 2.562772512435913, - "learning_rate": 6.516568062217777e-07, - "loss": 0.5824, + "epoch": 0.82259767687434, + "grad_norm": 1.8293125629425049, + "learning_rate": 1.1241246260209235e-06, + "loss": 0.5512, "step": 11685 }, { - "epoch": 0.884264689190723, - "grad_norm": 2.3864822387695312, - "learning_rate": 6.50815660440987e-07, - "loss": 0.7149, + "epoch": 0.8226680746216121, + "grad_norm": 2.217392683029175, + "learning_rate": 1.1232572991046712e-06, + "loss": 0.6007, "step": 11686 }, { - "epoch": 0.8843403579130567, - "grad_norm": 1.7921406030654907, - "learning_rate": 6.499750375131251e-07, - "loss": 0.7122, + "epoch": 0.8227384723688842, + "grad_norm": 1.95255708694458, + "learning_rate": 1.1223902777255924e-06, + "loss": 0.6724, "step": 11687 }, { - "epoch": 0.8844160266353902, - "grad_norm": 2.3052492141723633, - "learning_rate": 6.491349374908321e-07, - "loss": 0.6496, + "epoch": 0.8228088701161563, + "grad_norm": 1.8877983093261719, + "learning_rate": 1.121523561928768e-06, + "loss": 0.7683, "step": 11688 }, { - "epoch": 0.8844916953577239, - "grad_norm": 1.8889057636260986, - "learning_rate": 6.482953604267179e-07, - "loss": 0.5084, + "epoch": 0.8228792678634284, + "grad_norm": 1.9690465927124023, + "learning_rate": 1.1206571517592558e-06, + "loss": 0.673, "step": 11689 }, { - "epoch": 0.8845673640800575, - "grad_norm": 2.0114352703094482, - "learning_rate": 6.474563063733615e-07, - "loss": 0.7584, + "epoch": 0.8229496656107005, + "grad_norm": 2.526233673095703, + "learning_rate": 1.119791047262105e-06, + "loss": 0.692, "step": 11690 }, { - "epoch": 0.8846430328023911, - "grad_norm": 3.2486960887908936, - "learning_rate": 6.466177753833097e-07, - "loss": 0.6731, + "epoch": 0.8230200633579725, + "grad_norm": 1.7170294523239136, + "learning_rate": 1.1189252484823422e-06, + "loss": 0.6456, "step": 11691 }, { - "epoch": 0.8847187015247248, - "grad_norm": 1.9725160598754883, - "learning_rate": 6.457797675090685e-07, - "loss": 0.6108, + "epoch": 0.8230904611052446, + "grad_norm": 1.7564340829849243, + "learning_rate": 1.1180597554649812e-06, + "loss": 0.6241, "step": 11692 }, { - "epoch": 0.8847943702470584, - "grad_norm": 2.553386926651001, - "learning_rate": 6.449422828031191e-07, - "loss": 0.5285, + "epoch": 0.8231608588525168, + "grad_norm": 1.7787857055664062, + "learning_rate": 1.1171945682550218e-06, + "loss": 0.5711, "step": 11693 }, { - "epoch": 0.884870038969392, - "grad_norm": 1.3259893655776978, - "learning_rate": 6.441053213179074e-07, - "loss": 0.7945, + "epoch": 0.8232312565997888, + "grad_norm": 2.1169326305389404, + "learning_rate": 1.1163296868974425e-06, + "loss": 0.6566, "step": 11694 }, { - "epoch": 0.8849457076917256, - "grad_norm": 2.1485307216644287, - "learning_rate": 6.432688831058464e-07, - "loss": 0.534, + "epoch": 0.8233016543470609, + "grad_norm": 2.73169207572937, + "learning_rate": 1.1154651114372135e-06, + "loss": 0.7159, "step": 11695 }, { - "epoch": 0.8850213764140592, - "grad_norm": 1.817466139793396, - "learning_rate": 6.424329682193174e-07, - "loss": 0.7578, + "epoch": 0.823372052094333, + "grad_norm": 2.1130974292755127, + "learning_rate": 1.1146008419192826e-06, + "loss": 0.6504, "step": 11696 }, { - "epoch": 0.8850970451363929, - "grad_norm": 2.9842936992645264, - "learning_rate": 6.415975767106674e-07, - "loss": 0.8272, + "epoch": 0.8234424498416051, + "grad_norm": 2.0473294258117676, + "learning_rate": 1.113736878388584e-06, + "loss": 0.7201, "step": 11697 }, { - "epoch": 0.8851727138587265, - "grad_norm": 2.0747790336608887, - "learning_rate": 6.407627086322136e-07, - "loss": 0.7141, + "epoch": 0.8235128475888771, + "grad_norm": 1.770151138305664, + "learning_rate": 1.1128732208900343e-06, + "loss": 0.658, "step": 11698 }, { - "epoch": 0.8852483825810601, - "grad_norm": 2.0690724849700928, - "learning_rate": 6.399283640362322e-07, - "loss": 0.6518, + "epoch": 0.8235832453361492, + "grad_norm": 2.0844058990478516, + "learning_rate": 1.1120098694685397e-06, + "loss": 0.7483, "step": 11699 }, { - "epoch": 0.8853240513033938, - "grad_norm": 1.9558627605438232, - "learning_rate": 6.390945429749784e-07, - "loss": 0.689, + "epoch": 0.8236536430834214, + "grad_norm": 2.0410683155059814, + "learning_rate": 1.111146824168982e-06, + "loss": 0.7685, "step": 11700 }, { - "epoch": 0.8853997200257273, - "grad_norm": 2.246373414993286, - "learning_rate": 6.382612455006684e-07, - "loss": 0.6619, + "epoch": 0.8237240408306934, + "grad_norm": 2.266251802444458, + "learning_rate": 1.1102840850362364e-06, + "loss": 0.5886, "step": 11701 }, { - "epoch": 0.885475388748061, - "grad_norm": 2.5528721809387207, - "learning_rate": 6.374284716654823e-07, - "loss": 0.6938, + "epoch": 0.8237944385779655, + "grad_norm": 1.7778346538543701, + "learning_rate": 1.1094216521151549e-06, + "loss": 0.5778, "step": 11702 }, { - "epoch": 0.8855510574703946, - "grad_norm": 3.1837098598480225, - "learning_rate": 6.365962215215737e-07, - "loss": 0.7115, + "epoch": 0.8238648363252375, + "grad_norm": 1.8601349592208862, + "learning_rate": 1.1085595254505772e-06, + "loss": 0.6656, "step": 11703 }, { - "epoch": 0.8856267261927282, - "grad_norm": 1.9520882368087769, - "learning_rate": 6.357644951210588e-07, - "loss": 0.6254, + "epoch": 0.8239352340725097, + "grad_norm": 2.3582122325897217, + "learning_rate": 1.1076977050873226e-06, + "loss": 0.6299, "step": 11704 }, { - "epoch": 0.8857023949150619, - "grad_norm": 2.3639538288116455, - "learning_rate": 6.349332925160267e-07, - "loss": 0.8559, + "epoch": 0.8240056318197818, + "grad_norm": 1.8820184469223022, + "learning_rate": 1.1068361910702034e-06, + "loss": 0.6782, "step": 11705 }, { - "epoch": 0.8857780636373955, - "grad_norm": 2.534674644470215, - "learning_rate": 6.341026137585271e-07, - "loss": 0.7494, + "epoch": 0.8240760295670538, + "grad_norm": 1.8860328197479248, + "learning_rate": 1.1059749834440056e-06, + "loss": 0.6203, "step": 11706 }, { - "epoch": 0.8858537323597291, - "grad_norm": 2.048832416534424, - "learning_rate": 6.332724589005792e-07, - "loss": 0.7289, + "epoch": 0.824146427314326, + "grad_norm": 2.159929037094116, + "learning_rate": 1.1051140822535081e-06, + "loss": 0.6898, "step": 11707 }, { - "epoch": 0.8859294010820628, - "grad_norm": 2.1212267875671387, - "learning_rate": 6.324428279941724e-07, - "loss": 0.7757, + "epoch": 0.824216825061598, + "grad_norm": 2.3911125659942627, + "learning_rate": 1.1042534875434683e-06, + "loss": 0.6865, "step": 11708 }, { - "epoch": 0.8860050698043963, - "grad_norm": 2.0502870082855225, - "learning_rate": 6.316137210912593e-07, - "loss": 0.7043, + "epoch": 0.8242872228088701, + "grad_norm": 2.02652907371521, + "learning_rate": 1.1033931993586292e-06, + "loss": 0.7102, "step": 11709 }, { - "epoch": 0.88608073852673, - "grad_norm": 2.497807502746582, - "learning_rate": 6.307851382437612e-07, - "loss": 0.653, + "epoch": 0.8243576205561423, + "grad_norm": 1.9419628381729126, + "learning_rate": 1.1025332177437162e-06, + "loss": 0.6203, "step": 11710 }, { - "epoch": 0.8861564072490636, - "grad_norm": 1.9462158679962158, - "learning_rate": 6.299570795035676e-07, - "loss": 0.6508, + "epoch": 0.8244280183034143, + "grad_norm": 1.7641956806182861, + "learning_rate": 1.101673542743444e-06, + "loss": 0.7279, "step": 11711 }, { - "epoch": 0.8862320759713972, - "grad_norm": 3.3071272373199463, - "learning_rate": 6.291295449225352e-07, - "loss": 0.7087, + "epoch": 0.8244984160506864, + "grad_norm": 1.7892591953277588, + "learning_rate": 1.100814174402504e-06, + "loss": 0.6101, "step": 11712 }, { - "epoch": 0.8863077446937309, - "grad_norm": 2.035278797149658, - "learning_rate": 6.283025345524833e-07, - "loss": 0.6981, + "epoch": 0.8245688137979584, + "grad_norm": 1.9028633832931519, + "learning_rate": 1.0999551127655798e-06, + "loss": 0.612, "step": 11713 }, { - "epoch": 0.8863834134160645, - "grad_norm": 1.910923719406128, - "learning_rate": 6.274760484452027e-07, - "loss": 0.5813, + "epoch": 0.8246392115452306, + "grad_norm": 1.7455382347106934, + "learning_rate": 1.0990963578773332e-06, + "loss": 0.672, "step": 11714 }, { - "epoch": 0.8864590821383981, - "grad_norm": 1.7221661806106567, - "learning_rate": 6.266500866524558e-07, - "loss": 0.8203, + "epoch": 0.8247096092925026, + "grad_norm": 2.082840919494629, + "learning_rate": 1.0982379097824102e-06, + "loss": 0.5868, "step": 11715 }, { - "epoch": 0.8865347508607317, - "grad_norm": 2.013583183288574, - "learning_rate": 6.258246492259604e-07, - "loss": 0.7452, + "epoch": 0.8247800070397747, + "grad_norm": 1.9009650945663452, + "learning_rate": 1.097379768525442e-06, + "loss": 0.6589, "step": 11716 }, { - "epoch": 0.8866104195830653, - "grad_norm": 2.711366653442383, - "learning_rate": 6.24999736217412e-07, - "loss": 0.7126, + "epoch": 0.8248504047870469, + "grad_norm": 2.271049737930298, + "learning_rate": 1.0965219341510473e-06, + "loss": 0.667, "step": 11717 }, { - "epoch": 0.886686088305399, - "grad_norm": 2.107581853866577, - "learning_rate": 6.241753476784674e-07, - "loss": 0.655, + "epoch": 0.8249208025343189, + "grad_norm": 2.0151166915893555, + "learning_rate": 1.0956644067038213e-06, + "loss": 0.6531, "step": 11718 }, { - "epoch": 0.8867617570277326, - "grad_norm": 2.0487051010131836, - "learning_rate": 6.233514836607533e-07, - "loss": 0.6132, + "epoch": 0.824991200281591, + "grad_norm": 1.9394114017486572, + "learning_rate": 1.0948071862283521e-06, + "loss": 0.6799, "step": 11719 }, { - "epoch": 0.8868374257500662, - "grad_norm": 2.098557949066162, - "learning_rate": 6.225281442158633e-07, - "loss": 0.6881, + "epoch": 0.825061598028863, + "grad_norm": 1.938377857208252, + "learning_rate": 1.0939502727692061e-06, + "loss": 0.6703, "step": 11720 }, { - "epoch": 0.8869130944723999, - "grad_norm": 2.5246660709381104, - "learning_rate": 6.217053293953562e-07, - "loss": 0.6164, + "epoch": 0.8251319957761352, + "grad_norm": 2.030181646347046, + "learning_rate": 1.0930936663709336e-06, + "loss": 0.6156, "step": 11721 }, { - "epoch": 0.8869887631947334, - "grad_norm": 2.3522937297821045, - "learning_rate": 6.208830392507609e-07, - "loss": 0.661, + "epoch": 0.8252023935234073, + "grad_norm": 1.8705270290374756, + "learning_rate": 1.0922373670780694e-06, + "loss": 0.5955, "step": 11722 }, { - "epoch": 0.8870644319170671, - "grad_norm": 2.151280164718628, - "learning_rate": 6.20061273833572e-07, - "loss": 0.7885, + "epoch": 0.8252727912706793, + "grad_norm": 2.1540346145629883, + "learning_rate": 1.0913813749351363e-06, + "loss": 0.5968, "step": 11723 }, { - "epoch": 0.8871401006394007, - "grad_norm": 2.2462844848632812, - "learning_rate": 6.192400331952486e-07, - "loss": 0.7719, + "epoch": 0.8253431890179515, + "grad_norm": 2.24743390083313, + "learning_rate": 1.0905256899866347e-06, + "loss": 0.6759, "step": 11724 }, { - "epoch": 0.8872157693617343, - "grad_norm": 2.168433427810669, - "learning_rate": 6.184193173872194e-07, - "loss": 0.5961, + "epoch": 0.8254135867652235, + "grad_norm": 2.3311219215393066, + "learning_rate": 1.089670312277057e-06, + "loss": 0.7778, "step": 11725 }, { - "epoch": 0.887291438084068, - "grad_norm": 2.1609530448913574, - "learning_rate": 6.175991264608853e-07, - "loss": 0.8232, + "epoch": 0.8254839845124956, + "grad_norm": 1.687445878982544, + "learning_rate": 1.088815241850872e-06, + "loss": 0.5577, "step": 11726 }, { - "epoch": 0.8873671068064016, - "grad_norm": 1.7684580087661743, - "learning_rate": 6.167794604676032e-07, - "loss": 0.7733, + "epoch": 0.8255543822597677, + "grad_norm": 2.0636613368988037, + "learning_rate": 1.0879604787525363e-06, + "loss": 0.6343, "step": 11727 }, { - "epoch": 0.8874427755287352, - "grad_norm": 2.24383807182312, - "learning_rate": 6.15960319458707e-07, - "loss": 0.682, + "epoch": 0.8256247800070398, + "grad_norm": 1.8136405944824219, + "learning_rate": 1.0871060230264874e-06, + "loss": 0.7201, "step": 11728 }, { - "epoch": 0.8875184442510688, - "grad_norm": 2.093867540359497, - "learning_rate": 6.151417034854928e-07, - "loss": 0.6278, + "epoch": 0.8256951777543119, + "grad_norm": 2.0409340858459473, + "learning_rate": 1.0862518747171534e-06, + "loss": 0.6081, "step": 11729 }, { - "epoch": 0.8875941129734024, - "grad_norm": 1.9309519529342651, - "learning_rate": 6.143236125992245e-07, - "loss": 0.6813, + "epoch": 0.8257655755015839, + "grad_norm": 1.9709153175354004, + "learning_rate": 1.0853980338689383e-06, + "loss": 0.7412, "step": 11730 }, { - "epoch": 0.8876697816957361, - "grad_norm": 2.1042773723602295, - "learning_rate": 6.135060468511352e-07, - "loss": 0.6952, + "epoch": 0.825835973248856, + "grad_norm": 1.6567955017089844, + "learning_rate": 1.0845445005262379e-06, + "loss": 0.7809, "step": 11731 }, { - "epoch": 0.8877454504180697, - "grad_norm": 4.267055988311768, - "learning_rate": 6.126890062924218e-07, - "loss": 0.685, + "epoch": 0.8259063709961281, + "grad_norm": 2.1562483310699463, + "learning_rate": 1.0836912747334263e-06, + "loss": 0.6113, "step": 11732 }, { - "epoch": 0.8878211191404033, - "grad_norm": 1.7628538608551025, - "learning_rate": 6.118724909742515e-07, - "loss": 0.5026, + "epoch": 0.8259767687434002, + "grad_norm": 2.5700173377990723, + "learning_rate": 1.0828383565348632e-06, + "loss": 0.6249, "step": 11733 }, { - "epoch": 0.887896787862737, - "grad_norm": 1.8866184949874878, - "learning_rate": 6.110565009477555e-07, - "loss": 0.5225, + "epoch": 0.8260471664906723, + "grad_norm": 2.006458282470703, + "learning_rate": 1.0819857459748918e-06, + "loss": 0.7791, "step": 11734 }, { - "epoch": 0.8879724565850705, - "grad_norm": 2.075368881225586, - "learning_rate": 6.102410362640336e-07, - "loss": 0.6586, + "epoch": 0.8261175642379444, + "grad_norm": 2.227231025695801, + "learning_rate": 1.0811334430978422e-06, + "loss": 0.7395, "step": 11735 }, { - "epoch": 0.8880481253074042, - "grad_norm": 3.18533992767334, - "learning_rate": 6.094260969741542e-07, - "loss": 0.7591, + "epoch": 0.8261879619852165, + "grad_norm": 1.770276427268982, + "learning_rate": 1.0802814479480238e-06, + "loss": 0.6748, "step": 11736 }, { - "epoch": 0.8881237940297378, - "grad_norm": 2.374406576156616, - "learning_rate": 6.086116831291534e-07, - "loss": 0.8184, + "epoch": 0.8262583597324885, + "grad_norm": 1.8572019338607788, + "learning_rate": 1.0794297605697361e-06, + "loss": 0.6397, "step": 11737 }, { - "epoch": 0.8881994627520714, - "grad_norm": 2.989527940750122, - "learning_rate": 6.077977947800284e-07, - "loss": 0.8288, + "epoch": 0.8263287574797606, + "grad_norm": 2.276538133621216, + "learning_rate": 1.078578381007257e-06, + "loss": 0.6357, "step": 11738 }, { - "epoch": 0.8882751314744051, - "grad_norm": 2.4953837394714355, - "learning_rate": 6.069844319777485e-07, - "loss": 0.6191, + "epoch": 0.8263991552270328, + "grad_norm": 1.793267846107483, + "learning_rate": 1.0777273093048505e-06, + "loss": 0.6101, "step": 11739 }, { - "epoch": 0.8883508001967387, - "grad_norm": 2.7576467990875244, - "learning_rate": 6.061715947732508e-07, - "loss": 0.5426, + "epoch": 0.8264695529743048, + "grad_norm": 2.0969796180725098, + "learning_rate": 1.0768765455067628e-06, + "loss": 0.6176, "step": 11740 }, { - "epoch": 0.8884264689190723, - "grad_norm": 2.4133262634277344, - "learning_rate": 6.053592832174357e-07, - "loss": 0.6257, + "epoch": 0.8265399507215769, + "grad_norm": 2.058298110961914, + "learning_rate": 1.0760260896572296e-06, + "loss": 0.64, "step": 11741 }, { - "epoch": 0.888502137641406, - "grad_norm": 2.3025264739990234, - "learning_rate": 6.045474973611746e-07, - "loss": 0.6992, + "epoch": 0.826610348468849, + "grad_norm": 2.0297250747680664, + "learning_rate": 1.0751759418004639e-06, + "loss": 0.6627, "step": 11742 }, { - "epoch": 0.8885778063637395, - "grad_norm": 2.0357069969177246, - "learning_rate": 6.037362372553026e-07, - "loss": 0.6468, + "epoch": 0.8266807462161211, + "grad_norm": 2.083895444869995, + "learning_rate": 1.0743261019806667e-06, + "loss": 0.6822, "step": 11743 }, { - "epoch": 0.8886534750860732, - "grad_norm": 2.5537617206573486, - "learning_rate": 6.029255029506262e-07, - "loss": 0.6608, + "epoch": 0.8267511439633932, + "grad_norm": 1.9134571552276611, + "learning_rate": 1.0734765702420245e-06, + "loss": 0.7134, "step": 11744 }, { - "epoch": 0.8887291438084068, - "grad_norm": 2.2393789291381836, - "learning_rate": 6.021152944979118e-07, - "loss": 0.6493, + "epoch": 0.8268215417106652, + "grad_norm": 1.8997936248779297, + "learning_rate": 1.0726273466287003e-06, + "loss": 0.65, "step": 11745 }, { - "epoch": 0.8888048125307404, - "grad_norm": 2.217641830444336, - "learning_rate": 6.013056119479008e-07, - "loss": 0.7603, + "epoch": 0.8268919394579374, + "grad_norm": 1.7747695446014404, + "learning_rate": 1.0717784311848502e-06, + "loss": 0.6229, "step": 11746 }, { - "epoch": 0.8888804812530741, - "grad_norm": 2.554548978805542, - "learning_rate": 6.004964553512986e-07, - "loss": 0.7148, + "epoch": 0.8269623372052094, + "grad_norm": 1.8545395135879517, + "learning_rate": 1.0709298239546062e-06, + "loss": 0.7627, "step": 11747 }, { - "epoch": 0.8889561499754076, - "grad_norm": 1.9256244897842407, - "learning_rate": 5.996878247587737e-07, - "loss": 0.7035, + "epoch": 0.8270327349524815, + "grad_norm": 1.9362428188323975, + "learning_rate": 1.0700815249820924e-06, + "loss": 0.6737, "step": 11748 }, { - "epoch": 0.8890318186977413, - "grad_norm": 4.53993034362793, - "learning_rate": 5.988797202209676e-07, - "loss": 0.4874, + "epoch": 0.8271031326997537, + "grad_norm": 4.833147048950195, + "learning_rate": 1.0692335343114089e-06, + "loss": 0.5995, "step": 11749 }, { - "epoch": 0.889107487420075, - "grad_norm": 2.335663080215454, - "learning_rate": 5.980721417884838e-07, - "loss": 0.7383, + "epoch": 0.8271735304470257, + "grad_norm": 1.9159475564956665, + "learning_rate": 1.0683858519866482e-06, + "loss": 0.5766, "step": 11750 }, { - "epoch": 0.8891831561424085, - "grad_norm": 2.05330228805542, - "learning_rate": 5.972650895119018e-07, - "loss": 0.7447, + "epoch": 0.8272439281942978, + "grad_norm": 2.1874923706054688, + "learning_rate": 1.067538478051876e-06, + "loss": 0.7052, "step": 11751 }, { - "epoch": 0.8892588248647422, - "grad_norm": 2.6175127029418945, - "learning_rate": 5.964585634417553e-07, - "loss": 0.6698, + "epoch": 0.8273143259415698, + "grad_norm": 1.9260319471359253, + "learning_rate": 1.0666914125511532e-06, + "loss": 0.7021, "step": 11752 }, { - "epoch": 0.8893344935870758, - "grad_norm": 1.903134822845459, - "learning_rate": 5.956525636285538e-07, - "loss": 0.623, + "epoch": 0.827384723688842, + "grad_norm": 1.9797090291976929, + "learning_rate": 1.0658446555285148e-06, + "loss": 0.6563, "step": 11753 }, { - "epoch": 0.8894101623094094, - "grad_norm": 2.384854316711426, - "learning_rate": 5.94847090122772e-07, - "loss": 0.6605, + "epoch": 0.827455121436114, + "grad_norm": 1.6092268228530884, + "learning_rate": 1.0649982070279893e-06, + "loss": 0.6078, "step": 11754 }, { - "epoch": 0.8894858310317431, - "grad_norm": 3.7998368740081787, - "learning_rate": 5.940421429748514e-07, - "loss": 0.627, + "epoch": 0.8275255191833861, + "grad_norm": 1.723626971244812, + "learning_rate": 1.0641520670935798e-06, + "loss": 0.6183, "step": 11755 }, { - "epoch": 0.8895614997540766, - "grad_norm": 2.438006639480591, - "learning_rate": 5.932377222351987e-07, - "loss": 0.5972, + "epoch": 0.8275959169306583, + "grad_norm": 2.1120383739471436, + "learning_rate": 1.0633062357692841e-06, + "loss": 0.6432, "step": 11756 }, { - "epoch": 0.8896371684764103, - "grad_norm": 2.115206718444824, - "learning_rate": 5.924338279541919e-07, - "loss": 0.5647, + "epoch": 0.8276663146779303, + "grad_norm": 2.013521432876587, + "learning_rate": 1.0624607130990703e-06, + "loss": 0.6718, "step": 11757 }, { - "epoch": 0.8897128371987439, - "grad_norm": 2.2133545875549316, - "learning_rate": 5.916304601821733e-07, - "loss": 0.6166, + "epoch": 0.8277367124252024, + "grad_norm": 1.5783677101135254, + "learning_rate": 1.0616154991269026e-06, + "loss": 0.5631, "step": 11758 }, { - "epoch": 0.8897885059210775, - "grad_norm": 2.2478818893432617, - "learning_rate": 5.90827618969449e-07, - "loss": 0.6041, + "epoch": 0.8278071101724744, + "grad_norm": 1.9372743368148804, + "learning_rate": 1.060770593896722e-06, + "loss": 0.725, "step": 11759 }, { - "epoch": 0.8898641746434112, - "grad_norm": 2.2799623012542725, - "learning_rate": 5.900253043662977e-07, - "loss": 0.59, + "epoch": 0.8278775079197466, + "grad_norm": 2.208268880844116, + "learning_rate": 1.0599259974524585e-06, + "loss": 0.6316, "step": 11760 }, { - "epoch": 0.8899398433657447, - "grad_norm": 2.2203476428985596, - "learning_rate": 5.89223516422965e-07, - "loss": 0.6151, + "epoch": 0.8279479056670187, + "grad_norm": 1.8652933835983276, + "learning_rate": 1.0590817098380211e-06, + "loss": 0.6793, "step": 11761 }, { - "epoch": 0.8900155120880784, - "grad_norm": 3.3624329566955566, - "learning_rate": 5.88422255189658e-07, - "loss": 0.7139, + "epoch": 0.8280183034142907, + "grad_norm": 2.043830156326294, + "learning_rate": 1.0582377310973066e-06, + "loss": 0.6455, "step": 11762 }, { - "epoch": 0.890091180810412, - "grad_norm": 1.922843337059021, - "learning_rate": 5.876215207165554e-07, - "loss": 0.6256, + "epoch": 0.8280887011615629, + "grad_norm": 1.8313099145889282, + "learning_rate": 1.0573940612741943e-06, + "loss": 0.5468, "step": 11763 }, { - "epoch": 0.8901668495327456, - "grad_norm": 2.0801382064819336, - "learning_rate": 5.868213130538032e-07, - "loss": 0.6623, + "epoch": 0.8281590989088349, + "grad_norm": 1.988839030265808, + "learning_rate": 1.0565507004125466e-06, + "loss": 0.6992, "step": 11764 }, { - "epoch": 0.8902425182550793, - "grad_norm": 1.8652883768081665, - "learning_rate": 5.860216322515112e-07, - "loss": 0.6678, + "epoch": 0.828229496656107, + "grad_norm": 1.971311330795288, + "learning_rate": 1.0557076485562094e-06, + "loss": 0.6827, "step": 11765 }, { - "epoch": 0.8903181869774129, - "grad_norm": 1.8340070247650146, - "learning_rate": 5.852224783597584e-07, - "loss": 0.7897, + "epoch": 0.8282998944033791, + "grad_norm": 3.297419309616089, + "learning_rate": 1.0548649057490165e-06, + "loss": 0.5619, "step": 11766 }, { - "epoch": 0.8903938556997465, - "grad_norm": 2.2345545291900635, - "learning_rate": 5.844238514285908e-07, - "loss": 0.6383, + "epoch": 0.8283702921506512, + "grad_norm": 2.1379284858703613, + "learning_rate": 1.0540224720347792e-06, + "loss": 0.6019, "step": 11767 }, { - "epoch": 0.8904695244220802, - "grad_norm": 3.453942060470581, - "learning_rate": 5.836257515080213e-07, - "loss": 0.5833, + "epoch": 0.8284406898979233, + "grad_norm": 1.8178068399429321, + "learning_rate": 1.0531803474573003e-06, + "loss": 0.7161, "step": 11768 }, { - "epoch": 0.8905451931444137, - "grad_norm": 1.9211537837982178, - "learning_rate": 5.82828178648031e-07, - "loss": 0.6603, + "epoch": 0.8285110876451953, + "grad_norm": 2.171966552734375, + "learning_rate": 1.0523385320603611e-06, + "loss": 0.6228, "step": 11769 }, { - "epoch": 0.8906208618667474, - "grad_norm": 1.9571908712387085, - "learning_rate": 5.82031132898562e-07, - "loss": 0.5726, + "epoch": 0.8285814853924675, + "grad_norm": 2.1364872455596924, + "learning_rate": 1.0514970258877288e-06, + "loss": 0.6479, "step": 11770 }, { - "epoch": 0.890696530589081, - "grad_norm": 2.344529151916504, - "learning_rate": 5.812346143095303e-07, - "loss": 0.7352, + "epoch": 0.8286518831397395, + "grad_norm": 1.7620666027069092, + "learning_rate": 1.0506558289831507e-06, + "loss": 0.6242, "step": 11771 }, { - "epoch": 0.8907721993114146, - "grad_norm": 1.8455153703689575, - "learning_rate": 5.80438622930818e-07, - "loss": 0.6122, + "epoch": 0.8287222808870116, + "grad_norm": 1.8743064403533936, + "learning_rate": 1.0498149413903661e-06, + "loss": 0.593, "step": 11772 }, { - "epoch": 0.8908478680337483, - "grad_norm": 2.3054256439208984, - "learning_rate": 5.796431588122711e-07, - "loss": 0.5951, + "epoch": 0.8287926786342837, + "grad_norm": 1.796466588973999, + "learning_rate": 1.04897436315309e-06, + "loss": 0.6317, "step": 11773 }, { - "epoch": 0.8909235367560818, - "grad_norm": 2.202211618423462, - "learning_rate": 5.788482220037041e-07, - "loss": 0.5783, + "epoch": 0.8288630763815558, + "grad_norm": 2.0288572311401367, + "learning_rate": 1.0481340943150283e-06, + "loss": 0.7151, "step": 11774 }, { - "epoch": 0.8909992054784155, - "grad_norm": 3.3458003997802734, - "learning_rate": 5.780538125548977e-07, - "loss": 0.6301, + "epoch": 0.8289334741288279, + "grad_norm": 1.6945778131484985, + "learning_rate": 1.0472941349198647e-06, + "loss": 0.6804, "step": 11775 }, { - "epoch": 0.8910748742007492, - "grad_norm": 6.680607795715332, - "learning_rate": 5.772599305156026e-07, - "loss": 0.6437, + "epoch": 0.8290038718760999, + "grad_norm": 1.6837270259857178, + "learning_rate": 1.0464544850112709e-06, + "loss": 0.7238, "step": 11776 }, { - "epoch": 0.8911505429230827, - "grad_norm": 2.2788503170013428, - "learning_rate": 5.764665759355326e-07, - "loss": 0.5404, + "epoch": 0.829074269623372, + "grad_norm": 1.6397615671157837, + "learning_rate": 1.045615144632898e-06, + "loss": 0.6793, "step": 11777 }, { - "epoch": 0.8912262116454164, - "grad_norm": 2.148418426513672, - "learning_rate": 5.756737488643713e-07, - "loss": 0.6569, + "epoch": 0.8291446673706442, + "grad_norm": 2.278855085372925, + "learning_rate": 1.0447761138283883e-06, + "loss": 0.5985, "step": 11778 }, { - "epoch": 0.89130188036775, - "grad_norm": 2.1473772525787354, - "learning_rate": 5.748814493517668e-07, - "loss": 0.4821, + "epoch": 0.8292150651179162, + "grad_norm": 1.9640207290649414, + "learning_rate": 1.0439373926413591e-06, + "loss": 0.606, "step": 11779 }, { - "epoch": 0.8913775490900836, - "grad_norm": 2.9149558544158936, - "learning_rate": 5.740896774473374e-07, - "loss": 0.7051, + "epoch": 0.8292854628651883, + "grad_norm": 2.000046968460083, + "learning_rate": 1.043098981115422e-06, + "loss": 0.7004, "step": 11780 }, { - "epoch": 0.8914532178124173, - "grad_norm": 2.491338014602661, - "learning_rate": 5.732984332006625e-07, - "loss": 0.6287, + "epoch": 0.8293558606124604, + "grad_norm": 1.7933224439620972, + "learning_rate": 1.0422608792941633e-06, + "loss": 0.5961, "step": 11781 }, { - "epoch": 0.8915288865347508, - "grad_norm": 1.9785722494125366, - "learning_rate": 5.725077166612966e-07, - "loss": 0.5939, + "epoch": 0.8294262583597325, + "grad_norm": 2.422968626022339, + "learning_rate": 1.0414230872211568e-06, + "loss": 0.7182, "step": 11782 }, { - "epoch": 0.8916045552570845, - "grad_norm": 2.247680187225342, - "learning_rate": 5.717175278787568e-07, - "loss": 0.7237, + "epoch": 0.8294966561070046, + "grad_norm": 1.8723886013031006, + "learning_rate": 1.040585604939959e-06, + "loss": 0.6106, "step": 11783 }, { - "epoch": 0.8916802239794182, - "grad_norm": 2.0729784965515137, - "learning_rate": 5.709278669025236e-07, - "loss": 0.6251, + "epoch": 0.8295670538542766, + "grad_norm": 2.0195765495300293, + "learning_rate": 1.0397484324941147e-06, + "loss": 0.613, "step": 11784 }, { - "epoch": 0.8917558927017517, - "grad_norm": 8.808799743652344, - "learning_rate": 5.701387337820506e-07, - "loss": 0.6845, + "epoch": 0.8296374516015488, + "grad_norm": 1.9527889490127563, + "learning_rate": 1.0389115699271452e-06, + "loss": 0.5973, "step": 11785 }, { - "epoch": 0.8918315614240854, - "grad_norm": 2.378300666809082, - "learning_rate": 5.693501285667561e-07, - "loss": 0.6736, + "epoch": 0.8297078493488208, + "grad_norm": 2.0800251960754395, + "learning_rate": 1.0380750172825638e-06, + "loss": 0.629, "step": 11786 }, { - "epoch": 0.8919072301464189, - "grad_norm": 1.9735015630722046, - "learning_rate": 5.685620513060238e-07, - "loss": 0.648, + "epoch": 0.8297782470960929, + "grad_norm": 1.741429090499878, + "learning_rate": 1.0372387746038617e-06, + "loss": 0.6739, "step": 11787 }, { - "epoch": 0.8919828988687526, - "grad_norm": 2.113264799118042, - "learning_rate": 5.67774502049207e-07, - "loss": 0.6149, + "epoch": 0.829848644843365, + "grad_norm": 2.1203558444976807, + "learning_rate": 1.036402841934516e-06, + "loss": 0.679, "step": 11788 }, { - "epoch": 0.8920585675910863, - "grad_norm": 2.309506893157959, - "learning_rate": 5.669874808456244e-07, - "loss": 0.8259, + "epoch": 0.8299190425906371, + "grad_norm": 1.8024678230285645, + "learning_rate": 1.035567219317986e-06, + "loss": 0.6619, "step": 11789 }, { - "epoch": 0.8921342363134198, - "grad_norm": 2.5161781311035156, - "learning_rate": 5.662009877445614e-07, - "loss": 0.6214, + "epoch": 0.8299894403379092, + "grad_norm": 1.9664345979690552, + "learning_rate": 1.0347319067977193e-06, + "loss": 0.7151, "step": 11790 }, { - "epoch": 0.8922099050357535, - "grad_norm": 1.9568015336990356, - "learning_rate": 5.654150227952688e-07, - "loss": 0.6697, + "epoch": 0.8300598380851812, + "grad_norm": 1.872698426246643, + "learning_rate": 1.0338969044171412e-06, + "loss": 0.6098, "step": 11791 }, { - "epoch": 0.8922855737580871, - "grad_norm": 2.603675127029419, - "learning_rate": 5.646295860469701e-07, - "loss": 0.6682, + "epoch": 0.8301302358324534, + "grad_norm": 2.262057065963745, + "learning_rate": 1.0330622122196685e-06, + "loss": 0.6603, "step": 11792 }, { - "epoch": 0.8923612424804207, - "grad_norm": 2.497239589691162, - "learning_rate": 5.63844677548849e-07, - "loss": 0.6721, + "epoch": 0.8302006335797254, + "grad_norm": 1.8218258619308472, + "learning_rate": 1.032227830248695e-06, + "loss": 0.6972, "step": 11793 }, { - "epoch": 0.8924369112027544, - "grad_norm": 2.2003486156463623, - "learning_rate": 5.630602973500622e-07, - "loss": 0.6348, + "epoch": 0.8302710313269975, + "grad_norm": 1.7197411060333252, + "learning_rate": 1.0313937585476e-06, + "loss": 0.681, "step": 11794 }, { - "epoch": 0.8925125799250879, - "grad_norm": 2.175077199935913, - "learning_rate": 5.622764454997265e-07, - "loss": 0.7373, + "epoch": 0.8303414290742697, + "grad_norm": 1.7676634788513184, + "learning_rate": 1.0305599971597506e-06, + "loss": 0.6611, "step": 11795 }, { - "epoch": 0.8925882486474216, - "grad_norm": 2.1149489879608154, - "learning_rate": 5.614931220469294e-07, - "loss": 0.5557, + "epoch": 0.8304118268215417, + "grad_norm": 1.9484196901321411, + "learning_rate": 1.0297265461284915e-06, + "loss": 0.5752, "step": 11796 }, { - "epoch": 0.8926639173697553, - "grad_norm": 2.437793493270874, - "learning_rate": 5.607103270407288e-07, - "loss": 0.8061, + "epoch": 0.8304822245688138, + "grad_norm": 2.3353731632232666, + "learning_rate": 1.0288934054971574e-06, + "loss": 0.7153, "step": 11797 }, { - "epoch": 0.8927395860920888, - "grad_norm": 2.231208562850952, - "learning_rate": 5.599280605301424e-07, - "loss": 0.6489, + "epoch": 0.8305526223160858, + "grad_norm": 2.024710178375244, + "learning_rate": 1.0280605753090635e-06, + "loss": 0.7667, "step": 11798 }, { - "epoch": 0.8928152548144225, - "grad_norm": 1.8926862478256226, - "learning_rate": 5.591463225641592e-07, - "loss": 0.7406, + "epoch": 0.830623020063358, + "grad_norm": 1.9246197938919067, + "learning_rate": 1.0272280556075084e-06, + "loss": 0.5551, "step": 11799 }, { - "epoch": 0.892890923536756, - "grad_norm": 3.975660562515259, - "learning_rate": 5.583651131917338e-07, - "loss": 0.6159, + "epoch": 0.8306934178106301, + "grad_norm": 1.9521030187606812, + "learning_rate": 1.0263958464357752e-06, + "loss": 0.6599, "step": 11800 }, { - "epoch": 0.8929665922590897, - "grad_norm": 1.7848179340362549, - "learning_rate": 5.575844324617914e-07, - "loss": 0.6479, + "epoch": 0.8307638155579021, + "grad_norm": 1.9869035482406616, + "learning_rate": 1.0255639478371324e-06, + "loss": 0.5819, "step": 11801 }, { - "epoch": 0.8930422609814234, - "grad_norm": 2.325378656387329, - "learning_rate": 5.568042804232135e-07, - "loss": 0.7373, + "epoch": 0.8308342133051743, + "grad_norm": 2.4981844425201416, + "learning_rate": 1.02473235985483e-06, + "loss": 0.6887, "step": 11802 }, { - "epoch": 0.8931179297037569, - "grad_norm": 2.4571590423583984, - "learning_rate": 5.560246571248623e-07, - "loss": 0.8312, + "epoch": 0.8309046110524463, + "grad_norm": 2.032525062561035, + "learning_rate": 1.0239010825321048e-06, + "loss": 0.6251, "step": 11803 }, { - "epoch": 0.8931935984260906, - "grad_norm": 2.313886880874634, - "learning_rate": 5.552455626155596e-07, - "loss": 0.7574, + "epoch": 0.8309750087997184, + "grad_norm": 1.7464314699172974, + "learning_rate": 1.0230701159121746e-06, + "loss": 0.6545, "step": 11804 }, { - "epoch": 0.8932692671484243, - "grad_norm": 3.1456427574157715, - "learning_rate": 5.544669969440924e-07, - "loss": 0.5273, + "epoch": 0.8310454065469906, + "grad_norm": 2.094531297683716, + "learning_rate": 1.0222394600382422e-06, + "loss": 0.7864, "step": 11805 }, { - "epoch": 0.8933449358707578, - "grad_norm": 2.003390073776245, - "learning_rate": 5.536889601592178e-07, - "loss": 0.7509, + "epoch": 0.8311158042942626, + "grad_norm": 1.8416595458984375, + "learning_rate": 1.0214091149534928e-06, + "loss": 0.6207, "step": 11806 }, { - "epoch": 0.8934206045930915, - "grad_norm": 2.1938109397888184, - "learning_rate": 5.529114523096576e-07, - "loss": 0.627, + "epoch": 0.8311862020415347, + "grad_norm": 4.786726474761963, + "learning_rate": 1.0205790807010993e-06, + "loss": 0.6756, "step": 11807 }, { - "epoch": 0.893496273315425, - "grad_norm": 3.804588794708252, - "learning_rate": 5.521344734441061e-07, - "loss": 0.6258, + "epoch": 0.8312565997888067, + "grad_norm": 2.2290782928466797, + "learning_rate": 1.019749357324213e-06, + "loss": 0.7367, "step": 11808 }, { - "epoch": 0.8935719420377587, - "grad_norm": 5.310543060302734, - "learning_rate": 5.513580236112163e-07, - "loss": 0.743, + "epoch": 0.8313269975360789, + "grad_norm": 2.9232380390167236, + "learning_rate": 1.0189199448659758e-06, + "loss": 0.7888, "step": 11809 }, { - "epoch": 0.8936476107600924, - "grad_norm": 2.3194751739501953, - "learning_rate": 5.505821028596133e-07, - "loss": 0.8416, + "epoch": 0.8313973952833509, + "grad_norm": 2.0332534313201904, + "learning_rate": 1.018090843369507e-06, + "loss": 0.6862, "step": 11810 }, { - "epoch": 0.8937232794824259, - "grad_norm": 2.0355045795440674, - "learning_rate": 5.498067112378881e-07, - "loss": 0.7552, + "epoch": 0.831467793030623, + "grad_norm": 1.982346773147583, + "learning_rate": 1.017262052877913e-06, + "loss": 0.6847, "step": 11811 }, { - "epoch": 0.8937989482047596, - "grad_norm": 2.1219053268432617, - "learning_rate": 5.490318487945971e-07, - "loss": 0.6533, + "epoch": 0.8315381907778951, + "grad_norm": 2.0246989727020264, + "learning_rate": 1.0164335734342815e-06, + "loss": 0.7049, "step": 11812 }, { - "epoch": 0.8938746169270931, - "grad_norm": 2.342350959777832, - "learning_rate": 5.482575155782663e-07, - "loss": 0.6366, + "epoch": 0.8316085885251672, + "grad_norm": 2.117277145385742, + "learning_rate": 1.0156054050816888e-06, + "loss": 0.6308, "step": 11813 }, { - "epoch": 0.8939502856494268, - "grad_norm": 2.078801393508911, - "learning_rate": 5.47483711637386e-07, - "loss": 0.7247, + "epoch": 0.8316789862724393, + "grad_norm": 2.048142671585083, + "learning_rate": 1.0147775478631892e-06, + "loss": 0.6308, "step": 11814 }, { - "epoch": 0.8940259543717605, - "grad_norm": 2.194209337234497, - "learning_rate": 5.467104370204153e-07, - "loss": 0.5571, + "epoch": 0.8317493840197113, + "grad_norm": 2.7204959392547607, + "learning_rate": 1.0139500018218275e-06, + "loss": 0.6021, "step": 11815 }, { - "epoch": 0.894101623094094, - "grad_norm": 2.5547542572021484, - "learning_rate": 5.459376917757776e-07, - "loss": 0.8075, + "epoch": 0.8318197817669835, + "grad_norm": 2.555323362350464, + "learning_rate": 1.0131227670006266e-06, + "loss": 0.7267, "step": 11816 }, { - "epoch": 0.8941772918164277, - "grad_norm": 2.040752410888672, - "learning_rate": 5.451654759518632e-07, - "loss": 0.5734, + "epoch": 0.8318901795142556, + "grad_norm": 1.7068243026733398, + "learning_rate": 1.0122958434425945e-06, + "loss": 0.6303, "step": 11817 }, { - "epoch": 0.8942529605387614, - "grad_norm": 2.9403791427612305, - "learning_rate": 5.443937895970364e-07, - "loss": 0.694, + "epoch": 0.8319605772615276, + "grad_norm": 1.9000121355056763, + "learning_rate": 1.0114692311907237e-06, + "loss": 0.5921, "step": 11818 }, { - "epoch": 0.8943286292610949, - "grad_norm": 2.2212417125701904, - "learning_rate": 5.436226327596176e-07, - "loss": 0.5364, + "epoch": 0.8320309750087997, + "grad_norm": 1.64280104637146, + "learning_rate": 1.0106429302879912e-06, + "loss": 0.6873, "step": 11819 }, { - "epoch": 0.8944042979834286, - "grad_norm": 2.305257558822632, - "learning_rate": 5.428520054879009e-07, - "loss": 0.5975, + "epoch": 0.8321013727560718, + "grad_norm": 1.9385122060775757, + "learning_rate": 1.0098169407773563e-06, + "loss": 0.7587, "step": 11820 }, { - "epoch": 0.8944799667057621, - "grad_norm": 2.220174551010132, - "learning_rate": 5.42081907830145e-07, - "loss": 0.7507, + "epoch": 0.8321717705033439, + "grad_norm": 1.9547218084335327, + "learning_rate": 1.0089912627017652e-06, + "loss": 0.7014, "step": 11821 }, { - "epoch": 0.8945556354280958, - "grad_norm": 2.3511104583740234, - "learning_rate": 5.413123398345761e-07, - "loss": 0.7957, + "epoch": 0.832242168250616, + "grad_norm": 1.5750517845153809, + "learning_rate": 1.0081658961041443e-06, + "loss": 0.6676, "step": 11822 }, { - "epoch": 0.8946313041504295, - "grad_norm": 2.9117751121520996, - "learning_rate": 5.405433015493879e-07, - "loss": 0.6301, + "epoch": 0.832312565997888, + "grad_norm": 1.7348181009292603, + "learning_rate": 1.0073408410274048e-06, + "loss": 0.7694, "step": 11823 }, { - "epoch": 0.894706972872763, - "grad_norm": 2.1860318183898926, - "learning_rate": 5.397747930227386e-07, - "loss": 0.7152, + "epoch": 0.8323829637451602, + "grad_norm": 1.8851971626281738, + "learning_rate": 1.006516097514441e-06, + "loss": 0.7424, "step": 11824 }, { - "epoch": 0.8947826415950967, - "grad_norm": 2.775752067565918, - "learning_rate": 5.39006814302756e-07, - "loss": 0.7394, + "epoch": 0.8324533614924322, + "grad_norm": 1.7109941244125366, + "learning_rate": 1.0056916656081346e-06, + "loss": 0.5753, "step": 11825 }, { - "epoch": 0.8948583103174302, - "grad_norm": 2.653651475906372, - "learning_rate": 5.382393654375344e-07, - "loss": 0.6355, + "epoch": 0.8325237592397043, + "grad_norm": 1.787218451499939, + "learning_rate": 1.004867545351346e-06, + "loss": 0.6279, "step": 11826 }, { - "epoch": 0.8949339790397639, - "grad_norm": 1.9705297946929932, - "learning_rate": 5.374724464751294e-07, - "loss": 0.5801, + "epoch": 0.8325941569869764, + "grad_norm": 1.9828084707260132, + "learning_rate": 1.004043736786924e-06, + "loss": 0.5164, "step": 11827 }, { - "epoch": 0.8950096477620976, - "grad_norm": 1.8360956907272339, - "learning_rate": 5.367060574635726e-07, - "loss": 0.7124, + "epoch": 0.8326645547342485, + "grad_norm": 2.0185019969940186, + "learning_rate": 1.0032202399576989e-06, + "loss": 0.6436, "step": 11828 }, { - "epoch": 0.8950853164844311, - "grad_norm": 2.0758135318756104, - "learning_rate": 5.359401984508566e-07, - "loss": 0.7395, + "epoch": 0.8327349524815206, + "grad_norm": 2.0849947929382324, + "learning_rate": 1.0023970549064845e-06, + "loss": 0.692, "step": 11829 }, { - "epoch": 0.8951609852067648, - "grad_norm": 2.226825475692749, - "learning_rate": 5.351748694849411e-07, - "loss": 0.6466, + "epoch": 0.8328053502287927, + "grad_norm": 2.312873125076294, + "learning_rate": 1.0015741816760773e-06, + "loss": 0.7408, "step": 11830 }, { - "epoch": 0.8952366539290985, - "grad_norm": 1.9071615934371948, - "learning_rate": 5.344100706137527e-07, - "loss": 0.708, + "epoch": 0.8328757479760648, + "grad_norm": 1.7708053588867188, + "learning_rate": 1.0007516203092626e-06, + "loss": 0.6715, "step": 11831 }, { - "epoch": 0.895312322651432, - "grad_norm": 2.7298760414123535, - "learning_rate": 5.336458018851881e-07, - "loss": 0.6113, + "epoch": 0.8329461457233368, + "grad_norm": 2.857072114944458, + "learning_rate": 9.999293708488028e-07, + "loss": 0.6449, "step": 11832 }, { - "epoch": 0.8953879913737657, - "grad_norm": 2.248567581176758, - "learning_rate": 5.32882063347106e-07, - "loss": 0.5838, + "epoch": 0.8330165434706089, + "grad_norm": 1.6391522884368896, + "learning_rate": 9.991074333374506e-07, + "loss": 0.6691, "step": 11833 }, { - "epoch": 0.8954636600960992, - "grad_norm": 2.2924344539642334, - "learning_rate": 5.321188550473351e-07, - "loss": 0.5261, + "epoch": 0.8330869412178811, + "grad_norm": 1.9650218486785889, + "learning_rate": 9.982858078179375e-07, + "loss": 0.6047, "step": 11834 }, { - "epoch": 0.8955393288184329, - "grad_norm": 1.8909533023834229, - "learning_rate": 5.313561770336704e-07, - "loss": 0.5217, + "epoch": 0.8331573389651531, + "grad_norm": 2.042174816131592, + "learning_rate": 9.974644943329813e-07, + "loss": 0.6758, "step": 11835 }, { - "epoch": 0.8956149975407666, - "grad_norm": 3.445955514907837, - "learning_rate": 5.305940293538733e-07, - "loss": 0.6061, + "epoch": 0.8332277367124252, + "grad_norm": 3.3807153701782227, + "learning_rate": 9.966434929252803e-07, + "loss": 0.6424, "step": 11836 }, { - "epoch": 0.8956906662631001, - "grad_norm": 1.8945612907409668, - "learning_rate": 5.2983241205567e-07, - "loss": 0.6668, + "epoch": 0.8332981344596972, + "grad_norm": 2.203474521636963, + "learning_rate": 9.958228036375224e-07, + "loss": 0.6838, "step": 11837 }, { - "epoch": 0.8957663349854338, - "grad_norm": 2.5115628242492676, - "learning_rate": 5.290713251867571e-07, - "loss": 0.5408, + "epoch": 0.8333685322069694, + "grad_norm": 1.9949090480804443, + "learning_rate": 9.950024265123733e-07, + "loss": 0.6207, "step": 11838 }, { - "epoch": 0.8958420037077673, - "grad_norm": 2.304194688796997, - "learning_rate": 5.283107687947967e-07, - "loss": 0.5717, + "epoch": 0.8334389299542415, + "grad_norm": 1.9815353155136108, + "learning_rate": 9.941823615924881e-07, + "loss": 0.6932, "step": 11839 }, { - "epoch": 0.895917672430101, - "grad_norm": 2.0823569297790527, - "learning_rate": 5.275507429274185e-07, - "loss": 0.6496, + "epoch": 0.8335093277015135, + "grad_norm": 2.224470376968384, + "learning_rate": 9.93362608920501e-07, + "loss": 0.8333, "step": 11840 }, { - "epoch": 0.8959933411524347, - "grad_norm": 2.2738540172576904, - "learning_rate": 5.267912476322134e-07, - "loss": 0.5803, + "epoch": 0.8335797254487857, + "grad_norm": 2.1539809703826904, + "learning_rate": 9.925431685390314e-07, + "loss": 0.66, "step": 11841 }, { - "epoch": 0.8960690098747682, - "grad_norm": 1.8352035284042358, - "learning_rate": 5.260322829567465e-07, - "loss": 0.6032, + "epoch": 0.8336501231960577, + "grad_norm": 2.3457577228546143, + "learning_rate": 9.91724040490682e-07, + "loss": 0.7075, "step": 11842 }, { - "epoch": 0.8961446785971019, - "grad_norm": 2.4070634841918945, - "learning_rate": 5.252738489485467e-07, - "loss": 0.6438, + "epoch": 0.8337205209433298, + "grad_norm": 1.8069336414337158, + "learning_rate": 9.909052248180416e-07, + "loss": 0.6574, "step": 11843 }, { - "epoch": 0.8962203473194356, - "grad_norm": 2.4445745944976807, - "learning_rate": 5.245159456551092e-07, - "loss": 0.6391, + "epoch": 0.8337909186906018, + "grad_norm": 2.2363688945770264, + "learning_rate": 9.90086721563682e-07, + "loss": 0.6468, "step": 11844 }, { - "epoch": 0.8962960160417691, - "grad_norm": 2.533707857131958, - "learning_rate": 5.237585731238958e-07, - "loss": 0.6541, + "epoch": 0.833861316437874, + "grad_norm": 2.0500166416168213, + "learning_rate": 9.89268530770155e-07, + "loss": 0.5697, "step": 11845 }, { - "epoch": 0.8963716847641028, - "grad_norm": 2.023911476135254, - "learning_rate": 5.230017314023366e-07, - "loss": 0.667, + "epoch": 0.8339317141851461, + "grad_norm": 2.2500364780426025, + "learning_rate": 9.884506524800033e-07, + "loss": 0.6877, "step": 11846 }, { - "epoch": 0.8964473534864363, - "grad_norm": 2.331500768661499, - "learning_rate": 5.222454205378277e-07, - "loss": 0.7697, + "epoch": 0.8340021119324181, + "grad_norm": 2.2594716548919678, + "learning_rate": 9.876330867357437e-07, + "loss": 0.6564, "step": 11847 }, { - "epoch": 0.89652302220877, - "grad_norm": 2.1052236557006836, - "learning_rate": 5.214896405777281e-07, - "loss": 0.6318, + "epoch": 0.8340725096796903, + "grad_norm": 2.0518603324890137, + "learning_rate": 9.868158335798868e-07, + "loss": 0.549, "step": 11848 }, { - "epoch": 0.8965986909311037, - "grad_norm": 1.6714646816253662, - "learning_rate": 5.207343915693713e-07, - "loss": 0.6142, + "epoch": 0.8341429074269623, + "grad_norm": 1.9076703786849976, + "learning_rate": 9.859988930549196e-07, + "loss": 0.6138, "step": 11849 }, { - "epoch": 0.8966743596534372, - "grad_norm": 1.8316816091537476, - "learning_rate": 5.199796735600541e-07, - "loss": 0.63, + "epoch": 0.8342133051742344, + "grad_norm": 2.008492946624756, + "learning_rate": 9.851822652033168e-07, + "loss": 0.6026, "step": 11850 }, { - "epoch": 0.8967500283757709, - "grad_norm": 1.962476134300232, - "learning_rate": 5.19225486597036e-07, - "loss": 0.5936, + "epoch": 0.8342837029215066, + "grad_norm": 2.028149366378784, + "learning_rate": 9.843659500675346e-07, + "loss": 0.6527, "step": 11851 }, { - "epoch": 0.8968256970981044, - "grad_norm": 2.385847330093384, - "learning_rate": 5.184718307275479e-07, - "loss": 0.6865, + "epoch": 0.8343541006687786, + "grad_norm": 1.8890113830566406, + "learning_rate": 9.835499476900178e-07, + "loss": 0.4955, "step": 11852 }, { - "epoch": 0.8969013658204381, - "grad_norm": 6.628912925720215, - "learning_rate": 5.177187059987842e-07, - "loss": 0.7063, + "epoch": 0.8344244984160507, + "grad_norm": 3.130518674850464, + "learning_rate": 9.827342581131855e-07, + "loss": 0.6469, "step": 11853 }, { - "epoch": 0.8969770345427718, - "grad_norm": 2.5971126556396484, - "learning_rate": 5.169661124579143e-07, - "loss": 0.7106, + "epoch": 0.8344948961633227, + "grad_norm": 2.2375717163085938, + "learning_rate": 9.819188813794494e-07, + "loss": 0.7268, "step": 11854 }, { - "epoch": 0.8970527032651053, - "grad_norm": 1.8385004997253418, - "learning_rate": 5.162140501520612e-07, - "loss": 0.5338, + "epoch": 0.8345652939105949, + "grad_norm": 2.1378681659698486, + "learning_rate": 9.811038175311991e-07, + "loss": 0.5218, "step": 11855 }, { - "epoch": 0.897128371987439, - "grad_norm": 1.9331872463226318, - "learning_rate": 5.154625191283256e-07, - "loss": 0.7039, + "epoch": 0.834635691657867, + "grad_norm": 2.090581178665161, + "learning_rate": 9.80289066610815e-07, + "loss": 0.675, "step": 11856 }, { - "epoch": 0.8972040407097727, - "grad_norm": 2.373424530029297, - "learning_rate": 5.147115194337685e-07, - "loss": 0.7406, + "epoch": 0.834706089405139, + "grad_norm": 2.288585901260376, + "learning_rate": 9.794746286606518e-07, + "loss": 0.713, "step": 11857 }, { - "epoch": 0.8972797094321062, - "grad_norm": 2.4697651863098145, - "learning_rate": 5.139610511154204e-07, - "loss": 0.7292, + "epoch": 0.8347764871524112, + "grad_norm": 1.8675429821014404, + "learning_rate": 9.786605037230583e-07, + "loss": 0.6176, "step": 11858 }, { - "epoch": 0.8973553781544399, - "grad_norm": 2.0015337467193604, - "learning_rate": 5.132111142202799e-07, - "loss": 0.6327, + "epoch": 0.8348468848996832, + "grad_norm": 1.858512043952942, + "learning_rate": 9.778466918403563e-07, + "loss": 0.6777, "step": 11859 }, { - "epoch": 0.8974310468767734, - "grad_norm": 2.549948215484619, - "learning_rate": 5.124617087953082e-07, - "loss": 0.7159, + "epoch": 0.8349172826469553, + "grad_norm": 2.6141397953033447, + "learning_rate": 9.770331930548593e-07, + "loss": 0.563, "step": 11860 }, { - "epoch": 0.8975067155991071, - "grad_norm": 1.9285160303115845, - "learning_rate": 5.117128348874368e-07, - "loss": 0.6433, + "epoch": 0.8349876803942274, + "grad_norm": 1.7100967168807983, + "learning_rate": 9.762200074088595e-07, + "loss": 0.5469, "step": 11861 }, { - "epoch": 0.8975823843214408, - "grad_norm": 2.8049867153167725, - "learning_rate": 5.109644925435622e-07, - "loss": 0.7273, + "epoch": 0.8350580781414995, + "grad_norm": 1.7507002353668213, + "learning_rate": 9.754071349446398e-07, + "loss": 0.6465, "step": 11862 }, { - "epoch": 0.8976580530437743, - "grad_norm": 6.72634744644165, - "learning_rate": 5.10216681810546e-07, - "loss": 0.7476, + "epoch": 0.8351284758887716, + "grad_norm": 2.3309056758880615, + "learning_rate": 9.74594575704457e-07, + "loss": 0.7548, "step": 11863 }, { - "epoch": 0.897733721766108, - "grad_norm": 2.0858564376831055, - "learning_rate": 5.094694027352227e-07, - "loss": 0.6723, + "epoch": 0.8351988736360436, + "grad_norm": 3.2890467643737793, + "learning_rate": 9.737823297305622e-07, + "loss": 0.5869, "step": 11864 }, { - "epoch": 0.8978093904884415, - "grad_norm": 2.3242621421813965, - "learning_rate": 5.087226553643868e-07, - "loss": 0.6651, + "epoch": 0.8352692713833157, + "grad_norm": 1.9718685150146484, + "learning_rate": 9.72970397065178e-07, + "loss": 0.5789, "step": 11865 }, { - "epoch": 0.8978850592107752, - "grad_norm": 2.657841205596924, - "learning_rate": 5.079764397448019e-07, - "loss": 0.7593, + "epoch": 0.8353396691305878, + "grad_norm": 2.0817525386810303, + "learning_rate": 9.721587777505238e-07, + "loss": 0.6261, "step": 11866 }, { - "epoch": 0.8979607279331089, - "grad_norm": 1.891689658164978, - "learning_rate": 5.072307559231986e-07, - "loss": 0.6064, + "epoch": 0.8354100668778599, + "grad_norm": 1.9797958135604858, + "learning_rate": 9.71347471828792e-07, + "loss": 0.6796, "step": 11867 }, { - "epoch": 0.8980363966554424, - "grad_norm": 2.2270796298980713, - "learning_rate": 5.064856039462747e-07, - "loss": 0.712, + "epoch": 0.835480464625132, + "grad_norm": 5.007521629333496, + "learning_rate": 9.705364793421665e-07, + "loss": 0.7907, "step": 11868 }, { - "epoch": 0.8981120653777761, - "grad_norm": 2.0540242195129395, - "learning_rate": 5.057409838606928e-07, - "loss": 0.6487, + "epoch": 0.8355508623724041, + "grad_norm": 2.1641643047332764, + "learning_rate": 9.69725800332809e-07, + "loss": 0.7336, "step": 11869 }, { - "epoch": 0.8981877341001098, - "grad_norm": 2.1607933044433594, - "learning_rate": 5.049968957130855e-07, - "loss": 0.5223, + "epoch": 0.8356212601196762, + "grad_norm": 1.7456432580947876, + "learning_rate": 9.689154348428694e-07, + "loss": 0.5003, "step": 11870 }, { - "epoch": 0.8982634028224433, - "grad_norm": 2.262275457382202, - "learning_rate": 5.042533395500475e-07, - "loss": 0.7798, + "epoch": 0.8356916578669482, + "grad_norm": 1.9123005867004395, + "learning_rate": 9.681053829144785e-07, + "loss": 0.5893, "step": 11871 }, { - "epoch": 0.898339071544777, - "grad_norm": 2.3458058834075928, - "learning_rate": 5.035103154181458e-07, - "loss": 0.7041, + "epoch": 0.8357620556142203, + "grad_norm": 1.9720799922943115, + "learning_rate": 9.672956445897523e-07, + "loss": 0.7124, "step": 11872 }, { - "epoch": 0.8984147402671105, - "grad_norm": 2.098083257675171, - "learning_rate": 5.02767823363907e-07, - "loss": 0.6414, + "epoch": 0.8358324533614925, + "grad_norm": 1.8450602293014526, + "learning_rate": 9.664862199107862e-07, + "loss": 0.5762, "step": 11873 }, { - "epoch": 0.8984904089894442, - "grad_norm": 2.3881022930145264, - "learning_rate": 5.020258634338309e-07, - "loss": 0.7463, + "epoch": 0.8359028511087645, + "grad_norm": 2.293516159057617, + "learning_rate": 9.656771089196685e-07, + "loss": 0.6909, "step": 11874 }, { - "epoch": 0.8985660777117779, - "grad_norm": 1.8996672630310059, - "learning_rate": 5.012844356743834e-07, - "loss": 0.5266, + "epoch": 0.8359732488560366, + "grad_norm": 1.987818956375122, + "learning_rate": 9.648683116584605e-07, + "loss": 0.6315, "step": 11875 }, { - "epoch": 0.8986417464341114, - "grad_norm": 2.157325506210327, - "learning_rate": 5.005435401319904e-07, - "loss": 0.6429, + "epoch": 0.8360436466033087, + "grad_norm": 1.9014067649841309, + "learning_rate": 9.640598281692173e-07, + "loss": 0.6329, "step": 11876 }, { - "epoch": 0.8987174151564451, - "grad_norm": 2.011565685272217, - "learning_rate": 4.998031768530525e-07, - "loss": 0.6239, + "epoch": 0.8361140443505808, + "grad_norm": 2.312798261642456, + "learning_rate": 9.63251658493969e-07, + "loss": 0.6112, "step": 11877 }, { - "epoch": 0.8987930838787787, - "grad_norm": 2.1313388347625732, - "learning_rate": 4.99063345883932e-07, - "loss": 0.6479, + "epoch": 0.8361844420978529, + "grad_norm": 2.158432960510254, + "learning_rate": 9.624438026747348e-07, + "loss": 0.5675, "step": 11878 }, { - "epoch": 0.8988687526011123, - "grad_norm": 2.972902536392212, - "learning_rate": 4.983240472709606e-07, - "loss": 0.7627, + "epoch": 0.8362548398451249, + "grad_norm": 1.899212121963501, + "learning_rate": 9.61636260753514e-07, + "loss": 0.594, "step": 11879 }, { - "epoch": 0.898944421323446, - "grad_norm": 2.6932406425476074, - "learning_rate": 4.975852810604343e-07, - "loss": 0.8005, + "epoch": 0.8363252375923971, + "grad_norm": 1.9241764545440674, + "learning_rate": 9.608290327722938e-07, + "loss": 0.7018, "step": 11880 }, { - "epoch": 0.8990200900457795, - "grad_norm": 2.0103864669799805, - "learning_rate": 4.968470472986182e-07, - "loss": 0.5966, + "epoch": 0.8363956353396691, + "grad_norm": 1.742537498474121, + "learning_rate": 9.600221187730399e-07, + "loss": 0.6497, "step": 11881 }, { - "epoch": 0.8990957587681132, - "grad_norm": 2.020077705383301, - "learning_rate": 4.961093460317422e-07, - "loss": 0.7393, + "epoch": 0.8364660330869412, + "grad_norm": 2.4315872192382812, + "learning_rate": 9.592155187977077e-07, + "loss": 0.7088, "step": 11882 }, { - "epoch": 0.8991714274904469, - "grad_norm": 2.478301763534546, - "learning_rate": 4.953721773060064e-07, - "loss": 0.5821, + "epoch": 0.8365364308342133, + "grad_norm": 1.9719769954681396, + "learning_rate": 9.584092328882314e-07, + "loss": 0.6495, "step": 11883 }, { - "epoch": 0.8992470962127804, - "grad_norm": 2.066504955291748, - "learning_rate": 4.946355411675688e-07, - "loss": 0.5965, + "epoch": 0.8366068285814854, + "grad_norm": 2.225410223007202, + "learning_rate": 9.576032610865312e-07, + "loss": 0.6399, "step": 11884 }, { - "epoch": 0.8993227649351141, - "grad_norm": 2.5086374282836914, - "learning_rate": 4.938994376625646e-07, - "loss": 0.7674, + "epoch": 0.8366772263287575, + "grad_norm": 2.4155564308166504, + "learning_rate": 9.567976034345076e-07, + "loss": 0.729, "step": 11885 }, { - "epoch": 0.8993984336574476, - "grad_norm": 2.390106201171875, - "learning_rate": 4.931638668370909e-07, - "loss": 0.604, + "epoch": 0.8367476240760295, + "grad_norm": 2.2682721614837646, + "learning_rate": 9.559922599740517e-07, + "loss": 0.6942, "step": 11886 }, { - "epoch": 0.8994741023797813, - "grad_norm": 2.292848825454712, - "learning_rate": 4.924288287372089e-07, - "loss": 0.8054, + "epoch": 0.8368180218233017, + "grad_norm": 2.2352585792541504, + "learning_rate": 9.551872307470304e-07, + "loss": 0.6176, "step": 11887 }, { - "epoch": 0.899549771102115, - "grad_norm": 2.5886833667755127, - "learning_rate": 4.916943234089506e-07, - "loss": 0.6667, + "epoch": 0.8368884195705737, + "grad_norm": 2.0744550228118896, + "learning_rate": 9.543825157953014e-07, + "loss": 0.6062, "step": 11888 }, { - "epoch": 0.8996254398244485, - "grad_norm": 2.0663654804229736, - "learning_rate": 4.909603508983124e-07, - "loss": 0.7111, + "epoch": 0.8369588173178458, + "grad_norm": 1.937880039215088, + "learning_rate": 9.535781151606996e-07, + "loss": 0.5982, "step": 11889 }, { - "epoch": 0.8997011085467822, - "grad_norm": 2.1520442962646484, - "learning_rate": 4.902269112512594e-07, - "loss": 0.7455, + "epoch": 0.837029215065118, + "grad_norm": 2.2099926471710205, + "learning_rate": 9.527740288850486e-07, + "loss": 0.7, "step": 11890 }, { - "epoch": 0.8997767772691158, - "grad_norm": 2.241392135620117, - "learning_rate": 4.894940045137209e-07, - "loss": 0.6856, + "epoch": 0.83709961281239, + "grad_norm": 2.199674129486084, + "learning_rate": 9.5197025701015e-07, + "loss": 0.7058, "step": 11891 }, { - "epoch": 0.8998524459914494, - "grad_norm": 2.823662757873535, - "learning_rate": 4.887616307315943e-07, - "loss": 0.6895, + "epoch": 0.8371700105596621, + "grad_norm": 1.7452260255813599, + "learning_rate": 9.511667995777954e-07, + "loss": 0.5666, "step": 11892 }, { - "epoch": 0.8999281147137831, - "grad_norm": 2.172192335128784, - "learning_rate": 4.880297899507438e-07, - "loss": 0.7223, + "epoch": 0.8372404083069341, + "grad_norm": 1.7155158519744873, + "learning_rate": 9.503636566297594e-07, + "loss": 0.6734, "step": 11893 }, { - "epoch": 0.9000037834361166, - "grad_norm": 2.1436941623687744, - "learning_rate": 4.872984822169967e-07, - "loss": 0.5677, + "epoch": 0.8373108060542063, + "grad_norm": 2.0733320713043213, + "learning_rate": 9.495608282077953e-07, + "loss": 0.6101, "step": 11894 }, { - "epoch": 0.9000794521584503, - "grad_norm": 2.2516303062438965, - "learning_rate": 4.865677075761534e-07, - "loss": 0.8164, + "epoch": 0.8373812038014784, + "grad_norm": 1.95787513256073, + "learning_rate": 9.487583143536426e-07, + "loss": 0.6042, "step": 11895 }, { - "epoch": 0.900155120880784, - "grad_norm": 3.5125577449798584, - "learning_rate": 4.858374660739764e-07, - "loss": 0.6542, + "epoch": 0.8374516015487504, + "grad_norm": 2.1931326389312744, + "learning_rate": 9.479561151090247e-07, + "loss": 0.6947, "step": 11896 }, { - "epoch": 0.9002307896031175, - "grad_norm": 2.0232994556427, - "learning_rate": 4.85107757756196e-07, - "loss": 0.5626, + "epoch": 0.8375219992960226, + "grad_norm": 2.832143783569336, + "learning_rate": 9.471542305156513e-07, + "loss": 0.7827, "step": 11897 }, { - "epoch": 0.9003064583254512, - "grad_norm": 2.3365819454193115, - "learning_rate": 4.843785826685076e-07, - "loss": 0.7357, + "epoch": 0.8375923970432946, + "grad_norm": 1.7975903749465942, + "learning_rate": 9.463526606152092e-07, + "loss": 0.5243, "step": 11898 }, { - "epoch": 0.9003821270477848, - "grad_norm": 1.9569621086120605, - "learning_rate": 4.836499408565738e-07, - "loss": 0.7641, + "epoch": 0.8376627947905667, + "grad_norm": 2.002018928527832, + "learning_rate": 9.455514054493764e-07, + "loss": 0.7445, "step": 11899 }, { - "epoch": 0.9004577957701184, - "grad_norm": 2.693556547164917, - "learning_rate": 4.829218323660282e-07, - "loss": 0.6307, + "epoch": 0.8377331925378388, + "grad_norm": 2.0564229488372803, + "learning_rate": 9.447504650598095e-07, + "loss": 0.7574, "step": 11900 }, { - "epoch": 0.9005334644924521, - "grad_norm": 1.9152913093566895, - "learning_rate": 4.821942572424641e-07, - "loss": 0.5283, + "epoch": 0.8378035902851109, + "grad_norm": 2.0475523471832275, + "learning_rate": 9.439498394881489e-07, + "loss": 0.6399, "step": 11901 }, { - "epoch": 0.9006091332147856, - "grad_norm": 8.86640739440918, - "learning_rate": 4.81467215531445e-07, - "loss": 0.7087, + "epoch": 0.837873988032383, + "grad_norm": 1.8461024761199951, + "learning_rate": 9.431495287760201e-07, + "loss": 0.7049, "step": 11902 }, { - "epoch": 0.9006848019371193, - "grad_norm": 2.024935722351074, - "learning_rate": 4.807407072785018e-07, - "loss": 0.657, + "epoch": 0.837944385779655, + "grad_norm": 2.1855337619781494, + "learning_rate": 9.423495329650346e-07, + "loss": 0.5842, "step": 11903 }, { - "epoch": 0.9007604706594529, - "grad_norm": 2.7039053440093994, - "learning_rate": 4.800147325291298e-07, - "loss": 0.6988, + "epoch": 0.8380147835269272, + "grad_norm": 2.035095453262329, + "learning_rate": 9.415498520967806e-07, + "loss": 0.6343, "step": 11904 }, { - "epoch": 0.9008361393817865, - "grad_norm": 2.0335659980773926, - "learning_rate": 4.792892913287927e-07, - "loss": 0.5938, + "epoch": 0.8380851812741992, + "grad_norm": 1.8983060121536255, + "learning_rate": 9.407504862128388e-07, + "loss": 0.66, "step": 11905 }, { - "epoch": 0.9009118081041202, - "grad_norm": 2.8437318801879883, - "learning_rate": 4.785643837229183e-07, - "loss": 0.6922, + "epoch": 0.8381555790214713, + "grad_norm": 1.923671007156372, + "learning_rate": 9.399514353547664e-07, + "loss": 0.6765, "step": 11906 }, { - "epoch": 0.9009874768264537, - "grad_norm": 2.2634239196777344, - "learning_rate": 4.778400097569062e-07, - "loss": 0.7243, + "epoch": 0.8382259767687434, + "grad_norm": 1.852258324623108, + "learning_rate": 9.391526995641074e-07, + "loss": 0.5492, "step": 11907 }, { - "epoch": 0.9010631455487874, - "grad_norm": 1.859001636505127, - "learning_rate": 4.771161694761152e-07, - "loss": 0.8049, + "epoch": 0.8382963745160155, + "grad_norm": 1.6002683639526367, + "learning_rate": 9.383542788823867e-07, + "loss": 0.6223, "step": 11908 }, { - "epoch": 0.9011388142711211, - "grad_norm": 2.409040927886963, - "learning_rate": 4.763928629258748e-07, - "loss": 0.5497, + "epoch": 0.8383667722632876, + "grad_norm": 1.7320125102996826, + "learning_rate": 9.375561733511191e-07, + "loss": 0.6808, "step": 11909 }, { - "epoch": 0.9012144829934546, - "grad_norm": 2.5353803634643555, - "learning_rate": 4.75670090151483e-07, - "loss": 0.7495, + "epoch": 0.8384371700105596, + "grad_norm": 1.715061902999878, + "learning_rate": 9.367583830117947e-07, + "loss": 0.4669, "step": 11910 }, { - "epoch": 0.9012901517157883, - "grad_norm": 4.32592248916626, - "learning_rate": 4.749478511982025e-07, - "loss": 0.5614, + "epoch": 0.8385075677578318, + "grad_norm": 1.9795764684677124, + "learning_rate": 9.359609079058942e-07, + "loss": 0.6041, "step": 11911 }, { - "epoch": 0.9013658204381219, - "grad_norm": 1.791941523551941, - "learning_rate": 4.7422614611126013e-07, - "loss": 0.5837, + "epoch": 0.8385779655051039, + "grad_norm": 1.9655967950820923, + "learning_rate": 9.351637480748782e-07, + "loss": 0.6444, "step": 11912 }, { - "epoch": 0.9014414891604555, - "grad_norm": 1.7201472520828247, - "learning_rate": 4.7350497493585175e-07, - "loss": 0.7207, + "epoch": 0.8386483632523759, + "grad_norm": 3.498270273208618, + "learning_rate": 9.343669035601916e-07, + "loss": 0.6078, "step": 11913 }, { - "epoch": 0.9015171578827892, - "grad_norm": 2.2074570655822754, - "learning_rate": 4.7278433771714027e-07, - "loss": 0.5863, + "epoch": 0.838718760999648, + "grad_norm": 1.907637596130371, + "learning_rate": 9.335703744032622e-07, + "loss": 0.7282, "step": 11914 }, { - "epoch": 0.9015928266051227, - "grad_norm": 1.8226673603057861, - "learning_rate": 4.720642345002535e-07, - "loss": 0.5865, + "epoch": 0.8387891587469201, + "grad_norm": 2.3639867305755615, + "learning_rate": 9.327741606455039e-07, + "loss": 0.5893, "step": 11915 }, { - "epoch": 0.9016684953274564, - "grad_norm": 2.0403738021850586, - "learning_rate": 4.7134466533028643e-07, - "loss": 0.694, + "epoch": 0.8388595564941922, + "grad_norm": 2.0383708477020264, + "learning_rate": 9.319782623283103e-07, + "loss": 0.63, "step": 11916 }, { - "epoch": 0.90174416404979, - "grad_norm": 2.33229398727417, - "learning_rate": 4.70625630252303e-07, - "loss": 0.7215, + "epoch": 0.8389299542414643, + "grad_norm": 2.8173999786376953, + "learning_rate": 9.311826794930636e-07, + "loss": 0.6538, "step": 11917 }, { - "epoch": 0.9018198327721236, - "grad_norm": 2.150709629058838, - "learning_rate": 4.6990712931133015e-07, - "loss": 0.7984, + "epoch": 0.8390003519887363, + "grad_norm": 1.929152011871338, + "learning_rate": 9.303874121811263e-07, + "loss": 0.5697, "step": 11918 }, { - "epoch": 0.9018955014944573, - "grad_norm": 9.452735900878906, - "learning_rate": 4.69189162552361e-07, - "loss": 0.5954, + "epoch": 0.8390707497360085, + "grad_norm": 1.9143911600112915, + "learning_rate": 9.295924604338446e-07, + "loss": 0.739, "step": 11919 }, { - "epoch": 0.9019711702167909, - "grad_norm": 2.4279088973999023, - "learning_rate": 4.6847173002035747e-07, - "loss": 0.6056, + "epoch": 0.8391411474832805, + "grad_norm": 2.237581253051758, + "learning_rate": 9.287978242925461e-07, + "loss": 0.6714, "step": 11920 }, { - "epoch": 0.9020468389391245, - "grad_norm": 2.4450533390045166, - "learning_rate": 4.677548317602517e-07, - "loss": 0.6681, + "epoch": 0.8392115452305526, + "grad_norm": 1.9224939346313477, + "learning_rate": 9.280035037985492e-07, + "loss": 0.6605, "step": 11921 }, { - "epoch": 0.9021225076614582, - "grad_norm": 3.3857312202453613, - "learning_rate": 4.670384678169337e-07, - "loss": 0.5885, + "epoch": 0.8392819429778247, + "grad_norm": 2.0145907402038574, + "learning_rate": 9.272094989931479e-07, + "loss": 0.6354, "step": 11922 }, { - "epoch": 0.9021981763837917, - "grad_norm": 2.306729793548584, - "learning_rate": 4.6632263823526467e-07, - "loss": 0.7093, + "epoch": 0.8393523407250968, + "grad_norm": 1.6656808853149414, + "learning_rate": 9.264158099176256e-07, + "loss": 0.6666, "step": 11923 }, { - "epoch": 0.9022738451061254, - "grad_norm": 1.9452950954437256, - "learning_rate": 4.656073430600747e-07, - "loss": 0.6975, + "epoch": 0.8394227384723689, + "grad_norm": 2.3115999698638916, + "learning_rate": 9.256224366132463e-07, + "loss": 0.7159, "step": 11924 }, { - "epoch": 0.902349513828459, - "grad_norm": 2.4310667514801025, - "learning_rate": 4.6489258233615596e-07, - "loss": 0.6344, + "epoch": 0.839493136219641, + "grad_norm": 1.940340280532837, + "learning_rate": 9.248293791212581e-07, + "loss": 0.6564, "step": 11925 }, { - "epoch": 0.9024251825507926, - "grad_norm": 2.114664077758789, - "learning_rate": 4.6417835610826863e-07, - "loss": 0.7136, + "epoch": 0.8395635339669131, + "grad_norm": 2.0944511890411377, + "learning_rate": 9.240366374828902e-07, + "loss": 0.5968, "step": 11926 }, { - "epoch": 0.9025008512731263, - "grad_norm": 2.3902628421783447, - "learning_rate": 4.63464664421142e-07, - "loss": 0.5602, + "epoch": 0.8396339317141851, + "grad_norm": 2.3638877868652344, + "learning_rate": 9.232442117393628e-07, + "loss": 0.6211, "step": 11927 }, { - "epoch": 0.9025765199954598, - "grad_norm": 2.4101219177246094, - "learning_rate": 4.6275150731946827e-07, - "loss": 0.576, + "epoch": 0.8397043294614572, + "grad_norm": 2.098498821258545, + "learning_rate": 9.224521019318698e-07, + "loss": 0.7039, "step": 11928 }, { - "epoch": 0.9026521887177935, - "grad_norm": 5.150691509246826, - "learning_rate": 4.620388848479087e-07, - "loss": 0.7431, + "epoch": 0.8397747272087294, + "grad_norm": 1.9541692733764648, + "learning_rate": 9.216603081015981e-07, + "loss": 0.5384, "step": 11929 }, { - "epoch": 0.9027278574401271, - "grad_norm": 2.332685708999634, - "learning_rate": 4.613267970510876e-07, - "loss": 0.67, + "epoch": 0.8398451249560014, + "grad_norm": 2.31354022026062, + "learning_rate": 9.208688302897116e-07, + "loss": 0.6635, "step": 11930 }, { - "epoch": 0.9028035261624607, - "grad_norm": 2.803213119506836, - "learning_rate": 4.606152439736003e-07, - "loss": 0.6053, + "epoch": 0.8399155227032735, + "grad_norm": 2.02278208732605, + "learning_rate": 9.200776685373604e-07, + "loss": 0.7346, "step": 11931 }, { - "epoch": 0.9028791948847944, - "grad_norm": 2.4882402420043945, - "learning_rate": 4.5990422566000824e-07, - "loss": 0.6151, + "epoch": 0.8399859204505455, + "grad_norm": 1.850508451461792, + "learning_rate": 9.192868228856756e-07, + "loss": 0.7429, "step": 11932 }, { - "epoch": 0.902954863607128, - "grad_norm": 2.111898422241211, - "learning_rate": 4.591937421548337e-07, - "loss": 0.7308, + "epoch": 0.8400563181978177, + "grad_norm": 1.8289672136306763, + "learning_rate": 9.184962933757782e-07, + "loss": 0.718, "step": 11933 }, { - "epoch": 0.9030305323294616, - "grad_norm": 1.9006226062774658, - "learning_rate": 4.584837935025721e-07, - "loss": 0.667, + "epoch": 0.8401267159450898, + "grad_norm": 1.66080641746521, + "learning_rate": 9.177060800487641e-07, + "loss": 0.5948, "step": 11934 }, { - "epoch": 0.9031062010517953, - "grad_norm": 2.327817678451538, - "learning_rate": 4.5777437974768186e-07, - "loss": 0.6714, + "epoch": 0.8401971136923618, + "grad_norm": 1.4064574241638184, + "learning_rate": 9.169161829457208e-07, + "loss": 0.6499, "step": 11935 }, { - "epoch": 0.9031818697741288, - "grad_norm": 2.1458778381347656, - "learning_rate": 4.5706550093458856e-07, - "loss": 0.744, + "epoch": 0.840267511439634, + "grad_norm": 3.1613857746124268, + "learning_rate": 9.161266021077147e-07, + "loss": 0.6006, "step": 11936 }, { - "epoch": 0.9032575384964625, - "grad_norm": 1.835634708404541, - "learning_rate": 4.5635715710768457e-07, - "loss": 0.6296, + "epoch": 0.840337909186906, + "grad_norm": 2.0942885875701904, + "learning_rate": 9.15337337575796e-07, + "loss": 0.7392, "step": 11937 }, { - "epoch": 0.9033332072187961, - "grad_norm": 2.9382152557373047, - "learning_rate": 4.5564934831132844e-07, - "loss": 0.6187, + "epoch": 0.8404083069341781, + "grad_norm": 1.7091478109359741, + "learning_rate": 9.145483893909986e-07, + "loss": 0.64, "step": 11938 }, { - "epoch": 0.9034088759411297, - "grad_norm": 2.5697879791259766, - "learning_rate": 4.5494207458984773e-07, - "loss": 0.698, + "epoch": 0.8404787046814501, + "grad_norm": 2.630505323410034, + "learning_rate": 9.137597575943432e-07, + "loss": 0.7203, "step": 11939 }, { - "epoch": 0.9034845446634634, - "grad_norm": 2.2090299129486084, - "learning_rate": 4.5423533598752997e-07, - "loss": 0.6213, + "epoch": 0.8405491024287223, + "grad_norm": 1.6979405879974365, + "learning_rate": 9.129714422268286e-07, + "loss": 0.6369, "step": 11940 }, { - "epoch": 0.903560213385797, - "grad_norm": 3.206984043121338, - "learning_rate": 4.5352913254863683e-07, - "loss": 0.5154, + "epoch": 0.8406195001759944, + "grad_norm": 1.7149690389633179, + "learning_rate": 9.121834433294427e-07, + "loss": 0.6913, "step": 11941 }, { - "epoch": 0.9036358821081306, - "grad_norm": 3.334904670715332, - "learning_rate": 4.5282346431739285e-07, - "loss": 0.5593, + "epoch": 0.8406898979232664, + "grad_norm": 1.9496560096740723, + "learning_rate": 9.113957609431533e-07, + "loss": 0.7427, "step": 11942 }, { - "epoch": 0.9037115508304643, - "grad_norm": 2.202554225921631, - "learning_rate": 4.5211833133798873e-07, - "loss": 0.6092, + "epoch": 0.8407602956705386, + "grad_norm": 2.172482967376709, + "learning_rate": 9.106083951089104e-07, + "loss": 0.7397, "step": 11943 }, { - "epoch": 0.9037872195527978, - "grad_norm": 2.342474937438965, - "learning_rate": 4.5141373365458116e-07, - "loss": 0.658, + "epoch": 0.8408306934178106, + "grad_norm": 1.9663090705871582, + "learning_rate": 9.098213458676538e-07, + "loss": 0.6799, "step": 11944 }, { - "epoch": 0.9038628882751315, - "grad_norm": 2.030301094055176, - "learning_rate": 4.5070967131129383e-07, - "loss": 0.4765, + "epoch": 0.8409010911650827, + "grad_norm": 1.9403783082962036, + "learning_rate": 9.090346132603e-07, + "loss": 0.6506, "step": 11945 }, { - "epoch": 0.9039385569974651, - "grad_norm": 1.962053656578064, - "learning_rate": 4.500061443522214e-07, - "loss": 0.5477, + "epoch": 0.8409714889123548, + "grad_norm": 1.852002739906311, + "learning_rate": 9.082481973277538e-07, + "loss": 0.5761, "step": 11946 }, { - "epoch": 0.9040142257197987, - "grad_norm": 2.112226724624634, - "learning_rate": 4.4930315282141574e-07, - "loss": 0.4979, + "epoch": 0.8410418866596269, + "grad_norm": 1.637376308441162, + "learning_rate": 9.074620981108997e-07, + "loss": 0.616, "step": 11947 }, { - "epoch": 0.9040898944421324, - "grad_norm": 2.1766979694366455, - "learning_rate": 4.486006967629046e-07, - "loss": 0.7071, + "epoch": 0.841112284406899, + "grad_norm": 1.704947829246521, + "learning_rate": 9.066763156506117e-07, + "loss": 0.6789, "step": 11948 }, { - "epoch": 0.904165563164466, - "grad_norm": 2.6591641902923584, - "learning_rate": 4.478987762206748e-07, - "loss": 0.6703, + "epoch": 0.841182682154171, + "grad_norm": 2.1198372840881348, + "learning_rate": 9.058908499877369e-07, + "loss": 0.6886, "step": 11949 }, { - "epoch": 0.9042412318867996, - "grad_norm": 2.473120927810669, - "learning_rate": 4.471973912386861e-07, - "loss": 0.5526, + "epoch": 0.8412530799014432, + "grad_norm": 1.7787586450576782, + "learning_rate": 9.051057011631182e-07, + "loss": 0.6649, "step": 11950 }, { - "epoch": 0.9043169006091332, - "grad_norm": 2.2571189403533936, - "learning_rate": 4.464965418608584e-07, - "loss": 0.6613, + "epoch": 0.8413234776487153, + "grad_norm": 2.1381659507751465, + "learning_rate": 9.043208692175723e-07, + "loss": 0.6121, "step": 11951 }, { - "epoch": 0.9043925693314668, - "grad_norm": 2.007202386856079, - "learning_rate": 4.4579622813108365e-07, - "loss": 0.4949, + "epoch": 0.8413938753959873, + "grad_norm": 2.292370319366455, + "learning_rate": 9.035363541919065e-07, + "loss": 0.6161, "step": 11952 }, { - "epoch": 0.9044682380538005, - "grad_norm": 2.1282033920288086, - "learning_rate": 4.4509645009321774e-07, - "loss": 0.6742, + "epoch": 0.8414642731432594, + "grad_norm": 2.183227062225342, + "learning_rate": 9.027521561269056e-07, + "loss": 0.6963, "step": 11953 }, { - "epoch": 0.904543906776134, - "grad_norm": 2.2609729766845703, - "learning_rate": 4.443972077910806e-07, - "loss": 0.6759, + "epoch": 0.8415346708905315, + "grad_norm": 2.0537126064300537, + "learning_rate": 9.019682750633448e-07, + "loss": 0.7325, "step": 11954 }, { - "epoch": 0.9046195754984677, - "grad_norm": 2.033329486846924, - "learning_rate": 4.436985012684612e-07, - "loss": 0.6447, + "epoch": 0.8416050686378036, + "grad_norm": 2.1232261657714844, + "learning_rate": 9.011847110419736e-07, + "loss": 0.7432, "step": 11955 }, { - "epoch": 0.9046952442208014, - "grad_norm": 2.3120713233947754, - "learning_rate": 4.430003305691176e-07, - "loss": 0.741, + "epoch": 0.8416754663850757, + "grad_norm": 2.1536478996276855, + "learning_rate": 9.004014641035335e-07, + "loss": 0.6747, "step": 11956 }, { - "epoch": 0.9047709129431349, - "grad_norm": 2.225062847137451, - "learning_rate": 4.423026957367707e-07, - "loss": 0.5616, + "epoch": 0.8417458641323478, + "grad_norm": 1.718031406402588, + "learning_rate": 8.996185342887444e-07, + "loss": 0.6474, "step": 11957 }, { - "epoch": 0.9048465816654686, - "grad_norm": 1.9412503242492676, - "learning_rate": 4.416055968151077e-07, - "loss": 0.6355, + "epoch": 0.8418162618796199, + "grad_norm": 2.2083163261413574, + "learning_rate": 8.988359216383137e-07, + "loss": 0.6554, "step": 11958 }, { - "epoch": 0.9049222503878022, - "grad_norm": 1.9755481481552124, - "learning_rate": 4.409090338477826e-07, - "loss": 0.6126, + "epoch": 0.8418866596268919, + "grad_norm": 2.100618839263916, + "learning_rate": 8.980536261929272e-07, + "loss": 0.6027, "step": 11959 }, { - "epoch": 0.9049979191101358, - "grad_norm": 2.4795279502868652, - "learning_rate": 4.4021300687841747e-07, - "loss": 0.6847, + "epoch": 0.841957057374164, + "grad_norm": 2.0426692962646484, + "learning_rate": 8.972716479932626e-07, + "loss": 0.6667, "step": 11960 }, { - "epoch": 0.9050735878324695, - "grad_norm": 2.1052944660186768, - "learning_rate": 4.395175159505995e-07, - "loss": 0.6671, + "epoch": 0.8420274551214361, + "grad_norm": 2.988525390625, + "learning_rate": 8.964899870799685e-07, + "loss": 0.719, "step": 11961 }, { - "epoch": 0.905149256554803, - "grad_norm": 3.0458271503448486, - "learning_rate": 4.3882256110788286e-07, - "loss": 0.6731, + "epoch": 0.8420978528687082, + "grad_norm": 1.870186448097229, + "learning_rate": 8.957086434936893e-07, + "loss": 0.5827, "step": 11962 }, { - "epoch": 0.9052249252771367, - "grad_norm": 3.665377378463745, - "learning_rate": 4.3812814239378774e-07, - "loss": 0.7169, + "epoch": 0.8421682506159803, + "grad_norm": 2.2353708744049072, + "learning_rate": 8.949276172750445e-07, + "loss": 0.7437, "step": 11963 }, { - "epoch": 0.9053005939994703, - "grad_norm": 1.8927977085113525, - "learning_rate": 4.374342598518013e-07, - "loss": 0.6032, + "epoch": 0.8422386483632524, + "grad_norm": 1.922163724899292, + "learning_rate": 8.941469084646438e-07, + "loss": 0.7653, "step": 11964 }, { - "epoch": 0.9053762627218039, - "grad_norm": 1.916894555091858, - "learning_rate": 4.367409135253758e-07, - "loss": 0.6076, + "epoch": 0.8423090461105245, + "grad_norm": 1.859761357307434, + "learning_rate": 8.933665171030733e-07, + "loss": 0.6517, "step": 11965 }, { - "epoch": 0.9054519314441376, - "grad_norm": 1.8118383884429932, - "learning_rate": 4.3604810345792956e-07, - "loss": 0.7221, + "epoch": 0.8423794438577965, + "grad_norm": 1.7190653085708618, + "learning_rate": 8.925864432309115e-07, + "loss": 0.5997, "step": 11966 }, { - "epoch": 0.9055276001664712, - "grad_norm": 1.7049998044967651, - "learning_rate": 4.353558296928528e-07, - "loss": 0.6027, + "epoch": 0.8424498416050686, + "grad_norm": 2.524077892303467, + "learning_rate": 8.91806686888708e-07, + "loss": 0.5992, "step": 11967 }, { - "epoch": 0.9056032688888048, - "grad_norm": 1.889378547668457, - "learning_rate": 4.346640922734949e-07, - "loss": 0.6484, + "epoch": 0.8425202393523408, + "grad_norm": 1.5954021215438843, + "learning_rate": 8.910272481170086e-07, + "loss": 0.576, "step": 11968 }, { - "epoch": 0.9056789376111385, - "grad_norm": 2.0997726917266846, - "learning_rate": 4.339728912431742e-07, - "loss": 0.6673, + "epoch": 0.8425906370996128, + "grad_norm": 1.880162239074707, + "learning_rate": 8.902481269563342e-07, + "loss": 0.6651, "step": 11969 }, { - "epoch": 0.905754606333472, - "grad_norm": 3.0155575275421143, - "learning_rate": 4.33282226645177e-07, - "loss": 0.7523, + "epoch": 0.8426610348468849, + "grad_norm": 1.783121109008789, + "learning_rate": 8.894693234471934e-07, + "loss": 0.5986, "step": 11970 }, { - "epoch": 0.9058302750558057, - "grad_norm": 2.474846839904785, - "learning_rate": 4.3259209852275583e-07, - "loss": 0.6474, + "epoch": 0.842731432594157, + "grad_norm": 2.119314432144165, + "learning_rate": 8.886908376300759e-07, + "loss": 0.6378, "step": 11971 }, { - "epoch": 0.9059059437781393, - "grad_norm": 2.1822690963745117, - "learning_rate": 4.31902506919127e-07, - "loss": 0.6469, + "epoch": 0.8428018303414291, + "grad_norm": 2.0516304969787598, + "learning_rate": 8.879126695454577e-07, + "loss": 0.611, "step": 11972 }, { - "epoch": 0.9059816125004729, - "grad_norm": 2.6391453742980957, - "learning_rate": 4.312134518774761e-07, - "loss": 0.7557, + "epoch": 0.8428722280887012, + "grad_norm": 2.368353843688965, + "learning_rate": 8.87134819233795e-07, + "loss": 0.5971, "step": 11973 }, { - "epoch": 0.9060572812228066, - "grad_norm": 1.6580966711044312, - "learning_rate": 4.3052493344095346e-07, - "loss": 0.8282, + "epoch": 0.8429426258359732, + "grad_norm": 2.2456343173980713, + "learning_rate": 8.863572867355296e-07, + "loss": 0.66, "step": 11974 }, { - "epoch": 0.9061329499451402, - "grad_norm": 2.3595659732818604, - "learning_rate": 4.298369516526777e-07, - "loss": 0.6902, + "epoch": 0.8430130235832454, + "grad_norm": 1.9594826698303223, + "learning_rate": 8.855800720910845e-07, + "loss": 0.6252, "step": 11975 }, { - "epoch": 0.9062086186674738, - "grad_norm": 2.4112627506256104, - "learning_rate": 4.2914950655572827e-07, - "loss": 0.66, + "epoch": 0.8430834213305174, + "grad_norm": 1.8720499277114868, + "learning_rate": 8.848031753408694e-07, + "loss": 0.612, "step": 11976 }, { - "epoch": 0.9062842873898074, - "grad_norm": 2.256720781326294, - "learning_rate": 4.284625981931608e-07, - "loss": 0.7697, + "epoch": 0.8431538190777895, + "grad_norm": 2.015707492828369, + "learning_rate": 8.840265965252747e-07, + "loss": 0.6385, "step": 11977 }, { - "epoch": 0.906359956112141, - "grad_norm": 2.7008187770843506, - "learning_rate": 4.277762266079899e-07, - "loss": 0.5343, + "epoch": 0.8432242168250615, + "grad_norm": 2.0578813552856445, + "learning_rate": 8.832503356846776e-07, + "loss": 0.6957, "step": 11978 }, { - "epoch": 0.9064356248344747, - "grad_norm": 2.01400089263916, - "learning_rate": 4.270903918431961e-07, - "loss": 0.5767, + "epoch": 0.8432946145723337, + "grad_norm": 2.2612390518188477, + "learning_rate": 8.82474392859435e-07, + "loss": 0.6667, "step": 11979 }, { - "epoch": 0.9065112935568083, - "grad_norm": 2.3042778968811035, - "learning_rate": 4.264050939417301e-07, - "loss": 0.789, + "epoch": 0.8433650123196058, + "grad_norm": 1.797975778579712, + "learning_rate": 8.816987680898897e-07, + "loss": 0.6257, "step": 11980 }, { - "epoch": 0.9065869622791419, - "grad_norm": 2.1162967681884766, - "learning_rate": 4.2572033294650756e-07, - "loss": 0.6247, + "epoch": 0.8434354100668778, + "grad_norm": 1.904595136642456, + "learning_rate": 8.80923461416364e-07, + "loss": 0.6578, "step": 11981 }, { - "epoch": 0.9066626310014756, - "grad_norm": 1.9995644092559814, - "learning_rate": 4.2503610890041023e-07, - "loss": 0.6561, + "epoch": 0.84350580781415, + "grad_norm": 1.6964287757873535, + "learning_rate": 8.801484728791718e-07, + "loss": 0.701, "step": 11982 }, { - "epoch": 0.9067382997238091, - "grad_norm": 2.2234203815460205, - "learning_rate": 4.2435242184628677e-07, - "loss": 0.719, + "epoch": 0.843576205561422, + "grad_norm": 1.767348289489746, + "learning_rate": 8.79373802518601e-07, + "loss": 0.5963, "step": 11983 }, { - "epoch": 0.9068139684461428, - "grad_norm": 2.4800000190734863, - "learning_rate": 4.236692718269519e-07, - "loss": 0.635, + "epoch": 0.8436466033086941, + "grad_norm": 2.138378858566284, + "learning_rate": 8.785994503749309e-07, + "loss": 0.7245, "step": 11984 }, { - "epoch": 0.9068896371684764, - "grad_norm": 2.198791742324829, - "learning_rate": 4.229866588851855e-07, - "loss": 0.5428, + "epoch": 0.8437170010559663, + "grad_norm": 1.9704139232635498, + "learning_rate": 8.778254164884196e-07, + "loss": 0.6554, "step": 11985 }, { - "epoch": 0.90696530589081, - "grad_norm": 2.6701927185058594, - "learning_rate": 4.2230458306373634e-07, - "loss": 0.6647, + "epoch": 0.8437873988032383, + "grad_norm": 2.0024943351745605, + "learning_rate": 8.77051700899309e-07, + "loss": 0.7251, "step": 11986 }, { - "epoch": 0.9070409746131437, - "grad_norm": 2.1392929553985596, - "learning_rate": 4.216230444053182e-07, - "loss": 0.6216, + "epoch": 0.8438577965505104, + "grad_norm": 2.019590139389038, + "learning_rate": 8.762783036478241e-07, + "loss": 0.7094, "step": 11987 }, { - "epoch": 0.9071166433354773, - "grad_norm": 2.303083658218384, - "learning_rate": 4.2094204295261095e-07, - "loss": 0.7002, + "epoch": 0.8439281942977824, + "grad_norm": 1.8085826635360718, + "learning_rate": 8.755052247741779e-07, + "loss": 0.642, "step": 11988 }, { - "epoch": 0.9071923120578109, - "grad_norm": 2.640005111694336, - "learning_rate": 4.2026157874826254e-07, - "loss": 0.734, + "epoch": 0.8439985920450546, + "grad_norm": 2.0684685707092285, + "learning_rate": 8.747324643185603e-07, + "loss": 0.5302, "step": 11989 }, { - "epoch": 0.9072679807801445, - "grad_norm": 2.1459431648254395, - "learning_rate": 4.1958165183488185e-07, - "loss": 0.7214, + "epoch": 0.8440689897923267, + "grad_norm": 2.038626194000244, + "learning_rate": 8.739600223211504e-07, + "loss": 0.7186, "step": 11990 }, { - "epoch": 0.9073436495024781, - "grad_norm": 2.6311416625976562, - "learning_rate": 4.189022622550508e-07, - "loss": 0.6757, + "epoch": 0.8441393875395987, + "grad_norm": 1.8479328155517578, + "learning_rate": 8.731878988221074e-07, + "loss": 0.7355, "step": 11991 }, { - "epoch": 0.9074193182248118, - "grad_norm": 2.1436285972595215, - "learning_rate": 4.1822341005131636e-07, - "loss": 0.7854, + "epoch": 0.8442097852868709, + "grad_norm": 1.8110523223876953, + "learning_rate": 8.724160938615741e-07, + "loss": 0.6209, "step": 11992 }, { - "epoch": 0.9074949869471454, - "grad_norm": 2.0433895587921143, - "learning_rate": 4.1754509526618754e-07, - "loss": 0.6659, + "epoch": 0.8442801830341429, + "grad_norm": 2.706589698791504, + "learning_rate": 8.716446074796765e-07, + "loss": 0.87, "step": 11993 }, { - "epoch": 0.907570655669479, - "grad_norm": 1.8191251754760742, - "learning_rate": 4.1686731794214337e-07, - "loss": 0.6189, + "epoch": 0.844350580781415, + "grad_norm": 1.759390115737915, + "learning_rate": 8.708734397165252e-07, + "loss": 0.6568, "step": 11994 }, { - "epoch": 0.9076463243918127, - "grad_norm": 2.218461751937866, - "learning_rate": 4.161900781216299e-07, - "loss": 0.5692, + "epoch": 0.844420978528687, + "grad_norm": 1.9835692644119263, + "learning_rate": 8.701025906122167e-07, + "loss": 0.6505, "step": 11995 }, { - "epoch": 0.9077219931141463, - "grad_norm": 2.302450656890869, - "learning_rate": 4.1551337584705815e-07, - "loss": 0.5076, + "epoch": 0.8444913762759592, + "grad_norm": 2.252683639526367, + "learning_rate": 8.69332060206826e-07, + "loss": 0.6635, "step": 11996 }, { - "epoch": 0.9077976618364799, - "grad_norm": 2.4182517528533936, - "learning_rate": 4.148372111608023e-07, - "loss": 0.6981, + "epoch": 0.8445617740232313, + "grad_norm": 1.8339051008224487, + "learning_rate": 8.685618485404137e-07, + "loss": 0.6008, "step": 11997 }, { - "epoch": 0.9078733305588135, - "grad_norm": 3.4762744903564453, - "learning_rate": 4.1416158410520845e-07, - "loss": 0.5797, + "epoch": 0.8446321717705033, + "grad_norm": 2.128572463989258, + "learning_rate": 8.677919556530223e-07, + "loss": 0.5697, "step": 11998 }, { - "epoch": 0.9079489992811471, - "grad_norm": 2.167374610900879, - "learning_rate": 4.1348649472258673e-07, - "loss": 0.6399, + "epoch": 0.8447025695177754, + "grad_norm": 1.7478877305984497, + "learning_rate": 8.670223815846821e-07, + "loss": 0.5678, "step": 11999 }, { - "epoch": 0.9080246680034808, - "grad_norm": 2.449777841567993, - "learning_rate": 4.128119430552133e-07, - "loss": 0.6094, + "epoch": 0.8447729672650475, + "grad_norm": 1.7025572061538696, + "learning_rate": 8.662531263754011e-07, + "loss": 0.5468, "step": 12000 }, { - "epoch": 0.9081003367258144, - "grad_norm": 2.1977956295013428, - "learning_rate": 4.1213792914533046e-07, - "loss": 0.6119, + "epoch": 0.8448433650123196, + "grad_norm": 1.863908052444458, + "learning_rate": 8.654841900651773e-07, + "loss": 0.6418, "step": 12001 }, { - "epoch": 0.908176005448148, - "grad_norm": 1.5713450908660889, - "learning_rate": 4.1146445303514537e-07, - "loss": 0.8275, + "epoch": 0.8449137627595917, + "grad_norm": 1.9756085872650146, + "learning_rate": 8.647155726939852e-07, + "loss": 0.6023, "step": 12002 }, { - "epoch": 0.9082516741704816, - "grad_norm": 2.628474235534668, - "learning_rate": 4.107915147668363e-07, - "loss": 0.8309, + "epoch": 0.8449841605068638, + "grad_norm": 1.9528632164001465, + "learning_rate": 8.639472743017878e-07, + "loss": 0.7549, "step": 12003 }, { - "epoch": 0.9083273428928152, - "grad_norm": 2.199763774871826, - "learning_rate": 4.1011911438254357e-07, - "loss": 0.6514, + "epoch": 0.8450545582541359, + "grad_norm": 2.0850720405578613, + "learning_rate": 8.631792949285271e-07, + "loss": 0.6573, "step": 12004 }, { - "epoch": 0.9084030116151489, - "grad_norm": 5.241161823272705, - "learning_rate": 4.094472519243745e-07, - "loss": 0.7267, + "epoch": 0.8451249560014079, + "grad_norm": 2.0792953968048096, + "learning_rate": 8.62411634614134e-07, + "loss": 0.6495, "step": 12005 }, { - "epoch": 0.9084786803374825, - "grad_norm": 2.1290805339813232, - "learning_rate": 4.087759274344034e-07, - "loss": 0.6428, + "epoch": 0.84519535374868, + "grad_norm": 1.9516854286193848, + "learning_rate": 8.61644293398517e-07, + "loss": 0.5947, "step": 12006 }, { - "epoch": 0.9085543490598161, - "grad_norm": 1.9543763399124146, - "learning_rate": 4.0810514095467164e-07, - "loss": 0.6201, + "epoch": 0.8452657514959522, + "grad_norm": 2.0438404083251953, + "learning_rate": 8.608772713215739e-07, + "loss": 0.6897, "step": 12007 }, { - "epoch": 0.9086300177821498, - "grad_norm": 2.172839879989624, - "learning_rate": 4.074348925271847e-07, - "loss": 0.739, + "epoch": 0.8453361492432242, + "grad_norm": 2.1773784160614014, + "learning_rate": 8.601105684231812e-07, + "loss": 0.4839, "step": 12008 }, { - "epoch": 0.9087056865044834, - "grad_norm": 2.1397907733917236, - "learning_rate": 4.067651821939169e-07, - "loss": 0.6348, + "epoch": 0.8454065469904963, + "grad_norm": 2.00911283493042, + "learning_rate": 8.59344184743201e-07, + "loss": 0.5644, "step": 12009 }, { - "epoch": 0.908781355226817, - "grad_norm": 2.9188485145568848, - "learning_rate": 4.0609600999680875e-07, - "loss": 0.687, + "epoch": 0.8454769447377684, + "grad_norm": 2.045351982116699, + "learning_rate": 8.585781203214763e-07, + "loss": 0.7135, "step": 12010 }, { - "epoch": 0.9088570239491506, - "grad_norm": 2.0263235569000244, - "learning_rate": 4.054273759777627e-07, - "loss": 0.677, + "epoch": 0.8455473424850405, + "grad_norm": 1.7235389947891235, + "learning_rate": 8.578123751978388e-07, + "loss": 0.684, "step": 12011 }, { - "epoch": 0.9089326926714842, - "grad_norm": 2.360248327255249, - "learning_rate": 4.047592801786523e-07, - "loss": 0.6305, + "epoch": 0.8456177402323126, + "grad_norm": 6.825764179229736, + "learning_rate": 8.570469494120968e-07, + "loss": 0.6952, "step": 12012 }, { - "epoch": 0.9090083613938179, - "grad_norm": 2.5668838024139404, - "learning_rate": 4.04091722641317e-07, - "loss": 0.7643, + "epoch": 0.8456881379795846, + "grad_norm": 1.5847210884094238, + "learning_rate": 8.562818430040492e-07, + "loss": 0.5383, "step": 12013 }, { - "epoch": 0.9090840301161515, - "grad_norm": 2.728790521621704, - "learning_rate": 4.0342470340756145e-07, - "loss": 0.5854, + "epoch": 0.8457585357268568, + "grad_norm": 1.9202572107315063, + "learning_rate": 8.555170560134724e-07, + "loss": 0.6171, "step": 12014 }, { - "epoch": 0.9091596988384851, - "grad_norm": 2.55326771736145, - "learning_rate": 4.0275822251915517e-07, - "loss": 0.7622, + "epoch": 0.8458289334741288, + "grad_norm": 2.20739483833313, + "learning_rate": 8.54752588480128e-07, + "loss": 0.6361, "step": 12015 }, { - "epoch": 0.9092353675608187, - "grad_norm": 2.370495080947876, - "learning_rate": 4.0209228001783484e-07, - "loss": 0.7064, + "epoch": 0.8458993312214009, + "grad_norm": 2.2145626544952393, + "learning_rate": 8.539884404437614e-07, + "loss": 0.5276, "step": 12016 }, { - "epoch": 0.9093110362831524, - "grad_norm": 1.9629454612731934, - "learning_rate": 4.0142687594530604e-07, - "loss": 0.686, + "epoch": 0.845969728968673, + "grad_norm": 1.6211379766464233, + "learning_rate": 8.532246119441031e-07, + "loss": 0.6138, "step": 12017 }, { - "epoch": 0.909386705005486, - "grad_norm": 2.457399368286133, - "learning_rate": 4.0076201034323647e-07, - "loss": 0.5467, + "epoch": 0.8460401267159451, + "grad_norm": 2.0243048667907715, + "learning_rate": 8.524611030208628e-07, + "loss": 0.6431, "step": 12018 }, { - "epoch": 0.9094623737278196, - "grad_norm": 1.8854044675827026, - "learning_rate": 4.000976832532638e-07, - "loss": 0.6292, + "epoch": 0.8461105244632172, + "grad_norm": 2.0953104496002197, + "learning_rate": 8.516979137137381e-07, + "loss": 0.6203, "step": 12019 }, { - "epoch": 0.9095380424501532, - "grad_norm": 2.935149908065796, - "learning_rate": 3.994338947169888e-07, - "loss": 0.5731, + "epoch": 0.8461809222104892, + "grad_norm": 2.0164268016815186, + "learning_rate": 8.509350440624078e-07, + "loss": 0.7068, "step": 12020 }, { - "epoch": 0.9096137111724869, - "grad_norm": 2.078005790710449, - "learning_rate": 3.987706447759831e-07, - "loss": 0.5449, + "epoch": 0.8462513199577614, + "grad_norm": 2.0234463214874268, + "learning_rate": 8.501724941065331e-07, + "loss": 0.6729, "step": 12021 }, { - "epoch": 0.9096893798948205, - "grad_norm": 2.3061161041259766, - "learning_rate": 3.9810793347177663e-07, - "loss": 0.5563, + "epoch": 0.8463217177050334, + "grad_norm": 2.4009599685668945, + "learning_rate": 8.494102638857592e-07, + "loss": 0.5928, "step": 12022 }, { - "epoch": 0.9097650486171541, - "grad_norm": 2.2766451835632324, - "learning_rate": 3.9744576084587413e-07, - "loss": 0.6867, + "epoch": 0.8463921154523055, + "grad_norm": 1.9578211307525635, + "learning_rate": 8.486483534397171e-07, + "loss": 0.5652, "step": 12023 }, { - "epoch": 0.9098407173394877, - "grad_norm": 2.109184503555298, - "learning_rate": 3.967841269397434e-07, - "loss": 0.5578, + "epoch": 0.8464625131995777, + "grad_norm": 1.5387351512908936, + "learning_rate": 8.478867628080161e-07, + "loss": 0.5658, "step": 12024 }, { - "epoch": 0.9099163860618213, - "grad_norm": 2.568835735321045, - "learning_rate": 3.9612303179481634e-07, - "loss": 0.6472, + "epoch": 0.8465329109468497, + "grad_norm": 2.8826587200164795, + "learning_rate": 8.471254920302562e-07, + "loss": 0.6943, "step": 12025 }, { - "epoch": 0.909992054784155, - "grad_norm": 2.184821128845215, - "learning_rate": 3.9546247545249284e-07, - "loss": 0.753, + "epoch": 0.8466033086941218, + "grad_norm": 1.8337199687957764, + "learning_rate": 8.46364541146014e-07, + "loss": 0.5756, "step": 12026 }, { - "epoch": 0.9100677235064886, - "grad_norm": 1.9900933504104614, - "learning_rate": 3.948024579541377e-07, - "loss": 0.5763, + "epoch": 0.8466737064413938, + "grad_norm": 1.889025330543518, + "learning_rate": 8.456039101948525e-07, + "loss": 0.7298, "step": 12027 }, { - "epoch": 0.9101433922288222, - "grad_norm": 2.366878032684326, - "learning_rate": 3.94142979341089e-07, - "loss": 0.6965, + "epoch": 0.846744104188666, + "grad_norm": 2.107468366622925, + "learning_rate": 8.448435992163161e-07, + "loss": 0.6599, "step": 12028 }, { - "epoch": 0.9102190609511558, - "grad_norm": 2.603574275970459, - "learning_rate": 3.934840396546396e-07, - "loss": 0.7252, + "epoch": 0.8468145019359381, + "grad_norm": 1.7836085557937622, + "learning_rate": 8.440836082499367e-07, + "loss": 0.7036, "step": 12029 }, { - "epoch": 0.9102947296734895, - "grad_norm": 2.070781707763672, - "learning_rate": 3.928256389360566e-07, - "loss": 0.5665, + "epoch": 0.8468848996832101, + "grad_norm": 2.1276419162750244, + "learning_rate": 8.433239373352239e-07, + "loss": 0.5514, "step": 12030 }, { - "epoch": 0.9103703983958231, - "grad_norm": 1.9723803997039795, - "learning_rate": 3.921677772265709e-07, - "loss": 0.7802, + "epoch": 0.8469552974304823, + "grad_norm": 1.9467180967330933, + "learning_rate": 8.425645865116769e-07, + "loss": 0.7143, "step": 12031 }, { - "epoch": 0.9104460671181567, - "grad_norm": 2.6558046340942383, - "learning_rate": 3.915104545673807e-07, - "loss": 0.6873, + "epoch": 0.8470256951777543, + "grad_norm": 2.056413412094116, + "learning_rate": 8.418055558187742e-07, + "loss": 0.6569, "step": 12032 }, { - "epoch": 0.9105217358404903, - "grad_norm": 2.883211374282837, - "learning_rate": 3.9085367099964786e-07, - "loss": 0.7914, + "epoch": 0.8470960929250264, + "grad_norm": 1.853884220123291, + "learning_rate": 8.410468452959769e-07, + "loss": 0.72, "step": 12033 }, { - "epoch": 0.910597404562824, - "grad_norm": 2.342439651489258, - "learning_rate": 3.9019742656450465e-07, - "loss": 0.6175, + "epoch": 0.8471664906722984, + "grad_norm": 1.782046914100647, + "learning_rate": 8.402884549827311e-07, + "loss": 0.6393, "step": 12034 }, { - "epoch": 0.9106730732851576, - "grad_norm": 2.299743413925171, - "learning_rate": 3.895417213030471e-07, - "loss": 0.7706, + "epoch": 0.8472368884195706, + "grad_norm": 1.9772406816482544, + "learning_rate": 8.395303849184687e-07, + "loss": 0.7033, "step": 12035 }, { - "epoch": 0.9107487420074912, - "grad_norm": 2.087432861328125, - "learning_rate": 3.8888655525633544e-07, - "loss": 0.6474, + "epoch": 0.8473072861668427, + "grad_norm": 2.4502296447753906, + "learning_rate": 8.387726351425995e-07, + "loss": 0.7204, "step": 12036 }, { - "epoch": 0.9108244107298248, - "grad_norm": 1.8561009168624878, - "learning_rate": 3.882319284653988e-07, - "loss": 0.7941, + "epoch": 0.8473776839141147, + "grad_norm": 2.200040102005005, + "learning_rate": 8.380152056945214e-07, + "loss": 0.7528, "step": 12037 }, { - "epoch": 0.9109000794521585, - "grad_norm": 1.991654634475708, - "learning_rate": 3.8757784097123236e-07, - "loss": 0.6651, + "epoch": 0.8474480816613869, + "grad_norm": 1.8245927095413208, + "learning_rate": 8.372580966136145e-07, + "loss": 0.7106, "step": 12038 }, { - "epoch": 0.9109757481744921, - "grad_norm": 2.475132942199707, - "learning_rate": 3.8692429281479845e-07, - "loss": 0.6396, + "epoch": 0.8475184794086589, + "grad_norm": 1.9589587450027466, + "learning_rate": 8.3650130793924e-07, + "loss": 0.5593, "step": 12039 }, { - "epoch": 0.9110514168968257, - "grad_norm": 1.9189382791519165, - "learning_rate": 3.8627128403702326e-07, - "loss": 0.7264, + "epoch": 0.847588877155931, + "grad_norm": 2.1325252056121826, + "learning_rate": 8.357448397107431e-07, + "loss": 0.6776, "step": 12040 }, { - "epoch": 0.9111270856191593, - "grad_norm": 2.612868547439575, - "learning_rate": 3.856188146788001e-07, - "loss": 0.6025, + "epoch": 0.8476592749032031, + "grad_norm": 1.7983934879302979, + "learning_rate": 8.349886919674558e-07, + "loss": 0.6617, "step": 12041 }, { - "epoch": 0.9112027543414929, - "grad_norm": 2.8287506103515625, - "learning_rate": 3.849668847809903e-07, - "loss": 0.5975, + "epoch": 0.8477296726504752, + "grad_norm": 1.9579949378967285, + "learning_rate": 8.342328647486888e-07, + "loss": 0.6121, "step": 12042 }, { - "epoch": 0.9112784230638266, - "grad_norm": 2.0225670337677, - "learning_rate": 3.8431549438441616e-07, - "loss": 0.6964, + "epoch": 0.8478000703977473, + "grad_norm": 3.013944387435913, + "learning_rate": 8.334773580937407e-07, + "loss": 0.6077, "step": 12043 }, { - "epoch": 0.9113540917861602, - "grad_norm": 2.5549564361572266, - "learning_rate": 3.8366464352987405e-07, - "loss": 0.5845, + "epoch": 0.8478704681450193, + "grad_norm": 2.198981761932373, + "learning_rate": 8.327221720418888e-07, + "loss": 0.6206, "step": 12044 }, { - "epoch": 0.9114297605084938, - "grad_norm": 2.6276886463165283, - "learning_rate": 3.8301433225811945e-07, - "loss": 0.8134, + "epoch": 0.8479408658922915, + "grad_norm": 1.9666117429733276, + "learning_rate": 8.319673066323959e-07, + "loss": 0.6497, "step": 12045 }, { - "epoch": 0.9115054292308274, - "grad_norm": 2.4634835720062256, - "learning_rate": 3.8236456060987967e-07, - "loss": 0.6734, + "epoch": 0.8480112636395636, + "grad_norm": 2.183073043823242, + "learning_rate": 8.3121276190451e-07, + "loss": 0.656, "step": 12046 }, { - "epoch": 0.9115810979531611, - "grad_norm": 2.0669867992401123, - "learning_rate": 3.8171532862584326e-07, - "loss": 0.6143, + "epoch": 0.8480816613868356, + "grad_norm": 1.9353358745574951, + "learning_rate": 8.30458537897458e-07, + "loss": 0.7653, "step": 12047 }, { - "epoch": 0.9116567666754947, - "grad_norm": 2.2459287643432617, - "learning_rate": 3.810666363466666e-07, - "loss": 0.6493, + "epoch": 0.8481520591341077, + "grad_norm": 1.8877111673355103, + "learning_rate": 8.297046346504551e-07, + "loss": 0.6841, "step": 12048 }, { - "epoch": 0.9117324353978283, - "grad_norm": 1.9273678064346313, - "learning_rate": 3.8041848381297626e-07, - "loss": 0.6154, + "epoch": 0.8482224568813798, + "grad_norm": 2.5490939617156982, + "learning_rate": 8.289510522026949e-07, + "loss": 0.6385, "step": 12049 }, { - "epoch": 0.9118081041201619, - "grad_norm": 2.385669231414795, - "learning_rate": 3.797708710653588e-07, - "loss": 0.6914, + "epoch": 0.8482928546286519, + "grad_norm": 1.8302912712097168, + "learning_rate": 8.281977905933617e-07, + "loss": 0.4988, "step": 12050 }, { - "epoch": 0.9118837728424956, - "grad_norm": 2.3687374591827393, - "learning_rate": 3.791237981443697e-07, - "loss": 0.8302, + "epoch": 0.8483632523759239, + "grad_norm": 2.543398141860962, + "learning_rate": 8.274448498616125e-07, + "loss": 0.6106, "step": 12051 }, { - "epoch": 0.9119594415648292, - "grad_norm": 2.349445343017578, - "learning_rate": 3.784772650905326e-07, - "loss": 0.6558, + "epoch": 0.848433650123196, + "grad_norm": 2.00384259223938, + "learning_rate": 8.266922300465964e-07, + "loss": 0.5193, "step": 12052 }, { - "epoch": 0.9120351102871628, - "grad_norm": 2.4349260330200195, - "learning_rate": 3.778312719443341e-07, - "loss": 0.7113, + "epoch": 0.8485040478704682, + "grad_norm": 2.1824889183044434, + "learning_rate": 8.25939931187441e-07, + "loss": 0.6326, "step": 12053 }, { - "epoch": 0.9121107790094964, - "grad_norm": 2.196366786956787, - "learning_rate": 3.771858187462288e-07, - "loss": 0.6412, + "epoch": 0.8485744456177402, + "grad_norm": 1.7346351146697998, + "learning_rate": 8.251879533232613e-07, + "loss": 0.6443, "step": 12054 }, { - "epoch": 0.91218644773183, - "grad_norm": 2.005478620529175, - "learning_rate": 3.7654090553663747e-07, - "loss": 0.5158, + "epoch": 0.8486448433650123, + "grad_norm": 2.1501874923706055, + "learning_rate": 8.244362964931507e-07, + "loss": 0.5437, "step": 12055 }, { - "epoch": 0.9122621164541637, - "grad_norm": 2.5596768856048584, - "learning_rate": 3.758965323559467e-07, - "loss": 0.7028, + "epoch": 0.8487152411122844, + "grad_norm": 2.4734530448913574, + "learning_rate": 8.236849607361929e-07, + "loss": 0.7154, "step": 12056 }, { - "epoch": 0.9123377851764973, - "grad_norm": 2.0070645809173584, - "learning_rate": 3.752526992445082e-07, - "loss": 0.5788, + "epoch": 0.8487856388595565, + "grad_norm": 2.052004337310791, + "learning_rate": 8.229339460914448e-07, + "loss": 0.5953, "step": 12057 }, { - "epoch": 0.9124134538988309, - "grad_norm": 2.1916732788085938, - "learning_rate": 3.7460940624263985e-07, - "loss": 0.7334, + "epoch": 0.8488560366068286, + "grad_norm": 1.9413179159164429, + "learning_rate": 8.221832525979563e-07, + "loss": 0.634, "step": 12058 }, { - "epoch": 0.9124891226211646, - "grad_norm": 2.4893784523010254, - "learning_rate": 3.739666533906303e-07, - "loss": 0.7867, + "epoch": 0.8489264343541006, + "grad_norm": 2.039217948913574, + "learning_rate": 8.214328802947545e-07, + "loss": 0.7399, "step": 12059 }, { - "epoch": 0.9125647913434982, - "grad_norm": 2.1541330814361572, - "learning_rate": 3.733244407287294e-07, - "loss": 0.6831, + "epoch": 0.8489968321013728, + "grad_norm": 2.323258399963379, + "learning_rate": 8.206828292208541e-07, + "loss": 0.7823, "step": 12060 }, { - "epoch": 0.9126404600658318, - "grad_norm": 2.3403072357177734, - "learning_rate": 3.72682768297153e-07, - "loss": 0.7134, + "epoch": 0.8490672298486448, + "grad_norm": 2.1827962398529053, + "learning_rate": 8.199330994152482e-07, + "loss": 0.7164, "step": 12061 }, { - "epoch": 0.9127161287881654, - "grad_norm": 2.0377376079559326, - "learning_rate": 3.720416361360859e-07, - "loss": 0.6679, + "epoch": 0.8491376275959169, + "grad_norm": 2.299327850341797, + "learning_rate": 8.191836909169206e-07, + "loss": 0.7477, "step": 12062 }, { - "epoch": 0.912791797510499, - "grad_norm": 2.7400593757629395, - "learning_rate": 3.71401044285678e-07, - "loss": 0.7331, + "epoch": 0.8492080253431891, + "grad_norm": 1.788019061088562, + "learning_rate": 8.184346037648278e-07, + "loss": 0.6797, "step": 12063 }, { - "epoch": 0.9128674662328327, - "grad_norm": 1.9614194631576538, - "learning_rate": 3.7076099278604527e-07, - "loss": 0.57, + "epoch": 0.8492784230904611, + "grad_norm": 1.9028282165527344, + "learning_rate": 8.176858379979192e-07, + "loss": 0.6312, "step": 12064 }, { - "epoch": 0.9129431349551663, - "grad_norm": 1.974969506263733, - "learning_rate": 3.7012148167726855e-07, - "loss": 0.4305, + "epoch": 0.8493488208377332, + "grad_norm": 2.5175983905792236, + "learning_rate": 8.169373936551217e-07, + "loss": 0.6978, "step": 12065 }, { - "epoch": 0.9130188036774999, - "grad_norm": 2.233001470565796, - "learning_rate": 3.694825109993979e-07, - "loss": 0.74, + "epoch": 0.8494192185850052, + "grad_norm": 2.034388303756714, + "learning_rate": 8.161892707753508e-07, + "loss": 0.6578, "step": 12066 }, { - "epoch": 0.9130944723998335, - "grad_norm": 2.316767454147339, - "learning_rate": 3.688440807924472e-07, - "loss": 0.8145, + "epoch": 0.8494896163322774, + "grad_norm": 2.4538657665252686, + "learning_rate": 8.154414693974977e-07, + "loss": 0.724, "step": 12067 }, { - "epoch": 0.9131701411221671, - "grad_norm": 1.535016417503357, - "learning_rate": 3.682061910963956e-07, - "loss": 0.7931, + "epoch": 0.8495600140795495, + "grad_norm": 2.000366449356079, + "learning_rate": 8.146939895604473e-07, + "loss": 0.6413, "step": 12068 }, { - "epoch": 0.9132458098445008, - "grad_norm": 1.9249364137649536, - "learning_rate": 3.6756884195119114e-07, - "loss": 0.6361, + "epoch": 0.8496304118268215, + "grad_norm": 1.8984955549240112, + "learning_rate": 8.139468313030553e-07, + "loss": 0.628, "step": 12069 }, { - "epoch": 0.9133214785668344, - "grad_norm": 2.287426471710205, - "learning_rate": 3.669320333967477e-07, - "loss": 0.6633, + "epoch": 0.8497008095740937, + "grad_norm": 2.6452651023864746, + "learning_rate": 8.131999946641706e-07, + "loss": 0.531, "step": 12070 }, { - "epoch": 0.913397147289168, - "grad_norm": 2.515241861343384, - "learning_rate": 3.662957654729416e-07, - "loss": 0.7616, + "epoch": 0.8497712073213657, + "grad_norm": 1.9814568758010864, + "learning_rate": 8.12453479682621e-07, + "loss": 0.6368, "step": 12071 }, { - "epoch": 0.9134728160115017, - "grad_norm": 2.161454677581787, - "learning_rate": 3.656600382196199e-07, - "loss": 0.7475, + "epoch": 0.8498416050686378, + "grad_norm": 1.9756944179534912, + "learning_rate": 8.11707286397219e-07, + "loss": 0.653, "step": 12072 }, { - "epoch": 0.9135484847338353, - "grad_norm": 1.951395869255066, - "learning_rate": 3.650248516765937e-07, - "loss": 0.77, + "epoch": 0.8499120028159098, + "grad_norm": 2.166168212890625, + "learning_rate": 8.109614148467589e-07, + "loss": 0.6179, "step": 12073 }, { - "epoch": 0.9136241534561689, - "grad_norm": 2.1375932693481445, - "learning_rate": 3.6439020588364023e-07, - "loss": 0.5793, + "epoch": 0.849982400563182, + "grad_norm": 2.1702804565429688, + "learning_rate": 8.10215865070023e-07, + "loss": 0.6458, "step": 12074 }, { - "epoch": 0.9136998221785025, - "grad_norm": 2.560678720474243, - "learning_rate": 3.637561008805027e-07, - "loss": 0.6161, + "epoch": 0.8500527983104541, + "grad_norm": 2.082092761993408, + "learning_rate": 8.094706371057665e-07, + "loss": 0.5552, "step": 12075 }, { - "epoch": 0.9137754909008361, - "grad_norm": 2.45676851272583, - "learning_rate": 3.631225367068913e-07, - "loss": 0.7046, + "epoch": 0.8501231960577261, + "grad_norm": 2.423396348953247, + "learning_rate": 8.0872573099274e-07, + "loss": 0.7184, "step": 12076 }, { - "epoch": 0.9138511596231698, - "grad_norm": 2.3351166248321533, - "learning_rate": 3.6248951340248136e-07, - "loss": 0.6914, + "epoch": 0.8501935938049983, + "grad_norm": 1.8008294105529785, + "learning_rate": 8.079811467696685e-07, + "loss": 0.6618, "step": 12077 }, { - "epoch": 0.9139268283455034, - "grad_norm": 2.179896354675293, - "learning_rate": 3.6185703100691615e-07, - "loss": 0.6828, + "epoch": 0.8502639915522703, + "grad_norm": 1.9954781532287598, + "learning_rate": 8.072368844752659e-07, + "loss": 0.6424, "step": 12078 }, { - "epoch": 0.914002497067837, - "grad_norm": 2.130155563354492, - "learning_rate": 3.6122508955980094e-07, - "loss": 0.6875, + "epoch": 0.8503343892995424, + "grad_norm": 2.2267613410949707, + "learning_rate": 8.064929441482252e-07, + "loss": 0.6657, "step": 12079 }, { - "epoch": 0.9140781657901706, - "grad_norm": 2.144454002380371, - "learning_rate": 3.6059368910071313e-07, - "loss": 0.702, + "epoch": 0.8504047870468145, + "grad_norm": 1.9550248384475708, + "learning_rate": 8.057493258272273e-07, + "loss": 0.6532, "step": 12080 }, { - "epoch": 0.9141538345125042, - "grad_norm": 1.5945395231246948, - "learning_rate": 3.5996282966919303e-07, - "loss": 0.7236, + "epoch": 0.8504751847940866, + "grad_norm": 2.2800028324127197, + "learning_rate": 8.050060295509306e-07, + "loss": 0.6215, "step": 12081 }, { - "epoch": 0.9142295032348379, - "grad_norm": 1.7243108749389648, - "learning_rate": 3.593325113047441e-07, - "loss": 0.5943, + "epoch": 0.8505455825413587, + "grad_norm": 1.9233009815216064, + "learning_rate": 8.042630553579821e-07, + "loss": 0.7772, "step": 12082 }, { - "epoch": 0.9143051719571715, - "grad_norm": 2.1047885417938232, - "learning_rate": 3.5870273404684073e-07, - "loss": 0.7484, + "epoch": 0.8506159802886307, + "grad_norm": 1.6504650115966797, + "learning_rate": 8.035204032870066e-07, + "loss": 0.6432, "step": 12083 }, { - "epoch": 0.9143808406795051, - "grad_norm": 2.1068966388702393, - "learning_rate": 3.580734979349214e-07, - "loss": 0.7103, + "epoch": 0.8506863780359029, + "grad_norm": 2.047360420227051, + "learning_rate": 8.027780733766184e-07, + "loss": 0.6523, "step": 12084 }, { - "epoch": 0.9144565094018388, - "grad_norm": 3.1981070041656494, - "learning_rate": 3.5744480300839156e-07, - "loss": 0.6087, + "epoch": 0.850756775783175, + "grad_norm": 2.1061673164367676, + "learning_rate": 8.020360656654101e-07, + "loss": 0.5555, "step": 12085 }, { - "epoch": 0.9145321781241724, - "grad_norm": 3.288362741470337, - "learning_rate": 3.5681664930662075e-07, - "loss": 0.5641, + "epoch": 0.850827173530447, + "grad_norm": 1.98600435256958, + "learning_rate": 8.012943801919604e-07, + "loss": 0.5546, "step": 12086 }, { - "epoch": 0.914607846846506, - "grad_norm": 2.8169796466827393, - "learning_rate": 3.5618903686894745e-07, - "loss": 0.6044, + "epoch": 0.8508975712777191, + "grad_norm": 1.9371720552444458, + "learning_rate": 8.005530169948304e-07, + "loss": 0.6234, "step": 12087 }, { - "epoch": 0.9146835155688396, - "grad_norm": 2.2789206504821777, - "learning_rate": 3.5556196573467426e-07, - "loss": 0.6518, + "epoch": 0.8509679690249912, + "grad_norm": 2.0547590255737305, + "learning_rate": 7.998119761125638e-07, + "loss": 0.6145, "step": 12088 }, { - "epoch": 0.9147591842911732, - "grad_norm": 2.204268455505371, - "learning_rate": 3.5493543594306974e-07, - "loss": 0.6927, + "epoch": 0.8510383667722633, + "grad_norm": 2.1735448837280273, + "learning_rate": 7.990712575836859e-07, + "loss": 0.6687, "step": 12089 }, { - "epoch": 0.9148348530135069, - "grad_norm": 2.291841983795166, - "learning_rate": 3.5430944753336956e-07, - "loss": 0.7375, + "epoch": 0.8511087645195353, + "grad_norm": 2.168487787246704, + "learning_rate": 7.983308614467101e-07, + "loss": 0.6635, "step": 12090 }, { - "epoch": 0.9149105217358405, - "grad_norm": 2.3435380458831787, - "learning_rate": 3.5368400054477637e-07, - "loss": 0.6929, + "epoch": 0.8511791622668075, + "grad_norm": 3.155900716781616, + "learning_rate": 7.975907877401292e-07, + "loss": 0.7475, "step": 12091 }, { - "epoch": 0.9149861904581741, - "grad_norm": 2.182016134262085, - "learning_rate": 3.530590950164567e-07, - "loss": 0.7357, + "epoch": 0.8512495600140796, + "grad_norm": 1.934801459312439, + "learning_rate": 7.968510365024214e-07, + "loss": 0.7552, "step": 12092 }, { - "epoch": 0.9150618591805078, - "grad_norm": 2.184502363204956, - "learning_rate": 3.524347309875434e-07, - "loss": 0.7398, + "epoch": 0.8513199577613516, + "grad_norm": 1.9198135137557983, + "learning_rate": 7.961116077720453e-07, + "loss": 0.6909, "step": 12093 }, { - "epoch": 0.9151375279028413, - "grad_norm": 6.090785026550293, - "learning_rate": 3.5181090849713617e-07, - "loss": 0.6725, + "epoch": 0.8513903555086237, + "grad_norm": 1.7487915754318237, + "learning_rate": 7.953725015874451e-07, + "loss": 0.5616, "step": 12094 }, { - "epoch": 0.915213196625175, - "grad_norm": 2.206554412841797, - "learning_rate": 3.511876275843037e-07, - "loss": 0.6784, + "epoch": 0.8514607532558958, + "grad_norm": 1.9127850532531738, + "learning_rate": 7.946337179870481e-07, + "loss": 0.6442, "step": 12095 }, { - "epoch": 0.9152888653475086, - "grad_norm": 2.320699453353882, - "learning_rate": 3.5056488828807377e-07, - "loss": 0.6322, + "epoch": 0.8515311510031679, + "grad_norm": 2.07527756690979, + "learning_rate": 7.938952570092631e-07, + "loss": 0.6704, "step": 12096 }, { - "epoch": 0.9153645340698422, - "grad_norm": 2.412203311920166, - "learning_rate": 3.4994269064744624e-07, - "loss": 0.6949, + "epoch": 0.85160154875044, + "grad_norm": 2.559131622314453, + "learning_rate": 7.93157118692486e-07, + "loss": 0.59, "step": 12097 }, { - "epoch": 0.9154402027921759, - "grad_norm": 2.6732378005981445, - "learning_rate": 3.493210347013859e-07, - "loss": 0.7427, + "epoch": 0.851671946497712, + "grad_norm": 2.102616310119629, + "learning_rate": 7.924193030750914e-07, + "loss": 0.6475, "step": 12098 }, { - "epoch": 0.9155158715145095, - "grad_norm": 1.9151742458343506, - "learning_rate": 3.486999204888216e-07, - "loss": 0.6351, + "epoch": 0.8517423442449842, + "grad_norm": 2.0279722213745117, + "learning_rate": 7.916818101954389e-07, + "loss": 0.6309, "step": 12099 }, { - "epoch": 0.9155915402368431, - "grad_norm": 1.943975567817688, - "learning_rate": 3.480793480486493e-07, - "loss": 0.614, + "epoch": 0.8518127419922562, + "grad_norm": 1.86268150806427, + "learning_rate": 7.909446400918702e-07, + "loss": 0.6802, "step": 12100 }, { - "epoch": 0.9156672089591767, - "grad_norm": 2.587817907333374, - "learning_rate": 3.474593174197328e-07, - "loss": 0.6968, + "epoch": 0.8518831397395283, + "grad_norm": 1.8064039945602417, + "learning_rate": 7.902077928027148e-07, + "loss": 0.6384, "step": 12101 }, { - "epoch": 0.9157428776815103, - "grad_norm": 3.2089931964874268, - "learning_rate": 3.4683982864090013e-07, - "loss": 0.5855, + "epoch": 0.8519535374868005, + "grad_norm": 2.2994813919067383, + "learning_rate": 7.894712683662785e-07, + "loss": 0.634, "step": 12102 }, { - "epoch": 0.915818546403844, - "grad_norm": 1.9807599782943726, - "learning_rate": 3.462208817509452e-07, - "loss": 0.5766, + "epoch": 0.8520239352340725, + "grad_norm": 1.7149685621261597, + "learning_rate": 7.887350668208567e-07, + "loss": 0.594, "step": 12103 }, { - "epoch": 0.9158942151261776, - "grad_norm": 2.119952917098999, - "learning_rate": 3.456024767886261e-07, - "loss": 0.6788, + "epoch": 0.8520943329813446, + "grad_norm": 1.913367748260498, + "learning_rate": 7.879991882047241e-07, + "loss": 0.6428, "step": 12104 }, { - "epoch": 0.9159698838485112, - "grad_norm": 2.423600196838379, - "learning_rate": 3.4498461379267277e-07, - "loss": 0.631, + "epoch": 0.8521647307286166, + "grad_norm": 1.9182802438735962, + "learning_rate": 7.872636325561403e-07, + "loss": 0.6534, "step": 12105 }, { - "epoch": 0.9160455525708449, - "grad_norm": 1.9820013046264648, - "learning_rate": 3.4436729280177823e-07, - "loss": 0.8097, + "epoch": 0.8522351284758888, + "grad_norm": 2.1323633193969727, + "learning_rate": 7.865283999133443e-07, + "loss": 0.5459, "step": 12106 }, { - "epoch": 0.9161212212931784, - "grad_norm": 1.9202264547348022, - "learning_rate": 3.4375051385459864e-07, - "loss": 0.5799, + "epoch": 0.8523055262231609, + "grad_norm": 2.0607919692993164, + "learning_rate": 7.857934903145661e-07, + "loss": 0.5932, "step": 12107 }, { - "epoch": 0.9161968900155121, - "grad_norm": 2.3052821159362793, - "learning_rate": 3.431342769897591e-07, - "loss": 0.7043, + "epoch": 0.8523759239704329, + "grad_norm": 1.9429184198379517, + "learning_rate": 7.850589037980115e-07, + "loss": 0.6726, "step": 12108 }, { - "epoch": 0.9162725587378457, - "grad_norm": 2.5394515991210938, - "learning_rate": 3.4251858224585064e-07, - "loss": 0.6048, + "epoch": 0.8524463217177051, + "grad_norm": 2.1002354621887207, + "learning_rate": 7.843246404018737e-07, + "loss": 0.7198, "step": 12109 }, { - "epoch": 0.9163482274601793, - "grad_norm": 2.8350353240966797, - "learning_rate": 3.419034296614305e-07, - "loss": 0.5312, + "epoch": 0.8525167194649771, + "grad_norm": 1.9775296449661255, + "learning_rate": 7.835907001643267e-07, + "loss": 0.5985, "step": 12110 }, { - "epoch": 0.916423896182513, - "grad_norm": 2.1664719581604004, - "learning_rate": 3.4128881927502086e-07, - "loss": 0.598, + "epoch": 0.8525871172122492, + "grad_norm": 3.001347303390503, + "learning_rate": 7.828570831235295e-07, + "loss": 0.6357, "step": 12111 }, { - "epoch": 0.9164995649048466, - "grad_norm": 2.737666606903076, - "learning_rate": 3.406747511251119e-07, - "loss": 0.7566, + "epoch": 0.8526575149595212, + "grad_norm": 2.6425821781158447, + "learning_rate": 7.82123789317622e-07, + "loss": 0.6071, "step": 12112 }, { - "epoch": 0.9165752336271802, - "grad_norm": 2.4136788845062256, - "learning_rate": 3.4006122525015793e-07, - "loss": 0.745, + "epoch": 0.8527279127067934, + "grad_norm": 2.197132110595703, + "learning_rate": 7.813908187847314e-07, + "loss": 0.6903, "step": 12113 }, { - "epoch": 0.9166509023495139, - "grad_norm": 2.2882440090179443, - "learning_rate": 3.3944824168857914e-07, - "loss": 0.7246, + "epoch": 0.8527983104540655, + "grad_norm": 2.041741371154785, + "learning_rate": 7.806581715629618e-07, + "loss": 0.616, "step": 12114 }, { - "epoch": 0.9167265710718474, - "grad_norm": 3.440190553665161, - "learning_rate": 3.3883580047876186e-07, - "loss": 0.6751, + "epoch": 0.8528687082013375, + "grad_norm": 1.9825263023376465, + "learning_rate": 7.799258476904091e-07, + "loss": 0.5353, "step": 12115 }, { - "epoch": 0.9168022397941811, - "grad_norm": 2.7280311584472656, - "learning_rate": 3.3822390165906134e-07, - "loss": 0.7142, + "epoch": 0.8529391059486097, + "grad_norm": 2.6092278957366943, + "learning_rate": 7.791938472051442e-07, + "loss": 0.6239, "step": 12116 }, { - "epoch": 0.9168779085165147, - "grad_norm": 2.077730417251587, - "learning_rate": 3.376125452677971e-07, - "loss": 0.6187, + "epoch": 0.8530095036958817, + "grad_norm": 2.0818915367126465, + "learning_rate": 7.784621701452249e-07, + "loss": 0.6461, "step": 12117 }, { - "epoch": 0.9169535772388483, - "grad_norm": 2.064181327819824, - "learning_rate": 3.370017313432513e-07, - "loss": 0.6946, + "epoch": 0.8530799014431538, + "grad_norm": 1.8072972297668457, + "learning_rate": 7.7773081654869e-07, + "loss": 0.6958, "step": 12118 }, { - "epoch": 0.917029245961182, - "grad_norm": 2.3035082817077637, - "learning_rate": 3.3639145992367647e-07, - "loss": 0.6829, + "epoch": 0.853150299190426, + "grad_norm": 1.949683666229248, + "learning_rate": 7.769997864535674e-07, + "loss": 0.6797, "step": 12119 }, { - "epoch": 0.9171049146835155, - "grad_norm": 1.866401195526123, - "learning_rate": 3.3578173104729005e-07, - "loss": 0.6442, + "epoch": 0.853220696937698, + "grad_norm": 2.250246286392212, + "learning_rate": 7.762690798978606e-07, + "loss": 0.5407, "step": 12120 }, { - "epoch": 0.9171805834058492, - "grad_norm": 2.719041585922241, - "learning_rate": 3.3517254475227544e-07, - "loss": 0.7242, + "epoch": 0.8532910946849701, + "grad_norm": 1.5520117282867432, + "learning_rate": 7.755386969195618e-07, + "loss": 0.5656, "step": 12121 }, { - "epoch": 0.9172562521281828, - "grad_norm": 2.2494893074035645, - "learning_rate": 3.345639010767811e-07, - "loss": 0.701, + "epoch": 0.8533614924322421, + "grad_norm": 2.107489585876465, + "learning_rate": 7.748086375566429e-07, + "loss": 0.6246, "step": 12122 }, { - "epoch": 0.9173319208505164, - "grad_norm": 2.3660080432891846, - "learning_rate": 3.3395580005892365e-07, - "loss": 0.7243, + "epoch": 0.8534318901795143, + "grad_norm": 2.381615400314331, + "learning_rate": 7.740789018470617e-07, + "loss": 0.6622, "step": 12123 }, { - "epoch": 0.9174075895728501, - "grad_norm": 2.231206178665161, - "learning_rate": 3.333482417367836e-07, - "loss": 0.6173, + "epoch": 0.8535022879267864, + "grad_norm": 1.83864426612854, + "learning_rate": 7.733494898287551e-07, + "loss": 0.683, "step": 12124 }, { - "epoch": 0.9174832582951837, - "grad_norm": 2.3385729789733887, - "learning_rate": 3.327412261484064e-07, - "loss": 0.7117, + "epoch": 0.8535726856740584, + "grad_norm": 1.8288240432739258, + "learning_rate": 7.726204015396489e-07, + "loss": 0.668, "step": 12125 }, { - "epoch": 0.9175589270175173, - "grad_norm": 2.6792593002319336, - "learning_rate": 3.3213475333180777e-07, - "loss": 0.7404, + "epoch": 0.8536430834213306, + "grad_norm": 1.9510787725448608, + "learning_rate": 7.718916370176468e-07, + "loss": 0.5597, "step": 12126 }, { - "epoch": 0.917634595739851, - "grad_norm": 2.788846492767334, - "learning_rate": 3.315288233249663e-07, - "loss": 0.7583, + "epoch": 0.8537134811686026, + "grad_norm": 2.4010512828826904, + "learning_rate": 7.711631963006405e-07, + "loss": 0.65, "step": 12127 }, { - "epoch": 0.9177102644621845, - "grad_norm": 1.883687973022461, - "learning_rate": 3.3092343616582753e-07, - "loss": 0.7129, + "epoch": 0.8537838789158747, + "grad_norm": 1.8233548402786255, + "learning_rate": 7.704350794265001e-07, + "loss": 0.6041, "step": 12128 }, { - "epoch": 0.9177859331845182, - "grad_norm": 2.291748285293579, - "learning_rate": 3.303185918923013e-07, - "loss": 0.6957, + "epoch": 0.8538542766631467, + "grad_norm": 2.15811824798584, + "learning_rate": 7.697072864330824e-07, + "loss": 0.6476, "step": 12129 }, { - "epoch": 0.9178616019068518, - "grad_norm": 1.4826828241348267, - "learning_rate": 3.297142905422652e-07, - "loss": 0.6491, + "epoch": 0.8539246744104189, + "grad_norm": 2.0297179222106934, + "learning_rate": 7.689798173582239e-07, + "loss": 0.6697, "step": 12130 }, { - "epoch": 0.9179372706291854, - "grad_norm": 2.145691394805908, - "learning_rate": 3.29110532153566e-07, - "loss": 0.7542, + "epoch": 0.853995072157691, + "grad_norm": 2.068207263946533, + "learning_rate": 7.68252672239749e-07, + "loss": 0.7319, "step": 12131 }, { - "epoch": 0.9180129393515191, - "grad_norm": 2.223241090774536, - "learning_rate": 3.2850731676400945e-07, - "loss": 0.615, + "epoch": 0.854065469904963, + "grad_norm": 1.9194790124893188, + "learning_rate": 7.675258511154608e-07, + "loss": 0.7282, "step": 12132 }, { - "epoch": 0.9180886080738526, - "grad_norm": 4.687051773071289, - "learning_rate": 3.2790464441137037e-07, - "loss": 0.7339, + "epoch": 0.8541358676522351, + "grad_norm": 2.370466709136963, + "learning_rate": 7.667993540231485e-07, + "loss": 0.6385, "step": 12133 }, { - "epoch": 0.9181642767961863, - "grad_norm": 2.535534620285034, - "learning_rate": 3.273025151333925e-07, - "loss": 0.7676, + "epoch": 0.8542062653995072, + "grad_norm": 2.166938304901123, + "learning_rate": 7.660731810005834e-07, + "loss": 0.5815, "step": 12134 }, { - "epoch": 0.91823994551852, - "grad_norm": 2.9152448177337646, - "learning_rate": 3.267009289677817e-07, - "loss": 0.6491, + "epoch": 0.8542766631467793, + "grad_norm": 1.7950276136398315, + "learning_rate": 7.653473320855191e-07, + "loss": 0.5607, "step": 12135 }, { - "epoch": 0.9183156142408535, - "grad_norm": 2.461850881576538, - "learning_rate": 3.2609988595221183e-07, - "loss": 0.612, + "epoch": 0.8543470608940514, + "grad_norm": 1.8224844932556152, + "learning_rate": 7.646218073156926e-07, + "loss": 0.6756, "step": 12136 }, { - "epoch": 0.9183912829631872, - "grad_norm": 2.5751256942749023, - "learning_rate": 3.254993861243218e-07, - "loss": 0.4901, + "epoch": 0.8544174586413235, + "grad_norm": 2.3032443523406982, + "learning_rate": 7.638966067288264e-07, + "loss": 0.5057, "step": 12137 }, { - "epoch": 0.9184669516855208, - "grad_norm": 2.4287333488464355, - "learning_rate": 3.248994295217176e-07, - "loss": 0.6079, + "epoch": 0.8544878563885956, + "grad_norm": 1.8148366212844849, + "learning_rate": 7.631717303626219e-07, + "loss": 0.6903, "step": 12138 }, { - "epoch": 0.9185426204078544, - "grad_norm": 2.0941126346588135, - "learning_rate": 3.24300016181969e-07, - "loss": 0.7313, + "epoch": 0.8545582541358676, + "grad_norm": 2.009895086288452, + "learning_rate": 7.624471782547694e-07, + "loss": 0.6768, "step": 12139 }, { - "epoch": 0.9186182891301881, - "grad_norm": 2.2236790657043457, - "learning_rate": 3.2370114614261313e-07, - "loss": 0.9, + "epoch": 0.8546286518831397, + "grad_norm": 1.8870183229446411, + "learning_rate": 7.617229504429365e-07, + "loss": 0.6643, "step": 12140 }, { - "epoch": 0.9186939578525216, - "grad_norm": 2.147141933441162, - "learning_rate": 3.231028194411569e-07, - "loss": 0.6299, + "epoch": 0.8546990496304119, + "grad_norm": 1.9002747535705566, + "learning_rate": 7.609990469647775e-07, + "loss": 0.5698, "step": 12141 }, { - "epoch": 0.9187696265748553, - "grad_norm": 2.7429134845733643, - "learning_rate": 3.2250503611506444e-07, - "loss": 0.8518, + "epoch": 0.8547694473776839, + "grad_norm": 1.7270288467407227, + "learning_rate": 7.602754678579266e-07, + "loss": 0.6369, "step": 12142 }, { - "epoch": 0.918845295297189, - "grad_norm": 2.0502939224243164, - "learning_rate": 3.2190779620177267e-07, - "loss": 0.5715, + "epoch": 0.854839845124956, + "grad_norm": 2.180175542831421, + "learning_rate": 7.595522131600073e-07, + "loss": 0.7091, "step": 12143 }, { - "epoch": 0.9189209640195225, - "grad_norm": 2.8797767162323, - "learning_rate": 3.213110997386838e-07, - "loss": 0.6093, + "epoch": 0.854910242872228, + "grad_norm": 1.936262607574463, + "learning_rate": 7.588292829086183e-07, + "loss": 0.6355, "step": 12144 }, { - "epoch": 0.9189966327418562, - "grad_norm": 2.793109655380249, - "learning_rate": 3.2071494676316484e-07, - "loss": 0.7734, + "epoch": 0.8549806406195002, + "grad_norm": 2.027648448944092, + "learning_rate": 7.581066771413486e-07, + "loss": 0.6118, "step": 12145 }, { - "epoch": 0.9190723014641897, - "grad_norm": 2.135164976119995, - "learning_rate": 3.2011933731254697e-07, - "loss": 0.6241, + "epoch": 0.8550510383667722, + "grad_norm": 2.3917951583862305, + "learning_rate": 7.573843958957657e-07, + "loss": 0.6667, "step": 12146 }, { - "epoch": 0.9191479701865234, - "grad_norm": 1.7029752731323242, - "learning_rate": 3.1952427142413033e-07, - "loss": 0.6607, + "epoch": 0.8551214361140443, + "grad_norm": 1.8411661386489868, + "learning_rate": 7.56662439209421e-07, + "loss": 0.6319, "step": 12147 }, { - "epoch": 0.919223638908857, - "grad_norm": 6.995512008666992, - "learning_rate": 3.1892974913518016e-07, - "loss": 0.7122, + "epoch": 0.8551918338613165, + "grad_norm": 1.8412338495254517, + "learning_rate": 7.559408071198512e-07, + "loss": 0.5867, "step": 12148 }, { - "epoch": 0.9192993076311906, - "grad_norm": 2.4727792739868164, - "learning_rate": 3.183357704829286e-07, - "loss": 0.7127, + "epoch": 0.8552622316085885, + "grad_norm": 2.2204627990722656, + "learning_rate": 7.55219499664572e-07, + "loss": 0.5446, "step": 12149 }, { - "epoch": 0.9193749763535243, - "grad_norm": 1.784459114074707, - "learning_rate": 3.1774233550457e-07, - "loss": 0.5731, + "epoch": 0.8553326293558606, + "grad_norm": 1.9465546607971191, + "learning_rate": 7.544985168810882e-07, + "loss": 0.6065, "step": 12150 }, { - "epoch": 0.9194506450758579, - "grad_norm": 1.7401350736618042, - "learning_rate": 3.1714944423726653e-07, - "loss": 0.6354, + "epoch": 0.8554030271031327, + "grad_norm": 1.9441288709640503, + "learning_rate": 7.537778588068834e-07, + "loss": 0.7073, "step": 12151 }, { - "epoch": 0.9195263137981915, - "grad_norm": 2.0081143379211426, - "learning_rate": 3.165570967181506e-07, - "loss": 0.7686, + "epoch": 0.8554734248504048, + "grad_norm": 2.0028257369995117, + "learning_rate": 7.530575254794245e-07, + "loss": 0.5664, "step": 12152 }, { - "epoch": 0.9196019825205252, - "grad_norm": 1.8823308944702148, - "learning_rate": 3.1596529298431445e-07, - "loss": 0.6569, + "epoch": 0.8555438225976769, + "grad_norm": 2.194694995880127, + "learning_rate": 7.523375169361602e-07, + "loss": 0.7167, "step": 12153 }, { - "epoch": 0.9196776512428587, - "grad_norm": 2.5882723331451416, - "learning_rate": 3.1537403307281843e-07, - "loss": 0.5767, + "epoch": 0.8556142203449489, + "grad_norm": 1.8612339496612549, + "learning_rate": 7.516178332145286e-07, + "loss": 0.689, "step": 12154 }, { - "epoch": 0.9197533199651924, - "grad_norm": 2.073834180831909, - "learning_rate": 3.14783317020691e-07, - "loss": 0.7049, + "epoch": 0.8556846180922211, + "grad_norm": 2.12447452545166, + "learning_rate": 7.508984743519433e-07, + "loss": 0.6423, "step": 12155 }, { - "epoch": 0.919828988687526, - "grad_norm": 2.439730644226074, - "learning_rate": 3.1419314486492245e-07, - "loss": 0.6684, + "epoch": 0.8557550158394931, + "grad_norm": 1.9065877199172974, + "learning_rate": 7.501794403858075e-07, + "loss": 0.7028, "step": 12156 }, { - "epoch": 0.9199046574098596, - "grad_norm": 2.621870756149292, - "learning_rate": 3.136035166424733e-07, - "loss": 0.7028, + "epoch": 0.8558254135867652, + "grad_norm": 1.8404210805892944, + "learning_rate": 7.494607313535008e-07, + "loss": 0.5846, "step": 12157 }, { - "epoch": 0.9199803261321933, - "grad_norm": 2.3154456615448, - "learning_rate": 3.1301443239026705e-07, - "loss": 0.7616, + "epoch": 0.8558958113340374, + "grad_norm": 1.981136679649353, + "learning_rate": 7.487423472923949e-07, + "loss": 0.615, "step": 12158 }, { - "epoch": 0.9200559948545268, - "grad_norm": 2.4377593994140625, - "learning_rate": 3.1242589214519513e-07, - "loss": 0.5677, + "epoch": 0.8559662090813094, + "grad_norm": 2.107985258102417, + "learning_rate": 7.480242882398325e-07, + "loss": 0.7377, "step": 12159 }, { - "epoch": 0.9201316635768605, - "grad_norm": 2.1143412590026855, - "learning_rate": 3.1183789594411203e-07, - "loss": 0.591, + "epoch": 0.8560366068285815, + "grad_norm": 1.8776301145553589, + "learning_rate": 7.47306554233151e-07, + "loss": 0.5529, "step": 12160 }, { - "epoch": 0.9202073322991942, - "grad_norm": 1.7824926376342773, - "learning_rate": 3.112504438238394e-07, - "loss": 0.6487, + "epoch": 0.8561070045758535, + "grad_norm": 2.342283248901367, + "learning_rate": 7.465891453096633e-07, + "loss": 0.5709, "step": 12161 }, { - "epoch": 0.9202830010215277, - "grad_norm": 2.3391568660736084, - "learning_rate": 3.106635358211687e-07, - "loss": 0.6635, + "epoch": 0.8561774023231257, + "grad_norm": 2.122431755065918, + "learning_rate": 7.458720615066706e-07, + "loss": 0.6733, "step": 12162 }, { - "epoch": 0.9203586697438614, - "grad_norm": 2.347287178039551, - "learning_rate": 3.100771719728526e-07, - "loss": 0.6899, + "epoch": 0.8562478000703978, + "grad_norm": 2.1144227981567383, + "learning_rate": 7.451553028614521e-07, + "loss": 0.6225, "step": 12163 }, { - "epoch": 0.920434338466195, - "grad_norm": 2.611984968185425, - "learning_rate": 3.0949135231560864e-07, - "loss": 0.5357, + "epoch": 0.8563181978176698, + "grad_norm": 2.1301469802856445, + "learning_rate": 7.444388694112766e-07, + "loss": 0.6415, "step": 12164 }, { - "epoch": 0.9205100071885286, - "grad_norm": 2.079094886779785, - "learning_rate": 3.089060768861256e-07, - "loss": 0.6983, + "epoch": 0.856388595564942, + "grad_norm": 1.9728256464004517, + "learning_rate": 7.437227611933877e-07, + "loss": 0.7061, "step": 12165 }, { - "epoch": 0.9205856759108623, - "grad_norm": 2.0612375736236572, - "learning_rate": 3.0832134572105507e-07, - "loss": 0.7183, + "epoch": 0.856458993312214, + "grad_norm": 2.5734989643096924, + "learning_rate": 7.430069782450197e-07, + "loss": 0.6341, "step": 12166 }, { - "epoch": 0.9206613446331958, - "grad_norm": 2.252366542816162, - "learning_rate": 3.0773715885701284e-07, - "loss": 0.6214, + "epoch": 0.8565293910594861, + "grad_norm": 1.871416687965393, + "learning_rate": 7.422915206033835e-07, + "loss": 0.6418, "step": 12167 }, { - "epoch": 0.9207370133555295, - "grad_norm": 2.7472410202026367, - "learning_rate": 3.071535163305845e-07, - "loss": 0.6864, + "epoch": 0.8565997888067581, + "grad_norm": 2.025885820388794, + "learning_rate": 7.415763883056801e-07, + "loss": 0.7255, "step": 12168 }, { - "epoch": 0.9208126820778632, - "grad_norm": 2.145517349243164, - "learning_rate": 3.0657041817831897e-07, - "loss": 0.6973, + "epoch": 0.8566701865540303, + "grad_norm": 1.7988250255584717, + "learning_rate": 7.408615813890872e-07, + "loss": 0.7015, "step": 12169 }, { - "epoch": 0.9208883508001967, - "grad_norm": 1.8512307405471802, - "learning_rate": 3.05987864436733e-07, - "loss": 0.7021, + "epoch": 0.8567405843013024, + "grad_norm": 2.2621381282806396, + "learning_rate": 7.401470998907721e-07, + "loss": 0.6372, "step": 12170 }, { - "epoch": 0.9209640195225304, - "grad_norm": 2.517260789871216, - "learning_rate": 3.054058551423053e-07, - "loss": 0.6665, + "epoch": 0.8568109820485744, + "grad_norm": 2.0480589866638184, + "learning_rate": 7.394329438478751e-07, + "loss": 0.7108, "step": 12171 }, { - "epoch": 0.9210396882448639, - "grad_norm": 2.0453529357910156, - "learning_rate": 3.048243903314849e-07, - "loss": 0.6017, + "epoch": 0.8568813797958466, + "grad_norm": 2.332232713699341, + "learning_rate": 7.387191132975312e-07, + "loss": 0.6493, "step": 12172 }, { - "epoch": 0.9211153569671976, - "grad_norm": 2.419189453125, - "learning_rate": 3.0424347004068555e-07, - "loss": 0.5802, + "epoch": 0.8569517775431186, + "grad_norm": 2.340855836868286, + "learning_rate": 7.380056082768495e-07, + "loss": 0.6763, "step": 12173 }, { - "epoch": 0.9211910256895313, - "grad_norm": 2.2323241233825684, - "learning_rate": 3.0366309430628516e-07, - "loss": 0.7098, + "epoch": 0.8570221752903907, + "grad_norm": 1.9872361421585083, + "learning_rate": 7.372924288229292e-07, + "loss": 0.5821, "step": 12174 }, { - "epoch": 0.9212666944118648, - "grad_norm": 2.0994818210601807, - "learning_rate": 3.0308326316462966e-07, - "loss": 0.6425, + "epoch": 0.8570925730376628, + "grad_norm": 2.1895592212677, + "learning_rate": 7.365795749728452e-07, + "loss": 0.6568, "step": 12175 }, { - "epoch": 0.9213423631341985, - "grad_norm": 1.8704789876937866, - "learning_rate": 3.02503976652027e-07, - "loss": 0.558, + "epoch": 0.8571629707849349, + "grad_norm": 1.952144980430603, + "learning_rate": 7.358670467636648e-07, + "loss": 0.7146, "step": 12176 }, { - "epoch": 0.9214180318565321, - "grad_norm": 2.0065083503723145, - "learning_rate": 3.019252348047602e-07, - "loss": 0.623, + "epoch": 0.857233368532207, + "grad_norm": 2.3143491744995117, + "learning_rate": 7.351548442324272e-07, + "loss": 0.7156, "step": 12177 }, { - "epoch": 0.9214937005788657, - "grad_norm": 2.0805394649505615, - "learning_rate": 3.0134703765906626e-07, - "loss": 0.6629, + "epoch": 0.857303766279479, + "grad_norm": 2.196525812149048, + "learning_rate": 7.344429674161647e-07, + "loss": 0.6681, "step": 12178 }, { - "epoch": 0.9215693693011994, - "grad_norm": 2.2282912731170654, - "learning_rate": 3.007693852511552e-07, - "loss": 0.7508, + "epoch": 0.8573741640267512, + "grad_norm": 1.8202887773513794, + "learning_rate": 7.337314163518847e-07, + "loss": 0.5853, "step": 12179 }, { - "epoch": 0.9216450380235329, - "grad_norm": 2.3103513717651367, - "learning_rate": 3.0019227761720304e-07, - "loss": 0.6449, + "epoch": 0.8574445617740233, + "grad_norm": 1.9669896364212036, + "learning_rate": 7.330201910765852e-07, + "loss": 0.666, "step": 12180 }, { - "epoch": 0.9217207067458666, - "grad_norm": 2.332411050796509, - "learning_rate": 2.9961571479334794e-07, - "loss": 0.5846, + "epoch": 0.8575149595212953, + "grad_norm": 2.899357795715332, + "learning_rate": 7.323092916272395e-07, + "loss": 0.6569, "step": 12181 }, { - "epoch": 0.9217963754682003, - "grad_norm": 1.9255980253219604, - "learning_rate": 2.99039696815698e-07, - "loss": 0.6451, + "epoch": 0.8575853572685674, + "grad_norm": 2.0070855617523193, + "learning_rate": 7.315987180408139e-07, + "loss": 0.6519, "step": 12182 }, { - "epoch": 0.9218720441905338, - "grad_norm": 4.481695652008057, - "learning_rate": 2.9846422372032434e-07, - "loss": 0.5743, + "epoch": 0.8576557550158395, + "grad_norm": 1.6909377574920654, + "learning_rate": 7.308884703542448e-07, + "loss": 0.5464, "step": 12183 }, { - "epoch": 0.9219477129128675, - "grad_norm": 2.167587995529175, - "learning_rate": 2.9788929554326614e-07, - "loss": 0.7151, + "epoch": 0.8577261527631116, + "grad_norm": 1.6118720769882202, + "learning_rate": 7.30178548604463e-07, + "loss": 0.6783, "step": 12184 }, { - "epoch": 0.9220233816352011, - "grad_norm": 2.3407418727874756, - "learning_rate": 2.9731491232052466e-07, - "loss": 0.6573, + "epoch": 0.8577965505103836, + "grad_norm": 1.7495582103729248, + "learning_rate": 7.294689528283753e-07, + "loss": 0.7096, "step": 12185 }, { - "epoch": 0.9220990503575347, - "grad_norm": 2.2735655307769775, - "learning_rate": 2.9674107408807107e-07, - "loss": 0.6146, + "epoch": 0.8578669482576557, + "grad_norm": 2.192085027694702, + "learning_rate": 7.287596830628774e-07, + "loss": 0.7411, "step": 12186 }, { - "epoch": 0.9221747190798684, - "grad_norm": 2.0419833660125732, - "learning_rate": 2.961677808818436e-07, - "loss": 0.6306, + "epoch": 0.8579373460049279, + "grad_norm": 1.9461082220077515, + "learning_rate": 7.280507393448414e-07, + "loss": 0.5938, "step": 12187 }, { - "epoch": 0.9222503878022019, - "grad_norm": 2.499739646911621, - "learning_rate": 2.955950327377396e-07, - "loss": 0.6939, + "epoch": 0.8580077437521999, + "grad_norm": 2.347353935241699, + "learning_rate": 7.273421217111294e-07, + "loss": 0.6025, "step": 12188 }, { - "epoch": 0.9223260565245356, - "grad_norm": 2.449592351913452, - "learning_rate": 2.950228296916283e-07, - "loss": 0.834, + "epoch": 0.858078141499472, + "grad_norm": 2.173781156539917, + "learning_rate": 7.266338301985818e-07, + "loss": 0.6357, "step": 12189 }, { - "epoch": 0.9224017252468693, - "grad_norm": 2.2888007164001465, - "learning_rate": 2.944511717793421e-07, - "loss": 0.6675, + "epoch": 0.8581485392467441, + "grad_norm": 1.988889217376709, + "learning_rate": 7.259258648440216e-07, + "loss": 0.701, "step": 12190 }, { - "epoch": 0.9224773939692028, - "grad_norm": 2.9520766735076904, - "learning_rate": 2.938800590366814e-07, - "loss": 0.7078, + "epoch": 0.8582189369940162, + "grad_norm": 1.7890304327011108, + "learning_rate": 7.252182256842574e-07, + "loss": 0.6314, "step": 12191 }, { - "epoch": 0.9225530626915365, - "grad_norm": 2.2078518867492676, - "learning_rate": 2.9330949149941044e-07, - "loss": 0.6119, + "epoch": 0.8582893347412883, + "grad_norm": 1.8918514251708984, + "learning_rate": 7.245109127560812e-07, + "loss": 0.6431, "step": 12192 }, { - "epoch": 0.92262873141387, - "grad_norm": 2.88496470451355, - "learning_rate": 2.927394692032598e-07, - "loss": 0.6607, + "epoch": 0.8583597324885603, + "grad_norm": 2.151613712310791, + "learning_rate": 7.23803926096265e-07, + "loss": 0.5764, "step": 12193 }, { - "epoch": 0.9227044001362037, - "grad_norm": 2.3544445037841797, - "learning_rate": 2.921699921839258e-07, - "loss": 0.7694, + "epoch": 0.8584301302358325, + "grad_norm": 1.8541706800460815, + "learning_rate": 7.230972657415683e-07, + "loss": 0.71, "step": 12194 }, { - "epoch": 0.9227800688585374, - "grad_norm": 2.2602198123931885, - "learning_rate": 2.91601060477073e-07, - "loss": 0.7182, + "epoch": 0.8585005279831045, + "grad_norm": 1.838590145111084, + "learning_rate": 7.223909317287295e-07, + "loss": 0.7138, "step": 12195 }, { - "epoch": 0.9228557375808709, - "grad_norm": 2.0758039951324463, - "learning_rate": 2.910326741183269e-07, - "loss": 0.6234, + "epoch": 0.8585709257303766, + "grad_norm": 1.9017055034637451, + "learning_rate": 7.216849240944694e-07, + "loss": 0.6606, "step": 12196 }, { - "epoch": 0.9229314063032046, - "grad_norm": 2.403632402420044, - "learning_rate": 2.9046483314328296e-07, - "loss": 0.6832, + "epoch": 0.8586413234776488, + "grad_norm": 1.9152257442474365, + "learning_rate": 7.209792428754985e-07, + "loss": 0.6584, "step": 12197 }, { - "epoch": 0.9230070750255382, - "grad_norm": 2.320176362991333, - "learning_rate": 2.898975375875018e-07, - "loss": 0.7362, + "epoch": 0.8587117212249208, + "grad_norm": 1.9669020175933838, + "learning_rate": 7.202738881085016e-07, + "loss": 0.653, "step": 12198 }, { - "epoch": 0.9230827437478718, - "grad_norm": 2.266352891921997, - "learning_rate": 2.89330787486508e-07, - "loss": 0.6202, + "epoch": 0.8587821189721929, + "grad_norm": 2.9137351512908936, + "learning_rate": 7.195688598301533e-07, + "loss": 0.5998, "step": 12199 }, { - "epoch": 0.9231584124702055, - "grad_norm": 2.6983745098114014, - "learning_rate": 2.887645828757951e-07, - "loss": 0.6643, + "epoch": 0.8588525167194649, + "grad_norm": 2.199464797973633, + "learning_rate": 7.188641580771086e-07, + "loss": 0.6061, "step": 12200 }, { - "epoch": 0.923234081192539, - "grad_norm": 3.239264488220215, - "learning_rate": 2.881989237908188e-07, - "loss": 0.6703, + "epoch": 0.8589229144667371, + "grad_norm": 2.0012755393981934, + "learning_rate": 7.181597828860046e-07, + "loss": 0.6659, "step": 12201 }, { - "epoch": 0.9233097499148727, - "grad_norm": 1.9743820428848267, - "learning_rate": 2.876338102670028e-07, - "loss": 0.6001, + "epoch": 0.8589933122140091, + "grad_norm": 1.8894436359405518, + "learning_rate": 7.17455734293461e-07, + "loss": 0.7565, "step": 12202 }, { - "epoch": 0.9233854186372064, - "grad_norm": 2.7023541927337646, - "learning_rate": 2.8706924233973765e-07, - "loss": 0.6539, + "epoch": 0.8590637099612812, + "grad_norm": 2.1361453533172607, + "learning_rate": 7.167520123360849e-07, + "loss": 0.5952, "step": 12203 }, { - "epoch": 0.9234610873595399, - "grad_norm": 1.8627772331237793, - "learning_rate": 2.865052200443772e-07, - "loss": 0.6342, + "epoch": 0.8591341077085534, + "grad_norm": 3.7478480339050293, + "learning_rate": 7.160486170504612e-07, + "loss": 0.6556, "step": 12204 }, { - "epoch": 0.9235367560818736, - "grad_norm": 2.2735352516174316, - "learning_rate": 2.8594174341624216e-07, - "loss": 0.6798, + "epoch": 0.8592045054558254, + "grad_norm": 2.9292428493499756, + "learning_rate": 7.153455484731626e-07, + "loss": 0.6806, "step": 12205 }, { - "epoch": 0.9236124248042071, - "grad_norm": 2.6565213203430176, - "learning_rate": 2.8537881249062225e-07, - "loss": 0.6429, + "epoch": 0.8592749032030975, + "grad_norm": 1.8225903511047363, + "learning_rate": 7.146428066407403e-07, + "loss": 0.7101, "step": 12206 }, { - "epoch": 0.9236880935265408, - "grad_norm": 1.921863317489624, - "learning_rate": 2.8481642730276434e-07, - "loss": 0.5624, + "epoch": 0.8593453009503695, + "grad_norm": 2.573578119277954, + "learning_rate": 7.139403915897308e-07, + "loss": 0.6031, "step": 12207 }, { - "epoch": 0.9237637622488745, - "grad_norm": 2.8346986770629883, - "learning_rate": 2.8425458788789126e-07, - "loss": 0.6731, + "epoch": 0.8594156986976417, + "grad_norm": 1.718753695487976, + "learning_rate": 7.132383033566513e-07, + "loss": 0.6527, "step": 12208 }, { - "epoch": 0.923839430971208, - "grad_norm": 1.9833852052688599, - "learning_rate": 2.8369329428118784e-07, - "loss": 0.7168, + "epoch": 0.8594860964449138, + "grad_norm": 2.025161027908325, + "learning_rate": 7.125365419780084e-07, + "loss": 0.618, "step": 12209 }, { - "epoch": 0.9239150996935417, - "grad_norm": 2.3957440853118896, - "learning_rate": 2.8313254651779997e-07, - "loss": 0.7039, + "epoch": 0.8595564941921858, + "grad_norm": 1.7555241584777832, + "learning_rate": 7.118351074902835e-07, + "loss": 0.5351, "step": 12210 }, { - "epoch": 0.9239907684158754, - "grad_norm": 2.2904884815216064, - "learning_rate": 2.8257234463284653e-07, - "loss": 0.7522, + "epoch": 0.859626891939458, + "grad_norm": 1.7950536012649536, + "learning_rate": 7.11133999929947e-07, + "loss": 0.632, "step": 12211 }, { - "epoch": 0.9240664371382089, - "grad_norm": 2.137908458709717, - "learning_rate": 2.820126886614085e-07, - "loss": 0.5352, + "epoch": 0.85969728968673, + "grad_norm": 1.8013205528259277, + "learning_rate": 7.104332193334499e-07, + "loss": 0.7026, "step": 12212 }, { - "epoch": 0.9241421058605426, - "grad_norm": 2.9669129848480225, - "learning_rate": 2.814535786385338e-07, - "loss": 0.6325, + "epoch": 0.8597676874340021, + "grad_norm": 2.7835729122161865, + "learning_rate": 7.097327657372259e-07, + "loss": 0.584, "step": 12213 }, { - "epoch": 0.9242177745828761, - "grad_norm": 4.183269500732422, - "learning_rate": 2.808950145992345e-07, - "loss": 0.7076, + "epoch": 0.8598380851812742, + "grad_norm": 2.832486152648926, + "learning_rate": 7.090326391776904e-07, + "loss": 0.6998, "step": 12214 }, { - "epoch": 0.9242934433052098, - "grad_norm": 1.7913990020751953, - "learning_rate": 2.8033699657849056e-07, - "loss": 0.6799, + "epoch": 0.8599084829285463, + "grad_norm": 2.318272352218628, + "learning_rate": 7.083328396912468e-07, + "loss": 0.6422, "step": 12215 }, { - "epoch": 0.9243691120275435, - "grad_norm": 1.807224988937378, - "learning_rate": 2.79779524611248e-07, - "loss": 0.5429, + "epoch": 0.8599788806758184, + "grad_norm": 1.7277363538742065, + "learning_rate": 7.076333673142761e-07, + "loss": 0.6709, "step": 12216 }, { - "epoch": 0.924444780749877, - "grad_norm": 3.026785135269165, - "learning_rate": 2.7922259873241397e-07, - "loss": 0.6853, + "epoch": 0.8600492784230904, + "grad_norm": 2.106096029281616, + "learning_rate": 7.069342220831462e-07, + "loss": 0.7015, "step": 12217 }, { - "epoch": 0.9245204494722107, - "grad_norm": 2.0761656761169434, - "learning_rate": 2.786662189768685e-07, - "loss": 0.5143, + "epoch": 0.8601196761703626, + "grad_norm": 2.198626756668091, + "learning_rate": 7.062354040342063e-07, + "loss": 0.6123, "step": 12218 }, { - "epoch": 0.9245961181945442, - "grad_norm": 2.615605354309082, - "learning_rate": 2.7811038537945177e-07, - "loss": 0.734, + "epoch": 0.8601900739176347, + "grad_norm": 1.8306388854980469, + "learning_rate": 7.055369132037866e-07, + "loss": 0.5889, "step": 12219 }, { - "epoch": 0.9246717869168779, - "grad_norm": 2.0705811977386475, - "learning_rate": 2.775550979749739e-07, - "loss": 0.5934, + "epoch": 0.8602604716649067, + "grad_norm": 2.060499668121338, + "learning_rate": 7.048387496282031e-07, + "loss": 0.682, "step": 12220 }, { - "epoch": 0.9247474556392116, - "grad_norm": 2.260209321975708, - "learning_rate": 2.7700035679820714e-07, - "loss": 0.8695, + "epoch": 0.8603308694121788, + "grad_norm": 1.7447426319122314, + "learning_rate": 7.041409133437544e-07, + "loss": 0.7, "step": 12221 }, { - "epoch": 0.9248231243615451, - "grad_norm": 2.3986639976501465, - "learning_rate": 2.764461618838906e-07, - "loss": 0.7589, + "epoch": 0.8604012671594509, + "grad_norm": 1.7603721618652344, + "learning_rate": 7.034434043867212e-07, + "loss": 0.5735, "step": 12222 }, { - "epoch": 0.9248987930838788, - "grad_norm": 2.1753921508789062, - "learning_rate": 2.758925132667326e-07, - "loss": 0.6476, + "epoch": 0.860471664906723, + "grad_norm": 1.9317619800567627, + "learning_rate": 7.027462227933686e-07, + "loss": 0.6547, "step": 12223 }, { - "epoch": 0.9249744618062125, - "grad_norm": 1.9226336479187012, - "learning_rate": 2.7533941098140234e-07, - "loss": 0.7568, + "epoch": 0.860542062653995, + "grad_norm": 2.7453114986419678, + "learning_rate": 7.020493685999433e-07, + "loss": 0.6795, "step": 12224 }, { - "epoch": 0.925050130528546, - "grad_norm": 2.058516502380371, - "learning_rate": 2.747868550625362e-07, - "loss": 0.6215, + "epoch": 0.8606124604012672, + "grad_norm": 2.217226505279541, + "learning_rate": 7.013528418426749e-07, + "loss": 0.7533, "step": 12225 }, { - "epoch": 0.9251257992508797, - "grad_norm": 1.9395172595977783, - "learning_rate": 2.742348455447384e-07, - "loss": 0.571, + "epoch": 0.8606828581485393, + "grad_norm": 2.8593389987945557, + "learning_rate": 7.006566425577755e-07, + "loss": 0.5366, "step": 12226 }, { - "epoch": 0.9252014679732132, - "grad_norm": 2.5205371379852295, - "learning_rate": 2.736833824625774e-07, - "loss": 0.6528, + "epoch": 0.8607532558958113, + "grad_norm": 1.5845566987991333, + "learning_rate": 6.999607707814435e-07, + "loss": 0.5539, "step": 12227 }, { - "epoch": 0.9252771366955469, - "grad_norm": 2.5392866134643555, - "learning_rate": 2.7313246585058647e-07, - "loss": 0.6829, + "epoch": 0.8608236536430834, + "grad_norm": 2.0742924213409424, + "learning_rate": 6.99265226549855e-07, + "loss": 0.7766, "step": 12228 }, { - "epoch": 0.9253528054178806, - "grad_norm": 3.4775822162628174, - "learning_rate": 2.7258209574326707e-07, - "loss": 0.5941, + "epoch": 0.8608940513903555, + "grad_norm": 2.3513288497924805, + "learning_rate": 6.985700098991755e-07, + "loss": 0.7099, "step": 12229 }, { - "epoch": 0.9254284741402141, - "grad_norm": 2.08290433883667, - "learning_rate": 2.7203227217508565e-07, - "loss": 0.586, + "epoch": 0.8609644491376276, + "grad_norm": 3.824047565460205, + "learning_rate": 6.978751208655482e-07, + "loss": 0.7542, "step": 12230 }, { - "epoch": 0.9255041428625478, - "grad_norm": 2.0628485679626465, - "learning_rate": 2.714829951804716e-07, - "loss": 0.6772, + "epoch": 0.8610348468848997, + "grad_norm": 2.351215362548828, + "learning_rate": 6.971805594851002e-07, + "loss": 0.6761, "step": 12231 }, { - "epoch": 0.9255798115848813, - "grad_norm": 2.3678271770477295, - "learning_rate": 2.709342647938244e-07, - "loss": 0.6361, + "epoch": 0.8611052446321718, + "grad_norm": 3.058062791824341, + "learning_rate": 6.964863257939425e-07, + "loss": 0.6762, "step": 12232 }, { - "epoch": 0.925655480307215, - "grad_norm": 2.2154617309570312, - "learning_rate": 2.703860810495057e-07, - "loss": 0.6324, + "epoch": 0.8611756423794439, + "grad_norm": 2.221510410308838, + "learning_rate": 6.957924198281704e-07, + "loss": 0.7208, "step": 12233 }, { - "epoch": 0.9257311490295487, - "grad_norm": 2.3665215969085693, - "learning_rate": 2.698384439818479e-07, - "loss": 0.6778, + "epoch": 0.8612460401267159, + "grad_norm": 1.9123719930648804, + "learning_rate": 6.950988416238586e-07, + "loss": 0.5892, "step": 12234 }, { - "epoch": 0.9258068177518822, - "grad_norm": 2.475618362426758, - "learning_rate": 2.692913536251416e-07, - "loss": 0.5842, + "epoch": 0.861316437873988, + "grad_norm": 1.9603482484817505, + "learning_rate": 6.944055912170695e-07, + "loss": 0.6283, "step": 12235 }, { - "epoch": 0.9258824864742159, - "grad_norm": 2.6356213092803955, - "learning_rate": 2.6874481001365035e-07, - "loss": 0.8052, + "epoch": 0.8613868356212602, + "grad_norm": 2.205613136291504, + "learning_rate": 6.937126686438444e-07, + "loss": 0.6198, "step": 12236 }, { - "epoch": 0.9259581551965496, - "grad_norm": 2.1997127532958984, - "learning_rate": 2.681988131815989e-07, - "loss": 0.564, + "epoch": 0.8614572333685322, + "grad_norm": 2.24139666557312, + "learning_rate": 6.930200739402093e-07, + "loss": 0.6363, "step": 12237 }, { - "epoch": 0.9260338239188831, - "grad_norm": 2.037262201309204, - "learning_rate": 2.676533631631798e-07, - "loss": 0.6311, + "epoch": 0.8615276311158043, + "grad_norm": 2.237309455871582, + "learning_rate": 6.923278071421704e-07, + "loss": 0.6405, "step": 12238 }, { - "epoch": 0.9261094926412168, - "grad_norm": 1.9841945171356201, - "learning_rate": 2.6710845999255076e-07, - "loss": 0.6565, + "epoch": 0.8615980288630763, + "grad_norm": 1.9342663288116455, + "learning_rate": 6.916358682857235e-07, + "loss": 0.6623, "step": 12239 }, { - "epoch": 0.9261851613635503, - "grad_norm": 2.244075298309326, - "learning_rate": 2.6656410370383544e-07, - "loss": 0.614, + "epoch": 0.8616684266103485, + "grad_norm": 1.7722220420837402, + "learning_rate": 6.909442574068392e-07, + "loss": 0.7095, "step": 12240 }, { - "epoch": 0.926260830085884, - "grad_norm": 2.1905736923217773, - "learning_rate": 2.660202943311246e-07, - "loss": 0.8046, + "epoch": 0.8617388243576205, + "grad_norm": 1.8531336784362793, + "learning_rate": 6.90252974541478e-07, + "loss": 0.621, "step": 12241 }, { - "epoch": 0.9263364988082177, - "grad_norm": 2.341736078262329, - "learning_rate": 2.6547703190847105e-07, - "loss": 0.6139, + "epoch": 0.8618092221048926, + "grad_norm": 1.9714322090148926, + "learning_rate": 6.895620197255794e-07, + "loss": 0.7039, "step": 12242 }, { - "epoch": 0.9264121675305512, - "grad_norm": 2.3669471740722656, - "learning_rate": 2.649343164698965e-07, - "loss": 0.7966, + "epoch": 0.8618796198521648, + "grad_norm": 2.0464155673980713, + "learning_rate": 6.88871392995066e-07, + "loss": 0.6278, "step": 12243 }, { - "epoch": 0.9264878362528849, - "grad_norm": 2.272200345993042, - "learning_rate": 2.643921480493888e-07, - "loss": 0.6013, + "epoch": 0.8619500175994368, + "grad_norm": 1.8525066375732422, + "learning_rate": 6.881810943858433e-07, + "loss": 0.8225, "step": 12244 }, { - "epoch": 0.9265635049752184, - "grad_norm": 2.1507675647735596, - "learning_rate": 2.6385052668089784e-07, - "loss": 0.6805, + "epoch": 0.8620204153467089, + "grad_norm": 2.530982494354248, + "learning_rate": 6.874911239338025e-07, + "loss": 0.6432, "step": 12245 }, { - "epoch": 0.9266391736975521, - "grad_norm": 2.2726826667785645, - "learning_rate": 2.6330945239834336e-07, - "loss": 0.5546, + "epoch": 0.862090813093981, + "grad_norm": 1.8591593503952026, + "learning_rate": 6.868014816748157e-07, + "loss": 0.6734, "step": 12246 }, { - "epoch": 0.9267148424198858, - "grad_norm": 2.1980509757995605, - "learning_rate": 2.6276892523560934e-07, - "loss": 0.6105, + "epoch": 0.8621612108412531, + "grad_norm": 2.10263729095459, + "learning_rate": 6.861121676447377e-07, + "loss": 0.6079, "step": 12247 }, { - "epoch": 0.9267905111422193, - "grad_norm": 2.0372631549835205, - "learning_rate": 2.6222894522654375e-07, - "loss": 0.6964, + "epoch": 0.8622316085885252, + "grad_norm": 2.0360233783721924, + "learning_rate": 6.854231818794063e-07, + "loss": 0.6553, "step": 12248 }, { - "epoch": 0.926866179864553, - "grad_norm": 2.292754888534546, - "learning_rate": 2.6168951240496443e-07, - "loss": 0.761, + "epoch": 0.8623020063357972, + "grad_norm": 2.0961086750030518, + "learning_rate": 6.847345244146414e-07, + "loss": 0.6672, "step": 12249 }, { - "epoch": 0.9269418485868867, - "grad_norm": 2.1714630126953125, - "learning_rate": 2.611506268046494e-07, - "loss": 0.6707, + "epoch": 0.8623724040830694, + "grad_norm": 1.8538607358932495, + "learning_rate": 6.84046195286249e-07, + "loss": 0.5681, "step": 12250 }, { - "epoch": 0.9270175173092202, - "grad_norm": 2.125967025756836, - "learning_rate": 2.606122884593477e-07, - "loss": 0.6586, + "epoch": 0.8624428018303414, + "grad_norm": 1.9557244777679443, + "learning_rate": 6.833581945300139e-07, + "loss": 0.6575, "step": 12251 }, { - "epoch": 0.9270931860315539, - "grad_norm": 2.231060266494751, - "learning_rate": 2.6007449740277235e-07, - "loss": 0.5914, + "epoch": 0.8625131995776135, + "grad_norm": 1.946211814880371, + "learning_rate": 6.826705221817086e-07, + "loss": 0.6134, "step": 12252 }, { - "epoch": 0.9271688547538874, - "grad_norm": 2.295248031616211, - "learning_rate": 2.5953725366859836e-07, - "loss": 0.6735, + "epoch": 0.8625835973248857, + "grad_norm": 2.5263001918792725, + "learning_rate": 6.819831782770832e-07, + "loss": 0.7783, "step": 12253 }, { - "epoch": 0.9272445234762211, - "grad_norm": 3.4666316509246826, - "learning_rate": 2.590005572904729e-07, - "loss": 0.6238, + "epoch": 0.8626539950721577, + "grad_norm": 1.810930609703064, + "learning_rate": 6.812961628518751e-07, + "loss": 0.6221, "step": 12254 }, { - "epoch": 0.9273201921985548, - "grad_norm": 2.405599355697632, - "learning_rate": 2.5846440830200404e-07, - "loss": 0.7202, + "epoch": 0.8627243928194298, + "grad_norm": 1.719546914100647, + "learning_rate": 6.806094759418009e-07, + "loss": 0.6943, "step": 12255 }, { - "epoch": 0.9273958609208883, - "grad_norm": 2.2995810508728027, - "learning_rate": 2.579288067367679e-07, - "loss": 0.6795, + "epoch": 0.8627947905667018, + "grad_norm": 1.71648371219635, + "learning_rate": 6.799231175825638e-07, + "loss": 0.5387, "step": 12256 }, { - "epoch": 0.927471529643222, - "grad_norm": 2.8682172298431396, - "learning_rate": 2.5739375262830464e-07, - "loss": 0.5819, + "epoch": 0.862865188313974, + "grad_norm": 2.33948016166687, + "learning_rate": 6.792370878098464e-07, + "loss": 0.7657, "step": 12257 }, { - "epoch": 0.9275471983655555, - "grad_norm": 3.253770589828491, - "learning_rate": 2.5685924601012157e-07, - "loss": 0.6481, + "epoch": 0.862935586061246, + "grad_norm": 1.9201332330703735, + "learning_rate": 6.785513866593191e-07, + "loss": 0.6817, "step": 12258 }, { - "epoch": 0.9276228670878892, - "grad_norm": 2.7173924446105957, - "learning_rate": 2.563252869156908e-07, - "loss": 0.4945, + "epoch": 0.8630059838085181, + "grad_norm": 1.9587985277175903, + "learning_rate": 6.778660141666294e-07, + "loss": 0.6202, "step": 12259 }, { - "epoch": 0.9276985358102229, - "grad_norm": 2.2100868225097656, - "learning_rate": 2.5579187537845164e-07, - "loss": 0.619, + "epoch": 0.8630763815557903, + "grad_norm": 2.7265472412109375, + "learning_rate": 6.771809703674125e-07, + "loss": 0.7218, "step": 12260 }, { - "epoch": 0.9277742045325564, - "grad_norm": 2.2828118801116943, - "learning_rate": 2.552590114318073e-07, - "loss": 0.783, + "epoch": 0.8631467793030623, + "grad_norm": 1.7775119543075562, + "learning_rate": 6.764962552972811e-07, + "loss": 0.6527, "step": 12261 }, { - "epoch": 0.9278498732548901, - "grad_norm": 2.5905113220214844, - "learning_rate": 2.5472669510912916e-07, - "loss": 0.6722, + "epoch": 0.8632171770503344, + "grad_norm": 1.885414958000183, + "learning_rate": 6.758118689918378e-07, + "loss": 0.7085, "step": 12262 }, { - "epoch": 0.9279255419772238, - "grad_norm": 2.3828647136688232, - "learning_rate": 2.5419492644374855e-07, - "loss": 0.6373, + "epoch": 0.8632875747976064, + "grad_norm": 1.796035885810852, + "learning_rate": 6.751278114866615e-07, + "loss": 0.6002, "step": 12263 }, { - "epoch": 0.9280012106995573, - "grad_norm": 2.7237589359283447, - "learning_rate": 2.536637054689698e-07, - "loss": 0.697, + "epoch": 0.8633579725448786, + "grad_norm": 1.932136058807373, + "learning_rate": 6.744440828173194e-07, + "loss": 0.6878, "step": 12264 }, { - "epoch": 0.928076879421891, - "grad_norm": 2.399646282196045, - "learning_rate": 2.531330322180593e-07, - "loss": 0.5974, + "epoch": 0.8634283702921507, + "grad_norm": 2.3438398838043213, + "learning_rate": 6.737606830193575e-07, + "loss": 0.7259, "step": 12265 }, { - "epoch": 0.9281525481442245, - "grad_norm": 2.0155105590820312, - "learning_rate": 2.5260290672424947e-07, - "loss": 0.5291, + "epoch": 0.8634987680394227, + "grad_norm": 1.8484212160110474, + "learning_rate": 6.730776121283091e-07, + "loss": 0.6813, "step": 12266 }, { - "epoch": 0.9282282168665582, - "grad_norm": 2.4780659675598145, - "learning_rate": 2.5207332902073776e-07, - "loss": 0.6977, + "epoch": 0.8635691657866948, + "grad_norm": 1.73306143283844, + "learning_rate": 6.723948701796838e-07, + "loss": 0.6487, "step": 12267 }, { - "epoch": 0.9283038855888919, - "grad_norm": 2.1585497856140137, - "learning_rate": 2.5154429914068764e-07, - "loss": 0.7286, + "epoch": 0.8636395635339669, + "grad_norm": 1.9522175788879395, + "learning_rate": 6.71712457208981e-07, + "loss": 0.6972, "step": 12268 }, { - "epoch": 0.9283795543112254, - "grad_norm": 2.1875948905944824, - "learning_rate": 2.510158171172296e-07, - "loss": 0.6065, + "epoch": 0.863709961281239, + "grad_norm": 1.8504985570907593, + "learning_rate": 6.710303732516772e-07, + "loss": 0.5603, "step": 12269 }, { - "epoch": 0.9284552230335591, - "grad_norm": 2.345557928085327, - "learning_rate": 2.5048788298345926e-07, - "loss": 0.4536, + "epoch": 0.8637803590285111, + "grad_norm": 1.7786047458648682, + "learning_rate": 6.70348618343237e-07, + "loss": 0.7636, "step": 12270 }, { - "epoch": 0.9285308917558927, - "grad_norm": 2.446751356124878, - "learning_rate": 2.4996049677243703e-07, - "loss": 0.6631, + "epoch": 0.8638507567757832, + "grad_norm": 2.256232738494873, + "learning_rate": 6.696671925191035e-07, + "loss": 0.7214, "step": 12271 }, { - "epoch": 0.9286065604782263, - "grad_norm": 2.7235686779022217, - "learning_rate": 2.494336585171896e-07, - "loss": 0.6673, + "epoch": 0.8639211545230553, + "grad_norm": 1.9394261837005615, + "learning_rate": 6.689860958147089e-07, + "loss": 0.5906, "step": 12272 }, { - "epoch": 0.92868222920056, - "grad_norm": 2.1927380561828613, - "learning_rate": 2.489073682507105e-07, - "loss": 0.6072, + "epoch": 0.8639915522703273, + "grad_norm": 1.7506892681121826, + "learning_rate": 6.68305328265458e-07, + "loss": 0.6271, "step": 12273 }, { - "epoch": 0.9287578979228935, - "grad_norm": 2.9181969165802, - "learning_rate": 2.483816260059534e-07, - "loss": 0.6484, + "epoch": 0.8640619500175994, + "grad_norm": 2.4459354877471924, + "learning_rate": 6.676248899067486e-07, + "loss": 0.5533, "step": 12274 }, { - "epoch": 0.9288335666452272, - "grad_norm": 2.1326992511749268, - "learning_rate": 2.4785643181584696e-07, - "loss": 0.7141, + "epoch": 0.8641323477648716, + "grad_norm": 1.8939160108566284, + "learning_rate": 6.669447807739554e-07, + "loss": 0.5894, "step": 12275 }, { - "epoch": 0.9289092353675609, - "grad_norm": 2.227208375930786, - "learning_rate": 2.4733178571327887e-07, - "loss": 0.554, + "epoch": 0.8642027455121436, + "grad_norm": 2.035979747772217, + "learning_rate": 6.662650009024391e-07, + "loss": 0.5725, "step": 12276 }, { - "epoch": 0.9289849040898944, - "grad_norm": 2.5068063735961914, - "learning_rate": 2.4680768773110383e-07, - "loss": 0.7795, + "epoch": 0.8642731432594157, + "grad_norm": 1.753029704093933, + "learning_rate": 6.655855503275408e-07, + "loss": 0.5431, "step": 12277 }, { - "epoch": 0.9290605728122281, - "grad_norm": 2.883193016052246, - "learning_rate": 2.462841379021417e-07, - "loss": 0.6704, + "epoch": 0.8643435410066878, + "grad_norm": 24.344524383544922, + "learning_rate": 6.649064290845891e-07, + "loss": 0.5504, "step": 12278 }, { - "epoch": 0.9291362415345616, - "grad_norm": 2.217097520828247, - "learning_rate": 2.4576113625918005e-07, - "loss": 0.6113, + "epoch": 0.8644139387539599, + "grad_norm": 2.0214591026306152, + "learning_rate": 6.642276372088864e-07, + "loss": 0.5853, "step": 12279 }, { - "epoch": 0.9292119102568953, - "grad_norm": 3.0743801593780518, - "learning_rate": 2.4523868283497186e-07, - "loss": 0.7329, + "epoch": 0.8644843365012319, + "grad_norm": 1.9576324224472046, + "learning_rate": 6.635491747357281e-07, + "loss": 0.494, "step": 12280 }, { - "epoch": 0.929287578979229, - "grad_norm": 2.3578438758850098, - "learning_rate": 2.447167776622329e-07, - "loss": 0.7069, + "epoch": 0.864554734248504, + "grad_norm": 1.9089299440383911, + "learning_rate": 6.628710417003863e-07, + "loss": 0.6052, "step": 12281 }, { - "epoch": 0.9293632477015625, - "grad_norm": 2.4407949447631836, - "learning_rate": 2.44195420773647e-07, - "loss": 0.6827, + "epoch": 0.8646251319957762, + "grad_norm": 1.6759536266326904, + "learning_rate": 6.621932381381194e-07, + "loss": 0.6235, "step": 12282 }, { - "epoch": 0.9294389164238962, - "grad_norm": 2.97994065284729, - "learning_rate": 2.4367461220186406e-07, - "loss": 0.572, + "epoch": 0.8646955297430482, + "grad_norm": 1.6584700345993042, + "learning_rate": 6.615157640841641e-07, + "loss": 0.5806, "step": 12283 }, { - "epoch": 0.9295145851462298, - "grad_norm": 2.102031946182251, - "learning_rate": 2.43154351979498e-07, - "loss": 0.685, + "epoch": 0.8647659274903203, + "grad_norm": 2.0703604221343994, + "learning_rate": 6.608386195737479e-07, + "loss": 0.6998, "step": 12284 }, { - "epoch": 0.9295902538685634, - "grad_norm": 2.1757426261901855, - "learning_rate": 2.426346401391287e-07, - "loss": 0.6041, + "epoch": 0.8648363252375924, + "grad_norm": 2.2783470153808594, + "learning_rate": 6.601618046420697e-07, + "loss": 0.5912, "step": 12285 }, { - "epoch": 0.9296659225908971, - "grad_norm": 2.5137274265289307, - "learning_rate": 2.4211547671330423e-07, - "loss": 0.6202, + "epoch": 0.8649067229848645, + "grad_norm": 1.6384145021438599, + "learning_rate": 6.594853193243232e-07, + "loss": 0.5489, "step": 12286 }, { - "epoch": 0.9297415913132306, - "grad_norm": 2.278620719909668, - "learning_rate": 2.415968617345355e-07, - "loss": 0.7326, + "epoch": 0.8649771207321366, + "grad_norm": 2.1358044147491455, + "learning_rate": 6.588091636556753e-07, + "loss": 0.7793, "step": 12287 }, { - "epoch": 0.9298172600355643, - "grad_norm": 1.830971360206604, - "learning_rate": 2.410787952352986e-07, - "loss": 0.6772, + "epoch": 0.8650475184794086, + "grad_norm": 2.850532293319702, + "learning_rate": 6.581333376712832e-07, + "loss": 0.6101, "step": 12288 }, { - "epoch": 0.929892928757898, - "grad_norm": 2.2332699298858643, - "learning_rate": 2.4056127724803656e-07, - "loss": 0.6575, + "epoch": 0.8651179162266808, + "grad_norm": 1.9525861740112305, + "learning_rate": 6.574578414062811e-07, + "loss": 0.6478, "step": 12289 }, { - "epoch": 0.9299685974802315, - "grad_norm": 3.241168975830078, - "learning_rate": 2.400443078051604e-07, - "loss": 0.7853, + "epoch": 0.8651883139739528, + "grad_norm": 1.7365671396255493, + "learning_rate": 6.567826748957927e-07, + "loss": 0.6787, "step": 12290 }, { - "epoch": 0.9300442662025652, - "grad_norm": 1.8960295915603638, - "learning_rate": 2.3952788693904125e-07, - "loss": 0.5753, + "epoch": 0.8652587117212249, + "grad_norm": 2.0831897258758545, + "learning_rate": 6.561078381749152e-07, + "loss": 0.6971, "step": 12291 }, { - "epoch": 0.9301199349248988, - "grad_norm": 2.006786584854126, - "learning_rate": 2.3901201468202126e-07, - "loss": 0.5916, + "epoch": 0.8653291094684971, + "grad_norm": 2.1527087688446045, + "learning_rate": 6.554333312787378e-07, + "loss": 0.6744, "step": 12292 }, { - "epoch": 0.9301956036472324, - "grad_norm": 2.6823008060455322, - "learning_rate": 2.3849669106640557e-07, - "loss": 0.7097, + "epoch": 0.8653995072157691, + "grad_norm": 1.6069968938827515, + "learning_rate": 6.54759154242326e-07, + "loss": 0.6252, "step": 12293 }, { - "epoch": 0.9302712723695661, - "grad_norm": 1.6065900325775146, - "learning_rate": 2.379819161244654e-07, - "loss": 0.5853, + "epoch": 0.8654699049630412, + "grad_norm": 2.120293617248535, + "learning_rate": 6.540853071007341e-07, + "loss": 0.6082, "step": 12294 }, { - "epoch": 0.9303469410918996, - "grad_norm": 3.367147207260132, - "learning_rate": 2.3746768988843693e-07, - "loss": 0.7193, + "epoch": 0.8655403027103132, + "grad_norm": 1.8694043159484863, + "learning_rate": 6.534117898889932e-07, + "loss": 0.6347, "step": 12295 }, { - "epoch": 0.9304226098142333, - "grad_norm": 2.1877822875976562, - "learning_rate": 2.3695401239052338e-07, - "loss": 0.7386, + "epoch": 0.8656107004575854, + "grad_norm": 1.883455514907837, + "learning_rate": 6.527386026421219e-07, + "loss": 0.6206, "step": 12296 }, { - "epoch": 0.9304982785365669, - "grad_norm": 2.3759443759918213, - "learning_rate": 2.3644088366289208e-07, - "loss": 0.6824, + "epoch": 0.8656810982048574, + "grad_norm": 1.9862228631973267, + "learning_rate": 6.520657453951188e-07, + "loss": 0.6563, "step": 12297 }, { - "epoch": 0.9305739472589005, - "grad_norm": 1.9915658235549927, - "learning_rate": 2.3592830373767925e-07, - "loss": 0.7018, + "epoch": 0.8657514959521295, + "grad_norm": 2.9440689086914062, + "learning_rate": 6.513932181829658e-07, + "loss": 0.5369, "step": 12298 }, { - "epoch": 0.9306496159812342, - "grad_norm": 2.2175698280334473, - "learning_rate": 2.3541627264698028e-07, - "loss": 0.7714, + "epoch": 0.8658218936994017, + "grad_norm": 1.6704999208450317, + "learning_rate": 6.507210210406305e-07, + "loss": 0.486, "step": 12299 }, { - "epoch": 0.9307252847035677, - "grad_norm": 2.8158113956451416, - "learning_rate": 2.349047904228635e-07, - "loss": 0.6239, + "epoch": 0.8658922914466737, + "grad_norm": 1.7548023462295532, + "learning_rate": 6.500491540030582e-07, + "loss": 0.5713, "step": 12300 }, { - "epoch": 0.9308009534259014, - "grad_norm": 2.3479740619659424, - "learning_rate": 2.3439385709735928e-07, - "loss": 0.7483, + "epoch": 0.8659626891939458, + "grad_norm": 1.8087455034255981, + "learning_rate": 6.493776171051817e-07, + "loss": 0.6688, "step": 12301 }, { - "epoch": 0.9308766221482351, - "grad_norm": 2.05190372467041, - "learning_rate": 2.3388347270246202e-07, - "loss": 0.5696, + "epoch": 0.8660330869412178, + "grad_norm": 2.422179937362671, + "learning_rate": 6.487064103819152e-07, + "loss": 0.6866, "step": 12302 }, { - "epoch": 0.9309522908705686, - "grad_norm": 2.0475738048553467, - "learning_rate": 2.3337363727013515e-07, - "loss": 0.5486, + "epoch": 0.86610348468849, + "grad_norm": 1.82688570022583, + "learning_rate": 6.480355338681547e-07, + "loss": 0.5653, "step": 12303 }, { - "epoch": 0.9310279595929023, - "grad_norm": 2.140483856201172, - "learning_rate": 2.3286435083230618e-07, - "loss": 0.6311, + "epoch": 0.8661738824357621, + "grad_norm": 2.292933702468872, + "learning_rate": 6.473649875987773e-07, + "loss": 0.6276, "step": 12304 }, { - "epoch": 0.9311036283152359, - "grad_norm": 2.0714380741119385, - "learning_rate": 2.3235561342086753e-07, - "loss": 0.6138, + "epoch": 0.8662442801830341, + "grad_norm": 2.2834343910217285, + "learning_rate": 6.466947716086495e-07, + "loss": 0.5951, "step": 12305 }, { - "epoch": 0.9311792970375695, - "grad_norm": 2.3134987354278564, - "learning_rate": 2.3184742506767775e-07, - "loss": 0.6722, + "epoch": 0.8663146779303063, + "grad_norm": 2.302008867263794, + "learning_rate": 6.460248859326126e-07, + "loss": 0.6537, "step": 12306 }, { - "epoch": 0.9312549657599032, - "grad_norm": 2.5604090690612793, - "learning_rate": 2.313397858045624e-07, - "loss": 0.708, + "epoch": 0.8663850756775783, + "grad_norm": 2.095219612121582, + "learning_rate": 6.453553306054981e-07, + "loss": 0.5673, "step": 12307 }, { - "epoch": 0.9313306344822367, - "grad_norm": 3.3896567821502686, - "learning_rate": 2.30832695663311e-07, - "loss": 0.6125, + "epoch": 0.8664554734248504, + "grad_norm": 2.066807508468628, + "learning_rate": 6.446861056621147e-07, + "loss": 0.6117, "step": 12308 }, { - "epoch": 0.9314063032045704, - "grad_norm": 1.9573801755905151, - "learning_rate": 2.303261546756802e-07, - "loss": 0.5785, + "epoch": 0.8665258711721225, + "grad_norm": 2.1426174640655518, + "learning_rate": 6.440172111372565e-07, + "loss": 0.6145, "step": 12309 }, { - "epoch": 0.931481971926904, - "grad_norm": 1.7080085277557373, - "learning_rate": 2.298201628733876e-07, - "loss": 0.7792, + "epoch": 0.8665962689193946, + "grad_norm": 1.8237351179122925, + "learning_rate": 6.43348647065699e-07, + "loss": 0.6372, "step": 12310 }, { - "epoch": 0.9315576406492376, - "grad_norm": 1.9010158777236938, - "learning_rate": 2.2931472028812384e-07, - "loss": 0.5984, + "epoch": 0.8666666666666667, + "grad_norm": 2.8520400524139404, + "learning_rate": 6.426804134822034e-07, + "loss": 0.6482, "step": 12311 }, { - "epoch": 0.9316333093715713, - "grad_norm": 2.1762309074401855, - "learning_rate": 2.2880982695154162e-07, - "loss": 0.6983, + "epoch": 0.8667370644139387, + "grad_norm": 2.030606746673584, + "learning_rate": 6.420125104215087e-07, + "loss": 0.6361, "step": 12312 }, { - "epoch": 0.9317089780939048, - "grad_norm": 1.8109301328659058, - "learning_rate": 2.2830548289525666e-07, - "loss": 0.6687, + "epoch": 0.8668074621612109, + "grad_norm": 1.8557727336883545, + "learning_rate": 6.413449379183438e-07, + "loss": 0.6086, "step": 12313 }, { - "epoch": 0.9317846468162385, - "grad_norm": 1.976968765258789, - "learning_rate": 2.2780168815085267e-07, - "loss": 0.6647, + "epoch": 0.866877859908483, + "grad_norm": 1.845473289489746, + "learning_rate": 6.406776960074139e-07, + "loss": 0.6118, "step": 12314 }, { - "epoch": 0.9318603155385722, - "grad_norm": 3.7262067794799805, - "learning_rate": 2.2729844274987942e-07, - "loss": 0.6439, + "epoch": 0.866948257655755, + "grad_norm": 1.9028384685516357, + "learning_rate": 6.4001078472341e-07, + "loss": 0.7202, "step": 12315 }, { - "epoch": 0.9319359842609057, - "grad_norm": 1.9241440296173096, - "learning_rate": 2.2679574672385272e-07, - "loss": 0.7134, + "epoch": 0.8670186554030271, + "grad_norm": 1.9366062879562378, + "learning_rate": 6.393442041010039e-07, + "loss": 0.6053, "step": 12316 }, { - "epoch": 0.9320116529832394, - "grad_norm": 2.4006447792053223, - "learning_rate": 2.2629360010425237e-07, - "loss": 0.598, + "epoch": 0.8670890531502992, + "grad_norm": 4.0813374519348145, + "learning_rate": 6.386779541748545e-07, + "loss": 0.6177, "step": 12317 }, { - "epoch": 0.932087321705573, - "grad_norm": 2.9187188148498535, - "learning_rate": 2.2579200292252422e-07, - "loss": 0.7099, + "epoch": 0.8671594508975713, + "grad_norm": 1.9982905387878418, + "learning_rate": 6.380120349795982e-07, + "loss": 0.6814, "step": 12318 }, { - "epoch": 0.9321629904279066, - "grad_norm": 2.171943426132202, - "learning_rate": 2.2529095521008114e-07, - "loss": 0.6414, + "epoch": 0.8672298486448433, + "grad_norm": 1.788461685180664, + "learning_rate": 6.373464465498594e-07, + "loss": 0.6626, "step": 12319 }, { - "epoch": 0.9322386591502403, - "grad_norm": 2.2062504291534424, - "learning_rate": 2.2479045699829803e-07, - "loss": 0.653, + "epoch": 0.8673002463921154, + "grad_norm": 1.9240549802780151, + "learning_rate": 6.366811889202403e-07, + "loss": 0.6193, "step": 12320 }, { - "epoch": 0.9323143278725738, - "grad_norm": 2.55442476272583, - "learning_rate": 2.2429050831851882e-07, - "loss": 0.6587, + "epoch": 0.8673706441393876, + "grad_norm": 1.9687443971633911, + "learning_rate": 6.3601626212533e-07, + "loss": 0.6119, "step": 12321 }, { - "epoch": 0.9323899965949075, - "grad_norm": 2.2659802436828613, - "learning_rate": 2.2379110920205248e-07, - "loss": 0.6878, + "epoch": 0.8674410418866596, + "grad_norm": 2.328357219696045, + "learning_rate": 6.353516661996969e-07, + "loss": 0.6782, "step": 12322 }, { - "epoch": 0.9324656653172411, - "grad_norm": 3.343369245529175, - "learning_rate": 2.2329225968017296e-07, - "loss": 0.7144, + "epoch": 0.8675114396339317, + "grad_norm": 1.6942709684371948, + "learning_rate": 6.346874011778951e-07, + "loss": 0.6259, "step": 12323 }, { - "epoch": 0.9325413340395747, - "grad_norm": 1.9788143634796143, - "learning_rate": 2.2279395978411932e-07, - "loss": 0.5868, + "epoch": 0.8675818373812038, + "grad_norm": 2.30438494682312, + "learning_rate": 6.340234670944597e-07, + "loss": 0.7025, "step": 12324 }, { - "epoch": 0.9326170027619084, - "grad_norm": 3.297405481338501, - "learning_rate": 2.2229620954509554e-07, - "loss": 0.6762, + "epoch": 0.8676522351284759, + "grad_norm": 1.968029499053955, + "learning_rate": 6.333598639839106e-07, + "loss": 0.6074, "step": 12325 }, { - "epoch": 0.932692671484242, - "grad_norm": 2.0477468967437744, - "learning_rate": 2.2179900899427574e-07, - "loss": 0.5926, + "epoch": 0.867722632875748, + "grad_norm": 1.9899897575378418, + "learning_rate": 6.326965918807479e-07, + "loss": 0.7077, "step": 12326 }, { - "epoch": 0.9327683402065756, - "grad_norm": 2.3132870197296143, - "learning_rate": 2.21302358162793e-07, - "loss": 0.6212, + "epoch": 0.86779303062302, + "grad_norm": 1.8205324411392212, + "learning_rate": 6.320336508194558e-07, + "loss": 0.6521, "step": 12327 }, { - "epoch": 0.9328440089289093, - "grad_norm": 2.395486354827881, - "learning_rate": 2.208062570817514e-07, - "loss": 0.664, + "epoch": 0.8678634283702922, + "grad_norm": 1.8948554992675781, + "learning_rate": 6.31371040834501e-07, + "loss": 0.6306, "step": 12328 }, { - "epoch": 0.9329196776512428, - "grad_norm": 2.0944366455078125, - "learning_rate": 2.2031070578221612e-07, - "loss": 0.6979, + "epoch": 0.8679338261175642, + "grad_norm": 1.7037118673324585, + "learning_rate": 6.30708761960334e-07, + "loss": 0.5964, "step": 12329 }, { - "epoch": 0.9329953463735765, - "grad_norm": 2.699592351913452, - "learning_rate": 2.1981570429522134e-07, - "loss": 0.6174, + "epoch": 0.8680042238648363, + "grad_norm": 1.9155439138412476, + "learning_rate": 6.300468142313862e-07, + "loss": 0.6721, "step": 12330 }, { - "epoch": 0.9330710150959101, - "grad_norm": 2.9009218215942383, - "learning_rate": 2.1932125265176628e-07, - "loss": 0.7047, + "epoch": 0.8680746216121085, + "grad_norm": 2.0127551555633545, + "learning_rate": 6.293851976820736e-07, + "loss": 0.6299, "step": 12331 }, { - "epoch": 0.9331466838182437, - "grad_norm": 2.090672254562378, - "learning_rate": 2.1882735088281414e-07, - "loss": 0.7085, + "epoch": 0.8681450193593805, + "grad_norm": 1.697637915611267, + "learning_rate": 6.28723912346795e-07, + "loss": 0.5587, "step": 12332 }, { - "epoch": 0.9332223525405774, - "grad_norm": 2.3523049354553223, - "learning_rate": 2.1833399901929618e-07, - "loss": 0.5658, + "epoch": 0.8682154171066526, + "grad_norm": 1.9747779369354248, + "learning_rate": 6.280629582599309e-07, + "loss": 0.7881, "step": 12333 }, { - "epoch": 0.933298021262911, - "grad_norm": 1.9913511276245117, - "learning_rate": 2.178411970921057e-07, - "loss": 0.6611, + "epoch": 0.8682858148539246, + "grad_norm": 1.8473942279815674, + "learning_rate": 6.274023354558423e-07, + "loss": 0.6125, "step": 12334 }, { - "epoch": 0.9333736899852446, - "grad_norm": 2.0843513011932373, - "learning_rate": 2.1734894513210303e-07, - "loss": 0.64, + "epoch": 0.8683562126011968, + "grad_norm": 1.9317309856414795, + "learning_rate": 6.26742043968879e-07, + "loss": 0.7428, "step": 12335 }, { - "epoch": 0.9334493587075782, - "grad_norm": 2.296250343322754, - "learning_rate": 2.1685724317011746e-07, - "loss": 0.6017, + "epoch": 0.8684266103484688, + "grad_norm": 1.9018001556396484, + "learning_rate": 6.260820838333678e-07, + "loss": 0.6234, "step": 12336 }, { - "epoch": 0.9335250274299118, - "grad_norm": 2.035994291305542, - "learning_rate": 2.163660912369404e-07, - "loss": 0.6316, + "epoch": 0.8684970080957409, + "grad_norm": 1.9823906421661377, + "learning_rate": 6.254224550836226e-07, + "loss": 0.634, "step": 12337 }, { - "epoch": 0.9336006961522455, - "grad_norm": 2.133603096008301, - "learning_rate": 2.1587548936332723e-07, - "loss": 0.7112, + "epoch": 0.8685674058430131, + "grad_norm": 2.0588581562042236, + "learning_rate": 6.247631577539368e-07, + "loss": 0.6893, "step": 12338 }, { - "epoch": 0.9336763648745791, - "grad_norm": 9.666108131408691, - "learning_rate": 2.1538543758000239e-07, - "loss": 0.6514, + "epoch": 0.8686378035902851, + "grad_norm": 2.0016465187072754, + "learning_rate": 6.241041918785887e-07, + "loss": 0.6497, "step": 12339 }, { - "epoch": 0.9337520335969127, - "grad_norm": 2.0413687229156494, - "learning_rate": 2.1489593591765434e-07, - "loss": 0.7346, + "epoch": 0.8687082013375572, + "grad_norm": 1.9175257682800293, + "learning_rate": 6.234455574918366e-07, + "loss": 0.6145, "step": 12340 }, { - "epoch": 0.9338277023192464, - "grad_norm": 7.327420711517334, - "learning_rate": 2.144069844069365e-07, - "loss": 0.6501, + "epoch": 0.8687785990848292, + "grad_norm": 2.6499195098876953, + "learning_rate": 6.227872546279268e-07, + "loss": 0.7077, "step": 12341 }, { - "epoch": 0.9339033710415799, - "grad_norm": 2.2991442680358887, - "learning_rate": 2.1391858307847045e-07, - "loss": 0.7028, + "epoch": 0.8688489968321014, + "grad_norm": 2.0057642459869385, + "learning_rate": 6.221292833210814e-07, + "loss": 0.5858, "step": 12342 }, { - "epoch": 0.9339790397639136, - "grad_norm": 10.1016845703125, - "learning_rate": 2.134307319628397e-07, - "loss": 0.6521, + "epoch": 0.8689193945793735, + "grad_norm": 1.6899651288986206, + "learning_rate": 6.214716436055115e-07, + "loss": 0.5702, "step": 12343 }, { - "epoch": 0.9340547084862472, - "grad_norm": 1.8840327262878418, - "learning_rate": 2.1294343109059677e-07, - "loss": 0.6156, + "epoch": 0.8689897923266455, + "grad_norm": 1.6656993627548218, + "learning_rate": 6.208143355154082e-07, + "loss": 0.6558, "step": 12344 }, { - "epoch": 0.9341303772085808, - "grad_norm": 1.7931628227233887, - "learning_rate": 2.124566804922563e-07, - "loss": 0.632, + "epoch": 0.8690601900739177, + "grad_norm": 1.9014239311218262, + "learning_rate": 6.201573590849438e-07, + "loss": 0.6759, "step": 12345 }, { - "epoch": 0.9342060459309145, - "grad_norm": 2.3598361015319824, - "learning_rate": 2.119704801982999e-07, - "loss": 0.6864, + "epoch": 0.8691305878211897, + "grad_norm": 1.91402268409729, + "learning_rate": 6.195007143482764e-07, + "loss": 0.5364, "step": 12346 }, { - "epoch": 0.934281714653248, - "grad_norm": 1.6346832513809204, - "learning_rate": 2.114848302391772e-07, - "loss": 0.5736, + "epoch": 0.8692009855684618, + "grad_norm": 1.941772222518921, + "learning_rate": 6.188444013395454e-07, + "loss": 0.7203, "step": 12347 }, { - "epoch": 0.9343573833755817, - "grad_norm": 2.1589810848236084, - "learning_rate": 2.1099973064529987e-07, - "loss": 0.657, + "epoch": 0.869271383315734, + "grad_norm": 1.9245855808258057, + "learning_rate": 6.181884200928737e-07, + "loss": 0.6009, "step": 12348 }, { - "epoch": 0.9344330520979153, - "grad_norm": 3.102557897567749, - "learning_rate": 2.1051518144704562e-07, - "loss": 0.6275, + "epoch": 0.869341781063006, + "grad_norm": 2.39880108833313, + "learning_rate": 6.175327706423662e-07, + "loss": 0.7607, "step": 12349 }, { - "epoch": 0.9345087208202489, - "grad_norm": 2.09216046333313, - "learning_rate": 2.100311826747602e-07, - "loss": 0.7042, + "epoch": 0.8694121788102781, + "grad_norm": 2.0807368755340576, + "learning_rate": 6.168774530221098e-07, + "loss": 0.7003, "step": 12350 }, { - "epoch": 0.9345843895425826, - "grad_norm": 1.9065994024276733, - "learning_rate": 2.095477343587513e-07, - "loss": 0.6763, + "epoch": 0.8694825765575501, + "grad_norm": 1.5776948928833008, + "learning_rate": 6.162224672661743e-07, + "loss": 0.6656, "step": 12351 }, { - "epoch": 0.9346600582649162, - "grad_norm": 2.5319108963012695, - "learning_rate": 2.0906483652929576e-07, - "loss": 0.6599, + "epoch": 0.8695529743048223, + "grad_norm": 1.6801567077636719, + "learning_rate": 6.155678134086152e-07, + "loss": 0.5676, "step": 12352 }, { - "epoch": 0.9347357269872498, - "grad_norm": 2.6786136627197266, - "learning_rate": 2.0858248921663337e-07, - "loss": 0.7684, + "epoch": 0.8696233720520943, + "grad_norm": 1.8275877237319946, + "learning_rate": 6.149134914834668e-07, + "loss": 0.5115, "step": 12353 }, { - "epoch": 0.9348113957095835, - "grad_norm": 2.0246286392211914, - "learning_rate": 2.0810069245097097e-07, - "loss": 0.7829, + "epoch": 0.8696937697993664, + "grad_norm": 2.2930023670196533, + "learning_rate": 6.142595015247504e-07, + "loss": 0.5123, "step": 12354 }, { - "epoch": 0.934887064431917, - "grad_norm": 2.1122617721557617, - "learning_rate": 2.0761944626247942e-07, - "loss": 0.7041, + "epoch": 0.8697641675466385, + "grad_norm": 2.5919103622436523, + "learning_rate": 6.136058435664644e-07, + "loss": 0.6281, "step": 12355 }, { - "epoch": 0.9349627331542507, - "grad_norm": 2.1690616607666016, - "learning_rate": 2.0713875068129563e-07, - "loss": 0.6317, + "epoch": 0.8698345652939106, + "grad_norm": 2.013655662536621, + "learning_rate": 6.129525176425954e-07, + "loss": 0.6439, "step": 12356 }, { - "epoch": 0.9350384018765843, - "grad_norm": 2.203425407409668, - "learning_rate": 2.066586057375225e-07, - "loss": 0.7214, + "epoch": 0.8699049630411827, + "grad_norm": 1.983436107635498, + "learning_rate": 6.122995237871081e-07, + "loss": 0.5693, "step": 12357 }, { - "epoch": 0.9351140705989179, - "grad_norm": 2.0501766204833984, - "learning_rate": 2.0617901146122998e-07, - "loss": 0.595, + "epoch": 0.8699753607884547, + "grad_norm": 1.9990464448928833, + "learning_rate": 6.116468620339552e-07, + "loss": 0.6091, "step": 12358 }, { - "epoch": 0.9351897393212516, - "grad_norm": 3.1031334400177, - "learning_rate": 2.0569996788245005e-07, - "loss": 0.6128, + "epoch": 0.8700457585357269, + "grad_norm": 1.92135751247406, + "learning_rate": 6.109945324170654e-07, + "loss": 0.783, "step": 12359 }, { - "epoch": 0.9352654080435852, - "grad_norm": 2.129321336746216, - "learning_rate": 2.052214750311817e-07, - "loss": 0.6836, + "epoch": 0.870116156282999, + "grad_norm": 2.202084541320801, + "learning_rate": 6.103425349703578e-07, + "loss": 0.634, "step": 12360 }, { - "epoch": 0.9353410767659188, - "grad_norm": 2.268303871154785, - "learning_rate": 2.04743532937391e-07, - "loss": 0.6845, + "epoch": 0.870186554030271, + "grad_norm": 1.8450795412063599, + "learning_rate": 6.09690869727729e-07, + "loss": 0.6587, "step": 12361 }, { - "epoch": 0.9354167454882524, - "grad_norm": 2.151488780975342, - "learning_rate": 2.0426614163100698e-07, - "loss": 0.6448, + "epoch": 0.8702569517775431, + "grad_norm": 2.2180373668670654, + "learning_rate": 6.090395367230589e-07, + "loss": 0.5746, "step": 12362 }, { - "epoch": 0.935492414210586, - "grad_norm": 2.471512794494629, - "learning_rate": 2.0378930114192572e-07, - "loss": 0.6199, + "epoch": 0.8703273495248152, + "grad_norm": 2.175124406814575, + "learning_rate": 6.0838853599021e-07, + "loss": 0.666, "step": 12363 }, { - "epoch": 0.9355680829329197, - "grad_norm": 2.2034783363342285, - "learning_rate": 2.0331301150000935e-07, - "loss": 0.6998, + "epoch": 0.8703977472720873, + "grad_norm": 1.9541453123092651, + "learning_rate": 6.077378675630311e-07, + "loss": 0.5614, "step": 12364 }, { - "epoch": 0.9356437516552533, - "grad_norm": 2.128675937652588, - "learning_rate": 2.02837272735085e-07, - "loss": 0.7106, + "epoch": 0.8704681450193594, + "grad_norm": 1.8550477027893066, + "learning_rate": 6.070875314753481e-07, + "loss": 0.7179, "step": 12365 }, { - "epoch": 0.9357194203775869, - "grad_norm": 2.1250526905059814, - "learning_rate": 2.0236208487694285e-07, - "loss": 0.7074, + "epoch": 0.8705385427666315, + "grad_norm": 1.7106709480285645, + "learning_rate": 6.064375277609757e-07, + "loss": 0.5462, "step": 12366 }, { - "epoch": 0.9357950890999206, - "grad_norm": 3.409339189529419, - "learning_rate": 2.018874479553421e-07, - "loss": 0.6652, + "epoch": 0.8706089405139036, + "grad_norm": 2.519601821899414, + "learning_rate": 6.057878564537063e-07, + "loss": 0.6386, "step": 12367 }, { - "epoch": 0.9358707578222542, - "grad_norm": 2.3269472122192383, - "learning_rate": 2.0141336200000592e-07, - "loss": 0.5631, + "epoch": 0.8706793382611756, + "grad_norm": 1.831817865371704, + "learning_rate": 6.051385175873173e-07, + "loss": 0.7393, "step": 12368 }, { - "epoch": 0.9359464265445878, - "grad_norm": 2.5745363235473633, - "learning_rate": 2.0093982704062463e-07, - "loss": 0.6937, + "epoch": 0.8707497360084477, + "grad_norm": 1.8567488193511963, + "learning_rate": 6.044895111955677e-07, + "loss": 0.6745, "step": 12369 }, { - "epoch": 0.9360220952669214, - "grad_norm": 2.068528413772583, - "learning_rate": 2.0046684310684948e-07, - "loss": 0.7959, + "epoch": 0.8708201337557199, + "grad_norm": 1.7132319211959839, + "learning_rate": 6.038408373122008e-07, + "loss": 0.6774, "step": 12370 }, { - "epoch": 0.936097763989255, - "grad_norm": 2.4575674533843994, - "learning_rate": 1.9999441022830078e-07, - "loss": 0.6793, + "epoch": 0.8708905315029919, + "grad_norm": 2.3973114490509033, + "learning_rate": 6.031924959709406e-07, + "loss": 0.5934, "step": 12371 }, { - "epoch": 0.9361734327115887, - "grad_norm": 1.9595268964767456, - "learning_rate": 1.9952252843456685e-07, - "loss": 0.8498, + "epoch": 0.870960929250264, + "grad_norm": 1.7032952308654785, + "learning_rate": 6.025444872054969e-07, + "loss": 0.6769, "step": 12372 }, { - "epoch": 0.9362491014339223, - "grad_norm": 3.2010905742645264, - "learning_rate": 1.990511977551951e-07, - "loss": 0.6914, + "epoch": 0.871031326997536, + "grad_norm": 2.3492016792297363, + "learning_rate": 6.018968110495587e-07, + "loss": 0.677, "step": 12373 }, { - "epoch": 0.9363247701562559, - "grad_norm": 2.2969300746917725, - "learning_rate": 1.9858041821970386e-07, - "loss": 0.6583, + "epoch": 0.8711017247448082, + "grad_norm": 1.8388519287109375, + "learning_rate": 6.012494675368009e-07, + "loss": 0.6226, "step": 12374 }, { - "epoch": 0.9364004388785895, - "grad_norm": 2.4109859466552734, - "learning_rate": 1.9811018985757357e-07, - "loss": 0.7333, + "epoch": 0.8711721224920802, + "grad_norm": 1.8002142906188965, + "learning_rate": 6.006024567008768e-07, + "loss": 0.5597, "step": 12375 }, { - "epoch": 0.9364761076009231, - "grad_norm": 1.9935057163238525, - "learning_rate": 1.9764051269825168e-07, - "loss": 0.6399, + "epoch": 0.8712425202393523, + "grad_norm": 2.0324923992156982, + "learning_rate": 5.999557785754279e-07, + "loss": 0.7168, "step": 12376 }, { - "epoch": 0.9365517763232568, - "grad_norm": 2.352203130722046, - "learning_rate": 1.9717138677115164e-07, - "loss": 0.6009, + "epoch": 0.8713129179866245, + "grad_norm": 2.35260272026062, + "learning_rate": 5.993094331940732e-07, + "loss": 0.7353, "step": 12377 }, { - "epoch": 0.9366274450455904, - "grad_norm": 2.5235226154327393, - "learning_rate": 1.96702812105651e-07, - "loss": 0.6322, + "epoch": 0.8713833157338965, + "grad_norm": 1.69230318069458, + "learning_rate": 5.986634205904183e-07, + "loss": 0.6423, "step": 12378 }, { - "epoch": 0.936703113767924, - "grad_norm": 2.3327648639678955, - "learning_rate": 1.9623478873109424e-07, - "loss": 0.6441, + "epoch": 0.8714537134811686, + "grad_norm": 1.7984284162521362, + "learning_rate": 5.98017740798049e-07, + "loss": 0.6651, "step": 12379 }, { - "epoch": 0.9367787824902577, - "grad_norm": 2.742035388946533, - "learning_rate": 1.9576731667678993e-07, - "loss": 0.6182, + "epoch": 0.8715241112284406, + "grad_norm": 2.5513641834259033, + "learning_rate": 5.973723938505377e-07, + "loss": 0.6131, "step": 12380 }, { - "epoch": 0.9368544512125913, - "grad_norm": 3.735501766204834, - "learning_rate": 1.9530039597201066e-07, - "loss": 0.6479, + "epoch": 0.8715945089757128, + "grad_norm": 2.3677849769592285, + "learning_rate": 5.967273797814308e-07, + "loss": 0.7608, "step": 12381 }, { - "epoch": 0.9369301199349249, - "grad_norm": 7.853936195373535, - "learning_rate": 1.9483402664600002e-07, - "loss": 0.7149, + "epoch": 0.8716649067229849, + "grad_norm": 2.2552201747894287, + "learning_rate": 5.960826986242692e-07, + "loss": 0.73, "step": 12382 }, { - "epoch": 0.9370057886572585, - "grad_norm": 2.559588670730591, - "learning_rate": 1.9436820872796169e-07, - "loss": 0.5942, + "epoch": 0.8717353044702569, + "grad_norm": 2.024350643157959, + "learning_rate": 5.95438350412566e-07, + "loss": 0.6776, "step": 12383 }, { - "epoch": 0.9370814573795921, - "grad_norm": 2.3932056427001953, - "learning_rate": 1.939029422470673e-07, - "loss": 0.6378, + "epoch": 0.8718057022175291, + "grad_norm": 2.015204668045044, + "learning_rate": 5.947943351798244e-07, + "loss": 0.5926, "step": 12384 }, { - "epoch": 0.9371571261019258, - "grad_norm": 2.621971368789673, - "learning_rate": 1.9343822723245251e-07, - "loss": 0.7658, + "epoch": 0.8718760999648011, + "grad_norm": 2.203486204147339, + "learning_rate": 5.941506529595251e-07, + "loss": 0.6735, "step": 12385 }, { - "epoch": 0.9372327948242594, - "grad_norm": 2.2142841815948486, - "learning_rate": 1.9297406371322012e-07, - "loss": 0.67, + "epoch": 0.8719464977120732, + "grad_norm": 1.7439419031143188, + "learning_rate": 5.93507303785137e-07, + "loss": 0.6527, "step": 12386 }, { - "epoch": 0.937308463546593, - "grad_norm": 2.741013526916504, - "learning_rate": 1.9251045171843684e-07, - "loss": 0.8169, + "epoch": 0.8720168954593454, + "grad_norm": 1.9150508642196655, + "learning_rate": 5.928642876901044e-07, + "loss": 0.6283, "step": 12387 }, { - "epoch": 0.9373841322689266, - "grad_norm": 2.1864612102508545, - "learning_rate": 1.9204739127713644e-07, - "loss": 0.6494, + "epoch": 0.8720872932066174, + "grad_norm": 1.9373284578323364, + "learning_rate": 5.922216047078606e-07, + "loss": 0.6548, "step": 12388 }, { - "epoch": 0.9374598009912603, - "grad_norm": 2.0697124004364014, - "learning_rate": 1.9158488241831672e-07, - "loss": 0.8498, + "epoch": 0.8721576909538895, + "grad_norm": 2.159534454345703, + "learning_rate": 5.915792548718184e-07, + "loss": 0.6496, "step": 12389 }, { - "epoch": 0.9375354697135939, - "grad_norm": 2.364565372467041, - "learning_rate": 1.9112292517094255e-07, - "loss": 0.8094, + "epoch": 0.8722280887011615, + "grad_norm": 1.8774043321609497, + "learning_rate": 5.909372382153755e-07, + "loss": 0.7095, "step": 12390 }, { - "epoch": 0.9376111384359275, - "grad_norm": 2.1771390438079834, - "learning_rate": 1.9066151956394074e-07, - "loss": 0.7019, + "epoch": 0.8722984864484337, + "grad_norm": 1.7596157789230347, + "learning_rate": 5.902955547719083e-07, + "loss": 0.567, "step": 12391 }, { - "epoch": 0.9376868071582611, - "grad_norm": 2.1345317363739014, - "learning_rate": 1.902006656262062e-07, - "loss": 0.5427, + "epoch": 0.8723688841957057, + "grad_norm": 1.8083730936050415, + "learning_rate": 5.896542045747825e-07, + "loss": 0.6833, "step": 12392 }, { - "epoch": 0.9377624758805948, - "grad_norm": 2.93278431892395, - "learning_rate": 1.8974036338660283e-07, - "loss": 0.6624, + "epoch": 0.8724392819429778, + "grad_norm": 1.9431865215301514, + "learning_rate": 5.890131876573382e-07, + "loss": 0.6908, "step": 12393 }, { - "epoch": 0.9378381446029284, - "grad_norm": 2.2009384632110596, - "learning_rate": 1.892806128739526e-07, - "loss": 0.7585, + "epoch": 0.87250967969025, + "grad_norm": 2.0456631183624268, + "learning_rate": 5.883725040529059e-07, + "loss": 0.6424, "step": 12394 }, { - "epoch": 0.937913813325262, - "grad_norm": 2.6381959915161133, - "learning_rate": 1.8882141411704845e-07, - "loss": 0.8012, + "epoch": 0.872580077437522, + "grad_norm": 1.7156262397766113, + "learning_rate": 5.877321537947924e-07, + "loss": 0.6213, "step": 12395 }, { - "epoch": 0.9379894820475956, - "grad_norm": 2.5263381004333496, - "learning_rate": 1.883627671446454e-07, - "loss": 0.6025, + "epoch": 0.8726504751847941, + "grad_norm": 1.801667332649231, + "learning_rate": 5.870921369162908e-07, + "loss": 0.5569, "step": 12396 }, { - "epoch": 0.9380651507699292, - "grad_norm": 2.298753261566162, - "learning_rate": 1.8790467198546647e-07, - "loss": 0.687, + "epoch": 0.8727208729320661, + "grad_norm": 2.290043830871582, + "learning_rate": 5.86452453450679e-07, + "loss": 0.6165, "step": 12397 }, { - "epoch": 0.9381408194922629, - "grad_norm": 3.0033249855041504, - "learning_rate": 1.8744712866819768e-07, - "loss": 0.8006, + "epoch": 0.8727912706793383, + "grad_norm": 1.9641053676605225, + "learning_rate": 5.858131034312125e-07, + "loss": 0.5659, "step": 12398 }, { - "epoch": 0.9382164882145965, - "grad_norm": 1.8784987926483154, - "learning_rate": 1.8699013722149417e-07, - "loss": 0.6037, + "epoch": 0.8728616684266104, + "grad_norm": 1.9753097295761108, + "learning_rate": 5.851740868911315e-07, + "loss": 0.759, "step": 12399 }, { - "epoch": 0.9382921569369301, - "grad_norm": 2.2286734580993652, - "learning_rate": 1.8653369767397298e-07, - "loss": 0.7111, + "epoch": 0.8729320661738824, + "grad_norm": 1.4204447269439697, + "learning_rate": 5.845354038636582e-07, + "loss": 0.7223, "step": 12400 }, { - "epoch": 0.9383678256592637, - "grad_norm": 2.3188793659210205, - "learning_rate": 1.8607781005421832e-07, - "loss": 0.6128, + "epoch": 0.8730024639211545, + "grad_norm": 2.232449531555176, + "learning_rate": 5.838970543820003e-07, + "loss": 0.6494, "step": 12401 }, { - "epoch": 0.9384434943815974, - "grad_norm": 2.3316099643707275, - "learning_rate": 1.856224743907773e-07, - "loss": 0.6353, + "epoch": 0.8730728616684266, + "grad_norm": 1.7102608680725098, + "learning_rate": 5.832590384793446e-07, + "loss": 0.6349, "step": 12402 }, { - "epoch": 0.938519163103931, - "grad_norm": 2.368476629257202, - "learning_rate": 1.851676907121671e-07, - "loss": 0.6556, + "epoch": 0.8731432594156987, + "grad_norm": 1.7840009927749634, + "learning_rate": 5.826213561888641e-07, + "loss": 0.6526, "step": 12403 }, { - "epoch": 0.9385948318262646, - "grad_norm": 2.1538264751434326, - "learning_rate": 1.8471345904686699e-07, - "loss": 0.6717, + "epoch": 0.8732136571629708, + "grad_norm": 1.9471572637557983, + "learning_rate": 5.819840075437108e-07, + "loss": 0.6155, "step": 12404 }, { - "epoch": 0.9386705005485982, - "grad_norm": 2.1779584884643555, - "learning_rate": 1.8425977942332118e-07, - "loss": 0.7298, + "epoch": 0.8732840549102429, + "grad_norm": 1.906006097793579, + "learning_rate": 5.813469925770219e-07, + "loss": 0.6339, "step": 12405 }, { - "epoch": 0.9387461692709319, - "grad_norm": 11.318231582641602, - "learning_rate": 1.8380665186994294e-07, - "loss": 0.708, + "epoch": 0.873354452657515, + "grad_norm": 2.3006038665771484, + "learning_rate": 5.807103113219141e-07, + "loss": 0.6731, "step": 12406 }, { - "epoch": 0.9388218379932655, - "grad_norm": 2.0828261375427246, - "learning_rate": 1.833540764151056e-07, - "loss": 0.6225, + "epoch": 0.873424850404787, + "grad_norm": 1.7832194566726685, + "learning_rate": 5.800739638114921e-07, + "loss": 0.7547, "step": 12407 }, { - "epoch": 0.9388975067155991, - "grad_norm": 10.891009330749512, - "learning_rate": 1.8290205308715346e-07, - "loss": 0.634, + "epoch": 0.8734952481520591, + "grad_norm": 1.7783010005950928, + "learning_rate": 5.794379500788387e-07, + "loss": 0.6103, "step": 12408 }, { - "epoch": 0.9389731754379327, - "grad_norm": 3.8643298149108887, - "learning_rate": 1.824505819143929e-07, - "loss": 0.7067, + "epoch": 0.8735656458993312, + "grad_norm": 2.200235605239868, + "learning_rate": 5.78802270157021e-07, + "loss": 0.658, "step": 12409 }, { - "epoch": 0.9390488441602663, - "grad_norm": 3.798006534576416, - "learning_rate": 1.819996629250953e-07, - "loss": 0.876, + "epoch": 0.8736360436466033, + "grad_norm": 1.9313105344772339, + "learning_rate": 5.7816692407909e-07, + "loss": 0.6821, "step": 12410 }, { - "epoch": 0.9391245128826, - "grad_norm": 1.9390292167663574, - "learning_rate": 1.8154929614750004e-07, - "loss": 0.6763, + "epoch": 0.8737064413938754, + "grad_norm": 1.9369288682937622, + "learning_rate": 5.775319118780755e-07, + "loss": 0.6289, "step": 12411 }, { - "epoch": 0.9392001816049336, - "grad_norm": 2.075162172317505, - "learning_rate": 1.810994816098106e-07, - "loss": 0.6476, + "epoch": 0.8737768391411475, + "grad_norm": 1.803001046180725, + "learning_rate": 5.768972335869927e-07, + "loss": 0.6339, "step": 12412 }, { - "epoch": 0.9392758503272672, - "grad_norm": 2.536733865737915, - "learning_rate": 1.8065021934019542e-07, - "loss": 0.6086, + "epoch": 0.8738472368884196, + "grad_norm": 2.151129961013794, + "learning_rate": 5.762628892388405e-07, + "loss": 0.6873, "step": 12413 }, { - "epoch": 0.9393515190496009, - "grad_norm": 2.2751870155334473, - "learning_rate": 1.8020150936678804e-07, - "loss": 0.7458, + "epoch": 0.8739176346356916, + "grad_norm": 1.7585601806640625, + "learning_rate": 5.756288788665982e-07, + "loss": 0.6773, "step": 12414 }, { - "epoch": 0.9394271877719345, - "grad_norm": 2.1633825302124023, - "learning_rate": 1.7975335171768992e-07, - "loss": 0.727, + "epoch": 0.8739880323829637, + "grad_norm": 2.863351583480835, + "learning_rate": 5.749952025032289e-07, + "loss": 0.6264, "step": 12415 }, { - "epoch": 0.9395028564942681, - "grad_norm": 1.8044906854629517, - "learning_rate": 1.7930574642096464e-07, - "loss": 0.5019, + "epoch": 0.8740584301302359, + "grad_norm": 2.662703275680542, + "learning_rate": 5.743618601816786e-07, + "loss": 0.6624, "step": 12416 }, { - "epoch": 0.9395785252166017, - "grad_norm": 2.0498223304748535, - "learning_rate": 1.788586935046428e-07, - "loss": 0.6854, + "epoch": 0.8741288278775079, + "grad_norm": 1.9904201030731201, + "learning_rate": 5.737288519348746e-07, + "loss": 0.6606, "step": 12417 }, { - "epoch": 0.9396541939389353, - "grad_norm": 1.9652925729751587, - "learning_rate": 1.7841219299672096e-07, - "loss": 0.7548, + "epoch": 0.87419922562478, + "grad_norm": 1.8840415477752686, + "learning_rate": 5.730961777957255e-07, + "loss": 0.5538, "step": 12418 }, { - "epoch": 0.939729862661269, - "grad_norm": 2.0633480548858643, - "learning_rate": 1.7796624492515978e-07, - "loss": 0.6056, + "epoch": 0.874269623372052, + "grad_norm": 2.1769895553588867, + "learning_rate": 5.724638377971285e-07, + "loss": 0.7278, "step": 12419 }, { - "epoch": 0.9398055313836026, - "grad_norm": 2.8833742141723633, - "learning_rate": 1.775208493178869e-07, - "loss": 0.657, + "epoch": 0.8743400211193242, + "grad_norm": 1.707557201385498, + "learning_rate": 5.718318319719564e-07, + "loss": 0.5268, "step": 12420 }, { - "epoch": 0.9398812001059362, - "grad_norm": 3.0172793865203857, - "learning_rate": 1.7707600620279307e-07, - "loss": 0.7535, + "epoch": 0.8744104188665963, + "grad_norm": 1.8136482238769531, + "learning_rate": 5.712001603530705e-07, + "loss": 0.6198, "step": 12421 }, { - "epoch": 0.9399568688282698, - "grad_norm": 2.2233293056488037, - "learning_rate": 1.7663171560773694e-07, - "loss": 0.7763, + "epoch": 0.8744808166138683, + "grad_norm": 1.9212180376052856, + "learning_rate": 5.705688229733104e-07, + "loss": 0.5136, "step": 12422 }, { - "epoch": 0.9400325375506035, - "grad_norm": 1.8882126808166504, - "learning_rate": 1.761879775605403e-07, - "loss": 0.718, + "epoch": 0.8745512143611405, + "grad_norm": 1.9870349168777466, + "learning_rate": 5.699378198654999e-07, + "loss": 0.6271, "step": 12423 }, { - "epoch": 0.9401082062729371, - "grad_norm": 2.3364417552948, - "learning_rate": 1.7574479208899286e-07, - "loss": 0.6638, + "epoch": 0.8746216121084125, + "grad_norm": 2.0328502655029297, + "learning_rate": 5.693071510624443e-07, + "loss": 0.6022, "step": 12424 }, { - "epoch": 0.9401838749952707, - "grad_norm": 2.688260316848755, - "learning_rate": 1.7530215922084646e-07, - "loss": 0.6975, + "epoch": 0.8746920098556846, + "grad_norm": 1.9485429525375366, + "learning_rate": 5.686768165969352e-07, + "loss": 0.584, "step": 12425 }, { - "epoch": 0.9402595437176043, - "grad_norm": 1.90416419506073, - "learning_rate": 1.7486007898382393e-07, - "loss": 0.607, + "epoch": 0.8747624076029568, + "grad_norm": 2.210902452468872, + "learning_rate": 5.680468165017418e-07, + "loss": 0.6783, "step": 12426 }, { - "epoch": 0.940335212439938, - "grad_norm": 2.5654749870300293, - "learning_rate": 1.7441855140560515e-07, - "loss": 0.709, + "epoch": 0.8748328053502288, + "grad_norm": 2.555391550064087, + "learning_rate": 5.674171508096199e-07, + "loss": 0.6397, "step": 12427 }, { - "epoch": 0.9404108811622716, - "grad_norm": 2.002608060836792, - "learning_rate": 1.7397757651384194e-07, - "loss": 0.5844, + "epoch": 0.8749032030975009, + "grad_norm": 2.215306043624878, + "learning_rate": 5.667878195533072e-07, + "loss": 0.6291, "step": 12428 }, { - "epoch": 0.9404865498846052, - "grad_norm": 1.7451132535934448, - "learning_rate": 1.7353715433615125e-07, - "loss": 0.6631, + "epoch": 0.8749736008447729, + "grad_norm": 1.6926283836364746, + "learning_rate": 5.66158822765522e-07, + "loss": 0.6467, "step": 12429 }, { - "epoch": 0.9405622186069388, - "grad_norm": 2.49381685256958, - "learning_rate": 1.73097284900111e-07, - "loss": 0.6062, + "epoch": 0.8750439985920451, + "grad_norm": 2.199791193008423, + "learning_rate": 5.655301604789646e-07, + "loss": 0.5689, "step": 12430 }, { - "epoch": 0.9406378873292724, - "grad_norm": 3.437695264816284, - "learning_rate": 1.726579682332682e-07, - "loss": 0.6267, + "epoch": 0.8751143963393171, + "grad_norm": 2.328932285308838, + "learning_rate": 5.649018327263229e-07, + "loss": 0.7356, "step": 12431 }, { - "epoch": 0.9407135560516061, - "grad_norm": 4.435547828674316, - "learning_rate": 1.7221920436313577e-07, - "loss": 0.65, + "epoch": 0.8751847940865892, + "grad_norm": 1.7139474153518677, + "learning_rate": 5.642738395402617e-07, + "loss": 0.5869, "step": 12432 }, { - "epoch": 0.9407892247739397, - "grad_norm": 2.160989761352539, - "learning_rate": 1.7178099331718776e-07, - "loss": 0.6809, + "epoch": 0.8752551918338614, + "grad_norm": 1.9066331386566162, + "learning_rate": 5.636461809534331e-07, + "loss": 0.4834, "step": 12433 }, { - "epoch": 0.9408648934962733, - "grad_norm": 1.8670154809951782, - "learning_rate": 1.7134333512286925e-07, - "loss": 0.6438, + "epoch": 0.8753255895811334, + "grad_norm": 1.7696985006332397, + "learning_rate": 5.630188569984692e-07, + "loss": 0.6874, "step": 12434 }, { - "epoch": 0.9409405622186069, - "grad_norm": 2.766958713531494, - "learning_rate": 1.709062298075853e-07, - "loss": 0.6525, + "epoch": 0.8753959873284055, + "grad_norm": 1.8335795402526855, + "learning_rate": 5.623918677079841e-07, + "loss": 0.589, "step": 12435 }, { - "epoch": 0.9410162309409406, - "grad_norm": 2.01920747756958, - "learning_rate": 1.70469677398711e-07, - "loss": 0.7654, + "epoch": 0.8754663850756775, + "grad_norm": 2.240257501602173, + "learning_rate": 5.617652131145756e-07, + "loss": 0.6691, "step": 12436 }, { - "epoch": 0.9410918996632742, - "grad_norm": 2.5431160926818848, - "learning_rate": 1.700336779235835e-07, - "loss": 0.6332, + "epoch": 0.8755367828229497, + "grad_norm": 2.310007333755493, + "learning_rate": 5.611388932508253e-07, + "loss": 0.6309, "step": 12437 }, { - "epoch": 0.9411675683856078, - "grad_norm": 2.6801328659057617, - "learning_rate": 1.695982314095059e-07, - "loss": 0.6775, + "epoch": 0.8756071805702218, + "grad_norm": 1.7370781898498535, + "learning_rate": 5.605129081492935e-07, + "loss": 0.5741, "step": 12438 }, { - "epoch": 0.9412432371079414, - "grad_norm": 2.0385563373565674, - "learning_rate": 1.6916333788374849e-07, - "loss": 0.8871, + "epoch": 0.8756775783174938, + "grad_norm": 1.9046759605407715, + "learning_rate": 5.598872578425291e-07, + "loss": 0.5974, "step": 12439 }, { - "epoch": 0.9413189058302751, - "grad_norm": 1.3202354907989502, - "learning_rate": 1.687289973735454e-07, - "loss": 0.7429, + "epoch": 0.875747976064766, + "grad_norm": 1.7231807708740234, + "learning_rate": 5.592619423630594e-07, + "loss": 0.5822, "step": 12440 }, { - "epoch": 0.9413945745526087, - "grad_norm": 2.1803438663482666, - "learning_rate": 1.6829520990609592e-07, - "loss": 0.6388, + "epoch": 0.875818373812038, + "grad_norm": 2.024609327316284, + "learning_rate": 5.586369617433933e-07, + "loss": 0.5909, "step": 12441 }, { - "epoch": 0.9414702432749423, - "grad_norm": 2.1765198707580566, - "learning_rate": 1.678619755085663e-07, - "loss": 0.6521, + "epoch": 0.8758887715593101, + "grad_norm": 2.0159311294555664, + "learning_rate": 5.580123160160249e-07, + "loss": 0.6831, "step": 12442 }, { - "epoch": 0.9415459119972759, - "grad_norm": 2.3797476291656494, - "learning_rate": 1.6742929420808584e-07, - "loss": 0.7488, + "epoch": 0.8759591693065822, + "grad_norm": 1.6980421543121338, + "learning_rate": 5.573880052134311e-07, + "loss": 0.5514, "step": 12443 }, { - "epoch": 0.9416215807196096, - "grad_norm": 2.2045605182647705, - "learning_rate": 1.6699716603175086e-07, - "loss": 0.7189, + "epoch": 0.8760295670538543, + "grad_norm": 2.2040481567382812, + "learning_rate": 5.567640293680686e-07, + "loss": 0.7179, "step": 12444 }, { - "epoch": 0.9416972494419432, - "grad_norm": 2.272555351257324, - "learning_rate": 1.6656559100662272e-07, - "loss": 0.6715, + "epoch": 0.8760999648011264, + "grad_norm": 2.4337949752807617, + "learning_rate": 5.561403885123803e-07, + "loss": 0.7542, "step": 12445 }, { - "epoch": 0.9417729181642768, - "grad_norm": 2.058490514755249, - "learning_rate": 1.661345691597288e-07, - "loss": 0.6625, + "epoch": 0.8761703625483984, + "grad_norm": 1.8271018266677856, + "learning_rate": 5.555170826787902e-07, + "loss": 0.6199, "step": 12446 }, { - "epoch": 0.9418485868866104, - "grad_norm": 4.0172038078308105, - "learning_rate": 1.657041005180605e-07, - "loss": 0.7327, + "epoch": 0.8762407602956706, + "grad_norm": 2.082618474960327, + "learning_rate": 5.548941118997008e-07, + "loss": 0.622, "step": 12447 }, { - "epoch": 0.941924255608944, - "grad_norm": 2.1872177124023438, - "learning_rate": 1.6527418510857328e-07, - "loss": 0.7891, + "epoch": 0.8763111580429426, + "grad_norm": 1.8145813941955566, + "learning_rate": 5.542714762075057e-07, + "loss": 0.6685, "step": 12448 }, { - "epoch": 0.9419999243312777, - "grad_norm": 1.810878038406372, - "learning_rate": 1.6484482295819258e-07, - "loss": 0.5745, + "epoch": 0.8763815557902147, + "grad_norm": 2.1360180377960205, + "learning_rate": 5.536491756345726e-07, + "loss": 0.5714, "step": 12449 }, { - "epoch": 0.9420755930536113, - "grad_norm": 2.1320366859436035, - "learning_rate": 1.6441601409380591e-07, - "loss": 0.7669, + "epoch": 0.8764519535374868, + "grad_norm": 1.9111604690551758, + "learning_rate": 5.530272102132584e-07, + "loss": 0.6015, "step": 12450 }, { - "epoch": 0.9421512617759449, - "grad_norm": 2.2419726848602295, - "learning_rate": 1.6398775854226578e-07, - "loss": 0.6414, + "epoch": 0.8765223512847589, + "grad_norm": 2.142665147781372, + "learning_rate": 5.52405579975899e-07, + "loss": 0.5533, "step": 12451 }, { - "epoch": 0.9422269304982785, - "grad_norm": 1.9634507894515991, - "learning_rate": 1.6356005633039074e-07, - "loss": 0.7198, + "epoch": 0.876592749032031, + "grad_norm": 2.005434513092041, + "learning_rate": 5.517842849548117e-07, + "loss": 0.5728, "step": 12452 }, { - "epoch": 0.9423025992206122, - "grad_norm": 2.088675022125244, - "learning_rate": 1.6313290748496534e-07, - "loss": 0.5175, + "epoch": 0.876663146779303, + "grad_norm": 1.7838457822799683, + "learning_rate": 5.511633251822982e-07, + "loss": 0.6009, "step": 12453 }, { - "epoch": 0.9423782679429458, - "grad_norm": 2.3254096508026123, - "learning_rate": 1.6270631203274023e-07, - "loss": 0.5991, + "epoch": 0.8767335445265751, + "grad_norm": 1.8212769031524658, + "learning_rate": 5.505427006906453e-07, + "loss": 0.6253, "step": 12454 }, { - "epoch": 0.9424539366652794, - "grad_norm": 1.976793646812439, - "learning_rate": 1.62280270000429e-07, - "loss": 0.6158, + "epoch": 0.8768039422738473, + "grad_norm": 2.0761964321136475, + "learning_rate": 5.49922411512117e-07, + "loss": 0.6613, "step": 12455 }, { - "epoch": 0.942529605387613, - "grad_norm": 1.8767503499984741, - "learning_rate": 1.6185478141471132e-07, - "loss": 0.6511, + "epoch": 0.8768743400211193, + "grad_norm": 2.0871469974517822, + "learning_rate": 5.493024576789657e-07, + "loss": 0.6646, "step": 12456 }, { - "epoch": 0.9426052741099467, - "grad_norm": 2.4342007637023926, - "learning_rate": 1.614298463022339e-07, - "loss": 0.7071, + "epoch": 0.8769447377683914, + "grad_norm": 1.8835831880569458, + "learning_rate": 5.486828392234217e-07, + "loss": 0.656, "step": 12457 }, { - "epoch": 0.9426809428322803, - "grad_norm": 1.5997973680496216, - "learning_rate": 1.6100546468960642e-07, - "loss": 0.7338, + "epoch": 0.8770151355156635, + "grad_norm": 2.1265852451324463, + "learning_rate": 5.480635561776991e-07, + "loss": 0.6879, "step": 12458 }, { - "epoch": 0.9427566115546139, - "grad_norm": 1.8619414567947388, - "learning_rate": 1.6058163660340563e-07, - "loss": 0.7648, + "epoch": 0.8770855332629356, + "grad_norm": 1.6158206462860107, + "learning_rate": 5.474446085739945e-07, + "loss": 0.7158, "step": 12459 }, { - "epoch": 0.9428322802769475, - "grad_norm": 1.8532341718673706, - "learning_rate": 1.601583620701733e-07, - "loss": 0.7516, + "epoch": 0.8771559310102077, + "grad_norm": 1.847436785697937, + "learning_rate": 5.468259964444892e-07, + "loss": 0.5653, "step": 12460 }, { - "epoch": 0.9429079489992811, - "grad_norm": 2.581843614578247, - "learning_rate": 1.5973564111641625e-07, - "loss": 0.6256, + "epoch": 0.8772263287574797, + "grad_norm": 1.9006954431533813, + "learning_rate": 5.462077198213441e-07, + "loss": 0.5939, "step": 12461 }, { - "epoch": 0.9429836177216148, - "grad_norm": 2.2200398445129395, - "learning_rate": 1.5931347376860528e-07, - "loss": 0.6617, + "epoch": 0.8772967265047519, + "grad_norm": 1.6521570682525635, + "learning_rate": 5.455897787367051e-07, + "loss": 0.5965, "step": 12462 }, { - "epoch": 0.9430592864439484, - "grad_norm": 3.4904468059539795, - "learning_rate": 1.5889186005317923e-07, - "loss": 0.6086, + "epoch": 0.8773671242520239, + "grad_norm": 2.0698721408843994, + "learning_rate": 5.449721732226995e-07, + "loss": 0.5513, "step": 12463 }, { - "epoch": 0.943134955166282, - "grad_norm": 3.9420273303985596, - "learning_rate": 1.5847079999654e-07, - "loss": 0.6135, + "epoch": 0.877437521999296, + "grad_norm": 1.7844667434692383, + "learning_rate": 5.443549033114365e-07, + "loss": 0.6949, "step": 12464 }, { - "epoch": 0.9432106238886157, - "grad_norm": 1.8716062307357788, - "learning_rate": 1.5805029362505652e-07, - "loss": 0.6254, + "epoch": 0.877507919746568, + "grad_norm": 2.3367984294891357, + "learning_rate": 5.437379690350077e-07, + "loss": 0.7059, "step": 12465 }, { - "epoch": 0.9432862926109493, - "grad_norm": 2.7966954708099365, - "learning_rate": 1.5763034096506167e-07, - "loss": 0.601, + "epoch": 0.8775783174938402, + "grad_norm": 1.9479585886001587, + "learning_rate": 5.431213704254897e-07, + "loss": 0.7052, "step": 12466 }, { - "epoch": 0.9433619613332829, - "grad_norm": 1.860151767730713, - "learning_rate": 1.5721094204285547e-07, - "loss": 0.6642, + "epoch": 0.8776487152411123, + "grad_norm": 1.9008338451385498, + "learning_rate": 5.425051075149385e-07, + "loss": 0.5676, "step": 12467 }, { - "epoch": 0.9434376300556165, - "grad_norm": 2.0277626514434814, - "learning_rate": 1.5679209688470087e-07, - "loss": 0.754, + "epoch": 0.8777191129883843, + "grad_norm": 2.0254533290863037, + "learning_rate": 5.418891803353963e-07, + "loss": 0.7215, "step": 12468 }, { - "epoch": 0.9435132987779501, - "grad_norm": 1.7337976694107056, - "learning_rate": 1.563738055168269e-07, - "loss": 0.6579, + "epoch": 0.8777895107356565, + "grad_norm": 2.3364129066467285, + "learning_rate": 5.412735889188843e-07, + "loss": 0.7401, "step": 12469 }, { - "epoch": 0.9435889675002838, - "grad_norm": 2.3716025352478027, - "learning_rate": 1.559560679654296e-07, - "loss": 0.6865, + "epoch": 0.8778599084829285, + "grad_norm": 1.8128169775009155, + "learning_rate": 5.406583332974073e-07, + "loss": 0.6386, "step": 12470 }, { - "epoch": 0.9436646362226174, - "grad_norm": 2.448322057723999, - "learning_rate": 1.5553888425666806e-07, - "loss": 0.693, + "epoch": 0.8779303062302006, + "grad_norm": 2.2829535007476807, + "learning_rate": 5.400434135029531e-07, + "loss": 0.5878, "step": 12471 }, { - "epoch": 0.943740304944951, - "grad_norm": 2.229336977005005, - "learning_rate": 1.551222544166684e-07, - "loss": 0.6458, + "epoch": 0.8780007039774728, + "grad_norm": 2.1493473052978516, + "learning_rate": 5.39428829567493e-07, + "loss": 0.5299, "step": 12472 }, { - "epoch": 0.9438159736672846, - "grad_norm": 2.3972370624542236, - "learning_rate": 1.5470617847152068e-07, - "loss": 0.697, + "epoch": 0.8780711017247448, + "grad_norm": 2.1487510204315186, + "learning_rate": 5.388145815229782e-07, + "loss": 0.6902, "step": 12473 }, { - "epoch": 0.9438916423896182, - "grad_norm": 5.422283172607422, - "learning_rate": 1.5429065644728113e-07, - "loss": 0.6889, + "epoch": 0.8781414994720169, + "grad_norm": 2.0615949630737305, + "learning_rate": 5.382006694013454e-07, + "loss": 0.6149, "step": 12474 }, { - "epoch": 0.9439673111119519, - "grad_norm": 2.0572350025177, - "learning_rate": 1.538756883699719e-07, - "loss": 0.7075, + "epoch": 0.8782118972192889, + "grad_norm": 2.0165295600891113, + "learning_rate": 5.37587093234513e-07, + "loss": 0.5507, "step": 12475 }, { - "epoch": 0.9440429798342855, - "grad_norm": 3.023061990737915, - "learning_rate": 1.5346127426557822e-07, - "loss": 0.7228, + "epoch": 0.8782822949665611, + "grad_norm": 1.7045530080795288, + "learning_rate": 5.369738530543792e-07, + "loss": 0.5077, "step": 12476 }, { - "epoch": 0.9441186485566191, - "grad_norm": 2.5621566772460938, - "learning_rate": 1.530474141600523e-07, - "loss": 0.6039, + "epoch": 0.8783526927138332, + "grad_norm": 1.7953816652297974, + "learning_rate": 5.363609488928281e-07, + "loss": 0.6033, "step": 12477 }, { - "epoch": 0.9441943172789528, - "grad_norm": 2.2269248962402344, - "learning_rate": 1.5263410807931244e-07, - "loss": 0.6546, + "epoch": 0.8784230904611052, + "grad_norm": 1.8475604057312012, + "learning_rate": 5.357483807817249e-07, + "loss": 0.6679, "step": 12478 }, { - "epoch": 0.9442699860012864, - "grad_norm": 2.0988047122955322, - "learning_rate": 1.5222135604924093e-07, - "loss": 0.5909, + "epoch": 0.8784934882083774, + "grad_norm": 1.8167989253997803, + "learning_rate": 5.351361487529179e-07, + "loss": 0.6268, "step": 12479 }, { - "epoch": 0.94434565472362, - "grad_norm": 2.0287554264068604, - "learning_rate": 1.5180915809568507e-07, - "loss": 0.7294, + "epoch": 0.8785638859556494, + "grad_norm": 1.7776645421981812, + "learning_rate": 5.345242528382375e-07, + "loss": 0.5207, "step": 12480 }, { - "epoch": 0.9444213234459536, - "grad_norm": 2.013700008392334, - "learning_rate": 1.5139751424445726e-07, - "loss": 0.7171, + "epoch": 0.8786342837029215, + "grad_norm": 1.9120320081710815, + "learning_rate": 5.33912693069496e-07, + "loss": 0.6566, "step": 12481 }, { - "epoch": 0.9444969921682872, - "grad_norm": 2.1935606002807617, - "learning_rate": 1.5098642452133883e-07, - "loss": 0.6523, + "epoch": 0.8787046814501936, + "grad_norm": 1.9108085632324219, + "learning_rate": 5.333014694784922e-07, + "loss": 0.6423, "step": 12482 }, { - "epoch": 0.9445726608906209, - "grad_norm": 1.8394144773483276, - "learning_rate": 1.505758889520702e-07, - "loss": 0.6384, + "epoch": 0.8787750791974657, + "grad_norm": 1.7291969060897827, + "learning_rate": 5.326905820969988e-07, + "loss": 0.7108, "step": 12483 }, { - "epoch": 0.9446483296129545, - "grad_norm": 2.9377388954162598, - "learning_rate": 1.5016590756236183e-07, - "loss": 0.646, + "epoch": 0.8788454769447378, + "grad_norm": 1.8108457326889038, + "learning_rate": 5.320800309567809e-07, + "loss": 0.692, "step": 12484 }, { - "epoch": 0.9447239983352881, - "grad_norm": 2.116981267929077, - "learning_rate": 1.4975648037788914e-07, - "loss": 0.6948, + "epoch": 0.8789158746920098, + "grad_norm": 1.7530781030654907, + "learning_rate": 5.314698160895786e-07, + "loss": 0.6408, "step": 12485 }, { - "epoch": 0.9447996670576218, - "grad_norm": 2.6826493740081787, - "learning_rate": 1.4934760742429066e-07, - "loss": 0.5801, + "epoch": 0.878986272439282, + "grad_norm": 2.7339794635772705, + "learning_rate": 5.308599375271193e-07, + "loss": 0.6382, "step": 12486 }, { - "epoch": 0.9448753357799553, - "grad_norm": 2.8828585147857666, - "learning_rate": 1.489392887271709e-07, - "loss": 0.641, + "epoch": 0.879056670186554, + "grad_norm": 2.2469475269317627, + "learning_rate": 5.302503953011101e-07, + "loss": 0.5786, "step": 12487 }, { - "epoch": 0.944951004502289, - "grad_norm": 1.9045580625534058, - "learning_rate": 1.4853152431210138e-07, - "loss": 0.5268, + "epoch": 0.8791270679338261, + "grad_norm": 1.9358872175216675, + "learning_rate": 5.296411894432443e-07, + "loss": 0.6362, "step": 12488 }, { - "epoch": 0.9450266732246226, - "grad_norm": 6.540375709533691, - "learning_rate": 1.481243142046157e-07, - "loss": 0.6434, + "epoch": 0.8791974656810982, + "grad_norm": 2.5186924934387207, + "learning_rate": 5.29032319985191e-07, + "loss": 0.6177, "step": 12489 }, { - "epoch": 0.9451023419469562, - "grad_norm": 1.9567054510116577, - "learning_rate": 1.4771765843021746e-07, - "loss": 0.6509, + "epoch": 0.8792678634283703, + "grad_norm": 1.8338857889175415, + "learning_rate": 5.284237869586082e-07, + "loss": 0.5527, "step": 12490 }, { - "epoch": 0.9451780106692899, - "grad_norm": 4.807526111602783, - "learning_rate": 1.4731155701437028e-07, - "loss": 0.6686, + "epoch": 0.8793382611756424, + "grad_norm": 2.0719635486602783, + "learning_rate": 5.278155903951335e-07, + "loss": 0.6138, "step": 12491 }, { - "epoch": 0.9452536793916235, - "grad_norm": 2.3727920055389404, - "learning_rate": 1.469060099825068e-07, - "loss": 0.7862, + "epoch": 0.8794086589229144, + "grad_norm": 2.0458948612213135, + "learning_rate": 5.272077303263884e-07, + "loss": 0.6184, "step": 12492 }, { - "epoch": 0.9453293481139571, - "grad_norm": 2.6744470596313477, - "learning_rate": 1.4650101736002374e-07, - "loss": 0.6864, + "epoch": 0.8794790566701866, + "grad_norm": 2.2301459312438965, + "learning_rate": 5.266002067839744e-07, + "loss": 0.5578, "step": 12493 }, { - "epoch": 0.9454050168362907, - "grad_norm": 2.174910306930542, - "learning_rate": 1.460965791722808e-07, - "loss": 0.4797, + "epoch": 0.8795494544174587, + "grad_norm": 1.4917396306991577, + "learning_rate": 5.259930197994805e-07, + "loss": 0.6909, "step": 12494 }, { - "epoch": 0.9454806855586243, - "grad_norm": 2.039815664291382, - "learning_rate": 1.4569269544460872e-07, - "loss": 0.621, + "epoch": 0.8796198521647307, + "grad_norm": 2.2213997840881348, + "learning_rate": 5.253861694044711e-07, + "loss": 0.7882, "step": 12495 }, { - "epoch": 0.945556354280958, - "grad_norm": 2.9486753940582275, - "learning_rate": 1.4528936620229826e-07, - "loss": 0.7348, + "epoch": 0.8796902499120028, + "grad_norm": 2.350996255874634, + "learning_rate": 5.247796556304979e-07, + "loss": 0.6377, "step": 12496 }, { - "epoch": 0.9456320230032916, - "grad_norm": 2.2600271701812744, - "learning_rate": 1.4488659147060723e-07, - "loss": 0.6521, + "epoch": 0.8797606476592749, + "grad_norm": 1.7027032375335693, + "learning_rate": 5.241734785090963e-07, + "loss": 0.6019, "step": 12497 }, { - "epoch": 0.9457076917256252, - "grad_norm": 2.1503450870513916, - "learning_rate": 1.4448437127475844e-07, - "loss": 0.6144, + "epoch": 0.879831045406547, + "grad_norm": 2.1899254322052, + "learning_rate": 5.235676380717783e-07, + "loss": 0.6112, "step": 12498 }, { - "epoch": 0.9457833604479589, - "grad_norm": 2.493232250213623, - "learning_rate": 1.4408270563994075e-07, - "loss": 0.6895, + "epoch": 0.8799014431538191, + "grad_norm": 2.197096347808838, + "learning_rate": 5.229621343500461e-07, + "loss": 0.5626, "step": 12499 }, { - "epoch": 0.9458590291702924, - "grad_norm": 2.2982404232025146, - "learning_rate": 1.4368159459130704e-07, - "loss": 0.6948, + "epoch": 0.8799718409010912, + "grad_norm": 1.9483237266540527, + "learning_rate": 5.223569673753784e-07, + "loss": 0.6702, "step": 12500 }, { - "epoch": 0.9459346978926261, - "grad_norm": 1.8142317533493042, - "learning_rate": 1.432810381539772e-07, - "loss": 0.6644, + "epoch": 0.8800422386483633, + "grad_norm": 1.7283504009246826, + "learning_rate": 5.217521371792383e-07, + "loss": 0.5245, "step": 12501 }, { - "epoch": 0.9460103666149597, - "grad_norm": 2.070129871368408, - "learning_rate": 1.4288103635303517e-07, - "loss": 0.5584, + "epoch": 0.8801126363956353, + "grad_norm": 2.3502001762390137, + "learning_rate": 5.211476437930711e-07, + "loss": 0.5854, "step": 12502 }, { - "epoch": 0.9460860353372933, - "grad_norm": 2.6108832359313965, - "learning_rate": 1.4248158921352894e-07, - "loss": 0.6624, + "epoch": 0.8801830341429074, + "grad_norm": 1.7420787811279297, + "learning_rate": 5.205434872483058e-07, + "loss": 0.5895, "step": 12503 }, { - "epoch": 0.946161704059627, - "grad_norm": 1.9606751203536987, - "learning_rate": 1.4208269676047547e-07, - "loss": 0.6436, + "epoch": 0.8802534318901795, + "grad_norm": 1.764638900756836, + "learning_rate": 5.199396675763519e-07, + "loss": 0.6983, "step": 12504 }, { - "epoch": 0.9462373727819606, - "grad_norm": 1.9400218725204468, - "learning_rate": 1.416843590188528e-07, - "loss": 0.7202, + "epoch": 0.8803238296374516, + "grad_norm": 2.60969614982605, + "learning_rate": 5.19336184808605e-07, + "loss": 0.6494, "step": 12505 }, { - "epoch": 0.9463130415042942, - "grad_norm": 3.4818761348724365, - "learning_rate": 1.4128657601360696e-07, - "loss": 0.8214, + "epoch": 0.8803942273847237, + "grad_norm": 2.0727624893188477, + "learning_rate": 5.187330389764388e-07, + "loss": 0.7484, "step": 12506 }, { - "epoch": 0.9463887102266278, - "grad_norm": 2.120473623275757, - "learning_rate": 1.4088934776964902e-07, - "loss": 0.651, + "epoch": 0.8804646251319957, + "grad_norm": 2.236168384552002, + "learning_rate": 5.181302301112118e-07, + "loss": 0.5733, "step": 12507 }, { - "epoch": 0.9464643789489614, - "grad_norm": 2.1492528915405273, - "learning_rate": 1.404926743118531e-07, - "loss": 0.6341, + "epoch": 0.8805350228792679, + "grad_norm": 1.9175970554351807, + "learning_rate": 5.175277582442624e-07, + "loss": 0.7172, "step": 12508 }, { - "epoch": 0.9465400476712951, - "grad_norm": 2.199296712875366, - "learning_rate": 1.400965556650613e-07, - "loss": 0.6321, + "epoch": 0.8806054206265399, + "grad_norm": 2.018249273300171, + "learning_rate": 5.169256234069173e-07, + "loss": 0.6834, "step": 12509 }, { - "epoch": 0.9466157163936287, - "grad_norm": 1.9306163787841797, - "learning_rate": 1.3970099185407982e-07, - "loss": 0.6515, + "epoch": 0.880675818373812, + "grad_norm": 2.1356425285339355, + "learning_rate": 5.163238256304796e-07, + "loss": 0.6243, "step": 12510 }, { - "epoch": 0.9466913851159623, - "grad_norm": 1.7523746490478516, - "learning_rate": 1.393059829036788e-07, - "loss": 0.672, + "epoch": 0.8807462161210842, + "grad_norm": 1.8183988332748413, + "learning_rate": 5.157223649462398e-07, + "loss": 0.6033, "step": 12511 }, { - "epoch": 0.946767053838296, - "grad_norm": 1.741031527519226, - "learning_rate": 1.3891152883859748e-07, - "loss": 0.5683, + "epoch": 0.8808166138683562, + "grad_norm": 2.1137032508850098, + "learning_rate": 5.15121241385466e-07, + "loss": 0.6445, "step": 12512 }, { - "epoch": 0.9468427225606295, - "grad_norm": 2.261603355407715, - "learning_rate": 1.385176296835361e-07, - "loss": 0.6403, + "epoch": 0.8808870116156283, + "grad_norm": 1.846226453781128, + "learning_rate": 5.145204549794123e-07, + "loss": 0.6165, "step": 12513 }, { - "epoch": 0.9469183912829632, - "grad_norm": 2.020596504211426, - "learning_rate": 1.381242854631619e-07, - "loss": 0.7049, + "epoch": 0.8809574093629003, + "grad_norm": 1.7362251281738281, + "learning_rate": 5.139200057593127e-07, + "loss": 0.5688, "step": 12514 }, { - "epoch": 0.9469940600052968, - "grad_norm": 1.8669471740722656, - "learning_rate": 1.3773149620210723e-07, - "loss": 0.6478, + "epoch": 0.8810278071101725, + "grad_norm": 2.1299259662628174, + "learning_rate": 5.133198937563863e-07, + "loss": 0.6372, "step": 12515 }, { - "epoch": 0.9470697287276304, - "grad_norm": 2.3542299270629883, - "learning_rate": 1.3733926192497136e-07, - "loss": 0.7477, + "epoch": 0.8810982048574446, + "grad_norm": 1.8625078201293945, + "learning_rate": 5.127201190018335e-07, + "loss": 0.6451, "step": 12516 }, { - "epoch": 0.9471453974499641, - "grad_norm": 2.5550339221954346, - "learning_rate": 1.3694758265631568e-07, - "loss": 0.6786, + "epoch": 0.8811686026047166, + "grad_norm": 3.2149338722229004, + "learning_rate": 5.121206815268371e-07, + "loss": 0.73, "step": 12517 }, { - "epoch": 0.9472210661722977, - "grad_norm": 2.4507362842559814, - "learning_rate": 1.3655645842066956e-07, - "loss": 0.6481, + "epoch": 0.8812390003519888, + "grad_norm": 1.9278578758239746, + "learning_rate": 5.115215813625624e-07, + "loss": 0.6175, "step": 12518 }, { - "epoch": 0.9472967348946313, - "grad_norm": 1.8866103887557983, - "learning_rate": 1.3616588924252538e-07, - "loss": 0.6686, + "epoch": 0.8813093980992608, + "grad_norm": 2.0707976818084717, + "learning_rate": 5.109228185401572e-07, + "loss": 0.6079, "step": 12519 }, { - "epoch": 0.947372403616965, - "grad_norm": 2.28023624420166, - "learning_rate": 1.357758751463416e-07, - "loss": 0.7095, + "epoch": 0.8813797958465329, + "grad_norm": 1.9656046628952026, + "learning_rate": 5.103243930907503e-07, + "loss": 0.6668, "step": 12520 }, { - "epoch": 0.9474480723392985, - "grad_norm": 2.679999351501465, - "learning_rate": 1.3538641615654468e-07, - "loss": 0.7683, + "epoch": 0.881450193593805, + "grad_norm": 2.0888001918792725, + "learning_rate": 5.09726305045457e-07, + "loss": 0.6767, "step": 12521 }, { - "epoch": 0.9475237410616322, - "grad_norm": 2.136809825897217, - "learning_rate": 1.349975122975211e-07, - "loss": 0.771, + "epoch": 0.8815205913410771, + "grad_norm": 2.4927759170532227, + "learning_rate": 5.091285544353703e-07, + "loss": 0.718, "step": 12522 }, { - "epoch": 0.9475994097839658, - "grad_norm": 2.2693591117858887, - "learning_rate": 1.346091635936254e-07, - "loss": 0.6842, + "epoch": 0.8815909890883492, + "grad_norm": 1.7889806032180786, + "learning_rate": 5.085311412915687e-07, + "loss": 0.6844, "step": 12523 }, { - "epoch": 0.9476750785062994, - "grad_norm": 1.6307967901229858, - "learning_rate": 1.3422137006917913e-07, - "loss": 0.801, + "epoch": 0.8816613868356212, + "grad_norm": 1.725429654121399, + "learning_rate": 5.079340656451128e-07, + "loss": 0.5482, "step": 12524 }, { - "epoch": 0.9477507472286331, - "grad_norm": 2.6975574493408203, - "learning_rate": 1.3383413174846582e-07, - "loss": 0.6235, + "epoch": 0.8817317845828934, + "grad_norm": 2.248323678970337, + "learning_rate": 5.07337327527044e-07, + "loss": 0.7097, "step": 12525 }, { - "epoch": 0.9478264159509666, - "grad_norm": 2.2791359424591064, - "learning_rate": 1.334474486557351e-07, - "loss": 0.591, + "epoch": 0.8818021823301654, + "grad_norm": 2.371589183807373, + "learning_rate": 5.067409269683861e-07, + "loss": 0.5871, "step": 12526 }, { - "epoch": 0.9479020846733003, - "grad_norm": 2.0071561336517334, - "learning_rate": 1.3306132081520362e-07, - "loss": 0.6949, + "epoch": 0.8818725800774375, + "grad_norm": 1.9698947668075562, + "learning_rate": 5.061448640001499e-07, + "loss": 0.645, "step": 12527 }, { - "epoch": 0.947977753395634, - "grad_norm": 3.1164028644561768, - "learning_rate": 1.32675748251052e-07, - "loss": 0.6275, + "epoch": 0.8819429778247097, + "grad_norm": 1.9110363721847534, + "learning_rate": 5.055491386533224e-07, + "loss": 0.691, "step": 12528 }, { - "epoch": 0.9480534221179675, - "grad_norm": 2.2760443687438965, - "learning_rate": 1.3229073098742496e-07, - "loss": 0.7345, + "epoch": 0.8820133755719817, + "grad_norm": 1.8444054126739502, + "learning_rate": 5.049537509588771e-07, + "loss": 0.6373, "step": 12529 }, { - "epoch": 0.9481290908403012, - "grad_norm": 3.998307228088379, - "learning_rate": 1.3190626904843317e-07, - "loss": 0.7405, + "epoch": 0.8820837733192538, + "grad_norm": 2.128666639328003, + "learning_rate": 5.043587009477696e-07, + "loss": 0.6567, "step": 12530 }, { - "epoch": 0.9482047595626348, - "grad_norm": 2.044447183609009, - "learning_rate": 1.315223624581544e-07, - "loss": 0.6031, + "epoch": 0.8821541710665258, + "grad_norm": 1.8344435691833496, + "learning_rate": 5.037639886509354e-07, + "loss": 0.5835, "step": 12531 }, { - "epoch": 0.9482804282849684, - "grad_norm": 3.347496509552002, - "learning_rate": 1.3113901124063045e-07, - "loss": 0.7423, + "epoch": 0.882224568813798, + "grad_norm": 1.9073823690414429, + "learning_rate": 5.031696140992938e-07, + "loss": 0.5663, "step": 12532 }, { - "epoch": 0.9483560970073021, - "grad_norm": 1.8583823442459106, - "learning_rate": 1.3075621541986605e-07, - "loss": 0.6394, + "epoch": 0.8822949665610701, + "grad_norm": 2.0453548431396484, + "learning_rate": 5.025755773237489e-07, + "loss": 0.6041, "step": 12533 }, { - "epoch": 0.9484317657296356, - "grad_norm": 2.4121158123016357, - "learning_rate": 1.3037397501983406e-07, - "loss": 0.6032, + "epoch": 0.8823653643083421, + "grad_norm": 2.1344330310821533, + "learning_rate": 5.019818783551828e-07, + "loss": 0.6739, "step": 12534 }, { - "epoch": 0.9485074344519693, - "grad_norm": 2.290224313735962, - "learning_rate": 1.2999229006447134e-07, - "loss": 0.6019, + "epoch": 0.8824357620556142, + "grad_norm": 2.2291245460510254, + "learning_rate": 5.013885172244657e-07, + "loss": 0.5836, "step": 12535 }, { - "epoch": 0.9485831031743029, - "grad_norm": 2.389235019683838, - "learning_rate": 1.2961116057768074e-07, - "loss": 0.7305, + "epoch": 0.8825061598028863, + "grad_norm": 1.75200617313385, + "learning_rate": 5.007954939624443e-07, + "loss": 0.6428, "step": 12536 }, { - "epoch": 0.9486587718966365, - "grad_norm": 2.0536439418792725, - "learning_rate": 1.292305865833292e-07, - "loss": 0.5639, + "epoch": 0.8825765575501584, + "grad_norm": 1.747405767440796, + "learning_rate": 5.002028085999515e-07, + "loss": 0.5865, "step": 12537 }, { - "epoch": 0.9487344406189702, - "grad_norm": 2.742306709289551, - "learning_rate": 1.2885056810525063e-07, - "loss": 0.7101, + "epoch": 0.8826469552974305, + "grad_norm": 1.766391396522522, + "learning_rate": 4.996104611678003e-07, + "loss": 0.6186, "step": 12538 }, { - "epoch": 0.9488101093413037, - "grad_norm": 1.4020804166793823, - "learning_rate": 1.2847110516724202e-07, - "loss": 0.8055, + "epoch": 0.8827173530447026, + "grad_norm": 1.986774206161499, + "learning_rate": 4.99018451696789e-07, + "loss": 0.6582, "step": 12539 }, { - "epoch": 0.9488857780636374, - "grad_norm": 1.8928502798080444, - "learning_rate": 1.2809219779306735e-07, - "loss": 0.4833, + "epoch": 0.8827877507919747, + "grad_norm": 1.932236909866333, + "learning_rate": 4.984267802176956e-07, + "loss": 0.6017, "step": 12540 }, { - "epoch": 0.948961446785971, - "grad_norm": 1.8035271167755127, - "learning_rate": 1.2771384600645264e-07, - "loss": 0.727, + "epoch": 0.8828581485392467, + "grad_norm": 1.9888869524002075, + "learning_rate": 4.978354467612822e-07, + "loss": 0.6197, "step": 12541 }, { - "epoch": 0.9490371155083046, - "grad_norm": 2.05291485786438, - "learning_rate": 1.2733604983109493e-07, - "loss": 0.6463, + "epoch": 0.8829285462865188, + "grad_norm": 2.3829824924468994, + "learning_rate": 4.972444513582936e-07, + "loss": 0.7048, "step": 12542 }, { - "epoch": 0.9491127842306383, - "grad_norm": 2.0966427326202393, - "learning_rate": 1.269588092906513e-07, - "loss": 0.5246, + "epoch": 0.8829989440337909, + "grad_norm": 2.0170528888702393, + "learning_rate": 4.966537940394543e-07, + "loss": 0.6764, "step": 12543 }, { - "epoch": 0.9491884529529719, - "grad_norm": 1.9400415420532227, - "learning_rate": 1.2658212440874585e-07, - "loss": 0.5948, + "epoch": 0.883069341781063, + "grad_norm": 2.070763349533081, + "learning_rate": 4.960634748354734e-07, + "loss": 0.7168, "step": 12544 }, { - "epoch": 0.9492641216753055, - "grad_norm": 2.1425304412841797, - "learning_rate": 1.262059952089677e-07, - "loss": 0.5876, + "epoch": 0.8831397395283351, + "grad_norm": 1.8647924661636353, + "learning_rate": 4.954734937770439e-07, + "loss": 0.6324, "step": 12545 }, { - "epoch": 0.9493397903976392, - "grad_norm": 2.972198724746704, - "learning_rate": 1.2583042171487103e-07, - "loss": 0.6889, + "epoch": 0.8832101372756072, + "grad_norm": 2.0136897563934326, + "learning_rate": 4.948838508948372e-07, + "loss": 0.6489, "step": 12546 }, { - "epoch": 0.9494154591199727, - "grad_norm": 1.892687201499939, - "learning_rate": 1.25455403949976e-07, - "loss": 0.6808, + "epoch": 0.8832805350228793, + "grad_norm": 1.8455528020858765, + "learning_rate": 4.94294546219512e-07, + "loss": 0.7315, "step": 12547 }, { - "epoch": 0.9494911278423064, - "grad_norm": 2.389648914337158, - "learning_rate": 1.2508094193776786e-07, - "loss": 0.7109, + "epoch": 0.8833509327701513, + "grad_norm": 1.8983827829360962, + "learning_rate": 4.937055797817045e-07, + "loss": 0.7392, "step": 12548 }, { - "epoch": 0.94956679656464, - "grad_norm": 2.40558123588562, - "learning_rate": 1.2470703570169583e-07, - "loss": 0.6682, + "epoch": 0.8834213305174234, + "grad_norm": 4.205691814422607, + "learning_rate": 4.931169516120355e-07, + "loss": 0.7103, "step": 12549 }, { - "epoch": 0.9496424652869736, - "grad_norm": 5.251361846923828, - "learning_rate": 1.2433368526517619e-07, - "loss": 0.7605, + "epoch": 0.8834917282646956, + "grad_norm": 2.2361857891082764, + "learning_rate": 4.925286617411103e-07, + "loss": 0.5571, "step": 12550 }, { - "epoch": 0.9497181340093073, - "grad_norm": 1.9533052444458008, - "learning_rate": 1.2396089065158722e-07, - "loss": 0.619, + "epoch": 0.8835621260119676, + "grad_norm": 1.7480322122573853, + "learning_rate": 4.919407101995128e-07, + "loss": 0.6179, "step": 12551 }, { - "epoch": 0.9497938027316408, - "grad_norm": 2.2325778007507324, - "learning_rate": 1.2358865188427626e-07, - "loss": 0.707, + "epoch": 0.8836325237592397, + "grad_norm": 1.9113088846206665, + "learning_rate": 4.913530970178127e-07, + "loss": 0.6369, "step": 12552 }, { - "epoch": 0.9498694714539745, - "grad_norm": 2.874405860900879, - "learning_rate": 1.2321696898655465e-07, - "loss": 0.6691, + "epoch": 0.8837029215065118, + "grad_norm": 1.9788504838943481, + "learning_rate": 4.907658222265592e-07, + "loss": 0.6537, "step": 12553 }, { - "epoch": 0.9499451401763082, - "grad_norm": 6.6497697830200195, - "learning_rate": 1.228458419816968e-07, - "loss": 0.7334, + "epoch": 0.8837733192537839, + "grad_norm": 2.1421337127685547, + "learning_rate": 4.901788858562859e-07, + "loss": 0.6859, "step": 12554 }, { - "epoch": 0.9500208088986417, - "grad_norm": 2.4262197017669678, - "learning_rate": 1.2247527089294408e-07, - "loss": 0.7098, + "epoch": 0.883843717001056, + "grad_norm": 2.737111806869507, + "learning_rate": 4.895922879375071e-07, + "loss": 0.6934, "step": 12555 }, { - "epoch": 0.9500964776209754, - "grad_norm": 2.1515469551086426, - "learning_rate": 1.2210525574350296e-07, - "loss": 0.5938, + "epoch": 0.883914114748328, + "grad_norm": 1.8041478395462036, + "learning_rate": 4.890060285007223e-07, + "loss": 0.5781, "step": 12556 }, { - "epoch": 0.950172146343309, - "grad_norm": 2.0948853492736816, - "learning_rate": 1.2173579655654686e-07, - "loss": 0.7028, + "epoch": 0.8839845124956002, + "grad_norm": 2.1074395179748535, + "learning_rate": 4.8842010757641e-07, + "loss": 0.6392, "step": 12557 }, { - "epoch": 0.9502478150656426, - "grad_norm": 2.3338329792022705, - "learning_rate": 1.2136689335521035e-07, - "loss": 0.7445, + "epoch": 0.8840549102428722, + "grad_norm": 1.9360331296920776, + "learning_rate": 4.878345251950346e-07, + "loss": 0.6259, "step": 12558 }, { - "epoch": 0.9503234837879763, - "grad_norm": 1.8340742588043213, - "learning_rate": 1.2099854616259587e-07, - "loss": 0.6667, + "epoch": 0.8841253079901443, + "grad_norm": 1.9327516555786133, + "learning_rate": 4.8724928138704e-07, + "loss": 0.5636, "step": 12559 }, { - "epoch": 0.9503991525103098, - "grad_norm": 3.241755962371826, - "learning_rate": 1.2063075500177e-07, - "loss": 0.6357, + "epoch": 0.8841957057374163, + "grad_norm": 1.8306328058242798, + "learning_rate": 4.86664376182853e-07, + "loss": 0.7617, "step": 12560 }, { - "epoch": 0.9504748212326435, - "grad_norm": 2.4523792266845703, - "learning_rate": 1.2026351989576633e-07, - "loss": 0.5422, + "epoch": 0.8842661034846885, + "grad_norm": 2.0171685218811035, + "learning_rate": 4.860798096128831e-07, + "loss": 0.6347, "step": 12561 }, { - "epoch": 0.9505504899549772, - "grad_norm": 2.439547538757324, - "learning_rate": 1.1989684086758147e-07, - "loss": 0.7088, + "epoch": 0.8843365012319606, + "grad_norm": 2.2314300537109375, + "learning_rate": 4.854955817075242e-07, + "loss": 0.5936, "step": 12562 }, { - "epoch": 0.9506261586773107, - "grad_norm": 2.1670706272125244, - "learning_rate": 1.19530717940178e-07, - "loss": 0.7398, + "epoch": 0.8844068989792326, + "grad_norm": 1.6845163106918335, + "learning_rate": 4.849116924971489e-07, + "loss": 0.6114, "step": 12563 }, { - "epoch": 0.9507018273996444, - "grad_norm": 3.0719752311706543, - "learning_rate": 1.1916515113648463e-07, - "loss": 0.7488, + "epoch": 0.8844772967265048, + "grad_norm": 2.020432949066162, + "learning_rate": 4.84328142012117e-07, + "loss": 0.6631, "step": 12564 }, { - "epoch": 0.9507774961219779, - "grad_norm": 4.41519832611084, - "learning_rate": 1.1880014047939302e-07, - "loss": 0.58, + "epoch": 0.8845476944737768, + "grad_norm": 2.068474531173706, + "learning_rate": 4.837449302827647e-07, + "loss": 0.5934, "step": 12565 }, { - "epoch": 0.9508531648443116, - "grad_norm": 2.505060911178589, - "learning_rate": 1.1843568599176091e-07, - "loss": 0.6799, + "epoch": 0.8846180922210489, + "grad_norm": 1.97312331199646, + "learning_rate": 4.831620573394159e-07, + "loss": 0.6651, "step": 12566 }, { - "epoch": 0.9509288335666453, - "grad_norm": 2.1886472702026367, - "learning_rate": 1.1807178769641402e-07, - "loss": 0.6989, + "epoch": 0.8846884899683211, + "grad_norm": 1.785732388496399, + "learning_rate": 4.82579523212372e-07, + "loss": 0.5195, "step": 12567 }, { - "epoch": 0.9510045022889788, - "grad_norm": 2.5705251693725586, - "learning_rate": 1.1770844561613913e-07, - "loss": 0.6411, + "epoch": 0.8847588877155931, + "grad_norm": 2.500920534133911, + "learning_rate": 4.819973279319232e-07, + "loss": 0.6702, "step": 12568 }, { - "epoch": 0.9510801710113125, - "grad_norm": 2.1216318607330322, - "learning_rate": 1.1734565977369005e-07, - "loss": 0.6649, + "epoch": 0.8848292854628652, + "grad_norm": 1.9539536237716675, + "learning_rate": 4.814154715283344e-07, + "loss": 0.595, "step": 12569 }, { - "epoch": 0.9511558397336461, - "grad_norm": 2.3023529052734375, - "learning_rate": 1.1698343019178559e-07, - "loss": 0.6896, + "epoch": 0.8848996832101372, + "grad_norm": 2.789226770401001, + "learning_rate": 4.808339540318603e-07, + "loss": 0.6999, "step": 12570 }, { - "epoch": 0.9512315084559797, - "grad_norm": 2.0217092037200928, - "learning_rate": 1.166217568931096e-07, - "loss": 0.4953, + "epoch": 0.8849700809574094, + "grad_norm": 2.1186294555664062, + "learning_rate": 4.802527754727324e-07, + "loss": 0.5425, "step": 12571 }, { - "epoch": 0.9513071771783134, - "grad_norm": 2.0048747062683105, - "learning_rate": 1.1626063990031199e-07, - "loss": 0.5621, + "epoch": 0.8850404787046815, + "grad_norm": 1.9864985942840576, + "learning_rate": 4.796719358811678e-07, + "loss": 0.791, "step": 12572 }, { - "epoch": 0.9513828459006469, - "grad_norm": 2.298532009124756, - "learning_rate": 1.1590007923600665e-07, - "loss": 0.733, + "epoch": 0.8851108764519535, + "grad_norm": 2.0700480937957764, + "learning_rate": 4.790914352873639e-07, + "loss": 0.6638, "step": 12573 }, { - "epoch": 0.9514585146229806, - "grad_norm": 2.250257730484009, - "learning_rate": 1.1554007492277252e-07, - "loss": 0.7155, + "epoch": 0.8851812741992257, + "grad_norm": 2.1172220706939697, + "learning_rate": 4.785112737215023e-07, + "loss": 0.6276, "step": 12574 }, { - "epoch": 0.9515341833453143, - "grad_norm": 2.662155866622925, - "learning_rate": 1.1518062698315557e-07, - "loss": 0.608, + "epoch": 0.8852516719464977, + "grad_norm": 2.698059558868408, + "learning_rate": 4.779314512137452e-07, + "loss": 0.6711, "step": 12575 }, { - "epoch": 0.9516098520676478, - "grad_norm": 2.0141024589538574, - "learning_rate": 1.1482173543966479e-07, - "loss": 0.5917, + "epoch": 0.8853220696937698, + "grad_norm": 1.9960970878601074, + "learning_rate": 4.773519677942393e-07, + "loss": 0.6787, "step": 12576 }, { - "epoch": 0.9516855207899815, - "grad_norm": 7.37333345413208, - "learning_rate": 1.144634003147742e-07, - "loss": 0.8291, + "epoch": 0.8853924674410419, + "grad_norm": 1.7608866691589355, + "learning_rate": 4.767728234931117e-07, + "loss": 0.645, "step": 12577 }, { - "epoch": 0.951761189512315, - "grad_norm": 2.1229794025421143, - "learning_rate": 1.1410562163092486e-07, - "loss": 0.5582, + "epoch": 0.885462865188314, + "grad_norm": 2.8511006832122803, + "learning_rate": 4.7619401834047336e-07, + "loss": 0.6368, "step": 12578 }, { - "epoch": 0.9518368582346487, - "grad_norm": 2.4636754989624023, - "learning_rate": 1.1374839941052284e-07, - "loss": 0.8073, + "epoch": 0.8855332629355861, + "grad_norm": 3.0386931896209717, + "learning_rate": 4.7561555236641505e-07, + "loss": 0.7486, "step": 12579 }, { - "epoch": 0.9519125269569824, - "grad_norm": 2.854630947113037, - "learning_rate": 1.1339173367593725e-07, - "loss": 0.5689, + "epoch": 0.8856036606828581, + "grad_norm": 2.6089000701904297, + "learning_rate": 4.7503742560101427e-07, + "loss": 0.7601, "step": 12580 }, { - "epoch": 0.9519881956793159, - "grad_norm": 1.9160290956497192, - "learning_rate": 1.1303562444950321e-07, - "loss": 0.5861, + "epoch": 0.8856740584301303, + "grad_norm": 2.2246148586273193, + "learning_rate": 4.744596380743259e-07, + "loss": 0.6228, "step": 12581 }, { - "epoch": 0.9520638644016496, - "grad_norm": 2.2296550273895264, - "learning_rate": 1.1268007175352291e-07, - "loss": 0.6746, + "epoch": 0.8857444561774023, + "grad_norm": 2.0544445514678955, + "learning_rate": 4.7388218981639175e-07, + "loss": 0.635, "step": 12582 }, { - "epoch": 0.9521395331239833, - "grad_norm": 2.0400097370147705, - "learning_rate": 1.123250756102625e-07, - "loss": 0.7203, + "epoch": 0.8858148539246744, + "grad_norm": 2.0381734371185303, + "learning_rate": 4.7330508085723174e-07, + "loss": 0.6092, "step": 12583 }, { - "epoch": 0.9522152018463168, - "grad_norm": 1.9765279293060303, - "learning_rate": 1.1197063604195123e-07, - "loss": 0.589, + "epoch": 0.8858852516719465, + "grad_norm": 1.9095635414123535, + "learning_rate": 4.727283112268535e-07, + "loss": 0.6702, "step": 12584 }, { - "epoch": 0.9522908705686505, - "grad_norm": 2.517303228378296, - "learning_rate": 1.1161675307078534e-07, - "loss": 0.741, + "epoch": 0.8859556494192186, + "grad_norm": 2.1992170810699463, + "learning_rate": 4.721518809552405e-07, + "loss": 0.675, "step": 12585 }, { - "epoch": 0.952366539290984, - "grad_norm": 2.5748841762542725, - "learning_rate": 1.1126342671892908e-07, - "loss": 0.6368, + "epoch": 0.8860260471664907, + "grad_norm": 1.7810245752334595, + "learning_rate": 4.7157579007236374e-07, + "loss": 0.6398, "step": 12586 }, { - "epoch": 0.9524422080133177, - "grad_norm": 1.8756376504898071, - "learning_rate": 1.1091065700850378e-07, - "loss": 0.6526, + "epoch": 0.8860964449137627, + "grad_norm": 2.144512891769409, + "learning_rate": 4.710000386081724e-07, + "loss": 0.6211, "step": 12587 }, { - "epoch": 0.9525178767356514, - "grad_norm": 3.596529960632324, - "learning_rate": 1.1055844396160574e-07, - "loss": 0.5991, + "epoch": 0.8861668426610348, + "grad_norm": 1.890750527381897, + "learning_rate": 4.704246265926042e-07, + "loss": 0.532, "step": 12588 }, { - "epoch": 0.9525935454579849, - "grad_norm": 6.150516033172607, - "learning_rate": 1.1020678760029035e-07, - "loss": 0.6345, + "epoch": 0.886237240408307, + "grad_norm": 1.8819948434829712, + "learning_rate": 4.6984955405557183e-07, + "loss": 0.7227, "step": 12589 }, { - "epoch": 0.9526692141803186, - "grad_norm": 2.0023791790008545, - "learning_rate": 1.0985568794657797e-07, - "loss": 0.5204, + "epoch": 0.886307638155579, + "grad_norm": 1.8352811336517334, + "learning_rate": 4.6927482102697714e-07, + "loss": 0.6645, "step": 12590 }, { - "epoch": 0.9527448829026521, - "grad_norm": 1.9689321517944336, - "learning_rate": 1.0950514502245701e-07, - "loss": 0.666, + "epoch": 0.8863780359028511, + "grad_norm": 1.9750006198883057, + "learning_rate": 4.6870042753669714e-07, + "loss": 0.6551, "step": 12591 }, { - "epoch": 0.9528205516249858, - "grad_norm": 2.2433865070343018, - "learning_rate": 1.0915515884987892e-07, - "loss": 0.6823, + "epoch": 0.8864484336501232, + "grad_norm": 1.6565097570419312, + "learning_rate": 4.6812637361459797e-07, + "loss": 0.7189, "step": 12592 }, { - "epoch": 0.9528962203473195, - "grad_norm": 2.0992069244384766, - "learning_rate": 1.0880572945076217e-07, - "loss": 0.7694, + "epoch": 0.8865188313973953, + "grad_norm": 2.216400623321533, + "learning_rate": 4.6755265929052313e-07, + "loss": 0.673, "step": 12593 }, { - "epoch": 0.952971889069653, - "grad_norm": 3.2551393508911133, - "learning_rate": 1.0845685684698726e-07, - "loss": 0.6114, + "epoch": 0.8865892291446674, + "grad_norm": 2.2100305557250977, + "learning_rate": 4.6697928459430226e-07, + "loss": 0.6386, "step": 12594 }, { - "epoch": 0.9530475577919867, - "grad_norm": 2.008338212966919, - "learning_rate": 1.0810854106040268e-07, - "loss": 0.5985, + "epoch": 0.8866596268919394, + "grad_norm": 1.5414975881576538, + "learning_rate": 4.6640624955574473e-07, + "loss": 0.7041, "step": 12595 }, { - "epoch": 0.9531232265143204, - "grad_norm": 2.3150219917297363, - "learning_rate": 1.0776078211282203e-07, - "loss": 0.7655, + "epoch": 0.8867300246392116, + "grad_norm": 2.4466373920440674, + "learning_rate": 4.6583355420464434e-07, + "loss": 0.6624, "step": 12596 }, { - "epoch": 0.9531988952366539, - "grad_norm": 2.6294565200805664, - "learning_rate": 1.0741358002602086e-07, - "loss": 0.6856, + "epoch": 0.8868004223864836, + "grad_norm": 1.8951863050460815, + "learning_rate": 4.652611985707747e-07, + "loss": 0.6161, "step": 12597 }, { - "epoch": 0.9532745639589876, - "grad_norm": 1.913138747215271, - "learning_rate": 1.0706693482174479e-07, - "loss": 0.597, + "epoch": 0.8868708201337557, + "grad_norm": 2.0504043102264404, + "learning_rate": 4.6468918268389325e-07, + "loss": 0.7088, "step": 12598 }, { - "epoch": 0.9533502326813211, - "grad_norm": 2.5744075775146484, - "learning_rate": 1.0672084652169944e-07, - "loss": 0.8647, + "epoch": 0.8869412178810278, + "grad_norm": 2.017246961593628, + "learning_rate": 4.6411750657374085e-07, + "loss": 0.6243, "step": 12599 }, { - "epoch": 0.9534259014036548, - "grad_norm": 1.7659814357757568, - "learning_rate": 1.0637531514756049e-07, - "loss": 0.6648, + "epoch": 0.8870116156282999, + "grad_norm": 2.987362861633301, + "learning_rate": 4.635461702700367e-07, + "loss": 0.6016, "step": 12600 }, { - "epoch": 0.9535015701259885, - "grad_norm": 1.7846674919128418, - "learning_rate": 1.0603034072096363e-07, - "loss": 0.8073, + "epoch": 0.887082013375572, + "grad_norm": 2.317232608795166, + "learning_rate": 4.6297517380248854e-07, + "loss": 0.6449, "step": 12601 }, { - "epoch": 0.953577238848322, - "grad_norm": 2.20290207862854, - "learning_rate": 1.0568592326351257e-07, - "loss": 0.7963, + "epoch": 0.887152411122844, + "grad_norm": 2.4976181983947754, + "learning_rate": 4.6240451720078126e-07, + "loss": 0.7789, "step": 12602 }, { - "epoch": 0.9536529075706557, - "grad_norm": 1.7884464263916016, - "learning_rate": 1.0534206279677904e-07, - "loss": 0.6471, + "epoch": 0.8872228088701162, + "grad_norm": 1.8676873445510864, + "learning_rate": 4.6183420049458433e-07, + "loss": 0.5504, "step": 12603 }, { - "epoch": 0.9537285762929892, - "grad_norm": 2.2755014896392822, - "learning_rate": 1.0499875934229286e-07, - "loss": 0.6275, + "epoch": 0.8872932066173882, + "grad_norm": 2.159871816635132, + "learning_rate": 4.6126422371354713e-07, + "loss": 0.6574, "step": 12604 }, { - "epoch": 0.9538042450153229, - "grad_norm": 1.9066975116729736, - "learning_rate": 1.046560129215538e-07, - "loss": 0.6213, + "epoch": 0.8873636043646603, + "grad_norm": 1.8507325649261475, + "learning_rate": 4.606945868873057e-07, + "loss": 0.6194, "step": 12605 }, { - "epoch": 0.9538799137376566, - "grad_norm": 3.067195177078247, - "learning_rate": 1.043138235560267e-07, - "loss": 0.6876, + "epoch": 0.8874340021119325, + "grad_norm": 1.921963095664978, + "learning_rate": 4.601252900454743e-07, + "loss": 0.6975, "step": 12606 }, { - "epoch": 0.9539555824599901, - "grad_norm": 2.0356249809265137, - "learning_rate": 1.0397219126714042e-07, - "loss": 0.6169, + "epoch": 0.8875043998592045, + "grad_norm": 1.8644044399261475, + "learning_rate": 4.5955633321765333e-07, + "loss": 0.6165, "step": 12607 }, { - "epoch": 0.9540312511823238, - "grad_norm": 2.0770580768585205, - "learning_rate": 1.0363111607628884e-07, - "loss": 0.7419, + "epoch": 0.8875747976064766, + "grad_norm": 2.009706974029541, + "learning_rate": 4.589877164334213e-07, + "loss": 0.8199, "step": 12608 }, { - "epoch": 0.9541069199046575, - "grad_norm": 3.184155225753784, - "learning_rate": 1.0329059800483087e-07, - "loss": 0.7008, + "epoch": 0.8876451953537486, + "grad_norm": 2.3021645545959473, + "learning_rate": 4.58419439722342e-07, + "loss": 0.6711, "step": 12609 }, { - "epoch": 0.954182588626991, - "grad_norm": 1.850549578666687, - "learning_rate": 1.0295063707409147e-07, - "loss": 0.6739, + "epoch": 0.8877155931010208, + "grad_norm": 1.6189368963241577, + "learning_rate": 4.5785150311395985e-07, + "loss": 0.6955, "step": 12610 }, { - "epoch": 0.9542582573493247, - "grad_norm": 2.2812001705169678, - "learning_rate": 1.026112333053596e-07, - "loss": 0.6861, + "epoch": 0.8877859908482929, + "grad_norm": 1.905119776725769, + "learning_rate": 4.572839066378036e-07, + "loss": 0.5634, "step": 12611 }, { - "epoch": 0.9543339260716582, - "grad_norm": 2.713418960571289, - "learning_rate": 1.0227238671988925e-07, - "loss": 0.5918, + "epoch": 0.8878563885955649, + "grad_norm": 2.0477712154388428, + "learning_rate": 4.5671665032338114e-07, + "loss": 0.6037, "step": 12612 }, { - "epoch": 0.9544095947939919, - "grad_norm": 4.540587902069092, - "learning_rate": 1.0193409733890147e-07, - "loss": 0.6968, + "epoch": 0.8879267863428371, + "grad_norm": 2.172132730484009, + "learning_rate": 4.5614973420018714e-07, + "loss": 0.7269, "step": 12613 }, { - "epoch": 0.9544852635163256, - "grad_norm": 6.917119979858398, - "learning_rate": 1.0159636518358029e-07, - "loss": 0.6452, + "epoch": 0.8879971840901091, + "grad_norm": 2.0179283618927, + "learning_rate": 4.555831582976951e-07, + "loss": 0.625, "step": 12614 }, { - "epoch": 0.9545609322386591, - "grad_norm": 1.9576506614685059, - "learning_rate": 1.012591902750758e-07, - "loss": 0.7493, + "epoch": 0.8880675818373812, + "grad_norm": 1.731439471244812, + "learning_rate": 4.5501692264536177e-07, + "loss": 0.6481, "step": 12615 }, { - "epoch": 0.9546366009609928, - "grad_norm": 2.1393682956695557, - "learning_rate": 1.009225726345021e-07, - "loss": 0.7141, + "epoch": 0.8881379795846532, + "grad_norm": 2.0047290325164795, + "learning_rate": 4.544510272726249e-07, + "loss": 0.6004, "step": 12616 }, { - "epoch": 0.9547122696833263, - "grad_norm": 2.726297378540039, - "learning_rate": 1.0058651228294036e-07, - "loss": 0.697, + "epoch": 0.8882083773319254, + "grad_norm": 1.8155726194381714, + "learning_rate": 4.538854722089084e-07, + "loss": 0.6468, "step": 12617 }, { - "epoch": 0.95478793840566, - "grad_norm": 2.0172853469848633, - "learning_rate": 1.0025100924143571e-07, - "loss": 0.6984, + "epoch": 0.8882787750791975, + "grad_norm": 1.731797456741333, + "learning_rate": 4.533202574836137e-07, + "loss": 0.5862, "step": 12618 }, { - "epoch": 0.9548636071279937, - "grad_norm": 2.437422752380371, - "learning_rate": 9.991606353099836e-08, - "loss": 0.7801, + "epoch": 0.8883491728264695, + "grad_norm": 1.7664546966552734, + "learning_rate": 4.5275538312612815e-07, + "loss": 0.6641, "step": 12619 }, { - "epoch": 0.9549392758503272, - "grad_norm": 2.639605760574341, - "learning_rate": 9.958167517260252e-08, - "loss": 0.6264, + "epoch": 0.8884195705737417, + "grad_norm": 2.243551015853882, + "learning_rate": 4.521908491658205e-07, + "loss": 0.6471, "step": 12620 }, { - "epoch": 0.9550149445726609, - "grad_norm": 2.6709601879119873, - "learning_rate": 9.924784418719146e-08, - "loss": 0.7681, + "epoch": 0.8884899683210137, + "grad_norm": 2.0624167919158936, + "learning_rate": 4.5162665563204007e-07, + "loss": 0.6051, "step": 12621 }, { - "epoch": 0.9550906132949946, - "grad_norm": 1.9807193279266357, - "learning_rate": 9.891457059566745e-08, - "loss": 0.6269, + "epoch": 0.8885603660682858, + "grad_norm": 2.009974241256714, + "learning_rate": 4.5106280255411976e-07, + "loss": 0.7002, "step": 12622 }, { - "epoch": 0.9551662820173281, - "grad_norm": 2.4436280727386475, - "learning_rate": 9.858185441890177e-08, - "loss": 0.5812, + "epoch": 0.888630763815558, + "grad_norm": 1.9113819599151611, + "learning_rate": 4.5049928996137634e-07, + "loss": 0.6388, "step": 12623 }, { - "epoch": 0.9552419507396618, - "grad_norm": 2.2791218757629395, - "learning_rate": 9.824969567773278e-08, - "loss": 0.6978, + "epoch": 0.88870116156283, + "grad_norm": 1.9039044380187988, + "learning_rate": 4.499361178831054e-07, + "loss": 0.6713, "step": 12624 }, { - "epoch": 0.9553176194619953, - "grad_norm": 1.9470678567886353, - "learning_rate": 9.791809439295885e-08, - "loss": 0.8761, + "epoch": 0.8887715593101021, + "grad_norm": 1.7000787258148193, + "learning_rate": 4.4937328634858864e-07, + "loss": 0.6211, "step": 12625 }, { - "epoch": 0.955393288184329, - "grad_norm": 2.1823413372039795, - "learning_rate": 9.758705058534634e-08, - "loss": 0.5794, + "epoch": 0.8888419570573741, + "grad_norm": 1.9217792749404907, + "learning_rate": 4.488107953870876e-07, + "loss": 0.6413, "step": 12626 }, { - "epoch": 0.9554689569066627, - "grad_norm": 2.474637508392334, - "learning_rate": 9.725656427562769e-08, - "loss": 0.6105, + "epoch": 0.8889123548046463, + "grad_norm": 2.1340763568878174, + "learning_rate": 4.4824864502784654e-07, + "loss": 0.7392, "step": 12627 }, { - "epoch": 0.9555446256289962, - "grad_norm": 2.545193910598755, - "learning_rate": 9.692663548449732e-08, - "loss": 0.6524, + "epoch": 0.8889827525519184, + "grad_norm": 2.5532572269439697, + "learning_rate": 4.4768683530009055e-07, + "loss": 0.6537, "step": 12628 }, { - "epoch": 0.9556202943513299, - "grad_norm": 1.8600281476974487, - "learning_rate": 9.659726423261672e-08, - "loss": 0.594, + "epoch": 0.8890531502991904, + "grad_norm": 2.724128007888794, + "learning_rate": 4.4712536623303134e-07, + "loss": 0.61, "step": 12629 }, { - "epoch": 0.9556959630736634, - "grad_norm": 1.867389440536499, - "learning_rate": 9.626845054061239e-08, - "loss": 0.624, + "epoch": 0.8891235480464625, + "grad_norm": 1.9622584581375122, + "learning_rate": 4.465642378558581e-07, + "loss": 0.6468, "step": 12630 }, { - "epoch": 0.9557716317959971, - "grad_norm": 1.7528537511825562, - "learning_rate": 9.594019442907686e-08, - "loss": 0.5813, + "epoch": 0.8891939457937346, + "grad_norm": 1.8023685216903687, + "learning_rate": 4.46003450197746e-07, + "loss": 0.6097, "step": 12631 }, { - "epoch": 0.9558473005183308, - "grad_norm": 4.033431053161621, - "learning_rate": 9.561249591856569e-08, - "loss": 0.6462, + "epoch": 0.8892643435410067, + "grad_norm": 2.103194236755371, + "learning_rate": 4.454430032878509e-07, + "loss": 0.6559, "step": 12632 }, { - "epoch": 0.9559229692406643, - "grad_norm": 2.031341075897217, - "learning_rate": 9.528535502959845e-08, - "loss": 0.6628, + "epoch": 0.8893347412882788, + "grad_norm": 2.0208394527435303, + "learning_rate": 4.4488289715530913e-07, + "loss": 0.7369, "step": 12633 }, { - "epoch": 0.955998637962998, - "grad_norm": 2.2305564880371094, - "learning_rate": 9.495877178266477e-08, - "loss": 0.6545, + "epoch": 0.8894051390355509, + "grad_norm": 2.5296177864074707, + "learning_rate": 4.443231318292423e-07, + "loss": 0.774, "step": 12634 }, { - "epoch": 0.9560743066853317, - "grad_norm": 1.8292995691299438, - "learning_rate": 9.463274619821627e-08, - "loss": 0.5837, + "epoch": 0.889475536782823, + "grad_norm": 2.1311676502227783, + "learning_rate": 4.4376370733875336e-07, + "loss": 0.6271, "step": 12635 }, { - "epoch": 0.9561499754076652, - "grad_norm": 1.8190289735794067, - "learning_rate": 9.430727829666763e-08, - "loss": 0.6024, + "epoch": 0.889545934530095, + "grad_norm": 1.8202016353607178, + "learning_rate": 4.432046237129258e-07, + "loss": 0.6154, "step": 12636 }, { - "epoch": 0.9562256441299989, - "grad_norm": 1.8149267435073853, - "learning_rate": 9.398236809840155e-08, - "loss": 0.7985, + "epoch": 0.8896163322773671, + "grad_norm": 2.395353078842163, + "learning_rate": 4.426458809808291e-07, + "loss": 0.6625, "step": 12637 }, { - "epoch": 0.9563013128523324, - "grad_norm": 2.005405902862549, - "learning_rate": 9.365801562376474e-08, - "loss": 0.7324, + "epoch": 0.8896867300246392, + "grad_norm": 2.452648878097534, + "learning_rate": 4.420874791715119e-07, + "loss": 0.7539, "step": 12638 }, { - "epoch": 0.9563769815746661, - "grad_norm": 3.069957971572876, - "learning_rate": 9.333422089307097e-08, - "loss": 0.5228, + "epoch": 0.8897571277719113, + "grad_norm": 2.4652719497680664, + "learning_rate": 4.4152941831400635e-07, + "loss": 0.6552, "step": 12639 }, { - "epoch": 0.9564526502969998, - "grad_norm": 2.064141035079956, - "learning_rate": 9.301098392659502e-08, - "loss": 0.7289, + "epoch": 0.8898275255191834, + "grad_norm": 1.8555617332458496, + "learning_rate": 4.409716984373245e-07, + "loss": 0.5417, "step": 12640 }, { - "epoch": 0.9565283190193333, - "grad_norm": 1.8948678970336914, - "learning_rate": 9.268830474457967e-08, - "loss": 0.6597, + "epoch": 0.8898979232664554, + "grad_norm": 1.5887924432754517, + "learning_rate": 4.4041431957046516e-07, + "loss": 0.5334, "step": 12641 }, { - "epoch": 0.956603987741667, - "grad_norm": 2.4123117923736572, - "learning_rate": 9.236618336723379e-08, - "loss": 0.7717, + "epoch": 0.8899683210137276, + "grad_norm": 2.2379753589630127, + "learning_rate": 4.398572817424053e-07, + "loss": 0.5867, "step": 12642 }, { - "epoch": 0.9566796564640007, - "grad_norm": 2.754319667816162, - "learning_rate": 9.204461981472623e-08, - "loss": 0.5993, + "epoch": 0.8900387187609996, + "grad_norm": 1.77045476436615, + "learning_rate": 4.393005849821073e-07, + "loss": 0.5764, "step": 12643 }, { - "epoch": 0.9567553251863342, - "grad_norm": 2.069659471511841, - "learning_rate": 9.172361410719787e-08, - "loss": 0.692, + "epoch": 0.8901091165082717, + "grad_norm": 2.3526084423065186, + "learning_rate": 4.38744229318514e-07, + "loss": 0.587, "step": 12644 }, { - "epoch": 0.9568309939086679, - "grad_norm": 2.8982131481170654, - "learning_rate": 9.140316626474865e-08, - "loss": 0.6309, + "epoch": 0.8901795142555439, + "grad_norm": 2.0011634826660156, + "learning_rate": 4.3818821478055025e-07, + "loss": 0.7181, "step": 12645 }, { - "epoch": 0.9569066626310014, - "grad_norm": 2.2474629878997803, - "learning_rate": 9.10832763074485e-08, - "loss": 0.5228, + "epoch": 0.8902499120028159, + "grad_norm": 1.930196762084961, + "learning_rate": 4.376325413971225e-07, + "loss": 0.5916, "step": 12646 }, { - "epoch": 0.9569823313533351, - "grad_norm": 3.5946133136749268, - "learning_rate": 9.076394425532741e-08, - "loss": 0.6798, + "epoch": 0.890320309750088, + "grad_norm": 1.971988320350647, + "learning_rate": 4.37077209197123e-07, + "loss": 0.6627, "step": 12647 }, { - "epoch": 0.9570580000756688, - "grad_norm": 1.9877676963806152, - "learning_rate": 9.044517012838438e-08, - "loss": 0.6968, + "epoch": 0.89039070749736, + "grad_norm": 2.086435556411743, + "learning_rate": 4.365222182094239e-07, + "loss": 0.6197, "step": 12648 }, { - "epoch": 0.9571336687980023, - "grad_norm": 2.1319262981414795, - "learning_rate": 9.012695394658143e-08, - "loss": 0.6383, + "epoch": 0.8904611052446322, + "grad_norm": 1.9747546911239624, + "learning_rate": 4.3596756846287875e-07, + "loss": 0.5652, "step": 12649 }, { - "epoch": 0.957209337520336, - "grad_norm": 2.4543004035949707, - "learning_rate": 8.980929572984764e-08, - "loss": 0.7055, + "epoch": 0.8905315029919043, + "grad_norm": 1.927749752998352, + "learning_rate": 4.3541325998632383e-07, + "loss": 0.726, "step": 12650 }, { - "epoch": 0.9572850062426695, - "grad_norm": 2.64933180809021, - "learning_rate": 8.949219549807408e-08, - "loss": 0.5386, + "epoch": 0.8906019007391763, + "grad_norm": 1.9744817018508911, + "learning_rate": 4.348592928085785e-07, + "loss": 0.5774, "step": 12651 }, { - "epoch": 0.9573606749650032, - "grad_norm": 2.116211414337158, - "learning_rate": 8.917565327111888e-08, - "loss": 0.6279, + "epoch": 0.8906722984864485, + "grad_norm": 2.151243209838867, + "learning_rate": 4.343056669584456e-07, + "loss": 0.7422, "step": 12652 }, { - "epoch": 0.9574363436873369, - "grad_norm": 2.017352342605591, - "learning_rate": 8.885966906880616e-08, - "loss": 0.5641, + "epoch": 0.8907426962337205, + "grad_norm": 2.133857250213623, + "learning_rate": 4.337523824647057e-07, + "loss": 0.6856, "step": 12653 }, { - "epoch": 0.9575120124096704, - "grad_norm": 2.1703391075134277, - "learning_rate": 8.854424291092311e-08, - "loss": 0.7587, + "epoch": 0.8908130939809926, + "grad_norm": 1.9856460094451904, + "learning_rate": 4.3319943935612745e-07, + "loss": 0.7201, "step": 12654 }, { - "epoch": 0.9575876811320041, - "grad_norm": 2.2390897274017334, - "learning_rate": 8.822937481722194e-08, - "loss": 0.6269, + "epoch": 0.8908834917282646, + "grad_norm": 2.055107593536377, + "learning_rate": 4.3264683766145715e-07, + "loss": 0.6206, "step": 12655 }, { - "epoch": 0.9576633498543378, - "grad_norm": 1.785683035850525, - "learning_rate": 8.791506480742284e-08, - "loss": 0.6123, + "epoch": 0.8909538894755368, + "grad_norm": 1.8936703205108643, + "learning_rate": 4.320945774094262e-07, + "loss": 0.6901, "step": 12656 }, { - "epoch": 0.9577390185766713, - "grad_norm": 1.9447267055511475, - "learning_rate": 8.76013129012061e-08, - "loss": 0.6235, + "epoch": 0.8910242872228089, + "grad_norm": 1.836005687713623, + "learning_rate": 4.315426586287444e-07, + "loss": 0.5887, "step": 12657 }, { - "epoch": 0.957814687299005, - "grad_norm": 1.9245911836624146, - "learning_rate": 8.728811911822199e-08, - "loss": 0.5552, + "epoch": 0.8910946849700809, + "grad_norm": 1.5579479932785034, + "learning_rate": 4.3099108134811045e-07, + "loss": 0.5247, "step": 12658 }, { - "epoch": 0.9578903560213385, - "grad_norm": 3.584024667739868, - "learning_rate": 8.697548347808281e-08, - "loss": 0.8091, + "epoch": 0.8911650827173531, + "grad_norm": 1.9940286874771118, + "learning_rate": 4.304398455961984e-07, + "loss": 0.7357, "step": 12659 }, { - "epoch": 0.9579660247436722, - "grad_norm": 2.3792314529418945, - "learning_rate": 8.666340600036793e-08, - "loss": 0.6981, + "epoch": 0.8912354804646251, + "grad_norm": 2.0340497493743896, + "learning_rate": 4.2988895140166895e-07, + "loss": 0.661, "step": 12660 }, { - "epoch": 0.9580416934660059, - "grad_norm": 3.4363317489624023, - "learning_rate": 8.635188670461869e-08, - "loss": 0.5795, + "epoch": 0.8913058782118972, + "grad_norm": 1.8769004344940186, + "learning_rate": 4.2933839879316336e-07, + "loss": 0.6091, "step": 12661 }, { - "epoch": 0.9581173621883394, - "grad_norm": 2.3299102783203125, - "learning_rate": 8.604092561034549e-08, - "loss": 0.6234, + "epoch": 0.8913762759591694, + "grad_norm": 1.9572727680206299, + "learning_rate": 4.2878818779930593e-07, + "loss": 0.6865, "step": 12662 }, { - "epoch": 0.9581930309106731, - "grad_norm": 2.2737045288085938, - "learning_rate": 8.573052273701975e-08, - "loss": 0.7343, + "epoch": 0.8914466737064414, + "grad_norm": 1.9584788084030151, + "learning_rate": 4.2823831844869984e-07, + "loss": 0.6134, "step": 12663 }, { - "epoch": 0.9582686996330066, - "grad_norm": 2.334836483001709, - "learning_rate": 8.542067810408194e-08, - "loss": 0.7093, + "epoch": 0.8915170714537135, + "grad_norm": 1.6269088983535767, + "learning_rate": 4.276887907699367e-07, + "loss": 0.5742, "step": 12664 }, { - "epoch": 0.9583443683553403, - "grad_norm": 2.3729231357574463, - "learning_rate": 8.511139173093352e-08, - "loss": 0.6742, + "epoch": 0.8915874692009855, + "grad_norm": 2.0361266136169434, + "learning_rate": 4.2713960479158473e-07, + "loss": 0.7153, "step": 12665 }, { - "epoch": 0.958420037077674, - "grad_norm": 2.322737693786621, - "learning_rate": 8.4802663636945e-08, - "loss": 0.6121, + "epoch": 0.8916578669482577, + "grad_norm": 2.0586066246032715, + "learning_rate": 4.2659076054219824e-07, + "loss": 0.7005, "step": 12666 }, { - "epoch": 0.9584957058000075, - "grad_norm": 2.7772295475006104, - "learning_rate": 8.449449384144891e-08, - "loss": 0.4887, + "epoch": 0.8917282646955298, + "grad_norm": 1.815468668937683, + "learning_rate": 4.260422580503113e-07, + "loss": 0.6341, "step": 12667 }, { - "epoch": 0.9585713745223412, - "grad_norm": 2.32209849357605, - "learning_rate": 8.418688236374283e-08, - "loss": 0.5629, + "epoch": 0.8917986624428018, + "grad_norm": 1.9471534490585327, + "learning_rate": 4.254940973444415e-07, + "loss": 0.5626, "step": 12668 }, { - "epoch": 0.9586470432446749, - "grad_norm": 2.3986520767211914, - "learning_rate": 8.387982922309135e-08, - "loss": 0.5758, + "epoch": 0.891869060190074, + "grad_norm": 2.039745569229126, + "learning_rate": 4.2494627845308587e-07, + "loss": 0.5514, "step": 12669 }, { - "epoch": 0.9587227119670084, - "grad_norm": 2.683042526245117, - "learning_rate": 8.357333443872406e-08, - "loss": 0.7964, + "epoch": 0.891939457937346, + "grad_norm": 2.5373291969299316, + "learning_rate": 4.243988014047301e-07, + "loss": 0.6677, "step": 12670 }, { - "epoch": 0.9587983806893421, - "grad_norm": 3.9749975204467773, - "learning_rate": 8.326739802983363e-08, - "loss": 0.6979, + "epoch": 0.8920098556846181, + "grad_norm": 1.5186189413070679, + "learning_rate": 4.2385166622783363e-07, + "loss": 0.5757, "step": 12671 }, { - "epoch": 0.9588740494116756, - "grad_norm": 2.608891010284424, - "learning_rate": 8.296202001557873e-08, - "loss": 0.7334, + "epoch": 0.8920802534318901, + "grad_norm": 1.9326879978179932, + "learning_rate": 4.233048729508467e-07, + "loss": 0.692, "step": 12672 }, { - "epoch": 0.9589497181340093, - "grad_norm": 2.276517152786255, - "learning_rate": 8.265720041508407e-08, - "loss": 0.6777, + "epoch": 0.8921506511791623, + "grad_norm": 2.410602331161499, + "learning_rate": 4.227584216021953e-07, + "loss": 0.6127, "step": 12673 }, { - "epoch": 0.959025386856343, - "grad_norm": 2.6598875522613525, - "learning_rate": 8.235293924743636e-08, - "loss": 0.6954, + "epoch": 0.8922210489264344, + "grad_norm": 2.6422340869903564, + "learning_rate": 4.2221231221029074e-07, + "loss": 0.627, "step": 12674 }, { - "epoch": 0.9591010555786765, - "grad_norm": 1.9157791137695312, - "learning_rate": 8.204923653169139e-08, - "loss": 0.5896, + "epoch": 0.8922914466737064, + "grad_norm": 1.9146846532821655, + "learning_rate": 4.216665448035233e-07, + "loss": 0.7242, "step": 12675 }, { - "epoch": 0.9591767243010102, - "grad_norm": 2.493894100189209, - "learning_rate": 8.174609228686792e-08, - "loss": 0.6405, + "epoch": 0.8923618444209785, + "grad_norm": 2.0007271766662598, + "learning_rate": 4.211211194102715e-07, + "loss": 0.7023, "step": 12676 }, { - "epoch": 0.9592523930233438, - "grad_norm": 2.365548610687256, - "learning_rate": 8.144350653194877e-08, - "loss": 0.6652, + "epoch": 0.8924322421682506, + "grad_norm": 1.8550442457199097, + "learning_rate": 4.2057603605889006e-07, + "loss": 0.6761, "step": 12677 }, { - "epoch": 0.9593280617456774, - "grad_norm": 1.957023024559021, - "learning_rate": 8.114147928588377e-08, - "loss": 0.6683, + "epoch": 0.8925026399155227, + "grad_norm": 1.820585012435913, + "learning_rate": 4.2003129477771936e-07, + "loss": 0.6479, "step": 12678 }, { - "epoch": 0.9594037304680111, - "grad_norm": 2.2314658164978027, - "learning_rate": 8.084001056758583e-08, - "loss": 0.5744, + "epoch": 0.8925730376627948, + "grad_norm": 1.8227121829986572, + "learning_rate": 4.1948689559508143e-07, + "loss": 0.6955, "step": 12679 }, { - "epoch": 0.9594793991903446, - "grad_norm": 2.346789836883545, - "learning_rate": 8.053910039593481e-08, - "loss": 0.608, + "epoch": 0.8926434354100669, + "grad_norm": 1.7441926002502441, + "learning_rate": 4.189428385392795e-07, + "loss": 0.6291, "step": 12680 }, { - "epoch": 0.9595550679126783, - "grad_norm": 2.3163628578186035, - "learning_rate": 8.023874878977467e-08, - "loss": 0.6973, + "epoch": 0.892713833157339, + "grad_norm": 2.3456947803497314, + "learning_rate": 4.183991236385989e-07, + "loss": 0.7299, "step": 12681 }, { - "epoch": 0.959630736635012, - "grad_norm": 2.010727643966675, - "learning_rate": 7.993895576791333e-08, - "loss": 0.7321, + "epoch": 0.892784230904611, + "grad_norm": 2.203671455383301, + "learning_rate": 4.178557509213087e-07, + "loss": 0.6026, "step": 12682 }, { - "epoch": 0.9597064053573455, - "grad_norm": 2.24619197845459, - "learning_rate": 7.963972134912578e-08, - "loss": 0.8042, + "epoch": 0.8928546286518831, + "grad_norm": 1.7664738893508911, + "learning_rate": 4.173127204156585e-07, + "loss": 0.7034, "step": 12683 }, { - "epoch": 0.9597820740796792, - "grad_norm": 2.5780961513519287, - "learning_rate": 7.934104555215105e-08, - "loss": 0.5879, + "epoch": 0.8929250263991553, + "grad_norm": 1.8282629251480103, + "learning_rate": 4.1677003214988317e-07, + "loss": 0.6397, "step": 12684 }, { - "epoch": 0.9598577428020127, - "grad_norm": 2.02319598197937, - "learning_rate": 7.904292839569315e-08, - "loss": 0.6226, + "epoch": 0.8929954241464273, + "grad_norm": 1.897026538848877, + "learning_rate": 4.162276861521951e-07, + "loss": 0.5903, "step": 12685 }, { - "epoch": 0.9599334115243464, - "grad_norm": 2.3072006702423096, - "learning_rate": 7.874536989842018e-08, - "loss": 0.742, + "epoch": 0.8930658218936994, + "grad_norm": 2.0793826580047607, + "learning_rate": 4.156856824507925e-07, + "loss": 0.6716, "step": 12686 }, { - "epoch": 0.9600090802466801, - "grad_norm": 2.2716474533081055, - "learning_rate": 7.844837007896821e-08, - "loss": 0.7209, + "epoch": 0.8931362196409715, + "grad_norm": 2.1143579483032227, + "learning_rate": 4.151440210738536e-07, + "loss": 0.7654, "step": 12687 }, { - "epoch": 0.9600847489690136, - "grad_norm": 2.065999984741211, - "learning_rate": 7.815192895593437e-08, - "loss": 0.6923, + "epoch": 0.8932066173882436, + "grad_norm": 1.7290246486663818, + "learning_rate": 4.1460270204954177e-07, + "loss": 0.5228, "step": 12688 }, { - "epoch": 0.9601604176913473, - "grad_norm": 2.5855307579040527, - "learning_rate": 7.785604654788281e-08, - "loss": 0.4902, + "epoch": 0.8932770151355157, + "grad_norm": 1.8596315383911133, + "learning_rate": 4.140617254059977e-07, + "loss": 0.5703, "step": 12689 }, { - "epoch": 0.9602360864136809, - "grad_norm": 1.992203950881958, - "learning_rate": 7.75607228733447e-08, - "loss": 0.6572, + "epoch": 0.8933474128827877, + "grad_norm": 1.9747642278671265, + "learning_rate": 4.135210911713507e-07, + "loss": 0.6762, "step": 12690 }, { - "epoch": 0.9603117551360145, - "grad_norm": 1.9567265510559082, - "learning_rate": 7.726595795081226e-08, - "loss": 0.6281, + "epoch": 0.8934178106300599, + "grad_norm": 1.8441801071166992, + "learning_rate": 4.1298079937370656e-07, + "loss": 0.6229, "step": 12691 }, { - "epoch": 0.9603874238583482, - "grad_norm": 1.9469960927963257, - "learning_rate": 7.697175179874472e-08, - "loss": 0.6364, + "epoch": 0.8934882083773319, + "grad_norm": 1.6450830698013306, + "learning_rate": 4.1244085004115793e-07, + "loss": 0.7024, "step": 12692 }, { - "epoch": 0.9604630925806817, - "grad_norm": 1.9473823308944702, - "learning_rate": 7.667810443556733e-08, - "loss": 0.5201, + "epoch": 0.893558606124604, + "grad_norm": 1.7553074359893799, + "learning_rate": 4.1190124320177334e-07, + "loss": 0.7143, "step": 12693 }, { - "epoch": 0.9605387613030154, - "grad_norm": 2.243472099304199, - "learning_rate": 7.638501587966839e-08, - "loss": 0.569, + "epoch": 0.893629003871876, + "grad_norm": 2.377546548843384, + "learning_rate": 4.1136197888361125e-07, + "loss": 0.6723, "step": 12694 }, { - "epoch": 0.9606144300253491, - "grad_norm": 1.9308743476867676, - "learning_rate": 7.609248614940123e-08, - "loss": 0.5012, + "epoch": 0.8936994016191482, + "grad_norm": 1.6331195831298828, + "learning_rate": 4.1082305711470525e-07, + "loss": 0.6593, "step": 12695 }, { - "epoch": 0.9606900987476826, - "grad_norm": 2.4356298446655273, - "learning_rate": 7.580051526308718e-08, - "loss": 0.7305, + "epoch": 0.8937697993664203, + "grad_norm": 1.8971028327941895, + "learning_rate": 4.102844779230772e-07, + "loss": 0.5626, "step": 12696 }, { - "epoch": 0.9607657674700163, - "grad_norm": 1.883434534072876, - "learning_rate": 7.550910323900862e-08, - "loss": 0.6199, + "epoch": 0.8938401971136923, + "grad_norm": 1.750231146812439, + "learning_rate": 4.0974624133672577e-07, + "loss": 0.6543, "step": 12697 }, { - "epoch": 0.9608414361923499, - "grad_norm": 2.298492908477783, - "learning_rate": 7.521825009541594e-08, - "loss": 0.5387, + "epoch": 0.8939105948609645, + "grad_norm": 2.030688524246216, + "learning_rate": 4.092083473836371e-07, + "loss": 0.7174, "step": 12698 }, { - "epoch": 0.9609171049146835, - "grad_norm": 1.997653841972351, - "learning_rate": 7.49279558505226e-08, - "loss": 0.5514, + "epoch": 0.8939809926082365, + "grad_norm": 1.862993597984314, + "learning_rate": 4.086707960917756e-07, + "loss": 0.6059, "step": 12699 }, { - "epoch": 0.9609927736370172, - "grad_norm": 2.485727310180664, - "learning_rate": 7.463822052250702e-08, - "loss": 0.7501, + "epoch": 0.8940513903555086, + "grad_norm": 1.9729101657867432, + "learning_rate": 4.0813358748908776e-07, + "loss": 0.6657, "step": 12700 }, { - "epoch": 0.9610684423593507, - "grad_norm": 4.434056758880615, - "learning_rate": 7.434904412951471e-08, - "loss": 0.6913, + "epoch": 0.8941217881027808, + "grad_norm": 2.405872344970703, + "learning_rate": 4.0759672160350546e-07, + "loss": 0.6074, "step": 12701 }, { - "epoch": 0.9611441110816844, - "grad_norm": 2.3401598930358887, - "learning_rate": 7.406042668965419e-08, - "loss": 0.654, + "epoch": 0.8941921858500528, + "grad_norm": 2.0389952659606934, + "learning_rate": 4.0706019846293937e-07, + "loss": 0.5382, "step": 12702 }, { - "epoch": 0.961219779804018, - "grad_norm": 1.9578860998153687, - "learning_rate": 7.377236822099998e-08, - "loss": 0.831, + "epoch": 0.8942625835973249, + "grad_norm": 2.1494410037994385, + "learning_rate": 4.065240180952856e-07, + "loss": 0.7051, "step": 12703 }, { - "epoch": 0.9612954485263516, - "grad_norm": 2.5894887447357178, - "learning_rate": 7.348486874159166e-08, - "loss": 0.6493, + "epoch": 0.8943329813445969, + "grad_norm": 2.3281142711639404, + "learning_rate": 4.0598818052841914e-07, + "loss": 0.576, "step": 12704 }, { - "epoch": 0.9613711172486853, - "grad_norm": 2.045994281768799, - "learning_rate": 7.319792826943084e-08, - "loss": 0.6242, + "epoch": 0.8944033790918691, + "grad_norm": 1.9307186603546143, + "learning_rate": 4.0545268579019944e-07, + "loss": 0.7282, "step": 12705 }, { - "epoch": 0.9614467859710188, - "grad_norm": 1.8696407079696655, - "learning_rate": 7.291154682249013e-08, - "loss": 0.7577, + "epoch": 0.8944737768391412, + "grad_norm": 1.8863215446472168, + "learning_rate": 4.0491753390846586e-07, + "loss": 0.559, "step": 12706 }, { - "epoch": 0.9615224546933525, - "grad_norm": 2.9939026832580566, - "learning_rate": 7.262572441870219e-08, - "loss": 0.7651, + "epoch": 0.8945441745864132, + "grad_norm": 2.196160078048706, + "learning_rate": 4.0438272491104286e-07, + "loss": 0.5142, "step": 12707 }, { - "epoch": 0.9615981234156862, - "grad_norm": 2.0975334644317627, - "learning_rate": 7.234046107596471e-08, - "loss": 0.6157, + "epoch": 0.8946145723336854, + "grad_norm": 1.7630287408828735, + "learning_rate": 4.038482588257348e-07, + "loss": 0.5842, "step": 12708 }, { - "epoch": 0.9616737921380197, - "grad_norm": 2.63741397857666, - "learning_rate": 7.205575681214438e-08, - "loss": 0.6313, + "epoch": 0.8946849700809574, + "grad_norm": 1.9779359102249146, + "learning_rate": 4.0331413568033125e-07, + "loss": 0.687, "step": 12709 }, { - "epoch": 0.9617494608603534, - "grad_norm": 2.186922788619995, - "learning_rate": 7.177161164506795e-08, - "loss": 0.622, + "epoch": 0.8947553678282295, + "grad_norm": 1.8714802265167236, + "learning_rate": 4.0278035550259915e-07, + "loss": 0.552, "step": 12710 }, { - "epoch": 0.961825129582687, - "grad_norm": 4.426486492156982, - "learning_rate": 7.14880255925312e-08, - "loss": 0.7372, + "epoch": 0.8948257655755015, + "grad_norm": 1.8859127759933472, + "learning_rate": 4.0224691832029157e-07, + "loss": 0.5592, "step": 12711 }, { - "epoch": 0.9619007983050206, - "grad_norm": 2.0475969314575195, - "learning_rate": 7.12049986722919e-08, - "loss": 0.7598, + "epoch": 0.8948961633227737, + "grad_norm": 1.96669340133667, + "learning_rate": 4.017138241611413e-07, + "loss": 0.6411, "step": 12712 }, { - "epoch": 0.9619764670273543, - "grad_norm": 2.091733932495117, - "learning_rate": 7.09225309020759e-08, - "loss": 0.6758, + "epoch": 0.8949665610700458, + "grad_norm": 1.7382675409317017, + "learning_rate": 4.0118107305286563e-07, + "loss": 0.5957, "step": 12713 }, { - "epoch": 0.9620521357496878, - "grad_norm": 1.9092284440994263, - "learning_rate": 7.064062229957102e-08, - "loss": 0.6692, + "epoch": 0.8950369588173178, + "grad_norm": 2.4964847564697266, + "learning_rate": 4.0064866502316085e-07, + "loss": 0.7347, "step": 12714 }, { - "epoch": 0.9621278044720215, - "grad_norm": 2.5357465744018555, - "learning_rate": 7.035927288243016e-08, - "loss": 0.6938, + "epoch": 0.89510735656459, + "grad_norm": 1.8137723207473755, + "learning_rate": 4.0011660009971003e-07, + "loss": 0.6441, "step": 12715 }, { - "epoch": 0.9622034731943551, - "grad_norm": 2.2587950229644775, - "learning_rate": 7.007848266827521e-08, - "loss": 0.6561, + "epoch": 0.895177754311862, + "grad_norm": 1.9350216388702393, + "learning_rate": 3.995848783101753e-07, + "loss": 0.7188, "step": 12716 }, { - "epoch": 0.9622791419166887, - "grad_norm": 2.3485636711120605, - "learning_rate": 6.979825167468812e-08, - "loss": 0.6759, + "epoch": 0.8952481520591341, + "grad_norm": 2.6836845874786377, + "learning_rate": 3.990534996821993e-07, + "loss": 0.672, "step": 12717 }, { - "epoch": 0.9623548106390224, - "grad_norm": 1.9571853876113892, - "learning_rate": 6.951857991921783e-08, - "loss": 0.6174, + "epoch": 0.8953185498064062, + "grad_norm": 1.407375454902649, + "learning_rate": 3.985224642434091e-07, + "loss": 0.7466, "step": 12718 }, { - "epoch": 0.962430479361356, - "grad_norm": 1.7149250507354736, - "learning_rate": 6.923946741937836e-08, - "loss": 0.6175, + "epoch": 0.8953889475536783, + "grad_norm": 2.1451542377471924, + "learning_rate": 3.9799177202141633e-07, + "loss": 0.6636, "step": 12719 }, { - "epoch": 0.9625061480836896, - "grad_norm": 1.5973360538482666, - "learning_rate": 6.896091419264971e-08, - "loss": 0.4001, + "epoch": 0.8954593453009504, + "grad_norm": 1.5805013179779053, + "learning_rate": 3.974614230438085e-07, + "loss": 0.6829, "step": 12720 }, { - "epoch": 0.9625818168060233, - "grad_norm": 2.3491268157958984, - "learning_rate": 6.868292025647494e-08, - "loss": 0.5713, + "epoch": 0.8955297430482224, + "grad_norm": 2.174046277999878, + "learning_rate": 3.969314173381629e-07, + "loss": 0.639, "step": 12721 }, { - "epoch": 0.9626574855283568, - "grad_norm": 2.8299319744110107, - "learning_rate": 6.84054856282631e-08, - "loss": 0.7462, + "epoch": 0.8956001407954945, + "grad_norm": 2.1297147274017334, + "learning_rate": 3.964017549320322e-07, + "loss": 0.7597, "step": 12722 }, { - "epoch": 0.9627331542506905, - "grad_norm": 2.6515955924987793, - "learning_rate": 6.81286103253883e-08, - "loss": 0.7521, + "epoch": 0.8956705385427667, + "grad_norm": 2.342426300048828, + "learning_rate": 3.958724358529556e-07, + "loss": 0.7285, "step": 12723 }, { - "epoch": 0.9628088229730241, - "grad_norm": 2.534411668777466, - "learning_rate": 6.785229436518969e-08, - "loss": 0.5264, + "epoch": 0.8957409362900387, + "grad_norm": 2.6220457553863525, + "learning_rate": 3.953434601284507e-07, + "loss": 0.7247, "step": 12724 }, { - "epoch": 0.9628844916953577, - "grad_norm": 2.334911823272705, - "learning_rate": 6.757653776496841e-08, - "loss": 0.5722, + "epoch": 0.8958113340373108, + "grad_norm": 2.3223671913146973, + "learning_rate": 3.948148277860217e-07, + "loss": 0.6116, "step": 12725 }, { - "epoch": 0.9629601604176914, - "grad_norm": 2.0208029747009277, - "learning_rate": 6.730134054199665e-08, - "loss": 0.6558, + "epoch": 0.8958817317845829, + "grad_norm": 2.9614992141723633, + "learning_rate": 3.9428653885315054e-07, + "loss": 0.6774, "step": 12726 }, { - "epoch": 0.963035829140025, - "grad_norm": 2.0118119716644287, - "learning_rate": 6.702670271350764e-08, - "loss": 0.7271, + "epoch": 0.895952129531855, + "grad_norm": 2.4214603900909424, + "learning_rate": 3.9375859335730653e-07, + "loss": 0.7084, "step": 12727 }, { - "epoch": 0.9631114978623586, - "grad_norm": 2.5050179958343506, - "learning_rate": 6.675262429669759e-08, - "loss": 0.7486, + "epoch": 0.8960225272791271, + "grad_norm": 2.5057008266448975, + "learning_rate": 3.9323099132593577e-07, + "loss": 0.625, "step": 12728 }, { - "epoch": 0.9631871665846922, - "grad_norm": 2.230125665664673, - "learning_rate": 6.64791053087328e-08, - "loss": 0.6211, + "epoch": 0.8960929250263991, + "grad_norm": 1.738076090812683, + "learning_rate": 3.927037327864694e-07, + "loss": 0.5931, "step": 12729 }, { - "epoch": 0.9632628353070258, - "grad_norm": 2.61617112159729, - "learning_rate": 6.620614576673956e-08, - "loss": 0.6537, + "epoch": 0.8961633227736713, + "grad_norm": 2.3016772270202637, + "learning_rate": 3.921768177663186e-07, + "loss": 0.6065, "step": 12730 }, { - "epoch": 0.9633385040293595, - "grad_norm": 2.5779638290405273, - "learning_rate": 6.593374568781519e-08, - "loss": 0.6734, + "epoch": 0.8962337205209433, + "grad_norm": 1.7701576948165894, + "learning_rate": 3.9165024629288047e-07, + "loss": 0.5428, "step": 12731 }, { - "epoch": 0.963414172751693, - "grad_norm": 2.4025802612304688, - "learning_rate": 6.566190508901404e-08, - "loss": 0.7389, + "epoch": 0.8963041182682154, + "grad_norm": 1.8990041017532349, + "learning_rate": 3.911240183935303e-07, + "loss": 0.6474, "step": 12732 }, { - "epoch": 0.9634898414740267, - "grad_norm": 2.136965751647949, - "learning_rate": 6.539062398736251e-08, - "loss": 0.5392, + "epoch": 0.8963745160154875, + "grad_norm": 1.805142879486084, + "learning_rate": 3.905981340956278e-07, + "loss": 0.6213, "step": 12733 }, { - "epoch": 0.9635655101963604, - "grad_norm": 4.406826496124268, - "learning_rate": 6.5119902399848e-08, - "loss": 0.6547, + "epoch": 0.8964449137627596, + "grad_norm": 2.0903775691986084, + "learning_rate": 3.90072593426515e-07, + "loss": 0.6795, "step": 12734 }, { - "epoch": 0.9636411789186939, - "grad_norm": 1.7842594385147095, - "learning_rate": 6.484974034342395e-08, - "loss": 0.6089, + "epoch": 0.8965153115100317, + "grad_norm": 2.172001838684082, + "learning_rate": 3.895473964135143e-07, + "loss": 0.6873, "step": 12735 }, { - "epoch": 0.9637168476410276, - "grad_norm": 2.642629623413086, - "learning_rate": 6.458013783500882e-08, - "loss": 0.7135, + "epoch": 0.8965857092573037, + "grad_norm": 2.584003210067749, + "learning_rate": 3.8902254308393036e-07, + "loss": 0.684, "step": 12736 }, { - "epoch": 0.9637925163633612, - "grad_norm": 2.0451202392578125, - "learning_rate": 6.431109489148612e-08, - "loss": 0.745, + "epoch": 0.8966561070045759, + "grad_norm": 1.8173872232437134, + "learning_rate": 3.884980334650522e-07, + "loss": 0.5839, "step": 12737 }, { - "epoch": 0.9638681850856948, - "grad_norm": 2.547163486480713, - "learning_rate": 6.404261152970437e-08, - "loss": 0.5514, + "epoch": 0.8967265047518479, + "grad_norm": 2.1484858989715576, + "learning_rate": 3.8797386758414785e-07, + "loss": 0.666, "step": 12738 }, { - "epoch": 0.9639438538080285, - "grad_norm": 2.487988233566284, - "learning_rate": 6.37746877664771e-08, - "loss": 0.6643, + "epoch": 0.89679690249912, + "grad_norm": 3.101059913635254, + "learning_rate": 3.8745004546847227e-07, + "loss": 0.7095, "step": 12739 }, { - "epoch": 0.964019522530362, - "grad_norm": 2.42266583442688, - "learning_rate": 6.350732361858092e-08, - "loss": 0.6916, + "epoch": 0.8968673002463922, + "grad_norm": 1.8578574657440186, + "learning_rate": 3.8692656714525696e-07, + "loss": 0.6334, "step": 12740 }, { - "epoch": 0.9640951912526957, - "grad_norm": 2.2286386489868164, - "learning_rate": 6.324051910276141e-08, - "loss": 0.7287, + "epoch": 0.8969376979936642, + "grad_norm": 2.14135479927063, + "learning_rate": 3.8640343264171796e-07, + "loss": 0.6967, "step": 12741 }, { - "epoch": 0.9641708599750293, - "grad_norm": 2.0510995388031006, - "learning_rate": 6.297427423572521e-08, - "loss": 0.6814, + "epoch": 0.8970080957409363, + "grad_norm": 2.084247589111328, + "learning_rate": 3.858806419850541e-07, + "loss": 0.6307, "step": 12742 }, { - "epoch": 0.9642465286973629, - "grad_norm": 2.0005273818969727, - "learning_rate": 6.2708589034146e-08, - "loss": 0.6475, + "epoch": 0.8970784934882083, + "grad_norm": 2.465562582015991, + "learning_rate": 3.853581952024465e-07, + "loss": 0.6801, "step": 12743 }, { - "epoch": 0.9643221974196966, - "grad_norm": 1.9023462533950806, - "learning_rate": 6.244346351466146e-08, - "loss": 0.716, + "epoch": 0.8971488912354805, + "grad_norm": 2.085970401763916, + "learning_rate": 3.848360923210552e-07, + "loss": 0.7281, "step": 12744 }, { - "epoch": 0.9643978661420302, - "grad_norm": 2.4381988048553467, - "learning_rate": 6.21788976938743e-08, - "loss": 0.6293, + "epoch": 0.8972192889827526, + "grad_norm": 1.9784350395202637, + "learning_rate": 3.8431433336802854e-07, + "loss": 0.6766, "step": 12745 }, { - "epoch": 0.9644735348643638, - "grad_norm": 1.6925064325332642, - "learning_rate": 6.191489158835328e-08, - "loss": 0.5658, + "epoch": 0.8972896867300246, + "grad_norm": 2.0120532512664795, + "learning_rate": 3.837929183704909e-07, + "loss": 0.7519, "step": 12746 }, { - "epoch": 0.9645492035866975, - "grad_norm": 2.0021846294403076, - "learning_rate": 6.165144521463117e-08, - "loss": 0.5507, + "epoch": 0.8973600844772968, + "grad_norm": 2.0796923637390137, + "learning_rate": 3.8327184735555017e-07, + "loss": 0.653, "step": 12747 }, { - "epoch": 0.964624872309031, - "grad_norm": 2.283257484436035, - "learning_rate": 6.138855858920577e-08, - "loss": 0.8271, + "epoch": 0.8974304822245688, + "grad_norm": 2.0237269401550293, + "learning_rate": 3.827511203502997e-07, + "loss": 0.7329, "step": 12748 }, { - "epoch": 0.9647005410313647, - "grad_norm": 2.3239376544952393, - "learning_rate": 6.112623172853993e-08, - "loss": 0.5572, + "epoch": 0.8975008799718409, + "grad_norm": 2.126702070236206, + "learning_rate": 3.8223073738181003e-07, + "loss": 0.8473, "step": 12749 }, { - "epoch": 0.9647762097536983, - "grad_norm": 1.8095026016235352, - "learning_rate": 6.086446464906148e-08, - "loss": 0.6423, + "epoch": 0.8975712777191129, + "grad_norm": 1.6316657066345215, + "learning_rate": 3.8171069847713947e-07, + "loss": 0.6875, "step": 12750 }, { - "epoch": 0.9648518784760319, - "grad_norm": 2.217108964920044, - "learning_rate": 6.060325736716133e-08, - "loss": 0.6364, + "epoch": 0.8976416754663851, + "grad_norm": 2.3432180881500244, + "learning_rate": 3.811910036633229e-07, + "loss": 0.6506, "step": 12751 }, { - "epoch": 0.9649275471983656, - "grad_norm": 2.219142436981201, - "learning_rate": 6.034260989920037e-08, - "loss": 0.7133, + "epoch": 0.8977120732136572, + "grad_norm": 2.431824207305908, + "learning_rate": 3.806716529673806e-07, + "loss": 0.8177, "step": 12752 }, { - "epoch": 0.9650032159206992, - "grad_norm": 2.773773670196533, - "learning_rate": 6.008252226149957e-08, - "loss": 0.5822, + "epoch": 0.8977824709609292, + "grad_norm": 1.7998286485671997, + "learning_rate": 3.801526464163132e-07, + "loss": 0.6041, "step": 12753 }, { - "epoch": 0.9650788846430328, - "grad_norm": 2.4652557373046875, - "learning_rate": 5.982299447034589e-08, - "loss": 0.6448, + "epoch": 0.8978528687082014, + "grad_norm": 1.8347160816192627, + "learning_rate": 3.796339840371059e-07, + "loss": 0.703, "step": 12754 }, { - "epoch": 0.9651545533653664, - "grad_norm": 1.8013569116592407, - "learning_rate": 5.9564026541992333e-08, - "loss": 0.4925, + "epoch": 0.8979232664554734, + "grad_norm": 2.0737791061401367, + "learning_rate": 3.791156658567222e-07, + "loss": 0.6715, "step": 12755 }, { - "epoch": 0.9652302220877, - "grad_norm": 2.425915241241455, - "learning_rate": 5.930561849265592e-08, - "loss": 0.6024, + "epoch": 0.8979936642027455, + "grad_norm": 1.8967783451080322, + "learning_rate": 3.785976919021122e-07, + "loss": 0.7001, "step": 12756 }, { - "epoch": 0.9653058908100337, - "grad_norm": 2.936739921569824, - "learning_rate": 5.9047770338520714e-08, - "loss": 0.7074, + "epoch": 0.8980640619500176, + "grad_norm": 2.0396907329559326, + "learning_rate": 3.780800622002053e-07, + "loss": 0.6604, "step": 12757 }, { - "epoch": 0.9653815595323673, - "grad_norm": 1.838965892791748, - "learning_rate": 5.879048209573079e-08, - "loss": 0.7531, + "epoch": 0.8981344596972897, + "grad_norm": 2.4060018062591553, + "learning_rate": 3.7756277677791274e-07, + "loss": 0.5937, "step": 12758 }, { - "epoch": 0.9654572282547009, - "grad_norm": 2.287705421447754, - "learning_rate": 5.853375378040227e-08, - "loss": 0.6203, + "epoch": 0.8982048574445618, + "grad_norm": 1.9974687099456787, + "learning_rate": 3.7704583566212876e-07, + "loss": 0.7244, "step": 12759 }, { - "epoch": 0.9655328969770346, - "grad_norm": 2.7539687156677246, - "learning_rate": 5.827758540860928e-08, - "loss": 0.5758, + "epoch": 0.8982752551918338, + "grad_norm": 2.4429128170013428, + "learning_rate": 3.765292388797307e-07, + "loss": 0.6131, "step": 12760 }, { - "epoch": 0.9656085656993681, - "grad_norm": 1.8179734945297241, - "learning_rate": 5.8021976996394e-08, - "loss": 0.5622, + "epoch": 0.898345652939106, + "grad_norm": 1.806418538093567, + "learning_rate": 3.760129864575745e-07, + "loss": 0.7047, "step": 12761 }, { - "epoch": 0.9656842344217018, - "grad_norm": 2.9774842262268066, - "learning_rate": 5.776692855976562e-08, - "loss": 0.7241, + "epoch": 0.8984160506863781, + "grad_norm": 1.8437045812606812, + "learning_rate": 3.7549707842250335e-07, + "loss": 0.6921, "step": 12762 }, { - "epoch": 0.9657599031440354, - "grad_norm": 1.650606632232666, - "learning_rate": 5.751244011469536e-08, - "loss": 0.743, + "epoch": 0.8984864484336501, + "grad_norm": 1.7365573644638062, + "learning_rate": 3.7498151480133835e-07, + "loss": 0.6237, "step": 12763 }, { - "epoch": 0.965835571866369, - "grad_norm": 2.1129419803619385, - "learning_rate": 5.7258511677118485e-08, - "loss": 0.6685, + "epoch": 0.8985568461809222, + "grad_norm": 1.713553786277771, + "learning_rate": 3.7446629562088515e-07, + "loss": 0.6486, "step": 12764 }, { - "epoch": 0.9659112405887027, - "grad_norm": 3.256843328475952, - "learning_rate": 5.7005143262938266e-08, - "loss": 0.6937, + "epoch": 0.8986272439281943, + "grad_norm": 1.8129632472991943, + "learning_rate": 3.7395142090792853e-07, + "loss": 0.6672, "step": 12765 }, { - "epoch": 0.9659869093110363, - "grad_norm": 2.002124309539795, - "learning_rate": 5.675233488802101e-08, - "loss": 0.7681, + "epoch": 0.8986976416754664, + "grad_norm": 1.9399206638336182, + "learning_rate": 3.734368906892391e-07, + "loss": 0.7274, "step": 12766 }, { - "epoch": 0.9660625780333699, - "grad_norm": 2.2396044731140137, - "learning_rate": 5.650008656819905e-08, - "loss": 0.5746, + "epoch": 0.8987680394227384, + "grad_norm": 2.4641377925872803, + "learning_rate": 3.729227049915659e-07, + "loss": 0.6882, "step": 12767 }, { - "epoch": 0.9661382467557035, - "grad_norm": 2.126986503601074, - "learning_rate": 5.624839831926776e-08, - "loss": 0.6165, + "epoch": 0.8988384371700106, + "grad_norm": 2.2354109287261963, + "learning_rate": 3.724088638416445e-07, + "loss": 0.6854, "step": 12768 }, { - "epoch": 0.9662139154780371, - "grad_norm": 1.8271424770355225, - "learning_rate": 5.5997270156989525e-08, - "loss": 0.6282, + "epoch": 0.8989088349172827, + "grad_norm": 2.4282901287078857, + "learning_rate": 3.718953672661882e-07, + "loss": 0.6613, "step": 12769 }, { - "epoch": 0.9662895842003708, - "grad_norm": 2.457106828689575, - "learning_rate": 5.574670209709176e-08, - "loss": 0.6688, + "epoch": 0.8989792326645547, + "grad_norm": 2.288916826248169, + "learning_rate": 3.713822152918946e-07, + "loss": 0.6422, "step": 12770 }, { - "epoch": 0.9663652529227044, - "grad_norm": 2.185253381729126, - "learning_rate": 5.5496694155262925e-08, - "loss": 0.677, + "epoch": 0.8990496304118268, + "grad_norm": 2.0670173168182373, + "learning_rate": 3.70869407945442e-07, + "loss": 0.6026, "step": 12771 }, { - "epoch": 0.966440921645038, - "grad_norm": 1.8739707469940186, - "learning_rate": 5.524724634716149e-08, - "loss": 0.6415, + "epoch": 0.8991200281590989, + "grad_norm": 1.9988847970962524, + "learning_rate": 3.703569452534937e-07, + "loss": 0.6016, "step": 12772 }, { - "epoch": 0.9665165903673717, - "grad_norm": 2.169405221939087, - "learning_rate": 5.499835868840997e-08, - "loss": 0.5788, + "epoch": 0.899190425906371, + "grad_norm": 1.838800311088562, + "learning_rate": 3.6984482724269074e-07, + "loss": 0.6439, "step": 12773 }, { - "epoch": 0.9665922590897053, - "grad_norm": 2.6439480781555176, - "learning_rate": 5.4750031194590875e-08, - "loss": 0.6599, + "epoch": 0.8992608236536431, + "grad_norm": 2.296947717666626, + "learning_rate": 3.6933305393966076e-07, + "loss": 0.6726, "step": 12774 }, { - "epoch": 0.9666679278120389, - "grad_norm": 2.3185319900512695, - "learning_rate": 5.4502263881258784e-08, - "loss": 0.6413, + "epoch": 0.8993312214009151, + "grad_norm": 2.171412467956543, + "learning_rate": 3.6882162537101124e-07, + "loss": 0.669, "step": 12775 }, { - "epoch": 0.9667435965343725, - "grad_norm": 1.7796145677566528, - "learning_rate": 5.425505676392728e-08, - "loss": 0.601, + "epoch": 0.8994016191481873, + "grad_norm": 1.9719966650009155, + "learning_rate": 3.683105415633303e-07, + "loss": 0.6021, "step": 12776 }, { - "epoch": 0.9668192652567061, - "grad_norm": 1.9893208742141724, - "learning_rate": 5.4008409858077977e-08, - "loss": 0.6446, + "epoch": 0.8994720168954593, + "grad_norm": 2.230123519897461, + "learning_rate": 3.677998025431889e-07, + "loss": 0.7535, "step": 12777 }, { - "epoch": 0.9668949339790398, - "grad_norm": 2.2175159454345703, - "learning_rate": 5.376232317915752e-08, - "loss": 0.6573, + "epoch": 0.8995424146427314, + "grad_norm": 2.4460668563842773, + "learning_rate": 3.6728940833714397e-07, + "loss": 0.7305, "step": 12778 }, { - "epoch": 0.9669706027013734, - "grad_norm": 3.3002736568450928, - "learning_rate": 5.351679674257559e-08, - "loss": 0.6405, + "epoch": 0.8996128123900036, + "grad_norm": 2.2440969944000244, + "learning_rate": 3.6677935897172843e-07, + "loss": 0.5962, "step": 12779 }, { - "epoch": 0.967046271423707, - "grad_norm": 2.012585401535034, - "learning_rate": 5.327183056370888e-08, - "loss": 0.6775, + "epoch": 0.8996832101372756, + "grad_norm": 2.0131995677948, + "learning_rate": 3.6626965447346196e-07, + "loss": 0.6669, "step": 12780 }, { - "epoch": 0.9671219401460406, - "grad_norm": 2.179703712463379, - "learning_rate": 5.302742465789712e-08, - "loss": 0.5052, + "epoch": 0.8997536078845477, + "grad_norm": 2.241041660308838, + "learning_rate": 3.6576029486884476e-07, + "loss": 0.617, "step": 12781 }, { - "epoch": 0.9671976088683742, - "grad_norm": 2.5398223400115967, - "learning_rate": 5.278357904044606e-08, - "loss": 0.7536, + "epoch": 0.8998240056318197, + "grad_norm": 2.0290729999542236, + "learning_rate": 3.652512801843577e-07, + "loss": 0.5318, "step": 12782 }, { - "epoch": 0.9672732775907079, - "grad_norm": 2.5192582607269287, - "learning_rate": 5.2540293726625497e-08, - "loss": 0.7409, + "epoch": 0.8998944033790919, + "grad_norm": 2.052516222000122, + "learning_rate": 3.647426104464645e-07, + "loss": 0.7506, "step": 12783 }, { - "epoch": 0.9673489463130415, - "grad_norm": 2.220503091812134, - "learning_rate": 5.229756873167224e-08, - "loss": 0.6389, + "epoch": 0.899964801126364, + "grad_norm": 2.6618399620056152, + "learning_rate": 3.6423428568161255e-07, + "loss": 0.7495, "step": 12784 }, { - "epoch": 0.9674246150353751, - "grad_norm": 2.697296142578125, - "learning_rate": 5.205540407078513e-08, - "loss": 0.77, + "epoch": 0.900035198873636, + "grad_norm": 2.049405336380005, + "learning_rate": 3.637263059162298e-07, + "loss": 0.7616, "step": 12785 }, { - "epoch": 0.9675002837577088, - "grad_norm": 2.3187201023101807, - "learning_rate": 5.1813799759130034e-08, - "loss": 0.6236, + "epoch": 0.9001055966209082, + "grad_norm": 1.9660004377365112, + "learning_rate": 3.632186711767271e-07, + "loss": 0.5933, "step": 12786 }, { - "epoch": 0.9675759524800424, - "grad_norm": 2.0090014934539795, - "learning_rate": 5.157275581183585e-08, - "loss": 0.6874, + "epoch": 0.9001759943681802, + "grad_norm": 2.071615695953369, + "learning_rate": 3.6271138148949677e-07, + "loss": 0.5644, "step": 12787 }, { - "epoch": 0.967651621202376, - "grad_norm": 1.9001106023788452, - "learning_rate": 5.13322722439995e-08, - "loss": 0.6184, + "epoch": 0.9002463921154523, + "grad_norm": 2.2999231815338135, + "learning_rate": 3.622044368809123e-07, + "loss": 0.6951, "step": 12788 }, { - "epoch": 0.9677272899247096, - "grad_norm": 2.549781560897827, - "learning_rate": 5.1092349070678944e-08, - "loss": 0.6552, + "epoch": 0.9003167898627243, + "grad_norm": 2.2266645431518555, + "learning_rate": 3.616978373773294e-07, + "loss": 0.5845, "step": 12789 }, { - "epoch": 0.9678029586470432, - "grad_norm": 2.1971235275268555, - "learning_rate": 5.085298630690016e-08, - "loss": 0.6244, + "epoch": 0.9003871876099965, + "grad_norm": 2.211724042892456, + "learning_rate": 3.611915830050898e-07, + "loss": 0.6247, "step": 12790 }, { - "epoch": 0.9678786273693769, - "grad_norm": 3.237020492553711, - "learning_rate": 5.061418396765316e-08, - "loss": 0.7338, + "epoch": 0.9004575853572686, + "grad_norm": 2.1906816959381104, + "learning_rate": 3.606856737905103e-07, + "loss": 0.5441, "step": 12791 }, { - "epoch": 0.9679542960917105, - "grad_norm": 2.797713279724121, - "learning_rate": 5.0375942067890976e-08, - "loss": 0.7075, + "epoch": 0.9005279831045406, + "grad_norm": 2.0404815673828125, + "learning_rate": 3.601801097598969e-07, + "loss": 0.6288, "step": 12792 }, { - "epoch": 0.9680299648140441, - "grad_norm": 2.096550941467285, - "learning_rate": 5.013826062253368e-08, - "loss": 0.5549, + "epoch": 0.9005983808518128, + "grad_norm": 2.127931833267212, + "learning_rate": 3.5967489093953376e-07, + "loss": 0.6777, "step": 12793 }, { - "epoch": 0.9681056335363777, - "grad_norm": 1.996402621269226, - "learning_rate": 4.9901139646466364e-08, - "loss": 0.6456, + "epoch": 0.9006687785990848, + "grad_norm": 1.8189016580581665, + "learning_rate": 3.5917001735568567e-07, + "loss": 0.6455, "step": 12794 }, { - "epoch": 0.9681813022587114, - "grad_norm": 1.7945371866226196, - "learning_rate": 4.966457915453815e-08, - "loss": 0.5903, + "epoch": 0.9007391763463569, + "grad_norm": 2.260509729385376, + "learning_rate": 3.586654890346026e-07, + "loss": 0.6994, "step": 12795 }, { - "epoch": 0.968256970981045, - "grad_norm": 2.276614189147949, - "learning_rate": 4.9428579161562184e-08, - "loss": 0.7016, + "epoch": 0.900809574093629, + "grad_norm": 1.9314017295837402, + "learning_rate": 3.5816130600251595e-07, + "loss": 0.7092, "step": 12796 }, { - "epoch": 0.9683326397033786, - "grad_norm": 2.1116700172424316, - "learning_rate": 4.919313968231765e-08, - "loss": 0.6455, + "epoch": 0.9008799718409011, + "grad_norm": 2.261931896209717, + "learning_rate": 3.576574682856376e-07, + "loss": 0.5837, "step": 12797 }, { - "epoch": 0.9684083084257122, - "grad_norm": 2.1539816856384277, - "learning_rate": 4.895826073155074e-08, - "loss": 0.7131, + "epoch": 0.9009503695881732, + "grad_norm": 2.093508005142212, + "learning_rate": 3.5715397591016317e-07, + "loss": 0.5798, "step": 12798 }, { - "epoch": 0.9684839771480459, - "grad_norm": 2.0993354320526123, - "learning_rate": 4.872394232396771e-08, - "loss": 0.6818, + "epoch": 0.9010207673354452, + "grad_norm": 2.929140329360962, + "learning_rate": 3.566508289022704e-07, + "loss": 0.5784, "step": 12799 }, { - "epoch": 0.9685596458703795, - "grad_norm": 2.5427918434143066, - "learning_rate": 4.8490184474243806e-08, - "loss": 0.472, + "epoch": 0.9010911650827174, + "grad_norm": 1.8963792324066162, + "learning_rate": 3.561480272881176e-07, + "loss": 0.5878, "step": 12800 }, { - "epoch": 0.9686353145927131, - "grad_norm": 2.007993459701538, - "learning_rate": 4.825698719701632e-08, - "loss": 0.6107, + "epoch": 0.9011615628299895, + "grad_norm": 1.8582444190979004, + "learning_rate": 3.5564557109384586e-07, + "loss": 0.6156, "step": 12801 }, { - "epoch": 0.9687109833150467, - "grad_norm": 2.1347367763519287, - "learning_rate": 4.802435050689058e-08, - "loss": 0.7914, + "epoch": 0.9012319605772615, + "grad_norm": 2.0806281566619873, + "learning_rate": 3.551434603455771e-07, + "loss": 0.733, "step": 12802 }, { - "epoch": 0.9687866520373803, - "grad_norm": 2.216740846633911, - "learning_rate": 4.779227441843392e-08, - "loss": 0.702, + "epoch": 0.9013023583245336, + "grad_norm": 1.9748127460479736, + "learning_rate": 3.54641695069419e-07, + "loss": 0.6122, "step": 12803 }, { - "epoch": 0.968862320759714, - "grad_norm": 2.9680533409118652, - "learning_rate": 4.756075894618073e-08, - "loss": 0.6582, + "epoch": 0.9013727560718057, + "grad_norm": 2.2677619457244873, + "learning_rate": 3.5414027529145685e-07, + "loss": 0.6485, "step": 12804 }, { - "epoch": 0.9689379894820476, - "grad_norm": 2.0287156105041504, - "learning_rate": 4.7329804104627394e-08, - "loss": 0.6694, + "epoch": 0.9014431538190778, + "grad_norm": 2.0273773670196533, + "learning_rate": 3.5363920103776115e-07, + "loss": 0.5757, "step": 12805 }, { - "epoch": 0.9690136582043812, - "grad_norm": 2.646369695663452, - "learning_rate": 4.7099409908239355e-08, - "loss": 0.6483, + "epoch": 0.9015135515663498, + "grad_norm": 2.1722378730773926, + "learning_rate": 3.531384723343829e-07, + "loss": 0.5686, "step": 12806 }, { - "epoch": 0.9690893269267148, - "grad_norm": 2.547806978225708, - "learning_rate": 4.686957637144207e-08, - "loss": 0.5989, + "epoch": 0.901583949313622, + "grad_norm": 1.9806171655654907, + "learning_rate": 3.5263808920735527e-07, + "loss": 0.6125, "step": 12807 }, { - "epoch": 0.9691649956490485, - "grad_norm": 2.441589832305908, - "learning_rate": 4.664030350863102e-08, - "loss": 0.75, + "epoch": 0.9016543470608941, + "grad_norm": 2.1067287921905518, + "learning_rate": 3.5213805168269283e-07, + "loss": 0.8511, "step": 12808 }, { - "epoch": 0.9692406643713821, - "grad_norm": 2.0199062824249268, - "learning_rate": 4.641159133416273e-08, - "loss": 0.6601, + "epoch": 0.9017247448081661, + "grad_norm": 2.519634962081909, + "learning_rate": 3.5163835978639457e-07, + "loss": 0.6586, "step": 12809 }, { - "epoch": 0.9693163330937157, - "grad_norm": 2.6701488494873047, - "learning_rate": 4.618343986235973e-08, - "loss": 0.7961, + "epoch": 0.9017951425554382, + "grad_norm": 1.8084743022918701, + "learning_rate": 3.5113901354443765e-07, + "loss": 0.6227, "step": 12810 }, { - "epoch": 0.9693920018160493, - "grad_norm": 2.2760307788848877, - "learning_rate": 4.5955849107509603e-08, - "loss": 0.5234, + "epoch": 0.9018655403027103, + "grad_norm": 1.6561353206634521, + "learning_rate": 3.506400129827869e-07, + "loss": 0.6574, "step": 12811 }, { - "epoch": 0.969467670538383, - "grad_norm": 2.0577778816223145, - "learning_rate": 4.572881908386495e-08, - "loss": 0.5675, + "epoch": 0.9019359380499824, + "grad_norm": 1.8989489078521729, + "learning_rate": 3.50141358127383e-07, + "loss": 0.6935, "step": 12812 }, { - "epoch": 0.9695433392607166, - "grad_norm": 2.3686912059783936, - "learning_rate": 4.5502349805643385e-08, - "loss": 0.6366, + "epoch": 0.9020063357972545, + "grad_norm": 2.3569931983947754, + "learning_rate": 3.496430490041527e-07, + "loss": 0.6914, "step": 12813 }, { - "epoch": 0.9696190079830502, - "grad_norm": 2.451944351196289, - "learning_rate": 4.527644128702757e-08, - "loss": 0.7198, + "epoch": 0.9020767335445266, + "grad_norm": 1.9794529676437378, + "learning_rate": 3.491450856390024e-07, + "loss": 0.578, "step": 12814 }, { - "epoch": 0.9696946767053838, - "grad_norm": 2.4918670654296875, - "learning_rate": 4.505109354216419e-08, - "loss": 0.5377, + "epoch": 0.9021471312917987, + "grad_norm": 1.936430811882019, + "learning_rate": 3.486474680578231e-07, + "loss": 0.6688, "step": 12815 }, { - "epoch": 0.9697703454277175, - "grad_norm": 1.964171051979065, - "learning_rate": 4.4826306585164955e-08, - "loss": 0.6491, + "epoch": 0.9022175290390707, + "grad_norm": 1.8899744749069214, + "learning_rate": 3.481501962864848e-07, + "loss": 0.6176, "step": 12816 }, { - "epoch": 0.9698460141500511, - "grad_norm": 2.240342855453491, - "learning_rate": 4.4602080430106605e-08, - "loss": 0.7502, + "epoch": 0.9022879267863428, + "grad_norm": 1.9729769229888916, + "learning_rate": 3.4765327035084345e-07, + "loss": 0.6852, "step": 12817 }, { - "epoch": 0.9699216828723847, - "grad_norm": 2.0190277099609375, - "learning_rate": 4.437841509103091e-08, - "loss": 0.6771, + "epoch": 0.902358324533615, + "grad_norm": 1.7387604713439941, + "learning_rate": 3.4715669027673334e-07, + "loss": 0.7151, "step": 12818 }, { - "epoch": 0.9699973515947183, - "grad_norm": 2.1004180908203125, - "learning_rate": 4.415531058194566e-08, - "loss": 0.675, + "epoch": 0.902428722280887, + "grad_norm": 2.793734550476074, + "learning_rate": 3.4666045608997154e-07, + "loss": 0.6514, "step": 12819 }, { - "epoch": 0.9700730203170519, - "grad_norm": 2.711425304412842, - "learning_rate": 4.3932766916821684e-08, - "loss": 0.6182, + "epoch": 0.9024991200281591, + "grad_norm": 2.391604423522949, + "learning_rate": 3.461645678163573e-07, + "loss": 0.7935, "step": 12820 }, { - "epoch": 0.9701486890393856, - "grad_norm": 2.490840435028076, - "learning_rate": 4.371078410959484e-08, - "loss": 0.6801, + "epoch": 0.9025695177754312, + "grad_norm": 2.06465744972229, + "learning_rate": 3.4566902548167435e-07, + "loss": 0.7444, "step": 12821 }, { - "epoch": 0.9702243577617192, - "grad_norm": 2.158508777618408, - "learning_rate": 4.348936217416599e-08, - "loss": 0.6393, + "epoch": 0.9026399155227033, + "grad_norm": 2.3096909523010254, + "learning_rate": 3.451738291116846e-07, + "loss": 0.6121, "step": 12822 }, { - "epoch": 0.9703000264840528, - "grad_norm": 2.2075629234313965, - "learning_rate": 4.326850112440306e-08, - "loss": 0.5681, + "epoch": 0.9027103132699753, + "grad_norm": 1.8359850645065308, + "learning_rate": 3.4467897873213524e-07, + "loss": 0.536, "step": 12823 }, { - "epoch": 0.9703756952063864, - "grad_norm": 2.199986457824707, - "learning_rate": 4.304820097413698e-08, - "loss": 0.7375, + "epoch": 0.9027807110172474, + "grad_norm": 2.5471737384796143, + "learning_rate": 3.4418447436875334e-07, + "loss": 0.6599, "step": 12824 }, { - "epoch": 0.9704513639287201, - "grad_norm": 2.364950180053711, - "learning_rate": 4.2828461737161706e-08, - "loss": 0.6709, + "epoch": 0.9028511087645196, + "grad_norm": 1.7400717735290527, + "learning_rate": 3.436903160472479e-07, + "loss": 0.5602, "step": 12825 }, { - "epoch": 0.9705270326510537, - "grad_norm": 2.256744861602783, - "learning_rate": 4.2609283427239245e-08, - "loss": 0.4733, + "epoch": 0.9029215065117916, + "grad_norm": 1.8394352197647095, + "learning_rate": 3.431965037933109e-07, + "loss": 0.6697, "step": 12826 }, { - "epoch": 0.9706027013733873, - "grad_norm": 2.7820794582366943, - "learning_rate": 4.2390666058095606e-08, - "loss": 0.7549, + "epoch": 0.9029919042590637, + "grad_norm": 2.6791720390319824, + "learning_rate": 3.4270303763261733e-07, + "loss": 0.7471, "step": 12827 }, { - "epoch": 0.9706783700957209, - "grad_norm": 2.726006507873535, - "learning_rate": 4.2172609643420846e-08, - "loss": 0.8422, + "epoch": 0.9030623020063357, + "grad_norm": 1.7802547216415405, + "learning_rate": 3.422099175908203e-07, + "loss": 0.5254, "step": 12828 }, { - "epoch": 0.9707540388180546, - "grad_norm": 2.408484697341919, - "learning_rate": 4.1955114196870035e-08, - "loss": 0.6995, + "epoch": 0.9031326997536079, + "grad_norm": 2.0168209075927734, + "learning_rate": 3.417171436935613e-07, + "loss": 0.6435, "step": 12829 }, { - "epoch": 0.9708297075403882, - "grad_norm": 1.8652710914611816, - "learning_rate": 4.1738179732064286e-08, - "loss": 0.6074, + "epoch": 0.90320309750088, + "grad_norm": 2.1399827003479004, + "learning_rate": 3.4122471596645775e-07, + "loss": 0.7143, "step": 12830 }, { - "epoch": 0.9709053762627218, - "grad_norm": 1.773630976676941, - "learning_rate": 4.152180626258772e-08, - "loss": 0.7438, + "epoch": 0.903273495248152, + "grad_norm": 2.0741162300109863, + "learning_rate": 3.407326344351115e-07, + "loss": 0.6574, "step": 12831 }, { - "epoch": 0.9709810449850554, - "grad_norm": 1.8957676887512207, - "learning_rate": 4.1305993801991514e-08, - "loss": 0.6815, + "epoch": 0.9033438929954242, + "grad_norm": 1.9227501153945923, + "learning_rate": 3.4024089912510567e-07, + "loss": 0.5754, "step": 12832 }, { - "epoch": 0.971056713707389, - "grad_norm": 2.1224136352539062, - "learning_rate": 4.109074236378885e-08, - "loss": 0.5116, + "epoch": 0.9034142907426962, + "grad_norm": 3.1551308631896973, + "learning_rate": 3.3974951006200803e-07, + "loss": 0.6727, "step": 12833 }, { - "epoch": 0.9711323824297227, - "grad_norm": 2.1865241527557373, - "learning_rate": 4.087605196146094e-08, - "loss": 0.6051, + "epoch": 0.9034846884899683, + "grad_norm": 2.6464955806732178, + "learning_rate": 3.3925846727136445e-07, + "loss": 0.657, "step": 12834 }, { - "epoch": 0.9712080511520563, - "grad_norm": 2.5317890644073486, - "learning_rate": 4.066192260845303e-08, - "loss": 0.6102, + "epoch": 0.9035550862372405, + "grad_norm": 1.7021260261535645, + "learning_rate": 3.38767770778706e-07, + "loss": 0.5236, "step": 12835 }, { - "epoch": 0.9712837198743899, - "grad_norm": 2.466912269592285, - "learning_rate": 4.0448354318172395e-08, - "loss": 0.6871, + "epoch": 0.9036254839845125, + "grad_norm": 2.599956750869751, + "learning_rate": 3.3827742060954444e-07, + "loss": 0.7633, "step": 12836 }, { - "epoch": 0.9713593885967235, - "grad_norm": 2.1204843521118164, - "learning_rate": 4.023534710399435e-08, - "loss": 0.5491, + "epoch": 0.9036958817317846, + "grad_norm": 1.7120383977890015, + "learning_rate": 3.3778741678937283e-07, + "loss": 0.5652, "step": 12837 }, { - "epoch": 0.9714350573190572, - "grad_norm": 2.1326029300689697, - "learning_rate": 4.0022900979259206e-08, - "loss": 0.549, + "epoch": 0.9037662794790566, + "grad_norm": 2.0349061489105225, + "learning_rate": 3.3729775934366557e-07, + "loss": 0.6224, "step": 12838 }, { - "epoch": 0.9715107260413908, - "grad_norm": 2.1497695446014404, - "learning_rate": 3.981101595726933e-08, - "loss": 0.6592, + "epoch": 0.9038366772263288, + "grad_norm": 2.4620511531829834, + "learning_rate": 3.368084482978838e-07, + "loss": 0.6314, "step": 12839 }, { - "epoch": 0.9715863947637244, - "grad_norm": 1.952813744544983, - "learning_rate": 3.95996920512951e-08, - "loss": 0.6218, + "epoch": 0.9039070749736009, + "grad_norm": 2.026829719543457, + "learning_rate": 3.363194836774639e-07, + "loss": 0.6376, "step": 12840 }, { - "epoch": 0.971662063486058, - "grad_norm": 2.6534552574157715, - "learning_rate": 3.938892927456994e-08, - "loss": 0.6288, + "epoch": 0.9039774727208729, + "grad_norm": 1.983805775642395, + "learning_rate": 3.358308655078297e-07, + "loss": 0.5969, "step": 12841 }, { - "epoch": 0.9717377322083917, - "grad_norm": 1.9842441082000732, - "learning_rate": 3.917872764029129e-08, - "loss": 0.6466, + "epoch": 0.904047870468145, + "grad_norm": 2.1517233848571777, + "learning_rate": 3.3534259381438413e-07, + "loss": 0.665, "step": 12842 }, { - "epoch": 0.9718134009307253, - "grad_norm": 2.3986659049987793, - "learning_rate": 3.8969087161622616e-08, - "loss": 0.6197, + "epoch": 0.9041182682154171, + "grad_norm": 1.6961874961853027, + "learning_rate": 3.348546686225138e-07, + "loss": 0.7299, "step": 12843 }, { - "epoch": 0.9718890696530589, - "grad_norm": 2.6465704441070557, - "learning_rate": 3.8760007851695423e-08, - "loss": 0.7109, + "epoch": 0.9041886659626892, + "grad_norm": 1.9158265590667725, + "learning_rate": 3.343670899575842e-07, + "loss": 0.6425, "step": 12844 }, { - "epoch": 0.9719647383753925, - "grad_norm": 2.349898338317871, - "learning_rate": 3.855148972359923e-08, - "loss": 0.6664, + "epoch": 0.9042590637099612, + "grad_norm": 1.935977816581726, + "learning_rate": 3.338798578449471e-07, + "loss": 0.5472, "step": 12845 }, { - "epoch": 0.9720404070977261, - "grad_norm": 2.368164300918579, - "learning_rate": 3.83435327903936e-08, - "loss": 0.5955, + "epoch": 0.9043294614572334, + "grad_norm": 1.5335156917572021, + "learning_rate": 3.3339297230993224e-07, + "loss": 0.5999, "step": 12846 }, { - "epoch": 0.9721160758200598, - "grad_norm": 2.0866646766662598, - "learning_rate": 3.8136137065102104e-08, - "loss": 0.6821, + "epoch": 0.9043998592045055, + "grad_norm": 1.9072974920272827, + "learning_rate": 3.3290643337785553e-07, + "loss": 0.6188, "step": 12847 }, { - "epoch": 0.9721917445423934, - "grad_norm": 2.3973708152770996, - "learning_rate": 3.7929302560711365e-08, - "loss": 0.6995, + "epoch": 0.9044702569517775, + "grad_norm": 2.341827392578125, + "learning_rate": 3.324202410740118e-07, + "loss": 0.7135, "step": 12848 }, { - "epoch": 0.972267413264727, - "grad_norm": 2.413179397583008, - "learning_rate": 3.772302929017502e-08, - "loss": 0.6289, + "epoch": 0.9045406546990497, + "grad_norm": 2.4039247035980225, + "learning_rate": 3.319343954236773e-07, + "loss": 0.5868, "step": 12849 }, { - "epoch": 0.9723430819870607, - "grad_norm": 2.0390636920928955, - "learning_rate": 3.7517317266409725e-08, - "loss": 0.6386, + "epoch": 0.9046110524463217, + "grad_norm": 2.480161666870117, + "learning_rate": 3.314488964521135e-07, + "loss": 0.7152, "step": 12850 }, { - "epoch": 0.9724187507093943, - "grad_norm": 2.3033881187438965, - "learning_rate": 3.7312166502298184e-08, - "loss": 0.6435, + "epoch": 0.9046814501935938, + "grad_norm": 2.279153823852539, + "learning_rate": 3.309637441845593e-07, + "loss": 0.6372, "step": 12851 }, { - "epoch": 0.9724944194317279, - "grad_norm": 2.501260995864868, - "learning_rate": 3.710757701068812e-08, - "loss": 0.6013, + "epoch": 0.9047518479408659, + "grad_norm": 2.2725799083709717, + "learning_rate": 3.3047893864624124e-07, + "loss": 0.5783, "step": 12852 }, { - "epoch": 0.9725700881540615, - "grad_norm": 2.2412755489349365, - "learning_rate": 3.6903548804390283e-08, - "loss": 0.637, + "epoch": 0.904822245688138, + "grad_norm": 1.8363969326019287, + "learning_rate": 3.29994479862364e-07, + "loss": 0.6459, "step": 12853 }, { - "epoch": 0.9726457568763951, - "grad_norm": 1.7953567504882812, - "learning_rate": 3.670008189618246e-08, - "loss": 0.6847, + "epoch": 0.9048926434354101, + "grad_norm": 1.9575806856155396, + "learning_rate": 3.295103678581136e-07, + "loss": 0.6621, "step": 12854 }, { - "epoch": 0.9727214255987288, - "grad_norm": 2.0152835845947266, - "learning_rate": 3.6497176298807445e-08, - "loss": 0.6373, + "epoch": 0.9049630411826821, + "grad_norm": 2.1380631923675537, + "learning_rate": 3.290266026586599e-07, + "loss": 0.6271, "step": 12855 }, { - "epoch": 0.9727970943210624, - "grad_norm": 2.8471808433532715, - "learning_rate": 3.629483202497008e-08, - "loss": 0.6311, + "epoch": 0.9050334389299542, + "grad_norm": 2.2080421447753906, + "learning_rate": 3.285431842891555e-07, + "loss": 0.6841, "step": 12856 }, { - "epoch": 0.972872763043396, - "grad_norm": 1.9700920581817627, - "learning_rate": 3.6093049087342236e-08, - "loss": 0.6387, + "epoch": 0.9051038366772264, + "grad_norm": 2.043266534805298, + "learning_rate": 3.2806011277473214e-07, + "loss": 0.603, "step": 12857 }, { - "epoch": 0.9729484317657296, - "grad_norm": 2.660348653793335, - "learning_rate": 3.589182749855979e-08, - "loss": 0.6581, + "epoch": 0.9051742344244984, + "grad_norm": 2.1221463680267334, + "learning_rate": 3.2757738814050665e-07, + "loss": 0.6724, "step": 12858 }, { - "epoch": 0.9730241004880632, - "grad_norm": 2.1217596530914307, - "learning_rate": 3.5691167271225676e-08, - "loss": 0.5784, + "epoch": 0.9052446321717705, + "grad_norm": 2.029904365539551, + "learning_rate": 3.270950104115757e-07, + "loss": 0.712, "step": 12859 }, { - "epoch": 0.9730997692103969, - "grad_norm": 2.266953945159912, - "learning_rate": 3.549106841790484e-08, - "loss": 0.7355, + "epoch": 0.9053150299190426, + "grad_norm": 1.723382830619812, + "learning_rate": 3.2661297961301907e-07, + "loss": 0.5654, "step": 12860 }, { - "epoch": 0.9731754379327305, - "grad_norm": 2.1540300846099854, - "learning_rate": 3.5291530951127247e-08, - "loss": 0.6858, + "epoch": 0.9053854276663147, + "grad_norm": 2.1475448608398438, + "learning_rate": 3.2613129576989594e-07, + "loss": 0.7168, "step": 12861 }, { - "epoch": 0.9732511066550641, - "grad_norm": 2.050753355026245, - "learning_rate": 3.5092554883389916e-08, - "loss": 0.617, + "epoch": 0.9054558254135867, + "grad_norm": 1.9974924325942993, + "learning_rate": 3.256499589072519e-07, + "loss": 0.5738, "step": 12862 }, { - "epoch": 0.9733267753773978, - "grad_norm": 4.436022758483887, - "learning_rate": 3.489414022715287e-08, - "loss": 0.567, + "epoch": 0.9055262231608588, + "grad_norm": 2.3779542446136475, + "learning_rate": 3.251689690501105e-07, + "loss": 0.6544, "step": 12863 }, { - "epoch": 0.9734024440997314, - "grad_norm": 2.4525585174560547, - "learning_rate": 3.4696286994841176e-08, - "loss": 0.7939, + "epoch": 0.905596620908131, + "grad_norm": 1.848689317703247, + "learning_rate": 3.2468832622347984e-07, + "loss": 0.6981, "step": 12864 }, { - "epoch": 0.973478112822065, - "grad_norm": 1.9763293266296387, - "learning_rate": 3.449899519884492e-08, - "loss": 0.5779, + "epoch": 0.905667018655403, + "grad_norm": 2.2935121059417725, + "learning_rate": 3.242080304523487e-07, + "loss": 0.6354, "step": 12865 }, { - "epoch": 0.9735537815443986, - "grad_norm": 1.7585806846618652, - "learning_rate": 3.430226485152021e-08, - "loss": 0.7401, + "epoch": 0.9057374164026751, + "grad_norm": 1.865422010421753, + "learning_rate": 3.2372808176168853e-07, + "loss": 0.6466, "step": 12866 }, { - "epoch": 0.9736294502667322, - "grad_norm": 1.8818296194076538, - "learning_rate": 3.410609596518621e-08, - "loss": 0.6261, + "epoch": 0.9058078141499472, + "grad_norm": 2.3552417755126953, + "learning_rate": 3.232484801764508e-07, + "loss": 0.7121, "step": 12867 }, { - "epoch": 0.9737051189890659, - "grad_norm": 2.081613302230835, - "learning_rate": 3.3910488552127085e-08, - "loss": 0.6814, + "epoch": 0.9058782118972193, + "grad_norm": 1.6709052324295044, + "learning_rate": 3.227692257215721e-07, + "loss": 0.5838, "step": 12868 }, { - "epoch": 0.9737807877113995, - "grad_norm": 2.024808168411255, - "learning_rate": 3.3715442624594025e-08, - "loss": 0.6636, + "epoch": 0.9059486096444914, + "grad_norm": 1.9453078508377075, + "learning_rate": 3.22290318421968e-07, + "loss": 0.6875, "step": 12869 }, { - "epoch": 0.9738564564337331, - "grad_norm": 3.159187078475952, - "learning_rate": 3.352095819479928e-08, - "loss": 0.6785, + "epoch": 0.9060190073917634, + "grad_norm": 1.6113935708999634, + "learning_rate": 3.218117583025386e-07, + "loss": 0.6246, "step": 12870 }, { - "epoch": 0.9739321251560668, - "grad_norm": 2.1414241790771484, - "learning_rate": 3.332703527492409e-08, - "loss": 0.7564, + "epoch": 0.9060894051390356, + "grad_norm": 1.9072312116622925, + "learning_rate": 3.2133354538816383e-07, + "loss": 0.6494, "step": 12871 }, { - "epoch": 0.9740077938784004, - "grad_norm": 2.4196348190307617, - "learning_rate": 3.3133673877111745e-08, - "loss": 0.573, + "epoch": 0.9061598028863076, + "grad_norm": 1.6125556230545044, + "learning_rate": 3.208556797037064e-07, + "loss": 0.5431, "step": 12872 }, { - "epoch": 0.974083462600734, - "grad_norm": 2.986185312271118, - "learning_rate": 3.2940874013470567e-08, - "loss": 0.653, + "epoch": 0.9062302006335797, + "grad_norm": 2.9508135318756104, + "learning_rate": 3.203781612740105e-07, + "loss": 0.8712, "step": 12873 }, { - "epoch": 0.9741591313230676, - "grad_norm": 2.1942930221557617, - "learning_rate": 3.274863569607489e-08, - "loss": 0.832, + "epoch": 0.9063005983808519, + "grad_norm": 2.1149542331695557, + "learning_rate": 3.199009901239047e-07, + "loss": 0.7053, "step": 12874 }, { - "epoch": 0.9742348000454012, - "grad_norm": 2.0108728408813477, - "learning_rate": 3.255695893696309e-08, - "loss": 0.5848, + "epoch": 0.9063709961281239, + "grad_norm": 2.007214307785034, + "learning_rate": 3.19424166278195e-07, + "loss": 0.7127, "step": 12875 }, { - "epoch": 0.9743104687677349, - "grad_norm": 1.9388446807861328, - "learning_rate": 3.2365843748139554e-08, - "loss": 0.7127, + "epoch": 0.906441393875396, + "grad_norm": 1.7325917482376099, + "learning_rate": 3.1894768976167433e-07, + "loss": 0.6148, "step": 12876 }, { - "epoch": 0.9743861374900685, - "grad_norm": 4.268650054931641, - "learning_rate": 3.2175290141571725e-08, - "loss": 0.6322, + "epoch": 0.906511791622668, + "grad_norm": 2.2876899242401123, + "learning_rate": 3.184715605991144e-07, + "loss": 0.6835, "step": 12877 }, { - "epoch": 0.9744618062124021, - "grad_norm": 2.069544553756714, - "learning_rate": 3.198529812919204e-08, - "loss": 0.6331, + "epoch": 0.9065821893699402, + "grad_norm": 2.0610899925231934, + "learning_rate": 3.179957788152685e-07, + "loss": 0.6274, "step": 12878 }, { - "epoch": 0.9745374749347357, - "grad_norm": 2.953239679336548, - "learning_rate": 3.1795867722898995e-08, - "loss": 0.6101, + "epoch": 0.9066525871172122, + "grad_norm": 2.0056865215301514, + "learning_rate": 3.1752034443487273e-07, + "loss": 0.6549, "step": 12879 }, { - "epoch": 0.9746131436570693, - "grad_norm": 2.6282474994659424, - "learning_rate": 3.16069989345561e-08, - "loss": 0.6981, + "epoch": 0.9067229848644843, + "grad_norm": 2.2209813594818115, + "learning_rate": 3.170452574826477e-07, + "loss": 0.7177, "step": 12880 }, { - "epoch": 0.974688812379403, - "grad_norm": 2.169370412826538, - "learning_rate": 3.141869177598988e-08, - "loss": 0.6561, + "epoch": 0.9067933826117565, + "grad_norm": 1.6608116626739502, + "learning_rate": 3.165705179832905e-07, + "loss": 0.5956, "step": 12881 }, { - "epoch": 0.9747644811017366, - "grad_norm": 2.562084913253784, - "learning_rate": 3.123094625899292e-08, - "loss": 0.6634, + "epoch": 0.9068637803590285, + "grad_norm": 1.812481164932251, + "learning_rate": 3.1609612596148605e-07, + "loss": 0.592, "step": 12882 }, { - "epoch": 0.9748401498240702, - "grad_norm": 2.8010025024414062, - "learning_rate": 3.1043762395321804e-08, - "loss": 0.6248, + "epoch": 0.9069341781063006, + "grad_norm": 1.9805703163146973, + "learning_rate": 3.156220814418974e-07, + "loss": 0.6844, "step": 12883 }, { - "epoch": 0.9749158185464039, - "grad_norm": 1.9485735893249512, - "learning_rate": 3.085714019670116e-08, - "loss": 0.6315, + "epoch": 0.9070045758535726, + "grad_norm": 2.609215259552002, + "learning_rate": 3.151483844491713e-07, + "loss": 0.6554, "step": 12884 }, { - "epoch": 0.9749914872687375, - "grad_norm": 2.397359848022461, - "learning_rate": 3.067107967481464e-08, - "loss": 0.6059, + "epoch": 0.9070749736008448, + "grad_norm": 1.7005317211151123, + "learning_rate": 3.1467503500793257e-07, + "loss": 0.7062, "step": 12885 }, { - "epoch": 0.9750671559910711, - "grad_norm": 2.445741891860962, - "learning_rate": 3.0485580841315916e-08, - "loss": 0.5743, + "epoch": 0.9071453713481169, + "grad_norm": 2.3874008655548096, + "learning_rate": 3.1420203314279547e-07, + "loss": 0.6532, "step": 12886 }, { - "epoch": 0.9751428247134047, - "grad_norm": 11.2733736038208, - "learning_rate": 3.030064370782171e-08, - "loss": 0.6714, + "epoch": 0.9072157690953889, + "grad_norm": 2.201958417892456, + "learning_rate": 3.137293788783476e-07, + "loss": 0.5865, "step": 12887 }, { - "epoch": 0.9752184934357383, - "grad_norm": 2.1173009872436523, - "learning_rate": 3.011626828591274e-08, - "loss": 0.7012, + "epoch": 0.9072861668426611, + "grad_norm": 2.382857322692871, + "learning_rate": 3.1325707223916654e-07, + "loss": 0.5585, "step": 12888 }, { - "epoch": 0.975294162158072, - "grad_norm": 2.1452109813690186, - "learning_rate": 2.9932454587133784e-08, - "loss": 0.7467, + "epoch": 0.9073565645899331, + "grad_norm": 1.9099528789520264, + "learning_rate": 3.1278511324980573e-07, + "loss": 0.6919, "step": 12889 }, { - "epoch": 0.9753698308804056, - "grad_norm": 2.0479838848114014, - "learning_rate": 2.9749202622998628e-08, - "loss": 0.6547, + "epoch": 0.9074269623372052, + "grad_norm": 1.8682796955108643, + "learning_rate": 3.123135019348039e-07, + "loss": 0.6485, "step": 12890 }, { - "epoch": 0.9754454996027392, - "grad_norm": 2.4938087463378906, - "learning_rate": 2.9566512404981096e-08, - "loss": 0.6852, + "epoch": 0.9074973600844773, + "grad_norm": 1.8306677341461182, + "learning_rate": 3.1184223831867797e-07, + "loss": 0.5813, "step": 12891 }, { - "epoch": 0.9755211683250729, - "grad_norm": 1.6364085674285889, - "learning_rate": 2.9384383944522032e-08, - "loss": 0.6448, + "epoch": 0.9075677578317494, + "grad_norm": 1.8511178493499756, + "learning_rate": 3.1137132242593247e-07, + "loss": 0.6978, "step": 12892 }, { - "epoch": 0.9755968370474064, - "grad_norm": 2.1803574562072754, - "learning_rate": 2.9202817253028314e-08, - "loss": 0.6075, + "epoch": 0.9076381555790215, + "grad_norm": 2.71256947517395, + "learning_rate": 3.109007542810486e-07, + "loss": 0.5565, "step": 12893 }, { - "epoch": 0.9756725057697401, - "grad_norm": 1.8982106447219849, - "learning_rate": 2.9021812341868847e-08, - "loss": 0.6446, + "epoch": 0.9077085533262935, + "grad_norm": 2.013268232345581, + "learning_rate": 3.104305339084935e-07, + "loss": 0.6849, "step": 12894 }, { - "epoch": 0.9757481744920737, - "grad_norm": 2.0787320137023926, - "learning_rate": 2.8841369222378566e-08, - "loss": 0.7196, + "epoch": 0.9077789510735657, + "grad_norm": 1.8601024150848389, + "learning_rate": 3.099606613327135e-07, + "loss": 0.6463, "step": 12895 }, { - "epoch": 0.9758238432144073, - "grad_norm": 2.5845391750335693, - "learning_rate": 2.866148790585843e-08, - "loss": 0.5858, + "epoch": 0.9078493488208378, + "grad_norm": 1.8847731351852417, + "learning_rate": 3.094911365781376e-07, + "loss": 0.6595, "step": 12896 }, { - "epoch": 0.975899511936741, - "grad_norm": 1.972965955734253, - "learning_rate": 2.8482168403573427e-08, - "loss": 0.6546, + "epoch": 0.9079197465681098, + "grad_norm": 2.0277316570281982, + "learning_rate": 3.090219596691757e-07, + "loss": 0.5459, "step": 12897 }, { - "epoch": 0.9759751806590746, - "grad_norm": 2.0222232341766357, - "learning_rate": 2.8303410726751576e-08, - "loss": 0.645, + "epoch": 0.9079901443153819, + "grad_norm": 2.359217643737793, + "learning_rate": 3.085531306302217e-07, + "loss": 0.5499, "step": 12898 }, { - "epoch": 0.9760508493814082, - "grad_norm": 2.261725425720215, - "learning_rate": 2.8125214886588923e-08, - "loss": 0.6731, + "epoch": 0.908060542062654, + "grad_norm": 2.085557222366333, + "learning_rate": 3.080846494856521e-07, + "loss": 0.5772, "step": 12899 }, { - "epoch": 0.9761265181037418, - "grad_norm": 2.3352370262145996, - "learning_rate": 2.7947580894242542e-08, - "loss": 0.6196, + "epoch": 0.9081309398099261, + "grad_norm": 1.8098909854888916, + "learning_rate": 3.0761651625982043e-07, + "loss": 0.5923, "step": 12900 }, { - "epoch": 0.9762021868260754, - "grad_norm": 2.054361581802368, - "learning_rate": 2.777050876083953e-08, - "loss": 0.5426, + "epoch": 0.9082013375571981, + "grad_norm": 3.088669538497925, + "learning_rate": 3.0714873097706896e-07, + "loss": 0.5931, "step": 12901 }, { - "epoch": 0.9762778555484091, - "grad_norm": 2.2071480751037598, - "learning_rate": 2.759399849746602e-08, - "loss": 0.6568, + "epoch": 0.9082717353044703, + "grad_norm": 2.3344790935516357, + "learning_rate": 3.066812936617148e-07, + "loss": 0.6671, "step": 12902 }, { - "epoch": 0.9763535242707427, - "grad_norm": 2.048191785812378, - "learning_rate": 2.7418050115176176e-08, - "loss": 0.7078, + "epoch": 0.9083421330517424, + "grad_norm": 2.184030055999756, + "learning_rate": 3.0621420433806355e-07, + "loss": 0.6498, "step": 12903 }, { - "epoch": 0.9764291929930763, - "grad_norm": 2.7316575050354004, - "learning_rate": 2.7242663624989172e-08, - "loss": 0.6979, + "epoch": 0.9084125307990144, + "grad_norm": 1.6947458982467651, + "learning_rate": 3.057474630303959e-07, + "loss": 0.6077, "step": 12904 }, { - "epoch": 0.97650486171541, - "grad_norm": 2.226996660232544, - "learning_rate": 2.706783903788823e-08, - "loss": 0.5833, + "epoch": 0.9084829285462865, + "grad_norm": 2.3132050037384033, + "learning_rate": 3.052810697629824e-07, + "loss": 0.7431, "step": 12905 }, { - "epoch": 0.9765805304377435, - "grad_norm": 2.25785231590271, - "learning_rate": 2.6893576364821593e-08, - "loss": 0.6875, + "epoch": 0.9085533262935586, + "grad_norm": 1.8850780725479126, + "learning_rate": 3.0481502456006724e-07, + "loss": 0.686, "step": 12906 }, { - "epoch": 0.9766561991600772, - "grad_norm": 2.408618927001953, - "learning_rate": 2.6719875616701528e-08, - "loss": 0.7061, + "epoch": 0.9086237240408307, + "grad_norm": 1.936397671699524, + "learning_rate": 3.043493274458837e-07, + "loss": 0.6904, "step": 12907 }, { - "epoch": 0.9767318678824108, - "grad_norm": 2.1182861328125, - "learning_rate": 2.6546736804405337e-08, - "loss": 0.6309, + "epoch": 0.9086941217881028, + "grad_norm": 2.5216119289398193, + "learning_rate": 3.038839784446433e-07, + "loss": 0.5572, "step": 12908 }, { - "epoch": 0.9768075366047444, - "grad_norm": 2.118018388748169, - "learning_rate": 2.6374159938777342e-08, - "loss": 0.6792, + "epoch": 0.9087645195353748, + "grad_norm": 2.46268630027771, + "learning_rate": 3.0341897758053817e-07, + "loss": 0.5814, "step": 12909 }, { - "epoch": 0.9768832053270781, - "grad_norm": 2.3272552490234375, - "learning_rate": 2.6202145030621904e-08, - "loss": 0.7025, + "epoch": 0.908834917282647, + "grad_norm": 2.907092332839966, + "learning_rate": 3.0295432487774473e-07, + "loss": 0.5944, "step": 12910 }, { - "epoch": 0.9769588740494117, - "grad_norm": 2.4793472290039062, - "learning_rate": 2.6030692090714404e-08, - "loss": 0.6057, + "epoch": 0.908905315029919, + "grad_norm": 1.8788238763809204, + "learning_rate": 3.024900203604226e-07, + "loss": 0.6825, "step": 12911 }, { - "epoch": 0.9770345427717453, - "grad_norm": 2.1847920417785645, - "learning_rate": 2.585980112978925e-08, - "loss": 0.7071, + "epoch": 0.9089757127771911, + "grad_norm": 1.6942845582962036, + "learning_rate": 3.020260640527086e-07, + "loss": 0.751, "step": 12912 }, { - "epoch": 0.977110211494079, - "grad_norm": 1.9853038787841797, - "learning_rate": 2.5689472158549888e-08, - "loss": 0.6536, + "epoch": 0.9090461105244633, + "grad_norm": 2.387601375579834, + "learning_rate": 3.015624559787257e-07, + "loss": 0.6126, "step": 12913 }, { - "epoch": 0.9771858802164125, - "grad_norm": 2.087451934814453, - "learning_rate": 2.5519705187662778e-08, - "loss": 0.5842, + "epoch": 0.9091165082717353, + "grad_norm": 1.8445591926574707, + "learning_rate": 3.010991961625781e-07, + "loss": 0.6721, "step": 12914 }, { - "epoch": 0.9772615489387462, - "grad_norm": 2.3047263622283936, - "learning_rate": 2.535050022775742e-08, - "loss": 0.555, + "epoch": 0.9091869060190074, + "grad_norm": 1.9884434938430786, + "learning_rate": 3.006362846283499e-07, + "loss": 0.5458, "step": 12915 }, { - "epoch": 0.9773372176610798, - "grad_norm": 1.9747389554977417, - "learning_rate": 2.518185728943234e-08, - "loss": 0.5205, + "epoch": 0.9092573037662794, + "grad_norm": 1.6827765703201294, + "learning_rate": 3.001737214001081e-07, + "loss": 0.5919, "step": 12916 }, { - "epoch": 0.9774128863834134, - "grad_norm": 2.134197235107422, - "learning_rate": 2.5013776383247088e-08, - "loss": 0.7488, + "epoch": 0.9093277015135516, + "grad_norm": 2.0639185905456543, + "learning_rate": 2.997115065019026e-07, + "loss": 0.6093, "step": 12917 }, { - "epoch": 0.9774885551057471, - "grad_norm": 2.0079126358032227, - "learning_rate": 2.4846257519727246e-08, - "loss": 0.666, + "epoch": 0.9093980992608236, + "grad_norm": 2.175994873046875, + "learning_rate": 2.9924963995776297e-07, + "loss": 0.7045, "step": 12918 }, { - "epoch": 0.9775642238280806, - "grad_norm": 2.0842461585998535, - "learning_rate": 2.4679300709364416e-08, - "loss": 0.6784, + "epoch": 0.9094684970080957, + "grad_norm": 2.692185401916504, + "learning_rate": 2.9878812179170423e-07, + "loss": 0.6566, "step": 12919 }, { - "epoch": 0.9776398925504143, - "grad_norm": 2.3097853660583496, - "learning_rate": 2.4512905962613242e-08, - "loss": 0.7302, + "epoch": 0.9095388947553679, + "grad_norm": 1.9929656982421875, + "learning_rate": 2.9832695202772017e-07, + "loss": 0.5973, "step": 12920 }, { - "epoch": 0.977715561272748, - "grad_norm": 2.2408077716827393, - "learning_rate": 2.4347073289894382e-08, - "loss": 0.7462, + "epoch": 0.9096092925026399, + "grad_norm": 1.713752269744873, + "learning_rate": 2.9786613068978694e-07, + "loss": 0.6728, "step": 12921 }, { - "epoch": 0.9777912299950815, - "grad_norm": 2.217599630355835, - "learning_rate": 2.4181802701592537e-08, - "loss": 0.6856, + "epoch": 0.909679690249912, + "grad_norm": 2.154620409011841, + "learning_rate": 2.974056578018627e-07, + "loss": 0.6812, "step": 12922 }, { - "epoch": 0.9778668987174152, - "grad_norm": 2.31512188911438, - "learning_rate": 2.401709420805842e-08, - "loss": 0.6898, + "epoch": 0.909750087997184, + "grad_norm": 1.8132238388061523, + "learning_rate": 2.9694553338788997e-07, + "loss": 0.6376, "step": 12923 }, { - "epoch": 0.9779425674397488, - "grad_norm": 2.1751387119293213, - "learning_rate": 2.3852947819604788e-08, - "loss": 0.5326, + "epoch": 0.9098204857444562, + "grad_norm": 2.3785531520843506, + "learning_rate": 2.964857574717882e-07, + "loss": 0.7322, "step": 12924 }, { - "epoch": 0.9780182361620824, - "grad_norm": 2.364943265914917, - "learning_rate": 2.3689363546511413e-08, - "loss": 0.6644, + "epoch": 0.9098908834917283, + "grad_norm": 1.9371064901351929, + "learning_rate": 2.9602633007746335e-07, + "loss": 0.6114, "step": 12925 }, { - "epoch": 0.978093904884416, - "grad_norm": 1.4398491382598877, - "learning_rate": 2.3526341399024097e-08, - "loss": 0.7215, + "epoch": 0.9099612812390003, + "grad_norm": 2.2457175254821777, + "learning_rate": 2.9556725122880147e-07, + "loss": 0.7938, "step": 12926 }, { - "epoch": 0.9781695736067496, - "grad_norm": 2.1224849224090576, - "learning_rate": 2.3363881387349684e-08, - "loss": 0.6535, + "epoch": 0.9100316789862725, + "grad_norm": 1.915107011795044, + "learning_rate": 2.9510852094967045e-07, + "loss": 0.65, "step": 12927 }, { - "epoch": 0.9782452423290833, - "grad_norm": 2.169689178466797, - "learning_rate": 2.3201983521664027e-08, - "loss": 0.6147, + "epoch": 0.9101020767335445, + "grad_norm": 1.6298257112503052, + "learning_rate": 2.9465013926391893e-07, + "loss": 0.6213, "step": 12928 }, { - "epoch": 0.9783209110514169, - "grad_norm": 2.206355094909668, - "learning_rate": 2.304064781210402e-08, - "loss": 0.7304, + "epoch": 0.9101724744808166, + "grad_norm": 2.3069403171539307, + "learning_rate": 2.9419210619537993e-07, + "loss": 0.7225, "step": 12929 }, { - "epoch": 0.9783965797737505, - "grad_norm": 2.7798423767089844, - "learning_rate": 2.2879874268773583e-08, - "loss": 0.6318, + "epoch": 0.9102428722280888, + "grad_norm": 3.0743777751922607, + "learning_rate": 2.9373442176786556e-07, + "loss": 0.6973, "step": 12930 }, { - "epoch": 0.9784722484960842, - "grad_norm": 2.0683248043060303, - "learning_rate": 2.2719662901741656e-08, - "loss": 0.6629, + "epoch": 0.9103132699753608, + "grad_norm": 2.3194031715393066, + "learning_rate": 2.932770860051731e-07, + "loss": 0.6929, "step": 12931 }, { - "epoch": 0.9785479172184177, - "grad_norm": 3.2282752990722656, - "learning_rate": 2.2560013721039217e-08, - "loss": 0.6315, + "epoch": 0.9103836677226329, + "grad_norm": 1.6122033596038818, + "learning_rate": 2.928200989310789e-07, + "loss": 0.6249, "step": 12932 }, { - "epoch": 0.9786235859407514, - "grad_norm": 2.0770423412323, - "learning_rate": 2.240092673666627e-08, - "loss": 0.5093, + "epoch": 0.9104540654699049, + "grad_norm": 2.3794121742248535, + "learning_rate": 2.9236346056934205e-07, + "loss": 0.703, "step": 12933 }, { - "epoch": 0.978699254663085, - "grad_norm": 2.091789722442627, - "learning_rate": 2.2242401958584847e-08, - "loss": 0.6643, + "epoch": 0.9105244632171771, + "grad_norm": 1.8181971311569214, + "learning_rate": 2.9190717094370246e-07, + "loss": 0.6488, "step": 12934 }, { - "epoch": 0.9787749233854186, - "grad_norm": 2.1290385723114014, - "learning_rate": 2.2084439396721002e-08, - "loss": 0.6081, + "epoch": 0.9105948609644492, + "grad_norm": 1.5821506977081299, + "learning_rate": 2.914512300778859e-07, + "loss": 0.6288, "step": 12935 }, { - "epoch": 0.9788505921077523, - "grad_norm": 1.8989332914352417, - "learning_rate": 2.1927039060966825e-08, - "loss": 0.5779, + "epoch": 0.9106652587117212, + "grad_norm": 2.1430742740631104, + "learning_rate": 2.90995637995594e-07, + "loss": 0.6411, "step": 12936 }, { - "epoch": 0.9789262608300859, - "grad_norm": 2.21238112449646, - "learning_rate": 2.177020096118143e-08, - "loss": 0.6956, + "epoch": 0.9107356564589933, + "grad_norm": 1.9010369777679443, + "learning_rate": 2.905403947205162e-07, + "loss": 0.6514, "step": 12937 }, { - "epoch": 0.9790019295524195, - "grad_norm": 2.3626580238342285, - "learning_rate": 2.1613925107184962e-08, - "loss": 0.7432, + "epoch": 0.9108060542062654, + "grad_norm": 2.226746082305908, + "learning_rate": 2.9008550027631984e-07, + "loss": 0.587, "step": 12938 }, { - "epoch": 0.9790775982747532, - "grad_norm": 2.329190254211426, - "learning_rate": 2.1458211508763594e-08, - "loss": 0.7325, + "epoch": 0.9108764519535375, + "grad_norm": 2.738449811935425, + "learning_rate": 2.8963095468665467e-07, + "loss": 0.5993, "step": 12939 }, { - "epoch": 0.9791532669970867, - "grad_norm": 3.697888135910034, - "learning_rate": 2.130306017566952e-08, - "loss": 0.6174, + "epoch": 0.9109468497008095, + "grad_norm": 2.043180465698242, + "learning_rate": 2.891767579751532e-07, + "loss": 0.5962, "step": 12940 }, { - "epoch": 0.9792289357194204, - "grad_norm": 1.962836503982544, - "learning_rate": 2.1148471117617972e-08, - "loss": 0.6935, + "epoch": 0.9110172474480817, + "grad_norm": 1.6848148107528687, + "learning_rate": 2.8872291016543006e-07, + "loss": 0.7046, "step": 12941 }, { - "epoch": 0.979304604441754, - "grad_norm": 2.527402877807617, - "learning_rate": 2.0994444344291207e-08, - "loss": 0.7468, + "epoch": 0.9110876451953538, + "grad_norm": 2.207163095474243, + "learning_rate": 2.8826941128107974e-07, + "loss": 0.6559, "step": 12942 }, { - "epoch": 0.9793802731640876, - "grad_norm": 3.003408670425415, - "learning_rate": 2.084097986533351e-08, - "loss": 0.7296, + "epoch": 0.9111580429426258, + "grad_norm": 2.2823832035064697, + "learning_rate": 2.878162613456827e-07, + "loss": 0.6383, "step": 12943 }, { - "epoch": 0.9794559418864213, - "grad_norm": 1.8468185663223267, - "learning_rate": 2.068807769035519e-08, - "loss": 0.6277, + "epoch": 0.911228440689898, + "grad_norm": 1.8808118104934692, + "learning_rate": 2.873634603827969e-07, + "loss": 0.658, "step": 12944 }, { - "epoch": 0.9795316106087548, - "grad_norm": 2.2292520999908447, - "learning_rate": 2.053573782893259e-08, - "loss": 0.6755, + "epoch": 0.91129883843717, + "grad_norm": 2.5022969245910645, + "learning_rate": 2.869110084159647e-07, + "loss": 0.5719, "step": 12945 }, { - "epoch": 0.9796072793310885, - "grad_norm": 3.2633378505706787, - "learning_rate": 2.0383960290605076e-08, - "loss": 0.637, + "epoch": 0.9113692361844421, + "grad_norm": 2.1335158348083496, + "learning_rate": 2.8645890546870757e-07, + "loss": 0.664, "step": 12946 }, { - "epoch": 0.9796829480534222, - "grad_norm": 2.093501329421997, - "learning_rate": 2.0232745084878046e-08, - "loss": 0.6773, + "epoch": 0.9114396339317142, + "grad_norm": 1.650978446006775, + "learning_rate": 2.8600715156453284e-07, + "loss": 0.5679, "step": 12947 }, { - "epoch": 0.9797586167757557, - "grad_norm": 2.401658773422241, - "learning_rate": 2.0082092221220925e-08, - "loss": 0.6135, + "epoch": 0.9115100316789863, + "grad_norm": 2.3181514739990234, + "learning_rate": 2.8555574672692616e-07, + "loss": 0.6988, "step": 12948 }, { - "epoch": 0.9798342854980894, - "grad_norm": 2.732320547103882, - "learning_rate": 1.9932001709066172e-08, - "loss": 0.5327, + "epoch": 0.9115804294262584, + "grad_norm": 2.750030279159546, + "learning_rate": 2.8510469097935776e-07, + "loss": 0.5633, "step": 12949 }, { - "epoch": 0.979909954220423, - "grad_norm": 2.547961711883545, - "learning_rate": 1.978247355781626e-08, - "loss": 0.684, + "epoch": 0.9116508271735304, + "grad_norm": 2.1886115074157715, + "learning_rate": 2.846539843452782e-07, + "loss": 0.711, "step": 12950 }, { - "epoch": 0.9799856229427566, - "grad_norm": 2.245232105255127, - "learning_rate": 1.9633507776831704e-08, - "loss": 0.5505, + "epoch": 0.9117212249208025, + "grad_norm": 2.0369720458984375, + "learning_rate": 2.8420362684811883e-07, + "loss": 0.7429, "step": 12951 }, { - "epoch": 0.9800612916650903, - "grad_norm": 2.435441732406616, - "learning_rate": 1.948510437544404e-08, - "loss": 0.5674, + "epoch": 0.9117916226680747, + "grad_norm": 1.700015664100647, + "learning_rate": 2.837536185112961e-07, + "loss": 0.6094, "step": 12952 }, { - "epoch": 0.9801369603874238, - "grad_norm": 2.2396018505096436, - "learning_rate": 1.9337263362945833e-08, - "loss": 0.6784, + "epoch": 0.9118620204153467, + "grad_norm": 1.8799644708633423, + "learning_rate": 2.8330395935820395e-07, + "loss": 0.6908, "step": 12953 }, { - "epoch": 0.9802126291097575, - "grad_norm": 1.6761008501052856, - "learning_rate": 1.918998474859468e-08, - "loss": 0.6931, + "epoch": 0.9119324181626188, + "grad_norm": 1.662203311920166, + "learning_rate": 2.8285464941222314e-07, + "loss": 0.7045, "step": 12954 }, { - "epoch": 0.9802882978320911, - "grad_norm": 1.8858741521835327, - "learning_rate": 1.90432685416142e-08, - "loss": 0.6703, + "epoch": 0.9120028159098909, + "grad_norm": 2.0265326499938965, + "learning_rate": 2.824056886967127e-07, + "loss": 0.6533, "step": 12955 }, { - "epoch": 0.9803639665544247, - "grad_norm": 2.178541421890259, - "learning_rate": 1.8897114751192046e-08, - "loss": 0.669, + "epoch": 0.912073213657163, + "grad_norm": 2.2038724422454834, + "learning_rate": 2.819570772350144e-07, + "loss": 0.6262, "step": 12956 }, { - "epoch": 0.9804396352767584, - "grad_norm": 2.8125827312469482, - "learning_rate": 1.8751523386480896e-08, - "loss": 0.7143, + "epoch": 0.912143611404435, + "grad_norm": 2.0032784938812256, + "learning_rate": 2.815088150504508e-07, + "loss": 0.5833, "step": 12957 }, { - "epoch": 0.9805153039990919, - "grad_norm": 2.164923667907715, - "learning_rate": 1.8606494456599453e-08, - "loss": 0.5678, + "epoch": 0.9122140091517071, + "grad_norm": 1.745127558708191, + "learning_rate": 2.8106090216632873e-07, + "loss": 0.6171, "step": 12958 }, { - "epoch": 0.9805909727214256, - "grad_norm": 1.9167901277542114, - "learning_rate": 1.846202797062746e-08, - "loss": 0.5551, + "epoch": 0.9122844068989793, + "grad_norm": 2.322532892227173, + "learning_rate": 2.806133386059357e-07, + "loss": 0.6989, "step": 12959 }, { - "epoch": 0.9806666414437593, - "grad_norm": 4.015124320983887, - "learning_rate": 1.8318123937612674e-08, - "loss": 0.6921, + "epoch": 0.9123548046462513, + "grad_norm": 1.5998344421386719, + "learning_rate": 2.801661243925406e-07, + "loss": 0.6727, "step": 12960 }, { - "epoch": 0.9807423101660928, - "grad_norm": 2.202143907546997, - "learning_rate": 1.8174782366567887e-08, - "loss": 0.6212, + "epoch": 0.9124252023935234, + "grad_norm": 2.2188775539398193, + "learning_rate": 2.7971925954939513e-07, + "loss": 0.7142, "step": 12961 }, { - "epoch": 0.9808179788884265, - "grad_norm": 2.2554047107696533, - "learning_rate": 1.803200326646992e-08, - "loss": 0.9251, + "epoch": 0.9124956001407954, + "grad_norm": 2.630490303039551, + "learning_rate": 2.792727440997308e-07, + "loss": 0.6296, "step": 12962 }, { - "epoch": 0.9808936476107601, - "grad_norm": 2.1025631427764893, - "learning_rate": 1.7889786646257622e-08, - "loss": 0.6946, + "epoch": 0.9125659978880676, + "grad_norm": 2.212928295135498, + "learning_rate": 2.788265780667628e-07, + "loss": 0.5461, "step": 12963 }, { - "epoch": 0.9809693163330937, - "grad_norm": 2.0117862224578857, - "learning_rate": 1.7748132514838868e-08, - "loss": 0.7143, + "epoch": 0.9126363956353397, + "grad_norm": 2.1200106143951416, + "learning_rate": 2.783807614736893e-07, + "loss": 0.6577, "step": 12964 }, { - "epoch": 0.9810449850554274, - "grad_norm": 2.1950743198394775, - "learning_rate": 1.7607040881084558e-08, - "loss": 0.5661, + "epoch": 0.9127067933826117, + "grad_norm": 2.518336296081543, + "learning_rate": 2.779352943436858e-07, + "loss": 0.5986, "step": 12965 }, { - "epoch": 0.9811206537777609, - "grad_norm": 2.6558985710144043, - "learning_rate": 1.7466511753830626e-08, - "loss": 0.5764, + "epoch": 0.9127771911298839, + "grad_norm": 2.000070095062256, + "learning_rate": 2.774901766999155e-07, + "loss": 0.721, "step": 12966 }, { - "epoch": 0.9811963225000946, - "grad_norm": 1.9270297288894653, - "learning_rate": 1.7326545141875038e-08, - "loss": 0.6928, + "epoch": 0.9128475888771559, + "grad_norm": 2.003119945526123, + "learning_rate": 2.770454085655197e-07, + "loss": 0.5464, "step": 12967 }, { - "epoch": 0.9812719912224283, - "grad_norm": 1.9736948013305664, - "learning_rate": 1.7187141053985776e-08, - "loss": 0.6422, + "epoch": 0.912917986624428, + "grad_norm": 1.9646438360214233, + "learning_rate": 2.766009899636213e-07, + "loss": 0.6372, "step": 12968 }, { - "epoch": 0.9813476599447618, - "grad_norm": 2.117433786392212, - "learning_rate": 1.7048299498891862e-08, - "loss": 0.6823, + "epoch": 0.9129883843717002, + "grad_norm": 1.8579083681106567, + "learning_rate": 2.761569209173257e-07, + "loss": 0.6781, "step": 12969 }, { - "epoch": 0.9814233286670955, - "grad_norm": 1.8852287530899048, - "learning_rate": 1.6910020485287338e-08, - "loss": 0.7003, + "epoch": 0.9130587821189722, + "grad_norm": 2.1277284622192383, + "learning_rate": 2.7571320144972155e-07, + "loss": 0.5659, "step": 12970 }, { - "epoch": 0.981498997389429, - "grad_norm": 2.4513890743255615, - "learning_rate": 1.6772304021832275e-08, - "loss": 0.7829, + "epoch": 0.9131291798662443, + "grad_norm": 2.117549180984497, + "learning_rate": 2.7526983158387794e-07, + "loss": 0.6738, "step": 12971 }, { - "epoch": 0.9815746661117627, - "grad_norm": 2.3832831382751465, - "learning_rate": 1.6635150117150776e-08, - "loss": 0.7601, + "epoch": 0.9131995776135163, + "grad_norm": 2.224393367767334, + "learning_rate": 2.748268113428461e-07, + "loss": 0.6591, "step": 12972 }, { - "epoch": 0.9816503348340964, - "grad_norm": 2.286743402481079, - "learning_rate": 1.6498558779831973e-08, - "loss": 0.7211, + "epoch": 0.9132699753607885, + "grad_norm": 1.9128360748291016, + "learning_rate": 2.7438414074965945e-07, + "loss": 0.583, "step": 12973 }, { - "epoch": 0.9817260035564299, - "grad_norm": 3.2696454524993896, - "learning_rate": 1.6362530018430022e-08, - "loss": 0.7254, + "epoch": 0.9133403731080605, + "grad_norm": 1.6618742942810059, + "learning_rate": 2.7394181982733256e-07, + "loss": 0.5807, "step": 12974 }, { - "epoch": 0.9818016722787636, - "grad_norm": 2.6552700996398926, - "learning_rate": 1.6227063841462108e-08, - "loss": 0.7234, + "epoch": 0.9134107708553326, + "grad_norm": 2.0710880756378174, + "learning_rate": 2.734998485988608e-07, + "loss": 0.6459, "step": 12975 }, { - "epoch": 0.9818773410010972, - "grad_norm": 2.3581531047821045, - "learning_rate": 1.6092160257413446e-08, - "loss": 0.7437, + "epoch": 0.9134811686026048, + "grad_norm": 2.95670747756958, + "learning_rate": 2.730582270872246e-07, + "loss": 0.6601, "step": 12976 }, { - "epoch": 0.9819530097234308, - "grad_norm": 3.002993106842041, - "learning_rate": 1.5957819274730277e-08, - "loss": 0.6852, + "epoch": 0.9135515663498768, + "grad_norm": 2.03173565864563, + "learning_rate": 2.7261695531538275e-07, + "loss": 0.6073, "step": 12977 }, { - "epoch": 0.9820286784457645, - "grad_norm": 2.3881561756134033, - "learning_rate": 1.5824040901826876e-08, - "loss": 0.608, + "epoch": 0.9136219640971489, + "grad_norm": 2.3559954166412354, + "learning_rate": 2.721760333062793e-07, + "loss": 0.6074, "step": 12978 }, { - "epoch": 0.982104347168098, - "grad_norm": 2.43519926071167, - "learning_rate": 1.5690825147080533e-08, - "loss": 0.6577, + "epoch": 0.9136923618444209, + "grad_norm": 1.9245232343673706, + "learning_rate": 2.717354610828363e-07, + "loss": 0.7437, "step": 12979 }, { - "epoch": 0.9821800158904317, - "grad_norm": 2.051454544067383, - "learning_rate": 1.5558172018833584e-08, - "loss": 0.6634, + "epoch": 0.9137627595916931, + "grad_norm": 2.0752909183502197, + "learning_rate": 2.712952386679606e-07, + "loss": 0.7155, "step": 12980 }, { - "epoch": 0.9822556846127654, - "grad_norm": 2.9443256855010986, - "learning_rate": 1.5426081525392377e-08, - "loss": 0.5589, + "epoch": 0.9138331573389652, + "grad_norm": 2.422154426574707, + "learning_rate": 2.7085536608453855e-07, + "loss": 0.6325, "step": 12981 }, { - "epoch": 0.9823313533350989, - "grad_norm": 2.4073567390441895, - "learning_rate": 1.52945536750303e-08, - "loss": 0.6178, + "epoch": 0.9139035550862372, + "grad_norm": 1.7547389268875122, + "learning_rate": 2.7041584335544035e-07, + "loss": 0.6994, "step": 12982 }, { - "epoch": 0.9824070220574326, - "grad_norm": 2.8144779205322266, - "learning_rate": 1.516358847598376e-08, - "loss": 0.6876, + "epoch": 0.9139739528335094, + "grad_norm": 1.8597640991210938, + "learning_rate": 2.699766705035175e-07, + "loss": 0.5826, "step": 12983 }, { - "epoch": 0.9824826907797661, - "grad_norm": 2.132742166519165, - "learning_rate": 1.50331859364522e-08, - "loss": 0.6289, + "epoch": 0.9140443505807814, + "grad_norm": 2.288832426071167, + "learning_rate": 2.6953784755160203e-07, + "loss": 0.7322, "step": 12984 }, { - "epoch": 0.9825583595020998, - "grad_norm": 2.0860514640808105, - "learning_rate": 1.4903346064605085e-08, - "loss": 0.6644, + "epoch": 0.9141147483280535, + "grad_norm": 1.8130297660827637, + "learning_rate": 2.6909937452251054e-07, + "loss": 0.6785, "step": 12985 }, { - "epoch": 0.9826340282244335, - "grad_norm": 2.203529119491577, - "learning_rate": 1.4774068868570911e-08, - "loss": 0.7307, + "epoch": 0.9141851460753256, + "grad_norm": 2.438861608505249, + "learning_rate": 2.6866125143903773e-07, + "loss": 0.797, "step": 12986 }, { - "epoch": 0.982709696946767, - "grad_norm": 1.9949660301208496, - "learning_rate": 1.4645354356446206e-08, - "loss": 0.7352, + "epoch": 0.9142555438225977, + "grad_norm": 2.048807382583618, + "learning_rate": 2.682234783239621e-07, + "loss": 0.77, "step": 12987 }, { - "epoch": 0.9827853656691007, - "grad_norm": 1.9843783378601074, - "learning_rate": 1.4517202536291519e-08, - "loss": 0.6633, + "epoch": 0.9143259415698698, + "grad_norm": 1.7300328016281128, + "learning_rate": 2.677860552000457e-07, + "loss": 0.5437, "step": 12988 }, { - "epoch": 0.9828610343914344, - "grad_norm": 2.39467191696167, - "learning_rate": 1.438961341613243e-08, - "loss": 0.4948, + "epoch": 0.9143963393171418, + "grad_norm": 1.9139983654022217, + "learning_rate": 2.673489820900282e-07, + "loss": 0.6075, "step": 12989 }, { - "epoch": 0.9829367031137679, - "grad_norm": 2.534012794494629, - "learning_rate": 1.4262587003959549e-08, - "loss": 0.6207, + "epoch": 0.914466737064414, + "grad_norm": 1.7831809520721436, + "learning_rate": 2.669122590166352e-07, + "loss": 0.6383, "step": 12990 }, { - "epoch": 0.9830123718361016, - "grad_norm": 2.261028528213501, - "learning_rate": 1.4136123307725512e-08, - "loss": 0.6348, + "epoch": 0.9145371348116861, + "grad_norm": 2.1173999309539795, + "learning_rate": 2.66475886002572e-07, + "loss": 0.6657, "step": 12991 }, { - "epoch": 0.9830880405584351, - "grad_norm": 2.1703314781188965, - "learning_rate": 1.4010222335351985e-08, - "loss": 0.5724, + "epoch": 0.9146075325589581, + "grad_norm": 1.9126523733139038, + "learning_rate": 2.660398630705254e-07, + "loss": 0.6028, "step": 12992 }, { - "epoch": 0.9831637092807688, - "grad_norm": 2.269578218460083, - "learning_rate": 1.3884884094722662e-08, - "loss": 0.6435, + "epoch": 0.9146779303062302, + "grad_norm": 2.1471195220947266, + "learning_rate": 2.6560419024316426e-07, + "loss": 0.6569, "step": 12993 }, { - "epoch": 0.9832393780031025, - "grad_norm": 2.1392128467559814, - "learning_rate": 1.376010859368626e-08, - "loss": 0.714, + "epoch": 0.9147483280535023, + "grad_norm": 2.1342782974243164, + "learning_rate": 2.651688675431412e-07, + "loss": 0.6435, "step": 12994 }, { - "epoch": 0.983315046725436, - "grad_norm": 1.8886982202529907, - "learning_rate": 1.3635895840056534e-08, - "loss": 0.5763, + "epoch": 0.9148187258007744, + "grad_norm": 1.830386996269226, + "learning_rate": 2.6473389499308764e-07, + "loss": 0.6426, "step": 12995 }, { - "epoch": 0.9833907154477697, - "grad_norm": 2.211404323577881, - "learning_rate": 1.3512245841613257e-08, - "loss": 0.6028, + "epoch": 0.9148891235480464, + "grad_norm": 2.2077248096466064, + "learning_rate": 2.642992726156189e-07, + "loss": 0.5789, "step": 12996 }, { - "epoch": 0.9834663841701032, - "grad_norm": 1.8366492986679077, - "learning_rate": 1.338915860609824e-08, - "loss": 0.6571, + "epoch": 0.9149595212953185, + "grad_norm": 1.9610991477966309, + "learning_rate": 2.638650004333316e-07, + "loss": 0.5147, "step": 12997 }, { - "epoch": 0.9835420528924369, - "grad_norm": 3.9259090423583984, - "learning_rate": 1.3266634141220312e-08, - "loss": 0.7254, + "epoch": 0.9150299190425907, + "grad_norm": 2.3692550659179688, + "learning_rate": 2.6343107846880284e-07, + "loss": 0.6526, "step": 12998 }, { - "epoch": 0.9836177216147706, - "grad_norm": 2.866379499435425, - "learning_rate": 1.314467245465334e-08, - "loss": 0.6933, + "epoch": 0.9151003167898627, + "grad_norm": 2.5062198638916016, + "learning_rate": 2.6299750674459354e-07, + "loss": 0.6737, "step": 12999 }, { - "epoch": 0.9836933903371041, - "grad_norm": 1.9616422653198242, - "learning_rate": 1.302327355403321e-08, - "loss": 0.6589, + "epoch": 0.9151707145371348, + "grad_norm": 2.4291772842407227, + "learning_rate": 2.6256428528324424e-07, + "loss": 0.6893, "step": 13000 }, { - "epoch": 0.9837690590594378, - "grad_norm": 2.223374843597412, - "learning_rate": 1.2902437446962844e-08, - "loss": 0.6306, + "epoch": 0.9152411122844069, + "grad_norm": 1.9411602020263672, + "learning_rate": 2.6213141410728093e-07, + "loss": 0.6362, "step": 13001 }, { - "epoch": 0.9838447277817715, - "grad_norm": 2.437225818634033, - "learning_rate": 1.2782164141010188e-08, - "loss": 0.841, + "epoch": 0.915311510031679, + "grad_norm": 1.9173519611358643, + "learning_rate": 2.616988932392068e-07, + "loss": 0.6198, "step": 13002 }, { - "epoch": 0.983920396504105, - "grad_norm": 2.194181203842163, - "learning_rate": 1.2662453643706217e-08, - "loss": 0.6298, + "epoch": 0.9153819077789511, + "grad_norm": 1.8591630458831787, + "learning_rate": 2.612667227015113e-07, + "loss": 0.7029, "step": 13003 }, { - "epoch": 0.9839960652264387, - "grad_norm": 2.2740933895111084, - "learning_rate": 1.2543305962548935e-08, - "loss": 0.6895, + "epoch": 0.9154523055262231, + "grad_norm": 1.89719557762146, + "learning_rate": 2.6083490251666043e-07, + "loss": 0.6149, "step": 13004 }, { - "epoch": 0.9840717339487722, - "grad_norm": 2.391655206680298, - "learning_rate": 1.2424721104997371e-08, - "loss": 0.7314, + "epoch": 0.9155227032734953, + "grad_norm": 1.7636524438858032, + "learning_rate": 2.6040343270710695e-07, + "loss": 0.5997, "step": 13005 }, { - "epoch": 0.9841474026711059, - "grad_norm": 2.1336898803710938, - "learning_rate": 1.2306699078479588e-08, - "loss": 0.609, + "epoch": 0.9155931010207673, + "grad_norm": 1.9375889301300049, + "learning_rate": 2.5997231329528203e-07, + "loss": 0.653, "step": 13006 }, { - "epoch": 0.9842230713934396, - "grad_norm": 4.608248710632324, - "learning_rate": 1.2189239890386672e-08, - "loss": 0.7341, + "epoch": 0.9156634987680394, + "grad_norm": 2.4724762439727783, + "learning_rate": 2.595415443036011e-07, + "loss": 0.6289, "step": 13007 }, { - "epoch": 0.9842987401157731, - "grad_norm": 3.4450154304504395, - "learning_rate": 1.207234354807374e-08, - "loss": 0.6952, + "epoch": 0.9157338965153116, + "grad_norm": 2.059335231781006, + "learning_rate": 2.591111257544595e-07, + "loss": 0.6466, "step": 13008 }, { - "epoch": 0.9843744088381068, - "grad_norm": 2.1393322944641113, - "learning_rate": 1.1956010058859934e-08, - "loss": 0.6627, + "epoch": 0.9158042942625836, + "grad_norm": 1.795561671257019, + "learning_rate": 2.586810576702354e-07, + "loss": 0.6271, "step": 13009 }, { - "epoch": 0.9844500775604403, - "grad_norm": 2.124804973602295, - "learning_rate": 1.1840239430032429e-08, - "loss": 0.5678, + "epoch": 0.9158746920098557, + "grad_norm": 1.7821000814437866, + "learning_rate": 2.582513400732892e-07, + "loss": 0.5178, "step": 13010 }, { - "epoch": 0.984525746282774, - "grad_norm": 2.0524282455444336, - "learning_rate": 1.1725031668840425e-08, - "loss": 0.7415, + "epoch": 0.9159450897571277, + "grad_norm": 2.222073554992676, + "learning_rate": 2.57821972985961e-07, + "loss": 0.6334, "step": 13011 }, { - "epoch": 0.9846014150051077, - "grad_norm": 1.9901623725891113, - "learning_rate": 1.161038678249815e-08, - "loss": 0.541, + "epoch": 0.9160154875043999, + "grad_norm": 1.8639497756958008, + "learning_rate": 2.573929564305738e-07, + "loss": 0.7132, "step": 13012 }, { - "epoch": 0.9846770837274412, - "grad_norm": 2.330474376678467, - "learning_rate": 1.1496304778185863e-08, - "loss": 0.7449, + "epoch": 0.9160858852516719, + "grad_norm": 2.181734085083008, + "learning_rate": 2.5696429042943437e-07, + "loss": 0.604, "step": 13013 }, { - "epoch": 0.9847527524497749, - "grad_norm": 2.8459479808807373, - "learning_rate": 1.1382785663046846e-08, - "loss": 0.7962, + "epoch": 0.916156282998944, + "grad_norm": 2.5417721271514893, + "learning_rate": 2.565359750048276e-07, + "loss": 0.6183, "step": 13014 }, { - "epoch": 0.9848284211721086, - "grad_norm": 2.1679039001464844, - "learning_rate": 1.1269829444191416e-08, - "loss": 0.639, + "epoch": 0.9162266807462162, + "grad_norm": 1.8888300657272339, + "learning_rate": 2.5610801017902297e-07, + "loss": 0.5776, "step": 13015 }, { - "epoch": 0.9849040898944421, - "grad_norm": 1.800337314605713, - "learning_rate": 1.1157436128691911e-08, - "loss": 0.5612, + "epoch": 0.9162970784934882, + "grad_norm": 2.186311960220337, + "learning_rate": 2.5568039597427114e-07, + "loss": 0.5999, "step": 13016 }, { - "epoch": 0.9849797586167758, - "grad_norm": 2.2454311847686768, - "learning_rate": 1.1045605723586705e-08, - "loss": 0.6121, + "epoch": 0.9163674762407603, + "grad_norm": 2.0323121547698975, + "learning_rate": 2.5525313241280356e-07, + "loss": 0.6397, "step": 13017 }, { - "epoch": 0.9850554273391093, - "grad_norm": 2.1114368438720703, - "learning_rate": 1.0934338235879193e-08, - "loss": 0.8751, + "epoch": 0.9164378739880323, + "grad_norm": 2.294940233230591, + "learning_rate": 2.5482621951683283e-07, + "loss": 0.7057, "step": 13018 }, { - "epoch": 0.985131096061443, - "grad_norm": 2.5677621364593506, - "learning_rate": 1.0823633672538802e-08, - "loss": 0.5946, + "epoch": 0.9165082717353045, + "grad_norm": 2.631450653076172, + "learning_rate": 2.543996573085568e-07, + "loss": 0.6175, "step": 13019 }, { - "epoch": 0.9852067647837767, - "grad_norm": 2.1673922538757324, - "learning_rate": 1.0713492040495986e-08, - "loss": 0.65, + "epoch": 0.9165786694825766, + "grad_norm": 1.8565332889556885, + "learning_rate": 2.539734458101502e-07, + "loss": 0.61, "step": 13020 }, { - "epoch": 0.9852824335061102, - "grad_norm": 2.8875181674957275, - "learning_rate": 1.060391334664923e-08, - "loss": 0.5534, + "epoch": 0.9166490672298486, + "grad_norm": 2.0079493522644043, + "learning_rate": 2.5354758504377516e-07, + "loss": 0.7557, "step": 13021 }, { - "epoch": 0.9853581022284439, - "grad_norm": 2.2637150287628174, - "learning_rate": 1.0494897597861041e-08, - "loss": 0.6161, + "epoch": 0.9167194649771208, + "grad_norm": 1.9346044063568115, + "learning_rate": 2.5312207503157124e-07, + "loss": 0.7307, "step": 13022 }, { - "epoch": 0.9854337709507774, - "grad_norm": 2.323862314224243, - "learning_rate": 1.0386444800957962e-08, - "loss": 0.7628, + "epoch": 0.9167898627243928, + "grad_norm": 1.817567229270935, + "learning_rate": 2.52696915795661e-07, + "loss": 0.7192, "step": 13023 }, { - "epoch": 0.9855094396731111, - "grad_norm": 2.155294179916382, - "learning_rate": 1.0278554962731557e-08, - "loss": 0.657, + "epoch": 0.9168602604716649, + "grad_norm": 2.1696627140045166, + "learning_rate": 2.5227210735814754e-07, + "loss": 0.6555, "step": 13024 }, { - "epoch": 0.9855851083954448, - "grad_norm": 2.271121025085449, - "learning_rate": 1.0171228089938422e-08, - "loss": 0.63, + "epoch": 0.916930658218937, + "grad_norm": 1.7943049669265747, + "learning_rate": 2.5184764974111996e-07, + "loss": 0.5801, "step": 13025 }, { - "epoch": 0.9856607771177783, - "grad_norm": 2.7450411319732666, - "learning_rate": 1.0064464189300181e-08, - "loss": 0.6431, + "epoch": 0.9170010559662091, + "grad_norm": 1.8890653848648071, + "learning_rate": 2.5142354296664246e-07, + "loss": 0.6304, "step": 13026 }, { - "epoch": 0.985736445840112, - "grad_norm": 1.9051148891448975, - "learning_rate": 9.958263267501488e-09, - "loss": 0.5382, + "epoch": 0.9170714537134812, + "grad_norm": 1.8958680629730225, + "learning_rate": 2.509997870567685e-07, + "loss": 0.6855, "step": 13027 }, { - "epoch": 0.9858121145624457, - "grad_norm": 2.6679155826568604, - "learning_rate": 9.852625331193021e-09, - "loss": 0.5978, + "epoch": 0.9171418514607532, + "grad_norm": 2.032759428024292, + "learning_rate": 2.5057638203352807e-07, + "loss": 0.7247, "step": 13028 }, { - "epoch": 0.9858877832847792, - "grad_norm": 2.288546323776245, - "learning_rate": 9.747550386991488e-09, - "loss": 0.5773, + "epoch": 0.9172122492080254, + "grad_norm": 2.080650568008423, + "learning_rate": 2.501533279189341e-07, + "loss": 0.557, "step": 13029 }, { - "epoch": 0.9859634520071129, - "grad_norm": 2.641033411026001, - "learning_rate": 9.643038441476626e-09, - "loss": 0.6219, + "epoch": 0.9172826469552974, + "grad_norm": 1.8138928413391113, + "learning_rate": 2.497306247349801e-07, + "loss": 0.6231, "step": 13030 }, { - "epoch": 0.9860391207294464, - "grad_norm": 3.3034932613372803, - "learning_rate": 9.539089501193199e-09, - "loss": 0.7107, + "epoch": 0.9173530447025695, + "grad_norm": 1.764664888381958, + "learning_rate": 2.493082725036456e-07, + "loss": 0.6258, "step": 13031 }, { - "epoch": 0.9861147894517801, - "grad_norm": 1.9344165325164795, - "learning_rate": 9.43570357265e-09, - "loss": 0.7148, + "epoch": 0.9174234424498416, + "grad_norm": 2.1150505542755127, + "learning_rate": 2.488862712468869e-07, + "loss": 0.5335, "step": 13032 }, { - "epoch": 0.9861904581741138, - "grad_norm": 2.1721315383911133, - "learning_rate": 9.332880662321852e-09, - "loss": 0.6956, + "epoch": 0.9174938401971137, + "grad_norm": 2.2884786128997803, + "learning_rate": 2.484646209866461e-07, + "loss": 0.6219, "step": 13033 }, { - "epoch": 0.9862661268964473, - "grad_norm": 1.8685091733932495, - "learning_rate": 9.230620776648602e-09, - "loss": 0.583, + "epoch": 0.9175642379443858, + "grad_norm": 1.8056668043136597, + "learning_rate": 2.4804332174484453e-07, + "loss": 0.7442, "step": 13034 }, { - "epoch": 0.986341795618781, - "grad_norm": 1.820870280265808, - "learning_rate": 9.128923922033128e-09, - "loss": 0.5824, + "epoch": 0.9176346356916578, + "grad_norm": 1.675096869468689, + "learning_rate": 2.476223735433848e-07, + "loss": 0.6162, "step": 13035 }, { - "epoch": 0.9864174643411145, - "grad_norm": 2.135571002960205, - "learning_rate": 9.027790104845335e-09, - "loss": 0.7753, + "epoch": 0.91770503343893, + "grad_norm": 2.0619211196899414, + "learning_rate": 2.4720177640415306e-07, + "loss": 0.5897, "step": 13036 }, { - "epoch": 0.9864931330634482, - "grad_norm": 3.053931474685669, - "learning_rate": 8.927219331417158e-09, - "loss": 0.6846, + "epoch": 0.9177754311862021, + "grad_norm": 2.0062878131866455, + "learning_rate": 2.4678153034901705e-07, + "loss": 0.539, "step": 13037 }, { - "epoch": 0.9865688017857819, - "grad_norm": 2.2578110694885254, - "learning_rate": 8.82721160804656e-09, - "loss": 0.6994, + "epoch": 0.9178458289334741, + "grad_norm": 2.2292983531951904, + "learning_rate": 2.463616353998249e-07, + "loss": 0.6331, "step": 13038 }, { - "epoch": 0.9866444705081154, - "grad_norm": 2.533595561981201, - "learning_rate": 8.727766940997528e-09, - "loss": 0.7083, + "epoch": 0.9179162266807462, + "grad_norm": 2.533203363418579, + "learning_rate": 2.459420915784085e-07, + "loss": 0.7321, "step": 13039 }, { - "epoch": 0.9867201392304491, - "grad_norm": 2.277336835861206, - "learning_rate": 8.628885336497084e-09, - "loss": 0.8314, + "epoch": 0.9179866244280183, + "grad_norm": 2.214169979095459, + "learning_rate": 2.4552289890657954e-07, + "loss": 0.5964, "step": 13040 }, { - "epoch": 0.9867958079527828, - "grad_norm": 2.3916232585906982, - "learning_rate": 8.530566800738272e-09, - "loss": 0.6584, + "epoch": 0.9180570221752904, + "grad_norm": 2.030954122543335, + "learning_rate": 2.4510405740613175e-07, + "loss": 0.6614, "step": 13041 }, { - "epoch": 0.9868714766751163, - "grad_norm": 2.5253095626831055, - "learning_rate": 8.432811339876168e-09, - "loss": 0.6123, + "epoch": 0.9181274199225625, + "grad_norm": 2.361471652984619, + "learning_rate": 2.446855670988418e-07, + "loss": 0.588, "step": 13042 }, { - "epoch": 0.98694714539745, - "grad_norm": 1.7095435857772827, - "learning_rate": 8.335618960033876e-09, - "loss": 0.8094, + "epoch": 0.9181978176698345, + "grad_norm": 2.0715253353118896, + "learning_rate": 2.4426742800646703e-07, + "loss": 0.7429, "step": 13043 }, { - "epoch": 0.9870228141197835, - "grad_norm": 2.151761293411255, - "learning_rate": 8.238989667297526e-09, - "loss": 0.716, + "epoch": 0.9182682154171067, + "grad_norm": 2.3849236965179443, + "learning_rate": 2.4384964015074684e-07, + "loss": 0.6672, "step": 13044 }, { - "epoch": 0.9870984828421172, - "grad_norm": 2.2348501682281494, - "learning_rate": 8.142923467718277e-09, - "loss": 0.7185, + "epoch": 0.9183386131643787, + "grad_norm": 1.8552350997924805, + "learning_rate": 2.434322035534034e-07, + "loss": 0.6067, "step": 13045 }, { - "epoch": 0.9871741515644509, - "grad_norm": 2.2857894897460938, - "learning_rate": 8.047420367313319e-09, - "loss": 0.7555, + "epoch": 0.9184090109116508, + "grad_norm": 2.0291528701782227, + "learning_rate": 2.430151182361382e-07, + "loss": 0.6855, "step": 13046 }, { - "epoch": 0.9872498202867844, - "grad_norm": 2.075570821762085, - "learning_rate": 7.952480372061866e-09, - "loss": 0.6908, + "epoch": 0.918479408658923, + "grad_norm": 2.0626068115234375, + "learning_rate": 2.425983842206368e-07, + "loss": 0.593, "step": 13047 }, { - "epoch": 0.9873254890091181, - "grad_norm": 2.3449413776397705, - "learning_rate": 7.858103487910161e-09, - "loss": 0.6586, + "epoch": 0.918549806406195, + "grad_norm": 2.356649160385132, + "learning_rate": 2.4218200152856494e-07, + "loss": 0.6326, "step": 13048 }, { - "epoch": 0.9874011577314517, - "grad_norm": 1.8678113222122192, - "learning_rate": 7.764289720767482e-09, - "loss": 0.6416, + "epoch": 0.9186202041534671, + "grad_norm": 1.737630009651184, + "learning_rate": 2.4176597018157095e-07, + "loss": 0.6072, "step": 13049 }, { - "epoch": 0.9874768264537853, - "grad_norm": 1.8483861684799194, - "learning_rate": 7.671039076510123e-09, - "loss": 0.6662, + "epoch": 0.9186906019007391, + "grad_norm": 2.207005023956299, + "learning_rate": 2.4135029020128475e-07, + "loss": 0.6367, "step": 13050 }, { - "epoch": 0.987552495176119, - "grad_norm": 2.6870992183685303, - "learning_rate": 7.578351560976416e-09, - "loss": 0.7492, + "epoch": 0.9187609996480113, + "grad_norm": 2.361814498901367, + "learning_rate": 2.4093496160931894e-07, + "loss": 0.5959, "step": 13051 }, { - "epoch": 0.9876281638984525, - "grad_norm": 2.3274192810058594, - "learning_rate": 7.486227179971717e-09, - "loss": 0.7949, + "epoch": 0.9188313973952833, + "grad_norm": 1.9990813732147217, + "learning_rate": 2.405199844272653e-07, + "loss": 0.6499, "step": 13052 }, { - "epoch": 0.9877038326207862, - "grad_norm": 2.314833641052246, - "learning_rate": 7.394665939264411e-09, - "loss": 0.7524, + "epoch": 0.9189017951425554, + "grad_norm": 1.9525548219680786, + "learning_rate": 2.401053586766992e-07, + "loss": 0.6243, "step": 13053 }, { - "epoch": 0.9877795013431199, - "grad_norm": 2.826127767562866, - "learning_rate": 7.303667844589912e-09, - "loss": 0.6682, + "epoch": 0.9189721928898276, + "grad_norm": 2.1572964191436768, + "learning_rate": 2.3969108437917754e-07, + "loss": 0.6033, "step": 13054 }, { - "epoch": 0.9878551700654534, - "grad_norm": 2.203782081604004, - "learning_rate": 7.213232901644662e-09, - "loss": 0.5775, + "epoch": 0.9190425906370996, + "grad_norm": 2.3277482986450195, + "learning_rate": 2.39277161556239e-07, + "loss": 0.645, "step": 13055 }, { - "epoch": 0.9879308387877871, - "grad_norm": 2.5531766414642334, - "learning_rate": 7.12336111609313e-09, - "loss": 0.6546, + "epoch": 0.9191129883843717, + "grad_norm": 2.484318256378174, + "learning_rate": 2.3886359022940395e-07, + "loss": 0.6301, "step": 13056 }, { - "epoch": 0.9880065075101206, - "grad_norm": 2.4287493228912354, - "learning_rate": 7.034052493562815e-09, + "epoch": 0.9191833861316437, + "grad_norm": 2.177945852279663, + "learning_rate": 2.384503704201739e-07, "loss": 0.7344, "step": 13057 }, { - "epoch": 0.9880821762324543, - "grad_norm": 1.6254581212997437, - "learning_rate": 6.945307039647242e-09, - "loss": 0.8126, + "epoch": 0.9192537838789159, + "grad_norm": 2.267620086669922, + "learning_rate": 2.3803750215003338e-07, + "loss": 0.6214, "step": 13058 }, { - "epoch": 0.988157844954788, - "grad_norm": 2.4416098594665527, - "learning_rate": 6.857124759903966e-09, - "loss": 0.6512, + "epoch": 0.919324181626188, + "grad_norm": 1.8809915781021118, + "learning_rate": 2.3762498544044587e-07, + "loss": 0.5785, "step": 13059 }, { - "epoch": 0.9882335136771215, - "grad_norm": 2.1222572326660156, - "learning_rate": 6.769505659854569e-09, - "loss": 0.7188, + "epoch": 0.91939457937346, + "grad_norm": 2.6290054321289062, + "learning_rate": 2.3721282031286007e-07, + "loss": 0.6356, "step": 13060 }, { - "epoch": 0.9883091823994552, - "grad_norm": 2.3706514835357666, - "learning_rate": 6.682449744986663e-09, - "loss": 0.5678, + "epoch": 0.9194649771207322, + "grad_norm": 1.8903818130493164, + "learning_rate": 2.368010067887045e-07, + "loss": 0.6286, "step": 13061 }, { - "epoch": 0.9883848511217888, - "grad_norm": 1.8657547235488892, - "learning_rate": 6.5959570207508864e-09, - "loss": 0.7439, + "epoch": 0.9195353748680042, + "grad_norm": 2.1426987648010254, + "learning_rate": 2.3638954488939068e-07, + "loss": 0.6891, "step": 13062 }, { - "epoch": 0.9884605198441224, - "grad_norm": 2.860536813735962, - "learning_rate": 6.5100274925649075e-09, - "loss": 0.5991, + "epoch": 0.9196057726152763, + "grad_norm": 2.0700857639312744, + "learning_rate": 2.3597843463630897e-07, + "loss": 0.7017, "step": 13063 }, { - "epoch": 0.9885361885664561, - "grad_norm": 2.22926664352417, - "learning_rate": 6.42466116580942e-09, - "loss": 0.817, + "epoch": 0.9196761703625485, + "grad_norm": 1.745529294013977, + "learning_rate": 2.3556767605083505e-07, + "loss": 0.5734, "step": 13064 }, { - "epoch": 0.9886118572887896, - "grad_norm": 1.8725168704986572, - "learning_rate": 6.339858045830149e-09, - "loss": 0.6259, + "epoch": 0.9197465681098205, + "grad_norm": 1.6505508422851562, + "learning_rate": 2.3515726915432288e-07, + "loss": 0.6678, "step": 13065 }, { - "epoch": 0.9886875260111233, - "grad_norm": 2.0670039653778076, - "learning_rate": 6.255618137938845e-09, - "loss": 0.6289, + "epoch": 0.9198169658570926, + "grad_norm": 2.035557746887207, + "learning_rate": 2.347472139681116e-07, + "loss": 0.6441, "step": 13066 }, { - "epoch": 0.988763194733457, - "grad_norm": 2.009124517440796, - "learning_rate": 6.17194144740929e-09, - "loss": 0.6825, + "epoch": 0.9198873636043646, + "grad_norm": 1.969401240348816, + "learning_rate": 2.3433751051351858e-07, + "loss": 0.634, "step": 13067 }, { - "epoch": 0.9888388634557905, - "grad_norm": 1.9414106607437134, - "learning_rate": 6.088827979483291e-09, - "loss": 0.4782, + "epoch": 0.9199577613516368, + "grad_norm": 1.529972791671753, + "learning_rate": 2.3392815881184724e-07, + "loss": 0.5595, "step": 13068 }, { - "epoch": 0.9889145321781242, - "grad_norm": 2.9034688472747803, - "learning_rate": 6.006277739363686e-09, - "loss": 0.6774, + "epoch": 0.9200281590989088, + "grad_norm": 1.8371819257736206, + "learning_rate": 2.3351915888437768e-07, + "loss": 0.6409, "step": 13069 }, { - "epoch": 0.9889902009004577, - "grad_norm": 2.372115135192871, - "learning_rate": 5.924290732221338e-09, - "loss": 0.6958, + "epoch": 0.9200985568461809, + "grad_norm": 3.101969003677368, + "learning_rate": 2.3311051075237598e-07, + "loss": 0.5358, "step": 13070 }, { - "epoch": 0.9890658696227914, - "grad_norm": 1.480709433555603, - "learning_rate": 5.842866963190141e-09, - "loss": 0.6686, + "epoch": 0.920168954593453, + "grad_norm": 2.170285701751709, + "learning_rate": 2.327022144370865e-07, + "loss": 0.5833, "step": 13071 }, { - "epoch": 0.9891415383451251, - "grad_norm": 2.762083053588867, - "learning_rate": 5.762006437370015e-09, - "loss": 0.6242, + "epoch": 0.9202393523407251, + "grad_norm": 2.1048662662506104, + "learning_rate": 2.32294269959738e-07, + "loss": 0.6662, "step": 13072 }, { - "epoch": 0.9892172070674586, - "grad_norm": 2.282045364379883, - "learning_rate": 5.681709159822912e-09, - "loss": 0.5928, + "epoch": 0.9203097500879972, + "grad_norm": 2.122101306915283, + "learning_rate": 2.318866773415391e-07, + "loss": 0.6606, "step": 13073 }, { - "epoch": 0.9892928757897923, - "grad_norm": 2.124495029449463, - "learning_rate": 5.601975135578807e-09, - "loss": 0.5653, + "epoch": 0.9203801478352692, + "grad_norm": 1.8412833213806152, + "learning_rate": 2.3147943660368207e-07, + "loss": 0.8097, "step": 13074 }, { - "epoch": 0.9893685445121259, - "grad_norm": 2.060455560684204, - "learning_rate": 5.522804369630707e-09, - "loss": 0.6898, + "epoch": 0.9204505455825414, + "grad_norm": 1.8397619724273682, + "learning_rate": 2.3107254776733897e-07, + "loss": 0.6999, "step": 13075 }, { - "epoch": 0.9894442132344595, - "grad_norm": 2.3240835666656494, - "learning_rate": 5.444196866935647e-09, - "loss": 0.6417, + "epoch": 0.9205209433298135, + "grad_norm": 2.167442560195923, + "learning_rate": 2.3066601085366478e-07, + "loss": 0.6782, "step": 13076 }, { - "epoch": 0.9895198819567932, - "grad_norm": 2.27996826171875, - "learning_rate": 5.366152632417687e-09, - "loss": 0.5804, + "epoch": 0.9205913410770855, + "grad_norm": 2.13474702835083, + "learning_rate": 2.3025982588379423e-07, + "loss": 0.7624, "step": 13077 }, { - "epoch": 0.9895955506791267, - "grad_norm": 2.054598569869995, - "learning_rate": 5.288671670962919e-09, - "loss": 0.7136, + "epoch": 0.9206617388243576, + "grad_norm": 2.35141658782959, + "learning_rate": 2.2985399287884733e-07, + "loss": 0.6351, "step": 13078 }, { - "epoch": 0.9896712194014604, - "grad_norm": 2.2513556480407715, - "learning_rate": 5.211753987423462e-09, - "loss": 0.8557, + "epoch": 0.9207321365716297, + "grad_norm": 1.8389403820037842, + "learning_rate": 2.2944851185992154e-07, + "loss": 0.6104, "step": 13079 }, { - "epoch": 0.9897468881237941, - "grad_norm": 2.1142704486846924, - "learning_rate": 5.135399586617462e-09, - "loss": 0.6724, + "epoch": 0.9208025343189018, + "grad_norm": 2.2518162727355957, + "learning_rate": 2.2904338284810032e-07, + "loss": 0.5764, "step": 13080 }, { - "epoch": 0.9898225568461276, - "grad_norm": 2.2248411178588867, - "learning_rate": 5.059608473325095e-09, - "loss": 0.6099, + "epoch": 0.9208729320661739, + "grad_norm": 2.2403578758239746, + "learning_rate": 2.2863860586444618e-07, + "loss": 0.6247, "step": 13081 }, { - "epoch": 0.9898982255684613, - "grad_norm": 3.098581552505493, - "learning_rate": 4.984380652293563e-09, - "loss": 0.5557, + "epoch": 0.920943329813446, + "grad_norm": 1.8059453964233398, + "learning_rate": 2.2823418093000368e-07, + "loss": 0.6423, "step": 13082 }, { - "epoch": 0.9899738942907949, - "grad_norm": 1.7447338104248047, - "learning_rate": 4.909716128234098e-09, - "loss": 0.6664, + "epoch": 0.9210137275607181, + "grad_norm": 2.5895965099334717, + "learning_rate": 2.2783010806579727e-07, + "loss": 0.6983, "step": 13083 }, { - "epoch": 0.9900495630131285, - "grad_norm": 2.116192102432251, - "learning_rate": 4.835614905820962e-09, - "loss": 0.6416, + "epoch": 0.9210841253079901, + "grad_norm": 1.9550738334655762, + "learning_rate": 2.2742638729283815e-07, + "loss": 0.7081, "step": 13084 }, { - "epoch": 0.9901252317354622, - "grad_norm": 3.5600974559783936, - "learning_rate": 4.762076989695441e-09, - "loss": 0.5656, + "epoch": 0.9211545230552622, + "grad_norm": 1.832158088684082, + "learning_rate": 2.2702301863211416e-07, + "loss": 0.6215, "step": 13085 }, { - "epoch": 0.9902009004577957, - "grad_norm": 2.4236409664154053, - "learning_rate": 4.689102384462851e-09, - "loss": 0.6626, + "epoch": 0.9212249208025343, + "grad_norm": 1.8998596668243408, + "learning_rate": 2.2662000210459764e-07, + "loss": 0.6529, "step": 13086 }, { - "epoch": 0.9902765691801294, - "grad_norm": 1.842241644859314, - "learning_rate": 4.616691094693537e-09, - "loss": 0.6851, + "epoch": 0.9212953185498064, + "grad_norm": 1.9926947355270386, + "learning_rate": 2.2621733773124153e-07, + "loss": 0.6663, "step": 13087 }, { - "epoch": 0.990352237902463, - "grad_norm": 2.1137871742248535, - "learning_rate": 4.5448431249218715e-09, - "loss": 0.5285, + "epoch": 0.9213657162970785, + "grad_norm": 1.77881920337677, + "learning_rate": 2.258150255329816e-07, + "loss": 0.592, "step": 13088 }, { - "epoch": 0.9904279066247966, - "grad_norm": 2.3001229763031006, - "learning_rate": 4.473558479646256e-09, - "loss": 0.7189, + "epoch": 0.9214361140443506, + "grad_norm": 2.2827601432800293, + "learning_rate": 2.2541306553073192e-07, + "loss": 0.6129, "step": 13089 }, { - "epoch": 0.9905035753471303, - "grad_norm": 2.6320502758026123, - "learning_rate": 4.402837163331119e-09, - "loss": 0.6221, + "epoch": 0.9215065117916227, + "grad_norm": 1.9593254327774048, + "learning_rate": 2.2501145774539333e-07, + "loss": 0.7778, "step": 13090 }, { - "epoch": 0.9905792440694638, - "grad_norm": 2.4288251399993896, - "learning_rate": 4.332679180406918e-09, - "loss": 0.5301, + "epoch": 0.9215769095388947, + "grad_norm": 3.1792967319488525, + "learning_rate": 2.246102021978441e-07, + "loss": 0.7215, "step": 13091 }, { - "epoch": 0.9906549127917975, - "grad_norm": 2.138550043106079, - "learning_rate": 4.2630845352651384e-09, - "loss": 0.6783, + "epoch": 0.9216473072861668, + "grad_norm": 2.518643856048584, + "learning_rate": 2.2420929890894781e-07, + "loss": 0.6812, "step": 13092 }, { - "epoch": 0.9907305815141312, - "grad_norm": 2.3440093994140625, - "learning_rate": 4.1940532322642946e-09, - "loss": 0.7674, + "epoch": 0.921717705033439, + "grad_norm": 2.1951982975006104, + "learning_rate": 2.2380874789954618e-07, + "loss": 0.5847, "step": 13093 }, { - "epoch": 0.9908062502364647, - "grad_norm": 1.6465092897415161, - "learning_rate": 4.125585275728927e-09, - "loss": 0.6484, + "epoch": 0.921788102780711, + "grad_norm": 2.02725887298584, + "learning_rate": 2.2340854919046472e-07, + "loss": 0.5821, "step": 13094 }, { - "epoch": 0.9908819189587984, - "grad_norm": 2.4605023860931396, - "learning_rate": 4.057680669944608e-09, - "loss": 0.7061, + "epoch": 0.9218585005279831, + "grad_norm": 1.802482008934021, + "learning_rate": 2.230087028025094e-07, + "loss": 0.6779, "step": 13095 }, { - "epoch": 0.990957587681132, - "grad_norm": 1.8242436647415161, - "learning_rate": 3.990339419164935e-09, - "loss": 0.7324, + "epoch": 0.9219288982752551, + "grad_norm": 2.1034321784973145, + "learning_rate": 2.2260920875646916e-07, + "loss": 0.565, "step": 13096 }, { - "epoch": 0.9910332564034656, - "grad_norm": 2.511643648147583, - "learning_rate": 3.923561527606534e-09, - "loss": 0.6348, + "epoch": 0.9219992960225273, + "grad_norm": 1.8127824068069458, + "learning_rate": 2.222100670731143e-07, + "loss": 0.5812, "step": 13097 }, { - "epoch": 0.9911089251257993, - "grad_norm": 2.8463854789733887, - "learning_rate": 3.857346999452061e-09, - "loss": 0.7027, + "epoch": 0.9220696937697994, + "grad_norm": 1.7618314027786255, + "learning_rate": 2.218112777731972e-07, + "loss": 0.6715, "step": 13098 }, { - "epoch": 0.9911845938481328, - "grad_norm": 1.9040359258651733, - "learning_rate": 3.7916958388481974e-09, - "loss": 0.6504, + "epoch": 0.9221400915170714, + "grad_norm": 3.5270285606384277, + "learning_rate": 2.2141284087745006e-07, + "loss": 0.7049, "step": 13099 }, { - "epoch": 0.9912602625704665, - "grad_norm": 2.7657530307769775, - "learning_rate": 3.726608049904656e-09, - "loss": 0.8033, + "epoch": 0.9222104892643436, + "grad_norm": 1.846886396408081, + "learning_rate": 2.2101475640658718e-07, + "loss": 0.6535, "step": 13100 }, { - "epoch": 0.9913359312928002, - "grad_norm": 2.482766628265381, - "learning_rate": 3.662083636698177e-09, - "loss": 0.6571, + "epoch": 0.9222808870116156, + "grad_norm": 2.216886281967163, + "learning_rate": 2.2061702438130814e-07, + "loss": 0.6623, "step": 13101 }, { - "epoch": 0.9914116000151337, - "grad_norm": 2.55605411529541, - "learning_rate": 3.598122603270526e-09, - "loss": 0.6026, + "epoch": 0.9223512847588877, + "grad_norm": 1.9087120294570923, + "learning_rate": 2.202196448222884e-07, + "loss": 0.5886, "step": 13102 }, { - "epoch": 0.9914872687374674, - "grad_norm": 2.0801234245300293, - "learning_rate": 3.534724953625501e-09, - "loss": 0.6188, + "epoch": 0.9224216825061599, + "grad_norm": 1.849780797958374, + "learning_rate": 2.1982261775019017e-07, + "loss": 0.6214, "step": 13103 }, { - "epoch": 0.991562937459801, - "grad_norm": 3.96653413772583, - "learning_rate": 3.4718906917349245e-09, - "loss": 0.6812, + "epoch": 0.9224920802534319, + "grad_norm": 1.9401655197143555, + "learning_rate": 2.1942594318565477e-07, + "loss": 0.6308, "step": 13104 }, { - "epoch": 0.9916386061821346, - "grad_norm": 2.2530484199523926, - "learning_rate": 3.4096198215326504e-09, - "loss": 0.7594, + "epoch": 0.922562478000704, + "grad_norm": 1.6900510787963867, + "learning_rate": 2.1902962114930634e-07, + "loss": 0.5578, "step": 13105 }, { - "epoch": 0.9917142749044683, - "grad_norm": 2.784916639328003, - "learning_rate": 3.347912346917559e-09, - "loss": 0.6711, + "epoch": 0.922632875747976, + "grad_norm": 1.7932088375091553, + "learning_rate": 2.186336516617473e-07, + "loss": 0.5953, "step": 13106 }, { - "epoch": 0.9917899436268018, - "grad_norm": 2.254490852355957, - "learning_rate": 3.286768271756557e-09, - "loss": 0.6553, + "epoch": 0.9227032734952482, + "grad_norm": 1.894115686416626, + "learning_rate": 2.1823803474356683e-07, + "loss": 0.6161, "step": 13107 }, { - "epoch": 0.9918656123491355, - "grad_norm": 1.7609950304031372, - "learning_rate": 3.226187599875585e-09, - "loss": 0.6226, + "epoch": 0.9227736712425202, + "grad_norm": 1.8502811193466187, + "learning_rate": 2.178427704153324e-07, + "loss": 0.6336, "step": 13108 }, { - "epoch": 0.9919412810714691, - "grad_norm": 2.008504629135132, - "learning_rate": 3.166170335070606e-09, - "loss": 0.6512, + "epoch": 0.9228440689897923, + "grad_norm": 2.1144769191741943, + "learning_rate": 2.1744785869759586e-07, + "loss": 0.697, "step": 13109 }, { - "epoch": 0.9920169497938027, - "grad_norm": 2.3329126834869385, - "learning_rate": 3.106716481098615e-09, - "loss": 0.7869, + "epoch": 0.9229144667370645, + "grad_norm": 2.0175631046295166, + "learning_rate": 2.170532996108866e-07, + "loss": 0.5922, "step": 13110 }, { - "epoch": 0.9920926185161364, - "grad_norm": 2.263775587081909, - "learning_rate": 3.0478260416846314e-09, - "loss": 0.5358, + "epoch": 0.9229848644843365, + "grad_norm": 1.9735265970230103, + "learning_rate": 2.1665909317572074e-07, + "loss": 0.6263, "step": 13111 }, { - "epoch": 0.99216828723847, - "grad_norm": 2.2455990314483643, - "learning_rate": 2.9894990205147076e-09, - "loss": 0.5658, + "epoch": 0.9230552622316086, + "grad_norm": 2.23647141456604, + "learning_rate": 2.1626523941259113e-07, + "loss": 0.7162, "step": 13112 }, { - "epoch": 0.9922439559608036, - "grad_norm": 1.8885899782180786, - "learning_rate": 2.931735421241921e-09, - "loss": 0.744, + "epoch": 0.9231256599788806, + "grad_norm": 2.3847899436950684, + "learning_rate": 2.1587173834197504e-07, + "loss": 0.6563, "step": 13113 }, { - "epoch": 0.9923196246831373, - "grad_norm": 1.837786316871643, - "learning_rate": 2.874535247484378e-09, - "loss": 0.5932, + "epoch": 0.9231960577261528, + "grad_norm": 2.076019048690796, + "learning_rate": 2.154785899843319e-07, + "loss": 0.6253, "step": 13114 }, { - "epoch": 0.9923952934054708, - "grad_norm": 2.2272021770477295, - "learning_rate": 2.817898502824212e-09, - "loss": 0.6078, + "epoch": 0.9232664554734249, + "grad_norm": 2.0171959400177, + "learning_rate": 2.1508579436010088e-07, + "loss": 0.6213, "step": 13115 }, { - "epoch": 0.9924709621278045, - "grad_norm": 5.379904747009277, - "learning_rate": 2.7618251908065884e-09, - "loss": 0.7149, + "epoch": 0.9233368532206969, + "grad_norm": 1.9714202880859375, + "learning_rate": 2.1469335148970414e-07, + "loss": 0.6281, "step": 13116 }, { - "epoch": 0.9925466308501381, - "grad_norm": 2.127641201019287, - "learning_rate": 2.706315314944696e-09, - "loss": 0.6478, + "epoch": 0.923407250967969, + "grad_norm": 6.8839616775512695, + "learning_rate": 2.1430126139354588e-07, + "loss": 0.7458, "step": 13117 }, { - "epoch": 0.9926222995724717, - "grad_norm": 2.2539281845092773, - "learning_rate": 2.6513688787137557e-09, - "loss": 0.672, + "epoch": 0.9234776487152411, + "grad_norm": 1.990700602531433, + "learning_rate": 2.1390952409201013e-07, + "loss": 0.629, "step": 13118 }, { - "epoch": 0.9926979682948054, - "grad_norm": 2.355245351791382, - "learning_rate": 2.5969858855560138e-09, - "loss": 0.5535, + "epoch": 0.9235480464625132, + "grad_norm": 1.7642958164215088, + "learning_rate": 2.1351813960546462e-07, + "loss": 0.6794, "step": 13119 }, { - "epoch": 0.9927736370171389, - "grad_norm": 2.0226423740386963, - "learning_rate": 2.543166338874747e-09, - "loss": 0.5628, + "epoch": 0.9236184442097853, + "grad_norm": 2.0247766971588135, + "learning_rate": 2.1312710795425603e-07, + "loss": 0.7207, "step": 13120 }, { - "epoch": 0.9928493057394726, - "grad_norm": 2.060910701751709, - "learning_rate": 2.4899102420422593e-09, - "loss": 0.787, + "epoch": 0.9236888419570574, + "grad_norm": 2.3110640048980713, + "learning_rate": 2.1273642915871637e-07, + "loss": 0.7323, "step": 13121 }, { - "epoch": 0.9929249744618062, - "grad_norm": 2.399125576019287, - "learning_rate": 2.4372175983938817e-09, - "loss": 0.5982, + "epoch": 0.9237592397043295, + "grad_norm": 1.868674397468567, + "learning_rate": 2.123461032391558e-07, + "loss": 0.4803, "step": 13122 }, { - "epoch": 0.9930006431841398, - "grad_norm": 3.2431602478027344, - "learning_rate": 2.385088411227976e-09, - "loss": 0.6401, + "epoch": 0.9238296374516015, + "grad_norm": 1.85226571559906, + "learning_rate": 2.1195613021586978e-07, + "loss": 0.6783, "step": 13123 }, { - "epoch": 0.9930763119064735, - "grad_norm": 1.9777895212173462, - "learning_rate": 2.333522683808931e-09, - "loss": 0.5933, + "epoch": 0.9239000351988736, + "grad_norm": 2.0512852668762207, + "learning_rate": 2.1156651010913196e-07, + "loss": 0.6131, "step": 13124 }, { - "epoch": 0.993151980628807, - "grad_norm": 2.409860372543335, - "learning_rate": 2.2825204193681613e-09, - "loss": 0.7402, + "epoch": 0.9239704329461457, + "grad_norm": 2.0776469707489014, + "learning_rate": 2.111772429391997e-07, + "loss": 0.6858, "step": 13125 }, { - "epoch": 0.9932276493511407, - "grad_norm": 2.1435985565185547, - "learning_rate": 2.232081621097115e-09, - "loss": 0.7274, + "epoch": 0.9240408306934178, + "grad_norm": 1.8141531944274902, + "learning_rate": 2.1078832872630936e-07, + "loss": 0.6824, "step": 13126 }, { - "epoch": 0.9933033180734744, - "grad_norm": 2.2561211585998535, - "learning_rate": 2.1822062921552644e-09, - "loss": 0.5811, + "epoch": 0.9241112284406899, + "grad_norm": 2.018172025680542, + "learning_rate": 2.1039976749068413e-07, + "loss": 0.6732, "step": 13127 }, { - "epoch": 0.9933789867958079, - "grad_norm": 2.5629122257232666, - "learning_rate": 2.132894435666111e-09, - "loss": 0.763, + "epoch": 0.924181626187962, + "grad_norm": 1.7046018838882446, + "learning_rate": 2.1001155925252223e-07, + "loss": 0.6056, "step": 13128 }, { - "epoch": 0.9934546555181416, - "grad_norm": 2.6304588317871094, - "learning_rate": 2.0841460547181833e-09, - "loss": 0.653, + "epoch": 0.9242520239352341, + "grad_norm": 1.9714709520339966, + "learning_rate": 2.0962370403201036e-07, + "loss": 0.6377, "step": 13129 }, { - "epoch": 0.9935303242404752, - "grad_norm": 2.282959222793579, - "learning_rate": 2.035961152364041e-09, - "loss": 0.5899, + "epoch": 0.9243224216825061, + "grad_norm": 2.343332529067993, + "learning_rate": 2.09236201849311e-07, + "loss": 0.5959, "step": 13130 }, { - "epoch": 0.9936059929628088, - "grad_norm": 2.0651817321777344, - "learning_rate": 1.9883397316202702e-09, - "loss": 0.693, + "epoch": 0.9243928194297782, + "grad_norm": 2.0396785736083984, + "learning_rate": 2.0884905272457276e-07, + "loss": 0.6572, "step": 13131 }, { - "epoch": 0.9936816616851425, - "grad_norm": 2.5978622436523438, - "learning_rate": 1.941281795470484e-09, - "loss": 0.7713, + "epoch": 0.9244632171770504, + "grad_norm": 2.8825254440307617, + "learning_rate": 2.0846225667792084e-07, + "loss": 0.6289, "step": 13132 }, { - "epoch": 0.993757330407476, - "grad_norm": 2.876408338546753, - "learning_rate": 1.894787346860327e-09, - "loss": 0.5898, + "epoch": 0.9245336149243224, + "grad_norm": 2.236971139907837, + "learning_rate": 2.0807581372946803e-07, + "loss": 0.6821, "step": 13133 }, { - "epoch": 0.9938329991298097, - "grad_norm": 2.2665717601776123, - "learning_rate": 1.848856388702469e-09, - "loss": 0.7736, + "epoch": 0.9246040126715945, + "grad_norm": 1.9789252281188965, + "learning_rate": 2.0768972389930462e-07, + "loss": 0.5636, "step": 13134 }, { - "epoch": 0.9939086678521433, - "grad_norm": 2.229184150695801, - "learning_rate": 1.8034889238726093e-09, - "loss": 0.5054, + "epoch": 0.9246744104188666, + "grad_norm": 1.8933284282684326, + "learning_rate": 2.0730398720750374e-07, + "loss": 0.5631, "step": 13135 }, { - "epoch": 0.9939843365744769, - "grad_norm": 2.301201820373535, - "learning_rate": 1.7586849552114758e-09, - "loss": 0.5685, + "epoch": 0.9247448081661387, + "grad_norm": 1.831667423248291, + "learning_rate": 2.069186036741215e-07, + "loss": 0.6273, "step": 13136 }, { - "epoch": 0.9940600052968106, - "grad_norm": 2.13543701171875, - "learning_rate": 1.7144444855258234e-09, - "loss": 0.6025, + "epoch": 0.9248152059134108, + "grad_norm": 1.6159573793411255, + "learning_rate": 2.065335733191922e-07, + "loss": 0.6211, "step": 13137 }, { - "epoch": 0.9941356740191442, - "grad_norm": 1.9200706481933594, - "learning_rate": 1.6707675175854363e-09, - "loss": 0.4845, + "epoch": 0.9248856036606828, + "grad_norm": 1.9434174299240112, + "learning_rate": 2.0614889616273456e-07, + "loss": 0.6423, "step": 13138 }, { - "epoch": 0.9942113427414778, - "grad_norm": 2.272440195083618, - "learning_rate": 1.6276540541261265e-09, - "loss": 0.6253, + "epoch": 0.924956001407955, + "grad_norm": 2.4163131713867188, + "learning_rate": 2.0576457222474952e-07, + "loss": 0.7161, "step": 13139 }, { - "epoch": 0.9942870114638115, - "grad_norm": 2.107754945755005, - "learning_rate": 1.5851040978467346e-09, - "loss": 0.7687, + "epoch": 0.925026399155227, + "grad_norm": 2.02070951461792, + "learning_rate": 2.0538060152521698e-07, + "loss": 0.6317, "step": 13140 }, { - "epoch": 0.994362680186145, - "grad_norm": 2.178772449493408, - "learning_rate": 1.5431176514131285e-09, - "loss": 0.5703, + "epoch": 0.9250967969024991, + "grad_norm": 1.8030856847763062, + "learning_rate": 2.0499698408410128e-07, + "loss": 0.602, "step": 13141 }, { - "epoch": 0.9944383489084787, - "grad_norm": 2.395909070968628, - "learning_rate": 1.5016947174532058e-09, - "loss": 0.7377, + "epoch": 0.9251671946497712, + "grad_norm": 1.9782562255859375, + "learning_rate": 2.046137199213458e-07, + "loss": 0.6005, "step": 13142 }, { - "epoch": 0.9945140176308123, - "grad_norm": 1.9375349283218384, - "learning_rate": 1.4608352985628904e-09, - "loss": 0.728, + "epoch": 0.9252375923970433, + "grad_norm": 2.7762298583984375, + "learning_rate": 2.0423080905687762e-07, + "loss": 0.781, "step": 13143 }, { - "epoch": 0.9945896863531459, - "grad_norm": 2.407315254211426, - "learning_rate": 1.4205393972991366e-09, - "loss": 0.7251, + "epoch": 0.9253079901443154, + "grad_norm": 1.9437885284423828, + "learning_rate": 2.0384825151060358e-07, + "loss": 0.6406, "step": 13144 }, { - "epoch": 0.9946653550754796, - "grad_norm": 2.111492872238159, - "learning_rate": 1.3808070161859255e-09, - "loss": 0.7817, + "epoch": 0.9253783878915874, + "grad_norm": 2.078758716583252, + "learning_rate": 2.0346604730241502e-07, + "loss": 0.7721, "step": 13145 }, { - "epoch": 0.9947410237978132, - "grad_norm": 2.2751195430755615, - "learning_rate": 1.341638157712266e-09, - "loss": 0.7493, + "epoch": 0.9254487856388596, + "grad_norm": 2.0421524047851562, + "learning_rate": 2.0308419645218067e-07, + "loss": 0.5779, "step": 13146 }, { - "epoch": 0.9948166925201468, - "grad_norm": 2.5054099559783936, - "learning_rate": 1.303032824330197e-09, - "loss": 0.6376, + "epoch": 0.9255191833861316, + "grad_norm": 1.8051934242248535, + "learning_rate": 2.0270269897975534e-07, + "loss": 0.533, "step": 13147 }, { - "epoch": 0.9948923612424804, - "grad_norm": 2.099950075149536, - "learning_rate": 1.264991018457784e-09, - "loss": 0.6259, + "epoch": 0.9255895811334037, + "grad_norm": 2.3806211948394775, + "learning_rate": 2.0232155490497283e-07, + "loss": 0.7012, "step": 13148 }, { - "epoch": 0.994968029964814, - "grad_norm": 2.118708848953247, - "learning_rate": 1.2275127424771216e-09, - "loss": 0.6925, + "epoch": 0.9256599788806759, + "grad_norm": 1.9553751945495605, + "learning_rate": 2.0194076424764905e-07, + "loss": 0.5204, "step": 13149 }, { - "epoch": 0.9950436986871477, - "grad_norm": 4.26461935043335, - "learning_rate": 1.190597998734333e-09, - "loss": 0.6318, + "epoch": 0.9257303766279479, + "grad_norm": 1.771055817604065, + "learning_rate": 2.015603270275813e-07, + "loss": 0.5484, "step": 13150 }, { - "epoch": 0.9951193674094813, - "grad_norm": 1.8085392713546753, - "learning_rate": 1.1542467895425679e-09, - "loss": 0.5068, + "epoch": 0.92580077437522, + "grad_norm": 1.739923119544983, + "learning_rate": 2.0118024326454897e-07, + "loss": 0.6193, "step": 13151 }, { - "epoch": 0.9951950361318149, - "grad_norm": 2.8070521354675293, - "learning_rate": 1.1184591171780056e-09, - "loss": 0.6108, + "epoch": 0.925871172122492, + "grad_norm": 2.3301503658294678, + "learning_rate": 2.0080051297831358e-07, + "loss": 0.626, "step": 13152 }, { - "epoch": 0.9952707048541486, - "grad_norm": 2.0022857189178467, - "learning_rate": 1.0832349838808542e-09, - "loss": 0.5927, + "epoch": 0.9259415698697642, + "grad_norm": 1.8840514421463013, + "learning_rate": 2.0042113618861723e-07, + "loss": 0.6081, "step": 13153 }, { - "epoch": 0.9953463735764821, - "grad_norm": 2.230762243270874, - "learning_rate": 1.0485743918583478e-09, - "loss": 0.6105, + "epoch": 0.9260119676170363, + "grad_norm": 2.526224374771118, + "learning_rate": 2.0004211291518415e-07, + "loss": 0.7377, "step": 13154 }, { - "epoch": 0.9954220422988158, - "grad_norm": 2.8104093074798584, - "learning_rate": 1.0144773432797516e-09, - "loss": 0.5951, + "epoch": 0.9260823653643083, + "grad_norm": 1.610671877861023, + "learning_rate": 1.996634431777191e-07, + "loss": 0.6821, "step": 13155 }, { - "epoch": 0.9954977110211494, - "grad_norm": 2.1270148754119873, - "learning_rate": 9.809438402803572e-10, - "loss": 0.6414, + "epoch": 0.9261527631115805, + "grad_norm": 2.4639649391174316, + "learning_rate": 1.9928512699591138e-07, + "loss": 0.5744, "step": 13156 }, { - "epoch": 0.995573379743483, - "grad_norm": 2.065595865249634, - "learning_rate": 9.479738849614838e-10, - "loss": 0.5966, + "epoch": 0.9262231608588525, + "grad_norm": 2.242016553878784, + "learning_rate": 1.9890716438942846e-07, + "loss": 0.7302, "step": 13157 }, { - "epoch": 0.9956490484658167, - "grad_norm": 2.2215771675109863, - "learning_rate": 9.15567479386481e-10, - "loss": 0.6672, + "epoch": 0.9262935586061246, + "grad_norm": 2.2868804931640625, + "learning_rate": 1.985295553779215e-07, + "loss": 0.5721, "step": 13158 }, { - "epoch": 0.9957247171881503, - "grad_norm": 2.1374623775482178, - "learning_rate": 8.837246255847253e-10, - "loss": 0.6385, + "epoch": 0.9263639563533967, + "grad_norm": 2.2068183422088623, + "learning_rate": 1.9815229998102378e-07, + "loss": 0.6501, "step": 13159 }, { - "epoch": 0.9958003859104839, - "grad_norm": 2.5752291679382324, - "learning_rate": 8.524453255516207e-10, - "loss": 0.6717, + "epoch": 0.9264343541006688, + "grad_norm": 2.1537744998931885, + "learning_rate": 1.9777539821834767e-07, + "loss": 0.5598, "step": 13160 }, { - "epoch": 0.9958760546328175, - "grad_norm": 2.127338409423828, - "learning_rate": 8.217295812446013e-10, - "loss": 0.5726, + "epoch": 0.9265047518479409, + "grad_norm": 2.4405174255371094, + "learning_rate": 1.9739885010948755e-07, + "loss": 0.5704, "step": 13161 }, { - "epoch": 0.9959517233551511, - "grad_norm": 2.5369412899017334, - "learning_rate": 7.915773945881277e-10, - "loss": 0.6269, + "epoch": 0.9265751495952129, + "grad_norm": 2.5955545902252197, + "learning_rate": 1.9702265567402387e-07, + "loss": 0.6337, "step": 13162 }, { - "epoch": 0.9960273920774848, - "grad_norm": 3.5135183334350586, - "learning_rate": 7.619887674696902e-10, - "loss": 0.5283, + "epoch": 0.926645547342485, + "grad_norm": 2.023045539855957, + "learning_rate": 1.9664681493151227e-07, + "loss": 0.6526, "step": 13163 }, { - "epoch": 0.9961030607998184, - "grad_norm": 2.36592960357666, - "learning_rate": 7.329637017428059e-10, - "loss": 0.6122, + "epoch": 0.9267159450897571, + "grad_norm": 1.918657898902893, + "learning_rate": 1.9627132790149504e-07, + "loss": 0.5941, "step": 13164 }, { - "epoch": 0.996178729522152, - "grad_norm": 2.4969322681427, - "learning_rate": 7.045021992250211e-10, - "loss": 0.6736, + "epoch": 0.9267863428370292, + "grad_norm": 2.1270759105682373, + "learning_rate": 1.9589619460349283e-07, + "loss": 0.6627, "step": 13165 }, { - "epoch": 0.9962543982444857, - "grad_norm": 1.717811107635498, - "learning_rate": 6.766042616989098e-10, - "loss": 0.6547, + "epoch": 0.9268567405843013, + "grad_norm": 1.8145078420639038, + "learning_rate": 1.9552141505700991e-07, + "loss": 0.62, "step": 13166 }, { - "epoch": 0.9963300669668192, - "grad_norm": 1.9306954145431519, - "learning_rate": 6.49269890911075e-10, - "loss": 0.7854, + "epoch": 0.9269271383315734, + "grad_norm": 1.891083836555481, + "learning_rate": 1.9514698928153118e-07, + "loss": 0.7705, "step": 13167 }, { - "epoch": 0.9964057356891529, - "grad_norm": 2.652772903442383, - "learning_rate": 6.224990885721482e-10, - "loss": 0.5947, + "epoch": 0.9269975360788455, + "grad_norm": 1.9336978197097778, + "learning_rate": 1.947729172965228e-07, + "loss": 0.7599, "step": 13168 }, { - "epoch": 0.9964814044114865, - "grad_norm": 2.094191074371338, - "learning_rate": 5.962918563607867e-10, - "loss": 0.7139, + "epoch": 0.9270679338261175, + "grad_norm": 1.8618131875991821, + "learning_rate": 1.9439919912143387e-07, + "loss": 0.6633, "step": 13169 }, { - "epoch": 0.9965570731338201, - "grad_norm": 2.385401725769043, - "learning_rate": 5.706481959176779e-10, - "loss": 0.6746, + "epoch": 0.9271383315733897, + "grad_norm": 1.6706620454788208, + "learning_rate": 1.9402583477569414e-07, + "loss": 0.6003, "step": 13170 }, { - "epoch": 0.9966327418561538, - "grad_norm": 2.2519052028656006, - "learning_rate": 5.455681088475383e-10, - "loss": 0.6605, + "epoch": 0.9272087293206618, + "grad_norm": 1.96750807762146, + "learning_rate": 1.9365282427871533e-07, + "loss": 0.707, "step": 13171 }, { - "epoch": 0.9967084105784874, - "grad_norm": 2.4455978870391846, - "learning_rate": 5.210515967221108e-10, - "loss": 0.6044, + "epoch": 0.9272791270679338, + "grad_norm": 1.9514906406402588, + "learning_rate": 1.9328016764989065e-07, + "loss": 0.672, "step": 13172 }, { - "epoch": 0.996784079300821, - "grad_norm": 2.6201171875, - "learning_rate": 4.970986610761675e-10, - "loss": 0.5899, + "epoch": 0.9273495248152059, + "grad_norm": 1.8673120737075806, + "learning_rate": 1.929078649085938e-07, + "loss": 0.6343, "step": 13173 }, { - "epoch": 0.9968597480231546, - "grad_norm": 2.2739479541778564, - "learning_rate": 4.737093034095086e-10, - "loss": 0.6609, + "epoch": 0.927419922562478, + "grad_norm": 2.5881171226501465, + "learning_rate": 1.9253591607418218e-07, + "loss": 0.6162, "step": 13174 }, { - "epoch": 0.9969354167454882, - "grad_norm": 2.2343196868896484, - "learning_rate": 4.5088352518796173e-10, - "loss": 0.6017, + "epoch": 0.9274903203097501, + "grad_norm": 2.1870265007019043, + "learning_rate": 1.9216432116599302e-07, + "loss": 0.6779, "step": 13175 }, { - "epoch": 0.9970110854678219, - "grad_norm": 2.2220582962036133, - "learning_rate": 4.286213278393847e-10, - "loss": 0.6353, + "epoch": 0.9275607180570222, + "grad_norm": 2.2599480152130127, + "learning_rate": 1.9179308020334717e-07, + "loss": 0.6097, "step": 13176 }, { - "epoch": 0.9970867541901555, - "grad_norm": 2.405085802078247, - "learning_rate": 4.0692271275866167e-10, - "loss": 0.5783, + "epoch": 0.9276311158042942, + "grad_norm": 1.6916089057922363, + "learning_rate": 1.9142219320554532e-07, + "loss": 0.7066, "step": 13177 }, { - "epoch": 0.9971624229124891, - "grad_norm": 2.3258261680603027, - "learning_rate": 3.8578768130470565e-10, - "loss": 0.5282, + "epoch": 0.9277015135515664, + "grad_norm": 2.9274792671203613, + "learning_rate": 1.9105166019186947e-07, + "loss": 0.7127, "step": 13178 }, { - "epoch": 0.9972380916348228, - "grad_norm": 2.120819568634033, - "learning_rate": 3.652162348014576e-10, - "loss": 0.7857, + "epoch": 0.9277719112988384, + "grad_norm": 2.3721227645874023, + "learning_rate": 1.9068148118158302e-07, + "loss": 0.6535, "step": 13179 }, { - "epoch": 0.9973137603571564, - "grad_norm": 2.094067335128784, - "learning_rate": 3.4520837453688726e-10, - "loss": 0.7784, + "epoch": 0.9278423090461105, + "grad_norm": 2.075751543045044, + "learning_rate": 1.9031165619393453e-07, + "loss": 0.732, "step": 13180 }, { - "epoch": 0.99738942907949, - "grad_norm": 1.7406028509140015, - "learning_rate": 3.257641017629931e-10, - "loss": 0.617, + "epoch": 0.9279127067933826, + "grad_norm": 2.7561581134796143, + "learning_rate": 1.8994218524815008e-07, + "loss": 0.601, "step": 13181 }, { - "epoch": 0.9974650978018236, - "grad_norm": 1.7582353353500366, - "learning_rate": 3.0688341769880004e-10, - "loss": 0.6051, + "epoch": 0.9279831045406547, + "grad_norm": 2.2195985317230225, + "learning_rate": 1.8957306836343868e-07, + "loss": 0.784, "step": 13182 }, { - "epoch": 0.9975407665241572, - "grad_norm": 2.7545251846313477, - "learning_rate": 2.8856632352636247e-10, - "loss": 0.5891, + "epoch": 0.9280535022879268, + "grad_norm": 2.0215773582458496, + "learning_rate": 1.8920430555899214e-07, + "loss": 0.6118, "step": 13183 }, { - "epoch": 0.9976164352464909, - "grad_norm": 2.582125425338745, - "learning_rate": 2.708128203917637e-10, - "loss": 0.6018, + "epoch": 0.9281239000351988, + "grad_norm": 1.7946466207504272, + "learning_rate": 1.8883589685398137e-07, + "loss": 0.6397, "step": 13184 }, { - "epoch": 0.9976921039688245, - "grad_norm": 2.3737707138061523, - "learning_rate": 2.536229094081133e-10, - "loss": 0.6962, + "epoch": 0.928194297782471, + "grad_norm": 2.0051631927490234, + "learning_rate": 1.8846784226756098e-07, + "loss": 0.5802, "step": 13185 }, { - "epoch": 0.9977677726911581, - "grad_norm": 2.5087530612945557, - "learning_rate": 2.369965916505512e-10, - "loss": 0.7588, + "epoch": 0.928264695529743, + "grad_norm": 1.8493760824203491, + "learning_rate": 1.8810014181886604e-07, + "loss": 0.6972, "step": 13186 }, { - "epoch": 0.9978434414134917, - "grad_norm": 2.271505117416382, - "learning_rate": 2.2093386816124383e-10, - "loss": 0.6266, + "epoch": 0.9283350932770151, + "grad_norm": 1.9925554990768433, + "learning_rate": 1.8773279552701382e-07, + "loss": 0.6373, "step": 13187 }, { - "epoch": 0.9979191101358253, - "grad_norm": 2.8579909801483154, - "learning_rate": 2.054347399463863e-10, - "loss": 0.7987, + "epoch": 0.9284054910242873, + "grad_norm": 1.9798104763031006, + "learning_rate": 1.8736580341110375e-07, + "loss": 0.6693, "step": 13188 }, { - "epoch": 0.997994778858159, - "grad_norm": 1.9885179996490479, - "learning_rate": 1.9049920797620245e-10, - "loss": 0.6924, + "epoch": 0.9284758887715593, + "grad_norm": 2.58467435836792, + "learning_rate": 1.8699916549021577e-07, + "loss": 0.733, "step": 13189 }, { - "epoch": 0.9980704475804926, - "grad_norm": 2.303568124771118, - "learning_rate": 1.7612727318494503e-10, - "loss": 0.783, + "epoch": 0.9285462865188314, + "grad_norm": 2.282043218612671, + "learning_rate": 1.8663288178341042e-07, + "loss": 0.517, "step": 13190 }, { - "epoch": 0.9981461163028262, - "grad_norm": 2.4012231826782227, - "learning_rate": 1.6231893647389306e-10, - "loss": 0.6901, + "epoch": 0.9286166842661034, + "grad_norm": 1.897059679031372, + "learning_rate": 1.8626695230973268e-07, + "loss": 0.6848, "step": 13191 }, { - "epoch": 0.9982217850251599, - "grad_norm": 2.4622113704681396, - "learning_rate": 1.4907419870835437e-10, - "loss": 0.7907, + "epoch": 0.9286870820133756, + "grad_norm": 2.2119691371917725, + "learning_rate": 1.8590137708820653e-07, + "loss": 0.694, "step": 13192 }, { - "epoch": 0.9982974537474935, - "grad_norm": 2.3311586380004883, - "learning_rate": 1.3639306071566714e-10, - "loss": 0.7405, + "epoch": 0.9287574797606477, + "grad_norm": 2.107862949371338, + "learning_rate": 1.855361561378389e-07, + "loss": 0.7159, "step": 13193 }, { - "epoch": 0.9983731224698271, - "grad_norm": 2.215780019760132, - "learning_rate": 1.2427552329119517e-10, - "loss": 0.6988, + "epoch": 0.9288278775079197, + "grad_norm": 1.9590411186218262, + "learning_rate": 1.8517128947761885e-07, + "loss": 0.529, "step": 13194 }, { - "epoch": 0.9984487911921607, - "grad_norm": 2.3962464332580566, - "learning_rate": 1.12721587194331e-10, - "loss": 0.6859, + "epoch": 0.9288982752551919, + "grad_norm": 1.8786492347717285, + "learning_rate": 1.8480677712651515e-07, + "loss": 0.5597, "step": 13195 }, { - "epoch": 0.9985244599144943, - "grad_norm": 1.8870036602020264, - "learning_rate": 1.0173125314749676e-10, - "loss": 0.6691, + "epoch": 0.9289686730024639, + "grad_norm": 2.027925729751587, + "learning_rate": 1.844426191034788e-07, + "loss": 0.7978, "step": 13196 }, { - "epoch": 0.998600128636828, - "grad_norm": 1.81784188747406, - "learning_rate": 9.130452184014093e-11, - "loss": 0.5999, + "epoch": 0.929039070749736, + "grad_norm": 2.2738254070281982, + "learning_rate": 1.8407881542744364e-07, + "loss": 0.6593, "step": 13197 }, { - "epoch": 0.9986757973591616, - "grad_norm": 2.0552120208740234, - "learning_rate": 8.14413939237424e-11, - "loss": 0.5859, + "epoch": 0.9291094684970082, + "grad_norm": 1.8106472492218018, + "learning_rate": 1.837153661173233e-07, + "loss": 0.7261, "step": 13198 }, { - "epoch": 0.9987514660814952, - "grad_norm": 2.1729273796081543, - "learning_rate": 7.21418700178056e-11, - "loss": 0.5408, + "epoch": 0.9291798662442802, + "grad_norm": 2.121351957321167, + "learning_rate": 1.8335227119201437e-07, + "loss": 0.7573, "step": 13199 }, { - "epoch": 0.9988271348038288, - "grad_norm": 1.8984811305999756, - "learning_rate": 6.340595070286614e-11, - "loss": 0.6977, + "epoch": 0.9292502639915523, + "grad_norm": 1.849687933921814, + "learning_rate": 1.829895306703939e-07, + "loss": 0.5338, "step": 13200 }, { - "epoch": 0.9989028035261625, - "grad_norm": 2.0209014415740967, - "learning_rate": 5.5233636526486054e-11, - "loss": 0.6062, + "epoch": 0.9293206617388243, + "grad_norm": 2.080855131149292, + "learning_rate": 1.8262714457132357e-07, + "loss": 0.6964, "step": 13201 }, { - "epoch": 0.9989784722484961, - "grad_norm": 2.05021333694458, - "learning_rate": 4.7624928001255286e-11, - "loss": 0.8087, + "epoch": 0.9293910594860965, + "grad_norm": 3.83990478515625, + "learning_rate": 1.8226511291364e-07, + "loss": 0.6795, "step": 13202 }, { - "epoch": 0.9990541409708297, - "grad_norm": 2.2087504863739014, - "learning_rate": 4.057982560279339e-11, - "loss": 0.7889, + "epoch": 0.9294614572333685, + "grad_norm": 2.2116787433624268, + "learning_rate": 1.8190343571616906e-07, + "loss": 0.644, "step": 13203 }, { - "epoch": 0.9991298096931633, - "grad_norm": 2.0803823471069336, - "learning_rate": 3.409832977274707e-11, - "loss": 0.6568, + "epoch": 0.9295318549806406, + "grad_norm": 1.9551973342895508, + "learning_rate": 1.8154211299771173e-07, + "loss": 0.617, "step": 13204 }, { - "epoch": 0.999205478415497, - "grad_norm": 2.142883777618408, - "learning_rate": 2.818044091779104e-11, - "loss": 0.6263, + "epoch": 0.9296022527279127, + "grad_norm": 2.7091851234436035, + "learning_rate": 1.811811447770565e-07, + "loss": 0.6156, "step": 13205 }, { - "epoch": 0.9992811471378306, - "grad_norm": 2.5195977687835693, - "learning_rate": 2.2826159406630353e-11, - "loss": 0.6548, + "epoch": 0.9296726504751848, + "grad_norm": 1.6118403673171997, + "learning_rate": 1.8082053107296857e-07, + "loss": 0.7251, "step": 13206 }, { - "epoch": 0.9993568158601642, - "grad_norm": 1.918473720550537, - "learning_rate": 1.8035485574996458e-11, - "loss": 0.6472, + "epoch": 0.9297430482224569, + "grad_norm": 2.0315260887145996, + "learning_rate": 1.8046027190419766e-07, + "loss": 0.6883, "step": 13207 }, { - "epoch": 0.9994324845824978, - "grad_norm": 2.169924020767212, - "learning_rate": 1.380841972464797e-11, - "loss": 0.5984, + "epoch": 0.9298134459697289, + "grad_norm": 2.174501895904541, + "learning_rate": 1.8010036728947244e-07, + "loss": 0.5576, "step": 13208 }, { - "epoch": 0.9995081533048314, - "grad_norm": 2.4711294174194336, - "learning_rate": 1.0144962118374678e-11, - "loss": 0.6986, + "epoch": 0.9298838437170011, + "grad_norm": 1.9963492155075073, + "learning_rate": 1.7974081724750678e-07, + "loss": 0.6868, "step": 13209 }, { - "epoch": 0.9995838220271651, - "grad_norm": 1.8922516107559204, - "learning_rate": 7.045112986991953e-12, - "loss": 0.6675, + "epoch": 0.9299542414642732, + "grad_norm": 1.8345372676849365, + "learning_rate": 1.7938162179699134e-07, + "loss": 0.6441, "step": 13210 }, { - "epoch": 0.9996594907494987, - "grad_norm": 1.9481332302093506, - "learning_rate": 4.508872523345531e-12, - "loss": 0.6967, + "epoch": 0.9300246392115452, + "grad_norm": 1.867884874343872, + "learning_rate": 1.790227809566035e-07, + "loss": 0.5886, "step": 13211 }, { - "epoch": 0.9997351594718323, - "grad_norm": 2.386504650115967, - "learning_rate": 2.536240887307528e-12, - "loss": 0.7459, + "epoch": 0.9300950369588173, + "grad_norm": 1.9402841329574585, + "learning_rate": 1.786642947449981e-07, + "loss": 0.5257, "step": 13212 }, { - "epoch": 0.9998108281941659, - "grad_norm": 1.9094631671905518, - "learning_rate": 1.127218201779634e-12, - "loss": 0.6635, + "epoch": 0.9301654347060894, + "grad_norm": 1.792761206626892, + "learning_rate": 1.7830616318081527e-07, + "loss": 0.7679, "step": 13213 }, { - "epoch": 0.9998864969164996, - "grad_norm": 2.6249496936798096, - "learning_rate": 2.818045546915116e-13, - "loss": 0.8093, + "epoch": 0.9302358324533615, + "grad_norm": 1.7937467098236084, + "learning_rate": 1.7794838628267096e-07, + "loss": 0.5555, "step": 13214 }, { - "epoch": 0.9999621656388332, - "grad_norm": 2.202721118927002, - "learning_rate": 0.0, - "loss": 0.7656, + "epoch": 0.9303062302006336, + "grad_norm": 1.7643886804580688, + "learning_rate": 1.775909640691703e-07, + "loss": 0.7003, "step": 13215 }, { - "epoch": 0.9999621656388332, - "step": 13215, - "total_flos": 4.661948874456302e+18, - "train_loss": 0.7379108269377905, - "train_runtime": 288889.3373, - "train_samples_per_second": 2.928, - "train_steps_per_second": 0.046 + "epoch": 0.9303766279479057, + "grad_norm": 1.8626103401184082, + "learning_rate": 1.7723389655889278e-07, + "loss": 0.6716, + "step": 13216 + }, + { + "epoch": 0.9304470256951778, + "grad_norm": 1.9842243194580078, + "learning_rate": 1.768771837704046e-07, + "loss": 0.5734, + "step": 13217 + }, + { + "epoch": 0.9305174234424498, + "grad_norm": 1.9420018196105957, + "learning_rate": 1.7652082572225035e-07, + "loss": 0.58, + "step": 13218 + }, + { + "epoch": 0.9305878211897219, + "grad_norm": 3.357947826385498, + "learning_rate": 1.7616482243295973e-07, + "loss": 0.7061, + "step": 13219 + }, + { + "epoch": 0.930658218936994, + "grad_norm": 1.9501343965530396, + "learning_rate": 1.7580917392103912e-07, + "loss": 0.6434, + "step": 13220 + }, + { + "epoch": 0.9307286166842661, + "grad_norm": 2.627782106399536, + "learning_rate": 1.7545388020497942e-07, + "loss": 0.5211, + "step": 13221 + }, + { + "epoch": 0.9307990144315382, + "grad_norm": 2.27801513671875, + "learning_rate": 1.7509894130325365e-07, + "loss": 0.7786, + "step": 13222 + }, + { + "epoch": 0.9308694121788103, + "grad_norm": 1.963484525680542, + "learning_rate": 1.7474435723431458e-07, + "loss": 0.6494, + "step": 13223 + }, + { + "epoch": 0.9309398099260824, + "grad_norm": 2.280142068862915, + "learning_rate": 1.7439012801659793e-07, + "loss": 0.5708, + "step": 13224 + }, + { + "epoch": 0.9310102076733544, + "grad_norm": 2.0318377017974854, + "learning_rate": 1.7403625366851994e-07, + "loss": 0.6715, + "step": 13225 + }, + { + "epoch": 0.9310806054206265, + "grad_norm": 2.0464730262756348, + "learning_rate": 1.736827342084798e-07, + "loss": 0.6637, + "step": 13226 + }, + { + "epoch": 0.9311510031678987, + "grad_norm": 1.67006254196167, + "learning_rate": 1.7332956965485647e-07, + "loss": 0.6125, + "step": 13227 + }, + { + "epoch": 0.9312214009151707, + "grad_norm": 1.8928942680358887, + "learning_rate": 1.7297676002601024e-07, + "loss": 0.7139, + "step": 13228 + }, + { + "epoch": 0.9312917986624428, + "grad_norm": 1.724674940109253, + "learning_rate": 1.7262430534028672e-07, + "loss": 0.5621, + "step": 13229 + }, + { + "epoch": 0.9313621964097148, + "grad_norm": 2.3279953002929688, + "learning_rate": 1.722722056160073e-07, + "loss": 0.728, + "step": 13230 + }, + { + "epoch": 0.931432594156987, + "grad_norm": 2.1211533546447754, + "learning_rate": 1.7192046087148027e-07, + "loss": 0.626, + "step": 13231 + }, + { + "epoch": 0.9315029919042591, + "grad_norm": 1.9122998714447021, + "learning_rate": 1.7156907112499285e-07, + "loss": 0.6419, + "step": 13232 + }, + { + "epoch": 0.9315733896515311, + "grad_norm": 2.2430953979492188, + "learning_rate": 1.7121803639481293e-07, + "loss": 0.6629, + "step": 13233 + }, + { + "epoch": 0.9316437873988033, + "grad_norm": 2.636220932006836, + "learning_rate": 1.7086735669919196e-07, + "loss": 0.7573, + "step": 13234 + }, + { + "epoch": 0.9317141851460753, + "grad_norm": 3.4375717639923096, + "learning_rate": 1.7051703205636202e-07, + "loss": 0.705, + "step": 13235 + }, + { + "epoch": 0.9317845828933474, + "grad_norm": 2.4830164909362793, + "learning_rate": 1.7016706248453583e-07, + "loss": 0.6223, + "step": 13236 + }, + { + "epoch": 0.9318549806406194, + "grad_norm": 1.575363039970398, + "learning_rate": 1.6981744800191123e-07, + "loss": 0.5804, + "step": 13237 + }, + { + "epoch": 0.9319253783878916, + "grad_norm": 1.9362457990646362, + "learning_rate": 1.69468188626662e-07, + "loss": 0.6281, + "step": 13238 + }, + { + "epoch": 0.9319957761351637, + "grad_norm": 1.8486586809158325, + "learning_rate": 1.691192843769488e-07, + "loss": 0.5861, + "step": 13239 + }, + { + "epoch": 0.9320661738824357, + "grad_norm": 2.1544313430786133, + "learning_rate": 1.6877073527090879e-07, + "loss": 0.6358, + "step": 13240 + }, + { + "epoch": 0.9321365716297079, + "grad_norm": 2.045125722885132, + "learning_rate": 1.6842254132666612e-07, + "loss": 0.5861, + "step": 13241 + }, + { + "epoch": 0.9322069693769799, + "grad_norm": 2.1938016414642334, + "learning_rate": 1.6807470256232226e-07, + "loss": 0.5933, + "step": 13242 + }, + { + "epoch": 0.932277367124252, + "grad_norm": 2.2448441982269287, + "learning_rate": 1.6772721899596244e-07, + "loss": 0.6991, + "step": 13243 + }, + { + "epoch": 0.9323477648715242, + "grad_norm": 1.9716392755508423, + "learning_rate": 1.6738009064565162e-07, + "loss": 0.6295, + "step": 13244 + }, + { + "epoch": 0.9324181626187962, + "grad_norm": 2.060844659805298, + "learning_rate": 1.670333175294385e-07, + "loss": 0.5153, + "step": 13245 + }, + { + "epoch": 0.9324885603660683, + "grad_norm": 1.9421509504318237, + "learning_rate": 1.6668689966535078e-07, + "loss": 0.726, + "step": 13246 + }, + { + "epoch": 0.9325589581133403, + "grad_norm": 1.948441743850708, + "learning_rate": 1.6634083707140057e-07, + "loss": 0.6142, + "step": 13247 + }, + { + "epoch": 0.9326293558606125, + "grad_norm": 1.7198500633239746, + "learning_rate": 1.6599512976557828e-07, + "loss": 0.7062, + "step": 13248 + }, + { + "epoch": 0.9326997536078846, + "grad_norm": 1.7886621952056885, + "learning_rate": 1.6564977776585875e-07, + "loss": 0.6604, + "step": 13249 + }, + { + "epoch": 0.9327701513551566, + "grad_norm": 2.6718661785125732, + "learning_rate": 1.6530478109019742e-07, + "loss": 0.6943, + "step": 13250 + }, + { + "epoch": 0.9328405491024288, + "grad_norm": 1.9216123819351196, + "learning_rate": 1.6496013975652945e-07, + "loss": 0.7278, + "step": 13251 + }, + { + "epoch": 0.9329109468497008, + "grad_norm": 1.9470645189285278, + "learning_rate": 1.6461585378277453e-07, + "loss": 0.5853, + "step": 13252 + }, + { + "epoch": 0.9329813445969729, + "grad_norm": 1.722367763519287, + "learning_rate": 1.6427192318683137e-07, + "loss": 0.6484, + "step": 13253 + }, + { + "epoch": 0.933051742344245, + "grad_norm": 1.7758417129516602, + "learning_rate": 1.6392834798658229e-07, + "loss": 0.6274, + "step": 13254 + }, + { + "epoch": 0.9331221400915171, + "grad_norm": 1.610559105873108, + "learning_rate": 1.6358512819988946e-07, + "loss": 0.5261, + "step": 13255 + }, + { + "epoch": 0.9331925378387892, + "grad_norm": 1.7869821786880493, + "learning_rate": 1.6324226384459717e-07, + "loss": 0.7884, + "step": 13256 + }, + { + "epoch": 0.9332629355860612, + "grad_norm": 1.656443476676941, + "learning_rate": 1.6289975493853026e-07, + "loss": 0.5256, + "step": 13257 + }, + { + "epoch": 0.9333333333333333, + "grad_norm": 2.170328378677368, + "learning_rate": 1.6255760149949882e-07, + "loss": 0.6366, + "step": 13258 + }, + { + "epoch": 0.9334037310806054, + "grad_norm": 1.839320182800293, + "learning_rate": 1.6221580354528887e-07, + "loss": 0.6768, + "step": 13259 + }, + { + "epoch": 0.9334741288278775, + "grad_norm": 1.6912871599197388, + "learning_rate": 1.6187436109367236e-07, + "loss": 0.7138, + "step": 13260 + }, + { + "epoch": 0.9335445265751496, + "grad_norm": 2.146909713745117, + "learning_rate": 1.6153327416240116e-07, + "loss": 0.4927, + "step": 13261 + }, + { + "epoch": 0.9336149243224217, + "grad_norm": 1.9304616451263428, + "learning_rate": 1.6119254276920837e-07, + "loss": 0.6376, + "step": 13262 + }, + { + "epoch": 0.9336853220696938, + "grad_norm": 1.9058887958526611, + "learning_rate": 1.608521669318077e-07, + "loss": 0.594, + "step": 13263 + }, + { + "epoch": 0.9337557198169658, + "grad_norm": 1.781761646270752, + "learning_rate": 1.6051214666789813e-07, + "loss": 0.5923, + "step": 13264 + }, + { + "epoch": 0.933826117564238, + "grad_norm": 1.7289245128631592, + "learning_rate": 1.6017248199515533e-07, + "loss": 0.5824, + "step": 13265 + }, + { + "epoch": 0.9338965153115101, + "grad_norm": 2.367927074432373, + "learning_rate": 1.598331729312401e-07, + "loss": 0.6318, + "step": 13266 + }, + { + "epoch": 0.9339669130587821, + "grad_norm": 1.934836983680725, + "learning_rate": 1.5949421949379317e-07, + "loss": 0.6204, + "step": 13267 + }, + { + "epoch": 0.9340373108060542, + "grad_norm": 2.6572108268737793, + "learning_rate": 1.5915562170043727e-07, + "loss": 0.7278, + "step": 13268 + }, + { + "epoch": 0.9341077085533263, + "grad_norm": 1.6981714963912964, + "learning_rate": 1.5881737956877506e-07, + "loss": 0.6244, + "step": 13269 + }, + { + "epoch": 0.9341781063005984, + "grad_norm": 1.9275212287902832, + "learning_rate": 1.5847949311639354e-07, + "loss": 0.6607, + "step": 13270 + }, + { + "epoch": 0.9342485040478705, + "grad_norm": 1.800333023071289, + "learning_rate": 1.5814196236085876e-07, + "loss": 0.6623, + "step": 13271 + }, + { + "epoch": 0.9343189017951425, + "grad_norm": 2.1838483810424805, + "learning_rate": 1.578047873197197e-07, + "loss": 0.7002, + "step": 13272 + }, + { + "epoch": 0.9343892995424147, + "grad_norm": 2.0434207916259766, + "learning_rate": 1.5746796801050745e-07, + "loss": 0.6801, + "step": 13273 + }, + { + "epoch": 0.9344596972896867, + "grad_norm": 2.0047900676727295, + "learning_rate": 1.571315044507321e-07, + "loss": 0.7772, + "step": 13274 + }, + { + "epoch": 0.9345300950369588, + "grad_norm": 2.4842631816864014, + "learning_rate": 1.567953966578859e-07, + "loss": 0.7029, + "step": 13275 + }, + { + "epoch": 0.9346004927842309, + "grad_norm": 1.7539458274841309, + "learning_rate": 1.5645964464944628e-07, + "loss": 0.6499, + "step": 13276 + }, + { + "epoch": 0.934670890531503, + "grad_norm": 2.2985599040985107, + "learning_rate": 1.5612424844286588e-07, + "loss": 0.6999, + "step": 13277 + }, + { + "epoch": 0.9347412882787751, + "grad_norm": 1.8434758186340332, + "learning_rate": 1.557892080555856e-07, + "loss": 0.7079, + "step": 13278 + }, + { + "epoch": 0.9348116860260471, + "grad_norm": 2.716810703277588, + "learning_rate": 1.554545235050223e-07, + "loss": 0.6821, + "step": 13279 + }, + { + "epoch": 0.9348820837733193, + "grad_norm": 2.051342248916626, + "learning_rate": 1.551201948085773e-07, + "loss": 0.6324, + "step": 13280 + }, + { + "epoch": 0.9349524815205913, + "grad_norm": 2.02905535697937, + "learning_rate": 1.5478622198363168e-07, + "loss": 0.6089, + "step": 13281 + }, + { + "epoch": 0.9350228792678634, + "grad_norm": 1.7197540998458862, + "learning_rate": 1.5445260504755098e-07, + "loss": 0.6502, + "step": 13282 + }, + { + "epoch": 0.9350932770151356, + "grad_norm": 2.109349250793457, + "learning_rate": 1.5411934401767745e-07, + "loss": 0.6388, + "step": 13283 + }, + { + "epoch": 0.9351636747624076, + "grad_norm": 1.775551676750183, + "learning_rate": 1.537864389113409e-07, + "loss": 0.5783, + "step": 13284 + }, + { + "epoch": 0.9352340725096797, + "grad_norm": 2.0438787937164307, + "learning_rate": 1.5345388974584707e-07, + "loss": 0.6185, + "step": 13285 + }, + { + "epoch": 0.9353044702569517, + "grad_norm": 1.9314522743225098, + "learning_rate": 1.5312169653848605e-07, + "loss": 0.7186, + "step": 13286 + }, + { + "epoch": 0.9353748680042239, + "grad_norm": 1.8685283660888672, + "learning_rate": 1.5278985930652865e-07, + "loss": 0.7352, + "step": 13287 + }, + { + "epoch": 0.935445265751496, + "grad_norm": 1.9805548191070557, + "learning_rate": 1.5245837806722773e-07, + "loss": 0.6185, + "step": 13288 + }, + { + "epoch": 0.935515663498768, + "grad_norm": 2.463775396347046, + "learning_rate": 1.5212725283781668e-07, + "loss": 0.7226, + "step": 13289 + }, + { + "epoch": 0.9355860612460402, + "grad_norm": 1.7719370126724243, + "learning_rate": 1.5179648363551267e-07, + "loss": 0.5854, + "step": 13290 + }, + { + "epoch": 0.9356564589933122, + "grad_norm": 1.967349886894226, + "learning_rate": 1.5146607047751104e-07, + "loss": 0.7254, + "step": 13291 + }, + { + "epoch": 0.9357268567405843, + "grad_norm": 1.8770520687103271, + "learning_rate": 1.5113601338099159e-07, + "loss": 0.5775, + "step": 13292 + }, + { + "epoch": 0.9357972544878563, + "grad_norm": 2.4764773845672607, + "learning_rate": 1.5080631236311236e-07, + "loss": 0.6579, + "step": 13293 + }, + { + "epoch": 0.9358676522351285, + "grad_norm": 2.5551488399505615, + "learning_rate": 1.504769674410167e-07, + "loss": 0.5611, + "step": 13294 + }, + { + "epoch": 0.9359380499824006, + "grad_norm": 1.8475490808486938, + "learning_rate": 1.5014797863182604e-07, + "loss": 0.6772, + "step": 13295 + }, + { + "epoch": 0.9360084477296726, + "grad_norm": 2.067599296569824, + "learning_rate": 1.4981934595264568e-07, + "loss": 0.5575, + "step": 13296 + }, + { + "epoch": 0.9360788454769448, + "grad_norm": 2.09792423248291, + "learning_rate": 1.494910694205621e-07, + "loss": 0.6199, + "step": 13297 + }, + { + "epoch": 0.9361492432242168, + "grad_norm": 1.8727668523788452, + "learning_rate": 1.4916314905264173e-07, + "loss": 0.6404, + "step": 13298 + }, + { + "epoch": 0.9362196409714889, + "grad_norm": 2.1438350677490234, + "learning_rate": 1.4883558486593374e-07, + "loss": 0.6948, + "step": 13299 + }, + { + "epoch": 0.936290038718761, + "grad_norm": 1.7700340747833252, + "learning_rate": 1.48508376877468e-07, + "loss": 0.6396, + "step": 13300 + }, + { + "epoch": 0.9363604364660331, + "grad_norm": 2.792051315307617, + "learning_rate": 1.481815251042572e-07, + "loss": 0.6994, + "step": 13301 + }, + { + "epoch": 0.9364308342133052, + "grad_norm": 2.284693717956543, + "learning_rate": 1.4785502956329387e-07, + "loss": 0.5517, + "step": 13302 + }, + { + "epoch": 0.9365012319605772, + "grad_norm": 1.953615427017212, + "learning_rate": 1.4752889027155426e-07, + "loss": 0.6143, + "step": 13303 + }, + { + "epoch": 0.9365716297078494, + "grad_norm": 2.183960437774658, + "learning_rate": 1.4720310724599273e-07, + "loss": 0.6914, + "step": 13304 + }, + { + "epoch": 0.9366420274551215, + "grad_norm": 1.885040283203125, + "learning_rate": 1.468776805035482e-07, + "loss": 0.6171, + "step": 13305 + }, + { + "epoch": 0.9367124252023935, + "grad_norm": 2.572415590286255, + "learning_rate": 1.4655261006113938e-07, + "loss": 0.5617, + "step": 13306 + }, + { + "epoch": 0.9367828229496656, + "grad_norm": 2.4824862480163574, + "learning_rate": 1.4622789593566787e-07, + "loss": 0.6255, + "step": 13307 + }, + { + "epoch": 0.9368532206969377, + "grad_norm": 2.0509915351867676, + "learning_rate": 1.4590353814401423e-07, + "loss": 0.6224, + "step": 13308 + }, + { + "epoch": 0.9369236184442098, + "grad_norm": 2.0128326416015625, + "learning_rate": 1.4557953670304436e-07, + "loss": 0.7122, + "step": 13309 + }, + { + "epoch": 0.9369940161914819, + "grad_norm": 1.8022143840789795, + "learning_rate": 1.4525589162960072e-07, + "loss": 0.5587, + "step": 13310 + }, + { + "epoch": 0.937064413938754, + "grad_norm": 1.9159172773361206, + "learning_rate": 1.449326029405127e-07, + "loss": 0.5955, + "step": 13311 + }, + { + "epoch": 0.9371348116860261, + "grad_norm": 1.9758696556091309, + "learning_rate": 1.4460967065258546e-07, + "loss": 0.7471, + "step": 13312 + }, + { + "epoch": 0.9372052094332981, + "grad_norm": 1.606368064880371, + "learning_rate": 1.4428709478261103e-07, + "loss": 0.5827, + "step": 13313 + }, + { + "epoch": 0.9372756071805702, + "grad_norm": 2.092729091644287, + "learning_rate": 1.439648753473589e-07, + "loss": 0.6878, + "step": 13314 + }, + { + "epoch": 0.9373460049278423, + "grad_norm": 2.2854456901550293, + "learning_rate": 1.4364301236358302e-07, + "loss": 0.7303, + "step": 13315 + }, + { + "epoch": 0.9374164026751144, + "grad_norm": 2.153390407562256, + "learning_rate": 1.433215058480155e-07, + "loss": 0.6701, + "step": 13316 + }, + { + "epoch": 0.9374868004223865, + "grad_norm": 1.978127360343933, + "learning_rate": 1.4300035581737302e-07, + "loss": 0.6428, + "step": 13317 + }, + { + "epoch": 0.9375571981696585, + "grad_norm": 2.17630672454834, + "learning_rate": 1.4267956228835122e-07, + "loss": 0.6547, + "step": 13318 + }, + { + "epoch": 0.9376275959169307, + "grad_norm": 1.8432931900024414, + "learning_rate": 1.4235912527763094e-07, + "loss": 0.6216, + "step": 13319 + }, + { + "epoch": 0.9376979936642027, + "grad_norm": 1.869399905204773, + "learning_rate": 1.42039044801869e-07, + "loss": 0.5833, + "step": 13320 + }, + { + "epoch": 0.9377683914114748, + "grad_norm": 1.9556959867477417, + "learning_rate": 1.4171932087770978e-07, + "loss": 0.611, + "step": 13321 + }, + { + "epoch": 0.937838789158747, + "grad_norm": 2.006492853164673, + "learning_rate": 1.413999535217727e-07, + "loss": 0.6882, + "step": 13322 + }, + { + "epoch": 0.937909186906019, + "grad_norm": 2.170966625213623, + "learning_rate": 1.4108094275066407e-07, + "loss": 0.6829, + "step": 13323 + }, + { + "epoch": 0.9379795846532911, + "grad_norm": 2.059617280960083, + "learning_rate": 1.407622885809684e-07, + "loss": 0.5758, + "step": 13324 + }, + { + "epoch": 0.9380499824005631, + "grad_norm": 1.845617651939392, + "learning_rate": 1.4044399102925387e-07, + "loss": 0.6816, + "step": 13325 + }, + { + "epoch": 0.9381203801478353, + "grad_norm": 2.020054817199707, + "learning_rate": 1.4012605011206846e-07, + "loss": 0.6442, + "step": 13326 + }, + { + "epoch": 0.9381907778951074, + "grad_norm": 2.0616862773895264, + "learning_rate": 1.3980846584594387e-07, + "loss": 0.635, + "step": 13327 + }, + { + "epoch": 0.9382611756423794, + "grad_norm": 1.9472640752792358, + "learning_rate": 1.394912382473884e-07, + "loss": 0.5763, + "step": 13328 + }, + { + "epoch": 0.9383315733896516, + "grad_norm": 1.9161458015441895, + "learning_rate": 1.3917436733289722e-07, + "loss": 0.672, + "step": 13329 + }, + { + "epoch": 0.9384019711369236, + "grad_norm": 2.8704330921173096, + "learning_rate": 1.388578531189437e-07, + "loss": 0.6403, + "step": 13330 + }, + { + "epoch": 0.9384723688841957, + "grad_norm": 1.9875990152359009, + "learning_rate": 1.3854169562198493e-07, + "loss": 0.5648, + "step": 13331 + }, + { + "epoch": 0.9385427666314677, + "grad_norm": 2.2443625926971436, + "learning_rate": 1.3822589485845693e-07, + "loss": 0.7358, + "step": 13332 + }, + { + "epoch": 0.9386131643787399, + "grad_norm": 2.2825517654418945, + "learning_rate": 1.3791045084477948e-07, + "loss": 0.523, + "step": 13333 + }, + { + "epoch": 0.938683562126012, + "grad_norm": 1.9903979301452637, + "learning_rate": 1.3759536359735292e-07, + "loss": 0.6266, + "step": 13334 + }, + { + "epoch": 0.938753959873284, + "grad_norm": 1.855832815170288, + "learning_rate": 1.3728063313255734e-07, + "loss": 0.5957, + "step": 13335 + }, + { + "epoch": 0.9388243576205562, + "grad_norm": 1.8685144186019897, + "learning_rate": 1.3696625946675734e-07, + "loss": 0.6714, + "step": 13336 + }, + { + "epoch": 0.9388947553678282, + "grad_norm": 1.9417548179626465, + "learning_rate": 1.3665224261629726e-07, + "loss": 0.646, + "step": 13337 + }, + { + "epoch": 0.9389651531151003, + "grad_norm": 2.1069509983062744, + "learning_rate": 1.3633858259750208e-07, + "loss": 0.6027, + "step": 13338 + }, + { + "epoch": 0.9390355508623724, + "grad_norm": 1.876248836517334, + "learning_rate": 1.3602527942668041e-07, + "loss": 0.624, + "step": 13339 + }, + { + "epoch": 0.9391059486096445, + "grad_norm": 2.1138436794281006, + "learning_rate": 1.3571233312012143e-07, + "loss": 0.7145, + "step": 13340 + }, + { + "epoch": 0.9391763463569166, + "grad_norm": 1.8298373222351074, + "learning_rate": 1.353997436940949e-07, + "loss": 0.6999, + "step": 13341 + }, + { + "epoch": 0.9392467441041886, + "grad_norm": 2.4271535873413086, + "learning_rate": 1.3508751116485196e-07, + "loss": 0.6155, + "step": 13342 + }, + { + "epoch": 0.9393171418514608, + "grad_norm": 2.5883007049560547, + "learning_rate": 1.3477563554862736e-07, + "loss": 0.5719, + "step": 13343 + }, + { + "epoch": 0.9393875395987329, + "grad_norm": 2.2408640384674072, + "learning_rate": 1.344641168616334e-07, + "loss": 0.6879, + "step": 13344 + }, + { + "epoch": 0.9394579373460049, + "grad_norm": 2.0020039081573486, + "learning_rate": 1.3415295512006985e-07, + "loss": 0.6111, + "step": 13345 + }, + { + "epoch": 0.939528335093277, + "grad_norm": 2.008058547973633, + "learning_rate": 1.3384215034011094e-07, + "loss": 0.6138, + "step": 13346 + }, + { + "epoch": 0.9395987328405491, + "grad_norm": 1.7638314962387085, + "learning_rate": 1.3353170253791758e-07, + "loss": 0.5324, + "step": 13347 + }, + { + "epoch": 0.9396691305878212, + "grad_norm": 2.1924805641174316, + "learning_rate": 1.3322161172962898e-07, + "loss": 0.4768, + "step": 13348 + }, + { + "epoch": 0.9397395283350932, + "grad_norm": 2.082374095916748, + "learning_rate": 1.3291187793136884e-07, + "loss": 0.58, + "step": 13349 + }, + { + "epoch": 0.9398099260823654, + "grad_norm": 1.7560726404190063, + "learning_rate": 1.3260250115923823e-07, + "loss": 0.7549, + "step": 13350 + }, + { + "epoch": 0.9398803238296375, + "grad_norm": 1.7496052980422974, + "learning_rate": 1.322934814293235e-07, + "loss": 0.6133, + "step": 13351 + }, + { + "epoch": 0.9399507215769095, + "grad_norm": 2.5254268646240234, + "learning_rate": 1.3198481875769085e-07, + "loss": 0.6304, + "step": 13352 + }, + { + "epoch": 0.9400211193241816, + "grad_norm": 1.9248766899108887, + "learning_rate": 1.316765131603862e-07, + "loss": 0.6336, + "step": 13353 + }, + { + "epoch": 0.9400915170714537, + "grad_norm": 2.0074350833892822, + "learning_rate": 1.313685646534415e-07, + "loss": 0.6809, + "step": 13354 + }, + { + "epoch": 0.9401619148187258, + "grad_norm": 1.9991869926452637, + "learning_rate": 1.3106097325286458e-07, + "loss": 0.7303, + "step": 13355 + }, + { + "epoch": 0.9402323125659979, + "grad_norm": 2.7196602821350098, + "learning_rate": 1.3075373897464938e-07, + "loss": 0.6888, + "step": 13356 + }, + { + "epoch": 0.94030271031327, + "grad_norm": 1.6328494548797607, + "learning_rate": 1.3044686183476794e-07, + "loss": 0.5732, + "step": 13357 + }, + { + "epoch": 0.9403731080605421, + "grad_norm": 2.0037388801574707, + "learning_rate": 1.3014034184917612e-07, + "loss": 0.6235, + "step": 13358 + }, + { + "epoch": 0.9404435058078141, + "grad_norm": 2.0218405723571777, + "learning_rate": 1.2983417903380867e-07, + "loss": 0.6314, + "step": 13359 + }, + { + "epoch": 0.9405139035550862, + "grad_norm": 1.667686104774475, + "learning_rate": 1.295283734045849e-07, + "loss": 0.7063, + "step": 13360 + }, + { + "epoch": 0.9405843013023584, + "grad_norm": 2.0363080501556396, + "learning_rate": 1.2922292497740305e-07, + "loss": 0.5657, + "step": 13361 + }, + { + "epoch": 0.9406546990496304, + "grad_norm": 2.6451399326324463, + "learning_rate": 1.2891783376814437e-07, + "loss": 0.5946, + "step": 13362 + }, + { + "epoch": 0.9407250967969025, + "grad_norm": 1.9126780033111572, + "learning_rate": 1.2861309979266977e-07, + "loss": 0.6181, + "step": 13363 + }, + { + "epoch": 0.9407954945441745, + "grad_norm": 2.5435376167297363, + "learning_rate": 1.2830872306682395e-07, + "loss": 0.7088, + "step": 13364 + }, + { + "epoch": 0.9408658922914467, + "grad_norm": 1.936307668685913, + "learning_rate": 1.280047036064298e-07, + "loss": 0.6383, + "step": 13365 + }, + { + "epoch": 0.9409362900387188, + "grad_norm": 2.2764346599578857, + "learning_rate": 1.277010414272962e-07, + "loss": 0.5562, + "step": 13366 + }, + { + "epoch": 0.9410066877859908, + "grad_norm": 2.0958378314971924, + "learning_rate": 1.2739773654520804e-07, + "loss": 0.7162, + "step": 13367 + }, + { + "epoch": 0.941077085533263, + "grad_norm": 2.5358734130859375, + "learning_rate": 1.2709478897593607e-07, + "loss": 0.7169, + "step": 13368 + }, + { + "epoch": 0.941147483280535, + "grad_norm": 2.028324842453003, + "learning_rate": 1.2679219873523094e-07, + "loss": 0.649, + "step": 13369 + }, + { + "epoch": 0.9412178810278071, + "grad_norm": 2.199768543243408, + "learning_rate": 1.2648996583882466e-07, + "loss": 0.7162, + "step": 13370 + }, + { + "epoch": 0.9412882787750791, + "grad_norm": 1.8420740365982056, + "learning_rate": 1.2618809030242893e-07, + "loss": 0.5834, + "step": 13371 + }, + { + "epoch": 0.9413586765223513, + "grad_norm": 1.8969570398330688, + "learning_rate": 1.2588657214174004e-07, + "loss": 0.7387, + "step": 13372 + }, + { + "epoch": 0.9414290742696234, + "grad_norm": 1.9307193756103516, + "learning_rate": 1.2558541137243317e-07, + "loss": 0.6654, + "step": 13373 + }, + { + "epoch": 0.9414994720168954, + "grad_norm": 1.999553918838501, + "learning_rate": 1.2528460801016806e-07, + "loss": 0.574, + "step": 13374 + }, + { + "epoch": 0.9415698697641676, + "grad_norm": 1.8283189535140991, + "learning_rate": 1.2498416207058183e-07, + "loss": 0.6844, + "step": 13375 + }, + { + "epoch": 0.9416402675114396, + "grad_norm": 1.9879765510559082, + "learning_rate": 1.2468407356929535e-07, + "loss": 0.5697, + "step": 13376 + }, + { + "epoch": 0.9417106652587117, + "grad_norm": 1.7956700325012207, + "learning_rate": 1.243843425219092e-07, + "loss": 0.6978, + "step": 13377 + }, + { + "epoch": 0.9417810630059839, + "grad_norm": 1.8093096017837524, + "learning_rate": 1.2408496894400933e-07, + "loss": 0.7176, + "step": 13378 + }, + { + "epoch": 0.9418514607532559, + "grad_norm": 1.9028016328811646, + "learning_rate": 1.23785952851159e-07, + "loss": 0.692, + "step": 13379 + }, + { + "epoch": 0.941921858500528, + "grad_norm": 1.720513939857483, + "learning_rate": 1.2348729425890447e-07, + "loss": 0.6549, + "step": 13380 + }, + { + "epoch": 0.9419922562478, + "grad_norm": 2.0097224712371826, + "learning_rate": 1.2318899318277333e-07, + "loss": 0.7064, + "step": 13381 + }, + { + "epoch": 0.9420626539950722, + "grad_norm": 2.683281898498535, + "learning_rate": 1.2289104963827446e-07, + "loss": 0.7585, + "step": 13382 + }, + { + "epoch": 0.9421330517423443, + "grad_norm": 2.4127514362335205, + "learning_rate": 1.225934636408974e-07, + "loss": 0.6857, + "step": 13383 + }, + { + "epoch": 0.9422034494896163, + "grad_norm": 2.459670066833496, + "learning_rate": 1.222962352061153e-07, + "loss": 0.6954, + "step": 13384 + }, + { + "epoch": 0.9422738472368885, + "grad_norm": 1.7655326128005981, + "learning_rate": 1.2199936434938042e-07, + "loss": 0.6531, + "step": 13385 + }, + { + "epoch": 0.9423442449841605, + "grad_norm": 1.9743541479110718, + "learning_rate": 1.2170285108612854e-07, + "loss": 0.6366, + "step": 13386 + }, + { + "epoch": 0.9424146427314326, + "grad_norm": 1.840343713760376, + "learning_rate": 1.2140669543177386e-07, + "loss": 0.5885, + "step": 13387 + }, + { + "epoch": 0.9424850404787046, + "grad_norm": 2.410935878753662, + "learning_rate": 1.2111089740171565e-07, + "loss": 0.8034, + "step": 13388 + }, + { + "epoch": 0.9425554382259768, + "grad_norm": 2.0432605743408203, + "learning_rate": 1.208154570113308e-07, + "loss": 0.6208, + "step": 13389 + }, + { + "epoch": 0.9426258359732489, + "grad_norm": 3.531604766845703, + "learning_rate": 1.2052037427598128e-07, + "loss": 0.5343, + "step": 13390 + }, + { + "epoch": 0.9426962337205209, + "grad_norm": 2.132019281387329, + "learning_rate": 1.2022564921100663e-07, + "loss": 0.753, + "step": 13391 + }, + { + "epoch": 0.942766631467793, + "grad_norm": 1.8770201206207275, + "learning_rate": 1.199312818317324e-07, + "loss": 0.6844, + "step": 13392 + }, + { + "epoch": 0.9428370292150651, + "grad_norm": 2.0440452098846436, + "learning_rate": 1.1963727215346155e-07, + "loss": 0.616, + "step": 13393 + }, + { + "epoch": 0.9429074269623372, + "grad_norm": 1.9908077716827393, + "learning_rate": 1.193436201914807e-07, + "loss": 0.7426, + "step": 13394 + }, + { + "epoch": 0.9429778247096093, + "grad_norm": 2.350351095199585, + "learning_rate": 1.1905032596105558e-07, + "loss": 0.6889, + "step": 13395 + }, + { + "epoch": 0.9430482224568814, + "grad_norm": 2.683887243270874, + "learning_rate": 1.187573894774363e-07, + "loss": 0.6248, + "step": 13396 + }, + { + "epoch": 0.9431186202041535, + "grad_norm": 2.1175897121429443, + "learning_rate": 1.1846481075585125e-07, + "loss": 0.6527, + "step": 13397 + }, + { + "epoch": 0.9431890179514255, + "grad_norm": 1.7565255165100098, + "learning_rate": 1.1817258981151401e-07, + "loss": 0.5528, + "step": 13398 + }, + { + "epoch": 0.9432594156986976, + "grad_norm": 3.0567774772644043, + "learning_rate": 1.1788072665961646e-07, + "loss": 0.5473, + "step": 13399 + }, + { + "epoch": 0.9433298134459698, + "grad_norm": 1.5908703804016113, + "learning_rate": 1.1758922131533334e-07, + "loss": 0.6722, + "step": 13400 + }, + { + "epoch": 0.9434002111932418, + "grad_norm": 2.1995723247528076, + "learning_rate": 1.172980737938184e-07, + "loss": 0.5862, + "step": 13401 + }, + { + "epoch": 0.9434706089405139, + "grad_norm": 2.1486356258392334, + "learning_rate": 1.1700728411020989e-07, + "loss": 0.6992, + "step": 13402 + }, + { + "epoch": 0.943541006687786, + "grad_norm": 3.505035877227783, + "learning_rate": 1.1671685227962658e-07, + "loss": 0.6592, + "step": 13403 + }, + { + "epoch": 0.9436114044350581, + "grad_norm": 2.2411534786224365, + "learning_rate": 1.1642677831716863e-07, + "loss": 0.6456, + "step": 13404 + }, + { + "epoch": 0.9436818021823302, + "grad_norm": 1.7767131328582764, + "learning_rate": 1.1613706223791675e-07, + "loss": 0.7135, + "step": 13405 + }, + { + "epoch": 0.9437521999296022, + "grad_norm": 1.7237287759780884, + "learning_rate": 1.1584770405693223e-07, + "loss": 0.6692, + "step": 13406 + }, + { + "epoch": 0.9438225976768744, + "grad_norm": 2.2404141426086426, + "learning_rate": 1.155587037892608e-07, + "loss": 0.6509, + "step": 13407 + }, + { + "epoch": 0.9438929954241464, + "grad_norm": 1.940363883972168, + "learning_rate": 1.1527006144992723e-07, + "loss": 0.7703, + "step": 13408 + }, + { + "epoch": 0.9439633931714185, + "grad_norm": 2.470426321029663, + "learning_rate": 1.1498177705393841e-07, + "loss": 0.5592, + "step": 13409 + }, + { + "epoch": 0.9440337909186906, + "grad_norm": 2.131884813308716, + "learning_rate": 1.1469385061628179e-07, + "loss": 0.6829, + "step": 13410 + }, + { + "epoch": 0.9441041886659627, + "grad_norm": 1.9821776151657104, + "learning_rate": 1.1440628215192927e-07, + "loss": 0.6773, + "step": 13411 + }, + { + "epoch": 0.9441745864132348, + "grad_norm": 2.196107864379883, + "learning_rate": 1.1411907167582869e-07, + "loss": 0.6317, + "step": 13412 + }, + { + "epoch": 0.9442449841605068, + "grad_norm": 1.8204424381256104, + "learning_rate": 1.1383221920291464e-07, + "loss": 0.6256, + "step": 13413 + }, + { + "epoch": 0.944315381907779, + "grad_norm": 2.257648229598999, + "learning_rate": 1.135457247480992e-07, + "loss": 0.6152, + "step": 13414 + }, + { + "epoch": 0.944385779655051, + "grad_norm": 1.9619024991989136, + "learning_rate": 1.1325958832627892e-07, + "loss": 0.4953, + "step": 13415 + }, + { + "epoch": 0.9444561774023231, + "grad_norm": 2.039294481277466, + "learning_rate": 1.1297380995232852e-07, + "loss": 0.6296, + "step": 13416 + }, + { + "epoch": 0.9445265751495953, + "grad_norm": 2.3243672847747803, + "learning_rate": 1.1268838964110883e-07, + "loss": 0.76, + "step": 13417 + }, + { + "epoch": 0.9445969728968673, + "grad_norm": 1.8297600746154785, + "learning_rate": 1.1240332740745574e-07, + "loss": 0.5294, + "step": 13418 + }, + { + "epoch": 0.9446673706441394, + "grad_norm": 2.0596461296081543, + "learning_rate": 1.1211862326619193e-07, + "loss": 0.7043, + "step": 13419 + }, + { + "epoch": 0.9447377683914114, + "grad_norm": 1.8427293300628662, + "learning_rate": 1.1183427723211914e-07, + "loss": 0.6452, + "step": 13420 + }, + { + "epoch": 0.9448081661386836, + "grad_norm": 2.1977436542510986, + "learning_rate": 1.1155028932002042e-07, + "loss": 0.6651, + "step": 13421 + }, + { + "epoch": 0.9448785638859557, + "grad_norm": 1.924061894416809, + "learning_rate": 1.1126665954466019e-07, + "loss": 0.6985, + "step": 13422 + }, + { + "epoch": 0.9449489616332277, + "grad_norm": 2.1756746768951416, + "learning_rate": 1.1098338792078655e-07, + "loss": 0.6686, + "step": 13423 + }, + { + "epoch": 0.9450193593804999, + "grad_norm": 1.829715609550476, + "learning_rate": 1.1070047446312425e-07, + "loss": 0.5827, + "step": 13424 + }, + { + "epoch": 0.9450897571277719, + "grad_norm": 1.9215165376663208, + "learning_rate": 1.1041791918638411e-07, + "loss": 0.5644, + "step": 13425 + }, + { + "epoch": 0.945160154875044, + "grad_norm": 2.140357732772827, + "learning_rate": 1.1013572210525513e-07, + "loss": 0.6462, + "step": 13426 + }, + { + "epoch": 0.945230552622316, + "grad_norm": 1.9782085418701172, + "learning_rate": 1.0985388323441081e-07, + "loss": 0.5965, + "step": 13427 + }, + { + "epoch": 0.9453009503695882, + "grad_norm": 2.385695457458496, + "learning_rate": 1.0957240258850287e-07, + "loss": 0.6152, + "step": 13428 + }, + { + "epoch": 0.9453713481168603, + "grad_norm": 1.8895820379257202, + "learning_rate": 1.0929128018216671e-07, + "loss": 0.6061, + "step": 13429 + }, + { + "epoch": 0.9454417458641323, + "grad_norm": 1.9299699068069458, + "learning_rate": 1.0901051603001677e-07, + "loss": 0.7612, + "step": 13430 + }, + { + "epoch": 0.9455121436114045, + "grad_norm": 1.8086830377578735, + "learning_rate": 1.0873011014665113e-07, + "loss": 0.6596, + "step": 13431 + }, + { + "epoch": 0.9455825413586765, + "grad_norm": 1.9652626514434814, + "learning_rate": 1.0845006254664769e-07, + "loss": 0.6228, + "step": 13432 + }, + { + "epoch": 0.9456529391059486, + "grad_norm": 2.0700204372406006, + "learning_rate": 1.0817037324456724e-07, + "loss": 0.7144, + "step": 13433 + }, + { + "epoch": 0.9457233368532207, + "grad_norm": 2.1408965587615967, + "learning_rate": 1.0789104225495038e-07, + "loss": 0.6702, + "step": 13434 + }, + { + "epoch": 0.9457937346004928, + "grad_norm": 2.03265643119812, + "learning_rate": 1.0761206959232061e-07, + "loss": 0.5931, + "step": 13435 + }, + { + "epoch": 0.9458641323477649, + "grad_norm": 2.9437503814697266, + "learning_rate": 1.073334552711812e-07, + "loss": 0.5668, + "step": 13436 + }, + { + "epoch": 0.9459345300950369, + "grad_norm": 1.9239253997802734, + "learning_rate": 1.0705519930601836e-07, + "loss": 0.6318, + "step": 13437 + }, + { + "epoch": 0.946004927842309, + "grad_norm": 2.275963068008423, + "learning_rate": 1.0677730171129729e-07, + "loss": 0.7199, + "step": 13438 + }, + { + "epoch": 0.9460753255895812, + "grad_norm": 2.400522232055664, + "learning_rate": 1.0649976250146842e-07, + "loss": 0.6643, + "step": 13439 + }, + { + "epoch": 0.9461457233368532, + "grad_norm": 1.7531081438064575, + "learning_rate": 1.0622258169095888e-07, + "loss": 0.6229, + "step": 13440 + }, + { + "epoch": 0.9462161210841253, + "grad_norm": 1.947320580482483, + "learning_rate": 1.0594575929418104e-07, + "loss": 0.6282, + "step": 13441 + }, + { + "epoch": 0.9462865188313974, + "grad_norm": 2.0972158908843994, + "learning_rate": 1.0566929532552705e-07, + "loss": 0.5943, + "step": 13442 + }, + { + "epoch": 0.9463569165786695, + "grad_norm": 2.1381309032440186, + "learning_rate": 1.053931897993704e-07, + "loss": 0.7212, + "step": 13443 + }, + { + "epoch": 0.9464273143259415, + "grad_norm": 1.8956172466278076, + "learning_rate": 1.0511744273006518e-07, + "loss": 0.611, + "step": 13444 + }, + { + "epoch": 0.9464977120732136, + "grad_norm": 1.8293368816375732, + "learning_rate": 1.0484205413194913e-07, + "loss": 0.7091, + "step": 13445 + }, + { + "epoch": 0.9465681098204858, + "grad_norm": 1.7131117582321167, + "learning_rate": 1.0456702401933904e-07, + "loss": 0.7191, + "step": 13446 + }, + { + "epoch": 0.9466385075677578, + "grad_norm": 1.8247473239898682, + "learning_rate": 1.0429235240653456e-07, + "loss": 0.6599, + "step": 13447 + }, + { + "epoch": 0.9467089053150299, + "grad_norm": 2.294304609298706, + "learning_rate": 1.0401803930781595e-07, + "loss": 0.7134, + "step": 13448 + }, + { + "epoch": 0.946779303062302, + "grad_norm": 1.7649935483932495, + "learning_rate": 1.0374408473744478e-07, + "loss": 0.5601, + "step": 13449 + }, + { + "epoch": 0.9468497008095741, + "grad_norm": 1.7575172185897827, + "learning_rate": 1.0347048870966324e-07, + "loss": 0.5715, + "step": 13450 + }, + { + "epoch": 0.9469200985568462, + "grad_norm": 1.9083542823791504, + "learning_rate": 1.0319725123869716e-07, + "loss": 0.7381, + "step": 13451 + }, + { + "epoch": 0.9469904963041182, + "grad_norm": 2.151188850402832, + "learning_rate": 1.0292437233875295e-07, + "loss": 0.7239, + "step": 13452 + }, + { + "epoch": 0.9470608940513904, + "grad_norm": 1.6774327754974365, + "learning_rate": 1.0265185202401683e-07, + "loss": 0.6017, + "step": 13453 + }, + { + "epoch": 0.9471312917986624, + "grad_norm": 2.2918827533721924, + "learning_rate": 1.0237969030865712e-07, + "loss": 0.6906, + "step": 13454 + }, + { + "epoch": 0.9472016895459345, + "grad_norm": 2.1149768829345703, + "learning_rate": 1.0210788720682429e-07, + "loss": 0.6504, + "step": 13455 + }, + { + "epoch": 0.9472720872932067, + "grad_norm": 2.3543179035186768, + "learning_rate": 1.0183644273264935e-07, + "loss": 0.6879, + "step": 13456 + }, + { + "epoch": 0.9473424850404787, + "grad_norm": 3.0645675659179688, + "learning_rate": 1.0156535690024471e-07, + "loss": 0.8039, + "step": 13457 + }, + { + "epoch": 0.9474128827877508, + "grad_norm": 1.8291361331939697, + "learning_rate": 1.0129462972370562e-07, + "loss": 0.6105, + "step": 13458 + }, + { + "epoch": 0.9474832805350228, + "grad_norm": 2.110509157180786, + "learning_rate": 1.0102426121710639e-07, + "loss": 0.603, + "step": 13459 + }, + { + "epoch": 0.947553678282295, + "grad_norm": 2.33627986907959, + "learning_rate": 1.0075425139450422e-07, + "loss": 0.6006, + "step": 13460 + }, + { + "epoch": 0.9476240760295671, + "grad_norm": 2.027860403060913, + "learning_rate": 1.0048460026993611e-07, + "loss": 0.665, + "step": 13461 + }, + { + "epoch": 0.9476944737768391, + "grad_norm": 2.1664278507232666, + "learning_rate": 1.0021530785742272e-07, + "loss": 0.7787, + "step": 13462 + }, + { + "epoch": 0.9477648715241113, + "grad_norm": 2.110544204711914, + "learning_rate": 9.994637417096375e-08, + "loss": 0.6048, + "step": 13463 + }, + { + "epoch": 0.9478352692713833, + "grad_norm": 1.8432906866073608, + "learning_rate": 9.967779922454179e-08, + "loss": 0.5431, + "step": 13464 + }, + { + "epoch": 0.9479056670186554, + "grad_norm": 1.8463634252548218, + "learning_rate": 9.940958303212155e-08, + "loss": 0.7114, + "step": 13465 + }, + { + "epoch": 0.9479760647659274, + "grad_norm": 2.187211751937866, + "learning_rate": 9.9141725607646e-08, + "loss": 0.6276, + "step": 13466 + }, + { + "epoch": 0.9480464625131996, + "grad_norm": 1.8492976427078247, + "learning_rate": 9.8874226965041e-08, + "loss": 0.6902, + "step": 13467 + }, + { + "epoch": 0.9481168602604717, + "grad_norm": 2.1779837608337402, + "learning_rate": 9.860708711821609e-08, + "loss": 0.7217, + "step": 13468 + }, + { + "epoch": 0.9481872580077437, + "grad_norm": 1.863638997077942, + "learning_rate": 9.834030608105826e-08, + "loss": 0.5507, + "step": 13469 + }, + { + "epoch": 0.9482576557550159, + "grad_norm": 1.8404645919799805, + "learning_rate": 9.807388386743898e-08, + "loss": 0.6783, + "step": 13470 + }, + { + "epoch": 0.9483280535022879, + "grad_norm": 2.3536882400512695, + "learning_rate": 9.780782049120873e-08, + "loss": 0.6077, + "step": 13471 + }, + { + "epoch": 0.94839845124956, + "grad_norm": 1.8249744176864624, + "learning_rate": 9.754211596620166e-08, + "loss": 0.6344, + "step": 13472 + }, + { + "epoch": 0.9484688489968321, + "grad_norm": 2.0205838680267334, + "learning_rate": 9.727677030623017e-08, + "loss": 0.5455, + "step": 13473 + }, + { + "epoch": 0.9485392467441042, + "grad_norm": 2.186877965927124, + "learning_rate": 9.701178352509187e-08, + "loss": 0.561, + "step": 13474 + }, + { + "epoch": 0.9486096444913763, + "grad_norm": 2.459009885787964, + "learning_rate": 9.674715563656189e-08, + "loss": 0.6331, + "step": 13475 + }, + { + "epoch": 0.9486800422386483, + "grad_norm": 2.4594810009002686, + "learning_rate": 9.648288665439975e-08, + "loss": 0.7357, + "step": 13476 + }, + { + "epoch": 0.9487504399859205, + "grad_norm": 1.593027114868164, + "learning_rate": 9.621897659234401e-08, + "loss": 0.7009, + "step": 13477 + }, + { + "epoch": 0.9488208377331926, + "grad_norm": 2.043124198913574, + "learning_rate": 9.595542546411695e-08, + "loss": 0.7113, + "step": 13478 + }, + { + "epoch": 0.9488912354804646, + "grad_norm": 1.7053319215774536, + "learning_rate": 9.569223328341903e-08, + "loss": 0.6996, + "step": 13479 + }, + { + "epoch": 0.9489616332277367, + "grad_norm": 2.0380146503448486, + "learning_rate": 9.542940006393518e-08, + "loss": 0.6315, + "step": 13480 + }, + { + "epoch": 0.9490320309750088, + "grad_norm": 1.8521177768707275, + "learning_rate": 9.516692581933017e-08, + "loss": 0.6577, + "step": 13481 + }, + { + "epoch": 0.9491024287222809, + "grad_norm": 2.0218870639801025, + "learning_rate": 9.490481056325006e-08, + "loss": 0.6741, + "step": 13482 + }, + { + "epoch": 0.9491728264695529, + "grad_norm": 1.9566231966018677, + "learning_rate": 9.464305430932229e-08, + "loss": 0.657, + "step": 13483 + }, + { + "epoch": 0.949243224216825, + "grad_norm": 2.349400281906128, + "learning_rate": 9.438165707115642e-08, + "loss": 0.6015, + "step": 13484 + }, + { + "epoch": 0.9493136219640972, + "grad_norm": 3.958007574081421, + "learning_rate": 9.412061886234102e-08, + "loss": 0.6934, + "step": 13485 + }, + { + "epoch": 0.9493840197113692, + "grad_norm": 2.712344169616699, + "learning_rate": 9.385993969645067e-08, + "loss": 0.6356, + "step": 13486 + }, + { + "epoch": 0.9494544174586413, + "grad_norm": 2.4533417224884033, + "learning_rate": 9.359961958703511e-08, + "loss": 0.6557, + "step": 13487 + }, + { + "epoch": 0.9495248152059134, + "grad_norm": 2.0656628608703613, + "learning_rate": 9.33396585476316e-08, + "loss": 0.6405, + "step": 13488 + }, + { + "epoch": 0.9495952129531855, + "grad_norm": 1.852553367614746, + "learning_rate": 9.308005659175334e-08, + "loss": 0.5772, + "step": 13489 + }, + { + "epoch": 0.9496656107004576, + "grad_norm": 1.814408302307129, + "learning_rate": 9.282081373289874e-08, + "loss": 0.6601, + "step": 13490 + }, + { + "epoch": 0.9497360084477297, + "grad_norm": 2.009521007537842, + "learning_rate": 9.256192998454448e-08, + "loss": 0.6537, + "step": 13491 + }, + { + "epoch": 0.9498064061950018, + "grad_norm": 1.9993176460266113, + "learning_rate": 9.230340536015247e-08, + "loss": 0.5729, + "step": 13492 + }, + { + "epoch": 0.9498768039422738, + "grad_norm": 2.1286978721618652, + "learning_rate": 9.204523987316126e-08, + "loss": 0.6327, + "step": 13493 + }, + { + "epoch": 0.9499472016895459, + "grad_norm": 2.102461338043213, + "learning_rate": 9.178743353699547e-08, + "loss": 0.6518, + "step": 13494 + }, + { + "epoch": 0.9500175994368181, + "grad_norm": 3.2038493156433105, + "learning_rate": 9.152998636505716e-08, + "loss": 0.7764, + "step": 13495 + }, + { + "epoch": 0.9500879971840901, + "grad_norm": 2.2323856353759766, + "learning_rate": 9.127289837073127e-08, + "loss": 0.5964, + "step": 13496 + }, + { + "epoch": 0.9501583949313622, + "grad_norm": 2.464371919631958, + "learning_rate": 9.101616956738412e-08, + "loss": 0.6192, + "step": 13497 + }, + { + "epoch": 0.9502287926786342, + "grad_norm": 2.102384567260742, + "learning_rate": 9.075979996836336e-08, + "loss": 0.7398, + "step": 13498 + }, + { + "epoch": 0.9502991904259064, + "grad_norm": 2.1495323181152344, + "learning_rate": 9.0503789586998e-08, + "loss": 0.6304, + "step": 13499 + }, + { + "epoch": 0.9503695881731784, + "grad_norm": 2.1055006980895996, + "learning_rate": 9.02481384365984e-08, + "loss": 0.6086, + "step": 13500 + }, + { + "epoch": 0.9504399859204505, + "grad_norm": 1.6962460279464722, + "learning_rate": 8.999284653045625e-08, + "loss": 0.6911, + "step": 13501 + }, + { + "epoch": 0.9505103836677227, + "grad_norm": 1.733275294303894, + "learning_rate": 8.973791388184383e-08, + "loss": 0.6118, + "step": 13502 + }, + { + "epoch": 0.9505807814149947, + "grad_norm": 1.9594041109085083, + "learning_rate": 8.948334050401552e-08, + "loss": 0.6067, + "step": 13503 + }, + { + "epoch": 0.9506511791622668, + "grad_norm": 2.673973321914673, + "learning_rate": 8.922912641020631e-08, + "loss": 0.6799, + "step": 13504 + }, + { + "epoch": 0.9507215769095388, + "grad_norm": 2.197950601577759, + "learning_rate": 8.897527161363484e-08, + "loss": 0.6227, + "step": 13505 + }, + { + "epoch": 0.950791974656811, + "grad_norm": 1.811566948890686, + "learning_rate": 8.872177612749721e-08, + "loss": 0.611, + "step": 13506 + }, + { + "epoch": 0.9508623724040831, + "grad_norm": 1.947840690612793, + "learning_rate": 8.846863996497401e-08, + "loss": 0.5917, + "step": 13507 + }, + { + "epoch": 0.9509327701513551, + "grad_norm": 2.029402017593384, + "learning_rate": 8.82158631392248e-08, + "loss": 0.6756, + "step": 13508 + }, + { + "epoch": 0.9510031678986273, + "grad_norm": 2.3874142169952393, + "learning_rate": 8.796344566339364e-08, + "loss": 0.6737, + "step": 13509 + }, + { + "epoch": 0.9510735656458993, + "grad_norm": 1.7948827743530273, + "learning_rate": 8.771138755060204e-08, + "loss": 0.5972, + "step": 13510 + }, + { + "epoch": 0.9511439633931714, + "grad_norm": 2.2914843559265137, + "learning_rate": 8.745968881395594e-08, + "loss": 0.6657, + "step": 13511 + }, + { + "epoch": 0.9512143611404436, + "grad_norm": 2.180859327316284, + "learning_rate": 8.72083494665411e-08, + "loss": 0.7661, + "step": 13512 + }, + { + "epoch": 0.9512847588877156, + "grad_norm": 1.9718856811523438, + "learning_rate": 8.695736952142541e-08, + "loss": 0.7791, + "step": 13513 + }, + { + "epoch": 0.9513551566349877, + "grad_norm": 2.4089953899383545, + "learning_rate": 8.670674899165576e-08, + "loss": 0.6239, + "step": 13514 + }, + { + "epoch": 0.9514255543822597, + "grad_norm": 2.314481496810913, + "learning_rate": 8.64564878902635e-08, + "loss": 0.6262, + "step": 13515 + }, + { + "epoch": 0.9514959521295319, + "grad_norm": 1.8290013074874878, + "learning_rate": 8.620658623025978e-08, + "loss": 0.6519, + "step": 13516 + }, + { + "epoch": 0.951566349876804, + "grad_norm": 1.5431467294692993, + "learning_rate": 8.595704402463711e-08, + "loss": 0.576, + "step": 13517 + }, + { + "epoch": 0.951636747624076, + "grad_norm": 2.2041656970977783, + "learning_rate": 8.570786128636931e-08, + "loss": 0.5726, + "step": 13518 + }, + { + "epoch": 0.9517071453713482, + "grad_norm": 1.8178954124450684, + "learning_rate": 8.545903802841237e-08, + "loss": 0.6285, + "step": 13519 + }, + { + "epoch": 0.9517775431186202, + "grad_norm": 2.2567670345306396, + "learning_rate": 8.521057426370126e-08, + "loss": 0.6002, + "step": 13520 + }, + { + "epoch": 0.9518479408658923, + "grad_norm": 1.675374984741211, + "learning_rate": 8.496247000515622e-08, + "loss": 0.5838, + "step": 13521 + }, + { + "epoch": 0.9519183386131643, + "grad_norm": 1.9119999408721924, + "learning_rate": 8.471472526567336e-08, + "loss": 0.6513, + "step": 13522 + }, + { + "epoch": 0.9519887363604365, + "grad_norm": 1.8354443311691284, + "learning_rate": 8.44673400581356e-08, + "loss": 0.5931, + "step": 13523 + }, + { + "epoch": 0.9520591341077086, + "grad_norm": 2.5288004875183105, + "learning_rate": 8.422031439540412e-08, + "loss": 0.718, + "step": 13524 + }, + { + "epoch": 0.9521295318549806, + "grad_norm": 2.2762513160705566, + "learning_rate": 8.397364829032217e-08, + "loss": 0.5504, + "step": 13525 + }, + { + "epoch": 0.9521999296022527, + "grad_norm": 1.7784111499786377, + "learning_rate": 8.372734175571284e-08, + "loss": 0.5967, + "step": 13526 + }, + { + "epoch": 0.9522703273495248, + "grad_norm": 2.297367811203003, + "learning_rate": 8.348139480438288e-08, + "loss": 0.7091, + "step": 13527 + }, + { + "epoch": 0.9523407250967969, + "grad_norm": 1.9913169145584106, + "learning_rate": 8.323580744911884e-08, + "loss": 0.6787, + "step": 13528 + }, + { + "epoch": 0.952411122844069, + "grad_norm": 1.8363467454910278, + "learning_rate": 8.29905797026902e-08, + "loss": 0.6809, + "step": 13529 + }, + { + "epoch": 0.9524815205913411, + "grad_norm": 2.024702548980713, + "learning_rate": 8.274571157784538e-08, + "loss": 0.5699, + "step": 13530 + }, + { + "epoch": 0.9525519183386132, + "grad_norm": 2.0820822715759277, + "learning_rate": 8.250120308731578e-08, + "loss": 0.6642, + "step": 13531 + }, + { + "epoch": 0.9526223160858852, + "grad_norm": 2.251187324523926, + "learning_rate": 8.225705424381336e-08, + "loss": 0.6524, + "step": 13532 + }, + { + "epoch": 0.9526927138331573, + "grad_norm": 2.0526387691497803, + "learning_rate": 8.201326506003215e-08, + "loss": 0.7165, + "step": 13533 + }, + { + "epoch": 0.9527631115804295, + "grad_norm": 1.8201161623001099, + "learning_rate": 8.176983554864602e-08, + "loss": 0.7284, + "step": 13534 + }, + { + "epoch": 0.9528335093277015, + "grad_norm": 1.6643059253692627, + "learning_rate": 8.152676572231176e-08, + "loss": 0.7196, + "step": 13535 + }, + { + "epoch": 0.9529039070749736, + "grad_norm": 2.2868950366973877, + "learning_rate": 8.12840555936667e-08, + "loss": 0.6481, + "step": 13536 + }, + { + "epoch": 0.9529743048222457, + "grad_norm": 2.399549961090088, + "learning_rate": 8.104170517533027e-08, + "loss": 0.6557, + "step": 13537 + }, + { + "epoch": 0.9530447025695178, + "grad_norm": 2.4057297706604004, + "learning_rate": 8.079971447990019e-08, + "loss": 0.5776, + "step": 13538 + }, + { + "epoch": 0.9531151003167898, + "grad_norm": 2.19360089302063, + "learning_rate": 8.055808351996096e-08, + "loss": 0.67, + "step": 13539 + }, + { + "epoch": 0.9531854980640619, + "grad_norm": 1.9187644720077515, + "learning_rate": 8.031681230807219e-08, + "loss": 0.6529, + "step": 13540 + }, + { + "epoch": 0.9532558958113341, + "grad_norm": 4.9516167640686035, + "learning_rate": 8.007590085678029e-08, + "loss": 0.6386, + "step": 13541 + }, + { + "epoch": 0.9533262935586061, + "grad_norm": 2.395658493041992, + "learning_rate": 7.983534917860912e-08, + "loss": 0.6582, + "step": 13542 + }, + { + "epoch": 0.9533966913058782, + "grad_norm": 1.8376082181930542, + "learning_rate": 7.95951572860647e-08, + "loss": 0.5181, + "step": 13543 + }, + { + "epoch": 0.9534670890531503, + "grad_norm": 1.9218366146087646, + "learning_rate": 7.935532519163668e-08, + "loss": 0.6488, + "step": 13544 + }, + { + "epoch": 0.9535374868004224, + "grad_norm": 1.8384186029434204, + "learning_rate": 7.911585290779222e-08, + "loss": 0.5468, + "step": 13545 + }, + { + "epoch": 0.9536078845476945, + "grad_norm": 2.158933162689209, + "learning_rate": 7.88767404469829e-08, + "loss": 0.6122, + "step": 13546 + }, + { + "epoch": 0.9536782822949665, + "grad_norm": 2.077000617980957, + "learning_rate": 7.86379878216401e-08, + "loss": 0.7468, + "step": 13547 + }, + { + "epoch": 0.9537486800422387, + "grad_norm": 2.0154943466186523, + "learning_rate": 7.839959504417581e-08, + "loss": 0.6556, + "step": 13548 + }, + { + "epoch": 0.9538190777895107, + "grad_norm": 2.7230684757232666, + "learning_rate": 7.816156212698567e-08, + "loss": 0.7211, + "step": 13549 + }, + { + "epoch": 0.9538894755367828, + "grad_norm": 1.6363073587417603, + "learning_rate": 7.792388908244508e-08, + "loss": 0.6103, + "step": 13550 + }, + { + "epoch": 0.953959873284055, + "grad_norm": 2.0410609245300293, + "learning_rate": 7.768657592291006e-08, + "loss": 0.6816, + "step": 13551 + }, + { + "epoch": 0.954030271031327, + "grad_norm": 2.1506080627441406, + "learning_rate": 7.744962266071953e-08, + "loss": 0.7932, + "step": 13552 + }, + { + "epoch": 0.9541006687785991, + "grad_norm": 2.7225136756896973, + "learning_rate": 7.72130293081914e-08, + "loss": 0.6829, + "step": 13553 + }, + { + "epoch": 0.9541710665258711, + "grad_norm": 2.1076903343200684, + "learning_rate": 7.697679587762885e-08, + "loss": 0.6964, + "step": 13554 + }, + { + "epoch": 0.9542414642731433, + "grad_norm": 1.9626339673995972, + "learning_rate": 7.674092238131247e-08, + "loss": 0.7169, + "step": 13555 + }, + { + "epoch": 0.9543118620204153, + "grad_norm": 1.7695571184158325, + "learning_rate": 7.650540883150503e-08, + "loss": 0.665, + "step": 13556 + }, + { + "epoch": 0.9543822597676874, + "grad_norm": 2.1026620864868164, + "learning_rate": 7.627025524045217e-08, + "loss": 0.6022, + "step": 13557 + }, + { + "epoch": 0.9544526575149596, + "grad_norm": 2.006113052368164, + "learning_rate": 7.603546162037855e-08, + "loss": 0.6476, + "step": 13558 + }, + { + "epoch": 0.9545230552622316, + "grad_norm": 1.9097014665603638, + "learning_rate": 7.580102798349253e-08, + "loss": 0.5617, + "step": 13559 + }, + { + "epoch": 0.9545934530095037, + "grad_norm": 1.8966267108917236, + "learning_rate": 7.556695434198146e-08, + "loss": 0.5206, + "step": 13560 + }, + { + "epoch": 0.9546638507567757, + "grad_norm": 1.8744064569473267, + "learning_rate": 7.533324070801639e-08, + "loss": 0.6914, + "step": 13561 + }, + { + "epoch": 0.9547342485040479, + "grad_norm": 2.0989973545074463, + "learning_rate": 7.509988709374737e-08, + "loss": 0.7281, + "step": 13562 + }, + { + "epoch": 0.95480464625132, + "grad_norm": 1.9051287174224854, + "learning_rate": 7.486689351130581e-08, + "loss": 0.7331, + "step": 13563 + }, + { + "epoch": 0.954875043998592, + "grad_norm": 2.101846694946289, + "learning_rate": 7.463425997280759e-08, + "loss": 0.7578, + "step": 13564 + }, + { + "epoch": 0.9549454417458642, + "grad_norm": 1.9735002517700195, + "learning_rate": 7.440198649034524e-08, + "loss": 0.7437, + "step": 13565 + }, + { + "epoch": 0.9550158394931362, + "grad_norm": 1.9298404455184937, + "learning_rate": 7.417007307599576e-08, + "loss": 0.5696, + "step": 13566 + }, + { + "epoch": 0.9550862372404083, + "grad_norm": 1.8533270359039307, + "learning_rate": 7.393851974181753e-08, + "loss": 0.7156, + "step": 13567 + }, + { + "epoch": 0.9551566349876804, + "grad_norm": 2.2969706058502197, + "learning_rate": 7.370732649984713e-08, + "loss": 0.6322, + "step": 13568 + }, + { + "epoch": 0.9552270327349525, + "grad_norm": 1.7792129516601562, + "learning_rate": 7.347649336210638e-08, + "loss": 0.6077, + "step": 13569 + }, + { + "epoch": 0.9552974304822246, + "grad_norm": 1.7985377311706543, + "learning_rate": 7.324602034059535e-08, + "loss": 0.6283, + "step": 13570 + }, + { + "epoch": 0.9553678282294966, + "grad_norm": 3.7738869190216064, + "learning_rate": 7.301590744729702e-08, + "loss": 0.6665, + "step": 13571 + }, + { + "epoch": 0.9554382259767688, + "grad_norm": 1.789225459098816, + "learning_rate": 7.278615469417494e-08, + "loss": 0.7112, + "step": 13572 + }, + { + "epoch": 0.9555086237240409, + "grad_norm": 1.7603168487548828, + "learning_rate": 7.255676209317474e-08, + "loss": 0.6419, + "step": 13573 + }, + { + "epoch": 0.9555790214713129, + "grad_norm": 1.94046151638031, + "learning_rate": 7.232772965622269e-08, + "loss": 0.6086, + "step": 13574 + }, + { + "epoch": 0.955649419218585, + "grad_norm": 5.449466228485107, + "learning_rate": 7.20990573952256e-08, + "loss": 0.7261, + "step": 13575 + }, + { + "epoch": 0.9557198169658571, + "grad_norm": 2.30816912651062, + "learning_rate": 7.187074532207238e-08, + "loss": 0.5006, + "step": 13576 + }, + { + "epoch": 0.9557902147131292, + "grad_norm": 2.058734178543091, + "learning_rate": 7.164279344863412e-08, + "loss": 0.7273, + "step": 13577 + }, + { + "epoch": 0.9558606124604012, + "grad_norm": 1.8945261240005493, + "learning_rate": 7.141520178676164e-08, + "loss": 0.6206, + "step": 13578 + }, + { + "epoch": 0.9559310102076733, + "grad_norm": 2.8676421642303467, + "learning_rate": 7.118797034828794e-08, + "loss": 0.7222, + "step": 13579 + }, + { + "epoch": 0.9560014079549455, + "grad_norm": 2.3443057537078857, + "learning_rate": 7.096109914502657e-08, + "loss": 0.5452, + "step": 13580 + }, + { + "epoch": 0.9560718057022175, + "grad_norm": 1.8776721954345703, + "learning_rate": 7.073458818877243e-08, + "loss": 0.6766, + "step": 13581 + }, + { + "epoch": 0.9561422034494896, + "grad_norm": 1.7718335390090942, + "learning_rate": 7.050843749130331e-08, + "loss": 0.6911, + "step": 13582 + }, + { + "epoch": 0.9562126011967617, + "grad_norm": 2.2475342750549316, + "learning_rate": 7.028264706437526e-08, + "loss": 0.621, + "step": 13583 + }, + { + "epoch": 0.9562829989440338, + "grad_norm": 1.8259342908859253, + "learning_rate": 7.005721691972954e-08, + "loss": 0.6216, + "step": 13584 + }, + { + "epoch": 0.9563533966913059, + "grad_norm": 1.8573869466781616, + "learning_rate": 6.983214706908491e-08, + "loss": 0.4355, + "step": 13585 + }, + { + "epoch": 0.956423794438578, + "grad_norm": 2.1163718700408936, + "learning_rate": 6.9607437524143e-08, + "loss": 0.5748, + "step": 13586 + }, + { + "epoch": 0.9564941921858501, + "grad_norm": 2.0051276683807373, + "learning_rate": 6.93830882965868e-08, + "loss": 0.5903, + "step": 13587 + }, + { + "epoch": 0.9565645899331221, + "grad_norm": 1.763647437095642, + "learning_rate": 6.915909939808062e-08, + "loss": 0.6669, + "step": 13588 + }, + { + "epoch": 0.9566349876803942, + "grad_norm": 2.1762747764587402, + "learning_rate": 6.89354708402694e-08, + "loss": 0.5582, + "step": 13589 + }, + { + "epoch": 0.9567053854276664, + "grad_norm": 2.0491652488708496, + "learning_rate": 6.871220263478095e-08, + "loss": 0.6248, + "step": 13590 + }, + { + "epoch": 0.9567757831749384, + "grad_norm": 1.8627161979675293, + "learning_rate": 6.848929479322286e-08, + "loss": 0.5746, + "step": 13591 + }, + { + "epoch": 0.9568461809222105, + "grad_norm": 1.8740869760513306, + "learning_rate": 6.826674732718329e-08, + "loss": 0.6643, + "step": 13592 + }, + { + "epoch": 0.9569165786694825, + "grad_norm": 1.8977786302566528, + "learning_rate": 6.804456024823258e-08, + "loss": 0.6286, + "step": 13593 + }, + { + "epoch": 0.9569869764167547, + "grad_norm": 2.4675509929656982, + "learning_rate": 6.78227335679239e-08, + "loss": 0.6601, + "step": 13594 + }, + { + "epoch": 0.9570573741640267, + "grad_norm": 1.7861179113388062, + "learning_rate": 6.760126729778948e-08, + "loss": 0.6436, + "step": 13595 + }, + { + "epoch": 0.9571277719112988, + "grad_norm": 2.0821263790130615, + "learning_rate": 6.738016144934366e-08, + "loss": 0.6429, + "step": 13596 + }, + { + "epoch": 0.957198169658571, + "grad_norm": 2.348494529724121, + "learning_rate": 6.715941603408138e-08, + "loss": 0.5882, + "step": 13597 + }, + { + "epoch": 0.957268567405843, + "grad_norm": 2.142024040222168, + "learning_rate": 6.693903106348043e-08, + "loss": 0.6135, + "step": 13598 + }, + { + "epoch": 0.9573389651531151, + "grad_norm": 2.120490789413452, + "learning_rate": 6.671900654899687e-08, + "loss": 0.7266, + "step": 13599 + }, + { + "epoch": 0.9574093629003871, + "grad_norm": 2.4888932704925537, + "learning_rate": 6.649934250207279e-08, + "loss": 0.6363, + "step": 13600 + }, + { + "epoch": 0.9574797606476593, + "grad_norm": 2.310333728790283, + "learning_rate": 6.628003893412615e-08, + "loss": 0.6688, + "step": 13601 + }, + { + "epoch": 0.9575501583949314, + "grad_norm": 1.9564297199249268, + "learning_rate": 6.606109585656017e-08, + "loss": 0.4714, + "step": 13602 + }, + { + "epoch": 0.9576205561422034, + "grad_norm": 1.7093617916107178, + "learning_rate": 6.584251328075785e-08, + "loss": 0.6653, + "step": 13603 + }, + { + "epoch": 0.9576909538894756, + "grad_norm": 2.188835382461548, + "learning_rate": 6.562429121808277e-08, + "loss": 0.63, + "step": 13604 + }, + { + "epoch": 0.9577613516367476, + "grad_norm": 1.8316766023635864, + "learning_rate": 6.540642967988142e-08, + "loss": 0.598, + "step": 13605 + }, + { + "epoch": 0.9578317493840197, + "grad_norm": 2.0702500343322754, + "learning_rate": 6.518892867748005e-08, + "loss": 0.6616, + "step": 13606 + }, + { + "epoch": 0.9579021471312918, + "grad_norm": 2.1756365299224854, + "learning_rate": 6.497178822218707e-08, + "loss": 0.6875, + "step": 13607 + }, + { + "epoch": 0.9579725448785639, + "grad_norm": 2.3575305938720703, + "learning_rate": 6.475500832529068e-08, + "loss": 0.6875, + "step": 13608 + }, + { + "epoch": 0.958042942625836, + "grad_norm": 1.8828456401824951, + "learning_rate": 6.453858899806352e-08, + "loss": 0.6451, + "step": 13609 + }, + { + "epoch": 0.958113340373108, + "grad_norm": 2.190805435180664, + "learning_rate": 6.432253025175494e-08, + "loss": 0.6011, + "step": 13610 + }, + { + "epoch": 0.9581837381203802, + "grad_norm": 1.8467955589294434, + "learning_rate": 6.410683209760026e-08, + "loss": 0.6955, + "step": 13611 + }, + { + "epoch": 0.9582541358676523, + "grad_norm": 1.8554730415344238, + "learning_rate": 6.389149454681231e-08, + "loss": 0.6153, + "step": 13612 + }, + { + "epoch": 0.9583245336149243, + "grad_norm": 1.9214736223220825, + "learning_rate": 6.367651761058757e-08, + "loss": 0.626, + "step": 13613 + }, + { + "epoch": 0.9583949313621964, + "grad_norm": 2.225468873977661, + "learning_rate": 6.346190130010232e-08, + "loss": 0.696, + "step": 13614 + }, + { + "epoch": 0.9584653291094685, + "grad_norm": 2.6386213302612305, + "learning_rate": 6.324764562651575e-08, + "loss": 0.6364, + "step": 13615 + }, + { + "epoch": 0.9585357268567406, + "grad_norm": 1.8707605600357056, + "learning_rate": 6.303375060096527e-08, + "loss": 0.6791, + "step": 13616 + }, + { + "epoch": 0.9586061246040126, + "grad_norm": 3.8989546298980713, + "learning_rate": 6.282021623457356e-08, + "loss": 0.6301, + "step": 13617 + }, + { + "epoch": 0.9586765223512848, + "grad_norm": 1.9435675144195557, + "learning_rate": 6.260704253844073e-08, + "loss": 0.6889, + "step": 13618 + }, + { + "epoch": 0.9587469200985569, + "grad_norm": 1.8625903129577637, + "learning_rate": 6.239422952365058e-08, + "loss": 0.7497, + "step": 13619 + }, + { + "epoch": 0.9588173178458289, + "grad_norm": 2.246880292892456, + "learning_rate": 6.218177720126827e-08, + "loss": 0.683, + "step": 13620 + }, + { + "epoch": 0.958887715593101, + "grad_norm": 2.0311625003814697, + "learning_rate": 6.196968558233873e-08, + "loss": 0.6082, + "step": 13621 + }, + { + "epoch": 0.9589581133403731, + "grad_norm": 1.8701856136322021, + "learning_rate": 6.175795467788748e-08, + "loss": 0.6778, + "step": 13622 + }, + { + "epoch": 0.9590285110876452, + "grad_norm": 1.9871346950531006, + "learning_rate": 6.154658449892447e-08, + "loss": 0.707, + "step": 13623 + }, + { + "epoch": 0.9590989088349173, + "grad_norm": 1.9327200651168823, + "learning_rate": 6.133557505643871e-08, + "loss": 0.6129, + "step": 13624 + }, + { + "epoch": 0.9591693065821894, + "grad_norm": 1.8003798723220825, + "learning_rate": 6.112492636139977e-08, + "loss": 0.5667, + "step": 13625 + }, + { + "epoch": 0.9592397043294615, + "grad_norm": 2.252497911453247, + "learning_rate": 6.091463842476009e-08, + "loss": 0.6133, + "step": 13626 + }, + { + "epoch": 0.9593101020767335, + "grad_norm": 1.715781569480896, + "learning_rate": 6.070471125745425e-08, + "loss": 0.6085, + "step": 13627 + }, + { + "epoch": 0.9593804998240056, + "grad_norm": 1.992509126663208, + "learning_rate": 6.049514487039354e-08, + "loss": 0.6454, + "step": 13628 + }, + { + "epoch": 0.9594508975712778, + "grad_norm": 2.1212854385375977, + "learning_rate": 6.028593927447523e-08, + "loss": 0.5407, + "step": 13629 + }, + { + "epoch": 0.9595212953185498, + "grad_norm": 2.5157265663146973, + "learning_rate": 6.007709448057563e-08, + "loss": 0.7494, + "step": 13630 + }, + { + "epoch": 0.9595916930658219, + "grad_norm": 1.9883391857147217, + "learning_rate": 5.986861049955394e-08, + "loss": 0.557, + "step": 13631 + }, + { + "epoch": 0.959662090813094, + "grad_norm": 2.130262613296509, + "learning_rate": 5.966048734224837e-08, + "loss": 0.73, + "step": 13632 + }, + { + "epoch": 0.9597324885603661, + "grad_norm": 2.1431305408477783, + "learning_rate": 5.945272501947929e-08, + "loss": 0.5935, + "step": 13633 + }, + { + "epoch": 0.9598028863076381, + "grad_norm": 1.9333453178405762, + "learning_rate": 5.9245323542049145e-08, + "loss": 0.5559, + "step": 13634 + }, + { + "epoch": 0.9598732840549102, + "grad_norm": 2.570906639099121, + "learning_rate": 5.9038282920740205e-08, + "loss": 0.6179, + "step": 13635 + }, + { + "epoch": 0.9599436818021824, + "grad_norm": 2.0127527713775635, + "learning_rate": 5.883160316631686e-08, + "loss": 0.6484, + "step": 13636 + }, + { + "epoch": 0.9600140795494544, + "grad_norm": 2.1900198459625244, + "learning_rate": 5.862528428952485e-08, + "loss": 0.6746, + "step": 13637 + }, + { + "epoch": 0.9600844772967265, + "grad_norm": 2.0005452632904053, + "learning_rate": 5.8419326301091255e-08, + "loss": 0.6448, + "step": 13638 + }, + { + "epoch": 0.9601548750439985, + "grad_norm": 2.011763095855713, + "learning_rate": 5.821372921172374e-08, + "loss": 0.6064, + "step": 13639 + }, + { + "epoch": 0.9602252727912707, + "grad_norm": 1.964816689491272, + "learning_rate": 5.800849303211053e-08, + "loss": 0.6651, + "step": 13640 + }, + { + "epoch": 0.9602956705385428, + "grad_norm": 2.1794002056121826, + "learning_rate": 5.780361777292275e-08, + "loss": 0.7228, + "step": 13641 + }, + { + "epoch": 0.9603660682858148, + "grad_norm": 2.037421226501465, + "learning_rate": 5.7599103444812114e-08, + "loss": 0.5911, + "step": 13642 + }, + { + "epoch": 0.960436466033087, + "grad_norm": 2.1224215030670166, + "learning_rate": 5.739495005841244e-08, + "loss": 0.6842, + "step": 13643 + }, + { + "epoch": 0.960506863780359, + "grad_norm": 1.9006543159484863, + "learning_rate": 5.7191157624335796e-08, + "loss": 0.6786, + "step": 13644 + }, + { + "epoch": 0.9605772615276311, + "grad_norm": 1.9615905284881592, + "learning_rate": 5.698772615317948e-08, + "loss": 0.5202, + "step": 13645 + }, + { + "epoch": 0.9606476592749033, + "grad_norm": 1.8670190572738647, + "learning_rate": 5.6784655655519044e-08, + "loss": 0.6248, + "step": 13646 + }, + { + "epoch": 0.9607180570221753, + "grad_norm": 1.9940986633300781, + "learning_rate": 5.6581946141912144e-08, + "loss": 0.6862, + "step": 13647 + }, + { + "epoch": 0.9607884547694474, + "grad_norm": 1.7541002035140991, + "learning_rate": 5.6379597622898566e-08, + "loss": 0.5908, + "step": 13648 + }, + { + "epoch": 0.9608588525167194, + "grad_norm": 2.1047027111053467, + "learning_rate": 5.617761010899791e-08, + "loss": 0.6175, + "step": 13649 + }, + { + "epoch": 0.9609292502639916, + "grad_norm": 2.541874647140503, + "learning_rate": 5.5975983610711876e-08, + "loss": 0.7125, + "step": 13650 + }, + { + "epoch": 0.9609996480112636, + "grad_norm": 3.5797765254974365, + "learning_rate": 5.577471813852353e-08, + "loss": 0.7839, + "step": 13651 + }, + { + "epoch": 0.9610700457585357, + "grad_norm": 2.0358362197875977, + "learning_rate": 5.557381370289727e-08, + "loss": 0.56, + "step": 13652 + }, + { + "epoch": 0.9611404435058079, + "grad_norm": 1.8879426717758179, + "learning_rate": 5.537327031427652e-08, + "loss": 0.6267, + "step": 13653 + }, + { + "epoch": 0.9612108412530799, + "grad_norm": 2.4014925956726074, + "learning_rate": 5.517308798308995e-08, + "loss": 0.7053, + "step": 13654 + }, + { + "epoch": 0.961281239000352, + "grad_norm": 2.054710865020752, + "learning_rate": 5.497326671974367e-08, + "loss": 0.6721, + "step": 13655 + }, + { + "epoch": 0.961351636747624, + "grad_norm": 1.744775414466858, + "learning_rate": 5.477380653462671e-08, + "loss": 0.7076, + "step": 13656 + }, + { + "epoch": 0.9614220344948962, + "grad_norm": 2.3048479557037354, + "learning_rate": 5.4574707438110216e-08, + "loss": 0.7094, + "step": 13657 + }, + { + "epoch": 0.9614924322421683, + "grad_norm": 1.9648104906082153, + "learning_rate": 5.437596944054435e-08, + "loss": 0.5633, + "step": 13658 + }, + { + "epoch": 0.9615628299894403, + "grad_norm": 1.8399486541748047, + "learning_rate": 5.417759255226218e-08, + "loss": 0.6452, + "step": 13659 + }, + { + "epoch": 0.9616332277367124, + "grad_norm": 1.984792709350586, + "learning_rate": 5.397957678357812e-08, + "loss": 0.6495, + "step": 13660 + }, + { + "epoch": 0.9617036254839845, + "grad_norm": 1.9999053478240967, + "learning_rate": 5.378192214478561e-08, + "loss": 0.6539, + "step": 13661 + }, + { + "epoch": 0.9617740232312566, + "grad_norm": 1.8746427297592163, + "learning_rate": 5.358462864616253e-08, + "loss": 0.7237, + "step": 13662 + }, + { + "epoch": 0.9618444209785287, + "grad_norm": 2.001176357269287, + "learning_rate": 5.3387696297965025e-08, + "loss": 0.6083, + "step": 13663 + }, + { + "epoch": 0.9619148187258008, + "grad_norm": 3.503817081451416, + "learning_rate": 5.319112511043289e-08, + "loss": 0.7136, + "step": 13664 + }, + { + "epoch": 0.9619852164730729, + "grad_norm": 1.9593348503112793, + "learning_rate": 5.299491509378495e-08, + "loss": 0.6674, + "step": 13665 + }, + { + "epoch": 0.9620556142203449, + "grad_norm": 1.7852369546890259, + "learning_rate": 5.279906625822373e-08, + "loss": 0.6685, + "step": 13666 + }, + { + "epoch": 0.962126011967617, + "grad_norm": 1.710307002067566, + "learning_rate": 5.260357861393072e-08, + "loss": 0.6338, + "step": 13667 + }, + { + "epoch": 0.9621964097148892, + "grad_norm": 2.1761062145233154, + "learning_rate": 5.2408452171069593e-08, + "loss": 0.533, + "step": 13668 + }, + { + "epoch": 0.9622668074621612, + "grad_norm": 1.9399069547653198, + "learning_rate": 5.221368693978456e-08, + "loss": 0.6902, + "step": 13669 + }, + { + "epoch": 0.9623372052094333, + "grad_norm": 2.0464670658111572, + "learning_rate": 5.201928293020275e-08, + "loss": 0.6279, + "step": 13670 + }, + { + "epoch": 0.9624076029567054, + "grad_norm": 1.9260276556015015, + "learning_rate": 5.182524015243028e-08, + "loss": 0.6916, + "step": 13671 + }, + { + "epoch": 0.9624780007039775, + "grad_norm": 2.711745500564575, + "learning_rate": 5.1631558616556996e-08, + "loss": 0.5955, + "step": 13672 + }, + { + "epoch": 0.9625483984512495, + "grad_norm": 1.569609522819519, + "learning_rate": 5.1438238332651706e-08, + "loss": 0.4917, + "step": 13673 + }, + { + "epoch": 0.9626187961985216, + "grad_norm": 1.781225562095642, + "learning_rate": 5.1245279310764615e-08, + "loss": 0.6134, + "step": 13674 + }, + { + "epoch": 0.9626891939457938, + "grad_norm": 2.5091304779052734, + "learning_rate": 5.105268156092957e-08, + "loss": 0.5715, + "step": 13675 + }, + { + "epoch": 0.9627595916930658, + "grad_norm": 1.6548396348953247, + "learning_rate": 5.086044509315868e-08, + "loss": 0.6333, + "step": 13676 + }, + { + "epoch": 0.9628299894403379, + "grad_norm": 2.183459997177124, + "learning_rate": 5.066856991744617e-08, + "loss": 0.581, + "step": 13677 + }, + { + "epoch": 0.96290038718761, + "grad_norm": 2.3391876220703125, + "learning_rate": 5.047705604376918e-08, + "loss": 0.7035, + "step": 13678 + }, + { + "epoch": 0.9629707849348821, + "grad_norm": 1.8509852886199951, + "learning_rate": 5.028590348208306e-08, + "loss": 0.7064, + "step": 13679 + }, + { + "epoch": 0.9630411826821542, + "grad_norm": 2.7745394706726074, + "learning_rate": 5.009511224232765e-08, + "loss": 0.6997, + "step": 13680 + }, + { + "epoch": 0.9631115804294262, + "grad_norm": 2.326376438140869, + "learning_rate": 4.99046823344218e-08, + "loss": 0.534, + "step": 13681 + }, + { + "epoch": 0.9631819781766984, + "grad_norm": 1.7370011806488037, + "learning_rate": 4.9714613768264916e-08, + "loss": 0.5492, + "step": 13682 + }, + { + "epoch": 0.9632523759239704, + "grad_norm": 1.788685917854309, + "learning_rate": 4.952490655374009e-08, + "loss": 0.5602, + "step": 13683 + }, + { + "epoch": 0.9633227736712425, + "grad_norm": 1.8205682039260864, + "learning_rate": 4.933556070071099e-08, + "loss": 0.5379, + "step": 13684 + }, + { + "epoch": 0.9633931714185147, + "grad_norm": 1.8902606964111328, + "learning_rate": 4.914657621901952e-08, + "loss": 0.6016, + "step": 13685 + }, + { + "epoch": 0.9634635691657867, + "grad_norm": 2.2452504634857178, + "learning_rate": 4.89579531184936e-08, + "loss": 0.7491, + "step": 13686 + }, + { + "epoch": 0.9635339669130588, + "grad_norm": 1.9576537609100342, + "learning_rate": 4.876969140893861e-08, + "loss": 0.6756, + "step": 13687 + }, + { + "epoch": 0.9636043646603308, + "grad_norm": 1.6032516956329346, + "learning_rate": 4.85817911001436e-08, + "loss": 0.7054, + "step": 13688 + }, + { + "epoch": 0.963674762407603, + "grad_norm": 1.879217505455017, + "learning_rate": 4.8394252201875874e-08, + "loss": 0.6189, + "step": 13689 + }, + { + "epoch": 0.963745160154875, + "grad_norm": 1.9222198724746704, + "learning_rate": 4.8207074723886414e-08, + "loss": 0.6918, + "step": 13690 + }, + { + "epoch": 0.9638155579021471, + "grad_norm": 2.85994553565979, + "learning_rate": 4.8020258675907535e-08, + "loss": 0.6966, + "step": 13691 + }, + { + "epoch": 0.9638859556494193, + "grad_norm": 1.9601545333862305, + "learning_rate": 4.7833804067651364e-08, + "loss": 0.5764, + "step": 13692 + }, + { + "epoch": 0.9639563533966913, + "grad_norm": 3.5736024379730225, + "learning_rate": 4.764771090881292e-08, + "loss": 0.6153, + "step": 13693 + }, + { + "epoch": 0.9640267511439634, + "grad_norm": 1.934577226638794, + "learning_rate": 4.7461979209065474e-08, + "loss": 0.6435, + "step": 13694 + }, + { + "epoch": 0.9640971488912354, + "grad_norm": 2.4388949871063232, + "learning_rate": 4.727660897806596e-08, + "loss": 0.6673, + "step": 13695 + }, + { + "epoch": 0.9641675466385076, + "grad_norm": 1.8684061765670776, + "learning_rate": 4.7091600225453443e-08, + "loss": 0.5635, + "step": 13696 + }, + { + "epoch": 0.9642379443857797, + "grad_norm": 2.0020551681518555, + "learning_rate": 4.690695296084446e-08, + "loss": 0.6221, + "step": 13697 + }, + { + "epoch": 0.9643083421330517, + "grad_norm": 2.44270920753479, + "learning_rate": 4.6722667193840774e-08, + "loss": 0.5463, + "step": 13698 + }, + { + "epoch": 0.9643787398803239, + "grad_norm": 2.484109401702881, + "learning_rate": 4.653874293402238e-08, + "loss": 0.5261, + "step": 13699 + }, + { + "epoch": 0.9644491376275959, + "grad_norm": 1.8915208578109741, + "learning_rate": 4.635518019095297e-08, + "loss": 0.6331, + "step": 13700 + }, + { + "epoch": 0.964519535374868, + "grad_norm": 1.8285003900527954, + "learning_rate": 4.617197897417446e-08, + "loss": 0.5944, + "step": 13701 + }, + { + "epoch": 0.9645899331221401, + "grad_norm": 2.1266233921051025, + "learning_rate": 4.598913929321324e-08, + "loss": 0.6461, + "step": 13702 + }, + { + "epoch": 0.9646603308694122, + "grad_norm": 2.3052542209625244, + "learning_rate": 4.5806661157573925e-08, + "loss": 0.6764, + "step": 13703 + }, + { + "epoch": 0.9647307286166843, + "grad_norm": 2.432960033416748, + "learning_rate": 4.562454457674481e-08, + "loss": 0.6349, + "step": 13704 + }, + { + "epoch": 0.9648011263639563, + "grad_norm": 1.808173418045044, + "learning_rate": 4.544278956019398e-08, + "loss": 0.5895, + "step": 13705 + }, + { + "epoch": 0.9648715241112285, + "grad_norm": 1.897590160369873, + "learning_rate": 4.5261396117370124e-08, + "loss": 0.565, + "step": 13706 + }, + { + "epoch": 0.9649419218585005, + "grad_norm": 1.9976789951324463, + "learning_rate": 4.508036425770556e-08, + "loss": 0.5121, + "step": 13707 + }, + { + "epoch": 0.9650123196057726, + "grad_norm": 1.755511999130249, + "learning_rate": 4.4899693990611666e-08, + "loss": 0.5495, + "step": 13708 + }, + { + "epoch": 0.9650827173530447, + "grad_norm": 1.722562313079834, + "learning_rate": 4.4719385325481144e-08, + "loss": 0.6023, + "step": 13709 + }, + { + "epoch": 0.9651531151003168, + "grad_norm": 2.240427255630493, + "learning_rate": 4.45394382716896e-08, + "loss": 0.6186, + "step": 13710 + }, + { + "epoch": 0.9652235128475889, + "grad_norm": 1.8379778861999512, + "learning_rate": 4.4359852838591676e-08, + "loss": 0.5972, + "step": 13711 + }, + { + "epoch": 0.9652939105948609, + "grad_norm": 2.2406554222106934, + "learning_rate": 4.418062903552411e-08, + "loss": 0.7509, + "step": 13712 + }, + { + "epoch": 0.965364308342133, + "grad_norm": 1.744474172592163, + "learning_rate": 4.400176687180501e-08, + "loss": 0.5903, + "step": 13713 + }, + { + "epoch": 0.9654347060894052, + "grad_norm": 2.278252363204956, + "learning_rate": 4.3823266356733835e-08, + "loss": 0.6373, + "step": 13714 + }, + { + "epoch": 0.9655051038366772, + "grad_norm": 2.012127637863159, + "learning_rate": 4.364512749959137e-08, + "loss": 0.6659, + "step": 13715 + }, + { + "epoch": 0.9655755015839493, + "grad_norm": 2.0083298683166504, + "learning_rate": 4.346735030963822e-08, + "loss": 0.7038, + "step": 13716 + }, + { + "epoch": 0.9656458993312214, + "grad_norm": 2.1117491722106934, + "learning_rate": 4.328993479611864e-08, + "loss": 0.5787, + "step": 13717 + }, + { + "epoch": 0.9657162970784935, + "grad_norm": 1.7972369194030762, + "learning_rate": 4.3112880968254385e-08, + "loss": 0.6348, + "step": 13718 + }, + { + "epoch": 0.9657866948257656, + "grad_norm": 2.3543920516967773, + "learning_rate": 4.293618883525318e-08, + "loss": 0.5828, + "step": 13719 + }, + { + "epoch": 0.9658570925730376, + "grad_norm": 2.033046007156372, + "learning_rate": 4.275985840629948e-08, + "loss": 0.5921, + "step": 13720 + }, + { + "epoch": 0.9659274903203098, + "grad_norm": 2.2680375576019287, + "learning_rate": 4.2583889690562146e-08, + "loss": 0.5027, + "step": 13721 + }, + { + "epoch": 0.9659978880675818, + "grad_norm": 2.0761473178863525, + "learning_rate": 4.24082826971891e-08, + "loss": 0.6492, + "step": 13722 + }, + { + "epoch": 0.9660682858148539, + "grad_norm": 1.8724055290222168, + "learning_rate": 4.223303743531037e-08, + "loss": 0.644, + "step": 13723 + }, + { + "epoch": 0.9661386835621261, + "grad_norm": 1.912227749824524, + "learning_rate": 4.2058153914037334e-08, + "loss": 0.6217, + "step": 13724 + }, + { + "epoch": 0.9662090813093981, + "grad_norm": 2.0713272094726562, + "learning_rate": 4.188363214246271e-08, + "loss": 0.7078, + "step": 13725 + }, + { + "epoch": 0.9662794790566702, + "grad_norm": 1.892340898513794, + "learning_rate": 4.1709472129659805e-08, + "loss": 0.6538, + "step": 13726 + }, + { + "epoch": 0.9663498768039422, + "grad_norm": 2.1259801387786865, + "learning_rate": 4.1535673884683264e-08, + "loss": 0.6784, + "step": 13727 + }, + { + "epoch": 0.9664202745512144, + "grad_norm": 2.2566416263580322, + "learning_rate": 4.1362237416568305e-08, + "loss": 0.6666, + "step": 13728 + }, + { + "epoch": 0.9664906722984864, + "grad_norm": 2.335249900817871, + "learning_rate": 4.1189162734333815e-08, + "loss": 0.6866, + "step": 13729 + }, + { + "epoch": 0.9665610700457585, + "grad_norm": 1.7556648254394531, + "learning_rate": 4.1016449846976165e-08, + "loss": 0.6344, + "step": 13730 + }, + { + "epoch": 0.9666314677930307, + "grad_norm": 2.0587639808654785, + "learning_rate": 4.084409876347539e-08, + "loss": 0.5968, + "step": 13731 + }, + { + "epoch": 0.9667018655403027, + "grad_norm": 1.611339807510376, + "learning_rate": 4.0672109492792877e-08, + "loss": 0.6389, + "step": 13732 + }, + { + "epoch": 0.9667722632875748, + "grad_norm": 2.242575168609619, + "learning_rate": 4.0500482043870576e-08, + "loss": 0.6742, + "step": 13733 + }, + { + "epoch": 0.9668426610348468, + "grad_norm": 1.7965474128723145, + "learning_rate": 4.032921642563103e-08, + "loss": 0.7114, + "step": 13734 + }, + { + "epoch": 0.966913058782119, + "grad_norm": 1.9060027599334717, + "learning_rate": 4.015831264697889e-08, + "loss": 0.7644, + "step": 13735 + }, + { + "epoch": 0.9669834565293911, + "grad_norm": 1.8464876413345337, + "learning_rate": 3.99877707167986e-08, + "loss": 0.6625, + "step": 13736 + }, + { + "epoch": 0.9670538542766631, + "grad_norm": 2.0757455825805664, + "learning_rate": 3.9817590643957514e-08, + "loss": 0.5526, + "step": 13737 + }, + { + "epoch": 0.9671242520239353, + "grad_norm": 2.465709924697876, + "learning_rate": 3.9647772437303555e-08, + "loss": 0.6474, + "step": 13738 + }, + { + "epoch": 0.9671946497712073, + "grad_norm": 1.9949442148208618, + "learning_rate": 3.947831610566521e-08, + "loss": 0.7046, + "step": 13739 + }, + { + "epoch": 0.9672650475184794, + "grad_norm": 2.1636664867401123, + "learning_rate": 3.930922165785311e-08, + "loss": 0.6579, + "step": 13740 + }, + { + "epoch": 0.9673354452657515, + "grad_norm": 2.1165449619293213, + "learning_rate": 3.9140489102659214e-08, + "loss": 0.6063, + "step": 13741 + }, + { + "epoch": 0.9674058430130236, + "grad_norm": 2.056638717651367, + "learning_rate": 3.8972118448855283e-08, + "loss": 0.5656, + "step": 13742 + }, + { + "epoch": 0.9674762407602957, + "grad_norm": 2.017348289489746, + "learning_rate": 3.8804109705194434e-08, + "loss": 0.5285, + "step": 13743 + }, + { + "epoch": 0.9675466385075677, + "grad_norm": 1.928014874458313, + "learning_rate": 3.863646288041267e-08, + "loss": 0.562, + "step": 13744 + }, + { + "epoch": 0.9676170362548399, + "grad_norm": 2.2839653491973877, + "learning_rate": 3.846917798322658e-08, + "loss": 0.6967, + "step": 13745 + }, + { + "epoch": 0.9676874340021119, + "grad_norm": 2.198289394378662, + "learning_rate": 3.830225502233175e-08, + "loss": 0.6358, + "step": 13746 + }, + { + "epoch": 0.967757831749384, + "grad_norm": 1.8184248208999634, + "learning_rate": 3.813569400640826e-08, + "loss": 0.5485, + "step": 13747 + }, + { + "epoch": 0.9678282294966561, + "grad_norm": 2.0214271545410156, + "learning_rate": 3.796949494411439e-08, + "loss": 0.6928, + "step": 13748 + }, + { + "epoch": 0.9678986272439282, + "grad_norm": 1.9419320821762085, + "learning_rate": 3.780365784409212e-08, + "loss": 0.7494, + "step": 13749 + }, + { + "epoch": 0.9679690249912003, + "grad_norm": 2.0093870162963867, + "learning_rate": 3.763818271496244e-08, + "loss": 0.6123, + "step": 13750 + }, + { + "epoch": 0.9680394227384723, + "grad_norm": 3.599543809890747, + "learning_rate": 3.747306956533003e-08, + "loss": 0.6356, + "step": 13751 + }, + { + "epoch": 0.9681098204857445, + "grad_norm": 1.6152442693710327, + "learning_rate": 3.730831840377702e-08, + "loss": 0.6024, + "step": 13752 + }, + { + "epoch": 0.9681802182330166, + "grad_norm": 2.727076292037964, + "learning_rate": 3.7143929238871555e-08, + "loss": 0.7176, + "step": 13753 + }, + { + "epoch": 0.9682506159802886, + "grad_norm": 1.813298225402832, + "learning_rate": 3.6979902079159245e-08, + "loss": 0.6077, + "step": 13754 + }, + { + "epoch": 0.9683210137275607, + "grad_norm": 2.0602526664733887, + "learning_rate": 3.6816236933167045e-08, + "loss": 0.7281, + "step": 13755 + }, + { + "epoch": 0.9683914114748328, + "grad_norm": 2.108492374420166, + "learning_rate": 3.66529338094056e-08, + "loss": 0.6805, + "step": 13756 + }, + { + "epoch": 0.9684618092221049, + "grad_norm": 2.101062059402466, + "learning_rate": 3.648999271636377e-08, + "loss": 0.6342, + "step": 13757 + }, + { + "epoch": 0.968532206969377, + "grad_norm": 2.110302686691284, + "learning_rate": 3.6327413662514126e-08, + "loss": 0.5507, + "step": 13758 + }, + { + "epoch": 0.968602604716649, + "grad_norm": 2.1095926761627197, + "learning_rate": 3.6165196656309016e-08, + "loss": 0.6567, + "step": 13759 + }, + { + "epoch": 0.9686730024639212, + "grad_norm": 2.2759459018707275, + "learning_rate": 3.6003341706182136e-08, + "loss": 0.6682, + "step": 13760 + }, + { + "epoch": 0.9687434002111932, + "grad_norm": 2.211298704147339, + "learning_rate": 3.5841848820548525e-08, + "loss": 0.6534, + "step": 13761 + }, + { + "epoch": 0.9688137979584653, + "grad_norm": 1.9167650938034058, + "learning_rate": 3.568071800780459e-08, + "loss": 0.7187, + "step": 13762 + }, + { + "epoch": 0.9688841957057374, + "grad_norm": 2.0093300342559814, + "learning_rate": 3.551994927632651e-08, + "loss": 0.6064, + "step": 13763 + }, + { + "epoch": 0.9689545934530095, + "grad_norm": 2.1279139518737793, + "learning_rate": 3.535954263447416e-08, + "loss": 0.6517, + "step": 13764 + }, + { + "epoch": 0.9690249912002816, + "grad_norm": 1.8638882637023926, + "learning_rate": 3.51994980905872e-08, + "loss": 0.7537, + "step": 13765 + }, + { + "epoch": 0.9690953889475536, + "grad_norm": 2.17608380317688, + "learning_rate": 3.5039815652985864e-08, + "loss": 0.5723, + "step": 13766 + }, + { + "epoch": 0.9691657866948258, + "grad_norm": 1.6910866498947144, + "learning_rate": 3.4880495329971724e-08, + "loss": 0.7545, + "step": 13767 + }, + { + "epoch": 0.9692361844420978, + "grad_norm": 2.3649721145629883, + "learning_rate": 3.4721537129829285e-08, + "loss": 0.6347, + "step": 13768 + }, + { + "epoch": 0.9693065821893699, + "grad_norm": 2.007607936859131, + "learning_rate": 3.456294106082125e-08, + "loss": 0.5703, + "step": 13769 + }, + { + "epoch": 0.9693769799366421, + "grad_norm": 2.212717294692993, + "learning_rate": 3.440470713119559e-08, + "loss": 0.6582, + "step": 13770 + }, + { + "epoch": 0.9694473776839141, + "grad_norm": 2.257544994354248, + "learning_rate": 3.424683534917694e-08, + "loss": 0.7785, + "step": 13771 + }, + { + "epoch": 0.9695177754311862, + "grad_norm": 2.1596286296844482, + "learning_rate": 3.4089325722973626e-08, + "loss": 0.6152, + "step": 13772 + }, + { + "epoch": 0.9695881731784582, + "grad_norm": 1.7524927854537964, + "learning_rate": 3.3932178260775325e-08, + "loss": 0.6171, + "step": 13773 + }, + { + "epoch": 0.9696585709257304, + "grad_norm": 2.126952886581421, + "learning_rate": 3.3775392970751495e-08, + "loss": 0.6621, + "step": 13774 + }, + { + "epoch": 0.9697289686730025, + "grad_norm": 3.2249696254730225, + "learning_rate": 3.3618969861054504e-08, + "loss": 0.6192, + "step": 13775 + }, + { + "epoch": 0.9697993664202745, + "grad_norm": 1.8159661293029785, + "learning_rate": 3.3462908939815736e-08, + "loss": 0.6157, + "step": 13776 + }, + { + "epoch": 0.9698697641675467, + "grad_norm": 2.0638270378112793, + "learning_rate": 3.330721021515026e-08, + "loss": 0.6378, + "step": 13777 + }, + { + "epoch": 0.9699401619148187, + "grad_norm": 2.250180244445801, + "learning_rate": 3.315187369515216e-08, + "loss": 0.7498, + "step": 13778 + }, + { + "epoch": 0.9700105596620908, + "grad_norm": 2.424560070037842, + "learning_rate": 3.2996899387897625e-08, + "loss": 0.6402, + "step": 13779 + }, + { + "epoch": 0.970080957409363, + "grad_norm": 2.0093135833740234, + "learning_rate": 3.284228730144423e-08, + "loss": 0.6501, + "step": 13780 + }, + { + "epoch": 0.970151355156635, + "grad_norm": 1.8810350894927979, + "learning_rate": 3.268803744383009e-08, + "loss": 0.5327, + "step": 13781 + }, + { + "epoch": 0.9702217529039071, + "grad_norm": 2.0646541118621826, + "learning_rate": 3.2534149823074676e-08, + "loss": 0.6755, + "step": 13782 + }, + { + "epoch": 0.9702921506511791, + "grad_norm": 2.23921799659729, + "learning_rate": 3.238062444717882e-08, + "loss": 0.6152, + "step": 13783 + }, + { + "epoch": 0.9703625483984513, + "grad_norm": 2.328871965408325, + "learning_rate": 3.222746132412468e-08, + "loss": 0.8001, + "step": 13784 + }, + { + "epoch": 0.9704329461457233, + "grad_norm": 3.347743511199951, + "learning_rate": 3.2074660461875014e-08, + "loss": 0.6823, + "step": 13785 + }, + { + "epoch": 0.9705033438929954, + "grad_norm": 2.5995590686798096, + "learning_rate": 3.192222186837468e-08, + "loss": 0.5941, + "step": 13786 + }, + { + "epoch": 0.9705737416402676, + "grad_norm": 2.1383590698242188, + "learning_rate": 3.177014555154833e-08, + "loss": 0.675, + "step": 13787 + }, + { + "epoch": 0.9706441393875396, + "grad_norm": 2.100113868713379, + "learning_rate": 3.161843151930277e-08, + "loss": 0.6546, + "step": 13788 + }, + { + "epoch": 0.9707145371348117, + "grad_norm": 1.831207036972046, + "learning_rate": 3.146707977952612e-08, + "loss": 0.6857, + "step": 13789 + }, + { + "epoch": 0.9707849348820837, + "grad_norm": 1.7736589908599854, + "learning_rate": 3.131609034008787e-08, + "loss": 0.5573, + "step": 13790 + }, + { + "epoch": 0.9708553326293559, + "grad_norm": 1.8500123023986816, + "learning_rate": 3.116546320883573e-08, + "loss": 0.7001, + "step": 13791 + }, + { + "epoch": 0.970925730376628, + "grad_norm": 1.9352549314498901, + "learning_rate": 3.1015198393603446e-08, + "loss": 0.5978, + "step": 13792 + }, + { + "epoch": 0.9709961281239, + "grad_norm": 1.9136306047439575, + "learning_rate": 3.086529590220222e-08, + "loss": 0.582, + "step": 13793 + }, + { + "epoch": 0.9710665258711721, + "grad_norm": 2.065199375152588, + "learning_rate": 3.071575574242613e-08, + "loss": 0.6702, + "step": 13794 + }, + { + "epoch": 0.9711369236184442, + "grad_norm": 3.1111533641815186, + "learning_rate": 3.0566577922049864e-08, + "loss": 0.6772, + "step": 13795 + }, + { + "epoch": 0.9712073213657163, + "grad_norm": 1.74871027469635, + "learning_rate": 3.0417762448828655e-08, + "loss": 0.599, + "step": 13796 + }, + { + "epoch": 0.9712777191129884, + "grad_norm": 2.356180191040039, + "learning_rate": 3.026930933050065e-08, + "loss": 0.638, + "step": 13797 + }, + { + "epoch": 0.9713481168602605, + "grad_norm": 2.254185199737549, + "learning_rate": 3.0121218574783004e-08, + "loss": 0.8017, + "step": 13798 + }, + { + "epoch": 0.9714185146075326, + "grad_norm": 2.1784896850585938, + "learning_rate": 2.9973490189375005e-08, + "loss": 0.5563, + "step": 13799 + }, + { + "epoch": 0.9714889123548046, + "grad_norm": 2.4291844367980957, + "learning_rate": 2.982612418195807e-08, + "loss": 0.6661, + "step": 13800 + }, + { + "epoch": 0.9715593101020767, + "grad_norm": 2.2967331409454346, + "learning_rate": 2.9679120560193395e-08, + "loss": 0.7242, + "step": 13801 + }, + { + "epoch": 0.9716297078493488, + "grad_norm": 1.7246410846710205, + "learning_rate": 2.9532479331724314e-08, + "loss": 0.4971, + "step": 13802 + }, + { + "epoch": 0.9717001055966209, + "grad_norm": 2.4033584594726562, + "learning_rate": 2.9386200504173952e-08, + "loss": 0.6688, + "step": 13803 + }, + { + "epoch": 0.971770503343893, + "grad_norm": 2.190147638320923, + "learning_rate": 2.9240284085148338e-08, + "loss": 0.5297, + "step": 13804 + }, + { + "epoch": 0.971840901091165, + "grad_norm": 2.003810405731201, + "learning_rate": 2.9094730082233287e-08, + "loss": 0.6186, + "step": 13805 + }, + { + "epoch": 0.9719112988384372, + "grad_norm": 2.5083229541778564, + "learning_rate": 2.894953850299675e-08, + "loss": 0.6584, + "step": 13806 + }, + { + "epoch": 0.9719816965857092, + "grad_norm": 2.0494346618652344, + "learning_rate": 2.8804709354986468e-08, + "loss": 0.5935, + "step": 13807 + }, + { + "epoch": 0.9720520943329813, + "grad_norm": 1.530909538269043, + "learning_rate": 2.866024264573308e-08, + "loss": 0.6716, + "step": 13808 + }, + { + "epoch": 0.9721224920802535, + "grad_norm": 2.096888542175293, + "learning_rate": 2.85161383827478e-08, + "loss": 0.6397, + "step": 13809 + }, + { + "epoch": 0.9721928898275255, + "grad_norm": 2.1749026775360107, + "learning_rate": 2.837239657352164e-08, + "loss": 0.6469, + "step": 13810 + }, + { + "epoch": 0.9722632875747976, + "grad_norm": 2.020385265350342, + "learning_rate": 2.8229017225528507e-08, + "loss": 0.5683, + "step": 13811 + }, + { + "epoch": 0.9723336853220697, + "grad_norm": 2.1140897274017334, + "learning_rate": 2.8086000346222882e-08, + "loss": 0.6913, + "step": 13812 + }, + { + "epoch": 0.9724040830693418, + "grad_norm": 1.8613414764404297, + "learning_rate": 2.7943345943040596e-08, + "loss": 0.5747, + "step": 13813 + }, + { + "epoch": 0.9724744808166139, + "grad_norm": 2.575803279876709, + "learning_rate": 2.7801054023397276e-08, + "loss": 0.5775, + "step": 13814 + }, + { + "epoch": 0.9725448785638859, + "grad_norm": 1.8442661762237549, + "learning_rate": 2.765912459469222e-08, + "loss": 0.6239, + "step": 13815 + }, + { + "epoch": 0.9726152763111581, + "grad_norm": 1.7612059116363525, + "learning_rate": 2.7517557664302973e-08, + "loss": 0.6679, + "step": 13816 + }, + { + "epoch": 0.9726856740584301, + "grad_norm": 2.4677224159240723, + "learning_rate": 2.7376353239591534e-08, + "loss": 0.5866, + "step": 13817 + }, + { + "epoch": 0.9727560718057022, + "grad_norm": 2.812812089920044, + "learning_rate": 2.7235511327898143e-08, + "loss": 0.6094, + "step": 13818 + }, + { + "epoch": 0.9728264695529744, + "grad_norm": 2.116992712020874, + "learning_rate": 2.7095031936545164e-08, + "loss": 0.5641, + "step": 13819 + }, + { + "epoch": 0.9728968673002464, + "grad_norm": 2.0512852668762207, + "learning_rate": 2.6954915072836315e-08, + "loss": 0.6909, + "step": 13820 + }, + { + "epoch": 0.9729672650475185, + "grad_norm": 1.9277839660644531, + "learning_rate": 2.6815160744056654e-08, + "loss": 0.6588, + "step": 13821 + }, + { + "epoch": 0.9730376627947905, + "grad_norm": 1.9388375282287598, + "learning_rate": 2.6675768957471812e-08, + "loss": 0.5145, + "step": 13822 + }, + { + "epoch": 0.9731080605420627, + "grad_norm": 2.120755434036255, + "learning_rate": 2.6536739720329548e-08, + "loss": 0.6715, + "step": 13823 + }, + { + "epoch": 0.9731784582893347, + "grad_norm": 1.7773373126983643, + "learning_rate": 2.6398073039858193e-08, + "loss": 0.7001, + "step": 13824 + }, + { + "epoch": 0.9732488560366068, + "grad_norm": 1.9858182668685913, + "learning_rate": 2.6259768923266647e-08, + "loss": 0.6148, + "step": 13825 + }, + { + "epoch": 0.973319253783879, + "grad_norm": 1.94850492477417, + "learning_rate": 2.6121827377744377e-08, + "loss": 0.7132, + "step": 13826 + }, + { + "epoch": 0.973389651531151, + "grad_norm": 1.8273011445999146, + "learning_rate": 2.5984248410465316e-08, + "loss": 0.5647, + "step": 13827 + }, + { + "epoch": 0.9734600492784231, + "grad_norm": 2.043755292892456, + "learning_rate": 2.584703202858085e-08, + "loss": 0.7167, + "step": 13828 + }, + { + "epoch": 0.9735304470256951, + "grad_norm": 2.362682819366455, + "learning_rate": 2.571017823922528e-08, + "loss": 0.6403, + "step": 13829 + }, + { + "epoch": 0.9736008447729673, + "grad_norm": 1.7078474760055542, + "learning_rate": 2.557368704951346e-08, + "loss": 0.6568, + "step": 13830 + }, + { + "epoch": 0.9736712425202394, + "grad_norm": 1.7653274536132812, + "learning_rate": 2.5437558466543164e-08, + "loss": 0.57, + "step": 13831 + }, + { + "epoch": 0.9737416402675114, + "grad_norm": 1.721909523010254, + "learning_rate": 2.530179249738962e-08, + "loss": 0.6512, + "step": 13832 + }, + { + "epoch": 0.9738120380147836, + "grad_norm": 1.807871699333191, + "learning_rate": 2.5166389149113288e-08, + "loss": 0.6403, + "step": 13833 + }, + { + "epoch": 0.9738824357620556, + "grad_norm": 2.1584830284118652, + "learning_rate": 2.50313484287521e-08, + "loss": 0.7158, + "step": 13834 + }, + { + "epoch": 0.9739528335093277, + "grad_norm": 1.9495420455932617, + "learning_rate": 2.4896670343329206e-08, + "loss": 0.6859, + "step": 13835 + }, + { + "epoch": 0.9740232312565998, + "grad_norm": 1.9879692792892456, + "learning_rate": 2.476235489984524e-08, + "loss": 0.6168, + "step": 13836 + }, + { + "epoch": 0.9740936290038719, + "grad_norm": 1.999027967453003, + "learning_rate": 2.4628402105282943e-08, + "loss": 0.571, + "step": 13837 + }, + { + "epoch": 0.974164026751144, + "grad_norm": 1.825156807899475, + "learning_rate": 2.4494811966607964e-08, + "loss": 0.5776, + "step": 13838 + }, + { + "epoch": 0.974234424498416, + "grad_norm": 2.056699514389038, + "learning_rate": 2.4361584490764977e-08, + "loss": 0.6569, + "step": 13839 + }, + { + "epoch": 0.9743048222456882, + "grad_norm": 1.8539211750030518, + "learning_rate": 2.422871968467999e-08, + "loss": 0.6684, + "step": 13840 + }, + { + "epoch": 0.9743752199929602, + "grad_norm": 1.9481741189956665, + "learning_rate": 2.40962175552627e-08, + "loss": 0.5466, + "step": 13841 + }, + { + "epoch": 0.9744456177402323, + "grad_norm": 2.0550355911254883, + "learning_rate": 2.3964078109400265e-08, + "loss": 0.5996, + "step": 13842 + }, + { + "epoch": 0.9745160154875044, + "grad_norm": 2.2819433212280273, + "learning_rate": 2.3832301353963526e-08, + "loss": 0.6794, + "step": 13843 + }, + { + "epoch": 0.9745864132347765, + "grad_norm": 1.8703759908676147, + "learning_rate": 2.370088729580233e-08, + "loss": 0.6743, + "step": 13844 + }, + { + "epoch": 0.9746568109820486, + "grad_norm": 1.8964557647705078, + "learning_rate": 2.3569835941750993e-08, + "loss": 0.6691, + "step": 13845 + }, + { + "epoch": 0.9747272087293206, + "grad_norm": 2.0510387420654297, + "learning_rate": 2.3439147298622064e-08, + "loss": 0.6789, + "step": 13846 + }, + { + "epoch": 0.9747976064765927, + "grad_norm": 1.7790250778198242, + "learning_rate": 2.330882137321022e-08, + "loss": 0.6605, + "step": 13847 + }, + { + "epoch": 0.9748680042238649, + "grad_norm": 1.8985987901687622, + "learning_rate": 2.3178858172291484e-08, + "loss": 0.7453, + "step": 13848 + }, + { + "epoch": 0.9749384019711369, + "grad_norm": 1.9306399822235107, + "learning_rate": 2.3049257702622448e-08, + "loss": 0.7393, + "step": 13849 + }, + { + "epoch": 0.975008799718409, + "grad_norm": 1.9223668575286865, + "learning_rate": 2.2920019970940287e-08, + "loss": 0.6218, + "step": 13850 + }, + { + "epoch": 0.9750791974656811, + "grad_norm": 2.1543235778808594, + "learning_rate": 2.2791144983965837e-08, + "loss": 0.5695, + "step": 13851 + }, + { + "epoch": 0.9751495952129532, + "grad_norm": 2.402139186859131, + "learning_rate": 2.266263274839897e-08, + "loss": 0.7162, + "step": 13852 + }, + { + "epoch": 0.9752199929602253, + "grad_norm": 1.901343584060669, + "learning_rate": 2.2534483270920114e-08, + "loss": 0.669, + "step": 13853 + }, + { + "epoch": 0.9752903907074973, + "grad_norm": 1.8808670043945312, + "learning_rate": 2.2406696558193385e-08, + "loss": 0.6065, + "step": 13854 + }, + { + "epoch": 0.9753607884547695, + "grad_norm": 1.8576053380966187, + "learning_rate": 2.2279272616861135e-08, + "loss": 0.6347, + "step": 13855 + }, + { + "epoch": 0.9754311862020415, + "grad_norm": 1.8071579933166504, + "learning_rate": 2.2152211453549396e-08, + "loss": 0.6097, + "step": 13856 + }, + { + "epoch": 0.9755015839493136, + "grad_norm": 1.5272597074508667, + "learning_rate": 2.2025513074863222e-08, + "loss": 0.5678, + "step": 13857 + }, + { + "epoch": 0.9755719816965857, + "grad_norm": 2.009242296218872, + "learning_rate": 2.1899177487390563e-08, + "loss": 0.5959, + "step": 13858 + }, + { + "epoch": 0.9756423794438578, + "grad_norm": 2.187286615371704, + "learning_rate": 2.1773204697699167e-08, + "loss": 0.6089, + "step": 13859 + }, + { + "epoch": 0.9757127771911299, + "grad_norm": 1.8573803901672363, + "learning_rate": 2.164759471233968e-08, + "loss": 0.628, + "step": 13860 + }, + { + "epoch": 0.9757831749384019, + "grad_norm": 2.5342578887939453, + "learning_rate": 2.152234753784099e-08, + "loss": 0.682, + "step": 13861 + }, + { + "epoch": 0.9758535726856741, + "grad_norm": 2.3051044940948486, + "learning_rate": 2.139746318071567e-08, + "loss": 0.6661, + "step": 13862 + }, + { + "epoch": 0.9759239704329461, + "grad_norm": 1.8559985160827637, + "learning_rate": 2.127294164745608e-08, + "loss": 0.6014, + "step": 13863 + }, + { + "epoch": 0.9759943681802182, + "grad_norm": 2.173276662826538, + "learning_rate": 2.1148782944536703e-08, + "loss": 0.6273, + "step": 13864 + }, + { + "epoch": 0.9760647659274904, + "grad_norm": 2.2270429134368896, + "learning_rate": 2.1024987078411826e-08, + "loss": 0.6448, + "step": 13865 + }, + { + "epoch": 0.9761351636747624, + "grad_norm": 2.927433490753174, + "learning_rate": 2.090155405551941e-08, + "loss": 0.6222, + "step": 13866 + }, + { + "epoch": 0.9762055614220345, + "grad_norm": 2.027301788330078, + "learning_rate": 2.0778483882275656e-08, + "loss": 0.6716, + "step": 13867 + }, + { + "epoch": 0.9762759591693065, + "grad_norm": 2.2907028198242188, + "learning_rate": 2.0655776565079665e-08, + "loss": 0.6039, + "step": 13868 + }, + { + "epoch": 0.9763463569165787, + "grad_norm": 1.721142053604126, + "learning_rate": 2.053343211030956e-08, + "loss": 0.5974, + "step": 13869 + }, + { + "epoch": 0.9764167546638508, + "grad_norm": 1.9070703983306885, + "learning_rate": 2.041145052432869e-08, + "loss": 0.5604, + "step": 13870 + }, + { + "epoch": 0.9764871524111228, + "grad_norm": 3.7234890460968018, + "learning_rate": 2.0289831813476322e-08, + "loss": 0.6713, + "step": 13871 + }, + { + "epoch": 0.976557550158395, + "grad_norm": 2.377213478088379, + "learning_rate": 2.016857598407773e-08, + "loss": 0.6441, + "step": 13872 + }, + { + "epoch": 0.976627947905667, + "grad_norm": 2.015456199645996, + "learning_rate": 2.004768304243565e-08, + "loss": 0.599, + "step": 13873 + }, + { + "epoch": 0.9766983456529391, + "grad_norm": 1.7866677045822144, + "learning_rate": 1.9927152994836493e-08, + "loss": 0.6745, + "step": 13874 + }, + { + "epoch": 0.9767687434002112, + "grad_norm": 1.762474536895752, + "learning_rate": 1.98069858475457e-08, + "loss": 0.5882, + "step": 13875 + }, + { + "epoch": 0.9768391411474833, + "grad_norm": 2.118748903274536, + "learning_rate": 1.96871816068116e-08, + "loss": 0.6522, + "step": 13876 + }, + { + "epoch": 0.9769095388947554, + "grad_norm": 1.7152910232543945, + "learning_rate": 1.956774027886232e-08, + "loss": 0.623, + "step": 13877 + }, + { + "epoch": 0.9769799366420274, + "grad_norm": 1.8316621780395508, + "learning_rate": 1.9448661869908123e-08, + "loss": 0.6189, + "step": 13878 + }, + { + "epoch": 0.9770503343892996, + "grad_norm": 1.845401406288147, + "learning_rate": 1.9329946386140607e-08, + "loss": 0.6614, + "step": 13879 + }, + { + "epoch": 0.9771207321365716, + "grad_norm": 1.9917969703674316, + "learning_rate": 1.921159383373039e-08, + "loss": 0.6096, + "step": 13880 + }, + { + "epoch": 0.9771911298838437, + "grad_norm": 1.9638712406158447, + "learning_rate": 1.909360421883177e-08, + "loss": 0.5694, + "step": 13881 + }, + { + "epoch": 0.9772615276311158, + "grad_norm": 2.659841537475586, + "learning_rate": 1.8975977547579626e-08, + "loss": 0.5996, + "step": 13882 + }, + { + "epoch": 0.9773319253783879, + "grad_norm": 2.295395612716675, + "learning_rate": 1.8858713826087836e-08, + "loss": 0.6725, + "step": 13883 + }, + { + "epoch": 0.97740232312566, + "grad_norm": 1.779305100440979, + "learning_rate": 1.8741813060454746e-08, + "loss": 0.605, + "step": 13884 + }, + { + "epoch": 0.977472720872932, + "grad_norm": 1.9546655416488647, + "learning_rate": 1.8625275256756946e-08, + "loss": 0.5521, + "step": 13885 + }, + { + "epoch": 0.9775431186202042, + "grad_norm": 2.355107545852661, + "learning_rate": 1.8509100421053913e-08, + "loss": 0.6819, + "step": 13886 + }, + { + "epoch": 0.9776135163674763, + "grad_norm": 1.9291077852249146, + "learning_rate": 1.839328855938571e-08, + "loss": 0.611, + "step": 13887 + }, + { + "epoch": 0.9776839141147483, + "grad_norm": 1.707094430923462, + "learning_rate": 1.8277839677773743e-08, + "loss": 0.6856, + "step": 13888 + }, + { + "epoch": 0.9777543118620204, + "grad_norm": 3.1466445922851562, + "learning_rate": 1.816275378221921e-08, + "loss": 0.6719, + "step": 13889 + }, + { + "epoch": 0.9778247096092925, + "grad_norm": 2.2554380893707275, + "learning_rate": 1.8048030878706213e-08, + "loss": 0.6391, + "step": 13890 + }, + { + "epoch": 0.9778951073565646, + "grad_norm": 1.7734118700027466, + "learning_rate": 1.7933670973199422e-08, + "loss": 0.6176, + "step": 13891 + }, + { + "epoch": 0.9779655051038367, + "grad_norm": 2.480674982070923, + "learning_rate": 1.7819674071644864e-08, + "loss": 0.7438, + "step": 13892 + }, + { + "epoch": 0.9780359028511088, + "grad_norm": 2.0716285705566406, + "learning_rate": 1.7706040179968352e-08, + "loss": 0.5728, + "step": 13893 + }, + { + "epoch": 0.9781063005983809, + "grad_norm": 2.003028154373169, + "learning_rate": 1.7592769304078604e-08, + "loss": 0.5889, + "step": 13894 + }, + { + "epoch": 0.9781766983456529, + "grad_norm": 2.124868392944336, + "learning_rate": 1.7479861449864132e-08, + "loss": 0.6142, + "step": 13895 + }, + { + "epoch": 0.978247096092925, + "grad_norm": 2.04422926902771, + "learning_rate": 1.736731662319635e-08, + "loss": 0.5897, + "step": 13896 + }, + { + "epoch": 0.9783174938401971, + "grad_norm": 2.4537200927734375, + "learning_rate": 1.725513482992491e-08, + "loss": 0.6467, + "step": 13897 + }, + { + "epoch": 0.9783878915874692, + "grad_norm": 1.790695071220398, + "learning_rate": 1.7143316075883152e-08, + "loss": 0.6422, + "step": 13898 + }, + { + "epoch": 0.9784582893347413, + "grad_norm": 2.1047520637512207, + "learning_rate": 1.7031860366883422e-08, + "loss": 0.732, + "step": 13899 + }, + { + "epoch": 0.9785286870820133, + "grad_norm": 1.7866829633712769, + "learning_rate": 1.6920767708722527e-08, + "loss": 0.5976, + "step": 13900 + }, + { + "epoch": 0.9785990848292855, + "grad_norm": 1.88518488407135, + "learning_rate": 1.681003810717474e-08, + "loss": 0.5731, + "step": 13901 + }, + { + "epoch": 0.9786694825765575, + "grad_norm": 1.81674325466156, + "learning_rate": 1.669967156799801e-08, + "loss": 0.6891, + "step": 13902 + }, + { + "epoch": 0.9787398803238296, + "grad_norm": 1.8479012250900269, + "learning_rate": 1.65896680969293e-08, + "loss": 0.6021, + "step": 13903 + }, + { + "epoch": 0.9788102780711018, + "grad_norm": 2.177363395690918, + "learning_rate": 1.648002769968848e-08, + "loss": 0.7644, + "step": 13904 + }, + { + "epoch": 0.9788806758183738, + "grad_norm": 2.0527548789978027, + "learning_rate": 1.637075038197522e-08, + "loss": 0.7305, + "step": 13905 + }, + { + "epoch": 0.9789510735656459, + "grad_norm": 2.0223965644836426, + "learning_rate": 1.6261836149472074e-08, + "loss": 0.683, + "step": 13906 + }, + { + "epoch": 0.979021471312918, + "grad_norm": 2.2245566844940186, + "learning_rate": 1.6153285007840634e-08, + "loss": 0.6173, + "step": 13907 + }, + { + "epoch": 0.9790918690601901, + "grad_norm": 2.1556358337402344, + "learning_rate": 1.60450969627246e-08, + "loss": 0.6861, + "step": 13908 + }, + { + "epoch": 0.9791622668074622, + "grad_norm": 1.4890003204345703, + "learning_rate": 1.593727201974904e-08, + "loss": 0.442, + "step": 13909 + }, + { + "epoch": 0.9792326645547342, + "grad_norm": 1.9451042413711548, + "learning_rate": 1.5829810184520343e-08, + "loss": 0.5701, + "step": 13910 + }, + { + "epoch": 0.9793030623020064, + "grad_norm": 2.187666893005371, + "learning_rate": 1.5722711462624718e-08, + "loss": 0.5516, + "step": 13911 + }, + { + "epoch": 0.9793734600492784, + "grad_norm": 1.9932849407196045, + "learning_rate": 1.5615975859630482e-08, + "loss": 0.7308, + "step": 13912 + }, + { + "epoch": 0.9794438577965505, + "grad_norm": 2.0667884349823, + "learning_rate": 1.550960338108731e-08, + "loss": 0.7485, + "step": 13913 + }, + { + "epoch": 0.9795142555438225, + "grad_norm": 2.0107059478759766, + "learning_rate": 1.5403594032525446e-08, + "loss": 0.5765, + "step": 13914 + }, + { + "epoch": 0.9795846532910947, + "grad_norm": 1.7248635292053223, + "learning_rate": 1.529794781945648e-08, + "loss": 0.5836, + "step": 13915 + }, + { + "epoch": 0.9796550510383668, + "grad_norm": 1.7558048963546753, + "learning_rate": 1.5192664747372574e-08, + "loss": 0.567, + "step": 13916 + }, + { + "epoch": 0.9797254487856388, + "grad_norm": 1.8172531127929688, + "learning_rate": 1.5087744821748018e-08, + "loss": 0.6093, + "step": 13917 + }, + { + "epoch": 0.979795846532911, + "grad_norm": 1.8133544921875, + "learning_rate": 1.498318804803689e-08, + "loss": 0.8076, + "step": 13918 + }, + { + "epoch": 0.979866244280183, + "grad_norm": 2.195199489593506, + "learning_rate": 1.487899443167695e-08, + "loss": 0.5849, + "step": 13919 + }, + { + "epoch": 0.9799366420274551, + "grad_norm": 1.9511091709136963, + "learning_rate": 1.4775163978083427e-08, + "loss": 0.5935, + "step": 13920 + }, + { + "epoch": 0.9800070397747273, + "grad_norm": 1.900455355644226, + "learning_rate": 1.4671696692656e-08, + "loss": 0.6581, + "step": 13921 + }, + { + "epoch": 0.9800774375219993, + "grad_norm": 1.9135953187942505, + "learning_rate": 1.4568592580772587e-08, + "loss": 0.6016, + "step": 13922 + }, + { + "epoch": 0.9801478352692714, + "grad_norm": 2.168309450149536, + "learning_rate": 1.4465851647794791e-08, + "loss": 0.5908, + "step": 13923 + }, + { + "epoch": 0.9802182330165434, + "grad_norm": 2.160351037979126, + "learning_rate": 1.4363473899064005e-08, + "loss": 0.6346, + "step": 13924 + }, + { + "epoch": 0.9802886307638156, + "grad_norm": 2.0873334407806396, + "learning_rate": 1.4261459339902194e-08, + "loss": 0.691, + "step": 13925 + }, + { + "epoch": 0.9803590285110877, + "grad_norm": 2.1926686763763428, + "learning_rate": 1.4159807975614225e-08, + "loss": 0.6193, + "step": 13926 + }, + { + "epoch": 0.9804294262583597, + "grad_norm": 2.1842873096466064, + "learning_rate": 1.405851981148476e-08, + "loss": 0.5953, + "step": 13927 + }, + { + "epoch": 0.9804998240056318, + "grad_norm": 2.3529088497161865, + "learning_rate": 1.3957594852779031e-08, + "loss": 0.635, + "step": 13928 + }, + { + "epoch": 0.9805702217529039, + "grad_norm": 2.137972593307495, + "learning_rate": 1.3857033104745175e-08, + "loss": 0.5746, + "step": 13929 + }, + { + "epoch": 0.980640619500176, + "grad_norm": 2.4479687213897705, + "learning_rate": 1.375683457261112e-08, + "loss": 0.5976, + "step": 13930 + }, + { + "epoch": 0.9807110172474481, + "grad_norm": 1.9709559679031372, + "learning_rate": 1.3656999261586921e-08, + "loss": 0.6576, + "step": 13931 + }, + { + "epoch": 0.9807814149947202, + "grad_norm": 2.2264909744262695, + "learning_rate": 1.3557527176862428e-08, + "loss": 0.7995, + "step": 13932 + }, + { + "epoch": 0.9808518127419923, + "grad_norm": 1.7327836751937866, + "learning_rate": 1.3458418323608833e-08, + "loss": 0.6612, + "step": 13933 + }, + { + "epoch": 0.9809222104892643, + "grad_norm": 1.8441612720489502, + "learning_rate": 1.3359672706979464e-08, + "loss": 0.6012, + "step": 13934 + }, + { + "epoch": 0.9809926082365364, + "grad_norm": 2.039013624191284, + "learning_rate": 1.3261290332108988e-08, + "loss": 0.6183, + "step": 13935 + }, + { + "epoch": 0.9810630059838085, + "grad_norm": 2.775846242904663, + "learning_rate": 1.3163271204110316e-08, + "loss": 0.6341, + "step": 13936 + }, + { + "epoch": 0.9811334037310806, + "grad_norm": 2.336066246032715, + "learning_rate": 1.3065615328082369e-08, + "loss": 0.634, + "step": 13937 + }, + { + "epoch": 0.9812038014783527, + "grad_norm": 1.8354887962341309, + "learning_rate": 1.2968322709099977e-08, + "loss": 0.6416, + "step": 13938 + }, + { + "epoch": 0.9812741992256248, + "grad_norm": 1.8197954893112183, + "learning_rate": 1.2871393352222426e-08, + "loss": 0.6178, + "step": 13939 + }, + { + "epoch": 0.9813445969728969, + "grad_norm": 1.792296051979065, + "learning_rate": 1.2774827262488797e-08, + "loss": 0.7439, + "step": 13940 + }, + { + "epoch": 0.9814149947201689, + "grad_norm": 2.19572377204895, + "learning_rate": 1.2678624444920295e-08, + "loss": 0.5924, + "step": 13941 + }, + { + "epoch": 0.981485392467441, + "grad_norm": 2.081674814224243, + "learning_rate": 1.2582784904517918e-08, + "loss": 0.6659, + "step": 13942 + }, + { + "epoch": 0.9815557902147132, + "grad_norm": 1.905203938484192, + "learning_rate": 1.2487308646264794e-08, + "loss": 0.6418, + "step": 13943 + }, + { + "epoch": 0.9816261879619852, + "grad_norm": 2.6321566104888916, + "learning_rate": 1.2392195675124618e-08, + "loss": 0.6995, + "step": 13944 + }, + { + "epoch": 0.9816965857092573, + "grad_norm": 2.2972865104675293, + "learning_rate": 1.2297445996042433e-08, + "loss": 0.5546, + "step": 13945 + }, + { + "epoch": 0.9817669834565294, + "grad_norm": 1.9354954957962036, + "learning_rate": 1.2203059613944633e-08, + "loss": 0.6373, + "step": 13946 + }, + { + "epoch": 0.9818373812038015, + "grad_norm": 2.3683784008026123, + "learning_rate": 1.210903653373896e-08, + "loss": 0.6019, + "step": 13947 + }, + { + "epoch": 0.9819077789510736, + "grad_norm": 1.8320515155792236, + "learning_rate": 1.2015376760312168e-08, + "loss": 0.6912, + "step": 13948 + }, + { + "epoch": 0.9819781766983456, + "grad_norm": 1.9634754657745361, + "learning_rate": 1.1922080298534699e-08, + "loss": 0.6567, + "step": 13949 + }, + { + "epoch": 0.9820485744456178, + "grad_norm": 1.9985042810440063, + "learning_rate": 1.1829147153257557e-08, + "loss": 0.754, + "step": 13950 + }, + { + "epoch": 0.9821189721928898, + "grad_norm": 1.9498015642166138, + "learning_rate": 1.1736577329311547e-08, + "loss": 0.5641, + "step": 13951 + }, + { + "epoch": 0.9821893699401619, + "grad_norm": 1.918049693107605, + "learning_rate": 1.1644370831510376e-08, + "loss": 0.6028, + "step": 13952 + }, + { + "epoch": 0.982259767687434, + "grad_norm": 1.8839645385742188, + "learning_rate": 1.1552527664646761e-08, + "loss": 0.516, + "step": 13953 + }, + { + "epoch": 0.9823301654347061, + "grad_norm": 2.1459548473358154, + "learning_rate": 1.146104783349633e-08, + "loss": 0.5675, + "step": 13954 + }, + { + "epoch": 0.9824005631819782, + "grad_norm": 2.742687702178955, + "learning_rate": 1.1369931342815275e-08, + "loss": 0.6558, + "step": 13955 + }, + { + "epoch": 0.9824709609292502, + "grad_norm": 2.6880836486816406, + "learning_rate": 1.1279178197341144e-08, + "loss": 0.7264, + "step": 13956 + }, + { + "epoch": 0.9825413586765224, + "grad_norm": 2.388540029525757, + "learning_rate": 1.118878840179205e-08, + "loss": 0.7139, + "step": 13957 + }, + { + "epoch": 0.9826117564237944, + "grad_norm": 2.3610472679138184, + "learning_rate": 1.1098761960866677e-08, + "loss": 0.6989, + "step": 13958 + }, + { + "epoch": 0.9826821541710665, + "grad_norm": 1.921624779701233, + "learning_rate": 1.1009098879246615e-08, + "loss": 0.7972, + "step": 13959 + }, + { + "epoch": 0.9827525519183387, + "grad_norm": 1.9396510124206543, + "learning_rate": 1.0919799161592469e-08, + "loss": 0.7281, + "step": 13960 + }, + { + "epoch": 0.9828229496656107, + "grad_norm": 2.013742685317993, + "learning_rate": 1.0830862812548526e-08, + "loss": 0.66, + "step": 13961 + }, + { + "epoch": 0.9828933474128828, + "grad_norm": 1.8520091772079468, + "learning_rate": 1.0742289836737307e-08, + "loss": 0.5934, + "step": 13962 + }, + { + "epoch": 0.9829637451601548, + "grad_norm": 2.0910303592681885, + "learning_rate": 1.0654080238764241e-08, + "loss": 0.7546, + "step": 13963 + }, + { + "epoch": 0.983034142907427, + "grad_norm": 1.8276128768920898, + "learning_rate": 1.0566234023216103e-08, + "loss": 0.68, + "step": 13964 + }, + { + "epoch": 0.9831045406546991, + "grad_norm": 2.4960989952087402, + "learning_rate": 1.0478751194658686e-08, + "loss": 0.6393, + "step": 13965 + }, + { + "epoch": 0.9831749384019711, + "grad_norm": 2.4091808795928955, + "learning_rate": 1.0391631757641462e-08, + "loss": 0.585, + "step": 13966 + }, + { + "epoch": 0.9832453361492433, + "grad_norm": 1.992873191833496, + "learning_rate": 1.0304875716692919e-08, + "loss": 0.6613, + "step": 13967 + }, + { + "epoch": 0.9833157338965153, + "grad_norm": 1.9012705087661743, + "learning_rate": 1.0218483076324447e-08, + "loss": 0.6813, + "step": 13968 + }, + { + "epoch": 0.9833861316437874, + "grad_norm": 1.7931522130966187, + "learning_rate": 1.0132453841028011e-08, + "loss": 0.662, + "step": 13969 + }, + { + "epoch": 0.9834565293910594, + "grad_norm": 1.7924164533615112, + "learning_rate": 1.0046788015274587e-08, + "loss": 0.6523, + "step": 13970 + }, + { + "epoch": 0.9835269271383316, + "grad_norm": 1.8519757986068726, + "learning_rate": 9.961485603519615e-09, + "loss": 0.5935, + "step": 13971 + }, + { + "epoch": 0.9835973248856037, + "grad_norm": 2.0876309871673584, + "learning_rate": 9.876546610198322e-09, + "loss": 0.6158, + "step": 13972 + }, + { + "epoch": 0.9836677226328757, + "grad_norm": 2.145796298980713, + "learning_rate": 9.791971039724955e-09, + "loss": 0.5718, + "step": 13973 + }, + { + "epoch": 0.9837381203801479, + "grad_norm": 2.106675148010254, + "learning_rate": 9.70775889649822e-09, + "loss": 0.7333, + "step": 13974 + }, + { + "epoch": 0.9838085181274199, + "grad_norm": 3.06893253326416, + "learning_rate": 9.623910184895834e-09, + "loss": 0.7402, + "step": 13975 + }, + { + "epoch": 0.983878915874692, + "grad_norm": 1.6798187494277954, + "learning_rate": 9.540424909276868e-09, + "loss": 0.7888, + "step": 13976 + }, + { + "epoch": 0.9839493136219641, + "grad_norm": 2.155367374420166, + "learning_rate": 9.457303073982515e-09, + "loss": 0.6205, + "step": 13977 + }, + { + "epoch": 0.9840197113692362, + "grad_norm": 1.775622844696045, + "learning_rate": 9.374544683333763e-09, + "loss": 0.7154, + "step": 13978 + }, + { + "epoch": 0.9840901091165083, + "grad_norm": 1.9982688426971436, + "learning_rate": 9.292149741633727e-09, + "loss": 0.6728, + "step": 13979 + }, + { + "epoch": 0.9841605068637803, + "grad_norm": 1.8827953338623047, + "learning_rate": 9.210118253166088e-09, + "loss": 0.5596, + "step": 13980 + }, + { + "epoch": 0.9842309046110524, + "grad_norm": 1.9417206048965454, + "learning_rate": 9.128450222195106e-09, + "loss": 0.6872, + "step": 13981 + }, + { + "epoch": 0.9843013023583246, + "grad_norm": 1.97786545753479, + "learning_rate": 9.047145652967936e-09, + "loss": 0.6288, + "step": 13982 + }, + { + "epoch": 0.9843717001055966, + "grad_norm": 1.8112918138504028, + "learning_rate": 8.966204549710754e-09, + "loss": 0.6174, + "step": 13983 + }, + { + "epoch": 0.9844420978528687, + "grad_norm": 1.851441502571106, + "learning_rate": 8.885626916631862e-09, + "loss": 0.6695, + "step": 13984 + }, + { + "epoch": 0.9845124956001408, + "grad_norm": 2.0595505237579346, + "learning_rate": 8.805412757920905e-09, + "loss": 0.5973, + "step": 13985 + }, + { + "epoch": 0.9845828933474129, + "grad_norm": 1.6627434492111206, + "learning_rate": 8.725562077748106e-09, + "loss": 0.5953, + "step": 13986 + }, + { + "epoch": 0.984653291094685, + "grad_norm": 1.946688175201416, + "learning_rate": 8.646074880265031e-09, + "loss": 0.6711, + "step": 13987 + }, + { + "epoch": 0.984723688841957, + "grad_norm": 1.7648195028305054, + "learning_rate": 8.566951169604597e-09, + "loss": 0.6777, + "step": 13988 + }, + { + "epoch": 0.9847940865892292, + "grad_norm": 1.8309361934661865, + "learning_rate": 8.488190949879515e-09, + "loss": 0.6544, + "step": 13989 + }, + { + "epoch": 0.9848644843365012, + "grad_norm": 2.323896884918213, + "learning_rate": 8.409794225186173e-09, + "loss": 0.7401, + "step": 13990 + }, + { + "epoch": 0.9849348820837733, + "grad_norm": 1.9718310832977295, + "learning_rate": 8.331760999598425e-09, + "loss": 0.6844, + "step": 13991 + }, + { + "epoch": 0.9850052798310454, + "grad_norm": 2.058990955352783, + "learning_rate": 8.254091277175357e-09, + "loss": 0.5444, + "step": 13992 + }, + { + "epoch": 0.9850756775783175, + "grad_norm": 1.3400294780731201, + "learning_rate": 8.176785061953517e-09, + "loss": 0.7251, + "step": 13993 + }, + { + "epoch": 0.9851460753255896, + "grad_norm": 2.042553663253784, + "learning_rate": 8.099842357953912e-09, + "loss": 0.5357, + "step": 13994 + }, + { + "epoch": 0.9852164730728616, + "grad_norm": 2.0574820041656494, + "learning_rate": 8.023263169174232e-09, + "loss": 0.5628, + "step": 13995 + }, + { + "epoch": 0.9852868708201338, + "grad_norm": 1.9616235494613647, + "learning_rate": 7.947047499598958e-09, + "loss": 0.6786, + "step": 13996 + }, + { + "epoch": 0.9853572685674058, + "grad_norm": 1.701490879058838, + "learning_rate": 7.8711953531877e-09, + "loss": 0.5749, + "step": 13997 + }, + { + "epoch": 0.9854276663146779, + "grad_norm": 2.7846617698669434, + "learning_rate": 7.795706733886076e-09, + "loss": 0.6449, + "step": 13998 + }, + { + "epoch": 0.9854980640619501, + "grad_norm": 2.211088180541992, + "learning_rate": 7.720581645618729e-09, + "loss": 0.5475, + "step": 13999 + }, + { + "epoch": 0.9855684618092221, + "grad_norm": 2.5922281742095947, + "learning_rate": 7.64582009229009e-09, + "loss": 0.7724, + "step": 14000 + }, + { + "epoch": 0.9856388595564942, + "grad_norm": 2.1484062671661377, + "learning_rate": 7.57142207778827e-09, + "loss": 0.6083, + "step": 14001 + }, + { + "epoch": 0.9857092573037662, + "grad_norm": 1.7994557619094849, + "learning_rate": 7.497387605980399e-09, + "loss": 0.5999, + "step": 14002 + }, + { + "epoch": 0.9857796550510384, + "grad_norm": 1.8114882707595825, + "learning_rate": 7.423716680716507e-09, + "loss": 0.6932, + "step": 14003 + }, + { + "epoch": 0.9858500527983105, + "grad_norm": 1.9373490810394287, + "learning_rate": 7.3504093058264214e-09, + "loss": 0.6656, + "step": 14004 + }, + { + "epoch": 0.9859204505455825, + "grad_norm": 1.9877787828445435, + "learning_rate": 7.277465485122092e-09, + "loss": 0.5325, + "step": 14005 + }, + { + "epoch": 0.9859908482928547, + "grad_norm": 2.1145706176757812, + "learning_rate": 7.204885222394486e-09, + "loss": 0.6834, + "step": 14006 + }, + { + "epoch": 0.9860612460401267, + "grad_norm": 2.020113945007324, + "learning_rate": 7.132668521418251e-09, + "loss": 0.703, + "step": 14007 + }, + { + "epoch": 0.9861316437873988, + "grad_norm": 2.0172245502471924, + "learning_rate": 7.06081538594705e-09, + "loss": 0.6896, + "step": 14008 + }, + { + "epoch": 0.9862020415346708, + "grad_norm": 2.1435389518737793, + "learning_rate": 6.989325819718228e-09, + "loss": 0.6335, + "step": 14009 + }, + { + "epoch": 0.986272439281943, + "grad_norm": 4.499448776245117, + "learning_rate": 6.918199826447368e-09, + "loss": 0.6926, + "step": 14010 + }, + { + "epoch": 0.9863428370292151, + "grad_norm": 2.1330573558807373, + "learning_rate": 6.8474374098321795e-09, + "loss": 0.5628, + "step": 14011 + }, + { + "epoch": 0.9864132347764871, + "grad_norm": 1.944887638092041, + "learning_rate": 6.777038573552496e-09, + "loss": 0.708, + "step": 14012 + }, + { + "epoch": 0.9864836325237593, + "grad_norm": 2.613743782043457, + "learning_rate": 6.7070033212679455e-09, + "loss": 0.6284, + "step": 14013 + }, + { + "epoch": 0.9865540302710313, + "grad_norm": 1.9081488847732544, + "learning_rate": 6.637331656619505e-09, + "loss": 0.7146, + "step": 14014 + }, + { + "epoch": 0.9866244280183034, + "grad_norm": 1.8610341548919678, + "learning_rate": 6.568023583230276e-09, + "loss": 0.5814, + "step": 14015 + }, + { + "epoch": 0.9866948257655755, + "grad_norm": 2.2371318340301514, + "learning_rate": 6.499079104702377e-09, + "loss": 0.6816, + "step": 14016 + }, + { + "epoch": 0.9867652235128476, + "grad_norm": 1.9451833963394165, + "learning_rate": 6.430498224621605e-09, + "loss": 0.6729, + "step": 14017 + }, + { + "epoch": 0.9868356212601197, + "grad_norm": 1.9928584098815918, + "learning_rate": 6.362280946552778e-09, + "loss": 0.6279, + "step": 14018 + }, + { + "epoch": 0.9869060190073917, + "grad_norm": 1.9561370611190796, + "learning_rate": 6.2944272740428355e-09, + "loss": 0.5764, + "step": 14019 + }, + { + "epoch": 0.9869764167546639, + "grad_norm": 1.652284860610962, + "learning_rate": 6.226937210620065e-09, + "loss": 0.7394, + "step": 14020 + }, + { + "epoch": 0.987046814501936, + "grad_norm": 2.2796897888183594, + "learning_rate": 6.159810759791773e-09, + "loss": 0.5876, + "step": 14021 + }, + { + "epoch": 0.987117212249208, + "grad_norm": 1.7181034088134766, + "learning_rate": 6.093047925049721e-09, + "loss": 0.5474, + "step": 14022 + }, + { + "epoch": 0.9871876099964801, + "grad_norm": 2.130495309829712, + "learning_rate": 6.026648709863913e-09, + "loss": 0.5941, + "step": 14023 + }, + { + "epoch": 0.9872580077437522, + "grad_norm": 2.361323118209839, + "learning_rate": 5.960613117686475e-09, + "loss": 0.7427, + "step": 14024 + }, + { + "epoch": 0.9873284054910243, + "grad_norm": 1.876387357711792, + "learning_rate": 5.8949411519508834e-09, + "loss": 0.6727, + "step": 14025 + }, + { + "epoch": 0.9873988032382964, + "grad_norm": 2.15578556060791, + "learning_rate": 5.829632816071961e-09, + "loss": 0.6189, + "step": 14026 + }, + { + "epoch": 0.9874692009855685, + "grad_norm": 2.12323260307312, + "learning_rate": 5.7646881134443275e-09, + "loss": 0.6963, + "step": 14027 + }, + { + "epoch": 0.9875395987328406, + "grad_norm": 1.783199429512024, + "learning_rate": 5.700107047445502e-09, + "loss": 0.685, + "step": 14028 + }, + { + "epoch": 0.9876099964801126, + "grad_norm": 2.0257086753845215, + "learning_rate": 5.635889621432022e-09, + "loss": 0.6085, + "step": 14029 + }, + { + "epoch": 0.9876803942273847, + "grad_norm": 1.8655483722686768, + "learning_rate": 5.5720358387433276e-09, + "loss": 0.5587, + "step": 14030 + }, + { + "epoch": 0.9877507919746568, + "grad_norm": 1.9546794891357422, + "learning_rate": 5.508545702698653e-09, + "loss": 0.6426, + "step": 14031 + }, + { + "epoch": 0.9878211897219289, + "grad_norm": 2.264629602432251, + "learning_rate": 5.4454192165993565e-09, + "loss": 0.6619, + "step": 14032 + }, + { + "epoch": 0.987891587469201, + "grad_norm": 1.9713423252105713, + "learning_rate": 5.382656383727369e-09, + "loss": 0.5958, + "step": 14033 + }, + { + "epoch": 0.987961985216473, + "grad_norm": 2.0198304653167725, + "learning_rate": 5.3202572073459684e-09, + "loss": 0.7124, + "step": 14034 + }, + { + "epoch": 0.9880323829637452, + "grad_norm": 2.1491012573242188, + "learning_rate": 5.258221690699783e-09, + "loss": 0.6389, + "step": 14035 + }, + { + "epoch": 0.9881027807110172, + "grad_norm": 2.0994949340820312, + "learning_rate": 5.196549837012454e-09, + "loss": 0.6081, + "step": 14036 + }, + { + "epoch": 0.9881731784582893, + "grad_norm": 1.6076310873031616, + "learning_rate": 5.135241649492084e-09, + "loss": 0.6784, + "step": 14037 + }, + { + "epoch": 0.9882435762055615, + "grad_norm": 2.304334878921509, + "learning_rate": 5.074297131325011e-09, + "loss": 0.7208, + "step": 14038 + }, + { + "epoch": 0.9883139739528335, + "grad_norm": 1.677261233329773, + "learning_rate": 5.013716285679704e-09, + "loss": 0.5862, + "step": 14039 + }, + { + "epoch": 0.9883843717001056, + "grad_norm": 1.9720070362091064, + "learning_rate": 4.9534991157067495e-09, + "loss": 0.6894, + "step": 14040 + }, + { + "epoch": 0.9884547694473776, + "grad_norm": 1.5806623697280884, + "learning_rate": 4.893645624537313e-09, + "loss": 0.5267, + "step": 14041 + }, + { + "epoch": 0.9885251671946498, + "grad_norm": 2.0200562477111816, + "learning_rate": 4.834155815281571e-09, + "loss": 0.6403, + "step": 14042 + }, + { + "epoch": 0.9885955649419219, + "grad_norm": 2.1230628490448, + "learning_rate": 4.775029691033383e-09, + "loss": 0.6975, + "step": 14043 + }, + { + "epoch": 0.9886659626891939, + "grad_norm": 1.834315299987793, + "learning_rate": 4.716267254866402e-09, + "loss": 0.5647, + "step": 14044 + }, + { + "epoch": 0.9887363604364661, + "grad_norm": 1.9170018434524536, + "learning_rate": 4.657868509836405e-09, + "loss": 0.5133, + "step": 14045 + }, + { + "epoch": 0.9888067581837381, + "grad_norm": 1.9031949043273926, + "learning_rate": 4.599833458978187e-09, + "loss": 0.5852, + "step": 14046 + }, + { + "epoch": 0.9888771559310102, + "grad_norm": 4.969810962677002, + "learning_rate": 4.542162105310998e-09, + "loss": 0.7106, + "step": 14047 + }, + { + "epoch": 0.9889475536782822, + "grad_norm": 1.840739130973816, + "learning_rate": 4.484854451832332e-09, + "loss": 0.6549, + "step": 14048 + }, + { + "epoch": 0.9890179514255544, + "grad_norm": 1.9708251953125, + "learning_rate": 4.4279105015218035e-09, + "loss": 0.648, + "step": 14049 + }, + { + "epoch": 0.9890883491728265, + "grad_norm": 2.1382148265838623, + "learning_rate": 4.3713302573388234e-09, + "loss": 0.6916, + "step": 14050 + }, + { + "epoch": 0.9891587469200985, + "grad_norm": 1.7827239036560059, + "learning_rate": 4.315113722226482e-09, + "loss": 0.6595, + "step": 14051 + }, + { + "epoch": 0.9892291446673707, + "grad_norm": 2.6011300086975098, + "learning_rate": 4.259260899106887e-09, + "loss": 0.706, + "step": 14052 + }, + { + "epoch": 0.9892995424146427, + "grad_norm": 1.8428279161453247, + "learning_rate": 4.2037717908842695e-09, + "loss": 0.5553, + "step": 14053 + }, + { + "epoch": 0.9893699401619148, + "grad_norm": 1.9215117692947388, + "learning_rate": 4.148646400443434e-09, + "loss": 0.6553, + "step": 14054 + }, + { + "epoch": 0.989440337909187, + "grad_norm": 2.105013847351074, + "learning_rate": 4.093884730649755e-09, + "loss": 0.6031, + "step": 14055 + }, + { + "epoch": 0.989510735656459, + "grad_norm": 1.760157823562622, + "learning_rate": 4.039486784351509e-09, + "loss": 0.6701, + "step": 14056 + }, + { + "epoch": 0.9895811334037311, + "grad_norm": 1.9528449773788452, + "learning_rate": 3.9854525643759905e-09, + "loss": 0.6564, + "step": 14057 + }, + { + "epoch": 0.9896515311510031, + "grad_norm": 2.08646821975708, + "learning_rate": 3.931782073532619e-09, + "loss": 0.649, + "step": 14058 + }, + { + "epoch": 0.9897219288982753, + "grad_norm": 2.053027391433716, + "learning_rate": 3.878475314612162e-09, + "loss": 0.4688, + "step": 14059 + }, + { + "epoch": 0.9897923266455474, + "grad_norm": 1.674824833869934, + "learning_rate": 3.825532290385958e-09, + "loss": 0.6578, + "step": 14060 + }, + { + "epoch": 0.9898627243928194, + "grad_norm": 2.2559123039245605, + "learning_rate": 3.772953003605917e-09, + "loss": 0.6346, + "step": 14061 + }, + { + "epoch": 0.9899331221400915, + "grad_norm": 1.688109040260315, + "learning_rate": 3.7207374570068506e-09, + "loss": 0.5361, + "step": 14062 + }, + { + "epoch": 0.9900035198873636, + "grad_norm": 2.0562539100646973, + "learning_rate": 3.6688856533025893e-09, + "loss": 0.7315, + "step": 14063 + }, + { + "epoch": 0.9900739176346357, + "grad_norm": 1.9297562837600708, + "learning_rate": 3.617397595189087e-09, + "loss": 0.7031, + "step": 14064 + }, + { + "epoch": 0.9901443153819077, + "grad_norm": 2.104715347290039, + "learning_rate": 3.5662732853428692e-09, + "loss": 0.6194, + "step": 14065 + }, + { + "epoch": 0.9902147131291799, + "grad_norm": 2.136404275894165, + "learning_rate": 3.5155127264233645e-09, + "loss": 0.6665, + "step": 14066 + }, + { + "epoch": 0.990285110876452, + "grad_norm": 1.848243236541748, + "learning_rate": 3.4651159210682402e-09, + "loss": 0.5983, + "step": 14067 + }, + { + "epoch": 0.990355508623724, + "grad_norm": 2.237325429916382, + "learning_rate": 3.4150828718980676e-09, + "loss": 0.6312, + "step": 14068 + }, + { + "epoch": 0.9904259063709961, + "grad_norm": 1.7631250619888306, + "learning_rate": 3.3654135815147647e-09, + "loss": 0.7385, + "step": 14069 + }, + { + "epoch": 0.9904963041182682, + "grad_norm": 2.184685468673706, + "learning_rate": 3.316108052500044e-09, + "loss": 0.6602, + "step": 14070 + }, + { + "epoch": 0.9905667018655403, + "grad_norm": 1.9652299880981445, + "learning_rate": 3.267166287416967e-09, + "loss": 0.6241, + "step": 14071 + }, + { + "epoch": 0.9906370996128124, + "grad_norm": 2.068751811981201, + "learning_rate": 3.2185882888107196e-09, + "loss": 0.6031, + "step": 14072 + }, + { + "epoch": 0.9907074973600845, + "grad_norm": 1.6743046045303345, + "learning_rate": 3.1703740592070593e-09, + "loss": 0.5726, + "step": 14073 + }, + { + "epoch": 0.9907778951073566, + "grad_norm": 2.113968849182129, + "learning_rate": 3.1225236011115376e-09, + "loss": 0.6491, + "step": 14074 + }, + { + "epoch": 0.9908482928546286, + "grad_norm": 1.942960262298584, + "learning_rate": 3.075036917013385e-09, + "loss": 0.6085, + "step": 14075 + }, + { + "epoch": 0.9909186906019007, + "grad_norm": 2.0166244506835938, + "learning_rate": 3.02791400938085e-09, + "loss": 0.6402, + "step": 14076 + }, + { + "epoch": 0.9909890883491729, + "grad_norm": 2.287775754928589, + "learning_rate": 2.981154880664305e-09, + "loss": 0.6096, + "step": 14077 + }, + { + "epoch": 0.9910594860964449, + "grad_norm": 2.3151180744171143, + "learning_rate": 2.934759533293918e-09, + "loss": 0.6723, + "step": 14078 + }, + { + "epoch": 0.991129883843717, + "grad_norm": 3.1817901134490967, + "learning_rate": 2.888727969681981e-09, + "loss": 0.5293, + "step": 14079 + }, + { + "epoch": 0.991200281590989, + "grad_norm": 1.7909828424453735, + "learning_rate": 2.8430601922221352e-09, + "loss": 0.5784, + "step": 14080 + }, + { + "epoch": 0.9912706793382612, + "grad_norm": 2.031649112701416, + "learning_rate": 2.7977562032885927e-09, + "loss": 0.5314, + "step": 14081 + }, + { + "epoch": 0.9913410770855333, + "grad_norm": 2.5910756587982178, + "learning_rate": 2.752816005236913e-09, + "loss": 0.6443, + "step": 14082 + }, + { + "epoch": 0.9914114748328053, + "grad_norm": 1.8849961757659912, + "learning_rate": 2.7082396004040054e-09, + "loss": 0.6841, + "step": 14083 + }, + { + "epoch": 0.9914818725800775, + "grad_norm": 1.9312026500701904, + "learning_rate": 2.6640269911057945e-09, + "loss": 0.6617, + "step": 14084 + }, + { + "epoch": 0.9915522703273495, + "grad_norm": 1.8406873941421509, + "learning_rate": 2.6201781796426626e-09, + "loss": 0.5606, + "step": 14085 + }, + { + "epoch": 0.9916226680746216, + "grad_norm": 1.8848075866699219, + "learning_rate": 2.5766931682932315e-09, + "loss": 0.6702, + "step": 14086 + }, + { + "epoch": 0.9916930658218936, + "grad_norm": 1.9048500061035156, + "learning_rate": 2.5335719593190252e-09, + "loss": 0.5284, + "step": 14087 + }, + { + "epoch": 0.9917634635691658, + "grad_norm": 2.1925032138824463, + "learning_rate": 2.4908145549621397e-09, + "loss": 0.5853, + "step": 14088 + }, + { + "epoch": 0.9918338613164379, + "grad_norm": 2.0724008083343506, + "learning_rate": 2.448420957444464e-09, + "loss": 0.6931, + "step": 14089 + }, + { + "epoch": 0.9919042590637099, + "grad_norm": 1.7706056833267212, + "learning_rate": 2.4063911689707897e-09, + "loss": 0.5902, + "step": 14090 + }, + { + "epoch": 0.9919746568109821, + "grad_norm": 1.7925007343292236, + "learning_rate": 2.36472519172648e-09, + "loss": 0.5885, + "step": 14091 + }, + { + "epoch": 0.9920450545582541, + "grad_norm": 2.3087406158447266, + "learning_rate": 2.3234230278774693e-09, + "loss": 0.6964, + "step": 14092 + }, + { + "epoch": 0.9921154523055262, + "grad_norm": 1.9109275341033936, + "learning_rate": 2.2824846795710398e-09, + "loss": 0.6631, + "step": 14093 + }, + { + "epoch": 0.9921858500527984, + "grad_norm": 1.6266261339187622, + "learning_rate": 2.2419101489358216e-09, + "loss": 0.716, + "step": 14094 + }, + { + "epoch": 0.9922562478000704, + "grad_norm": 2.133861780166626, + "learning_rate": 2.2016994380810174e-09, + "loss": 0.6442, + "step": 14095 + }, + { + "epoch": 0.9923266455473425, + "grad_norm": 1.495941162109375, + "learning_rate": 2.1618525490979533e-09, + "loss": 0.7063, + "step": 14096 + }, + { + "epoch": 0.9923970432946145, + "grad_norm": 1.9309149980545044, + "learning_rate": 2.122369484057751e-09, + "loss": 0.5599, + "step": 14097 + }, + { + "epoch": 0.9924674410418867, + "grad_norm": 1.9421753883361816, + "learning_rate": 2.0832502450128796e-09, + "loss": 0.6043, + "step": 14098 + }, + { + "epoch": 0.9925378387891588, + "grad_norm": 2.603945732116699, + "learning_rate": 2.044494833997157e-09, + "loss": 0.6254, + "step": 14099 + }, + { + "epoch": 0.9926082365364308, + "grad_norm": 1.834574818611145, + "learning_rate": 2.0061032530265254e-09, + "loss": 0.5427, + "step": 14100 + }, + { + "epoch": 0.992678634283703, + "grad_norm": 2.470163106918335, + "learning_rate": 1.968075504095945e-09, + "loss": 0.6672, + "step": 14101 + }, + { + "epoch": 0.992749032030975, + "grad_norm": 2.352792978286743, + "learning_rate": 1.9304115891832783e-09, + "loss": 0.5693, + "step": 14102 + }, + { + "epoch": 0.9928194297782471, + "grad_norm": 2.573340892791748, + "learning_rate": 1.8931115102461816e-09, + "loss": 0.5922, + "step": 14103 + }, + { + "epoch": 0.9928898275255191, + "grad_norm": 1.9745357036590576, + "learning_rate": 1.856175269224436e-09, + "loss": 0.6376, + "step": 14104 + }, + { + "epoch": 0.9929602252727913, + "grad_norm": 2.1511168479919434, + "learning_rate": 1.8196028680376176e-09, + "loss": 0.5702, + "step": 14105 + }, + { + "epoch": 0.9930306230200634, + "grad_norm": 1.8787190914154053, + "learning_rate": 1.7833943085874271e-09, + "loss": 0.6719, + "step": 14106 + }, + { + "epoch": 0.9931010207673354, + "grad_norm": 2.222062826156616, + "learning_rate": 1.747549592756914e-09, + "loss": 0.6474, + "step": 14107 + }, + { + "epoch": 0.9931714185146076, + "grad_norm": 1.794286847114563, + "learning_rate": 1.7120687224089215e-09, + "loss": 0.5515, + "step": 14108 + }, + { + "epoch": 0.9932418162618796, + "grad_norm": 1.915016770362854, + "learning_rate": 1.676951699388418e-09, + "loss": 0.636, + "step": 14109 + }, + { + "epoch": 0.9933122140091517, + "grad_norm": 1.786875605583191, + "learning_rate": 1.642198525521721e-09, + "loss": 0.5618, + "step": 14110 + }, + { + "epoch": 0.9933826117564238, + "grad_norm": 2.170663356781006, + "learning_rate": 1.6078092026149404e-09, + "loss": 0.6753, + "step": 14111 + }, + { + "epoch": 0.9934530095036959, + "grad_norm": 1.7235007286071777, + "learning_rate": 1.573783732456313e-09, + "loss": 0.6654, + "step": 14112 + }, + { + "epoch": 0.993523407250968, + "grad_norm": 1.6433396339416504, + "learning_rate": 1.5401221168146461e-09, + "loss": 0.6042, + "step": 14113 + }, + { + "epoch": 0.99359380499824, + "grad_norm": 1.9440629482269287, + "learning_rate": 1.5068243574408723e-09, + "loss": 0.6908, + "step": 14114 + }, + { + "epoch": 0.9936642027455121, + "grad_norm": 2.149019241333008, + "learning_rate": 1.4738904560649413e-09, + "loss": 0.7371, + "step": 14115 + }, + { + "epoch": 0.9937346004927843, + "grad_norm": 2.410818338394165, + "learning_rate": 1.441320414399705e-09, + "loss": 0.6622, + "step": 14116 + }, + { + "epoch": 0.9938049982400563, + "grad_norm": 1.860845685005188, + "learning_rate": 1.4091142341385866e-09, + "loss": 0.633, + "step": 14117 + }, + { + "epoch": 0.9938753959873284, + "grad_norm": 2.246426582336426, + "learning_rate": 1.3772719169555802e-09, + "loss": 0.6925, + "step": 14118 + }, + { + "epoch": 0.9939457937346005, + "grad_norm": 1.945698857307434, + "learning_rate": 1.345793464506806e-09, + "loss": 0.6595, + "step": 14119 + }, + { + "epoch": 0.9940161914818726, + "grad_norm": 1.9650613069534302, + "learning_rate": 1.3146788784289543e-09, + "loss": 0.6548, + "step": 14120 + }, + { + "epoch": 0.9940865892291446, + "grad_norm": 1.8250097036361694, + "learning_rate": 1.28392816033851e-09, + "loss": 0.6318, + "step": 14121 + }, + { + "epoch": 0.9941569869764167, + "grad_norm": 1.8473412990570068, + "learning_rate": 1.2535413118356375e-09, + "loss": 0.565, + "step": 14122 + }, + { + "epoch": 0.9942273847236889, + "grad_norm": 1.6056463718414307, + "learning_rate": 1.2235183344995182e-09, + "loss": 0.6967, + "step": 14123 + }, + { + "epoch": 0.9942977824709609, + "grad_norm": 2.119598865509033, + "learning_rate": 1.1938592298914585e-09, + "loss": 0.6024, + "step": 14124 + }, + { + "epoch": 0.994368180218233, + "grad_norm": 2.2559263706207275, + "learning_rate": 1.1645639995525592e-09, + "loss": 0.6745, + "step": 14125 + }, + { + "epoch": 0.994438577965505, + "grad_norm": 2.1920175552368164, + "learning_rate": 1.1356326450068232e-09, + "loss": 0.7412, + "step": 14126 + }, + { + "epoch": 0.9945089757127772, + "grad_norm": 2.134639263153076, + "learning_rate": 1.107065167758048e-09, + "loss": 0.6635, + "step": 14127 + }, + { + "epoch": 0.9945793734600493, + "grad_norm": 1.9435850381851196, + "learning_rate": 1.0788615692913783e-09, + "loss": 0.6596, + "step": 14128 + }, + { + "epoch": 0.9946497712073213, + "grad_norm": 1.8507931232452393, + "learning_rate": 1.0510218510733083e-09, + "loss": 0.5353, + "step": 14129 + }, + { + "epoch": 0.9947201689545935, + "grad_norm": 2.150902032852173, + "learning_rate": 1.0235460145516795e-09, + "loss": 0.6313, + "step": 14130 + }, + { + "epoch": 0.9947905667018655, + "grad_norm": 1.600439190864563, + "learning_rate": 9.964340611541278e-10, + "loss": 0.5963, + "step": 14131 + }, + { + "epoch": 0.9948609644491376, + "grad_norm": 2.304680347442627, + "learning_rate": 9.696859922904144e-10, + "loss": 0.642, + "step": 14132 + }, + { + "epoch": 0.9949313621964098, + "grad_norm": 2.050065279006958, + "learning_rate": 9.433018093516488e-10, + "loss": 0.681, + "step": 14133 + }, + { + "epoch": 0.9950017599436818, + "grad_norm": 2.0588579177856445, + "learning_rate": 9.172815137095114e-10, + "loss": 0.6598, + "step": 14134 + }, + { + "epoch": 0.9950721576909539, + "grad_norm": 2.595705986022949, + "learning_rate": 8.916251067162539e-10, + "loss": 0.6005, + "step": 14135 + }, + { + "epoch": 0.9951425554382259, + "grad_norm": 2.1721200942993164, + "learning_rate": 8.663325897070306e-10, + "loss": 0.6764, + "step": 14136 + }, + { + "epoch": 0.9952129531854981, + "grad_norm": 1.7241231203079224, + "learning_rate": 8.414039639952353e-10, + "loss": 0.6472, + "step": 14137 + }, + { + "epoch": 0.9952833509327702, + "grad_norm": 2.1990201473236084, + "learning_rate": 8.168392308779415e-10, + "loss": 0.6975, + "step": 14138 + }, + { + "epoch": 0.9953537486800422, + "grad_norm": 2.0451316833496094, + "learning_rate": 7.926383916320167e-10, + "loss": 0.5878, + "step": 14139 + }, + { + "epoch": 0.9954241464273144, + "grad_norm": 2.0774171352386475, + "learning_rate": 7.688014475148997e-10, + "loss": 0.6085, + "step": 14140 + }, + { + "epoch": 0.9954945441745864, + "grad_norm": 1.7620710134506226, + "learning_rate": 7.453283997677085e-10, + "loss": 0.5158, + "step": 14141 + }, + { + "epoch": 0.9955649419218585, + "grad_norm": 1.9479080438613892, + "learning_rate": 7.222192496090241e-10, + "loss": 0.6348, + "step": 14142 + }, + { + "epoch": 0.9956353396691305, + "grad_norm": 2.004749298095703, + "learning_rate": 6.994739982418841e-10, + "loss": 0.6239, + "step": 14143 + }, + { + "epoch": 0.9957057374164027, + "grad_norm": 2.126868963241577, + "learning_rate": 6.770926468475658e-10, + "loss": 0.66, + "step": 14144 + }, + { + "epoch": 0.9957761351636748, + "grad_norm": 1.7352176904678345, + "learning_rate": 6.55075196590249e-10, + "loss": 0.6451, + "step": 14145 + }, + { + "epoch": 0.9958465329109468, + "grad_norm": 1.9501675367355347, + "learning_rate": 6.334216486146848e-10, + "loss": 0.5504, + "step": 14146 + }, + { + "epoch": 0.995916930658219, + "grad_norm": 2.2541067600250244, + "learning_rate": 6.121320040461953e-10, + "loss": 0.7119, + "step": 14147 + }, + { + "epoch": 0.995987328405491, + "grad_norm": 2.080595016479492, + "learning_rate": 5.912062639922277e-10, + "loss": 0.6392, + "step": 14148 + }, + { + "epoch": 0.9960577261527631, + "grad_norm": 1.8793208599090576, + "learning_rate": 5.706444295400237e-10, + "loss": 0.6152, + "step": 14149 + }, + { + "epoch": 0.9961281239000352, + "grad_norm": 1.77863609790802, + "learning_rate": 5.504465017597271e-10, + "loss": 0.553, + "step": 14150 + }, + { + "epoch": 0.9961985216473073, + "grad_norm": 1.911460280418396, + "learning_rate": 5.306124817004986e-10, + "loss": 0.726, + "step": 14151 + }, + { + "epoch": 0.9962689193945794, + "grad_norm": 1.8550230264663696, + "learning_rate": 5.111423703936246e-10, + "loss": 0.6247, + "step": 14152 + }, + { + "epoch": 0.9963393171418514, + "grad_norm": 1.716148853302002, + "learning_rate": 4.920361688517394e-10, + "loss": 0.6444, + "step": 14153 + }, + { + "epoch": 0.9964097148891236, + "grad_norm": 1.9871270656585693, + "learning_rate": 4.732938780680485e-10, + "loss": 0.6804, + "step": 14154 + }, + { + "epoch": 0.9964801126363957, + "grad_norm": 2.0309035778045654, + "learning_rate": 4.549154990171056e-10, + "loss": 0.7381, + "step": 14155 + }, + { + "epoch": 0.9965505103836677, + "grad_norm": 1.902988076210022, + "learning_rate": 4.3690103265403567e-10, + "loss": 0.6604, + "step": 14156 + }, + { + "epoch": 0.9966209081309398, + "grad_norm": 2.0539450645446777, + "learning_rate": 4.19250479916089e-10, + "loss": 0.6483, + "step": 14157 + }, + { + "epoch": 0.9966913058782119, + "grad_norm": 1.8673015832901, + "learning_rate": 4.019638417203097e-10, + "loss": 0.6771, + "step": 14158 + }, + { + "epoch": 0.996761703625484, + "grad_norm": 2.5850186347961426, + "learning_rate": 3.850411189650904e-10, + "loss": 0.6324, + "step": 14159 + }, + { + "epoch": 0.996832101372756, + "grad_norm": 1.9290416240692139, + "learning_rate": 3.68482312531726e-10, + "loss": 0.5, + "step": 14160 + }, + { + "epoch": 0.9969024991200282, + "grad_norm": 2.1429805755615234, + "learning_rate": 3.5228742327975124e-10, + "loss": 0.7525, + "step": 14161 + }, + { + "epoch": 0.9969728968673003, + "grad_norm": 2.008741617202759, + "learning_rate": 3.3645645205160334e-10, + "loss": 0.6475, + "step": 14162 + }, + { + "epoch": 0.9970432946145723, + "grad_norm": 1.93205726146698, + "learning_rate": 3.2098939967029063e-10, + "loss": 0.7307, + "step": 14163 + }, + { + "epoch": 0.9971136923618444, + "grad_norm": 2.1158223152160645, + "learning_rate": 3.058862669393925e-10, + "loss": 0.6162, + "step": 14164 + }, + { + "epoch": 0.9971840901091165, + "grad_norm": 1.6570450067520142, + "learning_rate": 2.911470546461681e-10, + "loss": 0.6142, + "step": 14165 + }, + { + "epoch": 0.9972544878563886, + "grad_norm": 1.985223650932312, + "learning_rate": 2.7677176355456187e-10, + "loss": 0.6387, + "step": 14166 + }, + { + "epoch": 0.9973248856036607, + "grad_norm": 2.378422975540161, + "learning_rate": 2.627603944129753e-10, + "loss": 0.5987, + "step": 14167 + }, + { + "epoch": 0.9973952833509327, + "grad_norm": 1.5257035493850708, + "learning_rate": 2.4911294794960345e-10, + "loss": 0.7169, + "step": 14168 + }, + { + "epoch": 0.9974656810982049, + "grad_norm": 1.816434383392334, + "learning_rate": 2.358294248747672e-10, + "loss": 0.5912, + "step": 14169 + }, + { + "epoch": 0.9975360788454769, + "grad_norm": 1.9785574674606323, + "learning_rate": 2.229098258785811e-10, + "loss": 0.6445, + "step": 14170 + }, + { + "epoch": 0.997606476592749, + "grad_norm": 1.8715665340423584, + "learning_rate": 2.1035415163250803e-10, + "loss": 0.6614, + "step": 14171 + }, + { + "epoch": 0.9976768743400212, + "grad_norm": 1.8300496339797974, + "learning_rate": 1.9816240278935914e-10, + "loss": 0.6502, + "step": 14172 + }, + { + "epoch": 0.9977472720872932, + "grad_norm": 1.8629519939422607, + "learning_rate": 1.8633457998329385e-10, + "loss": 0.7724, + "step": 14173 + }, + { + "epoch": 0.9978176698345653, + "grad_norm": 1.5848112106323242, + "learning_rate": 1.7487068382904257e-10, + "loss": 0.6106, + "step": 14174 + }, + { + "epoch": 0.9978880675818373, + "grad_norm": 2.324415683746338, + "learning_rate": 1.6377071492268413e-10, + "loss": 0.6038, + "step": 14175 + }, + { + "epoch": 0.9979584653291095, + "grad_norm": 2.0654568672180176, + "learning_rate": 1.5303467384086832e-10, + "loss": 0.6651, + "step": 14176 + }, + { + "epoch": 0.9980288630763815, + "grad_norm": 1.7763820886611938, + "learning_rate": 1.4266256114237042e-10, + "loss": 0.6066, + "step": 14177 + }, + { + "epoch": 0.9980992608236536, + "grad_norm": 5.992873191833496, + "learning_rate": 1.3265437736653672e-10, + "loss": 0.6407, + "step": 14178 + }, + { + "epoch": 0.9981696585709258, + "grad_norm": 2.625185251235962, + "learning_rate": 1.230101230340619e-10, + "loss": 0.6763, + "step": 14179 + }, + { + "epoch": 0.9982400563181978, + "grad_norm": 2.2489564418792725, + "learning_rate": 1.1372979864465727e-10, + "loss": 0.7371, + "step": 14180 + }, + { + "epoch": 0.9983104540654699, + "grad_norm": 2.240708589553833, + "learning_rate": 1.0481340468249112e-10, + "loss": 0.6033, + "step": 14181 + }, + { + "epoch": 0.9983808518127419, + "grad_norm": 2.5129106044769287, + "learning_rate": 9.62609416107485e-11, + "loss": 0.5712, + "step": 14182 + }, + { + "epoch": 0.9984512495600141, + "grad_norm": 1.9680602550506592, + "learning_rate": 8.807240987318554e-11, + "loss": 0.551, + "step": 14183 + }, + { + "epoch": 0.9985216473072862, + "grad_norm": 2.267918586730957, + "learning_rate": 8.024780989723812e-11, + "loss": 0.6886, + "step": 14184 + }, + { + "epoch": 0.9985920450545582, + "grad_norm": 2.017542600631714, + "learning_rate": 7.278714208780457e-11, + "loss": 0.703, + "step": 14185 + }, + { + "epoch": 0.9986624428018304, + "grad_norm": 2.2735044956207275, + "learning_rate": 6.56904068342401e-11, + "loss": 0.6792, + "step": 14186 + }, + { + "epoch": 0.9987328405491024, + "grad_norm": 1.860819697380066, + "learning_rate": 5.895760450413955e-11, + "loss": 0.5716, + "step": 14187 + }, + { + "epoch": 0.9988032382963745, + "grad_norm": 1.9798089265823364, + "learning_rate": 5.258873544877751e-11, + "loss": 0.5898, + "step": 14188 + }, + { + "epoch": 0.9988736360436467, + "grad_norm": 1.7142122983932495, + "learning_rate": 4.658379999922246e-11, + "loss": 0.5734, + "step": 14189 + }, + { + "epoch": 0.9989440337909187, + "grad_norm": 2.1595804691314697, + "learning_rate": 4.094279846711402e-11, + "loss": 0.7522, + "step": 14190 + }, + { + "epoch": 0.9990144315381908, + "grad_norm": 1.7425943613052368, + "learning_rate": 3.5665731146217184e-11, + "loss": 0.5077, + "step": 14191 + }, + { + "epoch": 0.9990848292854628, + "grad_norm": 2.145644187927246, + "learning_rate": 3.075259831009092e-11, + "loss": 0.6717, + "step": 14192 + }, + { + "epoch": 0.999155227032735, + "grad_norm": 2.2604100704193115, + "learning_rate": 2.620340021441958e-11, + "loss": 0.5737, + "step": 14193 + }, + { + "epoch": 0.9992256247800071, + "grad_norm": 1.644917368888855, + "learning_rate": 2.2018137097012947e-11, + "loss": 0.6543, + "step": 14194 + }, + { + "epoch": 0.9992960225272791, + "grad_norm": 2.2545053958892822, + "learning_rate": 1.8196809173920413e-11, + "loss": 0.5299, + "step": 14195 + }, + { + "epoch": 0.9993664202745512, + "grad_norm": 2.11588716506958, + "learning_rate": 1.473941664409395e-11, + "loss": 0.6384, + "step": 14196 + }, + { + "epoch": 0.9994368180218233, + "grad_norm": 1.6545628309249878, + "learning_rate": 1.1645959687833773e-11, + "loss": 0.5867, + "step": 14197 + }, + { + "epoch": 0.9995072157690954, + "grad_norm": 1.9196447134017944, + "learning_rate": 8.916438465234044e-12, + "loss": 0.6798, + "step": 14198 + }, + { + "epoch": 0.9995776135163674, + "grad_norm": 2.4446861743927, + "learning_rate": 6.550853119291488e-12, + "loss": 0.6521, + "step": 14199 + }, + { + "epoch": 0.9996480112636396, + "grad_norm": 2.23484206199646, + "learning_rate": 4.549203772019616e-12, + "loss": 0.6638, + "step": 14200 + }, + { + "epoch": 0.9997184090109117, + "grad_norm": 1.748366117477417, + "learning_rate": 2.9114905275573477e-12, + "loss": 0.6011, + "step": 14201 + }, + { + "epoch": 0.9997888067581837, + "grad_norm": 1.6977177858352661, + "learning_rate": 1.637713471391855e-12, + "loss": 0.463, + "step": 14202 + }, + { + "epoch": 0.9998592045054558, + "grad_norm": 1.8524410724639893, + "learning_rate": 7.278726695814086e-13, + "loss": 0.6481, + "step": 14203 + }, + { + "epoch": 0.9999296022527279, + "grad_norm": 1.696658730506897, + "learning_rate": 1.8196816953253147e-13, + "loss": 0.5645, + "step": 14204 + }, + { + "epoch": 1.0, + "grad_norm": 1.9762729406356812, + "learning_rate": 0.0, + "loss": 0.6818, + "step": 14205 + }, + { + "epoch": 1.0, + "step": 14205, + "total_flos": 6.540228112607609e+18, + "train_loss": 0.3734022373319805, + "train_runtime": 132908.7237, + "train_samples_per_second": 10.26, + "train_steps_per_second": 0.107 } ], "logging_steps": 1.0, - "max_steps": 13215, + "max_steps": 14205, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, @@ -92540,7 +99470,7 @@ "attributes": {} } }, - "total_flos": 4.661948874456302e+18, + "total_flos": 6.540228112607609e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null