Qwen2.5-VL-3B-Instruct-Agentic / trainer_state.json
m-ric's picture
m-ric HF Staff
Model save
ebfdd2d verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 6.0,
"eval_steps": 500,
"global_step": 1926,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.015600624024960999,
"grad_norm": 129.38328725317604,
"learning_rate": 4.1450777202072546e-07,
"loss": 2.516,
"mean_token_accuracy": 0.5559801399707794,
"num_tokens": 133460.0,
"step": 5
},
{
"epoch": 0.031201248049921998,
"grad_norm": 109.78138881563702,
"learning_rate": 9.326424870466322e-07,
"loss": 2.4202,
"mean_token_accuracy": 0.5638172149658203,
"num_tokens": 269026.0,
"step": 10
},
{
"epoch": 0.046801872074883,
"grad_norm": 21.95808254821198,
"learning_rate": 1.4507772020725389e-06,
"loss": 2.0485,
"mean_token_accuracy": 0.6104433059692382,
"num_tokens": 401447.0,
"step": 15
},
{
"epoch": 0.062402496099843996,
"grad_norm": 14.11423255160202,
"learning_rate": 1.968911917098446e-06,
"loss": 1.6362,
"mean_token_accuracy": 0.6654394745826722,
"num_tokens": 536684.0,
"step": 20
},
{
"epoch": 0.078003120124805,
"grad_norm": 9.622340375457814,
"learning_rate": 2.4870466321243523e-06,
"loss": 1.2735,
"mean_token_accuracy": 0.7199482321739197,
"num_tokens": 670646.0,
"step": 25
},
{
"epoch": 0.093603744149766,
"grad_norm": 9.306287977685484,
"learning_rate": 3.0051813471502592e-06,
"loss": 0.8738,
"mean_token_accuracy": 0.7971087396144867,
"num_tokens": 804318.0,
"step": 30
},
{
"epoch": 0.10920436817472699,
"grad_norm": 1.3713338593715485,
"learning_rate": 3.5233160621761657e-06,
"loss": 0.6686,
"mean_token_accuracy": 0.8349042236804962,
"num_tokens": 938579.0,
"step": 35
},
{
"epoch": 0.12480499219968799,
"grad_norm": 1.0749561589307342,
"learning_rate": 4.041450777202073e-06,
"loss": 0.5967,
"mean_token_accuracy": 0.8444766402244568,
"num_tokens": 1071445.0,
"step": 40
},
{
"epoch": 0.14040561622464898,
"grad_norm": 0.9496605192114075,
"learning_rate": 4.55958549222798e-06,
"loss": 0.589,
"mean_token_accuracy": 0.8443255186080932,
"num_tokens": 1208671.0,
"step": 45
},
{
"epoch": 0.15600624024961,
"grad_norm": 0.9337552437064247,
"learning_rate": 5.077720207253887e-06,
"loss": 0.5467,
"mean_token_accuracy": 0.8530825853347779,
"num_tokens": 1340184.0,
"step": 50
},
{
"epoch": 0.17160686427457097,
"grad_norm": 0.8238179306235244,
"learning_rate": 5.5958549222797934e-06,
"loss": 0.5285,
"mean_token_accuracy": 0.8581411242485046,
"num_tokens": 1472408.0,
"step": 55
},
{
"epoch": 0.187207488299532,
"grad_norm": 0.8310047027070511,
"learning_rate": 6.113989637305699e-06,
"loss": 0.5171,
"mean_token_accuracy": 0.8576966345310211,
"num_tokens": 1607538.0,
"step": 60
},
{
"epoch": 0.20280811232449297,
"grad_norm": 0.9786821516730765,
"learning_rate": 6.632124352331607e-06,
"loss": 0.4964,
"mean_token_accuracy": 0.8631992697715759,
"num_tokens": 1742785.0,
"step": 65
},
{
"epoch": 0.21840873634945399,
"grad_norm": 0.9411599854617563,
"learning_rate": 7.150259067357514e-06,
"loss": 0.4961,
"mean_token_accuracy": 0.8622171819210053,
"num_tokens": 1878332.0,
"step": 70
},
{
"epoch": 0.23400936037441497,
"grad_norm": 1.0340069668875889,
"learning_rate": 7.66839378238342e-06,
"loss": 0.4678,
"mean_token_accuracy": 0.8705899536609649,
"num_tokens": 2013744.0,
"step": 75
},
{
"epoch": 0.24960998439937598,
"grad_norm": 0.9135002582214133,
"learning_rate": 8.186528497409328e-06,
"loss": 0.4413,
"mean_token_accuracy": 0.8774827301502228,
"num_tokens": 2147559.0,
"step": 80
},
{
"epoch": 0.26521060842433697,
"grad_norm": 0.9713464125782284,
"learning_rate": 8.704663212435233e-06,
"loss": 0.4213,
"mean_token_accuracy": 0.8831651329994201,
"num_tokens": 2282653.0,
"step": 85
},
{
"epoch": 0.28081123244929795,
"grad_norm": 1.258905573505298,
"learning_rate": 9.22279792746114e-06,
"loss": 0.3952,
"mean_token_accuracy": 0.8886785268783569,
"num_tokens": 2417343.0,
"step": 90
},
{
"epoch": 0.296411856474259,
"grad_norm": 1.0236538285858234,
"learning_rate": 9.740932642487048e-06,
"loss": 0.3721,
"mean_token_accuracy": 0.8954536736011505,
"num_tokens": 2552005.0,
"step": 95
},
{
"epoch": 0.31201248049922,
"grad_norm": 1.0098528095352195,
"learning_rate": 1.0259067357512955e-05,
"loss": 0.3399,
"mean_token_accuracy": 0.9048069655895233,
"num_tokens": 2687951.0,
"step": 100
},
{
"epoch": 0.32761310452418096,
"grad_norm": 1.04186438377383,
"learning_rate": 1.0777202072538861e-05,
"loss": 0.3326,
"mean_token_accuracy": 0.905648124217987,
"num_tokens": 2820443.0,
"step": 105
},
{
"epoch": 0.34321372854914195,
"grad_norm": 1.032367777542592,
"learning_rate": 1.1295336787564768e-05,
"loss": 0.3141,
"mean_token_accuracy": 0.9102253496646882,
"num_tokens": 2955262.0,
"step": 110
},
{
"epoch": 0.358814352574103,
"grad_norm": 0.9772961653469765,
"learning_rate": 1.1813471502590674e-05,
"loss": 0.2923,
"mean_token_accuracy": 0.9158960580825806,
"num_tokens": 3088104.0,
"step": 115
},
{
"epoch": 0.374414976599064,
"grad_norm": 1.1625665709943187,
"learning_rate": 1.2331606217616581e-05,
"loss": 0.2951,
"mean_token_accuracy": 0.915445065498352,
"num_tokens": 3222890.0,
"step": 120
},
{
"epoch": 0.39001560062402496,
"grad_norm": 0.9514106687699287,
"learning_rate": 1.2849740932642487e-05,
"loss": 0.2684,
"mean_token_accuracy": 0.9234537720680237,
"num_tokens": 3357508.0,
"step": 125
},
{
"epoch": 0.40561622464898595,
"grad_norm": 0.9436965806836382,
"learning_rate": 1.3367875647668396e-05,
"loss": 0.2649,
"mean_token_accuracy": 0.9228121876716614,
"num_tokens": 3492123.0,
"step": 130
},
{
"epoch": 0.42121684867394693,
"grad_norm": 2.9221775609122385,
"learning_rate": 1.3886010362694302e-05,
"loss": 0.2524,
"mean_token_accuracy": 0.9274605810642242,
"num_tokens": 3626221.0,
"step": 135
},
{
"epoch": 0.43681747269890797,
"grad_norm": 0.9970697064309755,
"learning_rate": 1.4404145077720209e-05,
"loss": 0.2565,
"mean_token_accuracy": 0.9262707114219666,
"num_tokens": 3759517.0,
"step": 140
},
{
"epoch": 0.45241809672386896,
"grad_norm": 1.0665217619628524,
"learning_rate": 1.4922279792746115e-05,
"loss": 0.2446,
"mean_token_accuracy": 0.9297859013080597,
"num_tokens": 3893880.0,
"step": 145
},
{
"epoch": 0.46801872074882994,
"grad_norm": 0.8670182837524361,
"learning_rate": 1.544041450777202e-05,
"loss": 0.2473,
"mean_token_accuracy": 0.9284217417240143,
"num_tokens": 4030353.0,
"step": 150
},
{
"epoch": 0.4836193447737909,
"grad_norm": 1.05147852072561,
"learning_rate": 1.595854922279793e-05,
"loss": 0.2405,
"mean_token_accuracy": 0.9299076437950134,
"num_tokens": 4165133.0,
"step": 155
},
{
"epoch": 0.49921996879875197,
"grad_norm": 0.8517307290696935,
"learning_rate": 1.6476683937823835e-05,
"loss": 0.2338,
"mean_token_accuracy": 0.9323501110076904,
"num_tokens": 4301452.0,
"step": 160
},
{
"epoch": 0.514820592823713,
"grad_norm": 0.8672850209975566,
"learning_rate": 1.6994818652849744e-05,
"loss": 0.2308,
"mean_token_accuracy": 0.9331668317317963,
"num_tokens": 4434235.0,
"step": 165
},
{
"epoch": 0.5304212168486739,
"grad_norm": 0.7613765456655692,
"learning_rate": 1.751295336787565e-05,
"loss": 0.2237,
"mean_token_accuracy": 0.9341790914535523,
"num_tokens": 4565517.0,
"step": 170
},
{
"epoch": 0.5460218408736349,
"grad_norm": 0.7810705098422559,
"learning_rate": 1.8031088082901555e-05,
"loss": 0.2,
"mean_token_accuracy": 0.9416050374507904,
"num_tokens": 4705082.0,
"step": 175
},
{
"epoch": 0.5616224648985959,
"grad_norm": 0.9205061316516083,
"learning_rate": 1.854922279792746e-05,
"loss": 0.2088,
"mean_token_accuracy": 0.9381526112556458,
"num_tokens": 4837136.0,
"step": 180
},
{
"epoch": 0.5772230889235569,
"grad_norm": 1.058982637944881,
"learning_rate": 1.9067357512953367e-05,
"loss": 0.2043,
"mean_token_accuracy": 0.9402987122535705,
"num_tokens": 4971044.0,
"step": 185
},
{
"epoch": 0.592823712948518,
"grad_norm": 0.824897710515413,
"learning_rate": 1.9585492227979276e-05,
"loss": 0.2064,
"mean_token_accuracy": 0.9386222898960114,
"num_tokens": 5106335.0,
"step": 190
},
{
"epoch": 0.608424336973479,
"grad_norm": 0.7483263969132283,
"learning_rate": 1.998845931909983e-05,
"loss": 0.2045,
"mean_token_accuracy": 0.9389774143695832,
"num_tokens": 5242005.0,
"step": 195
},
{
"epoch": 0.62402496099844,
"grad_norm": 0.956493222774255,
"learning_rate": 1.9930755914598962e-05,
"loss": 0.1916,
"mean_token_accuracy": 0.9423897206783295,
"num_tokens": 5376154.0,
"step": 200
},
{
"epoch": 0.6396255850234009,
"grad_norm": 0.7286666895021545,
"learning_rate": 1.9873052510098098e-05,
"loss": 0.2031,
"mean_token_accuracy": 0.9401489853858948,
"num_tokens": 5509423.0,
"step": 205
},
{
"epoch": 0.6552262090483619,
"grad_norm": 0.9135913203051874,
"learning_rate": 1.9815349105597233e-05,
"loss": 0.201,
"mean_token_accuracy": 0.9404536783695221,
"num_tokens": 5642113.0,
"step": 210
},
{
"epoch": 0.6708268330733229,
"grad_norm": 0.7481872057084359,
"learning_rate": 1.9757645701096365e-05,
"loss": 0.1952,
"mean_token_accuracy": 0.9418298482894898,
"num_tokens": 5774341.0,
"step": 215
},
{
"epoch": 0.6864274570982839,
"grad_norm": 0.7237687833871097,
"learning_rate": 1.96999422965955e-05,
"loss": 0.1966,
"mean_token_accuracy": 0.9417529046535492,
"num_tokens": 5908974.0,
"step": 220
},
{
"epoch": 0.7020280811232449,
"grad_norm": 0.7969480392056931,
"learning_rate": 1.9642238892094636e-05,
"loss": 0.1907,
"mean_token_accuracy": 0.9438920319080353,
"num_tokens": 6045637.0,
"step": 225
},
{
"epoch": 0.717628705148206,
"grad_norm": 0.7265197616696507,
"learning_rate": 1.958453548759377e-05,
"loss": 0.1978,
"mean_token_accuracy": 0.9413581132888794,
"num_tokens": 6177199.0,
"step": 230
},
{
"epoch": 0.733229329173167,
"grad_norm": 0.7170346798055343,
"learning_rate": 1.9526832083092904e-05,
"loss": 0.1867,
"mean_token_accuracy": 0.9436001777648926,
"num_tokens": 6309061.0,
"step": 235
},
{
"epoch": 0.748829953198128,
"grad_norm": 0.7850135506356014,
"learning_rate": 1.9469128678592036e-05,
"loss": 0.1861,
"mean_token_accuracy": 0.9450059533119202,
"num_tokens": 6445167.0,
"step": 240
},
{
"epoch": 0.7644305772230889,
"grad_norm": 0.6636614795993062,
"learning_rate": 1.9411425274091175e-05,
"loss": 0.1834,
"mean_token_accuracy": 0.9457758605480194,
"num_tokens": 6581753.0,
"step": 245
},
{
"epoch": 0.7800312012480499,
"grad_norm": 0.6771924573777538,
"learning_rate": 1.9353721869590307e-05,
"loss": 0.1809,
"mean_token_accuracy": 0.9465215265750885,
"num_tokens": 6719052.0,
"step": 250
},
{
"epoch": 0.7956318252730109,
"grad_norm": 0.7411173161974047,
"learning_rate": 1.9296018465089442e-05,
"loss": 0.1777,
"mean_token_accuracy": 0.9468727946281433,
"num_tokens": 6853523.0,
"step": 255
},
{
"epoch": 0.8112324492979719,
"grad_norm": 0.650242673061882,
"learning_rate": 1.9238315060588578e-05,
"loss": 0.1781,
"mean_token_accuracy": 0.9471937596797944,
"num_tokens": 6988476.0,
"step": 260
},
{
"epoch": 0.8268330733229329,
"grad_norm": 0.5819540826094594,
"learning_rate": 1.918061165608771e-05,
"loss": 0.1654,
"mean_token_accuracy": 0.9496288418769836,
"num_tokens": 7125030.0,
"step": 265
},
{
"epoch": 0.8424336973478939,
"grad_norm": 0.6403317597999031,
"learning_rate": 1.9122908251586845e-05,
"loss": 0.1666,
"mean_token_accuracy": 0.9501708805561065,
"num_tokens": 7257681.0,
"step": 270
},
{
"epoch": 0.858034321372855,
"grad_norm": 0.6418778871794507,
"learning_rate": 1.906520484708598e-05,
"loss": 0.1767,
"mean_token_accuracy": 0.9471412360668182,
"num_tokens": 7387859.0,
"step": 275
},
{
"epoch": 0.8736349453978159,
"grad_norm": 0.622540720290129,
"learning_rate": 1.9007501442585113e-05,
"loss": 0.1694,
"mean_token_accuracy": 0.9499199390411377,
"num_tokens": 7525714.0,
"step": 280
},
{
"epoch": 0.8892355694227769,
"grad_norm": 0.583668566012697,
"learning_rate": 1.894979803808425e-05,
"loss": 0.1666,
"mean_token_accuracy": 0.9499054729938508,
"num_tokens": 7662117.0,
"step": 285
},
{
"epoch": 0.9048361934477379,
"grad_norm": 0.5710724995564919,
"learning_rate": 1.889209463358338e-05,
"loss": 0.1589,
"mean_token_accuracy": 0.9527071595191956,
"num_tokens": 7798972.0,
"step": 290
},
{
"epoch": 0.9204368174726989,
"grad_norm": 0.5746752867779363,
"learning_rate": 1.883439122908252e-05,
"loss": 0.1686,
"mean_token_accuracy": 0.9504578649997711,
"num_tokens": 7935314.0,
"step": 295
},
{
"epoch": 0.9360374414976599,
"grad_norm": 0.603982996580727,
"learning_rate": 1.877668782458165e-05,
"loss": 0.1629,
"mean_token_accuracy": 0.9501371085643768,
"num_tokens": 8066117.0,
"step": 300
},
{
"epoch": 0.9516380655226209,
"grad_norm": 0.5922487470312926,
"learning_rate": 1.8718984420080787e-05,
"loss": 0.1648,
"mean_token_accuracy": 0.9501347541809082,
"num_tokens": 8201824.0,
"step": 305
},
{
"epoch": 0.9672386895475819,
"grad_norm": 0.5388743066315098,
"learning_rate": 1.8661281015579923e-05,
"loss": 0.1645,
"mean_token_accuracy": 0.9501584231853485,
"num_tokens": 8337103.0,
"step": 310
},
{
"epoch": 0.982839313572543,
"grad_norm": 0.6436638863952264,
"learning_rate": 1.8603577611079055e-05,
"loss": 0.1701,
"mean_token_accuracy": 0.9488860845565796,
"num_tokens": 8469896.0,
"step": 315
},
{
"epoch": 0.9984399375975039,
"grad_norm": 0.6288476584333543,
"learning_rate": 1.854587420657819e-05,
"loss": 0.1632,
"mean_token_accuracy": 0.9505932807922364,
"num_tokens": 8606247.0,
"step": 320
},
{
"epoch": 1.0124804992199687,
"grad_norm": 0.5165837525539804,
"learning_rate": 1.8488170802077322e-05,
"loss": 0.1253,
"mean_token_accuracy": 0.956979387336307,
"num_tokens": 8725614.0,
"step": 325
},
{
"epoch": 1.0280811232449298,
"grad_norm": 0.5586235874833988,
"learning_rate": 1.8430467397576458e-05,
"loss": 0.1385,
"mean_token_accuracy": 0.95757777094841,
"num_tokens": 8860633.0,
"step": 330
},
{
"epoch": 1.0436817472698907,
"grad_norm": 0.5360739832407388,
"learning_rate": 1.8372763993075593e-05,
"loss": 0.139,
"mean_token_accuracy": 0.9575653314590454,
"num_tokens": 8993751.0,
"step": 335
},
{
"epoch": 1.0592823712948518,
"grad_norm": 0.586330833103125,
"learning_rate": 1.8315060588574725e-05,
"loss": 0.132,
"mean_token_accuracy": 0.959426885843277,
"num_tokens": 9129958.0,
"step": 340
},
{
"epoch": 1.074882995319813,
"grad_norm": 0.5465937446033756,
"learning_rate": 1.8257357184073864e-05,
"loss": 0.1347,
"mean_token_accuracy": 0.9579224228858948,
"num_tokens": 9261996.0,
"step": 345
},
{
"epoch": 1.0904836193447738,
"grad_norm": 0.6247134416034062,
"learning_rate": 1.8199653779572996e-05,
"loss": 0.1342,
"mean_token_accuracy": 0.9582812249660492,
"num_tokens": 9397095.0,
"step": 350
},
{
"epoch": 1.1060842433697349,
"grad_norm": 0.6041369695098568,
"learning_rate": 1.8141950375072132e-05,
"loss": 0.1393,
"mean_token_accuracy": 0.9570248365402222,
"num_tokens": 9529942.0,
"step": 355
},
{
"epoch": 1.1216848673946958,
"grad_norm": 0.6320621020426849,
"learning_rate": 1.8084246970571264e-05,
"loss": 0.1372,
"mean_token_accuracy": 0.9566760540008545,
"num_tokens": 9661196.0,
"step": 360
},
{
"epoch": 1.1372854914196568,
"grad_norm": 0.55603630239386,
"learning_rate": 1.80265435660704e-05,
"loss": 0.135,
"mean_token_accuracy": 0.9582180440425873,
"num_tokens": 9795761.0,
"step": 365
},
{
"epoch": 1.1528861154446177,
"grad_norm": 0.5409365481066034,
"learning_rate": 1.7968840161569535e-05,
"loss": 0.1348,
"mean_token_accuracy": 0.9582259178161621,
"num_tokens": 9927945.0,
"step": 370
},
{
"epoch": 1.1684867394695788,
"grad_norm": 0.5119811661411944,
"learning_rate": 1.7911136757068667e-05,
"loss": 0.1348,
"mean_token_accuracy": 0.9586300194263458,
"num_tokens": 10063699.0,
"step": 375
},
{
"epoch": 1.1840873634945397,
"grad_norm": 0.5525476773773188,
"learning_rate": 1.7853433352567802e-05,
"loss": 0.1326,
"mean_token_accuracy": 0.9586183845996856,
"num_tokens": 10197789.0,
"step": 380
},
{
"epoch": 1.1996879875195008,
"grad_norm": 0.5483837555676431,
"learning_rate": 1.7795729948066938e-05,
"loss": 0.1351,
"mean_token_accuracy": 0.9586268842220307,
"num_tokens": 10331682.0,
"step": 385
},
{
"epoch": 1.2152886115444619,
"grad_norm": 0.5433839857021544,
"learning_rate": 1.773802654356607e-05,
"loss": 0.1285,
"mean_token_accuracy": 0.9602697193622589,
"num_tokens": 10468253.0,
"step": 390
},
{
"epoch": 1.2308892355694228,
"grad_norm": 0.5051500128900668,
"learning_rate": 1.7680323139065206e-05,
"loss": 0.1281,
"mean_token_accuracy": 0.9597992599010468,
"num_tokens": 10605460.0,
"step": 395
},
{
"epoch": 1.2464898595943839,
"grad_norm": 0.5987143217070542,
"learning_rate": 1.762261973456434e-05,
"loss": 0.1347,
"mean_token_accuracy": 0.9584230363368988,
"num_tokens": 10740021.0,
"step": 400
},
{
"epoch": 1.2620904836193447,
"grad_norm": 0.5700082158465958,
"learning_rate": 1.7564916330063477e-05,
"loss": 0.1278,
"mean_token_accuracy": 0.9604202508926392,
"num_tokens": 10876584.0,
"step": 405
},
{
"epoch": 1.2776911076443058,
"grad_norm": 0.5881496450863309,
"learning_rate": 1.750721292556261e-05,
"loss": 0.1235,
"mean_token_accuracy": 0.9618001461029053,
"num_tokens": 11014040.0,
"step": 410
},
{
"epoch": 1.2932917316692667,
"grad_norm": 0.5581301413309461,
"learning_rate": 1.7449509521061744e-05,
"loss": 0.126,
"mean_token_accuracy": 0.9612515866756439,
"num_tokens": 11147226.0,
"step": 415
},
{
"epoch": 1.3088923556942278,
"grad_norm": 0.5414259556709807,
"learning_rate": 1.739180611656088e-05,
"loss": 0.1236,
"mean_token_accuracy": 0.9617384076118469,
"num_tokens": 11281741.0,
"step": 420
},
{
"epoch": 1.3244929797191887,
"grad_norm": 0.5836584507491059,
"learning_rate": 1.7334102712060012e-05,
"loss": 0.1258,
"mean_token_accuracy": 0.9612742185592651,
"num_tokens": 11415198.0,
"step": 425
},
{
"epoch": 1.3400936037441498,
"grad_norm": 0.5465194574173875,
"learning_rate": 1.7276399307559147e-05,
"loss": 0.1276,
"mean_token_accuracy": 0.9597448110580444,
"num_tokens": 11547705.0,
"step": 430
},
{
"epoch": 1.3556942277691109,
"grad_norm": 0.5683225699257467,
"learning_rate": 1.7218695903058283e-05,
"loss": 0.1268,
"mean_token_accuracy": 0.9613869369029999,
"num_tokens": 11685203.0,
"step": 435
},
{
"epoch": 1.3712948517940717,
"grad_norm": 0.5752321684685067,
"learning_rate": 1.7160992498557415e-05,
"loss": 0.1292,
"mean_token_accuracy": 0.9604803204536438,
"num_tokens": 11823197.0,
"step": 440
},
{
"epoch": 1.3868954758190328,
"grad_norm": 0.5388447370973155,
"learning_rate": 1.710328909405655e-05,
"loss": 0.128,
"mean_token_accuracy": 0.9602981925010681,
"num_tokens": 11956118.0,
"step": 445
},
{
"epoch": 1.4024960998439937,
"grad_norm": 0.5444320848079807,
"learning_rate": 1.7045585689555686e-05,
"loss": 0.1222,
"mean_token_accuracy": 0.962395453453064,
"num_tokens": 12090445.0,
"step": 450
},
{
"epoch": 1.4180967238689548,
"grad_norm": 0.554566606109682,
"learning_rate": 1.698788228505482e-05,
"loss": 0.1234,
"mean_token_accuracy": 0.9616039395332336,
"num_tokens": 12217939.0,
"step": 455
},
{
"epoch": 1.4336973478939157,
"grad_norm": 0.6070727464807655,
"learning_rate": 1.6930178880553953e-05,
"loss": 0.1284,
"mean_token_accuracy": 0.9608745992183685,
"num_tokens": 12350482.0,
"step": 460
},
{
"epoch": 1.4492979719188768,
"grad_norm": 0.5523782519510332,
"learning_rate": 1.687247547605309e-05,
"loss": 0.1167,
"mean_token_accuracy": 0.9636275410652161,
"num_tokens": 12485977.0,
"step": 465
},
{
"epoch": 1.4648985959438376,
"grad_norm": 0.577639640008196,
"learning_rate": 1.6814772071552224e-05,
"loss": 0.1304,
"mean_token_accuracy": 0.9609861671924591,
"num_tokens": 12617413.0,
"step": 470
},
{
"epoch": 1.4804992199687987,
"grad_norm": 0.5602199179222196,
"learning_rate": 1.6757068667051356e-05,
"loss": 0.1216,
"mean_token_accuracy": 0.9624531030654907,
"num_tokens": 12750714.0,
"step": 475
},
{
"epoch": 1.4960998439937598,
"grad_norm": 0.4967916021381074,
"learning_rate": 1.6699365262550492e-05,
"loss": 0.1187,
"mean_token_accuracy": 0.9640235006809235,
"num_tokens": 12887826.0,
"step": 480
},
{
"epoch": 1.5117004680187207,
"grad_norm": 0.5435358963770943,
"learning_rate": 1.6641661858049627e-05,
"loss": 0.119,
"mean_token_accuracy": 0.9637284338474273,
"num_tokens": 13023445.0,
"step": 485
},
{
"epoch": 1.5273010920436816,
"grad_norm": 0.6202411461407195,
"learning_rate": 1.658395845354876e-05,
"loss": 0.1239,
"mean_token_accuracy": 0.9621273696422576,
"num_tokens": 13156136.0,
"step": 490
},
{
"epoch": 1.5429017160686427,
"grad_norm": 0.5860990715234197,
"learning_rate": 1.6526255049047895e-05,
"loss": 0.114,
"mean_token_accuracy": 0.9654937386512756,
"num_tokens": 13295904.0,
"step": 495
},
{
"epoch": 1.5585023400936038,
"grad_norm": 0.5650400891346256,
"learning_rate": 1.646855164454703e-05,
"loss": 0.1156,
"mean_token_accuracy": 0.9648711144924164,
"num_tokens": 13430554.0,
"step": 500
},
{
"epoch": 1.5741029641185649,
"grad_norm": 0.5781630886222578,
"learning_rate": 1.6410848240046166e-05,
"loss": 0.1197,
"mean_token_accuracy": 0.9637046754360199,
"num_tokens": 13565119.0,
"step": 505
},
{
"epoch": 1.5897035881435257,
"grad_norm": 0.5432256680693549,
"learning_rate": 1.6353144835545298e-05,
"loss": 0.1234,
"mean_token_accuracy": 0.9622001588344574,
"num_tokens": 13699623.0,
"step": 510
},
{
"epoch": 1.6053042121684866,
"grad_norm": 0.6409787925018047,
"learning_rate": 1.6295441431044434e-05,
"loss": 0.1244,
"mean_token_accuracy": 0.9616802036762238,
"num_tokens": 13829628.0,
"step": 515
},
{
"epoch": 1.6209048361934477,
"grad_norm": 0.6383227582961417,
"learning_rate": 1.623773802654357e-05,
"loss": 0.1238,
"mean_token_accuracy": 0.9625266194343567,
"num_tokens": 13961224.0,
"step": 520
},
{
"epoch": 1.6365054602184088,
"grad_norm": 0.6314161515841656,
"learning_rate": 1.61800346220427e-05,
"loss": 0.1121,
"mean_token_accuracy": 0.9656075298786163,
"num_tokens": 14098377.0,
"step": 525
},
{
"epoch": 1.6521060842433697,
"grad_norm": 0.614045523172391,
"learning_rate": 1.6122331217541837e-05,
"loss": 0.1196,
"mean_token_accuracy": 0.963841724395752,
"num_tokens": 14235157.0,
"step": 530
},
{
"epoch": 1.6677067082683308,
"grad_norm": 0.5872146814971247,
"learning_rate": 1.6064627813040972e-05,
"loss": 0.1133,
"mean_token_accuracy": 0.9656576633453369,
"num_tokens": 14369585.0,
"step": 535
},
{
"epoch": 1.6833073322932917,
"grad_norm": 0.5744613415474724,
"learning_rate": 1.6006924408540104e-05,
"loss": 0.1073,
"mean_token_accuracy": 0.9668791234493256,
"num_tokens": 14506057.0,
"step": 540
},
{
"epoch": 1.6989079563182528,
"grad_norm": 0.5973743143104617,
"learning_rate": 1.594922100403924e-05,
"loss": 0.1083,
"mean_token_accuracy": 0.9668962955474854,
"num_tokens": 14642411.0,
"step": 545
},
{
"epoch": 1.7145085803432139,
"grad_norm": 0.6176806575167721,
"learning_rate": 1.5891517599538372e-05,
"loss": 0.1071,
"mean_token_accuracy": 0.9674273788928985,
"num_tokens": 14774203.0,
"step": 550
},
{
"epoch": 1.7301092043681747,
"grad_norm": 0.5649249307146187,
"learning_rate": 1.583381419503751e-05,
"loss": 0.1099,
"mean_token_accuracy": 0.9660561561584473,
"num_tokens": 14909209.0,
"step": 555
},
{
"epoch": 1.7457098283931356,
"grad_norm": 0.5450399044206087,
"learning_rate": 1.5776110790536643e-05,
"loss": 0.1077,
"mean_token_accuracy": 0.9665981948375701,
"num_tokens": 15041381.0,
"step": 560
},
{
"epoch": 1.7613104524180967,
"grad_norm": 0.550655637358523,
"learning_rate": 1.5718407386035778e-05,
"loss": 0.104,
"mean_token_accuracy": 0.9683968663215637,
"num_tokens": 15178472.0,
"step": 565
},
{
"epoch": 1.7769110764430578,
"grad_norm": 0.5468003600764055,
"learning_rate": 1.5660703981534914e-05,
"loss": 0.105,
"mean_token_accuracy": 0.9676320672035217,
"num_tokens": 15314309.0,
"step": 570
},
{
"epoch": 1.7925117004680189,
"grad_norm": 0.662660268609156,
"learning_rate": 1.5603000577034046e-05,
"loss": 0.1081,
"mean_token_accuracy": 0.9670586466789246,
"num_tokens": 15447427.0,
"step": 575
},
{
"epoch": 1.8081123244929798,
"grad_norm": 0.5514391464827504,
"learning_rate": 1.554529717253318e-05,
"loss": 0.1095,
"mean_token_accuracy": 0.9667521238327026,
"num_tokens": 15585445.0,
"step": 580
},
{
"epoch": 1.8237129485179406,
"grad_norm": 0.5981613351314938,
"learning_rate": 1.5487593768032313e-05,
"loss": 0.1038,
"mean_token_accuracy": 0.968332713842392,
"num_tokens": 15719904.0,
"step": 585
},
{
"epoch": 1.8393135725429017,
"grad_norm": 0.5481985576397634,
"learning_rate": 1.542989036353145e-05,
"loss": 0.1068,
"mean_token_accuracy": 0.9675829410552979,
"num_tokens": 15854787.0,
"step": 590
},
{
"epoch": 1.8549141965678628,
"grad_norm": 0.5826788610080513,
"learning_rate": 1.5372186959030584e-05,
"loss": 0.1027,
"mean_token_accuracy": 0.9690861344337464,
"num_tokens": 15991840.0,
"step": 595
},
{
"epoch": 1.8705148205928237,
"grad_norm": 0.6031982698144975,
"learning_rate": 1.5314483554529717e-05,
"loss": 0.1048,
"mean_token_accuracy": 0.9682729780673981,
"num_tokens": 16126187.0,
"step": 600
},
{
"epoch": 1.8861154446177846,
"grad_norm": 0.644822605733718,
"learning_rate": 1.5256780150028854e-05,
"loss": 0.1034,
"mean_token_accuracy": 0.9689009010791778,
"num_tokens": 16260005.0,
"step": 605
},
{
"epoch": 1.9017160686427457,
"grad_norm": 0.6292923852496343,
"learning_rate": 1.5199076745527988e-05,
"loss": 0.1027,
"mean_token_accuracy": 0.9685427904129028,
"num_tokens": 16392826.0,
"step": 610
},
{
"epoch": 1.9173166926677068,
"grad_norm": 0.5905364025571268,
"learning_rate": 1.5141373341027121e-05,
"loss": 0.1023,
"mean_token_accuracy": 0.969213730096817,
"num_tokens": 16530546.0,
"step": 615
},
{
"epoch": 1.9329173166926679,
"grad_norm": 0.6356617111003939,
"learning_rate": 1.5083669936526257e-05,
"loss": 0.1016,
"mean_token_accuracy": 0.96930211186409,
"num_tokens": 16664985.0,
"step": 620
},
{
"epoch": 1.9485179407176287,
"grad_norm": 0.6071607901539858,
"learning_rate": 1.502596653202539e-05,
"loss": 0.1021,
"mean_token_accuracy": 0.9688980400562286,
"num_tokens": 16796878.0,
"step": 625
},
{
"epoch": 1.9641185647425896,
"grad_norm": 0.5064988276006431,
"learning_rate": 1.4968263127524524e-05,
"loss": 0.0957,
"mean_token_accuracy": 0.9704132974147797,
"num_tokens": 16931288.0,
"step": 630
},
{
"epoch": 1.9797191887675507,
"grad_norm": 0.5537435426659689,
"learning_rate": 1.4910559723023658e-05,
"loss": 0.1087,
"mean_token_accuracy": 0.9669051706790924,
"num_tokens": 17064859.0,
"step": 635
},
{
"epoch": 1.9953198127925118,
"grad_norm": 0.5997048037627525,
"learning_rate": 1.4852856318522795e-05,
"loss": 0.0947,
"mean_token_accuracy": 0.9707705557346344,
"num_tokens": 17199056.0,
"step": 640
},
{
"epoch": 2.0093603744149764,
"grad_norm": 0.45765040060824713,
"learning_rate": 1.479515291402193e-05,
"loss": 0.0677,
"mean_token_accuracy": 0.9769907527499728,
"num_tokens": 17320735.0,
"step": 645
},
{
"epoch": 2.0249609984399375,
"grad_norm": 0.6379753427562452,
"learning_rate": 1.4737449509521063e-05,
"loss": 0.0697,
"mean_token_accuracy": 0.9782340943813324,
"num_tokens": 17456135.0,
"step": 650
},
{
"epoch": 2.0405616224648986,
"grad_norm": 0.46594436704934616,
"learning_rate": 1.4679746105020198e-05,
"loss": 0.0674,
"mean_token_accuracy": 0.9790053486824035,
"num_tokens": 17591486.0,
"step": 655
},
{
"epoch": 2.0561622464898597,
"grad_norm": 0.6276578458248949,
"learning_rate": 1.4622042700519332e-05,
"loss": 0.0686,
"mean_token_accuracy": 0.9787347793579102,
"num_tokens": 17726558.0,
"step": 660
},
{
"epoch": 2.0717628705148208,
"grad_norm": 0.5062901549101052,
"learning_rate": 1.4564339296018466e-05,
"loss": 0.0663,
"mean_token_accuracy": 0.979675030708313,
"num_tokens": 17858362.0,
"step": 665
},
{
"epoch": 2.0873634945397814,
"grad_norm": 0.5848008860177072,
"learning_rate": 1.45066358915176e-05,
"loss": 0.0676,
"mean_token_accuracy": 0.9788794219493866,
"num_tokens": 17992249.0,
"step": 670
},
{
"epoch": 2.1029641185647425,
"grad_norm": 0.5586217868546769,
"learning_rate": 1.4448932487016735e-05,
"loss": 0.066,
"mean_token_accuracy": 0.979490089416504,
"num_tokens": 18126932.0,
"step": 675
},
{
"epoch": 2.1185647425897036,
"grad_norm": 0.5456969706238991,
"learning_rate": 1.4391229082515869e-05,
"loss": 0.068,
"mean_token_accuracy": 0.9786436498165131,
"num_tokens": 18261674.0,
"step": 680
},
{
"epoch": 2.1341653666146647,
"grad_norm": 0.547513915986846,
"learning_rate": 1.4333525678015003e-05,
"loss": 0.0654,
"mean_token_accuracy": 0.9795780777931213,
"num_tokens": 18395303.0,
"step": 685
},
{
"epoch": 2.149765990639626,
"grad_norm": 0.5280062621107973,
"learning_rate": 1.427582227351414e-05,
"loss": 0.0646,
"mean_token_accuracy": 0.9799421310424805,
"num_tokens": 18531153.0,
"step": 690
},
{
"epoch": 2.1653666146645865,
"grad_norm": 0.4754449961678256,
"learning_rate": 1.4218118869013274e-05,
"loss": 0.0659,
"mean_token_accuracy": 0.9795533120632172,
"num_tokens": 18672214.0,
"step": 695
},
{
"epoch": 2.1809672386895476,
"grad_norm": 0.5115660900975986,
"learning_rate": 1.4160415464512408e-05,
"loss": 0.0679,
"mean_token_accuracy": 0.9789233446121216,
"num_tokens": 18805938.0,
"step": 700
},
{
"epoch": 2.1965678627145087,
"grad_norm": 0.5751725861583461,
"learning_rate": 1.4102712060011541e-05,
"loss": 0.0648,
"mean_token_accuracy": 0.9792892873287201,
"num_tokens": 18937866.0,
"step": 705
},
{
"epoch": 2.2121684867394698,
"grad_norm": 0.5046032343771815,
"learning_rate": 1.4045008655510677e-05,
"loss": 0.064,
"mean_token_accuracy": 0.9802761554718018,
"num_tokens": 19073783.0,
"step": 710
},
{
"epoch": 2.2277691107644304,
"grad_norm": 0.4771833967652389,
"learning_rate": 1.398730525100981e-05,
"loss": 0.062,
"mean_token_accuracy": 0.9805822253227234,
"num_tokens": 19207935.0,
"step": 715
},
{
"epoch": 2.2433697347893915,
"grad_norm": 0.6115193129572258,
"learning_rate": 1.3929601846508945e-05,
"loss": 0.0649,
"mean_token_accuracy": 0.9801377832889557,
"num_tokens": 19340335.0,
"step": 720
},
{
"epoch": 2.2589703588143526,
"grad_norm": 0.6617445283747385,
"learning_rate": 1.387189844200808e-05,
"loss": 0.0654,
"mean_token_accuracy": 0.9797018051147461,
"num_tokens": 19471321.0,
"step": 725
},
{
"epoch": 2.2745709828393137,
"grad_norm": 0.5272477660409615,
"learning_rate": 1.3814195037507214e-05,
"loss": 0.0667,
"mean_token_accuracy": 0.979661637544632,
"num_tokens": 19602047.0,
"step": 730
},
{
"epoch": 2.2901716068642743,
"grad_norm": 0.6200363163248618,
"learning_rate": 1.3756491633006348e-05,
"loss": 0.066,
"mean_token_accuracy": 0.9794325530529022,
"num_tokens": 19735871.0,
"step": 735
},
{
"epoch": 2.3057722308892354,
"grad_norm": 0.5017649119361518,
"learning_rate": 1.3698788228505481e-05,
"loss": 0.0657,
"mean_token_accuracy": 0.9798206746578216,
"num_tokens": 19869603.0,
"step": 740
},
{
"epoch": 2.3213728549141965,
"grad_norm": 0.5600351935619347,
"learning_rate": 1.3641084824004619e-05,
"loss": 0.0637,
"mean_token_accuracy": 0.9808857142925262,
"num_tokens": 20006195.0,
"step": 745
},
{
"epoch": 2.3369734789391576,
"grad_norm": 0.625047128391445,
"learning_rate": 1.3583381419503752e-05,
"loss": 0.0607,
"mean_token_accuracy": 0.9811928570270538,
"num_tokens": 20143037.0,
"step": 750
},
{
"epoch": 2.3525741029641187,
"grad_norm": 0.5412878260525056,
"learning_rate": 1.3525678015002886e-05,
"loss": 0.0625,
"mean_token_accuracy": 0.9809665322303772,
"num_tokens": 20277564.0,
"step": 755
},
{
"epoch": 2.3681747269890794,
"grad_norm": 0.5719463156340375,
"learning_rate": 1.3467974610502022e-05,
"loss": 0.0656,
"mean_token_accuracy": 0.9800610601902008,
"num_tokens": 20413306.0,
"step": 760
},
{
"epoch": 2.3837753510140405,
"grad_norm": 0.5934985891542635,
"learning_rate": 1.3410271206001155e-05,
"loss": 0.0643,
"mean_token_accuracy": 0.9803776860237121,
"num_tokens": 20550646.0,
"step": 765
},
{
"epoch": 2.3993759750390016,
"grad_norm": 0.623605985698589,
"learning_rate": 1.335256780150029e-05,
"loss": 0.0638,
"mean_token_accuracy": 0.980189961194992,
"num_tokens": 20683074.0,
"step": 770
},
{
"epoch": 2.4149765990639627,
"grad_norm": 0.5634033196138294,
"learning_rate": 1.3294864396999425e-05,
"loss": 0.0607,
"mean_token_accuracy": 0.9815069198608398,
"num_tokens": 20820858.0,
"step": 775
},
{
"epoch": 2.4305772230889238,
"grad_norm": 0.5067524068480144,
"learning_rate": 1.3237160992498559e-05,
"loss": 0.0635,
"mean_token_accuracy": 0.9803751349449158,
"num_tokens": 20953472.0,
"step": 780
},
{
"epoch": 2.4461778471138844,
"grad_norm": 0.5357765170954253,
"learning_rate": 1.3179457587997692e-05,
"loss": 0.0647,
"mean_token_accuracy": 0.9799603164196015,
"num_tokens": 21084458.0,
"step": 785
},
{
"epoch": 2.4617784711388455,
"grad_norm": 0.5784103573729823,
"learning_rate": 1.3121754183496826e-05,
"loss": 0.0612,
"mean_token_accuracy": 0.981369924545288,
"num_tokens": 21217206.0,
"step": 790
},
{
"epoch": 2.4773790951638066,
"grad_norm": 0.6539595621485318,
"learning_rate": 1.3064050778995963e-05,
"loss": 0.0619,
"mean_token_accuracy": 0.9811006963253022,
"num_tokens": 21351848.0,
"step": 795
},
{
"epoch": 2.4929797191887677,
"grad_norm": 0.47881209756399007,
"learning_rate": 1.3006347374495097e-05,
"loss": 0.0601,
"mean_token_accuracy": 0.9817553579807281,
"num_tokens": 21487331.0,
"step": 800
},
{
"epoch": 2.508580343213729,
"grad_norm": 0.5246365795702795,
"learning_rate": 1.2948643969994231e-05,
"loss": 0.0606,
"mean_token_accuracy": 0.9815096974372863,
"num_tokens": 21621528.0,
"step": 805
},
{
"epoch": 2.5241809672386895,
"grad_norm": 0.5184181249911959,
"learning_rate": 1.2890940565493366e-05,
"loss": 0.0597,
"mean_token_accuracy": 0.9815921604633331,
"num_tokens": 21759899.0,
"step": 810
},
{
"epoch": 2.5397815912636506,
"grad_norm": 0.4546161425335325,
"learning_rate": 1.28332371609925e-05,
"loss": 0.0586,
"mean_token_accuracy": 0.9819862425327301,
"num_tokens": 21895472.0,
"step": 815
},
{
"epoch": 2.5553822152886116,
"grad_norm": 0.5215846680122781,
"learning_rate": 1.2775533756491634e-05,
"loss": 0.0584,
"mean_token_accuracy": 0.9823523163795471,
"num_tokens": 22028234.0,
"step": 820
},
{
"epoch": 2.5709828393135723,
"grad_norm": 0.5953777316721176,
"learning_rate": 1.2717830351990768e-05,
"loss": 0.0577,
"mean_token_accuracy": 0.9820267677307128,
"num_tokens": 22162546.0,
"step": 825
},
{
"epoch": 2.5865834633385334,
"grad_norm": 0.5025426378809432,
"learning_rate": 1.2660126947489903e-05,
"loss": 0.0585,
"mean_token_accuracy": 0.9818842828273773,
"num_tokens": 22295641.0,
"step": 830
},
{
"epoch": 2.6021840873634945,
"grad_norm": 0.5039953498707387,
"learning_rate": 1.2602423542989037e-05,
"loss": 0.0598,
"mean_token_accuracy": 0.9823204934597015,
"num_tokens": 22428467.0,
"step": 835
},
{
"epoch": 2.6177847113884556,
"grad_norm": 0.5504893854668644,
"learning_rate": 1.2544720138488171e-05,
"loss": 0.0584,
"mean_token_accuracy": 0.9822833776473999,
"num_tokens": 22565528.0,
"step": 840
},
{
"epoch": 2.6333853354134167,
"grad_norm": 0.5661795657841351,
"learning_rate": 1.2487016733987306e-05,
"loss": 0.058,
"mean_token_accuracy": 0.9823375225067139,
"num_tokens": 22698845.0,
"step": 845
},
{
"epoch": 2.6489859594383773,
"grad_norm": 0.5613797362503292,
"learning_rate": 1.2429313329486442e-05,
"loss": 0.0584,
"mean_token_accuracy": 0.9821543335914612,
"num_tokens": 22833219.0,
"step": 850
},
{
"epoch": 2.6645865834633384,
"grad_norm": 0.5774656256827283,
"learning_rate": 1.2371609924985576e-05,
"loss": 0.0561,
"mean_token_accuracy": 0.9826458513736724,
"num_tokens": 22964040.0,
"step": 855
},
{
"epoch": 2.6801872074882995,
"grad_norm": 0.482343196208055,
"learning_rate": 1.231390652048471e-05,
"loss": 0.0543,
"mean_token_accuracy": 0.9835286378860474,
"num_tokens": 23101855.0,
"step": 860
},
{
"epoch": 2.6957878315132606,
"grad_norm": 0.5526473196570735,
"learning_rate": 1.2256203115983845e-05,
"loss": 0.058,
"mean_token_accuracy": 0.9824776828289032,
"num_tokens": 23238097.0,
"step": 865
},
{
"epoch": 2.7113884555382217,
"grad_norm": 0.5840397892589643,
"learning_rate": 1.2198499711482979e-05,
"loss": 0.0557,
"mean_token_accuracy": 0.9830466687679291,
"num_tokens": 23372611.0,
"step": 870
},
{
"epoch": 2.7269890795631824,
"grad_norm": 0.46214768163185344,
"learning_rate": 1.2140796306982112e-05,
"loss": 0.0548,
"mean_token_accuracy": 0.9835707128047944,
"num_tokens": 23508539.0,
"step": 875
},
{
"epoch": 2.7425897035881435,
"grad_norm": 0.5827338732441717,
"learning_rate": 1.2083092902481248e-05,
"loss": 0.0553,
"mean_token_accuracy": 0.9831783056259156,
"num_tokens": 23644764.0,
"step": 880
},
{
"epoch": 2.7581903276131046,
"grad_norm": 0.5208245768516603,
"learning_rate": 1.2025389497980382e-05,
"loss": 0.0532,
"mean_token_accuracy": 0.9844220519065857,
"num_tokens": 23782159.0,
"step": 885
},
{
"epoch": 2.7737909516380657,
"grad_norm": 0.4956082222868665,
"learning_rate": 1.1967686093479516e-05,
"loss": 0.0542,
"mean_token_accuracy": 0.9836461126804352,
"num_tokens": 23912494.0,
"step": 890
},
{
"epoch": 2.7893915756630268,
"grad_norm": 0.557876314767124,
"learning_rate": 1.190998268897865e-05,
"loss": 0.0557,
"mean_token_accuracy": 0.9829520642757416,
"num_tokens": 24043095.0,
"step": 895
},
{
"epoch": 2.8049921996879874,
"grad_norm": 0.4530892797848299,
"learning_rate": 1.1852279284477785e-05,
"loss": 0.0551,
"mean_token_accuracy": 0.9832406878471375,
"num_tokens": 24173768.0,
"step": 900
},
{
"epoch": 2.8205928237129485,
"grad_norm": 0.5120835399081769,
"learning_rate": 1.1794575879976919e-05,
"loss": 0.054,
"mean_token_accuracy": 0.9839985728263855,
"num_tokens": 24311082.0,
"step": 905
},
{
"epoch": 2.8361934477379096,
"grad_norm": 0.5699193393236468,
"learning_rate": 1.1736872475476052e-05,
"loss": 0.054,
"mean_token_accuracy": 0.9838815748691558,
"num_tokens": 24446211.0,
"step": 910
},
{
"epoch": 2.8517940717628703,
"grad_norm": 0.5124623073225754,
"learning_rate": 1.167916907097519e-05,
"loss": 0.0512,
"mean_token_accuracy": 0.9842207610607148,
"num_tokens": 24580329.0,
"step": 915
},
{
"epoch": 2.8673946957878313,
"grad_norm": 0.5621277019606149,
"learning_rate": 1.1621465666474323e-05,
"loss": 0.054,
"mean_token_accuracy": 0.9836201250553132,
"num_tokens": 24711881.0,
"step": 920
},
{
"epoch": 2.8829953198127924,
"grad_norm": 0.5477814912496453,
"learning_rate": 1.1563762261973457e-05,
"loss": 0.0495,
"mean_token_accuracy": 0.9848583757877349,
"num_tokens": 24847049.0,
"step": 925
},
{
"epoch": 2.8985959438377535,
"grad_norm": 0.47455570945887876,
"learning_rate": 1.1506058857472593e-05,
"loss": 0.0508,
"mean_token_accuracy": 0.9842885196208954,
"num_tokens": 24980229.0,
"step": 930
},
{
"epoch": 2.9141965678627146,
"grad_norm": 0.4687139530424816,
"learning_rate": 1.1448355452971726e-05,
"loss": 0.0524,
"mean_token_accuracy": 0.9845821619033813,
"num_tokens": 25114675.0,
"step": 935
},
{
"epoch": 2.9297971918876753,
"grad_norm": 0.5660748619715202,
"learning_rate": 1.139065204847086e-05,
"loss": 0.0482,
"mean_token_accuracy": 0.9855000972747803,
"num_tokens": 25252634.0,
"step": 940
},
{
"epoch": 2.9453978159126364,
"grad_norm": 0.4854573299925896,
"learning_rate": 1.1332948643969994e-05,
"loss": 0.052,
"mean_token_accuracy": 0.9846445024013519,
"num_tokens": 25388533.0,
"step": 945
},
{
"epoch": 2.9609984399375975,
"grad_norm": 0.5047346397846462,
"learning_rate": 1.127524523946913e-05,
"loss": 0.0524,
"mean_token_accuracy": 0.9843859314918518,
"num_tokens": 25523721.0,
"step": 950
},
{
"epoch": 2.9765990639625586,
"grad_norm": 0.5165845784231596,
"learning_rate": 1.1217541834968263e-05,
"loss": 0.0494,
"mean_token_accuracy": 0.9851727843284607,
"num_tokens": 25655853.0,
"step": 955
},
{
"epoch": 2.9921996879875197,
"grad_norm": 0.49095039440654903,
"learning_rate": 1.1159838430467397e-05,
"loss": 0.0518,
"mean_token_accuracy": 0.9850000500679016,
"num_tokens": 25792877.0,
"step": 960
},
{
"epoch": 3.0062402496099843,
"grad_norm": 0.3490677066256782,
"learning_rate": 1.1102135025966534e-05,
"loss": 0.0362,
"mean_token_accuracy": 0.9879513912730746,
"num_tokens": 25915060.0,
"step": 965
},
{
"epoch": 3.0218408736349454,
"grad_norm": 0.9913523718070499,
"learning_rate": 1.1044431621465668e-05,
"loss": 0.0362,
"mean_token_accuracy": 0.9894399225711823,
"num_tokens": 26050591.0,
"step": 970
},
{
"epoch": 3.0374414976599065,
"grad_norm": 0.5071611911954856,
"learning_rate": 1.0986728216964802e-05,
"loss": 0.0364,
"mean_token_accuracy": 0.9891938328742981,
"num_tokens": 26184429.0,
"step": 975
},
{
"epoch": 3.0530421216848675,
"grad_norm": 0.44176530408204834,
"learning_rate": 1.0929024812463936e-05,
"loss": 0.0358,
"mean_token_accuracy": 0.9892865777015686,
"num_tokens": 26317217.0,
"step": 980
},
{
"epoch": 3.068642745709828,
"grad_norm": 0.4474135118791752,
"learning_rate": 1.0871321407963071e-05,
"loss": 0.0359,
"mean_token_accuracy": 0.9892495989799499,
"num_tokens": 26448145.0,
"step": 985
},
{
"epoch": 3.0842433697347893,
"grad_norm": 0.4469912153610548,
"learning_rate": 1.0813618003462205e-05,
"loss": 0.0355,
"mean_token_accuracy": 0.9895004034042358,
"num_tokens": 26583779.0,
"step": 990
},
{
"epoch": 3.0998439937597504,
"grad_norm": 0.3796984229372117,
"learning_rate": 1.0755914598961339e-05,
"loss": 0.0354,
"mean_token_accuracy": 0.9895397126674652,
"num_tokens": 26717224.0,
"step": 995
},
{
"epoch": 3.1154446177847115,
"grad_norm": 0.48554097245214073,
"learning_rate": 1.0698211194460474e-05,
"loss": 0.0352,
"mean_token_accuracy": 0.9896060526371002,
"num_tokens": 26851803.0,
"step": 1000
},
{
"epoch": 3.1310452418096726,
"grad_norm": 0.4204735666395639,
"learning_rate": 1.0640507789959608e-05,
"loss": 0.0349,
"mean_token_accuracy": 0.9891953945159913,
"num_tokens": 26984272.0,
"step": 1005
},
{
"epoch": 3.1466458658346332,
"grad_norm": 0.43388479747872655,
"learning_rate": 1.0582804385458742e-05,
"loss": 0.0349,
"mean_token_accuracy": 0.9893825590610504,
"num_tokens": 27118280.0,
"step": 1010
},
{
"epoch": 3.1622464898595943,
"grad_norm": 0.5126608366935945,
"learning_rate": 1.0525100980957876e-05,
"loss": 0.0363,
"mean_token_accuracy": 0.9891520202159881,
"num_tokens": 27249856.0,
"step": 1015
},
{
"epoch": 3.1778471138845554,
"grad_norm": 0.4022500904809516,
"learning_rate": 1.0467397576457013e-05,
"loss": 0.0343,
"mean_token_accuracy": 0.9896589994430542,
"num_tokens": 27383606.0,
"step": 1020
},
{
"epoch": 3.1934477379095165,
"grad_norm": 0.36763857285465273,
"learning_rate": 1.0409694171956147e-05,
"loss": 0.0356,
"mean_token_accuracy": 0.9893875420093536,
"num_tokens": 27514324.0,
"step": 1025
},
{
"epoch": 3.209048361934477,
"grad_norm": 0.3761184120835254,
"learning_rate": 1.035199076745528e-05,
"loss": 0.0329,
"mean_token_accuracy": 0.9900339007377624,
"num_tokens": 27651167.0,
"step": 1030
},
{
"epoch": 3.2246489859594383,
"grad_norm": 0.4170183760865521,
"learning_rate": 1.0294287362954416e-05,
"loss": 0.0339,
"mean_token_accuracy": 0.9897565901279449,
"num_tokens": 27787041.0,
"step": 1035
},
{
"epoch": 3.2402496099843994,
"grad_norm": 0.3432109707003376,
"learning_rate": 1.023658395845355e-05,
"loss": 0.0332,
"mean_token_accuracy": 0.9902929067611694,
"num_tokens": 27922540.0,
"step": 1040
},
{
"epoch": 3.2558502340093605,
"grad_norm": 0.34743178714773904,
"learning_rate": 1.0178880553952684e-05,
"loss": 0.0338,
"mean_token_accuracy": 0.9896102547645569,
"num_tokens": 28054161.0,
"step": 1045
},
{
"epoch": 3.2714508580343216,
"grad_norm": 0.46566002269683127,
"learning_rate": 1.0121177149451817e-05,
"loss": 0.0349,
"mean_token_accuracy": 0.9895286440849305,
"num_tokens": 28188801.0,
"step": 1050
},
{
"epoch": 3.287051482059282,
"grad_norm": 0.3383493517355259,
"learning_rate": 1.0063473744950953e-05,
"loss": 0.0331,
"mean_token_accuracy": 0.9901463866233826,
"num_tokens": 28323390.0,
"step": 1055
},
{
"epoch": 3.3026521060842433,
"grad_norm": 0.3890941341577385,
"learning_rate": 1.0005770340450087e-05,
"loss": 0.0338,
"mean_token_accuracy": 0.9900007307529449,
"num_tokens": 28461086.0,
"step": 1060
},
{
"epoch": 3.3182527301092044,
"grad_norm": 0.8722465278924197,
"learning_rate": 9.948066935949222e-06,
"loss": 0.0344,
"mean_token_accuracy": 0.9897770345211029,
"num_tokens": 28595624.0,
"step": 1065
},
{
"epoch": 3.3338533541341655,
"grad_norm": 0.3068180815070705,
"learning_rate": 9.890363531448356e-06,
"loss": 0.0341,
"mean_token_accuracy": 0.9896463751792908,
"num_tokens": 28729146.0,
"step": 1070
},
{
"epoch": 3.3494539781591266,
"grad_norm": 0.405693055582265,
"learning_rate": 9.832660126947491e-06,
"loss": 0.0345,
"mean_token_accuracy": 0.9897280871868134,
"num_tokens": 28863209.0,
"step": 1075
},
{
"epoch": 3.3650546021840873,
"grad_norm": 0.3651986120751572,
"learning_rate": 9.774956722446625e-06,
"loss": 0.0337,
"mean_token_accuracy": 0.9899032473564148,
"num_tokens": 28997560.0,
"step": 1080
},
{
"epoch": 3.3806552262090483,
"grad_norm": 0.4655638046551388,
"learning_rate": 9.717253317945759e-06,
"loss": 0.0336,
"mean_token_accuracy": 0.9898359596729278,
"num_tokens": 29132738.0,
"step": 1085
},
{
"epoch": 3.3962558502340094,
"grad_norm": 0.390597341957638,
"learning_rate": 9.659549913444894e-06,
"loss": 0.0342,
"mean_token_accuracy": 0.9898122906684875,
"num_tokens": 29266954.0,
"step": 1090
},
{
"epoch": 3.4118564742589705,
"grad_norm": 0.48861692751948993,
"learning_rate": 9.601846508944028e-06,
"loss": 0.0338,
"mean_token_accuracy": 0.9897624969482421,
"num_tokens": 29398154.0,
"step": 1095
},
{
"epoch": 3.427457098283931,
"grad_norm": 0.3811068399441777,
"learning_rate": 9.544143104443164e-06,
"loss": 0.0327,
"mean_token_accuracy": 0.9900210738182068,
"num_tokens": 29531832.0,
"step": 1100
},
{
"epoch": 3.4430577223088923,
"grad_norm": 0.36698461790623293,
"learning_rate": 9.486439699942298e-06,
"loss": 0.0335,
"mean_token_accuracy": 0.9899003326892852,
"num_tokens": 29666458.0,
"step": 1105
},
{
"epoch": 3.4586583463338534,
"grad_norm": 0.3725030509215407,
"learning_rate": 9.428736295441431e-06,
"loss": 0.0337,
"mean_token_accuracy": 0.9900450468063354,
"num_tokens": 29802111.0,
"step": 1110
},
{
"epoch": 3.4742589703588145,
"grad_norm": 0.4175587860110882,
"learning_rate": 9.371032890940567e-06,
"loss": 0.0333,
"mean_token_accuracy": 0.9899795711040497,
"num_tokens": 29938336.0,
"step": 1115
},
{
"epoch": 3.489859594383775,
"grad_norm": 0.9192949340801146,
"learning_rate": 9.3133294864397e-06,
"loss": 0.0332,
"mean_token_accuracy": 0.9900984108448029,
"num_tokens": 30070284.0,
"step": 1120
},
{
"epoch": 3.5054602184087362,
"grad_norm": 0.4382238438396383,
"learning_rate": 9.255626081938836e-06,
"loss": 0.0334,
"mean_token_accuracy": 0.9900668442249299,
"num_tokens": 30201902.0,
"step": 1125
},
{
"epoch": 3.5210608424336973,
"grad_norm": 0.4415595784740572,
"learning_rate": 9.19792267743797e-06,
"loss": 0.0345,
"mean_token_accuracy": 0.9897962033748626,
"num_tokens": 30335421.0,
"step": 1130
},
{
"epoch": 3.5366614664586584,
"grad_norm": 0.4103246672411594,
"learning_rate": 9.140219272937104e-06,
"loss": 0.0323,
"mean_token_accuracy": 0.9902926802635192,
"num_tokens": 30469531.0,
"step": 1135
},
{
"epoch": 3.5522620904836195,
"grad_norm": 0.3613436635984974,
"learning_rate": 9.082515868436237e-06,
"loss": 0.0312,
"mean_token_accuracy": 0.9908117353916168,
"num_tokens": 30608253.0,
"step": 1140
},
{
"epoch": 3.56786271450858,
"grad_norm": 0.6129262337079979,
"learning_rate": 9.024812463935373e-06,
"loss": 0.0316,
"mean_token_accuracy": 0.990767502784729,
"num_tokens": 30744344.0,
"step": 1145
},
{
"epoch": 3.5834633385335413,
"grad_norm": 0.3718642406716124,
"learning_rate": 8.967109059434508e-06,
"loss": 0.0332,
"mean_token_accuracy": 0.9901099085807801,
"num_tokens": 30882716.0,
"step": 1150
},
{
"epoch": 3.5990639625585024,
"grad_norm": 0.3888395053037814,
"learning_rate": 8.909405654933642e-06,
"loss": 0.0328,
"mean_token_accuracy": 0.9903294146060944,
"num_tokens": 31019880.0,
"step": 1155
},
{
"epoch": 3.6146645865834635,
"grad_norm": 0.3270250852564694,
"learning_rate": 8.851702250432776e-06,
"loss": 0.0321,
"mean_token_accuracy": 0.9901081144809722,
"num_tokens": 31150261.0,
"step": 1160
},
{
"epoch": 3.6302652106084246,
"grad_norm": 0.3651680490380126,
"learning_rate": 8.79399884593191e-06,
"loss": 0.033,
"mean_token_accuracy": 0.9901183784008026,
"num_tokens": 31285179.0,
"step": 1165
},
{
"epoch": 3.645865834633385,
"grad_norm": 0.3835485037750142,
"learning_rate": 8.736295441431045e-06,
"loss": 0.034,
"mean_token_accuracy": 0.9899890661239624,
"num_tokens": 31420811.0,
"step": 1170
},
{
"epoch": 3.6614664586583463,
"grad_norm": 0.3907588292879908,
"learning_rate": 8.678592036930179e-06,
"loss": 0.0322,
"mean_token_accuracy": 0.9904151439666748,
"num_tokens": 31557161.0,
"step": 1175
},
{
"epoch": 3.6770670826833074,
"grad_norm": 0.3507936183329917,
"learning_rate": 8.620888632429315e-06,
"loss": 0.0335,
"mean_token_accuracy": 0.9902589380741119,
"num_tokens": 31687531.0,
"step": 1180
},
{
"epoch": 3.6926677067082685,
"grad_norm": 0.36813793522661714,
"learning_rate": 8.563185227928448e-06,
"loss": 0.0312,
"mean_token_accuracy": 0.9907920718193054,
"num_tokens": 31823866.0,
"step": 1185
},
{
"epoch": 3.7082683307332296,
"grad_norm": 0.3774995579093435,
"learning_rate": 8.505481823427582e-06,
"loss": 0.0311,
"mean_token_accuracy": 0.990852290391922,
"num_tokens": 31959926.0,
"step": 1190
},
{
"epoch": 3.7238689547581902,
"grad_norm": 0.37260395664452817,
"learning_rate": 8.447778418926718e-06,
"loss": 0.0322,
"mean_token_accuracy": 0.9901103973388672,
"num_tokens": 32095024.0,
"step": 1195
},
{
"epoch": 3.7394695787831513,
"grad_norm": 0.4010288846696042,
"learning_rate": 8.390075014425851e-06,
"loss": 0.0315,
"mean_token_accuracy": 0.9906571626663208,
"num_tokens": 32231044.0,
"step": 1200
},
{
"epoch": 3.7550702028081124,
"grad_norm": 1.096838169906387,
"learning_rate": 8.332371609924987e-06,
"loss": 0.0307,
"mean_token_accuracy": 0.9910696685314179,
"num_tokens": 32368150.0,
"step": 1205
},
{
"epoch": 3.770670826833073,
"grad_norm": 0.37562066364740404,
"learning_rate": 8.27466820542412e-06,
"loss": 0.0313,
"mean_token_accuracy": 0.9904199957847595,
"num_tokens": 32500610.0,
"step": 1210
},
{
"epoch": 3.786271450858034,
"grad_norm": 0.3830733284662822,
"learning_rate": 8.216964800923255e-06,
"loss": 0.0321,
"mean_token_accuracy": 0.9904833734035492,
"num_tokens": 32635303.0,
"step": 1215
},
{
"epoch": 3.8018720748829953,
"grad_norm": 0.3091988680492327,
"learning_rate": 8.15926139642239e-06,
"loss": 0.0312,
"mean_token_accuracy": 0.9907013535499573,
"num_tokens": 32771586.0,
"step": 1220
},
{
"epoch": 3.8174726989079564,
"grad_norm": 0.3312248899119601,
"learning_rate": 8.101557991921524e-06,
"loss": 0.0297,
"mean_token_accuracy": 0.9907315850257874,
"num_tokens": 32908003.0,
"step": 1225
},
{
"epoch": 3.8330733229329175,
"grad_norm": 0.348853963998793,
"learning_rate": 8.04385458742066e-06,
"loss": 0.0309,
"mean_token_accuracy": 0.9906924843788147,
"num_tokens": 33042994.0,
"step": 1230
},
{
"epoch": 3.848673946957878,
"grad_norm": 0.4198481215223838,
"learning_rate": 7.986151182919793e-06,
"loss": 0.0322,
"mean_token_accuracy": 0.9902994751930236,
"num_tokens": 33175265.0,
"step": 1235
},
{
"epoch": 3.864274570982839,
"grad_norm": 0.39686447444012835,
"learning_rate": 7.928447778418927e-06,
"loss": 0.0315,
"mean_token_accuracy": 0.9903347849845886,
"num_tokens": 33308175.0,
"step": 1240
},
{
"epoch": 3.8798751950078003,
"grad_norm": 0.3428618913867458,
"learning_rate": 7.870744373918062e-06,
"loss": 0.0305,
"mean_token_accuracy": 0.9907864332199097,
"num_tokens": 33444560.0,
"step": 1245
},
{
"epoch": 3.8954758190327614,
"grad_norm": 0.41036568455175115,
"learning_rate": 7.813040969417196e-06,
"loss": 0.0312,
"mean_token_accuracy": 0.9906459391117096,
"num_tokens": 33577614.0,
"step": 1250
},
{
"epoch": 3.9110764430577225,
"grad_norm": 0.37505675085074575,
"learning_rate": 7.755337564916332e-06,
"loss": 0.0307,
"mean_token_accuracy": 0.9909157037734986,
"num_tokens": 33711210.0,
"step": 1255
},
{
"epoch": 3.926677067082683,
"grad_norm": 0.3021326967543721,
"learning_rate": 7.697634160415465e-06,
"loss": 0.0298,
"mean_token_accuracy": 0.9911834299564362,
"num_tokens": 33843044.0,
"step": 1260
},
{
"epoch": 3.9422776911076443,
"grad_norm": 0.2580343755150667,
"learning_rate": 7.6399307559146e-06,
"loss": 0.0301,
"mean_token_accuracy": 0.991005277633667,
"num_tokens": 33976958.0,
"step": 1265
},
{
"epoch": 3.9578783151326054,
"grad_norm": 0.32406819404307124,
"learning_rate": 7.582227351413735e-06,
"loss": 0.03,
"mean_token_accuracy": 0.9909539341926574,
"num_tokens": 34112537.0,
"step": 1270
},
{
"epoch": 3.9734789391575664,
"grad_norm": 0.4072364555030912,
"learning_rate": 7.5245239469128685e-06,
"loss": 0.0299,
"mean_token_accuracy": 0.9912158846855164,
"num_tokens": 34247507.0,
"step": 1275
},
{
"epoch": 3.9890795631825275,
"grad_norm": 0.34177409995444047,
"learning_rate": 7.466820542412003e-06,
"loss": 0.0294,
"mean_token_accuracy": 0.9913082957267761,
"num_tokens": 34383614.0,
"step": 1280
},
{
"epoch": 4.003120124804992,
"grad_norm": 0.2087928113819105,
"learning_rate": 7.409117137911137e-06,
"loss": 0.0258,
"mean_token_accuracy": 0.9916192359394498,
"num_tokens": 34505394.0,
"step": 1285
},
{
"epoch": 4.018720748829953,
"grad_norm": 0.22015089497171852,
"learning_rate": 7.351413733410272e-06,
"loss": 0.0261,
"mean_token_accuracy": 0.9923950254917144,
"num_tokens": 34639693.0,
"step": 1290
},
{
"epoch": 4.034321372854914,
"grad_norm": 0.2460585690565452,
"learning_rate": 7.293710328909405e-06,
"loss": 0.0267,
"mean_token_accuracy": 0.9919693648815155,
"num_tokens": 34770892.0,
"step": 1295
},
{
"epoch": 4.049921996879875,
"grad_norm": 0.2822290499571362,
"learning_rate": 7.236006924408541e-06,
"loss": 0.0257,
"mean_token_accuracy": 0.9924468576908112,
"num_tokens": 34908168.0,
"step": 1300
},
{
"epoch": 4.0655226209048365,
"grad_norm": 0.25669589512793606,
"learning_rate": 7.1783035199076755e-06,
"loss": 0.0255,
"mean_token_accuracy": 0.9925323188304901,
"num_tokens": 35044647.0,
"step": 1305
},
{
"epoch": 4.081123244929797,
"grad_norm": 0.30119454787041633,
"learning_rate": 7.120600115406809e-06,
"loss": 0.0264,
"mean_token_accuracy": 0.9920835256576538,
"num_tokens": 35177022.0,
"step": 1310
},
{
"epoch": 4.096723868954758,
"grad_norm": 0.2836926501079029,
"learning_rate": 7.062896710905944e-06,
"loss": 0.0261,
"mean_token_accuracy": 0.9923124372959137,
"num_tokens": 35312214.0,
"step": 1315
},
{
"epoch": 4.112324492979719,
"grad_norm": 0.2796025997070368,
"learning_rate": 7.005193306405078e-06,
"loss": 0.0261,
"mean_token_accuracy": 0.9922410726547242,
"num_tokens": 35445108.0,
"step": 1320
},
{
"epoch": 4.12792511700468,
"grad_norm": 0.23863636949043232,
"learning_rate": 6.947489901904213e-06,
"loss": 0.0259,
"mean_token_accuracy": 0.9924528360366821,
"num_tokens": 35578641.0,
"step": 1325
},
{
"epoch": 4.1435257410296416,
"grad_norm": 0.2319631078574882,
"learning_rate": 6.889786497403347e-06,
"loss": 0.0257,
"mean_token_accuracy": 0.9922460615634918,
"num_tokens": 35712950.0,
"step": 1330
},
{
"epoch": 4.159126365054602,
"grad_norm": 0.22755436684672453,
"learning_rate": 6.832083092902482e-06,
"loss": 0.0261,
"mean_token_accuracy": 0.992229801416397,
"num_tokens": 35844696.0,
"step": 1335
},
{
"epoch": 4.174726989079563,
"grad_norm": 0.27015899956441786,
"learning_rate": 6.774379688401616e-06,
"loss": 0.0256,
"mean_token_accuracy": 0.992426085472107,
"num_tokens": 35981751.0,
"step": 1340
},
{
"epoch": 4.190327613104524,
"grad_norm": 0.1982276144930778,
"learning_rate": 6.71667628390075e-06,
"loss": 0.0258,
"mean_token_accuracy": 0.9924410998821258,
"num_tokens": 36115684.0,
"step": 1345
},
{
"epoch": 4.205928237129485,
"grad_norm": 0.25665434029238543,
"learning_rate": 6.658972879399886e-06,
"loss": 0.0257,
"mean_token_accuracy": 0.992529320716858,
"num_tokens": 36251146.0,
"step": 1350
},
{
"epoch": 4.221528861154447,
"grad_norm": 0.20742945875597904,
"learning_rate": 6.601269474899019e-06,
"loss": 0.0255,
"mean_token_accuracy": 0.9925081074237824,
"num_tokens": 36387342.0,
"step": 1355
},
{
"epoch": 4.237129485179407,
"grad_norm": 0.20830237441174027,
"learning_rate": 6.543566070398154e-06,
"loss": 0.0256,
"mean_token_accuracy": 0.99239182472229,
"num_tokens": 36522199.0,
"step": 1360
},
{
"epoch": 4.252730109204368,
"grad_norm": 0.18329148966652772,
"learning_rate": 6.485862665897289e-06,
"loss": 0.0252,
"mean_token_accuracy": 0.9924617230892181,
"num_tokens": 36659905.0,
"step": 1365
},
{
"epoch": 4.268330733229329,
"grad_norm": 0.2210417236492713,
"learning_rate": 6.4281592613964225e-06,
"loss": 0.0257,
"mean_token_accuracy": 0.9924541592597962,
"num_tokens": 36794596.0,
"step": 1370
},
{
"epoch": 4.28393135725429,
"grad_norm": 0.2450525112386853,
"learning_rate": 6.370455856895558e-06,
"loss": 0.026,
"mean_token_accuracy": 0.9922269523143769,
"num_tokens": 36927331.0,
"step": 1375
},
{
"epoch": 4.299531981279252,
"grad_norm": 0.2389982156469671,
"learning_rate": 6.312752452394692e-06,
"loss": 0.0255,
"mean_token_accuracy": 0.9925234019756317,
"num_tokens": 37064209.0,
"step": 1380
},
{
"epoch": 4.315132605304212,
"grad_norm": 0.22305148751441065,
"learning_rate": 6.255049047893826e-06,
"loss": 0.0253,
"mean_token_accuracy": 0.9923750519752502,
"num_tokens": 37198863.0,
"step": 1385
},
{
"epoch": 4.330733229329173,
"grad_norm": 0.23036166204016767,
"learning_rate": 6.19734564339296e-06,
"loss": 0.0257,
"mean_token_accuracy": 0.9924868226051331,
"num_tokens": 37333313.0,
"step": 1390
},
{
"epoch": 4.3463338533541345,
"grad_norm": 0.21120793280034894,
"learning_rate": 6.139642238892095e-06,
"loss": 0.0256,
"mean_token_accuracy": 0.9923638105392456,
"num_tokens": 37469908.0,
"step": 1395
},
{
"epoch": 4.361934477379095,
"grad_norm": 0.2543427837048502,
"learning_rate": 6.08193883439123e-06,
"loss": 0.0257,
"mean_token_accuracy": 0.9921638011932373,
"num_tokens": 37605045.0,
"step": 1400
},
{
"epoch": 4.377535101404056,
"grad_norm": 0.2521878430596593,
"learning_rate": 6.024235429890364e-06,
"loss": 0.0257,
"mean_token_accuracy": 0.9922496914863587,
"num_tokens": 37738642.0,
"step": 1405
},
{
"epoch": 4.393135725429017,
"grad_norm": 0.29703714329235237,
"learning_rate": 5.966532025389499e-06,
"loss": 0.0256,
"mean_token_accuracy": 0.9924009621143342,
"num_tokens": 37873937.0,
"step": 1410
},
{
"epoch": 4.408736349453978,
"grad_norm": 0.25917181515112186,
"learning_rate": 5.9088286208886326e-06,
"loss": 0.0251,
"mean_token_accuracy": 0.9925038278102875,
"num_tokens": 38010192.0,
"step": 1415
},
{
"epoch": 4.4243369734789395,
"grad_norm": 0.2729195248404703,
"learning_rate": 5.851125216387767e-06,
"loss": 0.0254,
"mean_token_accuracy": 0.9924300014972687,
"num_tokens": 38144250.0,
"step": 1420
},
{
"epoch": 4.4399375975039,
"grad_norm": 0.19314426189940467,
"learning_rate": 5.793421811886903e-06,
"loss": 0.0254,
"mean_token_accuracy": 0.9922723174095154,
"num_tokens": 38276938.0,
"step": 1425
},
{
"epoch": 4.455538221528861,
"grad_norm": 0.26163600842534557,
"learning_rate": 5.7357184073860365e-06,
"loss": 0.0254,
"mean_token_accuracy": 0.9924465179443359,
"num_tokens": 38413184.0,
"step": 1430
},
{
"epoch": 4.471138845553822,
"grad_norm": 0.2594397682913733,
"learning_rate": 5.678015002885171e-06,
"loss": 0.0257,
"mean_token_accuracy": 0.9922716200351716,
"num_tokens": 38545079.0,
"step": 1435
},
{
"epoch": 4.486739469578783,
"grad_norm": 0.21072322905834717,
"learning_rate": 5.620311598384305e-06,
"loss": 0.0258,
"mean_token_accuracy": 0.9923346698284149,
"num_tokens": 38677670.0,
"step": 1440
},
{
"epoch": 4.502340093603744,
"grad_norm": 0.2456711813003879,
"learning_rate": 5.5626081938834396e-06,
"loss": 0.0258,
"mean_token_accuracy": 0.9918740570545197,
"num_tokens": 38810368.0,
"step": 1445
},
{
"epoch": 4.517940717628705,
"grad_norm": 0.2724680423947134,
"learning_rate": 5.504904789382573e-06,
"loss": 0.0252,
"mean_token_accuracy": 0.9924174904823303,
"num_tokens": 38944806.0,
"step": 1450
},
{
"epoch": 4.533541341653666,
"grad_norm": 0.21008096218444924,
"learning_rate": 5.447201384881709e-06,
"loss": 0.0244,
"mean_token_accuracy": 0.992695277929306,
"num_tokens": 39082214.0,
"step": 1455
},
{
"epoch": 4.549141965678627,
"grad_norm": 0.22916295891537064,
"learning_rate": 5.3894979803808435e-06,
"loss": 0.0247,
"mean_token_accuracy": 0.9923828959465026,
"num_tokens": 39219193.0,
"step": 1460
},
{
"epoch": 4.564742589703588,
"grad_norm": 0.2331878172390347,
"learning_rate": 5.331794575879977e-06,
"loss": 0.025,
"mean_token_accuracy": 0.9924946069717407,
"num_tokens": 39356196.0,
"step": 1465
},
{
"epoch": 4.580343213728549,
"grad_norm": 0.251639715266913,
"learning_rate": 5.274091171379112e-06,
"loss": 0.0259,
"mean_token_accuracy": 0.9923617303371429,
"num_tokens": 39488052.0,
"step": 1470
},
{
"epoch": 4.59594383775351,
"grad_norm": 0.23083713655395227,
"learning_rate": 5.216387766878246e-06,
"loss": 0.0253,
"mean_token_accuracy": 0.9925037145614624,
"num_tokens": 39622300.0,
"step": 1475
},
{
"epoch": 4.611544461778471,
"grad_norm": 0.19731666531160486,
"learning_rate": 5.158684362377381e-06,
"loss": 0.0247,
"mean_token_accuracy": 0.992599630355835,
"num_tokens": 39761796.0,
"step": 1480
},
{
"epoch": 4.627145085803432,
"grad_norm": 0.2673103549266464,
"learning_rate": 5.100980957876515e-06,
"loss": 0.0256,
"mean_token_accuracy": 0.9922535717487335,
"num_tokens": 39895523.0,
"step": 1485
},
{
"epoch": 4.642745709828393,
"grad_norm": 0.19942831397711988,
"learning_rate": 5.04327755337565e-06,
"loss": 0.0246,
"mean_token_accuracy": 0.9923189103603363,
"num_tokens": 40032480.0,
"step": 1490
},
{
"epoch": 4.658346333853354,
"grad_norm": 0.2390543299543101,
"learning_rate": 4.985574148874784e-06,
"loss": 0.0259,
"mean_token_accuracy": 0.992202627658844,
"num_tokens": 40164238.0,
"step": 1495
},
{
"epoch": 4.673946957878315,
"grad_norm": 0.2043619266630321,
"learning_rate": 4.927870744373918e-06,
"loss": 0.0251,
"mean_token_accuracy": 0.9922064006328583,
"num_tokens": 40298050.0,
"step": 1500
},
{
"epoch": 4.689547581903276,
"grad_norm": 0.19900313130075178,
"learning_rate": 4.870167339873053e-06,
"loss": 0.0251,
"mean_token_accuracy": 0.9926720380783081,
"num_tokens": 40432899.0,
"step": 1505
},
{
"epoch": 4.7051482059282375,
"grad_norm": 0.23864486350990322,
"learning_rate": 4.812463935372187e-06,
"loss": 0.025,
"mean_token_accuracy": 0.9923921763896942,
"num_tokens": 40567752.0,
"step": 1510
},
{
"epoch": 4.720748829953198,
"grad_norm": 0.25871353222186716,
"learning_rate": 4.754760530871322e-06,
"loss": 0.0251,
"mean_token_accuracy": 0.9923587918281556,
"num_tokens": 40701848.0,
"step": 1515
},
{
"epoch": 4.736349453978159,
"grad_norm": 0.25605007304500094,
"learning_rate": 4.697057126370456e-06,
"loss": 0.0254,
"mean_token_accuracy": 0.9922918856143952,
"num_tokens": 40836084.0,
"step": 1520
},
{
"epoch": 4.75195007800312,
"grad_norm": 0.20973968723381536,
"learning_rate": 4.6393537218695904e-06,
"loss": 0.0259,
"mean_token_accuracy": 0.9920334756374359,
"num_tokens": 40967976.0,
"step": 1525
},
{
"epoch": 4.767550702028081,
"grad_norm": 0.2721131562347456,
"learning_rate": 4.581650317368725e-06,
"loss": 0.0249,
"mean_token_accuracy": 0.9922464430332184,
"num_tokens": 41101418.0,
"step": 1530
},
{
"epoch": 4.7831513260530425,
"grad_norm": 0.19496771982733335,
"learning_rate": 4.52394691286786e-06,
"loss": 0.0251,
"mean_token_accuracy": 0.9924694299697876,
"num_tokens": 41234419.0,
"step": 1535
},
{
"epoch": 4.798751950078003,
"grad_norm": 0.1856919453879232,
"learning_rate": 4.466243508366994e-06,
"loss": 0.0245,
"mean_token_accuracy": 0.992746913433075,
"num_tokens": 41371203.0,
"step": 1540
},
{
"epoch": 4.814352574102964,
"grad_norm": 0.27502501789045736,
"learning_rate": 4.408540103866128e-06,
"loss": 0.0254,
"mean_token_accuracy": 0.9923159599304199,
"num_tokens": 41505318.0,
"step": 1545
},
{
"epoch": 4.829953198127925,
"grad_norm": 0.24734756683413492,
"learning_rate": 4.350836699365263e-06,
"loss": 0.025,
"mean_token_accuracy": 0.9924296736717224,
"num_tokens": 41640529.0,
"step": 1550
},
{
"epoch": 4.845553822152886,
"grad_norm": 0.2747226907102041,
"learning_rate": 4.2931332948643974e-06,
"loss": 0.0258,
"mean_token_accuracy": 0.9920921742916107,
"num_tokens": 41772590.0,
"step": 1555
},
{
"epoch": 4.8611544461778475,
"grad_norm": 0.1987124720913511,
"learning_rate": 4.235429890363531e-06,
"loss": 0.0246,
"mean_token_accuracy": 0.9926553666591644,
"num_tokens": 41907842.0,
"step": 1560
},
{
"epoch": 4.876755070202808,
"grad_norm": 0.2007681676713943,
"learning_rate": 4.177726485862667e-06,
"loss": 0.025,
"mean_token_accuracy": 0.9924448490142822,
"num_tokens": 42041393.0,
"step": 1565
},
{
"epoch": 4.892355694227769,
"grad_norm": 0.22496355495648218,
"learning_rate": 4.1200230813618005e-06,
"loss": 0.0249,
"mean_token_accuracy": 0.9926417946815491,
"num_tokens": 42175006.0,
"step": 1570
},
{
"epoch": 4.90795631825273,
"grad_norm": 0.20488185006184043,
"learning_rate": 4.062319676860935e-06,
"loss": 0.0247,
"mean_token_accuracy": 0.9925686061382294,
"num_tokens": 42310997.0,
"step": 1575
},
{
"epoch": 4.923556942277691,
"grad_norm": 0.20176175269557198,
"learning_rate": 4.00461627236007e-06,
"loss": 0.0246,
"mean_token_accuracy": 0.9925564587116241,
"num_tokens": 42445041.0,
"step": 1580
},
{
"epoch": 4.939157566302653,
"grad_norm": 0.27843063254870387,
"learning_rate": 3.946912867859204e-06,
"loss": 0.0255,
"mean_token_accuracy": 0.9923796772956848,
"num_tokens": 42577620.0,
"step": 1585
},
{
"epoch": 4.954758190327613,
"grad_norm": 0.1849806436583689,
"learning_rate": 3.889209463358338e-06,
"loss": 0.0252,
"mean_token_accuracy": 0.9921863794326782,
"num_tokens": 42711042.0,
"step": 1590
},
{
"epoch": 4.970358814352574,
"grad_norm": 0.2260995129591358,
"learning_rate": 3.831506058857473e-06,
"loss": 0.025,
"mean_token_accuracy": 0.9923262357711792,
"num_tokens": 42844345.0,
"step": 1595
},
{
"epoch": 4.985959438377535,
"grad_norm": 0.23583146631665872,
"learning_rate": 3.7738026543566075e-06,
"loss": 0.0252,
"mean_token_accuracy": 0.9923614859580994,
"num_tokens": 42975604.0,
"step": 1600
},
{
"epoch": 5.0,
"grad_norm": 0.21933550919111908,
"learning_rate": 3.7160992498557417e-06,
"loss": 0.0221,
"mean_token_accuracy": 0.9925718638632033,
"num_tokens": 43097746.0,
"step": 1605
},
{
"epoch": 5.015600624024961,
"grad_norm": 0.1525090326151516,
"learning_rate": 3.6583958453548764e-06,
"loss": 0.0238,
"mean_token_accuracy": 0.9927153348922729,
"num_tokens": 43232905.0,
"step": 1610
},
{
"epoch": 5.031201248049922,
"grad_norm": 0.20467619947473584,
"learning_rate": 3.6006924408540106e-06,
"loss": 0.0244,
"mean_token_accuracy": 0.9926062643527984,
"num_tokens": 43366038.0,
"step": 1615
},
{
"epoch": 5.046801872074883,
"grad_norm": 0.1360921218261351,
"learning_rate": 3.542989036353145e-06,
"loss": 0.0242,
"mean_token_accuracy": 0.9924696981906891,
"num_tokens": 43499140.0,
"step": 1620
},
{
"epoch": 5.062402496099844,
"grad_norm": 0.15296937067760497,
"learning_rate": 3.4852856318522794e-06,
"loss": 0.0242,
"mean_token_accuracy": 0.9926361858844757,
"num_tokens": 43631085.0,
"step": 1625
},
{
"epoch": 5.078003120124805,
"grad_norm": 0.1310938697562029,
"learning_rate": 3.427582227351414e-06,
"loss": 0.0238,
"mean_token_accuracy": 0.992695951461792,
"num_tokens": 43766631.0,
"step": 1630
},
{
"epoch": 5.093603744149766,
"grad_norm": 0.12923692759517427,
"learning_rate": 3.3698788228505487e-06,
"loss": 0.0241,
"mean_token_accuracy": 0.9928207635879517,
"num_tokens": 43899731.0,
"step": 1635
},
{
"epoch": 5.109204368174727,
"grad_norm": 0.19286616296038664,
"learning_rate": 3.312175418349683e-06,
"loss": 0.0235,
"mean_token_accuracy": 0.9928472936153412,
"num_tokens": 44035950.0,
"step": 1640
},
{
"epoch": 5.124804992199688,
"grad_norm": 0.13601247264012203,
"learning_rate": 3.254472013848817e-06,
"loss": 0.0241,
"mean_token_accuracy": 0.9925517261028289,
"num_tokens": 44169592.0,
"step": 1645
},
{
"epoch": 5.140405616224649,
"grad_norm": 0.12224126619272041,
"learning_rate": 3.196768609347952e-06,
"loss": 0.0239,
"mean_token_accuracy": 0.9928341090679169,
"num_tokens": 44303806.0,
"step": 1650
},
{
"epoch": 5.15600624024961,
"grad_norm": 0.1562685200560613,
"learning_rate": 3.139065204847086e-06,
"loss": 0.0235,
"mean_token_accuracy": 0.9929479837417603,
"num_tokens": 44441856.0,
"step": 1655
},
{
"epoch": 5.171606864274571,
"grad_norm": 0.1769130623237416,
"learning_rate": 3.081361800346221e-06,
"loss": 0.0243,
"mean_token_accuracy": 0.9923935830593109,
"num_tokens": 44574190.0,
"step": 1660
},
{
"epoch": 5.187207488299532,
"grad_norm": 0.13712712227031879,
"learning_rate": 3.0236583958453553e-06,
"loss": 0.0239,
"mean_token_accuracy": 0.9927611231803894,
"num_tokens": 44709889.0,
"step": 1665
},
{
"epoch": 5.202808112324493,
"grad_norm": 0.1270063308272785,
"learning_rate": 2.9659549913444895e-06,
"loss": 0.0241,
"mean_token_accuracy": 0.992726308107376,
"num_tokens": 44842799.0,
"step": 1670
},
{
"epoch": 5.218408736349454,
"grad_norm": 0.2437413697601591,
"learning_rate": 2.908251586843624e-06,
"loss": 0.0243,
"mean_token_accuracy": 0.9925644099712372,
"num_tokens": 44976216.0,
"step": 1675
},
{
"epoch": 5.234009360374415,
"grad_norm": 0.12921566402941945,
"learning_rate": 2.8505481823427584e-06,
"loss": 0.0244,
"mean_token_accuracy": 0.9923673212528229,
"num_tokens": 45108784.0,
"step": 1680
},
{
"epoch": 5.249609984399376,
"grad_norm": 0.13105411576678402,
"learning_rate": 2.7928447778418926e-06,
"loss": 0.0242,
"mean_token_accuracy": 0.9926429033279419,
"num_tokens": 45241655.0,
"step": 1685
},
{
"epoch": 5.265210608424337,
"grad_norm": 0.12608799588120717,
"learning_rate": 2.7351413733410277e-06,
"loss": 0.024,
"mean_token_accuracy": 0.9926675736904145,
"num_tokens": 45375891.0,
"step": 1690
},
{
"epoch": 5.280811232449298,
"grad_norm": 0.12064418233877248,
"learning_rate": 2.677437968840162e-06,
"loss": 0.0243,
"mean_token_accuracy": 0.992779678106308,
"num_tokens": 45507823.0,
"step": 1695
},
{
"epoch": 5.296411856474259,
"grad_norm": 0.17004625510690358,
"learning_rate": 2.619734564339296e-06,
"loss": 0.0245,
"mean_token_accuracy": 0.9925733089447022,
"num_tokens": 45639010.0,
"step": 1700
},
{
"epoch": 5.31201248049922,
"grad_norm": 0.13527413962954762,
"learning_rate": 2.5620311598384307e-06,
"loss": 0.0238,
"mean_token_accuracy": 0.9928280174732208,
"num_tokens": 45773707.0,
"step": 1705
},
{
"epoch": 5.327613104524181,
"grad_norm": 0.15724887606296575,
"learning_rate": 2.504327755337565e-06,
"loss": 0.0238,
"mean_token_accuracy": 0.992822802066803,
"num_tokens": 45909418.0,
"step": 1710
},
{
"epoch": 5.343213728549142,
"grad_norm": 0.12042487098433259,
"learning_rate": 2.4466243508366996e-06,
"loss": 0.0235,
"mean_token_accuracy": 0.9930182099342346,
"num_tokens": 46046532.0,
"step": 1715
},
{
"epoch": 5.358814352574103,
"grad_norm": 0.12420171711829726,
"learning_rate": 2.388920946335834e-06,
"loss": 0.0242,
"mean_token_accuracy": 0.9925850927829742,
"num_tokens": 46180373.0,
"step": 1720
},
{
"epoch": 5.374414976599064,
"grad_norm": 0.13681414490380384,
"learning_rate": 2.3312175418349685e-06,
"loss": 0.0231,
"mean_token_accuracy": 0.9930324614048004,
"num_tokens": 46318628.0,
"step": 1725
},
{
"epoch": 5.390015600624025,
"grad_norm": 0.16276711166895205,
"learning_rate": 2.273514137334103e-06,
"loss": 0.0245,
"mean_token_accuracy": 0.9926711559295655,
"num_tokens": 46449238.0,
"step": 1730
},
{
"epoch": 5.405616224648986,
"grad_norm": 0.15664237651413515,
"learning_rate": 2.2158107328332373e-06,
"loss": 0.024,
"mean_token_accuracy": 0.9928957402706147,
"num_tokens": 46583827.0,
"step": 1735
},
{
"epoch": 5.4212168486739465,
"grad_norm": 0.12142597261174552,
"learning_rate": 2.1581073283323715e-06,
"loss": 0.0245,
"mean_token_accuracy": 0.9924871027469635,
"num_tokens": 46715746.0,
"step": 1740
},
{
"epoch": 5.436817472698908,
"grad_norm": 0.15964824128276517,
"learning_rate": 2.100403923831506e-06,
"loss": 0.0236,
"mean_token_accuracy": 0.9927206993103027,
"num_tokens": 46852344.0,
"step": 1745
},
{
"epoch": 5.452418096723869,
"grad_norm": 0.15611306327636612,
"learning_rate": 2.042700519330641e-06,
"loss": 0.0244,
"mean_token_accuracy": 0.9925235331058502,
"num_tokens": 46983899.0,
"step": 1750
},
{
"epoch": 5.46801872074883,
"grad_norm": 0.14718402845371603,
"learning_rate": 1.984997114829775e-06,
"loss": 0.0233,
"mean_token_accuracy": 0.9928801476955413,
"num_tokens": 47121965.0,
"step": 1755
},
{
"epoch": 5.483619344773791,
"grad_norm": 0.17532809899956678,
"learning_rate": 1.9272937103289097e-06,
"loss": 0.0235,
"mean_token_accuracy": 0.9928483843803406,
"num_tokens": 47259573.0,
"step": 1760
},
{
"epoch": 5.4992199687987515,
"grad_norm": 0.15620539955228097,
"learning_rate": 1.869590305828044e-06,
"loss": 0.0239,
"mean_token_accuracy": 0.9927187144756318,
"num_tokens": 47393619.0,
"step": 1765
},
{
"epoch": 5.514820592823713,
"grad_norm": 0.15452994664407602,
"learning_rate": 1.8118869013271783e-06,
"loss": 0.0233,
"mean_token_accuracy": 0.9930829882621766,
"num_tokens": 47531932.0,
"step": 1770
},
{
"epoch": 5.530421216848674,
"grad_norm": 0.1609193936605019,
"learning_rate": 1.754183496826313e-06,
"loss": 0.0241,
"mean_token_accuracy": 0.9925650417804718,
"num_tokens": 47664935.0,
"step": 1775
},
{
"epoch": 5.546021840873635,
"grad_norm": 0.18069485411822495,
"learning_rate": 1.6964800923254474e-06,
"loss": 0.0237,
"mean_token_accuracy": 0.9928048968315124,
"num_tokens": 47801019.0,
"step": 1780
},
{
"epoch": 5.561622464898596,
"grad_norm": 0.14569556410470508,
"learning_rate": 1.6387766878245816e-06,
"loss": 0.0241,
"mean_token_accuracy": 0.9925419509410858,
"num_tokens": 47933966.0,
"step": 1785
},
{
"epoch": 5.577223088923557,
"grad_norm": 0.13725463666507065,
"learning_rate": 1.5810732833237163e-06,
"loss": 0.0237,
"mean_token_accuracy": 0.9926219820976258,
"num_tokens": 48069691.0,
"step": 1790
},
{
"epoch": 5.592823712948518,
"grad_norm": 0.15563435263990227,
"learning_rate": 1.5233698788228507e-06,
"loss": 0.0249,
"mean_token_accuracy": 0.99224454164505,
"num_tokens": 48198004.0,
"step": 1795
},
{
"epoch": 5.608424336973479,
"grad_norm": 0.19150391237914954,
"learning_rate": 1.4656664743219851e-06,
"loss": 0.0233,
"mean_token_accuracy": 0.9930429100990296,
"num_tokens": 48336169.0,
"step": 1800
},
{
"epoch": 5.62402496099844,
"grad_norm": 0.13266766596594196,
"learning_rate": 1.4079630698211198e-06,
"loss": 0.0239,
"mean_token_accuracy": 0.9926740467548371,
"num_tokens": 48471104.0,
"step": 1805
},
{
"epoch": 5.639625585023401,
"grad_norm": 0.1788362488762517,
"learning_rate": 1.350259665320254e-06,
"loss": 0.0243,
"mean_token_accuracy": 0.99261354804039,
"num_tokens": 48603882.0,
"step": 1810
},
{
"epoch": 5.655226209048362,
"grad_norm": 0.14065816169187795,
"learning_rate": 1.2925562608193884e-06,
"loss": 0.0247,
"mean_token_accuracy": 0.9923071384429931,
"num_tokens": 48733494.0,
"step": 1815
},
{
"epoch": 5.670826833073323,
"grad_norm": 0.17040381288127784,
"learning_rate": 1.2348528563185228e-06,
"loss": 0.0237,
"mean_token_accuracy": 0.9929697871208191,
"num_tokens": 48869301.0,
"step": 1820
},
{
"epoch": 5.686427457098284,
"grad_norm": 0.16017704059144489,
"learning_rate": 1.1771494518176575e-06,
"loss": 0.0238,
"mean_token_accuracy": 0.9927242577075959,
"num_tokens": 49003656.0,
"step": 1825
},
{
"epoch": 5.702028081123245,
"grad_norm": 0.11848368940519473,
"learning_rate": 1.1194460473167917e-06,
"loss": 0.0237,
"mean_token_accuracy": 0.9926847636699676,
"num_tokens": 49139812.0,
"step": 1830
},
{
"epoch": 5.717628705148206,
"grad_norm": 0.1517735016635918,
"learning_rate": 1.0617426428159263e-06,
"loss": 0.0237,
"mean_token_accuracy": 0.9928111970424652,
"num_tokens": 49273201.0,
"step": 1835
},
{
"epoch": 5.733229329173167,
"grad_norm": 0.18940074866811016,
"learning_rate": 1.0040392383150608e-06,
"loss": 0.0235,
"mean_token_accuracy": 0.9927483320236206,
"num_tokens": 49408994.0,
"step": 1840
},
{
"epoch": 5.748829953198128,
"grad_norm": 0.1516633870907396,
"learning_rate": 9.463358338141951e-07,
"loss": 0.0238,
"mean_token_accuracy": 0.9929431319236756,
"num_tokens": 49545733.0,
"step": 1845
},
{
"epoch": 5.764430577223089,
"grad_norm": 0.12054317524665162,
"learning_rate": 8.886324293133296e-07,
"loss": 0.0238,
"mean_token_accuracy": 0.992936760187149,
"num_tokens": 49680062.0,
"step": 1850
},
{
"epoch": 5.78003120124805,
"grad_norm": 0.1370174323773597,
"learning_rate": 8.30929024812464e-07,
"loss": 0.0238,
"mean_token_accuracy": 0.9928847312927246,
"num_tokens": 49814813.0,
"step": 1855
},
{
"epoch": 5.795631825273011,
"grad_norm": 0.14145943543200018,
"learning_rate": 7.732256203115985e-07,
"loss": 0.0235,
"mean_token_accuracy": 0.9928096294403076,
"num_tokens": 49951082.0,
"step": 1860
},
{
"epoch": 5.811232449297972,
"grad_norm": 0.26191584460732914,
"learning_rate": 7.155222158107329e-07,
"loss": 0.0237,
"mean_token_accuracy": 0.992737352848053,
"num_tokens": 50086639.0,
"step": 1865
},
{
"epoch": 5.826833073322933,
"grad_norm": 0.1321209476672431,
"learning_rate": 6.578188113098672e-07,
"loss": 0.0237,
"mean_token_accuracy": 0.9928857266902924,
"num_tokens": 50222788.0,
"step": 1870
},
{
"epoch": 5.842433697347894,
"grad_norm": 0.10316470824727872,
"learning_rate": 6.001154068090018e-07,
"loss": 0.0239,
"mean_token_accuracy": 0.9926657855510712,
"num_tokens": 50355976.0,
"step": 1875
},
{
"epoch": 5.858034321372855,
"grad_norm": 0.13714009102884916,
"learning_rate": 5.424120023081362e-07,
"loss": 0.0237,
"mean_token_accuracy": 0.992606920003891,
"num_tokens": 50491743.0,
"step": 1880
},
{
"epoch": 5.873634945397816,
"grad_norm": 0.16210992403980792,
"learning_rate": 4.847085978072707e-07,
"loss": 0.024,
"mean_token_accuracy": 0.9926069259643555,
"num_tokens": 50624599.0,
"step": 1885
},
{
"epoch": 5.889235569422777,
"grad_norm": 0.14443601980669765,
"learning_rate": 4.270051933064051e-07,
"loss": 0.0241,
"mean_token_accuracy": 0.99270578622818,
"num_tokens": 50758742.0,
"step": 1890
},
{
"epoch": 5.904836193447738,
"grad_norm": 0.1127113758113436,
"learning_rate": 3.6930178880553954e-07,
"loss": 0.024,
"mean_token_accuracy": 0.9925020098686218,
"num_tokens": 50891503.0,
"step": 1895
},
{
"epoch": 5.920436817472699,
"grad_norm": 0.13570504066067704,
"learning_rate": 3.11598384304674e-07,
"loss": 0.0244,
"mean_token_accuracy": 0.9926927983760834,
"num_tokens": 51022815.0,
"step": 1900
},
{
"epoch": 5.9360374414976596,
"grad_norm": 0.14715744807730147,
"learning_rate": 2.5389497980380845e-07,
"loss": 0.0232,
"mean_token_accuracy": 0.993007630109787,
"num_tokens": 51161194.0,
"step": 1905
},
{
"epoch": 5.951638065522621,
"grad_norm": 0.13374492992277562,
"learning_rate": 1.9619157530294288e-07,
"loss": 0.0235,
"mean_token_accuracy": 0.9928073644638061,
"num_tokens": 51297114.0,
"step": 1910
},
{
"epoch": 5.967238689547582,
"grad_norm": 0.14188936039069744,
"learning_rate": 1.3848817080207733e-07,
"loss": 0.0233,
"mean_token_accuracy": 0.9929638624191284,
"num_tokens": 51434451.0,
"step": 1915
},
{
"epoch": 5.982839313572543,
"grad_norm": 0.1212138838070634,
"learning_rate": 8.078476630121177e-08,
"loss": 0.0235,
"mean_token_accuracy": 0.9928582549095154,
"num_tokens": 51570117.0,
"step": 1920
},
{
"epoch": 5.998439937597504,
"grad_norm": 0.12760759914906278,
"learning_rate": 2.308136180034622e-08,
"loss": 0.024,
"mean_token_accuracy": 0.9928393125534057,
"num_tokens": 51703514.0,
"step": 1925
},
{
"epoch": 6.0,
"mean_token_accuracy": 0.9926674365997314,
"num_tokens": 51716911.0,
"step": 1926,
"total_flos": 309043048284160.0,
"train_loss": 0.11322491052526659,
"train_runtime": 3926.7142,
"train_samples_per_second": 31.316,
"train_steps_per_second": 0.49
}
],
"logging_steps": 5,
"max_steps": 1926,
"num_input_tokens_seen": 0,
"num_train_epochs": 6,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 309043048284160.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}