{ "best_metric": 0.293356, "best_model_checkpoint": "/home/patrickbarker/output/qwen2-vl-7b-instruct/v2-20241220-000522/checkpoint-200", "epoch": 9.786516853932584, "eval_steps": 200, "global_step": 440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "acc": 0.76875001, "epoch": 0.02247191011235955, "grad_norm": 29.44770175695366, "learning_rate": 0.0, "loss": 1.35673797, "memory(GiB)": 54.56, "step": 1, "train_speed(iter/s)": 0.023627 }, { "acc": 0.77130413, "epoch": 0.11235955056179775, "grad_norm": 17.994017917618898, "learning_rate": 5.206780355557714e-06, "loss": 1.32594085, "memory(GiB)": 73.74, "step": 5, "train_speed(iter/s)": 0.030845 }, { "acc": 0.8387435, "epoch": 0.2247191011235955, "grad_norm": 16.0062785233572, "learning_rate": 7.44921859773347e-06, "loss": 0.88349724, "memory(GiB)": 73.74, "step": 10, "train_speed(iter/s)": 0.032052 }, { "acc": 0.84626484, "epoch": 0.33707865168539325, "grad_norm": 13.905238872281126, "learning_rate": 8.760960879589352e-06, "loss": 0.80604382, "memory(GiB)": 73.74, "step": 15, "train_speed(iter/s)": 0.032459 }, { "acc": 0.8715888, "epoch": 0.449438202247191, "grad_norm": 22.76985259858647, "learning_rate": 9.691656839909223e-06, "loss": 0.72844338, "memory(GiB)": 73.74, "step": 20, "train_speed(iter/s)": 0.032672 }, { "acc": 0.86277018, "epoch": 0.5617977528089888, "grad_norm": 9.485866543732788, "learning_rate": 9.998729227320474e-06, "loss": 0.75610299, "memory(GiB)": 73.74, "step": 25, "train_speed(iter/s)": 0.032803 }, { "acc": 0.87233868, "epoch": 0.6741573033707865, "grad_norm": 6.654731329050647, "learning_rate": 9.990965733615236e-06, "loss": 0.74341016, "memory(GiB)": 73.74, "step": 30, "train_speed(iter/s)": 0.032887 }, { "acc": 0.89718065, "epoch": 0.7865168539325843, "grad_norm": 4.770562351177425, "learning_rate": 9.976155679821608e-06, "loss": 0.56813421, "memory(GiB)": 73.74, "step": 35, "train_speed(iter/s)": 0.032948 }, { "acc": 0.89619627, "epoch": 0.898876404494382, "grad_norm": 8.554885957290168, "learning_rate": 9.954319977802235e-06, "loss": 0.62929983, "memory(GiB)": 73.74, "step": 40, "train_speed(iter/s)": 0.032994 }, { "acc": 0.81503201, "epoch": 1.0, "grad_norm": 5.79442681868791, "learning_rate": 9.93181333636191e-06, "loss": 0.47364488, "memory(GiB)": 73.74, "step": 45, "train_speed(iter/s)": 0.033394 }, { "acc": 0.89444332, "epoch": 1.1123595505617978, "grad_norm": 3.682921265648056, "learning_rate": 9.897415803421993e-06, "loss": 0.58168054, "memory(GiB)": 73.74, "step": 50, "train_speed(iter/s)": 0.033362 }, { "acc": 0.8984375, "epoch": 1.2247191011235956, "grad_norm": 2.535914624102986, "learning_rate": 9.856103803316381e-06, "loss": 0.51347799, "memory(GiB)": 73.74, "step": 55, "train_speed(iter/s)": 0.033359 }, { "acc": 0.89978828, "epoch": 1.3370786516853932, "grad_norm": 5.335117504633301, "learning_rate": 9.807935668775252e-06, "loss": 0.54684973, "memory(GiB)": 73.74, "step": 60, "train_speed(iter/s)": 0.033353 }, { "acc": 0.89867792, "epoch": 1.449438202247191, "grad_norm": 40.42003979452331, "learning_rate": 9.75297941342149e-06, "loss": 0.55415287, "memory(GiB)": 73.74, "step": 65, "train_speed(iter/s)": 0.033349 }, { "acc": 0.90041676, "epoch": 1.5617977528089888, "grad_norm": 1.6426325860542945, "learning_rate": 9.691312635735121e-06, "loss": 0.53396959, "memory(GiB)": 73.74, "step": 70, "train_speed(iter/s)": 0.03335 }, { "acc": 0.90246105, "epoch": 1.6741573033707864, "grad_norm": 4.014056769196663, "learning_rate": 9.623022409483936e-06, "loss": 0.55537992, "memory(GiB)": 73.74, "step": 75, "train_speed(iter/s)": 0.033348 }, { "acc": 0.90795784, "epoch": 1.7865168539325844, "grad_norm": 3.23575776477156, "learning_rate": 9.548205160774915e-06, "loss": 0.52041235, "memory(GiB)": 73.74, "step": 80, "train_speed(iter/s)": 0.033347 }, { "acc": 0.91264877, "epoch": 1.898876404494382, "grad_norm": 2.5399340191963864, "learning_rate": 9.466966531900144e-06, "loss": 0.46114645, "memory(GiB)": 73.74, "step": 85, "train_speed(iter/s)": 0.033347 }, { "acc": 0.82142859, "epoch": 2.0, "grad_norm": 3.6343629668293307, "learning_rate": 9.379421232169423e-06, "loss": 0.45789909, "memory(GiB)": 73.74, "step": 90, "train_speed(iter/s)": 0.03353 }, { "acc": 0.915625, "epoch": 2.1123595505617976, "grad_norm": 1.6179837171031195, "learning_rate": 9.28569287594019e-06, "loss": 0.46370001, "memory(GiB)": 73.74, "step": 95, "train_speed(iter/s)": 0.033507 }, { "acc": 0.90414162, "epoch": 2.2247191011235956, "grad_norm": 5.590072417062122, "learning_rate": 9.185913808073513e-06, "loss": 0.50312757, "memory(GiB)": 73.74, "step": 100, "train_speed(iter/s)": 0.033499 }, { "acc": 0.91226768, "epoch": 2.337078651685393, "grad_norm": 9.564526807649141, "learning_rate": 9.080224917062532e-06, "loss": 0.47500219, "memory(GiB)": 73.74, "step": 105, "train_speed(iter/s)": 0.03349 }, { "acc": 0.91093407, "epoch": 2.449438202247191, "grad_norm": 6.570383478659623, "learning_rate": 8.968775436097282e-06, "loss": 0.52737265, "memory(GiB)": 73.74, "step": 110, "train_speed(iter/s)": 0.033482 }, { "acc": 0.91071424, "epoch": 2.561797752808989, "grad_norm": 1.8655069425175796, "learning_rate": 8.851722732346752e-06, "loss": 0.47163167, "memory(GiB)": 73.74, "step": 115, "train_speed(iter/s)": 0.033476 }, { "acc": 0.91114578, "epoch": 2.6741573033707864, "grad_norm": 2.901011775727484, "learning_rate": 8.729232084755738e-06, "loss": 0.46654186, "memory(GiB)": 73.74, "step": 120, "train_speed(iter/s)": 0.03347 }, { "acc": 0.90819826, "epoch": 2.7865168539325844, "grad_norm": 4.343820784102479, "learning_rate": 8.601476450670227e-06, "loss": 0.53424926, "memory(GiB)": 73.74, "step": 125, "train_speed(iter/s)": 0.033465 }, { "acc": 0.91757097, "epoch": 2.898876404494382, "grad_norm": 3.1842118143777656, "learning_rate": 8.46863622162084e-06, "loss": 0.41356473, "memory(GiB)": 73.74, "step": 130, "train_speed(iter/s)": 0.033459 }, { "acc": 0.82385426, "epoch": 3.0, "grad_norm": 1.975990401101327, "learning_rate": 8.358828943202956e-06, "loss": 0.39099255, "memory(GiB)": 73.74, "step": 135, "train_speed(iter/s)": 0.033578 }, { "acc": 0.9165472, "epoch": 3.1123595505617976, "grad_norm": 2.6917437650551115, "learning_rate": 8.21731377791749e-06, "loss": 0.40746679, "memory(GiB)": 73.74, "step": 140, "train_speed(iter/s)": 0.033561 }, { "acc": 0.92046127, "epoch": 3.2247191011235956, "grad_norm": 5.909642248639357, "learning_rate": 8.071256457091995e-06, "loss": 0.43311391, "memory(GiB)": 73.74, "step": 145, "train_speed(iter/s)": 0.033553 }, { "acc": 0.91113091, "epoch": 3.337078651685393, "grad_norm": 2.6036121532424756, "learning_rate": 7.920863214321187e-06, "loss": 0.44874792, "memory(GiB)": 73.74, "step": 150, "train_speed(iter/s)": 0.033541 }, { "acc": 0.9097435, "epoch": 3.449438202247191, "grad_norm": 6.1713985264762306, "learning_rate": 7.766346405541094e-06, "loss": 0.51926117, "memory(GiB)": 73.74, "step": 155, "train_speed(iter/s)": 0.033535 }, { "acc": 0.92806206, "epoch": 3.561797752808989, "grad_norm": 2.412081700611902, "learning_rate": 7.607924209181516e-06, "loss": 0.38799758, "memory(GiB)": 73.74, "step": 160, "train_speed(iter/s)": 0.033525 }, { "acc": 0.92054138, "epoch": 3.6741573033707864, "grad_norm": 2.1941813592964388, "learning_rate": 7.4458203180971015e-06, "loss": 0.42576523, "memory(GiB)": 73.74, "step": 165, "train_speed(iter/s)": 0.033519 }, { "acc": 0.92455921, "epoch": 3.7865168539325844, "grad_norm": 4.272653895156769, "learning_rate": 7.280263623712031e-06, "loss": 0.41130261, "memory(GiB)": 73.74, "step": 170, "train_speed(iter/s)": 0.033513 }, { "acc": 0.91369047, "epoch": 3.898876404494382, "grad_norm": 2.8977700340263075, "learning_rate": 7.111487892824296e-06, "loss": 0.48630247, "memory(GiB)": 73.74, "step": 175, "train_speed(iter/s)": 0.033507 }, { "acc": 0.82516022, "epoch": 4.0, "grad_norm": 3.391382449679615, "learning_rate": 6.93973143752592e-06, "loss": 0.38337431, "memory(GiB)": 73.74, "step": 180, "train_speed(iter/s)": 0.033594 }, { "acc": 0.92612181, "epoch": 4.112359550561798, "grad_norm": 2.135848118924622, "learning_rate": 6.765236778705218e-06, "loss": 0.38170204, "memory(GiB)": 73.74, "step": 185, "train_speed(iter/s)": 0.03358 }, { "acc": 0.92732372, "epoch": 4.224719101123595, "grad_norm": 3.8786327338817457, "learning_rate": 6.588250303606212e-06, "loss": 0.39752412, "memory(GiB)": 73.74, "step": 190, "train_speed(iter/s)": 0.033573 }, { "acc": 0.93400297, "epoch": 4.337078651685394, "grad_norm": 2.813459786292316, "learning_rate": 6.409021917928728e-06, "loss": 0.36603143, "memory(GiB)": 73.74, "step": 195, "train_speed(iter/s)": 0.033567 }, { "acc": 0.91639881, "epoch": 4.449438202247191, "grad_norm": 4.01835473195211, "learning_rate": 6.2278046929604265e-06, "loss": 0.43047934, "memory(GiB)": 73.74, "step": 200, "train_speed(iter/s)": 0.033561 }, { "epoch": 4.449438202247191, "eval_acc": 0.8910891089108911, "eval_loss": 0.2933560013771057, "eval_runtime": 23.6523, "eval_samples_per_second": 1.564, "eval_steps_per_second": 0.211, "step": 200 }, { "acc": 0.92292318, "epoch": 4.561797752808989, "grad_norm": 1.6369815657821725, "learning_rate": 6.044854508238997e-06, "loss": 0.40975094, "memory(GiB)": 73.74, "step": 205, "train_speed(iter/s)": 0.033139 }, { "acc": 0.91226768, "epoch": 4.674157303370786, "grad_norm": 1.431036936231456, "learning_rate": 5.860429690249112e-06, "loss": 0.43579521, "memory(GiB)": 73.74, "step": 210, "train_speed(iter/s)": 0.033143 }, { "acc": 0.9255209, "epoch": 4.786516853932584, "grad_norm": 2.4044387977133983, "learning_rate": 5.67479064766425e-06, "loss": 0.40912371, "memory(GiB)": 73.74, "step": 215, "train_speed(iter/s)": 0.033148 }, { "acc": 0.93004465, "epoch": 4.898876404494382, "grad_norm": 2.7562169302233164, "learning_rate": 5.488199503648495e-06, "loss": 0.36853147, "memory(GiB)": 73.74, "step": 220, "train_speed(iter/s)": 0.033152 }, { "acc": 0.83264885, "epoch": 5.0, "grad_norm": 2.2257866781685123, "learning_rate": 5.3384180899106556e-06, "loss": 0.3400677, "memory(GiB)": 73.74, "step": 225, "train_speed(iter/s)": 0.03323 }, { "acc": 0.9432291, "epoch": 5.112359550561798, "grad_norm": 2.566986402074134, "learning_rate": 5.150777767468499e-06, "loss": 0.30140314, "memory(GiB)": 73.74, "step": 230, "train_speed(iter/s)": 0.033226 }, { "acc": 0.9411459, "epoch": 5.224719101123595, "grad_norm": 5.780459507895721, "learning_rate": 4.962925252141961e-06, "loss": 0.30237057, "memory(GiB)": 73.74, "step": 235, "train_speed(iter/s)": 0.033228 }, { "acc": 0.95072918, "epoch": 5.337078651685394, "grad_norm": 2.64665613388647, "learning_rate": 4.775125792528603e-06, "loss": 0.2761975, "memory(GiB)": 73.74, "step": 240, "train_speed(iter/s)": 0.03323 }, { "acc": 0.9291666, "epoch": 5.449438202247191, "grad_norm": 3.8481202024150143, "learning_rate": 4.587644562311076e-06, "loss": 0.40160847, "memory(GiB)": 73.74, "step": 245, "train_speed(iter/s)": 0.033231 }, { "acc": 0.9379488, "epoch": 5.561797752808989, "grad_norm": 4.029989472171904, "learning_rate": 4.40074628583069e-06, "loss": 0.3265132, "memory(GiB)": 73.74, "step": 250, "train_speed(iter/s)": 0.033232 }, { "acc": 0.93661861, "epoch": 5.674157303370786, "grad_norm": 2.6570505846702814, "learning_rate": 4.2146948642954574e-06, "loss": 0.33681786, "memory(GiB)": 73.74, "step": 255, "train_speed(iter/s)": 0.033234 }, { "acc": 0.94726496, "epoch": 5.786516853932584, "grad_norm": 4.004672781401515, "learning_rate": 4.029753003150392e-06, "loss": 0.2932775, "memory(GiB)": 73.74, "step": 260, "train_speed(iter/s)": 0.033235 }, { "acc": 0.94092255, "epoch": 5.898876404494382, "grad_norm": 3.2850100343996504, "learning_rate": 3.846181841136244e-06, "loss": 0.32246752, "memory(GiB)": 73.74, "step": 265, "train_speed(iter/s)": 0.033237 }, { "acc": 0.85164261, "epoch": 6.0, "grad_norm": 1.9710778886933134, "learning_rate": 3.6642405815604103e-06, "loss": 0.23691957, "memory(GiB)": 73.74, "step": 270, "train_speed(iter/s)": 0.0333 }, { "acc": 0.9598959, "epoch": 6.112359550561798, "grad_norm": 2.303033611971756, "learning_rate": 3.484186126300713e-06, "loss": 0.22067578, "memory(GiB)": 73.74, "step": 275, "train_speed(iter/s)": 0.033297 }, { "acc": 0.96287136, "epoch": 6.224719101123595, "grad_norm": 1.8384638463574428, "learning_rate": 3.3062727130587803e-06, "loss": 0.19998964, "memory(GiB)": 73.74, "step": 280, "train_speed(iter/s)": 0.033297 }, { "acc": 0.9651041, "epoch": 6.337078651685394, "grad_norm": 4.725974876428552, "learning_rate": 3.13075155637527e-06, "loss": 0.18892503, "memory(GiB)": 73.74, "step": 285, "train_speed(iter/s)": 0.033298 }, { "acc": 0.96175594, "epoch": 6.449438202247191, "grad_norm": 9.502223534616613, "learning_rate": 2.9578704929138064e-06, "loss": 0.20733333, "memory(GiB)": 73.74, "step": 290, "train_speed(iter/s)": 0.033299 }, { "acc": 0.96951122, "epoch": 6.561797752808989, "grad_norm": 2.546188949128991, "learning_rate": 2.7878736315144977e-06, "loss": 0.16884755, "memory(GiB)": 73.74, "step": 295, "train_speed(iter/s)": 0.033299 }, { "acc": 0.965625, "epoch": 6.674157303370786, "grad_norm": 3.1119637375221187, "learning_rate": 2.6210010085111507e-06, "loss": 0.2041734, "memory(GiB)": 73.74, "step": 300, "train_speed(iter/s)": 0.0333 }, { "acc": 0.9609375, "epoch": 6.786516853932584, "grad_norm": 3.058815303922382, "learning_rate": 2.457488248798889e-06, "loss": 0.20187621, "memory(GiB)": 73.74, "step": 305, "train_speed(iter/s)": 0.0333 }, { "acc": 0.96494045, "epoch": 6.898876404494382, "grad_norm": 3.8564431087943083, "learning_rate": 2.2975662331307396e-06, "loss": 0.18565775, "memory(GiB)": 73.74, "step": 310, "train_speed(iter/s)": 0.0333 }, { "acc": 0.86153851, "epoch": 7.0, "grad_norm": 4.0061923294982265, "learning_rate": 2.172365879572515e-06, "loss": 0.20486598, "memory(GiB)": 73.74, "step": 315, "train_speed(iter/s)": 0.033353 }, { "acc": 0.98302078, "epoch": 7.112359550561798, "grad_norm": 1.663401616070064, "learning_rate": 2.019472635029862e-06, "loss": 0.11347539, "memory(GiB)": 73.74, "step": 320, "train_speed(iter/s)": 0.03335 }, { "acc": 0.9796876, "epoch": 7.224719101123595, "grad_norm": 2.108343772751513, "learning_rate": 1.8707886145503884e-06, "loss": 0.10599253, "memory(GiB)": 73.74, "step": 325, "train_speed(iter/s)": 0.03335 }, { "acc": 0.978125, "epoch": 7.337078651685394, "grad_norm": 5.368826656538573, "learning_rate": 1.7265237606405478e-06, "loss": 0.12192621, "memory(GiB)": 73.74, "step": 330, "train_speed(iter/s)": 0.033349 }, { "acc": 0.9833333, "epoch": 7.449438202247191, "grad_norm": 3.0375418035451185, "learning_rate": 1.586881775923699e-06, "loss": 0.09868686, "memory(GiB)": 73.74, "step": 335, "train_speed(iter/s)": 0.033349 }, { "acc": 0.9828126, "epoch": 7.561797752808989, "grad_norm": 4.911606371812447, "learning_rate": 1.4520598355110829e-06, "loss": 0.08233165, "memory(GiB)": 73.74, "step": 340, "train_speed(iter/s)": 0.033349 }, { "acc": 0.98376856, "epoch": 7.674157303370786, "grad_norm": 3.4831106310102666, "learning_rate": 1.3222483085896786e-06, "loss": 0.10817692, "memory(GiB)": 73.74, "step": 345, "train_speed(iter/s)": 0.033349 }, { "acc": 0.98430061, "epoch": 7.786516853932584, "grad_norm": 4.986849900540735, "learning_rate": 1.1976304896200528e-06, "loss": 0.08723032, "memory(GiB)": 73.74, "step": 350, "train_speed(iter/s)": 0.033348 }, { "acc": 0.984375, "epoch": 7.898876404494382, "grad_norm": 3.9336851693841908, "learning_rate": 1.0783823395237517e-06, "loss": 0.09413821, "memory(GiB)": 73.74, "step": 355, "train_speed(iter/s)": 0.033348 }, { "acc": 0.8802084, "epoch": 8.0, "grad_norm": 6.501526095791916, "learning_rate": 9.64672237225702e-07, "loss": 0.09963516, "memory(GiB)": 73.74, "step": 360, "train_speed(iter/s)": 0.033394 }, { "acc": 0.98816967, "epoch": 8.112359550561798, "grad_norm": 2.8360853647582007, "learning_rate": 8.566607419023945e-07, "loss": 0.06000836, "memory(GiB)": 73.74, "step": 365, "train_speed(iter/s)": 0.03339 }, { "acc": 0.9916667, "epoch": 8.224719101123595, "grad_norm": 0.4542723437208033, "learning_rate": 7.545003662716096e-07, "loss": 0.05037628, "memory(GiB)": 73.74, "step": 370, "train_speed(iter/s)": 0.033389 }, { "acc": 0.9911459, "epoch": 8.337078651685394, "grad_norm": 6.380106445692744, "learning_rate": 6.583353612437747e-07, "loss": 0.04841857, "memory(GiB)": 73.74, "step": 375, "train_speed(iter/s)": 0.033384 }, { "acc": 0.9942709, "epoch": 8.44943820224719, "grad_norm": 3.7199862774294017, "learning_rate": 5.683015122390326e-07, "loss": 0.03848048, "memory(GiB)": 73.74, "step": 380, "train_speed(iter/s)": 0.033383 }, { "acc": 0.9885416, "epoch": 8.561797752808989, "grad_norm": 4.710079695441364, "learning_rate": 4.845259474576071e-07, "loss": 0.05525554, "memory(GiB)": 73.74, "step": 385, "train_speed(iter/s)": 0.033381 }, { "acc": 0.99285717, "epoch": 8.674157303370787, "grad_norm": 2.4615853898296414, "learning_rate": 4.071269583742181e-07, "loss": 0.03691708, "memory(GiB)": 73.74, "step": 390, "train_speed(iter/s)": 0.03338 }, { "acc": 0.9927084, "epoch": 8.786516853932584, "grad_norm": 1.8476191283143928, "learning_rate": 3.362138327099859e-07, "loss": 0.05118319, "memory(GiB)": 73.74, "step": 395, "train_speed(iter/s)": 0.03338 }, { "acc": 0.9947916, "epoch": 8.898876404494382, "grad_norm": 2.2505867885041373, "learning_rate": 2.718867001176766e-07, "loss": 0.03349712, "memory(GiB)": 73.74, "step": 400, "train_speed(iter/s)": 0.033379 }, { "epoch": 8.898876404494382, "eval_acc": 0.8921892189218922, "eval_loss": 0.5818256735801697, "eval_runtime": 23.5761, "eval_samples_per_second": 1.569, "eval_steps_per_second": 0.212, "step": 400 }, { "acc": 0.89285717, "epoch": 9.0, "grad_norm": 2.2974888116552354, "learning_rate": 2.2522826275497088e-07, "loss": 0.03983199, "memory(GiB)": 73.74, "step": 405, "train_speed(iter/s)": 0.033216 }, { "acc": 0.996875, "epoch": 9.112359550561798, "grad_norm": 1.285992986956722, "learning_rate": 1.7297847835114335e-07, "loss": 0.02367347, "memory(GiB)": 73.74, "step": 410, "train_speed(iter/s)": 0.033214 }, { "acc": 0.9963541, "epoch": 9.224719101123595, "grad_norm": 1.2867436412135123, "learning_rate": 1.2754517607465674e-07, "loss": 0.02105925, "memory(GiB)": 73.74, "step": 415, "train_speed(iter/s)": 0.033216 }, { "acc": 0.996875, "epoch": 9.337078651685394, "grad_norm": 1.1259557805986131, "learning_rate": 8.899250795291715e-08, "loss": 0.02281179, "memory(GiB)": 73.74, "step": 420, "train_speed(iter/s)": 0.033218 }, { "acc": 0.99702377, "epoch": 9.44943820224719, "grad_norm": 1.2383070789592576, "learning_rate": 5.737491052691826e-08, "loss": 0.0230862, "memory(GiB)": 73.74, "step": 425, "train_speed(iter/s)": 0.033219 }, { "acc": 0.9958334, "epoch": 9.561797752808989, "grad_norm": 1.7856609209836365, "learning_rate": 3.2737027986603485e-08, "loss": 0.02335083, "memory(GiB)": 73.74, "step": 430, "train_speed(iter/s)": 0.03322 }, { "acc": 0.9947917, "epoch": 9.674157303370787, "grad_norm": 1.467317687678897, "learning_rate": 1.511364913306491e-08, "loss": 0.03332668, "memory(GiB)": 73.74, "step": 435, "train_speed(iter/s)": 0.033221 }, { "acc": 0.9963542, "epoch": 9.786516853932584, "grad_norm": 0.4218710204447015, "learning_rate": 4.5296582566044525e-09, "loss": 0.02611139, "memory(GiB)": 73.74, "step": 440, "train_speed(iter/s)": 0.033223 }, { "epoch": 9.786516853932584, "eval_acc": 0.8921892189218922, "eval_loss": 0.5951706171035767, "eval_runtime": 23.5698, "eval_samples_per_second": 1.57, "eval_steps_per_second": 0.212, "step": 440 } ], "logging_steps": 5, "max_steps": 440, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1050754427650048.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }