{ "best_metric": 0.017104836092177864, "best_model_checkpoint": "./whisper-large-finetuned/checkpoint-1000", "epoch": 1.7428073234524848, "eval_steps": 125, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.008718395815170008, "grad_norm": 934.4418334960938, "learning_rate": 2.0833333333333335e-08, "loss": 31.0573, "step": 5 }, { "epoch": 0.017436791630340016, "grad_norm": 941.2315063476562, "learning_rate": 1.2500000000000002e-07, "loss": 30.6931, "step": 10 }, { "epoch": 0.026155187445510025, "grad_norm": 905.4633178710938, "learning_rate": 2.291666666666667e-07, "loss": 30.2051, "step": 15 }, { "epoch": 0.03487358326068003, "grad_norm": 606.8495483398438, "learning_rate": 3.3333333333333335e-07, "loss": 27.5929, "step": 20 }, { "epoch": 0.043591979075850044, "grad_norm": 322.0547180175781, "learning_rate": 4.3750000000000005e-07, "loss": 22.3102, "step": 25 }, { "epoch": 0.05231037489102005, "grad_norm": 289.5645751953125, "learning_rate": 5.416666666666667e-07, "loss": 16.8524, "step": 30 }, { "epoch": 0.06102877070619006, "grad_norm": 180.1189422607422, "learning_rate": 6.458333333333333e-07, "loss": 13.885, "step": 35 }, { "epoch": 0.06974716652136007, "grad_norm": 136.20899963378906, "learning_rate": 7.5e-07, "loss": 10.6818, "step": 40 }, { "epoch": 0.07846556233653008, "grad_norm": 112.72056579589844, "learning_rate": 8.541666666666666e-07, "loss": 8.517, "step": 45 }, { "epoch": 0.08718395815170009, "grad_norm": 92.7685546875, "learning_rate": 9.583333333333334e-07, "loss": 6.7615, "step": 50 }, { "epoch": 0.0959023539668701, "grad_norm": 82.76348876953125, "learning_rate": 1.0625000000000002e-06, "loss": 5.2322, "step": 55 }, { "epoch": 0.1046207497820401, "grad_norm": 62.46050262451172, "learning_rate": 1.1666666666666668e-06, "loss": 4.2162, "step": 60 }, { "epoch": 0.11333914559721012, "grad_norm": 65.8958969116211, "learning_rate": 1.2708333333333334e-06, "loss": 3.2443, "step": 65 }, { "epoch": 0.12205754141238012, "grad_norm": 40.446800231933594, "learning_rate": 1.3750000000000002e-06, "loss": 2.6679, "step": 70 }, { "epoch": 0.13077593722755013, "grad_norm": 29.84249496459961, "learning_rate": 1.4791666666666668e-06, "loss": 2.2718, "step": 75 }, { "epoch": 0.13949433304272013, "grad_norm": 34.509098052978516, "learning_rate": 1.5833333333333336e-06, "loss": 1.7758, "step": 80 }, { "epoch": 0.14821272885789014, "grad_norm": 38.756187438964844, "learning_rate": 1.6875000000000001e-06, "loss": 1.6045, "step": 85 }, { "epoch": 0.15693112467306017, "grad_norm": 37.90249252319336, "learning_rate": 1.7916666666666667e-06, "loss": 1.4181, "step": 90 }, { "epoch": 0.16564952048823017, "grad_norm": 62.175453186035156, "learning_rate": 1.8958333333333335e-06, "loss": 1.3154, "step": 95 }, { "epoch": 0.17436791630340018, "grad_norm": 21.30702781677246, "learning_rate": 2.0000000000000003e-06, "loss": 1.1437, "step": 100 }, { "epoch": 0.18308631211857018, "grad_norm": 40.73722839355469, "learning_rate": 2.1041666666666667e-06, "loss": 1.0276, "step": 105 }, { "epoch": 0.1918047079337402, "grad_norm": 39.23003005981445, "learning_rate": 2.2083333333333335e-06, "loss": 0.9993, "step": 110 }, { "epoch": 0.2005231037489102, "grad_norm": 31.393991470336914, "learning_rate": 2.3125e-06, "loss": 0.9329, "step": 115 }, { "epoch": 0.2092414995640802, "grad_norm": 33.02350997924805, "learning_rate": 2.4166666666666667e-06, "loss": 0.8536, "step": 120 }, { "epoch": 0.21795989537925023, "grad_norm": 28.391830444335938, "learning_rate": 2.5208333333333335e-06, "loss": 0.7985, "step": 125 }, { "epoch": 0.21795989537925023, "eval_cer": 0.0605322947095099, "eval_loss": 0.10038114339113235, "eval_runtime": 211.5139, "eval_samples_per_second": 1.754, "eval_steps_per_second": 0.222, "step": 125 }, { "epoch": 0.22667829119442023, "grad_norm": 29.52627182006836, "learning_rate": 2.625e-06, "loss": 0.7896, "step": 130 }, { "epoch": 0.23539668700959024, "grad_norm": 25.15435028076172, "learning_rate": 2.7291666666666667e-06, "loss": 0.7167, "step": 135 }, { "epoch": 0.24411508282476024, "grad_norm": 27.16618537902832, "learning_rate": 2.8333333333333335e-06, "loss": 0.6817, "step": 140 }, { "epoch": 0.2528334786399303, "grad_norm": 29.221731185913086, "learning_rate": 2.9375e-06, "loss": 0.7135, "step": 145 }, { "epoch": 0.26155187445510025, "grad_norm": 40.65930938720703, "learning_rate": 3.041666666666667e-06, "loss": 0.643, "step": 150 }, { "epoch": 0.2702702702702703, "grad_norm": 36.81560134887695, "learning_rate": 3.1458333333333334e-06, "loss": 0.6227, "step": 155 }, { "epoch": 0.27898866608544026, "grad_norm": 27.88620376586914, "learning_rate": 3.2500000000000002e-06, "loss": 0.6461, "step": 160 }, { "epoch": 0.2877070619006103, "grad_norm": 35.246299743652344, "learning_rate": 3.3541666666666666e-06, "loss": 0.602, "step": 165 }, { "epoch": 0.29642545771578027, "grad_norm": 21.056604385375977, "learning_rate": 3.4583333333333334e-06, "loss": 0.5815, "step": 170 }, { "epoch": 0.3051438535309503, "grad_norm": 32.8330078125, "learning_rate": 3.5624999999999998e-06, "loss": 0.5754, "step": 175 }, { "epoch": 0.31386224934612034, "grad_norm": 18.76894187927246, "learning_rate": 3.666666666666667e-06, "loss": 0.5358, "step": 180 }, { "epoch": 0.3225806451612903, "grad_norm": 22.874624252319336, "learning_rate": 3.770833333333334e-06, "loss": 0.5767, "step": 185 }, { "epoch": 0.33129904097646035, "grad_norm": 31.20766830444336, "learning_rate": 3.875e-06, "loss": 0.5296, "step": 190 }, { "epoch": 0.3400174367916303, "grad_norm": 15.421257972717285, "learning_rate": 3.979166666666667e-06, "loss": 0.5882, "step": 195 }, { "epoch": 0.34873583260680036, "grad_norm": 28.616559982299805, "learning_rate": 4.083333333333334e-06, "loss": 0.5434, "step": 200 }, { "epoch": 0.35745422842197033, "grad_norm": 16.531875610351562, "learning_rate": 4.1875e-06, "loss": 0.5753, "step": 205 }, { "epoch": 0.36617262423714037, "grad_norm": 22.337430953979492, "learning_rate": 4.2916666666666665e-06, "loss": 0.5279, "step": 210 }, { "epoch": 0.3748910200523104, "grad_norm": 23.263813018798828, "learning_rate": 4.395833333333334e-06, "loss": 0.4958, "step": 215 }, { "epoch": 0.3836094158674804, "grad_norm": 17.456787109375, "learning_rate": 4.5e-06, "loss": 0.5024, "step": 220 }, { "epoch": 0.3923278116826504, "grad_norm": 29.84246063232422, "learning_rate": 4.604166666666667e-06, "loss": 0.5349, "step": 225 }, { "epoch": 0.4010462074978204, "grad_norm": 21.097015380859375, "learning_rate": 4.708333333333334e-06, "loss": 0.4826, "step": 230 }, { "epoch": 0.4097646033129904, "grad_norm": 14.620965957641602, "learning_rate": 4.8125e-06, "loss": 0.5414, "step": 235 }, { "epoch": 0.4184829991281604, "grad_norm": 22.258108139038086, "learning_rate": 4.9166666666666665e-06, "loss": 0.4802, "step": 240 }, { "epoch": 0.4272013949433304, "grad_norm": 16.524280548095703, "learning_rate": 5.020833333333334e-06, "loss": 0.478, "step": 245 }, { "epoch": 0.43591979075850046, "grad_norm": 19.307289123535156, "learning_rate": 5.125e-06, "loss": 0.4407, "step": 250 }, { "epoch": 0.43591979075850046, "eval_cer": 0.03700097370983447, "eval_loss": 0.05736248940229416, "eval_runtime": 212.9084, "eval_samples_per_second": 1.743, "eval_steps_per_second": 0.221, "step": 250 }, { "epoch": 0.44463818657367044, "grad_norm": 11.922791481018066, "learning_rate": 5.229166666666667e-06, "loss": 0.458, "step": 255 }, { "epoch": 0.45335658238884047, "grad_norm": 12.10986614227295, "learning_rate": 5.333333333333334e-06, "loss": 0.4751, "step": 260 }, { "epoch": 0.46207497820401044, "grad_norm": 28.33596420288086, "learning_rate": 5.4375e-06, "loss": 0.416, "step": 265 }, { "epoch": 0.4707933740191805, "grad_norm": 27.863170623779297, "learning_rate": 5.541666666666667e-06, "loss": 0.4643, "step": 270 }, { "epoch": 0.47951176983435045, "grad_norm": 18.146421432495117, "learning_rate": 5.645833333333334e-06, "loss": 0.4753, "step": 275 }, { "epoch": 0.4882301656495205, "grad_norm": 11.3834867477417, "learning_rate": 5.750000000000001e-06, "loss": 0.3643, "step": 280 }, { "epoch": 0.4969485614646905, "grad_norm": 32.558372497558594, "learning_rate": 5.854166666666667e-06, "loss": 0.4635, "step": 285 }, { "epoch": 0.5056669572798606, "grad_norm": 16.592920303344727, "learning_rate": 5.958333333333334e-06, "loss": 0.398, "step": 290 }, { "epoch": 0.5143853530950305, "grad_norm": 17.8409423828125, "learning_rate": 6.0625e-06, "loss": 0.4353, "step": 295 }, { "epoch": 0.5231037489102005, "grad_norm": 12.89478588104248, "learning_rate": 6.166666666666667e-06, "loss": 0.3784, "step": 300 }, { "epoch": 0.5318221447253705, "grad_norm": 18.778263092041016, "learning_rate": 6.25e-06, "loss": 0.3828, "step": 305 }, { "epoch": 0.5405405405405406, "grad_norm": 16.030410766601562, "learning_rate": 6.25e-06, "loss": 0.41, "step": 310 }, { "epoch": 0.5492589363557105, "grad_norm": 28.651365280151367, "learning_rate": 6.25e-06, "loss": 0.4401, "step": 315 }, { "epoch": 0.5579773321708805, "grad_norm": 24.014606475830078, "learning_rate": 6.25e-06, "loss": 0.3942, "step": 320 }, { "epoch": 0.5666957279860506, "grad_norm": 23.663330078125, "learning_rate": 6.25e-06, "loss": 0.4192, "step": 325 }, { "epoch": 0.5754141238012206, "grad_norm": 18.65936851501465, "learning_rate": 6.25e-06, "loss": 0.3927, "step": 330 }, { "epoch": 0.5841325196163906, "grad_norm": 13.435624122619629, "learning_rate": 6.25e-06, "loss": 0.3786, "step": 335 }, { "epoch": 0.5928509154315605, "grad_norm": 13.244964599609375, "learning_rate": 6.25e-06, "loss": 0.3748, "step": 340 }, { "epoch": 0.6015693112467306, "grad_norm": 13.424527168273926, "learning_rate": 6.25e-06, "loss": 0.3506, "step": 345 }, { "epoch": 0.6102877070619006, "grad_norm": 19.737199783325195, "learning_rate": 6.25e-06, "loss": 0.3287, "step": 350 }, { "epoch": 0.6190061028770706, "grad_norm": 28.116437911987305, "learning_rate": 6.25e-06, "loss": 0.4045, "step": 355 }, { "epoch": 0.6277244986922407, "grad_norm": 10.127391815185547, "learning_rate": 6.25e-06, "loss": 0.3479, "step": 360 }, { "epoch": 0.6364428945074107, "grad_norm": 14.744454383850098, "learning_rate": 6.25e-06, "loss": 0.353, "step": 365 }, { "epoch": 0.6451612903225806, "grad_norm": 12.78295612335205, "learning_rate": 6.25e-06, "loss": 0.3954, "step": 370 }, { "epoch": 0.6538796861377506, "grad_norm": 14.76408863067627, "learning_rate": 6.25e-06, "loss": 0.407, "step": 375 }, { "epoch": 0.6538796861377506, "eval_cer": 0.028367413177539758, "eval_loss": 0.04341413080692291, "eval_runtime": 212.2095, "eval_samples_per_second": 1.748, "eval_steps_per_second": 0.221, "step": 375 }, { "epoch": 0.6625980819529207, "grad_norm": 16.073122024536133, "learning_rate": 6.25e-06, "loss": 0.3726, "step": 380 }, { "epoch": 0.6713164777680907, "grad_norm": 17.39419174194336, "learning_rate": 6.25e-06, "loss": 0.3617, "step": 385 }, { "epoch": 0.6800348735832606, "grad_norm": 13.058511734008789, "learning_rate": 6.25e-06, "loss": 0.3645, "step": 390 }, { "epoch": 0.6887532693984307, "grad_norm": 13.417937278747559, "learning_rate": 6.25e-06, "loss": 0.32, "step": 395 }, { "epoch": 0.6974716652136007, "grad_norm": 14.515336036682129, "learning_rate": 6.25e-06, "loss": 0.3277, "step": 400 }, { "epoch": 0.7061900610287707, "grad_norm": 14.097831726074219, "learning_rate": 6.25e-06, "loss": 0.3659, "step": 405 }, { "epoch": 0.7149084568439407, "grad_norm": 11.983433723449707, "learning_rate": 6.25e-06, "loss": 0.3619, "step": 410 }, { "epoch": 0.7236268526591108, "grad_norm": 9.440328598022461, "learning_rate": 6.25e-06, "loss": 0.3428, "step": 415 }, { "epoch": 0.7323452484742807, "grad_norm": 25.6270809173584, "learning_rate": 6.25e-06, "loss": 0.3351, "step": 420 }, { "epoch": 0.7410636442894507, "grad_norm": 10.264219284057617, "learning_rate": 6.25e-06, "loss": 0.3555, "step": 425 }, { "epoch": 0.7497820401046208, "grad_norm": 19.086257934570312, "learning_rate": 6.25e-06, "loss": 0.3809, "step": 430 }, { "epoch": 0.7585004359197908, "grad_norm": 11.476728439331055, "learning_rate": 6.25e-06, "loss": 0.3065, "step": 435 }, { "epoch": 0.7672188317349607, "grad_norm": 9.463808059692383, "learning_rate": 6.25e-06, "loss": 0.3436, "step": 440 }, { "epoch": 0.7759372275501307, "grad_norm": 11.51846981048584, "learning_rate": 6.25e-06, "loss": 0.3266, "step": 445 }, { "epoch": 0.7846556233653008, "grad_norm": 13.305411338806152, "learning_rate": 6.25e-06, "loss": 0.3176, "step": 450 }, { "epoch": 0.7933740191804708, "grad_norm": 13.012825965881348, "learning_rate": 6.25e-06, "loss": 0.3931, "step": 455 }, { "epoch": 0.8020924149956408, "grad_norm": 11.237920761108398, "learning_rate": 6.25e-06, "loss": 0.358, "step": 460 }, { "epoch": 0.8108108108108109, "grad_norm": 7.914185523986816, "learning_rate": 6.25e-06, "loss": 0.3237, "step": 465 }, { "epoch": 0.8195292066259808, "grad_norm": 13.393628120422363, "learning_rate": 6.25e-06, "loss": 0.3197, "step": 470 }, { "epoch": 0.8282476024411508, "grad_norm": 12.152388572692871, "learning_rate": 6.25e-06, "loss": 0.3181, "step": 475 }, { "epoch": 0.8369659982563208, "grad_norm": 11.488780975341797, "learning_rate": 6.25e-06, "loss": 0.284, "step": 480 }, { "epoch": 0.8456843940714909, "grad_norm": 11.495716094970703, "learning_rate": 6.25e-06, "loss": 0.3057, "step": 485 }, { "epoch": 0.8544027898866609, "grad_norm": 12.111288070678711, "learning_rate": 6.25e-06, "loss": 0.3214, "step": 490 }, { "epoch": 0.8631211857018308, "grad_norm": 10.114215850830078, "learning_rate": 6.25e-06, "loss": 0.3147, "step": 495 }, { "epoch": 0.8718395815170009, "grad_norm": 18.122196197509766, "learning_rate": 6.25e-06, "loss": 0.3517, "step": 500 }, { "epoch": 0.8718395815170009, "eval_cer": 0.02557611165206102, "eval_loss": 0.03728635609149933, "eval_runtime": 212.3354, "eval_samples_per_second": 1.747, "eval_steps_per_second": 0.221, "step": 500 }, { "epoch": 0.8805579773321709, "grad_norm": 10.484134674072266, "learning_rate": 6.25e-06, "loss": 0.3316, "step": 505 }, { "epoch": 0.8892763731473409, "grad_norm": 19.02334213256836, "learning_rate": 6.25e-06, "loss": 0.3026, "step": 510 }, { "epoch": 0.8979947689625108, "grad_norm": 10.848348617553711, "learning_rate": 6.25e-06, "loss": 0.295, "step": 515 }, { "epoch": 0.9067131647776809, "grad_norm": 16.650806427001953, "learning_rate": 6.25e-06, "loss": 0.2775, "step": 520 }, { "epoch": 0.9154315605928509, "grad_norm": 12.108776092529297, "learning_rate": 6.25e-06, "loss": 0.3105, "step": 525 }, { "epoch": 0.9241499564080209, "grad_norm": 11.745095252990723, "learning_rate": 6.25e-06, "loss": 0.2738, "step": 530 }, { "epoch": 0.932868352223191, "grad_norm": 14.852615356445312, "learning_rate": 6.25e-06, "loss": 0.2659, "step": 535 }, { "epoch": 0.941586748038361, "grad_norm": 9.099912643432617, "learning_rate": 6.25e-06, "loss": 0.2875, "step": 540 }, { "epoch": 0.9503051438535309, "grad_norm": 7.640926837921143, "learning_rate": 6.25e-06, "loss": 0.2907, "step": 545 }, { "epoch": 0.9590235396687009, "grad_norm": 9.235946655273438, "learning_rate": 6.25e-06, "loss": 0.3091, "step": 550 }, { "epoch": 0.967741935483871, "grad_norm": 11.265698432922363, "learning_rate": 6.25e-06, "loss": 0.2927, "step": 555 }, { "epoch": 0.976460331299041, "grad_norm": 8.588621139526367, "learning_rate": 6.25e-06, "loss": 0.2904, "step": 560 }, { "epoch": 0.985178727114211, "grad_norm": 12.887239456176758, "learning_rate": 6.25e-06, "loss": 0.2831, "step": 565 }, { "epoch": 0.993897122929381, "grad_norm": 9.062308311462402, "learning_rate": 6.25e-06, "loss": 0.2547, "step": 570 }, { "epoch": 1.001743679163034, "grad_norm": 7.798802852630615, "learning_rate": 6.25e-06, "loss": 0.2329, "step": 575 }, { "epoch": 1.010462074978204, "grad_norm": 9.294201850891113, "learning_rate": 6.25e-06, "loss": 0.1935, "step": 580 }, { "epoch": 1.019180470793374, "grad_norm": 9.507511138916016, "learning_rate": 6.25e-06, "loss": 0.192, "step": 585 }, { "epoch": 1.027898866608544, "grad_norm": 10.073638916015625, "learning_rate": 6.25e-06, "loss": 0.2104, "step": 590 }, { "epoch": 1.036617262423714, "grad_norm": 8.133487701416016, "learning_rate": 6.25e-06, "loss": 0.2347, "step": 595 }, { "epoch": 1.045335658238884, "grad_norm": 7.9588422775268555, "learning_rate": 6.25e-06, "loss": 0.2596, "step": 600 }, { "epoch": 1.054054054054054, "grad_norm": 14.125382423400879, "learning_rate": 6.25e-06, "loss": 0.2095, "step": 605 }, { "epoch": 1.0627724498692241, "grad_norm": 11.050345420837402, "learning_rate": 6.25e-06, "loss": 0.2371, "step": 610 }, { "epoch": 1.0714908456843941, "grad_norm": 11.05793571472168, "learning_rate": 6.25e-06, "loss": 0.2307, "step": 615 }, { "epoch": 1.080209241499564, "grad_norm": 9.359646797180176, "learning_rate": 6.25e-06, "loss": 0.2155, "step": 620 }, { "epoch": 1.088927637314734, "grad_norm": 8.282297134399414, "learning_rate": 6.25e-06, "loss": 0.2219, "step": 625 }, { "epoch": 1.088927637314734, "eval_cer": 0.02112950340798442, "eval_loss": 0.03238729014992714, "eval_runtime": 212.432, "eval_samples_per_second": 1.746, "eval_steps_per_second": 0.221, "step": 625 }, { "epoch": 1.097646033129904, "grad_norm": 12.268821716308594, "learning_rate": 6.25e-06, "loss": 0.2242, "step": 630 }, { "epoch": 1.106364428945074, "grad_norm": 8.24521255493164, "learning_rate": 6.25e-06, "loss": 0.2143, "step": 635 }, { "epoch": 1.1150828247602442, "grad_norm": 7.206003665924072, "learning_rate": 6.25e-06, "loss": 0.2086, "step": 640 }, { "epoch": 1.1238012205754142, "grad_norm": 9.055399894714355, "learning_rate": 6.25e-06, "loss": 0.2174, "step": 645 }, { "epoch": 1.1325196163905842, "grad_norm": 8.929832458496094, "learning_rate": 6.25e-06, "loss": 0.2133, "step": 650 }, { "epoch": 1.1412380122057542, "grad_norm": 11.186538696289062, "learning_rate": 6.25e-06, "loss": 0.2193, "step": 655 }, { "epoch": 1.1499564080209241, "grad_norm": 7.43967342376709, "learning_rate": 6.25e-06, "loss": 0.2042, "step": 660 }, { "epoch": 1.1586748038360941, "grad_norm": 8.988080024719238, "learning_rate": 6.25e-06, "loss": 0.2118, "step": 665 }, { "epoch": 1.167393199651264, "grad_norm": 9.018464088439941, "learning_rate": 6.25e-06, "loss": 0.1938, "step": 670 }, { "epoch": 1.176111595466434, "grad_norm": 9.74881649017334, "learning_rate": 6.25e-06, "loss": 0.2395, "step": 675 }, { "epoch": 1.1848299912816043, "grad_norm": 11.451117515563965, "learning_rate": 6.25e-06, "loss": 0.2105, "step": 680 }, { "epoch": 1.1935483870967742, "grad_norm": 9.217769622802734, "learning_rate": 6.25e-06, "loss": 0.2072, "step": 685 }, { "epoch": 1.2022667829119442, "grad_norm": 11.924041748046875, "learning_rate": 6.25e-06, "loss": 0.2578, "step": 690 }, { "epoch": 1.2109851787271142, "grad_norm": 11.975310325622559, "learning_rate": 6.25e-06, "loss": 0.2249, "step": 695 }, { "epoch": 1.2197035745422842, "grad_norm": 9.89647102355957, "learning_rate": 6.25e-06, "loss": 0.1899, "step": 700 }, { "epoch": 1.2284219703574542, "grad_norm": 10.440160751342773, "learning_rate": 6.25e-06, "loss": 0.1898, "step": 705 }, { "epoch": 1.2371403661726244, "grad_norm": 7.328766822814941, "learning_rate": 6.25e-06, "loss": 0.1654, "step": 710 }, { "epoch": 1.2458587619877943, "grad_norm": 9.291050910949707, "learning_rate": 6.25e-06, "loss": 0.2527, "step": 715 }, { "epoch": 1.2545771578029643, "grad_norm": 10.257084846496582, "learning_rate": 6.25e-06, "loss": 0.1948, "step": 720 }, { "epoch": 1.2632955536181343, "grad_norm": 13.300470352172852, "learning_rate": 6.25e-06, "loss": 0.2211, "step": 725 }, { "epoch": 1.2720139494333043, "grad_norm": 7.229800701141357, "learning_rate": 6.25e-06, "loss": 0.2218, "step": 730 }, { "epoch": 1.2807323452484742, "grad_norm": 7.776634216308594, "learning_rate": 6.25e-06, "loss": 0.2189, "step": 735 }, { "epoch": 1.2894507410636442, "grad_norm": 6.323727130889893, "learning_rate": 6.25e-06, "loss": 0.1901, "step": 740 }, { "epoch": 1.2981691368788142, "grad_norm": 7.206363201141357, "learning_rate": 6.25e-06, "loss": 0.1756, "step": 745 }, { "epoch": 1.3068875326939844, "grad_norm": 7.8909783363342285, "learning_rate": 6.25e-06, "loss": 0.1981, "step": 750 }, { "epoch": 1.3068875326939844, "eval_cer": 0.019636481661798117, "eval_loss": 0.03092861734330654, "eval_runtime": 212.2998, "eval_samples_per_second": 1.748, "eval_steps_per_second": 0.221, "step": 750 }, { "epoch": 1.3156059285091544, "grad_norm": 11.669926643371582, "learning_rate": 6.25e-06, "loss": 0.2066, "step": 755 }, { "epoch": 1.3243243243243243, "grad_norm": 8.25307559967041, "learning_rate": 6.25e-06, "loss": 0.1892, "step": 760 }, { "epoch": 1.3330427201394943, "grad_norm": 6.406688213348389, "learning_rate": 6.25e-06, "loss": 0.1877, "step": 765 }, { "epoch": 1.3417611159546643, "grad_norm": 8.468006134033203, "learning_rate": 6.25e-06, "loss": 0.2034, "step": 770 }, { "epoch": 1.3504795117698343, "grad_norm": 9.530508995056152, "learning_rate": 6.25e-06, "loss": 0.2026, "step": 775 }, { "epoch": 1.3591979075850045, "grad_norm": 6.790230751037598, "learning_rate": 6.25e-06, "loss": 0.219, "step": 780 }, { "epoch": 1.3679163034001744, "grad_norm": 8.093545913696289, "learning_rate": 6.25e-06, "loss": 0.1843, "step": 785 }, { "epoch": 1.3766346992153444, "grad_norm": 8.671226501464844, "learning_rate": 6.25e-06, "loss": 0.188, "step": 790 }, { "epoch": 1.3853530950305144, "grad_norm": 8.892730712890625, "learning_rate": 6.25e-06, "loss": 0.1962, "step": 795 }, { "epoch": 1.3940714908456844, "grad_norm": 10.313153266906738, "learning_rate": 6.25e-06, "loss": 0.1669, "step": 800 }, { "epoch": 1.4027898866608544, "grad_norm": 8.458233833312988, "learning_rate": 6.25e-06, "loss": 0.1923, "step": 805 }, { "epoch": 1.4115082824760243, "grad_norm": 7.510833740234375, "learning_rate": 6.25e-06, "loss": 0.2031, "step": 810 }, { "epoch": 1.4202266782911943, "grad_norm": 8.42331314086914, "learning_rate": 6.25e-06, "loss": 0.183, "step": 815 }, { "epoch": 1.4289450741063645, "grad_norm": 9.886040687561035, "learning_rate": 6.25e-06, "loss": 0.1964, "step": 820 }, { "epoch": 1.4376634699215345, "grad_norm": 6.524728298187256, "learning_rate": 6.25e-06, "loss": 0.1827, "step": 825 }, { "epoch": 1.4463818657367045, "grad_norm": 8.29251766204834, "learning_rate": 6.25e-06, "loss": 0.184, "step": 830 }, { "epoch": 1.4551002615518744, "grad_norm": 8.159611701965332, "learning_rate": 6.25e-06, "loss": 0.2232, "step": 835 }, { "epoch": 1.4638186573670444, "grad_norm": 7.022614479064941, "learning_rate": 6.25e-06, "loss": 0.1658, "step": 840 }, { "epoch": 1.4725370531822144, "grad_norm": 7.722076892852783, "learning_rate": 6.25e-06, "loss": 0.2011, "step": 845 }, { "epoch": 1.4812554489973846, "grad_norm": 6.505570411682129, "learning_rate": 6.25e-06, "loss": 0.1959, "step": 850 }, { "epoch": 1.4899738448125546, "grad_norm": 8.246548652648926, "learning_rate": 6.25e-06, "loss": 0.1859, "step": 855 }, { "epoch": 1.4986922406277245, "grad_norm": 6.680718421936035, "learning_rate": 6.25e-06, "loss": 0.1589, "step": 860 }, { "epoch": 1.5074106364428945, "grad_norm": 6.155304908752441, "learning_rate": 6.25e-06, "loss": 0.1619, "step": 865 }, { "epoch": 1.5161290322580645, "grad_norm": 8.872124671936035, "learning_rate": 6.25e-06, "loss": 0.2289, "step": 870 }, { "epoch": 1.5248474280732345, "grad_norm": 6.164592742919922, "learning_rate": 6.25e-06, "loss": 0.1718, "step": 875 }, { "epoch": 1.5248474280732345, "eval_cer": 0.018500486854917234, "eval_loss": 0.029056401923298836, "eval_runtime": 211.7367, "eval_samples_per_second": 1.752, "eval_steps_per_second": 0.222, "step": 875 }, { "epoch": 1.5335658238884045, "grad_norm": 6.420663833618164, "learning_rate": 6.25e-06, "loss": 0.1753, "step": 880 }, { "epoch": 1.5422842197035744, "grad_norm": 10.49184799194336, "learning_rate": 6.25e-06, "loss": 0.2049, "step": 885 }, { "epoch": 1.5510026155187444, "grad_norm": 7.615091800689697, "learning_rate": 6.25e-06, "loss": 0.1734, "step": 890 }, { "epoch": 1.5597210113339146, "grad_norm": 8.663217544555664, "learning_rate": 6.25e-06, "loss": 0.1645, "step": 895 }, { "epoch": 1.5684394071490846, "grad_norm": 6.248359203338623, "learning_rate": 6.25e-06, "loss": 0.1813, "step": 900 }, { "epoch": 1.5771578029642546, "grad_norm": 9.073200225830078, "learning_rate": 6.25e-06, "loss": 0.2042, "step": 905 }, { "epoch": 1.5858761987794245, "grad_norm": 5.36655855178833, "learning_rate": 6.25e-06, "loss": 0.1848, "step": 910 }, { "epoch": 1.5945945945945947, "grad_norm": 6.54818868637085, "learning_rate": 6.25e-06, "loss": 0.1829, "step": 915 }, { "epoch": 1.6033129904097647, "grad_norm": 8.669767379760742, "learning_rate": 6.25e-06, "loss": 0.1669, "step": 920 }, { "epoch": 1.6120313862249347, "grad_norm": 6.731302738189697, "learning_rate": 6.25e-06, "loss": 0.1855, "step": 925 }, { "epoch": 1.6207497820401047, "grad_norm": 6.575505256652832, "learning_rate": 6.25e-06, "loss": 0.174, "step": 930 }, { "epoch": 1.6294681778552746, "grad_norm": 7.167125701904297, "learning_rate": 6.25e-06, "loss": 0.2021, "step": 935 }, { "epoch": 1.6381865736704446, "grad_norm": 9.105996131896973, "learning_rate": 6.25e-06, "loss": 0.2631, "step": 940 }, { "epoch": 1.6469049694856146, "grad_norm": 7.38139009475708, "learning_rate": 6.25e-06, "loss": 0.192, "step": 945 }, { "epoch": 1.6556233653007846, "grad_norm": 7.435015678405762, "learning_rate": 6.25e-06, "loss": 0.163, "step": 950 }, { "epoch": 1.6643417611159546, "grad_norm": 9.437699317932129, "learning_rate": 6.25e-06, "loss": 0.1717, "step": 955 }, { "epoch": 1.6730601569311245, "grad_norm": 8.728166580200195, "learning_rate": 6.25e-06, "loss": 0.1718, "step": 960 }, { "epoch": 1.6817785527462947, "grad_norm": 8.472909927368164, "learning_rate": 6.25e-06, "loss": 0.1835, "step": 965 }, { "epoch": 1.6904969485614647, "grad_norm": 7.229650974273682, "learning_rate": 6.25e-06, "loss": 0.1783, "step": 970 }, { "epoch": 1.6992153443766347, "grad_norm": 8.300793647766113, "learning_rate": 6.25e-06, "loss": 0.1544, "step": 975 }, { "epoch": 1.7079337401918047, "grad_norm": 8.715877532958984, "learning_rate": 6.25e-06, "loss": 0.1943, "step": 980 }, { "epoch": 1.7166521360069749, "grad_norm": 8.236053466796875, "learning_rate": 6.25e-06, "loss": 0.1969, "step": 985 }, { "epoch": 1.7253705318221448, "grad_norm": 10.76419448852539, "learning_rate": 6.25e-06, "loss": 0.1807, "step": 990 }, { "epoch": 1.7340889276373148, "grad_norm": 7.493725776672363, "learning_rate": 6.25e-06, "loss": 0.2034, "step": 995 }, { "epoch": 1.7428073234524848, "grad_norm": 10.558133125305176, "learning_rate": 6.25e-06, "loss": 0.2042, "step": 1000 }, { "epoch": 1.7428073234524848, "eval_cer": 0.017104836092177864, "eval_loss": 0.028250334784388542, "eval_runtime": 212.8205, "eval_samples_per_second": 1.743, "eval_steps_per_second": 0.221, "step": 1000 } ], "logging_steps": 5, "max_steps": 1000, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 125, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.0905549552746496e+20, "train_batch_size": 8, "trial_name": null, "trial_params": null }