{ "best_metric": 0.46308374404907227, "best_model_checkpoint": "wav2vec2-xls-r-300m-fula/checkpoint-4000", "epoch": 5.154639175257732, "eval_steps": 2000, "global_step": 10000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01288659793814433, "grad_norm": 10.90772533416748, "learning_rate": 3.4499999999999996e-06, "loss": 19.1295, "step": 25 }, { "epoch": 0.02577319587628866, "grad_norm": 12.727262496948242, "learning_rate": 7.2e-06, "loss": 16.6655, "step": 50 }, { "epoch": 0.03865979381443299, "grad_norm": 17.578950881958008, "learning_rate": 1.0949999999999998e-05, "loss": 13.462, "step": 75 }, { "epoch": 0.05154639175257732, "grad_norm": 14.574068069458008, "learning_rate": 1.47e-05, "loss": 7.4496, "step": 100 }, { "epoch": 0.06443298969072164, "grad_norm": 11.906006813049316, "learning_rate": 1.8449999999999998e-05, "loss": 6.1341, "step": 125 }, { "epoch": 0.07731958762886598, "grad_norm": 10.743711471557617, "learning_rate": 2.2199999999999998e-05, "loss": 5.0479, "step": 150 }, { "epoch": 0.09020618556701031, "grad_norm": 9.327588081359863, "learning_rate": 2.5949999999999997e-05, "loss": 4.86, "step": 175 }, { "epoch": 0.10309278350515463, "grad_norm": 8.118748664855957, "learning_rate": 2.97e-05, "loss": 4.3188, "step": 200 }, { "epoch": 0.11597938144329897, "grad_norm": 4.968245983123779, "learning_rate": 3.345e-05, "loss": 4.142, "step": 225 }, { "epoch": 0.12886597938144329, "grad_norm": 3.5491788387298584, "learning_rate": 3.7199999999999996e-05, "loss": 3.7329, "step": 250 }, { "epoch": 0.14175257731958762, "grad_norm": 2.228337049484253, "learning_rate": 4.095e-05, "loss": 3.5432, "step": 275 }, { "epoch": 0.15463917525773196, "grad_norm": 1.3472025394439697, "learning_rate": 4.4699999999999996e-05, "loss": 3.3229, "step": 300 }, { "epoch": 0.16752577319587628, "grad_norm": 0.8919354677200317, "learning_rate": 4.845e-05, "loss": 3.1852, "step": 325 }, { "epoch": 0.18041237113402062, "grad_norm": 0.9615734815597534, "learning_rate": 5.2199999999999995e-05, "loss": 3.0339, "step": 350 }, { "epoch": 0.19329896907216496, "grad_norm": 1.0601171255111694, "learning_rate": 5.595e-05, "loss": 2.9307, "step": 375 }, { "epoch": 0.20618556701030927, "grad_norm": 1.2133561372756958, "learning_rate": 5.97e-05, "loss": 2.7395, "step": 400 }, { "epoch": 0.2190721649484536, "grad_norm": 0.922024130821228, "learning_rate": 6.345e-05, "loss": 2.5765, "step": 425 }, { "epoch": 0.23195876288659795, "grad_norm": 1.0528242588043213, "learning_rate": 6.72e-05, "loss": 2.2256, "step": 450 }, { "epoch": 0.24484536082474226, "grad_norm": 1.39167058467865, "learning_rate": 7.094999999999999e-05, "loss": 1.7335, "step": 475 }, { "epoch": 0.25773195876288657, "grad_norm": 1.182673692703247, "learning_rate": 7.47e-05, "loss": 1.2321, "step": 500 }, { "epoch": 0.2706185567010309, "grad_norm": 0.9412080645561218, "learning_rate": 7.845e-05, "loss": 0.9694, "step": 525 }, { "epoch": 0.28350515463917525, "grad_norm": 1.3718018531799316, "learning_rate": 8.22e-05, "loss": 0.9061, "step": 550 }, { "epoch": 0.2963917525773196, "grad_norm": 0.7417821884155273, "learning_rate": 8.594999999999999e-05, "loss": 0.8102, "step": 575 }, { "epoch": 0.30927835051546393, "grad_norm": 0.9690466523170471, "learning_rate": 8.969999999999998e-05, "loss": 0.7434, "step": 600 }, { "epoch": 0.32216494845360827, "grad_norm": 0.8293886780738831, "learning_rate": 9.345e-05, "loss": 0.7381, "step": 625 }, { "epoch": 0.33505154639175255, "grad_norm": 0.8486959934234619, "learning_rate": 9.719999999999999e-05, "loss": 0.6224, "step": 650 }, { "epoch": 0.3479381443298969, "grad_norm": 0.7055938243865967, "learning_rate": 0.00010095, "loss": 0.6925, "step": 675 }, { "epoch": 0.36082474226804123, "grad_norm": 0.7605512142181396, "learning_rate": 0.00010469999999999998, "loss": 0.6136, "step": 700 }, { "epoch": 0.37371134020618557, "grad_norm": 0.6695568561553955, "learning_rate": 0.00010845, "loss": 0.6419, "step": 725 }, { "epoch": 0.3865979381443299, "grad_norm": 0.8840702772140503, "learning_rate": 0.00011219999999999999, "loss": 0.6667, "step": 750 }, { "epoch": 0.39948453608247425, "grad_norm": 0.7186158299446106, "learning_rate": 0.00011595, "loss": 0.6397, "step": 775 }, { "epoch": 0.41237113402061853, "grad_norm": 0.6683000922203064, "learning_rate": 0.0001197, "loss": 0.5647, "step": 800 }, { "epoch": 0.4252577319587629, "grad_norm": 0.8537669777870178, "learning_rate": 0.00012345, "loss": 0.5624, "step": 825 }, { "epoch": 0.4381443298969072, "grad_norm": 1.0267616510391235, "learning_rate": 0.00012719999999999997, "loss": 0.624, "step": 850 }, { "epoch": 0.45103092783505155, "grad_norm": 0.6453070640563965, "learning_rate": 0.00013094999999999998, "loss": 0.5539, "step": 875 }, { "epoch": 0.4639175257731959, "grad_norm": 1.0308513641357422, "learning_rate": 0.0001347, "loss": 0.5728, "step": 900 }, { "epoch": 0.47680412371134023, "grad_norm": 0.560512125492096, "learning_rate": 0.00013845, "loss": 0.5305, "step": 925 }, { "epoch": 0.4896907216494845, "grad_norm": 0.7196568250656128, "learning_rate": 0.0001422, "loss": 0.5669, "step": 950 }, { "epoch": 0.5025773195876289, "grad_norm": 0.5675578713417053, "learning_rate": 0.00014595, "loss": 0.5093, "step": 975 }, { "epoch": 0.5154639175257731, "grad_norm": 0.8834163546562195, "learning_rate": 0.00014969999999999998, "loss": 0.5757, "step": 1000 }, { "epoch": 0.5283505154639175, "grad_norm": 0.5083448886871338, "learning_rate": 0.00015344999999999996, "loss": 0.5412, "step": 1025 }, { "epoch": 0.5412371134020618, "grad_norm": 0.747595489025116, "learning_rate": 0.0001572, "loss": 0.5849, "step": 1050 }, { "epoch": 0.5541237113402062, "grad_norm": 0.5015640258789062, "learning_rate": 0.00016094999999999998, "loss": 0.5049, "step": 1075 }, { "epoch": 0.5670103092783505, "grad_norm": 0.7311388850212097, "learning_rate": 0.0001647, "loss": 0.4726, "step": 1100 }, { "epoch": 0.5798969072164949, "grad_norm": 0.6942028999328613, "learning_rate": 0.00016844999999999997, "loss": 0.4934, "step": 1125 }, { "epoch": 0.5927835051546392, "grad_norm": 0.7268182635307312, "learning_rate": 0.00017219999999999998, "loss": 0.5074, "step": 1150 }, { "epoch": 0.6056701030927835, "grad_norm": 0.5172975063323975, "learning_rate": 0.00017595, "loss": 0.5293, "step": 1175 }, { "epoch": 0.6185567010309279, "grad_norm": 0.5973320603370667, "learning_rate": 0.00017969999999999998, "loss": 0.5018, "step": 1200 }, { "epoch": 0.6314432989690721, "grad_norm": 0.6601810455322266, "learning_rate": 0.00018345, "loss": 0.4953, "step": 1225 }, { "epoch": 0.6443298969072165, "grad_norm": 0.7992896437644958, "learning_rate": 0.0001872, "loss": 0.4748, "step": 1250 }, { "epoch": 0.6572164948453608, "grad_norm": 0.5774939060211182, "learning_rate": 0.00019094999999999998, "loss": 0.4659, "step": 1275 }, { "epoch": 0.6701030927835051, "grad_norm": 0.7045702338218689, "learning_rate": 0.0001947, "loss": 0.4433, "step": 1300 }, { "epoch": 0.6829896907216495, "grad_norm": 0.6067873239517212, "learning_rate": 0.00019844999999999997, "loss": 0.4872, "step": 1325 }, { "epoch": 0.6958762886597938, "grad_norm": 1.0101662874221802, "learning_rate": 0.0002022, "loss": 0.4933, "step": 1350 }, { "epoch": 0.7087628865979382, "grad_norm": 0.4532999098300934, "learning_rate": 0.00020595, "loss": 0.474, "step": 1375 }, { "epoch": 0.7216494845360825, "grad_norm": 0.8769963383674622, "learning_rate": 0.00020969999999999997, "loss": 0.4923, "step": 1400 }, { "epoch": 0.7345360824742269, "grad_norm": 0.38735705614089966, "learning_rate": 0.00021344999999999998, "loss": 0.5358, "step": 1425 }, { "epoch": 0.7474226804123711, "grad_norm": 0.5298680663108826, "learning_rate": 0.00021719999999999997, "loss": 0.4961, "step": 1450 }, { "epoch": 0.7603092783505154, "grad_norm": 0.6393166780471802, "learning_rate": 0.00022095, "loss": 0.5057, "step": 1475 }, { "epoch": 0.7731958762886598, "grad_norm": 1.908589243888855, "learning_rate": 0.0002247, "loss": 0.5043, "step": 1500 }, { "epoch": 0.7860824742268041, "grad_norm": 0.5039921402931213, "learning_rate": 0.00022844999999999997, "loss": 0.4767, "step": 1525 }, { "epoch": 0.7989690721649485, "grad_norm": 0.5750266909599304, "learning_rate": 0.00023219999999999998, "loss": 0.5002, "step": 1550 }, { "epoch": 0.8118556701030928, "grad_norm": 0.5906339883804321, "learning_rate": 0.00023594999999999996, "loss": 0.4776, "step": 1575 }, { "epoch": 0.8247422680412371, "grad_norm": 0.5718595385551453, "learning_rate": 0.0002397, "loss": 0.4934, "step": 1600 }, { "epoch": 0.8376288659793815, "grad_norm": 0.5280390381813049, "learning_rate": 0.00024344999999999998, "loss": 0.4533, "step": 1625 }, { "epoch": 0.8505154639175257, "grad_norm": 1.016766905784607, "learning_rate": 0.0002472, "loss": 0.4795, "step": 1650 }, { "epoch": 0.8634020618556701, "grad_norm": 0.4700835943222046, "learning_rate": 0.00025095, "loss": 0.4399, "step": 1675 }, { "epoch": 0.8762886597938144, "grad_norm": 0.7027618885040283, "learning_rate": 0.00025469999999999996, "loss": 0.4801, "step": 1700 }, { "epoch": 0.8891752577319587, "grad_norm": 0.41857969760894775, "learning_rate": 0.00025844999999999997, "loss": 0.4449, "step": 1725 }, { "epoch": 0.9020618556701031, "grad_norm": 0.7206704020500183, "learning_rate": 0.0002622, "loss": 0.4648, "step": 1750 }, { "epoch": 0.9149484536082474, "grad_norm": 0.485895037651062, "learning_rate": 0.00026595, "loss": 0.4598, "step": 1775 }, { "epoch": 0.9278350515463918, "grad_norm": 0.7267158031463623, "learning_rate": 0.0002697, "loss": 0.462, "step": 1800 }, { "epoch": 0.9407216494845361, "grad_norm": 0.4129009246826172, "learning_rate": 0.00027344999999999995, "loss": 0.4585, "step": 1825 }, { "epoch": 0.9536082474226805, "grad_norm": 0.6467506885528564, "learning_rate": 0.0002772, "loss": 0.4539, "step": 1850 }, { "epoch": 0.9664948453608248, "grad_norm": 0.6241980791091919, "learning_rate": 0.00028094999999999997, "loss": 0.4917, "step": 1875 }, { "epoch": 0.979381443298969, "grad_norm": 0.817642867565155, "learning_rate": 0.0002847, "loss": 0.4337, "step": 1900 }, { "epoch": 0.9922680412371134, "grad_norm": 1.18275785446167, "learning_rate": 0.00028845, "loss": 0.4591, "step": 1925 }, { "epoch": 1.0051546391752577, "grad_norm": 0.5338313579559326, "learning_rate": 0.00029219999999999995, "loss": 0.5187, "step": 1950 }, { "epoch": 1.018041237113402, "grad_norm": 0.40176838636398315, "learning_rate": 0.00029595, "loss": 0.4012, "step": 1975 }, { "epoch": 1.0309278350515463, "grad_norm": 0.6469115018844604, "learning_rate": 0.00029969999999999997, "loss": 0.4752, "step": 2000 }, { "epoch": 1.0309278350515463, "eval_loss": 0.59544438123703, "eval_runtime": 196.5387, "eval_samples_per_second": 6.233, "eval_steps_per_second": 0.784, "eval_wer": 1.0, "step": 2000 }, { "epoch": 1.0438144329896908, "grad_norm": 0.6261550188064575, "learning_rate": 0.00029987722419928825, "loss": 0.4102, "step": 2025 }, { "epoch": 1.056701030927835, "grad_norm": 0.8379774689674377, "learning_rate": 0.00029974377224199284, "loss": 0.4658, "step": 2050 }, { "epoch": 1.0695876288659794, "grad_norm": 0.503239631652832, "learning_rate": 0.0002996103202846975, "loss": 0.4167, "step": 2075 }, { "epoch": 1.0824742268041236, "grad_norm": 0.3943336606025696, "learning_rate": 0.00029947686832740213, "loss": 0.4686, "step": 2100 }, { "epoch": 1.0953608247422681, "grad_norm": 0.4782889783382416, "learning_rate": 0.0002993434163701067, "loss": 0.4003, "step": 2125 }, { "epoch": 1.1082474226804124, "grad_norm": 0.37270641326904297, "learning_rate": 0.00029920996441281137, "loss": 0.4578, "step": 2150 }, { "epoch": 1.1211340206185567, "grad_norm": 0.5629658699035645, "learning_rate": 0.000299076512455516, "loss": 0.3687, "step": 2175 }, { "epoch": 1.134020618556701, "grad_norm": 0.3787396252155304, "learning_rate": 0.0002989430604982206, "loss": 0.4322, "step": 2200 }, { "epoch": 1.1469072164948453, "grad_norm": 0.6377401947975159, "learning_rate": 0.00029880960854092526, "loss": 0.4403, "step": 2225 }, { "epoch": 1.1597938144329896, "grad_norm": 0.37100082635879517, "learning_rate": 0.00029867615658362985, "loss": 0.453, "step": 2250 }, { "epoch": 1.172680412371134, "grad_norm": 0.7276923060417175, "learning_rate": 0.0002985427046263345, "loss": 0.4145, "step": 2275 }, { "epoch": 1.1855670103092784, "grad_norm": 0.8605408668518066, "learning_rate": 0.00029840925266903914, "loss": 0.4539, "step": 2300 }, { "epoch": 1.1984536082474226, "grad_norm": 0.45449143648147583, "learning_rate": 0.00029827580071174373, "loss": 0.4031, "step": 2325 }, { "epoch": 1.211340206185567, "grad_norm": 0.36433449387550354, "learning_rate": 0.0002981423487544484, "loss": 0.4548, "step": 2350 }, { "epoch": 1.2242268041237114, "grad_norm": 0.48323678970336914, "learning_rate": 0.000298008896797153, "loss": 0.4236, "step": 2375 }, { "epoch": 1.2371134020618557, "grad_norm": 0.6236255764961243, "learning_rate": 0.0002978754448398576, "loss": 0.438, "step": 2400 }, { "epoch": 1.25, "grad_norm": 0.721978485584259, "learning_rate": 0.00029774199288256227, "loss": 0.3803, "step": 2425 }, { "epoch": 1.2628865979381443, "grad_norm": 0.33121246099472046, "learning_rate": 0.00029760854092526686, "loss": 0.4759, "step": 2450 }, { "epoch": 1.2757731958762886, "grad_norm": 0.6318331360816956, "learning_rate": 0.0002974750889679715, "loss": 0.3616, "step": 2475 }, { "epoch": 1.2886597938144329, "grad_norm": 0.30613037943840027, "learning_rate": 0.0002973416370106761, "loss": 0.4355, "step": 2500 }, { "epoch": 1.3015463917525774, "grad_norm": 0.6804624199867249, "learning_rate": 0.00029720818505338075, "loss": 0.4296, "step": 2525 }, { "epoch": 1.3144329896907216, "grad_norm": 0.5847098231315613, "learning_rate": 0.0002970747330960854, "loss": 0.4774, "step": 2550 }, { "epoch": 1.327319587628866, "grad_norm": 0.43900150060653687, "learning_rate": 0.00029694128113879, "loss": 0.4249, "step": 2575 }, { "epoch": 1.3402061855670104, "grad_norm": 0.26748043298721313, "learning_rate": 0.00029680782918149463, "loss": 0.4713, "step": 2600 }, { "epoch": 1.3530927835051547, "grad_norm": 0.5893319845199585, "learning_rate": 0.0002966743772241993, "loss": 0.3566, "step": 2625 }, { "epoch": 1.365979381443299, "grad_norm": 0.2939490079879761, "learning_rate": 0.00029654092526690387, "loss": 0.427, "step": 2650 }, { "epoch": 1.3788659793814433, "grad_norm": 0.6302582025527954, "learning_rate": 0.0002964074733096085, "loss": 0.413, "step": 2675 }, { "epoch": 1.3917525773195876, "grad_norm": 0.5481074452400208, "learning_rate": 0.0002962740213523131, "loss": 0.5027, "step": 2700 }, { "epoch": 1.4046391752577319, "grad_norm": 0.41078057885169983, "learning_rate": 0.00029614056939501776, "loss": 0.3935, "step": 2725 }, { "epoch": 1.4175257731958764, "grad_norm": 0.5955342054367065, "learning_rate": 0.0002960071174377224, "loss": 0.4454, "step": 2750 }, { "epoch": 1.4304123711340206, "grad_norm": 0.47366246581077576, "learning_rate": 0.000295873665480427, "loss": 0.3861, "step": 2775 }, { "epoch": 1.443298969072165, "grad_norm": 0.5429800748825073, "learning_rate": 0.00029574021352313164, "loss": 0.4525, "step": 2800 }, { "epoch": 1.4561855670103092, "grad_norm": 0.5089389681816101, "learning_rate": 0.00029560676156583623, "loss": 0.3727, "step": 2825 }, { "epoch": 1.4690721649484537, "grad_norm": 0.3353477716445923, "learning_rate": 0.0002954733096085409, "loss": 0.4855, "step": 2850 }, { "epoch": 1.481958762886598, "grad_norm": 0.6160407662391663, "learning_rate": 0.0002953398576512455, "loss": 0.3635, "step": 2875 }, { "epoch": 1.4948453608247423, "grad_norm": 0.3237319886684418, "learning_rate": 0.0002952064056939501, "loss": 0.4285, "step": 2900 }, { "epoch": 1.5077319587628866, "grad_norm": 0.450309693813324, "learning_rate": 0.00029507295373665477, "loss": 0.3759, "step": 2925 }, { "epoch": 1.5206185567010309, "grad_norm": 0.3550674319267273, "learning_rate": 0.0002949395017793594, "loss": 0.4442, "step": 2950 }, { "epoch": 1.5335051546391751, "grad_norm": 0.3989160656929016, "learning_rate": 0.000294806049822064, "loss": 0.4255, "step": 2975 }, { "epoch": 1.5463917525773194, "grad_norm": 0.5774266719818115, "learning_rate": 0.00029467259786476865, "loss": 0.4526, "step": 3000 }, { "epoch": 1.559278350515464, "grad_norm": 0.6264383792877197, "learning_rate": 0.0002945391459074733, "loss": 0.4097, "step": 3025 }, { "epoch": 1.5721649484536082, "grad_norm": 0.33969295024871826, "learning_rate": 0.0002944056939501779, "loss": 0.4241, "step": 3050 }, { "epoch": 1.5850515463917527, "grad_norm": 0.39299994707107544, "learning_rate": 0.00029427224199288254, "loss": 0.3778, "step": 3075 }, { "epoch": 1.597938144329897, "grad_norm": 0.42388004064559937, "learning_rate": 0.0002941387900355872, "loss": 0.4004, "step": 3100 }, { "epoch": 1.6108247422680413, "grad_norm": 0.9131516814231873, "learning_rate": 0.0002940053380782918, "loss": 0.3531, "step": 3125 }, { "epoch": 1.6237113402061856, "grad_norm": 0.296908974647522, "learning_rate": 0.0002938718861209964, "loss": 0.4613, "step": 3150 }, { "epoch": 1.6365979381443299, "grad_norm": 0.4583122134208679, "learning_rate": 0.000293738434163701, "loss": 0.3833, "step": 3175 }, { "epoch": 1.6494845360824741, "grad_norm": 0.35052600502967834, "learning_rate": 0.00029360498220640566, "loss": 0.4432, "step": 3200 }, { "epoch": 1.6623711340206184, "grad_norm": 0.532720685005188, "learning_rate": 0.0002934715302491103, "loss": 0.3635, "step": 3225 }, { "epoch": 1.675257731958763, "grad_norm": 0.3807854652404785, "learning_rate": 0.0002933380782918149, "loss": 0.43, "step": 3250 }, { "epoch": 1.6881443298969072, "grad_norm": 0.5288501381874084, "learning_rate": 0.00029320462633451955, "loss": 0.3449, "step": 3275 }, { "epoch": 1.7010309278350515, "grad_norm": 0.3881712853908539, "learning_rate": 0.0002930711743772242, "loss": 0.4406, "step": 3300 }, { "epoch": 1.713917525773196, "grad_norm": 0.42132484912872314, "learning_rate": 0.0002929377224199288, "loss": 0.3888, "step": 3325 }, { "epoch": 1.7268041237113403, "grad_norm": 0.3974430561065674, "learning_rate": 0.00029280427046263343, "loss": 0.4265, "step": 3350 }, { "epoch": 1.7396907216494846, "grad_norm": 0.4618494212627411, "learning_rate": 0.0002926708185053381, "loss": 0.3537, "step": 3375 }, { "epoch": 1.7525773195876289, "grad_norm": 0.347777783870697, "learning_rate": 0.00029253736654804267, "loss": 0.4282, "step": 3400 }, { "epoch": 1.7654639175257731, "grad_norm": 0.5885565280914307, "learning_rate": 0.0002924039145907473, "loss": 0.3694, "step": 3425 }, { "epoch": 1.7783505154639174, "grad_norm": 0.5113171935081482, "learning_rate": 0.00029227046263345197, "loss": 0.5256, "step": 3450 }, { "epoch": 1.7912371134020617, "grad_norm": 0.7724625468254089, "learning_rate": 0.00029213701067615656, "loss": 0.3649, "step": 3475 }, { "epoch": 1.8041237113402062, "grad_norm": 0.44778281450271606, "learning_rate": 0.0002920035587188612, "loss": 0.4313, "step": 3500 }, { "epoch": 1.8170103092783505, "grad_norm": 0.8251272439956665, "learning_rate": 0.00029187010676156585, "loss": 0.3839, "step": 3525 }, { "epoch": 1.829896907216495, "grad_norm": 0.4858299493789673, "learning_rate": 0.00029173665480427044, "loss": 0.4489, "step": 3550 }, { "epoch": 1.8427835051546393, "grad_norm": 0.5172144770622253, "learning_rate": 0.0002916032028469751, "loss": 0.409, "step": 3575 }, { "epoch": 1.8556701030927836, "grad_norm": 0.34371522068977356, "learning_rate": 0.0002914697508896797, "loss": 0.4365, "step": 3600 }, { "epoch": 1.8685567010309279, "grad_norm": 0.5957440137863159, "learning_rate": 0.00029133629893238433, "loss": 0.3611, "step": 3625 }, { "epoch": 1.8814432989690721, "grad_norm": 0.3856901526451111, "learning_rate": 0.000291202846975089, "loss": 0.451, "step": 3650 }, { "epoch": 1.8943298969072164, "grad_norm": 0.5961311459541321, "learning_rate": 0.00029106939501779357, "loss": 0.3934, "step": 3675 }, { "epoch": 1.9072164948453607, "grad_norm": 0.4491939842700958, "learning_rate": 0.0002909359430604982, "loss": 0.4466, "step": 3700 }, { "epoch": 1.920103092783505, "grad_norm": 0.41777607798576355, "learning_rate": 0.0002908024911032028, "loss": 0.3754, "step": 3725 }, { "epoch": 1.9329896907216495, "grad_norm": 0.42550450563430786, "learning_rate": 0.00029066903914590745, "loss": 0.4027, "step": 3750 }, { "epoch": 1.9458762886597938, "grad_norm": 0.427433043718338, "learning_rate": 0.0002905355871886121, "loss": 0.3603, "step": 3775 }, { "epoch": 1.9587628865979383, "grad_norm": 0.8279537558555603, "learning_rate": 0.0002904021352313167, "loss": 0.4098, "step": 3800 }, { "epoch": 1.9716494845360826, "grad_norm": 0.6759387850761414, "learning_rate": 0.00029026868327402134, "loss": 0.3756, "step": 3825 }, { "epoch": 1.9845360824742269, "grad_norm": 0.29660704731941223, "learning_rate": 0.000290135231316726, "loss": 0.4591, "step": 3850 }, { "epoch": 1.9974226804123711, "grad_norm": 0.4846726655960083, "learning_rate": 0.0002900017793594306, "loss": 0.3944, "step": 3875 }, { "epoch": 2.0103092783505154, "grad_norm": 0.4863591492176056, "learning_rate": 0.0002898683274021352, "loss": 0.3813, "step": 3900 }, { "epoch": 2.0231958762886597, "grad_norm": 0.37275585532188416, "learning_rate": 0.0002897348754448398, "loss": 0.3753, "step": 3925 }, { "epoch": 2.036082474226804, "grad_norm": 0.36628881096839905, "learning_rate": 0.00028960142348754446, "loss": 0.403, "step": 3950 }, { "epoch": 2.0489690721649483, "grad_norm": 0.4523802697658539, "learning_rate": 0.0002894679715302491, "loss": 0.3601, "step": 3975 }, { "epoch": 2.0618556701030926, "grad_norm": 0.4722582697868347, "learning_rate": 0.0002893345195729537, "loss": 0.3858, "step": 4000 }, { "epoch": 2.0618556701030926, "eval_loss": 0.46308374404907227, "eval_runtime": 201.4429, "eval_samples_per_second": 6.081, "eval_steps_per_second": 0.764, "eval_wer": 1.0057142857142858, "step": 4000 }, { "epoch": 2.0747422680412373, "grad_norm": 0.5041220784187317, "learning_rate": 0.00028920106761565835, "loss": 0.3518, "step": 4025 }, { "epoch": 2.0876288659793816, "grad_norm": 0.48610439896583557, "learning_rate": 0.00028906761565836294, "loss": 0.3818, "step": 4050 }, { "epoch": 2.100515463917526, "grad_norm": 0.6834833025932312, "learning_rate": 0.0002889341637010676, "loss": 0.3778, "step": 4075 }, { "epoch": 2.11340206185567, "grad_norm": 0.687245786190033, "learning_rate": 0.00028880071174377224, "loss": 0.3771, "step": 4100 }, { "epoch": 2.1262886597938144, "grad_norm": 0.706832230091095, "learning_rate": 0.00028866725978647683, "loss": 0.3868, "step": 4125 }, { "epoch": 2.1391752577319587, "grad_norm": 0.7049499154090881, "learning_rate": 0.0002885338078291815, "loss": 0.3646, "step": 4150 }, { "epoch": 2.152061855670103, "grad_norm": 0.40853607654571533, "learning_rate": 0.00028840035587188607, "loss": 0.3235, "step": 4175 }, { "epoch": 2.1649484536082473, "grad_norm": 0.3675331771373749, "learning_rate": 0.0002882669039145907, "loss": 0.4079, "step": 4200 }, { "epoch": 2.1778350515463916, "grad_norm": 1.3320142030715942, "learning_rate": 0.00028813345195729536, "loss": 0.3031, "step": 4225 }, { "epoch": 2.1907216494845363, "grad_norm": 0.6416336297988892, "learning_rate": 0.00028799999999999995, "loss": 0.4087, "step": 4250 }, { "epoch": 2.2036082474226806, "grad_norm": 0.43675994873046875, "learning_rate": 0.0002878665480427046, "loss": 0.3577, "step": 4275 }, { "epoch": 2.216494845360825, "grad_norm": 0.3541963994503021, "learning_rate": 0.00028773309608540925, "loss": 0.3706, "step": 4300 }, { "epoch": 2.229381443298969, "grad_norm": 0.4469320476055145, "learning_rate": 0.00028759964412811384, "loss": 0.3692, "step": 4325 }, { "epoch": 2.2422680412371134, "grad_norm": 0.4056352376937866, "learning_rate": 0.0002874661921708185, "loss": 0.3659, "step": 4350 }, { "epoch": 2.2551546391752577, "grad_norm": 0.3547820746898651, "learning_rate": 0.0002873327402135231, "loss": 0.3564, "step": 4375 }, { "epoch": 2.268041237113402, "grad_norm": 0.32645678520202637, "learning_rate": 0.0002871992882562277, "loss": 0.3346, "step": 4400 }, { "epoch": 2.2809278350515463, "grad_norm": 0.7449667453765869, "learning_rate": 0.00028706583629893237, "loss": 0.3696, "step": 4425 }, { "epoch": 2.2938144329896906, "grad_norm": 0.4612623155117035, "learning_rate": 0.00028693238434163696, "loss": 0.3925, "step": 4450 }, { "epoch": 2.306701030927835, "grad_norm": 0.6325463056564331, "learning_rate": 0.0002867989323843416, "loss": 0.3512, "step": 4475 }, { "epoch": 2.319587628865979, "grad_norm": 0.29471156001091003, "learning_rate": 0.0002866654804270462, "loss": 0.3569, "step": 4500 }, { "epoch": 2.332474226804124, "grad_norm": 1.076217770576477, "learning_rate": 0.00028653202846975085, "loss": 0.3375, "step": 4525 }, { "epoch": 2.345360824742268, "grad_norm": 0.614162027835846, "learning_rate": 0.0002863985765124555, "loss": 0.4078, "step": 4550 }, { "epoch": 2.3582474226804124, "grad_norm": 0.4514384865760803, "learning_rate": 0.0002862651245551601, "loss": 0.3758, "step": 4575 }, { "epoch": 2.3711340206185567, "grad_norm": 0.314336359500885, "learning_rate": 0.00028613167259786473, "loss": 0.3765, "step": 4600 }, { "epoch": 2.384020618556701, "grad_norm": 0.5032577514648438, "learning_rate": 0.0002859982206405694, "loss": 0.3226, "step": 4625 }, { "epoch": 2.3969072164948453, "grad_norm": 0.6402880549430847, "learning_rate": 0.000285864768683274, "loss": 0.3986, "step": 4650 }, { "epoch": 2.4097938144329896, "grad_norm": 0.5510269403457642, "learning_rate": 0.0002857313167259786, "loss": 0.3757, "step": 4675 }, { "epoch": 2.422680412371134, "grad_norm": 0.5594025254249573, "learning_rate": 0.0002855978647686832, "loss": 0.4297, "step": 4700 }, { "epoch": 2.4355670103092786, "grad_norm": 0.3621445894241333, "learning_rate": 0.00028546441281138786, "loss": 0.3336, "step": 4725 }, { "epoch": 2.448453608247423, "grad_norm": 0.5683943033218384, "learning_rate": 0.0002853309608540925, "loss": 0.3395, "step": 4750 }, { "epoch": 2.461340206185567, "grad_norm": 0.7661644816398621, "learning_rate": 0.0002851975088967971, "loss": 0.377, "step": 4775 }, { "epoch": 2.4742268041237114, "grad_norm": 0.456636905670166, "learning_rate": 0.00028506405693950175, "loss": 0.3645, "step": 4800 }, { "epoch": 2.4871134020618557, "grad_norm": 0.49251827597618103, "learning_rate": 0.0002849306049822064, "loss": 0.34, "step": 4825 }, { "epoch": 2.5, "grad_norm": 0.32308971881866455, "learning_rate": 0.000284797153024911, "loss": 0.4092, "step": 4850 }, { "epoch": 2.5128865979381443, "grad_norm": 0.9722476005554199, "learning_rate": 0.00028466370106761563, "loss": 0.3651, "step": 4875 }, { "epoch": 2.5257731958762886, "grad_norm": 0.4552549123764038, "learning_rate": 0.0002845302491103203, "loss": 0.3837, "step": 4900 }, { "epoch": 2.538659793814433, "grad_norm": 0.4148350656032562, "learning_rate": 0.00028439679715302487, "loss": 0.3931, "step": 4925 }, { "epoch": 2.551546391752577, "grad_norm": 0.41644537448883057, "learning_rate": 0.0002842633451957295, "loss": 0.3677, "step": 4950 }, { "epoch": 2.5644329896907214, "grad_norm": 0.3989255428314209, "learning_rate": 0.00028412989323843416, "loss": 0.3759, "step": 4975 }, { "epoch": 2.5773195876288657, "grad_norm": 0.472091943025589, "learning_rate": 0.00028399644128113876, "loss": 0.4008, "step": 5000 }, { "epoch": 2.5902061855670104, "grad_norm": 0.9913691282272339, "learning_rate": 0.0002838629893238434, "loss": 0.3555, "step": 5025 }, { "epoch": 2.6030927835051547, "grad_norm": 0.425589382648468, "learning_rate": 0.000283729537366548, "loss": 0.3774, "step": 5050 }, { "epoch": 2.615979381443299, "grad_norm": 0.7836791276931763, "learning_rate": 0.00028359608540925264, "loss": 0.317, "step": 5075 }, { "epoch": 2.6288659793814433, "grad_norm": 0.7071629166603088, "learning_rate": 0.0002834626334519573, "loss": 0.4039, "step": 5100 }, { "epoch": 2.6417525773195876, "grad_norm": 0.5756880640983582, "learning_rate": 0.0002833291814946619, "loss": 0.342, "step": 5125 }, { "epoch": 2.654639175257732, "grad_norm": 0.425029993057251, "learning_rate": 0.0002831957295373665, "loss": 0.3765, "step": 5150 }, { "epoch": 2.667525773195876, "grad_norm": 0.5212023854255676, "learning_rate": 0.0002830622775800712, "loss": 0.3681, "step": 5175 }, { "epoch": 2.680412371134021, "grad_norm": 0.39988288283348083, "learning_rate": 0.00028292882562277577, "loss": 0.3525, "step": 5200 }, { "epoch": 2.693298969072165, "grad_norm": 0.31254854798316956, "learning_rate": 0.0002827953736654804, "loss": 0.3521, "step": 5225 }, { "epoch": 2.7061855670103094, "grad_norm": 0.5564956665039062, "learning_rate": 0.00028266192170818506, "loss": 0.3602, "step": 5250 }, { "epoch": 2.7190721649484537, "grad_norm": 0.45266616344451904, "learning_rate": 0.00028252846975088965, "loss": 0.3462, "step": 5275 }, { "epoch": 2.731958762886598, "grad_norm": 0.26552554965019226, "learning_rate": 0.0002823950177935943, "loss": 0.3787, "step": 5300 }, { "epoch": 2.7448453608247423, "grad_norm": 0.5850217938423157, "learning_rate": 0.00028226156583629894, "loss": 0.3289, "step": 5325 }, { "epoch": 2.7577319587628866, "grad_norm": 0.47871604561805725, "learning_rate": 0.00028212811387900354, "loss": 0.4246, "step": 5350 }, { "epoch": 2.770618556701031, "grad_norm": 0.5951977372169495, "learning_rate": 0.0002819946619217082, "loss": 0.3326, "step": 5375 }, { "epoch": 2.783505154639175, "grad_norm": 0.5806294083595276, "learning_rate": 0.0002818612099644128, "loss": 0.3952, "step": 5400 }, { "epoch": 2.7963917525773194, "grad_norm": 0.6218500733375549, "learning_rate": 0.0002817277580071174, "loss": 0.3222, "step": 5425 }, { "epoch": 2.8092783505154637, "grad_norm": 0.38604310154914856, "learning_rate": 0.00028159430604982207, "loss": 0.4408, "step": 5450 }, { "epoch": 2.822164948453608, "grad_norm": 2.576910972595215, "learning_rate": 0.00028146085409252666, "loss": 0.355, "step": 5475 }, { "epoch": 2.8350515463917527, "grad_norm": 0.28045961260795593, "learning_rate": 0.0002813274021352313, "loss": 0.3479, "step": 5500 }, { "epoch": 2.847938144329897, "grad_norm": 0.6178203225135803, "learning_rate": 0.00028119395017793596, "loss": 0.3097, "step": 5525 }, { "epoch": 2.8608247422680413, "grad_norm": 0.36241769790649414, "learning_rate": 0.00028106049822064055, "loss": 0.3762, "step": 5550 }, { "epoch": 2.8737113402061856, "grad_norm": 0.6459288597106934, "learning_rate": 0.0002809270462633452, "loss": 0.3261, "step": 5575 }, { "epoch": 2.88659793814433, "grad_norm": 0.458281546831131, "learning_rate": 0.0002807935943060498, "loss": 0.378, "step": 5600 }, { "epoch": 2.899484536082474, "grad_norm": 0.4269348382949829, "learning_rate": 0.00028066014234875443, "loss": 0.3483, "step": 5625 }, { "epoch": 2.9123711340206184, "grad_norm": 0.36018800735473633, "learning_rate": 0.0002805266903914591, "loss": 0.3592, "step": 5650 }, { "epoch": 2.925257731958763, "grad_norm": 0.6075245141983032, "learning_rate": 0.00028039323843416367, "loss": 0.3, "step": 5675 }, { "epoch": 2.9381443298969074, "grad_norm": 0.3082279860973358, "learning_rate": 0.0002802597864768683, "loss": 0.4306, "step": 5700 }, { "epoch": 2.9510309278350517, "grad_norm": 0.6344396471977234, "learning_rate": 0.0002801263345195729, "loss": 0.3795, "step": 5725 }, { "epoch": 2.963917525773196, "grad_norm": 0.3072182536125183, "learning_rate": 0.00027999288256227756, "loss": 0.3425, "step": 5750 }, { "epoch": 2.9768041237113403, "grad_norm": 0.5076513886451721, "learning_rate": 0.0002798594306049822, "loss": 0.3214, "step": 5775 }, { "epoch": 2.9896907216494846, "grad_norm": 0.34852054715156555, "learning_rate": 0.0002797259786476868, "loss": 0.3678, "step": 5800 }, { "epoch": 3.002577319587629, "grad_norm": 0.36334386467933655, "learning_rate": 0.00027959252669039144, "loss": 0.3984, "step": 5825 }, { "epoch": 3.015463917525773, "grad_norm": 0.46211138367652893, "learning_rate": 0.00027945907473309604, "loss": 0.2912, "step": 5850 }, { "epoch": 3.0283505154639174, "grad_norm": 0.3730103373527527, "learning_rate": 0.0002793256227758007, "loss": 0.337, "step": 5875 }, { "epoch": 3.0412371134020617, "grad_norm": 0.37066757678985596, "learning_rate": 0.00027919217081850533, "loss": 0.2959, "step": 5900 }, { "epoch": 3.054123711340206, "grad_norm": 0.5712897181510925, "learning_rate": 0.0002790587188612099, "loss": 0.3279, "step": 5925 }, { "epoch": 3.0670103092783507, "grad_norm": 0.39846915006637573, "learning_rate": 0.00027892526690391457, "loss": 0.3532, "step": 5950 }, { "epoch": 3.079896907216495, "grad_norm": 0.3366047143936157, "learning_rate": 0.0002787918149466192, "loss": 0.3486, "step": 5975 }, { "epoch": 3.0927835051546393, "grad_norm": 0.5053852796554565, "learning_rate": 0.0002786583629893238, "loss": 0.3571, "step": 6000 }, { "epoch": 3.0927835051546393, "eval_loss": 0.48639050126075745, "eval_runtime": 195.6249, "eval_samples_per_second": 6.262, "eval_steps_per_second": 0.787, "eval_wer": 1.0073469387755103, "step": 6000 }, { "epoch": 3.1056701030927836, "grad_norm": 0.35252827405929565, "learning_rate": 0.00027852491103202845, "loss": 0.3401, "step": 6025 }, { "epoch": 3.118556701030928, "grad_norm": 0.3530094027519226, "learning_rate": 0.00027839145907473305, "loss": 0.3094, "step": 6050 }, { "epoch": 3.131443298969072, "grad_norm": 0.9595320224761963, "learning_rate": 0.0002782580071174377, "loss": 0.3692, "step": 6075 }, { "epoch": 3.1443298969072164, "grad_norm": 0.3585176467895508, "learning_rate": 0.00027812455516014234, "loss": 0.3351, "step": 6100 }, { "epoch": 3.1572164948453607, "grad_norm": 0.4432651996612549, "learning_rate": 0.00027799110320284693, "loss": 0.3494, "step": 6125 }, { "epoch": 3.170103092783505, "grad_norm": 0.5367820858955383, "learning_rate": 0.0002778576512455516, "loss": 0.3311, "step": 6150 }, { "epoch": 3.1829896907216493, "grad_norm": 0.3430980443954468, "learning_rate": 0.00027772419928825617, "loss": 0.3357, "step": 6175 }, { "epoch": 3.195876288659794, "grad_norm": 1.0889408588409424, "learning_rate": 0.0002775907473309608, "loss": 0.3056, "step": 6200 }, { "epoch": 3.2087628865979383, "grad_norm": 0.3883308172225952, "learning_rate": 0.00027745729537366546, "loss": 0.3251, "step": 6225 }, { "epoch": 3.2216494845360826, "grad_norm": 0.41912856698036194, "learning_rate": 0.00027732384341637006, "loss": 0.3292, "step": 6250 }, { "epoch": 3.234536082474227, "grad_norm": 0.45084699988365173, "learning_rate": 0.0002771903914590747, "loss": 0.3516, "step": 6275 }, { "epoch": 3.247422680412371, "grad_norm": 0.4294661581516266, "learning_rate": 0.00027705693950177935, "loss": 0.3297, "step": 6300 }, { "epoch": 3.2603092783505154, "grad_norm": 0.25462472438812256, "learning_rate": 0.00027692348754448394, "loss": 0.3513, "step": 6325 }, { "epoch": 3.2731958762886597, "grad_norm": 0.2990482747554779, "learning_rate": 0.0002767900355871886, "loss": 0.3472, "step": 6350 }, { "epoch": 3.286082474226804, "grad_norm": 0.5474823713302612, "learning_rate": 0.0002766565836298932, "loss": 0.3404, "step": 6375 }, { "epoch": 3.2989690721649483, "grad_norm": 0.31416311860084534, "learning_rate": 0.00027652313167259783, "loss": 0.3052, "step": 6400 }, { "epoch": 3.3118556701030926, "grad_norm": 0.6990143656730652, "learning_rate": 0.0002763896797153025, "loss": 0.3661, "step": 6425 }, { "epoch": 3.3247422680412373, "grad_norm": 0.4336131811141968, "learning_rate": 0.00027625622775800707, "loss": 0.2757, "step": 6450 }, { "epoch": 3.3376288659793816, "grad_norm": 0.35333672165870667, "learning_rate": 0.0002761227758007117, "loss": 0.3275, "step": 6475 }, { "epoch": 3.350515463917526, "grad_norm": 0.7743633985519409, "learning_rate": 0.0002759893238434163, "loss": 0.2855, "step": 6500 }, { "epoch": 3.36340206185567, "grad_norm": 0.5316669344902039, "learning_rate": 0.00027585587188612095, "loss": 0.3742, "step": 6525 }, { "epoch": 3.3762886597938144, "grad_norm": 0.3642534017562866, "learning_rate": 0.0002757277580071174, "loss": 0.3676, "step": 6550 }, { "epoch": 3.3891752577319587, "grad_norm": 0.6164928078651428, "learning_rate": 0.00027559430604982203, "loss": 0.3658, "step": 6575 }, { "epoch": 3.402061855670103, "grad_norm": 0.3114078938961029, "learning_rate": 0.0002754608540925266, "loss": 0.2681, "step": 6600 }, { "epoch": 3.4149484536082473, "grad_norm": 0.8610777258872986, "learning_rate": 0.00027532740213523127, "loss": 0.3607, "step": 6625 }, { "epoch": 3.4278350515463916, "grad_norm": 0.49727797508239746, "learning_rate": 0.0002751939501779359, "loss": 0.3306, "step": 6650 }, { "epoch": 3.4407216494845363, "grad_norm": 0.3058234751224518, "learning_rate": 0.0002750604982206405, "loss": 0.3313, "step": 6675 }, { "epoch": 3.4536082474226806, "grad_norm": 0.5317339301109314, "learning_rate": 0.00027492704626334516, "loss": 0.3316, "step": 6700 }, { "epoch": 3.466494845360825, "grad_norm": 0.4639209806919098, "learning_rate": 0.0002747935943060498, "loss": 0.3773, "step": 6725 }, { "epoch": 3.479381443298969, "grad_norm": 0.28237494826316833, "learning_rate": 0.0002746601423487544, "loss": 0.3275, "step": 6750 }, { "epoch": 3.4922680412371134, "grad_norm": 0.44046372175216675, "learning_rate": 0.00027452669039145904, "loss": 0.3777, "step": 6775 }, { "epoch": 3.5051546391752577, "grad_norm": 0.40542760491371155, "learning_rate": 0.0002743932384341637, "loss": 0.3197, "step": 6800 }, { "epoch": 3.518041237113402, "grad_norm": 0.587062418460846, "learning_rate": 0.0002742597864768683, "loss": 0.3441, "step": 6825 }, { "epoch": 3.5309278350515463, "grad_norm": 0.34003278613090515, "learning_rate": 0.00027412633451957293, "loss": 0.3189, "step": 6850 }, { "epoch": 3.5438144329896906, "grad_norm": 0.40320533514022827, "learning_rate": 0.0002739928825622776, "loss": 0.3115, "step": 6875 }, { "epoch": 3.556701030927835, "grad_norm": 0.34437236189842224, "learning_rate": 0.00027385943060498217, "loss": 0.3344, "step": 6900 }, { "epoch": 3.569587628865979, "grad_norm": 0.42826181650161743, "learning_rate": 0.0002737259786476868, "loss": 0.3716, "step": 6925 }, { "epoch": 3.582474226804124, "grad_norm": 0.4200308620929718, "learning_rate": 0.00027359252669039146, "loss": 0.3511, "step": 6950 }, { "epoch": 3.595360824742268, "grad_norm": 0.3897481858730316, "learning_rate": 0.00027345907473309605, "loss": 0.3651, "step": 6975 }, { "epoch": 3.6082474226804124, "grad_norm": 0.3066980838775635, "learning_rate": 0.0002733256227758007, "loss": 0.314, "step": 7000 }, { "epoch": 3.6211340206185567, "grad_norm": 0.39002224802970886, "learning_rate": 0.0002731921708185053, "loss": 0.411, "step": 7025 }, { "epoch": 3.634020618556701, "grad_norm": 0.8328560590744019, "learning_rate": 0.00027305871886120994, "loss": 0.3154, "step": 7050 }, { "epoch": 3.6469072164948453, "grad_norm": 0.43831050395965576, "learning_rate": 0.0002729252669039146, "loss": 0.396, "step": 7075 }, { "epoch": 3.6597938144329896, "grad_norm": 0.3992260694503784, "learning_rate": 0.0002727918149466192, "loss": 0.3142, "step": 7100 }, { "epoch": 3.6726804123711343, "grad_norm": 0.2941119074821472, "learning_rate": 0.0002726583629893238, "loss": 0.3527, "step": 7125 }, { "epoch": 3.6855670103092786, "grad_norm": 0.4881301522254944, "learning_rate": 0.00027252491103202847, "loss": 0.3171, "step": 7150 }, { "epoch": 3.698453608247423, "grad_norm": 0.3898142874240875, "learning_rate": 0.00027239145907473306, "loss": 0.3817, "step": 7175 }, { "epoch": 3.711340206185567, "grad_norm": 0.31580379605293274, "learning_rate": 0.0002722580071174377, "loss": 0.2595, "step": 7200 }, { "epoch": 3.7242268041237114, "grad_norm": 0.3299170136451721, "learning_rate": 0.00027212455516014236, "loss": 0.3416, "step": 7225 }, { "epoch": 3.7371134020618557, "grad_norm": 0.6964473724365234, "learning_rate": 0.00027199110320284695, "loss": 0.2942, "step": 7250 }, { "epoch": 3.75, "grad_norm": 0.3565264046192169, "learning_rate": 0.0002718576512455516, "loss": 0.3283, "step": 7275 }, { "epoch": 3.7628865979381443, "grad_norm": 0.28134214878082275, "learning_rate": 0.00027172419928825624, "loss": 0.3203, "step": 7300 }, { "epoch": 3.7757731958762886, "grad_norm": 0.4196174442768097, "learning_rate": 0.00027159074733096083, "loss": 0.3396, "step": 7325 }, { "epoch": 3.788659793814433, "grad_norm": 0.4770311415195465, "learning_rate": 0.0002714572953736655, "loss": 0.3144, "step": 7350 }, { "epoch": 3.801546391752577, "grad_norm": 0.29138994216918945, "learning_rate": 0.0002713238434163701, "loss": 0.3403, "step": 7375 }, { "epoch": 3.8144329896907214, "grad_norm": 0.47035351395606995, "learning_rate": 0.0002711903914590747, "loss": 0.2623, "step": 7400 }, { "epoch": 3.8273195876288657, "grad_norm": 0.32780078053474426, "learning_rate": 0.00027105693950177937, "loss": 0.3863, "step": 7425 }, { "epoch": 3.8402061855670104, "grad_norm": 0.4419979751110077, "learning_rate": 0.00027092348754448396, "loss": 0.3207, "step": 7450 }, { "epoch": 3.8530927835051547, "grad_norm": 0.28167861700057983, "learning_rate": 0.0002707900355871886, "loss": 0.3308, "step": 7475 }, { "epoch": 3.865979381443299, "grad_norm": 0.3411625921726227, "learning_rate": 0.00027065658362989325, "loss": 0.3387, "step": 7500 }, { "epoch": 3.8788659793814433, "grad_norm": 0.31229549646377563, "learning_rate": 0.00027052313167259785, "loss": 0.408, "step": 7525 }, { "epoch": 3.8917525773195876, "grad_norm": 0.3603403866291046, "learning_rate": 0.0002703896797153025, "loss": 0.3259, "step": 7550 }, { "epoch": 3.904639175257732, "grad_norm": 0.2832438051700592, "learning_rate": 0.0002702562277580071, "loss": 0.3423, "step": 7575 }, { "epoch": 3.917525773195876, "grad_norm": 0.3435526192188263, "learning_rate": 0.00027012277580071173, "loss": 0.2835, "step": 7600 }, { "epoch": 3.930412371134021, "grad_norm": 0.3645428419113159, "learning_rate": 0.0002699893238434164, "loss": 0.3483, "step": 7625 }, { "epoch": 3.943298969072165, "grad_norm": 0.7062532901763916, "learning_rate": 0.00026985587188612097, "loss": 0.2586, "step": 7650 }, { "epoch": 3.9561855670103094, "grad_norm": 0.4606476426124573, "learning_rate": 0.0002697224199288256, "loss": 0.3939, "step": 7675 }, { "epoch": 3.9690721649484537, "grad_norm": 0.4727219045162201, "learning_rate": 0.0002695889679715302, "loss": 0.3151, "step": 7700 }, { "epoch": 3.981958762886598, "grad_norm": 0.2774180471897125, "learning_rate": 0.00026945551601423486, "loss": 0.3519, "step": 7725 }, { "epoch": 3.9948453608247423, "grad_norm": 0.4793704152107239, "learning_rate": 0.0002693220640569395, "loss": 0.3023, "step": 7750 }, { "epoch": 4.007731958762887, "grad_norm": 0.3699122965335846, "learning_rate": 0.0002691886120996441, "loss": 0.3664, "step": 7775 }, { "epoch": 4.020618556701031, "grad_norm": 0.6096347570419312, "learning_rate": 0.00026905516014234874, "loss": 0.2579, "step": 7800 }, { "epoch": 4.033505154639175, "grad_norm": 0.2851110100746155, "learning_rate": 0.00026892170818505333, "loss": 0.3347, "step": 7825 }, { "epoch": 4.046391752577319, "grad_norm": 0.5487604141235352, "learning_rate": 0.000268788256227758, "loss": 0.2676, "step": 7850 }, { "epoch": 4.059278350515464, "grad_norm": 0.29913634061813354, "learning_rate": 0.0002686548042704626, "loss": 0.3061, "step": 7875 }, { "epoch": 4.072164948453608, "grad_norm": 0.3672349750995636, "learning_rate": 0.0002685213523131672, "loss": 0.2632, "step": 7900 }, { "epoch": 4.085051546391752, "grad_norm": 0.5969862341880798, "learning_rate": 0.00026838790035587187, "loss": 0.2868, "step": 7925 }, { "epoch": 4.097938144329897, "grad_norm": 0.4409433901309967, "learning_rate": 0.0002682544483985765, "loss": 0.2907, "step": 7950 }, { "epoch": 4.110824742268041, "grad_norm": 0.4578838050365448, "learning_rate": 0.0002681209964412811, "loss": 0.3377, "step": 7975 }, { "epoch": 4.123711340206185, "grad_norm": 0.302491694688797, "learning_rate": 0.00026798754448398575, "loss": 0.2857, "step": 8000 }, { "epoch": 4.123711340206185, "eval_loss": 0.4647158980369568, "eval_runtime": 189.5598, "eval_samples_per_second": 6.462, "eval_steps_per_second": 0.812, "eval_wer": 1.0008163265306123, "step": 8000 }, { "epoch": 4.13659793814433, "grad_norm": 0.5263584852218628, "learning_rate": 0.00026785409252669034, "loss": 0.3246, "step": 8025 }, { "epoch": 4.149484536082475, "grad_norm": 0.48449578881263733, "learning_rate": 0.000267720640569395, "loss": 0.3163, "step": 8050 }, { "epoch": 4.162371134020619, "grad_norm": 0.3020855784416199, "learning_rate": 0.00026758718861209964, "loss": 0.3248, "step": 8075 }, { "epoch": 4.175257731958763, "grad_norm": 0.3554665148258209, "learning_rate": 0.00026745373665480423, "loss": 0.2863, "step": 8100 }, { "epoch": 4.188144329896907, "grad_norm": 0.4055439531803131, "learning_rate": 0.0002673202846975089, "loss": 0.3289, "step": 8125 }, { "epoch": 4.201030927835052, "grad_norm": 0.6498619914054871, "learning_rate": 0.00026718683274021347, "loss": 0.289, "step": 8150 }, { "epoch": 4.213917525773196, "grad_norm": 0.23331980407238007, "learning_rate": 0.0002670533807829181, "loss": 0.2744, "step": 8175 }, { "epoch": 4.22680412371134, "grad_norm": 0.5142727494239807, "learning_rate": 0.00026691992882562276, "loss": 0.3139, "step": 8200 }, { "epoch": 4.239690721649485, "grad_norm": 0.4468501806259155, "learning_rate": 0.00026678647686832735, "loss": 0.3392, "step": 8225 }, { "epoch": 4.252577319587629, "grad_norm": 0.36652007699012756, "learning_rate": 0.000266653024911032, "loss": 0.2499, "step": 8250 }, { "epoch": 4.265463917525773, "grad_norm": 0.3004089891910553, "learning_rate": 0.0002665195729537366, "loss": 0.31, "step": 8275 }, { "epoch": 4.278350515463917, "grad_norm": 0.4099670946598053, "learning_rate": 0.00026638612099644124, "loss": 0.2798, "step": 8300 }, { "epoch": 4.291237113402062, "grad_norm": 0.26817041635513306, "learning_rate": 0.0002662526690391459, "loss": 0.3055, "step": 8325 }, { "epoch": 4.304123711340206, "grad_norm": 0.4156699478626251, "learning_rate": 0.0002661192170818505, "loss": 0.2942, "step": 8350 }, { "epoch": 4.31701030927835, "grad_norm": 2.611400842666626, "learning_rate": 0.0002659857651245551, "loss": 0.3659, "step": 8375 }, { "epoch": 4.329896907216495, "grad_norm": 0.46959781646728516, "learning_rate": 0.00026585231316725977, "loss": 0.2982, "step": 8400 }, { "epoch": 4.342783505154639, "grad_norm": 0.2867629826068878, "learning_rate": 0.00026571886120996436, "loss": 0.3511, "step": 8425 }, { "epoch": 4.355670103092783, "grad_norm": 0.38449224829673767, "learning_rate": 0.000265585409252669, "loss": 0.2826, "step": 8450 }, { "epoch": 4.368556701030927, "grad_norm": 0.4524473249912262, "learning_rate": 0.0002654519572953736, "loss": 0.313, "step": 8475 }, { "epoch": 4.381443298969073, "grad_norm": 0.3989889919757843, "learning_rate": 0.00026531850533807825, "loss": 0.2825, "step": 8500 }, { "epoch": 4.394329896907217, "grad_norm": 0.3007287085056305, "learning_rate": 0.0002651850533807829, "loss": 0.3369, "step": 8525 }, { "epoch": 4.407216494845361, "grad_norm": 0.7760800123214722, "learning_rate": 0.0002650516014234875, "loss": 0.2835, "step": 8550 }, { "epoch": 4.420103092783505, "grad_norm": 0.27871614694595337, "learning_rate": 0.00026491814946619214, "loss": 0.3172, "step": 8575 }, { "epoch": 4.43298969072165, "grad_norm": 0.48607179522514343, "learning_rate": 0.0002647846975088968, "loss": 0.303, "step": 8600 }, { "epoch": 4.445876288659794, "grad_norm": 0.3540396988391876, "learning_rate": 0.0002646512455516014, "loss": 0.355, "step": 8625 }, { "epoch": 4.458762886597938, "grad_norm": 1.2004367113113403, "learning_rate": 0.000264517793594306, "loss": 0.3197, "step": 8650 }, { "epoch": 4.471649484536083, "grad_norm": 0.2405807226896286, "learning_rate": 0.00026438434163701067, "loss": 0.3179, "step": 8675 }, { "epoch": 4.484536082474227, "grad_norm": 0.36553826928138733, "learning_rate": 0.00026425088967971526, "loss": 0.2793, "step": 8700 }, { "epoch": 4.497422680412371, "grad_norm": 0.26435115933418274, "learning_rate": 0.0002641174377224199, "loss": 0.2849, "step": 8725 }, { "epoch": 4.510309278350515, "grad_norm": 0.6196132302284241, "learning_rate": 0.00026398398576512455, "loss": 0.3267, "step": 8750 }, { "epoch": 4.52319587628866, "grad_norm": 0.2987557649612427, "learning_rate": 0.00026385053380782915, "loss": 0.3152, "step": 8775 }, { "epoch": 4.536082474226804, "grad_norm": 0.4445992112159729, "learning_rate": 0.0002637170818505338, "loss": 0.3, "step": 8800 }, { "epoch": 4.548969072164948, "grad_norm": 0.4059930741786957, "learning_rate": 0.00026358362989323844, "loss": 0.3395, "step": 8825 }, { "epoch": 4.561855670103093, "grad_norm": 0.522637665271759, "learning_rate": 0.00026345017793594303, "loss": 0.2783, "step": 8850 }, { "epoch": 4.574742268041237, "grad_norm": 0.1963280737400055, "learning_rate": 0.0002633167259786477, "loss": 0.2979, "step": 8875 }, { "epoch": 4.587628865979381, "grad_norm": 0.7622120976448059, "learning_rate": 0.00026318327402135227, "loss": 0.3204, "step": 8900 }, { "epoch": 4.600515463917525, "grad_norm": 0.2749217450618744, "learning_rate": 0.0002630498220640569, "loss": 0.3365, "step": 8925 }, { "epoch": 4.61340206185567, "grad_norm": 0.5664824843406677, "learning_rate": 0.00026291637010676156, "loss": 0.2772, "step": 8950 }, { "epoch": 4.626288659793815, "grad_norm": 0.3661792278289795, "learning_rate": 0.00026278291814946616, "loss": 0.3314, "step": 8975 }, { "epoch": 4.639175257731958, "grad_norm": 0.37070122361183167, "learning_rate": 0.0002626494661921708, "loss": 0.2991, "step": 9000 }, { "epoch": 4.652061855670103, "grad_norm": 0.4963271915912628, "learning_rate": 0.00026251601423487545, "loss": 0.3312, "step": 9025 }, { "epoch": 4.664948453608248, "grad_norm": 0.5193173885345459, "learning_rate": 0.00026238256227758004, "loss": 0.2914, "step": 9050 }, { "epoch": 4.677835051546392, "grad_norm": 0.3158729076385498, "learning_rate": 0.0002622491103202847, "loss": 0.3317, "step": 9075 }, { "epoch": 4.690721649484536, "grad_norm": 0.37290090322494507, "learning_rate": 0.00026211565836298934, "loss": 0.2612, "step": 9100 }, { "epoch": 4.703608247422681, "grad_norm": 0.334089070558548, "learning_rate": 0.00026198220640569393, "loss": 0.3235, "step": 9125 }, { "epoch": 4.716494845360825, "grad_norm": 0.3259222209453583, "learning_rate": 0.0002618487544483986, "loss": 0.2952, "step": 9150 }, { "epoch": 4.729381443298969, "grad_norm": 0.2776556611061096, "learning_rate": 0.0002617153024911032, "loss": 0.3405, "step": 9175 }, { "epoch": 4.742268041237113, "grad_norm": 0.33884450793266296, "learning_rate": 0.0002615818505338078, "loss": 0.2813, "step": 9200 }, { "epoch": 4.755154639175258, "grad_norm": 0.48323071002960205, "learning_rate": 0.00026144839857651246, "loss": 0.3044, "step": 9225 }, { "epoch": 4.768041237113402, "grad_norm": 0.32996150851249695, "learning_rate": 0.00026131494661921705, "loss": 0.2817, "step": 9250 }, { "epoch": 4.780927835051546, "grad_norm": 0.3732437491416931, "learning_rate": 0.0002611814946619217, "loss": 0.3165, "step": 9275 }, { "epoch": 4.793814432989691, "grad_norm": 0.5148000717163086, "learning_rate": 0.00026104804270462635, "loss": 0.2636, "step": 9300 }, { "epoch": 4.806701030927835, "grad_norm": 0.3256881833076477, "learning_rate": 0.00026091459074733094, "loss": 0.3677, "step": 9325 }, { "epoch": 4.819587628865979, "grad_norm": 0.5248320698738098, "learning_rate": 0.0002607811387900356, "loss": 0.2959, "step": 9350 }, { "epoch": 4.832474226804123, "grad_norm": 0.25363728404045105, "learning_rate": 0.0002606476868327402, "loss": 0.355, "step": 9375 }, { "epoch": 4.845360824742268, "grad_norm": 1.110967755317688, "learning_rate": 0.0002605142348754448, "loss": 0.3208, "step": 9400 }, { "epoch": 4.858247422680412, "grad_norm": 0.3510916531085968, "learning_rate": 0.00026038078291814947, "loss": 0.3421, "step": 9425 }, { "epoch": 4.871134020618557, "grad_norm": 0.33312925696372986, "learning_rate": 0.00026024733096085406, "loss": 0.3046, "step": 9450 }, { "epoch": 4.8840206185567006, "grad_norm": 0.32453814148902893, "learning_rate": 0.0002601138790035587, "loss": 0.3294, "step": 9475 }, { "epoch": 4.896907216494846, "grad_norm": 0.3632301092147827, "learning_rate": 0.0002599804270462633, "loss": 0.3261, "step": 9500 }, { "epoch": 4.90979381443299, "grad_norm": 0.3007452189922333, "learning_rate": 0.00025984697508896795, "loss": 0.3524, "step": 9525 }, { "epoch": 4.922680412371134, "grad_norm": 0.47182586789131165, "learning_rate": 0.0002597135231316726, "loss": 0.3287, "step": 9550 }, { "epoch": 4.935567010309279, "grad_norm": 0.3959861397743225, "learning_rate": 0.0002595800711743772, "loss": 0.3134, "step": 9575 }, { "epoch": 4.948453608247423, "grad_norm": 0.28187814354896545, "learning_rate": 0.00025944661921708183, "loss": 0.3154, "step": 9600 }, { "epoch": 4.961340206185567, "grad_norm": 0.32686540484428406, "learning_rate": 0.0002593131672597865, "loss": 0.3464, "step": 9625 }, { "epoch": 4.974226804123711, "grad_norm": 0.46848079562187195, "learning_rate": 0.0002591797153024911, "loss": 0.2753, "step": 9650 }, { "epoch": 4.987113402061856, "grad_norm": 0.25264793634414673, "learning_rate": 0.0002590462633451957, "loss": 0.3403, "step": 9675 }, { "epoch": 5.0, "grad_norm": 0.4163696765899658, "learning_rate": 0.0002589128113879003, "loss": 0.2669, "step": 9700 }, { "epoch": 5.012886597938144, "grad_norm": 0.29742294549942017, "learning_rate": 0.00025877935943060496, "loss": 0.3203, "step": 9725 }, { "epoch": 5.025773195876289, "grad_norm": 0.8651419281959534, "learning_rate": 0.0002586459074733096, "loss": 0.2589, "step": 9750 }, { "epoch": 5.038659793814433, "grad_norm": 0.2916441261768341, "learning_rate": 0.0002585124555160142, "loss": 0.3, "step": 9775 }, { "epoch": 5.051546391752577, "grad_norm": 1.4987293481826782, "learning_rate": 0.00025837900355871884, "loss": 0.3148, "step": 9800 }, { "epoch": 5.064432989690721, "grad_norm": 0.46689650416374207, "learning_rate": 0.00025824555160142344, "loss": 0.2935, "step": 9825 }, { "epoch": 5.077319587628866, "grad_norm": 0.3726617991924286, "learning_rate": 0.0002581120996441281, "loss": 0.2871, "step": 9850 }, { "epoch": 5.09020618556701, "grad_norm": 0.29355180263519287, "learning_rate": 0.00025797864768683273, "loss": 0.2422, "step": 9875 }, { "epoch": 5.103092783505154, "grad_norm": 0.6249547600746155, "learning_rate": 0.0002578451957295373, "loss": 0.2749, "step": 9900 }, { "epoch": 5.115979381443299, "grad_norm": 0.42613521218299866, "learning_rate": 0.00025771174377224197, "loss": 0.2998, "step": 9925 }, { "epoch": 5.128865979381443, "grad_norm": 0.4285449981689453, "learning_rate": 0.00025757829181494656, "loss": 0.3241, "step": 9950 }, { "epoch": 5.141752577319588, "grad_norm": 0.2756471037864685, "learning_rate": 0.0002574448398576512, "loss": 0.2788, "step": 9975 }, { "epoch": 5.154639175257732, "grad_norm": 0.650787889957428, "learning_rate": 0.00025731138790035586, "loss": 0.2628, "step": 10000 }, { "epoch": 5.154639175257732, "eval_loss": 0.5844400525093079, "eval_runtime": 196.2011, "eval_samples_per_second": 6.244, "eval_steps_per_second": 0.785, "eval_wer": 1.006530612244898, "step": 10000 }, { "epoch": 5.154639175257732, "step": 10000, "total_flos": 5.52048529474528e+19, "train_loss": 0.6429409675121307, "train_runtime": 62307.8234, "train_samples_per_second": 22.413, "train_steps_per_second": 0.934 } ], "logging_steps": 25, "max_steps": 58200, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 2000, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.52048529474528e+19, "train_batch_size": 12, "trial_name": null, "trial_params": null }