diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,13729 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 7.131931740614334, + "global_step": 65300, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "learning_rate": 4.998743993010048e-05, + "loss": 5.9677, + "step": 50 + }, + { + "epoch": 0.01, + "learning_rate": 4.99737876802097e-05, + "loss": 3.506, + "step": 100 + }, + { + "epoch": 0.01, + "eval_cer": 0.4539164593605962, + "eval_loss": 2.664681911468506, + "eval_runtime": 1456.6338, + "eval_samples_per_second": 5.158, + "eval_steps_per_second": 0.645, + "step": 100 + }, + { + "epoch": 0.02, + "learning_rate": 4.996013543031892e-05, + "loss": 2.7796, + "step": 150 + }, + { + "epoch": 0.02, + "learning_rate": 4.994648318042814e-05, + "loss": 2.4129, + "step": 200 + }, + { + "epoch": 0.02, + "eval_cer": 0.41749788701790647, + "eval_loss": 2.389951705932617, + "eval_runtime": 1302.8974, + "eval_samples_per_second": 5.766, + "eval_steps_per_second": 0.721, + "step": 200 + }, + { + "epoch": 0.03, + "learning_rate": 4.993283093053736e-05, + "loss": 2.2714, + "step": 250 + }, + { + "epoch": 0.03, + "learning_rate": 4.9919178680646574e-05, + "loss": 2.2433, + "step": 300 + }, + { + "epoch": 0.03, + "eval_cer": 0.3008544317874631, + "eval_loss": 1.778554916381836, + "eval_runtime": 1143.042, + "eval_samples_per_second": 6.573, + "eval_steps_per_second": 0.822, + "step": 300 + }, + { + "epoch": 0.04, + "learning_rate": 4.990552643075579e-05, + "loss": 1.9436, + "step": 350 + }, + { + "epoch": 0.04, + "learning_rate": 4.989187418086501e-05, + "loss": 1.8459, + "step": 400 + }, + { + "epoch": 0.04, + "eval_cer": 0.26927090996814274, + "eval_loss": 1.7008424997329712, + "eval_runtime": 1063.6345, + "eval_samples_per_second": 7.064, + "eval_steps_per_second": 0.884, + "step": 400 + }, + { + "epoch": 0.05, + "learning_rate": 4.987822193097423e-05, + "loss": 1.7638, + "step": 450 + }, + { + "epoch": 0.05, + "learning_rate": 4.9864569681083444e-05, + "loss": 1.7375, + "step": 500 + }, + { + "epoch": 0.05, + "eval_cer": 0.28988061223442296, + "eval_loss": 1.690486192703247, + "eval_runtime": 1230.2876, + "eval_samples_per_second": 6.107, + "eval_steps_per_second": 0.764, + "step": 500 + }, + { + "epoch": 0.06, + "learning_rate": 4.985091743119266e-05, + "loss": 1.6963, + "step": 550 + }, + { + "epoch": 0.07, + "learning_rate": 4.9837538226299694e-05, + "loss": 1.6133, + "step": 600 + }, + { + "epoch": 0.07, + "eval_cer": 0.24955430620617913, + "eval_loss": 1.496834397315979, + "eval_runtime": 1083.5748, + "eval_samples_per_second": 6.934, + "eval_steps_per_second": 0.867, + "step": 600 + }, + { + "epoch": 0.07, + "learning_rate": 4.982388597640891e-05, + "loss": 1.5559, + "step": 650 + }, + { + "epoch": 0.08, + "learning_rate": 4.981023372651813e-05, + "loss": 1.5477, + "step": 700 + }, + { + "epoch": 0.08, + "eval_cer": 0.23770791914892161, + "eval_loss": 1.4269628524780273, + "eval_runtime": 1176.6532, + "eval_samples_per_second": 6.385, + "eval_steps_per_second": 0.799, + "step": 700 + }, + { + "epoch": 0.08, + "learning_rate": 4.9796581476627355e-05, + "loss": 1.5112, + "step": 750 + }, + { + "epoch": 0.09, + "learning_rate": 4.978292922673657e-05, + "loss": 1.4376, + "step": 800 + }, + { + "epoch": 0.09, + "eval_cer": 0.25057743437779095, + "eval_loss": 1.388639211654663, + "eval_runtime": 1506.6054, + "eval_samples_per_second": 4.987, + "eval_steps_per_second": 0.624, + "step": 800 + }, + { + "epoch": 0.09, + "learning_rate": 4.976927697684579e-05, + "loss": 1.4, + "step": 850 + }, + { + "epoch": 0.1, + "learning_rate": 4.9755624726955e-05, + "loss": 1.2982, + "step": 900 + }, + { + "epoch": 0.1, + "eval_cer": 0.27491693499543185, + "eval_loss": 1.3276695013046265, + "eval_runtime": 1886.1604, + "eval_samples_per_second": 3.983, + "eval_steps_per_second": 0.498, + "step": 900 + }, + { + "epoch": 0.1, + "learning_rate": 4.9741972477064225e-05, + "loss": 1.394, + "step": 950 + }, + { + "epoch": 0.11, + "learning_rate": 4.972832022717344e-05, + "loss": 1.2517, + "step": 1000 + }, + { + "epoch": 0.11, + "eval_cer": 0.19267659226460354, + "eval_loss": 1.152963399887085, + "eval_runtime": 1240.808, + "eval_samples_per_second": 6.055, + "eval_steps_per_second": 0.758, + "step": 1000 + }, + { + "epoch": 0.11, + "learning_rate": 4.971466797728266e-05, + "loss": 1.2324, + "step": 1050 + }, + { + "epoch": 0.12, + "learning_rate": 4.970101572739187e-05, + "loss": 1.2044, + "step": 1100 + }, + { + "epoch": 0.12, + "eval_cer": 0.1959649741138307, + "eval_loss": 1.1271371841430664, + "eval_runtime": 1192.9542, + "eval_samples_per_second": 6.298, + "eval_steps_per_second": 0.788, + "step": 1100 + }, + { + "epoch": 0.13, + "learning_rate": 4.9687363477501095e-05, + "loss": 1.1874, + "step": 1150 + }, + { + "epoch": 0.13, + "learning_rate": 4.967371122761031e-05, + "loss": 1.1988, + "step": 1200 + }, + { + "epoch": 0.13, + "eval_cer": 0.2019942444763055, + "eval_loss": 1.1370463371276855, + "eval_runtime": 1202.9894, + "eval_samples_per_second": 6.245, + "eval_steps_per_second": 0.781, + "step": 1200 + }, + { + "epoch": 0.14, + "learning_rate": 4.9660058977719534e-05, + "loss": 1.1858, + "step": 1250 + }, + { + "epoch": 0.14, + "learning_rate": 4.964640672782875e-05, + "loss": 1.1685, + "step": 1300 + }, + { + "epoch": 0.14, + "eval_cer": 0.1879031347415318, + "eval_loss": 1.1137348413467407, + "eval_runtime": 1125.3048, + "eval_samples_per_second": 6.676, + "eval_steps_per_second": 0.835, + "step": 1300 + }, + { + "epoch": 0.15, + "learning_rate": 4.9632754477937965e-05, + "loss": 1.1165, + "step": 1350 + }, + { + "epoch": 0.15, + "learning_rate": 4.961910222804718e-05, + "loss": 1.135, + "step": 1400 + }, + { + "epoch": 0.15, + "eval_cer": 0.18725983007175584, + "eval_loss": 1.0930421352386475, + "eval_runtime": 1179.9127, + "eval_samples_per_second": 6.367, + "eval_steps_per_second": 0.797, + "step": 1400 + }, + { + "epoch": 0.16, + "learning_rate": 4.9605449978156404e-05, + "loss": 1.0825, + "step": 1450 + }, + { + "epoch": 0.16, + "learning_rate": 4.959179772826562e-05, + "loss": 1.0914, + "step": 1500 + }, + { + "epoch": 0.16, + "eval_cer": 0.18079940870719713, + "eval_loss": 1.032211184501648, + "eval_runtime": 1192.2191, + "eval_samples_per_second": 6.302, + "eval_steps_per_second": 0.788, + "step": 1500 + }, + { + "epoch": 0.17, + "learning_rate": 4.9578145478374836e-05, + "loss": 1.0302, + "step": 1550 + }, + { + "epoch": 0.17, + "learning_rate": 4.956449322848405e-05, + "loss": 1.1226, + "step": 1600 + }, + { + "epoch": 0.17, + "eval_cer": 0.17277863133509672, + "eval_loss": 1.0084491968154907, + "eval_runtime": 1179.5417, + "eval_samples_per_second": 6.369, + "eval_steps_per_second": 0.797, + "step": 1600 + }, + { + "epoch": 0.18, + "learning_rate": 4.9550840978593274e-05, + "loss": 1.0392, + "step": 1650 + }, + { + "epoch": 0.19, + "learning_rate": 4.9537188728702497e-05, + "loss": 1.0158, + "step": 1700 + }, + { + "epoch": 0.19, + "eval_cer": 0.16822417114641683, + "eval_loss": 1.0280424356460571, + "eval_runtime": 1116.1285, + "eval_samples_per_second": 6.731, + "eval_steps_per_second": 0.842, + "step": 1700 + }, + { + "epoch": 0.19, + "learning_rate": 4.952353647881171e-05, + "loss": 1.0508, + "step": 1750 + }, + { + "epoch": 0.2, + "learning_rate": 4.950988422892093e-05, + "loss": 1.0199, + "step": 1800 + }, + { + "epoch": 0.2, + "eval_cer": 0.1638339589585308, + "eval_loss": 0.9974864721298218, + "eval_runtime": 1121.8717, + "eval_samples_per_second": 6.697, + "eval_steps_per_second": 0.838, + "step": 1800 + }, + { + "epoch": 0.2, + "learning_rate": 4.9496231979030144e-05, + "loss": 1.0024, + "step": 1850 + }, + { + "epoch": 0.21, + "learning_rate": 4.9482579729139367e-05, + "loss": 0.9729, + "step": 1900 + }, + { + "epoch": 0.21, + "eval_cer": 0.17101638715991255, + "eval_loss": 0.9937753081321716, + "eval_runtime": 1156.6861, + "eval_samples_per_second": 6.495, + "eval_steps_per_second": 0.813, + "step": 1900 + }, + { + "epoch": 0.21, + "learning_rate": 4.946892747924858e-05, + "loss": 1.0344, + "step": 1950 + }, + { + "epoch": 0.22, + "learning_rate": 4.94552752293578e-05, + "loss": 0.9539, + "step": 2000 + }, + { + "epoch": 0.22, + "eval_cer": 0.15910498526900743, + "eval_loss": 0.9161514639854431, + "eval_runtime": 1178.9029, + "eval_samples_per_second": 6.373, + "eval_steps_per_second": 0.797, + "step": 2000 + }, + { + "epoch": 0.22, + "learning_rate": 4.9441622979467014e-05, + "loss": 0.9257, + "step": 2050 + }, + { + "epoch": 0.23, + "learning_rate": 4.9427970729576237e-05, + "loss": 1.0588, + "step": 2100 + }, + { + "epoch": 0.23, + "eval_cer": 0.15335972707457202, + "eval_loss": 0.9163301587104797, + "eval_runtime": 1165.5791, + "eval_samples_per_second": 6.446, + "eval_steps_per_second": 0.806, + "step": 2100 + }, + { + "epoch": 0.23, + "learning_rate": 4.941431847968546e-05, + "loss": 0.942, + "step": 2150 + }, + { + "epoch": 0.24, + "learning_rate": 4.9400666229794675e-05, + "loss": 1.0196, + "step": 2200 + }, + { + "epoch": 0.24, + "eval_cer": 0.16217094795049292, + "eval_loss": 0.9203193783760071, + "eval_runtime": 1167.9514, + "eval_samples_per_second": 6.433, + "eval_steps_per_second": 0.805, + "step": 2200 + }, + { + "epoch": 0.25, + "learning_rate": 4.938701397990389e-05, + "loss": 0.9518, + "step": 2250 + }, + { + "epoch": 0.25, + "learning_rate": 4.937336173001311e-05, + "loss": 0.9547, + "step": 2300 + }, + { + "epoch": 0.25, + "eval_cer": 0.1646825736292991, + "eval_loss": 0.8791403770446777, + "eval_runtime": 1267.8673, + "eval_samples_per_second": 5.926, + "eval_steps_per_second": 0.741, + "step": 2300 + }, + { + "epoch": 0.26, + "learning_rate": 4.935970948012233e-05, + "loss": 0.9117, + "step": 2350 + }, + { + "epoch": 0.26, + "learning_rate": 4.9346057230231545e-05, + "loss": 0.9091, + "step": 2400 + }, + { + "epoch": 0.26, + "eval_cer": 0.14374437536143114, + "eval_loss": 0.8452526330947876, + "eval_runtime": 1242.3604, + "eval_samples_per_second": 6.047, + "eval_steps_per_second": 0.757, + "step": 2400 + }, + { + "epoch": 0.27, + "learning_rate": 4.933240498034076e-05, + "loss": 0.9276, + "step": 2450 + }, + { + "epoch": 0.27, + "learning_rate": 4.931875273044998e-05, + "loss": 0.9199, + "step": 2500 + }, + { + "epoch": 0.27, + "eval_cer": 0.14217033202048993, + "eval_loss": 0.828856348991394, + "eval_runtime": 1444.7023, + "eval_samples_per_second": 5.2, + "eval_steps_per_second": 0.651, + "step": 2500 + }, + { + "epoch": 0.28, + "learning_rate": 4.93051004805592e-05, + "loss": 0.9014, + "step": 2550 + }, + { + "epoch": 0.28, + "learning_rate": 4.929144823066842e-05, + "loss": 0.8825, + "step": 2600 + }, + { + "epoch": 0.28, + "eval_cer": 0.13830366033513436, + "eval_loss": 0.8257022500038147, + "eval_runtime": 1455.1873, + "eval_samples_per_second": 5.163, + "eval_steps_per_second": 0.646, + "step": 2600 + }, + { + "epoch": 0.29, + "learning_rate": 4.927779598077764e-05, + "loss": 0.9311, + "step": 2650 + }, + { + "epoch": 0.29, + "learning_rate": 4.9264143730886854e-05, + "loss": 0.8439, + "step": 2700 + }, + { + "epoch": 0.29, + "eval_cer": 0.146505794874778, + "eval_loss": 0.8311769366264343, + "eval_runtime": 1282.7141, + "eval_samples_per_second": 5.857, + "eval_steps_per_second": 0.733, + "step": 2700 + }, + { + "epoch": 0.3, + "learning_rate": 4.925049148099607e-05, + "loss": 0.8646, + "step": 2750 + }, + { + "epoch": 0.31, + "learning_rate": 4.9236839231105285e-05, + "loss": 0.8431, + "step": 2800 + }, + { + "epoch": 0.31, + "eval_cer": 0.136733038827543, + "eval_loss": 0.8049759864807129, + "eval_runtime": 1195.0102, + "eval_samples_per_second": 6.287, + "eval_steps_per_second": 0.787, + "step": 2800 + }, + { + "epoch": 0.31, + "learning_rate": 4.922318698121451e-05, + "loss": 0.8711, + "step": 2850 + }, + { + "epoch": 0.32, + "learning_rate": 4.9209534731323724e-05, + "loss": 0.8549, + "step": 2900 + }, + { + "epoch": 0.32, + "eval_cer": 0.1316276634695337, + "eval_loss": 0.7712754011154175, + "eval_runtime": 1199.3555, + "eval_samples_per_second": 6.264, + "eval_steps_per_second": 0.784, + "step": 2900 + }, + { + "epoch": 0.32, + "learning_rate": 4.919588248143294e-05, + "loss": 0.8794, + "step": 2950 + }, + { + "epoch": 0.33, + "learning_rate": 4.918223023154216e-05, + "loss": 0.8285, + "step": 3000 + }, + { + "epoch": 0.33, + "eval_cer": 0.13497079465235884, + "eval_loss": 0.7963769435882568, + "eval_runtime": 1149.3465, + "eval_samples_per_second": 6.537, + "eval_steps_per_second": 0.818, + "step": 3000 + }, + { + "epoch": 0.33, + "learning_rate": 4.916857798165138e-05, + "loss": 0.8368, + "step": 3050 + }, + { + "epoch": 0.34, + "learning_rate": 4.91549257317606e-05, + "loss": 0.7937, + "step": 3100 + }, + { + "epoch": 0.34, + "eval_cer": 0.12768571145048094, + "eval_loss": 0.7565470933914185, + "eval_runtime": 1157.2539, + "eval_samples_per_second": 6.492, + "eval_steps_per_second": 0.812, + "step": 3100 + }, + { + "epoch": 0.34, + "learning_rate": 4.9141273481869816e-05, + "loss": 0.8224, + "step": 3150 + }, + { + "epoch": 0.35, + "learning_rate": 4.912762123197903e-05, + "loss": 0.7887, + "step": 3200 + }, + { + "epoch": 0.35, + "eval_cer": 0.1362300293251118, + "eval_loss": 0.7719568014144897, + "eval_runtime": 1229.1352, + "eval_samples_per_second": 6.112, + "eval_steps_per_second": 0.765, + "step": 3200 + }, + { + "epoch": 0.35, + "learning_rate": 4.911396898208825e-05, + "loss": 0.788, + "step": 3250 + }, + { + "epoch": 0.36, + "learning_rate": 4.910031673219747e-05, + "loss": 0.8322, + "step": 3300 + }, + { + "epoch": 0.36, + "eval_cer": 0.13488182698526216, + "eval_loss": 0.7592117190361023, + "eval_runtime": 1412.8986, + "eval_samples_per_second": 5.317, + "eval_steps_per_second": 0.665, + "step": 3300 + }, + { + "epoch": 0.37, + "learning_rate": 4.9086664482306686e-05, + "loss": 0.8167, + "step": 3350 + }, + { + "epoch": 0.37, + "learning_rate": 4.90730122324159e-05, + "loss": 0.8116, + "step": 3400 + }, + { + "epoch": 0.37, + "eval_cer": 0.16377578779158297, + "eval_loss": 0.7804229259490967, + "eval_runtime": 1491.597, + "eval_samples_per_second": 5.037, + "eval_steps_per_second": 0.63, + "step": 3400 + }, + { + "epoch": 0.38, + "learning_rate": 4.9059359982525125e-05, + "loss": 0.7847, + "step": 3450 + }, + { + "epoch": 0.38, + "learning_rate": 4.904570773263434e-05, + "loss": 0.816, + "step": 3500 + }, + { + "epoch": 0.38, + "eval_cer": 0.13126152730109739, + "eval_loss": 0.7453513145446777, + "eval_runtime": 1217.9912, + "eval_samples_per_second": 6.168, + "eval_steps_per_second": 0.772, + "step": 3500 + }, + { + "epoch": 0.39, + "learning_rate": 4.903205548274356e-05, + "loss": 0.781, + "step": 3550 + }, + { + "epoch": 0.39, + "learning_rate": 4.901840323285278e-05, + "loss": 0.7941, + "step": 3600 + }, + { + "epoch": 0.39, + "eval_cer": 0.13394424464739718, + "eval_loss": 0.7644733786582947, + "eval_runtime": 1216.9116, + "eval_samples_per_second": 6.174, + "eval_steps_per_second": 0.772, + "step": 3600 + }, + { + "epoch": 0.4, + "learning_rate": 4.9004750982961995e-05, + "loss": 0.7896, + "step": 3650 + }, + { + "epoch": 0.4, + "learning_rate": 4.899109873307121e-05, + "loss": 0.7556, + "step": 3700 + }, + { + "epoch": 0.4, + "eval_cer": 0.12223130909078465, + "eval_loss": 0.7185453772544861, + "eval_runtime": 1225.4621, + "eval_samples_per_second": 6.131, + "eval_steps_per_second": 0.767, + "step": 3700 + }, + { + "epoch": 0.41, + "learning_rate": 4.8977446483180426e-05, + "loss": 0.7616, + "step": 3750 + }, + { + "epoch": 0.42, + "learning_rate": 4.896379423328965e-05, + "loss": 0.766, + "step": 3800 + }, + { + "epoch": 0.42, + "eval_cer": 0.12583792144154995, + "eval_loss": 0.7360116839408875, + "eval_runtime": 1167.0386, + "eval_samples_per_second": 6.438, + "eval_steps_per_second": 0.805, + "step": 3800 + }, + { + "epoch": 0.42, + "learning_rate": 4.8950141983398865e-05, + "loss": 0.7699, + "step": 3850 + }, + { + "epoch": 0.43, + "learning_rate": 4.893648973350809e-05, + "loss": 0.7201, + "step": 3900 + }, + { + "epoch": 0.43, + "eval_cer": 0.12545809793971413, + "eval_loss": 0.7248350381851196, + "eval_runtime": 1214.3633, + "eval_samples_per_second": 6.187, + "eval_steps_per_second": 0.774, + "step": 3900 + }, + { + "epoch": 0.43, + "learning_rate": 4.89228374836173e-05, + "loss": 0.7671, + "step": 3950 + }, + { + "epoch": 0.44, + "learning_rate": 4.890918523372652e-05, + "loss": 0.7502, + "step": 4000 + }, + { + "epoch": 0.44, + "eval_cer": 0.11814221823768739, + "eval_loss": 0.7216148376464844, + "eval_runtime": 1179.5688, + "eval_samples_per_second": 6.369, + "eval_steps_per_second": 0.797, + "step": 4000 + }, + { + "epoch": 0.44, + "learning_rate": 4.889553298383574e-05, + "loss": 0.7319, + "step": 4050 + }, + { + "epoch": 0.45, + "learning_rate": 4.888188073394496e-05, + "loss": 0.7187, + "step": 4100 + }, + { + "epoch": 0.45, + "eval_cer": 0.11959649741138308, + "eval_loss": 0.7175608277320862, + "eval_runtime": 1154.0936, + "eval_samples_per_second": 6.51, + "eval_steps_per_second": 0.814, + "step": 4100 + }, + { + "epoch": 0.45, + "learning_rate": 4.886822848405417e-05, + "loss": 0.725, + "step": 4150 + }, + { + "epoch": 0.46, + "learning_rate": 4.885457623416339e-05, + "loss": 0.7606, + "step": 4200 + }, + { + "epoch": 0.46, + "eval_cer": 0.1217967362553509, + "eval_loss": 0.7174265384674072, + "eval_runtime": 1171.3267, + "eval_samples_per_second": 6.414, + "eval_steps_per_second": 0.803, + "step": 4200 + }, + { + "epoch": 0.46, + "learning_rate": 4.884092398427261e-05, + "loss": 0.7281, + "step": 4250 + }, + { + "epoch": 0.47, + "learning_rate": 4.882727173438183e-05, + "loss": 0.7184, + "step": 4300 + }, + { + "epoch": 0.47, + "eval_cer": 0.11260569187759417, + "eval_loss": 0.6905629634857178, + "eval_runtime": 1345.0272, + "eval_samples_per_second": 5.586, + "eval_steps_per_second": 0.699, + "step": 4300 + }, + { + "epoch": 0.48, + "learning_rate": 4.881361948449104e-05, + "loss": 0.7349, + "step": 4350 + }, + { + "epoch": 0.48, + "learning_rate": 4.8799967234600266e-05, + "loss": 0.7077, + "step": 4400 + }, + { + "epoch": 0.48, + "eval_cer": 0.11969915241187924, + "eval_loss": 0.6907770037651062, + "eval_runtime": 1448.5704, + "eval_samples_per_second": 5.186, + "eval_steps_per_second": 0.649, + "step": 4400 + }, + { + "epoch": 0.49, + "learning_rate": 4.878631498470948e-05, + "loss": 0.6844, + "step": 4450 + }, + { + "epoch": 0.49, + "learning_rate": 4.8772662734818704e-05, + "loss": 0.7288, + "step": 4500 + }, + { + "epoch": 0.49, + "eval_cer": 0.11132592620474198, + "eval_loss": 0.6686322093009949, + "eval_runtime": 1442.9609, + "eval_samples_per_second": 5.207, + "eval_steps_per_second": 0.651, + "step": 4500 + }, + { + "epoch": 0.5, + "learning_rate": 4.875901048492792e-05, + "loss": 0.7375, + "step": 4550 + }, + { + "epoch": 0.5, + "learning_rate": 4.8745358235037136e-05, + "loss": 0.7161, + "step": 4600 + }, + { + "epoch": 0.5, + "eval_cer": 0.12221077809068542, + "eval_loss": 0.6822640895843506, + "eval_runtime": 1261.344, + "eval_samples_per_second": 5.956, + "eval_steps_per_second": 0.745, + "step": 4600 + }, + { + "epoch": 0.51, + "learning_rate": 4.873170598514635e-05, + "loss": 0.6956, + "step": 4650 + }, + { + "epoch": 0.51, + "learning_rate": 4.871805373525557e-05, + "loss": 0.6666, + "step": 4700 + }, + { + "epoch": 0.51, + "eval_cer": 0.1256976262742052, + "eval_loss": 0.673389196395874, + "eval_runtime": 1470.5096, + "eval_samples_per_second": 5.109, + "eval_steps_per_second": 0.639, + "step": 4700 + }, + { + "epoch": 0.52, + "learning_rate": 4.870440148536479e-05, + "loss": 0.7583, + "step": 4750 + }, + { + "epoch": 0.52, + "learning_rate": 4.8690749235474006e-05, + "loss": 0.7239, + "step": 4800 + }, + { + "epoch": 0.52, + "eval_cer": 0.11146622137208674, + "eval_loss": 0.6682428121566772, + "eval_runtime": 1393.331, + "eval_samples_per_second": 5.392, + "eval_steps_per_second": 0.675, + "step": 4800 + }, + { + "epoch": 0.53, + "learning_rate": 4.867709698558323e-05, + "loss": 0.7167, + "step": 4850 + }, + { + "epoch": 0.54, + "learning_rate": 4.8663444735692444e-05, + "loss": 0.6723, + "step": 4900 + }, + { + "epoch": 0.54, + "eval_cer": 0.11162020387283099, + "eval_loss": 0.6625186204910278, + "eval_runtime": 1481.6224, + "eval_samples_per_second": 5.071, + "eval_steps_per_second": 0.634, + "step": 4900 + }, + { + "epoch": 0.54, + "learning_rate": 4.864979248580167e-05, + "loss": 0.7173, + "step": 4950 + }, + { + "epoch": 0.55, + "learning_rate": 4.863614023591088e-05, + "loss": 0.683, + "step": 5000 + }, + { + "epoch": 0.55, + "eval_cer": 0.11195554354111846, + "eval_loss": 0.6574082970619202, + "eval_runtime": 1193.9295, + "eval_samples_per_second": 6.293, + "eval_steps_per_second": 0.787, + "step": 5000 + }, + { + "epoch": 0.55, + "learning_rate": 4.86224879860201e-05, + "loss": 0.6623, + "step": 5050 + }, + { + "epoch": 0.56, + "learning_rate": 4.8608835736129314e-05, + "loss": 0.6982, + "step": 5100 + }, + { + "epoch": 0.56, + "eval_cer": 0.11072710536851434, + "eval_loss": 0.6460687518119812, + "eval_runtime": 1243.3246, + "eval_samples_per_second": 6.043, + "eval_steps_per_second": 0.756, + "step": 5100 + }, + { + "epoch": 0.56, + "learning_rate": 4.859518348623853e-05, + "loss": 0.6779, + "step": 5150 + }, + { + "epoch": 0.57, + "learning_rate": 4.858153123634775e-05, + "loss": 0.6681, + "step": 5200 + }, + { + "epoch": 0.57, + "eval_cer": 0.12255638325902252, + "eval_loss": 0.6535465121269226, + "eval_runtime": 1269.3739, + "eval_samples_per_second": 5.919, + "eval_steps_per_second": 0.741, + "step": 5200 + }, + { + "epoch": 0.57, + "learning_rate": 4.856787898645697e-05, + "loss": 0.6924, + "step": 5250 + }, + { + "epoch": 0.58, + "learning_rate": 4.855422673656619e-05, + "loss": 0.6701, + "step": 5300 + }, + { + "epoch": 0.58, + "eval_cer": 0.10567647934410299, + "eval_loss": 0.6478390693664551, + "eval_runtime": 1185.9589, + "eval_samples_per_second": 6.335, + "eval_steps_per_second": 0.793, + "step": 5300 + }, + { + "epoch": 0.58, + "learning_rate": 4.854057448667541e-05, + "loss": 0.7025, + "step": 5350 + }, + { + "epoch": 0.59, + "learning_rate": 4.852692223678462e-05, + "loss": 0.6928, + "step": 5400 + }, + { + "epoch": 0.59, + "eval_cer": 0.10365417583432851, + "eval_loss": 0.6242822408676147, + "eval_runtime": 1155.8357, + "eval_samples_per_second": 6.5, + "eval_steps_per_second": 0.813, + "step": 5400 + }, + { + "epoch": 0.6, + "learning_rate": 4.8513269986893845e-05, + "loss": 0.6665, + "step": 5450 + }, + { + "epoch": 0.6, + "learning_rate": 4.849961773700306e-05, + "loss": 0.6919, + "step": 5500 + }, + { + "epoch": 0.6, + "eval_cer": 0.10333252349944053, + "eval_loss": 0.6287544369697571, + "eval_runtime": 1139.7191, + "eval_samples_per_second": 6.592, + "eval_steps_per_second": 0.825, + "step": 5500 + }, + { + "epoch": 0.61, + "learning_rate": 4.848596548711228e-05, + "loss": 0.6408, + "step": 5550 + }, + { + "epoch": 0.61, + "learning_rate": 4.847231323722149e-05, + "loss": 0.6573, + "step": 5600 + }, + { + "epoch": 0.61, + "eval_cer": 0.11020698669933376, + "eval_loss": 0.6342067122459412, + "eval_runtime": 1184.0803, + "eval_samples_per_second": 6.345, + "eval_steps_per_second": 0.794, + "step": 5600 + }, + { + "epoch": 0.62, + "learning_rate": 4.845866098733071e-05, + "loss": 0.7322, + "step": 5650 + }, + { + "epoch": 0.62, + "learning_rate": 4.844500873743993e-05, + "loss": 0.6518, + "step": 5700 + }, + { + "epoch": 0.62, + "eval_cer": 0.10025287348455555, + "eval_loss": 0.6224654316902161, + "eval_runtime": 1109.2466, + "eval_samples_per_second": 6.773, + "eval_steps_per_second": 0.847, + "step": 5700 + }, + { + "epoch": 0.63, + "learning_rate": 4.8431356487549154e-05, + "loss": 0.6683, + "step": 5750 + }, + { + "epoch": 0.63, + "learning_rate": 4.841770423765837e-05, + "loss": 0.6502, + "step": 5800 + }, + { + "epoch": 0.63, + "eval_cer": 0.09801841630708902, + "eval_loss": 0.6061940789222717, + "eval_runtime": 1144.048, + "eval_samples_per_second": 6.567, + "eval_steps_per_second": 0.822, + "step": 5800 + }, + { + "epoch": 0.64, + "learning_rate": 4.8404051987767586e-05, + "loss": 0.6692, + "step": 5850 + }, + { + "epoch": 0.64, + "learning_rate": 4.839039973787681e-05, + "loss": 0.6802, + "step": 5900 + }, + { + "epoch": 0.64, + "eval_cer": 0.10424273117050653, + "eval_loss": 0.6255876421928406, + "eval_runtime": 1180.3322, + "eval_samples_per_second": 6.365, + "eval_steps_per_second": 0.796, + "step": 5900 + }, + { + "epoch": 0.65, + "learning_rate": 4.8376747487986024e-05, + "loss": 0.6465, + "step": 5950 + }, + { + "epoch": 0.66, + "learning_rate": 4.836309523809524e-05, + "loss": 0.6015, + "step": 6000 + }, + { + "epoch": 0.66, + "eval_cer": 0.10244969049517351, + "eval_loss": 0.6007630825042725, + "eval_runtime": 1184.9976, + "eval_samples_per_second": 6.34, + "eval_steps_per_second": 0.793, + "step": 6000 + }, + { + "epoch": 0.66, + "learning_rate": 4.8349442988204456e-05, + "loss": 0.6529, + "step": 6050 + }, + { + "epoch": 0.67, + "learning_rate": 4.833579073831367e-05, + "loss": 0.6279, + "step": 6100 + }, + { + "epoch": 0.67, + "eval_cer": 0.0989183584781054, + "eval_loss": 0.5849039554595947, + "eval_runtime": 1147.0099, + "eval_samples_per_second": 6.55, + "eval_steps_per_second": 0.82, + "step": 6100 + }, + { + "epoch": 0.67, + "learning_rate": 4.8322138488422894e-05, + "loss": 0.612, + "step": 6150 + }, + { + "epoch": 0.68, + "learning_rate": 4.8308486238532117e-05, + "loss": 0.636, + "step": 6200 + }, + { + "epoch": 0.68, + "eval_cer": 0.10187482249239498, + "eval_loss": 0.602621853351593, + "eval_runtime": 1163.5869, + "eval_samples_per_second": 6.457, + "eval_steps_per_second": 0.808, + "step": 6200 + }, + { + "epoch": 0.68, + "learning_rate": 4.829483398864133e-05, + "loss": 0.6153, + "step": 6250 + }, + { + "epoch": 0.69, + "learning_rate": 4.828118173875055e-05, + "loss": 0.5761, + "step": 6300 + }, + { + "epoch": 0.69, + "eval_cer": 0.10323329033229424, + "eval_loss": 0.6136672496795654, + "eval_runtime": 1156.1641, + "eval_samples_per_second": 6.498, + "eval_steps_per_second": 0.813, + "step": 6300 + }, + { + "epoch": 0.69, + "learning_rate": 4.8267529488859764e-05, + "loss": 0.6564, + "step": 6350 + }, + { + "epoch": 0.7, + "learning_rate": 4.825387723896899e-05, + "loss": 0.6106, + "step": 6400 + }, + { + "epoch": 0.7, + "eval_cer": 0.10424957483720627, + "eval_loss": 0.5963040590286255, + "eval_runtime": 1209.0706, + "eval_samples_per_second": 6.214, + "eval_steps_per_second": 0.777, + "step": 6400 + }, + { + "epoch": 0.7, + "learning_rate": 4.82402249890782e-05, + "loss": 0.6102, + "step": 6450 + }, + { + "epoch": 0.71, + "learning_rate": 4.822657273918742e-05, + "loss": 0.6471, + "step": 6500 + }, + { + "epoch": 0.71, + "eval_cer": 0.09800130714033965, + "eval_loss": 0.5907582640647888, + "eval_runtime": 1159.4797, + "eval_samples_per_second": 6.48, + "eval_steps_per_second": 0.811, + "step": 6500 + }, + { + "epoch": 0.72, + "learning_rate": 4.8212920489296634e-05, + "loss": 0.6466, + "step": 6550 + }, + { + "epoch": 0.72, + "learning_rate": 4.819926823940586e-05, + "loss": 0.5745, + "step": 6600 + }, + { + "epoch": 0.72, + "eval_cer": 0.0992263234795939, + "eval_loss": 0.5862923264503479, + "eval_runtime": 1177.95, + "eval_samples_per_second": 6.378, + "eval_steps_per_second": 0.798, + "step": 6600 + }, + { + "epoch": 0.73, + "learning_rate": 4.818561598951508e-05, + "loss": 0.5995, + "step": 6650 + }, + { + "epoch": 0.73, + "learning_rate": 4.8171963739624295e-05, + "loss": 0.6058, + "step": 6700 + }, + { + "epoch": 0.73, + "eval_cer": 0.09561628929547873, + "eval_loss": 0.5826772451400757, + "eval_runtime": 1154.6872, + "eval_samples_per_second": 6.507, + "eval_steps_per_second": 0.814, + "step": 6700 + }, + { + "epoch": 0.74, + "learning_rate": 4.815831148973351e-05, + "loss": 0.6223, + "step": 6750 + }, + { + "epoch": 0.74, + "learning_rate": 4.814465923984273e-05, + "loss": 0.5968, + "step": 6800 + }, + { + "epoch": 0.74, + "eval_cer": 0.09658124630014268, + "eval_loss": 0.5713803172111511, + "eval_runtime": 1148.4157, + "eval_samples_per_second": 6.542, + "eval_steps_per_second": 0.819, + "step": 6800 + }, + { + "epoch": 0.75, + "learning_rate": 4.813100698995195e-05, + "loss": 0.6244, + "step": 6850 + }, + { + "epoch": 0.75, + "learning_rate": 4.8117354740061165e-05, + "loss": 0.6262, + "step": 6900 + }, + { + "epoch": 0.75, + "eval_cer": 0.09553074346173193, + "eval_loss": 0.5605288147926331, + "eval_runtime": 1166.3348, + "eval_samples_per_second": 6.442, + "eval_steps_per_second": 0.806, + "step": 6900 + }, + { + "epoch": 0.76, + "learning_rate": 4.810370249017038e-05, + "loss": 0.5902, + "step": 6950 + }, + { + "epoch": 0.76, + "learning_rate": 4.80900502402796e-05, + "loss": 0.566, + "step": 7000 + }, + { + "epoch": 0.76, + "eval_cer": 0.11679743773118761, + "eval_loss": 0.6162604689598083, + "eval_runtime": 1280.6689, + "eval_samples_per_second": 5.866, + "eval_steps_per_second": 0.734, + "step": 7000 + }, + { + "epoch": 0.77, + "learning_rate": 4.807639799038882e-05, + "loss": 0.6245, + "step": 7050 + }, + { + "epoch": 0.78, + "learning_rate": 4.806274574049804e-05, + "loss": 0.5711, + "step": 7100 + }, + { + "epoch": 0.78, + "eval_cer": 0.0962595939652547, + "eval_loss": 0.5639761090278625, + "eval_runtime": 1182.4487, + "eval_samples_per_second": 6.354, + "eval_steps_per_second": 0.795, + "step": 7100 + }, + { + "epoch": 0.78, + "learning_rate": 4.804909349060726e-05, + "loss": 0.621, + "step": 7150 + }, + { + "epoch": 0.79, + "learning_rate": 4.803571428571429e-05, + "loss": 0.6164, + "step": 7200 + }, + { + "epoch": 0.79, + "eval_cer": 0.09956166314788137, + "eval_loss": 0.561359703540802, + "eval_runtime": 1220.9677, + "eval_samples_per_second": 6.153, + "eval_steps_per_second": 0.77, + "step": 7200 + }, + { + "epoch": 0.79, + "learning_rate": 4.802206203582351e-05, + "loss": 0.582, + "step": 7250 + }, + { + "epoch": 0.8, + "learning_rate": 4.8008409785932724e-05, + "loss": 0.5574, + "step": 7300 + }, + { + "epoch": 0.8, + "eval_cer": 0.09406277695463676, + "eval_loss": 0.5611019134521484, + "eval_runtime": 1157.6717, + "eval_samples_per_second": 6.49, + "eval_steps_per_second": 0.812, + "step": 7300 + }, + { + "epoch": 0.8, + "learning_rate": 4.799475753604194e-05, + "loss": 0.5702, + "step": 7350 + }, + { + "epoch": 0.81, + "learning_rate": 4.798110528615116e-05, + "loss": 0.5772, + "step": 7400 + }, + { + "epoch": 0.81, + "eval_cer": 0.09428861795572832, + "eval_loss": 0.5664366483688354, + "eval_runtime": 1143.9283, + "eval_samples_per_second": 6.568, + "eval_steps_per_second": 0.822, + "step": 7400 + }, + { + "epoch": 0.81, + "learning_rate": 4.796745303626038e-05, + "loss": 0.5813, + "step": 7450 + }, + { + "epoch": 0.82, + "learning_rate": 4.7953800786369594e-05, + "loss": 0.5836, + "step": 7500 + }, + { + "epoch": 0.82, + "eval_cer": 0.09800130714033965, + "eval_loss": 0.569098174571991, + "eval_runtime": 1171.894, + "eval_samples_per_second": 6.411, + "eval_steps_per_second": 0.802, + "step": 7500 + }, + { + "epoch": 0.82, + "learning_rate": 4.794014853647881e-05, + "loss": 0.5426, + "step": 7550 + }, + { + "epoch": 0.83, + "learning_rate": 4.792649628658803e-05, + "loss": 0.5729, + "step": 7600 + }, + { + "epoch": 0.83, + "eval_cer": 0.09232790744625155, + "eval_loss": 0.5581706762313843, + "eval_runtime": 1144.9014, + "eval_samples_per_second": 6.562, + "eval_steps_per_second": 0.821, + "step": 7600 + }, + { + "epoch": 0.84, + "learning_rate": 4.7912844036697255e-05, + "loss": 0.5861, + "step": 7650 + }, + { + "epoch": 0.84, + "learning_rate": 4.789919178680647e-05, + "loss": 0.5852, + "step": 7700 + }, + { + "epoch": 0.84, + "eval_cer": 0.0938848416204434, + "eval_loss": 0.5493680834770203, + "eval_runtime": 1170.4762, + "eval_samples_per_second": 6.419, + "eval_steps_per_second": 0.803, + "step": 7700 + }, + { + "epoch": 0.85, + "learning_rate": 4.788553953691569e-05, + "loss": 0.5467, + "step": 7750 + }, + { + "epoch": 0.85, + "learning_rate": 4.78718872870249e-05, + "loss": 0.5809, + "step": 7800 + }, + { + "epoch": 0.85, + "eval_cer": 0.0912260771075927, + "eval_loss": 0.5456804037094116, + "eval_runtime": 1175.5115, + "eval_samples_per_second": 6.391, + "eval_steps_per_second": 0.8, + "step": 7800 + }, + { + "epoch": 0.86, + "learning_rate": 4.785823503713412e-05, + "loss": 0.5769, + "step": 7850 + }, + { + "epoch": 0.86, + "learning_rate": 4.784458278724334e-05, + "loss": 0.6034, + "step": 7900 + }, + { + "epoch": 0.86, + "eval_cer": 0.09061014710461571, + "eval_loss": 0.544008195400238, + "eval_runtime": 1149.8414, + "eval_samples_per_second": 6.534, + "eval_steps_per_second": 0.818, + "step": 7900 + }, + { + "epoch": 0.87, + "learning_rate": 4.783093053735256e-05, + "loss": 0.5827, + "step": 7950 + }, + { + "epoch": 0.87, + "learning_rate": 4.781727828746177e-05, + "loss": 0.5553, + "step": 8000 + }, + { + "epoch": 0.87, + "eval_cer": 0.0900215917684377, + "eval_loss": 0.5491546392440796, + "eval_runtime": 1136.1393, + "eval_samples_per_second": 6.613, + "eval_steps_per_second": 0.827, + "step": 8000 + }, + { + "epoch": 0.88, + "learning_rate": 4.7803626037570995e-05, + "loss": 0.6212, + "step": 8050 + }, + { + "epoch": 0.88, + "learning_rate": 4.778997378768021e-05, + "loss": 0.5762, + "step": 8100 + }, + { + "epoch": 0.88, + "eval_cer": 0.09335103561786334, + "eval_loss": 0.5435011386871338, + "eval_runtime": 1264.0887, + "eval_samples_per_second": 5.943, + "eval_steps_per_second": 0.744, + "step": 8100 + }, + { + "epoch": 0.89, + "learning_rate": 4.7776321537789434e-05, + "loss": 0.5516, + "step": 8150 + }, + { + "epoch": 0.9, + "learning_rate": 4.776266928789865e-05, + "loss": 0.5587, + "step": 8200 + }, + { + "epoch": 0.9, + "eval_cer": 0.09445286595652219, + "eval_loss": 0.5462570190429688, + "eval_runtime": 1479.3094, + "eval_samples_per_second": 5.079, + "eval_steps_per_second": 0.635, + "step": 8200 + }, + { + "epoch": 0.9, + "learning_rate": 4.7749017038007865e-05, + "loss": 0.5745, + "step": 8250 + }, + { + "epoch": 0.91, + "learning_rate": 4.773536478811708e-05, + "loss": 0.5416, + "step": 8300 + }, + { + "epoch": 0.91, + "eval_cer": 0.08941250543216045, + "eval_loss": 0.5406492948532104, + "eval_runtime": 1459.8897, + "eval_samples_per_second": 5.146, + "eval_steps_per_second": 0.644, + "step": 8300 + }, + { + "epoch": 0.91, + "learning_rate": 4.7721712538226304e-05, + "loss": 0.561, + "step": 8350 + }, + { + "epoch": 0.92, + "learning_rate": 4.770806028833552e-05, + "loss": 0.5443, + "step": 8400 + }, + { + "epoch": 0.92, + "eval_cer": 0.08896766709667706, + "eval_loss": 0.5334160327911377, + "eval_runtime": 1344.1845, + "eval_samples_per_second": 5.589, + "eval_steps_per_second": 0.699, + "step": 8400 + }, + { + "epoch": 0.92, + "learning_rate": 4.7694408038444735e-05, + "loss": 0.5721, + "step": 8450 + }, + { + "epoch": 0.93, + "learning_rate": 4.768075578855395e-05, + "loss": 0.5798, + "step": 8500 + }, + { + "epoch": 0.93, + "eval_cer": 0.09518513829339484, + "eval_loss": 0.5398730039596558, + "eval_runtime": 1463.1068, + "eval_samples_per_second": 5.135, + "eval_steps_per_second": 0.642, + "step": 8500 + }, + { + "epoch": 0.93, + "learning_rate": 4.7667103538663174e-05, + "loss": 0.5383, + "step": 8550 + }, + { + "epoch": 0.94, + "learning_rate": 4.7653451288772397e-05, + "loss": 0.57, + "step": 8600 + }, + { + "epoch": 0.94, + "eval_cer": 0.09988673731611923, + "eval_loss": 0.5255935788154602, + "eval_runtime": 1549.7656, + "eval_samples_per_second": 4.848, + "eval_steps_per_second": 0.607, + "step": 8600 + }, + { + "epoch": 0.94, + "learning_rate": 4.763979903888161e-05, + "loss": 0.559, + "step": 8650 + }, + { + "epoch": 0.95, + "learning_rate": 4.762614678899083e-05, + "loss": 0.5773, + "step": 8700 + }, + { + "epoch": 0.95, + "eval_cer": 0.08790005509151694, + "eval_loss": 0.528082549571991, + "eval_runtime": 1175.5328, + "eval_samples_per_second": 6.391, + "eval_steps_per_second": 0.8, + "step": 8700 + }, + { + "epoch": 0.96, + "learning_rate": 4.7612494539100044e-05, + "loss": 0.5594, + "step": 8750 + }, + { + "epoch": 0.96, + "learning_rate": 4.7598842289209267e-05, + "loss": 0.5252, + "step": 8800 + }, + { + "epoch": 0.96, + "eval_cer": 0.08552530274670563, + "eval_loss": 0.510725200176239, + "eval_runtime": 1194.9094, + "eval_samples_per_second": 6.288, + "eval_steps_per_second": 0.787, + "step": 8800 + }, + { + "epoch": 0.97, + "learning_rate": 4.758519003931848e-05, + "loss": 0.5344, + "step": 8850 + }, + { + "epoch": 0.97, + "learning_rate": 4.75715377894277e-05, + "loss": 0.5453, + "step": 8900 + }, + { + "epoch": 0.97, + "eval_cer": 0.09173935211007353, + "eval_loss": 0.5302219390869141, + "eval_runtime": 1258.3889, + "eval_samples_per_second": 5.97, + "eval_steps_per_second": 0.747, + "step": 8900 + }, + { + "epoch": 0.98, + "learning_rate": 4.7557885539536914e-05, + "loss": 0.5709, + "step": 8950 + }, + { + "epoch": 0.98, + "learning_rate": 4.7544233289646137e-05, + "loss": 0.5435, + "step": 9000 + }, + { + "epoch": 0.98, + "eval_cer": 0.08849887592774457, + "eval_loss": 0.5267897248268127, + "eval_runtime": 1447.3374, + "eval_samples_per_second": 5.191, + "eval_steps_per_second": 0.649, + "step": 9000 + }, + { + "epoch": 0.99, + "learning_rate": 4.753058103975536e-05, + "loss": 0.5276, + "step": 9050 + }, + { + "epoch": 0.99, + "learning_rate": 4.7516928789864575e-05, + "loss": 0.5448, + "step": 9100 + }, + { + "epoch": 0.99, + "eval_cer": 0.09276248028168532, + "eval_loss": 0.521221399307251, + "eval_runtime": 1481.2749, + "eval_samples_per_second": 5.072, + "eval_steps_per_second": 0.635, + "step": 9100 + }, + { + "epoch": 1.0, + "learning_rate": 4.750327653997379e-05, + "loss": 0.5232, + "step": 9150 + }, + { + "epoch": 1.0, + "learning_rate": 4.7489897335080826e-05, + "loss": 0.4876, + "step": 9200 + }, + { + "epoch": 1.0, + "eval_cer": 0.08981628176744536, + "eval_loss": 0.5108721256256104, + "eval_runtime": 1277.6548, + "eval_samples_per_second": 5.88, + "eval_steps_per_second": 0.736, + "step": 9200 + }, + { + "epoch": 1.01, + "learning_rate": 4.747624508519004e-05, + "loss": 0.4831, + "step": 9250 + }, + { + "epoch": 1.02, + "learning_rate": 4.746259283529926e-05, + "loss": 0.4454, + "step": 9300 + }, + { + "epoch": 1.02, + "eval_cer": 0.08573403458104784, + "eval_loss": 0.5104303359985352, + "eval_runtime": 1166.2306, + "eval_samples_per_second": 6.442, + "eval_steps_per_second": 0.806, + "step": 9300 + }, + { + "epoch": 1.02, + "learning_rate": 4.744894058540848e-05, + "loss": 0.492, + "step": 9350 + }, + { + "epoch": 1.03, + "learning_rate": 4.7435288335517696e-05, + "loss": 0.4575, + "step": 9400 + }, + { + "epoch": 1.03, + "eval_cer": 0.0861822947498811, + "eval_loss": 0.5089101195335388, + "eval_runtime": 1209.7895, + "eval_samples_per_second": 6.21, + "eval_steps_per_second": 0.777, + "step": 9400 + }, + { + "epoch": 1.03, + "learning_rate": 4.742163608562691e-05, + "loss": 0.4754, + "step": 9450 + }, + { + "epoch": 1.04, + "learning_rate": 4.740798383573613e-05, + "loss": 0.5084, + "step": 9500 + }, + { + "epoch": 1.04, + "eval_cer": 0.08640471391762278, + "eval_loss": 0.5144868493080139, + "eval_runtime": 1163.3162, + "eval_samples_per_second": 6.458, + "eval_steps_per_second": 0.808, + "step": 9500 + }, + { + "epoch": 1.04, + "learning_rate": 4.739433158584535e-05, + "loss": 0.4832, + "step": 9550 + }, + { + "epoch": 1.05, + "learning_rate": 4.738067933595457e-05, + "loss": 0.474, + "step": 9600 + }, + { + "epoch": 1.05, + "eval_cer": 0.08605568691593582, + "eval_loss": 0.5110841989517212, + "eval_runtime": 1149.6213, + "eval_samples_per_second": 6.535, + "eval_steps_per_second": 0.818, + "step": 9600 + }, + { + "epoch": 1.05, + "learning_rate": 4.736702708606379e-05, + "loss": 0.479, + "step": 9650 + }, + { + "epoch": 1.06, + "learning_rate": 4.7353374836173004e-05, + "loss": 0.4837, + "step": 9700 + }, + { + "epoch": 1.06, + "eval_cer": 0.08458429857549078, + "eval_loss": 0.5060144066810608, + "eval_runtime": 1137.2697, + "eval_samples_per_second": 6.606, + "eval_steps_per_second": 0.827, + "step": 9700 + }, + { + "epoch": 1.06, + "learning_rate": 4.733972258628222e-05, + "loss": 0.4502, + "step": 9750 + }, + { + "epoch": 1.07, + "learning_rate": 4.7326070336391436e-05, + "loss": 0.4898, + "step": 9800 + }, + { + "epoch": 1.07, + "eval_cer": 0.08435161390769946, + "eval_loss": 0.5079366564750671, + "eval_runtime": 1150.9922, + "eval_samples_per_second": 6.527, + "eval_steps_per_second": 0.817, + "step": 9800 + }, + { + "epoch": 1.08, + "learning_rate": 4.731241808650066e-05, + "loss": 0.4827, + "step": 9850 + }, + { + "epoch": 1.08, + "learning_rate": 4.7298765836609874e-05, + "loss": 0.47, + "step": 9900 + }, + { + "epoch": 1.08, + "eval_cer": 0.08341060973648462, + "eval_loss": 0.4975820481777191, + "eval_runtime": 1149.8902, + "eval_samples_per_second": 6.534, + "eval_steps_per_second": 0.817, + "step": 9900 + }, + { + "epoch": 1.09, + "learning_rate": 4.728511358671909e-05, + "loss": 0.4675, + "step": 9950 + }, + { + "epoch": 1.09, + "learning_rate": 4.727146133682831e-05, + "loss": 0.4593, + "step": 10000 + }, + { + "epoch": 1.09, + "eval_cer": 0.08362618523752656, + "eval_loss": 0.4920589327812195, + "eval_runtime": 1171.3132, + "eval_samples_per_second": 6.414, + "eval_steps_per_second": 0.803, + "step": 10000 + }, + { + "epoch": 1.1, + "learning_rate": 4.7257809086937535e-05, + "loss": 0.4898, + "step": 10050 + }, + { + "epoch": 1.1, + "learning_rate": 4.724415683704675e-05, + "loss": 0.4723, + "step": 10100 + }, + { + "epoch": 1.1, + "eval_cer": 0.087362827255587, + "eval_loss": 0.5022254586219788, + "eval_runtime": 1155.2292, + "eval_samples_per_second": 6.503, + "eval_steps_per_second": 0.814, + "step": 10100 + }, + { + "epoch": 1.11, + "learning_rate": 4.723050458715597e-05, + "loss": 0.4627, + "step": 10150 + }, + { + "epoch": 1.11, + "learning_rate": 4.721685233726518e-05, + "loss": 0.4562, + "step": 10200 + }, + { + "epoch": 1.11, + "eval_cer": 0.08214110956368204, + "eval_loss": 0.4981001019477844, + "eval_runtime": 1146.0235, + "eval_samples_per_second": 6.556, + "eval_steps_per_second": 0.82, + "step": 10200 + }, + { + "epoch": 1.12, + "learning_rate": 4.72032000873744e-05, + "loss": 0.43, + "step": 10250 + }, + { + "epoch": 1.12, + "learning_rate": 4.718954783748362e-05, + "loss": 0.4701, + "step": 10300 + }, + { + "epoch": 1.12, + "eval_cer": 0.08535421107921203, + "eval_loss": 0.5037410259246826, + "eval_runtime": 1149.3397, + "eval_samples_per_second": 6.537, + "eval_steps_per_second": 0.818, + "step": 10300 + }, + { + "epoch": 1.13, + "learning_rate": 4.717589558759284e-05, + "loss": 0.4429, + "step": 10350 + }, + { + "epoch": 1.14, + "learning_rate": 4.716224333770205e-05, + "loss": 0.4207, + "step": 10400 + }, + { + "epoch": 1.14, + "eval_cer": 0.08262016623266413, + "eval_loss": 0.4911841154098511, + "eval_runtime": 1168.0546, + "eval_samples_per_second": 6.432, + "eval_steps_per_second": 0.805, + "step": 10400 + }, + { + "epoch": 1.14, + "learning_rate": 4.7148591087811275e-05, + "loss": 0.4737, + "step": 10450 + }, + { + "epoch": 1.15, + "learning_rate": 4.713493883792049e-05, + "loss": 0.4413, + "step": 10500 + }, + { + "epoch": 1.15, + "eval_cer": 0.08833120609360083, + "eval_loss": 0.49298039078712463, + "eval_runtime": 1177.0621, + "eval_samples_per_second": 6.383, + "eval_steps_per_second": 0.799, + "step": 10500 + }, + { + "epoch": 1.15, + "learning_rate": 4.7121286588029714e-05, + "loss": 0.4406, + "step": 10550 + }, + { + "epoch": 1.16, + "learning_rate": 4.710763433813893e-05, + "loss": 0.4868, + "step": 10600 + }, + { + "epoch": 1.16, + "eval_cer": 0.08830383142680186, + "eval_loss": 0.5031660795211792, + "eval_runtime": 1196.51, + "eval_samples_per_second": 6.279, + "eval_steps_per_second": 0.786, + "step": 10600 + }, + { + "epoch": 1.16, + "learning_rate": 4.7093982088248145e-05, + "loss": 0.4756, + "step": 10650 + }, + { + "epoch": 1.17, + "learning_rate": 4.708032983835736e-05, + "loss": 0.4392, + "step": 10700 + }, + { + "epoch": 1.17, + "eval_cer": 0.09982514431582153, + "eval_loss": 0.4886401891708374, + "eval_runtime": 1325.7127, + "eval_samples_per_second": 5.667, + "eval_steps_per_second": 0.709, + "step": 10700 + }, + { + "epoch": 1.17, + "learning_rate": 4.706667758846658e-05, + "loss": 0.4553, + "step": 10750 + }, + { + "epoch": 1.18, + "learning_rate": 4.70530253385758e-05, + "loss": 0.4832, + "step": 10800 + }, + { + "epoch": 1.18, + "eval_cer": 0.08149096122720631, + "eval_loss": 0.4746004343032837, + "eval_runtime": 1165.4679, + "eval_samples_per_second": 6.446, + "eval_steps_per_second": 0.807, + "step": 10800 + }, + { + "epoch": 1.19, + "learning_rate": 4.7039373088685015e-05, + "loss": 0.4311, + "step": 10850 + }, + { + "epoch": 1.19, + "learning_rate": 4.702572083879424e-05, + "loss": 0.4427, + "step": 10900 + }, + { + "epoch": 1.19, + "eval_cer": 0.08554583374680487, + "eval_loss": 0.4992435574531555, + "eval_runtime": 1168.4072, + "eval_samples_per_second": 6.43, + "eval_steps_per_second": 0.805, + "step": 10900 + }, + { + "epoch": 1.2, + "learning_rate": 4.7012068588903454e-05, + "loss": 0.4384, + "step": 10950 + }, + { + "epoch": 1.2, + "learning_rate": 4.6998416339012676e-05, + "loss": 0.4192, + "step": 11000 + }, + { + "epoch": 1.2, + "eval_cer": 0.08168258389479915, + "eval_loss": 0.4806784689426422, + "eval_runtime": 1157.026, + "eval_samples_per_second": 6.493, + "eval_steps_per_second": 0.812, + "step": 11000 + }, + { + "epoch": 1.21, + "learning_rate": 4.698476408912189e-05, + "loss": 0.4645, + "step": 11050 + }, + { + "epoch": 1.21, + "learning_rate": 4.697111183923111e-05, + "loss": 0.4342, + "step": 11100 + }, + { + "epoch": 1.21, + "eval_cer": 0.08099479539147485, + "eval_loss": 0.48337244987487793, + "eval_runtime": 1152.0989, + "eval_samples_per_second": 6.521, + "eval_steps_per_second": 0.816, + "step": 11100 + }, + { + "epoch": 1.22, + "learning_rate": 4.6957459589340324e-05, + "loss": 0.4268, + "step": 11150 + }, + { + "epoch": 1.22, + "learning_rate": 4.694380733944954e-05, + "loss": 0.4439, + "step": 11200 + }, + { + "epoch": 1.22, + "eval_cer": 0.0805328478892421, + "eval_loss": 0.47774767875671387, + "eval_runtime": 1151.292, + "eval_samples_per_second": 6.526, + "eval_steps_per_second": 0.816, + "step": 11200 + }, + { + "epoch": 1.23, + "learning_rate": 4.693015508955876e-05, + "loss": 0.4213, + "step": 11250 + }, + { + "epoch": 1.23, + "learning_rate": 4.69167758846658e-05, + "loss": 0.4614, + "step": 11300 + }, + { + "epoch": 1.23, + "eval_cer": 0.08316423773529381, + "eval_loss": 0.47571513056755066, + "eval_runtime": 1198.3379, + "eval_samples_per_second": 6.27, + "eval_steps_per_second": 0.784, + "step": 11300 + }, + { + "epoch": 1.24, + "learning_rate": 4.690312363477501e-05, + "loss": 0.4417, + "step": 11350 + }, + { + "epoch": 1.25, + "learning_rate": 4.688947138488423e-05, + "loss": 0.4773, + "step": 11400 + }, + { + "epoch": 1.25, + "eval_cer": 0.0813438223931618, + "eval_loss": 0.4786456525325775, + "eval_runtime": 1170.4782, + "eval_samples_per_second": 6.419, + "eval_steps_per_second": 0.803, + "step": 11400 + }, + { + "epoch": 1.25, + "learning_rate": 4.687581913499345e-05, + "loss": 0.4111, + "step": 11450 + }, + { + "epoch": 1.26, + "learning_rate": 4.686216688510267e-05, + "loss": 0.4483, + "step": 11500 + }, + { + "epoch": 1.26, + "eval_cer": 0.08518311941171841, + "eval_loss": 0.4720217287540436, + "eval_runtime": 1220.5252, + "eval_samples_per_second": 6.156, + "eval_steps_per_second": 0.77, + "step": 11500 + }, + { + "epoch": 1.26, + "learning_rate": 4.684851463521189e-05, + "loss": 0.467, + "step": 11550 + }, + { + "epoch": 1.27, + "learning_rate": 4.6834862385321106e-05, + "loss": 0.4479, + "step": 11600 + }, + { + "epoch": 1.27, + "eval_cer": 0.08453981474194244, + "eval_loss": 0.470368891954422, + "eval_runtime": 1179.3323, + "eval_samples_per_second": 6.371, + "eval_steps_per_second": 0.797, + "step": 11600 + }, + { + "epoch": 1.27, + "learning_rate": 4.682121013543032e-05, + "loss": 0.435, + "step": 11650 + }, + { + "epoch": 1.28, + "learning_rate": 4.680755788553954e-05, + "loss": 0.4549, + "step": 11700 + }, + { + "epoch": 1.28, + "eval_cer": 0.0796705458850743, + "eval_loss": 0.47497525811195374, + "eval_runtime": 1142.2374, + "eval_samples_per_second": 6.577, + "eval_steps_per_second": 0.823, + "step": 11700 + }, + { + "epoch": 1.28, + "learning_rate": 4.679390563564875e-05, + "loss": 0.4084, + "step": 11750 + }, + { + "epoch": 1.29, + "learning_rate": 4.6780253385757976e-05, + "loss": 0.4918, + "step": 11800 + }, + { + "epoch": 1.29, + "eval_cer": 0.07762771137520061, + "eval_loss": 0.46993914246559143, + "eval_runtime": 1152.0072, + "eval_samples_per_second": 6.522, + "eval_steps_per_second": 0.816, + "step": 11800 + }, + { + "epoch": 1.29, + "learning_rate": 4.676660113586719e-05, + "loss": 0.4552, + "step": 11850 + }, + { + "epoch": 1.3, + "learning_rate": 4.6752948885976414e-05, + "loss": 0.471, + "step": 11900 + }, + { + "epoch": 1.3, + "eval_cer": 0.07532139569738674, + "eval_loss": 0.4603424072265625, + "eval_runtime": 1141.6643, + "eval_samples_per_second": 6.581, + "eval_steps_per_second": 0.823, + "step": 11900 + }, + { + "epoch": 1.31, + "learning_rate": 4.673929663608563e-05, + "loss": 0.3913, + "step": 11950 + }, + { + "epoch": 1.31, + "learning_rate": 4.6725644386194846e-05, + "loss": 0.476, + "step": 12000 + }, + { + "epoch": 1.31, + "eval_cer": 0.07731974637371211, + "eval_loss": 0.46096158027648926, + "eval_runtime": 1151.1862, + "eval_samples_per_second": 6.526, + "eval_steps_per_second": 0.817, + "step": 12000 + }, + { + "epoch": 1.32, + "learning_rate": 4.671199213630407e-05, + "loss": 0.4183, + "step": 12050 + }, + { + "epoch": 1.32, + "learning_rate": 4.6698339886413284e-05, + "loss": 0.4305, + "step": 12100 + }, + { + "epoch": 1.32, + "eval_cer": 0.07767903887544869, + "eval_loss": 0.46618959307670593, + "eval_runtime": 1147.4361, + "eval_samples_per_second": 6.548, + "eval_steps_per_second": 0.819, + "step": 12100 + }, + { + "epoch": 1.33, + "learning_rate": 4.66846876365225e-05, + "loss": 0.4616, + "step": 12150 + }, + { + "epoch": 1.33, + "learning_rate": 4.6671035386631716e-05, + "loss": 0.4678, + "step": 12200 + }, + { + "epoch": 1.33, + "eval_cer": 0.08326689273578998, + "eval_loss": 0.4704437553882599, + "eval_runtime": 1193.667, + "eval_samples_per_second": 6.294, + "eval_steps_per_second": 0.787, + "step": 12200 + }, + { + "epoch": 1.34, + "learning_rate": 4.665738313674094e-05, + "loss": 0.4114, + "step": 12250 + }, + { + "epoch": 1.34, + "learning_rate": 4.6643730886850154e-05, + "loss": 0.4238, + "step": 12300 + }, + { + "epoch": 1.34, + "eval_cer": 0.07919491104944207, + "eval_loss": 0.4706832468509674, + "eval_runtime": 1155.3699, + "eval_samples_per_second": 6.503, + "eval_steps_per_second": 0.814, + "step": 12300 + }, + { + "epoch": 1.35, + "learning_rate": 4.663007863695938e-05, + "loss": 0.4057, + "step": 12350 + }, + { + "epoch": 1.35, + "learning_rate": 4.661642638706859e-05, + "loss": 0.4633, + "step": 12400 + }, + { + "epoch": 1.35, + "eval_cer": 0.07833260904527428, + "eval_loss": 0.46324703097343445, + "eval_runtime": 1156.5395, + "eval_samples_per_second": 6.496, + "eval_steps_per_second": 0.813, + "step": 12400 + }, + { + "epoch": 1.36, + "learning_rate": 4.660277413717781e-05, + "loss": 0.4525, + "step": 12450 + }, + { + "epoch": 1.37, + "learning_rate": 4.658912188728703e-05, + "loss": 0.4248, + "step": 12500 + }, + { + "epoch": 1.37, + "eval_cer": 0.07947207955078171, + "eval_loss": 0.4599738121032715, + "eval_runtime": 1174.5926, + "eval_samples_per_second": 6.396, + "eval_steps_per_second": 0.8, + "step": 12500 + }, + { + "epoch": 1.37, + "learning_rate": 4.657546963739625e-05, + "loss": 0.4218, + "step": 12550 + }, + { + "epoch": 1.38, + "learning_rate": 4.656181738750546e-05, + "loss": 0.4595, + "step": 12600 + }, + { + "epoch": 1.38, + "eval_cer": 0.07839762387892185, + "eval_loss": 0.455277681350708, + "eval_runtime": 1183.3222, + "eval_samples_per_second": 6.349, + "eval_steps_per_second": 0.794, + "step": 12600 + }, + { + "epoch": 1.38, + "learning_rate": 4.654816513761468e-05, + "loss": 0.462, + "step": 12650 + }, + { + "epoch": 1.39, + "learning_rate": 4.6534512887723894e-05, + "loss": 0.4352, + "step": 12700 + }, + { + "epoch": 1.39, + "eval_cer": 0.07753874370810393, + "eval_loss": 0.45391160249710083, + "eval_runtime": 1160.3415, + "eval_samples_per_second": 6.475, + "eval_steps_per_second": 0.81, + "step": 12700 + }, + { + "epoch": 1.39, + "learning_rate": 4.652086063783312e-05, + "loss": 0.4388, + "step": 12750 + }, + { + "epoch": 1.4, + "learning_rate": 4.650720838794233e-05, + "loss": 0.4238, + "step": 12800 + }, + { + "epoch": 1.4, + "eval_cer": 0.07942075205053363, + "eval_loss": 0.4653577208518982, + "eval_runtime": 1157.7661, + "eval_samples_per_second": 6.489, + "eval_steps_per_second": 0.812, + "step": 12800 + }, + { + "epoch": 1.4, + "learning_rate": 4.6493556138051555e-05, + "loss": 0.4501, + "step": 12850 + }, + { + "epoch": 1.41, + "learning_rate": 4.647990388816077e-05, + "loss": 0.4153, + "step": 12900 + }, + { + "epoch": 1.41, + "eval_cer": 0.07721709137321595, + "eval_loss": 0.45755788683891296, + "eval_runtime": 1151.6369, + "eval_samples_per_second": 6.524, + "eval_steps_per_second": 0.816, + "step": 12900 + }, + { + "epoch": 1.41, + "learning_rate": 4.646625163826999e-05, + "loss": 0.4314, + "step": 12950 + }, + { + "epoch": 1.42, + "learning_rate": 4.645259938837921e-05, + "loss": 0.4354, + "step": 13000 + }, + { + "epoch": 1.42, + "eval_cer": 0.0735659951889023, + "eval_loss": 0.44789427518844604, + "eval_runtime": 1142.8652, + "eval_samples_per_second": 6.574, + "eval_steps_per_second": 0.822, + "step": 13000 + }, + { + "epoch": 1.43, + "learning_rate": 4.6438947138488425e-05, + "loss": 0.3863, + "step": 13050 + }, + { + "epoch": 1.43, + "learning_rate": 4.642529488859764e-05, + "loss": 0.3837, + "step": 13100 + }, + { + "epoch": 1.43, + "eval_cer": 0.07796647287683796, + "eval_loss": 0.4503418803215027, + "eval_runtime": 1162.223, + "eval_samples_per_second": 6.464, + "eval_steps_per_second": 0.809, + "step": 13100 + }, + { + "epoch": 1.44, + "learning_rate": 4.641164263870686e-05, + "loss": 0.4048, + "step": 13150 + }, + { + "epoch": 1.44, + "learning_rate": 4.639799038881608e-05, + "loss": 0.4325, + "step": 13200 + }, + { + "epoch": 1.44, + "eval_cer": 0.07542062886453305, + "eval_loss": 0.4409065842628479, + "eval_runtime": 1164.1583, + "eval_samples_per_second": 6.454, + "eval_steps_per_second": 0.807, + "step": 13200 + }, + { + "epoch": 1.45, + "learning_rate": 4.6384338138925295e-05, + "loss": 0.4185, + "step": 13250 + }, + { + "epoch": 1.45, + "learning_rate": 4.637068588903452e-05, + "loss": 0.3799, + "step": 13300 + }, + { + "epoch": 1.45, + "eval_cer": 0.07549590919823022, + "eval_loss": 0.44226187467575073, + "eval_runtime": 1167.1145, + "eval_samples_per_second": 6.437, + "eval_steps_per_second": 0.805, + "step": 13300 + }, + { + "epoch": 1.46, + "learning_rate": 4.6357033639143734e-05, + "loss": 0.4415, + "step": 13350 + }, + { + "epoch": 1.46, + "learning_rate": 4.634338138925295e-05, + "loss": 0.4073, + "step": 13400 + }, + { + "epoch": 1.46, + "eval_cer": 0.07687490803822872, + "eval_loss": 0.4516383111476898, + "eval_runtime": 1152.5579, + "eval_samples_per_second": 6.519, + "eval_steps_per_second": 0.816, + "step": 13400 + }, + { + "epoch": 1.47, + "learning_rate": 4.632972913936217e-05, + "loss": 0.4191, + "step": 13450 + }, + { + "epoch": 1.47, + "learning_rate": 4.631607688947139e-05, + "loss": 0.4051, + "step": 13500 + }, + { + "epoch": 1.47, + "eval_cer": 0.0723854626831964, + "eval_loss": 0.44055524468421936, + "eval_runtime": 1134.572, + "eval_samples_per_second": 6.622, + "eval_steps_per_second": 0.829, + "step": 13500 + }, + { + "epoch": 1.48, + "learning_rate": 4.6302424639580604e-05, + "loss": 0.4252, + "step": 13550 + }, + { + "epoch": 1.49, + "learning_rate": 4.628877238968982e-05, + "loss": 0.4494, + "step": 13600 + }, + { + "epoch": 1.49, + "eval_cer": 0.07588942003346553, + "eval_loss": 0.4374730885028839, + "eval_runtime": 1179.3142, + "eval_samples_per_second": 6.371, + "eval_steps_per_second": 0.797, + "step": 13600 + }, + { + "epoch": 1.49, + "learning_rate": 4.6275120139799035e-05, + "loss": 0.4612, + "step": 13650 + }, + { + "epoch": 1.5, + "learning_rate": 4.626146788990826e-05, + "loss": 0.4158, + "step": 13700 + }, + { + "epoch": 1.5, + "eval_cer": 0.07792541087663948, + "eval_loss": 0.44300341606140137, + "eval_runtime": 1180.7216, + "eval_samples_per_second": 6.363, + "eval_steps_per_second": 0.796, + "step": 13700 + }, + { + "epoch": 1.5, + "learning_rate": 4.624781564001748e-05, + "loss": 0.4, + "step": 13750 + }, + { + "epoch": 1.51, + "learning_rate": 4.6234163390126696e-05, + "loss": 0.424, + "step": 13800 + }, + { + "epoch": 1.51, + "eval_cer": 0.07315879702026752, + "eval_loss": 0.4424927234649658, + "eval_runtime": 1179.1114, + "eval_samples_per_second": 6.372, + "eval_steps_per_second": 0.797, + "step": 13800 + }, + { + "epoch": 1.51, + "learning_rate": 4.622051114023591e-05, + "loss": 0.4021, + "step": 13850 + }, + { + "epoch": 1.52, + "learning_rate": 4.6206858890345135e-05, + "loss": 0.4351, + "step": 13900 + }, + { + "epoch": 1.52, + "eval_cer": 0.07417165969182969, + "eval_loss": 0.44169941544532776, + "eval_runtime": 1443.9806, + "eval_samples_per_second": 5.203, + "eval_steps_per_second": 0.651, + "step": 13900 + }, + { + "epoch": 1.52, + "learning_rate": 4.619320664045435e-05, + "loss": 0.4413, + "step": 13950 + }, + { + "epoch": 1.53, + "learning_rate": 4.6179554390563567e-05, + "loss": 0.4298, + "step": 14000 + }, + { + "epoch": 1.53, + "eval_cer": 0.08301025523454957, + "eval_loss": 0.44149938225746155, + "eval_runtime": 1516.9826, + "eval_samples_per_second": 4.953, + "eval_steps_per_second": 0.62, + "step": 14000 + }, + { + "epoch": 1.53, + "learning_rate": 4.616590214067278e-05, + "loss": 0.4379, + "step": 14050 + }, + { + "epoch": 1.54, + "learning_rate": 4.6152249890782e-05, + "loss": 0.409, + "step": 14100 + }, + { + "epoch": 1.54, + "eval_cer": 0.07659773953688907, + "eval_loss": 0.45608383417129517, + "eval_runtime": 1213.6853, + "eval_samples_per_second": 6.19, + "eval_steps_per_second": 0.775, + "step": 14100 + }, + { + "epoch": 1.55, + "learning_rate": 4.613859764089122e-05, + "loss": 0.3977, + "step": 14150 + }, + { + "epoch": 1.55, + "learning_rate": 4.612494539100044e-05, + "loss": 0.4253, + "step": 14200 + }, + { + "epoch": 1.55, + "eval_cer": 0.07670381637073512, + "eval_loss": 0.4387846887111664, + "eval_runtime": 1160.8358, + "eval_samples_per_second": 6.472, + "eval_steps_per_second": 0.81, + "step": 14200 + }, + { + "epoch": 1.56, + "learning_rate": 4.611129314110966e-05, + "loss": 0.4107, + "step": 14250 + }, + { + "epoch": 1.56, + "learning_rate": 4.6097640891218875e-05, + "loss": 0.4413, + "step": 14300 + }, + { + "epoch": 1.56, + "eval_cer": 0.07185165668061634, + "eval_loss": 0.42884722352027893, + "eval_runtime": 1159.6073, + "eval_samples_per_second": 6.479, + "eval_steps_per_second": 0.811, + "step": 14300 + }, + { + "epoch": 1.57, + "learning_rate": 4.608398864132809e-05, + "loss": 0.4288, + "step": 14350 + }, + { + "epoch": 1.57, + "learning_rate": 4.607033639143731e-05, + "loss": 0.4253, + "step": 14400 + }, + { + "epoch": 1.57, + "eval_cer": 0.07265236568448644, + "eval_loss": 0.4315667152404785, + "eval_runtime": 1149.883, + "eval_samples_per_second": 6.534, + "eval_steps_per_second": 0.817, + "step": 14400 + }, + { + "epoch": 1.58, + "learning_rate": 4.605668414154653e-05, + "loss": 0.396, + "step": 14450 + }, + { + "epoch": 1.58, + "learning_rate": 4.6043031891655745e-05, + "loss": 0.4063, + "step": 14500 + }, + { + "epoch": 1.58, + "eval_cer": 0.07520847519684096, + "eval_loss": 0.42946287989616394, + "eval_runtime": 1172.1069, + "eval_samples_per_second": 6.41, + "eval_steps_per_second": 0.802, + "step": 14500 + }, + { + "epoch": 1.59, + "learning_rate": 4.602937964176496e-05, + "loss": 0.4058, + "step": 14550 + }, + { + "epoch": 1.59, + "learning_rate": 4.6015727391874183e-05, + "loss": 0.4054, + "step": 14600 + }, + { + "epoch": 1.59, + "eval_cer": 0.07184823484726646, + "eval_loss": 0.43230387568473816, + "eval_runtime": 1137.0608, + "eval_samples_per_second": 6.607, + "eval_steps_per_second": 0.827, + "step": 14600 + }, + { + "epoch": 1.6, + "learning_rate": 4.6002075141983406e-05, + "loss": 0.4349, + "step": 14650 + }, + { + "epoch": 1.61, + "learning_rate": 4.598842289209262e-05, + "loss": 0.4117, + "step": 14700 + }, + { + "epoch": 1.61, + "eval_cer": 0.0719064060142143, + "eval_loss": 0.4254385530948639, + "eval_runtime": 1163.8583, + "eval_samples_per_second": 6.455, + "eval_steps_per_second": 0.808, + "step": 14700 + }, + { + "epoch": 1.61, + "learning_rate": 4.597477064220184e-05, + "loss": 0.3811, + "step": 14750 + }, + { + "epoch": 1.62, + "learning_rate": 4.5961118392311053e-05, + "loss": 0.3963, + "step": 14800 + }, + { + "epoch": 1.62, + "eval_cer": 0.07202274834810996, + "eval_loss": 0.42654383182525635, + "eval_runtime": 1155.0915, + "eval_samples_per_second": 6.504, + "eval_steps_per_second": 0.814, + "step": 14800 + }, + { + "epoch": 1.62, + "learning_rate": 4.5947466142420276e-05, + "loss": 0.3831, + "step": 14850 + }, + { + "epoch": 1.63, + "learning_rate": 4.593381389252949e-05, + "loss": 0.4148, + "step": 14900 + }, + { + "epoch": 1.63, + "eval_cer": 0.07073271717520813, + "eval_loss": 0.42668265104293823, + "eval_runtime": 1139.0726, + "eval_samples_per_second": 6.596, + "eval_steps_per_second": 0.825, + "step": 14900 + }, + { + "epoch": 1.63, + "learning_rate": 4.592016164263871e-05, + "loss": 0.4053, + "step": 14950 + }, + { + "epoch": 1.64, + "learning_rate": 4.5906509392747924e-05, + "loss": 0.4056, + "step": 15000 + }, + { + "epoch": 1.64, + "eval_cer": 0.07124257034433909, + "eval_loss": 0.41616320610046387, + "eval_runtime": 1406.7221, + "eval_samples_per_second": 5.341, + "eval_steps_per_second": 0.668, + "step": 15000 + }, + { + "epoch": 1.64, + "learning_rate": 4.5892857142857146e-05, + "loss": 0.3748, + "step": 15050 + }, + { + "epoch": 1.65, + "learning_rate": 4.587920489296637e-05, + "loss": 0.3842, + "step": 15100 + }, + { + "epoch": 1.65, + "eval_cer": 0.06995253917143728, + "eval_loss": 0.41423478722572327, + "eval_runtime": 1316.6097, + "eval_samples_per_second": 5.706, + "eval_steps_per_second": 0.714, + "step": 15100 + }, + { + "epoch": 1.65, + "learning_rate": 4.5865552643075585e-05, + "loss": 0.3705, + "step": 15150 + }, + { + "epoch": 1.66, + "learning_rate": 4.58519003931848e-05, + "loss": 0.4091, + "step": 15200 + }, + { + "epoch": 1.66, + "eval_cer": 0.06825530982990066, + "eval_loss": 0.4133824408054352, + "eval_runtime": 1388.2867, + "eval_samples_per_second": 5.412, + "eval_steps_per_second": 0.677, + "step": 15200 + }, + { + "epoch": 1.67, + "learning_rate": 4.5838248143294016e-05, + "loss": 0.3925, + "step": 15250 + }, + { + "epoch": 1.67, + "learning_rate": 4.582459589340323e-05, + "loss": 0.3844, + "step": 15300 + }, + { + "epoch": 1.67, + "eval_cer": 0.07246416485024346, + "eval_loss": 0.42426353693008423, + "eval_runtime": 1364.96, + "eval_samples_per_second": 5.504, + "eval_steps_per_second": 0.689, + "step": 15300 + }, + { + "epoch": 1.68, + "learning_rate": 4.5810943643512455e-05, + "loss": 0.4009, + "step": 15350 + }, + { + "epoch": 1.68, + "learning_rate": 4.579729139362167e-05, + "loss": 0.4136, + "step": 15400 + }, + { + "epoch": 1.68, + "eval_cer": 0.07132811617808589, + "eval_loss": 0.4231643080711365, + "eval_runtime": 1408.2259, + "eval_samples_per_second": 5.335, + "eval_steps_per_second": 0.668, + "step": 15400 + }, + { + "epoch": 1.69, + "learning_rate": 4.5783639143730886e-05, + "loss": 0.4087, + "step": 15450 + }, + { + "epoch": 1.69, + "learning_rate": 4.576998689384011e-05, + "loss": 0.4216, + "step": 15500 + }, + { + "epoch": 1.69, + "eval_cer": 0.07285425385212889, + "eval_loss": 0.4176812171936035, + "eval_runtime": 1387.2566, + "eval_samples_per_second": 5.416, + "eval_steps_per_second": 0.678, + "step": 15500 + }, + { + "epoch": 1.7, + "learning_rate": 4.5756334643949325e-05, + "loss": 0.4044, + "step": 15550 + }, + { + "epoch": 1.7, + "learning_rate": 4.574268239405855e-05, + "loss": 0.4197, + "step": 15600 + }, + { + "epoch": 1.7, + "eval_cer": 0.07007230333868279, + "eval_loss": 0.4150015711784363, + "eval_runtime": 1151.9702, + "eval_samples_per_second": 6.522, + "eval_steps_per_second": 0.816, + "step": 15600 + }, + { + "epoch": 1.71, + "learning_rate": 4.572903014416776e-05, + "loss": 0.3793, + "step": 15650 + }, + { + "epoch": 1.71, + "learning_rate": 4.571537789427698e-05, + "loss": 0.3867, + "step": 15700 + }, + { + "epoch": 1.71, + "eval_cer": 0.06928185983486232, + "eval_loss": 0.4184330999851227, + "eval_runtime": 1160.7464, + "eval_samples_per_second": 6.473, + "eval_steps_per_second": 0.81, + "step": 15700 + }, + { + "epoch": 1.72, + "learning_rate": 4.5701725644386195e-05, + "loss": 0.4238, + "step": 15750 + }, + { + "epoch": 1.73, + "learning_rate": 4.568807339449542e-05, + "loss": 0.4149, + "step": 15800 + }, + { + "epoch": 1.73, + "eval_cer": 0.07171820517997132, + "eval_loss": 0.41364967823028564, + "eval_runtime": 1161.925, + "eval_samples_per_second": 6.466, + "eval_steps_per_second": 0.809, + "step": 15800 + }, + { + "epoch": 1.73, + "learning_rate": 4.567442114460463e-05, + "loss": 0.4028, + "step": 15850 + }, + { + "epoch": 1.74, + "learning_rate": 4.566076889471385e-05, + "loss": 0.4122, + "step": 15900 + }, + { + "epoch": 1.74, + "eval_cer": 0.07077035734205672, + "eval_loss": 0.4160652160644531, + "eval_runtime": 1143.2321, + "eval_samples_per_second": 6.572, + "eval_steps_per_second": 0.822, + "step": 15900 + }, + { + "epoch": 1.74, + "learning_rate": 4.5647116644823065e-05, + "loss": 0.4263, + "step": 15950 + }, + { + "epoch": 1.75, + "learning_rate": 4.56337374399301e-05, + "loss": 0.424, + "step": 16000 + }, + { + "epoch": 1.75, + "eval_cer": 0.07280634818523068, + "eval_loss": 0.4145605266094208, + "eval_runtime": 1166.7151, + "eval_samples_per_second": 6.439, + "eval_steps_per_second": 0.806, + "step": 16000 + }, + { + "epoch": 1.75, + "learning_rate": 4.562008519003932e-05, + "loss": 0.4219, + "step": 16050 + }, + { + "epoch": 1.76, + "learning_rate": 4.560643294014854e-05, + "loss": 0.4096, + "step": 16100 + }, + { + "epoch": 1.76, + "eval_cer": 0.07225201118255138, + "eval_loss": 0.41958579421043396, + "eval_runtime": 1161.2372, + "eval_samples_per_second": 6.47, + "eval_steps_per_second": 0.809, + "step": 16100 + }, + { + "epoch": 1.76, + "learning_rate": 4.559278069025776e-05, + "loss": 0.3963, + "step": 16150 + }, + { + "epoch": 1.77, + "learning_rate": 4.5579128440366976e-05, + "loss": 0.3692, + "step": 16200 + }, + { + "epoch": 1.77, + "eval_cer": 0.07155737901252733, + "eval_loss": 0.4091661870479584, + "eval_runtime": 1183.6151, + "eval_samples_per_second": 6.348, + "eval_steps_per_second": 0.794, + "step": 16200 + }, + { + "epoch": 1.77, + "learning_rate": 4.556547619047619e-05, + "loss": 0.3938, + "step": 16250 + }, + { + "epoch": 1.78, + "learning_rate": 4.555182394058541e-05, + "loss": 0.4094, + "step": 16300 + }, + { + "epoch": 1.78, + "eval_cer": 0.07196115534781225, + "eval_loss": 0.40848201513290405, + "eval_runtime": 1175.7709, + "eval_samples_per_second": 6.39, + "eval_steps_per_second": 0.799, + "step": 16300 + }, + { + "epoch": 1.79, + "learning_rate": 4.553817169069463e-05, + "loss": 0.4047, + "step": 16350 + }, + { + "epoch": 1.79, + "learning_rate": 4.5524519440803846e-05, + "loss": 0.3869, + "step": 16400 + }, + { + "epoch": 1.79, + "eval_cer": 0.06926132883476309, + "eval_loss": 0.40593230724334717, + "eval_runtime": 1163.6552, + "eval_samples_per_second": 6.456, + "eval_steps_per_second": 0.808, + "step": 16400 + }, + { + "epoch": 1.8, + "learning_rate": 4.551086719091306e-05, + "loss": 0.4197, + "step": 16450 + }, + { + "epoch": 1.8, + "learning_rate": 4.549721494102228e-05, + "loss": 0.3814, + "step": 16500 + }, + { + "epoch": 1.8, + "eval_cer": 0.07038711200687105, + "eval_loss": 0.40712347626686096, + "eval_runtime": 1163.3519, + "eval_samples_per_second": 6.458, + "eval_steps_per_second": 0.808, + "step": 16500 + }, + { + "epoch": 1.81, + "learning_rate": 4.54835626911315e-05, + "loss": 0.3532, + "step": 16550 + }, + { + "epoch": 1.81, + "learning_rate": 4.546991044124072e-05, + "loss": 0.4102, + "step": 16600 + }, + { + "epoch": 1.81, + "eval_cer": 0.0705513600076649, + "eval_loss": 0.4008554518222809, + "eval_runtime": 1174.5887, + "eval_samples_per_second": 6.396, + "eval_steps_per_second": 0.8, + "step": 16600 + }, + { + "epoch": 1.82, + "learning_rate": 4.545625819134994e-05, + "loss": 0.3598, + "step": 16650 + }, + { + "epoch": 1.82, + "learning_rate": 4.5442605941459155e-05, + "loss": 0.4015, + "step": 16700 + }, + { + "epoch": 1.82, + "eval_cer": 0.06728350915853697, + "eval_loss": 0.3957868218421936, + "eval_runtime": 1148.3376, + "eval_samples_per_second": 6.543, + "eval_steps_per_second": 0.819, + "step": 16700 + }, + { + "epoch": 1.83, + "learning_rate": 4.542895369156837e-05, + "loss": 0.3946, + "step": 16750 + }, + { + "epoch": 1.83, + "learning_rate": 4.5415574486675406e-05, + "loss": 0.3765, + "step": 16800 + }, + { + "epoch": 1.83, + "eval_cer": 0.06861802416498712, + "eval_loss": 0.4007869362831116, + "eval_runtime": 1150.6892, + "eval_samples_per_second": 6.529, + "eval_steps_per_second": 0.817, + "step": 16800 + }, + { + "epoch": 1.84, + "learning_rate": 4.540192223678462e-05, + "loss": 0.359, + "step": 16850 + }, + { + "epoch": 1.85, + "learning_rate": 4.5388269986893844e-05, + "loss": 0.4047, + "step": 16900 + }, + { + "epoch": 1.85, + "eval_cer": 0.07087985600925263, + "eval_loss": 0.40284430980682373, + "eval_runtime": 1179.5455, + "eval_samples_per_second": 6.369, + "eval_steps_per_second": 0.797, + "step": 16900 + }, + { + "epoch": 1.85, + "learning_rate": 4.537461773700306e-05, + "loss": 0.3911, + "step": 16950 + }, + { + "epoch": 1.86, + "learning_rate": 4.5360965487112276e-05, + "loss": 0.3915, + "step": 17000 + }, + { + "epoch": 1.86, + "eval_cer": 0.06871725733213341, + "eval_loss": 0.3993259072303772, + "eval_runtime": 1144.6731, + "eval_samples_per_second": 6.563, + "eval_steps_per_second": 0.821, + "step": 17000 + }, + { + "epoch": 1.86, + "learning_rate": 4.534731323722149e-05, + "loss": 0.3652, + "step": 17050 + }, + { + "epoch": 1.87, + "learning_rate": 4.5333660987330714e-05, + "loss": 0.3939, + "step": 17100 + }, + { + "epoch": 1.87, + "eval_cer": 0.06720138515814003, + "eval_loss": 0.395829975605011, + "eval_runtime": 1148.798, + "eval_samples_per_second": 6.54, + "eval_steps_per_second": 0.818, + "step": 17100 + }, + { + "epoch": 1.87, + "learning_rate": 4.532000873743994e-05, + "loss": 0.3864, + "step": 17150 + }, + { + "epoch": 1.88, + "learning_rate": 4.530635648754915e-05, + "loss": 0.3617, + "step": 17200 + }, + { + "epoch": 1.88, + "eval_cer": 0.07021259850602755, + "eval_loss": 0.4057924449443817, + "eval_runtime": 1153.1488, + "eval_samples_per_second": 6.515, + "eval_steps_per_second": 0.815, + "step": 17200 + }, + { + "epoch": 1.88, + "learning_rate": 4.529270423765837e-05, + "loss": 0.3855, + "step": 17250 + }, + { + "epoch": 1.89, + "learning_rate": 4.5279051987767584e-05, + "loss": 0.3741, + "step": 17300 + }, + { + "epoch": 1.89, + "eval_cer": 0.07238888451654628, + "eval_loss": 0.41401273012161255, + "eval_runtime": 1185.6581, + "eval_samples_per_second": 6.337, + "eval_steps_per_second": 0.793, + "step": 17300 + }, + { + "epoch": 1.89, + "learning_rate": 4.526539973787681e-05, + "loss": 0.3724, + "step": 17350 + }, + { + "epoch": 1.9, + "learning_rate": 4.525174748798602e-05, + "loss": 0.408, + "step": 17400 + }, + { + "epoch": 1.9, + "eval_cer": 0.06712610482444284, + "eval_loss": 0.398795485496521, + "eval_runtime": 1152.8077, + "eval_samples_per_second": 6.517, + "eval_steps_per_second": 0.815, + "step": 17400 + }, + { + "epoch": 1.91, + "learning_rate": 4.523809523809524e-05, + "loss": 0.3974, + "step": 17450 + }, + { + "epoch": 1.91, + "learning_rate": 4.5224442988204454e-05, + "loss": 0.3812, + "step": 17500 + }, + { + "epoch": 1.91, + "eval_cer": 0.07113991534384292, + "eval_loss": 0.41947415471076965, + "eval_runtime": 1123.4148, + "eval_samples_per_second": 6.688, + "eval_steps_per_second": 0.837, + "step": 17500 + }, + { + "epoch": 1.92, + "learning_rate": 4.521079073831368e-05, + "loss": 0.3823, + "step": 17550 + }, + { + "epoch": 1.92, + "learning_rate": 4.51971384884229e-05, + "loss": 0.3848, + "step": 17600 + }, + { + "epoch": 1.92, + "eval_cer": 0.0706882333416598, + "eval_loss": 0.401214599609375, + "eval_runtime": 1163.9479, + "eval_samples_per_second": 6.455, + "eval_steps_per_second": 0.808, + "step": 17600 + }, + { + "epoch": 1.93, + "learning_rate": 4.5183486238532115e-05, + "loss": 0.3645, + "step": 17650 + }, + { + "epoch": 1.93, + "learning_rate": 4.516983398864133e-05, + "loss": 0.3587, + "step": 17700 + }, + { + "epoch": 1.93, + "eval_cer": 0.0772342005399653, + "eval_loss": 0.40040454268455505, + "eval_runtime": 1206.7666, + "eval_samples_per_second": 6.226, + "eval_steps_per_second": 0.779, + "step": 17700 + }, + { + "epoch": 1.94, + "learning_rate": 4.515618173875055e-05, + "loss": 0.4196, + "step": 17750 + }, + { + "epoch": 1.94, + "learning_rate": 4.514252948885976e-05, + "loss": 0.364, + "step": 17800 + }, + { + "epoch": 1.94, + "eval_cer": 0.07031525350652372, + "eval_loss": 0.3970361649990082, + "eval_runtime": 1161.1563, + "eval_samples_per_second": 6.47, + "eval_steps_per_second": 0.81, + "step": 17800 + }, + { + "epoch": 1.95, + "learning_rate": 4.5128877238968985e-05, + "loss": 0.4055, + "step": 17850 + }, + { + "epoch": 1.95, + "learning_rate": 4.51152249890782e-05, + "loss": 0.3667, + "step": 17900 + }, + { + "epoch": 1.95, + "eval_cer": 0.07077035734205672, + "eval_loss": 0.3838033676147461, + "eval_runtime": 1185.1264, + "eval_samples_per_second": 6.339, + "eval_steps_per_second": 0.793, + "step": 17900 + }, + { + "epoch": 1.96, + "learning_rate": 4.510157273918742e-05, + "loss": 0.3816, + "step": 17950 + }, + { + "epoch": 1.97, + "learning_rate": 4.508792048929664e-05, + "loss": 0.3619, + "step": 18000 + }, + { + "epoch": 1.97, + "eval_cer": 0.07000728850503522, + "eval_loss": 0.39164894819259644, + "eval_runtime": 1179.3108, + "eval_samples_per_second": 6.371, + "eval_steps_per_second": 0.797, + "step": 18000 + }, + { + "epoch": 1.97, + "learning_rate": 4.5074268239405855e-05, + "loss": 0.3989, + "step": 18050 + }, + { + "epoch": 1.98, + "learning_rate": 4.506061598951508e-05, + "loss": 0.4249, + "step": 18100 + }, + { + "epoch": 1.98, + "eval_cer": 0.06841613599734465, + "eval_loss": 0.3852728307247162, + "eval_runtime": 1156.9882, + "eval_samples_per_second": 6.494, + "eval_steps_per_second": 0.812, + "step": 18100 + }, + { + "epoch": 1.98, + "learning_rate": 4.5046963739624294e-05, + "loss": 0.3723, + "step": 18150 + }, + { + "epoch": 1.99, + "learning_rate": 4.503331148973351e-05, + "loss": 0.3633, + "step": 18200 + }, + { + "epoch": 1.99, + "eval_cer": 0.06889519266632677, + "eval_loss": 0.387567400932312, + "eval_runtime": 1174.6487, + "eval_samples_per_second": 6.396, + "eval_steps_per_second": 0.8, + "step": 18200 + }, + { + "epoch": 1.99, + "learning_rate": 4.5019659239842725e-05, + "loss": 0.3559, + "step": 18250 + }, + { + "epoch": 2.0, + "learning_rate": 4.500600698995195e-05, + "loss": 0.4151, + "step": 18300 + }, + { + "epoch": 2.0, + "eval_cer": 0.06865224249848584, + "eval_loss": 0.38729149103164673, + "eval_runtime": 1181.6644, + "eval_samples_per_second": 6.358, + "eval_steps_per_second": 0.795, + "step": 18300 + }, + { + "epoch": 2.0, + "learning_rate": 4.4992354740061164e-05, + "loss": 0.317, + "step": 18350 + }, + { + "epoch": 2.01, + "learning_rate": 4.497870249017038e-05, + "loss": 0.2872, + "step": 18400 + }, + { + "epoch": 2.01, + "eval_cer": 0.06717058865799118, + "eval_loss": 0.392042875289917, + "eval_runtime": 1137.7306, + "eval_samples_per_second": 6.603, + "eval_steps_per_second": 0.826, + "step": 18400 + }, + { + "epoch": 2.02, + "learning_rate": 4.49650502402796e-05, + "loss": 0.2926, + "step": 18450 + }, + { + "epoch": 2.02, + "learning_rate": 4.495139799038882e-05, + "loss": 0.3, + "step": 18500 + }, + { + "epoch": 2.02, + "eval_cer": 0.06902864416697178, + "eval_loss": 0.38552695512771606, + "eval_runtime": 1176.6053, + "eval_samples_per_second": 6.385, + "eval_steps_per_second": 0.799, + "step": 18500 + }, + { + "epoch": 2.03, + "learning_rate": 4.493774574049804e-05, + "loss": 0.3154, + "step": 18550 + }, + { + "epoch": 2.03, + "learning_rate": 4.4924093490607256e-05, + "loss": 0.3287, + "step": 18600 + }, + { + "epoch": 2.03, + "eval_cer": 0.06579158981799269, + "eval_loss": 0.39401352405548096, + "eval_runtime": 1146.5226, + "eval_samples_per_second": 6.553, + "eval_steps_per_second": 0.82, + "step": 18600 + }, + { + "epoch": 2.04, + "learning_rate": 4.491044124071647e-05, + "loss": 0.3288, + "step": 18650 + }, + { + "epoch": 2.04, + "learning_rate": 4.489678899082569e-05, + "loss": 0.3124, + "step": 18700 + }, + { + "epoch": 2.04, + "eval_cer": 0.06543571914960597, + "eval_loss": 0.3829745948314667, + "eval_runtime": 1170.1361, + "eval_samples_per_second": 6.421, + "eval_steps_per_second": 0.803, + "step": 18700 + }, + { + "epoch": 2.05, + "learning_rate": 4.4883136740934904e-05, + "loss": 0.3132, + "step": 18750 + }, + { + "epoch": 2.05, + "learning_rate": 4.4869484491044126e-05, + "loss": 0.2994, + "step": 18800 + }, + { + "epoch": 2.05, + "eval_cer": 0.06607560198603207, + "eval_loss": 0.3876435458660126, + "eval_runtime": 1143.5794, + "eval_samples_per_second": 6.57, + "eval_steps_per_second": 0.822, + "step": 18800 + }, + { + "epoch": 2.06, + "learning_rate": 4.485583224115334e-05, + "loss": 0.3342, + "step": 18850 + }, + { + "epoch": 2.06, + "learning_rate": 4.4842179991262565e-05, + "loss": 0.2923, + "step": 18900 + }, + { + "epoch": 2.06, + "eval_cer": 0.06948032616915491, + "eval_loss": 0.3895299434661865, + "eval_runtime": 1153.5637, + "eval_samples_per_second": 6.513, + "eval_steps_per_second": 0.815, + "step": 18900 + }, + { + "epoch": 2.07, + "learning_rate": 4.482852774137178e-05, + "loss": 0.3297, + "step": 18950 + }, + { + "epoch": 2.08, + "learning_rate": 4.4814875491481e-05, + "loss": 0.3179, + "step": 19000 + }, + { + "epoch": 2.08, + "eval_cer": 0.06705766815744539, + "eval_loss": 0.38019657135009766, + "eval_runtime": 1179.7174, + "eval_samples_per_second": 6.368, + "eval_steps_per_second": 0.797, + "step": 19000 + }, + { + "epoch": 2.08, + "learning_rate": 4.480122324159022e-05, + "loss": 0.2929, + "step": 19050 + }, + { + "epoch": 2.09, + "learning_rate": 4.4787570991699435e-05, + "loss": 0.3521, + "step": 19100 + }, + { + "epoch": 2.09, + "eval_cer": 0.06731772749203568, + "eval_loss": 0.37924471497535706, + "eval_runtime": 1165.4502, + "eval_samples_per_second": 6.446, + "eval_steps_per_second": 0.807, + "step": 19100 + }, + { + "epoch": 2.09, + "learning_rate": 4.477391874180865e-05, + "loss": 0.3111, + "step": 19150 + }, + { + "epoch": 2.1, + "learning_rate": 4.4760266491917866e-05, + "loss": 0.3007, + "step": 19200 + }, + { + "epoch": 2.1, + "eval_cer": 0.06791997016161319, + "eval_loss": 0.3855767548084259, + "eval_runtime": 1158.659, + "eval_samples_per_second": 6.484, + "eval_steps_per_second": 0.811, + "step": 19200 + }, + { + "epoch": 2.1, + "learning_rate": 4.474661424202709e-05, + "loss": 0.3176, + "step": 19250 + }, + { + "epoch": 2.11, + "learning_rate": 4.4732961992136305e-05, + "loss": 0.294, + "step": 19300 + }, + { + "epoch": 2.11, + "eval_cer": 0.06327654230583662, + "eval_loss": 0.37883859872817993, + "eval_runtime": 1146.7655, + "eval_samples_per_second": 6.551, + "eval_steps_per_second": 0.82, + "step": 19300 + }, + { + "epoch": 2.11, + "learning_rate": 4.471930974224553e-05, + "loss": 0.3376, + "step": 19350 + }, + { + "epoch": 2.12, + "learning_rate": 4.470565749235474e-05, + "loss": 0.3348, + "step": 19400 + }, + { + "epoch": 2.12, + "eval_cer": 0.06263323763606064, + "eval_loss": 0.37968873977661133, + "eval_runtime": 1136.9087, + "eval_samples_per_second": 6.608, + "eval_steps_per_second": 0.827, + "step": 19400 + }, + { + "epoch": 2.12, + "learning_rate": 4.469200524246396e-05, + "loss": 0.2803, + "step": 19450 + }, + { + "epoch": 2.13, + "learning_rate": 4.467835299257318e-05, + "loss": 0.2794, + "step": 19500 + }, + { + "epoch": 2.13, + "eval_cer": 0.06554179598345201, + "eval_loss": 0.37674739956855774, + "eval_runtime": 1167.3995, + "eval_samples_per_second": 6.436, + "eval_steps_per_second": 0.805, + "step": 19500 + }, + { + "epoch": 2.14, + "learning_rate": 4.46647007426824e-05, + "loss": 0.3175, + "step": 19550 + }, + { + "epoch": 2.14, + "learning_rate": 4.465104849279161e-05, + "loss": 0.3098, + "step": 19600 + }, + { + "epoch": 2.14, + "eval_cer": 0.06358108547397524, + "eval_loss": 0.3796171247959137, + "eval_runtime": 1151.1497, + "eval_samples_per_second": 6.527, + "eval_steps_per_second": 0.817, + "step": 19600 + }, + { + "epoch": 2.15, + "learning_rate": 4.463739624290083e-05, + "loss": 0.3399, + "step": 19650 + }, + { + "epoch": 2.15, + "learning_rate": 4.4623743993010045e-05, + "loss": 0.289, + "step": 19700 + }, + { + "epoch": 2.15, + "eval_cer": 0.06953165366940299, + "eval_loss": 0.39493194222450256, + "eval_runtime": 1365.0679, + "eval_samples_per_second": 5.504, + "eval_steps_per_second": 0.689, + "step": 19700 + }, + { + "epoch": 2.16, + "learning_rate": 4.461009174311927e-05, + "loss": 0.3148, + "step": 19750 + }, + { + "epoch": 2.16, + "learning_rate": 4.459643949322849e-05, + "loss": 0.3033, + "step": 19800 + }, + { + "epoch": 2.16, + "eval_cer": 0.06477530531308064, + "eval_loss": 0.3851621747016907, + "eval_runtime": 1450.2983, + "eval_samples_per_second": 5.18, + "eval_steps_per_second": 0.648, + "step": 19800 + }, + { + "epoch": 2.17, + "learning_rate": 4.458306028833552e-05, + "loss": 0.3174, + "step": 19850 + }, + { + "epoch": 2.17, + "learning_rate": 4.456940803844474e-05, + "loss": 0.3354, + "step": 19900 + }, + { + "epoch": 2.17, + "eval_cer": 0.06344763397333023, + "eval_loss": 0.38615599274635315, + "eval_runtime": 1300.2394, + "eval_samples_per_second": 5.778, + "eval_steps_per_second": 0.723, + "step": 19900 + }, + { + "epoch": 2.18, + "learning_rate": 4.455575578855396e-05, + "loss": 0.3462, + "step": 19950 + }, + { + "epoch": 2.18, + "learning_rate": 4.454210353866317e-05, + "loss": 0.3373, + "step": 20000 + }, + { + "epoch": 2.18, + "eval_cer": 0.06389247230881362, + "eval_loss": 0.3779688775539398, + "eval_runtime": 1247.3861, + "eval_samples_per_second": 6.023, + "eval_steps_per_second": 0.754, + "step": 20000 + }, + { + "epoch": 2.19, + "learning_rate": 4.4528451288772395e-05, + "loss": 0.3024, + "step": 20050 + }, + { + "epoch": 2.2, + "learning_rate": 4.451479903888161e-05, + "loss": 0.2955, + "step": 20100 + }, + { + "epoch": 2.2, + "eval_cer": 0.06926817250146283, + "eval_loss": 0.3931354880332947, + "eval_runtime": 1363.3975, + "eval_samples_per_second": 5.51, + "eval_steps_per_second": 0.689, + "step": 20100 + }, + { + "epoch": 2.2, + "learning_rate": 4.450114678899083e-05, + "loss": 0.3167, + "step": 20150 + }, + { + "epoch": 2.21, + "learning_rate": 4.448749453910004e-05, + "loss": 0.3228, + "step": 20200 + }, + { + "epoch": 2.21, + "eval_cer": 0.06707135549084489, + "eval_loss": 0.37632882595062256, + "eval_runtime": 1493.5017, + "eval_samples_per_second": 5.03, + "eval_steps_per_second": 0.629, + "step": 20200 + }, + { + "epoch": 2.21, + "learning_rate": 4.4473842289209265e-05, + "loss": 0.316, + "step": 20250 + }, + { + "epoch": 2.22, + "learning_rate": 4.446019003931848e-05, + "loss": 0.3392, + "step": 20300 + }, + { + "epoch": 2.22, + "eval_cer": 0.06558970165035022, + "eval_loss": 0.38044553995132446, + "eval_runtime": 1382.6368, + "eval_samples_per_second": 5.434, + "eval_steps_per_second": 0.68, + "step": 20300 + }, + { + "epoch": 2.22, + "learning_rate": 4.4446537789427704e-05, + "loss": 0.2981, + "step": 20350 + }, + { + "epoch": 2.23, + "learning_rate": 4.443288553953692e-05, + "loss": 0.3149, + "step": 20400 + }, + { + "epoch": 2.23, + "eval_cer": 0.0647547743129814, + "eval_loss": 0.3692227602005005, + "eval_runtime": 1474.8703, + "eval_samples_per_second": 5.094, + "eval_steps_per_second": 0.637, + "step": 20400 + }, + { + "epoch": 2.23, + "learning_rate": 4.4419233289646135e-05, + "loss": 0.311, + "step": 20450 + }, + { + "epoch": 2.24, + "learning_rate": 4.440558103975536e-05, + "loss": 0.2676, + "step": 20500 + }, + { + "epoch": 2.24, + "eval_cer": 0.06711583932439322, + "eval_loss": 0.3739226758480072, + "eval_runtime": 1465.1736, + "eval_samples_per_second": 5.128, + "eval_steps_per_second": 0.642, + "step": 20500 + }, + { + "epoch": 2.24, + "learning_rate": 4.4391928789864574e-05, + "loss": 0.2736, + "step": 20550 + }, + { + "epoch": 2.25, + "learning_rate": 4.437827653997379e-05, + "loss": 0.3111, + "step": 20600 + }, + { + "epoch": 2.25, + "eval_cer": 0.0664417381544684, + "eval_loss": 0.3745150864124298, + "eval_runtime": 1170.2827, + "eval_samples_per_second": 6.42, + "eval_steps_per_second": 0.803, + "step": 20600 + }, + { + "epoch": 2.26, + "learning_rate": 4.4364624290083005e-05, + "loss": 0.2983, + "step": 20650 + }, + { + "epoch": 2.26, + "learning_rate": 4.435097204019222e-05, + "loss": 0.3049, + "step": 20700 + }, + { + "epoch": 2.26, + "eval_cer": 0.06717401049134106, + "eval_loss": 0.3762642443180084, + "eval_runtime": 1206.4849, + "eval_samples_per_second": 6.227, + "eval_steps_per_second": 0.779, + "step": 20700 + }, + { + "epoch": 2.27, + "learning_rate": 4.4337319790301444e-05, + "loss": 0.3181, + "step": 20750 + }, + { + "epoch": 2.27, + "learning_rate": 4.432366754041066e-05, + "loss": 0.2593, + "step": 20800 + }, + { + "epoch": 2.27, + "eval_cer": 0.06381034830841668, + "eval_loss": 0.37320199608802795, + "eval_runtime": 1196.0544, + "eval_samples_per_second": 6.281, + "eval_steps_per_second": 0.786, + "step": 20800 + }, + { + "epoch": 2.28, + "learning_rate": 4.431001529051988e-05, + "loss": 0.3106, + "step": 20850 + }, + { + "epoch": 2.28, + "learning_rate": 4.42963630406291e-05, + "loss": 0.2765, + "step": 20900 + }, + { + "epoch": 2.28, + "eval_cer": 0.06492586598047502, + "eval_loss": 0.37560248374938965, + "eval_runtime": 1186.1802, + "eval_samples_per_second": 6.334, + "eval_steps_per_second": 0.792, + "step": 20900 + }, + { + "epoch": 2.29, + "learning_rate": 4.4282710790738314e-05, + "loss": 0.2789, + "step": 20950 + }, + { + "epoch": 2.29, + "learning_rate": 4.4269058540847536e-05, + "loss": 0.3202, + "step": 21000 + }, + { + "epoch": 2.29, + "eval_cer": 0.06184279413224017, + "eval_loss": 0.3614885210990906, + "eval_runtime": 1146.8682, + "eval_samples_per_second": 6.551, + "eval_steps_per_second": 0.82, + "step": 21000 + }, + { + "epoch": 2.3, + "learning_rate": 4.425540629095675e-05, + "loss": 0.3247, + "step": 21050 + }, + { + "epoch": 2.3, + "learning_rate": 4.424175404106597e-05, + "loss": 0.3105, + "step": 21100 + }, + { + "epoch": 2.3, + "eval_cer": 0.0640669858096571, + "eval_loss": 0.3699856996536255, + "eval_runtime": 1154.5375, + "eval_samples_per_second": 6.507, + "eval_steps_per_second": 0.814, + "step": 21100 + }, + { + "epoch": 2.31, + "learning_rate": 4.4228101791175184e-05, + "loss": 0.2951, + "step": 21150 + }, + { + "epoch": 2.32, + "learning_rate": 4.4214449541284406e-05, + "loss": 0.3252, + "step": 21200 + }, + { + "epoch": 2.32, + "eval_cer": 0.06651017482146585, + "eval_loss": 0.367857426404953, + "eval_runtime": 1184.7517, + "eval_samples_per_second": 6.341, + "eval_steps_per_second": 0.793, + "step": 21200 + }, + { + "epoch": 2.32, + "learning_rate": 4.420079729139362e-05, + "loss": 0.2952, + "step": 21250 + }, + { + "epoch": 2.33, + "learning_rate": 4.4187145041502845e-05, + "loss": 0.3112, + "step": 21300 + }, + { + "epoch": 2.33, + "eval_cer": 0.06518934714841518, + "eval_loss": 0.3664465844631195, + "eval_runtime": 1166.6886, + "eval_samples_per_second": 6.44, + "eval_steps_per_second": 0.806, + "step": 21300 + }, + { + "epoch": 2.33, + "learning_rate": 4.417349279161206e-05, + "loss": 0.3146, + "step": 21350 + }, + { + "epoch": 2.34, + "learning_rate": 4.4159840541721276e-05, + "loss": 0.3088, + "step": 21400 + }, + { + "epoch": 2.34, + "eval_cer": 0.06262981580271078, + "eval_loss": 0.3656906187534332, + "eval_runtime": 1164.8946, + "eval_samples_per_second": 6.45, + "eval_steps_per_second": 0.807, + "step": 21400 + }, + { + "epoch": 2.34, + "learning_rate": 4.41461882918305e-05, + "loss": 0.3172, + "step": 21450 + }, + { + "epoch": 2.35, + "learning_rate": 4.4132536041939715e-05, + "loss": 0.3134, + "step": 21500 + }, + { + "epoch": 2.35, + "eval_cer": 0.06641094165431956, + "eval_loss": 0.37143638730049133, + "eval_runtime": 1189.7181, + "eval_samples_per_second": 6.315, + "eval_steps_per_second": 0.79, + "step": 21500 + }, + { + "epoch": 2.35, + "learning_rate": 4.411888379204893e-05, + "loss": 0.312, + "step": 21550 + }, + { + "epoch": 2.36, + "learning_rate": 4.4105231542158146e-05, + "loss": 0.2935, + "step": 21600 + }, + { + "epoch": 2.36, + "eval_cer": 0.060775182127080045, + "eval_loss": 0.36119917035102844, + "eval_runtime": 1143.708, + "eval_samples_per_second": 6.569, + "eval_steps_per_second": 0.822, + "step": 21600 + }, + { + "epoch": 2.36, + "learning_rate": 4.409157929226736e-05, + "loss": 0.3247, + "step": 21650 + }, + { + "epoch": 2.37, + "learning_rate": 4.4077927042376585e-05, + "loss": 0.3451, + "step": 21700 + }, + { + "epoch": 2.37, + "eval_cer": 0.061541672797451416, + "eval_loss": 0.3566524386405945, + "eval_runtime": 1165.536, + "eval_samples_per_second": 6.446, + "eval_steps_per_second": 0.806, + "step": 21700 + }, + { + "epoch": 2.38, + "learning_rate": 4.406427479248581e-05, + "loss": 0.3164, + "step": 21750 + }, + { + "epoch": 2.38, + "learning_rate": 4.405062254259502e-05, + "loss": 0.3113, + "step": 21800 + }, + { + "epoch": 2.38, + "eval_cer": 0.06236291280142074, + "eval_loss": 0.3675330579280853, + "eval_runtime": 1149.7199, + "eval_samples_per_second": 6.535, + "eval_steps_per_second": 0.818, + "step": 21800 + }, + { + "epoch": 2.39, + "learning_rate": 4.403697029270424e-05, + "loss": 0.3079, + "step": 21850 + }, + { + "epoch": 2.39, + "learning_rate": 4.4023318042813455e-05, + "loss": 0.2795, + "step": 21900 + }, + { + "epoch": 2.39, + "eval_cer": 0.062013885799733784, + "eval_loss": 0.3630739152431488, + "eval_runtime": 1162.4946, + "eval_samples_per_second": 6.463, + "eval_steps_per_second": 0.809, + "step": 21900 + }, + { + "epoch": 2.4, + "learning_rate": 4.400966579292268e-05, + "loss": 0.3273, + "step": 21950 + }, + { + "epoch": 2.4, + "learning_rate": 4.399601354303189e-05, + "loss": 0.3165, + "step": 22000 + }, + { + "epoch": 2.4, + "eval_cer": 0.06382403564181617, + "eval_loss": 0.3620857298374176, + "eval_runtime": 1163.7577, + "eval_samples_per_second": 6.456, + "eval_steps_per_second": 0.808, + "step": 22000 + }, + { + "epoch": 2.41, + "learning_rate": 4.398236129314111e-05, + "loss": 0.3265, + "step": 22050 + }, + { + "epoch": 2.41, + "learning_rate": 4.3968709043250325e-05, + "loss": 0.293, + "step": 22100 + }, + { + "epoch": 2.41, + "eval_cer": 0.06423123381045097, + "eval_loss": 0.36310431361198425, + "eval_runtime": 1165.8222, + "eval_samples_per_second": 6.444, + "eval_steps_per_second": 0.806, + "step": 22100 + }, + { + "epoch": 2.42, + "learning_rate": 4.395505679335955e-05, + "loss": 0.3111, + "step": 22150 + }, + { + "epoch": 2.42, + "learning_rate": 4.394140454346877e-05, + "loss": 0.3052, + "step": 22200 + }, + { + "epoch": 2.42, + "eval_cer": 0.06236291280142074, + "eval_loss": 0.3586342930793762, + "eval_runtime": 1151.2813, + "eval_samples_per_second": 6.526, + "eval_steps_per_second": 0.816, + "step": 22200 + }, + { + "epoch": 2.43, + "learning_rate": 4.3927752293577986e-05, + "loss": 0.3033, + "step": 22250 + }, + { + "epoch": 2.44, + "learning_rate": 4.39141000436872e-05, + "loss": 0.3183, + "step": 22300 + }, + { + "epoch": 2.44, + "eval_cer": 0.060504857292440144, + "eval_loss": 0.3589232861995697, + "eval_runtime": 1143.1903, + "eval_samples_per_second": 6.572, + "eval_steps_per_second": 0.822, + "step": 22300 + }, + { + "epoch": 2.44, + "learning_rate": 4.390044779379642e-05, + "loss": 0.299, + "step": 22350 + }, + { + "epoch": 2.45, + "learning_rate": 4.388679554390564e-05, + "loss": 0.3395, + "step": 22400 + }, + { + "epoch": 2.45, + "eval_cer": 0.06262297213601103, + "eval_loss": 0.3562231659889221, + "eval_runtime": 1171.0509, + "eval_samples_per_second": 6.416, + "eval_steps_per_second": 0.803, + "step": 22400 + }, + { + "epoch": 2.45, + "learning_rate": 4.3873143294014856e-05, + "loss": 0.2849, + "step": 22450 + }, + { + "epoch": 2.46, + "learning_rate": 4.385949104412407e-05, + "loss": 0.2904, + "step": 22500 + }, + { + "epoch": 2.46, + "eval_cer": 0.06267429963625912, + "eval_loss": 0.3656773865222931, + "eval_runtime": 1135.83, + "eval_samples_per_second": 6.615, + "eval_steps_per_second": 0.828, + "step": 22500 + }, + { + "epoch": 2.46, + "learning_rate": 4.384583879423329e-05, + "loss": 0.2895, + "step": 22550 + }, + { + "epoch": 2.47, + "learning_rate": 4.383218654434251e-05, + "loss": 0.3042, + "step": 22600 + }, + { + "epoch": 2.47, + "eval_cer": 0.06159642213104937, + "eval_loss": 0.35998255014419556, + "eval_runtime": 1154.6766, + "eval_samples_per_second": 6.507, + "eval_steps_per_second": 0.814, + "step": 22600 + }, + { + "epoch": 2.47, + "learning_rate": 4.381853429445173e-05, + "loss": 0.2958, + "step": 22650 + }, + { + "epoch": 2.48, + "learning_rate": 4.380488204456095e-05, + "loss": 0.2925, + "step": 22700 + }, + { + "epoch": 2.48, + "eval_cer": 0.0625408481356141, + "eval_loss": 0.35356906056404114, + "eval_runtime": 1163.7126, + "eval_samples_per_second": 6.456, + "eval_steps_per_second": 0.808, + "step": 22700 + }, + { + "epoch": 2.48, + "learning_rate": 4.3791229794670164e-05, + "loss": 0.3009, + "step": 22750 + }, + { + "epoch": 2.49, + "learning_rate": 4.377757754477938e-05, + "loss": 0.2875, + "step": 22800 + }, + { + "epoch": 2.49, + "eval_cer": 0.06025506345789947, + "eval_loss": 0.3555847704410553, + "eval_runtime": 1146.3487, + "eval_samples_per_second": 6.554, + "eval_steps_per_second": 0.82, + "step": 22800 + }, + { + "epoch": 2.5, + "learning_rate": 4.37639252948886e-05, + "loss": 0.3123, + "step": 22850 + }, + { + "epoch": 2.5, + "learning_rate": 4.375027304499782e-05, + "loss": 0.3042, + "step": 22900 + }, + { + "epoch": 2.5, + "eval_cer": 0.06097022662802276, + "eval_loss": 0.3487367331981659, + "eval_runtime": 1161.3152, + "eval_samples_per_second": 6.469, + "eval_steps_per_second": 0.809, + "step": 22900 + }, + { + "epoch": 2.51, + "learning_rate": 4.3736620795107034e-05, + "loss": 0.2855, + "step": 22950 + }, + { + "epoch": 2.51, + "learning_rate": 4.372296854521625e-05, + "loss": 0.277, + "step": 23000 + }, + { + "epoch": 2.51, + "eval_cer": 0.060617777792985925, + "eval_loss": 0.34876227378845215, + "eval_runtime": 1159.4584, + "eval_samples_per_second": 6.48, + "eval_steps_per_second": 0.811, + "step": 23000 + }, + { + "epoch": 2.52, + "learning_rate": 4.370931629532547e-05, + "loss": 0.2976, + "step": 23050 + }, + { + "epoch": 2.52, + "learning_rate": 4.3695664045434695e-05, + "loss": 0.3092, + "step": 23100 + }, + { + "epoch": 2.52, + "eval_cer": 0.059519369287676954, + "eval_loss": 0.3475951552391052, + "eval_runtime": 1144.1568, + "eval_samples_per_second": 6.566, + "eval_steps_per_second": 0.822, + "step": 23100 + }, + { + "epoch": 2.53, + "learning_rate": 4.368201179554391e-05, + "loss": 0.2982, + "step": 23150 + }, + { + "epoch": 2.53, + "learning_rate": 4.366835954565313e-05, + "loss": 0.3148, + "step": 23200 + }, + { + "epoch": 2.53, + "eval_cer": 0.0602892817913982, + "eval_loss": 0.3553316295146942, + "eval_runtime": 1154.791, + "eval_samples_per_second": 6.506, + "eval_steps_per_second": 0.814, + "step": 23200 + }, + { + "epoch": 2.54, + "learning_rate": 4.365470729576234e-05, + "loss": 0.3058, + "step": 23250 + }, + { + "epoch": 2.54, + "learning_rate": 4.364105504587156e-05, + "loss": 0.3245, + "step": 23300 + }, + { + "epoch": 2.54, + "eval_cer": 0.07005177233858356, + "eval_loss": 0.3529204726219177, + "eval_runtime": 1195.1336, + "eval_samples_per_second": 6.286, + "eval_steps_per_second": 0.787, + "step": 23300 + }, + { + "epoch": 2.55, + "learning_rate": 4.362740279598078e-05, + "loss": 0.3058, + "step": 23350 + }, + { + "epoch": 2.56, + "learning_rate": 4.361375054609e-05, + "loss": 0.2828, + "step": 23400 + }, + { + "epoch": 2.56, + "eval_cer": 0.062260257800924576, + "eval_loss": 0.35099321603775024, + "eval_runtime": 1152.0634, + "eval_samples_per_second": 6.521, + "eval_steps_per_second": 0.816, + "step": 23400 + }, + { + "epoch": 2.56, + "learning_rate": 4.360009829619921e-05, + "loss": 0.3004, + "step": 23450 + }, + { + "epoch": 2.57, + "learning_rate": 4.3586446046308436e-05, + "loss": 0.3078, + "step": 23500 + }, + { + "epoch": 2.57, + "eval_cer": 0.06309176330494352, + "eval_loss": 0.3564070463180542, + "eval_runtime": 1168.3273, + "eval_samples_per_second": 6.431, + "eval_steps_per_second": 0.805, + "step": 23500 + }, + { + "epoch": 2.57, + "learning_rate": 4.357279379641765e-05, + "loss": 0.3201, + "step": 23550 + }, + { + "epoch": 2.58, + "learning_rate": 4.3559141546526874e-05, + "loss": 0.297, + "step": 23600 + }, + { + "epoch": 2.58, + "eval_cer": 0.0617161862982949, + "eval_loss": 0.3490002155303955, + "eval_runtime": 1170.8594, + "eval_samples_per_second": 6.417, + "eval_steps_per_second": 0.803, + "step": 23600 + }, + { + "epoch": 2.58, + "learning_rate": 4.354548929663609e-05, + "loss": 0.2804, + "step": 23650 + }, + { + "epoch": 2.59, + "learning_rate": 4.3531837046745306e-05, + "loss": 0.2812, + "step": 23700 + }, + { + "epoch": 2.59, + "eval_cer": 0.05973836662206877, + "eval_loss": 0.34518638253211975, + "eval_runtime": 1156.4633, + "eval_samples_per_second": 6.497, + "eval_steps_per_second": 0.813, + "step": 23700 + }, + { + "epoch": 2.59, + "learning_rate": 4.351818479685452e-05, + "loss": 0.2962, + "step": 23750 + }, + { + "epoch": 2.6, + "learning_rate": 4.3504532546963744e-05, + "loss": 0.2938, + "step": 23800 + }, + { + "epoch": 2.6, + "eval_cer": 0.06105577246176957, + "eval_loss": 0.3440614342689514, + "eval_runtime": 1166.2989, + "eval_samples_per_second": 6.442, + "eval_steps_per_second": 0.806, + "step": 23800 + }, + { + "epoch": 2.6, + "learning_rate": 4.349115334207077e-05, + "loss": 0.2741, + "step": 23850 + }, + { + "epoch": 2.61, + "learning_rate": 4.3477501092179995e-05, + "loss": 0.2992, + "step": 23900 + }, + { + "epoch": 2.61, + "eval_cer": 0.06274273630325655, + "eval_loss": 0.34478849172592163, + "eval_runtime": 1168.87, + "eval_samples_per_second": 6.428, + "eval_steps_per_second": 0.804, + "step": 23900 + }, + { + "epoch": 2.62, + "learning_rate": 4.346384884228921e-05, + "loss": 0.3067, + "step": 23950 + }, + { + "epoch": 2.62, + "learning_rate": 4.3450196592398426e-05, + "loss": 0.3091, + "step": 24000 + }, + { + "epoch": 2.62, + "eval_cer": 0.062486098802016145, + "eval_loss": 0.3409504294395447, + "eval_runtime": 1187.759, + "eval_samples_per_second": 6.325, + "eval_steps_per_second": 0.791, + "step": 24000 + }, + { + "epoch": 2.63, + "learning_rate": 4.343654434250765e-05, + "loss": 0.2917, + "step": 24050 + }, + { + "epoch": 2.63, + "learning_rate": 4.3422892092616865e-05, + "loss": 0.2831, + "step": 24100 + }, + { + "epoch": 2.63, + "eval_cer": 0.061654593297997204, + "eval_loss": 0.34767550230026245, + "eval_runtime": 1162.5382, + "eval_samples_per_second": 6.463, + "eval_steps_per_second": 0.809, + "step": 24100 + }, + { + "epoch": 2.64, + "learning_rate": 4.340923984272609e-05, + "loss": 0.2899, + "step": 24150 + }, + { + "epoch": 2.64, + "learning_rate": 4.33955875928353e-05, + "loss": 0.316, + "step": 24200 + }, + { + "epoch": 2.64, + "eval_cer": 0.06590793215188834, + "eval_loss": 0.3456661105155945, + "eval_runtime": 1228.1783, + "eval_samples_per_second": 6.117, + "eval_steps_per_second": 0.765, + "step": 24200 + }, + { + "epoch": 2.65, + "learning_rate": 4.338193534294452e-05, + "loss": 0.2818, + "step": 24250 + }, + { + "epoch": 2.65, + "learning_rate": 4.3368283093053735e-05, + "loss": 0.3216, + "step": 24300 + }, + { + "epoch": 2.65, + "eval_cer": 0.06014214295735369, + "eval_loss": 0.3410555124282837, + "eval_runtime": 1339.8538, + "eval_samples_per_second": 5.607, + "eval_steps_per_second": 0.702, + "step": 24300 + }, + { + "epoch": 2.66, + "learning_rate": 4.335463084316296e-05, + "loss": 0.3092, + "step": 24350 + }, + { + "epoch": 2.66, + "learning_rate": 4.334097859327217e-05, + "loss": 0.3041, + "step": 24400 + }, + { + "epoch": 2.66, + "eval_cer": 0.06188727796578851, + "eval_loss": 0.345905065536499, + "eval_runtime": 1188.7161, + "eval_samples_per_second": 6.32, + "eval_steps_per_second": 0.791, + "step": 24400 + }, + { + "epoch": 2.67, + "learning_rate": 4.332732634338139e-05, + "loss": 0.2921, + "step": 24450 + }, + { + "epoch": 2.68, + "learning_rate": 4.331367409349061e-05, + "loss": 0.2946, + "step": 24500 + }, + { + "epoch": 2.68, + "eval_cer": 0.061298722629610496, + "eval_loss": 0.34493544697761536, + "eval_runtime": 1156.8097, + "eval_samples_per_second": 6.495, + "eval_steps_per_second": 0.813, + "step": 24500 + }, + { + "epoch": 2.68, + "learning_rate": 4.330002184359983e-05, + "loss": 0.2961, + "step": 24550 + }, + { + "epoch": 2.69, + "learning_rate": 4.328636959370905e-05, + "loss": 0.2969, + "step": 24600 + }, + { + "epoch": 2.69, + "eval_cer": 0.06008739362375574, + "eval_loss": 0.341984361410141, + "eval_runtime": 1170.1423, + "eval_samples_per_second": 6.421, + "eval_steps_per_second": 0.803, + "step": 24600 + }, + { + "epoch": 2.69, + "learning_rate": 4.3272717343818266e-05, + "loss": 0.2912, + "step": 24650 + }, + { + "epoch": 2.7, + "learning_rate": 4.325906509392748e-05, + "loss": 0.3111, + "step": 24700 + }, + { + "epoch": 2.7, + "eval_cer": 0.060039487956857526, + "eval_loss": 0.3431969881057739, + "eval_runtime": 1162.683, + "eval_samples_per_second": 6.462, + "eval_steps_per_second": 0.808, + "step": 24700 + }, + { + "epoch": 2.7, + "learning_rate": 4.32454128440367e-05, + "loss": 0.3057, + "step": 24750 + }, + { + "epoch": 2.71, + "learning_rate": 4.323176059414591e-05, + "loss": 0.2951, + "step": 24800 + }, + { + "epoch": 2.71, + "eval_cer": 0.05896503228499766, + "eval_loss": 0.3386366367340088, + "eval_runtime": 1155.7752, + "eval_samples_per_second": 6.5, + "eval_steps_per_second": 0.813, + "step": 24800 + }, + { + "epoch": 2.71, + "learning_rate": 4.3218108344255136e-05, + "loss": 0.2803, + "step": 24850 + }, + { + "epoch": 2.72, + "learning_rate": 4.320445609436435e-05, + "loss": 0.2848, + "step": 24900 + }, + { + "epoch": 2.72, + "eval_cer": 0.06460079181223716, + "eval_loss": 0.34501972794532776, + "eval_runtime": 1185.2116, + "eval_samples_per_second": 6.339, + "eval_steps_per_second": 0.793, + "step": 24900 + }, + { + "epoch": 2.72, + "learning_rate": 4.319080384447357e-05, + "loss": 0.2823, + "step": 24950 + }, + { + "epoch": 2.73, + "learning_rate": 4.317715159458279e-05, + "loss": 0.3066, + "step": 25000 + }, + { + "epoch": 2.73, + "eval_cer": 0.06447076214494202, + "eval_loss": 0.3437393605709076, + "eval_runtime": 1201.6699, + "eval_samples_per_second": 6.252, + "eval_steps_per_second": 0.782, + "step": 25000 + }, + { + "epoch": 2.74, + "learning_rate": 4.316349934469201e-05, + "loss": 0.2575, + "step": 25050 + }, + { + "epoch": 2.74, + "learning_rate": 4.314984709480123e-05, + "loss": 0.2893, + "step": 25100 + }, + { + "epoch": 2.74, + "eval_cer": 0.06085046246077724, + "eval_loss": 0.3420575261116028, + "eval_runtime": 1164.3677, + "eval_samples_per_second": 6.452, + "eval_steps_per_second": 0.807, + "step": 25100 + }, + { + "epoch": 2.75, + "learning_rate": 4.3136194844910444e-05, + "loss": 0.3092, + "step": 25150 + }, + { + "epoch": 2.75, + "learning_rate": 4.312254259501966e-05, + "loss": 0.2743, + "step": 25200 + }, + { + "epoch": 2.75, + "eval_cer": 0.05815747961442782, + "eval_loss": 0.331033319234848, + "eval_runtime": 1172.6397, + "eval_samples_per_second": 6.407, + "eval_steps_per_second": 0.802, + "step": 25200 + }, + { + "epoch": 2.76, + "learning_rate": 4.3108890345128876e-05, + "loss": 0.2971, + "step": 25250 + }, + { + "epoch": 2.76, + "learning_rate": 4.30952380952381e-05, + "loss": 0.2997, + "step": 25300 + }, + { + "epoch": 2.76, + "eval_cer": 0.05869812928370762, + "eval_loss": 0.3358995318412781, + "eval_runtime": 1146.9262, + "eval_samples_per_second": 6.551, + "eval_steps_per_second": 0.82, + "step": 25300 + }, + { + "epoch": 2.77, + "learning_rate": 4.3081585845347314e-05, + "loss": 0.2666, + "step": 25350 + }, + { + "epoch": 2.77, + "learning_rate": 4.306793359545653e-05, + "loss": 0.2939, + "step": 25400 + }, + { + "epoch": 2.77, + "eval_cer": 0.058930813951498934, + "eval_loss": 0.33529844880104065, + "eval_runtime": 1187.2055, + "eval_samples_per_second": 6.328, + "eval_steps_per_second": 0.792, + "step": 25400 + }, + { + "epoch": 2.78, + "learning_rate": 4.305428134556575e-05, + "loss": 0.289, + "step": 25450 + }, + { + "epoch": 2.79, + "learning_rate": 4.304062909567497e-05, + "loss": 0.3051, + "step": 25500 + }, + { + "epoch": 2.79, + "eval_cer": 0.05896503228499766, + "eval_loss": 0.3372917175292969, + "eval_runtime": 1419.1461, + "eval_samples_per_second": 5.294, + "eval_steps_per_second": 0.662, + "step": 25500 + }, + { + "epoch": 2.79, + "learning_rate": 4.302697684578419e-05, + "loss": 0.2801, + "step": 25550 + }, + { + "epoch": 2.8, + "learning_rate": 4.301332459589341e-05, + "loss": 0.3016, + "step": 25600 + }, + { + "epoch": 2.8, + "eval_cer": 0.05905742178544421, + "eval_loss": 0.3363926410675049, + "eval_runtime": 1464.9044, + "eval_samples_per_second": 5.129, + "eval_steps_per_second": 0.642, + "step": 25600 + }, + { + "epoch": 2.8, + "learning_rate": 4.299967234600262e-05, + "loss": 0.271, + "step": 25650 + }, + { + "epoch": 2.81, + "learning_rate": 4.298602009611184e-05, + "loss": 0.3148, + "step": 25700 + }, + { + "epoch": 2.81, + "eval_cer": 0.05901635978524574, + "eval_loss": 0.33722782135009766, + "eval_runtime": 1460.2748, + "eval_samples_per_second": 5.145, + "eval_steps_per_second": 0.644, + "step": 25700 + }, + { + "epoch": 2.81, + "learning_rate": 4.2972367846221054e-05, + "loss": 0.2866, + "step": 25750 + }, + { + "epoch": 2.82, + "learning_rate": 4.295871559633028e-05, + "loss": 0.3225, + "step": 25800 + }, + { + "epoch": 2.82, + "eval_cer": 0.057562080611550055, + "eval_loss": 0.33164745569229126, + "eval_runtime": 1430.5344, + "eval_samples_per_second": 5.252, + "eval_steps_per_second": 0.657, + "step": 25800 + }, + { + "epoch": 2.82, + "learning_rate": 4.294506334643949e-05, + "loss": 0.2966, + "step": 25850 + }, + { + "epoch": 2.83, + "learning_rate": 4.293168414154653e-05, + "loss": 0.2998, + "step": 25900 + }, + { + "epoch": 2.83, + "eval_cer": 0.057589455278349036, + "eval_loss": 0.33457526564598083, + "eval_runtime": 1421.5781, + "eval_samples_per_second": 5.285, + "eval_steps_per_second": 0.661, + "step": 25900 + }, + { + "epoch": 2.83, + "learning_rate": 4.2918031891655744e-05, + "loss": 0.2741, + "step": 25950 + }, + { + "epoch": 2.84, + "learning_rate": 4.2904379641764966e-05, + "loss": 0.3025, + "step": 26000 + }, + { + "epoch": 2.84, + "eval_cer": 0.05718567894306412, + "eval_loss": 0.33198389410972595, + "eval_runtime": 1194.7499, + "eval_samples_per_second": 6.288, + "eval_steps_per_second": 0.787, + "step": 26000 + }, + { + "epoch": 2.85, + "learning_rate": 4.289072739187418e-05, + "loss": 0.2841, + "step": 26050 + }, + { + "epoch": 2.85, + "learning_rate": 4.2877075141983405e-05, + "loss": 0.2882, + "step": 26100 + }, + { + "epoch": 2.85, + "eval_cer": 0.05745942561105389, + "eval_loss": 0.3311343789100647, + "eval_runtime": 1280.2736, + "eval_samples_per_second": 5.868, + "eval_steps_per_second": 0.734, + "step": 26100 + }, + { + "epoch": 2.86, + "learning_rate": 4.286342289209262e-05, + "loss": 0.2725, + "step": 26150 + }, + { + "epoch": 2.86, + "learning_rate": 4.2849770642201836e-05, + "loss": 0.3311, + "step": 26200 + }, + { + "epoch": 2.86, + "eval_cer": 0.05959464962137414, + "eval_loss": 0.33679836988449097, + "eval_runtime": 1337.0011, + "eval_samples_per_second": 5.619, + "eval_steps_per_second": 0.703, + "step": 26200 + }, + { + "epoch": 2.87, + "learning_rate": 4.283611839231105e-05, + "loss": 0.2945, + "step": 26250 + }, + { + "epoch": 2.87, + "learning_rate": 4.2822466142420275e-05, + "loss": 0.2976, + "step": 26300 + }, + { + "epoch": 2.87, + "eval_cer": 0.05775712511249277, + "eval_loss": 0.328110933303833, + "eval_runtime": 1396.3907, + "eval_samples_per_second": 5.38, + "eval_steps_per_second": 0.673, + "step": 26300 + }, + { + "epoch": 2.88, + "learning_rate": 4.280881389252949e-05, + "loss": 0.311, + "step": 26350 + }, + { + "epoch": 2.88, + "learning_rate": 4.2795161642638706e-05, + "loss": 0.2873, + "step": 26400 + }, + { + "epoch": 2.88, + "eval_cer": 0.0635263361403773, + "eval_loss": 0.32722070813179016, + "eval_runtime": 1462.697, + "eval_samples_per_second": 5.136, + "eval_steps_per_second": 0.643, + "step": 26400 + }, + { + "epoch": 2.89, + "learning_rate": 4.278150939274793e-05, + "loss": 0.2689, + "step": 26450 + }, + { + "epoch": 2.89, + "learning_rate": 4.2767857142857145e-05, + "loss": 0.2823, + "step": 26500 + }, + { + "epoch": 2.89, + "eval_cer": 0.06995938283813702, + "eval_loss": 0.34899574518203735, + "eval_runtime": 1723.4358, + "eval_samples_per_second": 4.359, + "eval_steps_per_second": 0.545, + "step": 26500 + }, + { + "epoch": 2.9, + "learning_rate": 4.275420489296637e-05, + "loss": 0.2491, + "step": 26550 + }, + { + "epoch": 2.91, + "learning_rate": 4.274055264307558e-05, + "loss": 0.2775, + "step": 26600 + }, + { + "epoch": 2.91, + "eval_cer": 0.05953647845442631, + "eval_loss": 0.33239951729774475, + "eval_runtime": 1151.2678, + "eval_samples_per_second": 6.526, + "eval_steps_per_second": 0.816, + "step": 26600 + }, + { + "epoch": 2.91, + "learning_rate": 4.27269003931848e-05, + "loss": 0.2867, + "step": 26650 + }, + { + "epoch": 2.92, + "learning_rate": 4.2713248143294015e-05, + "loss": 0.2721, + "step": 26700 + }, + { + "epoch": 2.92, + "eval_cer": 0.059091640118942926, + "eval_loss": 0.32586294412612915, + "eval_runtime": 1182.0981, + "eval_samples_per_second": 6.356, + "eval_steps_per_second": 0.795, + "step": 26700 + }, + { + "epoch": 2.92, + "learning_rate": 4.269959589340323e-05, + "loss": 0.3009, + "step": 26750 + }, + { + "epoch": 2.93, + "learning_rate": 4.268594364351245e-05, + "loss": 0.2994, + "step": 26800 + }, + { + "epoch": 2.93, + "eval_cer": 0.0615690474642504, + "eval_loss": 0.33627235889434814, + "eval_runtime": 1216.0666, + "eval_samples_per_second": 6.178, + "eval_steps_per_second": 0.773, + "step": 26800 + }, + { + "epoch": 2.93, + "learning_rate": 4.267229139362167e-05, + "loss": 0.3008, + "step": 26850 + }, + { + "epoch": 2.94, + "learning_rate": 4.265863914373089e-05, + "loss": 0.3001, + "step": 26900 + }, + { + "epoch": 2.94, + "eval_cer": 0.06160668763109899, + "eval_loss": 0.33180370926856995, + "eval_runtime": 1284.8633, + "eval_samples_per_second": 5.847, + "eval_steps_per_second": 0.732, + "step": 26900 + }, + { + "epoch": 2.94, + "learning_rate": 4.264498689384011e-05, + "loss": 0.2861, + "step": 26950 + }, + { + "epoch": 2.95, + "learning_rate": 4.263133464394932e-05, + "loss": 0.2527, + "step": 27000 + }, + { + "epoch": 2.95, + "eval_cer": 0.06202757313313327, + "eval_loss": 0.32616278529167175, + "eval_runtime": 1198.1406, + "eval_samples_per_second": 6.271, + "eval_steps_per_second": 0.785, + "step": 27000 + }, + { + "epoch": 2.95, + "learning_rate": 4.2617682394058546e-05, + "loss": 0.2836, + "step": 27050 + }, + { + "epoch": 2.96, + "learning_rate": 4.260403014416776e-05, + "loss": 0.2746, + "step": 27100 + }, + { + "epoch": 2.96, + "eval_cer": 0.058434648115767464, + "eval_loss": 0.32697415351867676, + "eval_runtime": 1148.8419, + "eval_samples_per_second": 6.54, + "eval_steps_per_second": 0.818, + "step": 27100 + }, + { + "epoch": 2.97, + "learning_rate": 4.259037789427698e-05, + "loss": 0.2861, + "step": 27150 + }, + { + "epoch": 2.97, + "learning_rate": 4.257672564438619e-05, + "loss": 0.277, + "step": 27200 + }, + { + "epoch": 2.97, + "eval_cer": 0.057876889279738296, + "eval_loss": 0.32712793350219727, + "eval_runtime": 1150.7522, + "eval_samples_per_second": 6.529, + "eval_steps_per_second": 0.817, + "step": 27200 + }, + { + "epoch": 2.98, + "learning_rate": 4.2563073394495416e-05, + "loss": 0.2656, + "step": 27250 + }, + { + "epoch": 2.98, + "learning_rate": 4.254942114460463e-05, + "loss": 0.3059, + "step": 27300 + }, + { + "epoch": 2.98, + "eval_cer": 0.05807193378068101, + "eval_loss": 0.32142749428749084, + "eval_runtime": 1151.3245, + "eval_samples_per_second": 6.526, + "eval_steps_per_second": 0.816, + "step": 27300 + }, + { + "epoch": 2.99, + "learning_rate": 4.2535768894713854e-05, + "loss": 0.2706, + "step": 27350 + }, + { + "epoch": 2.99, + "learning_rate": 4.252211664482307e-05, + "loss": 0.2603, + "step": 27400 + }, + { + "epoch": 2.99, + "eval_cer": 0.05766815744539609, + "eval_loss": 0.3238297700881958, + "eval_runtime": 1147.7577, + "eval_samples_per_second": 6.546, + "eval_steps_per_second": 0.819, + "step": 27400 + }, + { + "epoch": 3.0, + "learning_rate": 4.2508464394932286e-05, + "loss": 0.2459, + "step": 27450 + }, + { + "epoch": 3.0, + "learning_rate": 4.249481214504151e-05, + "loss": 0.2803, + "step": 27500 + }, + { + "epoch": 3.0, + "eval_cer": 0.05597777177055923, + "eval_loss": 0.32824379205703735, + "eval_runtime": 1138.6321, + "eval_samples_per_second": 6.598, + "eval_steps_per_second": 0.826, + "step": 27500 + }, + { + "epoch": 3.01, + "learning_rate": 4.2481159895150724e-05, + "loss": 0.2304, + "step": 27550 + }, + { + "epoch": 3.01, + "learning_rate": 4.246750764525994e-05, + "loss": 0.233, + "step": 27600 + }, + { + "epoch": 3.01, + "eval_cer": 0.060460373458891804, + "eval_loss": 0.32600587606430054, + "eval_runtime": 1173.6339, + "eval_samples_per_second": 6.401, + "eval_steps_per_second": 0.801, + "step": 27600 + }, + { + "epoch": 3.02, + "learning_rate": 4.2453855395369156e-05, + "loss": 0.2192, + "step": 27650 + }, + { + "epoch": 3.03, + "learning_rate": 4.244020314547837e-05, + "loss": 0.245, + "step": 27700 + }, + { + "epoch": 3.03, + "eval_cer": 0.05650815593978942, + "eval_loss": 0.3225695788860321, + "eval_runtime": 1140.3828, + "eval_samples_per_second": 6.588, + "eval_steps_per_second": 0.824, + "step": 27700 + }, + { + "epoch": 3.03, + "learning_rate": 4.2426550895587594e-05, + "loss": 0.2309, + "step": 27750 + }, + { + "epoch": 3.04, + "learning_rate": 4.241289864569682e-05, + "loss": 0.241, + "step": 27800 + }, + { + "epoch": 3.04, + "eval_cer": 0.05897529778504727, + "eval_loss": 0.3347548544406891, + "eval_runtime": 1129.3341, + "eval_samples_per_second": 6.653, + "eval_steps_per_second": 0.832, + "step": 27800 + }, + { + "epoch": 3.04, + "learning_rate": 4.239924639580603e-05, + "loss": 0.2422, + "step": 27850 + }, + { + "epoch": 3.05, + "learning_rate": 4.238559414591525e-05, + "loss": 0.2394, + "step": 27900 + }, + { + "epoch": 3.05, + "eval_cer": 0.05539606010108096, + "eval_loss": 0.32074490189552307, + "eval_runtime": 1144.36, + "eval_samples_per_second": 6.565, + "eval_steps_per_second": 0.821, + "step": 27900 + }, + { + "epoch": 3.05, + "learning_rate": 4.237194189602447e-05, + "loss": 0.2108, + "step": 27950 + }, + { + "epoch": 3.06, + "learning_rate": 4.235828964613369e-05, + "loss": 0.2254, + "step": 28000 + }, + { + "epoch": 3.06, + "eval_cer": 0.05462956943070959, + "eval_loss": 0.318286269903183, + "eval_runtime": 1136.2023, + "eval_samples_per_second": 6.612, + "eval_steps_per_second": 0.827, + "step": 28000 + }, + { + "epoch": 3.06, + "learning_rate": 4.23446373962429e-05, + "loss": 0.2219, + "step": 28050 + }, + { + "epoch": 3.07, + "learning_rate": 4.233098514635212e-05, + "loss": 0.2416, + "step": 28100 + }, + { + "epoch": 3.07, + "eval_cer": 0.056121488771253863, + "eval_loss": 0.322205513715744, + "eval_runtime": 1151.0902, + "eval_samples_per_second": 6.527, + "eval_steps_per_second": 0.817, + "step": 28100 + }, + { + "epoch": 3.07, + "learning_rate": 4.2317332896461334e-05, + "loss": 0.2157, + "step": 28150 + }, + { + "epoch": 3.08, + "learning_rate": 4.230368064657056e-05, + "loss": 0.2262, + "step": 28200 + }, + { + "epoch": 3.08, + "eval_cer": 0.05716172610961501, + "eval_loss": 0.32194942235946655, + "eval_runtime": 1150.2928, + "eval_samples_per_second": 6.531, + "eval_steps_per_second": 0.817, + "step": 28200 + }, + { + "epoch": 3.09, + "learning_rate": 4.229002839667977e-05, + "loss": 0.2428, + "step": 28250 + }, + { + "epoch": 3.09, + "learning_rate": 4.2276376146788995e-05, + "loss": 0.2193, + "step": 28300 + }, + { + "epoch": 3.09, + "eval_cer": 0.05849281928271529, + "eval_loss": 0.32632672786712646, + "eval_runtime": 1147.7549, + "eval_samples_per_second": 6.546, + "eval_steps_per_second": 0.819, + "step": 28300 + }, + { + "epoch": 3.1, + "learning_rate": 4.226272389689821e-05, + "loss": 0.2357, + "step": 28350 + }, + { + "epoch": 3.1, + "learning_rate": 4.224907164700743e-05, + "loss": 0.248, + "step": 28400 + }, + { + "epoch": 3.1, + "eval_cer": 0.05890001745135008, + "eval_loss": 0.320720911026001, + "eval_runtime": 1158.2635, + "eval_samples_per_second": 6.486, + "eval_steps_per_second": 0.812, + "step": 28400 + }, + { + "epoch": 3.11, + "learning_rate": 4.223541939711665e-05, + "loss": 0.2138, + "step": 28450 + }, + { + "epoch": 3.11, + "learning_rate": 4.2221767147225865e-05, + "loss": 0.2215, + "step": 28500 + }, + { + "epoch": 3.11, + "eval_cer": 0.05739441077740632, + "eval_loss": 0.32517117261886597, + "eval_runtime": 1159.9181, + "eval_samples_per_second": 6.477, + "eval_steps_per_second": 0.81, + "step": 28500 + }, + { + "epoch": 3.12, + "learning_rate": 4.22083879423329e-05, + "loss": 0.2348, + "step": 28550 + }, + { + "epoch": 3.12, + "learning_rate": 4.2194735692442116e-05, + "loss": 0.2149, + "step": 28600 + }, + { + "epoch": 3.12, + "eval_cer": 0.05660738910693571, + "eval_loss": 0.3221997618675232, + "eval_runtime": 1165.8197, + "eval_samples_per_second": 6.444, + "eval_steps_per_second": 0.806, + "step": 28600 + }, + { + "epoch": 3.13, + "learning_rate": 4.218108344255133e-05, + "loss": 0.237, + "step": 28650 + }, + { + "epoch": 3.13, + "learning_rate": 4.216743119266055e-05, + "loss": 0.2135, + "step": 28700 + }, + { + "epoch": 3.13, + "eval_cer": 0.057018009108920374, + "eval_loss": 0.32162022590637207, + "eval_runtime": 1320.2861, + "eval_samples_per_second": 5.69, + "eval_steps_per_second": 0.712, + "step": 28700 + }, + { + "epoch": 3.14, + "learning_rate": 4.215377894276977e-05, + "loss": 0.2187, + "step": 28750 + }, + { + "epoch": 3.15, + "learning_rate": 4.2140126692878986e-05, + "loss": 0.2358, + "step": 28800 + }, + { + "epoch": 3.15, + "eval_cer": 0.05756550244489993, + "eval_loss": 0.3216935396194458, + "eval_runtime": 1263.617, + "eval_samples_per_second": 5.946, + "eval_steps_per_second": 0.744, + "step": 28800 + }, + { + "epoch": 3.15, + "learning_rate": 4.212647444298821e-05, + "loss": 0.2094, + "step": 28850 + }, + { + "epoch": 3.16, + "learning_rate": 4.2112822193097425e-05, + "loss": 0.2131, + "step": 28900 + }, + { + "epoch": 3.16, + "eval_cer": 0.05689824494167485, + "eval_loss": 0.3266179859638214, + "eval_runtime": 1161.2332, + "eval_samples_per_second": 6.47, + "eval_steps_per_second": 0.809, + "step": 28900 + }, + { + "epoch": 3.16, + "learning_rate": 4.209916994320664e-05, + "loss": 0.2061, + "step": 28950 + }, + { + "epoch": 3.17, + "learning_rate": 4.208551769331586e-05, + "loss": 0.2162, + "step": 29000 + }, + { + "epoch": 3.17, + "eval_cer": 0.05746284744440376, + "eval_loss": 0.32129406929016113, + "eval_runtime": 1211.1118, + "eval_samples_per_second": 6.203, + "eval_steps_per_second": 0.776, + "step": 29000 + }, + { + "epoch": 3.17, + "learning_rate": 4.207186544342508e-05, + "loss": 0.2245, + "step": 29050 + }, + { + "epoch": 3.18, + "learning_rate": 4.2058213193534295e-05, + "loss": 0.221, + "step": 29100 + }, + { + "epoch": 3.18, + "eval_cer": 0.05628573677204773, + "eval_loss": 0.3239326477050781, + "eval_runtime": 1182.7128, + "eval_samples_per_second": 6.352, + "eval_steps_per_second": 0.795, + "step": 29100 + }, + { + "epoch": 3.18, + "learning_rate": 4.204456094364351e-05, + "loss": 0.2451, + "step": 29150 + }, + { + "epoch": 3.19, + "learning_rate": 4.203090869375273e-05, + "loss": 0.2629, + "step": 29200 + }, + { + "epoch": 3.19, + "eval_cer": 0.056980368942071785, + "eval_loss": 0.3187606930732727, + "eval_runtime": 1203.5217, + "eval_samples_per_second": 6.243, + "eval_steps_per_second": 0.781, + "step": 29200 + }, + { + "epoch": 3.19, + "learning_rate": 4.201725644386195e-05, + "loss": 0.2233, + "step": 29250 + }, + { + "epoch": 3.2, + "learning_rate": 4.200360419397117e-05, + "loss": 0.2185, + "step": 29300 + }, + { + "epoch": 3.2, + "eval_cer": 0.05524207760033671, + "eval_loss": 0.31554532051086426, + "eval_runtime": 1201.7452, + "eval_samples_per_second": 6.252, + "eval_steps_per_second": 0.782, + "step": 29300 + }, + { + "epoch": 3.21, + "learning_rate": 4.198995194408039e-05, + "loss": 0.2282, + "step": 29350 + }, + { + "epoch": 3.21, + "learning_rate": 4.19762996941896e-05, + "loss": 0.2115, + "step": 29400 + }, + { + "epoch": 3.21, + "eval_cer": 0.05779818711269124, + "eval_loss": 0.31857335567474365, + "eval_runtime": 1213.1112, + "eval_samples_per_second": 6.193, + "eval_steps_per_second": 0.775, + "step": 29400 + }, + { + "epoch": 3.22, + "learning_rate": 4.1962647444298826e-05, + "loss": 0.2139, + "step": 29450 + }, + { + "epoch": 3.22, + "learning_rate": 4.194899519440804e-05, + "loss": 0.2145, + "step": 29500 + }, + { + "epoch": 3.22, + "eval_cer": 0.06090863362772506, + "eval_loss": 0.3194052278995514, + "eval_runtime": 1182.218, + "eval_samples_per_second": 6.355, + "eval_steps_per_second": 0.795, + "step": 29500 + }, + { + "epoch": 3.23, + "learning_rate": 4.193534294451726e-05, + "loss": 0.2111, + "step": 29550 + }, + { + "epoch": 3.23, + "learning_rate": 4.192169069462647e-05, + "loss": 0.2189, + "step": 29600 + }, + { + "epoch": 3.23, + "eval_cer": 0.057295177610260026, + "eval_loss": 0.3254971206188202, + "eval_runtime": 1158.4532, + "eval_samples_per_second": 6.485, + "eval_steps_per_second": 0.811, + "step": 29600 + }, + { + "epoch": 3.24, + "learning_rate": 4.190803844473569e-05, + "loss": 0.2112, + "step": 29650 + }, + { + "epoch": 3.24, + "learning_rate": 4.189438619484491e-05, + "loss": 0.2175, + "step": 29700 + }, + { + "epoch": 3.24, + "eval_cer": 0.05672373144083137, + "eval_loss": 0.3198707401752472, + "eval_runtime": 1157.0972, + "eval_samples_per_second": 6.493, + "eval_steps_per_second": 0.812, + "step": 29700 + }, + { + "epoch": 3.25, + "learning_rate": 4.1880733944954134e-05, + "loss": 0.2352, + "step": 29750 + }, + { + "epoch": 3.25, + "learning_rate": 4.186708169506335e-05, + "loss": 0.2431, + "step": 29800 + }, + { + "epoch": 3.25, + "eval_cer": 0.05510862609969169, + "eval_loss": 0.3135843276977539, + "eval_runtime": 1163.9894, + "eval_samples_per_second": 6.455, + "eval_steps_per_second": 0.808, + "step": 29800 + }, + { + "epoch": 3.26, + "learning_rate": 4.1853429445172566e-05, + "loss": 0.216, + "step": 29850 + }, + { + "epoch": 3.27, + "learning_rate": 4.183977719528178e-05, + "loss": 0.2095, + "step": 29900 + }, + { + "epoch": 3.27, + "eval_cer": 0.05986839628936391, + "eval_loss": 0.316039502620697, + "eval_runtime": 1182.2319, + "eval_samples_per_second": 6.355, + "eval_steps_per_second": 0.795, + "step": 29900 + }, + { + "epoch": 3.27, + "learning_rate": 4.1826124945391004e-05, + "loss": 0.218, + "step": 29950 + }, + { + "epoch": 3.28, + "learning_rate": 4.181247269550022e-05, + "loss": 0.2449, + "step": 30000 + }, + { + "epoch": 3.28, + "eval_cer": 0.05585116393661396, + "eval_loss": 0.31165117025375366, + "eval_runtime": 1175.7213, + "eval_samples_per_second": 6.39, + "eval_steps_per_second": 0.8, + "step": 30000 + }, + { + "epoch": 3.28, + "learning_rate": 4.1798820445609436e-05, + "loss": 0.2124, + "step": 30050 + }, + { + "epoch": 3.29, + "learning_rate": 4.178516819571865e-05, + "loss": 0.2007, + "step": 30100 + }, + { + "epoch": 3.29, + "eval_cer": 0.05651842143983904, + "eval_loss": 0.31462639570236206, + "eval_runtime": 1169.6031, + "eval_samples_per_second": 6.424, + "eval_steps_per_second": 0.804, + "step": 30100 + }, + { + "epoch": 3.29, + "learning_rate": 4.1771515945827874e-05, + "loss": 0.2019, + "step": 30150 + }, + { + "epoch": 3.3, + "learning_rate": 4.17578636959371e-05, + "loss": 0.2395, + "step": 30200 + }, + { + "epoch": 3.3, + "eval_cer": 0.05408549792807991, + "eval_loss": 0.31565436720848083, + "eval_runtime": 1163.1123, + "eval_samples_per_second": 6.459, + "eval_steps_per_second": 0.808, + "step": 30200 + }, + { + "epoch": 3.3, + "learning_rate": 4.174421144604631e-05, + "loss": 0.2252, + "step": 30250 + }, + { + "epoch": 3.31, + "learning_rate": 4.173055919615553e-05, + "loss": 0.2166, + "step": 30300 + }, + { + "epoch": 3.31, + "eval_cer": 0.05582721110316485, + "eval_loss": 0.3155282437801361, + "eval_runtime": 1216.7838, + "eval_samples_per_second": 6.174, + "eval_steps_per_second": 0.773, + "step": 30300 + }, + { + "epoch": 3.31, + "learning_rate": 4.1716906946264744e-05, + "loss": 0.224, + "step": 30350 + }, + { + "epoch": 3.32, + "learning_rate": 4.170325469637397e-05, + "loss": 0.2512, + "step": 30400 + }, + { + "epoch": 3.32, + "eval_cer": 0.055180484600039006, + "eval_loss": 0.31320416927337646, + "eval_runtime": 1208.865, + "eval_samples_per_second": 6.215, + "eval_steps_per_second": 0.778, + "step": 30400 + }, + { + "epoch": 3.33, + "learning_rate": 4.168960244648318e-05, + "loss": 0.2261, + "step": 30450 + }, + { + "epoch": 3.33, + "learning_rate": 4.16759501965924e-05, + "loss": 0.2319, + "step": 30500 + }, + { + "epoch": 3.33, + "eval_cer": 0.055782727269616514, + "eval_loss": 0.3205634355545044, + "eval_runtime": 1175.9742, + "eval_samples_per_second": 6.389, + "eval_steps_per_second": 0.799, + "step": 30500 + }, + { + "epoch": 3.34, + "learning_rate": 4.1662297946701614e-05, + "loss": 0.222, + "step": 30550 + }, + { + "epoch": 3.34, + "learning_rate": 4.164864569681084e-05, + "loss": 0.2333, + "step": 30600 + }, + { + "epoch": 3.34, + "eval_cer": 0.05437977559616892, + "eval_loss": 0.31233859062194824, + "eval_runtime": 1187.2819, + "eval_samples_per_second": 6.328, + "eval_steps_per_second": 0.792, + "step": 30600 + }, + { + "epoch": 3.35, + "learning_rate": 4.163499344692006e-05, + "loss": 0.2463, + "step": 30650 + }, + { + "epoch": 3.35, + "learning_rate": 4.1621341197029275e-05, + "loss": 0.2376, + "step": 30700 + }, + { + "epoch": 3.35, + "eval_cer": 0.05478355193145384, + "eval_loss": 0.31242722272872925, + "eval_runtime": 1189.2518, + "eval_samples_per_second": 6.317, + "eval_steps_per_second": 0.79, + "step": 30700 + }, + { + "epoch": 3.36, + "learning_rate": 4.160768894713849e-05, + "loss": 0.2144, + "step": 30750 + }, + { + "epoch": 3.36, + "learning_rate": 4.159403669724771e-05, + "loss": 0.2149, + "step": 30800 + }, + { + "epoch": 3.36, + "eval_cer": 0.05400337392768297, + "eval_loss": 0.31088629364967346, + "eval_runtime": 1204.1106, + "eval_samples_per_second": 6.239, + "eval_steps_per_second": 0.781, + "step": 30800 + }, + { + "epoch": 3.37, + "learning_rate": 4.158038444735692e-05, + "loss": 0.2245, + "step": 30850 + }, + { + "epoch": 3.37, + "learning_rate": 4.1566732197466145e-05, + "loss": 0.2346, + "step": 30900 + }, + { + "epoch": 3.37, + "eval_cer": 0.053336116424457895, + "eval_loss": 0.3114325702190399, + "eval_runtime": 1199.4806, + "eval_samples_per_second": 6.264, + "eval_steps_per_second": 0.784, + "step": 30900 + }, + { + "epoch": 3.38, + "learning_rate": 4.155307994757536e-05, + "loss": 0.2346, + "step": 30950 + }, + { + "epoch": 3.39, + "learning_rate": 4.153942769768458e-05, + "loss": 0.22, + "step": 31000 + }, + { + "epoch": 3.39, + "eval_cer": 0.05588196043676281, + "eval_loss": 0.3127199113368988, + "eval_runtime": 1171.8911, + "eval_samples_per_second": 6.411, + "eval_steps_per_second": 0.802, + "step": 31000 + }, + { + "epoch": 3.39, + "learning_rate": 4.15257754477938e-05, + "loss": 0.2366, + "step": 31050 + }, + { + "epoch": 3.4, + "learning_rate": 4.151212319790302e-05, + "loss": 0.2589, + "step": 31100 + }, + { + "epoch": 3.4, + "eval_cer": 0.0532129304238625, + "eval_loss": 0.3078201115131378, + "eval_runtime": 1179.1501, + "eval_samples_per_second": 6.372, + "eval_steps_per_second": 0.797, + "step": 31100 + }, + { + "epoch": 3.4, + "learning_rate": 4.149847094801224e-05, + "loss": 0.2256, + "step": 31150 + }, + { + "epoch": 3.41, + "learning_rate": 4.1484818698121454e-05, + "loss": 0.2331, + "step": 31200 + }, + { + "epoch": 3.41, + "eval_cer": 0.055495293268227254, + "eval_loss": 0.30701497197151184, + "eval_runtime": 1363.5561, + "eval_samples_per_second": 5.51, + "eval_steps_per_second": 0.689, + "step": 31200 + }, + { + "epoch": 3.41, + "learning_rate": 4.147116644823067e-05, + "loss": 0.2491, + "step": 31250 + }, + { + "epoch": 3.42, + "learning_rate": 4.1457514198339885e-05, + "loss": 0.1912, + "step": 31300 + }, + { + "epoch": 3.42, + "eval_cer": 0.054999127432495784, + "eval_loss": 0.31193986535072327, + "eval_runtime": 1470.9109, + "eval_samples_per_second": 5.108, + "eval_steps_per_second": 0.639, + "step": 31300 + }, + { + "epoch": 3.42, + "learning_rate": 4.144386194844911e-05, + "loss": 0.2294, + "step": 31350 + }, + { + "epoch": 3.43, + "learning_rate": 4.1430209698558324e-05, + "loss": 0.2178, + "step": 31400 + }, + { + "epoch": 3.43, + "eval_cer": 0.055721134269318816, + "eval_loss": 0.31223970651626587, + "eval_runtime": 1504.6961, + "eval_samples_per_second": 4.993, + "eval_steps_per_second": 0.625, + "step": 31400 + }, + { + "epoch": 3.43, + "learning_rate": 4.141655744866754e-05, + "loss": 0.227, + "step": 31450 + }, + { + "epoch": 3.44, + "learning_rate": 4.140290519877676e-05, + "loss": 0.2341, + "step": 31500 + }, + { + "epoch": 3.44, + "eval_cer": 0.05363723775924665, + "eval_loss": 0.3130796253681183, + "eval_runtime": 1438.0987, + "eval_samples_per_second": 5.224, + "eval_steps_per_second": 0.654, + "step": 31500 + }, + { + "epoch": 3.45, + "learning_rate": 4.138925294888598e-05, + "loss": 0.2199, + "step": 31550 + }, + { + "epoch": 3.45, + "learning_rate": 4.13756006989952e-05, + "loss": 0.227, + "step": 31600 + }, + { + "epoch": 3.45, + "eval_cer": 0.052867325255525406, + "eval_loss": 0.3069811761379242, + "eval_runtime": 1423.702, + "eval_samples_per_second": 5.277, + "eval_steps_per_second": 0.66, + "step": 31600 + }, + { + "epoch": 3.46, + "learning_rate": 4.1361948449104417e-05, + "loss": 0.2467, + "step": 31650 + }, + { + "epoch": 3.46, + "learning_rate": 4.134856924421145e-05, + "loss": 0.2168, + "step": 31700 + }, + { + "epoch": 3.46, + "eval_cer": 0.057274646610160795, + "eval_loss": 0.30950018763542175, + "eval_runtime": 1485.941, + "eval_samples_per_second": 5.056, + "eval_steps_per_second": 0.633, + "step": 31700 + }, + { + "epoch": 3.47, + "learning_rate": 4.133491699432067e-05, + "loss": 0.2426, + "step": 31750 + }, + { + "epoch": 3.47, + "learning_rate": 4.132126474442988e-05, + "loss": 0.2088, + "step": 31800 + }, + { + "epoch": 3.47, + "eval_cer": 0.05539263826773109, + "eval_loss": 0.3096722960472107, + "eval_runtime": 1520.0393, + "eval_samples_per_second": 4.943, + "eval_steps_per_second": 0.618, + "step": 31800 + }, + { + "epoch": 3.48, + "learning_rate": 4.13076124945391e-05, + "loss": 0.2269, + "step": 31850 + }, + { + "epoch": 3.48, + "learning_rate": 4.129396024464832e-05, + "loss": 0.2332, + "step": 31900 + }, + { + "epoch": 3.48, + "eval_cer": 0.05160124691607269, + "eval_loss": 0.3041413426399231, + "eval_runtime": 1479.0494, + "eval_samples_per_second": 5.08, + "eval_steps_per_second": 0.636, + "step": 31900 + }, + { + "epoch": 3.49, + "learning_rate": 4.128030799475754e-05, + "loss": 0.2293, + "step": 31950 + }, + { + "epoch": 3.49, + "learning_rate": 4.126665574486675e-05, + "loss": 0.2501, + "step": 32000 + }, + { + "epoch": 3.49, + "eval_cer": 0.05571429060261907, + "eval_loss": 0.3081224262714386, + "eval_runtime": 1376.0064, + "eval_samples_per_second": 5.46, + "eval_steps_per_second": 0.683, + "step": 32000 + }, + { + "epoch": 3.5, + "learning_rate": 4.1253003494975976e-05, + "loss": 0.2398, + "step": 32050 + }, + { + "epoch": 3.51, + "learning_rate": 4.123935124508519e-05, + "loss": 0.2375, + "step": 32100 + }, + { + "epoch": 3.51, + "eval_cer": 0.053113697256716205, + "eval_loss": 0.3007121980190277, + "eval_runtime": 1448.1367, + "eval_samples_per_second": 5.188, + "eval_steps_per_second": 0.649, + "step": 32100 + }, + { + "epoch": 3.51, + "learning_rate": 4.1225698995194414e-05, + "loss": 0.2178, + "step": 32150 + }, + { + "epoch": 3.52, + "learning_rate": 4.121204674530363e-05, + "loss": 0.213, + "step": 32200 + }, + { + "epoch": 3.52, + "eval_cer": 0.05521470293353773, + "eval_loss": 0.3040596842765808, + "eval_runtime": 1193.1522, + "eval_samples_per_second": 6.297, + "eval_steps_per_second": 0.788, + "step": 32200 + }, + { + "epoch": 3.52, + "learning_rate": 4.1198394495412846e-05, + "loss": 0.1994, + "step": 32250 + }, + { + "epoch": 3.53, + "learning_rate": 4.118474224552206e-05, + "loss": 0.2116, + "step": 32300 + }, + { + "epoch": 3.53, + "eval_cer": 0.05221717691904969, + "eval_loss": 0.3014806807041168, + "eval_runtime": 1146.7847, + "eval_samples_per_second": 6.551, + "eval_steps_per_second": 0.82, + "step": 32300 + }, + { + "epoch": 3.53, + "learning_rate": 4.1171089995631284e-05, + "loss": 0.2131, + "step": 32350 + }, + { + "epoch": 3.54, + "learning_rate": 4.11574377457405e-05, + "loss": 0.2425, + "step": 32400 + }, + { + "epoch": 3.54, + "eval_cer": 0.058444913615817086, + "eval_loss": 0.3058461546897888, + "eval_runtime": 1212.9428, + "eval_samples_per_second": 6.194, + "eval_steps_per_second": 0.775, + "step": 32400 + }, + { + "epoch": 3.54, + "learning_rate": 4.1143785495849716e-05, + "loss": 0.2267, + "step": 32450 + }, + { + "epoch": 3.55, + "learning_rate": 4.113013324595894e-05, + "loss": 0.2195, + "step": 32500 + }, + { + "epoch": 3.55, + "eval_cer": 0.05577588360291677, + "eval_loss": 0.302491158246994, + "eval_runtime": 1260.9141, + "eval_samples_per_second": 5.958, + "eval_steps_per_second": 0.745, + "step": 32500 + }, + { + "epoch": 3.56, + "learning_rate": 4.1116480996068154e-05, + "loss": 0.2363, + "step": 32550 + }, + { + "epoch": 3.56, + "learning_rate": 4.110282874617738e-05, + "loss": 0.217, + "step": 32600 + }, + { + "epoch": 3.56, + "eval_cer": 0.05627547127199811, + "eval_loss": 0.30593034625053406, + "eval_runtime": 1316.3658, + "eval_samples_per_second": 5.707, + "eval_steps_per_second": 0.714, + "step": 32600 + }, + { + "epoch": 3.57, + "learning_rate": 4.108917649628659e-05, + "loss": 0.2104, + "step": 32650 + }, + { + "epoch": 3.57, + "learning_rate": 4.107552424639581e-05, + "loss": 0.226, + "step": 32700 + }, + { + "epoch": 3.57, + "eval_cer": 0.05334638192450751, + "eval_loss": 0.30213332176208496, + "eval_runtime": 1175.6285, + "eval_samples_per_second": 6.391, + "eval_steps_per_second": 0.8, + "step": 32700 + }, + { + "epoch": 3.58, + "learning_rate": 4.1061871996505024e-05, + "loss": 0.2309, + "step": 32750 + }, + { + "epoch": 3.58, + "learning_rate": 4.104821974661424e-05, + "loss": 0.245, + "step": 32800 + }, + { + "epoch": 3.58, + "eval_cer": 0.05487251959855051, + "eval_loss": 0.30643877387046814, + "eval_runtime": 1166.4446, + "eval_samples_per_second": 6.441, + "eval_steps_per_second": 0.806, + "step": 32800 + }, + { + "epoch": 3.59, + "learning_rate": 4.103456749672346e-05, + "loss": 0.2258, + "step": 32850 + }, + { + "epoch": 3.59, + "learning_rate": 4.102091524683268e-05, + "loss": 0.2279, + "step": 32900 + }, + { + "epoch": 3.59, + "eval_cer": 0.053449036925003676, + "eval_loss": 0.3022903501987457, + "eval_runtime": 1152.2154, + "eval_samples_per_second": 6.52, + "eval_steps_per_second": 0.816, + "step": 32900 + }, + { + "epoch": 3.6, + "learning_rate": 4.1007262996941894e-05, + "loss": 0.2067, + "step": 32950 + }, + { + "epoch": 3.6, + "learning_rate": 4.099361074705112e-05, + "loss": 0.2144, + "step": 33000 + }, + { + "epoch": 3.6, + "eval_cer": 0.052997354922820546, + "eval_loss": 0.298502653837204, + "eval_runtime": 1158.0782, + "eval_samples_per_second": 6.487, + "eval_steps_per_second": 0.812, + "step": 33000 + }, + { + "epoch": 3.61, + "learning_rate": 4.097995849716034e-05, + "loss": 0.2372, + "step": 33050 + }, + { + "epoch": 3.62, + "learning_rate": 4.0966306247269555e-05, + "loss": 0.254, + "step": 33100 + }, + { + "epoch": 3.62, + "eval_cer": 0.05426685509562313, + "eval_loss": 0.30310696363449097, + "eval_runtime": 1161.4263, + "eval_samples_per_second": 6.469, + "eval_steps_per_second": 0.809, + "step": 33100 + }, + { + "epoch": 3.62, + "learning_rate": 4.095265399737877e-05, + "loss": 0.2082, + "step": 33150 + }, + { + "epoch": 3.63, + "learning_rate": 4.093900174748799e-05, + "loss": 0.2498, + "step": 33200 + }, + { + "epoch": 3.63, + "eval_cer": 0.05420526209532543, + "eval_loss": 0.29893621802330017, + "eval_runtime": 1174.6966, + "eval_samples_per_second": 6.396, + "eval_steps_per_second": 0.8, + "step": 33200 + }, + { + "epoch": 3.63, + "learning_rate": 4.09253494975972e-05, + "loss": 0.2326, + "step": 33250 + }, + { + "epoch": 3.64, + "learning_rate": 4.0911697247706425e-05, + "loss": 0.2402, + "step": 33300 + }, + { + "epoch": 3.64, + "eval_cer": 0.057024852775620125, + "eval_loss": 0.30172908306121826, + "eval_runtime": 1168.0386, + "eval_samples_per_second": 6.432, + "eval_steps_per_second": 0.805, + "step": 33300 + }, + { + "epoch": 3.64, + "learning_rate": 4.089804499781564e-05, + "loss": 0.2413, + "step": 33350 + }, + { + "epoch": 3.65, + "learning_rate": 4.088439274792486e-05, + "loss": 0.2352, + "step": 33400 + }, + { + "epoch": 3.65, + "eval_cer": 0.05724042827666207, + "eval_loss": 0.29973098635673523, + "eval_runtime": 1177.8105, + "eval_samples_per_second": 6.379, + "eval_steps_per_second": 0.798, + "step": 33400 + }, + { + "epoch": 3.65, + "learning_rate": 4.087074049803408e-05, + "loss": 0.2159, + "step": 33450 + }, + { + "epoch": 3.66, + "learning_rate": 4.0857088248143295e-05, + "loss": 0.2342, + "step": 33500 + }, + { + "epoch": 3.66, + "eval_cer": 0.05494095626554796, + "eval_loss": 0.3017541170120239, + "eval_runtime": 1163.4535, + "eval_samples_per_second": 6.457, + "eval_steps_per_second": 0.808, + "step": 33500 + }, + { + "epoch": 3.66, + "learning_rate": 4.084343599825252e-05, + "loss": 0.2366, + "step": 33550 + }, + { + "epoch": 3.67, + "learning_rate": 4.0829783748361734e-05, + "loss": 0.2155, + "step": 33600 + }, + { + "epoch": 3.67, + "eval_cer": 0.055628744768872265, + "eval_loss": 0.3011651635169983, + "eval_runtime": 1171.9956, + "eval_samples_per_second": 6.41, + "eval_steps_per_second": 0.802, + "step": 33600 + }, + { + "epoch": 3.68, + "learning_rate": 4.081613149847095e-05, + "loss": 0.2168, + "step": 33650 + }, + { + "epoch": 3.68, + "learning_rate": 4.0802479248580165e-05, + "loss": 0.219, + "step": 33700 + }, + { + "epoch": 3.68, + "eval_cer": 0.05231298825284611, + "eval_loss": 0.2997644245624542, + "eval_runtime": 1153.1499, + "eval_samples_per_second": 6.515, + "eval_steps_per_second": 0.815, + "step": 33700 + }, + { + "epoch": 3.69, + "learning_rate": 4.078882699868938e-05, + "loss": 0.2006, + "step": 33750 + }, + { + "epoch": 3.69, + "learning_rate": 4.0775174748798604e-05, + "loss": 0.2, + "step": 33800 + }, + { + "epoch": 3.69, + "eval_cer": 0.05348667709185227, + "eval_loss": 0.2957291901111603, + "eval_runtime": 1161.6858, + "eval_samples_per_second": 6.467, + "eval_steps_per_second": 0.809, + "step": 33800 + }, + { + "epoch": 3.7, + "learning_rate": 4.076152249890782e-05, + "loss": 0.2528, + "step": 33850 + }, + { + "epoch": 3.7, + "learning_rate": 4.074787024901704e-05, + "loss": 0.2247, + "step": 33900 + }, + { + "epoch": 3.7, + "eval_cer": 0.05367145609274537, + "eval_loss": 0.29320961236953735, + "eval_runtime": 1170.6891, + "eval_samples_per_second": 6.418, + "eval_steps_per_second": 0.803, + "step": 33900 + }, + { + "epoch": 3.71, + "learning_rate": 4.073421799912626e-05, + "loss": 0.2122, + "step": 33950 + }, + { + "epoch": 3.71, + "learning_rate": 4.072056574923548e-05, + "loss": 0.2386, + "step": 34000 + }, + { + "epoch": 3.71, + "eval_cer": 0.05359617575904818, + "eval_loss": 0.29429566860198975, + "eval_runtime": 1174.7513, + "eval_samples_per_second": 6.395, + "eval_steps_per_second": 0.8, + "step": 34000 + }, + { + "epoch": 3.72, + "learning_rate": 4.0706913499344696e-05, + "loss": 0.2547, + "step": 34050 + }, + { + "epoch": 3.72, + "learning_rate": 4.069326124945391e-05, + "loss": 0.2235, + "step": 34100 + }, + { + "epoch": 3.72, + "eval_cer": 0.05282284142197707, + "eval_loss": 0.293638676404953, + "eval_runtime": 1171.6751, + "eval_samples_per_second": 6.412, + "eval_steps_per_second": 0.802, + "step": 34100 + }, + { + "epoch": 3.73, + "learning_rate": 4.067960899956313e-05, + "loss": 0.2224, + "step": 34150 + }, + { + "epoch": 3.74, + "learning_rate": 4.0665956749672344e-05, + "loss": 0.2245, + "step": 34200 + }, + { + "epoch": 3.74, + "eval_cer": 0.052275348085997514, + "eval_loss": 0.30025115609169006, + "eval_runtime": 1146.2881, + "eval_samples_per_second": 6.554, + "eval_steps_per_second": 0.82, + "step": 34200 + }, + { + "epoch": 3.74, + "learning_rate": 4.0652304499781567e-05, + "loss": 0.2471, + "step": 34250 + }, + { + "epoch": 3.75, + "learning_rate": 4.063865224989078e-05, + "loss": 0.2363, + "step": 34300 + }, + { + "epoch": 3.75, + "eval_cer": 0.053336116424457895, + "eval_loss": 0.2921861410140991, + "eval_runtime": 1188.9193, + "eval_samples_per_second": 6.319, + "eval_steps_per_second": 0.791, + "step": 34300 + }, + { + "epoch": 3.75, + "learning_rate": 4.0625000000000005e-05, + "loss": 0.2122, + "step": 34350 + }, + { + "epoch": 3.76, + "learning_rate": 4.061134775010922e-05, + "loss": 0.2613, + "step": 34400 + }, + { + "epoch": 3.76, + "eval_cer": 0.05454060176361291, + "eval_loss": 0.29321932792663574, + "eval_runtime": 1219.9866, + "eval_samples_per_second": 6.158, + "eval_steps_per_second": 0.771, + "step": 34400 + }, + { + "epoch": 3.76, + "learning_rate": 4.0597695500218437e-05, + "loss": 0.2628, + "step": 34450 + }, + { + "epoch": 3.77, + "learning_rate": 4.058404325032766e-05, + "loss": 0.2226, + "step": 34500 + }, + { + "epoch": 3.77, + "eval_cer": 0.051833931583864006, + "eval_loss": 0.29715830087661743, + "eval_runtime": 1206.9387, + "eval_samples_per_second": 6.225, + "eval_steps_per_second": 0.779, + "step": 34500 + }, + { + "epoch": 3.77, + "learning_rate": 4.0570391000436875e-05, + "loss": 0.1874, + "step": 34550 + }, + { + "epoch": 3.78, + "learning_rate": 4.055673875054609e-05, + "loss": 0.244, + "step": 34600 + }, + { + "epoch": 3.78, + "eval_cer": 0.051714167416618474, + "eval_loss": 0.29057419300079346, + "eval_runtime": 1202.0051, + "eval_samples_per_second": 6.25, + "eval_steps_per_second": 0.782, + "step": 34600 + }, + { + "epoch": 3.78, + "learning_rate": 4.0543086500655307e-05, + "loss": 0.2246, + "step": 34650 + }, + { + "epoch": 3.79, + "learning_rate": 4.052943425076452e-05, + "loss": 0.2198, + "step": 34700 + }, + { + "epoch": 3.79, + "eval_cer": 0.05181340058376477, + "eval_loss": 0.2926875650882721, + "eval_runtime": 1199.7282, + "eval_samples_per_second": 6.262, + "eval_steps_per_second": 0.784, + "step": 34700 + }, + { + "epoch": 3.8, + "learning_rate": 4.0515782000873745e-05, + "loss": 0.2338, + "step": 34750 + }, + { + "epoch": 3.8, + "learning_rate": 4.050212975098297e-05, + "loss": 0.2304, + "step": 34800 + }, + { + "epoch": 3.8, + "eval_cer": 0.05333953825780777, + "eval_loss": 0.2972395420074463, + "eval_runtime": 1220.1743, + "eval_samples_per_second": 6.157, + "eval_steps_per_second": 0.77, + "step": 34800 + }, + { + "epoch": 3.81, + "learning_rate": 4.0488750546089996e-05, + "loss": 0.2541, + "step": 34850 + }, + { + "epoch": 3.81, + "learning_rate": 4.047509829619922e-05, + "loss": 0.2478, + "step": 34900 + }, + { + "epoch": 3.81, + "eval_cer": 0.052597000420885505, + "eval_loss": 0.2926911413669586, + "eval_runtime": 1192.6912, + "eval_samples_per_second": 6.299, + "eval_steps_per_second": 0.788, + "step": 34900 + }, + { + "epoch": 3.82, + "learning_rate": 4.0461446046308434e-05, + "loss": 0.224, + "step": 34950 + }, + { + "epoch": 3.82, + "learning_rate": 4.044779379641765e-05, + "loss": 0.2234, + "step": 35000 + }, + { + "epoch": 3.82, + "eval_cer": 0.05634048610564568, + "eval_loss": 0.29346540570259094, + "eval_runtime": 1213.2584, + "eval_samples_per_second": 6.192, + "eval_steps_per_second": 0.775, + "step": 35000 + }, + { + "epoch": 3.83, + "learning_rate": 4.043414154652687e-05, + "loss": 0.2257, + "step": 35050 + }, + { + "epoch": 3.83, + "learning_rate": 4.042048929663609e-05, + "loss": 0.2041, + "step": 35100 + }, + { + "epoch": 3.83, + "eval_cer": 0.05821907261472552, + "eval_loss": 0.2906344532966614, + "eval_runtime": 1243.2388, + "eval_samples_per_second": 6.043, + "eval_steps_per_second": 0.756, + "step": 35100 + }, + { + "epoch": 3.84, + "learning_rate": 4.0406837046745304e-05, + "loss": 0.2152, + "step": 35150 + }, + { + "epoch": 3.84, + "learning_rate": 4.039318479685452e-05, + "loss": 0.2172, + "step": 35200 + }, + { + "epoch": 3.84, + "eval_cer": 0.05438661926286866, + "eval_loss": 0.29344433546066284, + "eval_runtime": 1198.4162, + "eval_samples_per_second": 6.269, + "eval_steps_per_second": 0.784, + "step": 35200 + }, + { + "epoch": 3.85, + "learning_rate": 4.037953254696374e-05, + "loss": 0.2172, + "step": 35250 + }, + { + "epoch": 3.86, + "learning_rate": 4.036588029707296e-05, + "loss": 0.2321, + "step": 35300 + }, + { + "epoch": 3.86, + "eval_cer": 0.05137198408163126, + "eval_loss": 0.29439395666122437, + "eval_runtime": 1168.9807, + "eval_samples_per_second": 6.427, + "eval_steps_per_second": 0.804, + "step": 35300 + }, + { + "epoch": 3.86, + "learning_rate": 4.035222804718218e-05, + "loss": 0.2222, + "step": 35350 + }, + { + "epoch": 3.87, + "learning_rate": 4.03385757972914e-05, + "loss": 0.2207, + "step": 35400 + }, + { + "epoch": 3.87, + "eval_cer": 0.05017092057582612, + "eval_loss": 0.2857843339443207, + "eval_runtime": 1150.2318, + "eval_samples_per_second": 6.532, + "eval_steps_per_second": 0.817, + "step": 35400 + }, + { + "epoch": 3.87, + "learning_rate": 4.032492354740061e-05, + "loss": 0.2155, + "step": 35450 + }, + { + "epoch": 3.88, + "learning_rate": 4.0311271297509835e-05, + "loss": 0.2405, + "step": 35500 + }, + { + "epoch": 3.88, + "eval_cer": 0.051833931583864006, + "eval_loss": 0.28927794098854065, + "eval_runtime": 1156.4708, + "eval_samples_per_second": 6.496, + "eval_steps_per_second": 0.813, + "step": 35500 + }, + { + "epoch": 3.88, + "learning_rate": 4.029761904761905e-05, + "loss": 0.2211, + "step": 35550 + }, + { + "epoch": 3.89, + "learning_rate": 4.028396679772827e-05, + "loss": 0.2579, + "step": 35600 + }, + { + "epoch": 3.89, + "eval_cer": 0.05231641008619598, + "eval_loss": 0.28790414333343506, + "eval_runtime": 1154.5522, + "eval_samples_per_second": 6.507, + "eval_steps_per_second": 0.814, + "step": 35600 + }, + { + "epoch": 3.89, + "learning_rate": 4.027031454783748e-05, + "loss": 0.2331, + "step": 35650 + }, + { + "epoch": 3.9, + "learning_rate": 4.02566622979467e-05, + "loss": 0.2188, + "step": 35700 + }, + { + "epoch": 3.9, + "eval_cer": 0.05226850441929777, + "eval_loss": 0.2905963063240051, + "eval_runtime": 1151.8907, + "eval_samples_per_second": 6.522, + "eval_steps_per_second": 0.816, + "step": 35700 + }, + { + "epoch": 3.9, + "learning_rate": 4.024301004805592e-05, + "loss": 0.2364, + "step": 35750 + }, + { + "epoch": 3.91, + "learning_rate": 4.0229357798165144e-05, + "loss": 0.205, + "step": 35800 + }, + { + "epoch": 3.91, + "eval_cer": 0.053407974924805215, + "eval_loss": 0.2937009036540985, + "eval_runtime": 1188.6581, + "eval_samples_per_second": 6.321, + "eval_steps_per_second": 0.791, + "step": 35800 + }, + { + "epoch": 3.92, + "learning_rate": 4.021570554827436e-05, + "loss": 0.2184, + "step": 35850 + }, + { + "epoch": 3.92, + "learning_rate": 4.0202053298383575e-05, + "loss": 0.2432, + "step": 35900 + }, + { + "epoch": 3.92, + "eval_cer": 0.051584137749323335, + "eval_loss": 0.287141352891922, + "eval_runtime": 1147.8604, + "eval_samples_per_second": 6.545, + "eval_steps_per_second": 0.819, + "step": 35900 + }, + { + "epoch": 3.93, + "learning_rate": 4.018840104849279e-05, + "loss": 0.2394, + "step": 35950 + }, + { + "epoch": 3.93, + "learning_rate": 4.0174748798602014e-05, + "loss": 0.2324, + "step": 36000 + }, + { + "epoch": 3.93, + "eval_cer": 0.050954520412946847, + "eval_loss": 0.2883908152580261, + "eval_runtime": 1153.0185, + "eval_samples_per_second": 6.516, + "eval_steps_per_second": 0.815, + "step": 36000 + }, + { + "epoch": 3.94, + "learning_rate": 4.016109654871123e-05, + "loss": 0.2182, + "step": 36050 + }, + { + "epoch": 3.94, + "learning_rate": 4.0147444298820445e-05, + "loss": 0.22, + "step": 36100 + }, + { + "epoch": 3.94, + "eval_cer": 0.0522650825859479, + "eval_loss": 0.2854180932044983, + "eval_runtime": 1185.1981, + "eval_samples_per_second": 6.339, + "eval_steps_per_second": 0.793, + "step": 36100 + }, + { + "epoch": 3.95, + "learning_rate": 4.013379204892966e-05, + "loss": 0.2301, + "step": 36150 + }, + { + "epoch": 3.95, + "learning_rate": 4.0120139799038884e-05, + "loss": 0.2026, + "step": 36200 + }, + { + "epoch": 3.95, + "eval_cer": 0.051697058249869116, + "eval_loss": 0.2849681079387665, + "eval_runtime": 1167.8292, + "eval_samples_per_second": 6.433, + "eval_steps_per_second": 0.805, + "step": 36200 + }, + { + "epoch": 3.96, + "learning_rate": 4.01064875491481e-05, + "loss": 0.2268, + "step": 36250 + }, + { + "epoch": 3.96, + "learning_rate": 4.009283529925732e-05, + "loss": 0.2164, + "step": 36300 + }, + { + "epoch": 3.96, + "eval_cer": 0.050372808743468576, + "eval_loss": 0.28152966499328613, + "eval_runtime": 1156.0778, + "eval_samples_per_second": 6.499, + "eval_steps_per_second": 0.813, + "step": 36300 + }, + { + "epoch": 3.97, + "learning_rate": 4.007918304936654e-05, + "loss": 0.2254, + "step": 36350 + }, + { + "epoch": 3.98, + "learning_rate": 4.0065530799475754e-05, + "loss": 0.2153, + "step": 36400 + }, + { + "epoch": 3.98, + "eval_cer": 0.05203924158485634, + "eval_loss": 0.2812017798423767, + "eval_runtime": 1170.4653, + "eval_samples_per_second": 6.419, + "eval_steps_per_second": 0.803, + "step": 36400 + }, + { + "epoch": 3.98, + "learning_rate": 4.0051878549584976e-05, + "loss": 0.2031, + "step": 36450 + }, + { + "epoch": 3.99, + "learning_rate": 4.003822629969419e-05, + "loss": 0.2128, + "step": 36500 + }, + { + "epoch": 3.99, + "eval_cer": 0.05301788592291978, + "eval_loss": 0.2868877649307251, + "eval_runtime": 1161.9761, + "eval_samples_per_second": 6.466, + "eval_steps_per_second": 0.809, + "step": 36500 + }, + { + "epoch": 3.99, + "learning_rate": 4.002457404980341e-05, + "loss": 0.2131, + "step": 36550 + }, + { + "epoch": 4.0, + "learning_rate": 4.0010921799912624e-05, + "loss": 0.2209, + "step": 36600 + }, + { + "epoch": 4.0, + "eval_cer": 0.050047734575230714, + "eval_loss": 0.28464749455451965, + "eval_runtime": 1152.6846, + "eval_samples_per_second": 6.518, + "eval_steps_per_second": 0.815, + "step": 36600 + }, + { + "epoch": 4.0, + "learning_rate": 3.9997269550021846e-05, + "loss": 0.1963, + "step": 36650 + }, + { + "epoch": 4.01, + "learning_rate": 3.998361730013106e-05, + "loss": 0.1744, + "step": 36700 + }, + { + "epoch": 4.01, + "eval_cer": 0.055201015600138244, + "eval_loss": 0.29001498222351074, + "eval_runtime": 1191.6495, + "eval_samples_per_second": 6.305, + "eval_steps_per_second": 0.789, + "step": 36700 + }, + { + "epoch": 4.01, + "learning_rate": 3.9969965050240285e-05, + "loss": 0.1677, + "step": 36750 + }, + { + "epoch": 4.02, + "learning_rate": 3.99563128003495e-05, + "loss": 0.1774, + "step": 36800 + }, + { + "epoch": 4.02, + "eval_cer": 0.05026331007627267, + "eval_loss": 0.2934856712818146, + "eval_runtime": 1168.1862, + "eval_samples_per_second": 6.431, + "eval_steps_per_second": 0.805, + "step": 36800 + }, + { + "epoch": 4.02, + "learning_rate": 3.9942660550458716e-05, + "loss": 0.1708, + "step": 36850 + }, + { + "epoch": 4.03, + "learning_rate": 3.992900830056793e-05, + "loss": 0.1749, + "step": 36900 + }, + { + "epoch": 4.03, + "eval_cer": 0.05236431575309419, + "eval_loss": 0.2882940471172333, + "eval_runtime": 1173.1669, + "eval_samples_per_second": 6.404, + "eval_steps_per_second": 0.801, + "step": 36900 + }, + { + "epoch": 4.04, + "learning_rate": 3.9915356050677155e-05, + "loss": 0.159, + "step": 36950 + }, + { + "epoch": 4.04, + "learning_rate": 3.990170380078637e-05, + "loss": 0.166, + "step": 37000 + }, + { + "epoch": 4.04, + "eval_cer": 0.05076974141205375, + "eval_loss": 0.28926318883895874, + "eval_runtime": 1465.1032, + "eval_samples_per_second": 5.128, + "eval_steps_per_second": 0.642, + "step": 37000 + }, + { + "epoch": 4.05, + "learning_rate": 3.9888051550895587e-05, + "loss": 0.1893, + "step": 37050 + }, + { + "epoch": 4.05, + "learning_rate": 3.98743993010048e-05, + "loss": 0.184, + "step": 37100 + }, + { + "epoch": 4.05, + "eval_cer": 0.050547322244312055, + "eval_loss": 0.28999829292297363, + "eval_runtime": 1279.6549, + "eval_samples_per_second": 5.871, + "eval_steps_per_second": 0.735, + "step": 37100 + }, + { + "epoch": 4.06, + "learning_rate": 3.9860747051114025e-05, + "loss": 0.1672, + "step": 37150 + }, + { + "epoch": 4.06, + "learning_rate": 3.984709480122325e-05, + "loss": 0.1614, + "step": 37200 + }, + { + "epoch": 4.06, + "eval_cer": 0.05136856224828138, + "eval_loss": 0.28663578629493713, + "eval_runtime": 1454.1636, + "eval_samples_per_second": 5.167, + "eval_steps_per_second": 0.646, + "step": 37200 + }, + { + "epoch": 4.07, + "learning_rate": 3.983344255133246e-05, + "loss": 0.1651, + "step": 37250 + }, + { + "epoch": 4.07, + "learning_rate": 3.981979030144168e-05, + "loss": 0.1842, + "step": 37300 + }, + { + "epoch": 4.07, + "eval_cer": 0.05069788291170643, + "eval_loss": 0.29018309712409973, + "eval_runtime": 1179.9051, + "eval_samples_per_second": 6.367, + "eval_steps_per_second": 0.797, + "step": 37300 + }, + { + "epoch": 4.08, + "learning_rate": 3.9806138051550895e-05, + "loss": 0.1817, + "step": 37350 + }, + { + "epoch": 4.08, + "learning_rate": 3.979248580166012e-05, + "loss": 0.1681, + "step": 37400 + }, + { + "epoch": 4.08, + "eval_cer": 0.0527201864214809, + "eval_loss": 0.29232168197631836, + "eval_runtime": 1152.2145, + "eval_samples_per_second": 6.52, + "eval_steps_per_second": 0.816, + "step": 37400 + }, + { + "epoch": 4.09, + "learning_rate": 3.9778833551769333e-05, + "loss": 0.1687, + "step": 37450 + }, + { + "epoch": 4.1, + "learning_rate": 3.976518130187855e-05, + "loss": 0.1613, + "step": 37500 + }, + { + "epoch": 4.1, + "eval_cer": 0.05114272124718982, + "eval_loss": 0.28755250573158264, + "eval_runtime": 1157.0153, + "eval_samples_per_second": 6.493, + "eval_steps_per_second": 0.812, + "step": 37500 + }, + { + "epoch": 4.1, + "learning_rate": 3.9751529051987765e-05, + "loss": 0.183, + "step": 37550 + }, + { + "epoch": 4.11, + "learning_rate": 3.973787680209699e-05, + "loss": 0.1678, + "step": 37600 + }, + { + "epoch": 4.11, + "eval_cer": 0.051245376247685985, + "eval_loss": 0.2812196612358093, + "eval_runtime": 1157.3725, + "eval_samples_per_second": 6.491, + "eval_steps_per_second": 0.812, + "step": 37600 + }, + { + "epoch": 4.11, + "learning_rate": 3.972422455220621e-05, + "loss": 0.171, + "step": 37650 + }, + { + "epoch": 4.12, + "learning_rate": 3.9710572302315426e-05, + "loss": 0.1763, + "step": 37700 + }, + { + "epoch": 4.12, + "eval_cer": 0.050886083745949405, + "eval_loss": 0.2909301817417145, + "eval_runtime": 1146.6592, + "eval_samples_per_second": 6.552, + "eval_steps_per_second": 0.82, + "step": 37700 + }, + { + "epoch": 4.12, + "learning_rate": 3.969692005242464e-05, + "loss": 0.1461, + "step": 37750 + }, + { + "epoch": 4.13, + "learning_rate": 3.968326780253386e-05, + "loss": 0.1714, + "step": 37800 + }, + { + "epoch": 4.13, + "eval_cer": 0.05107770641354225, + "eval_loss": 0.28945988416671753, + "eval_runtime": 1206.247, + "eval_samples_per_second": 6.228, + "eval_steps_per_second": 0.779, + "step": 37800 + }, + { + "epoch": 4.13, + "learning_rate": 3.966961555264308e-05, + "loss": 0.1786, + "step": 37850 + }, + { + "epoch": 4.14, + "learning_rate": 3.9655963302752296e-05, + "loss": 0.1665, + "step": 37900 + }, + { + "epoch": 4.14, + "eval_cer": 0.04967817657344452, + "eval_loss": 0.2878923714160919, + "eval_runtime": 1367.4992, + "eval_samples_per_second": 5.494, + "eval_steps_per_second": 0.687, + "step": 37900 + }, + { + "epoch": 4.14, + "learning_rate": 3.964231105286151e-05, + "loss": 0.1629, + "step": 37950 + }, + { + "epoch": 4.15, + "learning_rate": 3.962865880297073e-05, + "loss": 0.1744, + "step": 38000 + }, + { + "epoch": 4.15, + "eval_cer": 0.05413340359497812, + "eval_loss": 0.28816238045692444, + "eval_runtime": 1362.8508, + "eval_samples_per_second": 5.513, + "eval_steps_per_second": 0.69, + "step": 38000 + }, + { + "epoch": 4.16, + "learning_rate": 3.961500655307995e-05, + "loss": 0.1571, + "step": 38050 + }, + { + "epoch": 4.16, + "learning_rate": 3.960135430318917e-05, + "loss": 0.1769, + "step": 38100 + }, + { + "epoch": 4.16, + "eval_cer": 0.05594697527041038, + "eval_loss": 0.28923317790031433, + "eval_runtime": 1450.036, + "eval_samples_per_second": 5.181, + "eval_steps_per_second": 0.648, + "step": 38100 + }, + { + "epoch": 4.17, + "learning_rate": 3.958770205329839e-05, + "loss": 0.1781, + "step": 38150 + }, + { + "epoch": 4.17, + "learning_rate": 3.9574049803407605e-05, + "loss": 0.1769, + "step": 38200 + }, + { + "epoch": 4.17, + "eval_cer": 0.05179629141701541, + "eval_loss": 0.28494036197662354, + "eval_runtime": 1181.7013, + "eval_samples_per_second": 6.358, + "eval_steps_per_second": 0.795, + "step": 38200 + }, + { + "epoch": 4.18, + "learning_rate": 3.956039755351682e-05, + "loss": 0.18, + "step": 38250 + }, + { + "epoch": 4.18, + "learning_rate": 3.9546745303626036e-05, + "loss": 0.1846, + "step": 38300 + }, + { + "epoch": 4.18, + "eval_cer": 0.0502462009095233, + "eval_loss": 0.2893485128879547, + "eval_runtime": 1168.764, + "eval_samples_per_second": 6.428, + "eval_steps_per_second": 0.804, + "step": 38300 + }, + { + "epoch": 4.19, + "learning_rate": 3.953309305373526e-05, + "loss": 0.1811, + "step": 38350 + }, + { + "epoch": 4.19, + "learning_rate": 3.9519440803844475e-05, + "loss": 0.1944, + "step": 38400 + }, + { + "epoch": 4.19, + "eval_cer": 0.05107428458019238, + "eval_loss": 0.28696149587631226, + "eval_runtime": 1172.9327, + "eval_samples_per_second": 6.405, + "eval_steps_per_second": 0.801, + "step": 38400 + }, + { + "epoch": 4.2, + "learning_rate": 3.950578855395369e-05, + "loss": 0.1608, + "step": 38450 + }, + { + "epoch": 4.2, + "learning_rate": 3.949213630406291e-05, + "loss": 0.1626, + "step": 38500 + }, + { + "epoch": 4.2, + "eval_cer": 0.049335993238457304, + "eval_loss": 0.2923007607460022, + "eval_runtime": 1166.3295, + "eval_samples_per_second": 6.442, + "eval_steps_per_second": 0.806, + "step": 38500 + }, + { + "epoch": 4.21, + "learning_rate": 3.947848405417213e-05, + "loss": 0.1567, + "step": 38550 + }, + { + "epoch": 4.22, + "learning_rate": 3.946483180428135e-05, + "loss": 0.1635, + "step": 38600 + }, + { + "epoch": 4.22, + "eval_cer": 0.056754527940980216, + "eval_loss": 0.29253220558166504, + "eval_runtime": 1320.7091, + "eval_samples_per_second": 5.689, + "eval_steps_per_second": 0.712, + "step": 38600 + }, + { + "epoch": 4.22, + "learning_rate": 3.945117955439057e-05, + "loss": 0.1848, + "step": 38650 + }, + { + "epoch": 4.23, + "learning_rate": 3.943752730449978e-05, + "loss": 0.1633, + "step": 38700 + }, + { + "epoch": 4.23, + "eval_cer": 0.04997245424153353, + "eval_loss": 0.28573524951934814, + "eval_runtime": 1459.4135, + "eval_samples_per_second": 5.148, + "eval_steps_per_second": 0.644, + "step": 38700 + }, + { + "epoch": 4.23, + "learning_rate": 3.9423875054609e-05, + "loss": 0.175, + "step": 38750 + }, + { + "epoch": 4.24, + "learning_rate": 3.941022280471822e-05, + "loss": 0.1895, + "step": 38800 + }, + { + "epoch": 4.24, + "eval_cer": 0.0490040754035197, + "eval_loss": 0.2857818901538849, + "eval_runtime": 1374.6623, + "eval_samples_per_second": 5.465, + "eval_steps_per_second": 0.684, + "step": 38800 + }, + { + "epoch": 4.24, + "learning_rate": 3.939657055482744e-05, + "loss": 0.156, + "step": 38850 + }, + { + "epoch": 4.25, + "learning_rate": 3.938291830493665e-05, + "loss": 0.1762, + "step": 38900 + }, + { + "epoch": 4.25, + "eval_cer": 0.04956525607289874, + "eval_loss": 0.28907591104507446, + "eval_runtime": 1452.6364, + "eval_samples_per_second": 5.172, + "eval_steps_per_second": 0.647, + "step": 38900 + }, + { + "epoch": 4.25, + "learning_rate": 3.9369266055045876e-05, + "loss": 0.1567, + "step": 38950 + }, + { + "epoch": 4.26, + "learning_rate": 3.935561380515509e-05, + "loss": 0.1567, + "step": 39000 + }, + { + "epoch": 4.26, + "eval_cer": 0.049517350406000527, + "eval_loss": 0.28758588433265686, + "eval_runtime": 1139.6582, + "eval_samples_per_second": 6.592, + "eval_steps_per_second": 0.825, + "step": 39000 + }, + { + "epoch": 4.26, + "learning_rate": 3.9341961555264314e-05, + "loss": 0.1706, + "step": 39050 + }, + { + "epoch": 4.27, + "learning_rate": 3.932830930537353e-05, + "loss": 0.1542, + "step": 39100 + }, + { + "epoch": 4.27, + "eval_cer": 0.05102980074664404, + "eval_loss": 0.2869040369987488, + "eval_runtime": 1158.6876, + "eval_samples_per_second": 6.484, + "eval_steps_per_second": 0.811, + "step": 39100 + }, + { + "epoch": 4.28, + "learning_rate": 3.9314657055482746e-05, + "loss": 0.1741, + "step": 39150 + }, + { + "epoch": 4.28, + "learning_rate": 3.930100480559196e-05, + "loss": 0.178, + "step": 39200 + }, + { + "epoch": 4.28, + "eval_cer": 0.049780831573940684, + "eval_loss": 0.28901004791259766, + "eval_runtime": 1161.4194, + "eval_samples_per_second": 6.469, + "eval_steps_per_second": 0.809, + "step": 39200 + }, + { + "epoch": 4.29, + "learning_rate": 3.928735255570118e-05, + "loss": 0.1832, + "step": 39250 + }, + { + "epoch": 4.29, + "learning_rate": 3.92737003058104e-05, + "loss": 0.1613, + "step": 39300 + }, + { + "epoch": 4.29, + "eval_cer": 0.05036254324341896, + "eval_loss": 0.2918330132961273, + "eval_runtime": 1156.9299, + "eval_samples_per_second": 6.494, + "eval_steps_per_second": 0.812, + "step": 39300 + }, + { + "epoch": 4.3, + "learning_rate": 3.9260048055919616e-05, + "loss": 0.1793, + "step": 39350 + }, + { + "epoch": 4.3, + "learning_rate": 3.924639580602884e-05, + "loss": 0.1801, + "step": 39400 + }, + { + "epoch": 4.3, + "eval_cer": 0.050280419243022026, + "eval_loss": 0.28874126076698303, + "eval_runtime": 1174.6787, + "eval_samples_per_second": 6.396, + "eval_steps_per_second": 0.8, + "step": 39400 + }, + { + "epoch": 4.31, + "learning_rate": 3.9232743556138054e-05, + "loss": 0.1789, + "step": 39450 + }, + { + "epoch": 4.31, + "learning_rate": 3.921909130624727e-05, + "loss": 0.1885, + "step": 39500 + }, + { + "epoch": 4.31, + "eval_cer": 0.05452691443021342, + "eval_loss": 0.282078355550766, + "eval_runtime": 1194.6585, + "eval_samples_per_second": 6.289, + "eval_steps_per_second": 0.787, + "step": 39500 + }, + { + "epoch": 4.32, + "learning_rate": 3.920543905635649e-05, + "loss": 0.1593, + "step": 39550 + }, + { + "epoch": 4.33, + "learning_rate": 3.919178680646571e-05, + "loss": 0.1773, + "step": 39600 + }, + { + "epoch": 4.33, + "eval_cer": 0.05213163108530288, + "eval_loss": 0.2819925844669342, + "eval_runtime": 1165.2004, + "eval_samples_per_second": 6.448, + "eval_steps_per_second": 0.807, + "step": 39600 + }, + { + "epoch": 4.33, + "learning_rate": 3.9178134556574924e-05, + "loss": 0.1718, + "step": 39650 + }, + { + "epoch": 4.34, + "learning_rate": 3.916448230668414e-05, + "loss": 0.1718, + "step": 39700 + }, + { + "epoch": 4.34, + "eval_cer": 0.049561834239548866, + "eval_loss": 0.2793809175491333, + "eval_runtime": 1157.9372, + "eval_samples_per_second": 6.488, + "eval_steps_per_second": 0.812, + "step": 39700 + }, + { + "epoch": 4.34, + "learning_rate": 3.915083005679336e-05, + "loss": 0.1726, + "step": 39750 + }, + { + "epoch": 4.35, + "learning_rate": 3.913717780690258e-05, + "loss": 0.1756, + "step": 39800 + }, + { + "epoch": 4.35, + "eval_cer": 0.04887746756957442, + "eval_loss": 0.2796994745731354, + "eval_runtime": 1151.5087, + "eval_samples_per_second": 6.524, + "eval_steps_per_second": 0.816, + "step": 39800 + }, + { + "epoch": 4.35, + "learning_rate": 3.9123525557011794e-05, + "loss": 0.1565, + "step": 39850 + }, + { + "epoch": 4.36, + "learning_rate": 3.910987330712102e-05, + "loss": 0.1785, + "step": 39900 + }, + { + "epoch": 4.36, + "eval_cer": 0.052774935755078856, + "eval_loss": 0.28161171078681946, + "eval_runtime": 1287.3124, + "eval_samples_per_second": 5.836, + "eval_steps_per_second": 0.73, + "step": 39900 + }, + { + "epoch": 4.36, + "learning_rate": 3.909622105723023e-05, + "loss": 0.1682, + "step": 39950 + }, + { + "epoch": 4.37, + "learning_rate": 3.9082568807339455e-05, + "loss": 0.1748, + "step": 40000 + }, + { + "epoch": 4.37, + "eval_cer": 0.05262437508768448, + "eval_loss": 0.2795943319797516, + "eval_runtime": 1472.9079, + "eval_samples_per_second": 5.101, + "eval_steps_per_second": 0.638, + "step": 40000 + }, + { + "epoch": 4.37, + "learning_rate": 3.906891655744867e-05, + "loss": 0.1823, + "step": 40050 + }, + { + "epoch": 4.38, + "learning_rate": 3.905526430755789e-05, + "loss": 0.1526, + "step": 40100 + }, + { + "epoch": 4.38, + "eval_cer": 0.048610564568284394, + "eval_loss": 0.2830022871494293, + "eval_runtime": 1231.8361, + "eval_samples_per_second": 6.099, + "eval_steps_per_second": 0.763, + "step": 40100 + }, + { + "epoch": 4.39, + "learning_rate": 3.90416120576671e-05, + "loss": 0.1638, + "step": 40150 + }, + { + "epoch": 4.39, + "learning_rate": 3.902795980777632e-05, + "loss": 0.1873, + "step": 40200 + }, + { + "epoch": 4.39, + "eval_cer": 0.053401131258105465, + "eval_loss": 0.2826146185398102, + "eval_runtime": 1183.8819, + "eval_samples_per_second": 6.346, + "eval_steps_per_second": 0.794, + "step": 40200 + }, + { + "epoch": 4.4, + "learning_rate": 3.901430755788554e-05, + "loss": 0.1605, + "step": 40250 + }, + { + "epoch": 4.4, + "learning_rate": 3.900065530799476e-05, + "loss": 0.1576, + "step": 40300 + }, + { + "epoch": 4.4, + "eval_cer": 0.04860714273493452, + "eval_loss": 0.28039631247520447, + "eval_runtime": 1139.9713, + "eval_samples_per_second": 6.591, + "eval_steps_per_second": 0.825, + "step": 40300 + }, + { + "epoch": 4.41, + "learning_rate": 3.898700305810398e-05, + "loss": 0.1778, + "step": 40350 + }, + { + "epoch": 4.41, + "learning_rate": 3.8973350808213195e-05, + "loss": 0.1715, + "step": 40400 + }, + { + "epoch": 4.41, + "eval_cer": 0.049062246570467524, + "eval_loss": 0.27709659934043884, + "eval_runtime": 1152.7021, + "eval_samples_per_second": 6.518, + "eval_steps_per_second": 0.815, + "step": 40400 + }, + { + "epoch": 4.42, + "learning_rate": 3.895969855832242e-05, + "loss": 0.1774, + "step": 40450 + }, + { + "epoch": 4.42, + "learning_rate": 3.8946046308431634e-05, + "loss": 0.1675, + "step": 40500 + }, + { + "epoch": 4.42, + "eval_cer": 0.047919354231610214, + "eval_loss": 0.280836284160614, + "eval_runtime": 1141.4777, + "eval_samples_per_second": 6.582, + "eval_steps_per_second": 0.823, + "step": 40500 + }, + { + "epoch": 4.43, + "learning_rate": 3.893239405854085e-05, + "loss": 0.1776, + "step": 40550 + }, + { + "epoch": 4.43, + "learning_rate": 3.8918741808650065e-05, + "loss": 0.1735, + "step": 40600 + }, + { + "epoch": 4.43, + "eval_cer": 0.050119593075578034, + "eval_loss": 0.28058889508247375, + "eval_runtime": 1152.5493, + "eval_samples_per_second": 6.519, + "eval_steps_per_second": 0.816, + "step": 40600 + }, + { + "epoch": 4.44, + "learning_rate": 3.890508955875928e-05, + "loss": 0.1555, + "step": 40650 + }, + { + "epoch": 4.45, + "learning_rate": 3.8891437308868504e-05, + "loss": 0.1462, + "step": 40700 + }, + { + "epoch": 4.45, + "eval_cer": 0.050150389575726886, + "eval_loss": 0.2807774245738983, + "eval_runtime": 1157.0666, + "eval_samples_per_second": 6.493, + "eval_steps_per_second": 0.812, + "step": 40700 + }, + { + "epoch": 4.45, + "learning_rate": 3.887778505897772e-05, + "loss": 0.1823, + "step": 40750 + }, + { + "epoch": 4.46, + "learning_rate": 3.886413280908694e-05, + "loss": 0.1817, + "step": 40800 + }, + { + "epoch": 4.46, + "eval_cer": 0.04801516556540664, + "eval_loss": 0.27960580587387085, + "eval_runtime": 1144.0954, + "eval_samples_per_second": 6.567, + "eval_steps_per_second": 0.822, + "step": 40800 + }, + { + "epoch": 4.46, + "learning_rate": 3.885048055919616e-05, + "loss": 0.1831, + "step": 40850 + }, + { + "epoch": 4.47, + "learning_rate": 3.8836828309305374e-05, + "loss": 0.1561, + "step": 40900 + }, + { + "epoch": 4.47, + "eval_cer": 0.04895959156997136, + "eval_loss": 0.2814878821372986, + "eval_runtime": 1156.0109, + "eval_samples_per_second": 6.499, + "eval_steps_per_second": 0.813, + "step": 40900 + }, + { + "epoch": 4.47, + "learning_rate": 3.882344910441241e-05, + "loss": 0.1673, + "step": 40950 + }, + { + "epoch": 4.48, + "learning_rate": 3.880979685452163e-05, + "loss": 0.2072, + "step": 41000 + }, + { + "epoch": 4.48, + "eval_cer": 0.05174496391676733, + "eval_loss": 0.28528130054473877, + "eval_runtime": 1279.9842, + "eval_samples_per_second": 5.87, + "eval_steps_per_second": 0.734, + "step": 41000 + }, + { + "epoch": 4.48, + "learning_rate": 3.879614460463085e-05, + "loss": 0.1719, + "step": 41050 + }, + { + "epoch": 4.49, + "learning_rate": 3.878249235474006e-05, + "loss": 0.1675, + "step": 41100 + }, + { + "epoch": 4.49, + "eval_cer": 0.04981162807408954, + "eval_loss": 0.2832522988319397, + "eval_runtime": 1157.4874, + "eval_samples_per_second": 6.491, + "eval_steps_per_second": 0.812, + "step": 41100 + }, + { + "epoch": 4.49, + "learning_rate": 3.876884010484928e-05, + "loss": 0.1789, + "step": 41150 + }, + { + "epoch": 4.5, + "learning_rate": 3.8755187854958495e-05, + "loss": 0.2017, + "step": 41200 + }, + { + "epoch": 4.5, + "eval_cer": 0.04828549040004654, + "eval_loss": 0.28346341848373413, + "eval_runtime": 1137.5974, + "eval_samples_per_second": 6.604, + "eval_steps_per_second": 0.826, + "step": 41200 + }, + { + "epoch": 4.51, + "learning_rate": 3.874153560506772e-05, + "loss": 0.1719, + "step": 41250 + }, + { + "epoch": 4.51, + "learning_rate": 3.872788335517693e-05, + "loss": 0.2023, + "step": 41300 + }, + { + "epoch": 4.51, + "eval_cer": 0.05051310391081334, + "eval_loss": 0.27435365319252014, + "eval_runtime": 1287.7747, + "eval_samples_per_second": 5.834, + "eval_steps_per_second": 0.73, + "step": 41300 + }, + { + "epoch": 4.52, + "learning_rate": 3.8714231105286156e-05, + "loss": 0.1624, + "step": 41350 + }, + { + "epoch": 4.52, + "learning_rate": 3.870057885539537e-05, + "loss": 0.1696, + "step": 41400 + }, + { + "epoch": 4.52, + "eval_cer": 0.047361595395581046, + "eval_loss": 0.2742946147918701, + "eval_runtime": 1143.0851, + "eval_samples_per_second": 6.573, + "eval_steps_per_second": 0.822, + "step": 41400 + }, + { + "epoch": 4.53, + "learning_rate": 3.868692660550459e-05, + "loss": 0.1732, + "step": 41450 + }, + { + "epoch": 4.53, + "learning_rate": 3.867327435561381e-05, + "loss": 0.1709, + "step": 41500 + }, + { + "epoch": 4.53, + "eval_cer": 0.04716655089463833, + "eval_loss": 0.27990689873695374, + "eval_runtime": 1145.7372, + "eval_samples_per_second": 6.557, + "eval_steps_per_second": 0.82, + "step": 41500 + }, + { + "epoch": 4.54, + "learning_rate": 3.8659622105723026e-05, + "loss": 0.1567, + "step": 41550 + }, + { + "epoch": 4.54, + "learning_rate": 3.864596985583224e-05, + "loss": 0.1785, + "step": 41600 + }, + { + "epoch": 4.54, + "eval_cer": 0.04891168590307315, + "eval_loss": 0.2770211398601532, + "eval_runtime": 1150.6726, + "eval_samples_per_second": 6.529, + "eval_steps_per_second": 0.817, + "step": 41600 + }, + { + "epoch": 4.55, + "learning_rate": 3.863231760594146e-05, + "loss": 0.1767, + "step": 41650 + }, + { + "epoch": 4.55, + "learning_rate": 3.861866535605068e-05, + "loss": 0.1923, + "step": 41700 + }, + { + "epoch": 4.55, + "eval_cer": 0.04813492973265216, + "eval_loss": 0.27829426527023315, + "eval_runtime": 1159.8988, + "eval_samples_per_second": 6.477, + "eval_steps_per_second": 0.81, + "step": 41700 + }, + { + "epoch": 4.56, + "learning_rate": 3.8605013106159896e-05, + "loss": 0.1827, + "step": 41750 + }, + { + "epoch": 4.57, + "learning_rate": 3.859136085626912e-05, + "loss": 0.2008, + "step": 41800 + }, + { + "epoch": 4.57, + "eval_cer": 0.04740950106247926, + "eval_loss": 0.27811819314956665, + "eval_runtime": 1139.7714, + "eval_samples_per_second": 6.592, + "eval_steps_per_second": 0.825, + "step": 41800 + }, + { + "epoch": 4.57, + "learning_rate": 3.8577708606378334e-05, + "loss": 0.1752, + "step": 41850 + }, + { + "epoch": 4.58, + "learning_rate": 3.856405635648755e-05, + "loss": 0.1746, + "step": 41900 + }, + { + "epoch": 4.58, + "eval_cer": 0.047378704562330405, + "eval_loss": 0.2777942717075348, + "eval_runtime": 1136.9039, + "eval_samples_per_second": 6.608, + "eval_steps_per_second": 0.827, + "step": 41900 + }, + { + "epoch": 4.58, + "learning_rate": 3.855040410659677e-05, + "loss": 0.1668, + "step": 41950 + }, + { + "epoch": 4.59, + "learning_rate": 3.853675185670599e-05, + "loss": 0.1732, + "step": 42000 + }, + { + "epoch": 4.59, + "eval_cer": 0.05069446107835656, + "eval_loss": 0.2736971378326416, + "eval_runtime": 1150.4296, + "eval_samples_per_second": 6.531, + "eval_steps_per_second": 0.817, + "step": 42000 + }, + { + "epoch": 4.59, + "learning_rate": 3.8523099606815204e-05, + "loss": 0.1847, + "step": 42050 + }, + { + "epoch": 4.6, + "learning_rate": 3.850944735692442e-05, + "loss": 0.1883, + "step": 42100 + }, + { + "epoch": 4.6, + "eval_cer": 0.05216242758545173, + "eval_loss": 0.2768714427947998, + "eval_runtime": 1164.9599, + "eval_samples_per_second": 6.449, + "eval_steps_per_second": 0.807, + "step": 42100 + }, + { + "epoch": 4.6, + "learning_rate": 3.8495795107033636e-05, + "loss": 0.1605, + "step": 42150 + }, + { + "epoch": 4.61, + "learning_rate": 3.848214285714286e-05, + "loss": 0.1462, + "step": 42200 + }, + { + "epoch": 4.61, + "eval_cer": 0.05187841541741234, + "eval_loss": 0.27714163064956665, + "eval_runtime": 1164.401, + "eval_samples_per_second": 6.452, + "eval_steps_per_second": 0.807, + "step": 42200 + }, + { + "epoch": 4.61, + "learning_rate": 3.846849060725208e-05, + "loss": 0.1724, + "step": 42250 + }, + { + "epoch": 4.62, + "learning_rate": 3.84548383573613e-05, + "loss": 0.1813, + "step": 42300 + }, + { + "epoch": 4.62, + "eval_cer": 0.048521596901187715, + "eval_loss": 0.27737104892730713, + "eval_runtime": 1153.5255, + "eval_samples_per_second": 6.513, + "eval_steps_per_second": 0.815, + "step": 42300 + }, + { + "epoch": 4.63, + "learning_rate": 3.844118610747051e-05, + "loss": 0.1676, + "step": 42350 + }, + { + "epoch": 4.63, + "learning_rate": 3.842753385757973e-05, + "loss": 0.2156, + "step": 42400 + }, + { + "epoch": 4.63, + "eval_cer": 0.04726236222843475, + "eval_loss": 0.2811882495880127, + "eval_runtime": 1139.084, + "eval_samples_per_second": 6.596, + "eval_steps_per_second": 0.825, + "step": 42400 + }, + { + "epoch": 4.64, + "learning_rate": 3.841388160768895e-05, + "loss": 0.175, + "step": 42450 + }, + { + "epoch": 4.64, + "learning_rate": 3.840022935779817e-05, + "loss": 0.1854, + "step": 42500 + }, + { + "epoch": 4.64, + "eval_cer": 0.04832997423359488, + "eval_loss": 0.27360132336616516, + "eval_runtime": 1176.5116, + "eval_samples_per_second": 6.386, + "eval_steps_per_second": 0.799, + "step": 42500 + }, + { + "epoch": 4.65, + "learning_rate": 3.838657710790738e-05, + "loss": 0.1697, + "step": 42550 + }, + { + "epoch": 4.65, + "learning_rate": 3.83729248580166e-05, + "loss": 0.1728, + "step": 42600 + }, + { + "epoch": 4.65, + "eval_cer": 0.04801174373205676, + "eval_loss": 0.2755264639854431, + "eval_runtime": 1183.4193, + "eval_samples_per_second": 6.349, + "eval_steps_per_second": 0.794, + "step": 42600 + }, + { + "epoch": 4.66, + "learning_rate": 3.835927260812582e-05, + "loss": 0.1757, + "step": 42650 + }, + { + "epoch": 4.66, + "learning_rate": 3.8345620358235044e-05, + "loss": 0.1784, + "step": 42700 + }, + { + "epoch": 4.66, + "eval_cer": 0.04826838123329718, + "eval_loss": 0.27424073219299316, + "eval_runtime": 1313.6948, + "eval_samples_per_second": 5.719, + "eval_steps_per_second": 0.716, + "step": 42700 + }, + { + "epoch": 4.67, + "learning_rate": 3.833196810834426e-05, + "loss": 0.1756, + "step": 42750 + }, + { + "epoch": 4.67, + "learning_rate": 3.8318315858453475e-05, + "loss": 0.1549, + "step": 42800 + }, + { + "epoch": 4.67, + "eval_cer": 0.05006826557532995, + "eval_loss": 0.2791966199874878, + "eval_runtime": 1518.3443, + "eval_samples_per_second": 4.948, + "eval_steps_per_second": 0.619, + "step": 42800 + }, + { + "epoch": 4.68, + "learning_rate": 3.830466360856269e-05, + "loss": 0.1886, + "step": 42850 + }, + { + "epoch": 4.69, + "learning_rate": 3.8291284403669726e-05, + "loss": 0.178, + "step": 42900 + }, + { + "epoch": 4.69, + "eval_cer": 0.048815874569276725, + "eval_loss": 0.2769632041454315, + "eval_runtime": 1583.845, + "eval_samples_per_second": 4.744, + "eval_steps_per_second": 0.593, + "step": 42900 + }, + { + "epoch": 4.69, + "learning_rate": 3.827763215377895e-05, + "loss": 0.1778, + "step": 42950 + }, + { + "epoch": 4.7, + "learning_rate": 3.8263979903888164e-05, + "loss": 0.1683, + "step": 43000 + }, + { + "epoch": 4.7, + "eval_cer": 0.048702954068730944, + "eval_loss": 0.2739656865596771, + "eval_runtime": 1397.9506, + "eval_samples_per_second": 5.374, + "eval_steps_per_second": 0.672, + "step": 43000 + }, + { + "epoch": 4.7, + "learning_rate": 3.825032765399738e-05, + "loss": 0.1738, + "step": 43050 + }, + { + "epoch": 4.71, + "learning_rate": 3.8236675404106596e-05, + "loss": 0.183, + "step": 43100 + }, + { + "epoch": 4.71, + "eval_cer": 0.04755663989652376, + "eval_loss": 0.2752831280231476, + "eval_runtime": 1171.1241, + "eval_samples_per_second": 6.415, + "eval_steps_per_second": 0.803, + "step": 43100 + }, + { + "epoch": 4.71, + "learning_rate": 3.822302315421581e-05, + "loss": 0.1802, + "step": 43150 + }, + { + "epoch": 4.72, + "learning_rate": 3.8209370904325034e-05, + "loss": 0.1727, + "step": 43200 + }, + { + "epoch": 4.72, + "eval_cer": 0.05161151241612231, + "eval_loss": 0.2738773226737976, + "eval_runtime": 1208.5144, + "eval_samples_per_second": 6.217, + "eval_steps_per_second": 0.778, + "step": 43200 + }, + { + "epoch": 4.72, + "learning_rate": 3.819571865443426e-05, + "loss": 0.1818, + "step": 43250 + }, + { + "epoch": 4.73, + "learning_rate": 3.818206640454347e-05, + "loss": 0.1866, + "step": 43300 + }, + { + "epoch": 4.73, + "eval_cer": 0.049089621237266505, + "eval_loss": 0.2753521502017975, + "eval_runtime": 1160.1441, + "eval_samples_per_second": 6.476, + "eval_steps_per_second": 0.81, + "step": 43300 + }, + { + "epoch": 4.73, + "learning_rate": 3.816841415465269e-05, + "loss": 0.1413, + "step": 43350 + }, + { + "epoch": 4.74, + "learning_rate": 3.8154761904761904e-05, + "loss": 0.1639, + "step": 43400 + }, + { + "epoch": 4.74, + "eval_cer": 0.04883298373602609, + "eval_loss": 0.27695029973983765, + "eval_runtime": 1168.439, + "eval_samples_per_second": 6.43, + "eval_steps_per_second": 0.804, + "step": 43400 + }, + { + "epoch": 4.75, + "learning_rate": 3.814110965487113e-05, + "loss": 0.1736, + "step": 43450 + }, + { + "epoch": 4.75, + "learning_rate": 3.812745740498034e-05, + "loss": 0.1608, + "step": 43500 + }, + { + "epoch": 4.75, + "eval_cer": 0.047128910727789734, + "eval_loss": 0.271191269159317, + "eval_runtime": 1202.4574, + "eval_samples_per_second": 6.248, + "eval_steps_per_second": 0.782, + "step": 43500 + }, + { + "epoch": 4.76, + "learning_rate": 3.811380515508956e-05, + "loss": 0.1753, + "step": 43550 + }, + { + "epoch": 4.76, + "learning_rate": 3.8100152905198775e-05, + "loss": 0.1793, + "step": 43600 + }, + { + "epoch": 4.76, + "eval_cer": 0.04675593089265367, + "eval_loss": 0.27248117327690125, + "eval_runtime": 1138.4082, + "eval_samples_per_second": 6.6, + "eval_steps_per_second": 0.826, + "step": 43600 + }, + { + "epoch": 4.77, + "learning_rate": 3.8086500655308e-05, + "loss": 0.1798, + "step": 43650 + }, + { + "epoch": 4.77, + "learning_rate": 3.807284840541721e-05, + "loss": 0.1765, + "step": 43700 + }, + { + "epoch": 4.77, + "eval_cer": 0.04771062239726801, + "eval_loss": 0.2712666094303131, + "eval_runtime": 1149.9923, + "eval_samples_per_second": 6.533, + "eval_steps_per_second": 0.817, + "step": 43700 + }, + { + "epoch": 4.78, + "learning_rate": 3.8059196155526436e-05, + "loss": 0.1543, + "step": 43750 + }, + { + "epoch": 4.78, + "learning_rate": 3.804554390563565e-05, + "loss": 0.197, + "step": 43800 + }, + { + "epoch": 4.78, + "eval_cer": 0.047895401398161105, + "eval_loss": 0.27246350049972534, + "eval_runtime": 1136.3564, + "eval_samples_per_second": 6.611, + "eval_steps_per_second": 0.827, + "step": 43800 + }, + { + "epoch": 4.79, + "learning_rate": 3.803189165574487e-05, + "loss": 0.1743, + "step": 43850 + }, + { + "epoch": 4.79, + "learning_rate": 3.801823940585409e-05, + "loss": 0.1795, + "step": 43900 + }, + { + "epoch": 4.79, + "eval_cer": 0.04925729107141024, + "eval_loss": 0.26492729783058167, + "eval_runtime": 1162.7543, + "eval_samples_per_second": 6.461, + "eval_steps_per_second": 0.808, + "step": 43900 + }, + { + "epoch": 4.8, + "learning_rate": 3.8004587155963306e-05, + "loss": 0.2045, + "step": 43950 + }, + { + "epoch": 4.81, + "learning_rate": 3.799093490607252e-05, + "loss": 0.1855, + "step": 44000 + }, + { + "epoch": 4.81, + "eval_cer": 0.048767968902378514, + "eval_loss": 0.2690161168575287, + "eval_runtime": 1151.9666, + "eval_samples_per_second": 6.522, + "eval_steps_per_second": 0.816, + "step": 44000 + }, + { + "epoch": 4.81, + "learning_rate": 3.797728265618174e-05, + "loss": 0.1865, + "step": 44050 + }, + { + "epoch": 4.82, + "learning_rate": 3.796363040629096e-05, + "loss": 0.1731, + "step": 44100 + }, + { + "epoch": 4.82, + "eval_cer": 0.04666354139220712, + "eval_loss": 0.26857924461364746, + "eval_runtime": 1138.8024, + "eval_samples_per_second": 6.597, + "eval_steps_per_second": 0.825, + "step": 44100 + }, + { + "epoch": 4.82, + "learning_rate": 3.7949978156400176e-05, + "loss": 0.1811, + "step": 44150 + }, + { + "epoch": 4.83, + "learning_rate": 3.79363259065094e-05, + "loss": 0.1909, + "step": 44200 + }, + { + "epoch": 4.83, + "eval_cer": 0.048347083400344236, + "eval_loss": 0.26807311177253723, + "eval_runtime": 1136.7106, + "eval_samples_per_second": 6.609, + "eval_steps_per_second": 0.827, + "step": 44200 + }, + { + "epoch": 4.83, + "learning_rate": 3.7922673656618614e-05, + "loss": 0.1673, + "step": 44250 + }, + { + "epoch": 4.84, + "learning_rate": 3.790902140672783e-05, + "loss": 0.1722, + "step": 44300 + }, + { + "epoch": 4.84, + "eval_cer": 0.04721787839488641, + "eval_loss": 0.2638927400112152, + "eval_runtime": 1151.5301, + "eval_samples_per_second": 6.524, + "eval_steps_per_second": 0.816, + "step": 44300 + }, + { + "epoch": 4.84, + "learning_rate": 3.7895369156837046e-05, + "loss": 0.1785, + "step": 44350 + }, + { + "epoch": 4.85, + "learning_rate": 3.788171690694627e-05, + "loss": 0.1691, + "step": 44400 + }, + { + "epoch": 4.85, + "eval_cer": 0.04767640406376929, + "eval_loss": 0.2721834182739258, + "eval_runtime": 1200.0834, + "eval_samples_per_second": 6.26, + "eval_steps_per_second": 0.783, + "step": 44400 + }, + { + "epoch": 4.85, + "learning_rate": 3.7868064657055484e-05, + "loss": 0.1949, + "step": 44450 + }, + { + "epoch": 4.86, + "learning_rate": 3.78544124071647e-05, + "loss": 0.1563, + "step": 44500 + }, + { + "epoch": 4.86, + "eval_cer": 0.04779274639766494, + "eval_loss": 0.26372846961021423, + "eval_runtime": 1394.4107, + "eval_samples_per_second": 5.388, + "eval_steps_per_second": 0.674, + "step": 44500 + }, + { + "epoch": 4.87, + "learning_rate": 3.7840760157273916e-05, + "loss": 0.1728, + "step": 44550 + }, + { + "epoch": 4.87, + "learning_rate": 3.782710790738314e-05, + "loss": 0.159, + "step": 44600 + }, + { + "epoch": 4.87, + "eval_cer": 0.04847369123428951, + "eval_loss": 0.26685479283332825, + "eval_runtime": 1409.3734, + "eval_samples_per_second": 5.331, + "eval_steps_per_second": 0.667, + "step": 44600 + }, + { + "epoch": 4.88, + "learning_rate": 3.781345565749236e-05, + "loss": 0.1585, + "step": 44650 + }, + { + "epoch": 4.88, + "learning_rate": 3.779980340760158e-05, + "loss": 0.1902, + "step": 44700 + }, + { + "epoch": 4.88, + "eval_cer": 0.04875428156897903, + "eval_loss": 0.26479294896125793, + "eval_runtime": 1474.9654, + "eval_samples_per_second": 5.094, + "eval_steps_per_second": 0.637, + "step": 44700 + }, + { + "epoch": 4.89, + "learning_rate": 3.778615115771079e-05, + "loss": 0.1986, + "step": 44750 + }, + { + "epoch": 4.89, + "learning_rate": 3.777249890782001e-05, + "loss": 0.1975, + "step": 44800 + }, + { + "epoch": 4.89, + "eval_cer": 0.04979451890734018, + "eval_loss": 0.2669293284416199, + "eval_runtime": 1193.7755, + "eval_samples_per_second": 6.293, + "eval_steps_per_second": 0.787, + "step": 44800 + }, + { + "epoch": 4.9, + "learning_rate": 3.775884665792923e-05, + "loss": 0.1921, + "step": 44850 + }, + { + "epoch": 4.9, + "learning_rate": 3.774519440803845e-05, + "loss": 0.1757, + "step": 44900 + }, + { + "epoch": 4.9, + "eval_cer": 0.046643010392107886, + "eval_loss": 0.26375335454940796, + "eval_runtime": 1407.5693, + "eval_samples_per_second": 5.338, + "eval_steps_per_second": 0.668, + "step": 44900 + }, + { + "epoch": 4.91, + "learning_rate": 3.773154215814766e-05, + "loss": 0.1806, + "step": 44950 + }, + { + "epoch": 4.91, + "learning_rate": 3.771788990825688e-05, + "loss": 0.1499, + "step": 45000 + }, + { + "epoch": 4.91, + "eval_cer": 0.04707758322754165, + "eval_loss": 0.26409247517585754, + "eval_runtime": 1611.3579, + "eval_samples_per_second": 4.663, + "eval_steps_per_second": 0.583, + "step": 45000 + }, + { + "epoch": 4.92, + "learning_rate": 3.77042376583661e-05, + "loss": 0.1803, + "step": 45050 + }, + { + "epoch": 4.93, + "learning_rate": 3.7690585408475324e-05, + "loss": 0.1808, + "step": 45100 + }, + { + "epoch": 4.93, + "eval_cer": 0.04849764406773861, + "eval_loss": 0.267391562461853, + "eval_runtime": 1404.0846, + "eval_samples_per_second": 5.351, + "eval_steps_per_second": 0.669, + "step": 45100 + }, + { + "epoch": 4.93, + "learning_rate": 3.767693315858454e-05, + "loss": 0.1713, + "step": 45150 + }, + { + "epoch": 4.94, + "learning_rate": 3.7663280908693755e-05, + "loss": 0.1783, + "step": 45200 + }, + { + "epoch": 4.94, + "eval_cer": 0.047553218063173884, + "eval_loss": 0.2665572762489319, + "eval_runtime": 1430.7712, + "eval_samples_per_second": 5.251, + "eval_steps_per_second": 0.657, + "step": 45200 + }, + { + "epoch": 4.94, + "learning_rate": 3.764962865880297e-05, + "loss": 0.1873, + "step": 45250 + }, + { + "epoch": 4.95, + "learning_rate": 3.763597640891219e-05, + "loss": 0.1676, + "step": 45300 + }, + { + "epoch": 4.95, + "eval_cer": 0.04647191872461427, + "eval_loss": 0.26687777042388916, + "eval_runtime": 1165.6382, + "eval_samples_per_second": 6.445, + "eval_steps_per_second": 0.806, + "step": 45300 + }, + { + "epoch": 4.95, + "learning_rate": 3.762232415902141e-05, + "loss": 0.1789, + "step": 45350 + }, + { + "epoch": 4.96, + "learning_rate": 3.7608671909130625e-05, + "loss": 0.1869, + "step": 45400 + }, + { + "epoch": 4.96, + "eval_cer": 0.04635899822406849, + "eval_loss": 0.2622717320919037, + "eval_runtime": 1313.8343, + "eval_samples_per_second": 5.718, + "eval_steps_per_second": 0.715, + "step": 45400 + }, + { + "epoch": 4.96, + "learning_rate": 3.759501965923984e-05, + "loss": 0.1605, + "step": 45450 + }, + { + "epoch": 4.97, + "learning_rate": 3.7581367409349064e-05, + "loss": 0.1633, + "step": 45500 + }, + { + "epoch": 4.97, + "eval_cer": 0.047361595395581046, + "eval_loss": 0.26969292759895325, + "eval_runtime": 1475.6758, + "eval_samples_per_second": 5.091, + "eval_steps_per_second": 0.637, + "step": 45500 + }, + { + "epoch": 4.97, + "learning_rate": 3.7567715159458286e-05, + "loss": 0.1649, + "step": 45550 + }, + { + "epoch": 4.98, + "learning_rate": 3.75540629095675e-05, + "loss": 0.1488, + "step": 45600 + }, + { + "epoch": 4.98, + "eval_cer": 0.047204191061486926, + "eval_loss": 0.26253628730773926, + "eval_runtime": 1213.3298, + "eval_samples_per_second": 6.192, + "eval_steps_per_second": 0.775, + "step": 45600 + }, + { + "epoch": 4.99, + "learning_rate": 3.754041065967672e-05, + "loss": 0.1776, + "step": 45650 + }, + { + "epoch": 4.99, + "learning_rate": 3.7526758409785934e-05, + "loss": 0.1415, + "step": 45700 + }, + { + "epoch": 4.99, + "eval_cer": 0.05035912141006909, + "eval_loss": 0.2769641876220703, + "eval_runtime": 1194.9242, + "eval_samples_per_second": 6.287, + "eval_steps_per_second": 0.787, + "step": 45700 + }, + { + "epoch": 5.0, + "learning_rate": 3.751310615989515e-05, + "loss": 0.1828, + "step": 45750 + }, + { + "epoch": 5.0, + "learning_rate": 3.749945391000437e-05, + "loss": 0.1522, + "step": 45800 + }, + { + "epoch": 5.0, + "eval_cer": 0.0480562275656051, + "eval_loss": 0.2658143639564514, + "eval_runtime": 1166.1118, + "eval_samples_per_second": 6.443, + "eval_steps_per_second": 0.806, + "step": 45800 + }, + { + "epoch": 5.01, + "learning_rate": 3.748580166011359e-05, + "loss": 0.1204, + "step": 45850 + }, + { + "epoch": 5.01, + "learning_rate": 3.7472149410222804e-05, + "loss": 0.1203, + "step": 45900 + }, + { + "epoch": 5.01, + "eval_cer": 0.04960973990644708, + "eval_loss": 0.272051066160202, + "eval_runtime": 1159.3999, + "eval_samples_per_second": 6.48, + "eval_steps_per_second": 0.811, + "step": 45900 + }, + { + "epoch": 5.02, + "learning_rate": 3.7458497160332026e-05, + "loss": 0.1304, + "step": 45950 + }, + { + "epoch": 5.02, + "learning_rate": 3.744484491044124e-05, + "loss": 0.1256, + "step": 46000 + }, + { + "epoch": 5.02, + "eval_cer": 0.04871321956878056, + "eval_loss": 0.267743319272995, + "eval_runtime": 1167.536, + "eval_samples_per_second": 6.435, + "eval_steps_per_second": 0.805, + "step": 46000 + }, + { + "epoch": 5.03, + "learning_rate": 3.7431192660550465e-05, + "loss": 0.1462, + "step": 46050 + }, + { + "epoch": 5.03, + "learning_rate": 3.741754041065968e-05, + "loss": 0.1306, + "step": 46100 + }, + { + "epoch": 5.03, + "eval_cer": 0.04901434090356931, + "eval_loss": 0.2696629464626312, + "eval_runtime": 1172.6718, + "eval_samples_per_second": 6.407, + "eval_steps_per_second": 0.802, + "step": 46100 + }, + { + "epoch": 5.04, + "learning_rate": 3.7403888160768896e-05, + "loss": 0.1378, + "step": 46150 + }, + { + "epoch": 5.05, + "learning_rate": 3.739023591087811e-05, + "loss": 0.1373, + "step": 46200 + }, + { + "epoch": 5.05, + "eval_cer": 0.04708100506089152, + "eval_loss": 0.26910731196403503, + "eval_runtime": 1223.1444, + "eval_samples_per_second": 6.142, + "eval_steps_per_second": 0.769, + "step": 46200 + }, + { + "epoch": 5.05, + "learning_rate": 3.737658366098733e-05, + "loss": 0.1354, + "step": 46250 + }, + { + "epoch": 5.06, + "learning_rate": 3.736293141109655e-05, + "loss": 0.1421, + "step": 46300 + }, + { + "epoch": 5.06, + "eval_cer": 0.04830944323349564, + "eval_loss": 0.2727450728416443, + "eval_runtime": 1245.9075, + "eval_samples_per_second": 6.03, + "eval_steps_per_second": 0.754, + "step": 46300 + }, + { + "epoch": 5.06, + "learning_rate": 3.7349279161205766e-05, + "loss": 0.115, + "step": 46350 + }, + { + "epoch": 5.07, + "learning_rate": 3.733562691131499e-05, + "loss": 0.1281, + "step": 46400 + }, + { + "epoch": 5.07, + "eval_cer": 0.050030625408481355, + "eval_loss": 0.2738962173461914, + "eval_runtime": 1175.6177, + "eval_samples_per_second": 6.391, + "eval_steps_per_second": 0.8, + "step": 46400 + }, + { + "epoch": 5.07, + "learning_rate": 3.7321974661424205e-05, + "loss": 0.1344, + "step": 46450 + }, + { + "epoch": 5.08, + "learning_rate": 3.730832241153343e-05, + "loss": 0.1257, + "step": 46500 + }, + { + "epoch": 5.08, + "eval_cer": 0.050684195578306945, + "eval_loss": 0.26944053173065186, + "eval_runtime": 1196.4417, + "eval_samples_per_second": 6.279, + "eval_steps_per_second": 0.786, + "step": 46500 + }, + { + "epoch": 5.08, + "learning_rate": 3.729467016164264e-05, + "loss": 0.1408, + "step": 46550 + }, + { + "epoch": 5.09, + "learning_rate": 3.728101791175186e-05, + "loss": 0.1329, + "step": 46600 + }, + { + "epoch": 5.09, + "eval_cer": 0.04847711306763938, + "eval_loss": 0.27407070994377136, + "eval_runtime": 1178.5414, + "eval_samples_per_second": 6.375, + "eval_steps_per_second": 0.798, + "step": 46600 + }, + { + "epoch": 5.1, + "learning_rate": 3.7267365661861075e-05, + "loss": 0.1386, + "step": 46650 + }, + { + "epoch": 5.1, + "learning_rate": 3.725371341197029e-05, + "loss": 0.1181, + "step": 46700 + }, + { + "epoch": 5.1, + "eval_cer": 0.048323130566895134, + "eval_loss": 0.27152273058891296, + "eval_runtime": 1196.4525, + "eval_samples_per_second": 6.279, + "eval_steps_per_second": 0.786, + "step": 46700 + }, + { + "epoch": 5.11, + "learning_rate": 3.724006116207951e-05, + "loss": 0.1269, + "step": 46750 + }, + { + "epoch": 5.11, + "learning_rate": 3.722640891218873e-05, + "loss": 0.1485, + "step": 46800 + }, + { + "epoch": 5.11, + "eval_cer": 0.04719050372808743, + "eval_loss": 0.272051602602005, + "eval_runtime": 1208.2115, + "eval_samples_per_second": 6.218, + "eval_steps_per_second": 0.778, + "step": 46800 + }, + { + "epoch": 5.12, + "learning_rate": 3.721275666229795e-05, + "loss": 0.1299, + "step": 46850 + }, + { + "epoch": 5.12, + "learning_rate": 3.719910441240717e-05, + "loss": 0.1208, + "step": 46900 + }, + { + "epoch": 5.12, + "eval_cer": 0.046954397226946255, + "eval_loss": 0.2721458971500397, + "eval_runtime": 1204.2184, + "eval_samples_per_second": 6.239, + "eval_steps_per_second": 0.781, + "step": 46900 + }, + { + "epoch": 5.13, + "learning_rate": 3.718545216251638e-05, + "loss": 0.1464, + "step": 46950 + }, + { + "epoch": 5.13, + "learning_rate": 3.7171799912625606e-05, + "loss": 0.1208, + "step": 47000 + }, + { + "epoch": 5.13, + "eval_cer": 0.04879192173582762, + "eval_loss": 0.27154362201690674, + "eval_runtime": 1204.4392, + "eval_samples_per_second": 6.238, + "eval_steps_per_second": 0.78, + "step": 47000 + }, + { + "epoch": 5.14, + "learning_rate": 3.715869375273045e-05, + "loss": 0.1329, + "step": 47050 + }, + { + "epoch": 5.14, + "learning_rate": 3.714504150283967e-05, + "loss": 0.1369, + "step": 47100 + }, + { + "epoch": 5.14, + "eval_cer": 0.048282068566696666, + "eval_loss": 0.27170222997665405, + "eval_runtime": 1194.6083, + "eval_samples_per_second": 6.289, + "eval_steps_per_second": 0.787, + "step": 47100 + }, + { + "epoch": 5.15, + "learning_rate": 3.713138925294889e-05, + "loss": 0.1339, + "step": 47150 + }, + { + "epoch": 5.16, + "learning_rate": 3.711773700305811e-05, + "loss": 0.1421, + "step": 47200 + }, + { + "epoch": 5.16, + "eval_cer": 0.04744714122932785, + "eval_loss": 0.26895907521247864, + "eval_runtime": 1185.6835, + "eval_samples_per_second": 6.336, + "eval_steps_per_second": 0.793, + "step": 47200 + }, + { + "epoch": 5.16, + "learning_rate": 3.710408475316732e-05, + "loss": 0.1211, + "step": 47250 + }, + { + "epoch": 5.17, + "learning_rate": 3.709043250327654e-05, + "loss": 0.1335, + "step": 47300 + }, + { + "epoch": 5.17, + "eval_cer": 0.04832997423359488, + "eval_loss": 0.26967930793762207, + "eval_runtime": 1179.959, + "eval_samples_per_second": 6.367, + "eval_steps_per_second": 0.797, + "step": 47300 + }, + { + "epoch": 5.17, + "learning_rate": 3.707678025338576e-05, + "loss": 0.1212, + "step": 47350 + }, + { + "epoch": 5.18, + "learning_rate": 3.706312800349498e-05, + "loss": 0.1288, + "step": 47400 + }, + { + "epoch": 5.18, + "eval_cer": 0.04718023822803782, + "eval_loss": 0.2701777219772339, + "eval_runtime": 1172.1235, + "eval_samples_per_second": 6.41, + "eval_steps_per_second": 0.802, + "step": 47400 + }, + { + "epoch": 5.18, + "learning_rate": 3.704947575360419e-05, + "loss": 0.1317, + "step": 47450 + }, + { + "epoch": 5.19, + "learning_rate": 3.7035823503713416e-05, + "loss": 0.1178, + "step": 47500 + }, + { + "epoch": 5.19, + "eval_cer": 0.04687569505989919, + "eval_loss": 0.2715287208557129, + "eval_runtime": 1159.6419, + "eval_samples_per_second": 6.479, + "eval_steps_per_second": 0.811, + "step": 47500 + }, + { + "epoch": 5.19, + "learning_rate": 3.702217125382263e-05, + "loss": 0.1387, + "step": 47550 + }, + { + "epoch": 5.2, + "learning_rate": 3.7008519003931854e-05, + "loss": 0.1401, + "step": 47600 + }, + { + "epoch": 5.2, + "eval_cer": 0.0451887312184122, + "eval_loss": 0.2689366936683655, + "eval_runtime": 1153.2799, + "eval_samples_per_second": 6.514, + "eval_steps_per_second": 0.815, + "step": 47600 + }, + { + "epoch": 5.2, + "learning_rate": 3.699486675404107e-05, + "loss": 0.1537, + "step": 47650 + }, + { + "epoch": 5.21, + "learning_rate": 3.6981214504150286e-05, + "loss": 0.131, + "step": 47700 + }, + { + "epoch": 5.21, + "eval_cer": 0.04627003055697181, + "eval_loss": 0.27360963821411133, + "eval_runtime": 1144.9881, + "eval_samples_per_second": 6.562, + "eval_steps_per_second": 0.821, + "step": 47700 + }, + { + "epoch": 5.22, + "learning_rate": 3.69675622542595e-05, + "loss": 0.1171, + "step": 47750 + }, + { + "epoch": 5.22, + "learning_rate": 3.695391000436872e-05, + "loss": 0.1227, + "step": 47800 + }, + { + "epoch": 5.22, + "eval_cer": 0.04878849990247775, + "eval_loss": 0.2697356641292572, + "eval_runtime": 1167.8301, + "eval_samples_per_second": 6.433, + "eval_steps_per_second": 0.805, + "step": 47800 + }, + { + "epoch": 5.23, + "learning_rate": 3.694025775447794e-05, + "loss": 0.1356, + "step": 47850 + }, + { + "epoch": 5.23, + "learning_rate": 3.6926605504587156e-05, + "loss": 0.1377, + "step": 47900 + }, + { + "epoch": 5.23, + "eval_cer": 0.05130696924798368, + "eval_loss": 0.27117544412612915, + "eval_runtime": 1196.2902, + "eval_samples_per_second": 6.28, + "eval_steps_per_second": 0.786, + "step": 47900 + }, + { + "epoch": 5.24, + "learning_rate": 3.691295325469638e-05, + "loss": 0.1426, + "step": 47950 + }, + { + "epoch": 5.24, + "learning_rate": 3.6899301004805594e-05, + "loss": 0.1354, + "step": 48000 + }, + { + "epoch": 5.24, + "eval_cer": 0.047423188395878745, + "eval_loss": 0.2683400809764862, + "eval_runtime": 1161.2453, + "eval_samples_per_second": 6.47, + "eval_steps_per_second": 0.809, + "step": 48000 + }, + { + "epoch": 5.25, + "learning_rate": 3.688564875491482e-05, + "loss": 0.1168, + "step": 48050 + }, + { + "epoch": 5.25, + "learning_rate": 3.687199650502403e-05, + "loss": 0.1334, + "step": 48100 + }, + { + "epoch": 5.25, + "eval_cer": 0.04782012106446392, + "eval_loss": 0.2731897532939911, + "eval_runtime": 1159.6513, + "eval_samples_per_second": 6.479, + "eval_steps_per_second": 0.811, + "step": 48100 + }, + { + "epoch": 5.26, + "learning_rate": 3.685834425513325e-05, + "loss": 0.1383, + "step": 48150 + }, + { + "epoch": 5.26, + "learning_rate": 3.6844692005242464e-05, + "loss": 0.1405, + "step": 48200 + }, + { + "epoch": 5.26, + "eval_cer": 0.045941534555384085, + "eval_loss": 0.27034494280815125, + "eval_runtime": 1268.9769, + "eval_samples_per_second": 5.921, + "eval_steps_per_second": 0.741, + "step": 48200 + }, + { + "epoch": 5.27, + "learning_rate": 3.683103975535168e-05, + "loss": 0.1294, + "step": 48250 + }, + { + "epoch": 5.28, + "learning_rate": 3.68173875054609e-05, + "loss": 0.116, + "step": 48300 + }, + { + "epoch": 5.28, + "eval_cer": 0.04806649306565472, + "eval_loss": 0.27247393131256104, + "eval_runtime": 1271.3216, + "eval_samples_per_second": 5.91, + "eval_steps_per_second": 0.739, + "step": 48300 + }, + { + "epoch": 5.28, + "learning_rate": 3.680373525557012e-05, + "loss": 0.1136, + "step": 48350 + }, + { + "epoch": 5.29, + "learning_rate": 3.6790083005679334e-05, + "loss": 0.1479, + "step": 48400 + }, + { + "epoch": 5.29, + "eval_cer": 0.044699409049380474, + "eval_loss": 0.27049344778060913, + "eval_runtime": 1251.951, + "eval_samples_per_second": 6.001, + "eval_steps_per_second": 0.751, + "step": 48400 + }, + { + "epoch": 5.29, + "learning_rate": 3.677643075578856e-05, + "loss": 0.1308, + "step": 48450 + }, + { + "epoch": 5.3, + "learning_rate": 3.676277850589777e-05, + "loss": 0.1352, + "step": 48500 + }, + { + "epoch": 5.3, + "eval_cer": 0.04500737405086897, + "eval_loss": 0.26786038279533386, + "eval_runtime": 1245.5699, + "eval_samples_per_second": 6.032, + "eval_steps_per_second": 0.755, + "step": 48500 + }, + { + "epoch": 5.3, + "learning_rate": 3.6749126256006995e-05, + "loss": 0.1188, + "step": 48550 + }, + { + "epoch": 5.31, + "learning_rate": 3.673547400611621e-05, + "loss": 0.1272, + "step": 48600 + }, + { + "epoch": 5.31, + "eval_cer": 0.045086076217916035, + "eval_loss": 0.27316638827323914, + "eval_runtime": 1239.7313, + "eval_samples_per_second": 6.06, + "eval_steps_per_second": 0.758, + "step": 48600 + }, + { + "epoch": 5.31, + "learning_rate": 3.672182175622543e-05, + "loss": 0.1235, + "step": 48650 + }, + { + "epoch": 5.32, + "learning_rate": 3.670816950633464e-05, + "loss": 0.1682, + "step": 48700 + }, + { + "epoch": 5.32, + "eval_cer": 0.04581834855478868, + "eval_loss": 0.2646952271461487, + "eval_runtime": 1268.735, + "eval_samples_per_second": 5.922, + "eval_steps_per_second": 0.741, + "step": 48700 + }, + { + "epoch": 5.32, + "learning_rate": 3.669451725644386e-05, + "loss": 0.133, + "step": 48750 + }, + { + "epoch": 5.33, + "learning_rate": 3.668086500655308e-05, + "loss": 0.1172, + "step": 48800 + }, + { + "epoch": 5.33, + "eval_cer": 0.04660537022525929, + "eval_loss": 0.269662082195282, + "eval_runtime": 1258.7176, + "eval_samples_per_second": 5.969, + "eval_steps_per_second": 0.747, + "step": 48800 + }, + { + "epoch": 5.34, + "learning_rate": 3.66672127566623e-05, + "loss": 0.1423, + "step": 48850 + }, + { + "epoch": 5.34, + "learning_rate": 3.665356050677152e-05, + "loss": 0.1353, + "step": 48900 + }, + { + "epoch": 5.34, + "eval_cer": 0.04585941055498715, + "eval_loss": 0.26135268807411194, + "eval_runtime": 1262.3154, + "eval_samples_per_second": 5.952, + "eval_steps_per_second": 0.745, + "step": 48900 + }, + { + "epoch": 5.35, + "learning_rate": 3.6639908256880735e-05, + "loss": 0.1148, + "step": 48950 + }, + { + "epoch": 5.35, + "learning_rate": 3.662625600698996e-05, + "loss": 0.1316, + "step": 49000 + }, + { + "epoch": 5.35, + "eval_cer": 0.045784130221289965, + "eval_loss": 0.26784536242485046, + "eval_runtime": 1276.1191, + "eval_samples_per_second": 5.887, + "eval_steps_per_second": 0.737, + "step": 49000 + }, + { + "epoch": 5.36, + "learning_rate": 3.6612603757099174e-05, + "loss": 0.1386, + "step": 49050 + }, + { + "epoch": 5.36, + "learning_rate": 3.659895150720839e-05, + "loss": 0.1637, + "step": 49100 + }, + { + "epoch": 5.36, + "eval_cer": 0.04674224355925418, + "eval_loss": 0.26811158657073975, + "eval_runtime": 1235.9712, + "eval_samples_per_second": 6.079, + "eval_steps_per_second": 0.761, + "step": 49100 + }, + { + "epoch": 5.37, + "learning_rate": 3.6585299257317606e-05, + "loss": 0.122, + "step": 49150 + }, + { + "epoch": 5.37, + "learning_rate": 3.657164700742682e-05, + "loss": 0.1155, + "step": 49200 + }, + { + "epoch": 5.37, + "eval_cer": 0.04763876389692069, + "eval_loss": 0.2680990397930145, + "eval_runtime": 1264.4978, + "eval_samples_per_second": 5.941, + "eval_steps_per_second": 0.743, + "step": 49200 + }, + { + "epoch": 5.38, + "learning_rate": 3.6557994757536044e-05, + "loss": 0.139, + "step": 49250 + }, + { + "epoch": 5.38, + "learning_rate": 3.654434250764526e-05, + "loss": 0.118, + "step": 49300 + }, + { + "epoch": 5.38, + "eval_cer": 0.04588336338843626, + "eval_loss": 0.2674948275089264, + "eval_runtime": 1261.4278, + "eval_samples_per_second": 5.956, + "eval_steps_per_second": 0.745, + "step": 49300 + }, + { + "epoch": 5.39, + "learning_rate": 3.653069025775448e-05, + "loss": 0.1548, + "step": 49350 + }, + { + "epoch": 5.4, + "learning_rate": 3.65170380078637e-05, + "loss": 0.1212, + "step": 49400 + }, + { + "epoch": 5.4, + "eval_cer": 0.04661905755865878, + "eval_loss": 0.2663722634315491, + "eval_runtime": 1258.5871, + "eval_samples_per_second": 5.969, + "eval_steps_per_second": 0.747, + "step": 49400 + }, + { + "epoch": 5.4, + "learning_rate": 3.6503385757972914e-05, + "loss": 0.1128, + "step": 49450 + }, + { + "epoch": 5.41, + "learning_rate": 3.6489733508082137e-05, + "loss": 0.1252, + "step": 49500 + }, + { + "epoch": 5.41, + "eval_cer": 0.04388159087876102, + "eval_loss": 0.26354527473449707, + "eval_runtime": 1249.6738, + "eval_samples_per_second": 6.012, + "eval_steps_per_second": 0.752, + "step": 49500 + }, + { + "epoch": 5.41, + "learning_rate": 3.647608125819135e-05, + "loss": 0.1367, + "step": 49550 + }, + { + "epoch": 5.42, + "learning_rate": 3.646242900830057e-05, + "loss": 0.1254, + "step": 49600 + }, + { + "epoch": 5.42, + "eval_cer": 0.044230617880447985, + "eval_loss": 0.26343604922294617, + "eval_runtime": 1249.6982, + "eval_samples_per_second": 6.012, + "eval_steps_per_second": 0.752, + "step": 49600 + }, + { + "epoch": 5.42, + "learning_rate": 3.6448776758409784e-05, + "loss": 0.115, + "step": 49650 + }, + { + "epoch": 5.43, + "learning_rate": 3.6435124508519e-05, + "loss": 0.1205, + "step": 49700 + }, + { + "epoch": 5.43, + "eval_cer": 0.04457280121543521, + "eval_loss": 0.2652714252471924, + "eval_runtime": 1248.8867, + "eval_samples_per_second": 6.016, + "eval_steps_per_second": 0.753, + "step": 49700 + }, + { + "epoch": 5.43, + "learning_rate": 3.6421745303626035e-05, + "loss": 0.1693, + "step": 49750 + }, + { + "epoch": 5.44, + "learning_rate": 3.640809305373526e-05, + "loss": 0.1395, + "step": 49800 + }, + { + "epoch": 5.44, + "eval_cer": 0.04441881871469096, + "eval_loss": 0.26012542843818665, + "eval_runtime": 1252.5778, + "eval_samples_per_second": 5.998, + "eval_steps_per_second": 0.75, + "step": 49800 + }, + { + "epoch": 5.44, + "learning_rate": 3.639444080384447e-05, + "loss": 0.1174, + "step": 49850 + }, + { + "epoch": 5.45, + "learning_rate": 3.6380788553953696e-05, + "loss": 0.1248, + "step": 49900 + }, + { + "epoch": 5.45, + "eval_cer": 0.04295427404094566, + "eval_loss": 0.2604806423187256, + "eval_runtime": 1250.2865, + "eval_samples_per_second": 6.009, + "eval_steps_per_second": 0.752, + "step": 49900 + }, + { + "epoch": 5.46, + "learning_rate": 3.636713630406291e-05, + "loss": 0.1504, + "step": 49950 + }, + { + "epoch": 5.46, + "learning_rate": 3.635348405417213e-05, + "loss": 0.1354, + "step": 50000 + }, + { + "epoch": 5.46, + "eval_cer": 0.044569379382085335, + "eval_loss": 0.25881579518318176, + "eval_runtime": 1274.8628, + "eval_samples_per_second": 5.893, + "eval_steps_per_second": 0.737, + "step": 50000 + }, + { + "epoch": 5.47, + "learning_rate": 3.633983180428135e-05, + "loss": 0.1333, + "step": 50050 + }, + { + "epoch": 5.47, + "learning_rate": 3.6326179554390566e-05, + "loss": 0.1274, + "step": 50100 + }, + { + "epoch": 5.47, + "eval_cer": 0.04327934820918351, + "eval_loss": 0.26810508966445923, + "eval_runtime": 1234.1932, + "eval_samples_per_second": 6.087, + "eval_steps_per_second": 0.762, + "step": 50100 + }, + { + "epoch": 5.48, + "learning_rate": 3.631252730449978e-05, + "loss": 0.1529, + "step": 50150 + }, + { + "epoch": 5.48, + "learning_rate": 3.6298875054609e-05, + "loss": 0.1358, + "step": 50200 + }, + { + "epoch": 5.48, + "eval_cer": 0.04586625422168689, + "eval_loss": 0.2661205530166626, + "eval_runtime": 1253.0382, + "eval_samples_per_second": 5.996, + "eval_steps_per_second": 0.75, + "step": 50200 + }, + { + "epoch": 5.49, + "learning_rate": 3.628522280471822e-05, + "loss": 0.1378, + "step": 50250 + }, + { + "epoch": 5.49, + "learning_rate": 3.6271570554827436e-05, + "loss": 0.1197, + "step": 50300 + }, + { + "epoch": 5.49, + "eval_cer": 0.047036521227343184, + "eval_loss": 0.26336920261383057, + "eval_runtime": 1281.5933, + "eval_samples_per_second": 5.862, + "eval_steps_per_second": 0.733, + "step": 50300 + }, + { + "epoch": 5.5, + "learning_rate": 3.625791830493666e-05, + "loss": 0.1307, + "step": 50350 + }, + { + "epoch": 5.5, + "learning_rate": 3.6244266055045874e-05, + "loss": 0.1206, + "step": 50400 + }, + { + "epoch": 5.5, + "eval_cer": 0.04744714122932785, + "eval_loss": 0.2674347162246704, + "eval_runtime": 1268.2958, + "eval_samples_per_second": 5.924, + "eval_steps_per_second": 0.741, + "step": 50400 + }, + { + "epoch": 5.51, + "learning_rate": 3.623061380515509e-05, + "loss": 0.1378, + "step": 50450 + }, + { + "epoch": 5.52, + "learning_rate": 3.621696155526431e-05, + "loss": 0.1356, + "step": 50500 + }, + { + "epoch": 5.52, + "eval_cer": 0.04557197655359789, + "eval_loss": 0.2662409842014313, + "eval_runtime": 1262.0384, + "eval_samples_per_second": 5.953, + "eval_steps_per_second": 0.745, + "step": 50500 + }, + { + "epoch": 5.52, + "learning_rate": 3.620330930537353e-05, + "loss": 0.1419, + "step": 50550 + }, + { + "epoch": 5.53, + "learning_rate": 3.6189657055482744e-05, + "loss": 0.1365, + "step": 50600 + }, + { + "epoch": 5.53, + "eval_cer": 0.0467148688924552, + "eval_loss": 0.2625742554664612, + "eval_runtime": 1264.3349, + "eval_samples_per_second": 5.942, + "eval_steps_per_second": 0.743, + "step": 50600 + }, + { + "epoch": 5.53, + "learning_rate": 3.617600480559196e-05, + "loss": 0.1267, + "step": 50650 + }, + { + "epoch": 5.54, + "learning_rate": 3.6162352555701176e-05, + "loss": 0.1394, + "step": 50700 + }, + { + "epoch": 5.54, + "eval_cer": 0.05017776424252586, + "eval_loss": 0.26222458481788635, + "eval_runtime": 1284.6728, + "eval_samples_per_second": 5.848, + "eval_steps_per_second": 0.732, + "step": 50700 + }, + { + "epoch": 5.54, + "learning_rate": 3.61487003058104e-05, + "loss": 0.1296, + "step": 50750 + }, + { + "epoch": 5.55, + "learning_rate": 3.613504805591962e-05, + "loss": 0.1269, + "step": 50800 + }, + { + "epoch": 5.55, + "eval_cer": 0.04537008838595543, + "eval_loss": 0.26166456937789917, + "eval_runtime": 1264.8055, + "eval_samples_per_second": 5.94, + "eval_steps_per_second": 0.743, + "step": 50800 + }, + { + "epoch": 5.55, + "learning_rate": 3.612139580602884e-05, + "loss": 0.123, + "step": 50850 + }, + { + "epoch": 5.56, + "learning_rate": 3.610774355613805e-05, + "loss": 0.1518, + "step": 50900 + }, + { + "epoch": 5.56, + "eval_cer": 0.045038170551017824, + "eval_loss": 0.2633424401283264, + "eval_runtime": 1261.6473, + "eval_samples_per_second": 5.955, + "eval_steps_per_second": 0.745, + "step": 50900 + }, + { + "epoch": 5.56, + "learning_rate": 3.609409130624727e-05, + "loss": 0.1285, + "step": 50950 + }, + { + "epoch": 5.57, + "learning_rate": 3.608043905635649e-05, + "loss": 0.1507, + "step": 51000 + }, + { + "epoch": 5.57, + "eval_cer": 0.0446720343825815, + "eval_loss": 0.26199015974998474, + "eval_runtime": 1265.2148, + "eval_samples_per_second": 5.938, + "eval_steps_per_second": 0.743, + "step": 51000 + }, + { + "epoch": 5.58, + "learning_rate": 3.606678680646571e-05, + "loss": 0.147, + "step": 51050 + }, + { + "epoch": 5.58, + "learning_rate": 3.605313455657492e-05, + "loss": 0.1235, + "step": 51100 + }, + { + "epoch": 5.58, + "eval_cer": 0.04783380839786341, + "eval_loss": 0.26737144589424133, + "eval_runtime": 1274.337, + "eval_samples_per_second": 5.896, + "eval_steps_per_second": 0.738, + "step": 51100 + }, + { + "epoch": 5.59, + "learning_rate": 3.603948230668414e-05, + "loss": 0.1562, + "step": 51150 + }, + { + "epoch": 5.59, + "learning_rate": 3.602583005679336e-05, + "loss": 0.1505, + "step": 51200 + }, + { + "epoch": 5.59, + "eval_cer": 0.045335870052456706, + "eval_loss": 0.2606879472732544, + "eval_runtime": 1267.847, + "eval_samples_per_second": 5.926, + "eval_steps_per_second": 0.741, + "step": 51200 + }, + { + "epoch": 5.6, + "learning_rate": 3.6012177806902584e-05, + "loss": 0.1231, + "step": 51250 + }, + { + "epoch": 5.6, + "learning_rate": 3.59985255570118e-05, + "loss": 0.1292, + "step": 51300 + }, + { + "epoch": 5.6, + "eval_cer": 0.04468914354933086, + "eval_loss": 0.26606661081314087, + "eval_runtime": 1268.994, + "eval_samples_per_second": 5.92, + "eval_steps_per_second": 0.741, + "step": 51300 + }, + { + "epoch": 5.61, + "learning_rate": 3.5984873307121015e-05, + "loss": 0.1312, + "step": 51350 + }, + { + "epoch": 5.61, + "learning_rate": 3.597122105723023e-05, + "loss": 0.1643, + "step": 51400 + }, + { + "epoch": 5.61, + "eval_cer": 0.04536324471925568, + "eval_loss": 0.2628079056739807, + "eval_runtime": 1244.4761, + "eval_samples_per_second": 6.037, + "eval_steps_per_second": 0.755, + "step": 51400 + }, + { + "epoch": 5.62, + "learning_rate": 3.5957568807339454e-05, + "loss": 0.1213, + "step": 51450 + }, + { + "epoch": 5.62, + "learning_rate": 3.594391655744867e-05, + "loss": 0.132, + "step": 51500 + }, + { + "epoch": 5.62, + "eval_cer": 0.04487734438357383, + "eval_loss": 0.26130521297454834, + "eval_runtime": 1244.9819, + "eval_samples_per_second": 6.035, + "eval_steps_per_second": 0.755, + "step": 51500 + }, + { + "epoch": 5.63, + "learning_rate": 3.5930264307557885e-05, + "loss": 0.14, + "step": 51550 + }, + { + "epoch": 5.64, + "learning_rate": 3.59166120576671e-05, + "loss": 0.1372, + "step": 51600 + }, + { + "epoch": 5.64, + "eval_cer": 0.04443592788144032, + "eval_loss": 0.26238834857940674, + "eval_runtime": 1252.4851, + "eval_samples_per_second": 5.998, + "eval_steps_per_second": 0.751, + "step": 51600 + }, + { + "epoch": 5.64, + "learning_rate": 3.5902959807776324e-05, + "loss": 0.1436, + "step": 51650 + }, + { + "epoch": 5.65, + "learning_rate": 3.5889307557885546e-05, + "loss": 0.1445, + "step": 51700 + }, + { + "epoch": 5.65, + "eval_cer": 0.04557539838694776, + "eval_loss": 0.2595983147621155, + "eval_runtime": 1271.1092, + "eval_samples_per_second": 5.911, + "eval_steps_per_second": 0.74, + "step": 51700 + }, + { + "epoch": 5.65, + "learning_rate": 3.587565530799476e-05, + "loss": 0.1417, + "step": 51750 + }, + { + "epoch": 5.66, + "learning_rate": 3.586200305810398e-05, + "loss": 0.1249, + "step": 51800 + }, + { + "epoch": 5.66, + "eval_cer": 0.04567805338744393, + "eval_loss": 0.2596184015274048, + "eval_runtime": 1269.3848, + "eval_samples_per_second": 5.919, + "eval_steps_per_second": 0.741, + "step": 51800 + }, + { + "epoch": 5.66, + "learning_rate": 3.5848350808213194e-05, + "loss": 0.1367, + "step": 51850 + }, + { + "epoch": 5.67, + "learning_rate": 3.5834698558322417e-05, + "loss": 0.1467, + "step": 51900 + }, + { + "epoch": 5.67, + "eval_cer": 0.04477811121642754, + "eval_loss": 0.2584436237812042, + "eval_runtime": 1255.3233, + "eval_samples_per_second": 5.985, + "eval_steps_per_second": 0.749, + "step": 51900 + }, + { + "epoch": 5.67, + "learning_rate": 3.582104630843163e-05, + "loss": 0.1345, + "step": 51950 + }, + { + "epoch": 5.68, + "learning_rate": 3.580739405854085e-05, + "loss": 0.1216, + "step": 52000 + }, + { + "epoch": 5.68, + "eval_cer": 0.044542004715286354, + "eval_loss": 0.26097363233566284, + "eval_runtime": 1262.6332, + "eval_samples_per_second": 5.95, + "eval_steps_per_second": 0.744, + "step": 52000 + }, + { + "epoch": 5.68, + "learning_rate": 3.5793741808650064e-05, + "loss": 0.1363, + "step": 52050 + }, + { + "epoch": 5.69, + "learning_rate": 3.5780089558759287e-05, + "loss": 0.132, + "step": 52100 + }, + { + "epoch": 5.69, + "eval_cer": 0.04648560605801376, + "eval_loss": 0.2578682601451874, + "eval_runtime": 1261.5333, + "eval_samples_per_second": 5.955, + "eval_steps_per_second": 0.745, + "step": 52100 + }, + { + "epoch": 5.7, + "learning_rate": 3.57664373088685e-05, + "loss": 0.1387, + "step": 52150 + }, + { + "epoch": 5.7, + "learning_rate": 3.5752785058977725e-05, + "loss": 0.1331, + "step": 52200 + }, + { + "epoch": 5.7, + "eval_cer": 0.04765929489701993, + "eval_loss": 0.25392112135887146, + "eval_runtime": 1269.6731, + "eval_samples_per_second": 5.917, + "eval_steps_per_second": 0.74, + "step": 52200 + }, + { + "epoch": 5.71, + "learning_rate": 3.573913280908694e-05, + "loss": 0.1492, + "step": 52250 + }, + { + "epoch": 5.71, + "learning_rate": 3.5725480559196157e-05, + "loss": 0.1219, + "step": 52300 + }, + { + "epoch": 5.71, + "eval_cer": 0.044743892882928814, + "eval_loss": 0.2626784145832062, + "eval_runtime": 1270.185, + "eval_samples_per_second": 5.915, + "eval_steps_per_second": 0.74, + "step": 52300 + }, + { + "epoch": 5.72, + "learning_rate": 3.571182830930537e-05, + "loss": 0.1599, + "step": 52350 + }, + { + "epoch": 5.72, + "learning_rate": 3.5698176059414595e-05, + "loss": 0.1252, + "step": 52400 + }, + { + "epoch": 5.72, + "eval_cer": 0.0451271382181145, + "eval_loss": 0.2560093402862549, + "eval_runtime": 1269.5004, + "eval_samples_per_second": 5.918, + "eval_steps_per_second": 0.74, + "step": 52400 + }, + { + "epoch": 5.73, + "learning_rate": 3.568452380952381e-05, + "loss": 0.1291, + "step": 52450 + }, + { + "epoch": 5.73, + "learning_rate": 3.567087155963303e-05, + "loss": 0.1374, + "step": 52500 + }, + { + "epoch": 5.73, + "eval_cer": 0.044836282383375364, + "eval_loss": 0.25710663199424744, + "eval_runtime": 1269.5647, + "eval_samples_per_second": 5.918, + "eval_steps_per_second": 0.74, + "step": 52500 + }, + { + "epoch": 5.74, + "learning_rate": 3.565721930974224e-05, + "loss": 0.1405, + "step": 52550 + }, + { + "epoch": 5.74, + "learning_rate": 3.5643567059851465e-05, + "loss": 0.1355, + "step": 52600 + }, + { + "epoch": 5.74, + "eval_cer": 0.04604761138923012, + "eval_loss": 0.2563580572605133, + "eval_runtime": 1264.2998, + "eval_samples_per_second": 5.942, + "eval_steps_per_second": 0.743, + "step": 52600 + }, + { + "epoch": 5.75, + "learning_rate": 3.562991480996069e-05, + "loss": 0.1349, + "step": 52650 + }, + { + "epoch": 5.76, + "learning_rate": 3.5616262560069903e-05, + "loss": 0.1225, + "step": 52700 + }, + { + "epoch": 5.76, + "eval_cer": 0.04685858589314983, + "eval_loss": 0.2595473527908325, + "eval_runtime": 1271.6988, + "eval_samples_per_second": 5.908, + "eval_steps_per_second": 0.739, + "step": 52700 + }, + { + "epoch": 5.76, + "learning_rate": 3.560261031017912e-05, + "loss": 0.1259, + "step": 52750 + }, + { + "epoch": 5.77, + "learning_rate": 3.5588958060288335e-05, + "loss": 0.1449, + "step": 52800 + }, + { + "epoch": 5.77, + "eval_cer": 0.044747314716278686, + "eval_loss": 0.2548329830169678, + "eval_runtime": 1266.1674, + "eval_samples_per_second": 5.934, + "eval_steps_per_second": 0.742, + "step": 52800 + }, + { + "epoch": 5.77, + "learning_rate": 3.557530581039756e-05, + "loss": 0.1331, + "step": 52850 + }, + { + "epoch": 5.78, + "learning_rate": 3.5561653560506774e-05, + "loss": 0.1509, + "step": 52900 + }, + { + "epoch": 5.78, + "eval_cer": 0.04509976355131552, + "eval_loss": 0.2525275647640228, + "eval_runtime": 1264.0478, + "eval_samples_per_second": 5.944, + "eval_steps_per_second": 0.744, + "step": 52900 + }, + { + "epoch": 5.78, + "learning_rate": 3.554800131061599e-05, + "loss": 0.1198, + "step": 52950 + }, + { + "epoch": 5.79, + "learning_rate": 3.5534349060725205e-05, + "loss": 0.1418, + "step": 53000 + }, + { + "epoch": 5.79, + "eval_cer": 0.04541457221950376, + "eval_loss": 0.2532413899898529, + "eval_runtime": 1261.684, + "eval_samples_per_second": 5.955, + "eval_steps_per_second": 0.745, + "step": 53000 + }, + { + "epoch": 5.79, + "learning_rate": 3.552069681083443e-05, + "loss": 0.1342, + "step": 53050 + }, + { + "epoch": 5.8, + "learning_rate": 3.550704456094365e-05, + "loss": 0.1365, + "step": 53100 + }, + { + "epoch": 5.8, + "eval_cer": 0.04493551555052166, + "eval_loss": 0.2554800808429718, + "eval_runtime": 1248.7086, + "eval_samples_per_second": 6.017, + "eval_steps_per_second": 0.753, + "step": 53100 + }, + { + "epoch": 5.8, + "learning_rate": 3.5493392311052866e-05, + "loss": 0.115, + "step": 53150 + }, + { + "epoch": 5.81, + "learning_rate": 3.547974006116208e-05, + "loss": 0.1405, + "step": 53200 + }, + { + "epoch": 5.81, + "eval_cer": 0.044367491214442875, + "eval_loss": 0.2578386068344116, + "eval_runtime": 1259.4446, + "eval_samples_per_second": 5.965, + "eval_steps_per_second": 0.746, + "step": 53200 + }, + { + "epoch": 5.82, + "learning_rate": 3.54660878112713e-05, + "loss": 0.1349, + "step": 53250 + }, + { + "epoch": 5.82, + "learning_rate": 3.5452435561380514e-05, + "loss": 0.1415, + "step": 53300 + }, + { + "epoch": 5.82, + "eval_cer": 0.048206788232999474, + "eval_loss": 0.2606782019138336, + "eval_runtime": 1285.9663, + "eval_samples_per_second": 5.842, + "eval_steps_per_second": 0.731, + "step": 53300 + }, + { + "epoch": 5.83, + "learning_rate": 3.5438783311489736e-05, + "loss": 0.1643, + "step": 53350 + }, + { + "epoch": 5.83, + "learning_rate": 3.542513106159895e-05, + "loss": 0.122, + "step": 53400 + }, + { + "epoch": 5.83, + "eval_cer": 0.04651640255816261, + "eval_loss": 0.2521412670612335, + "eval_runtime": 1275.8742, + "eval_samples_per_second": 5.889, + "eval_steps_per_second": 0.737, + "step": 53400 + }, + { + "epoch": 5.84, + "learning_rate": 3.541147881170817e-05, + "loss": 0.1179, + "step": 53450 + }, + { + "epoch": 5.84, + "learning_rate": 3.539782656181739e-05, + "loss": 0.136, + "step": 53500 + }, + { + "epoch": 5.84, + "eval_cer": 0.045017639550918594, + "eval_loss": 0.2541681230068207, + "eval_runtime": 1144.0813, + "eval_samples_per_second": 6.567, + "eval_steps_per_second": 0.822, + "step": 53500 + }, + { + "epoch": 5.85, + "learning_rate": 3.538444735692442e-05, + "loss": 0.1342, + "step": 53550 + }, + { + "epoch": 5.85, + "learning_rate": 3.537079510703364e-05, + "loss": 0.1477, + "step": 53600 + }, + { + "epoch": 5.85, + "eval_cer": 0.045428259552903257, + "eval_loss": 0.2599349021911621, + "eval_runtime": 1156.9156, + "eval_samples_per_second": 6.494, + "eval_steps_per_second": 0.813, + "step": 53600 + }, + { + "epoch": 5.86, + "learning_rate": 3.5357142857142864e-05, + "loss": 0.141, + "step": 53650 + }, + { + "epoch": 5.86, + "learning_rate": 3.534349060725208e-05, + "loss": 0.1443, + "step": 53700 + }, + { + "epoch": 5.86, + "eval_cer": 0.044945781050571273, + "eval_loss": 0.2558267116546631, + "eval_runtime": 1247.3648, + "eval_samples_per_second": 6.023, + "eval_steps_per_second": 0.754, + "step": 53700 + }, + { + "epoch": 5.87, + "learning_rate": 3.5329838357361295e-05, + "loss": 0.1352, + "step": 53750 + }, + { + "epoch": 5.88, + "learning_rate": 3.531618610747051e-05, + "loss": 0.1668, + "step": 53800 + }, + { + "epoch": 5.88, + "eval_cer": 0.04712548889443986, + "eval_loss": 0.25672096014022827, + "eval_runtime": 1282.4517, + "eval_samples_per_second": 5.858, + "eval_steps_per_second": 0.733, + "step": 53800 + }, + { + "epoch": 5.88, + "learning_rate": 3.530253385757973e-05, + "loss": 0.1336, + "step": 53850 + }, + { + "epoch": 5.89, + "learning_rate": 3.528888160768895e-05, + "loss": 0.1148, + "step": 53900 + }, + { + "epoch": 5.89, + "eval_cer": 0.044613863215633674, + "eval_loss": 0.25303828716278076, + "eval_runtime": 1256.3406, + "eval_samples_per_second": 5.98, + "eval_steps_per_second": 0.748, + "step": 53900 + }, + { + "epoch": 5.89, + "learning_rate": 3.5275229357798165e-05, + "loss": 0.1592, + "step": 53950 + }, + { + "epoch": 5.9, + "learning_rate": 3.526157710790738e-05, + "loss": 0.1355, + "step": 54000 + }, + { + "epoch": 5.9, + "eval_cer": 0.043782357711614726, + "eval_loss": 0.25274816155433655, + "eval_runtime": 1245.1946, + "eval_samples_per_second": 6.034, + "eval_steps_per_second": 0.755, + "step": 54000 + }, + { + "epoch": 5.9, + "learning_rate": 3.5247924858016604e-05, + "loss": 0.1342, + "step": 54050 + }, + { + "epoch": 5.91, + "learning_rate": 3.5234272608125826e-05, + "loss": 0.13, + "step": 54100 + }, + { + "epoch": 5.91, + "eval_cer": 0.04428878904739581, + "eval_loss": 0.25448983907699585, + "eval_runtime": 1257.6849, + "eval_samples_per_second": 5.974, + "eval_steps_per_second": 0.747, + "step": 54100 + }, + { + "epoch": 5.91, + "learning_rate": 3.522062035823504e-05, + "loss": 0.1447, + "step": 54150 + }, + { + "epoch": 5.92, + "learning_rate": 3.520696810834426e-05, + "loss": 0.1317, + "step": 54200 + }, + { + "epoch": 5.92, + "eval_cer": 0.04340595604312879, + "eval_loss": 0.253248006105423, + "eval_runtime": 1252.06, + "eval_samples_per_second": 6.001, + "eval_steps_per_second": 0.751, + "step": 54200 + }, + { + "epoch": 5.93, + "learning_rate": 3.5193315858453474e-05, + "loss": 0.1265, + "step": 54250 + }, + { + "epoch": 5.93, + "learning_rate": 3.517966360856269e-05, + "loss": 0.1338, + "step": 54300 + }, + { + "epoch": 5.93, + "eval_cer": 0.04605787688927974, + "eval_loss": 0.2523036301136017, + "eval_runtime": 1270.608, + "eval_samples_per_second": 5.913, + "eval_steps_per_second": 0.74, + "step": 54300 + }, + { + "epoch": 5.94, + "learning_rate": 3.516601135867191e-05, + "loss": 0.1246, + "step": 54350 + }, + { + "epoch": 5.94, + "learning_rate": 3.515235910878113e-05, + "loss": 0.1266, + "step": 54400 + }, + { + "epoch": 5.94, + "eval_cer": 0.04314247487518863, + "eval_loss": 0.2512058913707733, + "eval_runtime": 1257.0108, + "eval_samples_per_second": 5.977, + "eval_steps_per_second": 0.748, + "step": 54400 + }, + { + "epoch": 5.95, + "learning_rate": 3.5138706858890344e-05, + "loss": 0.1275, + "step": 54450 + }, + { + "epoch": 5.95, + "learning_rate": 3.5125054608999566e-05, + "loss": 0.1268, + "step": 54500 + }, + { + "epoch": 5.95, + "eval_cer": 0.04472336188282958, + "eval_loss": 0.25320523977279663, + "eval_runtime": 1262.1566, + "eval_samples_per_second": 5.953, + "eval_steps_per_second": 0.745, + "step": 54500 + }, + { + "epoch": 5.96, + "learning_rate": 3.511140235910878e-05, + "loss": 0.1611, + "step": 54550 + }, + { + "epoch": 5.96, + "learning_rate": 3.5097750109218005e-05, + "loss": 0.1215, + "step": 54600 + }, + { + "epoch": 5.96, + "eval_cer": 0.044791798549827025, + "eval_loss": 0.2544041574001312, + "eval_runtime": 1258.3474, + "eval_samples_per_second": 5.971, + "eval_steps_per_second": 0.747, + "step": 54600 + }, + { + "epoch": 5.97, + "learning_rate": 3.508409785932722e-05, + "loss": 0.1343, + "step": 54650 + }, + { + "epoch": 5.97, + "learning_rate": 3.5070445609436437e-05, + "loss": 0.1416, + "step": 54700 + }, + { + "epoch": 5.97, + "eval_cer": 0.04583887955488792, + "eval_loss": 0.24952231347560883, + "eval_runtime": 1266.7565, + "eval_samples_per_second": 5.931, + "eval_steps_per_second": 0.742, + "step": 54700 + }, + { + "epoch": 5.98, + "learning_rate": 3.505679335954565e-05, + "loss": 0.1357, + "step": 54750 + }, + { + "epoch": 5.99, + "learning_rate": 3.504314110965487e-05, + "loss": 0.1256, + "step": 54800 + }, + { + "epoch": 5.99, + "eval_cer": 0.04646507505791453, + "eval_loss": 0.2494717687368393, + "eval_runtime": 1299.795, + "eval_samples_per_second": 5.78, + "eval_steps_per_second": 0.723, + "step": 54800 + }, + { + "epoch": 5.99, + "learning_rate": 3.502948885976409e-05, + "loss": 0.1354, + "step": 54850 + }, + { + "epoch": 6.0, + "learning_rate": 3.5015836609873307e-05, + "loss": 0.1463, + "step": 54900 + }, + { + "epoch": 6.0, + "eval_cer": 0.04498342121741987, + "eval_loss": 0.2538948655128479, + "eval_runtime": 1272.2595, + "eval_samples_per_second": 5.905, + "eval_steps_per_second": 0.739, + "step": 54900 + }, + { + "epoch": 6.0, + "learning_rate": 3.500218435998253e-05, + "loss": 0.1226, + "step": 54950 + }, + { + "epoch": 6.01, + "learning_rate": 3.4988532110091745e-05, + "loss": 0.0819, + "step": 55000 + }, + { + "epoch": 6.01, + "eval_cer": 0.04370365554456767, + "eval_loss": 0.2585710883140564, + "eval_runtime": 1254.3721, + "eval_samples_per_second": 5.989, + "eval_steps_per_second": 0.749, + "step": 55000 + }, + { + "epoch": 6.01, + "learning_rate": 3.497487986020097e-05, + "loss": 0.0823, + "step": 55050 + }, + { + "epoch": 6.02, + "learning_rate": 3.4961227610310183e-05, + "loss": 0.1003, + "step": 55100 + }, + { + "epoch": 6.02, + "eval_cer": 0.044815751383276134, + "eval_loss": 0.2569202780723572, + "eval_runtime": 1267.7723, + "eval_samples_per_second": 5.926, + "eval_steps_per_second": 0.741, + "step": 55100 + }, + { + "epoch": 6.02, + "learning_rate": 3.49475753604194e-05, + "loss": 0.1029, + "step": 55150 + }, + { + "epoch": 6.03, + "learning_rate": 3.4933923110528615e-05, + "loss": 0.0956, + "step": 55200 + }, + { + "epoch": 6.03, + "eval_cer": 0.04651640255816261, + "eval_loss": 0.2601701319217682, + "eval_runtime": 1273.0175, + "eval_samples_per_second": 5.902, + "eval_steps_per_second": 0.738, + "step": 55200 + }, + { + "epoch": 6.03, + "learning_rate": 3.492027086063783e-05, + "loss": 0.0996, + "step": 55250 + }, + { + "epoch": 6.04, + "learning_rate": 3.4906618610747053e-05, + "loss": 0.089, + "step": 55300 + }, + { + "epoch": 6.04, + "eval_cer": 0.04504843605106744, + "eval_loss": 0.2632923126220703, + "eval_runtime": 1249.6925, + "eval_samples_per_second": 6.012, + "eval_steps_per_second": 0.752, + "step": 55300 + }, + { + "epoch": 6.05, + "learning_rate": 3.489296636085627e-05, + "loss": 0.0868, + "step": 55350 + }, + { + "epoch": 6.05, + "learning_rate": 3.487931411096549e-05, + "loss": 0.0898, + "step": 55400 + }, + { + "epoch": 6.05, + "eval_cer": 0.04392265287895949, + "eval_loss": 0.258096843957901, + "eval_runtime": 1258.7235, + "eval_samples_per_second": 5.969, + "eval_steps_per_second": 0.747, + "step": 55400 + }, + { + "epoch": 6.06, + "learning_rate": 3.486566186107471e-05, + "loss": 0.0907, + "step": 55450 + }, + { + "epoch": 6.06, + "learning_rate": 3.4852009611183923e-05, + "loss": 0.1034, + "step": 55500 + }, + { + "epoch": 6.06, + "eval_cer": 0.04375156121146588, + "eval_loss": 0.25894665718078613, + "eval_runtime": 1253.9835, + "eval_samples_per_second": 5.991, + "eval_steps_per_second": 0.75, + "step": 55500 + }, + { + "epoch": 6.07, + "learning_rate": 3.4838357361293146e-05, + "loss": 0.0812, + "step": 55550 + }, + { + "epoch": 6.07, + "learning_rate": 3.482470511140236e-05, + "loss": 0.098, + "step": 55600 + }, + { + "epoch": 6.07, + "eval_cer": 0.044367491214442875, + "eval_loss": 0.2576400339603424, + "eval_runtime": 1270.1032, + "eval_samples_per_second": 5.915, + "eval_steps_per_second": 0.74, + "step": 55600 + }, + { + "epoch": 6.08, + "learning_rate": 3.481105286151158e-05, + "loss": 0.0796, + "step": 55650 + }, + { + "epoch": 6.08, + "learning_rate": 3.4797400611620794e-05, + "loss": 0.1013, + "step": 55700 + }, + { + "epoch": 6.08, + "eval_cer": 0.0437857795449646, + "eval_loss": 0.261007159948349, + "eval_runtime": 1243.8076, + "eval_samples_per_second": 6.04, + "eval_steps_per_second": 0.756, + "step": 55700 + }, + { + "epoch": 6.09, + "learning_rate": 3.4783748361730016e-05, + "loss": 0.0766, + "step": 55750 + }, + { + "epoch": 6.09, + "learning_rate": 3.477009611183923e-05, + "loss": 0.0943, + "step": 55800 + }, + { + "epoch": 6.09, + "eval_cer": 0.04406979171300399, + "eval_loss": 0.2620924413204193, + "eval_runtime": 1266.0942, + "eval_samples_per_second": 5.934, + "eval_steps_per_second": 0.742, + "step": 55800 + }, + { + "epoch": 6.1, + "learning_rate": 3.475644386194845e-05, + "loss": 0.0942, + "step": 55850 + }, + { + "epoch": 6.11, + "learning_rate": 3.474279161205767e-05, + "loss": 0.0958, + "step": 55900 + }, + { + "epoch": 6.11, + "eval_cer": 0.043104834708340034, + "eval_loss": 0.2605457901954651, + "eval_runtime": 1243.8093, + "eval_samples_per_second": 6.04, + "eval_steps_per_second": 0.756, + "step": 55900 + }, + { + "epoch": 6.11, + "learning_rate": 3.4729139362166886e-05, + "loss": 0.1016, + "step": 55950 + }, + { + "epoch": 6.12, + "learning_rate": 3.471548711227611e-05, + "loss": 0.0975, + "step": 56000 + }, + { + "epoch": 6.12, + "eval_cer": 0.04476100204967818, + "eval_loss": 0.261498361825943, + "eval_runtime": 1275.5024, + "eval_samples_per_second": 5.89, + "eval_steps_per_second": 0.737, + "step": 56000 + }, + { + "epoch": 6.12, + "learning_rate": 3.4701834862385325e-05, + "loss": 0.1053, + "step": 56050 + }, + { + "epoch": 6.13, + "learning_rate": 3.468818261249454e-05, + "loss": 0.1016, + "step": 56100 + }, + { + "epoch": 6.13, + "eval_cer": 0.04587994155508639, + "eval_loss": 0.2610916197299957, + "eval_runtime": 1273.2615, + "eval_samples_per_second": 5.901, + "eval_steps_per_second": 0.738, + "step": 56100 + }, + { + "epoch": 6.13, + "learning_rate": 3.4674530362603756e-05, + "loss": 0.1116, + "step": 56150 + }, + { + "epoch": 6.14, + "learning_rate": 3.466087811271297e-05, + "loss": 0.0968, + "step": 56200 + }, + { + "epoch": 6.14, + "eval_cer": 0.044709674549430097, + "eval_loss": 0.2603790760040283, + "eval_runtime": 1265.8881, + "eval_samples_per_second": 5.935, + "eval_steps_per_second": 0.743, + "step": 56200 + }, + { + "epoch": 6.14, + "learning_rate": 3.4647225862822195e-05, + "loss": 0.1051, + "step": 56250 + }, + { + "epoch": 6.15, + "learning_rate": 3.463357361293141e-05, + "loss": 0.0935, + "step": 56300 + }, + { + "epoch": 6.15, + "eval_cer": 0.04556513288689814, + "eval_loss": 0.2579694092273712, + "eval_runtime": 1263.4804, + "eval_samples_per_second": 5.946, + "eval_steps_per_second": 0.744, + "step": 56300 + }, + { + "epoch": 6.15, + "learning_rate": 3.461992136304063e-05, + "loss": 0.0914, + "step": 56350 + }, + { + "epoch": 6.16, + "learning_rate": 3.460626911314985e-05, + "loss": 0.0953, + "step": 56400 + }, + { + "epoch": 6.16, + "eval_cer": 0.047361595395581046, + "eval_loss": 0.2568546533584595, + "eval_runtime": 1283.114, + "eval_samples_per_second": 5.855, + "eval_steps_per_second": 0.733, + "step": 56400 + }, + { + "epoch": 6.17, + "learning_rate": 3.4592616863259065e-05, + "loss": 0.0879, + "step": 56450 + }, + { + "epoch": 6.17, + "learning_rate": 3.457896461336829e-05, + "loss": 0.0939, + "step": 56500 + }, + { + "epoch": 6.17, + "eval_cer": 0.04408005721305361, + "eval_loss": 0.2597881853580475, + "eval_runtime": 1254.7553, + "eval_samples_per_second": 5.988, + "eval_steps_per_second": 0.749, + "step": 56500 + }, + { + "epoch": 6.18, + "learning_rate": 3.45653123634775e-05, + "loss": 0.1095, + "step": 56550 + }, + { + "epoch": 6.18, + "learning_rate": 3.455193315858454e-05, + "loss": 0.0884, + "step": 56600 + }, + { + "epoch": 6.18, + "eval_cer": 0.044097166379802974, + "eval_loss": 0.2602521479129791, + "eval_runtime": 1251.1413, + "eval_samples_per_second": 6.005, + "eval_steps_per_second": 0.751, + "step": 56600 + }, + { + "epoch": 6.19, + "learning_rate": 3.4538280908693754e-05, + "loss": 0.0902, + "step": 56650 + }, + { + "epoch": 6.19, + "learning_rate": 3.452462865880297e-05, + "loss": 0.0902, + "step": 56700 + }, + { + "epoch": 6.19, + "eval_cer": 0.043761826711515496, + "eval_loss": 0.26123595237731934, + "eval_runtime": 1261.7321, + "eval_samples_per_second": 5.955, + "eval_steps_per_second": 0.745, + "step": 56700 + }, + { + "epoch": 6.2, + "learning_rate": 3.4510976408912185e-05, + "loss": 0.0991, + "step": 56750 + }, + { + "epoch": 6.2, + "learning_rate": 3.449732415902141e-05, + "loss": 0.0936, + "step": 56800 + }, + { + "epoch": 6.2, + "eval_cer": 0.04275922954000294, + "eval_loss": 0.2600439190864563, + "eval_runtime": 1244.3219, + "eval_samples_per_second": 6.038, + "eval_steps_per_second": 0.755, + "step": 56800 + }, + { + "epoch": 6.21, + "learning_rate": 3.4483671909130624e-05, + "loss": 0.0899, + "step": 56850 + }, + { + "epoch": 6.21, + "learning_rate": 3.4470019659239846e-05, + "loss": 0.0966, + "step": 56900 + }, + { + "epoch": 6.21, + "eval_cer": 0.042122768536926716, + "eval_loss": 0.25872135162353516, + "eval_runtime": 1250.8633, + "eval_samples_per_second": 6.006, + "eval_steps_per_second": 0.751, + "step": 56900 + }, + { + "epoch": 6.22, + "learning_rate": 3.445636740934906e-05, + "loss": 0.0958, + "step": 56950 + }, + { + "epoch": 6.23, + "learning_rate": 3.4442715159458285e-05, + "loss": 0.0898, + "step": 57000 + }, + { + "epoch": 6.23, + "eval_cer": 0.04389185637881064, + "eval_loss": 0.2622995972633362, + "eval_runtime": 1258.0312, + "eval_samples_per_second": 5.972, + "eval_steps_per_second": 0.747, + "step": 57000 + }, + { + "epoch": 6.23, + "learning_rate": 3.44290629095675e-05, + "loss": 0.1116, + "step": 57050 + }, + { + "epoch": 6.24, + "learning_rate": 3.4415410659676716e-05, + "loss": 0.0959, + "step": 57100 + }, + { + "epoch": 6.24, + "eval_cer": 0.042683949206305756, + "eval_loss": 0.2615799307823181, + "eval_runtime": 1264.8249, + "eval_samples_per_second": 5.94, + "eval_steps_per_second": 0.743, + "step": 57100 + }, + { + "epoch": 6.24, + "learning_rate": 3.440175840978593e-05, + "loss": 0.0954, + "step": 57150 + }, + { + "epoch": 6.25, + "learning_rate": 3.438810615989515e-05, + "loss": 0.1009, + "step": 57200 + }, + { + "epoch": 6.25, + "eval_cer": 0.04379604504501422, + "eval_loss": 0.25624653697013855, + "eval_runtime": 1276.2697, + "eval_samples_per_second": 5.887, + "eval_steps_per_second": 0.737, + "step": 57200 + }, + { + "epoch": 6.25, + "learning_rate": 3.437445391000437e-05, + "loss": 0.0968, + "step": 57250 + }, + { + "epoch": 6.26, + "learning_rate": 3.4360801660113587e-05, + "loss": 0.0981, + "step": 57300 + }, + { + "epoch": 6.26, + "eval_cer": 0.041849021868936936, + "eval_loss": 0.25614550709724426, + "eval_runtime": 1247.1429, + "eval_samples_per_second": 6.024, + "eval_steps_per_second": 0.754, + "step": 57300 + }, + { + "epoch": 6.26, + "learning_rate": 3.434714941022281e-05, + "loss": 0.1056, + "step": 57350 + }, + { + "epoch": 6.27, + "learning_rate": 3.4333497160332025e-05, + "loss": 0.0938, + "step": 57400 + }, + { + "epoch": 6.27, + "eval_cer": 0.04287215004054872, + "eval_loss": 0.2652567923069, + "eval_runtime": 1242.8949, + "eval_samples_per_second": 6.045, + "eval_steps_per_second": 0.756, + "step": 57400 + }, + { + "epoch": 6.27, + "learning_rate": 3.431984491044124e-05, + "loss": 0.1049, + "step": 57450 + }, + { + "epoch": 6.28, + "learning_rate": 3.430619266055046e-05, + "loss": 0.1063, + "step": 57500 + }, + { + "epoch": 6.28, + "eval_cer": 0.04191403670258451, + "eval_loss": 0.2614375054836273, + "eval_runtime": 1265.454, + "eval_samples_per_second": 5.937, + "eval_steps_per_second": 0.743, + "step": 57500 + }, + { + "epoch": 6.29, + "learning_rate": 3.429254041065968e-05, + "loss": 0.0941, + "step": 57550 + }, + { + "epoch": 6.29, + "learning_rate": 3.4278888160768895e-05, + "loss": 0.1112, + "step": 57600 + }, + { + "epoch": 6.29, + "eval_cer": 0.04245810820521419, + "eval_loss": 0.2571166455745697, + "eval_runtime": 1277.9712, + "eval_samples_per_second": 5.879, + "eval_steps_per_second": 0.736, + "step": 57600 + }, + { + "epoch": 6.3, + "learning_rate": 3.426523591087811e-05, + "loss": 0.0947, + "step": 57650 + }, + { + "epoch": 6.3, + "learning_rate": 3.4251583660987327e-05, + "loss": 0.0998, + "step": 57700 + }, + { + "epoch": 6.3, + "eval_cer": 0.041513682200649465, + "eval_loss": 0.26077941060066223, + "eval_runtime": 1245.1251, + "eval_samples_per_second": 6.034, + "eval_steps_per_second": 0.755, + "step": 57700 + }, + { + "epoch": 6.31, + "learning_rate": 3.423793141109655e-05, + "loss": 0.1019, + "step": 57750 + }, + { + "epoch": 6.31, + "learning_rate": 3.422427916120577e-05, + "loss": 0.1096, + "step": 57800 + }, + { + "epoch": 6.31, + "eval_cer": 0.04350518921027508, + "eval_loss": 0.25821661949157715, + "eval_runtime": 1268.0984, + "eval_samples_per_second": 5.925, + "eval_steps_per_second": 0.741, + "step": 57800 + }, + { + "epoch": 6.32, + "learning_rate": 3.421062691131499e-05, + "loss": 0.0893, + "step": 57850 + }, + { + "epoch": 6.32, + "learning_rate": 3.4196974661424203e-05, + "loss": 0.0944, + "step": 57900 + }, + { + "epoch": 6.32, + "eval_cer": 0.04171214853494205, + "eval_loss": 0.2577536404132843, + "eval_runtime": 1243.3201, + "eval_samples_per_second": 6.043, + "eval_steps_per_second": 0.756, + "step": 57900 + }, + { + "epoch": 6.33, + "learning_rate": 3.4183322411533426e-05, + "loss": 0.0963, + "step": 57950 + }, + { + "epoch": 6.33, + "learning_rate": 3.416967016164264e-05, + "loss": 0.0984, + "step": 58000 + }, + { + "epoch": 6.33, + "eval_cer": 0.041961942369482724, + "eval_loss": 0.2591804265975952, + "eval_runtime": 1279.5484, + "eval_samples_per_second": 5.872, + "eval_steps_per_second": 0.735, + "step": 58000 + }, + { + "epoch": 6.34, + "learning_rate": 3.415601791175186e-05, + "loss": 0.097, + "step": 58050 + }, + { + "epoch": 6.35, + "learning_rate": 3.4142365661861073e-05, + "loss": 0.0939, + "step": 58100 + }, + { + "epoch": 6.35, + "eval_cer": 0.042122768536926716, + "eval_loss": 0.2615301012992859, + "eval_runtime": 1263.1636, + "eval_samples_per_second": 5.948, + "eval_steps_per_second": 0.744, + "step": 58100 + }, + { + "epoch": 6.35, + "learning_rate": 3.412871341197029e-05, + "loss": 0.0986, + "step": 58150 + }, + { + "epoch": 6.36, + "learning_rate": 3.411506116207951e-05, + "loss": 0.1071, + "step": 58200 + }, + { + "epoch": 6.36, + "eval_cer": 0.04117834253236199, + "eval_loss": 0.2618873119354248, + "eval_runtime": 1248.1411, + "eval_samples_per_second": 6.019, + "eval_steps_per_second": 0.753, + "step": 58200 + }, + { + "epoch": 6.36, + "learning_rate": 3.4101408912188734e-05, + "loss": 0.1074, + "step": 58250 + }, + { + "epoch": 6.37, + "learning_rate": 3.408775666229795e-05, + "loss": 0.1063, + "step": 58300 + }, + { + "epoch": 6.37, + "eval_cer": 0.04217067420382493, + "eval_loss": 0.26021838188171387, + "eval_runtime": 1253.6554, + "eval_samples_per_second": 5.993, + "eval_steps_per_second": 0.75, + "step": 58300 + }, + { + "epoch": 6.37, + "learning_rate": 3.4074104412407166e-05, + "loss": 0.1066, + "step": 58350 + }, + { + "epoch": 6.38, + "learning_rate": 3.406045216251638e-05, + "loss": 0.1085, + "step": 58400 + }, + { + "epoch": 6.38, + "eval_cer": 0.04305692904144182, + "eval_loss": 0.25534170866012573, + "eval_runtime": 1268.3522, + "eval_samples_per_second": 5.923, + "eval_steps_per_second": 0.741, + "step": 58400 + }, + { + "epoch": 6.38, + "learning_rate": 3.4046799912625605e-05, + "loss": 0.0962, + "step": 58450 + }, + { + "epoch": 6.39, + "learning_rate": 3.403314766273482e-05, + "loss": 0.111, + "step": 58500 + }, + { + "epoch": 6.39, + "eval_cer": 0.04086353386417375, + "eval_loss": 0.2595382034778595, + "eval_runtime": 1225.4425, + "eval_samples_per_second": 6.131, + "eval_steps_per_second": 0.767, + "step": 58500 + }, + { + "epoch": 6.39, + "learning_rate": 3.4019495412844036e-05, + "loss": 0.0946, + "step": 58550 + }, + { + "epoch": 6.4, + "learning_rate": 3.400611620795107e-05, + "loss": 0.1098, + "step": 58600 + }, + { + "epoch": 6.4, + "eval_cer": 0.04325539537573441, + "eval_loss": 0.25475308299064636, + "eval_runtime": 1267.9837, + "eval_samples_per_second": 5.925, + "eval_steps_per_second": 0.741, + "step": 58600 + }, + { + "epoch": 6.41, + "learning_rate": 3.399246395806029e-05, + "loss": 0.1111, + "step": 58650 + }, + { + "epoch": 6.41, + "learning_rate": 3.39788117081695e-05, + "loss": 0.1086, + "step": 58700 + }, + { + "epoch": 6.41, + "eval_cer": 0.0416573992013441, + "eval_loss": 0.2619175314903259, + "eval_runtime": 1229.6208, + "eval_samples_per_second": 6.11, + "eval_steps_per_second": 0.764, + "step": 58700 + }, + { + "epoch": 6.42, + "learning_rate": 3.3965159458278725e-05, + "loss": 0.1006, + "step": 58750 + }, + { + "epoch": 6.42, + "learning_rate": 3.395150720838795e-05, + "loss": 0.1019, + "step": 58800 + }, + { + "epoch": 6.42, + "eval_cer": 0.04108937486526531, + "eval_loss": 0.25999510288238525, + "eval_runtime": 1248.8363, + "eval_samples_per_second": 6.016, + "eval_steps_per_second": 0.753, + "step": 58800 + }, + { + "epoch": 6.43, + "learning_rate": 3.3937854958497164e-05, + "loss": 0.0999, + "step": 58850 + }, + { + "epoch": 6.43, + "learning_rate": 3.392420270860638e-05, + "loss": 0.1032, + "step": 58900 + }, + { + "epoch": 6.43, + "eval_cer": 0.041397339866753806, + "eval_loss": 0.2621907591819763, + "eval_runtime": 1264.5165, + "eval_samples_per_second": 5.941, + "eval_steps_per_second": 0.743, + "step": 58900 + }, + { + "epoch": 6.44, + "learning_rate": 3.3910550458715595e-05, + "loss": 0.1118, + "step": 58950 + }, + { + "epoch": 6.44, + "learning_rate": 3.389689820882482e-05, + "loss": 0.0951, + "step": 59000 + }, + { + "epoch": 6.44, + "eval_cer": 0.04185244370228681, + "eval_loss": 0.2569536566734314, + "eval_runtime": 1232.8244, + "eval_samples_per_second": 6.094, + "eval_steps_per_second": 0.762, + "step": 59000 + }, + { + "epoch": 6.45, + "learning_rate": 3.3883245958934034e-05, + "loss": 0.0999, + "step": 59050 + }, + { + "epoch": 6.45, + "learning_rate": 3.386959370904325e-05, + "loss": 0.0831, + "step": 59100 + }, + { + "epoch": 6.45, + "eval_cer": 0.04179427253533898, + "eval_loss": 0.2573639154434204, + "eval_runtime": 1143.636, + "eval_samples_per_second": 6.569, + "eval_steps_per_second": 0.822, + "step": 59100 + }, + { + "epoch": 6.46, + "learning_rate": 3.3855941459152465e-05, + "loss": 0.1098, + "step": 59150 + }, + { + "epoch": 6.47, + "learning_rate": 3.384228920926169e-05, + "loss": 0.0976, + "step": 59200 + }, + { + "epoch": 6.47, + "eval_cer": 0.04181480353543822, + "eval_loss": 0.257639616727829, + "eval_runtime": 1202.3685, + "eval_samples_per_second": 6.249, + "eval_steps_per_second": 0.782, + "step": 59200 + }, + { + "epoch": 6.47, + "learning_rate": 3.382863695937091e-05, + "loss": 0.0973, + "step": 59250 + }, + { + "epoch": 6.48, + "learning_rate": 3.3814984709480126e-05, + "loss": 0.0994, + "step": 59300 + }, + { + "epoch": 6.48, + "eval_cer": 0.04163002453454512, + "eval_loss": 0.2602895498275757, + "eval_runtime": 1241.769, + "eval_samples_per_second": 6.05, + "eval_steps_per_second": 0.757, + "step": 59300 + }, + { + "epoch": 6.48, + "learning_rate": 3.380133245958934e-05, + "loss": 0.112, + "step": 59350 + }, + { + "epoch": 6.49, + "learning_rate": 3.378768020969856e-05, + "loss": 0.1099, + "step": 59400 + }, + { + "epoch": 6.49, + "eval_cer": 0.042813978873600896, + "eval_loss": 0.2561176121234894, + "eval_runtime": 1262.5271, + "eval_samples_per_second": 5.951, + "eval_steps_per_second": 0.745, + "step": 59400 + }, + { + "epoch": 6.49, + "learning_rate": 3.377402795980778e-05, + "loss": 0.1025, + "step": 59450 + }, + { + "epoch": 6.5, + "learning_rate": 3.3760375709916996e-05, + "loss": 0.1198, + "step": 59500 + }, + { + "epoch": 6.5, + "eval_cer": 0.04231096937116968, + "eval_loss": 0.254476934671402, + "eval_runtime": 1262.5413, + "eval_samples_per_second": 5.951, + "eval_steps_per_second": 0.745, + "step": 59500 + }, + { + "epoch": 6.5, + "learning_rate": 3.374672346002621e-05, + "loss": 0.0967, + "step": 59550 + }, + { + "epoch": 6.51, + "learning_rate": 3.373307121013543e-05, + "loss": 0.0976, + "step": 59600 + }, + { + "epoch": 6.51, + "eval_cer": 0.041267310199458666, + "eval_loss": 0.2605424225330353, + "eval_runtime": 1248.7159, + "eval_samples_per_second": 6.017, + "eval_steps_per_second": 0.753, + "step": 59600 + }, + { + "epoch": 6.51, + "learning_rate": 3.371941896024465e-05, + "loss": 0.0928, + "step": 59650 + }, + { + "epoch": 6.52, + "learning_rate": 3.370576671035387e-05, + "loss": 0.0977, + "step": 59700 + }, + { + "epoch": 6.52, + "eval_cer": 0.04313905304183876, + "eval_loss": 0.2529686689376831, + "eval_runtime": 1261.516, + "eval_samples_per_second": 5.956, + "eval_steps_per_second": 0.745, + "step": 59700 + }, + { + "epoch": 6.53, + "learning_rate": 3.369211446046309e-05, + "loss": 0.104, + "step": 59750 + }, + { + "epoch": 6.53, + "learning_rate": 3.3678462210572305e-05, + "loss": 0.1016, + "step": 59800 + }, + { + "epoch": 6.53, + "eval_cer": 0.0412878411995579, + "eval_loss": 0.25390204787254333, + "eval_runtime": 1246.3223, + "eval_samples_per_second": 6.028, + "eval_steps_per_second": 0.754, + "step": 59800 + }, + { + "epoch": 6.54, + "learning_rate": 3.366480996068152e-05, + "loss": 0.098, + "step": 59850 + }, + { + "epoch": 6.54, + "learning_rate": 3.3651157710790736e-05, + "loss": 0.1009, + "step": 59900 + }, + { + "epoch": 6.54, + "eval_cer": 0.0418661310356863, + "eval_loss": 0.25637781620025635, + "eval_runtime": 1245.1762, + "eval_samples_per_second": 6.034, + "eval_steps_per_second": 0.755, + "step": 59900 + }, + { + "epoch": 6.55, + "learning_rate": 3.363750546089996e-05, + "loss": 0.1131, + "step": 59950 + }, + { + "epoch": 6.55, + "learning_rate": 3.3623853211009175e-05, + "loss": 0.1037, + "step": 60000 + }, + { + "epoch": 6.55, + "eval_cer": 0.04255391953901061, + "eval_loss": 0.25451895594596863, + "eval_runtime": 1255.9082, + "eval_samples_per_second": 5.982, + "eval_steps_per_second": 0.748, + "step": 60000 + }, + { + "epoch": 6.56, + "learning_rate": 3.361020096111839e-05, + "loss": 0.1005, + "step": 60050 + }, + { + "epoch": 6.56, + "learning_rate": 3.359654871122761e-05, + "loss": 0.0934, + "step": 60100 + }, + { + "epoch": 6.56, + "eval_cer": 0.041825069035487834, + "eval_loss": 0.25655898451805115, + "eval_runtime": 1247.2834, + "eval_samples_per_second": 6.023, + "eval_steps_per_second": 0.754, + "step": 60100 + }, + { + "epoch": 6.57, + "learning_rate": 3.358289646133683e-05, + "loss": 0.1122, + "step": 60150 + }, + { + "epoch": 6.57, + "learning_rate": 3.356924421144605e-05, + "loss": 0.1072, + "step": 60200 + }, + { + "epoch": 6.57, + "eval_cer": 0.044210086880348755, + "eval_loss": 0.2543743848800659, + "eval_runtime": 1267.4241, + "eval_samples_per_second": 5.928, + "eval_steps_per_second": 0.742, + "step": 60200 + }, + { + "epoch": 6.58, + "learning_rate": 3.355559196155527e-05, + "loss": 0.0921, + "step": 60250 + }, + { + "epoch": 6.59, + "learning_rate": 3.354193971166448e-05, + "loss": 0.1002, + "step": 60300 + }, + { + "epoch": 6.59, + "eval_cer": 0.041893505702485276, + "eval_loss": 0.2538849711418152, + "eval_runtime": 1265.4609, + "eval_samples_per_second": 5.937, + "eval_steps_per_second": 0.743, + "step": 60300 + }, + { + "epoch": 6.59, + "learning_rate": 3.35282874617737e-05, + "loss": 0.1134, + "step": 60350 + }, + { + "epoch": 6.6, + "learning_rate": 3.351463521188292e-05, + "loss": 0.0978, + "step": 60400 + }, + { + "epoch": 6.6, + "eval_cer": 0.041623180867845375, + "eval_loss": 0.2523655593395233, + "eval_runtime": 1254.741, + "eval_samples_per_second": 5.988, + "eval_steps_per_second": 0.749, + "step": 60400 + }, + { + "epoch": 6.6, + "learning_rate": 3.350098296199214e-05, + "loss": 0.1185, + "step": 60450 + }, + { + "epoch": 6.61, + "learning_rate": 3.3487330712101353e-05, + "loss": 0.0937, + "step": 60500 + }, + { + "epoch": 6.61, + "eval_cer": 0.04146235470040138, + "eval_loss": 0.25337615609169006, + "eval_runtime": 1271.1459, + "eval_samples_per_second": 5.91, + "eval_steps_per_second": 0.739, + "step": 60500 + }, + { + "epoch": 6.61, + "learning_rate": 3.347367846221057e-05, + "loss": 0.1064, + "step": 60550 + }, + { + "epoch": 6.62, + "learning_rate": 3.346002621231979e-05, + "loss": 0.0945, + "step": 60600 + }, + { + "epoch": 6.62, + "eval_cer": 0.041797694368688854, + "eval_loss": 0.25419652462005615, + "eval_runtime": 1257.8593, + "eval_samples_per_second": 5.973, + "eval_steps_per_second": 0.747, + "step": 60600 + }, + { + "epoch": 6.62, + "learning_rate": 3.3446373962429014e-05, + "loss": 0.1099, + "step": 60650 + }, + { + "epoch": 6.63, + "learning_rate": 3.343272171253823e-05, + "loss": 0.1081, + "step": 60700 + }, + { + "epoch": 6.63, + "eval_cer": 0.04154790053414818, + "eval_loss": 0.2513173520565033, + "eval_runtime": 1254.1003, + "eval_samples_per_second": 5.991, + "eval_steps_per_second": 0.75, + "step": 60700 + }, + { + "epoch": 6.63, + "learning_rate": 3.3419069462647446e-05, + "loss": 0.1266, + "step": 60750 + }, + { + "epoch": 6.64, + "learning_rate": 3.340541721275666e-05, + "loss": 0.1107, + "step": 60800 + }, + { + "epoch": 6.64, + "eval_cer": 0.04119545169911135, + "eval_loss": 0.257914274930954, + "eval_runtime": 1252.8755, + "eval_samples_per_second": 5.997, + "eval_steps_per_second": 0.75, + "step": 60800 + }, + { + "epoch": 6.65, + "learning_rate": 3.3391764962865884e-05, + "loss": 0.115, + "step": 60850 + }, + { + "epoch": 6.65, + "learning_rate": 3.33781127129751e-05, + "loss": 0.0888, + "step": 60900 + }, + { + "epoch": 6.65, + "eval_cer": 0.042701058373055115, + "eval_loss": 0.2549211382865906, + "eval_runtime": 1286.7653, + "eval_samples_per_second": 5.839, + "eval_steps_per_second": 0.731, + "step": 60900 + }, + { + "epoch": 6.66, + "learning_rate": 3.3364460463084316e-05, + "loss": 0.1262, + "step": 60950 + }, + { + "epoch": 6.66, + "learning_rate": 3.335080821319353e-05, + "loss": 0.0977, + "step": 61000 + }, + { + "epoch": 6.66, + "eval_cer": 0.04167793020144333, + "eval_loss": 0.25344541668891907, + "eval_runtime": 1267.7275, + "eval_samples_per_second": 5.926, + "eval_steps_per_second": 0.741, + "step": 61000 + }, + { + "epoch": 6.67, + "learning_rate": 3.3337155963302755e-05, + "loss": 0.111, + "step": 61050 + }, + { + "epoch": 6.67, + "learning_rate": 3.332350371341198e-05, + "loss": 0.1139, + "step": 61100 + }, + { + "epoch": 6.67, + "eval_cer": 0.040428961028739975, + "eval_loss": 0.24946226179599762, + "eval_runtime": 1235.1051, + "eval_samples_per_second": 6.083, + "eval_steps_per_second": 0.761, + "step": 61100 + }, + { + "epoch": 6.68, + "learning_rate": 3.330985146352119e-05, + "loss": 0.1222, + "step": 61150 + }, + { + "epoch": 6.68, + "learning_rate": 3.329619921363041e-05, + "loss": 0.1013, + "step": 61200 + }, + { + "epoch": 6.68, + "eval_cer": 0.04048713219568781, + "eval_loss": 0.25472667813301086, + "eval_runtime": 1244.3934, + "eval_samples_per_second": 6.037, + "eval_steps_per_second": 0.755, + "step": 61200 + }, + { + "epoch": 6.69, + "learning_rate": 3.3282546963739625e-05, + "loss": 0.1026, + "step": 61250 + }, + { + "epoch": 6.7, + "learning_rate": 3.326889471384884e-05, + "loss": 0.0975, + "step": 61300 + }, + { + "epoch": 6.7, + "eval_cer": 0.04055556886268525, + "eval_loss": 0.25184065103530884, + "eval_runtime": 1215.7918, + "eval_samples_per_second": 6.18, + "eval_steps_per_second": 0.773, + "step": 61300 + }, + { + "epoch": 6.7, + "learning_rate": 3.325524246395806e-05, + "loss": 0.1175, + "step": 61350 + }, + { + "epoch": 6.71, + "learning_rate": 3.324159021406728e-05, + "loss": 0.0887, + "step": 61400 + }, + { + "epoch": 6.71, + "eval_cer": 0.04122282636591033, + "eval_loss": 0.252297580242157, + "eval_runtime": 1438.8019, + "eval_samples_per_second": 5.222, + "eval_steps_per_second": 0.653, + "step": 61400 + }, + { + "epoch": 6.71, + "learning_rate": 3.3227937964176495e-05, + "loss": 0.1122, + "step": 61450 + }, + { + "epoch": 6.72, + "learning_rate": 3.321428571428572e-05, + "loss": 0.1085, + "step": 61500 + }, + { + "epoch": 6.72, + "eval_cer": 0.039946482526408, + "eval_loss": 0.24887217581272125, + "eval_runtime": 1450.663, + "eval_samples_per_second": 5.179, + "eval_steps_per_second": 0.648, + "step": 61500 + }, + { + "epoch": 6.72, + "learning_rate": 3.320063346439493e-05, + "loss": 0.106, + "step": 61550 + }, + { + "epoch": 6.73, + "learning_rate": 3.3186981214504156e-05, + "loss": 0.0979, + "step": 61600 + }, + { + "epoch": 6.73, + "eval_cer": 0.04282082254030064, + "eval_loss": 0.24803991615772247, + "eval_runtime": 1237.0461, + "eval_samples_per_second": 6.073, + "eval_steps_per_second": 0.76, + "step": 61600 + }, + { + "epoch": 6.73, + "learning_rate": 3.317332896461337e-05, + "loss": 0.1033, + "step": 61650 + }, + { + "epoch": 6.74, + "learning_rate": 3.315967671472259e-05, + "loss": 0.1077, + "step": 61700 + }, + { + "epoch": 6.74, + "eval_cer": 0.04303297620799272, + "eval_loss": 0.2528606653213501, + "eval_runtime": 1154.6782, + "eval_samples_per_second": 6.507, + "eval_steps_per_second": 0.814, + "step": 61700 + }, + { + "epoch": 6.74, + "learning_rate": 3.31460244648318e-05, + "loss": 0.0988, + "step": 61750 + }, + { + "epoch": 6.75, + "learning_rate": 3.3132372214941026e-05, + "loss": 0.1144, + "step": 61800 + }, + { + "epoch": 6.75, + "eval_cer": 0.04078140986377681, + "eval_loss": 0.25002339482307434, + "eval_runtime": 1141.3931, + "eval_samples_per_second": 6.582, + "eval_steps_per_second": 0.824, + "step": 61800 + }, + { + "epoch": 6.76, + "learning_rate": 3.311871996505024e-05, + "loss": 0.1088, + "step": 61850 + }, + { + "epoch": 6.76, + "learning_rate": 3.310506771515946e-05, + "loss": 0.1093, + "step": 61900 + }, + { + "epoch": 6.76, + "eval_cer": 0.04480548588322651, + "eval_loss": 0.255926251411438, + "eval_runtime": 1166.6291, + "eval_samples_per_second": 6.44, + "eval_steps_per_second": 0.806, + "step": 61900 + }, + { + "epoch": 6.77, + "learning_rate": 3.309141546526868e-05, + "loss": 0.096, + "step": 61950 + }, + { + "epoch": 6.77, + "learning_rate": 3.3077763215377896e-05, + "loss": 0.0998, + "step": 62000 + }, + { + "epoch": 6.77, + "eval_cer": 0.042680527372955884, + "eval_loss": 0.2501034438610077, + "eval_runtime": 1157.0123, + "eval_samples_per_second": 6.493, + "eval_steps_per_second": 0.812, + "step": 62000 + }, + { + "epoch": 6.78, + "learning_rate": 3.306411096548712e-05, + "loss": 0.1018, + "step": 62050 + }, + { + "epoch": 6.78, + "learning_rate": 3.3050458715596334e-05, + "loss": 0.1106, + "step": 62100 + }, + { + "epoch": 6.78, + "eval_cer": 0.04183533453553745, + "eval_loss": 0.2515909969806671, + "eval_runtime": 1422.3584, + "eval_samples_per_second": 5.282, + "eval_steps_per_second": 0.661, + "step": 62100 + }, + { + "epoch": 6.79, + "learning_rate": 3.303680646570555e-05, + "loss": 0.1262, + "step": 62150 + }, + { + "epoch": 6.79, + "learning_rate": 3.3023154215814766e-05, + "loss": 0.094, + "step": 62200 + }, + { + "epoch": 6.79, + "eval_cer": 0.042208314370673516, + "eval_loss": 0.25220343470573425, + "eval_runtime": 1461.0595, + "eval_samples_per_second": 5.142, + "eval_steps_per_second": 0.643, + "step": 62200 + }, + { + "epoch": 6.8, + "learning_rate": 3.300950196592398e-05, + "loss": 0.1151, + "step": 62250 + }, + { + "epoch": 6.8, + "learning_rate": 3.2995849716033204e-05, + "loss": 0.1205, + "step": 62300 + }, + { + "epoch": 6.8, + "eval_cer": 0.040049137526904165, + "eval_loss": 0.2456672489643097, + "eval_runtime": 1314.9991, + "eval_samples_per_second": 5.713, + "eval_steps_per_second": 0.715, + "step": 62300 + }, + { + "epoch": 6.81, + "learning_rate": 3.298219746614242e-05, + "loss": 0.1051, + "step": 62350 + }, + { + "epoch": 6.82, + "learning_rate": 3.296854521625164e-05, + "loss": 0.1036, + "step": 62400 + }, + { + "epoch": 6.82, + "eval_cer": 0.042499170205412655, + "eval_loss": 0.2505302429199219, + "eval_runtime": 1428.5485, + "eval_samples_per_second": 5.259, + "eval_steps_per_second": 0.658, + "step": 62400 + }, + { + "epoch": 6.82, + "learning_rate": 3.295489296636086e-05, + "loss": 0.0972, + "step": 62450 + }, + { + "epoch": 6.83, + "learning_rate": 3.2941240716470074e-05, + "loss": 0.1272, + "step": 62500 + }, + { + "epoch": 6.83, + "eval_cer": 0.042047488203229524, + "eval_loss": 0.2512877285480499, + "eval_runtime": 1355.077, + "eval_samples_per_second": 5.544, + "eval_steps_per_second": 0.694, + "step": 62500 + }, + { + "epoch": 6.83, + "learning_rate": 3.29275884665793e-05, + "loss": 0.0955, + "step": 62550 + }, + { + "epoch": 6.84, + "learning_rate": 3.291393621668851e-05, + "loss": 0.1016, + "step": 62600 + }, + { + "epoch": 6.84, + "eval_cer": 0.040839581030724645, + "eval_loss": 0.25099560618400574, + "eval_runtime": 1146.6199, + "eval_samples_per_second": 6.552, + "eval_steps_per_second": 0.82, + "step": 62600 + }, + { + "epoch": 6.84, + "learning_rate": 3.290028396679773e-05, + "loss": 0.106, + "step": 62650 + }, + { + "epoch": 6.85, + "learning_rate": 3.2886631716906944e-05, + "loss": 0.099, + "step": 62700 + }, + { + "epoch": 6.85, + "eval_cer": 0.043180115042037226, + "eval_loss": 0.24449311196804047, + "eval_runtime": 1175.3376, + "eval_samples_per_second": 6.392, + "eval_steps_per_second": 0.8, + "step": 62700 + }, + { + "epoch": 6.85, + "learning_rate": 3.287297946701617e-05, + "loss": 0.1034, + "step": 62750 + }, + { + "epoch": 6.86, + "learning_rate": 3.2859600262123195e-05, + "loss": 0.1019, + "step": 62800 + }, + { + "epoch": 6.86, + "eval_cer": 0.041034625531667354, + "eval_loss": 0.2486434280872345, + "eval_runtime": 1303.8827, + "eval_samples_per_second": 5.762, + "eval_steps_per_second": 0.721, + "step": 62800 + }, + { + "epoch": 6.86, + "learning_rate": 3.284594801223242e-05, + "loss": 0.1046, + "step": 62850 + }, + { + "epoch": 6.87, + "learning_rate": 3.283229576234163e-05, + "loss": 0.1003, + "step": 62900 + }, + { + "epoch": 6.87, + "eval_cer": 0.040189432694248926, + "eval_loss": 0.24836403131484985, + "eval_runtime": 1369.5448, + "eval_samples_per_second": 5.486, + "eval_steps_per_second": 0.686, + "step": 62900 + }, + { + "epoch": 6.88, + "learning_rate": 3.2818643512450856e-05, + "loss": 0.1131, + "step": 62950 + }, + { + "epoch": 6.88, + "learning_rate": 3.280499126256007e-05, + "loss": 0.1166, + "step": 63000 + }, + { + "epoch": 6.88, + "eval_cer": 0.04154105686744844, + "eval_loss": 0.24888207018375397, + "eval_runtime": 1440.5402, + "eval_samples_per_second": 5.215, + "eval_steps_per_second": 0.653, + "step": 63000 + }, + { + "epoch": 6.89, + "learning_rate": 3.2791339012669294e-05, + "loss": 0.1196, + "step": 63050 + }, + { + "epoch": 6.89, + "learning_rate": 3.277768676277851e-05, + "loss": 0.1125, + "step": 63100 + }, + { + "epoch": 6.89, + "eval_cer": 0.04208170653672825, + "eval_loss": 0.24433410167694092, + "eval_runtime": 1218.0166, + "eval_samples_per_second": 6.168, + "eval_steps_per_second": 0.772, + "step": 63100 + }, + { + "epoch": 6.9, + "learning_rate": 3.2764034512887726e-05, + "loss": 0.1071, + "step": 63150 + }, + { + "epoch": 6.9, + "learning_rate": 3.275038226299694e-05, + "loss": 0.1019, + "step": 63200 + }, + { + "epoch": 6.9, + "eval_cer": 0.04413138471330169, + "eval_loss": 0.2512649595737457, + "eval_runtime": 1158.8136, + "eval_samples_per_second": 6.483, + "eval_steps_per_second": 0.811, + "step": 63200 + }, + { + "epoch": 6.91, + "learning_rate": 3.273673001310616e-05, + "loss": 0.1049, + "step": 63250 + }, + { + "epoch": 6.91, + "learning_rate": 3.272307776321538e-05, + "loss": 0.1092, + "step": 63300 + }, + { + "epoch": 6.91, + "eval_cer": 0.04346070537672674, + "eval_loss": 0.25279322266578674, + "eval_runtime": 1145.6556, + "eval_samples_per_second": 6.558, + "eval_steps_per_second": 0.82, + "step": 63300 + }, + { + "epoch": 6.92, + "learning_rate": 3.2709425513324596e-05, + "loss": 0.1084, + "step": 63350 + }, + { + "epoch": 6.92, + "learning_rate": 3.269577326343382e-05, + "loss": 0.1058, + "step": 63400 + }, + { + "epoch": 6.92, + "eval_cer": 0.04239309337156662, + "eval_loss": 0.24921201169490814, + "eval_runtime": 1147.0743, + "eval_samples_per_second": 6.55, + "eval_steps_per_second": 0.819, + "step": 63400 + }, + { + "epoch": 6.93, + "learning_rate": 3.2682121013543034e-05, + "loss": 0.1045, + "step": 63450 + }, + { + "epoch": 6.94, + "learning_rate": 3.266846876365225e-05, + "loss": 0.1019, + "step": 63500 + }, + { + "epoch": 6.94, + "eval_cer": 0.04116123336561263, + "eval_loss": 0.24884535372257233, + "eval_runtime": 1153.8324, + "eval_samples_per_second": 6.511, + "eval_steps_per_second": 0.815, + "step": 63500 + }, + { + "epoch": 6.94, + "learning_rate": 3.265481651376147e-05, + "loss": 0.1261, + "step": 63550 + }, + { + "epoch": 6.95, + "learning_rate": 3.264116426387069e-05, + "loss": 0.0912, + "step": 63600 + }, + { + "epoch": 6.95, + "eval_cer": 0.04406636987965412, + "eval_loss": 0.24965716898441315, + "eval_runtime": 1168.6161, + "eval_samples_per_second": 6.429, + "eval_steps_per_second": 0.804, + "step": 63600 + }, + { + "epoch": 6.95, + "learning_rate": 3.2627512013979904e-05, + "loss": 0.1142, + "step": 63650 + }, + { + "epoch": 6.96, + "learning_rate": 3.261385976408912e-05, + "loss": 0.1138, + "step": 63700 + }, + { + "epoch": 6.96, + "eval_cer": 0.04194483320273336, + "eval_loss": 0.24717257916927338, + "eval_runtime": 1271.4905, + "eval_samples_per_second": 5.909, + "eval_steps_per_second": 0.739, + "step": 63700 + }, + { + "epoch": 6.96, + "learning_rate": 3.2600207514198336e-05, + "loss": 0.1051, + "step": 63750 + }, + { + "epoch": 6.97, + "learning_rate": 3.258655526430756e-05, + "loss": 0.1056, + "step": 63800 + }, + { + "epoch": 6.97, + "eval_cer": 0.040675333029930774, + "eval_loss": 0.24858665466308594, + "eval_runtime": 1147.8949, + "eval_samples_per_second": 6.545, + "eval_steps_per_second": 0.819, + "step": 63800 + }, + { + "epoch": 6.97, + "learning_rate": 3.257290301441678e-05, + "loss": 0.1035, + "step": 63850 + }, + { + "epoch": 6.98, + "learning_rate": 3.2559250764526e-05, + "loss": 0.1128, + "step": 63900 + }, + { + "epoch": 6.98, + "eval_cer": 0.04135285603320547, + "eval_loss": 0.2538721561431885, + "eval_runtime": 1172.9039, + "eval_samples_per_second": 6.405, + "eval_steps_per_second": 0.801, + "step": 63900 + }, + { + "epoch": 6.98, + "learning_rate": 3.254559851463521e-05, + "loss": 0.1054, + "step": 63950 + }, + { + "epoch": 6.99, + "learning_rate": 3.2531946264744436e-05, + "loss": 0.1068, + "step": 64000 + }, + { + "epoch": 6.99, + "eval_cer": 0.043919231045609616, + "eval_loss": 0.24928919970989227, + "eval_runtime": 1456.3353, + "eval_samples_per_second": 5.159, + "eval_steps_per_second": 0.645, + "step": 64000 + }, + { + "epoch": 7.0, + "learning_rate": 3.251829401485365e-05, + "loss": 0.1057, + "step": 64050 + }, + { + "epoch": 7.0, + "learning_rate": 3.250464176496287e-05, + "loss": 0.0955, + "step": 64100 + }, + { + "epoch": 7.0, + "eval_cer": 0.042163830537125184, + "eval_loss": 0.25061729550361633, + "eval_runtime": 1154.3718, + "eval_samples_per_second": 6.508, + "eval_steps_per_second": 0.814, + "step": 64100 + }, + { + "epoch": 7.01, + "learning_rate": 3.249098951507208e-05, + "loss": 0.0608, + "step": 64150 + }, + { + "epoch": 7.01, + "learning_rate": 3.24773372651813e-05, + "loss": 0.0623, + "step": 64200 + }, + { + "epoch": 7.01, + "eval_cer": 0.04060347452958346, + "eval_loss": 0.26202353835105896, + "eval_runtime": 1185.2912, + "eval_samples_per_second": 6.339, + "eval_steps_per_second": 0.793, + "step": 64200 + }, + { + "epoch": 7.02, + "learning_rate": 3.246368501529052e-05, + "loss": 0.0705, + "step": 64250 + }, + { + "epoch": 7.02, + "learning_rate": 3.245003276539974e-05, + "loss": 0.0741, + "step": 64300 + }, + { + "epoch": 7.02, + "eval_cer": 0.0432074897088362, + "eval_loss": 0.2552080750465393, + "eval_runtime": 1154.0385, + "eval_samples_per_second": 6.51, + "eval_steps_per_second": 0.815, + "step": 64300 + }, + { + "epoch": 7.03, + "learning_rate": 3.243638051550896e-05, + "loss": 0.0684, + "step": 64350 + }, + { + "epoch": 7.03, + "learning_rate": 3.2422728265618176e-05, + "loss": 0.0807, + "step": 64400 + }, + { + "epoch": 7.03, + "eval_cer": 0.04506212338446693, + "eval_loss": 0.2651134431362152, + "eval_runtime": 1150.1682, + "eval_samples_per_second": 6.532, + "eval_steps_per_second": 0.817, + "step": 64400 + }, + { + "epoch": 7.04, + "learning_rate": 3.240907601572739e-05, + "loss": 0.0703, + "step": 64450 + }, + { + "epoch": 7.04, + "learning_rate": 3.2395423765836614e-05, + "loss": 0.0725, + "step": 64500 + }, + { + "epoch": 7.04, + "eval_cer": 0.041239935532659686, + "eval_loss": 0.25625482201576233, + "eval_runtime": 1149.5117, + "eval_samples_per_second": 6.536, + "eval_steps_per_second": 0.818, + "step": 64500 + }, + { + "epoch": 7.05, + "learning_rate": 3.238177151594583e-05, + "loss": 0.0605, + "step": 64550 + }, + { + "epoch": 7.06, + "learning_rate": 3.2368119266055046e-05, + "loss": 0.076, + "step": 64600 + }, + { + "epoch": 7.06, + "eval_cer": 0.03970695419191694, + "eval_loss": 0.2562285363674164, + "eval_runtime": 1137.9033, + "eval_samples_per_second": 6.602, + "eval_steps_per_second": 0.826, + "step": 64600 + }, + { + "epoch": 7.06, + "learning_rate": 3.235446701616426e-05, + "loss": 0.067, + "step": 64650 + }, + { + "epoch": 7.07, + "learning_rate": 3.2340814766273484e-05, + "loss": 0.0735, + "step": 64700 + }, + { + "epoch": 7.07, + "eval_cer": 0.040117574193901606, + "eval_loss": 0.25367167592048645, + "eval_runtime": 1145.7217, + "eval_samples_per_second": 6.557, + "eval_steps_per_second": 0.82, + "step": 64700 + }, + { + "epoch": 7.07, + "learning_rate": 3.23271625163827e-05, + "loss": 0.0742, + "step": 64750 + }, + { + "epoch": 7.08, + "learning_rate": 3.231351026649192e-05, + "loss": 0.0679, + "step": 64800 + }, + { + "epoch": 7.08, + "eval_cer": 0.03999438819330621, + "eval_loss": 0.25834035873413086, + "eval_runtime": 1164.3956, + "eval_samples_per_second": 6.452, + "eval_steps_per_second": 0.807, + "step": 64800 + }, + { + "epoch": 7.08, + "learning_rate": 3.229985801660114e-05, + "loss": 0.0664, + "step": 64850 + }, + { + "epoch": 7.09, + "learning_rate": 3.2286205766710354e-05, + "loss": 0.0609, + "step": 64900 + }, + { + "epoch": 7.09, + "eval_cer": 0.0412022953658111, + "eval_loss": 0.2590734362602234, + "eval_runtime": 1161.4118, + "eval_samples_per_second": 6.469, + "eval_steps_per_second": 0.809, + "step": 64900 + }, + { + "epoch": 7.09, + "learning_rate": 3.227255351681958e-05, + "loss": 0.0931, + "step": 64950 + }, + { + "epoch": 7.1, + "learning_rate": 3.225890126692879e-05, + "loss": 0.0559, + "step": 65000 + }, + { + "epoch": 7.1, + "eval_cer": 0.04062058369633282, + "eval_loss": 0.25673454999923706, + "eval_runtime": 1213.4156, + "eval_samples_per_second": 6.192, + "eval_steps_per_second": 0.775, + "step": 65000 + }, + { + "epoch": 7.1, + "learning_rate": 3.224524901703801e-05, + "loss": 0.0676, + "step": 65050 + }, + { + "epoch": 7.11, + "learning_rate": 3.2231596767147224e-05, + "loss": 0.0704, + "step": 65100 + }, + { + "epoch": 7.11, + "eval_cer": 0.04068217669663052, + "eval_loss": 0.25640252232551575, + "eval_runtime": 1150.6735, + "eval_samples_per_second": 6.529, + "eval_steps_per_second": 0.817, + "step": 65100 + }, + { + "epoch": 7.12, + "learning_rate": 3.221794451725644e-05, + "loss": 0.0661, + "step": 65150 + }, + { + "epoch": 7.12, + "learning_rate": 3.220429226736566e-05, + "loss": 0.0771, + "step": 65200 + }, + { + "epoch": 7.12, + "eval_cer": 0.04037078986179215, + "eval_loss": 0.25912144780158997, + "eval_runtime": 1469.693, + "eval_samples_per_second": 5.112, + "eval_steps_per_second": 0.64, + "step": 65200 + }, + { + "epoch": 7.13, + "learning_rate": 3.2190640017474885e-05, + "loss": 0.0632, + "step": 65250 + }, + { + "epoch": 7.13, + "learning_rate": 3.21769877675841e-05, + "loss": 0.0656, + "step": 65300 + }, + { + "epoch": 7.13, + "eval_cer": 0.03920394468948573, + "eval_loss": 0.2582501471042633, + "eval_runtime": 1389.4184, + "eval_samples_per_second": 5.407, + "eval_steps_per_second": 0.677, + "step": 65300 + } + ], + "max_steps": 183120, + "num_train_epochs": 20, + "total_flos": 1.5636429250868596e+21, + "trial_name": null, + "trial_params": null +}