{
  "best_metric": 0.46308374404907227,
  "best_model_checkpoint": "wav2vec2-xls-r-300m-fula/checkpoint-4000",
  "epoch": 5.154639175257732,
  "eval_steps": 2000,
  "global_step": 10000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01288659793814433,
      "grad_norm": 10.90772533416748,
      "learning_rate": 3.4499999999999996e-06,
      "loss": 19.1295,
      "step": 25
    },
    {
      "epoch": 0.02577319587628866,
      "grad_norm": 12.727262496948242,
      "learning_rate": 7.2e-06,
      "loss": 16.6655,
      "step": 50
    },
    {
      "epoch": 0.03865979381443299,
      "grad_norm": 17.578950881958008,
      "learning_rate": 1.0949999999999998e-05,
      "loss": 13.462,
      "step": 75
    },
    {
      "epoch": 0.05154639175257732,
      "grad_norm": 14.574068069458008,
      "learning_rate": 1.47e-05,
      "loss": 7.4496,
      "step": 100
    },
    {
      "epoch": 0.06443298969072164,
      "grad_norm": 11.906006813049316,
      "learning_rate": 1.8449999999999998e-05,
      "loss": 6.1341,
      "step": 125
    },
    {
      "epoch": 0.07731958762886598,
      "grad_norm": 10.743711471557617,
      "learning_rate": 2.2199999999999998e-05,
      "loss": 5.0479,
      "step": 150
    },
    {
      "epoch": 0.09020618556701031,
      "grad_norm": 9.327588081359863,
      "learning_rate": 2.5949999999999997e-05,
      "loss": 4.86,
      "step": 175
    },
    {
      "epoch": 0.10309278350515463,
      "grad_norm": 8.118748664855957,
      "learning_rate": 2.97e-05,
      "loss": 4.3188,
      "step": 200
    },
    {
      "epoch": 0.11597938144329897,
      "grad_norm": 4.968245983123779,
      "learning_rate": 3.345e-05,
      "loss": 4.142,
      "step": 225
    },
    {
      "epoch": 0.12886597938144329,
      "grad_norm": 3.5491788387298584,
      "learning_rate": 3.7199999999999996e-05,
      "loss": 3.7329,
      "step": 250
    },
    {
      "epoch": 0.14175257731958762,
      "grad_norm": 2.228337049484253,
      "learning_rate": 4.095e-05,
      "loss": 3.5432,
      "step": 275
    },
    {
      "epoch": 0.15463917525773196,
      "grad_norm": 1.3472025394439697,
      "learning_rate": 4.4699999999999996e-05,
      "loss": 3.3229,
      "step": 300
    },
    {
      "epoch": 0.16752577319587628,
      "grad_norm": 0.8919354677200317,
      "learning_rate": 4.845e-05,
      "loss": 3.1852,
      "step": 325
    },
    {
      "epoch": 0.18041237113402062,
      "grad_norm": 0.9615734815597534,
      "learning_rate": 5.2199999999999995e-05,
      "loss": 3.0339,
      "step": 350
    },
    {
      "epoch": 0.19329896907216496,
      "grad_norm": 1.0601171255111694,
      "learning_rate": 5.595e-05,
      "loss": 2.9307,
      "step": 375
    },
    {
      "epoch": 0.20618556701030927,
      "grad_norm": 1.2133561372756958,
      "learning_rate": 5.97e-05,
      "loss": 2.7395,
      "step": 400
    },
    {
      "epoch": 0.2190721649484536,
      "grad_norm": 0.922024130821228,
      "learning_rate": 6.345e-05,
      "loss": 2.5765,
      "step": 425
    },
    {
      "epoch": 0.23195876288659795,
      "grad_norm": 1.0528242588043213,
      "learning_rate": 6.72e-05,
      "loss": 2.2256,
      "step": 450
    },
    {
      "epoch": 0.24484536082474226,
      "grad_norm": 1.39167058467865,
      "learning_rate": 7.094999999999999e-05,
      "loss": 1.7335,
      "step": 475
    },
    {
      "epoch": 0.25773195876288657,
      "grad_norm": 1.182673692703247,
      "learning_rate": 7.47e-05,
      "loss": 1.2321,
      "step": 500
    },
    {
      "epoch": 0.2706185567010309,
      "grad_norm": 0.9412080645561218,
      "learning_rate": 7.845e-05,
      "loss": 0.9694,
      "step": 525
    },
    {
      "epoch": 0.28350515463917525,
      "grad_norm": 1.3718018531799316,
      "learning_rate": 8.22e-05,
      "loss": 0.9061,
      "step": 550
    },
    {
      "epoch": 0.2963917525773196,
      "grad_norm": 0.7417821884155273,
      "learning_rate": 8.594999999999999e-05,
      "loss": 0.8102,
      "step": 575
    },
    {
      "epoch": 0.30927835051546393,
      "grad_norm": 0.9690466523170471,
      "learning_rate": 8.969999999999998e-05,
      "loss": 0.7434,
      "step": 600
    },
    {
      "epoch": 0.32216494845360827,
      "grad_norm": 0.8293886780738831,
      "learning_rate": 9.345e-05,
      "loss": 0.7381,
      "step": 625
    },
    {
      "epoch": 0.33505154639175255,
      "grad_norm": 0.8486959934234619,
      "learning_rate": 9.719999999999999e-05,
      "loss": 0.6224,
      "step": 650
    },
    {
      "epoch": 0.3479381443298969,
      "grad_norm": 0.7055938243865967,
      "learning_rate": 0.00010095,
      "loss": 0.6925,
      "step": 675
    },
    {
      "epoch": 0.36082474226804123,
      "grad_norm": 0.7605512142181396,
      "learning_rate": 0.00010469999999999998,
      "loss": 0.6136,
      "step": 700
    },
    {
      "epoch": 0.37371134020618557,
      "grad_norm": 0.6695568561553955,
      "learning_rate": 0.00010845,
      "loss": 0.6419,
      "step": 725
    },
    {
      "epoch": 0.3865979381443299,
      "grad_norm": 0.8840702772140503,
      "learning_rate": 0.00011219999999999999,
      "loss": 0.6667,
      "step": 750
    },
    {
      "epoch": 0.39948453608247425,
      "grad_norm": 0.7186158299446106,
      "learning_rate": 0.00011595,
      "loss": 0.6397,
      "step": 775
    },
    {
      "epoch": 0.41237113402061853,
      "grad_norm": 0.6683000922203064,
      "learning_rate": 0.0001197,
      "loss": 0.5647,
      "step": 800
    },
    {
      "epoch": 0.4252577319587629,
      "grad_norm": 0.8537669777870178,
      "learning_rate": 0.00012345,
      "loss": 0.5624,
      "step": 825
    },
    {
      "epoch": 0.4381443298969072,
      "grad_norm": 1.0267616510391235,
      "learning_rate": 0.00012719999999999997,
      "loss": 0.624,
      "step": 850
    },
    {
      "epoch": 0.45103092783505155,
      "grad_norm": 0.6453070640563965,
      "learning_rate": 0.00013094999999999998,
      "loss": 0.5539,
      "step": 875
    },
    {
      "epoch": 0.4639175257731959,
      "grad_norm": 1.0308513641357422,
      "learning_rate": 0.0001347,
      "loss": 0.5728,
      "step": 900
    },
    {
      "epoch": 0.47680412371134023,
      "grad_norm": 0.560512125492096,
      "learning_rate": 0.00013845,
      "loss": 0.5305,
      "step": 925
    },
    {
      "epoch": 0.4896907216494845,
      "grad_norm": 0.7196568250656128,
      "learning_rate": 0.0001422,
      "loss": 0.5669,
      "step": 950
    },
    {
      "epoch": 0.5025773195876289,
      "grad_norm": 0.5675578713417053,
      "learning_rate": 0.00014595,
      "loss": 0.5093,
      "step": 975
    },
    {
      "epoch": 0.5154639175257731,
      "grad_norm": 0.8834163546562195,
      "learning_rate": 0.00014969999999999998,
      "loss": 0.5757,
      "step": 1000
    },
    {
      "epoch": 0.5283505154639175,
      "grad_norm": 0.5083448886871338,
      "learning_rate": 0.00015344999999999996,
      "loss": 0.5412,
      "step": 1025
    },
    {
      "epoch": 0.5412371134020618,
      "grad_norm": 0.747595489025116,
      "learning_rate": 0.0001572,
      "loss": 0.5849,
      "step": 1050
    },
    {
      "epoch": 0.5541237113402062,
      "grad_norm": 0.5015640258789062,
      "learning_rate": 0.00016094999999999998,
      "loss": 0.5049,
      "step": 1075
    },
    {
      "epoch": 0.5670103092783505,
      "grad_norm": 0.7311388850212097,
      "learning_rate": 0.0001647,
      "loss": 0.4726,
      "step": 1100
    },
    {
      "epoch": 0.5798969072164949,
      "grad_norm": 0.6942028999328613,
      "learning_rate": 0.00016844999999999997,
      "loss": 0.4934,
      "step": 1125
    },
    {
      "epoch": 0.5927835051546392,
      "grad_norm": 0.7268182635307312,
      "learning_rate": 0.00017219999999999998,
      "loss": 0.5074,
      "step": 1150
    },
    {
      "epoch": 0.6056701030927835,
      "grad_norm": 0.5172975063323975,
      "learning_rate": 0.00017595,
      "loss": 0.5293,
      "step": 1175
    },
    {
      "epoch": 0.6185567010309279,
      "grad_norm": 0.5973320603370667,
      "learning_rate": 0.00017969999999999998,
      "loss": 0.5018,
      "step": 1200
    },
    {
      "epoch": 0.6314432989690721,
      "grad_norm": 0.6601810455322266,
      "learning_rate": 0.00018345,
      "loss": 0.4953,
      "step": 1225
    },
    {
      "epoch": 0.6443298969072165,
      "grad_norm": 0.7992896437644958,
      "learning_rate": 0.0001872,
      "loss": 0.4748,
      "step": 1250
    },
    {
      "epoch": 0.6572164948453608,
      "grad_norm": 0.5774939060211182,
      "learning_rate": 0.00019094999999999998,
      "loss": 0.4659,
      "step": 1275
    },
    {
      "epoch": 0.6701030927835051,
      "grad_norm": 0.7045702338218689,
      "learning_rate": 0.0001947,
      "loss": 0.4433,
      "step": 1300
    },
    {
      "epoch": 0.6829896907216495,
      "grad_norm": 0.6067873239517212,
      "learning_rate": 0.00019844999999999997,
      "loss": 0.4872,
      "step": 1325
    },
    {
      "epoch": 0.6958762886597938,
      "grad_norm": 1.0101662874221802,
      "learning_rate": 0.0002022,
      "loss": 0.4933,
      "step": 1350
    },
    {
      "epoch": 0.7087628865979382,
      "grad_norm": 0.4532999098300934,
      "learning_rate": 0.00020595,
      "loss": 0.474,
      "step": 1375
    },
    {
      "epoch": 0.7216494845360825,
      "grad_norm": 0.8769963383674622,
      "learning_rate": 0.00020969999999999997,
      "loss": 0.4923,
      "step": 1400
    },
    {
      "epoch": 0.7345360824742269,
      "grad_norm": 0.38735705614089966,
      "learning_rate": 0.00021344999999999998,
      "loss": 0.5358,
      "step": 1425
    },
    {
      "epoch": 0.7474226804123711,
      "grad_norm": 0.5298680663108826,
      "learning_rate": 0.00021719999999999997,
      "loss": 0.4961,
      "step": 1450
    },
    {
      "epoch": 0.7603092783505154,
      "grad_norm": 0.6393166780471802,
      "learning_rate": 0.00022095,
      "loss": 0.5057,
      "step": 1475
    },
    {
      "epoch": 0.7731958762886598,
      "grad_norm": 1.908589243888855,
      "learning_rate": 0.0002247,
      "loss": 0.5043,
      "step": 1500
    },
    {
      "epoch": 0.7860824742268041,
      "grad_norm": 0.5039921402931213,
      "learning_rate": 0.00022844999999999997,
      "loss": 0.4767,
      "step": 1525
    },
    {
      "epoch": 0.7989690721649485,
      "grad_norm": 0.5750266909599304,
      "learning_rate": 0.00023219999999999998,
      "loss": 0.5002,
      "step": 1550
    },
    {
      "epoch": 0.8118556701030928,
      "grad_norm": 0.5906339883804321,
      "learning_rate": 0.00023594999999999996,
      "loss": 0.4776,
      "step": 1575
    },
    {
      "epoch": 0.8247422680412371,
      "grad_norm": 0.5718595385551453,
      "learning_rate": 0.0002397,
      "loss": 0.4934,
      "step": 1600
    },
    {
      "epoch": 0.8376288659793815,
      "grad_norm": 0.5280390381813049,
      "learning_rate": 0.00024344999999999998,
      "loss": 0.4533,
      "step": 1625
    },
    {
      "epoch": 0.8505154639175257,
      "grad_norm": 1.016766905784607,
      "learning_rate": 0.0002472,
      "loss": 0.4795,
      "step": 1650
    },
    {
      "epoch": 0.8634020618556701,
      "grad_norm": 0.4700835943222046,
      "learning_rate": 0.00025095,
      "loss": 0.4399,
      "step": 1675
    },
    {
      "epoch": 0.8762886597938144,
      "grad_norm": 0.7027618885040283,
      "learning_rate": 0.00025469999999999996,
      "loss": 0.4801,
      "step": 1700
    },
    {
      "epoch": 0.8891752577319587,
      "grad_norm": 0.41857969760894775,
      "learning_rate": 0.00025844999999999997,
      "loss": 0.4449,
      "step": 1725
    },
    {
      "epoch": 0.9020618556701031,
      "grad_norm": 0.7206704020500183,
      "learning_rate": 0.0002622,
      "loss": 0.4648,
      "step": 1750
    },
    {
      "epoch": 0.9149484536082474,
      "grad_norm": 0.485895037651062,
      "learning_rate": 0.00026595,
      "loss": 0.4598,
      "step": 1775
    },
    {
      "epoch": 0.9278350515463918,
      "grad_norm": 0.7267158031463623,
      "learning_rate": 0.0002697,
      "loss": 0.462,
      "step": 1800
    },
    {
      "epoch": 0.9407216494845361,
      "grad_norm": 0.4129009246826172,
      "learning_rate": 0.00027344999999999995,
      "loss": 0.4585,
      "step": 1825
    },
    {
      "epoch": 0.9536082474226805,
      "grad_norm": 0.6467506885528564,
      "learning_rate": 0.0002772,
      "loss": 0.4539,
      "step": 1850
    },
    {
      "epoch": 0.9664948453608248,
      "grad_norm": 0.6241980791091919,
      "learning_rate": 0.00028094999999999997,
      "loss": 0.4917,
      "step": 1875
    },
    {
      "epoch": 0.979381443298969,
      "grad_norm": 0.817642867565155,
      "learning_rate": 0.0002847,
      "loss": 0.4337,
      "step": 1900
    },
    {
      "epoch": 0.9922680412371134,
      "grad_norm": 1.18275785446167,
      "learning_rate": 0.00028845,
      "loss": 0.4591,
      "step": 1925
    },
    {
      "epoch": 1.0051546391752577,
      "grad_norm": 0.5338313579559326,
      "learning_rate": 0.00029219999999999995,
      "loss": 0.5187,
      "step": 1950
    },
    {
      "epoch": 1.018041237113402,
      "grad_norm": 0.40176838636398315,
      "learning_rate": 0.00029595,
      "loss": 0.4012,
      "step": 1975
    },
    {
      "epoch": 1.0309278350515463,
      "grad_norm": 0.6469115018844604,
      "learning_rate": 0.00029969999999999997,
      "loss": 0.4752,
      "step": 2000
    },
    {
      "epoch": 1.0309278350515463,
      "eval_loss": 0.59544438123703,
      "eval_runtime": 196.5387,
      "eval_samples_per_second": 6.233,
      "eval_steps_per_second": 0.784,
      "eval_wer": 1.0,
      "step": 2000
    },
    {
      "epoch": 1.0438144329896908,
      "grad_norm": 0.6261550188064575,
      "learning_rate": 0.00029987722419928825,
      "loss": 0.4102,
      "step": 2025
    },
    {
      "epoch": 1.056701030927835,
      "grad_norm": 0.8379774689674377,
      "learning_rate": 0.00029974377224199284,
      "loss": 0.4658,
      "step": 2050
    },
    {
      "epoch": 1.0695876288659794,
      "grad_norm": 0.503239631652832,
      "learning_rate": 0.0002996103202846975,
      "loss": 0.4167,
      "step": 2075
    },
    {
      "epoch": 1.0824742268041236,
      "grad_norm": 0.3943336606025696,
      "learning_rate": 0.00029947686832740213,
      "loss": 0.4686,
      "step": 2100
    },
    {
      "epoch": 1.0953608247422681,
      "grad_norm": 0.4782889783382416,
      "learning_rate": 0.0002993434163701067,
      "loss": 0.4003,
      "step": 2125
    },
    {
      "epoch": 1.1082474226804124,
      "grad_norm": 0.37270641326904297,
      "learning_rate": 0.00029920996441281137,
      "loss": 0.4578,
      "step": 2150
    },
    {
      "epoch": 1.1211340206185567,
      "grad_norm": 0.5629658699035645,
      "learning_rate": 0.000299076512455516,
      "loss": 0.3687,
      "step": 2175
    },
    {
      "epoch": 1.134020618556701,
      "grad_norm": 0.3787396252155304,
      "learning_rate": 0.0002989430604982206,
      "loss": 0.4322,
      "step": 2200
    },
    {
      "epoch": 1.1469072164948453,
      "grad_norm": 0.6377401947975159,
      "learning_rate": 0.00029880960854092526,
      "loss": 0.4403,
      "step": 2225
    },
    {
      "epoch": 1.1597938144329896,
      "grad_norm": 0.37100082635879517,
      "learning_rate": 0.00029867615658362985,
      "loss": 0.453,
      "step": 2250
    },
    {
      "epoch": 1.172680412371134,
      "grad_norm": 0.7276923060417175,
      "learning_rate": 0.0002985427046263345,
      "loss": 0.4145,
      "step": 2275
    },
    {
      "epoch": 1.1855670103092784,
      "grad_norm": 0.8605408668518066,
      "learning_rate": 0.00029840925266903914,
      "loss": 0.4539,
      "step": 2300
    },
    {
      "epoch": 1.1984536082474226,
      "grad_norm": 0.45449143648147583,
      "learning_rate": 0.00029827580071174373,
      "loss": 0.4031,
      "step": 2325
    },
    {
      "epoch": 1.211340206185567,
      "grad_norm": 0.36433449387550354,
      "learning_rate": 0.0002981423487544484,
      "loss": 0.4548,
      "step": 2350
    },
    {
      "epoch": 1.2242268041237114,
      "grad_norm": 0.48323678970336914,
      "learning_rate": 0.000298008896797153,
      "loss": 0.4236,
      "step": 2375
    },
    {
      "epoch": 1.2371134020618557,
      "grad_norm": 0.6236255764961243,
      "learning_rate": 0.0002978754448398576,
      "loss": 0.438,
      "step": 2400
    },
    {
      "epoch": 1.25,
      "grad_norm": 0.721978485584259,
      "learning_rate": 0.00029774199288256227,
      "loss": 0.3803,
      "step": 2425
    },
    {
      "epoch": 1.2628865979381443,
      "grad_norm": 0.33121246099472046,
      "learning_rate": 0.00029760854092526686,
      "loss": 0.4759,
      "step": 2450
    },
    {
      "epoch": 1.2757731958762886,
      "grad_norm": 0.6318331360816956,
      "learning_rate": 0.0002974750889679715,
      "loss": 0.3616,
      "step": 2475
    },
    {
      "epoch": 1.2886597938144329,
      "grad_norm": 0.30613037943840027,
      "learning_rate": 0.0002973416370106761,
      "loss": 0.4355,
      "step": 2500
    },
    {
      "epoch": 1.3015463917525774,
      "grad_norm": 0.6804624199867249,
      "learning_rate": 0.00029720818505338075,
      "loss": 0.4296,
      "step": 2525
    },
    {
      "epoch": 1.3144329896907216,
      "grad_norm": 0.5847098231315613,
      "learning_rate": 0.0002970747330960854,
      "loss": 0.4774,
      "step": 2550
    },
    {
      "epoch": 1.327319587628866,
      "grad_norm": 0.43900150060653687,
      "learning_rate": 0.00029694128113879,
      "loss": 0.4249,
      "step": 2575
    },
    {
      "epoch": 1.3402061855670104,
      "grad_norm": 0.26748043298721313,
      "learning_rate": 0.00029680782918149463,
      "loss": 0.4713,
      "step": 2600
    },
    {
      "epoch": 1.3530927835051547,
      "grad_norm": 0.5893319845199585,
      "learning_rate": 0.0002966743772241993,
      "loss": 0.3566,
      "step": 2625
    },
    {
      "epoch": 1.365979381443299,
      "grad_norm": 0.2939490079879761,
      "learning_rate": 0.00029654092526690387,
      "loss": 0.427,
      "step": 2650
    },
    {
      "epoch": 1.3788659793814433,
      "grad_norm": 0.6302582025527954,
      "learning_rate": 0.0002964074733096085,
      "loss": 0.413,
      "step": 2675
    },
    {
      "epoch": 1.3917525773195876,
      "grad_norm": 0.5481074452400208,
      "learning_rate": 0.0002962740213523131,
      "loss": 0.5027,
      "step": 2700
    },
    {
      "epoch": 1.4046391752577319,
      "grad_norm": 0.41078057885169983,
      "learning_rate": 0.00029614056939501776,
      "loss": 0.3935,
      "step": 2725
    },
    {
      "epoch": 1.4175257731958764,
      "grad_norm": 0.5955342054367065,
      "learning_rate": 0.0002960071174377224,
      "loss": 0.4454,
      "step": 2750
    },
    {
      "epoch": 1.4304123711340206,
      "grad_norm": 0.47366246581077576,
      "learning_rate": 0.000295873665480427,
      "loss": 0.3861,
      "step": 2775
    },
    {
      "epoch": 1.443298969072165,
      "grad_norm": 0.5429800748825073,
      "learning_rate": 0.00029574021352313164,
      "loss": 0.4525,
      "step": 2800
    },
    {
      "epoch": 1.4561855670103092,
      "grad_norm": 0.5089389681816101,
      "learning_rate": 0.00029560676156583623,
      "loss": 0.3727,
      "step": 2825
    },
    {
      "epoch": 1.4690721649484537,
      "grad_norm": 0.3353477716445923,
      "learning_rate": 0.0002954733096085409,
      "loss": 0.4855,
      "step": 2850
    },
    {
      "epoch": 1.481958762886598,
      "grad_norm": 0.6160407662391663,
      "learning_rate": 0.0002953398576512455,
      "loss": 0.3635,
      "step": 2875
    },
    {
      "epoch": 1.4948453608247423,
      "grad_norm": 0.3237319886684418,
      "learning_rate": 0.0002952064056939501,
      "loss": 0.4285,
      "step": 2900
    },
    {
      "epoch": 1.5077319587628866,
      "grad_norm": 0.450309693813324,
      "learning_rate": 0.00029507295373665477,
      "loss": 0.3759,
      "step": 2925
    },
    {
      "epoch": 1.5206185567010309,
      "grad_norm": 0.3550674319267273,
      "learning_rate": 0.0002949395017793594,
      "loss": 0.4442,
      "step": 2950
    },
    {
      "epoch": 1.5335051546391751,
      "grad_norm": 0.3989160656929016,
      "learning_rate": 0.000294806049822064,
      "loss": 0.4255,
      "step": 2975
    },
    {
      "epoch": 1.5463917525773194,
      "grad_norm": 0.5774266719818115,
      "learning_rate": 0.00029467259786476865,
      "loss": 0.4526,
      "step": 3000
    },
    {
      "epoch": 1.559278350515464,
      "grad_norm": 0.6264383792877197,
      "learning_rate": 0.0002945391459074733,
      "loss": 0.4097,
      "step": 3025
    },
    {
      "epoch": 1.5721649484536082,
      "grad_norm": 0.33969295024871826,
      "learning_rate": 0.0002944056939501779,
      "loss": 0.4241,
      "step": 3050
    },
    {
      "epoch": 1.5850515463917527,
      "grad_norm": 0.39299994707107544,
      "learning_rate": 0.00029427224199288254,
      "loss": 0.3778,
      "step": 3075
    },
    {
      "epoch": 1.597938144329897,
      "grad_norm": 0.42388004064559937,
      "learning_rate": 0.0002941387900355872,
      "loss": 0.4004,
      "step": 3100
    },
    {
      "epoch": 1.6108247422680413,
      "grad_norm": 0.9131516814231873,
      "learning_rate": 0.0002940053380782918,
      "loss": 0.3531,
      "step": 3125
    },
    {
      "epoch": 1.6237113402061856,
      "grad_norm": 0.296908974647522,
      "learning_rate": 0.0002938718861209964,
      "loss": 0.4613,
      "step": 3150
    },
    {
      "epoch": 1.6365979381443299,
      "grad_norm": 0.4583122134208679,
      "learning_rate": 0.000293738434163701,
      "loss": 0.3833,
      "step": 3175
    },
    {
      "epoch": 1.6494845360824741,
      "grad_norm": 0.35052600502967834,
      "learning_rate": 0.00029360498220640566,
      "loss": 0.4432,
      "step": 3200
    },
    {
      "epoch": 1.6623711340206184,
      "grad_norm": 0.532720685005188,
      "learning_rate": 0.0002934715302491103,
      "loss": 0.3635,
      "step": 3225
    },
    {
      "epoch": 1.675257731958763,
      "grad_norm": 0.3807854652404785,
      "learning_rate": 0.0002933380782918149,
      "loss": 0.43,
      "step": 3250
    },
    {
      "epoch": 1.6881443298969072,
      "grad_norm": 0.5288501381874084,
      "learning_rate": 0.00029320462633451955,
      "loss": 0.3449,
      "step": 3275
    },
    {
      "epoch": 1.7010309278350515,
      "grad_norm": 0.3881712853908539,
      "learning_rate": 0.0002930711743772242,
      "loss": 0.4406,
      "step": 3300
    },
    {
      "epoch": 1.713917525773196,
      "grad_norm": 0.42132484912872314,
      "learning_rate": 0.0002929377224199288,
      "loss": 0.3888,
      "step": 3325
    },
    {
      "epoch": 1.7268041237113403,
      "grad_norm": 0.3974430561065674,
      "learning_rate": 0.00029280427046263343,
      "loss": 0.4265,
      "step": 3350
    },
    {
      "epoch": 1.7396907216494846,
      "grad_norm": 0.4618494212627411,
      "learning_rate": 0.0002926708185053381,
      "loss": 0.3537,
      "step": 3375
    },
    {
      "epoch": 1.7525773195876289,
      "grad_norm": 0.347777783870697,
      "learning_rate": 0.00029253736654804267,
      "loss": 0.4282,
      "step": 3400
    },
    {
      "epoch": 1.7654639175257731,
      "grad_norm": 0.5885565280914307,
      "learning_rate": 0.0002924039145907473,
      "loss": 0.3694,
      "step": 3425
    },
    {
      "epoch": 1.7783505154639174,
      "grad_norm": 0.5113171935081482,
      "learning_rate": 0.00029227046263345197,
      "loss": 0.5256,
      "step": 3450
    },
    {
      "epoch": 1.7912371134020617,
      "grad_norm": 0.7724625468254089,
      "learning_rate": 0.00029213701067615656,
      "loss": 0.3649,
      "step": 3475
    },
    {
      "epoch": 1.8041237113402062,
      "grad_norm": 0.44778281450271606,
      "learning_rate": 0.0002920035587188612,
      "loss": 0.4313,
      "step": 3500
    },
    {
      "epoch": 1.8170103092783505,
      "grad_norm": 0.8251272439956665,
      "learning_rate": 0.00029187010676156585,
      "loss": 0.3839,
      "step": 3525
    },
    {
      "epoch": 1.829896907216495,
      "grad_norm": 0.4858299493789673,
      "learning_rate": 0.00029173665480427044,
      "loss": 0.4489,
      "step": 3550
    },
    {
      "epoch": 1.8427835051546393,
      "grad_norm": 0.5172144770622253,
      "learning_rate": 0.0002916032028469751,
      "loss": 0.409,
      "step": 3575
    },
    {
      "epoch": 1.8556701030927836,
      "grad_norm": 0.34371522068977356,
      "learning_rate": 0.0002914697508896797,
      "loss": 0.4365,
      "step": 3600
    },
    {
      "epoch": 1.8685567010309279,
      "grad_norm": 0.5957440137863159,
      "learning_rate": 0.00029133629893238433,
      "loss": 0.3611,
      "step": 3625
    },
    {
      "epoch": 1.8814432989690721,
      "grad_norm": 0.3856901526451111,
      "learning_rate": 0.000291202846975089,
      "loss": 0.451,
      "step": 3650
    },
    {
      "epoch": 1.8943298969072164,
      "grad_norm": 0.5961311459541321,
      "learning_rate": 0.00029106939501779357,
      "loss": 0.3934,
      "step": 3675
    },
    {
      "epoch": 1.9072164948453607,
      "grad_norm": 0.4491939842700958,
      "learning_rate": 0.0002909359430604982,
      "loss": 0.4466,
      "step": 3700
    },
    {
      "epoch": 1.920103092783505,
      "grad_norm": 0.41777607798576355,
      "learning_rate": 0.0002908024911032028,
      "loss": 0.3754,
      "step": 3725
    },
    {
      "epoch": 1.9329896907216495,
      "grad_norm": 0.42550450563430786,
      "learning_rate": 0.00029066903914590745,
      "loss": 0.4027,
      "step": 3750
    },
    {
      "epoch": 1.9458762886597938,
      "grad_norm": 0.427433043718338,
      "learning_rate": 0.0002905355871886121,
      "loss": 0.3603,
      "step": 3775
    },
    {
      "epoch": 1.9587628865979383,
      "grad_norm": 0.8279537558555603,
      "learning_rate": 0.0002904021352313167,
      "loss": 0.4098,
      "step": 3800
    },
    {
      "epoch": 1.9716494845360826,
      "grad_norm": 0.6759387850761414,
      "learning_rate": 0.00029026868327402134,
      "loss": 0.3756,
      "step": 3825
    },
    {
      "epoch": 1.9845360824742269,
      "grad_norm": 0.29660704731941223,
      "learning_rate": 0.000290135231316726,
      "loss": 0.4591,
      "step": 3850
    },
    {
      "epoch": 1.9974226804123711,
      "grad_norm": 0.4846726655960083,
      "learning_rate": 0.0002900017793594306,
      "loss": 0.3944,
      "step": 3875
    },
    {
      "epoch": 2.0103092783505154,
      "grad_norm": 0.4863591492176056,
      "learning_rate": 0.0002898683274021352,
      "loss": 0.3813,
      "step": 3900
    },
    {
      "epoch": 2.0231958762886597,
      "grad_norm": 0.37275585532188416,
      "learning_rate": 0.0002897348754448398,
      "loss": 0.3753,
      "step": 3925
    },
    {
      "epoch": 2.036082474226804,
      "grad_norm": 0.36628881096839905,
      "learning_rate": 0.00028960142348754446,
      "loss": 0.403,
      "step": 3950
    },
    {
      "epoch": 2.0489690721649483,
      "grad_norm": 0.4523802697658539,
      "learning_rate": 0.0002894679715302491,
      "loss": 0.3601,
      "step": 3975
    },
    {
      "epoch": 2.0618556701030926,
      "grad_norm": 0.4722582697868347,
      "learning_rate": 0.0002893345195729537,
      "loss": 0.3858,
      "step": 4000
    },
    {
      "epoch": 2.0618556701030926,
      "eval_loss": 0.46308374404907227,
      "eval_runtime": 201.4429,
      "eval_samples_per_second": 6.081,
      "eval_steps_per_second": 0.764,
      "eval_wer": 1.0057142857142858,
      "step": 4000
    },
    {
      "epoch": 2.0747422680412373,
      "grad_norm": 0.5041220784187317,
      "learning_rate": 0.00028920106761565835,
      "loss": 0.3518,
      "step": 4025
    },
    {
      "epoch": 2.0876288659793816,
      "grad_norm": 0.48610439896583557,
      "learning_rate": 0.00028906761565836294,
      "loss": 0.3818,
      "step": 4050
    },
    {
      "epoch": 2.100515463917526,
      "grad_norm": 0.6834833025932312,
      "learning_rate": 0.0002889341637010676,
      "loss": 0.3778,
      "step": 4075
    },
    {
      "epoch": 2.11340206185567,
      "grad_norm": 0.687245786190033,
      "learning_rate": 0.00028880071174377224,
      "loss": 0.3771,
      "step": 4100
    },
    {
      "epoch": 2.1262886597938144,
      "grad_norm": 0.706832230091095,
      "learning_rate": 0.00028866725978647683,
      "loss": 0.3868,
      "step": 4125
    },
    {
      "epoch": 2.1391752577319587,
      "grad_norm": 0.7049499154090881,
      "learning_rate": 0.0002885338078291815,
      "loss": 0.3646,
      "step": 4150
    },
    {
      "epoch": 2.152061855670103,
      "grad_norm": 0.40853607654571533,
      "learning_rate": 0.00028840035587188607,
      "loss": 0.3235,
      "step": 4175
    },
    {
      "epoch": 2.1649484536082473,
      "grad_norm": 0.3675331771373749,
      "learning_rate": 0.0002882669039145907,
      "loss": 0.4079,
      "step": 4200
    },
    {
      "epoch": 2.1778350515463916,
      "grad_norm": 1.3320142030715942,
      "learning_rate": 0.00028813345195729536,
      "loss": 0.3031,
      "step": 4225
    },
    {
      "epoch": 2.1907216494845363,
      "grad_norm": 0.6416336297988892,
      "learning_rate": 0.00028799999999999995,
      "loss": 0.4087,
      "step": 4250
    },
    {
      "epoch": 2.2036082474226806,
      "grad_norm": 0.43675994873046875,
      "learning_rate": 0.0002878665480427046,
      "loss": 0.3577,
      "step": 4275
    },
    {
      "epoch": 2.216494845360825,
      "grad_norm": 0.3541963994503021,
      "learning_rate": 0.00028773309608540925,
      "loss": 0.3706,
      "step": 4300
    },
    {
      "epoch": 2.229381443298969,
      "grad_norm": 0.4469320476055145,
      "learning_rate": 0.00028759964412811384,
      "loss": 0.3692,
      "step": 4325
    },
    {
      "epoch": 2.2422680412371134,
      "grad_norm": 0.4056352376937866,
      "learning_rate": 0.0002874661921708185,
      "loss": 0.3659,
      "step": 4350
    },
    {
      "epoch": 2.2551546391752577,
      "grad_norm": 0.3547820746898651,
      "learning_rate": 0.0002873327402135231,
      "loss": 0.3564,
      "step": 4375
    },
    {
      "epoch": 2.268041237113402,
      "grad_norm": 0.32645678520202637,
      "learning_rate": 0.0002871992882562277,
      "loss": 0.3346,
      "step": 4400
    },
    {
      "epoch": 2.2809278350515463,
      "grad_norm": 0.7449667453765869,
      "learning_rate": 0.00028706583629893237,
      "loss": 0.3696,
      "step": 4425
    },
    {
      "epoch": 2.2938144329896906,
      "grad_norm": 0.4612623155117035,
      "learning_rate": 0.00028693238434163696,
      "loss": 0.3925,
      "step": 4450
    },
    {
      "epoch": 2.306701030927835,
      "grad_norm": 0.6325463056564331,
      "learning_rate": 0.0002867989323843416,
      "loss": 0.3512,
      "step": 4475
    },
    {
      "epoch": 2.319587628865979,
      "grad_norm": 0.29471156001091003,
      "learning_rate": 0.0002866654804270462,
      "loss": 0.3569,
      "step": 4500
    },
    {
      "epoch": 2.332474226804124,
      "grad_norm": 1.076217770576477,
      "learning_rate": 0.00028653202846975085,
      "loss": 0.3375,
      "step": 4525
    },
    {
      "epoch": 2.345360824742268,
      "grad_norm": 0.614162027835846,
      "learning_rate": 0.0002863985765124555,
      "loss": 0.4078,
      "step": 4550
    },
    {
      "epoch": 2.3582474226804124,
      "grad_norm": 0.4514384865760803,
      "learning_rate": 0.0002862651245551601,
      "loss": 0.3758,
      "step": 4575
    },
    {
      "epoch": 2.3711340206185567,
      "grad_norm": 0.314336359500885,
      "learning_rate": 0.00028613167259786473,
      "loss": 0.3765,
      "step": 4600
    },
    {
      "epoch": 2.384020618556701,
      "grad_norm": 0.5032577514648438,
      "learning_rate": 0.0002859982206405694,
      "loss": 0.3226,
      "step": 4625
    },
    {
      "epoch": 2.3969072164948453,
      "grad_norm": 0.6402880549430847,
      "learning_rate": 0.000285864768683274,
      "loss": 0.3986,
      "step": 4650
    },
    {
      "epoch": 2.4097938144329896,
      "grad_norm": 0.5510269403457642,
      "learning_rate": 0.0002857313167259786,
      "loss": 0.3757,
      "step": 4675
    },
    {
      "epoch": 2.422680412371134,
      "grad_norm": 0.5594025254249573,
      "learning_rate": 0.0002855978647686832,
      "loss": 0.4297,
      "step": 4700
    },
    {
      "epoch": 2.4355670103092786,
      "grad_norm": 0.3621445894241333,
      "learning_rate": 0.00028546441281138786,
      "loss": 0.3336,
      "step": 4725
    },
    {
      "epoch": 2.448453608247423,
      "grad_norm": 0.5683943033218384,
      "learning_rate": 0.0002853309608540925,
      "loss": 0.3395,
      "step": 4750
    },
    {
      "epoch": 2.461340206185567,
      "grad_norm": 0.7661644816398621,
      "learning_rate": 0.0002851975088967971,
      "loss": 0.377,
      "step": 4775
    },
    {
      "epoch": 2.4742268041237114,
      "grad_norm": 0.456636905670166,
      "learning_rate": 0.00028506405693950175,
      "loss": 0.3645,
      "step": 4800
    },
    {
      "epoch": 2.4871134020618557,
      "grad_norm": 0.49251827597618103,
      "learning_rate": 0.0002849306049822064,
      "loss": 0.34,
      "step": 4825
    },
    {
      "epoch": 2.5,
      "grad_norm": 0.32308971881866455,
      "learning_rate": 0.000284797153024911,
      "loss": 0.4092,
      "step": 4850
    },
    {
      "epoch": 2.5128865979381443,
      "grad_norm": 0.9722476005554199,
      "learning_rate": 0.00028466370106761563,
      "loss": 0.3651,
      "step": 4875
    },
    {
      "epoch": 2.5257731958762886,
      "grad_norm": 0.4552549123764038,
      "learning_rate": 0.0002845302491103203,
      "loss": 0.3837,
      "step": 4900
    },
    {
      "epoch": 2.538659793814433,
      "grad_norm": 0.4148350656032562,
      "learning_rate": 0.00028439679715302487,
      "loss": 0.3931,
      "step": 4925
    },
    {
      "epoch": 2.551546391752577,
      "grad_norm": 0.41644537448883057,
      "learning_rate": 0.0002842633451957295,
      "loss": 0.3677,
      "step": 4950
    },
    {
      "epoch": 2.5644329896907214,
      "grad_norm": 0.3989255428314209,
      "learning_rate": 0.00028412989323843416,
      "loss": 0.3759,
      "step": 4975
    },
    {
      "epoch": 2.5773195876288657,
      "grad_norm": 0.472091943025589,
      "learning_rate": 0.00028399644128113876,
      "loss": 0.4008,
      "step": 5000
    },
    {
      "epoch": 2.5902061855670104,
      "grad_norm": 0.9913691282272339,
      "learning_rate": 0.0002838629893238434,
      "loss": 0.3555,
      "step": 5025
    },
    {
      "epoch": 2.6030927835051547,
      "grad_norm": 0.425589382648468,
      "learning_rate": 0.000283729537366548,
      "loss": 0.3774,
      "step": 5050
    },
    {
      "epoch": 2.615979381443299,
      "grad_norm": 0.7836791276931763,
      "learning_rate": 0.00028359608540925264,
      "loss": 0.317,
      "step": 5075
    },
    {
      "epoch": 2.6288659793814433,
      "grad_norm": 0.7071629166603088,
      "learning_rate": 0.0002834626334519573,
      "loss": 0.4039,
      "step": 5100
    },
    {
      "epoch": 2.6417525773195876,
      "grad_norm": 0.5756880640983582,
      "learning_rate": 0.0002833291814946619,
      "loss": 0.342,
      "step": 5125
    },
    {
      "epoch": 2.654639175257732,
      "grad_norm": 0.425029993057251,
      "learning_rate": 0.0002831957295373665,
      "loss": 0.3765,
      "step": 5150
    },
    {
      "epoch": 2.667525773195876,
      "grad_norm": 0.5212023854255676,
      "learning_rate": 0.0002830622775800712,
      "loss": 0.3681,
      "step": 5175
    },
    {
      "epoch": 2.680412371134021,
      "grad_norm": 0.39988288283348083,
      "learning_rate": 0.00028292882562277577,
      "loss": 0.3525,
      "step": 5200
    },
    {
      "epoch": 2.693298969072165,
      "grad_norm": 0.31254854798316956,
      "learning_rate": 0.0002827953736654804,
      "loss": 0.3521,
      "step": 5225
    },
    {
      "epoch": 2.7061855670103094,
      "grad_norm": 0.5564956665039062,
      "learning_rate": 0.00028266192170818506,
      "loss": 0.3602,
      "step": 5250
    },
    {
      "epoch": 2.7190721649484537,
      "grad_norm": 0.45266616344451904,
      "learning_rate": 0.00028252846975088965,
      "loss": 0.3462,
      "step": 5275
    },
    {
      "epoch": 2.731958762886598,
      "grad_norm": 0.26552554965019226,
      "learning_rate": 0.0002823950177935943,
      "loss": 0.3787,
      "step": 5300
    },
    {
      "epoch": 2.7448453608247423,
      "grad_norm": 0.5850217938423157,
      "learning_rate": 0.00028226156583629894,
      "loss": 0.3289,
      "step": 5325
    },
    {
      "epoch": 2.7577319587628866,
      "grad_norm": 0.47871604561805725,
      "learning_rate": 0.00028212811387900354,
      "loss": 0.4246,
      "step": 5350
    },
    {
      "epoch": 2.770618556701031,
      "grad_norm": 0.5951977372169495,
      "learning_rate": 0.0002819946619217082,
      "loss": 0.3326,
      "step": 5375
    },
    {
      "epoch": 2.783505154639175,
      "grad_norm": 0.5806294083595276,
      "learning_rate": 0.0002818612099644128,
      "loss": 0.3952,
      "step": 5400
    },
    {
      "epoch": 2.7963917525773194,
      "grad_norm": 0.6218500733375549,
      "learning_rate": 0.0002817277580071174,
      "loss": 0.3222,
      "step": 5425
    },
    {
      "epoch": 2.8092783505154637,
      "grad_norm": 0.38604310154914856,
      "learning_rate": 0.00028159430604982207,
      "loss": 0.4408,
      "step": 5450
    },
    {
      "epoch": 2.822164948453608,
      "grad_norm": 2.576910972595215,
      "learning_rate": 0.00028146085409252666,
      "loss": 0.355,
      "step": 5475
    },
    {
      "epoch": 2.8350515463917527,
      "grad_norm": 0.28045961260795593,
      "learning_rate": 0.0002813274021352313,
      "loss": 0.3479,
      "step": 5500
    },
    {
      "epoch": 2.847938144329897,
      "grad_norm": 0.6178203225135803,
      "learning_rate": 0.00028119395017793596,
      "loss": 0.3097,
      "step": 5525
    },
    {
      "epoch": 2.8608247422680413,
      "grad_norm": 0.36241769790649414,
      "learning_rate": 0.00028106049822064055,
      "loss": 0.3762,
      "step": 5550
    },
    {
      "epoch": 2.8737113402061856,
      "grad_norm": 0.6459288597106934,
      "learning_rate": 0.0002809270462633452,
      "loss": 0.3261,
      "step": 5575
    },
    {
      "epoch": 2.88659793814433,
      "grad_norm": 0.458281546831131,
      "learning_rate": 0.0002807935943060498,
      "loss": 0.378,
      "step": 5600
    },
    {
      "epoch": 2.899484536082474,
      "grad_norm": 0.4269348382949829,
      "learning_rate": 0.00028066014234875443,
      "loss": 0.3483,
      "step": 5625
    },
    {
      "epoch": 2.9123711340206184,
      "grad_norm": 0.36018800735473633,
      "learning_rate": 0.0002805266903914591,
      "loss": 0.3592,
      "step": 5650
    },
    {
      "epoch": 2.925257731958763,
      "grad_norm": 0.6075245141983032,
      "learning_rate": 0.00028039323843416367,
      "loss": 0.3,
      "step": 5675
    },
    {
      "epoch": 2.9381443298969074,
      "grad_norm": 0.3082279860973358,
      "learning_rate": 0.0002802597864768683,
      "loss": 0.4306,
      "step": 5700
    },
    {
      "epoch": 2.9510309278350517,
      "grad_norm": 0.6344396471977234,
      "learning_rate": 0.0002801263345195729,
      "loss": 0.3795,
      "step": 5725
    },
    {
      "epoch": 2.963917525773196,
      "grad_norm": 0.3072182536125183,
      "learning_rate": 0.00027999288256227756,
      "loss": 0.3425,
      "step": 5750
    },
    {
      "epoch": 2.9768041237113403,
      "grad_norm": 0.5076513886451721,
      "learning_rate": 0.0002798594306049822,
      "loss": 0.3214,
      "step": 5775
    },
    {
      "epoch": 2.9896907216494846,
      "grad_norm": 0.34852054715156555,
      "learning_rate": 0.0002797259786476868,
      "loss": 0.3678,
      "step": 5800
    },
    {
      "epoch": 3.002577319587629,
      "grad_norm": 0.36334386467933655,
      "learning_rate": 0.00027959252669039144,
      "loss": 0.3984,
      "step": 5825
    },
    {
      "epoch": 3.015463917525773,
      "grad_norm": 0.46211138367652893,
      "learning_rate": 0.00027945907473309604,
      "loss": 0.2912,
      "step": 5850
    },
    {
      "epoch": 3.0283505154639174,
      "grad_norm": 0.3730103373527527,
      "learning_rate": 0.0002793256227758007,
      "loss": 0.337,
      "step": 5875
    },
    {
      "epoch": 3.0412371134020617,
      "grad_norm": 0.37066757678985596,
      "learning_rate": 0.00027919217081850533,
      "loss": 0.2959,
      "step": 5900
    },
    {
      "epoch": 3.054123711340206,
      "grad_norm": 0.5712897181510925,
      "learning_rate": 0.0002790587188612099,
      "loss": 0.3279,
      "step": 5925
    },
    {
      "epoch": 3.0670103092783507,
      "grad_norm": 0.39846915006637573,
      "learning_rate": 0.00027892526690391457,
      "loss": 0.3532,
      "step": 5950
    },
    {
      "epoch": 3.079896907216495,
      "grad_norm": 0.3366047143936157,
      "learning_rate": 0.0002787918149466192,
      "loss": 0.3486,
      "step": 5975
    },
    {
      "epoch": 3.0927835051546393,
      "grad_norm": 0.5053852796554565,
      "learning_rate": 0.0002786583629893238,
      "loss": 0.3571,
      "step": 6000
    },
    {
      "epoch": 3.0927835051546393,
      "eval_loss": 0.48639050126075745,
      "eval_runtime": 195.6249,
      "eval_samples_per_second": 6.262,
      "eval_steps_per_second": 0.787,
      "eval_wer": 1.0073469387755103,
      "step": 6000
    },
    {
      "epoch": 3.1056701030927836,
      "grad_norm": 0.35252827405929565,
      "learning_rate": 0.00027852491103202845,
      "loss": 0.3401,
      "step": 6025
    },
    {
      "epoch": 3.118556701030928,
      "grad_norm": 0.3530094027519226,
      "learning_rate": 0.00027839145907473305,
      "loss": 0.3094,
      "step": 6050
    },
    {
      "epoch": 3.131443298969072,
      "grad_norm": 0.9595320224761963,
      "learning_rate": 0.0002782580071174377,
      "loss": 0.3692,
      "step": 6075
    },
    {
      "epoch": 3.1443298969072164,
      "grad_norm": 0.3585176467895508,
      "learning_rate": 0.00027812455516014234,
      "loss": 0.3351,
      "step": 6100
    },
    {
      "epoch": 3.1572164948453607,
      "grad_norm": 0.4432651996612549,
      "learning_rate": 0.00027799110320284693,
      "loss": 0.3494,
      "step": 6125
    },
    {
      "epoch": 3.170103092783505,
      "grad_norm": 0.5367820858955383,
      "learning_rate": 0.0002778576512455516,
      "loss": 0.3311,
      "step": 6150
    },
    {
      "epoch": 3.1829896907216493,
      "grad_norm": 0.3430980443954468,
      "learning_rate": 0.00027772419928825617,
      "loss": 0.3357,
      "step": 6175
    },
    {
      "epoch": 3.195876288659794,
      "grad_norm": 1.0889408588409424,
      "learning_rate": 0.0002775907473309608,
      "loss": 0.3056,
      "step": 6200
    },
    {
      "epoch": 3.2087628865979383,
      "grad_norm": 0.3883308172225952,
      "learning_rate": 0.00027745729537366546,
      "loss": 0.3251,
      "step": 6225
    },
    {
      "epoch": 3.2216494845360826,
      "grad_norm": 0.41912856698036194,
      "learning_rate": 0.00027732384341637006,
      "loss": 0.3292,
      "step": 6250
    },
    {
      "epoch": 3.234536082474227,
      "grad_norm": 0.45084699988365173,
      "learning_rate": 0.0002771903914590747,
      "loss": 0.3516,
      "step": 6275
    },
    {
      "epoch": 3.247422680412371,
      "grad_norm": 0.4294661581516266,
      "learning_rate": 0.00027705693950177935,
      "loss": 0.3297,
      "step": 6300
    },
    {
      "epoch": 3.2603092783505154,
      "grad_norm": 0.25462472438812256,
      "learning_rate": 0.00027692348754448394,
      "loss": 0.3513,
      "step": 6325
    },
    {
      "epoch": 3.2731958762886597,
      "grad_norm": 0.2990482747554779,
      "learning_rate": 0.0002767900355871886,
      "loss": 0.3472,
      "step": 6350
    },
    {
      "epoch": 3.286082474226804,
      "grad_norm": 0.5474823713302612,
      "learning_rate": 0.0002766565836298932,
      "loss": 0.3404,
      "step": 6375
    },
    {
      "epoch": 3.2989690721649483,
      "grad_norm": 0.31416311860084534,
      "learning_rate": 0.00027652313167259783,
      "loss": 0.3052,
      "step": 6400
    },
    {
      "epoch": 3.3118556701030926,
      "grad_norm": 0.6990143656730652,
      "learning_rate": 0.0002763896797153025,
      "loss": 0.3661,
      "step": 6425
    },
    {
      "epoch": 3.3247422680412373,
      "grad_norm": 0.4336131811141968,
      "learning_rate": 0.00027625622775800707,
      "loss": 0.2757,
      "step": 6450
    },
    {
      "epoch": 3.3376288659793816,
      "grad_norm": 0.35333672165870667,
      "learning_rate": 0.0002761227758007117,
      "loss": 0.3275,
      "step": 6475
    },
    {
      "epoch": 3.350515463917526,
      "grad_norm": 0.7743633985519409,
      "learning_rate": 0.0002759893238434163,
      "loss": 0.2855,
      "step": 6500
    },
    {
      "epoch": 3.36340206185567,
      "grad_norm": 0.5316669344902039,
      "learning_rate": 0.00027585587188612095,
      "loss": 0.3742,
      "step": 6525
    },
    {
      "epoch": 3.3762886597938144,
      "grad_norm": 0.3642534017562866,
      "learning_rate": 0.0002757277580071174,
      "loss": 0.3676,
      "step": 6550
    },
    {
      "epoch": 3.3891752577319587,
      "grad_norm": 0.6164928078651428,
      "learning_rate": 0.00027559430604982203,
      "loss": 0.3658,
      "step": 6575
    },
    {
      "epoch": 3.402061855670103,
      "grad_norm": 0.3114078938961029,
      "learning_rate": 0.0002754608540925266,
      "loss": 0.2681,
      "step": 6600
    },
    {
      "epoch": 3.4149484536082473,
      "grad_norm": 0.8610777258872986,
      "learning_rate": 0.00027532740213523127,
      "loss": 0.3607,
      "step": 6625
    },
    {
      "epoch": 3.4278350515463916,
      "grad_norm": 0.49727797508239746,
      "learning_rate": 0.0002751939501779359,
      "loss": 0.3306,
      "step": 6650
    },
    {
      "epoch": 3.4407216494845363,
      "grad_norm": 0.3058234751224518,
      "learning_rate": 0.0002750604982206405,
      "loss": 0.3313,
      "step": 6675
    },
    {
      "epoch": 3.4536082474226806,
      "grad_norm": 0.5317339301109314,
      "learning_rate": 0.00027492704626334516,
      "loss": 0.3316,
      "step": 6700
    },
    {
      "epoch": 3.466494845360825,
      "grad_norm": 0.4639209806919098,
      "learning_rate": 0.0002747935943060498,
      "loss": 0.3773,
      "step": 6725
    },
    {
      "epoch": 3.479381443298969,
      "grad_norm": 0.28237494826316833,
      "learning_rate": 0.0002746601423487544,
      "loss": 0.3275,
      "step": 6750
    },
    {
      "epoch": 3.4922680412371134,
      "grad_norm": 0.44046372175216675,
      "learning_rate": 0.00027452669039145904,
      "loss": 0.3777,
      "step": 6775
    },
    {
      "epoch": 3.5051546391752577,
      "grad_norm": 0.40542760491371155,
      "learning_rate": 0.0002743932384341637,
      "loss": 0.3197,
      "step": 6800
    },
    {
      "epoch": 3.518041237113402,
      "grad_norm": 0.587062418460846,
      "learning_rate": 0.0002742597864768683,
      "loss": 0.3441,
      "step": 6825
    },
    {
      "epoch": 3.5309278350515463,
      "grad_norm": 0.34003278613090515,
      "learning_rate": 0.00027412633451957293,
      "loss": 0.3189,
      "step": 6850
    },
    {
      "epoch": 3.5438144329896906,
      "grad_norm": 0.40320533514022827,
      "learning_rate": 0.0002739928825622776,
      "loss": 0.3115,
      "step": 6875
    },
    {
      "epoch": 3.556701030927835,
      "grad_norm": 0.34437236189842224,
      "learning_rate": 0.00027385943060498217,
      "loss": 0.3344,
      "step": 6900
    },
    {
      "epoch": 3.569587628865979,
      "grad_norm": 0.42826181650161743,
      "learning_rate": 0.0002737259786476868,
      "loss": 0.3716,
      "step": 6925
    },
    {
      "epoch": 3.582474226804124,
      "grad_norm": 0.4200308620929718,
      "learning_rate": 0.00027359252669039146,
      "loss": 0.3511,
      "step": 6950
    },
    {
      "epoch": 3.595360824742268,
      "grad_norm": 0.3897481858730316,
      "learning_rate": 0.00027345907473309605,
      "loss": 0.3651,
      "step": 6975
    },
    {
      "epoch": 3.6082474226804124,
      "grad_norm": 0.3066980838775635,
      "learning_rate": 0.0002733256227758007,
      "loss": 0.314,
      "step": 7000
    },
    {
      "epoch": 3.6211340206185567,
      "grad_norm": 0.39002224802970886,
      "learning_rate": 0.0002731921708185053,
      "loss": 0.411,
      "step": 7025
    },
    {
      "epoch": 3.634020618556701,
      "grad_norm": 0.8328560590744019,
      "learning_rate": 0.00027305871886120994,
      "loss": 0.3154,
      "step": 7050
    },
    {
      "epoch": 3.6469072164948453,
      "grad_norm": 0.43831050395965576,
      "learning_rate": 0.0002729252669039146,
      "loss": 0.396,
      "step": 7075
    },
    {
      "epoch": 3.6597938144329896,
      "grad_norm": 0.3992260694503784,
      "learning_rate": 0.0002727918149466192,
      "loss": 0.3142,
      "step": 7100
    },
    {
      "epoch": 3.6726804123711343,
      "grad_norm": 0.2941119074821472,
      "learning_rate": 0.0002726583629893238,
      "loss": 0.3527,
      "step": 7125
    },
    {
      "epoch": 3.6855670103092786,
      "grad_norm": 0.4881301522254944,
      "learning_rate": 0.00027252491103202847,
      "loss": 0.3171,
      "step": 7150
    },
    {
      "epoch": 3.698453608247423,
      "grad_norm": 0.3898142874240875,
      "learning_rate": 0.00027239145907473306,
      "loss": 0.3817,
      "step": 7175
    },
    {
      "epoch": 3.711340206185567,
      "grad_norm": 0.31580379605293274,
      "learning_rate": 0.0002722580071174377,
      "loss": 0.2595,
      "step": 7200
    },
    {
      "epoch": 3.7242268041237114,
      "grad_norm": 0.3299170136451721,
      "learning_rate": 0.00027212455516014236,
      "loss": 0.3416,
      "step": 7225
    },
    {
      "epoch": 3.7371134020618557,
      "grad_norm": 0.6964473724365234,
      "learning_rate": 0.00027199110320284695,
      "loss": 0.2942,
      "step": 7250
    },
    {
      "epoch": 3.75,
      "grad_norm": 0.3565264046192169,
      "learning_rate": 0.0002718576512455516,
      "loss": 0.3283,
      "step": 7275
    },
    {
      "epoch": 3.7628865979381443,
      "grad_norm": 0.28134214878082275,
      "learning_rate": 0.00027172419928825624,
      "loss": 0.3203,
      "step": 7300
    },
    {
      "epoch": 3.7757731958762886,
      "grad_norm": 0.4196174442768097,
      "learning_rate": 0.00027159074733096083,
      "loss": 0.3396,
      "step": 7325
    },
    {
      "epoch": 3.788659793814433,
      "grad_norm": 0.4770311415195465,
      "learning_rate": 0.0002714572953736655,
      "loss": 0.3144,
      "step": 7350
    },
    {
      "epoch": 3.801546391752577,
      "grad_norm": 0.29138994216918945,
      "learning_rate": 0.0002713238434163701,
      "loss": 0.3403,
      "step": 7375
    },
    {
      "epoch": 3.8144329896907214,
      "grad_norm": 0.47035351395606995,
      "learning_rate": 0.0002711903914590747,
      "loss": 0.2623,
      "step": 7400
    },
    {
      "epoch": 3.8273195876288657,
      "grad_norm": 0.32780078053474426,
      "learning_rate": 0.00027105693950177937,
      "loss": 0.3863,
      "step": 7425
    },
    {
      "epoch": 3.8402061855670104,
      "grad_norm": 0.4419979751110077,
      "learning_rate": 0.00027092348754448396,
      "loss": 0.3207,
      "step": 7450
    },
    {
      "epoch": 3.8530927835051547,
      "grad_norm": 0.28167861700057983,
      "learning_rate": 0.0002707900355871886,
      "loss": 0.3308,
      "step": 7475
    },
    {
      "epoch": 3.865979381443299,
      "grad_norm": 0.3411625921726227,
      "learning_rate": 0.00027065658362989325,
      "loss": 0.3387,
      "step": 7500
    },
    {
      "epoch": 3.8788659793814433,
      "grad_norm": 0.31229549646377563,
      "learning_rate": 0.00027052313167259785,
      "loss": 0.408,
      "step": 7525
    },
    {
      "epoch": 3.8917525773195876,
      "grad_norm": 0.3603403866291046,
      "learning_rate": 0.0002703896797153025,
      "loss": 0.3259,
      "step": 7550
    },
    {
      "epoch": 3.904639175257732,
      "grad_norm": 0.2832438051700592,
      "learning_rate": 0.0002702562277580071,
      "loss": 0.3423,
      "step": 7575
    },
    {
      "epoch": 3.917525773195876,
      "grad_norm": 0.3435526192188263,
      "learning_rate": 0.00027012277580071173,
      "loss": 0.2835,
      "step": 7600
    },
    {
      "epoch": 3.930412371134021,
      "grad_norm": 0.3645428419113159,
      "learning_rate": 0.0002699893238434164,
      "loss": 0.3483,
      "step": 7625
    },
    {
      "epoch": 3.943298969072165,
      "grad_norm": 0.7062532901763916,
      "learning_rate": 0.00026985587188612097,
      "loss": 0.2586,
      "step": 7650
    },
    {
      "epoch": 3.9561855670103094,
      "grad_norm": 0.4606476426124573,
      "learning_rate": 0.0002697224199288256,
      "loss": 0.3939,
      "step": 7675
    },
    {
      "epoch": 3.9690721649484537,
      "grad_norm": 0.4727219045162201,
      "learning_rate": 0.0002695889679715302,
      "loss": 0.3151,
      "step": 7700
    },
    {
      "epoch": 3.981958762886598,
      "grad_norm": 0.2774180471897125,
      "learning_rate": 0.00026945551601423486,
      "loss": 0.3519,
      "step": 7725
    },
    {
      "epoch": 3.9948453608247423,
      "grad_norm": 0.4793704152107239,
      "learning_rate": 0.0002693220640569395,
      "loss": 0.3023,
      "step": 7750
    },
    {
      "epoch": 4.007731958762887,
      "grad_norm": 0.3699122965335846,
      "learning_rate": 0.0002691886120996441,
      "loss": 0.3664,
      "step": 7775
    },
    {
      "epoch": 4.020618556701031,
      "grad_norm": 0.6096347570419312,
      "learning_rate": 0.00026905516014234874,
      "loss": 0.2579,
      "step": 7800
    },
    {
      "epoch": 4.033505154639175,
      "grad_norm": 0.2851110100746155,
      "learning_rate": 0.00026892170818505333,
      "loss": 0.3347,
      "step": 7825
    },
    {
      "epoch": 4.046391752577319,
      "grad_norm": 0.5487604141235352,
      "learning_rate": 0.000268788256227758,
      "loss": 0.2676,
      "step": 7850
    },
    {
      "epoch": 4.059278350515464,
      "grad_norm": 0.29913634061813354,
      "learning_rate": 0.0002686548042704626,
      "loss": 0.3061,
      "step": 7875
    },
    {
      "epoch": 4.072164948453608,
      "grad_norm": 0.3672349750995636,
      "learning_rate": 0.0002685213523131672,
      "loss": 0.2632,
      "step": 7900
    },
    {
      "epoch": 4.085051546391752,
      "grad_norm": 0.5969862341880798,
      "learning_rate": 0.00026838790035587187,
      "loss": 0.2868,
      "step": 7925
    },
    {
      "epoch": 4.097938144329897,
      "grad_norm": 0.4409433901309967,
      "learning_rate": 0.0002682544483985765,
      "loss": 0.2907,
      "step": 7950
    },
    {
      "epoch": 4.110824742268041,
      "grad_norm": 0.4578838050365448,
      "learning_rate": 0.0002681209964412811,
      "loss": 0.3377,
      "step": 7975
    },
    {
      "epoch": 4.123711340206185,
      "grad_norm": 0.302491694688797,
      "learning_rate": 0.00026798754448398575,
      "loss": 0.2857,
      "step": 8000
    },
    {
      "epoch": 4.123711340206185,
      "eval_loss": 0.4647158980369568,
      "eval_runtime": 189.5598,
      "eval_samples_per_second": 6.462,
      "eval_steps_per_second": 0.812,
      "eval_wer": 1.0008163265306123,
      "step": 8000
    },
    {
      "epoch": 4.13659793814433,
      "grad_norm": 0.5263584852218628,
      "learning_rate": 0.00026785409252669034,
      "loss": 0.3246,
      "step": 8025
    },
    {
      "epoch": 4.149484536082475,
      "grad_norm": 0.48449578881263733,
      "learning_rate": 0.000267720640569395,
      "loss": 0.3163,
      "step": 8050
    },
    {
      "epoch": 4.162371134020619,
      "grad_norm": 0.3020855784416199,
      "learning_rate": 0.00026758718861209964,
      "loss": 0.3248,
      "step": 8075
    },
    {
      "epoch": 4.175257731958763,
      "grad_norm": 0.3554665148258209,
      "learning_rate": 0.00026745373665480423,
      "loss": 0.2863,
      "step": 8100
    },
    {
      "epoch": 4.188144329896907,
      "grad_norm": 0.4055439531803131,
      "learning_rate": 0.0002673202846975089,
      "loss": 0.3289,
      "step": 8125
    },
    {
      "epoch": 4.201030927835052,
      "grad_norm": 0.6498619914054871,
      "learning_rate": 0.00026718683274021347,
      "loss": 0.289,
      "step": 8150
    },
    {
      "epoch": 4.213917525773196,
      "grad_norm": 0.23331980407238007,
      "learning_rate": 0.0002670533807829181,
      "loss": 0.2744,
      "step": 8175
    },
    {
      "epoch": 4.22680412371134,
      "grad_norm": 0.5142727494239807,
      "learning_rate": 0.00026691992882562276,
      "loss": 0.3139,
      "step": 8200
    },
    {
      "epoch": 4.239690721649485,
      "grad_norm": 0.4468501806259155,
      "learning_rate": 0.00026678647686832735,
      "loss": 0.3392,
      "step": 8225
    },
    {
      "epoch": 4.252577319587629,
      "grad_norm": 0.36652007699012756,
      "learning_rate": 0.000266653024911032,
      "loss": 0.2499,
      "step": 8250
    },
    {
      "epoch": 4.265463917525773,
      "grad_norm": 0.3004089891910553,
      "learning_rate": 0.0002665195729537366,
      "loss": 0.31,
      "step": 8275
    },
    {
      "epoch": 4.278350515463917,
      "grad_norm": 0.4099670946598053,
      "learning_rate": 0.00026638612099644124,
      "loss": 0.2798,
      "step": 8300
    },
    {
      "epoch": 4.291237113402062,
      "grad_norm": 0.26817041635513306,
      "learning_rate": 0.0002662526690391459,
      "loss": 0.3055,
      "step": 8325
    },
    {
      "epoch": 4.304123711340206,
      "grad_norm": 0.4156699478626251,
      "learning_rate": 0.0002661192170818505,
      "loss": 0.2942,
      "step": 8350
    },
    {
      "epoch": 4.31701030927835,
      "grad_norm": 2.611400842666626,
      "learning_rate": 0.0002659857651245551,
      "loss": 0.3659,
      "step": 8375
    },
    {
      "epoch": 4.329896907216495,
      "grad_norm": 0.46959781646728516,
      "learning_rate": 0.00026585231316725977,
      "loss": 0.2982,
      "step": 8400
    },
    {
      "epoch": 4.342783505154639,
      "grad_norm": 0.2867629826068878,
      "learning_rate": 0.00026571886120996436,
      "loss": 0.3511,
      "step": 8425
    },
    {
      "epoch": 4.355670103092783,
      "grad_norm": 0.38449224829673767,
      "learning_rate": 0.000265585409252669,
      "loss": 0.2826,
      "step": 8450
    },
    {
      "epoch": 4.368556701030927,
      "grad_norm": 0.4524473249912262,
      "learning_rate": 0.0002654519572953736,
      "loss": 0.313,
      "step": 8475
    },
    {
      "epoch": 4.381443298969073,
      "grad_norm": 0.3989889919757843,
      "learning_rate": 0.00026531850533807825,
      "loss": 0.2825,
      "step": 8500
    },
    {
      "epoch": 4.394329896907217,
      "grad_norm": 0.3007287085056305,
      "learning_rate": 0.0002651850533807829,
      "loss": 0.3369,
      "step": 8525
    },
    {
      "epoch": 4.407216494845361,
      "grad_norm": 0.7760800123214722,
      "learning_rate": 0.0002650516014234875,
      "loss": 0.2835,
      "step": 8550
    },
    {
      "epoch": 4.420103092783505,
      "grad_norm": 0.27871614694595337,
      "learning_rate": 0.00026491814946619214,
      "loss": 0.3172,
      "step": 8575
    },
    {
      "epoch": 4.43298969072165,
      "grad_norm": 0.48607179522514343,
      "learning_rate": 0.0002647846975088968,
      "loss": 0.303,
      "step": 8600
    },
    {
      "epoch": 4.445876288659794,
      "grad_norm": 0.3540396988391876,
      "learning_rate": 0.0002646512455516014,
      "loss": 0.355,
      "step": 8625
    },
    {
      "epoch": 4.458762886597938,
      "grad_norm": 1.2004367113113403,
      "learning_rate": 0.000264517793594306,
      "loss": 0.3197,
      "step": 8650
    },
    {
      "epoch": 4.471649484536083,
      "grad_norm": 0.2405807226896286,
      "learning_rate": 0.00026438434163701067,
      "loss": 0.3179,
      "step": 8675
    },
    {
      "epoch": 4.484536082474227,
      "grad_norm": 0.36553826928138733,
      "learning_rate": 0.00026425088967971526,
      "loss": 0.2793,
      "step": 8700
    },
    {
      "epoch": 4.497422680412371,
      "grad_norm": 0.26435115933418274,
      "learning_rate": 0.0002641174377224199,
      "loss": 0.2849,
      "step": 8725
    },
    {
      "epoch": 4.510309278350515,
      "grad_norm": 0.6196132302284241,
      "learning_rate": 0.00026398398576512455,
      "loss": 0.3267,
      "step": 8750
    },
    {
      "epoch": 4.52319587628866,
      "grad_norm": 0.2987557649612427,
      "learning_rate": 0.00026385053380782915,
      "loss": 0.3152,
      "step": 8775
    },
    {
      "epoch": 4.536082474226804,
      "grad_norm": 0.4445992112159729,
      "learning_rate": 0.0002637170818505338,
      "loss": 0.3,
      "step": 8800
    },
    {
      "epoch": 4.548969072164948,
      "grad_norm": 0.4059930741786957,
      "learning_rate": 0.00026358362989323844,
      "loss": 0.3395,
      "step": 8825
    },
    {
      "epoch": 4.561855670103093,
      "grad_norm": 0.522637665271759,
      "learning_rate": 0.00026345017793594303,
      "loss": 0.2783,
      "step": 8850
    },
    {
      "epoch": 4.574742268041237,
      "grad_norm": 0.1963280737400055,
      "learning_rate": 0.0002633167259786477,
      "loss": 0.2979,
      "step": 8875
    },
    {
      "epoch": 4.587628865979381,
      "grad_norm": 0.7622120976448059,
      "learning_rate": 0.00026318327402135227,
      "loss": 0.3204,
      "step": 8900
    },
    {
      "epoch": 4.600515463917525,
      "grad_norm": 0.2749217450618744,
      "learning_rate": 0.0002630498220640569,
      "loss": 0.3365,
      "step": 8925
    },
    {
      "epoch": 4.61340206185567,
      "grad_norm": 0.5664824843406677,
      "learning_rate": 0.00026291637010676156,
      "loss": 0.2772,
      "step": 8950
    },
    {
      "epoch": 4.626288659793815,
      "grad_norm": 0.3661792278289795,
      "learning_rate": 0.00026278291814946616,
      "loss": 0.3314,
      "step": 8975
    },
    {
      "epoch": 4.639175257731958,
      "grad_norm": 0.37070122361183167,
      "learning_rate": 0.0002626494661921708,
      "loss": 0.2991,
      "step": 9000
    },
    {
      "epoch": 4.652061855670103,
      "grad_norm": 0.4963271915912628,
      "learning_rate": 0.00026251601423487545,
      "loss": 0.3312,
      "step": 9025
    },
    {
      "epoch": 4.664948453608248,
      "grad_norm": 0.5193173885345459,
      "learning_rate": 0.00026238256227758004,
      "loss": 0.2914,
      "step": 9050
    },
    {
      "epoch": 4.677835051546392,
      "grad_norm": 0.3158729076385498,
      "learning_rate": 0.0002622491103202847,
      "loss": 0.3317,
      "step": 9075
    },
    {
      "epoch": 4.690721649484536,
      "grad_norm": 0.37290090322494507,
      "learning_rate": 0.00026211565836298934,
      "loss": 0.2612,
      "step": 9100
    },
    {
      "epoch": 4.703608247422681,
|
"grad_norm": 0.334089070558548, |
|
"learning_rate": 0.00026198220640569393, |
|
"loss": 0.3235, |
|
"step": 9125 |
|
}, |
|
{ |
|
"epoch": 4.716494845360825, |
|
"grad_norm": 0.3259222209453583, |
|
"learning_rate": 0.0002618487544483986, |
|
"loss": 0.2952, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 4.729381443298969, |
|
"grad_norm": 0.2776556611061096, |
|
"learning_rate": 0.0002617153024911032, |
|
"loss": 0.3405, |
|
"step": 9175 |
|
}, |
|
{ |
|
"epoch": 4.742268041237113, |
|
"grad_norm": 0.33884450793266296, |
|
"learning_rate": 0.0002615818505338078, |
|
"loss": 0.2813, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 4.755154639175258, |
|
"grad_norm": 0.48323071002960205, |
|
"learning_rate": 0.00026144839857651246, |
|
"loss": 0.3044, |
|
"step": 9225 |
|
}, |
|
{ |
|
"epoch": 4.768041237113402, |
|
"grad_norm": 0.32996150851249695, |
|
"learning_rate": 0.00026131494661921705, |
|
"loss": 0.2817, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 4.780927835051546, |
|
"grad_norm": 0.3732437491416931, |
|
"learning_rate": 0.0002611814946619217, |
|
"loss": 0.3165, |
|
"step": 9275 |
|
}, |
|
{ |
|
"epoch": 4.793814432989691, |
|
"grad_norm": 0.5148000717163086, |
|
"learning_rate": 0.00026104804270462635, |
|
"loss": 0.2636, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 4.806701030927835, |
|
"grad_norm": 0.3256881833076477, |
|
"learning_rate": 0.00026091459074733094, |
|
"loss": 0.3677, |
|
"step": 9325 |
|
}, |
|
{ |
|
"epoch": 4.819587628865979, |
|
"grad_norm": 0.5248320698738098, |
|
"learning_rate": 0.0002607811387900356, |
|
"loss": 0.2959, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 4.832474226804123, |
|
"grad_norm": 0.25363728404045105, |
|
"learning_rate": 0.0002606476868327402, |
|
"loss": 0.355, |
|
"step": 9375 |
|
}, |
|
{ |
|
"epoch": 4.845360824742268, |
|
"grad_norm": 1.110967755317688, |
|
"learning_rate": 0.0002605142348754448, |
|
"loss": 0.3208, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 4.858247422680412, |
|
"grad_norm": 0.3510916531085968, |
|
"learning_rate": 0.00026038078291814947, |
|
"loss": 0.3421, |
|
"step": 9425 |
|
}, |
|
{ |
|
"epoch": 4.871134020618557, |
|
"grad_norm": 0.33312925696372986, |
|
"learning_rate": 0.00026024733096085406, |
|
"loss": 0.3046, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 4.8840206185567006, |
|
"grad_norm": 0.32453814148902893, |
|
"learning_rate": 0.0002601138790035587, |
|
"loss": 0.3294, |
|
"step": 9475 |
|
}, |
|
{ |
|
"epoch": 4.896907216494846, |
|
"grad_norm": 0.3632301092147827, |
|
"learning_rate": 0.0002599804270462633, |
|
"loss": 0.3261, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 4.90979381443299, |
|
"grad_norm": 0.3007452189922333, |
|
"learning_rate": 0.00025984697508896795, |
|
"loss": 0.3524, |
|
"step": 9525 |
|
}, |
|
{ |
|
"epoch": 4.922680412371134, |
|
"grad_norm": 0.47182586789131165, |
|
"learning_rate": 0.0002597135231316726, |
|
"loss": 0.3287, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 4.935567010309279, |
|
"grad_norm": 0.3959861397743225, |
|
"learning_rate": 0.0002595800711743772, |
|
"loss": 0.3134, |
|
"step": 9575 |
|
}, |
|
{ |
|
"epoch": 4.948453608247423, |
|
"grad_norm": 0.28187814354896545, |
|
"learning_rate": 0.00025944661921708183, |
|
"loss": 0.3154, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 4.961340206185567, |
|
"grad_norm": 0.32686540484428406, |
|
"learning_rate": 0.0002593131672597865, |
|
"loss": 0.3464, |
|
"step": 9625 |
|
}, |
|
{ |
|
"epoch": 4.974226804123711, |
|
"grad_norm": 0.46848079562187195, |
|
"learning_rate": 0.0002591797153024911, |
|
"loss": 0.2753, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 4.987113402061856, |
|
"grad_norm": 0.25264793634414673, |
|
"learning_rate": 0.0002590462633451957, |
|
"loss": 0.3403, |
|
"step": 9675 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.4163696765899658, |
|
"learning_rate": 0.0002589128113879003, |
|
"loss": 0.2669, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 5.012886597938144, |
|
"grad_norm": 0.29742294549942017, |
|
"learning_rate": 0.00025877935943060496, |
|
"loss": 0.3203, |
|
"step": 9725 |
|
}, |
|
{ |
|
"epoch": 5.025773195876289, |
|
"grad_norm": 0.8651419281959534, |
|
"learning_rate": 0.0002586459074733096, |
|
"loss": 0.2589, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 5.038659793814433, |
|
"grad_norm": 0.2916441261768341, |
|
"learning_rate": 0.0002585124555160142, |
|
"loss": 0.3, |
|
"step": 9775 |
|
}, |
|
{ |
|
"epoch": 5.051546391752577, |
|
"grad_norm": 1.4987293481826782, |
|
"learning_rate": 0.00025837900355871884, |
|
"loss": 0.3148, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 5.064432989690721, |
|
"grad_norm": 0.46689650416374207, |
|
"learning_rate": 0.00025824555160142344, |
|
"loss": 0.2935, |
|
"step": 9825 |
|
}, |
|
{ |
|
"epoch": 5.077319587628866, |
|
"grad_norm": 0.3726617991924286, |
|
"learning_rate": 0.0002581120996441281, |
|
"loss": 0.2871, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 5.09020618556701, |
|
"grad_norm": 0.29355180263519287, |
|
"learning_rate": 0.00025797864768683273, |
|
"loss": 0.2422, |
|
"step": 9875 |
|
}, |
|
{ |
|
"epoch": 5.103092783505154, |
|
"grad_norm": 0.6249547600746155, |
|
"learning_rate": 0.0002578451957295373, |
|
"loss": 0.2749, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 5.115979381443299, |
|
"grad_norm": 0.42613521218299866, |
|
"learning_rate": 0.00025771174377224197, |
|
"loss": 0.2998, |
|
"step": 9925 |
|
}, |
|
{ |
|
"epoch": 5.128865979381443, |
|
"grad_norm": 0.4285449981689453, |
|
"learning_rate": 0.00025757829181494656, |
|
"loss": 0.3241, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 5.141752577319588, |
|
"grad_norm": 0.2756471037864685, |
|
"learning_rate": 0.0002574448398576512, |
|
"loss": 0.2788, |
|
"step": 9975 |
|
}, |
|
{ |
|
"epoch": 5.154639175257732, |
|
"grad_norm": 0.650787889957428, |
|
"learning_rate": 0.00025731138790035586, |
|
"loss": 0.2628, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 5.154639175257732, |
|
"eval_loss": 0.5844400525093079, |
|
"eval_runtime": 196.2011, |
|
"eval_samples_per_second": 6.244, |
|
"eval_steps_per_second": 0.785, |
|
"eval_wer": 1.006530612244898, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 5.154639175257732, |
|
"step": 10000, |
|
"total_flos": 5.52048529474528e+19, |
|
"train_loss": 0.6429409675121307, |
|
"train_runtime": 62307.8234, |
|
"train_samples_per_second": 22.413, |
|
"train_steps_per_second": 0.934 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 58200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 30, |
|
"save_steps": 2000, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.52048529474528e+19, |
|
"train_batch_size": 12, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|