{ "best_metric": 53.673860298812414, "best_model_checkpoint": "./whisper-medium-te/checkpoint-2000", "epoch": 2.971768202080238, "eval_steps": 1000, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03714710252600297, "grad_norm": 9.450862884521484, "learning_rate": 4.6000000000000004e-07, "loss": 1.1494, "step": 25 }, { "epoch": 0.07429420505200594, "grad_norm": 5.792855262756348, "learning_rate": 9.600000000000001e-07, "loss": 0.8665, "step": 50 }, { "epoch": 0.11144130757800892, "grad_norm": 5.076374530792236, "learning_rate": 1.46e-06, "loss": 0.6443, "step": 75 }, { "epoch": 0.1485884101040119, "grad_norm": 4.388276100158691, "learning_rate": 1.9600000000000003e-06, "loss": 0.5233, "step": 100 }, { "epoch": 0.18573551263001487, "grad_norm": 4.2827301025390625, "learning_rate": 2.46e-06, "loss": 0.4417, "step": 125 }, { "epoch": 0.22288261515601784, "grad_norm": 4.33107328414917, "learning_rate": 2.96e-06, "loss": 0.3846, "step": 150 }, { "epoch": 0.2600297176820208, "grad_norm": 5.062885284423828, "learning_rate": 3.46e-06, "loss": 0.3456, "step": 175 }, { "epoch": 0.2971768202080238, "grad_norm": 3.433645725250244, "learning_rate": 3.96e-06, "loss": 0.3304, "step": 200 }, { "epoch": 0.3343239227340267, "grad_norm": 4.322815418243408, "learning_rate": 4.4600000000000005e-06, "loss": 0.3323, "step": 225 }, { "epoch": 0.37147102526002973, "grad_norm": 4.137588024139404, "learning_rate": 4.960000000000001e-06, "loss": 0.3087, "step": 250 }, { "epoch": 0.4086181277860327, "grad_norm": 3.188852071762085, "learning_rate": 5.460000000000001e-06, "loss": 0.2858, "step": 275 }, { "epoch": 0.4457652303120357, "grad_norm": 3.801391839981079, "learning_rate": 5.9600000000000005e-06, "loss": 0.2804, "step": 300 }, { "epoch": 0.48291233283803864, "grad_norm": 3.4735641479492188, "learning_rate": 6.460000000000001e-06, "loss": 0.2746, "step": 325 }, { "epoch": 0.5200594353640416, "grad_norm": 4.813545227050781, "learning_rate": 6.96e-06, "loss": 0.278, "step": 350 }, { "epoch": 0.5572065378900446, "grad_norm": 3.2244017124176025, "learning_rate": 7.4600000000000006e-06, "loss": 0.2655, "step": 375 }, { "epoch": 0.5943536404160475, "grad_norm": 3.476280689239502, "learning_rate": 7.960000000000002e-06, "loss": 0.2571, "step": 400 }, { "epoch": 0.6315007429420505, "grad_norm": 3.5403590202331543, "learning_rate": 8.46e-06, "loss": 0.2522, "step": 425 }, { "epoch": 0.6686478454680534, "grad_norm": 4.525882244110107, "learning_rate": 8.96e-06, "loss": 0.2473, "step": 450 }, { "epoch": 0.7057949479940565, "grad_norm": 4.107092380523682, "learning_rate": 9.460000000000001e-06, "loss": 0.2519, "step": 475 }, { "epoch": 0.7429420505200595, "grad_norm": 3.5582709312438965, "learning_rate": 9.960000000000001e-06, "loss": 0.2508, "step": 500 }, { "epoch": 0.7800891530460624, "grad_norm": 2.6124589443206787, "learning_rate": 9.984137931034483e-06, "loss": 0.2437, "step": 525 }, { "epoch": 0.8172362555720654, "grad_norm": 3.1363003253936768, "learning_rate": 9.96689655172414e-06, "loss": 0.241, "step": 550 }, { "epoch": 0.8543833580980683, "grad_norm": 2.3347408771514893, "learning_rate": 9.949655172413793e-06, "loss": 0.2369, "step": 575 }, { "epoch": 0.8915304606240714, "grad_norm": 2.609605312347412, "learning_rate": 9.93241379310345e-06, "loss": 0.2204, "step": 600 }, { "epoch": 0.9286775631500743, "grad_norm": 3.050915479660034, "learning_rate": 9.915172413793104e-06, "loss": 0.2183, "step": 625 }, { "epoch": 0.9658246656760773, "grad_norm": 2.9425406455993652, "learning_rate": 9.897931034482759e-06, "loss": 0.2321, "step": 650 }, { "epoch": 1.0029717682020802, "grad_norm": 3.074657917022705, "learning_rate": 9.880689655172414e-06, "loss": 0.2241, "step": 675 }, { "epoch": 1.0401188707280833, "grad_norm": 2.8157992362976074, "learning_rate": 9.86344827586207e-06, "loss": 0.1898, "step": 700 }, { "epoch": 1.0772659732540861, "grad_norm": 2.4690845012664795, "learning_rate": 9.846206896551725e-06, "loss": 0.1857, "step": 725 }, { "epoch": 1.1144130757800892, "grad_norm": 2.4655447006225586, "learning_rate": 9.82896551724138e-06, "loss": 0.1743, "step": 750 }, { "epoch": 1.151560178306092, "grad_norm": 2.103703498840332, "learning_rate": 9.811724137931035e-06, "loss": 0.1825, "step": 775 }, { "epoch": 1.188707280832095, "grad_norm": 2.727170467376709, "learning_rate": 9.79448275862069e-06, "loss": 0.1861, "step": 800 }, { "epoch": 1.2258543833580982, "grad_norm": 2.1180167198181152, "learning_rate": 9.777241379310347e-06, "loss": 0.179, "step": 825 }, { "epoch": 1.263001485884101, "grad_norm": 2.076005697250366, "learning_rate": 9.760000000000001e-06, "loss": 0.1854, "step": 850 }, { "epoch": 1.300148588410104, "grad_norm": 1.8460164070129395, "learning_rate": 9.742758620689656e-06, "loss": 0.1831, "step": 875 }, { "epoch": 1.3372956909361071, "grad_norm": 4.341026306152344, "learning_rate": 9.725517241379311e-06, "loss": 0.177, "step": 900 }, { "epoch": 1.37444279346211, "grad_norm": 2.86643123626709, "learning_rate": 9.708275862068966e-06, "loss": 0.1853, "step": 925 }, { "epoch": 1.4115898959881128, "grad_norm": 2.4118528366088867, "learning_rate": 9.691034482758621e-06, "loss": 0.1741, "step": 950 }, { "epoch": 1.4487369985141159, "grad_norm": 2.506206512451172, "learning_rate": 9.673793103448277e-06, "loss": 0.1713, "step": 975 }, { "epoch": 1.485884101040119, "grad_norm": 2.32373309135437, "learning_rate": 9.65655172413793e-06, "loss": 0.1662, "step": 1000 }, { "epoch": 1.485884101040119, "eval_loss": 0.2365463376045227, "eval_runtime": 3136.2768, "eval_samples_per_second": 0.863, "eval_steps_per_second": 0.108, "eval_wer": 55.854935512705914, "step": 1000 }, { "epoch": 1.5230312035661218, "grad_norm": 2.9691216945648193, "learning_rate": 9.639310344827587e-06, "loss": 0.1792, "step": 1025 }, { "epoch": 1.5601783060921248, "grad_norm": 2.158869981765747, "learning_rate": 9.622068965517242e-06, "loss": 0.1654, "step": 1050 }, { "epoch": 1.5973254086181279, "grad_norm": 2.132694721221924, "learning_rate": 9.604827586206897e-06, "loss": 0.1805, "step": 1075 }, { "epoch": 1.6344725111441307, "grad_norm": 2.4110560417175293, "learning_rate": 9.587586206896554e-06, "loss": 0.1688, "step": 1100 }, { "epoch": 1.6716196136701336, "grad_norm": 3.177272319793701, "learning_rate": 9.570344827586208e-06, "loss": 0.1746, "step": 1125 }, { "epoch": 1.7087667161961368, "grad_norm": 2.058962821960449, "learning_rate": 9.553103448275863e-06, "loss": 0.1695, "step": 1150 }, { "epoch": 1.7459138187221397, "grad_norm": 2.6047332286834717, "learning_rate": 9.535862068965518e-06, "loss": 0.1767, "step": 1175 }, { "epoch": 1.7830609212481425, "grad_norm": 2.676912546157837, "learning_rate": 9.518620689655173e-06, "loss": 0.186, "step": 1200 }, { "epoch": 1.8202080237741456, "grad_norm": 2.346376895904541, "learning_rate": 9.501379310344828e-06, "loss": 0.1832, "step": 1225 }, { "epoch": 1.8573551263001487, "grad_norm": 2.172449827194214, "learning_rate": 9.484137931034484e-06, "loss": 0.1791, "step": 1250 }, { "epoch": 1.8945022288261515, "grad_norm": 2.105217933654785, "learning_rate": 9.46689655172414e-06, "loss": 0.1771, "step": 1275 }, { "epoch": 1.9316493313521546, "grad_norm": 2.7176296710968018, "learning_rate": 9.449655172413794e-06, "loss": 0.1711, "step": 1300 }, { "epoch": 1.9687964338781576, "grad_norm": 2.633023262023926, "learning_rate": 9.432413793103449e-06, "loss": 0.1717, "step": 1325 }, { "epoch": 2.0059435364041605, "grad_norm": 1.6349059343338013, "learning_rate": 9.415172413793104e-06, "loss": 0.1649, "step": 1350 }, { "epoch": 2.0430906389301633, "grad_norm": 2.0442230701446533, "learning_rate": 9.397931034482759e-06, "loss": 0.1204, "step": 1375 }, { "epoch": 2.0802377414561666, "grad_norm": 1.8747773170471191, "learning_rate": 9.380689655172415e-06, "loss": 0.1214, "step": 1400 }, { "epoch": 2.1173848439821694, "grad_norm": 2.2875783443450928, "learning_rate": 9.363448275862069e-06, "loss": 0.1344, "step": 1425 }, { "epoch": 2.1545319465081723, "grad_norm": 1.9838837385177612, "learning_rate": 9.346206896551725e-06, "loss": 0.1199, "step": 1450 }, { "epoch": 2.1916790490341755, "grad_norm": 2.1067237854003906, "learning_rate": 9.32896551724138e-06, "loss": 0.1181, "step": 1475 }, { "epoch": 2.2288261515601784, "grad_norm": 2.6312592029571533, "learning_rate": 9.311724137931035e-06, "loss": 0.1256, "step": 1500 }, { "epoch": 2.265973254086181, "grad_norm": 1.992762804031372, "learning_rate": 9.294482758620691e-06, "loss": 0.1272, "step": 1525 }, { "epoch": 2.303120356612184, "grad_norm": 2.416465997695923, "learning_rate": 9.277241379310346e-06, "loss": 0.1305, "step": 1550 }, { "epoch": 2.3402674591381873, "grad_norm": 1.7433266639709473, "learning_rate": 9.260000000000001e-06, "loss": 0.1262, "step": 1575 }, { "epoch": 2.37741456166419, "grad_norm": 1.751561164855957, "learning_rate": 9.242758620689656e-06, "loss": 0.1216, "step": 1600 }, { "epoch": 2.414561664190193, "grad_norm": 2.123859405517578, "learning_rate": 9.225517241379311e-06, "loss": 0.1366, "step": 1625 }, { "epoch": 2.4517087667161963, "grad_norm": 1.9784679412841797, "learning_rate": 9.208275862068966e-06, "loss": 0.1265, "step": 1650 }, { "epoch": 2.488855869242199, "grad_norm": 2.4382882118225098, "learning_rate": 9.191034482758622e-06, "loss": 0.1228, "step": 1675 }, { "epoch": 2.526002971768202, "grad_norm": 2.2365972995758057, "learning_rate": 9.173793103448277e-06, "loss": 0.1326, "step": 1700 }, { "epoch": 2.563150074294205, "grad_norm": 1.7844051122665405, "learning_rate": 9.156551724137932e-06, "loss": 0.1234, "step": 1725 }, { "epoch": 2.600297176820208, "grad_norm": 1.6666020154953003, "learning_rate": 9.139310344827587e-06, "loss": 0.1343, "step": 1750 }, { "epoch": 2.637444279346211, "grad_norm": 2.0560336112976074, "learning_rate": 9.122068965517242e-06, "loss": 0.1235, "step": 1775 }, { "epoch": 2.6745913818722142, "grad_norm": 3.1412951946258545, "learning_rate": 9.104827586206897e-06, "loss": 0.1274, "step": 1800 }, { "epoch": 2.711738484398217, "grad_norm": 1.8942031860351562, "learning_rate": 9.087586206896553e-06, "loss": 0.1285, "step": 1825 }, { "epoch": 2.74888558692422, "grad_norm": 1.8972444534301758, "learning_rate": 9.070344827586206e-06, "loss": 0.1335, "step": 1850 }, { "epoch": 2.7860326894502228, "grad_norm": 1.9524787664413452, "learning_rate": 9.053103448275863e-06, "loss": 0.1209, "step": 1875 }, { "epoch": 2.8231797919762256, "grad_norm": 1.6888232231140137, "learning_rate": 9.035862068965518e-06, "loss": 0.1361, "step": 1900 }, { "epoch": 2.860326894502229, "grad_norm": 2.2144405841827393, "learning_rate": 9.018620689655173e-06, "loss": 0.134, "step": 1925 }, { "epoch": 2.8974739970282317, "grad_norm": 2.0730419158935547, "learning_rate": 9.00137931034483e-06, "loss": 0.1282, "step": 1950 }, { "epoch": 2.934621099554235, "grad_norm": 1.705673098564148, "learning_rate": 8.984137931034484e-06, "loss": 0.1206, "step": 1975 }, { "epoch": 2.971768202080238, "grad_norm": 1.9598541259765625, "learning_rate": 8.966896551724139e-06, "loss": 0.1253, "step": 2000 }, { "epoch": 2.971768202080238, "eval_loss": 0.21802061796188354, "eval_runtime": 3105.4135, "eval_samples_per_second": 0.872, "eval_steps_per_second": 0.109, "eval_wer": 53.673860298812414, "step": 2000 } ], "logging_steps": 25, "max_steps": 15000, "num_input_tokens_seen": 0, "num_train_epochs": 23, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.265527462100992e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }