|
{ |
|
"best_metric": 0.4354272186756134, |
|
"best_model_checkpoint": "/scratch/skscla001/speech/results/whisper-medium-bemgen-100f50m-model/checkpoint-1400", |
|
"epoch": 2.6391548365797295, |
|
"eval_steps": 200, |
|
"global_step": 2000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03301419610432486, |
|
"grad_norm": 117.47059631347656, |
|
"learning_rate": 3.8e-07, |
|
"loss": 10.9949, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.06602839220864971, |
|
"grad_norm": 102.7651138305664, |
|
"learning_rate": 8.8e-07, |
|
"loss": 9.4482, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.09904258831297458, |
|
"grad_norm": 64.81011199951172, |
|
"learning_rate": 1.3800000000000001e-06, |
|
"loss": 7.2641, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.13205678441729943, |
|
"grad_norm": 81.74808502197266, |
|
"learning_rate": 1.8800000000000002e-06, |
|
"loss": 5.5236, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.1650709805216243, |
|
"grad_norm": 93.43115234375, |
|
"learning_rate": 2.38e-06, |
|
"loss": 4.7369, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.19808517662594916, |
|
"grad_norm": 89.20719909667969, |
|
"learning_rate": 2.88e-06, |
|
"loss": 3.8232, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.231099372730274, |
|
"grad_norm": 64.15678405761719, |
|
"learning_rate": 3.3800000000000007e-06, |
|
"loss": 3.806, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.26411356883459886, |
|
"grad_norm": 64.91326904296875, |
|
"learning_rate": 3.88e-06, |
|
"loss": 3.1512, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.26411356883459886, |
|
"eval_loss": 0.8282185792922974, |
|
"eval_runtime": 423.1335, |
|
"eval_samples_per_second": 1.791, |
|
"eval_steps_per_second": 0.896, |
|
"eval_wer": 0.6533637400228051, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.29712776493892373, |
|
"grad_norm": 75.13513946533203, |
|
"learning_rate": 4.38e-06, |
|
"loss": 3.3773, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.3301419610432486, |
|
"grad_norm": 62.38654708862305, |
|
"learning_rate": 4.880000000000001e-06, |
|
"loss": 3.3841, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.36315615714757343, |
|
"grad_norm": 49.35546875, |
|
"learning_rate": 5.380000000000001e-06, |
|
"loss": 2.8367, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.3961703532518983, |
|
"grad_norm": 77.38804626464844, |
|
"learning_rate": 5.8800000000000005e-06, |
|
"loss": 2.9174, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.4291845493562232, |
|
"grad_norm": 78.50695037841797, |
|
"learning_rate": 6.380000000000001e-06, |
|
"loss": 2.6359, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.462198745460548, |
|
"grad_norm": 55.40019607543945, |
|
"learning_rate": 6.88e-06, |
|
"loss": 2.4471, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.4952129415648729, |
|
"grad_norm": 55.860809326171875, |
|
"learning_rate": 7.3800000000000005e-06, |
|
"loss": 2.6426, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.5282271376691977, |
|
"grad_norm": 51.24520492553711, |
|
"learning_rate": 7.88e-06, |
|
"loss": 2.4008, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.5282271376691977, |
|
"eval_loss": 0.6202582716941833, |
|
"eval_runtime": 422.1868, |
|
"eval_samples_per_second": 1.795, |
|
"eval_steps_per_second": 0.898, |
|
"eval_wer": 0.5096921322690992, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.5612413337735226, |
|
"grad_norm": 70.62708282470703, |
|
"learning_rate": 8.380000000000001e-06, |
|
"loss": 2.4166, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.5942555298778475, |
|
"grad_norm": 54.51411437988281, |
|
"learning_rate": 8.880000000000001e-06, |
|
"loss": 2.7424, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.6272697259821723, |
|
"grad_norm": 51.03986740112305, |
|
"learning_rate": 9.38e-06, |
|
"loss": 2.2441, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.6602839220864972, |
|
"grad_norm": 64.73434448242188, |
|
"learning_rate": 9.88e-06, |
|
"loss": 2.1702, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.6932981181908221, |
|
"grad_norm": 58.62995147705078, |
|
"learning_rate": 9.957777777777779e-06, |
|
"loss": 2.3904, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.7263123142951469, |
|
"grad_norm": 61.52521514892578, |
|
"learning_rate": 9.902222222222223e-06, |
|
"loss": 2.2456, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.7593265103994717, |
|
"grad_norm": 45.40699768066406, |
|
"learning_rate": 9.846666666666668e-06, |
|
"loss": 2.3398, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.7923407065037966, |
|
"grad_norm": 52.031795501708984, |
|
"learning_rate": 9.791111111111112e-06, |
|
"loss": 2.2459, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.7923407065037966, |
|
"eval_loss": 0.5506373643875122, |
|
"eval_runtime": 424.3675, |
|
"eval_samples_per_second": 1.786, |
|
"eval_steps_per_second": 0.893, |
|
"eval_wer": 0.4643671607753706, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.8253549026081215, |
|
"grad_norm": 51.51828384399414, |
|
"learning_rate": 9.735555555555556e-06, |
|
"loss": 2.1322, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.8583690987124464, |
|
"grad_norm": 47.97977066040039, |
|
"learning_rate": 9.68e-06, |
|
"loss": 2.3046, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.8913832948167713, |
|
"grad_norm": 47.63246154785156, |
|
"learning_rate": 9.624444444444445e-06, |
|
"loss": 2.0285, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.924397490921096, |
|
"grad_norm": 36.78068161010742, |
|
"learning_rate": 9.56888888888889e-06, |
|
"loss": 1.9158, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.9574116870254209, |
|
"grad_norm": 57.10271453857422, |
|
"learning_rate": 9.513333333333334e-06, |
|
"loss": 2.034, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.9904258831297458, |
|
"grad_norm": 35.37094497680664, |
|
"learning_rate": 9.457777777777778e-06, |
|
"loss": 1.92, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.0224496533509408, |
|
"grad_norm": 59.17182159423828, |
|
"learning_rate": 9.402222222222222e-06, |
|
"loss": 1.5202, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 1.0554638494552657, |
|
"grad_norm": 48.809059143066406, |
|
"learning_rate": 9.346666666666666e-06, |
|
"loss": 1.3319, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.0554638494552657, |
|
"eval_loss": 0.5028851628303528, |
|
"eval_runtime": 423.0433, |
|
"eval_samples_per_second": 1.792, |
|
"eval_steps_per_second": 0.896, |
|
"eval_wer": 0.4016533637400228, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.0884780455595906, |
|
"grad_norm": 59.115562438964844, |
|
"learning_rate": 9.291111111111112e-06, |
|
"loss": 1.2718, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 1.1214922416639155, |
|
"grad_norm": 22.62116050720215, |
|
"learning_rate": 9.235555555555556e-06, |
|
"loss": 1.4598, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.1545064377682404, |
|
"grad_norm": 37.58869934082031, |
|
"learning_rate": 9.180000000000002e-06, |
|
"loss": 1.33, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 1.1875206338725652, |
|
"grad_norm": 41.18476867675781, |
|
"learning_rate": 9.124444444444444e-06, |
|
"loss": 1.4802, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.2205348299768901, |
|
"grad_norm": 34.091915130615234, |
|
"learning_rate": 9.06888888888889e-06, |
|
"loss": 1.325, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 1.253549026081215, |
|
"grad_norm": 43.94548416137695, |
|
"learning_rate": 9.013333333333334e-06, |
|
"loss": 1.3758, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.2865632221855399, |
|
"grad_norm": 50.9454231262207, |
|
"learning_rate": 8.957777777777778e-06, |
|
"loss": 1.4197, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 1.3195774182898647, |
|
"grad_norm": 39.400634765625, |
|
"learning_rate": 8.902222222222224e-06, |
|
"loss": 1.5588, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.3195774182898647, |
|
"eval_loss": 0.4675042927265167, |
|
"eval_runtime": 426.8283, |
|
"eval_samples_per_second": 1.776, |
|
"eval_steps_per_second": 0.888, |
|
"eval_wer": 0.3896807297605473, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.3525916143941896, |
|
"grad_norm": 64.05392456054688, |
|
"learning_rate": 8.846666666666668e-06, |
|
"loss": 1.4914, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 1.3856058104985143, |
|
"grad_norm": 38.36800765991211, |
|
"learning_rate": 8.791111111111112e-06, |
|
"loss": 1.3193, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.4186200066028392, |
|
"grad_norm": 56.90019226074219, |
|
"learning_rate": 8.735555555555556e-06, |
|
"loss": 1.3192, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 1.451634202707164, |
|
"grad_norm": 41.730682373046875, |
|
"learning_rate": 8.68e-06, |
|
"loss": 1.4852, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.484648398811489, |
|
"grad_norm": 37.495849609375, |
|
"learning_rate": 8.624444444444446e-06, |
|
"loss": 1.244, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 1.5176625949158138, |
|
"grad_norm": 34.61660385131836, |
|
"learning_rate": 8.56888888888889e-06, |
|
"loss": 1.3055, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.5506767910201387, |
|
"grad_norm": 32.87508773803711, |
|
"learning_rate": 8.513333333333335e-06, |
|
"loss": 1.4434, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 1.5836909871244635, |
|
"grad_norm": 43.57270812988281, |
|
"learning_rate": 8.457777777777778e-06, |
|
"loss": 1.2908, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.5836909871244635, |
|
"eval_loss": 0.45341047644615173, |
|
"eval_runtime": 422.1999, |
|
"eval_samples_per_second": 1.795, |
|
"eval_steps_per_second": 0.898, |
|
"eval_wer": 0.3727194982896237, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.6167051832287884, |
|
"grad_norm": 46.83863830566406, |
|
"learning_rate": 8.402222222222223e-06, |
|
"loss": 1.3848, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 1.649719379333113, |
|
"grad_norm": 46.93707275390625, |
|
"learning_rate": 8.346666666666668e-06, |
|
"loss": 1.2536, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.682733575437438, |
|
"grad_norm": 44.47553253173828, |
|
"learning_rate": 8.291111111111112e-06, |
|
"loss": 1.3358, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 1.7157477715417628, |
|
"grad_norm": 35.15113830566406, |
|
"learning_rate": 8.235555555555557e-06, |
|
"loss": 1.1214, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.7487619676460877, |
|
"grad_norm": 27.538331985473633, |
|
"learning_rate": 8.18e-06, |
|
"loss": 1.097, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 1.7817761637504126, |
|
"grad_norm": 35.05278015136719, |
|
"learning_rate": 8.124444444444445e-06, |
|
"loss": 1.1746, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.8147903598547375, |
|
"grad_norm": 50.673095703125, |
|
"learning_rate": 8.06888888888889e-06, |
|
"loss": 1.397, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 1.8478045559590623, |
|
"grad_norm": 38.40850067138672, |
|
"learning_rate": 8.013333333333333e-06, |
|
"loss": 1.4258, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.8478045559590623, |
|
"eval_loss": 0.4354272186756134, |
|
"eval_runtime": 431.6561, |
|
"eval_samples_per_second": 1.756, |
|
"eval_steps_per_second": 0.878, |
|
"eval_wer": 0.38982326111744586, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.8808187520633872, |
|
"grad_norm": 27.05866050720215, |
|
"learning_rate": 7.957777777777779e-06, |
|
"loss": 1.4042, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 1.913832948167712, |
|
"grad_norm": 36.17206573486328, |
|
"learning_rate": 7.902222222222223e-06, |
|
"loss": 1.0702, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.946847144272037, |
|
"grad_norm": 40.80569076538086, |
|
"learning_rate": 7.846666666666667e-06, |
|
"loss": 1.2552, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 1.9798613403763619, |
|
"grad_norm": 34.74424362182617, |
|
"learning_rate": 7.791111111111111e-06, |
|
"loss": 1.349, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.011885110597557, |
|
"grad_norm": 18.330013275146484, |
|
"learning_rate": 7.735555555555557e-06, |
|
"loss": 1.0107, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 2.0448993067018817, |
|
"grad_norm": 20.316329956054688, |
|
"learning_rate": 7.680000000000001e-06, |
|
"loss": 0.6744, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 2.0779135028062066, |
|
"grad_norm": 56.3316764831543, |
|
"learning_rate": 7.624444444444445e-06, |
|
"loss": 0.6948, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 2.1109276989105314, |
|
"grad_norm": 28.286052703857422, |
|
"learning_rate": 7.56888888888889e-06, |
|
"loss": 0.6383, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.1109276989105314, |
|
"eval_loss": 0.44802892208099365, |
|
"eval_runtime": 429.0328, |
|
"eval_samples_per_second": 1.767, |
|
"eval_steps_per_second": 0.883, |
|
"eval_wer": 0.3600342075256556, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.1439418950148563, |
|
"grad_norm": 33.71493148803711, |
|
"learning_rate": 7.513333333333334e-06, |
|
"loss": 0.5558, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 2.176956091119181, |
|
"grad_norm": 29.385910034179688, |
|
"learning_rate": 7.457777777777778e-06, |
|
"loss": 0.6777, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 2.209970287223506, |
|
"grad_norm": 20.755077362060547, |
|
"learning_rate": 7.402222222222223e-06, |
|
"loss": 0.6332, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 2.242984483327831, |
|
"grad_norm": 37.17219543457031, |
|
"learning_rate": 7.346666666666668e-06, |
|
"loss": 0.6242, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.275998679432156, |
|
"grad_norm": 37.963409423828125, |
|
"learning_rate": 7.291111111111112e-06, |
|
"loss": 0.7322, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 2.3090128755364807, |
|
"grad_norm": 25.863115310668945, |
|
"learning_rate": 7.235555555555556e-06, |
|
"loss": 0.697, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 2.3420270716408056, |
|
"grad_norm": 25.60062026977539, |
|
"learning_rate": 7.180000000000001e-06, |
|
"loss": 0.6266, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 2.3750412677451305, |
|
"grad_norm": 20.695463180541992, |
|
"learning_rate": 7.124444444444445e-06, |
|
"loss": 0.6079, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.3750412677451305, |
|
"eval_loss": 0.4444292187690735, |
|
"eval_runtime": 421.917, |
|
"eval_samples_per_second": 1.797, |
|
"eval_steps_per_second": 0.898, |
|
"eval_wer": 0.3482041049030787, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.4080554638494553, |
|
"grad_norm": 26.363622665405273, |
|
"learning_rate": 7.06888888888889e-06, |
|
"loss": 0.663, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 2.4410696599537802, |
|
"grad_norm": 31.212791442871094, |
|
"learning_rate": 7.0133333333333345e-06, |
|
"loss": 0.655, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 2.474083856058105, |
|
"grad_norm": 39.38019561767578, |
|
"learning_rate": 6.9577777777777785e-06, |
|
"loss": 0.7364, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 2.50709805216243, |
|
"grad_norm": 26.713680267333984, |
|
"learning_rate": 6.902222222222223e-06, |
|
"loss": 0.647, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.540112248266755, |
|
"grad_norm": 24.708005905151367, |
|
"learning_rate": 6.846666666666667e-06, |
|
"loss": 0.6818, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 2.5731264443710797, |
|
"grad_norm": 22.732967376708984, |
|
"learning_rate": 6.7911111111111115e-06, |
|
"loss": 0.6711, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 2.606140640475404, |
|
"grad_norm": 23.73015022277832, |
|
"learning_rate": 6.735555555555556e-06, |
|
"loss": 0.6492, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 2.6391548365797295, |
|
"grad_norm": 30.428665161132812, |
|
"learning_rate": 6.680000000000001e-06, |
|
"loss": 0.5709, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.6391548365797295, |
|
"eval_loss": 0.4367344081401825, |
|
"eval_runtime": 423.1943, |
|
"eval_samples_per_second": 1.791, |
|
"eval_steps_per_second": 0.896, |
|
"eval_wer": 0.3405074116305587, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.6391548365797295, |
|
"step": 2000, |
|
"total_flos": 1.631539005161472e+19, |
|
"train_loss": 1.9525959782600404, |
|
"train_runtime": 8300.7195, |
|
"train_samples_per_second": 4.819, |
|
"train_steps_per_second": 0.602 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 5000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 7, |
|
"save_steps": 200, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 3 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.631539005161472e+19, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|