|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 19.2, |
|
"eval_steps": 720, |
|
"global_step": 14400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0013333333333333333, |
|
"grad_norm": 16.625, |
|
"learning_rate": 2e-06, |
|
"loss": 2.3594, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.13333333333333333, |
|
"grad_norm": 0.2490234375, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9537, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.26666666666666666, |
|
"grad_norm": 0.263671875, |
|
"learning_rate": 0.0004, |
|
"loss": 0.5927, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.2890625, |
|
"learning_rate": 0.0006, |
|
"loss": 0.5721, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5333333333333333, |
|
"grad_norm": 0.546875, |
|
"learning_rate": 0.0008, |
|
"loss": 0.4882, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.6666666666666666, |
|
"grad_norm": 0.42578125, |
|
"learning_rate": 0.001, |
|
"loss": 0.3812, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.474609375, |
|
"learning_rate": 0.0012, |
|
"loss": 0.353, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.9333333333333333, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 0.0014, |
|
"loss": 0.3347, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_commonvoice-bn-transcription_loss": 0.6287540793418884, |
|
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074, |
|
"eval_commonvoice-bn-transcription_runtime": 11.4061, |
|
"eval_commonvoice-bn-transcription_samples_per_second": 5.611, |
|
"eval_commonvoice-bn-transcription_steps_per_second": 0.701, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.0666666666666667, |
|
"grad_norm": 0.275390625, |
|
"learning_rate": 0.0016, |
|
"loss": 0.2786, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 0.234375, |
|
"learning_rate": 0.0018000000000000002, |
|
"loss": 0.282, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.3333333333333333, |
|
"grad_norm": 0.1884765625, |
|
"learning_rate": 0.002, |
|
"loss": 0.2787, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.4666666666666668, |
|
"grad_norm": 0.279296875, |
|
"learning_rate": 0.0019997526665988343, |
|
"loss": 0.2676, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 0.19921875, |
|
"learning_rate": 0.00199901080233714, |
|
"loss": 0.2267, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.7333333333333334, |
|
"grad_norm": 0.208984375, |
|
"learning_rate": 0.0019977748149656092, |
|
"loss": 0.2247, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.8666666666666667, |
|
"grad_norm": 0.2021484375, |
|
"learning_rate": 0.0019960453838197083, |
|
"loss": 0.2223, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"eval_commonvoice-bn-transcription_loss": 0.5989859104156494, |
|
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074, |
|
"eval_commonvoice-bn-transcription_runtime": 13.0069, |
|
"eval_commonvoice-bn-transcription_samples_per_second": 4.92, |
|
"eval_commonvoice-bn-transcription_steps_per_second": 0.615, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.1337890625, |
|
"learning_rate": 0.0019938234594462988, |
|
"loss": 0.1983, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.1333333333333333, |
|
"grad_norm": 0.203125, |
|
"learning_rate": 0.0019911102630811857, |
|
"loss": 0.1747, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.2666666666666666, |
|
"grad_norm": 0.2060546875, |
|
"learning_rate": 0.001987907285977892, |
|
"loss": 0.1814, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"grad_norm": 0.1689453125, |
|
"learning_rate": 0.0019842162885880185, |
|
"loss": 0.181, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.533333333333333, |
|
"grad_norm": 0.1630859375, |
|
"learning_rate": 0.0019800392995936505, |
|
"loss": 0.1567, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.6666666666666665, |
|
"grad_norm": 0.1357421875, |
|
"learning_rate": 0.0019753786147923315, |
|
"loss": 0.1517, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"grad_norm": 0.11083984375, |
|
"learning_rate": 0.0019702367958352256, |
|
"loss": 0.1467, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"eval_commonvoice-bn-transcription_loss": 0.5943728685379028, |
|
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074, |
|
"eval_commonvoice-bn-transcription_runtime": 12.3498, |
|
"eval_commonvoice-bn-transcription_samples_per_second": 5.182, |
|
"eval_commonvoice-bn-transcription_steps_per_second": 0.648, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 2.9333333333333336, |
|
"grad_norm": 0.1845703125, |
|
"learning_rate": 0.001964616668819156, |
|
"loss": 0.1458, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 3.066666666666667, |
|
"grad_norm": 0.10595703125, |
|
"learning_rate": 0.001958521322733301, |
|
"loss": 0.116, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"grad_norm": 0.177734375, |
|
"learning_rate": 0.0019519541077613908, |
|
"loss": 0.1273, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 3.3333333333333335, |
|
"grad_norm": 0.1435546875, |
|
"learning_rate": 0.0019449186334403478, |
|
"loss": 0.1232, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.466666666666667, |
|
"grad_norm": 0.1201171875, |
|
"learning_rate": 0.001937418766676378, |
|
"loss": 0.1197, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"grad_norm": 0.11572265625, |
|
"learning_rate": 0.0019294586296196034, |
|
"loss": 0.0999, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 3.7333333333333334, |
|
"grad_norm": 0.1943359375, |
|
"learning_rate": 0.0019210425973984072, |
|
"loss": 0.1029, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"eval_commonvoice-bn-transcription_loss": 0.6720843315124512, |
|
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074, |
|
"eval_commonvoice-bn-transcription_runtime": 11.4916, |
|
"eval_commonvoice-bn-transcription_samples_per_second": 5.569, |
|
"eval_commonvoice-bn-transcription_steps_per_second": 0.696, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 3.8666666666666667, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 0.0019121752957147297, |
|
"loss": 0.106, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.0634765625, |
|
"learning_rate": 0.0019028615983016476, |
|
"loss": 0.0966, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.133333333333334, |
|
"grad_norm": 0.275390625, |
|
"learning_rate": 0.0018931066242446255, |
|
"loss": 0.0845, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 4.266666666666667, |
|
"grad_norm": 0.09765625, |
|
"learning_rate": 0.0018829157351679113, |
|
"loss": 0.0889, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"grad_norm": 0.2177734375, |
|
"learning_rate": 0.0018722945322876298, |
|
"loss": 0.0915, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 4.533333333333333, |
|
"grad_norm": 0.091796875, |
|
"learning_rate": 0.0018612488533331878, |
|
"loss": 0.0824, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 4.666666666666667, |
|
"grad_norm": 0.07763671875, |
|
"learning_rate": 0.0018497847693386823, |
|
"loss": 0.0767, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"grad_norm": 0.09130859375, |
|
"learning_rate": 0.0018379085813060821, |
|
"loss": 0.0775, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"eval_commonvoice-bn-transcription_loss": 0.6684668064117432, |
|
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074, |
|
"eval_commonvoice-bn-transcription_runtime": 11.4545, |
|
"eval_commonvoice-bn-transcription_samples_per_second": 5.587, |
|
"eval_commonvoice-bn-transcription_steps_per_second": 0.698, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 4.933333333333334, |
|
"grad_norm": 0.1220703125, |
|
"learning_rate": 0.0018256268167420066, |
|
"loss": 0.0778, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 5.066666666666666, |
|
"grad_norm": 0.060546875, |
|
"learning_rate": 0.0018129462260700162, |
|
"loss": 0.0638, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"grad_norm": 0.068359375, |
|
"learning_rate": 0.001799873778920377, |
|
"loss": 0.0672, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 5.333333333333333, |
|
"grad_norm": 0.1376953125, |
|
"learning_rate": 0.0017864166602993437, |
|
"loss": 0.0671, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 5.466666666666667, |
|
"grad_norm": 0.080078125, |
|
"learning_rate": 0.0017725822666400696, |
|
"loss": 0.0654, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"grad_norm": 0.0732421875, |
|
"learning_rate": 0.001758378201737302, |
|
"loss": 0.0574, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 5.733333333333333, |
|
"grad_norm": 0.06591796875, |
|
"learning_rate": 0.001743812272568115, |
|
"loss": 0.0593, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"eval_commonvoice-bn-transcription_loss": 0.6580834984779358, |
|
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074, |
|
"eval_commonvoice-bn-transcription_runtime": 11.8027, |
|
"eval_commonvoice-bn-transcription_samples_per_second": 5.422, |
|
"eval_commonvoice-bn-transcription_steps_per_second": 0.678, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 5.866666666666667, |
|
"grad_norm": 0.038330078125, |
|
"learning_rate": 0.0017288924850009575, |
|
"loss": 0.0578, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 0.09423828125, |
|
"learning_rate": 0.0017136270393953942, |
|
"loss": 0.0545, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 6.133333333333334, |
|
"grad_norm": 0.09521484375, |
|
"learning_rate": 0.0016980243260949395, |
|
"loss": 0.0505, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 6.266666666666667, |
|
"grad_norm": 0.037841796875, |
|
"learning_rate": 0.0016820929208154786, |
|
"loss": 0.0521, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"grad_norm": 0.037109375, |
|
"learning_rate": 0.0016658415799317965, |
|
"loss": 0.0513, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 6.533333333333333, |
|
"grad_norm": 0.02783203125, |
|
"learning_rate": 0.001649279235664813, |
|
"loss": 0.0495, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 6.666666666666667, |
|
"grad_norm": 0.031494140625, |
|
"learning_rate": 0.0016324149911721704, |
|
"loss": 0.0479, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"eval_commonvoice-bn-transcription_loss": 0.6727238893508911, |
|
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074, |
|
"eval_commonvoice-bn-transcription_runtime": 14.0248, |
|
"eval_commonvoice-bn-transcription_samples_per_second": 4.563, |
|
"eval_commonvoice-bn-transcription_steps_per_second": 0.57, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"grad_norm": 0.034423828125, |
|
"learning_rate": 0.0016152581155448648, |
|
"loss": 0.0473, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 6.933333333333334, |
|
"grad_norm": 0.061279296875, |
|
"learning_rate": 0.0015978180387126795, |
|
"loss": 0.0474, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 7.066666666666666, |
|
"grad_norm": 0.033447265625, |
|
"learning_rate": 0.0015801043462612132, |
|
"loss": 0.0445, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"grad_norm": 0.041748046875, |
|
"learning_rate": 0.0015621267741633578, |
|
"loss": 0.045, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 7.333333333333333, |
|
"grad_norm": 0.0234375, |
|
"learning_rate": 0.0015438952034281166, |
|
"loss": 0.0444, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 7.466666666666667, |
|
"grad_norm": 0.033447265625, |
|
"learning_rate": 0.0015254196546697087, |
|
"loss": 0.0444, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"grad_norm": 0.03271484375, |
|
"learning_rate": 0.0015067102825999403, |
|
"loss": 0.0426, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"eval_commonvoice-bn-transcription_loss": 0.6831041574478149, |
|
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074, |
|
"eval_commonvoice-bn-transcription_runtime": 16.8726, |
|
"eval_commonvoice-bn-transcription_samples_per_second": 3.793, |
|
"eval_commonvoice-bn-transcription_steps_per_second": 0.474, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 7.733333333333333, |
|
"grad_norm": 0.05029296875, |
|
"learning_rate": 0.0014877773704468733, |
|
"loss": 0.0426, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 7.866666666666667, |
|
"grad_norm": 0.0262451171875, |
|
"learning_rate": 0.001468631324302856, |
|
"loss": 0.0426, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.0283203125, |
|
"learning_rate": 0.0014492826674050248, |
|
"loss": 0.0416, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 8.133333333333333, |
|
"grad_norm": 0.0224609375, |
|
"learning_rate": 0.0014297420343514216, |
|
"loss": 0.0409, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 8.266666666666667, |
|
"grad_norm": 0.0213623046875, |
|
"learning_rate": 0.0014100201652558998, |
|
"loss": 0.0411, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"grad_norm": 0.036865234375, |
|
"learning_rate": 0.0013901278998450384, |
|
"loss": 0.0415, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 8.533333333333333, |
|
"grad_norm": 0.022705078125, |
|
"learning_rate": 0.0013700761715003068, |
|
"loss": 0.04, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 8.64, |
|
"eval_commonvoice-bn-transcription_loss": 0.6834076642990112, |
|
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074, |
|
"eval_commonvoice-bn-transcription_runtime": 12.4556, |
|
"eval_commonvoice-bn-transcription_samples_per_second": 5.138, |
|
"eval_commonvoice-bn-transcription_steps_per_second": 0.642, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 8.666666666666666, |
|
"grad_norm": 0.0191650390625, |
|
"learning_rate": 0.0013498760012487503, |
|
"loss": 0.0402, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"grad_norm": 0.03515625, |
|
"learning_rate": 0.001329538491705509, |
|
"loss": 0.04, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 8.933333333333334, |
|
"grad_norm": 0.043701171875, |
|
"learning_rate": 0.001309074820971485, |
|
"loss": 0.0405, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 9.066666666666666, |
|
"grad_norm": 0.020751953125, |
|
"learning_rate": 0.0012884962364895303, |
|
"loss": 0.0375, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"grad_norm": 0.022216796875, |
|
"learning_rate": 0.0012678140488625131, |
|
"loss": 0.0391, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 9.333333333333334, |
|
"grad_norm": 0.03515625, |
|
"learning_rate": 0.0012470396256366771, |
|
"loss": 0.0393, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 9.466666666666667, |
|
"grad_norm": 0.0213623046875, |
|
"learning_rate": 0.0012261843850536976, |
|
"loss": 0.0396, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"grad_norm": 0.0194091796875, |
|
"learning_rate": 0.0012052597897748746, |
|
"loss": 0.0375, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"eval_commonvoice-bn-transcription_loss": 0.6838600039482117, |
|
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074, |
|
"eval_commonvoice-bn-transcription_runtime": 13.9275, |
|
"eval_commonvoice-bn-transcription_samples_per_second": 4.595, |
|
"eval_commonvoice-bn-transcription_steps_per_second": 0.574, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 9.733333333333333, |
|
"grad_norm": 0.04052734375, |
|
"learning_rate": 0.001184277340580916, |
|
"loss": 0.0385, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 9.866666666666667, |
|
"grad_norm": 0.02880859375, |
|
"learning_rate": 0.0011632485700507637, |
|
"loss": 0.0389, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 0.0234375, |
|
"learning_rate": 0.001142185036222946, |
|
"loss": 0.0377, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 10.133333333333333, |
|
"grad_norm": 0.01708984375, |
|
"learning_rate": 0.0011210983162429347, |
|
"loss": 0.037, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 10.266666666666667, |
|
"grad_norm": 0.023193359375, |
|
"learning_rate": 0.0011, |
|
"loss": 0.0378, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 10.4, |
|
"grad_norm": 0.03662109375, |
|
"learning_rate": 0.0010789016837570657, |
|
"loss": 0.0384, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 10.533333333333333, |
|
"grad_norm": 0.0201416015625, |
|
"learning_rate": 0.001057814963777054, |
|
"loss": 0.0371, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 10.56, |
|
"eval_commonvoice-bn-transcription_loss": 0.6807686686515808, |
|
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074, |
|
"eval_commonvoice-bn-transcription_runtime": 13.6962, |
|
"eval_commonvoice-bn-transcription_samples_per_second": 4.673, |
|
"eval_commonvoice-bn-transcription_steps_per_second": 0.584, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 10.666666666666666, |
|
"grad_norm": 0.01806640625, |
|
"learning_rate": 0.0010367514299492366, |
|
"loss": 0.0374, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 10.8, |
|
"grad_norm": 0.032470703125, |
|
"learning_rate": 0.0010157226594190844, |
|
"loss": 0.0375, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 10.933333333333334, |
|
"grad_norm": 0.043212890625, |
|
"learning_rate": 0.0009947402102251258, |
|
"loss": 0.0381, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 11.066666666666666, |
|
"grad_norm": 0.0260009765625, |
|
"learning_rate": 0.0009738156149463029, |
|
"loss": 0.0355, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 11.2, |
|
"grad_norm": 0.0235595703125, |
|
"learning_rate": 0.0009529603743633229, |
|
"loss": 0.0371, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 11.333333333333334, |
|
"grad_norm": 0.03271484375, |
|
"learning_rate": 0.000932185951137487, |
|
"loss": 0.0373, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 11.466666666666667, |
|
"grad_norm": 0.02392578125, |
|
"learning_rate": 0.0009115037635104702, |
|
"loss": 0.0376, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 11.52, |
|
"eval_commonvoice-bn-transcription_loss": 0.6774536967277527, |
|
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074, |
|
"eval_commonvoice-bn-transcription_runtime": 12.1429, |
|
"eval_commonvoice-bn-transcription_samples_per_second": 5.271, |
|
"eval_commonvoice-bn-transcription_steps_per_second": 0.659, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 11.6, |
|
"grad_norm": 0.0205078125, |
|
"learning_rate": 0.0008909251790285153, |
|
"loss": 0.0359, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 11.733333333333333, |
|
"grad_norm": 0.0198974609375, |
|
"learning_rate": 0.0008704615082944913, |
|
"loss": 0.0368, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 11.866666666666667, |
|
"grad_norm": 0.0234375, |
|
"learning_rate": 0.0008501239987512495, |
|
"loss": 0.0373, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 0.0238037109375, |
|
"learning_rate": 0.0008299238284996935, |
|
"loss": 0.0363, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 12.133333333333333, |
|
"grad_norm": 0.0164794921875, |
|
"learning_rate": 0.0008098721001549618, |
|
"loss": 0.0358, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 12.266666666666667, |
|
"grad_norm": 0.018798828125, |
|
"learning_rate": 0.0007899798347441006, |
|
"loss": 0.0366, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 12.4, |
|
"grad_norm": 0.048583984375, |
|
"learning_rate": 0.0007702579656485785, |
|
"loss": 0.0371, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 12.48, |
|
"eval_commonvoice-bn-transcription_loss": 0.683639645576477, |
|
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074, |
|
"eval_commonvoice-bn-transcription_runtime": 13.4595, |
|
"eval_commonvoice-bn-transcription_samples_per_second": 4.755, |
|
"eval_commonvoice-bn-transcription_steps_per_second": 0.594, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 12.533333333333333, |
|
"grad_norm": 0.0189208984375, |
|
"learning_rate": 0.0007507173325949752, |
|
"loss": 0.036, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 12.666666666666666, |
|
"grad_norm": 0.0272216796875, |
|
"learning_rate": 0.0007313686756971443, |
|
"loss": 0.0363, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"grad_norm": 0.0306396484375, |
|
"learning_rate": 0.0007122226295531266, |
|
"loss": 0.0365, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 12.933333333333334, |
|
"grad_norm": 0.035400390625, |
|
"learning_rate": 0.0006932897174000596, |
|
"loss": 0.037, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 13.066666666666666, |
|
"grad_norm": 0.02197265625, |
|
"learning_rate": 0.0006745803453302912, |
|
"loss": 0.0347, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 13.2, |
|
"grad_norm": 0.0277099609375, |
|
"learning_rate": 0.0006561047965718835, |
|
"loss": 0.0363, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 13.333333333333334, |
|
"grad_norm": 0.017333984375, |
|
"learning_rate": 0.0006378732258366421, |
|
"loss": 0.0365, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 13.44, |
|
"eval_commonvoice-bn-transcription_loss": 0.6834643483161926, |
|
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074, |
|
"eval_commonvoice-bn-transcription_runtime": 13.0019, |
|
"eval_commonvoice-bn-transcription_samples_per_second": 4.922, |
|
"eval_commonvoice-bn-transcription_steps_per_second": 0.615, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 13.466666666666667, |
|
"grad_norm": 0.0230712890625, |
|
"learning_rate": 0.0006198956537387869, |
|
"loss": 0.0369, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 13.6, |
|
"grad_norm": 0.0281982421875, |
|
"learning_rate": 0.0006021819612873205, |
|
"loss": 0.0352, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 13.733333333333333, |
|
"grad_norm": 0.018310546875, |
|
"learning_rate": 0.0005847418844551355, |
|
"loss": 0.0362, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 13.866666666666667, |
|
"grad_norm": 0.0194091796875, |
|
"learning_rate": 0.0005675850088278299, |
|
"loss": 0.0367, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 0.03271484375, |
|
"learning_rate": 0.0005507207643351873, |
|
"loss": 0.0358, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 14.133333333333333, |
|
"grad_norm": 0.0166015625, |
|
"learning_rate": 0.0005341584200682039, |
|
"loss": 0.0353, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 14.266666666666667, |
|
"grad_norm": 0.017578125, |
|
"learning_rate": 0.0005179070791845213, |
|
"loss": 0.0361, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 14.4, |
|
"grad_norm": 0.04833984375, |
|
"learning_rate": 0.0005019756739050605, |
|
"loss": 0.0367, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 14.4, |
|
"eval_commonvoice-bn-transcription_loss": 0.6827465295791626, |
|
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074, |
|
"eval_commonvoice-bn-transcription_runtime": 13.7129, |
|
"eval_commonvoice-bn-transcription_samples_per_second": 4.667, |
|
"eval_commonvoice-bn-transcription_steps_per_second": 0.583, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 14.533333333333333, |
|
"grad_norm": 0.018798828125, |
|
"learning_rate": 0.00048637296060460595, |
|
"loss": 0.0357, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 14.666666666666666, |
|
"grad_norm": 0.04296875, |
|
"learning_rate": 0.0004711075149990425, |
|
"loss": 0.0359, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 14.8, |
|
"grad_norm": 0.0255126953125, |
|
"learning_rate": 0.0004561877274318854, |
|
"loss": 0.0361, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 14.933333333333334, |
|
"grad_norm": 0.03515625, |
|
"learning_rate": 0.0004416217982626981, |
|
"loss": 0.0367, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 15.066666666666666, |
|
"grad_norm": 0.018310546875, |
|
"learning_rate": 0.0004274177333599306, |
|
"loss": 0.0345, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 15.2, |
|
"grad_norm": 0.0194091796875, |
|
"learning_rate": 0.00041358333970065636, |
|
"loss": 0.036, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 15.333333333333334, |
|
"grad_norm": 0.01806640625, |
|
"learning_rate": 0.00040012622107962314, |
|
"loss": 0.0363, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 15.36, |
|
"eval_commonvoice-bn-transcription_loss": 0.6828598380088806, |
|
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074, |
|
"eval_commonvoice-bn-transcription_runtime": 13.9421, |
|
"eval_commonvoice-bn-transcription_samples_per_second": 4.59, |
|
"eval_commonvoice-bn-transcription_steps_per_second": 0.574, |
|
"step": 11520 |
|
}, |
|
{ |
|
"epoch": 15.466666666666667, |
|
"grad_norm": 0.034423828125, |
|
"learning_rate": 0.0003870537739299836, |
|
"loss": 0.0366, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 15.6, |
|
"grad_norm": 0.02197265625, |
|
"learning_rate": 0.0003743731832579933, |
|
"loss": 0.035, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 15.733333333333333, |
|
"grad_norm": 0.017822265625, |
|
"learning_rate": 0.00036209141869391796, |
|
"loss": 0.036, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 15.866666666666667, |
|
"grad_norm": 0.0179443359375, |
|
"learning_rate": 0.00035021523066131776, |
|
"loss": 0.0365, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 0.0252685546875, |
|
"learning_rate": 0.00033875114666681236, |
|
"loss": 0.0356, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 16.133333333333333, |
|
"grad_norm": 0.0284423828125, |
|
"learning_rate": 0.0003277054677123703, |
|
"loss": 0.0351, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 16.266666666666666, |
|
"grad_norm": 0.01708984375, |
|
"learning_rate": 0.00031708426483208885, |
|
"loss": 0.0359, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 16.32, |
|
"eval_commonvoice-bn-transcription_loss": 0.6832164525985718, |
|
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074, |
|
"eval_commonvoice-bn-transcription_runtime": 16.9968, |
|
"eval_commonvoice-bn-transcription_samples_per_second": 3.765, |
|
"eval_commonvoice-bn-transcription_steps_per_second": 0.471, |
|
"step": 12240 |
|
}, |
|
{ |
|
"epoch": 16.4, |
|
"grad_norm": 0.038818359375, |
|
"learning_rate": 0.00030689337575537455, |
|
"loss": 0.0365, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 16.533333333333335, |
|
"grad_norm": 0.019775390625, |
|
"learning_rate": 0.00029713840169835217, |
|
"loss": 0.0355, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 16.666666666666668, |
|
"grad_norm": 0.03369140625, |
|
"learning_rate": 0.0002878247042852705, |
|
"loss": 0.0358, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 16.8, |
|
"grad_norm": 0.0283203125, |
|
"learning_rate": 0.0002789574026015931, |
|
"loss": 0.036, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 16.933333333333334, |
|
"grad_norm": 0.035888671875, |
|
"learning_rate": 0.0002705413703803964, |
|
"loss": 0.0366, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 17.066666666666666, |
|
"grad_norm": 0.0174560546875, |
|
"learning_rate": 0.00026258123332362215, |
|
"loss": 0.0344, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 17.2, |
|
"grad_norm": 0.016845703125, |
|
"learning_rate": 0.0002550813665596523, |
|
"loss": 0.0359, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 17.28, |
|
"eval_commonvoice-bn-transcription_loss": 0.6824997067451477, |
|
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074, |
|
"eval_commonvoice-bn-transcription_runtime": 13.9582, |
|
"eval_commonvoice-bn-transcription_samples_per_second": 4.585, |
|
"eval_commonvoice-bn-transcription_steps_per_second": 0.573, |
|
"step": 12960 |
|
}, |
|
{ |
|
"epoch": 17.333333333333332, |
|
"grad_norm": 0.0177001953125, |
|
"learning_rate": 0.00024804589223860934, |
|
"loss": 0.0362, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 17.466666666666665, |
|
"grad_norm": 0.02734375, |
|
"learning_rate": 0.00024147867726669888, |
|
"loss": 0.0365, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 17.6, |
|
"grad_norm": 0.0191650390625, |
|
"learning_rate": 0.00023538333118084396, |
|
"loss": 0.035, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 17.733333333333334, |
|
"grad_norm": 0.026611328125, |
|
"learning_rate": 0.00022976320416477446, |
|
"loss": 0.0359, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 17.866666666666667, |
|
"grad_norm": 0.0189208984375, |
|
"learning_rate": 0.00022462138520766832, |
|
"loss": 0.0364, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 0.02294921875, |
|
"learning_rate": 0.0002199607004063494, |
|
"loss": 0.0356, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 18.133333333333333, |
|
"grad_norm": 0.0390625, |
|
"learning_rate": 0.00021578371141198153, |
|
"loss": 0.0352, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 18.24, |
|
"eval_commonvoice-bn-transcription_loss": 0.6843233704566956, |
|
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074, |
|
"eval_commonvoice-bn-transcription_runtime": 13.7375, |
|
"eval_commonvoice-bn-transcription_samples_per_second": 4.659, |
|
"eval_commonvoice-bn-transcription_steps_per_second": 0.582, |
|
"step": 13680 |
|
}, |
|
{ |
|
"epoch": 18.266666666666666, |
|
"grad_norm": 0.0228271484375, |
|
"learning_rate": 0.00021209271402210822, |
|
"loss": 0.0359, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 18.4, |
|
"grad_norm": 0.044921875, |
|
"learning_rate": 0.00020888973691881416, |
|
"loss": 0.0365, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 18.533333333333335, |
|
"grad_norm": 0.0262451171875, |
|
"learning_rate": 0.0002061765405537013, |
|
"loss": 0.0355, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 18.666666666666668, |
|
"grad_norm": 0.026611328125, |
|
"learning_rate": 0.00020395461618029175, |
|
"loss": 0.0358, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 18.8, |
|
"grad_norm": 0.0201416015625, |
|
"learning_rate": 0.0002022251850343909, |
|
"loss": 0.036, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 18.933333333333334, |
|
"grad_norm": 0.0439453125, |
|
"learning_rate": 0.0002009891976628598, |
|
"loss": 0.0365, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 19.066666666666666, |
|
"grad_norm": 0.021728515625, |
|
"learning_rate": 0.00020024733340116572, |
|
"loss": 0.0344, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 19.2, |
|
"grad_norm": 0.0177001953125, |
|
"learning_rate": 0.0002, |
|
"loss": 0.0358, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 19.2, |
|
"eval_commonvoice-bn-transcription_loss": 0.6850356459617615, |
|
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074, |
|
"eval_commonvoice-bn-transcription_runtime": 15.339, |
|
"eval_commonvoice-bn-transcription_samples_per_second": 4.172, |
|
"eval_commonvoice-bn-transcription_steps_per_second": 0.522, |
|
"step": 14400 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 14400, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 3600, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 6.421884240590438e+17, |
|
"train_batch_size": 24, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|