{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 19.2,
"eval_steps": 720,
"global_step": 14400,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0013333333333333333,
"grad_norm": 16.625,
"learning_rate": 2e-06,
"loss": 2.3594,
"step": 1
},
{
"epoch": 0.13333333333333333,
"grad_norm": 0.2490234375,
"learning_rate": 0.0002,
"loss": 0.9537,
"step": 100
},
{
"epoch": 0.26666666666666666,
"grad_norm": 0.263671875,
"learning_rate": 0.0004,
"loss": 0.5927,
"step": 200
},
{
"epoch": 0.4,
"grad_norm": 0.2890625,
"learning_rate": 0.0006,
"loss": 0.5721,
"step": 300
},
{
"epoch": 0.5333333333333333,
"grad_norm": 0.546875,
"learning_rate": 0.0008,
"loss": 0.4882,
"step": 400
},
{
"epoch": 0.6666666666666666,
"grad_norm": 0.42578125,
"learning_rate": 0.001,
"loss": 0.3812,
"step": 500
},
{
"epoch": 0.8,
"grad_norm": 0.474609375,
"learning_rate": 0.0012,
"loss": 0.353,
"step": 600
},
{
"epoch": 0.9333333333333333,
"grad_norm": 0.333984375,
"learning_rate": 0.0014,
"loss": 0.3347,
"step": 700
},
{
"epoch": 0.96,
"eval_commonvoice-bn-transcription_loss": 0.6287540793418884,
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074,
"eval_commonvoice-bn-transcription_runtime": 11.4061,
"eval_commonvoice-bn-transcription_samples_per_second": 5.611,
"eval_commonvoice-bn-transcription_steps_per_second": 0.701,
"step": 720
},
{
"epoch": 1.0666666666666667,
"grad_norm": 0.275390625,
"learning_rate": 0.0016,
"loss": 0.2786,
"step": 800
},
{
"epoch": 1.2,
"grad_norm": 0.234375,
"learning_rate": 0.0018000000000000002,
"loss": 0.282,
"step": 900
},
{
"epoch": 1.3333333333333333,
"grad_norm": 0.1884765625,
"learning_rate": 0.002,
"loss": 0.2787,
"step": 1000
},
{
"epoch": 1.4666666666666668,
"grad_norm": 0.279296875,
"learning_rate": 0.0019997526665988343,
"loss": 0.2676,
"step": 1100
},
{
"epoch": 1.6,
"grad_norm": 0.19921875,
"learning_rate": 0.00199901080233714,
"loss": 0.2267,
"step": 1200
},
{
"epoch": 1.7333333333333334,
"grad_norm": 0.208984375,
"learning_rate": 0.0019977748149656092,
"loss": 0.2247,
"step": 1300
},
{
"epoch": 1.8666666666666667,
"grad_norm": 0.2021484375,
"learning_rate": 0.0019960453838197083,
"loss": 0.2223,
"step": 1400
},
{
"epoch": 1.92,
"eval_commonvoice-bn-transcription_loss": 0.5989859104156494,
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074,
"eval_commonvoice-bn-transcription_runtime": 13.0069,
"eval_commonvoice-bn-transcription_samples_per_second": 4.92,
"eval_commonvoice-bn-transcription_steps_per_second": 0.615,
"step": 1440
},
{
"epoch": 2.0,
"grad_norm": 0.1337890625,
"learning_rate": 0.0019938234594462988,
"loss": 0.1983,
"step": 1500
},
{
"epoch": 2.1333333333333333,
"grad_norm": 0.203125,
"learning_rate": 0.0019911102630811857,
"loss": 0.1747,
"step": 1600
},
{
"epoch": 2.2666666666666666,
"grad_norm": 0.2060546875,
"learning_rate": 0.001987907285977892,
"loss": 0.1814,
"step": 1700
},
{
"epoch": 2.4,
"grad_norm": 0.1689453125,
"learning_rate": 0.0019842162885880185,
"loss": 0.181,
"step": 1800
},
{
"epoch": 2.533333333333333,
"grad_norm": 0.1630859375,
"learning_rate": 0.0019800392995936505,
"loss": 0.1567,
"step": 1900
},
{
"epoch": 2.6666666666666665,
"grad_norm": 0.1357421875,
"learning_rate": 0.0019753786147923315,
"loss": 0.1517,
"step": 2000
},
{
"epoch": 2.8,
"grad_norm": 0.11083984375,
"learning_rate": 0.0019702367958352256,
"loss": 0.1467,
"step": 2100
},
{
"epoch": 2.88,
"eval_commonvoice-bn-transcription_loss": 0.5943728685379028,
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074,
"eval_commonvoice-bn-transcription_runtime": 12.3498,
"eval_commonvoice-bn-transcription_samples_per_second": 5.182,
"eval_commonvoice-bn-transcription_steps_per_second": 0.648,
"step": 2160
},
{
"epoch": 2.9333333333333336,
"grad_norm": 0.1845703125,
"learning_rate": 0.001964616668819156,
"loss": 0.1458,
"step": 2200
},
{
"epoch": 3.066666666666667,
"grad_norm": 0.10595703125,
"learning_rate": 0.001958521322733301,
"loss": 0.116,
"step": 2300
},
{
"epoch": 3.2,
"grad_norm": 0.177734375,
"learning_rate": 0.0019519541077613908,
"loss": 0.1273,
"step": 2400
},
{
"epoch": 3.3333333333333335,
"grad_norm": 0.1435546875,
"learning_rate": 0.0019449186334403478,
"loss": 0.1232,
"step": 2500
},
{
"epoch": 3.466666666666667,
"grad_norm": 0.1201171875,
"learning_rate": 0.001937418766676378,
"loss": 0.1197,
"step": 2600
},
{
"epoch": 3.6,
"grad_norm": 0.11572265625,
"learning_rate": 0.0019294586296196034,
"loss": 0.0999,
"step": 2700
},
{
"epoch": 3.7333333333333334,
"grad_norm": 0.1943359375,
"learning_rate": 0.0019210425973984072,
"loss": 0.1029,
"step": 2800
},
{
"epoch": 3.84,
"eval_commonvoice-bn-transcription_loss": 0.6720843315124512,
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074,
"eval_commonvoice-bn-transcription_runtime": 11.4916,
"eval_commonvoice-bn-transcription_samples_per_second": 5.569,
"eval_commonvoice-bn-transcription_steps_per_second": 0.696,
"step": 2880
},
{
"epoch": 3.8666666666666667,
"grad_norm": 0.30859375,
"learning_rate": 0.0019121752957147297,
"loss": 0.106,
"step": 2900
},
{
"epoch": 4.0,
"grad_norm": 0.0634765625,
"learning_rate": 0.0019028615983016476,
"loss": 0.0966,
"step": 3000
},
{
"epoch": 4.133333333333334,
"grad_norm": 0.275390625,
"learning_rate": 0.0018931066242446255,
"loss": 0.0845,
"step": 3100
},
{
"epoch": 4.266666666666667,
"grad_norm": 0.09765625,
"learning_rate": 0.0018829157351679113,
"loss": 0.0889,
"step": 3200
},
{
"epoch": 4.4,
"grad_norm": 0.2177734375,
"learning_rate": 0.0018722945322876298,
"loss": 0.0915,
"step": 3300
},
{
"epoch": 4.533333333333333,
"grad_norm": 0.091796875,
"learning_rate": 0.0018612488533331878,
"loss": 0.0824,
"step": 3400
},
{
"epoch": 4.666666666666667,
"grad_norm": 0.07763671875,
"learning_rate": 0.0018497847693386823,
"loss": 0.0767,
"step": 3500
},
{
"epoch": 4.8,
"grad_norm": 0.09130859375,
"learning_rate": 0.0018379085813060821,
"loss": 0.0775,
"step": 3600
},
{
"epoch": 4.8,
"eval_commonvoice-bn-transcription_loss": 0.6684668064117432,
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074,
"eval_commonvoice-bn-transcription_runtime": 11.4545,
"eval_commonvoice-bn-transcription_samples_per_second": 5.587,
"eval_commonvoice-bn-transcription_steps_per_second": 0.698,
"step": 3600
},
{
"epoch": 4.933333333333334,
"grad_norm": 0.1220703125,
"learning_rate": 0.0018256268167420066,
"loss": 0.0778,
"step": 3700
},
{
"epoch": 5.066666666666666,
"grad_norm": 0.060546875,
"learning_rate": 0.0018129462260700162,
"loss": 0.0638,
"step": 3800
},
{
"epoch": 5.2,
"grad_norm": 0.068359375,
"learning_rate": 0.001799873778920377,
"loss": 0.0672,
"step": 3900
},
{
"epoch": 5.333333333333333,
"grad_norm": 0.1376953125,
"learning_rate": 0.0017864166602993437,
"loss": 0.0671,
"step": 4000
},
{
"epoch": 5.466666666666667,
"grad_norm": 0.080078125,
"learning_rate": 0.0017725822666400696,
"loss": 0.0654,
"step": 4100
},
{
"epoch": 5.6,
"grad_norm": 0.0732421875,
"learning_rate": 0.001758378201737302,
"loss": 0.0574,
"step": 4200
},
{
"epoch": 5.733333333333333,
"grad_norm": 0.06591796875,
"learning_rate": 0.001743812272568115,
"loss": 0.0593,
"step": 4300
},
{
"epoch": 5.76,
"eval_commonvoice-bn-transcription_loss": 0.6580834984779358,
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074,
"eval_commonvoice-bn-transcription_runtime": 11.8027,
"eval_commonvoice-bn-transcription_samples_per_second": 5.422,
"eval_commonvoice-bn-transcription_steps_per_second": 0.678,
"step": 4320
},
{
"epoch": 5.866666666666667,
"grad_norm": 0.038330078125,
"learning_rate": 0.0017288924850009575,
"loss": 0.0578,
"step": 4400
},
{
"epoch": 6.0,
"grad_norm": 0.09423828125,
"learning_rate": 0.0017136270393953942,
"loss": 0.0545,
"step": 4500
},
{
"epoch": 6.133333333333334,
"grad_norm": 0.09521484375,
"learning_rate": 0.0016980243260949395,
"loss": 0.0505,
"step": 4600
},
{
"epoch": 6.266666666666667,
"grad_norm": 0.037841796875,
"learning_rate": 0.0016820929208154786,
"loss": 0.0521,
"step": 4700
},
{
"epoch": 6.4,
"grad_norm": 0.037109375,
"learning_rate": 0.0016658415799317965,
"loss": 0.0513,
"step": 4800
},
{
"epoch": 6.533333333333333,
"grad_norm": 0.02783203125,
"learning_rate": 0.001649279235664813,
"loss": 0.0495,
"step": 4900
},
{
"epoch": 6.666666666666667,
"grad_norm": 0.031494140625,
"learning_rate": 0.0016324149911721704,
"loss": 0.0479,
"step": 5000
},
{
"epoch": 6.72,
"eval_commonvoice-bn-transcription_loss": 0.6727238893508911,
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074,
"eval_commonvoice-bn-transcription_runtime": 14.0248,
"eval_commonvoice-bn-transcription_samples_per_second": 4.563,
"eval_commonvoice-bn-transcription_steps_per_second": 0.57,
"step": 5040
},
{
"epoch": 6.8,
"grad_norm": 0.034423828125,
"learning_rate": 0.0016152581155448648,
"loss": 0.0473,
"step": 5100
},
{
"epoch": 6.933333333333334,
"grad_norm": 0.061279296875,
"learning_rate": 0.0015978180387126795,
"loss": 0.0474,
"step": 5200
},
{
"epoch": 7.066666666666666,
"grad_norm": 0.033447265625,
"learning_rate": 0.0015801043462612132,
"loss": 0.0445,
"step": 5300
},
{
"epoch": 7.2,
"grad_norm": 0.041748046875,
"learning_rate": 0.0015621267741633578,
"loss": 0.045,
"step": 5400
},
{
"epoch": 7.333333333333333,
"grad_norm": 0.0234375,
"learning_rate": 0.0015438952034281166,
"loss": 0.0444,
"step": 5500
},
{
"epoch": 7.466666666666667,
"grad_norm": 0.033447265625,
"learning_rate": 0.0015254196546697087,
"loss": 0.0444,
"step": 5600
},
{
"epoch": 7.6,
"grad_norm": 0.03271484375,
"learning_rate": 0.0015067102825999403,
"loss": 0.0426,
"step": 5700
},
{
"epoch": 7.68,
"eval_commonvoice-bn-transcription_loss": 0.6831041574478149,
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074,
"eval_commonvoice-bn-transcription_runtime": 16.8726,
"eval_commonvoice-bn-transcription_samples_per_second": 3.793,
"eval_commonvoice-bn-transcription_steps_per_second": 0.474,
"step": 5760
},
{
"epoch": 7.733333333333333,
"grad_norm": 0.05029296875,
"learning_rate": 0.0014877773704468733,
"loss": 0.0426,
"step": 5800
},
{
"epoch": 7.866666666666667,
"grad_norm": 0.0262451171875,
"learning_rate": 0.001468631324302856,
"loss": 0.0426,
"step": 5900
},
{
"epoch": 8.0,
"grad_norm": 0.0283203125,
"learning_rate": 0.0014492826674050248,
"loss": 0.0416,
"step": 6000
},
{
"epoch": 8.133333333333333,
"grad_norm": 0.0224609375,
"learning_rate": 0.0014297420343514216,
"loss": 0.0409,
"step": 6100
},
{
"epoch": 8.266666666666667,
"grad_norm": 0.0213623046875,
"learning_rate": 0.0014100201652558998,
"loss": 0.0411,
"step": 6200
},
{
"epoch": 8.4,
"grad_norm": 0.036865234375,
"learning_rate": 0.0013901278998450384,
"loss": 0.0415,
"step": 6300
},
{
"epoch": 8.533333333333333,
"grad_norm": 0.022705078125,
"learning_rate": 0.0013700761715003068,
"loss": 0.04,
"step": 6400
},
{
"epoch": 8.64,
"eval_commonvoice-bn-transcription_loss": 0.6834076642990112,
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074,
"eval_commonvoice-bn-transcription_runtime": 12.4556,
"eval_commonvoice-bn-transcription_samples_per_second": 5.138,
"eval_commonvoice-bn-transcription_steps_per_second": 0.642,
"step": 6480
},
{
"epoch": 8.666666666666666,
"grad_norm": 0.0191650390625,
"learning_rate": 0.0013498760012487503,
"loss": 0.0402,
"step": 6500
},
{
"epoch": 8.8,
"grad_norm": 0.03515625,
"learning_rate": 0.001329538491705509,
"loss": 0.04,
"step": 6600
},
{
"epoch": 8.933333333333334,
"grad_norm": 0.043701171875,
"learning_rate": 0.001309074820971485,
"loss": 0.0405,
"step": 6700
},
{
"epoch": 9.066666666666666,
"grad_norm": 0.020751953125,
"learning_rate": 0.0012884962364895303,
"loss": 0.0375,
"step": 6800
},
{
"epoch": 9.2,
"grad_norm": 0.022216796875,
"learning_rate": 0.0012678140488625131,
"loss": 0.0391,
"step": 6900
},
{
"epoch": 9.333333333333334,
"grad_norm": 0.03515625,
"learning_rate": 0.0012470396256366771,
"loss": 0.0393,
"step": 7000
},
{
"epoch": 9.466666666666667,
"grad_norm": 0.0213623046875,
"learning_rate": 0.0012261843850536976,
"loss": 0.0396,
"step": 7100
},
{
"epoch": 9.6,
"grad_norm": 0.0194091796875,
"learning_rate": 0.0012052597897748746,
"loss": 0.0375,
"step": 7200
},
{
"epoch": 9.6,
"eval_commonvoice-bn-transcription_loss": 0.6838600039482117,
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074,
"eval_commonvoice-bn-transcription_runtime": 13.9275,
"eval_commonvoice-bn-transcription_samples_per_second": 4.595,
"eval_commonvoice-bn-transcription_steps_per_second": 0.574,
"step": 7200
},
{
"epoch": 9.733333333333333,
"grad_norm": 0.04052734375,
"learning_rate": 0.001184277340580916,
"loss": 0.0385,
"step": 7300
},
{
"epoch": 9.866666666666667,
"grad_norm": 0.02880859375,
"learning_rate": 0.0011632485700507637,
"loss": 0.0389,
"step": 7400
},
{
"epoch": 10.0,
"grad_norm": 0.0234375,
"learning_rate": 0.001142185036222946,
"loss": 0.0377,
"step": 7500
},
{
"epoch": 10.133333333333333,
"grad_norm": 0.01708984375,
"learning_rate": 0.0011210983162429347,
"loss": 0.037,
"step": 7600
},
{
"epoch": 10.266666666666667,
"grad_norm": 0.023193359375,
"learning_rate": 0.0011,
"loss": 0.0378,
"step": 7700
},
{
"epoch": 10.4,
"grad_norm": 0.03662109375,
"learning_rate": 0.0010789016837570657,
"loss": 0.0384,
"step": 7800
},
{
"epoch": 10.533333333333333,
"grad_norm": 0.0201416015625,
"learning_rate": 0.001057814963777054,
"loss": 0.0371,
"step": 7900
},
{
"epoch": 10.56,
"eval_commonvoice-bn-transcription_loss": 0.6807686686515808,
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074,
"eval_commonvoice-bn-transcription_runtime": 13.6962,
"eval_commonvoice-bn-transcription_samples_per_second": 4.673,
"eval_commonvoice-bn-transcription_steps_per_second": 0.584,
"step": 7920
},
{
"epoch": 10.666666666666666,
"grad_norm": 0.01806640625,
"learning_rate": 0.0010367514299492366,
"loss": 0.0374,
"step": 8000
},
{
"epoch": 10.8,
"grad_norm": 0.032470703125,
"learning_rate": 0.0010157226594190844,
"loss": 0.0375,
"step": 8100
},
{
"epoch": 10.933333333333334,
"grad_norm": 0.043212890625,
"learning_rate": 0.0009947402102251258,
"loss": 0.0381,
"step": 8200
},
{
"epoch": 11.066666666666666,
"grad_norm": 0.0260009765625,
"learning_rate": 0.0009738156149463029,
"loss": 0.0355,
"step": 8300
},
{
"epoch": 11.2,
"grad_norm": 0.0235595703125,
"learning_rate": 0.0009529603743633229,
"loss": 0.0371,
"step": 8400
},
{
"epoch": 11.333333333333334,
"grad_norm": 0.03271484375,
"learning_rate": 0.000932185951137487,
"loss": 0.0373,
"step": 8500
},
{
"epoch": 11.466666666666667,
"grad_norm": 0.02392578125,
"learning_rate": 0.0009115037635104702,
"loss": 0.0376,
"step": 8600
},
{
"epoch": 11.52,
"eval_commonvoice-bn-transcription_loss": 0.6774536967277527,
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074,
"eval_commonvoice-bn-transcription_runtime": 12.1429,
"eval_commonvoice-bn-transcription_samples_per_second": 5.271,
"eval_commonvoice-bn-transcription_steps_per_second": 0.659,
"step": 8640
},
{
"epoch": 11.6,
"grad_norm": 0.0205078125,
"learning_rate": 0.0008909251790285153,
"loss": 0.0359,
"step": 8700
},
{
"epoch": 11.733333333333333,
"grad_norm": 0.0198974609375,
"learning_rate": 0.0008704615082944913,
"loss": 0.0368,
"step": 8800
},
{
"epoch": 11.866666666666667,
"grad_norm": 0.0234375,
"learning_rate": 0.0008501239987512495,
"loss": 0.0373,
"step": 8900
},
{
"epoch": 12.0,
"grad_norm": 0.0238037109375,
"learning_rate": 0.0008299238284996935,
"loss": 0.0363,
"step": 9000
},
{
"epoch": 12.133333333333333,
"grad_norm": 0.0164794921875,
"learning_rate": 0.0008098721001549618,
"loss": 0.0358,
"step": 9100
},
{
"epoch": 12.266666666666667,
"grad_norm": 0.018798828125,
"learning_rate": 0.0007899798347441006,
"loss": 0.0366,
"step": 9200
},
{
"epoch": 12.4,
"grad_norm": 0.048583984375,
"learning_rate": 0.0007702579656485785,
"loss": 0.0371,
"step": 9300
},
{
"epoch": 12.48,
"eval_commonvoice-bn-transcription_loss": 0.683639645576477,
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074,
"eval_commonvoice-bn-transcription_runtime": 13.4595,
"eval_commonvoice-bn-transcription_samples_per_second": 4.755,
"eval_commonvoice-bn-transcription_steps_per_second": 0.594,
"step": 9360
},
{
"epoch": 12.533333333333333,
"grad_norm": 0.0189208984375,
"learning_rate": 0.0007507173325949752,
"loss": 0.036,
"step": 9400
},
{
"epoch": 12.666666666666666,
"grad_norm": 0.0272216796875,
"learning_rate": 0.0007313686756971443,
"loss": 0.0363,
"step": 9500
},
{
"epoch": 12.8,
"grad_norm": 0.0306396484375,
"learning_rate": 0.0007122226295531266,
"loss": 0.0365,
"step": 9600
},
{
"epoch": 12.933333333333334,
"grad_norm": 0.035400390625,
"learning_rate": 0.0006932897174000596,
"loss": 0.037,
"step": 9700
},
{
"epoch": 13.066666666666666,
"grad_norm": 0.02197265625,
"learning_rate": 0.0006745803453302912,
"loss": 0.0347,
"step": 9800
},
{
"epoch": 13.2,
"grad_norm": 0.0277099609375,
"learning_rate": 0.0006561047965718835,
"loss": 0.0363,
"step": 9900
},
{
"epoch": 13.333333333333334,
"grad_norm": 0.017333984375,
"learning_rate": 0.0006378732258366421,
"loss": 0.0365,
"step": 10000
},
{
"epoch": 13.44,
"eval_commonvoice-bn-transcription_loss": 0.6834643483161926,
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074,
"eval_commonvoice-bn-transcription_runtime": 13.0019,
"eval_commonvoice-bn-transcription_samples_per_second": 4.922,
"eval_commonvoice-bn-transcription_steps_per_second": 0.615,
"step": 10080
},
{
"epoch": 13.466666666666667,
"grad_norm": 0.0230712890625,
"learning_rate": 0.0006198956537387869,
"loss": 0.0369,
"step": 10100
},
{
"epoch": 13.6,
"grad_norm": 0.0281982421875,
"learning_rate": 0.0006021819612873205,
"loss": 0.0352,
"step": 10200
},
{
"epoch": 13.733333333333333,
"grad_norm": 0.018310546875,
"learning_rate": 0.0005847418844551355,
"loss": 0.0362,
"step": 10300
},
{
"epoch": 13.866666666666667,
"grad_norm": 0.0194091796875,
"learning_rate": 0.0005675850088278299,
"loss": 0.0367,
"step": 10400
},
{
"epoch": 14.0,
"grad_norm": 0.03271484375,
"learning_rate": 0.0005507207643351873,
"loss": 0.0358,
"step": 10500
},
{
"epoch": 14.133333333333333,
"grad_norm": 0.0166015625,
"learning_rate": 0.0005341584200682039,
"loss": 0.0353,
"step": 10600
},
{
"epoch": 14.266666666666667,
"grad_norm": 0.017578125,
"learning_rate": 0.0005179070791845213,
"loss": 0.0361,
"step": 10700
},
{
"epoch": 14.4,
"grad_norm": 0.04833984375,
"learning_rate": 0.0005019756739050605,
"loss": 0.0367,
"step": 10800
},
{
"epoch": 14.4,
"eval_commonvoice-bn-transcription_loss": 0.6827465295791626,
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074,
"eval_commonvoice-bn-transcription_runtime": 13.7129,
"eval_commonvoice-bn-transcription_samples_per_second": 4.667,
"eval_commonvoice-bn-transcription_steps_per_second": 0.583,
"step": 10800
},
{
"epoch": 14.533333333333333,
"grad_norm": 0.018798828125,
"learning_rate": 0.00048637296060460595,
"loss": 0.0357,
"step": 10900
},
{
"epoch": 14.666666666666666,
"grad_norm": 0.04296875,
"learning_rate": 0.0004711075149990425,
"loss": 0.0359,
"step": 11000
},
{
"epoch": 14.8,
"grad_norm": 0.0255126953125,
"learning_rate": 0.0004561877274318854,
"loss": 0.0361,
"step": 11100
},
{
"epoch": 14.933333333333334,
"grad_norm": 0.03515625,
"learning_rate": 0.0004416217982626981,
"loss": 0.0367,
"step": 11200
},
{
"epoch": 15.066666666666666,
"grad_norm": 0.018310546875,
"learning_rate": 0.0004274177333599306,
"loss": 0.0345,
"step": 11300
},
{
"epoch": 15.2,
"grad_norm": 0.0194091796875,
"learning_rate": 0.00041358333970065636,
"loss": 0.036,
"step": 11400
},
{
"epoch": 15.333333333333334,
"grad_norm": 0.01806640625,
"learning_rate": 0.00040012622107962314,
"loss": 0.0363,
"step": 11500
},
{
"epoch": 15.36,
"eval_commonvoice-bn-transcription_loss": 0.6828598380088806,
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074,
"eval_commonvoice-bn-transcription_runtime": 13.9421,
"eval_commonvoice-bn-transcription_samples_per_second": 4.59,
"eval_commonvoice-bn-transcription_steps_per_second": 0.574,
"step": 11520
},
{
"epoch": 15.466666666666667,
"grad_norm": 0.034423828125,
"learning_rate": 0.0003870537739299836,
"loss": 0.0366,
"step": 11600
},
{
"epoch": 15.6,
"grad_norm": 0.02197265625,
"learning_rate": 0.0003743731832579933,
"loss": 0.035,
"step": 11700
},
{
"epoch": 15.733333333333333,
"grad_norm": 0.017822265625,
"learning_rate": 0.00036209141869391796,
"loss": 0.036,
"step": 11800
},
{
"epoch": 15.866666666666667,
"grad_norm": 0.0179443359375,
"learning_rate": 0.00035021523066131776,
"loss": 0.0365,
"step": 11900
},
{
"epoch": 16.0,
"grad_norm": 0.0252685546875,
"learning_rate": 0.00033875114666681236,
"loss": 0.0356,
"step": 12000
},
{
"epoch": 16.133333333333333,
"grad_norm": 0.0284423828125,
"learning_rate": 0.0003277054677123703,
"loss": 0.0351,
"step": 12100
},
{
"epoch": 16.266666666666666,
"grad_norm": 0.01708984375,
"learning_rate": 0.00031708426483208885,
"loss": 0.0359,
"step": 12200
},
{
"epoch": 16.32,
"eval_commonvoice-bn-transcription_loss": 0.6832164525985718,
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074,
"eval_commonvoice-bn-transcription_runtime": 16.9968,
"eval_commonvoice-bn-transcription_samples_per_second": 3.765,
"eval_commonvoice-bn-transcription_steps_per_second": 0.471,
"step": 12240
},
{
"epoch": 16.4,
"grad_norm": 0.038818359375,
"learning_rate": 0.00030689337575537455,
"loss": 0.0365,
"step": 12300
},
{
"epoch": 16.533333333333335,
"grad_norm": 0.019775390625,
"learning_rate": 0.00029713840169835217,
"loss": 0.0355,
"step": 12400
},
{
"epoch": 16.666666666666668,
"grad_norm": 0.03369140625,
"learning_rate": 0.0002878247042852705,
"loss": 0.0358,
"step": 12500
},
{
"epoch": 16.8,
"grad_norm": 0.0283203125,
"learning_rate": 0.0002789574026015931,
"loss": 0.036,
"step": 12600
},
{
"epoch": 16.933333333333334,
"grad_norm": 0.035888671875,
"learning_rate": 0.0002705413703803964,
"loss": 0.0366,
"step": 12700
},
{
"epoch": 17.066666666666666,
"grad_norm": 0.0174560546875,
"learning_rate": 0.00026258123332362215,
"loss": 0.0344,
"step": 12800
},
{
"epoch": 17.2,
"grad_norm": 0.016845703125,
"learning_rate": 0.0002550813665596523,
"loss": 0.0359,
"step": 12900
},
{
"epoch": 17.28,
"eval_commonvoice-bn-transcription_loss": 0.6824997067451477,
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074,
"eval_commonvoice-bn-transcription_runtime": 13.9582,
"eval_commonvoice-bn-transcription_samples_per_second": 4.585,
"eval_commonvoice-bn-transcription_steps_per_second": 0.573,
"step": 12960
},
{
"epoch": 17.333333333333332,
"grad_norm": 0.0177001953125,
"learning_rate": 0.00024804589223860934,
"loss": 0.0362,
"step": 13000
},
{
"epoch": 17.466666666666665,
"grad_norm": 0.02734375,
"learning_rate": 0.00024147867726669888,
"loss": 0.0365,
"step": 13100
},
{
"epoch": 17.6,
"grad_norm": 0.0191650390625,
"learning_rate": 0.00023538333118084396,
"loss": 0.035,
"step": 13200
},
{
"epoch": 17.733333333333334,
"grad_norm": 0.026611328125,
"learning_rate": 0.00022976320416477446,
"loss": 0.0359,
"step": 13300
},
{
"epoch": 17.866666666666667,
"grad_norm": 0.0189208984375,
"learning_rate": 0.00022462138520766832,
"loss": 0.0364,
"step": 13400
},
{
"epoch": 18.0,
"grad_norm": 0.02294921875,
"learning_rate": 0.0002199607004063494,
"loss": 0.0356,
"step": 13500
},
{
"epoch": 18.133333333333333,
"grad_norm": 0.0390625,
"learning_rate": 0.00021578371141198153,
"loss": 0.0352,
"step": 13600
},
{
"epoch": 18.24,
"eval_commonvoice-bn-transcription_loss": 0.6843233704566956,
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074,
"eval_commonvoice-bn-transcription_runtime": 13.7375,
"eval_commonvoice-bn-transcription_samples_per_second": 4.659,
"eval_commonvoice-bn-transcription_steps_per_second": 0.582,
"step": 13680
},
{
"epoch": 18.266666666666666,
"grad_norm": 0.0228271484375,
"learning_rate": 0.00021209271402210822,
"loss": 0.0359,
"step": 13700
},
{
"epoch": 18.4,
"grad_norm": 0.044921875,
"learning_rate": 0.00020888973691881416,
"loss": 0.0365,
"step": 13800
},
{
"epoch": 18.533333333333335,
"grad_norm": 0.0262451171875,
"learning_rate": 0.0002061765405537013,
"loss": 0.0355,
"step": 13900
},
{
"epoch": 18.666666666666668,
"grad_norm": 0.026611328125,
"learning_rate": 0.00020395461618029175,
"loss": 0.0358,
"step": 14000
},
{
"epoch": 18.8,
"grad_norm": 0.0201416015625,
"learning_rate": 0.0002022251850343909,
"loss": 0.036,
"step": 14100
},
{
"epoch": 18.933333333333334,
"grad_norm": 0.0439453125,
"learning_rate": 0.0002009891976628598,
"loss": 0.0365,
"step": 14200
},
{
"epoch": 19.066666666666666,
"grad_norm": 0.021728515625,
"learning_rate": 0.00020024733340116572,
"loss": 0.0344,
"step": 14300
},
{
"epoch": 19.2,
"grad_norm": 0.0177001953125,
"learning_rate": 0.0002,
"loss": 0.0358,
"step": 14400
},
{
"epoch": 19.2,
"eval_commonvoice-bn-transcription_loss": 0.6850356459617615,
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074,
"eval_commonvoice-bn-transcription_runtime": 15.339,
"eval_commonvoice-bn-transcription_samples_per_second": 4.172,
"eval_commonvoice-bn-transcription_steps_per_second": 0.522,
"step": 14400
}
],
"logging_steps": 100,
"max_steps": 14400,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 3600,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 6.421884240590438e+17,
"train_batch_size": 24,
"trial_name": null,
"trial_params": null
}