{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 19.2,
"eval_steps": 720,
"global_step": 14400,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0013333333333333333,
"grad_norm": 16.625,
"learning_rate": 2e-06,
"loss": 2.3594,
"step": 1
},
{
"epoch": 0.13333333333333333,
"grad_norm": 0.2490234375,
"learning_rate": 0.0002,
"loss": 0.9537,
"step": 100
},
{
"epoch": 0.26666666666666666,
"grad_norm": 0.263671875,
"learning_rate": 0.0004,
"loss": 0.5927,
"step": 200
},
{
"epoch": 0.4,
"grad_norm": 0.2890625,
"learning_rate": 0.0006,
"loss": 0.5721,
"step": 300
},
{
"epoch": 0.5333333333333333,
"grad_norm": 0.546875,
"learning_rate": 0.0008,
"loss": 0.4882,
"step": 400
},
{
"epoch": 0.6666666666666666,
"grad_norm": 0.42578125,
"learning_rate": 0.001,
"loss": 0.3812,
"step": 500
},
{
"epoch": 0.8,
"grad_norm": 0.474609375,
"learning_rate": 0.0012,
"loss": 0.353,
"step": 600
},
{
"epoch": 0.9333333333333333,
"grad_norm": 0.333984375,
"learning_rate": 0.0014,
"loss": 0.3347,
"step": 700
},
{
"epoch": 0.96,
"eval_commonvoice-bn-transcription_loss": 0.6287540793418884,
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074,
"eval_commonvoice-bn-transcription_runtime": 11.4061,
"eval_commonvoice-bn-transcription_samples_per_second": 5.611,
"eval_commonvoice-bn-transcription_steps_per_second": 0.701,
"step": 720
},
{
"epoch": 1.0666666666666667,
"grad_norm": 0.275390625,
"learning_rate": 0.0016,
"loss": 0.2786,
"step": 800
},
{
"epoch": 1.2,
"grad_norm": 0.234375,
"learning_rate": 0.0018000000000000002,
"loss": 0.282,
"step": 900
},
{
"epoch": 1.3333333333333333,
"grad_norm": 0.1884765625,
"learning_rate": 0.002,
"loss": 0.2787,
"step": 1000
},
{
"epoch": 1.4666666666666668,
"grad_norm": 0.279296875,
"learning_rate": 0.0019997526665988343,
"loss": 0.2676,
"step": 1100
},
{
"epoch": 1.6,
"grad_norm": 0.19921875,
"learning_rate": 0.00199901080233714,
"loss": 0.2267,
"step": 1200
},
{
"epoch": 1.7333333333333334,
"grad_norm": 0.208984375,
"learning_rate": 0.0019977748149656092,
"loss": 0.2247,
"step": 1300
},
{
"epoch": 1.8666666666666667,
"grad_norm": 0.2021484375,
"learning_rate": 0.0019960453838197083,
"loss": 0.2223,
"step": 1400
},
{
"epoch": 1.92,
"eval_commonvoice-bn-transcription_loss": 0.5989859104156494,
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074,
"eval_commonvoice-bn-transcription_runtime": 13.0069,
"eval_commonvoice-bn-transcription_samples_per_second": 4.92,
"eval_commonvoice-bn-transcription_steps_per_second": 0.615,
"step": 1440
},
{
"epoch": 2.0,
"grad_norm": 0.1337890625,
"learning_rate": 0.0019938234594462988,
"loss": 0.1983,
"step": 1500
},
{
"epoch": 2.1333333333333333,
"grad_norm": 0.203125,
"learning_rate": 0.0019911102630811857,
"loss": 0.1747,
"step": 1600
},
{
"epoch": 2.2666666666666666,
"grad_norm": 0.2060546875,
"learning_rate": 0.001987907285977892,
"loss": 0.1814,
"step": 1700
},
{
"epoch": 2.4,
"grad_norm": 0.1689453125,
"learning_rate": 0.0019842162885880185,
"loss": 0.181,
"step": 1800
},
{
"epoch": 2.533333333333333,
"grad_norm": 0.1630859375,
"learning_rate": 0.0019800392995936505,
"loss": 0.1567,
"step": 1900
},
{
"epoch": 2.6666666666666665,
"grad_norm": 0.1357421875,
"learning_rate": 0.0019753786147923315,
"loss": 0.1517,
"step": 2000
},
{
"epoch": 2.8,
"grad_norm": 0.11083984375,
"learning_rate": 0.0019702367958352256,
"loss": 0.1467,
"step": 2100
},
{
"epoch": 2.88,
"eval_commonvoice-bn-transcription_loss": 0.5943728685379028,
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074,
"eval_commonvoice-bn-transcription_runtime": 12.3498,
"eval_commonvoice-bn-transcription_samples_per_second": 5.182,
"eval_commonvoice-bn-transcription_steps_per_second": 0.648,
"step": 2160
},
{
"epoch": 2.9333333333333336,
"grad_norm": 0.1845703125,
"learning_rate": 0.001964616668819156,
"loss": 0.1458,
"step": 2200
},
{
"epoch": 3.066666666666667,
"grad_norm": 0.10595703125,
"learning_rate": 0.001958521322733301,
"loss": 0.116,
"step": 2300
},
{
"epoch": 3.2,
"grad_norm": 0.177734375,
"learning_rate": 0.0019519541077613908,
"loss": 0.1273,
"step": 2400
},
{
"epoch": 3.3333333333333335,
"grad_norm": 0.1435546875,
"learning_rate": 0.0019449186334403478,
"loss": 0.1232,
"step": 2500
},
{
"epoch": 3.466666666666667,
"grad_norm": 0.1201171875,
"learning_rate": 0.001937418766676378,
"loss": 0.1197,
"step": 2600
},
{
"epoch": 3.6,
"grad_norm": 0.11572265625,
"learning_rate": 0.0019294586296196034,
"loss": 0.0999,
"step": 2700
},
{
"epoch": 3.7333333333333334,
"grad_norm": 0.1943359375,
"learning_rate": 0.0019210425973984072,
"loss": 0.1029,
"step": 2800
},
{
"epoch": 3.84,
"eval_commonvoice-bn-transcription_loss": 0.6720843315124512,
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074,
"eval_commonvoice-bn-transcription_runtime": 11.4916,
"eval_commonvoice-bn-transcription_samples_per_second": 5.569,
"eval_commonvoice-bn-transcription_steps_per_second": 0.696,
"step": 2880
},
{
"epoch": 3.8666666666666667,
"grad_norm": 0.30859375,
"learning_rate": 0.0019121752957147297,
"loss": 0.106,
"step": 2900
},
{
"epoch": 4.0,
"grad_norm": 0.0634765625,
"learning_rate": 0.0019028615983016476,
"loss": 0.0966,
"step": 3000
},
{
"epoch": 4.133333333333334,
"grad_norm": 0.275390625,
"learning_rate": 0.0018931066242446255,
"loss": 0.0845,
"step": 3100
},
{
"epoch": 4.266666666666667,
"grad_norm": 0.09765625,
"learning_rate": 0.0018829157351679113,
"loss": 0.0889,
"step": 3200
},
{
"epoch": 4.4,
"grad_norm": 0.2177734375,
"learning_rate": 0.0018722945322876298,
"loss": 0.0915,
"step": 3300
},
{
"epoch": 4.533333333333333,
"grad_norm": 0.091796875,
"learning_rate": 0.0018612488533331878,
"loss": 0.0824,
"step": 3400
},
{
"epoch": 4.666666666666667,
"grad_norm": 0.07763671875,
"learning_rate": 0.0018497847693386823,
"loss": 0.0767,
"step": 3500
},
{
"epoch": 4.8,
"grad_norm": 0.09130859375,
"learning_rate": 0.0018379085813060821,
"loss": 0.0775,
"step": 3600
},
{
"epoch": 4.8,
"eval_commonvoice-bn-transcription_loss": 0.6684668064117432,
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074,
"eval_commonvoice-bn-transcription_runtime": 11.4545,
"eval_commonvoice-bn-transcription_samples_per_second": 5.587,
"eval_commonvoice-bn-transcription_steps_per_second": 0.698,
"step": 3600
},
{
"epoch": 4.933333333333334,
"grad_norm": 0.1220703125,
"learning_rate": 0.0018256268167420066,
"loss": 0.0778,
"step": 3700
},
{
"epoch": 5.066666666666666,
"grad_norm": 0.060546875,
"learning_rate": 0.0018129462260700162,
"loss": 0.0638,
"step": 3800
},
{
"epoch": 5.2,
"grad_norm": 0.068359375,
"learning_rate": 0.001799873778920377,
"loss": 0.0672,
"step": 3900
},
{
"epoch": 5.333333333333333,
"grad_norm": 0.1376953125,
"learning_rate": 0.0017864166602993437,
"loss": 0.0671,
"step": 4000
},
{
"epoch": 5.466666666666667,
"grad_norm": 0.080078125,
"learning_rate": 0.0017725822666400696,
"loss": 0.0654,
"step": 4100
},
{
"epoch": 5.6,
"grad_norm": 0.0732421875,
"learning_rate": 0.001758378201737302,
"loss": 0.0574,
"step": 4200
},
{
"epoch": 5.733333333333333,
"grad_norm": 0.06591796875,
"learning_rate": 0.001743812272568115,
"loss": 0.0593,
"step": 4300
},
{
"epoch": 5.76,
"eval_commonvoice-bn-transcription_loss": 0.6580834984779358,
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074,
"eval_commonvoice-bn-transcription_runtime": 11.8027,
"eval_commonvoice-bn-transcription_samples_per_second": 5.422,
"eval_commonvoice-bn-transcription_steps_per_second": 0.678,
"step": 4320
},
{
"epoch": 5.866666666666667,
"grad_norm": 0.038330078125,
"learning_rate": 0.0017288924850009575,
"loss": 0.0578,
"step": 4400
},
{
"epoch": 6.0,
"grad_norm": 0.09423828125,
"learning_rate": 0.0017136270393953942,
"loss": 0.0545,
"step": 4500
},
{
"epoch": 6.133333333333334,
"grad_norm": 0.09521484375,
"learning_rate": 0.0016980243260949395,
"loss": 0.0505,
"step": 4600
},
{
"epoch": 6.266666666666667,
"grad_norm": 0.037841796875,
"learning_rate": 0.0016820929208154786,
"loss": 0.0521,
"step": 4700
},
{
"epoch": 6.4,
"grad_norm": 0.037109375,
"learning_rate": 0.0016658415799317965,
"loss": 0.0513,
"step": 4800
},
{
"epoch": 6.533333333333333,
"grad_norm": 0.02783203125,
"learning_rate": 0.001649279235664813,
"loss": 0.0495,
"step": 4900
},
{
"epoch": 6.666666666666667,
"grad_norm": 0.031494140625,
"learning_rate": 0.0016324149911721704,
"loss": 0.0479,
"step": 5000
},
{
"epoch": 6.72,
"eval_commonvoice-bn-transcription_loss": 0.6727238893508911,
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074,
"eval_commonvoice-bn-transcription_runtime": 14.0248,
"eval_commonvoice-bn-transcription_samples_per_second": 4.563,
"eval_commonvoice-bn-transcription_steps_per_second": 0.57,
"step": 5040
},
{
"epoch": 6.8,
"grad_norm": 0.034423828125,
"learning_rate": 0.0016152581155448648,
"loss": 0.0473,
"step": 5100
},
{
"epoch": 6.933333333333334,
"grad_norm": 0.061279296875,
"learning_rate": 0.0015978180387126795,
"loss": 0.0474,
"step": 5200
},
{
"epoch": 7.066666666666666,
"grad_norm": 0.033447265625,
"learning_rate": 0.0015801043462612132,
"loss": 0.0445,
"step": 5300
},
{
"epoch": 7.2,
"grad_norm": 0.041748046875,
"learning_rate": 0.0015621267741633578,
"loss": 0.045,
"step": 5400
},
{
"epoch": 7.333333333333333,
"grad_norm": 0.0234375,
"learning_rate": 0.0015438952034281166,
"loss": 0.0444,
"step": 5500
},
{
"epoch": 7.466666666666667,
"grad_norm": 0.033447265625,
"learning_rate": 0.0015254196546697087,
"loss": 0.0444,
"step": 5600
},
{
"epoch": 7.6,
"grad_norm": 0.03271484375,
"learning_rate": 0.0015067102825999403,
"loss": 0.0426,
"step": 5700
},
{
"epoch": 7.68,
"eval_commonvoice-bn-transcription_loss": 0.6831041574478149,
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074,
"eval_commonvoice-bn-transcription_runtime": 16.8726,
"eval_commonvoice-bn-transcription_samples_per_second": 3.793,
"eval_commonvoice-bn-transcription_steps_per_second": 0.474,
"step": 5760
},
{
"epoch": 7.733333333333333,
"grad_norm": 0.05029296875,
"learning_rate": 0.0014877773704468733,
"loss": 0.0426,
"step": 5800
},
{
"epoch": 7.866666666666667,
"grad_norm": 0.0262451171875,
"learning_rate": 0.001468631324302856,
"loss": 0.0426,
"step": 5900
},
{
"epoch": 8.0,
"grad_norm": 0.0283203125,
"learning_rate": 0.0014492826674050248,
"loss": 0.0416,
"step": 6000
},
{
"epoch": 8.133333333333333,
"grad_norm": 0.0224609375,
"learning_rate": 0.0014297420343514216,
"loss": 0.0409,
"step": 6100
},
{
"epoch": 8.266666666666667,
"grad_norm": 0.0213623046875,
"learning_rate": 0.0014100201652558998,
"loss": 0.0411,
"step": 6200
},
{
"epoch": 8.4,
"grad_norm": 0.036865234375,
"learning_rate": 0.0013901278998450384,
"loss": 0.0415,
"step": 6300
},
{
"epoch": 8.533333333333333,
"grad_norm": 0.022705078125,
"learning_rate": 0.0013700761715003068,
"loss": 0.04,
"step": 6400
},
{
"epoch": 8.64,
"eval_commonvoice-bn-transcription_loss": 0.6834076642990112,
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074,
"eval_commonvoice-bn-transcription_runtime": 12.4556,
"eval_commonvoice-bn-transcription_samples_per_second": 5.138,
"eval_commonvoice-bn-transcription_steps_per_second": 0.642,
"step": 6480
},
{
"epoch": 8.666666666666666,
"grad_norm": 0.0191650390625,
"learning_rate": 0.0013498760012487503,
"loss": 0.0402,
"step": 6500
},
{
"epoch": 8.8,
"grad_norm": 0.03515625,
"learning_rate": 0.001329538491705509,
"loss": 0.04,
"step": 6600
},
{
"epoch": 8.933333333333334,
"grad_norm": 0.043701171875,
"learning_rate": 0.001309074820971485,
"loss": 0.0405,
"step": 6700
},
{
"epoch": 9.066666666666666,
"grad_norm": 0.020751953125,
"learning_rate": 0.0012884962364895303,
"loss": 0.0375,
"step": 6800
},
{
"epoch": 9.2,
"grad_norm": 0.022216796875,
"learning_rate": 0.0012678140488625131,
"loss": 0.0391,
"step": 6900
},
{
"epoch": 9.333333333333334,
"grad_norm": 0.03515625,
"learning_rate": 0.0012470396256366771,
"loss": 0.0393,
"step": 7000
},
{
"epoch": 9.466666666666667,
"grad_norm": 0.0213623046875,
"learning_rate": 0.0012261843850536976,
"loss": 0.0396,
"step": 7100
},
{
"epoch": 9.6,
"grad_norm": 0.0194091796875,
"learning_rate": 0.0012052597897748746,
"loss": 0.0375,
"step": 7200
},
{
"epoch": 9.6,
"eval_commonvoice-bn-transcription_loss": 0.6838600039482117,
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074,
"eval_commonvoice-bn-transcription_runtime": 13.9275,
"eval_commonvoice-bn-transcription_samples_per_second": 4.595,
"eval_commonvoice-bn-transcription_steps_per_second": 0.574,
"step": 7200
},
{
"epoch": 9.733333333333333,
"grad_norm": 0.04052734375,
"learning_rate": 0.001184277340580916,
"loss": 0.0385,
"step": 7300
},
{
"epoch": 9.866666666666667,
"grad_norm": 0.02880859375,
"learning_rate": 0.0011632485700507637,
"loss": 0.0389,
"step": 7400
},
{
"epoch": 10.0,
"grad_norm": 0.0234375,
"learning_rate": 0.001142185036222946,
"loss": 0.0377,
"step": 7500
},
{
"epoch": 10.133333333333333,
"grad_norm": 0.01708984375,
"learning_rate": 0.0011210983162429347,
"loss": 0.037,
"step": 7600
},
{
"epoch": 10.266666666666667,
"grad_norm": 0.023193359375,
"learning_rate": 0.0011,
"loss": 0.0378,
"step": 7700
},
{
"epoch": 10.4,
"grad_norm": 0.03662109375,
"learning_rate": 0.0010789016837570657,
"loss": 0.0384,
"step": 7800
},
{
"epoch": 10.533333333333333,
"grad_norm": 0.0201416015625,
"learning_rate": 0.001057814963777054,
"loss": 0.0371,
"step": 7900
},
{
"epoch": 10.56,
"eval_commonvoice-bn-transcription_loss": 0.6807686686515808,
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074,
"eval_commonvoice-bn-transcription_runtime": 13.6962,
"eval_commonvoice-bn-transcription_samples_per_second": 4.673,
"eval_commonvoice-bn-transcription_steps_per_second": 0.584,
"step": 7920
},
{
"epoch": 10.666666666666666,
"grad_norm": 0.01806640625,
"learning_rate": 0.0010367514299492366,
"loss": 0.0374,
"step": 8000
},
{
"epoch": 10.8,
"grad_norm": 0.032470703125,
"learning_rate": 0.0010157226594190844,
"loss": 0.0375,
"step": 8100
},
{
"epoch": 10.933333333333334,
"grad_norm": 0.043212890625,
"learning_rate": 0.0009947402102251258,
"loss": 0.0381,
"step": 8200
},
{
"epoch": 11.066666666666666,
"grad_norm": 0.0260009765625,
"learning_rate": 0.0009738156149463029,
"loss": 0.0355,
"step": 8300
},
{
"epoch": 11.2,
"grad_norm": 0.0235595703125,
"learning_rate": 0.0009529603743633229,
"loss": 0.0371,
"step": 8400
},
{
"epoch": 11.333333333333334,
"grad_norm": 0.03271484375,
"learning_rate": 0.000932185951137487,
"loss": 0.0373,
"step": 8500
},
{
"epoch": 11.466666666666667,
"grad_norm": 0.02392578125,
"learning_rate": 0.0009115037635104702,
"loss": 0.0376,
"step": 8600
},
{
"epoch": 11.52,
"eval_commonvoice-bn-transcription_loss": 0.6774536967277527,
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074,
"eval_commonvoice-bn-transcription_runtime": 12.1429,
"eval_commonvoice-bn-transcription_samples_per_second": 5.271,
"eval_commonvoice-bn-transcription_steps_per_second": 0.659,
"step": 8640
},
{
"epoch": 11.6,
"grad_norm": 0.0205078125,
"learning_rate": 0.0008909251790285153,
"loss": 0.0359,
"step": 8700
},
{
"epoch": 11.733333333333333,
"grad_norm": 0.0198974609375,
"learning_rate": 0.0008704615082944913,
"loss": 0.0368,
"step": 8800
},
{
"epoch": 11.866666666666667,
"grad_norm": 0.0234375,
"learning_rate": 0.0008501239987512495,
"loss": 0.0373,
"step": 8900
},
{
"epoch": 12.0,
"grad_norm": 0.0238037109375,
"learning_rate": 0.0008299238284996935,
"loss": 0.0363,
"step": 9000
},
{
"epoch": 12.133333333333333,
"grad_norm": 0.0164794921875,
"learning_rate": 0.0008098721001549618,
"loss": 0.0358,
"step": 9100
},
{
"epoch": 12.266666666666667,
"grad_norm": 0.018798828125,
"learning_rate": 0.0007899798347441006,
"loss": 0.0366,
"step": 9200
},
{
"epoch": 12.4,
"grad_norm": 0.048583984375,
"learning_rate": 0.0007702579656485785,
"loss": 0.0371,
"step": 9300
},
{
"epoch": 12.48,
"eval_commonvoice-bn-transcription_loss": 0.683639645576477,
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074,
"eval_commonvoice-bn-transcription_runtime": 13.4595,
"eval_commonvoice-bn-transcription_samples_per_second": 4.755,
"eval_commonvoice-bn-transcription_steps_per_second": 0.594,
"step": 9360
},
{
"epoch": 12.533333333333333,
"grad_norm": 0.0189208984375,
"learning_rate": 0.0007507173325949752,
"loss": 0.036,
"step": 9400
},
{
"epoch": 12.666666666666666,
"grad_norm": 0.0272216796875,
"learning_rate": 0.0007313686756971443,
"loss": 0.0363,
"step": 9500
},
{
"epoch": 12.8,
"grad_norm": 0.0306396484375,
"learning_rate": 0.0007122226295531266,
"loss": 0.0365,
"step": 9600
},
{
"epoch": 12.933333333333334,
"grad_norm": 0.035400390625,
"learning_rate": 0.0006932897174000596,
"loss": 0.037,
"step": 9700
},
{
"epoch": 13.066666666666666,
"grad_norm": 0.02197265625,
"learning_rate": 0.0006745803453302912,
"loss": 0.0347,
"step": 9800
},
{
"epoch": 13.2,
"grad_norm": 0.0277099609375,
"learning_rate": 0.0006561047965718835,
"loss": 0.0363,
"step": 9900
},
{
"epoch": 13.333333333333334,
"grad_norm": 0.017333984375,
"learning_rate": 0.0006378732258366421,
"loss": 0.0365,
"step": 10000
},
{
"epoch": 13.44,
"eval_commonvoice-bn-transcription_loss": 0.6834643483161926,
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074,
"eval_commonvoice-bn-transcription_runtime": 13.0019,
"eval_commonvoice-bn-transcription_samples_per_second": 4.922,
"eval_commonvoice-bn-transcription_steps_per_second": 0.615,
"step": 10080
},
{
"epoch": 13.466666666666667,
"grad_norm": 0.0230712890625,
"learning_rate": 0.0006198956537387869,
"loss": 0.0369,
"step": 10100
},
{
"epoch": 13.6,
"grad_norm": 0.0281982421875,
"learning_rate": 0.0006021819612873205,
"loss": 0.0352,
"step": 10200
},
{
"epoch": 13.733333333333333,
"grad_norm": 0.018310546875,
"learning_rate": 0.0005847418844551355,
"loss": 0.0362,
"step": 10300
},
{
"epoch": 13.866666666666667,
"grad_norm": 0.0194091796875,
"learning_rate": 0.0005675850088278299,
"loss": 0.0367,
"step": 10400
},
{
"epoch": 14.0,
"grad_norm": 0.03271484375,
"learning_rate": 0.0005507207643351873,
"loss": 0.0358,
"step": 10500
},
{
"epoch": 14.133333333333333,
"grad_norm": 0.0166015625,
"learning_rate": 0.0005341584200682039,
"loss": 0.0353,
"step": 10600
},
{
"epoch": 14.266666666666667,
"grad_norm": 0.017578125,
"learning_rate": 0.0005179070791845213,
"loss": 0.0361,
"step": 10700
},
{
"epoch": 14.4,
"grad_norm": 0.04833984375,
"learning_rate": 0.0005019756739050605,
"loss": 0.0367,
"step": 10800
},
{
"epoch": 14.4,
"eval_commonvoice-bn-transcription_loss": 0.6827465295791626,
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074,
"eval_commonvoice-bn-transcription_runtime": 13.7129,
"eval_commonvoice-bn-transcription_samples_per_second": 4.667,
"eval_commonvoice-bn-transcription_steps_per_second": 0.583,
"step": 10800
},
{
"epoch": 14.533333333333333,
"grad_norm": 0.018798828125,
"learning_rate": 0.00048637296060460595,
"loss": 0.0357,
"step": 10900
},
{
"epoch": 14.666666666666666,
"grad_norm": 0.04296875,
"learning_rate": 0.0004711075149990425,
"loss": 0.0359,
"step": 11000
},
{
"epoch": 14.8,
"grad_norm": 0.0255126953125,
"learning_rate": 0.0004561877274318854,
"loss": 0.0361,
"step": 11100
},
{
"epoch": 14.933333333333334,
"grad_norm": 0.03515625,
"learning_rate": 0.0004416217982626981,
"loss": 0.0367,
"step": 11200
},
{
"epoch": 15.066666666666666,
"grad_norm": 0.018310546875,
"learning_rate": 0.0004274177333599306,
"loss": 0.0345,
"step": 11300
},
{
"epoch": 15.2,
"grad_norm": 0.0194091796875,
"learning_rate": 0.00041358333970065636,
"loss": 0.036,
"step": 11400
},
{
"epoch": 15.333333333333334,
"grad_norm": 0.01806640625,
"learning_rate": 0.00040012622107962314,
"loss": 0.0363,
"step": 11500
},
{
"epoch": 15.36,
"eval_commonvoice-bn-transcription_loss": 0.6828598380088806,
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074,
"eval_commonvoice-bn-transcription_runtime": 13.9421,
"eval_commonvoice-bn-transcription_samples_per_second": 4.59,
"eval_commonvoice-bn-transcription_steps_per_second": 0.574,
"step": 11520
},
{
"epoch": 15.466666666666667,
"grad_norm": 0.034423828125,
"learning_rate": 0.0003870537739299836,
"loss": 0.0366,
"step": 11600
},
{
"epoch": 15.6,
"grad_norm": 0.02197265625,
"learning_rate": 0.0003743731832579933,
"loss": 0.035,
"step": 11700
},
{
"epoch": 15.733333333333333,
"grad_norm": 0.017822265625,
"learning_rate": 0.00036209141869391796,
"loss": 0.036,
"step": 11800
},
{
"epoch": 15.866666666666667,
"grad_norm": 0.0179443359375,
"learning_rate": 0.00035021523066131776,
"loss": 0.0365,
"step": 11900
},
{
"epoch": 16.0,
"grad_norm": 0.0252685546875,
"learning_rate": 0.00033875114666681236,
"loss": 0.0356,
"step": 12000
},
{
"epoch": 16.133333333333333,
"grad_norm": 0.0284423828125,
"learning_rate": 0.0003277054677123703,
"loss": 0.0351,
"step": 12100
},
{
"epoch": 16.266666666666666,
"grad_norm": 0.01708984375,
"learning_rate": 0.00031708426483208885,
"loss": 0.0359,
"step": 12200
},
{
"epoch": 16.32,
"eval_commonvoice-bn-transcription_loss": 0.6832164525985718,
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074,
"eval_commonvoice-bn-transcription_runtime": 16.9968,
"eval_commonvoice-bn-transcription_samples_per_second": 3.765,
"eval_commonvoice-bn-transcription_steps_per_second": 0.471,
"step": 12240
},
{
"epoch": 16.4,
"grad_norm": 0.038818359375,
"learning_rate": 0.00030689337575537455,
"loss": 0.0365,
"step": 12300
},
{
"epoch": 16.533333333333335,
"grad_norm": 0.019775390625,
"learning_rate": 0.00029713840169835217,
"loss": 0.0355,
"step": 12400
},
{
"epoch": 16.666666666666668,
"grad_norm": 0.03369140625,
"learning_rate": 0.0002878247042852705,
"loss": 0.0358,
"step": 12500
},
{
"epoch": 16.8,
"grad_norm": 0.0283203125,
"learning_rate": 0.0002789574026015931,
"loss": 0.036,
"step": 12600
},
{
"epoch": 16.933333333333334,
"grad_norm": 0.035888671875,
"learning_rate": 0.0002705413703803964,
"loss": 0.0366,
"step": 12700
},
{
"epoch": 17.066666666666666,
"grad_norm": 0.0174560546875,
"learning_rate": 0.00026258123332362215,
"loss": 0.0344,
"step": 12800
},
{
"epoch": 17.2,
"grad_norm": 0.016845703125,
"learning_rate": 0.0002550813665596523,
"loss": 0.0359,
"step": 12900
},
{
"epoch": 17.28,
"eval_commonvoice-bn-transcription_loss": 0.6824997067451477,
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074,
"eval_commonvoice-bn-transcription_runtime": 13.9582,
"eval_commonvoice-bn-transcription_samples_per_second": 4.585,
"eval_commonvoice-bn-transcription_steps_per_second": 0.573,
"step": 12960
},
{
"epoch": 17.333333333333332,
"grad_norm": 0.0177001953125,
"learning_rate": 0.00024804589223860934,
"loss": 0.0362,
"step": 13000
},
{
"epoch": 17.466666666666665,
"grad_norm": 0.02734375,
"learning_rate": 0.00024147867726669888,
"loss": 0.0365,
"step": 13100
},
{
"epoch": 17.6,
"grad_norm": 0.0191650390625,
"learning_rate": 0.00023538333118084396,
"loss": 0.035,
"step": 13200
},
{
"epoch": 17.733333333333334,
"grad_norm": 0.026611328125,
"learning_rate": 0.00022976320416477446,
"loss": 0.0359,
"step": 13300
},
{
"epoch": 17.866666666666667,
"grad_norm": 0.0189208984375,
"learning_rate": 0.00022462138520766832,
"loss": 0.0364,
"step": 13400
},
{
"epoch": 18.0,
"grad_norm": 0.02294921875,
"learning_rate": 0.0002199607004063494,
"loss": 0.0356,
"step": 13500
},
{
"epoch": 18.133333333333333,
"grad_norm": 0.0390625,
"learning_rate": 0.00021578371141198153,
"loss": 0.0352,
"step": 13600
},
{
"epoch": 18.24,
"eval_commonvoice-bn-transcription_loss": 0.6843233704566956,
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074,
"eval_commonvoice-bn-transcription_runtime": 13.7375,
"eval_commonvoice-bn-transcription_samples_per_second": 4.659,
"eval_commonvoice-bn-transcription_steps_per_second": 0.582,
"step": 13680
},
{
"epoch": 18.266666666666666,
"grad_norm": 0.0228271484375,
"learning_rate": 0.00021209271402210822,
"loss": 0.0359,
"step": 13700
},
{
"epoch": 18.4,
"grad_norm": 0.044921875,
"learning_rate": 0.00020888973691881416,
"loss": 0.0365,
"step": 13800
},
{
"epoch": 18.533333333333335,
"grad_norm": 0.0262451171875,
"learning_rate": 0.0002061765405537013,
"loss": 0.0355,
"step": 13900
},
{
"epoch": 18.666666666666668,
"grad_norm": 0.026611328125,
"learning_rate": 0.00020395461618029175,
"loss": 0.0358,
"step": 14000
},
{
"epoch": 18.8,
"grad_norm": 0.0201416015625,
"learning_rate": 0.0002022251850343909,
"loss": 0.036,
"step": 14100
},
{
"epoch": 18.933333333333334,
"grad_norm": 0.0439453125,
"learning_rate": 0.0002009891976628598,
"loss": 0.0365,
"step": 14200
},
{
"epoch": 19.066666666666666,
"grad_norm": 0.021728515625,
"learning_rate": 0.00020024733340116572,
"loss": 0.0344,
"step": 14300
},
{
"epoch": 19.2,
"grad_norm": 0.0177001953125,
"learning_rate": 0.0002,
"loss": 0.0358,
"step": 14400
},
{
"epoch": 19.2,
"eval_commonvoice-bn-transcription_loss": 0.6850356459617615,
"eval_commonvoice-bn-transcription_model_preparation_time": 0.0074,
"eval_commonvoice-bn-transcription_runtime": 15.339,
"eval_commonvoice-bn-transcription_samples_per_second": 4.172,
"eval_commonvoice-bn-transcription_steps_per_second": 0.522,
"step": 14400
}
],
"logging_steps": 100,
"max_steps": 14400,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 3600,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 6.421884240590438e+17,
"train_batch_size": 24,
"trial_name": null,
"trial_params": null
}