{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 500,
"global_step": 1682,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0011890606420927466,
"grad_norm": 128.86566162109375,
"learning_rate": 4.9970273483947685e-05,
"loss": 9.559,
"step": 1
},
{
"epoch": 0.034482758620689655,
"grad_norm": 23.844635009765625,
"learning_rate": 4.913793103448276e-05,
"loss": 2.0043,
"step": 29
},
{
"epoch": 0.06896551724137931,
"grad_norm": 43.76908493041992,
"learning_rate": 4.827586206896552e-05,
"loss": 1.6394,
"step": 58
},
{
"epoch": 0.10344827586206896,
"grad_norm": 112.72496032714844,
"learning_rate": 4.741379310344828e-05,
"loss": 1.1869,
"step": 87
},
{
"epoch": 0.13793103448275862,
"grad_norm": 57.330135345458984,
"learning_rate": 4.655172413793104e-05,
"loss": 1.4015,
"step": 116
},
{
"epoch": 0.1724137931034483,
"grad_norm": 12.192914962768555,
"learning_rate": 4.5689655172413794e-05,
"loss": 1.1619,
"step": 145
},
{
"epoch": 0.20689655172413793,
"grad_norm": 49.972900390625,
"learning_rate": 4.482758620689655e-05,
"loss": 1.4928,
"step": 174
},
{
"epoch": 0.2413793103448276,
"grad_norm": 28.061901092529297,
"learning_rate": 4.396551724137931e-05,
"loss": 1.1286,
"step": 203
},
{
"epoch": 0.27586206896551724,
"grad_norm": 13.678406715393066,
"learning_rate": 4.3103448275862066e-05,
"loss": 0.9936,
"step": 232
},
{
"epoch": 0.3103448275862069,
"grad_norm": 56.66800308227539,
"learning_rate": 4.224137931034483e-05,
"loss": 1.1352,
"step": 261
},
{
"epoch": 0.3448275862068966,
"grad_norm": 18.401317596435547,
"learning_rate": 4.1379310344827587e-05,
"loss": 1.0754,
"step": 290
},
{
"epoch": 0.3793103448275862,
"grad_norm": 28.412200927734375,
"learning_rate": 4.0517241379310344e-05,
"loss": 1.0104,
"step": 319
},
{
"epoch": 0.41379310344827586,
"grad_norm": 62.137596130371094,
"learning_rate": 3.965517241379311e-05,
"loss": 0.9393,
"step": 348
},
{
"epoch": 0.4482758620689655,
"grad_norm": 44.91804504394531,
"learning_rate": 3.8793103448275865e-05,
"loss": 0.727,
"step": 377
},
{
"epoch": 0.4827586206896552,
"grad_norm": 15.308109283447266,
"learning_rate": 3.793103448275862e-05,
"loss": 0.8675,
"step": 406
},
{
"epoch": 0.5172413793103449,
"grad_norm": 11.947402000427246,
"learning_rate": 3.7068965517241385e-05,
"loss": 0.7525,
"step": 435
},
{
"epoch": 0.5517241379310345,
"grad_norm": 22.51788902282715,
"learning_rate": 3.620689655172414e-05,
"loss": 0.7872,
"step": 464
},
{
"epoch": 0.5862068965517241,
"grad_norm": 39.137386322021484,
"learning_rate": 3.53448275862069e-05,
"loss": 0.7889,
"step": 493
},
{
"epoch": 0.6206896551724138,
"grad_norm": 38.08049774169922,
"learning_rate": 3.4482758620689657e-05,
"loss": 0.7347,
"step": 522
},
{
"epoch": 0.6551724137931034,
"grad_norm": 10.072871208190918,
"learning_rate": 3.3620689655172414e-05,
"loss": 0.7422,
"step": 551
},
{
"epoch": 0.6896551724137931,
"grad_norm": 24.6478328704834,
"learning_rate": 3.275862068965517e-05,
"loss": 0.7217,
"step": 580
},
{
"epoch": 0.7241379310344828,
"grad_norm": 8.815550804138184,
"learning_rate": 3.1896551724137935e-05,
"loss": 0.767,
"step": 609
},
{
"epoch": 0.7586206896551724,
"grad_norm": 7.418780326843262,
"learning_rate": 3.103448275862069e-05,
"loss": 0.7365,
"step": 638
},
{
"epoch": 0.7931034482758621,
"grad_norm": 16.163270950317383,
"learning_rate": 3.017241379310345e-05,
"loss": 0.6203,
"step": 667
},
{
"epoch": 0.8275862068965517,
"grad_norm": 47.155818939208984,
"learning_rate": 2.9310344827586206e-05,
"loss": 0.7505,
"step": 696
},
{
"epoch": 0.8620689655172413,
"grad_norm": 17.693836212158203,
"learning_rate": 2.844827586206897e-05,
"loss": 0.6014,
"step": 725
},
{
"epoch": 0.896551724137931,
"grad_norm": 15.081289291381836,
"learning_rate": 2.7586206896551727e-05,
"loss": 0.5907,
"step": 754
},
{
"epoch": 0.9310344827586207,
"grad_norm": 235.15663146972656,
"learning_rate": 2.672413793103448e-05,
"loss": 0.5196,
"step": 783
},
{
"epoch": 0.9655172413793104,
"grad_norm": 13.673110961914062,
"learning_rate": 2.5862068965517244e-05,
"loss": 0.5441,
"step": 812
},
{
"epoch": 1.0,
"grad_norm": 22.076805114746094,
"learning_rate": 2.5e-05,
"loss": 0.5455,
"step": 841
},
{
"epoch": 1.0,
"eval_cer": 0.020121099208197483,
"eval_loss": 0.46177592873573303,
"eval_runtime": 644.2587,
"eval_samples_per_second": 2.611,
"eval_steps_per_second": 0.328,
"step": 841
},
{
"epoch": 1.0344827586206897,
"grad_norm": 6.016767501831055,
"learning_rate": 2.413793103448276e-05,
"loss": 0.416,
"step": 870
},
{
"epoch": 1.0689655172413792,
"grad_norm": 5.592987060546875,
"learning_rate": 2.327586206896552e-05,
"loss": 0.4161,
"step": 899
},
{
"epoch": 1.103448275862069,
"grad_norm": 4.175529479980469,
"learning_rate": 2.2413793103448276e-05,
"loss": 0.4516,
"step": 928
},
{
"epoch": 1.1379310344827587,
"grad_norm": 7.126400470733643,
"learning_rate": 2.1551724137931033e-05,
"loss": 0.4583,
"step": 957
},
{
"epoch": 1.1724137931034484,
"grad_norm": 5.696765899658203,
"learning_rate": 2.0689655172413793e-05,
"loss": 0.3918,
"step": 986
},
{
"epoch": 1.206896551724138,
"grad_norm": 11.261072158813477,
"learning_rate": 1.9827586206896554e-05,
"loss": 0.4423,
"step": 1015
},
{
"epoch": 1.2413793103448276,
"grad_norm": 3.23542857170105,
"learning_rate": 1.896551724137931e-05,
"loss": 0.3769,
"step": 1044
},
{
"epoch": 1.2758620689655173,
"grad_norm": 4.922264099121094,
"learning_rate": 1.810344827586207e-05,
"loss": 0.4311,
"step": 1073
},
{
"epoch": 1.3103448275862069,
"grad_norm": 3.692586898803711,
"learning_rate": 1.7241379310344828e-05,
"loss": 0.3667,
"step": 1102
},
{
"epoch": 1.3448275862068966,
"grad_norm": 2.88181471824646,
"learning_rate": 1.6379310344827585e-05,
"loss": 0.3167,
"step": 1131
},
{
"epoch": 1.3793103448275863,
"grad_norm": 3.277984142303467,
"learning_rate": 1.5517241379310346e-05,
"loss": 0.3331,
"step": 1160
},
{
"epoch": 1.4137931034482758,
"grad_norm": 7.566446304321289,
"learning_rate": 1.4655172413793103e-05,
"loss": 0.3046,
"step": 1189
},
{
"epoch": 1.4482758620689655,
"grad_norm": 17.953258514404297,
"learning_rate": 1.3793103448275863e-05,
"loss": 0.3332,
"step": 1218
},
{
"epoch": 1.4827586206896552,
"grad_norm": 11.560026168823242,
"learning_rate": 1.2931034482758622e-05,
"loss": 0.3299,
"step": 1247
},
{
"epoch": 1.5172413793103448,
"grad_norm": 5.917276859283447,
"learning_rate": 1.206896551724138e-05,
"loss": 0.2961,
"step": 1276
},
{
"epoch": 1.5517241379310345,
"grad_norm": 3.665133476257324,
"learning_rate": 1.1206896551724138e-05,
"loss": 0.3142,
"step": 1305
},
{
"epoch": 1.5862068965517242,
"grad_norm": 2.3258779048919678,
"learning_rate": 1.0344827586206897e-05,
"loss": 0.3005,
"step": 1334
},
{
"epoch": 1.6206896551724137,
"grad_norm": 2.856088638305664,
"learning_rate": 9.482758620689655e-06,
"loss": 0.2652,
"step": 1363
},
{
"epoch": 1.6551724137931034,
"grad_norm": 8.568778991699219,
"learning_rate": 8.620689655172414e-06,
"loss": 0.2652,
"step": 1392
},
{
"epoch": 1.6896551724137931,
"grad_norm": 4.4803667068481445,
"learning_rate": 7.758620689655173e-06,
"loss": 0.2541,
"step": 1421
},
{
"epoch": 1.7241379310344827,
"grad_norm": 13.121492385864258,
"learning_rate": 6.896551724137932e-06,
"loss": 0.2754,
"step": 1450
},
{
"epoch": 1.7586206896551724,
"grad_norm": 2.48468279838562,
"learning_rate": 6.03448275862069e-06,
"loss": 0.2379,
"step": 1479
},
{
"epoch": 1.793103448275862,
"grad_norm": 1.497287631034851,
"learning_rate": 5.172413793103448e-06,
"loss": 0.2273,
"step": 1508
},
{
"epoch": 1.8275862068965516,
"grad_norm": 2.972078800201416,
"learning_rate": 4.310344827586207e-06,
"loss": 0.2254,
"step": 1537
},
{
"epoch": 1.8620689655172413,
"grad_norm": 12.911340713500977,
"learning_rate": 3.448275862068966e-06,
"loss": 0.2448,
"step": 1566
},
{
"epoch": 1.896551724137931,
"grad_norm": 1.3689017295837402,
"learning_rate": 2.586206896551724e-06,
"loss": 0.2089,
"step": 1595
},
{
"epoch": 1.9310344827586206,
"grad_norm": 4.04969596862793,
"learning_rate": 1.724137931034483e-06,
"loss": 0.2174,
"step": 1624
},
{
"epoch": 1.9655172413793105,
"grad_norm": 2.9180474281311035,
"learning_rate": 8.620689655172415e-07,
"loss": 0.2381,
"step": 1653
},
{
"epoch": 2.0,
"grad_norm": 4.1190409660339355,
"learning_rate": 0.0,
"loss": 0.2068,
"step": 1682
},
{
"epoch": 2.0,
"eval_cer": 0.0032914143766495886,
"eval_loss": 0.23910197615623474,
"eval_runtime": 636.093,
"eval_samples_per_second": 2.644,
"eval_steps_per_second": 0.332,
"step": 1682
},
{
"epoch": 2.0,
"step": 1682,
"total_flos": 1.9906356553640313e+19,
"train_loss": 0.6271170827069549,
"train_runtime": 2733.4434,
"train_samples_per_second": 4.92,
"train_steps_per_second": 0.615
}
],
"logging_steps": 29,
"max_steps": 1682,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.9906356553640313e+19,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}