{
"best_metric": 0.14303971827030182,
"best_model_checkpoint": "autotrain-tckj2-dxpo0/checkpoint-203",
"epoch": 7.0,
"eval_steps": 500,
"global_step": 203,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.1724137931034483,
"grad_norm": 3.970233678817749,
"learning_rate": 4.761904761904762e-06,
"loss": 0.7808,
"step": 5
},
{
"epoch": 0.3448275862068966,
"grad_norm": 2.5622565746307373,
"learning_rate": 9.523809523809525e-06,
"loss": 0.7224,
"step": 10
},
{
"epoch": 0.5172413793103449,
"grad_norm": 1.819740891456604,
"learning_rate": 1.4285714285714287e-05,
"loss": 0.5833,
"step": 15
},
{
"epoch": 0.6896551724137931,
"grad_norm": 1.9450643062591553,
"learning_rate": 1.904761904761905e-05,
"loss": 0.4336,
"step": 20
},
{
"epoch": 0.8620689655172413,
"grad_norm": 1.2047266960144043,
"learning_rate": 1.9560439560439563e-05,
"loss": 0.426,
"step": 25
},
{
"epoch": 1.0,
"eval_loss": 0.42094460129737854,
"eval_runtime": 1.7024,
"eval_samples_per_second": 67.553,
"eval_steps_per_second": 2.35,
"step": 29
},
{
"epoch": 1.0344827586206897,
"grad_norm": 1.0075544118881226,
"learning_rate": 1.901098901098901e-05,
"loss": 0.407,
"step": 30
},
{
"epoch": 1.206896551724138,
"grad_norm": 2.6184139251708984,
"learning_rate": 1.8461538461538465e-05,
"loss": 0.4633,
"step": 35
},
{
"epoch": 1.3793103448275863,
"grad_norm": 1.3700945377349854,
"learning_rate": 1.7912087912087915e-05,
"loss": 0.2629,
"step": 40
},
{
"epoch": 1.5517241379310345,
"grad_norm": 1.5076994895935059,
"learning_rate": 1.7362637362637363e-05,
"loss": 0.4468,
"step": 45
},
{
"epoch": 1.7241379310344827,
"grad_norm": 1.0693202018737793,
"learning_rate": 1.6813186813186814e-05,
"loss": 0.3665,
"step": 50
},
{
"epoch": 1.896551724137931,
"grad_norm": 1.7655493021011353,
"learning_rate": 1.6263736263736265e-05,
"loss": 0.2735,
"step": 55
},
{
"epoch": 2.0,
"eval_loss": 0.3269391655921936,
"eval_runtime": 1.3905,
"eval_samples_per_second": 82.706,
"eval_steps_per_second": 2.877,
"step": 58
},
{
"epoch": 2.0689655172413794,
"grad_norm": 1.0194451808929443,
"learning_rate": 1.5714285714285715e-05,
"loss": 0.2472,
"step": 60
},
{
"epoch": 2.2413793103448274,
"grad_norm": 2.360098361968994,
"learning_rate": 1.5164835164835166e-05,
"loss": 0.2586,
"step": 65
},
{
"epoch": 2.413793103448276,
"grad_norm": 1.4176146984100342,
"learning_rate": 1.4615384615384615e-05,
"loss": 0.2281,
"step": 70
},
{
"epoch": 2.586206896551724,
"grad_norm": 3.5215818881988525,
"learning_rate": 1.4065934065934068e-05,
"loss": 0.3056,
"step": 75
},
{
"epoch": 2.7586206896551726,
"grad_norm": 1.205544352531433,
"learning_rate": 1.3516483516483519e-05,
"loss": 0.2166,
"step": 80
},
{
"epoch": 2.9310344827586206,
"grad_norm": 1.4513664245605469,
"learning_rate": 1.296703296703297e-05,
"loss": 0.2243,
"step": 85
},
{
"epoch": 3.0,
"eval_loss": 0.2470882385969162,
"eval_runtime": 1.6355,
"eval_samples_per_second": 70.316,
"eval_steps_per_second": 2.446,
"step": 87
},
{
"epoch": 3.103448275862069,
"grad_norm": 1.3780584335327148,
"learning_rate": 1.2417582417582419e-05,
"loss": 0.2233,
"step": 90
},
{
"epoch": 3.2758620689655173,
"grad_norm": 1.5511927604675293,
"learning_rate": 1.186813186813187e-05,
"loss": 0.1625,
"step": 95
},
{
"epoch": 3.4482758620689653,
"grad_norm": 1.3662214279174805,
"learning_rate": 1.131868131868132e-05,
"loss": 0.1718,
"step": 100
},
{
"epoch": 3.6206896551724137,
"grad_norm": 2.4403982162475586,
"learning_rate": 1.076923076923077e-05,
"loss": 0.1728,
"step": 105
},
{
"epoch": 3.793103448275862,
"grad_norm": 1.4150804281234741,
"learning_rate": 1.021978021978022e-05,
"loss": 0.1949,
"step": 110
},
{
"epoch": 3.9655172413793105,
"grad_norm": 2.6226606369018555,
"learning_rate": 9.670329670329671e-06,
"loss": 0.0891,
"step": 115
},
{
"epoch": 4.0,
"eval_loss": 0.19967548549175262,
"eval_runtime": 1.725,
"eval_samples_per_second": 66.667,
"eval_steps_per_second": 2.319,
"step": 116
},
{
"epoch": 4.137931034482759,
"grad_norm": 2.1667020320892334,
"learning_rate": 9.120879120879122e-06,
"loss": 0.1895,
"step": 120
},
{
"epoch": 4.310344827586207,
"grad_norm": 1.2279396057128906,
"learning_rate": 8.571428571428571e-06,
"loss": 0.1021,
"step": 125
},
{
"epoch": 4.482758620689655,
"grad_norm": 2.164337396621704,
"learning_rate": 8.021978021978023e-06,
"loss": 0.1232,
"step": 130
},
{
"epoch": 4.655172413793103,
"grad_norm": 0.4767724573612213,
"learning_rate": 7.472527472527473e-06,
"loss": 0.0891,
"step": 135
},
{
"epoch": 4.827586206896552,
"grad_norm": 0.7710694074630737,
"learning_rate": 6.923076923076923e-06,
"loss": 0.109,
"step": 140
},
{
"epoch": 5.0,
"grad_norm": 0.6246473789215088,
"learning_rate": 6.373626373626373e-06,
"loss": 0.0879,
"step": 145
},
{
"epoch": 5.0,
"eval_loss": 0.16786590218544006,
"eval_runtime": 1.7475,
"eval_samples_per_second": 65.808,
"eval_steps_per_second": 2.289,
"step": 145
},
{
"epoch": 5.172413793103448,
"grad_norm": 1.7630176544189453,
"learning_rate": 5.824175824175825e-06,
"loss": 0.0814,
"step": 150
},
{
"epoch": 5.344827586206897,
"grad_norm": 0.9055896401405334,
"learning_rate": 5.274725274725275e-06,
"loss": 0.1015,
"step": 155
},
{
"epoch": 5.517241379310345,
"grad_norm": 1.1590590476989746,
"learning_rate": 4.725274725274726e-06,
"loss": 0.0822,
"step": 160
},
{
"epoch": 5.689655172413794,
"grad_norm": 3.4120426177978516,
"learning_rate": 4.175824175824177e-06,
"loss": 0.1054,
"step": 165
},
{
"epoch": 5.862068965517241,
"grad_norm": 1.244531273841858,
"learning_rate": 3.6263736263736266e-06,
"loss": 0.1093,
"step": 170
},
{
"epoch": 6.0,
"eval_loss": 0.14788025617599487,
"eval_runtime": 1.4731,
"eval_samples_per_second": 78.065,
"eval_steps_per_second": 2.715,
"step": 174
},
{
"epoch": 6.0344827586206895,
"grad_norm": 0.7680085301399231,
"learning_rate": 3.0769230769230774e-06,
"loss": 0.0911,
"step": 175
},
{
"epoch": 6.206896551724138,
"grad_norm": 1.3764543533325195,
"learning_rate": 2.5274725274725274e-06,
"loss": 0.0804,
"step": 180
},
{
"epoch": 6.379310344827586,
"grad_norm": 1.9512306451797485,
"learning_rate": 1.9780219780219782e-06,
"loss": 0.1063,
"step": 185
},
{
"epoch": 6.551724137931035,
"grad_norm": 1.7474387884140015,
"learning_rate": 1.4285714285714286e-06,
"loss": 0.0821,
"step": 190
},
{
"epoch": 6.724137931034483,
"grad_norm": 3.119471788406372,
"learning_rate": 8.791208791208792e-07,
"loss": 0.0988,
"step": 195
},
{
"epoch": 6.896551724137931,
"grad_norm": 1.1896252632141113,
"learning_rate": 3.296703296703297e-07,
"loss": 0.0691,
"step": 200
},
{
"epoch": 7.0,
"eval_loss": 0.14303971827030182,
"eval_runtime": 1.4807,
"eval_samples_per_second": 77.664,
"eval_steps_per_second": 2.701,
"step": 203
}
],
"logging_steps": 5,
"max_steps": 203,
"num_input_tokens_seen": 0,
"num_train_epochs": 7,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.01
},
"attributes": {
"early_stopping_patience_counter": 1
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}