|
{
  "best_metric": 0.14303971827030182,
  "best_model_checkpoint": "autotrain-tckj2-dxpo0/checkpoint-203",
  "epoch": 7.0,
  "eval_steps": 500,
  "global_step": 203,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.1724137931034483,
      "grad_norm": 3.970233678817749,
      "learning_rate": 4.761904761904762e-06,
      "loss": 0.7808,
      "step": 5
    },
    {
      "epoch": 0.3448275862068966,
      "grad_norm": 2.5622565746307373,
      "learning_rate": 9.523809523809525e-06,
      "loss": 0.7224,
      "step": 10
    },
    {
      "epoch": 0.5172413793103449,
      "grad_norm": 1.819740891456604,
      "learning_rate": 1.4285714285714287e-05,
      "loss": 0.5833,
      "step": 15
    },
    {
      "epoch": 0.6896551724137931,
      "grad_norm": 1.9450643062591553,
      "learning_rate": 1.904761904761905e-05,
      "loss": 0.4336,
      "step": 20
    },
    {
      "epoch": 0.8620689655172413,
      "grad_norm": 1.2047266960144043,
      "learning_rate": 1.9560439560439563e-05,
      "loss": 0.426,
      "step": 25
    },
    {
      "epoch": 1.0,
      "eval_loss": 0.42094460129737854,
      "eval_runtime": 1.7024,
      "eval_samples_per_second": 67.553,
      "eval_steps_per_second": 2.35,
      "step": 29
    },
    {
      "epoch": 1.0344827586206897,
      "grad_norm": 1.0075544118881226,
      "learning_rate": 1.901098901098901e-05,
      "loss": 0.407,
      "step": 30
    },
    {
      "epoch": 1.206896551724138,
      "grad_norm": 2.6184139251708984,
      "learning_rate": 1.8461538461538465e-05,
      "loss": 0.4633,
      "step": 35
    },
    {
      "epoch": 1.3793103448275863,
      "grad_norm": 1.3700945377349854,
      "learning_rate": 1.7912087912087915e-05,
      "loss": 0.2629,
      "step": 40
    },
    {
      "epoch": 1.5517241379310345,
      "grad_norm": 1.5076994895935059,
      "learning_rate": 1.7362637362637363e-05,
      "loss": 0.4468,
      "step": 45
    },
    {
      "epoch": 1.7241379310344827,
      "grad_norm": 1.0693202018737793,
      "learning_rate": 1.6813186813186814e-05,
      "loss": 0.3665,
      "step": 50
    },
    {
      "epoch": 1.896551724137931,
      "grad_norm": 1.7655493021011353,
      "learning_rate": 1.6263736263736265e-05,
      "loss": 0.2735,
      "step": 55
    },
    {
      "epoch": 2.0,
      "eval_loss": 0.3269391655921936,
      "eval_runtime": 1.3905,
      "eval_samples_per_second": 82.706,
      "eval_steps_per_second": 2.877,
      "step": 58
    },
    {
      "epoch": 2.0689655172413794,
      "grad_norm": 1.0194451808929443,
      "learning_rate": 1.5714285714285715e-05,
      "loss": 0.2472,
      "step": 60
    },
    {
      "epoch": 2.2413793103448274,
      "grad_norm": 2.360098361968994,
      "learning_rate": 1.5164835164835166e-05,
      "loss": 0.2586,
      "step": 65
    },
    {
      "epoch": 2.413793103448276,
      "grad_norm": 1.4176146984100342,
      "learning_rate": 1.4615384615384615e-05,
      "loss": 0.2281,
      "step": 70
    },
    {
      "epoch": 2.586206896551724,
      "grad_norm": 3.5215818881988525,
      "learning_rate": 1.4065934065934068e-05,
      "loss": 0.3056,
      "step": 75
    },
    {
      "epoch": 2.7586206896551726,
      "grad_norm": 1.205544352531433,
      "learning_rate": 1.3516483516483519e-05,
      "loss": 0.2166,
      "step": 80
    },
    {
      "epoch": 2.9310344827586206,
      "grad_norm": 1.4513664245605469,
      "learning_rate": 1.296703296703297e-05,
      "loss": 0.2243,
      "step": 85
    },
    {
      "epoch": 3.0,
      "eval_loss": 0.2470882385969162,
      "eval_runtime": 1.6355,
      "eval_samples_per_second": 70.316,
      "eval_steps_per_second": 2.446,
      "step": 87
    },
    {
      "epoch": 3.103448275862069,
      "grad_norm": 1.3780584335327148,
      "learning_rate": 1.2417582417582419e-05,
      "loss": 0.2233,
      "step": 90
    },
    {
      "epoch": 3.2758620689655173,
      "grad_norm": 1.5511927604675293,
      "learning_rate": 1.186813186813187e-05,
      "loss": 0.1625,
      "step": 95
    },
    {
      "epoch": 3.4482758620689653,
      "grad_norm": 1.3662214279174805,
      "learning_rate": 1.131868131868132e-05,
      "loss": 0.1718,
      "step": 100
    },
    {
      "epoch": 3.6206896551724137,
      "grad_norm": 2.4403982162475586,
      "learning_rate": 1.076923076923077e-05,
      "loss": 0.1728,
      "step": 105
    },
    {
      "epoch": 3.793103448275862,
      "grad_norm": 1.4150804281234741,
      "learning_rate": 1.021978021978022e-05,
      "loss": 0.1949,
      "step": 110
    },
    {
      "epoch": 3.9655172413793105,
      "grad_norm": 2.6226606369018555,
      "learning_rate": 9.670329670329671e-06,
      "loss": 0.0891,
      "step": 115
    },
    {
      "epoch": 4.0,
      "eval_loss": 0.19967548549175262,
      "eval_runtime": 1.725,
      "eval_samples_per_second": 66.667,
      "eval_steps_per_second": 2.319,
      "step": 116
    },
    {
      "epoch": 4.137931034482759,
      "grad_norm": 2.1667020320892334,
      "learning_rate": 9.120879120879122e-06,
      "loss": 0.1895,
      "step": 120
    },
    {
      "epoch": 4.310344827586207,
      "grad_norm": 1.2279396057128906,
      "learning_rate": 8.571428571428571e-06,
      "loss": 0.1021,
      "step": 125
    },
    {
      "epoch": 4.482758620689655,
      "grad_norm": 2.164337396621704,
      "learning_rate": 8.021978021978023e-06,
      "loss": 0.1232,
      "step": 130
    },
    {
      "epoch": 4.655172413793103,
      "grad_norm": 0.4767724573612213,
      "learning_rate": 7.472527472527473e-06,
      "loss": 0.0891,
      "step": 135
    },
    {
      "epoch": 4.827586206896552,
      "grad_norm": 0.7710694074630737,
      "learning_rate": 6.923076923076923e-06,
      "loss": 0.109,
      "step": 140
    },
    {
      "epoch": 5.0,
      "grad_norm": 0.6246473789215088,
      "learning_rate": 6.373626373626373e-06,
      "loss": 0.0879,
      "step": 145
    },
    {
      "epoch": 5.0,
      "eval_loss": 0.16786590218544006,
      "eval_runtime": 1.7475,
      "eval_samples_per_second": 65.808,
      "eval_steps_per_second": 2.289,
      "step": 145
    },
    {
      "epoch": 5.172413793103448,
      "grad_norm": 1.7630176544189453,
      "learning_rate": 5.824175824175825e-06,
      "loss": 0.0814,
      "step": 150
    },
    {
      "epoch": 5.344827586206897,
      "grad_norm": 0.9055896401405334,
      "learning_rate": 5.274725274725275e-06,
      "loss": 0.1015,
      "step": 155
    },
    {
      "epoch": 5.517241379310345,
      "grad_norm": 1.1590590476989746,
      "learning_rate": 4.725274725274726e-06,
      "loss": 0.0822,
      "step": 160
    },
    {
      "epoch": 5.689655172413794,
      "grad_norm": 3.4120426177978516,
      "learning_rate": 4.175824175824177e-06,
      "loss": 0.1054,
      "step": 165
    },
    {
      "epoch": 5.862068965517241,
      "grad_norm": 1.244531273841858,
      "learning_rate": 3.6263736263736266e-06,
      "loss": 0.1093,
      "step": 170
    },
    {
      "epoch": 6.0,
      "eval_loss": 0.14788025617599487,
      "eval_runtime": 1.4731,
      "eval_samples_per_second": 78.065,
      "eval_steps_per_second": 2.715,
      "step": 174
    },
    {
      "epoch": 6.0344827586206895,
      "grad_norm": 0.7680085301399231,
      "learning_rate": 3.0769230769230774e-06,
      "loss": 0.0911,
      "step": 175
    },
    {
      "epoch": 6.206896551724138,
      "grad_norm": 1.3764543533325195,
      "learning_rate": 2.5274725274725274e-06,
      "loss": 0.0804,
      "step": 180
    },
    {
      "epoch": 6.379310344827586,
      "grad_norm": 1.9512306451797485,
      "learning_rate": 1.9780219780219782e-06,
      "loss": 0.1063,
      "step": 185
    },
    {
      "epoch": 6.551724137931035,
      "grad_norm": 1.7474387884140015,
      "learning_rate": 1.4285714285714286e-06,
      "loss": 0.0821,
      "step": 190
    },
    {
      "epoch": 6.724137931034483,
      "grad_norm": 3.119471788406372,
      "learning_rate": 8.791208791208792e-07,
      "loss": 0.0988,
      "step": 195
    },
    {
      "epoch": 6.896551724137931,
      "grad_norm": 1.1896252632141113,
      "learning_rate": 3.296703296703297e-07,
      "loss": 0.0691,
      "step": 200
    },
    {
      "epoch": 7.0,
      "eval_loss": 0.14303971827030182,
      "eval_runtime": 1.4807,
      "eval_samples_per_second": 77.664,
      "eval_steps_per_second": 2.701,
      "step": 203
    }
  ],
  "logging_steps": 5,
  "max_steps": 203,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 7,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.01
      },
      "attributes": {
        "early_stopping_patience_counter": 1
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}
|
|