|
{ |
|
"best_metric": 0.9318181818181818, |
|
"best_model_checkpoint": "deit-base-distilled-patch16-224-85-fold2/checkpoint-46", |
|
"epoch": 100.0, |
|
"eval_steps": 500, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.2727272727272727, |
|
"eval_loss": 1.2745391130447388, |
|
"eval_runtime": 0.6014, |
|
"eval_samples_per_second": 73.159, |
|
"eval_steps_per_second": 3.325, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.4090909090909091, |
|
"eval_loss": 0.8028427362442017, |
|
"eval_runtime": 0.6032, |
|
"eval_samples_per_second": 72.94, |
|
"eval_steps_per_second": 3.315, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7045454545454546, |
|
"eval_loss": 0.7455913424491882, |
|
"eval_runtime": 0.6175, |
|
"eval_samples_per_second": 71.255, |
|
"eval_steps_per_second": 3.239, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7045454545454546, |
|
"eval_loss": 0.7981840372085571, |
|
"eval_runtime": 0.6096, |
|
"eval_samples_per_second": 72.178, |
|
"eval_steps_per_second": 3.281, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 2.3311023712158203, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.7325, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7045454545454546, |
|
"eval_loss": 0.6233007311820984, |
|
"eval_runtime": 0.6071, |
|
"eval_samples_per_second": 72.47, |
|
"eval_steps_per_second": 3.294, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 0.5093103051185608, |
|
"eval_runtime": 0.6339, |
|
"eval_samples_per_second": 69.412, |
|
"eval_steps_per_second": 3.155, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7045454545454546, |
|
"eval_loss": 0.5565758347511292, |
|
"eval_runtime": 0.6082, |
|
"eval_samples_per_second": 72.35, |
|
"eval_steps_per_second": 3.289, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7045454545454546, |
|
"eval_loss": 0.6838734745979309, |
|
"eval_runtime": 0.6019, |
|
"eval_samples_per_second": 73.107, |
|
"eval_steps_per_second": 3.323, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.48205462098121643, |
|
"eval_runtime": 0.6025, |
|
"eval_samples_per_second": 73.034, |
|
"eval_steps_per_second": 3.32, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 4.509533882141113, |
|
"learning_rate": 5e-05, |
|
"loss": 0.4472, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.7727272727272727, |
|
"eval_loss": 0.4365437626838684, |
|
"eval_runtime": 0.6039, |
|
"eval_samples_per_second": 72.86, |
|
"eval_steps_per_second": 3.312, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 0.5157524347305298, |
|
"eval_runtime": 0.6015, |
|
"eval_samples_per_second": 73.149, |
|
"eval_steps_per_second": 3.325, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8181818181818182, |
|
"eval_loss": 0.41960516571998596, |
|
"eval_runtime": 0.6153, |
|
"eval_samples_per_second": 71.513, |
|
"eval_steps_per_second": 3.251, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8409090909090909, |
|
"eval_loss": 0.35994166135787964, |
|
"eval_runtime": 0.6066, |
|
"eval_samples_per_second": 72.534, |
|
"eval_steps_per_second": 3.297, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8636363636363636, |
|
"eval_loss": 0.36041271686553955, |
|
"eval_runtime": 0.607, |
|
"eval_samples_per_second": 72.489, |
|
"eval_steps_per_second": 3.295, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 3.2131314277648926, |
|
"learning_rate": 4.722222222222222e-05, |
|
"loss": 0.3483, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8181818181818182, |
|
"eval_loss": 0.36339208483695984, |
|
"eval_runtime": 0.5982, |
|
"eval_samples_per_second": 73.558, |
|
"eval_steps_per_second": 3.344, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8863636363636364, |
|
"eval_loss": 0.2802547514438629, |
|
"eval_runtime": 0.6116, |
|
"eval_samples_per_second": 71.94, |
|
"eval_steps_per_second": 3.27, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8863636363636364, |
|
"eval_loss": 0.25918081402778625, |
|
"eval_runtime": 0.6122, |
|
"eval_samples_per_second": 71.876, |
|
"eval_steps_per_second": 3.267, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.2654772102832794, |
|
"eval_runtime": 0.6337, |
|
"eval_samples_per_second": 69.432, |
|
"eval_steps_per_second": 3.156, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8863636363636364, |
|
"eval_loss": 0.2333187609910965, |
|
"eval_runtime": 0.6516, |
|
"eval_samples_per_second": 67.523, |
|
"eval_steps_per_second": 3.069, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 3.3891751766204834, |
|
"learning_rate": 4.4444444444444447e-05, |
|
"loss": 0.2514, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8636363636363636, |
|
"eval_loss": 0.25902560353279114, |
|
"eval_runtime": 0.6102, |
|
"eval_samples_per_second": 72.107, |
|
"eval_steps_per_second": 3.278, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.8863636363636364, |
|
"eval_loss": 0.26424452662467957, |
|
"eval_runtime": 0.6031, |
|
"eval_samples_per_second": 72.952, |
|
"eval_steps_per_second": 3.316, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.8863636363636364, |
|
"eval_loss": 0.26372984051704407, |
|
"eval_runtime": 0.6239, |
|
"eval_samples_per_second": 70.529, |
|
"eval_steps_per_second": 3.206, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.9318181818181818, |
|
"eval_loss": 0.19909273087978363, |
|
"eval_runtime": 0.6092, |
|
"eval_samples_per_second": 72.228, |
|
"eval_steps_per_second": 3.283, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.19407562911510468, |
|
"eval_runtime": 0.6232, |
|
"eval_samples_per_second": 70.602, |
|
"eval_steps_per_second": 3.209, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"grad_norm": 3.262606382369995, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 0.1847, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.8863636363636364, |
|
"eval_loss": 0.18681906163692474, |
|
"eval_runtime": 0.6022, |
|
"eval_samples_per_second": 73.069, |
|
"eval_steps_per_second": 3.321, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.8863636363636364, |
|
"eval_loss": 0.1828092634677887, |
|
"eval_runtime": 0.6083, |
|
"eval_samples_per_second": 72.327, |
|
"eval_steps_per_second": 3.288, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.8863636363636364, |
|
"eval_loss": 0.17112162709236145, |
|
"eval_runtime": 0.6062, |
|
"eval_samples_per_second": 72.577, |
|
"eval_steps_per_second": 3.299, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.8863636363636364, |
|
"eval_loss": 0.2423066645860672, |
|
"eval_runtime": 0.6086, |
|
"eval_samples_per_second": 72.296, |
|
"eval_steps_per_second": 3.286, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.8863636363636364, |
|
"eval_loss": 0.21621073782444, |
|
"eval_runtime": 0.6093, |
|
"eval_samples_per_second": 72.213, |
|
"eval_steps_per_second": 3.282, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"grad_norm": 2.5355308055877686, |
|
"learning_rate": 3.888888888888889e-05, |
|
"loss": 0.1501, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.18539918959140778, |
|
"eval_runtime": 0.6182, |
|
"eval_samples_per_second": 71.169, |
|
"eval_steps_per_second": 3.235, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.8636363636363636, |
|
"eval_loss": 0.30705034732818604, |
|
"eval_runtime": 0.6154, |
|
"eval_samples_per_second": 71.493, |
|
"eval_steps_per_second": 3.25, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.8863636363636364, |
|
"eval_loss": 0.2435438185930252, |
|
"eval_runtime": 0.622, |
|
"eval_samples_per_second": 70.736, |
|
"eval_steps_per_second": 3.215, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.17283663153648376, |
|
"eval_runtime": 0.6112, |
|
"eval_samples_per_second": 71.984, |
|
"eval_steps_per_second": 3.272, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.16438254714012146, |
|
"eval_runtime": 0.626, |
|
"eval_samples_per_second": 70.291, |
|
"eval_steps_per_second": 3.195, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"grad_norm": 2.833869695663452, |
|
"learning_rate": 3.611111111111111e-05, |
|
"loss": 0.13, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.8409090909090909, |
|
"eval_loss": 0.2768351137638092, |
|
"eval_runtime": 0.6028, |
|
"eval_samples_per_second": 72.987, |
|
"eval_steps_per_second": 3.318, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.9318181818181818, |
|
"eval_loss": 0.15386301279067993, |
|
"eval_runtime": 0.6151, |
|
"eval_samples_per_second": 71.537, |
|
"eval_steps_per_second": 3.252, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.25800377130508423, |
|
"eval_runtime": 0.6195, |
|
"eval_samples_per_second": 71.023, |
|
"eval_steps_per_second": 3.228, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.8863636363636364, |
|
"eval_loss": 0.17828179895877838, |
|
"eval_runtime": 0.6253, |
|
"eval_samples_per_second": 70.368, |
|
"eval_steps_per_second": 3.199, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.8636363636363636, |
|
"eval_loss": 0.1782391220331192, |
|
"eval_runtime": 0.6139, |
|
"eval_samples_per_second": 71.678, |
|
"eval_steps_per_second": 3.258, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"grad_norm": 2.5041236877441406, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.1357, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.8863636363636364, |
|
"eval_loss": 0.20351167023181915, |
|
"eval_runtime": 0.6115, |
|
"eval_samples_per_second": 71.96, |
|
"eval_steps_per_second": 3.271, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.8863636363636364, |
|
"eval_loss": 0.21165017783641815, |
|
"eval_runtime": 0.6088, |
|
"eval_samples_per_second": 72.277, |
|
"eval_steps_per_second": 3.285, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.17929820716381073, |
|
"eval_runtime": 0.6063, |
|
"eval_samples_per_second": 72.573, |
|
"eval_steps_per_second": 3.299, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.2002231925725937, |
|
"eval_runtime": 0.6078, |
|
"eval_samples_per_second": 72.392, |
|
"eval_steps_per_second": 3.291, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.8863636363636364, |
|
"eval_loss": 0.23656944930553436, |
|
"eval_runtime": 0.6049, |
|
"eval_samples_per_second": 72.736, |
|
"eval_steps_per_second": 3.306, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"grad_norm": 2.3605904579162598, |
|
"learning_rate": 3.055555555555556e-05, |
|
"loss": 0.105, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.9318181818181818, |
|
"eval_loss": 0.20083707571029663, |
|
"eval_runtime": 0.602, |
|
"eval_samples_per_second": 73.092, |
|
"eval_steps_per_second": 3.322, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.8863636363636364, |
|
"eval_loss": 0.23675359785556793, |
|
"eval_runtime": 0.6125, |
|
"eval_samples_per_second": 71.834, |
|
"eval_steps_per_second": 3.265, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.8863636363636364, |
|
"eval_loss": 0.21420449018478394, |
|
"eval_runtime": 0.6133, |
|
"eval_samples_per_second": 71.747, |
|
"eval_steps_per_second": 3.261, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.8863636363636364, |
|
"eval_loss": 0.21165300905704498, |
|
"eval_runtime": 0.6093, |
|
"eval_samples_per_second": 72.213, |
|
"eval_steps_per_second": 3.282, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.8863636363636364, |
|
"eval_loss": 0.2621056139469147, |
|
"eval_runtime": 0.6066, |
|
"eval_samples_per_second": 72.53, |
|
"eval_steps_per_second": 3.297, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"grad_norm": 3.734884023666382, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.1091, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.8863636363636364, |
|
"eval_loss": 0.22308549284934998, |
|
"eval_runtime": 0.6187, |
|
"eval_samples_per_second": 71.12, |
|
"eval_steps_per_second": 3.233, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.9318181818181818, |
|
"eval_loss": 0.19459280371665955, |
|
"eval_runtime": 0.6087, |
|
"eval_samples_per_second": 72.285, |
|
"eval_steps_per_second": 3.286, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.9318181818181818, |
|
"eval_loss": 0.20005172491073608, |
|
"eval_runtime": 0.617, |
|
"eval_samples_per_second": 71.316, |
|
"eval_steps_per_second": 3.242, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.20313407480716705, |
|
"eval_runtime": 0.606, |
|
"eval_samples_per_second": 72.607, |
|
"eval_steps_per_second": 3.3, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.20779913663864136, |
|
"eval_runtime": 0.6127, |
|
"eval_samples_per_second": 71.816, |
|
"eval_steps_per_second": 3.264, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"grad_norm": 1.503174066543579, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.1054, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.22496341168880463, |
|
"eval_runtime": 0.5985, |
|
"eval_samples_per_second": 73.523, |
|
"eval_steps_per_second": 3.342, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.2180013209581375, |
|
"eval_runtime": 0.6105, |
|
"eval_samples_per_second": 72.071, |
|
"eval_steps_per_second": 3.276, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.9318181818181818, |
|
"eval_loss": 0.1915305107831955, |
|
"eval_runtime": 0.6101, |
|
"eval_samples_per_second": 72.117, |
|
"eval_steps_per_second": 3.278, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.8863636363636364, |
|
"eval_loss": 0.2227245271205902, |
|
"eval_runtime": 0.6127, |
|
"eval_samples_per_second": 71.814, |
|
"eval_steps_per_second": 3.264, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.8863636363636364, |
|
"eval_loss": 0.23516863584518433, |
|
"eval_runtime": 0.6379, |
|
"eval_samples_per_second": 68.976, |
|
"eval_steps_per_second": 3.135, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"grad_norm": 2.845384120941162, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 0.0982, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.8863636363636364, |
|
"eval_loss": 0.23286031186580658, |
|
"eval_runtime": 0.6246, |
|
"eval_samples_per_second": 70.446, |
|
"eval_steps_per_second": 3.202, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.2135220468044281, |
|
"eval_runtime": 0.6138, |
|
"eval_samples_per_second": 71.681, |
|
"eval_steps_per_second": 3.258, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.8863636363636364, |
|
"eval_loss": 0.19485782086849213, |
|
"eval_runtime": 0.6095, |
|
"eval_samples_per_second": 72.185, |
|
"eval_steps_per_second": 3.281, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.9318181818181818, |
|
"eval_loss": 0.21486514806747437, |
|
"eval_runtime": 0.6086, |
|
"eval_samples_per_second": 72.296, |
|
"eval_steps_per_second": 3.286, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.24350209534168243, |
|
"eval_runtime": 0.6151, |
|
"eval_samples_per_second": 71.537, |
|
"eval_steps_per_second": 3.252, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"grad_norm": 1.9481189250946045, |
|
"learning_rate": 1.9444444444444445e-05, |
|
"loss": 0.0808, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.2541444003582001, |
|
"eval_runtime": 0.6056, |
|
"eval_samples_per_second": 72.657, |
|
"eval_steps_per_second": 3.303, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.24467478692531586, |
|
"eval_runtime": 0.6059, |
|
"eval_samples_per_second": 72.62, |
|
"eval_steps_per_second": 3.301, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.9318181818181818, |
|
"eval_loss": 0.19038386642932892, |
|
"eval_runtime": 0.6075, |
|
"eval_samples_per_second": 72.428, |
|
"eval_steps_per_second": 3.292, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.24367865920066833, |
|
"eval_runtime": 0.6168, |
|
"eval_samples_per_second": 71.342, |
|
"eval_steps_per_second": 3.243, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.8863636363636364, |
|
"eval_loss": 0.35932543873786926, |
|
"eval_runtime": 0.607, |
|
"eval_samples_per_second": 72.488, |
|
"eval_steps_per_second": 3.295, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"grad_norm": 2.6166303157806396, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.0843, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.8863636363636364, |
|
"eval_loss": 0.4187169373035431, |
|
"eval_runtime": 0.6057, |
|
"eval_samples_per_second": 72.648, |
|
"eval_steps_per_second": 3.302, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.8863636363636364, |
|
"eval_loss": 0.35100072622299194, |
|
"eval_runtime": 0.605, |
|
"eval_samples_per_second": 72.726, |
|
"eval_steps_per_second": 3.306, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.2315276712179184, |
|
"eval_runtime": 0.6151, |
|
"eval_samples_per_second": 71.531, |
|
"eval_steps_per_second": 3.251, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.20485752820968628, |
|
"eval_runtime": 0.6119, |
|
"eval_samples_per_second": 71.905, |
|
"eval_steps_per_second": 3.268, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.21503090858459473, |
|
"eval_runtime": 0.6275, |
|
"eval_samples_per_second": 70.124, |
|
"eval_steps_per_second": 3.187, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"grad_norm": 2.6058237552642822, |
|
"learning_rate": 1.388888888888889e-05, |
|
"loss": 0.0942, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.21164561808109283, |
|
"eval_runtime": 0.614, |
|
"eval_samples_per_second": 71.662, |
|
"eval_steps_per_second": 3.257, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.2013980746269226, |
|
"eval_runtime": 0.6105, |
|
"eval_samples_per_second": 72.068, |
|
"eval_steps_per_second": 3.276, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.21975825726985931, |
|
"eval_runtime": 0.6262, |
|
"eval_samples_per_second": 70.265, |
|
"eval_steps_per_second": 3.194, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.9318181818181818, |
|
"eval_loss": 0.2538079023361206, |
|
"eval_runtime": 0.6264, |
|
"eval_samples_per_second": 70.245, |
|
"eval_steps_per_second": 3.193, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 0.9318181818181818, |
|
"eval_loss": 0.27548667788505554, |
|
"eval_runtime": 0.6345, |
|
"eval_samples_per_second": 69.35, |
|
"eval_steps_per_second": 3.152, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"grad_norm": 1.9026525020599365, |
|
"learning_rate": 1.1111111111111112e-05, |
|
"loss": 0.0884, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.24910703301429749, |
|
"eval_runtime": 0.6525, |
|
"eval_samples_per_second": 67.433, |
|
"eval_steps_per_second": 3.065, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.21000614762306213, |
|
"eval_runtime": 0.607, |
|
"eval_samples_per_second": 72.491, |
|
"eval_steps_per_second": 3.295, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.1976689249277115, |
|
"eval_runtime": 0.6101, |
|
"eval_samples_per_second": 72.123, |
|
"eval_steps_per_second": 3.278, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.1979275345802307, |
|
"eval_runtime": 0.6116, |
|
"eval_samples_per_second": 71.94, |
|
"eval_steps_per_second": 3.27, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.21450987458229065, |
|
"eval_runtime": 0.612, |
|
"eval_samples_per_second": 71.895, |
|
"eval_steps_per_second": 3.268, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"grad_norm": 2.5778989791870117, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 0.0637, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.21915043890476227, |
|
"eval_runtime": 0.6078, |
|
"eval_samples_per_second": 72.395, |
|
"eval_steps_per_second": 3.291, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_accuracy": 0.9318181818181818, |
|
"eval_loss": 0.2055453360080719, |
|
"eval_runtime": 0.607, |
|
"eval_samples_per_second": 72.493, |
|
"eval_steps_per_second": 3.295, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"eval_accuracy": 0.9318181818181818, |
|
"eval_loss": 0.19937144219875336, |
|
"eval_runtime": 0.6058, |
|
"eval_samples_per_second": 72.632, |
|
"eval_steps_per_second": 3.301, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.19752758741378784, |
|
"eval_runtime": 0.613, |
|
"eval_samples_per_second": 71.774, |
|
"eval_steps_per_second": 3.262, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.19738946855068207, |
|
"eval_runtime": 0.6029, |
|
"eval_samples_per_second": 72.982, |
|
"eval_steps_per_second": 3.317, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"grad_norm": 1.4332749843597412, |
|
"learning_rate": 5.555555555555556e-06, |
|
"loss": 0.0923, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.1964712142944336, |
|
"eval_runtime": 0.6224, |
|
"eval_samples_per_second": 70.694, |
|
"eval_steps_per_second": 3.213, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.19246001541614532, |
|
"eval_runtime": 0.6066, |
|
"eval_samples_per_second": 72.537, |
|
"eval_steps_per_second": 3.297, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.19422116875648499, |
|
"eval_runtime": 0.6186, |
|
"eval_samples_per_second": 71.123, |
|
"eval_steps_per_second": 3.233, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 93.0, |
|
"eval_accuracy": 0.9318181818181818, |
|
"eval_loss": 0.19693025946617126, |
|
"eval_runtime": 0.6105, |
|
"eval_samples_per_second": 72.075, |
|
"eval_steps_per_second": 3.276, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"eval_accuracy": 0.9318181818181818, |
|
"eval_loss": 0.1949141025543213, |
|
"eval_runtime": 0.613, |
|
"eval_samples_per_second": 71.78, |
|
"eval_steps_per_second": 3.263, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"grad_norm": 1.9863300323486328, |
|
"learning_rate": 2.777777777777778e-06, |
|
"loss": 0.0657, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"eval_accuracy": 0.9318181818181818, |
|
"eval_loss": 0.19037796556949615, |
|
"eval_runtime": 0.6049, |
|
"eval_samples_per_second": 72.734, |
|
"eval_steps_per_second": 3.306, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.18766190111637115, |
|
"eval_runtime": 0.6224, |
|
"eval_samples_per_second": 70.689, |
|
"eval_steps_per_second": 3.213, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 97.0, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.1884867548942566, |
|
"eval_runtime": 0.6265, |
|
"eval_samples_per_second": 70.237, |
|
"eval_steps_per_second": 3.193, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"eval_accuracy": 0.9318181818181818, |
|
"eval_loss": 0.1901746690273285, |
|
"eval_runtime": 0.6104, |
|
"eval_samples_per_second": 72.087, |
|
"eval_steps_per_second": 3.277, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 99.0, |
|
"eval_accuracy": 0.9318181818181818, |
|
"eval_loss": 0.19220541417598724, |
|
"eval_runtime": 0.6116, |
|
"eval_samples_per_second": 71.943, |
|
"eval_steps_per_second": 3.27, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"grad_norm": 1.5260493755340576, |
|
"learning_rate": 0.0, |
|
"loss": 0.0822, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_accuracy": 0.9318181818181818, |
|
"eval_loss": 0.19321714341640472, |
|
"eval_runtime": 0.6259, |
|
"eval_samples_per_second": 70.297, |
|
"eval_steps_per_second": 3.195, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"step": 200, |
|
"total_flos": 1.883089058199552e+18, |
|
"train_loss": 0.17245761930942535, |
|
"train_runtime": 1120.8578, |
|
"train_samples_per_second": 21.68, |
|
"train_steps_per_second": 0.178 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_accuracy": 0.9318181818181818, |
|
"eval_loss": 0.19909273087978363, |
|
"eval_runtime": 0.6966, |
|
"eval_samples_per_second": 63.167, |
|
"eval_steps_per_second": 2.871, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 100, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.883089058199552e+18, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|