|
{ |
|
"best_metric": 0.9069767441860465, |
|
"best_model_checkpoint": "deit-base-distilled-patch16-224-75-fold1/checkpoint-88", |
|
"epoch": 100.0, |
|
"eval_steps": 500, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6511627906976745, |
|
"eval_loss": 0.6476810574531555, |
|
"eval_runtime": 0.6318, |
|
"eval_samples_per_second": 68.06, |
|
"eval_steps_per_second": 3.166, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6976744186046512, |
|
"eval_loss": 0.6527612805366516, |
|
"eval_runtime": 0.6173, |
|
"eval_samples_per_second": 69.663, |
|
"eval_steps_per_second": 3.24, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.6976744186046512, |
|
"eval_loss": 0.8095551133155823, |
|
"eval_runtime": 0.6043, |
|
"eval_samples_per_second": 71.154, |
|
"eval_steps_per_second": 3.309, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6976744186046512, |
|
"eval_loss": 0.7679464817047119, |
|
"eval_runtime": 0.6207, |
|
"eval_samples_per_second": 69.279, |
|
"eval_steps_per_second": 3.222, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 2.9488413333892822, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.5994, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.6976744186046512, |
|
"eval_loss": 0.5481935739517212, |
|
"eval_runtime": 0.6133, |
|
"eval_samples_per_second": 70.117, |
|
"eval_steps_per_second": 3.261, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7441860465116279, |
|
"eval_loss": 0.4983576536178589, |
|
"eval_runtime": 0.6175, |
|
"eval_samples_per_second": 69.631, |
|
"eval_steps_per_second": 3.239, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.6976744186046512, |
|
"eval_loss": 0.6156066656112671, |
|
"eval_runtime": 0.6217, |
|
"eval_samples_per_second": 69.167, |
|
"eval_steps_per_second": 3.217, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7674418604651163, |
|
"eval_loss": 0.530674397945404, |
|
"eval_runtime": 0.6286, |
|
"eval_samples_per_second": 68.405, |
|
"eval_steps_per_second": 3.182, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.7674418604651163, |
|
"eval_loss": 0.40364784002304077, |
|
"eval_runtime": 0.6233, |
|
"eval_samples_per_second": 68.986, |
|
"eval_steps_per_second": 3.209, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 4.075669288635254, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3806, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.7906976744186046, |
|
"eval_loss": 0.42405834794044495, |
|
"eval_runtime": 0.6323, |
|
"eval_samples_per_second": 68.001, |
|
"eval_steps_per_second": 3.163, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.813953488372093, |
|
"eval_loss": 0.4263027012348175, |
|
"eval_runtime": 0.6181, |
|
"eval_samples_per_second": 69.564, |
|
"eval_steps_per_second": 3.236, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.7441860465116279, |
|
"eval_loss": 0.6778327226638794, |
|
"eval_runtime": 0.642, |
|
"eval_samples_per_second": 66.973, |
|
"eval_steps_per_second": 3.115, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.7674418604651163, |
|
"eval_loss": 0.5884802937507629, |
|
"eval_runtime": 0.6192, |
|
"eval_samples_per_second": 69.444, |
|
"eval_steps_per_second": 3.23, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.7906976744186046, |
|
"eval_loss": 0.6048120260238647, |
|
"eval_runtime": 0.6378, |
|
"eval_samples_per_second": 67.423, |
|
"eval_steps_per_second": 3.136, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 3.8426947593688965, |
|
"learning_rate": 4.722222222222222e-05, |
|
"loss": 0.273, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.813953488372093, |
|
"eval_loss": 0.5110020041465759, |
|
"eval_runtime": 0.6259, |
|
"eval_samples_per_second": 68.697, |
|
"eval_steps_per_second": 3.195, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.7441860465116279, |
|
"eval_loss": 0.3793295919895172, |
|
"eval_runtime": 0.6397, |
|
"eval_samples_per_second": 67.215, |
|
"eval_steps_per_second": 3.126, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.7906976744186046, |
|
"eval_loss": 0.3635205626487732, |
|
"eval_runtime": 0.63, |
|
"eval_samples_per_second": 68.259, |
|
"eval_steps_per_second": 3.175, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.813953488372093, |
|
"eval_loss": 0.3863283097743988, |
|
"eval_runtime": 0.6683, |
|
"eval_samples_per_second": 64.347, |
|
"eval_steps_per_second": 2.993, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8372093023255814, |
|
"eval_loss": 0.37879544496536255, |
|
"eval_runtime": 0.621, |
|
"eval_samples_per_second": 69.245, |
|
"eval_steps_per_second": 3.221, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 4.38327693939209, |
|
"learning_rate": 4.4444444444444447e-05, |
|
"loss": 0.2388, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.813953488372093, |
|
"eval_loss": 0.33901217579841614, |
|
"eval_runtime": 0.6226, |
|
"eval_samples_per_second": 69.07, |
|
"eval_steps_per_second": 3.213, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.7906976744186046, |
|
"eval_loss": 0.45933058857917786, |
|
"eval_runtime": 0.6161, |
|
"eval_samples_per_second": 69.798, |
|
"eval_steps_per_second": 3.246, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.344134122133255, |
|
"eval_runtime": 0.6226, |
|
"eval_samples_per_second": 69.067, |
|
"eval_steps_per_second": 3.212, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.7906976744186046, |
|
"eval_loss": 0.5482814908027649, |
|
"eval_runtime": 0.6286, |
|
"eval_samples_per_second": 68.407, |
|
"eval_steps_per_second": 3.182, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.7906976744186046, |
|
"eval_loss": 0.6398680210113525, |
|
"eval_runtime": 0.6347, |
|
"eval_samples_per_second": 67.748, |
|
"eval_steps_per_second": 3.151, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"grad_norm": 1.9829111099243164, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 0.189, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.3333226442337036, |
|
"eval_runtime": 0.6246, |
|
"eval_samples_per_second": 68.841, |
|
"eval_steps_per_second": 3.202, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.3325919210910797, |
|
"eval_runtime": 0.6227, |
|
"eval_samples_per_second": 69.059, |
|
"eval_steps_per_second": 3.212, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.7906976744186046, |
|
"eval_loss": 0.41504570841789246, |
|
"eval_runtime": 0.6243, |
|
"eval_samples_per_second": 68.877, |
|
"eval_steps_per_second": 3.204, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.34200432896614075, |
|
"eval_runtime": 0.6237, |
|
"eval_samples_per_second": 68.94, |
|
"eval_steps_per_second": 3.207, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.8372093023255814, |
|
"eval_loss": 0.36485448479652405, |
|
"eval_runtime": 0.6179, |
|
"eval_samples_per_second": 69.595, |
|
"eval_steps_per_second": 3.237, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"grad_norm": 2.870300531387329, |
|
"learning_rate": 3.888888888888889e-05, |
|
"loss": 0.1718, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.36506161093711853, |
|
"eval_runtime": 0.6332, |
|
"eval_samples_per_second": 67.905, |
|
"eval_steps_per_second": 3.158, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.813953488372093, |
|
"eval_loss": 0.46762949228286743, |
|
"eval_runtime": 0.6159, |
|
"eval_samples_per_second": 69.813, |
|
"eval_steps_per_second": 3.247, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.3543034791946411, |
|
"eval_runtime": 0.6304, |
|
"eval_samples_per_second": 68.211, |
|
"eval_steps_per_second": 3.173, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.813953488372093, |
|
"eval_loss": 0.3209027349948883, |
|
"eval_runtime": 0.6291, |
|
"eval_samples_per_second": 68.348, |
|
"eval_steps_per_second": 3.179, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.34199726581573486, |
|
"eval_runtime": 0.6272, |
|
"eval_samples_per_second": 68.564, |
|
"eval_steps_per_second": 3.189, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"grad_norm": 3.6730294227600098, |
|
"learning_rate": 3.611111111111111e-05, |
|
"loss": 0.1466, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.8372093023255814, |
|
"eval_loss": 0.37998369336128235, |
|
"eval_runtime": 0.6314, |
|
"eval_samples_per_second": 68.108, |
|
"eval_steps_per_second": 3.168, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.813953488372093, |
|
"eval_loss": 0.6547455191612244, |
|
"eval_runtime": 0.6789, |
|
"eval_samples_per_second": 63.339, |
|
"eval_steps_per_second": 2.946, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.7674418604651163, |
|
"eval_loss": 0.9743425250053406, |
|
"eval_runtime": 0.6571, |
|
"eval_samples_per_second": 65.436, |
|
"eval_steps_per_second": 3.044, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.7906976744186046, |
|
"eval_loss": 0.6677446961402893, |
|
"eval_runtime": 0.6757, |
|
"eval_samples_per_second": 63.638, |
|
"eval_steps_per_second": 2.96, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.813953488372093, |
|
"eval_loss": 0.5691245794296265, |
|
"eval_runtime": 0.6355, |
|
"eval_samples_per_second": 67.666, |
|
"eval_steps_per_second": 3.147, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"grad_norm": 1.461414098739624, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.119, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.4796452224254608, |
|
"eval_runtime": 0.6302, |
|
"eval_samples_per_second": 68.228, |
|
"eval_steps_per_second": 3.173, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.32433825731277466, |
|
"eval_runtime": 0.6208, |
|
"eval_samples_per_second": 69.265, |
|
"eval_steps_per_second": 3.222, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.29689204692840576, |
|
"eval_runtime": 0.6213, |
|
"eval_samples_per_second": 69.211, |
|
"eval_steps_per_second": 3.219, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.8372093023255814, |
|
"eval_loss": 0.3637494146823883, |
|
"eval_runtime": 0.6187, |
|
"eval_samples_per_second": 69.505, |
|
"eval_steps_per_second": 3.233, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.9069767441860465, |
|
"eval_loss": 0.30976200103759766, |
|
"eval_runtime": 0.6227, |
|
"eval_samples_per_second": 69.059, |
|
"eval_steps_per_second": 3.212, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"grad_norm": 3.725754737854004, |
|
"learning_rate": 3.055555555555556e-05, |
|
"loss": 0.1123, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.3953772783279419, |
|
"eval_runtime": 0.6186, |
|
"eval_samples_per_second": 69.516, |
|
"eval_steps_per_second": 3.233, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.9069767441860465, |
|
"eval_loss": 0.3197100758552551, |
|
"eval_runtime": 0.631, |
|
"eval_samples_per_second": 68.144, |
|
"eval_steps_per_second": 3.169, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.8372093023255814, |
|
"eval_loss": 0.318818062543869, |
|
"eval_runtime": 0.6195, |
|
"eval_samples_per_second": 69.405, |
|
"eval_steps_per_second": 3.228, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.8372093023255814, |
|
"eval_loss": 0.30999791622161865, |
|
"eval_runtime": 0.6294, |
|
"eval_samples_per_second": 68.321, |
|
"eval_steps_per_second": 3.178, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.3653392791748047, |
|
"eval_runtime": 0.6265, |
|
"eval_samples_per_second": 68.633, |
|
"eval_steps_per_second": 3.192, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"grad_norm": 3.50057315826416, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.1136, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.35266682505607605, |
|
"eval_runtime": 0.6288, |
|
"eval_samples_per_second": 68.382, |
|
"eval_steps_per_second": 3.181, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.3152053952217102, |
|
"eval_runtime": 0.6272, |
|
"eval_samples_per_second": 68.564, |
|
"eval_steps_per_second": 3.189, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.32772013545036316, |
|
"eval_runtime": 0.6243, |
|
"eval_samples_per_second": 68.875, |
|
"eval_steps_per_second": 3.203, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.32207396626472473, |
|
"eval_runtime": 0.6252, |
|
"eval_samples_per_second": 68.781, |
|
"eval_steps_per_second": 3.199, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.34381797909736633, |
|
"eval_runtime": 0.6342, |
|
"eval_samples_per_second": 67.803, |
|
"eval_steps_per_second": 3.154, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"grad_norm": 1.3971636295318604, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.0858, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.4682579040527344, |
|
"eval_runtime": 0.6268, |
|
"eval_samples_per_second": 68.598, |
|
"eval_steps_per_second": 3.191, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.4511352479457855, |
|
"eval_runtime": 0.636, |
|
"eval_samples_per_second": 67.609, |
|
"eval_steps_per_second": 3.145, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.3486385643482208, |
|
"eval_runtime": 0.636, |
|
"eval_samples_per_second": 67.605, |
|
"eval_steps_per_second": 3.144, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.35942766070365906, |
|
"eval_runtime": 0.6431, |
|
"eval_samples_per_second": 66.86, |
|
"eval_steps_per_second": 3.11, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.39140263199806213, |
|
"eval_runtime": 0.6182, |
|
"eval_samples_per_second": 69.554, |
|
"eval_steps_per_second": 3.235, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"grad_norm": 3.632699728012085, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 0.084, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.4256921410560608, |
|
"eval_runtime": 0.6209, |
|
"eval_samples_per_second": 69.256, |
|
"eval_steps_per_second": 3.221, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.4505208134651184, |
|
"eval_runtime": 0.6326, |
|
"eval_samples_per_second": 67.972, |
|
"eval_steps_per_second": 3.162, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.4038391411304474, |
|
"eval_runtime": 0.6342, |
|
"eval_samples_per_second": 67.797, |
|
"eval_steps_per_second": 3.153, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.8372093023255814, |
|
"eval_loss": 0.3745191693305969, |
|
"eval_runtime": 0.6342, |
|
"eval_samples_per_second": 67.8, |
|
"eval_steps_per_second": 3.153, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.813953488372093, |
|
"eval_loss": 0.3773575723171234, |
|
"eval_runtime": 0.6457, |
|
"eval_samples_per_second": 66.599, |
|
"eval_steps_per_second": 3.098, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"grad_norm": 1.8791605234146118, |
|
"learning_rate": 1.9444444444444445e-05, |
|
"loss": 0.0938, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.813953488372093, |
|
"eval_loss": 0.37120649218559265, |
|
"eval_runtime": 0.6273, |
|
"eval_samples_per_second": 68.551, |
|
"eval_steps_per_second": 3.188, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.813953488372093, |
|
"eval_loss": 0.3736191987991333, |
|
"eval_runtime": 0.6296, |
|
"eval_samples_per_second": 68.293, |
|
"eval_steps_per_second": 3.176, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.3839561641216278, |
|
"eval_runtime": 0.6195, |
|
"eval_samples_per_second": 69.415, |
|
"eval_steps_per_second": 3.229, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.39015671610832214, |
|
"eval_runtime": 0.623, |
|
"eval_samples_per_second": 69.018, |
|
"eval_steps_per_second": 3.21, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.41052982211112976, |
|
"eval_runtime": 0.6203, |
|
"eval_samples_per_second": 69.32, |
|
"eval_steps_per_second": 3.224, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"grad_norm": 1.6623305082321167, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.055, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.44984933733940125, |
|
"eval_runtime": 0.6218, |
|
"eval_samples_per_second": 69.156, |
|
"eval_steps_per_second": 3.217, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.4954254627227783, |
|
"eval_runtime": 0.6202, |
|
"eval_samples_per_second": 69.328, |
|
"eval_steps_per_second": 3.225, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.6397253274917603, |
|
"eval_runtime": 0.6291, |
|
"eval_samples_per_second": 68.351, |
|
"eval_steps_per_second": 3.179, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.6271260976791382, |
|
"eval_runtime": 0.6211, |
|
"eval_samples_per_second": 69.235, |
|
"eval_steps_per_second": 3.22, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.4821247160434723, |
|
"eval_runtime": 0.6297, |
|
"eval_samples_per_second": 68.291, |
|
"eval_steps_per_second": 3.176, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"grad_norm": 2.1010677814483643, |
|
"learning_rate": 1.388888888888889e-05, |
|
"loss": 0.0755, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.9069767441860465, |
|
"eval_loss": 0.36991921067237854, |
|
"eval_runtime": 0.6162, |
|
"eval_samples_per_second": 69.782, |
|
"eval_steps_per_second": 3.246, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.33030539751052856, |
|
"eval_runtime": 0.6223, |
|
"eval_samples_per_second": 69.094, |
|
"eval_steps_per_second": 3.214, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.328202486038208, |
|
"eval_runtime": 0.6165, |
|
"eval_samples_per_second": 69.745, |
|
"eval_steps_per_second": 3.244, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.31808754801750183, |
|
"eval_runtime": 0.6336, |
|
"eval_samples_per_second": 67.862, |
|
"eval_steps_per_second": 3.156, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.3082694709300995, |
|
"eval_runtime": 0.6161, |
|
"eval_samples_per_second": 69.798, |
|
"eval_steps_per_second": 3.246, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"grad_norm": 1.6037031412124634, |
|
"learning_rate": 1.1111111111111112e-05, |
|
"loss": 0.0603, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.8372093023255814, |
|
"eval_loss": 0.3170202970504761, |
|
"eval_runtime": 0.615, |
|
"eval_samples_per_second": 69.918, |
|
"eval_steps_per_second": 3.252, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"eval_accuracy": 0.8372093023255814, |
|
"eval_loss": 0.33968400955200195, |
|
"eval_runtime": 0.6268, |
|
"eval_samples_per_second": 68.606, |
|
"eval_steps_per_second": 3.191, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_accuracy": 0.8372093023255814, |
|
"eval_loss": 0.3537546992301941, |
|
"eval_runtime": 0.6325, |
|
"eval_samples_per_second": 67.982, |
|
"eval_steps_per_second": 3.162, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"eval_accuracy": 0.8372093023255814, |
|
"eval_loss": 0.3461407721042633, |
|
"eval_runtime": 0.6296, |
|
"eval_samples_per_second": 68.3, |
|
"eval_steps_per_second": 3.177, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.813953488372093, |
|
"eval_loss": 0.33368217945098877, |
|
"eval_runtime": 0.6261, |
|
"eval_samples_per_second": 68.682, |
|
"eval_steps_per_second": 3.195, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"grad_norm": 2.1904349327087402, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 0.0653, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"eval_accuracy": 0.8372093023255814, |
|
"eval_loss": 0.3330100178718567, |
|
"eval_runtime": 0.6375, |
|
"eval_samples_per_second": 67.448, |
|
"eval_steps_per_second": 3.137, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.345066636800766, |
|
"eval_runtime": 0.6175, |
|
"eval_samples_per_second": 69.64, |
|
"eval_steps_per_second": 3.239, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.3611831068992615, |
|
"eval_runtime": 0.6169, |
|
"eval_samples_per_second": 69.709, |
|
"eval_steps_per_second": 3.242, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.382154643535614, |
|
"eval_runtime": 0.6328, |
|
"eval_samples_per_second": 67.955, |
|
"eval_steps_per_second": 3.161, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.38750723004341125, |
|
"eval_runtime": 0.6278, |
|
"eval_samples_per_second": 68.495, |
|
"eval_steps_per_second": 3.186, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"grad_norm": 1.9912004470825195, |
|
"learning_rate": 5.555555555555556e-06, |
|
"loss": 0.0571, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.38448840379714966, |
|
"eval_runtime": 0.6272, |
|
"eval_samples_per_second": 68.554, |
|
"eval_steps_per_second": 3.189, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.3641882836818695, |
|
"eval_runtime": 0.625, |
|
"eval_samples_per_second": 68.795, |
|
"eval_steps_per_second": 3.2, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.3528924584388733, |
|
"eval_runtime": 0.631, |
|
"eval_samples_per_second": 68.15, |
|
"eval_steps_per_second": 3.17, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 93.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.347101092338562, |
|
"eval_runtime": 0.6246, |
|
"eval_samples_per_second": 68.848, |
|
"eval_steps_per_second": 3.202, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.35397639870643616, |
|
"eval_runtime": 0.6263, |
|
"eval_samples_per_second": 68.662, |
|
"eval_steps_per_second": 3.194, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"grad_norm": 1.0477629899978638, |
|
"learning_rate": 2.777777777777778e-06, |
|
"loss": 0.069, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.36087337136268616, |
|
"eval_runtime": 0.6222, |
|
"eval_samples_per_second": 69.109, |
|
"eval_steps_per_second": 3.214, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.3609488904476166, |
|
"eval_runtime": 0.6221, |
|
"eval_samples_per_second": 69.119, |
|
"eval_steps_per_second": 3.215, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 97.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.3633655905723572, |
|
"eval_runtime": 0.6181, |
|
"eval_samples_per_second": 69.562, |
|
"eval_steps_per_second": 3.235, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.3627144694328308, |
|
"eval_runtime": 0.6291, |
|
"eval_samples_per_second": 68.35, |
|
"eval_steps_per_second": 3.179, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 99.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.36094027757644653, |
|
"eval_runtime": 0.6307, |
|
"eval_samples_per_second": 68.181, |
|
"eval_steps_per_second": 3.171, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"grad_norm": 1.145646572113037, |
|
"learning_rate": 0.0, |
|
"loss": 0.0667, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.36037373542785645, |
|
"eval_runtime": 0.6299, |
|
"eval_samples_per_second": 68.267, |
|
"eval_steps_per_second": 3.175, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"step": 200, |
|
"total_flos": 1.867590382823424e+18, |
|
"train_loss": 0.15281517714262008, |
|
"train_runtime": 1123.7579, |
|
"train_samples_per_second": 21.446, |
|
"train_steps_per_second": 0.178 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_accuracy": 0.9069767441860465, |
|
"eval_loss": 0.30976200103759766, |
|
"eval_runtime": 0.6605, |
|
"eval_samples_per_second": 65.101, |
|
"eval_steps_per_second": 3.028, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 100, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.867590382823424e+18, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|