Text Classification
Transformers
TensorBoard
Safetensors
modernbert
wissamantoun's picture
Upload folder using huggingface_hub
7983357 verified
{
"best_global_step": 471,
"best_metric": 0.7473118279569892,
"best_model_checkpoint": "/linkhome/rech/genini01/udd26kf/scratch/weborganizer/models/runs/answerdotai--ModernBERT-base_TopicAnnotations-Llama-3.1-8B_bsz512_lr1e-4_epochs5_warmup0.1_url1_TopicAnnotations-Llama-3.1-405B-FP8_bsz512_lr1e-4_epochs5_warmup0.1_url1/checkpoint-471",
"epoch": 4.9728,
"eval_steps": 500,
"global_step": 780,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.64,
"grad_norm": 8.25,
"learning_rate": 9.686609686609687e-05,
"loss": 2.1544,
"num_input_tokens_seen": 249204064,
"step": 100
},
{
"epoch": 1.0,
"eval_validation.parquet_accuracy": 0.8451,
"eval_validation.parquet_accuracy__0": 0.9019607843137255,
"eval_validation.parquet_accuracy__1": 0.7975460122699386,
"eval_validation.parquet_accuracy__10": 0.9105882352941177,
"eval_validation.parquet_accuracy__11": 0.8687258687258688,
"eval_validation.parquet_accuracy__12": 0.8734622144112478,
"eval_validation.parquet_accuracy__13": 0.6827956989247311,
"eval_validation.parquet_accuracy__14": 0.8229461756373938,
"eval_validation.parquet_accuracy__15": 0.8427947598253275,
"eval_validation.parquet_accuracy__16": 0.8194842406876791,
"eval_validation.parquet_accuracy__17": 0.8260135135135135,
"eval_validation.parquet_accuracy__18": 0.8732876712328768,
"eval_validation.parquet_accuracy__19": 0.861764705882353,
"eval_validation.parquet_accuracy__2": 0.8159203980099502,
"eval_validation.parquet_accuracy__20": 0.7183098591549296,
"eval_validation.parquet_accuracy__21": 0.8975409836065574,
"eval_validation.parquet_accuracy__22": 0.7981651376146789,
"eval_validation.parquet_accuracy__23": 0.863481228668942,
"eval_validation.parquet_accuracy__3": 0.9017857142857143,
"eval_validation.parquet_accuracy__4": 0.8697829716193656,
"eval_validation.parquet_accuracy__5": 0.8246268656716418,
"eval_validation.parquet_accuracy__6": 0.8907168037602821,
"eval_validation.parquet_accuracy__7": 0.7784946236559139,
"eval_validation.parquet_accuracy__8": 0.8932584269662921,
"eval_validation.parquet_accuracy__9": 0.8230403800475059,
"eval_validation.parquet_accuracy_conf50": 0.8559460563955864,
"eval_validation.parquet_accuracy_conf50__0": 0.9108910891089109,
"eval_validation.parquet_accuracy_conf50__1": 0.8087774294670846,
"eval_validation.parquet_accuracy_conf50__10": 0.9148936170212766,
"eval_validation.parquet_accuracy_conf50__11": 0.8745098039215686,
"eval_validation.parquet_accuracy_conf50__12": 0.8857142857142857,
"eval_validation.parquet_accuracy_conf50__13": 0.7085714285714285,
"eval_validation.parquet_accuracy_conf50__14": 0.8338150289017341,
"eval_validation.parquet_accuracy_conf50__15": 0.8609865470852018,
"eval_validation.parquet_accuracy_conf50__16": 0.8240469208211144,
"eval_validation.parquet_accuracy_conf50__17": 0.8319039451114922,
"eval_validation.parquet_accuracy_conf50__18": 0.8788927335640139,
"eval_validation.parquet_accuracy_conf50__19": 0.8761329305135952,
"eval_validation.parquet_accuracy_conf50__2": 0.8350515463917526,
"eval_validation.parquet_accuracy_conf50__20": 0.7323529411764705,
"eval_validation.parquet_accuracy_conf50__21": 0.9009628610729024,
"eval_validation.parquet_accuracy_conf50__22": 0.8113207547169812,
"eval_validation.parquet_accuracy_conf50__23": 0.865979381443299,
"eval_validation.parquet_accuracy_conf50__3": 0.9195046439628483,
"eval_validation.parquet_accuracy_conf50__4": 0.8807495741056218,
"eval_validation.parquet_accuracy_conf50__5": 0.8358778625954199,
"eval_validation.parquet_accuracy_conf50__6": 0.9020310633213859,
"eval_validation.parquet_accuracy_conf50__7": 0.7986425339366516,
"eval_validation.parquet_accuracy_conf50__8": 0.8951841359773371,
"eval_validation.parquet_accuracy_conf50__9": 0.8345498783454988,
"eval_validation.parquet_accuracy_conf75": 0.9065606361829026,
"eval_validation.parquet_accuracy_conf75__0": 0.967391304347826,
"eval_validation.parquet_accuracy_conf75__1": 0.8828125,
"eval_validation.parquet_accuracy_conf75__10": 0.9493670886075949,
"eval_validation.parquet_accuracy_conf75__11": 0.9037656903765691,
"eval_validation.parquet_accuracy_conf75__12": 0.9176470588235294,
"eval_validation.parquet_accuracy_conf75__13": 0.7906976744186046,
"eval_validation.parquet_accuracy_conf75__14": 0.9027303754266212,
"eval_validation.parquet_accuracy_conf75__15": 0.9281767955801105,
"eval_validation.parquet_accuracy_conf75__16": 0.886986301369863,
"eval_validation.parquet_accuracy_conf75__17": 0.8814229249011858,
"eval_validation.parquet_accuracy_conf75__18": 0.9176029962546817,
"eval_validation.parquet_accuracy_conf75__19": 0.9185185185185185,
"eval_validation.parquet_accuracy_conf75__2": 0.9047619047619048,
"eval_validation.parquet_accuracy_conf75__20": 0.8022813688212928,
"eval_validation.parquet_accuracy_conf75__21": 0.9327485380116959,
"eval_validation.parquet_accuracy_conf75__22": 0.8741007194244604,
"eval_validation.parquet_accuracy_conf75__23": 0.9246031746031746,
"eval_validation.parquet_accuracy_conf75__3": 0.9473684210526315,
"eval_validation.parquet_accuracy_conf75__4": 0.9242718446601942,
"eval_validation.parquet_accuracy_conf75__5": 0.8879310344827587,
"eval_validation.parquet_accuracy_conf75__6": 0.9322033898305084,
"eval_validation.parquet_accuracy_conf75__7": 0.848,
"eval_validation.parquet_accuracy_conf75__8": 0.9339622641509434,
"eval_validation.parquet_accuracy_conf75__9": 0.9001447178002895,
"eval_validation.parquet_accuracy_label_average": 0.8398538864075228,
"eval_validation.parquet_accuracy_label_average_conf50": 0.8508892890353281,
"eval_validation.parquet_accuracy_label_average_conf75": 0.9024790252593734,
"eval_validation.parquet_accuracy_label_min": 0.6827956989247311,
"eval_validation.parquet_accuracy_label_min_conf50": 0.7085714285714285,
"eval_validation.parquet_accuracy_label_min_conf75": 0.7906976744186046,
"eval_validation.parquet_loss": 0.5004527568817139,
"eval_validation.parquet_proportion_conf50": 0.9788,
"eval_validation.parquet_proportion_conf75": 0.8551,
"eval_validation.parquet_runtime": 10.52,
"eval_validation.parquet_samples_per_second": 950.571,
"eval_validation.parquet_steps_per_second": 29.753,
"num_input_tokens_seen": 390215936,
"step": 157
},
{
"epoch": 1.2752,
"grad_norm": 9.875,
"learning_rate": 8.262108262108262e-05,
"loss": 1.8475,
"num_input_tokens_seen": 499147424,
"step": 200
},
{
"epoch": 1.9152,
"grad_norm": 7.53125,
"learning_rate": 6.837606837606838e-05,
"loss": 1.7317,
"num_input_tokens_seen": 751160992,
"step": 300
},
{
"epoch": 2.0,
"eval_validation.parquet_accuracy": 0.8526,
"eval_validation.parquet_accuracy__0": 0.8725490196078431,
"eval_validation.parquet_accuracy__1": 0.8128834355828221,
"eval_validation.parquet_accuracy__10": 0.9176470588235294,
"eval_validation.parquet_accuracy__11": 0.9073359073359073,
"eval_validation.parquet_accuracy__12": 0.9138840070298769,
"eval_validation.parquet_accuracy__13": 0.7419354838709677,
"eval_validation.parquet_accuracy__14": 0.7818696883852692,
"eval_validation.parquet_accuracy__15": 0.8427947598253275,
"eval_validation.parquet_accuracy__16": 0.8481375358166189,
"eval_validation.parquet_accuracy__17": 0.8733108108108109,
"eval_validation.parquet_accuracy__18": 0.8732876712328768,
"eval_validation.parquet_accuracy__19": 0.8205882352941176,
"eval_validation.parquet_accuracy__2": 0.7860696517412935,
"eval_validation.parquet_accuracy__20": 0.7830985915492957,
"eval_validation.parquet_accuracy__21": 0.9344262295081968,
"eval_validation.parquet_accuracy__22": 0.8562691131498471,
"eval_validation.parquet_accuracy__23": 0.9078498293515358,
"eval_validation.parquet_accuracy__3": 0.8541666666666666,
"eval_validation.parquet_accuracy__4": 0.8414023372287145,
"eval_validation.parquet_accuracy__5": 0.8208955223880597,
"eval_validation.parquet_accuracy__6": 0.8883666274970623,
"eval_validation.parquet_accuracy__7": 0.7784946236559139,
"eval_validation.parquet_accuracy__8": 0.8960674157303371,
"eval_validation.parquet_accuracy__9": 0.8111638954869359,
"eval_validation.parquet_accuracy_conf50": 0.8627911728647323,
"eval_validation.parquet_accuracy_conf50__0": 0.8811881188118812,
"eval_validation.parquet_accuracy_conf50__1": 0.8244514106583072,
"eval_validation.parquet_accuracy_conf50__10": 0.9219858156028369,
"eval_validation.parquet_accuracy_conf50__11": 0.9137254901960784,
"eval_validation.parquet_accuracy_conf50__12": 0.9214285714285714,
"eval_validation.parquet_accuracy_conf50__13": 0.7771428571428571,
"eval_validation.parquet_accuracy_conf50__14": 0.7947976878612717,
"eval_validation.parquet_accuracy_conf50__15": 0.8565022421524664,
"eval_validation.parquet_accuracy_conf50__16": 0.8533724340175953,
"eval_validation.parquet_accuracy_conf50__17": 0.8782161234991424,
"eval_validation.parquet_accuracy_conf50__18": 0.8788927335640139,
"eval_validation.parquet_accuracy_conf50__19": 0.8368580060422961,
"eval_validation.parquet_accuracy_conf50__2": 0.8041237113402062,
"eval_validation.parquet_accuracy_conf50__20": 0.8,
"eval_validation.parquet_accuracy_conf50__21": 0.936726272352132,
"eval_validation.parquet_accuracy_conf50__22": 0.8679245283018868,
"eval_validation.parquet_accuracy_conf50__23": 0.9072164948453608,
"eval_validation.parquet_accuracy_conf50__3": 0.8761609907120743,
"eval_validation.parquet_accuracy_conf50__4": 0.8534923339011925,
"eval_validation.parquet_accuracy_conf50__5": 0.8320610687022901,
"eval_validation.parquet_accuracy_conf50__6": 0.8984468339307049,
"eval_validation.parquet_accuracy_conf50__7": 0.7873303167420814,
"eval_validation.parquet_accuracy_conf50__8": 0.8980169971671388,
"eval_validation.parquet_accuracy_conf50__9": 0.8211678832116789,
"eval_validation.parquet_accuracy_conf75": 0.9124079055081277,
"eval_validation.parquet_accuracy_conf75__0": 0.9347826086956522,
"eval_validation.parquet_accuracy_conf75__1": 0.8984375,
"eval_validation.parquet_accuracy_conf75__10": 0.9620253164556962,
"eval_validation.parquet_accuracy_conf75__11": 0.9372384937238494,
"eval_validation.parquet_accuracy_conf75__12": 0.9529411764705882,
"eval_validation.parquet_accuracy_conf75__13": 0.8294573643410853,
"eval_validation.parquet_accuracy_conf75__14": 0.8686006825938567,
"eval_validation.parquet_accuracy_conf75__15": 0.9226519337016574,
"eval_validation.parquet_accuracy_conf75__16": 0.9143835616438356,
"eval_validation.parquet_accuracy_conf75__17": 0.9209486166007905,
"eval_validation.parquet_accuracy_conf75__18": 0.9250936329588015,
"eval_validation.parquet_accuracy_conf75__19": 0.8962962962962963,
"eval_validation.parquet_accuracy_conf75__2": 0.8809523809523809,
"eval_validation.parquet_accuracy_conf75__20": 0.8593155893536122,
"eval_validation.parquet_accuracy_conf75__21": 0.9576023391812866,
"eval_validation.parquet_accuracy_conf75__22": 0.8992805755395683,
"eval_validation.parquet_accuracy_conf75__23": 0.9603174603174603,
"eval_validation.parquet_accuracy_conf75__3": 0.9192982456140351,
"eval_validation.parquet_accuracy_conf75__4": 0.8990291262135922,
"eval_validation.parquet_accuracy_conf75__5": 0.8879310344827587,
"eval_validation.parquet_accuracy_conf75__6": 0.9282920469361148,
"eval_validation.parquet_accuracy_conf75__7": 0.84,
"eval_validation.parquet_accuracy_conf75__8": 0.9339622641509434,
"eval_validation.parquet_accuracy_conf75__9": 0.8900144717800289,
"eval_validation.parquet_accuracy_label_average": 0.8485205882320762,
"eval_validation.parquet_accuracy_label_average_conf50": 0.8592178717576693,
"eval_validation.parquet_accuracy_label_average_conf75": 0.909118863250162,
"eval_validation.parquet_accuracy_label_min": 0.7419354838709677,
"eval_validation.parquet_accuracy_label_min_conf50": 0.7771428571428571,
"eval_validation.parquet_accuracy_label_min_conf75": 0.8294573643410853,
"eval_validation.parquet_loss": 0.4816047251224518,
"eval_validation.parquet_proportion_conf50": 0.9788,
"eval_validation.parquet_proportion_conf75": 0.8551,
"eval_validation.parquet_runtime": 8.307,
"eval_validation.parquet_samples_per_second": 1203.799,
"eval_validation.parquet_steps_per_second": 37.679,
"num_input_tokens_seen": 783399104,
"step": 314
},
{
"epoch": 2.5504,
"grad_norm": 7.59375,
"learning_rate": 5.413105413105414e-05,
"loss": 1.5837,
"num_input_tokens_seen": 999700736,
"step": 400
},
{
"epoch": 3.0,
"eval_validation.parquet_accuracy": 0.8558,
"eval_validation.parquet_accuracy__0": 0.8627450980392157,
"eval_validation.parquet_accuracy__1": 0.7607361963190185,
"eval_validation.parquet_accuracy__10": 0.9035294117647059,
"eval_validation.parquet_accuracy__11": 0.8764478764478765,
"eval_validation.parquet_accuracy__12": 0.8980667838312829,
"eval_validation.parquet_accuracy__13": 0.7473118279569892,
"eval_validation.parquet_accuracy__14": 0.839943342776204,
"eval_validation.parquet_accuracy__15": 0.8427947598253275,
"eval_validation.parquet_accuracy__16": 0.830945558739255,
"eval_validation.parquet_accuracy__17": 0.839527027027027,
"eval_validation.parquet_accuracy__18": 0.8801369863013698,
"eval_validation.parquet_accuracy__19": 0.8117647058823529,
"eval_validation.parquet_accuracy__2": 0.7860696517412935,
"eval_validation.parquet_accuracy__20": 0.7633802816901408,
"eval_validation.parquet_accuracy__21": 0.9289617486338798,
"eval_validation.parquet_accuracy__22": 0.8562691131498471,
"eval_validation.parquet_accuracy__23": 0.8805460750853242,
"eval_validation.parquet_accuracy__3": 0.8660714285714286,
"eval_validation.parquet_accuracy__4": 0.8530884808013356,
"eval_validation.parquet_accuracy__5": 0.8694029850746269,
"eval_validation.parquet_accuracy__6": 0.900117508813161,
"eval_validation.parquet_accuracy__7": 0.7741935483870968,
"eval_validation.parquet_accuracy__8": 0.8904494382022472,
"eval_validation.parquet_accuracy__9": 0.8669833729216152,
"eval_validation.parquet_accuracy_conf50": 0.8663669799754802,
"eval_validation.parquet_accuracy_conf50__0": 0.8712871287128713,
"eval_validation.parquet_accuracy_conf50__1": 0.7711598746081505,
"eval_validation.parquet_accuracy_conf50__10": 0.9078014184397163,
"eval_validation.parquet_accuracy_conf50__11": 0.8823529411764706,
"eval_validation.parquet_accuracy_conf50__12": 0.9089285714285714,
"eval_validation.parquet_accuracy_conf50__13": 0.7828571428571428,
"eval_validation.parquet_accuracy_conf50__14": 0.8511560693641619,
"eval_validation.parquet_accuracy_conf50__15": 0.8565022421524664,
"eval_validation.parquet_accuracy_conf50__16": 0.8357771260997068,
"eval_validation.parquet_accuracy_conf50__17": 0.8456260720411664,
"eval_validation.parquet_accuracy_conf50__18": 0.8858131487889274,
"eval_validation.parquet_accuracy_conf50__19": 0.8277945619335347,
"eval_validation.parquet_accuracy_conf50__2": 0.8041237113402062,
"eval_validation.parquet_accuracy_conf50__20": 0.7794117647058824,
"eval_validation.parquet_accuracy_conf50__21": 0.9312242090784044,
"eval_validation.parquet_accuracy_conf50__22": 0.8710691823899371,
"eval_validation.parquet_accuracy_conf50__23": 0.8797250859106529,
"eval_validation.parquet_accuracy_conf50__3": 0.8885448916408669,
"eval_validation.parquet_accuracy_conf50__4": 0.8637137989778535,
"eval_validation.parquet_accuracy_conf50__5": 0.8816793893129771,
"eval_validation.parquet_accuracy_conf50__6": 0.9115890083632019,
"eval_validation.parquet_accuracy_conf50__7": 0.7850678733031674,
"eval_validation.parquet_accuracy_conf50__8": 0.8923512747875354,
"eval_validation.parquet_accuracy_conf50__9": 0.878345498783455,
"eval_validation.parquet_accuracy_conf75": 0.9145129224652088,
"eval_validation.parquet_accuracy_conf75__0": 0.9239130434782609,
"eval_validation.parquet_accuracy_conf75__1": 0.85546875,
"eval_validation.parquet_accuracy_conf75__10": 0.9493670886075949,
"eval_validation.parquet_accuracy_conf75__11": 0.9121338912133892,
"eval_validation.parquet_accuracy_conf75__12": 0.9450980392156862,
"eval_validation.parquet_accuracy_conf75__13": 0.8294573643410853,
"eval_validation.parquet_accuracy_conf75__14": 0.9129692832764505,
"eval_validation.parquet_accuracy_conf75__15": 0.9392265193370166,
"eval_validation.parquet_accuracy_conf75__16": 0.8972602739726028,
"eval_validation.parquet_accuracy_conf75__17": 0.8893280632411067,
"eval_validation.parquet_accuracy_conf75__18": 0.9288389513108615,
"eval_validation.parquet_accuracy_conf75__19": 0.8925925925925926,
"eval_validation.parquet_accuracy_conf75__2": 0.8809523809523809,
"eval_validation.parquet_accuracy_conf75__20": 0.8479087452471483,
"eval_validation.parquet_accuracy_conf75__21": 0.9502923976608187,
"eval_validation.parquet_accuracy_conf75__22": 0.9064748201438849,
"eval_validation.parquet_accuracy_conf75__23": 0.9404761904761905,
"eval_validation.parquet_accuracy_conf75__3": 0.9228070175438596,
"eval_validation.parquet_accuracy_conf75__4": 0.9067961165048544,
"eval_validation.parquet_accuracy_conf75__5": 0.9224137931034483,
"eval_validation.parquet_accuracy_conf75__6": 0.9374185136897001,
"eval_validation.parquet_accuracy_conf75__7": 0.8453333333333334,
"eval_validation.parquet_accuracy_conf75__8": 0.9276729559748428,
"eval_validation.parquet_accuracy_conf75__9": 0.9305354558610709,
"eval_validation.parquet_accuracy_label_average": 0.8470618003326092,
"eval_validation.parquet_accuracy_label_average_conf50": 0.8580792494248763,
"eval_validation.parquet_accuracy_label_average_conf75": 0.9081139825449239,
"eval_validation.parquet_accuracy_label_min": 0.7473118279569892,
"eval_validation.parquet_accuracy_label_min_conf50": 0.7711598746081505,
"eval_validation.parquet_accuracy_label_min_conf75": 0.8294573643410853,
"eval_validation.parquet_loss": 0.4807276427745819,
"eval_validation.parquet_proportion_conf50": 0.9788,
"eval_validation.parquet_proportion_conf75": 0.8551,
"eval_validation.parquet_runtime": 8.2886,
"eval_validation.parquet_samples_per_second": 1206.475,
"eval_validation.parquet_steps_per_second": 37.763,
"num_input_tokens_seen": 1176307328,
"step": 471
},
{
"epoch": 3.1856,
"grad_norm": 6.53125,
"learning_rate": 3.988603988603989e-05,
"loss": 1.5392,
"num_input_tokens_seen": 1250925472,
"step": 500
},
{
"epoch": 3.8256,
"grad_norm": 7.0625,
"learning_rate": 2.564102564102564e-05,
"loss": 1.4928,
"num_input_tokens_seen": 1499507040,
"step": 600
},
{
"epoch": 4.0,
"eval_validation.parquet_accuracy": 0.8567,
"eval_validation.parquet_accuracy__0": 0.8725490196078431,
"eval_validation.parquet_accuracy__1": 0.8006134969325154,
"eval_validation.parquet_accuracy__10": 0.9105882352941177,
"eval_validation.parquet_accuracy__11": 0.888030888030888,
"eval_validation.parquet_accuracy__12": 0.9086115992970123,
"eval_validation.parquet_accuracy__13": 0.7419354838709677,
"eval_validation.parquet_accuracy__14": 0.8271954674220963,
"eval_validation.parquet_accuracy__15": 0.851528384279476,
"eval_validation.parquet_accuracy__16": 0.8510028653295129,
"eval_validation.parquet_accuracy__17": 0.8817567567567568,
"eval_validation.parquet_accuracy__18": 0.8664383561643836,
"eval_validation.parquet_accuracy__19": 0.8088235294117647,
"eval_validation.parquet_accuracy__2": 0.8059701492537313,
"eval_validation.parquet_accuracy__20": 0.7492957746478873,
"eval_validation.parquet_accuracy__21": 0.924863387978142,
"eval_validation.parquet_accuracy__22": 0.8379204892966361,
"eval_validation.parquet_accuracy__23": 0.863481228668942,
"eval_validation.parquet_accuracy__3": 0.8779761904761905,
"eval_validation.parquet_accuracy__4": 0.8464106844741235,
"eval_validation.parquet_accuracy__5": 0.8731343283582089,
"eval_validation.parquet_accuracy__6": 0.881316098707403,
"eval_validation.parquet_accuracy__7": 0.810752688172043,
"eval_validation.parquet_accuracy__8": 0.8904494382022472,
"eval_validation.parquet_accuracy__9": 0.8396674584323041,
"eval_validation.parquet_accuracy_conf50": 0.8674908050674295,
"eval_validation.parquet_accuracy_conf50__0": 0.8811881188118812,
"eval_validation.parquet_accuracy_conf50__1": 0.8119122257053292,
"eval_validation.parquet_accuracy_conf50__10": 0.9148936170212766,
"eval_validation.parquet_accuracy_conf50__11": 0.8980392156862745,
"eval_validation.parquet_accuracy_conf50__12": 0.9178571428571428,
"eval_validation.parquet_accuracy_conf50__13": 0.7771428571428571,
"eval_validation.parquet_accuracy_conf50__14": 0.838150289017341,
"eval_validation.parquet_accuracy_conf50__15": 0.8654708520179372,
"eval_validation.parquet_accuracy_conf50__16": 0.8563049853372434,
"eval_validation.parquet_accuracy_conf50__17": 0.8867924528301887,
"eval_validation.parquet_accuracy_conf50__18": 0.8719723183391004,
"eval_validation.parquet_accuracy_conf50__19": 0.824773413897281,
"eval_validation.parquet_accuracy_conf50__2": 0.8247422680412371,
"eval_validation.parquet_accuracy_conf50__20": 0.7676470588235295,
"eval_validation.parquet_accuracy_conf50__21": 0.9270976616231087,
"eval_validation.parquet_accuracy_conf50__22": 0.8522012578616353,
"eval_validation.parquet_accuracy_conf50__23": 0.8625429553264605,
"eval_validation.parquet_accuracy_conf50__3": 0.9009287925696594,
"eval_validation.parquet_accuracy_conf50__4": 0.858603066439523,
"eval_validation.parquet_accuracy_conf50__5": 0.8854961832061069,
"eval_validation.parquet_accuracy_conf50__6": 0.8936678614097969,
"eval_validation.parquet_accuracy_conf50__7": 0.8235294117647058,
"eval_validation.parquet_accuracy_conf50__8": 0.8923512747875354,
"eval_validation.parquet_accuracy_conf50__9": 0.8503649635036497,
"eval_validation.parquet_accuracy_conf75": 0.9156823763302537,
"eval_validation.parquet_accuracy_conf75__0": 0.9347826086956522,
"eval_validation.parquet_accuracy_conf75__1": 0.88671875,
"eval_validation.parquet_accuracy_conf75__10": 0.9544303797468354,
"eval_validation.parquet_accuracy_conf75__11": 0.9288702928870293,
"eval_validation.parquet_accuracy_conf75__12": 0.9509803921568627,
"eval_validation.parquet_accuracy_conf75__13": 0.8217054263565892,
"eval_validation.parquet_accuracy_conf75__14": 0.9027303754266212,
"eval_validation.parquet_accuracy_conf75__15": 0.9392265193370166,
"eval_validation.parquet_accuracy_conf75__16": 0.910958904109589,
"eval_validation.parquet_accuracy_conf75__17": 0.9308300395256917,
"eval_validation.parquet_accuracy_conf75__18": 0.9138576779026217,
"eval_validation.parquet_accuracy_conf75__19": 0.8851851851851852,
"eval_validation.parquet_accuracy_conf75__2": 0.9047619047619048,
"eval_validation.parquet_accuracy_conf75__20": 0.8365019011406845,
"eval_validation.parquet_accuracy_conf75__21": 0.9488304093567251,
"eval_validation.parquet_accuracy_conf75__22": 0.89568345323741,
"eval_validation.parquet_accuracy_conf75__23": 0.9365079365079365,
"eval_validation.parquet_accuracy_conf75__3": 0.9333333333333333,
"eval_validation.parquet_accuracy_conf75__4": 0.8932038834951457,
"eval_validation.parquet_accuracy_conf75__5": 0.9224137931034483,
"eval_validation.parquet_accuracy_conf75__6": 0.9230769230769231,
"eval_validation.parquet_accuracy_conf75__7": 0.8746666666666667,
"eval_validation.parquet_accuracy_conf75__8": 0.9339622641509434,
"eval_validation.parquet_accuracy_conf75__9": 0.91027496382055,
"eval_validation.parquet_accuracy_label_average": 0.8504296666277162,
"eval_validation.parquet_accuracy_label_average_conf50": 0.8618195935008668,
"eval_validation.parquet_accuracy_label_average_conf75": 0.9113955826658905,
"eval_validation.parquet_accuracy_label_min": 0.7419354838709677,
"eval_validation.parquet_accuracy_label_min_conf50": 0.7676470588235295,
"eval_validation.parquet_accuracy_label_min_conf75": 0.8217054263565892,
"eval_validation.parquet_loss": 0.47853514552116394,
"eval_validation.parquet_proportion_conf50": 0.9788,
"eval_validation.parquet_proportion_conf75": 0.8551,
"eval_validation.parquet_runtime": 8.3896,
"eval_validation.parquet_samples_per_second": 1191.949,
"eval_validation.parquet_steps_per_second": 37.308,
"num_input_tokens_seen": 1566401088,
"step": 628
},
{
"epoch": 4.4608,
"grad_norm": 7.5625,
"learning_rate": 1.1396011396011397e-05,
"loss": 1.4653,
"num_input_tokens_seen": 1745927840,
"step": 700
},
{
"epoch": 4.9728,
"eval_validation.parquet_accuracy": 0.8571,
"eval_validation.parquet_accuracy__0": 0.8725490196078431,
"eval_validation.parquet_accuracy__1": 0.7914110429447853,
"eval_validation.parquet_accuracy__10": 0.9105882352941177,
"eval_validation.parquet_accuracy__11": 0.8918918918918919,
"eval_validation.parquet_accuracy__12": 0.9033391915641477,
"eval_validation.parquet_accuracy__13": 0.7419354838709677,
"eval_validation.parquet_accuracy__14": 0.8314447592067988,
"eval_validation.parquet_accuracy__15": 0.8558951965065502,
"eval_validation.parquet_accuracy__16": 0.8481375358166189,
"eval_validation.parquet_accuracy__17": 0.875,
"eval_validation.parquet_accuracy__18": 0.8595890410958904,
"eval_validation.parquet_accuracy__19": 0.8117647058823529,
"eval_validation.parquet_accuracy__2": 0.8109452736318408,
"eval_validation.parquet_accuracy__20": 0.7436619718309859,
"eval_validation.parquet_accuracy__21": 0.9262295081967213,
"eval_validation.parquet_accuracy__22": 0.8440366972477065,
"eval_validation.parquet_accuracy__23": 0.863481228668942,
"eval_validation.parquet_accuracy__3": 0.8809523809523809,
"eval_validation.parquet_accuracy__4": 0.8497495826377296,
"eval_validation.parquet_accuracy__5": 0.8731343283582089,
"eval_validation.parquet_accuracy__6": 0.8883666274970623,
"eval_validation.parquet_accuracy__7": 0.7956989247311828,
"eval_validation.parquet_accuracy__8": 0.9044943820224719,
"eval_validation.parquet_accuracy__9": 0.8420427553444181,
"eval_validation.parquet_accuracy_conf50": 0.8678994687372292,
"eval_validation.parquet_accuracy_conf50__0": 0.8811881188118812,
"eval_validation.parquet_accuracy_conf50__1": 0.8025078369905956,
"eval_validation.parquet_accuracy_conf50__10": 0.9148936170212766,
"eval_validation.parquet_accuracy_conf50__11": 0.8980392156862745,
"eval_validation.parquet_accuracy_conf50__12": 0.9125,
"eval_validation.parquet_accuracy_conf50__13": 0.7771428571428571,
"eval_validation.parquet_accuracy_conf50__14": 0.8424855491329479,
"eval_validation.parquet_accuracy_conf50__15": 0.8699551569506726,
"eval_validation.parquet_accuracy_conf50__16": 0.8533724340175953,
"eval_validation.parquet_accuracy_conf50__17": 0.8799313893653516,
"eval_validation.parquet_accuracy_conf50__18": 0.8650519031141869,
"eval_validation.parquet_accuracy_conf50__19": 0.8277945619335347,
"eval_validation.parquet_accuracy_conf50__2": 0.8298969072164949,
"eval_validation.parquet_accuracy_conf50__20": 0.7647058823529411,
"eval_validation.parquet_accuracy_conf50__21": 0.9284731774415406,
"eval_validation.parquet_accuracy_conf50__22": 0.8584905660377359,
"eval_validation.parquet_accuracy_conf50__23": 0.8625429553264605,
"eval_validation.parquet_accuracy_conf50__3": 0.9040247678018576,
"eval_validation.parquet_accuracy_conf50__4": 0.8620102214650767,
"eval_validation.parquet_accuracy_conf50__5": 0.8854961832061069,
"eval_validation.parquet_accuracy_conf50__6": 0.9008363201911589,
"eval_validation.parquet_accuracy_conf50__7": 0.8076923076923077,
"eval_validation.parquet_accuracy_conf50__8": 0.9065155807365439,
"eval_validation.parquet_accuracy_conf50__9": 0.8527980535279805,
"eval_validation.parquet_accuracy_conf75": 0.9163840486492808,
"eval_validation.parquet_accuracy_conf75__0": 0.9347826086956522,
"eval_validation.parquet_accuracy_conf75__1": 0.87890625,
"eval_validation.parquet_accuracy_conf75__10": 0.9518987341772152,
"eval_validation.parquet_accuracy_conf75__11": 0.9288702928870293,
"eval_validation.parquet_accuracy_conf75__12": 0.9470588235294117,
"eval_validation.parquet_accuracy_conf75__13": 0.8217054263565892,
"eval_validation.parquet_accuracy_conf75__14": 0.9061433447098977,
"eval_validation.parquet_accuracy_conf75__15": 0.9447513812154696,
"eval_validation.parquet_accuracy_conf75__16": 0.910958904109589,
"eval_validation.parquet_accuracy_conf75__17": 0.924901185770751,
"eval_validation.parquet_accuracy_conf75__18": 0.9101123595505618,
"eval_validation.parquet_accuracy_conf75__19": 0.8888888888888888,
"eval_validation.parquet_accuracy_conf75__2": 0.9047619047619048,
"eval_validation.parquet_accuracy_conf75__20": 0.8326996197718631,
"eval_validation.parquet_accuracy_conf75__21": 0.9502923976608187,
"eval_validation.parquet_accuracy_conf75__22": 0.9028776978417267,
"eval_validation.parquet_accuracy_conf75__23": 0.9325396825396826,
"eval_validation.parquet_accuracy_conf75__3": 0.9368421052631579,
"eval_validation.parquet_accuracy_conf75__4": 0.9009708737864077,
"eval_validation.parquet_accuracy_conf75__5": 0.9224137931034483,
"eval_validation.parquet_accuracy_conf75__6": 0.9308996088657105,
"eval_validation.parquet_accuracy_conf75__7": 0.864,
"eval_validation.parquet_accuracy_conf75__8": 0.940251572327044,
"eval_validation.parquet_accuracy_conf75__9": 0.9117221418234442,
"eval_validation.parquet_accuracy_label_average": 0.8506808235334006,
"eval_validation.parquet_accuracy_label_average_conf50": 0.8620143984651407,
"eval_validation.parquet_accuracy_label_average_conf75": 0.9116353999015111,
"eval_validation.parquet_accuracy_label_min": 0.7419354838709677,
"eval_validation.parquet_accuracy_label_min_conf50": 0.7647058823529411,
"eval_validation.parquet_accuracy_label_min_conf75": 0.8217054263565892,
"eval_validation.parquet_loss": 0.47900858521461487,
"eval_validation.parquet_proportion_conf50": 0.9788,
"eval_validation.parquet_proportion_conf75": 0.8551,
"eval_validation.parquet_runtime": 8.446,
"eval_validation.parquet_samples_per_second": 1183.995,
"eval_validation.parquet_steps_per_second": 37.059,
"num_input_tokens_seen": 1949274656,
"step": 780
},
{
"epoch": 4.9728,
"num_input_tokens_seen": 1949274656,
"step": 780,
"total_flos": 1.297523316772307e+18,
"train_loss": 1.6634563641670423,
"train_runtime": 573.9155,
"train_samples_per_second": 696.967,
"train_steps_per_second": 1.359
}
],
"logging_steps": 100,
"max_steps": 780,
"num_input_tokens_seen": 1949274656,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.297523316772307e+18,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}