{
  "best_metric": 0.943,
  "best_model_checkpoint": "dinov2-base-imagenet1k-1-layer-finetuned-galaxy10-decals-finetuned-galaxy_mnist/checkpoint-593",
  "epoch": 29.76,
  "eval_steps": 500,
  "global_step": 930,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.32,
      "learning_rate": 5.376344086021506e-07,
      "loss": 1.5723,
      "step": 10
    },
    {
      "epoch": 0.64,
      "learning_rate": 1.0752688172043011e-06,
      "loss": 1.2678,
      "step": 20
    },
    {
      "epoch": 0.96,
      "learning_rate": 1.6129032258064516e-06,
      "loss": 0.8616,
      "step": 30
    },
    {
      "epoch": 0.99,
      "eval_accuracy": 0.8345,
      "eval_f1": 0.8341866866352989,
      "eval_loss": 0.4898146092891693,
      "eval_precision": 0.8394097848227641,
      "eval_recall": 0.8345,
      "eval_runtime": 16.9059,
      "eval_samples_per_second": 118.302,
      "eval_steps_per_second": 1.893,
      "step": 31
    },
    {
      "epoch": 1.28,
      "learning_rate": 2.1505376344086023e-06,
      "loss": 0.5089,
      "step": 40
    },
    {
      "epoch": 1.6,
      "learning_rate": 2.688172043010753e-06,
      "loss": 0.359,
      "step": 50
    },
    {
      "epoch": 1.92,
      "learning_rate": 3.225806451612903e-06,
      "loss": 0.3162,
      "step": 60
    },
    {
      "epoch": 1.98,
      "eval_accuracy": 0.921,
      "eval_f1": 0.9210386136523859,
      "eval_loss": 0.1962936967611313,
      "eval_precision": 0.9217325685157318,
      "eval_recall": 0.921,
      "eval_runtime": 16.9428,
      "eval_samples_per_second": 118.044,
      "eval_steps_per_second": 1.889,
      "step": 62
    },
    {
      "epoch": 2.24,
      "learning_rate": 3.763440860215054e-06,
      "loss": 0.2808,
      "step": 70
    },
    {
      "epoch": 2.56,
      "learning_rate": 4.3010752688172045e-06,
      "loss": 0.2568,
      "step": 80
    },
    {
      "epoch": 2.88,
      "learning_rate": 4.838709677419355e-06,
      "loss": 0.2596,
      "step": 90
    },
    {
      "epoch": 2.98,
      "eval_accuracy": 0.9315,
      "eval_f1": 0.931337514438196,
      "eval_loss": 0.16661500930786133,
      "eval_precision": 0.9321601033671261,
      "eval_recall": 0.9315,
      "eval_runtime": 17.1134,
      "eval_samples_per_second": 116.867,
      "eval_steps_per_second": 1.87,
      "step": 93
    },
    {
      "epoch": 3.2,
      "learning_rate": 4.9581839904420555e-06,
      "loss": 0.2554,
      "step": 100
    },
    {
      "epoch": 3.52,
      "learning_rate": 4.898446833930705e-06,
      "loss": 0.233,
      "step": 110
    },
    {
      "epoch": 3.84,
      "learning_rate": 4.838709677419355e-06,
      "loss": 0.2237,
      "step": 120
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.9385,
      "eval_f1": 0.9384888451054542,
      "eval_loss": 0.15792010724544525,
      "eval_precision": 0.9385964586622481,
      "eval_recall": 0.9385,
      "eval_runtime": 16.9469,
      "eval_samples_per_second": 118.016,
      "eval_steps_per_second": 1.888,
      "step": 125
    },
    {
      "epoch": 4.16,
      "learning_rate": 4.7789725209080055e-06,
      "loss": 0.2327,
      "step": 130
    },
    {
      "epoch": 4.48,
      "learning_rate": 4.719235364396655e-06,
      "loss": 0.2128,
      "step": 140
    },
    {
      "epoch": 4.8,
      "learning_rate": 4.659498207885305e-06,
      "loss": 0.2304,
      "step": 150
    },
    {
      "epoch": 4.99,
      "eval_accuracy": 0.936,
      "eval_f1": 0.9360872179829776,
      "eval_loss": 0.16309350728988647,
      "eval_precision": 0.9364428208218962,
      "eval_recall": 0.936,
      "eval_runtime": 16.8892,
      "eval_samples_per_second": 118.419,
      "eval_steps_per_second": 1.895,
      "step": 156
    },
    {
      "epoch": 5.12,
      "learning_rate": 4.599761051373955e-06,
      "loss": 0.2114,
      "step": 160
    },
    {
      "epoch": 5.44,
      "learning_rate": 4.540023894862605e-06,
      "loss": 0.214,
      "step": 170
    },
    {
      "epoch": 5.76,
      "learning_rate": 4.480286738351255e-06,
      "loss": 0.2096,
      "step": 180
    },
    {
      "epoch": 5.98,
      "eval_accuracy": 0.933,
      "eval_f1": 0.9328862729862898,
      "eval_loss": 0.16860799491405487,
      "eval_precision": 0.9341164621275567,
      "eval_recall": 0.933,
      "eval_runtime": 17.0326,
      "eval_samples_per_second": 117.422,
      "eval_steps_per_second": 1.879,
      "step": 187
    },
    {
      "epoch": 6.08,
      "learning_rate": 4.420549581839905e-06,
      "loss": 0.213,
      "step": 190
    },
    {
      "epoch": 6.4,
      "learning_rate": 4.360812425328555e-06,
      "loss": 0.1829,
      "step": 200
    },
    {
      "epoch": 6.72,
      "learning_rate": 4.3010752688172045e-06,
      "loss": 0.1935,
      "step": 210
    },
    {
      "epoch": 6.98,
      "eval_accuracy": 0.934,
      "eval_f1": 0.9338744194680119,
      "eval_loss": 0.16596421599388123,
      "eval_precision": 0.9340568636949389,
      "eval_recall": 0.934,
      "eval_runtime": 16.8862,
      "eval_samples_per_second": 118.44,
      "eval_steps_per_second": 1.895,
      "step": 218
    },
    {
      "epoch": 7.04,
      "learning_rate": 4.241338112305855e-06,
      "loss": 0.2185,
      "step": 220
    },
    {
      "epoch": 7.36,
      "learning_rate": 4.181600955794505e-06,
      "loss": 0.2051,
      "step": 230
    },
    {
      "epoch": 7.68,
      "learning_rate": 4.121863799283155e-06,
      "loss": 0.2055,
      "step": 240
    },
    {
      "epoch": 8.0,
      "learning_rate": 4.062126642771804e-06,
      "loss": 0.1829,
      "step": 250
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.9415,
      "eval_f1": 0.9415406456356198,
      "eval_loss": 0.15956956148147583,
      "eval_precision": 0.9417968383864156,
      "eval_recall": 0.9415,
      "eval_runtime": 16.9782,
      "eval_samples_per_second": 117.798,
      "eval_steps_per_second": 1.885,
      "step": 250
    },
    {
      "epoch": 8.32,
      "learning_rate": 4.002389486260454e-06,
      "loss": 0.1818,
      "step": 260
    },
    {
      "epoch": 8.64,
      "learning_rate": 3.942652329749105e-06,
      "loss": 0.1932,
      "step": 270
    },
    {
      "epoch": 8.96,
      "learning_rate": 3.882915173237754e-06,
      "loss": 0.178,
      "step": 280
    },
    {
      "epoch": 8.99,
      "eval_accuracy": 0.937,
      "eval_f1": 0.9370365766449181,
      "eval_loss": 0.16126687824726105,
      "eval_precision": 0.9381157280242598,
      "eval_recall": 0.937,
      "eval_runtime": 16.0023,
      "eval_samples_per_second": 124.982,
      "eval_steps_per_second": 2.0,
      "step": 281
    },
    {
      "epoch": 9.28,
      "learning_rate": 3.823178016726404e-06,
      "loss": 0.177,
      "step": 290
    },
    {
      "epoch": 9.6,
      "learning_rate": 3.763440860215054e-06,
      "loss": 0.1733,
      "step": 300
    },
    {
      "epoch": 9.92,
      "learning_rate": 3.7037037037037037e-06,
      "loss": 0.158,
      "step": 310
    },
    {
      "epoch": 9.98,
      "eval_accuracy": 0.9335,
      "eval_f1": 0.9333931964437308,
      "eval_loss": 0.1697198748588562,
      "eval_precision": 0.9358458444952584,
      "eval_recall": 0.9335,
      "eval_runtime": 16.6749,
      "eval_samples_per_second": 119.94,
      "eval_steps_per_second": 1.919,
      "step": 312
    },
    {
      "epoch": 10.24,
      "learning_rate": 3.643966547192354e-06,
      "loss": 0.1671,
      "step": 320
    },
    {
      "epoch": 10.56,
      "learning_rate": 3.584229390681004e-06,
      "loss": 0.1766,
      "step": 330
    },
    {
      "epoch": 10.88,
      "learning_rate": 3.5244922341696534e-06,
      "loss": 0.1767,
      "step": 340
    },
    {
      "epoch": 10.98,
      "eval_accuracy": 0.935,
      "eval_f1": 0.9349256377578329,
      "eval_loss": 0.16532444953918457,
      "eval_precision": 0.935028529749686,
      "eval_recall": 0.935,
      "eval_runtime": 16.8891,
      "eval_samples_per_second": 118.42,
      "eval_steps_per_second": 1.895,
      "step": 343
    },
    {
      "epoch": 11.2,
      "learning_rate": 3.4647550776583037e-06,
      "loss": 0.1605,
      "step": 350
    },
    {
      "epoch": 11.52,
      "learning_rate": 3.4050179211469536e-06,
      "loss": 0.159,
      "step": 360
    },
    {
      "epoch": 11.84,
      "learning_rate": 3.3452807646356034e-06,
      "loss": 0.176,
      "step": 370
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.936,
      "eval_f1": 0.9357497306188325,
      "eval_loss": 0.175233393907547,
      "eval_precision": 0.9374822451457967,
      "eval_recall": 0.936,
      "eval_runtime": 15.5069,
      "eval_samples_per_second": 128.975,
      "eval_steps_per_second": 2.064,
      "step": 375
    },
    {
      "epoch": 12.16,
      "learning_rate": 3.2855436081242537e-06,
      "loss": 0.1542,
      "step": 380
    },
    {
      "epoch": 12.48,
      "learning_rate": 3.225806451612903e-06,
      "loss": 0.1675,
      "step": 390
    },
    {
      "epoch": 12.8,
      "learning_rate": 3.1660692951015535e-06,
      "loss": 0.1563,
      "step": 400
    },
    {
      "epoch": 12.99,
      "eval_accuracy": 0.932,
      "eval_f1": 0.9319429646523535,
      "eval_loss": 0.18922363221645355,
      "eval_precision": 0.9338500617422867,
      "eval_recall": 0.932,
      "eval_runtime": 16.9454,
      "eval_samples_per_second": 118.026,
      "eval_steps_per_second": 1.888,
      "step": 406
    },
    {
      "epoch": 13.12,
      "learning_rate": 3.1063321385902034e-06,
      "loss": 0.1717,
      "step": 410
    },
    {
      "epoch": 13.44,
      "learning_rate": 3.0465949820788532e-06,
      "loss": 0.1404,
      "step": 420
    },
    {
      "epoch": 13.76,
      "learning_rate": 2.9868578255675035e-06,
      "loss": 0.1499,
      "step": 430
    },
    {
      "epoch": 13.98,
      "eval_accuracy": 0.9345,
      "eval_f1": 0.9344245143489335,
      "eval_loss": 0.19463878870010376,
      "eval_precision": 0.9352803545218006,
      "eval_recall": 0.9345,
      "eval_runtime": 17.0054,
      "eval_samples_per_second": 117.61,
      "eval_steps_per_second": 1.882,
      "step": 437
    },
    {
      "epoch": 14.08,
      "learning_rate": 2.9271206690561534e-06,
      "loss": 0.1446,
      "step": 440
    },
    {
      "epoch": 14.4,
      "learning_rate": 2.867383512544803e-06,
      "loss": 0.1729,
      "step": 450
    },
    {
      "epoch": 14.72,
      "learning_rate": 2.807646356033453e-06,
      "loss": 0.1388,
      "step": 460
    },
    {
      "epoch": 14.98,
      "eval_accuracy": 0.937,
      "eval_f1": 0.9369559606039481,
      "eval_loss": 0.1763281524181366,
      "eval_precision": 0.937053922233709,
      "eval_recall": 0.937,
      "eval_runtime": 16.6713,
      "eval_samples_per_second": 119.967,
      "eval_steps_per_second": 1.919,
      "step": 468
    },
    {
      "epoch": 15.04,
      "learning_rate": 2.747909199522103e-06,
      "loss": 0.1597,
      "step": 470
    },
    {
      "epoch": 15.36,
      "learning_rate": 2.688172043010753e-06,
      "loss": 0.1448,
      "step": 480
    },
    {
      "epoch": 15.68,
      "learning_rate": 2.6284348864994032e-06,
      "loss": 0.15,
      "step": 490
    },
    {
      "epoch": 16.0,
      "learning_rate": 2.5686977299880527e-06,
      "loss": 0.1418,
      "step": 500
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.9375,
      "eval_f1": 0.9374575657923622,
      "eval_loss": 0.1875288337469101,
      "eval_precision": 0.9390310584589143,
      "eval_recall": 0.9375,
      "eval_runtime": 16.9059,
      "eval_samples_per_second": 118.302,
      "eval_steps_per_second": 1.893,
      "step": 500
    },
    {
      "epoch": 16.32,
      "learning_rate": 2.5089605734767026e-06,
      "loss": 0.1434,
      "step": 510
    },
    {
      "epoch": 16.64,
      "learning_rate": 2.4492234169653525e-06,
      "loss": 0.1511,
      "step": 520
    },
    {
      "epoch": 16.96,
      "learning_rate": 2.3894862604540028e-06,
      "loss": 0.1332,
      "step": 530
    },
    {
      "epoch": 16.99,
      "eval_accuracy": 0.9365,
      "eval_f1": 0.9364286755728805,
      "eval_loss": 0.17689362168312073,
      "eval_precision": 0.9364143381142894,
      "eval_recall": 0.9365,
      "eval_runtime": 15.4316,
      "eval_samples_per_second": 129.604,
      "eval_steps_per_second": 2.074,
      "step": 531
    },
    {
      "epoch": 17.28,
      "learning_rate": 2.3297491039426526e-06,
      "loss": 0.1508,
      "step": 540
    },
    {
      "epoch": 17.6,
      "learning_rate": 2.2700119474313025e-06,
      "loss": 0.1454,
      "step": 550
    },
    {
      "epoch": 17.92,
      "learning_rate": 2.2102747909199524e-06,
      "loss": 0.1413,
      "step": 560
    },
    {
      "epoch": 17.98,
      "eval_accuracy": 0.9355,
      "eval_f1": 0.9354743546167706,
      "eval_loss": 0.18507656455039978,
      "eval_precision": 0.9363458136046509,
      "eval_recall": 0.9355,
      "eval_runtime": 18.6594,
      "eval_samples_per_second": 107.185,
      "eval_steps_per_second": 1.715,
      "step": 562
    },
    {
      "epoch": 18.24,
      "learning_rate": 2.1505376344086023e-06,
      "loss": 0.1419,
      "step": 570
    },
    {
      "epoch": 18.56,
      "learning_rate": 2.0908004778972526e-06,
      "loss": 0.1337,
      "step": 580
    },
    {
      "epoch": 18.88,
      "learning_rate": 2.031063321385902e-06,
      "loss": 0.1363,
      "step": 590
    },
    {
      "epoch": 18.98,
      "eval_accuracy": 0.943,
      "eval_f1": 0.9429937041520492,
      "eval_loss": 0.1833750605583191,
      "eval_precision": 0.9430181576590594,
      "eval_recall": 0.943,
      "eval_runtime": 15.341,
      "eval_samples_per_second": 130.369,
      "eval_steps_per_second": 2.086,
      "step": 593
    },
    {
      "epoch": 19.2,
      "learning_rate": 1.9713261648745523e-06,
      "loss": 0.1406,
      "step": 600
    },
    {
      "epoch": 19.52,
      "learning_rate": 1.911589008363202e-06,
      "loss": 0.1349,
      "step": 610
    },
    {
      "epoch": 19.84,
      "learning_rate": 1.8518518518518519e-06,
      "loss": 0.1454,
      "step": 620
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.938,
      "eval_f1": 0.9379689326488776,
      "eval_loss": 0.18226294219493866,
      "eval_precision": 0.9383502558933098,
      "eval_recall": 0.938,
      "eval_runtime": 15.7297,
      "eval_samples_per_second": 127.148,
      "eval_steps_per_second": 2.034,
      "step": 625
    },
    {
      "epoch": 20.16,
      "learning_rate": 1.792114695340502e-06,
      "loss": 0.133,
      "step": 630
    },
    {
      "epoch": 20.48,
      "learning_rate": 1.7323775388291518e-06,
      "loss": 0.145,
      "step": 640
    },
    {
      "epoch": 20.8,
      "learning_rate": 1.6726403823178017e-06,
      "loss": 0.1369,
      "step": 650
    },
    {
      "epoch": 20.99,
      "eval_accuracy": 0.938,
      "eval_f1": 0.9380062136957606,
      "eval_loss": 0.18339130282402039,
      "eval_precision": 0.9380276098599477,
      "eval_recall": 0.938,
      "eval_runtime": 16.8974,
      "eval_samples_per_second": 118.361,
      "eval_steps_per_second": 1.894,
      "step": 656
    },
    {
      "epoch": 21.12,
      "learning_rate": 1.6129032258064516e-06,
      "loss": 0.1374,
      "step": 660
    },
    {
      "epoch": 21.44,
      "learning_rate": 1.5531660692951017e-06,
      "loss": 0.1238,
      "step": 670
    },
    {
      "epoch": 21.76,
      "learning_rate": 1.4934289127837518e-06,
      "loss": 0.1298,
      "step": 680
    },
    {
      "epoch": 21.98,
      "eval_accuracy": 0.932,
      "eval_f1": 0.9318412947479982,
      "eval_loss": 0.1959647685289383,
      "eval_precision": 0.9334789348146822,
      "eval_recall": 0.932,
      "eval_runtime": 17.048,
      "eval_samples_per_second": 117.316,
      "eval_steps_per_second": 1.877,
      "step": 687
    },
    {
      "epoch": 22.08,
      "learning_rate": 1.4336917562724014e-06,
      "loss": 0.1175,
      "step": 690
    },
    {
      "epoch": 22.4,
      "learning_rate": 1.3739545997610515e-06,
      "loss": 0.1302,
      "step": 700
    },
    {
      "epoch": 22.72,
      "learning_rate": 1.3142174432497016e-06,
      "loss": 0.1215,
      "step": 710
    },
    {
      "epoch": 22.98,
      "eval_accuracy": 0.941,
      "eval_f1": 0.9410040331011128,
      "eval_loss": 0.1756083071231842,
      "eval_precision": 0.9410129175539667,
      "eval_recall": 0.941,
      "eval_runtime": 15.4692,
      "eval_samples_per_second": 129.289,
      "eval_steps_per_second": 2.069,
      "step": 718
    },
    {
      "epoch": 23.04,
      "learning_rate": 1.2544802867383513e-06,
      "loss": 0.1148,
      "step": 720
    },
    {
      "epoch": 23.36,
      "learning_rate": 1.1947431302270014e-06,
      "loss": 0.1171,
      "step": 730
    },
    {
      "epoch": 23.68,
      "learning_rate": 1.1350059737156513e-06,
      "loss": 0.1157,
      "step": 740
    },
    {
      "epoch": 24.0,
      "learning_rate": 1.0752688172043011e-06,
      "loss": 0.1206,
      "step": 750
    },
    {
      "epoch": 24.0,
      "eval_accuracy": 0.9395,
      "eval_f1": 0.9394278516200029,
      "eval_loss": 0.1916501373052597,
      "eval_precision": 0.9397489284158689,
      "eval_recall": 0.9395,
      "eval_runtime": 16.9045,
      "eval_samples_per_second": 118.312,
      "eval_steps_per_second": 1.893,
      "step": 750
    },
    {
      "epoch": 24.32,
      "learning_rate": 1.015531660692951e-06,
      "loss": 0.1022,
      "step": 760
    },
    {
      "epoch": 24.64,
      "learning_rate": 9.55794504181601e-07,
      "loss": 0.1327,
      "step": 770
    },
    {
      "epoch": 24.96,
      "learning_rate": 8.96057347670251e-07,
      "loss": 0.1173,
      "step": 780
    },
    {
      "epoch": 24.99,
      "eval_accuracy": 0.937,
      "eval_f1": 0.9369852952028479,
      "eval_loss": 0.18728119134902954,
      "eval_precision": 0.9370030773977623,
      "eval_recall": 0.937,
      "eval_runtime": 17.1038,
      "eval_samples_per_second": 116.933,
      "eval_steps_per_second": 1.871,
      "step": 781
    },
    {
      "epoch": 25.28,
      "learning_rate": 8.363201911589009e-07,
      "loss": 0.1258,
      "step": 790
    },
    {
      "epoch": 25.6,
      "learning_rate": 7.765830346475508e-07,
      "loss": 0.1276,
      "step": 800
    },
    {
      "epoch": 25.92,
      "learning_rate": 7.168458781362007e-07,
      "loss": 0.1071,
      "step": 810
    },
    {
      "epoch": 25.98,
      "eval_accuracy": 0.9375,
      "eval_f1": 0.9374993924800131,
      "eval_loss": 0.18555869162082672,
      "eval_precision": 0.9376175499868795,
      "eval_recall": 0.9375,
      "eval_runtime": 16.8982,
      "eval_samples_per_second": 118.356,
      "eval_steps_per_second": 1.894,
      "step": 812
    },
    {
      "epoch": 26.24,
      "learning_rate": 6.571087216248508e-07,
      "loss": 0.1021,
      "step": 820
    },
    {
      "epoch": 26.56,
      "learning_rate": 5.973715651135007e-07,
      "loss": 0.1158,
      "step": 830
    },
    {
      "epoch": 26.88,
      "learning_rate": 5.376344086021506e-07,
      "loss": 0.1259,
      "step": 840
    },
    {
      "epoch": 26.98,
      "eval_accuracy": 0.938,
      "eval_f1": 0.9379899286569715,
      "eval_loss": 0.18707333505153656,
      "eval_precision": 0.9380024377015922,
      "eval_recall": 0.938,
      "eval_runtime": 16.9001,
      "eval_samples_per_second": 118.343,
      "eval_steps_per_second": 1.893,
      "step": 843
    },
    {
      "epoch": 27.2,
      "learning_rate": 4.778972520908006e-07,
      "loss": 0.1183,
      "step": 850
    },
    {
      "epoch": 27.52,
      "learning_rate": 4.1816009557945043e-07,
      "loss": 0.1137,
      "step": 860
    },
    {
      "epoch": 27.84,
      "learning_rate": 3.5842293906810036e-07,
      "loss": 0.1245,
      "step": 870
    },
    {
      "epoch": 28.0,
      "eval_accuracy": 0.9395,
      "eval_f1": 0.939497158553919,
      "eval_loss": 0.18657149374485016,
      "eval_precision": 0.939552682681715,
      "eval_recall": 0.9395,
      "eval_runtime": 16.8688,
      "eval_samples_per_second": 118.562,
      "eval_steps_per_second": 1.897,
      "step": 875
    },
    {
      "epoch": 28.16,
      "learning_rate": 2.9868578255675034e-07,
      "loss": 0.1135,
      "step": 880
    },
    {
      "epoch": 28.48,
      "learning_rate": 2.389486260454003e-07,
      "loss": 0.1205,
      "step": 890
    },
    {
      "epoch": 28.8,
      "learning_rate": 1.7921146953405018e-07,
      "loss": 0.1065,
      "step": 900
    },
    {
      "epoch": 28.99,
      "eval_accuracy": 0.94,
      "eval_f1": 0.9400001912638307,
      "eval_loss": 0.18697066605091095,
      "eval_precision": 0.9401017745085161,
      "eval_recall": 0.94,
      "eval_runtime": 15.2987,
      "eval_samples_per_second": 130.73,
      "eval_steps_per_second": 2.092,
      "step": 906
    },
    {
      "epoch": 29.12,
      "learning_rate": 1.1947431302270014e-07,
      "loss": 0.1142,
      "step": 910
    },
    {
      "epoch": 29.44,
      "learning_rate": 5.973715651135007e-08,
      "loss": 0.1139,
      "step": 920
    },
    {
      "epoch": 29.76,
      "learning_rate": 0.0,
      "loss": 0.1066,
      "step": 930
    },
    {
      "epoch": 29.76,
      "eval_accuracy": 0.941,
      "eval_f1": 0.9409785275547128,
      "eval_loss": 0.18623514473438263,
      "eval_precision": 0.9410413966989568,
      "eval_recall": 0.941,
      "eval_runtime": 15.5653,
      "eval_samples_per_second": 128.491,
      "eval_steps_per_second": 2.056,
      "step": 930
    },
    {
      "epoch": 29.76,
      "step": 930,
      "total_flos": 2.4317906065576428e+19,
      "train_loss": 0.20007605360400293,
      "train_runtime": 4421.4809,
      "train_samples_per_second": 54.28,
      "train_steps_per_second": 0.21
    }
  ],
  "logging_steps": 10,
  "max_steps": 930,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 30,
  "save_steps": 500,
  "total_flos": 2.4317906065576428e+19,
  "train_batch_size": 64,
  "trial_name": null,
  "trial_params": null
}