|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.24259993896856882, |
|
"global_step": 3180, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 3.030700922012329, |
|
"eval_runtime": 51.1742, |
|
"eval_samples_per_second": 6.116, |
|
"eval_steps_per_second": 0.391, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 2.9279651641845703, |
|
"eval_runtime": 96.1374, |
|
"eval_samples_per_second": 3.256, |
|
"eval_steps_per_second": 0.416, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 2.7418131828308105, |
|
"eval_runtime": 95.6416, |
|
"eval_samples_per_second": 3.273, |
|
"eval_steps_per_second": 0.418, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 2.666383743286133, |
|
"eval_runtime": 95.265, |
|
"eval_samples_per_second": 3.286, |
|
"eval_steps_per_second": 0.42, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_loss": 2.5859625339508057, |
|
"eval_runtime": 96.3364, |
|
"eval_samples_per_second": 3.249, |
|
"eval_steps_per_second": 0.415, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_loss": 2.533696174621582, |
|
"eval_runtime": 94.072, |
|
"eval_samples_per_second": 3.327, |
|
"eval_steps_per_second": 0.425, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_loss": 2.4880690574645996, |
|
"eval_runtime": 97.015, |
|
"eval_samples_per_second": 3.226, |
|
"eval_steps_per_second": 0.412, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_loss": 2.484375, |
|
"eval_runtime": 94.9062, |
|
"eval_samples_per_second": 3.298, |
|
"eval_steps_per_second": 0.421, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_loss": 2.444089412689209, |
|
"eval_runtime": 96.2311, |
|
"eval_samples_per_second": 3.253, |
|
"eval_steps_per_second": 0.416, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_loss": 2.4108924865722656, |
|
"eval_runtime": 94.1107, |
|
"eval_samples_per_second": 3.326, |
|
"eval_steps_per_second": 0.425, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_loss": 2.4031050205230713, |
|
"eval_runtime": 96.8698, |
|
"eval_samples_per_second": 3.231, |
|
"eval_steps_per_second": 0.413, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_loss": 2.3866562843322754, |
|
"eval_runtime": 94.3449, |
|
"eval_samples_per_second": 3.318, |
|
"eval_steps_per_second": 0.424, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_loss": 2.364941120147705, |
|
"eval_runtime": 94.3238, |
|
"eval_samples_per_second": 3.318, |
|
"eval_steps_per_second": 0.424, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_loss": 2.3453474044799805, |
|
"eval_runtime": 95.7368, |
|
"eval_samples_per_second": 3.269, |
|
"eval_steps_per_second": 0.418, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_loss": 2.3416035175323486, |
|
"eval_runtime": 94.4091, |
|
"eval_samples_per_second": 3.315, |
|
"eval_steps_per_second": 0.424, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_loss": 2.328873872756958, |
|
"eval_runtime": 98.9931, |
|
"eval_samples_per_second": 3.162, |
|
"eval_steps_per_second": 0.404, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_loss": 2.314546823501587, |
|
"eval_runtime": 98.9879, |
|
"eval_samples_per_second": 3.162, |
|
"eval_steps_per_second": 0.404, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_loss": 2.3136730194091797, |
|
"eval_runtime": 97.3738, |
|
"eval_samples_per_second": 3.214, |
|
"eval_steps_per_second": 0.411, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_loss": 2.3047373294830322, |
|
"eval_runtime": 95.8162, |
|
"eval_samples_per_second": 3.267, |
|
"eval_steps_per_second": 0.417, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_loss": 2.2875399589538574, |
|
"eval_runtime": 96.4936, |
|
"eval_samples_per_second": 3.244, |
|
"eval_steps_per_second": 0.415, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_loss": 2.28057599067688, |
|
"eval_runtime": 96.1017, |
|
"eval_samples_per_second": 3.257, |
|
"eval_steps_per_second": 0.416, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_loss": 2.2859673500061035, |
|
"eval_runtime": 94.3087, |
|
"eval_samples_per_second": 3.319, |
|
"eval_steps_per_second": 0.424, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_loss": 2.2626798152923584, |
|
"eval_runtime": 94.7992, |
|
"eval_samples_per_second": 3.302, |
|
"eval_steps_per_second": 0.422, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_loss": 2.2764577865600586, |
|
"eval_runtime": 95.9001, |
|
"eval_samples_per_second": 3.264, |
|
"eval_steps_per_second": 0.417, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1752, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_loss": 2.270517110824585, |
|
"eval_runtime": 95.5609, |
|
"eval_samples_per_second": 3.275, |
|
"eval_steps_per_second": 0.419, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_loss": 2.2533695697784424, |
|
"eval_runtime": 95.989, |
|
"eval_samples_per_second": 3.261, |
|
"eval_steps_per_second": 0.417, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_loss": 2.2560901641845703, |
|
"eval_runtime": 95.7778, |
|
"eval_samples_per_second": 3.268, |
|
"eval_steps_per_second": 0.418, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_loss": 2.2439846992492676, |
|
"eval_runtime": 95.3723, |
|
"eval_samples_per_second": 3.282, |
|
"eval_steps_per_second": 0.419, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_loss": 2.22643780708313, |
|
"eval_runtime": 95.2142, |
|
"eval_samples_per_second": 3.287, |
|
"eval_steps_per_second": 0.42, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_loss": 2.2366464138031006, |
|
"eval_runtime": 94.7461, |
|
"eval_samples_per_second": 3.304, |
|
"eval_steps_per_second": 0.422, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_loss": 2.2283596992492676, |
|
"eval_runtime": 94.7042, |
|
"eval_samples_per_second": 3.305, |
|
"eval_steps_per_second": 0.422, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_loss": 2.228659152984619, |
|
"eval_runtime": 95.4845, |
|
"eval_samples_per_second": 3.278, |
|
"eval_steps_per_second": 0.419, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_loss": 2.2173023223876953, |
|
"eval_runtime": 96.1967, |
|
"eval_samples_per_second": 3.254, |
|
"eval_steps_per_second": 0.416, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_loss": 2.209789276123047, |
|
"eval_runtime": 95.9623, |
|
"eval_samples_per_second": 3.262, |
|
"eval_steps_per_second": 0.417, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_loss": 2.2206969261169434, |
|
"eval_runtime": 96.1394, |
|
"eval_samples_per_second": 3.256, |
|
"eval_steps_per_second": 0.416, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_loss": 2.2181010246276855, |
|
"eval_runtime": 95.7693, |
|
"eval_samples_per_second": 3.268, |
|
"eval_steps_per_second": 0.418, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_loss": 2.202101707458496, |
|
"eval_runtime": 94.766, |
|
"eval_samples_per_second": 3.303, |
|
"eval_steps_per_second": 0.422, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_loss": 2.196211099624634, |
|
"eval_runtime": 96.1177, |
|
"eval_samples_per_second": 3.256, |
|
"eval_steps_per_second": 0.416, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_loss": 2.19002103805542, |
|
"eval_runtime": 95.6171, |
|
"eval_samples_per_second": 3.273, |
|
"eval_steps_per_second": 0.418, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_loss": 2.1929662227630615, |
|
"eval_runtime": 95.261, |
|
"eval_samples_per_second": 3.286, |
|
"eval_steps_per_second": 0.42, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_loss": 2.1929163932800293, |
|
"eval_runtime": 94.6411, |
|
"eval_samples_per_second": 3.307, |
|
"eval_steps_per_second": 0.423, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_loss": 2.1864266395568848, |
|
"eval_runtime": 94.742, |
|
"eval_samples_per_second": 3.304, |
|
"eval_steps_per_second": 0.422, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 2.18817400932312, |
|
"eval_runtime": 93.1113, |
|
"eval_samples_per_second": 3.362, |
|
"eval_steps_per_second": 0.43, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 2.181110143661499, |
|
"eval_runtime": 96.1016, |
|
"eval_samples_per_second": 3.257, |
|
"eval_steps_per_second": 0.416, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 2.184654474258423, |
|
"eval_runtime": 95.0876, |
|
"eval_samples_per_second": 3.292, |
|
"eval_steps_per_second": 0.421, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 2.185678005218506, |
|
"eval_runtime": 94.0134, |
|
"eval_samples_per_second": 3.329, |
|
"eval_steps_per_second": 0.425, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 2.176767110824585, |
|
"eval_runtime": 95.7833, |
|
"eval_samples_per_second": 3.268, |
|
"eval_steps_per_second": 0.418, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 2.1729233264923096, |
|
"eval_runtime": 95.1591, |
|
"eval_samples_per_second": 3.289, |
|
"eval_steps_per_second": 0.42, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 2.1674821376800537, |
|
"eval_runtime": 96.3234, |
|
"eval_samples_per_second": 3.249, |
|
"eval_steps_per_second": 0.415, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0502, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_loss": 2.164886236190796, |
|
"eval_runtime": 98.1731, |
|
"eval_samples_per_second": 3.188, |
|
"eval_steps_per_second": 0.407, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_loss": 2.1528055667877197, |
|
"eval_runtime": 96.9725, |
|
"eval_samples_per_second": 3.228, |
|
"eval_steps_per_second": 0.412, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_loss": 2.167107582092285, |
|
"eval_runtime": 96.6542, |
|
"eval_samples_per_second": 3.238, |
|
"eval_steps_per_second": 0.414, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_loss": 2.1584465503692627, |
|
"eval_runtime": 98.4331, |
|
"eval_samples_per_second": 3.18, |
|
"eval_steps_per_second": 0.406, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_loss": 2.152431011199951, |
|
"eval_runtime": 95.2227, |
|
"eval_samples_per_second": 3.287, |
|
"eval_steps_per_second": 0.42, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_loss": 2.1513078212738037, |
|
"eval_runtime": 95.2983, |
|
"eval_samples_per_second": 3.284, |
|
"eval_steps_per_second": 0.42, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_loss": 2.1465654373168945, |
|
"eval_runtime": 96.5284, |
|
"eval_samples_per_second": 3.243, |
|
"eval_steps_per_second": 0.414, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_loss": 2.155775785446167, |
|
"eval_runtime": 95.918, |
|
"eval_samples_per_second": 3.263, |
|
"eval_steps_per_second": 0.417, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_loss": 2.148512363433838, |
|
"eval_runtime": 96.3942, |
|
"eval_samples_per_second": 3.247, |
|
"eval_steps_per_second": 0.415, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_loss": 2.1571736335754395, |
|
"eval_runtime": 94.8278, |
|
"eval_samples_per_second": 3.301, |
|
"eval_steps_per_second": 0.422, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_loss": 2.1482129096984863, |
|
"eval_runtime": 95.7915, |
|
"eval_samples_per_second": 3.268, |
|
"eval_steps_per_second": 0.418, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_loss": 2.1445436477661133, |
|
"eval_runtime": 97.0007, |
|
"eval_samples_per_second": 3.227, |
|
"eval_steps_per_second": 0.412, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_loss": 2.1457667350769043, |
|
"eval_runtime": 93.8614, |
|
"eval_samples_per_second": 3.335, |
|
"eval_steps_per_second": 0.426, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 2.155850648880005, |
|
"eval_runtime": 94.8073, |
|
"eval_samples_per_second": 3.301, |
|
"eval_steps_per_second": 0.422, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 2.1380791664123535, |
|
"eval_runtime": 95.9912, |
|
"eval_samples_per_second": 3.261, |
|
"eval_steps_per_second": 0.417, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 2.1424720287323, |
|
"eval_runtime": 94.1052, |
|
"eval_samples_per_second": 3.326, |
|
"eval_steps_per_second": 0.425, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 2.1322383880615234, |
|
"eval_runtime": 95.6595, |
|
"eval_samples_per_second": 3.272, |
|
"eval_steps_per_second": 0.418, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 2.1327874660491943, |
|
"eval_runtime": 95.3348, |
|
"eval_samples_per_second": 3.283, |
|
"eval_steps_per_second": 0.42, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 2.1295926570892334, |
|
"eval_runtime": 94.6735, |
|
"eval_samples_per_second": 3.306, |
|
"eval_steps_per_second": 0.423, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_loss": 2.1335363388061523, |
|
"eval_runtime": 94.2932, |
|
"eval_samples_per_second": 3.319, |
|
"eval_steps_per_second": 0.424, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_loss": 2.126971960067749, |
|
"eval_runtime": 92.6523, |
|
"eval_samples_per_second": 3.378, |
|
"eval_steps_per_second": 0.432, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_loss": 2.1153903007507324, |
|
"eval_runtime": 96.6374, |
|
"eval_samples_per_second": 3.239, |
|
"eval_steps_per_second": 0.414, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_loss": 2.1179113388061523, |
|
"eval_runtime": 96.4386, |
|
"eval_samples_per_second": 3.246, |
|
"eval_steps_per_second": 0.415, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_loss": 2.1170127391815186, |
|
"eval_runtime": 94.461, |
|
"eval_samples_per_second": 3.314, |
|
"eval_steps_per_second": 0.423, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_loss": 2.1248252391815186, |
|
"eval_runtime": 95.6005, |
|
"eval_samples_per_second": 3.274, |
|
"eval_steps_per_second": 0.418, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 5e-05, |
|
"loss": 1.9893, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_loss": 2.117586851119995, |
|
"eval_runtime": 95.6648, |
|
"eval_samples_per_second": 3.272, |
|
"eval_steps_per_second": 0.418, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_loss": 2.1058804988861084, |
|
"eval_runtime": 95.0515, |
|
"eval_samples_per_second": 3.293, |
|
"eval_steps_per_second": 0.421, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_loss": 2.1127195358276367, |
|
"eval_runtime": 95.0085, |
|
"eval_samples_per_second": 3.294, |
|
"eval_steps_per_second": 0.421, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_loss": 2.115964412689209, |
|
"eval_runtime": 36.4716, |
|
"eval_samples_per_second": 8.582, |
|
"eval_steps_per_second": 0.384, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_loss": 2.1093251705169678, |
|
"eval_runtime": 37.1698, |
|
"eval_samples_per_second": 8.421, |
|
"eval_steps_per_second": 0.377, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_loss": 2.1045827865600586, |
|
"eval_runtime": 36.7758, |
|
"eval_samples_per_second": 8.511, |
|
"eval_steps_per_second": 0.381, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_loss": 2.1027355194091797, |
|
"eval_runtime": 35.9986, |
|
"eval_samples_per_second": 8.695, |
|
"eval_steps_per_second": 0.389, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 2.1164636611938477, |
|
"eval_runtime": 36.3352, |
|
"eval_samples_per_second": 8.614, |
|
"eval_steps_per_second": 0.385, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 2.105306625366211, |
|
"eval_runtime": 36.3036, |
|
"eval_samples_per_second": 8.622, |
|
"eval_steps_per_second": 0.386, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 2.1107728481292725, |
|
"eval_runtime": 35.8293, |
|
"eval_samples_per_second": 8.736, |
|
"eval_steps_per_second": 0.391, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 2.1059305667877197, |
|
"eval_runtime": 38.1951, |
|
"eval_samples_per_second": 8.195, |
|
"eval_steps_per_second": 0.367, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 2.109574794769287, |
|
"eval_runtime": 36.6746, |
|
"eval_samples_per_second": 8.535, |
|
"eval_steps_per_second": 0.382, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 2.1008386611938477, |
|
"eval_runtime": 38.8587, |
|
"eval_samples_per_second": 8.055, |
|
"eval_steps_per_second": 0.36, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 2.1023361682891846, |
|
"eval_runtime": 35.8353, |
|
"eval_samples_per_second": 8.734, |
|
"eval_steps_per_second": 0.391, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 2.1008386611938477, |
|
"eval_runtime": 36.6886, |
|
"eval_samples_per_second": 8.531, |
|
"eval_steps_per_second": 0.382, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 2.093350648880005, |
|
"eval_runtime": 38.2167, |
|
"eval_samples_per_second": 8.19, |
|
"eval_steps_per_second": 0.366, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 2.1041831970214844, |
|
"eval_runtime": 36.3105, |
|
"eval_samples_per_second": 8.62, |
|
"eval_steps_per_second": 0.386, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 2.0942492485046387, |
|
"eval_runtime": 37.4668, |
|
"eval_samples_per_second": 8.354, |
|
"eval_steps_per_second": 0.374, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 2.0858376026153564, |
|
"eval_runtime": 36.0576, |
|
"eval_samples_per_second": 8.681, |
|
"eval_steps_per_second": 0.388, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 2.1020865440368652, |
|
"eval_runtime": 37.7141, |
|
"eval_samples_per_second": 8.299, |
|
"eval_steps_per_second": 0.371, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 2.1026856899261475, |
|
"eval_runtime": 35.4823, |
|
"eval_samples_per_second": 8.821, |
|
"eval_steps_per_second": 0.395, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 2.0936501026153564, |
|
"eval_runtime": 37.4147, |
|
"eval_samples_per_second": 8.366, |
|
"eval_steps_per_second": 0.374, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 2.0930511951446533, |
|
"eval_runtime": 37.4908, |
|
"eval_samples_per_second": 8.349, |
|
"eval_steps_per_second": 0.373, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 2.0927765369415283, |
|
"eval_runtime": 35.6866, |
|
"eval_samples_per_second": 8.771, |
|
"eval_steps_per_second": 0.392, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 2.0972445011138916, |
|
"eval_runtime": 36.708, |
|
"eval_samples_per_second": 8.527, |
|
"eval_steps_per_second": 0.381, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 5e-05, |
|
"loss": 1.9023, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 2.0981428623199463, |
|
"eval_runtime": 37.7854, |
|
"eval_samples_per_second": 8.284, |
|
"eval_steps_per_second": 0.371, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 2.0930511951446533, |
|
"eval_runtime": 35.9143, |
|
"eval_samples_per_second": 8.715, |
|
"eval_steps_per_second": 0.39, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 2.0959465503692627, |
|
"eval_runtime": 36.7602, |
|
"eval_samples_per_second": 8.515, |
|
"eval_steps_per_second": 0.381, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 2.094498872756958, |
|
"eval_runtime": 35.6393, |
|
"eval_samples_per_second": 8.782, |
|
"eval_steps_per_second": 0.393, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 2.10168719291687, |
|
"eval_runtime": 36.6139, |
|
"eval_samples_per_second": 8.549, |
|
"eval_steps_per_second": 0.382, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 2.089132308959961, |
|
"eval_runtime": 37.1222, |
|
"eval_samples_per_second": 8.432, |
|
"eval_steps_per_second": 0.377, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 2.0930261611938477, |
|
"eval_runtime": 36.6415, |
|
"eval_samples_per_second": 8.542, |
|
"eval_steps_per_second": 0.382, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 2.0837409496307373, |
|
"eval_runtime": 35.3492, |
|
"eval_samples_per_second": 8.855, |
|
"eval_steps_per_second": 0.396, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 2.0924770832061768, |
|
"eval_runtime": 35.8577, |
|
"eval_samples_per_second": 8.729, |
|
"eval_steps_per_second": 0.39, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 2.0876598358154297, |
|
"eval_runtime": 36.7333, |
|
"eval_samples_per_second": 8.521, |
|
"eval_steps_per_second": 0.381, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 2.0903303623199463, |
|
"eval_runtime": 36.5753, |
|
"eval_samples_per_second": 8.558, |
|
"eval_steps_per_second": 0.383, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 2.0882089138031006, |
|
"eval_runtime": 37.2231, |
|
"eval_samples_per_second": 8.409, |
|
"eval_steps_per_second": 0.376, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 2.0914785861968994, |
|
"eval_runtime": 35.3728, |
|
"eval_samples_per_second": 8.849, |
|
"eval_steps_per_second": 0.396, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 2.092726707458496, |
|
"eval_runtime": 40.8988, |
|
"eval_samples_per_second": 7.653, |
|
"eval_steps_per_second": 0.342, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 2.092102527618408, |
|
"eval_runtime": 38.6861, |
|
"eval_samples_per_second": 8.091, |
|
"eval_steps_per_second": 0.362, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_loss": 2.0902554988861084, |
|
"eval_runtime": 37.1228, |
|
"eval_samples_per_second": 8.431, |
|
"eval_steps_per_second": 0.377, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_loss": 2.1011133193969727, |
|
"eval_runtime": 38.2059, |
|
"eval_samples_per_second": 8.192, |
|
"eval_steps_per_second": 0.366, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_loss": 2.0915534496307373, |
|
"eval_runtime": 37.7371, |
|
"eval_samples_per_second": 8.294, |
|
"eval_steps_per_second": 0.371, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_loss": 2.084639549255371, |
|
"eval_runtime": 37.4914, |
|
"eval_samples_per_second": 8.349, |
|
"eval_steps_per_second": 0.373, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_loss": 2.0891075134277344, |
|
"eval_runtime": 37.0809, |
|
"eval_samples_per_second": 8.441, |
|
"eval_steps_per_second": 0.378, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_loss": 2.080421209335327, |
|
"eval_runtime": 38.2834, |
|
"eval_samples_per_second": 8.176, |
|
"eval_steps_per_second": 0.366, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_loss": 2.0774011611938477, |
|
"eval_runtime": 36.0821, |
|
"eval_samples_per_second": 8.675, |
|
"eval_steps_per_second": 0.388, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_loss": 2.074655532836914, |
|
"eval_runtime": 38.4174, |
|
"eval_samples_per_second": 8.147, |
|
"eval_steps_per_second": 0.364, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_loss": 2.0918281078338623, |
|
"eval_runtime": 37.5135, |
|
"eval_samples_per_second": 8.344, |
|
"eval_steps_per_second": 0.373, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_loss": 2.0866613388061523, |
|
"eval_runtime": 37.8846, |
|
"eval_samples_per_second": 8.262, |
|
"eval_steps_per_second": 0.37, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 5e-05, |
|
"loss": 1.8656, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_loss": 2.0818939208984375, |
|
"eval_runtime": 37.1254, |
|
"eval_samples_per_second": 8.431, |
|
"eval_steps_per_second": 0.377, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_loss": 2.082193374633789, |
|
"eval_runtime": 37.2165, |
|
"eval_samples_per_second": 8.41, |
|
"eval_steps_per_second": 0.376, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_loss": 2.078274726867676, |
|
"eval_runtime": 38.1535, |
|
"eval_samples_per_second": 8.204, |
|
"eval_steps_per_second": 0.367, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 2.0924270153045654, |
|
"eval_runtime": 37.0529, |
|
"eval_samples_per_second": 8.447, |
|
"eval_steps_per_second": 0.378, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 2.0776758193969727, |
|
"eval_runtime": 38.2095, |
|
"eval_samples_per_second": 8.192, |
|
"eval_steps_per_second": 0.366, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 2.074331045150757, |
|
"eval_runtime": 38.0087, |
|
"eval_samples_per_second": 8.235, |
|
"eval_steps_per_second": 0.368, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 2.0753045082092285, |
|
"eval_runtime": 35.62, |
|
"eval_samples_per_second": 8.787, |
|
"eval_steps_per_second": 0.393, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 2.0662689208984375, |
|
"eval_runtime": 42.1091, |
|
"eval_samples_per_second": 7.433, |
|
"eval_steps_per_second": 0.332, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 2.066293954849243, |
|
"eval_runtime": 39.286, |
|
"eval_samples_per_second": 7.967, |
|
"eval_steps_per_second": 0.356, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 2.0750298500061035, |
|
"eval_runtime": 37.7908, |
|
"eval_samples_per_second": 8.282, |
|
"eval_steps_per_second": 0.37, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_loss": 2.072883367538452, |
|
"eval_runtime": 36.9744, |
|
"eval_samples_per_second": 8.465, |
|
"eval_steps_per_second": 0.379, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_loss": 2.0656700134277344, |
|
"eval_runtime": 38.6743, |
|
"eval_samples_per_second": 8.093, |
|
"eval_steps_per_second": 0.362, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_loss": 2.061077356338501, |
|
"eval_runtime": 37.2607, |
|
"eval_samples_per_second": 8.4, |
|
"eval_steps_per_second": 0.376, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_loss": 2.0596296787261963, |
|
"eval_runtime": 38.4938, |
|
"eval_samples_per_second": 8.131, |
|
"eval_steps_per_second": 0.364, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_loss": 2.0695137977600098, |
|
"eval_runtime": 38.4555, |
|
"eval_samples_per_second": 8.139, |
|
"eval_steps_per_second": 0.364, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_loss": 2.0653703212738037, |
|
"eval_runtime": 40.8818, |
|
"eval_samples_per_second": 7.656, |
|
"eval_steps_per_second": 0.342, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 2.0632736682891846, |
|
"eval_runtime": 37.3448, |
|
"eval_samples_per_second": 8.381, |
|
"eval_steps_per_second": 0.375, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 2.068690061569214, |
|
"eval_runtime": 38.9945, |
|
"eval_samples_per_second": 8.027, |
|
"eval_steps_per_second": 0.359, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 2.0744807720184326, |
|
"eval_runtime": 37.3809, |
|
"eval_samples_per_second": 8.373, |
|
"eval_steps_per_second": 0.375, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 2.068140983581543, |
|
"eval_runtime": 37.0455, |
|
"eval_samples_per_second": 8.449, |
|
"eval_steps_per_second": 0.378, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 2.0711112022399902, |
|
"eval_runtime": 36.5798, |
|
"eval_samples_per_second": 8.557, |
|
"eval_steps_per_second": 0.383, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 2.0659945011138916, |
|
"eval_runtime": 37.3716, |
|
"eval_samples_per_second": 8.375, |
|
"eval_steps_per_second": 0.375, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 2.076228141784668, |
|
"eval_runtime": 38.1126, |
|
"eval_samples_per_second": 8.213, |
|
"eval_steps_per_second": 0.367, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_loss": 2.072284460067749, |
|
"eval_runtime": 37.7328, |
|
"eval_samples_per_second": 8.295, |
|
"eval_steps_per_second": 0.371, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_loss": 2.0797972679138184, |
|
"eval_runtime": 39.3148, |
|
"eval_samples_per_second": 7.961, |
|
"eval_steps_per_second": 0.356, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 5e-05, |
|
"loss": 1.8034, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_loss": 2.0818939208984375, |
|
"eval_runtime": 37.1291, |
|
"eval_samples_per_second": 8.43, |
|
"eval_steps_per_second": 0.377, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_loss": 2.0645217895507812, |
|
"eval_runtime": 38.134, |
|
"eval_samples_per_second": 8.208, |
|
"eval_steps_per_second": 0.367, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_loss": 2.0636231899261475, |
|
"eval_runtime": 38.8835, |
|
"eval_samples_per_second": 8.05, |
|
"eval_steps_per_second": 0.36, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_loss": 2.0678415298461914, |
|
"eval_runtime": 38.0811, |
|
"eval_samples_per_second": 8.219, |
|
"eval_steps_per_second": 0.368, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_loss": 2.0711112022399902, |
|
"eval_runtime": 38.2925, |
|
"eval_samples_per_second": 8.174, |
|
"eval_steps_per_second": 0.366, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 2.063648223876953, |
|
"eval_runtime": 37.5261, |
|
"eval_samples_per_second": 8.341, |
|
"eval_steps_per_second": 0.373, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 2.0624501705169678, |
|
"eval_runtime": 37.6407, |
|
"eval_samples_per_second": 8.315, |
|
"eval_steps_per_second": 0.372, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 2.0669429302215576, |
|
"eval_runtime": 36.2761, |
|
"eval_samples_per_second": 8.628, |
|
"eval_steps_per_second": 0.386, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 2.056734323501587, |
|
"eval_runtime": 39.277, |
|
"eval_samples_per_second": 7.969, |
|
"eval_steps_per_second": 0.356, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 2.0456268787384033, |
|
"eval_runtime": 37.5675, |
|
"eval_samples_per_second": 8.332, |
|
"eval_steps_per_second": 0.373, |
|
"step": 3180 |
|
} |
|
], |
|
"max_steps": 13108, |
|
"num_train_epochs": 1, |
|
"total_flos": 46808734629888.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|