{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.24259993896856882, "global_step": 3180, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "eval_loss": 3.030700922012329, "eval_runtime": 51.1742, "eval_samples_per_second": 6.116, "eval_steps_per_second": 0.391, "step": 20 }, { "epoch": 0.0, "eval_loss": 2.9279651641845703, "eval_runtime": 96.1374, "eval_samples_per_second": 3.256, "eval_steps_per_second": 0.416, "step": 40 }, { "epoch": 0.0, "eval_loss": 2.7418131828308105, "eval_runtime": 95.6416, "eval_samples_per_second": 3.273, "eval_steps_per_second": 0.418, "step": 60 }, { "epoch": 0.0, "eval_loss": 2.666383743286133, "eval_runtime": 95.265, "eval_samples_per_second": 3.286, "eval_steps_per_second": 0.42, "step": 80 }, { "epoch": 0.01, "eval_loss": 2.5859625339508057, "eval_runtime": 96.3364, "eval_samples_per_second": 3.249, "eval_steps_per_second": 0.415, "step": 100 }, { "epoch": 0.01, "eval_loss": 2.533696174621582, "eval_runtime": 94.072, "eval_samples_per_second": 3.327, "eval_steps_per_second": 0.425, "step": 120 }, { "epoch": 0.01, "eval_loss": 2.4880690574645996, "eval_runtime": 97.015, "eval_samples_per_second": 3.226, "eval_steps_per_second": 0.412, "step": 140 }, { "epoch": 0.01, "eval_loss": 2.484375, "eval_runtime": 94.9062, "eval_samples_per_second": 3.298, "eval_steps_per_second": 0.421, "step": 160 }, { "epoch": 0.01, "eval_loss": 2.444089412689209, "eval_runtime": 96.2311, "eval_samples_per_second": 3.253, "eval_steps_per_second": 0.416, "step": 180 }, { "epoch": 0.02, "eval_loss": 2.4108924865722656, "eval_runtime": 94.1107, "eval_samples_per_second": 3.326, "eval_steps_per_second": 0.425, "step": 200 }, { "epoch": 0.02, "eval_loss": 2.4031050205230713, "eval_runtime": 96.8698, "eval_samples_per_second": 3.231, "eval_steps_per_second": 0.413, "step": 220 }, { "epoch": 0.02, "eval_loss": 2.3866562843322754, "eval_runtime": 94.3449, "eval_samples_per_second": 3.318, "eval_steps_per_second": 0.424, "step": 240 }, { "epoch": 0.02, "eval_loss": 2.364941120147705, "eval_runtime": 94.3238, "eval_samples_per_second": 3.318, "eval_steps_per_second": 0.424, "step": 260 }, { "epoch": 0.02, "eval_loss": 2.3453474044799805, "eval_runtime": 95.7368, "eval_samples_per_second": 3.269, "eval_steps_per_second": 0.418, "step": 280 }, { "epoch": 0.02, "eval_loss": 2.3416035175323486, "eval_runtime": 94.4091, "eval_samples_per_second": 3.315, "eval_steps_per_second": 0.424, "step": 300 }, { "epoch": 0.02, "eval_loss": 2.328873872756958, "eval_runtime": 98.9931, "eval_samples_per_second": 3.162, "eval_steps_per_second": 0.404, "step": 320 }, { "epoch": 0.03, "eval_loss": 2.314546823501587, "eval_runtime": 98.9879, "eval_samples_per_second": 3.162, "eval_steps_per_second": 0.404, "step": 340 }, { "epoch": 0.03, "eval_loss": 2.3136730194091797, "eval_runtime": 97.3738, "eval_samples_per_second": 3.214, "eval_steps_per_second": 0.411, "step": 360 }, { "epoch": 0.03, "eval_loss": 2.3047373294830322, "eval_runtime": 95.8162, "eval_samples_per_second": 3.267, "eval_steps_per_second": 0.417, "step": 380 }, { "epoch": 0.03, "eval_loss": 2.2875399589538574, "eval_runtime": 96.4936, "eval_samples_per_second": 3.244, "eval_steps_per_second": 0.415, "step": 400 }, { "epoch": 0.03, "eval_loss": 2.28057599067688, "eval_runtime": 96.1017, "eval_samples_per_second": 3.257, "eval_steps_per_second": 0.416, "step": 420 }, { "epoch": 0.03, "eval_loss": 2.2859673500061035, "eval_runtime": 94.3087, "eval_samples_per_second": 3.319, "eval_steps_per_second": 0.424, "step": 440 }, { "epoch": 0.04, "eval_loss": 2.2626798152923584, "eval_runtime": 94.7992, "eval_samples_per_second": 3.302, "eval_steps_per_second": 0.422, "step": 460 }, { "epoch": 0.04, "eval_loss": 2.2764577865600586, "eval_runtime": 95.9001, "eval_samples_per_second": 3.264, "eval_steps_per_second": 0.417, "step": 480 }, { "epoch": 0.04, "learning_rate": 5e-05, "loss": 2.1752, "step": 500 }, { "epoch": 0.04, "eval_loss": 2.270517110824585, "eval_runtime": 95.5609, "eval_samples_per_second": 3.275, "eval_steps_per_second": 0.419, "step": 500 }, { "epoch": 0.04, "eval_loss": 2.2533695697784424, "eval_runtime": 95.989, "eval_samples_per_second": 3.261, "eval_steps_per_second": 0.417, "step": 520 }, { "epoch": 0.04, "eval_loss": 2.2560901641845703, "eval_runtime": 95.7778, "eval_samples_per_second": 3.268, "eval_steps_per_second": 0.418, "step": 540 }, { "epoch": 0.04, "eval_loss": 2.2439846992492676, "eval_runtime": 95.3723, "eval_samples_per_second": 3.282, "eval_steps_per_second": 0.419, "step": 560 }, { "epoch": 0.04, "eval_loss": 2.22643780708313, "eval_runtime": 95.2142, "eval_samples_per_second": 3.287, "eval_steps_per_second": 0.42, "step": 580 }, { "epoch": 0.05, "eval_loss": 2.2366464138031006, "eval_runtime": 94.7461, "eval_samples_per_second": 3.304, "eval_steps_per_second": 0.422, "step": 600 }, { "epoch": 0.05, "eval_loss": 2.2283596992492676, "eval_runtime": 94.7042, "eval_samples_per_second": 3.305, "eval_steps_per_second": 0.422, "step": 620 }, { "epoch": 0.05, "eval_loss": 2.228659152984619, "eval_runtime": 95.4845, "eval_samples_per_second": 3.278, "eval_steps_per_second": 0.419, "step": 640 }, { "epoch": 0.05, "eval_loss": 2.2173023223876953, "eval_runtime": 96.1967, "eval_samples_per_second": 3.254, "eval_steps_per_second": 0.416, "step": 660 }, { "epoch": 0.05, "eval_loss": 2.209789276123047, "eval_runtime": 95.9623, "eval_samples_per_second": 3.262, "eval_steps_per_second": 0.417, "step": 680 }, { "epoch": 0.05, "eval_loss": 2.2206969261169434, "eval_runtime": 96.1394, "eval_samples_per_second": 3.256, "eval_steps_per_second": 0.416, "step": 700 }, { "epoch": 0.05, "eval_loss": 2.2181010246276855, "eval_runtime": 95.7693, "eval_samples_per_second": 3.268, "eval_steps_per_second": 0.418, "step": 720 }, { "epoch": 0.06, "eval_loss": 2.202101707458496, "eval_runtime": 94.766, "eval_samples_per_second": 3.303, "eval_steps_per_second": 0.422, "step": 740 }, { "epoch": 0.06, "eval_loss": 2.196211099624634, "eval_runtime": 96.1177, "eval_samples_per_second": 3.256, "eval_steps_per_second": 0.416, "step": 760 }, { "epoch": 0.06, "eval_loss": 2.19002103805542, "eval_runtime": 95.6171, "eval_samples_per_second": 3.273, "eval_steps_per_second": 0.418, "step": 780 }, { "epoch": 0.06, "eval_loss": 2.1929662227630615, "eval_runtime": 95.261, "eval_samples_per_second": 3.286, "eval_steps_per_second": 0.42, "step": 800 }, { "epoch": 0.06, "eval_loss": 2.1929163932800293, "eval_runtime": 94.6411, "eval_samples_per_second": 3.307, "eval_steps_per_second": 0.423, "step": 820 }, { "epoch": 0.06, "eval_loss": 2.1864266395568848, "eval_runtime": 94.742, "eval_samples_per_second": 3.304, "eval_steps_per_second": 0.422, "step": 840 }, { "epoch": 0.07, "eval_loss": 2.18817400932312, "eval_runtime": 93.1113, "eval_samples_per_second": 3.362, "eval_steps_per_second": 0.43, "step": 860 }, { "epoch": 0.07, "eval_loss": 2.181110143661499, "eval_runtime": 96.1016, "eval_samples_per_second": 3.257, "eval_steps_per_second": 0.416, "step": 880 }, { "epoch": 0.07, "eval_loss": 2.184654474258423, "eval_runtime": 95.0876, "eval_samples_per_second": 3.292, "eval_steps_per_second": 0.421, "step": 900 }, { "epoch": 0.07, "eval_loss": 2.185678005218506, "eval_runtime": 94.0134, "eval_samples_per_second": 3.329, "eval_steps_per_second": 0.425, "step": 920 }, { "epoch": 0.07, "eval_loss": 2.176767110824585, "eval_runtime": 95.7833, "eval_samples_per_second": 3.268, "eval_steps_per_second": 0.418, "step": 940 }, { "epoch": 0.07, "eval_loss": 2.1729233264923096, "eval_runtime": 95.1591, "eval_samples_per_second": 3.289, "eval_steps_per_second": 0.42, "step": 960 }, { "epoch": 0.07, "eval_loss": 2.1674821376800537, "eval_runtime": 96.3234, "eval_samples_per_second": 3.249, "eval_steps_per_second": 0.415, "step": 980 }, { "epoch": 0.08, "learning_rate": 5e-05, "loss": 2.0502, "step": 1000 }, { "epoch": 0.08, "eval_loss": 2.164886236190796, "eval_runtime": 98.1731, "eval_samples_per_second": 3.188, "eval_steps_per_second": 0.407, "step": 1000 }, { "epoch": 0.08, "eval_loss": 2.1528055667877197, "eval_runtime": 96.9725, "eval_samples_per_second": 3.228, "eval_steps_per_second": 0.412, "step": 1020 }, { "epoch": 0.08, "eval_loss": 2.167107582092285, "eval_runtime": 96.6542, "eval_samples_per_second": 3.238, "eval_steps_per_second": 0.414, "step": 1040 }, { "epoch": 0.08, "eval_loss": 2.1584465503692627, "eval_runtime": 98.4331, "eval_samples_per_second": 3.18, "eval_steps_per_second": 0.406, "step": 1060 }, { "epoch": 0.08, "eval_loss": 2.152431011199951, "eval_runtime": 95.2227, "eval_samples_per_second": 3.287, "eval_steps_per_second": 0.42, "step": 1080 }, { "epoch": 0.08, "eval_loss": 2.1513078212738037, "eval_runtime": 95.2983, "eval_samples_per_second": 3.284, "eval_steps_per_second": 0.42, "step": 1100 }, { "epoch": 0.09, "eval_loss": 2.1465654373168945, "eval_runtime": 96.5284, "eval_samples_per_second": 3.243, "eval_steps_per_second": 0.414, "step": 1120 }, { "epoch": 0.09, "eval_loss": 2.155775785446167, "eval_runtime": 95.918, "eval_samples_per_second": 3.263, "eval_steps_per_second": 0.417, "step": 1140 }, { "epoch": 0.09, "eval_loss": 2.148512363433838, "eval_runtime": 96.3942, "eval_samples_per_second": 3.247, "eval_steps_per_second": 0.415, "step": 1160 }, { "epoch": 0.09, "eval_loss": 2.1571736335754395, "eval_runtime": 94.8278, "eval_samples_per_second": 3.301, "eval_steps_per_second": 0.422, "step": 1180 }, { "epoch": 0.09, "eval_loss": 2.1482129096984863, "eval_runtime": 95.7915, "eval_samples_per_second": 3.268, "eval_steps_per_second": 0.418, "step": 1200 }, { "epoch": 0.09, "eval_loss": 2.1445436477661133, "eval_runtime": 97.0007, "eval_samples_per_second": 3.227, "eval_steps_per_second": 0.412, "step": 1220 }, { "epoch": 0.09, "eval_loss": 2.1457667350769043, "eval_runtime": 93.8614, "eval_samples_per_second": 3.335, "eval_steps_per_second": 0.426, "step": 1240 }, { "epoch": 0.1, "eval_loss": 2.155850648880005, "eval_runtime": 94.8073, "eval_samples_per_second": 3.301, "eval_steps_per_second": 0.422, "step": 1260 }, { "epoch": 0.1, "eval_loss": 2.1380791664123535, "eval_runtime": 95.9912, "eval_samples_per_second": 3.261, "eval_steps_per_second": 0.417, "step": 1280 }, { "epoch": 0.1, "eval_loss": 2.1424720287323, "eval_runtime": 94.1052, "eval_samples_per_second": 3.326, "eval_steps_per_second": 0.425, "step": 1300 }, { "epoch": 0.1, "eval_loss": 2.1322383880615234, "eval_runtime": 95.6595, "eval_samples_per_second": 3.272, "eval_steps_per_second": 0.418, "step": 1320 }, { "epoch": 0.1, "eval_loss": 2.1327874660491943, "eval_runtime": 95.3348, "eval_samples_per_second": 3.283, "eval_steps_per_second": 0.42, "step": 1340 }, { "epoch": 0.1, "eval_loss": 2.1295926570892334, "eval_runtime": 94.6735, "eval_samples_per_second": 3.306, "eval_steps_per_second": 0.423, "step": 1360 }, { "epoch": 0.11, "eval_loss": 2.1335363388061523, "eval_runtime": 94.2932, "eval_samples_per_second": 3.319, "eval_steps_per_second": 0.424, "step": 1380 }, { "epoch": 0.11, "eval_loss": 2.126971960067749, "eval_runtime": 92.6523, "eval_samples_per_second": 3.378, "eval_steps_per_second": 0.432, "step": 1400 }, { "epoch": 0.11, "eval_loss": 2.1153903007507324, "eval_runtime": 96.6374, "eval_samples_per_second": 3.239, "eval_steps_per_second": 0.414, "step": 1420 }, { "epoch": 0.11, "eval_loss": 2.1179113388061523, "eval_runtime": 96.4386, "eval_samples_per_second": 3.246, "eval_steps_per_second": 0.415, "step": 1440 }, { "epoch": 0.11, "eval_loss": 2.1170127391815186, "eval_runtime": 94.461, "eval_samples_per_second": 3.314, "eval_steps_per_second": 0.423, "step": 1460 }, { "epoch": 0.11, "eval_loss": 2.1248252391815186, "eval_runtime": 95.6005, "eval_samples_per_second": 3.274, "eval_steps_per_second": 0.418, "step": 1480 }, { "epoch": 0.11, "learning_rate": 5e-05, "loss": 1.9893, "step": 1500 }, { "epoch": 0.11, "eval_loss": 2.117586851119995, "eval_runtime": 95.6648, "eval_samples_per_second": 3.272, "eval_steps_per_second": 0.418, "step": 1500 }, { "epoch": 0.12, "eval_loss": 2.1058804988861084, "eval_runtime": 95.0515, "eval_samples_per_second": 3.293, "eval_steps_per_second": 0.421, "step": 1520 }, { "epoch": 0.12, "eval_loss": 2.1127195358276367, "eval_runtime": 95.0085, "eval_samples_per_second": 3.294, "eval_steps_per_second": 0.421, "step": 1540 }, { "epoch": 0.12, "eval_loss": 2.115964412689209, "eval_runtime": 36.4716, "eval_samples_per_second": 8.582, "eval_steps_per_second": 0.384, "step": 1560 }, { "epoch": 0.12, "eval_loss": 2.1093251705169678, "eval_runtime": 37.1698, "eval_samples_per_second": 8.421, "eval_steps_per_second": 0.377, "step": 1580 }, { "epoch": 0.12, "eval_loss": 2.1045827865600586, "eval_runtime": 36.7758, "eval_samples_per_second": 8.511, "eval_steps_per_second": 0.381, "step": 1600 }, { "epoch": 0.12, "eval_loss": 2.1027355194091797, "eval_runtime": 35.9986, "eval_samples_per_second": 8.695, "eval_steps_per_second": 0.389, "step": 1620 }, { "epoch": 0.13, "eval_loss": 2.1164636611938477, "eval_runtime": 36.3352, "eval_samples_per_second": 8.614, "eval_steps_per_second": 0.385, "step": 1640 }, { "epoch": 0.13, "eval_loss": 2.105306625366211, "eval_runtime": 36.3036, "eval_samples_per_second": 8.622, "eval_steps_per_second": 0.386, "step": 1660 }, { "epoch": 0.13, "eval_loss": 2.1107728481292725, "eval_runtime": 35.8293, "eval_samples_per_second": 8.736, "eval_steps_per_second": 0.391, "step": 1680 }, { "epoch": 0.13, "eval_loss": 2.1059305667877197, "eval_runtime": 38.1951, "eval_samples_per_second": 8.195, "eval_steps_per_second": 0.367, "step": 1700 }, { "epoch": 0.13, "eval_loss": 2.109574794769287, "eval_runtime": 36.6746, "eval_samples_per_second": 8.535, "eval_steps_per_second": 0.382, "step": 1720 }, { "epoch": 0.13, "eval_loss": 2.1008386611938477, "eval_runtime": 38.8587, "eval_samples_per_second": 8.055, "eval_steps_per_second": 0.36, "step": 1740 }, { "epoch": 0.13, "eval_loss": 2.1023361682891846, "eval_runtime": 35.8353, "eval_samples_per_second": 8.734, "eval_steps_per_second": 0.391, "step": 1760 }, { "epoch": 0.14, "eval_loss": 2.1008386611938477, "eval_runtime": 36.6886, "eval_samples_per_second": 8.531, "eval_steps_per_second": 0.382, "step": 1780 }, { "epoch": 0.14, "eval_loss": 2.093350648880005, "eval_runtime": 38.2167, "eval_samples_per_second": 8.19, "eval_steps_per_second": 0.366, "step": 1800 }, { "epoch": 0.14, "eval_loss": 2.1041831970214844, "eval_runtime": 36.3105, "eval_samples_per_second": 8.62, "eval_steps_per_second": 0.386, "step": 1820 }, { "epoch": 0.14, "eval_loss": 2.0942492485046387, "eval_runtime": 37.4668, "eval_samples_per_second": 8.354, "eval_steps_per_second": 0.374, "step": 1840 }, { "epoch": 0.14, "eval_loss": 2.0858376026153564, "eval_runtime": 36.0576, "eval_samples_per_second": 8.681, "eval_steps_per_second": 0.388, "step": 1860 }, { "epoch": 0.14, "eval_loss": 2.1020865440368652, "eval_runtime": 37.7141, "eval_samples_per_second": 8.299, "eval_steps_per_second": 0.371, "step": 1880 }, { "epoch": 0.14, "eval_loss": 2.1026856899261475, "eval_runtime": 35.4823, "eval_samples_per_second": 8.821, "eval_steps_per_second": 0.395, "step": 1900 }, { "epoch": 0.15, "eval_loss": 2.0936501026153564, "eval_runtime": 37.4147, "eval_samples_per_second": 8.366, "eval_steps_per_second": 0.374, "step": 1920 }, { "epoch": 0.15, "eval_loss": 2.0930511951446533, "eval_runtime": 37.4908, "eval_samples_per_second": 8.349, "eval_steps_per_second": 0.373, "step": 1940 }, { "epoch": 0.15, "eval_loss": 2.0927765369415283, "eval_runtime": 35.6866, "eval_samples_per_second": 8.771, "eval_steps_per_second": 0.392, "step": 1960 }, { "epoch": 0.15, "eval_loss": 2.0972445011138916, "eval_runtime": 36.708, "eval_samples_per_second": 8.527, "eval_steps_per_second": 0.381, "step": 1980 }, { "epoch": 0.15, "learning_rate": 5e-05, "loss": 1.9023, "step": 2000 }, { "epoch": 0.15, "eval_loss": 2.0981428623199463, "eval_runtime": 37.7854, "eval_samples_per_second": 8.284, "eval_steps_per_second": 0.371, "step": 2000 }, { "epoch": 0.15, "eval_loss": 2.0930511951446533, "eval_runtime": 35.9143, "eval_samples_per_second": 8.715, "eval_steps_per_second": 0.39, "step": 2020 }, { "epoch": 0.16, "eval_loss": 2.0959465503692627, "eval_runtime": 36.7602, "eval_samples_per_second": 8.515, "eval_steps_per_second": 0.381, "step": 2040 }, { "epoch": 0.16, "eval_loss": 2.094498872756958, "eval_runtime": 35.6393, "eval_samples_per_second": 8.782, "eval_steps_per_second": 0.393, "step": 2060 }, { "epoch": 0.16, "eval_loss": 2.10168719291687, "eval_runtime": 36.6139, "eval_samples_per_second": 8.549, "eval_steps_per_second": 0.382, "step": 2080 }, { "epoch": 0.16, "eval_loss": 2.089132308959961, "eval_runtime": 37.1222, "eval_samples_per_second": 8.432, "eval_steps_per_second": 0.377, "step": 2100 }, { "epoch": 0.16, "eval_loss": 2.0930261611938477, "eval_runtime": 36.6415, "eval_samples_per_second": 8.542, "eval_steps_per_second": 0.382, "step": 2120 }, { "epoch": 0.16, "eval_loss": 2.0837409496307373, "eval_runtime": 35.3492, "eval_samples_per_second": 8.855, "eval_steps_per_second": 0.396, "step": 2140 }, { "epoch": 0.16, "eval_loss": 2.0924770832061768, "eval_runtime": 35.8577, "eval_samples_per_second": 8.729, "eval_steps_per_second": 0.39, "step": 2160 }, { "epoch": 0.17, "eval_loss": 2.0876598358154297, "eval_runtime": 36.7333, "eval_samples_per_second": 8.521, "eval_steps_per_second": 0.381, "step": 2180 }, { "epoch": 0.17, "eval_loss": 2.0903303623199463, "eval_runtime": 36.5753, "eval_samples_per_second": 8.558, "eval_steps_per_second": 0.383, "step": 2200 }, { "epoch": 0.17, "eval_loss": 2.0882089138031006, "eval_runtime": 37.2231, "eval_samples_per_second": 8.409, "eval_steps_per_second": 0.376, "step": 2220 }, { "epoch": 0.17, "eval_loss": 2.0914785861968994, "eval_runtime": 35.3728, "eval_samples_per_second": 8.849, "eval_steps_per_second": 0.396, "step": 2240 }, { "epoch": 0.17, "eval_loss": 2.092726707458496, "eval_runtime": 40.8988, "eval_samples_per_second": 7.653, "eval_steps_per_second": 0.342, "step": 2260 }, { "epoch": 0.17, "eval_loss": 2.092102527618408, "eval_runtime": 38.6861, "eval_samples_per_second": 8.091, "eval_steps_per_second": 0.362, "step": 2280 }, { "epoch": 0.18, "eval_loss": 2.0902554988861084, "eval_runtime": 37.1228, "eval_samples_per_second": 8.431, "eval_steps_per_second": 0.377, "step": 2300 }, { "epoch": 0.18, "eval_loss": 2.1011133193969727, "eval_runtime": 38.2059, "eval_samples_per_second": 8.192, "eval_steps_per_second": 0.366, "step": 2320 }, { "epoch": 0.18, "eval_loss": 2.0915534496307373, "eval_runtime": 37.7371, "eval_samples_per_second": 8.294, "eval_steps_per_second": 0.371, "step": 2340 }, { "epoch": 0.18, "eval_loss": 2.084639549255371, "eval_runtime": 37.4914, "eval_samples_per_second": 8.349, "eval_steps_per_second": 0.373, "step": 2360 }, { "epoch": 0.18, "eval_loss": 2.0891075134277344, "eval_runtime": 37.0809, "eval_samples_per_second": 8.441, "eval_steps_per_second": 0.378, "step": 2380 }, { "epoch": 0.18, "eval_loss": 2.080421209335327, "eval_runtime": 38.2834, "eval_samples_per_second": 8.176, "eval_steps_per_second": 0.366, "step": 2400 }, { "epoch": 0.18, "eval_loss": 2.0774011611938477, "eval_runtime": 36.0821, "eval_samples_per_second": 8.675, "eval_steps_per_second": 0.388, "step": 2420 }, { "epoch": 0.19, "eval_loss": 2.074655532836914, "eval_runtime": 38.4174, "eval_samples_per_second": 8.147, "eval_steps_per_second": 0.364, "step": 2440 }, { "epoch": 0.19, "eval_loss": 2.0918281078338623, "eval_runtime": 37.5135, "eval_samples_per_second": 8.344, "eval_steps_per_second": 0.373, "step": 2460 }, { "epoch": 0.19, "eval_loss": 2.0866613388061523, "eval_runtime": 37.8846, "eval_samples_per_second": 8.262, "eval_steps_per_second": 0.37, "step": 2480 }, { "epoch": 0.19, "learning_rate": 5e-05, "loss": 1.8656, "step": 2500 }, { "epoch": 0.19, "eval_loss": 2.0818939208984375, "eval_runtime": 37.1254, "eval_samples_per_second": 8.431, "eval_steps_per_second": 0.377, "step": 2500 }, { "epoch": 0.19, "eval_loss": 2.082193374633789, "eval_runtime": 37.2165, "eval_samples_per_second": 8.41, "eval_steps_per_second": 0.376, "step": 2520 }, { "epoch": 0.19, "eval_loss": 2.078274726867676, "eval_runtime": 38.1535, "eval_samples_per_second": 8.204, "eval_steps_per_second": 0.367, "step": 2540 }, { "epoch": 0.2, "eval_loss": 2.0924270153045654, "eval_runtime": 37.0529, "eval_samples_per_second": 8.447, "eval_steps_per_second": 0.378, "step": 2560 }, { "epoch": 0.2, "eval_loss": 2.0776758193969727, "eval_runtime": 38.2095, "eval_samples_per_second": 8.192, "eval_steps_per_second": 0.366, "step": 2580 }, { "epoch": 0.2, "eval_loss": 2.074331045150757, "eval_runtime": 38.0087, "eval_samples_per_second": 8.235, "eval_steps_per_second": 0.368, "step": 2600 }, { "epoch": 0.2, "eval_loss": 2.0753045082092285, "eval_runtime": 35.62, "eval_samples_per_second": 8.787, "eval_steps_per_second": 0.393, "step": 2620 }, { "epoch": 0.2, "eval_loss": 2.0662689208984375, "eval_runtime": 42.1091, "eval_samples_per_second": 7.433, "eval_steps_per_second": 0.332, "step": 2640 }, { "epoch": 0.2, "eval_loss": 2.066293954849243, "eval_runtime": 39.286, "eval_samples_per_second": 7.967, "eval_steps_per_second": 0.356, "step": 2660 }, { "epoch": 0.2, "eval_loss": 2.0750298500061035, "eval_runtime": 37.7908, "eval_samples_per_second": 8.282, "eval_steps_per_second": 0.37, "step": 2680 }, { "epoch": 0.21, "eval_loss": 2.072883367538452, "eval_runtime": 36.9744, "eval_samples_per_second": 8.465, "eval_steps_per_second": 0.379, "step": 2700 }, { "epoch": 0.21, "eval_loss": 2.0656700134277344, "eval_runtime": 38.6743, "eval_samples_per_second": 8.093, "eval_steps_per_second": 0.362, "step": 2720 }, { "epoch": 0.21, "eval_loss": 2.061077356338501, "eval_runtime": 37.2607, "eval_samples_per_second": 8.4, "eval_steps_per_second": 0.376, "step": 2740 }, { "epoch": 0.21, "eval_loss": 2.0596296787261963, "eval_runtime": 38.4938, "eval_samples_per_second": 8.131, "eval_steps_per_second": 0.364, "step": 2760 }, { "epoch": 0.21, "eval_loss": 2.0695137977600098, "eval_runtime": 38.4555, "eval_samples_per_second": 8.139, "eval_steps_per_second": 0.364, "step": 2780 }, { "epoch": 0.21, "eval_loss": 2.0653703212738037, "eval_runtime": 40.8818, "eval_samples_per_second": 7.656, "eval_steps_per_second": 0.342, "step": 2800 }, { "epoch": 0.22, "eval_loss": 2.0632736682891846, "eval_runtime": 37.3448, "eval_samples_per_second": 8.381, "eval_steps_per_second": 0.375, "step": 2820 }, { "epoch": 0.22, "eval_loss": 2.068690061569214, "eval_runtime": 38.9945, "eval_samples_per_second": 8.027, "eval_steps_per_second": 0.359, "step": 2840 }, { "epoch": 0.22, "eval_loss": 2.0744807720184326, "eval_runtime": 37.3809, "eval_samples_per_second": 8.373, "eval_steps_per_second": 0.375, "step": 2860 }, { "epoch": 0.22, "eval_loss": 2.068140983581543, "eval_runtime": 37.0455, "eval_samples_per_second": 8.449, "eval_steps_per_second": 0.378, "step": 2880 }, { "epoch": 0.22, "eval_loss": 2.0711112022399902, "eval_runtime": 36.5798, "eval_samples_per_second": 8.557, "eval_steps_per_second": 0.383, "step": 2900 }, { "epoch": 0.22, "eval_loss": 2.0659945011138916, "eval_runtime": 37.3716, "eval_samples_per_second": 8.375, "eval_steps_per_second": 0.375, "step": 2920 }, { "epoch": 0.22, "eval_loss": 2.076228141784668, "eval_runtime": 38.1126, "eval_samples_per_second": 8.213, "eval_steps_per_second": 0.367, "step": 2940 }, { "epoch": 0.23, "eval_loss": 2.072284460067749, "eval_runtime": 37.7328, "eval_samples_per_second": 8.295, "eval_steps_per_second": 0.371, "step": 2960 }, { "epoch": 0.23, "eval_loss": 2.0797972679138184, "eval_runtime": 39.3148, "eval_samples_per_second": 7.961, "eval_steps_per_second": 0.356, "step": 2980 }, { "epoch": 0.23, "learning_rate": 5e-05, "loss": 1.8034, "step": 3000 }, { "epoch": 0.23, "eval_loss": 2.0818939208984375, "eval_runtime": 37.1291, "eval_samples_per_second": 8.43, "eval_steps_per_second": 0.377, "step": 3000 }, { "epoch": 0.23, "eval_loss": 2.0645217895507812, "eval_runtime": 38.134, "eval_samples_per_second": 8.208, "eval_steps_per_second": 0.367, "step": 3020 }, { "epoch": 0.23, "eval_loss": 2.0636231899261475, "eval_runtime": 38.8835, "eval_samples_per_second": 8.05, "eval_steps_per_second": 0.36, "step": 3040 }, { "epoch": 0.23, "eval_loss": 2.0678415298461914, "eval_runtime": 38.0811, "eval_samples_per_second": 8.219, "eval_steps_per_second": 0.368, "step": 3060 }, { "epoch": 0.23, "eval_loss": 2.0711112022399902, "eval_runtime": 38.2925, "eval_samples_per_second": 8.174, "eval_steps_per_second": 0.366, "step": 3080 }, { "epoch": 0.24, "eval_loss": 2.063648223876953, "eval_runtime": 37.5261, "eval_samples_per_second": 8.341, "eval_steps_per_second": 0.373, "step": 3100 }, { "epoch": 0.24, "eval_loss": 2.0624501705169678, "eval_runtime": 37.6407, "eval_samples_per_second": 8.315, "eval_steps_per_second": 0.372, "step": 3120 }, { "epoch": 0.24, "eval_loss": 2.0669429302215576, "eval_runtime": 36.2761, "eval_samples_per_second": 8.628, "eval_steps_per_second": 0.386, "step": 3140 }, { "epoch": 0.24, "eval_loss": 2.056734323501587, "eval_runtime": 39.277, "eval_samples_per_second": 7.969, "eval_steps_per_second": 0.356, "step": 3160 }, { "epoch": 0.24, "eval_loss": 2.0456268787384033, "eval_runtime": 37.5675, "eval_samples_per_second": 8.332, "eval_steps_per_second": 0.373, "step": 3180 } ], "max_steps": 13108, "num_train_epochs": 1, "total_flos": 46808734629888.0, "trial_name": null, "trial_params": null }