llama3-1_8b_math_500000_samples / trainer_log.jsonl
gsmyrnis's picture
Training in progress, epoch 3
30ebb24 verified
{"current_steps": 10, "total_steps": 1281, "loss": 0.7124, "lr": 5e-06, "epoch": 0.0234192037470726, "percentage": 0.78, "elapsed_time": "0:01:00", "remaining_time": "2:08:43"}
{"current_steps": 20, "total_steps": 1281, "loss": 0.6153, "lr": 5e-06, "epoch": 0.0468384074941452, "percentage": 1.56, "elapsed_time": "0:02:00", "remaining_time": "2:06:47"}
{"current_steps": 30, "total_steps": 1281, "loss": 0.5859, "lr": 5e-06, "epoch": 0.0702576112412178, "percentage": 2.34, "elapsed_time": "0:03:00", "remaining_time": "2:05:10"}
{"current_steps": 40, "total_steps": 1281, "loss": 0.5711, "lr": 5e-06, "epoch": 0.0936768149882904, "percentage": 3.12, "elapsed_time": "0:03:59", "remaining_time": "2:03:52"}
{"current_steps": 50, "total_steps": 1281, "loss": 0.5668, "lr": 5e-06, "epoch": 0.117096018735363, "percentage": 3.9, "elapsed_time": "0:04:58", "remaining_time": "2:02:35"}
{"current_steps": 60, "total_steps": 1281, "loss": 0.559, "lr": 5e-06, "epoch": 0.1405152224824356, "percentage": 4.68, "elapsed_time": "0:05:57", "remaining_time": "2:01:24"}
{"current_steps": 70, "total_steps": 1281, "loss": 0.555, "lr": 5e-06, "epoch": 0.16393442622950818, "percentage": 5.46, "elapsed_time": "0:06:57", "remaining_time": "2:00:19"}
{"current_steps": 80, "total_steps": 1281, "loss": 0.548, "lr": 5e-06, "epoch": 0.1873536299765808, "percentage": 6.25, "elapsed_time": "0:07:56", "remaining_time": "1:59:15"}
{"current_steps": 90, "total_steps": 1281, "loss": 0.5481, "lr": 5e-06, "epoch": 0.2107728337236534, "percentage": 7.03, "elapsed_time": "0:08:55", "remaining_time": "1:58:11"}
{"current_steps": 100, "total_steps": 1281, "loss": 0.5449, "lr": 5e-06, "epoch": 0.234192037470726, "percentage": 7.81, "elapsed_time": "0:09:55", "remaining_time": "1:57:08"}
{"current_steps": 110, "total_steps": 1281, "loss": 0.541, "lr": 5e-06, "epoch": 0.2576112412177986, "percentage": 8.59, "elapsed_time": "0:10:54", "remaining_time": "1:56:06"}
{"current_steps": 120, "total_steps": 1281, "loss": 0.5397, "lr": 5e-06, "epoch": 0.2810304449648712, "percentage": 9.37, "elapsed_time": "0:11:53", "remaining_time": "1:55:05"}
{"current_steps": 130, "total_steps": 1281, "loss": 0.5357, "lr": 5e-06, "epoch": 0.3044496487119438, "percentage": 10.15, "elapsed_time": "0:12:53", "remaining_time": "1:54:04"}
{"current_steps": 140, "total_steps": 1281, "loss": 0.535, "lr": 5e-06, "epoch": 0.32786885245901637, "percentage": 10.93, "elapsed_time": "0:13:52", "remaining_time": "1:53:03"}
{"current_steps": 150, "total_steps": 1281, "loss": 0.5373, "lr": 5e-06, "epoch": 0.351288056206089, "percentage": 11.71, "elapsed_time": "0:14:51", "remaining_time": "1:52:02"}
{"current_steps": 160, "total_steps": 1281, "loss": 0.5346, "lr": 5e-06, "epoch": 0.3747072599531616, "percentage": 12.49, "elapsed_time": "0:15:50", "remaining_time": "1:51:02"}
{"current_steps": 170, "total_steps": 1281, "loss": 0.5312, "lr": 5e-06, "epoch": 0.3981264637002342, "percentage": 13.27, "elapsed_time": "0:16:50", "remaining_time": "1:50:01"}
{"current_steps": 180, "total_steps": 1281, "loss": 0.5332, "lr": 5e-06, "epoch": 0.4215456674473068, "percentage": 14.05, "elapsed_time": "0:17:49", "remaining_time": "1:49:01"}
{"current_steps": 190, "total_steps": 1281, "loss": 0.5368, "lr": 5e-06, "epoch": 0.4449648711943794, "percentage": 14.83, "elapsed_time": "0:18:48", "remaining_time": "1:48:01"}
{"current_steps": 200, "total_steps": 1281, "loss": 0.5348, "lr": 5e-06, "epoch": 0.468384074941452, "percentage": 15.61, "elapsed_time": "0:19:48", "remaining_time": "1:47:01"}
{"current_steps": 210, "total_steps": 1281, "loss": 0.5298, "lr": 5e-06, "epoch": 0.4918032786885246, "percentage": 16.39, "elapsed_time": "0:20:47", "remaining_time": "1:46:01"}
{"current_steps": 220, "total_steps": 1281, "loss": 0.527, "lr": 5e-06, "epoch": 0.5152224824355972, "percentage": 17.17, "elapsed_time": "0:21:46", "remaining_time": "1:45:00"}
{"current_steps": 230, "total_steps": 1281, "loss": 0.5295, "lr": 5e-06, "epoch": 0.5386416861826698, "percentage": 17.95, "elapsed_time": "0:22:45", "remaining_time": "1:44:01"}
{"current_steps": 240, "total_steps": 1281, "loss": 0.5264, "lr": 5e-06, "epoch": 0.5620608899297423, "percentage": 18.74, "elapsed_time": "0:23:45", "remaining_time": "1:43:01"}
{"current_steps": 250, "total_steps": 1281, "loss": 0.5292, "lr": 5e-06, "epoch": 0.585480093676815, "percentage": 19.52, "elapsed_time": "0:24:44", "remaining_time": "1:42:02"}
{"current_steps": 260, "total_steps": 1281, "loss": 0.5253, "lr": 5e-06, "epoch": 0.6088992974238876, "percentage": 20.3, "elapsed_time": "0:25:43", "remaining_time": "1:41:02"}
{"current_steps": 270, "total_steps": 1281, "loss": 0.5238, "lr": 5e-06, "epoch": 0.6323185011709602, "percentage": 21.08, "elapsed_time": "0:26:43", "remaining_time": "1:40:02"}
{"current_steps": 280, "total_steps": 1281, "loss": 0.522, "lr": 5e-06, "epoch": 0.6557377049180327, "percentage": 21.86, "elapsed_time": "0:27:42", "remaining_time": "1:39:02"}
{"current_steps": 290, "total_steps": 1281, "loss": 0.5243, "lr": 5e-06, "epoch": 0.6791569086651054, "percentage": 22.64, "elapsed_time": "0:28:41", "remaining_time": "1:38:03"}
{"current_steps": 300, "total_steps": 1281, "loss": 0.5217, "lr": 5e-06, "epoch": 0.702576112412178, "percentage": 23.42, "elapsed_time": "0:29:40", "remaining_time": "1:37:03"}
{"current_steps": 310, "total_steps": 1281, "loss": 0.5226, "lr": 5e-06, "epoch": 0.7259953161592506, "percentage": 24.2, "elapsed_time": "0:30:40", "remaining_time": "1:36:04"}
{"current_steps": 320, "total_steps": 1281, "loss": 0.521, "lr": 5e-06, "epoch": 0.7494145199063232, "percentage": 24.98, "elapsed_time": "0:31:39", "remaining_time": "1:35:04"}
{"current_steps": 330, "total_steps": 1281, "loss": 0.5201, "lr": 5e-06, "epoch": 0.7728337236533958, "percentage": 25.76, "elapsed_time": "0:32:38", "remaining_time": "1:34:04"}
{"current_steps": 340, "total_steps": 1281, "loss": 0.5199, "lr": 5e-06, "epoch": 0.7962529274004684, "percentage": 26.54, "elapsed_time": "0:33:38", "remaining_time": "1:33:05"}
{"current_steps": 350, "total_steps": 1281, "loss": 0.5204, "lr": 5e-06, "epoch": 0.819672131147541, "percentage": 27.32, "elapsed_time": "0:34:37", "remaining_time": "1:32:05"}
{"current_steps": 360, "total_steps": 1281, "loss": 0.5221, "lr": 5e-06, "epoch": 0.8430913348946136, "percentage": 28.1, "elapsed_time": "0:35:36", "remaining_time": "1:31:06"}
{"current_steps": 370, "total_steps": 1281, "loss": 0.519, "lr": 5e-06, "epoch": 0.8665105386416861, "percentage": 28.88, "elapsed_time": "0:36:36", "remaining_time": "1:30:07"}
{"current_steps": 380, "total_steps": 1281, "loss": 0.5193, "lr": 5e-06, "epoch": 0.8899297423887588, "percentage": 29.66, "elapsed_time": "0:37:35", "remaining_time": "1:29:07"}
{"current_steps": 390, "total_steps": 1281, "loss": 0.5178, "lr": 5e-06, "epoch": 0.9133489461358314, "percentage": 30.44, "elapsed_time": "0:38:34", "remaining_time": "1:28:08"}
{"current_steps": 400, "total_steps": 1281, "loss": 0.5201, "lr": 5e-06, "epoch": 0.936768149882904, "percentage": 31.23, "elapsed_time": "0:39:34", "remaining_time": "1:27:09"}
{"current_steps": 410, "total_steps": 1281, "loss": 0.5146, "lr": 5e-06, "epoch": 0.9601873536299765, "percentage": 32.01, "elapsed_time": "0:40:33", "remaining_time": "1:26:09"}
{"current_steps": 420, "total_steps": 1281, "loss": 0.514, "lr": 5e-06, "epoch": 0.9836065573770492, "percentage": 32.79, "elapsed_time": "0:41:32", "remaining_time": "1:25:10"}
{"current_steps": 427, "total_steps": 1281, "eval_loss": 0.5167434215545654, "epoch": 1.0, "percentage": 33.33, "elapsed_time": "0:42:55", "remaining_time": "1:25:51"}
{"current_steps": 430, "total_steps": 1281, "loss": 0.51, "lr": 5e-06, "epoch": 1.0070257611241218, "percentage": 33.57, "elapsed_time": "0:44:38", "remaining_time": "1:28:21"}
{"current_steps": 440, "total_steps": 1281, "loss": 0.4977, "lr": 5e-06, "epoch": 1.0304449648711944, "percentage": 34.35, "elapsed_time": "0:45:38", "remaining_time": "1:27:15"}
{"current_steps": 450, "total_steps": 1281, "loss": 0.4974, "lr": 5e-06, "epoch": 1.053864168618267, "percentage": 35.13, "elapsed_time": "0:46:38", "remaining_time": "1:26:07"}
{"current_steps": 460, "total_steps": 1281, "loss": 0.4947, "lr": 5e-06, "epoch": 1.0772833723653397, "percentage": 35.91, "elapsed_time": "0:47:37", "remaining_time": "1:25:00"}
{"current_steps": 470, "total_steps": 1281, "loss": 0.4909, "lr": 5e-06, "epoch": 1.100702576112412, "percentage": 36.69, "elapsed_time": "0:48:37", "remaining_time": "1:23:53"}
{"current_steps": 480, "total_steps": 1281, "loss": 0.4932, "lr": 5e-06, "epoch": 1.1241217798594847, "percentage": 37.47, "elapsed_time": "0:49:36", "remaining_time": "1:22:47"}
{"current_steps": 490, "total_steps": 1281, "loss": 0.4894, "lr": 5e-06, "epoch": 1.1475409836065573, "percentage": 38.25, "elapsed_time": "0:50:36", "remaining_time": "1:21:41"}
{"current_steps": 500, "total_steps": 1281, "loss": 0.4943, "lr": 5e-06, "epoch": 1.17096018735363, "percentage": 39.03, "elapsed_time": "0:51:35", "remaining_time": "1:20:35"}
{"current_steps": 510, "total_steps": 1281, "loss": 0.497, "lr": 5e-06, "epoch": 1.1943793911007026, "percentage": 39.81, "elapsed_time": "0:52:35", "remaining_time": "1:19:30"}
{"current_steps": 520, "total_steps": 1281, "loss": 0.4916, "lr": 5e-06, "epoch": 1.2177985948477752, "percentage": 40.59, "elapsed_time": "0:53:34", "remaining_time": "1:18:24"}
{"current_steps": 530, "total_steps": 1281, "loss": 0.4904, "lr": 5e-06, "epoch": 1.2412177985948478, "percentage": 41.37, "elapsed_time": "0:54:34", "remaining_time": "1:17:19"}
{"current_steps": 540, "total_steps": 1281, "loss": 0.4941, "lr": 5e-06, "epoch": 1.2646370023419204, "percentage": 42.15, "elapsed_time": "0:55:33", "remaining_time": "1:16:14"}
{"current_steps": 550, "total_steps": 1281, "loss": 0.4924, "lr": 5e-06, "epoch": 1.288056206088993, "percentage": 42.94, "elapsed_time": "0:56:33", "remaining_time": "1:15:10"}
{"current_steps": 560, "total_steps": 1281, "loss": 0.4925, "lr": 5e-06, "epoch": 1.3114754098360657, "percentage": 43.72, "elapsed_time": "0:57:33", "remaining_time": "1:14:06"}
{"current_steps": 570, "total_steps": 1281, "loss": 0.4943, "lr": 5e-06, "epoch": 1.334894613583138, "percentage": 44.5, "elapsed_time": "0:58:32", "remaining_time": "1:13:01"}
{"current_steps": 580, "total_steps": 1281, "loss": 0.4935, "lr": 5e-06, "epoch": 1.3583138173302107, "percentage": 45.28, "elapsed_time": "0:59:32", "remaining_time": "1:11:57"}
{"current_steps": 590, "total_steps": 1281, "loss": 0.4956, "lr": 5e-06, "epoch": 1.3817330210772834, "percentage": 46.06, "elapsed_time": "1:00:31", "remaining_time": "1:10:53"}
{"current_steps": 600, "total_steps": 1281, "loss": 0.4883, "lr": 5e-06, "epoch": 1.405152224824356, "percentage": 46.84, "elapsed_time": "1:01:30", "remaining_time": "1:09:48"}
{"current_steps": 610, "total_steps": 1281, "loss": 0.496, "lr": 5e-06, "epoch": 1.4285714285714286, "percentage": 47.62, "elapsed_time": "1:02:30", "remaining_time": "1:08:45"}
{"current_steps": 620, "total_steps": 1281, "loss": 0.4953, "lr": 5e-06, "epoch": 1.4519906323185012, "percentage": 48.4, "elapsed_time": "1:03:32", "remaining_time": "1:07:44"}
{"current_steps": 630, "total_steps": 1281, "loss": 0.4951, "lr": 5e-06, "epoch": 1.4754098360655736, "percentage": 49.18, "elapsed_time": "1:04:32", "remaining_time": "1:06:41"}
{"current_steps": 640, "total_steps": 1281, "loss": 0.4929, "lr": 5e-06, "epoch": 1.4988290398126463, "percentage": 49.96, "elapsed_time": "1:05:32", "remaining_time": "1:05:38"}
{"current_steps": 650, "total_steps": 1281, "loss": 0.4896, "lr": 5e-06, "epoch": 1.5222482435597189, "percentage": 50.74, "elapsed_time": "1:06:31", "remaining_time": "1:04:34"}
{"current_steps": 660, "total_steps": 1281, "loss": 0.4928, "lr": 5e-06, "epoch": 1.5456674473067915, "percentage": 51.52, "elapsed_time": "1:07:31", "remaining_time": "1:03:31"}
{"current_steps": 670, "total_steps": 1281, "loss": 0.488, "lr": 5e-06, "epoch": 1.5690866510538641, "percentage": 52.3, "elapsed_time": "1:08:30", "remaining_time": "1:02:28"}
{"current_steps": 680, "total_steps": 1281, "loss": 0.4949, "lr": 5e-06, "epoch": 1.5925058548009368, "percentage": 53.08, "elapsed_time": "1:09:30", "remaining_time": "1:01:25"}
{"current_steps": 690, "total_steps": 1281, "loss": 0.4883, "lr": 5e-06, "epoch": 1.6159250585480094, "percentage": 53.86, "elapsed_time": "1:10:29", "remaining_time": "1:00:22"}
{"current_steps": 700, "total_steps": 1281, "loss": 0.4936, "lr": 5e-06, "epoch": 1.639344262295082, "percentage": 54.64, "elapsed_time": "1:11:29", "remaining_time": "0:59:20"}
{"current_steps": 710, "total_steps": 1281, "loss": 0.4938, "lr": 5e-06, "epoch": 1.6627634660421546, "percentage": 55.43, "elapsed_time": "1:12:28", "remaining_time": "0:58:17"}
{"current_steps": 720, "total_steps": 1281, "loss": 0.4967, "lr": 5e-06, "epoch": 1.6861826697892273, "percentage": 56.21, "elapsed_time": "1:13:28", "remaining_time": "0:57:14"}
{"current_steps": 730, "total_steps": 1281, "loss": 0.4891, "lr": 5e-06, "epoch": 1.7096018735362999, "percentage": 56.99, "elapsed_time": "1:14:27", "remaining_time": "0:56:11"}
{"current_steps": 740, "total_steps": 1281, "loss": 0.491, "lr": 5e-06, "epoch": 1.7330210772833725, "percentage": 57.77, "elapsed_time": "1:15:26", "remaining_time": "0:55:09"}
{"current_steps": 750, "total_steps": 1281, "loss": 0.4949, "lr": 5e-06, "epoch": 1.756440281030445, "percentage": 58.55, "elapsed_time": "1:16:25", "remaining_time": "0:54:06"}
{"current_steps": 760, "total_steps": 1281, "loss": 0.4864, "lr": 5e-06, "epoch": 1.7798594847775175, "percentage": 59.33, "elapsed_time": "1:17:25", "remaining_time": "0:53:04"}
{"current_steps": 770, "total_steps": 1281, "loss": 0.4877, "lr": 5e-06, "epoch": 1.8032786885245902, "percentage": 60.11, "elapsed_time": "1:18:24", "remaining_time": "0:52:01"}
{"current_steps": 780, "total_steps": 1281, "loss": 0.491, "lr": 5e-06, "epoch": 1.8266978922716628, "percentage": 60.89, "elapsed_time": "1:19:23", "remaining_time": "0:50:59"}
{"current_steps": 790, "total_steps": 1281, "loss": 0.4919, "lr": 5e-06, "epoch": 1.8501170960187352, "percentage": 61.67, "elapsed_time": "1:20:23", "remaining_time": "0:49:57"}
{"current_steps": 800, "total_steps": 1281, "loss": 0.4875, "lr": 5e-06, "epoch": 1.8735362997658078, "percentage": 62.45, "elapsed_time": "1:21:22", "remaining_time": "0:48:55"}
{"current_steps": 810, "total_steps": 1281, "loss": 0.4936, "lr": 5e-06, "epoch": 1.8969555035128804, "percentage": 63.23, "elapsed_time": "1:22:21", "remaining_time": "0:47:53"}
{"current_steps": 820, "total_steps": 1281, "loss": 0.4888, "lr": 5e-06, "epoch": 1.920374707259953, "percentage": 64.01, "elapsed_time": "1:23:21", "remaining_time": "0:46:51"}
{"current_steps": 830, "total_steps": 1281, "loss": 0.4893, "lr": 5e-06, "epoch": 1.9437939110070257, "percentage": 64.79, "elapsed_time": "1:24:20", "remaining_time": "0:45:49"}
{"current_steps": 840, "total_steps": 1281, "loss": 0.4915, "lr": 5e-06, "epoch": 1.9672131147540983, "percentage": 65.57, "elapsed_time": "1:25:19", "remaining_time": "0:44:47"}
{"current_steps": 850, "total_steps": 1281, "loss": 0.4902, "lr": 5e-06, "epoch": 1.990632318501171, "percentage": 66.35, "elapsed_time": "1:26:19", "remaining_time": "0:43:46"}
{"current_steps": 854, "total_steps": 1281, "eval_loss": 0.5077147483825684, "epoch": 2.0, "percentage": 66.67, "elapsed_time": "1:27:24", "remaining_time": "0:43:42"}
{"current_steps": 860, "total_steps": 1281, "loss": 0.4752, "lr": 5e-06, "epoch": 2.0140515222482436, "percentage": 67.14, "elapsed_time": "1:29:27", "remaining_time": "0:43:47"}
{"current_steps": 870, "total_steps": 1281, "loss": 0.4669, "lr": 5e-06, "epoch": 2.037470725995316, "percentage": 67.92, "elapsed_time": "1:30:27", "remaining_time": "0:42:44"}
{"current_steps": 880, "total_steps": 1281, "loss": 0.4614, "lr": 5e-06, "epoch": 2.060889929742389, "percentage": 68.7, "elapsed_time": "1:31:27", "remaining_time": "0:41:40"}
{"current_steps": 890, "total_steps": 1281, "loss": 0.4659, "lr": 5e-06, "epoch": 2.0843091334894615, "percentage": 69.48, "elapsed_time": "1:32:26", "remaining_time": "0:40:36"}
{"current_steps": 900, "total_steps": 1281, "loss": 0.4593, "lr": 5e-06, "epoch": 2.107728337236534, "percentage": 70.26, "elapsed_time": "1:33:25", "remaining_time": "0:39:33"}
{"current_steps": 910, "total_steps": 1281, "loss": 0.4677, "lr": 5e-06, "epoch": 2.1311475409836067, "percentage": 71.04, "elapsed_time": "1:34:25", "remaining_time": "0:38:29"}
{"current_steps": 920, "total_steps": 1281, "loss": 0.4652, "lr": 5e-06, "epoch": 2.1545667447306793, "percentage": 71.82, "elapsed_time": "1:35:24", "remaining_time": "0:37:26"}
{"current_steps": 930, "total_steps": 1281, "loss": 0.4681, "lr": 5e-06, "epoch": 2.177985948477752, "percentage": 72.6, "elapsed_time": "1:36:23", "remaining_time": "0:36:22"}
{"current_steps": 940, "total_steps": 1281, "loss": 0.4586, "lr": 5e-06, "epoch": 2.201405152224824, "percentage": 73.38, "elapsed_time": "1:37:23", "remaining_time": "0:35:19"}
{"current_steps": 950, "total_steps": 1281, "loss": 0.4669, "lr": 5e-06, "epoch": 2.2248243559718968, "percentage": 74.16, "elapsed_time": "1:38:22", "remaining_time": "0:34:16"}
{"current_steps": 960, "total_steps": 1281, "loss": 0.4665, "lr": 5e-06, "epoch": 2.2482435597189694, "percentage": 74.94, "elapsed_time": "1:39:21", "remaining_time": "0:33:13"}
{"current_steps": 970, "total_steps": 1281, "loss": 0.4665, "lr": 5e-06, "epoch": 2.271662763466042, "percentage": 75.72, "elapsed_time": "1:40:21", "remaining_time": "0:32:10"}
{"current_steps": 980, "total_steps": 1281, "loss": 0.4662, "lr": 5e-06, "epoch": 2.2950819672131146, "percentage": 76.5, "elapsed_time": "1:41:21", "remaining_time": "0:31:07"}
{"current_steps": 990, "total_steps": 1281, "loss": 0.4646, "lr": 5e-06, "epoch": 2.3185011709601873, "percentage": 77.28, "elapsed_time": "1:42:22", "remaining_time": "0:30:05"}
{"current_steps": 1000, "total_steps": 1281, "loss": 0.4647, "lr": 5e-06, "epoch": 2.34192037470726, "percentage": 78.06, "elapsed_time": "1:43:22", "remaining_time": "0:29:02"}
{"current_steps": 1010, "total_steps": 1281, "loss": 0.4659, "lr": 5e-06, "epoch": 2.3653395784543325, "percentage": 78.84, "elapsed_time": "1:44:21", "remaining_time": "0:28:00"}
{"current_steps": 1020, "total_steps": 1281, "loss": 0.4663, "lr": 5e-06, "epoch": 2.388758782201405, "percentage": 79.63, "elapsed_time": "1:45:21", "remaining_time": "0:26:57"}
{"current_steps": 1030, "total_steps": 1281, "loss": 0.4676, "lr": 5e-06, "epoch": 2.4121779859484778, "percentage": 80.41, "elapsed_time": "1:46:20", "remaining_time": "0:25:54"}
{"current_steps": 1040, "total_steps": 1281, "loss": 0.4673, "lr": 5e-06, "epoch": 2.4355971896955504, "percentage": 81.19, "elapsed_time": "1:47:20", "remaining_time": "0:24:52"}
{"current_steps": 1050, "total_steps": 1281, "loss": 0.4654, "lr": 5e-06, "epoch": 2.459016393442623, "percentage": 81.97, "elapsed_time": "1:48:19", "remaining_time": "0:23:49"}
{"current_steps": 1060, "total_steps": 1281, "loss": 0.469, "lr": 5e-06, "epoch": 2.4824355971896956, "percentage": 82.75, "elapsed_time": "1:49:18", "remaining_time": "0:22:47"}
{"current_steps": 1070, "total_steps": 1281, "loss": 0.47, "lr": 5e-06, "epoch": 2.5058548009367683, "percentage": 83.53, "elapsed_time": "1:50:18", "remaining_time": "0:21:45"}
{"current_steps": 1080, "total_steps": 1281, "loss": 0.4685, "lr": 5e-06, "epoch": 2.529274004683841, "percentage": 84.31, "elapsed_time": "1:51:17", "remaining_time": "0:20:42"}
{"current_steps": 1090, "total_steps": 1281, "loss": 0.4677, "lr": 5e-06, "epoch": 2.552693208430913, "percentage": 85.09, "elapsed_time": "1:52:17", "remaining_time": "0:19:40"}
{"current_steps": 1100, "total_steps": 1281, "loss": 0.4695, "lr": 5e-06, "epoch": 2.576112412177986, "percentage": 85.87, "elapsed_time": "1:53:17", "remaining_time": "0:18:38"}
{"current_steps": 1110, "total_steps": 1281, "loss": 0.4677, "lr": 5e-06, "epoch": 2.5995316159250583, "percentage": 86.65, "elapsed_time": "1:54:16", "remaining_time": "0:17:36"}
{"current_steps": 1120, "total_steps": 1281, "loss": 0.4696, "lr": 5e-06, "epoch": 2.6229508196721314, "percentage": 87.43, "elapsed_time": "1:55:16", "remaining_time": "0:16:34"}
{"current_steps": 1130, "total_steps": 1281, "loss": 0.4664, "lr": 5e-06, "epoch": 2.6463700234192036, "percentage": 88.21, "elapsed_time": "1:56:15", "remaining_time": "0:15:32"}
{"current_steps": 1140, "total_steps": 1281, "loss": 0.4693, "lr": 5e-06, "epoch": 2.669789227166276, "percentage": 88.99, "elapsed_time": "1:57:14", "remaining_time": "0:14:30"}
{"current_steps": 1150, "total_steps": 1281, "loss": 0.4692, "lr": 5e-06, "epoch": 2.693208430913349, "percentage": 89.77, "elapsed_time": "1:58:14", "remaining_time": "0:13:28"}
{"current_steps": 1160, "total_steps": 1281, "loss": 0.4704, "lr": 5e-06, "epoch": 2.7166276346604215, "percentage": 90.55, "elapsed_time": "1:59:13", "remaining_time": "0:12:26"}
{"current_steps": 1170, "total_steps": 1281, "loss": 0.4648, "lr": 5e-06, "epoch": 2.740046838407494, "percentage": 91.33, "elapsed_time": "2:00:13", "remaining_time": "0:11:24"}
{"current_steps": 1180, "total_steps": 1281, "loss": 0.4629, "lr": 5e-06, "epoch": 2.7634660421545667, "percentage": 92.12, "elapsed_time": "2:01:12", "remaining_time": "0:10:22"}
{"current_steps": 1190, "total_steps": 1281, "loss": 0.4704, "lr": 5e-06, "epoch": 2.7868852459016393, "percentage": 92.9, "elapsed_time": "2:02:11", "remaining_time": "0:09:20"}
{"current_steps": 1200, "total_steps": 1281, "loss": 0.4707, "lr": 5e-06, "epoch": 2.810304449648712, "percentage": 93.68, "elapsed_time": "2:03:11", "remaining_time": "0:08:18"}
{"current_steps": 1210, "total_steps": 1281, "loss": 0.4679, "lr": 5e-06, "epoch": 2.8337236533957846, "percentage": 94.46, "elapsed_time": "2:04:10", "remaining_time": "0:07:17"}
{"current_steps": 1220, "total_steps": 1281, "loss": 0.4711, "lr": 5e-06, "epoch": 2.857142857142857, "percentage": 95.24, "elapsed_time": "2:05:09", "remaining_time": "0:06:15"}
{"current_steps": 1230, "total_steps": 1281, "loss": 0.4653, "lr": 5e-06, "epoch": 2.88056206088993, "percentage": 96.02, "elapsed_time": "2:06:08", "remaining_time": "0:05:13"}
{"current_steps": 1240, "total_steps": 1281, "loss": 0.4667, "lr": 5e-06, "epoch": 2.9039812646370025, "percentage": 96.8, "elapsed_time": "2:07:08", "remaining_time": "0:04:12"}
{"current_steps": 1250, "total_steps": 1281, "loss": 0.4664, "lr": 5e-06, "epoch": 2.927400468384075, "percentage": 97.58, "elapsed_time": "2:08:08", "remaining_time": "0:03:10"}
{"current_steps": 1260, "total_steps": 1281, "loss": 0.468, "lr": 5e-06, "epoch": 2.9508196721311473, "percentage": 98.36, "elapsed_time": "2:09:09", "remaining_time": "0:02:09"}
{"current_steps": 1270, "total_steps": 1281, "loss": 0.4654, "lr": 5e-06, "epoch": 2.9742388758782203, "percentage": 99.14, "elapsed_time": "2:10:09", "remaining_time": "0:01:07"}
{"current_steps": 1280, "total_steps": 1281, "loss": 0.47, "lr": 5e-06, "epoch": 2.9976580796252925, "percentage": 99.92, "elapsed_time": "2:11:08", "remaining_time": "0:00:06"}
{"current_steps": 1281, "total_steps": 1281, "eval_loss": 0.5069288611412048, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "2:13:48", "remaining_time": "0:00:00"}
{"current_steps": 1281, "total_steps": 1281, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "2:15:16", "remaining_time": "0:00:00"}