llama3_8b_baseline_dcft_oh_v3 / trainer_log.jsonl
gsmyrnis's picture
Training in progress, epoch 2
ff25441 verified
raw
history blame
16.8 kB
{"current_steps": 10, "total_steps": 1269, "loss": 0.9144, "learning_rate": 5e-06, "epoch": 0.02364066193853428, "percentage": 0.79, "elapsed_time": "0:01:03", "remaining_time": "2:13:19"}
{"current_steps": 20, "total_steps": 1269, "loss": 0.7992, "learning_rate": 5e-06, "epoch": 0.04728132387706856, "percentage": 1.58, "elapsed_time": "0:02:05", "remaining_time": "2:10:55"}
{"current_steps": 30, "total_steps": 1269, "loss": 0.7649, "learning_rate": 5e-06, "epoch": 0.07092198581560284, "percentage": 2.36, "elapsed_time": "0:03:08", "remaining_time": "2:09:40"}
{"current_steps": 40, "total_steps": 1269, "loss": 0.7444, "learning_rate": 5e-06, "epoch": 0.09456264775413711, "percentage": 3.15, "elapsed_time": "0:04:10", "remaining_time": "2:08:29"}
{"current_steps": 50, "total_steps": 1269, "loss": 0.729, "learning_rate": 5e-06, "epoch": 0.1182033096926714, "percentage": 3.94, "elapsed_time": "0:05:13", "remaining_time": "2:07:23"}
{"current_steps": 60, "total_steps": 1269, "loss": 0.724, "learning_rate": 5e-06, "epoch": 0.14184397163120568, "percentage": 4.73, "elapsed_time": "0:06:15", "remaining_time": "2:06:11"}
{"current_steps": 70, "total_steps": 1269, "loss": 0.7111, "learning_rate": 5e-06, "epoch": 0.16548463356973994, "percentage": 5.52, "elapsed_time": "0:07:18", "remaining_time": "2:05:07"}
{"current_steps": 80, "total_steps": 1269, "loss": 0.7019, "learning_rate": 5e-06, "epoch": 0.18912529550827423, "percentage": 6.3, "elapsed_time": "0:08:20", "remaining_time": "2:03:58"}
{"current_steps": 90, "total_steps": 1269, "loss": 0.693, "learning_rate": 5e-06, "epoch": 0.2127659574468085, "percentage": 7.09, "elapsed_time": "0:09:22", "remaining_time": "2:02:54"}
{"current_steps": 100, "total_steps": 1269, "loss": 0.6993, "learning_rate": 5e-06, "epoch": 0.2364066193853428, "percentage": 7.88, "elapsed_time": "0:10:25", "remaining_time": "2:01:48"}
{"current_steps": 110, "total_steps": 1269, "loss": 0.6786, "learning_rate": 5e-06, "epoch": 0.26004728132387706, "percentage": 8.67, "elapsed_time": "0:11:27", "remaining_time": "2:00:46"}
{"current_steps": 120, "total_steps": 1269, "loss": 0.6855, "learning_rate": 5e-06, "epoch": 0.28368794326241137, "percentage": 9.46, "elapsed_time": "0:12:30", "remaining_time": "1:59:41"}
{"current_steps": 130, "total_steps": 1269, "loss": 0.6789, "learning_rate": 5e-06, "epoch": 0.3073286052009456, "percentage": 10.24, "elapsed_time": "0:13:32", "remaining_time": "1:58:41"}
{"current_steps": 140, "total_steps": 1269, "loss": 0.6813, "learning_rate": 5e-06, "epoch": 0.3309692671394799, "percentage": 11.03, "elapsed_time": "0:14:35", "remaining_time": "1:57:36"}
{"current_steps": 150, "total_steps": 1269, "loss": 0.6849, "learning_rate": 5e-06, "epoch": 0.3546099290780142, "percentage": 11.82, "elapsed_time": "0:15:37", "remaining_time": "1:56:33"}
{"current_steps": 160, "total_steps": 1269, "loss": 0.6774, "learning_rate": 5e-06, "epoch": 0.37825059101654845, "percentage": 12.61, "elapsed_time": "0:16:39", "remaining_time": "1:55:30"}
{"current_steps": 170, "total_steps": 1269, "loss": 0.6741, "learning_rate": 5e-06, "epoch": 0.40189125295508277, "percentage": 13.4, "elapsed_time": "0:17:42", "remaining_time": "1:54:28"}
{"current_steps": 180, "total_steps": 1269, "loss": 0.6721, "learning_rate": 5e-06, "epoch": 0.425531914893617, "percentage": 14.18, "elapsed_time": "0:18:44", "remaining_time": "1:53:24"}
{"current_steps": 190, "total_steps": 1269, "loss": 0.6753, "learning_rate": 5e-06, "epoch": 0.4491725768321513, "percentage": 14.97, "elapsed_time": "0:19:47", "remaining_time": "1:52:21"}
{"current_steps": 200, "total_steps": 1269, "loss": 0.6663, "learning_rate": 5e-06, "epoch": 0.4728132387706856, "percentage": 15.76, "elapsed_time": "0:20:49", "remaining_time": "1:51:18"}
{"current_steps": 210, "total_steps": 1269, "loss": 0.6656, "learning_rate": 5e-06, "epoch": 0.49645390070921985, "percentage": 16.55, "elapsed_time": "0:21:51", "remaining_time": "1:50:16"}
{"current_steps": 220, "total_steps": 1269, "loss": 0.6667, "learning_rate": 5e-06, "epoch": 0.5200945626477541, "percentage": 17.34, "elapsed_time": "0:22:54", "remaining_time": "1:49:15"}
{"current_steps": 230, "total_steps": 1269, "loss": 0.6663, "learning_rate": 5e-06, "epoch": 0.5437352245862884, "percentage": 18.12, "elapsed_time": "0:23:57", "remaining_time": "1:48:11"}
{"current_steps": 240, "total_steps": 1269, "loss": 0.6584, "learning_rate": 5e-06, "epoch": 0.5673758865248227, "percentage": 18.91, "elapsed_time": "0:24:59", "remaining_time": "1:47:09"}
{"current_steps": 250, "total_steps": 1269, "loss": 0.6668, "learning_rate": 5e-06, "epoch": 0.5910165484633569, "percentage": 19.7, "elapsed_time": "0:26:02", "remaining_time": "1:46:09"}
{"current_steps": 260, "total_steps": 1269, "loss": 0.655, "learning_rate": 5e-06, "epoch": 0.6146572104018913, "percentage": 20.49, "elapsed_time": "0:27:18", "remaining_time": "1:45:59"}
{"current_steps": 270, "total_steps": 1269, "loss": 0.6619, "learning_rate": 5e-06, "epoch": 0.6382978723404256, "percentage": 21.28, "elapsed_time": "0:28:20", "remaining_time": "1:44:53"}
{"current_steps": 280, "total_steps": 1269, "loss": 0.6632, "learning_rate": 5e-06, "epoch": 0.6619385342789598, "percentage": 22.06, "elapsed_time": "0:29:23", "remaining_time": "1:43:48"}
{"current_steps": 290, "total_steps": 1269, "loss": 0.6605, "learning_rate": 5e-06, "epoch": 0.6855791962174941, "percentage": 22.85, "elapsed_time": "0:30:25", "remaining_time": "1:42:43"}
{"current_steps": 300, "total_steps": 1269, "loss": 0.6544, "learning_rate": 5e-06, "epoch": 0.7092198581560284, "percentage": 23.64, "elapsed_time": "0:31:28", "remaining_time": "1:41:38"}
{"current_steps": 310, "total_steps": 1269, "loss": 0.666, "learning_rate": 5e-06, "epoch": 0.7328605200945626, "percentage": 24.43, "elapsed_time": "0:32:30", "remaining_time": "1:40:34"}
{"current_steps": 320, "total_steps": 1269, "loss": 0.6593, "learning_rate": 5e-06, "epoch": 0.7565011820330969, "percentage": 25.22, "elapsed_time": "0:33:32", "remaining_time": "1:39:29"}
{"current_steps": 330, "total_steps": 1269, "loss": 0.6561, "learning_rate": 5e-06, "epoch": 0.7801418439716312, "percentage": 26.0, "elapsed_time": "0:34:35", "remaining_time": "1:38:26"}
{"current_steps": 340, "total_steps": 1269, "loss": 0.652, "learning_rate": 5e-06, "epoch": 0.8037825059101655, "percentage": 26.79, "elapsed_time": "0:35:37", "remaining_time": "1:37:21"}
{"current_steps": 350, "total_steps": 1269, "loss": 0.6547, "learning_rate": 5e-06, "epoch": 0.8274231678486997, "percentage": 27.58, "elapsed_time": "0:36:40", "remaining_time": "1:36:17"}
{"current_steps": 360, "total_steps": 1269, "loss": 0.6495, "learning_rate": 5e-06, "epoch": 0.851063829787234, "percentage": 28.37, "elapsed_time": "0:37:42", "remaining_time": "1:35:12"}
{"current_steps": 370, "total_steps": 1269, "loss": 0.6525, "learning_rate": 5e-06, "epoch": 0.8747044917257684, "percentage": 29.16, "elapsed_time": "0:38:45", "remaining_time": "1:34:09"}
{"current_steps": 380, "total_steps": 1269, "loss": 0.6575, "learning_rate": 5e-06, "epoch": 0.8983451536643026, "percentage": 29.94, "elapsed_time": "0:39:47", "remaining_time": "1:33:04"}
{"current_steps": 390, "total_steps": 1269, "loss": 0.6468, "learning_rate": 5e-06, "epoch": 0.9219858156028369, "percentage": 30.73, "elapsed_time": "0:40:49", "remaining_time": "1:32:01"}
{"current_steps": 400, "total_steps": 1269, "loss": 0.6538, "learning_rate": 5e-06, "epoch": 0.9456264775413712, "percentage": 31.52, "elapsed_time": "0:41:52", "remaining_time": "1:30:57"}
{"current_steps": 410, "total_steps": 1269, "loss": 0.6474, "learning_rate": 5e-06, "epoch": 0.9692671394799054, "percentage": 32.31, "elapsed_time": "0:42:54", "remaining_time": "1:29:54"}
{"current_steps": 420, "total_steps": 1269, "loss": 0.6524, "learning_rate": 5e-06, "epoch": 0.9929078014184397, "percentage": 33.1, "elapsed_time": "0:43:57", "remaining_time": "1:28:50"}
{"current_steps": 423, "total_steps": 1269, "eval_loss": 0.6508128643035889, "epoch": 1.0, "percentage": 33.33, "elapsed_time": "0:44:54", "remaining_time": "1:29:49"}
{"current_steps": 430, "total_steps": 1269, "loss": 0.6168, "learning_rate": 5e-06, "epoch": 1.016548463356974, "percentage": 33.88, "elapsed_time": "0:49:50", "remaining_time": "1:37:15"}
{"current_steps": 440, "total_steps": 1269, "loss": 0.6076, "learning_rate": 5e-06, "epoch": 1.0401891252955082, "percentage": 34.67, "elapsed_time": "0:50:54", "remaining_time": "1:35:55"}
{"current_steps": 450, "total_steps": 1269, "loss": 0.607, "learning_rate": 5e-06, "epoch": 1.0638297872340425, "percentage": 35.46, "elapsed_time": "0:52:17", "remaining_time": "1:35:10"}
{"current_steps": 460, "total_steps": 1269, "loss": 0.6053, "learning_rate": 5e-06, "epoch": 1.0874704491725768, "percentage": 36.25, "elapsed_time": "0:53:19", "remaining_time": "1:33:47"}
{"current_steps": 470, "total_steps": 1269, "loss": 0.6125, "learning_rate": 5e-06, "epoch": 1.1111111111111112, "percentage": 37.04, "elapsed_time": "0:54:23", "remaining_time": "1:32:27"}
{"current_steps": 480, "total_steps": 1269, "loss": 0.6075, "learning_rate": 5e-06, "epoch": 1.1347517730496455, "percentage": 37.83, "elapsed_time": "0:55:35", "remaining_time": "1:31:23"}
{"current_steps": 490, "total_steps": 1269, "loss": 0.6149, "learning_rate": 5e-06, "epoch": 1.1583924349881798, "percentage": 38.61, "elapsed_time": "0:56:38", "remaining_time": "1:30:02"}
{"current_steps": 500, "total_steps": 1269, "loss": 0.6081, "learning_rate": 5e-06, "epoch": 1.1820330969267139, "percentage": 39.4, "elapsed_time": "0:57:46", "remaining_time": "1:28:51"}
{"current_steps": 510, "total_steps": 1269, "loss": 0.6079, "learning_rate": 5e-06, "epoch": 1.2056737588652482, "percentage": 40.19, "elapsed_time": "0:58:58", "remaining_time": "1:27:46"}
{"current_steps": 520, "total_steps": 1269, "loss": 0.6036, "learning_rate": 5e-06, "epoch": 1.2293144208037825, "percentage": 40.98, "elapsed_time": "1:00:01", "remaining_time": "1:26:26"}
{"current_steps": 530, "total_steps": 1269, "loss": 0.6045, "learning_rate": 5e-06, "epoch": 1.2529550827423168, "percentage": 41.77, "elapsed_time": "1:01:03", "remaining_time": "1:25:08"}
{"current_steps": 540, "total_steps": 1269, "loss": 0.6064, "learning_rate": 5e-06, "epoch": 1.2765957446808511, "percentage": 42.55, "elapsed_time": "1:02:05", "remaining_time": "1:23:49"}
{"current_steps": 550, "total_steps": 1269, "loss": 0.6081, "learning_rate": 5e-06, "epoch": 1.3002364066193852, "percentage": 43.34, "elapsed_time": "1:03:08", "remaining_time": "1:22:32"}
{"current_steps": 560, "total_steps": 1269, "loss": 0.6055, "learning_rate": 5e-06, "epoch": 1.3238770685579198, "percentage": 44.13, "elapsed_time": "1:04:10", "remaining_time": "1:21:15"}
{"current_steps": 570, "total_steps": 1269, "loss": 0.6107, "learning_rate": 5e-06, "epoch": 1.3475177304964538, "percentage": 44.92, "elapsed_time": "1:05:13", "remaining_time": "1:19:58"}
{"current_steps": 580, "total_steps": 1269, "loss": 0.6052, "learning_rate": 5e-06, "epoch": 1.3711583924349882, "percentage": 45.71, "elapsed_time": "1:06:15", "remaining_time": "1:18:42"}
{"current_steps": 590, "total_steps": 1269, "loss": 0.6051, "learning_rate": 5e-06, "epoch": 1.3947990543735225, "percentage": 46.49, "elapsed_time": "1:07:28", "remaining_time": "1:17:39"}
{"current_steps": 600, "total_steps": 1269, "loss": 0.6057, "learning_rate": 5e-06, "epoch": 1.4184397163120568, "percentage": 47.28, "elapsed_time": "1:08:31", "remaining_time": "1:16:24"}
{"current_steps": 610, "total_steps": 1269, "loss": 0.6043, "learning_rate": 5e-06, "epoch": 1.442080378250591, "percentage": 48.07, "elapsed_time": "1:09:33", "remaining_time": "1:15:09"}
{"current_steps": 620, "total_steps": 1269, "loss": 0.6033, "learning_rate": 5e-06, "epoch": 1.4657210401891252, "percentage": 48.86, "elapsed_time": "1:10:36", "remaining_time": "1:13:54"}
{"current_steps": 630, "total_steps": 1269, "loss": 0.6132, "learning_rate": 5e-06, "epoch": 1.4893617021276595, "percentage": 49.65, "elapsed_time": "1:11:38", "remaining_time": "1:12:39"}
{"current_steps": 640, "total_steps": 1269, "loss": 0.6047, "learning_rate": 5e-06, "epoch": 1.5130023640661938, "percentage": 50.43, "elapsed_time": "1:12:40", "remaining_time": "1:11:25"}
{"current_steps": 650, "total_steps": 1269, "loss": 0.6073, "learning_rate": 5e-06, "epoch": 1.5366430260047281, "percentage": 51.22, "elapsed_time": "1:13:43", "remaining_time": "1:10:12"}
{"current_steps": 660, "total_steps": 1269, "loss": 0.608, "learning_rate": 5e-06, "epoch": 1.5602836879432624, "percentage": 52.01, "elapsed_time": "1:14:52", "remaining_time": "1:09:05"}
{"current_steps": 670, "total_steps": 1269, "loss": 0.6004, "learning_rate": 5e-06, "epoch": 1.5839243498817965, "percentage": 52.8, "elapsed_time": "1:16:03", "remaining_time": "1:08:00"}
{"current_steps": 680, "total_steps": 1269, "loss": 0.6019, "learning_rate": 5e-06, "epoch": 1.607565011820331, "percentage": 53.59, "elapsed_time": "1:17:06", "remaining_time": "1:06:47"}
{"current_steps": 690, "total_steps": 1269, "loss": 0.6055, "learning_rate": 5e-06, "epoch": 1.6312056737588652, "percentage": 54.37, "elapsed_time": "1:18:09", "remaining_time": "1:05:34"}
{"current_steps": 700, "total_steps": 1269, "loss": 0.6074, "learning_rate": 5e-06, "epoch": 1.6548463356973995, "percentage": 55.16, "elapsed_time": "1:19:11", "remaining_time": "1:04:22"}
{"current_steps": 710, "total_steps": 1269, "loss": 0.6051, "learning_rate": 5e-06, "epoch": 1.6784869976359338, "percentage": 55.95, "elapsed_time": "1:20:13", "remaining_time": "1:03:10"}
{"current_steps": 720, "total_steps": 1269, "loss": 0.5994, "learning_rate": 5e-06, "epoch": 1.702127659574468, "percentage": 56.74, "elapsed_time": "1:21:16", "remaining_time": "1:01:58"}
{"current_steps": 730, "total_steps": 1269, "loss": 0.6044, "learning_rate": 5e-06, "epoch": 1.7257683215130024, "percentage": 57.53, "elapsed_time": "1:22:18", "remaining_time": "1:00:46"}
{"current_steps": 740, "total_steps": 1269, "loss": 0.6089, "learning_rate": 5e-06, "epoch": 1.7494089834515365, "percentage": 58.31, "elapsed_time": "1:23:21", "remaining_time": "0:59:35"}
{"current_steps": 750, "total_steps": 1269, "loss": 0.6003, "learning_rate": 5e-06, "epoch": 1.773049645390071, "percentage": 59.1, "elapsed_time": "1:24:23", "remaining_time": "0:58:23"}
{"current_steps": 760, "total_steps": 1269, "loss": 0.5998, "learning_rate": 5e-06, "epoch": 1.7966903073286051, "percentage": 59.89, "elapsed_time": "1:25:25", "remaining_time": "0:57:12"}
{"current_steps": 770, "total_steps": 1269, "loss": 0.6096, "learning_rate": 5e-06, "epoch": 1.8203309692671394, "percentage": 60.68, "elapsed_time": "1:26:28", "remaining_time": "0:56:02"}
{"current_steps": 780, "total_steps": 1269, "loss": 0.6062, "learning_rate": 5e-06, "epoch": 1.8439716312056738, "percentage": 61.47, "elapsed_time": "1:27:35", "remaining_time": "0:54:54"}
{"current_steps": 790, "total_steps": 1269, "loss": 0.6082, "learning_rate": 5e-06, "epoch": 1.867612293144208, "percentage": 62.25, "elapsed_time": "1:28:43", "remaining_time": "0:53:47"}
{"current_steps": 800, "total_steps": 1269, "loss": 0.5968, "learning_rate": 5e-06, "epoch": 1.8912529550827424, "percentage": 63.04, "elapsed_time": "1:29:45", "remaining_time": "0:52:37"}
{"current_steps": 810, "total_steps": 1269, "loss": 0.5955, "learning_rate": 5e-06, "epoch": 1.9148936170212765, "percentage": 63.83, "elapsed_time": "1:30:48", "remaining_time": "0:51:27"}
{"current_steps": 820, "total_steps": 1269, "loss": 0.606, "learning_rate": 5e-06, "epoch": 1.938534278959811, "percentage": 64.62, "elapsed_time": "1:31:50", "remaining_time": "0:50:17"}
{"current_steps": 830, "total_steps": 1269, "loss": 0.6028, "learning_rate": 5e-06, "epoch": 1.962174940898345, "percentage": 65.41, "elapsed_time": "1:32:53", "remaining_time": "0:49:07"}
{"current_steps": 840, "total_steps": 1269, "loss": 0.6057, "learning_rate": 5e-06, "epoch": 1.9858156028368794, "percentage": 66.19, "elapsed_time": "1:33:55", "remaining_time": "0:47:58"}
{"current_steps": 846, "total_steps": 1269, "eval_loss": 0.6412354111671448, "epoch": 2.0, "percentage": 66.67, "elapsed_time": "1:35:10", "remaining_time": "0:47:35"}
{"current_steps": 850, "total_steps": 1269, "loss": 0.5827, "learning_rate": 5e-06, "epoch": 2.0094562647754137, "percentage": 66.98, "elapsed_time": "1:39:32", "remaining_time": "0:49:04"}
{"current_steps": 860, "total_steps": 1269, "loss": 0.5511, "learning_rate": 5e-06, "epoch": 2.033096926713948, "percentage": 67.77, "elapsed_time": "1:40:35", "remaining_time": "0:47:50"}
{"current_steps": 870, "total_steps": 1269, "loss": 0.5581, "learning_rate": 5e-06, "epoch": 2.0567375886524824, "percentage": 68.56, "elapsed_time": "1:41:48", "remaining_time": "0:46:41"}