gemma-3-1b-it_MED_NLI / trainer_log.jsonl
presencesw's picture
Training in progress, step 8503
bd39b63 verified
{"current_steps": 50, "total_steps": 8503, "loss": 5.0951, "lr": 5.875440658049355e-07, "epoch": 0.005880277549100318, "percentage": 0.59, "elapsed_time": "0:00:54", "remaining_time": "2:34:35"}
{"current_steps": 100, "total_steps": 8503, "loss": 0.8587, "lr": 1.175088131609871e-06, "epoch": 0.011760555098200636, "percentage": 1.18, "elapsed_time": "0:01:30", "remaining_time": "2:06:11"}
{"current_steps": 150, "total_steps": 8503, "loss": 0.084, "lr": 1.762632197414806e-06, "epoch": 0.01764083264730095, "percentage": 1.76, "elapsed_time": "0:02:06", "remaining_time": "1:57:11"}
{"current_steps": 200, "total_steps": 8503, "loss": 0.051, "lr": 2.350176263219742e-06, "epoch": 0.02352111019640127, "percentage": 2.35, "elapsed_time": "0:02:41", "remaining_time": "1:51:54"}
{"current_steps": 250, "total_steps": 8503, "loss": 0.0592, "lr": 2.937720329024677e-06, "epoch": 0.02940138774550159, "percentage": 2.94, "elapsed_time": "0:03:33", "remaining_time": "1:57:37"}
{"current_steps": 300, "total_steps": 8503, "loss": 0.0519, "lr": 3.525264394829612e-06, "epoch": 0.0352816652946019, "percentage": 3.53, "elapsed_time": "0:04:08", "remaining_time": "1:53:27"}
{"current_steps": 350, "total_steps": 8503, "loss": 0.0418, "lr": 4.112808460634548e-06, "epoch": 0.041161942843702226, "percentage": 4.12, "elapsed_time": "0:04:43", "remaining_time": "1:50:13"}
{"current_steps": 400, "total_steps": 8503, "loss": 0.0665, "lr": 4.700352526439484e-06, "epoch": 0.04704222039280254, "percentage": 4.7, "elapsed_time": "0:05:19", "remaining_time": "1:47:50"}
{"current_steps": 450, "total_steps": 8503, "loss": 0.052, "lr": 5.287896592244419e-06, "epoch": 0.05292249794190286, "percentage": 5.29, "elapsed_time": "0:06:10", "remaining_time": "1:50:38"}
{"current_steps": 500, "total_steps": 8503, "loss": 0.0405, "lr": 5.875440658049354e-06, "epoch": 0.05880277549100318, "percentage": 5.88, "elapsed_time": "0:06:46", "remaining_time": "1:48:29"}
{"current_steps": 550, "total_steps": 8503, "loss": 0.0499, "lr": 6.46298472385429e-06, "epoch": 0.0646830530401035, "percentage": 6.47, "elapsed_time": "0:07:22", "remaining_time": "1:46:43"}
{"current_steps": 600, "total_steps": 8503, "loss": 0.0444, "lr": 7.050528789659224e-06, "epoch": 0.0705633305892038, "percentage": 7.06, "elapsed_time": "0:07:58", "remaining_time": "1:45:00"}
{"current_steps": 650, "total_steps": 8503, "loss": 0.0417, "lr": 7.63807285546416e-06, "epoch": 0.07644360813830413, "percentage": 7.64, "elapsed_time": "0:08:50", "remaining_time": "1:46:43"}
{"current_steps": 700, "total_steps": 8503, "loss": 0.0337, "lr": 8.225616921269097e-06, "epoch": 0.08232388568740445, "percentage": 8.23, "elapsed_time": "0:09:25", "remaining_time": "1:45:08"}
{"current_steps": 750, "total_steps": 8503, "loss": 0.0314, "lr": 8.81316098707403e-06, "epoch": 0.08820416323650476, "percentage": 8.82, "elapsed_time": "0:10:01", "remaining_time": "1:43:37"}
{"current_steps": 800, "total_steps": 8503, "loss": 0.0386, "lr": 9.400705052878968e-06, "epoch": 0.09408444078560509, "percentage": 9.41, "elapsed_time": "0:10:37", "remaining_time": "1:42:18"}
{"current_steps": 850, "total_steps": 8503, "loss": 0.0322, "lr": 9.988249118683903e-06, "epoch": 0.0999647183347054, "percentage": 10.0, "elapsed_time": "0:11:29", "remaining_time": "1:43:25"}
{"current_steps": 900, "total_steps": 8503, "loss": 0.037, "lr": 9.998988263671598e-06, "epoch": 0.10584499588380572, "percentage": 10.58, "elapsed_time": "0:12:05", "remaining_time": "1:42:09"}
{"current_steps": 950, "total_steps": 8503, "loss": 0.0328, "lr": 9.995870471854679e-06, "epoch": 0.11172527343290603, "percentage": 11.17, "elapsed_time": "0:12:40", "remaining_time": "1:40:50"}
{"current_steps": 1000, "total_steps": 8503, "loss": 0.0331, "lr": 9.990647516930925e-06, "epoch": 0.11760555098200635, "percentage": 11.76, "elapsed_time": "0:13:16", "remaining_time": "1:39:39"}
{"current_steps": 1000, "total_steps": 8503, "eval_loss": 0.03788302466273308, "epoch": 0.11760555098200635, "percentage": 11.76, "elapsed_time": "0:23:36", "remaining_time": "2:57:07"}
{"current_steps": 1050, "total_steps": 8503, "loss": 0.0359, "lr": 9.983321599752438e-06, "epoch": 0.12348582853110666, "percentage": 12.35, "elapsed_time": "0:25:05", "remaining_time": "2:58:02"}
{"current_steps": 1100, "total_steps": 8503, "loss": 0.0306, "lr": 9.9738958073189e-06, "epoch": 0.129366106080207, "percentage": 12.94, "elapsed_time": "0:25:41", "remaining_time": "2:52:51"}
{"current_steps": 1150, "total_steps": 8503, "loss": 0.031, "lr": 9.962374111476778e-06, "epoch": 0.1352463836293073, "percentage": 13.52, "elapsed_time": "0:26:17", "remaining_time": "2:48:07"}
{"current_steps": 1200, "total_steps": 8503, "loss": 0.0333, "lr": 9.948761367245665e-06, "epoch": 0.1411266611784076, "percentage": 14.11, "elapsed_time": "0:26:54", "remaining_time": "2:43:46"}
{"current_steps": 1250, "total_steps": 8503, "loss": 0.0347, "lr": 9.933063310772463e-06, "epoch": 0.14700693872750795, "percentage": 14.7, "elapsed_time": "0:27:46", "remaining_time": "2:41:10"}
{"current_steps": 1300, "total_steps": 8503, "loss": 0.0326, "lr": 9.915286556914286e-06, "epoch": 0.15288721627660826, "percentage": 15.29, "elapsed_time": "0:28:23", "remaining_time": "2:37:17"}
{"current_steps": 1350, "total_steps": 8503, "loss": 0.0323, "lr": 9.89543859645109e-06, "epoch": 0.15876749382570857, "percentage": 15.88, "elapsed_time": "0:28:59", "remaining_time": "2:33:35"}
{"current_steps": 1400, "total_steps": 8503, "loss": 0.0274, "lr": 9.873527792929196e-06, "epoch": 0.1646477713748089, "percentage": 16.46, "elapsed_time": "0:29:34", "remaining_time": "2:30:05"}
{"current_steps": 1450, "total_steps": 8503, "loss": 0.0294, "lr": 9.84956337913706e-06, "epoch": 0.17052804892390921, "percentage": 17.05, "elapsed_time": "0:30:26", "remaining_time": "2:28:04"}
{"current_steps": 1500, "total_steps": 8503, "loss": 0.0336, "lr": 9.82355545321475e-06, "epoch": 0.17640832647300952, "percentage": 17.64, "elapsed_time": "0:31:03", "remaining_time": "2:24:59"}
{"current_steps": 1550, "total_steps": 8503, "loss": 0.0295, "lr": 9.795514974398789e-06, "epoch": 0.18228860402210983, "percentage": 18.23, "elapsed_time": "0:31:39", "remaining_time": "2:21:59"}
{"current_steps": 1600, "total_steps": 8503, "loss": 0.0301, "lr": 9.765453758404144e-06, "epoch": 0.18816888157121017, "percentage": 18.82, "elapsed_time": "0:32:15", "remaining_time": "2:19:09"}
{"current_steps": 1650, "total_steps": 8503, "loss": 0.0309, "lr": 9.733384472445308e-06, "epoch": 0.19404915912031048, "percentage": 19.4, "elapsed_time": "0:33:07", "remaining_time": "2:17:35"}
{"current_steps": 1700, "total_steps": 8503, "loss": 0.026, "lr": 9.699320629898589e-06, "epoch": 0.1999294366694108, "percentage": 19.99, "elapsed_time": "0:33:43", "remaining_time": "2:14:58"}
{"current_steps": 1750, "total_steps": 8503, "loss": 0.0264, "lr": 9.663276584607831e-06, "epoch": 0.2058097142185111, "percentage": 20.58, "elapsed_time": "0:34:19", "remaining_time": "2:12:28"}
{"current_steps": 1800, "total_steps": 8503, "loss": 0.0277, "lr": 9.625267524835974e-06, "epoch": 0.21168999176761144, "percentage": 21.17, "elapsed_time": "0:34:56", "remaining_time": "2:10:06"}
{"current_steps": 1850, "total_steps": 8503, "loss": 0.0276, "lr": 9.585309466865029e-06, "epoch": 0.21757026931671175, "percentage": 21.76, "elapsed_time": "0:35:48", "remaining_time": "2:08:46"}
{"current_steps": 1900, "total_steps": 8503, "loss": 0.0289, "lr": 9.54341924824712e-06, "epoch": 0.22345054686581206, "percentage": 22.35, "elapsed_time": "0:36:25", "remaining_time": "2:06:34"}
{"current_steps": 1950, "total_steps": 8503, "loss": 0.0256, "lr": 9.499614520709457e-06, "epoch": 0.22933082441491237, "percentage": 22.93, "elapsed_time": "0:37:01", "remaining_time": "2:04:24"}
{"current_steps": 2000, "total_steps": 8503, "loss": 0.0299, "lr": 9.453913742716256e-06, "epoch": 0.2352111019640127, "percentage": 23.52, "elapsed_time": "0:37:36", "remaining_time": "2:02:17"}
{"current_steps": 2000, "total_steps": 8503, "eval_loss": 0.026240274310112, "epoch": 0.2352111019640127, "percentage": 23.52, "elapsed_time": "0:47:56", "remaining_time": "2:35:51"}
{"current_steps": 2050, "total_steps": 8503, "loss": 0.0273, "lr": 9.40633617169069e-06, "epoch": 0.24109137951311302, "percentage": 24.11, "elapsed_time": "0:49:23", "remaining_time": "2:35:27"}
{"current_steps": 2100, "total_steps": 8503, "loss": 0.0265, "lr": 9.35690185590018e-06, "epoch": 0.24697165706221333, "percentage": 24.7, "elapsed_time": "0:49:58", "remaining_time": "2:32:23"}
{"current_steps": 2150, "total_steps": 8503, "loss": 0.0249, "lr": 9.305631626008454e-06, "epoch": 0.25285193461131367, "percentage": 25.29, "elapsed_time": "0:50:34", "remaining_time": "2:29:26"}
{"current_steps": 2200, "total_steps": 8503, "loss": 0.0243, "lr": 9.252547086297895e-06, "epoch": 0.258732212160414, "percentage": 25.87, "elapsed_time": "0:51:10", "remaining_time": "2:26:37"}
{"current_steps": 2250, "total_steps": 8503, "loss": 0.0271, "lr": 9.197670605565932e-06, "epoch": 0.2646124897095143, "percentage": 26.46, "elapsed_time": "0:52:03", "remaining_time": "2:24:40"}
{"current_steps": 2300, "total_steps": 8503, "loss": 0.0265, "lr": 9.141025307699246e-06, "epoch": 0.2704927672586146, "percentage": 27.05, "elapsed_time": "0:52:39", "remaining_time": "2:22:02"}
{"current_steps": 2350, "total_steps": 8503, "loss": 0.0286, "lr": 9.082635061929817e-06, "epoch": 0.2763730448077149, "percentage": 27.64, "elapsed_time": "0:53:15", "remaining_time": "2:19:27"}
{"current_steps": 2400, "total_steps": 8503, "loss": 0.0288, "lr": 9.022524472776897e-06, "epoch": 0.2822533223568152, "percentage": 28.23, "elapsed_time": "0:53:51", "remaining_time": "2:16:58"}
{"current_steps": 2450, "total_steps": 8503, "loss": 0.0252, "lr": 8.960718869679132e-06, "epoch": 0.2881335999059156, "percentage": 28.81, "elapsed_time": "0:54:44", "remaining_time": "2:15:13"}
{"current_steps": 2500, "total_steps": 8503, "loss": 0.0243, "lr": 8.89724429632124e-06, "epoch": 0.2940138774550159, "percentage": 29.4, "elapsed_time": "0:55:19", "remaining_time": "2:12:51"}
{"current_steps": 2550, "total_steps": 8503, "loss": 0.0216, "lr": 8.832127499659687e-06, "epoch": 0.2998941550041162, "percentage": 29.99, "elapsed_time": "0:55:55", "remaining_time": "2:10:32"}
{"current_steps": 2600, "total_steps": 8503, "loss": 0.0245, "lr": 8.765395918652062e-06, "epoch": 0.3057744325532165, "percentage": 30.58, "elapsed_time": "0:56:31", "remaining_time": "2:08:19"}
{"current_steps": 2650, "total_steps": 8503, "loss": 0.0292, "lr": 8.697077672694809e-06, "epoch": 0.3116547101023168, "percentage": 31.17, "elapsed_time": "0:57:23", "remaining_time": "2:06:45"}
{"current_steps": 2700, "total_steps": 8503, "loss": 0.0243, "lr": 8.627201549774273e-06, "epoch": 0.31753498765141713, "percentage": 31.75, "elapsed_time": "0:57:58", "remaining_time": "2:04:35"}
{"current_steps": 2750, "total_steps": 8503, "loss": 0.0272, "lr": 8.55579699433599e-06, "epoch": 0.32341526520051744, "percentage": 32.34, "elapsed_time": "0:58:34", "remaining_time": "2:02:33"}
{"current_steps": 2800, "total_steps": 8503, "loss": 0.0268, "lr": 8.482894094877372e-06, "epoch": 0.3292955427496178, "percentage": 32.93, "elapsed_time": "0:59:11", "remaining_time": "2:00:32"}
{"current_steps": 2850, "total_steps": 8503, "loss": 0.0251, "lr": 8.408523571269e-06, "epoch": 0.3351758202987181, "percentage": 33.52, "elapsed_time": "1:00:03", "remaining_time": "1:59:06"}
{"current_steps": 2900, "total_steps": 8503, "loss": 0.0304, "lr": 8.332716761809857e-06, "epoch": 0.34105609784781843, "percentage": 34.11, "elapsed_time": "1:00:39", "remaining_time": "1:57:12"}
{"current_steps": 2950, "total_steps": 8503, "loss": 0.0241, "lr": 8.255505610021981e-06, "epoch": 0.34693637539691874, "percentage": 34.69, "elapsed_time": "1:01:15", "remaining_time": "1:55:18"}
{"current_steps": 3000, "total_steps": 8503, "loss": 0.0251, "lr": 8.176922651190085e-06, "epoch": 0.35281665294601905, "percentage": 35.28, "elapsed_time": "1:01:51", "remaining_time": "1:53:27"}
{"current_steps": 3000, "total_steps": 8503, "eval_loss": 0.028408875688910484, "epoch": 0.35281665294601905, "percentage": 35.28, "elapsed_time": "1:12:10", "remaining_time": "2:12:24"}
{"current_steps": 3050, "total_steps": 8503, "loss": 0.0316, "lr": 8.097000998651812e-06, "epoch": 0.35869693049511936, "percentage": 35.87, "elapsed_time": "1:13:38", "remaining_time": "2:11:39"}
{"current_steps": 3100, "total_steps": 8503, "loss": 0.0239, "lr": 8.015774329844417e-06, "epoch": 0.36457720804421967, "percentage": 36.46, "elapsed_time": "1:14:14", "remaining_time": "2:09:23"}
{"current_steps": 3150, "total_steps": 8503, "loss": 0.0226, "lr": 7.933276872113754e-06, "epoch": 0.37045748559332, "percentage": 37.05, "elapsed_time": "1:14:50", "remaining_time": "2:07:10"}
{"current_steps": 3200, "total_steps": 8503, "loss": 0.0256, "lr": 7.849543388291524e-06, "epoch": 0.37633776314242035, "percentage": 37.63, "elapsed_time": "1:15:26", "remaining_time": "2:05:01"}
{"current_steps": 3250, "total_steps": 8503, "loss": 0.0265, "lr": 7.764609162046894e-06, "epoch": 0.38221804069152066, "percentage": 38.22, "elapsed_time": "1:16:19", "remaining_time": "2:03:21"}
{"current_steps": 3300, "total_steps": 8503, "loss": 0.0256, "lr": 7.678509983018656e-06, "epoch": 0.38809831824062097, "percentage": 38.81, "elapsed_time": "1:16:55", "remaining_time": "2:01:16"}
{"current_steps": 3350, "total_steps": 8503, "loss": 0.024, "lr": 7.591282131734139e-06, "epoch": 0.3939785957897213, "percentage": 39.4, "elapsed_time": "1:17:31", "remaining_time": "1:59:15"}
{"current_steps": 3400, "total_steps": 8503, "loss": 0.0208, "lr": 7.50296236432132e-06, "epoch": 0.3998588733388216, "percentage": 39.99, "elapsed_time": "1:18:07", "remaining_time": "1:57:15"}
{"current_steps": 3450, "total_steps": 8503, "loss": 0.0243, "lr": 7.413587897020496e-06, "epoch": 0.4057391508879219, "percentage": 40.57, "elapsed_time": "1:18:59", "remaining_time": "1:55:40"}
{"current_steps": 3500, "total_steps": 8503, "loss": 0.0213, "lr": 7.323196390502074e-06, "epoch": 0.4116194284370222, "percentage": 41.16, "elapsed_time": "1:19:33", "remaining_time": "1:53:44"}
{"current_steps": 3550, "total_steps": 8503, "loss": 0.0261, "lr": 7.231825933997105e-06, "epoch": 0.41749970598612257, "percentage": 41.75, "elapsed_time": "1:20:09", "remaining_time": "1:51:50"}
{"current_steps": 3600, "total_steps": 8503, "loss": 0.0286, "lr": 7.139515029247213e-06, "epoch": 0.4233799835352229, "percentage": 42.34, "elapsed_time": "1:20:45", "remaining_time": "1:49:59"}
{"current_steps": 3650, "total_steps": 8503, "loss": 0.0244, "lr": 7.046302574280703e-06, "epoch": 0.4292602610843232, "percentage": 42.93, "elapsed_time": "1:21:37", "remaining_time": "1:48:31"}
{"current_steps": 3700, "total_steps": 8503, "loss": 0.023, "lr": 6.952227847021697e-06, "epoch": 0.4351405386334235, "percentage": 43.51, "elapsed_time": "1:22:13", "remaining_time": "1:46:44"}
{"current_steps": 3750, "total_steps": 8503, "loss": 0.0224, "lr": 6.857330488739159e-06, "epoch": 0.4410208161825238, "percentage": 44.1, "elapsed_time": "1:22:48", "remaining_time": "1:44:57"}
{"current_steps": 3800, "total_steps": 8503, "loss": 0.0235, "lr": 6.76165048734285e-06, "epoch": 0.4469010937316241, "percentage": 44.69, "elapsed_time": "1:23:24", "remaining_time": "1:43:13"}
{"current_steps": 3850, "total_steps": 8503, "loss": 0.0251, "lr": 6.665228160533186e-06, "epoch": 0.45278137128072443, "percentage": 45.28, "elapsed_time": "1:24:15", "remaining_time": "1:41:50"}
{"current_steps": 3900, "total_steps": 8503, "loss": 0.0208, "lr": 6.568104138812141e-06, "epoch": 0.45866164882982474, "percentage": 45.87, "elapsed_time": "1:24:51", "remaining_time": "1:40:09"}
{"current_steps": 3950, "total_steps": 8503, "loss": 0.0242, "lr": 6.470319348362344e-06, "epoch": 0.4645419263789251, "percentage": 46.45, "elapsed_time": "1:25:27", "remaining_time": "1:38:30"}
{"current_steps": 4000, "total_steps": 8503, "loss": 0.0213, "lr": 6.371914993801573e-06, "epoch": 0.4704222039280254, "percentage": 47.04, "elapsed_time": "1:26:03", "remaining_time": "1:36:52"}
{"current_steps": 4000, "total_steps": 8503, "eval_loss": 0.02518468163907528, "epoch": 0.4704222039280254, "percentage": 47.04, "elapsed_time": "1:36:15", "remaining_time": "1:48:21"}
{"current_steps": 4050, "total_steps": 8503, "loss": 0.0201, "lr": 6.272932540819929e-06, "epoch": 0.4763024814771257, "percentage": 47.63, "elapsed_time": "1:37:46", "remaining_time": "1:47:30"}
{"current_steps": 4100, "total_steps": 8503, "loss": 0.0245, "lr": 6.173413698706999e-06, "epoch": 0.48218275902622604, "percentage": 48.22, "elapsed_time": "1:38:22", "remaining_time": "1:45:38"}
{"current_steps": 4150, "total_steps": 8503, "loss": 0.0229, "lr": 6.073400402776364e-06, "epoch": 0.48806303657532635, "percentage": 48.81, "elapsed_time": "1:38:57", "remaining_time": "1:43:48"}
{"current_steps": 4200, "total_steps": 8503, "loss": 0.0203, "lr": 5.972934796694871e-06, "epoch": 0.49394331412442666, "percentage": 49.39, "elapsed_time": "1:39:33", "remaining_time": "1:41:59"}
{"current_steps": 4250, "total_steps": 8503, "loss": 0.0224, "lr": 5.872059214724112e-06, "epoch": 0.49982359167352697, "percentage": 49.98, "elapsed_time": "1:40:24", "remaining_time": "1:40:29"}
{"current_steps": 4300, "total_steps": 8503, "loss": 0.0233, "lr": 5.770816163881581e-06, "epoch": 0.5057038692226273, "percentage": 50.57, "elapsed_time": "1:41:01", "remaining_time": "1:38:44"}
{"current_steps": 4350, "total_steps": 8503, "loss": 0.0211, "lr": 5.669248306029042e-06, "epoch": 0.5115841467717276, "percentage": 51.16, "elapsed_time": "1:41:36", "remaining_time": "1:37:00"}
{"current_steps": 4400, "total_steps": 8503, "loss": 0.0228, "lr": 5.567398439895643e-06, "epoch": 0.517464424320828, "percentage": 51.75, "elapsed_time": "1:42:13", "remaining_time": "1:35:19"}
{"current_steps": 4450, "total_steps": 8503, "loss": 0.019, "lr": 5.465309483043364e-06, "epoch": 0.5233447018699282, "percentage": 52.33, "elapsed_time": "1:43:05", "remaining_time": "1:33:53"}
{"current_steps": 4500, "total_steps": 8503, "loss": 0.0262, "lr": 5.363024453782388e-06, "epoch": 0.5292249794190286, "percentage": 52.92, "elapsed_time": "1:43:40", "remaining_time": "1:32:13"}
{"current_steps": 4550, "total_steps": 8503, "loss": 0.0208, "lr": 5.260586453044011e-06, "epoch": 0.5351052569681289, "percentage": 53.51, "elapsed_time": "1:44:16", "remaining_time": "1:30:35"}
{"current_steps": 4600, "total_steps": 8503, "loss": 0.0248, "lr": 5.158038646218749e-06, "epoch": 0.5409855345172292, "percentage": 54.1, "elapsed_time": "1:44:52", "remaining_time": "1:28:58"}
{"current_steps": 4650, "total_steps": 8503, "loss": 0.0206, "lr": 5.055424244967284e-06, "epoch": 0.5468658120663296, "percentage": 54.69, "elapsed_time": "1:45:43", "remaining_time": "1:27:36"}
{"current_steps": 4700, "total_steps": 8503, "loss": 0.0202, "lr": 4.95278648901189e-06, "epoch": 0.5527460896154298, "percentage": 55.27, "elapsed_time": "1:46:19", "remaining_time": "1:26:02"}
{"current_steps": 4750, "total_steps": 8503, "loss": 0.023, "lr": 4.850168627916068e-06, "epoch": 0.5586263671645302, "percentage": 55.86, "elapsed_time": "1:46:55", "remaining_time": "1:24:28"}
{"current_steps": 4800, "total_steps": 8503, "loss": 0.0243, "lr": 4.7476139028600085e-06, "epoch": 0.5645066447136304, "percentage": 56.45, "elapsed_time": "1:47:31", "remaining_time": "1:22:56"}
{"current_steps": 4850, "total_steps": 8503, "loss": 0.0199, "lr": 4.645165528419598e-06, "epoch": 0.5703869222627308, "percentage": 57.04, "elapsed_time": "1:48:22", "remaining_time": "1:21:37"}
{"current_steps": 4900, "total_steps": 8503, "loss": 0.0224, "lr": 4.542866674356627e-06, "epoch": 0.5762671998118312, "percentage": 57.63, "elapsed_time": "1:48:59", "remaining_time": "1:20:08"}
{"current_steps": 4950, "total_steps": 8503, "loss": 0.0206, "lr": 4.440760447427899e-06, "epoch": 0.5821474773609314, "percentage": 58.21, "elapsed_time": "1:49:34", "remaining_time": "1:18:39"}
{"current_steps": 5000, "total_steps": 8503, "loss": 0.0264, "lr": 4.338889873220875e-06, "epoch": 0.5880277549100318, "percentage": 58.8, "elapsed_time": "1:50:10", "remaining_time": "1:17:11"}
{"current_steps": 5000, "total_steps": 8503, "eval_loss": 0.022188851609826088, "epoch": 0.5880277549100318, "percentage": 58.8, "elapsed_time": "2:00:26", "remaining_time": "1:24:22"}
{"current_steps": 5050, "total_steps": 8503, "loss": 0.0238, "lr": 4.237297878023512e-06, "epoch": 0.593908032459132, "percentage": 59.39, "elapsed_time": "2:01:57", "remaining_time": "1:23:23"}
{"current_steps": 5100, "total_steps": 8503, "loss": 0.0207, "lr": 4.136027270735971e-06, "epoch": 0.5997883100082324, "percentage": 59.98, "elapsed_time": "2:02:34", "remaining_time": "1:21:47"}
{"current_steps": 5150, "total_steps": 8503, "loss": 0.0208, "lr": 4.035120724831766e-06, "epoch": 0.6056685875573327, "percentage": 60.57, "elapsed_time": "2:03:10", "remaining_time": "1:20:11"}
{"current_steps": 5200, "total_steps": 8503, "loss": 0.0214, "lr": 3.9346207603759966e-06, "epoch": 0.611548865106433, "percentage": 61.15, "elapsed_time": "2:03:46", "remaining_time": "1:18:37"}
{"current_steps": 5250, "total_steps": 8503, "loss": 0.0195, "lr": 3.834569726108201e-06, "epoch": 0.6174291426555334, "percentage": 61.74, "elapsed_time": "2:04:37", "remaining_time": "1:17:13"}
{"current_steps": 5300, "total_steps": 8503, "loss": 0.0229, "lr": 3.7350097815974395e-06, "epoch": 0.6233094202046336, "percentage": 62.33, "elapsed_time": "2:05:12", "remaining_time": "1:15:40"}
{"current_steps": 5350, "total_steps": 8503, "loss": 0.0253, "lr": 3.6359828794770467e-06, "epoch": 0.629189697753734, "percentage": 62.92, "elapsed_time": "2:05:47", "remaining_time": "1:14:08"}
{"current_steps": 5400, "total_steps": 8503, "loss": 0.0197, "lr": 3.5375307477666134e-06, "epoch": 0.6350699753028343, "percentage": 63.51, "elapsed_time": "2:06:22", "remaining_time": "1:12:37"}
{"current_steps": 5450, "total_steps": 8503, "loss": 0.018, "lr": 3.4396948722886065e-06, "epoch": 0.6409502528519346, "percentage": 64.1, "elapsed_time": "2:07:14", "remaining_time": "1:11:16"}
{"current_steps": 5500, "total_steps": 8503, "loss": 0.019, "lr": 3.342516479187047e-06, "epoch": 0.6468305304010349, "percentage": 64.68, "elapsed_time": "2:07:49", "remaining_time": "1:09:47"}
{"current_steps": 5550, "total_steps": 8503, "loss": 0.0218, "lr": 3.246036517555611e-06, "epoch": 0.6527108079501353, "percentage": 65.27, "elapsed_time": "2:08:24", "remaining_time": "1:08:19"}
{"current_steps": 5600, "total_steps": 8503, "loss": 0.0172, "lr": 3.1502956421824714e-06, "epoch": 0.6585910854992356, "percentage": 65.86, "elapsed_time": "2:09:00", "remaining_time": "1:06:52"}
{"current_steps": 5650, "total_steps": 8503, "loss": 0.0213, "lr": 3.0553341964191587e-06, "epoch": 0.6644713630483359, "percentage": 66.45, "elapsed_time": "2:09:51", "remaining_time": "1:05:34"}
{"current_steps": 5700, "total_steps": 8503, "loss": 0.0175, "lr": 2.961192195180657e-06, "epoch": 0.6703516405974362, "percentage": 67.04, "elapsed_time": "2:10:27", "remaining_time": "1:04:09"}
{"current_steps": 5750, "total_steps": 8503, "loss": 0.0167, "lr": 2.867909308083885e-06, "epoch": 0.6762319181465365, "percentage": 67.62, "elapsed_time": "2:11:03", "remaining_time": "1:02:44"}
{"current_steps": 5800, "total_steps": 8503, "loss": 0.0192, "lr": 2.7755248427316976e-06, "epoch": 0.6821121956956369, "percentage": 68.21, "elapsed_time": "2:11:39", "remaining_time": "1:01:21"}
{"current_steps": 5850, "total_steps": 8503, "loss": 0.0176, "lr": 2.68407772814942e-06, "epoch": 0.6879924732447371, "percentage": 68.8, "elapsed_time": "2:12:30", "remaining_time": "1:00:05"}
{"current_steps": 5900, "total_steps": 8503, "loss": 0.0212, "lr": 2.5936064983808994e-06, "epoch": 0.6938727507938375, "percentage": 69.39, "elapsed_time": "2:13:05", "remaining_time": "0:58:43"}
{"current_steps": 5950, "total_steps": 8503, "loss": 0.0206, "lr": 2.5041492762510245e-06, "epoch": 0.6997530283429377, "percentage": 69.98, "elapsed_time": "2:13:41", "remaining_time": "0:57:21"}
{"current_steps": 6000, "total_steps": 8503, "loss": 0.0183, "lr": 2.415743757301486e-06, "epoch": 0.7056333058920381, "percentage": 70.56, "elapsed_time": "2:14:17", "remaining_time": "0:56:01"}
{"current_steps": 6000, "total_steps": 8503, "eval_loss": 0.019069144502282143, "epoch": 0.7056333058920381, "percentage": 70.56, "elapsed_time": "2:24:39", "remaining_time": "1:00:20"}
{"current_steps": 6050, "total_steps": 8503, "loss": 0.0175, "lr": 2.3284271939066127e-06, "epoch": 0.7115135834411385, "percentage": 71.15, "elapsed_time": "2:26:09", "remaining_time": "0:59:15"}
{"current_steps": 6100, "total_steps": 8503, "loss": 0.0212, "lr": 2.2422363795759534e-06, "epoch": 0.7173938609902387, "percentage": 71.74, "elapsed_time": "2:26:45", "remaining_time": "0:57:48"}
{"current_steps": 6150, "total_steps": 8503, "loss": 0.0192, "lr": 2.157207633450183e-06, "epoch": 0.7232741385393391, "percentage": 72.33, "elapsed_time": "2:27:21", "remaining_time": "0:56:22"}
{"current_steps": 6200, "total_steps": 8503, "loss": 0.0205, "lr": 2.073376784996931e-06, "epoch": 0.7291544160884393, "percentage": 72.92, "elapsed_time": "2:27:58", "remaining_time": "0:54:57"}
{"current_steps": 6250, "total_steps": 8503, "loss": 0.0179, "lr": 1.990779158912943e-06, "epoch": 0.7350346936375397, "percentage": 73.5, "elapsed_time": "2:28:50", "remaining_time": "0:53:39"}
{"current_steps": 6300, "total_steps": 8503, "loss": 0.0172, "lr": 1.9094495602389235e-06, "epoch": 0.74091497118664, "percentage": 74.09, "elapsed_time": "2:29:25", "remaining_time": "0:52:15"}
{"current_steps": 6350, "total_steps": 8503, "loss": 0.0186, "lr": 1.829422259693377e-06, "epoch": 0.7467952487357403, "percentage": 74.68, "elapsed_time": "2:30:02", "remaining_time": "0:50:52"}
{"current_steps": 6400, "total_steps": 8503, "loss": 0.0209, "lr": 1.750730979231588e-06, "epoch": 0.7526755262848407, "percentage": 75.27, "elapsed_time": "2:30:37", "remaining_time": "0:49:29"}
{"current_steps": 6450, "total_steps": 8503, "loss": 0.0217, "lr": 1.6734088778358371e-06, "epoch": 0.758555803833941, "percentage": 75.86, "elapsed_time": "2:31:29", "remaining_time": "0:48:13"}
{"current_steps": 6500, "total_steps": 8503, "loss": 0.0169, "lr": 1.5974885375428494e-06, "epoch": 0.7644360813830413, "percentage": 76.44, "elapsed_time": "2:32:04", "remaining_time": "0:46:51"}
{"current_steps": 6550, "total_steps": 8503, "loss": 0.0207, "lr": 1.5230019497143633e-06, "epoch": 0.7703163589321416, "percentage": 77.03, "elapsed_time": "2:32:40", "remaining_time": "0:45:31"}
{"current_steps": 6600, "total_steps": 8503, "loss": 0.0158, "lr": 1.4499805015565754e-06, "epoch": 0.7761966364812419, "percentage": 77.62, "elapsed_time": "2:33:16", "remaining_time": "0:44:11"}
{"current_steps": 6650, "total_steps": 8503, "loss": 0.0189, "lr": 1.378454962894193e-06, "epoch": 0.7820769140303422, "percentage": 78.21, "elapsed_time": "2:34:08", "remaining_time": "0:42:57"}
{"current_steps": 6700, "total_steps": 8503, "loss": 0.0171, "lr": 1.308455473204619e-06, "epoch": 0.7879571915794426, "percentage": 78.8, "elapsed_time": "2:34:44", "remaining_time": "0:41:38"}
{"current_steps": 6750, "total_steps": 8503, "loss": 0.019, "lr": 1.240011528917756e-06, "epoch": 0.7938374691285429, "percentage": 79.38, "elapsed_time": "2:35:20", "remaining_time": "0:40:20"}
{"current_steps": 6800, "total_steps": 8503, "loss": 0.0167, "lr": 1.1731519709867933e-06, "epoch": 0.7997177466776432, "percentage": 79.97, "elapsed_time": "2:35:56", "remaining_time": "0:39:03"}
{"current_steps": 6850, "total_steps": 8503, "loss": 0.0184, "lr": 1.1079049727351726e-06, "epoch": 0.8055980242267435, "percentage": 80.56, "elapsed_time": "2:36:47", "remaining_time": "0:37:50"}
{"current_steps": 6900, "total_steps": 8503, "loss": 0.0193, "lr": 1.0442980279849086e-06, "epoch": 0.8114783017758438, "percentage": 81.15, "elapsed_time": "2:37:22", "remaining_time": "0:36:33"}
{"current_steps": 6950, "total_steps": 8503, "loss": 0.0195, "lr": 9.823579394712175e-07, "epoch": 0.8173585793249442, "percentage": 81.74, "elapsed_time": "2:37:58", "remaining_time": "0:35:17"}
{"current_steps": 7000, "total_steps": 8503, "loss": 0.0171, "lr": 9.221108075483615e-07, "epoch": 0.8232388568740444, "percentage": 82.32, "elapsed_time": "2:38:34", "remaining_time": "0:34:02"}
{"current_steps": 7000, "total_steps": 8503, "eval_loss": 0.017895469442009926, "epoch": 0.8232388568740444, "percentage": 82.32, "elapsed_time": "2:48:52", "remaining_time": "0:36:15"}
{"current_steps": 7050, "total_steps": 8503, "loss": 0.0191, "lr": 8.63582019191469e-07, "epoch": 0.8291191344231448, "percentage": 82.91, "elapsed_time": "2:50:20", "remaining_time": "0:35:06"}
{"current_steps": 7100, "total_steps": 8503, "loss": 0.0168, "lr": 8.067962372989563e-07, "epoch": 0.8349994119722451, "percentage": 83.5, "elapsed_time": "2:50:56", "remaining_time": "0:33:46"}
{"current_steps": 7150, "total_steps": 8503, "loss": 0.0199, "lr": 7.517773903000519e-07, "epoch": 0.8408796895213454, "percentage": 84.09, "elapsed_time": "2:51:32", "remaining_time": "0:32:27"}
{"current_steps": 7200, "total_steps": 8503, "loss": 0.0166, "lr": 6.98548662071828e-07, "epoch": 0.8467599670704458, "percentage": 84.68, "elapsed_time": "2:52:08", "remaining_time": "0:31:09"}
{"current_steps": 7250, "total_steps": 8503, "loss": 0.0156, "lr": 6.471324821699603e-07, "epoch": 0.852640244619546, "percentage": 85.26, "elapsed_time": "2:53:00", "remaining_time": "0:29:54"}
{"current_steps": 7300, "total_steps": 8503, "loss": 0.0215, "lr": 5.975505163773437e-07, "epoch": 0.8585205221686464, "percentage": 85.85, "elapsed_time": "2:53:36", "remaining_time": "0:28:36"}
{"current_steps": 7350, "total_steps": 8503, "loss": 0.0196, "lr": 5.498236575745564e-07, "epoch": 0.8644007997177466, "percentage": 86.44, "elapsed_time": "2:54:12", "remaining_time": "0:27:19"}
{"current_steps": 7400, "total_steps": 8503, "loss": 0.0159, "lr": 5.039720169360013e-07, "epoch": 0.870281077266847, "percentage": 87.03, "elapsed_time": "2:54:48", "remaining_time": "0:26:03"}
{"current_steps": 7450, "total_steps": 8503, "loss": 0.0214, "lr": 4.600149154554501e-07, "epoch": 0.8761613548159473, "percentage": 87.62, "elapsed_time": "2:55:39", "remaining_time": "0:24:49"}
{"current_steps": 7500, "total_steps": 8503, "loss": 0.0181, "lr": 4.179708758045431e-07, "epoch": 0.8820416323650476, "percentage": 88.2, "elapsed_time": "2:56:15", "remaining_time": "0:23:34"}
{"current_steps": 7550, "total_steps": 8503, "loss": 0.02, "lr": 3.7785761452770295e-07, "epoch": 0.887921909914148, "percentage": 88.79, "elapsed_time": "2:56:51", "remaining_time": "0:22:19"}
{"current_steps": 7600, "total_steps": 8503, "loss": 0.0203, "lr": 3.396920345767185e-07, "epoch": 0.8938021874632482, "percentage": 89.38, "elapsed_time": "2:57:26", "remaining_time": "0:21:04"}
{"current_steps": 7650, "total_steps": 8503, "loss": 0.0176, "lr": 3.0349021818817326e-07, "epoch": 0.8996824650123486, "percentage": 89.97, "elapsed_time": "2:58:17", "remaining_time": "0:19:52"}
{"current_steps": 7700, "total_steps": 8503, "loss": 0.0189, "lr": 2.692674201066975e-07, "epoch": 0.9055627425614489, "percentage": 90.56, "elapsed_time": "2:58:53", "remaining_time": "0:18:39"}
{"current_steps": 7750, "total_steps": 8503, "loss": 0.0171, "lr": 2.3703806115691951e-07, "epoch": 0.9114430201105492, "percentage": 91.14, "elapsed_time": "2:59:28", "remaining_time": "0:17:26"}
{"current_steps": 7800, "total_steps": 8503, "loss": 0.0161, "lr": 2.068157221668049e-07, "epoch": 0.9173232976596495, "percentage": 91.73, "elapsed_time": "3:00:03", "remaining_time": "0:16:13"}
{"current_steps": 7850, "total_steps": 8503, "loss": 0.0232, "lr": 1.786131382449602e-07, "epoch": 0.9232035752087498, "percentage": 92.32, "elapsed_time": "3:00:55", "remaining_time": "0:15:03"}
{"current_steps": 7900, "total_steps": 8503, "loss": 0.0197, "lr": 1.5244219341430443e-07, "epoch": 0.9290838527578502, "percentage": 92.91, "elapsed_time": "3:01:30", "remaining_time": "0:13:51"}
{"current_steps": 7950, "total_steps": 8503, "loss": 0.0187, "lr": 1.2831391560437278e-07, "epoch": 0.9349641303069505, "percentage": 93.5, "elapsed_time": "3:02:07", "remaining_time": "0:12:40"}
{"current_steps": 8000, "total_steps": 8503, "loss": 0.0185, "lr": 1.0623847200435966e-07, "epoch": 0.9408444078560508, "percentage": 94.08, "elapsed_time": "3:02:43", "remaining_time": "0:11:29"}
{"current_steps": 8000, "total_steps": 8503, "eval_loss": 0.01769772544503212, "epoch": 0.9408444078560508, "percentage": 94.08, "elapsed_time": "3:12:56", "remaining_time": "0:12:07"}
{"current_steps": 8050, "total_steps": 8503, "loss": 0.019, "lr": 8.62251647788609e-08, "epoch": 0.9467246854051511, "percentage": 94.67, "elapsed_time": "3:14:30", "remaining_time": "0:10:56"}
{"current_steps": 8100, "total_steps": 8503, "loss": 0.0183, "lr": 6.828242714812527e-08, "epoch": 0.9526049629542515, "percentage": 95.26, "elapsed_time": "3:15:06", "remaining_time": "0:09:42"}
{"current_steps": 8150, "total_steps": 8503, "loss": 0.0156, "lr": 5.2417819834454374e-08, "epoch": 0.9584852405033517, "percentage": 95.85, "elapsed_time": "3:15:42", "remaining_time": "0:08:28"}
{"current_steps": 8200, "total_steps": 8503, "loss": 0.0175, "lr": 3.863802787626325e-08, "epoch": 0.9643655180524521, "percentage": 96.44, "elapsed_time": "3:16:18", "remaining_time": "0:07:15"}
{"current_steps": 8250, "total_steps": 8503, "loss": 0.0195, "lr": 2.694885781113432e-08, "epoch": 0.9702457956015524, "percentage": 97.02, "elapsed_time": "3:17:09", "remaining_time": "0:06:02"}
{"current_steps": 8300, "total_steps": 8503, "loss": 0.0182, "lr": 1.735523522905347e-08, "epoch": 0.9761260731506527, "percentage": 97.61, "elapsed_time": "3:17:45", "remaining_time": "0:04:50"}
{"current_steps": 8350, "total_steps": 8503, "loss": 0.0197, "lr": 9.861202696864191e-09, "epoch": 0.9820063506997531, "percentage": 98.2, "elapsed_time": "3:18:21", "remaining_time": "0:03:38"}
{"current_steps": 8400, "total_steps": 8503, "loss": 0.0156, "lr": 4.469918054806344e-09, "epoch": 0.9878866282488533, "percentage": 98.79, "elapsed_time": "3:18:57", "remaining_time": "0:02:26"}
{"current_steps": 8450, "total_steps": 8503, "loss": 0.0197, "lr": 1.1836530858633234e-09, "epoch": 0.9937669057979537, "percentage": 99.38, "elapsed_time": "3:19:50", "remaining_time": "0:01:15"}
{"current_steps": 8500, "total_steps": 8503, "loss": 0.0191, "lr": 3.792558477266894e-12, "epoch": 0.9996471833470539, "percentage": 99.96, "elapsed_time": "3:20:26", "remaining_time": "0:00:04"}
{"current_steps": 8503, "total_steps": 8503, "epoch": 1.0, "percentage": 100.0, "elapsed_time": "3:21:04", "remaining_time": "0:00:00"}