rbelanec commited on
Commit
7ee45c4
verified
1 Parent(s): 5d671cf

Training in progress, step 18408

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +369 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5b4d0598e92dd658f73cd29f519a10e04a14ea601be9e01660e6d258a149d882
3
  size 460928
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3079985206efa9776ed56d0d60aeffd71e7e843f2d3ebc24c34814aa1be3c921
3
  size 460928
trainer_log.jsonl CHANGED
@@ -3320,3 +3320,372 @@
3320
  {"current_steps": 16560, "total_steps": 18408, "loss": 0.1761, "lr": 0.007406530264870387, "epoch": 2.698826597131682, "percentage": 89.96, "elapsed_time": "7:50:58", "remaining_time": "0:52:33", "throughput": 3179.72, "total_tokens": 89854256}
3321
  {"current_steps": 16565, "total_steps": 18408, "loss": 0.1947, "lr": 0.00736685818384265, "epoch": 2.6996414602346803, "percentage": 89.99, "elapsed_time": "7:51:06", "remaining_time": "0:52:24", "throughput": 3179.76, "total_tokens": 89881904}
3322
  {"current_steps": 16569, "total_steps": 18408, "epoch": 2.7002933507170797, "percentage": 90.01, "elapsed_time": "7:52:55", "remaining_time": "0:52:29", "throughput": 3168.39, "total_tokens": 89903728}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3320
  {"current_steps": 16560, "total_steps": 18408, "loss": 0.1761, "lr": 0.007406530264870387, "epoch": 2.698826597131682, "percentage": 89.96, "elapsed_time": "7:50:58", "remaining_time": "0:52:33", "throughput": 3179.72, "total_tokens": 89854256}
3321
  {"current_steps": 16565, "total_steps": 18408, "loss": 0.1947, "lr": 0.00736685818384265, "epoch": 2.6996414602346803, "percentage": 89.99, "elapsed_time": "7:51:06", "remaining_time": "0:52:24", "throughput": 3179.76, "total_tokens": 89881904}
3322
  {"current_steps": 16569, "total_steps": 18408, "epoch": 2.7002933507170797, "percentage": 90.01, "elapsed_time": "7:52:55", "remaining_time": "0:52:29", "throughput": 3168.39, "total_tokens": 89903728}
3323
+ {"current_steps": 16570, "total_steps": 18408, "loss": 0.1728, "lr": 0.007327289962491273, "epoch": 2.7004563233376793, "percentage": 90.02, "elapsed_time": "7:52:58", "remaining_time": "0:52:27", "throughput": 3168.21, "total_tokens": 89908400}
3324
+ {"current_steps": 16575, "total_steps": 18408, "loss": 0.1828, "lr": 0.007287825629628258, "epoch": 2.701271186440678, "percentage": 90.04, "elapsed_time": "7:53:06", "remaining_time": "0:52:19", "throughput": 3168.23, "total_tokens": 89934704}
3325
+ {"current_steps": 16580, "total_steps": 18408, "loss": 0.1899, "lr": 0.007248465213989919, "epoch": 2.7020860495436767, "percentage": 90.07, "elapsed_time": "7:53:14", "remaining_time": "0:52:10", "throughput": 3168.27, "total_tokens": 89961904}
3326
+ {"current_steps": 16585, "total_steps": 18408, "loss": 0.1834, "lr": 0.007209208744236927, "epoch": 2.702900912646675, "percentage": 90.1, "elapsed_time": "7:53:22", "remaining_time": "0:52:02", "throughput": 3168.24, "total_tokens": 89987440}
3327
+ {"current_steps": 16590, "total_steps": 18408, "loss": 0.1942, "lr": 0.007170056248954254, "epoch": 2.703715775749674, "percentage": 90.12, "elapsed_time": "7:53:31", "remaining_time": "0:51:53", "throughput": 3168.28, "total_tokens": 90014576}
3328
+ {"current_steps": 16595, "total_steps": 18408, "loss": 0.1898, "lr": 0.007131007756651125, "epoch": 2.7045306388526726, "percentage": 90.15, "elapsed_time": "7:53:39", "remaining_time": "0:51:44", "throughput": 3168.36, "total_tokens": 90044016}
3329
+ {"current_steps": 16600, "total_steps": 18408, "loss": 0.1752, "lr": 0.00709206329576113, "epoch": 2.7053455019556716, "percentage": 90.18, "elapsed_time": "7:53:47", "remaining_time": "0:51:36", "throughput": 3168.37, "total_tokens": 90069680}
3330
+ {"current_steps": 16605, "total_steps": 18408, "loss": 0.1634, "lr": 0.0070532228946420025, "epoch": 2.70616036505867, "percentage": 90.21, "elapsed_time": "7:53:55", "remaining_time": "0:51:27", "throughput": 3168.38, "total_tokens": 90095600}
3331
+ {"current_steps": 16610, "total_steps": 18408, "loss": 0.1759, "lr": 0.007014486581575785, "epoch": 2.706975228161669, "percentage": 90.23, "elapsed_time": "7:54:04", "remaining_time": "0:51:19", "throughput": 3168.42, "total_tokens": 90123184}
3332
+ {"current_steps": 16615, "total_steps": 18408, "loss": 0.1766, "lr": 0.0069758543847686484, "epoch": 2.7077900912646675, "percentage": 90.26, "elapsed_time": "7:54:12", "remaining_time": "0:51:10", "throughput": 3168.47, "total_tokens": 90151216}
3333
+ {"current_steps": 16620, "total_steps": 18408, "loss": 0.1999, "lr": 0.0069373263323510405, "epoch": 2.708604954367666, "percentage": 90.29, "elapsed_time": "7:54:20", "remaining_time": "0:51:01", "throughput": 3168.51, "total_tokens": 90178544}
3334
+ {"current_steps": 16625, "total_steps": 18408, "loss": 0.1816, "lr": 0.006898902452377486, "epoch": 2.709419817470665, "percentage": 90.31, "elapsed_time": "7:54:29", "remaining_time": "0:50:53", "throughput": 3168.56, "total_tokens": 90206256}
3335
+ {"current_steps": 16630, "total_steps": 18408, "loss": 0.1869, "lr": 0.006860582772826707, "epoch": 2.710234680573664, "percentage": 90.34, "elapsed_time": "7:54:37", "remaining_time": "0:50:44", "throughput": 3168.59, "total_tokens": 90232944}
3336
+ {"current_steps": 16635, "total_steps": 18408, "loss": 0.1961, "lr": 0.006822367321601563, "epoch": 2.7110495436766624, "percentage": 90.37, "elapsed_time": "7:54:45", "remaining_time": "0:50:36", "throughput": 3168.62, "total_tokens": 90259696}
3337
+ {"current_steps": 16640, "total_steps": 18408, "loss": 0.1814, "lr": 0.0067842561265289815, "epoch": 2.711864406779661, "percentage": 90.4, "elapsed_time": "7:54:53", "remaining_time": "0:50:27", "throughput": 3168.62, "total_tokens": 90285424}
3338
+ {"current_steps": 16645, "total_steps": 18408, "loss": 0.1759, "lr": 0.006746249215360011, "epoch": 2.71267926988266, "percentage": 90.42, "elapsed_time": "7:55:02", "remaining_time": "0:50:18", "throughput": 3168.7, "total_tokens": 90314736}
3339
+ {"current_steps": 16650, "total_steps": 18408, "loss": 0.1884, "lr": 0.006708346615769717, "epoch": 2.7134941329856583, "percentage": 90.45, "elapsed_time": "7:55:10", "remaining_time": "0:50:10", "throughput": 3168.72, "total_tokens": 90341232}
3340
+ {"current_steps": 16655, "total_steps": 18408, "loss": 0.1845, "lr": 0.006670548355357253, "epoch": 2.714308996088657, "percentage": 90.48, "elapsed_time": "7:55:18", "remaining_time": "0:50:01", "throughput": 3168.78, "total_tokens": 90369200}
3341
+ {"current_steps": 16660, "total_steps": 18408, "loss": 0.1724, "lr": 0.006632854461645771, "epoch": 2.7151238591916558, "percentage": 90.5, "elapsed_time": "7:55:26", "remaining_time": "0:49:53", "throughput": 3168.83, "total_tokens": 90397296}
3342
+ {"current_steps": 16665, "total_steps": 18408, "loss": 0.1733, "lr": 0.0065952649620824805, "epoch": 2.7159387222946547, "percentage": 90.53, "elapsed_time": "7:55:35", "remaining_time": "0:49:44", "throughput": 3168.85, "total_tokens": 90423280}
3343
+ {"current_steps": 16670, "total_steps": 18408, "loss": 0.2069, "lr": 0.006557779884038439, "epoch": 2.716753585397653, "percentage": 90.56, "elapsed_time": "7:55:43", "remaining_time": "0:49:35", "throughput": 3168.87, "total_tokens": 90449712}
3344
+ {"current_steps": 16675, "total_steps": 18408, "loss": 0.1817, "lr": 0.006520399254808862, "epoch": 2.7175684485006517, "percentage": 90.59, "elapsed_time": "7:55:51", "remaining_time": "0:49:27", "throughput": 3168.94, "total_tokens": 90478320}
3345
+ {"current_steps": 16680, "total_steps": 18408, "loss": 0.1971, "lr": 0.006483123101612742, "epoch": 2.7183833116036507, "percentage": 90.61, "elapsed_time": "7:55:59", "remaining_time": "0:49:18", "throughput": 3168.95, "total_tokens": 90504496}
3346
+ {"current_steps": 16685, "total_steps": 18408, "loss": 0.2031, "lr": 0.006445951451593118, "epoch": 2.719198174706649, "percentage": 90.64, "elapsed_time": "7:56:07", "remaining_time": "0:49:10", "throughput": 3168.97, "total_tokens": 90530544}
3347
+ {"current_steps": 16690, "total_steps": 18408, "loss": 0.1909, "lr": 0.006408884331816805, "epoch": 2.720013037809648, "percentage": 90.67, "elapsed_time": "7:56:16", "remaining_time": "0:49:01", "throughput": 3168.99, "total_tokens": 90557168}
3348
+ {"current_steps": 16695, "total_steps": 18408, "loss": 0.1729, "lr": 0.006371921769274663, "epoch": 2.7208279009126466, "percentage": 90.69, "elapsed_time": "7:56:24", "remaining_time": "0:48:52", "throughput": 3169.03, "total_tokens": 90584112}
3349
+ {"current_steps": 16700, "total_steps": 18408, "loss": 0.1742, "lr": 0.006335063790881279, "epoch": 2.7216427640156455, "percentage": 90.72, "elapsed_time": "7:56:32", "remaining_time": "0:48:44", "throughput": 3169.09, "total_tokens": 90612400}
3350
+ {"current_steps": 16705, "total_steps": 18408, "loss": 0.1998, "lr": 0.0062983104234751505, "epoch": 2.722457627118644, "percentage": 90.75, "elapsed_time": "7:56:40", "remaining_time": "0:48:35", "throughput": 3169.11, "total_tokens": 90638896}
3351
+ {"current_steps": 16710, "total_steps": 18408, "loss": 0.177, "lr": 0.006261661693818604, "epoch": 2.7232724902216425, "percentage": 90.78, "elapsed_time": "7:56:49", "remaining_time": "0:48:27", "throughput": 3169.15, "total_tokens": 90666288}
3352
+ {"current_steps": 16715, "total_steps": 18408, "loss": 0.175, "lr": 0.006225117628597743, "epoch": 2.7240873533246415, "percentage": 90.8, "elapsed_time": "7:56:57", "remaining_time": "0:48:18", "throughput": 3169.16, "total_tokens": 90692272}
3353
+ {"current_steps": 16720, "total_steps": 18408, "loss": 0.1669, "lr": 0.006188678254422497, "epoch": 2.7249022164276404, "percentage": 90.83, "elapsed_time": "7:57:05", "remaining_time": "0:48:09", "throughput": 3169.19, "total_tokens": 90719216}
3354
+ {"current_steps": 16725, "total_steps": 18408, "loss": 0.2061, "lr": 0.006152343597826509, "epoch": 2.725717079530639, "percentage": 90.86, "elapsed_time": "7:57:13", "remaining_time": "0:48:01", "throughput": 3169.22, "total_tokens": 90746096}
3355
+ {"current_steps": 16730, "total_steps": 18408, "loss": 0.1861, "lr": 0.006116113685267232, "epoch": 2.7265319426336374, "percentage": 90.88, "elapsed_time": "7:57:21", "remaining_time": "0:47:52", "throughput": 3169.26, "total_tokens": 90773296}
3356
+ {"current_steps": 16735, "total_steps": 18408, "loss": 0.1742, "lr": 0.006079988543125814, "epoch": 2.7273468057366363, "percentage": 90.91, "elapsed_time": "7:57:29", "remaining_time": "0:47:44", "throughput": 3169.26, "total_tokens": 90798832}
3357
+ {"current_steps": 16740, "total_steps": 18408, "loss": 0.1869, "lr": 0.006043968197707128, "epoch": 2.728161668839635, "percentage": 90.94, "elapsed_time": "7:57:38", "remaining_time": "0:47:35", "throughput": 3169.31, "total_tokens": 90826672}
3358
+ {"current_steps": 16745, "total_steps": 18408, "loss": 0.1848, "lr": 0.006008052675239711, "epoch": 2.728976531942634, "percentage": 90.97, "elapsed_time": "7:57:46", "remaining_time": "0:47:26", "throughput": 3169.34, "total_tokens": 90853680}
3359
+ {"current_steps": 16750, "total_steps": 18408, "loss": 0.2048, "lr": 0.005972242001875794, "epoch": 2.7297913950456323, "percentage": 90.99, "elapsed_time": "7:57:54", "remaining_time": "0:47:18", "throughput": 3169.37, "total_tokens": 90880432}
3360
+ {"current_steps": 16755, "total_steps": 18408, "loss": 0.1877, "lr": 0.0059365362036912835, "epoch": 2.7306062581486312, "percentage": 91.02, "elapsed_time": "7:58:02", "remaining_time": "0:47:09", "throughput": 3169.41, "total_tokens": 90908080}
3361
+ {"current_steps": 16760, "total_steps": 18408, "loss": 0.1867, "lr": 0.005900935306685667, "epoch": 2.7314211212516297, "percentage": 91.05, "elapsed_time": "7:58:11", "remaining_time": "0:47:01", "throughput": 3169.47, "total_tokens": 90936368}
3362
+ {"current_steps": 16765, "total_steps": 18408, "loss": 0.1714, "lr": 0.005865439336782091, "epoch": 2.7322359843546282, "percentage": 91.07, "elapsed_time": "7:58:19", "remaining_time": "0:46:52", "throughput": 3169.52, "total_tokens": 90964464}
3363
+ {"current_steps": 16770, "total_steps": 18408, "loss": 0.186, "lr": 0.005830048319827264, "epoch": 2.733050847457627, "percentage": 91.1, "elapsed_time": "7:58:27", "remaining_time": "0:46:44", "throughput": 3169.56, "total_tokens": 90991664}
3364
+ {"current_steps": 16775, "total_steps": 18408, "loss": 0.1793, "lr": 0.005794762281591537, "epoch": 2.7338657105606257, "percentage": 91.13, "elapsed_time": "7:58:36", "remaining_time": "0:46:35", "throughput": 3169.59, "total_tokens": 91018800}
3365
+ {"current_steps": 16780, "total_steps": 18408, "loss": 0.182, "lr": 0.0057595812477687245, "epoch": 2.7346805736636246, "percentage": 91.16, "elapsed_time": "7:58:44", "remaining_time": "0:46:26", "throughput": 3169.64, "total_tokens": 91046704}
3366
+ {"current_steps": 16785, "total_steps": 18408, "loss": 0.1917, "lr": 0.005724505243976252, "epoch": 2.735495436766623, "percentage": 91.18, "elapsed_time": "7:58:52", "remaining_time": "0:46:18", "throughput": 3169.66, "total_tokens": 91073264}
3367
+ {"current_steps": 16790, "total_steps": 18408, "loss": 0.1838, "lr": 0.005689534295755039, "epoch": 2.736310299869622, "percentage": 91.21, "elapsed_time": "7:59:01", "remaining_time": "0:46:09", "throughput": 3169.68, "total_tokens": 91099824}
3368
+ {"current_steps": 16795, "total_steps": 18408, "loss": 0.1997, "lr": 0.005654668428569531, "epoch": 2.7371251629726205, "percentage": 91.24, "elapsed_time": "7:59:09", "remaining_time": "0:46:01", "throughput": 3169.67, "total_tokens": 91124912}
3369
+ {"current_steps": 16800, "total_steps": 18408, "loss": 0.1682, "lr": 0.005619907667807622, "epoch": 2.737940026075619, "percentage": 91.26, "elapsed_time": "7:59:17", "remaining_time": "0:45:52", "throughput": 3169.74, "total_tokens": 91154160}
3370
+ {"current_steps": 16805, "total_steps": 18408, "loss": 0.1807, "lr": 0.00558525203878068, "epoch": 2.738754889178618, "percentage": 91.29, "elapsed_time": "7:59:25", "remaining_time": "0:45:43", "throughput": 3169.75, "total_tokens": 91180144}
3371
+ {"current_steps": 16810, "total_steps": 18408, "loss": 0.189, "lr": 0.005550701566723537, "epoch": 2.739569752281617, "percentage": 91.32, "elapsed_time": "7:59:33", "remaining_time": "0:45:35", "throughput": 3169.75, "total_tokens": 91205808}
3372
+ {"current_steps": 16815, "total_steps": 18408, "loss": 0.1859, "lr": 0.005516256276794467, "epoch": 2.7403846153846154, "percentage": 91.35, "elapsed_time": "7:59:42", "remaining_time": "0:45:26", "throughput": 3169.75, "total_tokens": 91232432}
3373
+ {"current_steps": 16820, "total_steps": 18408, "loss": 0.1906, "lr": 0.005481916194075126, "epoch": 2.741199478487614, "percentage": 91.37, "elapsed_time": "7:59:50", "remaining_time": "0:45:18", "throughput": 3169.84, "total_tokens": 91261936}
3374
+ {"current_steps": 16825, "total_steps": 18408, "loss": 0.2007, "lr": 0.005447681343570543, "epoch": 2.742014341590613, "percentage": 91.4, "elapsed_time": "7:59:58", "remaining_time": "0:45:09", "throughput": 3169.84, "total_tokens": 91287088}
3375
+ {"current_steps": 16830, "total_steps": 18408, "loss": 0.1896, "lr": 0.005413551750209211, "epoch": 2.7428292046936114, "percentage": 91.43, "elapsed_time": "8:00:07", "remaining_time": "0:45:00", "throughput": 3169.89, "total_tokens": 91314992}
3376
+ {"current_steps": 16835, "total_steps": 18408, "loss": 0.1869, "lr": 0.005379527438842851, "epoch": 2.7436440677966103, "percentage": 91.45, "elapsed_time": "8:00:15", "remaining_time": "0:44:52", "throughput": 3169.92, "total_tokens": 91341936}
3377
+ {"current_steps": 16840, "total_steps": 18408, "loss": 0.182, "lr": 0.0053456084342466455, "epoch": 2.744458930899609, "percentage": 91.48, "elapsed_time": "8:00:23", "remaining_time": "0:44:43", "throughput": 3169.95, "total_tokens": 91368688}
3378
+ {"current_steps": 16845, "total_steps": 18408, "loss": 0.193, "lr": 0.005311794761118971, "epoch": 2.7452737940026077, "percentage": 91.51, "elapsed_time": "8:00:31", "remaining_time": "0:44:35", "throughput": 3169.99, "total_tokens": 91396208}
3379
+ {"current_steps": 16850, "total_steps": 18408, "loss": 0.1952, "lr": 0.005278086444081648, "epoch": 2.7460886571056062, "percentage": 91.54, "elapsed_time": "8:00:39", "remaining_time": "0:44:26", "throughput": 3170.02, "total_tokens": 91422896}
3380
+ {"current_steps": 16855, "total_steps": 18408, "loss": 0.1866, "lr": 0.005244483507679659, "epoch": 2.7469035202086047, "percentage": 91.56, "elapsed_time": "8:00:48", "remaining_time": "0:44:18", "throughput": 3170.04, "total_tokens": 91449520}
3381
+ {"current_steps": 16860, "total_steps": 18408, "loss": 0.1773, "lr": 0.005210985976381316, "epoch": 2.7477183833116037, "percentage": 91.59, "elapsed_time": "8:00:56", "remaining_time": "0:44:09", "throughput": 3170.11, "total_tokens": 91477872}
3382
+ {"current_steps": 16865, "total_steps": 18408, "loss": 0.1923, "lr": 0.005177593874578157, "epoch": 2.7485332464146026, "percentage": 91.62, "elapsed_time": "8:01:04", "remaining_time": "0:44:00", "throughput": 3170.13, "total_tokens": 91504304}
3383
+ {"current_steps": 16870, "total_steps": 18408, "loss": 0.1946, "lr": 0.005144307226584999, "epoch": 2.749348109517601, "percentage": 91.64, "elapsed_time": "8:01:12", "remaining_time": "0:43:52", "throughput": 3170.17, "total_tokens": 91531632}
3384
+ {"current_steps": 16875, "total_steps": 18408, "loss": 0.1972, "lr": 0.00511112605663977, "epoch": 2.7501629726205996, "percentage": 91.67, "elapsed_time": "8:01:21", "remaining_time": "0:43:43", "throughput": 3170.2, "total_tokens": 91558512}
3385
+ {"current_steps": 16880, "total_steps": 18408, "loss": 0.1841, "lr": 0.005078050388903676, "epoch": 2.7509778357235986, "percentage": 91.7, "elapsed_time": "8:01:29", "remaining_time": "0:43:35", "throughput": 3170.23, "total_tokens": 91585584}
3386
+ {"current_steps": 16885, "total_steps": 18408, "loss": 0.1758, "lr": 0.005045080247461103, "epoch": 2.751792698826597, "percentage": 91.73, "elapsed_time": "8:01:37", "remaining_time": "0:43:26", "throughput": 3170.25, "total_tokens": 91611888}
3387
+ {"current_steps": 16890, "total_steps": 18408, "loss": 0.1853, "lr": 0.005012215656319546, "epoch": 2.752607561929596, "percentage": 91.75, "elapsed_time": "8:01:45", "remaining_time": "0:43:17", "throughput": 3170.26, "total_tokens": 91637872}
3388
+ {"current_steps": 16895, "total_steps": 18408, "loss": 0.1869, "lr": 0.004979456639409679, "epoch": 2.7534224250325945, "percentage": 91.78, "elapsed_time": "8:01:53", "remaining_time": "0:43:09", "throughput": 3170.27, "total_tokens": 91663792}
3389
+ {"current_steps": 16900, "total_steps": 18408, "loss": 0.1933, "lr": 0.004946803220585272, "epoch": 2.7542372881355934, "percentage": 91.81, "elapsed_time": "8:02:01", "remaining_time": "0:43:00", "throughput": 3170.31, "total_tokens": 91691120}
3390
+ {"current_steps": 16905, "total_steps": 18408, "loss": 0.194, "lr": 0.00491425542362327, "epoch": 2.755052151238592, "percentage": 91.84, "elapsed_time": "8:02:10", "remaining_time": "0:42:52", "throughput": 3170.36, "total_tokens": 91719216}
3391
+ {"current_steps": 16910, "total_steps": 18408, "loss": 0.1731, "lr": 0.004881813272223617, "epoch": 2.7558670143415904, "percentage": 91.86, "elapsed_time": "8:02:18", "remaining_time": "0:42:43", "throughput": 3170.4, "total_tokens": 91746288}
3392
+ {"current_steps": 16915, "total_steps": 18408, "loss": 0.1963, "lr": 0.004849476790009383, "epoch": 2.7566818774445894, "percentage": 91.89, "elapsed_time": "8:02:26", "remaining_time": "0:42:34", "throughput": 3170.45, "total_tokens": 91774064}
3393
+ {"current_steps": 16920, "total_steps": 18408, "loss": 0.188, "lr": 0.004817246000526681, "epoch": 2.757496740547588, "percentage": 91.92, "elapsed_time": "8:02:34", "remaining_time": "0:42:26", "throughput": 3170.49, "total_tokens": 91801200}
3394
+ {"current_steps": 16925, "total_steps": 18408, "loss": 0.1872, "lr": 0.00478512092724469, "epoch": 2.758311603650587, "percentage": 91.94, "elapsed_time": "8:02:43", "remaining_time": "0:42:17", "throughput": 3170.57, "total_tokens": 91830640}
3395
+ {"current_steps": 16930, "total_steps": 18408, "loss": 0.1968, "lr": 0.004753101593555548, "epoch": 2.7591264667535853, "percentage": 91.97, "elapsed_time": "8:02:52", "remaining_time": "0:42:09", "throughput": 3170.7, "total_tokens": 91862192}
3396
+ {"current_steps": 16935, "total_steps": 18408, "loss": 0.1891, "lr": 0.004721188022774469, "epoch": 2.7599413298565842, "percentage": 92.0, "elapsed_time": "8:03:00", "remaining_time": "0:42:00", "throughput": 3170.71, "total_tokens": 91888432}
3397
+ {"current_steps": 16940, "total_steps": 18408, "loss": 0.2094, "lr": 0.004689380238139617, "epoch": 2.7607561929595827, "percentage": 92.03, "elapsed_time": "8:03:08", "remaining_time": "0:41:52", "throughput": 3170.74, "total_tokens": 91915120}
3398
+ {"current_steps": 16945, "total_steps": 18408, "loss": 0.1772, "lr": 0.004657678262812115, "epoch": 2.7615710560625812, "percentage": 92.05, "elapsed_time": "8:03:17", "remaining_time": "0:41:43", "throughput": 3170.83, "total_tokens": 91944624}
3399
+ {"current_steps": 16950, "total_steps": 18408, "loss": 0.2103, "lr": 0.004626082119876096, "epoch": 2.76238591916558, "percentage": 92.08, "elapsed_time": "8:03:25", "remaining_time": "0:41:34", "throughput": 3170.83, "total_tokens": 91970224}
3400
+ {"current_steps": 16955, "total_steps": 18408, "loss": 0.1637, "lr": 0.004594591832338524, "epoch": 2.763200782268579, "percentage": 92.11, "elapsed_time": "8:03:33", "remaining_time": "0:41:26", "throughput": 3170.87, "total_tokens": 91997424}
3401
+ {"current_steps": 16960, "total_steps": 18408, "loss": 0.1722, "lr": 0.004563207423129423, "epoch": 2.7640156453715776, "percentage": 92.13, "elapsed_time": "8:03:41", "remaining_time": "0:41:17", "throughput": 3170.88, "total_tokens": 92023536}
3402
+ {"current_steps": 16965, "total_steps": 18408, "loss": 0.1795, "lr": 0.004531928915101613, "epoch": 2.764830508474576, "percentage": 92.16, "elapsed_time": "8:03:49", "remaining_time": "0:41:09", "throughput": 3170.94, "total_tokens": 92051888}
3403
+ {"current_steps": 16970, "total_steps": 18408, "loss": 0.1991, "lr": 0.0045007563310308415, "epoch": 2.765645371577575, "percentage": 92.19, "elapsed_time": "8:03:57", "remaining_time": "0:41:00", "throughput": 3170.97, "total_tokens": 92078512}
3404
+ {"current_steps": 16975, "total_steps": 18408, "loss": 0.2001, "lr": 0.004469689693615703, "epoch": 2.7664602346805736, "percentage": 92.22, "elapsed_time": "8:04:06", "remaining_time": "0:40:52", "throughput": 3171.01, "total_tokens": 92106224}
3405
+ {"current_steps": 16980, "total_steps": 18408, "loss": 0.1685, "lr": 0.004438729025477717, "epoch": 2.7672750977835725, "percentage": 92.24, "elapsed_time": "8:04:14", "remaining_time": "0:40:43", "throughput": 3171.1, "total_tokens": 92136112}
3406
+ {"current_steps": 16985, "total_steps": 18408, "loss": 0.2027, "lr": 0.004407874349161134, "epoch": 2.768089960886571, "percentage": 92.27, "elapsed_time": "8:04:23", "remaining_time": "0:40:34", "throughput": 3171.14, "total_tokens": 92163440}
3407
+ {"current_steps": 16990, "total_steps": 18408, "loss": 0.1961, "lr": 0.004377125687133099, "epoch": 2.76890482398957, "percentage": 92.3, "elapsed_time": "8:04:31", "remaining_time": "0:40:26", "throughput": 3171.14, "total_tokens": 92189168}
3408
+ {"current_steps": 16995, "total_steps": 18408, "loss": 0.1895, "lr": 0.004346483061783535, "epoch": 2.7697196870925684, "percentage": 92.32, "elapsed_time": "8:04:39", "remaining_time": "0:40:17", "throughput": 3171.16, "total_tokens": 92215280}
3409
+ {"current_steps": 17000, "total_steps": 18408, "loss": 0.1754, "lr": 0.004315946495425177, "epoch": 2.770534550195567, "percentage": 92.35, "elapsed_time": "8:04:47", "remaining_time": "0:40:09", "throughput": 3171.18, "total_tokens": 92241776}
3410
+ {"current_steps": 17005, "total_steps": 18408, "loss": 0.1945, "lr": 0.004285516010293522, "epoch": 2.771349413298566, "percentage": 92.38, "elapsed_time": "8:04:55", "remaining_time": "0:40:00", "throughput": 3171.21, "total_tokens": 92268528}
3411
+ {"current_steps": 17010, "total_steps": 18408, "loss": 0.1889, "lr": 0.004255191628546778, "epoch": 2.7721642764015644, "percentage": 92.41, "elapsed_time": "8:05:03", "remaining_time": "0:39:51", "throughput": 3171.24, "total_tokens": 92295472}
3412
+ {"current_steps": 17015, "total_steps": 18408, "loss": 0.1679, "lr": 0.00422497337226595, "epoch": 2.7729791395045633, "percentage": 92.43, "elapsed_time": "8:05:12", "remaining_time": "0:39:43", "throughput": 3171.26, "total_tokens": 92322160}
3413
+ {"current_steps": 17020, "total_steps": 18408, "loss": 0.1998, "lr": 0.004194861263454769, "epoch": 2.773794002607562, "percentage": 92.46, "elapsed_time": "8:05:20", "remaining_time": "0:39:34", "throughput": 3171.32, "total_tokens": 92350832}
3414
+ {"current_steps": 17025, "total_steps": 18408, "loss": 0.1928, "lr": 0.004164855324039645, "epoch": 2.7746088657105608, "percentage": 92.49, "elapsed_time": "8:05:28", "remaining_time": "0:39:26", "throughput": 3171.34, "total_tokens": 92377520}
3415
+ {"current_steps": 17030, "total_steps": 18408, "loss": 0.1843, "lr": 0.00413495557586967, "epoch": 2.7754237288135593, "percentage": 92.51, "elapsed_time": "8:05:37", "remaining_time": "0:39:17", "throughput": 3171.38, "total_tokens": 92404720}
3416
+ {"current_steps": 17035, "total_steps": 18408, "loss": 0.1742, "lr": 0.004105162040716625, "epoch": 2.7762385919165578, "percentage": 92.54, "elapsed_time": "8:05:45", "remaining_time": "0:39:09", "throughput": 3171.39, "total_tokens": 92430896}
3417
+ {"current_steps": 17040, "total_steps": 18408, "loss": 0.1915, "lr": 0.004075474740274976, "epoch": 2.7770534550195567, "percentage": 92.57, "elapsed_time": "8:05:53", "remaining_time": "0:39:00", "throughput": 3171.41, "total_tokens": 92457392}
3418
+ {"current_steps": 17045, "total_steps": 18408, "loss": 0.1641, "lr": 0.004045893696161829, "epoch": 2.7778683181225556, "percentage": 92.6, "elapsed_time": "8:06:01", "remaining_time": "0:38:51", "throughput": 3171.43, "total_tokens": 92484656}
3419
+ {"current_steps": 17050, "total_steps": 18408, "loss": 0.1958, "lr": 0.0040164189299168535, "epoch": 2.778683181225554, "percentage": 92.62, "elapsed_time": "8:06:10", "remaining_time": "0:38:43", "throughput": 3171.47, "total_tokens": 92512368}
3420
+ {"current_steps": 17055, "total_steps": 18408, "loss": 0.1659, "lr": 0.0039870504630024175, "epoch": 2.7794980443285526, "percentage": 92.65, "elapsed_time": "8:06:18", "remaining_time": "0:38:34", "throughput": 3171.5, "total_tokens": 92539184}
3421
+ {"current_steps": 17060, "total_steps": 18408, "loss": 0.1992, "lr": 0.003957788316803428, "epoch": 2.7803129074315516, "percentage": 92.68, "elapsed_time": "8:06:26", "remaining_time": "0:38:26", "throughput": 3171.51, "total_tokens": 92565296}
3422
+ {"current_steps": 17065, "total_steps": 18408, "loss": 0.183, "lr": 0.0039286325126274115, "epoch": 2.78112777053455, "percentage": 92.7, "elapsed_time": "8:06:34", "remaining_time": "0:38:17", "throughput": 3171.55, "total_tokens": 92592752}
3423
+ {"current_steps": 17070, "total_steps": 18408, "loss": 0.1896, "lr": 0.003899583071704432, "epoch": 2.781942633637549, "percentage": 92.73, "elapsed_time": "8:06:42", "remaining_time": "0:38:09", "throughput": 3171.57, "total_tokens": 92619248}
3424
+ {"current_steps": 17075, "total_steps": 18408, "loss": 0.1811, "lr": 0.003870640015187121, "epoch": 2.7827574967405475, "percentage": 92.76, "elapsed_time": "8:06:51", "remaining_time": "0:38:00", "throughput": 3171.62, "total_tokens": 92647216}
3425
+ {"current_steps": 17080, "total_steps": 18408, "loss": 0.198, "lr": 0.0038418033641506697, "epoch": 2.7835723598435465, "percentage": 92.79, "elapsed_time": "8:06:59", "remaining_time": "0:37:51", "throughput": 3171.65, "total_tokens": 92673776}
3426
+ {"current_steps": 17085, "total_steps": 18408, "loss": 0.1878, "lr": 0.0038130731395926987, "epoch": 2.784387222946545, "percentage": 92.81, "elapsed_time": "8:07:07", "remaining_time": "0:37:43", "throughput": 3171.69, "total_tokens": 92701680}
3427
+ {"current_steps": 17090, "total_steps": 18408, "loss": 0.2003, "lr": 0.0037844493624334227, "epoch": 2.7852020860495434, "percentage": 92.84, "elapsed_time": "8:07:15", "remaining_time": "0:37:34", "throughput": 3171.71, "total_tokens": 92727984}
3428
+ {"current_steps": 17095, "total_steps": 18408, "loss": 0.1866, "lr": 0.00375593205351552, "epoch": 2.7860169491525424, "percentage": 92.87, "elapsed_time": "8:07:24", "remaining_time": "0:37:26", "throughput": 3171.75, "total_tokens": 92755376}
3429
+ {"current_steps": 17100, "total_steps": 18408, "loss": 0.1889, "lr": 0.0037275212336041474, "epoch": 2.7868318122555413, "percentage": 92.89, "elapsed_time": "8:07:32", "remaining_time": "0:37:17", "throughput": 3171.74, "total_tokens": 92780720}
3430
+ {"current_steps": 17105, "total_steps": 18408, "loss": 0.1793, "lr": 0.0036992169233868886, "epoch": 2.78764667535854, "percentage": 92.92, "elapsed_time": "8:07:40", "remaining_time": "0:37:08", "throughput": 3171.78, "total_tokens": 92808048}
3431
+ {"current_steps": 17110, "total_steps": 18408, "loss": 0.1782, "lr": 0.003671019143473808, "epoch": 2.7884615384615383, "percentage": 92.95, "elapsed_time": "8:07:49", "remaining_time": "0:37:00", "throughput": 3171.85, "total_tokens": 92837232}
3432
+ {"current_steps": 17115, "total_steps": 18408, "loss": 0.1739, "lr": 0.003642927914397398, "epoch": 2.7892764015645373, "percentage": 92.98, "elapsed_time": "8:07:57", "remaining_time": "0:36:51", "throughput": 3171.88, "total_tokens": 92864240}
3433
+ {"current_steps": 17120, "total_steps": 18408, "loss": 0.1745, "lr": 0.003614943256612546, "epoch": 2.7900912646675358, "percentage": 93.0, "elapsed_time": "8:08:05", "remaining_time": "0:36:43", "throughput": 3171.91, "total_tokens": 92891056}
3434
+ {"current_steps": 17125, "total_steps": 18408, "loss": 0.1979, "lr": 0.0035870651904965686, "epoch": 2.7909061277705347, "percentage": 93.03, "elapsed_time": "8:08:13", "remaining_time": "0:36:34", "throughput": 3171.94, "total_tokens": 92917744}
3435
+ {"current_steps": 17130, "total_steps": 18408, "loss": 0.1937, "lr": 0.0035592937363490783, "epoch": 2.791720990873533, "percentage": 93.06, "elapsed_time": "8:08:21", "remaining_time": "0:36:26", "throughput": 3171.96, "total_tokens": 92944304}
3436
+ {"current_steps": 17135, "total_steps": 18408, "loss": 0.2057, "lr": 0.0035316289143921984, "epoch": 2.792535853976532, "percentage": 93.08, "elapsed_time": "8:08:30", "remaining_time": "0:36:17", "throughput": 3172.01, "total_tokens": 92972016}
3437
+ {"current_steps": 17140, "total_steps": 18408, "loss": 0.1909, "lr": 0.003504070744770282, "epoch": 2.7933507170795306, "percentage": 93.11, "elapsed_time": "8:08:38", "remaining_time": "0:36:08", "throughput": 3172.05, "total_tokens": 92999472}
3438
+ {"current_steps": 17145, "total_steps": 18408, "loss": 0.179, "lr": 0.0034766192475500944, "epoch": 2.794165580182529, "percentage": 93.14, "elapsed_time": "8:08:46", "remaining_time": "0:36:00", "throughput": 3172.05, "total_tokens": 93025328}
3439
+ {"current_steps": 17150, "total_steps": 18408, "loss": 0.1858, "lr": 0.0034492744427206787, "epoch": 2.794980443285528, "percentage": 93.17, "elapsed_time": "8:08:55", "remaining_time": "0:35:51", "throughput": 3172.13, "total_tokens": 93054512}
3440
+ {"current_steps": 17155, "total_steps": 18408, "loss": 0.1814, "lr": 0.0034220363501934747, "epoch": 2.7957953063885266, "percentage": 93.19, "elapsed_time": "8:09:03", "remaining_time": "0:35:43", "throughput": 3172.17, "total_tokens": 93082032}
3441
+ {"current_steps": 17160, "total_steps": 18408, "loss": 0.1771, "lr": 0.0033949049898020834, "epoch": 2.7966101694915255, "percentage": 93.22, "elapsed_time": "8:09:11", "remaining_time": "0:35:34", "throughput": 3172.2, "total_tokens": 93109040}
3442
+ {"current_steps": 17165, "total_steps": 18408, "loss": 0.1771, "lr": 0.003367880381302518, "epoch": 2.797425032594524, "percentage": 93.25, "elapsed_time": "8:09:19", "remaining_time": "0:35:26", "throughput": 3172.24, "total_tokens": 93136240}
3443
+ {"current_steps": 17170, "total_steps": 18408, "loss": 0.1868, "lr": 0.003340962544372972, "epoch": 2.798239895697523, "percentage": 93.27, "elapsed_time": "8:09:27", "remaining_time": "0:35:17", "throughput": 3172.24, "total_tokens": 93161968}
3444
+ {"current_steps": 17175, "total_steps": 18408, "loss": 0.1931, "lr": 0.003314151498613932, "epoch": 2.7990547588005215, "percentage": 93.3, "elapsed_time": "8:09:36", "remaining_time": "0:35:08", "throughput": 3172.27, "total_tokens": 93189104}
3445
+ {"current_steps": 17180, "total_steps": 18408, "loss": 0.1802, "lr": 0.0032874472635481323, "epoch": 2.79986962190352, "percentage": 93.33, "elapsed_time": "8:09:44", "remaining_time": "0:35:00", "throughput": 3172.27, "total_tokens": 93214768}
3446
+ {"current_steps": 17185, "total_steps": 18408, "loss": 0.1778, "lr": 0.0032608498586204683, "epoch": 2.800684485006519, "percentage": 93.36, "elapsed_time": "8:09:52", "remaining_time": "0:34:51", "throughput": 3172.27, "total_tokens": 93240816}
3447
+ {"current_steps": 17190, "total_steps": 18408, "loss": 0.1613, "lr": 0.003234359303198164, "epoch": 2.801499348109518, "percentage": 93.38, "elapsed_time": "8:10:00", "remaining_time": "0:34:43", "throughput": 3172.29, "total_tokens": 93267376}
3448
+ {"current_steps": 17195, "total_steps": 18408, "loss": 0.2004, "lr": 0.0032079756165705074, "epoch": 2.8023142112125163, "percentage": 93.41, "elapsed_time": "8:10:08", "remaining_time": "0:34:34", "throughput": 3172.31, "total_tokens": 93293744}
3449
+ {"current_steps": 17200, "total_steps": 18408, "loss": 0.156, "lr": 0.0031816988179490632, "epoch": 2.803129074315515, "percentage": 93.44, "elapsed_time": "8:10:16", "remaining_time": "0:34:26", "throughput": 3172.29, "total_tokens": 93318768}
3450
+ {"current_steps": 17205, "total_steps": 18408, "loss": 0.1974, "lr": 0.0031555289264675102, "epoch": 2.8039439374185138, "percentage": 93.46, "elapsed_time": "8:10:25", "remaining_time": "0:34:17", "throughput": 3172.32, "total_tokens": 93345840}
3451
+ {"current_steps": 17210, "total_steps": 18408, "loss": 0.1829, "lr": 0.003129465961181721, "epoch": 2.8047588005215123, "percentage": 93.49, "elapsed_time": "8:10:33", "remaining_time": "0:34:08", "throughput": 3172.38, "total_tokens": 93374128}
3452
+ {"current_steps": 17215, "total_steps": 18408, "loss": 0.1816, "lr": 0.0031035099410696818, "epoch": 2.805573663624511, "percentage": 93.52, "elapsed_time": "8:10:41", "remaining_time": "0:34:00", "throughput": 3172.39, "total_tokens": 93400432}
3453
+ {"current_steps": 17220, "total_steps": 18408, "loss": 0.1849, "lr": 0.0030776608850315245, "epoch": 2.8063885267275097, "percentage": 93.55, "elapsed_time": "8:10:50", "remaining_time": "0:33:51", "throughput": 3172.43, "total_tokens": 93428144}
3454
+ {"current_steps": 17225, "total_steps": 18408, "loss": 0.2045, "lr": 0.0030519188118894756, "epoch": 2.8072033898305087, "percentage": 93.57, "elapsed_time": "8:10:58", "remaining_time": "0:33:43", "throughput": 3172.43, "total_tokens": 93453808}
3455
+ {"current_steps": 17230, "total_steps": 18408, "loss": 0.1795, "lr": 0.003026283740387875, "epoch": 2.808018252933507, "percentage": 93.6, "elapsed_time": "8:11:06", "remaining_time": "0:33:34", "throughput": 3172.46, "total_tokens": 93480816}
3456
+ {"current_steps": 17235, "total_steps": 18408, "loss": 0.1799, "lr": 0.003000755689193141, "epoch": 2.8088331160365057, "percentage": 93.63, "elapsed_time": "8:11:14", "remaining_time": "0:33:26", "throughput": 3172.47, "total_tokens": 93507120}
3457
+ {"current_steps": 17240, "total_steps": 18408, "loss": 0.1865, "lr": 0.0029753346768937703, "epoch": 2.8096479791395046, "percentage": 93.65, "elapsed_time": "8:11:22", "remaining_time": "0:33:17", "throughput": 3172.5, "total_tokens": 93534576}
3458
+ {"current_steps": 17245, "total_steps": 18408, "loss": 0.1888, "lr": 0.0029500207220002905, "epoch": 2.8104628422425035, "percentage": 93.68, "elapsed_time": "8:11:31", "remaining_time": "0:33:08", "throughput": 3172.53, "total_tokens": 93561456}
3459
+ {"current_steps": 17250, "total_steps": 18408, "loss": 0.1917, "lr": 0.0029248138429453395, "epoch": 2.811277705345502, "percentage": 93.71, "elapsed_time": "8:11:39", "remaining_time": "0:33:00", "throughput": 3172.55, "total_tokens": 93588272}
3460
+ {"current_steps": 17255, "total_steps": 18408, "loss": 0.1892, "lr": 0.0028997140580835187, "epoch": 2.8120925684485005, "percentage": 93.74, "elapsed_time": "8:11:47", "remaining_time": "0:32:51", "throughput": 3172.55, "total_tokens": 93613744}
3461
+ {"current_steps": 17260, "total_steps": 18408, "loss": 0.1867, "lr": 0.0028747213856914755, "epoch": 2.8129074315514995, "percentage": 93.76, "elapsed_time": "8:11:55", "remaining_time": "0:32:43", "throughput": 3172.55, "total_tokens": 93639728}
3462
+ {"current_steps": 17265, "total_steps": 18408, "loss": 0.1962, "lr": 0.002849835843967885, "epoch": 2.813722294654498, "percentage": 93.79, "elapsed_time": "8:12:03", "remaining_time": "0:32:34", "throughput": 3172.56, "total_tokens": 93666032}
3463
+ {"current_steps": 17270, "total_steps": 18408, "loss": 0.1795, "lr": 0.0028250574510333523, "epoch": 2.8145371577574965, "percentage": 93.82, "elapsed_time": "8:12:11", "remaining_time": "0:32:25", "throughput": 3172.56, "total_tokens": 93691888}
3464
+ {"current_steps": 17275, "total_steps": 18408, "loss": 0.1913, "lr": 0.002800386224930529, "epoch": 2.8153520208604954, "percentage": 93.85, "elapsed_time": "8:12:20", "remaining_time": "0:32:17", "throughput": 3172.57, "total_tokens": 93718192}
3465
+ {"current_steps": 17280, "total_steps": 18408, "loss": 0.1599, "lr": 0.002775822183623977, "epoch": 2.8161668839634943, "percentage": 93.87, "elapsed_time": "8:12:28", "remaining_time": "0:32:08", "throughput": 3172.59, "total_tokens": 93745456}
3466
+ {"current_steps": 17285, "total_steps": 18408, "loss": 0.1815, "lr": 0.0027513653450002727, "epoch": 2.816981747066493, "percentage": 93.9, "elapsed_time": "8:12:36", "remaining_time": "0:32:00", "throughput": 3172.63, "total_tokens": 93773360}
3467
+ {"current_steps": 17290, "total_steps": 18408, "loss": 0.1773, "lr": 0.0027270157268678707, "epoch": 2.8177966101694913, "percentage": 93.93, "elapsed_time": "8:12:45", "remaining_time": "0:31:51", "throughput": 3172.69, "total_tokens": 93801968}
3468
+ {"current_steps": 17295, "total_steps": 18408, "loss": 0.1879, "lr": 0.0027027733469571712, "epoch": 2.8186114732724903, "percentage": 93.95, "elapsed_time": "8:12:53", "remaining_time": "0:31:43", "throughput": 3172.75, "total_tokens": 93830128}
3469
+ {"current_steps": 17300, "total_steps": 18408, "loss": 0.1624, "lr": 0.0026786382229205042, "epoch": 2.819426336375489, "percentage": 93.98, "elapsed_time": "8:13:02", "remaining_time": "0:31:34", "throughput": 3172.77, "total_tokens": 93857072}
3470
+ {"current_steps": 17305, "total_steps": 18408, "loss": 0.1774, "lr": 0.0026546103723320944, "epoch": 2.8202411994784877, "percentage": 94.01, "elapsed_time": "8:13:10", "remaining_time": "0:31:26", "throughput": 3172.75, "total_tokens": 93881840}
3471
+ {"current_steps": 17310, "total_steps": 18408, "loss": 0.1881, "lr": 0.002630689812688064, "epoch": 2.8210560625814862, "percentage": 94.04, "elapsed_time": "8:13:18", "remaining_time": "0:31:17", "throughput": 3172.79, "total_tokens": 93909232}
3472
+ {"current_steps": 17315, "total_steps": 18408, "loss": 0.1671, "lr": 0.002606876561406346, "epoch": 2.821870925684485, "percentage": 94.06, "elapsed_time": "8:13:26", "remaining_time": "0:31:08", "throughput": 3172.86, "total_tokens": 93938608}
3473
+ {"current_steps": 17320, "total_steps": 18408, "loss": 0.1819, "lr": 0.00258317063582682, "epoch": 2.8226857887874837, "percentage": 94.09, "elapsed_time": "8:13:35", "remaining_time": "0:31:00", "throughput": 3172.88, "total_tokens": 93964976}
3474
+ {"current_steps": 17325, "total_steps": 18408, "loss": 0.1906, "lr": 0.002559572053211162, "epoch": 2.823500651890482, "percentage": 94.12, "elapsed_time": "8:13:43", "remaining_time": "0:30:51", "throughput": 3172.93, "total_tokens": 93992560}
3475
+ {"current_steps": 17330, "total_steps": 18408, "loss": 0.1818, "lr": 0.0025360808307429428, "epoch": 2.824315514993481, "percentage": 94.14, "elapsed_time": "8:13:51", "remaining_time": "0:30:43", "throughput": 3172.94, "total_tokens": 94018736}
3476
+ {"current_steps": 17335, "total_steps": 18408, "loss": 0.1972, "lr": 0.0025126969855274795, "epoch": 2.82513037809648, "percentage": 94.17, "elapsed_time": "8:13:59", "remaining_time": "0:30:34", "throughput": 3172.99, "total_tokens": 94046960}
3477
+ {"current_steps": 17340, "total_steps": 18408, "loss": 0.1959, "lr": 0.002489420534591952, "epoch": 2.8259452411994785, "percentage": 94.2, "elapsed_time": "8:14:08", "remaining_time": "0:30:26", "throughput": 3173.03, "total_tokens": 94074544}
3478
+ {"current_steps": 17345, "total_steps": 18408, "loss": 0.1997, "lr": 0.0024662514948853354, "epoch": 2.826760104302477, "percentage": 94.23, "elapsed_time": "8:14:16", "remaining_time": "0:30:17", "throughput": 3173.06, "total_tokens": 94101488}
3479
+ {"current_steps": 17350, "total_steps": 18408, "loss": 0.1774, "lr": 0.002443189883278385, "epoch": 2.827574967405476, "percentage": 94.25, "elapsed_time": "8:14:24", "remaining_time": "0:30:08", "throughput": 3173.07, "total_tokens": 94127792}
3480
+ {"current_steps": 17355, "total_steps": 18408, "loss": 0.1851, "lr": 0.0024202357165636177, "epoch": 2.8283898305084745, "percentage": 94.28, "elapsed_time": "8:14:32", "remaining_time": "0:30:00", "throughput": 3173.07, "total_tokens": 94153520}
3481
+ {"current_steps": 17360, "total_steps": 18408, "loss": 0.1804, "lr": 0.0023973890114553473, "epoch": 2.8292046936114734, "percentage": 94.31, "elapsed_time": "8:14:41", "remaining_time": "0:29:51", "throughput": 3173.12, "total_tokens": 94181552}
3482
+ {"current_steps": 17365, "total_steps": 18408, "loss": 0.1676, "lr": 0.002374649784589633, "epoch": 2.830019556714472, "percentage": 94.33, "elapsed_time": "8:14:49", "remaining_time": "0:29:43", "throughput": 3173.16, "total_tokens": 94208880}
3483
+ {"current_steps": 17370, "total_steps": 18408, "loss": 0.1777, "lr": 0.002352018052524213, "epoch": 2.830834419817471, "percentage": 94.36, "elapsed_time": "8:14:57", "remaining_time": "0:29:34", "throughput": 3173.17, "total_tokens": 94235056}
3484
+ {"current_steps": 17375, "total_steps": 18408, "loss": 0.192, "lr": 0.0023294938317386213, "epoch": 2.8316492829204694, "percentage": 94.39, "elapsed_time": "8:15:05", "remaining_time": "0:29:26", "throughput": 3173.2, "total_tokens": 94262128}
3485
+ {"current_steps": 17380, "total_steps": 18408, "loss": 0.1792, "lr": 0.0023070771386340893, "epoch": 2.832464146023468, "percentage": 94.42, "elapsed_time": "8:15:14", "remaining_time": "0:29:17", "throughput": 3173.26, "total_tokens": 94290608}
3486
+ {"current_steps": 17385, "total_steps": 18408, "loss": 0.1703, "lr": 0.0022847679895335424, "epoch": 2.833279009126467, "percentage": 94.44, "elapsed_time": "8:15:22", "remaining_time": "0:29:08", "throughput": 3173.3, "total_tokens": 94318448}
3487
+ {"current_steps": 17390, "total_steps": 18408, "loss": 0.1881, "lr": 0.0022625664006816035, "epoch": 2.8340938722294653, "percentage": 94.47, "elapsed_time": "8:15:30", "remaining_time": "0:29:00", "throughput": 3173.33, "total_tokens": 94345456}
3488
+ {"current_steps": 17395, "total_steps": 18408, "loss": 0.2032, "lr": 0.002240472388244541, "epoch": 2.8349087353324642, "percentage": 94.5, "elapsed_time": "8:15:39", "remaining_time": "0:28:51", "throughput": 3173.4, "total_tokens": 94374192}
3489
+ {"current_steps": 17400, "total_steps": 18408, "loss": 0.1843, "lr": 0.0022184859683103517, "epoch": 2.8357235984354627, "percentage": 94.52, "elapsed_time": "8:15:47", "remaining_time": "0:28:43", "throughput": 3173.44, "total_tokens": 94401648}
3490
+ {"current_steps": 17405, "total_steps": 18408, "loss": 0.1767, "lr": 0.0021966071568886468, "epoch": 2.8365384615384617, "percentage": 94.55, "elapsed_time": "8:15:55", "remaining_time": "0:28:34", "throughput": 3173.48, "total_tokens": 94429808}
3491
+ {"current_steps": 17410, "total_steps": 18408, "loss": 0.1894, "lr": 0.002174835969910699, "epoch": 2.83735332464146, "percentage": 94.58, "elapsed_time": "8:16:04", "remaining_time": "0:28:26", "throughput": 3173.5, "total_tokens": 94456176}
3492
+ {"current_steps": 17415, "total_steps": 18408, "loss": 0.18, "lr": 0.002153172423229377, "epoch": 2.8381681877444587, "percentage": 94.61, "elapsed_time": "8:16:12", "remaining_time": "0:28:17", "throughput": 3173.53, "total_tokens": 94482992}
3493
+ {"current_steps": 17420, "total_steps": 18408, "loss": 0.194, "lr": 0.0021316165326192293, "epoch": 2.8389830508474576, "percentage": 94.63, "elapsed_time": "8:16:20", "remaining_time": "0:28:09", "throughput": 3173.58, "total_tokens": 94511088}
3494
+ {"current_steps": 17425, "total_steps": 18408, "loss": 0.1841, "lr": 0.0021101683137763672, "epoch": 2.8397979139504566, "percentage": 94.66, "elapsed_time": "8:16:29", "remaining_time": "0:28:00", "throughput": 3173.65, "total_tokens": 94540208}
3495
+ {"current_steps": 17430, "total_steps": 18408, "loss": 0.1832, "lr": 0.002088827782318531, "epoch": 2.840612777053455, "percentage": 94.69, "elapsed_time": "8:16:37", "remaining_time": "0:27:51", "throughput": 3173.69, "total_tokens": 94567600}
3496
+ {"current_steps": 17435, "total_steps": 18408, "loss": 0.1751, "lr": 0.00206759495378499, "epoch": 2.8414276401564535, "percentage": 94.71, "elapsed_time": "8:16:45", "remaining_time": "0:27:43", "throughput": 3173.73, "total_tokens": 94595184}
3497
+ {"current_steps": 17440, "total_steps": 18408, "loss": 0.1883, "lr": 0.0020464698436366943, "epoch": 2.8422425032594525, "percentage": 94.74, "elapsed_time": "8:16:53", "remaining_time": "0:27:34", "throughput": 3173.76, "total_tokens": 94622256}
3498
+ {"current_steps": 17445, "total_steps": 18408, "loss": 0.1805, "lr": 0.0020254524672560226, "epoch": 2.843057366362451, "percentage": 94.77, "elapsed_time": "8:17:02", "remaining_time": "0:27:26", "throughput": 3173.79, "total_tokens": 94649392}
3499
+ {"current_steps": 17450, "total_steps": 18408, "loss": 0.1748, "lr": 0.0020045428399470323, "epoch": 2.84387222946545, "percentage": 94.8, "elapsed_time": "8:17:10", "remaining_time": "0:27:17", "throughput": 3173.87, "total_tokens": 94678896}
3500
+ {"current_steps": 17455, "total_steps": 18408, "loss": 0.1863, "lr": 0.0019837409769352275, "epoch": 2.8446870925684484, "percentage": 94.82, "elapsed_time": "8:17:19", "remaining_time": "0:27:09", "throughput": 3173.9, "total_tokens": 94706224}
3501
+ {"current_steps": 17460, "total_steps": 18408, "loss": 0.1925, "lr": 0.0019630468933677257, "epoch": 2.8455019556714474, "percentage": 94.85, "elapsed_time": "8:17:27", "remaining_time": "0:27:00", "throughput": 3173.92, "total_tokens": 94732592}
3502
+ {"current_steps": 17465, "total_steps": 18408, "loss": 0.202, "lr": 0.0019424606043130731, "epoch": 2.846316818774446, "percentage": 94.88, "elapsed_time": "8:17:35", "remaining_time": "0:26:52", "throughput": 3173.94, "total_tokens": 94759472}
3503
+ {"current_steps": 17470, "total_steps": 18408, "loss": 0.1787, "lr": 0.0019219821247613955, "epoch": 2.8471316818774444, "percentage": 94.9, "elapsed_time": "8:17:43", "remaining_time": "0:26:43", "throughput": 3173.96, "total_tokens": 94785776}
3504
+ {"current_steps": 17475, "total_steps": 18408, "loss": 0.1877, "lr": 0.0019016114696242979, "epoch": 2.8479465449804433, "percentage": 94.93, "elapsed_time": "8:17:51", "remaining_time": "0:26:34", "throughput": 3174.0, "total_tokens": 94813232}
3505
+ {"current_steps": 17480, "total_steps": 18408, "loss": 0.2125, "lr": 0.0018813486537348654, "epoch": 2.8487614080834422, "percentage": 94.96, "elapsed_time": "8:18:00", "remaining_time": "0:26:26", "throughput": 3174.01, "total_tokens": 94839664}
3506
+ {"current_steps": 17485, "total_steps": 18408, "loss": 0.1713, "lr": 0.0018611936918476457, "epoch": 2.8495762711864407, "percentage": 94.99, "elapsed_time": "8:18:08", "remaining_time": "0:26:17", "throughput": 3174.05, "total_tokens": 94867120}
3507
+ {"current_steps": 17490, "total_steps": 18408, "loss": 0.1825, "lr": 0.0018411465986386654, "epoch": 2.8503911342894392, "percentage": 95.01, "elapsed_time": "8:18:16", "remaining_time": "0:26:09", "throughput": 3174.07, "total_tokens": 94893680}
3508
+ {"current_steps": 17495, "total_steps": 18408, "loss": 0.182, "lr": 0.0018212073887054314, "epoch": 2.851205997392438, "percentage": 95.04, "elapsed_time": "8:18:24", "remaining_time": "0:26:00", "throughput": 3174.09, "total_tokens": 94920176}
3509
+ {"current_steps": 17500, "total_steps": 18408, "loss": 0.1825, "lr": 0.0018013760765668296, "epoch": 2.8520208604954367, "percentage": 95.07, "elapsed_time": "8:18:32", "remaining_time": "0:25:52", "throughput": 3174.09, "total_tokens": 94946096}
3510
+ {"current_steps": 17505, "total_steps": 18408, "loss": 0.1868, "lr": 0.001781652676663259, "epoch": 2.8528357235984356, "percentage": 95.09, "elapsed_time": "8:18:41", "remaining_time": "0:25:43", "throughput": 3174.16, "total_tokens": 94975280}
3511
+ {"current_steps": 17510, "total_steps": 18408, "loss": 0.191, "lr": 0.0017620372033564646, "epoch": 2.853650586701434, "percentage": 95.12, "elapsed_time": "8:18:49", "remaining_time": "0:25:34", "throughput": 3174.16, "total_tokens": 95001840}
3512
+ {"current_steps": 17515, "total_steps": 18408, "loss": 0.1996, "lr": 0.001742529670929671, "epoch": 2.854465449804433, "percentage": 95.15, "elapsed_time": "8:18:58", "remaining_time": "0:25:26", "throughput": 3174.2, "total_tokens": 95029360}
3513
+ {"current_steps": 17520, "total_steps": 18408, "loss": 0.1787, "lr": 0.0017231300935874494, "epoch": 2.8552803129074316, "percentage": 95.18, "elapsed_time": "8:19:06", "remaining_time": "0:25:17", "throughput": 3174.2, "total_tokens": 95055536}
3514
+ {"current_steps": 17525, "total_steps": 18408, "loss": 0.1886, "lr": 0.001703838485455783, "epoch": 2.85609517601043, "percentage": 95.2, "elapsed_time": "8:19:14", "remaining_time": "0:25:09", "throughput": 3174.23, "total_tokens": 95082096}
3515
+ {"current_steps": 17530, "total_steps": 18408, "loss": 0.1838, "lr": 0.0016846548605820688, "epoch": 2.856910039113429, "percentage": 95.23, "elapsed_time": "8:19:22", "remaining_time": "0:25:00", "throughput": 3174.26, "total_tokens": 95109168}
3516
+ {"current_steps": 17535, "total_steps": 18408, "loss": 0.207, "lr": 0.001665579232935016, "epoch": 2.8577249022164275, "percentage": 95.26, "elapsed_time": "8:19:30", "remaining_time": "0:24:52", "throughput": 3174.3, "total_tokens": 95136816}
3517
+ {"current_steps": 17540, "total_steps": 18408, "loss": 0.1864, "lr": 0.0016466116164047472, "epoch": 2.8585397653194264, "percentage": 95.28, "elapsed_time": "8:19:39", "remaining_time": "0:24:43", "throughput": 3174.35, "total_tokens": 95164656}
3518
+ {"current_steps": 17545, "total_steps": 18408, "loss": 0.1844, "lr": 0.0016277520248026978, "epoch": 2.859354628422425, "percentage": 95.31, "elapsed_time": "8:19:47", "remaining_time": "0:24:35", "throughput": 3174.43, "total_tokens": 95194032}
3519
+ {"current_steps": 17550, "total_steps": 18408, "loss": 0.1965, "lr": 0.0016090004718616656, "epoch": 2.860169491525424, "percentage": 95.34, "elapsed_time": "8:19:56", "remaining_time": "0:24:26", "throughput": 3174.47, "total_tokens": 95221360}
3520
+ {"current_steps": 17555, "total_steps": 18408, "loss": 0.1812, "lr": 0.0015903569712357624, "epoch": 2.8609843546284224, "percentage": 95.37, "elapsed_time": "8:20:04", "remaining_time": "0:24:17", "throughput": 3174.51, "total_tokens": 95249328}
3521
+ {"current_steps": 17560, "total_steps": 18408, "loss": 0.1845, "lr": 0.0015718215365004284, "epoch": 2.861799217731421, "percentage": 95.39, "elapsed_time": "8:20:12", "remaining_time": "0:24:09", "throughput": 3174.52, "total_tokens": 95275120}
3522
+ {"current_steps": 17565, "total_steps": 18408, "loss": 0.1861, "lr": 0.0015533941811524342, "epoch": 2.86261408083442, "percentage": 95.42, "elapsed_time": "8:20:20", "remaining_time": "0:24:00", "throughput": 3174.52, "total_tokens": 95300528}
3523
+ {"current_steps": 17570, "total_steps": 18408, "loss": 0.1724, "lr": 0.0015350749186098134, "epoch": 2.8634289439374188, "percentage": 95.45, "elapsed_time": "8:20:28", "remaining_time": "0:23:52", "throughput": 3174.54, "total_tokens": 95327344}
3524
+ {"current_steps": 17575, "total_steps": 18408, "loss": 0.1786, "lr": 0.0015168637622119286, "epoch": 2.8642438070404173, "percentage": 95.47, "elapsed_time": "8:20:36", "remaining_time": "0:23:43", "throughput": 3174.56, "total_tokens": 95353648}
3525
+ {"current_steps": 17580, "total_steps": 18408, "loss": 0.1852, "lr": 0.0014987607252194056, "epoch": 2.8650586701434158, "percentage": 95.5, "elapsed_time": "8:20:45", "remaining_time": "0:23:35", "throughput": 3174.58, "total_tokens": 95380464}
3526
+ {"current_steps": 17585, "total_steps": 18408, "loss": 0.1648, "lr": 0.0014807658208141172, "epoch": 2.8658735332464147, "percentage": 95.53, "elapsed_time": "8:20:53", "remaining_time": "0:23:26", "throughput": 3174.63, "total_tokens": 95408176}
3527
+ {"current_steps": 17590, "total_steps": 18408, "loss": 0.1895, "lr": 0.001462879062099248, "epoch": 2.866688396349413, "percentage": 95.56, "elapsed_time": "8:21:01", "remaining_time": "0:23:17", "throughput": 3174.67, "total_tokens": 95435504}
3528
+ {"current_steps": 17595, "total_steps": 18408, "loss": 0.1884, "lr": 0.0014451004620992137, "epoch": 2.867503259452412, "percentage": 95.58, "elapsed_time": "8:21:09", "remaining_time": "0:23:09", "throughput": 3174.71, "total_tokens": 95463024}
3529
+ {"current_steps": 17600, "total_steps": 18408, "loss": 0.1739, "lr": 0.001427430033759658, "epoch": 2.8683181225554106, "percentage": 95.61, "elapsed_time": "8:21:18", "remaining_time": "0:23:00", "throughput": 3174.74, "total_tokens": 95489840}
3530
+ {"current_steps": 17605, "total_steps": 18408, "loss": 0.1766, "lr": 0.001409867789947472, "epoch": 2.8691329856584096, "percentage": 95.64, "elapsed_time": "8:21:26", "remaining_time": "0:22:52", "throughput": 3174.8, "total_tokens": 95518384}
3531
+ {"current_steps": 17610, "total_steps": 18408, "loss": 0.172, "lr": 0.0013924137434507765, "epoch": 2.869947848761408, "percentage": 95.66, "elapsed_time": "8:21:34", "remaining_time": "0:22:43", "throughput": 3174.83, "total_tokens": 95545200}
3532
+ {"current_steps": 17615, "total_steps": 18408, "loss": 0.1778, "lr": 0.0013750679069789052, "epoch": 2.8707627118644066, "percentage": 95.69, "elapsed_time": "8:21:42", "remaining_time": "0:22:35", "throughput": 3174.85, "total_tokens": 95571632}
3533
+ {"current_steps": 17620, "total_steps": 18408, "loss": 0.1807, "lr": 0.0013578302931623709, "epoch": 2.8715775749674055, "percentage": 95.72, "elapsed_time": "8:21:50", "remaining_time": "0:22:26", "throughput": 3174.87, "total_tokens": 95598256}
3534
+ {"current_steps": 17625, "total_steps": 18408, "loss": 0.1763, "lr": 0.0013407009145529336, "epoch": 2.872392438070404, "percentage": 95.75, "elapsed_time": "8:21:59", "remaining_time": "0:22:18", "throughput": 3174.92, "total_tokens": 95625712}
3535
+ {"current_steps": 17630, "total_steps": 18408, "loss": 0.1841, "lr": 0.0013236797836234991, "epoch": 2.873207301173403, "percentage": 95.77, "elapsed_time": "8:22:07", "remaining_time": "0:22:09", "throughput": 3174.93, "total_tokens": 95651952}
3536
+ {"current_steps": 17635, "total_steps": 18408, "loss": 0.1795, "lr": 0.0013067669127681536, "epoch": 2.8740221642764014, "percentage": 95.8, "elapsed_time": "8:22:15", "remaining_time": "0:22:00", "throughput": 3174.98, "total_tokens": 95679408}
3537
+ {"current_steps": 17640, "total_steps": 18408, "loss": 0.1756, "lr": 0.0012899623143021953, "epoch": 2.8748370273794004, "percentage": 95.83, "elapsed_time": "8:22:23", "remaining_time": "0:21:52", "throughput": 3175.03, "total_tokens": 95707568}
3538
+ {"current_steps": 17645, "total_steps": 18408, "loss": 0.183, "lr": 0.0012732660004620366, "epoch": 2.875651890482399, "percentage": 95.86, "elapsed_time": "8:22:32", "remaining_time": "0:21:43", "throughput": 3175.09, "total_tokens": 95735536}
3539
+ {"current_steps": 17650, "total_steps": 18408, "loss": 0.1872, "lr": 0.0012566779834052354, "epoch": 2.8764667535853974, "percentage": 95.88, "elapsed_time": "8:22:40", "remaining_time": "0:21:35", "throughput": 3175.13, "total_tokens": 95763248}
3540
+ {"current_steps": 17655, "total_steps": 18408, "loss": 0.1845, "lr": 0.0012401982752105467, "epoch": 2.8772816166883963, "percentage": 95.91, "elapsed_time": "8:22:48", "remaining_time": "0:21:26", "throughput": 3175.16, "total_tokens": 95790064}
3541
+ {"current_steps": 17660, "total_steps": 18408, "loss": 0.1811, "lr": 0.0012238268878778046, "epoch": 2.8780964797913953, "percentage": 95.94, "elapsed_time": "8:22:56", "remaining_time": "0:21:18", "throughput": 3175.19, "total_tokens": 95817456}
3542
+ {"current_steps": 17665, "total_steps": 18408, "loss": 0.1921, "lr": 0.001207563833328007, "epoch": 2.8789113428943938, "percentage": 95.96, "elapsed_time": "8:23:05", "remaining_time": "0:21:09", "throughput": 3175.3, "total_tokens": 95848496}
3543
+ {"current_steps": 17670, "total_steps": 18408, "loss": 0.1844, "lr": 0.0011914091234032475, "epoch": 2.8797262059973923, "percentage": 95.99, "elapsed_time": "8:23:13", "remaining_time": "0:21:01", "throughput": 3175.35, "total_tokens": 95876400}
3544
+ {"current_steps": 17675, "total_steps": 18408, "loss": 0.1898, "lr": 0.0011753627698667334, "epoch": 2.880541069100391, "percentage": 96.02, "elapsed_time": "8:23:22", "remaining_time": "0:20:52", "throughput": 3175.44, "total_tokens": 95906480}
3545
+ {"current_steps": 17680, "total_steps": 18408, "loss": 0.1739, "lr": 0.0011594247844027516, "epoch": 2.8813559322033897, "percentage": 96.05, "elapsed_time": "8:23:30", "remaining_time": "0:20:43", "throughput": 3175.48, "total_tokens": 95934256}
3546
+ {"current_steps": 17685, "total_steps": 18408, "loss": 0.1927, "lr": 0.001143595178616702, "epoch": 2.8821707953063886, "percentage": 96.07, "elapsed_time": "8:23:38", "remaining_time": "0:20:35", "throughput": 3175.47, "total_tokens": 95959536}
3547
+ {"current_steps": 17690, "total_steps": 18408, "loss": 0.1703, "lr": 0.0011278739640350976, "epoch": 2.882985658409387, "percentage": 96.1, "elapsed_time": "8:23:47", "remaining_time": "0:20:26", "throughput": 3175.5, "total_tokens": 95986288}
3548
+ {"current_steps": 17695, "total_steps": 18408, "loss": 0.2101, "lr": 0.0011122611521054316, "epoch": 2.883800521512386, "percentage": 96.13, "elapsed_time": "8:23:55", "remaining_time": "0:20:18", "throughput": 3175.58, "total_tokens": 96015472}
3549
+ {"current_steps": 17700, "total_steps": 18408, "loss": 0.1817, "lr": 0.0010967567541963596, "epoch": 2.8846153846153846, "percentage": 96.15, "elapsed_time": "8:24:03", "remaining_time": "0:20:09", "throughput": 3175.59, "total_tokens": 96041200}
3550
+ {"current_steps": 17705, "total_steps": 18408, "loss": 0.1837, "lr": 0.0010813607815975512, "epoch": 2.885430247718383, "percentage": 96.18, "elapsed_time": "8:24:11", "remaining_time": "0:20:01", "throughput": 3175.63, "total_tokens": 96068400}
3551
+ {"current_steps": 17710, "total_steps": 18408, "loss": 0.182, "lr": 0.0010660732455197385, "epoch": 2.886245110821382, "percentage": 96.21, "elapsed_time": "8:24:20", "remaining_time": "0:19:52", "throughput": 3175.68, "total_tokens": 96096176}
3552
+ {"current_steps": 17715, "total_steps": 18408, "loss": 0.1885, "lr": 0.0010508941570946673, "epoch": 2.887059973924381, "percentage": 96.24, "elapsed_time": "8:24:28", "remaining_time": "0:19:44", "throughput": 3175.73, "total_tokens": 96124208}
3553
+ {"current_steps": 17720, "total_steps": 18408, "loss": 0.1813, "lr": 0.0010358235273751292, "epoch": 2.8878748370273795, "percentage": 96.26, "elapsed_time": "8:24:36", "remaining_time": "0:19:35", "throughput": 3175.81, "total_tokens": 96154032}
3554
+ {"current_steps": 17725, "total_steps": 18408, "loss": 0.1818, "lr": 0.0010208613673349798, "epoch": 2.888689700130378, "percentage": 96.29, "elapsed_time": "8:24:44", "remaining_time": "0:19:26", "throughput": 3175.8, "total_tokens": 96178736}
3555
+ {"current_steps": 17730, "total_steps": 18408, "loss": 0.1769, "lr": 0.001006007687869037, "epoch": 2.889504563233377, "percentage": 96.32, "elapsed_time": "8:24:53", "remaining_time": "0:19:18", "throughput": 3175.9, "total_tokens": 96209072}
3556
+ {"current_steps": 17735, "total_steps": 18408, "loss": 0.2035, "lr": 0.000991262499793133, "epoch": 2.8903194263363754, "percentage": 96.34, "elapsed_time": "8:25:01", "remaining_time": "0:19:09", "throughput": 3175.95, "total_tokens": 96236464}
3557
+ {"current_steps": 17740, "total_steps": 18408, "loss": 0.1683, "lr": 0.0009766258138441451, "epoch": 2.8911342894393743, "percentage": 96.37, "elapsed_time": "8:25:10", "remaining_time": "0:19:01", "throughput": 3176.01, "total_tokens": 96266032}
3558
+ {"current_steps": 17745, "total_steps": 18408, "loss": 0.1857, "lr": 0.0009620976406798986, "epoch": 2.891949152542373, "percentage": 96.4, "elapsed_time": "8:25:18", "remaining_time": "0:18:52", "throughput": 3176.04, "total_tokens": 96292848}
3559
+ {"current_steps": 17750, "total_steps": 18408, "loss": 0.182, "lr": 0.000947677990879231, "epoch": 2.8927640156453718, "percentage": 96.43, "elapsed_time": "8:25:26", "remaining_time": "0:18:44", "throughput": 3176.06, "total_tokens": 96319280}
3560
+ {"current_steps": 17755, "total_steps": 18408, "loss": 0.1882, "lr": 0.0009333668749419266, "epoch": 2.8935788787483703, "percentage": 96.45, "elapsed_time": "8:25:34", "remaining_time": "0:18:35", "throughput": 3176.09, "total_tokens": 96346416}
3561
+ {"current_steps": 17760, "total_steps": 18408, "loss": 0.1912, "lr": 0.0009191643032887831, "epoch": 2.8943937418513688, "percentage": 96.48, "elapsed_time": "8:25:43", "remaining_time": "0:18:27", "throughput": 3176.11, "total_tokens": 96372592}
3562
+ {"current_steps": 17765, "total_steps": 18408, "loss": 0.1953, "lr": 0.0009050702862615278, "epoch": 2.8952086049543677, "percentage": 96.51, "elapsed_time": "8:25:51", "remaining_time": "0:18:18", "throughput": 3176.12, "total_tokens": 96398896}
3563
+ {"current_steps": 17770, "total_steps": 18408, "loss": 0.1882, "lr": 0.0008910848341228683, "epoch": 2.896023468057366, "percentage": 96.53, "elapsed_time": "8:25:59", "remaining_time": "0:18:09", "throughput": 3176.16, "total_tokens": 96426096}
3564
+ {"current_steps": 17775, "total_steps": 18408, "loss": 0.1925, "lr": 0.0008772079570564084, "epoch": 2.896838331160365, "percentage": 96.56, "elapsed_time": "8:26:07", "remaining_time": "0:18:01", "throughput": 3176.19, "total_tokens": 96452912}
3565
+ {"current_steps": 17780, "total_steps": 18408, "loss": 0.1736, "lr": 0.0008634396651667652, "epoch": 2.8976531942633637, "percentage": 96.59, "elapsed_time": "8:26:15", "remaining_time": "0:17:52", "throughput": 3176.24, "total_tokens": 96481008}
3566
+ {"current_steps": 17785, "total_steps": 18408, "loss": 0.2002, "lr": 0.000849779968479436, "epoch": 2.8984680573663626, "percentage": 96.62, "elapsed_time": "8:26:23", "remaining_time": "0:17:44", "throughput": 3176.24, "total_tokens": 96506480}
3567
+ {"current_steps": 17790, "total_steps": 18408, "loss": 0.1898, "lr": 0.0008362288769408643, "epoch": 2.899282920469361, "percentage": 96.64, "elapsed_time": "8:26:32", "remaining_time": "0:17:35", "throughput": 3176.28, "total_tokens": 96533808}
3568
+ {"current_steps": 17795, "total_steps": 18408, "loss": 0.1901, "lr": 0.0008227864004183737, "epoch": 2.9000977835723596, "percentage": 96.67, "elapsed_time": "8:26:40", "remaining_time": "0:17:27", "throughput": 3176.3, "total_tokens": 96559920}
3569
+ {"current_steps": 17800, "total_steps": 18408, "loss": 0.1798, "lr": 0.0008094525487002845, "epoch": 2.9009126466753585, "percentage": 96.7, "elapsed_time": "8:26:48", "remaining_time": "0:17:18", "throughput": 3176.33, "total_tokens": 96586928}
3570
+ {"current_steps": 17805, "total_steps": 18408, "loss": 0.1856, "lr": 0.0007962273314957302, "epoch": 2.9017275097783575, "percentage": 96.72, "elapsed_time": "8:26:56", "remaining_time": "0:17:10", "throughput": 3176.37, "total_tokens": 96614576}
3571
+ {"current_steps": 17810, "total_steps": 18408, "loss": 0.1902, "lr": 0.0007831107584347907, "epoch": 2.902542372881356, "percentage": 96.75, "elapsed_time": "8:27:05", "remaining_time": "0:17:01", "throughput": 3176.42, "total_tokens": 96642544}
3572
+ {"current_steps": 17815, "total_steps": 18408, "loss": 0.1885, "lr": 0.0007701028390684261, "epoch": 2.9033572359843545, "percentage": 96.78, "elapsed_time": "8:27:13", "remaining_time": "0:16:53", "throughput": 3176.48, "total_tokens": 96671216}
3573
+ {"current_steps": 17820, "total_steps": 18408, "loss": 0.1906, "lr": 0.0007572035828684598, "epoch": 2.9041720990873534, "percentage": 96.81, "elapsed_time": "8:27:22", "remaining_time": "0:16:44", "throughput": 3176.59, "total_tokens": 96701936}
3574
+ {"current_steps": 17825, "total_steps": 18408, "loss": 0.1783, "lr": 0.0007444129992276449, "epoch": 2.904986962190352, "percentage": 96.83, "elapsed_time": "8:27:30", "remaining_time": "0:16:35", "throughput": 3176.61, "total_tokens": 96728304}
3575
+ {"current_steps": 17830, "total_steps": 18408, "loss": 0.1901, "lr": 0.0007317310974595147, "epoch": 2.905801825293351, "percentage": 96.86, "elapsed_time": "8:27:38", "remaining_time": "0:16:27", "throughput": 3176.62, "total_tokens": 96754416}
3576
+ {"current_steps": 17835, "total_steps": 18408, "loss": 0.1775, "lr": 0.0007191578867985493, "epoch": 2.9066166883963493, "percentage": 96.89, "elapsed_time": "8:27:46", "remaining_time": "0:16:18", "throughput": 3176.64, "total_tokens": 96780784}
3577
+ {"current_steps": 17840, "total_steps": 18408, "loss": 0.1844, "lr": 0.0007066933764000582, "epoch": 2.9074315514993483, "percentage": 96.91, "elapsed_time": "8:27:54", "remaining_time": "0:16:10", "throughput": 3176.68, "total_tokens": 96808368}
3578
+ {"current_steps": 17845, "total_steps": 18408, "loss": 0.1827, "lr": 0.0006943375753401815, "epoch": 2.908246414602347, "percentage": 96.94, "elapsed_time": "8:28:03", "remaining_time": "0:16:01", "throughput": 3176.76, "total_tokens": 96837808}
3579
+ {"current_steps": 17850, "total_steps": 18408, "loss": 0.1723, "lr": 0.0006820904926158888, "epoch": 2.9090612777053453, "percentage": 96.97, "elapsed_time": "8:28:11", "remaining_time": "0:15:53", "throughput": 3176.8, "total_tokens": 96865328}
3580
+ {"current_steps": 17855, "total_steps": 18408, "loss": 0.1807, "lr": 0.0006699521371450468, "epoch": 2.9098761408083442, "percentage": 97.0, "elapsed_time": "8:28:19", "remaining_time": "0:15:44", "throughput": 3176.85, "total_tokens": 96893616}
3581
+ {"current_steps": 17860, "total_steps": 18408, "loss": 0.1838, "lr": 0.0006579225177663018, "epoch": 2.910691003911343, "percentage": 97.02, "elapsed_time": "8:28:28", "remaining_time": "0:15:36", "throughput": 3176.93, "total_tokens": 96922864}
3582
+ {"current_steps": 17865, "total_steps": 18408, "loss": 0.2043, "lr": 0.0006460016432391303, "epoch": 2.9115058670143417, "percentage": 97.05, "elapsed_time": "8:28:36", "remaining_time": "0:15:27", "throughput": 3176.96, "total_tokens": 96949936}
3583
+ {"current_steps": 17870, "total_steps": 18408, "loss": 0.1817, "lr": 0.000634189522243822, "epoch": 2.91232073011734, "percentage": 97.08, "elapsed_time": "8:28:44", "remaining_time": "0:15:18", "throughput": 3176.99, "total_tokens": 96976944}
3584
+ {"current_steps": 17875, "total_steps": 18408, "loss": 0.1847, "lr": 0.0006224861633815137, "epoch": 2.913135593220339, "percentage": 97.1, "elapsed_time": "8:28:53", "remaining_time": "0:15:10", "throughput": 3177.04, "total_tokens": 97005168}
3585
+ {"current_steps": 17880, "total_steps": 18408, "loss": 0.1856, "lr": 0.0006108915751740884, "epoch": 2.9139504563233376, "percentage": 97.13, "elapsed_time": "8:29:01", "remaining_time": "0:15:01", "throughput": 3177.05, "total_tokens": 97031408}
3586
+ {"current_steps": 17885, "total_steps": 18408, "loss": 0.1895, "lr": 0.0005994057660642593, "epoch": 2.914765319426336, "percentage": 97.16, "elapsed_time": "8:29:09", "remaining_time": "0:14:53", "throughput": 3177.09, "total_tokens": 97058736}
3587
+ {"current_steps": 17890, "total_steps": 18408, "loss": 0.1708, "lr": 0.000588028744415553, "epoch": 2.915580182529335, "percentage": 97.19, "elapsed_time": "8:29:17", "remaining_time": "0:14:44", "throughput": 3177.11, "total_tokens": 97085552}
3588
+ {"current_steps": 17895, "total_steps": 18408, "loss": 0.1872, "lr": 0.0005767605185122259, "epoch": 2.916395045632334, "percentage": 97.21, "elapsed_time": "8:29:25", "remaining_time": "0:14:36", "throughput": 3177.11, "total_tokens": 97110896}
3589
+ {"current_steps": 17900, "total_steps": 18408, "loss": 0.1792, "lr": 0.000565601096559365, "epoch": 2.9172099087353325, "percentage": 97.24, "elapsed_time": "8:29:33", "remaining_time": "0:14:27", "throughput": 3177.1, "total_tokens": 97136240}
3590
+ {"current_steps": 17905, "total_steps": 18408, "loss": 0.1939, "lr": 0.0005545504866827866, "epoch": 2.918024771838331, "percentage": 97.27, "elapsed_time": "8:29:42", "remaining_time": "0:14:19", "throughput": 3177.14, "total_tokens": 97163760}
3591
+ {"current_steps": 17910, "total_steps": 18408, "loss": 0.1715, "lr": 0.000543608696929121, "epoch": 2.91883963494133, "percentage": 97.29, "elapsed_time": "8:29:50", "remaining_time": "0:14:10", "throughput": 3177.2, "total_tokens": 97192176}
3592
+ {"current_steps": 17915, "total_steps": 18408, "loss": 0.1875, "lr": 0.0005327757352657281, "epoch": 2.9196544980443284, "percentage": 97.32, "elapsed_time": "8:29:58", "remaining_time": "0:14:02", "throughput": 3177.21, "total_tokens": 97218736}
3593
+ {"current_steps": 17920, "total_steps": 18408, "loss": 0.1662, "lr": 0.0005220516095807314, "epoch": 2.9204693611473274, "percentage": 97.35, "elapsed_time": "8:30:06", "remaining_time": "0:13:53", "throughput": 3177.25, "total_tokens": 97245936}
3594
+ {"current_steps": 17925, "total_steps": 18408, "loss": 0.1896, "lr": 0.0005114363276829847, "epoch": 2.921284224250326, "percentage": 97.38, "elapsed_time": "8:30:15", "remaining_time": "0:13:44", "throughput": 3177.29, "total_tokens": 97273584}
3595
+ {"current_steps": 17930, "total_steps": 18408, "loss": 0.1879, "lr": 0.0005009298973021381, "epoch": 2.922099087353325, "percentage": 97.4, "elapsed_time": "8:30:23", "remaining_time": "0:13:36", "throughput": 3177.32, "total_tokens": 97300720}
3596
+ {"current_steps": 17935, "total_steps": 18408, "loss": 0.1842, "lr": 0.0004905323260885219, "epoch": 2.9229139504563233, "percentage": 97.43, "elapsed_time": "8:30:31", "remaining_time": "0:13:27", "throughput": 3177.36, "total_tokens": 97328432}
3597
+ {"current_steps": 17940, "total_steps": 18408, "loss": 0.2093, "lr": 0.00048024362161326324, "epoch": 2.923728813559322, "percentage": 97.46, "elapsed_time": "8:30:40", "remaining_time": "0:13:19", "throughput": 3177.39, "total_tokens": 97355760}
3598
+ {"current_steps": 17945, "total_steps": 18408, "loss": 0.1742, "lr": 0.000470063791368136, "epoch": 2.9245436766623207, "percentage": 97.48, "elapsed_time": "8:30:48", "remaining_time": "0:13:10", "throughput": 3177.45, "total_tokens": 97384240}
3599
+ {"current_steps": 17950, "total_steps": 18408, "loss": 0.1837, "lr": 0.00045999284276571073, "epoch": 2.9253585397653197, "percentage": 97.51, "elapsed_time": "8:30:56", "remaining_time": "0:13:02", "throughput": 3177.47, "total_tokens": 97410928}
3600
+ {"current_steps": 17955, "total_steps": 18408, "loss": 0.182, "lr": 0.00045003078313923806, "epoch": 2.926173402868318, "percentage": 97.54, "elapsed_time": "8:31:04", "remaining_time": "0:12:53", "throughput": 3177.49, "total_tokens": 97437296}
3601
+ {"current_steps": 17960, "total_steps": 18408, "loss": 0.1882, "lr": 0.00044017761974266543, "epoch": 2.9269882659713167, "percentage": 97.57, "elapsed_time": "8:31:13", "remaining_time": "0:12:45", "throughput": 3177.53, "total_tokens": 97465072}
3602
+ {"current_steps": 17965, "total_steps": 18408, "loss": 0.1787, "lr": 0.00043043335975068706, "epoch": 2.9278031290743156, "percentage": 97.59, "elapsed_time": "8:31:21", "remaining_time": "0:12:36", "throughput": 3177.56, "total_tokens": 97492464}
3603
+ {"current_steps": 17970, "total_steps": 18408, "loss": 0.1867, "lr": 0.0004207980102586939, "epoch": 2.928617992177314, "percentage": 97.62, "elapsed_time": "8:31:29", "remaining_time": "0:12:28", "throughput": 3177.54, "total_tokens": 97518512}
3604
+ {"current_steps": 17975, "total_steps": 18408, "loss": 0.1933, "lr": 0.00041127157828272363, "epoch": 2.929432855280313, "percentage": 97.65, "elapsed_time": "8:31:38", "remaining_time": "0:12:19", "throughput": 3177.58, "total_tokens": 97546608}
3605
+ {"current_steps": 17980, "total_steps": 18408, "loss": 0.172, "lr": 0.0004018540707595608, "epoch": 2.9302477183833116, "percentage": 97.67, "elapsed_time": "8:31:46", "remaining_time": "0:12:10", "throughput": 3177.64, "total_tokens": 97575472}
3606
+ {"current_steps": 17985, "total_steps": 18408, "loss": 0.1967, "lr": 0.00039254549454663666, "epoch": 2.9310625814863105, "percentage": 97.7, "elapsed_time": "8:31:55", "remaining_time": "0:12:02", "throughput": 3177.65, "total_tokens": 97602032}
3607
+ {"current_steps": 17990, "total_steps": 18408, "loss": 0.1826, "lr": 0.0003833458564220793, "epoch": 2.931877444589309, "percentage": 97.73, "elapsed_time": "8:32:03", "remaining_time": "0:11:53", "throughput": 3177.64, "total_tokens": 97627568}
3608
+ {"current_steps": 17995, "total_steps": 18408, "loss": 0.1831, "lr": 0.0003742551630847135, "epoch": 2.9326923076923075, "percentage": 97.76, "elapsed_time": "8:32:11", "remaining_time": "0:11:45", "throughput": 3177.68, "total_tokens": 97655920}
3609
+ {"current_steps": 18000, "total_steps": 18408, "loss": 0.1863, "lr": 0.00036527342115401097, "epoch": 2.9335071707953064, "percentage": 97.78, "elapsed_time": "8:32:20", "remaining_time": "0:11:36", "throughput": 3177.69, "total_tokens": 97682288}
3610
+ {"current_steps": 18005, "total_steps": 18408, "loss": 0.1893, "lr": 0.00035640063717009006, "epoch": 2.934322033898305, "percentage": 97.81, "elapsed_time": "8:32:28", "remaining_time": "0:11:28", "throughput": 3177.68, "total_tokens": 97707696}
3611
+ {"current_steps": 18010, "total_steps": 18408, "loss": 0.1915, "lr": 0.0003476368175937661, "epoch": 2.935136897001304, "percentage": 97.84, "elapsed_time": "8:32:36", "remaining_time": "0:11:19", "throughput": 3177.75, "total_tokens": 97736944}
3612
+ {"current_steps": 18015, "total_steps": 18408, "loss": 0.1882, "lr": 0.000338981968806501, "epoch": 2.9359517601043024, "percentage": 97.87, "elapsed_time": "8:32:45", "remaining_time": "0:11:11", "throughput": 3177.77, "total_tokens": 97764016}
3613
+ {"current_steps": 18020, "total_steps": 18408, "loss": 0.17, "lr": 0.0003304360971104037, "epoch": 2.9367666232073013, "percentage": 97.89, "elapsed_time": "8:32:53", "remaining_time": "0:11:02", "throughput": 3177.82, "total_tokens": 97792816}
3614
+ {"current_steps": 18025, "total_steps": 18408, "loss": 0.1945, "lr": 0.00032199920872821305, "epoch": 2.9375814863103, "percentage": 97.92, "elapsed_time": "8:33:02", "remaining_time": "0:10:54", "throughput": 3177.85, "total_tokens": 97820912}
3615
+ {"current_steps": 18030, "total_steps": 18408, "loss": 0.1673, "lr": 0.000313671309803365, "epoch": 2.9383963494132983, "percentage": 97.95, "elapsed_time": "8:33:10", "remaining_time": "0:10:45", "throughput": 3177.86, "total_tokens": 97847280}
3616
+ {"current_steps": 18035, "total_steps": 18408, "loss": 0.1874, "lr": 0.00030545240639987534, "epoch": 2.9392112125162972, "percentage": 97.97, "elapsed_time": "8:33:18", "remaining_time": "0:10:36", "throughput": 3177.88, "total_tokens": 97874096}
3617
+ {"current_steps": 18040, "total_steps": 18408, "loss": 0.1686, "lr": 0.00029734250450240695, "epoch": 2.940026075619296, "percentage": 98.0, "elapsed_time": "8:33:26", "remaining_time": "0:10:28", "throughput": 3177.88, "total_tokens": 97900464}
3618
+ {"current_steps": 18045, "total_steps": 18408, "loss": 0.1836, "lr": 0.000289341610016286, "epoch": 2.9408409387222947, "percentage": 98.03, "elapsed_time": "8:33:35", "remaining_time": "0:10:19", "throughput": 3177.95, "total_tokens": 97929968}
3619
+ {"current_steps": 18050, "total_steps": 18408, "loss": 0.1839, "lr": 0.0002814497287674356, "epoch": 2.941655801825293, "percentage": 98.06, "elapsed_time": "8:33:43", "remaining_time": "0:10:11", "throughput": 3178.0, "total_tokens": 97958576}
3620
+ {"current_steps": 18055, "total_steps": 18408, "loss": 0.1823, "lr": 0.0002736668665023756, "epoch": 2.942470664928292, "percentage": 98.08, "elapsed_time": "8:33:52", "remaining_time": "0:10:02", "throughput": 3177.99, "total_tokens": 97984304}
3621
+ {"current_steps": 18060, "total_steps": 18408, "loss": 0.1793, "lr": 0.0002659930288883061, "epoch": 2.9432855280312906, "percentage": 98.11, "elapsed_time": "8:34:00", "remaining_time": "0:09:54", "throughput": 3177.96, "total_tokens": 98009008}
3622
+ {"current_steps": 18065, "total_steps": 18408, "loss": 0.1783, "lr": 0.00025842822151299073, "epoch": 2.9441003911342896, "percentage": 98.14, "elapsed_time": "8:34:08", "remaining_time": "0:09:45", "throughput": 3178.04, "total_tokens": 98039024}
3623
+ {"current_steps": 18070, "total_steps": 18408, "loss": 0.1787, "lr": 0.00025097244988482316, "epoch": 2.944915254237288, "percentage": 98.16, "elapsed_time": "8:34:17", "remaining_time": "0:09:37", "throughput": 3178.06, "total_tokens": 98065904}
3624
+ {"current_steps": 18075, "total_steps": 18408, "loss": 0.1763, "lr": 0.00024362571943277732, "epoch": 2.945730117340287, "percentage": 98.19, "elapsed_time": "8:34:25", "remaining_time": "0:09:28", "throughput": 3178.09, "total_tokens": 98093104}
3625
+ {"current_steps": 18080, "total_steps": 18408, "loss": 0.1914, "lr": 0.00023638803550645737, "epoch": 2.9465449804432855, "percentage": 98.22, "elapsed_time": "8:34:33", "remaining_time": "0:09:20", "throughput": 3178.1, "total_tokens": 98119920}
3626
+ {"current_steps": 18085, "total_steps": 18408, "loss": 0.1974, "lr": 0.00022925940337604754, "epoch": 2.947359843546284, "percentage": 98.25, "elapsed_time": "8:34:41", "remaining_time": "0:09:11", "throughput": 3178.08, "total_tokens": 98145072}
3627
+ {"current_steps": 18090, "total_steps": 18408, "loss": 0.167, "lr": 0.00022223982823232902, "epoch": 2.948174706649283, "percentage": 98.27, "elapsed_time": "8:34:50", "remaining_time": "0:09:03", "throughput": 3178.1, "total_tokens": 98171888}
3628
+ {"current_steps": 18095, "total_steps": 18408, "loss": 0.1726, "lr": 0.00021532931518667973, "epoch": 2.948989569752282, "percentage": 98.3, "elapsed_time": "8:34:58", "remaining_time": "0:08:54", "throughput": 3178.12, "total_tokens": 98198768}
3629
+ {"current_steps": 18100, "total_steps": 18408, "loss": 0.1728, "lr": 0.00020852786927105793, "epoch": 2.9498044328552804, "percentage": 98.33, "elapsed_time": "8:35:07", "remaining_time": "0:08:45", "throughput": 3178.18, "total_tokens": 98228272}
3630
+ {"current_steps": 18105, "total_steps": 18408, "loss": 0.1894, "lr": 0.0002018354954379853, "epoch": 2.950619295958279, "percentage": 98.35, "elapsed_time": "8:35:15", "remaining_time": "0:08:37", "throughput": 3178.21, "total_tokens": 98255600}
3631
+ {"current_steps": 18110, "total_steps": 18408, "loss": 0.1937, "lr": 0.0001952521985605804, "epoch": 2.951434159061278, "percentage": 98.38, "elapsed_time": "8:35:23", "remaining_time": "0:08:28", "throughput": 3178.24, "total_tokens": 98283376}
3632
+ {"current_steps": 18115, "total_steps": 18408, "loss": 0.188, "lr": 0.00018877798343252538, "epoch": 2.9522490221642763, "percentage": 98.41, "elapsed_time": "8:35:32", "remaining_time": "0:08:20", "throughput": 3178.3, "total_tokens": 98312304}
3633
+ {"current_steps": 18120, "total_steps": 18408, "loss": 0.1762, "lr": 0.00018241285476811587, "epoch": 2.9530638852672753, "percentage": 98.44, "elapsed_time": "8:35:40", "remaining_time": "0:08:11", "throughput": 3178.33, "total_tokens": 98339376}
3634
+ {"current_steps": 18125, "total_steps": 18408, "loss": 0.1909, "lr": 0.00017615681720214437, "epoch": 2.9538787483702738, "percentage": 98.46, "elapsed_time": "8:35:48", "remaining_time": "0:08:03", "throughput": 3178.32, "total_tokens": 98365296}
3635
+ {"current_steps": 18130, "total_steps": 18408, "loss": 0.1764, "lr": 0.00017000987529001697, "epoch": 2.9546936114732727, "percentage": 98.49, "elapsed_time": "8:35:57", "remaining_time": "0:07:54", "throughput": 3178.34, "total_tokens": 98392112}
3636
+ {"current_steps": 18135, "total_steps": 18408, "loss": 0.1973, "lr": 0.00016397203350770328, "epoch": 2.955508474576271, "percentage": 98.52, "elapsed_time": "8:36:05", "remaining_time": "0:07:46", "throughput": 3178.34, "total_tokens": 98417904}
3637
+ {"current_steps": 18140, "total_steps": 18408, "loss": 0.1856, "lr": 0.00015804329625168645, "epoch": 2.9563233376792697, "percentage": 98.54, "elapsed_time": "8:36:13", "remaining_time": "0:07:37", "throughput": 3178.36, "total_tokens": 98444976}
3638
+ {"current_steps": 18145, "total_steps": 18408, "loss": 0.1765, "lr": 0.0001522236678390465, "epoch": 2.9571382007822686, "percentage": 98.57, "elapsed_time": "8:36:21", "remaining_time": "0:07:29", "throughput": 3178.39, "total_tokens": 98472368}
3639
+ {"current_steps": 18150, "total_steps": 18408, "loss": 0.2036, "lr": 0.00014651315250741037, "epoch": 2.957953063885267, "percentage": 98.6, "elapsed_time": "8:36:30", "remaining_time": "0:07:20", "throughput": 3178.44, "total_tokens": 98501104}
3640
+ {"current_steps": 18155, "total_steps": 18408, "loss": 0.1991, "lr": 0.0001409117544149352, "epoch": 2.958767926988266, "percentage": 98.63, "elapsed_time": "8:36:38", "remaining_time": "0:07:11", "throughput": 3178.47, "total_tokens": 98528624}
3641
+ {"current_steps": 18160, "total_steps": 18408, "loss": 0.199, "lr": 0.000135419477640325, "epoch": 2.9595827900912646, "percentage": 98.65, "elapsed_time": "8:36:47", "remaining_time": "0:07:03", "throughput": 3178.49, "total_tokens": 98555760}
3642
+ {"current_steps": 18165, "total_steps": 18408, "loss": 0.1795, "lr": 0.00013003632618284743, "epoch": 2.9603976531942635, "percentage": 98.68, "elapsed_time": "8:36:55", "remaining_time": "0:06:54", "throughput": 3178.49, "total_tokens": 98581360}
3643
+ {"current_steps": 18170, "total_steps": 18408, "loss": 0.1754, "lr": 0.0001247623039622836, "epoch": 2.961212516297262, "percentage": 98.71, "elapsed_time": "8:37:03", "remaining_time": "0:06:46", "throughput": 3178.54, "total_tokens": 98609904}
3644
+ {"current_steps": 18175, "total_steps": 18408, "loss": 0.1642, "lr": 0.00011959741481897823, "epoch": 2.9620273794002605, "percentage": 98.73, "elapsed_time": "8:37:12", "remaining_time": "0:06:37", "throughput": 3178.6, "total_tokens": 98638896}
3645
+ {"current_steps": 18180, "total_steps": 18408, "loss": 0.1845, "lr": 0.00011454166251377295, "epoch": 2.9628422425032594, "percentage": 98.76, "elapsed_time": "8:37:20", "remaining_time": "0:06:29", "throughput": 3178.62, "total_tokens": 98666096}
3646
+ {"current_steps": 18185, "total_steps": 18408, "loss": 0.1876, "lr": 0.00010959505072807296, "epoch": 2.9636571056062584, "percentage": 98.79, "elapsed_time": "8:37:28", "remaining_time": "0:06:20", "throughput": 3178.65, "total_tokens": 98693872}
3647
+ {"current_steps": 18190, "total_steps": 18408, "loss": 0.1875, "lr": 0.000104757583063797, "epoch": 2.964471968709257, "percentage": 98.82, "elapsed_time": "8:37:37", "remaining_time": "0:06:12", "throughput": 3178.69, "total_tokens": 98721712}
3648
+ {"current_steps": 18195, "total_steps": 18408, "loss": 0.1892, "lr": 0.00010002926304341076, "epoch": 2.9652868318122554, "percentage": 98.84, "elapsed_time": "8:37:45", "remaining_time": "0:06:03", "throughput": 3178.73, "total_tokens": 98749872}
3649
+ {"current_steps": 18200, "total_steps": 18408, "loss": 0.1724, "lr": 9.541009410986012e-05, "epoch": 2.9661016949152543, "percentage": 98.87, "elapsed_time": "8:37:54", "remaining_time": "0:05:55", "throughput": 3178.79, "total_tokens": 98779312}
3650
+ {"current_steps": 18205, "total_steps": 18408, "loss": 0.1797, "lr": 9.090007962665458e-05, "epoch": 2.966916558018253, "percentage": 98.9, "elapsed_time": "8:38:02", "remaining_time": "0:05:46", "throughput": 3178.8, "total_tokens": 98805936}
3651
+ {"current_steps": 18210, "total_steps": 18408, "loss": 0.1918, "lr": 8.649922287778388e-05, "epoch": 2.9677314211212518, "percentage": 98.92, "elapsed_time": "8:38:11", "remaining_time": "0:05:38", "throughput": 3178.82, "total_tokens": 98832944}
3652
+ {"current_steps": 18215, "total_steps": 18408, "loss": 0.1867, "lr": 8.220752706780132e-05, "epoch": 2.9685462842242503, "percentage": 98.95, "elapsed_time": "8:38:19", "remaining_time": "0:05:29", "throughput": 3178.82, "total_tokens": 98859248}
3653
+ {"current_steps": 18220, "total_steps": 18408, "loss": 0.1833, "lr": 7.80249953217238e-05, "epoch": 2.969361147327249, "percentage": 98.98, "elapsed_time": "8:38:27", "remaining_time": "0:05:20", "throughput": 3178.87, "total_tokens": 98887472}
3654
+ {"current_steps": 18225, "total_steps": 18408, "loss": 0.1735, "lr": 7.395163068511512e-05, "epoch": 2.9701760104302477, "percentage": 99.01, "elapsed_time": "8:38:36", "remaining_time": "0:05:12", "throughput": 3178.9, "total_tokens": 98915056}
3655
+ {"current_steps": 18230, "total_steps": 18408, "loss": 0.1933, "lr": 6.998743612401936e-05, "epoch": 2.970990873533246, "percentage": 99.03, "elapsed_time": "8:38:44", "remaining_time": "0:05:03", "throughput": 3178.95, "total_tokens": 98943472}
3656
+ {"current_steps": 18235, "total_steps": 18408, "loss": 0.1817, "lr": 6.613241452502749e-05, "epoch": 2.971805736636245, "percentage": 99.06, "elapsed_time": "8:38:53", "remaining_time": "0:04:55", "throughput": 3179.03, "total_tokens": 98973808}
3657
+ {"current_steps": 18240, "total_steps": 18408, "loss": 0.1997, "lr": 6.238656869517744e-05, "epoch": 2.9726205997392436, "percentage": 99.09, "elapsed_time": "8:39:01", "remaining_time": "0:04:46", "throughput": 3179.05, "total_tokens": 99000752}
3658
+ {"current_steps": 18245, "total_steps": 18408, "loss": 0.1734, "lr": 5.87499013620707e-05, "epoch": 2.9734354628422426, "percentage": 99.11, "elapsed_time": "8:39:10", "remaining_time": "0:04:38", "throughput": 3179.13, "total_tokens": 99030576}
3659
+ {"current_steps": 18250, "total_steps": 18408, "loss": 0.1858, "lr": 5.52224151737557e-05, "epoch": 2.974250325945241, "percentage": 99.14, "elapsed_time": "8:39:18", "remaining_time": "0:04:29", "throughput": 3179.13, "total_tokens": 99056752}
3660
+ {"current_steps": 18255, "total_steps": 18408, "loss": 0.1926, "lr": 5.180411269882778e-05, "epoch": 2.97506518904824, "percentage": 99.17, "elapsed_time": "8:39:26", "remaining_time": "0:04:21", "throughput": 3179.15, "total_tokens": 99083440}
3661
+ {"current_steps": 18260, "total_steps": 18408, "loss": 0.1949, "lr": 4.8494996426345914e-05, "epoch": 2.9758800521512385, "percentage": 99.2, "elapsed_time": "8:39:35", "remaining_time": "0:04:12", "throughput": 3179.17, "total_tokens": 99111024}
3662
+ {"current_steps": 18265, "total_steps": 18408, "loss": 0.1946, "lr": 4.529506876588263e-05, "epoch": 2.976694915254237, "percentage": 99.22, "elapsed_time": "8:39:43", "remaining_time": "0:04:04", "throughput": 3179.24, "total_tokens": 99140016}
3663
+ {"current_steps": 18270, "total_steps": 18408, "loss": 0.1845, "lr": 4.220433204747409e-05, "epoch": 2.977509778357236, "percentage": 99.25, "elapsed_time": "8:39:52", "remaining_time": "0:03:55", "throughput": 3179.28, "total_tokens": 99168304}
3664
+ {"current_steps": 18275, "total_steps": 18408, "loss": 0.188, "lr": 3.9222788521703354e-05, "epoch": 2.978324641460235, "percentage": 99.28, "elapsed_time": "8:40:00", "remaining_time": "0:03:47", "throughput": 3179.3, "total_tokens": 99195568}
3665
+ {"current_steps": 18280, "total_steps": 18408, "loss": 0.1977, "lr": 3.635044035958379e-05, "epoch": 2.9791395045632334, "percentage": 99.3, "elapsed_time": "8:40:08", "remaining_time": "0:03:38", "throughput": 3179.33, "total_tokens": 99222960}
3666
+ {"current_steps": 18285, "total_steps": 18408, "loss": 0.185, "lr": 3.358728965264235e-05, "epoch": 2.979954367666232, "percentage": 99.33, "elapsed_time": "8:40:17", "remaining_time": "0:03:29", "throughput": 3179.34, "total_tokens": 99249456}
3667
+ {"current_steps": 18290, "total_steps": 18408, "loss": 0.1734, "lr": 3.093333841290291e-05, "epoch": 2.980769230769231, "percentage": 99.36, "elapsed_time": "8:40:25", "remaining_time": "0:03:21", "throughput": 3179.39, "total_tokens": 99278320}
3668
+ {"current_steps": 18295, "total_steps": 18408, "loss": 0.2037, "lr": 2.838858857286963e-05, "epoch": 2.9815840938722293, "percentage": 99.39, "elapsed_time": "8:40:33", "remaining_time": "0:03:12", "throughput": 3179.36, "total_tokens": 99302960}
3669
+ {"current_steps": 18300, "total_steps": 18408, "loss": 0.158, "lr": 2.5953041985510294e-05, "epoch": 2.9823989569752283, "percentage": 99.41, "elapsed_time": "8:40:41", "remaining_time": "0:03:04", "throughput": 3179.37, "total_tokens": 99329264}
3670
+ {"current_steps": 18305, "total_steps": 18408, "loss": 0.1885, "lr": 2.362670042432291e-05, "epoch": 2.9832138200782268, "percentage": 99.44, "elapsed_time": "8:40:50", "remaining_time": "0:02:55", "throughput": 3179.48, "total_tokens": 99360688}
3671
+ {"current_steps": 18310, "total_steps": 18408, "loss": 0.1771, "lr": 2.140956558321916e-05, "epoch": 2.9840286831812257, "percentage": 99.47, "elapsed_time": "8:40:58", "remaining_time": "0:02:47", "throughput": 3179.48, "total_tokens": 99387312}
3672
+ {"current_steps": 18315, "total_steps": 18408, "loss": 0.1907, "lr": 1.9301639076640952e-05, "epoch": 2.984843546284224, "percentage": 99.49, "elapsed_time": "8:41:07", "remaining_time": "0:02:38", "throughput": 3179.49, "total_tokens": 99413488}
3673
+ {"current_steps": 18320, "total_steps": 18408, "loss": 0.172, "lr": 1.730292243949383e-05, "epoch": 2.9856584093872227, "percentage": 99.52, "elapsed_time": "8:41:15", "remaining_time": "0:02:30", "throughput": 3179.54, "total_tokens": 99441840}
3674
+ {"current_steps": 18325, "total_steps": 18408, "loss": 0.1726, "lr": 1.5413417127163596e-05, "epoch": 2.9864732724902217, "percentage": 99.55, "elapsed_time": "8:41:23", "remaining_time": "0:02:21", "throughput": 3179.57, "total_tokens": 99469552}
3675
+ {"current_steps": 18330, "total_steps": 18408, "loss": 0.1803, "lr": 1.3633124515499695e-05, "epoch": 2.9872881355932206, "percentage": 99.58, "elapsed_time": "8:41:32", "remaining_time": "0:02:13", "throughput": 3179.61, "total_tokens": 99497584}
3676
+ {"current_steps": 18335, "total_steps": 18408, "loss": 0.1864, "lr": 1.1962045900865137e-05, "epoch": 2.988102998696219, "percentage": 99.6, "elapsed_time": "8:41:40", "remaining_time": "0:02:04", "throughput": 3179.64, "total_tokens": 99525552}
3677
+ {"current_steps": 18340, "total_steps": 18408, "loss": 0.1839, "lr": 1.0400182500053256e-05, "epoch": 2.9889178617992176, "percentage": 99.63, "elapsed_time": "8:41:49", "remaining_time": "0:01:56", "throughput": 3179.66, "total_tokens": 99552624}
3678
+ {"current_steps": 18345, "total_steps": 18408, "loss": 0.1755, "lr": 8.947535450370968e-06, "epoch": 2.9897327249022165, "percentage": 99.66, "elapsed_time": "8:41:57", "remaining_time": "0:01:47", "throughput": 3179.7, "total_tokens": 99580464}
3679
+ {"current_steps": 18350, "total_steps": 18408, "loss": 0.1799, "lr": 7.604105809538852e-06, "epoch": 2.990547588005215, "percentage": 99.68, "elapsed_time": "8:42:06", "remaining_time": "0:01:39", "throughput": 3179.74, "total_tokens": 99609008}
3680
+ {"current_steps": 18355, "total_steps": 18408, "loss": 0.1784, "lr": 6.36989455580772e-06, "epoch": 2.991362451108214, "percentage": 99.71, "elapsed_time": "8:42:14", "remaining_time": "0:01:30", "throughput": 3179.75, "total_tokens": 99635184}
3681
+ {"current_steps": 18360, "total_steps": 18408, "loss": 0.1774, "lr": 5.244902587892008e-06, "epoch": 2.9921773142112125, "percentage": 99.74, "elapsed_time": "8:42:22", "remaining_time": "0:01:21", "throughput": 3179.78, "total_tokens": 99662512}
3682
+ {"current_steps": 18365, "total_steps": 18408, "loss": 0.1821, "lr": 4.229130724953123e-06, "epoch": 2.9929921773142114, "percentage": 99.77, "elapsed_time": "8:42:30", "remaining_time": "0:01:13", "throughput": 3179.79, "total_tokens": 99689264}
3683
+ {"current_steps": 18370, "total_steps": 18408, "loss": 0.2047, "lr": 3.3225797066327442e-06, "epoch": 2.99380704041721, "percentage": 99.79, "elapsed_time": "8:42:39", "remaining_time": "0:01:04", "throughput": 3179.82, "total_tokens": 99717104}
3684
+ {"current_steps": 18375, "total_steps": 18408, "loss": 0.2018, "lr": 2.52525019305283e-06, "epoch": 2.9946219035202084, "percentage": 99.82, "elapsed_time": "8:42:47", "remaining_time": "0:00:56", "throughput": 3179.85, "total_tokens": 99744560}
3685
+ {"current_steps": 18380, "total_steps": 18408, "loss": 0.2029, "lr": 1.83714276479896e-06, "epoch": 2.9954367666232073, "percentage": 99.85, "elapsed_time": "8:42:55", "remaining_time": "0:00:47", "throughput": 3179.85, "total_tokens": 99770288}
3686
+ {"current_steps": 18385, "total_steps": 18408, "loss": 0.196, "lr": 1.2582579229036826e-06, "epoch": 2.996251629726206, "percentage": 99.88, "elapsed_time": "8:43:04", "remaining_time": "0:00:39", "throughput": 3179.88, "total_tokens": 99797808}
3687
+ {"current_steps": 18390, "total_steps": 18408, "loss": 0.1957, "lr": 7.88596088896476e-07, "epoch": 2.997066492829205, "percentage": 99.9, "elapsed_time": "8:43:12", "remaining_time": "0:00:30", "throughput": 3179.9, "total_tokens": 99825520}
3688
+ {"current_steps": 18395, "total_steps": 18408, "loss": 0.1947, "lr": 4.2815760478709386e-07, "epoch": 2.9978813559322033, "percentage": 99.93, "elapsed_time": "8:43:20", "remaining_time": "0:00:22", "throughput": 3179.9, "total_tokens": 99851568}
3689
+ {"current_steps": 18400, "total_steps": 18408, "loss": 0.1901, "lr": 1.7694273299895257e-07, "epoch": 2.9986962190352022, "percentage": 99.96, "elapsed_time": "8:43:29", "remaining_time": "0:00:13", "throughput": 3180.0, "total_tokens": 99882544}
3690
+ {"current_steps": 18405, "total_steps": 18408, "loss": 0.1955, "lr": 3.495165646905107e-08, "epoch": 2.9995110821382007, "percentage": 99.98, "elapsed_time": "8:43:37", "remaining_time": "0:00:05", "throughput": 3180.03, "total_tokens": 99910192}
3691
+ {"current_steps": 18408, "total_steps": 18408, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "8:43:44", "remaining_time": "0:00:00", "throughput": 3179.81, "total_tokens": 99924976}