aleegis committed
Commit 777eb1b · verified · 1 Parent(s): 0af61ea

Training in progress, epoch 2, checkpoint

last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:59d2eed6ad7b6c61be9559b52cf752c4381844a3d89151424a37f1507db2acf9
+ oid sha256:02bd66363a55075e209836624ecb54de2195ec9603078f8e8f2b8ef1c42fa3cd
  size 671473298
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e35ce6f141652be1d01336e435bb985bf3376bb78e4cdd52eb2b9e1ea884cff4
+ oid sha256:5c480dbc18df566c0de1af7dfd5354085eb62e45149b5f5c58c54a341328e4ea
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c0ab4b5576cffbfe1e9f6fc42d7fb34658a6fda81c06eb0c633b50cbe234b60f
+ oid sha256:891cad020bf7bee78efa739dc10e1e4315e34b096ed70226b38590ec81d7d418
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 2.0,
+ "epoch": 2.3603461841070024,
  "eval_steps": 500,
- "global_step": 1271,
+ "global_step": 1500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -1274,6 +1274,237 @@
  "learning_rate": 6.680041969810203e-06,
  "loss": 0.0,
  "step": 1267
+ },
+ {
+ "epoch": 2.004720692368214,
+ "grad_norm": NaN,
+ "learning_rate": 6.293212171147206e-06,
+ "loss": 0.0,
+ "step": 1274
+ },
+ {
+ "epoch": 2.01573564122738,
+ "grad_norm": NaN,
+ "learning_rate": 5.917166368382277e-06,
+ "loss": 0.0,
+ "step": 1281
+ },
+ {
+ "epoch": 2.026750590086546,
+ "grad_norm": NaN,
+ "learning_rate": 5.5519973451903405e-06,
+ "loss": 0.0,
+ "step": 1288
+ },
+ {
+ "epoch": 2.037765538945712,
+ "grad_norm": NaN,
+ "learning_rate": 5.197795201563743e-06,
+ "loss": 0.0,
+ "step": 1295
+ },
+ {
+ "epoch": 2.048780487804878,
+ "grad_norm": NaN,
+ "learning_rate": 4.8546473315813856e-06,
+ "loss": 0.0,
+ "step": 1302
+ },
+ {
+ "epoch": 2.059795436664044,
+ "grad_norm": NaN,
+ "learning_rate": 4.522638401845547e-06,
+ "loss": 0.0,
+ "step": 1309
+ },
+ {
+ "epoch": 2.07081038552321,
+ "grad_norm": NaN,
+ "learning_rate": 4.2018503305916775e-06,
+ "loss": 0.0,
+ "step": 1316
+ },
+ {
+ "epoch": 2.0818253343823763,
+ "grad_norm": NaN,
+ "learning_rate": 3.892362267476313e-06,
+ "loss": 0.0,
+ "step": 1323
+ },
+ {
+ "epoch": 2.092840283241542,
+ "grad_norm": NaN,
+ "learning_rate": 3.5942505740480582e-06,
+ "loss": 0.0,
+ "step": 1330
+ },
+ {
+ "epoch": 2.1038552321007082,
+ "grad_norm": NaN,
+ "learning_rate": 3.3075888049065196e-06,
+ "loss": 0.0,
+ "step": 1337
+ },
+ {
+ "epoch": 2.114870180959874,
+ "grad_norm": NaN,
+ "learning_rate": 3.03244768955383e-06,
+ "loss": 0.0,
+ "step": 1344
+ },
+ {
+ "epoch": 2.12588512981904,
+ "grad_norm": NaN,
+ "learning_rate": 2.7688951149431595e-06,
+ "loss": 0.0,
+ "step": 1351
+ },
+ {
+ "epoch": 2.136900078678206,
+ "grad_norm": NaN,
+ "learning_rate": 2.5169961087286974e-06,
+ "loss": 0.0,
+ "step": 1358
+ },
+ {
+ "epoch": 2.147915027537372,
+ "grad_norm": NaN,
+ "learning_rate": 2.276812823220964e-06,
+ "loss": 0.0,
+ "step": 1365
+ },
+ {
+ "epoch": 2.1589299763965384,
+ "grad_norm": NaN,
+ "learning_rate": 2.048404520051722e-06,
+ "loss": 0.0,
+ "step": 1372
+ },
+ {
+ "epoch": 2.169944925255704,
+ "grad_norm": NaN,
+ "learning_rate": 1.8318275555520237e-06,
+ "loss": 0.0,
+ "step": 1379
+ },
+ {
+ "epoch": 2.1809598741148704,
+ "grad_norm": NaN,
+ "learning_rate": 1.6271353668471655e-06,
+ "loss": 0.0,
+ "step": 1386
+ },
+ {
+ "epoch": 2.191974822974036,
+ "grad_norm": NaN,
+ "learning_rate": 1.4343784586718311e-06,
+ "loss": 0.0,
+ "step": 1393
+ },
+ {
+ "epoch": 2.2029897718332023,
+ "grad_norm": NaN,
+ "learning_rate": 1.2536043909088191e-06,
+ "loss": 0.0,
+ "step": 1400
+ },
+ {
+ "epoch": 2.214004720692368,
+ "grad_norm": NaN,
+ "learning_rate": 1.0848577668543802e-06,
+ "loss": 0.0,
+ "step": 1407
+ },
+ {
+ "epoch": 2.2250196695515343,
+ "grad_norm": NaN,
+ "learning_rate": 9.281802222129765e-07,
+ "loss": 0.0,
+ "step": 1414
+ },
+ {
+ "epoch": 2.2360346184107,
+ "grad_norm": NaN,
+ "learning_rate": 7.836104148243484e-07,
+ "loss": 0.0,
+ "step": 1421
+ },
+ {
+ "epoch": 2.2470495672698663,
+ "grad_norm": NaN,
+ "learning_rate": 6.511840151252169e-07,
+ "loss": 0.0,
+ "step": 1428
+ },
+ {
+ "epoch": 2.258064516129032,
+ "grad_norm": NaN,
+ "learning_rate": 5.309336973481683e-07,
+ "loss": 0.0,
+ "step": 1435
+ },
+ {
+ "epoch": 2.2690794649881982,
+ "grad_norm": NaN,
+ "learning_rate": 4.228891314597694e-07,
+ "loss": 0.0,
+ "step": 1442
+ },
+ {
+ "epoch": 2.2800944138473644,
+ "grad_norm": NaN,
+ "learning_rate": 3.2707697583995167e-07,
+ "loss": 0.0,
+ "step": 1449
+ },
+ {
+ "epoch": 2.29110936270653,
+ "grad_norm": NaN,
+ "learning_rate": 2.4352087070443895e-07,
+ "loss": 0.0,
+ "step": 1456
+ },
+ {
+ "epoch": 2.3021243115656964,
+ "grad_norm": NaN,
+ "learning_rate": 1.7224143227190236e-07,
+ "loss": 0.0,
+ "step": 1463
+ },
+ {
+ "epoch": 2.313139260424862,
+ "grad_norm": NaN,
+ "learning_rate": 1.132562476771959e-07,
+ "loss": 0.0,
+ "step": 1470
+ },
+ {
+ "epoch": 2.3241542092840284,
+ "grad_norm": NaN,
+ "learning_rate": 6.657987063200533e-08,
+ "loss": 0.0,
+ "step": 1477
+ },
+ {
+ "epoch": 2.335169158143194,
+ "grad_norm": NaN,
+ "learning_rate": 3.2223817833931805e-08,
+ "loss": 0.0,
+ "step": 1484
+ },
+ {
+ "epoch": 2.3461841070023604,
+ "grad_norm": NaN,
+ "learning_rate": 1.019656612492592e-08,
+ "loss": 0.0,
+ "step": 1491
+ },
+ {
+ "epoch": 2.3571990558615266,
+ "grad_norm": NaN,
+ "learning_rate": 5.035503997385949e-10,
+ "loss": 0.0,
+ "step": 1498
  }
  ],
  "logging_steps": 7,
@@ -1288,12 +1519,12 @@
  "should_evaluate": false,
  "should_log": false,
  "should_save": true,
- "should_training_stop": false
+ "should_training_stop": true
  },
  "attributes": {}
  }
  },
- "total_flos": 9.064492133221663e+17,
+ "total_flos": 1.0698275558774538e+18,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null