aleegis commited on
Commit
184aa52
·
verified ·
1 Parent(s): 8eea68b

Training in progress, step 1500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:076a9e08fa23f4c8bfb5c9f047bea205e3658d1a28942b0b2595597bf2431ec3
3
  size 335604696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84836aadd5b7668801f43f94256af1462d4f6fcd0638b1078edbab3752655c83
3
  size 335604696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec55549db4cf183d94da9ff06bd0988d76c99bcbbb7944c2410ffc0f979d1188
3
  size 671473763
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:110c7016c149e8b740c8790f09dcce9198bbae6ee7cc39f1fd5d7f46e2210a01
3
  size 671473763
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a6b8943477e33b4ca1220d2abfc5f6bc3df7264ba11576be9d2c6af84fce0ae2
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00df7897a1fc6c054036bcebad4f3efec8761e2e58635a805d2ef197e09b731c
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a717c54c61f00318563a2243900cad87ed16f178d7acf7675538ea64c8f7c0e3
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:675c447775e07c9091c55f2dffbbf5646a2b0f067a2b30d2abcb37bd027db68f
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.036713527405118476,
6
  "eval_steps": 500,
7
- "global_step": 1200,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1205,6 +1205,307 @@
1205
  "learning_rate": 1.1998599831119912e-05,
1206
  "loss": 1.491,
1207
  "step": 1197
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1208
  }
1209
  ],
1210
  "logging_steps": 7,
@@ -1219,12 +1520,12 @@
1219
  "should_evaluate": false,
1220
  "should_log": false,
1221
  "should_save": true,
1222
- "should_training_stop": false
1223
  },
1224
  "attributes": {}
1225
  }
1226
  },
1227
- "total_flos": 8.952125425975296e+17,
1228
  "train_batch_size": 8,
1229
  "trial_name": null,
1230
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.0458919092563981,
6
  "eval_steps": 500,
7
+ "global_step": 1500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1205
  "learning_rate": 1.1998599831119912e-05,
1206
  "loss": 1.491,
1207
  "step": 1197
1208
+ },
1209
+ {
1210
+ "epoch": 0.03683590582980221,
1211
+ "grad_norm": 1.5545629262924194,
1212
+ "learning_rate": 1.1474337861210543e-05,
1213
+ "loss": 1.4981,
1214
+ "step": 1204
1215
+ },
1216
+ {
1217
+ "epoch": 0.03705006807299873,
1218
+ "grad_norm": 2.0076568126678467,
1219
+ "learning_rate": 1.096029866616704e-05,
1220
+ "loss": 1.5171,
1221
+ "step": 1211
1222
+ },
1223
+ {
1224
+ "epoch": 0.03726423031619525,
1225
+ "grad_norm": 1.0814955234527588,
1226
+ "learning_rate": 1.0456618646161954e-05,
1227
+ "loss": 1.3583,
1228
+ "step": 1218
1229
+ },
1230
+ {
1231
+ "epoch": 0.03747839255939178,
1232
+ "grad_norm": 1.3351236581802368,
1233
+ "learning_rate": 9.963431452563332e-06,
1234
+ "loss": 1.4998,
1235
+ "step": 1225
1236
+ },
1237
+ {
1238
+ "epoch": 0.037692554802588304,
1239
+ "grad_norm": 1.345676064491272,
1240
+ "learning_rate": 9.480867952470284e-06,
1241
+ "loss": 1.4631,
1242
+ "step": 1232
1243
+ },
1244
+ {
1245
+ "epoch": 0.03790671704578483,
1246
+ "grad_norm": 1.1110200881958008,
1247
+ "learning_rate": 9.00905619398757e-06,
1248
+ "loss": 1.4986,
1249
+ "step": 1239
1250
+ },
1251
+ {
1252
+ "epoch": 0.03812087928898135,
1253
+ "grad_norm": 1.1065698862075806,
1254
+ "learning_rate": 8.548121372247918e-06,
1255
+ "loss": 1.4631,
1256
+ "step": 1246
1257
+ },
1258
+ {
1259
+ "epoch": 0.03833504153217788,
1260
+ "grad_norm": 1.2057925462722778,
1261
+ "learning_rate": 8.098185796191631e-06,
1262
+ "loss": 1.439,
1263
+ "step": 1253
1264
+ },
1265
+ {
1266
+ "epoch": 0.0385492037753744,
1267
+ "grad_norm": 1.4041844606399536,
1268
+ "learning_rate": 7.659368856111926e-06,
1269
+ "loss": 1.4684,
1270
+ "step": 1260
1271
+ },
1272
+ {
1273
+ "epoch": 0.03876336601857092,
1274
+ "grad_norm": 1.3971853256225586,
1275
+ "learning_rate": 7.2317869919746705e-06,
1276
+ "loss": 1.439,
1277
+ "step": 1267
1278
+ },
1279
+ {
1280
+ "epoch": 0.03897752826176745,
1281
+ "grad_norm": 1.2180246114730835,
1282
+ "learning_rate": 6.815553662521185e-06,
1283
+ "loss": 1.5764,
1284
+ "step": 1274
1285
+ },
1286
+ {
1287
+ "epoch": 0.039191690504963975,
1288
+ "grad_norm": 1.822922706604004,
1289
+ "learning_rate": 6.410779315161886e-06,
1290
+ "loss": 1.4794,
1291
+ "step": 1281
1292
+ },
1293
+ {
1294
+ "epoch": 0.0394058527481605,
1295
+ "grad_norm": 1.2133909463882446,
1296
+ "learning_rate": 6.017571356669183e-06,
1297
+ "loss": 1.4784,
1298
+ "step": 1288
1299
+ },
1300
+ {
1301
+ "epoch": 0.039620014991357026,
1302
+ "grad_norm": 1.4232499599456787,
1303
+ "learning_rate": 5.636034124677042e-06,
1304
+ "loss": 1.405,
1305
+ "step": 1295
1306
+ },
1307
+ {
1308
+ "epoch": 0.03983417723455355,
1309
+ "grad_norm": 1.8573203086853027,
1310
+ "learning_rate": 5.266268859995083e-06,
1311
+ "loss": 1.4153,
1312
+ "step": 1302
1313
+ },
1314
+ {
1315
+ "epoch": 0.04004833947775007,
1316
+ "grad_norm": 1.6481467485427856,
1317
+ "learning_rate": 4.908373679744316e-06,
1318
+ "loss": 1.3081,
1319
+ "step": 1309
1320
+ },
1321
+ {
1322
+ "epoch": 0.0402625017209466,
1323
+ "grad_norm": 2.072063446044922,
1324
+ "learning_rate": 4.562443551321788e-06,
1325
+ "loss": 1.2842,
1326
+ "step": 1316
1327
+ },
1328
+ {
1329
+ "epoch": 0.04047666396414312,
1330
+ "grad_norm": 1.2372018098831177,
1331
+ "learning_rate": 4.228570267201049e-06,
1332
+ "loss": 1.5924,
1333
+ "step": 1323
1334
+ },
1335
+ {
1336
+ "epoch": 0.040690826207339645,
1337
+ "grad_norm": 1.206811785697937,
1338
+ "learning_rate": 3.90684242057498e-06,
1339
+ "loss": 1.4247,
1340
+ "step": 1330
1341
+ },
1342
+ {
1343
+ "epoch": 0.04090498845053617,
1344
+ "grad_norm": 1.6587467193603516,
1345
+ "learning_rate": 3.5973453818476556e-06,
1346
+ "loss": 1.4644,
1347
+ "step": 1337
1348
+ },
1349
+ {
1350
+ "epoch": 0.041119150693732696,
1351
+ "grad_norm": 1.1888831853866577,
1352
+ "learning_rate": 3.3001612759813393e-06,
1353
+ "loss": 1.5327,
1354
+ "step": 1344
1355
+ },
1356
+ {
1357
+ "epoch": 0.04133331293692922,
1358
+ "grad_norm": 1.0991014242172241,
1359
+ "learning_rate": 3.0153689607045845e-06,
1360
+ "loss": 1.4681,
1361
+ "step": 1351
1362
+ },
1363
+ {
1364
+ "epoch": 0.04154747518012574,
1365
+ "grad_norm": 1.5934925079345703,
1366
+ "learning_rate": 2.743044005587425e-06,
1367
+ "loss": 1.3098,
1368
+ "step": 1358
1369
+ },
1370
+ {
1371
+ "epoch": 0.04176163742332227,
1372
+ "grad_norm": 1.4214099645614624,
1373
+ "learning_rate": 2.4832586719889416e-06,
1374
+ "loss": 1.4065,
1375
+ "step": 1365
1376
+ },
1377
+ {
1378
+ "epoch": 0.04197579966651879,
1379
+ "grad_norm": 1.2822719812393188,
1380
+ "learning_rate": 2.2360818938828187e-06,
1381
+ "loss": 1.4728,
1382
+ "step": 1372
1383
+ },
1384
+ {
1385
+ "epoch": 0.042189961909715315,
1386
+ "grad_norm": 1.119520664215088,
1387
+ "learning_rate": 2.0015792595656226e-06,
1388
+ "loss": 1.4284,
1389
+ "step": 1379
1390
+ },
1391
+ {
1392
+ "epoch": 0.042404124152911844,
1393
+ "grad_norm": 1.3280854225158691,
1394
+ "learning_rate": 1.7798129942530551e-06,
1395
+ "loss": 1.4559,
1396
+ "step": 1386
1397
+ },
1398
+ {
1399
+ "epoch": 0.04261828639610837,
1400
+ "grad_norm": 1.1883761882781982,
1401
+ "learning_rate": 1.5708419435684462e-06,
1402
+ "loss": 1.3757,
1403
+ "step": 1393
1404
+ },
1405
+ {
1406
+ "epoch": 0.04283244863930489,
1407
+ "grad_norm": 1.3203167915344238,
1408
+ "learning_rate": 1.374721557928116e-06,
1409
+ "loss": 1.6273,
1410
+ "step": 1400
1411
+ },
1412
+ {
1413
+ "epoch": 0.04304661088250142,
1414
+ "grad_norm": 1.2010129690170288,
1415
+ "learning_rate": 1.191503877827621e-06,
1416
+ "loss": 1.4944,
1417
+ "step": 1407
1418
+ },
1419
+ {
1420
+ "epoch": 0.04326077312569794,
1421
+ "grad_norm": 1.6128370761871338,
1422
+ "learning_rate": 1.0212375200327973e-06,
1423
+ "loss": 1.6092,
1424
+ "step": 1414
1425
+ },
1426
+ {
1427
+ "epoch": 0.04347493536889446,
1428
+ "grad_norm": 1.0786170959472656,
1429
+ "learning_rate": 8.639676646793382e-07,
1430
+ "loss": 1.4227,
1431
+ "step": 1421
1432
+ },
1433
+ {
1434
+ "epoch": 0.043689097612090985,
1435
+ "grad_norm": 1.0348191261291504,
1436
+ "learning_rate": 7.197360432842359e-07,
1437
+ "loss": 1.5162,
1438
+ "step": 1428
1439
+ },
1440
+ {
1441
+ "epoch": 0.043903259855287514,
1442
+ "grad_norm": 1.0884389877319336,
1443
+ "learning_rate": 5.885809276723608e-07,
1444
+ "loss": 1.5015,
1445
+ "step": 1435
1446
+ },
1447
+ {
1448
+ "epoch": 0.04411742209848404,
1449
+ "grad_norm": 1.5776199102401733,
1450
+ "learning_rate": 4.705371198210129e-07,
1451
+ "loss": 1.5789,
1452
+ "step": 1442
1453
+ },
1454
+ {
1455
+ "epoch": 0.04433158434168056,
1456
+ "grad_norm": 1.4270485639572144,
1457
+ "learning_rate": 3.65635942625242e-07,
1458
+ "loss": 1.5011,
1459
+ "step": 1449
1460
+ },
1461
+ {
1462
+ "epoch": 0.04454574658487709,
1463
+ "grad_norm": 1.2836881875991821,
1464
+ "learning_rate": 2.7390523158633554e-07,
1465
+ "loss": 1.4921,
1466
+ "step": 1456
1467
+ },
1468
+ {
1469
+ "epoch": 0.04475990882807361,
1470
+ "grad_norm": 1.3851501941680908,
1471
+ "learning_rate": 1.953693274256374e-07,
1472
+ "loss": 1.5536,
1473
+ "step": 1463
1474
+ },
1475
+ {
1476
+ "epoch": 0.04497407107127013,
1477
+ "grad_norm": 1.9082521200180054,
1478
+ "learning_rate": 1.3004906962578721e-07,
1479
+ "loss": 1.3908,
1480
+ "step": 1470
1481
+ },
1482
+ {
1483
+ "epoch": 0.04518823331446666,
1484
+ "grad_norm": 1.3173164129257202,
1485
+ "learning_rate": 7.796179090094891e-08,
1486
+ "loss": 1.4139,
1487
+ "step": 1477
1488
+ },
1489
+ {
1490
+ "epoch": 0.045402395557663185,
1491
+ "grad_norm": 1.4804590940475464,
1492
+ "learning_rate": 3.9121312597573125e-08,
1493
+ "loss": 1.4134,
1494
+ "step": 1484
1495
+ },
1496
+ {
1497
+ "epoch": 0.04561655780085971,
1498
+ "grad_norm": 1.5944395065307617,
1499
+ "learning_rate": 1.3537941026914303e-08,
1500
+ "loss": 1.301,
1501
+ "step": 1491
1502
+ },
1503
+ {
1504
+ "epoch": 0.045830720044056236,
1505
+ "grad_norm": 1.4432575702667236,
1506
+ "learning_rate": 1.2184647302626583e-09,
1507
+ "loss": 1.5252,
1508
+ "step": 1498
1509
  }
1510
  ],
1511
  "logging_steps": 7,
 
1520
  "should_evaluate": false,
1521
  "should_log": false,
1522
  "should_save": true,
1523
+ "should_training_stop": true
1524
  },
1525
  "attributes": {}
1526
  }
1527
  },
1528
+ "total_flos": 1.119015678246912e+18,
1529
  "train_batch_size": 8,
1530
  "trial_name": null,
1531
  "trial_params": null