wcyat committed (verified)
Commit 164de98 · 1 Parent(s): 506d5f4

Training in progress, step 1625, checkpoint

last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ee4e2d84e38cb73fc215ea160b72d8c65fb8ef814b9ce18e21001c4fceb2971e
+oid sha256:2235952bcc8cb41758f7b984fe58dd654193b0b7df69b358dea989152d0f6278
 size 410636248
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:db5189682206216f1eb6c749f032237c2b81ba6a5a04045dadba360a7ff157fd
+oid sha256:34c529f4a84229c3ae46dce2f3e5222e610cf9eae06334adfd54155d57792572
 size 821393658
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:70171b5cf4c5d65f8d1801780619a5ef6eaa49858f51f2d188c1b4ae2878778d
+oid sha256:9fc3183918ae68b4e8a620d193012eab94fc760f0b8667287157217a5c88f2d3
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0db746135b039e0910ffb0031dacdf1ae5245ec4309f44b2f467d1af6778d5b2
+oid sha256:52dbf015c0fbf1630a81175a221e8cc220f8e050930d561b1a00f55047f5328b
 size 1064
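
The four pointer files above only record a new SHA-256 object id (oid) and an unchanged byte size; the binary payloads live in LFS storage. A quick local sanity check is to hash a pulled file and compare it against the oid in the pointer. This is a minimal sketch, assuming the checkpoint has already been fetched (e.g. via `git lfs pull`), using the new model.safetensors oid from this commit:

```python
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream a file in chunks and return its hex SHA-256 digest,
    i.e. what the LFS pointer records as `oid sha256:<digest>`."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# New oid for last-checkpoint/model.safetensors from this commit
expected = "2235952bcc8cb41758f7b984fe58dd654193b0b7df69b358dea989152d0f6278"
print(sha256_of("last-checkpoint/model.safetensors") == expected)
```

If the file on disk is still the 133-byte pointer text rather than the real weights, the hash will not match; that usually just means `git lfs pull` has not been run yet.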
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 0.20959021151065826,
   "best_model_checkpoint": "./results/checkpoint-640",
-  "epoch": 4.615384615384615,
+  "epoch": 5.0,
   "eval_steps": 20,
-  "global_step": 1500,
+  "global_step": 1625,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1207,6 +1207,102 @@
       "eval_samples_per_second": 33.928,
       "eval_steps_per_second": 8.595,
       "step": 1500
+    },
+    {
+      "epoch": 4.676923076923077,
+      "grad_norm": 0.007997775450348854,
+      "learning_rate": 1.2923076923076924e-06,
+      "loss": 0.1421,
+      "step": 1520
+    },
+    {
+      "epoch": 4.676923076923077,
+      "eval_accuracy": 0.9333333333333333,
+      "eval_loss": 0.400637149810791,
+      "eval_runtime": 4.3593,
+      "eval_samples_per_second": 34.41,
+      "eval_steps_per_second": 8.717,
+      "step": 1520
+    },
+    {
+      "epoch": 4.7384615384615385,
+      "grad_norm": 0.004181304015219212,
+      "learning_rate": 1.0461538461538463e-06,
+      "loss": 0.0002,
+      "step": 1540
+    },
+    {
+      "epoch": 4.7384615384615385,
+      "eval_accuracy": 0.9266666666666666,
+      "eval_loss": 0.40295225381851196,
+      "eval_runtime": 4.5588,
+      "eval_samples_per_second": 32.903,
+      "eval_steps_per_second": 8.336,
+      "step": 1540
+    },
+    {
+      "epoch": 4.8,
+      "grad_norm": 0.003899338422343135,
+      "learning_rate": 8.000000000000001e-07,
+      "loss": 0.0002,
+      "step": 1560
+    },
+    {
+      "epoch": 4.8,
+      "eval_accuracy": 0.9266666666666666,
+      "eval_loss": 0.4034216105937958,
+      "eval_runtime": 4.6347,
+      "eval_samples_per_second": 32.365,
+      "eval_steps_per_second": 8.199,
+      "step": 1560
+    },
+    {
+      "epoch": 4.861538461538462,
+      "grad_norm": 0.004380157217383385,
+      "learning_rate": 5.53846153846154e-07,
+      "loss": 0.0628,
+      "step": 1580
+    },
+    {
+      "epoch": 4.861538461538462,
+      "eval_accuracy": 0.9333333333333333,
+      "eval_loss": 0.38764962553977966,
+      "eval_runtime": 4.5609,
+      "eval_samples_per_second": 32.888,
+      "eval_steps_per_second": 8.332,
+      "step": 1580
+    },
+    {
+      "epoch": 4.923076923076923,
+      "grad_norm": 0.004398785065859556,
+      "learning_rate": 3.0769230769230774e-07,
+      "loss": 0.0003,
+      "step": 1600
+    },
+    {
+      "epoch": 4.923076923076923,
+      "eval_accuracy": 0.9333333333333333,
+      "eval_loss": 0.38795334100723267,
+      "eval_runtime": 4.4706,
+      "eval_samples_per_second": 33.553,
+      "eval_steps_per_second": 8.5,
+      "step": 1600
+    },
+    {
+      "epoch": 4.984615384615385,
+      "grad_norm": 0.0024876066017895937,
+      "learning_rate": 6.153846153846154e-08,
+      "loss": 0.0003,
+      "step": 1620
+    },
+    {
+      "epoch": 4.984615384615385,
+      "eval_accuracy": 0.9333333333333333,
+      "eval_loss": 0.39026668667793274,
+      "eval_runtime": 4.3714,
+      "eval_samples_per_second": 34.314,
+      "eval_steps_per_second": 8.693,
+      "step": 1620
     }
   ],
   "logging_steps": 20,
@@ -1221,12 +1317,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 1380510818592000.0,
+  "total_flos": 1494172738952400.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null