qingy2024 committed on
Commit 62cb3b0 · verified · 1 Parent(s): 6ea7da8

Upload checkpoint 8702
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e3f2b3d40d5d70cd69a4100d88be206276bdab19bdf6613332b1a3e205e2a0d3
+ oid sha256:89b6ed8f166125c4e0ccb4438463feac8c4befdcfa3b5fc23df50b931dd37964
  size 4957560304
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:47bda3ba51a5d0d14e4e6d1e5e1a1a499e86cac640e7a65e02a17da73c72f9bc
+ oid sha256:dfeea3b480b0ac3a7be7edb3e3d45b0b6eed0bc230ddbdd298ef3463ce89ffd9
  size 3989163248
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:2c078439a2268de07efa415fd281c455eb2d1821139fd97a7039a0e011516f93
+ oid sha256:fc5cc673934837b111c47f04c42854cbbf4155db979be7dde3c8474b55635ed6
  size 17893865224
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:618163f6be35a05ac5e460626b2bd512fd3db05ca7c47320572b7f41f91978fb
+ oid sha256:bb0516760d88d4baa388b6e998401e0078c7ad3407932309df513a21a23fcf7a
  size 1064
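The four files above are Git LFS pointer files: the repository stores only the `version` line, the `sha256` oid, and the blob `size`, so this commit swaps the oids while the sizes stay unchanged. As a minimal sketch (not part of this commit; paths and helper names are illustrative), one way to check that a locally downloaded blob matches its pointer:

```python
# Sketch only: verify a local blob against the oid/size recorded in its
# Git LFS pointer file, as shown in the diff above.
import hashlib
from pathlib import Path

def parse_pointer(text: str) -> dict:
    """Parse the 'key value' lines of a Git LFS pointer into a dict."""
    fields = {}
    for line in text.strip().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

def verify_blob(pointer_path: str, blob_path: str) -> bool:
    """Return True if the blob's size and sha256 match the pointer."""
    fields = parse_pointer(Path(pointer_path).read_text())
    expected_oid = fields["oid"].split(":", 1)[-1]   # strip the "sha256:" prefix
    expected_size = int(fields["size"])

    blob = Path(blob_path)
    if blob.stat().st_size != expected_size:
        return False

    digest = hashlib.sha256()
    with blob.open("rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # read in 1 MiB chunks
            digest.update(chunk)
    return digest.hexdigest() == expected_oid

# Hypothetical usage: verify_blob("optimizer.pt.pointer", "optimizer.pt")
```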
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.9928181557023844,
+ "epoch": 0.9999425452456191,
  "eval_steps": 500,
- "global_step": 8640,
+ "global_step": 8702,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -20167,6 +20167,146 @@
  "learning_rate": 2.5107040290095474e-08,
  "loss": 1.6549,
  "step": 8640
+ },
+ {
+ "epoch": 0.9931628842286699,
+ "grad_norm": 0.38671875,
+ "learning_rate": 2.2736199954154213e-08,
+ "loss": 1.6457,
+ "step": 8643
+ },
+ {
+ "epoch": 0.9935076127549555,
+ "grad_norm": 0.3828125,
+ "learning_rate": 2.0482904581231588e-08,
+ "loss": 1.6282,
+ "step": 8646
+ },
+ {
+ "epoch": 0.993852341281241,
+ "grad_norm": 0.376953125,
+ "learning_rate": 1.834715682056398e-08,
+ "loss": 1.654,
+ "step": 8649
+ },
+ {
+ "epoch": 0.9941970698075265,
+ "grad_norm": 0.388671875,
+ "learning_rate": 1.632895918319832e-08,
+ "loss": 1.6449,
+ "step": 8652
+ },
+ {
+ "epoch": 0.9945417983338122,
+ "grad_norm": 0.376953125,
+ "learning_rate": 1.4428314041958767e-08,
+ "loss": 1.6957,
+ "step": 8655
+ },
+ {
+ "epoch": 0.9948865268600977,
+ "grad_norm": 0.380859375,
+ "learning_rate": 1.2645223631457815e-08,
+ "loss": 1.6494,
+ "step": 8658
+ },
+ {
+ "epoch": 0.9952312553863832,
+ "grad_norm": 0.39453125,
+ "learning_rate": 1.0979690048107394e-08,
+ "loss": 1.6152,
+ "step": 8661
+ },
+ {
+ "epoch": 0.9955759839126688,
+ "grad_norm": 0.37109375,
+ "learning_rate": 9.431715250118878e-09,
+ "loss": 1.5803,
+ "step": 8664
+ },
+ {
+ "epoch": 0.9959207124389543,
+ "grad_norm": 0.400390625,
+ "learning_rate": 8.001301057447563e-09,
+ "loss": 1.6867,
+ "step": 8667
+ },
+ {
+ "epoch": 0.9962654409652398,
+ "grad_norm": 0.392578125,
+ "learning_rate": 6.688449151881493e-09,
+ "loss": 1.6015,
+ "step": 8670
+ },
+ {
+ "epoch": 0.9966101694915255,
+ "grad_norm": 0.390625,
+ "learning_rate": 5.493161076941533e-09,
+ "loss": 1.5957,
+ "step": 8673
+ },
+ {
+ "epoch": 0.996954898017811,
+ "grad_norm": 0.373046875,
+ "learning_rate": 4.415438237959091e-09,
+ "loss": 1.6479,
+ "step": 8676
+ },
+ {
+ "epoch": 0.9972996265440965,
+ "grad_norm": 0.37890625,
+ "learning_rate": 3.455281902031704e-09,
+ "loss": 1.6768,
+ "step": 8679
+ },
+ {
+ "epoch": 0.9976443550703821,
+ "grad_norm": 0.3671875,
+ "learning_rate": 2.612693198023042e-09,
+ "loss": 1.6631,
+ "step": 8682
+ },
+ {
+ "epoch": 0.9979890835966676,
+ "grad_norm": 0.380859375,
+ "learning_rate": 1.8876731165962115e-09,
+ "loss": 1.6653,
+ "step": 8685
+ },
+ {
+ "epoch": 0.9983338121229531,
+ "grad_norm": 0.369140625,
+ "learning_rate": 1.2802225101471444e-09,
+ "loss": 1.5957,
+ "step": 8688
+ },
+ {
+ "epoch": 0.9986785406492388,
+ "grad_norm": 0.392578125,
+ "learning_rate": 7.903420928823124e-10,
+ "loss": 1.6107,
+ "step": 8691
+ },
+ {
+ "epoch": 0.9990232691755243,
+ "grad_norm": 0.384765625,
+ "learning_rate": 4.1803244075211414e-10,
+ "loss": 1.6701,
+ "step": 8694
+ },
+ {
+ "epoch": 0.9993679977018098,
+ "grad_norm": 0.384765625,
+ "learning_rate": 1.6329399149528356e-10,
+ "loss": 1.6652,
+ "step": 8697
+ },
+ {
+ "epoch": 0.9997127262280954,
+ "grad_norm": 0.384765625,
+ "learning_rate": 2.6127044616686137e-11,
+ "loss": 1.7509,
+ "step": 8700
  }
  ],
  "logging_steps": 3,
@@ -20181,12 +20321,12 @@
  "should_evaluate": false,
  "should_log": false,
  "should_save": true,
- "should_training_stop": false
+ "should_training_stop": true
  },
  "attributes": {}
  }
  },
- "total_flos": 3.1423252906849075e+19,
+ "total_flos": 3.1626194748539142e+19,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null