VERSIL91 committed on
Commit
f108889
·
verified ·
1 Parent(s): a457d2a

Training in progress, step 33, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:77b70dab3d58dc476a4fb45f21e234f375d139466519260294ab6a7ef6620d2b
3
  size 167832240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d925971b321d3628a896b0a6287f71845e124a70fc895ca37a9604bb6e92208
3
  size 167832240
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:329a94965eebe95f88224d777d3f5f5fa5743a28632813359666f79fe72040fd
3
  size 85723284
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c681725b8ebf2d3821561c80309c34828063021efe9b6cb6879a9c80d102173
3
  size 85723284
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d8ff3b7e6316ad4c7ea8c75404f4f8cd2b82017803f1a148e607e3dbc0c68582
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc294d29ebec9d1210c7657cf895a2c7400965b701bd85b28a138e9bb423a612
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f94970865aded7a663ddce729ff20409247b7e0c4a27787ee16909b1bc06fc14
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cd3f6e9267a2b7ffad41dddfa33b74e45aa7c480b8283eebbe1a013b2da49f9
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.8355899419729207,
5
  "eval_steps": 9,
6
- "global_step": 27,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -228,6 +228,48 @@
228
  "eval_samples_per_second": 6.864,
229
  "eval_steps_per_second": 3.494,
230
  "step": 27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
231
  }
232
  ],
233
  "logging_steps": 1,
@@ -242,12 +284,12 @@
242
  "should_evaluate": false,
243
  "should_log": false,
244
  "should_save": true,
245
- "should_training_stop": false
246
  },
247
  "attributes": {}
248
  }
249
  },
250
- "total_flos": 1.6024765324905677e+17,
251
  "train_batch_size": 2,
252
  "trial_name": null,
253
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0212765957446808,
5
  "eval_steps": 9,
6
+ "global_step": 33,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
228
  "eval_samples_per_second": 6.864,
229
  "eval_steps_per_second": 3.494,
230
  "step": 27
231
+ },
232
+ {
233
+ "epoch": 0.8665377176015474,
234
+ "grad_norm": 3.091240882873535,
235
+ "learning_rate": 1.1214435464779006e-05,
236
+ "loss": 0.0223,
237
+ "step": 28
238
+ },
239
+ {
240
+ "epoch": 0.8974854932301741,
241
+ "grad_norm": 0.4273183047771454,
242
+ "learning_rate": 7.2790297726755716e-06,
243
+ "loss": 0.005,
244
+ "step": 29
245
+ },
246
+ {
247
+ "epoch": 0.9284332688588007,
248
+ "grad_norm": 0.8694038391113281,
249
+ "learning_rate": 4.139434924727359e-06,
250
+ "loss": 0.0089,
251
+ "step": 30
252
+ },
253
+ {
254
+ "epoch": 0.9593810444874274,
255
+ "grad_norm": 0.19907452166080475,
256
+ "learning_rate": 1.8541356326100433e-06,
257
+ "loss": 0.0039,
258
+ "step": 31
259
+ },
260
+ {
261
+ "epoch": 0.9903288201160542,
262
+ "grad_norm": 4.229534149169922,
263
+ "learning_rate": 4.6570269818346224e-07,
264
+ "loss": 0.0346,
265
+ "step": 32
266
+ },
267
+ {
268
+ "epoch": 1.0212765957446808,
269
+ "grad_norm": 0.12014052271842957,
270
+ "learning_rate": 0.0,
271
+ "loss": 0.0034,
272
+ "step": 33
273
  }
274
  ],
275
  "logging_steps": 1,
 
284
  "should_evaluate": false,
285
  "should_log": false,
286
  "should_save": true,
287
+ "should_training_stop": true
288
  },
289
  "attributes": {}
290
  }
291
  },
292
+ "total_flos": 1.9585824285995827e+17,
293
  "train_batch_size": 2,
294
  "trial_name": null,
295
  "trial_params": null