tryingpro committed on
Commit
8dd7e1a
·
verified ·
1 Parent(s): d57449a

Training in progress, step 88, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f50037fbe29d558b558b5da86bd282f8c2cc3d70f5f736424c507f9ee91d20a1
3
  size 97728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91681cc39adfefb90bdea99ca5745b581f4801561d0227fe660b2c43d77ec0dc
3
  size 97728
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:69449aa1570d8410b8bbb6561fe971c7cfda814511d3976c93d9231ef3c36ee6
3
  size 212298
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec4f676d3431bbfbfaeff2ffbc79740e2a9a6c4dba4a0a98b90195be616411c5
3
  size 212298
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:806a71728c0ed74ec6dd226da0f332bc82d1779c640cd6c7384ae5d0e8c3fafc
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:045ee9d5cf40840149ae782fad88c8a05540105615a0bd2b1c43fb04b7d7e58f
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:916fdec13e6efc15b18e2a6b6d384a81cd69075368b316d9722fd8205c98017b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01a4e7013bdf0cfef44b18348c4082377d680816016af2638e8995bf0bb3c8d9
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.088135593220339,
5
  "eval_steps": 8,
6
- "global_step": 80,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -277,6 +277,35 @@
277
  "eval_samples_per_second": 137.482,
278
  "eval_steps_per_second": 69.017,
279
  "step": 80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
280
  }
281
  ],
282
  "logging_steps": 3,
@@ -296,7 +325,7 @@
296
  "attributes": {}
297
  }
298
  },
299
- "total_flos": 62842317570048.0,
300
  "train_batch_size": 2,
301
  "trial_name": null,
302
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.1966101694915254,
5
  "eval_steps": 8,
6
+ "global_step": 88,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
277
  "eval_samples_per_second": 137.482,
278
  "eval_steps_per_second": 69.017,
279
  "step": 80
280
+ },
281
+ {
282
+ "epoch": 1.1016949152542372,
283
+ "grad_norm": 0.0535203292965889,
284
+ "learning_rate": 8.047222744854943e-06,
285
+ "loss": 10.3181,
286
+ "step": 81
287
+ },
288
+ {
289
+ "epoch": 1.1423728813559322,
290
+ "grad_norm": 0.07268232852220535,
291
+ "learning_rate": 3.6037139304146762e-06,
292
+ "loss": 10.3939,
293
+ "step": 84
294
+ },
295
+ {
296
+ "epoch": 1.1830508474576271,
297
+ "grad_norm": 0.0458713173866272,
298
+ "learning_rate": 9.0502382320653e-07,
299
+ "loss": 10.3273,
300
+ "step": 87
301
+ },
302
+ {
303
+ "epoch": 1.1966101694915254,
304
+ "eval_loss": 10.348885536193848,
305
+ "eval_runtime": 1.801,
306
+ "eval_samples_per_second": 138.257,
307
+ "eval_steps_per_second": 69.406,
308
+ "step": 88
309
  }
310
  ],
311
  "logging_steps": 3,
 
325
  "attributes": {}
326
  }
327
  },
328
+ "total_flos": 69137241341952.0,
329
  "train_batch_size": 2,
330
  "trial_name": null,
331
  "trial_params": null