elvispresniy commited on
Commit
5b413b5
·
verified ·
1 Parent(s): 60de098

Training in progress, step 63000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:21fd7d3229c28617cd8b7ee9c5b265a0b6c8e5c1472537520b44ebd5131a34fe
3
  size 1882177840
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73903da31c9b9d848fa3dc081b0cc0b8784e6aef4a1ec1bce8df8211ea090227
3
  size 1882177840
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d94f47d1cd577af82550ce480e1d26bff5ac1076a79522e98f2d5f6d846e3c5
3
  size 37161530
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7f41f1d65446549e90258f21fed00d90b0c96c4f8a66571ccae685e2c8b63d5
3
  size 37161530
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b3814db4cbaa1d1f7ae10f72fc8ff620b9669469050d4879f09f386790fbc59
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c29d4681b39690eaaafbf2164900dcbb7e4adbbf215e4232929b945620d1964
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c9233d091885437459c3c43d3eb157cdd78c220a64977478d6fbab6ba06baa1a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bbd1aa5de53f2295365b6cba9c613b814db5492cf7e5c0c7f70f0bb0df8980c
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.6949540774345632,
5
  "eval_steps": 1000,
6
- "global_step": 62500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4897,6 +4897,49 @@
4897
  "learning_rate": 4.259859885713584e-05,
4898
  "loss": 1.9038,
4899
  "step": 62500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4900
  }
4901
  ],
4902
  "logging_steps": 100,
@@ -4916,7 +4959,7 @@
4916
  "attributes": {}
4917
  }
4918
  },
4919
- "total_flos": 1.1770566229056e+17,
4920
  "train_batch_size": 1,
4921
  "trial_name": null,
4922
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.7005137100540396,
5
  "eval_steps": 1000,
6
+ "global_step": 63000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4897
  "learning_rate": 4.259859885713584e-05,
4898
  "loss": 1.9038,
4899
  "step": 62500
4900
+ },
4901
+ {
4902
+ "epoch": 0.6960660039584584,
4903
+ "grad_norm": 0.0,
4904
+ "learning_rate": 4.231259127717636e-05,
4905
+ "loss": 1.9314,
4906
+ "step": 62600
4907
+ },
4908
+ {
4909
+ "epoch": 0.6971779304823538,
4910
+ "grad_norm": 0.0,
4911
+ "learning_rate": 4.2027289200206564e-05,
4912
+ "loss": 1.8182,
4913
+ "step": 62700
4914
+ },
4915
+ {
4916
+ "epoch": 0.698289857006249,
4917
+ "grad_norm": 0.0,
4918
+ "learning_rate": 4.174269611540145e-05,
4919
+ "loss": 1.7234,
4920
+ "step": 62800
4921
+ },
4922
+ {
4923
+ "epoch": 0.6994017835301444,
4924
+ "grad_norm": 0.0,
4925
+ "learning_rate": 4.1458815503265315e-05,
4926
+ "loss": 1.9986,
4927
+ "step": 62900
4928
+ },
4929
+ {
4930
+ "epoch": 0.7005137100540396,
4931
+ "grad_norm": 0.0,
4932
+ "learning_rate": 4.117565083558909e-05,
4933
+ "loss": 1.9098,
4934
+ "step": 63000
4935
+ },
4936
+ {
4937
+ "epoch": 0.7005137100540396,
4938
+ "eval_loss": 2.144634485244751,
4939
+ "eval_runtime": 29.3973,
4940
+ "eval_samples_per_second": 2.041,
4941
+ "eval_steps_per_second": 2.041,
4942
+ "step": 63000
4943
  }
4944
  ],
4945
  "logging_steps": 100,
 
4959
  "attributes": {}
4960
  }
4961
  },
4962
+ "total_flos": 1.181810744019456e+17,
4963
  "train_batch_size": 1,
4964
  "trial_name": null,
4965
  "trial_params": null