elvispresniy commited on
Commit
b3ca161
·
verified ·
1 Parent(s): 592ca68

Training in progress, step 63500, checkpoint

Browse files
last-checkpoint/adapter_config.json CHANGED
@@ -20,13 +20,13 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "o_proj",
24
  "k_proj",
25
- "gate_proj",
26
  "q_proj",
 
 
 
27
  "v_proj",
28
- "down_proj",
29
- "up_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
 
23
  "k_proj",
 
24
  "q_proj",
25
+ "up_proj",
26
+ "o_proj",
27
+ "gate_proj",
28
  "v_proj",
29
+ "down_proj"
 
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:73903da31c9b9d848fa3dc081b0cc0b8784e6aef4a1ec1bce8df8211ea090227
3
  size 1882177840
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2d9cff269682a6e1e23cdfca052e9f8c3fbccbf78453c2b7f70a6fe8d269149
3
  size 1882177840
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b7f41f1d65446549e90258f21fed00d90b0c96c4f8a66571ccae685e2c8b63d5
3
  size 37161530
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2616a8e813904405bc939d148b53a7a439f7a299eb3700731870f85cd2fa724
3
  size 37161530
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9c29d4681b39690eaaafbf2164900dcbb7e4adbbf215e4232929b945620d1964
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:104890da2c59eb9ea543ff21a4559177df5b0fca409467c18f13be5167350e53
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3bbd1aa5de53f2295365b6cba9c613b814db5492cf7e5c0c7f70f0bb0df8980c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:993c7bed5c196ded0eb58f1c8f7c3b64f5c12bdd352fa28eb7317e6257a44cb1
3
  size 1064
last-checkpoint/tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa
3
- size 11421896
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83396048d512ec1f3178af0d7c1f79a226bba041822614b0e26a4fd2d4b55bf7
3
+ size 11421995
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.7005137100540396,
5
  "eval_steps": 1000,
6
- "global_step": 63000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4940,6 +4940,41 @@
4940
  "eval_samples_per_second": 2.041,
4941
  "eval_steps_per_second": 2.041,
4942
  "step": 63000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4943
  }
4944
  ],
4945
  "logging_steps": 100,
@@ -4959,7 +4994,7 @@
4959
  "attributes": {}
4960
  }
4961
  },
4962
- "total_flos": 1.181810744019456e+17,
4963
  "train_batch_size": 1,
4964
  "trial_name": null,
4965
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.7060733426735162,
5
  "eval_steps": 1000,
6
+ "global_step": 63500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4940
  "eval_samples_per_second": 2.041,
4941
  "eval_steps_per_second": 2.041,
4942
  "step": 63000
4943
+ },
4944
+ {
4945
+ "epoch": 0.701625636577935,
4946
+ "grad_norm": 0.0,
4947
+ "learning_rate": 4.089320557540778e-05,
4948
+ "loss": 1.9099,
4949
+ "step": 63100
4950
+ },
4951
+ {
4952
+ "epoch": 0.7027375631018302,
4953
+ "grad_norm": 0.0,
4954
+ "learning_rate": 4.061148317695832e-05,
4955
+ "loss": 1.7735,
4956
+ "step": 63200
4957
+ },
4958
+ {
4959
+ "epoch": 0.7038494896257256,
4960
+ "grad_norm": 0.0,
4961
+ "learning_rate": 4.033048708563717e-05,
4962
+ "loss": 1.8294,
4963
+ "step": 63300
4964
+ },
4965
+ {
4966
+ "epoch": 0.7049614161496208,
4967
+ "grad_norm": 0.0,
4968
+ "learning_rate": 4.005022073795828e-05,
4969
+ "loss": 1.8543,
4970
+ "step": 63400
4971
+ },
4972
+ {
4973
+ "epoch": 0.7060733426735162,
4974
+ "grad_norm": 0.0,
4975
+ "learning_rate": 3.97706875615109e-05,
4976
+ "loss": 1.9948,
4977
+ "step": 63500
4978
  }
4979
  ],
4980
  "logging_steps": 100,
 
4994
  "attributes": {}
4995
  }
4996
  },
4997
+ "total_flos": 1.186485573238272e+17,
4998
  "train_batch_size": 1,
4999
  "trial_name": null,
5000
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec38d3cfba225c81db568142403f6ae933af30fd8d4c51ca61ad487333f0fdad
3
  size 5496
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d738de42803f5441d68d35f8b7efcdaf6fb3c40d177d3ebb15d523d13779cf1f
3
  size 5496