End of training

Files changed (4)

README.md +6 -4
adapter_config.json +73 -1
adapter_model.bin +3 -0
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -32,13 +32,15 @@ More information needed
 ### Training hyperparameters
 The following hyperparameters were used during training:
-- learning_rate: 0.01
-- train_batch_size: 32
-- eval_batch_size: 32
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: constant
-- lr_scheduler_warmup_ratio: 0.1
 - num_epochs: 3.0
 ### Training results

 ### Training hyperparameters
 The following hyperparameters were used during training:
+- learning_rate: 0.001
+- train_batch_size: 16
+- eval_batch_size: 16
 - seed: 42
+- gradient_accumulation_steps: 2
+- total_train_batch_size: 32
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: constant
+- lr_scheduler_warmup_ratio: 0.05
 - num_epochs: 3.0
 ### Training results

adapter_config.json CHANGED Viewed

@@ -15,29 +15,101 @@
   "revision": null,
   "target_modules": [
     "transformer.h.0.attn.c_attn",
     "transformer.h.1.attn.c_attn",
     "transformer.h.2.attn.c_attn",
     "transformer.h.3.attn.c_attn",
     "transformer.h.4.attn.c_attn",
     "transformer.h.5.attn.c_attn",
     "transformer.h.6.attn.c_attn",
     "transformer.h.7.attn.c_attn",
     "transformer.h.8.attn.c_attn",
     "transformer.h.9.attn.c_attn",
     "transformer.h.10.attn.c_attn",
     "transformer.h.11.attn.c_attn",
     "transformer.h.12.attn.c_attn",
     "transformer.h.13.attn.c_attn",
     "transformer.h.14.attn.c_attn",
     "transformer.h.15.attn.c_attn",
     "transformer.h.16.attn.c_attn",
     "transformer.h.17.attn.c_attn",
     "transformer.h.18.attn.c_attn",
     "transformer.h.19.attn.c_attn",
     "transformer.h.20.attn.c_attn",
     "transformer.h.21.attn.c_attn",
     "transformer.h.22.attn.c_attn",
-    "transformer.h.23.attn.c_attn"
   ],
   "task_type": "CAUSAL_LM"
 }

   "revision": null,
   "target_modules": [
     "transformer.h.0.attn.c_attn",
+    "transformer.h.0.attn.c_proj",
     "transformer.h.1.attn.c_attn",
+    "transformer.h.1.attn.c_proj",
     "transformer.h.2.attn.c_attn",
+    "transformer.h.2.attn.c_proj",
     "transformer.h.3.attn.c_attn",
+    "transformer.h.3.attn.c_proj",
     "transformer.h.4.attn.c_attn",
+    "transformer.h.4.attn.c_proj",
     "transformer.h.5.attn.c_attn",
+    "transformer.h.5.attn.c_proj",
     "transformer.h.6.attn.c_attn",
+    "transformer.h.6.attn.c_proj",
     "transformer.h.7.attn.c_attn",
+    "transformer.h.7.attn.c_proj",
     "transformer.h.8.attn.c_attn",
+    "transformer.h.8.attn.c_proj",
     "transformer.h.9.attn.c_attn",
+    "transformer.h.9.attn.c_proj",
     "transformer.h.10.attn.c_attn",
+    "transformer.h.10.attn.c_proj",
     "transformer.h.11.attn.c_attn",
+    "transformer.h.11.attn.c_proj",
     "transformer.h.12.attn.c_attn",
+    "transformer.h.12.attn.c_proj",
     "transformer.h.13.attn.c_attn",
+    "transformer.h.13.attn.c_proj",
     "transformer.h.14.attn.c_attn",
+    "transformer.h.14.attn.c_proj",
     "transformer.h.15.attn.c_attn",
+    "transformer.h.15.attn.c_proj",
     "transformer.h.16.attn.c_attn",
+    "transformer.h.16.attn.c_proj",
     "transformer.h.17.attn.c_attn",
+    "transformer.h.17.attn.c_proj",
     "transformer.h.18.attn.c_attn",
+    "transformer.h.18.attn.c_proj",
     "transformer.h.19.attn.c_attn",
+    "transformer.h.19.attn.c_proj",
     "transformer.h.20.attn.c_attn",
+    "transformer.h.20.attn.c_proj",
     "transformer.h.21.attn.c_attn",
+    "transformer.h.21.attn.c_proj",
     "transformer.h.22.attn.c_attn",
+    "transformer.h.22.attn.c_proj",
+    "transformer.h.23.attn.c_attn",
+    "transformer.h.23.attn.c_proj",
+    "transformer.h.0.mlp.c_fc",
+    "transformer.h.0.mlp.c_proj",
+    "transformer.h.1.mlp.c_fc",
+    "transformer.h.1.mlp.c_proj",
+    "transformer.h.2.mlp.c_fc",
+    "transformer.h.2.mlp.c_proj",
+    "transformer.h.3.mlp.c_fc",
+    "transformer.h.3.mlp.c_proj",
+    "transformer.h.4.mlp.c_fc",
+    "transformer.h.4.mlp.c_proj",
+    "transformer.h.5.mlp.c_fc",
+    "transformer.h.5.mlp.c_proj",
+    "transformer.h.6.mlp.c_fc",
+    "transformer.h.6.mlp.c_proj",
+    "transformer.h.7.mlp.c_fc",
+    "transformer.h.7.mlp.c_proj",
+    "transformer.h.8.mlp.c_fc",
+    "transformer.h.8.mlp.c_proj",
+    "transformer.h.9.mlp.c_fc",
+    "transformer.h.9.mlp.c_proj",
+    "transformer.h.10.mlp.c_fc",
+    "transformer.h.10.mlp.c_proj",
+    "transformer.h.11.mlp.c_fc",
+    "transformer.h.11.mlp.c_proj",
+    "transformer.h.12.mlp.c_fc",
+    "transformer.h.12.mlp.c_proj",
+    "transformer.h.13.mlp.c_fc",
+    "transformer.h.13.mlp.c_proj",
+    "transformer.h.14.mlp.c_fc",
+    "transformer.h.14.mlp.c_proj",
+    "transformer.h.15.mlp.c_fc",
+    "transformer.h.15.mlp.c_proj",
+    "transformer.h.16.mlp.c_fc",
+    "transformer.h.16.mlp.c_proj",
+    "transformer.h.17.mlp.c_fc",
+    "transformer.h.17.mlp.c_proj",
+    "transformer.h.18.mlp.c_fc",
+    "transformer.h.18.mlp.c_proj",
+    "transformer.h.19.mlp.c_fc",
+    "transformer.h.19.mlp.c_proj",
+    "transformer.h.20.mlp.c_fc",
+    "transformer.h.20.mlp.c_proj",
+    "transformer.h.21.mlp.c_fc",
+    "transformer.h.21.mlp.c_proj",
+    "transformer.h.22.mlp.c_fc",
+    "transformer.h.22.mlp.c_proj",
+    "transformer.h.23.mlp.c_fc",
+    "transformer.h.23.mlp.c_proj"
   ],
   "task_type": "CAUSAL_LM"
 }

adapter_model.bin ADDED Viewed

@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e349a5bc088361e0b11c73ae5d7f5a61fec1281b5a5694af06686fc88025110d
+size 12650829

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7aec30b908f8713803bd37cf76f06b0db58fd5199bc91d0e2a3973e0a745a8f0
 size 4091

 version https://git-lfs.github.com/spec/v1
+oid sha256:3881ce2a7bb2514addd63f19b658ec27b7bf1437c9eac577576e4535e953e79c
 size 4091