DeepDream2045 committed on
Commit
96bfb20
·
verified ·
1 Parent(s): 583046d

End of training

Browse files
README.md CHANGED
@@ -104,7 +104,7 @@ xformers_attention: true
104
 
105
  This model is a fine-tuned version of [NousResearch/Yarn-Mistral-7b-128k](https://huggingface.co/NousResearch/Yarn-Mistral-7b-128k) on the None dataset.
106
  It achieves the following results on the evaluation set:
107
- - Loss: 1.0360
108
 
109
  ## Model description
110
 
@@ -142,8 +142,8 @@ The following hyperparameters were used during training:
142
  | Training Loss | Epoch | Step | Validation Loss |
143
  |:-------------:|:------:|:----:|:---------------:|
144
  | 1.7005 | 0.0124 | 1 | 1.5441 |
145
- | 0.9647 | 0.3091 | 25 | 1.0565 |
146
- | 0.9203 | 0.6182 | 50 | 1.0360 |
147
 
148
 
149
  ### Framework versions
 
104
 
105
  This model is a fine-tuned version of [NousResearch/Yarn-Mistral-7b-128k](https://huggingface.co/NousResearch/Yarn-Mistral-7b-128k) on the None dataset.
106
  It achieves the following results on the evaluation set:
107
+ - Loss: 1.0354
108
 
109
  ## Model description
110
 
 
142
  | Training Loss | Epoch | Step | Validation Loss |
143
  |:-------------:|:------:|:----:|:---------------:|
144
  | 1.7005 | 0.0124 | 1 | 1.5441 |
145
+ | 0.9649 | 0.3091 | 25 | 1.0556 |
146
+ | 0.9192 | 0.6182 | 50 | 1.0354 |
147
 
148
 
149
  ### Framework versions
adapter_config.json CHANGED
@@ -20,13 +20,13 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "o_proj",
24
  "up_proj",
25
  "q_proj",
 
26
  "gate_proj",
27
  "down_proj",
28
  "v_proj",
29
- "k_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
 
23
  "up_proj",
24
  "q_proj",
25
+ "k_proj",
26
  "gate_proj",
27
  "down_proj",
28
  "v_proj",
29
+ "o_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:98a161e02ddd69da4bd3d1ea2cd987f07a7455614c5069d7a7a02d4619f3cab5
3
  size 335706186
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0aa6348b9df39a35e85bcde5fa2600cf277fdc34f6a2e9e45c004c6b987969af
3
  size 335706186
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8aa329c5af8be49237ec6cbf7cd7b41a9d168f1bd675bb8dd6c40d1f9e518fa0
3
  size 335604696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20dba402be068580d83c349df7bfa19ffa24da5405bb81aac9b521966ba1281b
3
  size 335604696
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7ec7a24b66787d2564811f1dc1209b71bc9e9e6e058875b5a7e97f8a60519fc7
3
  size 6776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5088dc23c6397af8783b01b33eec8575b490cbf9f4f4e1a8c92e7c4b1b56b82
3
  size 6776