marcus2000
committed on
Commit
•
7a567d3
1
Parent(s):
518c166
Saiga_timelist_task200steps
Browse files
- README.md +103 -11
- adapter_config.json +2 -2
- adapter_model.safetensors +1 -1
- training_args.bin +1 -1
README.md
CHANGED
@@ -13,9 +13,9 @@ should probably proofread and complete it, then remove this comment. -->
|
|
13 |
|
14 |
# Saiga_timelist_task200steps
|
15 |
|
16 |
-
This model is a fine-tuned version of [TheBloke/Llama-2-7B-fp16](https://huggingface.co/TheBloke/Llama-2-7B-fp16) on
|
17 |
It achieves the following results on the evaluation set:
|
18 |
-
- Loss: 2.
|
19 |
|
20 |
## Model description
|
21 |
|
@@ -34,7 +34,7 @@ More information needed
|
|
34 |
### Training hyperparameters
|
35 |
|
36 |
The following hyperparameters were used during training:
|
37 |
-
- learning_rate:
|
38 |
- train_batch_size: 2
|
39 |
- eval_batch_size: 8
|
40 |
- seed: 42
|
@@ -48,14 +48,106 @@ The following hyperparameters were used during training:
|
|
48 |
|
49 |
| Training Loss | Epoch | Step | Validation Loss |
|
50 |
|:-------------:|:-----:|:----:|:---------------:|
|
51 |
-
| 2.
|
52 |
-
| 2.
|
53 |
-
| 2.
|
54 |
-
|
|
55 |
-
|
|
56 |
-
|
|
57 |
-
|
|
58 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
|
60 |
|
61 |
### Framework versions
|
|
|
13 |
|
14 |
# Saiga_timelist_task200steps
|
15 |
|
16 |
+
This model is a fine-tuned version of [TheBloke/Llama-2-7B-fp16](https://huggingface.co/TheBloke/Llama-2-7B-fp16) on an unknown dataset.
|
17 |
It achieves the following results on the evaluation set:
|
18 |
+
- Loss: 2.4521
|
19 |
|
20 |
## Model description
|
21 |
|
|
|
34 |
### Training hyperparameters
|
35 |
|
36 |
The following hyperparameters were used during training:
|
37 |
+
- learning_rate: 0.0003
|
38 |
- train_batch_size: 2
|
39 |
- eval_batch_size: 8
|
40 |
- seed: 42
|
|
|
48 |
|
49 |
| Training Loss | Epoch | Step | Validation Loss |
|
50 |
|:-------------:|:-----:|:----:|:---------------:|
|
51 |
+
| 2.2298 | 0.37 | 2 | 2.2020 |
|
52 |
+
| 2.0975 | 0.74 | 4 | 2.1478 |
|
53 |
+
| 2.0243 | 1.11 | 6 | 2.1123 |
|
54 |
+
| 1.988 | 1.48 | 8 | 2.0857 |
|
55 |
+
| 1.9585 | 1.85 | 10 | 2.0692 |
|
56 |
+
| 1.883 | 2.22 | 12 | 2.0570 |
|
57 |
+
| 1.9078 | 2.59 | 14 | 2.0477 |
|
58 |
+
| 1.9179 | 2.96 | 16 | 2.0408 |
|
59 |
+
| 1.8663 | 3.33 | 18 | 2.0366 |
|
60 |
+
| 1.8191 | 3.7 | 20 | 2.0325 |
|
61 |
+
| 1.8515 | 4.07 | 22 | 2.0280 |
|
62 |
+
| 1.8189 | 4.44 | 24 | 2.0246 |
|
63 |
+
| 1.8478 | 4.81 | 26 | 2.0215 |
|
64 |
+
| 1.7767 | 5.19 | 28 | 2.0198 |
|
65 |
+
| 1.7685 | 5.56 | 30 | 2.0190 |
|
66 |
+
| 1.7895 | 5.93 | 32 | 2.0189 |
|
67 |
+
| 1.7285 | 6.3 | 34 | 2.0191 |
|
68 |
+
| 1.7609 | 6.67 | 36 | 2.0174 |
|
69 |
+
| 1.7138 | 7.04 | 38 | 2.0156 |
|
70 |
+
| 1.7112 | 7.41 | 40 | 2.0187 |
|
71 |
+
| 1.7029 | 7.78 | 42 | 2.0216 |
|
72 |
+
| 1.6787 | 8.15 | 44 | 2.0203 |
|
73 |
+
| 1.646 | 8.52 | 46 | 2.0243 |
|
74 |
+
| 1.5996 | 8.89 | 48 | 2.0294 |
|
75 |
+
| 1.6838 | 9.26 | 50 | 2.0280 |
|
76 |
+
| 1.6057 | 9.63 | 52 | 2.0254 |
|
77 |
+
| 1.574 | 10.0 | 54 | 2.0310 |
|
78 |
+
| 1.51 | 10.37 | 56 | 2.0547 |
|
79 |
+
| 1.5951 | 10.74 | 58 | 2.0420 |
|
80 |
+
| 1.5455 | 11.11 | 60 | 2.0350 |
|
81 |
+
| 1.5424 | 11.48 | 62 | 2.0612 |
|
82 |
+
| 1.4933 | 11.85 | 64 | 2.0652 |
|
83 |
+
| 1.5766 | 12.22 | 66 | 2.0537 |
|
84 |
+
| 1.4453 | 12.59 | 68 | 2.0732 |
|
85 |
+
| 1.4683 | 12.96 | 70 | 2.0763 |
|
86 |
+
| 1.4734 | 13.33 | 72 | 2.0805 |
|
87 |
+
| 1.4314 | 13.7 | 74 | 2.0908 |
|
88 |
+
| 1.3921 | 14.07 | 76 | 2.0815 |
|
89 |
+
| 1.4099 | 14.44 | 78 | 2.1134 |
|
90 |
+
| 1.4389 | 14.81 | 80 | 2.0955 |
|
91 |
+
| 1.3114 | 15.19 | 82 | 2.1153 |
|
92 |
+
| 1.3093 | 15.56 | 84 | 2.1303 |
|
93 |
+
| 1.3984 | 15.93 | 86 | 2.1246 |
|
94 |
+
| 1.2831 | 16.3 | 88 | 2.1564 |
|
95 |
+
| 1.2971 | 16.67 | 90 | 2.1284 |
|
96 |
+
| 1.3052 | 17.04 | 92 | 2.1608 |
|
97 |
+
| 1.2421 | 17.41 | 94 | 2.1556 |
|
98 |
+
| 1.1835 | 17.78 | 96 | 2.1734 |
|
99 |
+
| 1.283 | 18.15 | 98 | 2.1773 |
|
100 |
+
| 1.2311 | 18.52 | 100 | 2.1992 |
|
101 |
+
| 1.2428 | 18.89 | 102 | 2.1954 |
|
102 |
+
| 1.1959 | 19.26 | 104 | 2.2065 |
|
103 |
+
| 1.2376 | 19.63 | 106 | 2.2124 |
|
104 |
+
| 1.0689 | 20.0 | 108 | 2.2266 |
|
105 |
+
| 1.1471 | 20.37 | 110 | 2.2266 |
|
106 |
+
| 1.0068 | 20.74 | 112 | 2.2451 |
|
107 |
+
| 1.161 | 21.11 | 114 | 2.2501 |
|
108 |
+
| 1.1252 | 21.48 | 116 | 2.2579 |
|
109 |
+
| 1.0683 | 21.85 | 118 | 2.2595 |
|
110 |
+
| 1.1279 | 22.22 | 120 | 2.2904 |
|
111 |
+
| 0.9923 | 22.59 | 122 | 2.2693 |
|
112 |
+
| 1.0139 | 22.96 | 124 | 2.3008 |
|
113 |
+
| 0.9924 | 23.33 | 126 | 2.3036 |
|
114 |
+
| 1.0418 | 23.7 | 128 | 2.3277 |
|
115 |
+
| 1.0463 | 24.07 | 130 | 2.3043 |
|
116 |
+
| 1.0556 | 24.44 | 132 | 2.3262 |
|
117 |
+
| 0.9991 | 24.81 | 134 | 2.3299 |
|
118 |
+
| 0.96 | 25.19 | 136 | 2.3481 |
|
119 |
+
| 0.9677 | 25.56 | 138 | 2.3458 |
|
120 |
+
| 0.9107 | 25.93 | 140 | 2.3607 |
|
121 |
+
| 0.8962 | 26.3 | 142 | 2.3644 |
|
122 |
+
| 0.916 | 26.67 | 144 | 2.3700 |
|
123 |
+
| 0.9284 | 27.04 | 146 | 2.3726 |
|
124 |
+
| 0.99 | 27.41 | 148 | 2.3860 |
|
125 |
+
| 0.8308 | 27.78 | 150 | 2.3918 |
|
126 |
+
| 0.9459 | 28.15 | 152 | 2.3971 |
|
127 |
+
| 0.9283 | 28.52 | 154 | 2.4030 |
|
128 |
+
| 0.863 | 28.89 | 156 | 2.4024 |
|
129 |
+
| 0.9068 | 29.26 | 158 | 2.4083 |
|
130 |
+
| 0.8623 | 29.63 | 160 | 2.4179 |
|
131 |
+
| 0.8359 | 30.0 | 162 | 2.4262 |
|
132 |
+
| 0.953 | 30.37 | 164 | 2.4281 |
|
133 |
+
| 0.7937 | 30.74 | 166 | 2.4381 |
|
134 |
+
| 0.8274 | 31.11 | 168 | 2.4255 |
|
135 |
+
| 0.8862 | 31.48 | 170 | 2.4330 |
|
136 |
+
| 0.7913 | 31.85 | 172 | 2.4511 |
|
137 |
+
| 0.8436 | 32.22 | 174 | 2.4522 |
|
138 |
+
| 0.8519 | 32.59 | 176 | 2.4413 |
|
139 |
+
| 0.8089 | 32.96 | 178 | 2.4371 |
|
140 |
+
| 0.8876 | 33.33 | 180 | 2.4434 |
|
141 |
+
| 0.7836 | 33.7 | 182 | 2.4532 |
|
142 |
+
| 0.8232 | 34.07 | 184 | 2.4566 |
|
143 |
+
| 0.8299 | 34.44 | 186 | 2.4582 |
|
144 |
+
| 0.7977 | 34.81 | 188 | 2.4553 |
|
145 |
+
| 0.8635 | 35.19 | 190 | 2.4522 |
|
146 |
+
| 0.883 | 35.56 | 192 | 2.4518 |
|
147 |
+
| 0.8158 | 35.93 | 194 | 2.4513 |
|
148 |
+
| 0.8732 | 36.3 | 196 | 2.4518 |
|
149 |
+
| 0.8112 | 36.67 | 198 | 2.4522 |
|
150 |
+
| 0.7869 | 37.04 | 200 | 2.4521 |
|
151 |
|
152 |
|
153 |
### Framework versions
|
adapter_config.json
CHANGED
@@ -20,10 +20,10 @@
|
|
20 |
"rank_pattern": {},
|
21 |
"revision": null,
|
22 |
"target_modules": [
|
|
|
23 |
"o_proj",
|
24 |
-
"q_proj",
|
25 |
"k_proj",
|
26 |
-
"
|
27 |
],
|
28 |
"task_type": "CAUSAL_LM",
|
29 |
"use_dora": false,
|
|
|
20 |
"rank_pattern": {},
|
21 |
"revision": null,
|
22 |
"target_modules": [
|
23 |
+
"v_proj",
|
24 |
"o_proj",
|
|
|
25 |
"k_proj",
|
26 |
+
"q_proj"
|
27 |
],
|
28 |
"task_type": "CAUSAL_LM",
|
29 |
"use_dora": false,
|
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 33589040
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a9bea5c265256b70b68972ba32de9e67f310fbffec033de0305bd8e948543c49
|
3 |
size 33589040
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cf2621031a95373f5c749bcddb2b247868060027a4b737bc35b798763727f080
|
3 |
size 4920
|