tyzhu committed on
Commit
fab413d
·
verified ·
1 Parent(s): 5e21d89

End of training

Browse files
Files changed (6) hide show
  1. README.md +14 -2
  2. all_results.json +15 -0
  3. eval_results.json +10 -0
  4. tokenizer.json +1 -6
  5. train_results.json +8 -0
  6. trainer_state.json +322 -0
README.md CHANGED
@@ -3,11 +3,23 @@ license: llama2
3
  base_model: meta-llama/Llama-2-7b-hf
4
  tags:
5
  - generated_from_trainer
 
 
6
  metrics:
7
  - accuracy
8
  model-index:
9
  - name: lmind_nq_train6000_eval6489_v1_docidx_v3_meta-llama_Llama-2-7b-hf_lora2
10
- results: []
 
 
 
 
 
 
 
 
 
 
11
  ---
12
 
13
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -15,7 +27,7 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # lmind_nq_train6000_eval6489_v1_docidx_v3_meta-llama_Llama-2-7b-hf_lora2
17
 
18
- This model is a fine-tuned version of [meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
  - Loss: 4.3991
21
  - Accuracy: 0.4471
 
3
  base_model: meta-llama/Llama-2-7b-hf
4
  tags:
5
  - generated_from_trainer
6
+ datasets:
7
+ - tyzhu/lmind_nq_train6000_eval6489_v1_docidx_v3
8
  metrics:
9
  - accuracy
10
  model-index:
11
  - name: lmind_nq_train6000_eval6489_v1_docidx_v3_meta-llama_Llama-2-7b-hf_lora2
12
+ results:
13
+ - task:
14
+ name: Causal Language Modeling
15
+ type: text-generation
16
+ dataset:
17
+ name: tyzhu/lmind_nq_train6000_eval6489_v1_docidx_v3
18
+ type: tyzhu/lmind_nq_train6000_eval6489_v1_docidx_v3
19
+ metrics:
20
+ - name: Accuracy
21
+ type: accuracy
22
+ value: 0.4470769230769231
23
  ---
24
 
25
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
27
 
28
  # lmind_nq_train6000_eval6489_v1_docidx_v3_meta-llama_Llama-2-7b-hf_lora2
29
 
30
+ This model is a fine-tuned version of [meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf) on the tyzhu/lmind_nq_train6000_eval6489_v1_docidx_v3 dataset.
31
  It achieves the following results on the evaluation set:
32
  - Loss: 4.3991
33
  - Accuracy: 0.4471
all_results.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 9.99,
3
+ "eval_accuracy": 0.4470769230769231,
4
+ "eval_loss": 4.39909029006958,
5
+ "eval_runtime": 5.0156,
6
+ "eval_samples": 500,
7
+ "eval_samples_per_second": 99.688,
8
+ "eval_steps_per_second": 12.561,
9
+ "perplexity": 81.37680569392009,
10
+ "train_loss": 0.9158074310448163,
11
+ "train_runtime": 2920.4519,
12
+ "train_samples": 10925,
13
+ "train_samples_per_second": 37.409,
14
+ "train_steps_per_second": 1.168
15
+ }
eval_results.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 9.99,
3
+ "eval_accuracy": 0.4470769230769231,
4
+ "eval_loss": 4.39909029006958,
5
+ "eval_runtime": 5.0156,
6
+ "eval_samples": 500,
7
+ "eval_samples_per_second": 99.688,
8
+ "eval_steps_per_second": 12.561,
9
+ "perplexity": 81.37680569392009
10
+ }
tokenizer.json CHANGED
@@ -1,11 +1,6 @@
1
  {
2
  "version": "1.0",
3
- "truncation": {
4
- "direction": "Right",
5
- "max_length": 1024,
6
- "strategy": "LongestFirst",
7
- "stride": 0
8
- },
9
  "padding": null,
10
  "added_tokens": [
11
  {
 
1
  {
2
  "version": "1.0",
3
+ "truncation": null,
 
 
 
 
 
4
  "padding": null,
5
  "added_tokens": [
6
  {
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 9.99,
3
+ "train_loss": 0.9158074310448163,
4
+ "train_runtime": 2920.4519,
5
+ "train_samples": 10925,
6
+ "train_samples_per_second": 37.409,
7
+ "train_steps_per_second": 1.168
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,322 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 9.985358711566619,
5
+ "eval_steps": 500,
6
+ "global_step": 3410,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.29,
13
+ "learning_rate": 0.0001,
14
+ "loss": 1.4842,
15
+ "step": 100
16
+ },
17
+ {
18
+ "epoch": 0.59,
19
+ "learning_rate": 0.0001,
20
+ "loss": 1.4016,
21
+ "step": 200
22
+ },
23
+ {
24
+ "epoch": 0.88,
25
+ "learning_rate": 0.0001,
26
+ "loss": 1.3892,
27
+ "step": 300
28
+ },
29
+ {
30
+ "epoch": 1.0,
31
+ "eval_accuracy": 0.4543589743589744,
32
+ "eval_loss": 3.4055869579315186,
33
+ "eval_runtime": 4.7835,
34
+ "eval_samples_per_second": 104.525,
35
+ "eval_steps_per_second": 13.17,
36
+ "step": 341
37
+ },
38
+ {
39
+ "epoch": 1.17,
40
+ "learning_rate": 0.0001,
41
+ "loss": 1.3608,
42
+ "step": 400
43
+ },
44
+ {
45
+ "epoch": 1.46,
46
+ "learning_rate": 0.0001,
47
+ "loss": 1.3456,
48
+ "step": 500
49
+ },
50
+ {
51
+ "epoch": 1.76,
52
+ "learning_rate": 0.0001,
53
+ "loss": 1.3499,
54
+ "step": 600
55
+ },
56
+ {
57
+ "epoch": 2.0,
58
+ "eval_accuracy": 0.4576923076923077,
59
+ "eval_loss": 3.453113317489624,
60
+ "eval_runtime": 4.7768,
61
+ "eval_samples_per_second": 104.672,
62
+ "eval_steps_per_second": 13.189,
63
+ "step": 683
64
+ },
65
+ {
66
+ "epoch": 2.05,
67
+ "learning_rate": 0.0001,
68
+ "loss": 1.3238,
69
+ "step": 700
70
+ },
71
+ {
72
+ "epoch": 2.34,
73
+ "learning_rate": 0.0001,
74
+ "loss": 1.2486,
75
+ "step": 800
76
+ },
77
+ {
78
+ "epoch": 2.64,
79
+ "learning_rate": 0.0001,
80
+ "loss": 1.2452,
81
+ "step": 900
82
+ },
83
+ {
84
+ "epoch": 2.93,
85
+ "learning_rate": 0.0001,
86
+ "loss": 1.2427,
87
+ "step": 1000
88
+ },
89
+ {
90
+ "epoch": 3.0,
91
+ "eval_accuracy": 0.4584102564102564,
92
+ "eval_loss": 3.671137809753418,
93
+ "eval_runtime": 5.1059,
94
+ "eval_samples_per_second": 97.927,
95
+ "eval_steps_per_second": 12.339,
96
+ "step": 1024
97
+ },
98
+ {
99
+ "epoch": 3.22,
100
+ "learning_rate": 0.0001,
101
+ "loss": 1.1507,
102
+ "step": 1100
103
+ },
104
+ {
105
+ "epoch": 3.51,
106
+ "learning_rate": 0.0001,
107
+ "loss": 1.1218,
108
+ "step": 1200
109
+ },
110
+ {
111
+ "epoch": 3.81,
112
+ "learning_rate": 0.0001,
113
+ "loss": 1.1231,
114
+ "step": 1300
115
+ },
116
+ {
117
+ "epoch": 4.0,
118
+ "eval_accuracy": 0.45697435897435895,
119
+ "eval_loss": 3.7999510765075684,
120
+ "eval_runtime": 4.8751,
121
+ "eval_samples_per_second": 102.561,
122
+ "eval_steps_per_second": 12.923,
123
+ "step": 1366
124
+ },
125
+ {
126
+ "epoch": 4.1,
127
+ "learning_rate": 0.0001,
128
+ "loss": 1.0823,
129
+ "step": 1400
130
+ },
131
+ {
132
+ "epoch": 4.39,
133
+ "learning_rate": 0.0001,
134
+ "loss": 0.997,
135
+ "step": 1500
136
+ },
137
+ {
138
+ "epoch": 4.69,
139
+ "learning_rate": 0.0001,
140
+ "loss": 1.0024,
141
+ "step": 1600
142
+ },
143
+ {
144
+ "epoch": 4.98,
145
+ "learning_rate": 0.0001,
146
+ "loss": 0.995,
147
+ "step": 1700
148
+ },
149
+ {
150
+ "epoch": 5.0,
151
+ "eval_accuracy": 0.4552307692307692,
152
+ "eval_loss": 3.953216314315796,
153
+ "eval_runtime": 4.9741,
154
+ "eval_samples_per_second": 100.522,
155
+ "eval_steps_per_second": 12.666,
156
+ "step": 1707
157
+ },
158
+ {
159
+ "epoch": 5.27,
160
+ "learning_rate": 0.0001,
161
+ "loss": 0.8495,
162
+ "step": 1800
163
+ },
164
+ {
165
+ "epoch": 5.56,
166
+ "learning_rate": 0.0001,
167
+ "loss": 0.854,
168
+ "step": 1900
169
+ },
170
+ {
171
+ "epoch": 5.86,
172
+ "learning_rate": 0.0001,
173
+ "loss": 0.8693,
174
+ "step": 2000
175
+ },
176
+ {
177
+ "epoch": 6.0,
178
+ "eval_accuracy": 0.45261538461538464,
179
+ "eval_loss": 4.0766072273254395,
180
+ "eval_runtime": 5.3625,
181
+ "eval_samples_per_second": 93.24,
182
+ "eval_steps_per_second": 11.748,
183
+ "step": 2049
184
+ },
185
+ {
186
+ "epoch": 6.15,
187
+ "learning_rate": 0.0001,
188
+ "loss": 0.7917,
189
+ "step": 2100
190
+ },
191
+ {
192
+ "epoch": 6.44,
193
+ "learning_rate": 0.0001,
194
+ "loss": 0.7061,
195
+ "step": 2200
196
+ },
197
+ {
198
+ "epoch": 6.73,
199
+ "learning_rate": 0.0001,
200
+ "loss": 0.7302,
201
+ "step": 2300
202
+ },
203
+ {
204
+ "epoch": 7.0,
205
+ "eval_accuracy": 0.4501025641025641,
206
+ "eval_loss": 4.171727180480957,
207
+ "eval_runtime": 5.1508,
208
+ "eval_samples_per_second": 97.073,
209
+ "eval_steps_per_second": 12.231,
210
+ "step": 2390
211
+ },
212
+ {
213
+ "epoch": 7.03,
214
+ "learning_rate": 0.0001,
215
+ "loss": 0.7227,
216
+ "step": 2400
217
+ },
218
+ {
219
+ "epoch": 7.32,
220
+ "learning_rate": 0.0001,
221
+ "loss": 0.573,
222
+ "step": 2500
223
+ },
224
+ {
225
+ "epoch": 7.61,
226
+ "learning_rate": 0.0001,
227
+ "loss": 0.6036,
228
+ "step": 2600
229
+ },
230
+ {
231
+ "epoch": 7.91,
232
+ "learning_rate": 0.0001,
233
+ "loss": 0.6033,
234
+ "step": 2700
235
+ },
236
+ {
237
+ "epoch": 8.0,
238
+ "eval_accuracy": 0.448,
239
+ "eval_loss": 4.277780055999756,
240
+ "eval_runtime": 4.647,
241
+ "eval_samples_per_second": 107.596,
242
+ "eval_steps_per_second": 13.557,
243
+ "step": 2732
244
+ },
245
+ {
246
+ "epoch": 8.2,
247
+ "learning_rate": 0.0001,
248
+ "loss": 0.4945,
249
+ "step": 2800
250
+ },
251
+ {
252
+ "epoch": 8.49,
253
+ "learning_rate": 0.0001,
254
+ "loss": 0.4718,
255
+ "step": 2900
256
+ },
257
+ {
258
+ "epoch": 8.78,
259
+ "learning_rate": 0.0001,
260
+ "loss": 0.4825,
261
+ "step": 3000
262
+ },
263
+ {
264
+ "epoch": 9.0,
265
+ "eval_accuracy": 0.4462051282051282,
266
+ "eval_loss": 4.34151554107666,
267
+ "eval_runtime": 5.1246,
268
+ "eval_samples_per_second": 97.568,
269
+ "eval_steps_per_second": 12.294,
270
+ "step": 3073
271
+ },
272
+ {
273
+ "epoch": 9.08,
274
+ "learning_rate": 0.0001,
275
+ "loss": 0.4538,
276
+ "step": 3100
277
+ },
278
+ {
279
+ "epoch": 9.37,
280
+ "learning_rate": 0.0001,
281
+ "loss": 0.3609,
282
+ "step": 3200
283
+ },
284
+ {
285
+ "epoch": 9.66,
286
+ "learning_rate": 0.0001,
287
+ "loss": 0.3769,
288
+ "step": 3300
289
+ },
290
+ {
291
+ "epoch": 9.96,
292
+ "learning_rate": 0.0001,
293
+ "loss": 0.387,
294
+ "step": 3400
295
+ },
296
+ {
297
+ "epoch": 9.99,
298
+ "eval_accuracy": 0.4470769230769231,
299
+ "eval_loss": 4.39909029006958,
300
+ "eval_runtime": 4.6505,
301
+ "eval_samples_per_second": 107.514,
302
+ "eval_steps_per_second": 13.547,
303
+ "step": 3410
304
+ },
305
+ {
306
+ "epoch": 9.99,
307
+ "step": 3410,
308
+ "total_flos": 6.753170451008061e+17,
309
+ "train_loss": 0.9158074310448163,
310
+ "train_runtime": 2920.4519,
311
+ "train_samples_per_second": 37.409,
312
+ "train_steps_per_second": 1.168
313
+ }
314
+ ],
315
+ "logging_steps": 100,
316
+ "max_steps": 3410,
317
+ "num_train_epochs": 10,
318
+ "save_steps": 500,
319
+ "total_flos": 6.753170451008061e+17,
320
+ "trial_name": null,
321
+ "trial_params": null
322
+ }