tyfeng1997 committed on
Commit c15d42b · verified · 1 Parent(s): cc8e5ce

Model save

Files changed (4)
  1. README.md +58 -0
  2. all_results.json +8 -0
  3. train_results.json +8 -0
  4. trainer_state.json +655 -0
README.md ADDED
@@ -0,0 +1,58 @@
+ ---
+ base_model: Qwen/Qwen3-0.6B
+ library_name: transformers
+ model_name: Qwen3-0.6B-math-orca-qlora-10k-ep1
+ tags:
+ - generated_from_trainer
+ - trl
+ - sft
+ licence: license
+ ---
+
+ # Model Card for Qwen3-0.6B-math-orca-qlora-10k-ep1
+
+ This model is a fine-tuned version of [Qwen/Qwen3-0.6B](https://huggingface.co/Qwen/Qwen3-0.6B).
+ It has been trained using [TRL](https://github.com/huggingface/trl).
+
+ ## Quick start
+
+ ```python
+ from transformers import pipeline
+
+ question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
+ generator = pipeline("text-generation", model="tyfeng1997/Qwen3-0.6B-math-orca-qlora-10k-ep1", device="cuda")
+ output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
+ print(output["generated_text"])
+ ```
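Because the input is passed in chat-messages format, the pipeline applies the model's chat template before generation, and `return_full_text=False` returns only the newly generated assistant reply rather than echoing the prompt.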
+
+ ## Training procedure
+
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/bofeng1997-ty/qwen3-finetune/runs/pd4yxl0p)
+
+ This model was trained with SFT.
+
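The training script itself is not part of this commit. The following is a minimal sketch of what an SFT + QLoRA run matching these logs could look like with TRL; the dataset, its message formatting, and the LoRA hyperparameters are assumptions for illustration, while the learning rate (2e-4), logging interval (5 steps), single epoch, and per-device batch size (8) are taken from the trainer_state.json added in this commit.

```python
# Hypothetical reconstruction of the training run -- the dataset choice, message
# formatting, and LoRA hyperparameters are placeholders, not values recovered
# from this commit.
import torch
from datasets import load_dataset
from peft import LoraConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from trl import SFTConfig, SFTTrainer

model_id = "Qwen/Qwen3-0.6B"

# QLoRA: quantize the frozen base model to 4-bit and train LoRA adapters on top.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)
model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=bnb_config)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Any ~10k-example math instruction dataset would match the logged sample count;
# this particular dataset and its question/answer column names are assumptions.
raw = load_dataset("microsoft/orca-math-word-problems-200k", split="train[:10000]")

def to_messages(example):
    # Convert each question/answer pair into the chat-messages format SFTTrainer expects.
    return {
        "messages": [
            {"role": "user", "content": example["question"]},
            {"role": "assistant", "content": example["answer"]},
        ]
    }

dataset = raw.map(to_messages, remove_columns=raw.column_names)

peft_config = LoraConfig(task_type="CAUSAL_LM", r=16, lora_alpha=32, lora_dropout=0.05)

training_args = SFTConfig(
    output_dir="Qwen3-0.6B-math-orca-qlora-10k-ep1",
    num_train_epochs=1,             # matches num_train_epochs in trainer_state.json
    per_device_train_batch_size=8,  # matches train_batch_size in trainer_state.json
    learning_rate=2e-4,             # matches the logged learning_rate
    lr_scheduler_type="constant",   # the logged learning_rate never decays
    logging_steps=5,                # matches the logging interval in trainer_state.json
    packing=True,                   # inferred from the near-constant num_tokens per step
    report_to="wandb",
)

trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    peft_config=peft_config,
    processing_class=tokenizer,
)
trainer.train()
```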
+ ### Framework versions
+
+ - TRL: 0.18.0.dev0
+ - Transformers: 4.52.0.dev0
+ - PyTorch: 2.6.0
+ - Datasets: 3.5.1
+ - Tokenizers: 0.21.1
+
+ ## Citations
+
+ Cite TRL as:
+
+ ```bibtex
+ @misc{vonwerra2022trl,
+     title = {{TRL: Transformer Reinforcement Learning}},
+     author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallou{\'e}dec},
+     year = 2020,
+     journal = {GitHub repository},
+     publisher = {GitHub},
+     howpublished = {\url{https://github.com/huggingface/trl}}
+ }
+ ```
all_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+     "total_flos": 2.024814536766259e+16,
+     "train_loss": 0.3264346291037167,
+     "train_runtime": 814.8752,
+     "train_samples": 10000,
+     "train_samples_per_second": 6.672,
+     "train_steps_per_second": 0.417
+ }
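These aggregates appear internally consistent with the trainer state below: 0.417 train_steps_per_second × 814.8752 s ≈ 340 optimizer steps (the final global_step), and 6.672 train_samples_per_second × 814.8752 s ≈ 5,437 sequences, which is lower than the 10,000 raw train_samples and therefore consistent with the examples having been packed into fixed-length sequences before batching.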
train_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+     "total_flos": 2.024814536766259e+16,
+     "train_loss": 0.3264346291037167,
+     "train_runtime": 814.8752,
+     "train_samples": 10000,
+     "train_samples_per_second": 6.672,
+     "train_steps_per_second": 0.417
+ }
trainer_state.json ADDED
@@ -0,0 +1,655 @@
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 1.0,
6
+ "eval_steps": 500,
7
+ "global_step": 340,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.014705882352941176,
14
+ "grad_norm": 1.2318819761276245,
15
+ "learning_rate": 0.0002,
16
+ "loss": 1.1427,
17
+ "mean_token_accuracy": 0.7514051914215087,
18
+ "num_tokens": 81920.0,
19
+ "step": 5
20
+ },
21
+ {
22
+ "epoch": 0.029411764705882353,
23
+ "grad_norm": 0.612349808216095,
24
+ "learning_rate": 0.0002,
25
+ "loss": 0.5799,
26
+ "mean_token_accuracy": 0.8597629547119141,
27
+ "num_tokens": 163840.0,
28
+ "step": 10
29
+ },
30
+ {
31
+ "epoch": 0.04411764705882353,
32
+ "grad_norm": 0.4277164936065674,
33
+ "learning_rate": 0.0002,
34
+ "loss": 0.4103,
35
+ "mean_token_accuracy": 0.8957844376564026,
36
+ "num_tokens": 245760.0,
37
+ "step": 15
38
+ },
39
+ {
40
+ "epoch": 0.058823529411764705,
41
+ "grad_norm": 0.3906048536300659,
42
+ "learning_rate": 0.0002,
43
+ "loss": 0.4176,
44
+ "mean_token_accuracy": 0.8923509180545807,
45
+ "num_tokens": 327680.0,
46
+ "step": 20
47
+ },
48
+ {
49
+ "epoch": 0.07352941176470588,
50
+ "grad_norm": 0.3483637571334839,
51
+ "learning_rate": 0.0002,
52
+ "loss": 0.3835,
53
+ "mean_token_accuracy": 0.8978738963603974,
54
+ "num_tokens": 409600.0,
55
+ "step": 25
56
+ },
57
+ {
58
+ "epoch": 0.08823529411764706,
59
+ "grad_norm": 0.35842204093933105,
60
+ "learning_rate": 0.0002,
61
+ "loss": 0.3728,
62
+ "mean_token_accuracy": 0.9008919656276703,
63
+ "num_tokens": 491520.0,
64
+ "step": 30
65
+ },
66
+ {
67
+ "epoch": 0.10294117647058823,
68
+ "grad_norm": 0.29427987337112427,
69
+ "learning_rate": 0.0002,
70
+ "loss": 0.339,
71
+ "mean_token_accuracy": 0.9093719601631165,
72
+ "num_tokens": 573440.0,
73
+ "step": 35
74
+ },
75
+ {
76
+ "epoch": 0.11764705882352941,
77
+ "grad_norm": 0.3110213875770569,
78
+ "learning_rate": 0.0002,
79
+ "loss": 0.3479,
80
+ "mean_token_accuracy": 0.9065738081932068,
81
+ "num_tokens": 655360.0,
82
+ "step": 40
83
+ },
84
+ {
85
+ "epoch": 0.1323529411764706,
86
+ "grad_norm": 0.30392590165138245,
87
+ "learning_rate": 0.0002,
88
+ "loss": 0.3632,
89
+ "mean_token_accuracy": 0.90327467918396,
90
+ "num_tokens": 737280.0,
91
+ "step": 45
92
+ },
93
+ {
94
+ "epoch": 0.14705882352941177,
95
+ "grad_norm": 0.3164522647857666,
96
+ "learning_rate": 0.0002,
97
+ "loss": 0.3519,
98
+ "mean_token_accuracy": 0.9053433358669281,
99
+ "num_tokens": 818998.0,
100
+ "step": 50
101
+ },
102
+ {
103
+ "epoch": 0.16176470588235295,
104
+ "grad_norm": 0.29904985427856445,
105
+ "learning_rate": 0.0002,
106
+ "loss": 0.359,
107
+ "mean_token_accuracy": 0.9019794702529907,
108
+ "num_tokens": 900918.0,
109
+ "step": 55
110
+ },
111
+ {
112
+ "epoch": 0.17647058823529413,
113
+ "grad_norm": 0.3399337828159332,
114
+ "learning_rate": 0.0002,
115
+ "loss": 0.3472,
116
+ "mean_token_accuracy": 0.9052419304847718,
117
+ "num_tokens": 982838.0,
118
+ "step": 60
119
+ },
120
+ {
121
+ "epoch": 0.19117647058823528,
122
+ "grad_norm": 0.3235512673854828,
123
+ "learning_rate": 0.0002,
124
+ "loss": 0.3527,
125
+ "mean_token_accuracy": 0.9031891584396362,
126
+ "num_tokens": 1064758.0,
127
+ "step": 65
128
+ },
129
+ {
130
+ "epoch": 0.20588235294117646,
131
+ "grad_norm": 0.32987555861473083,
132
+ "learning_rate": 0.0002,
133
+ "loss": 0.3596,
134
+ "mean_token_accuracy": 0.9010885059833527,
135
+ "num_tokens": 1145665.0,
136
+ "step": 70
137
+ },
138
+ {
139
+ "epoch": 0.22058823529411764,
140
+ "grad_norm": 0.28296959400177,
141
+ "learning_rate": 0.0002,
142
+ "loss": 0.3186,
143
+ "mean_token_accuracy": 0.9128421485424042,
144
+ "num_tokens": 1227585.0,
145
+ "step": 75
146
+ },
147
+ {
148
+ "epoch": 0.23529411764705882,
149
+ "grad_norm": 0.2940562665462494,
150
+ "learning_rate": 0.0002,
151
+ "loss": 0.3189,
152
+ "mean_token_accuracy": 0.9119745969772339,
153
+ "num_tokens": 1309505.0,
154
+ "step": 80
155
+ },
156
+ {
157
+ "epoch": 0.25,
158
+ "grad_norm": 0.31312814354896545,
159
+ "learning_rate": 0.0002,
160
+ "loss": 0.3366,
161
+ "mean_token_accuracy": 0.9083712756633758,
162
+ "num_tokens": 1390498.0,
163
+ "step": 85
164
+ },
165
+ {
166
+ "epoch": 0.2647058823529412,
167
+ "grad_norm": 0.2923528254032135,
168
+ "learning_rate": 0.0002,
169
+ "loss": 0.3114,
170
+ "mean_token_accuracy": 0.9138196527957916,
171
+ "num_tokens": 1472418.0,
172
+ "step": 90
173
+ },
174
+ {
175
+ "epoch": 0.27941176470588236,
176
+ "grad_norm": 0.2987738847732544,
177
+ "learning_rate": 0.0002,
178
+ "loss": 0.3226,
179
+ "mean_token_accuracy": 0.9115102827548981,
180
+ "num_tokens": 1554338.0,
181
+ "step": 95
182
+ },
183
+ {
184
+ "epoch": 0.29411764705882354,
185
+ "grad_norm": 0.3070703446865082,
186
+ "learning_rate": 0.0002,
187
+ "loss": 0.334,
188
+ "mean_token_accuracy": 0.9086510419845581,
189
+ "num_tokens": 1636258.0,
190
+ "step": 100
191
+ },
192
+ {
193
+ "epoch": 0.3088235294117647,
194
+ "grad_norm": 0.2919357419013977,
195
+ "learning_rate": 0.0002,
196
+ "loss": 0.322,
197
+ "mean_token_accuracy": 0.9099951267242432,
198
+ "num_tokens": 1718178.0,
199
+ "step": 105
200
+ },
201
+ {
202
+ "epoch": 0.3235294117647059,
203
+ "grad_norm": 0.3079027235507965,
204
+ "learning_rate": 0.0002,
205
+ "loss": 0.3195,
206
+ "mean_token_accuracy": 0.9123972117900848,
207
+ "num_tokens": 1799262.0,
208
+ "step": 110
209
+ },
210
+ {
211
+ "epoch": 0.3382352941176471,
212
+ "grad_norm": 0.32008472084999084,
213
+ "learning_rate": 0.0002,
214
+ "loss": 0.3211,
215
+ "mean_token_accuracy": 0.9098729312419891,
216
+ "num_tokens": 1881182.0,
217
+ "step": 115
218
+ },
219
+ {
220
+ "epoch": 0.35294117647058826,
221
+ "grad_norm": 0.33167868852615356,
222
+ "learning_rate": 0.0002,
223
+ "loss": 0.3165,
224
+ "mean_token_accuracy": 0.9122434020042419,
225
+ "num_tokens": 1963102.0,
226
+ "step": 120
227
+ },
228
+ {
229
+ "epoch": 0.36764705882352944,
230
+ "grad_norm": 0.26130759716033936,
231
+ "learning_rate": 0.0002,
232
+ "loss": 0.3101,
233
+ "mean_token_accuracy": 0.9149560272693634,
234
+ "num_tokens": 2045022.0,
235
+ "step": 125
236
+ },
237
+ {
238
+ "epoch": 0.38235294117647056,
239
+ "grad_norm": 0.3016408681869507,
240
+ "learning_rate": 0.0002,
241
+ "loss": 0.3149,
242
+ "mean_token_accuracy": 0.9118534028530121,
243
+ "num_tokens": 2126154.0,
244
+ "step": 130
245
+ },
246
+ {
247
+ "epoch": 0.39705882352941174,
248
+ "grad_norm": 0.3000870645046234,
249
+ "learning_rate": 0.0002,
250
+ "loss": 0.3157,
251
+ "mean_token_accuracy": 0.9116202533245087,
252
+ "num_tokens": 2208074.0,
253
+ "step": 135
254
+ },
255
+ {
256
+ "epoch": 0.4117647058823529,
257
+ "grad_norm": 0.2947154939174652,
258
+ "learning_rate": 0.0002,
259
+ "loss": 0.2991,
260
+ "mean_token_accuracy": 0.916422301530838,
261
+ "num_tokens": 2289994.0,
262
+ "step": 140
263
+ },
264
+ {
265
+ "epoch": 0.4264705882352941,
266
+ "grad_norm": 0.29345065355300903,
267
+ "learning_rate": 0.0002,
268
+ "loss": 0.3192,
269
+ "mean_token_accuracy": 0.9102272808551788,
270
+ "num_tokens": 2371914.0,
271
+ "step": 145
272
+ },
273
+ {
274
+ "epoch": 0.4411764705882353,
275
+ "grad_norm": 0.2984428107738495,
276
+ "learning_rate": 0.0002,
277
+ "loss": 0.298,
278
+ "mean_token_accuracy": 0.9163951098918914,
279
+ "num_tokens": 2453143.0,
280
+ "step": 150
281
+ },
282
+ {
283
+ "epoch": 0.45588235294117646,
284
+ "grad_norm": 0.2700878977775574,
285
+ "learning_rate": 0.0002,
286
+ "loss": 0.291,
287
+ "mean_token_accuracy": 0.9183040201663971,
288
+ "num_tokens": 2535063.0,
289
+ "step": 155
290
+ },
291
+ {
292
+ "epoch": 0.47058823529411764,
293
+ "grad_norm": 0.30076536536216736,
294
+ "learning_rate": 0.0002,
295
+ "loss": 0.3097,
296
+ "mean_token_accuracy": 0.9130865216255188,
297
+ "num_tokens": 2616983.0,
298
+ "step": 160
299
+ },
300
+ {
301
+ "epoch": 0.4852941176470588,
302
+ "grad_norm": 0.30549952387809753,
303
+ "learning_rate": 0.0002,
304
+ "loss": 0.3136,
305
+ "mean_token_accuracy": 0.9121212244033814,
306
+ "num_tokens": 2698903.0,
307
+ "step": 165
308
+ },
309
+ {
310
+ "epoch": 0.5,
311
+ "grad_norm": 0.2821143865585327,
312
+ "learning_rate": 0.0002,
313
+ "loss": 0.3006,
314
+ "mean_token_accuracy": 0.9160520434379578,
315
+ "num_tokens": 2780150.0,
316
+ "step": 170
317
+ },
318
+ {
319
+ "epoch": 0.5147058823529411,
320
+ "grad_norm": 0.2865024507045746,
321
+ "learning_rate": 0.0002,
322
+ "loss": 0.3109,
323
+ "mean_token_accuracy": 0.9121701002120972,
324
+ "num_tokens": 2862070.0,
325
+ "step": 175
326
+ },
327
+ {
328
+ "epoch": 0.5294117647058824,
329
+ "grad_norm": 0.299447238445282,
330
+ "learning_rate": 0.0002,
331
+ "loss": 0.3045,
332
+ "mean_token_accuracy": 0.914674985408783,
333
+ "num_tokens": 2943990.0,
334
+ "step": 180
335
+ },
336
+ {
337
+ "epoch": 0.5441176470588235,
338
+ "grad_norm": 0.28584349155426025,
339
+ "learning_rate": 0.0002,
340
+ "loss": 0.296,
341
+ "mean_token_accuracy": 0.9169232726097107,
342
+ "num_tokens": 3025910.0,
343
+ "step": 185
344
+ },
345
+ {
346
+ "epoch": 0.5588235294117647,
347
+ "grad_norm": 0.28912603855133057,
348
+ "learning_rate": 0.0002,
349
+ "loss": 0.2828,
350
+ "mean_token_accuracy": 0.9202346205711365,
351
+ "num_tokens": 3107830.0,
352
+ "step": 190
353
+ },
354
+ {
355
+ "epoch": 0.5735294117647058,
356
+ "grad_norm": 0.2780699133872986,
357
+ "learning_rate": 0.0002,
358
+ "loss": 0.2943,
359
+ "mean_token_accuracy": 0.917925238609314,
360
+ "num_tokens": 3189750.0,
361
+ "step": 195
362
+ },
363
+ {
364
+ "epoch": 0.5882352941176471,
365
+ "grad_norm": 0.2849072813987732,
366
+ "learning_rate": 0.0002,
367
+ "loss": 0.2909,
368
+ "mean_token_accuracy": 0.9186461567878723,
369
+ "num_tokens": 3271670.0,
370
+ "step": 200
371
+ },
372
+ {
373
+ "epoch": 0.6029411764705882,
374
+ "grad_norm": 0.287589967250824,
375
+ "learning_rate": 0.0002,
376
+ "loss": 0.3006,
377
+ "mean_token_accuracy": 0.9150293409824372,
378
+ "num_tokens": 3353590.0,
379
+ "step": 205
380
+ },
381
+ {
382
+ "epoch": 0.6176470588235294,
383
+ "grad_norm": 0.3039202392101288,
384
+ "learning_rate": 0.0002,
385
+ "loss": 0.3017,
386
+ "mean_token_accuracy": 0.9141373574733734,
387
+ "num_tokens": 3435510.0,
388
+ "step": 210
389
+ },
390
+ {
391
+ "epoch": 0.6323529411764706,
392
+ "grad_norm": 0.29136523604393005,
393
+ "learning_rate": 0.0002,
394
+ "loss": 0.2937,
395
+ "mean_token_accuracy": 0.9157746970653534,
396
+ "num_tokens": 3517430.0,
397
+ "step": 215
398
+ },
399
+ {
400
+ "epoch": 0.6470588235294118,
401
+ "grad_norm": 0.28994059562683105,
402
+ "learning_rate": 0.0002,
403
+ "loss": 0.2948,
404
+ "mean_token_accuracy": 0.9153592526912689,
405
+ "num_tokens": 3599350.0,
406
+ "step": 220
407
+ },
408
+ {
409
+ "epoch": 0.6617647058823529,
410
+ "grad_norm": 0.3030713200569153,
411
+ "learning_rate": 0.0002,
412
+ "loss": 0.3021,
413
+ "mean_token_accuracy": 0.9139174222946167,
414
+ "num_tokens": 3681270.0,
415
+ "step": 225
416
+ },
417
+ {
418
+ "epoch": 0.6764705882352942,
419
+ "grad_norm": 0.2715919017791748,
420
+ "learning_rate": 0.0002,
421
+ "loss": 0.2973,
422
+ "mean_token_accuracy": 0.9151881873607636,
423
+ "num_tokens": 3763190.0,
424
+ "step": 230
425
+ },
426
+ {
427
+ "epoch": 0.6911764705882353,
428
+ "grad_norm": 0.29798802733421326,
429
+ "learning_rate": 0.0002,
430
+ "loss": 0.3004,
431
+ "mean_token_accuracy": 0.9151026546955109,
432
+ "num_tokens": 3845110.0,
433
+ "step": 235
434
+ },
435
+ {
436
+ "epoch": 0.7058823529411765,
437
+ "grad_norm": 0.31128421425819397,
438
+ "learning_rate": 0.0002,
439
+ "loss": 0.3049,
440
+ "mean_token_accuracy": 0.9125122249126434,
441
+ "num_tokens": 3927030.0,
442
+ "step": 240
443
+ },
444
+ {
445
+ "epoch": 0.7205882352941176,
446
+ "grad_norm": 0.282503604888916,
447
+ "learning_rate": 0.0002,
448
+ "loss": 0.2808,
449
+ "mean_token_accuracy": 0.919000506401062,
450
+ "num_tokens": 4008950.0,
451
+ "step": 245
452
+ },
453
+ {
454
+ "epoch": 0.7352941176470589,
455
+ "grad_norm": 0.2817753255367279,
456
+ "learning_rate": 0.0002,
457
+ "loss": 0.2879,
458
+ "mean_token_accuracy": 0.9177908301353455,
459
+ "num_tokens": 4090870.0,
460
+ "step": 250
461
+ },
462
+ {
463
+ "epoch": 0.75,
464
+ "grad_norm": 0.29370447993278503,
465
+ "learning_rate": 0.0002,
466
+ "loss": 0.2798,
467
+ "mean_token_accuracy": 0.9193670749664307,
468
+ "num_tokens": 4172790.0,
469
+ "step": 255
470
+ },
471
+ {
472
+ "epoch": 0.7647058823529411,
473
+ "grad_norm": 0.2587876617908478,
474
+ "learning_rate": 0.0002,
475
+ "loss": 0.2799,
476
+ "mean_token_accuracy": 0.920650064945221,
477
+ "num_tokens": 4254710.0,
478
+ "step": 260
479
+ },
480
+ {
481
+ "epoch": 0.7794117647058824,
482
+ "grad_norm": 0.26823118329048157,
483
+ "learning_rate": 0.0002,
484
+ "loss": 0.2896,
485
+ "mean_token_accuracy": 0.9174364805221558,
486
+ "num_tokens": 4336630.0,
487
+ "step": 265
488
+ },
489
+ {
490
+ "epoch": 0.7941176470588235,
491
+ "grad_norm": 0.2886073589324951,
492
+ "learning_rate": 0.0002,
493
+ "loss": 0.2807,
494
+ "mean_token_accuracy": 0.9185728430747986,
495
+ "num_tokens": 4418550.0,
496
+ "step": 270
497
+ },
498
+ {
499
+ "epoch": 0.8088235294117647,
500
+ "grad_norm": 0.2849334478378296,
501
+ "learning_rate": 0.0002,
502
+ "loss": 0.29,
503
+ "mean_token_accuracy": 0.9182918071746826,
504
+ "num_tokens": 4500470.0,
505
+ "step": 275
506
+ },
507
+ {
508
+ "epoch": 0.8235294117647058,
509
+ "grad_norm": 0.3190767467021942,
510
+ "learning_rate": 0.0002,
511
+ "loss": 0.2815,
512
+ "mean_token_accuracy": 0.9185608327388763,
513
+ "num_tokens": 4582187.0,
514
+ "step": 280
515
+ },
516
+ {
517
+ "epoch": 0.8382352941176471,
518
+ "grad_norm": 0.28610959649086,
519
+ "learning_rate": 0.0002,
520
+ "loss": 0.2932,
521
+ "mean_token_accuracy": 0.9168866276741028,
522
+ "num_tokens": 4664107.0,
523
+ "step": 285
524
+ },
525
+ {
526
+ "epoch": 0.8529411764705882,
527
+ "grad_norm": 0.282124787569046,
528
+ "learning_rate": 0.0002,
529
+ "loss": 0.2833,
530
+ "mean_token_accuracy": 0.9193059802055359,
531
+ "num_tokens": 4746027.0,
532
+ "step": 290
533
+ },
534
+ {
535
+ "epoch": 0.8676470588235294,
536
+ "grad_norm": 0.27180016040802,
537
+ "learning_rate": 0.0002,
538
+ "loss": 0.2743,
539
+ "mean_token_accuracy": 0.9207478165626526,
540
+ "num_tokens": 4827947.0,
541
+ "step": 295
542
+ },
543
+ {
544
+ "epoch": 0.8823529411764706,
545
+ "grad_norm": 0.2949499785900116,
546
+ "learning_rate": 0.0002,
547
+ "loss": 0.2809,
548
+ "mean_token_accuracy": 0.9198436141014099,
549
+ "num_tokens": 4909867.0,
550
+ "step": 300
551
+ },
552
+ {
553
+ "epoch": 0.8970588235294118,
554
+ "grad_norm": 0.29020780324935913,
555
+ "learning_rate": 0.0002,
556
+ "loss": 0.2749,
557
+ "mean_token_accuracy": 0.9195137023925781,
558
+ "num_tokens": 4991787.0,
559
+ "step": 305
560
+ },
561
+ {
562
+ "epoch": 0.9117647058823529,
563
+ "grad_norm": 0.28802114725112915,
564
+ "learning_rate": 0.0002,
565
+ "loss": 0.2692,
566
+ "mean_token_accuracy": 0.9228883624076843,
567
+ "num_tokens": 5073398.0,
568
+ "step": 310
569
+ },
570
+ {
571
+ "epoch": 0.9264705882352942,
572
+ "grad_norm": 0.2924538850784302,
573
+ "learning_rate": 0.0002,
574
+ "loss": 0.2765,
575
+ "mean_token_accuracy": 0.919696980714798,
576
+ "num_tokens": 5155318.0,
577
+ "step": 315
578
+ },
579
+ {
580
+ "epoch": 0.9411764705882353,
581
+ "grad_norm": 0.29523536562919617,
582
+ "learning_rate": 0.0002,
583
+ "loss": 0.2729,
584
+ "mean_token_accuracy": 0.920906662940979,
585
+ "num_tokens": 5237238.0,
586
+ "step": 320
587
+ },
588
+ {
589
+ "epoch": 0.9558823529411765,
590
+ "grad_norm": 0.2890452444553375,
591
+ "learning_rate": 0.0002,
592
+ "loss": 0.2734,
593
+ "mean_token_accuracy": 0.9217497706413269,
594
+ "num_tokens": 5319158.0,
595
+ "step": 325
596
+ },
597
+ {
598
+ "epoch": 0.9705882352941176,
599
+ "grad_norm": 0.2990953326225281,
600
+ "learning_rate": 0.0002,
601
+ "loss": 0.2701,
602
+ "mean_token_accuracy": 0.922544002532959,
603
+ "num_tokens": 5401078.0,
604
+ "step": 330
605
+ },
606
+ {
607
+ "epoch": 0.9852941176470589,
608
+ "grad_norm": 0.27057918906211853,
609
+ "learning_rate": 0.0002,
610
+ "loss": 0.2841,
611
+ "mean_token_accuracy": 0.9188294410705566,
612
+ "num_tokens": 5482998.0,
613
+ "step": 335
614
+ },
615
+ {
616
+ "epoch": 1.0,
617
+ "grad_norm": 0.29458126425743103,
618
+ "learning_rate": 0.0002,
619
+ "loss": 0.2791,
620
+ "mean_token_accuracy": 0.9199340343475342,
621
+ "num_tokens": 5561846.0,
622
+ "step": 340
623
+ },
624
+ {
625
+ "epoch": 1.0,
626
+ "step": 340,
627
+ "total_flos": 2.024814536766259e+16,
628
+ "train_loss": 0.3264346291037167,
629
+ "train_runtime": 814.8752,
630
+ "train_samples_per_second": 6.672,
631
+ "train_steps_per_second": 0.417
632
+ }
633
+ ],
634
+ "logging_steps": 5,
635
+ "max_steps": 340,
636
+ "num_input_tokens_seen": 0,
637
+ "num_train_epochs": 1,
638
+ "save_steps": 500,
639
+ "stateful_callbacks": {
640
+ "TrainerControl": {
641
+ "args": {
642
+ "should_epoch_stop": false,
643
+ "should_evaluate": false,
644
+ "should_log": false,
645
+ "should_save": true,
646
+ "should_training_stop": true
647
+ },
648
+ "attributes": {}
649
+ }
650
+ },
651
+ "total_flos": 2.024814536766259e+16,
652
+ "train_batch_size": 8,
653
+ "trial_name": null,
654
+ "trial_params": null
655
+ }
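The log_history above is plain JSON, so the loss and mean_token_accuracy curves (logged every 5 steps) can be inspected without the Weights & Biases run. A minimal sketch, assuming trainer_state.json has been downloaded locally and matplotlib is installed:

```python
import json

import matplotlib.pyplot as plt

# Load the trainer state and keep only the per-step log entries
# (the final summary record holds aggregate metrics and has no "loss" key).
with open("trainer_state.json") as f:
    state = json.load(f)

logs = [entry for entry in state["log_history"] if "loss" in entry]

steps = [entry["step"] for entry in logs]
loss = [entry["loss"] for entry in logs]
accuracy = [entry["mean_token_accuracy"] for entry in logs]

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
ax1.plot(steps, loss)
ax1.set_xlabel("step")
ax1.set_ylabel("training loss")
ax2.plot(steps, accuracy)
ax2.set_xlabel("step")
ax2.set_ylabel("mean token accuracy")
fig.tight_layout()
plt.show()
```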