crossroderick committed on
Commit
bdd4daa
·
1 Parent(s): 0371387

Major (v5) training update

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. checkpoints/{checkpoint-61500 → checkpoint-46000}/config.json +1 -32
  2. checkpoints/{checkpoint-61500 → checkpoint-46000}/generation_config.json +0 -0
  3. checkpoints/{checkpoint-62000 → checkpoint-46000}/model.safetensors +2 -2
  4. checkpoints/{checkpoint-61500/model.safetensors → checkpoint-46000/optimizer.pt} +2 -2
  5. checkpoints/{checkpoint-62228 → checkpoint-46000}/rng_state.pth +1 -1
  6. checkpoints/{checkpoint-61500 → checkpoint-46000}/scaler.pt +1 -1
  7. checkpoints/{checkpoint-62228 → checkpoint-46000}/scheduler.pt +1 -1
  8. checkpoints/{checkpoint-62228 → checkpoint-46000}/special_tokens_map.json +0 -7
  9. checkpoints/{checkpoint-62000 → checkpoint-46000}/spiece.model +2 -2
  10. checkpoints/checkpoint-46000/tokenizer.json +0 -0
  11. checkpoints/{checkpoint-61500 → checkpoint-46000}/tokenizer_config.json +408 -407
  12. checkpoints/checkpoint-46000/trainer_state.json +678 -0
  13. checkpoints/{checkpoint-61500 → checkpoint-46000}/training_args.bin +0 -0
  14. checkpoints/{checkpoint-62000 → checkpoint-46500}/config.json +1 -32
  15. checkpoints/{checkpoint-62000 → checkpoint-46500}/generation_config.json +0 -0
  16. checkpoints/{checkpoint-62228 → checkpoint-46500}/model.safetensors +2 -2
  17. checkpoints/{checkpoint-62000 → checkpoint-46500}/optimizer.pt +2 -2
  18. checkpoints/{checkpoint-62000 → checkpoint-46500}/rng_state.pth +1 -1
  19. checkpoints/{checkpoint-62228 → checkpoint-46500}/scaler.pt +1 -1
  20. checkpoints/{checkpoint-61500 → checkpoint-46500}/scheduler.pt +1 -1
  21. checkpoints/{checkpoint-61500 → checkpoint-46500}/special_tokens_map.json +0 -7
  22. checkpoints/{checkpoint-62228 → checkpoint-46500}/spiece.model +2 -2
  23. checkpoints/checkpoint-46500/tokenizer.json +0 -0
  24. checkpoints/{checkpoint-62000 → checkpoint-46500}/tokenizer_config.json +408 -407
  25. checkpoints/checkpoint-46500/trainer_state.json +685 -0
  26. checkpoints/{checkpoint-62000 → checkpoint-46500}/training_args.bin +0 -0
  27. checkpoints/{checkpoint-62228 → checkpoint-46670}/config.json +1 -32
  28. checkpoints/{checkpoint-62228 → checkpoint-46670}/generation_config.json +0 -0
  29. checkpoints/checkpoint-46670/model.safetensors +3 -0
  30. checkpoints/{checkpoint-61500 → checkpoint-46670}/optimizer.pt +2 -2
  31. checkpoints/{checkpoint-61500 → checkpoint-46670}/rng_state.pth +1 -1
  32. checkpoints/{checkpoint-62000 → checkpoint-46670}/scaler.pt +1 -1
  33. checkpoints/{checkpoint-62000 → checkpoint-46670}/scheduler.pt +1 -1
  34. checkpoints/{checkpoint-62000 → checkpoint-46670}/special_tokens_map.json +0 -7
  35. checkpoints/{checkpoint-61500 → checkpoint-46670}/spiece.model +2 -2
  36. checkpoints/checkpoint-46670/tokenizer.json +0 -0
  37. checkpoints/{checkpoint-62228 → checkpoint-46670}/tokenizer_config.json +408 -407
  38. checkpoints/checkpoint-46670/trainer_state.json +685 -0
  39. checkpoints/{checkpoint-62228 → checkpoint-46670}/training_args.bin +0 -0
  40. checkpoints/checkpoint-61500/tokenizer.json +0 -0
  41. checkpoints/checkpoint-61500/trainer_state.json +0 -895
  42. checkpoints/checkpoint-62000/tokenizer.json +0 -0
  43. checkpoints/checkpoint-62000/trainer_state.json +0 -902
  44. checkpoints/checkpoint-62228/optimizer.pt +0 -3
  45. checkpoints/checkpoint-62228/tokenizer.json +0 -0
  46. checkpoints/checkpoint-62228/trainer_state.json +0 -902
  47. config.json +28 -59
  48. generation_config.json +7 -0
  49. generator_config.json +0 -7
  50. model.safetensors +2 -2
checkpoints/{checkpoint-61500 → checkpoint-46000}/config.json RENAMED
@@ -16,45 +16,14 @@
16
  "is_gated_act": false,
17
  "layer_norm_epsilon": 1e-06,
18
  "model_type": "t5",
19
- "n_positions": 512,
20
  "num_decoder_layers": 6,
21
  "num_heads": 8,
22
  "num_layers": 6,
23
- "output_past": true,
24
  "pad_token_id": 0,
25
  "relative_attention_max_distance": 128,
26
  "relative_attention_num_buckets": 32,
27
- "task_specific_params": {
28
- "summarization": {
29
- "early_stopping": true,
30
- "length_penalty": 2.0,
31
- "max_length": 200,
32
- "min_length": 30,
33
- "no_repeat_ngram_size": 3,
34
- "num_beams": 4,
35
- "prefix": "summarize: "
36
- },
37
- "translation_en_to_de": {
38
- "early_stopping": true,
39
- "max_length": 300,
40
- "num_beams": 4,
41
- "prefix": "translate English to German: "
42
- },
43
- "translation_en_to_fr": {
44
- "early_stopping": true,
45
- "max_length": 300,
46
- "num_beams": 4,
47
- "prefix": "translate English to French: "
48
- },
49
- "translation_en_to_ro": {
50
- "early_stopping": true,
51
- "max_length": 300,
52
- "num_beams": 4,
53
- "prefix": "translate English to Romanian: "
54
- }
55
- },
56
  "torch_dtype": "float32",
57
  "transformers_version": "4.51.2",
58
  "use_cache": true,
59
- "vocab_size": 32128
60
  }
 
16
  "is_gated_act": false,
17
  "layer_norm_epsilon": 1e-06,
18
  "model_type": "t5",
 
19
  "num_decoder_layers": 6,
20
  "num_heads": 8,
21
  "num_layers": 6,
 
22
  "pad_token_id": 0,
23
  "relative_attention_max_distance": 128,
24
  "relative_attention_num_buckets": 32,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  "torch_dtype": "float32",
26
  "transformers_version": "4.51.2",
27
  "use_cache": true,
28
+ "vocab_size": 40100
29
  }
checkpoints/{checkpoint-61500 → checkpoint-46000}/generation_config.json RENAMED
File without changes
checkpoints/{checkpoint-62000 → checkpoint-46000}/model.safetensors RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c7cd5c5562af41ebf3f5bad6e78dc0b1d978e2c1f8c79e0b2c98beaa577e8f6
3
- size 242041896
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbcbf0366f577fe5eb44e6508ee646f2c448851577e779789277804c1b3b0109
3
+ size 258368552
checkpoints/{checkpoint-61500/model.safetensors → checkpoint-46000/optimizer.pt} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:68a63cff9119ec9d2214a4d45626957783fc01857d6c7e6760338d0ac8d4d117
3
- size 242041896
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a18c15e597bf280ade08806621abf8b97d301df7a2644d62925def8818b6c04
3
+ size 516816826
checkpoints/{checkpoint-62228 → checkpoint-46000}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f5d589f36728497480fc3e3ebdf78468879b4caf040cae34b6063cacd6c3c66f
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbef4bc0f9887068d0db7f201876b7168b3168f70a98130e6b03a4a01ae21526
3
  size 14244
checkpoints/{checkpoint-61500 → checkpoint-46000}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0acbdad870bdcd3f627d6745b7dee6dd7cb36b559345a9d75527af16d7dec0cc
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:548fc86584eafaf1a293dbece88e9fbb1e4a6c458e2f0be88ff6b690236f0848
3
  size 988
checkpoints/{checkpoint-62228 → checkpoint-46000}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cf0e9c969856e979c4c570e31baf3a86446426bf9fd91e807eb94934e82cb44b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:369590859b8297b43f91397f14504bf3f2d1a853ac4e3df5e6fc41a3e8dc7207
3
  size 1064
checkpoints/{checkpoint-62228 → checkpoint-46000}/special_tokens_map.json RENAMED
@@ -101,13 +101,6 @@
101
  "<extra_id_98>",
102
  "<extra_id_99>"
103
  ],
104
- "bos_token": {
105
- "content": "<s>",
106
- "lstrip": false,
107
- "normalized": false,
108
- "rstrip": false,
109
- "single_word": false
110
- },
111
  "eos_token": {
112
  "content": "</s>",
113
  "lstrip": false,
 
101
  "<extra_id_98>",
102
  "<extra_id_99>"
103
  ],
 
 
 
 
 
 
 
104
  "eos_token": {
105
  "content": "</s>",
106
  "lstrip": false,
checkpoints/{checkpoint-62000 → checkpoint-46000}/spiece.model RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
3
- size 791656
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3365205d18a2c0699fb0ee86ab06f3042d553acaa219eb11aa77c3c56f638538
3
+ size 1047337
checkpoints/checkpoint-46000/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/{checkpoint-61500 → checkpoint-46000}/tokenizer_config.json RENAMED
@@ -1,5 +1,5 @@
1
  {
2
- "add_prefix_space": null,
3
  "added_tokens_decoder": {
4
  "0": {
5
  "content": "<pad>",
@@ -10,7 +10,7 @@
10
  "special": true
11
  },
12
  "1": {
13
- "content": "<s>",
14
  "lstrip": false,
15
  "normalized": false,
16
  "rstrip": false,
@@ -18,818 +18,818 @@
18
  "special": true
19
  },
20
  "2": {
21
- "content": "</s>",
22
  "lstrip": false,
23
  "normalized": false,
24
  "rstrip": false,
25
  "single_word": false,
26
- "special": true
27
  },
28
  "3": {
29
- "content": "<unk>",
30
  "lstrip": false,
31
  "normalized": false,
32
  "rstrip": false,
33
  "single_word": false,
34
  "special": true
35
  },
36
- "8000": {
37
- "content": "<extra_id_0>",
38
- "lstrip": false,
39
  "normalized": false,
40
- "rstrip": false,
41
  "single_word": false,
42
  "special": true
43
  },
44
- "8001": {
45
- "content": "<extra_id_1>",
46
- "lstrip": false,
47
  "normalized": false,
48
- "rstrip": false,
49
  "single_word": false,
50
  "special": true
51
  },
52
- "8002": {
53
- "content": "<extra_id_2>",
54
- "lstrip": false,
55
  "normalized": false,
56
- "rstrip": false,
57
  "single_word": false,
58
  "special": true
59
  },
60
- "8003": {
61
- "content": "<extra_id_3>",
62
- "lstrip": false,
63
  "normalized": false,
64
- "rstrip": false,
65
  "single_word": false,
66
  "special": true
67
  },
68
- "8004": {
69
- "content": "<extra_id_4>",
70
- "lstrip": false,
71
  "normalized": false,
72
- "rstrip": false,
73
  "single_word": false,
74
  "special": true
75
  },
76
- "8005": {
77
- "content": "<extra_id_5>",
78
- "lstrip": false,
79
  "normalized": false,
80
- "rstrip": false,
81
  "single_word": false,
82
  "special": true
83
  },
84
- "8006": {
85
- "content": "<extra_id_6>",
86
- "lstrip": false,
87
  "normalized": false,
88
- "rstrip": false,
89
  "single_word": false,
90
  "special": true
91
  },
92
- "8007": {
93
- "content": "<extra_id_7>",
94
- "lstrip": false,
95
  "normalized": false,
96
- "rstrip": false,
97
  "single_word": false,
98
  "special": true
99
  },
100
- "8008": {
101
- "content": "<extra_id_8>",
102
- "lstrip": false,
103
  "normalized": false,
104
- "rstrip": false,
105
  "single_word": false,
106
  "special": true
107
  },
108
- "8009": {
109
- "content": "<extra_id_9>",
110
- "lstrip": false,
111
  "normalized": false,
112
- "rstrip": false,
113
  "single_word": false,
114
  "special": true
115
  },
116
- "8010": {
117
- "content": "<extra_id_10>",
118
- "lstrip": false,
119
  "normalized": false,
120
- "rstrip": false,
121
  "single_word": false,
122
  "special": true
123
  },
124
- "8011": {
125
- "content": "<extra_id_11>",
126
- "lstrip": false,
127
  "normalized": false,
128
- "rstrip": false,
129
  "single_word": false,
130
  "special": true
131
  },
132
- "8012": {
133
- "content": "<extra_id_12>",
134
- "lstrip": false,
135
  "normalized": false,
136
- "rstrip": false,
137
  "single_word": false,
138
  "special": true
139
  },
140
- "8013": {
141
- "content": "<extra_id_13>",
142
- "lstrip": false,
143
  "normalized": false,
144
- "rstrip": false,
145
  "single_word": false,
146
  "special": true
147
  },
148
- "8014": {
149
- "content": "<extra_id_14>",
150
- "lstrip": false,
151
  "normalized": false,
152
- "rstrip": false,
153
  "single_word": false,
154
  "special": true
155
  },
156
- "8015": {
157
- "content": "<extra_id_15>",
158
- "lstrip": false,
159
  "normalized": false,
160
- "rstrip": false,
161
  "single_word": false,
162
  "special": true
163
  },
164
- "8016": {
165
- "content": "<extra_id_16>",
166
- "lstrip": false,
167
  "normalized": false,
168
- "rstrip": false,
169
  "single_word": false,
170
  "special": true
171
  },
172
- "8017": {
173
- "content": "<extra_id_17>",
174
- "lstrip": false,
175
  "normalized": false,
176
- "rstrip": false,
177
  "single_word": false,
178
  "special": true
179
  },
180
- "8018": {
181
- "content": "<extra_id_18>",
182
- "lstrip": false,
183
  "normalized": false,
184
- "rstrip": false,
185
  "single_word": false,
186
  "special": true
187
  },
188
- "8019": {
189
- "content": "<extra_id_19>",
190
- "lstrip": false,
191
  "normalized": false,
192
- "rstrip": false,
193
  "single_word": false,
194
  "special": true
195
  },
196
- "8020": {
197
- "content": "<extra_id_20>",
198
- "lstrip": false,
199
  "normalized": false,
200
- "rstrip": false,
201
  "single_word": false,
202
  "special": true
203
  },
204
- "8021": {
205
- "content": "<extra_id_21>",
206
- "lstrip": false,
207
  "normalized": false,
208
- "rstrip": false,
209
  "single_word": false,
210
  "special": true
211
  },
212
- "8022": {
213
- "content": "<extra_id_22>",
214
- "lstrip": false,
215
  "normalized": false,
216
- "rstrip": false,
217
  "single_word": false,
218
  "special": true
219
  },
220
- "8023": {
221
- "content": "<extra_id_23>",
222
- "lstrip": false,
223
  "normalized": false,
224
- "rstrip": false,
225
  "single_word": false,
226
  "special": true
227
  },
228
- "8024": {
229
- "content": "<extra_id_24>",
230
- "lstrip": false,
231
  "normalized": false,
232
- "rstrip": false,
233
  "single_word": false,
234
  "special": true
235
  },
236
- "8025": {
237
- "content": "<extra_id_25>",
238
- "lstrip": false,
239
  "normalized": false,
240
- "rstrip": false,
241
  "single_word": false,
242
  "special": true
243
  },
244
- "8026": {
245
- "content": "<extra_id_26>",
246
- "lstrip": false,
247
  "normalized": false,
248
- "rstrip": false,
249
  "single_word": false,
250
  "special": true
251
  },
252
- "8027": {
253
- "content": "<extra_id_27>",
254
- "lstrip": false,
255
  "normalized": false,
256
- "rstrip": false,
257
  "single_word": false,
258
  "special": true
259
  },
260
- "8028": {
261
- "content": "<extra_id_28>",
262
- "lstrip": false,
263
  "normalized": false,
264
- "rstrip": false,
265
  "single_word": false,
266
  "special": true
267
  },
268
- "8029": {
269
- "content": "<extra_id_29>",
270
- "lstrip": false,
271
  "normalized": false,
272
- "rstrip": false,
273
  "single_word": false,
274
  "special": true
275
  },
276
- "8030": {
277
- "content": "<extra_id_30>",
278
- "lstrip": false,
279
  "normalized": false,
280
- "rstrip": false,
281
  "single_word": false,
282
  "special": true
283
  },
284
- "8031": {
285
- "content": "<extra_id_31>",
286
- "lstrip": false,
287
  "normalized": false,
288
- "rstrip": false,
289
  "single_word": false,
290
  "special": true
291
  },
292
- "8032": {
293
- "content": "<extra_id_32>",
294
- "lstrip": false,
295
  "normalized": false,
296
- "rstrip": false,
297
  "single_word": false,
298
  "special": true
299
  },
300
- "8033": {
301
- "content": "<extra_id_33>",
302
- "lstrip": false,
303
  "normalized": false,
304
- "rstrip": false,
305
  "single_word": false,
306
  "special": true
307
  },
308
- "8034": {
309
- "content": "<extra_id_34>",
310
- "lstrip": false,
311
  "normalized": false,
312
- "rstrip": false,
313
  "single_word": false,
314
  "special": true
315
  },
316
- "8035": {
317
- "content": "<extra_id_35>",
318
- "lstrip": false,
319
  "normalized": false,
320
- "rstrip": false,
321
  "single_word": false,
322
  "special": true
323
  },
324
- "8036": {
325
- "content": "<extra_id_36>",
326
- "lstrip": false,
327
  "normalized": false,
328
- "rstrip": false,
329
  "single_word": false,
330
  "special": true
331
  },
332
- "8037": {
333
- "content": "<extra_id_37>",
334
- "lstrip": false,
335
  "normalized": false,
336
- "rstrip": false,
337
  "single_word": false,
338
  "special": true
339
  },
340
- "8038": {
341
- "content": "<extra_id_38>",
342
- "lstrip": false,
343
  "normalized": false,
344
- "rstrip": false,
345
  "single_word": false,
346
  "special": true
347
  },
348
- "8039": {
349
- "content": "<extra_id_39>",
350
- "lstrip": false,
351
  "normalized": false,
352
- "rstrip": false,
353
  "single_word": false,
354
  "special": true
355
  },
356
- "8040": {
357
- "content": "<extra_id_40>",
358
- "lstrip": false,
359
  "normalized": false,
360
- "rstrip": false,
361
  "single_word": false,
362
  "special": true
363
  },
364
- "8041": {
365
- "content": "<extra_id_41>",
366
- "lstrip": false,
367
  "normalized": false,
368
- "rstrip": false,
369
  "single_word": false,
370
  "special": true
371
  },
372
- "8042": {
373
- "content": "<extra_id_42>",
374
- "lstrip": false,
375
  "normalized": false,
376
- "rstrip": false,
377
  "single_word": false,
378
  "special": true
379
  },
380
- "8043": {
381
- "content": "<extra_id_43>",
382
- "lstrip": false,
383
  "normalized": false,
384
- "rstrip": false,
385
  "single_word": false,
386
  "special": true
387
  },
388
- "8044": {
389
- "content": "<extra_id_44>",
390
- "lstrip": false,
391
  "normalized": false,
392
- "rstrip": false,
393
  "single_word": false,
394
  "special": true
395
  },
396
- "8045": {
397
- "content": "<extra_id_45>",
398
- "lstrip": false,
399
  "normalized": false,
400
- "rstrip": false,
401
  "single_word": false,
402
  "special": true
403
  },
404
- "8046": {
405
- "content": "<extra_id_46>",
406
- "lstrip": false,
407
  "normalized": false,
408
- "rstrip": false,
409
  "single_word": false,
410
  "special": true
411
  },
412
- "8047": {
413
- "content": "<extra_id_47>",
414
- "lstrip": false,
415
  "normalized": false,
416
- "rstrip": false,
417
  "single_word": false,
418
  "special": true
419
  },
420
- "8048": {
421
- "content": "<extra_id_48>",
422
- "lstrip": false,
423
  "normalized": false,
424
- "rstrip": false,
425
  "single_word": false,
426
  "special": true
427
  },
428
- "8049": {
429
- "content": "<extra_id_49>",
430
- "lstrip": false,
431
  "normalized": false,
432
- "rstrip": false,
433
  "single_word": false,
434
  "special": true
435
  },
436
- "8050": {
437
- "content": "<extra_id_50>",
438
- "lstrip": false,
439
  "normalized": false,
440
- "rstrip": false,
441
  "single_word": false,
442
  "special": true
443
  },
444
- "8051": {
445
- "content": "<extra_id_51>",
446
- "lstrip": false,
447
  "normalized": false,
448
- "rstrip": false,
449
  "single_word": false,
450
  "special": true
451
  },
452
- "8052": {
453
- "content": "<extra_id_52>",
454
- "lstrip": false,
455
  "normalized": false,
456
- "rstrip": false,
457
  "single_word": false,
458
  "special": true
459
  },
460
- "8053": {
461
- "content": "<extra_id_53>",
462
- "lstrip": false,
463
  "normalized": false,
464
- "rstrip": false,
465
  "single_word": false,
466
  "special": true
467
  },
468
- "8054": {
469
- "content": "<extra_id_54>",
470
- "lstrip": false,
471
  "normalized": false,
472
- "rstrip": false,
473
  "single_word": false,
474
  "special": true
475
  },
476
- "8055": {
477
- "content": "<extra_id_55>",
478
- "lstrip": false,
479
  "normalized": false,
480
- "rstrip": false,
481
  "single_word": false,
482
  "special": true
483
  },
484
- "8056": {
485
- "content": "<extra_id_56>",
486
- "lstrip": false,
487
  "normalized": false,
488
- "rstrip": false,
489
  "single_word": false,
490
  "special": true
491
  },
492
- "8057": {
493
- "content": "<extra_id_57>",
494
- "lstrip": false,
495
  "normalized": false,
496
- "rstrip": false,
497
  "single_word": false,
498
  "special": true
499
  },
500
- "8058": {
501
- "content": "<extra_id_58>",
502
- "lstrip": false,
503
  "normalized": false,
504
- "rstrip": false,
505
  "single_word": false,
506
  "special": true
507
  },
508
- "8059": {
509
- "content": "<extra_id_59>",
510
- "lstrip": false,
511
  "normalized": false,
512
- "rstrip": false,
513
  "single_word": false,
514
  "special": true
515
  },
516
- "8060": {
517
- "content": "<extra_id_60>",
518
- "lstrip": false,
519
  "normalized": false,
520
- "rstrip": false,
521
  "single_word": false,
522
  "special": true
523
  },
524
- "8061": {
525
- "content": "<extra_id_61>",
526
- "lstrip": false,
527
  "normalized": false,
528
- "rstrip": false,
529
  "single_word": false,
530
  "special": true
531
  },
532
- "8062": {
533
- "content": "<extra_id_62>",
534
- "lstrip": false,
535
  "normalized": false,
536
- "rstrip": false,
537
  "single_word": false,
538
  "special": true
539
  },
540
- "8063": {
541
- "content": "<extra_id_63>",
542
- "lstrip": false,
543
  "normalized": false,
544
- "rstrip": false,
545
  "single_word": false,
546
  "special": true
547
  },
548
- "8064": {
549
- "content": "<extra_id_64>",
550
- "lstrip": false,
551
  "normalized": false,
552
- "rstrip": false,
553
  "single_word": false,
554
  "special": true
555
  },
556
- "8065": {
557
- "content": "<extra_id_65>",
558
- "lstrip": false,
559
  "normalized": false,
560
- "rstrip": false,
561
  "single_word": false,
562
  "special": true
563
  },
564
- "8066": {
565
- "content": "<extra_id_66>",
566
- "lstrip": false,
567
  "normalized": false,
568
- "rstrip": false,
569
  "single_word": false,
570
  "special": true
571
  },
572
- "8067": {
573
- "content": "<extra_id_67>",
574
- "lstrip": false,
575
  "normalized": false,
576
- "rstrip": false,
577
  "single_word": false,
578
  "special": true
579
  },
580
- "8068": {
581
- "content": "<extra_id_68>",
582
- "lstrip": false,
583
  "normalized": false,
584
- "rstrip": false,
585
  "single_word": false,
586
  "special": true
587
  },
588
- "8069": {
589
- "content": "<extra_id_69>",
590
- "lstrip": false,
591
  "normalized": false,
592
- "rstrip": false,
593
  "single_word": false,
594
  "special": true
595
  },
596
- "8070": {
597
- "content": "<extra_id_70>",
598
- "lstrip": false,
599
  "normalized": false,
600
- "rstrip": false,
601
  "single_word": false,
602
  "special": true
603
  },
604
- "8071": {
605
- "content": "<extra_id_71>",
606
- "lstrip": false,
607
  "normalized": false,
608
- "rstrip": false,
609
  "single_word": false,
610
  "special": true
611
  },
612
- "8072": {
613
- "content": "<extra_id_72>",
614
- "lstrip": false,
615
  "normalized": false,
616
- "rstrip": false,
617
  "single_word": false,
618
  "special": true
619
  },
620
- "8073": {
621
- "content": "<extra_id_73>",
622
- "lstrip": false,
623
  "normalized": false,
624
- "rstrip": false,
625
  "single_word": false,
626
  "special": true
627
  },
628
- "8074": {
629
- "content": "<extra_id_74>",
630
- "lstrip": false,
631
  "normalized": false,
632
- "rstrip": false,
633
  "single_word": false,
634
  "special": true
635
  },
636
- "8075": {
637
- "content": "<extra_id_75>",
638
- "lstrip": false,
639
  "normalized": false,
640
- "rstrip": false,
641
  "single_word": false,
642
  "special": true
643
  },
644
- "8076": {
645
- "content": "<extra_id_76>",
646
- "lstrip": false,
647
  "normalized": false,
648
- "rstrip": false,
649
  "single_word": false,
650
  "special": true
651
  },
652
- "8077": {
653
- "content": "<extra_id_77>",
654
- "lstrip": false,
655
  "normalized": false,
656
- "rstrip": false,
657
  "single_word": false,
658
  "special": true
659
  },
660
- "8078": {
661
- "content": "<extra_id_78>",
662
- "lstrip": false,
663
  "normalized": false,
664
- "rstrip": false,
665
  "single_word": false,
666
  "special": true
667
  },
668
- "8079": {
669
- "content": "<extra_id_79>",
670
- "lstrip": false,
671
  "normalized": false,
672
- "rstrip": false,
673
  "single_word": false,
674
  "special": true
675
  },
676
- "8080": {
677
- "content": "<extra_id_80>",
678
- "lstrip": false,
679
  "normalized": false,
680
- "rstrip": false,
681
  "single_word": false,
682
  "special": true
683
  },
684
- "8081": {
685
- "content": "<extra_id_81>",
686
- "lstrip": false,
687
  "normalized": false,
688
- "rstrip": false,
689
  "single_word": false,
690
  "special": true
691
  },
692
- "8082": {
693
- "content": "<extra_id_82>",
694
- "lstrip": false,
695
  "normalized": false,
696
- "rstrip": false,
697
  "single_word": false,
698
  "special": true
699
  },
700
- "8083": {
701
- "content": "<extra_id_83>",
702
- "lstrip": false,
703
  "normalized": false,
704
- "rstrip": false,
705
  "single_word": false,
706
  "special": true
707
  },
708
- "8084": {
709
- "content": "<extra_id_84>",
710
- "lstrip": false,
711
  "normalized": false,
712
- "rstrip": false,
713
  "single_word": false,
714
  "special": true
715
  },
716
- "8085": {
717
- "content": "<extra_id_85>",
718
- "lstrip": false,
719
  "normalized": false,
720
- "rstrip": false,
721
  "single_word": false,
722
  "special": true
723
  },
724
- "8086": {
725
- "content": "<extra_id_86>",
726
- "lstrip": false,
727
  "normalized": false,
728
- "rstrip": false,
729
  "single_word": false,
730
  "special": true
731
  },
732
- "8087": {
733
- "content": "<extra_id_87>",
734
- "lstrip": false,
735
  "normalized": false,
736
- "rstrip": false,
737
  "single_word": false,
738
  "special": true
739
  },
740
- "8088": {
741
- "content": "<extra_id_88>",
742
- "lstrip": false,
743
  "normalized": false,
744
- "rstrip": false,
745
  "single_word": false,
746
  "special": true
747
  },
748
- "8089": {
749
- "content": "<extra_id_89>",
750
- "lstrip": false,
751
  "normalized": false,
752
- "rstrip": false,
753
  "single_word": false,
754
  "special": true
755
  },
756
- "8090": {
757
- "content": "<extra_id_90>",
758
- "lstrip": false,
759
  "normalized": false,
760
- "rstrip": false,
761
  "single_word": false,
762
  "special": true
763
  },
764
- "8091": {
765
- "content": "<extra_id_91>",
766
- "lstrip": false,
767
  "normalized": false,
768
- "rstrip": false,
769
  "single_word": false,
770
  "special": true
771
  },
772
- "8092": {
773
- "content": "<extra_id_92>",
774
- "lstrip": false,
775
  "normalized": false,
776
- "rstrip": false,
777
  "single_word": false,
778
  "special": true
779
  },
780
- "8093": {
781
- "content": "<extra_id_93>",
782
- "lstrip": false,
783
  "normalized": false,
784
- "rstrip": false,
785
  "single_word": false,
786
  "special": true
787
  },
788
- "8094": {
789
- "content": "<extra_id_94>",
790
- "lstrip": false,
791
  "normalized": false,
792
- "rstrip": false,
793
  "single_word": false,
794
  "special": true
795
  },
796
- "8095": {
797
- "content": "<extra_id_95>",
798
- "lstrip": false,
799
  "normalized": false,
800
- "rstrip": false,
801
  "single_word": false,
802
  "special": true
803
  },
804
- "8096": {
805
- "content": "<extra_id_96>",
806
- "lstrip": false,
807
  "normalized": false,
808
- "rstrip": false,
809
  "single_word": false,
810
  "special": true
811
  },
812
- "8097": {
813
- "content": "<extra_id_97>",
814
- "lstrip": false,
815
  "normalized": false,
816
- "rstrip": false,
817
  "single_word": false,
818
  "special": true
819
  },
820
- "8098": {
821
- "content": "<extra_id_98>",
822
- "lstrip": false,
823
  "normalized": false,
824
- "rstrip": false,
825
  "single_word": false,
826
  "special": true
827
  },
828
- "8099": {
829
- "content": "<extra_id_99>",
830
- "lstrip": false,
831
  "normalized": false,
832
- "rstrip": false,
833
  "single_word": false,
834
  "special": true
835
  }
@@ -936,13 +936,14 @@
936
  "<extra_id_98>",
937
  "<extra_id_99>"
938
  ],
939
- "bos_token": "<s>",
940
  "clean_up_tokenization_spaces": false,
941
  "eos_token": "</s>",
942
  "extra_ids": 100,
943
  "extra_special_tokens": {},
 
944
  "model_max_length": 1000000000000000019884624838656,
945
  "pad_token": "<pad>",
946
- "tokenizer_class": "T5TokenizerFast",
 
947
  "unk_token": "<unk>"
948
  }
 
1
  {
2
+ "add_prefix_space": true,
3
  "added_tokens_decoder": {
4
  "0": {
5
  "content": "<pad>",
 
10
  "special": true
11
  },
12
  "1": {
13
+ "content": "<unk>",
14
  "lstrip": false,
15
  "normalized": false,
16
  "rstrip": false,
 
18
  "special": true
19
  },
20
  "2": {
21
+ "content": "<s>",
22
  "lstrip": false,
23
  "normalized": false,
24
  "rstrip": false,
25
  "single_word": false,
26
+ "special": false
27
  },
28
  "3": {
29
+ "content": "</s>",
30
  "lstrip": false,
31
  "normalized": false,
32
  "rstrip": false,
33
  "single_word": false,
34
  "special": true
35
  },
36
+ "40000": {
37
+ "content": "<extra_id_99>",
38
+ "lstrip": true,
39
  "normalized": false,
40
+ "rstrip": true,
41
  "single_word": false,
42
  "special": true
43
  },
44
+ "40001": {
45
+ "content": "<extra_id_98>",
46
+ "lstrip": true,
47
  "normalized": false,
48
+ "rstrip": true,
49
  "single_word": false,
50
  "special": true
51
  },
52
+ "40002": {
53
+ "content": "<extra_id_97>",
54
+ "lstrip": true,
55
  "normalized": false,
56
+ "rstrip": true,
57
  "single_word": false,
58
  "special": true
59
  },
60
+ "40003": {
61
+ "content": "<extra_id_96>",
62
+ "lstrip": true,
63
  "normalized": false,
64
+ "rstrip": true,
65
  "single_word": false,
66
  "special": true
67
  },
68
+ "40004": {
69
+ "content": "<extra_id_95>",
70
+ "lstrip": true,
71
  "normalized": false,
72
+ "rstrip": true,
73
  "single_word": false,
74
  "special": true
75
  },
76
+ "40005": {
77
+ "content": "<extra_id_94>",
78
+ "lstrip": true,
79
  "normalized": false,
80
+ "rstrip": true,
81
  "single_word": false,
82
  "special": true
83
  },
84
+ "40006": {
85
+ "content": "<extra_id_93>",
86
+ "lstrip": true,
87
  "normalized": false,
88
+ "rstrip": true,
89
  "single_word": false,
90
  "special": true
91
  },
92
+ "40007": {
93
+ "content": "<extra_id_92>",
94
+ "lstrip": true,
95
  "normalized": false,
96
+ "rstrip": true,
97
  "single_word": false,
98
  "special": true
99
  },
100
+ "40008": {
101
+ "content": "<extra_id_91>",
102
+ "lstrip": true,
103
  "normalized": false,
104
+ "rstrip": true,
105
  "single_word": false,
106
  "special": true
107
  },
108
+ "40009": {
109
+ "content": "<extra_id_90>",
110
+ "lstrip": true,
111
  "normalized": false,
112
+ "rstrip": true,
113
  "single_word": false,
114
  "special": true
115
  },
116
+ "40010": {
117
+ "content": "<extra_id_89>",
118
+ "lstrip": true,
119
  "normalized": false,
120
+ "rstrip": true,
121
  "single_word": false,
122
  "special": true
123
  },
124
+ "40011": {
125
+ "content": "<extra_id_88>",
126
+ "lstrip": true,
127
  "normalized": false,
128
+ "rstrip": true,
129
  "single_word": false,
130
  "special": true
131
  },
132
+ "40012": {
133
+ "content": "<extra_id_87>",
134
+ "lstrip": true,
135
  "normalized": false,
136
+ "rstrip": true,
137
  "single_word": false,
138
  "special": true
139
  },
140
+ "40013": {
141
+ "content": "<extra_id_86>",
142
+ "lstrip": true,
143
  "normalized": false,
144
+ "rstrip": true,
145
  "single_word": false,
146
  "special": true
147
  },
148
+ "40014": {
149
+ "content": "<extra_id_85>",
150
+ "lstrip": true,
151
  "normalized": false,
152
+ "rstrip": true,
153
  "single_word": false,
154
  "special": true
155
  },
156
+ "40015": {
157
+ "content": "<extra_id_84>",
158
+ "lstrip": true,
159
  "normalized": false,
160
+ "rstrip": true,
161
  "single_word": false,
162
  "special": true
163
  },
164
+ "40016": {
165
+ "content": "<extra_id_83>",
166
+ "lstrip": true,
167
  "normalized": false,
168
+ "rstrip": true,
169
  "single_word": false,
170
  "special": true
171
  },
172
+ "40017": {
173
+ "content": "<extra_id_82>",
174
+ "lstrip": true,
175
  "normalized": false,
176
+ "rstrip": true,
177
  "single_word": false,
178
  "special": true
179
  },
180
+ "40018": {
181
+ "content": "<extra_id_81>",
182
+ "lstrip": true,
183
  "normalized": false,
184
+ "rstrip": true,
185
  "single_word": false,
186
  "special": true
187
  },
188
+ "40019": {
189
+ "content": "<extra_id_80>",
190
+ "lstrip": true,
191
  "normalized": false,
192
+ "rstrip": true,
193
  "single_word": false,
194
  "special": true
195
  },
196
+ "40020": {
197
+ "content": "<extra_id_79>",
198
+ "lstrip": true,
199
  "normalized": false,
200
+ "rstrip": true,
201
  "single_word": false,
202
  "special": true
203
  },
204
+ "40021": {
205
+ "content": "<extra_id_78>",
206
+ "lstrip": true,
207
  "normalized": false,
208
+ "rstrip": true,
209
  "single_word": false,
210
  "special": true
211
  },
212
+ "40022": {
213
+ "content": "<extra_id_77>",
214
+ "lstrip": true,
215
  "normalized": false,
216
+ "rstrip": true,
217
  "single_word": false,
218
  "special": true
219
  },
220
+ "40023": {
221
+ "content": "<extra_id_76>",
222
+ "lstrip": true,
223
  "normalized": false,
224
+ "rstrip": true,
225
  "single_word": false,
226
  "special": true
227
  },
228
+ "40024": {
229
+ "content": "<extra_id_75>",
230
+ "lstrip": true,
231
  "normalized": false,
232
+ "rstrip": true,
233
  "single_word": false,
234
  "special": true
235
  },
236
+ "40025": {
237
+ "content": "<extra_id_74>",
238
+ "lstrip": true,
239
  "normalized": false,
240
+ "rstrip": true,
241
  "single_word": false,
242
  "special": true
243
  },
244
+ "40026": {
245
+ "content": "<extra_id_73>",
246
+ "lstrip": true,
247
  "normalized": false,
248
+ "rstrip": true,
249
  "single_word": false,
250
  "special": true
251
  },
252
+ "40027": {
253
+ "content": "<extra_id_72>",
254
+ "lstrip": true,
255
  "normalized": false,
256
+ "rstrip": true,
257
  "single_word": false,
258
  "special": true
259
  },
260
+ "40028": {
261
+ "content": "<extra_id_71>",
262
+ "lstrip": true,
263
  "normalized": false,
264
+ "rstrip": true,
265
  "single_word": false,
266
  "special": true
267
  },
268
+ "40029": {
269
+ "content": "<extra_id_70>",
270
+ "lstrip": true,
271
  "normalized": false,
272
+ "rstrip": true,
273
  "single_word": false,
274
  "special": true
275
  },
276
+ "40030": {
277
+ "content": "<extra_id_69>",
278
+ "lstrip": true,
279
  "normalized": false,
280
+ "rstrip": true,
281
  "single_word": false,
282
  "special": true
283
  },
284
+ "40031": {
285
+ "content": "<extra_id_68>",
286
+ "lstrip": true,
287
  "normalized": false,
288
+ "rstrip": true,
289
  "single_word": false,
290
  "special": true
291
  },
292
+ "40032": {
293
+ "content": "<extra_id_67>",
294
+ "lstrip": true,
295
  "normalized": false,
296
+ "rstrip": true,
297
  "single_word": false,
298
  "special": true
299
  },
300
+ "40033": {
301
+ "content": "<extra_id_66>",
302
+ "lstrip": true,
303
  "normalized": false,
304
+ "rstrip": true,
305
  "single_word": false,
306
  "special": true
307
  },
308
+ "40034": {
309
+ "content": "<extra_id_65>",
310
+ "lstrip": true,
311
  "normalized": false,
312
+ "rstrip": true,
313
  "single_word": false,
314
  "special": true
315
  },
316
+ "40035": {
317
+ "content": "<extra_id_64>",
318
+ "lstrip": true,
319
  "normalized": false,
320
+ "rstrip": true,
321
  "single_word": false,
322
  "special": true
323
  },
324
+ "40036": {
325
+ "content": "<extra_id_63>",
326
+ "lstrip": true,
327
  "normalized": false,
328
+ "rstrip": true,
329
  "single_word": false,
330
  "special": true
331
  },
332
+ "40037": {
333
+ "content": "<extra_id_62>",
334
+ "lstrip": true,
335
  "normalized": false,
336
+ "rstrip": true,
337
  "single_word": false,
338
  "special": true
339
  },
340
+ "40038": {
341
+ "content": "<extra_id_61>",
342
+ "lstrip": true,
343
  "normalized": false,
344
+ "rstrip": true,
345
  "single_word": false,
346
  "special": true
347
  },
348
+ "40039": {
349
+ "content": "<extra_id_60>",
350
+ "lstrip": true,
351
  "normalized": false,
352
+ "rstrip": true,
353
  "single_word": false,
354
  "special": true
355
  },
356
+ "40040": {
357
+ "content": "<extra_id_59>",
358
+ "lstrip": true,
359
  "normalized": false,
360
+ "rstrip": true,
361
  "single_word": false,
362
  "special": true
363
  },
364
+ "40041": {
365
+ "content": "<extra_id_58>",
366
+ "lstrip": true,
367
  "normalized": false,
368
+ "rstrip": true,
369
  "single_word": false,
370
  "special": true
371
  },
372
+ "40042": {
373
+ "content": "<extra_id_57>",
374
+ "lstrip": true,
375
  "normalized": false,
376
+ "rstrip": true,
377
  "single_word": false,
378
  "special": true
379
  },
380
+ "40043": {
381
+ "content": "<extra_id_56>",
382
+ "lstrip": true,
383
  "normalized": false,
384
+ "rstrip": true,
385
  "single_word": false,
386
  "special": true
387
  },
388
+ "40044": {
389
+ "content": "<extra_id_55>",
390
+ "lstrip": true,
391
  "normalized": false,
392
+ "rstrip": true,
393
  "single_word": false,
394
  "special": true
395
  },
396
+ "40045": {
397
+ "content": "<extra_id_54>",
398
+ "lstrip": true,
399
  "normalized": false,
400
+ "rstrip": true,
401
  "single_word": false,
402
  "special": true
403
  },
404
+ "40046": {
405
+ "content": "<extra_id_53>",
406
+ "lstrip": true,
407
  "normalized": false,
408
+ "rstrip": true,
409
  "single_word": false,
410
  "special": true
411
  },
412
+ "40047": {
413
+ "content": "<extra_id_52>",
414
+ "lstrip": true,
415
  "normalized": false,
416
+ "rstrip": true,
417
  "single_word": false,
418
  "special": true
419
  },
420
+ "40048": {
421
+ "content": "<extra_id_51>",
422
+ "lstrip": true,
423
  "normalized": false,
424
+ "rstrip": true,
425
  "single_word": false,
426
  "special": true
427
  },
428
+ "40049": {
429
+ "content": "<extra_id_50>",
430
+ "lstrip": true,
431
  "normalized": false,
432
+ "rstrip": true,
433
  "single_word": false,
434
  "special": true
435
  },
436
+ "40050": {
437
+ "content": "<extra_id_49>",
438
+ "lstrip": true,
439
  "normalized": false,
440
+ "rstrip": true,
441
  "single_word": false,
442
  "special": true
443
  },
444
+ "40051": {
445
+ "content": "<extra_id_48>",
446
+ "lstrip": true,
447
  "normalized": false,
448
+ "rstrip": true,
449
  "single_word": false,
450
  "special": true
451
  },
452
+ "40052": {
453
+ "content": "<extra_id_47>",
454
+ "lstrip": true,
455
  "normalized": false,
456
+ "rstrip": true,
457
  "single_word": false,
458
  "special": true
459
  },
460
+ "40053": {
461
+ "content": "<extra_id_46>",
462
+ "lstrip": true,
463
  "normalized": false,
464
+ "rstrip": true,
465
  "single_word": false,
466
  "special": true
467
  },
468
+ "40054": {
469
+ "content": "<extra_id_45>",
470
+ "lstrip": true,
471
  "normalized": false,
472
+ "rstrip": true,
473
  "single_word": false,
474
  "special": true
475
  },
476
+ "40055": {
477
+ "content": "<extra_id_44>",
478
+ "lstrip": true,
479
  "normalized": false,
480
+ "rstrip": true,
481
  "single_word": false,
482
  "special": true
483
  },
484
+ "40056": {
485
+ "content": "<extra_id_43>",
486
+ "lstrip": true,
487
  "normalized": false,
488
+ "rstrip": true,
489
  "single_word": false,
490
  "special": true
491
  },
492
+ "40057": {
493
+ "content": "<extra_id_42>",
494
+ "lstrip": true,
495
  "normalized": false,
496
+ "rstrip": true,
497
  "single_word": false,
498
  "special": true
499
  },
500
+ "40058": {
501
+ "content": "<extra_id_41>",
502
+ "lstrip": true,
503
  "normalized": false,
504
+ "rstrip": true,
505
  "single_word": false,
506
  "special": true
507
  },
508
+ "40059": {
509
+ "content": "<extra_id_40>",
510
+ "lstrip": true,
511
  "normalized": false,
512
+ "rstrip": true,
513
  "single_word": false,
514
  "special": true
515
  },
516
+ "40060": {
517
+ "content": "<extra_id_39>",
518
+ "lstrip": true,
519
  "normalized": false,
520
+ "rstrip": true,
521
  "single_word": false,
522
  "special": true
523
  },
524
+ "40061": {
525
+ "content": "<extra_id_38>",
526
+ "lstrip": true,
527
  "normalized": false,
528
+ "rstrip": true,
529
  "single_word": false,
530
  "special": true
531
  },
532
+ "40062": {
533
+ "content": "<extra_id_37>",
534
+ "lstrip": true,
535
  "normalized": false,
536
+ "rstrip": true,
537
  "single_word": false,
538
  "special": true
539
  },
540
+ "40063": {
541
+ "content": "<extra_id_36>",
542
+ "lstrip": true,
543
  "normalized": false,
544
+ "rstrip": true,
545
  "single_word": false,
546
  "special": true
547
  },
548
+ "40064": {
549
+ "content": "<extra_id_35>",
550
+ "lstrip": true,
551
  "normalized": false,
552
+ "rstrip": true,
553
  "single_word": false,
554
  "special": true
555
  },
556
+ "40065": {
557
+ "content": "<extra_id_34>",
558
+ "lstrip": true,
559
  "normalized": false,
560
+ "rstrip": true,
561
  "single_word": false,
562
  "special": true
563
  },
564
+ "40066": {
565
+ "content": "<extra_id_33>",
566
+ "lstrip": true,
567
  "normalized": false,
568
+ "rstrip": true,
569
  "single_word": false,
570
  "special": true
571
  },
572
+ "40067": {
573
+ "content": "<extra_id_32>",
574
+ "lstrip": true,
575
  "normalized": false,
576
+ "rstrip": true,
577
  "single_word": false,
578
  "special": true
579
  },
580
+ "40068": {
581
+ "content": "<extra_id_31>",
582
+ "lstrip": true,
583
  "normalized": false,
584
+ "rstrip": true,
585
  "single_word": false,
586
  "special": true
587
  },
588
+ "40069": {
589
+ "content": "<extra_id_30>",
590
+ "lstrip": true,
591
  "normalized": false,
592
+ "rstrip": true,
593
  "single_word": false,
594
  "special": true
595
  },
596
+ "40070": {
597
+ "content": "<extra_id_29>",
598
+ "lstrip": true,
599
  "normalized": false,
600
+ "rstrip": true,
601
  "single_word": false,
602
  "special": true
603
  },
604
+ "40071": {
605
+ "content": "<extra_id_28>",
606
+ "lstrip": true,
607
  "normalized": false,
608
+ "rstrip": true,
609
  "single_word": false,
610
  "special": true
611
  },
612
+ "40072": {
613
+ "content": "<extra_id_27>",
614
+ "lstrip": true,
615
  "normalized": false,
616
+ "rstrip": true,
617
  "single_word": false,
618
  "special": true
619
  },
620
+ "40073": {
621
+ "content": "<extra_id_26>",
622
+ "lstrip": true,
623
  "normalized": false,
624
+ "rstrip": true,
625
  "single_word": false,
626
  "special": true
627
  },
628
+ "40074": {
629
+ "content": "<extra_id_25>",
630
+ "lstrip": true,
631
  "normalized": false,
632
+ "rstrip": true,
633
  "single_word": false,
634
  "special": true
635
  },
636
+ "40075": {
637
+ "content": "<extra_id_24>",
638
+ "lstrip": true,
639
  "normalized": false,
640
+ "rstrip": true,
641
  "single_word": false,
642
  "special": true
643
  },
644
+ "40076": {
645
+ "content": "<extra_id_23>",
646
+ "lstrip": true,
647
  "normalized": false,
648
+ "rstrip": true,
649
  "single_word": false,
650
  "special": true
651
  },
652
+ "40077": {
653
+ "content": "<extra_id_22>",
654
+ "lstrip": true,
655
  "normalized": false,
656
+ "rstrip": true,
657
  "single_word": false,
658
  "special": true
659
  },
660
+ "40078": {
661
+ "content": "<extra_id_21>",
662
+ "lstrip": true,
663
  "normalized": false,
664
+ "rstrip": true,
665
  "single_word": false,
666
  "special": true
667
  },
668
+ "40079": {
669
+ "content": "<extra_id_20>",
670
+ "lstrip": true,
671
  "normalized": false,
672
+ "rstrip": true,
673
  "single_word": false,
674
  "special": true
675
  },
676
+ "40080": {
677
+ "content": "<extra_id_19>",
678
+ "lstrip": true,
679
  "normalized": false,
680
+ "rstrip": true,
681
  "single_word": false,
682
  "special": true
683
  },
684
+ "40081": {
685
+ "content": "<extra_id_18>",
686
+ "lstrip": true,
687
  "normalized": false,
688
+ "rstrip": true,
689
  "single_word": false,
690
  "special": true
691
  },
692
+ "40082": {
693
+ "content": "<extra_id_17>",
694
+ "lstrip": true,
695
  "normalized": false,
696
+ "rstrip": true,
697
  "single_word": false,
698
  "special": true
699
  },
700
+ "40083": {
701
+ "content": "<extra_id_16>",
702
+ "lstrip": true,
703
  "normalized": false,
704
+ "rstrip": true,
705
  "single_word": false,
706
  "special": true
707
  },
708
+ "40084": {
709
+ "content": "<extra_id_15>",
710
+ "lstrip": true,
711
  "normalized": false,
712
+ "rstrip": true,
713
  "single_word": false,
714
  "special": true
715
  },
716
+ "40085": {
717
+ "content": "<extra_id_14>",
718
+ "lstrip": true,
719
  "normalized": false,
720
+ "rstrip": true,
721
  "single_word": false,
722
  "special": true
723
  },
724
+ "40086": {
725
+ "content": "<extra_id_13>",
726
+ "lstrip": true,
727
  "normalized": false,
728
+ "rstrip": true,
729
  "single_word": false,
730
  "special": true
731
  },
732
+ "40087": {
733
+ "content": "<extra_id_12>",
734
+ "lstrip": true,
735
  "normalized": false,
736
+ "rstrip": true,
737
  "single_word": false,
738
  "special": true
739
  },
740
+ "40088": {
741
+ "content": "<extra_id_11>",
742
+ "lstrip": true,
743
  "normalized": false,
744
+ "rstrip": true,
745
  "single_word": false,
746
  "special": true
747
  },
748
+ "40089": {
749
+ "content": "<extra_id_10>",
750
+ "lstrip": true,
751
  "normalized": false,
752
+ "rstrip": true,
753
  "single_word": false,
754
  "special": true
755
  },
756
+ "40090": {
757
+ "content": "<extra_id_9>",
758
+ "lstrip": true,
759
  "normalized": false,
760
+ "rstrip": true,
761
  "single_word": false,
762
  "special": true
763
  },
764
+ "40091": {
765
+ "content": "<extra_id_8>",
766
+ "lstrip": true,
767
  "normalized": false,
768
+ "rstrip": true,
769
  "single_word": false,
770
  "special": true
771
  },
772
+ "40092": {
773
+ "content": "<extra_id_7>",
774
+ "lstrip": true,
775
  "normalized": false,
776
+ "rstrip": true,
777
  "single_word": false,
778
  "special": true
779
  },
780
+ "40093": {
781
+ "content": "<extra_id_6>",
782
+ "lstrip": true,
783
  "normalized": false,
784
+ "rstrip": true,
785
  "single_word": false,
786
  "special": true
787
  },
788
+ "40094": {
789
+ "content": "<extra_id_5>",
790
+ "lstrip": true,
791
  "normalized": false,
792
+ "rstrip": true,
793
  "single_word": false,
794
  "special": true
795
  },
796
+ "40095": {
797
+ "content": "<extra_id_4>",
798
+ "lstrip": true,
799
  "normalized": false,
800
+ "rstrip": true,
801
  "single_word": false,
802
  "special": true
803
  },
804
+ "40096": {
805
+ "content": "<extra_id_3>",
806
+ "lstrip": true,
807
  "normalized": false,
808
+ "rstrip": true,
809
  "single_word": false,
810
  "special": true
811
  },
812
+ "40097": {
813
+ "content": "<extra_id_2>",
814
+ "lstrip": true,
815
  "normalized": false,
816
+ "rstrip": true,
817
  "single_word": false,
818
  "special": true
819
  },
820
+ "40098": {
821
+ "content": "<extra_id_1>",
822
+ "lstrip": true,
823
  "normalized": false,
824
+ "rstrip": true,
825
  "single_word": false,
826
  "special": true
827
  },
828
+ "40099": {
829
+ "content": "<extra_id_0>",
830
+ "lstrip": true,
831
  "normalized": false,
832
+ "rstrip": true,
833
  "single_word": false,
834
  "special": true
835
  }
 
936
  "<extra_id_98>",
937
  "<extra_id_99>"
938
  ],
 
939
  "clean_up_tokenization_spaces": false,
940
  "eos_token": "</s>",
941
  "extra_ids": 100,
942
  "extra_special_tokens": {},
943
+ "legacy": true,
944
  "model_max_length": 1000000000000000019884624838656,
945
  "pad_token": "<pad>",
946
+ "sp_model_kwargs": {},
947
+ "tokenizer_class": "T5Tokenizer",
948
  "unk_token": "<unk>"
949
  }
checkpoints/checkpoint-46000/trainer_state.json ADDED
@@ -0,0 +1,678 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 1.9712241006192284,
6
+ "eval_steps": 500,
7
+ "global_step": 46000,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.02142658181740267,
14
+ "grad_norm": 0.4460393786430359,
15
+ "learning_rate": 4.946539532890508e-05,
16
+ "loss": 2.2597,
17
+ "step": 500
18
+ },
19
+ {
20
+ "epoch": 0.04285316363480534,
21
+ "grad_norm": 0.4649476110935211,
22
+ "learning_rate": 4.8929719305763877e-05,
23
+ "loss": 2.0158,
24
+ "step": 1000
25
+ },
26
+ {
27
+ "epoch": 0.064279745452208,
28
+ "grad_norm": 0.5113596320152283,
29
+ "learning_rate": 4.839404328262267e-05,
30
+ "loss": 1.9311,
31
+ "step": 1500
32
+ },
33
+ {
34
+ "epoch": 0.08570632726961068,
35
+ "grad_norm": 0.49711284041404724,
36
+ "learning_rate": 4.785836725948147e-05,
37
+ "loss": 1.8587,
38
+ "step": 2000
39
+ },
40
+ {
41
+ "epoch": 0.10713290908701335,
42
+ "grad_norm": 0.6695660352706909,
43
+ "learning_rate": 4.732269123634027e-05,
44
+ "loss": 1.8219,
45
+ "step": 2500
46
+ },
47
+ {
48
+ "epoch": 0.128559490904416,
49
+ "grad_norm": 0.5876463651657104,
50
+ "learning_rate": 4.678701521319906e-05,
51
+ "loss": 1.7645,
52
+ "step": 3000
53
+ },
54
+ {
55
+ "epoch": 0.1499860727218187,
56
+ "grad_norm": 0.554977297782898,
57
+ "learning_rate": 4.625133919005786e-05,
58
+ "loss": 1.7136,
59
+ "step": 3500
60
+ },
61
+ {
62
+ "epoch": 0.17141265453922136,
63
+ "grad_norm": 0.5401861667633057,
64
+ "learning_rate": 4.571566316691665e-05,
65
+ "loss": 1.696,
66
+ "step": 4000
67
+ },
68
+ {
69
+ "epoch": 0.19283923635662403,
70
+ "grad_norm": 0.5951809287071228,
71
+ "learning_rate": 4.517998714377545e-05,
72
+ "loss": 1.6519,
73
+ "step": 4500
74
+ },
75
+ {
76
+ "epoch": 0.2142658181740267,
77
+ "grad_norm": 0.6037197113037109,
78
+ "learning_rate": 4.464431112063424e-05,
79
+ "loss": 1.6279,
80
+ "step": 5000
81
+ },
82
+ {
83
+ "epoch": 0.23569239999142938,
84
+ "grad_norm": 0.6542425751686096,
85
+ "learning_rate": 4.410863509749304e-05,
86
+ "loss": 1.6104,
87
+ "step": 5500
88
+ },
89
+ {
90
+ "epoch": 0.257118981808832,
91
+ "grad_norm": 0.5781915783882141,
92
+ "learning_rate": 4.357295907435183e-05,
93
+ "loss": 1.5976,
94
+ "step": 6000
95
+ },
96
+ {
97
+ "epoch": 0.2785455636262347,
98
+ "grad_norm": 0.575715184211731,
99
+ "learning_rate": 4.303728305121063e-05,
100
+ "loss": 1.5373,
101
+ "step": 6500
102
+ },
103
+ {
104
+ "epoch": 0.2999721454436374,
105
+ "grad_norm": 0.6789396405220032,
106
+ "learning_rate": 4.2501607028069424e-05,
107
+ "loss": 1.5435,
108
+ "step": 7000
109
+ },
110
+ {
111
+ "epoch": 0.3213987272610401,
112
+ "grad_norm": 0.6032198667526245,
113
+ "learning_rate": 4.196593100492822e-05,
114
+ "loss": 1.5081,
115
+ "step": 7500
116
+ },
117
+ {
118
+ "epoch": 0.3428253090784427,
119
+ "grad_norm": 0.6690296530723572,
120
+ "learning_rate": 4.1430254981787015e-05,
121
+ "loss": 1.5059,
122
+ "step": 8000
123
+ },
124
+ {
125
+ "epoch": 0.36425189089584536,
126
+ "grad_norm": 0.6951475739479065,
127
+ "learning_rate": 4.0894578958645814e-05,
128
+ "loss": 1.4716,
129
+ "step": 8500
130
+ },
131
+ {
132
+ "epoch": 0.38567847271324807,
133
+ "grad_norm": 0.7095356583595276,
134
+ "learning_rate": 4.035890293550461e-05,
135
+ "loss": 1.455,
136
+ "step": 9000
137
+ },
138
+ {
139
+ "epoch": 0.4071050545306507,
140
+ "grad_norm": 0.8375953435897827,
141
+ "learning_rate": 3.9823226912363405e-05,
142
+ "loss": 1.4527,
143
+ "step": 9500
144
+ },
145
+ {
146
+ "epoch": 0.4285316363480534,
147
+ "grad_norm": 0.8582206964492798,
148
+ "learning_rate": 3.9287550889222204e-05,
149
+ "loss": 1.4345,
150
+ "step": 10000
151
+ },
152
+ {
153
+ "epoch": 0.44995821816545606,
154
+ "grad_norm": 0.8591541051864624,
155
+ "learning_rate": 3.8751874866081e-05,
156
+ "loss": 1.4268,
157
+ "step": 10500
158
+ },
159
+ {
160
+ "epoch": 0.47138479998285876,
161
+ "grad_norm": 0.835361897945404,
162
+ "learning_rate": 3.8216198842939796e-05,
163
+ "loss": 1.4045,
164
+ "step": 11000
165
+ },
166
+ {
167
+ "epoch": 0.4928113818002614,
168
+ "grad_norm": 0.9994288086891174,
169
+ "learning_rate": 3.768052281979859e-05,
170
+ "loss": 1.3965,
171
+ "step": 11500
172
+ },
173
+ {
174
+ "epoch": 0.514237963617664,
175
+ "grad_norm": 0.8310408592224121,
176
+ "learning_rate": 3.714484679665739e-05,
177
+ "loss": 1.3577,
178
+ "step": 12000
179
+ },
180
+ {
181
+ "epoch": 0.5356645454350667,
182
+ "grad_norm": 1.061630368232727,
183
+ "learning_rate": 3.660917077351618e-05,
184
+ "loss": 1.3311,
185
+ "step": 12500
186
+ },
187
+ {
188
+ "epoch": 0.5570911272524695,
189
+ "grad_norm": 1.0928676128387451,
190
+ "learning_rate": 3.607349475037498e-05,
191
+ "loss": 1.3203,
192
+ "step": 13000
193
+ },
194
+ {
195
+ "epoch": 0.5785177090698721,
196
+ "grad_norm": 1.4662648439407349,
197
+ "learning_rate": 3.553781872723377e-05,
198
+ "loss": 1.2778,
199
+ "step": 13500
200
+ },
201
+ {
202
+ "epoch": 0.5999442908872747,
203
+ "grad_norm": 1.2254618406295776,
204
+ "learning_rate": 3.500214270409257e-05,
205
+ "loss": 1.2572,
206
+ "step": 14000
207
+ },
208
+ {
209
+ "epoch": 0.6213708727046774,
210
+ "grad_norm": 1.275029182434082,
211
+ "learning_rate": 3.446860938504393e-05,
212
+ "loss": 1.2707,
213
+ "step": 14500
214
+ },
215
+ {
216
+ "epoch": 0.6427974545220801,
217
+ "grad_norm": 1.3197990655899048,
218
+ "learning_rate": 3.3932933361902726e-05,
219
+ "loss": 1.2308,
220
+ "step": 15000
221
+ },
222
+ {
223
+ "epoch": 0.6642240363394828,
224
+ "grad_norm": 1.934342861175537,
225
+ "learning_rate": 3.339725733876152e-05,
226
+ "loss": 1.2268,
227
+ "step": 15500
228
+ },
229
+ {
230
+ "epoch": 0.6856506181568854,
231
+ "grad_norm": 1.445302128791809,
232
+ "learning_rate": 3.286158131562032e-05,
233
+ "loss": 1.2007,
234
+ "step": 16000
235
+ },
236
+ {
237
+ "epoch": 0.7070771999742881,
238
+ "grad_norm": 1.2702606916427612,
239
+ "learning_rate": 3.232697664452539e-05,
240
+ "loss": 1.1785,
241
+ "step": 16500
242
+ },
243
+ {
244
+ "epoch": 0.7285037817916907,
245
+ "grad_norm": 1.254937767982483,
246
+ "learning_rate": 3.179130062138419e-05,
247
+ "loss": 1.1616,
248
+ "step": 17000
249
+ },
250
+ {
251
+ "epoch": 0.7499303636090935,
252
+ "grad_norm": 1.3628320693969727,
253
+ "learning_rate": 3.1255624598242984e-05,
254
+ "loss": 1.1352,
255
+ "step": 17500
256
+ },
257
+ {
258
+ "epoch": 0.7713569454264961,
259
+ "grad_norm": 3.0748207569122314,
260
+ "learning_rate": 3.071994857510178e-05,
261
+ "loss": 1.1243,
262
+ "step": 18000
263
+ },
264
+ {
265
+ "epoch": 0.7927835272438988,
266
+ "grad_norm": 1.4262775182724,
267
+ "learning_rate": 3.0185343904006858e-05,
268
+ "loss": 1.1011,
269
+ "step": 18500
270
+ },
271
+ {
272
+ "epoch": 0.8142101090613014,
273
+ "grad_norm": 2.2326557636260986,
274
+ "learning_rate": 2.9650739232911933e-05,
275
+ "loss": 1.0974,
276
+ "step": 19000
277
+ },
278
+ {
279
+ "epoch": 0.8356366908787041,
280
+ "grad_norm": 2.791055679321289,
281
+ "learning_rate": 2.9115063209770732e-05,
282
+ "loss": 1.0762,
283
+ "step": 19500
284
+ },
285
+ {
286
+ "epoch": 0.8570632726961068,
287
+ "grad_norm": 3.0886242389678955,
288
+ "learning_rate": 2.857938718662953e-05,
289
+ "loss": 1.0698,
290
+ "step": 20000
291
+ },
292
+ {
293
+ "epoch": 0.8784898545135095,
294
+ "grad_norm": 1.4234964847564697,
295
+ "learning_rate": 2.8043711163488323e-05,
296
+ "loss": 1.0541,
297
+ "step": 20500
298
+ },
299
+ {
300
+ "epoch": 0.8999164363309121,
301
+ "grad_norm": 3.275305986404419,
302
+ "learning_rate": 2.75091064923934e-05,
303
+ "loss": 1.0396,
304
+ "step": 21000
305
+ },
306
+ {
307
+ "epoch": 0.9213430181483148,
308
+ "grad_norm": 1.6365532875061035,
309
+ "learning_rate": 2.6973430469252198e-05,
310
+ "loss": 1.0146,
311
+ "step": 21500
312
+ },
313
+ {
314
+ "epoch": 0.9427695999657175,
315
+ "grad_norm": 3.1808159351348877,
316
+ "learning_rate": 2.6437754446110997e-05,
317
+ "loss": 0.9997,
318
+ "step": 22000
319
+ },
320
+ {
321
+ "epoch": 0.9641961817831202,
322
+ "grad_norm": 1.8644700050354004,
323
+ "learning_rate": 2.590207842296979e-05,
324
+ "loss": 0.9905,
325
+ "step": 22500
326
+ },
327
+ {
328
+ "epoch": 0.9856227636005228,
329
+ "grad_norm": 1.5963947772979736,
330
+ "learning_rate": 2.5367473751874864e-05,
331
+ "loss": 0.9624,
332
+ "step": 23000
333
+ },
334
+ {
335
+ "epoch": 1.007027918836108,
336
+ "grad_norm": 1.5474953651428223,
337
+ "learning_rate": 2.4831797728733663e-05,
338
+ "loss": 0.958,
339
+ "step": 23500
340
+ },
341
+ {
342
+ "epoch": 1.0284545006535109,
343
+ "grad_norm": 3.8564932346343994,
344
+ "learning_rate": 2.429612170559246e-05,
345
+ "loss": 0.9388,
346
+ "step": 24000
347
+ },
348
+ {
349
+ "epoch": 1.0498810824709135,
350
+ "grad_norm": 2.154879331588745,
351
+ "learning_rate": 2.3760445682451254e-05,
352
+ "loss": 0.9132,
353
+ "step": 24500
354
+ },
355
+ {
356
+ "epoch": 1.0713076642883161,
357
+ "grad_norm": 4.118302822113037,
358
+ "learning_rate": 2.3225841011356333e-05,
359
+ "loss": 0.9132,
360
+ "step": 25000
361
+ },
362
+ {
363
+ "epoch": 1.0927342461057188,
364
+ "grad_norm": 2.0971686840057373,
365
+ "learning_rate": 2.2690164988215128e-05,
366
+ "loss": 0.8947,
367
+ "step": 25500
368
+ },
369
+ {
370
+ "epoch": 1.1141608279231214,
371
+ "grad_norm": 3.480602741241455,
372
+ "learning_rate": 2.2154488965073924e-05,
373
+ "loss": 0.8922,
374
+ "step": 26000
375
+ },
376
+ {
377
+ "epoch": 1.135587409740524,
378
+ "grad_norm": 4.0732879638671875,
379
+ "learning_rate": 2.161881294193272e-05,
380
+ "loss": 0.87,
381
+ "step": 26500
382
+ },
383
+ {
384
+ "epoch": 1.1570139915579267,
385
+ "grad_norm": 2.2193567752838135,
386
+ "learning_rate": 2.1084208270837798e-05,
387
+ "loss": 0.8476,
388
+ "step": 27000
389
+ },
390
+ {
391
+ "epoch": 1.1784405733753294,
392
+ "grad_norm": 2.1723575592041016,
393
+ "learning_rate": 2.0548532247696594e-05,
394
+ "loss": 0.8322,
395
+ "step": 27500
396
+ },
397
+ {
398
+ "epoch": 1.199867155192732,
399
+ "grad_norm": 3.3503777980804443,
400
+ "learning_rate": 2.001285622455539e-05,
401
+ "loss": 0.8171,
402
+ "step": 28000
403
+ },
404
+ {
405
+ "epoch": 1.2212937370101349,
406
+ "grad_norm": 3.6080329418182373,
407
+ "learning_rate": 1.9477180201414185e-05,
408
+ "loss": 0.8105,
409
+ "step": 28500
410
+ },
411
+ {
412
+ "epoch": 1.2427203188275375,
413
+ "grad_norm": 2.708294630050659,
414
+ "learning_rate": 1.894150417827298e-05,
415
+ "loss": 0.7808,
416
+ "step": 29000
417
+ },
418
+ {
419
+ "epoch": 1.2641469006449402,
420
+ "grad_norm": 3.300328493118286,
421
+ "learning_rate": 1.8405828155131776e-05,
422
+ "loss": 0.7774,
423
+ "step": 29500
424
+ },
425
+ {
426
+ "epoch": 1.2855734824623428,
427
+ "grad_norm": 2.1943624019622803,
428
+ "learning_rate": 1.7870152131990572e-05,
429
+ "loss": 0.7637,
430
+ "step": 30000
431
+ },
432
+ {
433
+ "epoch": 1.3070000642797455,
434
+ "grad_norm": 2.7251927852630615,
435
+ "learning_rate": 1.733554746089565e-05,
436
+ "loss": 0.7481,
437
+ "step": 30500
438
+ },
439
+ {
440
+ "epoch": 1.3284266460971481,
441
+ "grad_norm": 4.672070026397705,
442
+ "learning_rate": 1.6799871437754446e-05,
443
+ "loss": 0.7255,
444
+ "step": 31000
445
+ },
446
+ {
447
+ "epoch": 1.3498532279145508,
448
+ "grad_norm": 2.4984779357910156,
449
+ "learning_rate": 1.626419541461324e-05,
450
+ "loss": 0.7231,
451
+ "step": 31500
452
+ },
453
+ {
454
+ "epoch": 1.3712798097319534,
455
+ "grad_norm": 1.9306334257125854,
456
+ "learning_rate": 1.5728519391472037e-05,
457
+ "loss": 0.7032,
458
+ "step": 32000
459
+ },
460
+ {
461
+ "epoch": 1.392706391549356,
462
+ "grad_norm": 3.015226125717163,
463
+ "learning_rate": 1.5192843368330834e-05,
464
+ "loss": 0.6923,
465
+ "step": 32500
466
+ },
467
+ {
468
+ "epoch": 1.4141329733667587,
469
+ "grad_norm": 3.646979331970215,
470
+ "learning_rate": 1.4658238697235913e-05,
471
+ "loss": 0.6893,
472
+ "step": 33000
473
+ },
474
+ {
475
+ "epoch": 1.4355595551841613,
476
+ "grad_norm": 1.8951635360717773,
477
+ "learning_rate": 1.4122562674094708e-05,
478
+ "loss": 0.6693,
479
+ "step": 33500
480
+ },
481
+ {
482
+ "epoch": 1.4569861370015642,
483
+ "grad_norm": 2.3496530055999756,
484
+ "learning_rate": 1.3586886650953504e-05,
485
+ "loss": 0.6586,
486
+ "step": 34000
487
+ },
488
+ {
489
+ "epoch": 1.4784127188189669,
490
+ "grad_norm": 2.899231195449829,
491
+ "learning_rate": 1.30512106278123e-05,
492
+ "loss": 0.6338,
493
+ "step": 34500
494
+ },
495
+ {
496
+ "epoch": 1.4998393006363695,
497
+ "grad_norm": 2.7465250492095947,
498
+ "learning_rate": 1.2515534604671095e-05,
499
+ "loss": 0.6291,
500
+ "step": 35000
501
+ },
502
+ {
503
+ "epoch": 1.5212658824537721,
504
+ "grad_norm": 2.3695950508117676,
505
+ "learning_rate": 1.1979858581529891e-05,
506
+ "loss": 0.6192,
507
+ "step": 35500
508
+ },
509
+ {
510
+ "epoch": 1.5426924642711748,
511
+ "grad_norm": 3.0361921787261963,
512
+ "learning_rate": 1.1444182558388687e-05,
513
+ "loss": 0.6147,
514
+ "step": 36000
515
+ },
516
+ {
517
+ "epoch": 1.5641190460885774,
518
+ "grad_norm": 3.5043649673461914,
519
+ "learning_rate": 1.0908506535247482e-05,
520
+ "loss": 0.6024,
521
+ "step": 36500
522
+ },
523
+ {
524
+ "epoch": 1.5855456279059803,
525
+ "grad_norm": 2.1166698932647705,
526
+ "learning_rate": 1.037390186415256e-05,
527
+ "loss": 0.6033,
528
+ "step": 37000
529
+ },
530
+ {
531
+ "epoch": 1.606972209723383,
532
+ "grad_norm": 2.8012290000915527,
533
+ "learning_rate": 9.838225841011358e-06,
534
+ "loss": 0.5935,
535
+ "step": 37500
536
+ },
537
+ {
538
+ "epoch": 1.6283987915407856,
539
+ "grad_norm": 2.49540638923645,
540
+ "learning_rate": 9.302549817870154e-06,
541
+ "loss": 0.5819,
542
+ "step": 38000
543
+ },
544
+ {
545
+ "epoch": 1.6498253733581882,
546
+ "grad_norm": 2.233430862426758,
547
+ "learning_rate": 8.766873794728948e-06,
548
+ "loss": 0.5674,
549
+ "step": 38500
550
+ },
551
+ {
552
+ "epoch": 1.6712519551755909,
553
+ "grad_norm": 2.823101282119751,
554
+ "learning_rate": 8.232269123634026e-06,
555
+ "loss": 0.552,
556
+ "step": 39000
557
+ },
558
+ {
559
+ "epoch": 1.6926785369929935,
560
+ "grad_norm": 2.389265775680542,
561
+ "learning_rate": 7.696593100492823e-06,
562
+ "loss": 0.5564,
563
+ "step": 39500
564
+ },
565
+ {
566
+ "epoch": 1.7141051188103962,
567
+ "grad_norm": 2.1136868000030518,
568
+ "learning_rate": 7.161988429397901e-06,
569
+ "loss": 0.5477,
570
+ "step": 40000
571
+ },
572
+ {
573
+ "epoch": 1.7355317006277988,
574
+ "grad_norm": 2.6307172775268555,
575
+ "learning_rate": 6.626312406256696e-06,
576
+ "loss": 0.545,
577
+ "step": 40500
578
+ },
579
+ {
580
+ "epoch": 1.7569582824452015,
581
+ "grad_norm": 3.4348325729370117,
582
+ "learning_rate": 6.090636383115492e-06,
583
+ "loss": 0.5359,
584
+ "step": 41000
585
+ },
586
+ {
587
+ "epoch": 1.7783848642626041,
588
+ "grad_norm": 2.1959691047668457,
589
+ "learning_rate": 5.554960359974288e-06,
590
+ "loss": 0.5267,
591
+ "step": 41500
592
+ },
593
+ {
594
+ "epoch": 1.7998114460800068,
595
+ "grad_norm": 2.747068405151367,
596
+ "learning_rate": 5.0192843368330835e-06,
597
+ "loss": 0.5316,
598
+ "step": 42000
599
+ },
600
+ {
601
+ "epoch": 1.8212380278974094,
602
+ "grad_norm": 2.965233325958252,
603
+ "learning_rate": 4.48360831369188e-06,
604
+ "loss": 0.5129,
605
+ "step": 42500
606
+ },
607
+ {
608
+ "epoch": 1.842664609714812,
609
+ "grad_norm": 2.3425612449645996,
610
+ "learning_rate": 3.9479322905506756e-06,
611
+ "loss": 0.5058,
612
+ "step": 43000
613
+ },
614
+ {
615
+ "epoch": 1.864091191532215,
616
+ "grad_norm": 1.5650664567947388,
617
+ "learning_rate": 3.4122562674094708e-06,
618
+ "loss": 0.5155,
619
+ "step": 43500
620
+ },
621
+ {
622
+ "epoch": 1.8855177733496176,
623
+ "grad_norm": 2.501758098602295,
624
+ "learning_rate": 2.8776515963145492e-06,
625
+ "loss": 0.5076,
626
+ "step": 44000
627
+ },
628
+ {
629
+ "epoch": 1.9069443551670202,
630
+ "grad_norm": 4.431907653808594,
631
+ "learning_rate": 2.341975573173345e-06,
632
+ "loss": 0.5119,
633
+ "step": 44500
634
+ },
635
+ {
636
+ "epoch": 1.9283709369844229,
637
+ "grad_norm": 3.3445732593536377,
638
+ "learning_rate": 1.8062995500321405e-06,
639
+ "loss": 0.5021,
640
+ "step": 45000
641
+ },
642
+ {
643
+ "epoch": 1.9497975188018255,
644
+ "grad_norm": 2.6224589347839355,
645
+ "learning_rate": 1.2706235268909363e-06,
646
+ "loss": 0.5019,
647
+ "step": 45500
648
+ },
649
+ {
650
+ "epoch": 1.9712241006192284,
651
+ "grad_norm": 2.954880952835083,
652
+ "learning_rate": 7.360188557960147e-07,
653
+ "loss": 0.5055,
654
+ "step": 46000
655
+ }
656
+ ],
657
+ "logging_steps": 500,
658
+ "max_steps": 46670,
659
+ "num_input_tokens_seen": 0,
660
+ "num_train_epochs": 2,
661
+ "save_steps": 500,
662
+ "stateful_callbacks": {
663
+ "TrainerControl": {
664
+ "args": {
665
+ "should_epoch_stop": false,
666
+ "should_evaluate": false,
667
+ "should_log": false,
668
+ "should_save": true,
669
+ "should_training_stop": false
670
+ },
671
+ "attributes": {}
672
+ }
673
+ },
674
+ "total_flos": 9.961028013627802e+16,
675
+ "train_batch_size": 32,
676
+ "trial_name": null,
677
+ "trial_params": null
678
+ }
checkpoints/{checkpoint-61500 → checkpoint-46000}/training_args.bin RENAMED
File without changes
checkpoints/{checkpoint-62000 → checkpoint-46500}/config.json RENAMED
@@ -16,45 +16,14 @@
16
  "is_gated_act": false,
17
  "layer_norm_epsilon": 1e-06,
18
  "model_type": "t5",
19
- "n_positions": 512,
20
  "num_decoder_layers": 6,
21
  "num_heads": 8,
22
  "num_layers": 6,
23
- "output_past": true,
24
  "pad_token_id": 0,
25
  "relative_attention_max_distance": 128,
26
  "relative_attention_num_buckets": 32,
27
- "task_specific_params": {
28
- "summarization": {
29
- "early_stopping": true,
30
- "length_penalty": 2.0,
31
- "max_length": 200,
32
- "min_length": 30,
33
- "no_repeat_ngram_size": 3,
34
- "num_beams": 4,
35
- "prefix": "summarize: "
36
- },
37
- "translation_en_to_de": {
38
- "early_stopping": true,
39
- "max_length": 300,
40
- "num_beams": 4,
41
- "prefix": "translate English to German: "
42
- },
43
- "translation_en_to_fr": {
44
- "early_stopping": true,
45
- "max_length": 300,
46
- "num_beams": 4,
47
- "prefix": "translate English to French: "
48
- },
49
- "translation_en_to_ro": {
50
- "early_stopping": true,
51
- "max_length": 300,
52
- "num_beams": 4,
53
- "prefix": "translate English to Romanian: "
54
- }
55
- },
56
  "torch_dtype": "float32",
57
  "transformers_version": "4.51.2",
58
  "use_cache": true,
59
- "vocab_size": 32128
60
  }
 
16
  "is_gated_act": false,
17
  "layer_norm_epsilon": 1e-06,
18
  "model_type": "t5",
 
19
  "num_decoder_layers": 6,
20
  "num_heads": 8,
21
  "num_layers": 6,
 
22
  "pad_token_id": 0,
23
  "relative_attention_max_distance": 128,
24
  "relative_attention_num_buckets": 32,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  "torch_dtype": "float32",
26
  "transformers_version": "4.51.2",
27
  "use_cache": true,
28
+ "vocab_size": 40100
29
  }
checkpoints/{checkpoint-62000 → checkpoint-46500}/generation_config.json RENAMED
File without changes
checkpoints/{checkpoint-62228 → checkpoint-46500}/model.safetensors RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:05e935bcc5054a0b30234c54409c5479f2bd26857c4d8fb472a2417bfd1badd3
3
- size 242041896
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6117b39b66d261b25467054d8872d1cf480f368fc98b19571c29019f4c319c24
3
+ size 258368552
checkpoints/{checkpoint-62000 → checkpoint-46500}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:984de0c937765020e3882843f25bf26f9e754733e744a0cab794a18a0cd30d19
3
- size 484163514
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97e28cd28207350f8adf469dde5eb590eb34a06b3188c6e0e170e398e6bb7e5f
3
+ size 516816826
checkpoints/{checkpoint-62000 → checkpoint-46500}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2dbe2b6f46d4bd694697f8d7b585c0657854163b4c972d02503f1f6dca716d22
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18fd37edbbf331360acc434111264c65f41af940a9e83fb6e3951ef9a6525c6d
3
  size 14244
checkpoints/{checkpoint-62228 → checkpoint-46500}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e08af9833b930adf68f3cffd3bcb35990d1548a9956216431a88f88a9f020248
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22fa6c27c9a935a390b3f08cb3c9ec477106395810169e531cc7c9dce540414e
3
  size 988
checkpoints/{checkpoint-61500 → checkpoint-46500}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c7fac910dbf1051bedbd6fe5b67e886e57f5257d7d0c137811a8f5950f00476b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3595bf23a8cb08cab341768a573b8d03fee6936a6899769d1b4996f6bf401593
3
  size 1064
checkpoints/{checkpoint-61500 → checkpoint-46500}/special_tokens_map.json RENAMED
@@ -101,13 +101,6 @@
101
  "<extra_id_98>",
102
  "<extra_id_99>"
103
  ],
104
- "bos_token": {
105
- "content": "<s>",
106
- "lstrip": false,
107
- "normalized": false,
108
- "rstrip": false,
109
- "single_word": false
110
- },
111
  "eos_token": {
112
  "content": "</s>",
113
  "lstrip": false,
 
101
  "<extra_id_98>",
102
  "<extra_id_99>"
103
  ],
 
 
 
 
 
 
 
104
  "eos_token": {
105
  "content": "</s>",
106
  "lstrip": false,
checkpoints/{checkpoint-62228 → checkpoint-46500}/spiece.model RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
3
- size 791656
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3365205d18a2c0699fb0ee86ab06f3042d553acaa219eb11aa77c3c56f638538
3
+ size 1047337
checkpoints/checkpoint-46500/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/{checkpoint-62000 → checkpoint-46500}/tokenizer_config.json RENAMED
@@ -1,5 +1,5 @@
1
  {
2
- "add_prefix_space": null,
3
  "added_tokens_decoder": {
4
  "0": {
5
  "content": "<pad>",
@@ -10,7 +10,7 @@
10
  "special": true
11
  },
12
  "1": {
13
- "content": "<s>",
14
  "lstrip": false,
15
  "normalized": false,
16
  "rstrip": false,
@@ -18,818 +18,818 @@
18
  "special": true
19
  },
20
  "2": {
21
- "content": "</s>",
22
  "lstrip": false,
23
  "normalized": false,
24
  "rstrip": false,
25
  "single_word": false,
26
- "special": true
27
  },
28
  "3": {
29
- "content": "<unk>",
30
  "lstrip": false,
31
  "normalized": false,
32
  "rstrip": false,
33
  "single_word": false,
34
  "special": true
35
  },
36
- "8000": {
37
- "content": "<extra_id_0>",
38
- "lstrip": false,
39
  "normalized": false,
40
- "rstrip": false,
41
  "single_word": false,
42
  "special": true
43
  },
44
- "8001": {
45
- "content": "<extra_id_1>",
46
- "lstrip": false,
47
  "normalized": false,
48
- "rstrip": false,
49
  "single_word": false,
50
  "special": true
51
  },
52
- "8002": {
53
- "content": "<extra_id_2>",
54
- "lstrip": false,
55
  "normalized": false,
56
- "rstrip": false,
57
  "single_word": false,
58
  "special": true
59
  },
60
- "8003": {
61
- "content": "<extra_id_3>",
62
- "lstrip": false,
63
  "normalized": false,
64
- "rstrip": false,
65
  "single_word": false,
66
  "special": true
67
  },
68
- "8004": {
69
- "content": "<extra_id_4>",
70
- "lstrip": false,
71
  "normalized": false,
72
- "rstrip": false,
73
  "single_word": false,
74
  "special": true
75
  },
76
- "8005": {
77
- "content": "<extra_id_5>",
78
- "lstrip": false,
79
  "normalized": false,
80
- "rstrip": false,
81
  "single_word": false,
82
  "special": true
83
  },
84
- "8006": {
85
- "content": "<extra_id_6>",
86
- "lstrip": false,
87
  "normalized": false,
88
- "rstrip": false,
89
  "single_word": false,
90
  "special": true
91
  },
92
- "8007": {
93
- "content": "<extra_id_7>",
94
- "lstrip": false,
95
  "normalized": false,
96
- "rstrip": false,
97
  "single_word": false,
98
  "special": true
99
  },
100
- "8008": {
101
- "content": "<extra_id_8>",
102
- "lstrip": false,
103
  "normalized": false,
104
- "rstrip": false,
105
  "single_word": false,
106
  "special": true
107
  },
108
- "8009": {
109
- "content": "<extra_id_9>",
110
- "lstrip": false,
111
  "normalized": false,
112
- "rstrip": false,
113
  "single_word": false,
114
  "special": true
115
  },
116
- "8010": {
117
- "content": "<extra_id_10>",
118
- "lstrip": false,
119
  "normalized": false,
120
- "rstrip": false,
121
  "single_word": false,
122
  "special": true
123
  },
124
- "8011": {
125
- "content": "<extra_id_11>",
126
- "lstrip": false,
127
  "normalized": false,
128
- "rstrip": false,
129
  "single_word": false,
130
  "special": true
131
  },
132
- "8012": {
133
- "content": "<extra_id_12>",
134
- "lstrip": false,
135
  "normalized": false,
136
- "rstrip": false,
137
  "single_word": false,
138
  "special": true
139
  },
140
- "8013": {
141
- "content": "<extra_id_13>",
142
- "lstrip": false,
143
  "normalized": false,
144
- "rstrip": false,
145
  "single_word": false,
146
  "special": true
147
  },
148
- "8014": {
149
- "content": "<extra_id_14>",
150
- "lstrip": false,
151
  "normalized": false,
152
- "rstrip": false,
153
  "single_word": false,
154
  "special": true
155
  },
156
- "8015": {
157
- "content": "<extra_id_15>",
158
- "lstrip": false,
159
  "normalized": false,
160
- "rstrip": false,
161
  "single_word": false,
162
  "special": true
163
  },
164
- "8016": {
165
- "content": "<extra_id_16>",
166
- "lstrip": false,
167
  "normalized": false,
168
- "rstrip": false,
169
  "single_word": false,
170
  "special": true
171
  },
172
- "8017": {
173
- "content": "<extra_id_17>",
174
- "lstrip": false,
175
  "normalized": false,
176
- "rstrip": false,
177
  "single_word": false,
178
  "special": true
179
  },
180
- "8018": {
181
- "content": "<extra_id_18>",
182
- "lstrip": false,
183
  "normalized": false,
184
- "rstrip": false,
185
  "single_word": false,
186
  "special": true
187
  },
188
- "8019": {
189
- "content": "<extra_id_19>",
190
- "lstrip": false,
191
  "normalized": false,
192
- "rstrip": false,
193
  "single_word": false,
194
  "special": true
195
  },
196
- "8020": {
197
- "content": "<extra_id_20>",
198
- "lstrip": false,
199
  "normalized": false,
200
- "rstrip": false,
201
  "single_word": false,
202
  "special": true
203
  },
204
- "8021": {
205
- "content": "<extra_id_21>",
206
- "lstrip": false,
207
  "normalized": false,
208
- "rstrip": false,
209
  "single_word": false,
210
  "special": true
211
  },
212
- "8022": {
213
- "content": "<extra_id_22>",
214
- "lstrip": false,
215
  "normalized": false,
216
- "rstrip": false,
217
  "single_word": false,
218
  "special": true
219
  },
220
- "8023": {
221
- "content": "<extra_id_23>",
222
- "lstrip": false,
223
  "normalized": false,
224
- "rstrip": false,
225
  "single_word": false,
226
  "special": true
227
  },
228
- "8024": {
229
- "content": "<extra_id_24>",
230
- "lstrip": false,
231
  "normalized": false,
232
- "rstrip": false,
233
  "single_word": false,
234
  "special": true
235
  },
236
- "8025": {
237
- "content": "<extra_id_25>",
238
- "lstrip": false,
239
  "normalized": false,
240
- "rstrip": false,
241
  "single_word": false,
242
  "special": true
243
  },
244
- "8026": {
245
- "content": "<extra_id_26>",
246
- "lstrip": false,
247
  "normalized": false,
248
- "rstrip": false,
249
  "single_word": false,
250
  "special": true
251
  },
252
- "8027": {
253
- "content": "<extra_id_27>",
254
- "lstrip": false,
255
  "normalized": false,
256
- "rstrip": false,
257
  "single_word": false,
258
  "special": true
259
  },
260
- "8028": {
261
- "content": "<extra_id_28>",
262
- "lstrip": false,
263
  "normalized": false,
264
- "rstrip": false,
265
  "single_word": false,
266
  "special": true
267
  },
268
- "8029": {
269
- "content": "<extra_id_29>",
270
- "lstrip": false,
271
  "normalized": false,
272
- "rstrip": false,
273
  "single_word": false,
274
  "special": true
275
  },
276
- "8030": {
277
- "content": "<extra_id_30>",
278
- "lstrip": false,
279
  "normalized": false,
280
- "rstrip": false,
281
  "single_word": false,
282
  "special": true
283
  },
284
- "8031": {
285
- "content": "<extra_id_31>",
286
- "lstrip": false,
287
  "normalized": false,
288
- "rstrip": false,
289
  "single_word": false,
290
  "special": true
291
  },
292
- "8032": {
293
- "content": "<extra_id_32>",
294
- "lstrip": false,
295
  "normalized": false,
296
- "rstrip": false,
297
  "single_word": false,
298
  "special": true
299
  },
300
- "8033": {
301
- "content": "<extra_id_33>",
302
- "lstrip": false,
303
  "normalized": false,
304
- "rstrip": false,
305
  "single_word": false,
306
  "special": true
307
  },
308
- "8034": {
309
- "content": "<extra_id_34>",
310
- "lstrip": false,
311
  "normalized": false,
312
- "rstrip": false,
313
  "single_word": false,
314
  "special": true
315
  },
316
- "8035": {
317
- "content": "<extra_id_35>",
318
- "lstrip": false,
319
  "normalized": false,
320
- "rstrip": false,
321
  "single_word": false,
322
  "special": true
323
  },
324
- "8036": {
325
- "content": "<extra_id_36>",
326
- "lstrip": false,
327
  "normalized": false,
328
- "rstrip": false,
329
  "single_word": false,
330
  "special": true
331
  },
332
- "8037": {
333
- "content": "<extra_id_37>",
334
- "lstrip": false,
335
  "normalized": false,
336
- "rstrip": false,
337
  "single_word": false,
338
  "special": true
339
  },
340
- "8038": {
341
- "content": "<extra_id_38>",
342
- "lstrip": false,
343
  "normalized": false,
344
- "rstrip": false,
345
  "single_word": false,
346
  "special": true
347
  },
348
- "8039": {
349
- "content": "<extra_id_39>",
350
- "lstrip": false,
351
  "normalized": false,
352
- "rstrip": false,
353
  "single_word": false,
354
  "special": true
355
  },
356
- "8040": {
357
- "content": "<extra_id_40>",
358
- "lstrip": false,
359
  "normalized": false,
360
- "rstrip": false,
361
  "single_word": false,
362
  "special": true
363
  },
364
- "8041": {
365
- "content": "<extra_id_41>",
366
- "lstrip": false,
367
  "normalized": false,
368
- "rstrip": false,
369
  "single_word": false,
370
  "special": true
371
  },
372
- "8042": {
373
- "content": "<extra_id_42>",
374
- "lstrip": false,
375
  "normalized": false,
376
- "rstrip": false,
377
  "single_word": false,
378
  "special": true
379
  },
380
- "8043": {
381
- "content": "<extra_id_43>",
382
- "lstrip": false,
383
  "normalized": false,
384
- "rstrip": false,
385
  "single_word": false,
386
  "special": true
387
  },
388
- "8044": {
389
- "content": "<extra_id_44>",
390
- "lstrip": false,
391
  "normalized": false,
392
- "rstrip": false,
393
  "single_word": false,
394
  "special": true
395
  },
396
- "8045": {
397
- "content": "<extra_id_45>",
398
- "lstrip": false,
399
  "normalized": false,
400
- "rstrip": false,
401
  "single_word": false,
402
  "special": true
403
  },
404
- "8046": {
405
- "content": "<extra_id_46>",
406
- "lstrip": false,
407
  "normalized": false,
408
- "rstrip": false,
409
  "single_word": false,
410
  "special": true
411
  },
412
- "8047": {
413
- "content": "<extra_id_47>",
414
- "lstrip": false,
415
  "normalized": false,
416
- "rstrip": false,
417
  "single_word": false,
418
  "special": true
419
  },
420
- "8048": {
421
- "content": "<extra_id_48>",
422
- "lstrip": false,
423
  "normalized": false,
424
- "rstrip": false,
425
  "single_word": false,
426
  "special": true
427
  },
428
- "8049": {
429
- "content": "<extra_id_49>",
430
- "lstrip": false,
431
  "normalized": false,
432
- "rstrip": false,
433
  "single_word": false,
434
  "special": true
435
  },
436
- "8050": {
437
- "content": "<extra_id_50>",
438
- "lstrip": false,
439
  "normalized": false,
440
- "rstrip": false,
441
  "single_word": false,
442
  "special": true
443
  },
444
- "8051": {
445
- "content": "<extra_id_51>",
446
- "lstrip": false,
447
  "normalized": false,
448
- "rstrip": false,
449
  "single_word": false,
450
  "special": true
451
  },
452
- "8052": {
453
- "content": "<extra_id_52>",
454
- "lstrip": false,
455
  "normalized": false,
456
- "rstrip": false,
457
  "single_word": false,
458
  "special": true
459
  },
460
- "8053": {
461
- "content": "<extra_id_53>",
462
- "lstrip": false,
463
  "normalized": false,
464
- "rstrip": false,
465
  "single_word": false,
466
  "special": true
467
  },
468
- "8054": {
469
- "content": "<extra_id_54>",
470
- "lstrip": false,
471
  "normalized": false,
472
- "rstrip": false,
473
  "single_word": false,
474
  "special": true
475
  },
476
- "8055": {
477
- "content": "<extra_id_55>",
478
- "lstrip": false,
479
  "normalized": false,
480
- "rstrip": false,
481
  "single_word": false,
482
  "special": true
483
  },
484
- "8056": {
485
- "content": "<extra_id_56>",
486
- "lstrip": false,
487
  "normalized": false,
488
- "rstrip": false,
489
  "single_word": false,
490
  "special": true
491
  },
492
- "8057": {
493
- "content": "<extra_id_57>",
494
- "lstrip": false,
495
  "normalized": false,
496
- "rstrip": false,
497
  "single_word": false,
498
  "special": true
499
  },
500
- "8058": {
501
- "content": "<extra_id_58>",
502
- "lstrip": false,
503
  "normalized": false,
504
- "rstrip": false,
505
  "single_word": false,
506
  "special": true
507
  },
508
- "8059": {
509
- "content": "<extra_id_59>",
510
- "lstrip": false,
511
  "normalized": false,
512
- "rstrip": false,
513
  "single_word": false,
514
  "special": true
515
  },
516
- "8060": {
517
- "content": "<extra_id_60>",
518
- "lstrip": false,
519
  "normalized": false,
520
- "rstrip": false,
521
  "single_word": false,
522
  "special": true
523
  },
524
- "8061": {
525
- "content": "<extra_id_61>",
526
- "lstrip": false,
527
  "normalized": false,
528
- "rstrip": false,
529
  "single_word": false,
530
  "special": true
531
  },
532
- "8062": {
533
- "content": "<extra_id_62>",
534
- "lstrip": false,
535
  "normalized": false,
536
- "rstrip": false,
537
  "single_word": false,
538
  "special": true
539
  },
540
- "8063": {
541
- "content": "<extra_id_63>",
542
- "lstrip": false,
543
  "normalized": false,
544
- "rstrip": false,
545
  "single_word": false,
546
  "special": true
547
  },
548
- "8064": {
549
- "content": "<extra_id_64>",
550
- "lstrip": false,
551
  "normalized": false,
552
- "rstrip": false,
553
  "single_word": false,
554
  "special": true
555
  },
556
- "8065": {
557
- "content": "<extra_id_65>",
558
- "lstrip": false,
559
  "normalized": false,
560
- "rstrip": false,
561
  "single_word": false,
562
  "special": true
563
  },
564
- "8066": {
565
- "content": "<extra_id_66>",
566
- "lstrip": false,
567
  "normalized": false,
568
- "rstrip": false,
569
  "single_word": false,
570
  "special": true
571
  },
572
- "8067": {
573
- "content": "<extra_id_67>",
574
- "lstrip": false,
575
  "normalized": false,
576
- "rstrip": false,
577
  "single_word": false,
578
  "special": true
579
  },
580
- "8068": {
581
- "content": "<extra_id_68>",
582
- "lstrip": false,
583
  "normalized": false,
584
- "rstrip": false,
585
  "single_word": false,
586
  "special": true
587
  },
588
- "8069": {
589
- "content": "<extra_id_69>",
590
- "lstrip": false,
591
  "normalized": false,
592
- "rstrip": false,
593
  "single_word": false,
594
  "special": true
595
  },
596
- "8070": {
597
- "content": "<extra_id_70>",
598
- "lstrip": false,
599
  "normalized": false,
600
- "rstrip": false,
601
  "single_word": false,
602
  "special": true
603
  },
604
- "8071": {
605
- "content": "<extra_id_71>",
606
- "lstrip": false,
607
  "normalized": false,
608
- "rstrip": false,
609
  "single_word": false,
610
  "special": true
611
  },
612
- "8072": {
613
- "content": "<extra_id_72>",
614
- "lstrip": false,
615
  "normalized": false,
616
- "rstrip": false,
617
  "single_word": false,
618
  "special": true
619
  },
620
- "8073": {
621
- "content": "<extra_id_73>",
622
- "lstrip": false,
623
  "normalized": false,
624
- "rstrip": false,
625
  "single_word": false,
626
  "special": true
627
  },
628
- "8074": {
629
- "content": "<extra_id_74>",
630
- "lstrip": false,
631
  "normalized": false,
632
- "rstrip": false,
633
  "single_word": false,
634
  "special": true
635
  },
636
- "8075": {
637
- "content": "<extra_id_75>",
638
- "lstrip": false,
639
  "normalized": false,
640
- "rstrip": false,
641
  "single_word": false,
642
  "special": true
643
  },
644
- "8076": {
645
- "content": "<extra_id_76>",
646
- "lstrip": false,
647
  "normalized": false,
648
- "rstrip": false,
649
  "single_word": false,
650
  "special": true
651
  },
652
- "8077": {
653
- "content": "<extra_id_77>",
654
- "lstrip": false,
655
  "normalized": false,
656
- "rstrip": false,
657
  "single_word": false,
658
  "special": true
659
  },
660
- "8078": {
661
- "content": "<extra_id_78>",
662
- "lstrip": false,
663
  "normalized": false,
664
- "rstrip": false,
665
  "single_word": false,
666
  "special": true
667
  },
668
- "8079": {
669
- "content": "<extra_id_79>",
670
- "lstrip": false,
671
  "normalized": false,
672
- "rstrip": false,
673
  "single_word": false,
674
  "special": true
675
  },
676
- "8080": {
677
- "content": "<extra_id_80>",
678
- "lstrip": false,
679
  "normalized": false,
680
- "rstrip": false,
681
  "single_word": false,
682
  "special": true
683
  },
684
- "8081": {
685
- "content": "<extra_id_81>",
686
- "lstrip": false,
687
  "normalized": false,
688
- "rstrip": false,
689
  "single_word": false,
690
  "special": true
691
  },
692
- "8082": {
693
- "content": "<extra_id_82>",
694
- "lstrip": false,
695
  "normalized": false,
696
- "rstrip": false,
697
  "single_word": false,
698
  "special": true
699
  },
700
- "8083": {
701
- "content": "<extra_id_83>",
702
- "lstrip": false,
703
  "normalized": false,
704
- "rstrip": false,
705
  "single_word": false,
706
  "special": true
707
  },
708
- "8084": {
709
- "content": "<extra_id_84>",
710
- "lstrip": false,
711
  "normalized": false,
712
- "rstrip": false,
713
  "single_word": false,
714
  "special": true
715
  },
716
- "8085": {
717
- "content": "<extra_id_85>",
718
- "lstrip": false,
719
  "normalized": false,
720
- "rstrip": false,
721
  "single_word": false,
722
  "special": true
723
  },
724
- "8086": {
725
- "content": "<extra_id_86>",
726
- "lstrip": false,
727
  "normalized": false,
728
- "rstrip": false,
729
  "single_word": false,
730
  "special": true
731
  },
732
- "8087": {
733
- "content": "<extra_id_87>",
734
- "lstrip": false,
735
  "normalized": false,
736
- "rstrip": false,
737
  "single_word": false,
738
  "special": true
739
  },
740
- "8088": {
741
- "content": "<extra_id_88>",
742
- "lstrip": false,
743
  "normalized": false,
744
- "rstrip": false,
745
  "single_word": false,
746
  "special": true
747
  },
748
- "8089": {
749
- "content": "<extra_id_89>",
750
- "lstrip": false,
751
  "normalized": false,
752
- "rstrip": false,
753
  "single_word": false,
754
  "special": true
755
  },
756
- "8090": {
757
- "content": "<extra_id_90>",
758
- "lstrip": false,
759
  "normalized": false,
760
- "rstrip": false,
761
  "single_word": false,
762
  "special": true
763
  },
764
- "8091": {
765
- "content": "<extra_id_91>",
766
- "lstrip": false,
767
  "normalized": false,
768
- "rstrip": false,
769
  "single_word": false,
770
  "special": true
771
  },
772
- "8092": {
773
- "content": "<extra_id_92>",
774
- "lstrip": false,
775
  "normalized": false,
776
- "rstrip": false,
777
  "single_word": false,
778
  "special": true
779
  },
780
- "8093": {
781
- "content": "<extra_id_93>",
782
- "lstrip": false,
783
  "normalized": false,
784
- "rstrip": false,
785
  "single_word": false,
786
  "special": true
787
  },
788
- "8094": {
789
- "content": "<extra_id_94>",
790
- "lstrip": false,
791
  "normalized": false,
792
- "rstrip": false,
793
  "single_word": false,
794
  "special": true
795
  },
796
- "8095": {
797
- "content": "<extra_id_95>",
798
- "lstrip": false,
799
  "normalized": false,
800
- "rstrip": false,
801
  "single_word": false,
802
  "special": true
803
  },
804
- "8096": {
805
- "content": "<extra_id_96>",
806
- "lstrip": false,
807
  "normalized": false,
808
- "rstrip": false,
809
  "single_word": false,
810
  "special": true
811
  },
812
- "8097": {
813
- "content": "<extra_id_97>",
814
- "lstrip": false,
815
  "normalized": false,
816
- "rstrip": false,
817
  "single_word": false,
818
  "special": true
819
  },
820
- "8098": {
821
- "content": "<extra_id_98>",
822
- "lstrip": false,
823
  "normalized": false,
824
- "rstrip": false,
825
  "single_word": false,
826
  "special": true
827
  },
828
- "8099": {
829
- "content": "<extra_id_99>",
830
- "lstrip": false,
831
  "normalized": false,
832
- "rstrip": false,
833
  "single_word": false,
834
  "special": true
835
  }
@@ -936,13 +936,14 @@
936
  "<extra_id_98>",
937
  "<extra_id_99>"
938
  ],
939
- "bos_token": "<s>",
940
  "clean_up_tokenization_spaces": false,
941
  "eos_token": "</s>",
942
  "extra_ids": 100,
943
  "extra_special_tokens": {},
 
944
  "model_max_length": 1000000000000000019884624838656,
945
  "pad_token": "<pad>",
946
- "tokenizer_class": "T5TokenizerFast",
 
947
  "unk_token": "<unk>"
948
  }
 
1
  {
2
+ "add_prefix_space": true,
3
  "added_tokens_decoder": {
4
  "0": {
5
  "content": "<pad>",
 
10
  "special": true
11
  },
12
  "1": {
13
+ "content": "<unk>",
14
  "lstrip": false,
15
  "normalized": false,
16
  "rstrip": false,
 
18
  "special": true
19
  },
20
  "2": {
21
+ "content": "<s>",
22
  "lstrip": false,
23
  "normalized": false,
24
  "rstrip": false,
25
  "single_word": false,
26
+ "special": false
27
  },
28
  "3": {
29
+ "content": "</s>",
30
  "lstrip": false,
31
  "normalized": false,
32
  "rstrip": false,
33
  "single_word": false,
34
  "special": true
35
  },
36
+ "40000": {
37
+ "content": "<extra_id_99>",
38
+ "lstrip": true,
39
  "normalized": false,
40
+ "rstrip": true,
41
  "single_word": false,
42
  "special": true
43
  },
44
+ "40001": {
45
+ "content": "<extra_id_98>",
46
+ "lstrip": true,
47
  "normalized": false,
48
+ "rstrip": true,
49
  "single_word": false,
50
  "special": true
51
  },
52
+ "40002": {
53
+ "content": "<extra_id_97>",
54
+ "lstrip": true,
55
  "normalized": false,
56
+ "rstrip": true,
57
  "single_word": false,
58
  "special": true
59
  },
60
+ "40003": {
61
+ "content": "<extra_id_96>",
62
+ "lstrip": true,
63
  "normalized": false,
64
+ "rstrip": true,
65
  "single_word": false,
66
  "special": true
67
  },
68
+ "40004": {
69
+ "content": "<extra_id_95>",
70
+ "lstrip": true,
71
  "normalized": false,
72
+ "rstrip": true,
73
  "single_word": false,
74
  "special": true
75
  },
76
+ "40005": {
77
+ "content": "<extra_id_94>",
78
+ "lstrip": true,
79
  "normalized": false,
80
+ "rstrip": true,
81
  "single_word": false,
82
  "special": true
83
  },
84
+ "40006": {
85
+ "content": "<extra_id_93>",
86
+ "lstrip": true,
87
  "normalized": false,
88
+ "rstrip": true,
89
  "single_word": false,
90
  "special": true
91
  },
92
+ "40007": {
93
+ "content": "<extra_id_92>",
94
+ "lstrip": true,
95
  "normalized": false,
96
+ "rstrip": true,
97
  "single_word": false,
98
  "special": true
99
  },
100
+ "40008": {
101
+ "content": "<extra_id_91>",
102
+ "lstrip": true,
103
  "normalized": false,
104
+ "rstrip": true,
105
  "single_word": false,
106
  "special": true
107
  },
108
+ "40009": {
109
+ "content": "<extra_id_90>",
110
+ "lstrip": true,
111
  "normalized": false,
112
+ "rstrip": true,
113
  "single_word": false,
114
  "special": true
115
  },
116
+ "40010": {
117
+ "content": "<extra_id_89>",
118
+ "lstrip": true,
119
  "normalized": false,
120
+ "rstrip": true,
121
  "single_word": false,
122
  "special": true
123
  },
124
+ "40011": {
125
+ "content": "<extra_id_88>",
126
+ "lstrip": true,
127
  "normalized": false,
128
+ "rstrip": true,
129
  "single_word": false,
130
  "special": true
131
  },
132
+ "40012": {
133
+ "content": "<extra_id_87>",
134
+ "lstrip": true,
135
  "normalized": false,
136
+ "rstrip": true,
137
  "single_word": false,
138
  "special": true
139
  },
140
+ "40013": {
141
+ "content": "<extra_id_86>",
142
+ "lstrip": true,
143
  "normalized": false,
144
+ "rstrip": true,
145
  "single_word": false,
146
  "special": true
147
  },
148
+ "40014": {
149
+ "content": "<extra_id_85>",
150
+ "lstrip": true,
151
  "normalized": false,
152
+ "rstrip": true,
153
  "single_word": false,
154
  "special": true
155
  },
156
+ "40015": {
157
+ "content": "<extra_id_84>",
158
+ "lstrip": true,
159
  "normalized": false,
160
+ "rstrip": true,
161
  "single_word": false,
162
  "special": true
163
  },
164
+ "40016": {
165
+ "content": "<extra_id_83>",
166
+ "lstrip": true,
167
  "normalized": false,
168
+ "rstrip": true,
169
  "single_word": false,
170
  "special": true
171
  },
172
+ "40017": {
173
+ "content": "<extra_id_82>",
174
+ "lstrip": true,
175
  "normalized": false,
176
+ "rstrip": true,
177
  "single_word": false,
178
  "special": true
179
  },
180
+ "40018": {
181
+ "content": "<extra_id_81>",
182
+ "lstrip": true,
183
  "normalized": false,
184
+ "rstrip": true,
185
  "single_word": false,
186
  "special": true
187
  },
188
+ "40019": {
189
+ "content": "<extra_id_80>",
190
+ "lstrip": true,
191
  "normalized": false,
192
+ "rstrip": true,
193
  "single_word": false,
194
  "special": true
195
  },
196
+ "40020": {
197
+ "content": "<extra_id_79>",
198
+ "lstrip": true,
199
  "normalized": false,
200
+ "rstrip": true,
201
  "single_word": false,
202
  "special": true
203
  },
204
+ "40021": {
205
+ "content": "<extra_id_78>",
206
+ "lstrip": true,
207
  "normalized": false,
208
+ "rstrip": true,
209
  "single_word": false,
210
  "special": true
211
  },
212
+ "40022": {
213
+ "content": "<extra_id_77>",
214
+ "lstrip": true,
215
  "normalized": false,
216
+ "rstrip": true,
217
  "single_word": false,
218
  "special": true
219
  },
220
+ "40023": {
221
+ "content": "<extra_id_76>",
222
+ "lstrip": true,
223
  "normalized": false,
224
+ "rstrip": true,
225
  "single_word": false,
226
  "special": true
227
  },
228
+ "40024": {
229
+ "content": "<extra_id_75>",
230
+ "lstrip": true,
231
  "normalized": false,
232
+ "rstrip": true,
233
  "single_word": false,
234
  "special": true
235
  },
236
+ "40025": {
237
+ "content": "<extra_id_74>",
238
+ "lstrip": true,
239
  "normalized": false,
240
+ "rstrip": true,
241
  "single_word": false,
242
  "special": true
243
  },
244
+ "40026": {
245
+ "content": "<extra_id_73>",
246
+ "lstrip": true,
247
  "normalized": false,
248
+ "rstrip": true,
249
  "single_word": false,
250
  "special": true
251
  },
252
+ "40027": {
253
+ "content": "<extra_id_72>",
254
+ "lstrip": true,
255
  "normalized": false,
256
+ "rstrip": true,
257
  "single_word": false,
258
  "special": true
259
  },
260
+ "40028": {
261
+ "content": "<extra_id_71>",
262
+ "lstrip": true,
263
  "normalized": false,
264
+ "rstrip": true,
265
  "single_word": false,
266
  "special": true
267
  },
268
+ "40029": {
269
+ "content": "<extra_id_70>",
270
+ "lstrip": true,
271
  "normalized": false,
272
+ "rstrip": true,
273
  "single_word": false,
274
  "special": true
275
  },
276
+ "40030": {
277
+ "content": "<extra_id_69>",
278
+ "lstrip": true,
279
  "normalized": false,
280
+ "rstrip": true,
281
  "single_word": false,
282
  "special": true
283
  },
284
+ "40031": {
285
+ "content": "<extra_id_68>",
286
+ "lstrip": true,
287
  "normalized": false,
288
+ "rstrip": true,
289
  "single_word": false,
290
  "special": true
291
  },
292
+ "40032": {
293
+ "content": "<extra_id_67>",
294
+ "lstrip": true,
295
  "normalized": false,
296
+ "rstrip": true,
297
  "single_word": false,
298
  "special": true
299
  },
300
+ "40033": {
301
+ "content": "<extra_id_66>",
302
+ "lstrip": true,
303
  "normalized": false,
304
+ "rstrip": true,
305
  "single_word": false,
306
  "special": true
307
  },
308
+ "40034": {
309
+ "content": "<extra_id_65>",
310
+ "lstrip": true,
311
  "normalized": false,
312
+ "rstrip": true,
313
  "single_word": false,
314
  "special": true
315
  },
316
+ "40035": {
317
+ "content": "<extra_id_64>",
318
+ "lstrip": true,
319
  "normalized": false,
320
+ "rstrip": true,
321
  "single_word": false,
322
  "special": true
323
  },
324
+ "40036": {
325
+ "content": "<extra_id_63>",
326
+ "lstrip": true,
327
  "normalized": false,
328
+ "rstrip": true,
329
  "single_word": false,
330
  "special": true
331
  },
332
+ "40037": {
333
+ "content": "<extra_id_62>",
334
+ "lstrip": true,
335
  "normalized": false,
336
+ "rstrip": true,
337
  "single_word": false,
338
  "special": true
339
  },
340
+ "40038": {
341
+ "content": "<extra_id_61>",
342
+ "lstrip": true,
343
  "normalized": false,
344
+ "rstrip": true,
345
  "single_word": false,
346
  "special": true
347
  },
348
+ "40039": {
349
+ "content": "<extra_id_60>",
350
+ "lstrip": true,
351
  "normalized": false,
352
+ "rstrip": true,
353
  "single_word": false,
354
  "special": true
355
  },
356
+ "40040": {
357
+ "content": "<extra_id_59>",
358
+ "lstrip": true,
359
  "normalized": false,
360
+ "rstrip": true,
361
  "single_word": false,
362
  "special": true
363
  },
364
+ "40041": {
365
+ "content": "<extra_id_58>",
366
+ "lstrip": true,
367
  "normalized": false,
368
+ "rstrip": true,
369
  "single_word": false,
370
  "special": true
371
  },
372
+ "40042": {
373
+ "content": "<extra_id_57>",
374
+ "lstrip": true,
375
  "normalized": false,
376
+ "rstrip": true,
377
  "single_word": false,
378
  "special": true
379
  },
380
+ "40043": {
381
+ "content": "<extra_id_56>",
382
+ "lstrip": true,
383
  "normalized": false,
384
+ "rstrip": true,
385
  "single_word": false,
386
  "special": true
387
  },
388
+ "40044": {
389
+ "content": "<extra_id_55>",
390
+ "lstrip": true,
391
  "normalized": false,
392
+ "rstrip": true,
393
  "single_word": false,
394
  "special": true
395
  },
396
+ "40045": {
397
+ "content": "<extra_id_54>",
398
+ "lstrip": true,
399
  "normalized": false,
400
+ "rstrip": true,
401
  "single_word": false,
402
  "special": true
403
  },
404
+ "40046": {
405
+ "content": "<extra_id_53>",
406
+ "lstrip": true,
407
  "normalized": false,
408
+ "rstrip": true,
409
  "single_word": false,
410
  "special": true
411
  },
412
+ "40047": {
413
+ "content": "<extra_id_52>",
414
+ "lstrip": true,
415
  "normalized": false,
416
+ "rstrip": true,
417
  "single_word": false,
418
  "special": true
419
  },
420
+ "40048": {
421
+ "content": "<extra_id_51>",
422
+ "lstrip": true,
423
  "normalized": false,
424
+ "rstrip": true,
425
  "single_word": false,
426
  "special": true
427
  },
428
+ "40049": {
429
+ "content": "<extra_id_50>",
430
+ "lstrip": true,
431
  "normalized": false,
432
+ "rstrip": true,
433
  "single_word": false,
434
  "special": true
435
  },
436
+ "40050": {
437
+ "content": "<extra_id_49>",
438
+ "lstrip": true,
439
  "normalized": false,
440
+ "rstrip": true,
441
  "single_word": false,
442
  "special": true
443
  },
444
+ "40051": {
445
+ "content": "<extra_id_48>",
446
+ "lstrip": true,
447
  "normalized": false,
448
+ "rstrip": true,
449
  "single_word": false,
450
  "special": true
451
  },
452
+ "40052": {
453
+ "content": "<extra_id_47>",
454
+ "lstrip": true,
455
  "normalized": false,
456
+ "rstrip": true,
457
  "single_word": false,
458
  "special": true
459
  },
460
+ "40053": {
461
+ "content": "<extra_id_46>",
462
+ "lstrip": true,
463
  "normalized": false,
464
+ "rstrip": true,
465
  "single_word": false,
466
  "special": true
467
  },
468
+ "40054": {
469
+ "content": "<extra_id_45>",
470
+ "lstrip": true,
471
  "normalized": false,
472
+ "rstrip": true,
473
  "single_word": false,
474
  "special": true
475
  },
476
+ "40055": {
477
+ "content": "<extra_id_44>",
478
+ "lstrip": true,
479
  "normalized": false,
480
+ "rstrip": true,
481
  "single_word": false,
482
  "special": true
483
  },
484
+ "40056": {
485
+ "content": "<extra_id_43>",
486
+ "lstrip": true,
487
  "normalized": false,
488
+ "rstrip": true,
489
  "single_word": false,
490
  "special": true
491
  },
492
+ "40057": {
493
+ "content": "<extra_id_42>",
494
+ "lstrip": true,
495
  "normalized": false,
496
+ "rstrip": true,
497
  "single_word": false,
498
  "special": true
499
  },
500
+ "40058": {
501
+ "content": "<extra_id_41>",
502
+ "lstrip": true,
503
  "normalized": false,
504
+ "rstrip": true,
505
  "single_word": false,
506
  "special": true
507
  },
508
+ "40059": {
509
+ "content": "<extra_id_40>",
510
+ "lstrip": true,
511
  "normalized": false,
512
+ "rstrip": true,
513
  "single_word": false,
514
  "special": true
515
  },
516
+ "40060": {
517
+ "content": "<extra_id_39>",
518
+ "lstrip": true,
519
  "normalized": false,
520
+ "rstrip": true,
521
  "single_word": false,
522
  "special": true
523
  },
524
+ "40061": {
525
+ "content": "<extra_id_38>",
526
+ "lstrip": true,
527
  "normalized": false,
528
+ "rstrip": true,
529
  "single_word": false,
530
  "special": true
531
  },
532
+ "40062": {
533
+ "content": "<extra_id_37>",
534
+ "lstrip": true,
535
  "normalized": false,
536
+ "rstrip": true,
537
  "single_word": false,
538
  "special": true
539
  },
540
+ "40063": {
541
+ "content": "<extra_id_36>",
542
+ "lstrip": true,
543
  "normalized": false,
544
+ "rstrip": true,
545
  "single_word": false,
546
  "special": true
547
  },
548
+ "40064": {
549
+ "content": "<extra_id_35>",
550
+ "lstrip": true,
551
  "normalized": false,
552
+ "rstrip": true,
553
  "single_word": false,
554
  "special": true
555
  },
556
+ "40065": {
557
+ "content": "<extra_id_34>",
558
+ "lstrip": true,
559
  "normalized": false,
560
+ "rstrip": true,
561
  "single_word": false,
562
  "special": true
563
  },
564
+ "40066": {
565
+ "content": "<extra_id_33>",
566
+ "lstrip": true,
567
  "normalized": false,
568
+ "rstrip": true,
569
  "single_word": false,
570
  "special": true
571
  },
572
+ "40067": {
573
+ "content": "<extra_id_32>",
574
+ "lstrip": true,
575
  "normalized": false,
576
+ "rstrip": true,
577
  "single_word": false,
578
  "special": true
579
  },
580
+ "40068": {
581
+ "content": "<extra_id_31>",
582
+ "lstrip": true,
583
  "normalized": false,
584
+ "rstrip": true,
585
  "single_word": false,
586
  "special": true
587
  },
588
+ "40069": {
589
+ "content": "<extra_id_30>",
590
+ "lstrip": true,
591
  "normalized": false,
592
+ "rstrip": true,
593
  "single_word": false,
594
  "special": true
595
  },
596
+ "40070": {
597
+ "content": "<extra_id_29>",
598
+ "lstrip": true,
599
  "normalized": false,
600
+ "rstrip": true,
601
  "single_word": false,
602
  "special": true
603
  },
604
+ "40071": {
605
+ "content": "<extra_id_28>",
606
+ "lstrip": true,
607
  "normalized": false,
608
+ "rstrip": true,
609
  "single_word": false,
610
  "special": true
611
  },
612
+ "40072": {
613
+ "content": "<extra_id_27>",
614
+ "lstrip": true,
615
  "normalized": false,
616
+ "rstrip": true,
617
  "single_word": false,
618
  "special": true
619
  },
620
+ "40073": {
621
+ "content": "<extra_id_26>",
622
+ "lstrip": true,
623
  "normalized": false,
624
+ "rstrip": true,
625
  "single_word": false,
626
  "special": true
627
  },
628
+ "40074": {
629
+ "content": "<extra_id_25>",
630
+ "lstrip": true,
631
  "normalized": false,
632
+ "rstrip": true,
633
  "single_word": false,
634
  "special": true
635
  },
636
+ "40075": {
637
+ "content": "<extra_id_24>",
638
+ "lstrip": true,
639
  "normalized": false,
640
+ "rstrip": true,
641
  "single_word": false,
642
  "special": true
643
  },
644
+ "40076": {
645
+ "content": "<extra_id_23>",
646
+ "lstrip": true,
647
  "normalized": false,
648
+ "rstrip": true,
649
  "single_word": false,
650
  "special": true
651
  },
652
+ "40077": {
653
+ "content": "<extra_id_22>",
654
+ "lstrip": true,
655
  "normalized": false,
656
+ "rstrip": true,
657
  "single_word": false,
658
  "special": true
659
  },
660
+ "40078": {
661
+ "content": "<extra_id_21>",
662
+ "lstrip": true,
663
  "normalized": false,
664
+ "rstrip": true,
665
  "single_word": false,
666
  "special": true
667
  },
668
+ "40079": {
669
+ "content": "<extra_id_20>",
670
+ "lstrip": true,
671
  "normalized": false,
672
+ "rstrip": true,
673
  "single_word": false,
674
  "special": true
675
  },
676
+ "40080": {
677
+ "content": "<extra_id_19>",
678
+ "lstrip": true,
679
  "normalized": false,
680
+ "rstrip": true,
681
  "single_word": false,
682
  "special": true
683
  },
684
+ "40081": {
685
+ "content": "<extra_id_18>",
686
+ "lstrip": true,
687
  "normalized": false,
688
+ "rstrip": true,
689
  "single_word": false,
690
  "special": true
691
  },
692
+ "40082": {
693
+ "content": "<extra_id_17>",
694
+ "lstrip": true,
695
  "normalized": false,
696
+ "rstrip": true,
697
  "single_word": false,
698
  "special": true
699
  },
700
+ "40083": {
701
+ "content": "<extra_id_16>",
702
+ "lstrip": true,
703
  "normalized": false,
704
+ "rstrip": true,
705
  "single_word": false,
706
  "special": true
707
  },
708
+ "40084": {
709
+ "content": "<extra_id_15>",
710
+ "lstrip": true,
711
  "normalized": false,
712
+ "rstrip": true,
713
  "single_word": false,
714
  "special": true
715
  },
716
+ "40085": {
717
+ "content": "<extra_id_14>",
718
+ "lstrip": true,
719
  "normalized": false,
720
+ "rstrip": true,
721
  "single_word": false,
722
  "special": true
723
  },
724
+ "40086": {
725
+ "content": "<extra_id_13>",
726
+ "lstrip": true,
727
  "normalized": false,
728
+ "rstrip": true,
729
  "single_word": false,
730
  "special": true
731
  },
732
+ "40087": {
733
+ "content": "<extra_id_12>",
734
+ "lstrip": true,
735
  "normalized": false,
736
+ "rstrip": true,
737
  "single_word": false,
738
  "special": true
739
  },
740
+ "40088": {
741
+ "content": "<extra_id_11>",
742
+ "lstrip": true,
743
  "normalized": false,
744
+ "rstrip": true,
745
  "single_word": false,
746
  "special": true
747
  },
748
+ "40089": {
749
+ "content": "<extra_id_10>",
750
+ "lstrip": true,
751
  "normalized": false,
752
+ "rstrip": true,
753
  "single_word": false,
754
  "special": true
755
  },
756
+ "40090": {
757
+ "content": "<extra_id_9>",
758
+ "lstrip": true,
759
  "normalized": false,
760
+ "rstrip": true,
761
  "single_word": false,
762
  "special": true
763
  },
764
+ "40091": {
765
+ "content": "<extra_id_8>",
766
+ "lstrip": true,
767
  "normalized": false,
768
+ "rstrip": true,
769
  "single_word": false,
770
  "special": true
771
  },
772
+ "40092": {
773
+ "content": "<extra_id_7>",
774
+ "lstrip": true,
775
  "normalized": false,
776
+ "rstrip": true,
777
  "single_word": false,
778
  "special": true
779
  },
780
+ "40093": {
781
+ "content": "<extra_id_6>",
782
+ "lstrip": true,
783
  "normalized": false,
784
+ "rstrip": true,
785
  "single_word": false,
786
  "special": true
787
  },
788
+ "40094": {
789
+ "content": "<extra_id_5>",
790
+ "lstrip": true,
791
  "normalized": false,
792
+ "rstrip": true,
793
  "single_word": false,
794
  "special": true
795
  },
796
+ "40095": {
797
+ "content": "<extra_id_4>",
798
+ "lstrip": true,
799
  "normalized": false,
800
+ "rstrip": true,
801
  "single_word": false,
802
  "special": true
803
  },
804
+ "40096": {
805
+ "content": "<extra_id_3>",
806
+ "lstrip": true,
807
  "normalized": false,
808
+ "rstrip": true,
809
  "single_word": false,
810
  "special": true
811
  },
812
+ "40097": {
813
+ "content": "<extra_id_2>",
814
+ "lstrip": true,
815
  "normalized": false,
816
+ "rstrip": true,
817
  "single_word": false,
818
  "special": true
819
  },
820
+ "40098": {
821
+ "content": "<extra_id_1>",
822
+ "lstrip": true,
823
  "normalized": false,
824
+ "rstrip": true,
825
  "single_word": false,
826
  "special": true
827
  },
828
+ "40099": {
829
+ "content": "<extra_id_0>",
830
+ "lstrip": true,
831
  "normalized": false,
832
+ "rstrip": true,
833
  "single_word": false,
834
  "special": true
835
  }
 
936
  "<extra_id_98>",
937
  "<extra_id_99>"
938
  ],
 
939
  "clean_up_tokenization_spaces": false,
940
  "eos_token": "</s>",
941
  "extra_ids": 100,
942
  "extra_special_tokens": {},
943
+ "legacy": true,
944
  "model_max_length": 1000000000000000019884624838656,
945
  "pad_token": "<pad>",
946
+ "sp_model_kwargs": {},
947
+ "tokenizer_class": "T5Tokenizer",
948
  "unk_token": "<unk>"
949
  }
checkpoints/checkpoint-46500/trainer_state.json ADDED
@@ -0,0 +1,685 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 1.992650682436631,
6
+ "eval_steps": 500,
7
+ "global_step": 46500,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.02142658181740267,
14
+ "grad_norm": 0.4460393786430359,
15
+ "learning_rate": 4.946539532890508e-05,
16
+ "loss": 2.2597,
17
+ "step": 500
18
+ },
19
+ {
20
+ "epoch": 0.04285316363480534,
21
+ "grad_norm": 0.4649476110935211,
22
+ "learning_rate": 4.8929719305763877e-05,
23
+ "loss": 2.0158,
24
+ "step": 1000
25
+ },
26
+ {
27
+ "epoch": 0.064279745452208,
28
+ "grad_norm": 0.5113596320152283,
29
+ "learning_rate": 4.839404328262267e-05,
30
+ "loss": 1.9311,
31
+ "step": 1500
32
+ },
33
+ {
34
+ "epoch": 0.08570632726961068,
35
+ "grad_norm": 0.49711284041404724,
36
+ "learning_rate": 4.785836725948147e-05,
37
+ "loss": 1.8587,
38
+ "step": 2000
39
+ },
40
+ {
41
+ "epoch": 0.10713290908701335,
42
+ "grad_norm": 0.6695660352706909,
43
+ "learning_rate": 4.732269123634027e-05,
44
+ "loss": 1.8219,
45
+ "step": 2500
46
+ },
47
+ {
48
+ "epoch": 0.128559490904416,
49
+ "grad_norm": 0.5876463651657104,
50
+ "learning_rate": 4.678701521319906e-05,
51
+ "loss": 1.7645,
52
+ "step": 3000
53
+ },
54
+ {
55
+ "epoch": 0.1499860727218187,
56
+ "grad_norm": 0.554977297782898,
57
+ "learning_rate": 4.625133919005786e-05,
58
+ "loss": 1.7136,
59
+ "step": 3500
60
+ },
61
+ {
62
+ "epoch": 0.17141265453922136,
63
+ "grad_norm": 0.5401861667633057,
64
+ "learning_rate": 4.571566316691665e-05,
65
+ "loss": 1.696,
66
+ "step": 4000
67
+ },
68
+ {
69
+ "epoch": 0.19283923635662403,
70
+ "grad_norm": 0.5951809287071228,
71
+ "learning_rate": 4.517998714377545e-05,
72
+ "loss": 1.6519,
73
+ "step": 4500
74
+ },
75
+ {
76
+ "epoch": 0.2142658181740267,
77
+ "grad_norm": 0.6037197113037109,
78
+ "learning_rate": 4.464431112063424e-05,
79
+ "loss": 1.6279,
80
+ "step": 5000
81
+ },
82
+ {
83
+ "epoch": 0.23569239999142938,
84
+ "grad_norm": 0.6542425751686096,
85
+ "learning_rate": 4.410863509749304e-05,
86
+ "loss": 1.6104,
87
+ "step": 5500
88
+ },
89
+ {
90
+ "epoch": 0.257118981808832,
91
+ "grad_norm": 0.5781915783882141,
92
+ "learning_rate": 4.357295907435183e-05,
93
+ "loss": 1.5976,
94
+ "step": 6000
95
+ },
96
+ {
97
+ "epoch": 0.2785455636262347,
98
+ "grad_norm": 0.575715184211731,
99
+ "learning_rate": 4.303728305121063e-05,
100
+ "loss": 1.5373,
101
+ "step": 6500
102
+ },
103
+ {
104
+ "epoch": 0.2999721454436374,
105
+ "grad_norm": 0.6789396405220032,
106
+ "learning_rate": 4.2501607028069424e-05,
107
+ "loss": 1.5435,
108
+ "step": 7000
109
+ },
110
+ {
111
+ "epoch": 0.3213987272610401,
112
+ "grad_norm": 0.6032198667526245,
113
+ "learning_rate": 4.196593100492822e-05,
114
+ "loss": 1.5081,
115
+ "step": 7500
116
+ },
117
+ {
118
+ "epoch": 0.3428253090784427,
119
+ "grad_norm": 0.6690296530723572,
120
+ "learning_rate": 4.1430254981787015e-05,
121
+ "loss": 1.5059,
122
+ "step": 8000
123
+ },
124
+ {
125
+ "epoch": 0.36425189089584536,
126
+ "grad_norm": 0.6951475739479065,
127
+ "learning_rate": 4.0894578958645814e-05,
128
+ "loss": 1.4716,
129
+ "step": 8500
130
+ },
131
+ {
132
+ "epoch": 0.38567847271324807,
133
+ "grad_norm": 0.7095356583595276,
134
+ "learning_rate": 4.035890293550461e-05,
135
+ "loss": 1.455,
136
+ "step": 9000
137
+ },
138
+ {
139
+ "epoch": 0.4071050545306507,
140
+ "grad_norm": 0.8375953435897827,
141
+ "learning_rate": 3.9823226912363405e-05,
142
+ "loss": 1.4527,
143
+ "step": 9500
144
+ },
145
+ {
146
+ "epoch": 0.4285316363480534,
147
+ "grad_norm": 0.8582206964492798,
148
+ "learning_rate": 3.9287550889222204e-05,
149
+ "loss": 1.4345,
150
+ "step": 10000
151
+ },
152
+ {
153
+ "epoch": 0.44995821816545606,
154
+ "grad_norm": 0.8591541051864624,
155
+ "learning_rate": 3.8751874866081e-05,
156
+ "loss": 1.4268,
157
+ "step": 10500
158
+ },
159
+ {
160
+ "epoch": 0.47138479998285876,
161
+ "grad_norm": 0.835361897945404,
162
+ "learning_rate": 3.8216198842939796e-05,
163
+ "loss": 1.4045,
164
+ "step": 11000
165
+ },
166
+ {
167
+ "epoch": 0.4928113818002614,
168
+ "grad_norm": 0.9994288086891174,
169
+ "learning_rate": 3.768052281979859e-05,
170
+ "loss": 1.3965,
171
+ "step": 11500
172
+ },
173
+ {
174
+ "epoch": 0.514237963617664,
175
+ "grad_norm": 0.8310408592224121,
176
+ "learning_rate": 3.714484679665739e-05,
177
+ "loss": 1.3577,
178
+ "step": 12000
179
+ },
180
+ {
181
+ "epoch": 0.5356645454350667,
182
+ "grad_norm": 1.061630368232727,
183
+ "learning_rate": 3.660917077351618e-05,
184
+ "loss": 1.3311,
185
+ "step": 12500
186
+ },
187
+ {
188
+ "epoch": 0.5570911272524695,
189
+ "grad_norm": 1.0928676128387451,
190
+ "learning_rate": 3.607349475037498e-05,
191
+ "loss": 1.3203,
192
+ "step": 13000
193
+ },
194
+ {
195
+ "epoch": 0.5785177090698721,
196
+ "grad_norm": 1.4662648439407349,
197
+ "learning_rate": 3.553781872723377e-05,
198
+ "loss": 1.2778,
199
+ "step": 13500
200
+ },
201
+ {
202
+ "epoch": 0.5999442908872747,
203
+ "grad_norm": 1.2254618406295776,
204
+ "learning_rate": 3.500214270409257e-05,
205
+ "loss": 1.2572,
206
+ "step": 14000
207
+ },
208
+ {
209
+ "epoch": 0.6213708727046774,
210
+ "grad_norm": 1.275029182434082,
211
+ "learning_rate": 3.446860938504393e-05,
212
+ "loss": 1.2707,
213
+ "step": 14500
214
+ },
215
+ {
216
+ "epoch": 0.6427974545220801,
217
+ "grad_norm": 1.3197990655899048,
218
+ "learning_rate": 3.3932933361902726e-05,
219
+ "loss": 1.2308,
220
+ "step": 15000
221
+ },
222
+ {
223
+ "epoch": 0.6642240363394828,
224
+ "grad_norm": 1.934342861175537,
225
+ "learning_rate": 3.339725733876152e-05,
226
+ "loss": 1.2268,
227
+ "step": 15500
228
+ },
229
+ {
230
+ "epoch": 0.6856506181568854,
231
+ "grad_norm": 1.445302128791809,
232
+ "learning_rate": 3.286158131562032e-05,
233
+ "loss": 1.2007,
234
+ "step": 16000
235
+ },
236
+ {
237
+ "epoch": 0.7070771999742881,
238
+ "grad_norm": 1.2702606916427612,
239
+ "learning_rate": 3.232697664452539e-05,
240
+ "loss": 1.1785,
241
+ "step": 16500
242
+ },
243
+ {
244
+ "epoch": 0.7285037817916907,
245
+ "grad_norm": 1.254937767982483,
246
+ "learning_rate": 3.179130062138419e-05,
247
+ "loss": 1.1616,
248
+ "step": 17000
249
+ },
250
+ {
251
+ "epoch": 0.7499303636090935,
252
+ "grad_norm": 1.3628320693969727,
253
+ "learning_rate": 3.1255624598242984e-05,
254
+ "loss": 1.1352,
255
+ "step": 17500
256
+ },
257
+ {
258
+ "epoch": 0.7713569454264961,
259
+ "grad_norm": 3.0748207569122314,
260
+ "learning_rate": 3.071994857510178e-05,
261
+ "loss": 1.1243,
262
+ "step": 18000
263
+ },
264
+ {
265
+ "epoch": 0.7927835272438988,
266
+ "grad_norm": 1.4262775182724,
267
+ "learning_rate": 3.0185343904006858e-05,
268
+ "loss": 1.1011,
269
+ "step": 18500
270
+ },
271
+ {
272
+ "epoch": 0.8142101090613014,
273
+ "grad_norm": 2.2326557636260986,
274
+ "learning_rate": 2.9650739232911933e-05,
275
+ "loss": 1.0974,
276
+ "step": 19000
277
+ },
278
+ {
279
+ "epoch": 0.8356366908787041,
280
+ "grad_norm": 2.791055679321289,
281
+ "learning_rate": 2.9115063209770732e-05,
282
+ "loss": 1.0762,
283
+ "step": 19500
284
+ },
285
+ {
286
+ "epoch": 0.8570632726961068,
287
+ "grad_norm": 3.0886242389678955,
288
+ "learning_rate": 2.857938718662953e-05,
289
+ "loss": 1.0698,
290
+ "step": 20000
291
+ },
292
+ {
293
+ "epoch": 0.8784898545135095,
294
+ "grad_norm": 1.4234964847564697,
295
+ "learning_rate": 2.8043711163488323e-05,
296
+ "loss": 1.0541,
297
+ "step": 20500
298
+ },
299
+ {
300
+ "epoch": 0.8999164363309121,
301
+ "grad_norm": 3.275305986404419,
302
+ "learning_rate": 2.75091064923934e-05,
303
+ "loss": 1.0396,
304
+ "step": 21000
305
+ },
306
+ {
307
+ "epoch": 0.9213430181483148,
308
+ "grad_norm": 1.6365532875061035,
309
+ "learning_rate": 2.6973430469252198e-05,
310
+ "loss": 1.0146,
311
+ "step": 21500
312
+ },
313
+ {
314
+ "epoch": 0.9427695999657175,
315
+ "grad_norm": 3.1808159351348877,
316
+ "learning_rate": 2.6437754446110997e-05,
317
+ "loss": 0.9997,
318
+ "step": 22000
319
+ },
320
+ {
321
+ "epoch": 0.9641961817831202,
322
+ "grad_norm": 1.8644700050354004,
323
+ "learning_rate": 2.590207842296979e-05,
324
+ "loss": 0.9905,
325
+ "step": 22500
326
+ },
327
+ {
328
+ "epoch": 0.9856227636005228,
329
+ "grad_norm": 1.5963947772979736,
330
+ "learning_rate": 2.5367473751874864e-05,
331
+ "loss": 0.9624,
332
+ "step": 23000
333
+ },
334
+ {
335
+ "epoch": 1.007027918836108,
336
+ "grad_norm": 1.5474953651428223,
337
+ "learning_rate": 2.4831797728733663e-05,
338
+ "loss": 0.958,
339
+ "step": 23500
340
+ },
341
+ {
342
+ "epoch": 1.0284545006535109,
343
+ "grad_norm": 3.8564932346343994,
344
+ "learning_rate": 2.429612170559246e-05,
345
+ "loss": 0.9388,
346
+ "step": 24000
347
+ },
348
+ {
349
+ "epoch": 1.0498810824709135,
350
+ "grad_norm": 2.154879331588745,
351
+ "learning_rate": 2.3760445682451254e-05,
352
+ "loss": 0.9132,
353
+ "step": 24500
354
+ },
355
+ {
356
+ "epoch": 1.0713076642883161,
357
+ "grad_norm": 4.118302822113037,
358
+ "learning_rate": 2.3225841011356333e-05,
359
+ "loss": 0.9132,
360
+ "step": 25000
361
+ },
362
+ {
363
+ "epoch": 1.0927342461057188,
364
+ "grad_norm": 2.0971686840057373,
365
+ "learning_rate": 2.2690164988215128e-05,
366
+ "loss": 0.8947,
367
+ "step": 25500
368
+ },
369
+ {
370
+ "epoch": 1.1141608279231214,
371
+ "grad_norm": 3.480602741241455,
372
+ "learning_rate": 2.2154488965073924e-05,
373
+ "loss": 0.8922,
374
+ "step": 26000
375
+ },
376
+ {
377
+ "epoch": 1.135587409740524,
378
+ "grad_norm": 4.0732879638671875,
379
+ "learning_rate": 2.161881294193272e-05,
380
+ "loss": 0.87,
381
+ "step": 26500
382
+ },
383
+ {
384
+ "epoch": 1.1570139915579267,
385
+ "grad_norm": 2.2193567752838135,
386
+ "learning_rate": 2.1084208270837798e-05,
387
+ "loss": 0.8476,
388
+ "step": 27000
389
+ },
390
+ {
391
+ "epoch": 1.1784405733753294,
392
+ "grad_norm": 2.1723575592041016,
393
+ "learning_rate": 2.0548532247696594e-05,
394
+ "loss": 0.8322,
395
+ "step": 27500
396
+ },
397
+ {
398
+ "epoch": 1.199867155192732,
399
+ "grad_norm": 3.3503777980804443,
400
+ "learning_rate": 2.001285622455539e-05,
401
+ "loss": 0.8171,
402
+ "step": 28000
403
+ },
404
+ {
405
+ "epoch": 1.2212937370101349,
406
+ "grad_norm": 3.6080329418182373,
407
+ "learning_rate": 1.9477180201414185e-05,
408
+ "loss": 0.8105,
409
+ "step": 28500
410
+ },
411
+ {
412
+ "epoch": 1.2427203188275375,
413
+ "grad_norm": 2.708294630050659,
414
+ "learning_rate": 1.894150417827298e-05,
415
+ "loss": 0.7808,
416
+ "step": 29000
417
+ },
418
+ {
419
+ "epoch": 1.2641469006449402,
420
+ "grad_norm": 3.300328493118286,
421
+ "learning_rate": 1.8405828155131776e-05,
422
+ "loss": 0.7774,
423
+ "step": 29500
424
+ },
425
+ {
426
+ "epoch": 1.2855734824623428,
427
+ "grad_norm": 2.1943624019622803,
428
+ "learning_rate": 1.7870152131990572e-05,
429
+ "loss": 0.7637,
430
+ "step": 30000
431
+ },
432
+ {
433
+ "epoch": 1.3070000642797455,
434
+ "grad_norm": 2.7251927852630615,
435
+ "learning_rate": 1.733554746089565e-05,
436
+ "loss": 0.7481,
437
+ "step": 30500
438
+ },
439
+ {
440
+ "epoch": 1.3284266460971481,
441
+ "grad_norm": 4.672070026397705,
442
+ "learning_rate": 1.6799871437754446e-05,
443
+ "loss": 0.7255,
444
+ "step": 31000
445
+ },
446
+ {
447
+ "epoch": 1.3498532279145508,
448
+ "grad_norm": 2.4984779357910156,
449
+ "learning_rate": 1.626419541461324e-05,
450
+ "loss": 0.7231,
451
+ "step": 31500
452
+ },
453
+ {
454
+ "epoch": 1.3712798097319534,
455
+ "grad_norm": 1.9306334257125854,
456
+ "learning_rate": 1.5728519391472037e-05,
457
+ "loss": 0.7032,
458
+ "step": 32000
459
+ },
460
+ {
461
+ "epoch": 1.392706391549356,
462
+ "grad_norm": 3.015226125717163,
463
+ "learning_rate": 1.5192843368330834e-05,
464
+ "loss": 0.6923,
465
+ "step": 32500
466
+ },
467
+ {
468
+ "epoch": 1.4141329733667587,
469
+ "grad_norm": 3.646979331970215,
470
+ "learning_rate": 1.4658238697235913e-05,
471
+ "loss": 0.6893,
472
+ "step": 33000
473
+ },
474
+ {
475
+ "epoch": 1.4355595551841613,
476
+ "grad_norm": 1.8951635360717773,
477
+ "learning_rate": 1.4122562674094708e-05,
478
+ "loss": 0.6693,
479
+ "step": 33500
480
+ },
481
+ {
482
+ "epoch": 1.4569861370015642,
483
+ "grad_norm": 2.3496530055999756,
484
+ "learning_rate": 1.3586886650953504e-05,
485
+ "loss": 0.6586,
486
+ "step": 34000
487
+ },
488
+ {
489
+ "epoch": 1.4784127188189669,
490
+ "grad_norm": 2.899231195449829,
491
+ "learning_rate": 1.30512106278123e-05,
492
+ "loss": 0.6338,
493
+ "step": 34500
494
+ },
495
+ {
496
+ "epoch": 1.4998393006363695,
497
+ "grad_norm": 2.7465250492095947,
498
+ "learning_rate": 1.2515534604671095e-05,
499
+ "loss": 0.6291,
500
+ "step": 35000
501
+ },
502
+ {
503
+ "epoch": 1.5212658824537721,
504
+ "grad_norm": 2.3695950508117676,
505
+ "learning_rate": 1.1979858581529891e-05,
506
+ "loss": 0.6192,
507
+ "step": 35500
508
+ },
509
+ {
510
+ "epoch": 1.5426924642711748,
511
+ "grad_norm": 3.0361921787261963,
512
+ "learning_rate": 1.1444182558388687e-05,
513
+ "loss": 0.6147,
514
+ "step": 36000
515
+ },
516
+ {
517
+ "epoch": 1.5641190460885774,
518
+ "grad_norm": 3.5043649673461914,
519
+ "learning_rate": 1.0908506535247482e-05,
520
+ "loss": 0.6024,
521
+ "step": 36500
522
+ },
523
+ {
524
+ "epoch": 1.5855456279059803,
525
+ "grad_norm": 2.1166698932647705,
526
+ "learning_rate": 1.037390186415256e-05,
527
+ "loss": 0.6033,
528
+ "step": 37000
529
+ },
530
+ {
531
+ "epoch": 1.606972209723383,
532
+ "grad_norm": 2.8012290000915527,
533
+ "learning_rate": 9.838225841011358e-06,
534
+ "loss": 0.5935,
535
+ "step": 37500
536
+ },
537
+ {
538
+ "epoch": 1.6283987915407856,
539
+ "grad_norm": 2.49540638923645,
540
+ "learning_rate": 9.302549817870154e-06,
541
+ "loss": 0.5819,
542
+ "step": 38000
543
+ },
544
+ {
545
+ "epoch": 1.6498253733581882,
546
+ "grad_norm": 2.233430862426758,
547
+ "learning_rate": 8.766873794728948e-06,
548
+ "loss": 0.5674,
549
+ "step": 38500
550
+ },
551
+ {
552
+ "epoch": 1.6712519551755909,
553
+ "grad_norm": 2.823101282119751,
554
+ "learning_rate": 8.232269123634026e-06,
555
+ "loss": 0.552,
556
+ "step": 39000
557
+ },
558
+ {
559
+ "epoch": 1.6926785369929935,
560
+ "grad_norm": 2.389265775680542,
561
+ "learning_rate": 7.696593100492823e-06,
562
+ "loss": 0.5564,
563
+ "step": 39500
564
+ },
565
+ {
566
+ "epoch": 1.7141051188103962,
567
+ "grad_norm": 2.1136868000030518,
568
+ "learning_rate": 7.161988429397901e-06,
569
+ "loss": 0.5477,
570
+ "step": 40000
571
+ },
572
+ {
573
+ "epoch": 1.7355317006277988,
574
+ "grad_norm": 2.6307172775268555,
575
+ "learning_rate": 6.626312406256696e-06,
576
+ "loss": 0.545,
577
+ "step": 40500
578
+ },
579
+ {
580
+ "epoch": 1.7569582824452015,
581
+ "grad_norm": 3.4348325729370117,
582
+ "learning_rate": 6.090636383115492e-06,
583
+ "loss": 0.5359,
584
+ "step": 41000
585
+ },
586
+ {
587
+ "epoch": 1.7783848642626041,
588
+ "grad_norm": 2.1959691047668457,
589
+ "learning_rate": 5.554960359974288e-06,
590
+ "loss": 0.5267,
591
+ "step": 41500
592
+ },
593
+ {
594
+ "epoch": 1.7998114460800068,
595
+ "grad_norm": 2.747068405151367,
596
+ "learning_rate": 5.0192843368330835e-06,
597
+ "loss": 0.5316,
598
+ "step": 42000
599
+ },
600
+ {
601
+ "epoch": 1.8212380278974094,
602
+ "grad_norm": 2.965233325958252,
603
+ "learning_rate": 4.48360831369188e-06,
604
+ "loss": 0.5129,
605
+ "step": 42500
606
+ },
607
+ {
608
+ "epoch": 1.842664609714812,
609
+ "grad_norm": 2.3425612449645996,
610
+ "learning_rate": 3.9479322905506756e-06,
611
+ "loss": 0.5058,
612
+ "step": 43000
613
+ },
614
+ {
615
+ "epoch": 1.864091191532215,
616
+ "grad_norm": 1.5650664567947388,
617
+ "learning_rate": 3.4122562674094708e-06,
618
+ "loss": 0.5155,
619
+ "step": 43500
620
+ },
621
+ {
622
+ "epoch": 1.8855177733496176,
623
+ "grad_norm": 2.501758098602295,
624
+ "learning_rate": 2.8776515963145492e-06,
625
+ "loss": 0.5076,
626
+ "step": 44000
627
+ },
628
+ {
629
+ "epoch": 1.9069443551670202,
630
+ "grad_norm": 4.431907653808594,
631
+ "learning_rate": 2.341975573173345e-06,
632
+ "loss": 0.5119,
633
+ "step": 44500
634
+ },
635
+ {
636
+ "epoch": 1.9283709369844229,
637
+ "grad_norm": 3.3445732593536377,
638
+ "learning_rate": 1.8062995500321405e-06,
639
+ "loss": 0.5021,
640
+ "step": 45000
641
+ },
642
+ {
643
+ "epoch": 1.9497975188018255,
644
+ "grad_norm": 2.6224589347839355,
645
+ "learning_rate": 1.2706235268909363e-06,
646
+ "loss": 0.5019,
647
+ "step": 45500
648
+ },
649
+ {
650
+ "epoch": 1.9712241006192284,
651
+ "grad_norm": 2.954880952835083,
652
+ "learning_rate": 7.360188557960147e-07,
653
+ "loss": 0.5055,
654
+ "step": 46000
655
+ },
656
+ {
657
+ "epoch": 1.992650682436631,
658
+ "grad_norm": 2.903310775756836,
659
+ "learning_rate": 2.003428326548104e-07,
660
+ "loss": 0.5043,
661
+ "step": 46500
662
+ }
663
+ ],
664
+ "logging_steps": 500,
665
+ "max_steps": 46670,
666
+ "num_input_tokens_seen": 0,
667
+ "num_train_epochs": 2,
668
+ "save_steps": 500,
669
+ "stateful_callbacks": {
670
+ "TrainerControl": {
671
+ "args": {
672
+ "should_epoch_stop": false,
673
+ "should_evaluate": false,
674
+ "should_log": false,
675
+ "should_save": true,
676
+ "should_training_stop": false
677
+ },
678
+ "attributes": {}
679
+ }
680
+ },
681
+ "total_flos": 1.0069301454805402e+17,
682
+ "train_batch_size": 32,
683
+ "trial_name": null,
684
+ "trial_params": null
685
+ }
checkpoints/{checkpoint-62000 → checkpoint-46500}/training_args.bin RENAMED
File without changes
checkpoints/{checkpoint-62228 → checkpoint-46670}/config.json RENAMED
@@ -16,45 +16,14 @@
16
  "is_gated_act": false,
17
  "layer_norm_epsilon": 1e-06,
18
  "model_type": "t5",
19
- "n_positions": 512,
20
  "num_decoder_layers": 6,
21
  "num_heads": 8,
22
  "num_layers": 6,
23
- "output_past": true,
24
  "pad_token_id": 0,
25
  "relative_attention_max_distance": 128,
26
  "relative_attention_num_buckets": 32,
27
- "task_specific_params": {
28
- "summarization": {
29
- "early_stopping": true,
30
- "length_penalty": 2.0,
31
- "max_length": 200,
32
- "min_length": 30,
33
- "no_repeat_ngram_size": 3,
34
- "num_beams": 4,
35
- "prefix": "summarize: "
36
- },
37
- "translation_en_to_de": {
38
- "early_stopping": true,
39
- "max_length": 300,
40
- "num_beams": 4,
41
- "prefix": "translate English to German: "
42
- },
43
- "translation_en_to_fr": {
44
- "early_stopping": true,
45
- "max_length": 300,
46
- "num_beams": 4,
47
- "prefix": "translate English to French: "
48
- },
49
- "translation_en_to_ro": {
50
- "early_stopping": true,
51
- "max_length": 300,
52
- "num_beams": 4,
53
- "prefix": "translate English to Romanian: "
54
- }
55
- },
56
  "torch_dtype": "float32",
57
  "transformers_version": "4.51.2",
58
  "use_cache": true,
59
- "vocab_size": 32128
60
  }
 
16
  "is_gated_act": false,
17
  "layer_norm_epsilon": 1e-06,
18
  "model_type": "t5",
 
19
  "num_decoder_layers": 6,
20
  "num_heads": 8,
21
  "num_layers": 6,
 
22
  "pad_token_id": 0,
23
  "relative_attention_max_distance": 128,
24
  "relative_attention_num_buckets": 32,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  "torch_dtype": "float32",
26
  "transformers_version": "4.51.2",
27
  "use_cache": true,
28
+ "vocab_size": 40100
29
  }
checkpoints/{checkpoint-62228 → checkpoint-46670}/generation_config.json RENAMED
File without changes
checkpoints/checkpoint-46670/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0c4fdcc76ad48f4138a7163703cbb71ce3039d34301ba0d1641ab373f74cb78
3
+ size 258368552
checkpoints/{checkpoint-61500 → checkpoint-46670}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3342964c9be74032ffa9b4cff58e4f7eaed7adc5ad296183509cf0a30227c853
3
- size 484163514
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6016ea2698bff87f113e74d971e40088ef6bfece4c44b9cfc9c8f67607bf3fff
3
+ size 516816826
checkpoints/{checkpoint-61500 → checkpoint-46670}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:08afadc7893d56386d744b1e0cbce95e6d6eff9ef96e2c2b3486065fbc550164
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6b74dcc344aa2aa249694c8e8c8a6f10060dd3e030c9a1601ca37e471069c97
3
  size 14244
checkpoints/{checkpoint-62000 → checkpoint-46670}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6892b9a38efebaa7ab1728c9d3e9fb9fb0ccf8b1a48d0799894268d503230ff0
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c8127a094744a92d337cab84c5f2c15868138db8c40ec64abca0da64e1d4093
3
  size 988
checkpoints/{checkpoint-62000 → checkpoint-46670}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7954ecc6645e020e139b5a94e047d4a69048490e85d681c5cfdcc07828c41a06
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:776914bc939f0ea9cebde9f0559df83ab4f63b62e882e67ad16dae999ed24bfc
3
  size 1064
checkpoints/{checkpoint-62000 → checkpoint-46670}/special_tokens_map.json RENAMED
@@ -101,13 +101,6 @@
101
  "<extra_id_98>",
102
  "<extra_id_99>"
103
  ],
104
- "bos_token": {
105
- "content": "<s>",
106
- "lstrip": false,
107
- "normalized": false,
108
- "rstrip": false,
109
- "single_word": false
110
- },
111
  "eos_token": {
112
  "content": "</s>",
113
  "lstrip": false,
 
101
  "<extra_id_98>",
102
  "<extra_id_99>"
103
  ],
 
 
 
 
 
 
 
104
  "eos_token": {
105
  "content": "</s>",
106
  "lstrip": false,
checkpoints/{checkpoint-61500 → checkpoint-46670}/spiece.model RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
3
- size 791656
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3365205d18a2c0699fb0ee86ab06f3042d553acaa219eb11aa77c3c56f638538
3
+ size 1047337
checkpoints/checkpoint-46670/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/{checkpoint-62228 → checkpoint-46670}/tokenizer_config.json RENAMED
@@ -1,5 +1,5 @@
1
  {
2
- "add_prefix_space": null,
3
  "added_tokens_decoder": {
4
  "0": {
5
  "content": "<pad>",
@@ -10,7 +10,7 @@
10
  "special": true
11
  },
12
  "1": {
13
- "content": "<s>",
14
  "lstrip": false,
15
  "normalized": false,
16
  "rstrip": false,
@@ -18,818 +18,818 @@
18
  "special": true
19
  },
20
  "2": {
21
- "content": "</s>",
22
  "lstrip": false,
23
  "normalized": false,
24
  "rstrip": false,
25
  "single_word": false,
26
- "special": true
27
  },
28
  "3": {
29
- "content": "<unk>",
30
  "lstrip": false,
31
  "normalized": false,
32
  "rstrip": false,
33
  "single_word": false,
34
  "special": true
35
  },
36
- "8000": {
37
- "content": "<extra_id_0>",
38
- "lstrip": false,
39
  "normalized": false,
40
- "rstrip": false,
41
  "single_word": false,
42
  "special": true
43
  },
44
- "8001": {
45
- "content": "<extra_id_1>",
46
- "lstrip": false,
47
  "normalized": false,
48
- "rstrip": false,
49
  "single_word": false,
50
  "special": true
51
  },
52
- "8002": {
53
- "content": "<extra_id_2>",
54
- "lstrip": false,
55
  "normalized": false,
56
- "rstrip": false,
57
  "single_word": false,
58
  "special": true
59
  },
60
- "8003": {
61
- "content": "<extra_id_3>",
62
- "lstrip": false,
63
  "normalized": false,
64
- "rstrip": false,
65
  "single_word": false,
66
  "special": true
67
  },
68
- "8004": {
69
- "content": "<extra_id_4>",
70
- "lstrip": false,
71
  "normalized": false,
72
- "rstrip": false,
73
  "single_word": false,
74
  "special": true
75
  },
76
- "8005": {
77
- "content": "<extra_id_5>",
78
- "lstrip": false,
79
  "normalized": false,
80
- "rstrip": false,
81
  "single_word": false,
82
  "special": true
83
  },
84
- "8006": {
85
- "content": "<extra_id_6>",
86
- "lstrip": false,
87
  "normalized": false,
88
- "rstrip": false,
89
  "single_word": false,
90
  "special": true
91
  },
92
- "8007": {
93
- "content": "<extra_id_7>",
94
- "lstrip": false,
95
  "normalized": false,
96
- "rstrip": false,
97
  "single_word": false,
98
  "special": true
99
  },
100
- "8008": {
101
- "content": "<extra_id_8>",
102
- "lstrip": false,
103
  "normalized": false,
104
- "rstrip": false,
105
  "single_word": false,
106
  "special": true
107
  },
108
- "8009": {
109
- "content": "<extra_id_9>",
110
- "lstrip": false,
111
  "normalized": false,
112
- "rstrip": false,
113
  "single_word": false,
114
  "special": true
115
  },
116
- "8010": {
117
- "content": "<extra_id_10>",
118
- "lstrip": false,
119
  "normalized": false,
120
- "rstrip": false,
121
  "single_word": false,
122
  "special": true
123
  },
124
- "8011": {
125
- "content": "<extra_id_11>",
126
- "lstrip": false,
127
  "normalized": false,
128
- "rstrip": false,
129
  "single_word": false,
130
  "special": true
131
  },
132
- "8012": {
133
- "content": "<extra_id_12>",
134
- "lstrip": false,
135
  "normalized": false,
136
- "rstrip": false,
137
  "single_word": false,
138
  "special": true
139
  },
140
- "8013": {
141
- "content": "<extra_id_13>",
142
- "lstrip": false,
143
  "normalized": false,
144
- "rstrip": false,
145
  "single_word": false,
146
  "special": true
147
  },
148
- "8014": {
149
- "content": "<extra_id_14>",
150
- "lstrip": false,
151
  "normalized": false,
152
- "rstrip": false,
153
  "single_word": false,
154
  "special": true
155
  },
156
- "8015": {
157
- "content": "<extra_id_15>",
158
- "lstrip": false,
159
  "normalized": false,
160
- "rstrip": false,
161
  "single_word": false,
162
  "special": true
163
  },
164
- "8016": {
165
- "content": "<extra_id_16>",
166
- "lstrip": false,
167
  "normalized": false,
168
- "rstrip": false,
169
  "single_word": false,
170
  "special": true
171
  },
172
- "8017": {
173
- "content": "<extra_id_17>",
174
- "lstrip": false,
175
  "normalized": false,
176
- "rstrip": false,
177
  "single_word": false,
178
  "special": true
179
  },
180
- "8018": {
181
- "content": "<extra_id_18>",
182
- "lstrip": false,
183
  "normalized": false,
184
- "rstrip": false,
185
  "single_word": false,
186
  "special": true
187
  },
188
- "8019": {
189
- "content": "<extra_id_19>",
190
- "lstrip": false,
191
  "normalized": false,
192
- "rstrip": false,
193
  "single_word": false,
194
  "special": true
195
  },
196
- "8020": {
197
- "content": "<extra_id_20>",
198
- "lstrip": false,
199
  "normalized": false,
200
- "rstrip": false,
201
  "single_word": false,
202
  "special": true
203
  },
204
- "8021": {
205
- "content": "<extra_id_21>",
206
- "lstrip": false,
207
  "normalized": false,
208
- "rstrip": false,
209
  "single_word": false,
210
  "special": true
211
  },
212
- "8022": {
213
- "content": "<extra_id_22>",
214
- "lstrip": false,
215
  "normalized": false,
216
- "rstrip": false,
217
  "single_word": false,
218
  "special": true
219
  },
220
- "8023": {
221
- "content": "<extra_id_23>",
222
- "lstrip": false,
223
  "normalized": false,
224
- "rstrip": false,
225
  "single_word": false,
226
  "special": true
227
  },
228
- "8024": {
229
- "content": "<extra_id_24>",
230
- "lstrip": false,
231
  "normalized": false,
232
- "rstrip": false,
233
  "single_word": false,
234
  "special": true
235
  },
236
- "8025": {
237
- "content": "<extra_id_25>",
238
- "lstrip": false,
239
  "normalized": false,
240
- "rstrip": false,
241
  "single_word": false,
242
  "special": true
243
  },
244
- "8026": {
245
- "content": "<extra_id_26>",
246
- "lstrip": false,
247
  "normalized": false,
248
- "rstrip": false,
249
  "single_word": false,
250
  "special": true
251
  },
252
- "8027": {
253
- "content": "<extra_id_27>",
254
- "lstrip": false,
255
  "normalized": false,
256
- "rstrip": false,
257
  "single_word": false,
258
  "special": true
259
  },
260
- "8028": {
261
- "content": "<extra_id_28>",
262
- "lstrip": false,
263
  "normalized": false,
264
- "rstrip": false,
265
  "single_word": false,
266
  "special": true
267
  },
268
- "8029": {
269
- "content": "<extra_id_29>",
270
- "lstrip": false,
271
  "normalized": false,
272
- "rstrip": false,
273
  "single_word": false,
274
  "special": true
275
  },
276
- "8030": {
277
- "content": "<extra_id_30>",
278
- "lstrip": false,
279
  "normalized": false,
280
- "rstrip": false,
281
  "single_word": false,
282
  "special": true
283
  },
284
- "8031": {
285
- "content": "<extra_id_31>",
286
- "lstrip": false,
287
  "normalized": false,
288
- "rstrip": false,
289
  "single_word": false,
290
  "special": true
291
  },
292
- "8032": {
293
- "content": "<extra_id_32>",
294
- "lstrip": false,
295
  "normalized": false,
296
- "rstrip": false,
297
  "single_word": false,
298
  "special": true
299
  },
300
- "8033": {
301
- "content": "<extra_id_33>",
302
- "lstrip": false,
303
  "normalized": false,
304
- "rstrip": false,
305
  "single_word": false,
306
  "special": true
307
  },
308
- "8034": {
309
- "content": "<extra_id_34>",
310
- "lstrip": false,
311
  "normalized": false,
312
- "rstrip": false,
313
  "single_word": false,
314
  "special": true
315
  },
316
- "8035": {
317
- "content": "<extra_id_35>",
318
- "lstrip": false,
319
  "normalized": false,
320
- "rstrip": false,
321
  "single_word": false,
322
  "special": true
323
  },
324
- "8036": {
325
- "content": "<extra_id_36>",
326
- "lstrip": false,
327
  "normalized": false,
328
- "rstrip": false,
329
  "single_word": false,
330
  "special": true
331
  },
332
- "8037": {
333
- "content": "<extra_id_37>",
334
- "lstrip": false,
335
  "normalized": false,
336
- "rstrip": false,
337
  "single_word": false,
338
  "special": true
339
  },
340
- "8038": {
341
- "content": "<extra_id_38>",
342
- "lstrip": false,
343
  "normalized": false,
344
- "rstrip": false,
345
  "single_word": false,
346
  "special": true
347
  },
348
- "8039": {
349
- "content": "<extra_id_39>",
350
- "lstrip": false,
351
  "normalized": false,
352
- "rstrip": false,
353
  "single_word": false,
354
  "special": true
355
  },
356
- "8040": {
357
- "content": "<extra_id_40>",
358
- "lstrip": false,
359
  "normalized": false,
360
- "rstrip": false,
361
  "single_word": false,
362
  "special": true
363
  },
364
- "8041": {
365
- "content": "<extra_id_41>",
366
- "lstrip": false,
367
  "normalized": false,
368
- "rstrip": false,
369
  "single_word": false,
370
  "special": true
371
  },
372
- "8042": {
373
- "content": "<extra_id_42>",
374
- "lstrip": false,
375
  "normalized": false,
376
- "rstrip": false,
377
  "single_word": false,
378
  "special": true
379
  },
380
- "8043": {
381
- "content": "<extra_id_43>",
382
- "lstrip": false,
383
  "normalized": false,
384
- "rstrip": false,
385
  "single_word": false,
386
  "special": true
387
  },
388
- "8044": {
389
- "content": "<extra_id_44>",
390
- "lstrip": false,
391
  "normalized": false,
392
- "rstrip": false,
393
  "single_word": false,
394
  "special": true
395
  },
396
- "8045": {
397
- "content": "<extra_id_45>",
398
- "lstrip": false,
399
  "normalized": false,
400
- "rstrip": false,
401
  "single_word": false,
402
  "special": true
403
  },
404
- "8046": {
405
- "content": "<extra_id_46>",
406
- "lstrip": false,
407
  "normalized": false,
408
- "rstrip": false,
409
  "single_word": false,
410
  "special": true
411
  },
412
- "8047": {
413
- "content": "<extra_id_47>",
414
- "lstrip": false,
415
  "normalized": false,
416
- "rstrip": false,
417
  "single_word": false,
418
  "special": true
419
  },
420
- "8048": {
421
- "content": "<extra_id_48>",
422
- "lstrip": false,
423
  "normalized": false,
424
- "rstrip": false,
425
  "single_word": false,
426
  "special": true
427
  },
428
- "8049": {
429
- "content": "<extra_id_49>",
430
- "lstrip": false,
431
  "normalized": false,
432
- "rstrip": false,
433
  "single_word": false,
434
  "special": true
435
  },
436
- "8050": {
437
- "content": "<extra_id_50>",
438
- "lstrip": false,
439
  "normalized": false,
440
- "rstrip": false,
441
  "single_word": false,
442
  "special": true
443
  },
444
- "8051": {
445
- "content": "<extra_id_51>",
446
- "lstrip": false,
447
  "normalized": false,
448
- "rstrip": false,
449
  "single_word": false,
450
  "special": true
451
  },
452
- "8052": {
453
- "content": "<extra_id_52>",
454
- "lstrip": false,
455
  "normalized": false,
456
- "rstrip": false,
457
  "single_word": false,
458
  "special": true
459
  },
460
- "8053": {
461
- "content": "<extra_id_53>",
462
- "lstrip": false,
463
  "normalized": false,
464
- "rstrip": false,
465
  "single_word": false,
466
  "special": true
467
  },
468
- "8054": {
469
- "content": "<extra_id_54>",
470
- "lstrip": false,
471
  "normalized": false,
472
- "rstrip": false,
473
  "single_word": false,
474
  "special": true
475
  },
476
- "8055": {
477
- "content": "<extra_id_55>",
478
- "lstrip": false,
479
  "normalized": false,
480
- "rstrip": false,
481
  "single_word": false,
482
  "special": true
483
  },
484
- "8056": {
485
- "content": "<extra_id_56>",
486
- "lstrip": false,
487
  "normalized": false,
488
- "rstrip": false,
489
  "single_word": false,
490
  "special": true
491
  },
492
- "8057": {
493
- "content": "<extra_id_57>",
494
- "lstrip": false,
495
  "normalized": false,
496
- "rstrip": false,
497
  "single_word": false,
498
  "special": true
499
  },
500
- "8058": {
501
- "content": "<extra_id_58>",
502
- "lstrip": false,
503
  "normalized": false,
504
- "rstrip": false,
505
  "single_word": false,
506
  "special": true
507
  },
508
- "8059": {
509
- "content": "<extra_id_59>",
510
- "lstrip": false,
511
  "normalized": false,
512
- "rstrip": false,
513
  "single_word": false,
514
  "special": true
515
  },
516
- "8060": {
517
- "content": "<extra_id_60>",
518
- "lstrip": false,
519
  "normalized": false,
520
- "rstrip": false,
521
  "single_word": false,
522
  "special": true
523
  },
524
- "8061": {
525
- "content": "<extra_id_61>",
526
- "lstrip": false,
527
  "normalized": false,
528
- "rstrip": false,
529
  "single_word": false,
530
  "special": true
531
  },
532
- "8062": {
533
- "content": "<extra_id_62>",
534
- "lstrip": false,
535
  "normalized": false,
536
- "rstrip": false,
537
  "single_word": false,
538
  "special": true
539
  },
540
- "8063": {
541
- "content": "<extra_id_63>",
542
- "lstrip": false,
543
  "normalized": false,
544
- "rstrip": false,
545
  "single_word": false,
546
  "special": true
547
  },
548
- "8064": {
549
- "content": "<extra_id_64>",
550
- "lstrip": false,
551
  "normalized": false,
552
- "rstrip": false,
553
  "single_word": false,
554
  "special": true
555
  },
556
- "8065": {
557
- "content": "<extra_id_65>",
558
- "lstrip": false,
559
  "normalized": false,
560
- "rstrip": false,
561
  "single_word": false,
562
  "special": true
563
  },
564
- "8066": {
565
- "content": "<extra_id_66>",
566
- "lstrip": false,
567
  "normalized": false,
568
- "rstrip": false,
569
  "single_word": false,
570
  "special": true
571
  },
572
- "8067": {
573
- "content": "<extra_id_67>",
574
- "lstrip": false,
575
  "normalized": false,
576
- "rstrip": false,
577
  "single_word": false,
578
  "special": true
579
  },
580
- "8068": {
581
- "content": "<extra_id_68>",
582
- "lstrip": false,
583
  "normalized": false,
584
- "rstrip": false,
585
  "single_word": false,
586
  "special": true
587
  },
588
- "8069": {
589
- "content": "<extra_id_69>",
590
- "lstrip": false,
591
  "normalized": false,
592
- "rstrip": false,
593
  "single_word": false,
594
  "special": true
595
  },
596
- "8070": {
597
- "content": "<extra_id_70>",
598
- "lstrip": false,
599
  "normalized": false,
600
- "rstrip": false,
601
  "single_word": false,
602
  "special": true
603
  },
604
- "8071": {
605
- "content": "<extra_id_71>",
606
- "lstrip": false,
607
  "normalized": false,
608
- "rstrip": false,
609
  "single_word": false,
610
  "special": true
611
  },
612
- "8072": {
613
- "content": "<extra_id_72>",
614
- "lstrip": false,
615
  "normalized": false,
616
- "rstrip": false,
617
  "single_word": false,
618
  "special": true
619
  },
620
- "8073": {
621
- "content": "<extra_id_73>",
622
- "lstrip": false,
623
  "normalized": false,
624
- "rstrip": false,
625
  "single_word": false,
626
  "special": true
627
  },
628
- "8074": {
629
- "content": "<extra_id_74>",
630
- "lstrip": false,
631
  "normalized": false,
632
- "rstrip": false,
633
  "single_word": false,
634
  "special": true
635
  },
636
- "8075": {
637
- "content": "<extra_id_75>",
638
- "lstrip": false,
639
  "normalized": false,
640
- "rstrip": false,
641
  "single_word": false,
642
  "special": true
643
  },
644
- "8076": {
645
- "content": "<extra_id_76>",
646
- "lstrip": false,
647
  "normalized": false,
648
- "rstrip": false,
649
  "single_word": false,
650
  "special": true
651
  },
652
- "8077": {
653
- "content": "<extra_id_77>",
654
- "lstrip": false,
655
  "normalized": false,
656
- "rstrip": false,
657
  "single_word": false,
658
  "special": true
659
  },
660
- "8078": {
661
- "content": "<extra_id_78>",
662
- "lstrip": false,
663
  "normalized": false,
664
- "rstrip": false,
665
  "single_word": false,
666
  "special": true
667
  },
668
- "8079": {
669
- "content": "<extra_id_79>",
670
- "lstrip": false,
671
  "normalized": false,
672
- "rstrip": false,
673
  "single_word": false,
674
  "special": true
675
  },
676
- "8080": {
677
- "content": "<extra_id_80>",
678
- "lstrip": false,
679
  "normalized": false,
680
- "rstrip": false,
681
  "single_word": false,
682
  "special": true
683
  },
684
- "8081": {
685
- "content": "<extra_id_81>",
686
- "lstrip": false,
687
  "normalized": false,
688
- "rstrip": false,
689
  "single_word": false,
690
  "special": true
691
  },
692
- "8082": {
693
- "content": "<extra_id_82>",
694
- "lstrip": false,
695
  "normalized": false,
696
- "rstrip": false,
697
  "single_word": false,
698
  "special": true
699
  },
700
- "8083": {
701
- "content": "<extra_id_83>",
702
- "lstrip": false,
703
  "normalized": false,
704
- "rstrip": false,
705
  "single_word": false,
706
  "special": true
707
  },
708
- "8084": {
709
- "content": "<extra_id_84>",
710
- "lstrip": false,
711
  "normalized": false,
712
- "rstrip": false,
713
  "single_word": false,
714
  "special": true
715
  },
716
- "8085": {
717
- "content": "<extra_id_85>",
718
- "lstrip": false,
719
  "normalized": false,
720
- "rstrip": false,
721
  "single_word": false,
722
  "special": true
723
  },
724
- "8086": {
725
- "content": "<extra_id_86>",
726
- "lstrip": false,
727
  "normalized": false,
728
- "rstrip": false,
729
  "single_word": false,
730
  "special": true
731
  },
732
- "8087": {
733
- "content": "<extra_id_87>",
734
- "lstrip": false,
735
  "normalized": false,
736
- "rstrip": false,
737
  "single_word": false,
738
  "special": true
739
  },
740
- "8088": {
741
- "content": "<extra_id_88>",
742
- "lstrip": false,
743
  "normalized": false,
744
- "rstrip": false,
745
  "single_word": false,
746
  "special": true
747
  },
748
- "8089": {
749
- "content": "<extra_id_89>",
750
- "lstrip": false,
751
  "normalized": false,
752
- "rstrip": false,
753
  "single_word": false,
754
  "special": true
755
  },
756
- "8090": {
757
- "content": "<extra_id_90>",
758
- "lstrip": false,
759
  "normalized": false,
760
- "rstrip": false,
761
  "single_word": false,
762
  "special": true
763
  },
764
- "8091": {
765
- "content": "<extra_id_91>",
766
- "lstrip": false,
767
  "normalized": false,
768
- "rstrip": false,
769
  "single_word": false,
770
  "special": true
771
  },
772
- "8092": {
773
- "content": "<extra_id_92>",
774
- "lstrip": false,
775
  "normalized": false,
776
- "rstrip": false,
777
  "single_word": false,
778
  "special": true
779
  },
780
- "8093": {
781
- "content": "<extra_id_93>",
782
- "lstrip": false,
783
  "normalized": false,
784
- "rstrip": false,
785
  "single_word": false,
786
  "special": true
787
  },
788
- "8094": {
789
- "content": "<extra_id_94>",
790
- "lstrip": false,
791
  "normalized": false,
792
- "rstrip": false,
793
  "single_word": false,
794
  "special": true
795
  },
796
- "8095": {
797
- "content": "<extra_id_95>",
798
- "lstrip": false,
799
  "normalized": false,
800
- "rstrip": false,
801
  "single_word": false,
802
  "special": true
803
  },
804
- "8096": {
805
- "content": "<extra_id_96>",
806
- "lstrip": false,
807
  "normalized": false,
808
- "rstrip": false,
809
  "single_word": false,
810
  "special": true
811
  },
812
- "8097": {
813
- "content": "<extra_id_97>",
814
- "lstrip": false,
815
  "normalized": false,
816
- "rstrip": false,
817
  "single_word": false,
818
  "special": true
819
  },
820
- "8098": {
821
- "content": "<extra_id_98>",
822
- "lstrip": false,
823
  "normalized": false,
824
- "rstrip": false,
825
  "single_word": false,
826
  "special": true
827
  },
828
- "8099": {
829
- "content": "<extra_id_99>",
830
- "lstrip": false,
831
  "normalized": false,
832
- "rstrip": false,
833
  "single_word": false,
834
  "special": true
835
  }
@@ -936,13 +936,14 @@
936
  "<extra_id_98>",
937
  "<extra_id_99>"
938
  ],
939
- "bos_token": "<s>",
940
  "clean_up_tokenization_spaces": false,
941
  "eos_token": "</s>",
942
  "extra_ids": 100,
943
  "extra_special_tokens": {},
 
944
  "model_max_length": 1000000000000000019884624838656,
945
  "pad_token": "<pad>",
946
- "tokenizer_class": "T5TokenizerFast",
 
947
  "unk_token": "<unk>"
948
  }
 
1
  {
2
+ "add_prefix_space": true,
3
  "added_tokens_decoder": {
4
  "0": {
5
  "content": "<pad>",
 
10
  "special": true
11
  },
12
  "1": {
13
+ "content": "<unk>",
14
  "lstrip": false,
15
  "normalized": false,
16
  "rstrip": false,
 
18
  "special": true
19
  },
20
  "2": {
21
+ "content": "<s>",
22
  "lstrip": false,
23
  "normalized": false,
24
  "rstrip": false,
25
  "single_word": false,
26
+ "special": false
27
  },
28
  "3": {
29
+ "content": "</s>",
30
  "lstrip": false,
31
  "normalized": false,
32
  "rstrip": false,
33
  "single_word": false,
34
  "special": true
35
  },
36
+ "40000": {
37
+ "content": "<extra_id_99>",
38
+ "lstrip": true,
39
  "normalized": false,
40
+ "rstrip": true,
41
  "single_word": false,
42
  "special": true
43
  },
44
+ "40001": {
45
+ "content": "<extra_id_98>",
46
+ "lstrip": true,
47
  "normalized": false,
48
+ "rstrip": true,
49
  "single_word": false,
50
  "special": true
51
  },
52
+ "40002": {
53
+ "content": "<extra_id_97>",
54
+ "lstrip": true,
55
  "normalized": false,
56
+ "rstrip": true,
57
  "single_word": false,
58
  "special": true
59
  },
60
+ "40003": {
61
+ "content": "<extra_id_96>",
62
+ "lstrip": true,
63
  "normalized": false,
64
+ "rstrip": true,
65
  "single_word": false,
66
  "special": true
67
  },
68
+ "40004": {
69
+ "content": "<extra_id_95>",
70
+ "lstrip": true,
71
  "normalized": false,
72
+ "rstrip": true,
73
  "single_word": false,
74
  "special": true
75
  },
76
+ "40005": {
77
+ "content": "<extra_id_94>",
78
+ "lstrip": true,
79
  "normalized": false,
80
+ "rstrip": true,
81
  "single_word": false,
82
  "special": true
83
  },
84
+ "40006": {
85
+ "content": "<extra_id_93>",
86
+ "lstrip": true,
87
  "normalized": false,
88
+ "rstrip": true,
89
  "single_word": false,
90
  "special": true
91
  },
92
+ "40007": {
93
+ "content": "<extra_id_92>",
94
+ "lstrip": true,
95
  "normalized": false,
96
+ "rstrip": true,
97
  "single_word": false,
98
  "special": true
99
  },
100
+ "40008": {
101
+ "content": "<extra_id_91>",
102
+ "lstrip": true,
103
  "normalized": false,
104
+ "rstrip": true,
105
  "single_word": false,
106
  "special": true
107
  },
108
+ "40009": {
109
+ "content": "<extra_id_90>",
110
+ "lstrip": true,
111
  "normalized": false,
112
+ "rstrip": true,
113
  "single_word": false,
114
  "special": true
115
  },
116
+ "40010": {
117
+ "content": "<extra_id_89>",
118
+ "lstrip": true,
119
  "normalized": false,
120
+ "rstrip": true,
121
  "single_word": false,
122
  "special": true
123
  },
124
+ "40011": {
125
+ "content": "<extra_id_88>",
126
+ "lstrip": true,
127
  "normalized": false,
128
+ "rstrip": true,
129
  "single_word": false,
130
  "special": true
131
  },
132
+ "40012": {
133
+ "content": "<extra_id_87>",
134
+ "lstrip": true,
135
  "normalized": false,
136
+ "rstrip": true,
137
  "single_word": false,
138
  "special": true
139
  },
140
+ "40013": {
141
+ "content": "<extra_id_86>",
142
+ "lstrip": true,
143
  "normalized": false,
144
+ "rstrip": true,
145
  "single_word": false,
146
  "special": true
147
  },
148
+ "40014": {
149
+ "content": "<extra_id_85>",
150
+ "lstrip": true,
151
  "normalized": false,
152
+ "rstrip": true,
153
  "single_word": false,
154
  "special": true
155
  },
156
+ "40015": {
157
+ "content": "<extra_id_84>",
158
+ "lstrip": true,
159
  "normalized": false,
160
+ "rstrip": true,
161
  "single_word": false,
162
  "special": true
163
  },
164
+ "40016": {
165
+ "content": "<extra_id_83>",
166
+ "lstrip": true,
167
  "normalized": false,
168
+ "rstrip": true,
169
  "single_word": false,
170
  "special": true
171
  },
172
+ "40017": {
173
+ "content": "<extra_id_82>",
174
+ "lstrip": true,
175
  "normalized": false,
176
+ "rstrip": true,
177
  "single_word": false,
178
  "special": true
179
  },
180
+ "40018": {
181
+ "content": "<extra_id_81>",
182
+ "lstrip": true,
183
  "normalized": false,
184
+ "rstrip": true,
185
  "single_word": false,
186
  "special": true
187
  },
188
+ "40019": {
189
+ "content": "<extra_id_80>",
190
+ "lstrip": true,
191
  "normalized": false,
192
+ "rstrip": true,
193
  "single_word": false,
194
  "special": true
195
  },
196
+ "40020": {
197
+ "content": "<extra_id_79>",
198
+ "lstrip": true,
199
  "normalized": false,
200
+ "rstrip": true,
201
  "single_word": false,
202
  "special": true
203
  },
204
+ "40021": {
205
+ "content": "<extra_id_78>",
206
+ "lstrip": true,
207
  "normalized": false,
208
+ "rstrip": true,
209
  "single_word": false,
210
  "special": true
211
  },
212
+ "40022": {
213
+ "content": "<extra_id_77>",
214
+ "lstrip": true,
215
  "normalized": false,
216
+ "rstrip": true,
217
  "single_word": false,
218
  "special": true
219
  },
220
+ "40023": {
221
+ "content": "<extra_id_76>",
222
+ "lstrip": true,
223
  "normalized": false,
224
+ "rstrip": true,
225
  "single_word": false,
226
  "special": true
227
  },
228
+ "40024": {
229
+ "content": "<extra_id_75>",
230
+ "lstrip": true,
231
  "normalized": false,
232
+ "rstrip": true,
233
  "single_word": false,
234
  "special": true
235
  },
236
+ "40025": {
237
+ "content": "<extra_id_74>",
238
+ "lstrip": true,
239
  "normalized": false,
240
+ "rstrip": true,
241
  "single_word": false,
242
  "special": true
243
  },
244
+ "40026": {
245
+ "content": "<extra_id_73>",
246
+ "lstrip": true,
247
  "normalized": false,
248
+ "rstrip": true,
249
  "single_word": false,
250
  "special": true
251
  },
252
+ "40027": {
253
+ "content": "<extra_id_72>",
254
+ "lstrip": true,
255
  "normalized": false,
256
+ "rstrip": true,
257
  "single_word": false,
258
  "special": true
259
  },
260
+ "40028": {
261
+ "content": "<extra_id_71>",
262
+ "lstrip": true,
263
  "normalized": false,
264
+ "rstrip": true,
265
  "single_word": false,
266
  "special": true
267
  },
268
+ "40029": {
269
+ "content": "<extra_id_70>",
270
+ "lstrip": true,
271
  "normalized": false,
272
+ "rstrip": true,
273
  "single_word": false,
274
  "special": true
275
  },
276
+ "40030": {
277
+ "content": "<extra_id_69>",
278
+ "lstrip": true,
279
  "normalized": false,
280
+ "rstrip": true,
281
  "single_word": false,
282
  "special": true
283
  },
284
+ "40031": {
285
+ "content": "<extra_id_68>",
286
+ "lstrip": true,
287
  "normalized": false,
288
+ "rstrip": true,
289
  "single_word": false,
290
  "special": true
291
  },
292
+ "40032": {
293
+ "content": "<extra_id_67>",
294
+ "lstrip": true,
295
  "normalized": false,
296
+ "rstrip": true,
297
  "single_word": false,
298
  "special": true
299
  },
300
+ "40033": {
301
+ "content": "<extra_id_66>",
302
+ "lstrip": true,
303
  "normalized": false,
304
+ "rstrip": true,
305
  "single_word": false,
306
  "special": true
307
  },
308
+ "40034": {
309
+ "content": "<extra_id_65>",
310
+ "lstrip": true,
311
  "normalized": false,
312
+ "rstrip": true,
313
  "single_word": false,
314
  "special": true
315
  },
316
+ "40035": {
317
+ "content": "<extra_id_64>",
318
+ "lstrip": true,
319
  "normalized": false,
320
+ "rstrip": true,
321
  "single_word": false,
322
  "special": true
323
  },
324
+ "40036": {
325
+ "content": "<extra_id_63>",
326
+ "lstrip": true,
327
  "normalized": false,
328
+ "rstrip": true,
329
  "single_word": false,
330
  "special": true
331
  },
332
+ "40037": {
333
+ "content": "<extra_id_62>",
334
+ "lstrip": true,
335
  "normalized": false,
336
+ "rstrip": true,
337
  "single_word": false,
338
  "special": true
339
  },
340
+ "40038": {
341
+ "content": "<extra_id_61>",
342
+ "lstrip": true,
343
  "normalized": false,
344
+ "rstrip": true,
345
  "single_word": false,
346
  "special": true
347
  },
348
+ "40039": {
349
+ "content": "<extra_id_60>",
350
+ "lstrip": true,
351
  "normalized": false,
352
+ "rstrip": true,
353
  "single_word": false,
354
  "special": true
355
  },
356
+ "40040": {
357
+ "content": "<extra_id_59>",
358
+ "lstrip": true,
359
  "normalized": false,
360
+ "rstrip": true,
361
  "single_word": false,
362
  "special": true
363
  },
364
+ "40041": {
365
+ "content": "<extra_id_58>",
366
+ "lstrip": true,
367
  "normalized": false,
368
+ "rstrip": true,
369
  "single_word": false,
370
  "special": true
371
  },
372
+ "40042": {
373
+ "content": "<extra_id_57>",
374
+ "lstrip": true,
375
  "normalized": false,
376
+ "rstrip": true,
377
  "single_word": false,
378
  "special": true
379
  },
380
+ "40043": {
381
+ "content": "<extra_id_56>",
382
+ "lstrip": true,
383
  "normalized": false,
384
+ "rstrip": true,
385
  "single_word": false,
386
  "special": true
387
  },
388
+ "40044": {
389
+ "content": "<extra_id_55>",
390
+ "lstrip": true,
391
  "normalized": false,
392
+ "rstrip": true,
393
  "single_word": false,
394
  "special": true
395
  },
396
+ "40045": {
397
+ "content": "<extra_id_54>",
398
+ "lstrip": true,
399
  "normalized": false,
400
+ "rstrip": true,
401
  "single_word": false,
402
  "special": true
403
  },
404
+ "40046": {
405
+ "content": "<extra_id_53>",
406
+ "lstrip": true,
407
  "normalized": false,
408
+ "rstrip": true,
409
  "single_word": false,
410
  "special": true
411
  },
412
+ "40047": {
413
+ "content": "<extra_id_52>",
414
+ "lstrip": true,
415
  "normalized": false,
416
+ "rstrip": true,
417
  "single_word": false,
418
  "special": true
419
  },
420
+ "40048": {
421
+ "content": "<extra_id_51>",
422
+ "lstrip": true,
423
  "normalized": false,
424
+ "rstrip": true,
425
  "single_word": false,
426
  "special": true
427
  },
428
+ "40049": {
429
+ "content": "<extra_id_50>",
430
+ "lstrip": true,
431
  "normalized": false,
432
+ "rstrip": true,
433
  "single_word": false,
434
  "special": true
435
  },
436
+ "40050": {
437
+ "content": "<extra_id_49>",
438
+ "lstrip": true,
439
  "normalized": false,
440
+ "rstrip": true,
441
  "single_word": false,
442
  "special": true
443
  },
444
+ "40051": {
445
+ "content": "<extra_id_48>",
446
+ "lstrip": true,
447
  "normalized": false,
448
+ "rstrip": true,
449
  "single_word": false,
450
  "special": true
451
  },
452
+ "40052": {
453
+ "content": "<extra_id_47>",
454
+ "lstrip": true,
455
  "normalized": false,
456
+ "rstrip": true,
457
  "single_word": false,
458
  "special": true
459
  },
460
+ "40053": {
461
+ "content": "<extra_id_46>",
462
+ "lstrip": true,
463
  "normalized": false,
464
+ "rstrip": true,
465
  "single_word": false,
466
  "special": true
467
  },
468
+ "40054": {
469
+ "content": "<extra_id_45>",
470
+ "lstrip": true,
471
  "normalized": false,
472
+ "rstrip": true,
473
  "single_word": false,
474
  "special": true
475
  },
476
+ "40055": {
477
+ "content": "<extra_id_44>",
478
+ "lstrip": true,
479
  "normalized": false,
480
+ "rstrip": true,
481
  "single_word": false,
482
  "special": true
483
  },
484
+ "40056": {
485
+ "content": "<extra_id_43>",
486
+ "lstrip": true,
487
  "normalized": false,
488
+ "rstrip": true,
489
  "single_word": false,
490
  "special": true
491
  },
492
+ "40057": {
493
+ "content": "<extra_id_42>",
494
+ "lstrip": true,
495
  "normalized": false,
496
+ "rstrip": true,
497
  "single_word": false,
498
  "special": true
499
  },
500
+ "40058": {
501
+ "content": "<extra_id_41>",
502
+ "lstrip": true,
503
  "normalized": false,
504
+ "rstrip": true,
505
  "single_word": false,
506
  "special": true
507
  },
508
+ "40059": {
509
+ "content": "<extra_id_40>",
510
+ "lstrip": true,
511
  "normalized": false,
512
+ "rstrip": true,
513
  "single_word": false,
514
  "special": true
515
  },
516
+ "40060": {
517
+ "content": "<extra_id_39>",
518
+ "lstrip": true,
519
  "normalized": false,
520
+ "rstrip": true,
521
  "single_word": false,
522
  "special": true
523
  },
524
+ "40061": {
525
+ "content": "<extra_id_38>",
526
+ "lstrip": true,
527
  "normalized": false,
528
+ "rstrip": true,
529
  "single_word": false,
530
  "special": true
531
  },
532
+ "40062": {
533
+ "content": "<extra_id_37>",
534
+ "lstrip": true,
535
  "normalized": false,
536
+ "rstrip": true,
537
  "single_word": false,
538
  "special": true
539
  },
540
+ "40063": {
541
+ "content": "<extra_id_36>",
542
+ "lstrip": true,
543
  "normalized": false,
544
+ "rstrip": true,
545
  "single_word": false,
546
  "special": true
547
  },
548
+ "40064": {
549
+ "content": "<extra_id_35>",
550
+ "lstrip": true,
551
  "normalized": false,
552
+ "rstrip": true,
553
  "single_word": false,
554
  "special": true
555
  },
556
+ "40065": {
557
+ "content": "<extra_id_34>",
558
+ "lstrip": true,
559
  "normalized": false,
560
+ "rstrip": true,
561
  "single_word": false,
562
  "special": true
563
  },
564
+ "40066": {
565
+ "content": "<extra_id_33>",
566
+ "lstrip": true,
567
  "normalized": false,
568
+ "rstrip": true,
569
  "single_word": false,
570
  "special": true
571
  },
572
+ "40067": {
573
+ "content": "<extra_id_32>",
574
+ "lstrip": true,
575
  "normalized": false,
576
+ "rstrip": true,
577
  "single_word": false,
578
  "special": true
579
  },
580
+ "40068": {
581
+ "content": "<extra_id_31>",
582
+ "lstrip": true,
583
  "normalized": false,
584
+ "rstrip": true,
585
  "single_word": false,
586
  "special": true
587
  },
588
+ "40069": {
589
+ "content": "<extra_id_30>",
590
+ "lstrip": true,
591
  "normalized": false,
592
+ "rstrip": true,
593
  "single_word": false,
594
  "special": true
595
  },
596
+ "40070": {
597
+ "content": "<extra_id_29>",
598
+ "lstrip": true,
599
  "normalized": false,
600
+ "rstrip": true,
601
  "single_word": false,
602
  "special": true
603
  },
604
+ "40071": {
605
+ "content": "<extra_id_28>",
606
+ "lstrip": true,
607
  "normalized": false,
608
+ "rstrip": true,
609
  "single_word": false,
610
  "special": true
611
  },
612
+ "40072": {
613
+ "content": "<extra_id_27>",
614
+ "lstrip": true,
615
  "normalized": false,
616
+ "rstrip": true,
617
  "single_word": false,
618
  "special": true
619
  },
620
+ "40073": {
621
+ "content": "<extra_id_26>",
622
+ "lstrip": true,
623
  "normalized": false,
624
+ "rstrip": true,
625
  "single_word": false,
626
  "special": true
627
  },
628
+ "40074": {
629
+ "content": "<extra_id_25>",
630
+ "lstrip": true,
631
  "normalized": false,
632
+ "rstrip": true,
633
  "single_word": false,
634
  "special": true
635
  },
636
+ "40075": {
637
+ "content": "<extra_id_24>",
638
+ "lstrip": true,
639
  "normalized": false,
640
+ "rstrip": true,
641
  "single_word": false,
642
  "special": true
643
  },
644
+ "40076": {
645
+ "content": "<extra_id_23>",
646
+ "lstrip": true,
647
  "normalized": false,
648
+ "rstrip": true,
649
  "single_word": false,
650
  "special": true
651
  },
652
+ "40077": {
653
+ "content": "<extra_id_22>",
654
+ "lstrip": true,
655
  "normalized": false,
656
+ "rstrip": true,
657
  "single_word": false,
658
  "special": true
659
  },
660
+ "40078": {
661
+ "content": "<extra_id_21>",
662
+ "lstrip": true,
663
  "normalized": false,
664
+ "rstrip": true,
665
  "single_word": false,
666
  "special": true
667
  },
668
+ "40079": {
669
+ "content": "<extra_id_20>",
670
+ "lstrip": true,
671
  "normalized": false,
672
+ "rstrip": true,
673
  "single_word": false,
674
  "special": true
675
  },
676
+ "40080": {
677
+ "content": "<extra_id_19>",
678
+ "lstrip": true,
679
  "normalized": false,
680
+ "rstrip": true,
681
  "single_word": false,
682
  "special": true
683
  },
684
+ "40081": {
685
+ "content": "<extra_id_18>",
686
+ "lstrip": true,
687
  "normalized": false,
688
+ "rstrip": true,
689
  "single_word": false,
690
  "special": true
691
  },
692
+ "40082": {
693
+ "content": "<extra_id_17>",
694
+ "lstrip": true,
695
  "normalized": false,
696
+ "rstrip": true,
697
  "single_word": false,
698
  "special": true
699
  },
700
+ "40083": {
701
+ "content": "<extra_id_16>",
702
+ "lstrip": true,
703
  "normalized": false,
704
+ "rstrip": true,
705
  "single_word": false,
706
  "special": true
707
  },
708
+ "40084": {
709
+ "content": "<extra_id_15>",
710
+ "lstrip": true,
711
  "normalized": false,
712
+ "rstrip": true,
713
  "single_word": false,
714
  "special": true
715
  },
716
+ "40085": {
717
+ "content": "<extra_id_14>",
718
+ "lstrip": true,
719
  "normalized": false,
720
+ "rstrip": true,
721
  "single_word": false,
722
  "special": true
723
  },
724
+ "40086": {
725
+ "content": "<extra_id_13>",
726
+ "lstrip": true,
727
  "normalized": false,
728
+ "rstrip": true,
729
  "single_word": false,
730
  "special": true
731
  },
732
+ "40087": {
733
+ "content": "<extra_id_12>",
734
+ "lstrip": true,
735
  "normalized": false,
736
+ "rstrip": true,
737
  "single_word": false,
738
  "special": true
739
  },
740
+ "40088": {
741
+ "content": "<extra_id_11>",
742
+ "lstrip": true,
743
  "normalized": false,
744
+ "rstrip": true,
745
  "single_word": false,
746
  "special": true
747
  },
748
+ "40089": {
749
+ "content": "<extra_id_10>",
750
+ "lstrip": true,
751
  "normalized": false,
752
+ "rstrip": true,
753
  "single_word": false,
754
  "special": true
755
  },
756
+ "40090": {
757
+ "content": "<extra_id_9>",
758
+ "lstrip": true,
759
  "normalized": false,
760
+ "rstrip": true,
761
  "single_word": false,
762
  "special": true
763
  },
764
+ "40091": {
765
+ "content": "<extra_id_8>",
766
+ "lstrip": true,
767
  "normalized": false,
768
+ "rstrip": true,
769
  "single_word": false,
770
  "special": true
771
  },
772
+ "40092": {
773
+ "content": "<extra_id_7>",
774
+ "lstrip": true,
775
  "normalized": false,
776
+ "rstrip": true,
777
  "single_word": false,
778
  "special": true
779
  },
780
+ "40093": {
781
+ "content": "<extra_id_6>",
782
+ "lstrip": true,
783
  "normalized": false,
784
+ "rstrip": true,
785
  "single_word": false,
786
  "special": true
787
  },
788
+ "40094": {
789
+ "content": "<extra_id_5>",
790
+ "lstrip": true,
791
  "normalized": false,
792
+ "rstrip": true,
793
  "single_word": false,
794
  "special": true
795
  },
796
+ "40095": {
797
+ "content": "<extra_id_4>",
798
+ "lstrip": true,
799
  "normalized": false,
800
+ "rstrip": true,
801
  "single_word": false,
802
  "special": true
803
  },
804
+ "40096": {
805
+ "content": "<extra_id_3>",
806
+ "lstrip": true,
807
  "normalized": false,
808
+ "rstrip": true,
809
  "single_word": false,
810
  "special": true
811
  },
812
+ "40097": {
813
+ "content": "<extra_id_2>",
814
+ "lstrip": true,
815
  "normalized": false,
816
+ "rstrip": true,
817
  "single_word": false,
818
  "special": true
819
  },
820
+ "40098": {
821
+ "content": "<extra_id_1>",
822
+ "lstrip": true,
823
  "normalized": false,
824
+ "rstrip": true,
825
  "single_word": false,
826
  "special": true
827
  },
828
+ "40099": {
829
+ "content": "<extra_id_0>",
830
+ "lstrip": true,
831
  "normalized": false,
832
+ "rstrip": true,
833
  "single_word": false,
834
  "special": true
835
  }
 
936
  "<extra_id_98>",
937
  "<extra_id_99>"
938
  ],
 
939
  "clean_up_tokenization_spaces": false,
940
  "eos_token": "</s>",
941
  "extra_ids": 100,
942
  "extra_special_tokens": {},
943
+ "legacy": true,
944
  "model_max_length": 1000000000000000019884624838656,
945
  "pad_token": "<pad>",
946
+ "sp_model_kwargs": {},
947
+ "tokenizer_class": "T5Tokenizer",
948
  "unk_token": "<unk>"
949
  }
checkpoints/checkpoint-46670/trainer_state.json ADDED
@@ -0,0 +1,685 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 1.9999357202545478,
6
+ "eval_steps": 500,
7
+ "global_step": 46670,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.02142658181740267,
14
+ "grad_norm": 0.4460393786430359,
15
+ "learning_rate": 4.946539532890508e-05,
16
+ "loss": 2.2597,
17
+ "step": 500
18
+ },
19
+ {
20
+ "epoch": 0.04285316363480534,
21
+ "grad_norm": 0.4649476110935211,
22
+ "learning_rate": 4.8929719305763877e-05,
23
+ "loss": 2.0158,
24
+ "step": 1000
25
+ },
26
+ {
27
+ "epoch": 0.064279745452208,
28
+ "grad_norm": 0.5113596320152283,
29
+ "learning_rate": 4.839404328262267e-05,
30
+ "loss": 1.9311,
31
+ "step": 1500
32
+ },
33
+ {
34
+ "epoch": 0.08570632726961068,
35
+ "grad_norm": 0.49711284041404724,
36
+ "learning_rate": 4.785836725948147e-05,
37
+ "loss": 1.8587,
38
+ "step": 2000
39
+ },
40
+ {
41
+ "epoch": 0.10713290908701335,
42
+ "grad_norm": 0.6695660352706909,
43
+ "learning_rate": 4.732269123634027e-05,
44
+ "loss": 1.8219,
45
+ "step": 2500
46
+ },
47
+ {
48
+ "epoch": 0.128559490904416,
49
+ "grad_norm": 0.5876463651657104,
50
+ "learning_rate": 4.678701521319906e-05,
51
+ "loss": 1.7645,
52
+ "step": 3000
53
+ },
54
+ {
55
+ "epoch": 0.1499860727218187,
56
+ "grad_norm": 0.554977297782898,
57
+ "learning_rate": 4.625133919005786e-05,
58
+ "loss": 1.7136,
59
+ "step": 3500
60
+ },
61
+ {
62
+ "epoch": 0.17141265453922136,
63
+ "grad_norm": 0.5401861667633057,
64
+ "learning_rate": 4.571566316691665e-05,
65
+ "loss": 1.696,
66
+ "step": 4000
67
+ },
68
+ {
69
+ "epoch": 0.19283923635662403,
70
+ "grad_norm": 0.5951809287071228,
71
+ "learning_rate": 4.517998714377545e-05,
72
+ "loss": 1.6519,
73
+ "step": 4500
74
+ },
75
+ {
76
+ "epoch": 0.2142658181740267,
77
+ "grad_norm": 0.6037197113037109,
78
+ "learning_rate": 4.464431112063424e-05,
79
+ "loss": 1.6279,
80
+ "step": 5000
81
+ },
82
+ {
83
+ "epoch": 0.23569239999142938,
84
+ "grad_norm": 0.6542425751686096,
85
+ "learning_rate": 4.410863509749304e-05,
86
+ "loss": 1.6104,
87
+ "step": 5500
88
+ },
89
+ {
90
+ "epoch": 0.257118981808832,
91
+ "grad_norm": 0.5781915783882141,
92
+ "learning_rate": 4.357295907435183e-05,
93
+ "loss": 1.5976,
94
+ "step": 6000
95
+ },
96
+ {
97
+ "epoch": 0.2785455636262347,
98
+ "grad_norm": 0.575715184211731,
99
+ "learning_rate": 4.303728305121063e-05,
100
+ "loss": 1.5373,
101
+ "step": 6500
102
+ },
103
+ {
104
+ "epoch": 0.2999721454436374,
105
+ "grad_norm": 0.6789396405220032,
106
+ "learning_rate": 4.2501607028069424e-05,
107
+ "loss": 1.5435,
108
+ "step": 7000
109
+ },
110
+ {
111
+ "epoch": 0.3213987272610401,
112
+ "grad_norm": 0.6032198667526245,
113
+ "learning_rate": 4.196593100492822e-05,
114
+ "loss": 1.5081,
115
+ "step": 7500
116
+ },
117
+ {
118
+ "epoch": 0.3428253090784427,
119
+ "grad_norm": 0.6690296530723572,
120
+ "learning_rate": 4.1430254981787015e-05,
121
+ "loss": 1.5059,
122
+ "step": 8000
123
+ },
124
+ {
125
+ "epoch": 0.36425189089584536,
126
+ "grad_norm": 0.6951475739479065,
127
+ "learning_rate": 4.0894578958645814e-05,
128
+ "loss": 1.4716,
129
+ "step": 8500
130
+ },
131
+ {
132
+ "epoch": 0.38567847271324807,
133
+ "grad_norm": 0.7095356583595276,
134
+ "learning_rate": 4.035890293550461e-05,
135
+ "loss": 1.455,
136
+ "step": 9000
137
+ },
138
+ {
139
+ "epoch": 0.4071050545306507,
140
+ "grad_norm": 0.8375953435897827,
141
+ "learning_rate": 3.9823226912363405e-05,
142
+ "loss": 1.4527,
143
+ "step": 9500
144
+ },
145
+ {
146
+ "epoch": 0.4285316363480534,
147
+ "grad_norm": 0.8582206964492798,
148
+ "learning_rate": 3.9287550889222204e-05,
149
+ "loss": 1.4345,
150
+ "step": 10000
151
+ },
152
+ {
153
+ "epoch": 0.44995821816545606,
154
+ "grad_norm": 0.8591541051864624,
155
+ "learning_rate": 3.8751874866081e-05,
156
+ "loss": 1.4268,
157
+ "step": 10500
158
+ },
159
+ {
160
+ "epoch": 0.47138479998285876,
161
+ "grad_norm": 0.835361897945404,
162
+ "learning_rate": 3.8216198842939796e-05,
163
+ "loss": 1.4045,
164
+ "step": 11000
165
+ },
166
+ {
167
+ "epoch": 0.4928113818002614,
168
+ "grad_norm": 0.9994288086891174,
169
+ "learning_rate": 3.768052281979859e-05,
170
+ "loss": 1.3965,
171
+ "step": 11500
172
+ },
173
+ {
174
+ "epoch": 0.514237963617664,
175
+ "grad_norm": 0.8310408592224121,
176
+ "learning_rate": 3.714484679665739e-05,
177
+ "loss": 1.3577,
178
+ "step": 12000
179
+ },
180
+ {
181
+ "epoch": 0.5356645454350667,
182
+ "grad_norm": 1.061630368232727,
183
+ "learning_rate": 3.660917077351618e-05,
184
+ "loss": 1.3311,
185
+ "step": 12500
186
+ },
187
+ {
188
+ "epoch": 0.5570911272524695,
189
+ "grad_norm": 1.0928676128387451,
190
+ "learning_rate": 3.607349475037498e-05,
191
+ "loss": 1.3203,
192
+ "step": 13000
193
+ },
194
+ {
195
+ "epoch": 0.5785177090698721,
196
+ "grad_norm": 1.4662648439407349,
197
+ "learning_rate": 3.553781872723377e-05,
198
+ "loss": 1.2778,
199
+ "step": 13500
200
+ },
201
+ {
202
+ "epoch": 0.5999442908872747,
203
+ "grad_norm": 1.2254618406295776,
204
+ "learning_rate": 3.500214270409257e-05,
205
+ "loss": 1.2572,
206
+ "step": 14000
207
+ },
208
+ {
209
+ "epoch": 0.6213708727046774,
210
+ "grad_norm": 1.275029182434082,
211
+ "learning_rate": 3.446860938504393e-05,
212
+ "loss": 1.2707,
213
+ "step": 14500
214
+ },
215
+ {
216
+ "epoch": 0.6427974545220801,
217
+ "grad_norm": 1.3197990655899048,
218
+ "learning_rate": 3.3932933361902726e-05,
219
+ "loss": 1.2308,
220
+ "step": 15000
221
+ },
222
+ {
223
+ "epoch": 0.6642240363394828,
224
+ "grad_norm": 1.934342861175537,
225
+ "learning_rate": 3.339725733876152e-05,
226
+ "loss": 1.2268,
227
+ "step": 15500
228
+ },
229
+ {
230
+ "epoch": 0.6856506181568854,
231
+ "grad_norm": 1.445302128791809,
232
+ "learning_rate": 3.286158131562032e-05,
233
+ "loss": 1.2007,
234
+ "step": 16000
235
+ },
236
+ {
237
+ "epoch": 0.7070771999742881,
238
+ "grad_norm": 1.2702606916427612,
239
+ "learning_rate": 3.232697664452539e-05,
240
+ "loss": 1.1785,
241
+ "step": 16500
242
+ },
243
+ {
244
+ "epoch": 0.7285037817916907,
245
+ "grad_norm": 1.254937767982483,
246
+ "learning_rate": 3.179130062138419e-05,
247
+ "loss": 1.1616,
248
+ "step": 17000
249
+ },
250
+ {
251
+ "epoch": 0.7499303636090935,
252
+ "grad_norm": 1.3628320693969727,
253
+ "learning_rate": 3.1255624598242984e-05,
254
+ "loss": 1.1352,
255
+ "step": 17500
256
+ },
257
+ {
258
+ "epoch": 0.7713569454264961,
259
+ "grad_norm": 3.0748207569122314,
260
+ "learning_rate": 3.071994857510178e-05,
261
+ "loss": 1.1243,
262
+ "step": 18000
263
+ },
264
+ {
265
+ "epoch": 0.7927835272438988,
266
+ "grad_norm": 1.4262775182724,
267
+ "learning_rate": 3.0185343904006858e-05,
268
+ "loss": 1.1011,
269
+ "step": 18500
270
+ },
271
+ {
272
+ "epoch": 0.8142101090613014,
273
+ "grad_norm": 2.2326557636260986,
274
+ "learning_rate": 2.9650739232911933e-05,
275
+ "loss": 1.0974,
276
+ "step": 19000
277
+ },
278
+ {
279
+ "epoch": 0.8356366908787041,
280
+ "grad_norm": 2.791055679321289,
281
+ "learning_rate": 2.9115063209770732e-05,
282
+ "loss": 1.0762,
283
+ "step": 19500
284
+ },
285
+ {
286
+ "epoch": 0.8570632726961068,
287
+ "grad_norm": 3.0886242389678955,
288
+ "learning_rate": 2.857938718662953e-05,
289
+ "loss": 1.0698,
290
+ "step": 20000
291
+ },
292
+ {
293
+ "epoch": 0.8784898545135095,
294
+ "grad_norm": 1.4234964847564697,
295
+ "learning_rate": 2.8043711163488323e-05,
296
+ "loss": 1.0541,
297
+ "step": 20500
298
+ },
299
+ {
300
+ "epoch": 0.8999164363309121,
301
+ "grad_norm": 3.275305986404419,
302
+ "learning_rate": 2.75091064923934e-05,
303
+ "loss": 1.0396,
304
+ "step": 21000
305
+ },
306
+ {
307
+ "epoch": 0.9213430181483148,
308
+ "grad_norm": 1.6365532875061035,
309
+ "learning_rate": 2.6973430469252198e-05,
310
+ "loss": 1.0146,
311
+ "step": 21500
312
+ },
313
+ {
314
+ "epoch": 0.9427695999657175,
315
+ "grad_norm": 3.1808159351348877,
316
+ "learning_rate": 2.6437754446110997e-05,
317
+ "loss": 0.9997,
318
+ "step": 22000
319
+ },
320
+ {
321
+ "epoch": 0.9641961817831202,
322
+ "grad_norm": 1.8644700050354004,
323
+ "learning_rate": 2.590207842296979e-05,
324
+ "loss": 0.9905,
325
+ "step": 22500
326
+ },
327
+ {
328
+ "epoch": 0.9856227636005228,
329
+ "grad_norm": 1.5963947772979736,
330
+ "learning_rate": 2.5367473751874864e-05,
331
+ "loss": 0.9624,
332
+ "step": 23000
333
+ },
334
+ {
335
+ "epoch": 1.007027918836108,
336
+ "grad_norm": 1.5474953651428223,
337
+ "learning_rate": 2.4831797728733663e-05,
338
+ "loss": 0.958,
339
+ "step": 23500
340
+ },
341
+ {
342
+ "epoch": 1.0284545006535109,
343
+ "grad_norm": 3.8564932346343994,
344
+ "learning_rate": 2.429612170559246e-05,
345
+ "loss": 0.9388,
346
+ "step": 24000
347
+ },
348
+ {
349
+ "epoch": 1.0498810824709135,
350
+ "grad_norm": 2.154879331588745,
351
+ "learning_rate": 2.3760445682451254e-05,
352
+ "loss": 0.9132,
353
+ "step": 24500
354
+ },
355
+ {
356
+ "epoch": 1.0713076642883161,
357
+ "grad_norm": 4.118302822113037,
358
+ "learning_rate": 2.3225841011356333e-05,
359
+ "loss": 0.9132,
360
+ "step": 25000
361
+ },
362
+ {
363
+ "epoch": 1.0927342461057188,
364
+ "grad_norm": 2.0971686840057373,
365
+ "learning_rate": 2.2690164988215128e-05,
366
+ "loss": 0.8947,
367
+ "step": 25500
368
+ },
369
+ {
370
+ "epoch": 1.1141608279231214,
371
+ "grad_norm": 3.480602741241455,
372
+ "learning_rate": 2.2154488965073924e-05,
373
+ "loss": 0.8922,
374
+ "step": 26000
375
+ },
376
+ {
377
+ "epoch": 1.135587409740524,
378
+ "grad_norm": 4.0732879638671875,
379
+ "learning_rate": 2.161881294193272e-05,
380
+ "loss": 0.87,
381
+ "step": 26500
382
+ },
383
+ {
384
+ "epoch": 1.1570139915579267,
385
+ "grad_norm": 2.2193567752838135,
386
+ "learning_rate": 2.1084208270837798e-05,
387
+ "loss": 0.8476,
388
+ "step": 27000
389
+ },
390
+ {
391
+ "epoch": 1.1784405733753294,
392
+ "grad_norm": 2.1723575592041016,
393
+ "learning_rate": 2.0548532247696594e-05,
394
+ "loss": 0.8322,
395
+ "step": 27500
396
+ },
397
+ {
398
+ "epoch": 1.199867155192732,
399
+ "grad_norm": 3.3503777980804443,
400
+ "learning_rate": 2.001285622455539e-05,
401
+ "loss": 0.8171,
402
+ "step": 28000
403
+ },
404
+ {
405
+ "epoch": 1.2212937370101349,
406
+ "grad_norm": 3.6080329418182373,
407
+ "learning_rate": 1.9477180201414185e-05,
408
+ "loss": 0.8105,
409
+ "step": 28500
410
+ },
411
+ {
412
+ "epoch": 1.2427203188275375,
413
+ "grad_norm": 2.708294630050659,
414
+ "learning_rate": 1.894150417827298e-05,
415
+ "loss": 0.7808,
416
+ "step": 29000
417
+ },
418
+ {
419
+ "epoch": 1.2641469006449402,
420
+ "grad_norm": 3.300328493118286,
421
+ "learning_rate": 1.8405828155131776e-05,
422
+ "loss": 0.7774,
423
+ "step": 29500
424
+ },
425
+ {
426
+ "epoch": 1.2855734824623428,
427
+ "grad_norm": 2.1943624019622803,
428
+ "learning_rate": 1.7870152131990572e-05,
429
+ "loss": 0.7637,
430
+ "step": 30000
431
+ },
432
+ {
433
+ "epoch": 1.3070000642797455,
434
+ "grad_norm": 2.7251927852630615,
435
+ "learning_rate": 1.733554746089565e-05,
436
+ "loss": 0.7481,
437
+ "step": 30500
438
+ },
439
+ {
440
+ "epoch": 1.3284266460971481,
441
+ "grad_norm": 4.672070026397705,
442
+ "learning_rate": 1.6799871437754446e-05,
443
+ "loss": 0.7255,
444
+ "step": 31000
445
+ },
446
+ {
447
+ "epoch": 1.3498532279145508,
448
+ "grad_norm": 2.4984779357910156,
449
+ "learning_rate": 1.626419541461324e-05,
450
+ "loss": 0.7231,
451
+ "step": 31500
452
+ },
453
+ {
454
+ "epoch": 1.3712798097319534,
455
+ "grad_norm": 1.9306334257125854,
456
+ "learning_rate": 1.5728519391472037e-05,
457
+ "loss": 0.7032,
458
+ "step": 32000
459
+ },
460
+ {
461
+ "epoch": 1.392706391549356,
462
+ "grad_norm": 3.015226125717163,
463
+ "learning_rate": 1.5192843368330834e-05,
464
+ "loss": 0.6923,
465
+ "step": 32500
466
+ },
467
+ {
468
+ "epoch": 1.4141329733667587,
469
+ "grad_norm": 3.646979331970215,
470
+ "learning_rate": 1.4658238697235913e-05,
471
+ "loss": 0.6893,
472
+ "step": 33000
473
+ },
474
+ {
475
+ "epoch": 1.4355595551841613,
476
+ "grad_norm": 1.8951635360717773,
477
+ "learning_rate": 1.4122562674094708e-05,
478
+ "loss": 0.6693,
479
+ "step": 33500
480
+ },
481
+ {
482
+ "epoch": 1.4569861370015642,
483
+ "grad_norm": 2.3496530055999756,
484
+ "learning_rate": 1.3586886650953504e-05,
485
+ "loss": 0.6586,
486
+ "step": 34000
487
+ },
488
+ {
489
+ "epoch": 1.4784127188189669,
490
+ "grad_norm": 2.899231195449829,
491
+ "learning_rate": 1.30512106278123e-05,
492
+ "loss": 0.6338,
493
+ "step": 34500
494
+ },
495
+ {
496
+ "epoch": 1.4998393006363695,
497
+ "grad_norm": 2.7465250492095947,
498
+ "learning_rate": 1.2515534604671095e-05,
499
+ "loss": 0.6291,
500
+ "step": 35000
501
+ },
502
+ {
503
+ "epoch": 1.5212658824537721,
504
+ "grad_norm": 2.3695950508117676,
505
+ "learning_rate": 1.1979858581529891e-05,
506
+ "loss": 0.6192,
507
+ "step": 35500
508
+ },
509
+ {
510
+ "epoch": 1.5426924642711748,
511
+ "grad_norm": 3.0361921787261963,
512
+ "learning_rate": 1.1444182558388687e-05,
513
+ "loss": 0.6147,
514
+ "step": 36000
515
+ },
516
+ {
517
+ "epoch": 1.5641190460885774,
518
+ "grad_norm": 3.5043649673461914,
519
+ "learning_rate": 1.0908506535247482e-05,
520
+ "loss": 0.6024,
521
+ "step": 36500
522
+ },
523
+ {
524
+ "epoch": 1.5855456279059803,
525
+ "grad_norm": 2.1166698932647705,
526
+ "learning_rate": 1.037390186415256e-05,
527
+ "loss": 0.6033,
528
+ "step": 37000
529
+ },
530
+ {
531
+ "epoch": 1.606972209723383,
532
+ "grad_norm": 2.8012290000915527,
533
+ "learning_rate": 9.838225841011358e-06,
534
+ "loss": 0.5935,
535
+ "step": 37500
536
+ },
537
+ {
538
+ "epoch": 1.6283987915407856,
539
+ "grad_norm": 2.49540638923645,
540
+ "learning_rate": 9.302549817870154e-06,
541
+ "loss": 0.5819,
542
+ "step": 38000
543
+ },
544
+ {
545
+ "epoch": 1.6498253733581882,
546
+ "grad_norm": 2.233430862426758,
547
+ "learning_rate": 8.766873794728948e-06,
548
+ "loss": 0.5674,
549
+ "step": 38500
550
+ },
551
+ {
552
+ "epoch": 1.6712519551755909,
553
+ "grad_norm": 2.823101282119751,
554
+ "learning_rate": 8.232269123634026e-06,
555
+ "loss": 0.552,
556
+ "step": 39000
557
+ },
558
+ {
559
+ "epoch": 1.6926785369929935,
560
+ "grad_norm": 2.389265775680542,
561
+ "learning_rate": 7.696593100492823e-06,
562
+ "loss": 0.5564,
563
+ "step": 39500
564
+ },
565
+ {
566
+ "epoch": 1.7141051188103962,
567
+ "grad_norm": 2.1136868000030518,
568
+ "learning_rate": 7.161988429397901e-06,
569
+ "loss": 0.5477,
570
+ "step": 40000
571
+ },
572
+ {
573
+ "epoch": 1.7355317006277988,
574
+ "grad_norm": 2.6307172775268555,
575
+ "learning_rate": 6.626312406256696e-06,
576
+ "loss": 0.545,
577
+ "step": 40500
578
+ },
579
+ {
580
+ "epoch": 1.7569582824452015,
581
+ "grad_norm": 3.4348325729370117,
582
+ "learning_rate": 6.090636383115492e-06,
583
+ "loss": 0.5359,
584
+ "step": 41000
585
+ },
586
+ {
587
+ "epoch": 1.7783848642626041,
588
+ "grad_norm": 2.1959691047668457,
589
+ "learning_rate": 5.554960359974288e-06,
590
+ "loss": 0.5267,
591
+ "step": 41500
592
+ },
593
+ {
594
+ "epoch": 1.7998114460800068,
595
+ "grad_norm": 2.747068405151367,
596
+ "learning_rate": 5.0192843368330835e-06,
597
+ "loss": 0.5316,
598
+ "step": 42000
599
+ },
600
+ {
601
+ "epoch": 1.8212380278974094,
602
+ "grad_norm": 2.965233325958252,
603
+ "learning_rate": 4.48360831369188e-06,
604
+ "loss": 0.5129,
605
+ "step": 42500
606
+ },
607
+ {
608
+ "epoch": 1.842664609714812,
609
+ "grad_norm": 2.3425612449645996,
610
+ "learning_rate": 3.9479322905506756e-06,
611
+ "loss": 0.5058,
612
+ "step": 43000
613
+ },
614
+ {
615
+ "epoch": 1.864091191532215,
616
+ "grad_norm": 1.5650664567947388,
617
+ "learning_rate": 3.4122562674094708e-06,
618
+ "loss": 0.5155,
619
+ "step": 43500
620
+ },
621
+ {
622
+ "epoch": 1.8855177733496176,
623
+ "grad_norm": 2.501758098602295,
624
+ "learning_rate": 2.8776515963145492e-06,
625
+ "loss": 0.5076,
626
+ "step": 44000
627
+ },
628
+ {
629
+ "epoch": 1.9069443551670202,
630
+ "grad_norm": 4.431907653808594,
631
+ "learning_rate": 2.341975573173345e-06,
632
+ "loss": 0.5119,
633
+ "step": 44500
634
+ },
635
+ {
636
+ "epoch": 1.9283709369844229,
637
+ "grad_norm": 3.3445732593536377,
638
+ "learning_rate": 1.8062995500321405e-06,
639
+ "loss": 0.5021,
640
+ "step": 45000
641
+ },
642
+ {
643
+ "epoch": 1.9497975188018255,
644
+ "grad_norm": 2.6224589347839355,
645
+ "learning_rate": 1.2706235268909363e-06,
646
+ "loss": 0.5019,
647
+ "step": 45500
648
+ },
649
+ {
650
+ "epoch": 1.9712241006192284,
651
+ "grad_norm": 2.954880952835083,
652
+ "learning_rate": 7.360188557960147e-07,
653
+ "loss": 0.5055,
654
+ "step": 46000
655
+ },
656
+ {
657
+ "epoch": 1.992650682436631,
658
+ "grad_norm": 2.903310775756836,
659
+ "learning_rate": 2.003428326548104e-07,
660
+ "loss": 0.5043,
661
+ "step": 46500
662
+ }
663
+ ],
664
+ "logging_steps": 500,
665
+ "max_steps": 46670,
666
+ "num_input_tokens_seen": 0,
667
+ "num_train_epochs": 2,
668
+ "save_steps": 500,
669
+ "stateful_callbacks": {
670
+ "TrainerControl": {
671
+ "args": {
672
+ "should_epoch_stop": false,
673
+ "should_evaluate": false,
674
+ "should_log": false,
675
+ "should_save": true,
676
+ "should_training_stop": true
677
+ },
678
+ "attributes": {}
679
+ }
680
+ },
681
+ "total_flos": 1.0106114424805786e+17,
682
+ "train_batch_size": 32,
683
+ "trial_name": null,
684
+ "trial_params": null
685
+ }
checkpoints/{checkpoint-62228 → checkpoint-46670}/training_args.bin RENAMED
File without changes
checkpoints/checkpoint-61500/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
checkpoints/checkpoint-61500/trainer_state.json DELETED
@@ -1,895 +0,0 @@
1
- {
2
- "best_global_step": null,
3
- "best_metric": null,
4
- "best_model_checkpoint": null,
5
- "epoch": 1.9766021726553964,
6
- "eval_steps": 500,
7
- "global_step": 61500,
8
- "is_hyper_param_search": false,
9
- "is_local_process_zero": true,
10
- "is_world_process_zero": true,
11
- "log_history": [
12
- {
13
- "epoch": 0.016069936363052,
14
- "grad_norm": 0.2569522559642792,
15
- "learning_rate": 4.960307257183262e-05,
16
- "loss": 2.9119,
17
- "step": 500
18
- },
19
- {
20
- "epoch": 0.032139872726104,
21
- "grad_norm": 0.26731985807418823,
22
- "learning_rate": 4.9201324162756315e-05,
23
- "loss": 2.2886,
24
- "step": 1000
25
- },
26
- {
27
- "epoch": 0.04820980908915601,
28
- "grad_norm": 0.3099210560321808,
29
- "learning_rate": 4.8799575753680014e-05,
30
- "loss": 2.1431,
31
- "step": 1500
32
- },
33
- {
34
- "epoch": 0.064279745452208,
35
- "grad_norm": 0.28836730122566223,
36
- "learning_rate": 4.839782734460372e-05,
37
- "loss": 2.0369,
38
- "step": 2000
39
- },
40
- {
41
- "epoch": 0.08034968181526002,
42
- "grad_norm": 0.4808545708656311,
43
- "learning_rate": 4.799607893552742e-05,
44
- "loss": 1.932,
45
- "step": 2500
46
- },
47
- {
48
- "epoch": 0.09641961817831202,
49
- "grad_norm": 0.38000208139419556,
50
- "learning_rate": 4.759433052645112e-05,
51
- "loss": 1.7766,
52
- "step": 3000
53
- },
54
- {
55
- "epoch": 0.11248955454136401,
56
- "grad_norm": 0.4310196340084076,
57
- "learning_rate": 4.7192582117374816e-05,
58
- "loss": 1.6022,
59
- "step": 3500
60
- },
61
- {
62
- "epoch": 0.128559490904416,
63
- "grad_norm": 0.40425005555152893,
64
- "learning_rate": 4.6790833708298515e-05,
65
- "loss": 1.4576,
66
- "step": 4000
67
- },
68
- {
69
- "epoch": 0.14462942726746802,
70
- "grad_norm": 0.3811793327331543,
71
- "learning_rate": 4.638908529922222e-05,
72
- "loss": 1.3384,
73
- "step": 4500
74
- },
75
- {
76
- "epoch": 0.16069936363052004,
77
- "grad_norm": 0.38943949341773987,
78
- "learning_rate": 4.598733689014591e-05,
79
- "loss": 1.2233,
80
- "step": 5000
81
- },
82
- {
83
- "epoch": 0.17676929999357202,
84
- "grad_norm": 0.5517480373382568,
85
- "learning_rate": 4.558558848106962e-05,
86
- "loss": 1.1342,
87
- "step": 5500
88
- },
89
- {
90
- "epoch": 0.19283923635662403,
91
- "grad_norm": 0.4235232174396515,
92
- "learning_rate": 4.518384007199332e-05,
93
- "loss": 1.0432,
94
- "step": 6000
95
- },
96
- {
97
- "epoch": 0.20890917271967602,
98
- "grad_norm": 0.4617592692375183,
99
- "learning_rate": 4.478209166291702e-05,
100
- "loss": 0.9781,
101
- "step": 6500
102
- },
103
- {
104
- "epoch": 0.22497910908272803,
105
- "grad_norm": 0.5447149872779846,
106
- "learning_rate": 4.4380343253840714e-05,
107
- "loss": 0.927,
108
- "step": 7000
109
- },
110
- {
111
- "epoch": 0.24104904544578004,
112
- "grad_norm": 0.4740816354751587,
113
- "learning_rate": 4.397859484476441e-05,
114
- "loss": 0.8674,
115
- "step": 7500
116
- },
117
- {
118
- "epoch": 0.257118981808832,
119
- "grad_norm": 0.5207423567771912,
120
- "learning_rate": 4.357684643568812e-05,
121
- "loss": 0.8149,
122
- "step": 8000
123
- },
124
- {
125
- "epoch": 0.27318891817188407,
126
- "grad_norm": 0.47738897800445557,
127
- "learning_rate": 4.317509802661182e-05,
128
- "loss": 0.7685,
129
- "step": 8500
130
- },
131
- {
132
- "epoch": 0.28925885453493605,
133
- "grad_norm": 0.4176841676235199,
134
- "learning_rate": 4.2773349617535516e-05,
135
- "loss": 0.7119,
136
- "step": 9000
137
- },
138
- {
139
- "epoch": 0.30532879089798803,
140
- "grad_norm": 0.381345272064209,
141
- "learning_rate": 4.2371601208459215e-05,
142
- "loss": 0.6682,
143
- "step": 9500
144
- },
145
- {
146
- "epoch": 0.3213987272610401,
147
- "grad_norm": 0.6301918625831604,
148
- "learning_rate": 4.1969852799382914e-05,
149
- "loss": 0.6505,
150
- "step": 10000
151
- },
152
- {
153
- "epoch": 0.33746866362409206,
154
- "grad_norm": 0.4057278335094452,
155
- "learning_rate": 4.156810439030662e-05,
156
- "loss": 0.6063,
157
- "step": 10500
158
- },
159
- {
160
- "epoch": 0.35353859998714404,
161
- "grad_norm": 0.5442121624946594,
162
- "learning_rate": 4.116635598123031e-05,
163
- "loss": 0.5735,
164
- "step": 11000
165
- },
166
- {
167
- "epoch": 0.369608536350196,
168
- "grad_norm": 0.5113051533699036,
169
- "learning_rate": 4.076460757215402e-05,
170
- "loss": 0.5432,
171
- "step": 11500
172
- },
173
- {
174
- "epoch": 0.38567847271324807,
175
- "grad_norm": 0.6383316516876221,
176
- "learning_rate": 4.0362859163077716e-05,
177
- "loss": 0.5143,
178
- "step": 12000
179
- },
180
- {
181
- "epoch": 0.40174840907630005,
182
- "grad_norm": 0.4316321611404419,
183
- "learning_rate": 3.996111075400142e-05,
184
- "loss": 0.4867,
185
- "step": 12500
186
- },
187
- {
188
- "epoch": 0.41781834543935203,
189
- "grad_norm": 0.42703017592430115,
190
- "learning_rate": 3.955936234492511e-05,
191
- "loss": 0.4614,
192
- "step": 13000
193
- },
194
- {
195
- "epoch": 0.4338882818024041,
196
- "grad_norm": 0.4263227880001068,
197
- "learning_rate": 3.915761393584881e-05,
198
- "loss": 0.4391,
199
- "step": 13500
200
- },
201
- {
202
- "epoch": 0.44995821816545606,
203
- "grad_norm": 0.47577473521232605,
204
- "learning_rate": 3.875586552677252e-05,
205
- "loss": 0.4241,
206
- "step": 14000
207
- },
208
- {
209
- "epoch": 0.46602815452850804,
210
- "grad_norm": 0.3419073224067688,
211
- "learning_rate": 3.8354117117696216e-05,
212
- "loss": 0.4019,
213
- "step": 14500
214
- },
215
- {
216
- "epoch": 0.4820980908915601,
217
- "grad_norm": 0.3402538001537323,
218
- "learning_rate": 3.7952368708619915e-05,
219
- "loss": 0.3876,
220
- "step": 15000
221
- },
222
- {
223
- "epoch": 0.49816802725461207,
224
- "grad_norm": 0.7072747349739075,
225
- "learning_rate": 3.7550620299543614e-05,
226
- "loss": 0.364,
227
- "step": 15500
228
- },
229
- {
230
- "epoch": 0.514237963617664,
231
- "grad_norm": 0.31305554509162903,
232
- "learning_rate": 3.714887189046731e-05,
233
- "loss": 0.3463,
234
- "step": 16000
235
- },
236
- {
237
- "epoch": 0.530307899980716,
238
- "grad_norm": 0.4203876554965973,
239
- "learning_rate": 3.674792697820917e-05,
240
- "loss": 0.3371,
241
- "step": 16500
242
- },
243
- {
244
- "epoch": 0.5463778363437681,
245
- "grad_norm": 0.49149152636528015,
246
- "learning_rate": 3.634617856913286e-05,
247
- "loss": 0.3189,
248
- "step": 17000
249
- },
250
- {
251
- "epoch": 0.5624477727068201,
252
- "grad_norm": 0.6438118815422058,
253
- "learning_rate": 3.594443016005657e-05,
254
- "loss": 0.3074,
255
- "step": 17500
256
- },
257
- {
258
- "epoch": 0.5785177090698721,
259
- "grad_norm": 0.6619039177894592,
260
- "learning_rate": 3.554268175098027e-05,
261
- "loss": 0.2989,
262
- "step": 18000
263
- },
264
- {
265
- "epoch": 0.5945876454329241,
266
- "grad_norm": 0.39272341132164,
267
- "learning_rate": 3.514093334190397e-05,
268
- "loss": 0.2818,
269
- "step": 18500
270
- },
271
- {
272
- "epoch": 0.6106575817959761,
273
- "grad_norm": 0.3980565369129181,
274
- "learning_rate": 3.473998842964582e-05,
275
- "loss": 0.273,
276
- "step": 19000
277
- },
278
- {
279
- "epoch": 0.626727518159028,
280
- "grad_norm": 0.3052268922328949,
281
- "learning_rate": 3.4338240020569516e-05,
282
- "loss": 0.2677,
283
- "step": 19500
284
- },
285
- {
286
- "epoch": 0.6427974545220801,
287
- "grad_norm": 0.5999760031700134,
288
- "learning_rate": 3.3937295108311374e-05,
289
- "loss": 0.2572,
290
- "step": 20000
291
- },
292
- {
293
- "epoch": 0.6588673908851321,
294
- "grad_norm": 0.4283508062362671,
295
- "learning_rate": 3.3536350196053226e-05,
296
- "loss": 0.2468,
297
- "step": 20500
298
- },
299
- {
300
- "epoch": 0.6749373272481841,
301
- "grad_norm": 0.4289894700050354,
302
- "learning_rate": 3.3134601786976924e-05,
303
- "loss": 0.2414,
304
- "step": 21000
305
- },
306
- {
307
- "epoch": 0.6910072636112361,
308
- "grad_norm": 0.26386120915412903,
309
- "learning_rate": 3.273285337790062e-05,
310
- "loss": 0.2422,
311
- "step": 21500
312
- },
313
- {
314
- "epoch": 0.7070771999742881,
315
- "grad_norm": 0.41095244884490967,
316
- "learning_rate": 3.233110496882433e-05,
317
- "loss": 0.2282,
318
- "step": 22000
319
- },
320
- {
321
- "epoch": 0.7231471363373401,
322
- "grad_norm": 0.29514652490615845,
323
- "learning_rate": 3.192935655974803e-05,
324
- "loss": 0.2252,
325
- "step": 22500
326
- },
327
- {
328
- "epoch": 0.739217072700392,
329
- "grad_norm": 0.4044126570224762,
330
- "learning_rate": 3.152760815067172e-05,
331
- "loss": 0.2211,
332
- "step": 23000
333
- },
334
- {
335
- "epoch": 0.7552870090634441,
336
- "grad_norm": 0.3767038881778717,
337
- "learning_rate": 3.1125859741595425e-05,
338
- "loss": 0.2115,
339
- "step": 23500
340
- },
341
- {
342
- "epoch": 0.7713569454264961,
343
- "grad_norm": 0.36812517046928406,
344
- "learning_rate": 3.0724111332519124e-05,
345
- "loss": 0.2059,
346
- "step": 24000
347
- },
348
- {
349
- "epoch": 0.7874268817895481,
350
- "grad_norm": 0.3709106147289276,
351
- "learning_rate": 3.0322362923442826e-05,
352
- "loss": 0.2035,
353
- "step": 24500
354
- },
355
- {
356
- "epoch": 0.8034968181526001,
357
- "grad_norm": 0.3285115361213684,
358
- "learning_rate": 2.9920614514366525e-05,
359
- "loss": 0.1993,
360
- "step": 25000
361
- },
362
- {
363
- "epoch": 0.8195667545156521,
364
- "grad_norm": 0.3229790925979614,
365
- "learning_rate": 2.9518866105290227e-05,
366
- "loss": 0.1968,
367
- "step": 25500
368
- },
369
- {
370
- "epoch": 0.8356366908787041,
371
- "grad_norm": 0.37397509813308716,
372
- "learning_rate": 2.9117117696213926e-05,
373
- "loss": 0.194,
374
- "step": 26000
375
- },
376
- {
377
- "epoch": 0.8517066272417562,
378
- "grad_norm": 0.33143311738967896,
379
- "learning_rate": 2.871536928713762e-05,
380
- "loss": 0.1875,
381
- "step": 26500
382
- },
383
- {
384
- "epoch": 0.8677765636048081,
385
- "grad_norm": 0.2748125493526459,
386
- "learning_rate": 2.8313620878061327e-05,
387
- "loss": 0.1854,
388
- "step": 27000
389
- },
390
- {
391
- "epoch": 0.8838464999678601,
392
- "grad_norm": 0.2606910169124603,
393
- "learning_rate": 2.7911872468985022e-05,
394
- "loss": 0.1809,
395
- "step": 27500
396
- },
397
- {
398
- "epoch": 0.8999164363309121,
399
- "grad_norm": 0.28182655572891235,
400
- "learning_rate": 2.7510124059908728e-05,
401
- "loss": 0.1815,
402
- "step": 28000
403
- },
404
- {
405
- "epoch": 0.9159863726939641,
406
- "grad_norm": 0.3056446313858032,
407
- "learning_rate": 2.7109179147650576e-05,
408
- "loss": 0.1775,
409
- "step": 28500
410
- },
411
- {
412
- "epoch": 0.9320563090570161,
413
- "grad_norm": 0.2458430379629135,
414
- "learning_rate": 2.6707430738574275e-05,
415
- "loss": 0.1714,
416
- "step": 29000
417
- },
418
- {
419
- "epoch": 0.9481262454200682,
420
- "grad_norm": 0.2681204080581665,
421
- "learning_rate": 2.6305682329497977e-05,
422
- "loss": 0.1734,
423
- "step": 29500
424
- },
425
- {
426
- "epoch": 0.9641961817831202,
427
- "grad_norm": 0.38170355558395386,
428
- "learning_rate": 2.5903933920421676e-05,
429
- "loss": 0.1701,
430
- "step": 30000
431
- },
432
- {
433
- "epoch": 0.9802661181461721,
434
- "grad_norm": 0.43841251730918884,
435
- "learning_rate": 2.550298900816353e-05,
436
- "loss": 0.1656,
437
- "step": 30500
438
- },
439
- {
440
- "epoch": 0.9963360545092241,
441
- "grad_norm": 0.4082754850387573,
442
- "learning_rate": 2.510124059908723e-05,
443
- "loss": 0.1649,
444
- "step": 31000
445
- },
446
- {
447
- "epoch": 1.0124059908722762,
448
- "grad_norm": 0.27510714530944824,
449
- "learning_rate": 2.4699492190010928e-05,
450
- "loss": 0.1636,
451
- "step": 31500
452
- },
453
- {
454
- "epoch": 1.028475927235328,
455
- "grad_norm": 0.3550429344177246,
456
- "learning_rate": 2.429774378093463e-05,
457
- "loss": 0.1615,
458
- "step": 32000
459
- },
460
- {
461
- "epoch": 1.0445458635983802,
462
- "grad_norm": 0.382055401802063,
463
- "learning_rate": 2.389599537185833e-05,
464
- "loss": 0.1597,
465
- "step": 32500
466
- },
467
- {
468
- "epoch": 1.060615799961432,
469
- "grad_norm": 0.38698843121528625,
470
- "learning_rate": 2.349424696278203e-05,
471
- "loss": 0.155,
472
- "step": 33000
473
- },
474
- {
475
- "epoch": 1.0766857363244842,
476
- "grad_norm": 0.380403995513916,
477
- "learning_rate": 2.309249855370573e-05,
478
- "loss": 0.1594,
479
- "step": 33500
480
- },
481
- {
482
- "epoch": 1.0927556726875363,
483
- "grad_norm": 0.17210371792316437,
484
- "learning_rate": 2.269155364144758e-05,
485
- "loss": 0.1543,
486
- "step": 34000
487
- },
488
- {
489
- "epoch": 1.1088256090505881,
490
- "grad_norm": 0.33378392457962036,
491
- "learning_rate": 2.228980523237128e-05,
492
- "loss": 0.1549,
493
- "step": 34500
494
- },
495
- {
496
- "epoch": 1.1248955454136402,
497
- "grad_norm": 0.282175213098526,
498
- "learning_rate": 2.1888056823294982e-05,
499
- "loss": 0.1509,
500
- "step": 35000
501
- },
502
- {
503
- "epoch": 1.140965481776692,
504
- "grad_norm": 0.4829972982406616,
505
- "learning_rate": 2.148630841421868e-05,
506
- "loss": 0.1508,
507
- "step": 35500
508
- },
509
- {
510
- "epoch": 1.1570354181397442,
511
- "grad_norm": 0.4101378321647644,
512
- "learning_rate": 2.1084560005142383e-05,
513
- "loss": 0.1487,
514
- "step": 36000
515
- },
516
- {
517
- "epoch": 1.173105354502796,
518
- "grad_norm": 0.24467173218727112,
519
- "learning_rate": 2.0682811596066082e-05,
520
- "loss": 0.1482,
521
- "step": 36500
522
- },
523
- {
524
- "epoch": 1.1891752908658482,
525
- "grad_norm": 0.2552469074726105,
526
- "learning_rate": 2.028106318698978e-05,
527
- "loss": 0.1474,
528
- "step": 37000
529
- },
530
- {
531
- "epoch": 1.2052452272289003,
532
- "grad_norm": 0.33155035972595215,
533
- "learning_rate": 1.987931477791348e-05,
534
- "loss": 0.1427,
535
- "step": 37500
536
- },
537
- {
538
- "epoch": 1.2213151635919521,
539
- "grad_norm": 0.41133707761764526,
540
- "learning_rate": 1.9478369865655334e-05,
541
- "loss": 0.143,
542
- "step": 38000
543
- },
544
- {
545
- "epoch": 1.2373850999550042,
546
- "grad_norm": 0.36144211888313293,
547
- "learning_rate": 1.9076621456579033e-05,
548
- "loss": 0.1387,
549
- "step": 38500
550
- },
551
- {
552
- "epoch": 1.253455036318056,
553
- "grad_norm": 0.36597776412963867,
554
- "learning_rate": 1.8674873047502732e-05,
555
- "loss": 0.1415,
556
- "step": 39000
557
- },
558
- {
559
- "epoch": 1.2695249726811082,
560
- "grad_norm": 0.37640953063964844,
561
- "learning_rate": 1.8273124638426434e-05,
562
- "loss": 0.1408,
563
- "step": 39500
564
- },
565
- {
566
- "epoch": 1.28559490904416,
567
- "grad_norm": 0.22886815667152405,
568
- "learning_rate": 1.7872983222986438e-05,
569
- "loss": 0.1366,
570
- "step": 40000
571
- },
572
- {
573
- "epoch": 1.3016648454072122,
574
- "grad_norm": 0.44980695843696594,
575
- "learning_rate": 1.7471234813910137e-05,
576
- "loss": 0.1411,
577
- "step": 40500
578
- },
579
- {
580
- "epoch": 1.3177347817702643,
581
- "grad_norm": 0.46285852789878845,
582
- "learning_rate": 1.706948640483384e-05,
583
- "loss": 0.1367,
584
- "step": 41000
585
- },
586
- {
587
- "epoch": 1.3338047181333161,
588
- "grad_norm": 0.1757335215806961,
589
- "learning_rate": 1.6667737995757538e-05,
590
- "loss": 0.1361,
591
- "step": 41500
592
- },
593
- {
594
- "epoch": 1.3498746544963682,
595
- "grad_norm": 0.28056710958480835,
596
- "learning_rate": 1.6265989586681236e-05,
597
- "loss": 0.1371,
598
- "step": 42000
599
- },
600
- {
601
- "epoch": 1.3659445908594203,
602
- "grad_norm": 0.4234681725502014,
603
- "learning_rate": 1.586424117760494e-05,
604
- "loss": 0.1363,
605
- "step": 42500
606
- },
607
- {
608
- "epoch": 1.3820145272224722,
609
- "grad_norm": 0.2925218641757965,
610
- "learning_rate": 1.5462492768528637e-05,
611
- "loss": 0.1336,
612
- "step": 43000
613
- },
614
- {
615
- "epoch": 1.398084463585524,
616
- "grad_norm": 0.23110254108905792,
617
- "learning_rate": 1.5060744359452336e-05,
618
- "loss": 0.1305,
619
- "step": 43500
620
- },
621
- {
622
- "epoch": 1.4141543999485762,
623
- "grad_norm": 0.4187003970146179,
624
- "learning_rate": 1.4659799447194189e-05,
625
- "loss": 0.1374,
626
- "step": 44000
627
- },
628
- {
629
- "epoch": 1.4302243363116283,
630
- "grad_norm": 0.30868059396743774,
631
- "learning_rate": 1.425805103811789e-05,
632
- "loss": 0.1332,
633
- "step": 44500
634
- },
635
- {
636
- "epoch": 1.4462942726746801,
637
- "grad_norm": 0.24373352527618408,
638
- "learning_rate": 1.385630262904159e-05,
639
- "loss": 0.133,
640
- "step": 45000
641
- },
642
- {
643
- "epoch": 1.4623642090377322,
644
- "grad_norm": 0.3976458013057709,
645
- "learning_rate": 1.345455421996529e-05,
646
- "loss": 0.1317,
647
- "step": 45500
648
- },
649
- {
650
- "epoch": 1.4784341454007843,
651
- "grad_norm": 0.15130922198295593,
652
- "learning_rate": 1.3053609307707144e-05,
653
- "loss": 0.1294,
654
- "step": 46000
655
- },
656
- {
657
- "epoch": 1.4945040817638362,
658
- "grad_norm": 0.26361921429634094,
659
- "learning_rate": 1.2652664395448997e-05,
660
- "loss": 0.1316,
661
- "step": 46500
662
- },
663
- {
664
- "epoch": 1.510574018126888,
665
- "grad_norm": 0.3039293587207794,
666
- "learning_rate": 1.2250915986372695e-05,
667
- "loss": 0.1294,
668
- "step": 47000
669
- },
670
- {
671
- "epoch": 1.5266439544899402,
672
- "grad_norm": 0.23085398972034454,
673
- "learning_rate": 1.1849167577296394e-05,
674
- "loss": 0.1304,
675
- "step": 47500
676
- },
677
- {
678
- "epoch": 1.5427138908529923,
679
- "grad_norm": 0.45066356658935547,
680
- "learning_rate": 1.1447419168220095e-05,
681
- "loss": 0.1283,
682
- "step": 48000
683
- },
684
- {
685
- "epoch": 1.5587838272160441,
686
- "grad_norm": 0.2428194135427475,
687
- "learning_rate": 1.1045670759143795e-05,
688
- "loss": 0.1279,
689
- "step": 48500
690
- },
691
- {
692
- "epoch": 1.5748537635790962,
693
- "grad_norm": 0.15587645769119263,
694
- "learning_rate": 1.0643922350067494e-05,
695
- "loss": 0.1273,
696
- "step": 49000
697
- },
698
- {
699
- "epoch": 1.5909236999421483,
700
- "grad_norm": 0.5055563449859619,
701
- "learning_rate": 1.0242977437809347e-05,
702
- "loss": 0.127,
703
- "step": 49500
704
- },
705
- {
706
- "epoch": 1.6069936363052002,
707
- "grad_norm": 0.31220686435699463,
708
- "learning_rate": 9.841229028733047e-06,
709
- "loss": 0.1284,
710
- "step": 50000
711
- },
712
- {
713
- "epoch": 1.623063572668252,
714
- "grad_norm": 0.3776426613330841,
715
- "learning_rate": 9.439480619656748e-06,
716
- "loss": 0.1251,
717
- "step": 50500
718
- },
719
- {
720
- "epoch": 1.6391335090313044,
721
- "grad_norm": 0.2834898829460144,
722
- "learning_rate": 9.037732210580447e-06,
723
- "loss": 0.1226,
724
- "step": 51000
725
- },
726
- {
727
- "epoch": 1.6552034453943563,
728
- "grad_norm": 0.2295331507921219,
729
- "learning_rate": 8.635983801504147e-06,
730
- "loss": 0.1233,
731
- "step": 51500
732
- },
733
- {
734
- "epoch": 1.6712733817574081,
735
- "grad_norm": 0.22921015322208405,
736
- "learning_rate": 8.234235392427848e-06,
737
- "loss": 0.1256,
738
- "step": 52000
739
- },
740
- {
741
- "epoch": 1.6873433181204602,
742
- "grad_norm": 0.3294677138328552,
743
- "learning_rate": 7.832486983351546e-06,
744
- "loss": 0.1257,
745
- "step": 52500
746
- },
747
- {
748
- "epoch": 1.7034132544835123,
749
- "grad_norm": 0.21186766028404236,
750
- "learning_rate": 7.430738574275246e-06,
751
- "loss": 0.1254,
752
- "step": 53000
753
- },
754
- {
755
- "epoch": 1.7194831908465642,
756
- "grad_norm": 0.43346577882766724,
757
- "learning_rate": 7.029793662017099e-06,
758
- "loss": 0.1228,
759
- "step": 53500
760
- },
761
- {
762
- "epoch": 1.7355531272096163,
763
- "grad_norm": 0.20274986326694489,
764
- "learning_rate": 6.628045252940798e-06,
765
- "loss": 0.124,
766
- "step": 54000
767
- },
768
- {
769
- "epoch": 1.7516230635726684,
770
- "grad_norm": 0.2912587523460388,
771
- "learning_rate": 6.2262968438644984e-06,
772
- "loss": 0.1236,
773
- "step": 54500
774
- },
775
- {
776
- "epoch": 1.7676929999357203,
777
- "grad_norm": 0.5663316249847412,
778
- "learning_rate": 5.824548434788198e-06,
779
- "loss": 0.1236,
780
- "step": 55000
781
- },
782
- {
783
- "epoch": 1.7837629362987721,
784
- "grad_norm": 0.2563399076461792,
785
- "learning_rate": 5.423603522530051e-06,
786
- "loss": 0.1241,
787
- "step": 55500
788
- },
789
- {
790
- "epoch": 1.7998328726618242,
791
- "grad_norm": 0.26923516392707825,
792
- "learning_rate": 5.022658610271903e-06,
793
- "loss": 0.1231,
794
- "step": 56000
795
- },
796
- {
797
- "epoch": 1.8159028090248763,
798
- "grad_norm": 0.15516141057014465,
799
- "learning_rate": 4.620910201195604e-06,
800
- "loss": 0.1225,
801
- "step": 56500
802
- },
803
- {
804
- "epoch": 1.8319727453879282,
805
- "grad_norm": 0.1603991985321045,
806
- "learning_rate": 4.219161792119303e-06,
807
- "loss": 0.1236,
808
- "step": 57000
809
- },
810
- {
811
- "epoch": 1.8480426817509803,
812
- "grad_norm": 0.3031301498413086,
813
- "learning_rate": 3.817413383043003e-06,
814
- "loss": 0.124,
815
- "step": 57500
816
- },
817
- {
818
- "epoch": 1.8641126181140324,
819
- "grad_norm": 0.25160399079322815,
820
- "learning_rate": 3.4156649739667035e-06,
821
- "loss": 0.1212,
822
- "step": 58000
823
- },
824
- {
825
- "epoch": 1.8801825544770843,
826
- "grad_norm": 0.23327353596687317,
827
- "learning_rate": 3.013916564890403e-06,
828
- "loss": 0.1199,
829
- "step": 58500
830
- },
831
- {
832
- "epoch": 1.8962524908401361,
833
- "grad_norm": 0.23530858755111694,
834
- "learning_rate": 2.6129716526322558e-06,
835
- "loss": 0.1228,
836
- "step": 59000
837
- },
838
- {
839
- "epoch": 1.9123224272031882,
840
- "grad_norm": 0.20596709847450256,
841
- "learning_rate": 2.211223243555956e-06,
842
- "loss": 0.1205,
843
- "step": 59500
844
- },
845
- {
846
- "epoch": 1.9283923635662403,
847
- "grad_norm": 0.35043200850486755,
848
- "learning_rate": 1.8094748344796555e-06,
849
- "loss": 0.1188,
850
- "step": 60000
851
- },
852
- {
853
- "epoch": 1.9444622999292922,
854
- "grad_norm": 0.21463052928447723,
855
- "learning_rate": 1.4077264254033555e-06,
856
- "loss": 0.1225,
857
- "step": 60500
858
- },
859
- {
860
- "epoch": 1.9605322362923443,
861
- "grad_norm": 0.27506574988365173,
862
- "learning_rate": 1.0059780163270554e-06,
863
- "loss": 0.1233,
864
- "step": 61000
865
- },
866
- {
867
- "epoch": 1.9766021726553964,
868
- "grad_norm": 0.3260590732097626,
869
- "learning_rate": 6.042296072507553e-07,
870
- "loss": 0.1218,
871
- "step": 61500
872
- }
873
- ],
874
- "logging_steps": 500,
875
- "max_steps": 62228,
876
- "num_input_tokens_seen": 0,
877
- "num_train_epochs": 2,
878
- "save_steps": 500,
879
- "stateful_callbacks": {
880
- "TrainerControl": {
881
- "args": {
882
- "should_epoch_stop": false,
883
- "should_evaluate": false,
884
- "should_log": false,
885
- "should_save": true,
886
- "should_training_stop": false
887
- },
888
- "attributes": {}
889
- }
890
- },
891
- "total_flos": 1.3317606196484506e+17,
892
- "train_batch_size": 32,
893
- "trial_name": null,
894
- "trial_params": null
895
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoints/checkpoint-62000/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
checkpoints/checkpoint-62000/trainer_state.json DELETED
@@ -1,902 +0,0 @@
1
- {
2
- "best_global_step": null,
3
- "best_metric": null,
4
- "best_model_checkpoint": null,
5
- "epoch": 1.9926721090184483,
6
- "eval_steps": 500,
7
- "global_step": 62000,
8
- "is_hyper_param_search": false,
9
- "is_local_process_zero": true,
10
- "is_world_process_zero": true,
11
- "log_history": [
12
- {
13
- "epoch": 0.016069936363052,
14
- "grad_norm": 0.2569522559642792,
15
- "learning_rate": 4.960307257183262e-05,
16
- "loss": 2.9119,
17
- "step": 500
18
- },
19
- {
20
- "epoch": 0.032139872726104,
21
- "grad_norm": 0.26731985807418823,
22
- "learning_rate": 4.9201324162756315e-05,
23
- "loss": 2.2886,
24
- "step": 1000
25
- },
26
- {
27
- "epoch": 0.04820980908915601,
28
- "grad_norm": 0.3099210560321808,
29
- "learning_rate": 4.8799575753680014e-05,
30
- "loss": 2.1431,
31
- "step": 1500
32
- },
33
- {
34
- "epoch": 0.064279745452208,
35
- "grad_norm": 0.28836730122566223,
36
- "learning_rate": 4.839782734460372e-05,
37
- "loss": 2.0369,
38
- "step": 2000
39
- },
40
- {
41
- "epoch": 0.08034968181526002,
42
- "grad_norm": 0.4808545708656311,
43
- "learning_rate": 4.799607893552742e-05,
44
- "loss": 1.932,
45
- "step": 2500
46
- },
47
- {
48
- "epoch": 0.09641961817831202,
49
- "grad_norm": 0.38000208139419556,
50
- "learning_rate": 4.759433052645112e-05,
51
- "loss": 1.7766,
52
- "step": 3000
53
- },
54
- {
55
- "epoch": 0.11248955454136401,
56
- "grad_norm": 0.4310196340084076,
57
- "learning_rate": 4.7192582117374816e-05,
58
- "loss": 1.6022,
59
- "step": 3500
60
- },
61
- {
62
- "epoch": 0.128559490904416,
63
- "grad_norm": 0.40425005555152893,
64
- "learning_rate": 4.6790833708298515e-05,
65
- "loss": 1.4576,
66
- "step": 4000
67
- },
68
- {
69
- "epoch": 0.14462942726746802,
70
- "grad_norm": 0.3811793327331543,
71
- "learning_rate": 4.638908529922222e-05,
72
- "loss": 1.3384,
73
- "step": 4500
74
- },
75
- {
76
- "epoch": 0.16069936363052004,
77
- "grad_norm": 0.38943949341773987,
78
- "learning_rate": 4.598733689014591e-05,
79
- "loss": 1.2233,
80
- "step": 5000
81
- },
82
- {
83
- "epoch": 0.17676929999357202,
84
- "grad_norm": 0.5517480373382568,
85
- "learning_rate": 4.558558848106962e-05,
86
- "loss": 1.1342,
87
- "step": 5500
88
- },
89
- {
90
- "epoch": 0.19283923635662403,
91
- "grad_norm": 0.4235232174396515,
92
- "learning_rate": 4.518384007199332e-05,
93
- "loss": 1.0432,
94
- "step": 6000
95
- },
96
- {
97
- "epoch": 0.20890917271967602,
98
- "grad_norm": 0.4617592692375183,
99
- "learning_rate": 4.478209166291702e-05,
100
- "loss": 0.9781,
101
- "step": 6500
102
- },
103
- {
104
- "epoch": 0.22497910908272803,
105
- "grad_norm": 0.5447149872779846,
106
- "learning_rate": 4.4380343253840714e-05,
107
- "loss": 0.927,
108
- "step": 7000
109
- },
110
- {
111
- "epoch": 0.24104904544578004,
112
- "grad_norm": 0.4740816354751587,
113
- "learning_rate": 4.397859484476441e-05,
114
- "loss": 0.8674,
115
- "step": 7500
116
- },
117
- {
118
- "epoch": 0.257118981808832,
119
- "grad_norm": 0.5207423567771912,
120
- "learning_rate": 4.357684643568812e-05,
121
- "loss": 0.8149,
122
- "step": 8000
123
- },
124
- {
125
- "epoch": 0.27318891817188407,
126
- "grad_norm": 0.47738897800445557,
127
- "learning_rate": 4.317509802661182e-05,
128
- "loss": 0.7685,
129
- "step": 8500
130
- },
131
- {
132
- "epoch": 0.28925885453493605,
133
- "grad_norm": 0.4176841676235199,
134
- "learning_rate": 4.2773349617535516e-05,
135
- "loss": 0.7119,
136
- "step": 9000
137
- },
138
- {
139
- "epoch": 0.30532879089798803,
140
- "grad_norm": 0.381345272064209,
141
- "learning_rate": 4.2371601208459215e-05,
142
- "loss": 0.6682,
143
- "step": 9500
144
- },
145
- {
146
- "epoch": 0.3213987272610401,
147
- "grad_norm": 0.6301918625831604,
148
- "learning_rate": 4.1969852799382914e-05,
149
- "loss": 0.6505,
150
- "step": 10000
151
- },
152
- {
153
- "epoch": 0.33746866362409206,
154
- "grad_norm": 0.4057278335094452,
155
- "learning_rate": 4.156810439030662e-05,
156
- "loss": 0.6063,
157
- "step": 10500
158
- },
159
- {
160
- "epoch": 0.35353859998714404,
161
- "grad_norm": 0.5442121624946594,
162
- "learning_rate": 4.116635598123031e-05,
163
- "loss": 0.5735,
164
- "step": 11000
165
- },
166
- {
167
- "epoch": 0.369608536350196,
168
- "grad_norm": 0.5113051533699036,
169
- "learning_rate": 4.076460757215402e-05,
170
- "loss": 0.5432,
171
- "step": 11500
172
- },
173
- {
174
- "epoch": 0.38567847271324807,
175
- "grad_norm": 0.6383316516876221,
176
- "learning_rate": 4.0362859163077716e-05,
177
- "loss": 0.5143,
178
- "step": 12000
179
- },
180
- {
181
- "epoch": 0.40174840907630005,
182
- "grad_norm": 0.4316321611404419,
183
- "learning_rate": 3.996111075400142e-05,
184
- "loss": 0.4867,
185
- "step": 12500
186
- },
187
- {
188
- "epoch": 0.41781834543935203,
189
- "grad_norm": 0.42703017592430115,
190
- "learning_rate": 3.955936234492511e-05,
191
- "loss": 0.4614,
192
- "step": 13000
193
- },
194
- {
195
- "epoch": 0.4338882818024041,
196
- "grad_norm": 0.4263227880001068,
197
- "learning_rate": 3.915761393584881e-05,
198
- "loss": 0.4391,
199
- "step": 13500
200
- },
201
- {
202
- "epoch": 0.44995821816545606,
203
- "grad_norm": 0.47577473521232605,
204
- "learning_rate": 3.875586552677252e-05,
205
- "loss": 0.4241,
206
- "step": 14000
207
- },
208
- {
209
- "epoch": 0.46602815452850804,
210
- "grad_norm": 0.3419073224067688,
211
- "learning_rate": 3.8354117117696216e-05,
212
- "loss": 0.4019,
213
- "step": 14500
214
- },
215
- {
216
- "epoch": 0.4820980908915601,
217
- "grad_norm": 0.3402538001537323,
218
- "learning_rate": 3.7952368708619915e-05,
219
- "loss": 0.3876,
220
- "step": 15000
221
- },
222
- {
223
- "epoch": 0.49816802725461207,
224
- "grad_norm": 0.7072747349739075,
225
- "learning_rate": 3.7550620299543614e-05,
226
- "loss": 0.364,
227
- "step": 15500
228
- },
229
- {
230
- "epoch": 0.514237963617664,
231
- "grad_norm": 0.31305554509162903,
232
- "learning_rate": 3.714887189046731e-05,
233
- "loss": 0.3463,
234
- "step": 16000
235
- },
236
- {
237
- "epoch": 0.530307899980716,
238
- "grad_norm": 0.4203876554965973,
239
- "learning_rate": 3.674792697820917e-05,
240
- "loss": 0.3371,
241
- "step": 16500
242
- },
243
- {
244
- "epoch": 0.5463778363437681,
245
- "grad_norm": 0.49149152636528015,
246
- "learning_rate": 3.634617856913286e-05,
247
- "loss": 0.3189,
248
- "step": 17000
249
- },
250
- {
251
- "epoch": 0.5624477727068201,
252
- "grad_norm": 0.6438118815422058,
253
- "learning_rate": 3.594443016005657e-05,
254
- "loss": 0.3074,
255
- "step": 17500
256
- },
257
- {
258
- "epoch": 0.5785177090698721,
259
- "grad_norm": 0.6619039177894592,
260
- "learning_rate": 3.554268175098027e-05,
261
- "loss": 0.2989,
262
- "step": 18000
263
- },
264
- {
265
- "epoch": 0.5945876454329241,
266
- "grad_norm": 0.39272341132164,
267
- "learning_rate": 3.514093334190397e-05,
268
- "loss": 0.2818,
269
- "step": 18500
270
- },
271
- {
272
- "epoch": 0.6106575817959761,
273
- "grad_norm": 0.3980565369129181,
274
- "learning_rate": 3.473998842964582e-05,
275
- "loss": 0.273,
276
- "step": 19000
277
- },
278
- {
279
- "epoch": 0.626727518159028,
280
- "grad_norm": 0.3052268922328949,
281
- "learning_rate": 3.4338240020569516e-05,
282
- "loss": 0.2677,
283
- "step": 19500
284
- },
285
- {
286
- "epoch": 0.6427974545220801,
287
- "grad_norm": 0.5999760031700134,
288
- "learning_rate": 3.3937295108311374e-05,
289
- "loss": 0.2572,
290
- "step": 20000
291
- },
292
- {
293
- "epoch": 0.6588673908851321,
294
- "grad_norm": 0.4283508062362671,
295
- "learning_rate": 3.3536350196053226e-05,
296
- "loss": 0.2468,
297
- "step": 20500
298
- },
299
- {
300
- "epoch": 0.6749373272481841,
301
- "grad_norm": 0.4289894700050354,
302
- "learning_rate": 3.3134601786976924e-05,
303
- "loss": 0.2414,
304
- "step": 21000
305
- },
306
- {
307
- "epoch": 0.6910072636112361,
308
- "grad_norm": 0.26386120915412903,
309
- "learning_rate": 3.273285337790062e-05,
310
- "loss": 0.2422,
311
- "step": 21500
312
- },
313
- {
314
- "epoch": 0.7070771999742881,
315
- "grad_norm": 0.41095244884490967,
316
- "learning_rate": 3.233110496882433e-05,
317
- "loss": 0.2282,
318
- "step": 22000
319
- },
320
- {
321
- "epoch": 0.7231471363373401,
322
- "grad_norm": 0.29514652490615845,
323
- "learning_rate": 3.192935655974803e-05,
324
- "loss": 0.2252,
325
- "step": 22500
326
- },
327
- {
328
- "epoch": 0.739217072700392,
329
- "grad_norm": 0.4044126570224762,
330
- "learning_rate": 3.152760815067172e-05,
331
- "loss": 0.2211,
332
- "step": 23000
333
- },
334
- {
335
- "epoch": 0.7552870090634441,
336
- "grad_norm": 0.3767038881778717,
337
- "learning_rate": 3.1125859741595425e-05,
338
- "loss": 0.2115,
339
- "step": 23500
340
- },
341
- {
342
- "epoch": 0.7713569454264961,
343
- "grad_norm": 0.36812517046928406,
344
- "learning_rate": 3.0724111332519124e-05,
345
- "loss": 0.2059,
346
- "step": 24000
347
- },
348
- {
349
- "epoch": 0.7874268817895481,
350
- "grad_norm": 0.3709106147289276,
351
- "learning_rate": 3.0322362923442826e-05,
352
- "loss": 0.2035,
353
- "step": 24500
354
- },
355
- {
356
- "epoch": 0.8034968181526001,
357
- "grad_norm": 0.3285115361213684,
358
- "learning_rate": 2.9920614514366525e-05,
359
- "loss": 0.1993,
360
- "step": 25000
361
- },
362
- {
363
- "epoch": 0.8195667545156521,
364
- "grad_norm": 0.3229790925979614,
365
- "learning_rate": 2.9518866105290227e-05,
366
- "loss": 0.1968,
367
- "step": 25500
368
- },
369
- {
370
- "epoch": 0.8356366908787041,
371
- "grad_norm": 0.37397509813308716,
372
- "learning_rate": 2.9117117696213926e-05,
373
- "loss": 0.194,
374
- "step": 26000
375
- },
376
- {
377
- "epoch": 0.8517066272417562,
378
- "grad_norm": 0.33143311738967896,
379
- "learning_rate": 2.871536928713762e-05,
380
- "loss": 0.1875,
381
- "step": 26500
382
- },
383
- {
384
- "epoch": 0.8677765636048081,
385
- "grad_norm": 0.2748125493526459,
386
- "learning_rate": 2.8313620878061327e-05,
387
- "loss": 0.1854,
388
- "step": 27000
389
- },
390
- {
391
- "epoch": 0.8838464999678601,
392
- "grad_norm": 0.2606910169124603,
393
- "learning_rate": 2.7911872468985022e-05,
394
- "loss": 0.1809,
395
- "step": 27500
396
- },
397
- {
398
- "epoch": 0.8999164363309121,
399
- "grad_norm": 0.28182655572891235,
400
- "learning_rate": 2.7510124059908728e-05,
401
- "loss": 0.1815,
402
- "step": 28000
403
- },
404
- {
405
- "epoch": 0.9159863726939641,
406
- "grad_norm": 0.3056446313858032,
407
- "learning_rate": 2.7109179147650576e-05,
408
- "loss": 0.1775,
409
- "step": 28500
410
- },
411
- {
412
- "epoch": 0.9320563090570161,
413
- "grad_norm": 0.2458430379629135,
414
- "learning_rate": 2.6707430738574275e-05,
415
- "loss": 0.1714,
416
- "step": 29000
417
- },
418
- {
419
- "epoch": 0.9481262454200682,
420
- "grad_norm": 0.2681204080581665,
421
- "learning_rate": 2.6305682329497977e-05,
422
- "loss": 0.1734,
423
- "step": 29500
424
- },
425
- {
426
- "epoch": 0.9641961817831202,
427
- "grad_norm": 0.38170355558395386,
428
- "learning_rate": 2.5903933920421676e-05,
429
- "loss": 0.1701,
430
- "step": 30000
431
- },
432
- {
433
- "epoch": 0.9802661181461721,
434
- "grad_norm": 0.43841251730918884,
435
- "learning_rate": 2.550298900816353e-05,
436
- "loss": 0.1656,
437
- "step": 30500
438
- },
439
- {
440
- "epoch": 0.9963360545092241,
441
- "grad_norm": 0.4082754850387573,
442
- "learning_rate": 2.510124059908723e-05,
443
- "loss": 0.1649,
444
- "step": 31000
445
- },
446
- {
447
- "epoch": 1.0124059908722762,
448
- "grad_norm": 0.27510714530944824,
449
- "learning_rate": 2.4699492190010928e-05,
450
- "loss": 0.1636,
451
- "step": 31500
452
- },
453
- {
454
- "epoch": 1.028475927235328,
455
- "grad_norm": 0.3550429344177246,
456
- "learning_rate": 2.429774378093463e-05,
457
- "loss": 0.1615,
458
- "step": 32000
459
- },
460
- {
461
- "epoch": 1.0445458635983802,
462
- "grad_norm": 0.382055401802063,
463
- "learning_rate": 2.389599537185833e-05,
464
- "loss": 0.1597,
465
- "step": 32500
466
- },
467
- {
468
- "epoch": 1.060615799961432,
469
- "grad_norm": 0.38698843121528625,
470
- "learning_rate": 2.349424696278203e-05,
471
- "loss": 0.155,
472
- "step": 33000
473
- },
474
- {
475
- "epoch": 1.0766857363244842,
476
- "grad_norm": 0.380403995513916,
477
- "learning_rate": 2.309249855370573e-05,
478
- "loss": 0.1594,
479
- "step": 33500
480
- },
481
- {
482
- "epoch": 1.0927556726875363,
483
- "grad_norm": 0.17210371792316437,
484
- "learning_rate": 2.269155364144758e-05,
485
- "loss": 0.1543,
486
- "step": 34000
487
- },
488
- {
489
- "epoch": 1.1088256090505881,
490
- "grad_norm": 0.33378392457962036,
491
- "learning_rate": 2.228980523237128e-05,
492
- "loss": 0.1549,
493
- "step": 34500
494
- },
495
- {
496
- "epoch": 1.1248955454136402,
497
- "grad_norm": 0.282175213098526,
498
- "learning_rate": 2.1888056823294982e-05,
499
- "loss": 0.1509,
500
- "step": 35000
501
- },
502
- {
503
- "epoch": 1.140965481776692,
504
- "grad_norm": 0.4829972982406616,
505
- "learning_rate": 2.148630841421868e-05,
506
- "loss": 0.1508,
507
- "step": 35500
508
- },
509
- {
510
- "epoch": 1.1570354181397442,
511
- "grad_norm": 0.4101378321647644,
512
- "learning_rate": 2.1084560005142383e-05,
513
- "loss": 0.1487,
514
- "step": 36000
515
- },
516
- {
517
- "epoch": 1.173105354502796,
518
- "grad_norm": 0.24467173218727112,
519
- "learning_rate": 2.0682811596066082e-05,
520
- "loss": 0.1482,
521
- "step": 36500
522
- },
523
- {
524
- "epoch": 1.1891752908658482,
525
- "grad_norm": 0.2552469074726105,
526
- "learning_rate": 2.028106318698978e-05,
527
- "loss": 0.1474,
528
- "step": 37000
529
- },
530
- {
531
- "epoch": 1.2052452272289003,
532
- "grad_norm": 0.33155035972595215,
533
- "learning_rate": 1.987931477791348e-05,
534
- "loss": 0.1427,
535
- "step": 37500
536
- },
537
- {
538
- "epoch": 1.2213151635919521,
539
- "grad_norm": 0.41133707761764526,
540
- "learning_rate": 1.9478369865655334e-05,
541
- "loss": 0.143,
542
- "step": 38000
543
- },
544
- {
545
- "epoch": 1.2373850999550042,
546
- "grad_norm": 0.36144211888313293,
547
- "learning_rate": 1.9076621456579033e-05,
548
- "loss": 0.1387,
549
- "step": 38500
550
- },
551
- {
552
- "epoch": 1.253455036318056,
553
- "grad_norm": 0.36597776412963867,
554
- "learning_rate": 1.8674873047502732e-05,
555
- "loss": 0.1415,
556
- "step": 39000
557
- },
558
- {
559
- "epoch": 1.2695249726811082,
560
- "grad_norm": 0.37640953063964844,
561
- "learning_rate": 1.8273124638426434e-05,
562
- "loss": 0.1408,
563
- "step": 39500
564
- },
565
- {
566
- "epoch": 1.28559490904416,
567
- "grad_norm": 0.22886815667152405,
568
- "learning_rate": 1.7872983222986438e-05,
569
- "loss": 0.1366,
570
- "step": 40000
571
- },
572
- {
573
- "epoch": 1.3016648454072122,
574
- "grad_norm": 0.44980695843696594,
575
- "learning_rate": 1.7471234813910137e-05,
576
- "loss": 0.1411,
577
- "step": 40500
578
- },
579
- {
580
- "epoch": 1.3177347817702643,
581
- "grad_norm": 0.46285852789878845,
582
- "learning_rate": 1.706948640483384e-05,
583
- "loss": 0.1367,
584
- "step": 41000
585
- },
586
- {
587
- "epoch": 1.3338047181333161,
588
- "grad_norm": 0.1757335215806961,
589
- "learning_rate": 1.6667737995757538e-05,
590
- "loss": 0.1361,
591
- "step": 41500
592
- },
593
- {
594
- "epoch": 1.3498746544963682,
595
- "grad_norm": 0.28056710958480835,
596
- "learning_rate": 1.6265989586681236e-05,
597
- "loss": 0.1371,
598
- "step": 42000
599
- },
600
- {
601
- "epoch": 1.3659445908594203,
602
- "grad_norm": 0.4234681725502014,
603
- "learning_rate": 1.586424117760494e-05,
604
- "loss": 0.1363,
605
- "step": 42500
606
- },
607
- {
608
- "epoch": 1.3820145272224722,
609
- "grad_norm": 0.2925218641757965,
610
- "learning_rate": 1.5462492768528637e-05,
611
- "loss": 0.1336,
612
- "step": 43000
613
- },
614
- {
615
- "epoch": 1.398084463585524,
616
- "grad_norm": 0.23110254108905792,
617
- "learning_rate": 1.5060744359452336e-05,
618
- "loss": 0.1305,
619
- "step": 43500
620
- },
621
- {
622
- "epoch": 1.4141543999485762,
623
- "grad_norm": 0.4187003970146179,
624
- "learning_rate": 1.4659799447194189e-05,
625
- "loss": 0.1374,
626
- "step": 44000
627
- },
628
- {
629
- "epoch": 1.4302243363116283,
630
- "grad_norm": 0.30868059396743774,
631
- "learning_rate": 1.425805103811789e-05,
632
- "loss": 0.1332,
633
- "step": 44500
634
- },
635
- {
636
- "epoch": 1.4462942726746801,
637
- "grad_norm": 0.24373352527618408,
638
- "learning_rate": 1.385630262904159e-05,
639
- "loss": 0.133,
640
- "step": 45000
641
- },
642
- {
643
- "epoch": 1.4623642090377322,
644
- "grad_norm": 0.3976458013057709,
645
- "learning_rate": 1.345455421996529e-05,
646
- "loss": 0.1317,
647
- "step": 45500
648
- },
649
- {
650
- "epoch": 1.4784341454007843,
651
- "grad_norm": 0.15130922198295593,
652
- "learning_rate": 1.3053609307707144e-05,
653
- "loss": 0.1294,
654
- "step": 46000
655
- },
656
- {
657
- "epoch": 1.4945040817638362,
658
- "grad_norm": 0.26361921429634094,
659
- "learning_rate": 1.2652664395448997e-05,
660
- "loss": 0.1316,
661
- "step": 46500
662
- },
663
- {
664
- "epoch": 1.510574018126888,
665
- "grad_norm": 0.3039293587207794,
666
- "learning_rate": 1.2250915986372695e-05,
667
- "loss": 0.1294,
668
- "step": 47000
669
- },
670
- {
671
- "epoch": 1.5266439544899402,
672
- "grad_norm": 0.23085398972034454,
673
- "learning_rate": 1.1849167577296394e-05,
674
- "loss": 0.1304,
675
- "step": 47500
676
- },
677
- {
678
- "epoch": 1.5427138908529923,
679
- "grad_norm": 0.45066356658935547,
680
- "learning_rate": 1.1447419168220095e-05,
681
- "loss": 0.1283,
682
- "step": 48000
683
- },
684
- {
685
- "epoch": 1.5587838272160441,
686
- "grad_norm": 0.2428194135427475,
687
- "learning_rate": 1.1045670759143795e-05,
688
- "loss": 0.1279,
689
- "step": 48500
690
- },
691
- {
692
- "epoch": 1.5748537635790962,
693
- "grad_norm": 0.15587645769119263,
694
- "learning_rate": 1.0643922350067494e-05,
695
- "loss": 0.1273,
696
- "step": 49000
697
- },
698
- {
699
- "epoch": 1.5909236999421483,
700
- "grad_norm": 0.5055563449859619,
701
- "learning_rate": 1.0242977437809347e-05,
702
- "loss": 0.127,
703
- "step": 49500
704
- },
705
- {
706
- "epoch": 1.6069936363052002,
707
- "grad_norm": 0.31220686435699463,
708
- "learning_rate": 9.841229028733047e-06,
709
- "loss": 0.1284,
710
- "step": 50000
711
- },
712
- {
713
- "epoch": 1.623063572668252,
714
- "grad_norm": 0.3776426613330841,
715
- "learning_rate": 9.439480619656748e-06,
716
- "loss": 0.1251,
717
- "step": 50500
718
- },
719
- {
720
- "epoch": 1.6391335090313044,
721
- "grad_norm": 0.2834898829460144,
722
- "learning_rate": 9.037732210580447e-06,
723
- "loss": 0.1226,
724
- "step": 51000
725
- },
726
- {
727
- "epoch": 1.6552034453943563,
728
- "grad_norm": 0.2295331507921219,
729
- "learning_rate": 8.635983801504147e-06,
730
- "loss": 0.1233,
731
- "step": 51500
732
- },
733
- {
734
- "epoch": 1.6712733817574081,
735
- "grad_norm": 0.22921015322208405,
736
- "learning_rate": 8.234235392427848e-06,
737
- "loss": 0.1256,
738
- "step": 52000
739
- },
740
- {
741
- "epoch": 1.6873433181204602,
742
- "grad_norm": 0.3294677138328552,
743
- "learning_rate": 7.832486983351546e-06,
744
- "loss": 0.1257,
745
- "step": 52500
746
- },
747
- {
748
- "epoch": 1.7034132544835123,
749
- "grad_norm": 0.21186766028404236,
750
- "learning_rate": 7.430738574275246e-06,
751
- "loss": 0.1254,
752
- "step": 53000
753
- },
754
- {
755
- "epoch": 1.7194831908465642,
756
- "grad_norm": 0.43346577882766724,
757
- "learning_rate": 7.029793662017099e-06,
758
- "loss": 0.1228,
759
- "step": 53500
760
- },
761
- {
762
- "epoch": 1.7355531272096163,
763
- "grad_norm": 0.20274986326694489,
764
- "learning_rate": 6.628045252940798e-06,
765
- "loss": 0.124,
766
- "step": 54000
767
- },
768
- {
769
- "epoch": 1.7516230635726684,
770
- "grad_norm": 0.2912587523460388,
771
- "learning_rate": 6.2262968438644984e-06,
772
- "loss": 0.1236,
773
- "step": 54500
774
- },
775
- {
776
- "epoch": 1.7676929999357203,
777
- "grad_norm": 0.5663316249847412,
778
- "learning_rate": 5.824548434788198e-06,
779
- "loss": 0.1236,
780
- "step": 55000
781
- },
782
- {
783
- "epoch": 1.7837629362987721,
784
- "grad_norm": 0.2563399076461792,
785
- "learning_rate": 5.423603522530051e-06,
786
- "loss": 0.1241,
787
- "step": 55500
788
- },
789
- {
790
- "epoch": 1.7998328726618242,
791
- "grad_norm": 0.26923516392707825,
792
- "learning_rate": 5.022658610271903e-06,
793
- "loss": 0.1231,
794
- "step": 56000
795
- },
796
- {
797
- "epoch": 1.8159028090248763,
798
- "grad_norm": 0.15516141057014465,
799
- "learning_rate": 4.620910201195604e-06,
800
- "loss": 0.1225,
801
- "step": 56500
802
- },
803
- {
804
- "epoch": 1.8319727453879282,
805
- "grad_norm": 0.1603991985321045,
806
- "learning_rate": 4.219161792119303e-06,
807
- "loss": 0.1236,
808
- "step": 57000
809
- },
810
- {
811
- "epoch": 1.8480426817509803,
812
- "grad_norm": 0.3031301498413086,
813
- "learning_rate": 3.817413383043003e-06,
814
- "loss": 0.124,
815
- "step": 57500
816
- },
817
- {
818
- "epoch": 1.8641126181140324,
819
- "grad_norm": 0.25160399079322815,
820
- "learning_rate": 3.4156649739667035e-06,
821
- "loss": 0.1212,
822
- "step": 58000
823
- },
824
- {
825
- "epoch": 1.8801825544770843,
826
- "grad_norm": 0.23327353596687317,
827
- "learning_rate": 3.013916564890403e-06,
828
- "loss": 0.1199,
829
- "step": 58500
830
- },
831
- {
832
- "epoch": 1.8962524908401361,
833
- "grad_norm": 0.23530858755111694,
834
- "learning_rate": 2.6129716526322558e-06,
835
- "loss": 0.1228,
836
- "step": 59000
837
- },
838
- {
839
- "epoch": 1.9123224272031882,
840
- "grad_norm": 0.20596709847450256,
841
- "learning_rate": 2.211223243555956e-06,
842
- "loss": 0.1205,
843
- "step": 59500
844
- },
845
- {
846
- "epoch": 1.9283923635662403,
847
- "grad_norm": 0.35043200850486755,
848
- "learning_rate": 1.8094748344796555e-06,
849
- "loss": 0.1188,
850
- "step": 60000
851
- },
852
- {
853
- "epoch": 1.9444622999292922,
854
- "grad_norm": 0.21463052928447723,
855
- "learning_rate": 1.4077264254033555e-06,
856
- "loss": 0.1225,
857
- "step": 60500
858
- },
859
- {
860
- "epoch": 1.9605322362923443,
861
- "grad_norm": 0.27506574988365173,
862
- "learning_rate": 1.0059780163270554e-06,
863
- "loss": 0.1233,
864
- "step": 61000
865
- },
866
- {
867
- "epoch": 1.9766021726553964,
868
- "grad_norm": 0.3260590732097626,
869
- "learning_rate": 6.042296072507553e-07,
870
- "loss": 0.1218,
871
- "step": 61500
872
- },
873
- {
874
- "epoch": 1.9926721090184483,
875
- "grad_norm": 0.2609338164329529,
876
- "learning_rate": 2.0248119817445525e-07,
877
- "loss": 0.1212,
878
- "step": 62000
879
- }
880
- ],
881
- "logging_steps": 500,
882
- "max_steps": 62228,
883
- "num_input_tokens_seen": 0,
884
- "num_train_epochs": 2,
885
- "save_steps": 500,
886
- "stateful_callbacks": {
887
- "TrainerControl": {
888
- "args": {
889
- "should_epoch_stop": false,
890
- "should_evaluate": false,
891
- "should_log": false,
892
- "should_save": true,
893
- "should_training_stop": false
894
- },
895
- "attributes": {}
896
- }
897
- },
898
- "total_flos": 1.3425879637662106e+17,
899
- "train_batch_size": 32,
900
- "trial_name": null,
901
- "trial_params": null
902
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoints/checkpoint-62228/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:0c54372bd61e30b7426b8e85a283f22608688432422ac7fceaf84d249cf8e2b0
3
- size 484163514
 
 
 
 
checkpoints/checkpoint-62228/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
checkpoints/checkpoint-62228/trainer_state.json DELETED
@@ -1,902 +0,0 @@
1
- {
2
- "best_global_step": null,
3
- "best_metric": null,
4
- "best_model_checkpoint": null,
5
- "epoch": 2.0,
6
- "eval_steps": 500,
7
- "global_step": 62228,
8
- "is_hyper_param_search": false,
9
- "is_local_process_zero": true,
10
- "is_world_process_zero": true,
11
- "log_history": [
12
- {
13
- "epoch": 0.016069936363052,
14
- "grad_norm": 0.2569522559642792,
15
- "learning_rate": 4.960307257183262e-05,
16
- "loss": 2.9119,
17
- "step": 500
18
- },
19
- {
20
- "epoch": 0.032139872726104,
21
- "grad_norm": 0.26731985807418823,
22
- "learning_rate": 4.9201324162756315e-05,
23
- "loss": 2.2886,
24
- "step": 1000
25
- },
26
- {
27
- "epoch": 0.04820980908915601,
28
- "grad_norm": 0.3099210560321808,
29
- "learning_rate": 4.8799575753680014e-05,
30
- "loss": 2.1431,
31
- "step": 1500
32
- },
33
- {
34
- "epoch": 0.064279745452208,
35
- "grad_norm": 0.28836730122566223,
36
- "learning_rate": 4.839782734460372e-05,
37
- "loss": 2.0369,
38
- "step": 2000
39
- },
40
- {
41
- "epoch": 0.08034968181526002,
42
- "grad_norm": 0.4808545708656311,
43
- "learning_rate": 4.799607893552742e-05,
44
- "loss": 1.932,
45
- "step": 2500
46
- },
47
- {
48
- "epoch": 0.09641961817831202,
49
- "grad_norm": 0.38000208139419556,
50
- "learning_rate": 4.759433052645112e-05,
51
- "loss": 1.7766,
52
- "step": 3000
53
- },
54
- {
55
- "epoch": 0.11248955454136401,
56
- "grad_norm": 0.4310196340084076,
57
- "learning_rate": 4.7192582117374816e-05,
58
- "loss": 1.6022,
59
- "step": 3500
60
- },
61
- {
62
- "epoch": 0.128559490904416,
63
- "grad_norm": 0.40425005555152893,
64
- "learning_rate": 4.6790833708298515e-05,
65
- "loss": 1.4576,
66
- "step": 4000
67
- },
68
- {
69
- "epoch": 0.14462942726746802,
70
- "grad_norm": 0.3811793327331543,
71
- "learning_rate": 4.638908529922222e-05,
72
- "loss": 1.3384,
73
- "step": 4500
74
- },
75
- {
76
- "epoch": 0.16069936363052004,
77
- "grad_norm": 0.38943949341773987,
78
- "learning_rate": 4.598733689014591e-05,
79
- "loss": 1.2233,
80
- "step": 5000
81
- },
82
- {
83
- "epoch": 0.17676929999357202,
84
- "grad_norm": 0.5517480373382568,
85
- "learning_rate": 4.558558848106962e-05,
86
- "loss": 1.1342,
87
- "step": 5500
88
- },
89
- {
90
- "epoch": 0.19283923635662403,
91
- "grad_norm": 0.4235232174396515,
92
- "learning_rate": 4.518384007199332e-05,
93
- "loss": 1.0432,
94
- "step": 6000
95
- },
96
- {
97
- "epoch": 0.20890917271967602,
98
- "grad_norm": 0.4617592692375183,
99
- "learning_rate": 4.478209166291702e-05,
100
- "loss": 0.9781,
101
- "step": 6500
102
- },
103
- {
104
- "epoch": 0.22497910908272803,
105
- "grad_norm": 0.5447149872779846,
106
- "learning_rate": 4.4380343253840714e-05,
107
- "loss": 0.927,
108
- "step": 7000
109
- },
110
- {
111
- "epoch": 0.24104904544578004,
112
- "grad_norm": 0.4740816354751587,
113
- "learning_rate": 4.397859484476441e-05,
114
- "loss": 0.8674,
115
- "step": 7500
116
- },
117
- {
118
- "epoch": 0.257118981808832,
119
- "grad_norm": 0.5207423567771912,
120
- "learning_rate": 4.357684643568812e-05,
121
- "loss": 0.8149,
122
- "step": 8000
123
- },
124
- {
125
- "epoch": 0.27318891817188407,
126
- "grad_norm": 0.47738897800445557,
127
- "learning_rate": 4.317509802661182e-05,
128
- "loss": 0.7685,
129
- "step": 8500
130
- },
131
- {
132
- "epoch": 0.28925885453493605,
133
- "grad_norm": 0.4176841676235199,
134
- "learning_rate": 4.2773349617535516e-05,
135
- "loss": 0.7119,
136
- "step": 9000
137
- },
138
- {
139
- "epoch": 0.30532879089798803,
140
- "grad_norm": 0.381345272064209,
141
- "learning_rate": 4.2371601208459215e-05,
142
- "loss": 0.6682,
143
- "step": 9500
144
- },
145
- {
146
- "epoch": 0.3213987272610401,
147
- "grad_norm": 0.6301918625831604,
148
- "learning_rate": 4.1969852799382914e-05,
149
- "loss": 0.6505,
150
- "step": 10000
151
- },
152
- {
153
- "epoch": 0.33746866362409206,
154
- "grad_norm": 0.4057278335094452,
155
- "learning_rate": 4.156810439030662e-05,
156
- "loss": 0.6063,
157
- "step": 10500
158
- },
159
- {
160
- "epoch": 0.35353859998714404,
161
- "grad_norm": 0.5442121624946594,
162
- "learning_rate": 4.116635598123031e-05,
163
- "loss": 0.5735,
164
- "step": 11000
165
- },
166
- {
167
- "epoch": 0.369608536350196,
168
- "grad_norm": 0.5113051533699036,
169
- "learning_rate": 4.076460757215402e-05,
170
- "loss": 0.5432,
171
- "step": 11500
172
- },
173
- {
174
- "epoch": 0.38567847271324807,
175
- "grad_norm": 0.6383316516876221,
176
- "learning_rate": 4.0362859163077716e-05,
177
- "loss": 0.5143,
178
- "step": 12000
179
- },
180
- {
181
- "epoch": 0.40174840907630005,
182
- "grad_norm": 0.4316321611404419,
183
- "learning_rate": 3.996111075400142e-05,
184
- "loss": 0.4867,
185
- "step": 12500
186
- },
187
- {
188
- "epoch": 0.41781834543935203,
189
- "grad_norm": 0.42703017592430115,
190
- "learning_rate": 3.955936234492511e-05,
191
- "loss": 0.4614,
192
- "step": 13000
193
- },
194
- {
195
- "epoch": 0.4338882818024041,
196
- "grad_norm": 0.4263227880001068,
197
- "learning_rate": 3.915761393584881e-05,
198
- "loss": 0.4391,
199
- "step": 13500
200
- },
201
- {
202
- "epoch": 0.44995821816545606,
203
- "grad_norm": 0.47577473521232605,
204
- "learning_rate": 3.875586552677252e-05,
205
- "loss": 0.4241,
206
- "step": 14000
207
- },
208
- {
209
- "epoch": 0.46602815452850804,
210
- "grad_norm": 0.3419073224067688,
211
- "learning_rate": 3.8354117117696216e-05,
212
- "loss": 0.4019,
213
- "step": 14500
214
- },
215
- {
216
- "epoch": 0.4820980908915601,
217
- "grad_norm": 0.3402538001537323,
218
- "learning_rate": 3.7952368708619915e-05,
219
- "loss": 0.3876,
220
- "step": 15000
221
- },
222
- {
223
- "epoch": 0.49816802725461207,
224
- "grad_norm": 0.7072747349739075,
225
- "learning_rate": 3.7550620299543614e-05,
226
- "loss": 0.364,
227
- "step": 15500
228
- },
229
- {
230
- "epoch": 0.514237963617664,
231
- "grad_norm": 0.31305554509162903,
232
- "learning_rate": 3.714887189046731e-05,
233
- "loss": 0.3463,
234
- "step": 16000
235
- },
236
- {
237
- "epoch": 0.530307899980716,
238
- "grad_norm": 0.4203876554965973,
239
- "learning_rate": 3.674792697820917e-05,
240
- "loss": 0.3371,
241
- "step": 16500
242
- },
243
- {
244
- "epoch": 0.5463778363437681,
245
- "grad_norm": 0.49149152636528015,
246
- "learning_rate": 3.634617856913286e-05,
247
- "loss": 0.3189,
248
- "step": 17000
249
- },
250
- {
251
- "epoch": 0.5624477727068201,
252
- "grad_norm": 0.6438118815422058,
253
- "learning_rate": 3.594443016005657e-05,
254
- "loss": 0.3074,
255
- "step": 17500
256
- },
257
- {
258
- "epoch": 0.5785177090698721,
259
- "grad_norm": 0.6619039177894592,
260
- "learning_rate": 3.554268175098027e-05,
261
- "loss": 0.2989,
262
- "step": 18000
263
- },
264
- {
265
- "epoch": 0.5945876454329241,
266
- "grad_norm": 0.39272341132164,
267
- "learning_rate": 3.514093334190397e-05,
268
- "loss": 0.2818,
269
- "step": 18500
270
- },
271
- {
272
- "epoch": 0.6106575817959761,
273
- "grad_norm": 0.3980565369129181,
274
- "learning_rate": 3.473998842964582e-05,
275
- "loss": 0.273,
276
- "step": 19000
277
- },
278
- {
279
- "epoch": 0.626727518159028,
280
- "grad_norm": 0.3052268922328949,
281
- "learning_rate": 3.4338240020569516e-05,
282
- "loss": 0.2677,
283
- "step": 19500
284
- },
285
- {
286
- "epoch": 0.6427974545220801,
287
- "grad_norm": 0.5999760031700134,
288
- "learning_rate": 3.3937295108311374e-05,
289
- "loss": 0.2572,
290
- "step": 20000
291
- },
292
- {
293
- "epoch": 0.6588673908851321,
294
- "grad_norm": 0.4283508062362671,
295
- "learning_rate": 3.3536350196053226e-05,
296
- "loss": 0.2468,
297
- "step": 20500
298
- },
299
- {
300
- "epoch": 0.6749373272481841,
301
- "grad_norm": 0.4289894700050354,
302
- "learning_rate": 3.3134601786976924e-05,
303
- "loss": 0.2414,
304
- "step": 21000
305
- },
306
- {
307
- "epoch": 0.6910072636112361,
308
- "grad_norm": 0.26386120915412903,
309
- "learning_rate": 3.273285337790062e-05,
310
- "loss": 0.2422,
311
- "step": 21500
312
- },
313
- {
314
- "epoch": 0.7070771999742881,
315
- "grad_norm": 0.41095244884490967,
316
- "learning_rate": 3.233110496882433e-05,
317
- "loss": 0.2282,
318
- "step": 22000
319
- },
320
- {
321
- "epoch": 0.7231471363373401,
322
- "grad_norm": 0.29514652490615845,
323
- "learning_rate": 3.192935655974803e-05,
324
- "loss": 0.2252,
325
- "step": 22500
326
- },
327
- {
328
- "epoch": 0.739217072700392,
329
- "grad_norm": 0.4044126570224762,
330
- "learning_rate": 3.152760815067172e-05,
331
- "loss": 0.2211,
332
- "step": 23000
333
- },
334
- {
335
- "epoch": 0.7552870090634441,
336
- "grad_norm": 0.3767038881778717,
337
- "learning_rate": 3.1125859741595425e-05,
338
- "loss": 0.2115,
339
- "step": 23500
340
- },
341
- {
342
- "epoch": 0.7713569454264961,
343
- "grad_norm": 0.36812517046928406,
344
- "learning_rate": 3.0724111332519124e-05,
345
- "loss": 0.2059,
346
- "step": 24000
347
- },
348
- {
349
- "epoch": 0.7874268817895481,
350
- "grad_norm": 0.3709106147289276,
351
- "learning_rate": 3.0322362923442826e-05,
352
- "loss": 0.2035,
353
- "step": 24500
354
- },
355
- {
356
- "epoch": 0.8034968181526001,
357
- "grad_norm": 0.3285115361213684,
358
- "learning_rate": 2.9920614514366525e-05,
359
- "loss": 0.1993,
360
- "step": 25000
361
- },
362
- {
363
- "epoch": 0.8195667545156521,
364
- "grad_norm": 0.3229790925979614,
365
- "learning_rate": 2.9518866105290227e-05,
366
- "loss": 0.1968,
367
- "step": 25500
368
- },
369
- {
370
- "epoch": 0.8356366908787041,
371
- "grad_norm": 0.37397509813308716,
372
- "learning_rate": 2.9117117696213926e-05,
373
- "loss": 0.194,
374
- "step": 26000
375
- },
376
- {
377
- "epoch": 0.8517066272417562,
378
- "grad_norm": 0.33143311738967896,
379
- "learning_rate": 2.871536928713762e-05,
380
- "loss": 0.1875,
381
- "step": 26500
382
- },
383
- {
384
- "epoch": 0.8677765636048081,
385
- "grad_norm": 0.2748125493526459,
386
- "learning_rate": 2.8313620878061327e-05,
387
- "loss": 0.1854,
388
- "step": 27000
389
- },
390
- {
391
- "epoch": 0.8838464999678601,
392
- "grad_norm": 0.2606910169124603,
393
- "learning_rate": 2.7911872468985022e-05,
394
- "loss": 0.1809,
395
- "step": 27500
396
- },
397
- {
398
- "epoch": 0.8999164363309121,
399
- "grad_norm": 0.28182655572891235,
400
- "learning_rate": 2.7510124059908728e-05,
401
- "loss": 0.1815,
402
- "step": 28000
403
- },
404
- {
405
- "epoch": 0.9159863726939641,
406
- "grad_norm": 0.3056446313858032,
407
- "learning_rate": 2.7109179147650576e-05,
408
- "loss": 0.1775,
409
- "step": 28500
410
- },
411
- {
412
- "epoch": 0.9320563090570161,
413
- "grad_norm": 0.2458430379629135,
414
- "learning_rate": 2.6707430738574275e-05,
415
- "loss": 0.1714,
416
- "step": 29000
417
- },
418
- {
419
- "epoch": 0.9481262454200682,
420
- "grad_norm": 0.2681204080581665,
421
- "learning_rate": 2.6305682329497977e-05,
422
- "loss": 0.1734,
423
- "step": 29500
424
- },
425
- {
426
- "epoch": 0.9641961817831202,
427
- "grad_norm": 0.38170355558395386,
428
- "learning_rate": 2.5903933920421676e-05,
429
- "loss": 0.1701,
430
- "step": 30000
431
- },
432
- {
433
- "epoch": 0.9802661181461721,
434
- "grad_norm": 0.43841251730918884,
435
- "learning_rate": 2.550298900816353e-05,
436
- "loss": 0.1656,
437
- "step": 30500
438
- },
439
- {
440
- "epoch": 0.9963360545092241,
441
- "grad_norm": 0.4082754850387573,
442
- "learning_rate": 2.510124059908723e-05,
443
- "loss": 0.1649,
444
- "step": 31000
445
- },
446
- {
447
- "epoch": 1.0124059908722762,
448
- "grad_norm": 0.27510714530944824,
449
- "learning_rate": 2.4699492190010928e-05,
450
- "loss": 0.1636,
451
- "step": 31500
452
- },
453
- {
454
- "epoch": 1.028475927235328,
455
- "grad_norm": 0.3550429344177246,
456
- "learning_rate": 2.429774378093463e-05,
457
- "loss": 0.1615,
458
- "step": 32000
459
- },
460
- {
461
- "epoch": 1.0445458635983802,
462
- "grad_norm": 0.382055401802063,
463
- "learning_rate": 2.389599537185833e-05,
464
- "loss": 0.1597,
465
- "step": 32500
466
- },
467
- {
468
- "epoch": 1.060615799961432,
469
- "grad_norm": 0.38698843121528625,
470
- "learning_rate": 2.349424696278203e-05,
471
- "loss": 0.155,
472
- "step": 33000
473
- },
474
- {
475
- "epoch": 1.0766857363244842,
476
- "grad_norm": 0.380403995513916,
477
- "learning_rate": 2.309249855370573e-05,
478
- "loss": 0.1594,
479
- "step": 33500
480
- },
481
- {
482
- "epoch": 1.0927556726875363,
483
- "grad_norm": 0.17210371792316437,
484
- "learning_rate": 2.269155364144758e-05,
485
- "loss": 0.1543,
486
- "step": 34000
487
- },
488
- {
489
- "epoch": 1.1088256090505881,
490
- "grad_norm": 0.33378392457962036,
491
- "learning_rate": 2.228980523237128e-05,
492
- "loss": 0.1549,
493
- "step": 34500
494
- },
495
- {
496
- "epoch": 1.1248955454136402,
497
- "grad_norm": 0.282175213098526,
498
- "learning_rate": 2.1888056823294982e-05,
499
- "loss": 0.1509,
500
- "step": 35000
501
- },
502
- {
503
- "epoch": 1.140965481776692,
504
- "grad_norm": 0.4829972982406616,
505
- "learning_rate": 2.148630841421868e-05,
506
- "loss": 0.1508,
507
- "step": 35500
508
- },
509
- {
510
- "epoch": 1.1570354181397442,
511
- "grad_norm": 0.4101378321647644,
512
- "learning_rate": 2.1084560005142383e-05,
513
- "loss": 0.1487,
514
- "step": 36000
515
- },
516
- {
517
- "epoch": 1.173105354502796,
518
- "grad_norm": 0.24467173218727112,
519
- "learning_rate": 2.0682811596066082e-05,
520
- "loss": 0.1482,
521
- "step": 36500
522
- },
523
- {
524
- "epoch": 1.1891752908658482,
525
- "grad_norm": 0.2552469074726105,
526
- "learning_rate": 2.028106318698978e-05,
527
- "loss": 0.1474,
528
- "step": 37000
529
- },
530
- {
531
- "epoch": 1.2052452272289003,
532
- "grad_norm": 0.33155035972595215,
533
- "learning_rate": 1.987931477791348e-05,
534
- "loss": 0.1427,
535
- "step": 37500
536
- },
537
- {
538
- "epoch": 1.2213151635919521,
539
- "grad_norm": 0.41133707761764526,
540
- "learning_rate": 1.9478369865655334e-05,
541
- "loss": 0.143,
542
- "step": 38000
543
- },
544
- {
545
- "epoch": 1.2373850999550042,
546
- "grad_norm": 0.36144211888313293,
547
- "learning_rate": 1.9076621456579033e-05,
548
- "loss": 0.1387,
549
- "step": 38500
550
- },
551
- {
552
- "epoch": 1.253455036318056,
553
- "grad_norm": 0.36597776412963867,
554
- "learning_rate": 1.8674873047502732e-05,
555
- "loss": 0.1415,
556
- "step": 39000
557
- },
558
- {
559
- "epoch": 1.2695249726811082,
560
- "grad_norm": 0.37640953063964844,
561
- "learning_rate": 1.8273124638426434e-05,
562
- "loss": 0.1408,
563
- "step": 39500
564
- },
565
- {
566
- "epoch": 1.28559490904416,
567
- "grad_norm": 0.22886815667152405,
568
- "learning_rate": 1.7872983222986438e-05,
569
- "loss": 0.1366,
570
- "step": 40000
571
- },
572
- {
573
- "epoch": 1.3016648454072122,
574
- "grad_norm": 0.44980695843696594,
575
- "learning_rate": 1.7471234813910137e-05,
576
- "loss": 0.1411,
577
- "step": 40500
578
- },
579
- {
580
- "epoch": 1.3177347817702643,
581
- "grad_norm": 0.46285852789878845,
582
- "learning_rate": 1.706948640483384e-05,
583
- "loss": 0.1367,
584
- "step": 41000
585
- },
586
- {
587
- "epoch": 1.3338047181333161,
588
- "grad_norm": 0.1757335215806961,
589
- "learning_rate": 1.6667737995757538e-05,
590
- "loss": 0.1361,
591
- "step": 41500
592
- },
593
- {
594
- "epoch": 1.3498746544963682,
595
- "grad_norm": 0.28056710958480835,
596
- "learning_rate": 1.6265989586681236e-05,
597
- "loss": 0.1371,
598
- "step": 42000
599
- },
600
- {
601
- "epoch": 1.3659445908594203,
602
- "grad_norm": 0.4234681725502014,
603
- "learning_rate": 1.586424117760494e-05,
604
- "loss": 0.1363,
605
- "step": 42500
606
- },
607
- {
608
- "epoch": 1.3820145272224722,
609
- "grad_norm": 0.2925218641757965,
610
- "learning_rate": 1.5462492768528637e-05,
611
- "loss": 0.1336,
612
- "step": 43000
613
- },
614
- {
615
- "epoch": 1.398084463585524,
616
- "grad_norm": 0.23110254108905792,
617
- "learning_rate": 1.5060744359452336e-05,
618
- "loss": 0.1305,
619
- "step": 43500
620
- },
621
- {
622
- "epoch": 1.4141543999485762,
623
- "grad_norm": 0.4187003970146179,
624
- "learning_rate": 1.4659799447194189e-05,
625
- "loss": 0.1374,
626
- "step": 44000
627
- },
628
- {
629
- "epoch": 1.4302243363116283,
630
- "grad_norm": 0.30868059396743774,
631
- "learning_rate": 1.425805103811789e-05,
632
- "loss": 0.1332,
633
- "step": 44500
634
- },
635
- {
636
- "epoch": 1.4462942726746801,
637
- "grad_norm": 0.24373352527618408,
638
- "learning_rate": 1.385630262904159e-05,
639
- "loss": 0.133,
640
- "step": 45000
641
- },
642
- {
643
- "epoch": 1.4623642090377322,
644
- "grad_norm": 0.3976458013057709,
645
- "learning_rate": 1.345455421996529e-05,
646
- "loss": 0.1317,
647
- "step": 45500
648
- },
649
- {
650
- "epoch": 1.4784341454007843,
651
- "grad_norm": 0.15130922198295593,
652
- "learning_rate": 1.3053609307707144e-05,
653
- "loss": 0.1294,
654
- "step": 46000
655
- },
656
- {
657
- "epoch": 1.4945040817638362,
658
- "grad_norm": 0.26361921429634094,
659
- "learning_rate": 1.2652664395448997e-05,
660
- "loss": 0.1316,
661
- "step": 46500
662
- },
663
- {
664
- "epoch": 1.510574018126888,
665
- "grad_norm": 0.3039293587207794,
666
- "learning_rate": 1.2250915986372695e-05,
667
- "loss": 0.1294,
668
- "step": 47000
669
- },
670
- {
671
- "epoch": 1.5266439544899402,
672
- "grad_norm": 0.23085398972034454,
673
- "learning_rate": 1.1849167577296394e-05,
674
- "loss": 0.1304,
675
- "step": 47500
676
- },
677
- {
678
- "epoch": 1.5427138908529923,
679
- "grad_norm": 0.45066356658935547,
680
- "learning_rate": 1.1447419168220095e-05,
681
- "loss": 0.1283,
682
- "step": 48000
683
- },
684
- {
685
- "epoch": 1.5587838272160441,
686
- "grad_norm": 0.2428194135427475,
687
- "learning_rate": 1.1045670759143795e-05,
688
- "loss": 0.1279,
689
- "step": 48500
690
- },
691
- {
692
- "epoch": 1.5748537635790962,
693
- "grad_norm": 0.15587645769119263,
694
- "learning_rate": 1.0643922350067494e-05,
695
- "loss": 0.1273,
696
- "step": 49000
697
- },
698
- {
699
- "epoch": 1.5909236999421483,
700
- "grad_norm": 0.5055563449859619,
701
- "learning_rate": 1.0242977437809347e-05,
702
- "loss": 0.127,
703
- "step": 49500
704
- },
705
- {
706
- "epoch": 1.6069936363052002,
707
- "grad_norm": 0.31220686435699463,
708
- "learning_rate": 9.841229028733047e-06,
709
- "loss": 0.1284,
710
- "step": 50000
711
- },
712
- {
713
- "epoch": 1.623063572668252,
714
- "grad_norm": 0.3776426613330841,
715
- "learning_rate": 9.439480619656748e-06,
716
- "loss": 0.1251,
717
- "step": 50500
718
- },
719
- {
720
- "epoch": 1.6391335090313044,
721
- "grad_norm": 0.2834898829460144,
722
- "learning_rate": 9.037732210580447e-06,
723
- "loss": 0.1226,
724
- "step": 51000
725
- },
726
- {
727
- "epoch": 1.6552034453943563,
728
- "grad_norm": 0.2295331507921219,
729
- "learning_rate": 8.635983801504147e-06,
730
- "loss": 0.1233,
731
- "step": 51500
732
- },
733
- {
734
- "epoch": 1.6712733817574081,
735
- "grad_norm": 0.22921015322208405,
736
- "learning_rate": 8.234235392427848e-06,
737
- "loss": 0.1256,
738
- "step": 52000
739
- },
740
- {
741
- "epoch": 1.6873433181204602,
742
- "grad_norm": 0.3294677138328552,
743
- "learning_rate": 7.832486983351546e-06,
744
- "loss": 0.1257,
745
- "step": 52500
746
- },
747
- {
748
- "epoch": 1.7034132544835123,
749
- "grad_norm": 0.21186766028404236,
750
- "learning_rate": 7.430738574275246e-06,
751
- "loss": 0.1254,
752
- "step": 53000
753
- },
754
- {
755
- "epoch": 1.7194831908465642,
756
- "grad_norm": 0.43346577882766724,
757
- "learning_rate": 7.029793662017099e-06,
758
- "loss": 0.1228,
759
- "step": 53500
760
- },
761
- {
762
- "epoch": 1.7355531272096163,
763
- "grad_norm": 0.20274986326694489,
764
- "learning_rate": 6.628045252940798e-06,
765
- "loss": 0.124,
766
- "step": 54000
767
- },
768
- {
769
- "epoch": 1.7516230635726684,
770
- "grad_norm": 0.2912587523460388,
771
- "learning_rate": 6.2262968438644984e-06,
772
- "loss": 0.1236,
773
- "step": 54500
774
- },
775
- {
776
- "epoch": 1.7676929999357203,
777
- "grad_norm": 0.5663316249847412,
778
- "learning_rate": 5.824548434788198e-06,
779
- "loss": 0.1236,
780
- "step": 55000
781
- },
782
- {
783
- "epoch": 1.7837629362987721,
784
- "grad_norm": 0.2563399076461792,
785
- "learning_rate": 5.423603522530051e-06,
786
- "loss": 0.1241,
787
- "step": 55500
788
- },
789
- {
790
- "epoch": 1.7998328726618242,
791
- "grad_norm": 0.26923516392707825,
792
- "learning_rate": 5.022658610271903e-06,
793
- "loss": 0.1231,
794
- "step": 56000
795
- },
796
- {
797
- "epoch": 1.8159028090248763,
798
- "grad_norm": 0.15516141057014465,
799
- "learning_rate": 4.620910201195604e-06,
800
- "loss": 0.1225,
801
- "step": 56500
802
- },
803
- {
804
- "epoch": 1.8319727453879282,
805
- "grad_norm": 0.1603991985321045,
806
- "learning_rate": 4.219161792119303e-06,
807
- "loss": 0.1236,
808
- "step": 57000
809
- },
810
- {
811
- "epoch": 1.8480426817509803,
812
- "grad_norm": 0.3031301498413086,
813
- "learning_rate": 3.817413383043003e-06,
814
- "loss": 0.124,
815
- "step": 57500
816
- },
817
- {
818
- "epoch": 1.8641126181140324,
819
- "grad_norm": 0.25160399079322815,
820
- "learning_rate": 3.4156649739667035e-06,
821
- "loss": 0.1212,
822
- "step": 58000
823
- },
824
- {
825
- "epoch": 1.8801825544770843,
826
- "grad_norm": 0.23327353596687317,
827
- "learning_rate": 3.013916564890403e-06,
828
- "loss": 0.1199,
829
- "step": 58500
830
- },
831
- {
832
- "epoch": 1.8962524908401361,
833
- "grad_norm": 0.23530858755111694,
834
- "learning_rate": 2.6129716526322558e-06,
835
- "loss": 0.1228,
836
- "step": 59000
837
- },
838
- {
839
- "epoch": 1.9123224272031882,
840
- "grad_norm": 0.20596709847450256,
841
- "learning_rate": 2.211223243555956e-06,
842
- "loss": 0.1205,
843
- "step": 59500
844
- },
845
- {
846
- "epoch": 1.9283923635662403,
847
- "grad_norm": 0.35043200850486755,
848
- "learning_rate": 1.8094748344796555e-06,
849
- "loss": 0.1188,
850
- "step": 60000
851
- },
852
- {
853
- "epoch": 1.9444622999292922,
854
- "grad_norm": 0.21463052928447723,
855
- "learning_rate": 1.4077264254033555e-06,
856
- "loss": 0.1225,
857
- "step": 60500
858
- },
859
- {
860
- "epoch": 1.9605322362923443,
861
- "grad_norm": 0.27506574988365173,
862
- "learning_rate": 1.0059780163270554e-06,
863
- "loss": 0.1233,
864
- "step": 61000
865
- },
866
- {
867
- "epoch": 1.9766021726553964,
868
- "grad_norm": 0.3260590732097626,
869
- "learning_rate": 6.042296072507553e-07,
870
- "loss": 0.1218,
871
- "step": 61500
872
- },
873
- {
874
- "epoch": 1.9926721090184483,
875
- "grad_norm": 0.2609338164329529,
876
- "learning_rate": 2.0248119817445525e-07,
877
- "loss": 0.1212,
878
- "step": 62000
879
- }
880
- ],
881
- "logging_steps": 500,
882
- "max_steps": 62228,
883
- "num_input_tokens_seen": 0,
884
- "num_train_epochs": 2,
885
- "save_steps": 500,
886
- "stateful_callbacks": {
887
- "TrainerControl": {
888
- "args": {
889
- "should_epoch_stop": false,
890
- "should_evaluate": false,
891
- "should_log": false,
892
- "should_save": true,
893
- "should_training_stop": true
894
- },
895
- "attributes": {}
896
- }
897
- },
898
- "total_flos": 1.3475225258478797e+17,
899
- "train_batch_size": 32,
900
- "trial_name": null,
901
- "trial_params": null
902
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
config.json CHANGED
@@ -1,60 +1,29 @@
1
  {
2
- "architectures": [
3
- "T5ForConditionalGeneration"
4
- ],
5
- "classifier_dropout": 0.0,
6
- "d_ff": 2048,
7
- "d_kv": 64,
8
- "d_model": 512,
9
- "decoder_start_token_id": 0,
10
- "dense_act_fn": "relu",
11
- "dropout_rate": 0.1,
12
- "eos_token_id": 1,
13
- "feed_forward_proj": "relu",
14
- "initializer_factor": 1.0,
15
- "is_encoder_decoder": true,
16
- "is_gated_act": false,
17
- "layer_norm_epsilon": 1e-06,
18
- "model_type": "t5",
19
- "n_positions": 512,
20
- "num_decoder_layers": 6,
21
- "num_heads": 8,
22
- "num_layers": 6,
23
- "output_past": true,
24
- "pad_token_id": 0,
25
- "relative_attention_max_distance": 128,
26
- "relative_attention_num_buckets": 32,
27
- "task_specific_params": {
28
- "summarization": {
29
- "early_stopping": true,
30
- "length_penalty": 2.0,
31
- "max_length": 200,
32
- "min_length": 30,
33
- "no_repeat_ngram_size": 3,
34
- "num_beams": 4,
35
- "prefix": "summarize: "
36
- },
37
- "translation_en_to_de": {
38
- "early_stopping": true,
39
- "max_length": 300,
40
- "num_beams": 4,
41
- "prefix": "translate English to German: "
42
- },
43
- "translation_en_to_fr": {
44
- "early_stopping": true,
45
- "max_length": 300,
46
- "num_beams": 4,
47
- "prefix": "translate English to French: "
48
- },
49
- "translation_en_to_ro": {
50
- "early_stopping": true,
51
- "max_length": 300,
52
- "num_beams": 4,
53
- "prefix": "translate English to Romanian: "
54
- }
55
- },
56
- "torch_dtype": "float32",
57
- "transformers_version": "4.51.2",
58
- "use_cache": true,
59
- "vocab_size": 32128
60
- }
 
1
  {
2
+ "architectures": [
3
+ "T5ForConditionalGeneration"
4
+ ],
5
+ "classifier_dropout": 0.0,
6
+ "d_ff": 2048,
7
+ "d_kv": 64,
8
+ "d_model": 512,
9
+ "decoder_start_token_id": 0,
10
+ "dense_act_fn": "relu",
11
+ "dropout_rate": 0.1,
12
+ "eos_token_id": 1,
13
+ "feed_forward_proj": "relu",
14
+ "initializer_factor": 1.0,
15
+ "is_encoder_decoder": true,
16
+ "is_gated_act": false,
17
+ "layer_norm_epsilon": 1e-06,
18
+ "model_type": "t5",
19
+ "num_decoder_layers": 6,
20
+ "num_heads": 8,
21
+ "num_layers": 6,
22
+ "pad_token_id": 0,
23
+ "relative_attention_max_distance": 128,
24
+ "relative_attention_num_buckets": 32,
25
+ "torch_dtype": "float32",
26
+ "transformers_version": "4.51.2",
27
+ "use_cache": true,
28
+ "vocab_size": 40100
29
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "decoder_start_token_id": 0,
4
+ "eos_token_id": 1,
5
+ "pad_token_id": 0,
6
+ "transformers_version": "4.51.2"
7
+ }
generator_config.json DELETED
@@ -1,7 +0,0 @@
1
- {
2
- "_from_model_config": true,
3
- "decoder_start_token_id": 0,
4
- "eos_token_id": 1,
5
- "pad_token_id": 0,
6
- "transformers_version": "4.51.2"
7
- }
 
 
 
 
 
 
 
 
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7fee19ce79c6f45de80a2e273ede68b16d500dae3a2e3da26235d6b4ebc0f92e
3
- size 242041896
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0c4fdcc76ad48f4138a7163703cbb71ce3039d34301ba0d1641ab373f74cb78
3
+ size 258368552